-- မာတိကာသို့ ခုန်သွားရန်

-- မဝ်ဂျူ:ceb-pron

-- နူ ဝိက်ရှေန်နရဳ

-- Documentation for this module may be created at မဝ်ဂျူ:ceb-pron/doc

-- Module table; the functions attached to it are what the module exposes.
local export = {}
-- Local aliases for Unicode-aware string functions from the mw.ustring library.
-- NOTE(review): neither alias appears to be referenced in the visible part of
-- this file -- confirm before relying on or removing them.
local gsub = mw.ustring.gsub
local len = mw.ustring.len

-- Convert a Cebuano spelling (or respelling) into an IPA transcription string.
--
-- Parameters:
--   word     - the spelling to transcribe (string), or a frame-like table whose
--              `args[1]` holds the spelling and `args[4]` the debug flag.
--              When nil or empty, the current page title is used instead.
--   phonetic - when truthy, the phonemic result is further rewritten into a
--              narrower phonetic transcription (reduced vowels, phonetic
--              diphthongs, nasal assimilation, dental consonants).
--   do_debug - when equal to "yes", the intermediate snapshots are appended to
--              the returned string. Replaced by `args[4]` when `word` is a table.
--
-- Returns the bare transcription string (plus debug snapshots if requested).
--
-- Respelling conventions accepted in the input:
--   7  glottal stop
--   _  space, or a break inside a consonant pair that would otherwise be
--      read as a digraph
--   .  explicit syllable break
-- Vowels with an acute or circumflex mark the stressed syllable; circumflex and
-- grave vowels are finally written as vowel + glottal stop.
--
-- The substitutions below are strongly order-dependent; do not reorder them.
function export.show(word, phonetic, do_debug)
	local ugsub = mw.ustring.gsub
	local ufind = mw.ustring.find

	-- Snapshots of the word after each stage, emitted only when do_debug == "yes".
	-- Deliberately not called `debug`, which would shadow the standard library.
	local snapshots = {}

	if type(word) == 'table' then
		do_debug = word.args[4]
		word = word.args[1]
	end

	-- An empty string is truthy in Lua, so without this check an empty
	-- parameter would bypass the page-title fallback and yield an empty result.
	if word == "" then
		word = nil
	end

	word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
	-- Drop everything outside the supported alphabet.
	-- 7 is for glottal stop. Underscore is used as spaces or to break consonant
	-- pair that could be treated as digraph.
	word = ugsub(word, "[^abcdefghijklmnopqrstuvwxyzáâàéêèíîìóôòúûùñ7_.]", "")

	table.insert(snapshots, word)

	local V = "[aeiouáâàéèêíîìóôòúùû]" -- vowels
	local C = "[^aeiouáâàéèêíîìóôòúùû_.]" -- consonants (anything that is not a vowel, "_" or ".")

	-- Add glottal stop for input starting with a vowel.
	word = ugsub(word, "^([aáeéiíoóuú])", "ʔ%1")

	-- Substitute CH, QU and X.
	word = ugsub(word, "c([h])", "ts")
	word = ugsub(word, "q([u])", "k") -- Spanish QU only
	word = ugsub(word, "x", "ks")

	-- C and G before I and E, and CU plus vowel (proper nouns from Spanish,
	-- native words spelled in Spanish only).
	-- If the original Spanish uses güe/i, please respell to gw.
	-- Only "gue" and "gui" are replaced to avoid affecting native "ge" and "gi".
	-- If spelled with "gi" and "ge", please respell to "hi" and "he".
	word = ugsub(word, "c([ieíé])", "s%1")
	word = ugsub(word, "cu([aeo])", "kw%1")
	word = ugsub(word, "gu([ieíé])", "g%1")

	-- Glottal stop in word boundary or other places.
	word = ugsub(word, "7", "ʔ")

	-- Underscore to break consonant cluster or add space.
	word = ugsub(word, "_", " ")

	table.insert(snapshots, word)

	-- Letter-to-IPA. "j" is parked as the placeholder "ĵ" first so that the "j"
	-- produced afterwards from "ñ" and "y" is not confused with it.
	word = ugsub(word, "c", "k")
	word = ugsub(word, "f", "p")
	word = ugsub(word, "g", "ɡ")
	-- Please respell Spanish J to H. Not the real sound. Generally only for
	-- dialectal pronunciations, otherwise transcribe foreign J to DY.
	word = ugsub(word, "j", "ĵ")
	word = ugsub(word, "ñ", "nj")
	word = ugsub(word, "nɡ", "ŋ")
	word = ugsub(word, "r", "ɾ") -- Use this to respell Spanish "rr"
	word = ugsub(word, "v", "b")
	word = ugsub(word, "y", "j")
	word = ugsub(word, "z", "s")

	table.insert(snapshots, word)

	-- Syllable division. Each pattern is applied twice because gsub matches do
	-- not overlap, so adjacent syllables need a second pass.
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. ")(" .. C .. V .. ")", "%1.%2")
	end
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. C .. ")(" .. C .. V .. ")", "%1.%2")
	end
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. C .. ")(" .. C .. C .. V .. ")", "%1.%2")
	end
	-- NOTE(review): this replacement puts back exactly what it matched, so it
	-- has no effect; kept as in the original pending confirmation of intent.
	word = ugsub(word, "([aáeéiíoóuú])([aáeéiíoóuú])", "%1%2")
	-- Split doubled high/back vowels into separate syllables.
	word = ugsub(word, "([ií])([ií])", "%1.%2")
	word = ugsub(word, "([oóuú])([oóuú])", "%1.%2")

	table.insert(snapshots, word)

	-- Accentuation.
	local syllables = mw.text.split(word, "%.")
	if ufind(word, "[áéíóúâêîôû]") then
		-- Explicit accents: every syllable carrying one gets a stress mark.
		for i = 1, #syllables do
			if ufind(syllables[i], "[áéíóúâêîôû]") then
				syllables[i] = "ˈ" .. syllables[i]
			end
		end
	else
		-- No explicit accent: stress the last syllable when the word ends in a
		-- character outside the listed set, otherwise the second-to-last one.
		if ufind(word, "[^aàeèiìoòuùbdɡfjklmnŋpɾstw]$") then
			syllables[#syllables] = "ˈ" .. syllables[#syllables]
		else
			if #syllables > 1 then
				syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
			end
		end
	end

	-- `word` has not been rebuilt from `syllables` yet, so this snapshot repeats
	-- the previous one; kept so the debug output stays unchanged.
	table.insert(snapshots, word)

	-- Rejoin the syllables; the "." separators are dropped here.
	word = table.concat(syllables)

	-- Back-replace the placeholder for "j".
	word = ugsub(word, "ĵ", "d͡ʒ")

	-- Secondary stress: when several stress marks are present, the earlier ones
	-- are turned into secondary stress marks.
	word = ugsub(word, "ˈ(.+)ˈ", "ˌ%1ˈ")
	word = ugsub(word, "ˈ(.+)ˌ", "ˌ%1ˌ")
	word = ugsub(word, "ˌ(.+)ˈ(.+)ˈ", "ˌ%1ˌ%2ˈ")

	table.insert(snapshots, word)

	-- Remove "j" and "w" inserted on vowel pair starting with "i" and "u".
	word = ugsub(word, "([ií])([ˈˌ]?)j([aáeéoóuú])", "%1%2%3")
	word = ugsub(word, "([uú])([ˈˌ]?)w([aáéeií])", "%1%2%3")

	table.insert(snapshots, word)

	-- Change the semivowels /j/ or /w/ to /i/ or /u/ (part of diphthongs) when
	-- they stand before a consonant or at the end of the string.
	word = ugsub(word, "j([ˈˌ]?)([bdɡjklmnŋpɾstwʔ])", "i%1%2")
	word = ugsub(word, "j$", "i")
	word = ugsub(word, "w([ˈˌ]?)([bdɡjklmnŋpɾstwʔ])", "u%1%2")
	word = ugsub(word, "w$", "u")

	table.insert(snapshots, word)

	-- Corrections for diphthongs (the first vowel loses any acute accent).
	word = ugsub(word, "([aá])([i])", "ai") -- ay
	word = ugsub(word, "([aá])([u])", "au") -- aw
	word = ugsub(word, "([ií])u", "iu") -- iw
	word = ugsub(word, "([oó])u", "ou") -- ow

	table.insert(snapshots, word)

	-- Phonemic to phonetic/allophonic transcription.
	if phonetic then

		-- Same content as the previous snapshot; kept so the debug output stays
		-- unchanged.
		table.insert(snapshots, word)

		-- Turn phonemic diphthongs to phonetic diphthongs.
		word = ugsub(word, "([aá])i", "aɪ̯") -- ay
		word = ugsub(word, "([aá])u", "aʊ̯") -- aw
		word = ugsub(word, "([oó])i", "oɪ̯") -- oy
		word = ugsub(word, "([eé])i", "eɪ̯") -- ey
		word = ugsub(word, "([ií])[u]", "ɪʊ̯") -- iw

		table.insert(snapshots, word)

		-- Replace unstressed vowels. Every unaccented vowel is reduced here;
		-- some are restored further down.
		word = ugsub(word, "a", "ʌ")
		word = ugsub(word, "e", "ɪ")
		word = ugsub(word, "i", "ɪ")
		word = ugsub(word, "o", "ʊ")
		word = ugsub(word, "u", "ʊ")

		-- Remove accents (phonetic values; this runs before the generic accent
		-- removal at the end of the function, which then finds nothing left).
		word = ugsub(word, "[áàâéèêíìîóòôúùû]", {
			["á"] = "a", ["â"] = "aʔ", ["à"] = "ʌʔ",
			["é"] = "i", ["ê"] = "iʔ", ["è"] = "ɪʔ",
			["í"] = "i", ["î"] = "iʔ", ["ì"] = "ɪʔ",
			["ó"] = "u", ["ô"] = "uʔ", ["ò"] = "ʊʔ",
			["ú"] = "u", ["û"] = "uʔ", ["ù"] = "ʊʔ"
		})

		table.insert(snapshots, word)

		-- Remove "j" and "w" inserted on vowel pair starting with "i" and "u".
		word = ugsub(word, "([ɪi])([ˈˌ]?)j([ʌaeoʊu])", "%1%2%3")
		word = ugsub(word, "([ʊu])([ˈˌ]?)w([ʌaeɪio])", "%1%2%3")

		table.insert(snapshots, word)

		-- Combine consonants (except H) followed by I/U and certain stressed
		-- vowels: the high vowel becomes a glide and the stress mark moves in
		-- front of the consonant.
		word = ugsub(word, "([bdɡklmnpɾst])([ɪi])([ˈˌ])([ʌaeoʊu])", "%3%1j%4")
		word = ugsub(word, "([bdɡklmnpɾst])([ʊu])([ˈˌ])([ʌaeɪi])", "%3%1w%4")

		table.insert(snapshots, word)

		-- Nasal assimilation. These must match plain "n": the dental diacritic
		-- is only added at the very end of this branch, so a pattern written
		-- with the dental "n" could never match at this point.
		word = ugsub(word, "n([ˈˌ]?)k", "ŋ%1k") -- /n/ before /k/ (some proper nouns)
		word = ugsub(word, "n([ˈˌ]?)ɡ", "ŋ%1ɡ") -- /n/ before /ɡ/ (some proper nouns and loanwords)
		word = ugsub(word, "n([ˈˌ]?)h", "ŋ%1h") -- /n/ before /h/ (some proper nouns)
		word = ugsub(word, "n([ˈˌ]?)m", "m%1m") -- /n/ before /m/

		-- Final fix for phonetic diphthongs: undo the vowel reduction that hit
		-- their first element.
		word = ugsub(word, "([aʌ])ɪ̯", "aɪ̯") -- ay
		word = ugsub(word, "([aʌ])ʊ̯", "aʊ̯") -- aw
		word = ugsub(word, "([eɪi])ɪ̯", "eɪ̯") -- ey
		word = ugsub(word, "([ʊu])ɪ̯", "oɪ̯") -- oy

		table.insert(snapshots, word)

		-- Change /ʌ/, /ɪ/ and /ʊ/ back to /a/, /i/ and /u/ when they directly
		-- follow a stress mark plus a one- or two-consonant onset
		-- (original comment: "in penultimate").
		word = ugsub(word, "([ˈˌ])([ʔ bdɡhklmnŋpɾstwj])([lɾstj]?)ʌ", "%1%2%3a")
		word = ugsub(word, "([ˈˌ])([ʔ bdɡhklmnŋpɾstwj])([lɾstj]?)ɪ", "%1%2%3i")
		word = ugsub(word, "([ˈˌ])([ʔ bdɡhklmnŋpɾstwj])([lɾstj]?)ʊ", "%1%2%3u")

		table.insert(snapshots, word)

		-- Coalesce [wʊ] to [w].
		word = ugsub(word, "([w])([ʊ])", "%1")

		table.insert(snapshots, word)

		-- Turn phonemic consonants to Cebuano dental consonants.
		word = ugsub(word, "d", "d̪")
		word = ugsub(word, "l", "l̪")
		word = ugsub(word, "n", "n̪")
		word = ugsub(word, "ɾ", "ɾ̪")
		word = ugsub(word, "s", "s̪")
		word = ugsub(word, "t", "t̪")

	end

	table.insert(snapshots, word)

	-- Remove accents (phonemic values). Circumflex and grave vowels become the
	-- plain vowel followed by a glottal stop.
	word = ugsub(word, "[áàâéèêíìîóòôúùû]", {
		["á"] = "a", ["â"] = "aʔ", ["à"] = "aʔ",
		["é"] = "ɛ", ["ê"] = "ɛʔ", ["è"] = "ɛʔ",
		["í"] = "i", ["î"] = "iʔ", ["ì"] = "iʔ",
		["ó"] = "o", ["ô"] = "oʔ", ["ò"] = "oʔ",
		["ú"] = "u", ["û"] = "uʔ", ["ù"] = "uʔ"
	})

	-- Snapshots are appended back-to-back, without a separator.
	return word .. (do_debug == "yes" and table.concat(snapshots, "") or "")

end

-- Entry point for the phonetic (allophonic) transcription: forwards `frame`
-- to export.show with the `phonetic` flag switched on and returns its result.
function export.phonetic(frame)
	local want_phonetic = true
	return export.show(frame, want_phonetic)
end

-- Hand the module table (with show and phonetic) back to the loader.
return export