မဝ်ဂျူ:ceb-pron
မံက်ပြာကတ်
Documentation for this module may be created at မဝ်ဂျူ:ceb-pron/doc
-- Table of public functions; returned at the end of the module.
local export = {}
-- Short aliases for the Unicode-aware string helpers from mw.ustring.
-- NOTE(review): neither alias appears to be referenced in the visible part of
-- this module (the functions call mw.ustring.* directly) -- confirm before
-- removing them.
local gsub = mw.ustring.gsub
local len = mw.ustring.len
--- Convert a Cebuano spelling (or respelling) into an IPA transcription.
--
-- The function can be called directly with a string, or with a frame-like
-- table (anything carrying an `args` field); in the latter case the word is
-- read from `args[1]` and the debug flag from `args[4]`.
--
-- Respelling conventions handled by the input filter: "7" stands for a glottal
-- stop, "_" becomes a space (and can be used to break a consonant pair that
-- would otherwise be read as a digraph), "." marks a syllable break. Every
-- other character outside the Latin letters and accented vowels listed in the
-- filter is discarded.
--
-- @param word      string to transcribe, or a frame-like table; when nil the
--                  title of the current page is used instead.
-- @param phonetic  when truthy, the allophonic (phonetic) rules are applied on
--                  top of the phonemic transcription.
-- @param do_debug  when equal to the string "yes", every intermediate form
--                  recorded along the way is appended to the result.
-- @return the transcription as a string (without surrounding slashes/brackets).
function export.show(word, phonetic, do_debug)
	local ugsub = mw.ustring.gsub
	local ufind = mw.ustring.find

	-- Intermediate forms, emitted only when do_debug == "yes". Deliberately not
	-- called `debug`, so that the standard `debug` library is not shadowed.
	local debug_log = {}
	local function snapshot()
		debug_log[#debug_log + 1] = word
	end

	if type(word) == 'table' then
		local frame = word
		do_debug = frame.args[4]
		word = frame.args[1]
	end

	-- NOTE(review): only nil falls back to the page title; an empty string is
	-- transcribed as-is (yielding an empty result) -- confirm callers expect that.
	word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
	-- 7 is for glottal stop. Underscore is used as spaces or to break a
	-- consonant pair that could be treated as a digraph.
	word = ugsub(word, "[^abcdefghijklmnopqrstuvwxyzáâàéêèíîìóôòúûùñ7_.]", "")
	snapshot()

	local V = "[aeiouáâàéèêíîìóôòúùû]" -- vowels
	local C = "[^aeiouáâàéèêíîìóôòúùû_.]" -- consonants (anything that is not a vowel, "_" or ".")

	-- Add a glottal stop to words starting with a plain or acute-accented vowel.
	word = ugsub(word, "^([aáeéiíoóuú])", "ʔ%1")

	-- Substitute CH, QU and X.
	word = ugsub(word, "c([h])", "ts")
	word = ugsub(word, "q([u])", "k") -- Spanish QU only
	word = ugsub(word, "x", "ks")

	-- C and G before I and E, and CU plus vowel (proper nouns from Spanish,
	-- native words spelled in Spanish only).
	-- If the original Spanish uses güe/i, please respell to gw.
	-- Only "gue" and "gui" are replaced to avoid affecting native "ge" and "gi".
	-- If spelled with "gi" and "ge", please respell to "hi" and "he".
	word = ugsub(word, "c([ieíé])", "s%1")
	word = ugsub(word, "cu([aeo])", "kw%1")
	word = ugsub(word, "gu([ieíé])", "g%1")

	-- Glottal stop at a word boundary or elsewhere.
	word = ugsub(word, "7", "ʔ")

	-- Underscore to break a consonant cluster or add a space.
	word = ugsub(word, "_", " ")
	snapshot()

	-- Letter-to-IPA. The order matters: "j" is parked as the placeholder "ĵ"
	-- before "ñ" and "y" introduce the IPA symbol "j".
	word = ugsub(word, "c", "k")
	word = ugsub(word, "f", "p")
	word = ugsub(word, "g", "ɡ")
	-- Please respell Spanish J to H. "ĵ" is not the real sound, only a
	-- placeholder (expanded to d͡ʒ below). Generally only for dialectal
	-- pronunciations, otherwise transcribe foreign J to DY.
	word = ugsub(word, "j", "ĵ")
	word = ugsub(word, "ñ", "nj")
	word = ugsub(word, "nɡ", "ŋ")
	word = ugsub(word, "r", "ɾ") -- Use this to respell Spanish "rr"
	word = ugsub(word, "v", "b")
	word = ugsub(word, "y", "j")
	word = ugsub(word, "z", "s")
	snapshot()

	-- Syllable division. Each rule runs twice because matches cannot overlap
	-- within a single gsub pass.
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. ")(" .. C .. V .. ")", "%1.%2")
	end
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. C .. ")(" .. C .. V .. ")", "%1.%2")
	end
	for _ = 1, 2 do
		word = ugsub(word, "(" .. V .. C .. ")(" .. C .. C .. V .. ")", "%1.%2")
	end
	-- NOTE(review): the replacement reproduces the match unchanged, so this
	-- call currently has no effect; confirm whether a syllable break was meant.
	word = ugsub(word, "([aáeéiíoóuú])([aáeéiíoóuú])", "%1%2")
	word = ugsub(word, "([ií])([ií])", "%1.%2")
	word = ugsub(word, "([oóuú])([oóuú])", "%1.%2")
	snapshot()

	-- Accentuation. Acute and circumflex vowels mark stressed syllables; grave
	-- vowels are not in this class and so do not count as stress marks here.
	local syllables = mw.text.split(word, "%.")
	local stress_marked = "[áéíóúâêîôû]"
	if ufind(word, stress_marked) then
		for i = 1, #syllables do
			if ufind(syllables[i], stress_marked) then
				syllables[i] = "ˈ" .. syllables[i]
			end
		end
	elseif ufind(word, "[^aàeèiìoòuùbdɡfjklmnŋpɾstw]$") then
		-- Word ends in something outside the listed set: stress the last syllable.
		syllables[#syllables] = "ˈ" .. syllables[#syllables]
	elseif #syllables > 1 then
		-- Default: stress the penultimate syllable (monosyllables get no mark).
		syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
	end
	-- NOTE(review): this snapshot is taken before the syllables are re-joined,
	-- so it repeats the previous entry. Kept to leave the debug output unchanged.
	snapshot()
	word = table.concat(syllables)

	-- Back-replace the placeholder for foreign/dialectal J.
	word = ugsub(word, "ĵ", "d͡ʒ")

	-- Secondary stress: when several stress marks are present, only the last
	-- one stays primary.
	word = ugsub(word, "ˈ(.+)ˈ", "ˌ%1ˈ")
	word = ugsub(word, "ˈ(.+)ˌ", "ˌ%1ˌ")
	word = ugsub(word, "ˌ(.+)ˈ(.+)ˈ", "ˌ%1ˌ%2ˈ")
	snapshot()

	-- Remove "j" and "w" inserted on a vowel pair starting with "i" and "u".
	word = ugsub(word, "([ií])([ˈˌ]?)j([aáeéoóuú])", "%1%2%3")
	word = ugsub(word, "([uú])([ˈˌ]?)w([aáéeií])", "%1%2%3")
	snapshot()

	-- Change the semivowels /j/ or /w/ to /i/ or /u/ (part of diphthongs) when
	-- they stand before a consonant or at the end of the string.
	word = ugsub(word, "j([ˈˌ]?)([bdɡjklmnŋpɾstwʔ])", "i%1%2")
	word = ugsub(word, "j$", "i")
	word = ugsub(word, "w([ˈˌ]?)([bdɡjklmnŋpɾstwʔ])", "u%1%2")
	word = ugsub(word, "w$", "u")
	snapshot()

	-- Corrections for diphthongs (any accent on the first vowel is dropped).
	word = ugsub(word, "([aá])([i])", "ai") -- ay
	word = ugsub(word, "([aá])([u])", "au") -- aw
	word = ugsub(word, "([ií])u", "iu") -- iw
	word = ugsub(word, "([oó])u", "ou") -- ow
	snapshot()

	-- Phonemic to phonetic/allophonic transcription.
	if phonetic then
		snapshot()

		-- Turn phonemic diphthongs into phonetic diphthongs.
		word = ugsub(word, "([aá])i", "aɪ̯") -- ay
		word = ugsub(word, "([aá])u", "aʊ̯") -- aw
		word = ugsub(word, "([oó])i", "oɪ̯") -- oy
		word = ugsub(word, "([eé])i", "eɪ̯") -- ey
		word = ugsub(word, "([ií])[u]", "ɪʊ̯") -- iw
		snapshot()

		-- Replace unstressed vowels. This hits every unaccented vowel; vowels
		-- in stress-marked syllables and in diphthongs are restored further down.
		word = ugsub(word, "a", "ʌ")
		word = ugsub(word, "e", "ɪ")
		word = ugsub(word, "i", "ɪ")
		word = ugsub(word, "o", "ʊ")
		word = ugsub(word, "u", "ʊ")

		-- Remove accents (circumflex and grave also yield a glottal stop).
		word = ugsub(word, "[áàâéèêíìîóòôúùû]", {
			["á"] = "a", ["â"] = "aʔ", ["à"] = "ʌʔ",
			["é"] = "i", ["ê"] = "iʔ", ["è"] = "ɪʔ",
			["í"] = "i", ["î"] = "iʔ", ["ì"] = "ɪʔ",
			["ó"] = "u", ["ô"] = "uʔ", ["ò"] = "ʊʔ",
			["ú"] = "u", ["û"] = "uʔ", ["ù"] = "ʊʔ"
		})
		snapshot()

		-- Remove "j" and "w" inserted on a vowel pair starting with "i" and "u".
		word = ugsub(word, "([ɪi])([ˈˌ]?)j([ʌaeoʊu])", "%1%2%3")
		word = ugsub(word, "([ʊu])([ˈˌ]?)w([ʌaeɪio])", "%1%2%3")
		snapshot()

		-- Combine consonants (except H) followed by I/U and certain stressed
		-- vowels: the high vowel becomes a glide and the stress mark moves in
		-- front of the consonant.
		word = ugsub(word, "([bdɡklmnpɾst])([ɪi])([ˈˌ])([ʌaeoʊu])", "%3%1j%4")
		word = ugsub(word, "([bdɡklmnpɾst])([ʊu])([ˈˌ])([ʌaeɪi])", "%3%1w%4")
		snapshot()

		-- Nasal assimilation.
		word = ugsub(word, "n([ˈˌ]?)k", "ŋ%1k") -- /n/ before /k/ (some proper nouns)
		word = ugsub(word, "n([ˈˌ]?)ɡ", "ŋ%1ɡ") -- /n/ before /ɡ/ (some proper nouns and loanwords)
		-- /n/ before /h/ (some proper nouns). The pattern has to use plain "n":
		-- the dental n̪ is only introduced at the end of this branch and the
		-- input filter strips combining marks, so a pattern written with n̪
		-- could never match at this point.
		word = ugsub(word, "n([ˈˌ]?)h", "ŋ%1h")
		word = ugsub(word, "n([ˈˌ]?)m", "m%1m") -- /n/ before /m/

		-- Final fix for phonetic diphthongs (undo the vowel reduction inside them).
		word = ugsub(word, "([aʌ])ɪ̯", "aɪ̯") -- ay
		word = ugsub(word, "([aʌ])ʊ̯", "aʊ̯") -- aw
		word = ugsub(word, "([eɪi])ɪ̯", "eɪ̯") -- ey
		word = ugsub(word, "([ʊu])ɪ̯", "oɪ̯") -- oy
		snapshot()

		-- Change /ʌ/, /ɪ/ and /ʊ/ back to /a/, /i/ and /u/ when they directly
		-- follow a stress mark plus an onset of one consonant (or space) and an
		-- optional second consonant.
		word = ugsub(word, "([ˈˌ])([ʔ bdɡhklmnŋpɾstwj])([lɾstj]?)ʌ", "%1%2%3a")
		word = ugsub(word, "([ˈˌ])([ʔ bdɡhklmnŋpɾstwj])([lɾstj]?)ɪ", "%1%2%3i")
		word = ugsub(word, "([ˈˌ])([ʔ bdɡhklmnŋpɾstwj])([lɾstj]?)ʊ", "%1%2%3u")
		snapshot()

		-- Coalesce [wʊ] to [w].
		word = ugsub(word, "([w])([ʊ])", "%1")
		snapshot()

		-- Turn phonemic consonants into Cebuano dental consonants.
		word = ugsub(word, "d", "d̪")
		word = ugsub(word, "l", "l̪")
		word = ugsub(word, "n", "n̪")
		word = ugsub(word, "ɾ", "ɾ̪")
		word = ugsub(word, "s", "s̪")
		word = ugsub(word, "t", "t̪")
	end
	snapshot()

	-- Remove accents. In phonetic mode every accented vowel has already been
	-- replaced above, so this only changes the phonemic transcription.
	word = ugsub(word, "[áàâéèêíìîóòôúùû]", {
		["á"] = "a", ["â"] = "aʔ", ["à"] = "aʔ",
		["é"] = "ɛ", ["ê"] = "ɛʔ", ["è"] = "ɛʔ",
		["í"] = "i", ["î"] = "iʔ", ["ì"] = "iʔ",
		["ó"] = "o", ["ô"] = "oʔ", ["ò"] = "oʔ",
		["ú"] = "u", ["û"] = "uʔ", ["ù"] = "uʔ"
	})

	-- Debug output is the recorded forms joined with no separator.
	return word .. (do_debug == "yes" and table.concat(debug_log, "") or "")
end
--- Phonetic (allophonic) variant of export.show.
-- Forwards the given frame to export.show with the `phonetic` flag switched on;
-- the debug flag is left unset here and is read from the frame's arguments by
-- export.show itself.
-- @param frame  frame-like table whose `args` hold the word to transcribe.
-- @return whatever export.show returns for that frame.
function export.phonetic(frame)
	local use_phonetic = true
	return export.show(frame, use_phonetic)
end
-- Expose the public functions (show, phonetic) as the module's return value.
return export