မဝ်ဂျူ:cmn-pron-Nanjing
မံက်ပြာကတ်
Documentation for this module may be created at မဝ်ဂျူ:cmn-pron-Nanjing/doc
-- Nanjing Mandarin pronunciation module.
-- `export` collects the functions this module makes public; it is the value
-- returned at the end of the file.
local export = {}
-- IPA value of every initial, keyed by its internal encoding
-- (Z = zh, C = ch, S = sh; see the digraph tables further down).
-- The empty key covers syllables that have no initial.
local initials = {
	[""] = "",
	["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f",
	["d"] = "t", ["t"] = "tʰ", ["l"] = "l",
	["g"] = "k", ["k"] = "kʰ", ["h"] = "x",
	["j"] = "t͡ɕ", ["q"] = "t͡ɕʰ", ["x"] = "ɕ",
	["Z"] = "ʈ͡ʂ", ["C"] = "ʈ͡ʂʰ", ["S"] = "ʂ", ["r"] = "ʐ",
	["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
}
-- IPA value of every final, keyed by its internal encoding (U = ü, N = ng).
-- "E" is the variant of "e" that py_normalize substitutes in toneless
-- syllables with an initial.
local finals = {
	-- open finals
	ii = "z̩", iU = "ʐ̩", i = "i", u = "u", U = "y",
	a = "a", ia = "ia", ua = "ua",
	o = "o",
	e = "e", E = "ə", ie = "ie", Ue = "ye",
	ai = "ɛ", iai = "iɛ", uai = "uɛ",
	ei = "əi", ui = "uəi",
	ao = "ɔ", iao = "iɔ",
	ou = "əɯ", iu = "iəɯ",

	-- finals written with -n
	an = "ã", ian = "iã", uan = "uã",
	ien = "iẽ", Uen = "yẽ",
	en = "ə̃", ["in"] = "ĩ", un = "uə̃", Un = "yĩ",
	on = "oŋ", ion = "ioŋ",

	-- finals written with -q (glottal stop)
	iUq = "ʐ̩ʔ", iq = "iʔ", uq = "uʔ", Uq = "yʔ",
	aq = "aʔ", iaq = "iaʔ", uaq = "uaʔ",
	eq = "əʔ", ieq = "ieʔ", ueq = "ueʔ", Ueq = "yeʔ",
	oq = "oʔ", ioq = "ioʔ",

	-- finals written with -r
	er = "ər", ir = "iər", ur = "uər", Ur = "yər",
	ar = "ar", iar = "iar", uar = "uar",
	["or"] = "or", ior = "ior",
	ier = "ier",
	air = "ɛr", iair = "iɛr", uair = "uɛr",
	aor = "ɔr", iaor = "iɔr",
	anr = "ãr", ianr = "iãr", uanr = "uãr",
	-- NOTE(review): this value uses ɵ̃ whereas "en" above uses ə̃ — confirm
	-- that the different vowel symbol is intended.
	enr = "ɵ̃r",

	-- syllabic nasals
	m = "m̩", n = "n̩", N = "ŋ̍",
}
-- Tone contours (written with tone-letter digits).
-- One-digit keys are the tone numbers themselves. Two-digit keys are a tone
-- number followed by the tone number of the next syllable; py_to_ipa appends
-- their value after "⁻" as the sandhi contour.
local tones = {
	["0"] = "",   -- toneless (T0)
	["1"] = "31", -- dark level (T1)
	["2"] = "24", -- light level (T2)
	["3"] = "11", -- rising (T3)
	["4"] = "44", -- departing (T4)
	["5"] = "5",  -- entering (T5)

	["11"] = "33",
	["20"] = "11",
	["25"] = "11",
	["31"] = "12",
	["33"] = "12",
	["45"] = "42",
	["55"] = "3",
}
--- Replace every digit 1-5 in `text` with the matching Unicode superscript.
-- Other characters (including "0") are left untouched.
local function tone_superscript(text)
	local superscripts = {
		["1"] = "¹",
		["2"] = "²",
		["3"] = "³",
		["4"] = "⁴",
		["5"] = "⁵",
	}
	-- The parentheses discard gsub's second result (the replacement count).
	return (text:gsub("[1-5]", superscripts))
end
-- Tone number shown in the phonetic respelling for a syllable whose tone
-- changes. Keys are the syllable's own tone number followed by the tone
-- number of the next syllable; pairs not listed here keep the original tone.
local tone_sandhi_num = {
	["11"] = "4",
	["20"] = "3",
	["25"] = "3",
	["31"] = "2",
	["33"] = "2",
	["45"] = "1",
}
-- Internal use: maps digraphs to single capital letters and combining tone
-- marks (given as their two UTF-8 bytes) to tone digits.
local digraph_encode = {
	-- consonant digraphs
	["ng"] = "N",
	["zh"] = "Z",
	["ch"] = "C",
	["sh"] = "S",
	-- combining diacritics
	["\204\128"] = "1", -- U+0300 grave
	["\204\129"] = "2", -- U+0301 acute
	["\204\140"] = "3", -- U+030C caron
	["\204\132"] = "4", -- U+0304 macron
	["\204\138"] = "5", -- U+030A ring above
}
-- Reverse mapping used when turning the internal encoding back into text.
-- Besides letters and tone digits it also expands the markers "6" and "7",
-- which open and close a highlighted <span>.
local digraph_decode = {
	-- letters
	["N"] = "ng",
	["Z"] = "zh",
	["C"] = "ch",
	["S"] = "sh",
	["U"] = "ü",
	-- tone digits -> combining diacritics (UTF-8 bytes); "0" has no mark
	["0"] = "",
	["1"] = "\204\128", -- U+0300 grave
	["2"] = "\204\129", -- U+0301 acute
	["3"] = "\204\140", -- U+030C caron
	["4"] = "\204\132", -- U+0304 macron
	["5"] = "\204\138", -- U+030A ring above
	-- highlight markers
	["6"] = '<span style="background-color:#F5DEB3">',
	["7"] = "</span>",
}
--- Convert romanised text into the internal single-character encoding.
-- Steps, in order: decompose to NFD, mark capitals with "^" and lower-case
-- them, turn ü into "U", replace digraphs and combining tone marks through
-- digraph_encode, and finally fold "n<tone>g" into "N<tone>".
local function encode(text)
	local encoded = mw.ustring.toNFD(text)
	encoded = encoded:gsub("[A-Z]", function(capital)
		return "^" .. capital:lower()
	end)
	-- u followed by a combining diaeresis (U+0308)
	encoded = encoded:gsub("u\204\136", "U")
	-- Two-byte candidates that have no entry in digraph_encode are kept as is.
	encoded = encoded:gsub("[bpnzcs\204][vfgh\128\129\132\138\140]", digraph_encode)
	-- a tone mark sitting on the n of "ng"
	encoded = encoded:gsub("n([1-5])g", "N%1")
	return encoded
end
--- Expand the internal letters N, Z, C, S and U back to their spelled-out
-- forms. Used to make error messages readable; tone digits are left alone.
local function decode_error(text)
	local readable = text:gsub("[NZCSU]", digraph_decode)
	return readable
end
--- Turn internally encoded text back into display text.
-- Steps, in order: move a tone digit that follows "N" back between "n" and
-- "g", expand letters, tone digits and the 6/7 highlight markers through
-- digraph_decode, restore capitals marked with "^", and compose to NFC.
local function decode(text)
	local restored = text:gsub("N([0-5])", "n%1g")
	restored = restored:gsub("[NZCSU1-7]", digraph_decode)
	restored = restored:gsub("%^([a-z])", string.upper)
	return mw.ustring.toNFC(restored)
end
-- Check that the text is a valid encoded input, e.g. ^lan2jin1 ^beq5hua4.
-- Every well-formed syllable (optional space or "/", optional "^" or ">",
-- optional initial, final, tone digit) is stripped; anything left over is
-- reported. Raises an error when the text contains invalid material and
-- returns nothing otherwise.
local function check_syllable_format(text)
	local leftover = text:gsub("[ /]?[%^>]?[bpmfdtlgkhjqxZCSrzcsyw]?[aeiouUmnN][aeiou]*[nq]?r?[0-5]", "")
	if leftover ~= "" then
		-- Show zh/ch/sh/ng/ü rather than the internal letters Z/C/S/N/U, as
		-- the other error messages in this module already do.
		error("Nanjing: Invalid syllable(s): " .. decode_error(leftover))
	end
end
-- Inverse of py_divide_syllables: takes numbered, encoded syllables and
-- produces the display spelling with tone marks.
local function py_join_syllables(text)
	-- Rebuild one syllable with a leading apostrophe and the tone digit moved
	-- right behind the (optional medial +) vowel; tone "0" is dropped.
	local function place_tone(initial, final, tone)
		local mark = (tone ~= "0") and tone or ""
		local nucleus, coda = final:match("^([iuU]?[aeiouU])(%l*)$")
		if not nucleus then
			-- e.g. syllabic nasals: keep the tone at the end
			return "'" .. initial .. final .. mark
		end
		return "'" .. initial .. nucleus .. mark .. coda
	end

	local joined = text:gsub(
		"([bpmfdtlgkhjqxZCSrzcsyw]?)([aeiouUmnN][aeiou]*[nq]?r?)([0-5])",
		place_tone)
	-- No apostrophe is needed before a syllable that starts with a consonant.
	joined = joined:gsub("'([bpmfdtlgkhjqxZCSrzcsyw][aeiouU])", "%1")
	-- Nor at the very beginning or after a space (an opening "6" marker may
	-- sit in front of it and is kept).
	joined = joined:gsub("%f[^ %z](6?)'", "%1")
	return decode(joined)
end
-- Lánjìn Be̊qhuā --> ^lan2jin1 ^beq5hua4
-- Encodes the input, splits it into syllables with the tone digit at the end
-- ("0" when a syllable carries no tone mark), validates the result and makes
-- sure that joining it again reproduces the input exactly.
-- Raises an error for invalid syllables or a non-canonical spelling.
local function py_divide_syllables(text)
	local divided = encode(text)
	-- "ng" between a vowel/tone and a following vowel: n closes the first
	-- syllable, g opens the next one.
	divided = divided:gsub("([aeiouU1-5])N%f[aeiouU]", "%1n'g")
	-- Mark every consonant + vowel sequence as a syllable start.
	divided = divided:gsub("'?([bpmfdtlgkhjqxZCSrzcsyw][aeiouU])", "'%1")
	-- Drop the apostrophe and move the tone digit behind the syllable.
	divided = divided:gsub(
		"'?([bpmfdtlgkhjqxZCSrzcsyw]?[iuU]?[aeiouUmnN])([1-5]?)([aeiou]*[nq]?r?)",
		function(head, tone, tail)
			if tone == "" then
				tone = "0"
			end
			return head .. tail .. tone
		end)

	check_syllable_format(divided)

	local rejoined = py_join_syllables(divided)
	if text ~= rejoined then
		error("Nanjing: input should be " .. rejoined)
	end
	return divided
end
--- Render encoded syllables in numbered form: tone digits 0-5 become
-- <sup> elements, then the internal letters and the 6/7 highlight markers are
-- expanded through digraph_decode.
local function py_numbered(text)
	local marked = text:gsub("[0-5]", "<sup>%0</sup>")
	marked = marked:gsub("[NZCSU67]", digraph_decode)
	return marked
end
-- Canonize to adhere to pinyin spelling rules, e.g. jü -> ju.
-- Works on the internal encoding; the rules are applied strictly in order,
-- each one to the result of the previous one.
local function py_canonize(text)
	local rules = {
		{ "([jqx])U", "%1u" },
		{ "%f[%l%u]u[in]?", { u = "w", ui = "wei", un = "wen" } },
		{ "%f[%l%u]w%f[qr0-5]", "wu" },
		{ "%f[%l%u]i[uU]?", { i = "y", iu = "you", iU = "rii" } },
		{ "%f[%l%u]y%f[nqr0-5]", "yi" },
		{ "iU", "ii" },
		{ "%f[%l%u]U", "yu" },
		{ "([ZCSr])i%f[qr0-5]", "%1ii" }, -- give error for zhi
		{ "E", "e" },
	}
	local canonical = text
	for _, rule in ipairs(rules) do
		-- single assignment target: gsub's replacement count is discarded
		canonical = canonical:gsub(rule[1], rule[2])
	end
	return canonical
end
-- Normalize to initial+final, e.g. ju -> jü (internally "jU").
-- The result is canonized again and compared with the input; if the two
-- differ the input was not spelled canonically and an error naming the
-- expected spelling is raised.
local function py_normalize(text)
	local normalized = text:gsub("([jqx])u", "%1U")
	normalized = normalized:gsub("w[ue][in]?", { wu = "u", wei = "ui", wen = "un" })
	normalized = normalized:gsub("w", "u")
	normalized = normalized:gsub("%f[%l%u]y[iuo]u?", { yi = "i", yu = "U", you = "iu" })
	normalized = normalized:gsub("%f[%l%u]y", "i")
	normalized = normalized:gsub("([ZCSr])ii", "%1iU")
	normalized = normalized:gsub("riU%f[q0-5]", "iU")
	-- toneless "e" after an initial uses the separate final "E"
	normalized = normalized:gsub("([bpmfdtlgkhjqxZCSrzcs])e0", "%1E0")

	local roundtrip = py_canonize(normalized)
	if roundtrip ~= text then
		error("Nanjing: " .. decode_error(text) .. " should be " .. decode_error(roundtrip))
	end
	return normalized
end
--- Convert space-separated normalized syllables into an IPA string wrapped in
-- slashes. Each syllable is initial + final + tone digit, optionally followed
-- by a second digit; when the two digits together have an entry in `tones`,
-- that value is appended after "⁻".
-- Raises an error for a malformed syllable or an unknown initial/final.
local function py_to_ipa(text)
	local function convert(syllable)
		local initial, final, tone, next_tone =
			syllable:match("^([bpmfdtlgkhjqxZCSrzcs]?)([aeiouUEmnN][aeiouU]*[nq]?r?)([0-5])([0-5]?)$")
		if not initial then
			error("Nanjing: Invalid syllable: " .. decode_error(syllable))
		end

		local sandhi = next_tone ~= "" and tones[tone .. next_tone]

		local ipa_initial = initials[initial]
		if not ipa_initial then
			error("Nanjing: Invalid initial: " .. decode_error(initial))
		end
		local ipa_final = finals[final]
		if not ipa_final then
			error ("Nanjing: Invalid final: " .. decode_error(final))
		end

		local ipa = ipa_initial .. ipa_final .. tones[tone]
		if sandhi then
			ipa = ipa .. "⁻" .. sandhi
		end
		return ipa
	end

	local ipa_text = text:gsub("[^ ]+", convert)
	return "/" .. ipa_text .. "/"
end
-- Process one or more readings separated by "/".
-- Returns four strings: (display_text, hidden_text, numbered_text, ipa).
-- The first three join the per-reading results with " / "; the IPA results
-- are joined with ", " and have their tone digits turned into superscripts.
-- Within a reading, "A>B" means syllable A is written but B is pronounced.
-- Errors raised by the helper functions (invalid or non-canonical input)
-- propagate to the caller.
function export.py_process(text)
local conv_display = {}
local conv_hidden = {}
local conv_numbered = {}
local conv_ipa = {}
local i = 0
for reading in mw.text.gsplit(text,"/",true) do
i = i + 1
-- encoded syllables with trailing tone digits, e.g. ^lan2jin1
reading = py_divide_syllables(reading)
-- display form: the ">B" part is shown as a superscript "→B"
conv_display[i] = py_join_syllables(reading:gsub(">([a-zZCSUN]+[0-5])","<sup>→%1</sup>"))
-- written form: keep A and drop ">B"
local original = reading:gsub("([a-zZCSUN]+[0-5])>[a-zZCSUN]+[0-5]","%1")
-- pronounced form: keep B, wrapped in the 6...7 highlight markers
local phonetic = reading:gsub("[a-zZCSUN]+[0-5]>([a-zZCSUN]+[0-5])","6%17")
phonetic = phonetic:gsub("%^","")
-- For every syllable that directly follows a tone digit (no space in
-- between), copy its own tone digit in front of it, so the preceding
-- syllable ends in two digits: its tone and the tone of the next syllable.
reading = phonetic:gsub("%f[^0-5](7?6?[a-zZCSUN]+)([0-5])","%2%1%2")
-- Two-digit tones listed in tone_sandhi_num are replaced by the sandhi tone
-- number and highlighted; all others fall back to the first digit.
phonetic = reading:gsub("([a-zZCSUN]+)([0-5])([0-5])",function(a,b,c)
local d = tone_sandhi_num[b..c]
return d and ('6'..a..d..'7') or (a..b)
end)
-- syllables ending in "r" with tone 3 are respelled with tone 2 and highlighted
phonetic = phonetic:gsub("([a-zZCS][a-zU]+)r3","6%1r27")
-- collapse nested highlight markers
phonetic = phonetic:gsub("6+","6"):gsub("7+","7")
-- same r3 -> r2 change for the IPA input; a copied next-tone digit after
-- "r2" is dropped
reading = reading:gsub("([a-zZCS][a-zU]+)r3","%1r2"):gsub("r2[0-5]","r2")
-- numbered forms: one space after each syllable, no trailing space
local original_num = original:gsub("([0-5]) ?","%1 "):gsub(" $",""):gsub("%^","")
local phonetic_num = phonetic:gsub("([0-5]7?) ?","%1 "):gsub(" $","")
-- a "6" marker means the pronunciation differs from the written form
if phonetic:find("6") then
conv_hidden[i] = py_join_syllables(original) .. " [Phonetic: " .. py_join_syllables(phonetic) .. "]"
conv_numbered[i] = py_numbered(original_num) .. " [Phonetic: " .. py_numbered(phonetic_num) .. "]"
else
conv_hidden[i] = py_join_syllables(original)
conv_numbered[i] = py_numbered(original_num)
end
-- IPA input: drop the markers and put a space after every one- or
-- two-digit tone
reading = reading:gsub("[67]",""):gsub("([0-5][0-5]?) ?","%1 "):gsub(" $","")
reading = py_normalize(reading)
conv_ipa[i] = py_to_ipa(reading)
end
return table.concat(conv_display, " / "),
table.concat(conv_hidden, " / "),
table.concat(conv_numbered, " / "),
tone_superscript(table.concat(conv_ipa, ", "))
end
-- Hand the table of public functions to whoever loads this module.
return export