မဝ်ဂျူ:zlw-lch-IPA
မံက်ပြာကတ်
--[[
TODO: Decide on whether we want the Northern Borderlands dialect.
The general consensus is to including it by doing the consonant subsitution and put the transcription in brackets
Also the SBD should be included
--]]
local export = {}
local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
local retval = rsubn(term, foo, bar)
return retval
end
local OVERTIE = u(0x361) -- COMBINING DOUBLE INVERTED BREVE
--[[
As can be seen from the last lines of the function, this returns a table of transcriptions,
and if do_hyph, also a string being the hyphenation. These are based on a single spelling given,
so the reason why the transcriptions are multiple is only because of the -yka alternating stress
et sim. This only accepts single-word terms. Multiword terms are handled by multiword().
--]]
local function phonemic(txt, do_hyph, lang, is_prep, period)
local ante = 0
local unstressed = is_prep or false
local colloquial = true
function tsub(s, r)
txt, c = rsubn(txt, s, r)
return c > 0
end
function lg(s) return s[lang] or s[1] end
function tfind(s) return rfind(txt, s) end
-- Save indices of uppercase characters before setting everything lowercase.
local uppercase_indices
if do_hyph then
uppercase_indices = {}
local capitals = ("[A-Z%s]"):format(lg {
pl = "ĄĆĘŁŃÓŚŹŻ",
mpl = "ĄÁÅĆĘÉŁḾŃÓṔŚẂŹŻ",
szl = "ÃĆŁŃŌŎÔÕŚŹŻ",
csb = "ÔÒÃËÙÉÓĄŚŁŻŹĆŃ",
slv = "ÃÉËÊÓÕÔÚÙŃŻ",
mas = "ÁÄÉŁŃÓÔŚÛŸŻŹ"
})
if tfind(capitals) then
local i = 1
local str = rsub(txt, "[.'^*+]", "")
while rfind(str, capitals, i) do
local r, _ = rfind(str, capitals, i)
table.insert(uppercase_indices, r)
i = r + 1
end
end
if #uppercase_indices == 0 then
uppercase_indices = nil
end
end
txt = ulower(txt)
-- Prevent palatisation of the special case kwazi-.
tsub("^kwazi", "kwaz-i")
-- falling diphthongs <au> and <eu>, and diacriticised variants
tsub(lg { "([ae])u", mpl = "([aáåeé])u", csb = "([ae])ù" }, "%1U")
-- rising diphthongs with <iV>
local V = lg { pl = "aąeęioóuy", mpl = "aąáåeęéioóuy", szl = "aãeéioōŏôõuy", csb = "ôòãëùéóąeyuioa", slv = "aãeéëêioóõôuúùyăĭŏŭŭùy̆ā", mas="aáäeéioóôuûÿ" }
tsub(("([^%s])" .. (lang == "slv" and "j" or "i") .. "([%s])"):format(V, V), "%1I%2")
if txt:find("^*") then
-- The symbol <*> before a word indicates it is unstressed.
unstressed = true
txt = txt:sub(2)
elseif txt:find("^%^+") then
-- The symbol <^> before a word indicates it is stressed on the ante-penult,
-- <^^> on the ante-ante-penult, etc.
ante = txt:gsub("(%^).*", "%1"):len()
txt = txt:sub(ante + 1)
elseif txt:find("^%+") then
-- The symbol <+> indicates the word is stressed regularly on the penult. This is useful
-- for avoiding the following checks to come into place.
txt = txt:sub(2)
else
if tfind(".+[łlb][iy].+") then
-- Some words endings trigger stress on the ante-penult or ante-ante-penult regularly.
if tfind("liśmy$") or tfind("[bł]yśmy$") or tfind("liście$") or tfind("[bł]yście$") then
ante = 2
elseif tfind("by[mś]?$") and not tfind("ła?by[mś]?$") then
ante = 1
colloquial = false
end
end
if tfind(".+[yi][kc].+") then
local endings = lg {
{ "k[aąęio]", "ce", "kach", "kom" },
szl = { "k[aãio]", "ce", "kacj", "kōm", "kach" },
csb = { "k[aeąãùóoô]", "ce", "kacj", "kóm", "kama", "kach" },
slv = { "k[aãêúóoôõ]", "cê", "kacj", "kji", "kóm", "kóma", "kamy", "kach" }
}
for _, v in ipairs(endings) do
if tfind(("[yi]%s$"):format(v)) then
ante = 1
end
end
end
if lang == "mpl" then
if tfind(".+[yi]j.+") then
local endings = lg {
mpl = { "[ąåéo]", "[ée]j", "[áo]m", "ach" },
}
for _, v in ipairs(endings) do
if tfind(("[yi]j%s$"):format(v)) then
ante = 1
end
end
end
end
end
-- TODO: mpl, csb, szl, slv, mas
if not txt:find("%.") then
-- Don't recognise affixes whenever there's only one vowel (or dipthong).
local _, n_vowels = rsubn(txt, ("[%s]"):format(V), "")
if n_vowels > 1 then
-- syllabify common prefixes as separate
local prefixes = {
"do", "wy", "za", "aktyno", "akusto", "akwa", "anarcho", "andro", "anemo", "antropo", "arachno", "archeo", "archi", "arcy", "areo", "arytmo", "audio", "awio", "balneo", "biblio", "brachy", "broncho", "ceno", "centro", "centy", "chalko", "chiro", "chloro", "chole", "chondro", "choreo", "chromato", "chrysto", "cyber", "cyklo", "cztero", "ćwierć", "daktylo", "decy", "deka", "dendro", "dermato", "diafano", "dwu", "dynamo", "egzo", "ekstra", "elektro", "encefalo", "endo", "entero", "entomo", "ergo", "erytro", "etno", "farmako", "femto", "ferro", "fizjo", "flebo", "franko", "ftyzjo", "galakto", "galwano", "germano", "geronto", "giganto", "giga", "gineko", "giro", "gliko", "gloso", "glotto", "grafo", "granulo", "grawi", "haplo", "helio", "hemato", "hepta", "hetero", "hiper", "histo", "hydro", "info", "inter", "jedno", "kardio", "kortyko", "kosmo", "krypto", "kseno", "logo", "magneto", "między", "niby", "nie", "nowo", "około", "oksy", "onto", "ornito", "para", "pierwo", "pięcio", "pneumo", "poli", "ponad", "post", "poza", "proto", "pseudo", "psycho", "radio", "samo", "sfigmo", "sklero", "staro", "stereo", "tele", "tetra", "wice", "zoo", "żyro", "am[bf]i", "ang[il]o", "ant[ey]", "a?steno", "[be]lasto", "chro[mn]o", "cys?to", "de[rs]mo", "h?ekto", "[gn]eo", "hi[ge]ro", "kontra?", "me[gt]a", "mi[nl]i", "a[efg]ro", "[pt]rzy", "przed?", "wielk?o", "mi?elo", "eur[oy]", "ne[ku]ro", "allo", "astro", "atto", "brio", "heksa", "all?o", "at[mt]o", "a[rs]tro", "br?io", "heksa?", "pato", "ba[tr][oy]", "izo", "myzo", "m[ai]kro", "mi[mzk]o", "chemo", "gono", "kilo", "lipo", "nano", "kilk[ou]", "hem[io]", "home?o", "fi[lt]o", "ma[łn]o", "h[ioy]lo", "hip[ns]?o", "[fm]o[nt]o",
-- <na-, po-, o-, u-> would hit too many false positives
}
for _, v in ipairs(prefixes) do
if tfind("^"..v) then
local _, other_vowels = rsubn(v, ("[%s]"):format(V), "")
if (n_vowels - other_vowels) > 0 then
tsub(("^(%s)"):format(v), "%1.")
break
end
end
end
if do_hyph then
-- syllabify common suffixes as separate
-- TODO: mpl, csb, szl, slv, mas
local suffixes = lg {
pl = {
"nąć",
"[sc]tw[aou]", "[sc]twie", "[sc]tw[eo]m", "[sc]twami", "[sc]twach",
"dztw[aou]", "dztwie", "dztw[eo]m", "dztwami", "dztwach",
"dł[aou]", "dł[eo]m", "dłami", "dłach",
"[czs]j[aeięąo]", "[czs]jom", "[czs]jami", "[czs]jach",
}, szl = {
"nōńć", "dło",
}, csb = {
"nąc", "dło"
}, slv = {
"nõc", "dlô"
}, mas = {
"nóncz", "dło"
}
}
for _, v in ipairs(suffixes) do
if tsub(("(%s)$"):format(v), ".%1") then break end
end
-- syllabify <istka> as /ist.ka/
if txt:find("[iy]st[kc]") then
local endings = lg {
{ "k[aąęio]", "ce", "kach", "kom", "kami" },
szl = { "k[aãio]", "ce", "kami", "kacj", "kacach", "kōma" },
csb = { "k[aãąioôòùó]", "ce", "kami", "kacj", "kacach", },
}
for _, v in ipairs(endings) do
if tsub(("([iy])st(%s)$"):format(v), "%1st.%2") then break end
end
end
end
end
end
-- syllabification
for _ = 0, 1 do
tsub(("([%sU])([^%sU.']*)([%s])"):format(V, V, V), function (a, b, c)
local function find(x) return rfind(b, x) end
local function is_diagraph(thing)
local r = find(thing:format("[crsd]z")) or find(thing:format("ch")) or find(thing:format("d[żź]"))
if lang == "mpl" then return r or find(thing:format("b́")) end
if lang == "slv" then return r or find(thing:format("gh")) end
if lang == "mas" then return r or find(thing:format("rż")) end
return r
end
if ((ulen(b) < 2) or is_diagraph("^%s$")) then
b = "."..b
else
local i = 2
if is_diagraph("^%s") then i = 3 end
if usub(b, i, i):find("^[rlłI-]$") then
b = "."..b
else
b = ("%s.%s"):format(usub(b, 0, i - 1), usub(b, i))
end
end
return ("%s%s%s"):format(a, b, c)
end)
end
local hyph
if do_hyph then
-- Ignore certain symbols and diacritics for the hyphenation.
hyph = txt:gsub("'", "."):gsub("-", "")
if lang == "slv" then
local BREVE = u(0x306)
hyph = rsubn(hyph, "[Iăĭŏŭ" .. BREVE .. "ā]", {
["I"] = "j",
["ă"] = "a", ["ĭ"] = "i", ["ŏ"] = "o",
["ŭ"] = "u", [BREVE] = "", ["ā"] = "a",
})
end
hyph = hyph:lower()
-- Restore uppercase characters.
if uppercase_indices then
-- str_i loops through all the characters of the string
-- list_i loops as above but doesn't count dots
-- array_i loops through the indices at which the capital letters are
local str_i, list_i, array_i = 1, 1, 1
function h_sub(x, y) return usub(hyph, x, y) end
while array_i <= #uppercase_indices do
if h_sub(str_i, str_i) ~= "." then
if list_i == uppercase_indices[array_i] then
hyph = ("%s%s%s"):format(h_sub(1,str_i-1), uupper(h_sub(str_i,str_i)), h_sub(str_i+1))
array_i = array_i + 1
end
list_i = list_i + 1
end
str_i = str_i + 1
end
end
end
tsub("'", "ˈ")
-- handle digraphs
tsub("ch", "x")
tsub("[crs]z", { ["cz"]="t_ʂ", ["sz"]="ʂ" })
if lang ~= "mas" then tsub("rz", "R") end
tsub("d([zżź])", "d_%1")
if lang == "mpl" then tsub("b́", "bʲ") end
if lang == "slv" then tsub("gh", "ɣ") end
if lang == "mas" then tsub("rż", "R") end
-- basic orthographical rules
-- not using lg() here for speed
if lang == "pl" then
tsub(".", {
-- vowels
["e"]="ɛ", ["o"]="ɔ",
["ą"]="ɔN", ["ę"]="ɛN",
["ó"]="u", ["y"]="ɨ",
-- consonants
["c"]="t_s", ["ć"]="t_ɕ",
["ń"]="ɲ", ["ś"]="ɕ", ["ź"]="ʑ",
["ł"]="w", ["w"]="v", ["ż"]="ʐ",
["g"]="ɡ", ["h"]="x", ["y"]="ɘ"
})
elseif lang == "mpl" then
tsub(".", {
-- vowels
["á"]="ɒ", ["å"]="ɒ",
["ę"]="ɛ̃", ["ą"]="ɔ̃",
["e"]="ɛ", ["o"]="ɔ",
["é"]="e", ["ó"]="o",
["y"]="ɨ",
-- consonants
["ṕ"]="pʲ", -- <b́> has no unicode character and is hence handled above
["ḿ"]="mʲ", ["ẃ"]="vʲ",
["c"]="t_s", ["ć"]="t_ɕ",
["ń"]="ɲ", ["ś"]="ɕ", ["ź"]="ʑ",
["ł"]="ɫ", ["w"]="v", ["ż"]="ʐ",
["g"]="ɡ", ["h"]="x",
})
elseif lang == "szl" then
tsub(".", {
-- vowels
["e"]="ɛ", ["o"]="ɔ",
["ō"]="o", ["ŏ"]="O",
["ô"]="wɔ", ["y"]="ɨ",
["õ"] = "ɔ̃", ["ã"] = "ã",
["y"] = "ɪ",
-- consonants
["c"]="t_s", ["ć"]="t_ɕ",
["ń"]="ɲ", ["ś"]="ɕ", ["ź"]="ʑ",
["ł"]="w", ["w"]="v", ["ż"]="ʐ",
["g"]="ɡ", ["h"]="x",
})
elseif lang == "csb" then
tsub(".", {
-- vowels
["e"]="ɛ", ["é"]="e", ["o"]="ɔ",
["ó"]="o", ["ô"]="ɞ", ["ë"]="ɜ",
["ò"]="wɛ", ["y"]="Y", ["ù"]="wu",
["ą"] = "ɔ̃", ["ã"] = "ã",
-- consonants
["c"]="t_s", ["ć"]="t_ɕ",
["ń"]="ɲ", ["ś"]="ɕ", ["ź"]="ʑ",
["ł"]="w", ["w"]="v", ["ż"]="ʒ",
["g"]="ɡ", ["h"]="x",
})
elseif lang == "slv" then
tsub(".", {
-- vowels
["e"]="ɛ", ["é"]="e", ["o"]="ɔ",
["ó"]="o", ["ô"]="ɵ", ["ë"]="ə", ["ê"]="E",
["y"]="ɪ", ["ú"]="ʉ", ["ù"]="y",
["õ"] = "ɔ̃", ["ã"] = "ã",
["ă"]="ă", ["ĭ"]="ĭ", ["ŏ"]="ŏ", ["ŭ"]="ŭ", ["ù̆"]="y̆", ["ā"]="aː",
-- consonants
["c"]="t_s",
["ń"]="n",
["w"]="v", ["ż"]="ʒ",
["g"]="ɡ", ["h"]="x",
})
elseif lang == "mas" then
tsub(".", {
-- vowels
["á"]="ɒ", ["ä"]="æ", ["e"]="ɛ", ["é"]="e",
["o"]="ɔ", ["ó"]="o", ["ô"]="wɔ",
["ÿ"]="Y", ["û"]="wu",
-- consonants
["c"]="t_s",
["ń"]="ɲ", ["ś"]="ʃ",
["ł"]="w", ["w"]="v", ["ż"]="ʒ",
["g"]="ɡ", ["h"]="x", ["ź"]="ʑ",
})
end
if lang == "csb" then
tsub("ʂ", "ʃ")
tsub("ʐ", "ʒ")
end
if lang == "slv" then
tsub("ʂ", "ʃ")
tsub("ʐ", "ʒ")
end
if lang == "mas" then
tsub("ʂ", "ʃ")
tsub("ʐ", "ʒ")
end
-- palatalisation
local palatise_into = { ["n"] = "ɲ", ["s"] = "ɕ", ["z"] = "ʑ" }
tsub("([nsz])I", function (c) return palatise_into[c] end)
tsub("([nsz])i", function (c) return palatise_into[c] .. "i" end)
-- voicing and devoicing
local T = "pftsʂɕkxʃx"
local D = "bdzʐʑɡʒɣ"
tsub(("([%s][.ˈ]?)v"):format(T), "%1f")
tsub(("([%s][.ˈ]?)R"):format(T), "%1S")
if lang == "slv" then
tsub(("([%s][.ˈ]?)ɣ"):format(T), "%1x")
tsub(("([%s][.ˈ]?)x"):format(D), "%1ɣ")
end
local function arr_list(x)
local r = ""
for i in pairs(x) do
r = r .. i
end
return r
end
local devoice = {
["b"] = "p", ["d"] = "t", ["ɡ"] = "k",
["z"] = "s", ["v"] = "f",
["ʑ"] = "ɕ", ["ʐ"] = "ʂ", ["R"] = "S",
["ʒ"] = "ʃ"
}
if lang == "slv" then
devoice["ɣ"] = "x"
end
local mpl_J = lg { "", mpl = "ʲ?" }
local arr_list_devoice = arr_list(devoice)
if not is_prep then
tsub(("([%s])(%s)$"):format(arr_list_devoice, mpl_J), function (a, b)
return devoice[a] .. (type(b) == "string" and b or "")
end)
end
if lang == "csb" then
tsub("Y", "i")
tsub("S", "r̝")
tsub("R", "r̝")
tsub("po", "pwo")
tsub("bo", "bwo")
tsub("mo", "mwo")
tsub("fo", "fwo")
tsub("vo", "vwo")
tsub("ko", "kwo")
tsub("ɡo", "ɡwo")
tsub("xo", "xwo")
tsub("vw", "w")
end
if lang == "slv" then
local V = "aɛeɔ̃oɵəEɪiʉuyã"
tsub("S", "r̝")
tsub("R", "r̝")
tsub("nj$", "n")
tsub("nj([^" .. V .. "])", "n%1")
tsub("ɲ([" .. V .. "])", "nj%1")
tsub("ɛ$", "ə")
end
if lang == "mas" then
tsub("Y", "i")
tsub("S", "r̝")
tsub("R", "r̝")
end
if lang ~= "mpl" then
tsub("S", "ʂ")
tsub("R", "ʐ")
end
local voice = {}
for i, v in pairs(devoice) do
voice[v] = i
end
local new_text
local devoice_string = ("([%s])(%s[._ˈ]?[%s])"):format(arr_list_devoice, mpl_J, T)
local voice_string = ("([%s])(%s[._ˈ]?[%s])"):format(arr_list(voice), mpl_J, D)
local function devoice_func(a, b) return devoice[a] .. b end
local function voice_func(a, b) return voice[a] .. b end
while txt ~= new_txt do
new_txt = txt
tsub(devoice_string, devoice_func)
tsub(voice_string, voice_func)
end
if lang == "pl" then
-- nasal vowels
tsub("N([.ˈ]?[pb])", "m%1")
tsub("N([.ˈ]?[ɕʑ])", "ɲ%1")
tsub("N([.ˈ]?[td]_[ɕʑ])", "ɲ%1")
tsub("N([.ˈ]?[tdsz])", "n%1")
tsub("N([.ˈ]?[wl])", "%1")
tsub("ɛN$", "ɛ")
tsub("N", "w̃")
end
-- Hyphen separator, e.g. to prevent palatisation of <kwazi->.
tsub("-", "")
tsub("_", OVERTIE)
tsub("I", "j")
tsub("U", "w")
-- stress
local function add_stress(a)
local s = ""
for _ = 0, a do
s = s .. "[^.]+%."
end
local r = rsub(txt, ("%%.(%s[^.]+)$"):format(s), "ˈ%1")
if not rfind(r, "ˈ") then
r = "ˈ" .. r
end
return (r:gsub("%.", ""))
end
local should_stress = not (unstressed or txt:find("ˈ"))
local prons = should_stress and add_stress(ante) or txt
if is_prep then
prons = prons .. "$"
end
if lang == "pl" then
if should_stress and ante > 0 and colloquial then
local thing = add_stress(0)
if thing ~= prons then
prons = { prons, thing }
end
end
elseif lang == "mpl" then
if tfind("[RS]") then
local mp_early = prons:gsub("[RS]", "r̝")
local mp_late = prons:gsub("R", "ʐ"):gsub("S", "ʂ")
if period == "early" then
prons = mp_early
elseif period == "late" then
prons = mp_late
elseif not period then
prons = {
mp_early, mp_late,
}
else
error(("'%s' is not a supported Middle Polish period, try with 'early' or 'late'."):format(period))
end
end
elseif lang == "szl" then
if tfind("O") then
prons = {
prons:gsub("O", "ɔ"),
prons:gsub("O", "ɔw"),
prons:gsub("O", "ɛw"),
}
end
elseif lang == "slv" then
if tfind("E") then
local V = "aɛeɔ̃oɵəEɪiʉuyã"
prons = prons:gsub("ˈ([^" .. V .. "]*)E", "ˈ%1i̯ɛ")
prons = prons:gsub("E$", "ə")
prons = prons:gsub("E", "ɛ")
end
end
if do_hyph then
return prons, hyph
else
return prons
end
end
-- TODO: This might slow things down if used too much?
local function table_insert_if_absent(t, s)
for _, v in ipairs(t) do
if v == s then return end
end
table.insert(t, s)
end
-- Returns rhyme from a transcription.
local function do_rhyme(pron, lang)
local V = ({ pl = "aɛiɔuɨ", szl = "aãɛiɔɔ̃ouɪ", csb = "aãɛeɜiɔoõɞu", slv = "aãɛəei̯iɔoõɵuʉyɪăĭŏŭŭùy̆ā", mas = "aɒæɛeiɔou"})[lang]
local num_syl = select(2, rsubn(pron, ("[%s]"):format(V), ""))
return {
rhyme = rsub(rsub(rsub(pron, "^.*ˈ", ""), ("^[^%s]-([%s])"):format(V, V), "%1"), "%.", ""),
num_syl = num_syl
}
end
--[[
Handles a single input, returning a table of transcriptions. Returns also a string of
hyphenation and a table of rhymes if it is a single-word term.
--]]
local function multiword(term, lang, period)
if term:find("^%[.+%]$") then
return { phonetic = term }
elseif term:find(" ") then
-- TODO: repeated
function lg(s)
return s[lang] or s[1]
end
local prepositions = lg {
{
"beze?", "na", "dla", "do", "ku",
"nade?", "o", "ode?", "po", "pode?", "przede?",
"przeze?", "przy", "spode?", "u", "we?",
"z[ae]?", "znade?", "zza",
}, szl = {
"bezy?", "na", "dlŏ", "d[oō]", "ku",
"nady?", "ô", "ôdy?", "po", "pody?", "przedy?",
"przezy?", "przi", "spody?", "u", "w[ey]?",
"z[aey]?", "[śs]", "znady?"
}, csb = {
"beze?", "na", "dlô", "do", "kù",
"nade?", "ò", "òde?", "pò", "pòde?", "przede?",
"przeze?", "przë", "spòde?", "ù", "we?", "wew",
"z[ae]?", "zez", "zeza", "zó", "znade?"
}, slv= {
"dlo", "dô", "na", "nade?", "przêde?", "przêze?",
"przë", "pô", "pôde?", "sê?", "vô", "we?", "wôde?",
"wù", "za"
}, mas= {
"dlá", "do", "ku", "na", "nade?", "po", "pode?",
"ponade?", "poza", "prżede?", "prżeze", "prżi",
"we?", "ze?", "za", "ô", "ôde?", "û", "beze?"
}
}
local p
local contains_preps = false
for word in term:gmatch("[^ ]+") do
local is_prep = false
for _, prep in ipairs(prepositions) do
if (rfind(word, ("^%s$"):format(prep))) then
is_prep = true
contains_preps = true
break
end
end
local v = phonemic(word, false, lang, is_prep, period)
local sep = "%s %s"
if p == nil then
p = v
elseif type(p) == "string" then
if type(v) == "string" then
p = sep:format(p, v)
else
p = { sep:format(p, v[1]), sep:format(p, v[2]) }
end
else
if type(v) == "string" then
p = { sep:format(p[1], v), sep:format(p[2], v) }
else
p = { sep:format(p[1], v[1]), sep:format(p[2], v[2]) }
end
end
end
local function assimilate_preps(str)
local function assim(from, to, before)
str = rsub(str, ("%s(%%$ ˈ?[%s])"):format(from, before), to.."%1")
end
local T = "ptsʂɕkx"
assim("d", "t", T)
assim("v", "f", T)
assim("z", "s", T)
if lang == "szl" then
local D = "bdzʐʑɡ"
assim("s", "z", D)
assim("ɕ", "ʑ", D)
end
return rsub(str, "%$", "")
end
if contains_preps then
if type(p) == "string" then
p = assimilate_preps(p)
else
p[1] = assimilate_preps(p[1])
p[2] = assimilate_preps(p[2])
end
end
return p
else
return phonemic(term, lang ~= "mpl", lang, false, period)
end
end
-- This handles all the magic characters <*>, <^>, <+>, <.>, <#>.
local function normalise_input(term, title)
local function check_af(str, af, reg, repl, err_msg)
reg = reg:format(af)
if not rfind(str, reg) then
error(("the word does not %s with %s!"):format(err_msg, af))
end
return str:gsub(reg, repl)
end
local function check_pref(str, pref) return check_af(str, pref, "^(%s)", "%1.", "start") end
local function check_suf(str, suf) return check_af(str, suf, "(%s)$", ".%1", "end") end
if term == "#" then
-- The diesis stands simply for {{PAGENAME}}.
return title
elseif (term == "+") or term:find("^%^+$") or (term == "*") then
-- Inputs that are just '+', '*', '^', '^^', etc. are treated as
-- if they contained the title with those symbols preceding it.
return term .. title
-- Handle syntax like <po.>, <.ka> and <po..ka>. This allows to not respell
-- the entire word when all is needed is to specify syllabification of a prefix
-- and/or a suffix.
elseif term:find(".+%.$") then
return check_pref(title, term:sub(1, -2))
elseif term:find("^%..+") then
return check_suf(title, term:sub(2))
elseif term:find(".+%.%..+") then
return check_suf(check_pref(title, term:gsub("%.%..+", "")), term:gsub(".+%.%.", ""))
end
return term
end
local function sort_things(lang, title, args_terms, args_quals, args_refs, args_period)
local pron_list, hyph_list, rhyme_list, do_hyph = { {}, {}, {} }, { }, { {}, {}, {} }, false
for index, term in ipairs(args_terms) do
term = normalise_input(term, title)
local pron, hyph = multiword(term, lang, args_period)
local qualifiers = {}
if args_quals[index] then
for qual in args_quals[index]:gmatch("[^;]+") do
table.insert(qualifiers, qual)
end
end
local function new_pron(p, additional, dont_refs)
local ret = {
pron = ("/%s/"):format(p),
qualifiers = qualifiers,
refs = not dont_refs and {args_refs[index]},
}
if additional then
local new_qualifiers = {}
for _, v in ipairs(qualifiers) do
table.insert(new_qualifiers, v)
end
table.insert(new_qualifiers, additional)
ret.qualifiers = new_qualifiers
end
return ret
end
local should_rhyme = lang ~= "mpl"
if type(pron) == "string" then
table.insert(pron_list[1], new_pron(pron))
if should_rhyme then
table.insert(rhyme_list[1], do_rhyme(pron, lang))
end
elseif pron.phonetic then
table.insert(pron_list[1], {
pron = pron.phonetic,
qualifiers = qualifiers,
refs = {args_refs[index]},
})
else
local double_trancript = ({
pl = { "prescribed", "casual" },
mpl = { "16<sup>th</sup> c.", "17<sup>th</sup>–18<sup>th</sup> c." },
szl = { nil, "Western", "Głogówek"},
})[lang]
table.insert(pron_list[2], new_pron(pron[1], double_trancript[1]))
table.insert(pron_list[3], new_pron(pron[2], double_trancript[2], true))
if should_rhyme then
table.insert(rhyme_list[2], do_rhyme(pron[1], lang))
table.insert(rhyme_list[3], do_rhyme(pron[2], lang))
end
end
if hyph then
do_hyph = true
if hyph:gsub("%.", "") == title then
table_insert_if_absent(hyph_list, hyph)
end
end
end
-- TODO: looks rather slow.
local function merge_subtables(t)
local r = {}
if #t[2] + #t[3] == 0 then
return t[1]
end
for _, subtable in ipairs(t) do
for _, value in ipairs(subtable) do
table.insert(r, value)
end
end
return r
end
pron_list = merge_subtables(pron_list)
rhyme_list = merge_subtables(rhyme_list)
return pron_list, hyph_list, rhyme_list, do_hyph
end
function export.mpl_IPA(frame)
local args = require("Module:parameters").process(frame:getParent().args, {
[1] = { list = true },
["qual"] = { list = true, allow_holes = true },
["q"] = { list = true, allow_holes = true, alias_of = "qual" },
["period"] = {},
["ref"] = { list = true, allow_holes = true },
["title"] = {}, -- for debugging or demonstration only
})
local terms = args[1]
if #terms == 0 then
terms = { "#" }
end
local lang = require("Module:languages").getByCode("pl")
return ("* %s %s"):format(require("Module:accent qualifier").format_qualifiers(lang, {"Middle Polish"}),
require("Module:IPA").format_IPA_full {
lang = lang,
items = (sort_things(
"mpl",
args.title or mw.title.getCurrentTitle().text,
terms,
args.qual,
args.ref,
args.period
)),
}
)
end
function export.IPA(frame)
local arg_lang = frame.args.lang
local process_args = {
[1] = { list = true },
["qual"] = { list = true, allow_holes = true },
["q"] = { list = true, allow_holes = true, alias_of = "qual" },
["hyphs"] = {}, ["h"] = { alias_of = "hyphs" },
["rhymes"] = {}, ["r"] = { alias_of = "rhymes" },
["audios"] = {}, ["a"] = { alias_of = "audios" },
["homophones"] = {}, ["hh"] = { alias_of = "homophones" },
["ref"] = { list = true, allow_holes = true },
["title"] = {}, -- for debugging or demonstration only
}
if arg_lang == "pl" then
process_args["mp"] = { list = true }
process_args["mp_qual"] = { list = true, allow_holes = true }
process_args["mp_q"] = { list = true, allow_holes = true, alias_of = "mp_qual" }
process_args["mp_period"] = {}
process_args["mp_ref"] = { list = true, allow_holes = true }
end
local args = require("Module:parameters").process(frame:getParent().args, process_args)
local lang = require("Module:languages").getByCode((arg_lang == "slv" or arg_lang == "mas") and ("zlw-" .. arg_lang) or arg_lang)
local terms = args[1]
local title = args.title or mw.title.getCurrentTitle().text
if #terms == 0 then
terms = { "#" }
end
local pron_list, hyph_list, rhyme_list, do_hyph = sort_things(arg_lang, title, terms, args.qual, args.ref)
local mp_prons
if arg_lang == "pl" then
if #args.mp > 0 then
mp_prons = (sort_things("mpl", title, args.mp, args.mp_qual, args.mp_ref, args.mp_period))
end
end
if args.hyphs then
if args.hyphs == "-" then
do_hyph = false
else
hyph_list = {}
for v in args.hyphs:gmatch("[^;]+") do
table.insert(hyph_list, v)
end
do_hyph = true
end
end
if args.rhymes then
if args.rhymes == "-" then
rhyme_list = {}
elseif args.rhymes ~= "+" then
rhyme_list = {}
for v in args.rhymes:gmatch("[^;]+") do
if rfind(v, ".+/.+") then
table.insert(rhyme_list, {
rhyme = rsub(v, "/.+", ""),
num_syl = tonumber(rsub(v, ".+/", "")),
})
else
error(("The manual rhyme %s did not specify syllable number as RHYME/NUM_SYL."):format(v))
end
end
end
end
for ooi, oov in ipairs(rhyme_list) do
oov.num_syl = { oov.num_syl }
for coi = ooi + 1, #rhyme_list do
local cov = rhyme_list[coi]
if oov.rhyme == cov.rhyme then
local add_ns = true
for _, onv in ipairs(oov.num_syl) do
if cov.num_syl == onv then
add_ns = false
break
end
end
if add_ns then
table.insert(oov.num_syl, cov.num_syl)
end
table.remove(rhyme_list, coi)
end
end
end
local m_IPA_format = require("Module:IPA").format_IPA_full
local ret = "*" .. m_IPA_format { lang = lang, items = pron_list }
if mp_prons then
ret = ("%s\n*%s %s"):format(ret,
require("Module:accent qualifier").format_qualifiers(lang, { "Middle Polish" }),
m_IPA_format { lang = lang, items = mp_prons }
)
end
if args.audios then
for v in args.audios:gmatch("[^;]+") do
-- TODO: can I expand a template or is it a bad thing to do?
ret = ("%s\n*%s"):format(ret, require("Module:audio").format_audio {
lang = lang,
file = v:gsub("#", title),
})
end
end
if #rhyme_list > 0 then
ret = ("%s\n*%s"):format(ret, require("Module:rhymes").format_rhymes({ lang = lang, rhymes = rhyme_list }))
end
if do_hyph then
ret = ret .. "\n*"
if #hyph_list > 0 then
local hyphs = {}
for hyph_i, hyph_v in ipairs(hyph_list) do
hyphs[hyph_i] = { hyph = {} }
for syl_v in hyph_v:gmatch("[^.]+") do
table.insert(hyphs[hyph_i].hyph, syl_v)
end
end
ret = ret..require("Module:hyphenation").format_hyphenations {
lang = lang, hyphs = hyphs, caption = "Syllabification"
}
else
ret = ret.."Syllabification: <small>[please specify syllabification manually]</small>"
if mw.title.getCurrentTitle().nsText == "" then
ret = ("%s[[Category:%s-pronunciation_without_hyphenation]]"):format(ret, arg_lang)
end
end
end
if args.homophones then
local homophone_list = {}
for v in args.homophones:gmatch("[^;]+") do
if v:find("<.->$") then
table.insert(homophone_list, {
term = v:gsub("<.->$", ""),
qualifiers = { (v:gsub(".+<(.-)>$", "%1")) },
})
else
table.insert(homophone_list, { term = v })
end
end
ret = ("%s\n*%s"):format(ret, require("Module:homophones").format_homophones {
lang = lang,
homophones = homophone_list,
})
end
return ret
end
return export