မဝ်ဂျူ:eo-pron
မံက်ပြာကတ်
Documentation for this module may be created at မဝ်ဂျူ:eo-pron/doc
local export = {}
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local audio_module = "Module:audio"
local parse_utilities_module = "Module:parse utilities"
local rfind = m_str_utils.find
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local concat = table.concat
local insert = table.insert
local remove = table.remove
local lang = require("Module:languages").getByCode("eo")
-- Single-character stand-in (code point U+F000) for the digraph "dz".
-- string_to_ipa() substitutes it for "dz" and letter_to_ipa() maps it to
-- phonemes.dz.
local dz = mw.ustring.char(0xF000)
-- Map from lowercase consonant letter (or the digraph "dz") to its IPA
-- rendering.
local consonants = {
["b"] = "b",
["c"] = "t͡s",
["ĉ"] = "t͡ʃ",
["d"] = "d",
["dz"] = "d͡z",
["f"] = "f",
["g"] = "ɡ",
["ĝ"] = "d͡ʒ",
["h"] = "h",
["ĥ"] = "x",
["j"] = "j",
["ĵ"] = "ʒ",
["k"] = "k",
["l"] = "l",
["m"] = "m",
["n"] = "n",
["p"] = "p",
["r"] = "r",
["s"] = "s",
["ŝ"] = "ʃ",
["t"] = "t",
["v"] = "v",
["z"] = "z",
['ŭ'] = "w"
}
-- Map from lowercase vowel letter to its IPA rendering.
local vowels = {
["a"] = "a",
["e"] = "e",
["i"] = "i",
["o"] = "o",
["u"] = "u",
}
-- Union of `vowels` and `consonants`: every letter this module can transcribe.
local phonemes = {}
for k, v in pairs(vowels) do phonemes[k] = v end
for k, v in pairs(consonants) do phonemes[k] = v end
-- Like rsubn(), but returns only the substituted string and drops the
-- substitution count.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- Calls `fun` on every element of the array `items`; each call must return an
-- array. All returned elements are gathered, in order, into one flat array,
-- using m_table.insertIfNot() so an element already present is not added again.
local function flatmap(items, fun)
	local collected = {}
	local add_unique = m_table.insertIfNot
	for _, element in ipairs(items) do
		for _, produced in ipairs(fun(element)) do
			add_unique(collected, produced)
		end
	end
	return collected
end
-- Wraps a respelling string in the object form used for parsed terms:
-- a table whose `respelling` field holds the string.
local function generate_obj(respelling)
	local obj = {}
	obj.respelling = respelling
	return obj
end
-- Merges two qualifier lists. If either one is missing, the other is returned
-- as-is (not copied). Otherwise returns a deep copy of `qual1` extended with
-- those entries of `qual2` that it does not already contain.
local function combine_qualifiers(qual1, qual2)
	if qual1 and qual2 then
		local merged = m_table.deepCopy(qual1)
		for _, extra in ipairs(qual2) do
			m_table.insertIfNot(merged, extra)
		end
		return merged
	end
	return qual1 or qual2
end
-- Splits `term` on commas and returns the pieces; returns nil for a nil `term`.
-- A term containing a comma followed by whitespace, or a backslash, is handed
-- to split_on_comma() in the parse utilities module (presumably so that such
-- commas can be kept literal -- confirm against that module). Anything else
-- is split on every comma directly.
local function split_on_comma(term)
	if not term then
		return nil
	end
	local needs_full_parser = term:find(",%s") or term:find("\\")
	if not needs_full_parser then
		return rsplit(term, ",")
	end
	return require(parse_utilities_module).split_on_comma(term)
end
-- Parses a comma-separated respelling spec into a list of term objects.
--
-- `respelling` is the raw parameter value; `paramname` is the parameter's name,
-- passed through to the parse utilities module.
--
-- Without any "<" or "[" in the spec, each comma-separated piece simply becomes
-- { respelling = piece }. Otherwise the spec is parsed with the parse utilities
-- module, accepting the inline modifiers <q:>, <qq:>, <a:>, <aa:> and <ref:>.
local function parse_respellings_with_modifiers(respelling, paramname)
	local parsed = {}

	if not respelling:find("[<%[]") then
		for _, piece in ipairs(split_on_comma(respelling)) do
			insert(parsed, generate_obj(piece))
		end
		return parsed
	end

	local put = require(parse_utilities_module)
	local runs = put.parse_multi_delimiter_balanced_segment_run(respelling, { { "<", ">" }, { "[", "]" } })
	local groups = put.split_alternating_runs_on_comma(runs)

	for _, group in ipairs(groups) do
		-- Even positions hold the delimited segments. One that is not of the
		-- form <...> is glued back together with its two neighbours so that
		-- it stays part of the respelling text.
		local pos = 2
		while pos <= #group do
			if group[pos]:find("^<.*>$") then
				pos = pos + 2
			else
				group[pos - 1] = group[pos - 1] .. group[pos] .. group[pos + 1]
				remove(group, pos)
				remove(group, pos)
			end
		end

		local param_mods = {
			q = { type = "qualifier" },
			qq = { type = "qualifier" },
			a = { type = "labels" },
			aa = { type = "labels" },
			ref = { item_dest = "refs", type = "references" },
		}
		local term_obj = put.parse_inline_modifiers_from_segments {
			group = group,
			arg = respelling,
			props = {
				paramname = paramname,
				param_mods = param_mods,
				generate_obj = generate_obj,
			},
		}
		insert(parsed, term_obj)
	end

	return parsed
end
-- Parses a pronunciation-related parameter value (`arg`) holding one or more
-- items separated by `splitchar` (default ","), and returns a list of objects.
--
-- `generate_obj` turns the main text of an item into an object. `param_mods`
-- lists the inline modifiers specific to the caller; when `arg` contains "<",
-- the shared modifiers q, qq, a, aa and ref are added to that same table
-- (the caller's table is modified) before the value is passed to
-- parse_inline_modifiers() in the parse utilities module. `paramname` is
-- forwarded to that function.
local function parse_pron_modifier(arg, paramname, generate_obj, param_mods, splitchar)
	splitchar = splitchar or ","

	if not arg:find("<") then
		-- No inline modifiers: split and wrap each piece.
		local pieces
		if splitchar == "," then
			pieces = split_on_comma(arg) or rsplit(arg, splitchar)
		else
			pieces = rsplit(arg, splitchar)
		end
		local objs = {}
		for _, piece in ipairs(pieces) do
			insert(objs, generate_obj(piece))
		end
		return objs
	end

	param_mods.q = { type = "qualifier" }
	param_mods.qq = { type = "qualifier" }
	param_mods.a = { type = "labels" }
	param_mods.aa = { type = "labels" }
	param_mods.ref = { item_dest = "refs", type = "references" }

	local props = {
		param_mods = param_mods,
		generate_obj = generate_obj,
		paramname = paramname,
		splitchar = splitchar,
	}
	return require(parse_utilities_module).parse_inline_modifiers(arg, props)
end
-- Parses the audio parameter value `arg` into a list of audio objects.
--
-- Items in `arg` are separated by ";" (with optional surrounding whitespace).
-- Each item is a file name optionally followed by inline modifiers: IPA, text,
-- t/gloss, pos, lit, g, bad, cap, plus the shared ones added by
-- parse_pron_modifier(). In the file name, text and caption, every "#" is
-- replaced by `pagename`.
--
-- `lang` is stored on each object as its `lang` field; `paramname` is forwarded
-- to the parser. For each object the text-related fields are passed to
-- construct_audio_textobj() in the audio module; its result replaces `text`,
-- and `gloss`, `pos`, `lit` and `genders` are then cleared.
local function parse_audio(lang, arg, pagename, paramname)
	local param_mods = {
		IPA = {
			sublist = true,
		},
		text = {},
		t = {
			item_dest = "gloss",
		},
		gloss = {},
		pos = {},
		lit = {},
		g = {
			item_dest = "genders",
			sublist = true,
		},
		bad = {},
		cap = {
			item_dest = "caption",
		},
	}

	-- Replace "#" with the page name. A function is used as the replacement
	-- because a replacement *string* treats "%" as an escape character, so a
	-- page name containing "%" would be mangled or raise an error.
	local function process_special_chars(val)
		if not val then
			return val
		end
		return (val:gsub("#", function()
			return pagename
		end))
	end

	local function generate_audio_obj(file)
		return { file = process_special_chars(file) }
	end

	local retvals = parse_pron_modifier(arg, paramname, generate_audio_obj, param_mods, "%s*;%s*")
	for _, retval in ipairs(retvals) do
		retval.lang = lang
		retval.text = process_special_chars(retval.text)
		retval.caption = process_special_chars(retval.caption)
		local textobj = require(audio_module).construct_audio_textobj(retval)
		retval.text = textobj
		-- These fields were only inputs to the text object.
		retval.gloss = nil
		retval.pos = nil
		retval.lit = nil
		retval.genders = nil
	end
	return retvals
end
-- Parses the homophones parameter value `arg` (comma-separated) into a list of
-- objects of the form { term = ... }. The inline modifiers t/gloss, pos, alt,
-- lit, id and g are accepted, plus the shared ones added by
-- parse_pron_modifier(). `paramname` is forwarded to the parser.
local function parse_homophones(arg, paramname)
	local homophone_mods = {
		t = {
			item_dest = "gloss",
		},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		g = {
			item_dest = "genders",
			sublist = true,
		},
	}
	local function make_homophone(term)
		return { term = term }
	end
	return parse_pron_modifier(arg, paramname, make_homophone, homophone_mods)
end
-- Returns true if `term`, once lowercased, contains any of a, e, i, o, u.
local function has_vowel(term)
	local first_vowel_pos = ulower(term):find("[aeiou]")
	return first_vowel_pos ~= nil
end
-- Groups an array of letters into syllables.
--
-- `letters` is an array of strings, one letter each (string_to_syllables()
-- passes the output of string_to_letters()). It is modified in place: every
-- "d" immediately followed by "z" (case-insensitively) is merged into one
-- two-letter element.
--
-- Returns an array of non-empty syllable strings in the original casing.
local function letters_to_syllables(letters)
-- Fewer than two letters: nothing to divide.
if not letters[2] then
return { [1] = letters[1] }
end
-- Consonants excluded as the first member of a consonant + l/r cluster in
-- the second boundary rule below.
local l_r_exceptions = { ["m"] = true, ["n"] = true, ["ŭ"] = true, ["j"] = true }
-- Pass 1: merge each d + z pair into a single element so the digraph is
-- handled as one consonant afterwards.
local i = 1
while true do
local letter = letters[i]
if not letter then
break
elseif ulower(letter) == "d" then
local letter1 = letters[i + 1]
if not letter1 then
-- "d" is the final letter; nothing left to merge.
break
end
if ulower(letter1) == "z" then
letter = letter .. letter1
letters[i] = letter
remove(letters, i + 1)
end
end
i = i + 1
end
-- Pass 2: walk every letter except the last two. Each letter is appended to
-- the current syllable result[j]; the two letters after it then decide
-- whether a new syllable starts after it.
local result = { [1] = "" }
local j = 1
for i = 1, #letters - 2 do
if not letters[i] then
break
end
result[j] = result[j] .. letters[i]
local letter = letters[i] and ulower(letters[i]) or ""
local letter1 = letters[i + 1] and ulower(letters[i + 1]) or ""
local letter2 = letters[i + 2] and ulower(letters[i + 2]) or ""
-- Boundaries are only considered after a letter found in `phonemes`.
if phonemes[letter] then
if consonants[letter1] and vowels[letter2] then
-- Next comes consonant + vowel: the consonant opens a new syllable,
-- provided the current syllable already has a vowel, the consonant is
-- not ŭ, and either the current letter is a vowel or the current
-- syllable is not exactly one character long.
if vowels[letter] or ulen(result[j]) ~= 1 then
if has_vowel(result[j]) and (letter1 ~= 'ŭ') then
j = j + 1
result[j] = ""
end
end
elseif consonants[letter1] and not l_r_exceptions[letter1] and (letter2 == 'l' or letter2 == 'r') and (letter1 ~= 'l' and letter1 ~= 'r') then
-- Next comes consonant + l/r (the consonant not being m, n, ŭ, j, l
-- or r): the pair opens a new syllable if the current one has a vowel.
if has_vowel(result[j]) then
j = j + 1
result[j] = ""
end
elseif vowels[letter] and (letter1 == 'j' or letter1 == 'ŭ') then
-- Vowel followed by j or ŭ: deliberately no boundary here.
elseif vowels[letter1] then
-- Next letter is a vowel: it opens a new syllable if the current one
-- already has a vowel.
if has_vowel(result[j]) then
j = j + 1
result[j] = ""
end
end
end
end
-- Pass 3: place the final two letters (c1, c2), which the loop above did not
-- append.
if letters[2] then
local c1 = letters[#letters - 1]
local c2 = letters[#letters]
local c1_lower = c1 and ulower(c1) or ""
local c2_lower = c2 and ulower(c2) or ""
if vowels[c1_lower] and (c2_lower == 'j' or c2_lower == 'ŭ') then
-- Final vowel + j/ŭ stays together in the current syllable.
result[j] = result[j] .. c1 .. c2
elseif c1_lower == 'ŭ' then
local c0 = letters[#letters - 2]
local c0_lower = c0 and ulower(c0) or ""
if vowels[c0_lower] and vowels[c2_lower] then
-- vowel + ŭ + vowel: ŭ closes the current syllable and the last
-- vowel forms its own.
result[j] = result[j] .. c1
j = j + 1
result[j] = c2
elseif has_vowel(result[j]) and has_vowel(c1 .. c2) then
j = j + 1
result[j] = c1 .. c2
else
result[j] = result[j] .. c1 .. c2
end
elseif vowels[c1_lower] and vowels[c2_lower] then
-- Two final vowels are split between syllables.
result[j] = result[j] .. c1
j = j + 1
result[j] = c2
elseif has_vowel(result[j]) and has_vowel(c1 .. c2) then
-- Both the current syllable and the final pair contain a vowel: the
-- pair becomes a syllable of its own.
j = j + 1
result[j] = c1 .. c2
else
result[j] = result[j] .. c1 .. c2
end
end
-- Drop any syllables that ended up empty.
local result2 = {}
for i, j in ipairs(result) do
if j and j ~= "" then
insert(result2, j)
end
end
return result2
end
-- Splits `term` into an array of its individual characters using
-- mw.text.split() with an empty separator. A nil or empty `term` gives an
-- empty array.
local function string_to_letters(term)
	if term and term ~= "" then
		return mw.text.split(term, "")
	end
	return {}
end
-- Splits `term` into an array of syllables. Every "." or "‧" in `term` is a
-- forced syllable break; each stretch between breaks is divided automatically
-- by letters_to_syllables(). A nil or empty `term` gives an empty array.
local function string_to_syllables(term)
	local syllables = {}
	if not term or term == "" then
		return syllables
	end
	local with_breaks = term:gsub("%.", "‧")
	for _, chunk in ipairs(mw.text.split(with_breaks, '‧', true)) do
		local chunk_syllables = letters_to_syllables(string_to_letters(chunk))
		for _, syllable in ipairs(chunk_syllables) do
			insert(syllables, syllable)
		end
	end
	return syllables
end
-- Returns the IPA for a single letter: the "dz" placeholder character maps to
-- phonemes.dz, any other letter is lowercased and looked up in `phonemes`.
-- Unknown letters give "".
local function letter_to_ipa(letter)
	if letter == dz and phonemes.dz then
		return phonemes.dz
	end
	return phonemes[ulower(letter)] or ""
end
-- Converts one syllable of spelling to IPA.
--
-- First a fixed series of substitutions is applied in order: "dz" becomes the
-- one-character placeholder; j, ŭ or w directly after a lowercase vowel
-- becomes a non-syllabic i̯ / u̯; any remaining ŭ/Ŭ becomes w. The result is
-- then converted character by character with letter_to_ipa(). Characters
-- with no IPA mapping, and the non-syllabic mark itself, are copied through
-- unchanged.
local function string_to_ipa(syllable)
	local substitutions = {
		{ "[dD][zZ]", dz },
		{ "([aeiou])j", "%1i̯" },
		{ "([aeiou])ŭ", "%1u̯" },
		{ "([aeiou])w", "%1u̯" },
		{ "ŭ", "w" },
		{ "Ŭ", "w" },
	}
	local text = syllable
	for _, substitution in ipairs(substitutions) do
		text = text:gsub(substitution[1], substitution[2])
	end

	local pieces = {}
	for _, ch in ipairs(m_str_utils.explode_utf8(text)) do
		local ipa = ""
		if not ch:find("̯") then
			ipa = letter_to_ipa(ch)
		end
		if ipa == "" then
			insert(pieces, ch)
		else
			insert(pieces, ipa)
		end
	end
	return concat(pieces)
end
-- Counts syllables in an IPA string by counting its separators: " | " is
-- reduced to a space, a stress mark following a space (or at the very start,
-- optionally after "[" or "/") is discarded, and then every remaining ".",
-- stress mark or space counts as one boundary. The result is the number of
-- boundaries plus one.
local function count_syllables(pron)
	local text = pron:gsub(" | ", " ")
	text = rsub(text, " [ˈˌ]", " ")
	text = rsub(text, "^[%[/]?[ˈˌ]", "")
	local separators = rsub(text, "[^.ˈˌ ]", "")
	return 1 + ulen(separators)
end
-- Builds a rhyme object from the IPA string `pron`.
--
-- Returns nil when `pron` contains a space. Otherwise returns
-- { rhyme = ..., num_syl = { n } }, where the rhyme is what follows the last
-- primary stress mark, starting at its first vowel, with "." and "ˌ" removed.
-- `num_syl` is used as n if it is a number that is not below 1; otherwise n
-- is 1.
local function do_rhyme(pron, num_syl)
	if pron:find(" ") then
		return nil
	end

	local usable = num_syl and type(num_syl) == "number" and not (num_syl < 1)
	if not usable then
		num_syl = 1
	end

	local V = "aeiou"
	local onset_pattern = ("^[^%s]-([%s])"):format(V, V)
	local after_stress = pron:gsub("^.*ˈ", "")
	local from_vowel = rsub(after_stress, onset_pattern, "%1")

	local rhyme_obj = {}
	rhyme_obj.rhyme = rsub(from_vowel, "[.ˌ]", "")
	rhyme_obj.num_syl = { num_syl }
	return rhyme_obj
end
-- Converts a respelling (one or more words) into pronunciation objects and a
-- syllabified spelling.
--
-- `term` is the respelling. Within a word, "'" marks the start of the stressed
-- syllable, and "." or "‧" force a syllable break. Words are separated by
-- spaces; a comma between words is rendered as a "|" pause marker in the IPA
-- and kept as "," in the syllabification. A term of the form "raw:[...]" is
-- returned untouched as phonetic IPA. `pagename` is accepted but not used.
--
-- Returns two values:
--   1. a list of pronunciation objects, either { phonetic = <string> } for raw
--      input, or { pron = <IPA>, norhyme = false, num_syl = <number> };
--   2. the syllabified spelling with "‧" between syllables (nil for raw input).
local function multiword(term, pagename)
	if term:find("^raw:%[.+%]$") then
		return { { phonetic = (term:gsub("^raw:", "")) } }, nil
	end

	-- Turn commas into a standalone "|" word.
	term = rsub(term, "%s*,%s*", " | ")

	-- Converts a single word; returns its IPA and its syllabified spelling.
	local function process_word_with_stress(word)
		if word == "" then
			return "", ""
		end

		-- An explicit "'" sets the stressed syllable: it is the syllable that
		-- starts right after the marker, found by counting the syllables of
		-- the text preceding it.
		local stress_pos
		local stress_marker_pos = rfind(word, "'")
		if stress_marker_pos then
			if stress_marker_pos == 1 then
				stress_pos = 1
			else
				local before_stress = usub(word, 1, stress_marker_pos - 1)
				stress_pos = #string_to_syllables(before_stress) + 1
			end
			word = rsub(word, "'", "")
		end

		local hyphenated = string_to_syllables(word)
		local word_result = {}
		for j, syllable in ipairs(hyphenated) do
			word_result[j] = string_to_ipa(syllable)
		end

		-- Without a usable explicit position, stress the second-to-last
		-- syllable; words of fewer than two syllables then get no mark.
		if not (stress_pos and stress_pos >= 1 and stress_pos <= #word_result) then
			stress_pos = word_result[2] and #word_result - 1 or nil
		end
		if stress_pos then
			insert(word_result, stress_pos, "ˈ")
		end

		return concat(word_result), concat(hyphenated, "‧")
	end

	local ipa, syl
	if term:find(" ") then
		-- Each entry is one word, with any pause marker attached to the word it
		-- follows, so joining with single spaces gives "X | Y" in the IPA and
		-- "X, Y" in the syllabification, with no stray leading or trailing
		-- space.
		local ipaparts, sylparts = {}, {}
		for _, word in ipairs(rsplit(term, " +")) do
			if word == "|" then
				if ipaparts[1] then
					ipaparts[#ipaparts] = ipaparts[#ipaparts] .. " |"
					sylparts[#sylparts] = sylparts[#sylparts] .. ","
				else
					insert(ipaparts, "|")
					insert(sylparts, ",")
				end
			else
				local word_ipa, word_syl = process_word_with_stress(word)
				if word_ipa ~= "" then
					insert(ipaparts, word_ipa)
					insert(sylparts, word_syl)
				end
			end
		end
		ipa = concat(ipaparts, " ")
		syl = concat(sylparts, " ")
	else
		ipa, syl = process_word_with_stress(term)
	end

	-- Count the "‧"-separated pieces. A plain split is used because a
	-- byte-level class such as "[^‧]" would also break on any other character
	-- that shares a UTF-8 byte with "‧".
	local num_syl = 1
	if syl and syl ~= "" then
		num_syl = 0
		for _, piece in ipairs(mw.text.split(syl, "‧", true)) do
			if piece ~= "" then
				num_syl = num_syl + 1
			end
		end
		if num_syl < 1 then
			num_syl = 1
		end
	end

	local result = { {
		pron = ipa,
		norhyme = false,
		num_syl = num_syl,
	} }

	-- Within one word (no space or "‿" in between), only the last primary
	-- stress is kept; each earlier one is demoted to secondary stress.
	-- Repeat until no word has two primary stresses left.
	while true do
		local changed = false
		result = flatmap(result, function(item)
			if rfind(item.pron, "([^ ‿]*)ˈ([^ ‿]-)ˈ") then
				changed = true
				return { {
					pron = rsub(item.pron, "([^ ‿]*)ˈ([^ ‿]-)ˈ", "%1ˌ%2ˈ"),
					a = item.a,
					norhyme = item.norhyme,
					num_syl = item.num_syl or 1,
				} }
			else
				return { item }
			end
		end)
		if not changed then
			break
		end
	end

	return result, syl
end
-- Computes pronunciations, rhymes and syllabifications for a list of terms.
--
-- `terms` is a list of term objects as produced by
-- parse_respellings_with_modifiers(): each has `respelling` and optionally
-- q, qq, a, aa and refs. A missing or empty respelling, or "#", stands for
-- `pagename`. `paramname` is accepted but not used.
--
-- Returns a table with:
--   pron_list   - list of objects for the IPA formatter, or nil when the only
--                 term is "-" (pronunciation suppressed);
--   syl_list    - list of distinct syllabified spellings ("‧" between
--                 syllables);
--   rhyme_list  - list of { rhyme = ..., num_syl = { n } };
--   hyphen_list - always an empty list here.
-- When the only term is "-", `hyph_list` (empty) is returned as well, for
-- callers that read that field name.
function export.get_pron_info(terms, pagename, paramname)
	if #terms == 1 and terms[1].respelling == "-" then
		return {
			pron_list = nil,
			rhyme_list = {},
			-- Same field as in the normal return below, so callers can rely on it.
			syl_list = {},
			hyph_list = {},
			hyphen_list = {},
		}
	end

	local pron_list = {}
	local rhyme_list = {}
	local syl_list = {}
	local brackets = "/%s/"

	for _, term in ipairs(terms) do
		local respelling = term.respelling
		if not respelling or respelling == "" or respelling == "#" then
			respelling = pagename or ""
		end

		local prons, syl = multiword(respelling, pagename)
		for i, pron in ipairs(prons) do
			if pron.phonetic then
				-- Raw phonetic input is displayed exactly as given.
				insert(pron_list, {
					pron = pron.phonetic,
					pron_with_syldivs = pron.phonetic,
					q = term.q,
					qq = term.qq,
					a = term.a,
					aa = term.aa,
					-- References are attached to the first pronunciation only.
					refs = i == 1 and term.refs or nil,
				})
			else
				local bracketed_pron = brackets:format(pron.pron)
				insert(pron_list, {
					pron = bracketed_pron,
					pron_with_syldivs = bracketed_pron,
					q = term.q,
					qq = term.qq,
					a = combine_qualifiers(pron.a, term.a),
					aa = term.aa,
					refs = i == 1 and term.refs or nil,
				})

				-- Single-character respellings get no rhyme.
				if not pron.norhyme and ulen(respelling) > 1 then
					local num_syl = pron.num_syl
					if not num_syl or type(num_syl) ~= "number" or num_syl < 1 then
						num_syl = count_syllables(pron.pron)
					end
					if type(num_syl) ~= "number" or num_syl < 1 then
						num_syl = 1
					end
					local rhyme_obj = do_rhyme(pron.pron, num_syl)
					if rhyme_obj and rhyme_obj.num_syl and #rhyme_obj.num_syl > 0 then
						insert(rhyme_list, rhyme_obj)
					end
				end
			end
		end

		-- Keep the automatic syllabification if it is multiword, or if (breaks
		-- and spaces aside) it spells the page name or the respelling.
		-- The extra parentheses drop gsub's second return value (the count).
		if syl and ulen(respelling) > 1 then
			local syl_normalized = ulower((syl:gsub("‧", ""):gsub(" ", "")))
			-- `pagename` may be nil (see the fallback above); treat it as "".
			local pagename_normalized = (ulower(pagename or ""):gsub(" ", ""))
			local respelling_normalized = (ulower(respelling):gsub(" ", ""):gsub("'", ""))
			if syl:find(" ") or syl_normalized == pagename_normalized or syl_normalized == respelling_normalized then
				m_table.insertIfNot(syl_list, syl)
			end
		end
	end

	return {
		pron_list = pron_list,
		syl_list = syl_list,
		rhyme_list = rhyme_list,
		hyphen_list = {},
	}
end
-- Splits a syllabification or hyphenation string on the hyphenation point "‧"
-- and returns the non-empty pieces. A plain (non-pattern) split is used
-- because a byte-level class such as "[^‧]" would also break on any other
-- character sharing a UTF-8 byte with "‧" (for example "’").
local function split_on_hyphenation_point(text)
	local pieces = {}
	for _, piece in ipairs(mw.text.split(text, "‧", true)) do
		if piece ~= "" then
			insert(pieces, piece)
		end
	end
	return pieces
end

-- Template entry point: builds the pronunciation section (IPA, audio, rhymes,
-- syllabification, hyphenation, homophones) from the arguments of the
-- template that invoked this module.
--
-- Parameters read from the parent frame:
--   1           - comma-separated respellings with optional inline modifiers;
--                 defaults to "#" (the page name); "-" suppresses the IPA line
--   syl / s     - manual syllabification(s); "-" suppresses the line
--   rhymes / r  - manual rhymes as RHYME/NUM_SYL; "-" suppresses, "+" keeps the
--                 automatic ones
--   audios / a  - audio files, separated by ";"
--   homophones / hh - homophones
--   hyphens / hyph / h - manual hyphenation(s); "-" suppresses
--   pagename    - overrides the page name
--   indent      - list prefix for each line; defaults to "*"
--
-- Returns the generated wikitext.
function export.show(frame)
	local parent_args = frame:getParent().args
	local process = require("Module:parameters").process
	local lang_obj = lang
	local langcode = "eo"
	local params = {
		[1] = {},
		[2] = {},
		[3] = {},
		["syl"] = true,
		["s"] = { alias_of = "syl" },
		["rhymes"] = true,
		["r"] = { alias_of = "rhymes" },
		["audios"] = true,
		["a"] = { alias_of = "audios" },
		["homophones"] = true,
		["hh"] = { alias_of = "homophones" },
		["pagename"] = true,
		["indent"] = true,
		["hyphens"] = true,
		["hyph"] = { alias_of = "hyphens" },
		["h"] = { alias_of = "hyphens" },
	}
	local args = process(parent_args, params)
	local termspec = args[1] or "#"
	local terms = parse_respellings_with_modifiers(termspec, 1)
	local pagename = args.pagename or mw.loadData("Module:headword/data").pagename
	local indent = args.indent or "*"
	local pronobj = export.get_pron_info(terms, pagename, 1)
	local syl_list, rhyme_list = pronobj.syl_list, pronobj.rhyme_list
	local hyphen_list = {}

	-- Decide whether to show a syllabification line and where it comes from.
	local do_syl
	if args.syl then
		if args.syl == "-" then
			do_syl = false
		else
			syl_list = split_on_comma(args.syl)
			do_syl = true
		end
	else
		do_syl = terms[1].respelling ~= "-"
	end

	-- Handle manual hyphenation override if provided
	if args.hyphens then
		if args.hyphens ~= "-" then
			hyphen_list = split_on_comma(args.hyphens)
		end
	else
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/eo-pr/no-hyphenation]]
		require("Module:debug/track")("eo-pr/no-hyphenation")
	end

	-- Manual rhymes replace the automatic ones unless the value is "+".
	if args.rhymes then
		if args.rhymes == "-" then
			rhyme_list = {}
		elseif args.rhymes ~= "+" then
			rhyme_list = {}
			for _, rhyme in ipairs(split_on_comma(args.rhymes)) do
				if rfind(rhyme, ".+/.+") then
					insert(rhyme_list, {
						rhyme = rsub(rhyme, "/.+", ""),
						num_syl = { tonumber(rsub(rhyme, ".+/", "")) },
					})
				else
					error(("The manual rhyme %s did not specify syllable number as RHYME/NUM_SYL"):format(rhyme))
				end
			end
		end
	end

	-- Merge entries with the same rhyme, collecting their distinct syllable
	-- counts on the first occurrence.
	if #rhyme_list > 0 then
		local temp_rhyme_list = {}
		local indices = {}
		for _, rhymeobj in ipairs(rhyme_list) do
			local index = indices[rhymeobj.rhyme]
			if index == nil then
				insert(temp_rhyme_list, rhymeobj)
				indices[rhymeobj.rhyme] = #temp_rhyme_list
			else
				local different_num_syl = true
				for _, ns in ipairs(temp_rhyme_list[index].num_syl) do
					if ns == rhymeobj.num_syl[1] then
						different_num_syl = false
						break
					end
				end
				if different_num_syl then
					insert(temp_rhyme_list[index].num_syl, rhymeobj.num_syl[1])
				end
			end
		end
		rhyme_list = temp_rhyme_list
	end

	local m_IPA_format = require("Module:IPA").format_IPA_full
	local parts = {}
	local function ins(text)
		insert(parts, text)
	end

	-- IPA line.
	if pronobj.pron_list then
		local formatted = m_IPA_format { lang = lang_obj, items = pronobj.pron_list }
		ins(indent .. mw.ustring.toNFC(formatted))
	end

	-- Audio lines; with several files, each one lacking a caption gets a
	-- numbered default caption.
	if args.audios then
		local format_audio = require(audio_module).format_audio
		local audio_objs = parse_audio(lang_obj, args.audios, pagename, "audios")
		local num_audios = #audio_objs
		for i, audio_obj in ipairs(audio_objs) do
			if num_audios > 1 and not audio_obj.caption then
				audio_obj.caption = "ရမျာၚ် " .. i
			end
			ins("\n" .. indent .. " " .. format_audio(audio_obj))
		end
	end

	-- Rhymes line.
	if #rhyme_list > 0 then
		local formatted_rhymes = require("Module:rhymes").format_rhymes { lang = lang_obj, rhymes = rhyme_list }
		ins("\n" .. indent .. " " .. mw.ustring.toNFC(formatted_rhymes))
	end

	-- Syllabification and hyphenation lines; skipped entirely when any term is
	-- a single character.
	if do_syl then
		local is_single_letter = false
		for _, term in ipairs(terms) do
			local respelling = term.respelling
			if respelling == "#" or respelling == nil or respelling == "" then
				respelling = pagename
			end
			if respelling and ulen(respelling) <= 1 then
				is_single_letter = true
				break
			end
		end
		if not is_single_letter then
			ins("\n" .. indent .. " ")
			if #syl_list > 0 then
				local syls = {}
				for i, syl in ipairs(syl_list) do
					syls[i] = { hyph = split_on_hyphenation_point(syl) }
				end
				ins(require("Module:hyphenation").format_hyphenations {
					lang = lang_obj, hyphs = syls, caption = "ပရေၚ်ပါ်ဝဏ္ဏ"
				})
			else
				ins("Syllabification: <small>[please specify syllabification manually]</small>")
				-- Only categorize pages in the main namespace.
				if mw.title.getCurrentTitle().nsText == "" then
					ins(("[[Category:%s entries with Template:%s-pr without syllabification]]"):format(
						lang_obj:getFullName(), langcode))
				end
			end
			if #hyphen_list > 0 then
				ins("\n" .. indent .. " ")
				local hyphens = {}
				for i, hyph in ipairs(hyphen_list) do
					hyphens[i] = { hyph = split_on_hyphenation_point(hyph) }
				end
				ins(require("Module:hyphenation").format_hyphenations {
					lang = lang_obj, hyphs = hyphens, caption = "ဗီုစုတ်ဂၠေံဂၠေံ"
				})
			end
		end
	end

	-- Homophones line.
	if args.homophones then
		local homophone_list = parse_homophones(args.homophones, "ဗီုပြၚ်ပ္တိတ်ရမျာၚ်")
		ins("\n" .. indent .. " " .. require("Module:homophones").format_homophones {
			lang = lang_obj,
			homophones = homophone_list,
		})
	end

	return concat(parts)
end
return export