မာတိကာသို့ ခုန်သွားရန်

မဝ်ဂျူ:eo-pron

နူ ဝိက်ရှေန်နရဳ

Documentation for this module may be created at မဝ်ဂျူ:eo-pron/doc

local export = {}

local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local audio_module = "Module:audio"
local parse_utilities_module = "Module:parse utilities"

local rfind = m_str_utils.find
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local usub = m_str_utils.sub

local concat = table.concat
local insert = table.insert
local remove = table.remove

local lang = require("Module:languages").getByCode("eo")
local dz = mw.ustring.char(0xF000)

-- Esperanto consonant letters (and the digraph "dz") mapped to IPA.
local consonants = {
	-- plain ASCII letters
	b = "b",
	c = "t͡s",
	d = "d",
	dz = "d͡z",
	f = "f",
	g = "ɡ",
	h = "h",
	j = "j",
	k = "k",
	l = "l",
	m = "m",
	n = "n",
	p = "p",
	r = "r",
	s = "s",
	t = "t",
	v = "v",
	z = "z",
	-- letters carrying a diacritic
	["ĉ"] = "t͡ʃ",
	["ĝ"] = "d͡ʒ",
	["ĥ"] = "x",
	["ĵ"] = "ʒ",
	["ŝ"] = "ʃ",
	["ŭ"] = "w",
}

-- Esperanto vowel letters mapped to IPA.
local vowels = { a = "a", e = "e", i = "i", o = "o", u = "u" }

-- Union of both inventories: every letter this module can transcribe.
local phonemes = {}

for _, inventory in ipairs({ vowels, consonants }) do
	for letter, ipa in pairs(inventory) do
		phonemes[letter] = ipa
	end
end

-- Replace matches of pattern `foo` in `term` with `bar`, returning only the
-- resulting string (the second value returned by the underlying gsub is dropped).
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- Apply `fun` to each element of `items` (each call must return a list) and
-- gather all produced values into one flat list; values are added through
-- m_table.insertIfNot.
local function flatmap(items, fun)
	local merged = {}
	for _, source in ipairs(items) do
		for _, produced in ipairs(fun(source)) do
			m_table.insertIfNot(merged, produced)
		end
	end
	return merged
end

-- Wrap a respelling string in the object form used for parsed terms.
local function generate_obj(respelling)
	local obj = {}
	obj.respelling = respelling
	return obj
end

-- Merge two qualifier lists. When only one list is present it is returned
-- as-is; when both are present the result is a deep copy of `qual1` with the
-- entries of `qual2` added through m_table.insertIfNot.
local function combine_qualifiers(qual1, qual2)
	if qual1 and qual2 then
		local merged = m_table.deepCopy(qual1)
		for _, extra in ipairs(qual2) do
			m_table.insertIfNot(merged, extra)
		end
		return merged
	end
	return qual1 or qual2
end

-- Split `term` on commas. Input containing a comma followed by whitespace, or
-- a backslash, is delegated to the parse-utilities splitter; anything else is
-- split on every comma. Returns nil for a missing term.
local function split_on_comma(term)
	if not term then
		return nil
	end
	local needs_smart_split = term:find(",%s") or term:find("\\")
	if not needs_smart_split then
		return rsplit(term, ",")
	end
	return require(parse_utilities_module).split_on_comma(term)
end

-- Parse a comma-separated respelling spec into a list of objects with a
-- `respelling` field. When the spec contains "<" or "[", it is parsed with the
-- parse-utilities module so inline modifiers (<q:...>, <qq:...>, <a:...>,
-- <aa:...>, <ref:...>) are attached to each object.
local function parse_respellings_with_modifiers(respelling, paramname)
	-- Fast path: no bracket characters, so a plain comma split is enough.
	if not respelling:find("[<%[]") then
		local plain = {}
		for _, item in ipairs(split_on_comma(respelling)) do
			insert(plain, generate_obj(item))
		end
		return plain
	end

	local put = require(parse_utilities_module)
	local segments = put.parse_multi_delimiter_balanced_segment_run(respelling, { { "<", ">" }, { "[", "]" } })
	local comma_separated_groups = put.split_alternating_runs_on_comma(segments)

	local parsed = {}
	for _, group in ipairs(comma_separated_groups) do
		-- Even positions hold delimited runs. A run that is not of the form
		-- <...> is glued back onto its neighbouring text segments, leaving
		-- only <...> runs at the even positions.
		local pos = 2
		while pos <= #group do
			if group[pos]:find("^<.*>$") then
				pos = pos + 2
			else
				group[pos - 1] = group[pos - 1] .. group[pos] .. group[pos + 1]
				remove(group, pos)
				remove(group, pos)
			end
		end

		local modifier_specs = {
			q = { type = "qualifier" },
			qq = { type = "qualifier" },
			a = { type = "labels" },
			aa = { type = "labels" },
			ref = { item_dest = "refs", type = "references" },
		}

		insert(parsed, put.parse_inline_modifiers_from_segments {
			group = group,
			arg = respelling,
			props = {
				paramname = paramname,
				param_mods = modifier_specs,
				generate_obj = generate_obj,
			},
		})
	end
	return parsed
end

-- Parse a delimiter-separated argument into a list of objects built by
-- `generate_obj`. If the argument contains "<", inline modifiers are parsed
-- via the parse-utilities module; the common q/qq/a/aa/ref modifier specs are
-- written into the caller-supplied `param_mods` table first. `splitchar`
-- defaults to ",".
local function parse_pron_modifier(arg, paramname, generate_obj, param_mods, splitchar)
	splitchar = splitchar or ","

	if not arg:find("<") then
		local pieces
		if splitchar == "," then
			pieces = split_on_comma(arg)
		else
			pieces = rsplit(arg, splitchar)
		end
		local objs = {}
		for _, piece in ipairs(pieces) do
			insert(objs, generate_obj(piece))
		end
		return objs
	end

	for _, key in ipairs({ "q", "qq" }) do
		param_mods[key] = { type = "qualifier" }
	end
	for _, key in ipairs({ "a", "aa" }) do
		param_mods[key] = { type = "labels" }
	end
	param_mods.ref = { item_dest = "refs", type = "references" }

	return require(parse_utilities_module).parse_inline_modifiers(arg, {
		param_mods = param_mods,
		generate_obj = generate_obj,
		paramname = paramname,
		splitchar = splitchar,
	})
end

-- Parse the audio argument (items separated by ";") into a list of audio
-- objects.
--
-- lang:      language object stored on every returned item as `lang`.
-- arg:       raw argument text; each item is a file name optionally followed
--            by inline modifiers (<IPA:...>, <text:...>, <t:...>, <cap:...>, ...).
-- pagename:  substituted for every "#" in the file name, text and caption.
-- paramname: parameter name forwarded to the parser.
--
-- Returns the list produced by parse_pron_modifier, where each item has had
-- its `text` replaced by the object built by the audio module's
-- construct_audio_textobj, and its gloss/pos/lit/genders fields cleared.
local function parse_audio(lang, arg, pagename, paramname)
	local param_mods = {
		IPA = {
			sublist = true,
		},
		text = {},
		t = {
			item_dest = "gloss",
		},
		gloss = {},
		pos = {},
		lit = {},
		g = {
			item_dest = "genders",
			sublist = true,
		},
		bad = {},
		cap = {
			item_dest = "caption",
		},
	}

	-- Expand "#" to the page name. A function replacement is used so the page
	-- name is inserted literally: with a string replacement, gsub would treat
	-- any "%" in the page name as an escape. If `pagename` is nil the function
	-- returns nil and gsub leaves the "#" in place.
	local function pagename_repl()
		return pagename
	end

	local function process_special_chars(val)
		if not val then
			return val
		end
		-- Parentheses drop gsub's second return value (the match count).
		return (val:gsub("#", pagename_repl))
	end

	local function generate_audio_obj(arg)
		return { file = process_special_chars(arg) }
	end

	local retvals = parse_pron_modifier(arg, paramname, generate_audio_obj, param_mods, "%s*;%s*")
	for _, retval in ipairs(retvals) do
		retval.lang = lang
		retval.text = process_special_chars(retval.text)
		retval.caption = process_special_chars(retval.caption)
		-- The text object is built from the raw fields, which are then
		-- cleared from the item itself.
		local textobj = require(audio_module).construct_audio_textobj(retval)
		retval.text = textobj
		retval.gloss = nil
		retval.pos = nil
		retval.lit = nil
		retval.genders = nil
	end
	return retvals
end

-- Parse the homophones argument into a list of objects with a `term` field,
-- accepting the inline modifiers declared below (plus the common ones added
-- by parse_pron_modifier).
local function parse_homophones(arg, paramname)
	local homophone_mods = {
		alt = {},
		gloss = {},
		id = {},
		lit = {},
		pos = {},
		t = { item_dest = "gloss" },
		g = { item_dest = "genders", sublist = true },
	}

	local function make_homophone(term)
		return { term = term }
	end

	return parse_pron_modifier(arg, paramname, make_homophone, homophone_mods)
end

-- True when `term` contains at least one of a/e/i/o/u (case-insensitively).
local function has_vowel(term)
	if ulower(term):find("[aeiou]") then
		return true
	end
	return false
end

-- Group a list of letters (one character per entry) into syllables.
--
-- letters: array of single-character strings; it is modified in place,
--          because each "d" + "z" pair is fused into one "dz" entry.
--
-- Returns an array of non-empty syllable strings. Case is preserved in the
-- output; comparisons are done on lowercased letters.
local function letters_to_syllables(letters)
	if not letters[2] then
		return { [1] = letters[1] }
	end
	-- Consonants that are not treated as the first member of a
	-- consonant + l/r cluster in the boundary test below.
	local l_r_exceptions = { ["m"] = true, ["n"] = true, ["ŭ"] = true, ["j"] = true }

	-- Fuse each "d" immediately followed by "z" into a single "dz" unit.
	local pos = 1
	while letters[pos + 1] do
		if ulower(letters[pos]) == "d" and ulower(letters[pos + 1]) == "z" then
			letters[pos] = letters[pos] .. letters[pos + 1]
			remove(letters, pos + 1)
		end
		pos = pos + 1
	end

	-- The fusion can leave a single unit (input "dz"). Return it directly;
	-- otherwise the code below, which needs at least two units, would produce
	-- an empty result and the text would be lost.
	if not letters[2] then
		return { letters[1] }
	end

	local result = { [1] = "" }
	local j = 1
	-- Walk every unit except the last two, appending it to the current
	-- syllable and deciding whether a syllable boundary follows it.
	for i = 1, #letters - 2 do
		if not letters[i] then
			break
		end
		result[j] = result[j] .. letters[i]
		local letter = letters[i] and ulower(letters[i]) or ""
		local letter1 = letters[i + 1] and ulower(letters[i + 1]) or ""
		local letter2 = letters[i + 2] and ulower(letters[i + 2]) or ""

		if phonemes[letter] then
			if consonants[letter1] and vowels[letter2] then
				-- Next is consonant + vowel: break before the consonant,
				-- provided the current syllable already has a vowel and the
				-- consonant is not ŭ.
				if vowels[letter] or ulen(result[j]) ~= 1 then
					if has_vowel(result[j]) and (letter1 ~= 'ŭ') then
						j = j + 1
						result[j] = ""
					end
				end
			elseif consonants[letter1] and not l_r_exceptions[letter1] and (letter2 == 'l' or letter2 == 'r') and (letter1 ~= 'l' and letter1 ~= 'r') then
				-- Next is consonant + l/r: the cluster starts a new syllable.
				if has_vowel(result[j]) then
					j = j + 1
					result[j] = ""
				end
			elseif vowels[letter] and (letter1 == 'j' or letter1 == 'ŭ') then
				-- Vowel followed by j/ŭ: no boundary is inserted here.
			elseif vowels[letter1] then
				-- Next is a vowel: it starts a new syllable.
				if has_vowel(result[j]) then
					j = j + 1
					result[j] = ""
				end
			end
		end
	end

	-- Place the final two units, which the loop above did not append.
	local c1 = letters[#letters - 1]
	local c2 = letters[#letters]
	local c1_lower = c1 and ulower(c1) or ""
	local c2_lower = c2 and ulower(c2) or ""

	if vowels[c1_lower] and (c2_lower == 'j' or c2_lower == 'ŭ') then
		result[j] = result[j] .. c1 .. c2
	elseif c1_lower == 'ŭ' then
		local c0 = letters[#letters - 2]
		local c0_lower = c0 and ulower(c0) or ""
		if vowels[c0_lower] and vowels[c2_lower] then
			result[j] = result[j] .. c1
			j = j + 1
			result[j] = c2
		elseif has_vowel(result[j]) and has_vowel(c1 .. c2) then
			j = j + 1
			result[j] = c1 .. c2
		else
			result[j] = result[j] .. c1 .. c2
		end
	elseif vowels[c1_lower] and vowels[c2_lower] then
		result[j] = result[j] .. c1
		j = j + 1
		result[j] = c2
	elseif has_vowel(result[j]) and has_vowel(c1 .. c2) then
		j = j + 1
		result[j] = c1 .. c2
	else
		result[j] = result[j] .. c1 .. c2
	end

	-- Drop any syllable slot that stayed empty.
	local result2 = {}
	for _, syllable in ipairs(result) do
		if syllable and syllable ~= "" then
			insert(result2, syllable)
		end
	end
	return result2
end

-- Split `term` into a list of its characters via mw.text.split with an empty
-- pattern; a missing or empty term gives an empty list.
local function string_to_letters(term)
	if term and term ~= "" then
		return mw.text.split(term, "")
	end
	return {}
end

-- Syllabify `term`. Any "." is first turned into "‧"; the term is then cut at
-- every "‧" and each piece is syllabified independently, so explicit
-- separators always act as syllable boundaries. Returns a flat list of
-- syllables (empty for a missing or empty term).
local function string_to_syllables(term)
	local syllables = {}
	if not term or term == "" then
		return syllables
	end

	local dotted = term:gsub("%.", "‧")
	for _, chunk in ipairs(mw.text.split(dotted, '‧', true)) do
		for _, syllable in ipairs(letters_to_syllables(string_to_letters(chunk))) do
			insert(syllables, syllable)
		end
	end
	return syllables
end



-- IPA for a single letter: the `dz` placeholder character maps to the "dz"
-- phoneme, any other letter is looked up lowercased, and an unknown letter
-- gives the empty string.
local function letter_to_ipa(letter)
	if letter == dz and phonemes.dz then
		return phonemes.dz
	end
	return phonemes[ulower(letter)] or ""
end

-- Convert one syllable of Esperanto text to IPA.
--
-- Steps:
--   1. "dz" (any case) is replaced by the single placeholder character `dz`
--      so it is transcribed as one phoneme.
--   2. A vowel followed by j, ŭ or w becomes vowel + non-syllabic i̯ / u̯.
--      The vowel and the glide are matched in either case, so capitalised
--      words are transcribed the same way as their lowercase spelling.
--   3. Any remaining ŭ/Ŭ becomes "w".
--   4. Each character is mapped through letter_to_ipa; characters with no
--      mapping (including the combining non-syllabic mark) are kept as-is.
--
-- Returns the IPA string for the syllable.
local function string_to_ipa(syllable)
	-- These are byte-oriented string patterns, so the two-byte letters ŭ and Ŭ
	-- cannot be put in a character class and get a rule each.
	local result = syllable:gsub("[dD][zZ]", dz)
		:gsub("([aeiouAEIOU])[jJ]", "%1i̯")
		:gsub("([aeiouAEIOU])ŭ", "%1u̯")
		:gsub("([aeiouAEIOU])Ŭ", "%1u̯")
		:gsub("([aeiouAEIOU])[wW]", "%1u̯")

	result = result:gsub("ŭ", "w")
	result = result:gsub("Ŭ", "w")

	local chars = m_str_utils.explode_utf8(result)
	local output = {}
	for _, char in ipairs(chars) do
		local ipa = ""
		-- The combining mark added in step 2 must pass through untouched.
		if not char:find("̯") then
			ipa = letter_to_ipa(char)
		end
		if ipa ~= "" then
			output[#output + 1] = ipa
		else
			output[#output + 1] = char
		end
	end

	return concat(output)
end

-- Estimate the syllable count of a pronunciation string: after removing
-- " | " separators and word-initial stress marks, count the remaining
-- ".", "ˈ", "ˌ" and space characters and add one.
local function count_syllables(pron)
	local stripped = pron:gsub(" | ", " ")
	stripped = rsub(stripped, " [ˈˌ]", " ")
	stripped = rsub(stripped, "^[%[/]?[ˈˌ]", "")
	stripped = rsub(stripped, "[^.ˈˌ ]", "")
	return 1 + ulen(stripped)
end

-- Build the rhyme object for a single-word pronunciation. Returns nil when
-- `pron` contains a space. The rhyme is everything after the last primary
-- stress mark, minus leading non-vowels and any "." / "ˌ"; `num_syl`
-- falls back to 1 unless it is a number >= 1.
local function do_rhyme(pron, num_syl)
	if pron:find(" ") then
		return nil
	end
	if type(num_syl) ~= "number" or num_syl < 1 then
		num_syl = 1
	end

	local vowel_set = "aeiou"
	local onset_pattern = ("^[^%s]-([%s])"):format(vowel_set, vowel_set)

	local tail = pron:gsub("^.*ˈ", "")
	tail = rsub(tail, onset_pattern, "%1")
	tail = rsub(tail, "[.ˌ]", "")

	return {
		rhyme = tail,
		num_syl = { num_syl },
	}
end

-- Generate the pronunciation and syllabification of a (possibly multi-word)
-- respelling.
--
-- term:     the respelling. "raw:[...]" is passed through as a phonetic
--           transcription. Otherwise words are separated by spaces, a comma
--           marks a pause (rendered as " | " in the IPA and ", " in the
--           syllabification), "'" before a syllable marks explicit stress,
--           and "." / "‧" force syllable breaks.
-- pagename: accepted for interface compatibility; not used here.
--
-- Returns two values:
--   1. a list of pronunciation objects: either { phonetic = ... } for raw
--      input, or { pron = <IPA>, norhyme = false, num_syl = <number> };
--   2. the syllabification string with "‧" between syllables (nil for raw
--      input).
local function multiword(term, pagename)
	if term:find("^raw:%[.+%]$") then
		-- Parentheses keep only the string result of gsub.
		return { { phonetic = (term:gsub("^raw:", "")) } }, nil
	end

	local ipa, syl
	term = rsub(term, "%s*,%s*", " | ")

	-- Transcribe one word; returns its IPA and its "‧"-joined syllables.
	local function process_word_with_stress(word)
		if word == "" then
			return "", ""
		end

		-- An apostrophe marks the stressed syllable: the stress index is one
		-- more than the number of syllables found before the mark.
		local stress_pos = nil
		local stress_marker_pos = rfind(word, "'")
		if stress_marker_pos then
			if stress_marker_pos == 1 then
				stress_pos = 1
			else
				local before_stress = usub(word, 1, stress_marker_pos - 1)
				stress_pos = #string_to_syllables(before_stress) + 1
			end
			word = rsub(word, "'", "")
		end

		local hyphenated = string_to_syllables(word)
		local word_result = {}
		for idx, syllable in ipairs(hyphenated) do
			word_result[idx] = string_to_ipa(syllable)
		end

		if stress_pos and stress_pos >= 1 and stress_pos <= #word_result then
			insert(word_result, stress_pos, "ˈ")
		elseif word_result[2] then
			-- Default: stress mark before the second-to-last syllable of any
			-- word with at least two syllables.
			insert(word_result, #word_result - 1, "ˈ")
		end

		return concat(word_result), concat(hyphenated, "‧")
	end

	if term:find(" ") then
		-- Collect one entry per output word and join with single spaces, so
		-- that a pause comes out as "… | …" in the IPA and "…, …" in the
		-- syllabification, with no stray or missing spaces.
		local ipa_words, syl_words = {}, {}
		for _, word in ipairs(rsplit(term, " +")) do
			if word == "|" then
				insert(ipa_words, word)
				if syl_words[1] then
					syl_words[#syl_words] = syl_words[#syl_words] .. ","
				else
					syl_words[1] = ","
				end
			else
				local word_ipa, word_syl = process_word_with_stress(word)
				-- Empty words (from leading/trailing spaces) are skipped.
				if word_ipa ~= "" then
					insert(ipa_words, word_ipa)
					insert(syl_words, word_syl)
				end
			end
		end

		ipa = concat(ipa_words, " ")
		syl = concat(syl_words, " ")
	else
		ipa, syl = process_word_with_stress(term)
	end

	-- Count the "‧"-separated pieces of the syllabification. The pattern is
	-- byte-oriented: the class excludes each byte of the UTF-8 encoding of
	-- "‧", none of which occur in the encodings of the letters handled by
	-- this module.
	local num_syl = 1
	if syl and syl ~= "" then
		num_syl = 0
		for _ in syl:gmatch("[^‧]+") do
			num_syl = num_syl + 1
		end
		if num_syl < 1 then num_syl = 1 end
	end

	local result = { {
		pron = ipa,
		norhyme = false,
		num_syl = num_syl,
	} }

	-- If a word carries two primary stress marks, turn the earlier one into a
	-- secondary stress mark; repeat until nothing changes.
	while true do
		local changed = false
		result = flatmap(result, function(item)
			if rfind(item.pron, "([^ ‿]*)ˈ([^ ‿]-)ˈ") then
				changed = true
				return { {
					pron = rsub(item.pron, "([^ ‿]*)ˈ([^ ‿]-)ˈ", "%1ˌ%2ˈ"),
					a = item.a,
					norhyme = item.norhyme,
					num_syl = item.num_syl or 1,
				} }
			else
				return { item }
			end
		end)
		if not changed then
			break
		end
	end

	return result, syl
end

-- Compute pronunciations, rhymes and syllabifications for a list of terms.
--
-- terms:     list of objects with a `respelling` field and optional q, qq,
--            a, aa and refs fields (as produced by
--            parse_respellings_with_modifiers). A respelling that is nil,
--            "" or "#" stands for the page name.
-- pagename:  the page name; may be nil, in which case it is treated as "".
-- paramname: accepted for interface compatibility; not used here.
--
-- Returns a table with:
--   pron_list   - list of items for the IPA formatter (nil when the only
--                 respelling is "-");
--   syl_list    - list of distinct syllabification strings;
--   rhyme_list  - list of { rhyme = ..., num_syl = { n } } objects;
--   hyphen_list - always an empty list.
-- The "-" case additionally keeps the legacy `hyph_list` key.
function export.get_pron_info(terms, pagename, paramname)
	if #terms == 1 and terms[1].respelling == "-" then
		return {
			pron_list = nil,
			rhyme_list = {},
			-- Same keys as the normal return, so callers can always index
			-- syl_list; hyph_list is kept for backward compatibility.
			syl_list = {},
			hyph_list = {},
			hyphen_list = {},
		}
	end

	local pron_list = {}
	local rhyme_list = {}
	local syl_list = {}
	local hyphen_list = {}

	local brackets = "/%s/"

	for _, term in ipairs(terms) do
		local respelling = term.respelling
		if not respelling or respelling == "" or respelling == "#" then
			respelling = pagename or ""
		end

		local prons, syl = multiword(respelling, pagename)

		for i, pron in ipairs(prons) do
			if pron.phonetic then
				-- Raw transcription: used verbatim, no rhyme is derived.
				insert(pron_list, {
					pron = pron.phonetic,
					pron_with_syldivs = pron.phonetic,
					q = term.q,
					qq = term.qq,
					a = term.a,
					aa = term.aa,
					refs = i == 1 and term.refs or nil,
				})
			else
				local bracketed_pron = brackets:format(pron.pron)
				insert(pron_list, {
					pron = bracketed_pron,
					pron_with_syldivs = bracketed_pron,
					q = term.q,
					qq = term.qq,
					a = combine_qualifiers(pron.a, term.a),
					aa = term.aa,
					-- References are attached to the first pronunciation only.
					refs = i == 1 and term.refs or nil,
				})
				-- No rhyme for single-character respellings.
				if not pron.norhyme and ulen(respelling) > 1 then
					local num_syl = pron.num_syl
					if not num_syl or type(num_syl) ~= "number" or num_syl < 1 then
						num_syl = count_syllables(pron.pron)
					end
					if type(num_syl) ~= "number" or num_syl < 1 then
						num_syl = 1
					end
					local rhyme_obj = do_rhyme(pron.pron, num_syl)
					if rhyme_obj and rhyme_obj.num_syl and #rhyme_obj.num_syl > 0 then
						insert(rhyme_list, rhyme_obj)
					end
				end
			end
		end

		if syl and ulen(respelling) > 1 then
			-- Keep the syllabification only when it is multi-word or spells
			-- the page name / respelling once separators are ignored.
			-- Parentheses keep only the string result of each gsub chain.
			local syl_normalized = ulower((syl:gsub("‧", ""):gsub(" ", "")))
			local pagename_normalized = (ulower(pagename or ""):gsub(" ", ""))
			local respelling_normalized = (ulower(respelling):gsub(" ", ""):gsub("'", ""))
			if syl:find(" ") or syl_normalized == pagename_normalized or syl_normalized == respelling_normalized then
				m_table.insertIfNot(syl_list, syl)
			end
		end
	end

	return {
		pron_list = pron_list,
		syl_list = syl_list,
		rhyme_list = rhyme_list,
		hyphen_list = hyphen_list,
	}
end

-- Template entry point: renders the pronunciation section for an entry.
--
-- Reads the parent frame's arguments:
--   1          respelling spec (defaults to "#", i.e. the page name)
--   syl / s    manual syllabification list, or "-" to suppress it
--   rhymes / r manual rhymes as RHYME/NUM_SYL, "-" for none, "+" for automatic
--   audios / a audio files separated by ";"
--   homophones / hh  homophone list
--   hyphens / hyph / h  manual hyphenation list, or "-" for none
--   pagename   page name override
--   indent     list prefix for each output line (defaults to "*")
--
-- Returns the concatenated wikitext: IPA line, audio lines, rhymes line,
-- syllabification (and optional hyphenation) lines, homophones line.
function export.show(frame)
	local parent_args = frame:getParent().args

	local process = require("Module:parameters").process
	local lang_obj = lang
	local langcode = "eo"

	local params = {
		[1] = {},
		[2] = {},
		[3] = {},
		["syl"] = true,
		["s"] = { alias_of = "syl" },
		["rhymes"] = true,
		["r"] = { alias_of = "rhymes" },
		["audios"] = true,
		["a"] = { alias_of = "audios" },
		["homophones"] = true,
		["hh"] = { alias_of = "homophones" },
		["pagename"] = true,
		["indent"] = true,
		["hyphens"] = true,
		["hyph"] = { alias_of = "hyphens" },
		["h"] = { alias_of = "hyphens" },
	}

	local args = process(parent_args, params)

	local termspec = args[1] or "#"
	local terms = parse_respellings_with_modifiers(termspec, 1)
	local pagename = args.pagename or mw.loadData("Module:headword/data").pagename
	local indent = args.indent or "*"

	local pronobj = export.get_pron_info(terms, pagename, 1)
	local syl_list, rhyme_list = pronobj.syl_list, pronobj.rhyme_list
	-- The hyphenation list starts empty here; it is only filled from the
	-- manual `hyphens` argument below.
	local hyphen_list = {}
	local syl_automatic = true
	local do_syl

	-- Decide whether a syllabification line is shown, and whether it comes
	-- from the manual `syl` argument or from the automatic result.
	if args.syl then
		syl_automatic = false
		if args.syl == "-" then
			do_syl = false
		else
			syl_list = split_on_comma(args.syl)
			do_syl = true
		end
	elseif terms[1].respelling == "-" then
		do_syl = false
	else
		do_syl = true
	end

	-- Handle manual hyphenation override if provided
	if args.hyphens then
		if args.hyphens == "-" then
			hyphen_list = {}
		else
			hyphen_list = split_on_comma(args.hyphens)
		end
	else
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/eo-pr/no-hyphenation]]
		require("Module:debug/track")("eo-pr/no-hyphenation")
	end

	-- Manual rhymes replace the automatic ones unless the argument is "+".
	if args.rhymes then
		if args.rhymes == "-" then
			rhyme_list = {}
		elseif args.rhymes ~= "+" then
			rhyme_list = {}
			for _, rhyme in ipairs(split_on_comma(args.rhymes)) do
				if rfind(rhyme, ".+/.+") then
					-- NOTE(review): if the part after "/" is not numeric,
					-- tonumber gives nil and num_syl ends up as an empty list.
					table.insert(rhyme_list, {
						rhyme = rsub(rhyme, "/.+", ""),
						num_syl = { tonumber(rsub(rhyme, ".+/", "")) },
					})
				else
					error(("The manual rhyme %s did not specify syllable number as RHYME/NUM_SYL"):format(rhyme))
				end
			end
		end
	end

	-- Merge entries that share the same rhyme, collecting their distinct
	-- syllable counts into one num_syl list.
	if #rhyme_list > 0 then
		local temp_rhyme_list = {}
		local indices = {}
		for _, rhymeobj in ipairs(rhyme_list) do
			local index = indices[rhymeobj.rhyme]
			if index == nil then
				table.insert(temp_rhyme_list, rhymeobj)
				indices[rhymeobj.rhyme] = #temp_rhyme_list
			else
				local different_num_syl = true
				for _, ns in ipairs(temp_rhyme_list[index].num_syl) do
					if ns == rhymeobj.num_syl[1] then
						different_num_syl = false
						break
					end
				end
				if different_num_syl then
					table.insert(temp_rhyme_list[index].num_syl, rhymeobj.num_syl[1])
				end
			end
		end
		rhyme_list = temp_rhyme_list
	end

	local m_IPA_format = require("Module:IPA").format_IPA_full
	-- Output fragments, concatenated at the end.
	local parts = {}
	local function ins(text)
		table.insert(parts, text)
	end

	-- IPA line (absent when the respelling is "-", since pron_list is nil).
	if pronobj.pron_list then
		local formatted = m_IPA_format { lang = lang_obj, items = pronobj.pron_list }
		ins(indent .. mw.ustring.toNFC(formatted))
	end

	-- One line per audio file; when there are several, files without an
	-- explicit caption get a numbered default caption.
	if args.audios then
		local format_audio = require("Module:audio").format_audio
		local audio_objs = parse_audio(lang_obj, args.audios, pagename, "audios")
		local num_audios = #audio_objs
		for i, audio_obj in ipairs(audio_objs) do
			if num_audios > 1 and not audio_obj.caption then
				audio_obj.caption = "ရမျာၚ် " .. i
			end
			ins("\n" .. indent .. " " .. format_audio(audio_obj))
		end
	end

	if #rhyme_list > 0 then
		local formatted_rhymes = require("Module:rhymes").format_rhymes { lang = lang_obj, rhymes = rhyme_list }
		ins("\n" .. indent .. " " .. mw.ustring.toNFC(formatted_rhymes))
	end

	if do_syl then
		-- Syllabification is skipped entirely if any term is at most one
		-- character long.
		local is_single_letter = false
		for _, term in ipairs(terms) do
			local respelling = term.respelling
			if respelling == "#" or respelling == nil or respelling == "" then
				respelling = pagename
			end
			if respelling and ulen(respelling) <= 1 then
				is_single_letter = true
				break
			end
		end

		if not is_single_letter then
			ins("\n" .. indent .. " ")
			if #syl_list > 0 then
				local syls = {}
				for i, syl in ipairs(syl_list) do
					syls[i] = { hyph = {} }
					-- Split on "‧". The pattern is byte-oriented, so the
					-- class excludes each byte of the UTF-8 encoding of "‧".
					for s in syl:gmatch("[^‧]+") do
						table.insert(syls[i].hyph, s)
					end
				end
				ins(require("Module:hyphenation").format_hyphenations {
					lang = lang_obj, hyphs = syls, caption = "ပရေၚ်ပါ်ဝဏ္ဏ"
				})
			else
				-- No usable syllabification: show a request and, in the main
				-- namespace, add a maintenance category.
				ins("Syllabification: <small>[please specify syllabification manually]</small>")
				if mw.title.getCurrentTitle().nsText == "" then
					ins(("[[Category:%s entries with Template:%s-pr without syllabification]]"):format(
						lang_obj:getFullName(), langcode))
				end
			end

			-- The manual hyphenation line is only emitted inside the
			-- syllabification branch.
			if #hyphen_list > 0 then
				ins("\n" .. indent .. " ")
				local hyphens = {}
				for i, hyph in ipairs(hyphen_list) do
					hyphens[i] = { hyph = {} }
					for part in hyph:gmatch("[^‧]+") do
						table.insert(hyphens[i].hyph, part)
					end
				end
				ins(require("Module:hyphenation").format_hyphenations {
					lang = lang_obj, hyphs = hyphens, caption = "ဗီုစုတ်ဂၠေံဂၠေံ"
				})
			end
		end
	end

	if args.homophones then
		-- NOTE(review): the parameter name passed here is not one of the keys
		-- declared in `params` ("homophones" / "hh") — confirm it is intended.
		local homophone_list = parse_homophones(args.homophones, "ဗီုပြၚ်ပ္တိတ်ရမျာၚ်")
		ins("\n" .. indent .. " " .. require("Module:homophones").format_homophones {
			lang = lang_obj,
			homophones = homophone_list,
		})
	end

	return concat(parts)
end

return export