-- မဝ်ဂျူ:ro-pronunciation
--
-- Documentation for this module may be created at မဝ်ဂျူ:ro-pronunciation/doc
-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char

-- Wrap a list of characters in square brackets to form a pattern set.
local function char_class(chars)
	return "[" .. chars .. "]"
end

-- IPA marks.
local stress = "ˈ"
local long = "ː"

-- Combining diacritics: U+0301 acute, U+0300 grave, U+0302 circumflex.
local acute = u(0x301)
local grave = u(0x300)
local circumflex = u(0x302)
local acute_or_grave = char_class(acute .. grave)

-- Vowel symbols as they appear in the transcription, and pattern sets
-- derived from them.
local vowels = "aeiouəɨ"
local vowel = char_class(vowels)
local vowel_or_semivowel = char_class(vowels .. "jw")
local not_vowel = char_class("^" .. vowels)

local front = "[ij]"
-- U+031F COMBINING PLUS SIGN BELOW.
local fronted = u(0x031F)
local voiced_consonant = "[bdɡlmnrvz]"

-- Maps each single-character affricate placeholder to its tie-bar spelling.
local full_affricates = {
	["ʦ"] = "t͡s",
	["ʣ"] = "d͡z",
	["ʧ"] = "t͡ʃ",
	["ʤ"] = "d͡ʒ",
}

-- Inside to_phonemic, the single characters ʦ, ʣ, ʧ, ʤ stand in for the
-- affricates t͡s, d͡z, t͡ʃ, d͡ʒ; full_affricates maps them back.

-- Convert a word in ordinary spelling to a phonemic transcription.
--
-- Stress is read from an acute or grave accent on a vowel. When the word has
-- neither, an acute is placed on the second-to-last vowel; a word with only
-- one vowel is left without a stress mark.
--
-- Parameters:
--   word                         the word to transcribe (string).
--   single_character_affricates  when truthy, affricates stay as the single
--                                characters ʦ, ʣ, ʧ, ʤ; otherwise they are
--                                expanded to t͡s, d͡z, t͡ʃ, d͡ʒ.
-- Returns the transcription as a string, without enclosing slashes.
-- Raises an error when the letters c and g are adjacent ("cg" or "gc").
function export.to_phonemic(word, single_character_affricates)
	-- Lowercase, drop apostrophes, and respell the letters and digraphs whose
	-- replacement here does not depend on context.
	word = mw.ustring.lower(word):gsub("'", ""):gsub("â", "ɨ"):gsub("î", "ɨ"):gsub("ă", "ə"):gsub("j", "ʒ"):gsub("ș", "ʃ"):gsub("ț", "ʦ"):gsub("cc", "kc"):gsub("uu", "uw")
	
	-- Decompose combining characters: for instance, è → e + ◌̀
	local decomposed = mw.ustring.toNFD(word):gsub("x", "ks"):gsub("y", "i")
		:gsub("ck", "k"):gsub("sh", "ʃ")

	-- Transcriptions must contain an acute or grave, to indicate stress position.
	-- This does not handle phrases containing more than one stressed word.
	-- Default to penultimate stress rather than throw error?
	if not mw.ustring.find(decomposed, acute_or_grave) then
		-- Allow monosyllabic unstressed words.
		-- Count with mw.ustring so that ə and ɨ, which take two bytes each in
		-- UTF-8, are counted once rather than once per byte.
		local _, vowel_count = mw.ustring.gsub(decomposed, vowel, "")
		if vowel_count ~= 1 then
			-- Add acute accent on second-to-last vowel.
			decomposed = mw.ustring.gsub(decomposed, 
				"(" .. vowel .. ")(" .. not_vowel .. "*[iu]?" .. vowel .. not_vowel .. "*)$",
				"%1" .. acute .. "%2")
		end
	end
	
	local transcription = decomposed

	-- ci, gi + vowel
	-- Do ci, gi + e, é, è sometimes contain /j/?
	transcription = mw.ustring.gsub(transcription,
		"([cg])([cg]?)i(" .. vowel .. ")",
		function (consonant, double, following_vowel)
			if double ~= "" and double ~= consonant then
				error("Invalid sequence " .. consonant .. double .. ".")
			end
			
			local affricate = consonant == "c" and "ʧ" or "ʤ"
			if double ~= "" then
				affricate = affricate .. affricate
			end
			
			-- The i itself is dropped; only the affricate and the vowel remain.
			return affricate .. following_vowel
		end)
	
	-- Handle other cases of c, g.
	-- Captures: the letter, an optional second c/g, an optional h, and the
	-- character after them (empty at the end of the string).
	transcription = mw.ustring.gsub(transcription,
		"([cg])([cg]?)(h?)(.?)",
		function (first, double, h, following)
			-- Don't allow the combinations cg, gc.
			-- Or do something else?
			if double ~= "" and double ~= first then
				error("Invalid sequence " .. first .. double .. ".")
			end
			
			-- c, g is soft before e, i, unless an h stands in between.
			local soft = (following == "e" or following == "i") and h ~= "h"
			local sound
			if first == "c" then
				sound = soft and "ʧ" or "k"
			else
				sound = soft and "ʤ" or "ɡ"
			end
			
			if double ~= "" then
				sound = sound .. sound
			end
			
			-- The h, if any, is not copied to the output.
			return sound .. following
		end)
	
	-- ⟨qu⟩ represents /kw/.
	transcription = transcription:gsub("qu", "kw")
	
	-- A word-final i becomes ʲ; the next two rules then rewrite the endings
	-- that were originally spelled -iii and -ii.
	transcription = mw.ustring.gsub(transcription,  "i$", "ʲ")
	transcription = mw.ustring.gsub(transcription, "iiʲ$", "iji")
	transcription = mw.ustring.gsub(transcription, "iʲ$", "ij")

	local semivowel_of = { i = "j", u = "w" }

	-- u or i (without accent) before another vowel is a semivowel.
	transcription = mw.ustring.gsub(transcription,
		"([iu])(" .. vowel .. ")",
		function (high_vowel, following_vowel)
			return semivowel_of[high_vowel] .. following_vowel
		end)

	-- Likewise u or i directly after another vowel.
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")([iu])",
		function (preceding_vowel, high_vowel)
			return preceding_vowel .. semivowel_of[high_vowel]
		end)

	transcription = mw.ustring.gsub(transcription, "je$", "ie")
	
	-- Replace acute and grave with stress mark.
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")" .. acute_or_grave, stress .. "%1")
	
	-- Syllable breaks inside specific consonant clusters.
	-- NOTE(review): the four patterns containing the letter "c" cannot match,
	-- because every c has been respelled as k or ʧ above. They are left as
	-- they were; rewriting them with k would also put a break inside
	-- word-final clusters such as "punct", so that needs a decision first.
	transcription = mw.ustring.gsub(transcription, "lpt", "lp.t")
	transcription = mw.ustring.gsub(transcription, "mpt", "mp.t")
	transcription = mw.ustring.gsub(transcription, "nct", "nc.t")
	transcription = mw.ustring.gsub(transcription, "ncʦ", "nc.ʦ")
	transcription = mw.ustring.gsub(transcription, "ncʃ", "nc.ʃ")
	transcription = mw.ustring.gsub(transcription, "ndv", "nd.v")
	transcription = mw.ustring.gsub(transcription, "rct", "rc.t")
	transcription = mw.ustring.gsub(transcription, "rtf", "rt.f")
	transcription = mw.ustring.gsub(transcription, "stm", "st.m")

	-- Syllable break before one of these consonants when it stands between
	-- two vowels. The second vowel is tested with a frontier instead of being
	-- captured, so it can also open the next match (V.CV.CV).
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")([bkhdɡlmnrvz])%f" .. vowel,
		"%1.%2")


	-- Move stress before syllable onset, and add syllable breaks.
	-- This rule may need refinement.
--	transcription = mw.ustring.gsub(transcription,
--		"()(" .. not_vowel .. "?)([^" .. vowels .. stress .. "]*)(" .. stress
--			.. "?)(" .. vowel .. ")",
--		function (position, first, rest, syllable_divider, vowel)
--			-- beginning of word, that is, at the moment, beginning of string
--			if position == 1 then
--				return syllable_divider .. first .. rest .. vowel
--			end
--			if syllable_divider == "" then
--				syllable_divider = "."
--			end
--			if rest == "" then
--				return syllable_divider .. first .. vowel
--			else
--				return first .. syllable_divider .. rest .. vowel
--			end
--		end)
	
	if not single_character_affricates then
		-- Expand the placeholder characters. When further placeholders follow
		-- (optionally across a break or stress mark), the run is written as a
		-- geminate of the first one: its first letter, the divider, then the
		-- full affricate. The identity of the later placeholders is not used.
		transcription = mw.ustring.gsub(transcription, "([ʦʣʧʤ])([%." .. stress .. "]*)([ʦʣʧʤ]*)",
			function (first_affricate, divider, more_affricates)
				local expanded = full_affricates[first_affricate]
				
				if more_affricates ~= "" then
					return mw.ustring.sub(expanded, 1, 1) .. divider .. expanded
				end
				
				return expanded .. divider
			end)
	end
	
	-- Remove h, hyphens and the combining circumflex.
	-- NOTE(review): this deletes every h still present, not only those from
	-- ch/gh (which were already consumed above) — confirm this is intended.
	transcription = mw.ustring.gsub(transcription, "[h%-" .. circumflex .. "]", "")
	-- A syllable break directly before the stress mark is redundant.
	transcription = transcription:gsub("%.ˈ", "ˈ")
	
	return transcription
end

-- Template entry point. Reads the positional arguments of the parent frame
-- (falling back to the current page title), transcribes each one with
-- export.to_phonemic, and returns the result of Module:IPA's format_IPA_full
-- for the language with code "ro".
function export.show(frame)
	local m_IPA = require("Module:IPA")
	
	local m_parameters = require("Module:parameters")
	local parent_args = frame:getParent().args
	local args = m_parameters.process(parent_args, {
		-- words to transcribe
		[1] = { list = true, default = mw.title.getCurrentTitle().text },
	})
	
	local Array = require("Module:array")
	
	-- Wrap one word's transcription in slashes, in the shape Module:IPA takes.
	local function to_pron(word)
		local phonemic = export.to_phonemic(word)
		return { pron = "/" .. phonemic .. "/" }
	end
	
	local transcriptions = Array(args[1]):map(to_pron)
	
	local lang = require("Module:languages").getByCode("ro")
	return m_IPA.format_IPA_full(lang, transcriptions)
end

-- Return the table of public functions as the value of this module.
return export