မဝ်ဂျူ:mnw-translit

	This module is in beta stage.
	Its interface has been stabilised, but the module may still contain errors. Do not deploy widely until the module has been tested.

Mark-up

Traditionally, transliteration of Mon distinguishes the various readings of anusvara. By immediately following it with one of the circled letters Ⓐ, Ⓗ, Ⓞ and Ⓜ, the intended reading can be selected as follows:

Combination	Interpretation	Mnemonic
ံⒶ (ṃⒶ)	Equivalent to အ် (ʼ)	အ (ʼa) is the independent vowel <a>.
ံⒽ (ṃⒽ)	Equivalent to ဟ် (h)	ဟ (ha) is the letter <h>
ံⓄ (ṃⓄ)	The vowel is not affected by the coda being a velar consonant	The resulting vowel sound (in the clear register) is /ɔ/
ံⓂ (ṃⓂ)	The anusvara represents a final /m/. This is the default interpretation.	/m/

This mark-up can be used in the citation and usage example template {{mnw-quote}}.

In all environments, word boundaries in Burmese-script text can be marked up using the HTML tag <wbr>. These tags will be converted to single spaces as part of transliteration. Note that only all-lowercase tags will be recognised for conversion. This may be useful in some obscure circumstances.

-- Table of public functions returned by this module.
local export = {}

-- Shorthands for the Scribunto ustring library functions used below.
local gsub = mw.ustring.gsub
local u = mw.ustring.char

-- Pattern capturing any single character optionally followed by one
-- combining diacritical mark in the range U+0300..U+036F.
local letter_with_mark = string.format("(.[%s-%s]?)", u(0x0300), u(0x036F))

-- Pre-processing map: each medial consonant sign is rewritten as
-- virama (U+1039) followed by the corresponding full consonant letter,
-- so that later stages only have to deal with base consonants.
local pre = {
	["ျ"] = "္ယ", -- medial ya
	["ြ"] = "္ရ", -- medial ra
	["ွ"] = "္ဝ", -- medial wa
	["ှ"] = "္ဟ", -- medial ha
	["ၞ"] = "္န", -- Mon medial na
	["ၟ"] = "္မ", -- Mon medial ma
	["ၠ"] = "္လ", -- Mon medial la
}

-- Single-character transliteration table, applied with gsub(text, ".", tt1).
-- Some values are placeholders that export.tr resolves in later passes:
--   ᵃ  inherent vowel of a consonant (dropped before a vowel sign or a
--      virama/asat, otherwise written "a")
--   ¡  virama, ¤  asat
--   ʸ  the vowel sign ဲ, which becomes "y" after a vowel or "oa" on a bare consonant
local tt1 = {
	-- consonants ; Unicode doesn't have exclusive great nya, that looks like ည with another curve, so use ည္ည as it should be.
	["က"] = "kᵃ", ["ခ"] = "khᵃ", ["ဂ"] = "gᵃ", ["ဃ"] = "ghᵃ", ["င"] = "ṅᵃ", ["ၚ"] = "ṅᵃ",
	["စ"] = "cᵃ", ["ဆ"] = "chᵃ", ["ဇ"] = "jᵃ", ["ၛ"] = "jhᵃ", ["ဉ"] = "ñᵃ", ["ည"] = "ññᵃ",  -- ññ -> ñ later (done in export.tr only when lang == "mnw")
	["ဋ"] = "ṭᵃ", ["ဌ"] = "ṭhᵃ", ["ဍ"] = "ḍᵃ", ["ဎ"] = "ḍhᵃ", ["ဏ"] = "ṇᵃ",
	["တ"] = "tᵃ", ["ထ"] = "thᵃ", ["ဒ"] = "dᵃ", ["ဓ"] = "dhᵃ", ["န"] = "nᵃ",
	["ပ"] = "pᵃ", ["ဖ"] = "phᵃ", ["ဗ"] = "bᵃ", ["ဘ"] = "bhᵃ", ["မ"] = "mᵃ",
	["ယ"] = "yᵃ", ["ရ"] = "rᵃ", ["လ"] = "lᵃ", ["ဝ"] = "wᵃ", ["သ"] = "sᵃ", ["ဿ"] = "ssᵃ",
	["ဟ"] = "hᵃ", ["ဠ"] = "ḷᵃ", ["ၜ"] = "ṗᵃ", ["အ"] = "ʼᵃ", ["ၝ"] = "ḅᵃ",
	-- independent vowels (1 char)
	["ဣ"] = "ʼi", ["ဥ"] = "ʼu",
	["ဨ"] = "ʼe", ["ဩ"] = "ʼo",
	-- dependent vowels and diacritics (1 char)
	["ါ"] = "ā", ["ာ"] = "ā", ["ိ"] = "i", ["ီ"] = "iṃ", ["ဳ"] = "ī", ["ု"] = "u", ["ူ"] = "ū", ["ဲ"] = "ʸ",
	["ဴ"] = "ao", ["ေ"] = "e", ["ဵ"] = "e", 
	-- anusvara, visarga, virama (placeholder ¡) and asat (placeholder ¤)
	["ံ"] = "ṃ", ["း"] = "ḥ", ["္"] = "¡", ["်"] = "¤",
	-- punctuation marks
	["၊"] = ",", ["။"] = ".", 
	-- numerals
	["၀"] = "0", ["၁"] = "1", ["၂"] = "2", ["၃"] = "3", ["၄"] = "4",
	["၅"] = "5", ["၆"] = "6", ["၇"] = "7", ["၈"] = "8", ["၉"] = "9",
	-- zero-width characters U+200B, U+200C, U+200D (shown as ‼ so they are visible if they hide in a word)
	[u(0x200B)] = "‼", [u(0x200C)] = "‼", [u(0x200D)] = "‼",
}

-- Two-character sequences that are transliterated as a unit, before the
-- single-character pass runs.
local tt2 = {
	-- independent vowel letter + vowel sign
	["ဣဳ"] = "ʼī",
	["ဥု"] = "ʼū",
	-- e-sign followed by either form of the aa-sign
	["ေါ"] = "o",
	["ော"] = "o",
}

--- Transliterate Mon text in the Burmese (Myanmar) script into Latin script.
-- @param text        string to transliterate, or a table whose `args[1]`
--                    holds the string (when called directly from a template)
-- @param lang        language code; for "mnw" the doubled "ññ" is reduced to "ñ"
-- @param sc          script code (not used by this function)
-- @param debug_mode  not used by this function
-- @return the transliterated string, or nil when no text was supplied
function export.tr(text, lang, sc, debug_mode)

	if type(text) == "table" then -- called directly from a template
		text = text.args[1]
	end

	-- No text supplied (e.g. the template was invoked without an argument):
	-- return nil instead of letting gsub raise an error.
	if text == nil then
		return nil
	end

	-- A lowercase <wbr> tag marks a word boundary in the source text and is
	-- rendered as a single space in the transliteration.
	text = gsub(text, "<wbr>", " ")

	-- Rewrite medial consonant signs as virama + full consonant.
	text = gsub(text, ".", pre)
	text = gsub(text, "ဲါ", "ါဲ") -- fixed ay+aa to aa+ay; it often occurs

	-- Two-character sequences first. The keys cannot overlap one another and
	-- the replacements are Latin, so the unspecified order of pairs() is harmless.
	for k, v in pairs(tt2) do
		text = gsub(text, k, v)
	end

	-- Then every remaining character individually.
	text = gsub(text, ".", tt1)

	-- Resolve the ʸ placeholder: "y" after a written vowel, "oa" when it
	-- sits directly on a consonant's inherent vowel.
	text = gsub(text, "([aeiuoāīū])ʸ", "%1y")
	text = gsub(text, "ᵃʸ", "oa")

	-- Inherent vowel killed by virama/asat: drop it together with the marks.
	text = gsub(text, "ᵃ[¡¤]+", "")
	-- An asat directly after a written vowel is rendered as "k".
	text = gsub(text, "([aeiuoāīū])¤", "%1k")
	-- Inherent vowel overridden by a written vowel; any left over becomes "a".
	text = gsub(text, "ᵃ([aeiuoāīū])", "%1")
	text = gsub(text, "ᵃ", "a")

	-- Move the anusvara behind a following "u".
	text = gsub(text, "iṃu", "iuṃ")
	if lang == "mnw" then --Modern Mon
		text = gsub(text, "ññ", "ñ")
	end

	return text

end
 
-- Expose the module's public functions (export.tr) to callers.
return export