မဝ်ဂျူ:tru-translit

နူ ဝိက်ရှေန်နရဳ

This is the unit-testing module for Module:tru-translit.

All tests passed. (refresh)

လိက် ဗွဲမရံၚ်လၟဳ မဇေတ်ဍာံ ဒၞာဲတၞဟ်ခြာ
testcases for tr function in Module:tru-translit:
Passed ܐܰܕܡܐ admo admo
Passed ܐܰܒܪܐ abro abro
Passed ܐܰܕ݂ܢܐ aḏno aḏno
Passed ܐܷܫܬܐ ëšto ëšto
Passed ܫܰܘܥܐ šawco šawco
Passed ܬܡܰܢܝܐ tmanyo tmanyo
Passed ܬܷܫܥܐ tëšco tëšco
Passed ܚܰܡܫܐ ḥamšo ḥamšo
Passed ܐܰܪܒܥܐ arbco arbco
Passed ܬܠܳܬ݂ܐ tloṯo tloṯo
Passed ܬܪܶܐ tre tre
Passed ܚܰܐ ḥa ḥa
Passed ܬܡܝܢܳܝܐ tminoyo tminoyo
Passed ܕܰܬ ܬܡܰܢܝܐ dat tmanyo dat tmanyo
Passed ܬܡܳܢܰܥܣܰܪ tmonacsar tmonacsar
Passed ܐܰܕܥܰܨܪܝـܝܶܐ adcaṣriye adcaṣriye
Passed ܐܰܕܠܰܠܝܐ adlalyo adlalyo
Passed ܐܰܕܨܰܦܪܐ adṣafro adṣafro
Passed ܐܰܕܝܰܘܡܰܐ adyawma adyawma
Passed ܐܰܬ݂ܡܷܠ aṯmël aṯmël
Passed ܐܰܬ݂ܝܳܢܐ aṯyono aṯyono
Passed ܐܰܙܙܝ azzi azzi
Passed ܒܰܛܝܠܶܐ ܢܶܐ baṭile ne baṭile ne
Passed ܒܢܳܝܐ bnoyo bnoyo
Passed ܕܰܪܓ݂ܶܐ darġe darġe
Passed ܕܘܥܪܝܢܰܐ ducrina ducrina
Passed ܕܘܪܳܫܶܐ ܓܘܫܡܳܢܳܝܶܐ duroše gušmonoye duroše gušmonoye
Passed ܦܰܠܩܐ falqo falqo
Passed ܠܰܫܰܢ lašan lašan
Passed ܡܶܐ ܙܰܒܢܐ ܠܙܰܒܢܐ me zabno lzabno me zabno lzabno
Passed ܩܷܛܪܐ qëṭro qëṭro
Passed ܣܚܳܝܐ sḥoyo sḥoyo
Passed ܬܰܡܐ tamo tamo
Passed ܘܰܥܕܐ wacdo wacdo
Passed ܙܰܒܢܐ zabno zabno
Passed ܢܚܝܪܐ nḥiro nḥiro
Passed ܢܳܫܐ nošo nošo
Passed ܪܝܫܐ rišo rišo
Passed ܫܰܒܬ݂ܐ šabṯo šabṯo
Passed ܐܘܥܕܐ ucdo ucdo
Passed ܘܳܠܝܬ݂ܐ woliṯo woliṯo
Passed ܙܥܘܪܐ zcuro zcuro
Passed ܙܥܘܪܬܐ zcurto zcurto
Passed ܕܰܫܷܫܬܐ dašëšto dašëšto
Passed ܥܷܢܘܶܐ cënwe cënwe
Passed ܨܷܪܬܐ ṣërto ṣërto
Passed ܘܰܟ݂ܰܡ waxam waxam
Passed ܙܷܒܕܐ zëbdo zëbdo
Passed ܐܰܝ ܝܰܘܡܰܢܝ ay yawmani ay yawmani
Passed ܫ̰ܰܢܛܰܐ čanṭa čanṭa
Passed ܙ̰ܱܒܰܫܶܐ žäbaše žäbaše
Passed ܙܱ̰ܒܰܫܶܐ žäbaše žäbaše
Passed ܦ݁ܠܰܢ plan plan
Passed ܒ݂ܝܠܠܰܐ villa villa
Passed ܐܳܢܳܐ ono ono
Passed ܗܰܬܘ hatu hatu
Passed ܐܝـܝܰܪ iyar iyar
Passed ܐܰܘ aw aw
Passed ”ܝܐ.“ “yo.” “yo.”
Passed ܒܷܬ݂ܷܪ bëṯër bëṯër
Passed ܒܷܬܷ݂ܪ bëṯër bëṯër
Passed ܐܰܘܪܘܦ݁ܰܐ awrupa awrupa
Passed ܐܰܘܪܘܦܰ݁ܐ awrupa awrupa
Passed (ܣܘܪܝܳܝܐ) (suryoyo) (suryoyo)
Passed ܣܘܪܝܳܝܐ܆ suryoyo; suryoyo;
Passed ܡܳܪܝ mor mor
Passed ܕܡܳܪܝ dmor dmor
Passed ܛܒܷܥܥܶܗ ṭbëceh ṭbëceh
Passed ܦܬܷܚܚܶܗ ftëḥeh ftëḥeh
Passed ܘܡܰܠܘܰܫܫܶܗ umalwašeh umalwašeh
Passed ܝܘܠܦܳܢܰܬ݂ܬ݂ܶܗ yulfonaṯeh yulfonaṯeh
Passed ܡܰܠܰܟ݂ܟ݂ܶܗ malaxeh malaxeh
Passed ܡ̈ܶܠܐ melo melo

-- Table holding the functions this module exposes; it is returned at the end of the file.
local export = {}

-- Short aliases for the Scribunto Unicode string helpers used throughout the module:
-- U builds a string from code points, rsub performs pattern substitution.
local U = mw.ustring.char
local rsub = mw.ustring.gsub

-- Syriac vowel diacritics, defined by code point.
local rbasa_below, pthaha_below = U(0x737), U(0x731)
local rbasa, zqapha, pthaha = U(0x736), U(0x733), U(0x730)
-- Pattern that captures exactly one of the vowel diacritics above.
local vowel_diacritics_capture = "(["
	.. table.concat({ rbasa_below, pthaha_below, rbasa, zqapha, pthaha })
	.. "])"

-- The consonants that can stand for vowels (matres lectionis) are declared as constants by code
-- point instead of being typed as literals. Mixing right-to-left and left-to-right characters on
-- one line is displayed differently in an IDE than on Wiktionary, and these letters are combined
-- with the .. operator: "ܘ" .. "ܐ" as shown on Wiktionary would render as "ܐ" .. "ܘ" in an IDE.
local alaph, waw, yudh = U(0x710), U(0x718), U(0x71D)

-- Combining marks, likewise defined by code point.
local combining_diaeresis, combining_tilde_below = U(0x308), U(0x330)
local qushshaya, rukkakha = U(0x741), U(0x742)

-- Punctuation lookup table: each key character is replaced by its value.
local tt_transpose_punc = {
	-- opening and closing curly quotes (double, then single) are swapped
	["“"] = "”",
	["”"] = "“",
	["‘"] = "’",
	["’"] = "‘",
	["؟"] = "?", -- question mark
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	-- skewed colons from https://r12a.github.io/scripts/syrc/tru.html#phrase
	["܇"] = ",",
	["܆"] = ";",
}

-- Every key of tt_transpose_punc joined into one string (the iteration order of pairs is
-- unspecified, so the order of characters inside the string is too).
local tt_transpose_punc_keys
do
	local collected = {}
	for punc in pairs(tt_transpose_punc) do
		collected[#collected + 1] = punc
	end
	tt_transpose_punc_keys = table.concat(collected)
end

-- Normalisation rules applied before transliteration, stored as { pattern, replacement } pairs
-- and meant to be run in order.
local fix = {}
-- When a vowel diacritic comes before one of these marks, swap the two so that the mark comes
-- first and the vowel follows it.
for _, mark in ipairs({ qushshaya, rukkakha, combining_tilde_below }) do
	fix[#fix + 1] = { vowel_diacritics_capture .. mark, mark .. "%1" }
end
-- partition punctuation marks so "starts with" and "ends with" substitutions work
fix[#fix + 1] = { "([" .. tt_transpose_punc_keys .. "()!.:\"'])", "#%1#" }

-- One-to-one mapping from plain Syriac letters to their Latin transliteration.
local tt = {
	["ܦ"] = "f",
	["ܒ"] = "b",
	["ܬ"] = "t",
	["ܛ"] = "ṭ",
	["ܕ"] = "d",
	["ܟ"] = "k",
	["ܓ"] = "g",
	["ܩ"] = "q",
	["ܔ"] = "j",
	["ܣ"] = "s",
	["ܨ"] = "ṣ",
	["ܙ"] = "z",
	["ܫ"] = "š",
	["ܚ"] = "ḥ",
	["ܥ"] = "c",
	["ܗ"] = "h",
	["ܡ"] = "m",
	["ܢ"] = "n",
	["ܪ"] = "r",
	["ܠ"] = "l",
}

-- Second lookup table, keyed by the code-point constants: the two semivowel letters and the
-- five vowel diacritics, each mapped to a Latin letter.
local tt_next = {}

-- semivowel letters
tt_next[waw] = "w"
tt_next[yudh] = "y"

-- vowel diacritics
tt_next[rbasa_below] = "ë"
tt_next[pthaha_below] = "ä"
tt_next[rbasa] = "e"
tt_next[zqapha] = "o"
tt_next[pthaha] = "a"

-- Characters treated as consonants when deciding whether a neighbouring waw/yudh is a vowel:
-- the Latin letters produced by the consonant substitutions, plus the not-yet-transliterated
-- yudh and waw themselves.
-- "č" must be listed: ܫ + combining tilde below is rewritten to "č" before these classes are
-- matched, so without it a waw/yudh next to "č" would never be vocalised as u/i.
local consonants = "fbtṭdkgqjsṣzšḥchmnrlvžpṯḏxġč" .. yudh .. waw
-- The same set wrapped as a single-character capture for use inside patterns.
local consonants_group = "([" .. consonants .. "])"

-- Whole-word exceptions, applied in order as { pattern, replacement } pairs.
local special_cases = {
	-- { matching_syriac_text, latin_substitution }
	--
	-- The # symbol pads the start and end of every word. Consider the following examples for
	-- the matching text:
	-- #float#    only float matches
	-- #float     words starting with float, like float or floats, match
	-- float#     words ending with float, like float or afloat, match
	-- float      words containing float, like float, floats, afloat and refloats, match
	{"ܡܳܪܝ#", "mor#"},
}

--- Transliterate Syriac-script text into Latin script.
-- The text is rewritten by a fixed sequence of pattern substitutions; the order of the rules
-- matters because later rules match the output of earlier ones.
-- @param text  the string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- Runs an ordered list of { pattern, replacement } rules over the working text.
	local function run(rules)
		for _, rule in ipairs(rules) do
			text = rsub(text, unpack(rule))
		end
	end

	-- Mark word boundaries with "#": around " | " separators, around every space, and doubled
	-- at both ends of the whole text.
	text = rsub(text, " | ", "# | #")
	text = rsub(text, " ", "# #")
	text = "##" .. text .. "##"

	run({
		{ "ـ", "" }, -- drop the elongation stroke
		{ combining_diaeresis, "" }, -- drop the combining diaeresis
	})
	run(fix)

	-- Special cases
	run(special_cases)

	-- Letters modified by a combining mark
	run({
		{ "ܫ" .. combining_tilde_below, "č" },
		{ "ܙ" .. combining_tilde_below, "ž" },

		{ "ܦ" .. qushshaya, "p" },

		{ "ܒ" .. rukkakha, "v" },
		{ "ܬ" .. rukkakha, "ṯ" },
		{ "ܕ" .. rukkakha, "ḏ" },
		{ "ܟ" .. rukkakha, "x" },
		{ "ܓ" .. rukkakha, "ġ" },
	})

	-- Character-by-character lookups: punctuation first, then the plain consonants
	run({
		{ ".", tt_transpose_punc },
		{ ".", tt },
	})

	-- waw / yudh read as the vowels u / i
	run({
		-- between two consonants
		{ consonants_group .. waw .. consonants_group, "%1u%2" },
		{ consonants_group .. yudh .. consonants_group, "%1i%2" },
		-- word-initial, before a consonant
		{ "#" .. waw .. consonants_group, "#u%1" },
		{ "#" .. yudh .. consonants_group, "#i%1" }, -- this needs a test case

		-- word-final alaph + pthaha + waw/yudh becomes aw/ay; the boundary marker ends up in
		-- front of the result (all markers are stripped at the end anyway)
		{ alaph .. pthaha .. waw .. "#", "#aw" },
		{ alaph .. pthaha .. yudh .. "#", "#ay" },

		-- word-initial alaph followed by waw/yudh
		{ "#" .. alaph .. waw, "#u" },
		{ "#" .. alaph .. yudh, "#i" },

		-- word-final waw/yudh
		{ waw .. "#", "u#" },
		{ yudh .. "#", "i#" },
	})

	-- alaph: at the end of a word it yields the vowel of the preceding diacritic (or "o" when
	-- there is none); every remaining alaph is dropped
	run({
		{ pthaha .. alaph .. "#", "a#" },
		{ rbasa .. alaph .. "#", "e#" },
		{ zqapha .. alaph .. "#", "o#" },
		{ alaph .. "#", "o#" },
		{ alaph, "" },
	})

	-- Whatever waw/yudh and vowel diacritics are left
	run({ { ".", tt_next } })

	-- Collapse these doubled letters into a single one
	for _, letter in ipairs({ "c", "ḥ", "š", "ṯ", "x" }) do
		text = rsub(text, letter .. letter, letter)
	end

	-- Strip the word-boundary markers
	text = rsub(text, "#", "")

	return text
end

return export