မဝ်ဂျူ:tru-translit

This is the unit-testing module for Module:tru-translit.

All tests passed. (refresh)

testcases for `tr` function in Module:tru-translit:
လိက်	ဗွဲမရံၚ်လၟဳ	မဇေတ်ဍာံ
ܐܰܕܡܐ‎	admo	admo
ܐܰܒܪܐ‎	abro	abro
ܐܰܕ݂ܢܐ‎	aḏno	aḏno
ܐܷܫܬܐ‎	ëšto	ëšto
ܫܰܘܥܐ‎	šawco	šawco
ܬܡܰܢܝܐ‎	tmanyo	tmanyo
ܬܷܫܥܐ‎	tëšco	tëšco
ܚܰܡܫܐ‎	ḥamšo	ḥamšo
ܐܰܪܒܥܐ‎	arbco	arbco
ܬܠܳܬ݂ܐ‎	tloṯo	tloṯo
ܬܪܶܐ‎	tre	tre
ܚܰܐ‎	ḥa	ḥa
ܬܡܝܢܳܝܐ‎	tminoyo	tminoyo
ܕܰܬ ܬܡܰܢܝܐ‎	dat tmanyo	dat tmanyo
ܬܡܳܢܰܥܣܰܪ‎	tmonacsar	tmonacsar
ܐܰܕܥܰܨܪܝـܝܶܐ‎	adcaṣriye	adcaṣriye
ܐܰܕܠܰܠܝܐ‎	adlalyo	adlalyo
ܐܰܕܨܰܦܪܐ‎	adṣafro	adṣafro
ܐܰܕܝܰܘܡܰܐ‎	adyawma	adyawma
ܐܰܬ݂ܡܷܠ‎	aṯmël	aṯmël
ܐܰܬ݂ܝܳܢܐ‎	aṯyono	aṯyono
ܐܰܙܙܝ‎	azzi	azzi
ܒܰܛܝܠܶܐ ܢܶܐ‎	baṭile ne	baṭile ne
ܒܢܳܝܐ‎	bnoyo	bnoyo
ܕܰܪܓ݂ܶܐ‎	darġe	darġe
ܕܘܥܪܝܢܰܐ‎	ducrina	ducrina
ܕܘܪܳܫܶܐ ܓܘܫܡܳܢܳܝܶܐ‎	duroše gušmonoye	duroše gušmonoye
ܦܰܠܩܐ‎	falqo	falqo
ܠܰܫܰܢ‎	lašan	lašan
ܡܶܐ ܙܰܒܢܐ ܠܙܰܒܢܐ‎	me zabno lzabno	me zabno lzabno
ܩܷܛܪܐ‎	qëṭro	qëṭro
ܣܚܳܝܐ‎	sḥoyo	sḥoyo
ܬܰܡܐ‎	tamo	tamo
ܘܰܥܕܐ‎	wacdo	wacdo
ܙܰܒܢܐ‎	zabno	zabno
ܢܚܝܪܐ‎	nḥiro	nḥiro
ܢܳܫܐ‎	nošo	nošo
ܪܝܫܐ‎	rišo	rišo
ܫܰܒܬ݂ܐ‎	šabṯo	šabṯo
ܐܘܥܕܐ‎	ucdo	ucdo
ܘܳܠܝܬ݂ܐ‎	woliṯo	woliṯo
ܙܥܘܪܐ‎	zcuro	zcuro
ܙܥܘܪܬܐ‎	zcurto	zcurto
ܕܰܫܷܫܬܐ‎	dašëšto	dašëšto
ܥܷܢܘܶܐ‎	cënwe	cënwe
ܨܷܪܬܐ‎	ṣërto	ṣërto
ܘܰܟ݂ܰܡ‎	waxam	waxam
ܙܷܒܕܐ‎	zëbdo	zëbdo
ܐܰܝ ܝܰܘܡܰܢܝ‎	ay yawmani	ay yawmani
ܫ̰ܰܢܛܰܐ‎	čanṭa	čanṭa
ܙ̰ܱܒܰܫܶܐ‎	žäbaše	žäbaše
ܙܱ̰ܒܰܫܶܐ‎	žäbaše	žäbaše
ܦ݁ܠܰܢ‎	plan	plan
ܒ݂ܝܠܠܰܐ‎	villa	villa
ܐܳܢܳܐ‎	ono	ono
ܗܰܬܘ‎	hatu	hatu
ܐܝـܝܰܪ‎	iyar	iyar
ܐܰܘ‎	aw	aw
”ܝܐ.“‎	“yo.”	“yo.”
ܒܷܬ݂ܷܪ‎	bëṯër	bëṯër
ܒܷܬܷ݂ܪ‎	bëṯër	bëṯër
ܐܰܘܪܘܦ݁ܰܐ‎	awrupa	awrupa
ܐܰܘܪܘܦܰ݁ܐ‎	awrupa	awrupa
(ܣܘܪܝܳܝܐ)‎	(suryoyo)	(suryoyo)
ܣܘܪܝܳܝܐ܆‎	suryoyo;	suryoyo;
ܡܳܪܝ‎	mor	mor
ܕܡܳܪܝ‎	dmor	dmor
ܛܒܷܥܥܶܗ‎	ṭbëceh	ṭbëceh
ܦܬܷܚܚܶܗ‎	ftëḥeh	ftëḥeh
ܘܡܰܠܘܰܫܫܶܗ‎	umalwašeh	umalwašeh
ܝܘܠܦܳܢܰܬ݂ܬ݂ܶܗ‎	yulfonaṯeh	yulfonaṯeh
ܡܰܠܰܟ݂ܟ݂ܶܗ‎	malaxeh	malaxeh
ܡ̈ܶܠܐ‎	melo	melo

local export = {}

-- Short aliases for the Scribunto Unicode-aware string helpers used throughout this module:
-- U builds a string from code points, rsub is the Unicode-aware gsub.
local U = mw.ustring.char
local rsub = mw.ustring.gsub

-- Vowel marks, given by Unicode code point.
local pthaha = U(0x0730)
local pthaha_below = U(0x0731)
local zqapha = U(0x0733)
local rbasa = U(0x0736)
local rbasa_below = U(0x0737)

-- Pattern that captures any single one of the vowel marks above.
local vowel_diacritics_capture = "(["
	.. rbasa_below .. pthaha_below .. rbasa .. zqapha .. pthaha
	.. "])"

-- The letters that can stand for vowels (matres lectionis) are built from code points instead of being typed as
-- literals: when right-to-left and left-to-right characters share a line, an IDE and Wiktionary may display them
-- in different visual orders. These constants are joined with the .. operator, and "ܘ" .. "ܐ" on Wiktionary
-- would render as "ܐ" .. "ܘ" in an IDE.
local alaph = U(0x0710)
local waw = U(0x0718)
local yudh = U(0x071D)

-- Other combining marks handled by the transliteration.
local combining_diaeresis = U(0x0308)
local combining_tilde_below = U(0x0330)
local qushshaya = U(0x0741)
local rukkakha = U(0x0742)

-- Punctuation lookup used by the transliteration: each key is replaced by its value.
local tt_transpose_punc = {
	-- opening and closing curly quotes (double, then single) trade places
	["“"] = "”",
	["”"] = "“",
	["‘"] = "’",
	["’"] = "‘",
	-- question mark
	["؟"] = "?",
	-- angle quotation marks
	["«"] = "“",
	["»"] = "”",
	-- comma
	["،"] = ",",
	-- semicolon
	["؛"] = ";",
	-- skewed colons, see https://r12a.github.io/scripts/syrc/tru.html#phrase
	["܇"] = ",",
	["܆"] = ";",
}

-- Every key of tt_transpose_punc joined into one string (in pairs() order, which is unspecified);
-- it is spliced into a pattern character set, where the order does not matter.
local tt_transpose_punc_keys
do
	local collected = {}
	for punct in pairs(tt_transpose_punc) do
		collected[#collected + 1] = punct
	end
	tt_transpose_punc_keys = table.concat(collected)
end

-- Ordered pre-processing substitutions; every entry is a { pattern, replacement } pair.
local fix = {}

-- When a vowel mark is immediately followed by one of these marks, swap the two so the mark comes first.
for _, mark in ipairs({ qushshaya, rukkakha, combining_tilde_below }) do
	fix[#fix + 1] = { vowel_diacritics_capture .. mark, mark .. "%1" }
end

-- Surround each punctuation mark with # so the "starts with" and "ends with" substitutions work.
fix[#fix + 1] = { "([" .. tt_transpose_punc_keys .. "()!.:\"'])", "#%1#" }

-- One-to-one letter table: each key is replaced by its Latin value in a single character-by-character pass.
local tt = {
	["ܦ"] = "f",
	["ܒ"] = "b",
	["ܬ"] = "t",
	["ܛ"] = "ṭ",
	["ܕ"] = "d",
	["ܟ"] = "k",
	["ܓ"] = "g",
	["ܩ"] = "q",
	["ܔ"] = "j",
	["ܣ"] = "s",
	["ܨ"] = "ṣ",
	["ܙ"] = "z",
	["ܫ"] = "š",
	["ܚ"] = "ḥ",
	["ܥ"] = "c",
	["ܗ"] = "h",
	["ܡ"] = "m",
	["ܢ"] = "n",
	["ܪ"] = "r",
	["ܠ"] = "l",
}

-- Second lookup table, applied after the context-dependent rules: whatever waw, yudh and vowel marks
-- are still present at that point get these default values.
local tt_next = {}

tt_next[waw] = "w"
tt_next[yudh] = "y"

tt_next[rbasa_below] = "ë"
tt_next[pthaha_below] = "ä"
tt_next[rbasa] = "e"
tt_next[zqapha] = "o"
tt_next[pthaha] = "a"

-- Characters treated as consonants by the context rules in export.tr: every Latin consonant the earlier
-- substitutions can produce (including č, which the original list left out while listing v twice), plus
-- the not-yet-transliterated yudh and waw.
local consonants = "fbtṭdkgqjsṣzšḥchmnrlvžpčṯḏxġ" .. yudh .. waw
-- The same characters as a capturing character set, for use inside patterns.
local consonants_group = "([" .. consonants .. "])"

-- Whole-word exceptions, applied before the general rules. Each entry is
-- { matching_source_text, latin_substitution }.
--
-- The # symbol pads the start and end of every word, so it can anchor a match. Taking "float" as a
-- stand-in for the matching text:
--   #float#   matches only the word float
--   #float    matches words starting with float (float, floats)
--   float#    matches words ending with float (float, afloat)
--   float     matches words containing float (float, floats, afloat, refloats)
local special_cases = {}
special_cases[#special_cases + 1] = { "ܡܳܪܝ#", "mor#" }

-- Applies one substitution again and again until the text stops changing.
-- A single gsub pass cannot match overlapping occurrences: in consonant-waw-consonant-waw-consonant the
-- middle consonant is consumed by the first match, so the second waw would be skipped.
local function rsub_repeatedly(text, pattern, repl)
	while true do
		local replaced = rsub(text, pattern, repl)
		if replaced == text then
			return text
		end
		text = replaced
	end
end

--- Transliterates Syriac-script text into Latin letters.
-- The substitutions below are order-dependent: later rules rely on what earlier ones have already
-- replaced, and on the # word-boundary padding added at the start.
-- @param text  the text to transliterate
-- @param lang  accepted but not used by this function
-- @param sc    accepted but not used by this function
-- @return the transliterated text, with all # padding removed
function export.tr(text, lang, sc)

	-- pad word boundaries with # so rules can anchor on the start or end of a word
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	-- remove every ـ character and every combining diaeresis
	text = rsub(text, "ـ", "")
	text = rsub(text, combining_diaeresis, "")
	-- reorder combining marks and partition punctuation
	for _, sub in ipairs(fix) do text = rsub(text, unpack(sub)) end

	-- Special cases
	for _, sub in ipairs(special_cases) do text = rsub(text, unpack(sub)) end

	-- letters modified by a following combining mark
	text = rsub(text, "ܫ" .. combining_tilde_below, "č")
	text = rsub(text, "ܙ" .. combining_tilde_below, "ž")

	text = rsub(text, "ܦ" .. qushshaya, "p")

	text = rsub(text, "ܒ" .. rukkakha, "v")
	text = rsub(text, "ܬ" .. rukkakha, "ṯ")
	text = rsub(text, "ܕ" .. rukkakha, "ḏ")
	text = rsub(text, "ܟ" .. rukkakha, "x")
	text = rsub(text, "ܓ" .. rukkakha, "ġ")

	-- character-by-character lookups: punctuation first, then plain letters
	text = rsub(text, ".", tt_transpose_punc)
	text = rsub(text, ".", tt)

	-- waw / yudh between two consonants are vowels; repeated until stable so that overlapping
	-- sequences (consonant-waw-consonant-waw-consonant) are fully converted
	text = rsub_repeatedly(text, consonants_group .. waw .. consonants_group, "%1u%2")
	text = rsub_repeatedly(text, consonants_group .. yudh .. consonants_group, "%1i%2")
	-- word-initial waw / yudh directly before a consonant
	text = rsub(text, "#" .. waw .. consonants_group, "#u%1")
	text = rsub(text, "#" .. yudh .. consonants_group, "#i%1") -- this needs a test case

	-- word-final alaph + pthaha + waw/yudh becomes aw / ay; done before the word-final waw/yudh rules
	-- below so they do not turn into u / i
	text = rsub(text, alaph .. pthaha ..  waw .. "#", "#aw")
	text = rsub(text, alaph .. pthaha .. yudh .. "#", "#ay")

	-- word-initial alaph + waw/yudh
	text = rsub(text, "#" .. alaph .. waw, "#u")
	text = rsub(text, "#" .. alaph .. yudh, "#i")

	-- word-final waw / yudh
	text = rsub(text, waw .. "#", "u#")
	text = rsub(text, yudh .. "#", "i#")

	-- word-final alaph: takes the preceding vowel mark's value, or "o" when there is none;
	-- any alaph left after that is dropped
	text = rsub(text, pthaha .. alaph .. "#", "a#")
	text = rsub(text, rbasa .. alaph .. "#", "e#")
	text = rsub(text, zqapha .. alaph .. "#", "o#")
	text = rsub(text, alaph .. "#", "o#")
	text = rsub(text, alaph, "")

	-- default values for the remaining waw, yudh and vowel marks
	text = rsub(text, ".", tt_next)

	-- collapse these doubled consonants into one
	text = rsub(text, "cc", "c")
	text = rsub(text, "ḥḥ", "ḥ")
	text = rsub(text, "šš", "š")
	text = rsub(text, "ṯṯ", "ṯ")
	text = rsub(text, "xx", "x")

	-- strip the word-boundary padding
	text = rsub(text, "#", "")

	return text
end

return export