မာတိကာသို့ ခုန်သွားရန်

မဝ်ဂျူ:tru-translit

နူ ဝိက်ရှေန်နရဳ

This is the unit-testing module for Module:tru-translit.

All tests passed. (refresh)

Text | Expected | Actual | Differs at
testcases for tr function in Module:tru-translit:
Passedܐܰܕܡܐadmoadmo
Passedܐܰܒܪܐabroabro
Passedܐܰܕ݂ܢܐaḏnoaḏno
Passedܐܷܫܬܐëštoëšto
Passedܫܰܘܥܐšawcošawco
Passedܬܡܰܢܝܐtmanyotmanyo
Passedܬܷܫܥܐtëšcotëšco
Passedܚܰܡܫܐḥamšoḥamšo
Passedܐܰܪܒܥܐarbcoarbco
Passedܬܠܳܬ݂ܐtloṯotloṯo
Passedܬܪܶܐtretre
Passedܚܰܐḥaḥa
Passedܬܡܝܢܳܝܐtminoyotminoyo
Passedܕܰܬ ܬܡܰܢܝܐdat tmanyodat tmanyo
Passedܬܡܳܢܰܥܣܰܪtmonacsartmonacsar
Passedܐܰܕܥܰܨܪܝـܝܶܐadcaṣriyeadcaṣriye
Passedܐܰܕܠܰܠܝܐadlalyoadlalyo
Passedܐܰܕܨܰܦܪܐadṣafroadṣafro
Passedܐܰܕܝܰܘܡܰܐadyawmaadyawma
Passedܐܰܬ݂ܡܷܠaṯmëlaṯmël
Passedܐܰܬ݂ܝܳܢܐaṯyonoaṯyono
Passedܐܰܙܙܝazziazzi
Passedܒܰܛܝܠܶܐ ܢܶܐbaṭile nebaṭile ne
Passedܒܢܳܝܐbnoyobnoyo
Passedܕܰܪܓ݂ܶܐdarġedarġe
Passedܕܘܥܪܝܢܰܐducrinaducrina
Passedܕܘܪܳܫܶܐ ܓܘܫܡܳܢܳܝܶܐduroše gušmonoyeduroše gušmonoye
Passedܦܰܠܩܐfalqofalqo
Passedܠܰܫܰܢlašanlašan
Passedܡܶܐ ܙܰܒܢܐ ܠܙܰܒܢܐme zabno lzabnome zabno lzabno
Passedܩܷܛܪܐqëṭroqëṭro
Passedܣܚܳܝܐsḥoyosḥoyo
Passedܬܰܡܐtamotamo
Passedܘܰܥܕܐwacdowacdo
Passedܙܰܒܢܐzabnozabno
Passedܢܚܝܪܐnḥironḥiro
Passedܢܳܫܐnošonošo
Passedܪܝܫܐrišorišo
Passedܫܰܒܬ݂ܐšabṯošabṯo
Passedܐܘܥܕܐucdoucdo
Passedܘܳܠܝܬ݂ܐwoliṯowoliṯo
Passedܙܥܘܪܐzcurozcuro
Passedܙܥܘܪܬܐzcurtozcurto
Passedܕܰܫܷܫܬܐdašëštodašëšto
Passedܥܷܢܘܶܐcënwecënwe
Passedܨܷܪܬܐṣërtoṣërto
Passedܘܰܟ݂ܰܡwaxamwaxam
Passedܙܷܒܕܐzëbdozëbdo
Passedܐܰܝ ܝܰܘܡܰܢܝay yawmaniay yawmani
Passedܫ̰ܰܢܛܰܐčanṭačanṭa
Passedܙ̰ܱܒܰܫܶܐžäbašežäbaše
Passedܙܱ̰ܒܰܫܶܐžäbašežäbaše
Passedܦ݁ܠܰܢplanplan
Passedܒ݂ܝܠܠܰܐvillavilla
Passedܐܳܢܳܐonoono
Passedܗܰܬܘhatuhatu
Passedܐܝـܝܰܪiyariyar
Passedܐܰܘawaw
Passed”ܝܐ.““yo.”“yo.”
Passedܒܷܬ݂ܷܪbëṯërbëṯër
Passedܒܷܬܷ݂ܪbëṯërbëṯër
Passedܐܰܘܪܘܦ݁ܰܐawrupaawrupa
Passedܐܰܘܪܘܦܰ݁ܐawrupaawrupa
Passed(ܣܘܪܝܳܝܐ)(suryoyo)(suryoyo)
Passedܣܘܪܝܳܝܐ܆suryoyo;suryoyo;
Passedܡܳܪܝmormor
Passedܕܡܳܪܝdmordmor
Passedܛܒܷܥܥܶܗṭbëcehṭbëceh
Passedܦܬܷܚܚܶܗftëḥehftëḥeh
Passedܘܡܰܠܘܰܫܫܶܗumalwašehumalwašeh
Passedܝܘܠܦܳܢܰܬ݂ܬ݂ܶܗyulfonaṯehyulfonaṯeh
Passedܡܰܠܰܟ݂ܟ݂ܶܗmalaxehmalaxeh
Passedܡ̈ܶܠܐmelomelo

-- Table returned by this module; the public `tr` function is attached to it further down.
local export = {}

-- Short aliases for the Unicode-aware Scribunto string helpers used throughout the module.
-- (`U` was previously declared twice in a row; the redundant shadowing declaration is removed.)
local U = mw.ustring.char
local rsub = mw.ustring.gsub

-- Combining vowel marks, listed by code point.
local pthaha = U(0x730)
local pthaha_below = U(0x731)
local zqapha = U(0x733)
local rbasa = U(0x736)
local rbasa_below = U(0x737)

-- Pattern fragment that captures exactly one of the vowel marks above.
local vowel_diacritics_capture = "(["
	.. rbasa_below .. pthaha_below .. rbasa .. zqapha .. pthaha
	.. "])"

-- The letters that can stand for vowels (matres lectionis) are built from code points instead of being typed
-- as literals. Mixing right-to-left and left-to-right characters on one line is displayed differently in an
-- IDE and on wiktionary: a concatenation such as "ܘ" .. "ܐ" on wiktionary would render as "ܐ" .. "ܘ" in an
-- IDE, which makes literal concatenations easy to misread.
local alaph, waw, yudh = U(0x710), U(0x718), U(0x71D)

-- Generic combining marks: the diaeresis is stripped by export.tr, the tilde below turns ܫ / ܙ into č / ž.
local combining_diaeresis, combining_tilde_below = U(0x308), U(0x330)
-- Syriac marks: qushshaya turns ܦ into p; rukkakha selects the v / ṯ / ḏ / x / ġ readings in export.tr.
local qushshaya, rukkakha = U(0x741), U(0x742)

-- Punctuation map applied character by character in export.tr.
-- Curly quotes are swapped left <-> right; the remaining marks are replaced by Latin-script punctuation.
local tt_transpose_punc = {
	-- double curly quotes
	["“"] = "”",
	["”"] = "“",
	-- single curly quotes
	["‘"] = "’",
	["’"] = "‘",
	-- question mark
	["؟"] = "?",
	-- guillemets become double curly quotes
	["«"] = "“",
	["»"] = "”",
	-- comma
	["،"] = ",",
	-- semicolon
	["؛"] = ";",
	-- skewed colons, see https://r12a.github.io/scripts/syrc/tru.html#phrase
	["܇"] = ",",
	["܆"] = ";"
}

-- All keys of tt_transpose_punc joined into a single string, so they can be dropped into a pattern
-- character class. The iteration order of pairs() is unspecified, which is harmless inside a class.
local tt_transpose_punc_keys = ''
do
	local collected = {}
	for punctuation in pairs(tt_transpose_punc) do
		collected[#collected + 1] = punctuation
	end
	tt_transpose_punc_keys = table.concat(collected)
end

-- Ordered { pattern, replacement } pairs applied by export.tr before any letter is transliterated.
local fix = {}

-- Normalise mark order: when a vowel mark precedes qushshaya, rukkakha or the tilde below, swap the two so
-- that the consonant-modifying mark directly follows its consonant.
for _, mark in ipairs({ qushshaya, rukkakha, combining_tilde_below }) do
	fix[#fix + 1] = { vowel_diacritics_capture .. mark, mark .. "%1" }
end

-- partition punctuation marks so "starts with" and "ends with" substitutions work
fix[#fix + 1] = { "([" .. tt_transpose_punc_keys .. "()!.:\"'])", "#%1#" }

-- Plain consonant letters and their Latin transliteration, applied character by character in export.tr.
-- Alaph, waw and yudh are deliberately absent: they are handled by separate rules and by tt_next.
local tt = {
	["ܦ"] = "f",
	["ܒ"] = "b",
	["ܬ"] = "t",
	["ܛ"] = "ṭ",
	["ܕ"] = "d",
	["ܟ"] = "k",
	["ܓ"] = "g",
	["ܩ"] = "q",
	["ܔ"] = "j",
	["ܣ"] = "s",
	["ܨ"] = "ṣ",
	["ܙ"] = "z",
	["ܫ"] = "š",
	["ܚ"] = "ḥ",
	["ܥ"] = "c",
	["ܗ"] = "h",
	["ܡ"] = "m",
	["ܢ"] = "n",
	["ܪ"] = "r",
	["ܠ"] = "l",
}

-- Second-stage map, applied in export.tr after the matres lectionis rules have run: whatever waw / yudh
-- is still left is read as a consonant, and every vowel mark becomes its Latin vowel.
local tt_next = {}

-- leftover matres lectionis
tt_next[waw] = "w"
tt_next[yudh] = "y"

-- vowel marks
tt_next[rbasa_below] = "ë"
tt_next[pthaha_below] = "ä"
tt_next[rbasa] = "e"
tt_next[zqapha] = "o"
tt_next[pthaha] = "a"

-- Every character that counts as a consonant when export.tr decides whether a waw / yudh is a vowel carrier:
--   * the Latin output of the tt table,
--   * the Latin letters produced from letter + mark combinations (č ž p v ṯ ḏ x ġ),
--   * raw yudh and waw, which are still untransliterated when these groups are matched.
-- The list used to contain "v" twice and no "č", so a waw / yudh next to č was never vocalised.
local consonants = "fbtṭdkgqjsṣzšḥchmnrlčžpvṯḏxġ" .. yudh .. waw
-- Capturing character class matching exactly one consonant.
local consonants_group = "([" .. consonants .. "])"

-- Whole-word exceptions, applied in order by export.tr right after the `fix` substitutions and before any
-- letter is transliterated.
local special_cases = {
	-- { matching_text, latin_substitution }
	--
	-- the # symbol pads the start and end of a word, consider the following examples for matching_text
	-- #float#    only float matches
	-- #float     words starting with float like float or floats match
	-- float#     words ending with float like float or afloat match
	-- float      words containing float like float, floats, afloat and refloats match
	{"ܡܳܪܝ#", "mor#"},
}

-- Repeats one substitution until it no longer matches.
-- gsub never re-scans characters it has already consumed, so with a "context, target, context" pattern two
-- targets that share a context character (consonant-waw-consonant-waw-consonant) cannot both be rewritten
-- in a single pass. Callers must pass a replacement that removes the target, otherwise this never ends.
local function rsub_until_stable(text, pattern, repl)
	local count
	repeat
		text, count = rsub(text, pattern, repl)
	until count == 0
	return text
end

--- Transliterates Syriac-script text into Latin script.
-- The function is a strictly ordered pipeline of substitutions; the order of the steps matters because
-- later rules rely on the Latin letters and "#" word-boundary markers produced by earlier ones.
-- @param text  string to transliterate
-- @param lang  accepted but not used by this function
-- @param sc    accepted but not used by this function
-- @return the transliterated string, with all "#" markers removed
function export.tr(text, lang, sc)

	-- Mark word boundaries with "#": around " | ", around every space, and at both ends of the text.
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	-- Drop characters that carry no sound: the tatweel and the combining diaeresis.
	text = rsub(text, "ـ", "")
	text = rsub(text, combining_diaeresis, "")
	-- Normalise mark order and isolate punctuation between "#" markers.
	for _, sub in ipairs(fix) do text = rsub(text, unpack(sub)) end

	-- Special cases
	for _, sub in ipairs(special_cases) do text = rsub(text, unpack(sub)) end

	-- Letter + mark combinations must be resolved before the bare letters are mapped through tt.
	text = rsub(text, "ܫ" .. combining_tilde_below, "č")
	text = rsub(text, "ܙ" .. combining_tilde_below, "ž")

	text = rsub(text, "ܦ" .. qushshaya, "p")

	text = rsub(text, "ܒ" .. rukkakha, "v")
	text = rsub(text, "ܬ" .. rukkakha, "ṯ")
	text = rsub(text, "ܕ" .. rukkakha, "ḏ")
	text = rsub(text, "ܟ" .. rukkakha, "x")
	text = rsub(text, "ܓ" .. rukkakha, "ġ")

	-- Character-by-character table lookups; characters missing from the table are left untouched.
	text = rsub(text, ".", tt_transpose_punc)
	text = rsub(text, ".", tt)

	-- Waw / yudh between two consonants is a vowel (u / i). Repeated until stable so that two matres
	-- sharing the same middle consonant are both converted (a single gsub pass would skip the second).
	text = rsub_until_stable(text, consonants_group .. waw .. consonants_group, "%1u%2")
	text = rsub_until_stable(text, consonants_group .. yudh .. consonants_group, "%1i%2")
	-- Word-initial waw / yudh directly followed by a consonant is a vowel as well.
	text = rsub(text, "#" .. waw .. consonants_group, "#u%1")
	text = rsub(text, "#" .. yudh .. consonants_group, "#i%1") -- this needs a test case

	-- Alaph + pthaha + waw / yudh in front of a "#" marker becomes "aw" / "ay"; the marker is re-emitted
	-- in front of the result.
	text = rsub(text, alaph .. pthaha ..  waw .. "#", "#aw")
	text = rsub(text, alaph .. pthaha .. yudh .. "#", "#ay")

	-- Word-initial alaph + waw / yudh is a plain vowel.
	text = rsub(text, "#" .. alaph .. waw, "#u")
	text = rsub(text, "#" .. alaph .. yudh, "#i")

	-- Word-final waw / yudh is a vowel.
	text = rsub(text, waw .. "#", "u#")
	text = rsub(text, yudh .. "#", "i#")

	-- Word-final alaph: silent after an explicit vowel mark, "o" when it stands alone; any alaph that is
	-- still left afterwards is dropped.
	text = rsub(text, pthaha .. alaph .. "#", "a#")
	text = rsub(text, rbasa .. alaph .. "#", "e#")
	text = rsub(text, zqapha .. alaph .. "#", "o#")
	text = rsub(text, alaph .. "#", "o#")
	text = rsub(text, alaph, "")

	-- Remaining waw / yudh are consonants; vowel marks become Latin vowels.
	text = rsub(text, ".", tt_next)

	-- Collapse these doubled consonants to a single letter.
	text = rsub(text, "cc", "c")
	text = rsub(text, "ḥḥ", "ḥ")
	text = rsub(text, "šš", "š")
	text = rsub(text, "ṯṯ", "ṯ")
	text = rsub(text, "xx", "x")

	-- Remove the word-boundary markers.
	text = rsub(text, "#", "")

	return text
end

-- Hand the module table (with `tr` attached) back to whoever loads this module.
return export