မဝ်ဂျူ:tru-translit
မံက်ပြာကတ်
This is the unit-testing module for Module:tru-translit.
All tests passed. (refresh)
| Text | Expected | Actual | Differs at | |
|---|---|---|---|---|
| ܐܰܕܡܐ | admo | admo | ||
| ܐܰܒܪܐ | abro | abro | ||
| ܐܰܕ݂ܢܐ | aḏno | aḏno | ||
| ܐܷܫܬܐ | ëšto | ëšto | ||
| ܫܰܘܥܐ | šawco | šawco | ||
| ܬܡܰܢܝܐ | tmanyo | tmanyo | ||
| ܬܷܫܥܐ | tëšco | tëšco | ||
| ܚܰܡܫܐ | ḥamšo | ḥamšo | ||
| ܐܰܪܒܥܐ | arbco | arbco | ||
| ܬܠܳܬ݂ܐ | tloṯo | tloṯo | ||
| ܬܪܶܐ | tre | tre | ||
| ܚܰܐ | ḥa | ḥa | ||
| ܬܡܝܢܳܝܐ | tminoyo | tminoyo | ||
| ܕܰܬ ܬܡܰܢܝܐ | dat tmanyo | dat tmanyo | ||
| ܬܡܳܢܰܥܣܰܪ | tmonacsar | tmonacsar | ||
| ܐܰܕܥܰܨܪܝـܝܶܐ | adcaṣriye | adcaṣriye | ||
| ܐܰܕܠܰܠܝܐ | adlalyo | adlalyo | ||
| ܐܰܕܨܰܦܪܐ | adṣafro | adṣafro | ||
| ܐܰܕܝܰܘܡܰܐ | adyawma | adyawma | ||
| ܐܰܬ݂ܡܷܠ | aṯmël | aṯmël | ||
| ܐܰܬ݂ܝܳܢܐ | aṯyono | aṯyono | ||
| ܐܰܙܙܝ | azzi | azzi | ||
| ܒܰܛܝܠܶܐ ܢܶܐ | baṭile ne | baṭile ne | ||
| ܒܢܳܝܐ | bnoyo | bnoyo | ||
| ܕܰܪܓ݂ܶܐ | darġe | darġe | ||
| ܕܘܥܪܝܢܰܐ | ducrina | ducrina | ||
| ܕܘܪܳܫܶܐ ܓܘܫܡܳܢܳܝܶܐ | duroše gušmonoye | duroše gušmonoye | ||
| ܦܰܠܩܐ | falqo | falqo | ||
| ܠܰܫܰܢ | lašan | lašan | ||
| ܡܶܐ ܙܰܒܢܐ ܠܙܰܒܢܐ | me zabno lzabno | me zabno lzabno | ||
| ܩܷܛܪܐ | qëṭro | qëṭro | ||
| ܣܚܳܝܐ | sḥoyo | sḥoyo | ||
| ܬܰܡܐ | tamo | tamo | ||
| ܘܰܥܕܐ | wacdo | wacdo | ||
| ܙܰܒܢܐ | zabno | zabno | ||
| ܢܚܝܪܐ | nḥiro | nḥiro | ||
| ܢܳܫܐ | nošo | nošo | ||
| ܪܝܫܐ | rišo | rišo | ||
| ܫܰܒܬ݂ܐ | šabṯo | šabṯo | ||
| ܐܘܥܕܐ | ucdo | ucdo | ||
| ܘܳܠܝܬ݂ܐ | woliṯo | woliṯo | ||
| ܙܥܘܪܐ | zcuro | zcuro | ||
| ܙܥܘܪܬܐ | zcurto | zcurto | ||
| ܕܰܫܷܫܬܐ | dašëšto | dašëšto | ||
| ܥܷܢܘܶܐ | cënwe | cënwe | ||
| ܨܷܪܬܐ | ṣërto | ṣërto | ||
| ܘܰܟ݂ܰܡ | waxam | waxam | ||
| ܙܷܒܕܐ | zëbdo | zëbdo | ||
| ܐܰܝ ܝܰܘܡܰܢܝ | ay yawmani | ay yawmani | ||
| ܫ̰ܰܢܛܰܐ | čanṭa | čanṭa | ||
| ܙ̰ܱܒܰܫܶܐ | žäbaše | žäbaše | ||
| ܙܱ̰ܒܰܫܶܐ | žäbaše | žäbaše | ||
| ܦ݁ܠܰܢ | plan | plan | ||
| ܒ݂ܝܠܠܰܐ | villa | villa | ||
| ܐܳܢܳܐ | ono | ono | ||
| ܗܰܬܘ | hatu | hatu | ||
| ܐܝـܝܰܪ | iyar | iyar | ||
| ܐܰܘ | aw | aw | ||
| ”ܝܐ.“ | “yo.” | “yo.” | ||
| ܒܷܬ݂ܷܪ | bëṯër | bëṯër | ||
| ܒܷܬܷ݂ܪ | bëṯër | bëṯër | ||
| ܐܰܘܪܘܦ݁ܰܐ | awrupa | awrupa | ||
| ܐܰܘܪܘܦܰ݁ܐ | awrupa | awrupa | ||
| (ܣܘܪܝܳܝܐ) | (suryoyo) | (suryoyo) | ||
| ܣܘܪܝܳܝܐ܆ | suryoyo; | suryoyo; | ||
| ܡܳܪܝ | mor | mor | ||
| ܕܡܳܪܝ | dmor | dmor | ||
| ܛܒܷܥܥܶܗ | ṭbëceh | ṭbëceh | ||
| ܦܬܷܚܚܶܗ | ftëḥeh | ftëḥeh | ||
| ܘܡܰܠܘܰܫܫܶܗ | umalwašeh | umalwašeh | ||
| ܝܘܠܦܳܢܰܬ݂ܬ݂ܶܗ | yulfonaṯeh | yulfonaṯeh | ||
| ܡܰܠܰܟ݂ܟ݂ܶܗ | malaxeh | malaxeh | ||
| ܡ̈ܶܠܐ | melo | melo | ||
-- Table of functions exported by this module.
local export = {}

-- Shorthands for the Unicode-aware Scribunto string helpers.
-- (U was previously declared twice; the redundant second declaration is gone.)
local U = mw.ustring.char
local rsub = mw.ustring.gsub

-- Syriac vowel marks, declared by code point.
local rbasa_below = U(0x737)
local pthaha_below = U(0x731)
local rbasa = U(0x736)
local zqapha = U(0x733)
local pthaha = U(0x730)
-- Pattern that captures any single one of the five vowel marks above.
local vowel_diacritics_capture = "([" .. rbasa_below .. pthaha_below .. rbasa .. zqapha .. pthaha .. "])"

-- The letters that can stand for vowels (alaph, waw, yudh) are declared as constants built from code points
-- to mitigate differences in how mixing right-to-left and left-to-right characters in the same line appears
-- in an IDE vs wiktionary. Since these letters are used in concatenation via the .. operator, writing them as
-- literals would be confusing: "ܘ" .. "ܐ" on wiktionary would render as "ܐ" .. "ܘ" in an IDE.
local alaph = U(0x710)
local waw = U(0x718)
local yudh = U(0x71D)

-- Combining marks that modify the preceding letter.
local combining_diaeresis = U(0x308)
local combining_tilde_below = U(0x330)
local qushshaya = U(0x741)
local rukkakha = U(0x742)
-- Punctuation lookup: each key found in the input is replaced by its value.
local tt_transpose_punc = {
	-- curly quotes: left and right forms are swapped
	["“"] = "”",
	["”"] = "“",
	["‘"] = "’",
	["’"] = "‘",
	-- question mark
	["؟"] = "?",
	-- guillemets become curly double quotes
	["«"] = "“",
	["»"] = "”",
	-- comma
	["،"] = ",",
	-- semicolon
	["؛"] = ";",
	-- skewed colons from https://r12a.github.io/scripts/syrc/tru.html#phrase
	["܇"] = ",",
	["܆"] = ";",
}

-- Every key of tt_transpose_punc joined into one string, for use inside a
-- pattern character class. The order follows pairs() and is unspecified.
local tt_transpose_punc_keys
do
	local collected = {}
	for punc in pairs(tt_transpose_punc) do
		collected[#collected + 1] = punc
	end
	tt_transpose_punc_keys = table.concat(collected)
end
-- Ordered { pattern, replacement } pairs applied to the text before transliteration.
local fix = {}

-- When a vowel mark is immediately followed by one of these combining marks,
-- swap the two so that the combining mark comes first.
for _, mark in ipairs({ qushshaya, rukkakha, combining_tilde_below }) do
	fix[#fix + 1] = { vowel_diacritics_capture .. mark, mark .. "%1" }
end

-- partition punctuation marks so "starts with" and "ends with" substitutions work
fix[#fix + 1] = { "([" .. tt_transpose_punc_keys .. "()!.:\"'])", "#%1#" }
-- Base letter table: Syriac consonant letter -> Latin transliteration.
local tt = {
	["ܦ"] = "f",
	["ܒ"] = "b",
	["ܬ"] = "t",
	["ܛ"] = "ṭ",
	["ܕ"] = "d",
	["ܟ"] = "k",
	["ܓ"] = "g",
	["ܩ"] = "q",
	["ܔ"] = "j",
	["ܣ"] = "s",
	["ܨ"] = "ṣ",
	["ܙ"] = "z",
	["ܫ"] = "š",
	["ܚ"] = "ḥ",
	["ܥ"] = "c",
	["ܗ"] = "h",
	["ܡ"] = "m",
	["ܢ"] = "n",
	["ܪ"] = "r",
	["ܠ"] = "l",
}
-- Second-pass table: Latin values for waw, yudh and the vowel marks.
local tt_next = {}

-- waw and yudh as consonants
tt_next[waw] = "w"
tt_next[yudh] = "y"

-- vowel marks
tt_next[rbasa_below] = "ë"
tt_next[pthaha_below] = "ä"
tt_next[rbasa] = "e"
tt_next[zqapha] = "o"
tt_next[pthaha] = "a"
-- Characters treated as consonants when deciding whether a neighbouring waw or
-- yudh is a vowel: the Latin consonant letters emitted by the earlier
-- substitutions, plus the still-untransliterated Syriac yudh and waw.
-- "č" (emitted for ܫ + tilde below) is included alongside "ž" so that waw/yudh
-- next to it are vocalised like after any other consonant; the duplicated "v"
-- of the earlier list has been removed (it had no effect inside a character class).
local consonants = "fbtṭdkgqjsṣzšḥchmnrlvžčpṯḏxġ" .. yudh .. waw
-- Pattern capturing exactly one consonant character.
local consonants_group = "([" .. consonants .. "])"
-- Word-level overrides, applied by export.tr after the `fix` rules and before
-- the letter-by-letter substitutions.
--
-- Each entry has the form { matching_tru_text, latin_substitution }.
--
-- The # symbol pads the start and end of every word. Taking "float" as a
-- stand-in for the matching text, the following forms are possible:
--   #float#   only the word float matches
--   #float    words starting with float, like float or floats, match
--   float#    words ending with float, like float or afloat, match
--   float     words containing float, like float, floats, afloat and refloats, match
local special_cases = {}

-- words ending in this sequence are rendered with final "mor"
special_cases[#special_cases + 1] = { "ܡܳܪܝ#", "mor#" }
-- Transliterates Syriac-script text into Latin script.
--
-- text: the string to transliterate.
-- lang, sc: accepted but not used by this function.
-- Returns the transliterated string.
--
-- The work is a fixed sequence of pattern substitutions; their order matters,
-- because later rules rely on what earlier rules have already replaced.
function export.tr(text, lang, sc)
	-- Runs an ordered list of { pattern, replacement } pairs over `text`.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsub(text, unpack(rule))
		end
	end

	-- Surround every word (and every " | " separator) with "#" so that the
	-- rules below can anchor on the start or end of a word.
	local padded = rsub(text, " | ", "# | #")
	padded = rsub(padded, " ", "# #")
	text = "##" .. padded .. "##"

	-- Remove characters that carry no sound: tatweel and the combining diaeresis.
	apply({
		{ "ـ", "" },
		{ combining_diaeresis, "" },
	})

	-- Mark reordering and punctuation partitioning.
	apply(fix)

	-- Special cases
	apply(special_cases)

	apply({
		-- letters modified by a combining mark
		{ "ܫ" .. combining_tilde_below, "č" },
		{ "ܙ" .. combining_tilde_below, "ž" },
		{ "ܦ" .. qushshaya, "p" },
		{ "ܒ" .. rukkakha, "v" },
		{ "ܬ" .. rukkakha, "ṯ" },
		{ "ܕ" .. rukkakha, "ḏ" },
		{ "ܟ" .. rukkakha, "x" },
		{ "ܓ" .. rukkakha, "ġ" },

		-- per-character table lookups: punctuation first, then plain consonants
		{ ".", tt_transpose_punc },
		{ ".", tt },

		-- waw / yudh between two consonants are the vowels u / i
		{ consonants_group .. waw .. consonants_group, "%1u%2" },
		{ consonants_group .. yudh .. consonants_group, "%1i%2" },

		-- word-initial waw / yudh before a consonant
		{ "#" .. waw .. consonants_group, "#u%1" },
		{ "#" .. yudh .. consonants_group, "#i%1" }, -- this needs a test case

		-- alaph + pthaha + waw/yudh at the end of a word.
		-- NOTE(review): the replacement puts "#" before the result rather than
		-- after it; kept exactly as in the original, confirm this is intended.
		{ alaph .. pthaha .. waw .. "#", "#aw" },
		{ alaph .. pthaha .. yudh .. "#", "#ay" },

		-- word-initial alaph followed by waw / yudh
		{ "#" .. alaph .. waw, "#u" },
		{ "#" .. alaph .. yudh, "#i" },

		-- word-final waw / yudh
		{ waw .. "#", "u#" },
		{ yudh .. "#", "i#" },

		-- word-final alaph: after a vowel mark only the vowel is kept,
		-- otherwise the alaph itself becomes "o"
		{ pthaha .. alaph .. "#", "a#" },
		{ rbasa .. alaph .. "#", "e#" },
		{ zqapha .. alaph .. "#", "o#" },
		{ alaph .. "#", "o#" },

		-- any alaph still left is dropped
		{ alaph, "" },

		-- remaining waw, yudh and vowel marks
		{ ".", tt_next },

		-- collapse these doubled consonants into one
		{ "cc", "c" },
		{ "ḥḥ", "ḥ" },
		{ "šš", "š" },
		{ "ṯṯ", "ṯ" },
		{ "xx", "x" },

		-- finally strip the word-boundary markers
		{ "#", "" },
	})

	return text
end
return export