မဝ်ဂျူ:Tibt-sortkey
မံက်ပြာကတ်
This module will sort text in the အက္ခရ်တိဗိတ်. It is used to sort အဒပ်, ဗဝ်လ်တဳ, ဂါဟရဳ, Brokkat, Chocangaca, Idu, Changthang, ဂါတ်လေဝ် ကူတာန်, Bumthangkha, Kalaktang Monpa, Ladakhi, Lunanakha, Layakha, Nyenkha, Manangba, တိဗိတ်တြေံ, ချာလဳ, ချာန်လာ, တဝါန် မန်ပါ, တိဗိတ်ဝၚ်ဂန္ထ, Khengkha, ခူတာတ်, သှေပါ, ဂျေန်ကျေန်, and Zangskari.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{sortkey}}.
Within a module, use Module:languages#Language:makeSortKey.
For testcases, see မဝ်ဂျူ:Tibt-sortkey/testcases.
Functions
makeSortKey(text, lang, sc) - Generates a sortkey for a given piece of
text written in the script specified by the code sc, and language specified by the code lang. When the sort fails, returns
nil.
local export = {}
local m_str_utils = require("Module:string utilities")
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char
local Tibt = require("Module:Tibt-common")
-- Four private-use characters (U+E000..U+E003). sortRadical uses them as
-- markers on the radical: b replaces the "༹" mark, c and d tag the rewritten
-- forms of "ཫ" and "ཬ", and a is appended when the radical does not already end
-- in one of b..d.
local a, b, c, d = u(0xE000), u(0xE001), u(0xE002), u(0xE003)
-- Ordered table of the letters this module knows about. The position of an
-- entry in this list is the numeric value sortValue assigns to that letter,
-- and #letters + 1 is the base used by baseConvert, so the order matters.
-- Each entry is {form1, form2[, form3]}:
--   * form1 and form2 are two spellings of the same letter; the code rewrites
--     form2 to form1 (for the consonants form2 looks like the subjoined
--     form, for the vowels a decomposed sequence).
--   * form3, when present, is a stand-in that makeSortKey substitutes for
--     form1/form2 and that sortRadical maps back to form1.
-- NOTE(review): every form3 appears here as an empty string; presumably these
-- were single placeholder characters (e.g. private-use code points) that got
-- lost in transit. Confirm against the upstream module before relying on them.
local letters = {
{"ཀ", "ྐ"}, {"ཀ༹", "ྐ༹", ""}, {"ཫ", "ཫ"}, {"ཫ༹", "ཫ༹", ""}, {"ཁ", "ྑ"}, {"ཁ༹", "ྑ༹", ""}, {"ག", "ྒ"}, {"ག༹", "ྒ༹", ""}, {"ང", "ྔ"}, {"ང༹", "ྔ༹", ""}, {"ཅ", "ྕ"}, {"ཆ", "ྖ"}, {"ཇ", "ྗ"}, {"ཉ", "ྙ"}, {"ཉ༹", "ྙ༹", ""}, {"ཊ", "ྚ"}, {"ཊ༹", "ྚ༹", ""}, {"ཋ", "ྛ"}, {"ཋ༹", "ྛ༹", ""}, {"ཌ", "ྜ"}, {"ཌ༹", "ྜ༹", ""}, {"ཎ", "ྞ"}, {"ཎ༹", "ྞ༹", ""}, {"ཏ", "ྟ"}, {"ཏ༹", "ྟ༹", ""}, {"ཐ", "ྠ"}, {"ཐ༹", "ྠ༹", ""}, {"ད", "ྡ"}, {"ད༹", "ྡ༹", ""}, {"ན", "ྣ"}, {"ན༹", "ྣ༹", ""}, {"པ", "ྤ"}, {"པ༹", "ྤ༹", ""}, {"ཕ", "ྥ"}, {"ཕ༹", "ྥ༹", ""}, {"བ", "ྦ"}, {"བ༹", "ྦ༹", ""}, {"མ", "ྨ"}, {"མ༹", "ྨ༹", ""}, {"ཙ", "ྩ"}, {"ཚ", "ྪ"}, {"ཛ", "ྫ"}, {"ཝ", "ྭ"}, {"ཝ༹", "ྭ༹", ""}, {"ཞ", "ྮ"}, {"ཞ༹", "ྮ༹", ""}, {"ཟ", "ྯ"}, {"ཟ༹", "ྯ༹", ""}, {"འ", "ྰ"}, {"འ༹", "ྰ༹", ""}, {"ཡ", "ྱ"}, {"ཡ༹", "ྱ༹", ""}, {"ར", "ྲ"}, {"ར༹", "ྲ༹", ""}, {"ཬ", "ཬ"}, {"ཬ༹", "ཬ༹", ""}, {"ལ", "ླ"}, {"ལ༹", "ླ༹", ""}, {"ཤ", "ྴ"}, {"ཤ༹", "ྴ༹", ""}, {"ཥ", "ྵ"}, {"ཥ༹", "ྵ༹", ""}, {"ས", "ྶ"}, {"ས༹", "ྶ༹", ""}, {"ཧ", "ྷ"}, {"ཧ༹", "ྷ༹", ""}, {"ཨ", "ྸ"}, {"ཨ༹", "ྸ༹", ""}, {"ཱ", "ཱ"}, {"ི", "ི"}, {u(0xF73), "ཱི"}, {"ུ", "ུ"}, {u(0xF75), "ཱུ"}, {u(0xF76), "ྲྀ"}, {u(0xF77), "ྲཱྀ"}, {u(0xF78), "ླྀ"}, {u(0xF79), "ླཱྀ"}, {"ེ", "ེ"}, {"ཻ", "ཻ"}, {"ོ", "ོ"}, {"ཽ", "ཽ"}
}
-- Splits a syllable around its main stack.
-- @param text       the whole syllable
-- @param mainStack  the main stack; it is spliced into the patterns unescaped
-- @return the part of text captured before mainStack, then the part captured
--         after it. When a pattern does not match, the corresponding result
--         is text unchanged (gsub performs no replacement).
local function findAffixes(text, mainStack)
	-- Assigning to a single local keeps only gsub's first result (the string).
	local before = gsub(text, "(.*)" .. mainStack .. ".*", "%1")
	local after = gsub(text, ".*" .. mainStack .. "(.*)", "%1")
	return before, after
end
-- Separates the vowel signs from a main stack.
-- @param mainStack  the stack to examine
-- @return mainStack with every run of vowel-sign characters removed, followed
--         by the first such run found (or "" when there is none)
local function findVowel(mainStack)
	local vowelRun = "[ཱ-ཽྀ]+"
	local stripped = gsub(mainStack, vowelRun, "")
	local vowel = match(mainStack, vowelRun)
	if not vowel then
		vowel = ""
	end
	return stripped, vowel
end
-- Breaks a (vowel-less) main stack into its three components.
-- @param mainStack  the stack to split
-- @return superjoined  "ར", "ལ" or "ས" when one of them sits on top of an
--                      allowed following letter, otherwise ""
-- @return radical      the first character after the superjoined letter,
--                      rewritten from its letters[n][2] form to letters[n][1]
-- @return subjoined    everything after the radical
-- NOTE(review): if nothing follows the superjoined letter (e.g. an empty
-- mainStack) the radical match yields nil and the concatenation below raises.
local function mainStackParts(mainStack)
	-- Candidate head letters, tried in this order; the first hit wins.
	local headPatterns = {
		"(ར)[ྐྒྔྗྙྟྡྣྦྨྩྫ]",
		"(ལ)[ྐྒྔྕྗྟྡྤྦྷ]",
		"(ས)[ྐྒྔྙྟྡྣྤྦྨྩ]",
	}
	local superjoined = ""
	for _, pattern in ipairs(headPatterns) do
		local found = match(mainStack, pattern)
		if found then
			superjoined = found
			break
		end
	end

	-- Certain stacks with a following ྱ / ྲ are not treated as having a
	-- superjoined letter: the leading letter is then the radical itself.
	local isException = false
	if superjoined == "ར" then
		isException = match(mainStack, "ར[^ྐྒྨ]ྱ") ~= nil
	elseif superjoined == "ས" then
		isException = match(mainStack, "ས[^ྐྒྤྦྨ]ྱ") ~= nil
			or match(mainStack, "ས[^ྐྒྣྤྦྨ]ྲ") ~= nil
	end
	if isException then
		superjoined = ""
	end

	local radical = match(mainStack, "^" .. superjoined .. "(.)")
	local subjoined = match(mainStack, "^" .. superjoined .. radical .. "(.*)")

	-- Normalise the radical to the first listed form of its letter.
	for _, entry in ipairs(letters) do
		radical = gsub(radical, entry[2], entry[1])
	end
	return superjoined, radical, subjoined
end
-- Turns a radical into the leading portion of the sort key.
-- Steps, in order: map any letters[n][3] stand-in back to letters[n][1];
-- replace the "༹" mark with marker b; rewrite "ཫ"/"ཬ" as "ཀ"/"ར" plus marker
-- d (when followed by b) or marker c (otherwise); finally append marker a
-- when the result does not already end in one of the markers b..d.
-- @param radical  the radical as returned by mainStackParts
-- @return the marked-up radical string
local function sortRadical(radical)
	for _, entry in ipairs(letters) do
		local standIn = entry[3]
		if standIn then
			radical = gsub(radical, standIn, entry[1])
		end
	end

	radical = gsub(radical, "༹", b)

	-- Marked forms first; the two rewrites touch disjoint text, so their
	-- relative order is irrelevant.
	radical = gsub(radical, "ཫ" .. b, "ཀ" .. d)
	radical = gsub(radical, "ཬ" .. b, "ར" .. d)

	-- Then any remaining bare occurrences, one character at a time.
	local bareForms = {
		["ཫ"] = "ཀ" .. c,
		["ཬ"] = "ར" .. c,
	}
	radical = gsub(radical, ".", bareForms)

	local unmarkedTail = "([^" .. b .. "-" .. d .. "])$"
	local marked = gsub(radical, unmarkedTail, "%1" .. a)
	return marked
end
-- Convert into base-6724 to reduce length.
-- Packs a list of letter values two at a time into single characters: each
-- pair (hi, lo) becomes the code point 0x4E00 + hi * (#letters + 1) + lo.
-- With the 81 entries in `letters` the base is 82, hence 82 * 82 = 6724
-- possible characters per pair.
-- @param value  array of values in 0..#letters; elements may be numbers or
--               numeric strings (Lua coerces them in the arithmetic below).
--               An odd-length array is treated as if it had a leading 0.
--               The array is not modified.
-- @return the packed string
-- Declared local: the original definition leaked `baseConvert` into the
-- global environment.
local function baseConvert(value)
	local base = #letters + 1
	local count = #value
	-- 1 when a virtual leading zero is needed to make the length even.
	local pad = count % 2
	local packed = {}
	for i = 1, (count + pad) / 2 do
		-- Index 0 only occurs for the virtual leading zero.
		local hi = value[i * 2 - 1 - pad] or 0
		local lo = value[i * 2 - pad]
		packed[i] = u(0x4E00 + hi * base + lo)
	end
	return table.concat(packed)
end
-- Converts one component of a syllable into a fixed-width list of letter
-- values.
-- @param part      the text of the component
-- @param partType  "superjoined", "prefix" or "vowel" (width 1),
--                  "subjoined" (width 9) or "suffix" (width 6); any other
--                  value leaves the width nil and the loop below raises
-- @return an array of exactly <width> entries. Each entry is the index in
--         `letters` of the character at that position (the last matching
--         entry wins), or 0 when the position is empty or the character is
--         not listed. A character made only of ASCII digits that is not in
--         `letters` is passed through unchanged as a string.
local function sortValue(part, partType)
	local widths = {
		superjoined = 1,
		prefix = 1,
		vowel = 1,
		subjoined = 9,
		suffix = 6,
	}
	local width = widths[partType]

	local values = {}
	for pos = 1, width do
		-- Positions past the end of the part are padded with "".
		local ch = ""
		if pos <= len(part) then
			ch = sub(part, pos, pos)
		end

		-- Deliberately no early exit: a later matching entry overrides an
		-- earlier one.
		local value = ch
		for index, entry in ipairs(letters) do
			if ch == entry[1] or ch == entry[2] or ch == entry[3] then
				value = index
			end
		end

		if value == "" or match(tostring(value), "[^0-9]") then
			value = 0
		end
		values[pos] = value
	end
	return values
end
-- Character-for-character substitutions that makeSortKey applies to the whole
-- text (via gsub with the "." pattern) before it is split into words and
-- syllables. Presumably these fold the Unicode "fixed-form" letters onto their
-- ordinary counterparts so both sort alike — confirm against the Tibetan
-- code chart.
local initSubs = {
["ཪ"] = "ར", ["ྺ"] = "ྭ", ["ྻ"] = "ྱ", ["ྼ"] = "ྲ"
}
-- Builds the sort key for a piece of text.
-- Each syllable contributes: the marked-up radical (sortRadical), then three
-- packed groups (baseConvert) holding superjoined + prefix, subjoined + vowel,
-- and suffix values respectively. The per-syllable keys are concatenated.
-- @param text  the text to sort
-- @param lang  language object; its makeEntryName method is called on the
--              text, and it is forwarded to Tibt.findMainStack
-- @param sc    script; accepted for interface compatibility, not read here
-- @return the NFC-normalised sort key string
function export.makeSortKey(text, lang, sc)
	-- Applies the letter-form normalisation used on both the syllable and
	-- its main stack: first replace forms 1 and 2 by the stand-in (form 3)
	-- where one exists, then fold form 2 onto form 1 for entries 42 onwards.
	local function unifyForms(str)
		for _, entry in ipairs(letters) do
			local standIn = entry[3]
			if standIn then
				str = gsub(str, entry[1], standIn)
				str = gsub(str, entry[2], standIn)
			end
		end
		for i = 42, #letters do
			str = gsub(str, letters[i][2], letters[i][1])
		end
		return str
	end

	text = lang:makeEntryName(text)
	text = gsub(text, ".", initSubs)

	local syllableKeys = {}
	for word in Tibt.getWords(text) do
		for syllable in Tibt.getSyllables(word) do
			-- The main stack is located on the raw syllable, before any
			-- normalisation.
			local rawStack = Tibt.findMainStack(syllable, lang)
			local unifiedSyllable = unifyForms(syllable)
			local unifiedStack = unifyForms(rawStack)

			local prefix, suffix = findAffixes(unifiedSyllable, unifiedStack)
			local bareStack, vowel = findVowel(unifiedStack)
			local superjoined, radical, subjoined = mainStackParts(bareStack)

			local headGroup = {
				table.concat(sortValue(superjoined, "superjoined")),
				table.concat(sortValue(prefix, "prefix")),
			}
			local bodyGroup = sortValue(subjoined, "subjoined")
			bodyGroup[#bodyGroup + 1] = table.concat(sortValue(vowel, "vowel"))
			local tailGroup = sortValue(suffix, "suffix")

			syllableKeys[#syllableKeys + 1] = sortRadical(radical)
				.. baseConvert(headGroup)
				.. baseConvert(bodyGroup)
				.. baseConvert(tailGroup)
		end
	end

	local key = table.concat(syllableKeys)
	-- Drop tsheg-like separators unless one of them is the entire key.
	if match(key, ".[་༌]") or match(key, "[་༌].") then
		key = gsub(key, "[་༌]", "")
	end
	return toNFC(key)
end
-- Language object for code "bo", used when tagging output text.
local bo = require("Module:languages").getByCode("bo")

-- Wraps text using Module:script utilities' tag_text, with `bo` as the
-- language. The utilities module is only loaded when tag is actually called.
local function tag(text)
	local scriptUtilities = require("Module:script utilities")
	return scriptUtilities.tag_text(text, bo)
end
-- Template-facing demo: sorts the positional arguments of the invocation by
-- their sort keys and returns them as a wikitext bullet list.
-- @param frame  Scribunto frame; its positional args are the terms to sort
-- @return a string with one "\n* <tagged term>" item per term, in sorted order
function export.showSorting(frame)
	local sc = require("Module:scripts").getByCode("Tibt")

	-- Compute each term's key exactly once. makeSortKey calls
	-- lang:makeEntryName, so it must be given the language object `bo`;
	-- the previous code passed the string "bo", which has no such method.
	local terms, sortkeys = {}, {}
	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
		if sortkeys[term] == nil then
			sortkeys[term] = export.makeSortKey(term, bo, sc)
		end
	end

	-- Strict "<" comparison, as table.sort requires.
	table.sort(terms, function(term1, term2)
		return sortkeys[term1] < sortkeys[term2]
	end)

	local items = {}
	for i, term in ipairs(terms) do
		items[i] = "\n* " .. tag(term)
	end
	return table.concat(items)
end
return export