မာတိကာသို့ ခုန်သွားရန်

မဝ်ဂျူ:Tibt-sortkey

နူ ဝိက်ရှေန်နရဳ

This module sorts text written in the အက္ခရ်တိဗိတ်. It is used to sort အဒပ်, ဗဝ်လ်တဳ, ဂါဟရဳ, Brokkat, Chocangaca, Idu, Changthang, ဂါတ်လေဝ် ကူတာန်, Bumthangkha, Kalaktang Monpa, Ladakhi, Lunanakha, Layakha, Nyenkha, Manangba, တိဗိတ်တြေံ, ချာလဳ, ချာန်လာ, တဝါန် မန်ပါ, တိဗိတ်ဝၚ်ဂန္ထ, Khengkha, ခူတာတ်, သှေပါ, ဂျေန်ကျေန်, and Zangskari. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see မဝ်ဂျူ:Tibt-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the sort fails, returns nil.

local export = {}

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char

local Tibt = require("Module:Tibt-common")
-- Marker characters at the private-use code points U+E000..U+E003.
-- sortRadical places them after the radical: `a` is appended when the radical
-- does not already end in one of b..d, `b` replaces the "༹" mark, and `c`/`d`
-- are attached when ཫ and ཬ are rewritten as ཀ and ར.
local a, b, c, d = u(0xE000), u(0xE001), u(0xE002), u(0xE003)

-- Ordered list of letters. An entry's position in this list is the numeric
-- weight that sortValue assigns to it, so the order of entries is significant.
-- Each entry is {form, alternate form[, placeholder]}:
--   [1] the form other spellings are converted to,
--   [2] an alternate spelling treated as the same letter (for most consonants
--       this is the subjoined form; for some vowels a decomposed sequence),
--   [3] optional; makeSortKey substitutes it for [1] and [2], and sortRadical
--       converts it back to [1].
-- NOTE(review): every third element reads as an empty string here, but the way
-- it is substituted suggests it is meant to be a distinct single placeholder
-- character -- confirm nothing was lost when this text was copied.
local letters = {
	{"ཀ", "ྐ"}, {"ཀ༹", "ྐ༹", ""}, {"ཫ", "ཫ"}, {"ཫ༹", "ཫ༹", ""}, {"ཁ", "ྑ"}, {"ཁ༹", "ྑ༹", ""}, {"ག", "ྒ"}, {"ག༹", "ྒ༹", ""}, {"ང", "ྔ"}, {"ང༹", "ྔ༹", ""}, {"ཅ", "ྕ"}, {"ཆ", "ྖ"}, {"ཇ", "ྗ"}, {"ཉ", "ྙ"}, {"ཉ༹", "ྙ༹", ""}, {"ཊ", "ྚ"}, {"ཊ༹", "ྚ༹", ""}, {"ཋ", "ྛ"}, {"ཋ༹", "ྛ༹", ""}, {"ཌ", "ྜ"}, {"ཌ༹", "ྜ༹", ""}, {"ཎ", "ྞ"}, {"ཎ༹", "ྞ༹", ""}, {"ཏ", "ྟ"}, {"ཏ༹", "ྟ༹", ""}, {"ཐ", "ྠ"}, {"ཐ༹", "ྠ༹", ""}, {"ད", "ྡ"}, {"ད༹", "ྡ༹", ""}, {"ན", "ྣ"}, {"ན༹", "ྣ༹", ""}, {"པ", "ྤ"}, {"པ༹", "ྤ༹", ""}, {"ཕ", "ྥ"}, {"ཕ༹", "ྥ༹", ""}, {"བ", "ྦ"}, {"བ༹", "ྦ༹", ""}, {"མ", "ྨ"}, {"མ༹", "ྨ༹", ""}, {"ཙ", "ྩ"}, {"ཚ", "ྪ"}, {"ཛ", "ྫ"}, {"ཝ", "ྭ"}, {"ཝ༹", "ྭ༹", ""}, {"ཞ", "ྮ"}, {"ཞ༹", "ྮ༹", ""}, {"ཟ", "ྯ"}, {"ཟ༹", "ྯ༹", ""}, {"འ", "ྰ"}, {"འ༹", "ྰ༹", ""}, {"ཡ", "ྱ"}, {"ཡ༹", "ྱ༹", ""}, {"ར", "ྲ"}, {"ར༹", "ྲ༹", ""}, {"ཬ", "ཬ"}, {"ཬ༹", "ཬ༹", ""}, {"ལ", "ླ"}, {"ལ༹", "ླ༹", ""}, {"ཤ", "ྴ"}, {"ཤ༹", "ྴ༹", ""}, {"ཥ", "ྵ"}, {"ཥ༹", "ྵ༹", ""}, {"ས", "ྶ"}, {"ས༹", "ྶ༹", ""}, {"ཧ", "ྷ"}, {"ཧ༹", "ྷ༹", ""}, {"ཨ", "ྸ"}, {"ཨ༹", "ྸ༹", ""}, {"ཱ", "ཱ"}, {"ི", "ི"}, {u(0xF73), "ཱི"}, {"ུ", "ུ"}, {u(0xF75), "ཱུ"}, {u(0xF76), "ྲྀ"}, {u(0xF77), "ྲཱྀ"}, {u(0xF78), "ླྀ"}, {u(0xF79), "ླཱྀ"}, {"ེ", "ེ"}, {"ཻ", "ཻ"}, {"ོ", "ོ"}, {"ཽ", "ཽ"}
}

-- Splits a syllable around its main stack.
-- Returns two strings: whatever precedes `mainStack` in `text`, and whatever
-- follows it. `mainStack` is spliced into the patterns as-is (not escaped).
-- The leading `.*` is greedy, so with Lua pattern semantics the split is made
-- at the last occurrence; when `mainStack` does not occur, neither pattern
-- matches and both results are `text` unchanged.
local function findAffixes(text, mainStack)
	local leadingPattern = "(.*)" .. mainStack .. ".*"
	local trailingPattern = ".*" .. mainStack .. "(.*)"
	local leading = gsub(text, leadingPattern, "%1")
	local trailing = gsub(text, trailingPattern, "%1")
	return leading, trailing
end

-- Separates the vowel signs from a main stack.
-- Returns the stack with every run of vowel-sign characters removed, followed
-- by the first such run ("" when the stack contains none).
local function findVowel(mainStack)
	local vowelRun = "[ཱ-ཽྀ]+"
	local firstVowel = match(mainStack, vowelRun) or ""
	local withoutVowels = gsub(mainStack, vowelRun, "")
	return withoutVowels, firstVowel
end

-- Breaks a (vowel-less) main stack into three pieces:
--   superjoined: a leading ར, ལ or ས when it is followed by one of a fixed set
--                of subjoined letters, otherwise "";
--   radical:     the single character after the superjoined letter, converted
--                from its alternate form (letters[n][2]) to letters[n][1];
--   subjoined:   everything after the radical.
-- NOTE(review): if no character follows the superjoined part (e.g. an empty
-- stack) `radical` is nil and the concatenation below raises an error.
local function mainStackParts(mainStack)
	local superjoined = match(mainStack, "(ར)[ྐྒྔྗྙྟྡྣྦྨྩྫ]")
	if not superjoined then
		superjoined = match(mainStack, "(ལ)[ྐྒྔྕྗྟྡྤྦྷ]")
	end
	if not superjoined then
		superjoined = match(mainStack, "(ས)[ྐྒྔྙྟྡྣྤྦྨྩ]")
	end
	superjoined = superjoined or ""

	-- Some three-letter combinations mean the first letter is the radical
	-- itself rather than a superjoined head letter.
	local headIsRadical = false
	if superjoined == "ར" then
		headIsRadical = match(mainStack, "ར[^ྐྒྨ]ྱ") ~= nil
	elseif superjoined == "ས" then
		headIsRadical = match(mainStack, "ས[^ྐྒྤྦྨ]ྱ") ~= nil
			or match(mainStack, "ས[^ྐྒྣྤྦྨ]ྲ") ~= nil
	end
	if headIsRadical then
		superjoined = ""
	end

	local radical = match(mainStack, "^" .. superjoined .. "(.)")
	local subjoined = match(mainStack, "^" .. superjoined .. radical .. "(.*)")

	for index = 1, #letters do
		local entry = letters[index]
		radical = gsub(radical, entry[2], entry[1])
	end

	return superjoined, radical, subjoined
end

-- Builds the leading part of a syllable's sort key from its radical.
-- Steps, in order:
--   1. placeholders (letters[n][3]) are turned back into letters[n][1];
--   2. the "༹" mark becomes the marker `b`;
--   3. ཫ and ཬ are rewritten as ཀ and ར followed by `d` (when they carried
--      `b`) or `c` (when they did not);
--   4. `a` is appended unless the result already ends in one of b..d.
local function sortRadical(radical)
	local key = radical

	for index = 1, #letters do
		local entry = letters[index]
		local placeholder = entry[3]
		if placeholder then
			key = gsub(key, placeholder, entry[1])
		end
	end

	key = gsub(key, "༹", b)

	-- The marked forms are handled first so the bare-letter table below only
	-- sees occurrences that did not carry `b`.
	key = gsub(key, "ཫ" .. b, "ཀ" .. d)
	key = gsub(key, "ཬ" .. b, "ར" .. d)
	key = gsub(key, ".", {["ཫ"] = "ཀ" .. c, ["ཬ"] = "ར" .. c})

	local unmarkedEnding = "([^" .. b .. "-" .. d .. "])$"
	local finished = gsub(key, unmarkedEnding, "%1" .. a)
	return finished
end

-- Convert into base-6724 to reduce length.
-- `value` is a sequence of letter weights (numbers, or numeric strings, which
-- Lua coerces in the arithmetic below). Consecutive pairs are combined as two
-- digits in base (#letters + 1) and emitted as one character offset from
-- U+4E00; with the 81-entry `letters` table the squared base is 6724. A
-- sequence of odd length is treated as if it had a leading 0.
-- Returns the resulting string ("" for an empty sequence).
--
-- Declared `local`: the original definition omitted the keyword and therefore
-- created a global function. The padding is done on a copy so the caller's
-- table is left untouched.
local function baseConvert(value)
	local base = #letters + 1

	local digits = {}
	if #value % 2 ~= 0 then
		digits[1] = 0
	end
	for _, digit in ipairs(value) do
		digits[#digits + 1] = digit
	end

	local newValue = {}
	for i = 1, #digits / 2 do
		newValue[i] = u(0x4E00 + digits[i * 2 - 1] * base + digits[i * 2])
	end
	return table.concat(newValue)
end

-- Converts one part of a syllable into a fixed-length list of letter weights.
-- `partType` selects the list length: 1 for "superjoined", "prefix" and
-- "vowel", 9 for "subjoined", 6 for "suffix". Any other value leaves the
-- length nil, which makes the numeric `for` below raise an error.
-- Each slot holds the index in `letters` of the character at that position
-- (matched against any of the entry's three forms), or 0 when the position is
-- past the end of `part` or the character is not a digit-only value after the
-- lookup.
local function sortValue(part, partType)
	local slotCounts = {
		superjoined = 1, prefix = 1, vowel = 1,
		subjoined = 9,
		suffix = 6,
	}
	local length = slotCounts[partType]

	-- First pass: one character (or "") per slot.
	local slots = {}
	for position = 1, length do
		local character = ""
		if position <= len(part) then
			character = sub(part, position, position)
		end
		slots[position] = character
	end

	-- Second pass: replace each character by its weight. The whole table is
	-- scanned without stopping, so the last matching entry determines the weight.
	for position = 1, #slots do
		local character = slots[position]
		for weight, letter in ipairs(letters) do
			if character == letter[1] or character == letter[2] or character == letter[3] then
				slots[position] = weight
			end
		end
		local current = slots[position]
		if current == "" or match(tostring(current), "[^0-9]") then
			slots[position] = 0
		end
	end

	return slots
end

-- Single-character replacements that makeSortKey applies to the whole text
-- (via gsub with "." and this table) before splitting it into words and
-- syllables.
-- NOTE(review): the keys look like the fixed-form letter variants (U+0F6A,
-- U+0FBA..U+0FBC) being folded onto the ordinary letters -- confirm.
local initSubs = {
	["ཪ"] = "ར", ["ྺ"] = "ྭ", ["ྻ"] = "ྱ", ["ྼ"] = "ྲ"
}

-- Rewrites the spellings in `str` into the forms the later steps expect:
-- for entries with a placeholder, both spellings become the placeholder; then,
-- from entry 42 onwards, the alternate form is replaced by the first form.
-- NOTE(review): 42 is a hard-coded index into `letters`; confirm it still
-- points at the intended first entry.
local function normaliseForms(str)
	for _, letter in ipairs(letters) do
		local placeholder = letter[3]
		if placeholder then
			str = gsub(str, letter[1], placeholder)
			str = gsub(str, letter[2], placeholder)
		end
	end
	for index = 42, #letters do
		str = gsub(str, letters[index][2], letters[index][1])
	end
	return str
end

-- Generates a sort key for `text`.
--   text: the text to sort.
--   lang: a language object; its makeEntryName method is called on the text
--         and it is passed on to Tibt.findMainStack.
--   sc:   accepted but not used in this function.
-- Every syllable contributes: the radical key, then the packed weights of
-- (superjoined, prefix), of (subjoined letters, vowel) and of the suffix.
-- The per-syllable keys are concatenated and returned in NFC.
function export.makeSortKey(text, lang, sc)
	local entryName = lang:makeEntryName(text)
	entryName = gsub(entryName, ".", initSubs)

	local keys = {}
	for word in Tibt.getWords(entryName) do
		for rawSyllable in Tibt.getSyllables(word) do
			-- The main stack is located on the syllable as written, before
			-- either string is normalised.
			local rawStack = Tibt.findMainStack(rawSyllable, lang)
			local syllable = normaliseForms(rawSyllable)
			local stack = normaliseForms(rawStack)

			local prefix, suffix = findAffixes(syllable, stack)
			local bareStack, vowel = findVowel(stack)
			local superjoined, radical, subjoined = mainStackParts(bareStack)

			local headWeights = {
				table.concat(sortValue(superjoined, "superjoined")),
				table.concat(sortValue(prefix, "prefix")),
			}
			local stackWeights = sortValue(subjoined, "subjoined")
			stackWeights[#stackWeights + 1] = table.concat(sortValue(vowel, "vowel"))
			local suffixWeights = sortValue(suffix, "suffix")

			keys[#keys + 1] = sortRadical(radical)
				.. baseConvert(headWeights)
				.. baseConvert(stackWeights)
				.. baseConvert(suffixWeights)
		end
	end

	local joined = table.concat(keys)

	-- Drop the two separator marks unless one of them is the entire string.
	if match(joined, ".[་༌]") or match(joined, "[་༌].") then
		joined = gsub(joined, "[་༌]", "")
	end

	return toNFC(joined)
end

-- Language object for the code "bo", looked up once when the module loads.
local bo = require("Module:languages").getByCode("bo")

-- Passes `text` and the `bo` language object to tag_text from
-- Module:script utilities and returns its result.
local function tag(text)
	local scriptUtilities = require("Module:script utilities")
	return scriptUtilities.tag_text(text, bo)
end

-- Sorts the positional arguments of `frame` with makeSortKey and returns them
-- as a wikitext bullet list ("\n* " before each tagged term); returns "" when
-- there are no arguments.
--
-- makeSortKey calls `lang:makeEntryName(...)`, so it needs a language object.
-- The previous version passed the string "bo", which has no such method and
-- raised an error for any non-empty input. The `bo` object is passed instead.
-- Each term's key is computed once up front rather than inside the comparator.
function export.showSorting(frame)
	local sc = require("Module:scripts").getByCode("Tibt")

	local terms = {}
	local sortKeys = {}
	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
		if sortKeys[term] == nil then
			sortKeys[term] = export.makeSortKey(term, bo, sc)
		end
	end

	-- Strict `<` on strings, as table.sort requires.
	table.sort(terms, function(term1, term2)
		return sortKeys[term1] < sortKeys[term2]
	end)

	local lines = {}
	for i, term in ipairs(terms) do
		lines[i] = "\n* " .. tag(term)
	end

	return table.concat(lines)
end

return export