မဝ်ဂျူ:zh-translit
မံက်ပြာကတ်
Documentation for this module may be created at မဝ်ဂျူ:zh-translit/doc
local m_str_utils = require("Module:string utilities")
local find_templates = require("Module:template parser").find_templates
local get_section = require("Module:pages").get_section
local gsub = string.gsub
local insert = table.insert
local safe_require = require("Module:utilities").safe_require
local split = m_str_utils.split
local toNFD = mw.ustring.toNFD
local trim = m_str_utils.trim
local ugsub = m_str_utils.gsub
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local uupper = m_str_utils.upper
-- Cache for the `MT` table of Module:zh/data/cmn-tag. Stays nil until Mandarin
-- text containing multi-byte characters is first processed in export.tr.
local tag
-- Maps a language code to an abbreviation; the abbreviation is compared against
-- the named parameters of {{zh-pron}} to find the readings for that language.
local lect_code = mw.loadData("Module:zh/data/lect codes").langcode_to_abbr
-- Table of public functions; returned at the end of the module.
local export = {}
--- Record that a manual transliteration is needed and signal failure.
-- Invokes the function exported by Module:debug/track with a per-language
-- tracking key, so callers can write `return fail(lang)`.
-- @param lang language code appended to the tracking key
-- @param request unused
-- @return always nil
local function fail(lang, request)
	local track = require("Module:debug/track")
	local key = "zh-translit/needs manual translit/" .. lang
	track(key)
	return nil
end
--- Load the level-2 "Chinese" section of a page.
-- @param title page title to read
-- @return whatever get_section yields for the page's wikitext, or false when
--   no title object can be built from `title` or the page has no content
local function get_content(title)
	local page = mw.title.new(title)
	if not page then
		return false
	end
	-- getContent() gives nil for a page that does not exist; report that as
	-- "no content" here instead of passing nil on to get_section.
	local wikitext = page:getContent()
	if not wikitext then
		return false
	end
	return get_section(wikitext, "Chinese", 2)
end
--- Match function emulating the regex ",(?! )": locate the next comma that is
-- not immediately followed by a space.
-- @param str string to search
-- @param start position to begin searching from
-- @return the comma's position twice (start and end of the match), or nothing
--   when no such comma exists
local function split_on_comma_without_space(str, start)
	while true do
		local comma = str:find(",", start)
		if not comma then
			return
		end
		-- Resume after this comma if it turns out to be followed by a space.
		start = comma + 1
		if str:sub(start, start) ~= " " then
			return comma, comma
		end
	end
end
--- Reconcile the readings given in one {{zh-pron}} parameter with the
-- transliteration found so far.
--
-- For "ltc" and "och" the raw parameter value is used as-is. For every other
-- language the value is split into items: on commas not followed by a space
-- for cmn, csp, wuu, yue and zhx-tai, and on "/" otherwise. Items without "="
-- are readings; of the items containing "=", only "cap=y" for cmn is acted on.
--
-- @param readings raw parameter value
-- @param lang language code
-- @param tr transliteration accepted so far, or nil
-- @return the accepted transliteration, nil if no reading was found, or false
--   when the readings disagree with each other or with `tr`
local function handle_readings(readings, lang, tr)
	if lang == "ltc" or lang == "och" then
		if tr and readings ~= tr then
			return false
		end
		return readings
	end

	if (
		lang == "cmn" or
		lang == "csp" or
		lang == "wuu" or
		lang == "yue" or
		lang == "zhx-tai"
	) then
		readings = split(readings, split_on_comma_without_space, true)
	else
		readings = split(readings, "/", true, true)
	end

	-- Remember what the caller passed in, so "cap=y" can tell a restored
	-- capitalisation marker apart from a newly introduced one.
	local tr_orig = tr
	for _, reading in ipairs(readings) do
		reading = trim(reading)
		if not reading:find("=") then
			if (
				not tr or
				tr == reading or
				gsub(ulower(tr), "%^", "") == reading
			) then
				tr = reading
			elseif ulower(reading) ~= tr then
				-- A second, different reading makes the result ambiguous.
				return false
			end
		elseif lang == "cmn" and reading == "cap=y" and tr then
			-- Only meaningful once a reading exists: without the `tr` guard,
			-- a "cap=y" item seen before any reading would try to
			-- concatenate nil and raise an error.
			local tr_cap = "^" .. tr
			if not tr_orig or tr_orig == tr_cap then
				tr = tr_cap
			end
		end
	end
	return tr
end
--- Scan a page section for {{zh-pron}} and {{zh-see}} templates.
--
-- For each {{zh-pron}}, the first non-empty named parameter whose name equals
-- lect_code[lang] is merged into `tr` through handle_readings. For each
-- {{zh-see}}, the trimmed first positional argument is appended to `see`
-- unless it is already recorded in `seen`.
--
-- @param content wikitext to scan
-- @param lang language code
-- @param see array of page titles still to be visited; appended to in place
-- @param seen set of titles already visited or queued; updated in place
-- @param tr transliteration accepted so far, or nil
-- @return the updated transliteration, nil if none was found, or false as
--   soon as handle_readings reports a conflict
local function iterate_content(content, lang, see, seen, tr)
	-- The parameter name being looked for is the same for every template.
	local param = lect_code[lang]
	for template in find_templates(content) do
		local name = template:get_name()
		if name == "zh-pron" then
			for k, v in pairs(template:get_arguments()) do
				if (
					#v > 0 and
					type(k) == "string" and
					k == param
				) then
					tr = handle_readings(v, lang, tr)
					break
				end
			end
			if tr == false then
				return tr
			end
		elseif name == "zh-see" then
			local target = template:get_arguments()[1]
			-- A {{zh-see}} with no first argument has nothing to follow;
			-- skip it so nil is never passed to trim.
			if target then
				target = trim(target)
				if not seen[target] then
					-- Mark on queueing so that repeated references to the
					-- same page are not queued (and fetched) more than once.
					seen[target] = true
					insert(see, target)
				end
			end
		end
	end
	return tr
end
--- Post-process a Mandarin reading taken from {{zh-pron}}.
-- Removes "#" characters, replaces remaining multi-byte characters with
-- readings looked up in the cmn-tag data (一 and 不 have fixed readings), and
-- uppercases the character that follows each "^" marker.
-- @param tr reading string
-- @return the processed string
local function finish_cmn(tr)
	tr = tr:gsub("#", "")
	-- Bytes \194-\244 begin a multi-byte UTF-8 sequence.
	if tr:match("[\194-\244]") then
		tag = tag or mw.loadData("Module:zh/data/cmn-tag").MT
		tr = tr:gsub(".[\128-\191]*", function(char)
			if char == "一" then
				return "yī"
			end
			if char == "不" then
				return "bù"
			end
			local entry = tag[char]
			local reading = entry and entry[1]
			if reading then
				-- temporarily use \1 for apostrophes, as it's not in %p
				return toNFD(reading):gsub("^[aeiou]", "\1%0")
			end
			-- No data for this character: returning nothing keeps it as-is.
		end)
		-- remove any initial apostrophes inserted by the previous function
		tr = ugsub(tr, "%f[^%z%s%p](^?)\1", "%1")
		tr = tr:gsub("\1", "'")
	end
	return (ugsub(tr, "%^('?.)", uupper))
end

--- Build the "ltc" / "och" transliteration of `text` one character at a time.
-- `tr` holds the per-character choices from {{zh-pron}} ("," separated for
-- ltc, ";" separated for och); "y" selects the first entry of the character's
-- data module, anything else is passed through tonumber and used as an index.
-- @param text the term being transliterated
-- @param lang "ltc" or "och"
-- @param tr raw parameter value
-- @return the transliteration, or nil (after calling fail) when a character
--   has no data module, when `tr` is "n", or when a character has several
--   entries and no explicit choice
local function finish_reconstruction(text, lang, tr)
	if tr == "n" then
		return fail(lang)
	end
	local is_ltc = lang == "ltc"
	local picks = tr and split(tr, is_ltc and "," or ";", true, true) or {}
	local prefix = "Module:zh/data/" .. lang .. "-pron" .. (lang == "och" and "-ZS" or "") .. "/"
	for pos = 1, ulen(text) do
		local readings = safe_require(prefix .. usub(text, pos, pos))
		local pick = picks[pos]
		if not readings or ((not pick or pick == "y") and #readings > 1) then
			return fail(lang)
		end
		if pick == "y" then
			pick = 1
		elseif pick then
			pick = tonumber(pick)
		end
		-- Fall back to the first entry when the choice is missing or unusable.
		local entry = pick and readings[pick] or readings[1]
		if is_ltc then
			local data = mw.loadData("Module:ltc-pron/data")
			local initial, final, tone = require("Module:ltc-pron").infer_categories(entry)
			if tone ~= "" then
				tone = "<sup>" .. tone .. "</sup>"
			end
			picks[pos] = data.initialConv["Zhengzhang"][initial] .. data.finalConv["Zhengzhang"][final] .. tone
		else
			picks[pos] = entry[6]
		end
	end
	local joined = table.concat(picks, " ")
	if lang == "och" then
		joined = "*" .. joined
	end
	return joined
end

--- Transliterate a Chinese term from the {{zh-pron}} data on its entry page.
-- Reads the "Chinese" section of the page titled `text`; if that yields no
-- reading, follows the pages named by {{zh-see}} templates. The reading found
-- is then formatted according to the language.
-- @param text term to transliterate (also used as the page title)
-- @param lang language code; "zh" and "lzh" are treated as "cmn", and a code
--   absent from lect_code is replaced by its full code from Module:languages
-- @param sc unused
-- @return the transliteration followed by a space; `text` itself when it is
--   nil or empty; nil when no unambiguous transliteration can be produced
function export.tr(text, lang, sc)
	if not text or text == "" then
		return text
	end
	if lang == "zh" or lang == "lzh" then
		lang = "cmn"
	end
	if not lect_code[lang] then
		lang = require("Module:languages").getByCode(lang, nil, true):getFullCode()
	end

	local content = get_content(text)
	if not content then
		return fail(lang)
	end

	local queue = {}
	local visited = { [text] = true }
	local tr = iterate_content(content, lang, queue, visited)
	if tr == nil then
		-- The queue may grow while it is being walked, so its length is
		-- re-read on every pass.
		local cursor = 1
		while cursor <= #queue do
			local title = queue[cursor]
			local linked = get_content(title)
			if linked then
				tr = iterate_content(linked, lang, queue, visited, tr)
				if tr == false then
					return fail(lang)
				end
				visited[title] = true
			end
			cursor = cursor + 1
		end
	end
	if not tr then
		return fail(lang)
	end

	if lang == "cmn" then
		tr = finish_cmn(tr)
	elseif lang == "csp" or lang == "yue" or lang == "zhx-tai" then
		tr = tr:gsub("%d[%d%*%-]*%f[^%d%*]", "<sup>%0</sup>")
	elseif lang == "hak" or lang == "nan" then
		-- TODO
	elseif lang == "ltc" or lang == "och" then
		tr = finish_reconstruction(text, lang, tr)
		if tr == nil then
			-- Failure has already been tracked by the helper.
			return nil
		end
	elseif lang == "nan-tws" then
		tr = require("Module:nan-pron").pengim_display(tr)
	elseif lang == "wuu" then
		local wuu_pron = require("Module:wuu-pron")
		if tr:match(';') then
			--TODO
			return fail(lang)
		end
		if tr:match(':') then
			tr = wuu_pron.wugniu_format(tr:sub(4))
		else
			tr = wuu_pron.wugniu_format(wuu_pron.wikt_to_wugniu(tr))
		end
	elseif lang == "zhx-sic" then
		local spaced = ugsub(tr, "([%d-])(%a)", "%1 %2")
		tr = spaced:gsub("%d[%d%*%-]*%f[^%d%*]", "<sup>%0</sup>")
	else
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end

	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return tr .. " "
end
-- Return the table of public functions as the module's value.
return export