မဝ်ဂျူ:ja-pron
မံက်ပြာကတ်
Documentation for this module may be created at မဝ်ဂျူ:ja-pron/doc
local m_str_utils = require("Module:string utilities")
local concat = table.concat
local gsplit = m_str_utils.gsplit
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local min = math.min
local split = m_str_utils.split
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC
local lang = require("Module:languages").getByCode("ja")
local kana_to_romaji = require("Module:Hrkt-translit").tr
local m_accent = require("Module:accent qualifier")
-- also [[Module:qualifier]]
local PAGENAME = mw.loadData("Module:headword/data").pagename
local range = mw.loadData("Module:ja/data/range")
local a_kana = range.vowels.a
local i_kana = range.vowels.i
local u_kana = range.vowels.u
local e_kana = range.vowels.e
local o_kana = range.vowels.o
local n_kana = range.vowels.n
local submoraic_kana = range.submoraic_kana
local export = {}
-- Short reference codes accepted in the accent "ref" parameters, mapped to the
-- name of the reference template that each code expands to.
local ref_template_name_data = {
	DJR = "R:Daijirin",
	DJR4 = "R:Daijirin4",
	DJS = "R:Daijisen",
	KDJ = "R:Kokugo Dai Jiten",
	NHK = "R:NHK Hatsuon",
	NKD2 = "R:Nihon Kokugo Daijiten 2 Online",
	SMK2 = "R:Shinmeikai2",
	SMK5 = "R:Shinmeikai5",
	SMK7 = "R:Shinmeikai7",
	SMK8 = "R:Shinmeikai8",
	SKK8 = "R:Sankoku8",
	ZAJ = "R:Zenkoku Akusento Jiten",
	JEL = "R:Kenkyusha JEL Pocket",
	JAC = "R:ja:JAccent",
}
-- Builds the reference markup for one accent entry.
--
-- `text` is a comma-separated list of reference names. Each name is logged
-- with mw.log and then handled as follows:
--   * a code found in `ref_template_name_data` becomes a <ref> extension tag
--     (named after the code) wrapping a call to the mapped template;
--   * any other name containing "ref" is passed through frame:preprocess;
--   * anything else is only recorded via the tracking module and contributes
--     nothing to the output.
-- Returns the concatenation of all generated pieces.
local function add_acc_refs(frame, text)
	local pieces = {}
	for name in gsplit(text, ",") do
		mw.log(name)
		local template = ref_template_name_data[name]
		if not template then
			if match(name, "ref") then
				insert(pieces, frame:preprocess(name))
			else
				-- [[Special:WhatLinksHere/Wiktionary:Tracking/ja-pron/unrecognized ref]]
				require("Module:debug").track("ja-pron/unrecognized ref")
			end
		else
			local wikitext = "{{" .. template .. "}}"
			insert(pieces, frame:extensionTag("ref", wikitext, {name = name}))
		end
	end
	return concat(pieces)
end
-- Template entry point: renders the pronunciation section as an HTML <ul>.
--
-- Reads the arguments of the parent frame:
--   1=, 2=, ...        kana spellings (defaults to the page name)
--   accent= / acc=     list of pitch-accent classes, one list item each
--   accentN_loc=       qualifier shown before accent N (defaults to Tokyo)
--   accentN_ref=       comma-separated reference names for accent N
--   accentN_note=      free text appended after accent N
--   dev= / devm=       devoiced mora positions, passed to accent() and ipa()
--   dev2=              passed through to accent() and ipa()
--   audio= / a=        audio file name
--
-- Returns a newline followed by the serialized list and any categories.
function export.show(frame)
	local params = {
		[1] = {default = PAGENAME, list = true},
		["accent"] = {list = true},
		["accent\1_loc"] = {list = true, allow_holes = true},
		["accent\1_ref"] = {list = true, allow_holes = true},
		["accent\1_note"] = {list = true, allow_holes = true},
		["acc"] = {alias_of = "accent", list = true},
		["acc\1_loc"] = {alias_of = "accent\1_loc", list = true},
		["acc\1_ref"] = {alias_of = "accent\1_ref", list = true},
		["acc\1_note"] = {alias_of = "accent\1_note", list = true},
		["dev"] = {},
		["dev2"] = {},
		["devm"] = {},
		["a"] = {alias_of = "audio"},
		["audio"] = {}
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local au = args.audio
	local dev = args.dev or args.devm
	local dev2 = args.dev2
	-- The length operator replaces the deprecated Lua 5.0 `table.getn`.
	local maxindex = #args[1]
	local html_list_main = mw.html.create("ul")
	local categories = {}

	-- Deals with the accents
	local a, al, ar, an = args.accent, args.accent_loc, args.accent_ref, args.accent_note
	for i, position in ipairs(a) do
		-- Accents beyond the number of spellings reuse the last spelling.
		local text = args[1][min(maxindex, i)]
		-- Use a local for the default instead of writing into the argument table.
		local location = al[i] or "[[w:Tokyo dialect|Tokyo]]"
		local result = m_accent.format_qualifiers(lang, {location}) .. " "
		result = result .. export.accent(text, position, dev, dev2)
		if ar[i] then
			result = result .. add_acc_refs(frame, ar[i])
		else
			require("Module:debug").track("ja-pron/unsourced accent")
		end
		result = result .. (an[i] and (" " .. an[i]) or "")
		html_list_main:tag("li"):wikitext(result)
	end

	-- Deals with the IPA
	local m_IPA, no_acc = require("Module:IPA"), false
	for i, text in ipairs(args[1]) do
		html_list_main:tag("li"):wikitext(
			m_IPA.format_IPA_full {
				lang = lang,
				items = {{ pron = "[" .. export.ipa(text, dev, dev2) .. "]" }},
			}
		)
		if not a[i] then
			no_acc = true
		end
	end
	if no_acc then
		-- Categorisation of spellings without a pitch accent is currently disabled:
		-- insert(categories, lang:getCanonicalName() .. " terms with IPA pronunciation missing pitch accent")
	end

	-- Deals with the audio
	if au then
		html_list_main:tag("li"):wikitext(
			require("Module:audio").format_audio {
				lang = lang,
				file = au,
			}
		)
	end

	return "\n" .. tostring(html_list_main) ..
		(#categories > 0 and require("Module:utilities").format_categories(categories, lang) or "")
end
-- Converts kana text into an IPA transcription string (returned without the
-- surrounding brackets).
--
-- Parameters:
--   text  kana string, or a frame-like table whose `args` supply
--         args[1], args.dev and args.dev2
--   dev   optional comma-separated list of 1-based mora positions to mark
--         as devoiced
--   dev2  obsolete; any non-empty value raises an error asking the editor
--         to merge it into `dev`
--
-- The function romanizes the kana with `kana_to_romaji` and then applies an
-- ordered series of substitutions. The order of the substitutions matters:
-- later patterns rely on the output of earlier ones.
function export.ipa(text, dev, dev2)
-- When called with a frame-like table, pull the arguments out of `args`.
if type(text) == "table" then
text, dev, dev2 = text.args[1], text.args["dev"], text.args["dev2"] end
dev = dev or ""
dev2 = dev2 or ""
if dev2 ~= "" then error("Please remove parameter dev2 and change parameter dev to \"dev=" .. dev .. "," .. dev2 .. "\"") end
-- Convert 〜 and 〰 to a regular ー.
text = gsub(text, "[〜〰]", "ー")
-- position_mora[k] is the character index at which mora k ends: every
-- character that is not a space, a submoraic kana or "." starts a mora, and
-- a directly following submoraic kana is counted as part of it.
local position_mora = {}
for i = 1, len(text) do
if not match(sub(text, i, i), "[ " .. submoraic_kana .. "%.]") then
local nxt = sub(text, i + 1,i + 1)
if nxt and match(nxt, "[" .. submoraic_kana .. "]") then
insert(position_mora, i + 1)
else
insert(position_mora, i)
end
end
end
-- insert @ to stand for devoicing
if dev ~= "" then
for position in gsplit(dev, ",") do
position = tonumber(position)
if #position_mora == position then
-- Last mora: the marker simply goes at the end of the text.
text = text .. "@"
else
local position_devspace = position_mora[position]
text = sub(text, 1, position_devspace) .. "@" .. sub(text, position_devspace+1, -1)
end
-- The inserted "@" shifts every later mora one character to the right.
for i = position + 1, #position_mora do
position_mora[i] = position_mora[i] + 1
end
end
end
-- Romanize (keeping dots) and normalize to NFC.
text = toNFC(kana_to_romaji(text, "ja", nil, {keep_dot = true, disambig = true}))
-- l -> r, v -> b, apostrophe -> ʔ, and the "@" marker -> combining ring below.
text = gsub(text, "[lv'@]", {
["l"] = "r", ["v"] = "b", ["'"] = "ʔ", ["@"] = "̥"
})
-- Hyphens which have been geminated over are removed; otherwise converted to dots.
text = gsub(text, "([bcdfghjkmnprstvw])%-%1", "%1%1")
:gsub("-", ".")
-- Doubled consonants (optionally separated by whitespace): the first member
-- receives the combining mark U+031A; doubled b/d/g additionally get a ring
-- on the second member, doubled j/z get "d̚" as first member, and "s" before
-- "sh" becomes ɕ.
text = text:gsub("([kprt])(%s*)%1", "%1̚%2%1")
:gsub("t(%s*)ch", "t̚%1ch")
:gsub("([bd])(%s*)%1", "%1̚%2%1̥")
:gsub("g(%s*)g", "g̚%1g̊")
:gsub("([jz])(%s*)%1", "d̚%2%1")
:gsub("s(%s*)sh", "ɕ%1ɕ")
-- "ei" is treated as a long e.
text = gsub(text, "ei", "ē")
-- Long vowels become vowel + ː; single romaji consonant letters are mapped to
-- IPA symbols. ("v" is in the character class but has no table entry, so it
-- would be left unchanged.)
text = gsub(text, "[āēīōūfvjryz]", {
["ā"] = "aː", ["ē"] = "eː", ["ī"] = "iː", ["ō"] = "oː", ["ū"] = "uː",
["f"] = "ɸ", ["j"] = "d͡ʑ", ["r"] = "ɾ", ["y"] = "j", ["z"] = "d͡z" })
-- Romaji digraphs sh / ch / ts.
text = gsub(text, "[sct][hs]", {
["sh"] = "ɕ",
["ch"] = "t͡ɕ",
["ts"] = "t͡s" })
-- After a vowel, a length mark or a ring below, d͡z / d͡ʑ lose the "d͡" part.
text = gsub(text, "([aeiouː̥])d͡([zʑ])", "%1%2")
-- Insert ʲ between the listed consonants and a following i.
text = gsub(text, "([bdɸgkmnpɾstz][̥̊]*)i", "%1ʲi")
-- Consonant + j becomes consonant + ʲ.
text = gsub(text, "([bdɸgkmnpɾstwz][̥̊]*)j", "%1ʲ")
-- Consonant + w becomes consonant + ᵝ.
text = gsub(text, "([bɕdɸghjkmpɾstzʑʲ][̥̊]*)w", "%1ᵝ")
text = gsub(text, "nʲ", "ɲ̟")
-- A run of n at the very end of the text becomes the same number of ɴ.
text = gsub(text, "n+$", function(n)
return ("ɴ"):rep(#n)
end)
-- Add a combining tilde to the character preceding ɴ, or preceding an n that
-- is itself followed by something other than a, e, o, u or whitespace.
text = gsub(text, "([^ ː_nɴʔ])(ː*)ɴ", "%1̃%2ɴ")
text = gsub(text, "([^ ː_nʔ])(ː*)n([^aeou%s])", "%1̃%2n%3")
-- The following substitutions rewrite n (and any run of n/spaces after it)
-- according to the segment that follows.
-- Before b, m, p: n -> m.
text = gsub(text, "n[n ]*[bmp]", function(m)
return m:gsub("n", "m")
end)
-- Before a tie-barred affricate ending in ɕ or ʑ: n -> ɲ̟.
text = gsub(text, "n[n ]*.͡[ɕʑ]", function(m)
return m:gsub("n", "ɲ̟")
end)
-- Before ɲ̟: n -> ɲ̟.
text = gsub(text, "n[n ]*ɲ̟", function(m)
return m:gsub("n", "ɲ̟")
end)
-- Before ɾ: n -> n̺.
text = gsub(text, "n[n ]*ɾ", function(m)
return m:gsub("n", "n̺")
end)
-- "_ng" is replaced by ŋ.
text = gsub(text, "_ng", "ŋ")
-- Before k or g: n -> ŋ, copying any ʲ/ᵝ that follows the k/g onto the ŋ.
text = gsub(text, "(n[n ]*)([kg])([ʲᵝ]*)", function(m1, m2, m3)
return m1:gsub("n", "ŋ" .. m3) .. m2 .. m3
end)
-- "_nw" is replaced by nᵝ.
text = gsub(text, "_nw", "nᵝ")
-- Before ɸ, s, z, ɕ, h, j, w: n -> ɰ̃.
text = gsub(text, "n[n ]*[ɸszɕhjw]", function(m)
return m:gsub("n", "ɰ̃")
end)
text = gsub(text, "([n ]*n)[ʔ_]", function(m) -- ʔ to be removed once Hrkt-translit `disambig` flag is implemented, as ʔ will always represent a glottal
return m:gsub("n", "ɰ̃")
end)
-- Before a space followed by a vowel: n -> ɰ̃.
text = gsub(text, "n[n ]* [aeiou]", function(m)
return m:gsub("n", "ɰ̃")
end)
-- h before i / j / u.
text = gsub(text, "h[iju]", {
["hi"] = "çi", ["hj"] = "ç",
["hu"] = "ɸu"
})
-- h directly before ç or ɸ assimilates, giving a doubled consonant.
text = gsub(text, "h([çɸ])", "%1%1")
-- Collapse a run of one of these consonants into the consonant followed by
-- ː marks; the number of marks is derived from the two position captures.
for _, cons in ipairs{"ç", "ɕ", "ɸ", "h", "j", "m", "n", "ɴ", "ŋ", "ɾ", "s", "w", "z", "ʑ"} do
text = gsub(text, "(" .. cons .. ")()" .. cons .. "+()", function(cons, i, j)
return cons .. ("ː"):rep(j - i)
end)
end
-- Same idea for consonants written as a base character plus a modifier;
-- char1 is the first UTF-8 character of `cons`, char2 the remainder.
for _, cons in ipairs{"n̺", "nᵝ", "ɲ̟", "ŋʲ", "ŋᵝ", "ɰ̃"} do
local char1, char2 = cons:match("(.[\128-\191]*)(.*)")
text = gsub(text, "(" .. cons .. ")()" .. char1 .. "[" .. char1 .. char2 .. "]*" .. char2 .. "()", function(cons, i, j)
return cons .. ("ː"):rep((j - i) / 2)
end)
end
-- Move length marks so that they follow ʲ / ᵝ.
text = gsub(text, "(ː+)([ʲᵝ]+)", "%2%1")
-- Copy ʲ / ᵝ from the second member of a doubled consonant onto the first
-- member (the one carrying U+031A).
text = gsub(text, "̚(.[̥̊]*)([ʲᵝ]+)", "̚%2%1%2")
-- Add diacritics to the plain vowels; u -> ɯ̟ and w -> β̞.
text = gsub(text, "[aeiouw]", {
["a"] = "a̠",
["e"] = "e̞",
["o"] = "o̞",
["u"] = "ɯ̟",
["w"] = "β̞"
})
-- ɯ̟ becomes ɨ after the listed consonants (with optional diacritics and
-- length marks in between).
text = gsub(text, "([szɕʑɲçʲ][̟̥̊]*ː*)ɯ̟", "%1ɨ")
-- When a vowel diacritic is followed by two marks from the ring/tilde set,
-- replace the sequence with ring + tilde followed by a spacing modifier
-- letter standing in for the original vowel diacritic.
text = gsub(text, "̠[̥̃][̥̃]", "̥̃˗")
text = gsub(text, "̞[̥̃][̥̃]", "̥̃˕")
text = gsub(text, "̟[̥̃][̥̃]", "̥̃˖")
-- A ring below directly after a vowel diacritic is moved above; then dots and
-- underscores are removed and ASCII "g" is replaced by "ɡ".
text = gsub(text, "([̠̞̟])̥", "%1̊")
:gsub("[%._]", "")
:gsub("g", "ɡ")
return text
end
-- Romanizes `word` and marks every vowel and moraic n with a pitch diacritic.
--
-- `rftype` selects the diacritic: "rise" uses the acute accent, "fall" the
-- grave accent. Long vowels are written as two accented vowels. A dot is
-- first inserted between an o-row kana and a following う/ウ, and between an
-- e-row kana and a following い/イ, before romanization.
-- Raises "Type not recognised." for any other `rftype`.
function export.rise_and_fall(word, rftype)
	word = gsub(word, "([" .. o_kana .. "][゙゚]?)([うウ])", "%1.%2")
	word = gsub(word, "([" .. e_kana .. "][゙゚]?)([いイ])", "%1.%2")
	word = kana_to_romaji(word, "ja")

	local vowel_map, nasal_before_cons, nasal_final
	if rftype == "rise" then
		vowel_map = {
			["a"] = "á", ["e"] = "é", ["i"] = "í", ["o"] = "ó", ["u"] = "ú",
			["ā"] = "áá", ["ē"] = "éé", ["ī"] = "íí", ["ō"] = "óó", ["ū"] = "úú" }
		nasal_before_cons, nasal_final = "ń%1", "ń"
	elseif rftype == "fall" then
		vowel_map = {
			["a"] = "à", ["e"] = "è", ["i"] = "ì", ["o"] = "ò", ["u"] = "ù",
			["ā"] = "àà", ["ē"] = "èè", ["ī"] = "ìì", ["ō"] = "òò", ["ū"] = "ùù" }
		nasal_before_cons, nasal_final = "ǹ%1", "ǹ"
	else
		return error("Type not recognised.")
	end

	-- Vowels first, then n before a consonant/apostrophe/space, then final n.
	word = gsub(word, ".", vowel_map)
	local marked = gsub(word, "n([bcdfghjkmnprstvw%'z ])", nasal_before_cons)
	word = gsub(marked, "n$", nasal_final)
	return word
end
-- [[Module:ja-ojad]] and [[Module:ja-infl-demo]] rely on the output format of this function
--
-- Builds the pitch-accent display for one kana spelling: the kana wrapped in
-- spans that draw the accent line, the accented romaji in square brackets,
-- and a label linking to the accent type together with the accent number.
--
-- Parameters:
--   text   kana string, or a frame-like table whose `args` supply
--          args[1], args[2], args.dev and args.dev2
--   class  accent class: "h" or "0", "a" or "1", "o", or a number giving the
--          downstep mora
--   dev    optional comma-separated list of 1-based mora positions to mark
--          as devoiced
--   dev2   accepted and normalized, but not otherwise read in this function
--
-- Raises an error when the downstep position exceeds the mora count, or when
-- the accent class cannot be recognised.
function export.accent(text, class, dev, dev2)
local result
if(type(text)) == "table" then text, class, dev, dev2 = text.args[1], text.args[2], text.args["dev"], text.args["dev2"] end
-- Write う after an o-row kana and い after an e-row kana as ー, and drop dots.
text = gsub(text, "([" .. o_kana .. "][゙゚]?)[うウ]", "%1ー")
text = gsub(text, "([" .. e_kana .. "][゙゚]?)[いイ]", "%1ー")
text = gsub(text, "%.", "")
if dev == "" then dev = false end
if dev2 == "" then dev2 = false end
-- HTML fragments used to assemble the kana display and the romaji wrapper.
local down_first = "<span style=\"border-top:1px solid;position:relative;padding:1px;\">"
local down_last = "<span style=\"position:absolute;top:0;bottom:67%;right:0%;border-right:1px solid;\"></span></span>"
local high_first = "<span style=\"border-top:1px solid\">"
local start = "<span lang=\"ja\" class=\"Jpan\">"
local romaji_start = " <span class=\"Latn\"><samp>["
local romaji_last = "]</samp></span> "
local last = "</span>"
-- NOTE(review): position_kana and position_mora are filled below but are not
-- read anywhere else in this function; only position_mora_space is used.
local position_kana = {} --position of each kana (ぁ counted), text without space
local position_mora = {} --position of each mora (ぁ not counted), text without space
local position_mora_space = {} --position of each mora (ぁ not counted), text with space
for i=1, len(text) do
if not match(sub(text,i,i), "[ " .. submoraic_kana .. "]") then
-- `extra` counts the submoraic kana directly following this character.
local extra = len(match(sub(text,i+1), "^[" .. submoraic_kana .. "]*"))
insert(position_mora_space, i+extra)
end
end
local space_removed = gsub(text," ","")
for i=1, len(space_removed) do
insert(position_kana, i)
if not match(sub(space_removed,i,i), "[" .. submoraic_kana .. "]") then
local extra = len(match(sub(space_removed,i+1), "^[" .. submoraic_kana .. "]*"))
insert(position_mora, i+extra)
end
end
-- Work out the accent type ("h", "a", "o" or "n") and the downstep number.
local acc_type, acc_number
if match(class, "^[h0]$") then
acc_type, acc_number = "h", 0
elseif match(class, "^[a1]$") then
acc_type, acc_number = "a", 1
elseif match(class, "^o$") then
acc_type = "o"
-- The downstep number is the mora count of the text.
acc_number = len(gsub(text, "[ " .. submoraic_kana .. "]", ""))
end
if match(class, "^[0-9]+$") and not match(class,"^[01]$") then
-- NOTE(review): `class` consists only of digits here, so removing "o"/"n"
-- appears to have no effect.
class = gsub(class, "[on]", "")
acc_number = tonumber(class)
local morae_count = len(gsub(text, "[ " .. submoraic_kana .. "]", ""))
if morae_count == acc_number then
acc_type = "o"
elseif morae_count < acc_number then
return error(("Mora count (%d) is smaller than position of downstep mora (%d).")
:format(morae_count, acc_number))
else
acc_type = "n"
end
elseif not acc_number then
-- Unrecognised class: acc_type stays nil, which ends in the
-- "Accent type not recognised." error further down.
acc_number = class
end
-- start_index is the index of the last character of the first mora
-- (the first character plus any submoraic kana directly after it).
local start_index = 1
while match(sub(text, start_index+1, start_index+1), "[" .. submoraic_kana .. "]") do
start_index = start_index + 1
end
-- kanas[k] is the text of mora k: one character plus following submoraic kana.
local kanas = {}
local single_mora
for i=1, len(text) do
if not match(sub(text,i,i), "[ " .. submoraic_kana .. "]") then
single_mora = gsub(sub(text, i, -1), "^(.[" .. submoraic_kana .. "]*).*", "%1")
insert(kanas, single_mora)
end
end
-- Wraps a mora in a dotted circle to show devoicing in the kana display.
local function kana_devoice(text)
return "<span style=\"border:1px dotted gray; border-radius:50%;\">" .. text .. "</span>"
end
if dev then
for position in gsplit(dev, ",") do
position = tonumber(position)
kanas[position] = kana_devoice(kanas[position])
end
end
-- For the romaji, replace ー with an explicit kana chosen by the class of the
-- preceding kana (o -> お, e -> え, u -> う, i -> い, a -> あ); after a kana
-- from n_kana the preceding kana is doubled instead.
local romaji_text = gsub(text, "([" .. o_kana .. "][゙゚]?)ー", "%1お")
romaji_text = gsub(romaji_text, "([" .. e_kana .. "][゙゚]?)ー", "%1え")
romaji_text = gsub(romaji_text, "([" .. u_kana .. "][゙゚]?)ー", "%1う")
romaji_text = gsub(romaji_text, "([" .. i_kana .. "][゙゚]?)ー", "%1い")
romaji_text = gsub(romaji_text, "([" .. a_kana .. "][゙゚]?)ー", "%1あ")
romaji_text = gsub(romaji_text, "([" .. n_kana .. "][゙゚]?)ー", "%1%1")
-- One entry per character (spaces included).
local romajis = split(romaji_text, "")
-- Returns the number of spaces inside the shortest prefix of `text` that
-- contains `index` non-space characters.
-- NOTE(review): if `text` has fewer than `index` non-space characters (for
-- example an empty string) this loop does not appear to terminate.
local function count_nspaces(text, index)
local i, sample, nspaces = 0, "", 0
while len(sample) < index do
i = i + 1
sample, nspaces = gsub(sub(text, 1, i), " ", "")
end
return nspaces
end
local function romaji_devoice(text)
-- use @ instead of ̥
return text .. "@"
end
if dev then
for position in gsplit(dev,",") do
position = position_mora_space[tonumber(position)]
romajis[position] = romaji_devoice(romajis[position])
end
end
if acc_type == "n" then
-- Three parts: first mora, morae 2..acc_number, and the rest after the downstep.
local r_start_index = start_index + count_nspaces(romaji_text, start_index)
local r_index = position_mora_space[acc_number]
local k_index = acc_number
local r_parts = {
[1] = concat(romajis, "", 1, r_start_index),
[2] = concat(romajis, "", r_start_index + 1, r_index),
[3] = concat(romajis, "", r_index + 1, #romajis)
}
local k_parts = {
[1] = concat(kanas, "", 1, 1),
[2] = concat(kanas, "", 2, k_index),
[3] = concat(kanas, "", k_index + 1, #kanas)
}
-- A leading space of a romaji part is re-added explicitly in the output below.
local space2 = ""
local space3 = ""
if sub(r_parts[2], 1, 1) == " " then
space2 = " "
end
if sub(r_parts[3], 1, 1) == " " then
space3 = " "
end
result = start ..
k_parts[1] ..
down_first ..
k_parts[2] ..
down_last ..
k_parts[3] ..
last ..
romaji_start ..
export.rise_and_fall(r_parts[1], "fall") ..
space2 ..
export.rise_and_fall(r_parts[2], "rise") ..
"ꜜ" ..
space3 ..
export.rise_and_fall(r_parts[3], "fall") ..
romaji_last ..
"([[中高型|Nakadaka]] – [" .. acc_number .. "])"
else
-- Two parts only: the first mora and everything after it.
local r_start_index = start_index + count_nspaces(romaji_text, start_index)
local r_parts = {
[1] = concat(romajis, "", 1, r_start_index),
[2] = concat(romajis, "", r_start_index + 1, #romajis)
}
local k_parts = {
[1] = concat(kanas, "", 1, 1),
[2] = concat(kanas, "", 2, #kanas)
}
local space2 = ""
if sub(r_parts[2], 1, 1) == " " then
space2 = " "
end
if acc_type == "h" then
-- First mora falling, the rest rising, no downstep mark.
result = start ..
k_parts[1] ..
high_first ..
k_parts[2] ..
last ..
last ..
romaji_start ..
export.rise_and_fall(r_parts[1], "fall") ..
space2 ..
export.rise_and_fall(r_parts[2], "rise") ..
romaji_last ..
"([[平板型|Heiban]] – [" .. acc_number .. "])"
elseif acc_type == "a" then
-- First mora rising with the downstep directly after it, the rest falling.
result = start ..
down_first ..
k_parts[1] ..
down_last ..
k_parts[2] ..
last ..
romaji_start ..
export.rise_and_fall(r_parts[1], "rise") ..
"ꜜ" ..
space2 ..
export.rise_and_fall(r_parts[2], "fall") ..
romaji_last ..
"([[頭高型|Atamadaka]] – [" .. acc_number .. "])"
elseif acc_type == "o" then
-- First mora falling, the rest rising, downstep mark at the very end.
result = start ..
k_parts[1] ..
down_first ..
k_parts[2] ..
down_last ..
last ..
romaji_start ..
export.rise_and_fall(r_parts[1], "fall") ..
space2 ..
export.rise_and_fall(r_parts[2], "rise") ..
"ꜜ" ..
romaji_last ..
"([[尾高型|Odaka]] – [" .. acc_number .. "])"
else
return error("Accent type not recognised.")
end
end
-- Turn each "@" devoicing marker into <del>…</del> around the preceding character.
result = gsub(result, "(.)@", "<del>%1</del>")
return result
end
return export