မဝ်ဂျူ:ain-pron
မံက်ပြာကတ်
Documentation for this module may be created at မဝ်ဂျူ:ain-pron/doc
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Romanized letters that count as consonants when syllabifying.
-- The apostrophe is listed as a consonant as well.
-- TODO: x /x/ ?
local CONSONANTS = {
    "p", "t", "c",
    "k", "m", "n",
    "s", "h", "w",
    "r", "y", "'",
}
-- Romanized letters that count as vowels (syllable nuclei):
-- the five plain vowels followed by their acute-accented forms.
local VOWELS = {
    "a",
    "i",
    "u",
    "e",
    "o",
    "á",
    "í",
    "ú",
    "é",
    "ó",
}
-- Phonemic transcription map: romanized letter -> IPA.
-- The apostrophe maps to the empty string, so it disappears from the output.
local CONVERSION_TABLE = {
    -- Consonants
    p = "p",
    t = "t",
    c = "t͡ʃ",
    k = "k",
    m = "m",
    n = "n",
    s = "s",
    h = "h",
    w = "w",
    r = "ɾ",
    y = "j",
    ["'"] = "",
    -- Vowels
    a = "a",
    i = "i",
    u = "u",
    e = "e",
    o = "o",
    ["á"] = "á",
    ["í"] = "í",
    ["ú"] = "ú",
    ["é"] = "é",
    ["ó"] = "ó",
}
-- Phonetic transcription map: romanized letter -> IPA.
-- Here the apostrophe is rendered as a glottal stop, e.g. ruunpe -> ruʔunpe
-- (and, presumably, teeta -> teʔeta).
local CONVERSION_TABLE_PHONETIC = {
    -- Consonants
    p = "p",
    t = "t",
    c = "t͡ʃ",
    k = "k",
    m = "m",
    n = "n",
    s = "s",
    h = "h",
    w = "w",
    r = "ɾ",
    y = "j",
    ["'"] = "ʔ",
    -- Vowels
    a = "a",
    i = "i",
    u = "u",
    e = "e",
    o = "o",
    ["á"] = "á",
    ["í"] = "í",
    ["ú"] = "ú",
    ["é"] = "é",
    ["ó"] = "ó",
}
-- Syllable-final consonants that get a distinct phonetic realization.
-- Example (as -> aʃ): phonemic /e.ɾa.mus.ka.ɾe/ is phonetic [e.ɾa.muʃ.ka.ɾe].
local SPECIAL_CODA = {
    s = "ʃ",
    p = "p̚",
    k = "k̚",
    t = "t̚",
}
-- Report whether `item` occurs among the values of `items`.
--   item:  value to look for (compared with ==)
--   items: table whose values are searched
-- Returns true when some value equals `item`, false otherwise.
-- Declared `local` so that loading the module does not create a global.
local function in_values(item, items)
    for _, value in pairs(items) do
        if value == item then
            return true
        end
    end
    return false
end
-- Report whether `item` is a key of `items`.
--   item:  candidate key
--   items: table whose keys are checked
-- Returns true when `items` has an entry stored under `item`, false otherwise.
-- A direct lookup replaces the scan over every key; the answer is the same
-- because a key is present exactly when its value is not nil.
-- Declared `local` so that loading the module does not create a global.
local function in_keys(item, items)
    if item == nil then
        -- nil can never be a table key.
        return false
    end
    return items[item] ~= nil
end
-- Transcribe one romanized syllable phonemically by mapping every character
-- through CONVERSION_TABLE.
--   yay -> jaj
--   mur -> muɾ
--   an  -> an   (no glottal stop is added here)
--   ka  -> ka
--   syllable: romanized syllable (lower case)
-- Returns the IPA string for the syllable.
-- Raises an error naming the offending character when a character has no
-- entry in CONVERSION_TABLE (previously this surfaced as an opaque
-- "attempt to concatenate a nil value" error).
local function convert_syllable(syllable)
    local pieces = {}
    for char in mw.ustring.gmatch(syllable, ".") do
        local ipa = CONVERSION_TABLE[char]
        if ipa == nil then
            error("ain-pron: no IPA mapping for character '" .. char
                .. "' in syllable '" .. syllable .. "'")
        end
        pieces[#pieces + 1] = ipa
    end
    return table.concat(pieces)
end
-- Transcribe one romanized syllable phonetically.
--   * A syllable that does not start with a consonant gets an apostrophe
--     prepended, which CONVERSION_TABLE_PHONETIC renders as a glottal stop.
--   * Every character is mapped through CONVERSION_TABLE_PHONETIC.
--   * If the syllable has more than one character and its last character is a
--     key of SPECIAL_CODA, that final sound is replaced by the SPECIAL_CODA form.
--   syllable: romanized syllable (lower case)
-- Returns the IPA string for the syllable.
-- Raises an error naming the offending character when a character has no
-- entry in CONVERSION_TABLE_PHONETIC.
local function convert_syllable_phonetic(syllable)
    if not in_values(mw.ustring.sub(syllable, 1, 1), CONSONANTS) then
        syllable = "'" .. syllable
    end

    -- Keep the romanized characters and their IPA renderings side by side.
    -- The coda must be located by position in the romanized syllable: one
    -- letter can expand to several code points ("c" -> "t͡ʃ"), so the length
    -- of the IPA string is not a valid index into the syllable.
    local chars = {}
    local pieces = {}
    for char in mw.ustring.gmatch(syllable, ".") do
        local ipa = CONVERSION_TABLE_PHONETIC[char]
        if ipa == nil then
            error("ain-pron: no IPA mapping for character '" .. char
                .. "' in syllable '" .. syllable .. "'")
        end
        chars[#chars + 1] = char
        pieces[#pieces + 1] = ipa
    end

    local last = #chars
    if last > 1 then
        local special = SPECIAL_CODA[chars[last]]
        if special ~= nil then
            pieces[last] = special
        end
    end
    return table.concat(pieces)
end
-- local function print_groups(group_ids, temp)
-- local str_buffer = ""
-- for i = 1, #temp do
-- if group_ids[i] ~= nil then
-- str_buffer = str_buffer .. group_ids[i]
-- else
-- str_buffer = str_buffer .. "X"
-- end
-- end
-- print("group_indices: " .. str_buffer)
-- print("group_contents: " .. temp)
-- end
-- Produce the phonemic transcription of a romanized word: split it into
-- syllables, transcribe each with convert_syllable and join them with ".".
--   romanized: normalized (lower-case) romanized word
-- Returns the dotted IPA string, e.g. "kamuy" -> "ka.muj"; "" for empty input.
--
-- Syllabification works on a per-character map of syllable numbers:
--   1. every vowel opens a new syllable and pulls in an immediately preceding
--      consonant as its onset;
--   2. every remaining character joins the syllable of the character before it
--      (i.e. it becomes a coda);
--   3. the word is cut wherever the syllable number changes.
local function do_convert(romanized)
    -- Work on a list of characters so positions are character positions.
    local chars = {}
    for char in mw.ustring.gmatch(romanized, ".") do
        chars[#chars + 1] = char
    end
    local length = #chars

    -- Step 1: mark nuclei and onsets.
    local group_ids = {}
    local syllable_count = 1
    for i = 1, length do
        if in_values(chars[i], VOWELS) then
            if i > 1 and in_values(chars[i - 1], CONSONANTS) then
                group_ids[i - 1] = syllable_count
            end
            group_ids[i] = syllable_count
            syllable_count = syllable_count + 1
        end
    end

    -- Step 2: fill codas. Characters before the first onset/nucleus have no
    -- predecessor to inherit from; they are attached to the first syllable
    -- instead of being left unnumbered, which used to yield an empty leading
    -- syllable in the output.
    for i = 1, length do
        if group_ids[i] == nil then
            group_ids[i] = group_ids[i - 1] or 1
        end
    end

    -- Step 3: cut at every change of syllable number. The loop runs through
    -- the last character inclusive, so a word-final bare vowel (the "a" of
    -- "nea") is split off as its own syllable.
    local result = {}
    local head = 1
    for i = 2, length do
        if group_ids[i] ~= group_ids[i - 1] then
            result[#result + 1] = convert_syllable(table.concat(chars, "", head, i - 1))
            head = i
        end
    end
    result[#result + 1] = convert_syllable(table.concat(chars, "", head, length))
    return table.concat(result, ".")
end
-- Produce the phonetic transcription of a romanized word: split it into
-- syllables, transcribe each with convert_syllable_phonetic, join them with
-- "." and apply a few cross-syllable adjustments.
--   romanized: normalized (lower-case) romanized word
-- Returns the dotted IPA string; "" handling is left to
-- convert_syllable_phonetic, which receives the empty syllable.
--
-- Syllabification works on a per-character map of syllable numbers:
--   1. every vowel opens a new syllable and pulls in an immediately preceding
--      consonant as its onset;
--   2. every remaining character joins the syllable of the character before it
--      (i.e. it becomes a coda);
--   3. the word is cut wherever the syllable number changes.
local function convert_phonetic(romanized)
    -- Work on a list of characters so positions are character positions.
    local chars = {}
    for char in mw.ustring.gmatch(romanized, ".") do
        chars[#chars + 1] = char
    end
    local length = #chars

    -- Step 1: mark nuclei and onsets.
    local group_ids = {}
    local syllable_count = 1
    for i = 1, length do
        if in_values(chars[i], VOWELS) then
            if i > 1 and in_values(chars[i - 1], CONSONANTS) then
                group_ids[i - 1] = syllable_count
            end
            group_ids[i] = syllable_count
            syllable_count = syllable_count + 1
        end
    end

    -- Step 2: fill codas. Characters before the first onset/nucleus have no
    -- predecessor to inherit from; they are attached to the first syllable
    -- instead of being left unnumbered, which used to yield an empty leading
    -- syllable in the output.
    for i = 1, length do
        if group_ids[i] == nil then
            group_ids[i] = group_ids[i - 1] or 1
        end
    end

    -- Step 3: cut at every change of syllable number. The loop runs through
    -- the last character inclusive, so a word-final bare vowel (the "a" of
    -- "nea") is split off as its own syllable.
    local syllables = {}
    local head = 1
    for i = 2, length do
        if group_ids[i] ~= group_ids[i - 1] then
            syllables[#syllables + 1] =
                convert_syllable_phonetic(table.concat(chars, "", head, i - 1))
            head = i
        end
    end
    syllables[#syllables + 1] =
        convert_syllable_phonetic(table.concat(chars, "", head, length))

    local result = table.concat(syllables, ".")
    -- TODO: Phonetic Transcription: konkane /kon.ka.ne/ [koŋ.ɡa.ne] / ʔ
    -- A syllable-final n or m before a syllable-initial k becomes ŋ.
    result = mw.ustring.gsub(result, "n%.k", "ŋ.k")
    result = mw.ustring.gsub(result, "m%.k", "ŋ.k")
    -- s before i is rendered as ʃ.
    result = mw.ustring.gsub(result, "si", "ʃi")
    return result
end
-- local function valid_ainu_word(word)
-- -- TODO:
-- end
-- Template entry point: build the formatted IPA line for one or more Ainu words.
--   frame: Scribunto frame; the words are read from the parent frame's
--          positional arguments (a list in which gaps are allowed).
-- For every position from 1 up to the highest supplied index (at least 1):
--   * a missing or empty argument falls back to the current page title;
--   * the word is lower-cased and every character that is not matched as an
--     upper-case letter, a lower-case letter or an apostrophe is removed;
--   * the phonemic form is emitted between slashes, and the phonetic form is
--     emitted between square brackets only when it differs from the phonemic one.
-- Returns whatever Module:IPA's format_IPA_full produces for language "ain".
function export.ain_IPA(frame)
    local params = {
        [1] = {list = true, allow_holes = true}
    }
    local args = require("Module:parameters").process(frame:getParent().args, params)
    local words = args[1]
    local transcriptions = {}

    -- FIXME: IPA(?): /hi.oj.oj/, [hi.oj.oj] → IPA(?): /hi.oj.oj/ [hi.oj.oj]
    local last_index = math.max(words.maxindex, 1)
    for index = 1, last_index do
        -- TODO: error("") if not valid Ainu word
        local word = words[index]
        if not word or word == "" then
            word = mw.title.getCurrentTitle().text
        end

        -- Normalize: lower-case, then drop separators such as "-", ".", "=".
        word = mw.ustring.lower(word)
        word = mw.ustring.gsub(word, "[^%u%l']", "")

        local phonemic = do_convert(word)
        local phonetic = convert_phonetic(word)
        table.insert(transcriptions, {pron = "/" .. phonemic .. "/"})
        if phonemic ~= phonetic then
            table.insert(transcriptions, {pron = "[" .. phonetic .. "]"})
        end
    end

    local lang = require("Module:languages").getByCode("ain")
    -- No error value is passed on to the formatter.
    return require("Module:IPA").format_IPA_full(lang, transcriptions, nil)
end
-- Entry point that returns the phonemic transcription of the first
-- argument given directly on the frame, without any normalization.
--   frame: Scribunto frame; frame.args[1] is the romanized word.
function export.convert(frame)
    return do_convert(frame.args[1])
end
-- Hand the table of exported functions to whoever loads this module.
return export