မဝ်ဂျူ:tl-utilities
မံက်ပြာကတ်
Documentation for this module may be created at မဝ်ဂျူ:tl-utilities/doc
local export = {}
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local baybayin_encode_module = "Module:tl-bay_sc"
local lang = require("Module:languages").getByCode("tl")
local sc_Tglg = require("Module:scripts").getByCode("Tglg")
local rfind = m_str_utils.find
local rmatch = m_str_utils.match
local rsubn = m_str_utils.gsub
local rsplit = m_str_utils.split
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = mw.text.trim
local u = m_str_utils.char
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
local uupper = m_str_utils.upper
-- Combining diacritics, built from their code points via the string-utilities char() helper.
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex
local TILDE = u(0x0303) -- combining tilde
local DIA = u(0x0308) -- combining diaeresis
local MACRON = u(0x0304) -- combining macron
local DOTOVER = u(0x0307) -- combining dot above
-- Character inventories and the pattern character classes derived from them.
local vowel = "aeëəiou" -- letters treated as vowels
local V = "[" .. vowel .. "]" -- class matching a single vowel
local NV = "[^" .. vowel .. "]" -- class matching anything that is not a vowel
local accent = AC .. GR .. CFLEX .. MACRON -- diacritics treated as accent marks
local accent_c = "[" .. accent .. "]" -- class matching a single accent mark
local ipa_stress = "ˈˌ" -- IPA primary and secondary stress marks
local ipa_stress_c = "[" .. ipa_stress .. "]" -- class matching a single IPA stress mark
-- Characters that are neither vowels nor consonants: accents, stress marks, '#', space, '.' and '-'.
local separator = accent .. ipa_stress .. "# .-"
local C = "[^" .. vowel .. separator .. "]" -- consonant: anything that is not a vowel or a separator
-- Wrapper around rsubn() that yields only the substituted string and drops every other return value
-- (such as the substitution count).
local function rsub(term, foo, bar)
	-- The parentheses truncate the call's results to exactly one value.
	return (rsubn(term, foo, bar))
end
-- Keep applying rsub() with the same pattern and replacement until the string stops changing, then
-- return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- Convert `text` to NFD, then put back the precomposed forms of n+tilde and u+diaeresis (both cases) so that
-- these letters stay single characters. If `recompose_e_dia` is truthy, e+diaeresis (both cases) is likewise
-- recomposed. Any other base+tilde / base+diaeresis sequence is left decomposed, because a sequence with no
-- entry in the lookup table is kept unchanged by the substitution.
local function decompose(text, recompose_e_dia)
	local tilde_dia_recompositions = {
		["n" .. TILDE] = "ñ",
		["N" .. TILDE] = "Ñ",
		["u" .. DIA] = "ü",
		["U" .. DIA] = "Ü",
	}
	local decomposed = rsub(toNFD(text), ".[" .. TILDE .. DIA .. "]", tilde_dia_recompositions)
	if not recompose_e_dia then
		return decomposed
	end
	local e_dia_recompositions = {
		["e" .. DIA] = "ë",
		["E" .. DIA] = "Ë",
	}
	return rsub(decomposed, ".[" .. DIA .. "]", e_dia_recompositions)
end
-- Restore capitalization in `input` using `caps_map` as the reference for which positions are uppercase.
-- `input` may contain extra syllable-break characters ("|", "." or "7") that are absent from `caps_map`; each
-- such extra character shifts the lookup position in `caps_map` back by one so the two strings stay aligned.
-- Positions are walked with string.sub and the `#` operator, i.e. in the units those operate on.
local function fix_capitalization(input, caps_map)
	local syllbreak_chars = ".7"
	local break_in_text = "[|" .. syllbreak_chars .. "]"
	local break_in_caps = "[" .. syllbreak_chars .. "]"
	local text = ulower(input)
	-- Number of break characters seen so far in `text` that have no counterpart in `caps_map`.
	local offset = 0
	for i = 1, #text do
		local ch = text:sub(i, i)
		local ref = caps_map:sub(i - offset, i - offset)
		if rfind(ch, break_in_text) and not rfind(ref, break_in_caps) then
			offset = offset + 1
		elseif uupper(ch) == ref then
			-- The reference has the uppercase form here, so splice it into the result.
			text = table.concat({text:sub(1, i - 1), uupper(ch), text:sub(i + 1)})
		end
	end
	return text
end
-- Strip accent marks (acute, grave, circumflex, macron) from `str`. The string is first decomposed, with
-- ñ, ü and ë kept precomposed, so each accent is a separate combining character that can be dropped.
function export.remove_accents(str)
	local decomposed = decompose(str, "recompose e-dia")
	local stripped = rsub(decomposed, "(.)" .. accent_c, "%1")
	return stripped
end
-- Clean up Baybayin input: replace every run of Baybayin characters in `text` with a Latin rendering produced
-- by the language's transliteration, and return the resulting string. Non-Baybayin text is left untouched.
function export.decode_baybayin(text)
	return rsub(text, "[ᜀ-ᜟ᜵᜶]+", function(baybayin)
		-- `result` must be local; assigning it without `local` would leak a global from this callback.
		local result = lang:transliterate(baybayin, sc_Tglg)
		-- Separate adjacent vowels with a hyphen, then turn every hyphen into "7".
		result = rsub(result, "([aeiou])([aeiou])", "%1-%2")
		result = rsub(result, "%-", "7")
		-- No way to know stress in Baybayin, so every vowel gets a macron.
		-- NOTE(review): the original comment here also said "Disable for now", yet this line is live -- confirm intent.
		result = rsub(result, "([aeiou])", "%1" .. MACRON)
		return result
	end)
end
-- "Align" syllabified respelling `syllab` to original spelling `spelling` by matching character-by-character, allowing
-- for extra syllable and accent markers in the syllabification and certain mismatches in the consonants. The goal is to
-- produce the appropriately syllabified version of the original spelling (the pagename) by matching characters in the
-- syllabified respelling to the original spelling, putting the syllable boundaries in the appropriate places in the
-- original spelling. As an example, given syllabified respelling 'a.ma.7ín' and original spelling 'amain', we would
-- like to produce 'a.ma.in'.
--
-- If we encounter an extra syllable marker (.), we allow and keep it. If we encounter an extra accent marker in the
-- syllabification, we drop it. We allow for mismatches in capitalization and for certain other mismatches, e.g. extra
-- glottal stops (written 7), h in respelling vs. g or j in the original, etc. If we can't match, we return nil
-- indicating the alignment failed.
-- Parameters:
--   syllab:   syllabified respelling, with "." marking syllable boundaries.
--   spelling: the original spelling to which the syllable boundaries are transferred.
-- Returns the syllabified spelling (NFC-normalized), or nil (after logging a message) if the alignment fails.
function export.align_syllabification_to_spelling(syllab, spelling)
-- Accumulates the output pieces: spelling characters plus copied syllable dots.
local result = {}
local function concat_result()
-- Postprocess to remove dots (syllable boundaries) next to hyphens.
return (toNFC(table.concat(result)):gsub("%.%-", "-"):gsub("%-%.", "-"))
end
-- Remove glottal stop (7) from respelling to simplify the code below, because it's never found in the original
-- spelling. (FIXME: We should do the same for diacritics, but they're currently removed earlier, in
-- syllabify_from_spelling(). We should probably get rid of the removal there and put it here.)
-- The length mark "ː" is removed from the respelling as well.
syllab = decompose(syllab:gsub("ː", ""), "recompose e-dia"):gsub("7", "")
spelling = decompose(spelling, "recompose e-dia")
-- Split both strings into per-character arrays; the respelling is lowercased, the spelling keeps its case.
local syll_chars = rsplit(ulower(syllab), "")
local spelling_chars = rsplit(spelling, "")
-- i indexes syll_chars, j indexes spelling_chars.
local i = 1
local j = 1
local function matches(uci, ucj)
-- Return true if a syllabified respelling character (uci) matches the corresponding spelling char (ucj).
-- Both uci and ucj should be lowercase.
-- Sound is at the key, values are the letters sound can match
local matching_chars = {
["e"] = {"i"},
["ë"] = {"a", "e", "o", "u"},
["h"] = {"g", "j", "x"},
["i"] = {"e"},
["j"] = {"g"},
["k"] = {"c", "j"},
["o"] = {"u"},
["s"] = {"j", "c", "x"},
["u"] = {"o"},
["w"] = {"u", "o"},
["y"] = {"i"}
}
return uci == ucj or (matching_chars[uci] and m_table.contains(matching_chars[uci], ucj))
end
-- Spelling characters that may have no counterpart in the respelling.
local function silent_spelling_letter(ucj)
return ucj == "h" or ucj == "'" or ucj == "-"
end
-- Bounds-safe accessors: each returns "" when `pos` is outside the array.
local function syll_at(pos)
return syll_chars[pos] or ""
end
local function spell_at(pos)
return spelling_chars[pos] or ""
end
-- Like spell_at() but lowercased.
local function uspell_at(pos)
local c = spelling_chars[pos]
return c and ulower(c) or ""
end
-- Walk both arrays in parallel. The branches below are tried in order, and the order matters.
while i <= #syll_chars or j <= #spelling_chars do
local uci = syll_at(i)
local cj = spell_at(j)
local ucj = uspell_at(j)
if uci == "g" and syll_at(i - 1) == "n" and syll_at(i + 1) == "." and matches(syll_at(i + 2), ucj) and
not matches(syll_at(i + 2), uspell_at(j + 1)) then
-- As a special case, before checking whether the corresponding characters match, we have to skip an extra
-- g in an -ng- sequence in the syllabified respelling if the corresponding spelling character matches the
-- next respelling character (taking into account the syllable boundary). This is so that e.g.
-- syll='ba.rang.gay' matches spelling='barangay'. Otherwise we will match the first respelling g against
-- the spelling g and the second respelling g won't match. A similar case occurs with
-- syll='E.vang.he.lis.ta' and spelling='Evangelista'. But we need an extra condition to not do this hack
-- when syll='ba.rang.gay' matches spelling='baranggay'.
i = i + 1
elseif uci == "g" and ucj == "g" and uspell_at(j + 1) == TILDE then
-- Spelling has g followed by a combining tilde: copy both spelling characters against the single
-- respelling g.
table.insert(result, cj)
table.insert(result, uspell_at(j + 1))
i = i + 1
j = j + 2
elseif matches(uci, ucj) then
-- Ordinary match: copy the spelling character (preserving its case) and advance both sides.
table.insert(result, cj)
i = i + 1
j = j + 1
elseif ucj == uspell_at(j - 1) and uci == "." and ucj ~= syll_at(i + 1) then
-- See below. We want to allow for a doubled letter in spelling that is pronounced single, and preserve the
-- doubled letter. But it's tricky in the presence of syllable boundaries on both sides of the doubled
-- letter as well as doubled letters pronounced double. Specifically, there are three possibilities,
-- exemplified by:
-- (1) syll='Mal.lig', spelling='Mallig' -> 'Mal.lig';
-- (2) syll='Ma.lig', spelling='Mallig' -> 'Ma.llig';
-- (3) syll='Wil.iam', spelling='William' -> 'Will.iam'.
-- If we copy the dot first, we get (1) and (2) right but not (3).
-- If we copy the double letter first, we get (2) and (3) right but not (1).
-- We choose to copy the dot first except in the situation exemplified by (3), where we copy the doubled
-- letter first. The condition above handles (3) (the doubled letter matches against a dot) while not
-- interfering with (1) (where the doubled letter also matches against a dot but the next letter in the
-- syllabification is the same as the doubled letter, because the doubled letter is pronounced double).
table.insert(result, cj)
j = j + 1
elseif silent_spelling_letter(ucj) and uci == "." and ucj ~= syll_at(i + 1) and
not rfind(uspell_at(j + 1), V) then
-- See below for silent h or apostrophe in spelling. This condition is parallel to the one directly above
-- for silent doubled letters in spelling and handles the case of syllab='Abduramán', spelling='Abdurahman',
-- which should be syllabified 'Ab.du.rah.man'. But we need a check to see that the next spelling character
-- isn't a vowel, because in that case we want the silent letter to go after the period, e.g.
-- syllab='Jumu7á', spelling='Jumu'ah' -> 'Ju.mu.'ah' (the 7 is removed above).
table.insert(result, cj)
j = j + 1
elseif uci == "." then
-- Extra syllable boundary in the respelling: keep it in the output.
table.insert(result, uci)
i = i + 1
elseif ucj == uspell_at(j - 1) then
-- A doubled letter in spelling that is pronounced single. Examples:
-- * syllab='Ma.líg', spelling='Mallig' -> 'Ma.llig' (with l)
-- * syllab='Lu.il.yér', spelling='Lhuillier' -> 'Lhu.ill.ier' (with l; a more complex example)
-- * syllab='a.sa.la.mu a.lai.kum', spelling='assalamu alaikum' -> 'as.sa.la.mu a.lai.kum' (with s)
-- * syllab='Jé.fer.son', spelling='Jefferson' -> 'Je.ffer.son' (with f)
-- * syllab='Je.ma', spelling='Gemma' -> 'Ge.mma' (with m)
-- * syllab='Ha.na', spelling='Hannah' -> 'Ha.nnah' (with n)
-- * syllab='A.by', spelling='Abby' -> 'A.bby' (with b)
-- * syllab='Ka.ba', spelling='Kaaba' -> 'Kaa.ba' (with a)
-- * syllab='Fu.ji', spelling='Fujii' -> 'Fu.jii' (with i)
table.insert(result, cj)
j = j + 1
elseif silent_spelling_letter(ucj) then
-- A silent h, apostrophe or hyphen in spelling. Examples:
-- * syllab='adán', spelling='adhan' -> 'a.dhan'
-- * syllab='Atanasya', spelling='Athanasia' -> 'A.tha.nas.ia'
-- * syllab='Cýntiya', spelling='Cynthia' -> 'Cyn.thi.a'
-- * syllab='Ermóhenes', spelling='Hermogenes' -> 'Her.mo.ge.nes'
-- * syllab='Abduramán', spelling='Abdurahman' -> 'Ab.du.rah.man'
-- * syllab='Jumu7á', spelling='Jumu'ah' -> 'Ju.mu.'ah'
-- * syllab='pag7ibig', spelling='pag-ibig' -> 'pag-i.big'
table.insert(result, cj)
j = j + 1
elseif uci == AC or uci == GR or uci == CFLEX or uci == DIA or uci == TILDE or uci == MACRON or
uci == "y" or uci == "w" then
-- skip character: an extra diacritic, or an extra y/w, in the respelling with no spelling counterpart
i = i + 1
else
-- non-matching character
mw.log(("Syllabification alignment mismatch for pagename '%s' (position %s, character %s), syllabified respelling '%s' (position %s, character %s), aligned result so far '%s'"
):format(spelling, j, ucj, syllab, i, uci, concat_result()))
return nil
end
end
-- NOTE(review): this check repeats the loop condition, and the loop above only exits normally once that
-- condition is false, so this branch appears to be unreachable -- confirm before relying on or removing it.
if i <= #syll_chars or j <= #spelling_chars then
-- left-over characters on one side or the other
mw.log(("Syllabification alignment mismatch for pagename '%s' (%s), syllabified respelling '%s' (%s), aligned result so far '%s'"
):format(
spelling, j > #spelling_chars and "end of string" or ("position %s, character %s"):format(j, uspell_at(j)),
syllab, i > #syll_chars and "end of string" or ("position %s, character %s"):format(i, syll_at(i)),
concat_result()))
return nil
end
return concat_result()
end
-- Return a truthy value (the first matching character) if `text` contains any character in the range
-- U+1700..U+171F (Baybayin letters), otherwise nil.
function export.has_baybayin(text)
	-- Use the Unicode-aware matcher. The byte-oriented string.match reads the multibyte endpoints of this
	-- character class one byte at a time, which yields a byte range (0x80-0xE1) that matches almost any
	-- non-ASCII text, not just Baybayin.
	return rmatch(text, "[ᜀ-ᜟ]")
end
-- Automatically syllabify `text` (a spelling or respelling), returning it with "." inserted at syllable
-- boundaries.
--
-- Parameters:
--   text:     the string to syllabify; may contain runs of Baybayin characters, which are decoded to Latin for
--             syllabification and re-encoded at the end.
--   pagename: the page name, used only to decide which syllable breaks should be restored as hyphens.
--
-- Outline: normalize spaces; decompose; lowercase; mark word boundaries with "#"; collapse digraphs (ng, ch,
-- sh, ...) into single placeholder characters; apply the ordered syllable-division rules; expand the
-- placeholders again; restore capitalization and hyphens; re-encode Baybayin. The rule order is significant.
function export.syllabify_from_spelling(text, pagename)
	-- Auto syllabifications start --
	-- Local, extended versions of the module-level classes: ẃ and ý (markers for w/y acting as vowels) count
	-- as vowels inside this function.
	local vowel = vowel .. "ẃý" -- vowel
	local V = "[" .. vowel .. "]"
	local C = "[^" .. vowel .. separator .."]" -- consonant

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	text = trim(text)
	text = canon_spaces(text)

	-- Decode Baybayin runs to Latin, wrapped in "<᜶" ... "᜶>" so they can be found and re-encoded at the end.
	text = rsub(text, "[ᜀ-ᜟ]+", function(baybayin)
		return "<᜶" .. export.decode_baybayin(baybayin) .. "᜶>"
	end)

	text = decompose(text, "recompose e-dia")

	-- Remember the cased text so capitalization can be restored after the rules have run.
	local origtext = text
	-- NOTE(review): string.lower is byte-oriented, so non-ASCII capitals are presumably left as-is here -- confirm.
	text = string.lower(text)

	-- Protect a period before a space or at the end of the text by swapping it for "․" (restored below), so it
	-- is not mistaken for a syllable dot.
	text = rsub(text, "[.] ", "․ ")
	text = rsub(text, "[.]$", "․")

	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	text = rsub_repeatedly(text, "([.]?)#([.]?)", "#")

	-- Collapse digraphs and special sequences into single placeholder characters.
	text = rsub(text, "ng̃", "ŋ")
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "g̃", "ġ")
	text = rsub(text, "ch", "ĉ")
	text = rsub(text, "t_s", "ć")
	text = rsub(text, "sh", "ʃ")
	text = rsub(text, "gu([eëiy])", "ǵ%1")
	text = rsub(text, "qu([eëiy])", "ḱ%1")
	text = rsub(text, "r", "ɾ")
	text = rsub(text, "ɾɾ", "r")
	text = rsub(text, "ʔ", "7")

	-- Divide u/i from a following vowel, and handle other vowel sequences.
	text = rsub_repeatedly(text, "#(" .. C .. "+)u([aeio])","#%1u.%2")
	text = rsub_repeatedly(text, "#(" .. C .. "+)i([aeou])","#%1i.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")u([aeio])","#%1.u%2")
	text = rsub_repeatedly(text, "(" .. C .. ")i([aeou])","#%1.i%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)u([aeio])","%1.u%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)o([aei])","%1.ó%2")
	text = rsub(text, "a(" .. accent_c .. "*)o([#.7])","a%1ó%2")

	-- eu rules
	text = rsub_repeatedly(text, "([^" .. vowel .. "#])([e])(" .. accent_c .. "?)([u])(" .. accent_c .. "?)","%1%2%3.%4%5")

	-- Mark y/w before a consonant or word boundary as ý/ẃ (counted as vowels above), with exceptions.
	text = rsub(text, "y([ˈˌ." .. accent .. "]*)([bćĉdfgǵhjĵkḱlmnɲŋpɾrsʃtvwɟzʔ#" .. vowel .. "])","ý%1%2")
	text = rsub(text, "ý(" .. V .. ")", "y%1")
	text = rsub(text, "w([ˈˌ]?)([bćĉdfgǵjĵkḱlmnɲŋpɾrsʃtvwɟzʔ#" .. vowel .. "])","ẃ%1%2")
	text = rsub(text, "ẃ(" .. V .. ")","w%1")

	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ẃ([bdfgǵkḱpt])([ɾr" .. vowel .. separator .."])" ,"%1%2w%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ẃ([bfgǵkḱp])([l" .. vowel .. separator .."])" ,"%1%2w%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ý([bdfgǵkḱpt])([ɾr" .. vowel .. separator .."])" ,"%1%2y%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ý([bfgǵkḱp])([l" .. vowel .. separator .."])" ,"%1%2y%3%4")

	-- Basic V.CV division.
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. V .. ")", "%1.%2")

	-- "mb", "mp", "nd", "nk", "nt" combinations
	text = rsub_repeatedly(text, "(m)([bp])([^lɾrɟy" .. vowel .. separator .."])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([dk])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([s])([^ɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([t])([^lɾrɟys" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(ŋ)([k])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "([ɾr])([bćĉdfgǵkḱlmnpsʃvz])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "([ɾr])([t])([sz]?)([^lɾrɟysʃ" .. vowel .. separator .. "])(" .. V .. ")", "%1%2%3.%4%5")
	text = rsub_repeatedly(text, "(s)([ktp])([^lɾrwɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")

	-- Remaining consonant clusters.
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")

	-- Any aeëo, or stressed iu, should be syllabically divided from a following aeëo or stressed iu.
	text = rsub_repeatedly(text, "([aeëo]" .. accent_c .. "*)([aeëo])", "%1.%2")
	text = rsub_repeatedly(text, "([aeëo]" .. accent_c .. "*)(" .. V .. accent_c .. ")", "%1.%2")
	text = rsub(text, "([iu]" .. accent_c .. ")([aeëo])", "%1.%2")
	text = rsub_repeatedly(text, "([iu]" .. accent_c .. ")(" .. V .. accent_c .. ")", "%1.%2")
	text = rsub_repeatedly(text, "i(" .. accent_c .. "*)i", "i%1.i")
	text = rsub_repeatedly(text, "u(" .. accent_c .. "*)u", "u%1.u")

	-- Expand the placeholder characters back into their spellings.
	text = rsub(text, "ĉ", "ch")
	text = rsub(text, "ć", "ts")
	text = rsub(text, "ŋ", "ng")
	text = rsub(text, "ʃ", "sh")
	text = rsub(text, "ǵ.([ei])", "g.u%1")
	text = rsub(text, "ǵ", "gu")
	text = rsub(text, "ġ", "g̃")
	text = rsub(text, "ḱ", "qu")
	text = rsub(text, "r", "rr")
	text = rsub(text, "ɾ", "r")

	-- Tidy up the dots, then switch the syllable separator from "." to "|".
	text = rsub_repeatedly(text, "([.]+)", ".")
	text = rsub(text, "[.]?-[.]?", "-")
	text = rsub(text, "[‿]([^ ])", "|%1")
	text = rsub(text, "[.]([^ ])", "|%1")
	text = rsub(text, "([|])+", "%1")

	-- remove # symbols at word and text boundaries
	text = rsub_repeatedly(text, "([.]?)#([.]?)", "")

	-- Restore the protected periods and turn the ẃ/ý markers back into plain w/y.
	text = rsub(text, "․", ".")
	text = rsub(text, "ẃ", "w")
	text = rsub(text, "ý", "y")

	-- Fix Capitalization --
	text = fix_capitalization(text, origtext)

	-- Fix hyphens --
	-- FIXME!!! Why are we relying on looking at the pagename here? This should not be happening.
	origtext = pagename
	-- Only when the pagename and the syllabified text agree once hyphens (and "|") are stripped: turn a "|" back
	-- into "-" wherever the pagename has a hyphen at the corresponding position.
	if (table.concat(rsplit(origtext, "-")) == table.concat(rsplit(table.concat(rsplit(text, "|")), "-"))) then
		-- Count of "|" characters seen so far that have no counterpart in the pagename. Declared local so that it
		-- does not leak into the global environment.
		local syllbreak = 0
		for i=1, #text do
			if text:sub(i,i) == "|" then
				if origtext:sub(i-syllbreak, i-syllbreak) == "-" then
					text = table.concat({text:sub(1, i-1), "-", text:sub(i+1)})
				else
					syllbreak = syllbreak + 1
				end
			end
		end
	end

	-- Reencode Baybayin
	text = rsub(text, "[<][᜶]([^᜶]+)[᜶][>]", function(baybayin)
		-- Syllable bars become "/" and glottal-stop markers are dropped before transcribing. No "|" is left
		-- after this line, so the string is passed to the encoder as is.
		baybayin = baybayin:gsub("|", "/"):gsub("7", "")
		local result = require(baybayin_encode_module).transcribe(baybayin, false, false, false)
		result = rsub(result, " ᜵ ", "|")
		return result
	end)

	-- FIXME! Hack -- up above we changed periods to vertical bars. The rest of the code expects periods so change
	-- them back. We should clean up the code above to leave the periods alone.
	return (text:gsub("|", "%."))
end
-- Syllabify `respelling` and align the result to `pagename`, returning whatever the alignment step returns.
-- When `pagename` is nil, the respelling itself is used as the spelling to align against.
function export.syllabify_and_align(respelling, pagename)
	local spelling = pagename
	if spelling == nil then
		spelling = respelling
	end
	local syllabified = export.syllabify_from_spelling(respelling, spelling)
	return export.align_syllabification_to_spelling(syllabified, spelling)
end
-- Join `text1` (e.g. a prefix, optionally ending in hyphens/spaces) to `text2`, adjusting a nasal at the end of
-- `text1` according to `assimilation`:
--   "partial": the nasal takes the place of articulation of the following consonant.
--   "total":   the nasal additionally replaces the following consonant, with special handling when the first
--              syllable of `text2` is reduplicated.
--   any other value: the two strings are simply concatenated.
-- The final nasal is held in a placeholder (ŋ, ṃ, ṇ) while the rules run and expanded back at the end.
local function nasal_adjust(text1, text2, assimilation)
	local t1 = text1
	-- Swap the final nasal for its placeholder, keeping any trailing hyphen/space separator (%1). The m and n
	-- rules previously dropped the separator, unlike the ng rule, which silently removed a hyphen the caller had
	-- appended; the rules below already allow for a separator after ṃ/ṇ.
	t1 = rsub(t1, "ng([- ]*)$", "ŋ%1")
	t1 = rsub(t1, "m([- ]*)$", "ṃ%1")
	t1 = rsub(t1, "n([- ]*)$", "ṇ%1")
	local result = t1 .. text2
	if assimilation == "partial" then
		result = rsub(result, "[ŋṇ]([- ]*)([bp])", "m%1%2")
		result = rsub(result, "[ŋ]([- ]*)([dlnst])", "n%1%2")
		result = rsub(result, "[ṇ]([- ]*)([kgʔ])", "ŋ%1%2")
	elseif assimilation == "total" then
		-- Reduplicated first syllable: both copies of the initial consonant are replaced by the nasal.
		result = rsub(result, "[ŋṇṃ][- ]*([bp])(" .. V .. ")%1([lr]?)%2(" .. NV .. "+)(" .. V .. ")", "m%2m%3%2%4%5")
		result = rsub(result, "[ŋṇṃ][- ]*([bp])([lr]?)(" .. V .. ")%1%2%3(" .. NV .. "+)(" .. V .. ")", "m%2%3m%2%3%4%5")
		result = rsub(result, "[ŋṇ][- ]*([dnst])(" .. V .. ")%1([lr]?)%2(" .. NV .. "+)(" .. V .. ")", "n%2n%3%2%4%5")
		result = rsub(result, "[ŋṇ][- ]*([dnst])([lr]?)(" .. V .. ")%1%2%3(" .. NV .. "+)(" .. V .. ")", "n%2%3n%2%3%4%5")
		result = rsub(result, "[ŋṇ][- ]*([kgʔ])(" .. V .. ")%1([lr]?)%2(" .. NV .. "+)(" .. V .. ")", "ŋ%2ŋ%3%2%4%5")
		result = rsub(result, "[ŋṇ][- ]*([kgʔ])([lr]?)(" .. V .. ")%1%2%3(" .. NV .. "+)(" .. V .. ")", "ŋ%2%3ŋ%2%3%4%5")
		-- General case: the nasal, any separator and the following consonant collapse into one nasal.
		result = rsub(result, "[ŋṇṃ][- ]*([bp])", "m")
		result = rsub(result, "[ŋṇ][- ]*([dnst])", "n")
		result = rsub(result, "[ŋṇ][- ]*([kgʔ])", "ŋ")
		result = rsub(result, "[ŋṇ]([- ]*)([l])", "n%1%2")
	end
	-- Expand the placeholders back into ordinary letters.
	result = rsub(result, "ŋ", "ng")
	result = rsub(result, "ṃ", "m")
	result = rsub(result, "ṇ", "n")
	return result
end
-- Attach `prefix` to `root` and return the combined form.
-- A vowel-initial root gets a leading "ʔ" placeholder (and, if its first two characters are identical, one
-- between them as well). A hyphen is inserted when `add_hyphen` is truthy, or when the root begins with a vowel
-- and the prefix ends in a consonant. The join itself, including nasal handling per `assimilation`, is
-- delegated to nasal_adjust(); runs of hyphens are then collapsed to one.
local function add_prefix(root, prefix, assimilation, add_hyphen)
	local starts_with_vowel = rfind(ulower(root), "^(" .. V .. ")")
	local prefix_final_consonant = rmatch(prefix, C .. "$")

	local stem = root
	if starts_with_vowel then
		stem = rsub("ʔ" .. stem, "^ʔ(.)%1", "ʔ%1ʔ%1")
	end

	local joiner = ""
	if add_hyphen or (starts_with_vowel and prefix_final_consonant) then
		joiner = "-"
	end

	local combined = nasal_adjust(prefix .. joiner, stem, assimilation)
	return rsub(combined, "[-]+", "-")
end
-- TODO
-- Prefix -- DONE
-- Consonant cluster cases
-- Infix
-- Suffix + changing spellings
-- Circumfix
-- By word affixation
-- Hyphen addition
-- Nasal assimilation
-- Syllabify
-- Reduplication
-- Pronunciation doesn't match spelling of root, provide phonetic spellings
-- D/R change
-- double o or uo
-- Metathesis (nl, w, y)
-- Diacritics (optional)
-- Syncope
-- Baybayin?
-- Attach `affix` to `root` and return the resulting string.
-- The affix type is read from its hyphens: "x-" is a prefix, "-x" a suffix, "-x-" an infix. Only prefixes are
-- acted on in this function; for other types the root is returned with any "ʔ" removed.
--
-- Parameters:
--   root:         the base text; split into words on spaces and then into parts on hyphens.
--   affix:        the affix, with hyphens marking its type.
--   assimilation: passed through to add_prefix().
--   wordct:       1-based index (counting every hyphen-separated part of every word) of the part that receives
--                 the affix; nil or 0 means the first part.
--   add_hyphen:   passed through to add_prefix().
--   metathesis, syllabify: accepted but not used in this function.
function export.add_affix(root, affix, assimilation, wordct, add_hyphen, metathesis, syllabify)
	local leading_hyphen = rfind(affix, "^-")
	local trailing_hyphen = rfind(affix, "-$")

	local affix_type = ""
	if leading_hyphen then
		if trailing_hyphen then
			affix_type = "infix"
		else
			affix_type = "suffix"
		end
	elseif trailing_hyphen then
		affix_type = "prefix"
	end

	-- Strip the type-marking hyphens from the affix.
	local bare_affix = affix
	if leading_hyphen then
		bare_affix = rsub(bare_affix, "^-", "")
	end
	if trailing_hyphen then
		bare_affix = rsub(bare_affix, "-$", "")
	end

	if wordct == nil then
		wordct = 0
	end

	-- Running count of parts visited so far, across all words.
	local part_no = 0
	local words = rsplit(root, " ")
	for w = 1, #words do
		local parts = rsplit(words[w], "-")
		for p = 1, #parts do
			part_no = part_no + 1
			local part = parts[p]
			local is_target = (wordct == 0 and part_no == 1) or part_no == wordct
			if affix_type == "prefix" and is_target then
				part = add_prefix(part, bare_affix, assimilation, add_hyphen)
			end
			-- Drop the glottal-stop placeholder from every part, affixed or not.
			parts[p] = rsub(part, "ʔ", "")
		end
		words[w] = table.concat(parts, '-')
	end
	return table.concat(words, " ")
end
return export