မဝ်ဂျူ:sa-utilities/translit/post replace fix/Sinh
မံက်ပြာကတ်
Documentation for this module may be created at မဝ်ဂျူ:sa-utilities/translit/post replace fix/Sinh/doc
-- Shorthand: build a string from Unicode code points.
local U = mw.ustring.char
-- U+0DCA, the Sinhala virama.
local virAma = U(0x0DCA)

-- Basic consonants.
local s1con = 'කඛගඝඞචඡජඣඤටඨඩඪණතථදධනපඵබභමයරලවළශෂසහ'
-- Basic consonants plus the single characters composed of 2 SLP1 characters.
local s2con = s1con .. 'ඥඟඦඬඳඹ'

-- Wrap a set of characters in a capturing character class: "([...])".
local function capture_any(chars)
    return '([' .. chars .. '])'
end

local any_s1 = capture_any(s1con)
local any_s2 = capture_any(s2con)

-- ර + virama followed by (and capturing) a basic consonant.
local repha_source = 'ර්' .. any_s1
-- Two consonants separated by a bare virama; both consonants are captured.
local pattern1 = any_s1 .. virAma .. any_s1
-- Same shape as pattern1, over the extended consonant set.
local pattern2 = any_s2 .. virAma .. any_s2

-- U+200D ZERO WIDTH JOINER; its position relative to the virama selects the
-- joining behaviour below.
local ZWJ = U(0x200D)
local liga = virAma .. ZWJ -- virama + ZWJ: 'Conjuncts' in Unicode-speak
local abut = ZWJ .. virAma -- ZWJ + virama: make touching consonants
-- Lookup table of special-cased consonant pairs.
-- Key: the two consonants of a pair concatenated directly (no virama between
-- them). Value: the text that replaces the whole pair. The function defined
-- right after this table indexes it with the two captured consonants and
-- falls back to a default join when a pair is not listed.
-- NOTE(review): several values below look identical to plain
-- consonant + virama + consonant. They may contain an invisible joiner
-- (e.g. U+200D) inside the string literal -- confirm with a hex dump before
-- retyping or "normalising" any of these strings.
local conj = {
-- Disabled entries: pairs that would map to the single prenasalised letters.
-- ['ඞග'] = 'ඟ', ['ඤජ'] = 'ඦ', ['ණඩ'] = 'ඬ', ['නද'] = 'ඳ', ['මබ'] = 'ඹ', -- prenasalised in Sinhalese
['ජඤ'] = 'ඥ', -- jñ (maps to a single character)
-- Shared with Pali:
['කව'] = 'ක්ව', -- kv
['තථ'] = 'ත්ථ', -- tth
['තව'] = 'ත්ව', -- tv
['නථ'] = 'න්ථ', -- nth
['නද'] = 'න්ද', -- nd (the prenasalised alternative for this pair is disabled above)
['නධ'] = 'න්ධ', -- ndh
['නව'] = 'න්ව', -- nv
-- Similar
['කෂ'] = 'ක්ෂ', -- kṣ
['ගධ'] = 'ග්ධ', -- ɡdh
-- Using sanyaka and not listed above
['ඤච'] = 'ඤ්ච', -- ñc
['ඤඡ'] = 'ඤ්ඡ', -- ñch -- ill-supported.
['ටඨ'] = 'ට්ඨ', -- ṭṭh
-- Disabled entry:
-- ['දධ'] = 'ද්ධ', -- ddh -- Minority taste.
['දව'] = 'ද්ව', -- dv
}
--- Join two consonants that were separated by a bare virama.
-- Serves as the replacement callback for `pattern1` and `pattern2`, whose two
-- captures are the consonants on either side of the virama.
-- Deliberately not called `select`: a local of that name would shadow Lua's
-- built-in `select` for the remainder of the module.
-- @param a first captured consonant
-- @param b second captured consonant
-- @return the `conj` entry for the pair when one exists, otherwise the two
--   consonants joined by `abut`
local function join_pair(a, b)
    return conj[a .. b] or (a .. abut .. b)
end

-- List of {pattern, replacement} fixes returned by this module. A replacement
-- is either a string (which may use %1 for the first capture) or a function
-- that receives the captures.
-- NOTE(review): presumably the consumer applies the entries in list order
-- with a gsub-style call -- confirm against the caller.
local fixes = {
    -- NOTE(review): pattern and replacement render identically here; any
    -- difference would be an invisible joiner inside the literal. Confirm,
    -- and consider spelling the replacement with `abut`/`liga` explicitly.
    {'ය්ය', 'ය්ය'}, -- Pairs of ය touch. Triple ය does not occur.
    {virAma..'([යර])', liga..'%1'}, -- ය and ර ligate with preceding.
    {repha_source, 'ර'..liga..'%1'}, -- Form repha.
    {pattern1, join_pair}, -- Join remaining pairs of basic consonants.
    -- Same join over the wider s2con set. Presumably this also catches pairs
    -- the previous pass skipped because matches cannot overlap -- confirm.
    {pattern2, join_pair},
}
return fixes