မဝ်ဂျူ:headword/data
မံက်ပြာကတ်
This module contains data used by Module:headword and Module:headword/templates.
The lists in section 1 are converted into sets with list_to_set, so that they contain [item1] = true, [item2] = true, ....
local headword_page_module = "Module:headword/page"
local list_to_set = require("Module:table").listToSet
local data = {}
------ 1. Lists which are converted into sets. ------
--[==[ var:
Set of very large pages on which label tracking, red link checking and similar work is disabled.
]==]
data.large_pages = list_to_set({
	-- pages that consistently hit timeouts
	"a",
	-- pages that sometimes hit timeouts
	"A", "baba", "de", "e", "i", "lima", "o", "u",
	"и",
	"山", "子", "月", "一", "人",
})
--[==[ var:
Irregular plurals of recognized parts of speech. Each singular maps to its plural, and each plural maps to itself.
The majority are invariable (e.g. `kanji` is its own plural); `mora` with plural `morae` is the exception.
]==]
data.irregular_plurals = list_to_set(
	{
		"cmavo",
		"cmene",
		"fu'ivla",
		"gismu",
		"Han tu",
		"hanja",
		"hanzi",
		"jyutping",
		"kana",
		"kanji",
		"lujvo",
		"phrasebook",
		"pinyin",
		"rafsi",
	},
	-- Invariable terms: the callback hands back the term itself rather than `true`.
	function(_, term)
		return term
	end
)
local irregular_plurals = data.irregular_plurals
-- Plurals that differ from the singular, plus any regular plural whose singular ends in "s"
-- (the module assumes that an input ending in "s" is already plural). Both the singular and
-- the plural must be present as keys; otherwise the module falls back to generating a default
-- plural for the input.
for singular, plural in pairs({ mora = "morae" }) do
	irregular_plurals[singular] = plural
	irregular_plurals[plural] = plural
end
--[==[ var:
Set of recognized lemma parts of speech. When the part of speech given to {{tl|head}} is one of these (or its singular
equivalent), the category 'LANG lemmas' is added automatically. A part of speech that is neither a singular or plural
lemma or non-lemma form, nor an abbreviation expanding to one, causes the page to be added to these tracking categories:
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/LANG]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/pos/POS]]
* [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/pos/POS/LANG]]
]==]
data.lemmas = list_to_set({
	"အက္ခရ်ဂၠေံ",
	"ဝေါဟာဂၠေံ",
	"နာမဝိသေသန",
	"အာက်နဝ်မာဲနဝ်",
	"adpositions",
	"ဝါကျ",
	"အဳမိုဂျဳ",
	"ကြိယာဝိသေသန",
	"အဆက်စုတ်လက္ကရဴ",
	"ambipositions",
	"လိက်ပရေၚ်",
	"circumfixes",
	"ကြိယာဗီုပြၚ်သိုၚ်တၟိ",
	"circumpositions",
	"နာမ်ပါ်ကၞာတ်",
	"သဳမာဝဝ်",
	"ခၠာတ်သတေသဳမာဝဝ်",
	"cmene",
	"combining forms",
	"သမ္ဗန္ဓ",
	"counters",
	"ဖျေံလဝ်သန္နိဋ္ဌာန်",
	"ခရက်သမ္တီလဝ်ဓမံက်ထ္ၜးရမျာၚ်",
	"digraphs",
	-- NOTE(review): "equative adjectives" is also listed in data.nonlemmas; which list wins
	-- depends on the consumer — confirm this is intended.
	"equative adjectives",
	"fu'ivla",
	"gismu",
	"Han characters",
	"Han tu",
	"ဟာန်ဂျာ",
	"ဟာန်သဳ",
	"ideophones",
	"idioms",
	"စန်",
	"initialisms",
	"iteration marks",
	"interfixes",
	"အာမေဍိက်",
	"kana",
	"kanji",
	"အက္ခရ်",
	"ligatures",
	"logograms",
	"lujvo",
	"morae",
	"morphemes",
	"non-constituents",
	"နာမ်",
	-- Listed once: this list previously repeated the entry a second time two lines further
	-- down, which adds nothing to a set.
	"ဂၞန်သၚ်္ချာ",
	"သၚ်္ကေတဂၞန်သၚ်္ချာ",
	"ပစ္စဲ",
	"မအရေဝ်",
	"ကဆံၚ်အကာဲအရာ",
	"ဝါကျကဆံၚ်အကာဲအရာ",
	"predicatives",
	"အဆက်ဂတ",
	"ဝါကျဝိဘတ်",
	"ဝိဘတ်",
	"ပေါရာဏာံပေါရာဒါံ",
	"pronominal adverbs",
	"သဗ္ဗနာမ်",
	"နာမ်မကိတ်ညဳ",
	"punctuation marks",
	"မသ",
	"တံရိုဟ်",
	"တံမအရေဝ်",
	"အဆက်လက္ကရဴ",
	"ဝဏ္ဏ",
	"သၚ်္ကေတ",
	"ကြိယာ",
	"ကၞာတ်အမှိက်",
})
--[==[ var:
Set of recognized non-lemma parts of speech. When the part of speech given to {{tl|head}} is one of these (or its
singular equivalent), the category 'LANG non-lemma forms' is added automatically. Parts of speech that are recognized
neither as lemmas nor as non-lemma forms (directly or through an abbreviation) land in tracking categories; see the
documentation of `data.lemmas`.
]==]
data.nonlemmas = list_to_set({
	"ဗီုပြၚ်လုပ်ကၠောန်စွံလဝ်နကဵုမစိုပ်တရဴ",
	"လုပ်ကၠောန်စွံလဝ်နကဵုမစိုပ်တရဴ",
	"ခ္ဍံက်လိက်ဗၠေတ်",
	"နာမ်ဗီုပြၚ်သိုၚ်တၟိ",
	"adjectival participles",
	"ဗီုပြၚ်နာမဝိသေသန",
	"နာမဝိသေသနဗီုပြၚ်ဣတ္တိလိၚ်",
	"နာမဝိသေသနဗီုပြၚ်ကိုန်ဗဟုဝစ်",
	"ဗီုပြၚ်ကြိယာဝိသေသန",
	"adverbial participles",
	"agent participles",
	"article forms",
	"circumfix forms",
	"combined forms",
	"ဗီုပြၚ်ပတုပ်ရံၚ်နာမဝိသေသန",
	"နာမဝိသေသနပတုပ်ရံၚ်",
	"ဗီုပြၚ်ပတုပ်ရံၚ်ကြိယာဝိသေသန",
	"ကြိယာဝိသေသနပတုပ်ရံၚ်",
	"ပွမထညောံ",
	"contractions",
	"converbs",
	"determiner comparative forms",
	"ဗီုပြၚ်ဖျေံလဝ်သန္နိဋ္ဌာန်",
	"determiner superlative forms",
	"နာမ်မလဟုတ်စှ်ေ",
	"elative adjectives",
	"equative adjective forms",
	"equative adjectives",
	"future participles",
	"ဗီုပြၚ်ကြိယာမဒှ်နာမ်",
	"infinitive forms",
	"infinitives",
	"interjection forms",
	"ယျဝဳဖေန်",
	"kanji readings",
	"negative participles",
	"nominal participles",
	"သဗ္ဗနာမ်ဝိဘတ်",
	"ဗီုပြၚ်ရုပ်နာမ်",
	"နာမ်ဗီုပြၚ်ၜါလ္ပာ်",
	"ဗီုပြၚ်နာမ်",
	"ဗီုပြၚ်နာမ်ပဝ်ကာယ်လ်",
	"နာမ်ဗီုပြၚ်ကိုန်ဗဟုဝစ်",
	"နာမ်ဗီုပြၚ်ပိုၚ်ပြဳ",
	"နာမ်ဗီုပြၚ်ကိုန်ဨကဝုစ်",
	"ဗီုပြၚ်ဂၞန်သၚ်္ချာ",
	"လုပ်ကၠောန်စွံလဝ်",
	"ဗီုပြၚ်လုပ်ကၠောန်စွံလဝ်",
	"ဗီုပြၚ်ကၞာတ်အမှိက်",
	"လုပ်ကၠောန်စွံလဝ်ဟွံတဝ်စၞေဟ်",
	"လုပ်ကၠောန်စွံလဝ်နကဵုအတိက်ပြဟ်ပြေဟ်",
	"လုပ်ကၠောန်စွံလဝ်နကဵုအတိက်",
	"ဗီုပြၚ်လုပ်ကၠောန်စွံလဝ်နကဵုအတိက်",
	"လုပ်ကၠောန်စွံလဝ်ဗီုပြၚ်ဟွံတဝ်စၞေဟ်နူအတိက်",
	"လုပ်ကၠောန်စွံလဝ်မက္ဍိုပ်ပေၚ်ပြဟ်ပြေဟ်",
	"လုပ်ကၠောန်စွံလဝ်ဗီုပြၚ်မက္ဍိုပ်ပေၚ်",
	"လုပ်ကၠောန်စွံလဝ်မက္ဍိုပ်ပေၚ်ဟွံတဝ်စၞေဟ်",
	"ဖေန်အိန်",
	"ကိုန်ဗဟုဝစ်",
	"ဗီုပြၚ်ပသ္ၚောဲထောံ",
	"ဗီုပြၚ်မုက်နာမ်",
	"ဝိဘတ်ပသ္ၚောဲထောံလဝ်",
	"ဗီုပြၚ်ဝိဘတ်",
	"ဝိဘတ်ဗီုပြၚ်သဗ္ဗနာမ်",
	"လုပ်ကၠောန်စွံလဝ်ပစ္စုပ္ပန်ပြဟ်ပြေဟ်",
	"လုပ်ကၠောန်စွံလဝ်ပစ္စုပ္ပန်",
	"လုပ်ကၠောန်စွံလဝ်ပစ္စုပ္ပန်ဗီုပြၚ်ဟွံတဝ်စၞေဟ်",
	"ဗီုပြၚ်သဗ္ဗနာမ်",
	"သဗ္ဗနာမ်ဗီုပြၚ်ပိုၚ်ပြဳ",
	"ဗီုပြၚ်နာမ်မကိတ်ညဳ",
	"နာမ်မကိတ်ညဳဗီုပြၚ်ကိုန်ဗဟုဝစ်",
	"rafsi",
	"ဗီုအက္ခရ်ရောမ",
	"ဗီုပြၚ်တံရိုဟ်",
	"ကိုန်ဨကဝုစ်",
	"ဗီုပြၚ်အဆက်လက္ကရဴ",
	"ဗီုပြၚ်သဒ္ဒာနာမဝိသေသန",
	"သဒ္ဒာနာမဝိသေသန",
	"ဗီုပြၚ်သဒ္ဒာကြိယာဝိသေသန",
	"သဒ္ဒာကြိယာဝိသေသန",
	"ဗီုပြၚ်ကြိယာ",
	"နာမ်ဝါစာ",
	"နာမ်အပြံၚ်အသၠာဲ",
	"ကြိယာအပြံၚ်အသၠာဲ",
})
--[==[ var:
Set of language codes whose headwords are not split into separately linked parts.
]==]
data.no_multiword_links = list_to_set({ "zh" })
--[==[ var:
Set of language codes for which no `LANG multiword terms` category is added. A language can be here because
(a) its words are written without spaces between them; (b) its syllables are written with spaces between them;
(c) variant reconstructions are notated with a tilde surrounded by spaces; (d) it is a sign language, whose pagenames
are multiword descriptions of the gesture(s) making up a single sign; or (e) some other peculiarity.
]==]
data.no_multiword_cat = list_to_set({
	-------- Languages without spaces between words (sometimes spaces between phrases) --------
	"blt", -- Tai Dam
	"ja", -- Japanese
	"khb", -- Lü
	"km", -- Khmer
	"lo", -- Lao
	"mnw", -- Mon
	"my", -- Burmese
	"nan", -- Min Nan (some words in Latin script; hyphens between syllables)
	"nan-hbl", -- Hokkien (some words in Latin script; hyphens between syllables)
	"nod", -- Northern Thai
	"ojp", -- Old Japanese
	"shn", -- Shan
	"sou", -- Southern Thai
	"tdd", -- Tai Nüa
	"th", -- Thai
	"tts", -- Isan
	"twh", -- Tai Dón
	"txg", -- Tangut
	"zh", -- Chinese (all varieties with Chinese characters)
	"zkt", -- Khitan

	-------- Languages with spaces between syllables --------
	"ahk", -- Akha
	"aou", -- A'ou
	"atb", -- Zaiwa
	"byk", -- Biao
	"cdy", -- Chadong
	--"duu", -- Drung; not sure
	--"hmx-pro", -- Proto-Hmong-Mien
	--"hnj", -- Green Hmong; not sure
	"huq", -- Tsat
	"ium", -- Iu Mien
	--"lis", -- Lisu; not sure
	"mtq", -- Muong
	--"mww", -- White Hmong; not sure
	"onb", -- Lingao
	--"sit-gkh", -- Gokhy; not sure
	--"swi", -- Sui; not sure
	"tbq-lol-pro", -- Proto-Loloish
	"tdh", -- Thulung
	"ukk", -- Muak Sa-aak
	"vi", -- Vietnamese
	"yig", -- Wusa Nasu
	"zng", -- Mang

	-------- Languages with ~ with surrounding spaces used to separate variants --------
	"mkh-ban-pro", -- Proto-Bahnaric
	"sit-pro", -- Proto-Sino-Tibetan

	-------- Other weirdnesses --------
	"mul", -- Translingual; gestures, Morse code, etc.
	"aot", -- Atong (India); bullet is a letter

	-------- All sign languages --------
	"ads", "aed", "aen", "afg", "ase", "asf", "asp", "asq", "asw", "bfi",
	"bfk", "bog", "bqn", "bqy", "bvl", "bzs", "cds", "csc", "csd", "cse",
	"csf", "csg", "csl", "csn", "csq", "csr", "doq", "dse", "dsl", "ecs",
	"esl", "esn", "eso", "eth", "fcs", "fse", "fsl", "fss", "gds", "gse",
	"gsg", "gsm", "gss", "gus", "hab", "haf", "hds", "hks", "hos", "hps",
	"hsh", "hsl", "icl", "iks", "ils", "inl", "ins", "ise", "isg", "isr",
	"jcs", "jhs", "jls", "jos", "jsl", "jus", "kgi", "kvk", "lbs", "lls",
	"lsl", "lso", "lsp", "lst", "lsy", "lws", "mdl", "mfs", "mre", "msd",
	"msr", "mzc", "mzg", "mzy", "nbs", "ncs", "nsi", "nsl", "nsp", "nsr",
	"nzs", "okl", "pgz", "pks", "prl", "prz", "psc", "psd", "psg", "psl",
	"pso", "psp", "psr", "pys", "rms", "rsl", "rsm", "sdl", "sfb", "sfs",
	"sgg", "sgx", "slf", "sls", "sqk", "sqs", "ssp", "ssr", "svk", "swl",
	"syy", "tse", "tsm", "tsq", "tss", "tsy", "tza", "ugn", "ugy", "ukl",
	"uks", "vgt", "vsi", "vsl", "vsv", "xki", "xml", "xms", "ygs", "ysl",
	"zib", "zsl",
})
--[==[ var:
Set of language codes in which a hyphen does not count as a word separator for the purposes of the
`LANG multiword terms` category. Every entry should carry a comment giving the reason for its inclusion.
]==]
data.hyphen_not_multiword_sep = list_to_set({
	"akk",         -- Akkadian; hyphens between syllables
	"akl",         -- Aklanon; hyphens for mid-word glottal stops
	"ber-pro",     -- Proto-Berber; morphemes separated by hyphens
	"ceb",         -- Cebuano; hyphens for mid-word glottal stops
	"cnk",         -- Khumi Chin; hyphens used in single words
	"cpi",         -- Chinese Pidgin English; Chinese-derived words with hyphens between syllables
	"de",          -- German; too many false positives
	"esx-esk-pro", -- hyphen used to separate morphemes
	"fi",          -- Finnish; hyphen used to separate components in compound words if the final and initial vowels match, respectively
	"gd",          -- Scottish Gaelic; too many false positives like [[a-chianaibh]], [[a-nìos]], [[an-dè]] and other adverbs in a- and an-
	"hil",         -- Hiligaynon; hyphens for mid-word glottal stops
	"hnn",         -- Hanunoo; too many false positives
	"ilo",         -- Ilocano; hyphens for mid-word glottal stops
	"kne",         -- Kankanaey; hyphens for mid-word glottal stops
	"lcp",         -- Western Lawa; dash as syllable joiner
	"lwl",         -- Eastern Lawa; dash as syllable joiner
	"mfa",         -- Pattani Malay in Thai script; dash as syllable joiner
	"mkh-vie-pro", -- Proto-Vietic; morphemes separated by hyphens
	"msb",         -- Masbatenyo; too many false positives
	"tl",          -- Tagalog; too many false positives
	"war",         -- Waray-Waray; too many false positives
	"yo",          -- Yoruba; hyphens used to show lengthened nasal vowels
})
--[==[ var:
Set of language codes for which `LANG masculine nouns` and similar categories are not added. These are generally
languages that have no gender but reuse the gender field for something else. (This is a massive hack and should be
changed.)
]==]
data.no_gender_cat = list_to_set({
	-- Languages without gender but which use the gender field for other purposes
	"ja", "th",
})
--[==[ var:
Set of language codes for which [[Module:headword]] should not try to generate a transliteration, even when the term
is written in a non-Latin script. FIXME: Notate reasons why each language is in this list.
]==]
data.notranslit = list_to_set({
	"ams", "az", "bbc", "bug", "cdo", "cia", "cjm", "cjy", "cmn", "cnp",
	"cpi", "cpx", "csp", "czh", "czo", "gan", "hak", "hnm", "hsn", "ja",
	"kzg", "lad", "ltc", "luh", "lzh", "mnp", "ms", "mul", "mvi",
	"nan", "nan-dat", "nan-hbl", "nan-hlh", "nan-lnx", "nan-tws", "nan-zhe", "nan-zsh",
	"och", "oj", "okn", "ryn", "rys", "ryu", "sh", "sjc", "tgt", "th",
	"tkn", "tly", "txg", "und", "vi", "wuu", "xug", "yoi", "yox", "yue",
	"za", "zh", "zhx-sic", "zhx-tai",
})
--[==[ var:
Set of language codes for which `sccat` defaults to true, so that categories of the form `LANG POS in SCRIPT script`
are generated automatically. Override with {{para|sccat|0}} in {{tl|head}}, or by setting `sccat` to `false` in Lua.
]==]
data.default_sccat = list_to_set({
	"inc-apa", "inc-ash",
	"kfr", "ks", "mr", "mwr",
	"inc-oaw", "inc-ohi",
	"omr",
	"inc-opa",
	"phr", "pi", "pra", "sa", "skr", "sd",
})
--[==[ var:
Set of script codes that get a script-tagged display title.
]==]
data.toBeTagged = list_to_set({
	"Ahom",
	"Arab", "fa-Arab", "glk-Arab", "kk-Arab", "ks-Arab", "ku-Arab", "mzn-Arab", "ms-Arab",
	"ota-Arab", "pa-Arab", "ps-Arab", "sd-Arab", "tt-Arab", "ug-Arab", "ur-Arab",
	"Armi", "Armn", "Avst",
	"Bali", "Bamu", "Batk",
	"Beng", "as-Beng",
	"Bopo", "Brah", "Brai", "Bugi", "Buhd",
	"Cakm", "Cans", "Cari", "Cham", "Cher", "Copt", "Cprt", "Cyrl", "Cyrs",
	"Deva", "Dsrt",
	"Egyd", "Egyp", "Ethi",
	"Geok", "Geor", "Glag", "Goth",
	"Grek", "Polyt", "polytonic",
	"Gujr", "Guru",
	"Hang", "Hani", "Hano", "Hebr", "Hira", "Hluw",
	"Ital",
	"Java",
	"Kali", "Kana", "Khar", "Khmr", "Knda", "Kthi",
	"Lana", "Laoo",
	"Latn", "Latf", "Latg", "Latnx", "Latinx", "pjt-Latn",
	"Lepc", "Limb", "Linb", "Lisu", "Lyci", "Lydi",
	"Mand", "Mani", "Marc", "Merc", "Mero", "Mlym",
	"Mong", "mnc-Mong", "sjo-Mong", "xwo-Mong",
	"Mtei", "Mymr",
	"Narb", "Nkoo", "Nshu",
	"Ogam", "Olck", "Orkh", "Orya", "Osma", "Ougr",
	"Palm", "Phag", "Phli", "Phlv", "Phnx", "Plrd", "Prti",
	"Rjng", "Runr",
	"Samr", "Sarb", "Saur", "Sgnw", "Shaw", "Shrd", "Sinh", "Sora", "Sund", "Sylo", "Syrc",
	"Tagb", "Tale", "Talu", "Taml", "Tang", "Tavt", "Telu", "Tfng", "Tglg", "Thaa", "Thai", "Tibt",
	"Ugar",
	"Vaii",
	"Xpeo", "Xsux",
	"Yiii",
	"Zmth", "Zsym",
	"Ipach", "Music", "Rumin",
})
--[==[ var:
Parts of speech which will not be categorised in categories like `English terms spelled with É` if the term is the
character in question (e.g. the letter entry for English [[é]]). This contrasts with entries like the French adjective
[[m̂]], which is a one-letter word spelled with the letter.

The set holds both the English part-of-speech names and the localized names that `data.pos_aliases` gives for
them. The other part-of-speech tables in this module use the localized names, so a set holding only English names
could not match localized parts of speech.
]==]
data.pos_not_spelled_with_self = list_to_set({
	"diacritical marks",
	"Han characters",
	"Han tu",
	"hanja",
	"hanzi",
	"iteration marks",
	"kana",
	"kanji",
	"letters",
	"ligatures",
	"logograms",
	"morae",
	"numeral symbols",
	"numerals",
	"punctuation marks",
	"syllables",
	"symbols",
	-- Localized equivalents; each string is the value data.pos_aliases gives for the English name in
	-- the trailing comment.
	-- NOTE(review): assumes the consumer looks parts of speech up by their localized name — confirm
	-- against Module:headword. The entries are harmless if it does not.
	"ခရက်သမ္တီလဝ်ဓမံက်ထ္ၜးရမျာၚ်", -- diacritical mark
	"ဟာန်သဳ", -- hanzi
	"အက္ခရ်", -- letter
	"သၚ်္ကေတဂၞန်သၚ်္ချာ", -- numeral symbol
	"ဂၞန်သၚ်္ချာ", -- numeral
	"သၚ်္ကေတ", -- symbol
})
------ 2. Lists not converted into sets. ------
--[==[ var:
Recognized aliases for parts of speech (param 2=). Each key is a short or alternative form and each value is the
canonical part-of-speech name. The original documentation describes the value as singular (not pluralized) so that
the same table can be used in [[Module:form of]] for the {{para|p}}/{{para|POS}} param and in [[Module:links]] for
the pos= param. Any part of speech, abbreviated or not, can additionally be suffixed with `f` to produce the
corresponding non-lemma form part of speech, such as `adjf`, `af` or `adjectivef` for `adjective form`, and `nounf`
or `nf` for `noun form`. This expansion happens even when it makes no sense for the given part of speech (e.g. `pclf`
expands to `particle form` and `symf` expands to `symbol form`), and currently also, at least in [[Module:headword]]
(but not [[Module:links]]), even if the part before the `f` is not a recognized part of speech or abbreviation (hence
`nerf` expands to `ner form`).

Every key must appear exactly once: Lua leaves the order of assignments in a table constructor undefined, so a
repeated key with differing values has no guaranteed result.
]==]
data.pos_aliases = {
	a = "နာမဝိသေသန",
	adj = "နာမဝိသေသန",
	adv = "ကြိယာဝိသေသန",
	art = "ပစ္စဲ",
	-- NOTE(review): `det` maps to the English name although `determiner` below has a localized
	-- value — confirm whether this is intended.
	det = "determiner",
	cnum = "cardinal number",
	conj = "သမ္ဗန္ဓ",
	conv = "converb",
	int = "အာမေဍိက်",
	interj = "အာမေဍိက်",
	intj = "အာမေဍိက်",
	interjections = "အာမေဍိက်",
	interjection = "အာမေဍိက်",
	n = "နာမ်",
	nounf = "ဗီုပြၚ်နာမ်",
	num = "ဂၞန်သၚ်္ချာ",
	part = "လုပ်ကၠောန်စွံလဝ်",
	-- NOTE(review): `pcl` has the same value as `participle`, while the documentation above says
	-- `pclf` expands to `particle form` and `particle` below has a different value — confirm.
	pcl = "လုပ်ကၠောန်စွံလဝ်",
	phr = "ဝါကျ",
	pn = "နာမ်မကိတ်ညဳ",
	postp = "ကဆံၚ်အကာဲအရာ",
	pre = "ဝိဘတ်",
	prep = "ဝိဘတ်",
	pro = "သဗ္ဗနာမ်",
	pron = "သဗ္ဗနာမ်",
	prop = "နာမ်မကိတ်ညဳ",
	proper = "နာမ်မကိတ်ညဳ",
	onum = "ordinal number",
	rom = "ဗီုအက္ခရ်ရောမ",
	romanization = "ဗီုအက္ခရ်ရောမ",
	romanizations = "ဗီုအက္ခရ်ရောမ",
	v = "ကြိယာ",
	vb = "ကြိယာ",
	vi = "intransitive verb",
	vt = "transitive verb",
	vti = "transitive and intransitive verb",
	["diminutive noun"] = "နာမ်မလဟုတ်စှ်ေ",
	["diminutive nouns"] = "နာမ်မလဟုတ်စှ်ေ",
	noun = "နာမ်",
	nouns = "နာမ်",
	["noun form"] = "ဗီုပြၚ်နာမ်",
	["noun forms"] = "ဗီုပြၚ်နာမ်",
	verb = "ကြိယာ",
	verbs = "ကြိယာ",
	["verb form"] = "ဗီုပြၚ်ကြိယာ",
	["verb forms"] = "ဗီုပြၚ်ကြိယာ",
	verbf = "ဗီုပြၚ်ကြိယာ",
	adjective = "နာမဝိသေသန",
	adjectives = "နာမဝိသေသန",
	adjf = "ဗီုပြၚ်နာမဝိသေသန",
	adjectivef = "ဗီုပြၚ်နာမဝိသေသန",
	["adjective form"] = "ဗီုပြၚ်နာမဝိသေသန",
	["adjective forms"] = "ဗီုပြၚ်နာမဝိသေသန",
	adverb = "ကြိယာဝိသေသန",
	adverbs = "ကြိယာဝိသေသန",
	["adverb form"] = "ဗီုပြၚ်ကြိယာဝိသေသန",
	["adverb forms"] = "ဗီုပြၚ်ကြိယာဝိသေသန",
	-- (`interjection` is defined once, above; a second identical definition was removed here.)
	["interjection form"] = "ဗီုပြၚ်အာမေဍိက်",
	["interjection forms"] = "ဗီုပြၚ်အာမေဍိက်",
	pronoun = "သဗ္ဗနာမ်",
	pronouns = "သဗ္ဗနာမ်",
	["pronoun form"] = "ဗီုပြၚ်သဗ္ဗနာမ်",
	["pronoun forms"] = "ဗီုပြၚ်သဗ္ဗနာမ်",
	preposition = "ဝိဘတ်",
	["preposition form"] = "ဗီုပြၚ်ဝိဘတ်",
	["preposition forms"] = "ဗီုပြၚ်ဝိဘတ်",
	suffix = "အဆက်လက္ကရဴ",
	["suffix form"] = "ဗီုပြၚ်အဆက်လက္ကရဴ",
	["suffix forms"] = "ဗီုပြၚ်အဆက်လက္ကရဴ",
	phrase = "ဝါကျ",
	["phrase form"] = "ဗီုပြၚ်ဝါကျ",
	["phrase forms"] = "ဗီုပြၚ်ဝါကျ",
	numeral = "ဂၞန်သၚ်္ချာ",
	numerals = "ဂၞန်သၚ်္ချာ",
	["numeral form"] = "ဗီုပြၚ်ဂၞန်သၚ်္ချာ",
	["numeral forms"] = "ဗီုပြၚ်ဂၞန်သၚ်္ချာ",
	proverb = "ပေါရာဏာံပေါရာဒါံ",
	conjunction = "သမ္ဗန္ဓ",
	conjunctions = "သမ္ဗန္ဓ",
	["conjunction form"] = "ဗီုပြၚ်သမ္ဗန္ဓ",
	["conjunction forms"] = "ဗီုပြၚ်သမ္ဗန္ဓ",
	contraction = "ပွမထညောံ",
	["contraction form"] = "ဗီုပြၚ်ပွမထညောံ",
	["contraction forms"] = "ဗီုပြၚ်ပွမထညောံ",
	article = "ပစ္စဲ",
	articles = "ပစ္စဲ",
	["article form"] = "ဗီုပြၚ်ပစ္စဲ",
	["article forms"] = "ဗီုပြၚ်ပစ္စဲ",
	root = "တံရိုဟ်",
	["root form"] = "ဗီုပြၚ်တံရိုဟ်",
	["root forms"] = "ဗီုပြၚ်တံရိုဟ်",
	prefix = "အဆက်ဂတ",
	["prefix form"] = "ဗီုပြၚ်အဆက်ဂတ",
	["prefix forms"] = "ဗီုပြၚ်အဆက်ဂတ",
	particle = "ကၞာတ်အမှိက်",
	classifier = "နာမ်ပါ်ကၞာတ်",
	determiner = "ဖျေံလဝ်သန္နိဋ္ဌာန်",
	determiners = "ဖျေံလဝ်သန္နိဋ္ဌာန်",
	["mutated noun"] = "နာမ်အပြံၚ်အသၠာဲ",
	["mutated verb"] = "ကြိယာအပြံၚ်အသၠာဲ",
	["determiner form"] = "ဗီုပြၚ်ဖျေံလဝ်သန္နိဋ္ဌာန်",
	["determiner forms"] = "ဗီုပြၚ်ဖျေံလဝ်သန္နိဋ္ဌာန်",
	["reconstructed determiner"] = "ဖျေံလဝ်သန္နိဋ္ဌာန်နကဵုဗီုပြၚ်သိုၚ်တၟိ",
	["reconstructed verb"] = "ကြိယာဗီုပြၚ်သိုၚ်တၟိ",
	["reconstructed noun"] = "နာမ်ဗီုပြၚ်သိုၚ်တၟိ",
	counter = "ရိုဟ်သၠုဲ",
	-- (An earlier, conflicting `postposition` definition was removed from this position; the single
	-- definition further down matches `postp` above and the entry in data.lemmas.)
	misspelling = "ခ္ဍံက်လိက်ဗၠေတ်",
	final = "အဆံၚ်လက္ကရဴ",
	["verb finals"] = "ကြိယာအဆံၚ်လက္ကရဴ",
	["transitive inanimate verb finals"] = "ကြိယာအပြံၚ်အလှာဲအဆံၚ်လက္ကရဴမသ္ကုဟၟဲကဵုလမျီုနကဵုဘာသာ",
	affix = "အဆက်စုတ်လက္ကရဴ",
	["affix form"] = "ဗီုပြၚ်အဆက်စုတ်လက္ကရဴ",
	["affix forms"] = "ဗီုပြၚ်အဆက်စုတ်လက္ကရဴ",
	["diacritical mark"] = "ခရက်သမ္တီလဝ်ဓမံက်ထ္ၜးရမျာၚ်",
	postposition = "ကဆံၚ်အကာဲအရာ",
	["postposition form"] = "ဗီုပြၚ်ကဆံၚ်အကာဲအရာ",
	symbol = "သၚ်္ကေတ",
	symbols = "သၚ်္ကေတ",
	infix = "စန်",
	letter = "အက္ခရ်",
	hanzi = "ဟာန်သဳ",
	["prepositional phrase"] = "ဝါကျဝိဘတ်",
	["prepositional pronoun"] = "သဗ္ဗနာမ်ဝိဘတ်",
	stem = "တံမအရေဝ်",
	participle = "လုပ်ကၠောန်စွံလဝ်",
	participles = "လုပ်ကၠောန်စွံလဝ်",
	["past participle"] = "လုပ်ကၠောန်စွံလဝ်နကဵုအတိက်",
	["past participle form"] = "ဗီုပြၚ်လုပ်ကၠောန်စွံလဝ်နကဵုအတိက်",
	["proper noun"] = "နာမ်မကိတ်ညဳ",
	["proper noun form"] = "ဗီုပြၚ်နာမ်မကိတ်ညဳ",
	["participle form"] = "ဗီုပြၚ်လုပ်ကၠောန်စွံလဝ်",
	["comparative adjective"] = "နာမဝိသေသနပတုပ်ရံၚ်",
	["superlative adjective"] = "သဒ္ဒာနာမဝိသေသန",
	["numeral symbol"] = "သၚ်္ကေတဂၞန်သၚ်္ချာ",
	["particle forms"] = "ဗီုပြၚ်ကၞာတ်အမှိက်",
	["particle form"] = "ဗီုပြၚ်ကၞာတ်အမှိက်",
	["present participles"] = "လုပ်ကၠောန်စွံလဝ်ပစ္စုပ္ပန်",
	["present participle"] = "လုပ်ကၠောန်စွံလဝ်ပစ္စုပ္ပန်",
	["alternative form"] = "ဗီုပြၚ်တၞဟ်ခြာ",
	singulative = "ကိုန်ဨကဝုစ်",
	singulatives = "ကိုန်ဨကဝုစ်",
	gerund = "ဗီုပြၚ်ကြိယာမဒှ်နာမ်",
	gerunds = "ဗီုပြၚ်ကြိယာမဒှ်နာမ်",
}
--[==[ var:
Parts of speech for which gender/number categories such as `German masculine nouns` or `Russian imperfective verbs`
are generated when the headword has the appropriate gender/number. The map canonicalizes the part of speech used in
the category name; specifically, proper nouns categorize like nouns.
]==]
do
	local noun, suffix, verb = "နာမ်", "အဆက်လက္ကရဴ", "ကြိယာ"
	data.pos_for_gender_number_cat = {
		[noun] = noun,
		-- Proper nouns share the noun categories.
		["နာမ်မကိတ်ညဳ"] = noun,
		[suffix] = suffix,
		-- We include verbs because impf and pf are valid "genders".
		[verb] = verb,
	}
end
--[==[ var:
Per-language lower limit for a word to count as "long". Used to categorize terms automatically into e.g.
[[:Category:Long English words]]. A language without an entry here gets no such categorization.
]==]
data.long_word_thresholds = {
	af = 20,
	bg = 20,
	cy = 25,
	de = 20,
	en = 25,
	es = 20,
	fr = 20,
	ka = 20,
	sv = 20,
	tl = 25,
}
------ 3. Page-wide processing (so that it only needs to be done once per page). ------
-- Store the result of the page-level processing on `data`.
local page = require(headword_page_module).process_page()
data.page = page
-- Expose a couple of page properties directly on `data` for ease of use.
data.pagename = page.pagename
data.encoded_pagename = page.encoded_pagename
return data