Module:Ko-translit/data
Appearance
This module is rated as pre-alpha. It is unfinished, and may or may not be in active development. It should not be used from article namespace pages. Modules remain pre-alpha until the original editor (or someone who takes one over if it is abandoned for some time) is satisfied with the basic structure.
Data module for Module:Ko-translit.
-- Module table; each mapping below is stored on it as a field.
local p = {}

-- Liaison (연음): a syllable-final consonant followed by the silent
-- syllable-initial ieung (ᄋ).
-- Keys are written as Lua patterns (note the character class in one of them);
-- values are the jamo that stand in for the matched pair.
p.null_init_ieung = {
    ["ᆨᄋ"] = "ᄀ",
    ["ᆩᄋ"] = "ᄁ",
    ["ᆪᄋ"] = "ᆨᄉ",
    ["ᆬᄋ"] = "ᆫᄌ",
    ["ᆮᄋ"] = "ᄃ",
    ["[ᆯᆶ]ᄋ"] = "ᄅ",
    ["ᆰᄋ"] = "ᆯᄀ",
    ["ᆱᄋ"] = "ᆯᄆ",
    ["ᆲᄋ"] = "ᆯᄇ",
    ["ᆳᄋ"] = "ᆯᄉ",
    ["ᆴᄋ"] = "ᆯᄐ",
    ["ᆵᄋ"] = "ᆯᄑ",
    ["ᆸᄋ"] = "ᄇ",
    ["ᆹᄋ"] = "ᆸᄉ",
    ["ᆺᄋ"] = "ᄉ",
    ["ᆻᄋ"] = "ᄊ",
    ["ᆽᄋ"] = "ᄌ",
    ["ᆾᄋ"] = "ᄎ",
    ["ᆿᄋ"] = "ᄏ",
    ["ᇀᄋ"] = "ᄐ",
    ["ᇁᄋ"] = "ᄑ",
    ["ᇂᄋ"] = "ᄋ", -- final hieut drops out, leaving the silent initial; 좋아 [조아]
}
-- Aspiration caused by a syllable-final hieut (alone or as the second member
-- of ㄶ / ㅀ): the following plain initial is swapped for its aspirated
-- counterpart, and any leading ㄴ / ㄹ of the cluster stays behind as the final.
-- The "+ ㅂ" rows do not occur in real words; they are listed only so that
-- every plain stop/affricate is covered.
p.process_hieut = {
    -- ㄶ + initial
    ["ᆭᄀ"] = "ᆫᄏ",
    ["ᆭᄃ"] = "ᆫᄐ",
    ["ᆭᄇ"] = "ᆫᄑ",
    ["ᆭᄌ"] = "ᆫᄎ",
    -- ㅀ + initial
    ["ᆶᄀ"] = "ᆯᄏ",
    ["ᆶᄃ"] = "ᆯᄐ",
    ["ᆶᄇ"] = "ᆯᄑ",
    ["ᆶᄌ"] = "ᆯᄎ",
    -- ㅎ + initial
    ["ᇂᄀ"] = "ᄏ",
    ["ᇂᄃ"] = "ᄐ",
    ["ᇂᄇ"] = "ᄑ",
    ["ᇂᄌ"] = "ᄎ",
}
-- Extra rules in the same family as the hieut table, kept separate because
-- they are meant for the mr (McCune–Reischauer) output only: a final
-- consonant or cluster followed by initial ㅅ yields tense ㅆ, with ㄴ / ㄹ
-- kept as the final where the cluster starts with one.
p.process_hieut_additional_mr = {
    ["[ᆬᆭ]ᄉ"] = "ᆫᄊ",
    ["[ᆲᆴᆶ]ᄉ"] = "ᆯᄊ",
    ["ᇂᄉ"] = "ᄊ",
}
-- Neutralization of syllable-final consonants: each character class of final
-- jamo (single or cluster) is reduced to one representative final.
p.neutralize_syl_final_consonants = {
    ["[ᆩᆪᆰᆿ]"] = "ᆨ",
    ["[ᆬᆭ]"] = "ᆫ",
    ["[ᆺᆻᆽᆾᇀᇂ]"] = "ᆮ",
    ["[ᆲᆳᆴᆶ]"] = "ᆯ",
    ["ᆱ"] = "ᆷ",
    ["[ᆵᆹᇁ]"] = "ᆸ",
}
-- Irregular readings marked in the input with "@" between two jamo.
--   final + @ + silent ieung : 절음 법칙; the final moves to the next syllable
--   ㄴ + @ + ㄹ               : read as [ㄴㄴ]
p.at_irregularities = {
    ["ᆨ@ᄋ"] = "ᄀ",
    ["ᆮ@ᄋ"] = "ᄃ", -- e.g. 웃어른 [우더른]
    ["ᆯ@ᄋ"] = "ᄅ",
    ["ᆸ@ᄋ"] = "ᄇ",
    ["ᆫ@ᄅ"] = "ᆫᄂ", -- e.g. 음운론 [으문논]
}
-- Consonant assimilation across a syllable boundary. Keys are Lua patterns of
-- final + initial jamo; values are the assimilated final + initial pair
-- (nasalization before ㄴ / ㄹ / ㅁ, and ㄴㄹ / ㄹㄴ both becoming ㄹㄹ).
p.consonant_assimilations = {
    ["[ᆨᆼ][ᄂᄅ]"] = "ᆼᄂ",
    ["ᆨᄆ"] = "ᆼᄆ",
    ["ᆫᄅ"] = "ᆯᄅ",
    ["ᆮ[ᄂᄅ]"] = "ᆫᄂ",
    ["ᆮᄆ"] = "ᆫᄆ",
    ["ᆯᄂ"] = "ᆯᄅ",
    ["[ᆷᆸ][ᄂᄅ]"] = "ᆷᄂ",
    ["ᆸᄆ"] = "ᆷᄆ",
}
-- Further assimilation rules meant for the mr (McCune–Reischauer) output only.
p.consonant_assimilations_additional_mr = {
    -- A final stop merges into the following tense initial, so the result
    -- never contains kkk, ttt, ppp, sss/ts/tss or ttch.
    ["ᆨᄁ"] = "ᄁ",
    ["ᆮᄄ"] = "ᄄ",
    ["ᆸᄈ"] = "ᄈ",
    ["ᆮ[ᄉᄊ]"] = "ᄊ",
    ["ᆮᄍ"] = "ᄍ",
    -- Miscellaneous conversions; note that these two values already contain
    -- Latin letters next to the remaining jamo.
    ["ᆯᄒ"] = "rᄒ",
    ["ᄉ[ᅱ]"] = "shᅱ",
}
-- Vowel romanization for rr (the eo / eu spellings).
-- Every key is a character class holding the conjoining medial jamo and the
-- matching compatibility jamo, so both forms map to the same Latin text.
p.vowels_rr = {
    ["[ᅡㅏ]"] = "a",
    ["[ᅢㅐ]"] = "ae",
    ["[ᅣㅑ]"] = "ya",
    ["[ᅤㅒ]"] = "yae",
    ["[ᅥㅓ]"] = "eo",
    ["[ᅦㅔ]"] = "e",
    ["[ᅧㅕ]"] = "yeo",
    ["[ᅨㅖ]"] = "ye",
    ["[ᅩㅗ]"] = "o",
    ["[ᅪㅘ]"] = "wa",
    ["[ᅫㅙ]"] = "wae",
    ["[ᅬㅚ]"] = "oe",
    ["[ᅭㅛ]"] = "yo",
    ["[ᅮㅜ]"] = "u",
    ["[ᅯㅝ]"] = "wo",
    ["[ᅰㅞ]"] = "we",
    ["[ᅱㅟ]"] = "wi",
    ["[ᅲㅠ]"] = "yu",
    ["[ᅳㅡ]"] = "eu",
    ["[ᅴㅢ]"] = "ui",
    ["[ᅵㅣ]"] = "i",
}
-- Vowel romanization for mr (the breve spellings ŏ / ŭ).
-- Every key is a character class holding the conjoining medial jamo and the
-- matching compatibility jamo, so both forms map to the same Latin text.
p.vowels_mr = {
    ["[ᅡㅏ]"] = "a",
    ["[ᅢㅐ]"] = "ae",
    ["[ᅣㅑ]"] = "ya",
    ["[ᅤㅒ]"] = "yae",
    ["[ᅥㅓ]"] = "ŏ",
    ["[ᅦㅔ]"] = "e",
    ["[ᅧㅕ]"] = "yŏ",
    ["[ᅨㅖ]"] = "ye",
    ["[ᅩㅗ]"] = "o",
    ["[ᅪㅘ]"] = "wa",
    ["[ᅫㅙ]"] = "wae",
    ["[ᅬㅚ]"] = "oe",
    ["[ᅭㅛ]"] = "yo",
    ["[ᅮㅜ]"] = "u",
    ["[ᅯㅝ]"] = "wŏ",
    ["[ᅰㅞ]"] = "we",
    ["[ᅱㅟ]"] = "wi",
    ["[ᅲㅠ]"] = "yu",
    ["[ᅳㅡ]"] = "ŭ",
    ["[ᅴㅢ]"] = "ŭi",
    ["[ᅵㅣ]"] = "i",
}
-- Consonant romanization for rr, one jamo at a time.
-- Keys mix conjoining initials, conjoining finals and compatibility jamo;
-- where a final is spelled differently from its initial (e.g. ᆨ vs ᄀ) it is
-- grouped with the key that carries its spelling. The silent initial ieung
-- maps to the empty string.
p.single_consonants_rr = {
    ["[ᄀㄱ]"] = "g",
    ["[ᄁㄲ]"] = "kk",
    ["ㄳ"] = "ks",
    ["[ᄂᆫㄴ]"] = "n",
    ["ㄵ"] = "nj",
    ["ㄶ"] = "nh",
    ["[ᄃㄷ]"] = "d",
    ["[ᄄㄸ]"] = "tt",
    ["[ᄅㄹ]"] = "r",
    ["ᆯ"] = "l",
    ["ㄺ"] = "lg",
    ["ㄻ"] = "lm",
    ["ㄼ"] = "lb",
    ["ㄽ"] = "ls",
    ["ㄾ"] = "lt",
    ["ㄿ"] = "lp",
    ["ㅀ"] = "lh",
    ["[ᄆᆷㅁ]"] = "m",
    ["[ᄇㅂ]"] = "b",
    ["[ᄈㅃ]"] = "pp",
    ["ㅄ"] = "ps",
    ["[ᄉㅅ]"] = "s",
    ["[ᄊㅆ]"] = "ss",
    ["[ᄋㅇ]"] = "",
    ["ᆼ"] = "ng",
    ["[ᄌㅈ]"] = "j",
    ["[ᄍㅉ]"] = "jj",
    ["[ᄎㅊ]"] = "ch",
    ["[ᄏᆨㅋ]"] = "k",
    ["[ᄐᆮㅌ]"] = "t",
    ["[ᄑᆸㅍ]"] = "p",
    ["[ᄒㅎ]"] = "h",
}
-- Consonant romanization for mr, one jamo at a time.
-- Plain stops are written voiceless (k / t / p / ch) and aspirates carry an
-- apostrophe. Keys mix conjoining initials, conjoining finals and
-- compatibility jamo; the silent initial ieung maps to the empty string.
p.single_consonants_mr = {
    ["[ᄀᆨㄱ]"] = "k",
    ["[ᄁㄲ]"] = "kk",
    ["ㄳ"] = "ks",
    ["[ᄂᆫㄴ]"] = "n",
    ["ㄵ"] = "nj",
    ["ㄶ"] = "nh",
    ["[ᄃᆮㄷ]"] = "t",
    ["[ᄄㄸ]"] = "tt",
    ["[ᄅㄹ]"] = "r",
    ["ᆯ"] = "l",
    ["ㄺ"] = "lg",
    ["ㄻ"] = "lm",
    ["ㄼ"] = "lb",
    ["ㄽ"] = "ls",
    ["ㄾ"] = "lt'",
    ["ㄿ"] = "lp'",
    ["ㅀ"] = "rh",
    ["[ᄆᆷㅁ]"] = "m",
    ["[ᄇᆸㅂ]"] = "p",
    ["[ᄈㅃ]"] = "pp",
    ["ㅄ"] = "ps",
    ["[ᄉㅅ]"] = "s",
    ["[ᄊㅆ]"] = "ss",
    ["[ᄋㅇ]"] = "",
    ["ᆼ"] = "ng",
    ["[ᄌㅈ]"] = "ch",
    ["[ᄍㅉ]"] = "tch",
    ["[ᄎㅊ]"] = "ch'",
    ["[ᄏㅋ]"] = "k'",
    ["[ᄐㅌ]"] = "t'",
    ["[ᄑㅍ]"] = "p'",
    ["[ᄒㅎ]"] = "h",
}
-- Removes the y-glide from a vowel that follows ㅈ, ㅉ or ㅊ.
-- The class [ᄌ-ᄎ] is a code-point range covering exactly those three
-- initials; the captured consonant is put back through %1 and only the vowel
-- changes.
-- NOTE(review): the range presumably relies on a Unicode-aware pattern
-- matcher in the consuming module — confirm.
p.drop_y = {
    ["([ᄌ-ᄎ])ᅣ"] = "%1ᅡ",
    ["([ᄌ-ᄎ])ᅤ"] = "%1ᅢ",
    ["([ᄌ-ᄎ])ᅧ"] = "%1ᅥ",
    ["([ᄌ-ᄎ])ᅨ"] = "%1ᅦ",
    ["([ᄌ-ᄎ])ᅭ"] = "%1ᅩ",
    ["([ᄌ-ᄎ])ᅲ"] = "%1ᅮ",
}
-- Expands enclosed Hangul symbols into ordinary Hangul text in parentheses.
-- Rarely needed in practice, but Unicode classifies these symbols as Hangul
-- too. The parenthesized and the circled variant of a symbol share one key
-- (a two-character class) and therefore expand identically.
p.enclosed_hangul = {
    -- enclosed consonant letters -> letter name
    ["[㈀㉠]"] = "(기역)",
    ["[㈁㉡]"] = "(니은)",
    ["[㈂㉢]"] = "(디귿)",
    ["[㈃㉣]"] = "(리을)",
    ["[㈄㉤]"] = "(미음)",
    ["[㈅㉥]"] = "(비읍)",
    ["[㈆㉦]"] = "(시옷)",
    ["[㈇㉧]"] = "(이응)",
    ["[㈈㉨]"] = "(지읒)",
    ["[㈉㉩]"] = "(치읓)",
    ["[㈊㉪]"] = "(키읔)",
    ["[㈋㉫]"] = "(티읕)",
    ["[㈌㉬]"] = "(피읖)",
    ["[㈍㉭]"] = "(히읗)",
    -- enclosed syllables -> the syllable itself
    ["[㈎㉮]"] = "(가)",
    ["[㈏㉯]"] = "(나)",
    ["[㈐㉰]"] = "(다)",
    ["[㈑㉱]"] = "(라)",
    ["[㈒㉲]"] = "(마)",
    ["[㈓㉳]"] = "(바)",
    ["[㈔㉴]"] = "(사)",
    ["[㈕㉵]"] = "(아)",
    ["[㈖㉶]"] = "(자)",
    ["[㈗㉷]"] = "(차)",
    ["[㈘㉸]"] = "(카)",
    ["[㈙㉹]"] = "(타)",
    ["[㈚㉺]"] = "(파)",
    ["[㈛㉻]"] = "(하)",
    -- enclosed words and abbreviations (single variant each)
    ["㈜"] = "(주)",
    ["㈝"] = "(오전)",
    ["㈞"] = "(오후)",
    ["㉼"] = "(참고)",
    ["㉽"] = "(주의)",
    ["㉾"] = "(우)",
}
-- Protects backslash-escaped special characters ($ % * @ ^ _ `) by turning
-- them into HTML numeric character references, so that later processing does
-- not treat them as markup.
-- Keys are Lua patterns: a literal backslash followed by the character, with
-- the pattern-magic ones ($ % * ^) escaped by "%".
-- Values must be the numeric entities, not the raw characters: mapping a
-- character to itself would protect nothing, and a bare "%" is not a valid
-- gsub replacement string.
p.escaped_to_html_enc = {
    ["\\%$"] = "&#36;",
    ["\\%%"] = "&#37;",
    ["\\%*"] = "&#42;",
    ["\\@"] = "&#64;",
    ["\\%^"] = "&#94;",
    ["\\_"] = "&#95;",
    ["\\`"] = "&#96;",
}

-- Inverse of the table above: turns each numeric character reference back
-- into the plain character once processing is done.
-- The keys contain no pattern-magic characters, so they match literally.
-- "%%" is how a single "%" is written in a gsub replacement string.
p.html_enc_to_ascii = {
    ["&#36;"] = "$",
    ["&#37;"] = "%%",
    ["&#42;"] = "*",
    ["&#64;"] = "@",
    ["&#94;"] = "^",
    ["&#95;"] = "_",
    ["&#96;"] = "`",
}

return p