Module:Ko-translit/data
Appearance
This module is rated as pre-alpha. It is unfinished, and may or may not be in active development. It should not be used from article namespace pages. Modules remain pre-alpha until the original editor (or someone who takes one over if it is abandoned for some time) is satisfied with the basic structure.
This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing.
Data module for Module:Ko-translit.
local p = {}

-- Liaison (연음): a syllable-final consonant followed by the null initial ㅇ.
-- Each key is a pattern (final jamo + initial ㅇ) and each value is the text
-- that replaces it; presumably applied with gsub by Module:Ko-translit.
-- Simple finals become the next initial; cluster finals keep their first
-- half as the final and hand the second half over as the initial.
p.null_init_ieung = {
  ["ᆨᄋ"] = "ᄀ",
  ["ᆩᄋ"] = "ᄁ",
  ["ᆪᄋ"] = "ᆨᄉ",
  ["ᆬᄋ"] = "ᆫᄌ",
  ["ᆮᄋ"] = "ᄃ",
  ["[ᆯᆶ]ᄋ"] = "ᄅ",
  ["ᆰᄋ"] = "ᆯᄀ",
  ["ᆱᄋ"] = "ᆯᄆ",
  ["ᆲᄋ"] = "ᆯᄇ",
  ["ᆳᄋ"] = "ᆯᄉ",
  ["ᆴᄋ"] = "ᆯᄐ",
  ["ᆵᄋ"] = "ᆯᄑ",
  ["ᆸᄋ"] = "ᄇ",
  ["ᆹᄋ"] = "ᆸᄉ",
  ["ᆺᄋ"] = "ᄉ",
  ["ᆻᄋ"] = "ᄊ",
  ["ᆽᄋ"] = "ᄌ",
  ["ᆾᄋ"] = "ᄎ",
  ["ᆿᄋ"] = "ᄏ",
  ["ᇀᄋ"] = "ᄐ",
  ["ᇁᄋ"] = "ᄑ",
  ["ᇂᄋ"] = "ᄋ", -- final ㅎ is silent here; 좋아 [조아]
}
-- Aspiration triggered by a syllable-final ㅎ (alone or as the second half of
-- ㄶ / ㅀ): the following plain initial is replaced by its aspirated
-- counterpart, and any first half of the cluster stays behind as the final.
-- Trivia: {ㄶ, ㅀ, ㅎ} + ㅂ does not actually occur, but it is listed for
-- completeness (syllable-final ㅎ is there for aspiration anyway).
p.process_hieut = {
  ["ᆭᄀ"] = "ᆫᄏ",
  ["ᆭᄃ"] = "ᆫᄐ",
  ["ᆭᄇ"] = "ᆫᄑ",
  ["ᆭᄌ"] = "ᆫᄎ",
  ["ᆶᄀ"] = "ᆯᄏ",
  ["ᆶᄃ"] = "ᆯᄐ",
  ["ᆶᄇ"] = "ᆯᄑ",
  ["ᆶᄌ"] = "ᆯᄎ",
  ["ᇂᄀ"] = "ᄏ",
  ["ᇂᄃ"] = "ᄐ",
  ["ᇂᄇ"] = "ᄑ",
  ["ᇂᄌ"] = "ᄎ",
}
-- Additional ㅎ combinations, used for MR only: the listed finals followed by
-- initial ㅅ yield the tense ㅆ.
p.process_hieut_additional_mr = {
  ["[ᆬᆭ]ᄉ"] = "ᆫᄊ",
  ["[ᆲᆴᆶ]ᄉ"] = "ᆯᄊ",
  ["ᇂᄉ"] = "ᄊ",
}
-- Neutralization of syllable-final consonants: every final listed in a
-- bracket class collapses to the single representative final on the right.
p.neutralize_syl_final_consonants = {
  ["[ᆩᆪᆰᆿ]"] = "ᆨ",
  ["[ᆬᆭ]"] = "ᆫ",
  ["[ᆺᆻᆽᆾᇀᇂ]"] = "ᆮ",
  ["[ᆲᆳᆴᆶ]"] = "ᆯ",
  ["ᆱ"] = "ᆷ",
  ["[ᆵᆹᇁ]"] = "ᆸ",
}
-- Irregular readings marked with "@" between two syllables:
--   * 절음 법칙 -- the final is carried over to a following null initial ㅇ;
--   * ㄴ + ㄹ pronounced [ㄴㄴ].
-- The other documented irregularities are handled automatically and need no
-- entry here.
p.at_irregularities = {
  ["ᆨ@ᄋ"] = "ᄀ",
  ["ᆮ@ᄋ"] = "ᄃ", -- 웃어른 [우더른]
  ["ᆯ@ᄋ"] = "ᄅ",
  ["ᆸ@ᄋ"] = "ᄇ",
  ["ᆫ@ᄅ"] = "ᆫᄂ", -- 음운론 [으문논]
}
-- Extra "@" rules used for RR only: ㄱㅎ / ㄷㅎ / ㅂㅎ across the marker
-- become the aspirated initial, which is romanized k / t / p.
p.at_irregularities_additional_rr = {
  ["ᆨ@ᄒ"] = "ᄏ",
  ["ᆮ@ᄒ"] = "ᄐ",
  ["ᆸ@ᄒ"] = "ᄑ",
}
-- Consonant assimilation across a syllable boundary: each key is a pattern
-- of final + following initial, each value the assimilated final + initial.
p.consonant_assimilations = {
  ["[ᆨᆼ][ᄂᄅ]"] = "ᆼᄂ",
  ["ᆨᄆ"] = "ᆼᄆ",
  ["ᆫᄅ"] = "ᆯᄅ",
  ["ᆮ[ᄂᄅ]"] = "ᆫᄂ",
  ["ᆮᄆ"] = "ᆫᄆ",
  ["ᆯᄂ"] = "ᆯᄅ",
  ["[ᆷᆸ][ᄂᄅ]"] = "ᆷᄂ",
  ["ᆸᄆ"] = "ᆷᄆ",
}
-- Additional consonant assimilations, used for MR only.
p.consonant_assimilations_additional_mr = {
  -- Drop the final before a matching tense initial so the output never
  -- contains {kkk, ttt, ppp, sss/ts/tss, ttch}.
  ["ᆨᄁ"] = "ᄁ",
  ["ᆮᄄ"] = "ᄄ",
  ["ᆸᄈ"] = "ᄈ",
  ["ᆮ[ᄉᄊ]"] = "ᄊ",
  ["ᆮᄍ"] = "ᄍ",

  -- Miscellaneous conversions that emit Latin letters directly.
  ["ᆯᄅ"] = "ᆯl",
  ["ᆯᄒ"] = "rᄒ",
  ["ᄉ[ᅱ]"] = "shᅱ",
}
-- Vowel romanization for RR. Each key is a bracket class holding two forms of
-- the same vowel (conjoining jamo and standalone compatibility jamo); the
-- value is the Latin spelling.
p.vowels_rr = {
  ["[ᅡㅏ]"] = "a",
  ["[ᅢㅐ]"] = "ae",
  ["[ᅣㅑ]"] = "ya",
  ["[ᅤㅒ]"] = "yae",
  ["[ᅥㅓ]"] = "eo",
  ["[ᅦㅔ]"] = "e",
  ["[ᅧㅕ]"] = "yeo",
  ["[ᅨㅖ]"] = "ye",
  ["[ᅩㅗ]"] = "o",
  ["[ᅪㅘ]"] = "wa",
  ["[ᅫㅙ]"] = "wae",
  ["[ᅬㅚ]"] = "oe",
  ["[ᅭㅛ]"] = "yo",
  ["[ᅮㅜ]"] = "u",
  ["[ᅯㅝ]"] = "wo",
  ["[ᅰㅞ]"] = "we",
  ["[ᅱㅟ]"] = "wi",
  ["[ᅲㅠ]"] = "yu",
  ["[ᅳㅡ]"] = "eu",
  ["[ᅴㅢ]"] = "ui",
  ["[ᅵㅣ]"] = "i",
}
-- Vowel romanization for MR. Same key layout as the RR vowel table; the
-- values differ where MR uses a breve (ŏ, yŏ, wŏ, ŭ, ŭi).
p.vowels_mr = {
  ["[ᅡㅏ]"] = "a",
  ["[ᅢㅐ]"] = "ae",
  ["[ᅣㅑ]"] = "ya",
  ["[ᅤㅒ]"] = "yae",
  ["[ᅥㅓ]"] = "ŏ",
  ["[ᅦㅔ]"] = "e",
  ["[ᅧㅕ]"] = "yŏ",
  ["[ᅨㅖ]"] = "ye",
  ["[ᅩㅗ]"] = "o",
  ["[ᅪㅘ]"] = "wa",
  ["[ᅫㅙ]"] = "wae",
  ["[ᅬㅚ]"] = "oe",
  ["[ᅭㅛ]"] = "yo",
  ["[ᅮㅜ]"] = "u",
  ["[ᅯㅝ]"] = "wŏ",
  ["[ᅰㅞ]"] = "we",
  ["[ᅱㅟ]"] = "wi",
  ["[ᅲㅠ]"] = "yu",
  ["[ᅳㅡ]"] = "ŭ",
  ["[ᅴㅢ]"] = "ŭi",
  ["[ᅵㅣ]"] = "i",
}
-- Single-consonant romanization for RR. Keys are bracket classes (or single
-- characters) covering conjoining and standalone compatibility jamo; values
-- are the Latin spelling. The null initial ㅇ maps to the empty string.
p.single_consonants_rr = {
  ["[ᄀㄱ]"] = "g",
  ["[ᄁㄲ]"] = "kk",
  ["ㄳ"] = "ks",
  ["[ᄂᆫㄴ]"] = "n",
  ["ㄵ"] = "nj",
  ["ㄶ"] = "nh",
  ["[ᄃㄷ]"] = "d",
  ["[ᄄㄸ]"] = "tt",
  ["[ᄅㄹ]"] = "r",
  ["ᆯ"] = "l",
  ["ㄺ"] = "lg",
  ["ㄻ"] = "lm",
  ["ㄼ"] = "lb",
  ["ㄽ"] = "ls",
  ["ㄾ"] = "lt",
  ["ㄿ"] = "lp",
  ["ㅀ"] = "lh",
  ["[ᄆᆷㅁ]"] = "m",
  ["[ᄇㅂ]"] = "b",
  ["[ᄈㅃ]"] = "pp",
  ["ㅄ"] = "ps",
  ["[ᄉㅅ]"] = "s",
  ["[ᄊㅆ]"] = "ss",
  ["[ᄋㅇ]"] = "",
  ["ᆼ"] = "ng",
  ["[ᄌㅈ]"] = "j",
  ["[ᄍㅉ]"] = "jj",
  ["[ᄎㅊ]"] = "ch",
  ["[ᄏᆨㅋ]"] = "k",
  ["[ᄐᆮㅌ]"] = "t",
  ["[ᄑᆸㅍ]"] = "p",
  ["[ᄒㅎ]"] = "h",
}
-- Single-consonant romanization for MR. Same key layout as the RR consonant
-- table; aspirated consonants are written with a trailing apostrophe and the
-- null initial ㅇ maps to the empty string.
p.single_consonants_mr = {
  ["[ᄀᆨㄱ]"] = "k",
  ["[ᄁㄲ]"] = "kk",
  ["ㄳ"] = "ks",
  ["[ᄂᆫㄴ]"] = "n",
  ["ㄵ"] = "nj",
  ["ㄶ"] = "nh",
  ["[ᄃᆮㄷ]"] = "t",
  ["[ᄄㄸ]"] = "tt",
  ["[ᄅㄹ]"] = "r",
  ["ᆯ"] = "l",
  ["ㄺ"] = "lg",
  ["ㄻ"] = "lm",
  ["ㄼ"] = "lb",
  ["ㄽ"] = "ls",
  ["ㄾ"] = "lt'",
  ["ㄿ"] = "lp'",
  ["ㅀ"] = "rh",
  ["[ᄆᆷㅁ]"] = "m",
  ["[ᄇᆸㅂ]"] = "p",
  ["[ᄈㅃ]"] = "pp",
  ["ㅄ"] = "ps",
  ["[ᄉㅅ]"] = "s",
  ["[ᄊㅆ]"] = "ss",
  ["[ᄋㅇ]"] = "",
  ["ᆼ"] = "ng",
  ["[ᄌㅈ]"] = "ch",
  ["[ᄍㅉ]"] = "tch",
  ["[ᄎㅊ]"] = "ch'",
  ["[ᄏㅋ]"] = "k'",
  ["[ᄐㅌ]"] = "t'",
  ["[ᄑㅍ]"] = "p'",
  ["[ᄒㅎ]"] = "h",
}
-- Drop the y-glide after an initial in {ㅈ, ㅉ, ㅊ}: the initial is captured
-- and re-emitted via %1, and the y-vowel is swapped for its plain counterpart.
p.drop_y = {
  ["([ᄌ-ᄎ])ᅣ"] = "%1ᅡ",
  ["([ᄌ-ᄎ])ᅤ"] = "%1ᅢ",
  ["([ᄌ-ᄎ])ᅧ"] = "%1ᅥ",
  ["([ᄌ-ᄎ])ᅨ"] = "%1ᅦ",
  ["([ᄌ-ᄎ])ᅭ"] = "%1ᅩ",
  ["([ᄌ-ᄎ])ᅲ"] = "%1ᅮ",
}
-- Unwrap enclosed Hangul characters into plain parenthesized Hangul text.
-- Not strictly necessary, but Unicode classifies these as Hangul characters
-- too. Parenthesized and circled variants share one entry, so no distinction
-- is made between them.
p.enclosed_hangul = {
  -- consonant letters, spelled out by letter name
  ["[㈀㉠]"] = "(기역)",
  ["[㈁㉡]"] = "(니은)",
  ["[㈂㉢]"] = "(디귿)",
  ["[㈃㉣]"] = "(리을)",
  ["[㈄㉤]"] = "(미음)",
  ["[㈅㉥]"] = "(비읍)",
  ["[㈆㉦]"] = "(시옷)",
  ["[㈇㉧]"] = "(이응)",
  ["[㈈㉨]"] = "(지읒)",
  ["[㈉㉩]"] = "(치읓)",
  ["[㈊㉪]"] = "(키읔)",
  ["[㈋㉫]"] = "(티읕)",
  ["[㈌㉬]"] = "(피읖)",
  ["[㈍㉭]"] = "(히읗)",
  -- enclosed syllables
  ["[㈎㉮]"] = "(가)",
  ["[㈏㉯]"] = "(나)",
  ["[㈐㉰]"] = "(다)",
  ["[㈑㉱]"] = "(라)",
  ["[㈒㉲]"] = "(마)",
  ["[㈓㉳]"] = "(바)",
  ["[㈔㉴]"] = "(사)",
  ["[㈕㉵]"] = "(아)",
  ["[㈖㉶]"] = "(자)",
  ["[㈗㉷]"] = "(차)",
  ["[㈘㉸]"] = "(카)",
  ["[㈙㉹]"] = "(타)",
  ["[㈚㉺]"] = "(파)",
  ["[㈛㉻]"] = "(하)",
  -- enclosed words and abbreviations
  ["㈜"] = "(주)",
  ["㈝"] = "(오전)",
  ["㈞"] = "(오후)",
  ["㉼"] = "(참고)",
  ["㉽"] = "(주의)",
  ["㉾"] = "(우)",
}
-- Backslash-escaped special characters -> HTML numeric character references.
-- Each key is a Lua pattern matching a backslash followed by the literal
-- character ("%" escapes the pattern-magic characters $ % * ^). Each value is
-- that character's decimal HTML entity, so the character survives later
-- processing untouched.
-- NOTE(review): the entities had been decoded into the raw characters, which
-- made this table map "\$" straight to "$" (protecting nothing) and left a
-- bare "%" as an invalid replacement string. The decimal entity form is a
-- reconstruction; confirm it against the consuming module.
p.escaped_to_html_enc = {
  ["\\%$"] = "&#36;",
  ["\\%%"] = "&#37;",
  ["\\%*"] = "&#42;",
  ["\\@"] = "&#64;",
  ["\\%^"] = "&#94;",
  ["\\_"] = "&#95;",
  ["\\`"] = "&#96;",
}

-- HTML numeric character references -> the original unescaped characters.
-- This is the inverse of escaped_to_html_enc: every entity produced there
-- must have a key here. Values are written as replacement strings, hence
-- "%%" to produce one literal percent sign.
-- With the entities decoded, the keys "$" and "^" acted as pattern anchors
-- and "%" was a malformed pattern, so the keys must stay in entity form.
p.html_enc_to_ascii = {
  ["&#36;"] = "$",
  ["&#37;"] = "%%",
  ["&#42;"] = "*",
  ["&#64;"] = "@",
  ["&#94;"] = "^",
  ["&#95;"] = "_",
  ["&#96;"] = "`",
}

return p