Module:Ko-translit/data
Appearance
This module is rated as pre-alpha. It is unfinished, and may or may not be in active development. It should not be used from article namespace pages. Modules remain pre-alpha until the original editor (or someone who takes one over if it is abandoned for some time) is satisfied with the basic structure.
This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing.
Data module for Module:Ko-translit.
-- Container for every data table defined below; it is the value returned at the end of this file.
local p = {}
-- Pre-processing exception rules shared by both romanization systems (RR and MR).
-- Every key is written as a Lua pattern (character sets, captures, `$` end anchor) over
-- decomposed Hangul jamo, and every value is a replacement string that may refer to the
-- captures as %1 / %2.
-- NOTE(review): how, and in which order, Module:Ko-translit applies these entries is not
-- visible in this file; confirm against the consumer before relying on rule ordering.
p.exceptions = {
--[[
pre-processing exceptions that apply to both RR and MR
IMPORTANT: Before adding an exception, be sure to check if it can ALWAYS be applied in ALL contexts.
Good example: 싫증 → 실@증
Bad example: 문자 → 문@자 (affects words like 방문자 (pronounced [방문자], not [방문짜]))
Hangul status: decomposed (ᄒ+ᅡ+ᆫ)
--]]
-- for linguistic contexts
-- (a standalone compatibility jamo directly before an initial consonant is rewritten
-- as the matching syllable-final jamo)
["ㄴ([ᄀ-ᄒ])"] = "ᆫ%1", -- -ㄴ다
["ㄹ([ᄀ-ᄒ])"] = "ᆯ%1", -- -ㄹ까, -ㄹ래
["ㄹ@([ᄀᄃᄇᄉᄌ])"] = "ᆯ@%1", -- -ㄹ지
["ㅁ([ᄀ-ᄒ])"] = "ᆷ%1",
["ㅂ([ᄀ-ᄒ])"] = "ᆸ%1", -- -ㅂ니다, -ㅂ시다
-- ㄴ-addition always occurs before 윷 and 잎
["([ᆨ-ᇂ])ᄋ(ᅲᆾ)"] = "%1ᄂ%2",
["([ᆨ-ᇂ])ᄋ(ᅵᇁ)"] = "%1ᄂ%2",
-- 곧이어 [고디어]
-- (two variants: one requires a following character that is not a final consonant,
-- the other matches at the end of the string)
["(ᄀ[ᅩ])ᆮᄋ(ᅵᄋ[ᅥ][^ᆨ-ᇂ])"] = "%1ᄃ%2",
["(ᄀ[ᅩ])ᆮᄋ(ᅵᄋ[ᅥ])$"] = "%1ᄃ%2",
-- 싫증 [실쯩]
["(ᄉ[ᅵ])ᆶ(ᄌ[ᅳ]ᆼ)"] = "%1ᆯ@%2",
-- cases where ㄺㄱ is pronounced [ㄱㄲ]
-- not including very rarely used words such as 삼시욹, 안찱, 우줅거리다, etc.
["([ᄃᄉᄐ]ᅡ)ᆰᄀ"] = "%1ᆨᄀ", -- 닭, 삵, 수탉/암탉
["([ᄉᄒ]ᅳ)ᆰᄀ"] = "%1ᆨᄀ", -- 기슭, 흙
["(ᄎ[ᅵ])ᆰᄀ"] = "%1ᆨᄀ", -- 칡
-- palatalization and ㅈ + -히-
-- (each group below repeats the same rewrite for: a following ᆻ, a following
-- ᆫ/ᆯ/ᆷ/ᆸ final, a following non-final character, and end of string)
["ᆮᄋ(ᅵ[ᆫᆯᆷᆸ])"] = "ᄌ%1", -- 해돋이 [해도지]
["ᆮᄋ(ᅵ[^ᆨ-ᇂ])"] = "ᄌ%1",
["ᆮᄋ(ᅵ)$"] = "ᄌ%1",
["[ᆮᆽ]ᄒ(ᅧᆻ)"] = "ᄎ%1", -- 굳히다 [구치다], 꽂히다 [꼬치다]
["[ᆮᆽ]ᄒ(ᅵ[ᆫᆯᆷᆸ])"] = "ᄎ%1",
["[ᆮᆽ]ᄒ([ᅧᅵ][^ᆨ-ᇂ])"] = "ᄎ%1",
["[ᆮᆽ]ᄒ([ᅧᅵ])$"] = "ᄎ%1",
["ᆴᄋ(ᅧᆻ)"] = "ᆯᄎ%1", -- 훑이다 [훌치다]
["ᆴᄋ(ᅵ[ᆫᆯᆷᆸ])"] = "ᆯᄎ%1",
["ᆴᄋ([ᅧᅵ][^ᆨ-ᇂ])"] = "ᆯᄎ%1",
["ᆴᄋ([ᅧᅵ])$"] = "ᆯᄎ%1",
["ᇀᄋ(ᅧᆻ)"] = "ᄎ%1", -- 붙이다 [부치다]
["ᇀᄋ(ᅵ[ᆫᆯᆷᆸ])"] = "ᄎ%1",
["ᇀᄋ([ᅧᅵ][^ᆨ-ᇂ])"] = "ᄎ%1",
["ᇀᄋ([ᅧᅵ])$"] = "ᄎ%1",
-- {ㄵ, ㄺ, ㄼ} + -히-
["ᆬᄒ(ᅧᆻ)"] = "ᆫᄎ%1", -- 앉히다 [안치다]
["ᆬᄒ(ᅵ[ᆫᆯᆷᆸ])"] = "ᆫᄎ%1",
["ᆬᄒ([ᅧᅵ][^ᆨ-ᇂ])"] = "ᆫᄎ%1",
["ᆬᄒ([ᅧᅵ])$"] = "ᆫᄎ%1",
["ᆰᄒ(ᅧᆻ)"] = "ᆯᄏ%1", -- 밝히다 [발키다]
["ᆰᄒ(ᅵ[ᆫᆯᆷᆸ])"] = "ᆯᄏ%1",
["ᆰᄒ([ᅧᅵ][^ᆨ-ᇂ])"] = "ᆯᄏ%1",
["ᆰᄒ([ᅧᅵ])$"] = "ᆯᄏ%1",
["ᆲᄒ(ᅧᆻ)"] = "ᆯᄑ%1", -- 넓히다 [널피다], 밟히다 [발피다]
["ᆲᄒ(ᅵ[ᆫᆯᆷᆸ])"] = "ᆯᄑ%1",
["ᆲᄒ([ᅧᅵ][^ᆨ-ᇂ])"] = "ᆯᄑ%1",
["ᆲᄒ([ᅧᅵ])$"] = "ᆯᄑ%1",
-- cases where 넓- is pronounced [넙] before consonant
["(ᄂ[ᅥ])ᆲ([ᄁᄄ-ᄈᄊᄍ-ᄒ])"] = "%1ᆸ%2",
["(ᄂ[ᅥ])ᆲ(ᄃ[ᅡ]ᄃ[ᅳ]ᆷ)"] = "%1ᆸ%2", -- 넓다듬이
["(ᄂ[ᅥ])ᆲ(ᄃ[ᅮ]ᆼ)"] = "%1ᆸ%2", -- 넓둥글다
["(ᄂ[ᅥ])ᆲ(ᄉ[ᅡ]ᆯᄆ[ᅮ]ᆫ)"] = "%1ᆸ%2", -- 넓살문
["(ᄂ[ᅥ])ᆲ(ᄌ[ᅥᅮ]ᆨ)"] = "%1ᆸ%2", -- 넓적-, 넓죽-
-- 밟- is [밥] before consonant (except null-init consonant ㅇ)
["(ᄇ[ᅡ])ᆲ([^ᄋ])"] = "%1ᆸ%2",
["(ᄇ[ᅡ])ᆲ$"] = "%1ᆸ",
-- automatic 절음 법칙
-- (the last five entries of this group insert the @ marker between the captured final
-- consonant and the following ᄋ-initial syllable; @ also appears in the keys of
-- p.at_irregularities)
["(ᄋ[ᅥ])ᆹᄋ(ᅢ[ᆫᆯᆷᆸᆻ])"] = "%1ᆸᄉ%2", -- except 없애다 [업쌔다]
["(ᄋ[ᅥ])ᆹᄋ(ᅢ[^ᆨ-ᇂ])"] = "%1ᆸᄉ%2",
["(ᄋ[ᅥ])ᆹᄋ(ᅢ)$"] = "%1ᆸᄉ%2",
["(ᄆ[ᅡᅥ])ᆺᄋ(ᅵᆻ)"] = "%1ᄉ%2", -- except 맛있다 and 멋있다 which are usually pronounced [마싣따] and [머싣따] respectively
["([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅡᅥᅧ][ᆨ-ᆺᆼ-ᇂ])"] = "%1@%2", -- except 아, 았, 어, 었, 여, 였
["([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅦ][ᆨ-ᆪᆬ-ᆮᆰ-ᇂ])"] = "%1@%2", -- except 에, 엔, 엘
["([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅭᅴ][ᆨ-ᇂ])"] = "%1@%2", -- except 요, 의 (w/o final consonant)
["([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅳᅵ][ᆨ-ᆪᆬ-ᆮᆰ-ᆶᆹ-ᇂ])"] = "%1@%2", -- except 으, 은, 을, 음, 읍, 이, 인, 일, 임, 입
["([ᆩᆪᆬᆰ-ᆵᆹ-ᆻᆽ-ᇂ])(ᄋ[ᅢ-ᅤᅨ-ᅬᅮ-ᅲ])"] = "%1@%2",
-- _ for additional space in romanization only
["_"] = " "
}
-- $ for ㄴ-addition
-- First entry: a final consonant, the $ marker, then a null initial ᄋ before one of the
-- listed vowels becomes that final consonant followed by initial ᄂ.
-- Second entry: deletes a $ marker ($ is written %$ because a bare $ is a pattern anchor).
-- NOTE(review): the second rule would erase the marker the first rule depends on if it
-- ran first; this file does not show how the consumer orders them. Confirm.
p.n_addition = {
["([ᆨ-ᇂ])%$ᄋ([ᅣᅤᅧᅨᅭᅲᅵ])"] = "%1ᄂ%2",
["%$"] = ""
}
-- for null-init consonant ㅇ (연음)
-- Each key is a syllable-final consonant followed by the null initial ᄋ; the value carries
-- the consonant over as the initial of the next syllable. Cluster finals are split so that
-- the first member stays as a final and the second becomes the initial (e.g. ᆪᄋ → ᆨᄉ).
p.null_init_ieung = {
["ᆨᄋ"] = "ᄀ",
["ᆩᄋ"] = "ᄁ",
["ᆪᄋ"] = "ᆨᄉ",
["ᆬᄋ"] = "ᆫᄌ",
["ᆮᄋ"] = "ᄃ",
["[ᆯᆶ]ᄋ"] = "ᄅ",
["ᆰᄋ"] = "ᆯᄀ",
["ᆱᄋ"] = "ᆯᄆ",
["ᆲᄋ"] = "ᆯᄇ",
["ᆳᄋ"] = "ᆯᄉ",
["ᆴᄋ"] = "ᆯᄐ",
["ᆵᄋ"] = "ᆯᄑ",
["ᆸᄋ"] = "ᄇ",
["ᆹᄋ"] = "ᆸᄉ",
["ᆺᄋ"] = "ᄉ",
["ᆻᄋ"] = "ᄊ",
["ᆽᄋ"] = "ᄌ",
["ᆾᄋ"] = "ᄎ",
["ᆿᄋ"] = "ᄏ",
["ᇀᄋ"] = "ᄐ",
["ᇁᄋ"] = "ᄑ",
["ᇂᄋ"] = "ᄋ" -- silent; 좋아 [조아]
}
-- convert ㅎ combinations
-- trivia: {ㄶ, ㅀ, ㅎ} + ㅂ doesn't actually exist, but added for completeness (syl-final ㅎ is for aspiration anyway)
-- Each key is a final containing ㅎ (ᆭ, ᆶ or ᇂ) followed by an initial consonant; the value
-- drops the ㅎ and replaces the initial with its aspirated counterpart (ᄀ→ᄏ, ᄃ→ᄐ, ᄇ→ᄑ,
-- ᄌ→ᄎ). The ᇂᄉ entry only drops the ㅎ.
p.process_hieut = {
["ᆭᄀ"] = "ᆫᄏ",
["ᆭᄃ"] = "ᆫᄐ",
["ᆭᄇ"] = "ᆫᄑ",
["ᆭᄌ"] = "ᆫᄎ",
["ᆶᄀ"] = "ᆯᄏ",
["ᆶᄃ"] = "ᆯᄐ",
["ᆶᄇ"] = "ᆯᄑ",
["ᆶᄌ"] = "ᆯᄎ",
["ᇂᄀ"] = "ᄏ",
["ᇂᄃ"] = "ᄐ",
["ᇂᄇ"] = "ᄑ",
["ᇂᄉ"] = "ᄉ",
["ᇂᄌ"] = "ᄎ"
}
-- additional ㅎ combinations for MR only
-- A listed final followed by initial ᄉ yields the tense initial ᄊ.
-- The key ᇂᄉ also exists in p.process_hieut, where it maps to plain ᄉ instead.
-- NOTE(review): presumably the consumer applies this table before p.process_hieut when
-- producing MR; that ordering is not visible here. Confirm.
p.process_hieut_additional_mr = {
["[ᆬᆭ]ᄉ"] = "ᆫᄊ",
["[ᆲᆴᆶ]ᄉ"] = "ᆯᄊ",
["ᇂᄉ"] = "ᄊ",
}
-- neutralization of syl-final consonants
-- Each key is a set of syllable-final jamo; every member is replaced by the single
-- representative final given as the value (ᆨ, ᆫ, ᆮ, ᆯ, ᆷ or ᆸ).
p.neutralize_syl_final_consonants = {
["[ᆩᆪᆰᆿ]"] = "ᆨ",
["[ᆬᆭ]"] = "ᆫ",
["[ᆺᆻᆽᆾᇀᇂ]"] = "ᆮ",
["[ᆲᆳᆴᆶ]"] = "ᆯ",
["ᆱ"] = "ᆷ",
["[ᆵᆹᇁ]"] = "ᆸ"
}
-- @ for 절음 법칙, ㄴㄹ pronounced [ㄴㄴ]
-- Keys are a final consonant, the @ marker, and a following initial; the value is what the
-- whole sequence is rewritten to (the marker is consumed).
-- Only the finals ᆨ, ᆮ, ᆯ, ᆸ are listed before @ᄋ.
-- NOTE(review): presumably this runs after p.neutralize_syl_final_consonants so that other
-- finals have already been reduced to one of these; confirm against the consumer.
p.at_irregularities = {
["ᆨ@ᄋ"] = "ᄀ",
["ᆮ@ᄋ"] = "ᄃ", -- 웃어른 [우더른]
["ᆯ@ᄋ"] = "ᄅ",
["ᆸ@ᄋ"] = "ᄇ",
["ᆫ@ᄅ"] = "ᆫᄂ" -- 음운론 [으문논]
}
-- Consonant assimilation across a syllable boundary: each key is a final consonant (or set)
-- followed by an initial consonant (or set), and the value is the assimilated pair.
-- NOTE(review): the entries for ᆫᄅ and ᆯᄂ produce ᆯᄅ, which is itself the key of the
-- last entry; the final result therefore depends on application order, which this file
-- does not show. Confirm against the consumer.
p.consonant_assimilations = {
["[ᆨᆼ][ᄂᄅ]"] = "ᆼᄂ",
["ᆨᄆ"] = "ᆼᄆ",
["ᆫᄅ"] = "ᆯᄅ",
["ᆮ[ᄂᄅ]"] = "ᆫᄂ",
["ᆮᄆ"] = "ᆫᄆ",
["ᆯᄂ"] = "ᆯᄅ",
["[ᆷᆸ][ᄂᄅ]"] = "ᆷᄂ",
["ᆸᄆ"] = "ᆷᄆ",
-- the value mixes a jamo with a Latin "l": the initial ᄅ is emitted as "l" right here
-- (the single-consonant tables map ᆯ to "l" and initial ᄅ to "r")
["ᆯᄅ"] = "ᆯl"
}
-- Extra assimilation rules whose table name marks them as MR-only.
-- The first group collapses a final followed by a tense initial into the tense initial
-- alone; the second group writes Latin letters ("r", "sh") directly into the output.
p.consonant_assimilations_additional_mr = {
-- no {kkk, ttt, ppp, sss/ts/tss, ttch}
["ᆨᄁ"] = "ᄁ",
["ᆮᄄ"] = "ᄄ",
["ᆸᄈ"] = "ᄈ",
["ᆮ[ᄉᄊ]"] = "ᄊ",
["ᆮᄍ"] = "ᄍ",
-- other misc conversions
["ᆯᄒ"] = "rᄒ",
["ᄉ[ᅱ]"] = "shᅱ"
}
-- vowels to romanized text for rr
-- Each key is a two-member set holding the conjoining (medial) vowel jamo and its
-- compatibility-jamo counterpart, so both forms map to the same Latin spelling.
p.vowels_rr = {
["[ᅡㅏ]"] = "a",
["[ᅢㅐ]"] = "ae",
["[ᅣㅑ]"] = "ya",
["[ᅤㅒ]"] = "yae",
["[ᅥㅓ]"] = "eo",
["[ᅦㅔ]"] = "e",
["[ᅧㅕ]"] = "yeo",
["[ᅨㅖ]"] = "ye",
["[ᅩㅗ]"] = "o",
["[ᅪㅘ]"] = "wa",
["[ᅫㅙ]"] = "wae",
["[ᅬㅚ]"] = "oe",
["[ᅭㅛ]"] = "yo",
["[ᅮㅜ]"] = "u",
["[ᅯㅝ]"] = "wo",
["[ᅰㅞ]"] = "we",
["[ᅱㅟ]"] = "wi",
["[ᅲㅠ]"] = "yu",
["[ᅳㅡ]"] = "eu",
["[ᅴㅢ]"] = "ui",
["[ᅵㅣ]"] = "i"
}
-- vowels to romanized text for mr
-- Same key layout as p.vowels_rr (conjoining jamo plus compatibility jamo in one set);
-- the values differ from the RR table only where MR uses a breve (ŏ, yŏ, wŏ, ŭ, ŭi).
p.vowels_mr = {
["[ᅡㅏ]"] = "a",
["[ᅢㅐ]"] = "ae",
["[ᅣㅑ]"] = "ya",
["[ᅤㅒ]"] = "yae",
["[ᅥㅓ]"] = "ŏ",
["[ᅦㅔ]"] = "e",
["[ᅧㅕ]"] = "yŏ",
["[ᅨㅖ]"] = "ye",
["[ᅩㅗ]"] = "o",
["[ᅪㅘ]"] = "wa",
["[ᅫㅙ]"] = "wae",
["[ᅬㅚ]"] = "oe",
["[ᅭㅛ]"] = "yo",
["[ᅮㅜ]"] = "u",
["[ᅯㅝ]"] = "wŏ",
["[ᅰㅞ]"] = "we",
["[ᅱㅟ]"] = "wi",
["[ᅲㅠ]"] = "yu",
["[ᅳㅡ]"] = "ŭ",
["[ᅴㅢ]"] = "ŭi",
["[ᅵㅣ]"] = "i"
}
-- single consonants to romanized text for rr
-- Keys group the jamo forms that share one Latin spelling: initial (conjoining) jamo,
-- some final jamo, and standalone compatibility jamo.
-- The finals ᆨ, ᆮ, ᆸ are grouped with k, t, p, while the initials ᄀ, ᄃ, ᄇ map to g, d, b.
-- Final ᆯ is "l" whereas initial ᄅ is "r"; the null initial ᄋ maps to the empty string
-- and final ᆼ to "ng".
p.single_consonants_rr = {
["[ᄀㄱ]"] = "g",
["[ᄁㄲ]"] = "kk",
["ㄳ"] = "ks",
["[ᄂᆫㄴ]"] = "n",
["ㄵ"] = "nj",
["ㄶ"] = "nh",
["[ᄃㄷ]"] = "d",
["[ᄄㄸ]"] = "tt",
["[ᄅㄹ]"] = "r",
["ᆯ"] = "l",
["ㄺ"] = "lg",
["ㄻ"] = "lm",
["ㄼ"] = "lb",
["ㄽ"] = "ls",
["ㄾ"] = "lt",
["ㄿ"] = "lp",
["ㅀ"] = "lh",
["[ᄆᆷㅁ]"] = "m",
["[ᄇㅂ]"] = "b",
["[ᄈㅃ]"] = "pp",
["ㅄ"] = "ps",
["[ᄉㅅ]"] = "s",
["[ᄊㅆ]"] = "ss",
["[ᄋㅇ]"] = "",
["ᆼ"] = "ng",
["[ᄌㅈ]"] = "j",
["[ᄍㅉ]"] = "jj",
["[ᄎㅊ]"] = "ch",
["[ᄏᆨㅋ]"] = "k",
["[ᄐᆮㅌ]"] = "t",
["[ᄑᆸㅍ]"] = "p",
["[ᄒㅎ]"] = "h"
}
-- single consonants to romanized text for mr
-- Same key layout as the RR table, but here initial and final forms of ㄱ, ㄷ, ㅂ share
-- one spelling (k, t, p) and the aspirated consonants carry an apostrophe
-- (ch', k', t', p').
-- NOTE(review): no voiced spellings (g, d, b, j) appear in this table; if MR voicing is
-- handled, it must happen elsewhere. Confirm against the consumer.
p.single_consonants_mr = {
["[ᄀᆨㄱ]"] = "k",
["[ᄁㄲ]"] = "kk",
["ㄳ"] = "ks",
["[ᄂᆫㄴ]"] = "n",
["ㄵ"] = "nj",
["ㄶ"] = "nh",
["[ᄃᆮㄷ]"] = "t",
["[ᄄㄸ]"] = "tt",
["[ᄅㄹ]"] = "r",
["ᆯ"] = "l",
["ㄺ"] = "lg",
["ㄻ"] = "lm",
["ㄼ"] = "lb",
["ㄽ"] = "ls",
["ㄾ"] = "lt'",
["ㄿ"] = "lp'",
["ㅀ"] = "rh",
["[ᄆᆷㅁ]"] = "m",
["[ᄇᆸㅂ]"] = "p",
["[ᄈㅃ]"] = "pp",
["ㅄ"] = "ps",
["[ᄉㅅ]"] = "s",
["[ᄊㅆ]"] = "ss",
["[ᄋㅇ]"] = "",
["ᆼ"] = "ng",
["[ᄌㅈ]"] = "ch",
["[ᄍㅉ]"] = "tch",
["[ᄎㅊ]"] = "ch'",
["[ᄏㅋ]"] = "k'",
["[ᄐㅌ]"] = "t'",
["[ᄑㅍ]"] = "p'",
["[ᄒㅎ]"] = "h",
}
-- drop y after {ㅈ, ㅉ, ㅊ}
-- The range [ᄌ-ᄎ] covers the initials ᄌ, ᄍ and ᄎ. Each entry keeps the captured initial
-- and swaps a y-glide vowel for its plain counterpart (ᅣ→ᅡ, ᅤ→ᅢ, ᅧ→ᅥ, ᅨ→ᅦ, ᅭ→ᅩ, ᅲ→ᅮ).
p.drop_y = {
["([ᄌ-ᄎ])ᅣ"] = "%1ᅡ",
["([ᄌ-ᄎ])ᅤ"] = "%1ᅢ",
["([ᄌ-ᄎ])ᅧ"] = "%1ᅥ",
["([ᄌ-ᄎ])ᅨ"] = "%1ᅦ",
["([ᄌ-ᄎ])ᅭ"] = "%1ᅩ",
["([ᄌ-ᄎ])ᅲ"] = "%1ᅮ",
}
-- Rules keyed on reduplicated syllables (달달, 돌돌, 살살, ...) with surrounding text captured
-- as %1 and %2. Unlike the tables above, the literals here are precomposed syllables.
-- NOTE(review): judging by the table name this relates to tensification of ㄷ/ㅅ/ㅈ after ㄹ
-- in personal names; the exact purpose is not stated in this file. Confirm.
-- NOTE(review): as written, "[^]*" leaves nothing between "[^" and "]", so Lua reads the
-- "]" as a literal set member and the key no longer forms two separate captures even
-- though the value uses %2. Every value is also textually identical to the syllables its
-- key matches. A delimiter/marker character has probably been lost from both sides;
-- verify against the upstream module before use.
p.name_rieul_dsj_tensification = {
["([^]*)달달([^]*)"] = "%1달달%2",
["([^]*)돌돌([^]*)"] = "%1돌돌%2",
["([^]*)살살([^]*)"] = "%1살살%2",
["([^]*)설설([^]*)"] = "%1설설%2",
["([^]*)솔솔([^]*)"] = "%1솔솔%2",
["([^]*)술술([^]*)"] = "%1술술%2",
["([^]*)슬슬([^]*)"] = "%1슬슬%2",
["([^]*)실실([^]*)"] = "%1실실%2",
["([^]*)절절([^]*)"] = "%1절절%2",
["([^]*)졸졸([^]*)"] = "%1졸졸%2",
["([^]*)줄줄([^]*)"] = "%1줄줄%2",
["([^]*)즐즐([^]*)"] = "%1즐즐%2",
["([^]*)질질([^]*)"] = "%1질질%2"
}
-- unwrapping enclosed Hangul text
-- Each key is a single enclosed character, or a set pairing the parenthesized and circled
-- variants; the value is the plain Hangul text it stands for, wrapped in ASCII
-- parentheses. Enclosed consonant letters expand to the letter's name (e.g. 기역), enclosed
-- syllables to the syllable itself, and the word symbols to the full word.
p.enclosed_hangul = {
-- actually not very necessary, but these are also classified as Hangul chars in Unicode
-- no distinction is made between parenthesized and circled chars
["[㈀㉠]"] = "(기역)",
["[㈁㉡]"] = "(니은)",
["[㈂㉢]"] = "(디귿)",
["[㈃㉣]"] = "(리을)",
["[㈄㉤]"] = "(미음)",
["[㈅㉥]"] = "(비읍)",
["[㈆㉦]"] = "(시옷)",
["[㈇㉧]"] = "(이응)",
["[㈈㉨]"] = "(지읒)",
["[㈉㉩]"] = "(치읓)",
["[㈊㉪]"] = "(키읔)",
["[㈋㉫]"] = "(티읕)",
["[㈌㉬]"] = "(피읖)",
["[㈍㉭]"] = "(히읗)",
["[㈎㉮]"] = "(가)",
["[㈏㉯]"] = "(나)",
["[㈐㉰]"] = "(다)",
["[㈑㉱]"] = "(라)",
["[㈒㉲]"] = "(마)",
["[㈓㉳]"] = "(바)",
["[㈔㉴]"] = "(사)",
["[㈕㉵]"] = "(아)",
["[㈖㉶]"] = "(자)",
["[㈗㉷]"] = "(차)",
["[㈘㉸]"] = "(카)",
["[㈙㉹]"] = "(타)",
["[㈚㉺]"] = "(파)",
["[㈛㉻]"] = "(하)",
["㈜"] = "(주)",
["㈝"] = "(오전)",
["㈞"] = "(오후)",
["㉼"] = "(참고)",
["㉽"] = "(주의)",
["㉾"] = "(우)"
}
-- converting escaped special characters to HTML numeric character references to preserve them
-- Keys are Lua patterns matching a backslash followed by one of the marker characters
-- ($ % * @ ^ _ `); pattern-magic characters are escaped with %. Values are the decimal
-- character reference for that character, which contains no marker character itself,
-- so the literal survives later marker processing. Mapping the escape straight to the raw
-- character (as a decoded copy of this table would) defeats the purpose, and a bare "%"
-- is not a valid replacement string.
p.escaped_to_html_enc = {
	["\\%$"] = "&#36;",
	["\\%%"] = "&#37;",
	["\\%*"] = "&#42;",
	["\\@"] = "&#64;",
	["\\%^"] = "&#94;",
	["\\_"] = "&#95;",
	["\\`"] = "&#96;"
}
-- converting HTML numeric character references back to unescaped characters
-- Exact inverse of p.escaped_to_html_enc. The keys contain no pattern-magic characters,
-- so they match literally; bare "$", "%", "*" or "^" keys would instead act as anchors
-- or malformed patterns. "%%" is a literal percent sign when the value is used as a
-- replacement string.
p.html_enc_to_ascii = {
	["&#36;"] = "$",
	["&#37;"] = "%%",
	["&#42;"] = "*",
	["&#64;"] = "@",
	["&#94;"] = "^",
	["&#95;"] = "_",
	["&#96;"] = "`"
}
-- Export the table holding all data tables defined above.
return p