-- Module:Hangul
local p = {}
local data = mw.loadData 'Module:Hangul/data'
---- From [[wikt:Module:ko-hangul]]
-- Splits the "syllable index" of a precomposed Hangul syllable (its
-- codepoint minus 0xAC00) into three zero-based jamo indices:
--   * lead  - the initial consonant,
--   * vowel - the medial vowel,
--   * tail  - the final consonant, where 0 means "no final consonant".
-- Each lead spans 588 consecutive syllables and each vowel spans 28, so the
-- index is effectively a mixed-radix number with digits (lead, vowel, tail).
local function syllableIndex2JamoIndices(syllableIndex)
	local offsetWithinLead = syllableIndex % 588
	return math.floor(syllableIndex / 588),
		math.floor(offsetWithinLead / 28),
		syllableIndex % 28
end
----
local tocodepoint = mw.ustring.codepoint
-- Returns the 1-based position of the first element of `arr` equal to `val`,
-- or -1 when no element matches. Traversal uses ipairs, so it stops at the
-- first nil entry.
local function indexof(arr, val)
	local position = -1
	for idx, item in ipairs(arr) do
		if item == val then
			position = idx
			break
		end
	end
	return position
end
-- Builds the "U+XXXX: NAME" label for a Hangul character.
-- Only the first codepoint of `char` is examined. Two ranges are handled:
--   * U+3130-318F: the name comes from data.names, keyed by the offset
--     from U+3130;
--   * U+AC00-D7A3: the name is assembled from the lead, vowel and trail
--     entries of the data module (approach taken from
--     [[wikt:Module:Unicode data]]).
-- Any other codepoint raises an error.
local function get_name(char)
	local cp = tocodepoint(char)

	-- Hangul Compatibility Jamo block
	if 0x3130 <= cp and cp <= 0x318F then
		local letter_name = data.names[cp - 0x3130]
		return string.format('U+%X: HANGUL LETTER %s', cp, letter_name)
	end

	-- Hangul Syllables block
	-- Cheaper to derive names from decomposed form of syllable?
	if 0xAC00 <= cp and cp <= 0xD7A3 then
		local lead, vowel, trail = syllableIndex2JamoIndices(cp - 0xAC00)
		return string.format(
			"U+%X: HANGUL SYLLABLE %s%s%s",
			cp, data.leads[lead], data.vowels[vowel], data.trails[trail])
	end

	error(string.format('No name for U+%X found.', cp))
end
-- Renders a collapsed wikitable listing every syllable that begins with one
-- initial consonant: one row per medial (data.medials), one column per final
-- (data.finals).
--
-- frame.args[1]: the initial as a single conjoining jamo, U+1100 to U+1112.
--   When omitted or empty, the first initial (U+1100) is used.
-- Returns the table as wikitext. Each syllable cell carries a title with the
-- syllable's codepoint and name, and Korean characters are wrapped in
-- <span lang="ko">.
-- Raises an error when the argument is not one of the known initials.
function p.show(frame)
	local initial = frame.args[1]
	-- An empty argument (e.g. an unfilled template parameter) is treated like
	-- a missing one; otherwise the codepoint lookup below would yield nil.
	if initial == nil or initial == '' then
		initial = 'ᄀ'
	end

	local codepoint = tocodepoint(initial)
	if not (0x1100 <= codepoint and codepoint <= 0x1112) then
		error('Incorrect initial ' .. initial .. '. Should be between U+1100 and U+1112.')
	end

	-- The range check above only inspects the first codepoint, so also make
	-- sure the whole argument is a known initial before using its index.
	local initial_index = indexof(data.initials, initial)
	if initial_index == -1 then
		error('Incorrect initial ' .. initial
			.. '. Should be a single jamo between U+1100 and U+1112.')
	end

	-- Pieces of wikitext, joined with newlines at the end.
	local output = {}
	local count = 0
	local function add(text)
		count = count + 1
		output[count] = text
	end
	local function row()
		add('|-\n')
	end

	-- Table opening, caption row and the corner header cell. The id is the
	-- initial's ordinal as a doubled lowercase letter ("aa", "bb", ...).
	add(
		([[
{| class="wikitable collapsible collapsed" style="width: 96px; height: 96px;"
! id="%s" colspan="29" | Initial %s
|-
! Final→<br>Medial↓]]):format(
			string.char(('a'):byte() + initial_index - 1):rep(2), -- anchor
			data.independent_initials[initial_index])) -- initial jamo

	-- Column headers: one per final; an empty final gets an empty title.
	for _, final in ipairs(data.independent_finals) do
		add(('! title="%s" | %s')
			:format(final ~= '' and get_name(final) or '', final))
	end

	-- One row per medial: a row header, then a cell for each final.
	for medial_index, medial in ipairs(data.medials) do
		row()
		local independent_medial = data.independent_medials[medial_index]
		add(('! scope="row" title="%s" | %s')
			:format(get_name(independent_medial), independent_medial))
		for _, final in ipairs(data.finals) do
			add(('| %s%s%s'):format(initial, medial, final))
		end
	end
	add('|}')

	local wikitext = table.concat(output, '\n')
	-- Normalize to NFC so that the syllable pattern below can match the
	-- cells built from initial .. medial .. final.
	wikitext = mw.ustring.toNFC(wikitext)
	wikitext = mw.ustring.gsub( -- Add names of syllable codepoints.
		wikitext,
		'[가-힣]', -- [[Hangul Syllables]] block (U+AC00-D7AF)
		function (syllable)
			return ('title="%s" | %s'):format(get_name(syllable), syllable)
		end)

	-- Tag all Korean characters.
	-- The pattern matches a single UTF-8-encoded character in the ranges
	-- U+1000-1FFF, U+3000-3FFF, and U+A000-DFFF.
	-- (See [[UTF-8#Codepage layout]].)
	wikitext = wikitext:gsub(
		'[\225\227\234-\237][\128-\191]+',
		function (text)
			return '<span lang="ko">' .. text .. '</span>'
		end)

	-- Check for consecutive span tags.
	-- wikitext:gsub('<span[^>]+>[^<]*</span><span[^>]+>[^<]*</span>', mw.log)
	return wikitext
end
-- Builds a one-row wikitable that links to the table of each initial.
-- Every cell links to the doubled-letter anchor that p.show assigns to the
-- corresponding table ("aa" for the first initial, "bb" for the second, ...)
-- and is labelled with the matching entry of data.independent_initials.
-- Shape of the result:
--   {| class="wikitable collapsible collapsed" style=" width: 96px; height: 10px;"
--   | [[#aa|<first initial>]]
--   | [[#bb|<second initial>]]
--   ...
--   |}
-- `frame` is accepted for #invoke compatibility but is not used.
-- Defined on `p`, the table this module returns, so that it is exported.
function p.TOC(frame)
	local output = {
		'{| class="wikitable collapsible collapsed" style=" width: 96px; height: 10px;"',
	}
	for i, initial in ipairs(data.independent_initials) do
		-- Same anchor scheme as the id attribute emitted by p.show.
		local anchor = string.char(('a'):byte() + i - 1):rep(2)
		output[#output + 1] = ('| [[#%s|%s]]'):format(anchor, initial)
	end
	output[#output + 1] = '|}'
	return table.concat(output, '\n')
end
return p