-- Module:Hangul
local p = {}
local data = mw.loadData 'Module:Hangul/data'
---- From [[wikt:Module:ko-hangul]].
-- Splits the "syllable index" of a precomposed Hangul syllable (its
-- codepoint minus 0xAC00) into three zero-based jamo indices:
--   lead  - the initial consonant,
--   vowel - the medial vowel,
--   tail  - the final consonant, where 0 means "no final consonant".
-- Each lead spans 588 (= 21 * 28) consecutive syllables and each vowel
-- spans 28, so the split is a plain mixed-radix decomposition.
local function syllableIndex2JamoIndices(syllableIndex)
	local tail = syllableIndex % 28
	local offset_in_lead = syllableIndex % 588
	local vowel = math.floor(offset_in_lead / 28)
	local lead = math.floor(syllableIndex / 588)
	return lead, vowel, tail
end
---- From [[wikt:Module:Unicode data]].
local tocodepoint = mw.ustring.codepoint
-- Returns the Unicode character name of a precomposed Hangul syllable,
-- assembled from the name fragments stored in data.leads, data.vowels
-- and data.trails for its three jamo indices.
-- Open question kept from the original author: would it be cheaper to
-- derive names from the decomposed form of the syllable?
local function get_syllable_name(syllable)
	local syllable_index = tocodepoint(syllable) - 0xAC00
	local lead, vowel, trail = syllableIndex2JamoIndices(syllable_index)
	local template = "HANGUL SYLLABLE %s%s%s"
	return template:format(
		data.leads[lead], data.vowels[vowel], data.trails[trail])
end
----
-- Returns the 1-based position of the first element of the sequence `arr`
-- that equals `val`, or -1 when no element matches.
-- Iteration uses ipairs, so it stops at the first nil element.
local function indexof(arr, val)
	local position = -1
	for index, element in ipairs(arr) do
		if element == val then
			position = index
			break
		end
	end
	return position
end
-- Wraps `text` in a <span> element whose lang attribute marks it as Korean.
local function tag(text)
	local opening, closing = '<span lang="ko">', '</span>'
	return opening .. text .. closing
end
-- Returns the Unicode character name of a character in the Hangul
-- Compatibility Jamo block. The name suffix is looked up in data.names
-- by the character's offset from U+3130.
local function get_jamo_name(char)
	local offset = tocodepoint(char) - 0x3130
	local suffix = data.names[offset]
	return 'HANGUL LETTER ' .. suffix
end
-- Builds the wikitext of a collapsed wikitable listing every Hangul
-- syllable that starts with one initial consonant: one row per medial,
-- one column per final.
--
-- frame.args[1] is the initial consonant and must be a single character
-- in U+1100..U+1112. Surrounding whitespace is ignored, and a missing or
-- blank argument falls back to 'ᄀ'.
--
-- Returns the table as a wikitext string in which precomposed syllables
-- carry a title attribute with their Unicode name and Korean text is
-- wrapped in <span lang="ko">.
--
-- Raises an error when the argument is not exactly one character or lies
-- outside U+1100..U+1112.
function p.show(frame)
	-- Positional template arguments are not trimmed by MediaWiki, and an
	-- omitted parameter often arrives as an empty string, so normalise
	-- both cases before validating.
	local initial = mw.text.trim(frame.args[1] or '')
	if initial == '' then
		initial = 'ᄀ'
	end

	-- mw.ustring.codepoint only inspects the first character, so a longer
	-- string would pass the range check below and fail later with an
	-- obscure string.format error. mw.ustring.len returns nil for invalid
	-- UTF-8, which is rejected here as well.
	if mw.ustring.len(initial) ~= 1 then
		error('Incorrect initial ' .. initial .. '. Should be a single character between U+1100 and U+1112.')
	end

	local codepoint = tocodepoint(initial)
	if not (0x1100 <= codepoint and codepoint <= 0x1112) then
		error('Incorrect initial ' .. initial .. '. Should be between U+1100 and U+1112.')
	end

	-- Wikitext fragments, joined with newlines once the table is complete.
	local fragments = {}
	local function add(text)
		fragments[#fragments + 1] = text
	end

	-- Caption row plus the corner cell of the header row.
	add(
		([[
{| class="wikitable collapsible collapsed" style="width: 96px; height: 96px;"
! colspan="29" | Initial %s
|-
! Final→<br>Medial↓]]):format(data.independent_initials[indexof(data.initials, initial)]))

	-- Column headers: one per final. The empty string stands for
	-- "no final" and therefore gets an empty title.
	for _, final in ipairs(data.independent_finals) do
		add(('! title="%s" | %s')
			:format(final ~= '' and get_jamo_name(final) or '', final))
	end

	-- One row per medial: a row header followed by a cell for each final.
	for medial_index, medial in ipairs(data.medials) do
		add('|-\n')
		local independent_medial = data.independent_medials[medial_index]
		add(('! scope="row" title="%s" | %s')
			:format(get_jamo_name(independent_medial), independent_medial))
		for _, final in ipairs(data.finals) do
			add(('| %s%s%s'):format(initial, medial, final))
		end
	end
	add('|}')

	-- Normalise to NFC so that each initial+medial+final sequence can be
	-- matched as a single precomposed syllable below.
	local wikitext = mw.ustring.toNFC(table.concat(fragments, '\n'))

	-- Add names of syllable codepoints. gsub also returns a match count;
	-- assigning to a single variable discards it.
	wikitext = mw.ustring.gsub(
		wikitext,
		'[가-힣]', -- Precomposed syllables U+AC00-D7A3 in the [[Hangul Syllables]] block
		function (syllable)
			return ('title="%s" | %s'):format(get_syllable_name(syllable), syllable)
		end)

	-- Tag all Korean characters.
	-- The pattern matches the UTF-8 encoding of the codepoint ranges
	-- U+1000-1FFF, U+3000-3FFF, and U+A000-DFFF.
	-- (See [[UTF-8#Codepage layout]].)
	wikitext = wikitext:gsub('[\225\227\234-\237][\128-\191]+', tag)

	-- Check for consecutive span tags.
	-- wikitext:gsub('<span[^>]+>[^<]*</span><span[^>]+>[^<]*</span>', mw.log)

	return wikitext
end
return p