-- Module:Hangul
-- Appearance
-- Module table; the functions attached to it are what this module returns.
local p = {}
-- Lookup tables from the companion data page. This file reads the fields
-- leads, vowels, trails (name fragments used by get_name) and medials,
-- finals (the jamo lists iterated by p.show).
local data = mw.loadData 'Module:Hangul/data'
---- From [[wikt:Module:ko-hangul]].
-- Splits the "syllable index" of a precomposed Hangul syllable (its code
-- point minus 0xAC00, which is how get_name calls this) into the indices
-- of its three constituent jamo.
--
-- Returns three numbers, in order:
--   lead  - index of the initial consonant
--   vowel - index of the medial vowel
--   tail  - index of the final consonant; 0 means "no final consonant"
--
-- The syllables are laid out lead-major, then vowel, then tail: each vowel
-- spans 28 tail slots and each lead spans 588 (= 21 * 28) syllables.
local function syllableIndex2JamoIndices(syllableIndex)
	local tail = syllableIndex % 28
	local offsetWithinLead = syllableIndex % 588
	local vowel = math.floor(offsetWithinLead / 28)
	local lead = math.floor(syllableIndex / 588)
	return lead, vowel, tail
end
---- From [[wikt:Module:Unicode data]].
local tocodepoint = mw.ustring.codepoint

-- Returns the Unicode character name ("HANGUL SYLLABLE ...") of a
-- precomposed Hangul syllable, given as a string whose first character is
-- that syllable.
--
-- The name is the fixed prefix followed by the three fragments that
-- data.leads, data.vowels and data.trails hold for the syllable's jamo
-- indices (presumably zero-based keys, since the indices start at 0;
-- confirm against the data module).
--
-- Open question from the original author: would it be cheaper to derive
-- the name from the decomposed form of the syllable?
local function get_name(syllable)
	local syllableIndex = tocodepoint(syllable) - 0xAC00
	local lead, vowel, trail = syllableIndex2JamoIndices(syllableIndex)
	local leadName = data.leads[lead]
	local vowelName = data.vowels[vowel]
	local trailName = data.trails[trail]
	return string.format("HANGUL SYLLABLE %s%s%s", leadName, vowelName, trailName)
end
----
-- Wraps the given text in a <span> element marked as Korean (lang="ko")
-- and returns the resulting markup.
local function tag(text)
	local opening, closing = '<span lang="ko">', '</span>'
	return opening .. text .. closing
end
--- Renders a collapsed wikitable of the Hangul syllables that begin with one
-- initial consonant: one column per entry of data.finals and one row per
-- entry of data.medials.
--
-- @param frame  Scribunto frame object. frame.args[1] is the initial
--               consonant as a conjoining jamo in U+1100..U+1112. When it is
--               missing, empty or only whitespace, 'ᄀ' (U+1100) is used.
-- @return       Wikitext of the table, with every syllable cell carrying its
--               Unicode name as a title attribute and every Korean character
--               wrapped in <span lang="ko">.
-- Raises an error when the first character of the argument is outside
-- U+1100..U+1112.
function p.show(frame)
	-- Unnamed #invoke arguments are passed through untrimmed, and a blank
	-- one arrives as '' rather than nil. '' is truthy in Lua, so `or` alone
	-- would not apply the default, and the nil code point of '' would make
	-- the range comparison below fail with an unrelated Lua error.
	local initial = mw.text.trim(frame.args[1] or '')
	if initial == '' then
		initial = 'ᄀ'
	end

	-- NOTE(review): only the first code point is validated; any trailing
	-- characters in the argument are copied into the table unchanged.
	local codepoint = mw.ustring.codepoint(initial)
	if not (codepoint and 0x1100 <= codepoint and codepoint <= 0x1112) then
		error('Incorrect initial ' .. initial .. '. Should be between U+1100 and U+1112.')
	end

	-- Wikitext fragments, joined with newlines once the table is complete.
	local fragments, count = {}, 0
	local function add(text)
		count = count + 1
		fragments[count] = text
	end

	-- Table opening, caption row and the corner header cell.
	-- NOTE(review): colspan="29" presumably means data.finals has 28
	-- entries (plus the row-header column); confirm against the data module.
	add(([[
{| class="wikitable collapsible collapsed" style="width: 96px; height: 96px;"
! colspan="29" | Initial %s
|-
! Final→<br>Medial↓]]):format(initial))

	-- Column headers: one per final.
	for _, final in ipairs(data.finals) do
		add(('! %s'):format(final))
	end

	-- One row per medial. Each cell is the raw jamo sequence
	-- initial + medial + final; normalization below turns it into a syllable.
	for _, medial in ipairs(data.medials) do
		add('|-\n')
		add(('! scope="row" | %s'):format(medial))
		for _, final in ipairs(data.finals) do
			add(('| %s%s%s'):format(initial, medial, final))
		end
	end
	add('|}')

	local wikitext = table.concat(fragments, '\n')

	-- NFC composes each conjoining-jamo sequence into its precomposed
	-- syllable, which is what the next step looks for.
	wikitext = mw.ustring.toNFC(wikitext)

	-- Add names of syllable codepoints: each precomposed syllable gets a
	-- title attribute in front of it, separated by the cell pipe.
	wikitext = mw.ustring.gsub(
		wikitext,
		'[가-힣]', -- [[Hangul Syllables]] block (U+AC00-D7AF)
		function (syllable)
			return ('title="%s" | %s'):format(get_name(syllable), syllable)
		end)

	-- Tag all Korean characters.
	-- This is a byte-level pattern: a UTF-8 lead byte 0xE1, 0xE3 or
	-- 0xEA-0xED followed by its continuation bytes (0x80-0xBF), i.e. the
	-- codepoint ranges U+1000-1FFF, U+3000-3FFF, and U+A000-DFFF.
	-- (See [[UTF-8#Codepage layout]].) Each character gets its own span.
	-- The assignment keeps only the first result of gsub (the string), so
	-- the substitution count is not returned to the caller.
	wikitext = wikitext:gsub('[\225\227\234-\237][\128-\191]+', tag)

	-- Check for consecutive span tags.
	-- wikitext:gsub('<span[^>]+>[^<]*</span><span[^>]+>[^<]*</span>', mw.log)

	return wikitext
end
-- Hand the module table (with p.show attached) back to the caller.
return p