Module:Unicode data/category/make
Appearance
local p = {}
--- Parse the general-category data page into lookup tables.
-- Data lines are matched in the forms "XXXX ; Gc" (one code point) and
-- "XXXX..YYYY ; Gc" (inclusive range), where Gc is an uppercase letter
-- followed by a lowercase letter. Entries whose category is "Cn" are skipped.
-- @treturn table singles: map from code point (number) to category string
-- @treturn table ranges: array of { first, last, category } in file order
function p.make_singles_and_ranges()
	local source_title = 'DerivedGeneralCategory.txt'
	local title = mw.title.new(source_title)
	local text = title and title:getContent()
	if not text then
		-- Fail with a readable message instead of "attempt to index nil".
		error("Could not read the content of page '" .. source_title .. "'")
	end

	local singles, ranges = {}, {}
	-- The frontier pattern anchors each match at the start of a line.
	-- The second code point must be optional (%x*): with %x+ a line holding a
	-- single code point would backtrack and split that code point in two, so
	-- the "singles" branch below could never be reached.
	for code_point1, code_point2, category in text:gmatch("%f[^\n%z](%x+)%.?%.?(%x*)%s+;%s+(%u%l)") do
		if category ~= 'Cn' then
			code_point1 = tonumber(code_point1, 16)
			if code_point2 ~= "" then
				-- XXXX..XXXX ; gc
				code_point2 = tonumber(code_point2, 16)
				ranges[#ranges + 1] = { code_point1, code_point2, category }
			else
				-- XXXX ; gc
				singles[code_point1] = category
			end
		end
	end

	return singles, ranges
end
--- Serialise parsed category data as the source text of a Lua data module.
-- The result has the shape `return { singles = { ... }, ranges = { ... } }`.
-- Side effect: `data.ranges` is sorted in place by first code point.
-- @tparam table data table with fields `singles` (code point -> category)
--   and `ranges` (array of { first, last, category })
-- @return the concatenated output, as returned by the array's `concat`
function p.print_data(data)
	local output = require "Module:array"()

	-- Append one string.format-ed chunk to the output buffer.
	local function writef(...)
		output:insert(string.format(...))
	end

	writef("return {\nsingles = {\n")

	-- Check that maximum "singles" codepoint is less than 0x100000?
	-- NOTE(review): spairs presumably iterates in sorted key order; confirm
	-- against module 't'.
	for codepoint, category in require 't'.spairs(data.singles) do
		writef('\t\t[0x%05X] = "%s",\n', codepoint, category)
	end

	writef("},\nranges = {\n")

	local function compare_ranges(range1, range2)
		return range1[1] < range2[1]
	end

	table.sort(data.ranges, compare_ranges)

	for _, range in ipairs(data.ranges) do
		-- Index the triple explicitly: table.unpack does not exist in Lua 5.1
		-- (where the function is the global `unpack`), so this form works on
		-- every Lua version.
		writef('\t\t{ 0x%06X, 0x%06X, "%s" },\n', range[1], range[2], range[3])
	end

	writef("},\n}")

	return output:concat()
end
--- Build the category data and return it as highlighted Lua source.
-- @param frame frame object; its `extensionTag` method is used to wrap the
--   generated text in a <syntaxhighlight lang="lua"> tag
-- @return the result of `frame:extensionTag`
function p.main(frame)
	local data = {}
	-- The parser must actually be called here: assigning the bare function
	-- would store the function value in data.singles and nil in data.ranges.
	data.singles, data.ranges = p.make_singles_and_ranges()
	return frame:extensionTag{
		name = "syntaxhighlight",
		content = p.print_data(data),
		args = { lang = "lua" }
	}
end
return p