Module:Make emoji zwj table
Appearance
This module creates a new version of emoji_t
for use in Module:Citation/CS1/Configuration.
To use this module:
- locate a copy of the new version of the Unicode file emoji-zwj-sequences.txt. This file might be found in https://unicode.org/Public/emoji/VV.V/ (where VV.V is the new Unicode version number).
- copy the whole content of emoji-zwj-sequences.txt to your clipboard
- edit this page (the module's documentation page)
- paste your clipboard into this page overwriting any previous version of the Unicode data; do not disturb the html comment tags.
- replace the url in the {{#invoke:}} with the url of the new emoji-zwj-sequences.txt file
- preview this page; if nothing is wrong with the rendering, save.
- copy emoji_t to your clipboard and paste it over emoji_t in Module:Citation/CS1/Configuration/sandbox (always update the live module suite from its sandboxen)
Lua error in package.lua at line 80: module 'Module:No globals' not found.
--[[
This module creates an associative table of emoji code points that may follow a zero-width joiner character (U+200D).
The module reads a copy of the Unicode Emoji ZWJ Sequences for UTS (typically emoji-zwj-sequences.txt found in
https://unicode.org/Public/emoji/VV.V/ where VV.V is the Unicode version number). The copy of the unicode data
file is held inside html comments in the module's /doc page. From that file, the module extracts pairs of
<zwj> <emoji code point>. The module saves each unique code point, transformed as necessary, to build a new version
of emoji_t for use in Module:Citation/CS1/Configuration.
The module takes one positional parameter:
{{#invoke:make emoji zwj table|main|<url>}}
<url> is the url that matches the Unicode data file. Alas, Lua modules cannot read external data files so <url>
is merely used to document where the data may be found.
Use of this module is documented on its /doc page
]]
require('Module:No globals');
-- Human-readable names for the code points that may follow a zero-width joiner (U+200D).
-- main() looks a code point up here (by its decimal value) to annotate the generated emoji_t
-- table and to pre-fill the regenerated copy of this table; a code point with no entry here
-- gets an empty name.  Names are the lower-cased Unicode character names.
local emoji_names_t = {															-- keys are decimal forms of the hex values in U+xxxx
	[127752] = 'rainbow',														-- U+1F308
	[127806] = 'ear of rice',													-- U+1F33E
	[127859] = 'cooking',														-- U+1F373
	[127891] = 'graduation cap',												-- U+1F393
	[127908] = 'microphone',													-- U+1F3A4
	[127912] = 'artist palette',												-- U+1F3A8
	[127979] = 'school',														-- U+1F3EB
	[127981] = 'factory',														-- U+1F3ED
	[128102] = 'boy',															-- U+1F466
	[128103] = 'girl',															-- U+1F467
	[128104] = 'man',															-- U+1F468
	[128105] = 'woman',															-- U+1F469
	[128139] = 'kiss mark',														-- U+1F48B
	[128187] = 'personal computer',												-- U+1F4BB
	[128188] = 'briefcase',														-- U+1F4BC; Unicode name is the single word BRIEFCASE
	[128295] = 'wrench',														-- U+1F527
	[128300] = 'microscope',													-- U+1F52C
	[128488] = 'left speech bubble',											-- U+1F5E8
	[128640] = 'rocket',														-- U+1F680
	[128658] = 'fire engine',													-- U+1F692
	[129309] = 'handshake',														-- U+1F91D
	[129455] = 'probing cane',													-- U+1F9AF
	[129456] = 'emoji component red hair',										-- U+1F9B0
	[129457] = 'emoji component curly hair',									-- U+1F9B1
	[129458] = 'emoji component bald',											-- U+1F9B2
	[129459] = 'emoji component white hair',									-- U+1F9B3
	[129466] = 'safety vest',													-- U+1F9BA
	[129468] = 'motorized wheelchair',											-- U+1F9BC
	[129469] = 'manual wheelchair',												-- U+1F9BD
	[129489] = 'adult',															-- U+1F9D1
	[9760] = 'skull and crossbones',											-- U+2620
	[9792] = 'female sign',														-- U+2640
	[9794] = 'male sign',														-- U+2642
	[9877] = 'staff of aesculapius',											-- U+2695
	[9878] = 'scales',															-- U+2696
	[9992] = 'airplane',														-- U+2708
	[10084] = 'heavy black heart',												-- U+2764
	}
--[[--------------------------< M A I N >----------------------------------------------------------------------

Entry point for {{#invoke:make emoji zwj table|main|<url>}}.

Reads the wikitext of the current page's /doc subpage, collects every unique hex code point that directly
follows '200D ' in that text, and renders two <pre> blocks:
	emoji_t – one '[<decimal code point>] = true,' line per code point, annotated with the U+ hex value,
		an html entity for the character, and the name from emoji_names_t (when known)
	emoji_names_t – a refreshed copy of the names table in this module with an empty name for every code
		point that does not yet have one

frame.args[1] is only used to document where the Unicode data came from.  When it, the '# Version:' line, or
the '# Date:' line is missing, the text 'unknown' is written in its place instead of raising a Lua error.

Returns the preprocessed wikitext of both blocks; when the /doc subpage cannot be read, returns an error
message span instead.

]]

local function main (frame)
	local this_wiki = table.concat ({':', mw.language.getContentLanguage():getCode(), ':'});
	local title_obj = mw.title.getCurrentTitle();
	local doc_title_obj = mw.title.new (table.concat ({title_obj.prefixedText, '/doc'}));
	local content = doc_title_obj and doc_title_obj:getContent();				-- nil when the title is invalid or the page does not exist

	if not content then															-- nothing to parse; say so instead of crashing on content:match()
		return table.concat ({'<span class="error">make emoji zwj table: unable to read ', title_obj.prefixedText, '/doc</span>'});
	end

	local code_points_t = {};													-- set of hex code points (as written in the source) already seen after U+200D
	local out_t = {};															-- final output goes here
	local new_emoji_names_t = {};												-- used to update emoji_names_t in this module

	local tabs_15 = string.rep ('\t', 15);										-- for six-digit keys
	local tabs_16 = string.rep ('\t', 16);										-- for keys that have fewer than six digits

	local source_url = frame.args[1] or 'unknown';								-- table.concat() rejects nil so supply placeholders
	local file_date = content:match ('# *Date: *(%d%d%d%d%-%d%d%-%d%d)') or 'unknown';	-- file date of the Unicode source
	local file_version = content:match ('# *Version: *([%d%.]+)') or 'unknown';	-- version of the Unicode source

	for code_point in content:gmatch ('200D (%x+)') do							-- find each <zwj> <code point> pair
		if not code_points_t[code_point] then									-- if we have not seen this <code_point> before
			code_points_t[code_point] = true;									-- remember that we have now seen this <code_point>

			local code_point_dec = tonumber (code_point, 16);					-- convert hex code point to decimal for output table key
			local name = emoji_names_t[code_point_dec] or '';					-- if we have a name for this code point, use it; empty string else

			table.insert (out_t, table.concat ({								-- build an emoji_t line for this code point
				'\t[',															-- open key markup
				code_point_dec,													-- <code_point> in decimal
				'] = true,',													-- close key and assign it the value 'true'
				(100000 <= code_point_dec) and tabs_15 or tabs_16,				-- insert a bunch of tabs between the k/v pair and an associated comment
				'-- U+',														-- start the comment; prefix for the hex <code point>
				code_point,														-- add the <code point>
				' &#x',															-- hex html entity prefix for <code point>
				code_point,														-- add the <code point>
				'; ',															-- finish the html entity
				name,
				}));

			table.insert (new_emoji_names_t, table.concat ({					-- build an emoji_names_t line for this code point
				'\t[',															-- open key markup
				code_point_dec,													-- <code_point> in decimal
				'] = \'',														-- close key, open quote mark and ready to assign it a name
				name,
				'\',',															-- add closing quote mark and terminal comma
				}));
		end
	end

	local function compare (a, b)												-- local compare function for table.sort() ascending
		a = a:match ('%[(%d+)%]');												-- extract decimal key text
		b = b:match ('%[(%d+)%]');
		return tonumber (a) < tonumber (b);										-- convert decimal key text to numbers and compare
	end

	table.sort (out_t, compare);												-- ascending numerical sort on decimal keys; must happen before the prefix is inserted

	local prefix_t = {															-- build a prefix for this version of the table
		'==<span style="font-family: monospace, monospace;">emoji_t</span>==',
		'use this table to overwrite same-named table in [[Module:Citation/CS1/Configuration/sandbox]]',
		'<pre>-- list of emoji that use a zwj character (U+200D) to combine with another emoji',
		table.concat ({'-- from: ', source_url, '; version: ', file_version, '; ', file_date}),
		table.concat ({'-- table created by: ', this_wiki, title_obj.nsText, ':', title_obj.baseText}),
		table.concat ({'local emoji_t = {', tabs_16, '-- indexes are decimal forms of the hex values in U+xxxx'}),
		};

	table.insert (out_t, 1, table.concat (prefix_t, '\n'));						-- insert at the head of the output table
	table.insert (out_t, '\t}</pre>');											-- close the <pre> tag

	table.sort (new_emoji_names_t, compare);									-- ascending numerical sort on decimal keys

	table.insert (out_t, '==<span style="font-family: monospace, monospace;">emoji_names_t</span>==');
	table.insert (out_t, 'use this table to overwrite same-named table in [[Module:Make emoji zwj table]]; add missing names.');
	table.insert (out_t, table.concat ({'\n<pre>local emoji_names_t = {', tabs_15, '-- keys are decimal forms of the hex values in U+xxxx'}));

	for _, v in ipairs (new_emoji_names_t) do
		table.insert (out_t, v);
	end

	table.insert (out_t, '\t}</pre>');											-- close the <pre> tag

	return frame:preprocess (table.concat (out_t, '\n'));						-- make a big string and done
end
--[[--------------------------< E X P O R T S >----------------------------------------------------------------

main() is the only function made available to {{#invoke:}}.

]]

local exports_t = {};
exports_t.main = main;

return exports_t;