Module:Charmap
Appearance
| This module is rated as beta. It is considered ready for widespread use, but as it is still relatively new, it should be applied with some caution to ensure results are as expected. |
| Editing of this module by new or unregistered users is currently disabled. See the protection policy and protection log for more details. If you cannot edit this module and you wish to make a change, you can submit an edit request, discuss changes on the talk page, request unprotection, log in, or create an account. |
| This module uses TemplateStyles: Template:Smallcaps/styles.css |
| This Lua module is used on approximately 640 pages and changes may be widely noticed. Test changes in the module's /sandbox or /testcases subpages, or in your own module sandbox. Consider discussing changes on the talk page before implementing them. |
This module implements {{charmap}}.
-- Table of functions exported by this module; it is returned at the end of the file.
local p = {}
-- Supplies getUTF8 / getUTF16, which stand in for Template:UTF-8 and Template:UTF-16.
local unicode_convert = require('Module:Unicode convert')
-- Used below to interpret the IncludeGB template argument.
local yesno = require('Module:Yesno')
-- Memoizes expansions of the GB18030 template, keyed by the code passed as its first argument.
local GB18030_cache = {}
-- Appended to the end of p.head's output; nothing in the code shown here assigns it a non-empty value.
local report = ''
-- Dispatcher that stands in for several templates so they need not be expanded
-- through the parser (notably Template:UTF-8 and Template:UTF-16).
--   title: template name to emulate, or any other title to expand for real
--   args:  argument table; args[1] is the hex code, args['base'] == 'dec'
--          selects decimal output where the emulated template supports it
-- Returns the text the named template would produce.
local function template(title, args)
    if title == 'UTF-8' then
        return unicode_convert.getUTF8{ args = args }
    end
    if title == 'UTF-16' then
        return unicode_convert.getUTF16{ args = args }
    end
    if title == 'hex2dec' then
        return tostring(tonumber(args[1], 16))
    end
    if title == 'charmap/numcharref' then
        local pattern = '&#x%02X;<wbr>'
        if args['base'] == 'dec' then
            pattern = '&#%d;<wbr>'
        end
        return string.format(pattern, tonumber(args[1], 16))
    end
    if title == 'charmap/showchar' then
        return '&#x' .. mw.text.trim(args[1]) .. ';'
    end
    if title == 'GB18030' then
        local key = args[1]
        local encoded = GB18030_cache[key]
        if not encoded then
            -- NOTE(review): presumably a CPU-time budget guard so that many
            -- uncached expansions cannot exhaust the script time limit - confirm.
            if os.clock() > 7 then
                return "''currently unavailable''"
            end
            encoded = mw.getCurrentFrame():expandTemplate{ title = 'GB18030', args = { key } }
            GB18030_cache[key] = encoded
        end
        if args['base'] == 'dec' then
            return mw.getCurrentFrame():expandTemplate{
                title = 'GB18030/decimal', args = { encoded } }
        end
        return encoded
    end
    -- Anything else is expanded as an ordinary template.
    return mw.getCurrentFrame():expandTemplate{ title = title, args = args }
end
-- Renders the table cells for one character entry in a single encoding.
--   args[1]: one or more whitespace-separated hex codes (default '')
--   args[2]: encoding / template name handed to template() (default 'UTF-8')
-- At most the first four codes are rendered. For every encoding except
-- 'charmap/showchar' and 'hex2dec' the result is '|| <decimal> || <default>';
-- for those two only the default rendering is returned, without cell markers.
local function _multiUTF(args)
    local code = args[1] or ''
    local encoding = args[2] or 'UTF-8'
    -- 'charmap/...' pseudo-templates are joined with no space between codes.
    local separator = ' '
    if encoding:sub(1, 8) == 'charmap/' then
        separator = ''
    end
    local words = mw.text.split(mw.text.trim(code), '%s')
    local limit = math.min(#words, 4)

    -- Renders the (up to four) codes once; base is 'dec' or nil.
    local function render(base)
        local pieces = {}
        for i = 1, limit do
            pieces[#pieces + 1] = template(encoding, { words[i], base = base })
            -- A separator follows every code except a fourth one.
            if i < 4 then
                pieces[#pieces + 1] = separator
            end
        end
        return table.concat(pieces)
    end

    if encoding == 'charmap/showchar' or encoding == 'hex2dec' then
        return render(nil)
    end
    local decimal = render('dec')
    local default = render(nil)
    return '|| ' .. decimal .. '|| ' .. default
end
-- Temporary #invoke entry point: forwards the frame's own arguments to _multiUTF.
function p.multiUTF(frame)
    local invoke_args = frame.args
    return _multiUTF(invoke_args)
end
-- Returns args[arg1] unless it is nil or the empty string, in which case
-- args[arg2] is returned instead (whatever its value, including nil).
local function paramCoalesce(args, arg1, arg2)
    local preferred = args[arg1]
    if preferred ~= nil and preferred ~= '' then
        return preferred
    end
    return args[arg2]
end
-- Builds the leading rows of the charmap wikitable from the parent frame's
-- arguments: the character-name row, the "Encodings" header row, and the
-- Unicode, UTF-8, (optional) UTF-16, (optional) GB 18030 and numeric
-- character reference rows.
--   frame: the #invoke frame; arguments are read from frame:getParent().args
--     name1..name10  a character is included only when its nameN is non-empty
--     1..10          hex code(s) for the corresponding character
--     InfoN / Info   'yes' turns the U+ value into an external link
--     IncludeGB      truthy (per Module:Yesno) adds the GB 18030 row
-- Returns the wikitext for those rows, followed by the module-level `report`.
p.head = function(frame)
    -- All working state is local so that nothing leaks into the global
    -- environment (the previous version assigned these as globals).
    local output = '|- style="text-align:center;"\n'
    local codes = {} -- May contain nils if bad input
    local infos = {} -- boolean array
    local names = {} -- string array
    local args = frame:getParent().args
    for i = 1, 10 do
        local name = args['name' .. i]
        if name and name ~= '' then
            local slot = #names + 1
            -- The parser function uc: preserves strip markers.
            codes[slot] = frame:callParserFunction('uc', mw.text.trim(args[i]))
            infos[slot] = paramCoalesce(args, 'Info' .. i, 'Info') == 'yes'
            names[slot] = frame:callParserFunction('uc', name)
        end
    end
    local count = #names
    if count > 0 then
        -- NOTE(review): codes receives one entry per name, so inside this
        -- branch the 'Character' alternative looks unreachable - confirm intent.
        output = output .. "| align=\"center\" | '''" .. frame:preprocess('<templatestyles src="smallcaps/styles.css"/>') ..
            (next(codes) == nil and 'Character' or 'Unicode') .. " name''' "
        for i = 1, count do
            -- Display the character name in smallcaps
            output = output .. '|| colspan=2 | <span class="smallcaps smallcaps-smaller">' .. names[i] .. '</span>'
        end
    end
    output = output .. '\n|-\n! style="text-align:left;" | Encodings' ..
        string.rep('|| decimal || hex', count) ..
        '\n|-\n| style="text-align:left;" | [[Unicode]]'
    for i = 1, count do
        local code = codes[i] or ''
        -- Left-pad with zeros to at least four characters
        if #code < 4 then
            code = ('0000' .. code):sub(-4)
        end
        output = output .. ' || ' .. _multiUTF{codes[i] or '0', 'hex2dec'} .. ' || ' ..
            (infos[i] and '[http://www.fileformat.info/info/unicode/char/%s/index.htm U+%s]' or 'U+%s'):format(code, code)
    end
    output = output .. '\n|-\n| style="text-align:left;" | [[UTF-8]]'
    for i = 1, count do
        output = output .. _multiUTF{codes[i]}
    end
    local outsideBMP = false -- Do we need to show surrogate pairs?
    for i = 1, count do
        if (tonumber(codes[i] or '', 16) or 0) > 0xFFFF then
            outsideBMP = true
            break
        end
    end
    if outsideBMP then
        output = output .. '\n|-\n| style="text-align:left;" | [[UTF-16]]'
        for i = 1, count do
            output = output .. _multiUTF{codes[i], 'UTF-16'}
        end
    end
    if yesno(args['IncludeGB']) then
        output = output .. '\n|-\n| style="text-align:left;" | [[GB 18030]]'
        for i = 1, count do
            output = output .. _multiUTF{codes[i], 'GB18030'}
        end
    end
    output = output .. '\n|-\n| style="text-align:left;" | [[Numeric character reference]]'
    for i = 1, count do
        output = output .. _multiUTF{codes[i], 'charmap/numcharref'}
    end
    return output .. report
end
-- Builds one optional table row of named character references.
--   frame: the #invoke frame; character data is read from frame:getParent().args
--   n:     row selector; defaults to frame.args[1]. 'html' produces the HTML
--          named-entity row by expanding the numcr2namecr template for each
--          character; any other value reads the row label from
--          args['namedref' .. n] and the cells from args['ref' .. n .. 'char' .. i].
-- Only characters whose nameN argument is non-empty get a cell.
-- Returns the row's wikitext, or '' when no cell has content.
p.named = function(frame, n)
    n = n or frame.args[1]
    local args = frame:getParent().args
    local is_html = (n == 'html')
    local namedref = is_html and '[[List of XML and HTML character entity references|Named character reference]]' or
        args['namedref' .. n] or ''
    local cells = {}
    local empty = true
    for i = 1, 10 do
        local name = args['name' .. i]
        if name and name ~= '' then
            local ref
            if is_html then
                ref = frame:expandTemplate{ title = 'numcr2namecr',
                    args = { mw.text.trim(args[i] or '0') }} or ''
                -- Previously `empty` was never cleared on this path, so the
                -- html row was always suppressed. The row is now emitted when
                -- at least one expansion is non-blank.
                -- NOTE(review): assumes numcr2namecr expands to blank text when
                -- a character has no named entity - confirm.
                if mw.text.trim(ref) ~= '' then
                    empty = false
                end
            else
                ref = args['ref' .. n .. 'char' .. i]
                if ref then
                    -- A supplied argument (even an empty one) keeps the row.
                    empty = false
                else
                    ref = ''
                end
            end
            cells[#cells + 1] = ' || colspan="2" | ' .. ref
        end
    end
    if empty then return '' end
    return '|- style="text-align:center"\n| style="text-align:left" | ' .. namedref ..
        table.concat(cells) .. '\n'
end
-- Export the module's function table.
return p