-- Module:Charmap

local p = {}
local unicode_convert = require('Module:Unicode convert')
local yesno = require('Module:Yesno')

-- Lua stand-ins for wiki templates this module would otherwise transclude
-- (Template:UTF-8, Template:UTF-16, hex2dec and charmap/numcharref).
-- Each handler receives the template's argument table. The hex2dec and
-- charmap/numcharref handlers parse args[1] as a hexadecimal string.
local native_templates = {
	['UTF-8'] = function(args)
		return unicode_convert.getUTF8{ args = args }
	end,

	['UTF-16'] = function(args)
		return unicode_convert.getUTF16{ args = args }
	end,

	-- Hexadecimal string -> decimal string.
	['hex2dec'] = function(args)
		local value = tonumber(args[1], 16)
		return tostring(value)
	end,

	-- Escaped numeric character reference; decimal when base == 'dec',
	-- otherwise upper-case hexadecimal padded to at least two digits.
	['charmap/numcharref'] = function(args)
		local pattern
		if args['base'] == 'dec' then
			pattern = '&amp;#%d;<wbr>'
		else
			pattern = '&amp;#x%02X;<wbr>'
		end
		return string.format(pattern, tonumber(args[1], 16))
	end,
}

-- Expands the template `title` with `args`. Titles that have a native
-- handler above are computed in Lua; anything else is expanded through the
-- current frame.
local function template(title, args)
	local handler = native_templates[title]
	if handler then
		return handler(args)
	end
	return mw.getCurrentFrame():expandTemplate{ title = title, args = args }
end

-- Maximum number of whitespace-separated code values rendered per cell.
local MAX_WORDS = 4

-- Renders at most MAX_WORDS entries of `words` through
-- template(encoding, { word, base = base }) and concatenates the results.
-- `separator` is appended after every rendered word except the
-- MAX_WORDS-th, so lists shorter than MAX_WORDS end with a trailing
-- separator.
local function _renderWords(words, encoding, separator, base)
	local rendered = ''
	for i = 1, math.min(#words, MAX_WORDS) do
		rendered = rendered .. template(encoding, { words[i], base = base })
		if i < MAX_WORDS then
			rendered = rendered .. separator
		end
	end
	return rendered
end

-- Formats one character's code value(s) in the given encoding as wikitext
-- table cell content.
--
-- args[1]: whitespace-separated hexadecimal code values (default '').
-- args[2]: encoding / template name (default 'UTF-8').
--
-- For every encoding except 'charmap/showchar' and 'hex2dec' the result
-- holds two cells: '|| ' plus the words rendered with base = 'dec', then
-- '|| ' plus the words rendered without a base. For those two encodings
-- only the base-less rendering is returned, with no cell markers.
-- Words are joined with a space, or with nothing for 'charmap/...'
-- encodings.
local function _multiUTF(args)
	local code = args[1] or ''
	local encoding = args[2] or 'UTF-8'
	local separator = encoding:sub(1, 8) == 'charmap/' and '' or ' '
	-- Split on runs of whitespace so that consecutive spaces or newlines do
	-- not produce empty words, which would otherwise be handed to the
	-- converters and count towards the MAX_WORDS limit.
	local words = mw.text.split(mw.text.trim(code), '%s+')

	local output = ''
	if encoding ~= 'charmap/showchar' and encoding ~= 'hex2dec' then
		output = '|| ' .. _renderWords(words, encoding, separator, 'dec') .. '|| '
	end

	return output .. _renderWords(words, encoding, separator)
end

-- Temporary #invoke entry point: forwards the frame's arguments to
-- _multiUTF and returns its result unchanged.
function p.multiUTF(frame)
	local args = frame.args
	return _multiUTF(args)
end

-- Builds the name row and the per-encoding rows of a character-map
-- wikitable and returns them as a wikitext string.
--
-- Read from frame.args, for i = 1..10:
--   name<i>  character name; slot i is only used when this is non-empty
--   <i>      (positional) code value, parsed as hexadecimal
--   Info<i>  exactly 'yes' to link the U+ value to fileformat.info
-- plus the flag IncludeGB (interpreted by Module:Yesno), which adds a
-- GB 18030 row. A UTF-16 row is emitted only when some code value is
-- above 0xFFFF.
p.head = function(frame)
	local args = frame.args

	-- All state is local so that nothing leaks into the global environment.
	local codes = {} -- May contain nils if bad input
	local infos = {} -- boolean array
	local names = {} -- string array
	for i = 1, 10 do
		local name = args['name' .. i]
		if name and name ~= '' then
			-- Index by #names (not #codes) because codes may have holes.
			local slot = #names + 1
			codes[slot] = args[i]
			infos[slot] = args['Info' .. i] == 'yes'
			names[slot] = name
		end
	end

	local output = '|- style="text-align:center;"\n'

	-- Appends one table row: a left-aligned label cell followed by the
	-- cells _multiUTF produces for every character in `encoding`
	-- (nil selects _multiUTF's default encoding).
	local function appendRow(label, encoding)
		output = output .. '\n|-\n| style="text-align:left;" | ' .. label
		for i = 1, #names do
			output = output .. _multiUTF{codes[i], encoding}
		end
	end

	if #names > 0 then
		output = output .. "| align=\"center\" | '''" ..
			(next(codes) == nil and 'Character' or 'Unicode') .. " name''' "
		for _, name in ipairs(names) do
			-- Display the character name in smallcaps
			output = output .. '|| colspan=2 | <span class="smallcaps smallcaps-smaller">' ..
				mw.ustring.upper(name) .. '</span>'
		end
	end

	output = output .. '\n|-\n! style="text-align:left;" | Encodings' ..
		string.rep('|| decimal || hex', #names) ..
		'\n|-\n| style="text-align:left;" | [[Unicode]]'
	for i = 1, #names do
		local code = codes[i] or ''
		if #code < 4 then
			code = ('0000' .. code):sub(-4) -- padleft to four digits
		end
		-- The plain 'U+%s' pattern simply ignores the second argument.
		local pattern = infos[i]
			and '[http://www.fileformat.info/info/unicode/char/%s/index.htm U+%s]'
			or 'U+%s'
		output = output .. ' || ' .. _multiUTF{codes[i] or '0', 'hex2dec'} ..
			' || ' .. pattern:format(code, code)
	end

	appendRow('[[UTF-8]]')

	local outsideBMP = false -- Do we need to show surrogate pairs?
	for i = 1, #names do
		if (tonumber(codes[i] or '', 16) or 0) > 0xFFFF then
			outsideBMP = true
			break
		end
	end

	if outsideBMP then
		appendRow('[[UTF-16]]', 'UTF-16')
	end

	if yesno(args['IncludeGB']) then
		appendRow('[[GB 18030]]', 'GB18030')
	end

	appendRow('[[Numeric character reference]]', 'charmap/numcharref')

	return output
end

return p