Jump to content

Module:Convert/makeunits

विकिपीडिया से
Johnuniq (बातचीत | योगदान) (if the input is not from the default page, assume am preparing some sandbox units and do not output extraneous stuff, and do not warn if sections are missing) के द्वारा 08:45, 10 जनवरी 2014 के बदलाव

-- This module generates the wiktext required at [[Module:Convert/data]]
-- by reading and processing the wikitext of the unit definitions at
-- [[Module:Convert/documentation/conversion data/doc]].
--
-- Usage: Put the following line (with nothing else) in a sandbox:
-- {{#invoke:convert/makeunits|makeunits}}
-- Previewing the sandbox should display the required wikitext.
--
-- It is also possible to use the following where PAGE is replaced
-- with the title of the page holding the unit definitions.
-- {{#invoke:convert/makeunits|makeunits|PAGE}}
--
-- LATER:
-- If support additional currencies, need to update table:
--     local currency = { ['$'] = true, ['£'] = true }
--
-- Script method:
-- * Read lines, ignoring everything before "== Conversions ==".
-- * Process the following lines:
--   * Find next level-3 heading like "=== Length ===".
--   * Parse each following line starting with "|"
--     (but ignore lines starting with "|-" or "|}".
--   * Split such lines into fields (delimiter "||") and trim
--     leading/trailing whitespace from each field.
--     Remove any "colspan" at front of second field (symbol).
--   * Remove thousand separators (commas) from the scale field.
--     If the scale is a number, do not change it.
--     Otherwise, it should be an expression like "5/9", in
--     which case it is replaced by the value of the expression.
--   * Remove wiki formatting '[[...]]' from the link field.
--   * Remove redundant fields from the unit to reduce size of data table.
--   * Create alternative forms of a unit such as an alias or a combination.
-- * Stop processing when encounter end of text or a line starting
--   with a level-2 heading ("==" but not "===").
-- * Repeat above for each heading listed at prepare_data().
-- * Output Lua source for the units table.
--
-- -- Output has the following form.
-- local all_units = {
--     ["unitcode"] = {                        -- standard format
--         name1    = "singular name",         -- omitted if redundant
--         name1_us = "singular name sp=us",   -- omitted if redundant
--         name2    = "plural name",           -- omitted if redundant
--         name2_us = "plural name sp=us",     -- omitted if redundant
--         symbol   = "symbol",
--         sym_us   = "symbol sp=us",          -- omitted if redundant
--         usename  = 1,                       -- omitted if empty
--         utype    = "unit type",             -- from level-3 heading
--         scale    = 1,                       -- a value, if necessary from evaluating an expression
--         subdivs  = { ["ft"] = { 5280, default = "km" }, ["yd"] = { 1760 } }  -- composite input; omitted if empty
--         link     = "title of article for wikilink",  -- omitted if empty or redundant
--         ...                                 -- other values
--     },
--     ["unitcode"] = {        -- alternative format to generate an alias
--         target   = "unit code",
--         ...                                 -- optional values to override those of target
--     },
--     ["unitcode"] = {        -- alternative format to generate a "per" unit like $/acre or BTU/h
--         per      = {u1, u2},                -- numbered table of unitcodes (u1 may be a currency symbol)
--         ...                                 -- optional values
--     },
--     ["unitcode"] = {        -- alternative format to generate an error message
--         shouldbe = "message that some other unit code should be used",
--     },
--     ["unitcode"] = {        -- alternative format for combination outputs (like 'm ft')
--         combination = {u1, u2, ...},        -- numbered table of unitcodes
--         utype    = "unit type",             -- as for standard format
--     },
--     ["unitcode"] = {        -- alternative format for output multiples (like 'ftin')
--         combination = {u1, u2, ...},        -- numbered table of unitcodes
--         multiple = {f1, f2, ...},           -- numbered table of integer factors
--         utype    = "unit type",             -- as for standard format
--     },
--     ...
-- }

local specials = {
	-- This table is used to add extra fields when defining some units which
	-- require exceptions to normal processing.
	-- Each key is in the local language, while each value is fixed text,
	-- however, this script is NOT edited to localize the keys. Instead,
	-- [[Module:Convert/text]] is edited, and this script replaces sections
	-- of the following with localized definitions, if given.
	-- LATER: It would be better if this was defined in the conversions table.
	utype = {
		-- ["unit type in local language"] = "name_used_in_this_script"
		["fuel efficiency"] = "type_fuel_efficiency",
		["length"] = "type_length",
		["temperature"] = "type_temperature",
		["volume"] = "type_volume",
	},
	ucode = {
		exception = {
			-- ["unit code in local language"] = "name_used_in_module_convert"
			["ft"] = "integer_more_precision",
			["in"] = "subunit_more_precision",
			["lb"] = "integer_more_precision",
		},
		istemperature = {
			-- Common temperature scales (not keVT or MK).
			-- ["unit code in local language"] = 1
			["C"] = true,
			["F"] = true,
			["K"] = true,
			["R"] = true,
		},
		usesymbol = {
			-- Use unit symbol not name if abbr not specified.
			-- ["unit code in local language"] = 1
			["C"] = 1,
			["F"] = 1,
			["K"] = 1,
			["R"] = 1,
			["C-change"] = 1,
			["F-change"] = 1,
			["K-change"] = 1,
		},
		alttype = {
			-- Unit has an alternate type that is a valid conversion.
			-- ["unit code in local language"] = "alternate type in local language"
			["Nm"] = "energy",
			["ftlb"] = "torque",
			["ftlb-f"] = "torque",
			["ftlbf"] = "torque",
			["inlb"] = "torque",
			["inlb-f"] = "torque",
			["inlbf"] = "torque",
			["inoz-f"] = "torque",
			["inozf"] = "torque",
		},
	},
}

-- When a number is formatted as a string, the result can indicate
-- that the number is not valid. For example, on some Lua installs,
-- tostring(1/0) includes "#INF", and tostring(0/0) includes "#IND".
-- On Scribunto, the results are "inf" and "nan" (each includes "n").
-- A testing program can set the global variable is_test_run.
local bad_number_char = is_test_run and '#' or 'n'

local function collection()
	-- Return a table to hold items.
	return {
		n = 0,
		add = function (self, item)
			self.n = self.n + 1
			self[self.n] = item
		end,
		pop = function (self, item)
			if self.n > 0 then
				local top = self[self.n]
				self.n = self.n - 1
				return top
			end
		end,
		join = function (self, sep)
			return table.concat(self, sep or '\n')
		end,
	}
end

local warnings = collection()
local function add_warning(text)
	-- Add a warning that will be inserted above the final result.
	warnings:add(text)
end

---Begin code to evaluate expressions-----------------------------------
-- This is needed because Lua's loadstring() is not available in Scribunto,
-- and each scale value can be specifed as an expression such as "5/9".
-- More complex expressions are supported, including use of parentheses
-- and the binary operators: + - * / ^

local operators = {
	['+'] = { precedence = 1, associativity = 1, func = function (a, b) return a + b end },
	['-'] = { precedence = 1, associativity = 1, func = function (a, b) return a - b end },
	['*'] = { precedence = 2, associativity = 1, func = function (a, b) return a * b end },
	['/'] = { precedence = 2, associativity = 1, func = function (a, b) return a / b end },
	['^'] = { precedence = 3, associativity = 2, func = function (a, b) return a ^ b end },
	['('] = '(',
	[')'] = ')',
}

local function tokenizer(text)
	-- Function 'next' returns the next token which is one of:
	--     number
	--     table (operator)
	--     string ('(' or ')')
	--     nil (end of text)
	-- If invalid, an error is thrown.
	-- The number is unsigned (unary operators are not supported).
	return {
		pos = 1,
		maxpos = #text,
		text = text,
		next = function(self)
			if self.pos <= self.maxpos then
				local p1, p2, hit = self.text:find('^%s*([+%-*/^()])', self.pos)
				if hit then
					self.pos = p2 + 1
					return operators[hit]
				end
				p1, p2, hit = self.text:find('^%s*(%d*%.?%d*[eE][+-]?%d*)', self.pos)
				if not hit then
					p1, p2, hit = self.text:find('^%s*(%d*%.?%d*)', self.pos)
				end
				local value = tonumber(hit)
				if value then
					self.pos = p2 + 1
					return value
				end
				error('invalid number "' .. self.text:sub(self.pos) .. '"', 0)
			end
		end
	}
end

local function evaluate_tokens(tokens, inparens)
	-- Return the value from evaluating tokenized expression, or throw an error.
	local numstack, opstack = collection(), collection()
	local function perform_ops(precedence, associativity)
		while opstack.n > 0 and (opstack[opstack.n].precedence > precedence or
			(opstack[opstack.n].precedence == precedence and associativity == 1)) do
			local rhs = numstack:pop()
			local lhs = numstack:pop()
			if not (rhs and lhs) then error('missing number', 0) end
			local op = opstack:pop()
			numstack:add(op.func(lhs, rhs))
		end
	end
	local token_last
	local function set_state(token_type)
		if token_last == token_type then
			local missing = (token_type == 'number') and 'operator' or 'number'
			error('missing ' .. missing, 0)
		end
		token_last = token_type
	end
	while true do
		local token = tokens:next()
		if type(token) == 'number' then
			set_state('number')
			numstack:add(token)
		elseif type(token) == 'table' then
			set_state('operator')
			perform_ops(token.precedence, token.associativity)
			opstack:add(token)
		elseif token == '(' then
			set_state('number')
			numstack:add(evaluate_tokens(tokens, true))
		elseif token == ')' then
			if inparens then
				break
			end
			error('unbalanced parentheses', 0)
		else
			break
		end
	end
	perform_ops(0)
	if numstack.n > 1 then error('missing operator', 0) end
	if numstack.n < 1 then error('missing number', 0) end
	return numstack:pop()
end

local function evaluate(expression)
	-- Return value (a number) from evaluating expression (a string),
	-- or throw an error if invalid.
	-- This is not bullet proof, but it should support the expressions used.
	return evaluate_tokens(tokenizer(expression))
end
---End code to evaluate expressions-------------------------------------
---Begin code adapted from Module:Convert-------------------------------

local SIprefixes, eng_scales
local plural_suffix = 's'

local function shallow_copy(t)
	-- Return a shallow copy of t.
	-- Do not need the features and overhead of mw.clone() provided by Scribunto.
	local result = {}
	for k, v in pairs(t) do
		result[k] = v
	end
	return result
end

local unit_mt = {
	-- Metatable to get missing values for a unit that does not accept SI prefixes,
	-- or a for a unit that accepts prefixes but where no prefix was used.
	-- In the latter case, and before use, fields symbol, name1, name1_us
	-- must be set from _symbol, _name1, _name1_us respectively.
	__index = function (self, key)
		local value
		if key == 'name1' or key == 'sym_us' then
			value = self.symbol
		elseif key == 'name2' then
			value = self.name1 .. plural_suffix
		elseif key == 'name1_us' then
			value = self.name1
			if not rawget(self, 'name2_us') then
				-- If name1_us is 'foot', do not make name2_us by appending plural_suffix.
				self.name2_us = self.name2
			end
		elseif key == 'name2_us' then
			local raw1_us = rawget(self, 'name1_us')
			if raw1_us then
				value = raw1_us .. plural_suffix
			else
				value = self.name2
			end
		elseif key == 'link' then
			value = self.name1
		else
			return nil
		end
		rawset(self, key, value)
		return value
	end
}

local unit_prefixed_mt = {
	-- Metatable to get missing values for a unit that accepts SI prefixes,
	-- and where a prefix has been used.
	-- Before use, fields si_name, si_prefix must be defined.
	__index = function (self, key)
		local value
		if key == 'symbol' then
			value = self.si_prefix .. self._symbol
		elseif key == 'sym_us' then
			value = self.symbol  -- always the same as sym_us for prefixed units
		elseif key == 'name1' then
			local pos = rawget(self, 'prefix_position') or 1
			value = self._name1
			value = value:sub(1, pos - 1) .. self.si_name .. value:sub(pos)
		elseif key == 'name2' then
			value = self.name1 .. plural_suffix
		elseif key == 'name1_us' then
			value = rawget(self, '_name1_us')
			if value then
				local pos = rawget(self, 'prefix_position') or 1
				value = value:sub(1, pos - 1) .. self.si_name .. value:sub(pos)
			else
				value = self.name1
			end
		elseif key == 'name2_us' then
			if rawget(self, '_name1_us') then
				value = self.name1_us .. plural_suffix
			else
				value = self.name2
			end
		elseif key == 'link' then
			value = self.name1
		else
			return nil
		end
		rawset(self, key, value)
		return value
	end
}

local function lookup(units, unitcode, sp, what)
	-- Return a copy of the unit if found, or return nil.
	-- In this cut-down code, sp is always nil, and what is ignored.
	local t = units[unitcode]
	if t then
		if t.shouldbe then
			return nil
		end
		local result = shallow_copy(t)
		if result.prefixes then
			result.symbol = result._symbol
			result.name1 = result._name1
			result.name1_us = result._name1_us
		end
		return setmetatable(result, unit_mt)
	end
	for plen = 2, 1, -1 do
		-- Look for an SI prefix; should never occur with an alias.
		-- Check for longer prefix first ('dam' is decametre).
		-- Micro (µ) is two bytes in utf-8, so is found with plen = 2.
		local prefix = string.sub(unitcode, 1, plen)
		local si = SIprefixes[prefix]
		if si then
			local t = units[unitcode:sub(plen+1)]
			if t and t.prefixes then
				local result = shallow_copy(t)
				if (sp == 'us' or t.sp_us) and si.name_us then
					result.si_name = si.name_us
				else
					result.si_name = si.name
				end
				result.si_prefix = si.prefix or prefix
				-- In this script, each scale is a string.
				result.scale = tostring(tonumber(t.scale) * 10 ^ (si.exponent * t.prefixes))
				result.prefixes = nil  -- a prefixed unit does not take more prefixes (in this script, the returned unit may be added to the list of units)
				return setmetatable(result, unit_prefixed_mt)
			end
		end
	end
	local exponent, baseunit = unitcode:match('^e(%d+)(.*)')
	if exponent then
		local engscale = eng_scales[exponent]
		if engscale then
			local result = lookup(units, baseunit, sp, 'no_combination')
			if not result then return nil end
			if result.engscale == nil then
				result.defkey = unitcode  -- key to lookup default exception
				engscale.exponent = exponent
				result.engscale = engscale
				-- Do not set result.scale as this code is called for units where that is not set.
				return result
			end
		end
	end
	return nil
end

local function evaluate_condition(value, condition)
	-- Return true or false from applying a conditional expression to value,
	-- or throw an error if invalid.
	-- A very limited set of expressions is supported:
	--    v < 9
	--    v * 9 < 9
	-- where
	--    'v' is replaced with value
	--    9 is any number (as defined by Lua tonumber)
	--    '<' can also be '<=' or '>' or '>='
	-- In addition, the following form is supported:
	--    LHS and RHS
	-- where
	--    LHS, RHS = any of above expressions.
	local function compare(value, text)
		local arithop, factor, compop, limit = text:match('^%s*v%s*([*]?)(.-)([<>]=?)(.*)$')
		if arithop == nil then
			error('Invalid default expression.', 0)
		elseif arithop == '*' then
			factor = tonumber(factor)
			if factor == nil then
				error('Invalid default expression.', 0)
			end
			value = value * factor
		end
		limit = tonumber(limit)
		if limit == nil then
			error('Invalid default expression.', 0)
		end
		if compop == '<' then
			return value < limit
		elseif compop == '<=' then
			return value <= limit
		elseif compop == '>' then
			return value > limit
		elseif compop == '>=' then
			return value >= limit
		end
		error('Invalid default expression.', 0)  -- should not occur
	end
	local lhs, rhs = condition:match('^(.-%W)and(%W.*)')
	if lhs == nil then
		return compare(value, condition)
	end
	return compare(value, lhs) and compare(value, rhs)
end

---End code adapted from Module:Convert---------------------------------

local function strip(text)
	-- Return text with no leading/trailing whitespace.
	return text:match("^%s*(.-)%s*$")
end

local function empty(text)
	-- Return true if text is nil or empty (assuming a string).
	return text == nil or text == ''
end

-- Tables of units: k = unit code, v = unit table.
local units_index = {}  -- all units: normal, alias, per, combination, or multiple
local alias_index = {}  -- all aliases (to detect attempts to define more than once)
local per_index = {}    -- all "per" units (to detect attempts to define more than once)

local function get_unit(ucode)
	-- Look up unit code in our cache of units.
	if empty(ucode) then
		return nil
	end
	return lookup(units_index, ucode)
end

local overrides = {}  -- read from input for unit codes that should not be checked for a duplicate

local function insert_unique_unit(data, unit, index)
	-- After inserting any required built-in data, insert the unit into the
	-- data table and (if index not nil) add to index,
	-- but not if the unit code is already defined.
	local ucode = unit.unitcode
	local known = get_unit(ucode)
	if known and not overrides[ucode] then
		error('Unit code "' .. ucode .. '" has already been defined.', 0)
	end
	for item, t in pairs(specials.ucode) do
		unit[item] = t[ucode]
	end
	if index then
		index[ucode] = unit
	end
	table.insert(data, unit)
end

local function check_condition(condition)
	-- Return true if condition appears to be valid; otherwise return false.
	for _, value in ipairs({ 0, 0.1, 1, 1.1, 10, 100, 1000, 1e4, 1e5 }) do
		local success, result = pcall(evaluate_condition, value, condition)
		if not success then
			return false
		end
	end
	return true
end

local function check_default_expression(default, ucode)
	-- Return a numbered table of names present in param default
	-- (two names if an expression, or one name (param default) otherwise).
	-- Throw an error if a problem occurs.
	-- An expression uses pipe-delimited fields with 'v' representing
	-- the input value for the conversion.
	-- Example (suffix is optional): 'v < 120 ! small ! big ! suffix'
	-- returns { 'smallsuffix', 'bigsuffix' }.
	if default:find('!', 1, true) == nil then
		return { default }
	end
	local t = {}
	for item in (default .. '!'):gmatch('%s*(.-)%s*!') do
		t[#t+1] = item  -- split on '!', removing leading/trailing whitespace
	end
	if not (#t == 3 or #t == 4) then
		error('Default output "' .. default .. '" for unit "' .. ucode .. '" should have 2 or 3 "!".', 0)
	end
	local condition, default1, default2 = t[1], t[2], t[3]
	if #t == 4 then
		default1 = default1 .. t[4]
		default2 = default2 .. t[4]
	end
	if not check_condition(condition) then
		error('Invalid condition in default "' .. default .. '" for unit "' .. ucode .. '".', 0)
	end
	return { default1, default2 }
end

local function check_default(default, ucode, utype, unit_table)
	-- Check the given name (or expression) of a default output.
	-- Throw an error if a problem occurs.
	local done = {}
	for _, default in ipairs(check_default_expression(default, ucode)) do
		if done[default] then
			error('Default output "' .. default .. '" for unit "' .. ucode .. '" is repeated.', 0)
		end
		if default == ucode then
			error('Default output for unit "' .. ucode .. '" is the same unit.', 0)
		end
		local default_table = get_unit(default)
		if default_table == nil then
			error('Default output "' .. default .. '" for unit "' .. ucode .. '" is not defined.', 0)
		end
		if not (utype == unit_table.utype and utype == default_table.utype) then
			error('Default output "' .. default .. '" for unit "' .. ucode .. '" has wrong type.', 0)
		end
		done[default] = true
	end
end

local function check_all_defaults(units, maxerrors)
	-- Check each default in units and warn if needed.
	-- This is done after all input data has been processed.
	-- Throw an error if a problem occurs.
	local errors = collection()
	local missing = collection()  -- unitcodes with missing defaults
	for _, unit in ipairs(units) do
		if unit.shouldbe == nil and unit.combination == nil then
			-- This is a standard unit or an alias/per (not shouldbe, combo).
			-- An alias may have a default defined, but it is optional.
			local default = unit.default
			local ucode = unit.unitcode
			if empty(default) then
				if unit.target == nil then  -- unit should have a default
					missing:add(ucode)
				end
			else
				local ok, msg = pcall(check_default, default, ucode, unit.utype, unit)
				if not ok then
					errors:add(msg)
					if errors.n >= maxerrors then
						break
					end
				end
			end
		end
	end
	if errors.n > 0 then
		error(errors:join(), 0)
	end
	if missing.n > 0 then
		add_warning('Units with the following unit codes have no default output.')
		local limit = maxerrors
		for _, v in ipairs(missing) do
			limit = limit - 1
			if limit < 0 then
				add_warning('  (and more not shown)')
				break
			end
			add_warning('  ' .. v)
		end
	end
end

local function check_all_pers(units, maxerrors)
	-- Check each component of each "per" unit and warn if needed.
	-- In addition, add any required extra fields for some types of units.
	-- This is done after all input data has been processed.
	-- Throw an error if a problem occurs.
	local currency = { ['$'] = true, ['£'] = true }
	local errors = collection()
	local function errmsg(text)
		errors:add(text)
	end
	for _, unit in ipairs(units) do
		local per = unit.per
		if per then
			local ucode = unit.unitcode
			if #per ~= 2 then
				errmsg('Unit "' .. ucode .. '" does not have exactly 2 fields in the "per".')
			else
				local types = {}
				for i, v in ipairs(per) do
					if empty(v) then
						errmsg('Unit "' .. ucode .. '" has an empty field in the "per".')
					end
					if not currency[v] then
						local t = get_unit(v)
						if t then
							types[i] = t.utype
						else
							errmsg('Unit "' .. ucode .. '" has undefined unit code "' .. v .. '" in the "per".')
						end
					end
				end
				if specials.utype[unit.utype] == 'type_fuel_efficiency' then
					local expected = { type_volume = 1, type_length = 2 }
					local top_type = expected[specials.utype[types[1]]]
					local bot_type = expected[specials.utype[types[2]]]
					if top_type and bot_type and top_type ~= bot_type then
						unit.iscomplex = true
						if top_type == 1 then
							unit.invert = 1
						else
							unit.invert = -1
						end
					else
						errmsg('Unit "' .. ucode .. '" has invalid unit types for fuel efficiency.')
					end
				end
			end
		end
		if errors.n >= maxerrors then
			break
		end
	end
	if errors.n > 0 then
		error(errors:join(), 0)
	end
end

local function update_units(units, composites)
	-- Update some unit definitions with extra data defined in other sections.
	-- This is done after all input data has been processed.
	for _, unit in ipairs(units) do
		local comp = composites[unit.unitcode]
		if comp then
			unit.subdivs = '{ ' .. table.concat(comp.subdivs, ', ') .. ' }'
		end
	end
end

local function make_override(data)
	-- Return a function which, when called, stores a unit code is not to be
	-- checked for a duplicate. The table is stored in data (also a table).
	return function (utype, fields)
		local ucode = fields[1]
		if ucode == nil then
			error('Missing unit code for an override.', 0)
		end
		if data[ucode] then
			error('Override "' .. ucode .. '" is already defined.', 0)
		end
		data[ucode] = true
	end
end

local function make_default(data)
	-- Return a function which, when called, stores a table that defines a
	-- default output unit. The table is stored in data (also a table).
	local defaults_index = {}  -- to detect attempts to define a default twice
	return function (utype, fields)
		-- Store a table defining a unit.
		-- This is for a unit such as 'kg' that has a default output unit
		-- different from what is defined for the base unit ('g').
		-- Throw an error if a problem occurs.
		local ucode = fields[1]
		local default = fields[2]
		if empty(ucode) then
			error('Defaults section: no unit code specified.', 0)
		end
		if empty(default) then
			error('Defaults section: unit "' .. ucode .. '" has no default specified.', 0)
		end
		if #fields ~= 2 then
			error('Defaults section: unit "' .. ucode .. '" should have two fields only.', 0)
		end
		local unit_table = get_unit(ucode)
		if unit_table == nil then
			error('Defaults section: unit "' .. ucode .. '" is not defined.', 0)
		end
		local symbol = unit_table.defkey or unit_table.symbol
		if empty(symbol) then
			error('Defaults section: unit "' .. ucode .. '" must have a symbol.', 0)
		end
		check_default(default, ucode, utype, unit_table)
		if defaults_index[ucode] then
			error('Defaults section: unit "' .. ucode .. '" has already been specified.', 0)
		end
		defaults_index[ucode] = default
		table.insert(data, { symbol = symbol, default = default })
	end
end

local function clean_link(link, name)
	-- Return link, customary where:
	--   link = given link after removing any '[[...]]' wiki formatting
	--          and removing any leading '+' or '*' or '@';
	--   customary = 1 if leading '+', or 2 if '*' or 3 if '@', or nil
	--   (for extra "US" or "U.S." or "Imperial" customary units link).
	-- Result has leading/trailing whitespace removed, and is nil if empty
	-- or if link matches the name, if a name is specified.
	-- Exception: If the link is empty and the name starts with '[[',
	-- the link is stored as '' (for a unit name which is always linked).
	-- If the resulting link is nil, no link field is stored, and
	-- if a link is required, it will be set from the unit's name.
	if empty(link) then
		return (name:sub(1, 2) == '[[') and '' or nil
	end
	local prefixes = { ['+'] = 1, ['*'] = 2, ['@'] = 3 }
	local customary = prefixes[link:sub(1, 1)]
	if customary then
		link = strip(link:sub(2))
	end
	if link:sub(1, 2) == '[[' then
		link = link:sub(3)
	end
	if link:sub(-2) == ']]' then
		link = link:sub(1, -3)
	end
	link = strip(link)
	if link:sub(1, 1) == '[' or link:sub(-1) == ']' then
		error('Link "' .. link .. '" has wrong number of brackets.', 0)
	end
	if link == '' then
		link = nil
	elseif name then
		local l = link:sub(1, 1):lower() .. link:sub(2)
		local n = name:sub(1, 1):lower() .. name:sub(2)
		if l == n then
			link = nil  -- link == name, ignoring case of first letter
		end
	end
	return link, customary
end

local function make_link(data)
	-- Return a function which, when called, stores a table that defines a
	-- link exception. The table is stored in data (also a table).
	local links_index = {}  -- to detect attempts to define a link twice
	return function (utype, fields)
		-- Store a table defining a unit.
		-- This is for a unit such as 'kg' that has a linked article
		-- different from what is defined for the base unit ('g').
		-- Throw an error if a problem occurs.
		local ucode = fields[1]
		local link = clean_link(fields[2])
		if empty(ucode) then
			error('Missing unit code for a link.', 0)
		end
		if empty(link) then
			error('No link defined for unit "' .. ucode .. '".', 0)
		end
		if #fields ~= 2 then
			error('Row for unit "' .. ucode .. '" link should have two fields only.', 0)
		end
		local unit_table = get_unit(ucode)
		if unit_table == nil then
			error('Unit code "' .. ucode .. '" for a link is not defined.', 0)
		end
		if utype ~= unit_table.utype then
			error('Link exception "' .. ucode .. '" has wrong type.', 0)
		end
		local symbol = unit_table.symbol
		if empty(symbol) then
			error('Unit code "' .. ucode .. '" for a link must have a symbol.', 0)
		end
		if links_index[ucode] then
			error('Link exception "' .. ucode .. '" is already defined.', 0)
		end
		links_index[ucode] = link
		table.insert(data, { symbol = symbol, link = link })
	end
end

local function clean_scale(scale)
	-- Return cleaned scale as a string, after evaluating any expression.
	-- It would be better to retain scale expressions like "5/9" so that
	-- the expression is evaluated on the server and maintains the full
	-- resolution of the server. However, there are many such expressions
	-- in the table of all units, and it seems pointless to require the
	-- server to evaluate all of them just to do one convert.
	if empty(scale) then
		error('Missing scale.', 0)
	end
	assert(type(scale) == 'string', 'Bug: scale has an unexpected type')
	scale = string.gsub(scale, ',', '')  -- remove comma separators
	if tonumber(scale) then  -- not an expression
		return scale
	end
	local status, value = pcall(evaluate, scale)
	if not (status and type(value) == 'number') then
		error('Scale expression is invalid: "' .. scale .. '".', 0)
	end
	local result = string.format('%.17g', value)
	if result:find(bad_number_char, 1, true) then
		error('Scale expression gives an invalid value: "' .. scale .. '".', 0)
	end
	-- Omit redundant zeros from results like '1.2e-005'.
	-- Do not bother looking for results like '1.2e+005' as none occur in practice.
	local lhs, zeros, rhs = result:match('^(.-e%-)(0+)(.*)')
	if zeros then
		result = lhs .. rhs
	end
	return result
end

local function add_alias_optional_fields(unit, start, fields, target)
	-- Inspect fields[i] for i = start, start+1 ..., and extract any
	-- definitions appropriate for an alias or "per", and add them to unit.
	-- For an alias, target is a valid unit; for a "per", target is nil.
	-- Throw error if encounter an invalid entry.
	for i = start, #fields do
		local field = fields[i]
		if not empty(field) then
			local lhs, rhs = field:match('^%s*(.-)%s*=%s*(.-)%s*$')
			local good
			if not empty(rhs) then
				for _, item in ipairs({ 'sp', 'default', 'link', 'multiplier', 'symbol', 'symlink' }) do
					if lhs == item then
						if item == 'sp' then
							if rhs == 'us' then
								unit.sp_us = true
								good = true
							end
						elseif item == 'link' then
							local tlink
							if target then
								tlink = target[item]
							end
							local link, customary = clean_link(rhs, tlink)
							if link then
								unit[item] = link
							end
							if customary then
								unit.customary = customary
							end
							good = true
						elseif item == 'symlink' then
							local pos1 = rhs:find('[[', 1, true)
							local pos2 = rhs:find(']]', 1, true)
							if not (pos1 and pos2 and (pos1 < pos2)) then
								error('Alias "' .. unit.unitcode .. '" must include a wikilink ("[[...]]") in the symlink text.', 0)
							end
							unit.symlink = rhs
							good = true
						elseif item == 'multiplier' then
							if not tonumber(rhs) then
								error('Alias "' .. unit.unitcode .. '" has multiplier "' .. rhs .. '" which is not a number.', 0)
							end
							unit[item] = rhs
							good = true
						else
							if target and rhs == target[item] then
								error('Should omit "' .. item .. '" for alias "' .. unit.unitcode .. '" because it is the same as its target.', 0)
							end
							unit[item] = rhs
							good = true
						end
						break
					end
				end
			end
			if not good then
				error('Alias has invalid text in field "' .. field .. '".', 0)
			end
		end
	end
end

local function make_alias(fields, ucode, utype, symbol)
	-- Return a new alias unit, or return nil if symbol is not already
	-- defined as the unit code of the target unit.
	-- Throw an error if invalid.
	local target = get_unit(symbol)
	if not target then
		return nil
	end
	local unit = { unitcode = ucode, utype = utype, target = symbol }
	add_alias_optional_fields(unit, 3, fields, target)
	if alias_index[ucode] then
		error('Alias "' .. ucode .. '" already defined.', 0)
	else
		alias_index[ucode] = unit
	end
	if target.utype ~= utype then
		error('Target of alias "' .. ucode .. '" has wrong type.', 0)
	end
	return unit
end

local function make_per(fields, ucode, utype, symbol)
	-- Return a new "per" unit, or return nil if symbol is not of form "x/y".
	-- Throw an error if invalid.
	-- The top, bottom unit codes are checked later, after all units are defined.
	local top, bottom = symbol:match('^(.-)/(.*)$')
	if not top then
		return nil
	end
	local unit = { unitcode = ucode, utype = utype, per = { strip(top), strip(bottom) } }
	add_alias_optional_fields(unit, 3, fields)
	if per_index[ucode] then
		error('Per unit "' .. ucode .. '" already defined.', 0)
	else
		per_index[ucode] = unit
	end
	return unit
end

local function make_unit(data)
	-- Return a function which, when called, stores a table that defines a
	-- single unit. The table is stored in data (also a table).
	local fieldnames = {
		-- Fields in the Conversions section are assumed to be in the following order.
		'unitcode',
		'symbol',
		'sym_us',
		'scale',
		'extra',
		'name1',
		'name2',
		'name1_us',
		'name2_us',
		'prefixes',
		'default',
		'link',
	}
	return function (utype, fields)
		-- Store a table defining a unit.
		-- Throw an error if a problem occurs.
		local ucode, symbol = fields[1], fields[2]
		if empty(utype) then
			error('Missing unit type.', 0)
		end
		if empty(ucode) then
			error('Missing unit code.', 0)
		end
		if empty(symbol) then
			error('Missing symbol.', 0)
		end
		local prefix = symbol:sub(1, 1)
		if prefix == '~' or prefix == '=' or prefix == '!' or prefix == '*' then
			if symbol:sub(1, 2) == '==' then
				prefix = symbol:sub(1, 2)
			end
			symbol = strip(symbol:sub(#prefix + 1))  -- omit prefix and any following whitespace
			fields[2] = symbol
		else
			prefix = nil  -- not a valid prefix
		end
		if prefix == '=' or prefix == '==' then
			-- ucode is an alias (a fake unit code used in a convert template), or
			-- defines a "per" unit like "$/acre" or "BTU/h".
			-- For an alias, symbol is the unit code of the actual unit.
			-- For a "per", symbol is of form "x/y" where x and y are unit codes,
			-- or x is a recognized currency symbol and y is a unit code.
			-- Checking that x and y are valid is deferred until all units have
			-- been defined so, for example, "BTU/h" can be defined before "h".
			local unit
			if prefix == '=' then
				unit = make_alias(fields, ucode, utype, symbol)
			else
				unit = make_per(fields, ucode, utype, symbol)
			end
			if not unit then
				-- Do not define an alias in terms of another alias.
				error('Primary unit must be defined before alias =' .. symbol, 0)
			end
			insert_unique_unit(data, unit, units_index)
			return
		elseif prefix == '!' then
			-- ucode may be incorrectly entered as a unit code.
			-- symbol is a message saying what unit code should be used.
			local unit = { unitcode = ucode, shouldbe = symbol }
			insert_unique_unit(data, unit, nil)
			return
		end
		-- Make the unit.
		local unit = { utype = utype }
		for i, name in ipairs(fieldnames) do
			if not empty(fields[i]) then
				unit[name] = fields[i]
			end
		end
		-- Remove redundancy from unit.
		if unit.sym_us == symbol then
			unit.sym_us = nil
		end
		local prefixes = unit.prefixes
		if empty(prefixes) then
			prefixes = nil
		end
		local name1, name2 = unit.name1, unit.name2
		if name1 then
			if name1 == symbol and not prefixes then
				-- A unit which takes an SI prefix must not have a nil name because,
				-- for example, the name for "kW" = "kilo" .. "watt" (name for "W").
				-- The "not prefixes" test is needed for bn.wikipedia where the
				-- watt unit has the same name and symbol.
				unit.name1 = nil
			end
		else
			name1 = symbol
		end
		if name2 then
			if plural_suffix ~= '' and name2 == name1 .. plural_suffix then
				unit.name2 = nil
			end
		end
		local name1_us, name2_us = unit.name1_us, unit.name2_us
		if name1_us then
			if name1_us == name1 then
				unit.name1_us = nil
			end
		else
			name1_us = name1
		end
		if name2_us then
			if plural_suffix ~= '' and name2_us == name1_us .. plural_suffix then
				unit.name2_us = nil
			end
		end
		-- Other changes to unit.
		unit.scale = clean_scale(unit.scale)
		local extra = unit.extra
		if not empty(extra) then
			-- Set appropriate fields for a unit that needs more than a simple
			-- multiplication by a ratio of unit scales to convert values.
			unit.iscomplex = true
			if extra == 'volume/length' then
				unit.invert = 1
			elseif extra == 'length/volume' then
				unit.invert = -1
			elseif specials.utype[utype] == 'type_temperature' then
				unit.offset = extra
			elseif extra == 'invert' then
				unit.invert = -1
			else
				unit.builtin = extra
			end
		end
		if prefix == '~' then
			-- Magic code for units like "acre" where the symbol is not really a
			-- symbol, and output should use the singular or plural name instead.
			unit.usename = 1
		elseif prefix == '*' then
			-- Magic code for units like "pitch" which have a symbol that is the same as
			-- another unit with entries defined in the default or link exceptions tables.
			unit.defkey = ucode  -- key for default exceptions
			unit.linkey = ucode  -- key for link exceptions
		end
		local name_for_link
		if prefixes then
			-- If set name_for_link = name1 for a prefixed unit like g, then the
			-- link is "kilogram" for kg, and "yottagram" for Yg, and so on for all
			-- prefixes. That might be good for some units, but not all.
			if prefixes == 'SI' then
				unit.prefixes = 1
			elseif prefixes == 'SI2' then
				unit.prefixes = 2
			elseif prefixes == 'SI3' then
				unit.prefixes = 3
			else
				error('Unknown prefix: "' .. prefixes .. '".', 0)
			end
		else
			name_for_link = name1
		end
		unit.link, unit.customary = clean_link(unit.link, name_for_link)
		if prefixes then
			local name1, name1_us = unit.name1, unit.name1_us  -- after redundancy removed
			if name1 == nil then
				error('Unit with Prefix set must include Name.', 0)
			end
			if unit.name2 or unit.name2_us then
				error('Unit with Prefix set must have plural name that is Name + "s".', 0)
			end
			if unit.sym_us then
				error('Unit with Prefix set must have same Symbol and US symbol.', 0)
			end
			local pos = name1:find('%s', 1, true)
			local pos_us
			if name1_us then
				pos_us = name1_us:find('%s', 1, true)
			else
				pos_us = pos
			end
			if pos ~= pos_us then
				-- The only cases with "%s" are "square %smetre" and "cubic %smetre" (or "meter" for US),
				-- so do not bother having a procedure to handle different positions of "%s".
				error('Unit with Prefix set and "%s" in Name and US name, must have the "%s" at the same position.', 0)
			end
			if pos then
				if pos == 1 then
					-- Omit leading "%s"; convert will assume position = 1 since not stored.
					unit.name1 = name1:sub(3)
					if name1_us then
						unit.name1_us = name1_us:sub(3)
					end
				else
					-- Omit "%s" and store its position.
					unit.name1 = name1:sub(1, pos - 1) .. name1:sub(pos + 2)
					if name1_us then
						unit.name1_us = name1_us:sub(1, pos - 1) .. name1_us:sub(pos + 2)
					end
					unit.prefix_position = pos
				end
			end
			for _, name in ipairs({ 'symbol', 'name1', 'name1_us' }) do
				unit['_' .. name] = unit[name]
				unit[name] = nil  -- force call to __index metamethod so any SI prefix can be handled
			end
		end
		for name, v in pairs(unit) do
			-- Reject if a string field includes "%s" (should never occur after above).
			if type(v) == 'string' and v:find('%s', 1, true) then
				error('Field "' .. name .. '" must not contain "%s".', 0)
			end
		end
		insert_unique_unit(data, unit, units_index)
	end
end

local function make_combination(data)
	-- Return a function which, when called, stores a table that defines a
	-- single combination unit. The table is stored in data (also a table).
	return function (utype, fields)
		-- Store a table defining a unit.
		-- This is for a combination unit that specifies more than one output.
		-- The target units must be defined first.
		-- Throw an error if a problem occurs.
		local unit = { utype = utype, combination = {} }
		for i, v in ipairs(fields) do
			if i == 1 then  -- unitcode
				if v == '' then
					error('Missing unit code for a combination.', 0)
				end
				unit.unitcode = v
			elseif v == '' then
				-- Ignore empty fields.
			else
				local target = get_unit(v)
				if target == nil then
					error('Unit "' .. v .. '" in combination "' .. unit.unitcode .. '" not defined.', 0)
				end
				if target.utype ~= utype then
					error('Unit "' .. v .. '" in combination "' .. unit.unitcode .. '" has wrong type.', 0)
				end
				table.insert(unit.combination, v)
			end
		end
		if #unit.combination < 2 then
			local msg
			if #unit.combination == 0 then
				msg = 'No units specified for combination "' .. unit.unitcode .. '"'
			else
				msg = 'Only one unit specified for combination "' .. unit.unitcode .. '"'
			end
			error(msg, 0)
		end
		insert_unique_unit(data, unit, units_index)
	end
end

local function reversed(t)
	-- Return a numbered table in reverse order.
	local reversed, count = {}, #t
	for i = 1, count do
		reversed[i] = t[count + 1 - i]
	end
	return reversed
end

local function make_inputmultiple(data)
	-- Return a function which, when called, stores a table that defines a
	-- single composite (multiple input) unit. The table is stored in data (also a table).
	return function (utype, fields)
		-- Set or update an entry in the data table to record that a unit
		-- accepts subdivisions to make a composite input unit like '|2|ft|6|in'.
		-- The target units must be defined first.
		-- Throw an error if a problem occurs.
		local unitcode  -- dummy code required for simplicity, but which is not used in output
		local alternate_code  -- an alternative unit code can be specified to replace convert input
		local fixed_name  -- a fixed name can be specified to replace the unit's normal symbol/name
		local default_code
		local ucodes, scales = {}, {}
		for i, v in ipairs(fields) do
			-- 1=composite, 2=ucode1, 3=ucode2, 4=default, 5=alternate, 6=name
			if i == 1 then
				if v == '' then
					error('Missing unit code for a composite.', 0)
				end
				unitcode = v
			elseif 2 <= i and i <= 5 then
				if not (i == 5 and v == '') then
					local target = get_unit(v)
					if target == nil then
						error('Unit "' .. v .. '" in composite "' .. unitcode .. '" not defined.', 0)
					end
					if target.utype ~= utype then
						error('Unit "' .. v .. '" in composite "' .. unitcode .. '" has wrong type.', 0)
					end
					if i < 4 then
						table.insert(ucodes, v)
						table.insert(scales, target.scale)
					elseif i == 4 then
						default_code = v
					else
						if scales[#scales] ~= target.scale then
							error('Alternate unit "' .. v .. '" in composite "' .. unitcode .. '" has wrong scale.', 0)
						end
						alternate_code = v
					end
				end
			elseif i == 6 then
				if v ~= '' then
					fixed_name = v
				end
			else
				error('Composite "' .. unitcode .. '" has too many fields.', 0)
			end
		end
		if #ucodes ~= 2 then
			error('Composite "' .. unitcode .. '" must specify exactly two unit codes.', 0)
		end
		if default_code == nil then
			error('Composite "' .. unitcode .. '" must specify a default unit code.', 0)
		end
		-- Component units must be specified from most-significant to least-significant,
		-- and each ratio of a pair of scales must be very close to an integer.
		-- Currently, there will be exactly two scales and one ratio.
		local ratios, count = {}, #scales
		for i = 1, count do
			local scale = tonumber(scales[i])
			if scale == nil or scale <= 0 then
				error('Composite "' .. unitcode .. '" has a component with an invalid scale, "' .. scales[i] .. '".', 0)
			end
			scales[i] = scale
		end
		for i = 1, count - 1 do
			local ratio = scales[i] / scales[i + 1]
			local rounded = math.floor(ratio + 0.5)
			if rounded < 2 then
				error('Composite "' .. unitcode .. '" has components in wrong order or with invalid scales.', 0)
			end
			if math.abs(ratio - rounded)/ratio > 1e-6 then
				error('Composite "' .. unitcode .. '" has components where scale ratios are not integers.', 0)
			end
			ratios[i] = rounded
		end
		local text = { tostring(ratios[1]) }
		local function add_text(key, value)
			table.insert(text, string.format('%s = %q', key, value))
		end
		if default_code then
			add_text('default', default_code)
		end
		if alternate_code then
			add_text('unit', alternate_code)
		end
		if fixed_name then
			add_text('name', fixed_name)
		end
		local subdiv = string.format('["%s"] = { %s }', ucodes[2], table.concat(text, ', '))
		local main_code = ucodes[1]
		local item = data[main_code]
		if item then
			table.insert(item.subdivs, subdiv)
		else
			data[main_code] = { subdivs = { subdiv } }
		end
	end
end

local function make_outputmultiple(data)
	-- Return a function which, when called, stores a table that defines a
	-- single multiple output unit. The table is stored in data (also a table).
	return function (utype, fields)
		-- Store a table defining a unit.
		-- This is for a multiple unit like 'ydftin' (result in yards, feet, inches).
		-- The target units must be defined first.
		-- Throw an error if a problem occurs.
		local unit = { utype = utype }
		local ucodes, scales = {}, {}
		for i, v in ipairs(fields) do
			if i == 1 then  -- unitcode
				if v == '' then
					error('Missing unit code for a multiple.', 0)
				end
				unit.unitcode = v
			elseif v == '' then
				-- Ignore empty fields.
			else
				local target = get_unit(v)
				if target == nil then
					error('Unit "' .. v .. '" in multiple "' .. unit.unitcode .. '" not defined.', 0)
				end
				if target.utype ~= utype then
					error('Unit "' .. v .. '" in multiple "' .. unit.unitcode .. '" has wrong type.', 0)
				end
				table.insert(ucodes, v)
				table.insert(scales, target.scale)
			end
		end
		if #ucodes < 2 then
			local msg
			if #ucodes == 0 then
				msg = 'No units specified for multiple "' .. unit.unitcode .. '"'
			else
				msg = 'Only one unit specified for multiple "' .. unit.unitcode .. '"'
			end
			error(msg, 0)
		end
		-- Component units must be specified from most-significant to least-significant
		-- (so scale values will be in descending order),
		-- and each ratio of a pair of scales must be very close to an integer.
		-- The componenets and ratios are stored in reverse order (least significant first).
		-- This script stores a unit scale as a string (might be an expression like "5/9"),
		-- but scales in a multiple are handled as numbers (should never be expressions).
		local ratios, count = {}, #scales
		for i = 1, count do
			local scale = tonumber(scales[i])
			if scale == nil or scale <= 0 then
				error('Multiple "' .. unit.unitcode .. '" has a component with an invalid scale, "' .. scales[i] .. '".', 0)
			end
			scales[i] = scale
		end
		for i = 1, count - 1 do
			local ratio = scales[i] / scales[i + 1]
			local rounded = math.floor(ratio + 0.5)
			if rounded < 2 then
				error('Multiple "' .. unit.unitcode .. '" has components in wrong order or with invalid scales.', 0)
			end
			if math.abs(ratio - rounded)/ratio > 1e-6 then
				error('Multiple "' .. unit.unitcode .. '" has components where scale ratios are not integers.', 0)
			end
			ratios[i] = rounded
		end
		unit.combination = reversed(ucodes)
		unit.multiple = reversed(ratios)
		insert_unique_unit(data, unit, units_index)
	end
end

-- To make updating Module:Convert/data easier, this script inserts a preamble
-- and a postamble so the result can be used to replace the whole page.
local data_preamble = [=[
-- Conversion data used by [[Module:Convert]] which uses mw.loadData() for
-- read-only access to this module so that it is loaded only once per page.
--
-- These data tables follow:
--   all_units           all properties for a unit, including default output
--   default_exceptions  exceptions for default output ('kg' and 'g' have different defaults)
--   link_exceptions     exceptions for links ('kg' and 'g' have different links)
--
-- These tables are generated by a script which reads the wikitext of a page that
-- documents the required properties of each unit; see [[Module:Convert/doc]].
]=]

local data_postamble = [=[
return {
	all_units = all_units,
	default_exceptions = default_exceptions,
	link_exceptions = link_exceptions,
}]=]

local out_unit_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local all_units = {]]

local out_unit_suffix = [[
}
]]

local out_default_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local default_exceptions = {
	-- Prefixed units with a default different from that of the base unit.
	-- Each key item is a prefixed symbol (unitcode for engineering notation).]]

local out_default_suffix = [[
}
]]

local out_default_item = [[
	["{symbol}"] = "{default}",]]

local out_link_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local link_exceptions = {
	-- Prefixed units with a linked article different from that of the base unit.
	-- Each key item is a prefixed symbol (not unitcode).]]

local out_link_suffix = [[
}
]]

local out_link_item = [[
	["{symbol}"] = "{link}",]]

local combination_specification = {     -- pure combination like 'm ft', or a multiple like 'ftin'
	'combination',
	'multiple',
	'utype',
}

local alias_specification = {
	'target',
	'symbol',
	'sp_us',
	'default',
	'link',
	'symlink',
	'customary',
	'multiplier',
}

local per_specification = {
	'per',
	'symbol',
	'sp_us',
	'utype',
	'invert',
	'iscomplex',
	'default',
	'link',
	'symlink',
	'customary',
	'multiplier',
}

local shouldbe_specification = {
	'shouldbe',
}

local unit_specification = {
	'_name1',
	'_name1_us',
	'_symbol',
	'prefix_position',
	'name1',
	'name1_us',
	'name2',
	'name2_us',
	'symbol',
	'sym_us',
	'usename',
	'usesymbol',
	'utype',
	'alttype',
	'builtin',
	'scale',
	'offset',
	'invert',
	'iscomplex',
	'istemperature',
	'exception',
	'prefixes',
	'default',
	'subdivs',
	'defkey',
	'linkey',
	'link',
	'customary',
	'sp_us',
}

local no_quotes = {
	combination = true,
	customary = true,
	multiple = true,
	multiplier = true,
	offset = true,
	per = true,
	prefix_position = true,
	scale = true,
	subdivs = true,
}

local function add_unit_lines(results, unit, spec)
	-- Add lines of Lua source to define a unit to the results collection.
	local function add_line(line)
		-- Had planned to replace sequences of spaces with 4-column tabs here
		-- (because the CodeEditor now assumes the use of such tabs).
		-- However, 4-column tabs are only visible when editing Module:Convert
		-- with browser scripting and the CodeEditor enabled, and that is rare.
		-- Module:Convert would usually be viewed (with 8-column tabs), and some
		-- indents would be messed up unless 8-column tabs are used. Therefore,
		-- have decided to simply replace 8 spaces at start of line with a single
		-- tab which reduces the size of the module, and is correct for viewing.
		if line:sub(1, 8) == string.rep(' ', 8) then
			line = '\t' .. line:sub(9)
		end
		results:add(line)
	end
	local first_item = '    ["' .. unit.unitcode .. '"] = {'
	local last_item  = '    },'
	add_line(first_item)
	for _, k in ipairs(spec) do
		local v = unit[k]
		if v then
			local want_quotes = (type(v) == 'string' and not no_quotes[k])
			if type(v) == 'boolean' then
				v = tostring(v)
			elseif type(v) == 'number' or k == 'scale' then
				-- Replace results like '1e-006' with '1e-6'.
				v = string.gsub(tostring(v), '(e[+-])0+([1-9].*)', '%1%2', 1)
			elseif type(v) ~= 'string' then
				error('Fatal error: unknown data type for ' .. unit.unitcode)
			end
			local fmt = string.format('%8s%%-9s= %%%s,', '', want_quotes and 'q' or 's')
			add_line(fmt:format(k, v))
		end
	end
	add_line(last_item)
end

local function numbered_table_as_string(data, unit)
	local t = {}
	for _, v in ipairs(data) do
		if type(v) == 'string' then
			table.insert(t, '"' .. v .. '"')
		elseif type(v) == 'number' then
			table.insert(t, tostring(v))
		else
			error('Fatal error: unknown data type for ' .. unit.unitcode)
		end
	end
	return '{ ' .. table.concat(t, ', ') .. ' }'
end

local function extract_heading(line)
	-- Return n, s where n = heading level number (nil if none), and
	-- s = heading text (with leading/trailing whitespace removed).
	local pattern = '^(==+)%s*(.-)%s*(==+)%s*$'
	local before, heading, after = line:match(pattern)
	if heading and #heading > 0 then
		-- Don't bother checking if before == after.
		return #before, heading
	end
end

local function fields(line)
	-- Return a numbered table of fields split from line.
	-- Items are delimited by "||".
	-- Each item has leading/trailing whitespace removed, and any encoded pipe
	-- characters are decoded.
	-- The second field (for symbol when processing units) is adjusted to
	-- remove any "colspan" at the front of lines like:
	-- "| unitcode || colspan="11" | !Text to display for an error message".
	local t = {}
	line = line .. "||"  -- to get last field
	for item in line:gmatch("%s*(.-)%s*||") do
		table.insert(t, (item:gsub('&#124;', '|')))
	end
	local cleaned = t[2]:match('^%s*colspan%s*=.-|%s*(.*)$')
	if cleaned then
		t[2] = cleaned
	end
	return t
end

local function prepare_section(maker, lines, section, maxerrors, is_sandbox)
	-- Process the first level-two section with the given section name
	-- in the given table of lines of wikitext.
	-- If successful, maker inserts each item into a table.
	-- Otherwise, an error is thrown.
	local skip = true
	local errors = collection()
	local utype  -- unit type (from level-three heading)
	local nbsp = '\160'  -- nonbreaking space is utf-8 encoded as hex c2 a0
	for linenumber, line in ipairs(lines) do
		if skip then
			-- Skip down to and including the starting heading.
			local level, heading = extract_heading(line)
			if level == 2 and heading == section then
				skip = false
			end
		else
			-- Accummulate unit definitions.
			local c1 = line:sub(1, 1)
			local c2 = line:sub(2, 2)
			if c1 == '|' and not (c2 == '-' or c2 == '}') then
				if empty(utype) then
					error('No level 3 heading before: ' .. line, 0)
				end
				if line:find(nbsp, 1, true) then
					-- For example, "acre ft" does not work if it contains nbsp.
					add_warning('Line ' .. linenumber .. ' contains a nonbreaking space')
				end
				local ok, msg = pcall(maker, utype, fields(line:sub(2)))
				if not ok then
					if msg:sub(-1) == '.' then msg = msg:sub(1, -2) end
					errors:add(msg .. ' (line ' .. linenumber .. ').')
					if errors.n >= maxerrors then
						break
					end
				end
			else
				local level, heading = extract_heading(line)
				if level == 3 then
					utype = heading:lower()
				elseif level == 2 then
					break
				end
			end
		end
	end
	if skip and not is_sandbox then
		error('Level 2 heading "' .. section ..'" not found.', 0)
	end
	if errors.n > 0 then
		error(errors:join(), 0)
	end
end

local function get_page_lines(page_title)
	-- Read the wikitext of the page at the given title; split the text into
	-- lines with leading and trailing space removed from each line.
	-- Return a numbered table of the lines, or throw an error.
	if empty(page_title) then
		error('Need title of page with unit definitions.', 0)
	end
	local t = mw.title.new(page_title)
	if t then
		local content = t:getContent()
		if content then
			if content:sub(-1) ~= '\n' then
				content = content .. '\n'
			end
			local lines = collection()
			for line in string.gmatch(content, '[\t ]*(.-)[\t\r ]*\n') do
				lines:add(line)
			end
			return lines
		end
	end
	error('Could not read wikitext from "[[' .. page_title .. ']]".', 0)
end

local function prepare_data(conversion_data_title, maxerrors, is_sandbox)
	-- Read the page of conversion data, and process the wikitext
	-- in the sections with wanted level-two headings.
	-- Return units, defaults, links (three tables).
	-- Throw an error if a problem occurs.
	local composites, defaults, links, units = {}, {}, {}, {}
	local sections = {
		-- LATER: Section names must be in English to match following text.
		{ 'Overrides'       , make_override      , overrides  },
		{ 'Conversions'     , make_unit          , units      },
		{ 'Output multiples', make_outputmultiple, units      },
		{ 'Combinations'    , make_combination   , units      },
		{ 'Input multiples' , make_inputmultiple , composites },  -- after all units defined so default will be defined
		{ 'Defaults'        , make_default       , defaults   },
		{ 'Links'           , make_link          , links      },
	}
	local lines = get_page_lines(conversion_data_title)
	for _, section in ipairs(sections) do
		local heading = section[1]
		local maker = section[2](section[3])
		prepare_section(maker, lines, heading, maxerrors, is_sandbox)
	end
	check_all_defaults(units, maxerrors)
	check_all_pers(units, maxerrors)
	update_units(units, composites)
	return units, defaults, links
end

local function _makeunits(results, conversion_data_title, text_module_title, is_sandbox)
	-- Read the wikitext for the conversion data.
	-- Append output to given results collection, or throw error if a problem.
	local text_code = require(text_module_title)
	SIprefixes = text_code.SIprefixes
	eng_scales = text_code.eng_scales
	local translation = text_code.translation_table
	if translation then
		if translation.plural_suffix then
			plural_suffix = translation.plural_suffix
		end
		local ts = translation.specials
		if ts then
			if ts.utype then
				specials.utype = ts.utype
			end
			if ts.ucode then
				specials.ucode = ts.ucode
			end
		end
	end
	local units, defaults, links = prepare_data(conversion_data_title, 20, is_sandbox)
	if data_preamble then
		results:add(data_preamble)
	end
	results:add(out_unit_prefix)
	for _, unit in ipairs(units) do
		local spec
		if unit.target then
			spec = alias_specification
		elseif unit.per then
			spec = per_specification
			unit.per = numbered_table_as_string(unit.per, unit)
		elseif unit.shouldbe then
			spec = shouldbe_specification
		elseif unit.combination then
			spec = combination_specification
			unit.combination = numbered_table_as_string(unit.combination, unit)
			if unit.multiple then
				unit.multiple = numbered_table_as_string(unit.multiple, unit)
			end
		else
			spec = unit_specification
		end
		add_unit_lines(results, unit, spec)
	end
	results:add(out_unit_suffix)
	if #defaults > 0 or not is_sandbox then
		results:add(out_default_prefix)
		for _, unit in ipairs(defaults) do
			results:add((out_default_item:gsub('{([%w_]+)}', unit)))
		end
		results:add(out_default_suffix)
	end
	if #links > 0 or not is_sandbox then
		results:add(out_link_prefix)
		for _, unit in ipairs(links) do
			results:add((out_link_item:gsub('{([%w_]+)}', unit)))
		end
		results:add(out_link_suffix)
	end
	if data_postamble then
		results:add(data_postamble)
	end
end

local function makeunits(frame)
	local is_sandbox
	local args = frame.args
	local conversion_data_title = 'Module:Convert/documentation/conversion data/doc'
	local text_module_title = args[2] or 'Module:Convert/text'
	local title = args[1]
	if title and conversion_data_title ~= title then
		conversion_data_title = title
		is_sandbox = true
		data_preamble = nil
		data_postamble = nil
		out_unit_prefix = 'local all_units = {'
		out_unit_suffix = '}'
		out_default_prefix = '\nlocal default_exceptions = {'
		out_default_suffix = '}'
		out_default_item = '\t["{symbol}"] = "{default}",'
		out_link_prefix = '\nlocal link_exceptions = {'
		out_link_suffix = '}'
		out_link_item = '\t["{symbol}"] = "{link}",'
	end
	local results = collection()
	local ok, msg = pcall(_makeunits, results, conversion_data_title, text_module_title, is_sandbox)
	if not ok then
		results:add('Error:\n')
		results:add(msg)
	end
	local warn = ''
	if warnings.n > 0 then
		warn = 'Warning:\n\n' .. warnings:join() .. '\n\n'
	end
	-- Pre tags returned by a module are html tags, not like wikitext <pre>...</pre>.
	-- The following renders the text as is, and preserves tab characters.
	return '<pre>\n' .. mw.text.nowiki(warn .. results:join()) .. '\n</pre>\n'
end

return { makeunits = makeunits }