Jump to content

Module:Convert/makeunits

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Johnuniq (talk | contribs) at 03:20, 9 October 2013 (something (mw.text.nowiki?) is only preserving the first tab character on each line and is misaligning the second tab, so use spaces second tab for less ugly result). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- This module generates the wiktext required at [[Module:Convert/data]]
-- by reading and processing the wikitext of the unit definitions at
-- [[Module:Convert/documentation/conversion data/doc]].
--
-- Usage: Put the following line (with nothing else) in a sandbox:
-- {{#invoke:convert/makeunits|makeunits}}
-- Previewing the sandbox should display the required wikitext.
--
-- It is also possible to use the following where PAGE is replaced
-- with the title of the page holding the unit definitions.
-- {{#invoke:convert/makeunits|makeunits|PAGE}}
--
-- LATER:
-- If support additional currencies, need to update table:
--     local currency = { ['$'] = true, ['£'] = true }
--
-- Script method:
-- * Read lines, ignoring everything before "== Conversions ==".
-- * Process the following lines:
--   * Find next level-3 heading like "=== Length ===".
--   * Parse each following line starting with "|"
--     (but ignore lines starting with "|-" or "|}".
--   * Split such lines into fields (delimiter "||") and trim
--     leading/trailing whitespace from each field.
--     Remove any "colspan" at front of second field (symbol).
--   * Remove thousand separators (commas) from the scale field.
--     If the scale is a number, do not change it.
--     Otherwise, it should be an expression like "5/9", in
--     which case it is replaced by the value of the expression.
--   * Remove wiki formatting '[[...]]' from the link field.
--   * Remove redundant fields from the unit to reduce size of data table.
--   * Create alternative forms of a unit such as an alias or a combination.
-- * Stop processing when encounter end of text or a line starting
--   with a level-2 heading ("==" but not "===").
-- * Repeat above for each heading listed at prepare_data().
-- * Output Lua source for the units table.
--
-- -- Output has the following form.
-- local all_units = {
--     ["unitcode"] = {                        -- standard format
--         name1    = "singular name",         -- omitted if redundant
--         name1_us = "singular name sp=us",   -- omitted if redundant
--         name2    = "plural name",           -- omitted if redundant
--         name2_us = "plural name sp=us",     -- omitted if redundant
--         symbol   = "symbol",
--         sym_us   = "symbol sp=us",          -- omitted if redundant
--         usename  = 1,                       -- omitted if empty
--         utype    = "unit type",             -- from level-3 heading
--         scale    = 1,                       -- a value, if necessary from evaluating an expression
--         subdivs  = { ["ft"] = { 5280, default = "km" }, ["yd"] = { 1760 } }  -- composite input; omitted if empty
--         link     = "title of article for wikilink",  -- omitted if empty or redundant
--         ...                                 -- other values
--     },
--     ["unitcode"] = {        -- alternative format to generate an alias
--         target   = "unit code",
--         ...                                 -- optional values to override those of target
--     },
--     ["unitcode"] = {        -- alternative format to generate a "per" unit like $/acre or BTU/h
--         per      = {u1, u2},                -- numbered table of unitcodes (u1 may be a currency symbol)
--         ...                                 -- optional values
--     },
--     ["unitcode"] = {        -- alternative format to generate an error message
--         shouldbe = "message that some other unit code should be used",
--     },
--     ["unitcode"] = {        -- alternative format for combination outputs (like 'm ft')
--         combination = {u1, u2, ...},        -- numbered table of unitcodes
--         utype    = "unit type",             -- as for standard format
--     },
--     ["unitcode"] = {        -- alternative format for output multiples (like 'ftin')
--         combination = {u1, u2, ...},        -- numbered table of unitcodes
--         multiple = {f1, f2, ...},           -- numbered table of integer factors
--         utype    = "unit type",             -- as for standard format
--     },
--     ...
-- }

local specials = {
    -- This table is used to add extra fields when defining some units which
    -- require exceptions to normal processing.
    -- Each key is in the local language, while each value is fixed text,
    -- however, this script is NOT edited to localize the keys. Instead,
    -- [[Module:Convert/text]] is edited, and this script replaces sections
    -- of the following with localized definitions, if given.
    -- LATER: It would be better if this was defined in the conversions table.
    utype = {
        -- ["unit type in local language"] = "name_used_in_this_script"
        ["fuel efficiency"] = "type_fuel_efficiency",
        ["length"] = "type_length",
        ["temperature"] = "type_temperature",
        ["volume"] = "type_volume",
    },
    ucode = {
        exception = {
            -- ["unit code in local language"] = "name_used_in_module_convert"
            ["ft"] = "integer_more_precision",
            ["in"] = "subunit_more_precision",
        },
        istemperature = {
            -- Common temperature scales (not keVT or MK).
            -- ["unit code in local language"] = 1
            ["C"] = true,
            ["F"] = true,
            ["K"] = true,
            ["R"] = true,
        },
        usesymbol = {
            -- Use unit symbol not name if abbr not specified.
            -- ["unit code in local language"] = 1
            ["C"] = 1,
            ["F"] = 1,
            ["K"] = 1,
            ["R"] = 1,
            ["C-change"] = 1,
            ["F-change"] = 1,
            ["K-change"] = 1,
        },
        alttype = {
            -- Unit has an alternate type that is valid conversion.
            -- ["unit code in local language"] = "alternate type in local language"
            ["Nm"] = "energy",
            ["ftlb"] = "torque",
            ["ftlb-f"] = "torque",
            ["ftlbf"] = "torque",
            ["inlb"] = "torque",
            ["inlb-f"] = "torque",
            ["inlbf"] = "torque",
            ["inoz-f"] = "torque",
            ["inozf"] = "torque",
        },
    },
}

-- When a number is formatted as a string, the result can indicate
-- that the number is not valid. For example, on some Lua installs,
-- tostring(1/0) includes "#INF", and tostring(0/0) includes "#IND".
-- On Scribunto, the results are "inf" and "nan" (each includes "n").
-- A testing program can set the global variable is_test_run.
local bad_number_char = is_test_run and '#' or 'n'

local function collection()
    -- Return a table to hold items.
    return {
        n = 0,
        add = function (self, item)
            self.n = self.n + 1
            self[self.n] = item
        end,
        pop = function (self, item)
            if self.n > 0 then
                local top = self[self.n]
                self.n = self.n - 1
                return top
            end
        end,
        join = function (self, sep)
            return table.concat(self, sep or '\n')
        end,
    }
end

local warnings = collection()
local function add_warning(text)
    -- Add a warning that will be inserted above the final result.
    warnings:add(text)
end

---Begin code to evaluate expressions-----------------------------------
-- This is needed because Lua's loadstring() is not available in Scribunto,
-- and each scale value can be specifed as an expression such as "5/9".
-- More complex expressions are supported, including use of parentheses
-- and the binary operators: + - * / ^

local operators = {
    ['+'] = { precedence = 1, associativity = 1, func = function (a, b) return a + b end },
    ['-'] = { precedence = 1, associativity = 1, func = function (a, b) return a - b end },
    ['*'] = { precedence = 2, associativity = 1, func = function (a, b) return a * b end },
    ['/'] = { precedence = 2, associativity = 1, func = function (a, b) return a / b end },
    ['^'] = { precedence = 3, associativity = 2, func = function (a, b) return a ^ b end },
    ['('] = '(',
    [')'] = ')',
}

local function tokenizer(text)
    -- Function 'next' returns the next token which is one of:
    --     number
    --     table (operator)
    --     string ('(' or ')')
    --     nil (end of text)
    -- If invalid, an error is thrown.
    -- The number is unsigned (unary operators are not supported).
    return {
        pos = 1,
        maxpos = #text,
        text = text,
        next = function(self)
            if self.pos <= self.maxpos then
                local p1, p2, hit = self.text:find('^%s*([+%-*/^()])', self.pos)
                if hit then
                    self.pos = p2 + 1
                    return operators[hit]
                end
                p1, p2, hit = self.text:find('^%s*(%d*%.?%d*[eE][+-]?%d*)', self.pos)
                if not hit then
                    p1, p2, hit = self.text:find('^%s*(%d*%.?%d*)', self.pos)
                end
                local value = tonumber(hit)
                if value then
                    self.pos = p2 + 1
                    return value
                end
                error('invalid number "' .. self.text:sub(self.pos) .. '"', 0)
            end
        end
    }
end

local function evaluate_tokens(tokens, inparens)
    -- Return the value from evaluating tokenized expression, or throw an error.
    local numstack, opstack = collection(), collection()
    local function perform_ops(precedence, associativity)
        while opstack.n > 0 and (opstack[opstack.n].precedence > precedence or
              (opstack[opstack.n].precedence == precedence and associativity == 1)) do
            local rhs = numstack:pop()
            local lhs = numstack:pop()
            if not (rhs and lhs) then error('missing number', 0) end
            local op = opstack:pop()
            numstack:add(op.func(lhs, rhs))
        end
    end
    local token_last
    local function set_state(token_type)
        if token_last == token_type then
            local missing = (token_type == 'number') and 'operator' or 'number'
            error('missing ' .. missing, 0)
        end
        token_last = token_type
    end
    while true do
        local token = tokens:next()
        if type(token) == 'number' then
            set_state('number')
            numstack:add(token)
        elseif type(token) == 'table' then
            set_state('operator')
            perform_ops(token.precedence, token.associativity)
            opstack:add(token)
        elseif token == '(' then
            set_state('number')
            numstack:add(evaluate_tokens(tokens, true))
        elseif token == ')' then
            if inparens then
                break
            end
            error('unbalanced parentheses', 0)
        else
            break
        end
    end
    perform_ops(0)
    if numstack.n > 1 then error('missing operator', 0) end
    if numstack.n < 1 then error('missing number', 0) end
    return numstack:pop()
end

local function evaluate(expression)
    -- Return value (a number) from evaluating expression (a string),
    -- or throw an error if invalid.
    -- This is not bullet proof, but it should support the expressions used.
    return evaluate_tokens(tokenizer(expression))
end
---End code to evaluate expressions-------------------------------------
---Begin code adapted from Module:Convert-------------------------------

local SIprefixes, eng_scales
local plural_suffix = 's'

local function shallow_copy(t)
    -- Return a shallow copy of t.
    -- Do not need the features and overhead of mw.clone() provided by Scribunto.
    local result = {}
    for k, v in pairs(t) do
        result[k] = v
    end
    return result
end

local unit_mt = {
    -- Metatable to get missing values for a unit that does not accept SI prefixes,
    -- or a for a unit that accepts prefixes but where no prefix was used.
    -- In the latter case, and before use, fields symbol, name1, name1_us
    -- must be set from _symbol, _name1, _name1_us respectively.
    __index = function (self, key)
        local value
        if key == 'name1' or key == 'sym_us' then
            value = self.symbol
        elseif key == 'name2' then
            value = self.name1 .. plural_suffix
        elseif key == 'name1_us' then
            value = self.name1
            if not rawget(self, 'name2_us') then
                -- If name1_us is 'foot', do not make name2_us by appending plural_suffix.
                self.name2_us = self.name2
            end
        elseif key == 'name2_us' then
            local raw1_us = rawget(self, 'name1_us')
            if raw1_us then
                value = raw1_us .. plural_suffix
            else
                value = self.name2
            end
        elseif key == 'link' then
            value = self.name1
        else
            return nil
        end
        rawset(self, key, value)
        return value
    end
}

local unit_prefixed_mt = {
    -- Metatable to get missing values for a unit that accepts SI prefixes,
    -- and where a prefix has been used.
    -- Before use, fields si_name, si_prefix must be defined.
    __index = function (self, key)
        local value
        if key == 'symbol' then
            value = self.si_prefix .. self._symbol
        elseif key == 'sym_us' then
            value = self.symbol  -- always the same as sym_us for prefixed units
        elseif key == 'name1' then
            local pos = rawget(self, 'prefix_position') or 1
            value = self._name1
            value = value:sub(1, pos - 1) .. self.si_name .. value:sub(pos)
        elseif key == 'name2' then
            value = self.name1 .. plural_suffix
        elseif key == 'name1_us' then
            value = rawget(self, '_name1_us')
            if value then
                local pos = rawget(self, 'prefix_position') or 1
                value = value:sub(1, pos - 1) .. self.si_name .. value:sub(pos)
            else
                value = self.name1
            end
        elseif key == 'name2_us' then
            if rawget(self, '_name1_us') then
                value = self.name1_us .. plural_suffix
            else
                value = self.name2
            end
        elseif key == 'link' then
            value = self.name1
        else
            return nil
        end
        rawset(self, key, value)
        return value
    end
}

local function lookup(units, unitcode, sp, what)
    -- Return a copy of the unit if found, or return nil.
    -- In this cut-down code, sp is always nil, and what is ignored.
    local t = units[unitcode]
    if t then
        if t.shouldbe then
            return nil
        end
        local result = shallow_copy(t)
        if result.prefixes then
            result.symbol = result._symbol
            result.name1 = result._name1
            result.name1_us = result._name1_us
        end
        return setmetatable(result, unit_mt)
    end
    for plen = 2, 1, -1 do
        -- Look for an SI prefix; should never occur with an alias.
        -- Check for longer prefix first ('dam' is decametre).
        -- Micro (µ) is two bytes in utf-8, so is found with plen = 2.
        local prefix = string.sub(unitcode, 1, plen)
        local si = SIprefixes[prefix]
        if si then
            local t = units[unitcode:sub(plen+1)]
            if t and t.prefixes then
                local result = shallow_copy(t)
                if (sp == 'us' or t.sp_us) and si.name_us then
                    result.si_name = si.name_us
                else
                    result.si_name = si.name
                end
                result.si_prefix = si.prefix or prefix
                -- In this script, each scale is a string.
                result.scale = tostring(tonumber(t.scale) * 10 ^ (si.exponent * t.prefixes))
                result.prefixes = nil  -- a prefixed unit does not take more prefixes (in this script, the returned unit may be added to the list of units)
                return setmetatable(result, unit_prefixed_mt)
            end
        end
    end
    local exponent, baseunit = unitcode:match('^e(%d+)(.*)')
    if exponent then
        local engscale = eng_scales[exponent]
        if engscale then
            local result = lookup(units, baseunit, sp, 'no_combination')
            if not result then return nil end
            if result.engscale == nil then
                result.defkey = unitcode  -- key to lookup default exception
                engscale.exponent = exponent
                result.engscale = engscale
                -- Do not set result.scale as this code is called for units where that is not set.
                return result
            end
        end
    end
    return nil
end

local function evaluate_condition(value, condition)
    -- Return true or false from applying a conditional expression to value,
    -- or throw an error if invalid.
    -- A very limited set of expressions is supported:
    --    v < 9
    --    v * 9 < 9
    -- where
    --    'v' is replaced with value
    --    9 is any number (as defined by Lua tonumber)
    --    '<' can also be '<=' or '>' or '>='
    -- In addition, the following form is supported:
    --    LHS and RHS
    -- where
    --    LHS, RHS = any of above expressions.
    local function compare(value, text)
        local arithop, factor, compop, limit = text:match('^%s*v%s*([*]?)(.-)([<>]=?)(.*)$')
        if arithop == nil then
            error('Invalid default expression.', 0)
        elseif arithop == '*' then
            factor = tonumber(factor)
            if factor == nil then
                error('Invalid default expression.', 0)
            end
            value = value * factor
        end
        limit = tonumber(limit)
        if limit == nil then
            error('Invalid default expression.', 0)
        end
        if compop == '<' then
            return value < limit
        elseif compop == '<=' then
            return value <= limit
        elseif compop == '>' then
            return value > limit
        elseif compop == '>=' then
            return value >= limit
        end
        error('Invalid default expression.', 0)  -- should not occur
    end
    local lhs, rhs = condition:match('^(.-%W)and(%W.*)')
    if lhs == nil then
        return compare(value, condition)
    end
    return compare(value, lhs) and compare(value, rhs)
end

---End code adapted from Module:Convert---------------------------------

local function strip(text)
    -- Return text with no leading/trailing whitespace.
    return text:match("^%s*(.-)%s*$")
end

local function empty(text)
    -- Return true if text is nil or empty (assuming a string).
    return text == nil or text == ''
end

-- Tables of units: k = unit code, v = unit table.
local units_index = {}  -- all units: normal, alias, per, combination, or multiple
local alias_index = {}  -- all aliases (to detect attempts to define more than once)
local per_index = {}    -- all "per" units (to detect attempts to define more than once)

local function get_unit(ucode)
    -- Look up unit code in our cache of units.
    if empty(ucode) then
        return nil
    end
    return lookup(units_index, ucode)
end

local overrides = {}  -- read from input for unit codes that should not be checked for a duplicate

local function insert_unique_unit(data, unit, index)
    -- After inserting any required built-in data, insert the unit into the
    -- data table and (if index not nil) add to index,
    -- but not if the unit code is already defined.
    local ucode = unit.unitcode
    local known = get_unit(ucode)
    if known and not overrides[ucode] then
        error('Unit code "' .. ucode .. '" has already been defined.', 0)
    end
    for item, t in pairs(specials.ucode) do
        unit[item] = t[ucode]
    end
    if index then
        index[ucode] = unit
    end
    table.insert(data, unit)
end

local function check_condition(condition)
    -- Return true if condition appears to be valid; otherwise return false.
    for _, value in ipairs({ 0, 0.1, 1, 1.1, 10, 100, 1000, 1e4, 1e5 }) do
        local success, result = pcall(evaluate_condition, value, condition)
        if not success then
            return false
        end
    end
    return true
end

local function check_default_expression(default, ucode)
    -- Return a numbered table of names present in param default
    -- (two names if an expression, or one name (param default) otherwise).
    -- Throw an error if a problem occurs.
    -- An expression uses pipe-delimited fields with 'v' representing
    -- the input value for the conversion.
    -- Example (suffix is optional): 'v < 120 ! small ! big ! suffix'
    -- returns { 'smallsuffix', 'bigsuffix' }.
    if default:find('!', 1, true) == nil then
        return { default }
    end
    local t = {}
    for item in (default .. '!'):gmatch('%s*(.-)%s*!') do
        t[#t+1] = item  -- split on '!', removing leading/trailing whitespace
    end
    if not (#t == 3 or #t == 4) then
        error('Default output "' .. default .. '" for unit "' .. ucode .. '" should have 2 or 3 "!".', 0)
    end
    local condition, default1, default2 = t[1], t[2], t[3]
    if #t == 4 then
        default1 = default1 .. t[4]
        default2 = default2 .. t[4]
    end
    if not check_condition(condition) then
        error('Invalid condition in default "' .. default .. '" for unit "' .. ucode .. '".', 0)
    end
    return { default1, default2 }
end

local function check_default(default, ucode, utype, unit_table)
    -- Check the given name (or expression) of a default output.
    -- Throw an error if a problem occurs.
    local done = {}
    for _, default in ipairs(check_default_expression(default, ucode)) do
        if done[default] then
            error('Default output "' .. default .. '" for unit "' .. ucode .. '" is repeated.', 0)
        end
        if default == ucode then
            error('Default output for unit "' .. ucode .. '" is the same unit.', 0)
        end
        local default_table = get_unit(default)
        if default_table == nil then
            error('Default output "' .. default .. '" for unit "' .. ucode .. '" is not defined.', 0)
        end
        if not (utype == unit_table.utype and utype == default_table.utype) then
            error('Default output "' .. default .. '" for unit "' .. ucode .. '" has wrong type.', 0)
        end
        done[default] = true
    end
end

local function check_all_defaults(units, maxerrors)
    -- Check each default in units and warn if needed.
    -- This is done after all input data has been processed.
    -- Throw an error if a problem occurs.
    local errors = collection()
    local missing = collection()  -- unitcodes with missing defaults
    for _, unit in ipairs(units) do
        if unit.shouldbe == nil and unit.combination == nil then
            -- This is a standard unit or an alias/per (not shouldbe, combo).
            -- An alias may have a default defined, but it is optional.
            local default = unit.default
            local ucode = unit.unitcode
            if empty(default) then
                if unit.target == nil then  -- unit should have a default
                    missing:add(ucode)
                end
            else
                local ok, msg = pcall(check_default, default, ucode, unit.utype, unit)
                if not ok then
                    errors:add(msg)
                    if errors.n >= maxerrors then
                        break
                    end
                end
            end
        end
    end
    if errors.n > 0 then
        error(errors:join(), 0)
    end
    if missing.n > 0 then
        add_warning('Units with the following unit codes have no default output.')
        local limit = maxerrors
        for _, v in ipairs(missing) do
            limit = limit - 1
            if limit < 0 then
                add_warning('  (and more not shown)')
                break
            end
            add_warning('  ' .. v)
        end
    end
end

local function check_all_pers(units, maxerrors)
    -- Check each component of each "per" unit and warn if needed.
    -- In addition, add any required extra fields for some types of units.
    -- This is done after all input data has been processed.
    -- Throw an error if a problem occurs.
    local currency = { ['$'] = true, ['£'] = true }
    local errors = collection()
    local function errmsg(text)
        errors:add(text)
    end
    for _, unit in ipairs(units) do
        local per = unit.per
        if per then
            local ucode = unit.unitcode
            if #per ~= 2 then
                errmsg('Unit "' .. ucode .. '" does not have exactly 2 fields in the "per".')
            else
                local types = {}
                for i, v in ipairs(per) do
                    if empty(v) then
                        errmsg('Unit "' .. ucode .. '" has an empty field in the "per".')
                    end
                    if not currency[v] then
                        local t = get_unit(v)
                        if t then
                            types[i] = t.utype
                        else
                            errmsg('Unit "' .. ucode .. '" has undefined unit code "' .. v .. '" in the "per".')
                        end
                    end
                end
                if specials.utype[unit.utype] == 'type_fuel_efficiency' then
                    local expected = { type_volume = 1, type_length = 2 }
                    local top_type = expected[specials.utype[types[1]]]
                    local bot_type = expected[specials.utype[types[2]]]
                    if top_type and bot_type and top_type ~= bot_type then
                        unit.iscomplex = true
                        if top_type == 1 then
                            unit.invert = 1
                        else
                            unit.invert = -1
                        end
                    else
                        errmsg('Unit "' .. ucode .. '" has invalid unit types for fuel efficiency.')
                    end
                end
            end
        end
        if errors.n >= maxerrors then
            break
        end
    end
    if errors.n > 0 then
        error(errors:join(), 0)
    end
end

local function update_units(units, composites)
    -- Update some unit definitions with extra data defined in other sections.
    -- This is done after all input data has been processed.
    for _, unit in ipairs(units) do
        local comp = composites[unit.unitcode]
        if comp then
            unit.subdivs = '{ ' .. table.concat(comp.subdivs, ', ') .. ' }'
        end
    end
end

local function make_override(data)
    -- Return a function which, when called, stores a unit code is not to be
    -- checked for a duplicate. The table is stored in data (also a table).
    return function (utype, fields)
        local ucode = fields[1]
        if ucode == nil then
            error('Missing unit code for an override.', 0)
        end
        if data[ucode] then
            error('Override "' .. ucode .. '" is already defined.', 0)
        end
        data[ucode] = true
    end
end

local function make_default(data)
    -- Return a function which, when called, stores a table that defines a
    -- default output unit. The table is stored in data (also a table).
    local defaults_index = {}  -- to detect attempts to define a default twice
    return function (utype, fields)
        -- Store a table defining a unit.
        -- This is for a unit such as 'kg' that has a default output unit
        -- different from what is defined for the base unit ('g').
        -- Throw an error if a problem occurs.
        local ucode = fields[1]
        local default = fields[2]
        if empty(ucode) then
            error('Defaults section: no unit code specified.', 0)
        end
        if empty(default) then
            error('Defaults section: unit "' .. ucode .. '" has no default specified.', 0)
        end
        if #fields ~= 2 then
            error('Defaults section: unit "' .. ucode .. '" should have two fields only.', 0)
        end
        local unit_table = get_unit(ucode)
        if unit_table == nil then
            error('Defaults section: unit "' .. ucode .. '" is not defined.', 0)
        end
        local symbol = unit_table.defkey or unit_table.symbol
        if empty(symbol) then
            error('Defaults section: unit "' .. ucode .. '" must have a symbol.', 0)
        end
        check_default(default, ucode, utype, unit_table)
        if defaults_index[ucode] then
            error('Defaults section: unit "' .. ucode .. '" has already been specified.', 0)
        end
        defaults_index[ucode] = default
        table.insert(data, { symbol = symbol, default = default })
    end
end

local function clean_link(link, name)
    -- Return link, customary where:
    --   link = given link after removing any '[[...]]' wiki formatting
    --          and removing any leading '+' or '*' or '@';
    --   customary = 1 if leading '+', or 2 if '*' or 3 if '@', or nil
    --   (for extra "US" or "U.S." or "Imperial" customary units link).
    -- Result has leading/trailing whitespace removed, and is nil if empty
    -- or if link matches the name, if a name is specified.
    -- If the resulting link is nil, no link field is stored, and
    -- if a link is required, it will be set from the unit's name.
    if empty(link) then
        return nil
    end
    local prefixes = { ['+'] = 1, ['*'] = 2, ['@'] = 3 }
    local customary = prefixes[link:sub(1, 1)]
    if customary then
        link = strip(link:sub(2))
    end
    if link:sub(1, 2) == '[[' then
        link = link:sub(3)
    end
    if link:sub(-2) == ']]' then
        link = link:sub(1, -3)
    end
    link = strip(link)
    if link:sub(1, 1) == '[' or link:sub(-1) == ']' then
        error('Link "' .. link .. '" has wrong number of brackets.', 0)
    end
    if link == '' then
        link = nil
    elseif name then
        local l = link:sub(1, 1):lower() .. link:sub(2)
        local n = name:sub(1, 1):lower() .. name:sub(2)
        if l == n then
            link = nil  -- link == name, ignoring case of first letter
        end
    end
    return link, customary
end

local function make_link(data)
    -- Return a function which, when called, stores a table that defines a
    -- link exception. The table is stored in data (also a table).
    local links_index = {}  -- to detect attempts to define a link twice
    return function (utype, fields)
        -- Store a table defining a unit.
        -- This is for a unit such as 'kg' that has a linked article
        -- different from what is defined for the base unit ('g').
        -- Throw an error if a problem occurs.
        local ucode = fields[1]
        local link = clean_link(fields[2])
        if empty(ucode) then
            error('Missing unit code for a link.', 0)
        end
        if empty(link) then
            error('No link defined for unit "' .. ucode .. '".', 0)
        end
        if #fields ~= 2 then
            error('Row for unit "' .. ucode .. '" link should have two fields only.', 0)
        end
        local unit_table = get_unit(ucode)
        if unit_table == nil then
            error('Unit code "' .. ucode .. '" for a link is not defined.', 0)
        end
        if utype ~= unit_table.utype then
            error('Link exception "' .. ucode .. '" has wrong type.', 0)
        end
        local symbol = unit_table.symbol
        if empty(symbol) then
            error('Unit code "' .. ucode .. '" for a link must have a symbol.', 0)
        end
        if links_index[ucode] then
            error('Link exception "' .. ucode .. '" is already defined.', 0)
        end
        links_index[ucode] = link
        table.insert(data, { symbol = symbol, link = link })
    end
end

local function clean_scale(scale)
    -- Return cleaned scale as a string, after evaluating any expression.
    -- It would be better to retain scale expressions like "5/9" so that
    -- the expression is evaluated on the server and maintains the full
    -- resolution of the server. However, there are many such expressions
    -- in the table of all units, and it seems pointless to require the
    -- server to evaluate all of them just to do one convert.
    if empty(scale) then
        error('Missing scale.', 0)
    end
    assert(type(scale) == 'string', 'Bug: scale has an unexpected type')
    scale = string.gsub(scale, ',', '')  -- remove comma separators
    if tonumber(scale) then  -- not an expression
        return scale
    end
    local status, value = pcall(evaluate, scale)
    if not (status and type(value) == 'number') then
        error('Scale expression is invalid: "' .. scale .. '".', 0)
    end
    local result = string.format('%.30g', value)
    if result:find(bad_number_char, 1, true) then
        error('Scale expression gives an invalid value: "' .. scale .. '".', 0)
    end
    -- Omit redundant zeros from results like '1.2e-005'.
    -- Do not bother looking for results like '1.2e+005' as none occur in practice.
    local lhs, zeros, rhs = result:match('^(.-e%-)(0+)(.*)')
    if zeros then
        result = lhs .. rhs
    end
    return result
end

local function add_alias_optional_fields(unit, start, fields, target)
    -- Inspect fields[i] for i = start, start+1 ..., and extract any
    -- definitions appropriate for an alias or "per", and add them to unit.
    -- For an alias, target is a valid unit; for a "per", target is nil.
    -- Throw error if encounter an invalid entry.
    for i = start, #fields do
        local field = fields[i]
        if not empty(field) then
            local lhs, rhs = field:match('^%s*(.-)%s*=%s*(.-)%s*$')
            local good
            if not empty(rhs) then
                for _, item in ipairs({ 'sp', 'default', 'link', 'multiplier', 'symbol', 'symlink' }) do
                    if lhs == item then
                        if item == 'sp' then
                            if rhs == 'us' then
                                unit.sp_us = true
                                good = true
                            end
                        elseif item == 'link' then
                            local tlink
                            if target then
                                tlink = target[item]
                            end
                            local link, customary = clean_link(rhs, tlink)
                            if link then
                                unit[item] = link
                            end
                            if customary then
                                unit.customary = customary
                            end
                            good = true
                        elseif item == 'symlink' then
                            local pos1 = rhs:find('[[', 1, true)
                            local pos2 = rhs:find(']]', 1, true)
                            if not (pos1 and pos2 and (pos1 < pos2)) then
                                error('Alias "' .. unit.unitcode .. '" must include a wikilink ("[[...]]") in the symlink text.', 0)
                            end
                            unit.symlink = rhs
                            good = true
                        elseif item == 'multiplier' then
                            if not tonumber(rhs) then
                                error('Alias "' .. unit.unitcode .. '" has multiplier "' .. rhs .. '" which is not a number.', 0)
                            end
                            unit[item] = rhs
                            good = true
                        else
                            if target and rhs == target[item] then
                                error('Should omit "' .. item .. '" for alias "' .. unit.unitcode .. '" because it is the same as its target.', 0)
                            end
                            unit[item] = rhs
                            good = true
                        end
                        break
                    end
                end
            end
            if not good then
                error('Alias has invalid text in field "' .. field .. '".', 0)
            end
        end
    end
end

local function make_alias(fields, ucode, utype, symbol)
    -- Return a new alias unit, or return nil if symbol is not already
    -- defined as the unit code of the target unit.
    -- Throw an error if invalid.
    local target = get_unit(symbol)
    if not target then
        return nil
    end
    local unit = { unitcode = ucode, utype = utype, target = symbol }
    add_alias_optional_fields(unit, 3, fields, target)
    if alias_index[ucode] then
        error('Alias "' .. ucode .. '" already defined.', 0)
    else
        alias_index[ucode] = unit
    end
    if target.utype ~= utype then
        error('Target of alias "' .. ucode .. '" has wrong type.', 0)
    end
    return unit
end

local function make_per(fields, ucode, utype, symbol)
    -- Return a new "per" unit, or return nil if symbol is not of form "x/y".
    -- Throw an error if invalid.
    -- The top, bottom unit codes are checked later, after all units are defined.
    local top, bottom = symbol:match('^(.-)/(.*)$')
    if not top then
        return nil
    end
    local unit = { unitcode = ucode, utype = utype, per = { strip(top), strip(bottom) } }
    add_alias_optional_fields(unit, 3, fields)
    if per_index[ucode] then
        error('Per unit "' .. ucode .. '" already defined.', 0)
    else
        per_index[ucode] = unit
    end
    return unit
end

local function make_unit(data)
    -- Return a function which, when called, stores a table that defines a
    -- single unit. The table is stored in data (also a table).
    local fieldnames = {
        -- Fields in the Conversions section are assumed to be in the following order.
        'unitcode',
        'symbol',
        'sym_us',
        'scale',
        'extra',
        'name1',
        'name2',
        'name1_us',
        'name2_us',
        'prefixes',
        'default',
        'link',
    }
    return function (utype, fields)
        -- Store a table defining a unit.
        -- Throw an error if a problem occurs.
        local ucode, symbol = fields[1], fields[2]
        if empty(utype) then
            error('Missing unit type.', 0)
        end
        if empty(ucode) then
            error('Missing unit code.', 0)
        end
        if empty(symbol) then
            error('Missing symbol.', 0)
        end
        local prefix = symbol:sub(1, 1)
        if prefix == '~' or prefix == '=' or prefix == '!' then
            if symbol:sub(1, 2) == '==' then
                prefix = symbol:sub(1, 2)
            end
            symbol = strip(symbol:sub(#prefix + 1))  -- omit prefix and any following whitespace
            fields[2] = symbol
        else
            prefix = nil  -- not a valid prefix
        end
        if prefix == '=' or prefix == '==' then
            -- ucode is an alias (a fake unit code used in a convert template), or
            -- defines a "per" unit like "$/acre" or "BTU/h".
            -- For an alias, symbol is the unit code of the actual unit.
            -- For a "per", symbol is of form "x/y" where x and y are unit codes,
            -- or x is a recognized currency symbol and y is a unit code.
            -- Checking that x and y are valid is deferred until all units have
            -- been defined so, for example, "BTU/h" can be defined before "h".
            local unit
            if prefix == '=' then
                unit = make_alias(fields, ucode, utype, symbol)
            else
                unit = make_per(fields, ucode, utype, symbol)
            end
            if not unit then
                -- Do not define an alias in terms of another alias.
                error('Primary unit must be defined before alias =' .. symbol, 0)
            end
            insert_unique_unit(data, unit, units_index)
            return
        elseif prefix == '!' then
            -- ucode may be incorrectly entered as a unit code.
            -- symbol is a message saying what unit code should be used.
            local unit = { unitcode = ucode, shouldbe = symbol }
            insert_unique_unit(data, unit, nil)
            return
        end
        -- Make the unit.
        local unit = { utype = utype }
        for i, name in ipairs(fieldnames) do
            if not empty(fields[i]) then
                unit[name] = fields[i]
            end
        end
        -- Remove redundancy from unit.
        if unit.sym_us == symbol then
            unit.sym_us = nil
        end
        local prefixes = unit.prefixes
        if empty(prefixes) then
            prefixes = nil
        end
        local name1, name2 = unit.name1, unit.name2
        if name1 then
            if name1 == symbol and not prefixes then
                -- A unit which takes an SI prefix must not have a nil name because,
                -- for example, the name for "kW" = "kilo" .. "watt" (name for "W").
                -- The "not prefixes" test is needed for bn.wikipedia where the
                -- watt unit has the same name and symbol.
                unit.name1 = nil
            end
        else
            name1 = symbol
        end
        if name2 then
            if plural_suffix ~= '' and name2 == name1 .. plural_suffix then
                unit.name2 = nil
            end
        end
        local name1_us, name2_us = unit.name1_us, unit.name2_us
        if name1_us then
            if name1_us == name1 then
                unit.name1_us = nil
            end
        else
            name1_us = name1
        end
        if name2_us then
            if plural_suffix ~= '' and name2_us == name1_us .. plural_suffix then
                unit.name2_us = nil
            end
        end
        -- Other changes to unit.
        unit.scale = clean_scale(unit.scale)
        local extra = unit.extra
        if not empty(extra) then
            -- Set appropriate fields for a unit that needs more than a simple
            -- multiplication by a ratio of unit scales to convert values.
            unit.iscomplex = true
            if extra == 'volume/length' then
                unit.invert = 1
            elseif extra == 'length/volume' then
                unit.invert = -1
            elseif specials.utype[utype] == 'type_temperature' then
                unit.offset = extra
            else
                unit.builtin = extra
            end
        end
        if prefix == '~' then
            -- Magic code for units like "acre" where the symbol is not really a
            -- symbol, and output should use the singular or plural name instead.
            unit.usename = 1
        end
        local name_for_link
        if prefixes then
            -- If set name_for_link = name1 for a prefixed unit like g, then the
            -- link is "kilogram" for kg, and "yottagram" for Yg, and so on for all
            -- prefixes. That might be good for some units, but not all.
            if prefixes == 'SI' then
                unit.prefixes = 1
            elseif prefixes == 'SI2' then
                unit.prefixes = 2
            elseif prefixes == 'SI3' then
                unit.prefixes = 3
            else
                error('Unknown prefix: "' .. prefixes .. '".', 0)
            end
        else
            name_for_link = name1
        end
        unit.link, unit.customary = clean_link(unit.link, name_for_link)
        if prefixes then
            local name1, name1_us = unit.name1, unit.name1_us  -- after redundancy removed
            if name1 == nil then
                error('Unit with Prefix set must include Name.', 0)
            end
            if unit.name2 or unit.name2_us then
                error('Unit with Prefix set must have plural name that is Name + "s".', 0)
            end
            if unit.sym_us then
                error('Unit with Prefix set must have same Symbol and US symbol.', 0)
            end
            local pos = name1:find('%s', 1, true)
            local pos_us
            if name1_us then
                pos_us = name1_us:find('%s', 1, true)
            else
                pos_us = pos
            end
            if pos ~= pos_us then
                -- The only cases with "%s" are "square %smetre" and "cubic %smetre" (or "meter" for US),
                -- so do not bother having a procedure to handle different positions of "%s".
                error('Unit with Prefix set and "%s" in Name and US name, must have the "%s" at the same position.', 0)
            end
            if pos then
                if pos == 1 then
                    -- Omit leading "%s"; convert will assume position = 1 since not stored.
                    unit.name1 = name1:sub(3)
                    if name1_us then
                        unit.name1_us = name1_us:sub(3)
                    end
                else
                    -- Omit "%s" and store its position.
                    unit.name1 = name1:sub(1, pos - 1) .. name1:sub(pos + 2)
                    if name1_us then
                        unit.name1_us = name1_us:sub(1, pos - 1) .. name1_us:sub(pos + 2)
                    end
                    unit.prefix_position = pos
                end
            end
            for _, name in ipairs({ 'symbol', 'name1', 'name1_us' }) do
                unit['_' .. name] = unit[name]
                unit[name] = nil  -- force call to __index metamethod so any SI prefix can be handled
            end
        end
        for name, v in pairs(unit) do
            -- Reject if a string field includes "%s" (should never occur after above).
            if type(v) == 'string' and v:find('%s', 1, true) then
                error('Field "' .. name .. '" must not contain "%s".', 0)
            end
        end
        insert_unique_unit(data, unit, units_index)
    end
end

local function make_combination(data)
    -- Return a function which, when called, stores a table that defines a
    -- single combination unit. The table is stored in data (also a table).
    return function (utype, fields)
        -- Store a table defining a unit.
        -- This is for a combination unit that specifies more than one output.
        -- The target units must be defined first.
        -- Throw an error if a problem occurs.
        local unit = { utype = utype, combination = {} }
        for i, v in ipairs(fields) do
            if i == 1 then  -- unitcode
                if v == '' then
                    error('Missing unit code for a combination.', 0)
                end
                unit.unitcode = v
            elseif v == '' then
                -- Ignore empty fields.
            else
                local target = get_unit(v)
                if target == nil then
                    error('Unit "' .. v .. '" in combination "' .. unit.unitcode .. '" not defined.', 0)
                end
                if target.utype ~= utype then
                    error('Unit "' .. v .. '" in combination "' .. unit.unitcode .. '" has wrong type.', 0)
                end
                table.insert(unit.combination, v)
            end
        end
        if #unit.combination < 2 then
            local msg
            if #unit.combination == 0 then
                msg = 'No units specified for combination "' .. unit.unitcode .. '"'
            else
                msg = 'Only one unit specified for combination "' .. unit.unitcode .. '"'
            end
            error(msg, 0)
        end
        insert_unique_unit(data, unit, units_index)
    end
end

local function reversed(t)
    -- Return a numbered table in reverse order.
    local reversed, count = {}, #t
    for i = 1, count do
        reversed[i] = t[count + 1 - i]
    end
    return reversed
end

local function make_inputmultiple(data)
    -- Return a function which, when called, stores a table that defines a
    -- single composite (multiple input) unit. The table is stored in data (also a table).
    return function (utype, fields)
        -- Set or update an entry in the data table to record that a unit
        -- accepts subdivisions to make a composite input unit like '|2|ft|6|in'.
        -- The target units must be defined first.
        -- Throw an error if a problem occurs.
        local unitcode  -- dummy code required for simplicity, but which is not used in output
        local alternate_code  -- an alternative unit code can be specified to replace convert input
        local fixed_name  -- a fixed name can be specified to replace the unit's normal symbol/name
        local default_code
        local ucodes, scales = {}, {}
        for i, v in ipairs(fields) do
            -- 1=composite, 2=ucode1, 3=ucode2, 4=default, 5=alternate, 6=name
            if i == 1 then
                if v == '' then
                    error('Missing unit code for a composite.', 0)
                end
                unitcode = v
            elseif 2 <= i and i <= 5 then
                if not (i == 5 and v == '') then
                    local target = get_unit(v)
                    if target == nil then
                        error('Unit "' .. v .. '" in composite "' .. unitcode .. '" not defined.', 0)
                    end
                    if target.utype ~= utype then
                        error('Unit "' .. v .. '" in composite "' .. unitcode .. '" has wrong type.', 0)
                    end
                    if i < 4 then
                        table.insert(ucodes, v)
                        table.insert(scales, target.scale)
                    elseif i == 4 then
                        default_code = v
                    else
                        if scales[#scales] ~= target.scale then
                            error('Alternate unit "' .. v .. '" in composite "' .. unitcode .. '" has wrong scale.', 0)
                        end
                        alternate_code = v
                    end
                end
            elseif i == 6 then
                if v ~= '' then
                    fixed_name = v
                end
            else
                error('Composite "' .. unitcode .. '" has too many fields.', 0)
            end
        end
        if #ucodes ~= 2 then
            error('Composite "' .. unitcode .. '" must specify exactly two unit codes.', 0)
        end
        if default_code == nil then
            error('Composite "' .. unitcode .. '" must specify a default unit code.', 0)
        end
        -- Component units must be specified from most-significant to least-significant,
        -- and each ratio of a pair of scales must be very close to an integer.
        -- Currently, there will be exactly two scales and one ratio.
        local ratios, count = {}, #scales
        for i = 1, count do
            local scale = tonumber(scales[i])
            if scale == nil or scale <= 0 then
                error('Composite "' .. unitcode .. '" has a component with an invalid scale, "' .. scales[i] .. '".', 0)
            end
            scales[i] = scale
        end
        for i = 1, count - 1 do
            local ratio = scales[i] / scales[i + 1]
            local rounded = math.floor(ratio + 0.5)
            if rounded < 2 then
                error('Composite "' .. unitcode .. '" has components in wrong order or with invalid scales.', 0)
            end
            if math.abs(ratio - rounded)/ratio > 1e-6 then
                error('Composite "' .. unitcode .. '" has components where scale ratios are not integers.', 0)
            end
            ratios[i] = rounded
        end
        local text = { tostring(ratios[1]) }
        local function add_text(key, value)
            table.insert(text, string.format('%s = %q', key, value))
        end
        if default_code then
            add_text('default', default_code)
        end
        if alternate_code then
            add_text('unit', alternate_code)
        end
        if fixed_name then
            add_text('name', fixed_name)
        end
        local subdiv = string.format('["%s"] = { %s }', ucodes[2], table.concat(text, ', '))
        local main_code = ucodes[1]
        local item = data[main_code]
        if item then
            table.insert(item.subdivs, subdiv)
        else
            data[main_code] = { subdivs = { subdiv } }
        end
    end
end

local function make_outputmultiple(data)
    -- Return a function which, when called, stores a table that defines a
    -- single multiple output unit. The table is stored in data (also a table).
    return function (utype, fields)
        -- Store a table defining a unit.
        -- This is for a multiple unit like 'ydftin' (result in yards, feet, inches).
        -- The target units must be defined first.
        -- Throw an error if a problem occurs.
        local unit = { utype = utype }
        local ucodes, scales = {}, {}
        for i, v in ipairs(fields) do
            if i == 1 then  -- unitcode
                if v == '' then
                    error('Missing unit code for a multiple.', 0)
                end
                unit.unitcode = v
            elseif v == '' then
                -- Ignore empty fields.
            else
                local target = get_unit(v)
                if target == nil then
                    error('Unit "' .. v .. '" in multiple "' .. unit.unitcode .. '" not defined.', 0)
                end
                if target.utype ~= utype then
                    error('Unit "' .. v .. '" in multiple "' .. unit.unitcode .. '" has wrong type.', 0)
                end
                table.insert(ucodes, v)
                table.insert(scales, target.scale)
            end
        end
        if #ucodes < 2 then
            local msg
            if #ucodes == 0 then
                msg = 'No units specified for multiple "' .. unit.unitcode .. '"'
            else
                msg = 'Only one unit specified for multiple "' .. unit.unitcode .. '"'
            end
            error(msg, 0)
        end
        -- Component units must be specified from most-significant to least-significant
        -- (so scale values will be in descending order),
        -- and each ratio of a pair of scales must be very close to an integer.
        -- The componenets and ratios are stored in reverse order (least significant first).
        -- This script stores a unit scale as a string (might be an expression like "5/9"),
        -- but scales in a multiple are handled as numbers (should never be expressions).
        local ratios, count = {}, #scales
        for i = 1, count do
            local scale = tonumber(scales[i])
            if scale == nil or scale <= 0 then
                error('Multiple "' .. unit.unitcode .. '" has a component with an invalid scale, "' .. scales[i] .. '".', 0)
            end
            scales[i] = scale
        end
        for i = 1, count - 1 do
            local ratio = scales[i] / scales[i + 1]
            local rounded = math.floor(ratio + 0.5)
            if rounded < 2 then
                error('Multiple "' .. unit.unitcode .. '" has components in wrong order or with invalid scales.', 0)
            end
            if math.abs(ratio - rounded)/ratio > 1e-6 then
                error('Multiple "' .. unit.unitcode .. '" has components where scale ratios are not integers.', 0)
            end
            ratios[i] = rounded
        end
        unit.combination = reversed(ucodes)
        unit.multiple = reversed(ratios)
        insert_unique_unit(data, unit, units_index)
    end
end

-- To make updating Module:Convert/data easier, this script inserts a preamble
-- and a postamble so the result can be used to replace the whole page.
local data_preamble = [=[
-- Conversion data used by [[Module:Convert]] which uses mw.loadData() for
-- read-only access to this module so that it is loaded only once per page.
--
-- These data tables follow:
--   all_units           all properties for a unit, including default output
--   default_exceptions  exceptions for default output ('kg' and 'g' have different defaults)
--   link_exceptions     exceptions for links ('kg' and 'g' have different links)
--
-- These tables are generated by a script which reads the wikitext of a page that
-- documents the required properties of each unit; see [[Module:Convert/doc]].
]=]

local data_postamble = [=[
return {
    all_units = all_units,
    default_exceptions = default_exceptions,
    link_exceptions = link_exceptions,
}]=]

local out_unit_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local all_units = {]]

local out_unit_suffix = [[
}
]]

local out_default_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local default_exceptions = {
    -- Prefixed units with a default different from that of the base unit.
    -- Each key item is a prefixed symbol (unitcode for engineering notation).]]

local out_default_suffix = [[
}
]]

local out_default_item = [[
    ["{symbol}"] = "{default}",]]

local out_link_prefix = [[
---------------------------------------------------------------------------
-- Do not change the data in this table because it is created by running --
-- a script that reads the wikitext from a wiki page (see note above).   --
---------------------------------------------------------------------------
local link_exceptions = {
    -- Prefixed units with a linked article different from that of the base unit.
    -- Each key item is a prefixed symbol (not unitcode).]]

local out_link_suffix = [[
}
]]

local out_link_item = [[
    ["{symbol}"] = "{link}",]]

local combination_specification = {     -- pure combination like 'm ft', or a multiple like 'ftin'
    'combination',
    'multiple',
    'utype',
}

local alias_specification = {
    'target',
    'symbol',
    'sp_us',
    'default',
    'link',
    'symlink',
    'customary',
    'multiplier',
}

local per_specification = {
    'per',
    'symbol',
    'sp_us',
    'utype',
    'invert',
    'iscomplex',
    'default',
    'link',
    'symlink',
    'customary',
}

local shouldbe_specification = {
    'shouldbe',
}

local unit_specification = {
    '_name1',
    '_name1_us',
    '_symbol',
    'prefix_position',
    'name1',
    'name1_us',
    'name2',
    'name2_us',
    'symbol',
    'sym_us',
    'usename',
    'usesymbol',
    'utype',
    'alttype',
    'builtin',
    'scale',
    'offset',
    'invert',
    'iscomplex',
    'istemperature',
    'exception',
    'prefixes',
    'default',
    'subdivs',
    'link',
    'customary',
    'sp_us',
}

local no_quotes = {
    combination = true,
    customary = true,
    multiple = true,
    multiplier = true,
    offset = true,
    per = true,
    prefix_position = true,
    scale = true,
    subdivs = true,
}

local function add_unit_lines(results, unit, spec)
    -- Add lines of Lua source to define a unit to the results collection.
    local first_item = '    ["' .. unit.unitcode .. '"] = {'
    local last_item  = '    },'
    results:add(first_item)
    for _, k in ipairs(spec) do
        local v = unit[k]
        if v then
            local want_quotes = (type(v) == 'string' and not no_quotes[k])
            if type(v) == 'boolean' then
                v = tostring(v)
            elseif type(v) == 'number' or k == 'scale' then
                -- Replace results like '1e-006' with '1e-6'.
                v = string.gsub(tostring(v), '(e[+-])0+([1-9].*)', '%1%2', 1)
            elseif type(v) ~= 'string' then
                error('Fatal error: unknown data type for ' .. unit.unitcode)
            end
            local fmt = string.format('\t%%-9s= %%%s,', want_quotes and 'q' or 's')
            results:add(fmt:format(k, v))
        end
    end
    results:add(last_item)
end

local function numbered_table_as_string(data, unit)
    local t = {}
    for _, v in ipairs(data) do
        if type(v) == 'string' then
            table.insert(t, '"' .. v .. '"')
        elseif type(v) == 'number' then
            table.insert(t, tostring(v))
        else
            error('Fatal error: unknown data type for ' .. unit.unitcode)
        end
    end
    return '{ ' .. table.concat(t, ', ') .. ' }'
end

local function extract_heading(line)
    -- Return n, s where n = heading level number (nil if none), and
    -- s = heading text (with leading/trailing whitespace removed).
    local pattern = '^(==+)%s*(.-)%s*(==+)%s*$'
    local before, heading, after = line:match(pattern)
    if heading and #heading > 0 then
        -- Don't bother checking if before == after.
        return #before, heading
    end
end

local function fields(line)
    -- Return a numbered table of fields split from line.
    -- Items are delimited by "||".
    -- Each item has leading/trailing whitespace removed, and any encoded pipe
    -- characters are decoded.
    -- The second field (for symbol when processing units) is adjusted to
    -- remove any "colspan" at the front of lines like:
    -- "| unitcode || colspan="11" | !Text to display for an error message".
    local t = {}
    line = line .. "||"  -- to get last field
    for item in line:gmatch("%s*(.-)%s*||") do
        table.insert(t, (item:gsub('&#124;', '|')))
    end
    local cleaned = t[2]:match('^%s*colspan%s*=.-|%s*(.*)$')
    if cleaned then
        t[2] = cleaned
    end
    return t
end

local function prepare_section(maker, lines, section, maxerrors)
    -- Process the first level-two section with the given section name
    -- in the given table of lines of wikitext.
    -- If successful, maker inserts each item into a table.
    -- Otherwise, an error is thrown.
    local skip = true
    local errors = collection()
    local utype  -- unit type (from level-three heading)
    local nbsp = '\160'  -- nonbreaking space is utf-8 encoded as hex c2 a0
    for linenumber, line in ipairs(lines) do
        if skip then
            -- Skip down to and including the starting heading.
            local level, heading = extract_heading(line)
            if level == 2 and heading == section then
                skip = false
            end
        else
            -- Accummulate unit definitions.
            local c1 = line:sub(1, 1)
            local c2 = line:sub(2, 2)
            if c1 == '|' and not (c2 == '-' or c2 == '}') then
                if empty(utype) then
                    error('No level 3 heading before: ' .. line, 0)
                end
                if line:find(nbsp, 1, true) then
                    -- For example, "acre ft" does not work if it contains nbsp.
                    add_warning('Line ' .. linenumber .. ' contains a nonbreaking space')
                end
                local ok, msg = pcall(maker, utype, fields(line:sub(2)))
                if not ok then
                    if msg:sub(-1) == '.' then msg = msg:sub(1, -2) end
                    errors:add(msg .. ' (line ' .. linenumber .. ').')
                    if errors.n >= maxerrors then
                        break
                    end
                end
            else
                local level, heading = extract_heading(line)
                if level == 3 then
                    utype = heading:lower()
                elseif level == 2 then
                    break
                end
            end
        end
    end
    if skip then
        error('Level 2 heading "' .. section ..'" not found.', 0)
    end
    if errors.n > 0 then
        error(errors:join(), 0)
    end
end

local function get_page_lines(page_title)
    -- Read the wikitext of the page at the given title; split the text into
    -- lines with leading and trailing space removed from each line.
    -- Return a numbered table of the lines, or throw an error.
    if empty(page_title) then
        error('Need title of page with unit definitions.', 0)
    end
    local t = mw.title.new(page_title)
    if t then
        local content = t:getContent()
        if content then
            local lines = collection()
            for line in string.gmatch(content, '[\t ]*(.-)[\t ]*\n') do
                lines:add(line)
            end
            return lines
        end
    end
    error('Could not read wikitext from [["' .. page_title .. ']]".', 0)
end

local function prepare_data(conversion_data_title, maxerrors)
    -- Read the page of conversion data, and process the wikitext
    -- in the sections with wanted level-two headings.
    -- Return units, defaults, links (three tables).
    -- Throw an error if a problem occurs.
    local composites, defaults, links, units = {}, {}, {}, {}
    local sections = {
        -- LATER: Section names must be in English to match following text.
        { 'Overrides'       , make_override      , overrides  },
        { 'Conversions'     , make_unit          , units      },
        { 'Output multiples', make_outputmultiple, units      },
        { 'Combinations'    , make_combination   , units      },
        { 'Input multiples' , make_inputmultiple , composites },  -- after all units defined so default will be defined
        { 'Defaults'        , make_default       , defaults   },
        { 'Links'           , make_link          , links      },
    }
    local lines = get_page_lines(conversion_data_title)
    for _, section in ipairs(sections) do
        local heading = section[1]
        local maker = section[2](section[3])
        prepare_section(maker, lines, heading, maxerrors)
    end
    check_all_defaults(units, maxerrors)
    check_all_pers(units, maxerrors)
    update_units(units, composites)
    return units, defaults, links
end

local function _makeunits(results, conversion_data_title, text_module_title)
    -- Read the wikitext for the conversion data.
    -- Append output to given results collection, or throw error if a problem.
    local text_code = require(text_module_title)
    SIprefixes = text_code.SIprefixes
    eng_scales = text_code.eng_scales
    local translation = text_code.translation_table
    if translation then
        if translation.plural_suffix then
            plural_suffix = translation.plural_suffix
        end
        local ts = translation.specials
        if ts then
            if ts.utype then
                specials.utype = ts.utype
            end
            if ts.ucode then
                specials.ucode = ts.ucode
            end
        end
    end
    local units, defaults, links = prepare_data(conversion_data_title, 20)
    results:add(data_preamble)
    results:add(out_unit_prefix)
    for _, unit in ipairs(units) do
        local spec
        if unit.target then
            spec = alias_specification
        elseif unit.per then
            spec = per_specification
            unit.per = numbered_table_as_string(unit.per, unit)
        elseif unit.shouldbe then
            spec = shouldbe_specification
        elseif unit.combination then
            spec = combination_specification
            unit.combination = numbered_table_as_string(unit.combination, unit)
            if unit.multiple then
                unit.multiple = numbered_table_as_string(unit.multiple, unit)
            end
        else
            spec = unit_specification
        end
        add_unit_lines(results, unit, spec)
    end
    results:add(out_unit_suffix)
    results:add(out_default_prefix)
    for _, unit in ipairs(defaults) do
        results:add((out_default_item:gsub('{([%w_]+)}', unit)))
    end
    results:add(out_default_suffix)
    results:add(out_link_prefix)
    for _, unit in ipairs(links) do
        results:add((out_link_item:gsub('{([%w_]+)}', unit)))
    end
    results:add(out_link_suffix)
    results:add(data_postamble)
end

local function makeunits(frame)
    local args = frame.args
    local conversion_data_title = args[1] or 'Module:Convert/documentation/conversion data/doc'
    local text_module_title = args[2] or 'Module:Convert/text'
    local results = collection()
    local ok, msg = pcall(_makeunits, results, conversion_data_title, text_module_title)
    if not ok then
        results:add('Error:\n')
        results:add(msg)
    end
    local warn = ''
    if warnings.n > 0 then
        warn = 'Warning:\n\n' .. warnings:join() .. '\n\n'
    end
    -- Pre tags returned by a module are html tags, not like wikitext <pre>...</pre>.
    -- The following renders the text as is, and preserves tab characters.
    return '<pre>\n' .. mw.text.nowiki(warn .. results:join()) .. '\n</pre>\n'
end

return { makeunits = makeunits }