-- 模块:Section sizes

require('strict');

local lang_obj = mw.language.getContentLanguage(); -- content-language object: supplies the language code used to fetch interface messages and does locale-appropriate number formatting


--[[--------------------------< I 1 8 N _ T >------------------------------------------------------------------

All static text emitted by this module, gathered in one associative table so that the module can be translated
to another language by editing this table only.

In the message strings $1 and $2 are placeholders; the comment next to each message states what replaces them.

]]

local i18n_t = {
    -- non-fatal error message
    ['markup removed'] = '标记已移除，无法链接',

    -- fatal error messages; in each of these $1 is the page name
    fatal_no_article = '错误：页面$1未創建',
    fatal_no_sections = '错误：页面$1未分章节',
    fatal_redirect = '错误：页面$1是重定向',

    -- help wikilink appended to every error message; $1 is the calling template's name
    ['help link'] = '（[[$1|帮助]]）',

    -- error category; $1 is the calling template's name; comment out this entry to suppress the category
    ['error category'] = '[[Category:有$1错误的页面]]',

    -- table caption; $1 is the page name; $2 is the number of sections
    ['table caption'] = '[[$1]]各章节大小（共$2个章节）',

    -- column headings, left to right
    section_name = '章节标题',
    byte_count = '字节数',
    prose_size = '<abbr title="两汉字或一西文单词计为一词，粗略值">正文词数</abbr>',

    -- second-row column headings, left to right
    section_header = '标题',
    section_total = '合计',

    -- name of the unnamed lede section; set this only when the module does not get the correct string
    -- from MediaWiki:Vector-toc-beginning
    top = nil,

    -- footer
    total = '总计',

    -- accepted spellings of boolean parameter values
    yesno = {
        yes = true,
        y = true,
        ['true'] = true,
        t = true,
        ['1'] = true,
        no = false,
        n = false,
        ['false'] = false,
        f = false,
        ['0'] = false,
    },
}

-- Display name of the unnamed lead section.  A translation in i18n_t.top wins (rarely needed); otherwise the
-- 'vector-toc-beginning' interface message is fetched in the wiki's content language and taken as a plain string.
local section_top = i18n_t.top;
if not section_top then
    local top_message = mw.message.new('vector-toc-beginning');
    section_top = top_message:inLanguage(lang_obj:getCode()):plain();
end

-- Section names whose content is not counted as prose (their prose size is reported as 0)
local noprose_sections = {
    -- references
    'References',
    '參考', '参考',
    '參考資料', '参考资料',
    '參考文獻', '参考文献',
    -- see also
    'See also',
    '參見', '参见',
    '另見', '另见',
    -- external links
    'External links',
    '外部連結', '外部链接',
    -- further reading
    'Further reading',
    '擴展閱讀', '扩展阅读',
};


--[[--------------------------< E R R O R _ M S G _ M A K E >--------------------------------------------------

Builds the wikitext for one error message; shared by fatal and non-fatal errors.

<template_name> is substituted into the help link and into the error category link.
<msg> is the raw message text; <args_t> holds the values for its $n placeholders.
<fatal> selects the styling: fatal messages get the "error" class and no leading space; all other messages
get a leading space and red text.

The error category link is appended only when all of these hold: the current page is in namespace 1 (Talk),
i18n_t['error category'] has a value, and the category link has not already been emitted by an earlier call.

returns the assembled message as a string

]]

local err_cat_added; -- page-scope flag; true once the error category link has been emitted; nil else
local function error_msg_make(template_name, msg, args_t, fatal)
    local category_link = ''; -- stays empty unless this call emits the category

    if not err_cat_added and i18n_t['error category'] and 1 == mw.title.getCurrentTitle().namespace then
        category_link = mw.message.newRawMessage(i18n_t['error category'], { template_name }):plain();
        err_cat_added = true; -- never emit the category a second time
    end

    local leading_space, span_open;
    if fatal then
        leading_space = ''; -- fatal messages stand alone
        span_open = '<span style="font-size:100%;" class="error">';
    else
        leading_space = ' '; -- non-fatal messages follow the section name
        span_open = '<span style="color:#d33">';
    end

    local message_text = mw.message.newRawMessage(msg, args_t):plain();                      -- the message proper
    local help_link = mw.message.newRawMessage(i18n_t['help link'], { template_name }):plain(); -- link to the template's help

    return leading_space .. span_open .. message_text .. ' ' .. help_link .. '</span>' .. category_link;
end



--[[--------------------------< FIND_STRING_IN >--------------------------------

Linear membership test: walks the sequence `tbl` in order and reports whether any element equals `str`.

returns boolean true when `str` is found; boolean false else

]]

local function find_string_in(tbl, str)
    local found = false
    for _, candidate in ipairs(tbl) do
        if candidate == str then
            found = true -- first hit is enough
            break
        end
    end
    return found
end



--[[--------------------------< PROSESIZE_GET >--------------------------------

Estimates the readable-prose size of a chunk of wikitext.

Markup that is not prose is stripped first: comments, references, tables, templates, file/image links, link
targets, html-like tags, heading lines, bold/italic quote markup and list lines.  Whitespace is then collapsed
and trimmed.

For the word count every pair of adjacent characters in U+4E00–U+9FFF counts as one word; every other run of
non-whitespace characters counts as one word.

returns two values: byte length of the cleaned prose, and the number of words

]]

local function prosesize_get(wikitext)
    -- Remove comments
    local text = wikitext:gsub("<!%-%-.-%-%->", "")

    -- Remove refs and their contents
    text = text:gsub("<ref[^>]-/>", "")
    text = text:gsub("<ref.->.-</ref>", "")

    -- Remove tables {| ... |}
    text = text:gsub("{|.-|}", "")

    -- Remove templates {{...}}
    -- First, remove single braces that might confuse the code below
    text = text:gsub("([^{]){([^{])", "%1%2")
    text = text:gsub("([^}])}([^}])", "%1%2")
    -- recursive removal is tricky, so peel templates from the inside out
    local n = 1
    while n > 0 do                            -- stop when nothing more was removed
        text, n = text:gsub("{{[^{}]-}}", "") -- remove the innermost templates
    end

    -- Remove files/images [[File:...]] or [[Image:...]]
    -- Lua patterns have no alternation, so "(File|Image)" would only match that literal text.  %b[] matches
    -- a whole balanced bracket group, so wikilinks nested inside the caption are removed with the file link.
    text = text:gsub("%b[]", function(link)
        if link:find("^%[%[%s*[Ff]ile%s*:") or link:find("^%[%[%s*[Ii]mage%s*:") then
            return ""
        end
        return nil -- not a file link: leave the match untouched
    end)

    -- Remove links but keep display text [[link|display]]
    text = text:gsub("%[%[[^|%]]-|", "[[") -- strip leading [[foo|
    text = text:gsub("%[%[(.-)%]%]", "%1") -- [[bar]] -> bar

    -- Remove html tags (but not their contents unless self-closing)
    text = text:gsub("<.->", "")

    -- Remove headings (=== heading ===); a heading never spans lines, so the match must not either,
    -- otherwise everything between two unrelated '=' characters would be discarded
    text = text:gsub("=+[^\n]-=+", "")

    -- Remove formatting markup (''italic'', '''bold''', etc.)
    text = text:gsub("''+", "")

    -- Remove list lines (*, #, ;, :)
    local cleaned_lines = {}
    for line in text:gmatch("[^\n]+") do
        if not line:match("^%s*[%*#;:]") then
            cleaned_lines[#cleaned_lines + 1] = line
        end
    end
    text = table.concat(cleaned_lines, "\n")

    -- Remove now-empty parentheses (disabled)
    -- text = text:gsub("%(%)", "")

    -- Collapse whitespace
    text = text:gsub("%s+", " ")

    -- Trim
    text = text:match("^%s*(.-)%s*$")

    -- Measure the prose before the word-counting substitution below rewrites it
    local byte_count = #text

    -- Turn every pair of CJK ideographs into one stand-in word so that two characters count as one word
    local countable = mw.ustring.gsub(text, "[一-鿿][一-鿿]", "a ") -- \u4E00-\u9FFF

    -- Count the number of words
    local _, wordcount = countable:gsub("%S+", "")

    -- Return byte length and number of words
    return byte_count, wordcount
end



--[[--------------------------< R E D L I N K _ T E M P L A T E _ R E M O V E >--------------------------------

When a section heading is preprocessed, any template that the local wiki does not know is converted to a
redlink of the form [[:<template namespace name>:<template name>]].  This function replaces each such redlink
with '[...]' and reports whether it did so, so that the caller can suppress the section heading link.

returns section name and boolean true if redlinks were replaced; section name and boolean false else

]]

local function redlink_template_remove(section_name)
    local template_ns = mw.site.namespaces[10]["name"]; -- name of the template namespace in the wiki's local language

    local cleaned, replaced = section_name:gsub('%[%[:' .. template_ns .. ':.-%]%]', '[...]'); -- bracketed ellipsis stands in for the unknown template

    return cleaned, 0 < replaced;
end


--[[--------------------------< A N C H O R S _ R E M O V E >--------------------------------------------------

Strips empty html spans that look like anchors.  Two forms are recognized:
    <span class="anchor" id="Foo"></span>
    <span id="Foo"></span>
Any number of such spans may be present; all are removed.

Anchor markup is allowed in section headings so this function does not report whether it changed anything.

returns the section name without anchor spans

]]

local function anchors_remove(section_name)
    -- first form: a span with the anchor class; the id= and any other attributes are irrelevant
    local stripped = section_name:gsub('<span +[^>]*class *= *"anchor"[^>]*></span>', '');

    -- second form: a span whose only attribute is id=
    stripped = stripped:gsub('<span +%f[%a]id *= *".-" *></span>', '');

    return stripped;
end


--[[--------------------------< R E F S _ R E M O V E >--------------------------------------------------------

Removes wikitext reference markup from a section name.  This is done before the section name is preprocessed
(to render any templates in it) because preprocessing a name that holds reference markup makes MediaWiki create
a reflist; a side effect that we don't want.

Self-closed tags (<ref name="x" />) are removed first, then paired tags together with their content
(<ref ...>...</ref>).  A tag is matched only up to its own closing '>' so that a paired <ref> followed later by
some other self-closed tag (<br/> for example) is not mistaken for one long self-closed <ref .../>.

returns modified section name and boolean true when references have been removed; unmodified section name and false else.

]]

local function refs_remove(section_name)
    local name;              -- modified (or unmodified) section name
    local self_closed_count; -- number of self-closed <ref .../> tags removed
    local paired_count;      -- number of <ref ...>...</ref> pairs removed

    -- %f[%s/>] requires the tag name to end right after 'ref' so that a <references> tag is not matched
    name, self_closed_count = section_name:gsub('<ref%f[%s/>][^>]-/>', '');  -- self-closed tags, with or without attributes
    name, paired_count = name:gsub('<ref%f[%s>][^>]->.-</ref>', '');         -- remaining ref tags and their content (if any)

    return name, (0 < self_closed_count) or (0 < paired_count);
end


--[[--------------------------< S T R I P M A R K E R S _ R E M O V E >----------------------------------------

Deletes strip markers from a preprocessed section name.  A strip marker is recognized as text delimited by two
DEL characters (\127) that contains UNIQ--<letters>-<hex digits>-QINU.

It may be best to preserve <nowiki/> tags before the section name is preprocessed to prevent '<nowiki/>'' from
being interpreted as ''' bold markup.  That cannot be done here because nowiki strip markers are identifiable
only by their numbers.

returns modified section name and boolean true when stripmarkers have been removed; unmodified section name and false else.

]]

local function stripmarkers_remove(section_name)
    local marker_pattern = '\127[^\127]*UNIQ%-%-%a+%-[%x]+%-QINU[^\127]*\127';

    local cleaned, removed = section_name:gsub(marker_pattern, '');

    return cleaned, removed > 0;
end


--[=[-------------------------< R E M O V E _ W I K I _ L I N K >----------------------------------------------

Replaces every wikilink in <str> with its display text: [[A|B]] gives B and [[B]] gives B.  Text that has no
wikilink markup is returned unchanged.

For each link the text between the brackets is handed to label_of(), which drops everything up to and
including the first pipe (if there is one) and then trims white space from what remains.  Because label_of()
runs only for text that was wrapped in wikilink markup, white space outside of links is never trimmed.

]=]

local function remove_wiki_link(str)
    local function label_of(inner)
        local label = inner:gsub("^[^|]*|(.*)$", "%1"); -- keep only what follows the first pipe
        label = label:gsub("^%s*(.-)%s*$", "%1");       -- trim the label
        return label;
    end

    local unlinked = str:gsub("%[%[([^%[%]]*)%]%]", label_of);
    return unlinked;
end


--[[--------------------------< R E M O V E _ C O N T A I N E R >----------------------------------------------

Deletes everything from a '<' up to and including the next '>'.
Used to remove html containers from headings so that section links work; legitimate text that happens to sit
between '<' and '>' is removed as well.

returns text and boolean true if modified; text and boolean false else

]]

local function remove_container(str)
    local stripped, removed = str:gsub("<[^>]*>", ""); -- each <...> group is replaced with nothing

    return stripped, removed > 0;
end


--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Makes a wikilink.  With both link and display text, returns [[L|D]]; with link only (display omitted or empty),
returns [[L]].  When link is omitted or empty there is nothing to link to, so the display text is returned as is,
or an empty string when display is omitted too.

]=]

local function make_wikilink(link, display)
    if not link or '' == link then
        return display or ''; -- no link target: plain display text
    end

    if not display or '' == display then
        return '[[' .. link .. ']]'; -- link doubles as the label
    end

    return '[[' .. link .. '|' .. display .. ']]';
end


--[[--------------------------< S E C T I O N _ D A T A _ G E T >----------------------------------------------

Reads article content and fills associative arrays in sequence <sections_t>.  The caller supplies <sections_t>
with the lead section already in position 1; one entry is appended here for every heading found.  Each entry
holds the section name, its starting location within content, section size in bytes (heading included), section
level (2 (==), 3 (===), etc) and, when <getprose> is true, the prose word count.

A section runs from its heading to the newline that precedes the next heading markup, or to the end of the
content when no heading follows.  Sections named in <noprose_sections> get a prose size of 0.

Adds the section sizes to <total> and <total_prose> and raises <max> and <max_prose> to the largest section seen.

returns <total>, <max>, <total_prose>, <max_prose>

]]

local function section_data_get(content, total, max, total_prose, max_prose, sections_t, getprose)
    local s, e, name;

    ----------< L A R G E R >----------
    local function larger(current_max, sec_size) -- local function to keep track of the largest count
        if current_max < sec_size then
            return sec_size;
        end
        return current_max;
    end
    -----------------------------------

    while true do                                                     -- done this way because some articles reuse section names
        s, e, name = string.find(content, '\n==+ *(.-) *==+', e);     -- get start, end, and section name beginning at end of last find; newline must precede '==' heading markup
        if not s then
            break;
        end
        table.insert(sections_t, { ['name'] = name, ['start'] = s }); -- save section name and start location of this find
    end

    for i, section_t in ipairs(sections_t) do
        if 1 ~= i then                                                                              -- i==1 is the lead section; already accounted for
            local escaped_section_name = section_t.name:gsub('[%(%)%.%%%+%-%*%?%[%^%$%]]', '%%%0'); -- escape lua patterns in section name
            local heading = '==+ *' .. escaped_section_name .. ' *==+';                             -- pattern for this section's own heading

            -- The section ends at the newline before the next heading markup.  The newline is required (as it
            -- is when the headings are located above) so that '==' in the middle of a line does not cut the
            -- section short and so that the closing markup of a '====' heading cannot be split in two and
            -- taken for the start of the next heading.
            local section_content = string.match(content, '(' .. heading .. '.-\n)==+', section_t.start);
            if not section_content then -- probably the last section (no heading follows this one)
                -- '.*' so that a heading with nothing at all after it still counts
                section_content = string.match(content, '(' .. heading .. '.*)', section_t.start);
            end

            if section_content then
                section_t.size = #section_content; -- get a count of the bytes in the section
                total = total + section_t.size;
                max = larger(max, section_t.size);

                if getprose then
                    if find_string_in(noprose_sections, section_t.name) then -- compare the real name, not the pattern-escaped one
                        section_t.prosesize = 0;
                    else
                        local _;
                        _, section_t.prosesize = prosesize_get(section_content);
                    end
                    total_prose = total_prose + section_t.prosesize;
                    max_prose = larger(max_prose, section_t.prosesize);
                end

                local _;
                _, section_t.level = section_content:find('^=+'); -- should always be the first n characters of section content
            else
                section_t.size = '—'; -- no content so show that
                section_t.prosesize = '-';
                -- section_content is nil here so the level is read from the heading markup at the start location
                section_t.level = #(content:match('^\n(=+)', section_t.start) or '==');
            end
        end
    end

    return total, max, total_prose, max_prose;
end


--[[--------------------------< A R T I C L E _ C O N T E N T _ G E T >----------------------------------------

Common function to fetch the <article> content.  Also fetches the byte count (and, when <getprose> is true,
the prose word count) of the lead section because that section is different from all others: it has no
heading and ends at the first heading markup.

On success, returns four values: unparsed article content, lead section byte count, lead section prose word
count (nil when <getprose> is false), and nil message.  On error, returns nil, nil, nil, and a fatal error
message.  Errors are: the title is missing or invalid or the page has no content; the page is a redirect;
the page has no section headings.

]]

local function article_content_get(article, template_name, getprose)
    -- mw.title.new() gives nil for a title that is not valid; treat that the same as a page that does not exist
    local title_obj = article and mw.title.new(article);
    local content = title_obj and title_obj:getContent();                                                         -- get unparsed wikitext from the article
    if not content then
        return nil, nil, nil, error_msg_make(template_name, i18n_t['fatal_no_article'], { article or '' }, true); -- emit fatal error message and abandon
    end

    if title_obj.isRedirect then                                                                                  -- redirects don't have sections
        return nil, nil, nil, error_msg_make(template_name, i18n_t['fatal_redirect'], { article }, true);         -- emit fatal error message and abandon
    end

    local section_content = content:match('(.-)===*');                                                            -- get the lead section: everything before the first heading markup
    if not section_content then
        return nil, nil, nil, error_msg_make(template_name, i18n_t['fatal_no_sections'], { article }, true);      -- emit fatal error message and abandon
    end

    local section_content_prose; -- stays nil when prose counting is off
    if getprose then
        local _;
        _, section_content_prose = prosesize_get(section_content);
    end

    return content, #section_content, section_content_prose, nil; -- return the content, length and prose count of the lead section, and nil for error message
end


--[[--------------------------< S I Z E >----------------------------------------------------------------------

module entry point

creates a wikilinked list of <article name>'s sections and their size in bytes in a sortable wikitable.

{{#invoke:Section sizes|size|<article name>}}

]]

local function size(frame)
    local template_name = frame:getParent():getTitle(); -- get template name for use in error messaging and category name

    local section_info_t = {};                          -- table to hold section names and sizes
    local section_content;                              -- section content used for counting
    local totcount = {};
    local totcount_prose = {};
    local lastlevel;
    local maxlevels;
    local levelcounts = {};
    local levelcounts_prose = {};
    local upperlevel;
    local highlight;
    local highlighttot;
    local highlight_prose;
    local highlighttot_prose;
    local total; -- sum of all byte counts
    local max;   -- largest section so far encountered
    local total_prose;
    local max_prose;
    local totmax;                                                 -- largest section so far encountered (section total)
    local totmax_prose;                                           -- largest section so far encountered (section total)
    local _;                                                      -- dummy for using gsub to count bytes
    local wl_name;                                                -- anchor and display portion for wikilinks in rendered list
    local getprose = i18n_t.yesno[frame.args.getprose or 'true']; -- boolean false when |_getprose=no; true else
    getprose = getprose == nil and true or
        getprose                                                  -- can't do "or true" because then it will turn false values true


    local content, sec_0_count, sec_0_count_prose, msg =
        article_content_get(frame.args[1], template_name, getprose); -- get the article content with lead section byte count because lead is different from all others
    if msg then                                                      -- is something wrong
        return msg;                                                  -- emit message and abandon
    end

    total = sec_0_count;
    max = sec_0_count;
    total_prose = sec_0_count_prose;
    max_prose = sec_0_count_prose;
    -- sequence of associative arrays with section name, starting location, size, and level
    local sections_t = { { ['level'] = 2, ['name'] = section_top, ['size'] = sec_0_count, ['start'] = 1, ['prosesize'] = sec_0_count_prose } }; -- init with lead info
    total, max, total_prose, max_prose = section_data_get(content, total, max, total_prose, max_prose, sections_t,
        getprose);                                                                                                                              -- fill <sections_t> arrays for all sections except the lead

    totmax = 0;
    totmax_prose = 0;
    lastlevel = 0;
    maxlevels = 7;
    for j = 1, maxlevels do
        levelcounts[j] = 0;
        levelcounts_prose[j] = 0;
    end

    local level, size, prosesize;
    for i = #sections_t, 1, -1 do
        level = sections_t[i].level;
        size = sections_t[i].size;
        prosesize = sections_t[i].prosesize or 0;

        if level < lastlevel then -- reset all
            totcount[i] = levelcounts[level] + size;
            totcount_prose[i] = levelcounts_prose[level] + prosesize;
            for j = level, maxlevels do
                levelcounts[j] = 0;
                levelcounts_prose[j] = 0;
            end
        end

        if level >= lastlevel then
            totcount[i] = size;
            totcount_prose[i] = prosesize;
        end

        if level > 0 then
            upperlevel = level - 1;
            levelcounts[upperlevel] = levelcounts[upperlevel] + totcount[i];
            levelcounts_prose[upperlevel] = levelcounts_prose[upperlevel] + totcount_prose[i];
        end

        lastlevel = level;
        if totcount[i] > totmax then
            totmax = totcount[i];
        end
        if totcount_prose[i] > totmax_prose then
            totmax_prose = totcount_prose[i];
        end
    end

    for i, section_t in ipairs(sections_t) do
        level = section_t.level;
        size = section_t.size;
        prosesize = section_t.prosesize;

        if size == max then
            highlight = 'background:#f00;color:#000;"|';
        else
            local proportion = size /
            max                                                                       -- get value of "how much of the max" the count is
            local gb = 250 -
                math.floor(250 * proportion)                                          -- approach #f8f9fa [r=248,g=249,b=250] (default wikitable cell color) for small bytecounts
            highlight = string.format('background:#F8%02X%02X;color:#000;"|', gb, gb) -- shade the bg as r: 248, g: gb, and b: gb
        end

        if getprose then
            if prosesize == max_prose then
                highlight_prose = 'background:#008000;color:#fff;"|';
            else
                local proportion = prosesize /
                max_prose                                                                       -- get value of "how much of the max" the count is
                local gb = 250 -
                    math.floor(250 * proportion)                                                -- approach #f8f9fa [r=248,g=249,b=250] (default wikitable cell color) for small bytecounts
                highlight_prose = string.format('background:#%02XF8%02X;color:#000;"|', gb, gb) -- shade the bg as r: 248, g: gb, and b: gb
            end
        end

        highlighttot = ''; -- start the style declaration
        if totcount[i] == totmax then
            highlighttot = highlighttot .. 'background:#f00;color:#000;';
        else
            local proportion = totcount[i] /
            totmax                                                      -- get value of "how much of the max" the count is
            local gb = 250 -
                math.floor(250 * proportion)                            -- approach #f8f9fa [r=248,g=249,b=250] (default wikitable cell color) for small bytecounts
            highlighttot = highlighttot ..
                string.format('background:#F8%02X%02X;color:#000;', gb, gb) -- shade the bg as r: 248, g: gb, and b: gb
        end

        if getprose then
            highlighttot_prose = ''; -- start the style declaration
            if totcount_prose[i] == totmax_prose then
                highlighttot_prose = highlighttot_prose .. 'background:#008000;color:#fff;';
            else
                local proportion = totcount_prose[i] /
                totmax_prose                                                -- get value of "how much of the max" the count is
                local gb = 250 -
                    math.floor(250 * proportion)                            -- approach #f8f9fa [r=248,g=249,b=250] (default wikitable cell color) for small bytecounts
                highlighttot_prose = highlighttot_prose ..
                    string.format('background:#%02XF8%02X;color:#000;', gb, gb) -- shade the bg as r: 248, g: gb, and b: gb
            end
        end

        if level == 2 then
            highlighttot = highlighttot .. 'font-weight:bold;'; -- if main section, make it bold
        elseif totcount[i] == size then
            highlighttot = 'color:transparent;';                -- hide totals for subsections with no subsubsections, values required for proper sorting
        end
        highlighttot = highlighttot .. '"|';                    -- close the style declaration

        if getprose then
            if level == 2 then
                highlighttot_prose = highlighttot_prose .. 'font-weight:bold;'; -- if main section, make it bold
            elseif totcount_prose[i] == prosesize then
                highlighttot_prose = 'color:transparent;';                      -- hide totals for subsections with no subsubsections, values required for proper sorting
            end
            highlighttot_prose = highlighttot_prose .. '"|';                    -- close the style declaration
        end

        level = (2 < level) and ((level - 2) * 1.6) or nil;                     -- remove offset and mult by 1.6em (same indent as ':' markup which doesn't work in a table)

        local markup_removed;                                                   -- temp flag to note that the section heading has been modified (references and html-like markup stripped, etc)
        local modified = false;                                                 -- flag to select section heading styling; false: wikilink; true: plain text with error message
        wl_name, modified = refs_remove(section_t.name);                        -- remove all references
        wl_name = remove_wiki_link(wl_name);                                    -- remove all wikilinks
        wl_name = wl_name:gsub('<nowiki/>', '__sss_nowiki/__');                 -- replace <nowiki/> tag with special secret symbol
        wl_name = frame:preprocess(wl_name);                                    -- render to html
        wl_name, markup_removed = redlink_template_remove(wl_name);             -- remove redlink template wikilinks created by preprocessing of unknown templates
        modified = modified or markup_removed;                                  -- update <modified>
        wl_name = anchors_remove(wl_name);                                      -- remove known anchor markup; these allowed in section heading so do not bump <modified>
        wl_name = wl_name:gsub('__sss_nowiki/__', '<nowiki/>');                 -- replace special secret symbol with <nowiki/> tag
        wl_name, markup_removed = stripmarkers_remove(wl_name);                 -- remove any strip markers resulting from preprocessing
        modified = modified or markup_removed;                                  -- update <modified>
        wl_name = wl_name:gsub('</?i>', '\'\'');                                -- italic markup has been converted to html; unconvert so remove_container() doesn't remove inappropriately
        wl_name = wl_name:gsub('</?b>', '\'\'\'');                              -- bold markup has been converted to html; unconvert so remove_container() doesn't remove inappropriately
        wl_name, markup_removed = remove_container(wl_name);                    -- remove html containers from section headings so that we can link to the section
        modified = modified or markup_removed;                                  -- update <modified>

        wl_name = wl_name:gsub('[%[%]]', { ['['] = '&#91;', [']'] = '&#93;' }); -- replace '[' and ']' characters with html entities so that wikilinked section names work
        wl_name = mw.text.trim(wl_name);                                        -- trim leading/trailing white space if any because white space buggers up url anchor links

        local heading_text;
        if modified then
            heading_text = table.concat({                                                           -- modified headings are rendered in plain text with an error message
                wl_name,                                                                            -- plain text section name
                error_msg_make(template_name, i18n_t['markup removed'], {}),                        -- error message with help link
            });                                                                                     -- close help link
        else
            heading_text = make_wikilink(frame.args[1] .. '#' .. wl_name:gsub("''+", ''), wl_name); -- unmodified rendered as is
        end

        table.insert(section_info_t,
            table.concat({                                                      -- build most of a table row here because here we have heading information that we won't have later
                level and '<span style="margin-left:' .. level .. 'em">' or '', -- indent per heading level (number of '=' in heading markup)
                heading_text,                                                   -- section link remove any bold/italic markup from the link part of the wikilink; leave the markup in the display
                level and '</span>' or '',                                      -- close the span if opened
                '||style="text-align:right;',                                   -- table column separator and right align byte count column
                highlight,
                lang_obj:formatNum(size),                                       -- commafied byte count for section
                '||style="text-align:right;',                                   -- table column separator and right align section total column
                highlighttot,
                lang_obj:formatNum(totcount[i]),                                -- section total count!!
            }) .. (getprose and table.concat({
                '||style="text-align:right;',                                   -- table column separator and right align byte count column
                highlight_prose,
                lang_obj:formatNum(prosesize),                                  -- section prose size
                '||style="text-align:right;',                                   -- table column separator and right align byte count column
                highlighttot_prose,
                lang_obj:formatNum(totcount_prose[i]),                          -- section prose size total
            }) or ''));
    end

    local out = {};                                                     -- make a sortable wikitable for output
    table.insert(out,
        string.format('{| class="wikitable sortable" style="%s"\n|+%s', -- output caption and column headings
            frame.args.style or '',                                     -- value for style= attribute
            mw.message.newRawMessage(i18n_t['table caption'], { frame.args[1], #section_info_t }):plain()
        ));

    table.insert(out, table.concat({ -- column headings
        '\n! rowspan=2|',
        i18n_t.section_name,
        '!! colspan=2|',
        i18n_t.byte_count,
    }));
    if getprose then
        table.insert(out, table.concat({
            '!! colspan=2|',
            i18n_t.prose_size,
        }));
    end
    table.insert(out, table.concat({ -- new header column
        '\n|-',
        '\n!',
        i18n_t.section_header,
        '!!',
        i18n_t.section_total,
    }));
    if getprose then
        table.insert(out, table.concat({
            '!!',
            i18n_t.section_header,
            '!!',
            i18n_t.section_total,
        }));
    end
    table.insert(out, '\n|-\n|');                               -- first row pipe

    table.insert(out, table.concat(section_info_t, '\n|-\n|')); -- section rows with leading pipes (except first row already done)
    table.insert(out,
        table.concat({                                          -- total number of bytes; heading markup so that sorting doesn't move this row from the bottom to top
            '\n|-\n!scope=row|',
            i18n_t.total,                                       -- footer label
            '\n!style="text-align:right"|',
            lang_obj:formatNum(total),                          -- byte count sum; TODO: to columns, should be two separate sums; test for equality?
            '\n!style="text-align:right"|',
            lang_obj:formatNum(total),                          -- section size sum; TODO: to columns, should be two separate sums; test for equality?
        }));
    if getprose then
        table.insert(out, table.concat({
            '\n!style="text-align:right"|',
            lang_obj:formatNum(total_prose), -- section size sum; TODO: to columns, should be two separate sums; test for equality?
            '\n!style="text-align:right"|',
            lang_obj:formatNum(total_prose), -- section size sum; TODO: to columns, should be two separate sums; test for equality?
        }));
    end
    table.insert(out, '\n|}'); -- close the wikitable

    local result = table.concat(out);
    return result; -- because gsub returns string and number of replacements
end




--[[--------------------------< S E C T I O N _ S I Z E _ G E T >----------------------------------------------

return the length of the specified section of the specified article.

parameters:
	{{{1}}} – article title (required)

	{{{2}}} – section name or one of three keywords. keywords use leading underscores to minimize confusion with
				section names 'Lead' (the metal) and 'Max' (a proper name), etc.
		<section name>	– returns the length of the named section; section name comparisons are case insensitive.

		keywords:
			_lead			– returns the length of the lead (unnamed section)
			_max			– returns the length of the longest section
			_total			– returns the article length

	{{{3}}} – supports one keyword
		_all	– requires named section in {{{2}}}; not supported for keywords; returns size of the named section plus
					the lengths of its subsections

	|_nosep= – accepts one value 'yes'; renders section size without thousands separator; ignored when |_pct= set;
				default is commafied
	|_pct= – accepts one value 'yes'; function returns size specified by name or keyword as a percentage of _total
				rounded to two decimals; appends '%' symbol;  _total as a percentage of _total allowed but why?

returns nil when there is no keyword match and no <section name> match

	{{#invoke:section sizes|section_size_get|<article title>|<_lead|_max|_total|section name>|_all}}

]]

local function section_size_get(frame)
    -- Return the size of one section of the article named in {{{1}}}.  {{{2}}} selects the section: a section name
    -- (compared case-insensitively via string.lower) or one of the keywords '_lead', '_max', '_total'.  When {{{3}}}
    -- is '_all' and {{{2}}} is a section name, the sizes of the section's subsections are added in.
    -- The result is formatted per |_nosep= and |_pct= (see retval() below).
    -- Returns <msg> from article_content_get() when that function reports a problem; returns nil when {{{2}}} is
    -- missing or matches neither a keyword nor a section name.
    local args_t = require("Module:Arguments").getArgs(frame);
    local template_name = frame:getParent():getTitle();            -- get template name for use in error messaging and category name
    local nosep = i18n_t.yesno[args_t._nosep or 'false'] or false; -- boolean true when |_nosep=yes; false else
    local pct = i18n_t.yesno[args_t._pct or 'false'] or false;     -- boolean true when |_pct=yes; false else
    local getprose = false                                         -- fetching prose is only used by size(), not section_size_get()

    local content, sec_0_count, sec_0_count_prose, msg =
        article_content_get(args_t[1], template_name, getprose); -- get the article content and the lead section counts; lead is different from all others
    if msg then                                                  -- is something wrong
        return msg;                                              -- emit message and abandon
    end

    local total = sec_0_count; -- initialize with length of the lead
    local max = sec_0_count;
    local total_prose = sec_0_count_prose;
    local max_prose = sec_0_count_prose;

    ----------< R E T V A L >----------
    local function retval(size)                                          -- local function to select return value format according to |_pct=
        return pct and string.format("%2.2f%%", 100 * (size / total)) or -- return the percentage; <total> is read at call time so it is the value set by section_data_get()
            lang_obj:formatNum(size, { noCommafy = nosep });             -- return the size
    end
    -----------------------------------
    -- sequence of associative arrays with section name, starting location, size, and level
    local sections_t = { { ['level'] = 2, ['name'] = section_top, ['size'] = sec_0_count, ['start'] = 1 } }; -- init with lead info

    total, max = section_data_get(content, total, max, total_prose, max_prose, sections_t, getprose);
    if '_lead' == args_t[2] then                       -- {{{2}}} has '_lead' keyword; lead section is handled differently from all other sections
        return retval(sec_0_count);                    -- return selected output form and done
    elseif '_max' == args_t[2] then                    -- when {{{2}}} has '_max' keyword
        return retval(max);                            -- return selected output form and done
    elseif '_total' == args_t[2] then                  -- when {{{2}}} has '_total' keyword
        return retval(total);                          -- return the article's total length and done; pct possible, but why?
    elseif args_t[2] then                              -- if set, it should be a section name
        local target_section_name = args_t[2]:lower(); -- get it and force lower case for comparison
        local section_level;                           -- level of the matched section; doubles as the 'found' flag for '_all'
        local section_size = 0;

        for _, section_t in ipairs(sections_t) do                     -- loop through <section_t.name> looking for a match
            if not section_level then                                 -- not yet found a match
                if section_t.name:lower() == target_section_name then -- force lower case for comparison
                    if '_all' == args_t[3] then                       -- when {{{3}}} has '_all' keyword
                        section_level = section_t.level;              -- found it so initialize these; this one is a flag to know that we found the target section
                        section_size = section_t.size;                -- init
                    else
                        return retval(section_t.size);                -- return selected output form and done
                    end
                end
            elseif section_level < section_t.level then       -- here when we found the section, '_all' keyword present
                section_size = section_size + section_t.size; -- this section level greater than target's, so add in the subsection's size
            else                                              -- reached a section at the same or a higher level: target section has ended
                return retval(section_size);                  -- return selected output form and done
            end
        end

        if section_level then            -- '_all' target section (with its subsections) ran to the end of the article
            return retval(section_size); -- so the loop never saw a terminating section; return what was accumulated
        end

        return nil; -- no section name matched
    else            -- here when {{{2}}} has nothing
        return nil; -- so return nothing
    end
end


--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------
]]

local exports_t = {                         -- functions exported by this module
    section_size_get = section_size_get,
    size = size,
};

return exports_t;