-- Modul:Excerpt
-- Tampilan
-- Dokuméntasi antuk modul puniki prasida kakardi ring Modul:Excerpt/doc
-- Get localized data
local d = require("Module:Excerpt/i18n")
local p = {}
-- Module-level flag read by err(): when truthy, problems are raised as Lua errors
-- instead of being swallowed. invoke() sets it from the "errors" argument.
local errors

-- Helper function to report a problem
-- @param msg : a key of the localized d.error table, or a literal message when no such key exists
-- @param a, b : optional values substituted into the message's format placeholders
-- @return "" when errors are not being reported; otherwise raises the formatted message,
--         attributed to the caller of err (level 2)
local function err(msg, a, b)
	local template = d.error[msg] or msg or ''
	-- Callers frequently pass literal messages with a page or section name concatenated in.
	-- A stray "%" in such a name is not a valid format directive and would make format()
	-- itself throw, so fall back to the unformatted text rather than crash while reporting.
	local ok, text = pcall(mw.ustring.format, template, a, b)
	if not ok then text = template end
	if errors then error(text, 2) end
	return ""
end
-- Interpret a template argument as a boolean
-- nil, false, "", "0", "false" and "no" count as falsy; every other value is truthy
-- @return true or false
local function is(value)
	if not value then
		return false
	end
	local falsyText = value == "" or value == "0" or value == "false" or value == "no"
	return not falsyText
end
-- Try a list of alternative patterns against a text
-- The i-th candidate is pre .. list[i] .. post; candidates are tried in list order
-- @param init : optional start position, passed through to mw.ustring.match
-- @return every result of mw.ustring.match for the first candidate that matches, or nil if none does
local function matchAny(text, pre, list, post, init)
	local index, count = 1, #list
	while index <= count do
		local found = { mw.ustring.match(text, pre .. list[index] .. post, init) }
		if found[1] then
			return unpack(found)
		end
		index = index + 1
	end
	return nil
end
-- Replacement callback for gsub over "%b{}" matches: removes unwanted templates and
-- pseudo-templates (those listed in d.unwantedInlineTemplates) and tidies wanted ones
-- @param t : the matched {...} text
-- @return "" to delete the match, a modified string to replace it, or nil to keep it unchanged
local function stripTemplate(t)
	-- Unwanted template: gsub replaces the whole match with nothing
	if matchAny(t, "^{{%s*", d.unwantedInlineTemplates, "%s*%f[|}]") then
		return ""
	end
	-- Wanted template: drop bare |shortref and |ref parameters, which would produce a reference
	local cleaned = mw.ustring.gsub(t, "|%s*shortref%s*%f[|}]", "")
	cleaned = mw.ustring.gsub(cleaned, "|%s*ref%s*%f[|}]", "")
	-- Recurse into nested templates; the first two characters are kept aside so that
	-- the outer template itself is not matched again
	local opening = mw.ustring.sub(cleaned, 1, 2)
	local inner = mw.ustring.gsub(mw.ustring.sub(cleaned, 3), "%b{}", stripTemplate)
	cleaned = opening .. inner
	-- {{Audio|Foo.ogg|Bar}} → Bar
	cleaned = mw.ustring.gsub(cleaned, "^{{%s*[Aa]udio.-|.-|(.-)%f[|}].*", "%1")
	-- {{Nihongo foot|English|英語|eigo}} → English
	cleaned = mw.ustring.gsub(cleaned, "^{{%s*[Nn]ihongo[ _]+foot%s*|(.-)%f[|}].*", "%1")
	if cleaned == t then
		return nil -- nothing changed: tell gsub to keep the existing text
	end
	return cleaned
end
-- Read the wikitext of a page, following a redirect if the page is one
-- @param page : page name given to mw.title.new
-- @param frame : accepted for interface compatibility; not used here
-- @return content, name : the result of title:getContent() and the prefixed name of the page
--         actually read (the redirect target if one was followed); false, false if no title
--         object could be made from the name
local function getContent(page, frame)
	local title = mw.title.new(page)
	if not title then
		return false, false
	end
	title = title.redirectTarget or title
	return title:getContent(), title.prefixedText
end
-- Check image for suitability
-- @param image : wikitext containing File:(name) or Image:(name), using any prefix in d.fileNamespaces
-- @return true if the file has an accepted image extension and neither its description page nor the
--         expanded description mentions "non-free"; false otherwise
local function checkImage(image)
	local page = matchAny(image, "", d.fileNamespaces, "%s*:[^|%]]*") -- match File:(name) or Image:(name)
	if not page then return false end
	-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)
	if not matchAny(page, "%.", {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}, "%s*$") then
		return false
	end
	local fileDescription, fileTitle = getContent(page) -- get file description and title after following any redirect
	if fileDescription and fileDescription ~= "" then -- found description on local wiki
		if mw.ustring.match(fileDescription, "[Nn]on%-free") then return false end
		fileDescription = mw.ustring.gsub(fileDescription, "%b{}", stripTemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess
	elseif not fileTitle then
		return false
	else
		-- no local description: transclude the file page instead (try commons)
		fileDescription = "{{" .. fileTitle .. "}}"
	end
	-- The frame must be a local: the function has no frame parameter, so assigning to a bare
	-- `frame` would read and create a global variable.
	local frame = mw.getCurrentFrame()
	fileDescription = frame:preprocess(fileDescription)
	return ( fileDescription and fileDescription ~= "" and not mw.ustring.match(fileDescription, "[Nn]on%-free") ) and true or false -- hide non-free image
end
-- Extract a [[File:...]] or [[Image:...]] link (any prefix in d.fileNamespaces) from wikitext
-- @param text : wikitext to search
-- @param start : if truthy the link must be at the very start of text; otherwise it may be anywhere
-- @return the balanced [...] link plus any trailing white space, or nil if none is found
local function parseImage(text, start)
	local anchor = start and "^" or ""
	local fromLink = matchAny(text, anchor .. "%[%[%s*", d.fileNamespaces, "%s*:.*") -- [[File: or [[Image: ... to end of text
	if not fromLink then
		return fromLink
	end
	-- take the balanced brackets only, so that wikilinks nested in the caption are handled
	local link = mw.ustring.match(fromLink, "%b[]%s*")
	return link
end
-- Cut a caption at its terminator: the first | (end of parameter) or } (end of infobox)
-- which is not inside a balanced [..] or {..}
-- @param caption : text beginning at the caption, or nil
-- @return the caption up to but excluding the terminator; the whole text if there is none; nil for nil
local function parseCaption(caption)
	if not caption then
		return nil
	end
	local total = mw.ustring.len(caption)
	local beyond = total + 1 -- stands in for "not found" so that positions can always be compared
	local cursor = 1
	while cursor <= total do
		local linkFrom, linkTo = mw.ustring.find(caption, "%b[]", cursor)
		local templateFrom, templateTo = mw.ustring.find(caption, "%b{}", cursor)
		local stop = mw.ustring.find(caption, "[|}]", cursor) or beyond
		linkFrom = linkFrom or beyond
		templateFrom = templateFrom or beyond
		if linkFrom < templateFrom and linkFrom < stop then
			cursor = linkTo + 1 -- a wikilink comes first: skip over it
		elseif templateFrom < stop then
			cursor = templateTo + 1 -- a template comes first: skip over it
		else
			return mw.ustring.sub(caption, 1, stop - 1) -- terminator comes first
		end
	end
	return caption -- ran off the end without meeting a terminator
end
-- Record an optional image parameter (caption, alt text, size) against the image it belongs to.
-- Scans backwards from `position` for the nearest key of `images`, and stores `value` in `store`
-- under that key unless an entry is already there. Nothing is stored when images[position]
-- itself exists or when no image key precedes `position`.
local function attachToNearestImage(images, store, position, value)
	local i = position
	while i > 0 and not images[i] do
		i = i - 1
		if images[i] and not store[i] then store[i] = value end
	end
end

-- Attempt to construct [[File:...]] blocks from {{infobox ... |image= ...}}
-- @param text : wikitext of a template (or table)
-- @return a sequence of "[[File:...]]\n" strings in order of appearance, each with any caption,
--         alt text and size found for it; nil if the text has no | and = or no image was found
local function argImage(text)
	local hasNamedArgs = mw.ustring.find(text, "|") and mw.ustring.find(text, "=")
	if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image
	-- ensure image map is captured
	text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '|imagemap=')
	local textLength = mw.ustring.len(text) -- text is not modified below, so measure it once

	-- find all images; the table is keyed by the position where each image value starts
	local hasImages = false
	local images = {}

	-- 1. parameters whose name contains "image"
	local captureFrom = 1
	while captureFrom < textLength do
		local argname, position, image = mw.ustring.match(text, "|%s*([^=|]-[Ii][Mm][Aa][Gg][Ee][^=|]-)%s*=%s*()(.*)", captureFrom)
		if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image
			local lcArgName = mw.ustring.lower(argname)
			if mw.ustring.find(lcArgName, "caption")
				or mw.ustring.find(lcArgName, "size")
				or mw.ustring.find(lcArgName, "upright") then
				image = nil
			end
		end
		if image then
			hasImages = true
			images[position] = image
			captureFrom = position
		else
			-- NOTE(review): this also ends the scan when a caption/size/upright parameter is met,
			-- so any later image parameters are only picked up by the scans below — confirm intended
			captureFrom = textLength
		end
	end

	-- 2. parameters whose name contains "photo"
	captureFrom = 1
	while captureFrom < textLength do
		local position, image = mw.ustring.match(text, "|%s*[^=|]-[Pp][Hh][Oo][Tt][Oo][^=|]-%s*=%s*()(.*)", captureFrom)
		if image then
			hasImages = true
			images[position] = image
			captureFrom = position
		else
			captureFrom = textLength
		end
	end

	-- 3. any parameter whose value ends with a file extension of three or four letters
	captureFrom = 1
	while captureFrom < textLength do
		local position, image = mw.ustring.match(text, "|%s*[^=|{}]-%s*=%s*()%[?%[?([^|{}]*%.%a%a%a%a?)%s*%f[|}]", captureFrom)
		if image then
			hasImages = true
			if not images[position] then -- do not overwrite an image found by an earlier scan
				images[position] = image
			end
			captureFrom = position
		else
			captureFrom = textLength
		end
	end
	if not hasImages then return nil end

	-- find all captions
	local captions = {}
	captureFrom = 1
	while captureFrom < textLength do
		local position, caption = matchAny(text, "|%s*", d.captionParams, "%s*=%s*()([^\n]+)", captureFrom)
		if caption then
			-- extend caption to parse "| caption = Foo {{Template\n on\n multiple lines}} Bar\n"
			local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)
			if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end
			caption = mw.text.trim(caption)
			local captionStart = mw.ustring.sub(caption, 1, 1)
			if captionStart == '|' or captionStart == '}' then caption = nil end -- the parameter was empty
		end
		if caption then
			-- find nearest image, and use same index for captions table
			attachToNearestImage(images, captions, position, parseCaption(caption))
			captureFrom = position
		else
			-- NOTE(review): an empty caption parameter ends the scan, so later captions are not seen
			captureFrom = textLength
		end
	end

	-- find all alt text
	local altTexts = {}
	for position, altText in mw.ustring.gmatch(text, "|%s*[Aa][Ll][Tt]%s*=%s*()([^\n]*)") do
		if altText then
			-- altText is terminated by }} or |, but first skip any matched [[...]] and {{...}}
			local lookFrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}
				mw.ustring.match(altText, ".*{%b{}}()") or 1, -- if multiple {{...}}, .* consumes all but one, leaving the last for %b
				mw.ustring.match(altText, ".*%[%b[]%]()") or 1)
			local length = mw.ustring.len(altText)
			local afterText = math.min( -- find position after whichever comes first: end of string, }} or |
				mw.ustring.match(altText, "()}}", lookFrom) or length+1,
				mw.ustring.match(altText, "()|", lookFrom) or length+1)
			altText = mw.ustring.sub(altText, 1, afterText-1) -- chop off |... or }}... which is not part of [[...]] or {{...}}
			altText = mw.text.trim(altText)
			local altTextStart = mw.ustring.sub(altText, 1, 1)
			if altTextStart == '|' or altTextStart == '}' then altText = nil end
		end
		if altText then
			-- find nearest image, and use same index for altTexts table
			attachToNearestImage(images, altTexts, position, altText)
		end
	end

	-- find all image sizes
	local imageSizes = {}
	for position, imageSizeMatch in mw.ustring.gmatch(text, "|%s*[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s*=%s*()([^}|\n]*)") do
		-- NOTE(review): imageSizeMatch is already the text after "=", so this second match only
		-- succeeds when the value itself contains "="; a plain "250px" yields nil and no size is
		-- recorded. Left unchanged because emitting sizes would alter the generated [[File:...]]
		-- (a bare number would be displayed as the caption) — confirm the intended behaviour.
		local imageSize = mw.ustring.match(imageSizeMatch, "=%s*([^}|\n]*)")
		if imageSize then
			imageSize = mw.text.trim(imageSize )
			local imageSizeStart = mw.ustring.sub(imageSize, 1, 1)
			if imageSizeStart == '|' or imageSizeStart == '}' then imageSize = nil end
		end
		if imageSize then
			-- find nearest image, and use same index for imageSizes table
			attachToNearestImage(images, imageSizes, position, imageSize)
		end
	end

	-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order
	local keys = {}
	for key in pairs(images) do
		table.insert(keys, key)
	end
	table.sort(keys)

	-- add in relevant optional parameters for each image: caption, alt text and image size
	local imageTokens = {}
	for _, index in ipairs(keys) do
		local image = images[index]
		local token = parseImage(image, true) -- look for image=[[File:...]] etc.
		if not token then
			image = mw.ustring.match(image, "^[^}|\n]*") -- remove later arguments
			token = "[[" -- Add File: unless name already begins File: or Image:
			if not matchAny(image, "^", d.fileNamespaces, "%s*:") then
				token = token .. "File:"
			end
			token = token .. image
			local caption = captions[index]
			if caption and mw.ustring.match(caption, "%S") then token = token .. "|" .. caption end
			local alt = altTexts[index]
			if alt then token = token .. "|alt=" .. alt end
			local image_size = imageSizes[index]
			if image_size and mw.ustring.match(image_size, "%S") then token = token .. "|" .. image_size end
			token = token .. "]]"
		end
		token = mw.ustring.gsub(token, "\n","") .. "\n" -- one image per line
		table.insert(imageTokens, token)
	end
	return imageTokens
end
-- Replacement callback for gsub: turn an <imagemap>...</imagemap> block into a standard image link
-- @param imagemap : the matched imagemap block
-- @return "<!--imagemap-->[[File:...]]" built from the first File:/Image: line that follows a ">"
--         or a line break, or "" (removing the whole block) if no such line is found
local function convertImageMap(imagemap)
	local image = matchAny(imagemap, "[>\n]%s*", d.fileNamespaces, "[^\n]*")
	if not image then
		return ""
	end
	local fileLink = mw.ustring.gsub(image, "[>\n]%s*", "", 1) -- drop the leading > or line break
	return "<!--imagemap-->[[" .. fileLink .. "]]"
end
-- Convert a comma-separated list of numbers or min-max ranges into a table of flags,
-- e.g. "1,3-5" → {[1]=true, [3]=true, [4]=true, [5]=true}
-- Numbers which are not listed get no entry; list items which are not a number or a range are ignored.
-- @param str : the list, as a string (a number is accepted and treated as its decimal text)
-- @return table mapping each listed number to true
local function numberFlags(str)
	local flags = {}
	local ranges = mw.text.split(tostring(str or ""), ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
	for _, range in ipairs(ranges) do
		local first, last = mw.ustring.match(range, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → first=3 last=5
		if not first then
			first = mw.ustring.match(range, "^%s*(%d+)%s*$") -- "1" → first=1 last=1
			last = first
		end
		-- Convert explicitly rather than relying on the numeric for loop to coerce strings:
		-- mw.ustring's %d also matches digits which tonumber cannot read (non-ASCII decimal
		-- digits), and such a range is skipped here instead of raising an error.
		first, last = tonumber(first), tonumber(last)
		if first and last then
			for number = first, last do flags[number] = true end
		end
	end
	return flags
end
-- Groups of mutually exclusive image arguments: setting one member replaces any other member
local imageArgGroups = {
	{"thumb", "thumbnail", "frame", "framed", "frameless"},
	{"right", "left", "center", "none"},
	{"baseline", "middle", "sub", "super", "text-top", "text-bottom", "top", "bottom"}
}

-- Escape the magic characters of a string so that it matches itself literally inside a pattern
local function escapePattern(s)
	return (mw.ustring.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1"))
end

-- Apply image arguments to a [[File:...]] link
-- @param image : the [[File:...]] wikitext
-- @param fileArgs : arguments separated by |, e.g. "left|border|upright=0.75"; nil leaves the image unchanged
-- @return the image with each argument inserted just after the file name, any existing argument
--         of the same name (or of the same exclusive group) having been removed first
local function modifyImage(image, fileArgs)
	if not fileArgs then return image end
	for _, filearg in pairs(mw.text.split(fileArgs, "|")) do -- handle fileArgs=left|border etc.
		local fa = mw.ustring.gsub(filearg, "=.*", "") -- "upright=0.75" → "upright"
		local group = {fa} -- group of "border" is ["border"]...
		for _, g in pairs(imageArgGroups) do
			for _, a in pairs(g) do
				if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]
			end
		end
		for _, a in pairs(group) do
			-- The name is matched literally: unescaped, the "-" in "text-top" would act as a
			-- repetition operator and the argument would never be found.
			local name = escapePattern(a)
			image = mw.ustring.gsub(image, "|%s*" .. name .. "%f[%A]%s*=[^|%]]*", "") -- remove "|upright=0.75" etc.
			image = mw.ustring.gsub(image, "|%s*" .. name .. "%s*([|%]])", "%1") -- replace "|left|" by "|" etc.
		end
		-- Insert before the first | or ]. A function is used as the replacement so that a "%"
		-- in the argument is inserted as itself instead of being read as a capture reference.
		image = mw.ustring.gsub(image, "([|%]])", function(separator)
			return "|" .. filearg .. separator
		end, 1) -- replace "|" by "|left|" etc.
	end
	return image
end
-- a basic parser to trim down extracted wikitext
-- @param text : Wikitext to be processed
-- @param options : A table of options...
--          options.paraflags : Which number paragraphs to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`. If not present, all paragraphs will be kept.
--          options.fileflags : table of which files to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`
--          options.fileargs : args for the [[File:]] syntax, such as `left`
--          options.files : if it contains anything other than digits, spaces, - and , it is taken as a file name to add
--          options.keepTables : if truthy (see is), keep {| tables |} found before the lead
--          NB string-valued paraflags/fileflags are replaced by their parsed tables within options itself
-- @param filesOnly : If set, only return the files and not the prose
-- @return fileText, text : the concatenated [[File:...]] entries, and the retained prose
local function parse(text, options, filesOnly)
	local allParagraphs = true -- keep all paragraphs?
	if options.paraflags then
		if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end
		for _, v in pairs(options.paraflags) do
			if v then allParagraphs = false end -- if any para specifically requested, don't keep all
		end
	end
	if filesOnly then
		allParagraphs = false
		options.paraflags = {}
	end

	local maxfile = 0 -- for efficiency, stop checking images after this many have been found
	if options.fileflags then
		if type(options.fileflags) ~= "table" then options.fileflags = numberFlags(options.fileflags) end
		for k, v in pairs(options.fileflags) do
			if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
		end
	end

	local fileArgs = options.fileargs and mw.text.trim(options.fileargs)
	if fileArgs == '' then fileArgs = nil end

	local leadStart = nil -- have we found some text yet?
	local t = "" -- the stripped down output text
	local fileText = "" -- output text with concatenated [[File:Foo|...]]\n entries
	local files = 0 -- how many images so far
	local paras = 0 -- how many paragraphs so far
	local startLine = true -- at the start of a line (no non-spaces found since last \n)?

	text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space

	-- Add named files
	local f = options.files
	if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list
		f = mw.ustring.gsub(f, "^%s*File%s*:%s*", "", 1)
		f = mw.ustring.gsub(f, "^%s*Image%s*:%s*", "", 1)
		f = "[[File:" .. f .. "]]"
		f = modifyImage(f, "thumb")
		f = modifyImage(f, fileArgs)
		if checkImage(f) then fileText = fileText .. f .. "\n" end
	end

	repeat -- loop around parsing a template, image or paragraph
		local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}} or {| Table |}
		if not leadStart and not token then token = mw.ustring.match(text, "^%b<>%s*%b{}%s*") end -- allow <tag>{{template}} before lead has started

		local line = mw.ustring.match(text, "[^\n]*")
		if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)
			line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line
			line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line
			-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line
			if mw.ustring.find(line, "%S") and not matchAny(line, "^%s*", { "{{", "%[%[%s*[Ff]ile:", "%[%[%s*[Ii]mage:" }, "") then
				token = nil
			end
		end

		if token then -- found a template which is not the prefix to a line of text
			if leadStart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)
				if not filesOnly and not startLine then t = t .. token end
			elseif matchAny(token, "{{%s*", d.wantedBlockTemplates, "%s*%f[|}]") then
				t = t .. token -- keep wanted block templates
			elseif is(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{|' then
				t = t .. token -- keep tables
			elseif files < maxfile then -- discard template, but if we are still collecting images...
				-- argImage returns nil when it finds no image parameters. Test for that before
				-- substituting a table: "argImage(token) or {}" is never falsy, which left the
				-- fallback search for an embedded [[File:...]] unreachable.
				local images = argImage(token)
				if not images then
					images = {}
					local image = parseImage(token, false) -- look for embedded [[File:...]]
					if image then table.insert(images, image) end
				end
				for _, image in ipairs(images) do
					if files < maxfile and checkImage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
						files = files + 1 -- count the file, whether displaying it or not
						if options.fileflags and options.fileflags[files] then -- if displaying this image
							image = modifyImage(image, "thumb")
							image = modifyImage(image, fileArgs)
							fileText = fileText .. image
						end
					end
				end
			end
		else -- the next token in text is not a template
			token = parseImage(text, true)
			if token then -- the next token in text looks like an image
				if files < maxfile and checkImage(token) then -- if more images are wanted and this is a wanted image
					files = files + 1
					if options.fileflags and options.fileflags[files] then
						local image = token -- copy token for manipulation by adding |right etc. without changing the original
						image = modifyImage(image, fileArgs)
						fileText = fileText .. image
					end
				end
			else -- got a paragraph, which ends at a file, image, blank line or end of text
				local afterEnd = mw.ustring.len(text) + 1
				local blankPosition = mw.ustring.find(text, "\n%s*\n") or afterEnd -- position of next paragraph delimiter (or end of text)
				local endPosition = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
					mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterEnd,
					mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterEnd,
					blankPosition)
				token = mw.ustring.sub(text, 1, endPosition-1)
				if blankPosition < afterEnd and blankPosition == endPosition then -- paragraph ends with a blank line
					token = token .. mw.ustring.match(text, "\n%s*\n", blankPosition)
				end
				local isHatnote = not(leadStart) and mw.ustring.sub(token, 1, 1) == ':'
				if not isHatnote then
					leadStart = leadStart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section
					paras = paras + 1
					if allParagraphs or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
				end
			end -- of "else got a paragraph"
		end -- of "else not a template"

		if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text
		startLine = mw.ustring.find(token, "\n%s*$") -- will the next token be the first non-space on a line?
	until not text or text == "" or not token or token == "" -- loop until all text parsed

	text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
	return fileText, text
end
-- Strip wikitext of material which should not appear in an excerpt
-- The substitutions are applied in the order written; later ones see the result of earlier ones
-- @param text : wikitext of a page or section
-- @param options : options.keepSubsections and options.keepRefs (tested with is) retain
--                  subsections and references respectively
-- @return the cleaned wikitext
local function cleanupText(text, options)
	-- apply one substitution to the text being cleaned; the default replacement is nothing
	local function replace(pattern, replacement)
		text = mw.ustring.gsub(text, pattern, replacement or "")
	end

	replace("<!%-%-.-%-%->") -- HTML comments
	replace("<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- noinclude bits

	local hasOnlyInclude = mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") -- avoid expensive search if possible
	if hasOnlyInclude then
		replace("</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- text between onlyinclude sections
		replace("^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- text before first onlyinclude section
		replace("</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.*") -- text after last onlyinclude section
	end

	if not is(options.keepSubsections) then
		replace("\n==.*") -- first ==Heading== and everything after it
		replace("^==.*") -- ...even if the lead is empty
	end

	if not is(options.keepRefs) then
		replace("<%s*[Rr][Ee][Ff][^>]-/%s*>") -- refs cited elsewhere
		replace("<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>") -- refs
		replace("%b{}", stripTemplate) -- unwanted templates such as references
	end

	replace("<%s*[Ss][Cc][Oo][Rr][Ee].->.-<%s*/%s*[Ss][Cc][Oo][Rr][Ee]%s*>") -- musical scores
	replace("<%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s*/%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s*>", convertImageMap) -- imagemaps become standard images
	replace("%s*{{%s*[Tt][Oo][Cc].-}}") -- most common tables of contents
	replace("%s*__[A-Z]*TOC__") -- TOC behavior switches
	replace("\n%s*{{%s*[Pp]p%-.-}}", "\n") -- protection templates
	replace("%s*{{[^{|}]*[Ss]idebar%s*}}") -- most sidebars
	replace("%s*{{[^{|}]*%-[Ss]tub%s*}}") -- most stub templates
	replace("%s*%[%[%s*:?[Cc]ategory:.-%]%]") -- categories
	replace("^:[^\n]+\n") -- DIY hatnote indented with a colon

	return text
end
-- Extract the body of a ==Section== from the wikitext of a page
-- @param text : wikitext of the page
-- @param section : section title; it is URI-decoded and then matched literally
-- @param mainOnly : if truthy, stop at the next heading of any level; otherwise subsections
--                   are included and the section stops at the next heading of its own level or higher
-- @return the wikitext following the heading line, or nil if there is no such section
local function getSection(text, section, mainOnly)
	local decoded = mw.uri.decode(section) -- %26 → & etc.
	local escapedSection = mw.ustring.gsub(decoded, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- ^ → %^ etc.
	local headingPattern = "\n(==+)%s*" .. escapedSection .. "%s*==.-\n(.*)"
	local level, content = mw.ustring.match(text .. "\n", headingPattern)
	if not content then
		return nil -- no such section
	end

	local nextSection = "\n==.*" -- any level of header
	if not mainOnly then
		-- "===" → "\n===?[^=].*", matching "==" or "===" but not "===="
		nextSection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=].*"
	end
	content = mw.ustring.gsub(content, nextSection, "") -- cut off the later sections
	return content
end
-- Balance the number of <tag ...> and </tag> tags in a text
-- Surplus opening tags are removed from the end; surplus closing tags are removed from the start
-- @param text : wikitext
-- @param tag : tag name, e.g. "div" (inserted into the search patterns as given)
-- @return the text with equal numbers of opening and closing tags
local function fixTags(text, tag)
	local openPattern = "<%s*" .. tag .. "%f[^%w_].->"
	local closePattern = "<%s*/" .. tag .. "%f[^%w_].->"

	local opened = 0
	for _ in mw.ustring.gmatch(text, openPattern) do
		opened = opened + 1
	end
	local closed = 0
	for _ in mw.ustring.gmatch(text, closePattern) do
		closed = closed + 1
	end

	if opened > closed then
		-- keep as many opening tags as there are closing tags; delete every one after that
		local seen = 0
		text = mw.ustring.gsub(text, openPattern, function(t)
			seen = seen + 1
			if seen > closed then
				return ""
			end
			return nil -- keep this tag
		end)
	elseif closed > opened then
		-- delete just the first few closing tags
		text = mw.ustring.gsub(text, closePattern, "", closed - opened)
	end
	return text
end
-- Main function returns a string value: text of the lead of a page
-- @param pageNames : sequence of page names, each in the form page, [[page]] or [[page|text]],
--                    optionally with #Section and optionally followed by |opt1=...|opt2=...
--                    If several are given one is picked at random; names which cannot be read
--                    are removed from this table
-- @param options : global options; per-page options given after the page name take priority
-- @return the excerpt as wikitext, or the result of err() if no page could be read
local function main(pageNames, options)
	if not pageNames or #pageNames < 1 then return err("No page names given") end
	local pageName
	local text
	local pageCount = #pageNames
	local firstPage = pageNames[1] or "(nil)" -- save for error message, as the name may be deleted from pageNames
	local gotOptions
	local pageOptionsString
	local section

	-- read the page, or a random one if multiple pages were provided
	if pageCount > 1 then math.randomseed(os.time()) end
	while not text and pageCount > 0 do
		local pageNumber = 1
		if pageCount > 1 then pageNumber = math.random(pageCount) end -- pick a random title
		pageName = pageNames[pageNumber]
		if pageName and pageName ~= "" then
			-- We have page or [[page]] or [[page|text]], possibly followed by |opt1|opt2...
			local pn
			pn, gotOptions, pageOptionsString = mw.ustring.match(pageName, "^%s*(%[%b[]%])%s*(|?)(.*)")
			if pn then
				pageName = mw.ustring.match(pn, "%[%[([^|%]]*)") -- turn [[page|text]] into page, discarding text
			else -- we have page or page|opt...
				pageName, gotOptions, pageOptionsString = mw.ustring.match(pageName, "%s*([^|]*[^|%s])%s*(|?)(.*)")
			end

			if pageName and pageName ~= "" then
				local base
				base, section = mw.ustring.match(pageName, "(.-)#(.*)")
				pageName = base or pageName

				-- declared local: assigning to an undeclared name would create a global variable
				local normalisedPageName
				text, normalisedPageName = getContent(pageName)
				-- check the title before it is used, so that #lst is never called without one
				if not normalisedPageName then
					return err("No title for page name " .. pageName)
				end
				if is(options.fragment) then
					local frame = mw.getCurrentFrame()
					text = frame:callParserFunction('#lst', normalisedPageName, options.fragment)
				end
				pageName = normalisedPageName

				if text and options.nostubs then
					local isStub = mw.ustring.find(text, "%s*{{[^{|}]*%-[Ss]tub%s*}}")
					if isStub then text = nil end
				end
				if not section then
					section = mw.ustring.match(pageName, ".-#(.*)") -- parse redirect to Page#Section
				end
				if text and section and section ~= "" then text = getSection(text, section) end
			end
		end
		if not text then table.remove(pageNames, pageNumber) end -- this one didn't work; try another
		pageCount = pageCount - 1 -- ensure that we exit the loop after at most #pageNames iterations
	end
	if not text then return err("Cannot read a valid page: first name is " .. firstPage) end

	text = cleanupText(text, options)

	local pageOptions = {} -- pageOptions (even if value is "") have priority over global options
	for k, v in pairs(options) do pageOptions[k] = v end
	if gotOptions and gotOptions ~= "" then
		for _, t in pairs(mw.text.split(pageOptionsString, "|")) do
			local k, v = mw.ustring.match(t, "%s*([^=]-)%s*=(.-)%s*$")
			-- an item without "=" yields no key; skip it, since a nil table index raises an error
			if k then pageOptions[k] = v end
		end
		pageOptions.paraflags = numberFlags(pageOptions["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
		pageOptions.fileflags = numberFlags(pageOptions["files"] or "") -- parse file numbers
		if pageOptions.more and pageOptions.more == "" then pageOptions.more = "Read more..." end -- more= is short for this default text
	end

	local fileText
	fileText, text = parse(text, pageOptions)

	-- replace the bold title or synonym near the start of the article by a wikilink to the article
	local lang = mw.language.getContentLanguage()
	local pos = mw.ustring.find(text, "'''" .. lang:ucfirst(pageName) .. "'''", 1, true) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
		or mw.ustring.find(text, "'''" .. lang:lcfirst(pageName) .. "'''", 1, true) -- plain search: special characters in pageName represent themselves
	if pos then
		local len = mw.ustring.len(pageName)
		text = mw.ustring.sub(text, 1, pos + 2) .. "[[" .. mw.ustring.sub(text, pos + 3, pos + len + 2) .. "]]" .. mw.ustring.sub(text, pos + len + 3, -1) -- link it
	else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
		text = mw.ustring.gsub(text, "()'''(.-'*)'''", function(a, b)
			if a < 100 and not mw.ustring.find(b, "%[") then -- if early in article and not wikilinked
				return "'''[[" .. pageName .. "|" .. b .. "]]'''" -- replace '''Foo''' by '''[[pageName|Foo]]'''
			else
				return nil -- instruct gsub to make no change
			end
		end, 1) -- only the first bold text is considered
	end

	-- remove '''bold text''' if requested
	if is(pageOptions.nobold) then text = mw.ustring.gsub(text, "'''", "") end

	text = fileText .. text

	-- Seek and destroy unterminated templates and wikilinks
	repeat -- hide matched {{template}}s including nested templates
		local t = text
		text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- {{sometemplate}} → E{Esometemplate}E}E where E represents escape
		text = mw.ustring.gsub(text, "(< *math[^>]*>[^<]-)}}(.-< */math *>)", "%1}\27}\27%2") -- <math>\{sqrt\{hat{x}}</math> → <math>\{sqrt\{hat{x}E}E</math>
	until text == t
	repeat -- do similar for [[wikilink]]s
		local t = text
		text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")
	until text == t
	-- these two use the byte-oriented string.gsub, exactly as the previous text.gsub(text, ...) calls did
	text = string.gsub(text, "([{}%[%]])%1[^\27].*", "") -- remove unmatched {{, }}, [[ or ]] and everything thereafter, avoiding ]E]E etc.
	text = string.gsub(text, "([{}%[%]])%1$", "") -- remove unmatched {{, }}, [[ or ]] at end of text
	text = mw.ustring.gsub(text, "\27", "") -- unhide matched pairs: E{E{ → {{, ]E]E → ]], etc.

	-- Ensure div tags match
	text = fixTags(text, "div")

	if pageOptions.more then text = text .. " '''[[" .. pageName .. "|" .. pageOptions.more .. "]]'''" end -- wikilink to article for more info

	if pageOptions.list and not pageOptions.showall then -- add a collapsed list of pages which might appear
		local listtext = pageOptions.list
		if listtext == "" then listtext = "Other articles" end
		text = text .. "{{collapse top|title={{resize|85%|" ..listtext .. "}}|bg=fff}}{{hlist"
		for _, p in pairs(pageNames) do
			if mw.ustring.match(p, "%S") then text = text .. "|[[" .. mw.text.trim(p) .. "]]" end
		end
		text = text .. "}}\n{{collapse bottom}}"
	end

	return text
end
-- Common implementation of the lead, linked, listitem, random and selected entry points
-- @param frame : the #invoke frame; arguments of the calling template are read as well
-- @param template : which entry point was used: "lead", "linked", "listitem", "random" or "selected"
-- @return preprocessed excerpt text; a link to d.brokenCategory when the excerpt is empty on a
--         content page; or the result of err() when the arguments are unusable
local function invoke(frame, template)
	-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
	-- Template (parent) arguments are copied first so that arguments given directly to #invoke override them
	local args = {}
	for key, value in pairs(frame:getParent().args) do args[key] = value end
	for key, value in pairs(frame.args) do args[key] = value end

	errors = args["errors"] -- set the module level flag used in local function err

	local articleCount = #args -- must be at least 1 except with selected=Foo and Foo=Somepage
	if articleCount < 1 then
		local selectedByKey = template == "selected" and args[template] and args[args[template]]
		if not selectedByKey then
			return err("No articles provided")
		end
	end

	local pageNames = {}
	if template == "lead" then
		pageNames = { args[1] }
	elseif template == "linked" or template == "listitem" then
		-- Read named page and find its wikilinks
		local page = args[1]
		local text, title = getContent(page)
		if not title then
			return err("No title for page name " .. page)
		end
		if not text then
			return err("No content for page name " .. page)
		end
		local wantedSection = args["section"]
		if wantedSection then -- check relevant section only
			text = getSection(text, wantedSection, args["sectiononly"])
			if not text then
				return err("No section " .. wantedSection .. " in page " .. page)
			end
		end
		-- replace annotated links with real links
		text = mw.ustring.gsub(text, "{{%s*[Aa]nnotated[ _]link%s*|%s*(.-)%s*}}", "[[%1]]")
		local linkPattern
		if template == "linked" then
			linkPattern = "%[%[%s*([^%]|\n]*)" -- every wikilink
		else
			-- listitem: first wikilink on a line beginning *, :#, etc. except in "See also" or later section
			text = mw.ustring.gsub(text, "\n== *See also.*", "")
			linkPattern = "\n:*[%*#][^\n]-%[%[%s*([^%]|\n]*)"
		end
		for link in mw.ustring.gmatch(text, linkPattern) do
			table.insert(pageNames, link)
		end
	elseif template == "random" then
		-- accept any number of page names. If more than one, we'll pick one randomly
		for key, value in pairs(args) do
			if value and type(key) == 'number' then
				table.insert(pageNames, value)
			end
		end
	elseif template == "selected" then
		local articleKey = args[template]
		if tonumber(articleKey) then -- normalise article number into the range 1..#args
			articleKey = articleKey % articleCount
			if articleKey == 0 then articleKey = articleCount end
		end
		pageNames = { args[articleKey] }
	end

	-- the argument table doubles as the options table: more, errors, fileargs, etc.
	local options = args
	options.paraflags = numberFlags(args["paragraphs"] or "") -- which paragraphs, e.g. "1,3-5"
	options.fileflags = numberFlags(args["files"] or "") -- which files
	if options.more and options.more == "" then
		options.more = "Read more..." -- more= is short for this default text
	end

	local text = ""
	if not options.showall then
		text = main(pageNames, options)
	else
		-- show an excerpt of every page, separated by the value of showall (or a default rule)
		local separator = ""
		for _, name in pairs(pageNames) do
			local piece = main({ name }, options)
			if piece ~= "" then
				text = text .. separator .. piece
				separator = options.showall
				if separator == "" then separator = "{{clear}}{{hr}}" end
			end
		end
	end

	local isBroken = text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage
	if isBroken then
		return "[[Category:" .. d.brokenCategory .. "]]"
	end
	return frame:preprocess(text)
end
-- Lua implementation of {{Excerpt}}, to reduce the post-expand include size
-- Arguments (from the calling template, overridden by those given to #invoke) include:
--   article or 1, section or 2, tag, indicator, nohat, paragraphs, files, fragment,
--   tables, references, subsections
-- @param frame : the #invoke frame
-- @return styles, optional hatnote and the excerpt, wrapped in <tag> elements
local function excerpt(frame)
	local args = {}
	for key, value in pairs(frame:getParent().args) do args[key] = value end
	for key, value in pairs(frame.args) do args[key] = value end -- #invoke arguments have priority

	local tag = is(args.tag) and args.tag or 'div'
	local article = is(args.article) and args.article or args[1] or '{{{1}}}'
	local section = is(args.section) and args.section or args[2]
	local hasIndicator = is(args.indicator)
	local hasSection = is(section)
	local closeTag = '</' .. tag .. '>'

	local output = {}
	output[1] = frame:extensionTag{ name = 'templatestyles', args = {src='Excerpt/styles.css'} }
	output[2] = '<' .. tag .. ' class="excerpt-block">'
	output[3] = hasIndicator and ('<' .. tag .. ' class="excerpt-indicator">') or ''

	-- hatnote: "This section is an excerpt from [[Article#Section|Article § Section]] [edit]"
	if is(args.nohat) then
		output[4] = ''
	else
		local linkTarget, linkLabel = article, article
		if hasSection then
			linkTarget = article .. '#' .. frame:callParserFunction( 'urlencode', section, 'WIKI' )
			linkLabel = article .. frame:callParserFunction( '#tag:nowiki', ' § ' ) .. section
		end
		local title = mw.title.new(article) or mw.title.getCurrentTitle()
		local hatnote = table.concat({
			'This' .. (hasIndicator and '' or ' section') .. ' is an excerpt from ',
			'[[',
			linkTarget,
			'|',
			linkLabel,
			']]',
			"''" .. '<span class="mw-editsection-like plainlinks"><span>[ </span>[',
			title:fullUrl('action=edit') .. ' edit',
			']<span> ]</span></span>' .. "''",
		})
		output[4] = require('Module:Hatnote')._hatnote(hatnote, {selfref=true}) or err("Error generating hatnote")
	end

	output[5] = '<' .. tag .. ' class="excerpt">\n'

	if article == '{{{1}}}' then
		output[6] = err("No article provided")
	else
		-- the argument table doubles as the options table for main
		local options = args
		options.paraflags = args.paragraphs
		options.fileflags = args.files or 1
		options.nobold = 1
		options.fragment = args.fragment
		options.keepTables = args.tables or 1
		options.keepRefs = args.references or 1
		options.keepSubsections = args.subsections

		local text = main({ (article .. '#' .. (section or '')) }, options)
		local isBroken = text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage
		if isBroken then
			output[6] = "[[Category:" .. d.brokenCategory .. "]]"
		else
			output[6] = frame:preprocess(text) or err("Error processing text")
		end
	end

	output[7] = closeTag
	output[8] = hasIndicator and closeTag or ''
	output[9] = closeTag
	output[10] = mw.title.getCurrentTitle().isContentPage and '[[Category:Articles with excerpts]]' or ''
	return table.concat(output)
end
-- Entry points for template callers using #invoke:

-- {{Transclude lead excerpt}} reads the first and only article
p.lead = function(frame)
	return invoke(frame, "lead")
end

-- {{Transclude linked excerpt}} reads a randomly selected article linked from the given page
p.linked = function(frame)
	return invoke(frame, "linked")
end

-- {{Transclude list item excerpt}} reads a randomly selected article listed on the given page
p.listitem = function(frame)
	return invoke(frame, "listitem")
end

-- {{Transclude random excerpt}} reads any article (default for invoke with one argument)
p.random = function(frame)
	return invoke(frame, "random")
end

-- {{Transclude selected excerpt}} reads the article whose key is in the selected= parameter
p.selected = function(frame)
	return invoke(frame, "selected")
end

-- {{Excerpt}} transcludes part of an article into another article
p.excerpt = function(frame)
	return excerpt(frame)
end
-- Entry points for other Lua modules
-- Each one forwards exactly the arguments listed to the local function of the same purpose

p.getContent = function(page, frame)
	return getContent(page, frame)
end

p.getsection = function(text, section)
	return getSection(text, section)
end

p.parse = function(text, options, filesOnly)
	return parse(text, options, filesOnly)
end

p.argimage = function(text)
	return argImage(text)
end

p.checkimage = function(image)
	return checkImage(image)
end

p.parseimage = function(text, start)
	return parseImage(text, start)
end

p.cleanupText = function(text, options)
	return cleanupText(text, options)
end

p.main = function(pageNames, options)
	return main(pageNames, options)
end

p.numberflags = function(str)
	return numberFlags(str)
end
return p