-- Module:Excerpt/sandbox
--
-- This is the module sandbox page for Module:Excerpt (diff).
-- See also the companion subpage for test cases.
-- Module:Excerpt implements the Excerpt template
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others
-- License: CC-BY-SA-3.0

local parser = require( 'Module:WikitextParser' )
local yesno = require( 'Module:Yesno' )

-- Load the sandbox configuration in protected mode; if the config module cannot be
-- loaded, fall back to an empty table so that every config.* lookup simply yields nil
local ok, config = pcall( require, 'Module:Excerpt/config/sandbox' )
if not ok then config = {} end

-- Table holding every function of this module; it is returned at the end of the file
local Excerpt = {}

-- Main entry point for templates
-- Reads the template parameters through Excerpt.getArg, extracts the requested part of the
-- target page, cleans it up and wraps it in a block with an optional hatnote and "read more" link
-- @param frame Frame object, used to preprocess the excerpt and to emit the TemplateStyles tag
-- @return An error node from Excerpt.getError when the page or section cannot be used,
--         the trimmed excerpt string when "inline" is set, or the mw.html block otherwise
function Excerpt.main( frame )

	-- Make sure the requested page exists
	local page = Excerpt.getArg( 1 )
	if not page or page == '{{{1}}}' then return Excerpt.getError( 'no-page' ) end
	local title = mw.title.new( page )
	if not title then return Excerpt.getError( 'invalid-title', page ) end
	if title.isRedirect then title = title.redirectTarget end
	if not title.exists then return Excerpt.getError( 'page-not-found', page ) end
	page = title.prefixedText

	-- Set variables from the template parameters
	-- Note: lists, tables, templates and paragraphs are read but not referenced further down in this function
	local hash = string.match( Excerpt.getArg( 1 ), '[^#]+#(.+)' )
	local section = Excerpt.getArg( 2, hash )
	local hat = yesno( Excerpt.getArg( 'hat', true ) )
	local edit = yesno( Excerpt.getArg( 'edit', true ) )
	local editIntro = Excerpt.getArg( 'editintro' )
	local this = Excerpt.getArg( 'this' )
	local only = Excerpt.getArg( 'only' )
	local files = Excerpt.getArg( 'files', Excerpt.getArg( 'file', ( only == 'file' and 1 ) ) )
	local lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list', ( only == 'list' and 1 ) ) )
	local tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table', ( only == 'table' and 1 ) ) )
	local templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template', ( only == 'template' and 1 ) ) )
	local paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph', ( only == 'paragraph' and 1 ) ) )
	local references = yesno( Excerpt.getArg( 'references', true ) )
	local subsections = yesno( Excerpt.getArg( 'subsections', false ) )
	local links = yesno( Excerpt.getArg( 'links', true ) )
	local bold = yesno( Excerpt.getArg( 'bold', false ) )
	local briefDates = yesno( Excerpt.getArg( 'briefdates', false ) )
	local inline = yesno( Excerpt.getArg( 'inline' ) )
	local quote = yesno( Excerpt.getArg( 'quote' ) )
	local more = yesno( Excerpt.getArg( 'more' ) )
	local class = Excerpt.getArg( 'class' )
	local displayTitle = Excerpt.getArg( 'displaytitle', page )

	-- Get the full wikitext
	local wikitext = title:getContent()

	-- Reduce to the section we're interested in
	local excerpt -- declared local so the working text never leaks into the global environment
	if section then
		excerpt = parser.getSectionTag( wikitext, section )
		if not excerpt then
			if subsections then
				excerpt = parser.getSection( wikitext, section )
			else
				local sections = parser.getSections( wikitext )
				excerpt = sections[ section ]
			end
		end
		if not excerpt then return Excerpt.getError( 'section-not-found', section ) end
		if excerpt == '' and not only then return Excerpt.getError( 'section-empty', section ) end
	else
		excerpt = parser.getLead( wikitext )
		if excerpt == '' and not only then return Excerpt.getError( 'lead-empty' ) end
	end

	-- Leave only the requested elements
	if only then
		if only == 'table' then
			local found = parser.getTables( excerpt )
			excerpt = found[1] or '' -- no table found: continue with an empty excerpt instead of nil
		end
		if only == 'tables' then
			local found = parser.getTables( excerpt )
			excerpt = table.concat( found, '\n' )
		end
	end

	if briefDates then
		excerpt = Excerpt.fixDates( excerpt )
	end

	-- If no file was found, try to get one from the infobox
	-- The "caller asked for files" test is grouped explicitly: 'and' binds tighter than 'or',
	-- so without the grouping only=file would skip the two checks that follow it
	local wantsFiles = only == 'file' or only == 'files' or ( not only and ( not files or files ~= '0' ) )
	if wantsFiles -- caller asked for files
		and config.captions -- and we have the config option required to try finding files in infoboxes
		and #parser.getFiles( excerpt ) == 0 -- and there are no files in the excerpt
	then
		excerpt = Excerpt.addInfoboxFile( excerpt )
	end

	-- Remove unwanted elements
	excerpt = Excerpt.removeComments( excerpt )
	excerpt = Excerpt.removeBlacklist( excerpt )
	excerpt = Excerpt.removeSelfLinks( excerpt )
	excerpt = Excerpt.removeNonFreeFiles( excerpt )
	excerpt = Excerpt.removeBehaviorSwitches( excerpt )

	-- Remove wikilinks
	if not links then
		excerpt = Excerpt.removeLinks( excerpt )
	end

	-- Link the bold text and then remove it
	excerpt = Excerpt.linkBold( excerpt, page )
	if not bold then
		excerpt = Excerpt.removeBold( excerpt )
	end

	if references then
		excerpt = Excerpt.fixReferences( excerpt, page, wikitext )
	else
		excerpt = Excerpt.removeReferences( excerpt )
	end

	-- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly
	excerpt = mw.text.trim( excerpt )
	excerpt = string.gsub( excerpt, '\n\n\n+', '\n\n' )
	excerpt = '\n' .. excerpt .. '\n'

	-- Remove nested categories
	excerpt = frame:preprocess( excerpt )
	excerpt = Excerpt.removeCategories( excerpt )

	-- Add tracking categories
	if config.categories then
		local currentTitle = mw.title.getCurrentTitle()
		local contentCategory = config.categories.content
		if contentCategory and currentTitle.isContentPage then
			excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]'
		end
		local namespaceCategory = config.categories[ currentTitle.namespace ]
		if namespaceCategory then
			excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]'
		end
	end

	-- Combine and return the elements
	if inline then
		return mw.text.trim( excerpt )
	end

	local tag = quote and 'blockquote' or 'div'
	local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( class )

	if config.styles then
		local styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } )
		block:node( styles )
	end

	if hat then
		local hatnote = Excerpt.getHat( page, section, displayTitle, this, quote, only, edit, editIntro )
		block:node( hatnote )
	end

	local body = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt )
	block:node( body )

	if more then
		local readMore = Excerpt.getReadMore( page, section )
		block:node( readMore )
	end

	return block
end

-- Prepend a thumbnail built from the first image found in a template parameter
-- Each entry of config.captions is read as { file parameter, caption parameters, CSS class parameters (optional) }
-- @param excerpt Wikitext to search for templates
-- @return The excerpt with a [[File:...|thumb|...]] link prepended, or the unchanged excerpt if no image was found
function Excerpt.addInfoboxFile( excerpt )
	local extensions = { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }

	-- We cannot distinguish the infobox from the other templates, so we search them all
	for _, template in pairs( parser.getTemplates( excerpt ) ) do
		local parameters = parser.getTemplateParameters( template )
		for _, entry in pairs( config.captions ) do
			local captionParams, classParams = entry[2], entry[3]
			local file = parameters[ entry[1] ]
			if file and Excerpt.matchAny( file, '^.*%.', extensions, '.*' ) then
				-- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg
				file = mw.ustring.match( file, '%[?%[?.-:([^{|]+)%]?%]?' ) or file

				-- The first caption parameter that is set wins
				local caption
				for _, name in pairs( captionParams ) do
					caption = parameters[ name ]
					if caption then break end
				end

				-- Check for CSS classes
				-- We opt to use skin-invert-image instead of skin-invert
				-- in all other cases, the CSS provided in the infobox is used
				local cssClass
				if classParams then
					for _, name in pairs( classParams ) do
						local value = parameters[ name ]
						if value then
							cssClass = ( value == 'skin-invert' ) and 'skin-invert-image' or value
							break
						end
					end
				end

				local classOption = cssClass and ( '|class=' .. cssClass ) or ''
				return '[[File:' .. file .. classOption .. '|thumb|' .. ( caption or '' ) .. ']]' .. excerpt
			end
		end
	end
	return excerpt
end

-- Remove the files whose description page mentions "non-free"
-- The local description page is read first; when it is missing or empty,
-- the file page is transcluded instead so that a shared repository description can be checked
-- @param wikitext Wikitext to clean
-- @return The wikitext without the files identified as non-free
function Excerpt.removeNonFreeFiles( wikitext )
	local files = parser.getFiles( wikitext )
	for _, file in pairs( files ) do
		local fileName = 'File:' .. parser.getFileName( file )
		local fileTitle = mw.title.new( fileName )
		if fileTitle then -- mw.title.new returns nil for names that are not valid titles
			local fileDescription = fileTitle:getContent()
			if not fileDescription or fileDescription == '' then
				local frame = mw.getCurrentFrame()
				fileDescription = frame:preprocess( '{{' .. fileName .. '}}' ) -- try Commons
			end
			if fileDescription and string.match( fileDescription, '[Nn]on%-free' ) then
				wikitext = Excerpt.removeString( wikitext, file )
			end
		end
	end
	return wikitext
end

-- Build the hatnote displayed above the excerpt
-- @param page Prefixed title of the excerpted page
-- @param section Name of the excerpted section, if any
-- @param displayTitle Text shown for the link to the page
-- @param this Custom opening words; when set they replace the localized default
-- @param quote When truthy (and "this" is not set), the 'this' message opens the hatnote
-- @param only When set (and neither "this" nor "quote" is), it is used as the message key for the opening words
-- @param edit When truthy, an edit link is appended
-- @param editIntro Value for the editintro URL parameter of the edit link, if any
-- @return Preprocessed wikitext when config.hat is set, otherwise an mw.html div
function Excerpt.getHat( page, section, displayTitle, this, quote, only, edit, editIntro )
	local hat

	-- Build the main part of the hatnote
	if this then
		hat = this
	elseif quote then
		hat = Excerpt.getMessage( 'this' )
	elseif only then
		hat = Excerpt.getMessage( only )
	else
		hat = Excerpt.getMessage( 'section' )
	end
	hat = hat .. ' ' .. Excerpt.getMessage( 'excerpt' )

	-- Build the section link
	if section then
		hat = hat .. ' [[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '|' .. displayTitle
			.. ' § ' .. mw.ustring.gsub( section, '%[%[([^]|]+)|?[^]]*%]%]', '%1' ) .. ']].' -- remove nested links
	else
		hat = hat .. ' [[:' .. page .. '|' .. displayTitle .. ']].'
	end

	-- Build the edit link
	local title = edit and mw.title.new( page ) -- nil when page is not a valid title: skip the link rather than error
	if title then
		local query = 'action=edit'
		if editIntro then
			-- Encode the value so that spaces, '&' or '#' cannot break the query string or the external link syntax
			query = query .. '&editintro=' .. mw.uri.encode( editIntro, 'QUERY' )
		end
		local editUrl = title:fullUrl( query )
		hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>['
		hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain()
		hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>'
	end

	if config.hat then
		-- config.hat is concatenated in front of the text and '}}' behind it, then expanded as wikitext
		local frame = mw.getCurrentFrame()
		hat = config.hat .. hat .. '}}'
		hat = frame:preprocess( hat )
	else
		hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat )
	end

	return hat
end

-- Build the bold "read more" link shown below the excerpt
-- @param page Title of the excerpted page
-- @param section Name of the excerpted section, appended as a fragment when given
-- @return mw.html div with the classes "noprint excerpt-more" containing the link
function Excerpt.getReadMore( page, section )
	local target = section and ( page .. '#' .. section ) or page
	local label = Excerpt.getMessage( 'more' )
	local wikilink = "'''[[" .. target .. '|' .. label .. "]]'''"
	return mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( wikilink )
end

-- Fix birth and death dates, but only in the first paragraph
-- The first parenthesis near the start is shortened to (year–year) when it contains two plausible years
-- @todo Use parser.getParagraphs() to get the first paragraph
-- @param excerpt Wikitext to fix
-- @return The wikitext, with the leading parenthesis shortened when the checks pass
function Excerpt.fixDates( excerpt )
	-- Skip initial templates: move the cursor past every {{...}} that begins exactly at the cursor
	local cursor = 1
	while true do
		local first, last = mw.ustring.find( excerpt, "%s*%b{}%s*", cursor )
		if not first or first > cursor then break end
		cursor = last + 1
	end

	-- Get (...), which may be (year–year), and look only near the start
	local open, close = mw.ustring.find( excerpt, "%b()", cursor )
	if not open or open >= cursor + 100 then return excerpt end

	local year1, conjunction, year2 = mw.ustring.match( mw.ustring.sub( excerpt, open, close ), '(%d%d%d+)(.-)(%d%d%d+)' )
	if not ( year1 and year2 ) then return excerpt end

	-- The years must be joined by a dash or by the {{snd}} template
	local joined = mw.ustring.match( conjunction, '[%-–—]' ) or mw.ustring.match( conjunction, '{{%s*[sS]nd%s*}}' )
	if not joined then return excerpt end

	local y1, y2 = tonumber( year1 ), tonumber( year2 )
	if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( "%Y" ) ) then
		-- Keep the opening and closing parentheses, replace everything in between
		excerpt = mw.ustring.sub( excerpt, 1, open ) .. year1 .. "–" .. year2 .. mw.ustring.sub( excerpt, close )
	end
	return excerpt
end

-- Replace the first call to each reference defined outside of the text for the full reference, to prevent undefined references
-- Then prefix the page title to the reference names to prevent conflicts
-- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo">
-- and also <ref name="Foo" /> for <ref name="Title of the article Foo" />
-- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo">
-- and <ref group="Bar"> for <ref>
-- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
-- @param excerpt Wikitext of the excerpt
-- @param page Title of the excerpted page, used as prefix for the reference names
-- @param wikitext Full wikitext of the excerpted page, searched for reference bodies missing from the excerpt
-- @return The excerpt with the references fixed
function Excerpt.fixReferences( excerpt, page, wikitext )
	local seen = {} -- set of reference names already processed, keyed by name
	local position = 1
	while position < mw.ustring.len( excerpt ) do
		local refName
		refName, position = mw.ustring.match( excerpt, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>]+)["\']?[^>]*/%s*>()', position )
		if refName then
			refName = mw.text.trim( refName )
			if not seen[ refName ] then -- make sure we process each ref name only once
				seen[ refName ] = true
				local escaped = Excerpt.escapeString( refName )
				local refBody = mw.ustring.match( excerpt, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?%s*' .. escaped .. '%s*["\']?[^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>' )
				if not refBody then -- the ref body is not in the excerpt
					refBody = mw.ustring.match( wikitext, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?%s*' .. escaped .. '%s*["\']?[^/>]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>' )
					if refBody then -- the ref body was found elsewhere
						-- '%' is doubled because it is the escape character of gsub replacement strings
						local replacement = mw.ustring.gsub( refBody, '%%', '%%%%' )
						excerpt = mw.ustring.gsub( excerpt, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?%s*' .. escaped .. '%s*["\']?[^>]*/?%s*>', replacement, 1 )
					end
				end
			end
		else
			position = mw.ustring.len( excerpt )
		end
	end
	local prefix = string.gsub( page, '"', '' ) -- remove any quotation marks from the page title
	prefix = string.gsub( prefix, '%%', '%%%%' ) -- a literal '%' in the title must not be read as a capture reference
	excerpt = mw.ustring.gsub( excerpt, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>/]+)["\']?[^>/]*(/?)%s*>', '<ref name="' .. prefix .. ' %1"%2>' )
	-- Both quotes are optional so that <ref group=Bar> is handled like <ref group="Bar">
	excerpt = mw.ustring.gsub( excerpt, '<%s*[Rr][Ee][Ff]%s*group%s*=%s*["\']?[^"\'>/]+["\']?%s*>', '<ref>' )
	return excerpt
end

-- Remove blacklisted templates
-- The names come from config.blacklist; when it is missing or empty the excerpt is returned untouched
-- (an empty filter string would otherwise become the pattern '', which matches every template name)
-- @param excerpt Wikitext to clean
-- @return The wikitext without the blacklisted templates
function Excerpt.removeBlacklist( excerpt )
	local blacklist = config.blacklist
	if not blacklist or #blacklist == 0 then return excerpt end
	local filters = Excerpt.parseFilter( table.concat( blacklist, ',' ) )
	for _, template in pairs( parser.getTemplates( excerpt ) ) do
		local templateName = parser.getTemplateName( template )
		if Excerpt.matchFilter( templateName, filters ) then
			excerpt = Excerpt.removeString( excerpt, template )
		end
	end
	return excerpt
end

-- Strip every reference reported by parser.getReferences from the excerpt
-- @param excerpt Wikitext to clean
-- @return The wikitext without the references
function Excerpt.removeReferences( excerpt )
	local result = excerpt
	for _, reference in pairs( parser.getReferences( excerpt ) ) do
		result = Excerpt.removeString( result, reference )
	end
	return result
end

-- Strip every category reported by parser.getCategories from the excerpt
-- @param excerpt Wikitext to clean
-- @return The wikitext without the categories
function Excerpt.removeCategories( excerpt )
	local result = excerpt
	for _, category in pairs( parser.getCategories( excerpt ) ) do
		result = Excerpt.removeString( result, category )
	end
	return result
end

-- Remove every ''' (bold markup) from the excerpt
-- @return The result of string.gsub: the cleaned text followed by the number of replacements
function Excerpt.removeBold( excerpt )
	return string.gsub( excerpt, "'''", '' )
end

-- Remove behavior switches written as two underscores, capital letters and two underscores, like __NOTOC__
-- @return The result of string.gsub: the cleaned text followed by the number of replacements
function Excerpt.removeBehaviorSwitches( excerpt )
	return string.gsub( excerpt, '__[A-Z]+__', '' )
end

-- Remove HTML comments, each one up to the first closing marker that follows it
-- @return The result of string.gsub: the cleaned text followed by the number of replacements
function Excerpt.removeComments( excerpt )
	return string.gsub( excerpt, '<!%-%-.-%-%->', '' )
end

-- Strip every link reported by parser.getLinks from the excerpt
-- @param excerpt Wikitext to clean
-- @return The wikitext without the links
function Excerpt.removeLinks( excerpt )
	local result = excerpt
	for _, link in pairs( parser.getLinks( excerpt ) ) do
		result = Excerpt.removeString( result, link )
	end
	return result
end

-- Unlink the wikilinks that point to the page currently being rendered
-- Both [[Title]] and [[Title|text]] are handled, with the first letter in upper and in lower case
-- @todo Use parser.getLinks
-- @param excerpt Wikitext to clean
-- @param page Not used: the title always comes from mw.title.getCurrentTitle()
-- @return The wikitext with the self links replaced by their text
function Excerpt.removeSelfLinks( excerpt, page )
	local lang = mw.language.getContentLanguage()
	local current = Excerpt.escapeString( mw.title.getCurrentTitle().prefixedText )
	local upper = lang:ucfirst( current )
	local lower = lang:lcfirst( current )

	-- Applied strictly in this order: plain links first, then piped links
	local patterns = {
		'%[%[(' .. upper .. ')%]%]',
		'%[%[(' .. lower .. ')%]%]',
		'%[%[' .. upper .. '|([^]]+)%]%]',
		'%[%[' .. lower .. '|([^]]+)%]%]',
	}
	for _, pattern in ipairs( patterns ) do
		excerpt = string.gsub( excerpt, pattern, '%1' )
	end
	return excerpt
end

-- Replace the bold title or synonym near the start of the page by a link to the page
-- @param excerpt Wikitext of the excerpt
-- @param page Title of the excerpted page
-- @return The excerpt with the first suitable bold text turned into a link to the page
function Excerpt.linkBold( excerpt, page )
	local lang = mw.language.getContentLanguage()

	-- Look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
	-- Plain search: special characters in page represent themselves
	local start = mw.ustring.find( excerpt, "'''" .. lang:ucfirst( page ) .. "'''", 1, true )
	if not start then
		start = mw.ustring.find( excerpt, "'''" .. lang:lcfirst( page ) .. "'''", 1, true )
	end

	if start then
		-- The title sits right after the three opening apostrophes: wrap it in [[ ]]
		local titleStart = start + 3
		local titleEnd = start + mw.ustring.len( page ) + 2
		return mw.ustring.sub( excerpt, 1, start + 2 )
			.. '[[' .. mw.ustring.sub( excerpt, titleStart, titleEnd ) .. ']]'
			.. mw.ustring.sub( excerpt, titleEnd + 1, -1 )
	end

	-- Otherwise look at the first bold text, assumed to be a synonym of the title (e.g. a person's birth name)
	local function linkSynonym( _, text )
		if mw.ustring.find( text, '%[' ) or mw.ustring.find( text, '%{' ) then
			return nil -- already wikilinked or some weird template: instruct gsub to make no change
		end
		return "'''[[" .. page .. '|' .. text .. "]]'''" -- replace '''Foo''' by '''[[page|Foo]]'''
	end
	excerpt = mw.ustring.gsub( excerpt, "()'''(.-'*)'''", linkSynonym, 1 )
	return excerpt
end

-- Helper method to match from a list of regular expressions
-- Tries pre..list[1]..post, then pre..list[2]..post, and so on
-- @param text Text to search
-- @param pre Pattern fragment placed before each list item
-- @param list Sequence of pattern fragments to try in order
-- @param post Pattern fragment placed after each list item
-- @param init Position passed on to mw.ustring.match
-- @return Everything returned by the first successful match, or nil if none matches
function Excerpt.matchAny( text, pre, list, post, init )
	for index = 1, #list do
		local pattern = pre .. list[ index ] .. post
		local captures = { mw.ustring.match( text, pattern, init ) }
		if captures[1] then
			return unpack( captures )
		end
	end
	return nil
end

-- Helper function to get arguments
-- The args of the parent frame (the template call) are checked first,
-- then the args of the current frame; values that are empty after trimming are skipped
-- @param key Argument name or position
-- @param default Value returned when the argument is not set in either frame
-- @return The untrimmed argument value, or the default
function Excerpt.getArg( key, default )
	local frame = mw.getCurrentFrame()

	-- Return the first non-blank value stored under the key, or nil
	local function lookup( args )
		for k, value in pairs( args ) do
			if k == key and mw.text.trim( value ) ~= '' then
				return value
			end
		end
	end

	local value
	local parent = frame:getParent() -- nil when the current frame has no parent
	if parent then
		value = lookup( parent.args )
	end
	if value == nil then
		value = lookup( frame.args )
	end
	if value == nil then
		return default
	end
	return value
end

-- Helper method to get an error message
-- On content pages, the error category from config.categories.errors is added when configured
-- @param key Error key; the message looked up is 'error-' followed by this key
-- @param value Optional value passed on to the message
-- @return mw.html div with the class "error"
function Excerpt.getError( key, value )
	local markup = mw.html.create( 'div' )
		:addClass( 'error' )
		:wikitext( Excerpt.getMessage( 'error-' .. key, value ) )

	local errorCategory = config.categories and config.categories.errors
	if errorCategory and mw.title.getCurrentTitle().isContentPage then
		markup:node( '[[Category:' .. errorCategory .. ']]' )
	end
	return markup
end

-- Helper method to get a localized message
-- This method uses Module:TNT to get localized messages from https://commons.wikimedia.org/wiki/Data:I18n/Module:Excerpt.tab
-- If Module:TNT cannot be loaded or formatting the message fails, the key is returned instead
-- @param key Message key
-- @param value Optional value passed on to TNT.format
-- @return The localized message, or the key as fallback
function Excerpt.getMessage( key, value )
	local loaded, TNT = pcall( require, 'Module:TNT' )
	if loaded then
		local formatted, message = pcall( TNT.format, 'I18n/Module:Excerpt.tab', key, value )
		if formatted then
			return message
		end
	end
	return key
end

-- Helper method to escape a string for use in regexes
-- Every magic character of Lua patterns is prefixed with '%'
-- @param str String to escape
-- @return The result of gsub: the escaped string followed by the number of characters escaped
function Excerpt.escapeString( str )
	local magicCharacters = '[%^%$%(%)%.%[%]%*%+%-%?%%]'
	return str:gsub( magicCharacters, '%%%0' )
end

-- Helper method to remove a string from a text
-- @param text Text where to search for the string to remove
-- @param str String to remove
-- @return The given text with the string removed (followed by the replacement count of string.gsub)
function Excerpt.removeString( text, str )
	local pattern = Excerpt.escapeString( str )
	if #pattern > 9999 then -- strings longer than 10000 bytes can't be put into regexes
		-- Match on the first and last 999 characters only, with anything in between
		local head = Excerpt.escapeString( mw.ustring.sub( str, 1, 999 ) )
		local tail = Excerpt.escapeString( mw.ustring.sub( str, -999 ) )
		pattern = head .. '.-' .. tail
	end
	return string.gsub( text, pattern, '' )
end

-- Helper method to convert a comma-separated list of numbers, min-max ranges or names into a set of flags
-- A leading '-' marks the whole list as a blacklist and is stripped before parsing
-- @param value Comma-separated list, for example '1,3-5' or 'a,b,c'
-- @return Map whose keys are the listed integers (ranges expanded) or names, all set to true,
--         for example {[1]=true,[3]=true,[4]=true,[5]=true}; nil if no value was given
-- @return Boolean indicating whether the flags should be treated as a blacklist or not
-- @todo Merge with matchFilter
function Excerpt.parseFilter( value )
	if not value then return nil, false end
	local flags = {}
	local blacklist = false
	if string.sub( value, 1, 1 ) == '-' then
		blacklist = true
		value = string.sub( value, 2 )
	end
	local ranges = mw.text.split( value, ',' ) -- split ranges: '1,3-5' to {'1','3-5'}
	for _, range in pairs( ranges ) do
		range = mw.text.trim( range )
		local min, max = mw.ustring.match( range, '^(%d+)%s*[-–—]%s*(%d+)$' ) -- '3-5' to min=3 max=5
		if not max then
			local single = string.match( range, '^%d+$' ) -- '1' to min=1 max=1
			min, max = single, single
		end
		if max then
			-- Convert explicitly instead of relying on the implicit string coercion of the numeric for
			for i = tonumber( min ), tonumber( max ) do flags[ i ] = true end
		else
			flags[ range ] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3'
		end
	end
	return flags, blacklist
end

-- Helper function to see if a value matches any of the given filters
-- A filter matches when it equals the value (as given, or with the first letter in lower or upper case)
-- or, for non-numeric filters, when the value matches it as a pattern
-- @param value Value to test; converted with tostring
-- @param filters Table whose keys are the filters
-- @return true on a match, false if no value was given, nothing otherwise
function Excerpt.matchFilter( value, filters )
	if not value then return false end
	local text = tostring( value )
	local lang = mw.language.getContentLanguage()
	local lower = lang:lcfirst( text )
	local upper = lang:ucfirst( text )
	for filter in pairs( filters ) do
		local matched = text == tostring( filter ) or lower == filter or upper == filter
		if not matched and not tonumber( filter ) then
			matched = mw.ustring.match( text, filter )
		end
		if matched then
			return true
		end
	end
end

-- Entry points for backwards compatibility
-- Both simply forward the frame to Excerpt.main and return its result
-- @todo Verify that no one uses them and remove them
function Excerpt.lead( frame )
	return Excerpt.main( frame )
end

function Excerpt.excerpt( frame )
	return Excerpt.main( frame )
end

return Excerpt
-- Portals