Jump to content

Module:Excerpt/sandbox

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Sophivorus (talk | contribs) at 13:57, 21 March 2025 (Begin MAJOR rewrite of this module to use WikitextParser instead of Transcluder, modularize the various chunks of code, and other changes. See talk page.). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
-- Module:Excerpt implements the Excerpt template
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others
-- License: CC-BY-SA-3.0

local parser = require( 'Module:WikitextParser' )
local yesno = require( 'Module:Yesno' )

-- Load the sandbox configuration through pcall so that a missing or broken
-- config page does not abort the module; an empty table is used instead,
-- which makes every optional config.* lookup below evaluate to nil
local ok, config = pcall( require, 'Module:Excerpt/config/sandbox' )
if not ok then config = {} end

-- Table holding every function of this module; it is returned at the end of the file
local Excerpt = {}

-- Main entry point for templates
-- Reads the template parameters, extracts the requested lead or section from
-- the target page, cleans it up and wraps it with hatnote, styles and link
-- @param frame Frame object; used to preprocess the excerpt and to emit the templatestyles tag
-- @return Error markup when the page or section cannot be used,
--         the trimmed excerpt text when inline is set,
--         otherwise an mw.html node wrapping styles, hatnote, excerpt and "read more" link
function Excerpt.main( frame )

	-- Make sure the requested page exists
	local page = Excerpt.getArg( 1 )
	if not page or page == '{{{1}}}' then return Excerpt.getError( 'no-page' ) end
	local title = mw.title.new( page )
	if not title then return Excerpt.getError( 'invalid-title', page ) end
	if title.isRedirect then title = title.redirectTarget end
	if not title.exists then return Excerpt.getError( 'page-not-found', page ) end
	page = title.prefixedText

	-- Set variables from the template parameters
	-- Several of these (lists, tables, templates, paragraphs, references,
	-- subsections, noLinks, noBold) are read but not yet used in this revision
	local hash = string.match( Excerpt.getArg( 1 ), '[^#]+#(.+)' )
	local section = Excerpt.getArg( 2, hash )
	local hat = yesno( Excerpt.getArg( 'hat', true ) )
	local edit = yesno( Excerpt.getArg( 'edit', true ) )
	local editIntro = Excerpt.getArg( 'editintro' )
	local this = Excerpt.getArg( 'this' )
	local only = Excerpt.getArg( 'only' )
	local files = Excerpt.getArg( 'files', Excerpt.getArg( 'file', ( only == 'file' and 1 ) ) )
	local lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list', ( only == 'list' and 1 ) ) )
	local tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table', ( only == 'table' and 1 ) ) )
	local templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template', ( only == 'template' and 1 ) ) )
	local paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph', ( only == 'paragraph' and 1 ) ) )
	local references = Excerpt.getArg( 'references' )
	local subsections = not yesno( Excerpt.getArg( 'subsections' ) )
	local noLinks = not yesno( Excerpt.getArg( 'links', true ) )
	local noBold = not yesno( Excerpt.getArg( 'bold' ) )
	local onlyFreeFiles = yesno( Excerpt.getArg( 'onlyfreefiles', true ) )
	local briefDates = yesno( Excerpt.getArg( 'briefdates', false ) )
	local inline = yesno( Excerpt.getArg( 'inline' ) )
	local quote = yesno( Excerpt.getArg( 'quote' ) )
	local more = yesno( Excerpt.getArg( 'more' ) )
	local class = Excerpt.getArg( 'class' )
	local displayTitle = Excerpt.getArg( 'displaytitle' ) or page

	-- Get the full wikitext
	local wikitext = title:getContent()

	-- Now get the part we're interested in
	-- Declared local so the excerpt does not leak into the global environment
	local excerpt
	if section then
		excerpt = parser.getSection( wikitext, section )
		if not excerpt then return Excerpt.getError( 'section-not-found', section ) end
		if excerpt == '' and not only then return Excerpt.getError( 'section-empty', section ) end
	else
		excerpt = parser.getLead( wikitext )
		if excerpt == '' and not only then return Excerpt.getError( 'lead-empty' ) end
	end

	if briefDates then
		excerpt = Excerpt.fixDates( excerpt )
	end

	-- If no file was found, try to get one from the infobox
	-- The "caller asked for files" test is computed on its own because `and`
	-- binds tighter than `or`: written inline, only=file would bypass the
	-- other two conditions and reach addInfoboxFile without config.captions
	local wantsFiles = only == 'file' or only == 'files' or ( not only and files ~= '0' )
	if wantsFiles -- caller asked for files
		and config.captions -- and we have the config option required to try finding files in infoboxes
		and #parser.getFiles( excerpt ) == 0 -- and there are no files in the excerpt
	then
		excerpt = Excerpt.addInfoboxFile( excerpt )
	end

	if onlyFreeFiles then
		excerpt = Excerpt.removeNonFreeFiles( excerpt )
	end

	-- Remove blacklisted templates
	excerpt = Excerpt.removeBlacklist( excerpt )

	-- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly
	excerpt = mw.text.trim( excerpt )
	excerpt = string.gsub( excerpt, '\n\n\n+', '\n\n' )
	excerpt = '\n' .. excerpt .. '\n'

	-- Remove nested categories
	excerpt = frame:preprocess( excerpt )
	excerpt = Excerpt.removeCategories( excerpt )

	-- Add tracking categories
	if config.categories then
		local contentCategory = config.categories.content
		if contentCategory and title.isContentPage then
			excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]'
		end
		local namespaceCategory = config.categories[ title.namespace ]
		if namespaceCategory then
			excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]'
		end
	end

	-- Inline excerpts are returned bare: no styles, hatnote, wrapper or link
	if inline then
		return mw.text.trim( excerpt )
	end

	-- Load the styles
	local styles
	if config.styles then
		styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } )
	end

	excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt )

	-- Prepend the hatnote
	local hatNode
	if hat then
		hatNode = Excerpt.getHat( page, section, displayTitle, this, quote, only, edit, editIntro )
	end

	-- Append the "Read more" link
	-- `more` is a boolean at this point, so a text label is passed instead
	-- NOTE(review): assumes the i18n table has a 'more' message; getMessage falls back to the key itself
	local moreNode
	if more then
		moreNode = Excerpt.getReadMore( page, section, Excerpt.getMessage( 'more' ) )
	end

	-- Combine and return the elements
	local tag = quote and 'blockquote' or 'div'
	local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( class )
	return block:node( styles ):node( hatNode ):node( excerpt ):node( moreNode )
end

-- Look for an image in the parameters of the templates of the excerpt and,
-- if one is found, prepend it to the excerpt as a thumbnail
-- Each entry of config.captions is read as { file parameter, caption parameters, CSS class parameters (optional) }
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt, prefixed with a [[File:...]] link when a file was found
function Excerpt.addInfoboxFile( excerpt )
	local extensions = { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }

	-- We cannot distinguish the infobox from the other templates, so we search them all
	for _, template in pairs( parser.getTemplates( excerpt ) ) do
		local parameters = parser.getTemplateParameters( template )
		for _, entry in pairs( config.captions ) do
			local file = parameters[ entry[1] ]
			if file and Excerpt.matchAny( file, '^.*%.', extensions, '.*' ) then
				-- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg
				file = mw.ustring.match( file, '%[?%[?.-:([^{|]+)%]?%]?' ) or file

				-- The first caption parameter that is set wins
				local caption
				for _, name in pairs( entry[2] ) do
					if parameters[ name ] then
						caption = parameters[ name ]
						break
					end
				end

				-- Check for CSS classes
				-- We opt to use skin-invert-image instead of skin-invert
				-- in all other cases, the CSS provided in the infobox is used
				local cssClass
				if entry[3] then
					for _, name in pairs( entry[3] ) do
						local value = parameters[ name ]
						if value then
							if value == 'skin-invert' then
								cssClass = 'skin-invert-image'
							else
								cssClass = value
							end
							break
						end
					end
				end

				local class = ''
				if cssClass then
					class = '|class=' .. cssClass
				end
				return '[[File:' .. file .. class .. '|thumb|' .. ( caption or '' ) .. ']]' .. excerpt
			end
		end
	end
	return excerpt
end

-- Remove from the wikitext every file whose description page mentions "non-free"
-- @param wikitext Wikitext to clean
-- @return The wikitext without the files detected as non-free
function Excerpt.removeNonFreeFiles( wikitext )
	local frame
	for _, file in pairs( parser.getFiles( wikitext ) ) do
		local fileName = 'File:' .. parser.getFileName( file )
		local fileTitle = mw.title.new( fileName )
		-- mw.title.new returns nil for an invalid title; such files are left untouched
		if fileTitle then
			local fileDescription = fileTitle:getContent()
			if not fileDescription or fileDescription == '' then
				-- No local description page, so transclude it instead
				frame = frame or mw.getCurrentFrame()
				fileDescription = frame:preprocess( '{{' .. fileName .. '}}' ) -- try Commons
			end
			if fileDescription and string.match( fileDescription, '[Nn]on%-free' ) then
				wikitext = Excerpt.removeString( wikitext, file )
			end
		end
	end
	return wikitext
end

-- Build the hatnote shown above the excerpt
-- @param page Prefixed title of the excerpted page
-- @param section Name of the excerpted section, or nil for the lead
-- @param displayTitle Text used for the link to the excerpted page
-- @param this Custom text replacing the default start of the hatnote, or nil
-- @param quote Truthy if the excerpt is shown as a quote
-- @param only Value of the "only" parameter, used as message key when set
-- @param edit Truthy to append an edit link
-- @param editIntro Page title passed as editintro to the edit link, or nil
-- @return The preprocessed hatnote text if config.hat is set, otherwise an mw.html node
function Excerpt.getHat( page, section, displayTitle, this, quote, only, edit, editIntro )
	local hat

	-- Build the main part of the hatnote
	if this then
		hat = this
	elseif quote then
		hat = Excerpt.getMessage( 'this' )
	elseif only then
		hat = Excerpt.getMessage( only )
	else
		hat = Excerpt.getMessage( 'section' )
	end
	hat = hat .. ' ' .. Excerpt.getMessage( 'excerpt' )

	-- Build the section link
	if section then
		hat = hat .. ' [[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '|' .. displayTitle
			.. ' § ' .. mw.ustring.gsub( section, '%[%[([^]|]+)|?[^]]*%]%]', '%1' ) .. ']].' -- remove nested links
	else
		hat = hat .. ' [[:' .. page .. '|' .. displayTitle .. ']].'
	end

	-- Build the edit link
	if edit then
		-- NOTE(review): the link targets the current page, not `page` — confirm this is intended
		local title = mw.title.getCurrentTitle()
		local query = 'action=edit'
		if editIntro then
			-- Encode the value: a raw space would end the URL of the external link below
			query = query .. '&editintro=' .. mw.uri.encode( editIntro, 'WIKI' )
		end
		local editUrl = title:fullUrl( query )
		hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>['
		hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain()
		hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>'
	end

	if config.hat then
		-- config.hat is prepended and '}}' appended before preprocessing
		-- (presumably config.hat is an opening template call such as '{{Hatnote|' — verify against the config page)
		local frame = mw.getCurrentFrame()
		hat = config.hat .. hat .. '}}'
		hat = frame:preprocess( hat )
	else
		hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat )
	end

	return hat
end

-- Build the "Read more" link shown below the excerpt
-- @param page Prefixed title of the excerpted page
-- @param section Name of the excerpted section, or nil for the lead
-- @param more Label of the link; any non-string or empty value falls back to the 'more' message
-- @return mw.html node containing the bold link
function Excerpt.getReadMore( page, section, more )
	-- Callers may pass the boolean flag rather than a label
	local label = more
	if type( label ) ~= 'string' or label == '' then
		label = Excerpt.getMessage( 'more' )
	end

	local target = page
	if section then
		target = target .. '#' .. section
	end

	-- Target and label go in one link; the target must not be dropped
	local link = "'''[[" .. target .. '|' .. label .. "]]'''"
	return mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( link )
end

-- Shorten a "(born ... year – died ... year)" parenthesis to "(year–year)"
-- Only the first parenthesis is considered, and only if it opens within
-- 100 characters after the initial templates
-- @todo Use parser.getParagraphs() to get the first paragraph
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt, with the first parenthesis shortened when it looks like a lifespan
function Excerpt.fixDates( excerpt )
	local find, match, sub = mw.ustring.find, mw.ustring.match, mw.ustring.sub

	-- Skip the templates at the very start of the excerpt
	local from = 1
	while true do
		local first, last = find( excerpt, "%s*%b{}%s*", from )
		if not first or first > from then break end
		from = last + 1
	end

	-- Get (...), which may be (year–year), but look only near the start
	local open, close = find( excerpt, "%b()", from )
	if not open or open >= from + 100 then
		return excerpt
	end

	local year1, conjunction, year2 = match( sub( excerpt, open, close ), '(%d%d%d+)(.-)(%d%d%d+)' )
	if not ( year1 and year2 ) then
		return excerpt
	end

	-- The text between the years must contain a dash or a {{snd}} template
	local isRange = match( conjunction, '[%-–—]' ) or match( conjunction, '{{%s*[sS]nd%s*}}' )
	if not isRange then
		return excerpt
	end

	-- Accept only plausible lifespans that did not start in the future
	local y1, y2 = tonumber( year1 ), tonumber( year2 )
	if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( "%Y" ) ) then
		-- Keep the text up to "(" and from ")" on, replacing what is in between
		excerpt = sub( excerpt, 1, open ) .. year1 .. "–" .. year2 .. sub( excerpt, close )
	end
	return excerpt
end

-- Remove blacklisted templates
-- The blacklist is read from config.blacklist
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt without the templates whose name matches the blacklist
function Excerpt.removeBlacklist( excerpt )
	local blacklist = config.blacklist and table.concat( config.blacklist, ',' ) or ''
	-- Nothing to remove; an empty filter would otherwise match every template name
	if blacklist == '' then
		return excerpt
	end
	local filters = Excerpt.parseFilter( blacklist )
	for _, template in pairs( parser.getTemplates( excerpt ) ) do
		local templateName = parser.getTemplateName( template )
		if Excerpt.matchFilter( templateName, filters ) then
			excerpt = Excerpt.removeString( excerpt, template )
		end
	end
	return excerpt
end

-- Remove the category links from the excerpt
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt without the categories found by the parser
function Excerpt.removeCategories( excerpt )
	local cleaned = excerpt
	for _, categoryLink in pairs( parser.getCategories( excerpt ) ) do
		cleaned = Excerpt.removeString( cleaned, categoryLink )
	end
	return cleaned
end

-- Helper method to list every name a namespace goes by
-- @param name Canonical name of the namespace, for example 'File'
-- @return List with the aliases of the namespace, followed by its local name
--         and its canonical name, for example {'Image','Archivo','File'}
function Excerpt.getNamespaces( name )
	local namespace = mw.site.namespaces[ name ]
	-- Clone because https://en.wikipedia.org/w/index.php?diff=1056921358
	local names = mw.clone( namespace.aliases )
	names[ #names + 1 ] = namespace.name
	names[ #names + 1 ] = namespace.canonicalName
	return names
end

-- Helper method to try several alternative patterns in order
-- For each item of the list, the pattern pre..item..post is matched against
-- the text and the captures of the first successful match are returned
-- @param text Text to search
-- @param pre Pattern fragment placed before each item
-- @param list List of alternative pattern fragments
-- @param post Pattern fragment placed after each item
-- @param init Position where the search starts, passed on to mw.ustring.match
-- @return The captures of the first pattern that matches, or nil
function Excerpt.matchAny( text, pre, list, post, init )
	for index = 1, #list do
		local pattern = pre .. list[ index ] .. post
		local captures = { mw.ustring.match( text, pattern, init ) }
		if captures[1] then
			return unpack( captures )
		end
	end
	return nil
end

-- Helper function to get arguments
-- The args of the parent frame (the template call) are searched first, then
-- the args of the current frame; blank values are treated as not set
-- @param key Name or position of the argument
-- @param default Value returned when the argument is not set
-- @return The first non-blank value found (untrimmed), or the default
function Excerpt.getArg( key, default )
	local frame = mw.getCurrentFrame()
	local sources = { frame:getParent().args, frame.args }
	for _, args in ipairs( sources ) do
		for name, value in pairs( args ) do
			if name == key and mw.text.trim( value ) ~= '' then
				return value
			end
		end
	end
	return default
end

-- Helper method to build an error message
-- On content pages, the configured error category (if any) is added too
-- @param key Key of the error, without the 'error-' prefix
-- @param value Value passed on to the message, if any
-- @return mw.html node with the error message
function Excerpt.getError( key, value )
	local message = Excerpt.getMessage( 'error-' .. key, value )
	local markup = mw.html.create( 'div' )
	markup:addClass( 'error' )
	markup:wikitext( message )

	local errorCategory = config.categories and config.categories.errors
	if errorCategory and mw.title.getCurrentTitle().isContentPage then
		markup:node( '[[Category:' .. errorCategory .. ']]' )
	end
	return markup
end

-- Helper method to get a localized message
-- Messages are read through Module:TNT from https://commons.wikimedia.org/wiki/Data:I18n/Module:Excerpt.tab
-- @param key Key of the message
-- @param value Value passed on to TNT.format, if any
-- @return The localized message, or the key itself if Module:TNT cannot be
--         loaded or fails to format the message
function Excerpt.getMessage( key, value )
	local loaded, TNT = pcall( require, 'Module:TNT' )
	if loaded then
		local formatted, message = pcall( TNT.format, 'I18n/Module:Excerpt.tab', key, value )
		if formatted then
			return message
		end
	end
	return key
end

-- Helper method to escape a string for use in Lua patterns
-- @param str String to escape
-- @return The string with every magic character prefixed with %
function Excerpt.escapeString( str )
	-- The parentheses drop the substitution count that gsub returns as second value
	return ( str:gsub( '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0' ) )
end

-- Helper method to remove a string from a text
-- @param text Text where to search for the string to remove
-- @param str String to remove
-- @return The given text with the string removed
function Excerpt.removeString( text, str )
	local pattern = Excerpt.escapeString( str )
	if #pattern > 9999 then -- strings longer than 10000 bytes can't be put into regexes
		-- Match on the first and last 999 characters only, with anything in between
		local head = Excerpt.escapeString( mw.ustring.sub( str, 1, 999 ) )
		local tail = Excerpt.escapeString( mw.ustring.sub( str, -999 ) )
		pattern = head .. '.-' .. tail
	end
	-- The parentheses drop the substitution count that gsub returns as second value
	return ( string.gsub( text, pattern, '' ) )
end

-- Helper method to convert a comma-separated list of numbers, min-max ranges or names into a set of flags
-- @param value Comma-separated list, for example '1,3-5' or 'a,b,c'; a leading '-' marks the list as a blacklist
-- @return Table with a true entry for each listed number or name, for example
--         {[1]=true,[3]=true,[4]=true,[5]=true}, or nil if no value was given
-- @return Boolean indicating whether the flags should be treated as a blacklist or not
-- @todo Merge with matchFilter
function Excerpt.parseFilter( value )
	if not value then return nil, false end
	local flags = {}
	local blacklist = false
	if string.sub( value, 1, 1 ) == '-' then
		blacklist = true
		value = string.sub( value, 2 )
	end
	local ranges = mw.text.split( value, ',' ) -- split ranges: '1,3-5' to {'1','3-5'}
	for _, range in pairs( ranges ) do
		range = mw.text.trim( range )
		-- Skip empty items: matchFilter would treat '' as a pattern that matches everything
		if range ~= '' then
			local min, max = mw.ustring.match( range, '^(%d+)%s*[-–—]%s*(%d+)$' ) -- '3-5' to min=3 max=5
			if not max then min, max = string.match( range, '^((%d+))$' ) end -- '1' to min=1 max=1
			if max then
				-- The captures are strings; convert them explicitly for the numeric loop
				for i = tonumber( min ), tonumber( max ) do flags[ i ] = true end
			else
				flags[ range ] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3'
			end
		end
	end
	return flags, blacklist
end

-- Helper function to see if a value matches any of the given filters
-- A filter matches if it equals the value (ignoring the case of the first
-- letter) or, for non-numeric filters, if it matches the value as a pattern
-- @param value Value to test, for example a template name or a number
-- @param filters Table whose keys are the filters, as returned by parseFilter
-- @return true if any filter matches, false otherwise
function Excerpt.matchFilter( value, filters )
	-- parseFilter returns nil when there is no filter at all
	if not value or not filters then return false end
	value = tostring( value )
	local lang = mw.language.getContentLanguage()
	local lcvalue = lang:lcfirst( value )
	local ucvalue = lang:ucfirst( value )
	for filter in pairs( filters ) do
		if value == tostring( filter )
		or lcvalue == filter
		or ucvalue == filter
		or ( not tonumber( filter ) and mw.ustring.match( value, filter ) ) then
			return true
		end
	end
	-- Always return a boolean, like the guard at the top
	return false
end

-- Entry points for backwards compatibility
-- Both simply forward the frame to Excerpt.main
-- @todo Verify that no one uses them and remove them
function Excerpt.lead( frame )
	return Excerpt.main( frame )
end

function Excerpt.excerpt( frame )
	return Excerpt.main( frame )
end

return Excerpt