跳转到内容

模組:Citation/CS1/Utilities/Antigng

被永久保护的模块
维基百科,自由的百科全书

这是本页的一个历史版本,由Antigng留言 | 贡献2022年3月28日 (一) 13:26编辑。这可能和当前版本存在着巨大的差异。

--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
]]

local cfg;																		-- table of tables imported from selected Module:Citation/CS1/Configuration


--[[--------------------------< I S _ S E T >------------------------------------------------------------------

Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) or when it is not an empty string.
This function is global because it is called from both this module and from Date validation

]]
function is_set( var )
	return not (var == nil or var == '');
end

--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------

Locates and returns the first set value in a table of values where the order established in the table,
left-to-right (or top-to-bottom), is the order in which the values are evaluated.  Returns nil if none are set.

This version replaces the original 'for _, val in pairs do' and a similar version that used ipairs.  With the pairs
version the order of evaluation could not be guaranteed.  With the ipairs version, a nil value would terminate
the for-loop before it reached the actual end of the list.

]]

local function first_set (list, count)
	local i = 1;
	while i <= count do															-- loop through all items in list
		if is_set( list[i] ) then
			return list[i];														-- return the first set list member
		end
		i = i + 1;																-- point to next
	end
end

--[[--------------------------< I N _ A R R A Y >--------------------------------------------------------------

Whether needle is in haystack

]]

local function in_array( needle, haystack )
	if needle == nil then
		return false;
	end
	for n,v in ipairs( haystack ) do
		if v == needle then
			return n;
		end
	end
	return false;
end

--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------

Populates numbered arguments in a message string using an argument table.

]]

local function substitute( msg, args )
	return args and mw.message.newRawMessage( msg, args ):plain() or msg;
end

--[[--------------------------< H A S _ A C C E P T _ A S _ W R I T T E N >------------------------------------

When <str> is wholly wrapped in accept-as-written markup, return <str> without markup and true; return <str> and false else

with allow_empty = false, <str> must have at least one character inside the markup
with allow_empty = true, <str> the markup frame can be empty like (()) to distinguish an empty template parameter from the specific condition "has no applicable value" in citation-context.

After further evaluation the two cases might be merged at a later stage, but should be kept separated for now.

]]

local function has_accept_as_written (str, allow_empty)
	local count;
	if true == allow_empty then
		str, count = str:gsub ('^%(%((.*)%)%)$', '%1'); 						-- allows (()) to be an empty set
	else
		str, count = str:gsub ('^%(%((.+)%)%)$', '%1');
	end
	return str, 0 ~= count;
end


--[[--------------------------< I S _ S E T >------------------------------------------------------------------

Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) or when it is not an empty string.

]]

local function is_set (var)
	return not (var == nil or var == '');
end


--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------

Populates numbered arguments in a message string using an argument table.

]]

local function substitute (msg, args)
	return args and mw.message.newRawMessage (msg, args):plain() or msg;
end


--[[--------------------------< S A F E _ F O R _ I T A L I C S >----------------------------------------------

Protects a string that will be wrapped in wiki italic markup '' ... ''

Note: We cannot use <i> for italics, as the expected behavior for italics specified by ''...'' in the title is that
they will be inverted (i.e. unitalicized) in the resulting references.  In addition, <i> and '' tend to interact
poorly under Mediawiki's HTML tidy.

]]

local function safe_for_italics (str)
	if not is_set (str) then return str end

	if str:sub (1, 1) == "'" then str = "<span></span>" .. str; end
	if str:sub (-1, -1) == "'" then str = str .. "<span></span>"; end
	
	-- Remove newlines as they break italics.
	return str:gsub ('\n', ' ');

end


--[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------

Applies styling to various parameters.  Supplied string is wrapped using a message_list configuration taking one
argument; protects italic styled parameters.  Additional text taken from citation_config.presentation - the reason
this function is similar to but separate from wrap_msg().

]]

local function wrap_style (key, str)
	if not is_set (str) then
		return "";
	elseif in_array (key, {'italic-title', 'trans-italic-title'}) then
		str = safe_for_italics (str);
	end

	return substitute (cfg.presentation[key], {str});
end


--[[--------------------------< M A K E _ S E P _ L I S T >------------------------------------------------------------

make a separated list of items using provided separators.
	<sep_list> - typically '<comma><space>'
	<sep_list_pair> - typically '<space>and<space>'
	<sep_list_end> - typically '<comma><space>and<space>' or '<comma><space>&<space>'

defaults to cfg.presentation['sep_list'], cfg.presentation['sep_list_pair'], and cfg.presentation['sep_list_end']
if <sep_list_end> is specified, <sep_list> and <sep_list_pair> must also be supplied

]]

local function make_sep_list (count, list_seq, sep_list, sep_list_pair, sep_list_end)
	local list = '';

	if not sep_list then														-- set the defaults
		sep_list = cfg.presentation['sep_list'];
		sep_list_pair = cfg.presentation['sep_list_pair'];
		sep_list_end = cfg.presentation['sep_list_end'];
	end
	
	if 2 >= count then
		list = table.concat (list_seq, sep_list_pair);							-- insert separator between two items; returns list_seq[1] then only one item
	elseif 2 < count then
		list = table.concat (list_seq, sep_list, 1, count - 1);					-- concatenate all but last item with plain list separator
		list = table.concat ({list, list_seq[count]}, sep_list_end);			-- concatenate last item onto end of <list> with final separator
	end
	
	return list;
end

--[[--------------------------< S A F E _ J O I N >------------------------------------------------------------

Joins a sequence of strings together while checking for duplicate separation characters.

]]

local function safe_join( tbl, duplicate_char )
	--[[
	Note: we use string functions here, rather than ustring functions.
	
	This has considerably faster performance and should work correctly as 
	long as the duplicate_char is strict ASCII.  The strings
	in tbl may be ASCII or UTF8.
	]]
	
	local str = '';																-- the output string
	local comp = '';															-- what does 'comp' mean?
	local end_chr = '';
	local trim;
	for _, value in ipairs( tbl ) do
		if value == nil then value = ''; end
		
		if str == '' then														-- if output string is empty
			str = value;														-- assign value to it (first time through the loop)
		elseif value ~= '' then
			if value:sub(1,1) == '<' then										-- Special case of values enclosed in spans and other markup.
				comp = value:gsub( "%b<>", "" );								-- remove html markup (<span>string</span> -> string)
			else
				comp = value;
			end
																				-- typically duplicate_char is sepc
			if comp:sub(1,1) == duplicate_char then								-- is first charactier same as duplicate_char? why test first character?
																				--   Because individual string segments often (always?) begin with terminal punct for th
																				--   preceding segment: 'First element' .. 'sepc next element' .. etc?
				trim = false;
				end_chr = str:sub(-1,-1);										-- get the last character of the output string
				-- str = str .. "<HERE(enchr=" .. end_chr.. ")"					-- debug stuff?
				if end_chr == duplicate_char then								-- if same as separator
					str = str:sub(1,-2);										-- remove it
				elseif end_chr == "'" then										-- if it might be wikimarkup
					if str:sub(-3,-1) == duplicate_char .. "''" then			-- if last three chars of str are sepc'' 
						str = str:sub(1, -4) .. "''";							-- remove them and add back ''
					elseif str:sub(-5,-1) == duplicate_char .. "]]''" then		-- if last five chars of str are sepc]]'' 
						trim = true;											-- why? why do this and next differently from previous?
					elseif str:sub(-4,-1) == duplicate_char .. "]''" then		-- if last four chars of str are sepc]'' 
						trim = true;											-- same question
					end
				elseif end_chr == "]" then										-- if it might be wikimarkup
					if str:sub(-3,-1) == duplicate_char .. "]]" then			-- if last three chars of str are sepc]] wikilink 
						trim = true;
					elseif str:sub(-2,-1) == duplicate_char .. "]" then			-- if last two chars of str are sepc] external link
						trim = true;
					elseif str:sub(-4,-1) == duplicate_char .. "'']" then		-- normal case when |url=something & |title=Title.
						trim = true;
					end
				elseif end_chr == " " then										-- if last char of output string is a space
					if str:sub(-2,-1) == duplicate_char .. " " then				-- if last two chars of str are <sepc><space>
						str = str:sub(1,-3);									-- remove them both
					end
				end

				if trim then
					if value ~= comp then 										-- value does not equal comp when value contains html markup
						local dup2 = duplicate_char;
						if dup2:match( "%A" ) then dup2 = "%" .. dup2; end		-- if duplicate_char not a letter then escape it
						
						value = value:gsub( "(%b<>)" .. dup2, "%1", 1 )			-- remove duplicate_char if it follows html markup
					else
						value = value:sub( 2, -1 );								-- remove duplicate_char when it is first character
					end
				end
			end
			str = str .. value;													--add it to the output string
		end
	end
	return str;
end  

--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------

Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
This function strips common patterns of apostrophe markup.  We presume that editors who have taken the time to
markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.

]]

local function strip_apostrophe_markup (argument)
	if not is_set (argument) then return argument; end

	while true do
		if argument:match ("%'%'%'%'%'") then									-- bold italic (5)
			argument=argument:gsub("%'%'%'%'%'", "");							-- remove all instances of it
		elseif argument:match ("%'%'%'%'") then									-- italic start and end without content (4)
			argument=argument:gsub("%'%'%'%'", "");
		elseif argument:match ("%'%'%'") then									-- bold (3)
			argument=argument:gsub("%'%'%'", "");
		elseif argument:match ("%'%'") then										-- italic (2)
			argument=argument:gsub("%'%'", "");
		else
			break;
		end
	end
	return argument;															-- done
end


--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Sets local cfg table to same (live or sandbox) as that used by the other modules.

]]

local function set_selected_modules (cfg_table_ptr)
	cfg = cfg_table_ptr;
	
end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

return {
	first_set = first_set,														-- exported functions
	has_accept_as_written = has_accept_as_written,
	in_array = in_array,
	is_set = is_set,
	make_sep_list = make_sep_list,
	safe_for_italics = safe_for_italics,
	safe_join = safe_join,
	substitude = substitude,
	strip_apostrophe_markup = strip_apostrophe_markup,
	substitute = substitute,
	wrap_style = wrap_style,
	
	set_selected_modules = set_selected_modules
	}