Module:Citation/CS1
Appearance
Lua error in Module:Documentation at line 144: message: type error in message cfg.container (string expected, got nil).
local z = {
error_categories = {}; -- for categorizing citations that contain errors
error_ids = {};
message_tail = {};
maintenance_cats = {}; -- for categorizing citations that aren't erroneous per se, but could use a little work
properties_cats = {}; -- for categorizing citations based on certain properties, language of source for instance
}
-- Whether variable is set or not
function is_set( var )
return not (var == nil or var == '');
end
-- First set variable or nil if none
function first_set(...)
local list = {...};
for _, var in pairs(list) do
if is_set( var ) then
return var;
end
end
end
-- Whether needle is in haystack
function inArray( needle, haystack )
if needle == nil then
return false;
end
for n,v in ipairs( haystack ) do
if v == needle then
return n;
end
end
return false;
end
--[[
Categorize and emit an error message when the citation contains one or more deprecated parameters. Because deprecated parameters (currently |month=,
|coauthor=, and |coauthors=) aren't related to each other and because these parameters may be concatenated into the variables used by |date= and |author#= (and aliases)
details of which parameter caused the error message are not provided. Only one error message is emitted regardless of the number of deprecated parameters in the citation.
]]
function deprecated_parameter()
if true ~= Page_in_deprecated_cat then -- if we haven't been here before then set a
Page_in_deprecated_cat=true; -- sticky flag so that if there are more than one deprecated parameter the category is added only once
table.insert( z.message_tail, { seterror( 'deprecated_params', {}, true ) } ); -- add error message
end
end
-- Populates numbered arguments in a message string using an argument table.
function substitute( msg, args )
return args and mw.message.newRawMessage( msg, args ):plain() or msg;
end
--[[--------------------------< K E R N _ Q U O T E S >--------------------------------------------------------
Apply kerning to open the space between the quote mark provided by the Module and a leading or trailing quote mark contained in a |title= or |chapter= parameter's value.
This function will positive kern either single or double quotes:
"'Unkerned title with leading and trailing single quote marks'"
" 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)
Double single quotes (italic or bold wikimarkup) are not kerned.
Call this function for chapter titles, for website titles, etc; not for book titles.
]]
function kern_quotes (str)
local cap='';
local cap2='';
cap, cap2 = str:match ("^([\"\'])([^\'].+)"); -- match leading double or single quote but not double single quotes
if is_set (cap) then
str = substitute (cfg.presentation['kern-left'], {cap, cap2});
end
cap, cap2 = str:match ("^(.+[^\'])([\"\'])$")
if is_set (cap) then
str = substitute (cfg.presentation['kern-right'], {cap, cap2});
end
return str;
end
--[[--------------------------< F O R M A T _ S C R I P T _ V A L U E >----------------------------------------
|script-title= holds title parameters that are not written in Latin based scripts: Chinese, Japanese, Arabic, Hebrew, etc. These scripts should
not be italicized and may be written right-to-left. The value supplied by |script-title= is concatenated onto Title after Title has been wrapped
in italic markup.
Regardless of language, all values provided by |script-title= are wrapped in <bdi>...</bdi> tags to isolate rtl languages from the English left to right.
|script-title= provides a unique feature. The value in |script-title= may be prefixed with a two-character ISO639-1 language code and a colon:
|script-title=ja:*** *** (where * represents a Japanese character)
Spaces between the two-character code and the colon and the colon and the first script character are allowed:
|script-title=ja : *** ***
|script-title=ja: *** ***
|script-title=ja :*** ***
Spaces preceding the prefix are allowed: |script-title = ja:*** ***
The prefix is checked for validity. If it is a valid ISO639-1 language code, the lang attribute (lang="ja") is added to the <bdi> tag so that browsers can
know the language the tag contains. This may help the browser render the script more correctly. If the prefix is invalid, the lang attribute
is not added. At this time there is no error message for this condition.
At this writing, only |script-title= is supported. It is anticipated that additional parameters will be created to use this function.
TODO: error messages when prefix is invalid ISO639-1 code; when script_value has prefix but no script;
]]
function format_script_value (script_value)
local lang=''; -- initialize to empty string
local name;
if script_value:match('^%l%l%s*:') then -- if first 3 non-space characters are script language prefix
lang = script_value:match('^(%l%l)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script
if not is_set (lang) then
return ''; -- script_value was just the prefix so return empty string
end
-- if we get this far we have prefix and script
name = mw.language.fetchLanguageName( lang, "en" ); -- get language name so that we can use it to categorize
if is_set (name) then -- is prefix a proper ISO 639-1 language code?
script_value = script_value:gsub ('^%l%l%s*:%s*', ''); -- strip prefix from script
-- is prefix one of these language codes?
if inArray (lang, {'ar', 'bs', 'dv', 'el', 'fa', 'hy', 'ja', 'ko', 'ku', 'he', 'ps', 'ru', 'sd', 'sr', 'th', 'uk', 'ug', 'yi', 'zh'}) then
table.insert( z.properties_cats, 'CS1 uses ' .. name .. '-language script ('..lang..')'); -- categorize in language-specific categories
else
table.insert( z.properties_cats, 'CS1 uses foreign language script'); -- use this category as a catchall until language-specific category is available
end
lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute
else
lang = ''; -- invalid so set lang to empty string
end
end
script_value = substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is rtl
return script_value;
end
--[[--------------------------< S C R I P T _ C O N C A T E N A T E >------------------------------------------
Initially for |title= and |script-title=, this function concatenates those two parameter values after the script value has been
wrapped in <bdi> tags.
]]
function script_concatenate (title, script)
if is_set (script) then
script = format_script_value (script); -- <bdi> tags, lang atribute, categorization, etc; returns empty string on error
if is_set (script) then
title = title .. ' ' .. script; -- concatenate title and script title
end
end
return title;
end
--[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------
Applies styling to various parameters. Supplied string is wrapped using a message_list configuration taking one
argument; protects italic styled parameters. Additional text taken from citation_config.presentation - the reason
this function is similar to but separate from wrap_msg().
]]
function wrap_style (key, str)
if not is_set( str ) then
return "";
elseif inArray( key, { 'italic-title', 'trans-italic-title' } ) then
str = safeforitalics( str );
end
return substitute( cfg.presentation[key], {str} );
end
--[[--------------------------< W R A P _ M S G >--------------------------------------------------------------
Applies additional message text to various parameter values. Supplied string is wrapped using a message_list
configuration taking one argument. Supports lower case text for {{citation}} templates. Additional text taken
from citation_config.messages - the reason this function is similar to but separate from wrap_style().
]]
function wrap_msg (key, str, lower)
if not is_set( str ) then
return "";
end
if true == lower then
local msg;
msg = cfg.messages[key]:lower(); -- set the message to lower case before
str = substitute( msg, {str} ); -- including template text
return str;
else
return substitute( cfg.messages[key], {str} );
end
end
--[[--------------------------< F O R M A T _ C H A P T E R _ T I T L E >--------------------------------------
Format the three chapter parameters: |chapter=, |trans-chapter=, and |chapter-url= into a single Chapter meta-
parameter (chapter_url_source used for error messages).
]]
function format_chapter_title (chapter, transchapter, chapterurl, chapter_url_source)
local chapter_error = '';
if not is_set (chapter) then
chapter = ''; -- just to be safe for concatenation
if is_set (transchapter) then
chapter = wrap_style ('trans-quoted-title', transchapter);
chapter_error = " " .. seterror ('trans_missing_chapter');
end
if is_set (chapterurl) then
chapter = externallink (chapterurl, chapter, chapter_url_source); -- adds bare_url_missing_title error if appropriate
end
return chapter .. chapter_error;
else -- here when chapter is set
chapter = kern_quotes (chapter); -- if necessary, separate chapter title's leading and trailing quote marks from Module provided quote marks
chapter = wrap_style ('quoted-title', chapter);
if is_set (transchapter) then
transchapter = wrap_style ('trans-quoted-title', transchapter);
chapter = chapter .. ' ' .. transchapter;
end
if is_set (chapterurl) then
chapter = externallink (chapterurl, chapter); -- adds bare_url_missing_title error if appropriate
end
end
return chapter;
end
--[[
Argument wrapper. This function provides support for argument
mapping defined in the configuration file so that multiple names
can be transparently aliased to single internal variable.
]]
function argument_wrapper( args )
local origin = {};
return setmetatable({
ORIGIN = function( self, k )
local dummy = self[k]; --force the variable to be loaded.
return origin[k];
end
},
{
__index = function ( tbl, k )
if origin[k] ~= nil then
return nil;
end
local args, list, v = args, cfg.aliases[k];
if type( list ) == 'table' then
v, origin[k] = selectone( args, list, 'redundant_parameters' );
if origin[k] == nil then
origin[k] = ''; -- Empty string, not nil
end
elseif list ~= nil then
v, origin[k] = args[list], list;
else
-- maybe let through instead of raising an error?
-- v, origin[k] = args[k], k;
error( cfg.messages['unknown_argument_map'] );
end
-- Empty strings, not nil;
if v == nil then
v = cfg.defaults[k] or '';
origin[k] = '';
end
tbl = rawset( tbl, k, v );
return v;
end,
});
end
--[[
Looks for a parameter's name in the whitelist.
Parameters in the whitelist can have three values:
true - active, supported parameters
false - deprecated, supported parameters
nil - unsupported parameters
]]
function validate( name )
local name = tostring( name );
local state = whitelist.basic_arguments[ name ];
-- Normal arguments
if true == state then return true; end -- valid actively supported parameter
if false == state then
deprecated_parameter (); -- parameter is deprecated but still supported
return true;
end
-- Arguments with numbers in them
name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#
state = whitelist.numbered_arguments[ name ];
if true == state then return true; end -- valid actively supported parameter
if false == state then
deprecated_parameter (); -- parameter is deprecated but still supported
return true;
end
return false; -- Not supported because not found or name is set to nil
end
--[[--------------------------< E R R O R C O M M E N T >------------------------------------------------------
Wraps error messages with css markup according to the state of hidden.
]]
function errorcomment( content, hidden )
return substitute( hidden and cfg.presentation['hidden-error'] or cfg.presentation['visible-error'], content );
end
--[[
Sets an error condition and returns the appropriate error message. The actual placement
of the error message in the output is the responsibility of the calling function.
]]
function seterror( error_id, arguments, raw, prefix, suffix )
local error_state = cfg.error_conditions[ error_id ];
prefix = prefix or "";
suffix = suffix or "";
if error_state == nil then
error( cfg.messages['undefined_error'] );
elseif is_set( error_state.category ) then
table.insert( z.error_categories, error_state.category );
end
local message = substitute( error_state.message, arguments );
message = message .. " ([[" .. cfg.messages['help page link'] ..
"#" .. error_state.anchor .. "|" ..
cfg.messages['help page label'] .. "]])";
z.error_ids[ error_id ] = true;
if inArray( error_id, { 'bare_url_missing_title', 'trans_missing_title' } )
and z.error_ids['citation_missing_title'] then
return '', false;
end
message = table.concat({ prefix, message, suffix });
if raw == true then
return message, error_state.hidden;
end
return errorcomment( message, error_state.hidden );
end
-- Formats a wiki style external link
function externallinkid(options)
local url_string = options.id;
if options.encode == true or options.encode == nil then
url_string = mw.uri.encode( url_string );
end
return mw.ustring.format( '[[%s|%s]]%s[%s%s%s %s]',
options.link, options.label, options.separator or " ",
options.prefix, url_string, options.suffix or "",
mw.text.nowiki(options.id)
);
end
-- Formats a wiki style internal link
function internallinkid(options)
return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]',
options.link, options.label, options.separator or " ",
options.prefix, options.id, options.suffix or "",
mw.text.nowiki(options.id)
);
end
-- Format an external link with error checking
function externallink( URL, label, source )
local error_str = "";
if not is_set( label ) then
label = URL;
if is_set( source ) then
error_str = seterror( 'bare_url_missing_title', { wrap_style ('parameter', source) }, false, " " );
else
error( cfg.messages["bare_url_no_origin"] );
end
end
if not checkurl( URL ) then
error_str = seterror( 'bad_url', {}, false, " " ) .. error_str;
end
return table.concat({ "[", URL, " ", safeforurl( label ), "]", error_str });
end
--[[--------------------------< N O W R A P _ D A T E >--------------------------------------------------------
When date is YYYY-MM-DD format wrap in nowrap span: <span ...>YYYY-MM-DD</span>. When date is DD MMMM YYYY or is
MMMM DD, YYYY then wrap in nowrap span: <span ...>DD MMMM</span> YYYY or <span ...>MMMM DD,</span> YYYY
DOES NOT yet support MMMM YYYY or any of the date ranges.
]]
function nowrap_date (date)
local cap='';
local cap2='';
if date:match("^%d%d%d%d%-%d%d%-%d%d$") then
date = substitute (cfg.presentation['nowrap1'], date);
elseif date:match("%a+%s*%d%d?,%s*%d%d%d%d") or date:match ("%d%d?%s*%a+%s*%d%d%d%d") then
cap, cap2 = string.match (date, "^(.*)%s+(%d%d%d%d)$");
date = substitute (cfg.presentation['nowrap2'], {cap, cap2});
end
return date;
end
--[[--------------------------< A M A Z O N >------------------------------------------------------------------
Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
]]
function amazon(id, domain)
local err_cat = ""
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
err_cat = ' ' .. seterror ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters
else
if id:match("^%d%d%d%d%d%d%d%d%d%d$") then -- if 10-digit numeric
if checkisbn( id ) then -- see if asin value is isbn10
table.insert( z.maintenance_cats, "CS1 maint: ASIN uses ISBN"); -- add to maint category
elseif not is_set (err_cat) then
err_cat = ' ' .. seterror ('bad_asin'); -- asin is not isbn10
end
elseif not id:match("^%u[%d%u]+$") then
err_cat = ' ' .. seterror ('bad_asin'); -- asin doesn't begin with uppercase alpha
end
end
if not is_set(domain) then
domain = "com";
elseif inArray (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
domain = "co." .. domain;
elseif inArray (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
domain = "com." .. domain;
end
local handler = cfg.id_handlers['ASIN'];
return externallinkid({link = handler.link,
label=handler.label , prefix="//www.amazon."..domain.."/dp/",id=id,
encode=handler.encode, separator = handler.separator}) .. err_cat;
end
--[[
format and error check arXiv identifier. There are two valid forms of the identifier:
the first form, valid only between date codes 9108 and 0703 is:
arXiv:<archive>.<class>/<date code><number><version>
where:
<archive> is a string of alpha characters - may be hyphenated; no other punctuation
<class> is a string of alpha characters - may be hyphenated; no other punctuation
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
first digit of YY for this form can only 9 and 0
<number> is a three-digit number
<version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)
the second form, valid from April 2007 is:
arXiv:<date code>.<number><version>
where:
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
<number> is a four-digit number
<version> is a 1 or more digit number preceded with a lowercase v; no spaces
]]
function arxiv (id)
local handler = cfg.id_handlers['ARXIV'];
local year, month, version;
local err_cat = ""
year, month, version = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d([v%d]*)$"); -- test for the 9108-0703 format
if not year then -- arXiv id is not proper 9108-0703 form
year, month, version = id:match("^(%d%d)([01]%d)%.%d%d%d%d([v%d]*)$"); -- test for the 0704- format
if not year then
err_cat = ' ' .. seterror( 'bad_arxiv' ); -- arXiv id doesn't match either format
else -- id is the 0704- format
year = tonumber(year);
month = tonumber(month);
if ((7 > year) or (1 > month and 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)
((7 == year) and (4 > month)) or -- when year is 07, is month invalid (before April)?
is_set (version) and nil == version:match("v%d+") then -- is version proper format of single 'v' followed by digits?
err_cat = ' ' .. seterror( 'bad_arxiv' ); -- set error message
end
end
else -- id is the 9108-0703 format; are the date values ok
year = tonumber(year);
month = tonumber(month);
if ((91 > year and 7 < year) or (1 > month and 12 < month)) or -- if invalid year or invalid month
((91 == year and 8 > month) or (7 == year and 3 < month)) or -- if years ok, are starting and ending months ok?
is_set (version) and nil == version:match("v%d+") then -- is version proper format of single 'v' followed by digits?
err_cat = ' ' .. seterror( 'bad_arxiv' ); -- set error message
end
end
return externallinkid({link = handler.link, label = handler.label,
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
--[[
lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
1. Remove all blanks.
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
3. If there is a hyphen in the string:
a. Remove it.
b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
1. All these characters should be digits, and there should be six or less. (not done in this function)
2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.
]]
function normalize_lccn (lccn)
lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace
if nil ~= string.find (lccn,'/') then
lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it
end
local prefix
local suffix
prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix
if nil ~= suffix then -- if there was a hyphen
suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6
lccn=prefix..suffix; -- reassemble the lccn
end
return lccn;
end
--[[
Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of the LCCN dictates the character type of the first 1-3 characters; the
rightmost eight are always digits. http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
length = 8 then all digits
length = 9 then lccn[1] is lower case alpha
length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
length = 12 then lccn[1] and lccn[2] are both lower case alpha
]]
function lccn(lccn)
local handler = cfg.id_handlers['LCCN'];
local err_cat = ''; -- presume that LCCN is valid
local id = lccn; -- local copy of the lccn
id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)
local len = id:len(); -- get the length of the lccn
if 8 == len then
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)
err_cat = ' ' .. seterror( 'bad_lccn' ); -- set an error message
end
elseif 9 == len then -- LCCN should be adddddddd
if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?
err_cat = ' ' .. seterror( 'bad_lccn' ); -- set an error message
end
elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...
if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern
err_cat = ' ' .. seterror( 'bad_lccn' ); -- no match, set an error message
end
end
elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd
if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns
err_cat = ' ' .. seterror( 'bad_lccn' ); -- no match, set an error message
end
elseif 12 == len then -- LCCN should be aadddddddddd
if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern
err_cat = ' ' .. seterror( 'bad_lccn' ); -- no match, set an error message
end
else
err_cat = ' ' .. seterror( 'bad_lccn' ); -- wrong length, set an error message
end
if not is_set (err_cat) and nil ~= lccn:find ('%s') then
err_cat = ' ' .. seterror( 'bad_lccn' ); -- lccn contains a space, set an error message
end
return externallinkid({link = handler.link, label = handler.label,
prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
--[[
Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This code checks the PMID to see that it
contains only digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically as more PMIDs are issued.
]]
function pmid(id)
local test_limit = 30000000; -- update this value as PMIDs approach
local handler = cfg.id_handlers['PMID'];
local err_cat = ''; -- presume that PMID is valid
if id:match("[^%d]") then -- if PMID has anything but digits
err_cat = ' ' .. seterror( 'bad_pmid' ); -- set an error message
else -- PMID is only digits
local id_num = tonumber(id); -- convert id to a number for range testing
if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries
err_cat = ' ' .. seterror( 'bad_pmid' ); -- set an error message
end
end
return externallinkid({link = handler.link, label = handler.label,
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
--[[
Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is
in the future, returns true; otherwise, returns false because the embargo has expired or |embargo= not set in this cite.
]]
function is_embargoed(embargo)
if is_set(embargo) then
local lang = mw.getContentLanguage();
local good1, embargo_date, good2, todays_date;
good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );
good2, todays_date = pcall( lang.formatDate, lang, 'U' );
if good1 and good2 and tonumber( embargo_date ) >= tonumber( todays_date ) then --is embargo date is in the future?
return true; -- still embargoed
end
end
return false; -- embargo expired or |embargo= not set
end
--[[
Format a PMC, do simple error checking, and check for embargoed articles.
The embargo parameter takes a date for a value. If the embargo date is in the future
the PMC identifier will not be linked to the article. If the embargo specifies a date in the past, or if it is empty or omitted, then
the PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.
PMCs are sequential numbers beginning at 1 and counting up. This code checks the PMC to see that it contains only digits and is less
than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.
]]
function pmc(id, embargo)
local test_limit = 5000000; -- update this value as PMCs approach
local handler = cfg.id_handlers['PMC'];
local err_cat = ''; -- presume that PMC is valid
local text;
if id:match("[^%d]") then -- if PMC has anything but digits
err_cat = ' ' .. seterror( 'bad_pmc' ); -- set an error message
else -- PMC is only digits
local id_num = tonumber(id); -- convert id to a number for range testing
if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries
err_cat = ' ' .. seterror( 'bad_pmc' ); -- set an error message
end
end
if is_embargoed(embargo) then
text="[[" .. handler.link .. "|" .. handler.label .. "]]:" .. handler.separator .. id .. err_cat; --still embargoed so no external link
else
text = externallinkid({link = handler.link, label = handler.label, --no embargo date, ok to link to article
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
end
return text;
end
-- Formats a DOI and checks for DOI errors.
-- DOI names contain two parts: prefix and suffix separated by a forward slash.
-- Prefix: directory indicator '10.' followed by a registrant code
-- Suffix: character string of any length chosen by the registrant
-- This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes,
-- or, if it ends with a period or a comma, this function will emit a bad_doi error message.
-- DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
-- and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely if ever used in doi names.
function doi(id, inactive)
local cat = ""
local handler = cfg.id_handlers['DOI'];
local text;
if is_set(inactive) then
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id;
if is_set(inactive_year) then
table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year );
else
table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year
end
inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
else
text = externallinkid({link = handler.link, label = handler.label,
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
inactive = ""
end
if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
cat = ' ' .. seterror( 'bad_doi' );
end
return text .. inactive .. cat
end
-- Formats an OpenLibrary link, and checks for associated errors.
function openlibrary(id)
local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'
local handler = cfg.id_handlers['OL'];
if ( code == "A" ) then
return externallinkid({link=handler.link, label=handler.label,
prefix="http://openlibrary.org/authors/OL",id=id, separator=handler.separator,
encode = handler.encode})
elseif ( code == "M" ) then
return externallinkid({link=handler.link, label=handler.label,
prefix="http://openlibrary.org/books/OL",id=id, separator=handler.separator,
encode = handler.encode})
elseif ( code == "W" ) then
return externallinkid({link=handler.link, label=handler.label,
prefix= "http://openlibrary.org/works/OL",id=id, separator=handler.separator,
encode = handler.encode})
else
return externallinkid({link=handler.link, label=handler.label,
prefix= "http://openlibrary.org/OL",id=id, separator=handler.separator,
encode = handler.encode}) ..
' ' .. seterror( 'bad_ol' );
end
end
--[[
Validate and format an issn. This code fixes the case where an editor has included an ISSN in the citation but has separated the two groups of four
digits with a space. When that condition occurred, the resulting link looked like this:
|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link
This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length and makes sure that the checkdigit agrees
with the calculated value. Incorrect length (8 digits), characters other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn
error message. The issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits.
]]
function issn(id)
local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate
local handler = cfg.id_handlers['ISSN'];
local text;
local valid_issn = true;
id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn
if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position
valid_issn=false; -- wrong length or improper character
else
valid_issn=is_valid_isxn(id, 8); -- validate issn
end
if true == valid_issn then
id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version
else
id = issn_copy; -- if not valid, use the show the invalid issn with error message
end
text = externallinkid({link = handler.link, label = handler.label,
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
if false == valid_issn then
text = text .. ' ' .. seterror( 'bad_issn' ) -- add an error message if the issn is invalid
end
return text
end