Jump to content

Module:Sandbox/Tom.Reding/Tools

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Tom.Reding (talk | contribs) at 15:28, 18 February 2019 (Modify output based on presence/absence of oldChildDescription). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Export table: the functions defined below are attached to p, and p is returned at the end of the file.
local p = {}

--- Reformat a compact timestamp (YYYYMMDDhhmmss) as ISO 8601 without a time zone designator.
-- @param frame  table whose args[1] holds the raw timestamp, e.g. '20190201223349'
-- @return '2019-02-01T22:33:49' style string when the (whitespace-trimmed) argument starts
--         with 14 digits; '' when the argument is missing or blank; otherwise the trimmed
--         argument unchanged, so malformed input stays recognisable instead of becoming '--T::'
function p.fmttimestamp(frame)
	local raw = frame.args[1] --20190201223349
	if raw == nil then
		return '' --previously raised a script error inside string.sub
	end
	--positional arguments are passed through untrimmed, so strip surrounding whitespace first
	local ts = string.match(tostring(raw), '^%s*(.-)%s*$')
	local y, mo, d, h, mi, s = string.match(ts, '^(%d%d%d%d)(%d%d)(%d%d)(%d%d)(%d%d)(%d%d)')
	if not y then
		return ts
	end
	return y..'-'..mo..'-'..d..'T'..h..':'..mi..':'..s --ISO 8601 format without time zone designator
end

-- Ordered { pattern, replacement } rules that turn a plural description fragment into a singular one.
-- They are applied top to bottom with mw.ustring.gsub, in the same sequence the original inline code used.
-- The trailing '(%f[\0, ])' on the special cases is a frontier test (presumably: stem followed by
-- end-of-string, comma or space); it is how several endings are covered without alternation.
local singularRules = {
	--remove 's' semi-liberally
	{ '^([%w]+[^s])s([, ])', '%1%2' },            --birdS that ...; plantS, guava
	{ '^([a-z]+[^s])s$', '%1' },                  --birdS
	{ '([^a-zA-Z][a-z]+[^s])s$', '%1' },          --song and dance birdS
	{ '^([%w%-]+ [a-z]+[^s])s([, ])', '%1%2' },   --song birdS in ...
	{ 's %(fossil', ' (fossil' },                 --birdS (fossil)
--	{ 's %(', ' (' },                             --turn off if too liberal
	{ 's of ', ' of ' },                          --triasic birdS of the family...
	{ 's in the ', ' in the ' },                  --triasic birdS in the family...
	--fix special cases (mostly words whose legitimate trailing 's' was stripped above)
	{ 'algae', 'alga' },
	{ 'cactu(%f[\0, ])', 'cactus%1' },
	{ 'cactuse(%f[\0, ])', 'cactus%1' },
	{ 'carnivorou(%f[\0, ])', 'carnivorous%1' },
	{ 'countrie(%f[\0, ])', 'countries%1' },
	{ 'citru(%f[\0, ])', 'citrus%1' },
	{ 'crocu(%f[\0, ])', 'crocus%1' },
	{ 'deciduou(%f[\0, ])', 'deciduous%1' },
	{ '[eE]delweis(%f[\0, ])', 'edelweiss%1' },
	{ 'fishe(%f[\0, ])', 'fish%1' },
	{ 'flightles(%f[\0, ])', 'flightless%1' },
	{ 'fung[iu](%f[\0, ])', 'fungus%1' },
	{ '[gG]enu(%f[\0, ])', 'genus%1' },
	{ '[gG]ras(%f[\0, ])', 'grass%1' },
	{ '[gG]rasse(%f[\0, ])', 'grass%1' },
	{ 'herbaceou(%f[\0, ])', 'herbaceous%1' },
	{ 'herbivorou(%f[\0, ])', 'herbivorous%1' },
	{ '[iI]ri(%f[\0, ])', 'iris%1' },
	{ 'loache(%f[\0, ])', 'loach%1' },
	{ 'mos(%f[\0, ])', 'moss%1' },
	{ 'mosse(%f[\0, ])', 'moss%1' },
	{ '[sS]pecie(%f[\0, ])', 'species%1' },
	{ 'venomou(%f[\0, ])', 'venomous%1' },
	{ 'viruse(%f[\0, ])', 'virus%1' },
}

-- Applies every rule in singularRules, in order, and returns the rewritten text.
local function singularizeDescription(text)
	for _, rule in ipairs(singularRules) do
		text = mw.ustring.gsub(text, rule[1], rule[2]) --single target: gsub's match count is dropped
	end
	return text
end

function p.istaxon(frame)
-- Builds one tab-separated report line for the page named in frame.args[1].
-- If the child item has no description, climbs the taxon tree (P171) until an ancestor
-- with a description is found and derives a suggested child description from it.
-- prefix 'c' = child, 'p' = parent
-- output columns on success:
--   [[cTitle]]  [[d:cQID|cQID]]  <cInstanceOf QID>  (<cInstanceOf label>)  <cRank QID>  <cRank label>
--   followed by either
--     [[d:pQID|pDescription]]:  <suggested cDescription>      (child had no description)
--     child has description: <cDescription>                   (child already has one)
--     a short diagnostic such as 'no parent taxon/P171'       (no usable ancestor)
-- ex: [[Orthalicus nobilis]]  [[d:Q49525664|Q49525664]]  Q16521  (taxon)  Q7432  species  [[d:Q7104378|genus of molluscs]]:  mollusc
-- output on failure: title, QID (or 'Q#') and a '(not a taxon ...)' style reason.
-- NOTE(review): a missing frame.args[1] is not handled here; behaviour then depends on Module:ResolveEntityId.
	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	--only look up a resolved QID: getEntity() without an id falls back to the item connected to the current page
	local item = qid and mw.wikibase.getEntity(qid)
	local acceptableInstanceOf = {
		['Q16521'] = 'taxon',
		['Q310890'] = 'monotypic taxon',
		['Q2568288'] = 'ichnotaxon',
		['Q23038290'] = 'fossil taxon',
		['Q47487597'] = 'monotypic fossil taxon',
	}
	local allowNonTaxons = false --if false (default), display '(not a taxon)';
	local otherInstanceOf = {    --if true, allow clades, etc., as named in otherInstanceOf{}
		['Q713623'] = 'clade',
	--	['Q4886'] = 'cultivar', --kind of a mess
	}
	--monotypic variants are reported under the QID of their polytypic counterpart
	local forcedInstanceOf = {
		['monotypic taxon'] = 'Q16521',           --force 'Q16521' output (taxon)
		['monotypic fossil taxon'] = 'Q23038290', --force 'Q23038290' output (fossil taxon)
	}
	--ranks whose descriptions should be singular
	local speciesRanks = {
		['Q7432'] = 'species',
		['Q68947'] = 'subspecies',
		['Q4886'] = 'cultivar',
		['Q279749'] = 'form',
		['Q630771'] = 'subvariety',
		['Q767728'] = 'variety',
		--these should all be plural (added here for reference):
	--	['Q3025161'] = 'series',
	--	['Q3181348'] = 'section',
	}
	local tab = '\t'

	--output for failed lookups
	if not item then
		if qid then
			return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'item'..tab..'(not a taxon? item lookup failed)'
		end
		return '[['..title..']]'..tab..'Q#'..tab..'not on Wikidata OR no sitelink'
	end

	--find child's rank
	local childRank, childRankLabel
	local isSpeciesLevel = false
	local childRankState = item:getBestStatements('P105')[1] --taxon rank
	if childRankState then
		local rankValue = childRankState.mainsnak.datavalue
		if rankValue then
			childRank = rankValue.value.id
			childRankLabel = mw.wikibase.getLabel(childRank)
			isSpeciesLevel = speciesRanks[childRank] ~= nil
		else
			childRank = 'missing taxon rank value'
			childRankLabel = '-'
		end
	end

	--find parent's description; if nil, search grandparent, etc., etc.
	local bothDescriptionsFmtd = tab..tab --parent's raw descrip + child's new suggested descrip
	local newChildDescription = nil
	local oldChildDescription = mw.wikibase.getDescription(qid) --different output if nil vs present
	if oldChildDescription == nil then
		local ancestorItem = item --initialize to child
		local visited = { [qid] = true } --guards against cycles in the P171 chain
		while bothDescriptionsFmtd == tab..tab do
			local parentState = ancestorItem:getBestStatements('P171')[1] --parent taxon
			if parentState then
				--datavalue is absent on 'unknown value'/'no value' snaks
				local parentValue = parentState.mainsnak.datavalue
				local parentId = parentValue and parentValue.value.id
				if not parentId then
					bothDescriptionsFmtd = tab..'parent missing ID??'..tab
				elseif visited[parentId] then
					bothDescriptionsFmtd = tab..'parent taxon loop??'..tab
				else
					visited[parentId] = true
					local parentDescription = mw.wikibase.getDescription(parentId)
					if parentDescription then
						--trim to first ' of ' to remove taxon rank/monotypic
						local trim = mw.ustring.gsub(parentDescription, '^[%w%s%(%)%-,]-%s+of%s+', '')
						if isSpeciesLevel then
							trim = singularizeDescription(trim)
						end
						if trim == parentDescription then trim = '' end --nothing changed, so no need to duplicate it
						newChildDescription = trim
						bothDescriptionsFmtd = tab..'[[d:'..parentId..'|'..parentDescription..']]:'..tab..newChildDescription
					else
						--no description at this level: climb one step
						ancestorItem = mw.wikibase.getEntity(parentId)
						if not ancestorItem then
							bothDescriptionsFmtd = tab..'parent item lookup failed'..tab
						end
					end
				end
			else
				bothDescriptionsFmtd = tab..'no parent taxon/P171'..tab
			end
		end --while
	end --if oldChildDescription == nil

	--test child for instance of: taxon
	local instanceOfStates = item:getBestStatements('P31') --child's instance of
	for _, instanceOfState in ipairs(instanceOfStates) do
		local instanceValue = instanceOfState.mainsnak.datavalue --nil for valueless snaks
		local instanceOf = instanceValue and instanceValue.value.id
		if instanceOf and (acceptableInstanceOf[instanceOf] or
		  (otherInstanceOf[instanceOf] and allowNonTaxons == true)) then

			--prefer the Wikidata label; fall back to the local name so a missing label cannot break the output
			local instanceOfLabel = mw.wikibase.getLabel(instanceOf)
				or acceptableInstanceOf[instanceOf]
				or otherInstanceOf[instanceOf]

			--extinct/fossil handling in description
			if oldChildDescription == nil then
				--only append ' (fossil)', when needed, to items missing descrip;
				--retroactively making existing descriptions match their 'instance of taxon rank' is a different/much larger job
				--(skipped when no ancestor description was found, i.e. newChildDescription is nil)
				if newChildDescription and
				  (instanceOfLabel == 'fossil taxon' or instanceOfLabel == 'monotypic fossil taxon') then
					local childSaysFossil = string.match(newChildDescription, 'extinct') or
											string.match(newChildDescription, 'dinosaur') or
											string.match(newChildDescription, 'fossil')
					if childSaysFossil == nil then
						bothDescriptionsFmtd = bothDescriptionsFmtd..' (fossil)' --goes to output
					end
				end
			else
				bothDescriptionsFmtd = tab..'child has description: '..oldChildDescription..tab
			end

			--output for successful items
			if childRank == nil then childRank = 'UNRANKED' end
			if childRankLabel == nil then
				if childRank == 'UNRANKED' then childRankLabel = 'UNRANKED'
				else childRankLabel = 'MISSING LABEL' end
			end
			local reportedInstanceOf = forcedInstanceOf[instanceOfLabel] or instanceOf
			return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'
				..tab..reportedInstanceOf..tab..'('..instanceOfLabel..')'
				..tab..childRank..tab..childRankLabel..bothDescriptionsFmtd
		end
	end --for

	--output for failed items
	if #instanceOfStates > 0 then
		return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'instanceOf:'..tab..'(not a taxon)'
	end
	return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'instanceOf:'..tab..'(not a taxon? no instanceOf)'
end

--- Look up the taxon rank (P105) of the Wikidata item for a page.
-- @param frame  table whose args[1] is passed to Module:ResolveEntityId's _id() to obtain a QID
-- @return the rank's name from the local taxonRanks table when the rank QID is listed there,
--         otherwise the rank's QID; or one of the diagnostic strings 'QID not found',
--         'Item not found', 'rankState not found', 'No rank found'
function p.getTaxonRank(frame)
	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	local taxonRanks = {
		['Q7432'] = 'species',
		['Q34740'] = 'genus',
		['Q35409'] = 'family',
		['Q36602'] = 'order',
		['Q37517'] = 'class',
		['Q38348'] = 'phylum',
		['Q2007442'] = 'infraclass',
		['Q2136103'] = 'superfamily',
		['Q227936'] = 'tribe',
		['Q2455704'] = 'subfamily',
		['Q2889003'] = 'infraorder',
		['Q3238261'] = 'subgenus',
		['Q5867051'] = 'subclass',
		['Q5867959'] = 'suborder',
		['Q5868144'] = 'superorder',
		['Q68947'] = 'subspecies',
	}

	if not qid then
		return 'QID not found'
	end

	--fetched only after the QID check, so an unresolved title never triggers an entity load
	local item = mw.wikibase.getEntity(qid)
	if not item then
		return 'Item not found'
	end

	local rankState = item:getBestStatements('P105')[1] --taxon rank
	if not rankState then
		return 'rankState not found'
	end

	--datavalue is absent on 'unknown value'/'no value' snaks; treat that as "no rank"
	local rankValue = rankState.mainsnak.datavalue
	local rank = rankValue and rankValue.value.id
	if not rank then
		return 'No rank found'
	end

	return taxonRanks[rank] or rank
end

-- Return the export table carrying fmttimestamp, istaxon and getTaxonRank.
return p