Jump to content

Module:Sandbox/Tom.Reding/Tools

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Tom.Reding (talk | contribs) at 21:22, 16 February 2019 (grasse). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Export table: the functions attached to it below are what this module returns.
local p = {}

--- Reformat a 14-digit YYYYMMDDhhmmss timestamp as an ISO 8601 date-time.
-- @param frame  invocation frame; frame.args[1] holds the timestamp,
--               e.g. '20190201223349'
-- @return string such as '2019-02-01T22:33:49' (no time zone designator),
--         or '' when the argument is missing or does not start with 14 digits
function p.fmttimestamp(frame)
	local ts = frame.args[1] or '' --a missing argument must not reach the string library as nil
	--Leading whitespace is skipped so a padded positional argument still parses;
	--anything after the 14th digit is ignored, exactly as the fixed-offset slicing did.
	local y, mo, d, h, mi, s = string.match(ts, '^%s*(%d%d%d%d)(%d%d)(%d%d)(%d%d)(%d%d)(%d%d)')
	if not y then
		return '' --not a timestamp: emit nothing rather than a malformed '--T::' string
	end
	return y..'-'..mo..'-'..d..'T'..h..':'..mi..':'..s --ISO 8601 format without time zone designator
end

--- Report whether a page's Wikidata item is a taxon and draft a description for it.
-- Climbs the taxon tree (P171, parent taxon) until an ancestor with a description is found;
-- assumes the child itself is missing a description.
-- @param frame  invocation frame; frame.args[1] is the title handed to Module:ResolveEntityId
-- @return one tab-separated row:
-- output:	[[enwiki child or QID]]	[[QID child]]	<instance of>	(incho/taxon/etc)	<taxon QID>	<taxon QID label>	[[par QID|description]]:			<par description's animal type>
--     ex:	[[Orthalicus nobilis]]	[[d:Q49525664]]	Q16521			(taxon)				Q7432		species				[[d:Q7104378|genus of molluscs]]:	mollusc
--         or a shorter diagnostic row when the item is missing or is not a taxon
function p.istaxon(frame)
	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	--Only load the item when the title resolved: getEntity() without an ID falls back to
	--the item connected to the current page, which is not the one being asked about.
	local item = nil
	if qid then
		item = mw.wikibase.getEntity(qid)
	end
	local acceptableInstanceOf = {
		['Q16521'] = 'taxon',
		['Q310890'] = 'monotypic taxon',
		['Q2568288'] = 'ichnotaxon',
		['Q23038290'] = 'fossil taxon',
		['Q47487597'] = 'monotypic fossil taxon',
	}
	local allowNonTaxons = false --if false (default), display '(not a taxon)';
	local otherInstanceOf = {    --if true, allow clades, etc., as named in otherInstanceOf{}
		['Q713623'] = 'clade',
	--	['Q4886'] = 'cultivar', --kind of a mess
	}
	local tab = '\t'

	--output for failed lookups
	if not item then
		if qid then
			return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'item'..tab..'(not a taxon? item lookup failed)'
		end
		return '[['..title..']]'..tab..'Q#'..tab..'not on Wikidata OR no sitelink'
	end

	--find child's rank
	local childRank, childRankLabel
	local isSpeciesLevel = false --true for species and the ranks below it; enables singularizing
	local childRankState = item:getBestStatements('P105')[1] --taxon rank
	if childRankState then
		local rankValue = childRankState.mainsnak.datavalue
		if rankValue then
			childRank = rankValue.value.id
			childRankLabel = mw.wikibase.getLabel(childRank)
			local speciesRanks = {
				['Q7432'] = 'species',
				['Q68947'] = 'subspecies',
				['Q4886'] = 'cultivar',
				['Q279749'] = 'form',
				['Q630771'] = 'subvariety',
				['Q767728'] = 'variety',
				--these should all be plural (added here for reference):
			--	['Q3025161'] = 'series',
			--	['Q3181348'] = 'section',
			}
			isSpeciesLevel = speciesRanks[childRank] ~= nil
		else
			childRank = 'missing taxon rank value'
			childRankLabel = '-'
		end
	end

	--find parent's description; if nil, search grandparent, etc., etc.
	local ancestorItem = item --initialize to child
	local childDescription = '' --stays empty when no ancestor supplies a description
	local bothDescriptionsFmtd = tab..tab --sentinel: keep climbing while unchanged
	local seenParents = {} --parent QIDs already climbed through, to stop on cyclic P171 chains
	while bothDescriptionsFmtd == tab..tab do
		local parentState = ancestorItem:getBestStatements('P171')[1] --parent taxon
		if not parentState then
			bothDescriptionsFmtd = tab..'no parent taxon/P171'..tab
		else
			local parentValue = parentState.mainsnak.datavalue --nil when the statement carries no concrete value
			local parentId = parentValue and parentValue.value.id
			if not parentId then
				bothDescriptionsFmtd = tab..'parent missing ID??'..tab
			elseif seenParents[parentId] then
				bothDescriptionsFmtd = tab..'parent taxon loop at '..parentId..tab
			else
				seenParents[parentId] = true
				local parentDescription = mw.wikibase.getDescription(parentId)
				if parentDescription then
					local trim = mw.ustring.gsub(parentDescription, '^[%w%s%(%)%-,]-%s+of%s+', '') --trim to first ' of '
					if isSpeciesLevel then
						--remove 's' semi-liberally
						trim = mw.ustring.gsub(trim, '^([%w]+)s([, ])', '%1%2')   --birdS that ...; plantS, guava
						trim = mw.ustring.gsub(trim, '^([a-z]+)s$', '%1')         --birdS
						trim = mw.ustring.gsub(trim, '([^a-zA-Z][a-z]+)s$', '%1') --song and dance birdS
						trim = mw.ustring.gsub(trim, '^([%w]+ [a-z]+)s([, ])', '%1%2') --song birdS in ...
						trim = mw.ustring.gsub(trim, 's %(fossil', ' (fossil') --birdS (fossil)
					--	trim = mw.ustring.gsub(trim, 's %(',  ' (')    --turn off if too liberal
						trim = mw.ustring.gsub(trim, 's of ', ' of ')  --triasic birdS of the family...
						trim = mw.ustring.gsub(trim, 's in the ', ' in the ')  --triasic birdS in the family...
						--fix special cases: restore words that lost a real trailing 's' above and
						--singularize irregular plurals. Lua patterns have no alternation, so each word
						--gets its own rule; %f[\0, ] is intended to match where the word is followed by
						--end of string, ',' or ' '. Order matters (e.g. 'cactu' before 'cactuse').
						trim = mw.ustring.gsub(trim, 'algae', 'alga')
						trim = mw.ustring.gsub(trim, 'cactu(%f[\0, ])', 'cactus%1')
						trim = mw.ustring.gsub(trim, 'cactuse(%f[\0, ])', 'cactus%1')
						trim = mw.ustring.gsub(trim, 'carnivorou(%f[\0, ])', 'carnivorous%1')
						trim = mw.ustring.gsub(trim, 'countrie(%f[\0, ])', 'countries%1')
						trim = mw.ustring.gsub(trim, 'citru(%f[\0, ])', 'citrus%1')
						trim = mw.ustring.gsub(trim, 'crocu(%f[\0, ])', 'crocus%1')
						trim = mw.ustring.gsub(trim, 'deciduou(%f[\0, ])', 'deciduous%1')
						trim = mw.ustring.gsub(trim, '[eE]delweis(%f[\0, ])', 'edelweiss%1')
						trim = mw.ustring.gsub(trim, 'fishe(%f[\0, ])', 'fish%1')
						trim = mw.ustring.gsub(trim, 'flightles(%f[\0, ])', 'flightless%1')
						trim = mw.ustring.gsub(trim, 'fung[iu](%f[\0, ])', 'fungus%1')
						trim = mw.ustring.gsub(trim, 'genu(%f[\0, ])', 'genus%1')
						trim = mw.ustring.gsub(trim, '[gG]ras(%f[\0, ])', 'grass%1')
						trim = mw.ustring.gsub(trim, '[gG]rasse(%f[\0, ])', 'grass%1')
						trim = mw.ustring.gsub(trim, 'herbaceou(%f[\0, ])', 'herbaceous%1')
						trim = mw.ustring.gsub(trim, 'herbivorou(%f[\0, ])', 'herbivorous%1')
						trim = mw.ustring.gsub(trim, 'loache(%f[\0, ])', 'loach%1')
						trim = mw.ustring.gsub(trim, 'mos(%f[\0, ])', 'moss%1')
						trim = mw.ustring.gsub(trim, 'mosse(%f[\0, ])', 'moss%1')
						trim = mw.ustring.gsub(trim, 'specie(%f[\0, ])', 'species%1')
						trim = mw.ustring.gsub(trim, 'venomou(%f[\0, ])', 'venomous%1')
						trim = mw.ustring.gsub(trim, 'viruse(%f[\0, ])', 'virus%1')
					end
					if trim == parentDescription then trim = '' end --nothing changed, so no need to duplicate it
					childDescription = trim
					bothDescriptionsFmtd = tab..'[[d:'..parentId..'|'..parentDescription..']]:'..tab..childDescription
				else
					--no description here: climb one level
					ancestorItem = mw.wikibase.getEntity(parentId)
					if not ancestorItem then
						bothDescriptionsFmtd = tab..'parent item lookup failed: '..parentId..tab
					end
				end
			end
		end
	end --while

	--test child for instance of: taxon
	local sawInstanceOf = false --distinguishes 'wrong instance of' from 'no instance of at all'
	for _, instanceOfState in ipairs( item:getBestStatements('P31') ) do --child's instance of
		sawInstanceOf = true
		local instanceOfValue = instanceOfState.mainsnak.datavalue --nil when the statement carries no concrete value
		local instanceOf = instanceOfValue and instanceOfValue.value.id
		if instanceOf and (acceptableInstanceOf[instanceOf] or (allowNonTaxons and otherInstanceOf[instanceOf])) then

			--fall back to the local names when the item has no label in the wiki's language
			local instanceOfLabel = mw.wikibase.getLabel(instanceOf)
				or acceptableInstanceOf[instanceOf]
				or otherInstanceOf[instanceOf]

			--extinct/fossil handling in description
			if instanceOfLabel == 'fossil taxon' or instanceOfLabel == 'monotypic fossil taxon' then
				local childSaysFossil = string.match(childDescription, 'extinct') or
										string.match(childDescription, 'dinosaur') or
										string.match(childDescription, 'fossil')
				if childSaysFossil == nil then
					bothDescriptionsFmtd = bothDescriptionsFmtd..' (fossil)' --goes to output
				end
			end

			--output for successful items
			local title_qid = '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'
			if childRank == nil then childRank = 'UNRANKED' end
			if childRankLabel == nil then
				if childRank == 'UNRANKED' then childRankLabel = 'UNRANKED'
				else childRankLabel = 'MISSING LABEL' end
			end
			local cRank_cLabel_pDescrip = childRank..tab..childRankLabel..bothDescriptionsFmtd

			--monotypic variants are reported under their general class; everything else
			--(taxon, ichnotaxon, polytypic fossil, ...) keeps its own QID
			local reportedInstanceOf = instanceOf
			if instanceOfLabel == 'monotypic taxon' then
				reportedInstanceOf = 'Q16521' --taxon
			elseif instanceOfLabel == 'monotypic fossil taxon' then
				reportedInstanceOf = 'Q23038290' --fossil taxon
			end
			return title_qid..tab..reportedInstanceOf..tab..'('..instanceOfLabel..')'..tab..cRank_cLabel_pDescrip
		end
	end --for

	--output for failed items
	if sawInstanceOf then
		return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'instanceOf:'..tab..'(not a taxon)'
	end
	return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'instanceOf:'..tab..'(not a taxon? no instanceOf)'
end

--- Look up the taxon rank (P105) of the Wikidata item behind a page title.
-- @param frame  invocation frame; frame.args[1] is the title handed to Module:ResolveEntityId
-- @return the rank name for ranks listed in taxonRanks, the raw rank QID for any other rank,
--         or one of the diagnostic strings 'QID not found', 'Item not found',
--         'rankState not found', 'No rank found'
function p.getTaxonRank(frame)
	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	if not qid then
		return 'QID not found'
	end

	--load the entity only after the ID is known to exist; getEntity() without an ID
	--would load the item connected to the current page instead
	local item = mw.wikibase.getEntity(qid)
	if not item then
		return 'Item not found'
	end

	local rankState = item:getBestStatements('P105')[1] --taxon rank
	if not rankState then
		return 'rankState not found'
	end

	local rankValue = rankState.mainsnak.datavalue --nil when the statement carries no concrete value
	local rank = rankValue and rankValue.value.id
	if not rank then
		return 'No rank found'
	end

	local taxonRanks = {
		['Q7432'] = 'species',
		['Q34740'] = 'genus',
		['Q35409'] = 'family',
		['Q36602'] = 'order',
		['Q37517'] = 'class',
		['Q38348'] = 'phylum',
		['Q2007442'] = 'infraclass',
		['Q2136103'] = 'superfamily',
		['Q227936'] = 'tribe',
		['Q2455704'] = 'subfamily',
		['Q2889003'] = 'infraorder',
		['Q3238261'] = 'subgenus',
		['Q5867051'] = 'subclass',
		['Q5867959'] = 'suborder',
		['Q5868144'] = 'superorder',
		['Q68947'] = 'subspecies',
	}
	return taxonRanks[rank] or rank
end

-- Hand the export table to whoever loads this module.
return p