Jump to content

Module:Sandbox/Tom.Reding/Tools

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Tom.Reding (talk | contribs) at 18:54, 14 February 2019 (disallow now). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Table of functions this module exports; it is returned at the end of the file.
local p = {}

--- Reformat a compact YYYYMMDDhhmmss timestamp as ISO 8601 without a time
-- zone designator.
-- @param frame  table whose args[1] holds the timestamp, e.g. '20190201223349'
-- @return 'YYYY-MM-DDThh:mm:ss' when args[1] is exactly 14 digits (surrounding
--         whitespace is ignored); '' when args[1] is missing; otherwise the
--         trimmed input unchanged, so malformed values stay visible instead of
--         being sliced into a bogus timestamp.
function p.fmttimestamp(frame)
	local ts = frame.args[1] --20190201223349
	if ts == nil then
		return '' --nothing supplied: string.sub/match on nil would raise an error
	end
	ts = tostring(ts):match('^%s*(.-)%s*$') --trim surrounding whitespace
	local y, mo, d, h, mi, s = ts:match('^(%d%d%d%d)(%d%d)(%d%d)(%d%d)(%d%d)(%d%d)$')
	if not y then
		return ts --not a 14-digit timestamp; hand it back untouched
	end
	return y..'-'..mo..'-'..d..'T'..h..':'..mi..':'..s --ISO 8601 format without time zone designator
end

-- Column separator used in every row p.istaxon returns.
local TAB = '\t'

-- "instance of" (P31) values that count as a taxon.
local acceptableInstanceOf = {
	['Q16521'] = 'taxon',
	['Q310890'] = 'monotypic taxon',
	['Q2568288'] = 'ichnotaxon',
	['Q23038290'] = 'fossil taxon',
	['Q47487597'] = 'monotypic fossil taxon',
}

--if false (default), display '(not a taxon)'; if true, allow clades, etc., as named in otherInstanceOf{}
local allowNonTaxons = false
local otherInstanceOf = {
	['Q713623'] = 'clade',
--	['Q4886'] = 'cultivar', --kind of a mess
}

-- Monotypic variants are reported under the ID of their general class.
local reportedInstanceOf = {
	['Q310890'] = 'Q16521',      --monotypic taxon        -> taxon
	['Q47487597'] = 'Q23038290', --monotypic fossil taxon -> fossil taxon
}

-- Ranks at or below species; for these the ancestor's description is singularised.
local speciesRanks = {
	['Q7432'] = 'species',
	['Q68947'] = 'subspecies',
	['Q4886'] = 'cultivar',
	['Q279749'] = 'form',
	['Q630771'] = 'subvariety',
	['Q767728'] = 'variety',
}

-- Ordered {pattern, replacement} pairs applied one after another to turn a
-- plural description fragment into a singular one. The first group strips a
-- trailing plural "s"; the second group repairs words that the stripping
-- damages (e.g. "genus" -> "genu" -> "genus"). Each repair needs two rules
-- (end of string / followed by a non-"s" character) because Lua patterns have
-- no alternation. ORDER MATTERS: the repairs assume the stripping already ran.
local singularRules = {
	{ '^([%w]+)s([, ])', '%1%2' },        --birds that ...; plants, guava
	{ '^([a-z]+)s$', '%1' },              --birds
	{ '([^a-zA-Z][a-z]+)s$', '%1' },      --genus of supercaliflagulated birds
	{ 's %(fossil', ' (fossil' },         --birds (fossil)
--	{ 's %(', ' (' },                     --turn off if too liberal
	{ 's of ', ' of ' },                  --triasic birds of the family...
	{ 's in the ', ' in the ' },          --triasic birds in the family...
	{ 'algae', 'alga' },
	{ 'cactu$', 'cactus' },
	{ 'cactu([^s])', 'cactus%1' },
	{ 'cactuse$', 'cactus' },
	{ 'cactuse([^s])', 'cactus%1' },
	{ 'carnivorou$', 'carnivorous' },
	{ 'carnivorou([^s])', 'carnivorous%1' },
	{ 'countrie$', 'countries' },
	{ 'countrie([^s])', 'countries%1' },
	{ 'citru$', 'citrus' },
	{ 'citru([^s])', 'citrus%1' },
	{ 'deciduou$', 'deciduous' },
	{ 'deciduou([^s])', 'deciduous%1' },
	{ '[eE]delweis$', 'edelweiss' },
	{ '[eE]delweis([^s])', 'edelweiss%1' },
	{ 'fishe$', 'fish' },
	{ 'fishe([^s])', 'fish%1' },
	{ 'fung[iu]$', 'fungus' },
	{ 'fung[iu]([^s])', 'fungus%1' },
	{ 'genu$', 'genus' },
	{ 'genu([^s])', 'genus%1' },
	{ 'herbaceou$', 'herbaceous' },
	{ 'herbaceou([^s])', 'herbaceous%1' },
	{ 'loache$', 'loach' },
	{ 'loache([^s])', 'loach%1' },
	{ 'mos$', 'moss' },
	{ 'mos([^st])', 'moss%1' },
	{ 'specie$', 'species' },
	{ 'specie([^s])', 'species%1' },
	{ 'viruse$', 'virus' },
	{ 'viruse([^s])', 'virus%1' },
}

-- Returns the entity ID a statement's main snak points at, or nil when the
-- snak carries no value (e.g. "no value" / "unknown value" statements).
local function snakEntityId(statement)
	local snak = statement and statement.mainsnak
	local datavalue = snak and snak.datavalue
	local value = datavalue and datavalue.value
	if type(value) == 'table' then
		return value.id
	end
	return nil
end

-- Applies every rule in singularRules, in order, to text.
local function singularize(text)
	for _, rule in ipairs(singularRules) do
		text = mw.ustring.gsub(text, rule[1], rule[2]) --keeps only gsub's first result
	end
	return text
end

-- Climbs the parent taxon (P171) chain from item until an ancestor with a
-- description is found. Returns the two trailing output columns, each with its
-- leading tab: '<TAB><description>:<TAB><trimmed description>' on success, or
-- '<TAB><diagnostic><TAB>' when the climb cannot continue.
local function describeAncestor(item, singular)
	local visited = {} --parent IDs already seen, to stop on cyclic data
	local ancestor = item
	while true do
		local parentState = ancestor:getBestStatements('P171')[1] --parent taxon
		if not parentState then
			return TAB..'no parent taxon/P171'..TAB
		end
		local parentId = snakEntityId(parentState)
		if not parentId then
			return TAB..'parent missing ID??'..TAB
		end
		if visited[parentId] then
			return TAB..'parent taxon loop'..TAB
		end
		visited[parentId] = true

		local description = mw.wikibase.getDescription(parentId)
		if description then
			--drop a leading "<rank words> of " (lazy match, so only the first " of " is consumed)
			local trim = mw.ustring.gsub(description, '^[%w%s%(%),]-%s+of%s+', '')
			if singular then
				trim = singularize(trim)
			end
			if trim == description then trim = '' end --nothing was stripped: no usable type
			return TAB..description..':'..TAB..trim
		end

		--no description on this parent: continue from the grandparent
		ancestor = mw.wikibase.getEntity(parentId)
		if not ancestor then
			return TAB..'parent item lookup failed'..TAB
		end
	end
end

-- Reports whether the Wikidata item for a page title is a taxon and, if so,
-- climbs the taxon tree until a description is found.
-- input:	frame.args[1] = page title (or QID), resolved through Module:ResolveEntityId
-- output:	[[enwiki child or QID]]	[[QID child]]	<instance of>	(ichno/taxon/etc)	<taxon QID>	<taxon QID label>	<par description>:	<par description's animal type>
--     ex:	[[Orthalicus nobilis]]	[[d:Q49525664]]	Q16521			(taxon)				Q7432		species				genus of molluscs:	mollusc
-- Non-taxa and lookup failures produce a shorter row ending in a
-- parenthesised/plain diagnostic instead.
function p.istaxon(frame)
	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	if not qid then
		--bail out BEFORE getEntity: called with nil it returns the current page's own item
		return '[['..title..']]'..TAB..'Q#'..TAB..'not on Wikidata OR no sitelink'
	end

	local title_qid = '[['..title..']]'..TAB..'[[d:'..qid..'|'..qid..']]'
	local item = mw.wikibase.getEntity(qid)
	if not item then
		return title_qid..TAB..'item'..TAB..'(not a taxon? item lookup failed)'
	end

	--test child for instance of: taxon (first matching statement wins)
	local instanceOfStates = item:getBestStatements('P31') --instance of
	local instanceOf
	for _, instanceOfState in ipairs(instanceOfStates) do
		local id = snakEntityId(instanceOfState) --nil for valueless statements, which are skipped
		if id and (acceptableInstanceOf[id] or (allowNonTaxons and otherInstanceOf[id])) then
			instanceOf = id
			break
		end
	end
	if not instanceOf then
		if #instanceOfStates > 0 then
			return title_qid..TAB..'instanceOf:'..TAB..'(not a taxon)'
		end
		return title_qid..TAB..'instanceOf:'..TAB..'(not a taxon? no instanceOf)'
	end

	--find child's rank
	local childRank, childRankLabel, isSpeciesLevel = 'UNRANKED', 'UNRANKED', false
	local childRankState = item:getBestStatements('P105')[1] --taxon rank
	if childRankState then
		local rankId = snakEntityId(childRankState)
		if rankId then
			childRank = rankId
			childRankLabel = mw.wikibase.getLabel(rankId) or 'MISSING LABEL'
			isSpeciesLevel = speciesRanks[rankId] ~= nil
		else
			childRank = 'missing taxon rank value'
			childRankLabel = '-'
		end
	end

	--decide by item ID rather than by label text, so a relabelled or unlabelled
	--Wikidata item cannot change (or break) the output
	local shownId = reportedInstanceOf[instanceOf] or instanceOf
	local instanceOfLabel = mw.wikibase.getLabel(instanceOf)
		or acceptableInstanceOf[instanceOf]
		or otherInstanceOf[instanceOf]

	--the ancestor climb loads entities, so it only runs once the item is known to be a taxon
	return title_qid..TAB..shownId..TAB..'('..instanceOfLabel..')'..TAB
		..childRank..TAB..childRankLabel..describeAncestor(item, isSpeciesLevel)
end

--- Looks up the taxon rank (P105) of the Wikidata item behind a page title.
-- @param frame  table whose args[1] is the page title (or QID) to resolve
--               through Module:ResolveEntityId
-- @return the rank's name when its ID is listed in taxonRanks, otherwise the
--         rank's raw QID; or one of the diagnostics 'QID not found',
--         'Item not found', 'rankState not found' (no P105 statement) or
--         'No rank found' (P105 statement without a usable value).
function p.getTaxonRank(frame)
	local taxonRanks = {
		['Q7432'] = 'species',
		['Q34740'] = 'genus',
		['Q35409'] = 'family',
		['Q36602'] = 'order',
		['Q37517'] = 'class',
		['Q38348'] = 'phylum',
		['Q2007442'] = 'infraclass',
		['Q2136103'] = 'superfamily',
		['Q227936'] = 'tribe',
		['Q2455704'] = 'subfamily',
		['Q2889003'] = 'infraorder',
		['Q3238261'] = 'subgenus',
		['Q5867051'] = 'subclass',
		['Q5867959'] = 'suborder',
		['Q5868144'] = 'superorder',
		['Q68947'] = 'subspecies',
	}

	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	if not qid then
		return 'QID not found'
	end

	--only load the entity once a QID is known; getEntity(nil) would fetch the current page's item
	local item = mw.wikibase.getEntity(qid)
	if not item then
		return 'Item not found'
	end

	local rankState = item:getBestStatements('P105')[1] --taxon rank
	if not rankState then
		return 'rankState not found'
	end

	--"no value"/"unknown value" statements have no datavalue, so step through the chain guardedly
	local datavalue = rankState.mainsnak and rankState.mainsnak.datavalue
	local value = datavalue and datavalue.value
	local rank = type(value) == 'table' and value.id or nil
	if not rank then
		return 'No rank found'
	end

	return taxonRanks[rank] or rank
end

-- Export the table of public functions.
return p