Jump to content

Module:Sandbox/trappist the monk/taxonomy

Permanently protected module
From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Trappist the monk (talk | contribs) at 00:24, 25 October 2021. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
require('Module:No globals');

--[[--------------------------< T A X O M A P >----------------------------------------------------------------

this is a table of sequence tables that each list the first and last taxon name in a particular module.  module_select()
uses these tables to determine which data module 'should' have the data for the taxon name.

	[1] first taxon name in the data module
	[2] last taxon name in the module
	[3] suffix appended to the base module name to create: 'Module:Sandbox/trappist the monk/taxonomy <suffix>'

These tables are created by an awb script when it splits a much larger raw data file before the splits are uploaded
to en.wiki.

]]

-- Keyed by the uppercase first letter of a taxon name.  Each value is a sequence of {first, last, suffix} range
-- tables; module_select() walks a letter's sequence in order and returns <suffix> for the first range where
-- first <= taxon <= last (plain Lua string comparison).  Letters that have no key here are not enumerated:
-- module_select() returns the bare letter for them.  Machine generated – do not edit the ranges by hand.
local taxomap_t = {		-- created/updated: 2021-10-24
	A = {
		{'ADA clade', 'Acratus', 'A1'},
		{'Acrecebus', 'Africotriton', 'A2'},
		{'Afrida', 'Alicia (plant)', 'A3'},
		{'Aliciella', 'Ammosperma', 'A4'},
		{'Ammospermophilus', 'Anenthemonae', 'A5'},
		{'Anentome', 'Aorangia', 'A6'},
		{'Aoranthe', 'Archarius', 'A7'},
		{'Archasia', 'Asemonea', 'A8'},
		{'Asemoneinae', 'Aurana', 'A9'},
		{'Auranticarpa', 'Azygopus', 'A10'},
		},
	B = {
		{'BOP clade', 'Bauruoolithus', 'B1'},
		{'Baurusuchia', 'Bloomeria', 'B2'},
		{'Blosnavirus', 'Brasilentulus', 'B3'},
		{'Brasileodactylus', 'Byturus', 'B4'},
		},
	C = {
		{'CRuMs', 'Camillina', 'C1'},
		{'Caminus', 'Cassianellidae', 'C2'},
		{'Cassianopsinae', 'Cereus', 'C3'},
		{'Cereus (anemone)', 'Chernes', 'C4'},
		{'Chernetidae', 'Chrysopida', 'C5'},
		{'Chrysopidae', 'Cnemidophorus', 'C6'},
		{'Cnemidopyge', 'Conopias', 'C7'},
		{'Conopidae', 'Crataegus ser. Intricata', 'C8'},
		{'Crataegus ser. Intricatae', 'Cunninghamella', 'C9'},
		{'Cunninghamellaceae', 'Czekanowskiales', 'C10'},
		},
	D = {
		{'Daanosaurus', 'Dentatherinidae', 'D1'},
		{'Dentectus', 'Digalodon', 'D2'},
		{'Digama', 'Dodecadenia', 'D3'},
		{'Dodecahema', 'Dystrophaeus', 'D4'},
		{'Dytaster', 'Dzungariotherium', 'D5'},
		},
	E = {
		{'Eacles', 'Empidadelpha', 'E1'},
		{'Empididae', 'Epipogiinae', 'E2'},
		{'Epipogium', 'Eucosmodon', 'E3'},
		{'Eucosmodontidae', 'Eutreptiidae', 'E4'},
		{'Eutreptiiida', 'Ezosciadium', 'E5'},
		},
	G = {
		{'Gabara', 'Giardia', 'G1'},
		{'Giardiavirus', 'Gonostomatidae', 'G2'},
		{'Gonostomatoidei', 'Gyroweisia', 'G3'},
		},
	H = {
		{'HTVC010P', 'Hecalini', 'H1'},
		{'Hecamede', 'Hesperonychus', 'H2'},
		{'Hesperoperla', 'Homalattus', 'H3'},
		{'Homalia', 'Hymenasplenium', 'H4'},
		{'Hymenelia', 'Hytrosaviridae', 'H5'},
		},
	L = {
		{'La', 'Lechia', 'L1'},
		{'Lechriaspis', 'Lethiscidae', 'L2'},
		{'Lethiscus', 'Lithophaga', 'L3'},
		{'Lithophane', 'Lycopteridae', 'L4'},
		{'Lycopteriformes', 'Lyttoniidina', 'L5'},
		},
	M = {
		{'Maaqwi', 'Manis (Manis)', 'M1'},
		{'Manis (Paramanis)', 'Megamastax', 'M2'},
		{'Megamelanus', 'Mesotitanina', 'M3'},
		{'Mesovagus', 'Microzercon', 'M4'},
		{'Microzoanthidae', 'Montfortia', 'M5'},
		{'Montfortista', 'Mytilaria', 'M6'},
		{'Mytilarioideae', 'minke whale species complex', 'M7'},
		},
	N = {
		{'Naashoibitosaurus', 'Neocoelidia', 'N1'},
		{'Neocoelidiinae', 'Nichollsemys', 'N2'},
		{'Nichollssaura', 'Nypa', 'N3'},
		{'Nypoideae', 'Nyungwea', 'N4'},
		},
	O = {
		{'OSLEUM', 'Ondigus', 'O1'},
		{'Ondina', 'Orthomorpha', 'O2'},
		{'Orthomorphini', 'Ozyptila', 'O3'},
		},
	P = {
		{'P2virus', 'Papaipema', 'P1'},
		{'Papakula', 'Parasynema', 'P2'},
		{'Parasynthemis', 'Peltandreae', 'P3'},
		{'Peltaria', 'Phaethornithinae', 'P4'},
		{'Phaetusa', 'Phyllocnistinae', 'P5'},
		{'Phyllocnistis', 'Planiliza', 'P6'},
		{'Planipapillus', 'Podothecus', 'P7'},
		{'Podothrombidiidae', 'Praezygaena', 'P8'},
		{'Pragmatodes', 'Protoazin', 'P9'},
		{'Protobalanus', 'Pseudomicrargus', 'P10'},
		{'Pseudomicrocentria', 'Ptychatractidae', 'P11'},
		{'Ptycheulimella', 'Pyxis', 'P12'},
		},
	S = {
		{'SAR', 'Scaptia', 'S1'},
		{'Scaptius', 'Seegeriella', 'S2'},
		{'Seeleyosaurus', 'Sinaivirus', 'S3'},
		{'Sinamma', 'Sphaeriida', 'S4'},
		{'Sphaeriidae', 'Stenogomphurus', 'S5'},
		{'Stenogonum', 'Stylonuroidea', 'S6'},
		{'Stylonuroides', 'Szovitsia', 'S7'},
		},
	T = {
		{'TG3', 'Tenthredo', 'T1'},
		{'Tentoriceps', 'Therizinosaurus', 'T2'},
		{'Therlinya', 'Torovirus', 'T3'},
		{'Torpedinidae', 'Trigonosaurus', 'T4'},
		{'Trigonoscuta', 'Typhlogarra', 'T5'},
		{'Typhlogastrura', 'Tzvelevopyrethrum', 'T6'},
		},
	}



local is_not_italic_virus_taxon_t = {};											-- set: taxon names assigned these ranks are *not* to be italicized
for _, rank in ipairs ({
	'unranked domain',
	'unranked',
	'virus group',
	'virus',
	'strain',
	'serotype',
	}) do
		is_not_italic_virus_taxon_t[rank] = true;								-- every listed rank maps to boolean true
end

local is_italic_taxon_t = {};													-- set: taxon names assigned these ranks are to be italicized
for _, rank in ipairs ({
	'genus', 'ichnogenus', 'oogenus',											-- genus level
	'subgenus', 'ichnosubgenus', 'oosubgenus',

	'supersectio', 'sectio', 'subsectio',										-- sections

	'series', 'subseries',

	'species complex', 'species group', 'species subgroup',

	'species', 'ichnospecies', 'oospecies',										-- species level
	'subspecies', 'ichnosubspecies', 'oosubspecies',
	}) do
		is_italic_taxon_t[rank] = true;											-- every listed rank maps to boolean true
end

-- Maps a rank name as stored in the taxonomy data (mostly Latin) to the English text displayed for that rank.
-- Values wrapped in '' ... '' are wiki-markup italics.  Ranks that have no key here yield nil.
local anglicize_rank_t = {														-- this table adapted from {{anglicise rank}}
	['alliance'] = "''Alliance''",												-- Special cases, alphabetic order
	['basic shell type'] = "''Basic shell type''",
	['branch'] = "''Branch''",
	['clade'] = "''Clade''",
	['cladus'] = "''Clade''",
	['form taxon'] = "''Form taxon''",
	['grade'] = "''Grade''",
	['gradus'] = "''Grade''",
	['informal'] = "''Informal group''",
	['informal group'] = "''Informal group''",
	['morphotype'] = "''Morphotype''",
	['node'] = "''Node''",
	['plesion'] = "''Plesion''",
	['plesion-group'] = "''Plesion-group''",
	['possible clade'] = "''Clade?''",
	['realm'] = "''Realm''",
	['species complex'] = "''Species complex''",
	['species group'] = "''Species group''",
	['species subgroup'] = "''Species subgroup''",
	['stem group'] = "''Stem group''",
	['total group'] = "''Total group''",
	['unranked'] = '(unranked)',												-- not a special case in {{anglicise rank}} but included here for convenience

	['serotype'] = 'Serotype',													-- Virus ranks below species
	['strain'] = 'Strain',
	['virus'] = 'Virus',
	['virus group'] = 'Group',

	['classis'] = 'Class',														-- Linnaean taxonomy, alphabetic order
	['cohort'] = 'Cohort',
	['divisio'] = 'Division',
	['domain'] = 'Domain',
	['epifamilia'] = 'Epifamily',
	['familia'] = 'Family',
	['forma'] = 'Form',
	['genus'] = 'Genus',
	['grandordo'] = 'Grandorder',
	['grandordo-mb'] = 'Grandorder',											-- McKenna & Bell version
	['hyperfamilia'] = 'Hyperfamily',
	['infraclassis'] = 'Infraclass',
	['infralegio'] = 'Infralegion',
	['infralegion'] = 'Infralegion',
	['infraordo'] = 'Infraorder',
	['infraphylum'] = 'Infraphylum',
	['infraregnum'] = 'Infrakingdom',
	['infratribus'] = 'Infratribe',
	['legio'] = 'Legion',
	['legion'] = 'Legion',
	['magnordo'] = 'Magnorder',
	['micrordo'] = 'Microrder',
	['microphylum'] = 'Microphylum',
	['mirordo'] = 'Mirorder',													-- key was 'mirordo-Mirorder' (conversion artifact) so rank 'mirordo' never matched
	['mirordo-mb'] = 'Mirorder',												-- McKenna & Bell version
	['nanordo'] = 'Nanorder',
	['nanophylum'] = 'Nanophylum',
	['ordo'] = 'Order',
	['parafamilia'] = 'Parafamily',
	['parvclassis'] = 'Parvclass',
	['parvordo'] = 'Parvorder',
	['phylum'] = 'Phylum',
	['regnum'] = 'Kingdom',
	['sectio'] = 'Section',
	['series'] = 'Series',
	['species'] = 'Species',
	['subclassis'] = 'Subclass',
	['subcohort'] = 'Subcohort',
	['subdivisio'] = 'Subdivision',
	['subfamilia'] = 'Subfamily',
	['subgenus'] = 'Subgenus',
	['sublegio'] = 'Sublegion',
	['sublegion'] = 'Sublegion',
	['subordo'] = 'Suborder',
	['subphylum'] = 'Subphylum',
	['subregnum'] = 'Subkingdom',
	['subsectio'] = 'Subsection',
	['subseries'] = 'Subseries',
	['subspecies'] = 'Subspecies',
	['subterclassis'] = 'Subterclass',											-- used in WoRMS
	['subtribus'] = 'Subtribe',
	['superclassis'] = 'Superclass',
	['supercohort'] = 'Supercohort',
	['superdivisio'] = 'Superdivision',
	['superdomain'] = 'Superdomain',
	['superfamilia'] = 'Superfamily',
	['superlegio'] = 'Superlegion',
	['superlegion'] = 'Superlegion',
	['superordo'] = 'Superorder',
	['superphylum'] = 'Superphylum',
	['superregnum'] = 'Superkingdom',
	['supersectio'] = 'Supersection',
	['supertribus'] = 'Supertribe',
	['tribus'] = 'Tribe',
	['varietas'] = 'Variety',
	['zoodivisio'] = 'Division',
	['zoosectio'] = 'Section',
	['zoosubdivisio'] = 'Subdivision',
	['zoosubsectio'] = 'Subsection',

	['ichnoclassis'] = 'Ichnoclass',											-- trace fossil taxonomy, alphabetic order
	['ichnocohort'] = 'Ichnocohort',
	['ichnodivisio'] = 'Ichnodivision',
	['ichnofamilia'] = 'Ichnofamily',
	['ichnogenus'] = 'Ichnogenus',
	['ichnograndordo'] = 'Ichnograndorder',
	['ichnograndordo-mb'] = 'Ichnograndorder',									-- McKenna & Bell version
	['ichnoinfraclassis'] = 'Ichnoinfraclass',
	['ichnoinfradivisio'] = 'Ichnoinfradivision',
	['ichnoinfraordo'] = 'Ichnoinfraorder',
	['ichnolegio'] = 'Ichnolegion',
	['ichnolegion'] = 'Ichnolegion',
	['ichnomagnordo'] = 'Ichnomagnorder',
	['ichnomicrordo'] = 'Ichnomicrorder',
	['ichnoordo'] = 'Ichnoorder',
	['ichnoparvordo'] = 'Ichnoparvorder',
	['ichnospecies'] = 'Ichnospecies',
	['ichnostem-group'] = 'Ichnostem-Group',
	['ichnosubclassis'] = 'Ichnosubclass',
	['ichnosubdivisio'] = 'Ichnosubdivision',
	['ichnosubfamilia'] = 'Ichnosubfamily',
	['ichnosublegio'] = 'Ichnosublegion',
	['ichnosublegion'] = 'Ichnosublegion',
	['ichnosubordo'] = 'Ichnosuborder',
	['ichnosuperclassis'] = 'Ichnosuperclass',
	['ichnosupercohort'] = 'Ichnosupercohort',
	['ichnosuperfamilia'] = 'Ichnosuperfamily',
	['ichnosuperordo'] = 'Ichnosuperorder',

	['ooclassis'] = 'Ooclass',													-- fossilized egg taxonomy, alphabetic order
	['oocohort'] = 'Oocohort',
	['oofamilia'] = 'Oofamily',
	['oogenus'] = 'Oogenus',
	['oomagnordo'] = 'Oomagnorder',
	['oordo'] = 'Oorder',
	['oospecies'] = 'Oospecies',
	['oosubclassis'] = 'Oosubclass',
	['oosubgenus'] = 'Oosubgenus',
	['oosubspecies'] = 'Oosubspecies',
	['oosupercohort'] = 'Oosupercohort',
	['oosuperordo'] = 'Oosuperorder',
	}


local is_always_displayed_t = {};												-- set: ranks looked up (lowercased) by the tree crawlers; a hit gets the rank label wrapped in ''' ... '''
-- NOTE(review): 'virus_group' is spelled with an underscore here while the other rank tables in this file
-- use 'virus group' (with a space) – confirm which spelling the data modules actually carry.
for _, rank in ipairs ({
	'virus_group',
	'regnum', 'kingdom',
	'phylum',
	'divisio', 'division',
	'class', 'classis',
	'order', 'ordo',
	'familia', 'family',
	'genus',
	'species',
	}) do
		is_always_displayed_t[rank] = true;										-- every listed rank maps to boolean true
end


local base_data_table_name = 'Module:Sandbox/trappist the monk/taxonomy ';	-- includes space between base name and suffix
local modules_loaded = {};														-- a list of the modules loaded while crawling the tree; viewable in the lua log


--[[--------------------------< M O D U L E _ S E L E C T >----------------------------------------------------

Select one data module to load that 'should' hold data for <taxon>.  Returns an enumerated letter suffix that
will be appended to the base module name to make: Module:Sandbox/trappist the monk/taxonomy <suffix>

]]

local function module_select (taxon)
	-- <taxon>: taxon name (string)
	-- returns: the enumerated suffix (e.g. 'A3') of the range holding <taxon>; the bare uppercase letter (or
	-- 'symbols') when <taxomap_t> has no entry for that key; nothing (nil to callers) when the key has ranges
	-- but <taxon> falls outside all of them
	local initial = taxon:match ('^%a');										-- leading letter of the name; nil when the name starts with anything else
	local key = 'symbols';
	if initial then
		key = initial:upper();
	end

	local ranges_t = taxomap_t[key];
	if not ranges_t then														-- this key is not split into enumerated modules
		return key;
	end

	for i = 1, #ranges_t do														-- ranges are tested in listed order
		local first, last, suffix = ranges_t[i][1], ranges_t[i][2], ranges_t[i][3];
		if not ((taxon < first) or (last < taxon)) then							-- i.e. first <= taxon <= last
			return suffix;
		end
	end
end


--[[--------------------------< T A X O N _ G E T >------------------------------------------------------------

fetch a taxon table from the appropriate Module:Sandbox/trappist the monk/taxonomy <letter> data module where
<letter> is the uppercase first letter of <taxon> (possibly enumerated, e.g. A1) or 'symbols' when the first character is not a letter.

follow one 'same_as' if that parameter is present

]]

-- Private helper for taxon_get(): select and load the data module that should hold <name>, count the load
-- in <modules_loaded>, pick out the entry for <name>, and unload the module again to save memory.
-- Returns the entry (whatever the data module stores under <name>); nil when <name> is not a string, when
-- no module can be selected or loaded, or when the module has no entry for <name>.
local function taxon_entry_fetch (name)
	if 'string' ~= type (name) then												-- module_select() needs a string
		return nil;
	end

	local suffix = module_select (name);
	if not suffix then															-- <name> lies outside every listed range; nothing to load
		return nil;
	end

	local module_name = base_data_table_name .. suffix;
	local ok, taxonomy_t = pcall (require, module_name);						-- attempt to load the selected data module
	if not ok then
		return nil;
	end

	local log_key = 'taxonomy ' .. suffix;
	modules_loaded[log_key] = (modules_loaded[log_key] or 0) + 1;				-- tally successful loads for the lua log

	local entry_t;
	if 'table' == type (taxonomy_t) then
		entry_t = taxonomy_t[name];
	end

	package.loaded[module_name] = nil;											-- unload to save memory
	return entry_t;
end


local function taxon_get (taxon, no_follow)
	-- <taxon>: taxon name to look up
	-- <no_follow>: when truthy, 'same_as' is copied into the result like any other key and is not followed
	-- returns: a new table holding a copy of the taxon's data; nil when the data module cannot be loaded or
	-- holds no entry for <taxon> (previously the latter cases raised a Lua error)
	local entry_t = taxon_entry_fetch (taxon);
	if 'table' ~= type (entry_t) then
		return nil;
	end

	local taxon_t = {};															-- the return table
	local same_as;																-- holds the <same_as> value from <taxon> data; nil else

	for k, v in pairs (entry_t) do												-- copy content from taxonomy table to return table
		if no_follow or 'same_as' ~= k then
			taxon_t[k] = v;
		else																	-- here when same_as is present and is to be followed
			same_as = v;														-- so save its value instead of copying it
		end
	end

	if same_as then																-- follow one same_as
		local same_as_t = taxon_entry_fetch (same_as);
		if 'table' == type (same_as_t) then
			for k, v in pairs (same_as_t) do									-- fill in from the same_as taxon
				if nil == taxon_t[k] then										-- but only where <taxon> has no value of its own; an explicit false is a value
					taxon_t[k] = v;
				end
			end
		end
	end

	return taxon_t;
end


--[[--------------------------< I S _ S E T >------------------------------------------------------------------

Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) and is not an empty string.

]]

local function is_set( var )
	if nil == var then															-- missing argument is never 'set'
		return false;
	end
	return '' ~= var;															-- anything but the empty string counts as 'set'
end


--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Makes a wikilink; when both link and display text is provided, returns a wikilink in the form [[L|D]]; if only
link is provided, returns a wikilink in the form [[L]]; if neither are provided or link is omitted, returns an
empty string.

]=]

local function make_wikilink (link, display)
	-- <link>: wikilink target; <display>: optional label
	-- returns: '[[link|display]]', '[[link]]' when <display> is not set, '' when <link> is not set
	if not is_set (link) then
		return '';
	end

	local parts_t = {'[[', link};
	if is_set (display) then													-- piped form only when there is a label
		parts_t[3] = '|';
		parts_t[4] = display;
	end
	parts_t[#parts_t + 1] = ']]';

	return table.concat (parts_t);
end


--[[--------------------------< L I N K _ M A K E >------------------------------------------------------------

makes a wikilink from the value assigned to 'link' in <taxon_t>

]]

local function link_make (taxon_t, taxon)
	-- <taxon_t>: taxon data table; reads .link, .rank, .extinct
	-- <taxon>: taxon name as used to key the data (may carry a '/...' qualifier)
	-- returns: decorated wikilink string; nil when <taxon_t> has no link
	local raw_link = taxon_t.link;
	if not raw_link then
		return nil;
	end

	local target, label;
	if raw_link:find ('Incertae sedis', 1, true) then
		target = 'Incertae sedis';
		label = '\'\'incertae sedis\'\'';
	else
		target, label = raw_link:match ('([^|]+)|(.*)');						-- piped link: split into target and label
		if not target then														-- not piped: label comes from the taxon name
			label = taxon:match ('([^/]+)/(.*)') or taxon;						-- text ahead of the first '/' if there is one; variant of Module:Autotaxobox l.stripExtra(taxon)
		end
	end

	target = target or label;													-- when no target, link to the label text

	if is_italic_taxon_t[taxon_t.rank] then
		label = require('Module:TaxonItalics').italicizeTaxonName (label, false);
	end

	local link = make_wikilink (target, label);

	if taxon_t.extinct and not link:find ('†', 1, true) then					-- prefix a dagger unless one is already there
		link = '<span style="font-style:normal;font-weight:normal;">†</span>' .. link;
	end
	if taxon:match ('/%?$') and not link:find ('?', 1, true) then				-- taxon name ends with '/?': flag as questionable
		link = link .. '<span style="font-style:normal;font-weight:normal;"> (?)</span>'
	end

	return link;
end


--[[--------------------------< _ C R A W L _ T R E E >--------------------------------------------------------

experimental function to see if it is possible / makes sense to replace 87k+ taxonomy templates with lua data modules

for use in Module:Autotaxobox/sandbox call this function with make_tables() from taxonomyList()

fills three tables:
	tree_t: inverted sequence table of taxa and their ranks

these are here because of an experiment in Module:Autotaxobox/sandbox
	taxon_tree_t: equivalent to Module:Autotaxobox taxonTable{} – a sequence table where [1] is <starting node> but also has [n]=number of taxa listed
	rank_tree_t: equivalent to Module:Autotaxobox taxonRankTable{} – a sequence table that matches the taxa in taxonTable{}; for 'Life', Veterovata, and Ichnos, empty string

]]

local function _crawl_tree (taxon, tree_t, taxon_tree_t, rank_tree_t)
	-- <taxon>: name of the taxon where the crawl starts
	-- <tree_t>: sequence that receives 'rank: linked taxon' display strings (and red error notes), topmost taxon first
	-- <taxon_tree_t>, <rank_tree_t>: sequences that receive taxon names and their ranks, starting taxon first
	-- returns: tree_t, taxon_tree_t, rank_tree_t (the same tables that were passed in)
	local starting_taxon = taxon;												-- save a copy for error messaging
	local taxon_t = taxon_get (taxon);											-- initialize

	while taxon_t and taxon do
		local rank = taxon_t.rank;
		if rank then															-- nil for Taxonomy/Life
			local styled_rank = anglicize_rank_t[rank];							-- not for inclusion in <rank_tree_t>
			if not styled_rank then												-- rank not in the anglicize table: show it with an uppercase first letter instead of failing on nil
				styled_rank = rank:gsub ('(%a)', string.upper, 1);
			end
			if taxon_t.always_display or is_always_displayed_t[rank:lower()] then
				styled_rank = '\'\'\'' .. styled_rank .. '\'\'\'';
			end

			local linked_taxon = link_make (taxon_t, taxon) or taxon;			-- not for inclusion <taxon_tree_t>; link_make() returns nil when there is no link data
			local suffix = taxon:match ('%/[%w]+$') or "";						-- get suffix (e.g. /skip, /plantae)
			if suffix ~= "" then
				linked_taxon = linked_taxon .. "&nbsp;&nbsp;<small>" .. suffix .. "</small>";
			end
			table.insert (tree_t, 1, styled_rank .. ': ' .. linked_taxon);
			if taxon:find ('/skip', 1, true) then
				table.insert (tree_t, 1, string.rep ('&middot;', 5 ) .. ': ' .. string.rep ('&middot;', 5 ));	-- not for inclusion <taxon_tree_t>
			end

			table.insert (taxon_tree_t, taxon);									-- Autotaxobox/sandbox experiment
			table.insert (rank_tree_t, rank);									-- Autotaxobox/sandbox experiment

			if taxon_t.parent then
				taxon = taxon_t.parent;											-- get the next taxon
				taxon_t = taxon_get (taxon);									-- and get its taxon table
				if nil == taxon_t then											-- the loop condition ends the crawl
					table.insert (tree_t, 1, '<span style="color:#d33">no path to \'Life\' at ' .. taxon .. ' from ' .. starting_taxon .. '</span>');
				end
			else
				table.insert (tree_t, 1, '<span style="color:#d33">no parent for taxon ' .. taxon .. ' from ' .. starting_taxon .. '</span>');
				taxon = nil;													-- no next taxon; was 'node = nil' which wrote an undeclared global and never ended the loop
			end
		else
			if ('Life' == taxon) or ('Veterovata' == taxon) or ('Ichnos' == taxon) then
				table.insert (taxon_tree_t, taxon);								-- Autotaxobox/sandbox experiment
				table.insert (rank_tree_t, '');									-- Autotaxobox/sandbox experiment
			else
				table.insert (tree_t, 1, '<span style="color:#d33">no rank or same_as for taxon ' .. taxon .. ' from ' .. starting_taxon .. '</span>');
			end

			taxon = nil;														-- no next taxon
		end
	end

	taxon_tree_t.n = #taxon_tree_t;												-- Autotaxobox/sandbox experiment; add the number of taxa in this table
	return tree_t, taxon_tree_t, rank_tree_t
end


--[[--------------------------< W I K I D A T A _ G E T >------------------------------------------------------



]]

-- Wikidata property ids handed to wikidata_get() by _crawl_wikidata_tree().  The trailing notes list the
-- mainsnak fields relevant to each property: a plain string value for the name, an item (qid) value for rank and parent.
local TAXON_NAME_P = 'P225'; --  taxon name: mainsnak.datavalue["type"] = "string", mainsnak.datavalue.value = Felis, mainsnak["property"] = "P225", mainsnak["snaktype"] = "value",
local TAXON_RANK_P = 'P105'; --  taxon rank: mainsnak.datavalue.value["entity-type"] = "item", mainsnak.datavalue.value.id = qid, mainsnak["property"] = "P105", mainsnak["snaktype"] = "value",
local TAXON_PARENT_P = 'P171'; --  parent taxon: mainsnak.datavalue.value["entity-type"] = "item", mainsnak.datavalue.value.id = qid, mainsnak["property"] = "P171", mainsnak["snaktype"] = "value",


local function wikidata_get (qid, prop)
	-- <qid>: wikidata entity id; <prop>: property id (one of the TAXON_..._P constants)
	-- returns, from the first best statement of <prop> on <qid>:
	--		string value: that string
	--		entity value: the entity's English label (its qid when there is no English label), and its qid
	--		anything else: a red error note as the only return value, so callers that loop on the second
	--			return value stop.  A missing statement used to raise a Lua error, which aborted the whole
	--			crawl at the first taxon lacking a statement for <prop>.
	local statements_t = mw.wikibase.getBestStatements (qid, prop);
	local statement_t = statements_t and statements_t[1];
	if not statement_t then
		return '<span style="color:#d33">no data for ' .. qid .. ' ' .. prop .. '</span>';
	end

	local datavalue_t = statement_t.mainsnak and statement_t.mainsnak.datavalue;
	if not datavalue_t then														-- statement present but carries no datavalue
		return '<span style="color:#d33">no value</span>';
	end

	local value = datavalue_t.value;
	if 'string' == type (value) then
		return value;															-- return a string value; here for taxon name
	end

	if 'table' == type (value) and value.id then
		local s_qid = value.id;													-- extract the qid for taxon rank or parent taxon
		return mw.wikibase.getLabelByLang (s_qid, 'en') or s_qid, s_qid;		-- label (or qid when unlabelled) and s_qid (used for parent)
	end

	return '<span style="color:#d33">unsupported value for ' .. qid .. ' ' .. prop .. '</span>';
end

--[[--------------------------< _ C R A W L _ W I K I D A T A _ T R E E >--------------------------------------

this is an experimental function to see if I can figure out how to walk a taxonomy tree in wikidata
Wikidata:Project_chat#is_this_possible%3F

]]

local function _crawl_wikidata_tree (taxon_qid)
	-- <taxon_qid>: wikidata qid of the taxon where the crawl starts; nil yields an empty string
	-- returns: 'rank: taxon' rows joined with <br />, topmost taxon first
	local rows_t = {};
	local qid = taxon_qid;

	while qid do
		local name = wikidata_get (qid, TAXON_NAME_P);
		local rank = wikidata_get (qid, TAXON_RANK_P);
		local _, parent_qid = wikidata_get (qid, TAXON_PARENT_P);				-- parent taxon name discarded; only its qid is needed

		if is_italic_taxon_t[rank] then											-- italicize the name when appropriate
			name = '\'\'' .. name .. '\'\'';
		end

		local rank_label = anglicize_rank_t[rank];								-- anglicize accepted ranks
		if not rank_label then
			rank_label = rank:gsub ('(%a)', string.upper, 1);					-- uppercase first letter for all others
		end
		if is_always_displayed_t[rank_label:lower()] then						-- bold the ranks that are always displayed
			rank_label = '\'\'\'' .. rank_label .. '\'\'\'';
		end

		table.insert (rows_t, 1, rank_label .. ': ' .. name);					-- save at the top of the list
		qid = parent_qid;														-- climb to the parent; nil ends the crawl
	end

	return table.concat (rows_t, '<br />');										-- make a big string and done
end


--[[--------------------------< C R A W L _ W I K I D A T A _ T R E E >----------------------------------------

entry point from {{#invoke:Sandbox/trappist_the_monk/taxonomy|crawl_wikidata_tree|Q...}}

]]

local function crawl_wikidata_tree (frame)
	-- <frame>: the #invoke frame; frame.args[1] is the wikidata qid of the taxon to start from
	-- returns: the rendered tree; empty string when no qid was supplied
	local qid = frame.args[1];
	if 'string' == type (qid) then
		qid = qid:match ('^%s*(.-)%s*$');										-- positional parameters arrive untrimmed
	end
	if nil == qid or '' == qid then												-- nothing to crawl; same result as the crawler gives for nil
		return '';
	end

	return _crawl_wikidata_tree (qid);
end


--[[--------------------------< C R A W L _ T R E E >----------------------------------------------------------


]]

local function crawl_tree (frame)
	-- <frame>: the #invoke frame; frame.args[1] is the taxon name to start from ('Felis' when missing or empty)
	-- returns: wikitable markup with three columns – the tree from the lua data modules, the tree from
	-- wikidata, and Module:Autotaxobox/sandbox taxonomyList output for comparison
	-- side effect: writes a per-module load count to the lua log
	local taxon = frame.args[1];
	if nil == taxon or '' == taxon then
		taxon = 'Felis';
	end

	local wikidata_id = mw.wikibase.getEntityIdForTitle (taxon);				-- only works when no disambiguation

	local tree_t = _crawl_tree (taxon, {}, {}, {});								-- crawl the tree to get the debug taxon list of taxa and their ranks; empty tables not used here

	local loaded = {};
	for module_name in pairs (modules_loaded) do								-- make a sortable list
		table.insert (loaded, module_name);
	end

	local function comp (a, b)
		local letter_a, enum_a = a:match ('(%u)(%d*)$');						-- get letter and enumerator from 'taxonomy <letter><enum>'
		local letter_b, enum_b = b:match ('(%u)(%d*)$');

		if not (letter_a and letter_b) then										-- a name without a trailing uppercase letter (e.g. 'taxonomy symbols')
			return a < b;														-- compare whole names rather than comparing nil
		end
		if letter_a ~= letter_b then
			return letter_a < letter_b;
		end
		return (tonumber (enum_a) or 0) < (tonumber (enum_b) or 0);				-- same letter: numeric enumerator order; unenumerated sorts first
	end

	table.sort (loaded, comp);
	for _, module_name in ipairs (loaded) do
		mw.log (module_name .. ': ' .. modules_loaded[module_name]);
	end

	local out_t = {};															-- render crude tree from data modules, from wikidata, and taxonomy list from Module:Autotaxobox for comparison
	table.insert (out_t, '{| class="wikitable"\n! lua data module experiment !! wikidata experiment !! autotaxobox reference\n|-\n|');
	table.insert (out_t, table.concat (tree_t, '<br />'));
	table.insert (out_t, '\n| ');
	table.insert (out_t, _crawl_wikidata_tree (wikidata_id));
	table.insert (out_t, '\n| ');
	table.insert (out_t, frame:callParserFunction ('#invoke', {'Autotaxobox/sandbox', 'taxonomyList', taxon}));
	table.insert (out_t, '\n|-\n|}');

	return table.concat (out_t);
end


--[[--------------------------< _ M A K E _ T A B L E S >------------------------------------------------------

experimental function to see if it is possible / makes sense to replace 87k+ taxonomy templates with lua data modules

for use in Module:Autotaxobox/sandbox

fills two tables:
	taxon_tree_t: equivalent to Module:Autotaxobox taxonTable{} – a sequence table where [1] is <starting node> but also has [n]=number of taxa listed
	rank_tree_t: equivalent to Module:Autotaxobox taxonRankTable{} – a sequence table that matches the taxa in taxonTable{}; for 'Life', Veterovata, and Ichnos, empty string

]]

local function _make_tables (taxon, taxon_tree_t, rank_tree_t)
	-- <taxon>: name of the taxon where the crawl starts
	-- <taxon_tree_t>: sequence that receives taxon names, starting taxon first; also gets .n = number of taxa
	-- <rank_tree_t>: sequence that receives the matching ranks; '' for 'Life', 'Veterovata', and 'Ichnos'
	-- returns: taxon_tree_t, rank_tree_t (the same tables that were passed in)
	local taxon_t = taxon_get (taxon);											-- initialize

	while taxon_t and taxon do
		if taxon_t.rank then													-- nil for Taxonomy/Life
			table.insert (taxon_tree_t, taxon);									-- Autotaxobox/sandbox experiment
			table.insert (rank_tree_t, taxon_t.rank);							-- Autotaxobox/sandbox experiment

			taxon = taxon_t.parent;												-- step to the parent; without this step the loop never advanced.  nil (no parent) ends the crawl
			if taxon then
				taxon_t = taxon_get (taxon);									-- nil (no data for the parent) also ends the crawl
			end
		else
			if ('Life' == taxon) or ('Veterovata' == taxon) or ('Ichnos' == taxon) then
				table.insert (taxon_tree_t, taxon);								-- Autotaxobox/sandbox experiment
				table.insert (rank_tree_t, '');									-- Autotaxobox/sandbox experiment
			end

			taxon = nil;														-- no next taxon
		end
	end

	taxon_tree_t.n = #taxon_tree_t;												-- Autotaxobox/sandbox experiment; add the number of taxa in this table
	return taxon_tree_t, rank_tree_t
end


--[[--------------------------< M A K E _ T A B L E S >--------------------------------------------------------

interface function between Module:Autotaxobox taxonomyList() and _crawl_tree()

]]

local function make_tables (taxon)
	local taxon_list_t, rank_list_t = {}, {};									-- fresh tables handed to _make_tables() for <taxon>
	local discard;																-- receives the first return value, which is not used here

	-- second argument is an empty placeholder table that this caller has no use for;
	-- the second and third return values replace the two local tables
	discard, taxon_list_t, rank_list_t = _make_tables (taxon, {}, taxon_list_t, rank_list_t);

	return taxon_list_t, rank_list_t;											-- taxon list first, rank list second
end


--[[--------------------------< D A T A _ T A B L E _ W I K I L I N K _ M A K E >------------------------------

<suffix> is the lua data module suffix A1, A2, Q, etc
<caption> is boolean true when creating wikilink for wikitable caption

]]

local function data_table_wikilink_make (suffix, caption)
	local pieces_t = {};														-- fragments of the finished markup, in output order

	local function push (piece)													-- append <piece> to the end of <pieces_t>
		pieces_t[#pieces_t + 1] = piece;
	end

	if caption then
		push (' <span style="font-weight: normal">');							-- caption text is bold by default; wrap the link in a normal-weight span
	end

	push ('&#x5B;[[');															-- literal '[' followed by wikilink open
	push (base_data_table_name);												-- wikilink target: base data module name ...
	push (suffix);																-- ... followed by <suffix>
	push ('|taxonomy ');														-- wikilink label: 'taxonomy ' ...
	push (suffix);																-- ... followed by <suffix>
	push (']]&#x5D;');															-- wikilink close followed by literal ']'

	if caption then
		push ('</span>');														-- close the caption styling span
	end

	return table.concat (pieces_t)
end


--[[--------------------------< S H O W _ T A X O N _ D A T A >------------------------------------------------

similar to the table produced by Template:Taxonomy key

]]

local function show_taxon_data (frame)
	-- Renders a wikitable that describes one taxon's entry in the taxonomy data modules.
	--	frame.args[1]: name of the taxon to describe
	-- Returns the preprocessed wikitable, or a red error message when the taxon name is missing
	-- or its data cannot be fetched.
	local taxon = frame.args[1];
	if not taxon or '' == taxon then											-- nothing to look up
		return '<span style="color:#d33">show_taxon_data: a taxon name must be supplied</span>';
	end

	local taxon_t = taxon_get (taxon, true);									-- get taxon data; do not follow same_as
	if not taxon_t then															-- taxon_get() can return nil; indexing it below would throw
		return '<span style="color:#d33">Taxon: ' .. taxon .. ' not found</span>';
	end

	local out_t = {};
	local suffix = module_select (taxon);										-- get suffix for data module link

	table.insert (out_t, '{| class="wikitable"');								-- open wikitable
	table.insert (out_t, '\n|+ ');												-- table caption wikimarkup
	table.insert (out_t, taxon);												-- the taxon's name
	table.insert (out_t, data_table_wikilink_make (suffix, true));				-- bracketed wikilink to lua data table; the helper supplies its own (closed) font-weight span so none is opened here

	local same_as = taxon_t.same_as;
	if same_as then
		taxon_t = taxon_get (taxon);											-- get same_as taxon data; this time follow same_as
		if not taxon_t then
			return '<span style="color:#d33">Taxon: ' .. taxon .. ' same_as target ' .. tostring (same_as) .. ' not found</span>';
		end
	end

	local parent = taxon_t.parent;
	local parent_t;																-- stays nil when there is no parent or the parent has no data
	if parent then
		parent_t = taxon_get (parent);											-- get parent taxon's data; follow same_as
	end

	table.insert (out_t, '\n|-\n|Parent:\n|');
	if parent then
		suffix = module_select (parent);										-- get suffix for parent's data module link
		table.insert (out_t, '<code>');											-- open code tag
		table.insert (out_t, parent);											-- add parent taxon name
		table.insert (out_t, '</code> ');										-- close code tag; include space before lua data table wikilink
		table.insert (out_t, data_table_wikilink_make (suffix));				-- add bracketed wikilink to lua data table for this <suffix>
	else
		table.insert (out_t, '–');												-- no parent recorded for this taxon
	end

	table.insert (out_t, '\n|-\n|Rank:\n|');
	local rank;
	if taxon_t.rank then
		rank = anglicize_rank_t[taxon_t.rank];
		local rank_t = {};
		table.insert (rank_t, '<code>');										-- open code tag
		table.insert (rank_t, taxon_t.rank);									-- insert raw rank from taxon data
		table.insert (rank_t, '</code> [displays as: ');						-- start the message
		if is_italic_taxon_t[taxon_t.rank] then									-- for italicized taxon ranks
			table.insert (rank_t, '\'\'');										-- open italic markup
			table.insert (rank_t, rank);										-- add anglicized rank
			table.insert (rank_t, '\'\'');										-- close italic markup
		else
			table.insert (rank_t, rank);										-- add anglicized rank
		end
		table.insert (rank_t, ']');												-- finish the message

		rank = table.concat (rank_t);											-- and make a big string
	else
		rank = '<span style="color:#d33">– a rank must be supplied</span>';
	end
	table.insert (out_t, rank);

	local link = link_make (taxon_t, taxon);

	if link then
		link = table.concat ({'<code>', mw.text.nowiki (taxon_t.link), '</code> [displays as: ', link, ']'});
	end

	table.insert (out_t, '\n|-\n|Link:\n|');
	table.insert (out_t, link or '–');

	local extinct = (taxon_t.extinct and '<code>true</code>') or 'no';
	if not taxon_t.extinct and parent_t and parent_t.extinct then				-- test the taxon's data table (not the name string); warn only when the taxon itself is not marked extinct
		extinct = '<span style="background-color:#FCC">parent is marked as extinct</span>';
	end

	table.insert (out_t, '\n|-\n|Extinct:\n|');
	table.insert (out_t, extinct);

	table.insert (out_t, '\n|-\n|Always displayed:\n|');
	table.insert (out_t, (taxon_t.always_display and '<code>true</code>') or ((is_always_displayed_t[taxon_t.rank] and 'yes (major rank)') or 'no'))

	table.insert (out_t, '\n|-\n|Taxonomic references:\n|');
	table.insert (out_t, taxon_t.refs or '–');

	table.insert (out_t, '\n|-\n|Parent\'s taxonomic references:\n|');
	table.insert (out_t, (parent_t and parent_t.refs) or '–');

	if same_as then
		suffix = module_select (same_as);

		table.insert (out_t, '\n|-\n|Same as taxon:\n|');
		table.insert (out_t, '<code>');											-- open code tag
		table.insert (out_t, same_as);											-- add same_as taxon name
		table.insert (out_t, '</code> ');										-- close code tag; include space before lua data table wikilink
		table.insert (out_t, data_table_wikilink_make (suffix));				-- add bracketed wikilink to lua data table for this <suffix>
	end

	if taxon:find ('/skip$') then
		local skipped = taxon:match ('([^/]+)/skip$');							-- skipped taxon name without '/skip' suffix; nil when nothing precedes the suffix
		suffix = module_select (skipped or taxon);								-- link to the data module that holds the skipped taxon (the name shown), as the Parent and Same-as rows do

		table.insert (out_t, '\n|-\n| colspan="2" | For the suffix "/skip", see [[Wikipedia:Automated_taxobox_system/advanced_taxonomy#Skip_taxonomy_templates|Skip taxonomy templates]].<br />');
		table.insert (out_t, 'For the skipped taxa, see <code>');				-- start the message and open code tag
		table.insert (out_t, skipped);											-- add skipped taxon name
		table.insert (out_t, '</code> ');										-- close code tag; include space before lua data table wikilink
		table.insert (out_t, data_table_wikilink_make (suffix));				-- add bracketed wikilink to lua data table for this <suffix>
	end

	if taxon:find ('/%?$') then
		table.insert (out_t, '\n|-\n| colspan="2" | For the suffix "/?", see [[Wikipedia:Automated_taxobox_system/advanced_taxonomy#Questionable_assignments|Questionable assignments]].');
	end

	if taxon:find ('Incertae sedis') then
		table.insert (out_t, '\n|-\n| colspan="2" |');
		table.insert (out_t, 'For taxon names with "Incertae sedis", see [[Wikipedia:Automated_taxobox_system/advanced_taxonomy#Incertae_sedis_taxonomy_templates|\'\'Incertae sedis\'\' taxonomy templates]].');
	end

	table.insert (out_t, '\n|}');												-- close wikitable
	return frame:preprocess (table.concat (out_t));
end


--[[--------------------------< D E L E T E _ T A X O N >------------------------------------------------------

deletes a taxon entry from a taxonomy data module.

Calling this function finds the correct data module, reads it and removes the specified taxon entry.  The output
is a copy of the data module that can be copy/pasted into the data module.  Yeah, I know, crude, but Lua can't
write wikitext.

When the taxon isn't found in the data module, an error message is returned instead.  TODO: more error checking?

]]

local function delete_taxon (frame)
	-- Produces a copy of a taxonomy data module with one taxon entry removed.
	--	frame.args[1]: name of the taxon whose entry is to be removed
	-- Returns 'deleted: <taxon>' followed by the remaining entries (sorted) inside a syntaxhighlight tag,
	-- or a red error message when the taxon name is missing, the data module cannot be read,
	-- or the taxon has no entry in that module.
	local taxon = frame.args[1];
	if not taxon or '' == taxon then											-- nothing to delete
		return '<span style="color:#d33">delete_taxon: a taxon name must be supplied</span>';
	end

	local suffix = module_select (taxon);
	if not suffix then															-- without a suffix the data module name cannot be built
		return '<span style="color:#d33">Taxon: ' .. taxon .. ' no data module selected</span>';
	end

	local module_name = base_data_table_name .. suffix;							-- full name of the data module that should hold <taxon>
	local title = mw.title.new (module_name);									-- nil when the name is not a valid title
	local content = title and title:getContent();								-- nil when the page does not exist
	if not content then
		return '<span style="color:#d33">Data module [[' .. module_name .. ']] could not be read</span>';
	end

	local found = false;
	local out_t = {};

	for entry in content:gmatch ('\t*%[\'[^\r\n]+},[\r\n]+') do					-- each single-line entry: optional tabs, ['name'] ... }, and its line ending
		local entry_taxon = entry:match ('^\t*%[\'([^=]+)\'%]%s*=');			-- extract the key name from ['name'] =
		if entry_taxon == taxon then
			found = true;														-- drop this entry from the output
		else
			table.insert (out_t, entry);
		end
	end

	if not found then
		return '<span style="color:#d33">Taxon: ' .. taxon .. ' not found in [[' .. module_name .. ']]</span>';
	end
	table.sort (out_t)															-- entries are whole lines so this orders them by their leading text

	table.insert (out_t, 1, 'return {\n')										-- re-create the data module wrapper around the remaining entries
	table.insert (out_t, '\t}')

	return 'deleted: ' .. taxon .. '\n\n' .. frame:callParserFunction ({name='#tag:syntaxhighlight', args={table.concat (out_t), lang='lua'}});

end


--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------


]]

return {
	-- functions defined in this chunk of the module
	make_tables = make_tables,													-- interface function between Module:Autotaxobox taxonomyList() and _crawl_tree()
	show_taxon_data = show_taxon_data,											-- similar to the table produced by Template:Taxonomy key
	delete_taxon = delete_taxon,												-- emits a copy of a data module with one taxon entry removed

	-- tree crawlers; defined earlier in this module
	crawl_tree = crawl_tree,
	crawl_wikidata_tree = crawl_wikidata_tree,

--	taxomap_t = taxomap_t,
	}