Jump to content

Module:Sandbox/trappist the monk/taxonomy

Permanently protected module
From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Trappist the monk (talk | contribs) at 17:49, 16 October 2021. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
require('Module:No globals');

--[[--------------------------< T A X O M A P >----------------------------------------------------------------

This is a table of sequence tables, keyed by uppercase initial letter.  Each inner sequence table lists the first
and last taxon name held in one particular data module.  module_select() uses these tables to determine which
data module 'should' have the data for a taxon name.

	[1] first taxon name in the data module
	[2] last taxon name in the data module
	[3] suffix appended to the base module name to create: 'Module:Sandbox/trappist the monk/taxonomy <suffix>'

These tables are created by an awb script when it splits a much larger raw data file; the splits are then
uploaded to en.wiki.

Notes for maintainers:
	- a letter that has no entry here is not split; module_select() returns the bare letter for such names
	- the ranges are closed intervals on the listed names; a name that sorts between the last name of one
	  table and the first name of the next matches no table, and module_select() returns nothing for it

]]

local taxomap_t = {
	A = {
		{'ADA clade', 'Acremoniella', 'A1'},
		{'Acrepidopterum', 'Afripupa', 'A2'},
		{'Afristreptaxis', 'Alienochelys', 'A3'},
		{'Alienoptera', 'Ammotrophus', 'A4'},
		{'Ammoxenidae', 'Anerastia', 'A5'},
		{'Anerastiini', 'Aotus', 'A6'},
		{'Aoupinieta', 'Archemitra', 'A7'},
		{'Archencyrtus', 'Asfuvirales', 'A8'},
		{'Asgard', 'Auriculariaceae', 'A9'},
		{'Auriculariales', 'Azygopus', 'A10'},
		},
	B = {
		{'BOP clade', 'Baurusuchus', 'B1'},
		{'Baurutitan', 'Blotiella', 'B2'},
		{'Blubervirales', 'Brasilidia', 'B3'},
		{'Brasiliguana', 'Byturus', 'B4'},
		},
	C = {
		{'CRuMs', 'Camirus', 'C1'},
		{'Camisiidae', 'Cassidae', 'C2'},
		{'Cassidinae', 'Cerianthula', 'C3'},
		{'Cerianthus', 'Chersodromus', 'C4'},
		{'Chersomanes', 'Chrysoritis', 'C5'},
		{'Chrysoscota', 'Cnephalocotes', 'C6'},
		{'Cnephalodes', 'Conosperminae', 'C7'},
		{'Conospermum', 'Craterispermeae', 'C8'},
		{'Craterispermum', 'Cuprella', 'C9'},
		{'Cupressaceae', 'Czekanowskiales', 'C10'},
		},
	D = {
		{'Daanosaurus', 'Denteilema', 'D1'},
		{'Dentella', 'Digitalideae', 'D2'},
		{'Digitalis', 'Dodonaeoideae', 'D3'},
		{'Dodonidia', 'Dzharatitanis', 'D4'},
		{'Dzhungarocosa', 'Dzungariotherium', 'D5'},
		},
	E = {
		{'Eacles', 'Empidinae', 'E1'},
		{'Empidoidea', 'Epiprininae', 'E2'},
		{'Epiprinus', 'Eucrada', 'E3'},
		{'Eucradinae', 'Eutrichopidia', 'E4'},
		{'Eutrichopoda', 'Ezosciadium', 'E5'},
		},
	G = {
		{'Gabara', 'Gibbacousteau', 'G1'},
		{'Gibbafroneta', 'Gonypetidae', 'G2'},
		{'Gonypetinae', 'Gyroweisia', 'G3'},
		},
	H = {
		{'HTVC010P', 'Hecastocleis', 'H1'},
		{'Hecatera', 'Hesperoptenus', 'H2'},
		{'Hesperorhipis', 'Homalocephale', 'H3'},
		{'Homaloceras', 'Hymenocardia', 'H4'},
		{'Hymenocardiinae', 'Hytrosaviridae', 'H5'},
		},
	L = {
		{'La', 'Lechriopini', 'L1'},
		{'Lechriops', 'Lethrinidae', 'L2'},
		{'Lethrinops', 'Lithophyllum', 'L3'},
		{'Lithopoma', 'Lycosidae', 'L4'},
		{'Lycosinae', 'Lyttoniidina', 'L5'},
		},
	M = {
		{'Maaqwi', 'Manjala', 'M1'},
		{'Manjekia', 'Meganeurinae', 'M2'},
		{'Meganeuropsis', 'Messageriella', 'M3'},
		{'Messapicetus', 'Mideopsis', 'M4'},
		{'Midgee', 'Mopalia', 'M5'},
		{'Mopaliidae', 'Myxococcaceae', 'M6'},
		-- NOTE(review): the last name below begins with a lowercase letter; module_select() uppercases only
		-- the initial used as the map key and compares the full name as-is, so this range presumably relies
		-- on lowercase sorting after uppercase in Lua's string comparison -- confirm
		{'Myxococcales', 'minke whale species complex', 'M7'},
		},
	N = {
		{'Naashoibitosaurus', 'Neocoleoidea', 'N1'},
		{'Neocollyris', 'Nicolepeira', 'N2'},
		{'Nicoletia', 'Nystalea', 'N3'},
		{'Nystaleinae', 'Nyungwea', 'N4'},
		},
	O = {
		{'OSLEUM', 'Ondigus', 'O1'},
		{'Ondina', 'Orthonevra', 'O2'},
		{'Orthonopias', 'Ozyptila', 'O3'},
		},
	P = {
		{'P2virus', 'Papasula', 'P1'},
		{'Papaver', 'Paratachardina', 'P2'},
		{'Paratachina', 'Peltigeraceae', 'P3'},
		{'Peltigerales', 'Phalacrus (beetle)', 'P4'},
		{'Phalaenoides', 'Phyllodiscus', 'P5'},
		{'Phyllodistomum', 'Plantae', 'P6'},
		{'Plantae/?', 'Poecilognathus', 'P7'},
		{'Poecilographa', 'Praya', 'P8'},
		{'Praydidae', 'Protodictya', 'P9'},
		{'Protodiplatyidae', 'Pseudonigrita', 'P10'},
		{'Pseudoniscidae', 'Ptyoiulus', 'P11'},
		{'Ptyomaxia', 'Pyxis', 'P12'},
		},
	S = {
		{'SAR', 'Scaptochirus', 'S1'},
		{'Scaptocoris', 'Segestes', 'S2'},
		{'Segestria', 'Sinapriculus', 'S3'},
		{'Sinarachna', 'Sphaerobambos', 'S4'},
		{'Sphaerobothria', 'Stenolicmus', 'S5'},
		{'Stenoloba', 'Stylotermes', 'S6'},
		{'Stylotermitidae', 'Szovitsia', 'S7'},
		},
	T = {
		{'TG3', 'Tentyriini', 'T1'},
		{'Tenualosa', 'Thermodesulfobacteriaceae', 'T2'},
		{'Thermodesulfobacteriales', 'Torreyochloinae', 'T3'},
		{'Torricellia', 'Trihecaton', 'T4'},
		{'Trihecatontidae', 'Typhlosyrinx', 'T5'},
		{'Typhlotanaidae', 'Tzvelevopyrethrum', 'T6'},
		},
	}

local modules_loaded = {};														-- k/v table: key is 'taxonomy <suffix>', value is the number of times that data module was loaded while crawling the tree; crawl_tree() writes these to the lua log


--[[--------------------------< M O D U L E _ S E L E C T >----------------------------------------------------

Work out which data module 'should' hold the data for <taxon>.

Returns the suffix that the caller appends to the base module name to make:
	Module:Sandbox/trappist the monk/taxonomy <suffix>

The suffix is one of:
	the enumerated letter (third member of a <taxomap_t> range table) when <taxon> lies inside a listed range
	the uppercase initial letter of <taxon> when <taxomap_t> has no table for that letter
	the string 'symbols' when <taxon> does not begin with a letter and <taxomap_t> has no 'symbols' table

Returns nothing when <taxomap_t> has a table for the initial letter but <taxon> lies outside every listed range.

]]

local function module_select (taxon)
	local initial = taxon:match ('^%a');										-- first character of the taxon name when that character is a letter; nil else
	local key = 'symbols';														-- map key to use when the name does not begin with a letter

	if initial then
		key = initial:upper();													-- map keys are uppercase letters
	end

	local ranges_t = taxomap_t[key];											-- sequence of {first, last, suffix} tables for this key

	if not ranges_t then														-- nothing mapped for this key
		return key;																-- so the key itself is the suffix
	end

	for _, range_t in ipairs (ranges_t) do										-- look for the range that brackets <taxon>
		local first, last, suffix = range_t[1], range_t[2], range_t[3];

		if (first <= taxon) and (taxon <= last) then							-- inclusive at both ends
			return suffix;
		end
	end
end


--[[--------------------------< T A X O N O M Y _ E N T R Y _ G E T >------------------------------------------

helper for taxon_get().  Selects and loads the data module that 'should' hold <taxon>, counts the load in
<modules_loaded>, and returns the (read-only) entry table for <taxon>.

Returns nil when:
	<taxon> is not a string
	module_select() cannot map <taxon> to a data module
	the selected data module cannot be loaded
	the loaded data module has no table entry for <taxon>

]]

local function taxonomy_entry_get (taxon)
	if 'string' ~= type (taxon) then											-- module_select() calls string methods on <taxon>
		return nil;
	end

	local suffix = module_select (taxon);										-- get the suffix to append to the base data module name
	if not suffix then															-- <taxon> lies outside every range listed for its initial letter
		return nil;
	end

	local ok, taxonomy_t = pcall (mw.loadData, 'Module:Sandbox/trappist the monk/taxonomy ' .. suffix);	-- attempt to load the selected data module
	if not ok then																-- unable to load data module
		return nil;
	end

	local loaded_key = 'taxonomy ' .. suffix;
	modules_loaded[loaded_key] = (modules_loaded[loaded_key] or 0) + 1;		-- count this load; viewable in the lua log

	local entry_t = taxonomy_t[taxon];
	if 'table' ~= type (entry_t) then											-- module loaded but it does not have <taxon>
		return nil;
	end

	return entry_t;
end


--[[--------------------------< T A X O N _ G E T >------------------------------------------------------------

fetch a taxon table from the appropriate Module:Sandbox/trappist the monk/taxonomy <suffix> data module where
<suffix> is chosen by module_select().

follows one 'same_as' if that parameter is present: parameters that the <taxon> entry does not define are filled
in from the same_as entry; the 'same_as' parameter of the <taxon> entry itself is not copied into the result.
When the same_as entry cannot be fetched, the result holds only the parameters of the <taxon> entry.

returns a new (writable) table of the taxon's parameters; nil when the data module cannot be selected or loaded
or when the data module has no entry for <taxon>

]]

local function taxon_get (taxon)
	local entry_t = taxonomy_entry_get (taxon);
	if not entry_t then
		return nil;																-- no data for <taxon>
	end

	local taxon_t = {};															-- the return table
	local same_as;																-- holds the <same_as> value from <taxon> data; nil else

	for k, v in pairs (entry_t) do												-- copy content from taxonomy table to return table
		if 'same_as' == k then
			same_as = v;														-- remember the same_as value but don't copy it
		else
			taxon_t[k] = v;
		end
	end

	if same_as then																-- if there is a same_as value in the taxonomy table
		local same_as_t = taxonomy_entry_get (same_as);							-- fetch the same_as entry; nil when not available

		if same_as_t then
			for k, v in pairs (same_as_t) do									-- copy content of the same_as_t table into the taxon_t table
				if nil == taxon_t[k] then										-- but only if taxon_t doesn't already have a value (false is a value) for that parameter
					taxon_t[k] = v;
				end
			end
		end
	end

	return taxon_t;
end


--[[--------------------------< _ C R A W L _ T R E E >--------------------------------------------------------

experimental function to see if it is possible / makes sense to replace 87k+ taxonomy templates with lua data modules

for use in Module:Autotaxobox/sandbox call this function with make_tables() from taxonomyList()

Starting at <starting_node>, follows each taxon's 'parent' until a taxon without a rank is reached ('Life' is
the expected end point), until a taxon has no parent, until taxon_get() returns nil, or until a taxon is
visited a second time (a loop in the parent chain).

fills the three tables supplied by the caller and returns them in the same order:
	tree_t: inverted sequence table of '<rank>: <taxon>' strings; error messages are inserted at [1]
	taxon_tree_t: equivalent to Module:Autotaxobox taxonTable{} – a sequence table where [1] is <starting_node> but also has [n]=number of taxa listed
	rank_tree_t: equivalent to Module:Autotaxobox taxonRankTable{} – a sequence table that matches the taxa in taxonTable{}; for 'Life', empty string

]]

local function _crawl_tree (starting_node, tree_t, taxon_tree_t, rank_tree_t)
	local node = starting_node;
	local visited_t = {};														-- set of nodes already handled; used to detect a loop in the parent chain

	local taxon_t = taxon_get (node);											-- initialize

	if nil == taxon_t then														-- nothing to crawl; say so instead of returning an empty tree without explanation
		table.insert (tree_t, 1, '<span style="color:#d33">no taxonomy data for ' .. tostring (starting_node) .. '</span>');
	end

	while taxon_t and node do
		if visited_t[node] then													-- been here before; without this check a loop in the data would never terminate
			table.insert (tree_t, 1, '<span style="color:#d33">loop detected at ' .. node .. ' from ' .. starting_node .. '</span>');
			break;
		end
		visited_t[node] = true;

		if taxon_t.rank then													-- nil for Taxonomy/Life
			table.insert (tree_t, 1, taxon_t.rank .. ': ' .. node);
			if node:find ('/skip', 1, true) then								-- plain-text find; mark skipped levels with a row of dots
				table.insert (tree_t, 1, string.rep ('&middot;', 5 ) .. ': ' .. string.rep ('&middot;', 5 ));
			end
			table.insert (taxon_tree_t, node);
			table.insert (rank_tree_t, taxon_t.rank);
			if taxon_t.parent then
				node = taxon_t.parent;											-- get the next node
				taxon_t = taxon_get (node);										-- and get its taxon table
				if nil == taxon_t then											-- loop condition ends the crawl
					table.insert (tree_t, 1, '<span style="color:#d33">no path to \'Life\' at ' .. node .. ' from ' .. starting_node .. '</span>');
				end
			else
				table.insert (tree_t, 1, '<span style="color:#d33">no parent for node ' .. node .. ' from ' .. starting_node .. '</span>');
				node = nil;														-- no next node
			end
		else
			if 'Life' == node then												-- the expected end of the crawl
				table.insert (taxon_tree_t, node);
				table.insert (rank_tree_t, '');
			else
				table.insert (tree_t, 1, '<span style="color:#d33">no rank or same_as for node ' .. node .. ' from ' .. starting_node .. '</span>');
			end
			
			node = nil;															-- no next node
		end
	end

	taxon_tree_t.n = #taxon_tree_t;												-- add the number of taxa in this table
	return tree_t, taxon_tree_t, rank_tree_t
end


--[[--------------------------< M A K E _ T A B L E S >--------------------------------------------------------

interface function between Module:Autotaxobox taxonomyList() and _crawl_tree()

Crawls the tree that starts at <taxon> and returns two tables: the taxon list (with its count in [n]) and the
matching rank list.  The debug tree that _crawl_tree() also returns is discarded.

]]

local function make_tables (taxon)
	local _, taxa_t, ranks_t = _crawl_tree (taxon, {}, {}, {});					-- first return value is the debug tree; not used here

	return taxa_t, ranks_t;
end


--[[--------------------------< L O A D E D _ M O D U L E _ C O M P >------------------------------------------

sort comparator for the keys of <modules_loaded> ('taxonomy <letter><enum>', 'taxonomy <letter>', 'taxonomy symbols').

Orders by letter, then by enumerator as a number so that 'taxonomy P2' sorts ahead of 'taxonomy P10'; a name
without an enumerator sorts ahead of the enumerated names for the same letter.  Names that do not end with
<uppercase letter><optional digits> (that is: 'taxonomy symbols') sort after all of the others.

]]

local function loaded_module_comp (a, b)
	local letter_a, enum_a = a:match ('(%u)(%d*)$');							-- get letter and enumerator from 'taxonomy <letter><enum>'; nil when the name doesn't have that form
	local letter_b, enum_b = b:match ('(%u)(%d*)$');

	if not letter_a or not letter_b then										-- at least one name is not 'taxonomy <letter><enum>'
		if letter_a then
			return true;														-- only <b> lacks a letter; <a> sorts first
		end
		if letter_b then
			return false;														-- only <a> lacks a letter; <b> sorts first
		end
		return a < b;															-- neither has a letter; plain string compare
	end

	if letter_a ~= letter_b then
		return letter_a < letter_b;
	end

	return (tonumber (enum_a) or 0) < (tonumber (enum_b) or 0);				-- same letter; missing enumerator (empty string) counts as 0
end


--[[--------------------------< C R A W L _ T R E E >----------------------------------------------------------

debug entry point: {{#invoke:Sandbox/trappist the monk/taxonomy|crawl_tree|<taxon>}}

Crawls the tree that starts at <taxon> (frame.args[1]), writes the list of data modules that were loaded and how
often to the lua log, and returns a wikitable that holds the crude tree in the left cell and the output of
Module:Autotaxobox/sandbox taxonomyList() in the right cell for comparison.

Returns an error message when <taxon> is missing or empty.

]]

local function crawl_tree (frame)
	local taxon = frame.args[1];

	if not taxon or '' == taxon then											-- nothing to crawl
		return '<span style="color:#d33">crawl_tree: taxon name required</span>';
	end

	local tree_t = _crawl_tree (taxon, {}, {}, {});								-- crawl the tree to get the debug taxon list of taxa and their ranks; the other two tables not used here

	local loaded = {};
	for module in pairs (modules_loaded) do										-- make a sortable list
		table.insert (loaded, module);
	end

	table.sort (loaded, loaded_module_comp);
	for _, module in ipairs (loaded) do
		mw.log (module .. ': ' .. modules_loaded[module]);
	end

																				-- render crude tree and taxonomy list from Module:Autotaxobox for comparison
	return '{|\n|-\n|' .. table.concat (tree_t, '<br />') .. ' || ' .. frame:callParserFunction ('#invoke', {'Autotaxobox/sandbox', 'taxonomyList', taxon}) .. '\n|-\n|}';
end


--[[--------------------------< D E L E T E _ T A X O N >------------------------------------------------------

deletes a taxon entry from a taxonomy data module.

Calling this function finds the correct data module, reads its source text and removes the specified taxon
entry.  The output is a copy of the data module, with its remaining entries sorted, that can be copy/pasted
into the data module.  Yeah, I know, crude but lua can't write wikitext.

Entries are recognized as single lines of the form:
	['<taxon>'] = {...},

frame.args[1] is the name of the taxon to delete.

Returns an error message, and no module copy, when:
	no taxon name is supplied
	module_select() cannot map the taxon to a data module
	the data module page cannot be read
	the taxon is not found in the data module

]]

local function delete_taxon (frame)
	local taxon = frame.args[1];

	if not taxon or '' == taxon then
		return '<span style="color:#d33">delete_taxon: taxon name required</span>';
	end

	local suffix = module_select (taxon);
	if not suffix then															-- <taxon> lies outside every range listed for its initial letter
		return '<span style="color:#d33">Taxon: ' .. taxon .. ' not found in any taxonomy data module</span>';
	end

	local module_name = 'Module:Sandbox/trappist the monk/taxonomy ' .. suffix;
	local title = mw.title.new (module_name);									-- nil when the title is not valid
	local content = title and title:getContent();								-- nil when the page does not exist
	if not content then
		return '<span style="color:#d33">unable to read [[' .. module_name .. ']]</span>';
	end

	local found = false;
	local out_t = {};

	for entry in content:gmatch ('\t*%[\'[^\r\n]+},[\r\n]+') do					-- each entry is one line; keep the line ending with the entry
		local entry_taxon = entry:match ('^\t*%[\'([^=]+)\'%]%s*=');			-- the name inside [' and ']
		if entry_taxon == taxon then
			found = true;														-- drop this entry
		else
			table.insert (out_t, entry);
		end
	end
	
	if not found then
		return '<span style="color:#d33">Taxon: ' .. taxon .. ' not found in [[' .. module_name .. ']]</span>';
	end
	table.sort (out_t)

	table.insert (out_t, 1, 'return {\n')										-- wrap the entries so that the output is a complete data module
	table.insert (out_t, '\t}')
	
	return 'deleted: ' .. taxon .. '\n\n' .. frame:callParserFunction ({name='#tag:syntaxhighlight', args={table.concat (out_t), lang='lua'}});
end


--[[--------------------------< T E S T >----------------------------------------------------------------------

A test function that counts the number of 'parameters' used in a converted lua data file.

{{#invoke:Sandbox/trappist the monk/taxonomy|test|<suffix>}}

<suffix> (frame.args[1]) selects the data module 'Module:Sandbox/trappist the monk/taxonomy <suffix>'; when
omitted or empty, 'A' is used.

Also detects unknown parameters (most of the parameters in a malformed cite journal template in 
Template:Taxonomy/Parmastega |refs= parameter); identified a bug in the conversion script (<noinclude>...</noinclude>
not correctly removed).  The newest version of the conversion script does not retain unknown parameters.

Returns two <br />-separated lists of '<parameter>: <count>': the known parameters and, after the 'unknowns'
heading, the unknown parameters.  Each list is sorted by descending count; equal counts are listed in
ascending string order so that the output does not depend on pairs() order.

]]

local function test (frame)
	local suffix = frame and frame.args and frame.args[1];
	if suffix then
		suffix = mw.text.trim (suffix);											-- positional parameters keep their surrounding whitespace
	end
	if not suffix or '' == suffix then
		suffix = 'A';															-- historical default
	end

	local data_t = mw.loadData ('Module:Sandbox/trappist the monk/taxonomy ' .. suffix)
	local count_t = {parent = 0, rank=0, link=0, extinct=0, always_display=0, ['always display']=0, refs=0, same_as=0, ['same as']=0}
	local unknown_t = {};
	
	for _, taxon_t in pairs (data_t) do
		for key, _ in pairs (taxon_t) do
			if count_t[key] then												-- known parameter (0 is truthy in lua)
				count_t[key] = count_t[key] + 1;
			else
				unknown_t[key] = (unknown_t[key] or 0) + 1;
			end
		end
	end
	
	local ordered_t = {};
	local unknown_ordered_t = {};
	
	for key, value in pairs (count_t) do
		table.insert (ordered_t, key .. ': ' .. value);
	end
	
	for key, value in pairs (unknown_t) do
		table.insert (unknown_ordered_t, key .. ': ' .. value);
	end
	
	local function comp (a, b)
		local count_a = tonumber (a:match ('%d+$'));							-- the count is the run of digits at the end of '<parameter>: <count>'
		local count_b = tonumber (b:match ('%d+$'));

		if count_a ~= count_b then
			return count_a > count_b;											-- larger counts first
		end
		return a < b;															-- tie-break for a deterministic order
	end
	
	table.sort (ordered_t, comp);
	table.sort (unknown_ordered_t, comp);
	return table.concat (ordered_t, '<br />') .. '<br /><br />unknowns<br />' .. table.concat (unknown_ordered_t, '<br />')
end


--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

the module's public interface: entry points for {{#invoke:}} and for other modules, plus the module map table

]]

return {
	crawl_tree = crawl_tree,													-- debug rendering of the crawled tree
	delete_taxon = delete_taxon,												-- emit a copy of a data module with one taxon removed
	make_tables = make_tables,													-- interface function between Module:Autotaxobox taxonomyList() and _crawl_tree()
	test = test,																-- parameter usage counter

	taxomap_t = taxomap_t,														-- the first/last taxon name map
	}