-- Module:Sandbox/trappist the monk/taxonomy
require('Module:No globals');
--[[--------------------------< T A X O M A P >----------------------------------------------------------------
this is a table of sequence tables that each list the first and last taxon name in a particular module. module_select()
uses these tables to determine which data module 'should' have the data for the taxon name.
[1] first taxon name in the data module
[2] last taxon name in the module
[3] suffix appended to the base module name to create: 'Module:Sandbox/trappist the monk/taxonomy <suffix>'
These tables are created by an awb script when it splits a much larger raw data file, before the splits are uploaded
to en.wiki.
]]
-- Map from an uppercase first letter to the list of data-module ranges for that letter.
-- Each range is {first taxon name, last taxon name, module suffix}; module_select() returns the suffix of
-- the first range for which first <= taxon <= last (plain string comparison).
-- Letters that are commented out have no map; for those module_select() returns the bare letter (or
-- 'symbols' when the taxon name does not start with a letter).
local taxomap_t = {
A = {
{'Aa', 'Acricoactinidae', 'A1'},
{'Acricoactis', 'Afroblemma', 'A2'},
{'Afrocalathea', 'Aliptina', 'A3'},
{'Alirhabditidae', 'Amontes', 'A4'},
{'Amorbia', 'Angelarctocyon', 'A5'},
{'Angeldiazia', 'Apatetrinae', 'A6'},
{'Apatetrini', 'Archimylacridae', 'A7'},
{'Archimylacrinae', 'Asparagales', 'A8'},
{'Asparagoideae', 'Australonycteris', 'A9'},
{'Australoonops', 'Azygopus', 'A10'},
},
B = {
{'BOP clade', 'Bavarisauridae', 'B1'},
{'Bavarisaurus', 'Blythia', 'B2'},
{'Blythipicus', 'Brasinorhynchus', 'B3'},
{'Brasityphis', 'Byturus', 'B4'},
},
C = {
{'Caaeteboia', 'Camissoniopsis', 'C1'},
{'Camitia', 'Cassiope', 'C2'},
{'Cassiopea', 'Ceriporiopsis', 'C3'},
{'Ceriscoides', 'Cheyletidae', 'C4'},
{'Cheyletiella', 'Chthamalus', 'C5'},
{'Chthiononetes', 'Cobarus', 'C6'},
{'Cobboldia', 'Contarinia', 'C7'},
{'Contectopalatus', 'Crax', 'C8'},
{'Creagdhubhia', 'Cusickiella', 'C9'},
{'Cuspicephalus', 'Cosgriffius', 'C10'}, -- NOTE(review): last name 'Cosgriffius' sorts before first name 'Cuspicephalus', so the first <= taxon <= last test in module_select() can never select C10 -- confirm the correct last name against the split data
},
D = {
{'Daanosaurus', 'Denticeps', 'D1'},
{'Denticetopsis', 'Diglossopis', 'D2'},
{'Diglyphomorpha', 'Doidae', 'D3'},
{'Dolabella', 'Dzungariotherium', 'D4'},
},
E = {
{'Eacles', 'Empis (Empis)', 'E1'},
{'Empis (Euempis)', 'Epirrita', 'E2'},
{'Episactidae', 'Eucyllus', 'E3'},
{'Eucynodontia', 'Euxiphidiopsis', 'E4'},
{'Euxiphocerus', 'Ezosciadium', 'E5'},
},
-- F = 'F',
G = {
{'Gabara', 'Gibbium', 'G1'},
{'Gibbobruchus', 'Gorbiscape', 'G2'},
{'Gordia', 'Gyroweisia', 'G3'},
},
H = {
{'HTVC010P', 'Hechtioideae', 'H1'},
{'Hecistopteris', 'Hesperotestudo', 'H2'},
{'Hesperotettix', 'Homalopsidae', 'H3'},
{'Homalopsinae', 'Hymenophyllum', 'H4'},
{'Hymenophytaceae', 'Hytrosaviridae', 'H5'},
},
-- I = 'I',
-- J = 'J',
-- K = 'K',
L = {
{'La', 'Lechytiidae', 'L1'},
{'Lecidea', 'Letispe', 'L2'},
{'Leto', 'Lithornithidae', 'L3'},
{'Lithornithiformes', 'Lycurgus', 'L4'},
{'Lycus', 'Lyttoniidina', 'L5'},
},
M = {
{'Maaqwi', 'Manniophyton', 'M1'},
{'Mannophorus', 'Megaphobema', 'M2'},
{'Megaphorus', 'Mesyatsia', 'M3'},
{'Meta', 'Milesiina', 'M4'},
{'Milesiini', 'Mordellistena', 'M5'},
{'Mordellistenini', 'Myzus', 'M6'},
},
N = {
{'Naashoibitosaurus', 'Neocomitinae', 'N1'},
{'Neocompsa', 'Nicotiana', 'N2'},
{'Nicotianeae', 'Nyungwea', 'N3'},
},
O = {
{'OSLEUM', 'Oneillornis', 'O1'},
{'Oneirodes', 'Orthoprosopa (Subgenus)', 'O2'},
{'Orthops', 'Ozyptila', 'O3'},
},
P = {
{'Pabstia', 'Papiamenta', 'P1'},
{'Papilio', 'Paraterpna', 'P2'},
{'Paraterschellingia', 'Peltophryne', 'P3'},
{'Peltophyllum', 'Phalaris', 'P4'},
{'Phalarodon', 'Phyllomenia', 'P5'},
{'Phyllomeniidae', 'Plataplochilus', 'P6'},
{'Platarctia', 'Pogobrama', 'P7'},
{'Pogogyne', 'Prestonia (plant)', 'P8'},
{'Prestosuchidae', 'Protomelas', 'P9'},
{'Protomicroplitis', 'Pseudophloeinae', 'P10'},
{'Pseudophoenix', 'Pulmonoscorpius', 'P11'},
{'Pulsarella', 'Pyxis', 'P12'},
},
-- Q = 'Q',
R = {
{'RTA clade', 'Rhinoleucophenga', 'R1'},
{'Rhinoliparis', 'Rotundabaloghia', 'R2'},
{'Rotundaria', 'Rzedowskia', 'R3'},
},
S = {
{'Saaba', 'Scaptonychini', 'S1'},
{'Scaptonyx', 'Segnosaurus', 'S2'},
{'Segregara', 'Sinella', 'S3'},
{'Sinembiidae', 'Sphaerocorynidae', 'S4'},
{'Sphaerodactylidae', 'Stenonemobius', 'S5'},
{'Stenoninae', 'Suarezia (plant)', 'S6'},
{'Suastus', 'Szovitsia', 'S7'},
},
T = {
{'Taaningichthys', 'Tenuiphantes', 'T1'},
{'Tenuiplanta', 'Thermosbaenidae', 'T2'},
{'Thermosipho', 'Tortyra slossonia', 'T3'},
{'Torulaspora', 'Trilosporidae', 'T4'},
{'Trimenia (butterfly)', 'Tyrannomolpus', 'T5'},
{'Tyrannomyrmex', 'Tzvelevopyrethrum', 'T6'},
},
-- U = 'U',
-- V = 'V',
-- W = 'W',
-- X = 'X',
-- Y = 'Y',
-- Z = 'Z',
-- symbols = 'symbols',
}
-- Set of data modules loaded so far, keyed by 'taxonomy <suffix>' with value true.
-- Written by taxon_get() on every successful load; read by crawl_tree(), which sorts the keys and logs them.
local modules_loaded = {}; -- a list of the modules loaded while crawling the tree; viewable in the lua log
--[[--------------------------< M O D U L E _ S E L E C T >----------------------------------------------------
Select one data module to load that 'should' hold data for <taxon>. Returns an enumerated letter suffix that
will be appended to the base module name to make: Module:Sandbox/trappist the monk/taxonomy <suffix>
]]
-- Pick the suffix of the data module that should contain <taxon>.
--   taxon: taxon name (string)
-- returns:
--   * the enumerated suffix (e.g. 'A3') of the first range in taxomap_t whose first/last names bracket <taxon>;
--   * the bare uppercase letter when taxomap_t has no map for that letter;
--   * 'symbols' when <taxon> does not begin with a letter (and taxomap_t has no 'symbols' map);
--   * nothing (nil to the caller) when the letter has a map but no range brackets <taxon>.
local function module_select (taxon)
	local initial = taxon:match ('^%a');					-- first character, only when it is a letter
	local key = 'symbols';									-- default for names that do not start with a letter
	if initial then
		key = initial:upper();								-- map keys are uppercase letters
	end

	local ranges_t = taxomap_t[key];
	if not ranges_t then									-- no map for this key: the key itself is the suffix
		return key;
	end

	for i = 1, #ranges_t do									-- ranges are tried in listed order; first hit wins
		local range_t = ranges_t[i];
		local first, last = range_t[1], range_t[2];
		if (first <= taxon) and (taxon <= last) then		-- inclusive at both ends
			return range_t[3];
		end
	end
	-- falling off the end returns nothing: <taxon> is outside every listed range
end
--[[--------------------------< T A X O N _ G E T >------------------------------------------------------------
fetch a taxon table from the appropriate Module:Sandbox/trappist the monk/taxonomy <letter> data module where
<letter> is the uppercase first letter of <taxon> or 'symbols' when the first character is not a letter
]]
-- Look up <taxon> in the data module chosen by module_select().
--   taxon: taxon name (string)
-- returns: the value stored under <taxon> in the loaded data module; nil when the module has no such key or
--          when module_select() finds no module for <taxon>
-- raises:  error() when the selected data module cannot be loaded
-- side effect: records each successfully loaded module in modules_loaded
local function taxon_get (taxon)
	local suffix = module_select (taxon);
	if not suffix then										-- <taxon> is outside every range mapped for its first letter
		return nil;											-- report 'not found' instead of failing on nil concatenation below
	end

	local module_name = 'Module:Sandbox/trappist the monk/taxonomy ' .. suffix;
	local ok, result = pcall (mw.loadData, module_name);	-- <result> is the data table on success, the error message on failure
	if not ok then
		error (module_name .. ' does not exist (' .. tostring (result) .. ')');	-- keep the underlying reason; the failure is not necessarily a missing page
	end

	modules_loaded['taxonomy ' .. suffix] = true;
	return result[taxon];
end
--[[--------------------------< _ C R A W L _ T R E E >--------------------------------------------------------
experimental function to see if it is possible / makes sense to replace 87k+ taxonomy templates with lua data modules
for use in Module:Autotaxobox/sandbox; call this function via make_tables() from taxonomyList()
fills three tables:
tree_t: inverted sequence table of taxa and their ranks
taxon_tree_t: equivalent to Module:Autotaxobox taxonTable{} – a sequence table where [1] is <starting node> but also has [n]=number of taxa listed
rank_tree_t: equivalent to Module:Autotaxobox taxonRankTable{} – a sequence table that matches the taxa in taxonTable{}; for 'Life', empty string
]]
-- Walk from <starting_node> up through each taxon's .parent, filling the three caller-supplied tables.
--   starting_node: taxon name where the walk begins
--   tree_t:        receives 'rank: taxon' strings, each inserted at the front (so the start node ends up last);
--                  diagnostic <span> messages are inserted at the front as well
--   taxon_tree_t:  receives taxon names in walk order; on return also carries .n = number of names
--   rank_tree_t:   receives the matching ranks; '' for 'Life'
-- returns: tree_t, taxon_tree_t, rank_tree_t (the same tables that were passed in)
-- The walk stops when a node has no data, no rank, or no parent, when a |same_as= target has no data, or when
-- a node is reached a second time (circular parent chain).
local function _crawl_tree (starting_node, tree_t, taxon_tree_t, rank_tree_t)
	local node = starting_node;
	local taxon_t = taxon_get (node);						-- initialize
	local visited_t = {};									-- nodes already processed; guards against circular parent chains

	while taxon_t and node do
		if visited_t[node] then								-- been here before: following .parent again would never terminate
			table.insert (tree_t, 1, '<span style="color:#d33">circular parent chain at ' .. node .. ' from ' .. starting_node .. '</span>');
			break;
		end
		visited_t[node] = true;

		if taxon_t.same_as then								-- if this node redirects to another taxon via |same_as=; TODO: should this loop? multiple |same_as= possible?
			local target = taxon_t.same_as;
			taxon_t = taxon_get (target);					-- follow the redirect
			if nil == taxon_t then							-- redirect target has no data; indexing it below would raise
				table.insert (tree_t, 1, '<span style="color:#d33">no data for same_as target ' .. target .. ' of node ' .. node .. ' from ' .. starting_node .. '</span>');
				break;
			end
		end

		if taxon_t.rank then								-- nil for Taxonomy/Life
			table.insert (tree_t, 1, taxon_t.rank .. ': ' .. node);
			table.insert (taxon_tree_t, node);
			table.insert (rank_tree_t, taxon_t.rank);

			if taxon_t.parent then
				node = taxon_t.parent;						-- get the next node
				taxon_t = taxon_get (node);					-- and get its taxon table
				if nil == taxon_t then						-- loop condition ends the walk after this message
					table.insert (tree_t, 1, '<span style="color:#d33">no path to \'Life\' at ' .. node .. ' from ' .. starting_node .. '</span>');
				end
			else
				table.insert (tree_t, 1, '<span style="color:#d33">no parent for node ' .. node .. ' from ' .. starting_node .. '</span>');
				node = nil;									-- no next node
			end
		else
			if 'Life' == node then							-- the root is listed with an empty rank
				table.insert (taxon_tree_t, node);
				table.insert (rank_tree_t, '');
			else
				table.insert (tree_t, 1, '<span style="color:#d33">no rank or same_as for node ' .. node .. ' from ' .. starting_node .. '</span>');
			end
			node = nil;										-- no next node
		end
	end

	taxon_tree_t.n = #taxon_tree_t;							-- add the number of taxa in this table
	return tree_t, taxon_tree_t, rank_tree_t
end
--[[--------------------------< M A K E _ T A B L E S >--------------------------------------------------------
interface function between Module:Autotaxobox taxonomyList() and _crawl_tree()
]]
-- Build the taxon list and the rank list for <taxon>.
--   taxon: name of the taxon where the crawl starts
-- returns: two tables, the second and third results of _crawl_tree(): the taxon sequence (with its .n count)
--          and the matching rank sequence. The debug tree (first result) is discarded.
local function make_tables (taxon)
	-- select (2, ...) drops the first result and passes the remaining two through unchanged
	return select (2, _crawl_tree (taxon, {}, {}, {}));
end
--[[--------------------------< C R A W L _ T R E E >----------------------------------------------------------
]]
-- Debug entry point: crawl the tree for the taxon named in frame.args[1], log the data modules that were loaded,
-- and render the crawl result next to the output of Module:Autotaxobox/sandbox taxonomyList for comparison.
--   frame: the Scribunto frame object; frame.args[1] is the taxon name
-- returns: wikitext for a one-row, two-cell table
local function crawl_tree (frame)
	local tree_t = _crawl_tree (frame.args[1], {}, {}, {});	-- crawl the tree to get the debug taxon list of taxa and their ranks; empty tables not used here

	local loaded = {};
	for module, _ in pairs (modules_loaded) do
		table.insert (loaded, module);
	end

	-- Order module names by letter, then numerically by enumerator, so that 'P2' sorts before 'P10'.
	local function comp (a, b)
		local letter_a, enum_a = a:match ('(%u)(%d*)$');	-- get letter and enumerator from 'taxonomy <letter><enum>'
		local letter_b, enum_b = b:match ('(%u)(%d*)$');

		if not (letter_a and letter_b) then					-- a name such as 'taxonomy symbols' has no <letter><enum> tail
			if letter_a or letter_b then
				return nil ~= letter_a;						-- lettered names sort ahead of unlettered ones
			end
			return a < b;									-- neither has a letter: plain string order
		end

		enum_a = tonumber (enum_a);							-- convert enumerators to number type if present; nil else
		enum_b = tonumber (enum_b);

		if (letter_a == letter_b) then						-- when letters the same compare enumerators
			if enum_a and enum_b then						-- both must be enumerated (we don't do 'taxonomy 'P' and 'taxonomy P1')
				return enum_a < enum_b;
			end
		end
		return letter_a < letter_b;							-- default compare letters
	end

	table.sort (loaded, comp);

	for _, module in ipairs (loaded) do
		mw.log (module);
	end
															-- render crude tree and taxonomy list from Module:Autotaxobox for comparison
	return '{|\n|-\n|' .. table.concat (tree_t, '<br />') .. ' || ' .. frame:callParserFunction ('#invoke', {'Autotaxobox/sandbox', 'taxonomyList', frame.args[1]}) .. '\n|-\n|}';
end
--[[--------------------------< D E L E T E _ T A X O N >------------------------------------------------------
deletes a taxon entry from a taxonomy data module.
Calling this function finds the correct data module, reads it and removes the specified taxon entry. The output is
a copy of the data module that can be copy/pasted into the data module. Yeah, I know, crude but lua can't write
wikitext.
TODO: error checking? What if the taxon isn't found?
]]
-- Produce a copy of a taxonomy data module with one taxon entry removed.
--   frame: the Scribunto frame object; frame.args[1] is the taxon name to delete
-- returns: wikitext, either 'deleted: <taxon>' followed by the remaining entries (sorted, wrapped in
--          'return {' ... '}' and rendered through <syntaxhighlight lang="lua">), or a red error message when
--          no taxon was given, no data module can be selected or read, or the taxon is not in the module
local function delete_taxon (frame)
	local taxon = frame.args[1];
	if not taxon or '' == taxon then						-- nothing to look up; module_select() would fail on nil
		return '<span style="color:#d33">delete_taxon: no taxon specified</span>';
	end

	local suffix = module_select (taxon);
	if not suffix then										-- <taxon> is outside every range mapped for its first letter
		return '<span style="color:#d33">Taxon: ' .. taxon .. ' is not within the range of any taxonomy data module</span>';
	end

	local module_name = 'Module:Sandbox/trappist the monk/taxonomy ' .. suffix;
	local title = mw.title.new (module_name);
	local content = title and title:getContent();			-- either step may yield nil
	if not content then
		return '<span style="color:#d33">[[' .. module_name .. ']] does not exist or cannot be read</span>';
	end

	local found = false;
	local out_t = {};

	for entry in content:gmatch ('\t*%[\'[^\r\n]+},[\r\n]+') do			-- one entry per line: optional tabs, ['name'] ... }, newline(s)
		local entry_taxon = entry:match ('^\t*%[\'([^=]+)\'%]%s*=');	-- the name between [' and ']
		if entry_taxon == taxon then
			found = true;									-- drop this entry from the output
		else
			table.insert (out_t, entry);
		end
	end

	if not found then
		return '<span style="color:#d33">Taxon: ' .. taxon .. ' not found in [[' .. module_name .. ']]</span>';	-- the original message never closed the <span>
	end

	table.sort (out_t)
	table.insert (out_t, 1, 'return {\n')
	table.insert (out_t, '\t}')

	return 'deleted: ' .. taxon .. '\n\n' .. frame:callParserFunction ({name='#tag:syntaxhighlight', args={table.concat (out_t), lang='lua'}});
--	return ('deleted: ' .. taxon .. '\n\n' .. table.concat (out_t, '\n'))
end
--[[--------------------------< T E S T >----------------------------------------------------------------------
A test function that counts the number of 'parameters' used in a converted lua data file.
Also detects unknown parameters (most of the parameters in a malformed cite journal template in
Template:Taxonomy/Parmastega |refs= parameter); identified a bug in the conversion script (<noinclude>...</noinclude>
not correctly removed). The newest version of the conversion script does not retain unknown parameters.
]]
-- Tally how often each key occurs across all taxon tables in 'Module:Sandbox/trappist the monk/taxonomy A'.
-- Keys listed in count_t are counted there; every other key is counted in unknown_t.
-- returns: wikitext with the known-key counts, then the line 'unknowns', then the unknown-key counts;
--          each list is ordered by descending count and joined with <br />
local function test ()
	local data_t = mw.loadData ('Module:Sandbox/trappist the monk/taxonomy A')
	local count_t = {parent = 0, rank=0, link=0, extinct=0, always_display=0, ['always display']=0, refs=0, same_as=0, ['same as']=0}
	local unknown_t = {};

	for _, taxon_t in pairs (data_t) do
		for key in pairs (taxon_t) do
			local tally_t = count_t[key] and count_t or unknown_t;	-- known keys start at 0, which is truthy in lua
			tally_t[key] = (tally_t[key] or 0) + 1;
		end
	end

	-- Turn a key -> count table into '<key>: <count>' lines, highest count first, joined with <br />.
	local function render (tally_t)
		local lines_t = {};
		for key, value in pairs (tally_t) do
			lines_t[#lines_t + 1] = key .. ': ' .. value;
		end
		table.sort (lines_t, function (a, b)
			return tonumber (a:match ('%d+$')) > tonumber (b:match ('%d+$'));	-- compare the trailing counts numerically
		end);
		return table.concat (lines_t, '<br />');
	end

	return render (count_t) .. '<br /><br />unknowns<br />' .. render (unknown_t)
end
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------
]]
-- Module export table: the functions and data that this module returns to whatever loads it.
return {
test=test, -- counts the keys used in a converted data module
crawl_tree = crawl_tree, -- debug rendering of the crawl next to the Module:Autotaxobox/sandbox taxonomyList output; takes a frame
make_tables = make_tables, -- interface function between Module:Autotaxobox taxonomyList() and _crawl_tree()
delete_taxon = delete_taxon, -- emits a copy of a data module with one taxon entry removed; takes a frame
taxomap_t = taxomap_t, -- the letter -> module-range map itself
}