Jump to content

Module:GHS phrases/sandbox

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by DePiep (talk | contribs) at 03:43, 8 January 2022. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
  1. ^ a b "Globally Harmonized System of Classification and Labelling of Chemicals" (pdf). 2021. Annex 3: Codification of Statements and Pictograms (pp 268–385).
-- START MERGE PH TABLES 28-11-2021
-- removed dead/DEV code into /sandbox2 2021-12-28
-- todo : E102 and E105 not both (case 'setid=X')
-- todo: 1. setid finding, using, and errmsg
-- todo: 2. code find & errmsg
-- catsort order 0/1
-- todo: existing functions stable
-- -- todonot: new ones; later (listRangeDEV, ellipsis..., EUHphrases, ...)
-- text omit rules header (into /template), numberofphrases fmt=bare number option;
--------------------------------------------------------------------------------
-- Module:GHS phrases
-- 
-- main: reads GHS parameters (arguments like "H301", "P401")
--		and returns for each (listtype='abbr'):
--		phraseCode visible; formal phrase text as <abbr title="...">
-- setid	= H, P or ''
-- phraseCode = e.g. "H201", "P231+P234"
-- phrase text read from array tables in [[Module:GHS phrases/data]]
--
-- Implements: [[Template:GHS phrases]]
-- Helppage: [[Template:GHS phrases]]
-- Error category: [[Category:GHS errors]], [[Category:GHS warnings]] (mainspace pages only)
--
-- Also: 
-- listAll(), numberOfPhrases(), listOmitRules(),
-- listtype, omit
--------------------------------------------------------------------------------

-- Dependencies and module-level state.
-- NOTE(review): 'Module:No globals' presumably makes accidental global access an error -- confirm.
require('Module:No globals')
local r = {}	-- "r" for return, so no confusion with setid P
-- Data page with the phrase texts ('GHSphrases'), 'tOmitRules' and 'tHyphenNoGHS'.
-- This revision reads the /sandbox data page.
local GHSdata	= mw.loadData('Module:GHS phrases/data/sandbox') -- -- todo /sandbox here
local getArgs	= require('Module:Arguments').getArgs
local tTools	= require('Module:TableTools')
local yesno		= require('Module:Yesno')
-- State filled while the arguments are processed. Fields written in this module:
-- setid, setidFromCodelist, useSetid, listtype, omit, bHyphenNoGHS, format.
local tArgName	= {} -- named parameters (setid, omit, listtype)
local tTailPreviewMsgs = {} -- tail: Preview messages, added after the inline output
local tTailCats        = {} -- tail: Categories, added after the inline output
-- Debug switch: when true, messages are appended as plain text and, outside
-- articles, categories are emitted as visible links instead of being suppressed.
local isDebug   = true

--------------------------------------------------------------------------------
-- PHlabel
--
-- Label for the phrase set in use: tArgName.useSetid followed by '--phrases'
-- (e.g. 'H--phrases'); 'GHS--phrases' when useSetid is the empty string.
-- Used as message prefix, category sort key and help page section name.
--------------------------------------------------------------------------------
local function PHlabel()
	local sPrefix = tArgName.useSetid

	if sPrefix == '' then
		sPrefix = 'GHS'
	end
	return sPrefix .. '--phrases' -- -- todo remove double hyphen
end

--------------------------------------------------------------------------------
-- wlHelpPage
--
-- Builds a wikilink to the help page: [[Template:GHS phrases#Section|Label]]
-- sSection: optional section/anchor name; '#Section' is left out when empty.
-- sLabel:   optional link label; nil means "same as sSection",
--           '|Label' is left out when the label ends up empty.
--------------------------------------------------------------------------------
local function wlHelpPage(sSection, sLabel)
	-- label defaults to the section name
	if sLabel == nil then
		sLabel = sSection
	end

	local sLink = '[[' .. 'Template:GHS phrases'
	if (sSection or '') ~= '' then
		sLink = sLink .. '#' .. sSection
	end
	if (sLabel or '') ~= '' then
		sLink = sLink .. '|' .. sLabel
	end
	return sLink .. ']]'
end

--------------------------------------------------------------------------------
-- wlInlineTag
--
-- Returns the superscript inline tag <sup>[?]</sup>. The "?" is a wikilink to
-- the help page section named by PHlabel(); its tooltip (span title) reads
-- "<PHlabel> <sMsgTxt>".
-- class="noprint Inline-Template Template-Fact" is c/p from meta Template:Fix.
-- No TSTYLES (Dec2021)
--------------------------------------------------------------------------------
local function wlInlineTag(sMsgTxt)
	local sLabel = PHlabel()

	-- the linked "?" carries the message as hover text
	local sQuestionMark = '<span title=\"' .. sLabel .. ' ' .. sMsgTxt .. '\">?</span>'

	return '<sup><span class="noprint Inline-Template Template-Fact">&#91;<i>'
		.. wlHelpPage(sLabel, sQuestionMark)
		.. '</i>&#93;</span></sup>'
end

--------------------------------------------------------------------------------
-- inMono
--
-- Wraps s in a monospace span (from: Template:Mono).
-- Returns '' for nil or empty input, so an empty span is never produced.
--------------------------------------------------------------------------------
local function inMono(s)
	-- plain comparison; no need to pass a boolean through yesno()
	if s == nil or s == '' then
		return ''
	end
	return '<span class="monospaced" style="font-family: monospace;">' .. s .. '</span>'
end

--------------------------------------------------------------------------------
-- addErrorCategory
--
-- Appends an error category to tTailCats. Returns nothing.
-- sCatsort: category sort key; nil falls back to tArgName.useSetid.
--   page type 'article' (mainspace): [[Category:GHS errors|catsort]],
--                                    no sort key part when sCatsort is ''
--   elsewhere, isDebug on:  a visible link to the category (not categorising)
--   elsewhere, isDebug off: nothing is added
--------------------------------------------------------------------------------
local function addErrorCategory(sCatsort)
	local fPagetype = require('Module:Pagetype').main

	-- -- todo catsort could be ''?
	if sCatsort == nil then
		sCatsort = tArgName.useSetid
	end

	local sCat
	if fPagetype() == 'article' then -- mainspace only
		if sCatsort == '' then
			sCat = '[[Category:GHS errors]]'
		else
			sCat = '[[Category:GHS errors|' .. sCatsort .. ']]'
		end
	elseif isDebug then
		sCat = '[[:Category:GHS errors|CAT:' .. sCatsort .. '-err]]|'
	else
		return
	end

	table.insert(tTailCats, sCat)
end

--------------------------------------------------------------------------------
-- addWarningCategory
--
-- Appends a warning category to tTailCats.
-- sCatsort: category sort key; nil falls back to tArgName.setid.
--           Warning keys used in this module: D, O, - (Double, Omit, Hyphen)
--   page type 'article' (mainspace): [[Category:GHS warnings|catsort]],
--                                    no sort key part when sCatsort is ''
--   elsewhere, isDebug on:  a visible link to the category (not categorising)
--   elsewhere, isDebug off: nothing is added, '' is returned
--------------------------------------------------------------------------------
local function addWarningCategory(sCatsort)
	local fPagetype = require('Module:Pagetype').main

	if sCatsort == nil then
		sCatsort = tArgName.setid
	end

	local sCat
	if fPagetype() == 'article' then -- mainspace only
		if sCatsort == '' then
			sCat = '[[Category:GHS warnings]]'
		else
			sCat = '[[Category:GHS warnings|' .. sCatsort .. ']]'
		end
	elseif isDebug then
		sCat = '[[:Category:GHS warnings|CAT:warn-' .. sCatsort .. ']]|'
	else
		return ''
	end

	table.insert(tTailCats, sCat)
end

--------------------------------------------------------------------------------
-- addPreviewMsg
--
-- Queues a message in tTailPreviewMsgs.
-- isDebug on:  the raw text followed by '|'
-- isDebug off: the text as formatted by Module:If preview (_warning)
--------------------------------------------------------------------------------
local function addPreviewMsg(sMsg)
	local fPreviewWarning = require('Module:If preview')._warning

	if isDebug then
		table.insert(tTailPreviewMsgs, sMsg .. '|')
		return
	end
	table.insert(tTailPreviewMsgs, fPreviewWarning({sMsg}))
end

--------------------------------------------------------------------------------
-- showTailMsgAndCat
--
-- Returns the tail of the output: all queued preview messages
-- (tTailPreviewMsgs) followed by all queued categories (tTailCats).
-- All namespaces; '' when nothing is queued.
--
-- Categories are returned even when no preview message exists. Before, with
-- isDebug off, a category queued without a message (the '-' warning category)
-- was silently dropped.
--------------------------------------------------------------------------------
local function showTailMsgAndCat()
	-- table.concat of an empty table is '', so no emptiness test is needed
	return table.concat(tTailPreviewMsgs, '') .. table.concat(tTailCats, '')
end

--------------------------------------------------------------------------------
-- applyHideDuplicates
--
-- Removes repeated codes from tArgsCodes, keeping the first occurrence of each
-- code and the order of the remaining codes.
-- The table is edited in place and also returned.
-- For every removed copy a preview message 'Duplicate removed: <code>' is
-- queued; when at least one copy was removed, warning category 'D' is added.
--
-- Single pass with a "seen" set. The earlier version called
-- tTools.compressSparseArray() and ignored its result; that function returns
-- a new table, so those calls had no effect.
--------------------------------------------------------------------------------
local function applyHideDuplicates(tArgsCodes)
	local tSeen = {}
	local iTotal = #tArgsCodes
	local iKept = 0 -- index of the last code kept so far
	local hit = false

	for iRead = 1, iTotal do
		local code = tArgsCodes[iRead]
		if tSeen[code] then
			hit = true
			addPreviewMsg('Duplicate removed: ' .. code)
		else
			tSeen[code] = true
			iKept = iKept + 1
			tArgsCodes[iKept] = code
		end
	end

	-- cut off the now unused tail, so the table stays a proper sequence
	for i = iTotal, iKept + 1, -1 do
		tArgsCodes[i] = nil
	end

	if hit then
		addWarningCategory('D')
	end
	return tArgsCodes
end

--------------------------------------------------------------------------------
-- applyOmitRules
--
-- Applies the omit rules from the data page (GHSdata['tOmitRules'], a map
-- keep-code -> omit-code): when both codes of a rule are present in
-- tArgsCodes, the omit-code is removed.
-- Omit rule is per GHS_Rev9E_0.pdf (2021)
-- The table is edited in place and also returned.
-- Per applied rule a preview message is queued; when any rule was applied,
-- warning category 'O' is added.
--------------------------------------------------------------------------------
local function applyOmitRules(tArgsCodes)
	local tRules = GHSdata['tOmitRules']
	local hit = false

	for keep, omit in pairs(tRules) do
		if tTools.inArray(tArgsCodes, omit) and tTools.inArray(tArgsCodes, keep) then
			hit = true
			-- walk downwards: table.remove shifts the elements above i, so a
			-- forward (or pairs) traversal of the same table is not safe
			for i = #tArgsCodes, 1, -1 do
				if tArgsCodes[i] == omit then
					table.remove(tArgsCodes, i)
				end
			end
			addPreviewMsg(wlHelpPage('Omit Rules') .. ': keep ' .. keep .. ', omit ' .. omit)
		end
	end

	if hit then
		addWarningCategory('O')
	end
	return tArgsCodes
end

--------------------------------------------------------------------------------
-- errorSetidOrCode
--
-- Reports one error: queues a preview message ("<errID>: <text>"), adds the
-- error category with sort key "<key>/<errID>", and returns the inline text
-- to show in place of the phrase ('' when the error has no inline part).
--
-- errID handled here:
--   ERR102  set id not known (e.g. 'X'); uses sSetID
--   ERR105  set id missing
--   ERR201  phrase code not found; uses sPhraseCode
--   ERR202  phrase code does not belong to the set id in use; uses sPhraseCode
--   other   reported as unknown error ID
--------------------------------------------------------------------------------
local function errorSetidOrCode(errID, sSetID, sPhraseCode)
	local sInline = ''
	local sPreview = ''
	local sSortKey = 'ε' -- lc epsilon / unexpectedly absent

	if errID == 'ERR102' then
		sPreview = wlHelpPage('GHS setid') .. ': set id \'' .. sSetID .. '\' not known (\'H\' or \'P\' expected)'
		sSortKey = 'I-unk'

	elseif errID == 'ERR105' then
		sPreview = wlHelpPage('', 'GHS phrases') .. ': set id missing (please use: setid=H or P)'
		sSortKey = 'I-missing'

	elseif errID == 'ERR201' or errID == 'ERR202' then
		-- both show the code followed by the [?] tag; only the preview text differs
		sInline = sPhraseCode .. wlInlineTag('\'' .. sPhraseCode .. '\' not found')
		if errID == 'ERR201' then
			sPreview = wlHelpPage(PHlabel()) .. ': \'' .. sPhraseCode .. '\' not found'
		else
			sPreview = wlHelpPage(PHlabel()) .. ': \'' .. sPhraseCode .. '\' set id is not \'' .. tArgName.useSetid .. '\''
		end
		sSortKey = PHlabel()

	else
		sPreview = 'error unk'
		sInline = errID .. ': error ID unk'
		sSortKey = '?E'
	end

	addPreviewMsg(errID .. ': ' .. sPreview)
	-- every ID handled here is filed as an error, never as a warning
	addErrorCategory(sSortKey .. '/' .. errID)
	return sInline
end

--------------------------------------------------------------------------------
-- formatPhraseAbbr
--
-- abbr-form (infobox list form): the phrase code is the visible text,
-- " <code>: <phrase>" is the tooltip (title attribute) of the <abbr> element.
--------------------------------------------------------------------------------
local function formatPhraseAbbr(phraseCode, sPhrase)
	local sTooltip = ' ' .. phraseCode .. ': ' .. sPhrase

	return table.concat({
		'<abbr class="abbr" title="', sTooltip, '">',
		phraseCode,
		'</abbr>',
	})
end

--------------------------------------------------------------------------------
-- formatPhraseInline
--
-- inline form (in sentence): monospaced code, colon, phrase text in "quotes"
--------------------------------------------------------------------------------
local function formatPhraseInline(phraseCode, sPhrase)
	local sQuoted = '\"' .. sPhrase .. '\"'
	return inMono(phraseCode) .. ': ' .. sQuoted
end

--------------------------------------------------------------------------------
-- formatPhraseList
--
-- list form: monospaced code, colon, phrase text (as inline, without quotes)
--------------------------------------------------------------------------------
local function formatPhraseList(phraseCode, sPhrase)
	local sCode = inMono(phraseCode)
	return sCode .. ': ' .. sPhrase
end

--------------------------------------------------------------------------------
-- getSetID
--
-- Determines the set id to use (expected either 'H', 'P' or '').
-- First route: read |setid=
-- When |setid= is not set (or empty), the unnamed parameters are scanned
--		for the first one that starts with H or P (as in |P201|P202|...);
--		when none is found, '' is used.
-- In one call, P and H numbers can *not* be mixed
--		so "|H201|P202|" will cause error "P202 not found" (... in H-list)
--
-- Errors reported (exactly one per call):
--		ERR102  |setid= has a value other than H or P (e.g. 'X')
--		ERR105  no set id at all: not given and not derivable from the codes
-- A bad |setid= reports ERR102 only; it is not additionally reported as
-- "missing" (ERR105).
--
-- Stores tArgName.setid, tArgName.setidFromCodelist and tArgName.useSetid.
-- Returns tArgName.useSetid
--------------------------------------------------------------------------------
local function getSetID(tArgs)
	local setidArg = tArgs.setid or ''
	local setidFromCodelist = ''
	local bBadSetid = false

	if setidArg == 'P' or setidArg == 'H' then
		-- valid explicit set id, nothing to derive
	elseif setidArg == '' then
		-- not defined, try reading from code list
		for _, v in ipairs(tArgs) do
			local sPrefix = mw.ustring.match(v, '^[PH]')
			if sPrefix ~= nil then
				setidFromCodelist = sPrefix
				break
			end
		end
	else -- bad id like 'X'
		errorSetidOrCode('ERR102', setidArg)
		bBadSetid = true
		setidArg = '' -- no setid; ERR
	end

	tArgName.setid = setidArg
	tArgName.setidFromCodelist = setidFromCodelist

	if setidArg == '' then
		tArgName.useSetid = setidFromCodelist
	else
		tArgName.useSetid = setidArg
	end

	-- "missing" is only reported when no bad value was reported already
	if tArgName.useSetid == '' and not bBadSetid then
		errorSetidOrCode('ERR105')
	end

	return tArgName.useSetid
end

--------------------------------------------------------------------------------
-- getListType
--
-- Reads |listtype= and stores the normalised value in tArgName.listtype.
-- Accepted: abbr, inline, bulleted, unbulleted, horizontal, ordered,
-- horizontal_ordered (also written as 'horizontal ordered').
-- Anything else, including empty or absent, becomes 'abbr'.
--------------------------------------------------------------------------------
local function getListType(tArgs)
	-- accepted input value -> normalised list type
	local tNormalised = {
		['abbr'] = 'abbr',
		['inline'] = 'inline',
		['bulleted'] = 'bulleted',
		['unbulleted'] = 'unbulleted',
		['horizontal'] = 'horizontal',
		['ordered'] = 'ordered',
		['horizontal_ordered'] = 'horizontal_ordered',
		['horizontal ordered'] = 'horizontal_ordered',
	}
	local sRequested = tArgs['listtype'] or 'abbr'

	tArgName.listtype = tNormalised[sRequested] or 'abbr'
end

--------------------------------------------------------------------------------
-- getDoOmitRules
--
-- Reads |omit= (a yes/no value) and stores the boolean in tArgName.omit.
-- Default is true: omit rules are applied unless switched off (|omit=no).
--
-- The former expression "yesno(...) or true" was always true, because a
-- false result was overridden by "or true"; so the rules could not be
-- switched off.
--------------------------------------------------------------------------------
local function getDoOmitRules(tArgs)
	local bOmit = yesno(tArgs['omit'], true)

	-- only an absent result falls back to the default; false must stay false
	if bOmit == nil then
		bOmit = true
	end
	tArgName.omit = bOmit
	return
end

--------------------------------------------------------------------------------
-- getHyphenNone
--
-- True when the first unnamed parameter is exactly '-'.
-- In that case warning category '-' is added.
-- The flag is stored in tArgName.bHyphenNoGHS and returned.
--------------------------------------------------------------------------------
local function getHyphenNone(tArgs)
	local bHyphen = (tArgs[1] == '-')

	if bHyphen then
		addWarningCategory('-')
	end

	tArgName.bHyphenNoGHS = bHyphen
	return bHyphen
end

--------------------------------------------------------------------------------
-- hyphenNoGHStext
--
-- Returns the text stored under key 'hyphen' in the data page table
-- tHyphenNoGHS. Takes no parameters.
--------------------------------------------------------------------------------
local function hyphenNoGHStext()
	local tTexts = GHSdata.tHyphenNoGHS
	return tTexts['hyphen']
end

--------------------------------------------------------------------------------
-- normaliseCode
--
-- Cleans up one phrase code as entered:
-- 1. decodes HTML entities (mw.text.decode)
-- 2. removes every character that is not a digit, '+' or an ASCII letter
-- 3. puts the set id in use (tArgName.useSetid, '' when unset) before a
--    leading digit and before a digit that directly follows a '+'
--    so with set id P: "301 + 310" becomes "P301+P310"
--------------------------------------------------------------------------------
local function normaliseCode(c)
	local sPrefix = tArgName.useSetid or ''
	local sCode = mw.text.decode(c)

	-- each gsub also returns a match count; only the string is kept
	sCode = mw.ustring.gsub(sCode, '[^%d%+A-Za-z]', '')
	sCode = mw.ustring.gsub(sCode, '^(%d)', sPrefix .. '%1')
	sCode = mw.ustring.gsub(sCode, '%+(%d)', '+' .. sPrefix .. '%1')
	return sCode
end

--------------------------------------------------------------------------------
-- checkCodeFormat like X567
--
-- Debug helper. Returns 'chk fmt: ' followed by every part of c that looks
-- like a phrase code (optional '+', then P or H, three digits, optional
-- letters), each followed by '; '.
--
-- The iterator returned by gmatch is used directly in the for-loop. Before,
-- the gmatch result was discarded and the loop iterated over an undefined
-- name, which is a runtime error.
--------------------------------------------------------------------------------
local function checkCodeFormat(c)
	local tParts = { 'chk fmt: ' }

	for codeN in mw.ustring.gmatch(c, '[%+]?[PH]%d%d%d[A-Za-z]*') do
		table.insert(tParts, codeN .. '; ')
	end
	return table.concat(tParts)
end

--------------------------------------------------------------------------------
-- prepareArgsAndCodes
--
-- 1. reads & checks the named parameters (setid, listtype, omit, format) and
--		the hyphen marker; results are stored in tArgName, errors are queued
-- 2. cleans up & formats the phrase codes (= unnamed parameters):
--		removes bad characters, creates H/P pattern "H201", "P310+P302"
-- returns the list of codes (= unnamed parameters):
--		straight array, no nil's, sorted
--------------------------------------------------------------------------------
local function prepareArgsAndCodes(tArgs)
	-- read, normalise & store named args; add errnote
	getSetID(tArgs)
	getHyphenNone(tArgs)
	getListType(tArgs)
	getDoOmitRules(tArgs)
	-- format: only 'plain' is recognised (number of phrases)
	tArgName.format = (tArgs['format'] == 'plain') and 'plain' or ''

	-- keeps the numbered parameters only, so all named args are left out
	local tCodes = tTools.compressSparseArray(tArgs)
	for i = 1, #tCodes do
		tCodes[i] = normaliseCode(tCodes[i])
	end

	table.sort(tCodes)
	return tCodes
end

--------------------------------------------------------------------------------
-- listAll
--
-- Returns the wikitable rows (one per phrase) for all phrases whose code
-- starts with tArgName.setid; requires |setid=P/H
-- Rows are sorted by code; each row has two cells: code (monospace), phrase.
-- Each row also carries an anchor span with the code as id.
-- Queued messages and categories are not appended to the result.
--
-- NOTE(review): the anchor span is written on the "|-" line and the sort key
-- is spelled 'datasortvalue'; verify that both render as intended.
--------------------------------------------------------------------------------
function r.listAll(frame)
	local tArgs = getArgs(frame)

	prepareArgsAndCodes(tArgs)
	-- setid is required (ERR102, ERR105), but a missing one does not stop here

	local tPhrases = GHSdata['GHSphrases']
	local sPattern = '^' .. tArgName.setid

	-- ORDERED RANGE: codes of the requested set
	local tCodes = {}
	for code in pairs(tPhrases) do
		if string.match(code, sPattern) then
			tCodes[#tCodes + 1] = code
		end
	end
	tCodes = tTools.compressSparseArray(tCodes)
	table.sort(tCodes) -- required; the data table itself is not sorted

	-- LIST: one table row per code (i = row number, used as sort value)
	local tRows = {}
	for i, code in ipairs(tCodes) do
		local sAnchor = '<span class="anchor" id="' .. code .. '"/>'
		local sCells = '| datasortvalue="' .. i .. '" | <span style="font-family: monospace;">' .. code .. '</span> || ' .. tPhrases[code]
		tRows[i] = '|- ' .. sAnchor .. '\n' .. sCells
	end

	return table.concat(tRows, '\n')
end

--------------------------------------------------------------------------------
-- numberOfPhrases
--
-- Documentation
-- requires |setid=H/P
-- Returns the number of phrases in the data page whose code starts with the
-- set id, formatted as
--	"GHS <PHlabel> (123)"	(default)
--	"123"					(|format=plain)
-- followed by queued messages and categories.
-- Without a usable set id only the queued messages/categories are returned.
--
-- The set id given by the caller is used. Before, it was overwritten with
-- 'H' (a leftover dummy), so the number of P-phrases could not be requested.
--------------------------------------------------------------------------------
function r.numberOfPhrases(frame)
	local tArgs = getArgs(frame)

	prepareArgsAndCodes(tArgs)
	if tArgName.useSetid == '' then
		-- setid is required
		return showTailMsgAndCat()
	end

	-- count the codes of the requested set
	local sPat = '^' .. tArgName.useSetid
	local iT = 0
	for c in pairs(GHSdata['GHSphrases']) do
		if string.match(c, sPat) ~= nil then
			iT = iT + 1
		end
	end

	local sResult
	if tArgName.format == 'plain' then
		sResult = tostring(iT)
	else
		sResult = 'GHS ' .. PHlabel() .. ' <span style="font-weight: normal;">(' .. tostring(iT) .. ')</span>'
	end

	return sResult .. showTailMsgAndCat()
end

--------------------------------------------------------------------------------
-- listOmitRules
--
-- self-documentation: lists all omit rules from the data page, one per line
-- ("keep X, omit Y"), sorted, separated by <br/>
-- does not use frame args, does not require setid=H,P
--------------------------------------------------------------------------------
function r.listOmitRules()
	local tLines = {}

	for keep, omit in pairs(GHSdata['tOmitRules']) do
		tLines[#tLines + 1] = '&bull; keep ' .. inMono(keep) .. ', omit ' .. inMono(omit)
	end

	-- pairs() gives no defined order, so sort the finished lines
	table.sort(tLines)
	return table.concat(tLines, '<br/>')
end

--------------------------------------------------------------------------------
-- _main
--
-- processes setid (H, P) and phrase codes
--		error:	setid not P, H
--				code not found (ERR201)
--				code not of the set id in use (ERR202)
-- cannot mix H and P phrases
-- reads phrases from the /data phrases table
-- formats each phrase by listtype (abbr, inline, or a Module:List type)
-- returns the formatted list followed by queued messages and categories
--
-- todo: logic setid=H, code in has P = err: MIXED unexpected
-- todo: tCodeList when codes only (now tArgs); also when listRangeDEV
-- todo check |-| == no phrases (no GHS at all)
--------------------------------------------------------------------------------
function r._main(tArgs)
	local tCodes = prepareArgsAndCodes(tArgs)

	-- setid is required (ERR102, ERR105)
	-- NOTE001 - no codes in; no text shown no error no warning
	if tArgName.useSetid == '' or #tCodes == 0 then
		return showTailMsgAndCat()
	end

	if tArgName.bHyphenNoGHS then
		return hyphenNoGHStext() .. showTailMsgAndCat()
	end

	tCodes = applyHideDuplicates(tCodes)
	if tArgName.omit then
		tCodes = applyOmitRules(tCodes)
	end

	-- formatter by list type; every Module:List type uses the list formatter
	local sListType = tArgName.listtype
	local tFormatters = { abbr = formatPhraseAbbr, inline = formatPhraseInline }
	local fFormat = tFormatters[sListType] or formatPhraseList

	local tPhrases = GHSdata['GHSphrases']
	local sSetPattern = '^' .. tArgName.useSetid

	local tOut = {} -- formatted items, to concat by listtype
	for _, code in ipairs(tCodes) do
		local sPhrase = tPhrases[code]
		local sItem
		if sPhrase == nil then
			sItem = errorSetidOrCode('ERR201', nil, code)
		elseif mw.ustring.match(code, sSetPattern) == nil then
			sItem = errorSetidOrCode('ERR202', tArgName.useSetid, code)
		else
			sItem = fFormat(code, sPhrase)
		end
		table.insert(tOut, sItem)
	end

	local sList
	if sListType == 'abbr' or sListType == 'inline' then
		sList = table.concat(tOut, ', ')
	else
		local mList = require('Module:List')
		sList = mList[sListType](tOut)
	end
	return sList .. showTailMsgAndCat()
end

--------------------------------------------------------------------------------
-- main
--
-- Entry point for {{#invoke:}}: reads the arguments from the frame
-- (Module:Arguments getArgs) and hands them to _main()
--------------------------------------------------------------------------------
function r.main(frame)
	return r._main(getArgs(frame))
end

-- getNumberFromC: the first run of three digits in c, as a number
-- (nil when c has no such run). Not called elsewhere in this module.
local function getNumberFromC(c)
	local sDigits = string.match(c, '%d%d%d')
	return tonumber(sDigits)
end

return r