Jump to content

Module:Interlinear/sandbox2/gcl

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Eievie (talk | contribs) at 02:06, 31 January 2024. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
local p = {}
local data = mw.loadData( 'Module:Interlinear/data' )
local gloss_override = {} -- for custom gloss abbreviations
local getArgs = require('Module:Arguments').getArgs
local yesno = require('Module:Yesno')
local lang_data = mw.loadData( 'Module:Lang/data' )

--------------------------
-- Almost-global variables
--------------------------
-- File-level state shared by the functions below.
--   glossing_type       - how abbreviations are rendered; assigned in p.gcl()
--                         and read by format_gloss()
--   displaying_messages - when truthy, format_gloss() logs its lookups and
--                         UserMessages:print_warnings() emits the preview box;
--                         never assigned in the visible part of this module
--   msg                 - the active message collector; p.gcl() points it at
--                         UserMessages
--   free_translation, buffer - not referenced in the visible part of this module
local glossing_type, displaying_messages, free_translation, msg, buffer

-------------------
-- General settings
-------------------
-- Module-wide configuration. The pattern-valued entries are Lua patterns (not
-- regular expressions) that are passed to the mw.ustring functions further down.
local conf = { --settings
	WordSeparator = " \n\r\t", -- Don't replace with %s as this would include non-breaking spaces
	GlossAbbrPattern = "^([Ø0-9A-Z]+)$", -- this isn't a full regex, but a Lua pattern
	-- NOTE: The following characters must be formatted for use in a pattern set.
	-- find_gloss() places this string inside "[...]" and "[^...]" to split a word into morphemes.
	GlossAbbrBoundary = "-.,;:<>‹›/\\~+=%?%s%[%]()%_\127'",
	GlossExcludeTable = {I = true,}, -- strings not to be treated as glossing abbreviations
	GlossExcludePattern = '^[0-9][0-9]+$', -- excludes strings made up of two or more digits (a single digit is not excluded)
	GlossSmallCapsExclude = "^[AOPS]$", -- glossing abbreviations matching this pattern will not be rendered in small caps
	GlossingType = "label", -- the default glossing type, used when no "glossing" argument is given:
						-- "label": gloss abbreviations are formatted as an <abbr> with the "label" appearing in a tooltip
						-- "wikilink": the abbreviation is formatted as a wikilink to the relevant wikipedia article
						-- "no abbr" (see set_glossing_type()): no lookup is done and a <span> is emitted instead of an <abbr>
	ErrorCategory = "[[Category:Pages with errors in interlinear text]]",
	AmbiguousGlossCategory = "[[Category:Articles with ambiguous glossing abbreviations]]",
	MessageGlossingError = "Error(s) in interlinear glossing", -- passed to mw.addWarning() by UserMessages:print_errors()
	-- The combining_* entries let gloss_lookup() expand composite abbreviations
	-- that have no entry of their own in the abbreviation data.
	combining_gender_numbers = "[0-9][0-9]?$", --e.g. G4 '4th gender' or CL7 'class 7'
	combining_gender_prefixes = {G = "gender", CL = "class"},
	combining_person = {["1"] = "first person", ["2"] = "second person", ["3"] = "third person"},
	combining_number = {S = "singular", SG = "singular", P = "plural", PL = "plural", D = "dual", DU = "dual", TRI = "trial"},
	combining_gender = {F = "feminine", M = "masculine", N = "neuter"},
	LowerCaseGlosses = {["1sg"] = true, ["2sg"] = true, ["3sg"] = true, ["1du"] = true, ["2du"] = true, ["3du"] = true, ["1pl"] = true, ["2pl"] = true,
		["3pl"] = true, ["Fsg"] = true, ["Fpl"] = true, ["Msg"] = true, ["Mpl"] = true,}, -- these are the non-all-upper-case strings that will be recognised as glossing abbreviations
	ErrorHelpLocation = "Template:Interlinear", -- page that help_link() points to
}

---------------------
-- CSS styles and classes
---------------------
-- Inline CSS declarations. GlossAbbr may be extended at run time by
-- set_global_glossing_settings(); ErrorMessage is applied to error and warning
-- output and to unrecognised abbreviations.
conf.style = { --CSS styles
	WordDiv = "float: left; margin-bottom: 0.3em;",
	WordMargin = "margin-right: 1em;",
	WordP = "margin: 0px;", -- the style for the word <p> elements
	GlossAbbr = "font-variant: small-caps; font-variant-numeric: oldstyle-nums; text-transform: lowercase; ", -- won't be applied to gloss abbreviations containing lower-case characters
	HiddenText = "display: none;",
	EndDiv = "clear: left; display: block;", -- style of the <div> element at the end of the interlinear display
	ErrorMessage = "font-size: inherit",
}
-- CSS class names attached to the generated elements.
conf.class = { --CSS classes
	Interlinear = "interlinear",
	GlossAbbr  = "gloss-abbr", -- added to every node built by format_gloss()
	GlossAbbrAmb = "gloss-abbr-ambiguous", -- added when the abbreviation data flags the entry as ambiguous
	GlossAbbrError = "gloss-abbr-error", -- added when no expansion could be found
	ErrorMessage = "error",
}
---------------------
-- Section transclusion
---------------------
local page_content = nil -- wikitext of the current page, fetched on first use
-- Expands the labelled section `section_name` of the current page.
-- Returns '' when the page text is unavailable or does not contain
-- `section_name` as a literal substring, so that the comparatively expensive
-- preprocess call is skipped for sections that cannot exist.
local function get_section(frame, section_name)
	if page_content == nil then
		page_content = mw.title.getCurrentTitle():getContent()
	end
	local mentions_section = page_content
		and mw.ustring.find(page_content, section_name, 1, true)
	if not mentions_section then
		return ''
	end
	return frame:preprocess('{{#section:{{FULLPAGENAME}}|' .. section_name .. '}}')
end
---------------------
-- Sundry small functions
---------------------
-- Collapses every run of word-separator characters (see conf.WordSeparator)
-- in `str` into a single space and returns the resulting string.
local function normalise(str)
	-- The outer parentheses discard gsub's second result (the number of
	-- substitutions) so that callers receive exactly one value.
	return (mw.ustring.gsub(str, "[" .. conf.WordSeparator .. "]+", " "))
end

-- Cleans up a user-supplied CSS declaration list: surrounding quotation marks
-- are stripped and the result is guaranteed to end in a semicolon.
local function tidyCss(str)
	local css = mw.ustring.gsub(str, '^[\"\']*(.-)[\"\']*$', "%1")
	local last_char = mw.ustring.sub(css, -1)
	if last_char == ";" then
		return css
	end
	return css .. ";"
end

-- Wraps `text` in a bold, red <span>; a nil or false argument yields "".
local function highlight(text)
	if not text then
		return ""
	end
	local opening = '<span style="color:#C00;font-weight:bold;">'
	return opening .. text .. '</span>'
end

-- Superscripts digit sequences that directly follow a character which is
-- neither punctuation, whitespace nor a digit (e.g. tone numbers after a
-- syllable) and returns the resulting string.
local function tone_sup(str)
	-- The outer parentheses discard gsub's second result (the number of
	-- substitutions) so that callers receive exactly one value.
	return (mw.ustring.gsub(str, "([^%p%s0-9])([0-9]+)", "%1<sup>%2</sup>"))
end

-- Returns true when `str` is nil/false or holds nothing but word-separator
-- characters (see conf.WordSeparator); returns false otherwise.
local function is_empty(str)
	if not str then
		return true
	end
	local first_visible = mw.ustring.find(str, "[^" .. conf.WordSeparator .. "]")
	return first_visible == nil
end

-- Builds a parenthesised "help" wikilink to section `anchor` of the page named
-- in conf.ErrorHelpLocation; returns "" when no anchor is given.
local function help_link (anchor)
	if not anchor then
		return ""
	end
	local target = conf.ErrorHelpLocation .. "#" .. anchor
	return " ([[" .. target .. "|help]])"
end

--------------------
-- The following two functions update the glossing settings based on the received
-- template arguments. set_global_glossing_settings() updates the global settings
-- that are valid for all gloss abbreviations. set_glossing_type()
-- returns the glossing type, which can vary between the different lines.
--------------------
-- Appends caller-requested CSS to conf.style.GlossAbbr.
-- `a` is a table with the optional fields:
--   style      - extra CSS declarations (tidied with tidyCss)
--   underline  - "no" removes the text decoration
--   small_caps - "no" switches off the small-caps rendering
local function set_global_glossing_settings(a)
	local extra = {}
	if a.style then
		extra[#extra + 1] = tidyCss(a.style)
	end
	if a.underline == "no" then
		extra[#extra + 1] = "text-decoration: none;"
	end
	if a.small_caps == "no" then
		extra[#extra + 1] = "font-variant:normal; text-transform: none;"
	end
	local addition = table.concat(extra)
	if addition ~= "" then
		conf.style.GlossAbbr = conf.style.GlossAbbr .. addition
	end
end

-- Translates the user-supplied `glossing` argument into a glossing type.
-- Returns:
--   "label"           - the value contains "no link" or "label"
--   "wikilink"        - the value contains "link" (but not "no link")
--   "no abbr"         - the value contains "no abbr"
--   nil               - the value is a "no"-like string according to yesno()
--   conf.GlossingType - the value is a "yes"-like string according to yesno()
-- Any other value records an error through msg and returns nil.
-- Raises an error (reported at the caller's position) if `glossing` is nil or false.
local function set_glossing_type(glossing)
	if not glossing then
		error("set_glossing_type: 'glossing' is nil or false", 2)
	end
	glossing = mw.ustring.lower(mw.text.trim(glossing))
	-- Plain-text substring test; none of the keywords is meant as a pattern.
	local function mentions(keyword)
		return mw.ustring.find(glossing, keyword, 1, true) ~= nil
	end
	-- "no link" has to be tested before "link": every string containing
	-- "no link" also contains "link" and would otherwise be taken as a request
	-- for wikilinks.
	if mentions('no link') then
		return 'label'
	elseif mentions('link') then
		return "wikilink"
	elseif mentions('label') then
		return 'label'
	elseif mentions('no abbr') then
		return "no abbr"
	end
	local flag = yesno(glossing)
	if flag == false then
		return nil
	elseif flag then
		return conf.GlossingType
	end
	msg:add('error', 'Glossing type "' .. glossing .. '" not recognised')
	return nil
end

-- Registers user-defined abbreviations in gloss_override.
-- `list` holds entries separated by ";", newline or tab; each entry has the
-- form "ABBR:expansion:wikipage", where only the abbreviation is trimmed and
-- the last two fields are optional. Entries with a blank abbreviation are skipped.
local function set_custom_glosses(list)
	for _, entry in pairs(mw.text.split(list, '[;\n\t]')) do
		local fields = mw.text.split(entry, ':')
		local abbreviation = mw.text.trim(fields[1])
		if abbreviation and abbreviation ~= "" then
			gloss_override[abbreviation] = {
				expansion = fields[2],
				wikipage = fields[3],
			}
		end
	end
end

---------------------
-- The UserMessages object contains and processes error messages and warnings
---------------------
local UserMessages = {errors = {}, warnings = {}, gloss_messages = {}}
-- Records a message of the given type:
--   "error"               - appended to the error list
--   "non-repeating error" - stored under a single key, so repeats overwrite each other
--   "warning"             - appended to the warning list
--   "gloss_message"       - stored per abbreviation, keyed by `gloss`
--   "ambiguous gloss"     - only raises the if_ambiguous_glosses flag
-- Any other type raises an error reported at the caller's position.
function UserMessages:add(msgtype, text, gloss)
	if msgtype == "error" then
		table.insert(self.errors, text)
		return
	end
	if msgtype == "non-repeating error" then
		self.errors.nre = text
		return
	end
	if msgtype == "warning" then
		table.insert(self.warnings, text)
		return
	end
	if msgtype == "gloss_message" then
		self.gloss_messages[gloss] = text
		return
	end
	if msgtype == "ambiguous gloss" then
		self.if_ambiguous_glosses = true
		return
	end
	return error("UserMessages:add(): unknown message type", 2)
end
-- Renders the collected errors as an error <span> followed, where applicable,
-- by tracking categories. Returns "" when there is nothing to report.
-- Also raises the generic preview warning through mw.addWarning().
function UserMessages:print_errors()
	local namespace = mw.title.getCurrentTitle().namespace
	local pieces = {}
	if next(self.errors) or self.warnings[1] then
		local span = mw.html.create("span")
		span:attr("style", conf.style.ErrorMessage)
		span:addClass(conf.class.ErrorMessage)
		for _, text in pairs(self.errors) do
			span:wikitext(" " .. text .. ";")
		end
		-- non-talk namespaces, excluding user pages; if modifying please update the description on the category page
		local is_talk_page = namespace % 2 ~= 0
		if not is_talk_page and namespace ~= 2 then
			span:wikitext(conf.ErrorCategory)
		end
		pieces[#pieces + 1] = tostring(span)
		mw.addWarning(conf.MessageGlossingError)
	end
	-- the ambiguity category only tracks pages in the article namespace
	if self.if_ambiguous_glosses and namespace == 0 then
		pieces[#pieces + 1] = conf.AmbiguousGlossCategory
	end
	return table.concat(pieces)
end
-- Renders the preview-only message box listing the warnings and the logged
-- abbreviation lookups. Returns "" unless displaying_messages is set and there
-- is at least one warning or gloss message.
function UserMessages:print_warnings()
	local out = ""
	-- Messages and warnings get displayed only if the page is being viewed in "preview" mode:
	if displaying_messages and (next(self.gloss_messages) or next(self.warnings)) then
		local div = mw.html.create("div")
		div:addClass("interlinear-preview-warning")
			:cssText('border: 1px solid #a2a9b1; background-color: #f8f9fa; width: 80%; padding: 0.2em;')
			:wikitext("<i>This message box is shown only in preview:</i>")
			:newline()
		for _,v in ipairs(self.warnings) do
			-- named `para` so that the module table `p` is not shadowed
			local para = div:tag("p")
			para:addClass(conf.class.ErrorMessage)
			para:attr("style", conf.style.ErrorMessage)
			para:wikitext(v)
		end
		-- gloss_messages is always a table (hence always truthy), so test for
		-- content; otherwise the heading would appear above an empty list
		-- whenever only warnings are present.
		if next(self.gloss_messages) then
			div:wikitext("<p>  To change any of the following default expansions, see [[Template:Interlinear/doc#Custom abbreviations|the template's documentation]]:</p>")
			for _,v in pairs(self.gloss_messages) do
				div:wikitext("<p>" .. v .. "</p>")
			end
		end
		out = out .. "\n\n" .. tostring(div)
	end
	return out
end

---------------------
-- gloss_lookup() receives a gloss abbreviation and tries to uncover its meaning.
---------------------
-- Looks up the meaning of the abbreviation `a`.
-- Custom overrides take precedence over the shared abbreviation data. When
-- neither supplies a non-empty expansion, the abbreviation is analysed as a
-- composite:
--   * person + number and/or gender, e.g. 1PL or 3FS
--   * gender/class prefix + one or two digits, e.g. G4 or CL7
--   * "N" + a known abbreviation, e.g. NPST = non-past
-- A `label` or `wikilink` passed in by the caller is kept as is; only missing
-- values are filled in from the lookup.
-- Returns label, wikilink, source; source is "local" when a custom override
-- matched and nil otherwise.
local function gloss_lookup(a, label, wikilink)
	local found_label, found_link, origin
	local entry = gloss_override[a]
	if entry then
		origin = "local"
	else
		entry = data.abbreviations[a]
	end
	if entry and entry.expansion ~= "" then
		found_label, found_link = entry.expansion, entry.wikipage
	else
		local head = mw.ustring.sub(a, 1, 1)
		local tail = mw.ustring.sub(a, 2)
		local person = conf.combining_person[head]
		if person then -- is it of the form 1PL or 3FS?
			local simple = conf.combining_number[tail] or conf.combining_gender[tail]
			if simple then
				found_label = person .. ", " .. simple
			else
				-- try gender followed by number, e.g. 3FS
				local gender = conf.combining_gender[mw.ustring.sub(tail, 1, 1)]
				local number = conf.combining_number[mw.ustring.sub(tail, 2)]
				if gender and number then
					found_label = person .. ", " .. gender .. ", " .. number
				end
			end
		elseif mw.ustring.match(tail, conf.combining_gender_numbers) then -- cases like G4 = gender 4
			local digits_at = mw.ustring.find(a, conf.combining_gender_numbers)
			local class_word = conf.combining_gender_prefixes[mw.ustring.sub(a, 1, digits_at - 1)]
			if class_word then
				found_label = class_word .. " " .. mw.ustring.sub(a, digits_at)
			end
		elseif head == "N" then -- dealing with cases like NPST = non-past
			local base = gloss_override[tail] or data.abbreviations[tail]
			if base ~= nil and not base.ExcludeNegation then
				found_label = "non-" .. base.expansion
				found_link = base.wikipage
			end
		end
	end
	if found_label == "" then found_label = nil end
	if found_link == "" then found_link = nil end
	return label or found_label, wikilink or found_link, origin
end

---------------------
-- format_gloss() calls gloss_lookup() to find the meaning of a gloss
-- abbreviation, which it then proceeds to format
---------------------
-- Formats a single glossing abbreviation as an HTML node.
--   gloss    - the abbreviation as written; it is also the displayed text
--   label    - optional expansion shown in the tooltip
--   wikilink - optional page title to link to
-- Whatever the caller leaves out is looked up with gloss_lookup(), unless the
-- glossing type is "no abbr". Abbreviations without any expansion are rendered
-- with error styling and reported through msg.
-- Returns the node serialised to a string, or `gloss` unchanged when it looks
-- like part of an exposed strip marker.
local function format_gloss(gloss, label, wikilink)
	if string.sub(gloss,1,3) == "000" then -- checks for a common component of exposed strip markers (see [[:mw:Strip marker]])
		return gloss
	end
	-- Where the expansion came from ("local" for a custom override). It must be
	-- a local: as a global it would keep the value of an earlier call whenever
	-- the lookup below is skipped, wrongly suppressing the ambiguity marking
	-- and the lookup log.
	local source
	local gloss2 = mw.ustring.gsub(gloss,"<.->","") -- remove any html fluff
	gloss2 = mw.ustring.gsub(gloss2, "%'%'+", "") -- remove wiki bold/italic formatting
	gloss2 = mw.text.trim(mw.ustring.upper(gloss2))
	local needs_lookup = not (label or wikilink)
		or (not label and glossing_type == "label")
		or (not wikilink and glossing_type == "wikilink")
	if needs_lookup and glossing_type ~= "no abbr" then
		label, wikilink, source = gloss_lookup(gloss2, label, wikilink)
	end
	local gloss_node
	if glossing_type == "no abbr" then
		gloss_node = mw.html.create("span")
	else
		gloss_node = mw.html.create("abbr")
	end
	gloss_node:addClass(conf.class.GlossAbbr)
	if label or wikilink then
		if not mw.ustring.match(gloss, "%l") -- excluding glosses that contain lower-case characters
			and not mw.ustring.match(gloss,conf.GlossSmallCapsExclude) -- and also excluding A, O etc. from rendering in small caps
		then
			gloss_node:attr("style", conf.style.GlossAbbr)
		end
		local abbr_label = label or wikilink -- the tooltip falls back to the page title
		gloss_node:attr("title", abbr_label)
		if source ~= "local" and data.abbreviations[gloss2] then
			if data.abbreviations[gloss2].ambiguous then
				gloss_node:addClass(conf.class.GlossAbbrAmb)
				msg:add("ambiguous gloss")
			end
		end
		if glossing_type == "wikilink" and wikilink then
			gloss_node:wikitext("[[", wikilink, "|" , gloss, "]]")
		else
			gloss_node:wikitext(gloss)
		end
		if source ~= "local" and displaying_messages then -- logging gloss lookups:
			local message = ""
			if label then
				message = "assuming " .. gloss2 .. " means \"" .. abbr_label .. "\";"
			end
			if glossing_type == "wikilink" and wikilink then
				message = message .. " linking to [[" .. wikilink .. "]];"
			end
			msg:add("gloss_message", message, gloss)
		end
	elseif glossing_type == "no abbr" then
		gloss_node
			:attr("style", conf.style.GlossAbbr)
			:wikitext(gloss)
	else
		-- nothing found: flag the abbreviation and record the problem
		if displaying_messages then
			msg:add("warning", "Gloss abbreviation " .. highlight(gloss2) .. "  not recognised" .. help_link("gloss abbr"))
		end
		msg:add("non-repeating error", "Unknown glossing abbreviation(s)" .. help_link("gloss abbr"))
		gloss_node
			:addClass(conf.class.GlossAbbrError)
			:addClass("error")
			:css("font-size", "100%")
			:attr("title", gloss2 .. ": glossing abbreviation not found")
			:attr("style", conf.style.ErrorMessage)
			:wikitext(gloss)
	end
	return tostring(gloss_node)
end

---------------------
-- find_gloss() parses a word into morphemes, and it calls format_gloss()
-- for anything that looks like a glossing abbreviation.
---------------------
-- Splits `word` into morphemes at the boundary characters listed in
-- conf.GlossAbbrBoundary and passes every morpheme that looks like a glossing
-- abbreviation through format_gloss(). Returns the reassembled word.
local function find_gloss(word)
	local decoded = mw.text.decode(word, true)
	if decoded == "I" then -- for the case of the English word "I", the 1SG pronoun
		return decoded
	end

	-- A morpheme qualifies when it matches the abbreviation pattern (or is one
	-- of the accepted lower-case forms) and is not explicitly excluded.
	local function looks_like_gloss(morpheme)
		if not (mw.ustring.match(morpheme, conf.GlossAbbrPattern)
			or conf.LowerCaseGlosses[morpheme])
		then
			return false
		end
		if conf.GlossExcludeTable[morpheme] then
			return false
		end
		return not mw.ustring.match(morpheme, conf.GlossExcludePattern)
	end

	local boundary_set = conf.GlossAbbrBoundary
	local morpheme_pattern = "([" .. boundary_set .. "]?)([^" .. boundary_set .. "]+)"
	local rendered = mw.ustring.gsub(decoded, morpheme_pattern, function(boundary, morpheme)
		if looks_like_gloss(morpheme) then
			morpheme = format_gloss(morpheme)
		end
		return boundary .. morpheme
	end)
	return rendered
end

--------------------
-- The following function is called by Template:gcl and is used for formatting an individual glossing abbreviation
--------------------
-- Entry point for Template:Gcl: formats one glossing abbreviation.
-- Template parameters (read from the parent frame):
--   1          - the abbreviation (required)
--   2          - optional expansion shown in the tooltip
--   3          - optional page to link to; supplying it switches to wikilink
--                mode unless |glossing= is given
--   glossing   - glossing type, see set_glossing_type()
--   style, underline, small-caps - see set_global_glossing_settings()
-- Returns the formatted abbreviation, or an error message when no
-- abbreviation was supplied.
function p.gcl(frame)
	local args = getArgs(frame,{
		trim = true,
		removeBlanks = false,
		parentOnly = true,
		wrappers = {'Template:Gcl'},
	})
	msg = UserMessages
	set_global_glossing_settings{style = args.style, underline = args.underline, small_caps = args['small-caps']}
	if not args.glossing then
		glossing_type = conf.GlossingType -- a global variable
	else
		glossing_type = set_glossing_type(args.glossing)
	end
	local gloss, label, wikilink = args[1], args[2], args[3]
	-- Blank parameters arrive as "" because removeBlanks is off. Normalise them
	-- before they are tested, so that an empty third parameter does not count
	-- as a supplied wikilink and an empty first parameter counts as missing.
	if gloss == "" then gloss = nil end
	if label == "" then label = nil end
	if wikilink == "" then wikilink = nil end
	if not gloss then
		msg:add("error", "No gloss supplied")
		-- UserMessages has no print() method; print_errors() renders the error list
		return msg:print_errors()
	end
	if wikilink and not args.glossing then -- if a wikilink is supplied and no glossing type was requested...
		glossing_type = 'wikilink' -- ... then the wikilink will be formatted as such
	end
	return format_gloss(gloss, label, wikilink)
end

return p