Vai al contenuto

Modulo:Unicode data/datasets

Da Wikipedia

La documentazione per questo modulo può essere creata in Modulo:Unicode data/datasets/man

local export = {}

--[==[
Fetches a data set from Wikimedia Commons by the name `dataset_name`, parses it as a Unicode character key-value table, and returns that table.

The data set's schema must contain at least two fields of type `string`: one whose name contains `key` and one whose name contains `value`. Names are matched case-insensitively as substrings; a field whose name contains `key` is never considered for the value role, and if several fields match the same role, the last one in schema order is used.

Each key string is converted with `tonumber`, so it is expected to be a numeric literal such as `0x1234`. The table returned has numbers as the keys and strings as the values. If the same key occurs in several rows, the value of the last such row is kept.

Raises an error if the data set cannot be fetched, if either field is missing from the schema, or if a row's key cannot be converted into a usable number.]==]
function export.dataset(dataset_name)
	local dataset = mw.ext.data.get(dataset_name)
	
	if not dataset then error("Dataset " .. dataset_name .. " not found") end
	
	-- Check schema: locate the columns holding the character codes and the values.
	local charcode_index = nil
	local value_index = nil
	for index, field in ipairs(dataset.schema.fields) do
		local field_name_lower = field.name:lower()
		-- Plain (non-pattern) substring search; the names are literal text.
		if field_name_lower:find("key", 1, true) and field.type == "string" then
			charcode_index = index
		elseif field_name_lower:find("value", 1, true) and field.type == "string" then
			value_index = index
		end
	end
	
	if not charcode_index then error("Character code field (name='key', type='string') not found in data schema.") end
	if not value_index then error("Value field (name='value', type='string') not found in data schema.") end
	
	-- Extract values from dataset data.
	local result = {}
	for row_index, item in ipairs(dataset.data) do
		local raw_key = item[charcode_index]
		local charcode = tonumber(raw_key)
		-- A nil or NaN key cannot index a table; report the offending row
		-- instead of failing with an opaque "table index is nil" error.
		if not charcode or charcode ~= charcode then
			error("Invalid character code '" .. tostring(raw_key) .. "' in row " .. row_index .. " of dataset " .. tostring(dataset_name) .. ".")
		end
		result[charcode] = item[value_index]
	end
	
	return result
end

return export