Hopp til innhold

Bruker:Jeblad/Module:JSONstat

Fra Wikipedia, den frie encyklopedi
Dokumentasjon

The purpose of this module is to demonstrate handling of JSON-stat in Lua with closure-based instances modelled as lazy facades. The instances will not be created before boundaries are passed, that is, when a method returns a value that is wrapped in a closure. This makes a fairly efficient implementation for simple access.

The library can be used as simple calls through the invoke parser function, or as a support library. A main purpose for the lib is to provide Vega-formatted data, often constrained or adapted somehow to fit better with other available data.

Primary access point is the load-method, which will parse one of several sources and return an instance according to the provided data. In Lua-code the instance can be navigated by using provided methods, while in wikitext it is possible to use layout (format) parameters. One such predefined layout could be to create data for Vega, which is nothing more than the dataset values as an expanded table.

In wikicode the module will typically be called like one of the following

{{#invoke:JSONstat | load | updated }}
{{#invoke:JSONstat | load | Q123456 | label }}
{{#invoke:JSONstat | load | Bruker:Jeblad/SSB01222.json | ${source:%.10s} }}
{{#invoke:JSONstat | load | { ... } | expanded }}

The first call uses the connected item to identify the external JSON-stat dataset (not implemented). The layout updated reports the datestamp when the dataset was last updated.

The second call identifies an item to use for further identification of a JSON-dataset (not implemented). The layout label reports the label for the dataset.

The third call identifies a page that contains the dataset. It uses an inline format for the layout, and reports a truncated source string.

The fourth call uses an inline definition of the dataset. It uses a layout that reports the values from the expanded form, that is, a form that can be used in Vega.

Similar calls in Lua-code would be like the following

local jsonstat = require('Module:JSONstat')
jsonstat.load().updated()
jsonstat.load( 'updated' )
jsonstat.load( 'Q123456' ).label()
jsonstat.load( 'Q123456', 'label' )
jsonstat.load( 'Bruker:Jeblad/SSB01222.json' ).layout( '${source:%.10s}' )
jsonstat.load( 'Bruker:Jeblad/SSB01222.json', '${source:%.10s}' )
jsonstat.load( '{ ... }' ).expanded()
jsonstat.load( '{ ... }', 'expanded' )


-- module for processing JSON-stat
-- © John Erling Blad, Creative Commons by Attribution 3.0

-- don't pollute with globals
require('Module:No globals')

-- non-standard json library
local json = require('Module:DKjson')

-- @var our exposed table
local JSONstat = {}

-- Constructor for a category facade
-- The provided table is kept by reference, it is not copied.
-- @param table init structure kept for later
-- @return self
function JSONstat.Category( init )
	local self = {}
	local data = init

	-- fetch a whole member of the category, or a single entry within it
	-- A missing member gives nil instead of an error, as not every member
	-- has to be present in the wrapped structure.
	-- @param string name of the member, that is 'index', 'label' or 'unit'
	-- @param number count of arguments the caller was given
	-- @param string id of the entry, only used when count is not zero
	-- @return table/value nil if the member or the entry is missing
	local function lookup( name, count, id )
		local member = data and data[name]
		if count == 0 then
			return member
		end
		if type( member ) ~= 'table' then
			return nil
		end
		return member[id]
	end

	-- constructor signature
	-- @return string
	function self.signature()
		return 'JSONstatCategory'
	end

	-- provide a valid table structure
	-- @return table the wrapped structure itself
	function self.toTable()
		return data
	end

	-- provide a valid json structure
	-- @return string
	function self.toJSON()
		return json.encode( data )
	end

	-- get the index, either all of it or the named entry
	-- @param string id of the entry (optional)
	-- @return table/value if id is provided and not found, then nil
	function self.index(...)
		-- select('#', ...) counts the arguments without the implicit 'arg' table
		return lookup( 'index', select( '#', ... ), ... )
	end

	-- get the label, either all of it or the named entry
	-- @param string id of the entry (optional)
	-- @return table/value if id is provided and not found, then nil
	function self.label(...)
		return lookup( 'label', select( '#', ... ), ... )
	end

	-- get the unit, either all of it or the named entry
	-- @param string id of the entry (optional)
	-- @return table/value if id is provided and not found, then nil
	function self.unit(...)
		return lookup( 'unit', select( '#', ... ), ... )
	end

	-- get the index, label and unit for the named entry
	-- @param string id of the entry
	-- @return three values (index, label, unit), each nil if not found
	function self.get( id )
		return lookup( 'index', 1, id ), lookup( 'label', 1, id ), lookup( 'unit', 1, id )
	end

	-- this is our instance
	return self
end

-- Constructor for an axis, that is a single named entry of a dimension
-- Note that JSON-stat has no explicit structure with this name
-- @param table init structure kept for later
-- @return self
function JSONstat.Axis( init )
	local payload = init

	-- the instance is assembled in one go from its closures
	local axis = {
		-- constructor signature
		-- @return string
		signature = function()
			return 'JSONstatAxis'
		end,

		-- hand out the wrapped structure as it is
		-- @return table
		toTable = function()
			return payload
		end,

		-- serialize the wrapped structure
		-- @return string
		toJSON = function()
			return json.encode( payload )
		end,

		-- the label, which should be a string at this level
		label = function()
			return payload.label
		end,

		-- wrap the category of this axis, it should always exist
		-- @return table a new category instance for each call
		category = function()
			return JSONstat.Category( payload.category )
		end,
	}

	return axis
end

-- Constructor for a collection facade
-- @param table init structure kept for later
-- @return self
function JSONstat.Collection( init )
	local payload = init

	-- name the kind of instance
	-- @return string
	local function signature()
		return 'JSONstatCollection'
	end

	-- hand out the wrapped structure as it is
	-- @return table
	local function toTable()
		return payload
	end

	-- serialize the wrapped structure
	-- @return string
	local function toJSON()
		return json.encode( payload )
	end

	-- read the class member, which may be missing
	-- @return string
	local function class()
		return payload.class
	end

	-- the closures above make up the instance
	return {
		signature = signature,
		toTable = toTable,
		toJSON = toJSON,
		class = class,
	}
end

-- Constructor for a dimension facade
-- The wrapped structure holds the list of axis ids and sizes, and one entry
-- for each axis keyed by its id.
-- @param table init structure kept for later
-- @return self
function JSONstat.Dimension( init )
	local self = {}
	local data = init

	-- constructor signature
	-- @return string
	function self.signature()
		return 'JSONstatDimension'
	end

	-- provide a valid table structure
	-- @return table the wrapped structure itself
	function self.toTable()
		return data
	end

	-- provide a valid json structure
	-- @return string
	function self.toJSON()
		return json.encode( data )
	end

	-- get the class value
	-- note that this may not exist
	-- @return string
	function self.class()
		return data.class
	end

	-- get a named entry from the dimension
	-- this will always be an axis
	-- @param string id of the axis
	-- @return axis instance if found, otherwise nil
	function self.get( id )
		return data[id] and JSONstat.Axis( data[id] ) or nil
	end

	-- get the time of the update
	-- note that this may not exist
	-- @return string formatted according to ISO8601
	function self.updated()
		return data.updated
	end

	-- get the ids of all axes
	-- should always exist
	-- @return table
	function self.id()
		return data.id
	end

	-- get the sizes of all axes
	-- should always exist
	-- @return table
	function self.size()
		return data.size
	end

	-- get the role member as it is
	-- @todo
	function self.role()
		return data.role
	end

	-- evaluate the provided function for each id, supplying the axis
	-- The ids are visited in list order. A generic for needs an iterator,
	-- so the list is walked with ipairs() instead of being used directly.
	-- @param function f callback given position, id and axis instance
	function self.each( f )
		for k, v in ipairs( data.id or {} ) do
			f( k, v, JSONstat.Axis( data[v] ) )
		end
	end

	-- this is our instance
	return self
end

-- Constructor for a dataset facade
-- The provided table is kept by reference, it is not copied.
-- @param table init structure kept for later
-- @return self
function JSONstat.Dataset( init )
	local self = {}
	local data = init

	-- turn a linear position into one coordinate for each axis
	-- The last axis varies fastest, which is the same order self.value()
	-- uses when it goes the opposite way.
	-- @param number pos one-based position in the list of values
	-- @param table size number of categories for each axis
	-- @return table one-based coordinates, one for each axis
	local function coordinates( pos, size )
		local idx = {}
		local rest = pos - 1
		for i = #size, 1, -1 do
			idx[i] = rest % size[i] + 1
			rest = math.floor( rest / size[i] )
		end
		return idx
	end

	-- visit every cell that holds a value
	-- Positions are counted instead of using ipairs(), otherwise a single
	-- missing value would end the walk.
	-- @todo this only handles values kept as a list
	-- @param function f callback given the coordinates and the value
	local function walk( f )
		local size = data.dimension.size
		local total = 1
		for _, count in ipairs( size ) do
			total = total * count
		end
		for pos = 1, total do
			local v = data.value[pos]
			if v ~= nil then
				f( coordinates( pos, size ), v )
			end
		end
	end

	-- constructor signature
	-- @return string
	function self.signature()
		return 'JSONstatDataset'
	end

	-- provide a valid table structure
	-- @return table the wrapped structure itself
	function self.toTable()
		return data
	end

	-- provide a valid json structure
	-- @return string
	function self.toJSON()
		return json.encode( data )
	end

	-- get the class value
	-- note that this may not exist
	-- @return string
	function self.class()
		return data.class
	end

	-- get all values, or the value of a single cell
	-- Without arguments the whole value member is returned. A single table
	-- argument maps axis id to category id. Otherwise there must be one
	-- one-based position for each axis, in the order of the axis ids.
	-- this has no children
	-- this will change type if sparse
	-- @param vararg nothing, a table, or one position for each axis
	-- @return table/value nil if the cell can not be identified
	function self.value(...)
		local count = select( '#', ... )
		if count == 0 then
			return data.value
		end
		local ids = data.dimension.id
		local first = ...
		local indexes = {}
		if count == 1 and type( first ) == 'table' then
			for i, id in ipairs( ids ) do
				local axis = data.dimension[id]
				local index = axis and axis.category and axis.category.index
				-- the index holds zero-based positions keyed by category id
				local offset = type( index ) == 'table' and index[first[id]]
				if type( offset ) ~= 'number' then
					-- unknown axis or category, there is no such cell
					return nil
				end
				indexes[i] = offset + 1
			end
		elseif count == #ids then
			local positions = { ... }
			for i = 1, count do
				local num = tonumber( positions[i] )
				if not num then
					return nil
				end
				indexes[i] = num
			end
		else
			return nil
		end
		-- fold the coordinates into a linear position, last axis fastest
		local size = data.dimension.size
		local pos = 0
		local cube = 1
		for i = #indexes, 1, -1 do
			pos = pos + ( indexes[i] - 1 ) * cube
			cube = cube * size[i]
		end
		return data.value[pos + 1]
	end

	-- this has no children
	-- this can be a table
	function self.status()
		return data.status
	end

	-- at this level it should be a string
	function self.label()
		return data.label
	end

	-- a language dependent string
	-- note that there is no language marker
	function self.source()
		return data.source
	end

	-- get the time of the update
	-- note that this may not exist
	-- @return string formatted according to ISO8601
	function self.updated()
		return data.updated
	end

	-- wrap the dimension member
	-- @return dimension instance, a new one for each call
	function self.dimension()
		return JSONstat.Dimension( data.dimension )
	end

	-- evaluate the provided function for each table cell
	-- The coordinates are one-based and in the order of the axis ids, so
	-- they can be handed back to self.value() to get the same cell.
	-- @param function f the callback to be evaluated for each data value
	-- @return none
	function self.each( f )
		walk( f )
	end

	-- expand for each table cell
	-- @todo this should build a row from the labels of each axis, for now
	--       it reports coordinates and value just as self.each() does
	-- @param function f the callback to be evaluated for each data value
	-- @return none
	function self.expand( f )
		walk( f )
	end

	-- reformat a table by slicing through it
	-- @todo not implemented, nothing is returned
	-- @param varargs
	-- @return JsDataset
	function self.slice(...)
	end

	-- this is our instance
	return self
end

-- Constructor for a bundle facade, which holds datasets by name
-- @param table init structure kept for later
-- @return self
function JSONstat.Bundle( init )
	local payload = init

	-- the instance is assembled in one go from its closures
	local bundle = {
		-- constructor signature
		-- @return string
		signature = function()
			return 'JSONstatBundle'
		end,

		-- hand out the wrapped structure as it is
		-- @return table
		toTable = function()
			return payload
		end,

		-- serialize the wrapped structure
		-- @return string
		toJSON = function()
			return json.encode( payload )
		end,

		-- pick a named entry and wrap it as a dataset
		-- @param string id of the dataset
		-- @return dataset instance if found, otherwise nil
		get = function( id )
			local entry = payload[id]
			if entry then
				return JSONstat.Dataset( entry )
			end
			return nil
		end,
	}

	return bundle
end

-- helper that picks the right constructor for a decoded structure
-- A structure with a known class is wrapped as it is. A structure without
-- a class is probed for a dataset, dimension or collection member, in that
-- order, and the first one found is wrapped. Anything else is a bundle.
-- @param table init data structure to recast as instances
-- @return function closures representing instances
function JSONstat.typecast( init )
	-- names in the order they are probed
	local known = { 'dataset', 'dimension', 'collection' }
	local build = {
		dataset = JSONstat.Dataset,
		dimension = JSONstat.Dimension,
		collection = JSONstat.Collection,
	}

	local kind = init.class
	if kind then
		local constructor = build[kind]
		if constructor then
			return constructor( init )
		end
	else
		for _, name in ipairs( known ) do
			if init[name] then
				return build[name]( init[name] )
			end
		end
	end

	return JSONstat.Bundle( init ) -- not sure if this is correct
end

-- alternate reports to be made
-- this is mostly for test cases
-- Each report takes the loaded instance, which may be nil, and gives back
-- something to show. The string 'nil' is used whenever there is no instance,
-- the instance has no such method, or the method has nothing to report.
local report = {}
do
	-- call a chain of methods without arguments, starting at the instance
	-- @param table object instance to start from, may be nil
	-- @param vararg names of the methods to call in turn
	-- @return value from the last call, or 'nil' if anything is missing
	local function chain( object, ... )
		local current = object
		for i = 1, select( '#', ... ) do
			local name = select( i, ... )
			local method = type( current ) == 'table' and current[name]
			if type( method ) ~= 'function' then
				return 'nil'
			end
			current = method()
		end
		return current or 'nil'
	end

	-- there is no global dump(), the serializer from the mw library is used
	report['dump'] = function( object ) return object and mw.dumpObject( object ) or 'nil' end
	report['signature'] = function( object ) return chain( object, 'signature' ) end
	report['label'] = function( object ) return chain( object, 'label' ) end
	report['updated'] = function( object ) return chain( object, 'updated' ) end
	report['dimension-signature'] = function( object ) return chain( object, 'dimension', 'signature' ) end
end

-- replace named args by using the provided structure
-- A placeholder is written as ${path} or ${path:format}, where path is a
-- dot separated list of keys to follow in the structure, and format is
-- handed to string.format together with the value that was found.
-- A placeholder that can not be resolved or formatted is replaced by '∅'.
-- @param string layout text with placeholders
-- @param table struct structure to pick the values from
-- @return string layout with all placeholders replaced
local function replaceNamedArgs( layout, struct )
	local missing = '∅'

	-- resolve a single placeholder
	-- @param string str content of the placeholder, without the braces
	-- @return string/number the replacement
	local function redirect( str )
		-- split at the first colon only, the format itself may hold colons
		local pth, fmt = str, nil
		local colon = string.find( str, ':', 1, true )
		if colon then
			pth = string.sub( str, 1, colon - 1 )
			fmt = string.sub( str, colon + 1 )
		end
		local part = struct
		for entry in mw.text.gsplit( pth, ".", true ) do
			-- only tables can be followed, indexing a string would end up
			-- in the string library and indexing a number is an error
			if type( part ) ~= 'table' then
				return missing
			end
			part = part[mw.text.trim( entry )]
			if not part then
				return missing
			end
		end
		-- gsub only accepts strings and numbers as replacement
		if type( part ) ~= 'string' and type( part ) ~= 'number' then
			part = tostring( part )
		end
		if fmt then
			-- the format comes from the caller and may not fit the value
			local ok, formatted = pcall( string.format, fmt, part )
			return ok and formatted or missing
		end
		return part
	end

	-- the parentheses drop the replacement count from gsub
	return ( layout:gsub('${%s*([^{}]-)%s*}', redirect ) )
end

-- helper to do all the dirty parsing
-- The first argument identifies the data, the second is an optional layout.
-- A layout is either the name of a predefined report or a text with
-- ${...} placeholders. A single string that does not look like a source is
-- taken as the layout, with the data left to indirection.
-- @param vararg
--               - nil is assumed to imply indirection (not implemented)
--               - table with args is assumed to be a frame, and the two
--                 first args are then used as source and layout
--               - table without args is assumed to be decoded data
--               - string starting with '{' is inline json
--               - string like Q123 or P123 is an entity (not implemented)
--               - string ending in '.json' is a page holding the json
--               - function is called to get the decoded data
-- @return instance if no layout is given, otherwise the formatted report
function JSONstat.load(...)
	local anon, layout = ...
	local frame
	local instance

	-- wrap decoded data, anything but a table can not be wrapped
	-- @param table data decoded structure
	-- @return instance or nil
	local function cast( data )
		if type( data ) == 'table' then
			return JSONstat.typecast( data )
		end
		return nil
	end

	if type( anon ) == 'table' then
		if anon.args then
			frame = anon
			anon = frame.args[1]
			layout = frame.args[2]
		else
			instance = cast( anon )
			anon = nil
		end
	end

	-- a lone string that is not a source must be the layout
	if type( anon ) == 'string' and layout == nil then
		local isSource = string.match( anon, '^%s*{' )
			or string.match( anon, '^%s*[QP]%d+%s*$' )
			or string.match( anon, '%.json%s*$' )
		if not isSource then
			layout = anon
			anon = nil
		end
	end

	if type( anon ) == 'nil' then
		-- @todo follow wikidata to stat description and load accordingly
	elseif type( anon ) == 'string' then
		if string.match( anon, '^%s*{' ) then
			-- inline definition of data
			instance = cast( json.decode( anon ) )
		elseif string.match( anon, '^%s*[QP]%d+%s*$' ) then
			-- @todo go to defined page at wikidata to get a stat description
		elseif string.match( anon, '%.json%s*$' ) then
			-- go to defined page to get a stat description
			local current = frame or mw.getCurrentFrame()
			if current then
				local raw = current:expandTemplate{ title = mw.text.trim( anon ) }
				if raw then
					instance = cast( json.decode( raw ) )
				end
			end
		end
	elseif type( anon ) == 'function' then
		instance = cast( anon() )
	end

	if layout then
		layout = mw.text.trim( layout )
		if report[layout] then
			return report[layout]( instance )
		end
		-- without an instance there is nothing to pick values from
		return replaceNamedArgs( layout, instance and instance.toTable() or {} )
	end
	return instance
end

-- export the accesspoint
return JSONstat