-- Module:Sensitive IP addresses/API
--
-- From Wikipedia, the free encyclopedia (permanently protected module).
-- This is an old revision of this page, as edited by Mr. Stradivarius at
-- 13:09, 9 August 2016 (edit summary: "finish the query function, add a
-- couple of helper functions, and make a few bug fixes"). It may differ
-- significantly from the current revision.

-- This module provides functions for handling sensitive IP addresses.

-- Load modules
local mIP = require('Module:IP')
local IPAddress = mIP.IPAddress
local Subnet = mIP.Subnet
local IPv4Collection = mIP.IPv4Collection
local IPv6Collection = mIP.IPv6Collection
local libraryUtil = require('libraryUtil')
local checkType = libraryUtil.checkType

-------------------------------------------------------------------------------
-- Helper functions
-------------------------------------------------------------------------------

local function deepCopy(val)
	-- Recursively duplicate val. Non-table values are returned unchanged;
	-- tables are rebuilt key by key with every value copied in turn (keys
	-- themselves are reused as-is). Unlike mw.clone, metatables are ignored
	-- and self-references are not tracked, so a table that contains itself
	-- will recurse forever.
	if type(val) ~= 'table' then
		return val
	end

	local copy = {}
	for key, item in pairs(val) do
		copy[key] = deepCopy(item)
	end
	return copy
end

local function removeDuplicates(t)
	-- Build a new array holding the values of t in their original order,
	-- keeping only the first occurrence of each value. t is not modified.
	local seen = {}
	local unique = {}
	for _, value in ipairs(t) do
		if not seen[value] then
			unique[#unique + 1] = value
			seen[value] = true
		end
	end
	return unique
end

-------------------------------------------------------------------------------
-- SensitiveEntity class
-- A country or organization for which blocks must be handled with care.
-- Media organizations may inspect block messages for IP addresses and ranges
-- belonging to these entities and those messages may end up in the press.
-------------------------------------------------------------------------------

local SensitiveEntity = {
	-- Set of reasons for which an entity may be considered sensitive. Used
	-- to verify data in Module:Sensitive IP addresses/list.
	reasons = {
		technical = true,
		political = true,
	},
}
-- Instances look up their methods on the class table itself.
SensitiveEntity.__index = SensitiveEntity

do
	-- Private helper: build a collection holding one Subnet per range string.
	-- Returns nil when ranges is missing or has no first element, so that the
	-- corresponding field on the entity stays unset.
	local function buildCollection(makeCollection, ranges)
		if not (ranges and ranges[1]) then
			return nil
		end
		local collection = makeCollection()
		for _, rangeString in ipairs(ranges) do
			collection:addSubnet(Subnet.new(rangeString))
		end
		return collection
	end

	-- Constructor. data is one entity record; it is stored on the object as
	-- the data field, and its ipv4Ranges / ipv6Ranges arrays (when non-empty)
	-- are loaded into the v4Collection / v6Collection fields.
	function SensitiveEntity.new(data)
		local obj = setmetatable({}, SensitiveEntity)
		obj.data = data
		obj.v4Collection = buildCollection(IPv4Collection.new, data.ipv4Ranges)
		obj.v6Collection = buildCollection(IPv6Collection.new, data.ipv6Ranges)
		return obj
	end
end

function SensitiveEntity:matchesIPOrRange(str)
	-- Test the IP address string or CIDR range string str against this
	-- entity's ranges.
	--
	-- On a match, returns true, matchObj, queryObj, where matchObj is the
	-- Subnet object that was matched and queryObj is the IPAddress or Subnet
	-- object built from str. Returns false when the entity has no collection
	-- for the relevant IP version. Raises an error (blamed on the caller) if
	-- str is neither a valid IP address nor a valid CIDR string.
	checkType('matchesIPOrRange', 1, str, 'string')

	-- Interpret str as a single IP address first; a call that succeeds but
	-- yields no object is treated the same as a failure.
	local parsedOk, queryObj = pcall(IPAddress.new, str)
	local isIP = (parsedOk and queryObj) and true or false

	-- Otherwise fall back to interpreting it as a subnet.
	if not isIP then
		local subnetOk
		subnetOk, queryObj = pcall(Subnet.new, str)
		if not (subnetOk and queryObj) then
			local message = string.format(
				"'%s' is not a valid IP address or CIDR string",
				str
			)
			error(message, 2)
		end
	end

	-- Pick the collection matching the IP version of the query object.
	local collection
	if queryObj:isIPv4() then
		collection = self.v4Collection
	else
		collection = self.v6Collection
	end
	if not collection then
		return false
	end

	-- Single addresses use a containment test; ranges use an overlap test.
	local isMatch, matchObj
	if isIP then
		isMatch, matchObj = collection:containsIP(queryObj)
	else
		isMatch, matchObj = collection:overlapsSubnet(queryObj)
	end
	return isMatch, matchObj, queryObj
end

-------------------------------------------------------------------------------
-- Sensitive IP API
-------------------------------------------------------------------------------

-- This API is used by external tools and gadgets, so it should be kept
-- backwards-compatible. Clients query the API with a query table, and the
-- API returns a response table. The response table is available as a Lua table
-- for other Lua modules, and as JSON for external clients.

-- Example query tables:
--
-- Query IP addresses and ranges:
-- {
-- 	test = {'1.2.3.4', '4.5.6.0/24', '2001:db8::ff00:12:3456', '2001:db8::ff00:12:0/112'},
-- }
--
-- Query specific entities:
-- {
-- 	entities = {'ussenate', 'ushr'}
-- }
--
-- Query all entities:
-- {
-- 	entities = {'all'}
-- }
--
-- Combined query:
-- {
-- 	test = {'1.2.3.4', '4.5.6.0/24', '2001:db8::ff00:12:3456', '2001:db8::ff00:12:0/112'},
-- 	entities = {'ussenate', 'ushr'}
-- }

-- Example response:
--
-- {
--     sensitiveips = {
--         matches = {
--             {
--                 ip = '1.2.3.4',
--                 type = 'ip',
--                 ['ip-version'] = 'IPv4',
--                 ['matches-range'] = '1.2.3.0/24',
--                 ['entity-id'] = 'entityid'
--             },
--             {
--                 range = '4.5.6.0/24',
--                 type = 'range',
--                 ['ip-version'] = 'IPv4',
--                 ['matches-range'] = '4.5.0.0/16',
--                 ['entity-id'] = 'entityid'
--             }
--         },
--         ['matched-ranges'] = {
--             ['1.2.3.0/24'] = {
--                 range = '1.2.3.0/24',
--                 ['ip-version'] = 'IPv4',
--                 ['entity-id'] = 'entityid'
--             },
--             ['4.5.0.0/16'] = {
--                 range = '4.5.0.0/16',
--                 ['ip-version'] = 'IPv4',
--                 ['entity-id'] = 'entityid'
--             }
--         },
--         entities = {
--             ['entityid'] = {
--                 id = 'entityid',
--                 name = 'The entity name',
--                 description = 'A description of the entity',
--                 ['ipv4-ranges'] = {
--                     '1.2.3.0/24',
--                     '4.5.0.0/16',
--                     '6.7.0.0/16'
--                 },
--                 ['ipv6-ranges'] = {
--                     '2001:db8::ff00:12:0/112'
--                 },
--                 notes = 'Notes about the entity or its ranges'
--             }
--         },
--         ['entity-ids'] = {
--             'entityid'
--         }
--     }
-- }
--
-- Response with errors:
--
-- {
--     error = {
--         code = 'example-error',
--         info = 'There was an error',
--         ['*'] = 'See https://en.wikipedia.org/wiki/Module:Sensitive_IP_addresses for API usage'
--     }
-- }

local function query(options)
	-- Answer a sensitive-IP query and return a response table.
	--
	-- options must be a table containing a 'test' key, an 'entities' key, or
	-- both:
	--   test     - array of IP address or CIDR range strings to check against
	--              every sensitive entity.
	--   entities - array of entity ID strings whose data should be returned;
	--              the special string 'all' requests every entity.
	--
	-- On success the returned table may contain:
	--   matches            - array of match tables (only if 'test' was given)
	--   ['matched-ranges'] - matched range data keyed by CIDR string (only
	--                        if 'test' was given)
	--   entities           - copies of entity data keyed by entity ID
	--   ['entity-ids']     - array of the entity IDs present in 'entities',
	--                        without duplicates, in the order the entities
	--                        appear in Module:Sensitive IP addresses/list
	-- On failure the table built by makeError (code, info and '*' keys) is
	-- returned instead.
	--
	-- NOTE(review): the example responses documented above this function wrap
	-- these tables in 'sensitiveips' and 'error' keys respectively. This
	-- function returns them unwrapped; the shape is left as it was so that
	-- the return value does not change.

	-- Make entity objects
	local entities, entityIndexes, entityOrder = {}, {}, {}
	local data = mw.loadData('Module:Sensitive IP addresses/list')
	for i, entityData in ipairs(data) do
		if entities[entityData.id] == nil then
			-- Remember each distinct ID once, in list order, so that entities
			-- can be visited deterministically later on.
			entityOrder[#entityOrder + 1] = entityData.id
		end
		entities[entityData.id] = SensitiveEntity.new(entityData)
		entityIndexes[entityData.id] = i -- Keep track of the original order
	end

	local function makeError(code, info)
		return {
			code = code,
			info = info,
			['*'] = 'See https://en.wikipedia.org/wiki/Module:Sensitive_IP_addresses for API usage',
		}
	end

	local function normalizeEntityIds(ids)
		-- Return a copy of ids without duplicates, sorted into the order in
		-- which the entities appear in the data list.
		local ret = removeDuplicates(ids)
		table.sort(ret, function(s1, s2)
			return entityIndexes[s1] < entityIndexes[s2]
		end)
		return ret
	end

	-- Construct result
	local result = {}

	if type(options) ~= 'table' then
		return makeError(
			'sipa-options-type-error',
			string.format(
				"type error in argument #1 of 'query' (expected table, received %s)",
				type(options)
			)
		)
	elseif not options.test and not options.entities then
		return makeError(
			'sipa-blank-options',
			"the options table didn't contain a 'test' or an 'entities' key"
		)
	end

	if options.test then
		if type(options.test) ~= 'table' then
			return makeError(
				'sipa-test-type-error',
				string.format(
					"'test' options key was type %s (expected table)",
					type(options.test)
				)
			)
		end

		-- Insert the result subtables. We do this now rather than when we
		-- create the results table, as there shouldn't be a matches subtable
		-- if the user didn't specify any strings to match.
		result.matches = {}
		result['matched-ranges'] = {}
		result.entities = {}
		result['entity-ids'] = {}

		for i, testString in ipairs(options.test) do
			if type(testString) ~= 'string' then
				return makeError(
					'sipa-test-string-type-error',
					string.format(
						"type error in item #%d in the 'test' array (expected string, received %s)",
						i,
						type(testString)
					)
				)
			end

			-- Visit the entities in list order rather than with pairs(), so
			-- that the order of the matches array is the same on every run.
			for _, entityId in ipairs(entityOrder) do
				local entity = entities[entityId]
				-- Try to match the range with the current sensitive entity.
				local success, isMatch, matchObj, queryObj = pcall(
					entity.matchesIPOrRange,
					entity,
					testString
				)
				if not success then
					-- The string was invalid.
					return makeError(
						'sipa-invalid-test-string',
						string.format(
							"test string #%d '%s' was not a valid IP address or CIDR string",
							i,
							testString
						)
					)
				end
				if isMatch then
					-- The string was a sensitive IP address or subnet.
					local match = {}
					-- Quick and dirty hack to find if queryObj is an IPAddress object.
					local isIP = queryObj.getNextIP ~= nil and queryObj.isInSubnet ~= nil

					-- Add the match to the match subtable.
					if isIP then
						match.type = 'ip'
						match.ip = tostring(queryObj)
					else
						match.type = 'range'
						match.range = tostring(queryObj)
					end
					match['ip-version'] = queryObj:getVersion()
					match['matches-range'] = matchObj:getCIDR()
					match['entity-id'] = entity.data.id
					table.insert(result.matches, match)

					-- Add the matched range data.
					result['matched-ranges'][match['matches-range']] = {
						range = match['matches-range'],
						['ip-version'] = match['ip-version'],
						['entity-id'] = match['entity-id'],
					}

					-- Add the entity data for the entity we matched.
					result.entities[match['entity-id']] = deepCopy(
						entities[match['entity-id']].data
					)

					-- Add the entity ID for the entity we matched.
					table.insert(result['entity-ids'], match['entity-id'])
				end
			end
		end

		-- Several test strings may match the same entity, so tidy the ID
		-- list here; otherwise a query without an 'entities' key would
		-- return repeated IDs.
		result['entity-ids'] = normalizeEntityIds(result['entity-ids'])
	end

	-- Add entity data requested explicitly.
	if options.entities then
		if type(options.entities) ~= 'table' then
			return makeError(
				'sipa-entities-type-error',
				string.format(
					"'entities' options key was type %s (expected table)",
					type(options.entities)
				)
			)
		end

		-- Check the type of all the entity strings, and check if 'all' has
		-- been specified.
		local isAll = false
		for i, entityString in ipairs(options.entities) do
			if type(entityString) ~= 'string' then
				return makeError(
					'sipa-entity-string-type-error',
					string.format(
						"type error in item #%d in the 'entities' array (expected string, received %s)",
						i,
						type(entityString)
					)
				)
			end
			if entityString == 'all' then
				isAll = true
			end
		end

		if isAll then
			-- Add all the entity data.
			-- As the final result will contain all the entity data, we can
			-- just create the entities and entity-ids subtables from scratch
			-- without worrying about what any existing values might be.
			result.entities = {}
			result['entity-ids'] = {}
			for i, entityData in ipairs(data) do
				result.entities[entityData.id] = deepCopy(entityData)
				result['entity-ids'][i] = entityData.id
			end
		else
			-- Add data for the entities specified.
			-- Insert the entity and entity-id subtables if they aren't already
			-- present.
			result.entities = result.entities or {}
			result['entity-ids'] = result['entity-ids'] or {}
			for _, entityString in ipairs(options.entities) do
				-- Unknown entity IDs are silently skipped.
				if entities[entityString] then
					result.entities[entityString] = deepCopy(
						entities[entityString].data
					)
					table.insert(result['entity-ids'], entityString)
				end
			end
			result['entity-ids'] = normalizeEntityIds(result['entity-ids'])
		end
	end

	return result
end

--------------------------------------------------------------------------------
-- Q&D demo of loading data from [[Module:Sensitive IP addresses/list]]
-- into a structure that could be used to determine whether a particular
-- IP or subnet overlaps a sensitive range.
-- If used, this would be greatly refactored and possibly split to
-- [[Module:Sensitive IP addresses/data]].
--
-- Usage in a sandbox:
-- {{#invoke:Sensitive IP addresses|main}}

local function main()
	-- Demo/test routine. Loads the sensitive ranges from
	-- Module:Sensitive IP addresses/list into one IPv4 and one IPv6
	-- collection, lists the ranges reported by each collection, checks a
	-- fixed set of sample IP addresses and subnets against them, and returns
	-- the report as text wrapped in <pre> tags. Takes no arguments.
	--
	-- Test Module:IP.
	----------------------------------------------------------------------------
	-- An IP collection in Module:IP should hold both IPv4 and IPv6 lists and
	-- it would use the appropriate list depending on the object queried?
	-- That would make this code more straightforward.
	----------------------------------------------------------------------------
	-- Support stuff
	----------------------------------------------------------------------------
	-- These locals shadow the module-level names of the same name.
	local modcode = require('Module:IP')
	local IPAddress = modcode.IPAddress
	local Subnet = modcode.Subnet
	local IPv4Collection = modcode.IPv4Collection
	local IPv6Collection = modcode.IPv6Collection
	-- Minimal append-only list used to accumulate the output lines. The
	-- item count is kept in the n field; items live at indexes 1..n.
	local Collection = {}
	Collection.__index = Collection
	do
		function Collection:add(item)
			-- Append item; nil items are ignored.
			if item ~= nil then
				self.n = self.n + 1
				self[self.n] = item
			end
		end
		function Collection:join(sep)
			-- Concatenate the stored items, separated by sep.
			return table.concat(self, sep)
		end
		function Collection:sort(comp)
			-- Sort the stored items in place using comparator comp.
			table.sort(self, comp)
		end
		function Collection.new()
			return setmetatable({n = 0}, Collection)
		end
	end
	local function getObject(ipStr)
		-- Parse a string and return an appropriate object:
		--   IPv4 or IPv6 IP or subnet, or nil.
		-- A string containing '/' is treated as a subnet; anything else is
		-- treated as a single IP address. Construction errors are caught and
		-- reported as nil.
		-- TODO This should be in Module:IP (see IPCollection:_store).
		local maker
		if ipStr:find('/', 1, true) then
			maker = Subnet.new
		else
			maker = IPAddress.new
		end
		local success, obj = pcall(maker, ipStr)
		if success then
			return obj
		end
		return nil
	end
	local function preBlock(text)
		-- Pre tags returned by a module do not act like wikitext <pre>...</pre>.
		-- The text is passed through mw.text.nowiki, and a trailing newline
		-- is added if the text does not already end with one.
		return '<pre>\n' ..
			mw.text.nowiki(text) ..
			(text:sub(-1) == '\n' and '' or '\n') ..
			'</pre>\n'
	end
	----------------------------------------------------------------------------
	-- Load sensitive IP information
	----------------------------------------------------------------------------
	local function loadList(modname)
		-- Return a table to query an IP/subnet wrt sensitive ranges.
		-- The table has three fields: v4Collection and v6Collection hold the
		-- subnets from each entry's ipv4Ranges and ipv6Ranges, and
		-- subnetToInfo maps each Subnet object created here back to the
		-- entry it came from.
		local data = {
			subnetToInfo = {},
			v4Collection = IPv4Collection.new(),
			v6Collection = IPv6Collection.new(),
		}
		local sensitiveList = mw.loadData(modname)
		for i, info in ipairs(sensitiveList) do
			-- Handle the IPv4 and IPv6 range lists with the same code.
			for _, r in ipairs({
				{key = 'ipv4Ranges', list = data.v4Collection},
				{key = 'ipv6Ranges', list = data.v6Collection},
			}) do
				local rangeStrings = info[r.key]
				if rangeStrings then
					for _, str in ipairs(rangeStrings) do
						local subnet = Subnet.new(str)
						r.list:addSubnet(subnet)
						-- Keyed by the Subnet object itself, not its string form.
						data.subnetToInfo[subnet] = info
					end
				end
			end
		end
		return data
	end
	----------------------------------------------------------------------------
	-- Run test using Module:IP
	----------------------------------------------------------------------------
	local data = loadList('Module:Sensitive IP addresses/list')
	local results = Collection.new()
	results:add('IP ranges equivalent to collection')
	for _, col in ipairs({data.v4Collection, data.v6Collection}) do
		for _, range in ipairs(col:getRanges()) do
			-- NOTE(review): each range is used as a pair of values that can be
			-- compared with == and concatenated with strings; presumably
			-- these are the first and last addresses of the range. Confirm
			-- against Module:IP.
			if range[1] == range[2] then
				results:add('  ' .. range[1])
			else
				results:add('  ' .. range[1] .. ' – ' .. range[2])
			end
		end
	end
	for _, ipStr in ipairs({
		-- Each of the following is tested against the sensitive list.
		'143.228.19.123',
		'2620:0:E21:9F2::',
		'131.132.224.0/19',
		'198.35.27.255',
		'2620:0:860::1',
		'1.2.3.4',
		'11.12.13.192/26',
		'2001:db8::abcd',
		'2001:db8::/72',
	}) do
		local obj = getObject(ipStr)
		if obj then
			local isPresent, clashObj
			-- Choose the collection that matches the object's IP version.
			local col = obj:getVersion() == 'IPv4' and
				data.v4Collection or data.v6Collection
			if obj.getNextIP then  -- dirty trick to check if obj is an IP
				isPresent, clashObj = col:containsIP(obj)
			else
				isPresent, clashObj = col:overlapsSubnet(obj)
			end
			results:add('')
			results:add('IP or range under test: ' .. ipStr)
			if isPresent then
				-- NOTE(review): this lookup only succeeds if clashObj is the
				-- same Subnet object that loadList stored as a key - confirm
				-- that the collection returns the original object.
				local info = data.subnetToInfo[clashObj]
				if info then
					-- NOTE(review): concatenating clashObj with a string
					-- presumably relies on a __concat metamethod supplied
					-- by Module:IP - confirm.
					results:add('  sensitive: ' .. clashObj)
					results:add('  name: ' .. (info.name or '?'))
					results:add('  id: ' .. (info.id or '?'))
					results:add('  description: ' .. (info.description or '?'))
					results:add('  reason: ' .. (info.reason or '?'))
				else
					-- Should not occur!
					results:add('  info not found!')
				end
			else
				results:add('  not sensitive')
			end
		else
			-- Report problem?
			-- Strings that fail to parse are currently skipped silently.
		end
	end
	return preBlock(results:join('\n'))
end

--------------------------------------------------------------------------------
-- Exports
--------------------------------------------------------------------------------

-- Table of functions exported by this module; it is returned at the end of
-- the file.
local p = {}
-- Export the demo routine defined above under the name 'main'.
p.main = main

function p.isValidSensitivityReason(s)
	-- Report whether the string s is one of the keys of
	-- SensitiveEntity.reasons. Returns true if it is and false if it is
	-- not; raises a type error if s is not a string.
	checkType('isValidSensitivityReason', 1, s, 'string')
	local entry = SensitiveEntity.reasons[s]
	return entry ~= nil
end

function p.getSensitivityReasons()
	-- Collect the keys of SensitiveEntity.reasons into a new array and
	-- return it sorted in ascending (alphabetical) order.
	local reasons = {}
	for name, _ in pairs(SensitiveEntity.reasons) do
		table.insert(reasons, name)
	end
	table.sort(reasons)
	return reasons
end

function p.query(options)
	-- Public entry point to the sensitive IP API for other Lua modules.
	-- options is the query table (a 'test' array, an 'entities' array, or
	-- both). Returns the response table produced by the local query
	-- function, which is an error table if options is invalid.
	--
	-- This was previously an empty stub that returned nothing, which left the
	-- query implementation unreachable from outside the module.
	return query(options)
end

return p