Modul:URLutil
Erscheinungsbild
Vorlagen- programmierung |
Diskussionen | Lua | Test | Unterseiten | ||||||
Modul | Deutsch | English
|
Esperanto | Dolnoserbski | Hornjoserbsce | Modul: | WP:Lua |
Diese Seite enthält Code in der Programmiersprache Lua. Einbindungszahl Cirrus
Dies ist die (produktive) Mutterversion eines global benutzten Lua-Moduls.
Wenn die serial-Information nicht übereinstimmt, müsste eine Kopie hiervon in das lokale Wiki geschrieben werden.
Wenn die serial-Information nicht übereinstimmt, müsste eine Kopie hiervon in das lokale Wiki geschrieben werden.
Versionsbezeichnung auf WikiData:
2024-10-29
--[=[ URLutil 2013-06-14
Utilities for URL etc. on www.
* getAuthority()
* getHost()
* getPort()
* getScheme()
* getTLD()
* getTop2domain()
* isAuthority()
* isDomain()
* isHost()
* isIP()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
Bracketed IPv6 URLs are not yet implemented; they might need wiki syntax escaping anyway.
]=]
-- Table for export: collects every library function defined below.
-- It is handed out to other modules through p.URLutil() at the end of the file.
local URLutil = {}
-- Extract the authority (host, optionally followed by ":port") from a URL.
-- Parameter:
--     url  -- string; anything else yields false
-- Returns the lowercased host, with ":port" appended when a port starting
-- with a non-zero digit was given; false if the URL has no "//" part,
-- the host is not accepted by URLutil.isHost(), or the port is malformed.
URLutil.getAuthority = function ( url )
    if type( url ) ~= "string" then
        return false
    end
    -- A slash is appended so that URLs without a path still terminate the
    -- authority part for the pattern.
    local name, sep, digits = mw.ustring.match( url .. "/", "^%s*%w*:?//([%w%.%%-]+)(:?)([%d]*)/" )
    if not URLutil.isHost( name ) then
        return false
    end
    name = mw.ustring.lower( name )
    if sep ~= ":" then
        -- No colon: only a bare host is acceptable.
        return ( #digits == 0 ) and name or false
    end
    if digits:find( "^[1-9]" ) then
        return name .. ":" .. digits
    end
    -- Colon present, but port empty or starting with zero.
    return false
end -- URLutil.getAuthority()
-- Extract the host (authority without port) from a URL.
-- Parameter:
--     url  -- passed on to URLutil.getAuthority()
-- Returns the host string, or false if no authority could be determined.
URLutil.getHost = function ( url )
    local authority = URLutil.getAuthority( url )
    if not authority then
        return false
    end
    -- Strip an optional trailing ":digits" port.
    return mw.ustring.match( authority, "^([%w%.%%-]+):?[%d]*$" )
end -- URLutil.getHost()
-- Extract the port number from a URL.
-- Parameter:
--     url  -- passed on to URLutil.getAuthority()
-- Returns the port as a number, or false if the URL has no authority
-- or the authority carries no port.
URLutil.getPort = function ( url )
    local authority = URLutil.getAuthority( url )
    local digits = authority and authority:match( ":([1-9][0-9]*)$" )
    if type( digits ) == "string" then
        return tonumber( digits )
    end
    return false
end -- URLutil.getPort()
-- Determine the scheme prefix of a URL.
-- Parameter:
--     url  -- string; anything else yields false
-- Returns the lowercased scheme followed by "://" (scheme names need at
-- least three letters), "//" for a protocol-relative URL, or false.
URLutil.getScheme = function ( url )
    if type( url ) ~= "string" then
        return false
    end
    local name, sep, slashes = url:match( "^%s*([a-zA-Z]*)(:?)(//)" )
    if slashes ~= "//" then
        -- Pattern did not match at all.
        return false
    end
    if sep == ":" then
        if #name > 2 then
            return name:lower() .. "://"
        end
        return false
    end
    -- Without a colon only the bare "//" form is accepted.
    if #name == 0 then
        return "//"
    end
    return false
end -- URLutil.getScheme()
-- Extract the top-level domain from a URL.
-- Parameter:
--     url  -- passed on to URLutil.getHost()
-- Returns the trailing label of the host if it consists of two or more
-- lowercase ASCII letters, otherwise false.
URLutil.getTLD = function ( url )
    local hostname = URLutil.getHost( url )
    local tld = hostname and mw.ustring.match( hostname, "[%w]+%.([a-z][a-z]+)$" )
    return tld or false
end -- URLutil.getTLD()
-- Extract the last two labels of the host ("example.org") from a URL.
-- Parameter:
--     url  -- passed on to URLutil.getHost()
-- Returns the two trailing labels joined by a dot, or false.
URLutil.getTop2domain = function ( url )
    local hostname = URLutil.getHost( url )
    if not hostname then
        return false
    end
    -- A leading dot is prepended so a host made of exactly two labels
    -- matches the same pattern as a longer one.
    local tail = mw.ustring.match( "." .. hostname,
                                   "(%.[%w%%]+%.[a-z][a-z]+)$" )
    if tail then
        -- Drop the leading dot again.
        return mw.ustring.sub( tail, 2 )
    end
    return false
end -- URLutil.getTop2domain()
-- Check whether a string is a host with an optional ":port" suffix.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns the result of URLutil.isHost() for the host part if the port
-- part is well-formed (absent, or digits not starting with zero),
-- otherwise false.
URLutil.isAuthority = function ( s )
    if type( s ) ~= "string" then
        return false
    end
    local name, sep, digits = mw.ustring.match( s, "^%s*([%w%.%%-]+)(:?)(%d*)%s*$" )
    if sep == ":" then
        -- A colon demands a port without leading zero.
        if type( digits:match( "^[1-9][0-9]*$" ) ) ~= "string" then
            return false
        end
    elseif digits ~= "" then
        -- Also covers a failed match, where digits is nil.
        return false
    end
    return URLutil.isHost( name )
end -- URLutil.isAuthority()
-- Check whether a string looks like a domain name.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s consists of one or more labels followed by a
-- top-level domain of at least two ASCII letters, the part before the
-- TLD starts and ends with an alphanumeric character and contains no
-- empty label (".."); otherwise false.
URLutil.isDomain = function ( s )
    if type( s ) ~= "string" then
        return false
    end
    -- "*" rather than "+" in front of the closing %w, so that a
    -- single-character label such as "t" in "t.co" is accepted too.
    local labels = mw.ustring.match( s, "^%s*([%w%.%%-]*%w)%.[a-zA-Z][a-zA-Z]+%s*$" )
    if type( labels ) ~= "string" then
        return false
    end
    if not mw.ustring.find( labels, "^%w" ) then
        -- Must not begin with ".", "-" or "%".
        return false
    end
    -- Plain search: two consecutive dots mean an empty label.
    return not mw.ustring.find( labels, "..", 1, true )
end -- URLutil.isDomain()
-- Check whether a string is a domain name or an IP address.
-- Parameter:
--     s  -- value to be examined
-- Returns true for a domain; otherwise whatever URLutil.isIP() yields
-- (4, 6 or false).
URLutil.isHost = function ( s )
    local asDomain = URLutil.isDomain( s )
    if asDomain then
        return asDomain
    end
    return URLutil.isIP( s )
end -- URLutil.isHost()
-- Determine the IP version of an address string.
-- Parameter:
--     s  -- value to be examined
-- Returns 4 for an IPv4 address, 6 for an IPv6 address, otherwise false.
URLutil.isIP = function ( s )
    if URLutil.isIPv4( s ) then
        return 4
    end
    return URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()
-- Check whether a string is an IPv4 address in dotted decimal notation.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s has four dot-separated decimal parts, the first one
-- not starting with zero, and every part below 256; otherwise false.
URLutil.isIPv4 = function ( s )
    if type( s ) ~= "string" then
        return false
    end
    local octets = { s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" ) }
    if #octets ~= 4 then
        -- Pattern did not match.
        return false
    end
    -- The pattern admits values up to 299; enforce the real upper bound.
    for i = 1, 4 do
        if tonumber( octets[ i ] ) >= 256 then
            return false
        end
    end
    return true
end -- URLutil.isIPv4()
-- Check whether a string is an IPv6 address (unbracketed, hex groups only).
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s consists of eight groups of one to four hex digits
-- separated by colons, or fewer groups with exactly one "::" abbreviation;
-- otherwise false.
URLutil.isIPv6 = function ( s )
    if type( s ) ~= "string" then
        return false
    end
    -- Trim first: doing it after the character check (as before) made the
    -- trim ineffective, because any whitespace had already been rejected.
    s = mw.text.trim( s )
    if s:len() == 0
       or s:find( "[^:%x]" )    -- only colon and hex digits are legal chars
       or s:find( "^:[^:]" )    -- can begin or end with :: but not with single :
       or s:find( "[^:]:$" )
       or s:find( ":::" )
    then
        return false
    end
    local dcolon, groups
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" )    -- prepend : if needed, so every group is ":hhhh"
    s, groups = s:gsub( ":%x%x?%x?%x?", "" )    -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
           and ( s:len() == 0 or ( dcolon == 1 and s == ":" ) )    -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()
-- Check whether a string looks like an e-mail address.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s has the shape local@domain, where local consists of
-- alphanumerics, ".", "%", "_" or "-", and domain passes
-- URLutil.isDomain(); otherwise false.
URLutil.isMailAddress = function ( s )
    if type( s ) ~= "string" then
        return false
    end
    local domain = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
    -- isDomain() copes with nil when the pattern did not match.
    return URLutil.isDomain( domain )
end -- URLutil.isMailAddress()
-- Check whether a string is a "mailto:" link with a valid address.
-- Parameter:
--     s  -- string; surrounding whitespace is tolerated
-- Returns true if s is "mailto:" (any letter case) followed by something
-- accepted by URLutil.isMailAddress(); otherwise false.
URLutil.isMailLink = function ( s )
    if type( s ) ~= "string" then
        return false
    end
    -- The first capture can only be a case variant of "mailto", so a
    -- successful match is sufficient proof of the scheme.
    local scheme, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
    if type( scheme ) == "string" then
        return URLutil.isMailAddress( addr )
    end
    return false
end -- URLutil.isMailLink()
-- Check a protocol specification against a list of accepted schemes.
-- Parameters:
--     prot      -- string such as "http", "http:", "http://" or "//";
--                  surrounding whitespace is tolerated
--     supplied  -- string of lowercase scheme names, each one enclosed
--                  by single spaces
-- Returns true if prot is the bare "//" or names a scheme listed in
-- supplied (with colon, or without any slashes); otherwise false.
local function isProtocolAccepted( prot, supplied )
    if type( prot ) ~= "string" then
        return false
    end
    local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
    if slashes == "/" then
        -- A single slash is never valid.
        return false
    end
    if scheme == "" then
        -- Protocol-relative form: exactly "//", without colon.
        return colon ~= ":" and slashes == "//"
    end
    if colon == ":" or slashes == "" then
        -- scheme holds letters only, so it is safe inside a pattern.
        local hit = supplied:match( " " .. scheme:lower() .. " " )
        return type( hit ) == "string"
    end
    -- Also reached when the pattern did not match (all captures nil).
    return false
end -- isProtocolAccepted()
-- Check whether a protocol is one of: mailto, irc, ircs, ssh, telnet.
-- Parameter:
--     prot  -- protocol specification, see isProtocolAccepted()
-- Returns true or false.
URLutil.isProtocolDialog = function ( prot )
    local accepted = " mailto irc ircs ssh telnet "
    return isProtocolAccepted( prot, accepted )
end -- URLutil.isProtocolDialog()
-- Check whether a protocol is one of: ftp, ftps, git, http, https, nntp,
-- sftp, svn, worldwind; the bare "//" is accepted as well.
-- Parameter:
--     prot  -- protocol specification, see isProtocolAccepted()
-- Returns true or false.
URLutil.isProtocolWiki = function ( prot )
    local accepted = " ftp ftps git http https nntp sftp svn worldwind "
    return isProtocolAccepted( prot, accepted )
end -- URLutil.isProtocolWiki()
-- Check whether a URL addresses a resource via //, http, https or ftp.
-- Parameter:
--     url  -- value to be examined
-- Returns true if the scheme is one of the four listed, an authority
-- can be extracted, and there is no whitespace inside the URL;
-- otherwise false.
URLutil.isResourceURL = function ( url )
    local scheme = URLutil.getScheme( url )
    if not scheme then
        return false
    end
    local known = " // http:// https:// ftp:// "
    if not known:find( " " .. scheme .. " " ) then
        return false
    end
    if not URLutil.getAuthority( url ) then
        return false
    end
    -- Whitespace between two non-space characters disqualifies.
    return not url:match( "%S%s+%S" )
end -- URLutil.isResourceURL()
-- Check whether a URL is not a resource URL or contains dubious characters.
-- Parameter:
--     url  -- value to be examined
-- Returns true if url fails URLutil.isResourceURL(), or contains "[",
-- "|", "]", two consecutive apostrophes, a character in the ranges
-- U+2009-U+200F or U+202A-U+202F, U+2060, or ends with "." or ",";
-- otherwise false.
URLutil.isSuspiciousURL = function ( url )
    if not URLutil.isResourceURL( url ) then
        return true
    end
    local authority = URLutil.getAuthority( url )
    -- Character class; code 45 is "-" and forms the two Unicode ranges
    -- 8201-8207 and 8234-8239, followed by the single code point 8288.
    local pat = "[%[|%]" ..
                mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 )
                .. "]"
    -- The class holds multi-byte characters and therefore needs the
    -- Unicode-aware mw.ustring.find; byte-wise string.find would read the
    -- UTF-8 bytes as separate class members and byte ranges, and flag
    -- nearly every non-ASCII character.
    if authority:find( "@" )
       or url:find( "''" )
       or mw.ustring.find( url, pat )
       or url:find( "[%.,]$" ) then
        return true
    end
    -- TODO zero width character ??
    return false
end -- URLutil.isSuspiciousURL()
-- Check whether a web URL contains "[", "|" or "]".
-- Parameters:
--     url       -- value to be examined
--     trailing  -- if this is a string, the result is always false
-- Returns true if trailing is not a string, url passes
-- URLutil.isWebURL() and contains one of the three characters;
-- otherwise false.
URLutil.isUnescapedURL = function ( url, trailing )
    if type( trailing ) == "string" then
        return false
    end
    if not URLutil.isWebURL( url ) then
        return false
    end
    if url:match( "[%[|%]]" ) then
        return true
    end
    return false
end -- URLutil.isUnescapedURL()
-- Check whether a string is a URL with scheme and authority.
-- Parameter:
--     url  -- value to be examined
-- Returns true if both URLutil.getScheme() and URLutil.getAuthority()
-- succeed and there is no whitespace inside the URL; otherwise false.
URLutil.isWebURL = function ( url )
    local complete = URLutil.getScheme( url ) and URLutil.getAuthority( url )
    if not complete then
        return false
    end
    -- Whitespace between two non-space characters disqualifies.
    return not url:match( "%S%s+%S" )
end -- URLutil.isWebURL()
-- Escape characters of a URL which would collide with wiki link syntax.
-- Parameter:
--     url  -- string; any other value is returned unchanged
-- Returns url with "[", "|" and "]" replaced by the numeric character
-- references &#91; &#124; &#93;.
URLutil.wikiEscapeURL = function ( url )
    if type( url ) ~= "string" then
        -- Nothing to escape; avoid indexing a non-string.
        return url
    end
    -- Replacing each character by itself would be a no-op; the
    -- replacements have to be the HTML numeric character references.
    local entities = { [ "[" ] = "&#91;",
                       [ "|" ] = "&#124;",
                       [ "]" ] = "&#93;" }
    -- Parentheses drop the substitution count returned by gsub.
    return ( url:gsub( "[%[|%]]", entities ) )
end -- URLutil.wikiEscapeURL()
-- Provide template access and expose URLutil table to require
local p = {}

-- Wrappers returning the library result itself, or an empty string
-- where the library function yields false.
for _, name in ipairs( { "getAuthority",
                         "getHost",
                         "getPort",
                         "getScheme",
                         "getTLD",
                         "getTop2domain",
                         "isIP" } ) do
    p[ name ] = function ( frame )
        return URLutil[ name ]( frame.args[ 1 ] ) or ""
    end
end

-- Wrappers for yes/no tests: "1" if the library function succeeds,
-- otherwise an empty string.
for _, name in ipairs( { "isAuthority",
                         "isDomain",
                         "isHost",
                         "isIPv4",
                         "isIPv6",
                         "isMailAddress",
                         "isMailLink",
                         "isProtocolDialog",
                         "isProtocolWiki",
                         "isResourceURL",
                         "isSuspiciousURL",
                         "isWebURL" } ) do
    p[ name ] = function ( frame )
        return URLutil[ name ]( frame.args[ 1 ] ) and "1" or ""
    end
end

-- Takes a second template parameter in addition to the URL.
function p.isUnescapedURL( frame )
    local url, trailing = frame.args[ 1 ], frame.args[ 2 ]
    return URLutil.isUnescapedURL( url, trailing ) and "1" or ""
end

-- Result of the library function is passed through without fallback.
function p.wikiEscapeURL( frame )
    local url = frame.args[ 1 ]
    return URLutil.wikiEscapeURL( url )
end

-- Access for other modules: hands out the library table itself.
function p.URLutil()
    return URLutil
end

return p