„Modul:Text“ – Versionsunterschied
Erscheinungsbild
[gesichtete Version] | [gesichtete Version] |
Inhalt gelöscht Inhalt hinzugefügt
2019-11-12 |
2024-06-05 |
||
Zeile 1: | Zeile 1: | ||
local Text = { serial = " |
local Text = { serial = "2024-06-05", |
||
suite = "Text", |
suite = "Text", |
||
item = 29387871 } |
item = 29387871 } |
||
Zeile 5: | Zeile 5: | ||
Text utilities |
Text utilities |
||
]=] |
]=] |
||
local Failsafe = Text |
|||
local GlobalMod = Text |
|||
-- local globals |
-- local globals |
||
local |
local Failsafe = Text |
||
local |
local GlobalMod = Text |
||
local |
local Patterns = { } |
||
local |
local RangesLatin = false |
||
local |
local SeekQuote = false |
||
local SeekQuote = false |
|||
Zeile 107: | Zeile 105: | ||
-- advance -- number, with level 1 or 2 |
-- advance -- number, with level 1 or 2 |
||
local r = apply |
local r = apply |
||
local suite |
local quotes, suite |
||
factoryQuote() |
factoryQuote() |
||
if alien then |
|||
suite = mw.text.trim( alien ) |
|||
if suite == "" then |
|||
suite = false |
|||
else |
|||
suite = Text.quoteLang[ slang ] |
|||
local s = Text.quoteLang[ suite ] |
|||
end |
|||
if s then |
|||
suite = |
suite = s |
||
end |
|||
end |
|||
if suite then |
|||
local quotes = Text.quoteType[ suite ] |
|||
if quotes then |
|||
local space |
|||
if quotes[ 3 ] then |
|||
space = " " |
|||
else |
else |
||
local slang = suite:match( "^(%l+)-" ) |
|||
if slang then |
|||
suite = Text.quoteLang[ slang ] |
|||
end |
|||
r = mw.ustring.format( "%s%s%s%s%s", |
|||
mw.ustring.char( quotes[ 1 ] ), |
|||
space, |
|||
apply, |
|||
space, |
|||
mw.ustring.char( quotes[ 2 ] ) ) |
|||
end |
end |
||
end |
|||
end |
|||
if not suite then |
|||
suite = Text.quoteLang.en |
|||
end |
|||
quotes = Text.quoteType[ suite ] |
|||
if quotes then |
|||
local space |
|||
if quotes[ 3 ] then |
|||
space = " " |
|||
else |
else |
||
space = "" |
|||
end |
end |
||
quotes = quotes[ advance ] |
|||
if quotes then |
|||
r = mw.ustring.format( "%s%s%s%s%s", |
|||
mw.ustring.char( quotes[ 1 ] ), |
|||
space, |
|||
apply, |
|||
space, |
|||
mw.ustring.char( quotes[ 2 ] ) ) |
|||
end |
|||
else |
|||
mw.log( "fiatQuote() " .. suite ) |
|||
end |
end |
||
return r |
return r |
||
Zeile 236: | Zeile 241: | ||
-- Returns: true, if CJK detected |
-- Returns: true, if CJK detected |
||
local r |
local r |
||
if not |
if not Patterns.CJK then |
||
Patterns.CJK = mw.ustring.char( 91, |
|||
0x3400, 45, 0x9FFF, |
|||
0x20000, 45, 0x2B81F, |
|||
93 ) |
93 ) |
||
end |
end |
||
if mw.ustring.find( analyse, |
if mw.ustring.find( analyse, Patterns.CJK ) then |
||
r = true |
r = true |
||
else |
else |
||
Zeile 285: | Zeile 290: | ||
local r |
local r |
||
if not RangesLatin then |
if not RangesLatin then |
||
RangesLatin = { { |
RangesLatin = { { 0x07, 0x02AF }, |
||
{ |
{ 0x1D6B, 0x1D9A }, |
||
{ |
{ 0x1E00, 0x1EFF }, |
||
{ |
{ 0x2002, 0x203A }, |
||
{ 0x2190, 0x23BD } } |
|||
end |
end |
||
if not |
if not Patterns.Latin then |
||
local range |
local range |
||
Patterns.Latin = "^[" |
|||
for i = 1, #RangesLatin do |
for i = 1, #RangesLatin do |
||
range = RangesLatin[ i ] |
range = RangesLatin[ i ] |
||
Patterns.Latin = Patterns.Latin .. |
|||
mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) |
mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) |
||
end -- for i |
end -- for i |
||
Patterns.Latin = Patterns.Latin .. "]*$" |
|||
end |
end |
||
if adjust then |
if adjust then |
||
if mw.ustring.match( adjust, |
if mw.ustring.match( adjust, Patterns.Latin ) then |
||
r = true |
r = true |
||
else |
else |
||
Zeile 382: | Zeile 388: | ||
slang = mw.text.trim( alien ):lower() |
slang = mw.text.trim( alien ):lower() |
||
else |
else |
||
local pageLang = mw.title.getCurrentTitle().pageLanguage |
|||
if |
if pageLang then |
||
slang = pageLang.code |
|||
else |
|||
slang = mw.language.getContentLanguage():getCode() |
slang = mw.language.getContentLanguage():getCode() |
||
end |
end |
||
Zeile 425: | Zeile 432: | ||
-- or basic greek or cyrillic or symbols etc. |
-- or basic greek or cyrillic or symbols etc. |
||
local cleanup, decomposed |
local cleanup, decomposed |
||
if not |
if not Patterns.Combined then |
||
Patterns.Combined = mw.ustring.char( 91, |
|||
0x0300, 45, 0x036F, |
0x0300, 45, 0x036F, |
||
0x1AB0, 45, 0x1AFF, |
0x1AB0, 45, 0x1AFF, |
||
0x1DC0, 45, 0x1DFF, |
0x1DC0, 45, 0x1DFF, |
||
0xFE20, 45, 0xFE2F, |
0xFE20, 45, 0xFE2F, |
||
93 ) |
93 ) |
||
end |
end |
||
decomposed = mw.ustring.toNFD( adjust ) |
decomposed = mw.ustring.toNFD( adjust ) |
||
cleanup = mw.ustring.gsub( decomposed, |
cleanup = mw.ustring.gsub( decomposed, Patterns.Combined, "" ) |
||
return mw.ustring.toNFC( cleanup ) |
return mw.ustring.toNFC( cleanup ) |
||
end -- Text.removeDiacritics() |
end -- Text.removeDiacritics() |
||
Text.removeWhitespace = function ( adjust ) |
|||
-- Remove all whitespace, or replace with ASCII space |
|||
-- Parameter: |
|||
-- adjust -- string |
|||
-- Returns: string; modified |
|||
local r = mw.text.decode( adjust ) |
|||
if r:find( "&", 1, true ) then |
|||
r = r:gsub( "‎", "" ) |
|||
:gsub( "‏", "" ) |
|||
:gsub( "‍", "" ) |
|||
:gsub( "‌", "" ) |
|||
:gsub( " ", " " ) |
|||
:gsub( " ", " " ) |
|||
:gsub( " ", " " ) |
|||
end |
|||
if not Patterns.Whitespace then |
|||
Patterns.Whitespace = mw.ustring.char( 0x00AD, |
|||
91, 0x200C, 45, 0x200F, 93, |
|||
91, 0x2028, 45, 0x202E, 93, |
|||
0x205F, |
|||
0x2060 ) |
|||
Patterns.Space = mw.ustring.char( 0x00A0, |
|||
0x1680, |
|||
91, 0x2000, 45, 0x200A, 93, |
|||
0x202F, |
|||
0x205F, |
|||
0x3000, |
|||
0x303F ) |
|||
end |
|||
r = mw.ustring.gsub( r, Patterns.Whitespace, "" ) |
|||
r = mw.ustring.gsub( r, Patterns.Space, " " ) |
|||
return mw.text.trim( r ) |
|||
end -- Text.removeWhitespace() |
|||
Zeile 446: | Zeile 489: | ||
-- analyse -- string |
-- analyse -- string |
||
-- Returns: true, if sentence terminated |
-- Returns: true, if sentence terminated |
||
local r = mw.text.trim( analyse ) |
local r = mw.text.trim( analyse ) |
||
local lt = r:find( "<", 1, true ) |
|||
if not PatternTerminated then |
|||
if not Patterns.Terminated then |
|||
PatternTerminated = mw.ustring.char( 91, |
|||
Patterns.Terminated = mw.ustring.char( 91, |
|||
12290, |
|||
0x3002, |
|||
0xFF01, |
|||
0xFF0E, |
|||
0xFF1F ) |
|||
.. "!%.%?…][\"'%]‹›«»‘’“”]*$" |
|||
end |
end |
||
if |
if lt then |
||
r = r:gsub( "</span>", "" ) |
|||
end |
|||
if mw.ustring.find( r, Patterns.Terminated ) then |
|||
r = true |
r = true |
||
elseif lt then |
|||
local s = "<bdi[^>]* dir=\"([lr]t[rl])\".+</bdi></bdo>" |
|||
s = r:match( s ) |
|||
if s then |
|||
if mw.language.getContentLanguage():isRTL() then |
|||
r = ( s == "ltr" ) |
|||
else |
|||
r = ( s == "rtl" ) |
|||
end |
|||
else |
|||
r = false |
|||
end |
|||
else |
else |
||
r = false |
r = false |
||
Zeile 462: | Zeile 521: | ||
return r |
return r |
||
end -- Text.sentenceTerminated() |
end -- Text.sentenceTerminated() |
||
Text.tokenWords = function ( adjust ) |
|||
-- Split text in words of digits or letters |
|||
-- Precondition: |
|||
-- adjust -- string |
|||
-- Returns: string with |
|||
local r = mw.uri.decode( adjust, "WIKI" ) |
|||
if r:find( "&", 1, true ) then |
|||
r = mw.text.decode( r ) |
|||
end |
|||
r = Text.removeWhitespace( r ) |
|||
r = mw.ustring.gsub( r, "[%p%s]+", " " ) |
|||
return r |
|||
end -- Text.tokenWords() |
|||
Zeile 523: | Zeile 598: | ||
local r |
local r |
||
Text.isLatinRange() |
Text.isLatinRange() |
||
if mw.ustring.match( adjust, |
if mw.ustring.match( adjust, Patterns.Latin ) then |
||
-- latin only, horizontal dashes, quotes |
-- latin only, horizontal dashes, quotes |
||
r = adjust |
r = adjust |
||
else |
else |
||
local c |
local c |
||
local |
local e = mw.html.create( "span" ) |
||
local |
local j = false |
||
local |
local k = 1 |
||
local |
local m = false |
||
local n = mw.ustring.len( adjust ) |
|||
local p |
|||
local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>" |
|||
local flat = function ( a ) |
local flat = function ( a ) |
||
-- isLatin |
-- isLatin |
||
Zeile 554: | Zeile 630: | ||
end -- focus() |
end -- focus() |
||
local form = function ( a ) |
local form = function ( a ) |
||
return string.format( |
return string.format( p, |
||
r, |
r, |
||
mw.ustring.sub( adjust, k, j - 1 ), |
mw.ustring.sub( adjust, k, j - 1 ), |
||
mw.ustring.sub( adjust, j, a ) ) |
mw.ustring.sub( adjust, j, a ) ) |
||
end -- form() |
end -- form() |
||
e:attr( "dir", "auto" ) |
|||
:css( "font-style", "normal" ) |
|||
:wikitext( "%s" ) |
|||
p = "%s%s" .. tostring( e ) |
|||
r = "" |
r = "" |
||
for i = 1, n do |
for i = 1, n do |
||
Zeile 616: | Zeile 696: | ||
-- Retrieve versioning and check for compliance |
-- Retrieve versioning and check for compliance |
||
-- Precondition: |
-- Precondition: |
||
-- atleast -- string, with required version |
-- atleast -- string, with required version |
||
-- or false |
-- or wikidata|item|~|@ or false |
||
-- Postcondition: |
-- Postcondition: |
||
-- Returns string -- with queried version, also if problem |
-- Returns string -- with queried version/item, also if problem |
||
-- false -- if appropriate |
-- false -- if appropriate |
||
-- |
-- 2024-03-01 |
||
local |
local since = atleast |
||
local since = |
local last = ( since == "~" ) |
||
local linked = ( since == "@" ) |
|||
local link = ( since == "item" ) |
|||
local r |
local r |
||
if last or since == "wikidata" then |
if last or link or linked or since == "wikidata" then |
||
local item = Failsafe.item |
local item = Failsafe.item |
||
since = false |
since = false |
||
if type( item ) == "number" and item > 0 then |
if type( item ) == "number" and item > 0 then |
||
local |
local suited = string.format( "Q%d", item ) |
||
if link then |
|||
item ) ) |
|||
r = suited |
|||
else |
|||
local seek = Failsafe.serialProperty or "P348" |
|||
local |
local entity = mw.wikibase.getEntity( suited ) |
||
if type( |
if type( entity ) == "table" then |
||
local seek = Failsafe.serialProperty or "P348" |
|||
vsn |
local vsn = entity:formatPropertyValues( seek ) |
||
if |
if type( vsn ) == "table" and |
||
type( vsn.value ) == "string" and |
|||
vsn.value ~= "" then |
|||
if last and vsn.value == Failsafe.serial then |
|||
r = false |
|||
elseif linked then |
|||
if mw.title.getCurrentTitle().prefixedText |
|||
== mw.wikibase.getSitelink( suited ) then |
|||
r = false |
|||
else |
|||
r = suited |
|||
end |
|||
else |
|||
r = vsn.value |
|||
end |
|||
end |
end |
||
end |
end |
||
end |
end |
||
elseif link then |
|||
r = false |
|||
end |
end |
||
end |
end |
||
Zeile 764: | Zeile 859: | ||
result_line = pformat |
result_line = pformat |
||
for j = 1, #lists do |
for j = 1, #lists do |
||
result_line = mw.ustring.gsub(result_line, |
result_line = mw.ustring.gsub( result_line, |
||
"%%s", |
|||
lists[ j ][ i ], |
|||
1 ) |
|||
end |
end |
||
result = result .. result_line |
result = result .. result_line |
||
Zeile 827: | Zeile 925: | ||
function p.sentenceTerminated( frame ) |
function p.sentenceTerminated( frame ) |
||
return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or "" |
return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or "" |
||
end |
|||
function p.tokenWords( frame ) |
|||
return Text.tokenWords( frame.args[ 1 ] or "" ) |
|||
end |
end |
||
Zeile 908: | Zeile 1.010: | ||
return Text |
return Text |
||
end -- p.Text |
end -- p.Text |
||
setmetatable( p, { __call = function ( func, ... ) |
|||
setmetatable( p, nil ) |
|||
return Failsafe |
|||
end } ) |
|||
return p |
return p |
Version vom 5. Juni 2024, 13:10 Uhr
Vorlagenprogrammierung | Diskussionen | Lua | Test | Unterseiten | |||
Modul | Deutsch | English
|
Modul: | Dokumentation |
Diese Seite enthält Code in der Programmiersprache Lua. Einbindungszahl Cirrus
Dies ist die (produktive) Mutterversion eines global benutzten Lua-Moduls.
Wenn die serial-Information nicht übereinstimmt, müsste eine Kopie hiervon in das lokale Wiki geschrieben werden.
Versionsbezeichnung auf WikiData:
2025-02-06
Updating notwendig
(lokal: 2024-06-05)
local Text = { serial = "2024-06-05",
suite = "Text",
item = 29387871 }
--[=[
Text utilities
]=]
-- local globals
-- Alias of the library table; the versioning function failsafe() is attached
-- to it and reads Failsafe.serial / Failsafe.item
local Failsafe = Text
-- Alias of the library table used by foreignModule() to keep its
-- cache in GlobalMod.globalModules
local GlobalMod = Text
-- Cache of lazily built ustring patterns; the functions below store
-- their pattern here on first use (CJK, Latin, Combined, ...)
local Patterns = { }
-- Table of latin codepoint ranges; false until Text.isLatinRange() fills it
local RangesLatin = false
-- String of quotation mark characters; false until Text.isQuote() builds it
local SeekQuote = false
local foreignModule = function ( access, advanced, append, alt, alert )
    -- Fetch global module, locally or via the sitelink of a wikidata item
    -- Precondition:
    --     access    -- string, with name of base module
    --     advanced  -- true, for require(); else mw.loadData()
    --     append    -- string, with subpage part, if any; or false
    --     alt       -- number, of wikidata item of root; or false
    --     alert     -- true, for throwing error on data problem
    -- Postcondition:
    --     Returns whatever the loader delivered, probably table;
    --     if loading failed and alert is not set, the value left by the
    --     last pcall() (or nil) is returned
    -- 2019-10-29
    local loader = advanced and require or mw.loadData
    local page   = access
    local cached, ok, result

    local function withSubpage()
        -- Extend the current page name by the subpage part, if any
        if append then
            page = string.format( "%s/%s", page, append )
        end
    end

    GlobalMod.globalModules = GlobalMod.globalModules or { }
    cached = GlobalMod.globalModules[ access ]
    if not cached then
        -- No alternative title remembered yet: try the local name first
        withSubpage()
        ok, result = pcall( loader, "Module:" .. page )
    end
    if ok then
        return result
    end
    if not cached and type( alt ) == "number" and alt > 0 then
        -- Ask wikidata for the local title; remember true if there is none
        cached = mw.wikibase.getSitelink( string.format( "Q%d", alt ) )
        GlobalMod.globalModules[ access ] = cached or true
    end
    if type( cached ) == "string" then
        page = cached
        withSubpage()
        ok, result = pcall( loader, page )
    end
    if alert and not ok then
        error( "Missing or invalid page: " .. page, 0 )
    end
    return result
end -- foreignModule()
local function factoryQuote()
    -- Create quote definitions once: Text.quoteLang and Text.quoteType
    -- Loaded from subpage "quoting"; missing parts are replaced by
    -- empty tables and a default entry for English
    local quoting, fallback
    if Text.quoteLang then
        -- already initialized
        return
    end
    quoting = foreignModule( "Text", false, "quoting", Text.item )
    if type( quoting ) == "table" then
        Text.quoteLang = quoting.langs
        Text.quoteType = quoting.types
    end
    if type( Text.quoteLang ) ~= "table" then
        Text.quoteLang = { }
    end
    if type( Text.quoteType ) ~= "table" then
        Text.quoteType = { }
    end
    if type( Text.quoteLang.en ) ~= "string" then
        Text.quoteLang.en = "ld"
    end
    fallback = Text.quoteLang.en
    if type( Text.quoteType[ fallback ] ) ~= "table" then
        -- level 1: U+201C U+201D, level 2: U+2018 U+2019
        Text.quoteType[ fallback ] = { { 8220, 8221 },
                                       { 8216, 8217 } }
    end
end -- factoryQuote()
local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code (or key of Text.quoteType)
    --     advance  -- number, with level 1 or 2
    -- Returns: string; quoted text, or apply unchanged if no
    --          quotation marks are defined for the request
    local style, marks, pair, gap
    factoryQuote()
    if alien then
        local code = mw.text.trim( alien )
        if code ~= "" then
            style = Text.quoteLang[ code ]
            if not style then
                -- try the primary language of a code like "de-ch"
                local base = code:match( "^(%l+)-" )
                if base then
                    style = Text.quoteLang[ base ]
                else
                    -- used below as direct key into Text.quoteType
                    style = code
                end
            end
        end
    end
    style = style or Text.quoteLang.en
    marks = Text.quoteType[ style ]
    if not marks then
        mw.log( "fiatQuote() " .. style )
        return apply
    end
    -- third entry requests a space between marks and text
    gap  = marks[ 3 ] and " " or ""
    pair = marks[ advance ]
    if not pair then
        return apply
    end
    return mw.ustring.format( "%s%s%s%s%s",
                              mw.ustring.char( pair[ 1 ] ),
                              gap,
                              apply,
                              gap,
                              mw.ustring.char( pair[ 2 ] ) )
end -- fiatQuote()
Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; empty if nothing to create,
    --          or error message in <span class="error"> unless accept
    local r
    if type( apply ) == "table" then
        local bad   = { }
        local codes = { }
        local s
        -- ipairs() keeps the order of the sequence; pairs() does not
        -- guarantee any order
        for _, v in ipairs( apply ) do
            if type( v ) == "number" then
                if ( v < 32 and v ~= 9 and v ~= 10 ) or v > 0x10FFFF then
                    -- control codes and values beyond Unicode are rejected
                    -- here; mw.ustring.char() would throw on the latter
                    table.insert( bad, tostring( v ) )
                else
                    table.insert( codes, math.floor( v ) )
                end
            else
                table.insert( bad, tostring( v ) )
            end
        end -- for _, v
        if #bad > 0 then
            s = "bad codepoints: " .. table.concat( bad, " " )
        elseif #codes > 0 then
            r = mw.ustring.char( unpack( codes ) )
            if again then
                if type( again ) == "number" then
                    local n = math.floor( again )
                    if n > 1 then
                        r = r:rep( n )
                    elseif n < 1 then
                        r = ""
                    end
                else
                    s = "bad repetitions: " .. tostring( again )
                end
            end
        end
        if s and not accept then
            r = tostring( mw.html.create( "span" )
                                 :addClass( "error" )
                                 :wikitext( s ) )
        end
    end
    return r or ""
end -- Text.char()
Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; trimmed non-empty items in ascending key order
    local keys    = { }
    local collect = { }
    local v
    -- pairs() delivers keys in unspecified order (notably for frame.args),
    -- therefore gather the numeric keys first and sort them
    for k in pairs( args ) do
        if type( k ) == "number" then
            table.insert( keys, k )
        end
    end -- for k
    table.sort( keys )
    for _, k in ipairs( keys ) do
        v = mw.text.trim( args[ k ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            table.insert( collect, v )
        end
    end -- for _, k
    return table.concat( collect, apply or "|" )
end -- Text.concatParams()
Text.containsCJK = function ( analyse )
    -- Is any CJK code within?
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if CJK detected; else false
    local seek = Patterns.CJK
    if not seek then
        -- character class of U+3400-U+9FFF and U+20000-U+2B81F
        seek = "[" .. mw.ustring.char( 0x3400,  45, 0x9FFF,
                                       0x20000, 45, 0x2B81F ) .. "]"
        Patterns.CJK = seek
    end
    return mw.ustring.find( analyse, seek ) ~= nil
end -- Text.containsCJK()
Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    -- Removes HTML comments, HTML tags, bold/italic apostrophes,
    -- replaces non-breaking spaces and unstrips markers
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local r = adjust
    local i = r:find( "<!--", 1, true )
    local j
    while i do
        j = r:find( "-->", i + 3, true )
        -- keep only the text before the comment: up to i - 1,
        -- otherwise the leading "<" of the comment would survive
        if j then
            r = r:sub( 1, i - 1 ) .. r:sub( j + 3 )
        else
            -- unterminated comment swallows the remainder
            r = r:sub( 1, i - 1 )
        end
        i = r:find( "<!--", i, true )
    end -- "<!--"
    -- NOTE(review): the non-breaking space literal was not recoverable
    -- byte-exactly from this copy; both the entity and the raw
    -- character U+00A0 (UTF-8 \194\160) are replaced -- confirm
    r = r:gsub( "(</?%l[^>]*>)", "" )
         :gsub( "'''(.+)'''", "%1" )
         :gsub( "''(.+)''", "%1" )
         :gsub( "&nbsp;", " " )
         :gsub( "\194\160", " " )
    return mw.text.unstrip( r )
end -- Text.getPlain()
Text.isLatinRange = function ( adjust )
    -- Are characters expected to be latin or symbols within latin texts?
    -- Also initializes RangesLatin and Patterns.Latin on first call
    -- Precondition:
    --     adjust  -- string, or nil for initialization
    -- Returns: true, if valid for latin only; false, if not;
    --          nil, if adjust has not been provided
    if not RangesLatin then
        RangesLatin = { { 0x07,   0x02AF },
                        { 0x1D6B, 0x1D9A },
                        { 0x1E00, 0x1EFF },
                        { 0x2002, 0x203A },
                        { 0x2190, 0x23BD } }
    end
    if not Patterns.Latin then
        -- anchored character class made of all ranges
        local parts = { "^[" }
        for _, range in ipairs( RangesLatin ) do
            table.insert( parts,
                          mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) )
        end -- for _, range
        table.insert( parts, "]*$" )
        Patterns.Latin = table.concat( parts )
    end
    if adjust then
        return mw.ustring.match( adjust, Patterns.Latin ) ~= nil
    end
    return nil
end -- Text.isLatinRange()
Text.isQuote = function ( ask )
    -- Is this character any quotation mark?
    -- Parameter:
    --     ask  -- string, with single character
    -- Returns: true, if ask is quotation mark; else false
    if not SeekQuote then
        local marks = { 0x22,      -- "
                        0x27,      -- '
                        0xAB,      -- laquo
                        0xBB,      -- raquo
                        0x2018,    -- lsquo
                        0x2019,    -- rsquo
                        0x201A,    -- sbquo
                        0x201C,    -- ldquo
                        0x201D,    -- rdquo
                        0x201E,    -- bdquo
                        0x2039,    -- lsaquo
                        0x203A,    -- rsaquo
                        0x300C,    -- CJK
                        0x300D,    -- CJK
                        0x300E,    -- CJK
                        0x300F }   -- CJK
        SeekQuote = mw.ustring.char( unpack( marks ) )
    end
    if ask == "" then
        return false
    end
    -- plain search of ask within the string of all quotation marks
    return mw.ustring.find( SeekQuote, ask, 1, true ) ~= nil
end -- Text.isQuote()
Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string; trimmed non-empty items in ascending key order,
    --          joined by mw.text.listToText()
    local keys    = { }
    local collect = { }
    local v
    -- pairs() delivers keys in unspecified order (notably for frame.args),
    -- therefore gather the numeric keys first and sort them
    for k in pairs( args ) do
        if type( k ) == "number" then
            table.insert( keys, k )
        end
    end -- for k
    table.sort( keys )
    for _, k in ipairs( keys ) do
        v = mw.text.trim( args[ k ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            table.insert( collect, v )
        end
    end -- for _, k
    return mw.text.listToText( collect )
end -- Text.listToText()
Text.quote = function ( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local level = ( advance == 2 ) and 2 or 1
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- no language requested: page language, else content language
        local pageLang = mw.title.getCurrentTitle().pageLanguage
        if pageLang then
            slang = pageLang.code
        else
            slang = mw.language.getContentLanguage():getCode()
        end
    end
    return fiatQuote( mw.text.trim( apply ), slang, level )
end -- Text.quote()
Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim( apply )
    if r ~= "" and
       not Text.isQuote( mw.ustring.sub( r, 1, 1 ) ) and
       -- last character: sub( r, -1 ); the range ( -1, 1 ) would be
       -- empty for any text longer than one character
       not Text.isQuote( mw.ustring.sub( r, -1 ) ) then
        r = Text.quote( r, alien, advance )
    end
    return r
end -- Text.quoteUnquoted()
Text.removeDiacritics = function ( adjust )
    -- Remove all diacritics
    -- Decomposes to NFD, drops combining marks, recomposes to NFC
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    local marks = Patterns.Combined
    local bare
    if not marks then
        -- character class of the combining codepoint ranges
        marks = "[" .. mw.ustring.char( 0x0300, 45, 0x036F,
                                        0x1AB0, 45, 0x1AFF,
                                        0x1DC0, 45, 0x1DFF,
                                        0xFE20, 45, 0xFE2F ) .. "]"
        Patterns.Combined = marks
    end
    bare = mw.ustring.gsub( mw.ustring.toNFD( adjust ), marks, "" )
    return mw.ustring.toNFC( bare )
end -- Text.removeDiacritics()
Text.removeWhitespace = function ( adjust )
    -- Remove all whitespace, or replace with ASCII space
    -- Invisible formatting characters are dropped, special space
    -- characters become ASCII space, the result is trimmed
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; modified
    local r = mw.text.decode( adjust )
    if r:find( "&", 1, true ) then
        -- NOTE(review): entity names reconstructed; in this copy the
        -- literals appeared already decoded to the characters
        -- themselves, which cannot match behind the "&" guard.
        -- Confirm names against the master version.
        r = r:gsub( "&lrm;", "" )
             :gsub( "&rlm;", "" )
             :gsub( "&zwj;", "" )
             :gsub( "&zwnj;", "" )
             :gsub( "&thinsp;", " " )
             :gsub( "&ensp;", " " )
             :gsub( "&emsp;", " " )
    end
    if not Patterns.Whitespace then
        -- Each pattern has to be ONE character class: [ ... ]
        -- A sequence of single characters and several classes would
        -- only match if all of them occur next to each other.
        Patterns.Whitespace = mw.ustring.char( 91,
                                               0x00AD,
                                               0x200C, 45, 0x200F,
                                               0x2028, 45, 0x202E,
                                               0x205F,
                                               0x2060,
                                               93 )
        -- 0x205F is listed again here as in the previous definition;
        -- it has already been removed when this pattern is applied
        Patterns.Space = mw.ustring.char( 91,
                                          0x00A0,
                                          0x1680,
                                          0x2000, 45, 0x200A,
                                          0x202F,
                                          0x205F,
                                          0x3000,
                                          0x303F,
                                          93 )
    end
    r = mw.ustring.gsub( r, Patterns.Whitespace, "" )
    r = mw.ustring.gsub( r, Patterns.Space, " " )
    return mw.text.trim( r )
end -- Text.removeWhitespace()
Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated; else false
    local text   = mw.text.trim( analyse )
    local markup = text:find( "<", 1, true )
    local seek   = Patterns.Terminated
    local dir
    if not seek then
        -- terminating mark, followed by closing quotes/brackets only
        seek = mw.ustring.char( 91,
                                0x3002,
                                0xFF01,
                                0xFF0E,
                                0xFF1F )
               .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
        Patterns.Terminated = seek
    end
    if markup then
        -- closing span tags shall not hide the terminating mark
        text = text:gsub( "</span>", "" )
    end
    if mw.ustring.find( text, seek ) then
        return true
    end
    if not markup then
        return false
    end
    -- <bdi dir=...> ... </bdi></bdo>: answer depends on whether the
    -- direction differs from the content language direction
    dir = text:match( "<bdi[^>]* dir=\"([lr]t[rl])\".+</bdi></bdo>" )
    if not dir then
        return false
    end
    if mw.language.getContentLanguage():isRTL() then
        return ( dir == "ltr" )
    end
    return ( dir == "rtl" )
end -- Text.sentenceTerminated()
Text.tokenWords = function ( adjust )
    -- Split text in words of digits or letters
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with each run of punctuation and whitespace
    --          replaced by one ASCII space
    local text = mw.uri.decode( adjust, "WIKI" )
    local words
    if text:find( "&", 1, true ) then
        text = mw.text.decode( text )
    end
    words = mw.ustring.gsub( Text.removeWhitespace( text ),
                             "[%p%s]+",
                             " " )
    return words
end -- Text.tokenWords()
Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with all first letters in upper case
    --
    -- Named entities would be damaged ("&amp;" -> "&Amp;"), therefore
    -- they are converted into numeric entities before capitalizing
    -- and restored afterwards.
    -- NOTE(review): entity names reconstructed; in this copy both
    -- sides of every replacement appeared decoded to the same
    -- character, which made each replacement a no-op. Confirm the
    -- list against the master version.
    local entities = { { "&amp;",    "&#38;"   },
                       { "&lt;",     "&#60;"   },
                       { "&gt;",     "&#62;"   },
                       { "&nbsp;",   "&#160;"  },
                       { "&thinsp;", "&#8201;" },
                       { "&zwnj;",   "&#8204;" },
                       { "&zwj;",    "&#8205;" },
                       { "&lrm;",    "&#8206;" },
                       { "&rlm;",    "&#8207;" } }
    -- leading space: the very first letter is preceded by a non-word
    -- character like any other word start
    local r = " " .. adjust
    local m = adjust:find( "&", 1, true )
    if m then
        for _, pair in ipairs( entities ) do
            r = r:gsub( pair[ 1 ], pair[ 2 ] )
        end -- for _, pair
    end
    r = mw.ustring.gsub( r,
                         "(%W)(%l)",
                         function ( before, letter )
                             return before .. mw.ustring.upper( letter )
                         end )
    r = r:sub( 2 )
    if m then
        for _, pair in ipairs( entities ) do
            r = r:gsub( pair[ 2 ], pair[ 1 ] )
        end -- for _, pair
        -- hexadecimal entities: the "x" has been capitalized
        r = r:gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll()
Text.uprightNonlatin = function ( adjust )
-- Ensure non-italics for non-latin text parts
-- One single greek letter might be granted
-- Precondition:
-- adjust -- string
-- Returns: string with non-latin parts enclosed in <span>
local r
-- Call without argument: initializes RangesLatin and Patterns.Latin only
Text.isLatinRange()
if mw.ustring.match( adjust, Patterns.Latin ) then
-- latin only, horizontal dashes, quotes
r = adjust
else
-- c: codepoint of current character (reused as string in the backward scan)
local c
-- e: template element <span dir="auto" style="font-style:normal">%s</span>
local e = mw.html.create( "span" )
-- j: index where the current non-latin run started, or false if none open
local j = false
-- k: index of first character not yet copied into r
local k = 1
-- m: index at which a run would count as one single greek letter, or false
local m = false
-- n: number of characters in adjust
local n = mw.ustring.len( adjust )
-- p: format string "%s%s<span ...>%s</span>", built from e below
local p
local flat = function ( a )
-- isLatin
-- Returns true if codepoint a is within any range of RangesLatin;
-- otherwise nothing (nil)
local range
for i = 1, #RangesLatin do
range = RangesLatin[ i ]
if a >= range[ 1 ] and a <= range[ 2 ] then
return true
end
end -- for i
end -- flat()
local focus = function ( a )
-- char is not ambivalent
-- true for codepoints above 64 except 8192..8212,
-- and for 38 and 60 among those up to 64
local r = ( a > 64 )
if r then
r = ( a < 8192 or a > 8212 )
else
r = ( a == 38 or a == 60 ) -- '&' '<'
end
return r
end -- focus()
local form = function ( a )
-- Result so far, followed by the unwrapped text k..j-1
-- and the text j..a wrapped into the span
return string.format( p,
r,
mw.ustring.sub( adjust, k, j - 1 ),
mw.ustring.sub( adjust, j, a ) )
end -- form()
e:attr( "dir", "auto" )
:css( "font-style", "normal" )
:wikitext( "%s" )
p = "%s%s" .. tostring( e )
r = ""
for i = 1, n do
c = mw.ustring.codepoint( adjust, i, i )
if focus( c ) then
if flat( c ) then
-- unambiguous latin character: closes an open non-latin run
if j then
if m then
if i == m then
-- single greek letter.
j = false
end
m = false
end
if j then
-- Trailing spaces and opening parentheses before position i
-- are kept outside of the span
local nx = i - 1
local s = ""
for ix = nx, 1, -1 do
c = mw.ustring.sub( adjust, ix, ix )
if c == " " or c == "(" then
nx = nx - 1
s = c .. s
else
break -- for ix
end
end -- for ix
r = form( nx ) .. s
j = false
k = i
end
end
elseif not j then
-- unambiguous non-latin character opens a new run
j = i
if c >= 880 and c <= 1023 then
-- single greek letter?
m = i + 1
else
m = false
end
end
elseif m then
-- ambivalent character: shift the index expected for the next latin one
m = m + 1
end
end -- for i
if j and ( not m or m < n ) then
-- a run is still open at the end of the text
r = form( n )
else
r = r .. mw.ustring.sub( adjust, k )
end
end
return r
end -- Text.uprightNonlatin()
Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or wikidata|item|~|@ or false
    -- Postcondition:
    --     Returns  string  -- with queried version/item, also if problem
    --              false   -- if appropriate
    -- 2024-03-01
    local since  = atleast
    local latest = ( since == "~" )
    local onPage = ( since == "@" )
    local asItem = ( since == "item" )
    local r

    local function fromWikidata( qid )
        -- Evaluate the version statement of the item
        -- Returns nil if no usable statement is available
        local entity = mw.wikibase.getEntity( qid )
        local vsn
        if type( entity ) ~= "table" then
            return nil
        end
        vsn = entity:formatPropertyValues( Failsafe.serialProperty
                                           or "P348" )
        if type( vsn ) ~= "table" or
           type( vsn.value ) ~= "string" or
           vsn.value == "" then
            return nil
        end
        if latest and vsn.value == Failsafe.serial then
            -- local copy is up to date
            return false
        end
        if onPage then
            -- false if the current page is the one linked by the item
            if mw.title.getCurrentTitle().prefixedText
               == mw.wikibase.getSitelink( qid ) then
                return false
            end
            return qid
        end
        return vsn.value
    end -- fromWikidata()

    if latest or asItem or onPage or since == "wikidata" then
        local item = Failsafe.item
        since = false
        if type( item ) == "number" and item > 0 then
            local qid = string.format( "Q%d", item )
            if asItem then
                r = qid
            else
                r = fromWikidata( qid )
            end
        elseif asItem then
            r = false
        end
    end
    if r == nil then
        -- nothing decided yet: compare requirement with local serial
        if not since or since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()
Text.test = function ( about )
    -- Expose internal data for testing
    -- Parameter:
    --     about  -- string; "quote" is the only topic supported
    -- Returns: table with QuoteLang and QuoteType, or nil
    if about ~= "quote" then
        return nil
    end
    factoryQuote()
    return { QuoteLang = Text.quoteLang,
             QuoteType = Text.quoteType }
end -- Text.test()
-- Export
-- Table of the functions offered to templates; the functions below are
-- attached to it and it is returned at the end of the module
local p = { }
function p.char( frame )
    -- Template interface of Text.char()
    -- Parameters are taken from the calling template, or from #invoke
    -- if the template does not provide parameter 1:
    --     1       -- codepoints, separated by whitespace;
    --                decimal, or hexadecimal with leading "x"
    --     *       -- number of repetitions
    --     errors  -- "0" to suppress error messages
    -- Returns: string
    local parent = frame:getParent()
    -- getParent() may deliver nil if there is no calling frame
    local params = parent and parent.args
    local story  = params and params[ 1 ]
    local codes, lenient, multiple
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    if story then
        lenient  = ( params.errors == "0" )
        multiple = tonumber( params[ "*" ] )
        codes    = { }
        -- ipairs() keeps the order of the items; pairs() does not
        -- guarantee any order
        for _, v in ipairs( mw.text.split( story, "%s+" ) ) do
            if v ~= "" then
                local j
                if v:sub( 1, 1 ) == "x" then
                    j = tonumber( "0" .. v )
                else
                    j = tonumber( v )
                end
                -- unparseable items are passed on as string and
                -- reported by Text.char()
                table.insert( codes, j or v )
            end
        end -- for _, v
    end
    return Text.char( codes, multiple, lenient )
end
function p.concatParams( frame )
    -- Template interface of Text.concatParams()
    --     template=1  -- use parameters of the calling template
    --     separator   -- passed as separator
    --     format      -- passed as format
    local opts        = frame.args
    local viaTemplate = opts.template
    local args
    if type( viaTemplate ) == "string" then
        viaTemplate = ( mw.text.trim( viaTemplate ) == "1" )
    end
    if viaTemplate then
        args = frame:getParent().args
    else
        args = opts
    end
    return Text.concatParams( args, opts.separator, opts.format )
end
function p.containsCJK( frame )
    -- Template interface: "1" if parameter 1 contains CJK, else ""
    local text = frame.args[ 1 ] or ""
    if Text.containsCJK( text ) then
        return "1"
    end
    return ""
end
function p.getPlain( frame )
    -- Template interface of Text.getPlain(); parameter 1 is the text
    local text = frame.args[ 1 ] or ""
    return Text.getPlain( text )
end
function p.isLatinRange( frame )
    -- Template interface: "1" if parameter 1 is within latin range, else ""
    local text = frame.args[ 1 ] or ""
    if Text.isLatinRange( text ) then
        return "1"
    end
    return ""
end
function p.isQuote( frame )
    -- Template interface: "1" if parameter 1 is a quotation mark, else ""
    local sign = frame.args[ 1 ] or ""
    if Text.isQuote( sign ) then
        return "1"
    end
    return ""
end
function p.listToFormat( frame )
    -- Combine the n-th items of several lists by a format string
    -- Parameters of #invoke:
    --     1, 2, ...  -- lists; items separated by sep
    --     format     -- string; every "%s" is replaced by the item of
    --                   the next list; applied once per item position
    --     sep        -- separator of list items (default: ";");
    --                   passed to mw.text.split() as pattern
    -- Returns: string; concatenation of all formatted positions
    local lists      = { }
    local lines      = { }
    -- missing format: nothing to fill in, rather than a script error
    local pformat    = frame.args[ "format" ] or ""
    local sep        = frame.args[ "sep" ] or ";"
    local maxListLen = 0
    local item, line

    -- Parse parameters: lists
    for k, v in pairs( frame.args ) do
        local knum = tonumber( k )
        if knum then
            lists[ knum ] = v
        end
    end -- for k, v

    -- Split lists
    for i = 1, #lists do
        lists[ i ] = mw.text.split( lists[ i ], sep )
        if #lists[ i ] > maxListLen then
            maxListLen = #lists[ i ]
        end
    end -- for i

    -- Generate result
    for i = 1, maxListLen do
        line = pformat
        for j = 1, #lists do
            -- shorter lists contribute an empty item, as in p.zip();
            -- "%" is escaped since it is special in gsub replacements
            item = ( lists[ j ][ i ] or "" ):gsub( "%%", "%%%%" )
            line = mw.ustring.gsub( line, "%%s", item, 1 )
        end -- for j
        lines[ i ] = line
    end -- for i
    return table.concat( lines )
end
function p.listToText( frame )
    -- Template interface of Text.listToText()
    --     template=1  -- use parameters of the calling template
    --     format      -- passed as format
    local opts        = frame.args
    local viaTemplate = opts.template
    local args
    if type( viaTemplate ) == "string" then
        viaTemplate = ( mw.text.trim( viaTemplate ) == "1" )
    end
    if viaTemplate then
        args = frame:getParent().args
    else
        args = opts
    end
    return Text.listToText( args, opts.format )
end
function p.quote( frame )
    -- Template interface of Text.quote()
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level 1 or 2 (optional)
    local args = frame.args
    local lang = args[ 2 ]
    if type( lang ) == "string" then
        lang = mw.text.trim( lang )
        -- empty code is passed as false
        lang = ( lang ~= "" ) and lang
    end
    return Text.quote( args[ 1 ] or "",
                       lang,
                       tonumber( args[ 3 ] ) )
end
function p.quoteUnquoted( frame )
    -- Template interface of Text.quoteUnquoted()
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level 1 or 2 (optional)
    local args = frame.args
    local lang = args[ 2 ]
    if type( lang ) == "string" then
        lang = mw.text.trim( lang )
        -- empty code is passed as false
        lang = ( lang ~= "" ) and lang
    end
    return Text.quoteUnquoted( args[ 1 ] or "",
                               lang,
                               tonumber( args[ 3 ] ) )
end
function p.removeDiacritics( frame )
    -- Template interface of Text.removeDiacritics(); parameter 1 is the text
    local text = frame.args[ 1 ] or ""
    return Text.removeDiacritics( text )
end
function p.sentenceTerminated( frame )
    -- Template interface: "1" if parameter 1 is a terminated sentence, else ""
    local text = frame.args[ 1 ] or ""
    if Text.sentenceTerminated( text ) then
        return "1"
    end
    return ""
end
function p.tokenWords( frame )
    -- Template interface of Text.tokenWords(); parameter 1 is the text
    local text = frame.args[ 1 ] or ""
    return Text.tokenWords( text )
end
function p.ucfirstAll( frame )
    -- Template interface of Text.ucfirstAll(); parameter 1 is the text
    local text = frame.args[ 1 ] or ""
    return Text.ucfirstAll( text )
end
function p.unstrip( frame )
    -- Template interface: unstrip parameter 1 and trim the result
    local text     = frame.args[ 1 ] or ""
    local revealed = mw.text.unstrip( text )
    return mw.text.trim( revealed )
end
function p.uprightNonlatin( frame )
    -- Template interface of Text.uprightNonlatin(); parameter 1 is the text
    local text = frame.args[ 1 ] or ""
    return Text.uprightNonlatin( text )
end
function p.zip( frame )
    -- Interleave the items of several lists
    -- Parameters of #invoke:
    --     1, 2, ...       -- lists
    --     sep             -- default separator of list items
    --     sep1, sep2 ...  -- separator of the particular list
    --     isep            -- inserted between items of one position
    --     osep            -- inserted between positions
    -- Returns: string
    local args       = frame.args
    local lists      = { }
    local seps       = { }
    local out        = { }
    local defaultsep = args[ "sep" ] or ""
    local innersep   = args[ "isep" ] or ""
    local outersep   = args[ "osep" ] or ""
    local longest    = 0

    -- Parse parameters
    for k, v in pairs( args ) do
        local knum = tonumber( k )
        if knum then
            lists[ knum ] = v
        elseif string.sub( k, 1, 3 ) == "sep" then
            local sepnum = tonumber( string.sub( k, 4 ) )
            if sepnum then
                seps[ sepnum ] = v
            end
        end
    end -- for k, v

    -- Use the default separator where no explicit one has been given
    for i = 1, math.max( #seps, #lists ) do
        seps[ i ] = seps[ i ] or defaultsep
    end -- for i

    -- Split lists
    for i = 1, #lists do
        lists[ i ] = mw.text.split( lists[ i ], seps[ i ] )
        if #lists[ i ] > longest then
            longest = #lists[ i ]
        end
    end -- for i

    -- Collect the pieces, position by position
    for i = 1, longest do
        if i > 1 then
            out[ #out + 1 ] = outersep
        end
        for j = 1, #lists do
            if j > 1 then
                out[ #out + 1 ] = innersep
            end
            out[ #out + 1 ] = lists[ j ][ i ] or ""
        end -- for j
    end -- for i
    return table.concat( out )
end
p.failsafe = function ( frame )
    -- Versioning interface
    -- Parameter:
    --     frame  -- frame table (parameter 1 is the request),
    --               or string with the request
    -- Returns: string; result of Failsafe.failsafe(), or "" if false
    local kind = type( frame )
    local wanted, found
    if kind == "string" then
        wanted = frame
    elseif kind == "table" then
        wanted = frame.args[ 1 ]
    end
    if wanted then
        wanted = mw.text.trim( wanted )
        if wanted == "" then
            wanted = false
        end
    end
    found = Failsafe.failsafe( wanted )
    if found then
        return found
    end
    return ""
end -- p.failsafe()
function p.Text()
    -- Library access for other modules: returns the Text table
    return Text
end -- p.Text
-- Calling the export table itself removes its metatable again
-- and returns the library table (Failsafe is an alias of Text)
do
    local meta = { }
    meta.__call = function ()
        setmetatable( p, nil )
        return Failsafe
    end
    setmetatable( p, meta )
end
return p