Module:Diff/sandbox
| This is the module sandbox page for Module:Diff (diff). See also the companion subpage for test cases (run). |
| This module is rated as ready for general use. It has reached a mature state, is considered relatively stable and bug-free, and may be used wherever appropriate. It can be mentioned on help pages and other Wikipedia resources as an option for new users. To minimise server load and avoid disruptive output, improvements should be developed through sandbox testing rather than repeated trial-and-error editing. |
| Description | Provides functions for diffing text. |
|---|---|
| Author(s) |
|
| Code source | Diff |
| Status | Release |
| License | MIT/X |
Provides functions for diffing text.
Usage
Ciaran Hope (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.
Difference in words
{{TextDiff|[[Ciaran Hope]] (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|[[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}
Result:
| − | [[Ciaran Hope]] | + | [[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves. |
Difference in characters
{{StringDiff|[[Ciaran Hope]] (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|[[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}
Result:
| − | [[Ciaran Hope]] ( | + | [[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves. |
Example with invoke
{{#invoke:Diff|main|[[Ciaran Hope]] (born 4 August 1974) is an Irish composer of orchestral, choral, and film music.|[[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves.}}
Result:
| − | [[Ciaran Hope]] | + | [[Ciaran Hope]] (was given birth to on 20 August last year) is an Irish composter of orchestral, oral, and film music dance moves. |
Documentation
Package items
-----------------------------------------------------------------------------
-- Provides functions for getting the difference between two pieces of text.
--
-- (c) 2007, 2008 Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007 Hisham Muhammad
-- Adapted to MediaWiki Lua originally by User:Ebrahim
--
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
-----------------------------------------------------------------------------
--- Localization of globals
local lstring = mw.ustring
local ltext = mw.text
local table_insert = table.insert
local table_remove = table.remove
local html_create = mw.html.create
local p = {}
local SKIP_SEPARATOR = true -- a constant
local STYLES = 'src = "Module:Diff/styles.css"'
-- Token statuses
local IN = "in"
local OUT = "out"
local SAME = "same"
-----------------------------------------------------------------------------
--- Split a string into tokens.
-- (Adapted from Gavin Kistner's split on http://lua-users.org/wiki/SplitJoin.)
--
--- @param text string Input string to be split.
--- @param separator string|'%s+'? the separator pattern (default: whitespace)
--- @param skip_separator boolean? don't include the separator in the results.
--- @return table token_list list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
skip_separator = skip_separator or SKIP_SEPARATOR
separator = separator or "%s+"
local parts = {}
local start = 1
local split_start, split_end = lstring.find(text, separator, start)
while split_start do
table_insert(parts, lstring.sub(text, start, split_start - 1))
if not skip_separator then
table_insert(parts, lstring.sub(text, split_start, split_end))
end
start = split_end + 1
split_start, split_end = lstring.find(text, separator, start)
end
if lstring.sub(text, start) ~= "" then
table_insert(parts, lstring.sub(text, start))
end
return parts
end
-----------------------------------------------------------------------------
--- Derives the longest common subsequence of two strings. This is a faster
--- implementation than one provided by stdlib. Submitted by Hisham Muhammad.
-- (The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence)
--
--- NOTE: High-level design choice.
--- This implementation makes use of nested metatables to create a sparse,
--- auto-initializing 2D array. This is clever but complex and slightly
--- unconventional in order to create the LCS matrix only as needed.
--- It's a neat trick, but it may be slower than a more traditional approach,
--- especially for large strings. It also uses more memory, since the matrix
--- is not a sparse array and the matrix is of size (m+1)×(n+1), where m and n
--- are the lengths of the two strings t1 and t2.
--- While the metatable approach is compact, it increases mental overhead too.
--- Sparse array initialization and explicitly managing state with tables would
--- just as helpful and could improve performance.
---
--- @param t1 any first string.
--- @param t2 any second string.
--- @return table lcs_matrix least common subsequence as a matrix.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
local m = #t1
local n = #t2
-- Build the dynamic programming matrix C on demand using nested metatables.
-- This creates a "sparse array" structure where C[i][j] automatically
-- initializes to 0 when first accessed, avoiding the need for explicit
-- initialization of a potentially large (m+1) x (n+1) table.
local C = {}
local setmetatable = setmetatable
local mt_tbl = {
__index = function(t, k)
t[k] = 0
return 0
end,
}
local mt_C = {
__index = function(t, k)
local tbl = {}
setmetatable(tbl, mt_tbl)
t[k] = tbl
return tbl
end,
}
setmetatable(C, mt_C)
local max = math.max
-- Loops run from 1 to m and 1 to n.
for i = 1, m do
local ci1 = C[i + 1]
local ci = C[i]
for j = 1, n do
-- Compare the tokens directly using 1-based indices (t1[i] and t2[j]).
if t1[i] == t2[j] then
ci1[j + 1] = ci[j] + 1
else
ci1[j + 1] = max(ci1[j], ci[j + 1])
end
end
end
return C
end
-----------------------------------------------------------------------------
--- Formats an inline diff as HTML, with <ins> and <del> tags.
--
--- @param tokens table table of {token, status} key-value pairs.
--- @return string diff_buffer HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
local diff_buffer = ""
local token, status
for _, token_record in ipairs(tokens) do
token = ltext.nowiki(token_record[1])
status = token_record[2]
if status == "in" then
diff_buffer = diff_buffer .. "<ins>" .. token .. "</ins>"
elseif status == "out" then
diff_buffer = diff_buffer .. "<del>" .. token .. "</del>"
else
diff_buffer = diff_buffer .. token
end
end
return diff_buffer
end
-----------------------------------------------------------------------------
-- Returns a diff of two strings in tuples/two-element array, where the key
-- is a token and the value the token's status ("same", "in", "out").
--
--- @param old string "old" text string
--- @param new string "new" text string
--- @param separator string|'%s+'? separator pattern (default: whitespace).
--- @return table<string, 'same'|'in'|'out'> token_list Two-element array/tuples of annotated tokens.
-----------------------------------------------------------------------------
local function diff(old, new, separator)
assert(old)
assert(new)
local tbl_new = split(new, separator)
local tbl_old = split(old, separator)
-- First, compare the beginnings and ends of strings to remove the common
-- prefix and suffix.
local prefix = "" -- common text in the beginning
local suffix = "" -- common text in the end
while tbl_old[1] and tbl_old[1] == tbl_new[1] do
local token = table_remove(tbl_old, 1)
table_remove(tbl_new, 1)
prefix = prefix .. token
end
while tbl_old[#tbl_old] and tbl_old[#tbl_old] == tbl_new[#tbl_new] do
local token = table_remove(tbl_old)
table_remove(tbl_new)
suffix = token .. suffix
end
-- Setup the table that will store the diff in reverse order.
-- ... (rest of the function is now fine as it uses tbl_old/tbl_new)
local rev_diff = {}
-- Define local helper functions to manage token insertion into rev_diff.
local function put(token, type)
table_insert(rev_diff, { token, type })
end
local function ins(token)
put(token, IN)
end
local function del(token)
put(token, OUT)
end
local function same(token)
if token then
put(token, SAME)
end
end
-- Put the suffix as the first token (we are storing the diff in the
-- reverse order)
same(suffix)
-- Define a function that will scan the LCS matrix backwards and build the
-- diff output recursively.
local function get_diff(C, tbl_old, tbl_new, i, j)
local old_i = tbl_old[i]
local new_j = tbl_new[j]
if i >= 1 and j >= 1 and old_i == new_j then
same(old_i)
return get_diff(C, tbl_old, tbl_new, i - 1, j - 1)
else
local Cij1 = C[i][j - 1]
local Ci1j = C[i - 1][j]
if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
ins(new_j)
return get_diff(C, tbl_old, tbl_new, i, j - 1)
elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
del(old_i)
return get_diff(C, tbl_old, tbl_new, i - 1, j)
end
end
end
-- Then call it.
get_diff(quick_LCS(tbl_old, tbl_new), tbl_old, tbl_new, #tbl_old + 1, #tbl_new + 1)
-- Put the prefix in at the end
same(prefix)
-- Reverse the diff.
local tbl_diff = {}
for i = #rev_diff, 1, -1 do
table_insert(tbl_diff, rev_diff[i])
end
tbl_diff.to_html = format_as_html
return tbl_diff
end
local function attach_styles(base)
local templatestyles_tag
if not templatestyles_tag then
templatestyles_tag = lstring.format(
"%s %s",
mw.getCurrentFrame():extensionTag({
name = "templatestyles",
content = "",
args = { STYLES },
}),
base
)
else
return lstring.format("%s%s", templatestyles_tag, base)
end
end
-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line
-----------------------------------------------------------------------------
local function wikiDiff(old, new, separator)
local tokens = diff(old, new, separator)
local root = html_create("")
local token, status
local invalidStyles = "-webkit-border-end-width: 1px;"
.. "-webkit-border-start-width: 4px;"
.. "-moz-border-end-width: 1px;"
.. "-moz-border-start-width: 4px;"
local tr = root
:tag("table")
:addClass("mdl-diff")
:attr("role", "none")
-- :attr("aria-label", "text difference, table, two columns," ..
-- " first column content removed, second column content added")
:tag(
"tr"
)
tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("−")
local deleted = tr
:tag("td")
:attr("aria-label", "Removed")
:addClass("mdl-diff-deletedline")
:addClass("mdl-diff-changedline")
:css(invalidStyles) -- Workaround for a bug in Firefox where border styles are ignored on table cells
:addClass("monospace")
:addClass("monospaced")
:tag("div")
:addClass("mdl-diff-div")
for _, token_record in ipairs(tokens) do
token = ltext.nowiki(token_record[1]):gsub("\n", " ") -- Force all newlines to encode to avoid linter issues
status = token_record[2]
if status == OUT then
deleted:tag("del"):addClass("mdl-diff-deletedline"):addClass("mdl-diff-change-inline"):wikitext(token)
elseif status == SAME then
deleted:wikitext(token)
end
end
tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("+")
local inserted = tr:tag("td")
:attr("aria-label", "Added")
:addClass("mdl-diff-addedline")
:addClass("mdl-diff-changedline")
:css(invalidStyles)
:addClass("monospace")
:addClass("monospaced")
:tag("div")
:addClass("mdl-diff-div")
for _, token_record in ipairs(tokens) do
token = ltext.nowiki(token_record[1]):gsub("\n", " ") -- Force all newlines to encode to avoid linter issues
status = token_record[2]
if status == IN then
inserted:tag("ins"):addClass("mdl-diff-addedline"):addClass("mdl-diff-change-inline"):wikitext(token)
elseif status == SAME then
inserted:wikitext(token)
end
end
return attach_styles(tostring(root))
end
local function main(frame)
return wikiDiff(
ltext.decode(ltext.unstrip(frame.args[1])),
ltext.decode(ltext.unstrip(frame.args[2])),
frame.args[3] or "[%s%.:-]+"
)
end
p = {
diff = diff,
wikiDiff = wikiDiff,
main = main,
}
return p