User:Cacycle/diff.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
![]() | This user script seems to have a documentation page at User:Cacycle/diff. |
/*<pre><nowiki> */
/*
* JavaScript diff algorithm
* by [[User:Cacycle]] (en.wikipedia.org)
*
* An implementation of:
* Paul Heckel: A technique for isolating differences between files
* Communications of the ACM 21(4):264 (1978)
* http://doi.acm.org/10.1145/359460.359467
*
*/
function diff(oldText, newText) {
var str = '';
// text object holds all text datastructures
var text = new Object();
// consecutive words of the new text (N)
text.newWords = new Array();
// consecutive words of the old text (O)
text.oldWords = new Array();
// array of corresponding word number in old text (NA): text.newToOld
text.newToOld = new Array();
// array of corresponding word number in new text (OA): text.oldToNew
text.oldToNew = new Array();
// symbol table for passes 1 - 3 holds words as a hash: symbol['word']
symbol = new Object();
// symbol table new word occurences counter (NC): symbol.newCtr
symbol.newCtr = new Array();
// symbol table old word occurences counter (OC): symbol.oldCtr
symbol.oldCtr = new Array();
// symbol table last old word number: symbol.toNew
symbol.toNew = new Array();
// symbol table last new word number (OLNA): symbol.toOld
symbol.toOld = new Array();
// split old text into words
do {
// / | | | | | | | | | | | | | | /
var result = /[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|./g.exec(oldText);
if (result != null) {
text.oldWords.push(result[0]);
}
} while (result != null);
// split new text into words ///VAR???
do {
var result = /[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|./g.exec(newText);
if (result != null) {
text.newWords.push(result[0]);
}
} while (result != null);
// pass 1: parse new text into symbol table s
for (var i = 0; i < text.newWords.length; i ++) {
var word = text.newWords[i];
// add new entry to symbol table
if ( symbol[word] == null) {
symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null };
}
// increment symbol table word counter for new text
symbol[word].newCtr ++;
// add last word number in new text
symbol[word].toNew = i;
}
// pass 2: parse old text into symbol table
for (var i = 0; i < text.oldWords.length; i ++) {
var word = text.oldWords[i];
// add new entry to symbol table
if ( symbol[word] == null) {
symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null };
}
// increment symbol table word counter for old text
symbol[word].oldCtr ++;
// add last word number in old text
symbol[word].toOld = i;
}
// pass 3: connect unique words
for (var i in symbol) {
// find words in the symbol table that occur only once in both versions
if ( (symbol[i].newCtr == 1) && (symbol[i].oldCtr == 1) ) {
var toNew = symbol[i].toNew;
var toOld = symbol[i].toOld;
// do not use spaces and non-words as unique markers
if ( /\w/.test( text.newWords[toNew] ) ) {
// connect from n to o
text.newToOld[toNew] = toOld;
// connect from o to n
text.oldToNew[toOld] = toNew;
}
}
}
// pass 4: connect following identical words downwards
text = pass4(text);
function pass4(text) {
for (var i = 0; i < text.newWords.length - 1; i ++) {
// find already connected pairs
if (text.newToOld[i] != null) {
var j = text.newToOld[i];
// check if the following words are not yet connected
if ( (text.newToOld[i + 1] == null) && (text.oldToNew[j + 1] == null) ) {
// if the following words are the same connect them
if ( text.newWords[i + 1] == text.oldWords[j + 1] ) {
text.newToOld[i + 1] = j + 1;
text.oldToNew[j + 1] = i + 1;
}
}
}
}
return (text);
}
// pass 5: connect following identical words upwards
text = pass5(text);
function pass5(text) {
for (var i = text.newWords.length - 1; i > 0; i --) {
// find already connected pairs
if (text.newToOld[i] != null) {
var j = text.newToOld[i];
// check if the preceeding words are not yet connected
if ( (text.newToOld[i - 1] == null) && (text.oldToNew[j - 1] == null) ) {
// if the preceeding words are the same connect them
if ( text.newWords[i - 1] == text.oldWords[j - 1] ) {
text.newToOld[i - 1] = j - 1;
text.oldToNew[j - 1] = i - 1;
}
}
}
}
return (text);
}
// unresolved regions caused by adding two common words into sequences of common words
// are resolved by pairwise comparison of the words
for (var i = 0; i < text.newWords.length - 1; i ++) {
// find already connected pairs
if (text.newToOld[i] != null) {
var j = text.newToOld[i];
// find corresponding unresolved sequences
if ( (text.newToOld[i + 1] == null) && (text.oldToNew[j + 1] == null) ) {
// determine the end of the sequences
var ib = i + 2;
while ( (text.newToOld[ib] == null) && (ib < text.newWords.length) ) {
ib ++;
}
var jb = j + 2;
while ( (text.oldToNew[jb] == null) && (jb < text.oldWords.length) ) {
jb ++;
}
// compare wordwise, exclude spaces
for (i2 = i + 1; i2 < ib; i2 ++) {
if ( /\s/.test( text.newWords[i2] ) ) {
continue;
}
for (j2 = j + 1; j2 < jb; j2 ++) {
if (text.oldToNew[j2] != null) {
continue;
}
if ( text.newWords[i2] == text.oldWords[j2] ) {
text.newToOld[i2] = j2;
text.oldToNew[j2] = i2;
break;
}
}
}
}
}
}
// repeat pass 4: connect following identical spaces downwards
text = pass4(text);
// repeat pass 5: connect following identical spaces upwards
text = pass5(text);
// pass 6: generate output
str += '<span class="diffBlock0">';
var block = 1;
var i = 0;
var j = 0;
do {
// collect consecutive identical text
var ident = '';
while ( (text.newToOld[i] != null) && (i < text.newWords.length) ) {
// detect block boundary
if ( (text.newToOld[i] != j) && (text.newToOld[i] != null) ) {
j = text.newToOld[i];
ident += '</span><span class="diffBlock' + block + '">';
block = 1 - block;
}
ident += escape(text.newWords[i]);
i ++;
j ++;
}
// collect consecutive deletions
var del = '';
while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) {
del += text.oldWords[j];
j ++;
}
// collect consecutive inserts
var ins = '';
while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) {
ins += text.newWords[i];
i ++;
}
// remove leading and trailing similarities betweein del and ins from highlighting
var pre = '';
var post = '';
if ( (del != '') && (ins != '') ) {
// remove leading similarities
while ( del.charAt(0) == ins.charAt(0) ) {
pre = pre + del.charAt(0);
del = del.substr(1);
ins = ins.substr(1);
}
// remove trailing similarities
while ( del.charAt(del.length - 1) == ins.charAt(ins.length - 1) ) {
post = del.charAt(del.length - 1) + post;
del = del.substr(0, del.length - 1);
ins = ins.substr(0, ins.length - 1);
}
}
// output the identical text, deletions and inserts
str += ident;
str += pre;
if (del != '') {
del = '<font class="diffDelete">' + escape(del) + "</font>";
del = del.replace(/\n/g, '¶<br>');
str += del;
}
if (ins != '') {
ins = '<font class="diffInsert">' + escape(ins) + "</font>";
ins = ins.replace(/\n/g, '¶<br>');
str += ins;
}
str += post;
} while (i < text.newWords.length);
str += '</span>';
str = str.replace(/ /g, ' ');
str = str.replace(/\n/g, '<br>');
return(str);
}
function escape(str) {
str = str.replace(/&/g, "&");
str = str.replace(/</g, "<");
str = str.replace(/>/g, ">");
str = str.replace(/"/g, """); //"
return (str);
}
/* </nowiki></pre> */