User:Cacycle/diff.js

Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump.
This code will be executed when previewing this page.

This user script seems to have a documentation page at User:Cacycle/diff.

Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.

/*<pre><nowiki> */


/*
 * JavaScript diff algorithm
 * by [[User:Cacycle]] (en.wikipedia.org)
 *
 * An implementation of:
 * Paul Heckel: A technique for isolating differences between files
 * Communications of the ACM 21(4):264 (1978)
 * http://doi.acm.org/10.1145/359460.359467
 *
 */

function diff(oldText, newText) {

  var str = '';

// text object holds all text datastructures
  var text = new Object();

// consecutive words of the new text (N)
  text.newWords = new Array();

// consecutive words of the old text (O)
  text.oldWords = new Array();

// array of corresponding word number in old text (NA): text.newToOld
  text.newToOld = new Array();

// array of corresponding word number in new text (OA): text.oldToNew
  text.oldToNew = new Array();


// symbol table for passes 1 - 3  holds words as a hash: symbol['word']
  symbol = new Object();

// symbol table new word occurences counter (NC): symbol.newCtr
  symbol.newCtr = new Array();

// symbol table old word occurences counter (OC): symbol.oldCtr
  symbol.oldCtr = new Array();

// symbol table last old word number: symbol.toNew
  symbol.toNew = new Array();

// symbol table last new word number (OLNA): symbol.toOld
  symbol.toOld = new Array();


// split old text into words
  do {
//               /     |    |    |    |    |   |  |     |   |  |  |    |    |    | /
    var result = /[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|./g.exec(oldText);
    if (result != null) {
      text.oldWords.push(result[0]);
    }
  } while (result != null);

// split new text into words ///VAR???
  do {
    var result = /[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|./g.exec(newText);
    if (result != null) {
      text.newWords.push(result[0]);
    }
  } while (result != null);

// pass 1: parse new text into symbol table s

  for (var i = 0; i < text.newWords.length; i ++) {
    var word = text.newWords[i];

// add new entry to symbol table
    if ( symbol[word] == null) {
      symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null };
    }

// increment symbol table word counter for new text
    symbol[word].newCtr ++;

// add last word number in new text
    symbol[word].toNew = i;
  }

// pass 2: parse old text into symbol table

  for (var i = 0; i < text.oldWords.length; i ++) {
    var word = text.oldWords[i];

// add new entry to symbol table
    if ( symbol[word] == null) {
      symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null };
    }

// increment symbol table word counter for old text
    symbol[word].oldCtr ++;

// add last word number in old text
    symbol[word].toOld = i;
  }

// pass 3: connect unique words

  for (var i in symbol) {

// find words in the symbol table that occur only once in both versions
    if ( (symbol[i].newCtr == 1) && (symbol[i].oldCtr == 1) ) {
      var toNew = symbol[i].toNew;
      var toOld = symbol[i].toOld;

// do not use spaces and non-words as unique markers
      if ( /\w/.test( text.newWords[toNew] ) ) {

// connect from n to o
        text.newToOld[toNew] = toOld;

// connect from o to n
        text.oldToNew[toOld] = toNew;
      }
    }
  }

// pass 4: connect following identical words downwards

  text = pass4(text);
  function pass4(text) {
    for (var i = 0; i < text.newWords.length - 1; i ++) {

// find already connected pairs
      if (text.newToOld[i] != null) {
        var j = text.newToOld[i];

// check if the following words are not yet connected
        if ( (text.newToOld[i + 1] == null) && (text.oldToNew[j + 1] == null) ) {

// if the following words are the same connect them
          if ( text.newWords[i + 1] == text.oldWords[j + 1] ) {
            text.newToOld[i + 1] = j + 1;
            text.oldToNew[j + 1] = i + 1;
          }
        }
      }
    }
    return (text);
  }

// pass 5: connect following identical words upwards

  text = pass5(text);
  function pass5(text) {
    for (var i = text.newWords.length - 1; i > 0; i --) {

// find already connected pairs
      if (text.newToOld[i] != null) {
        var j = text.newToOld[i];

// check if the preceeding words are not yet connected
        if ( (text.newToOld[i - 1] == null) && (text.oldToNew[j - 1] == null) ) {

// if the preceeding words are the same connect them
          if ( text.newWords[i - 1] == text.oldWords[j - 1] ) {
            text.newToOld[i - 1] = j - 1;
            text.oldToNew[j - 1] = i - 1;
          }
        }
      }
    }
    return (text);
  }

// unresolved regions caused by adding two common words into sequences of common words
// are resolved by pairwise comparison of the words
  for (var i = 0; i < text.newWords.length - 1; i ++) {

// find already connected pairs
    if (text.newToOld[i] != null) {
      var j = text.newToOld[i];

// find corresponding unresolved sequences
      if ( (text.newToOld[i + 1] == null) && (text.oldToNew[j + 1] == null) ) {

// determine the end of the sequences
        var ib = i + 2;
        while ( (text.newToOld[ib] == null) && (ib < text.newWords.length) ) {
          ib ++;
        }
        var jb = j + 2;
        while ( (text.oldToNew[jb] == null) && (jb < text.oldWords.length) ) {
          jb ++;
        }

// compare wordwise, exclude spaces
        for (i2 = i + 1; i2 < ib; i2 ++) {
          if ( /\s/.test( text.newWords[i2] ) ) {
            continue;
          }
          for (j2 = j + 1; j2 < jb; j2 ++) {
            if (text.oldToNew[j2] != null) {
              continue;
            }
            if ( text.newWords[i2] == text.oldWords[j2] ) {
              text.newToOld[i2] = j2;
              text.oldToNew[j2] = i2;
              break;
            }
          }
        }
      }
    }
  }

// repeat pass 4: connect following identical spaces downwards
  text = pass4(text);

// repeat pass 5: connect following identical spaces upwards
  text = pass5(text);

// pass 6: generate output

  str += '<span class="diffBlock0">';
  var block = 1;

  var i = 0;
  var j = 0;
  do {

// collect consecutive identical text
    var ident = '';
    while ( (text.newToOld[i] != null) && (i < text.newWords.length) ) {

// detect block boundary
      if ( (text.newToOld[i] != j) && (text.newToOld[i] != null) ) {
        j = text.newToOld[i];
        ident += '</span><span class="diffBlock' + block + '">';
        block = 1 - block;
      }

      ident += escape(text.newWords[i]);
      i ++;
      j ++;
    }

// collect consecutive deletions
    var del = '';
    while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) {
      del += text.oldWords[j];
      j ++;
    }

// collect consecutive inserts
    var ins = '';
    while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) {
      ins += text.newWords[i];
      i ++;
    }

// remove leading and trailing similarities betweein del and ins from highlighting
    var pre = '';
    var post = '';
    if ( (del != '') && (ins != '') ) {

// remove leading similarities
      while ( del.charAt(0) == ins.charAt(0) ) {
        pre = pre + del.charAt(0);
        del = del.substr(1);
        ins = ins.substr(1);
      }

// remove trailing similarities
      while ( del.charAt(del.length - 1) == ins.charAt(ins.length - 1) ) {
        post = del.charAt(del.length - 1) + post;
        del = del.substr(0, del.length - 1);
        ins = ins.substr(0, ins.length - 1);
      }
    }

// output the identical text, deletions and inserts
    str += ident;
    str += pre;
    if (del != '') {
      del = '<font class="diffDelete">' + escape(del) + "</font>";
      del = del.replace(/\n/g, '&para;<br>');
      str += del;
    }
    if (ins != '') {
      ins = '<font class="diffInsert">' + escape(ins) + "</font>";
      ins = ins.replace(/\n/g, '&para;<br>');
      str += ins;
    }
    str += post;

  } while (i < text.newWords.length);

  str += '</span>';
  str = str.replace(/  /g, ' &nbsp;');
  str = str.replace(/\n/g, '<br>');
  return(str);
}


function escape(str) {
  str = str.replace(/&/g, "&amp;");
  str = str.replace(/</g, "&lt;");
  str = str.replace(/>/g, "&gt;");
  str = str.replace(/"/g, "&quot;"); //"
  return (str);
}


/* </nowiki></pre> */