Jump to content

User:Quarl/diff.js

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// [[User:Quarl/diff.js]] - utility functions for doing diffs

// quarl 2006-01-29 initial version

// requires: util.js (trimspaces)

// <pre><nowiki>

// if more than this many words of changes, use overflow string
var diff_wikisummary_maxwords = 30;
var diff_wikisummary_overflow = "$1 words changed";

/*
 * diff() and diffString() are based on
 *  http://ejohn.org/projects/javascript-diff-algorithm/
 *   Copyright John Resig
 */

function diff_split(s) {
    //return trimspaces(s).split(/(?:\s|[.,;\'\"`])+/);
    return trimspaces(s).split(/\s+/);
}

function diffString( o, n ) {
    var out = diff( diff_split(o), diff_split(n) );
    var str = "";

    for ( var i = 0; i < out.n.length - 1; i++ ) {
        if ( out.n[i].text == null ) {
            if ( out.n[i].indexOf('"') == -1 && out.n[i].indexOf('<') == -1 )
                str += "<ins style='background:#E6FFE6;'> " + out.n[i] +"</ins>";
            else
                str += " " + out.n[i];
        } else {
            var pre = "";
            if ( out.n[i].text.indexOf('"') == -1 && out.n[i].text.indexOf('<') == -1 ) {

                var n = out.n[i].row + 1;
                while ( n < out.o.length && out.o[n].text == null ) {
                    if ( out.o[n].indexOf('"') == -1 && out.o[n].indexOf('<') == -1 && out.o[n].indexOf(':') == -1 && out.o[n].indexOf(';') == -1 )
                        pre += " <del style='background:#FFE6E6;'>" + out.o[n] +" </del>";
                    n++;
                }
            }
            str += " " + out.n[i].text + pre;
        }
    }

    return str;
}

function diff( o, n ) {
    var ns = {};
    var os = {};

    for ( var i = 0; i < n.length; i++ ) {
        // note we have to check that it is in fact an object with "rows", in
        // case ns[i] happens to match a javascript member function of class
        // Array, e.g. "some"!
        if ( ns[ n[i] ] == null || !ns[n[i]].rows )
            ns[ n[i] ] = { rows: new Array(), o: null };
        ns[ n[i] ].rows.push( i );
    }

    for ( var i = 0; i < o.length; i++ ) {
        if ( os[ o[i] ] == null || !os[o[i]].rows )
            os[ o[i] ] = { rows: new Array(), n: null };
        os[ o[i] ].rows.push( i );
    }

    for ( var i in ns ) {
        if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
            n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
            o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
        }
    }

    for ( var i = 0; i < n.length - 1; i++ ) {
        if ( n[i].text != null && n[i+1].text == null &&
             0 <= n[i].row+1 && n[i].row+1 < o.length &&
             o[ n[i].row + 1 ].text == null &&
             n[i+1] == o[ n[i].row + 1 ] )
        {
            n[i+1] = { text: n[i+1], row: n[i].row + 1 };
            o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
        }
    }

    for ( var i = n.length - 1; i > 0; i-- ) {
        if ( n[i].text != null && n[i-1].text == null &&
             0 <= n[i].row-1 && n[i].row-1 < o.length &&
             o[ n[i].row - 1 ].text == null &&
             n[i-1] == o[ n[i].row - 1 ] )
        {
            n[i-1] = { text: n[i-1], row: n[i].row - 1 };
            o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
        }
    }

    return { o: o, n: n };
}

function diffAggregate(words) {
    var phrases = new Array();
    var cur = null;
    var wordcount = 0;

    // start at virtual index -1 to check for removed words at beginning of
    // text
    for ( var i = -1; i < words.n.length; i++ ) {
        if ( i!=-1 && words.n[i].text == null ) {
            if (!cur) {
                cur = { o: "", n: "" };
                phrases.push(cur);
            }
            cur.n += " " + words.n[i];
            wordcount ++;
        } else {
            var pre = "";
            var j = i==-1 ? 0 : words.n[i].row + 1;
            while ( j < words.o.length && words.o[j].text == null ) {
                pre += " " + words.o[j];
                j++;
                wordcount ++;
            }
            if (pre) {
                if (!cur) {
                    cur = { o: "", n: "" };
                    phrases.push(cur);
                }
                cur.o += pre;
            }
            if (pre && words.n[i+1] && words.n[i+1].text == null) {
                // If there's an addition following, treat this as part of the
                // same change.
            } else {
                cur = null;
            }
        }
    }

    for (var i in phrases) {
        phrases[i].n = trimspaces(phrases[i].n);
        phrases[i].o = trimspaces(phrases[i].o);
    }

    return { phrases: phrases, wordcount: wordcount };
}

function diffWikiQuote(s) {
    if (!s) return s;
    if (s.match(/^\{\{.*\}\}$/)) return s;
    s = s.replace(/\"/g, "'");
    return '"'+s+'"';
}


function reverse(s) {
    var ret = '';
    for (var i = s.length-1; i >= 0; --i) {
        ret += s[i];
    }
    return ret;
}

// trim the equal chars from the front and back of o,n at a word boundary
function diffStringTrim(o, n) {
    var r = diffStringTrim0(reverse(o), reverse(n));
    return diffStringTrim0(reverse(r.o), reverse(r.n));
}

function diffStringTrim0(o, n) {
    var i = 0;
    while (i < o.length && i < n.length && o[i] == n[i]) {
        ++i;
    }

    // find index of last non-word character
    var prefix = o.substr(0, i);

    // if prefix ends with word characters and suffix starts with non-word,
    // then erase entire prefix
    if (prefix.match(/\w$/) &&
        !o.substr(i, 1).match(/^\w/) && !n.substr(i, 1).match(/^\w/))
    {
        o = o.substr(i);
        n = n.substr(i);
    } else if (prefix.match(/.*\W/)) {
        i = RegExp.lastMatch.length;
        o = o.substr(i);
        n = n.substr(i);
    } else {
        // keep entire prefix
    }
    return { o: o, n: n };
}

function diffSummary(o, n) {
    if (o == n) return "";
    if (!o) return "new";
    if (!n) return "blank";
    var words = diff( diff_split(o), diff_split(n) );
    var r = diffAggregate(words);
    if (!r.wordcount) return "";
    if (r.wordcount > diff_wikisummary_maxwords) {
        return diff_wikisummary_overflow.replace('$1', r.wordcount);
    }

    var phrases = r.phrases;
    var str = [];
    for (var i in phrases) {
        var r = diffStringTrim(phrases[i].o, phrases[i].n);
        var o = diffWikiQuote(r.o), n = diffWikiQuote(r.n);
        if (o && n) {
            str.push(o + ' → ' + n);
        } else if (o) {
            str.push('-' + o);
        } else if (n) {
            str.push('+' + n);
        } else {
            alert("## internal error 15e1b13f-bae3-4399-86c5-721786822fa2");
        }
    }

    return str.join(", ");
}

// </nowiki></pre>