Jump to content

User:Plastikspork/whitespace.js

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// ---------------------------------------  --------------------------------------- //
// ----------------------------------- Credits ------------------------------------ //
//
// These javascript tools were inspired by
//      [[Wikipedia:WikiProject User scripts/Scripts/Formatter]]
// Some of the functions were created by modifying existing Formatter functions
//
// This script is intended to be complementary to that script, with little
// to no duplication of function.
 
// ---------------------------------- Disclaimer ---------------------------------- //
//
// Use at your own risk and make sure you check the edit changes before you save
//
// Let me know [[User_Talk:Plastikspork]] if you find bugs!
 
// ----------------------------- Installing the Script ---------------------------- //
//
// (1) Open/Create your USERNAME/monobook.js page, where USERNAME is your username.
//     A quick way to get there is to go to your user page, then append
//     '/monobook.js' to the end of the URL.
//
// (2) Put the following command on your monobook.js page:
//          importScript('User:Plastikspork/whitespace.js');
//
// (3) Save the page and reload it by following the instructions at the top of your
//     monobook.js page.  For example, Ctrl+Shift+R in Firefox.
 
// ------------------------------- Using the Script ------------------------------- //
//
// (1) This is a collection of javascript functions which can be called from other
//     scripts.
//
//  Note: Some functions could run slowly on very large pages, but do 
//        always eventually complete in my experience.
// ---------------------------------------  --------------------------------------- //
 
 
// ------------------------------- spork_whitespace ------------------------------- //
// Purpose: Removes extra whitespace and nonbreaking space
//
// Parameters:
//   str - wikitext to clean up (string)
//
// Returns: the cleaned-up wikitext (string)
//
// Examples:
//   'a \t b'                -> 'a b'
//   'x\n== Heading ==\ny'   -> 'x\n==Heading==\ny'
//   '{{ cite web }}'        -> '{{cite web}}'
//   '<ref> Source </ref>'   -> '<ref>Source</ref>'
//   three U+00A0 in a row   -> '{{nbsp|3}}'
//
// Notes: Not all editors agree on utility of whitespace
//        "nbsp" below means the U+00A0 character itself.  It is always written
//        as the escape \u00A0 so that it cannot be confused with, or silently
//        converted into, an ordinary space when this file is edited or copied.
//        The rules run in the order listed; later rules see the output of
//        earlier ones.
//
function spork_whitespace(str) {
  // Tabs to spaces:
  str = str.replace(/\t/g, ' ');
  // Double or more (ordinary) spaces
  str = str.replace(/ {2,}/g, ' ');
  // Space or nbsp at the end of a line
  str = str.replace(/(?:\u00A0| )+([\r\n])/g, '$1');
  // Unnecessary nbsp in tables (a cell holding nothing but an nbsp)
  str = str.replace(/(\|)[ ]*\u00A0[ ]*(\|\|)/g, '$1 $2');
  // nbsp next to a space, before start of a sentence (either order)
  str = str.replace(/([^;])\u00A0 ([A-Z])/g, '$1 $2');
  str = str.replace(/([^;]) \u00A0([A-Z])/g, '$1 $2');
  // Out of control nbsp: runs of 2 to 6 become {{nbsp|N}}.  Longer runs are cut
  // into chunks of 6 from the left; a single leftover nbsp is kept as it is.
  str = str.replace(/\u00A0{2,6}/g, function (run) {
    return '{{nbsp|' + run.length + '}}';
  });
  // Space inside ref tags
  str = str.replace(/(<ref[^<>\/]*>) /gi, '$1');
  str = str.replace(/ (<\/ref>)/gi, '$1');
  // Space inside templates
  str = str.replace(/({{) /g, '$1');
  str = str.replace(/([^\\]|[\r\n]) (}})/g, '$1$2');

  // Space just inside section headings.  The m flag makes ^ and $ match at
  // every line, so headings anywhere in the text are handled, not only a
  // heading on the first or last line.
  str = str.replace(/ (=[=]+)[\t ]*$/gm, '$1');
  str = str.replace(/^[\t ]*(=[=]+) /gm, '$1');

  return str;
}

// --------------------------------- spork_ws_refs -------------------------------- //
// Purpose: Tidies punctuation and whitespace around references, where a
//          "reference" is a <ref>...</ref> pair, a self-closing <ref ... />
//          tag, or a bracketed external http(s) link
//
// Parameters:
//   str - wikitext to clean up (string)
//
// Returns: the cleaned-up wikitext (string)
//
// Examples:
//   'Fact <ref>Src</ref>.'    -> 'Fact.<ref>Src</ref> '
//   'One.<ref>A</ref>. Two'   -> 'One.<ref>A</ref> Two'
//
// Notes: Each repeated rule is applied at most 11 times so that the function
//        always terminates.
//
function spork_ws_refs(str) {
  var lastPass = 10;
  var pass;

  // Repeated punctuation before and after a ref tag or ref link:
  // keep the leading mark, drop the trailing one
  for (pass = 0; pass <= lastPass; pass++) {
    if (str.search(/[:;,\.\?][\s]*(?:[\s]*<ref[^<>\/]*>[^<>]*<\/ref>[\s]*|[\s]*<ref[^e][^<>]*\/>[\s]*|[\s]*\[*\[https?:\/\/[^ \[\]\|]+\]\]*[\s]*)+[;\.,]/gim) < 0) {
      break;
    }
    str = str.replace(/([:;,\.\?][\s]*(?:[\s]*<ref[^<>\/]*>[^<>]*<\/ref>[\s]*|[\s]*<ref[^e][^<>]*\/>[\s]*|[\s]*\[*\[https?:\/\/[^ \[\]\|]+\]\]*[\s]*)+)[;\.,]/gim, '$1');
  }

  // Punctuation after a ref tag or ref link: move , . ? in front of it
  for (pass = 0; pass <= lastPass; pass++) {
    if (str.search(/(?:[\s]*<ref[^<>\/]*>[^<>]*<\/ref>[\s]*|[\s]*<ref[^e][^<>]*\/>[\s]*|[\s]*\[*\[https?:\/\/[^ \[\]\|]+\]\]*[\s]*)+[,\.\?]+/gim) < 0) {
      break;
    }
    str = str.replace(/((?:[\s]*<ref[^<>\/]*>[^<>]*<\/ref>[\s]*|[\s]*<ref[^e][^<>]*\/>[\s]*|[\s]*\[*\[https?:\/\/[^ \[\]\|]+\]\]*[\s]*)+)([,\.\?]+)/gim, '$2$1');
  }

  // Same for ; and : but only when they follow on the same line
  for (pass = 0; pass <= lastPass; pass++) {
    if (str.search(/(?:[\s]*<ref[^<>\/]*>[^<>]*<\/ref>[\t ]*|[\s]*<ref[^e][^<>]*\/>[\t ]*|[\s]*\[*\[https?:\/\/[^ \[\]\|]+\]\]*[\t ]*)+[;:]+/gim) < 0) {
      break;
    }
    str = str.replace(/((?:[\s]*<ref[^<>\/]*>[^<>]*<\/ref>[\t ]*|[\s]*<ref[^e][^<>]*\/>[\t ]*|[\s]*\[*\[https?:\/\/[^ \[\]\|]+\]\]*[\t ]*)+)([;:]+)/gim, '$2$1');
  }

  // Space before a ref tag or ref link: move it behind the reference
  for (pass = 0; pass <= lastPass; pass++) {
    if (str.search(/[\s]+(?:[\s]*?<ref[^<>\/]*>[^<>]*<\/ref>[\s]*?|[\s]*?<ref[^e][^<>]*\/>[\s]*?|[\s]*?\[*\[https?:\/\/[^ \[\]\|]+\]\]*[\s]*?)/gim) < 0) {
      break;
    }
    str = str.replace(/([\s]+)((?:[\s]*?<ref[^<>\/]*>[^<>]*<\/ref>[\s]*?|[\s]*?<ref[^e][^<>]*\/>[\s]*?|[\s]*?\[*\[https?:\/\/[^ \[\]\|]+\]\]*[\s]*?))/gim, '$2$1');
  }

  // Space before a footnote link: the space is re-emitted after '[[#fn'
  str = str.replace(/ (\[\[#fn)/gi, '$1 ');

  // ref inside a quotation: move the closing quote in front of the ref
  str = str.replace(/("[^\r\n]*[,\.])([\s]*)(<ref[^<>]*>[^<>]*<\/ref>|<ref[^e][^<>]*\/>)([\s]*)(")/gi, '$1$5$3$2$4');

  return str;
}

// -------------------------------- spork_ws_nowrap ------------------------------- //
// Purpose: Replaces chains of &nbsp; entities with {{nowrap|...}} and then
//          tidies the resulting nowrap templates
//
// Parameters:
//   str - wikitext to clean up (string)
//
// Returns: the cleaned-up wikitext (string)
//
// Examples:
//   'Speed 10&nbsp;km&nbsp;h now'  -> 'Speed {{nowrap|10 km h}} now'
//   '[[Foo|New&nbsp;York]]'        -> '{{nowrap|[[Foo|New York]]}}'
//   '{{nowrap|100%}}'              -> '100%'
//
// Notes: Each repeated rule is applied at most 11 times so that the function
//        always terminates.
//
function spork_ws_nowrap(str) {
  var lastPass = 10;
  var pass;

  // Simplify excessive nbsp using nowrap: words joined by two or more &nbsp;
  str = str.replace(/((?:[^ <>\]\[\|}{\r\n]|<\/?sup>|<\/?sub>)+)&nbsp;((?:[^ <>\]\[\|}{\r\n]|<\/?sup>|<\/?sub>|&nbsp;)+)&nbsp;((?:[^ <>\]\[\|}{\r\n]|<\/?sup>|<\/?sub>)+)/gi, '{{nowrap|$1 $2 $3}}');
  // A single &nbsp; inside the label of a piped link
  str = str.replace(/(\[\[[^\|\]\[]*\|[^{}\]\[\|]*)&nbsp;([^{}\]\[\|]*\]\])/gi, '{{nowrap|$1 $2}}');

  // Move nowrap outside of links
  str = str.replace(/(\[\[[^\|\]\[]*)\{\{nowrap\|([^{}\]\[]*)}}(\]\])/gi, '{{nowrap|$1$2$3}}');

  // Replace = sign inside of nowrap with {{=}} due to template parsing
  // technicalities; every pass converts one = sign per nowrap
  for (pass = 0; pass <= lastPass; pass++) {
    if (str.search(/\{\{nowrap\|(?:{{=}}|[^{}])+=/gi) < 0) {
      break;
    }
    str = str.replace(/(\{\{nowrap\|(?:{{=}}|[^{}])+)=/gi, '$1{{=}}');
  }

  // Move trailing punctuation outside of nowrap
  str = str.replace(/(\{\{nowrap\|(?:{{=}}|[^}])+)([\.,:\?])(}})/gi, '$1$3$2');
  // Extend nowrap if it ends with a space
  str = str.replace(/(\{\{nowrap\|(?:{{=}}|[^}])+ )(}})([\.,:\?]|[a-z]*)/gi, '$1$3$2');

  // Remove redundant nbsp inside nowrap; every pass converts one &nbsp; per
  // nowrap
  for (pass = 0; pass <= lastPass; pass++) {
    if (str.search(/\{\{nowrap\|(?:{{=}}|[^}])+&nbsp;/g) < 0) {
      break;
    }
    str = str.replace(/(\{\{nowrap\|(?:{{=}}|[^}])+)&nbsp;/g, '$1 ');
  }

  // Unnecessary nowrap: content that has no space in it to wrap at
  str = str.replace(/\{\{nowrap\|(-?[a-z0-9\.%]*)}}/gi, '$1');
  // Unnecessary nowrap: a simple "a = b" is written with plain &nbsp; instead
  str = str.replace(/\{\{nowrap\|([^{}\| ]*) {{=}} ([^{}\| ]*)}}/gi, '$1&nbsp;=&nbsp;$2');

  return str;
}

// --------------------------- spork_ws_tables_infoboxes -------------------------- //
// Purpose: Tidies whitespace and redundant markup in wikitext tables,
//          infoboxes and similar pipe-delimited constructs
//
// Parameters:
//   str - wikitext to clean up (string)
//
// Returns: the cleaned-up wikitext (string)
//
// Examples:
//   '\n| style = "color: red;" | x'  -> '\n| style="color:red" | x'
//   '\n|-\n|-\n| a'                  -> '\n|-\n| a'
//
// Notes: Several rules are applied twice in a row because a single global
//        replace cannot rewrite matches that overlap one another.
//
function spork_ws_tables_infoboxes(str){
  var pass;

  // Space before the start of a line before an entry in a table, infobox, citation, ...
  str = str.replace(/([\r\n]) (!|\|)/g, '$1$2');

  // Disabled rule - space before a pipe in a table, infobox, citation:
  //   str=str.replace(/([^\|!]) (\||!)/g, '$1$2');
  // Disabled rule - space at the start of an entry in a table:
  //   str=str.replace(/(!!|\|\|) ([^\|!+\-])/g, '$1$2');

  // Put pipes at the start of lines in infoboxes (two passes)
  for (pass = 0; pass < 2; pass++) {
    str = str.replace(/(\||!)([\r\n])([^!\|\r\n]*=(?:[^!\|\r\n]|\[\[[^\[\]\|]*\|[^\[\]\|]*\]\]|{{[^{}]*}})*)(\|[ ]*)([\r\n])/gim, '$2$1$3$5$4');
  }

  // Space around equals-sign in common table entries
  str = str.replace(/([^a-z])(bgcolor|style|colspan|rowspan|align) (=)/gi, '$1$2$3');
  str = str.replace(/([^a-z])(bgcolor|style|colspan|rowspan|align)(=) /gi, '$1$2$3');

  // Extra space in style parameters
  str = str.replace(/([\|! ]style="[^"\|!]*)[ ]*(:) /gi, '$1$2');
  // Trailing semicolon in style parameters
  str = str.replace(/(="[^\"\|}{]*);[ ]*(")/g, '$1$2');

  // Simplify alignment
  str = str.replace(/([\r\n]\||\|\|)([^\|<>]*)style[ ]*=[ ]*"text-align[ ]*:[ ]*([a-z]+)[ ;]*"/gi, '$1$2align="$3"');

  // Quotes around numbers
  str = str.replace(/(colspan|rowspan)[ ]*(=)[ ]*"([0-9]+)"/gi, '$1$2$3');

  // Unnecessary colspan=1 and rowspan=1
  str = str.replace(/(!!|\|\|)[ ]*(colspan|rowspan)=1[ ]*\|/gi, '$1');
  str = str.replace(/([\r\n]!|[\r\n]\|)[ ]*(colspan|rowspan)=1[ ]*\|/gi, '$1');
  str = str.replace(/[ ]*(?:colspan|rowspan)=1([^0-9])/gi, ' $1');

  // Redundant bold in table headings
  str = str.replace(/([\r\n]!|!!)[ ]*'''([^\|!]*)'''([\r\n]|!!|\|)/gi, '$1$2$3');
  str = str.replace(/([\r\n]!|!!)([^\|!]*\|)[ ]*'''([^\|!]*)'''([\r\n]|\!\|)/gi, '$1$2$3$4');

  // Redundant align in table headings
  str = str.replace(/([\r\n]!|!!)[ ]*align[ ]*=[ "]*center[ "]*/gi, '$1 ');
  str = str.replace(/([\r\n]!|!!)([^\|! ]*) align[ ]*=[ "]*center[ "]*/gi, '$1$2 ');

  // Redundant pipe (two passes)
  for (pass = 0; pass < 2; pass++) {
    str = str.replace(/([\r\n]!|!!|[\r\n]\||\|\|) \|([^\|!])/gim, '$1$2');
  }

  // Redundant table row separation (two passes)
  for (pass = 0; pass < 2; pass++) {
    str = str.replace(/([\r\n]\|-)[ ]*[\r\n][ \r\n]*\|-[ ]*([\r\n])/gim, '$1$2');
  }

  return str;
}