Jump to content

User:Js/urldecoder.js

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Js (talk | contribs) at 18:13, 31 August 2009 (+ urlDecoderCustom call for user-defined URL processing, e.g. conversion to template). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//[[user:js/urldecoder]]

/**
 * Appends the URL-decoder button to the element with id "toolbar".
 * Returns silently when that element is missing. When window.urlDecoderKey
 * is set, it becomes the button's access key, is appended to the tooltip,
 * and the button is handed to updateTooltipAccessKeys().
 */
function urlDecoderButton(){
  var toolbar = document.getElementById('toolbar');
  if (!toolbar) {
    return;
  }

  var button = document.createElement('input');
  button.type = 'button';
  button.id = 'urlDecoder';
  button.value = '→[\[]]'; // evaluates to "→[[]]": the backslash escape is a no-op in JS
  button.title = 'Decode URL before cursor or all URLs in selected text';
  button.style.cssText = 'background:#adbede; height:22px; vertical-align:top; padding:0';
  button.onclick = urlDecoderRun;
  toolbar.appendChild(button);

  // Optional keyboard shortcut, configured by the user.
  if (window.urlDecoderKey) {
    button.accessKey = urlDecoderKey;
    button.title = button.title + ' [' + urlDecoderKey + ']';
    updateTooltipAccessKeys([button]);
  }
}
// Register the button installer only for the 'edit' and 'submit' actions.
if (wgAction == 'edit' || wgAction == 'submit') {
  addOnloadHook(urlDecoderButton);
}


function urlDecoderRun(){ //main function

var httpRegExp = '(https?:\\/\\/[^\\]\\[\\n\\r<>" ]+)' //  except []<>"
var beforeCursor = new RegExp('(\\[{0,2})'+httpRegExp+'( +[^\\]\n]+)?\\]{0,2}$', 'i') 
var localPrefix = WMPrefixes(unSecure(wgServer+wgScript))
var newText, linkSize, txtarea = document.editform.wpTextbox1
var isBeforeCursor = false

if (document.selection) { //IE/Opera
  var scrollTop = document.documentElement.scrollTop
  txtarea.focus()
  range = document.selection.createRange()
  if (!range.moveStart) return
  if (range.text){
    newText = processSelText(range.text)
  }else { //no selection
    if (!(rr=range.duplicate())) return
    rr.moveStart('character', - 1500)
    linkSize = processBeforeCursor(rr.text)
    if (!linkSize) return
    range.moveStart('character', - linkSize) //select matched
  }
  //replace text
  if (newText != range.text){
    range.text = newText
    if (navigator.userAgent.indexOf('MSIE') != -1) newText = newText.replace(/\r/g,'') //for IE: do not count \r
    range.moveStart('character', - newText.length)
    range.select()
  }
  document.documentElement.scrollTop = scrollTop //restore window scroll position

}else if (txtarea.selectionStart || txtarea.selectionStart == '0') { // Mozilla
  var scrollTop = txtarea.scrollTop, txt = txtarea.value
  txtarea.focus()
  var startPos = txtarea.selectionStart, endPos = txtarea.selectionEnd
  if (startPos != endPos){
    newText = processSelText(txt.substring(startPos, endPos))
  }else{ //no selection
	linkSize = processBeforeCursor(txt.substring((endPos-1500>0?endPos-1500:0), endPos))
    if (!linkSize) return
    startPos = endPos - linkSize //select matched
  }
  //replace text
  if (newText != txt.substring(startPos, endPos)){
    txtarea.value = txt.substring(0, startPos) + newText +	txt.substring(endPos, txtarea.value.length)
    txtarea.selectionEnd = startPos + newText.length
    txtarea.selectionStart = startPos
  }
  txtarea.scrollTop = scrollTop
}//end of main function


/**
 * Finds the last link (http:// or https://) in str that extends to the end
 * of str, and stores its replacement text in the shared variable newText.
 * Also sets the shared flag isBeforeCursor to true.
 *
 * @param {string} str text that precedes the cursor
 * @returns {number} length of the matched link text, or 0 when nothing matched
 */
function processBeforeCursor(str){
 isBeforeCursor = true
 // Take whichever scheme occurs LAST; checking https only as a fallback would
 // pick an earlier http:// URL and swallow the later https:// one as its name.
 var pos = Math.max(str.lastIndexOf('http://'), str.lastIndexOf('https://'))
 if (pos == -1) return 0
 if (pos >= 2)  str = str.substring(pos-2) //move left to include leading [s
 var ma = str.match(beforeCursor) // result: (whole string), '[', 'http:...', ' name'
 if (!ma) return 0
 if (ma[3]) //link with name: automatically add brackets
   newText = simplifyMatched(ma[0], '[', ma[2], ma[3]+']')
 else //just url: add closing bracket only if there is leading bracket
   newText = simplifyMatched(ma[0], ma[1], ma[2], ma[1]?']':'')
 return ma[0].length
}

/**
 * Processes selected text: every URL (bare or bracketed) is passed to
 * simplifyMatched(). When window.urlDecoderIntLinks is set, the target part
 * of each [[internal link is additionally passed through decodeAnchor(),
 * except targets matching the user_talk pattern below.
 *
 * @param {string} txt selected text
 * @returns {string} processed text
 */
function processSelText(txt){
  var urlPattern = RegExp('(\\[{0,2})' + httpRegExp + '([^\\]\\[\\n\\r]*?\\]\\]?)?', 'ig');
  var processed = txt.replace(urlPattern, simplifyMatched);

  if (!window.urlDecoderIntLinks) {
    return processed;
  }

  return processed.replace(/\[\[[^\]\|\n]+/g, function(internalLink){
    //skip user_talk
    if (/^\[\[user_talk:[^#]+$/i.test(internalLink)) {
      return internalLink;
    }
    return decodeAnchor(internalLink);
  });
}


/**
 * Converts one matched link; used as a String.replace callback and called
 * directly by processBeforeCursor.
 *
 * @param {string} str     the whole matched text
 * @param {string} bracket leading '[' or '[[' (empty when there is none)
 * @param {string} url     the URL part of the match
 * @param {string} [rest]  what follows the URL, e.g. ' name]'
 * @returns {string} replacement text; str itself when the match is left alone
 */
function simplifyMatched(str, bracket, url, rest){//arguments: (whole string), '[', url, ' name]'; calls decodeUrl
 var cutTail = ''
 var pos = url.indexOf("''")
 if (pos != -1) { // double ' is not allowed inside urls
   cutTail = url.substring(pos)
   url = url.substring(0, pos)
 }
 if (!bracket){//no brackets, just url
   var trail = url.match(RegExp('[,;\\\\\.:!\\?' //trailing punctuation, per Parser.php
    + (!/\(/.test(url) ? '\\)' : '') + ']+$' ))  //trailing no-matching )
   if (trail) url = url.substring(0, url.length-trail[0].length) //move these out of url
   if (/(\}\}|\|)$/.test(url)) return str //trailing | or }}  can be a part of template, skip to be safe
   // everything after the (possibly shortened) url is kept verbatim
   return decodeUrl(url) + str.substring(url.length)
 }else if (rest) //both brackets and possibly name
   // The part cut off at '' goes into the link name, so it is not lost.
   return decodeUrl(url, (cutTail + rest).replace(/\]+$|^ +| +$/g,'')) //trim ending brackets and spaces in 'name]'
 else return str //probably broken wikicode in selected text
}

/**
 * Percent-decodes a URL and, when possible, converts it to a wikilink.
 *
 * @param {string} url    the URL
 * @param {string} [name] link text; an empty string still means "bracketed link"
 * @returns {string} '[[link|name]]' when toWikilink() (or urlDecoderCustom)
 *   yields an internal link, '[url name]' when name is a string, else the url
 */
function decodeUrl(url, name){ //url -> %-decoded -> [[link|name]] (if possible); name is optional
 url = unSecure(url)
 // decodeURI throws on malformed sequences; the URL is then left as it is
 if (url.indexOf('%') != -1) try { url = decodeURI(url) } catch(e){}  //decode %
 // decodeURI leaves escapes of reserved characters alone; hex digits may be in either case
 url = url.replace(/%(3B|2F|2C|3A)/ig, function(esc){ return decodeURIComponent(esc) }) //decode ;/,:
 url = url.replace(/[ <>"\[\]]/g, function(ch){ return encodeURIComponent(ch) }) //" disallowed chars
 if (isBeforeCursor)
   for (var n in window.urlDecoderEngNames) //to eng keywords
     url = url.replace(RegExp('(title=|wiki\/)('+urlDecoderEngNames[n]+':)'), '$1' + n + ':')
 var link = toWikilink(url)
 if (!link && window.urlDecoderCustom
     && (link = urlDecoderCustom(url)) && /^(https?:\/\/|\{\{)/.test(link))
	  {url = link; link = null} //still external
 if (link){
  link = link.replace(/%(3f|26)/ig, function(esc){ return decodeURIComponent(esc) }) //decode ?&
  if (wgNamespaceNumber==0 || wgNamespaceNumber==14) link=link.replace(/^:/,'') //interwiki?
  return '[\[' + link + (name?'|'+name:'') + ']]'
 }else if (typeof name == 'string') return '[' + url + (name?' '+name:'') + ']' //empty name
 else return url
}
	 
/**
 * Converts a URL to the target of a wikilink.
 *
 * First tries the prefixes in window.urlDecoderPrefixes (the bugzilla prefix
 * is always added), then URLs of the form scheme://host/wiki/Title whose host
 * is recognized by WMPrefixes(). Project and language prefixes are added only
 * where they differ from the shared localPrefix.
 *
 * @param {string} url http:// or https:// URL
 * @returns {?string} link target, or null when the URL cannot be converted
 */
function toWikilink(url){//url -> wikilink, otherwise null
 var hasOwn = Object.prototype.hasOwnProperty
 //try bugzilla and user-defined prefixes
 if (!window.urlDecoderPrefixes) window.urlDecoderPrefixes = {}
 var prefixMap = window.urlDecoderPrefixes
 prefixMap['https://bugzilla.wikimedia.org/show_bug.cgi?id=']='mediazilla'
 var lowerUrl = url.toLowerCase()
 for (var key in prefixMap){
   if (!hasOwn.call(prefixMap, key)) continue
   // search and cut at the same (case-insensitive) offset
   var at = lowerUrl.indexOf(key.toLowerCase())
   if (at != -1) return prefixMap[key] + ':' + url.substring(at + key.length)
 }
 //try WM prefixes
 var rest = url.replace(/^https?:\/\//i, '') // host/path without the scheme
 var parts = rest.split('/')
 if (parts[1]!='wiki' || url.indexOf('?')!=-1) return null
 // WMPrefixes is always given an http:// form of the URL
 var linkPrefix = WMPrefixes('http://' + rest.toLowerCase()), prefixes = ''
 if (!linkPrefix) return null
 var title = rest.substring(parts[0].length + parts[1].length + 2) //get part after  /wiki/
 title = decodeAnchor(title)
 var local = localPrefix || [] // localPrefix is null when the local host is not recognized
 if (linkPrefix[0] && (linkPrefix[0] != local[0])) prefixes = linkPrefix[0]
 if (linkPrefix[1] && (linkPrefix[1] != local[1])) prefixes += ':' + linkPrefix[1]
 if (prefixes || isColonNeeded(title)) prefixes += ':' //dividing colon or cat/file leading colon
 return prefixes + title
}


/**
 * Simplifies an internal link: replaces _ and %20 with spaces, trims spaces
 * at both ends, then decodes the dot-encoded anchor (".XX" byte sequences).
 * Links without exactly one '#' are returned after the space replacement only.
 * If decoding would produce '' inside the anchor, the anchor is left encoded.
 *
 * @param {string} link link target, optionally with #anchor
 * @returns {string} simplified link
 */
function decodeAnchor(link){//simplify internal link: replace %20 and _ then decode anchor
 function deChar(ss){ // ".XX.YY" -> character; returned unchanged if it is not valid UTF-8
  try{ss = decodeURIComponent(ss.replace(/\.([0-9A-F][0-9A-F])/g, '%$1'))} catch(e){}
  return ss
 }

 link = link.replace(/(_|%20)/g, ' ').replace(/^ +| +$/g, '')
 var parts = link.split('#')
 if (parts.length != 2) return link //no anchor
 var anchor = parts[1], hidIdx = -1, hidden = []
 //decode 4, 3 and 2-byte: http://en.wikipedia.org/wiki/UTF-8
 anchor = anchor.replace(/\.F[0-4]\.[89AB][\dA-F]\.[89AB][\dA-F]\.[89AB][\dA-F]/g, deChar)
 anchor = anchor.replace(/\.E[\dA-F]\.[89AB][\dA-F]\.[89AB][\dA-F]/g, deChar)
 anchor = anchor.replace(/\.[CD][\dA-F]\.[89AB][\dA-F]/g, deChar)
 //hide IPs: the g flag is required so that EVERY address is protected from the
 //1-byte decoding below (e.g. ".28" in 5.6.7.28 would otherwise become "(")
 anchor = anchor.replace(
/(?:^|[^0-9A-F\.])(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)/g,
   function(s){ hidden[++hidIdx] = s; return '\x01' + hidIdx + '\x02' }
 )
 //decode 1-byte chars: all symbols except  -.:_  and []{} prohibited in links
 anchor = anchor.replace(/\.[2-7][0-9A-F]/g, function(hhh){
   var ch = deChar(hhh)
   if ('!"#$%&\'()*+,/;<=>?@\\^`~'.indexOf(ch) >= 0) return ch; else return hhh
 })
 //unhide IPs and return; a function replacement avoids any $-pattern expansion
 function restore(idx){
   anchor = anchor.replace('\x01'+idx+'\x02', function(){ return hidden[idx] })
 }
 for (var i=hidIdx; i>=0; i--) restore(i)
 if (anchor.indexOf("''") != -1) return link //cannot have double '' in link
 else return parts[0] + '#' + anchor
}

/**
 * Maps the host of a Wikimedia URL to interwiki prefixes,
 * e.g. http://en.wikipedia.org/wiki/...  -> ['w', 'en'].
 *
 * Accepts http://, https:// and protocol-relative (//host/...) URLs.
 * A mobile "m" label in the host (en.m.wikipedia.org) is skipped.
 *
 * @param {string} url URL; the host is expected to be lowercase
 * @returns {?Array} [project, language] (either may be ''), or null when the
 *   host is not a recognized .org project
 */
function WMPrefixes(url){
 var hasOwn = Object.prototype.hasOwnProperty
 var standalone = {'mediawiki':'mw','wikimediafoundation':'foundation'}
 var sister = {'wikipedia':'w','wikibooks':'b','wikinews':'n','wikiquote':'q',
  'wikisource':'s','wikiversity':'v','wiktionary':'wikt'}

 // strip the scheme instead of assuming it is exactly 7 characters long
 var host = url.replace(/^(?:https?:)?\/\//i, '').split('/')[0]
 var dd = host.split('.') // -> ['en','wikipedia','org']
 if (dd.pop() != 'org') return null
 var proj = '', lang = '', part = dd.pop()

 // hasOwnProperty: inherited keys such as "constructor" must not count as projects
 if (hasOwn.call(standalone, part)){
   proj = standalone[part]
 }else if (hasOwn.call(sister, part)){
   proj = sister[part]
   lang = dd.pop()
   if (lang == 'm') lang = dd.pop() // mobile site label, not a language code
   if (!lang || lang=='www') lang = ''
   else if (lang=='test') {lang=''; proj='testwiki'}
 }else if (part == 'wikimedia'){
   part = dd.pop()
   if (part == 'm') part = dd.pop() // mobile site label
   if (!part || part=='www') proj = 'foundation'
   else if (/^(meta|commons|incubator|species|strategy)$/.test(part)) proj = part
   else return null
 }else return null
 return [proj, lang]
}

/**
 * Rewrites the first https://secure.wikimedia.org/<project>/<lang>/<path>
 * occurrence in url to http://<lang>.<project>.org/<path>.
 * Text without such an address is returned unchanged.
 */
function unSecure(url){
  var secureGateway = /https:\/\/secure\.wikimedia\.org\/(\w+)\/(\w+)\/([^\]\|\n\r ]+)/;
  var plainForm = 'http://$2.$1.org/$3';
  return url.replace(secureGateway, plainForm);
}

/**
 * Tells whether a page title starts with a file/category namespace name,
 * in which case toWikilink() puts a colon in front of it.
 *
 * Localized names come from window.urlDecoderNS[wgContentLanguage]
 * ('|'-separated); when that entry is not a string they are requested
 * through requestColonNS(). 'file' and 'category' are always checked.
 *
 * @param {string} pg page title
 * @returns {boolean}
 */
function isColonNeeded(pg){
 if (pg.indexOf(':')==-1) return false
 // explicit window property: same global as before, without an undeclared assignment
 var nsMap = window.urlDecoderNS = window.urlDecoderNS || {}
 nsMap.en = 'image'
 var ns = nsMap[wgContentLanguage]
 if (typeof ns != 'string') ns = requestColonNS()
 // requestColonNS() returns null on failure; do not let "null" become a namespace name
 var names = (ns ? ns + '|' : '') + 'file|category'
 return RegExp('^('+names+') *:','i').test(pg)
}

/**
 * Requests the names and aliases of namespaces 6 and 14 from the API with a
 * synchronous request, stores them lowercased and '|'-joined in
 * window.urlDecoderNS[wgContentLanguage], and shows the resulting setting
 * through showMsg().
 *
 * @returns {?string} the '|'-joined names, or null when the request object is
 *   unavailable or the response cannot be parsed / lacks the expected data
 */
function requestColonNS(){
 var whatsthis = ' &nbsp; &nbsp; <a href="http://en.wikipedia.org/wiki/user:js/urldecoder#Localization" target=_blank>(?)</a>'
 showMsg('Requesting namespaces...'+whatsthis)
 var aj = sajax_init_object(), q = null, ns = []
 if (!aj) return null
 aj.open('GET', '/w/api.php?format=json&action=query&meta=siteinfo&siprop=namespaces|namespacealiases', false)
 aj.send(null)
 try {
   if (window.JSON && JSON.parse) q = JSON.parse(aj.responseText)
   // NOTE(review): eval of a network response, kept only for browsers without JSON
   else q = eval('('+aj.responseText+')')
   q = q.query
 } catch(e){return null}
 // e.g. an API error response has no "query" part
 if (!q || !q.namespaces || !q.namespaces[6] || !q.namespaces[14]) return null
 ns.push(q.namespaces[6]['*']); ns.push(q.namespaces[14]['*'])
 for (var k in q.namespacealiases)
   if (q.namespacealiases[k].id==6 || q.namespacealiases[k].id==14) 
      ns.push(q.namespacealiases[k]['*'])
 ns = ns.join('|').toLowerCase()
 window.urlDecoderNS = window.urlDecoderNS || {}
 window.urlDecoderNS[wgContentLanguage] = ns
 // the closing tag was previously a second opening <code>
 showMsg("<code>urlDecoderNS = {'"+wgContentLanguage+"':'"+ns+"'}</code>"+whatsthis)
 return ns
} 

/**
 * Shows an HTML message in the element with id "edit-msg", creating that
 * element right before the element with id "wpTextbox1" on first use.
 * hideMsg is scheduled to run 15 seconds later.
 */
function showMsg(htm){
  var box = document.getElementById('edit-msg');

  if (!box) {
    var editBox = document.getElementById('wpTextbox1');
    box = document.createElement('div');
    box.id = 'edit-msg';
    box.style.cssText = 'margin:5px; border:1px solid gray';
    editBox.parentNode.insertBefore(box, editBox);
  }

  box.innerHTML = htm;
  box.style.display = '';
  setTimeout(hideMsg, 15000);
}

/** Hides the element with id "edit-msg" by setting its display to 'none'. */
function hideMsg(){
  document.getElementById('edit-msg').style.display = 'none';
}

}