Jump to content

User:DeadlyPenguin/extractFirst.xsl

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
<?xml version="1.0" encoding="UTF-8"?>
 <!-- This is for extracting the first definition of a word from wiktionary, that can be used in a cross site manner. Consider: 
 http://en.wiktionary.org/w/api.php?action=parse&prop=text&page=word&format=xml&xslt=MediaWiki:extractFirst.xsl -->
 <xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output method='html'/>

 <!-- Copyright / licence notice as a result-tree fragment: a link to fr.wiktionary.org followed by a
      link to the CC-BY-SA 3.0 deed. It is copied into the <small id="copyright-notice"> element of the
      generated page with xsl:copy-of. -->
 <xsl:variable name="copyright"> © <a href="http://fr.wiktionary.org/wiki/"> Wiktionnaire</a>. Paru en <a href="http://creativecommons.org/licenses/by-sa/3.0/deed.fr" rel="license copyright"> CC-BY-SA 3.0 </a></xsl:variable>
 <xsl:template match="/">
 <html>
 <head>
 <meta name="generator" content="Wiktionary Extract XSLT 1.08ish-FR-non-standard"/>
 <base target='_blank' href='http://fr.wiktionary.org' />
<title> Wiktionary extract</title>
 
 <!-- Styles for the generated extract page.
      #wordThisIsFor is the id setup() gives the headword link; its class is "wtif" followed by the
      showWord level, so a.wtif1 (showWord=1) is rendered like plain bold text while level 2 keeps the
      normal link appearance.
      #container wraps the whole widget and #error styles the span produced by the api/error template.
      NOTE(review): a.new and the .disambig-see-also classes presumably target markup that arrives
      inside the parsed wiki HTML; confirm against the API output. -->
 <style>
 #wordThisIsFor { font-weight:bold;}
 a.wtif1 { color: black; text-decoration: none;}
 a.wtif1:hover {text-decoration: underline;}
 .disambig-see-also, .disambig-see-also-2 {display:inline;}
 #container {background-color:white; padding: 0.5em; border: solid black thin;}
 a.new {color: red;}
 #error {color: red;font-size:larger;}
 </style>
 <script type='text/javascript'>
 /*<![CDATA[*/
 /**
  * Onload handler for the extract page.
  *
  * Reads the parsed wiki HTML that the stylesheet placed (as text) inside #src, pulls the first
  * definition(s) out of it with regular expressions and writes the result into #word-list.
  *
  * Query parameters read from location.search:
  *   page      title of the page being shown (required; without it the function returns early)
  *   lang      optional preferred language section id, at most 3 characters
  *   showWord  "none" (default) = no headword, "link" = headword as a link, anything else = bold headword
  *   count     number of definitions to show (default 1; non-numeric values fall back to 1)
  *   alt       optional alternative title to try when the lookup fails
  *   rd        redirect depth counter maintained by this function
  *
  * When extraction fails, an error text is appended to #word-list, the "more" link text becomes the
  * create label, and (for fewer than 9 redirects) the page is reloaded with a variant of the title:
  * first letter lower-cased, a leading "x'" elision stripped, the whole title lower-cased, or the
  * alt parameter, in that order.
  */
 function setup () {
  var createLink = '«Créer»'; // text only

  // Returns the raw (still URL-encoded) value of a query parameter, or null when it is absent.
  // Accepts the parameter in any position, including directly after the "?".
  function getParam(name) {
   var found = location.search.match(new RegExp('[?&]' + name + '=([^&]*)'));
   return found ? found[1] : null;
  }

  // decodeURIComponent throws URIError on malformed escapes; fall back to the raw text instead.
  function safeDecode(text) {
   try {
    return decodeURIComponent(text);
   } catch (e) {
    return text;
   }
  }

  // Escapes text for use as HTML element content (every occurrence, not just the first).
  function escText(text) {
   return text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
  }

  // Escapes text for use inside a double-quoted HTML attribute value.
  function escAttr(text) {
   return escText(text).replace(/"/g, '&quot;');
  }

  // Escapes regular-expression metacharacters so the text is matched literally.
  function escapeRegExp(text) {
   return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  var loc = getParam('page');
  if (loc === null) {
   // No page requested: nothing to extract and nothing to link to. Whatever the stylesheet
   // already rendered into #word-list is left untouched.
   return;
  }

  var pageURL = '/w/index.php?title=' + loc;
  var pageAttr = escAttr(pageURL); // safe to embed in href="..."
  var src = document.getElementById('src');
  var display = document.getElementById('word-list');
  var moreLink = document.getElementById('more-link');

  var preferLang = getParam('lang');
  if (!preferLang || preferLang.length > 3) { preferLang = null; }

  src.normalize();
  // #src may have no text child at all; treat that as empty input so the fallback path below
  // runs instead of an uncaught TypeError.
  var html = (src.firstChild && src.firstChild.data) || '';
  //this assumes attribute order doesn't change!!!
  html = html.replace(/<div id="toctitle">[\s\S]*?<\/div>/, '');
  var def = html; //may be narrowed to the preferred language section below.

  var rdRaw = getParam('rd'); //is this from redirect. + converts to numeric.
  var rd = rdRaw !== null ? (+rdRaw + 1) : 1; //redirection level. NaN fails the limit check below.

  //0 = none, 1 = bold, 2 = bold link.
  var showWordRaw = getParam('showWord');
  if (showWordRaw === null) { showWordRaw = 'none'; }
  var showWord = 0;
  if (showWordRaw !== 'none') { showWord++; }
  if (showWordRaw === 'link') { showWord++; }

  var countRaw = getParam('count');
  var numbDfn = countRaw !== null ? +countRaw : 1; //default to 1
  if (isNaN(numbDfn)) { numbDfn = 1; }

  var decodedWord = safeDecode(loc);
  var escWord = escText(decodedWord);
  //note: escWord does not escape quotes. DO NOT PUT AS ATTRIBUTE VALUE

  try {
   if (preferLang) {
    //strip off all definitions before target lang.
    var section = html.match(new RegExp('<span class="mw-headline" id[^>]*><span id="' + escapeRegExp(preferLang) + '">[\\s\\S]*$'));
    if (section && /<ol>[\s\S]*?<li>/.test(section[0])) {
     //only switch when the section actually has content
     def = section[0];
    }
   }

   var langMatch = def.match(/<span class="mw-headline" id[^>]*>([\s\S]*?)<\/span>/);
   if (!langMatch) { throw new Error('no language headline found'); }
   var intro = '(' + langMatch[1] + ') ';
   if (showWord) {
    intro = '<a href="' + pageAttr + '" id="wordThisIsFor" class="wtif' + showWord + '" >' + escWord + '</a> ' + intro;
   }

   var listMatch = def.match(/<ol>[\s\S]*?<\/ol>/);
   if (!listMatch) { throw new Error('no definition list found'); }
   //FIXME: the extraction method does not properly strip nested divs. This results in image thumbnails being left behind
   var list = listMatch[0]
    .replace(/<dl>[\s\S]*?<\/dl>/g, '')
    .replace(/<div[^>]*>[\s\S]*?<\/div>/g, '')
    .replace(/<\/div>/g, '')
    .replace(/<ul>[\s\S]*?<\/ul>/g, '')
    // Function replacement: a "$" in the page title must not be read as a replacement pattern.
    .replace(/<a href="(#[^"]*)">/g, function (whole, fragment) {
     return '<a href="' + pageAttr + fragment + '">';
    });

   if (numbDfn === 1) {
    var first = list.match(/<li>([\s\S]*?)<\/li>/);
    if (!first) { throw new Error('no definition found'); }
    display.innerHTML = intro + first[1];
   } else {
    //this use not well supported...
    var items = list.match(/<li>([\s\S]*?)<\/li>/g);
    if (!items) { throw new Error('no definition found'); }
    var tmp = intro + ' <ul>';
    for (var i = 0; i < numbDfn && i < items.length; i++) {
     tmp += items[i];
    }
    display.innerHTML = tmp + '</ul>';
   }
  } catch (e) {
   //page does not exist, not well formed, the regexes did not match, etc
   display.appendChild(document.createTextNode('Could not retrieve definition of ' + decodedWord + "."));
   moreLink.firstChild.data = createLink;
   if (rd < 9) { //arbitrary to prevent infinite loops
    var dLoc = decodedWord;
    var remAlt = false;
    var newLoc = dLoc.charAt(0).toLowerCase() + dLoc.substring(1); //this should not be urlEncoded.

    //try some other redirections.
    if (newLoc === dLoc && dLoc.charAt(1) === "'") { newLoc = dLoc.substring(2); } //for j'<some verb starting w/ vowel>
    if (newLoc === dLoc) { newLoc = dLoc.toLowerCase(); }
    var alt = getParam('alt');
    if (newLoc === dLoc && alt !== null) {
     newLoc = safeDecode(alt);
     remAlt = true;
    }

    if (newLoc !== dLoc) { //redir
     var newURL = location.href.replace(/^([\s\S]*?[?&]page=)[^&]*/, function (whole, prefix) {
      return prefix + encodeURIComponent(newLoc);
     });
     newURL = newURL.replace(/&rd=[^&]*/, ''); //strip old redirect counter.
     if (remAlt) {
      //alt has been consumed; drop it so the next failure does not retry the same title.
      newURL = newURL.replace(/&alt=[^&]*/, '');
     }
     location.href = newURL + '&rd=' + rd;
    }
   }
  }

  var sa = html.match(/<table class="bandeau-voir"[^>]*>[\s\S]*?(<span title="Variantes typographiques">[\s\S]*?)<\/td>[\s\S]*?<\/table>/);
  if (sa && sa[1]) {
   document.getElementById('see-also').innerHTML = ' (' + sa[1].replace(/<a[^>]*><img[^>]*\/><\/a>/, '') + ')';
  }
  moreLink.href = pageURL;
 }

 /*]]>*/
 </script>
</head>
 <!-- Page body. setup() runs on load and fills in the placeholders below. -->
 <body onload='setup()'>
 <div id='container'>
 <!-- Receives the extracted definition; pre-filled with the API error message when the response
      contains an api/error element. -->
 <div id='word-list'><xsl:apply-templates select='api/error'/></div>
 <!-- Footer line: "more" link (setup() assigns its href), see-also placeholder and the copyright notice. -->
 <div><a id='more-link'>«lire la suite»</a> <span id='see-also'/> <small id="copyright-notice"> <xsl:copy-of select="$copyright"/></small></div>
 </div>
 <!-- Hidden holder for the string value of api/parse/text; setup() reads it as text and parses it
      with regular expressions. -->
 <div id='src' style='display:None'>
  <xsl:value-of select='api/parse/text'/>
 </div>
 </body>
 </html>

 </xsl:template>
 <!-- Renders an API error element as a span with id "error" containing the label "faute: "
      followed by the value of the element's info attribute. -->
 <xsl:template match='api/error'>
 <span id='error'><b>faute: </b> <xsl:value-of select='@info'/></span>
 </xsl:template>

 </xsl:stylesheet>