User:DeadlyPenguin/extractFirst.xsl

The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
<?xml version="1.0" encoding="UTF-8"?>
 <!-- Extracts the first definition of a word from a Wiktionary entry so that it can be embedded on other sites. Example request:
 http://en.wiktionary.org/w/api.php?action=parse&prop=text&page=word&format=xml&xslt=MediaWiki:extractFirst.xsl -->
 <xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output method='html'/>

 <!-- Attribution and licence notice (French text with links to fr.wiktionary.org and the CC BY-SA 3.0 deed). Held as a result tree fragment and copied into the footer with xsl:copy-of in the root template. -->
 <xsl:variable name="copyright"> © <a href="http://fr.wiktionary.org/wiki/"> Wiktionnaire</a>. Paru en <a href="http://creativecommons.org/licenses/by-sa/3.0/deed.fr" rel="license copyright"> CC-BY-SA 3.0 </a></xsl:variable>
 <xsl:template match="/">
 <html>
 <head>
 <meta name="generator" content="Wiktionary Extract XSLT 1.08ish-FR-non-standard"/>
 <base target='_blank' href='http://fr.wiktionary.org' />
<title> Wiktionary extract</title>
 
 <!-- Styles for the widget. #wordThisIsFor and a.wtif1 apply to the headword link that setup() builds (id "wordThisIsFor", class "wtif" plus the showWord level); #container and #error match ids emitted by this stylesheet. The .disambig-see-also rules and a.new presumably target markup inside the parsed wiki HTML (TODO confirm). -->
 <style>
 #wordThisIsFor { font-weight:bold;}
 a.wtif1 { color: black; text-decoration: none;}
 a.wtif1:hover {text-decoration: underline;}
 .disambig-see-also, .disambig-see-also-2 {display:inline;}
 #container {background-color:white; padding: 0.5em; border: solid black thin;}
 a.new {color: red;}
 #error {color: red;font-size:larger;}
 </style>
 <script type='text/javascript'>
 /*<![CDATA[*/
 /**
  * Fill the widget with the first definition(s) of the requested word.
  *
  * Reads the parsed page HTML that the stylesheet placed (as text) in #src,
  * pulls the first <ol> out of it with regular expressions and writes the
  * result into #word-list. When extraction fails, a message is shown and a
  * few alternate spellings are tried by reloading with a new page parameter.
  *
  * Query parameters read from location.search (each must be preceded by an
  * ampersand, i.e. must not be the first parameter):
  *   page     - URL-encoded title to look up (required; without it nothing is done).
  *   lang     - id of the preferred language section; ignored if longer than 3 characters.
  *   showWord - 'none' (default) hides the headword; any other value shows it
  *              (class wtif1); 'link' shows it with class wtif2.
  *   count    - number of definitions to show (default 1).
  *   alt      - alternate title to try as a last resort.
  *   rd       - redirect depth so far; maintained by this function.
  */
 function setup () {
  var createLink = '«Créer»'; // text only

  // Raw (still URL-encoded) value of a query parameter, or null when absent.
  function queryParam(name) {
   var found = location.search.match(new RegExp('&' + name + '=([^&]*)'));
   return found ? found[1] : null;
  }

  var loc = queryParam('page');
  if (loc === null) {
   // Nothing to look up; leave any API error rendered by the stylesheet visible.
   return;
  }

  var pageURL = '/w/index.php?title=' + loc;
  var src = document.getElementById('src');
  var display = document.getElementById('word-list');
  var word = decodeURIComponent(loc);

  // An absent, empty or over-long lang means "no preference".
  var preferLang = queryParam('lang');
  if (!preferLang || preferLang.length > 3) { preferLang = null; }

  src.normalize();
  // #src is empty when the API returned no parse text (e.g. an error response);
  // an empty string makes the extraction below fail into the fallback path.
  var html = src.firstChild ? src.firstChild.data : '';
  var def = html; // may be narrowed to the preferred language section later

  var rd = queryParam('rd'); // redirect level; unary + converts to a number
  rd = rd !== null ? (+rd + 1) : 1;

  // 0 = no headword, 1 = bold, 2 = bold link.
  var showWordRaw = queryParam('showWord');
  if (showWordRaw === null) { showWordRaw = 'none'; }
  var showWord = 0;
  if (showWordRaw !== 'none') { showWord++; }
  if (showWordRaw === 'link') { showWord++; }

  var numbDfn = queryParam('count');
  numbDfn = numbDfn !== null ? (+numbDfn) : 1; // default to 1

  // Escaped for use as element content only: quotes are NOT escaped,
  // so never place escWord inside an attribute value.
  var escWord = word.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  try {
   // This assumes attribute order doesn't change!
   html = html.replace(/<div id="toctitle">[\s\S]*?<\/div>/, '');
   if (preferLang) {
    try {
     // Strip off all definitions before the target language.
     var subSect = html.match(new RegExp('<span class="mw-headline" id[^>]*><span id="' + preferLang + '">[\\s\\S]*$'))[0];
     if (subSect.match(/<ol>[\s\S]*?<li>/)) {
      // Only switch when that section actually has list content.
      def = subSect;
     }
    } catch (e) { /* preferred language not present: keep the whole page */ }
   }
   var lang = def.match(/<span class="mw-headline" id[^>]*>([\s\S]*?)<\/span>/)[1];
   var intro = "(" + lang + ") ";
   if (showWord) {
    intro = '<a href="' + pageURL + '" id="wordThisIsFor" class="wtif' + showWord + '" >' + escWord + "</a> " + intro;
   }

   // FIXME: the extraction does not properly strip nested divs, so image
   // thumbnails can be left behind.
   var firstList = def.match(/<ol>[\s\S]*?<\/ol>/)[0]
    .replace(/<dl>[\s\S]*?<\/dl>/g, '')
    .replace(/<div[^>]*>[\s\S]*?<\/div>/g, '')
    .replace(/<\/div>/g, '')
    .replace(/<ul>[\s\S]*?<\/ul>/g, '')
    // Function form so that a "$" in the title is not read as a replacement pattern.
    .replace(/<a href="(#[^"]*)">/g, function (whole, fragment) {
     return '<a href="' + pageURL + fragment + '">';
    });

   if (numbDfn === 1) {
    display.innerHTML = intro + firstList.match(/<li>([\s\S]*?)<\/li>/)[1];
   } else {
    // This use is not well supported...
    var items = firstList.match(/<li>([\s\S]*?)<\/li>/g);
    var tmp = intro + ' <ul>';
    for (var i = 0; i < numbDfn && i < items.length; i++) {
     tmp += items[i];
    }
    display.innerHTML = tmp + '</ul>';
   }
  }
  catch (e) {
   // Page does not exist, is not well formed, the regexes did not match, etc.
   display.appendChild(document.createTextNode('Could not retrieve definition of ' + word + "."));
   document.getElementById('more-link').firstChild.data = createLink;
   if (rd < 9) { // arbitrary cap to prevent infinite redirect loops
    var dLoc = word;
    var remAlt = false;
    var newLoc = dLoc.charAt(0).toLowerCase() + dLoc.substring(1); // not URL-encoded

    // For j'<some verb starting with a vowel>: drop the elided pronoun.
    if (newLoc === dLoc && dLoc.charAt(1) === "'") { newLoc = dLoc.substring(2); }

    if (newLoc === dLoc) { newLoc = dLoc.toLowerCase(); }

    var alt = queryParam('alt');
    if (newLoc === dLoc && alt !== null) {
     newLoc = decodeURIComponent(alt);
     remAlt = true;
    }

    if (newLoc !== dLoc) { // redirect
     var newURL = location.href.replace(/(^[\s\S]*?\&page\=)[^&]*([\s\S]*$)/, '$1'+ encodeURIComponent(newLoc) + '$2');
     newURL = newURL.replace(/&rd\=[^&]*/, ''); // strip old redirect counter
     if (remAlt) {
      // The alternate has been consumed; remove it so it is not retried.
      newURL = newURL.replace(/&alt\=[^&]*/, '');
     }
     location = newURL + '&rd=' + rd;
    }
   }
  }

  var sa = html.match(/<table class=\"bandeau-voir\"[^>]*>[\s\S]*?(<span title="Variantes typographiques">[\s\S]*?[\s\S]*?)<\/td>[\s\S]*?<\/table>/);
  if (sa && sa[1]) {
   document.getElementById('see-also').innerHTML = ' (' + sa[1].replace(/<a[^>]*><img[^>]*\/><\/a>/, '') + ')';
  }
  document.getElementById('more-link').href = pageURL;
 }

 /*]]>*/
 </script>
</head>
 <!-- Page body: setup() runs on load and fills in the placeholders below. -->
 <body onload='setup()'>
 <div id='container'>
 <!-- Target for the extracted definition; pre-filled with the error span when the response contains api/error. -->
 <div id='word-list'><xsl:apply-templates select='api/error'/></div>
 <!-- Footer line: link to the full entry (its href, and on failure its text, are set by setup()), the see-also slot, and the copyright notice. -->
 <div><a id='more-link'>«lire la suite»</a> <span id='see-also'/> <small id="copyright-notice"> <xsl:copy-of select="$copyright"/></small></div>
 </div>
 <!-- Hidden holder for the string value of api/parse/text; setup() reads it as text and searches it with regular expressions. -->
 <div id='src' style='display:None'>
  <xsl:value-of select='api/parse/text'/>
 </div>
 </body>
 </html>

 </xsl:template>
 <!-- Renders an API error element as a bold label followed by the text of its info attribute. -->
 <xsl:template match="api/error">
  <span id="error">
   <b><xsl:text>faute: </xsl:text></b>
   <xsl:value-of select="@info"/>
  </span>
 </xsl:template>

 </xsl:stylesheet>