User:DeadlyPenguin/extractFirst.xsl
Appearance
<?xml version="1.0" encoding="UTF-8"?>
<!-- Note: the xmlns:xsl value is a namespace identifier, not a link; it must stay exactly as written. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method='html'/>
<!-- Attribution fragment; it is copied into the page body with xsl:copy-of. The text nodes are kept on one line because their whitespace is part of the output. -->
<xsl:variable name="copyright"> © <a href="https://fr.wiktionary.org/wiki/"> Wiktionnaire</a>. Paru en <a href="https://creativecommons.org/licenses/by-sa/3.0/deed.fr" rel="license copyright"> CC-BY-SA 3.0 </a></xsl:variable>
<!-- Main template: matches the document root and emits the whole HTML page. -->
<xsl:template match="/">
<!-- The user-facing strings of this page are French, so the language is declared for assistive technology. -->
<html lang="fr">
<head>
<meta name="generator" content="Wiktionary Extract XSLT 1.08ish-FR-non-standard"/>
<!-- Every link opens in a new browsing context and relative URLs resolve against the French Wiktionary over HTTPS. -->
<base target='_blank' href='https://fr.wiktionary.org' />
<title> Wiktionary extract</title>
<style>
/* Headword link generated by the script (id wordThisIsFor). */
#wordThisIsFor {
  font-weight: bold;
}

/* Class wtif1: the headword looks like plain text until hovered. */
a.wtif1 {
  color: black;
  text-decoration: none;
}
a.wtif1:hover {
  text-decoration: underline;
}

/* Keep see-also boxes in the text flow. */
.disambig-see-also,
.disambig-see-also-2 {
  display: inline;
}

#container {
  background-color: white;
  padding: 0.5em;
  border: thin solid black;
}

a.new {
  color: red;
}

#error {
  color: red;
  font-size: larger;
}
</style>
<script type='text/javascript'>
/*<![CDATA[*/
function setup () {
var createLink = '«Créer»'; // text only
var pageURL = '/w/index.php?title=' +location.search.match(/\&page\=([^&]*)/)[1];
var src = document.getElementById('src');
var display = document.getElementById('word-list');
var loc = location.search.match(/\&page\=([^&]*)/)[1];
var preferLang = location.search.match(/\&lang\=([^&]*)/);
if (preferLang) {preferLang = preferLang[1];}
if (preferLang.length > 3) { preferLang = null; }
src.normalize();
var html = src.firstChild.data;
var def = html //may be redefined later.
var rd = location.search.match(/\&rd\=([^&]*)/); //is this from redirect. + converts to numeric.
rd = rd ? (+rd[1] + 1) : 1; //redirection level.
var showWord = 0; //default to not showing. 0 = none, 1 = bold, 2 = bold link.
var showWordRaw = location.search.match(/\&showWord\=([^&]*)/);
showWordRaw = showWordRaw ? showWordRaw[1] : 'none';
if (showWordRaw !== "none") {
showWord++;
}
if (showWordRaw === "link") {
showWord++;
}
var numbDfn = location.search.match(/\&count\=([^&]*)/); //count. + converts to numeric.
numbDfn = numbDfn ? (+numbDfn[1]) : 1; //default to 1
var escWord = decodeURIComponent(loc).replace(/&/, '&').replace(/>/, '<').replace(/</, '>'); //note: wordEsc does not escape quotes. DO NOT PUT AS ATTRIBUTE VALUE
// Extraction section of setup(): pick the language label and the first
// definition(s) out of the page text in html and def, then write the result
// into the word-list element (variable display).
// NOTE(review): several regular expression literals below contain raw line
// breaks, stray list markers, or begin in the middle of a pattern. The text
// looks damaged (tag names apparently lost when the file was copied), so
// compare with the original stylesheet before relying on this section.
try {
//this assumes attribute order doesn't change!!!
// NOTE(review): the replacement argument of this call is empty.
html = html.replace(/
[\s\S]*?<\/div>/, );
if (preferLang) {
try {
// Strip off all definitions before the target language.
var subSect = html.match(new RegExp(']*>[\\s\\S]*$'))[0];
if (subSect.match(/- [\s\S]*?
- /)) {
// Only switch to the sub-section if it has content.
def = subSect;
}
} catch (e) { /*alert(e)*/}
}
// Language label: first capture group of the span match, shown in parentheses.
var lang = def.match(/]*>([\s\S]*?)<\/span>/)[1];
var intro = "(" + lang + ") ";
// When showWord is non-zero, put the headword link in front; its class name ends with the showWord level.
if (showWord) intro = '<a href="' + pageURL + '" id="wordThisIsFor" class="wtif' + showWord + '" >' + escWord + "</a> " + intro ;
var definitions_matched;
//FIXME: in both cases the extraction method does not properly strip nested divs. This results in image thumbnails being left behind
// Exactly one definition requested: display capture group 1 of the match.
if (numbDfn === 1) {
definitions_matched = def.match(/
- [\s\S]*?<\/ol>/)[0].replace(/
- ([\s\S]*?)<\/li>/);
display.innerHTML = intro + definitions_matched[1];
} else {
// Several definitions: concatenate at most numbDfn matched items. This use is not well supported.
definitions_matched = def.match(/
- [\s\S]*?<\/ol>/)[0].replace(/
- ([\s\S]*?)<\/li>/g);
var tmp = intro + '
- ';
for (var i = 0; i < numbDfn && i < definitions_matched.length; i++) {
tmp += definitions_matched[i];
}
display.innerHTML = tmp + '
} } catch (e) { //alert(e) //page does not exist, not well formed, these regexs suck, etc display.appendChild(document.createTextNode('Could not retrieve definition of ' + decodeURIComponent(loc) + ".")); document.getElementById('more-link').firstChild.data = createLink; if (rd < 9) { //arbitrary to prevent infinite loops //make sure don't have loops. var newLoc; //this should not be urlEncoded. var remAlt = false; var dLoc = decodeURIComponent(loc);newLoc = dLoc.charAt(0).toLowerCase() + dLoc.substring(1, loc.length); //try some other redirections.
if (newLoc === dLoc && dLoc.charAt(1) === "'") newLoc = dLoc.substring(2,dLoc.length); //for j'<some verb starting w/ vowel>
if (newLoc === dLoc) newLoc = dLoc.toLowerCase(); if (newLoc === dLoc && location.search.match(/\&alt\=([^&]*)/)) { newLoc = decodeURIComponent(location.search.match(/\&alt\=([^&]*)/)[1]); remAlt = true; } if (newLoc !== dLoc) { //redir var newURL = location.href.replace(/(^[\s\S]*?\&page\=)[^&]*([\s\S]*$)/, '$1'+ encodeURIComponent(newLoc) + '$2'); newURL = newURL.replace(/&rd\=[^&]*/, ); //strip old redirect header. if (remAlt) { location.href.replace(/&alt\=[^&]*/, ); } location = newURL + '&rd=' + rd; } } }var sa = html.match(/]*>[\s\S]*?([\s\S]*?[\s\S]*?)<\/td>[\s\S]*?<\/table>/) if(sa && sa[1]) { document.getElementById('see-also').innerHTML = ' (' + sa[1].replace(/<a[^>]*><img[^>]*\/><\/a>/, ) + ')' ; } document.getElementById('more-link').href = pageURL; } /*]]>*/ </script> </head> <body onload='setup()'><xsl:apply-templates select='api/error'/><a id='more-link'>«lire la suite»</a> <xsl:copy-of select="$copyright"/><xsl:value-of select='api/parse/text'/>
</body>
</html>
</xsl:template>
<!-- Error template: prints the fixed label followed by the info attribute of the matched error element. -->
<xsl:template match='api/error'>
  <xsl:text> faute: </xsl:text>
  <xsl:value-of select='@info'/>
</xsl:template>
</xsl:stylesheet>
- [\s\S]*?<\/dl>/g, ).replace(/<div[^>]*>[\s\S]*?<\/div>/g, ).replace(/<\/div>/g, ).replace(/
- [\s\S]*?<\/ul>/g, ).replace(/<a href="(#[^"]*)">/g, '<a href="' + pageURL + '$1">').match(/
- ([\s\S]*?)<\/li>/g);
var tmp = intro + '
- [\s\S]*?<\/dl>/g, ).replace(/<div[^>]*>[\s\S]*?<\/div>/g, ).replace(/<\/div>/g, ).replace(/
- [\s\S]*?<\/ul>/g, ).replace(/<a href="(#[^"]*)">/g, '<a href="' + pageURL + '$1">').match(/
- ([\s\S]*?)<\/li>/);
display.innerHTML = intro + definitions_matched[1];
} else {
//this use not well supported...
definitions_matched = def.match(/