Jump to content

User:Proteins/striparticlelinks.js

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Proteins (talk | contribs) at 19:31, 20 October 2008 (more careful with variable declarations; new counter for images). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//<pre>
// Strip document hyperlinks (esp. wikilinks), leaving only their text; useful for FireVox screen reader
// Also fix bug 11555 (order of section title and edit link) and double caption

// stripHyperlinks(): make the current page friendlier to screen readers such as FireVox.
//
// The function performs three passes over the live document and then reports
// what it did in a single window.alert():
//   1. In every H2-H5 header that has both an "editsection" child and an
//      "mw-headline" child, move the headline in front of the edit link (bug 11555).
//   2. Prefix the ALT text of every image larger than 5000 square pixels with
//      "Image N: ".
//   3. Replace hyperlinks inside #bodyContent by their own child nodes, except
//      for the kinds of links listed under "criteria for keeping some links".
//
// Takes no arguments and returns nothing; it reads and modifies the global
// `document` and calls `window.alert`.
function stripHyperlinks() {
	var alert_string = "";

	var on_main_page = false;
	var delete_line_breaks_in_mp_topbanner = false; // experimental switch, off by default

	// Updated at section anchors below, but not consulted by any keep/strip decision yet
	var strip_hyperlinks = true;
	var within_closing_section = false; // set once a "See also"/"References"-type anchor is seen
	var force_hyperlink_deletion = false;

	var temp_hyperlink;
	var temp_anchor_name;

	var content_node;
	var hyperlinks;
	var num_hyperlinks = 0;
	var hyperlink_index = 0;
	var hyperlink_counter = 0;
	var num_hyperlinks_removed = 0;

	var num_redlinks = 0;
	var num_redlinks_removed = 0;
	var parent_node;
	var element_node;
	var grandparent_node;
	var next_sibling_node;
	var prev_sibling_node;
	var greatgrandparent_node;
	var greatgreatgrandparent_node;

	var child_node;
	var num_child_nodes = 0;
	var child_node_index = 0;

	var headers;
	var temp_header;
	var num_headers = 0;
	var header_index = 0;
	var mw_headline_node;
	var editsection_node;
	var num_header_swaps = 0;
	var total_num_header_swaps = 0;

	var num_header_tag_strings = 0;
	var header_tag_string_index = 0;
	var header_tag_strings = [ "H2", "H3", "H4", "H5" ];

	var temp_image;
	var num_images = 0;
	var num_pixels = 0;
	var alt_string = "";
	var image_index = 0;
	var image_counter = 0;

	// Applied in this order to a section anchor name; if nothing is left afterwards,
	// the name consisted only of closing-section words ("See also", "Notes and references", ...)
	var closing_section_patterns = [
		/:$/ig, // eliminate colons at end
		/s$/ig, // eliminate plurals at end
		/See_also/ig,
		/Related_topic/ig,
		/Related_article/ig,
		/Further_reading/ig,
		/External_link/ig,
		/Footnote/ig,
		/Note/ig,
		/Reference/ig,
		/Citation/ig,
		/Source/ig,
		/Link/ig,
		/s([_\s]+)and([_\s]+)/ig,
		/([_\s]+)and([_\s]+)/ig,
		/([_\s]+)/ig
	];
	var num_closing_section_patterns = closing_section_patterns.length;
	var pattern_index = 0;

	// Stand-in for a missing ancestor, so that reading .className/.nodeName/.id
	// near the top of the tree yields undefined instead of throwing
	var no_node = {};
	function ancestor_of(node) {
		return (node && node.parentNode) ? node.parentNode : no_node;
	}

// Check whether we're on the Main Page
	if (document.getElementById("mp-topbanner")) {
		on_main_page = true;

// Try to remove two annoying line breaks, per Graham87
		if (delete_line_breaks_in_mp_topbanner) {
			next_sibling_node = document.getElementById('articlecount');
			element_node = next_sibling_node ? next_sibling_node.previousSibling : null;
			prev_sibling_node = element_node ? element_node.previousSibling : null;

			if (prev_sibling_node) {
				parent_node = next_sibling_node.parentNode;

// There is no document subtree, just the text in two subsequent DIVs
				child_node = document.createTextNode(element_node.innerHTML);
				prev_sibling_node.appendChild(child_node);

				child_node = document.createTextNode(next_sibling_node.innerHTML);
				prev_sibling_node.appendChild(child_node);

				parent_node.removeChild(element_node);
				parent_node.removeChild(next_sibling_node);
			}
		}
	} // closes check whether we're on the Main Page

// Fix bug 11555 for screen readers: swap order of "editsection" and "mw-headline" nodes in headings
	num_header_tag_strings = header_tag_strings.length;
	for (header_tag_string_index = 0; header_tag_string_index < num_header_tag_strings; header_tag_string_index++) {
		headers = document.getElementsByTagName(header_tag_strings[header_tag_string_index]);
		num_headers = headers.length;
		num_header_swaps = 0;
		// Start at 0: headers lacking either child are skipped by the test below,
		// so no header needs to be excluded by position
		for (header_index = 0; header_index < num_headers; header_index++) {
			temp_header = headers[header_index];

			editsection_node = null;
			mw_headline_node = null;
			num_child_nodes = temp_header.childNodes.length;
			for (child_node_index = 0; child_node_index < num_child_nodes; child_node_index++) {
				child_node = temp_header.childNodes[child_node_index];
				if (child_node.className === "editsection") {
					editsection_node = child_node;
				} else if (child_node.className === "mw-headline") {
					mw_headline_node = child_node;
				}
			}
			if ((editsection_node !== null) && (mw_headline_node !== null)) {
				temp_header.insertBefore(mw_headline_node, editsection_node);
				num_header_swaps++;
			}
		} // closes loop over headers of that type in document
		total_num_header_swaps += num_header_swaps;
	} // closes loop over different types of headers
// Acknowledgment
	if (total_num_header_swaps === 1) {
		alert_string += "Swapped text and edit link in one header.\n";
	} else {
		alert_string += "Swapped text and edit link in " + total_num_header_swaps + " headers.\n";
	}


// Number the larger images so that a listener can tell them apart
	num_images = document.images.length;
	for (image_index = 0; image_index < num_images; image_index++) {
		temp_image = document.images[image_index];

		num_pixels = temp_image.width * temp_image.height;

		if (num_pixels > 5000) {
			image_counter++;
			alt_string = "Image " + image_counter + ": ";
			temp_image.alt = alt_string + temp_image.alt; // preface image with number
		}
	}
// Acknowledgment
	if (image_counter === 1) {
		alert_string += "Modified ALT text of one image.\n";
	} else {
		alert_string += "Modified ALT text of " + image_counter + " images.\n";
	}


// Proceed to main work of the script: eliminating hyperlinks
	content_node = document.getElementById("bodyContent");
	hyperlinks = content_node ? content_node.getElementsByTagName("a") : []; // nothing to strip without #bodyContent

// The list returned by getElementsByTagName is live: removing a link shifts the
// following links down by one.  hyperlink_counter therefore counts the links
// examined, while hyperlink_index advances only past links that are kept.
	num_hyperlinks = hyperlinks.length;
	while (hyperlink_counter < num_hyperlinks) {
		temp_hyperlink = hyperlinks[hyperlink_index];
		hyperlink_counter++;
		if (!temp_hyperlink) { break; } // list ended sooner than expected

// Count the redlinks
		if (temp_hyperlink.className === "new") { num_redlinks++; }

// Determine whether we've reached the end of the article
		if ((temp_hyperlink.name) && (!within_closing_section)) {
			temp_anchor_name = temp_hyperlink.name;
			for (pattern_index = 0; pattern_index < num_closing_section_patterns; pattern_index++) {
				temp_anchor_name = temp_anchor_name.replace(closing_section_patterns[pattern_index], "");
			}
			if (temp_anchor_name === "") {
				within_closing_section = true;
			}
		} // closes check whether we've reached the end of the article

// allow some sections to be skipped (bookkeeping only, see declaration of strip_hyperlinks)
		if (temp_hyperlink.name === "See_also") {
			strip_hyperlinks = false;
		} else if (temp_hyperlink.name === "Related_topics") {
			strip_hyperlinks = false;
		} else if (temp_hyperlink.name === "Related_articles") {
			strip_hyperlinks = false;
		} else if (temp_hyperlink.name) {
			strip_hyperlinks = true;
		}


// criteria for keeping some links
		if (!temp_hyperlink.title) { hyperlink_index++; continue; } // replace only wikilinks?
		if (temp_hyperlink.getAttribute("accesskey")) { hyperlink_index++; continue; } // avoid command links
		if (temp_hyperlink.className === "image") { hyperlink_index++; continue; } // keep images
		if (temp_hyperlink.className === "internal") { hyperlink_index++; continue; } // keep Enlarge buttons
		if (temp_hyperlink.className === "external text") { hyperlink_index++; continue; } // keep geotags, etc.


// force the deletion of some types of links (none at present)
		force_hyperlink_deletion = false;

// The parent is needed for the replacement below whether or not deletion is forced
		parent_node = temp_hyperlink.parentNode;

// check ancestor links against criteria to keep other types of links
		if (!force_hyperlink_deletion) {
			grandparent_node = ancestor_of(parent_node);
			greatgrandparent_node = ancestor_of(grandparent_node);
			greatgreatgrandparent_node = ancestor_of(greatgrandparent_node);

// Save all bold links on the Main Page
			if ((on_main_page) && (parent_node.nodeName === "B")) { hyperlink_index++; continue; }

// Save specific types of navigational links on the Main Page
// Save links in the mp-strapline
			if ((on_main_page) && ((greatgreatgrandparent_node.id === "mp-strapline") || (ancestor_of(greatgreatgrandparent_node).id === "mp-strapline"))) { hyperlink_index++; continue; }

// Save "Recently featured:" links: most other parts use DIV; this section uses P as the parent.
// The paragraph is recognized by the DIV of class "noprint" found two siblings later
// (the sibling in between is skipped); matching on the text "Recently featured: " would be
// language-specific.
			if ((on_main_page) && (parent_node.nodeName === "P")) {
				next_sibling_node = parent_node.nextSibling;
				if ((next_sibling_node) && (next_sibling_node.nextSibling)) {
					next_sibling_node = next_sibling_node.nextSibling;
					// compare, don't assign: the DIV's class must be left untouched
					if ((next_sibling_node.nodeName === "DIV") && (next_sibling_node.className === "noprint")) { hyperlink_index++; continue; }
				}
			}

// Links that should be kept:
// keep links within most lists per Graham87's suggestion, but not References and Notes; allow anchor to be in italics
			if (((parent_node.nodeName === "LI") || (grandparent_node.nodeName === "LI")) && (!on_main_page) && (grandparent_node.className !== "references") && (greatgrandparent_node.className !== "references-small") && (temp_hyperlink.className !== "new")) { hyperlink_index++; continue; }

// keep section edit buttons
			if (parent_node.className === "editsection") { hyperlink_index++; continue; }

// keep sidebar buttons
			if (greatgrandparent_node.className === "pBody") { hyperlink_index++; continue; }

// keep category links
			if ((greatgrandparent_node.className === "catlinks") || (grandparent_node.className === "catlinks")) { hyperlink_index++; continue; }

// keep disambiguations
			if ((parent_node.className === "dablink") || (grandparent_node.className === "dablink") || (greatgrandparent_node.className === "dablink")) { hyperlink_index++; continue; }

// keep "Main article" links
			if (grandparent_node.className === "noprint relarticle mainarticle") { hyperlink_index++; continue; }

// keep "Further details" links
			if ((grandparent_node.className === "boilerplate seealso") || (grandparent_node.className === "boilerplate further")) { hyperlink_index++; continue; }

// keep protected and semi-protected icons
			if (grandparent_node.className === "metadata plainlinks") { hyperlink_index++; continue; }

// keep links in sound samples
			if ((parent_node.className === "medialist listenlist") || (grandparent_node.className === "medialist listenlist") || (greatgrandparent_node.className === "medialist listenlist")) { hyperlink_index++; continue; }
		} // closes check for forced deletion of hyperlink

// Replace the link by its contents: graft its subtree back into the document.
// (Replacing it by a text node built from innerHTML fails for italicized text.)
		num_child_nodes = temp_hyperlink.childNodes.length;
		for (child_node_index = 0; child_node_index < num_child_nodes; child_node_index++) {
			child_node = temp_hyperlink.childNodes[0]; // always the first: each move shortens the list
			parent_node.insertBefore(child_node, temp_hyperlink);
		}
		parent_node.removeChild(temp_hyperlink);
		num_hyperlinks_removed++;

// Count the redlinks removed
		if (temp_hyperlink.className === "new") { num_redlinks_removed++; }

// Merge blocks of text that are adjacent in the document tree, prevent screen reader pauses
		parent_node.normalize();
	} // closes loop over hyperlinks

// Acknowledgment
	if (num_redlinks === 1) {
		alert_string += "Counted one redlink in the main article, removed " + num_redlinks_removed + ".\n";
	} else {
		alert_string += "Counted " + num_redlinks + " redlinks in the main article, removed " + num_redlinks_removed + ".\n";
	}
	if (num_hyperlinks_removed === 1) {
		alert_string += "Removed one hyperlink from this article.";
	} else {
		alert_string += "Removed " + num_hyperlinks_removed + " hyperlinks from this article.";
	}
	window.alert(alert_string);  // print combined alert string

} // closes function stripHyperlinks()

// Once the page has loaded, add a "Strip links" entry (id "ca-nolinks", access key "s")
// to the 'p-cactions' portlet; activating it runs stripHyperlinks().
addOnloadHook(function installStripLinksTab() {
	var tooltip = 'Strips links for screen readers like FireVox';
	addPortletLink('p-cactions', 'javascript:stripHyperlinks()', 'Strip links', 'ca-nolinks', tooltip, 's', '');
});

//</pre>