Jump to content

User:Proteins/articlestructure.js

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and use the default browser print function instead.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//<pre>
// Analyze the article's structure
// with kind respects to Dr. pda, whose excellent prosesizebytes.js script was the inspiration
//
// To use this script, add "importScript('User:Proteins/articlestructure.js');" to the monobook.js subpage
// under your user page; see User:Proteins/monobook.js for an example.

function articleStructure() {
	var alert_string = "";
	var diagnostic_string = "";
	var read_entire_article = true;

	var show_lead_diagnostics = true;
	var show_section_diagnostics = false;
	var display_individual_words = false;

	var using_Internet_Explorer = false;

	var spaced_text = "";
	var untagged_text = "";
	var stripped_text = "";
	var unescaped_text = "";

	var anchors;
	var temp_anchor;
	var section_name = "";
	var temp_anchor_name = "";

	var num_anchors = 0;
	var anchor_index = 0;
	var anchor_level = 0;
	var prev_anchor_level = 0;
	var num_H2_anchors = 0;
	var H2_anchor_index = 0;

	var cutoff_anchor_index = 0;
	var cutoff_H2_anchor_index = 0;
	var cutoff_child_node_index = 0;
	var last_P_child_node_index = 0;
	var cutoff_element_node_index = 0;

	var num_sections = 0;
	var section_index = 0;

	var element_node;
	var num_element_nodes = 0;
	var element_node_index = 0;

	var temp_node_name = "";

	var parent_node;
	var grandparent_node;
	var greatgrandparent_node;

	var sibling_node;
	var next_sibling_node;

	var child_node;
	var child_nodes;
	var prev_child_node;
	var num_child_nodes = 0;
	var child_node_index = 0;
	var child_node_name = "";
	var num_prose_counted_nodes = 0;

	var grandchild_node;
	var grandchild_nodes;
	var num_grandchild_nodes = 0;
	var grandchild_node_index = 0;

	var path_names;
	var file_name = "";

	var num_characters = 0;
	var del_num_characters = 0;
	var temp_num_characters = 0;

	var temp_word = "";
	var num_words = 0;
	var word_count = 0;
	var word_index = 0;
	var nonempty_word_index = 0;
	var tentative_num_words = 0;

	var num_spaces = 0;
	var paragraph_count = 0;
	var list_item_count = 0;
	var prose_size_bytes = 0;
	var total_word_count = 0;
	var total_paragraph_count = 0;
	var total_list_item_count = 0;
	var total_prose_size_bytes = 0;
	var section_word_count = new Array();
	var section_paragraph_count = new Array();
	var section_list_item_count = new Array();
	var section_prose_size_bytes = new Array();

	var word_count_string = "";
	var paragraph_count_string = "";
	var list_item_count_string = "";
	var prose_size_bytes_string = "";

	var temp_paragraph;
	var text_paragraphs;
	var num_paragraphs = 0;
	var paragraph_index = 0;

	var temp_list_item;
	var text_list_items;
	var num_list_items = 0;
	var list_item_index = 0;

	var temp_image;
	var num_pixels = 0;
	var image_index = 0;
	var image_counter = 0;
	var num_raw_images = 0;
	var num_nonicon_images = 0;

	var num_anchors = 0;
	var num_raw_links = 0;
	var num_raw_tables = 0;
	var num_raw_references = 0;


// check for Internet Explorer browser 

	using_Internet_Explorer = false;
	if (navigator.userAgent.indexOf("MSIE") > -1) { 
		using_Internet_Explorer = true;
//		alert_string = "This script works correctly in every browser — except Internet Explorer.  Please be patient!"
//		window.alert(alert_string);	
	}

// Find the cutoff H2 anchor index, where we stop counting things
	alert_string = "";
	num_H2_anchors = 0;
	section_name = "lead section";
	prev_anchor_level = 1;  //begin at the H1 heading
	read_entire_article = true;
	anchors = document.anchors;
	num_anchors = anchors.length;
	for (anchor_index=1; anchor_index<num_anchors; anchor_index++) {
		temp_anchor = anchors[anchor_index];

		parent_node = temp_anchor.parentNode; 
		if (!parent_node) { continue; }

		sibling_node = parent_node.nextSibling;
		if (!sibling_node) { continue; }

// Check headings for jumps upwards in heading level
		anchor_level = 0;
		if (sibling_node.nodeName == "H1") {
			alert_string += "  WARNING: Illegal H1 heading in this section\n";
		} else if (sibling_node.nodeName == "H2") {
			anchor_level = 2;	
		} else if (sibling_node.nodeName == "H3") {
			anchor_level = 3;	
		} else if (sibling_node.nodeName == "H4") {
			anchor_level = 4;	
		} else if (sibling_node.nodeName == "H5") {
			anchor_level = 5;	
		}  else { 
			next_sibling_node = sibling_node.nextSibling;
			if (!next_sibling_node) { continue; }

// Check headings for jumps upwards in heading level
			if (next_sibling_node.nodeName == "H1") {
				alert_string += "  WARNING: Illegal H1 heading in this section\n";
			} else if (next_sibling_node.nodeName == "H2") {
				anchor_level = 2;	
			} else if (next_sibling_node.nodeName == "H3") {
				anchor_level = 3;	
			} else if (next_sibling_node.nodeName == "H4") {
				anchor_level = 4;	
			} else if (next_sibling_node.nodeName == "H5") {
				anchor_level = 5;	
			}
		} // closes assignment of the anchor level, if any 
 	
		if (((anchor_level - prev_anchor_level) > 1) && (prev_anchor_level != 0)) {
			if (num_H2_anchors == 0) {
				alert_string += "  WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in the lead\n";
			} else { 
				alert_string += "  WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in \"" + section_name.replace(/(_+)/ig, " ") + "\"\n";
			}
		}
		if (anchor_level > 0) { prev_anchor_level = anchor_level; }

//Check major section headings for closing sections
		if (anchor_level == 2) { 
			num_H2_anchors++;
			section_name = temp_anchor.name;
			temp_anchor_name = temp_anchor.name;

			alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + "\n";
//			alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + " " + temp_anchor.parentNode.nodeName + " " + sibling_node.nodeName + "\n";

			temp_anchor_name = temp_anchor_name.replace(/:$/ig,""); // eliminate colons at end
			temp_anchor_name = temp_anchor_name.replace(/s$/ig,""); // eliminate plurals at end
			temp_anchor_name = temp_anchor_name.replace(/See_also/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Related_topic/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Related_article/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Further_reading/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/External_link/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Footnote/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Note/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Reference/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Citation/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Source/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/Link/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/s([_\s]+)and([_\s]+)/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/([_\s]+)and([_\s]+)/ig,"");
			temp_anchor_name = temp_anchor_name.replace(/([_\s]+)/ig,"");
			if (temp_anchor_name == "") { break; }

//			diagnostic_string = "Section " + num_H2_anchors + " : " + temp_anchor_name + " L: " + temp_anchor_name.length;
//			window.alert(diagnostic_string);

		} // closes check for H2 anchor
	} // closes loop over the anchors
	cutoff_anchor_index = anchor_index;
	cutoff_H2_anchor_index = num_H2_anchors;
	if (cutoff_anchor_index < num_anchors) {
		read_entire_article = false;
		alert_string += "\nProse counting will stop before the \"" + temp_anchor.name.replace(/(_+)/ig, " ") + "\" section.\n";
	} else {
		read_entire_article = true;
		alert_string += "\nProse counting will cover the entire article.\n";
	}
	window.alert(alert_string);


// Count child and element nodes 
	alert_string = "";
	num_element_nodes = 0;
	child_nodes = document.getElementById("bodyContent").childNodes;
	num_child_nodes = child_nodes.length;
//	if (num_child_nodes > 40) { num_child_nodes = 40;} // truncate loop for testing

	for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) {
		child_node = child_nodes[child_node_index];
		if (child_node.nodeType != 1) {
//			alert_string += "Child node " + child_node_index + " : " + child_node.nodeName + "\n"; 
			continue; 
		} // examine only Element nodes
		num_element_nodes++;
//		alert_string += "Element node " + num_element_nodes + " : " + child_node.nodeName + "\n"; 
	} // closes loop counting the element nodes
//	window.alert(alert_string);

// Determine the corresponding childNode index cutoff
	alert_string = "";
	if (read_entire_article == true) {
		cutoff_child_node_index = num_child_nodes;
		cutoff_element_node_index = num_element_nodes;
	} else {
		H2_anchor_index = 0;
		element_node_index = 0;
		last_P_child_node_index = -1;
		last_P_element_node_index = -1;
		for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) {
			child_node = child_nodes[child_node_index];
			if (child_node.nodeType != 1) { continue; } // examine only Element nodes
			element_node_index++;

			if (child_node.nodeName == "P") {
				last_P_child_node_index = child_node_index;
				last_P_element_node_index = num_element_nodes;
			} else if (child_node.nodeName == "H2") {
				H2_anchor_index++;
				if (H2_anchor_index == cutoff_H2_anchor_index) { 
					cutoff_child_node_index = last_P_child_node_index;
					cutoff_element_node_index = last_P_element_node_index;
					break; 
				}
			}
//			alert_string += "Section " + H2_anchor_index + ", Element node " + num_element_nodes + " : " + child_node.nodeName + " " + child_node.childNodes.length + "\n";
//			if (num_element_nodes > 45) { break; } // for debugging
		} // closes loop over the childNodes of the Document
		if (last_P_child_node_index < 0) { // if no cutoff was discovered; should never happen
			cutoff_child_node_index = num_child_nodes;
			cutoff_element_node_index = num_element_nodes;
		}
	} // closes check whether to read entire article
	alert_string = "\nThe child_node_index and element_node_index cutoffs are " + cutoff_child_node_index + " and " + cutoff_element_node_index + ", respectively.\n";
//	window.alert(alert_string);


// Count the words, paragraphs and prose size bytes by section
	word_count = 0;
	paragraph_count = 0;
	list_item_count = 0;
	prose_size_bytes = 0;
	num_prose_counted_nodes = 0;

	H2_anchor_index = 0;
	for (child_node_index=0; child_node_index < cutoff_child_node_index; child_node_index++) {
		child_node = child_nodes[child_node_index];
		if (child_node.nodeType != 1) { continue; } // examine only Element nodes
		element_node_index++;

		if (child_node.nodeName == "H2") {
			section_word_count.push(word_count);
			section_paragraph_count.push(paragraph_count);
			section_list_item_count.push(list_item_count);
			section_prose_size_bytes.push(prose_size_bytes);

			H2_anchor_index++;
			word_count = 0;
			paragraph_count = 0;
			list_item_count = 0;
			prose_size_bytes = 0;
		}

// if the child node meets the criteria, add to the prose size, word and paragraph counts
		if ((child_node.nodeName == "P") || (child_node.nodeName == "PRE")) {

			untagged_text = child_node.innerHTML;
			untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
			untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
			untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
			untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
			untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
			untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
			untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

			spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
			spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
			spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

//			spaced_text = filterStringForProseSizeCounting(untagged_text);
			words = spaced_text.split(' ');		
			tentative_num_words = words.length;
			if (tentative_num_words > 0) { // verify that the paragraph contributes text
				num_words = 0;
				num_characters = 0;
				for (word_index=0; word_index<tentative_num_words; word_index++) {
					temp_word = words[word_index];
					del_num_characters = temp_word.length;
					if (del_num_characters > 0) {
						num_words++;
						num_characters += del_num_characters; 
					}
				}
				if (num_words > 0) {
					paragraph_count++;
					num_prose_counted_nodes++;
					word_count += num_words;
					prose_size_bytes += num_characters;
					num_spaces = num_words - 1;
					prose_size_bytes += num_spaces; // add spaces to character count
					child_node.style.cssText = "background-color:yellow";
					
					// Code for testing output
					if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
						continue;
					}
					diagnostic_string = "";
					nonempty_word_index = 0;
					temp_num_characters = 0; 
					for (word_index=0; word_index<tentative_num_words; word_index++) {
						if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
							window.alert(diagnostic_string);
							diagnostic_string = "Continued from previous screen:\n\n";
						}
						temp_word = words[word_index];
						del_num_characters = temp_word.length;
						if (del_num_characters > 0) {
							nonempty_word_index++;
							temp_num_characters += del_num_characters;  
							diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "  " + temp_num_characters + "\n";
						}
					}
					temp_num_characters += num_spaces;
					diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
					if (display_individual_words) {
						diagnostic_string += "\nEND of paragraph " + paragraph_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
						window.alert(diagnostic_string); 
					}
				} // closes check for non-empty paragraph
			} // tentative check for words
		} else if ((child_node.nodeName == "UL") || (child_node.nodeName == "OL")) { // unordered and ordered lists
			grandchild_nodes = child_node.childNodes; // not all LI elements because of possible nesting
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if (grandchild_node.nodeName == "LI") {
					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							list_item_count++;
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0; 
							temp_num_characters = 0; 
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a list item (LI) node
			} // closes loop over grandchild nodes of an ordered (OL) or unordered (UL) list
		} else if (child_node.nodeName == "DL") { // discursive lists
			grandchild_nodes = child_node.childNodes;			
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if ((grandchild_node.nodeName == "DT") || (grandchild_node.nodeName == "DD")) {
					// Exceptions that shouldn't be counted
					if (grandchild_node.childNodes.length > 0) {
						temp_node_name = grandchild_node.childNodes[0].nodeName;
						if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; }
					}
					if (grandchild_node.childNodes.length > 1) {
						temp_node_name = grandchild_node.childNodes[1].nodeName;
						if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; }
					}

					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							list_item_count++;
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0;
							temp_num_characters = 0;  
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of discursive list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a discursive list item (DT or DD) node
			} // closes loop over grandchild nodes of a discursive list DL
		} else if (child_node.nodeName == "BLOCKQUOTE") {
			grandchild_nodes = child_node.getElementsByTagName("P");			
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if (grandchild_node.nodeName == "P") {
					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							// don't count blockquotes, for now
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0;
							temp_num_characters = 0;  
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of BLOCKQUOTE in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a paragraph (P) node in a BLOCKQUOTE
			} // closes loop over grandchild nodes in a BLOCKQUOTE
		} else if (child_node.nodeName == "TABLE") {
			if (child_node.className != "cquote") { continue; } // count only tables that are cquotes
			grandchild_nodes = child_node.getElementsByTagName("TD");			
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if (grandchild_node.nodeName == "TD") {
					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							// don't count cquotes, for now
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0; 
							temp_num_characters = 0; 
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of CQUOTE paragraph in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a paragraph (P) node in a CQUOTE
			} // closes loop over grandchild nodes in a CQUOTE
		} else if (child_node.nodeName == "DIV") { // Poems
			if (child_node.className != "poem") { continue; } // allow only poem DIV's
			grandchild_nodes = child_node.getElementsByTagName("P");			
			num_grandchild_nodes = grandchild_nodes.length;
			for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) {
				grandchild_node = grandchild_nodes[grandchild_node_index];
				if (grandchild_node.nodeName == "P") {
					untagged_text = grandchild_node.innerHTML;
					untagged_text = untagged_text.replace(/<sup>/ig,""); // keep simple superscript text
					untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text
					untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags
					untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character >
					untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character <
					untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character &
					untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

					spaced_text = untagged_text.replace(/&nbsp;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/&#160;/ig, " ");  // convert non-breaking spaces to spaces
					spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

					words = spaced_text.split(' ');		
					tentative_num_words = words.length;
					if (tentative_num_words > 0) { // verify that the list item contributes text
						num_words = 0;
						num_characters = 0;
						for (word_index=0; word_index<tentative_num_words; word_index++) {
							temp_word = words[word_index];
							del_num_characters = temp_word.length;
							if (del_num_characters > 0) {
								num_words++;
								num_characters += del_num_characters; 
							}
						}
						if (num_words > 0) {
							// don't count blockquotes, for now
							num_prose_counted_nodes++;
							word_count += num_words;
							prose_size_bytes += num_characters;
							num_spaces = num_words - 1;
							prose_size_bytes += num_spaces; // add spaces to character count
							child_node.style.cssText = "background-color:yellow";

							// Code for testing output
							if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { 
								continue;
							}
							diagnostic_string = "";
							nonempty_word_index = 0; 
							temp_num_characters = 0; 
							for (word_index=0; word_index<tentative_num_words; word_index++) {
								if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {
									window.alert(diagnostic_string);
									diagnostic_string = "Continued from previous screen:\n\n";
								}
								temp_word = words[word_index];
								del_num_characters = temp_word.length;
								if (del_num_characters > 0) {
									nonempty_word_index++;
									temp_num_characters += del_num_characters;  
									diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";
								}
							}
							temp_num_characters += num_spaces;
							diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";
							if (display_individual_words) {
								diagnostic_string += "\nEND of <poem> in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; 
								window.alert(diagnostic_string); 
							}
						} // closes check for non-empty list item
					} // tentative check for words
				} // closes check for a paragraph (P) node in a poem
			} // closes loop over grandchild nodes in a poem
		} // closes check for appropriate elements
	} // closes loop over the child nodes
	section_word_count.push(word_count);
	section_paragraph_count.push(paragraph_count);
	section_list_item_count.push(list_item_count);
	section_prose_size_bytes.push(prose_size_bytes);


// Output the various counts

	word_count_string = " word";
	paragraph_count_string = " paragraph";
	list_item_count_string = " list item";
	prose_size_bytes_string = " byte";
	if (section_word_count[0] != 1) { word_count_string += "s";}
	if (section_paragraph_count[0] != 1) { paragraph_count_string += "s";}
	if (section_list_item_count[0] != 1) { list_item_count_string += "s";}
	if (section_prose_size_bytes[0] != 1) { prose_size_bytes_string += "s";}

	alert_string = "Lead section: " + section_paragraph_count[0] + paragraph_count_string + ", " + section_list_item_count[0] + list_item_count_string + ", " + section_word_count[0] + word_count_string + ", " + section_prose_size_bytes[0] + prose_size_bytes_string + "\n\n";

	total_word_count = section_word_count[0];
	total_paragraph_count = section_paragraph_count[0];
	total_list_item_count = section_list_item_count[0];
	total_prose_size_bytes = section_prose_size_bytes[0];

	num_sections = section_word_count.length;
	for (section_index=1; section_index<num_sections; section_index++) {
		total_word_count += section_word_count[section_index];
		total_paragraph_count += section_paragraph_count[section_index];
		total_list_item_count += section_list_item_count[section_index];
		total_prose_size_bytes += section_prose_size_bytes[section_index];
	
		word_count_string = " word";
		paragraph_count_string = " paragraph";
		list_item_count_string = " list item";
		prose_size_bytes_string = " byte";
		if (section_word_count[section_index] != 1) { word_count_string += "s";}
		if (section_paragraph_count[section_index] != 1) { paragraph_count_string += "s";}
		if (section_list_item_count[section_index] != 1) { list_item_count_string += "s";}
		if (section_prose_size_bytes[section_index] != 1) { prose_size_bytes_string += "s";}

		alert_string += "Section " + section_index + " : " + section_paragraph_count[section_index] + paragraph_count_string + ", " + section_list_item_count[section_index] + list_item_count_string + ", " + section_word_count[section_index] + word_count_string + ", " + section_prose_size_bytes[section_index] + prose_size_bytes_string + "\n";
	}
	if (num_sections>1) {alert_string += "\n";} // Make space for the totals

	word_count_string = " word";
	paragraph_count_string = " paragraph";
	list_item_count_string = " list item";
	prose_size_bytes_string = " byte";
	if (total_word_count != 1) { word_count_string += "s";}
	if (total_paragraph_count != 1) { paragraph_count_string += "s";}
	if (total_list_item_count != 1) { list_item_count_string += "s";}
	if (total_prose_size_bytes != 1) { prose_size_bytes_string += "s";}
	alert_string += "Totals: " + total_paragraph_count + paragraph_count_string + ", " + total_list_item_count + list_item_count_string + ", " + total_word_count + word_count_string + ", " + total_prose_size_bytes + prose_size_bytes_string + "\n";
	window.alert(alert_string);


// Count the article images
	num_nonicon_images = 0;
	num_raw_images = document.images.length;
	alert_string = "This document has " + num_raw_images + " images.\n";
	for (image_index=0; image_index<num_raw_images; image_index++) {
		temp_image = document.images[image_index];
		parent_node = temp_image.parentNode;
		grandparent_node = parent_node.parentNode;
		greatgrandparent_node = grandparent_node.parentNode;

		num_pixels = temp_image.width * temp_image.height;
		if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }
		if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }
		if (num_pixels > 5000) { num_nonicon_images++; }
	}
	if (num_nonicon_images == 1) {
		alert_string = "This document has 1 image with more than 5000 pixels.\n\n";
	} else {
		alert_string = "This document has " + num_nonicon_images + " images with more than 5000 pixels.\n\n";
	}
	image_counter = 0;
	for (image_index=0; image_index<num_raw_images; image_index++) {
		temp_image = document.images[image_index];
		parent_node = temp_image.parentNode;
		grandparent_node = parent_node.parentNode;
		greatgrandparent_node = grandparent_node.parentNode;

		num_pixels = temp_image.width * temp_image.height;
		if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }
		if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }
		if (num_pixels < 5001) { continue; }

		image_counter++;
		alert_string += image_counter + " " + temp_image.width + "x" + temp_image.height + " " + num_pixels + " ";
		path_names = temp_image.src.split("/");
		file_name = path_names.pop();
		file_name = file_name.replace(/^(\d+)px-/, "");
		alert_string += file_name + "\n";
	}
	window.alert(alert_string);
	return;


// Count the article tables and check for infoboxes and navigation templates 
	num_raw_tables = document.getElementsByTagName("table").length;

	// Check for className = "infobox vcard" or "navbox-group" 

	alert_string = "This document has " + num_raw_tables + " tables.\n";
	window.alert(alert_string);

// Count the article references
	num_raw_references = document.getElementsByTagName("li").length;

// Count the article interwikis
	num_raw_interwikis = document.getElementsByTagName("li").length;

// Count the article categories
	num_raw_categories = document.getElementsByTagName("table").length;

// Count the article anchors; for each anchor...
	alert_string = "This document has " + document.anchors.length + " anchors:\n";
	for (anchor_index=0; anchor_index<document.anchors.length; anchor_index++) {
		temp_anchor = document.anchors[anchor_index];
		alert_string += "Name " + anchor_index + ": " + temp_anchor.name + "\n";
	}
	window.alert(alert_string);

} // closes function articleStructure() 

// Register the "structure" tab that runs articleStructure() when clicked.
//
// The legacy addOnloadHook() global (wikibits) is deprecated and is not
// guaranteed to exist, and mw.util is only available once the
// 'mediawiki.util' ResourceLoader module has been loaded. So wait for the
// module first, then for DOM-ready, before adding the portlet link.
mw.loader.using('mediawiki.util').then(function () {
	$(function () {
		// Arguments: portlet id, href, link text, link id, tooltip, access key, next node
		mw.util.addPortletLink('p-cactions', 'javascript:articleStructure()', 'structure', 'ca-structure', 'Structure of the article', 'g', '');
	});
});

//</pre>