Jump to content

User:Phlsph7/ListUnreferencedParagraphs.js

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Phlsph7 (talk | contribs) at 12:40, 27 July 2023 (new script). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
(function(){
	// Label of the toolbox link; also used as the prefix of the link's element id.
	const scriptName = 'List Unreferenced Paragraphs';

	// Wait for both the mediawiki.util module and DOM readiness before adding the link.
	$.when(mw.loader.using('mediawiki.util'), $.ready).then(function(){
		const portletlink = mw.util.addPortletLink('p-tb', '#', scriptName, scriptName + 'Id');
		if(!portletlink){
			// addPortletLink yields null when no link could be added (for example
			// when the current skin has no 'p-tb' portlet); nothing to wire up then.
			return;
		}
		portletlink.onclick = function(e) {
			e.preventDefault();
			// A second click while the tool is open would append another
			// #scriptContainer with duplicate element ids, so open it only once.
			if(document.getElementById('scriptContainer') === null){
				openScript();
			}
		};
	});

	function openScript(){
		// Delay in milliseconds between starting two consecutive article requests.
		const timeout = 50;
		// Set by the Stop button; checked before each article is processed.
		let stopProcessing = false;
		// Hide the regular page content while the tool is shown; Close restores it.
		const content = document.getElementById('content');
		const contentContainer = content.parentElement;
		content.style.display = 'none';

		// Append a placeholder next to the hidden content and swap it for the tool
		// markup through outerHTML.
		const placeholder = document.createElement('div');
		contentContainer.appendChild(placeholder);
		// The style rules are scoped to #scriptContainer so that they do not restyle
		// textareas and buttons elsewhere on the page. The second column header names
		// the unreferenced count because that is the value written into the cells.
		placeholder.outerHTML = `
	<div id="scriptContainer" style="display:flex; flex-direction: column;">
		<style>
			#scriptContainer textarea {
				resize: none;
				padding: 5px;
			}
			#scriptContainer button {
				margin: 5px;
			}
		</style>
		<h1>Unreferenced Paragraph Counter</h1>
		<div style="display:flex;">
			<div style="flex: 1; display:flex; flex-direction: column; margin: 5px;  height: 50vh; overflow-y: auto;">
				<label for="taList">Article Titles</label>
				<textarea id="taList" style="height: 100%;"></textarea>
			</div>
			<div style="flex: 2; display:flex; flex-direction: column; margin: 5px; height: 50vh; overflow-y: auto;">
				<label for="tableCounter">Overview table</label>
				<table id="tableCounter" class="wikitable" style="height: 100%; margin: 0px; width: 100%; border-collapse: collapse;">
					<thead>
						<tr>
							<th>Article title</th>
							<th>Unreferenced paragraphs / total paragraphs</th>
						</tr>
					</thead>
					<tbody id="tbodyCounter">
					
					</tbody>
				</table>
			</div>
		</div>
		<div style="display:flex; flex-direction: column">
			<div style="display:flex;">
				<button id="btStart" style="flex: 1;">Start</button>
				<button id="btStop" disabled style="flex: 1;">Stop</button>
				<button id="btCopy" style="flex: 1;">Copy</button>
			</div>
			<div>
				<button id="btClose" style="width: 100%;">Close</button>
			</div>
		</div>
	</div>
	`;
		// Start button: reads the title list, rebuilds the overview table and
		// kicks off processing of the titles one after another.
		const btStart = $('#btStart');
		btStart.click(function(){
			stopProcessing = false;
			btStart.prop("disabled", true);
			btStop.prop("disabled", false);

			const articleTitles = parseArticleTitles($('#taList').val());

			// populate table: one row per title, result cell addressable as #td<index>
			const tbodyCounter = $("#tbodyCounter");
			tbodyCounter.empty();
			for(let i = 0; i < articleTitles.length; i++){
				const row = $(`<tr><td></td><td id="td${i}" style="text-align: center;">-</td></tr>`);
				// Insert the title as text, not markup, so characters such as
				// "<" or "&" in the user's input are displayed literally.
				row.find('td').eq(0).text(articleTitles[i]);
				tbodyCounter.append(row);
			}

			recursivelyProcessArticles(articleTitles, 0, timeout);
		});

		/**
		 * Turns the raw textarea content into a list of article titles.
		 * Carriage returns are dropped, the text is split into lines, each line is
		 * trimmed, blank lines are discarded and duplicates are removed (first
		 * occurrence wins).
		 * @param {string} rawText - newline-separated article titles
		 * @returns {string[]} unique, non-empty titles in input order
		 */
		function parseArticleTitles(rawText){
			const titles = rawText
				.split('\r').join('')
				.split('\n')
				.map(function(title){ return title.trim(); })
				.filter(function(title){ return title !== ''; });

			// remove duplicates
			return [...new Set(titles)];
		}
		// Stop button: raises the stop flag and flips the Start/Stop buttons back
		// to their idle state (Start enabled, Stop disabled).
		const btStop = $('#btStop');
		btStop.click(() => {
			stopProcessing = true;
			btStop.prop('disabled', true);
			btStart.prop('disabled', false);
		});
		// Copy button: copies the text of the overview table body to the clipboard
		// and tells the user whether that worked.
		const btCopy = $('#btCopy');
		btCopy.click(function(){
			const tableText = getTextViaSelection();
			if(copyToClipboard(tableText)){
				alert("The table was copied to the clipboard.");
			}
			else{
				alert("The table could not be copied to the clipboard.");
			}

			/**
			 * Reads the table body as text by selecting its contents and
			 * stringifying the selection.
			 * @returns {string} the selection text of #tbodyCounter
			 */
			function getTextViaSelection(){
				const tbodyCounter = $('#tbodyCounter')[0];
				const range = document.createRange();
				range.selectNodeContents(tbodyCounter);

				const selection = window.getSelection();
				selection.removeAllRanges();
				selection.addRange(range);
				const selectedText = selection.toString();
				// The selection was only a means to read the text; do not leave
				// the table highlighted afterwards.
				selection.removeAllRanges();
				return selectedText;
			}

			/**
			 * Copies text through a temporary textarea and the 'copy' command.
			 * @param {string} text - text to place on the clipboard
			 * @returns {boolean} the result reported by execCommand, or false
			 *   if the command threw
			 */
			function copyToClipboard(text) {
				const textarea = document.createElement('textarea');
				textarea.value = text;
				document.body.appendChild(textarea);
				try {
					textarea.select();
					return document.execCommand('copy');
				} catch (error) {
					return false;
				} finally {
					// always remove the helper element, even when copying failed
					document.body.removeChild(textarea);
				}
			}
		});
		// Close button: stops any run via the Stop button's handler, removes the
		// tool's container from the page and makes the hidden content visible again.
		const btClose = $('#btClose');
		btClose.click(() => {
			btStop.trigger('click');

			const toolRoot = document.getElementById('scriptContainer');
			const toolParent = toolRoot.parentElement;
			toolParent.removeChild(toolRoot);

			content.style.display = '';
		});

		/**
		 * Processes the article at `index` and schedules itself for the next index
		 * after `timeout` milliseconds. While running, the Stop button label shows
		 * the current index and the total. When the list is exhausted or the stop
		 * flag is set, the label is reset and a click on the Stop button is
		 * triggered to return the buttons to their idle state.
		 * @param {string[]} articleTitles - titles to process
		 * @param {number} index - position of the title to process in this step
		 * @param {number} timeout - delay in milliseconds before the next step
		 */
		function recursivelyProcessArticles(articleTitles, index, timeout){
			if(!stopProcessing && index < articleTitles.length){
				btStop.text(`Stop (${index}/${articleTitles.length})`);
				processArticle(articleTitles, index);

				setTimeout(function(){recursivelyProcessArticles(articleTitles, index + 1, timeout);}, timeout);
			}
			else{
				btStop.text(`Stop`);
				btStop.trigger('click');
			}
		}

		function processArticle(articleTitles, index){
			// Requests the parsed HTML of the article at `index` from the English
			// Wikipedia API and writes the paragraph summary (or 'error') into the
			// table cell with id td<index>.
			const articleTitle = articleTitles[index];
			const articleSearchTerm = encodeURIComponent(articleTitle);
			const cellId = `td${index}`;
			const wikiApiUrl = `https://en.wikipedia.org/w/api.php?action=parse&page=${articleSearchTerm}&format=json`;
			fetch(wikiApiUrl).then(async function(response) {
				const data = await response.json();
				if (data && data.parse && data.parse.text && data.parse.text['*']) {
					const articleHTML = data.parse.text['*'];
					const count = getParagraphInfo(articleHTML);
					$('#' + cellId).text(count);
				} else {
					// response without parsed text, e.g. an API error object
					$('#' + cellId).text('error');
				}
			}).catch(function(error){
				// A failed request, an unparsable response body or an exception
				// while analysing the HTML would otherwise leave the cell at "-"
				// with no indication that anything went wrong.
				console.error(`Could not process "${articleTitle}":`, error);
				$('#' + cellId).text('error');
			});

			function getParagraphInfo(articleHTML){
				const minimalParagraphLength = 100;
				const parser = new DOMParser();
				const doc = parser.parseFromString(articleHTML, 'text/html');
				const paragraphContainer = $(doc).find('.mw-parser-output').eq(0);
				const releventChildren = paragraphContainer.children("p, h2").toArray();
				const articleObject = convertToObject(releventChildren);
				removeIrrelevantSections(articleObject);
				const paragraphsInRelevantSections = convertToSimpleArray(articleObject);
				const longParagraphs = removeShortParagraphs(paragraphsInRelevantSections);
				const unreferencedParagraphs = getUnreferencedParagraphs(longParagraphs);

				return `${unreferencedParagraphs.length} / ${longParagraphs.length}`;

				/**
				 * Groups elements by the section they appear in. Elements before
				 * the first H2 go into "Lead"; every H2 starts a section named
				 * after its text with the "[edit]" marker removed and surrounding
				 * whitespace trimmed. If a section name occurs more than once, the
				 * elements of all occurrences are collected in one list instead of
				 * the later heading discarding what the earlier one gathered.
				 * @param {Array} elementArray - elements exposing tagName and innerText, in document order
				 * @returns {Object} map from section name to the array of its non-H2 elements
				 */
				function convertToObject(elementArray){
					const articleObject = {};
					let currentSection = "Lead";
					articleObject[currentSection] = [];

					for(const element of elementArray){
						if(element.tagName === 'H2'){
							currentSection = element.innerText.split('[edit]').join('').trim();
							// keep the existing list when the heading name repeats
							if(!Object.prototype.hasOwnProperty.call(articleObject, currentSection)){
								articleObject[currentSection] = [];
							}
						}
						else{
							articleObject[currentSection].push(element);
						}
					}

					return articleObject;
				}

				/**
				 * Deletes, in place, every section whose name is on the exclusion
				 * list. Names are compared after trimming and lower-casing, so
				 * capitalisation variants such as "External Links" are removed too.
				 * @param {Object} articleObject - map from section name to elements; modified in place
				 */
				function removeIrrelevantSections(articleObject){
					const excludedSections = ['Lead', 'Plot', 'Synopsis', 'Further reading', 'See also', 'External links', 'References', 'Bibliography', 'Notes', 'Selected publications', 'Selected works', 'Cited sources', 'Sources', 'Footnotes'];
					const excludedKeys = new Set(excludedSections.map(function(name){
						return name.toLowerCase();
					}));
					// iterate over a snapshot of the keys because entries are deleted in the loop
					for(const sectionName of Object.keys(articleObject)){
						if(excludedKeys.has(sectionName.trim().toLowerCase())){
							delete articleObject[sectionName];
						}
					}
				}

				/**
				 * Flattens the per-section lists into a single array, keeping the
				 * enumeration order of the sections and the order within each one.
				 * @param {Object} articleObject - map from section name to an array of elements
				 * @returns {Array} all elements of all sections
				 */
				function convertToSimpleArray(articleObject){
					const sectionLists = [];
					for (const key in articleObject){
						sectionLists.push(articleObject[key]);
					}

					// concat with several arguments appends each list in turn
					return [].concat(...sectionLists);
				}

				/**
				 * Returns the paragraphs whose innerText has at least
				 * minimalParagraphLength characters, in their original order.
				 * @param {Array} paragraphArray - paragraph elements to check
				 * @returns {Array} new array with the sufficiently long paragraphs
				 */
				function removeShortParagraphs(paragraphArray){
					return paragraphArray.filter(function(candidate){
						return candidate.innerText.length >= minimalParagraphLength;
					});
				}

				/**
				 * Returns the paragraphs that contain no descendant matching
				 * '.reference', in their original order.
				 * @param {Array} paragraphArray - paragraph elements to check
				 * @returns {Array} new array with the paragraphs lacking a reference
				 */
				function getUnreferencedParagraphs(paragraphArray){
					const lacksReference = (candidate) => $(candidate).find('.reference').length === 0;
					return paragraphArray.filter(lacksReference);
				}
			}
		}
	}
})();