User:Polygnotus/DuplicateReferences.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page.
This user script seems to have a documentation page at User:Polygnotus/DuplicateReferences.
// Entry point: run once the mediawiki.util module is loaded and the DOM is ready.
mw.loader.using(['mediawiki.util'], function () {
$(document).ready(function () {
// Only act when viewing a page in namespace 0 or the dedicated test page.
if ((mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/dupreftest') || mw.config.get('wgAction') !== 'view') {
return;
}
// Everything below is positioned relative to the element with id "References";
// without it there is nothing to do.
let referencesHeading = document.getElementById("References");
if (!referencesHeading) {
return;
}
// Highlight whichever list item is the current URL fragment target, so that
// following one of the report's "#cite_note-..." links makes the footnote stand out.
const style = document.createElement('style');
style.textContent = `li:target { border: 4px dotted red; padding: 2px;}`;
document.head.appendChild(style);
// Closest <div> ancestor of the heading; the report and the extra edit-section
// link are placed relative to it by checkDuplicateReferenceLinks().
let parentDiv = referencesHeading.closest("div");
// Red paragraph that will collect one line per duplicated URL.
let newParagraph = document.createElement("p");
newParagraph.style.color = "red";
/**
 * Prepend {{Duplicate citations}} to the current page, then reload it.
 *
 * The edit summary starts with "+{{Duplicate citations}}" and, when
 * getDuplicateInfo() reports anything, lists each duplicated URL followed by
 * the last dash-separated part of every affected footnote id.
 *
 * Success and failure are both reported through mw.notify; failures are also
 * logged to the console. Nothing is returned.
 */
function addDuplicateCitationsTemplate() {
  const pageTitle = mw.config.get('wgPageName');
  const duplicateInfo = getDuplicateInfo();

  let summary = '+{{Duplicate citations}}';
  if (duplicateInfo.length > 0) {
    const details = duplicateInfo.map(({ url, refs }) => {
      // Keep only the trailing segment of each id (e.g. "Smith-3" -> "3").
      const refLabels = refs.map((ref) => ref.split('-').pop());
      return `${url} (refs: ${refLabels.join(', ')})`;
    });
    summary += `: ${details.join('; ')}`;
  }

  // mw.Api is provided by the 'mediawiki.api' module, which the surrounding
  // loader call does not request, so make sure it is loaded before use.
  mw.loader.using('mediawiki.api').then(function () {
    const api = new mw.Api();
    // Let the server prepend the template to the latest revision instead of
    // downloading the wikitext and posting the whole page back: that
    // read-modify-write cycle could silently overwrite an edit saved in
    // between. `nocreate` keeps a missing page from being created.
    return api.postWithToken('csrf', {
      action: 'edit',
      title: pageTitle,
      prependtext: '{{Duplicate citations}}\n',
      nocreate: true,
      summary: summary
    });
  }).then(function () {
    mw.notify('Successfully added the Duplicate citations template!');
    // Reload the page to show the changes
    location.reload();
  }).catch(function (error) {
    console.error('Error:', error);
    mw.notify('Failed to add the template. See console for details.', { type: 'error' });
  });
}
/**
 * Collect the URLs that are cited by more than one footnote.
 *
 * For every `span.reference-text` the first link is taken whose text is not
 * "Archived" and whose URL contains neither "wikipedia.org/wiki/" nor
 * "_(identifier)". Footnotes are identified by the id of their enclosing
 * <li> with the "cite_note-" prefix removed ("Unknown" when unavailable).
 *
 * @returns {{url: string, refs: string[]}[]} One entry per URL used by at
 *   least two footnotes, in order of first appearance.
 */
function getDuplicateInfo() {
  const refsByUrl = new Map();

  const isCandidate = (anchor) => {
    const href = anchor.href;
    const label = anchor.textContent.trim();
    const excluded =
      label === "Archived" ||
      href.includes("wikipedia.org/wiki/") ||
      href.includes("_(identifier)");
    return !excluded;
  };

  document.querySelectorAll('span.reference-text').forEach((span) => {
    const anchors = span.querySelectorAll('a');
    const refNumber = span.closest('li')?.id.replace('cite_note-', '') || 'Unknown';

    const firstCandidate = Array.from(anchors).find(isCandidate);
    if (!firstCandidate) {
      return;
    }

    const href = firstCandidate.href;
    const known = refsByUrl.get(href);
    if (known === undefined) {
      refsByUrl.set(href, [refNumber]);
    } else {
      known.push(refNumber);
    }
  });

  // Map iteration follows insertion order, so the result keeps page order.
  return Array.from(refsByUrl)
    .filter(([, refs]) => refs.length > 1)
    .map(([url, refs]) => ({ url, refs }));
}
/**
 * Score how alike two strings are on a 0..1 scale.
 *
 * The score is (length of the longer string - edit distance) divided by the
 * length of the longer string; two empty strings score 1.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Similarity ratio.
 */
function calculateSimilarity(str1, str2) {
  // On equal lengths the second argument is treated as the "longer" one.
  const [shorter, longer] = str1.length > str2.length ? [str2, str1] : [str1, str2];
  const span = longer.length;
  if (span === 0) {
    return 1.0;
  }
  const distance = editDistance(longer, shorter);
  return (span - distance) / span;
}
/**
 * Case-insensitive Levenshtein distance between two strings.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} Minimum number of single-character insertions, deletions
 *   and substitutions turning one lower-cased string into the other.
 */
function editDistance(s1, s2) {
  const source = s1.toLowerCase();
  const target = s2.toLowerCase();

  // Row for the empty prefix of `source`: distance to each prefix of `target`.
  let previousRow = Array.from({ length: target.length + 1 }, (_, column) => column);

  for (let row = 1; row <= source.length; row++) {
    const currentRow = [row];
    for (let column = 1; column <= target.length; column++) {
      const diagonal = previousRow[column - 1];
      if (source.charAt(row - 1) === target.charAt(column - 1)) {
        currentRow.push(diagonal);
      } else {
        const cheapest = Math.min(diagonal, currentRow[column - 1], previousRow[column]);
        currentRow.push(cheapest + 1);
      }
    }
    previousRow = currentRow;
  }

  return previousRow[target.length];
}
/**
 * Recursively gather the text of a node, skipping elements whose computed
 * `display` is "none".
 *
 * Text nodes contribute their trimmed content. Element nodes contribute their
 * children's text joined by single spaces, with whitespace runs collapsed.
 * Any other node type contributes nothing.
 *
 * @param {Node} element Node to read.
 * @returns {string} Normalised text.
 */
function getAllVisibleText(element) {
  switch (element.nodeType) {
    case Node.TEXT_NODE:
      return element.textContent.trim();

    case Node.ELEMENT_NODE: {
      const { display } = window.getComputedStyle(element);
      if (display === 'none') {
        return '';
      }
      const pieces = [];
      for (const child of Array.from(element.childNodes)) {
        pieces.push(getAllVisibleText(child));
      }
      return pieces.join(' ').replace(/\s+/g, ' ').trim();
    }

    default:
      return '';
  }
}
/**
 * Parse an HTML string and return the text getAllVisibleText() finds in the
 * resulting document body.
 *
 * NOTE(review): the parsed document is never rendered, so the style-based
 * hiding check inside getAllVisibleText may not take effect here — confirm.
 *
 * @param {string} htmlString Markup to parse as text/html.
 * @returns {string} Text extracted from the parsed body.
 */
function extractVisibleText(htmlString) {
  const parsedDocument = new DOMParser().parseFromString(htmlString, 'text/html');
  return getAllVisibleText(parsedDocument.body);
}
/**
 * Find footnotes that cite the same external URL and report them on the page.
 *
 * For every `span.reference-text` the first link is used whose text is not
 * "Archived" and whose URL contains neither "wikipedia.org/wiki/" nor
 * "_(identifier)". When at least one URL is shared by two or more footnotes:
 *   - unless a `table.box-Duplicated_citations` is already present, an
 *     "add {{duplicated citations}}" link is appended to each edit-section
 *     span inside `parentDiv`;
 *   - one line per duplicated URL is added to `newParagraph`, linking to each
 *     footnote and showing the text similarity between consecutive footnotes;
 *   - `newParagraph` is inserted right after `parentDiv`.
 *
 * Relies on `parentDiv` and `newParagraph` from the enclosing scope.
 */
function checkDuplicateReferenceLinks() {
  const referenceSpans = document.querySelectorAll('span.reference-text');
  // url -> first footnote seen with that url
  const urlMap = new Map();
  // url -> every footnote sharing that url (only urls seen at least twice)
  const duplicates = new Map();

  // Create a map of cite_note ids to their displayed reference numbers
  const citeNoteMap = new Map();
  document.querySelectorAll('.reference').forEach((ref) => {
    const linkHref = ref.querySelector('a')?.getAttribute('href');
    if (!linkHref) {
      return;
    }
    const citeNoteId = linkHref.substring(1); // Remove the leading '#'
    const refNumber = ref.textContent.trim().replace(/[[\]]/g, '');
    citeNoteMap.set(citeNoteId, refNumber);
  });

  const isCandidateLink = (link) => {
    const url = link.href;
    const linkText = link.textContent.trim();
    return (
      linkText !== "Archived" &&
      !url.includes("wikipedia.org/wiki/") &&
      !url.includes("_(identifier)")
    );
  };

  referenceSpans.forEach((span) => {
    const validLink = Array.from(span.querySelectorAll('a')).find(isCandidateLink);
    if (!validLink) {
      return;
    }

    const citeNote = span.closest('li')?.id;
    const refNumber = citeNoteMap.get(citeNote) || 'Unknown';
    // Read the text from the live element. Serialising to HTML and re-parsing
    // yields a document that is never rendered, where getComputedStyle()
    // reports no `display` value, so hidden children (and inline <style>
    // content) would wrongly be counted as visible text.
    const refText = getAllVisibleText(span);
    const entry = { refNumber, citeNote, refText };

    const url = validLink.href;
    if (!urlMap.has(url)) {
      urlMap.set(url, entry);
    } else if (duplicates.has(url)) {
      duplicates.get(url).push(entry);
    } else {
      duplicates.set(url, [urlMap.get(url), entry]);
    }
  });

  if (duplicates.size === 0) {
    return;
  }

  // Offer the tagging link only when the maintenance box is not already shown.
  if (document.querySelector('table.box-Duplicated_citations') === null) {
    parentDiv.querySelectorAll('span.mw-editsection').forEach((editSection) => {
      const spanBefore = document.createElement('span');
      spanBefore.className = 'mw-editsection-bracket';
      spanBefore.textContent = '[';

      const addTemplateLink = document.createElement('a');
      addTemplateLink.textContent = ' add {{duplicated citations}} ';
      addTemplateLink.href = '#';
      addTemplateLink.addEventListener('click', function (e) {
        e.preventDefault();
        addDuplicateCitationsTemplate();
      });

      const spanAfter = document.createElement('span');
      spanAfter.className = 'mw-editsection-bracket';
      spanAfter.textContent = ']';

      editSection.appendChild(spanBefore);
      editSection.appendChild(addTemplateLink);
      editSection.appendChild(spanAfter);
    });
  }

  duplicates.forEach((refInfo, url) => {
    const paragraphInfo = document.createElement('span');

    const urlLink = document.createElement('a');
    urlLink.href = url;
    urlLink.textContent = url;
    urlLink.target = "_blank";
    urlLink.rel = "noopener noreferrer";

    paragraphInfo.appendChild(document.createTextNode('Duplicate URL: '));
    paragraphInfo.appendChild(urlLink);
    paragraphInfo.appendChild(document.createTextNode(' in refs: '));

    refInfo.forEach((ref, index) => {
      if (ref.citeNote) {
        const link = document.createElement('a');
        link.href = `#${ref.citeNote}`;
        link.textContent = ref.refNumber;
        paragraphInfo.appendChild(link);
      } else {
        paragraphInfo.appendChild(document.createTextNode(ref.refNumber));
      }

      // Every footnote but the last is followed by its similarity to the
      // next footnote and a separator.
      if (index < refInfo.length - 1) {
        const similarity = calculateSimilarity(ref.refText, refInfo[index + 1].refText);
        const similarityPercentage = Math.round(similarity * 100);
        paragraphInfo.appendChild(document.createTextNode(` (${similarityPercentage}%)`));
        paragraphInfo.appendChild(document.createTextNode(', '));
      }
    });

    paragraphInfo.appendChild(document.createElement('br'));
    newParagraph.appendChild(paragraphInfo);
  });

  parentDiv.after(newParagraph);
}
// Run the scan once, now that the guards above have passed.
checkDuplicateReferenceLinks();
});
});