Jump to content

User:Polygnotus/DuplicateReferences.js

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Polygnotus (talk | contribs) at 18:26, 15 July 2024 (Created page with 'mw.loader.using(['mediawiki.util'], function () { $(document).ready(function () { if ((mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/dupreftest') || mw.config.get('wgAction') !== 'view') { return; } let referencesHeading = document.getElementById("References"); if (!referencesHeading) { return; } const style = document...'). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
mw.loader.using(['mediawiki.util'], function () {
    $(document).ready(function () {
        if ((mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/dupreftest') || mw.config.get('wgAction') !== 'view') {
            return;
        }
        
        let referencesHeading = document.getElementById("References");
        if (!referencesHeading) {
            return;
        }
        
        const style = document.createElement('style');
        style.textContent = `li:target { border: 4px dotted red; padding: 2px;}`;
        document.head.appendChild(style);
        
        let parentDiv = referencesHeading.closest("div");
        let newParagraph = document.createElement("p");
        newParagraph.style.color = "red";
        
        /**
         * Prepends `{{Duplicate citations}}` to the current page and reloads it.
         *
         * The edit summary lists every duplicated URL reported by
         * getDuplicateInfo() together with the trailing segment of each
         * affected footnote id.
         *
         * The edit is made with the API's `prependtext` parameter instead of
         * downloading the wikitext and re-uploading it as `text`, so the page
         * content is never round-tripped through the browser and an edit saved
         * by someone else in the meantime is not overwritten. `nocreate` keeps
         * the call from creating a page that does not exist.
         *
         * `mediawiki.api` is requested here because the surrounding script only
         * declares `mediawiki.util`, which does not guarantee `mw.Api` exists.
         *
         * @returns {Promise} Settles once the edit attempt has finished; failures
         *     are reported via mw.notify and the console rather than rejected.
         */
        function addDuplicateCitationsTemplate() {
            const pageTitle = mw.config.get('wgPageName');

            // Snapshot of the duplicates as currently rendered; used for the summary only.
            const duplicateInfo = getDuplicateInfo();

            let summary = '+{{Duplicate citations}}';
            if (duplicateInfo.length > 0) {
                const details = duplicateInfo.map(({ url, refs }) => {
                    // Keep only the part after the last '-' of each footnote id.
                    const refLabels = refs.map((ref) => {
                        const parts = ref.split('-');
                        return parts[parts.length - 1];
                    });
                    return `${url} (refs: ${refLabels.join(', ')})`;
                });
                summary += `: ${details.join('; ')}`;
            }

            return mw.loader.using(['mediawiki.api']).then(function () {
                const api = new mw.Api();
                return api.postWithToken('csrf', {
                    action: 'edit',
                    title: pageTitle,
                    prependtext: '{{Duplicate citations}}\n',
                    nocreate: true,
                    summary: summary
                });
            }).then(function () {
                mw.notify('Successfully added the Duplicate citations template!');
                // Reload the page to show the changes
                location.reload();
            }).catch(function (error) {
                console.error('Error:', error);
                mw.notify('Failed to add the template. See console for details.', {type: 'error'});
            });
        }

        /**
         * Collects external URLs that are cited by more than one reference.
         *
         * For every `span.reference-text` the first anchor that is not labelled
         * "Archived", does not point at a wikipedia.org/wiki/ page and is not an
         * "_(identifier)" link is taken as the reference's URL.
         *
         * @returns {Array<{url: string, refs: string[]}>} One entry per URL used by
         *     two or more references, in first-seen order. `refs` holds each list
         *     item's id with the "cite_note-" prefix removed, or "Unknown".
         */
        function getDuplicateInfo() {
            const refsByUrl = new Map();

            for (const refSpan of document.querySelectorAll('span.reference-text')) {
                const anchors = Array.from(refSpan.querySelectorAll('a'));
                const listItem = refSpan.closest('li');
                const refId = (listItem ? listItem.id.replace('cite_note-', '') : '') || 'Unknown';

                const citedAnchor = anchors.find((anchor) => {
                    const href = anchor.href;
                    const isExcluded =
                        anchor.textContent.trim() === "Archived" ||
                        href.includes("wikipedia.org/wiki/") ||
                        href.includes("_(identifier)");
                    return !isExcluded;
                });
                if (!citedAnchor) {
                    continue;
                }

                const citedUrl = citedAnchor.href;
                const bucket = refsByUrl.get(citedUrl);
                if (bucket) {
                    bucket.push(refId);
                } else {
                    refsByUrl.set(citedUrl, [refId]);
                }
            }

            return Array.from(refsByUrl)
                .filter(([, refs]) => refs.length > 1)
                .map(([url, refs]) => ({ url, refs }));
        }

        /**
         * Similarity ratio of two strings derived from their edit distance.
         *
         * @param {string} str1
         * @param {string} str2
         * @returns {number} `(L - d) / L`, where `L` is the length of the longer
         *     string and `d` is editDistance(longer, shorter); 1.0 when both
         *     strings are empty.
         */
        function calculateSimilarity(str1, str2) {
            // On equal lengths str2 is treated as the "longer" one.
            const [shorter, longer] = str1.length > str2.length ? [str2, str1] : [str1, str2];
            const span = longer.length;
            if (span === 0) {
                return 1.0;
            }
            const distance = editDistance(longer, shorter);
            return (span - distance) / span;
        }

        /**
         * Case-insensitive Levenshtein distance between two strings.
         *
         * Both inputs are lower-cased first; the result is the minimum number of
         * single-character insertions, deletions and substitutions needed to turn
         * one into the other.
         *
         * @param {string} s1
         * @param {string} s2
         * @returns {number}
         */
        function editDistance(s1, s2) {
            const a = s1.toLowerCase();
            const b = s2.toLowerCase();

            // previousRow[j]: distance between the first (i - 1) chars of `a`
            // and the first j chars of `b`. Row 0 is simply 0..b.length.
            let previousRow = Array.from({ length: b.length + 1 }, (_, j) => j);

            for (let i = 1; i <= a.length; i++) {
                const currentRow = [i];
                for (let j = 1; j <= b.length; j++) {
                    const diagonal = previousRow[j - 1];
                    currentRow[j] = a.charAt(i - 1) === b.charAt(j - 1)
                        ? diagonal
                        : Math.min(diagonal, currentRow[j - 1], previousRow[j]) + 1;
                }
                previousRow = currentRow;
            }

            return previousRow[b.length];
        }

        /**
         * Recursively gathers the text of a DOM node, skipping any element whose
         * computed `display` is `none`.
         *
         * @param {Node} element Text node, element node, or any other node.
         * @returns {string} Trimmed text for a text node; for an element, its
         *     children's text joined by single spaces with whitespace runs
         *     collapsed; an empty string for hidden elements and other node types.
         */
        function getAllVisibleText(element) {
            switch (element.nodeType) {
                case Node.TEXT_NODE:
                    return element.textContent.trim();

                case Node.ELEMENT_NODE: {
                    const { display } = window.getComputedStyle(element);
                    if (display === 'none') {
                        return '';
                    }

                    const pieces = [];
                    for (const child of element.childNodes) {
                        pieces.push(getAllVisibleText(child));
                    }
                    return pieces.join(' ').replace(/\s+/g, ' ').trim();
                }

                default:
                    return '';
            }
        }

        /**
         * Parses an HTML string and returns the text getAllVisibleText() finds in it.
         *
         * The markup is parsed with DOMParser into its own document; it is not
         * inserted into the current page.
         *
         * @param {string} htmlString HTML markup to parse as `text/html`.
         * @returns {string} Result of getAllVisibleText() on the parsed `<body>`.
         */
        function extractVisibleText(htmlString) {
            const { body } = new DOMParser().parseFromString(htmlString, 'text/html');
            return getAllVisibleText(body);
        }

        /**
         * Finds references that cite the same external URL and reports them
         * after the div that contains the "References" heading.
         *
         * Steps:
         *  1. Map every footnote marker (`.reference`) from the id it links to
         *     onto the number it displays.
         *  2. For each `span.reference-text`, take the first anchor that is not
         *     labelled "Archived", not a wikipedia.org/wiki/ link and not an
         *     "_(identifier)" link, and group references by that anchor's URL.
         *  3. If any URL is shared, add an "add {{duplicated citations}}" link
         *     to the edit-section spans inside `parentDiv` (unless a
         *     `table.box-Duplicated_citations` is already on the page) and
         *     append one report line per URL to `newParagraph`, showing a text
         *     similarity percentage between each reference and the next one.
         *
         * Reference text is read from the live page elements, not from a
         * re-parsed copy of their HTML: getAllVisibleText() relies on
         * `getComputedStyle`, which only reflects the page's style sheets for
         * elements that are part of the rendered page. Reading a detached copy
         * let hidden nodes and inline `<style>` text leak into the comparison.
         */
        function checkDuplicateReferenceLinks() {
            const referenceSpans = document.querySelectorAll('span.reference-text');
            const urlMap = new Map();      // url -> first reference seen with that url
            const duplicates = new Map();  // url -> all references sharing it (two or more)

            // Create a map of cite_note ids to their displayed reference numbers
            const citeNoteMap = new Map();
            document.querySelectorAll('.reference').forEach(ref => {
                const linkHref = ref.querySelector('a')?.getAttribute('href');
                if (linkHref) {
                    const citeNoteId = linkHref.substring(1);  // Remove the leading '#'
                    const refNumber = ref.textContent.trim().replace(/[[\]]/g, '');
                    citeNoteMap.set(citeNoteId, refNumber);
                }
            });

            referenceSpans.forEach((span) => {
                let validLink = null;
                for (const link of span.querySelectorAll('a')) {
                    const url = link.href;
                    const linkText = link.textContent.trim();

                    if (
                        linkText !== "Archived" &&
                        !url.includes("wikipedia.org/wiki/") &&
                        !url.includes("_(identifier)")
                    ) {
                        validLink = link;
                        break;
                    }
                }

                // References without a usable link cannot be duplicates; skip
                // them before doing the comparatively expensive text extraction.
                if (!validLink) {
                    return;
                }

                const citeNote = span.closest('li')?.id;
                const refNumber = citeNoteMap.get(citeNote) || 'Unknown';
                const refText = getAllVisibleText(span);  // Visible text of the live element
                const entry = { refNumber, citeNote, refText };

                const url = validLink.href;
                if (!urlMap.has(url)) {
                    urlMap.set(url, entry);
                } else if (duplicates.has(url)) {
                    duplicates.get(url).push(entry);
                } else {
                    duplicates.set(url, [urlMap.get(url), entry]);
                }
            });

            if (duplicates.size === 0) {
                return;
            }

            // Offer the tagging link only when the page is not already tagged.
            if (document.querySelector('table.box-Duplicated_citations') === null) {
                const editSections = parentDiv.querySelectorAll('span.mw-editsection');

                editSections.forEach(editSection => {
                    const spanBefore = document.createElement('span');
                    spanBefore.className = 'mw-editsection-bracket';
                    spanBefore.textContent = '[';

                    const addTemplateLink = document.createElement('a');
                    addTemplateLink.textContent = ' add {{duplicated citations}} ';
                    addTemplateLink.href = '#';
                    addTemplateLink.addEventListener('click', function(e) {
                        e.preventDefault();
                        addDuplicateCitationsTemplate();
                    });

                    const spanAfter = document.createElement('span');
                    spanAfter.className = 'mw-editsection-bracket';
                    spanAfter.textContent = ']';

                    editSection.appendChild(spanBefore);
                    editSection.appendChild(addTemplateLink);
                    editSection.appendChild(spanAfter);
                });
            }

            duplicates.forEach((refInfo, url) => {
                const paragraphInfo = document.createElement('span');

                const urlLink = document.createElement('a');
                urlLink.href = url;
                urlLink.textContent = url;
                urlLink.target = "_blank";
                urlLink.rel = "noopener noreferrer";

                paragraphInfo.appendChild(document.createTextNode('Duplicate URL: '));
                paragraphInfo.appendChild(urlLink);
                paragraphInfo.appendChild(document.createTextNode(' in refs: '));

                refInfo.forEach((ref, index) => {
                    if (ref.citeNote) {
                        const link = document.createElement('a');
                        link.href = `#${ref.citeNote}`;
                        link.textContent = ref.refNumber;
                        paragraphInfo.appendChild(link);
                    } else {
                        paragraphInfo.appendChild(document.createTextNode(ref.refNumber));
                    }

                    // Every reference except the last is followed by its
                    // similarity to the next reference and a separator.
                    if (index < refInfo.length - 1) {
                        const similarity = calculateSimilarity(ref.refText, refInfo[index + 1].refText);
                        const similarityPercentage = Math.round(similarity * 100);
                        paragraphInfo.appendChild(document.createTextNode(` (${similarityPercentage}%), `));
                    }
                });

                paragraphInfo.appendChild(document.createElement('br'));
                newParagraph.appendChild(paragraphInfo);
            });

            parentDiv.after(newParagraph);
        }
        
        // Entry point: run the duplicate scan once, after the guards above have passed.
        checkDuplicateReferenceLinks();
    });
});