Jump to content

User:Polygnotus/DuplicateReferences.js

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//Testpage: https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferencesTest

// <nowiki>
mw.loader.using(['mediawiki.util'], function () {
    $(document).ready(function () {

        // Set to true to trace the script's decisions in the browser console.
        const DEBUG = false;

        /**
         * Logs the given values to the console, prefixed with the script name,
         * but only while DEBUG is enabled.
         *
         * @param {...*} args - Values forwarded to console.log.
         */
        function debug(...args) {
            if (!DEBUG) {
                return;
            }
            console.log('[DuplicateReferences]', ...args);
        }

		if (
		    mw.config.get('wgAction') !== 'view' ||
		    mw.config.get('wgDiffNewId') ||
		    mw.config.get('wgDiffOldId') ||
		    (mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest')
		) {
			debug("Not the correct page or action, script terminated");
		    return;
		}

        debug("Page title:", document.title);
        debug("URL:", window.location.href);

        /**
         * Walks forward through the following siblings of `element` and returns
         * the first <div> carrying the `reflist` or `mw-references-wrap` class.
         *
         * @param {Element} element - Element whose later siblings are scanned
         *     (the element itself is never a candidate).
         * @returns {Element|null} The matching sibling, or null if none exists.
         */
        function findNextReflistDiv(element) {
            const wrapperClasses = ['reflist', 'mw-references-wrap'];

            for (let sibling = element.nextElementSibling; sibling; sibling = sibling.nextElementSibling) {
                const isDiv = sibling.tagName.toLowerCase() === 'div';
                if (isDiv && wrapperClasses.some((className) => sibling.classList.contains(className))) {
                    return sibling;
                }
            }

            return null;
        }

        // Locate the rendered reference list. Each step depends on the previous
        // one, and the script stops (silently unless DEBUG is on) as soon as any
        // step fails.

        // 1. The <h2> element whose id is "References".
        const referencesHeader = document.querySelector("h2#References");
        if (!referencesHeader) {
            debug("References heading not found, script terminated");
            return;
        }

        // 2. The nearest <div> ancestor of that heading; the duplicates table is
        //    later inserted right after this element.
        const containerDiv = referencesHeader.closest("div");
        if (!containerDiv) {
            debug("Container div not found, script terminated");
            return;
        }

        // 3. The first following sibling <div> with class "reflist" or
        //    "mw-references-wrap".
        const reflistDiv = findNextReflistDiv(containerDiv);
        if (!reflistDiv) {
            debug("Reflist div not found, script terminated");
            return;
        }

        // 4. The <ol class="references"> inside it; its children are the
        //    reference items scanned by getDuplicateInfo().
        const referencesList = reflistDiv.querySelector('ol.references');
        if (!referencesList) {
            debug("ol.references not found within reflist div");
            return;
        }

        // Inject the script's stylesheet into <head>. It styles the targeted
        // list item, the hover/highlight/clicked states applied to duplicate
        // citations, the toggle link, and the duplicates table (which is hidden
        // entirely on viewports up to 768px wide).
        // Note: the `li:target` and `.mw-collapsible-toggle` selectors are not
        // scoped to this script's own elements, so they apply page-wide.
        const style = document.createElement('style');
        style.textContent = `
            li:target { border: 1px dotted red; padding: 2px; background-color: #ffcccc !important;}
            .duplicate-citation-highlight { background-color: #e1eeff; }
            .duplicate-citation-hover { background-color: #cce0ff; border: 1px dotted blue; }
            .duplicate-citation-clicked { border: 1px dotted red; padding: 2px; background-color: #ffe6e6; }
            .mw-collapsible-toggle { font-weight: normal; float: right; }
            .duplicate-references-table { width: 100%; }
            @media only screen and (max-width: 768px) {
                .duplicate-references-table { display: none; }
            }
        `;
        document.head.appendChild(style);

        /**
         * Tags the current page with {{Duplicated citations}}.
         *
         * Fetches the page's current wikitext, inserts the template (listing
         * every duplicated URL and the reference numbers that use it) after any
         * leading hatnote/metadata templates, saves the edit, and reloads the
         * page. Progress is reflected in `linkElement` via showLoading /
         * showSuccess / showError.
         *
         * The function depends on `mw.Api` and on the Morebits library, neither
         * of which is guaranteed to be loaded by the surrounding
         * `mw.loader.using(['mediawiki.util'])` call, so both modules are
         * requested explicitly before they are touched. If either cannot be
         * loaded the failure is reported like any other error.
         *
         * @param {HTMLElement} linkElement - The clicked link; its content is
         *     replaced with a status label.
         */
        function addDuplicateCitationsTemplate(linkElement) {
            debug("Adding duplicate citations template");
            showLoading(linkElement);

            const pageTitle = mw.config.get('wgPageName');
            const duplicateInfo = getDuplicateInfo();

            // Build the "|date=Month Year" parameter from the current date.
            const currentDate = new Date();
            const monthNames = ["January", "February", "March", "April", "May", "June",
                "July", "August", "September", "October", "November", "December"
            ];
            const currentMonth = monthNames[currentDate.getMonth()];
            const currentYear = currentDate.getFullYear();
            const dateParam = `|date=${currentMonth} ${currentYear}`;

            // Assigned once the API module has been loaded.
            let api;

            mw.loader.using(['mediawiki.api', 'ext.gadget.morebits']).then(function () {
                api = new mw.Api();
                return api.get({
                    action: 'query',
                    prop: 'revisions',
                    titles: pageTitle,
                    rvprop: 'content',
                    rvslots: 'main',
                    formatversion: 2
                });
            }).then(function (data) {
                const page = data.query.pages[0];
                const content = page.revisions[0].slots.main.content;

                // Create the reason string: one bullet per duplicated URL.
                let reason = '[[User:Polygnotus/DuplicateReferences|DuplicateReferences]] detected:<br>\n';
                duplicateInfo.forEach((info) => {
                    reason += `* ${info.url} (refs: ${info.refs.map(r => r.number).join(', ')})<br>\n`;
                });

                // Create the template to insert
                const templateToInsert = `{{Duplicated citations|reason=${reason}${dateParam}}}\n`;

                // Use Morebits to handle the template insertion
                const wikitextPage = new Morebits.wikitext.page(content);

                // Templates that should come before the duplicated citations template
                const precedingTemplates = [
                    'short description',
                    'displaytitle',
                    'lowercase title',
                    'italic title',
                    'about',
                    'redirect',
                    'distinguish',
                    'for',
                    'Featured list',
                    'Featured article',
                    'Good article',
                    'Other uses'
                ];

                // Insert the template after the specified templates.
                // The third parameter is flags ('i' for case-insensitive);
                // the fourth lists pre-template content to skip, here HTML comments.
                wikitextPage.insertAfterTemplates(templateToInsert, precedingTemplates, 'i', ['<!--[\\s\\S]*?-->']);

                const newContent = wikitextPage.getText();
                const summary = `Tagged [[WP:DUPREF|duplicate citations]] using [[User:Polygnotus/DuplicateReferences|DuplicateReferences]]`;

                return api.postWithToken('csrf', {
                    action: 'edit',
                    title: pageTitle,
                    text: newContent,
                    summary: summary
                });
            }).then(function () {
                showSuccess(linkElement);
                setTimeout(function () {
                    location.reload();
                }, 100); // Reload after 0.1 second
            }).catch(function (error) {
                console.error('Error:', error);
                showError(linkElement);
                mw.notify('Failed to add the template. See console for details.', {type: 'error'});
            });
        }

        /**
         * Replaces the content of `element` with the given status markup.
         * Shared by the three status helpers below.
         *
         * @param {HTMLElement} element - Element whose innerHTML is overwritten.
         * @param {string} markup - Static HTML snippet to display.
         */
        function setStatusMarkup(element, markup) {
            element.innerHTML = markup;
        }

        /** Shows the "[ Working... ]" label in `element`. */
        function showLoading(element) {
            setStatusMarkup(element, '<sup><small>[ Working... ]</small></sup>');
        }

        /** Shows the "[ Done ]" label in `element`. */
        function showSuccess(element) {
            setStatusMarkup(element, '<sup><small>[ Done ]</small></sup>');
        }

        /** Shows the "[ Error ]" label in `element`. */
        function showError(element) {
            setStatusMarkup(element, '<sup><small>[ Error ]</small></sup>');
        }

        /**
         * Recursively collects the text of `element`, skipping any descendant
         * element whose computed style is `display: none` or
         * `visibility: hidden`.
         *
         * Each text node is trimmed, and the pieces (text nodes and the results
         * for visible child elements) are separated by single spaces; the final
         * string is trimmed. Nodes that are neither text nor elements are
         * ignored.
         *
         * @param {Node} element - Root node to read.
         * @returns {string} The visible text.
         */
        function getVisibleText(element) {
            const pieces = [];

            for (const child of element.childNodes) {
                if (child.nodeType === Node.TEXT_NODE) {
                    pieces.push(child.textContent.trim());
                    continue;
                }
                if (child.nodeType !== Node.ELEMENT_NODE) {
                    continue;
                }

                // Skip hidden elements together with everything inside them.
                const computed = window.getComputedStyle(child);
                const isHidden = computed.display === 'none' || computed.visibility === 'hidden';
                if (!isHidden) {
                    pieces.push(getVisibleText(child));
                }
            }

            return pieces.join(' ').trim();
        }

        /**
         * Computes the Levenshtein (edit) distance between two strings: the
         * minimum number of single-character insertions, deletions and
         * substitutions needed to turn one into the other.
         *
         * @param {string} a - First text.
         * @param {string} b - Second text.
         * @returns {number} The edit distance.
         */
        function calculateLevenshteinDistance(a, b) {
            debug("Comparing:");
            debug("Text 1:", a);
            debug("Text 2:", b);

            if (a.length === 0) return b.length;
            if (b.length === 0) return a.length;

            // Row 0 of the DP table: distance from the empty prefix of b to
            // each prefix of a.
            let previousRow = Array.from({length: a.length + 1}, (_, column) => column);

            for (let row = 1; row <= b.length; row++) {
                const bChar = b.charAt(row - 1);
                // Column 0: distance from a prefix of b to the empty prefix of a.
                const currentRow = [row];

                for (let column = 1; column <= a.length; column++) {
                    currentRow[column] = bChar === a.charAt(column - 1)
                        ? previousRow[column - 1]
                        : 1 + Math.min(
                            previousRow[column - 1], // substitution
                            currentRow[column - 1],  // insertion
                            previousRow[column]      // deletion
                        );
                }

                previousRow = currentRow;
            }

            const distance = previousRow[a.length];
            debug("Levenshtein distance:", distance);
            return distance;
        }

        /**
         * Converts an edit distance into a rounded similarity percentage.
         *
         * @param {number} distance - Edit distance between the two texts.
         * @param {number} maxLength - Length of the longer of the two texts.
         * @returns {string} The similarity as a whole number followed by '%'.
         *     When `maxLength` is 0 (both texts empty, hence identical) the
         *     result is '100%' rather than the 'NaN%' a 0/0 division would give.
         */
        function calculateSimilarityPercentage(distance, maxLength) {
            const similarity = maxLength === 0
                ? 100
                : ((maxLength - distance) / maxLength) * 100;
            debug("Similarity percentage:", similarity.toFixed(2) + "%");
            return Math.round(similarity) + '%';
        }

        /**
         * Scans the reference list and groups references by the first usable
         * external link they contain.
         *
         * For every <li> child of `referencesList` the anchors are examined in
         * document order. If an anchor's href does not contain 'http', the whole
         * reference is skipped. Otherwise the first anchor that is not an
         * "Archived" link and does not point at an excluded destination becomes
         * the reference's URL. Reference numbers are the 1-based position of the
         * item among the list's children.
         *
         * @returns {Array<{url: string, refs: Array<{id: string, number: number,
         *     text: string, similarity: (Object<string, string>|undefined)}>}>}
         *     One entry per URL used by two or more references. Within a set,
         *     every ref except the last carries `similarity`, keyed by the id of
         *     each later ref in the set.
         */
        function getDuplicateInfo() {
            debug("Getting duplicate info");

            // Destinations that never count as a reference's identifying URL.
            const ignoredUrlPrefixes = [
                "https://search.worldcat.org/",                   // |issn= parameter in cite news
                "https://www.bbc.co.uk/news/live/",               // live articles get frequent updates
                "https://www.aljazeera.com/news/liveblog/",
                "https://www.nbcnews.com/news/world/live-blog/",
                "https://www.theguardian.com/world/live/",
                "https://www.nytimes.com/live/",
                "https://edition.cnn.com/world/live-news/",
                "https://www.timesofisrael.com/liveblog",
                "https://www.france24.com/en/live-news/",
                "https://books.google.com/",                      // may be 2 different pages of the same book
                "https://archive.org/details/isbn_"
            ];

            // Templates like ISBN, ISSN, OCLC and S2CID link to pages whose
            // title contains "_(identifier)".
            const isUsableLink = (href, label) =>
                label !== "Archived" &&
                !href.includes("wikipedia.org") &&
                !href.includes("_(identifier)") &&
                !ignoredUrlPrefixes.some((prefix) => href.startsWith(prefix));

            const refsByUrl = new Map();
            const listEntries = Array.from(referencesList.children);

            debug("Number of reference items:", listEntries.length);

            nextEntry:
            for (const [position, entry] of listEntries.entries()) {
                if (entry.tagName.toLowerCase() !== 'li') {
                    continue;
                }

                const refId = entry.id;
                const refNumber = position + 1;
                debug(`Processing reference item ${refNumber} (${refId})`);

                // Visible text of the entire reference item
                const refText = getVisibleText(entry);
                debug(`  Reference text: ${refText}`);

                // Pick the first usable link in the reference
                let chosenUrl = null;
                for (const anchor of entry.querySelectorAll('a')) {
                    const href = anchor.href;

                    // An href without 'http' disqualifies the whole reference
                    if (!href.includes('http')) {
                        debug(`  Skipping reference ${refNumber} - URL does not contain 'http'`);
                        continue nextEntry;
                    }

                    if (isUsableLink(href, anchor.textContent.trim())) {
                        chosenUrl = href;
                        debug(`  Valid link found: ${href}`);
                        break;
                    }
                }

                if (chosenUrl === null) {
                    debug(`  No valid link found in this item`);
                    continue;
                }

                const record = {id: refId, number: refNumber, text: refText};
                const sameUrlRefs = refsByUrl.get(chosenUrl);
                if (sameUrlRefs) {
                    sameUrlRefs.push(record);
                    debug(`  Duplicate found for URL: ${chosenUrl}`);
                } else {
                    refsByUrl.set(chosenUrl, [record]);
                    debug(`  New URL added to map: ${chosenUrl}`);
                }
            }

            const duplicates = [];
            for (const [url, refs] of refsByUrl) {
                if (refs.length < 2) {
                    continue;
                }

                // Score every unordered pair of refs that share this URL
                refs.forEach((earlier, earlierIndex) => {
                    refs.slice(earlierIndex + 1).forEach((later) => {
                        debug(`Comparing references ${earlier.number} and ${later.number}:`);
                        const distance = calculateLevenshteinDistance(earlier.text, later.text);
                        const maxLength = Math.max(earlier.text.length, later.text.length);
                        const similarity = calculateSimilarityPercentage(distance, maxLength);
                        if (!earlier.similarity) {
                            earlier.similarity = {};
                        }
                        earlier.similarity[later.id] = similarity;
                    });
                });

                duplicates.push({url, refs});
            }

            debug("Number of duplicate sets found:", duplicates.length);
            debug("Duplicate sets:", duplicates);
            return duplicates;
        }

        /**
         * Builds the "Duplicate References" table shown above the reference list.
         *
         * The header row holds the title, a show/hide toggle and, unless the
         * page already displays a {{Duplicated citations}} box, a link that tags
         * the page. Each following row describes one duplicated URL: a
         * "report false positive" icon, the URL itself, and a link to every
         * reference using it. References after the first are followed by their
         * similarity to the first one. Hovering or clicking a reference link
         * highlights all references of that set in the list.
         *
         * The table is returned detached; the caller inserts it and wires up
         * the toggle.
         *
         * @param {Array<{url: string, refs: Array<{id: string, number: number,
         *     text: string}>}>} duplicateInfo - Sets from getDuplicateInfo().
         * @returns {HTMLTableElement} The assembled table.
         */
        function createCollapsibleTable(duplicateInfo) {
            const table = document.createElement('table');
            table.className = 'wikitable mw-collapsible duplicate-references-table';
            table.setAttribute('role', 'presentation');

            const tbody = document.createElement('tbody');
            table.appendChild(tbody);

            const headerRow = document.createElement('tr');
            const headerCell = document.createElement('td');
            headerCell.innerHTML = '<strong>Duplicate References</strong>';

            const toggleSpan = document.createElement('span');
            toggleSpan.className = 'mw-collapsible-toggle';
            toggleSpan.innerHTML = '[<a href="#" class="mw-collapsible-text">hide</a>]';
            headerCell.appendChild(toggleSpan);

            // Check if the {{Duplicated citations}} template is already present
            const duplicatedCitationsTemplate = document.querySelector('table.box-Duplicated_citations');

            // Only offer the tagging link if the template is not present
            if (!duplicatedCitationsTemplate) {
                const addTemplateLink = document.createElement('a');
                addTemplateLink.textContent = ' add {{duplicated citations}} ';
                addTemplateLink.href = '#';
                addTemplateLink.addEventListener('click', function (e) {
                    e.preventDefault();
                    addDuplicateCitationsTemplate(addTemplateLink);
                });
                headerCell.appendChild(addTemplateLink);
            }
            headerRow.appendChild(headerCell);
            tbody.appendChild(headerRow);

            const pageTitle = mw.config.get('wgPageName').replace(/_/g, ' ');

            duplicateInfo.forEach(({url, refs}) => {
                const row = document.createElement('tr');
                const cell = document.createElement('td');

                // Report icon: opens a preloaded new section on the author's talk page.
                const reportIcon = document.createElement('a');
                reportIcon.href = `https://en.wikipedia.org/wiki/User_talk:Polygnotus?action=edit&section=new&preloadtitle=Reporting%20%5B%5BUser%3APolygnotus%2FDuplicateReferences%7CDuplicateReferences%5D%5D%20false-positive&preload=User:Polygnotus/$1&preloadparams%5b%5d=${encodeURIComponent(`[[${pageTitle}]] ${url}`)}%20~~~~`;
                // Commons thumbnails are served from upload.wikimedia.org; a
                // site-relative path would resolve against en.wikipedia.org and 404.
                reportIcon.innerHTML = '<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Cross_CSS_Red.svg/15px-Cross_CSS_Red.svg.png" width="15" height="15" alt="Report false positive" title="Report false positive" />';
                reportIcon.style.marginRight = '5px';
                cell.appendChild(reportIcon);

                const urlLink = document.createElement('a');
                urlLink.href = url;
                urlLink.textContent = url;
                urlLink.target = "_blank";
                urlLink.rel = "noopener noreferrer";

                cell.appendChild(urlLink);
                cell.appendChild(document.createTextNode(' in refs: '));

                // Similarity is always reported relative to the first ref of the set.
                const originalRef = refs[0];
                refs.forEach((ref, index) => {
                    const link = document.createElement('a');
                    link.href = `#${ref.id}`;
                    link.textContent = ref.number;
                    cell.appendChild(link);

                    // Add similarity information
                    if (index > 0) {
                        const similarity = calculateSimilarityPercentage(
                            calculateLevenshteinDistance(originalRef.text, ref.text),
                            Math.max(originalRef.text.length, ref.text.length)
                        );
                        const similarityInfo = document.createElement('span');
                        similarityInfo.textContent = ` (${similarity})`;
                        cell.appendChild(similarityInfo);
                    }

                    // Hover: emphasise the hovered ref, softly highlight its siblings.
                    link.addEventListener('mouseover', () => {
                        refs.forEach(r => {
                            const citationElement = document.getElementById(r.id);
                            if (citationElement) {
                                if (r.id === ref.id) {
                                    citationElement.classList.add('duplicate-citation-hover');
                                } else {
                                    citationElement.classList.add('duplicate-citation-highlight');
                                }
                            }
                        });
                    });
                    link.addEventListener('mouseout', () => {
                        refs.forEach(r => {
                            const citationElement = document.getElementById(r.id);
                            if (citationElement) {
                                citationElement.classList.remove('duplicate-citation-hover');
                                citationElement.classList.remove('duplicate-citation-highlight');
                            }
                        });
                    });

                    // Click: move the persistent "clicked" marking to this set.
                    link.addEventListener('click', () => {
                        document.querySelectorAll('.duplicate-citation-clicked').forEach(el => {
                            el.classList.remove('duplicate-citation-clicked');
                        });
                        refs.forEach(r => {
                            const citationElement = document.getElementById(r.id);
                            if (citationElement) {
                                citationElement.classList.add('duplicate-citation-clicked');
                            }
                        });
                    });

                    if (index < refs.length - 1) {
                        cell.appendChild(document.createTextNode(', '));
                    }
                });

                row.appendChild(cell);
                tbody.appendChild(row);
            });

            return table;
        }

        /**
         * Entry point: looks for duplicate references and, if any exist,
         * inserts the duplicates table right after the References heading
         * container and wires up its show/hide toggle.
         *
         * The collapsed state is remembered in localStorage under
         * 'duplicateReferencesTableState'. Storage access can throw (for
         * example when storage is blocked or full); in that case the table
         * still works and simply starts expanded without persisting its state.
         */
        function checkDuplicateReferenceLinks() {
            debug("Checking for duplicate reference links");
            const duplicateInfo = getDuplicateInfo();

            if (duplicateInfo.length === 0) {
                debug("No duplicates found");
                return;
            }

            debug("Duplicates found, creating collapsible table");

            const table = createCollapsibleTable(duplicateInfo);
            containerDiv.after(table);

            // Set up collapsible functionality
            const toggleLink = table.querySelector('.mw-collapsible-toggle a');
            const tableBody = $(table).find('tr:not(:first-child)');
            const storageKey = 'duplicateReferencesTableState';

            // Reads the persisted state; falls back to "expanded" if storage is unavailable.
            const readStoredState = () => {
                try {
                    return localStorage.getItem(storageKey) === 'true';
                } catch (error) {
                    debug("Could not read table state from localStorage:", error);
                    return false;
                }
            };

            // Persists the state; a storage failure must not break the toggle.
            const storeState = (isCollapsed) => {
                try {
                    localStorage.setItem(storageKey, isCollapsed);
                } catch (error) {
                    debug("Could not save table state to localStorage:", error);
                }
            };

            const setTableState = (isCollapsed) => {
                if (isCollapsed) {
                    tableBody.hide();
                    toggleLink.textContent = 'show';
                } else {
                    tableBody.show();
                    toggleLink.textContent = 'hide';
                }
                storeState(isCollapsed);
            };

            // Initialize state from localStorage
            setTableState(readStoredState());

            toggleLink.addEventListener('click', function (e) {
                e.preventDefault();
                const isCurrentlyCollapsed = tableBody.is(':hidden');
                setTableState(!isCurrentlyCollapsed);
            });
        }

        checkDuplicateReferenceLinks();
        debug("Script execution completed");
    });
});
// </nowiki>