Jump to content

User:Vanderwaalforces/checkTranslationAttribution.js

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// @name        Wikipedia translation attribution checker
// @namespace   https://en.wikipedia.org/
// @version     1.5.1
// @description Checks if a page is a potential unattributed translation, has suspicious access dates, or was created using the ContentTranslation tool.
// @author      [[User:Vanderwaalforces]]
// @match       https://en.wikipedia.org/wiki/*
// @match       https://en.wikipedia.org/w/index.php?title=*

(function() {
    'use strict';

    /**
     * Append a `<link rel="stylesheet">` element pointing at `href` to the document head.
     *
     * @param {string} href - URL of the stylesheet to load.
     * @param {string} [id=''] - Optional id for the created element; left unset when falsy.
     */
    function loadCSS(href, id = '') {
        const stylesheet = Object.assign(document.createElement('link'), {
            rel: 'stylesheet',
            type: 'text/css',
            href,
        });
        if (id) {
            stylesheet.id = id;
        }
        document.head.appendChild(stylesheet);
    }

    // Load the default CSS first
    loadCSS('/w/index.php?title=User:Vanderwaalforces/checkTranslationAttribution.css&action=raw&ctype=text/css', 'defaultCTAStyles');
    
    // Configurable margin in days for offline work
    const OFFLINE_WORK_MARGIN_DAYS = 7;

    // Ensure the script only runs in mainspace (0) or draftspace (118)
    const namespace = mw.config.get('wgNamespaceNumber');
    if (namespace !== 0 && namespace !== 118) return;

    const apiUrl = "https://en.wikipedia.org/w/api.php";
    const pageTitle = mw.config.get('wgPageName');
    const talkPageTitle = "Talk:" + pageTitle;

    // Language codes accepted as interwiki prefixes. Entries range from two-letter codes
    // (e.g. 'fr') to longer, hyphenated ones (e.g. 'zh-classical', 'roa-tara'), plus 'simple'.
    // containsInterwikiLink() joins this list into a regex alternation and looks for
    // "<code>:" in an edit summary, so every entry here is matched case-insensitively.
	const validLanguageCodes = [
    	'aa', 'ab', 'ace', 'ady', 'af', 'ak', 'als', 'alt', 'am', 'ami', 'an', 'ang', 'ar', 'arc', 'ary', 'arz', 'as', 'ast',
    	'atj', 'av', 'avk', 'awa', 'ay', 'az', 'azb', 'ba', 'ban', 'bar', 'bat-smg', 'bcl', 'be', 'be-tarask', 'be-x-old',
    	'bg', 'bh', 'bi', 'bjn', 'blk', 'bm', 'bn', 'bo', 'bpy', 'br', 'bs', 'bug', 'bxr', 'ca', 'cbk-zam', 'cdo', 'ce',
    	'ceb', 'ch', 'cho', 'chr', 'chy', 'ckb', 'co', 'cr', 'crh', 'cs', 'csb', 'cu', 'cv', 'cy', 'da', 'dag', 'de', 'din',
    	'diq', 'dsb', 'dty', 'dv', 'dz', 'ee', 'el', 'eml', 'eo', 'es', 'et', 'eu', 'ext', 'fa', 'ff', 'fi', 'fiu-vro',
    	'fj', 'fo', 'fr', 'frp', 'frr', 'fur', 'fy', 'ga', 'gag', 'gan', 'gcr', 'gd', 'gl', 'glk', 'gn', 'gom', 'gor', 'got',
    	'gu', 'guw', 'gv', 'ha', 'hak', 'haw', 'he', 'hi', 'hif', 'ho', 'hr', 'hsb', 'ht', 'hu', 'hy', 'hyw', 'hz', 'ia',
    	'id', 'ie', 'ig', 'ii', 'ik', 'ilo', 'inh', 'io', 'is', 'it', 'iu', 'ja', 'jam', 'jbo', 'jv', 'ka', 'kaa', 'kab',
    	'kbd', 'kbp', 'kcg', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn', 'ko', 'koi', 'kr', 'krc', 'ks', 'ksh', 'ku', 'kv',
    	'kw', 'ky', 'la', 'lad', 'lb', 'lbe', 'lez', 'lfn', 'lg', 'li', 'lij', 'lld', 'lmo', 'ln', 'lo', 'lrc', 'lt', 'ltg',
    	'lv', 'mad', 'mai', 'map-bms', 'mdf', 'mg', 'mh', 'mhr', 'mi', 'min', 'mk', 'ml', 'mn', 'mni', 'mnw', 'mo', 'mr',
    	'mrj', 'ms', 'mt', 'mus', 'mwl', 'my', 'myv', 'mzn', 'na', 'nah', 'nap', 'nds', 'nds-nl', 'ne', 'new', 'ng', 'nia',
    	'nl', 'nn', 'no', 'nov', 'nqo', 'nrm', 'nso', 'nv', 'ny', 'oc', 'olo', 'om', 'or', 'os', 'pa', 'pag', 'pam', 'pap',
    	'pcd', 'pcm', 'pdc', 'pfl', 'pi', 'pih', 'pl', 'pms', 'pnb', 'pnt', 'ps', 'pt', 'pwn', 'qu', 'rm', 'rmy', 'rn', 'ro',
		'roa-rup', 'roa-tara', 'ru', 'rue', 'rw', 'sa', 'sah', 'sat', 'sc', 'scn', 'sco', 'sd', 'se', 'sg', 'sh', 'shi',
    	'shn', 'shy', 'si', 'simple', 'sk', 'skr', 'sl', 'sm', 'smn', 'sn', 'so', 'sq', 'sr', 'srn', 'ss', 'st', 'stq', 'su',
    	'sv', 'sw', 'szl', 'szy', 'ta', 'tay', 'tcy', 'te', 'tet', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tpi', 'tr',
    	'trv', 'ts', 'tt', 'tum', 'tw', 'ty', 'tyv', 'udm', 'ug', 'uk', 'ur', 'uz', 've', 'vec', 'vep', 'vi', 'vls', 'vo',
    	'wa', 'war', 'wo', 'wuu', 'xal', 'xh', 'xmf', 'yi', 'yo', 'yue', 'za', 'zea', 'zh', 'zh-classical', 'zh-min-nan',
    	'zh-yue', 'zu'
	];

    /**
     * Fetch the edit summaries of the most recent revisions of the page (at most 100).
     *
     * @returns {Promise<{comments: string[], firstRevisionDate: (string|null)}>}
     *   `comments` holds one entry per fetched revision, newest first; a revision whose
     *   summary is not present in the API response contributes an empty string.
     *   `firstRevisionDate` is the timestamp of the OLDEST revision in this batch (not
     *   necessarily the page's creation), or null when no revisions were returned.
     *   The promise rejects with the jQuery error argument when the request fails.
     */
    function fetchEditSummaries() {
        return new Promise((resolve, reject) => {
            $.ajax({
                url: apiUrl,
                data: {
                    action: "query",
                    format: "json",
                    prop: "revisions",
                    titles: pageTitle,
                    rvprop: "comment|timestamp",
                    rvlimit: 100,
                    origin: "*"
                },
                success: function(data) {
                    // A missing page (or an unexpected response) has no `revisions` array.
                    // Throwing inside this callback would leave the promise pending forever,
                    // so settle with an empty result instead.
                    const pages = (data && data.query && data.query.pages) || {};
                    const page = Object.values(pages)[0];
                    const revisions = (page && page.revisions) || [];
                    if (revisions.length === 0) {
                        console.log("No revisions found when fetching edit summaries");
                        resolve({ comments: [], firstRevisionDate: null });
                        return;
                    }

                    const firstRevisionDate = revisions[revisions.length - 1].timestamp;
                    // The API omits `comment` for hidden summaries; normalise to '' so
                    // string operations on the result never hit `undefined`.
                    const comments = revisions.map(
                        rev => (typeof rev.comment === 'string' ? rev.comment : '')
                    );

                    // Log the fetched comments and first revision date for debugging
                    console.log("Fetched edit summaries:", comments);
                    console.log("First revision date (from 100 revisions):", firstRevisionDate);

                    resolve({ comments, firstRevisionDate });
                },
                error: function(err) {
                    reject(err);
                }
            });
        });
    }

    /**
     * Fetch the very first revision of the article (the creation edit).
     *
     * @returns {Promise<Object>} Resolves with the revision object returned by the API
     *   (requested properties: comment, tags, timestamp). Rejects with an Error when the
     *   response contains no revision (e.g. the page does not exist), or with the jQuery
     *   error argument when the request itself fails.
     */
    function fetchFirstRevision() {
        return new Promise((resolve, reject) => {
            $.ajax({
                url: apiUrl,
                data: {
                    action: "query",
                    format: "json",
                    prop: "revisions",
                    titles: pageTitle,
                    rvprop: "comment|tags|timestamp",  // Include timestamp to get the creation date
                    rvdir: "newer",  // Fetch from the oldest revision
                    rvlimit: 1,      // Only fetch the first revision
                    origin: "*"
                },
                success: function(data) {
                    const pages = (data && data.query && data.query.pages) || {};
                    const page = Object.values(pages)[0];
                    const firstRevision = page && page.revisions && page.revisions[0];

                    // Reject explicitly: an exception thrown from this callback would not
                    // reach the promise and the caller would wait forever.
                    if (!firstRevision) {
                        reject(new Error(`No revisions found for "${pageTitle}"`));
                        return;
                    }

                    console.log("First revision fetched:", firstRevision);
                    resolve(firstRevision);  // Includes the creation timestamp
                },
                error: function(err) {
                    reject(err);
                }
            });
        });
    }

    /**
     * Fetch the wikitext of the page's current revision (used for the citation checks).
     *
     * @returns {Promise<(string|null)>} The revision content, or null when the response
     *   carries no revision. Rejects with the jQuery error argument on request failure.
     */
    function fetchWikitext() {
        return new Promise((resolve, reject) => {
            const handleResponse = (data) => {
                const [pageData] = Object.values(data.query.pages);
                const latest = pageData.revisions && pageData.revisions[0];

                if (!latest) {
                    console.log("No wikitext found");
                    resolve(null);  // Nothing to inspect
                    return;
                }

                const wikitext = latest['*'];
                // Debug aid: dump what was fetched
                console.log("Fetched wikitext:", wikitext);
                resolve(wikitext);
            };

            $.ajax({
                url: apiUrl,
                data: {
                    action: "query",
                    format: "json",
                    prop: "revisions",
                    titles: pageTitle,
                    rvprop: "content",
                    origin: "*"
                },
                success: handleResponse,
                error: reject
            });
        });
    }

    /**
     * Check whether the talk page's current revision mentions "translat"
     * (case-insensitive substring match).
     *
     * @returns {Promise<boolean>} true when the substring is present; false when it is
     *   absent or the talk page has no content. Rejects with the jQuery error argument
     *   on request failure.
     */
    function fetchTalkPageContent() {
        return new Promise((resolve, reject) => {
            const handleResponse = (data) => {
                const [talkPage] = Object.values(data.query.pages);
                const revisions = talkPage.revisions;
                const rawText = revisions && revisions[0] && revisions[0]['*'];

                if (!rawText) {
                    resolve(false);
                    return;
                }

                const talkPageContent = rawText.toLowerCase();
                // Debug aid: dump what was fetched
                console.log("Fetched talk page content:", talkPageContent);
                resolve(talkPageContent.includes("translat"));
            };

            $.ajax({
                url: apiUrl,
                data: {
                    action: "query",
                    format: "json",
                    prop: "revisions",
                    titles: talkPageTitle,
                    rvprop: "content",
                    origin: "*"
                },
                success: handleResponse,
                error: reject
            });
        });
    }

    /**
     * Tell whether an edit summary contains an interwiki prefix, i.e. one of the
     * codes in validLanguageCodes starting at a word boundary and followed by ":".
     * Matching is case-insensitive.
     *
     * @param {string} comment - Edit summary to inspect.
     * @returns {boolean} true when such a prefix occurs anywhere in the summary.
     */
    function containsInterwikiLink(comment) {
        const codeAlternation = validLanguageCodes.join('|');
        const prefixPattern = new RegExp(`\\b(${codeAlternation}):`, 'i');
        return prefixPattern.exec(comment) !== null;
    }

    /**
     * Tell whether an edit summary looks like a translation/import note: it must
     * contain "from" together with at least one of "translat", "imported" or
     * "importing" (all matched case-insensitively as substrings).
     *
     * @param {string} comment - Edit summary to inspect. Non-string values (for
     *   example a summary missing from the API response) are treated as having no keywords.
     * @returns {boolean} true when the keyword combination is present.
     */
    function containsTranslationKeywords(comment) {
        // Guard: calling toLowerCase() on undefined/null would throw and abort the whole check.
        if (typeof comment !== 'string') {
            return false;
        }

        const lowerComment = comment.toLowerCase();
        const hasFrom = lowerComment.includes("from");
        const hasTranslatAndFrom = hasFrom && lowerComment.includes("translat");
        const hasImportedAndFrom = hasFrom && lowerComment.includes("imported");
        const hasImportingAndFrom = hasFrom && lowerComment.includes("importing");

        // Log the status of each summary's keyword check
        console.log(`Summary: ${comment}, Has translat + from: ${hasTranslatAndFrom}, Has imported + from: ${hasImportedAndFrom}, Has importing + from: ${hasImportingAndFrom}`);

        return hasTranslatAndFrom || hasImportedAndFrom || hasImportingAndFrom;
    }

    /**
     * Classify a list of edit summaries by whether any translation-looking summary
     * carries an interwiki prefix.
     *
     * @param {string[]} editSummaries - Edit summaries to classify.
     * @returns {{hasTranslatNoInterwiki: boolean, hasTranslatWithInterwiki: boolean}}
     *   `hasTranslatWithInterwiki` is true when at least one summary has the translation
     *   keywords AND an interwiki prefix; `hasTranslatNoInterwiki` is true when at least
     *   one summary has the keywords but no prefix. Both can be true at once.
     */
    function classifyEditSummaries(editSummaries) {
        const outcome = { hasTranslatNoInterwiki: false, hasTranslatWithInterwiki: false };

        for (const summary of editSummaries) {
            // Summaries without translation keywords do not influence either flag
            if (!containsTranslationKeywords(summary)) {
                continue;
            }
            const flag = containsInterwikiLink(summary)
                ? 'hasTranslatWithInterwiki'
                : 'hasTranslatNoInterwiki';
            outcome[flag] = true;
        }

        return outcome;
    }

    /**
     * Extract the first recognisable calendar date from a string.
     *
     * Supported formats (the date may be surrounded by other text):
     *   - "21 January 2024" / "21 Jan 2024"
     *   - "2024-01-21"
     *   - "21-01-2024" (day first; read as month first only when day-first is impossible)
     *   - "January 21, 2024" / "Jan 21 2024"
     *
     * The date is assembled from the captured parts rather than handed to the Date
     * string parser, whose handling of non-ISO text is implementation-dependent.
     * Every result is at UTC midnight so all formats compare consistently.
     *
     * @param {string} dateString - Text that may contain a date.
     * @returns {(Date|null)} The parsed date, or null when no supported format is found
     *   or the matched text is not a real calendar date (e.g. "31 February 2024").
     */
    function parseDate(dateString) {
        const MONTH_INDEX = {
            Jan: 0, Feb: 1, Mar: 2, Apr: 3, May: 4, Jun: 5,
            Jul: 6, Aug: 7, Sep: 8, Oct: 9, Nov: 10, Dec: 11
        };

        // Build a UTC date and reject values that Date would silently roll over
        // (e.g. day 31 in a 30-day month).
        const buildDate = (year, monthIndex, day) => {
            const y = Number(year);
            const d = Number(day);
            if (!Number.isInteger(monthIndex) || monthIndex < 0 || monthIndex > 11) {
                return null;
            }
            const candidate = new Date(Date.UTC(y, monthIndex, d));
            const isExact = candidate.getUTCFullYear() === y &&
                candidate.getUTCMonth() === monthIndex &&
                candidate.getUTCDate() === d;
            return isExact ? candidate : null;
        };

        const parsers = [
            {   // e.g. "21 January 2024"
                regex: /\b(\d{1,2})\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s(\d{4})\b/,
                toDate: (m) => buildDate(m[3], MONTH_INDEX[m[2]], m[1])
            },
            {   // e.g. "2024-01-21"
                regex: /\b(\d{4})-(\d{2})-(\d{2})\b/,
                toDate: (m) => buildDate(m[1], Number(m[2]) - 1, m[3])
            },
            {   // e.g. "21-01-2024"
                regex: /\b(\d{1,2})-(\d{1,2})-(\d{4})\b/,
                toDate: (m) => buildDate(m[3], Number(m[2]) - 1, m[1]) ||
                    buildDate(m[3], Number(m[1]) - 1, m[2])
            },
            {   // e.g. "January 21, 2024" — [a-z]* lets full month names match, not just "Jan 21, 2024"
                regex: /\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s(\d{1,2}),?\s(\d{4})\b/,
                toDate: (m) => buildDate(m[3], MONTH_INDEX[m[1]], m[2])
            }
        ];

        for (const { regex, toDate } of parsers) {
            const match = regex.exec(dateString);
            if (match) {
                const parsed = toDate(match);
                if (parsed) {
                    return parsed;
                }
            }
        }
        return null;  // No supported, valid date found
    }

    /**
     * Return a new Date shifted by a number of calendar days (local time);
     * the input is left untouched.
     *
     * @param {Date} date - Starting date.
     * @param {number} days - Days to add; negative values move backwards.
     * @returns {Date} The shifted copy.
     */
    function addDays(date, days) {
        const shifted = new Date(date);
        const targetDayOfMonth = shifted.getDate() + days;
        // setDate rolls over into neighbouring months when the value is out of range
        shifted.setDate(targetDayOfMonth);
        return shifted;
    }
    
    /**
     * Look for citation access dates that predate the article's creation by more
     * than OFFLINE_WORK_MARGIN_DAYS.
     *
     * @param {(string|null)} wikitext - Article wikitext; falsy values yield false.
     * @param {string} firstRevisionDate - Timestamp of the article's first revision.
     * @returns {boolean} true when at least one `access-date` / `access date`
     *   parameter value parses (via parseDate) to a date earlier than the cutoff.
     */
    function checkSuspiciousAccessDates(wikitext, firstRevisionDate) {
        if (!wikitext) {
            console.log("No wikitext to check for access dates");
            return false;
        }

        // Collect the raw value of every access-date parameter
        const accessDateParam = /\|\s*access[- ]date\s*=\s*([A-Za-z0-9, -]+)/g;
        const rawValues = Array.from(wikitext.matchAll(accessDateParam), (found) => found[1]);

        // Anything older than (creation date - margin) counts as suspicious
        const cutoff = addDays(new Date(firstRevisionDate), -OFFLINE_WORK_MARGIN_DAYS);

        const suspiciousDates = rawValues.filter((rawValue) => {
            const parsed = parseDate(rawValue);
            return Boolean(parsed) && parsed < cutoff;
        });

        // Debug aid: list what was flagged
        console.log("Suspicious access dates:", suspiciousDates);
        return suspiciousDates.length !== 0;
    }

    /**
     * Show a banner above the page content, with a help link and a dismiss button.
     *
     * The banner is inserted immediately before `#contentSub`; when that element is
     * absent it is inserted before `#mw-content-text` instead, and when neither exists
     * the message is skipped with a console warning rather than throwing.
     *
     * @param {string} text - Banner content. It is assigned to innerHTML so that link
     *   markup is rendered; callers must therefore only pass trusted, script-generated HTML.
     * @param {string} className - Banner-type class, added alongside the generic `cTA_banner`.
     */
    function displayMessage(text, className) {
        // Resolve the insertion point first so nothing is built for a page that cannot host it
        const anchor = document.querySelector('#contentSub') ||
            document.querySelector('#mw-content-text');
        if (!anchor) {
            console.warn("checkTranslationAttribution: no element found to attach the banner to");
            return;
        }

        const messageDiv = document.createElement("div");
        messageDiv.className = `${className} cTA_banner`; // Add a general class and the specific type
        messageDiv.innerHTML = text;  // Use innerHTML for parsing link elements

        const docLink = document.createElement("a");
        docLink.className = "cTA_docLink";
        docLink.href = "https://en.wikipedia.org/wiki/User:Vanderwaalforces/checkTranslationAttribution";
        docLink.target = "_blank";
        docLink.title = "Help";
        docLink.style.color = "#ffffff";
        docLink.innerHTML = '&#9432;';  // Info symbol
        messageDiv.appendChild(docLink);

        // Add dismiss button at the very top-right corner
        const dismissButton = document.createElement("button");
        dismissButton.className = "cTA_dismiss"; // Use a class for the button as well
        dismissButton.innerHTML = "&cross;"; // ✗ symbol
        dismissButton.title = "Close";
        dismissButton.onclick = () => {
            messageDiv.style.display = "none";
        };
        messageDiv.appendChild(dismissButton);

        // Insert the message before the anchor element to make it compatible with all skins
        anchor.insertAdjacentElement('beforebegin', messageDiv);
    }

    /**
     * Main logic: decide which banner(s), if any, to show for the current page.
     *
     * 1. If the creation edit was made with the ContentTranslation tool, show an
     *    informational banner and stop.
     * 2. Otherwise classify the edit summaries (recent ones plus the creation summary)
     *    and scan the wikitext for access dates that predate the article's creation,
     *    then show the banner matching that combination.
     *
     * Every failure (network error, missing page, unexpected response) is caught and
     * logged; the function never rejects.
     *
     * @returns {Promise<void>}
     */
    async function checkTranslationAttribution() {
        const dateWarning = "Warning: There are citations in this article that have access dates from before the article was created. This suggests the article may have been copy-pasted from somewhere.";

        try {
            // Fetch the first revision to check for ContentTranslation tool and creation date
            const firstRevision = await fetchFirstRevision();
            // `comment` / `tags` may be missing from the API response (e.g. a hidden
            // summary); normalise them so the checks below cannot throw.
            const firstComment = typeof firstRevision.comment === 'string' ? firstRevision.comment : '';
            const firstTags = Array.isArray(firstRevision.tags) ? firstRevision.tags : [];
            const firstRevisionDate = firstRevision.timestamp;  // Creation timestamp

            // Links to WP:TFOLWP and the Translated from template
            const wpShortcutLink = mw.html.element('a', {
                href: '/wiki/WP:TFOLWP',
                target: '_blank',
                class: 'cTA_link_scut'
            }, 'WP:TFOLWP');

            const templateLink = mw.html.element('a', {
                href: '/wiki/Template:Translated_from',
                target: '_blank',
                class: 'cTA_link_tplt'
            }, '{{Translated from}}');

            const unattributedWarning = `Warning: This article is likely an unattributed translation. Please see ${wpShortcutLink} for proper attribution, and consider adding ${templateLink} to the talk page.`;

            // If the first edit used the ContentTranslation tool, display the green banner and skip other checks
            if (firstComment.includes("Created by translating the page") && firstTags.includes("contenttranslation")) {
                const contentTranslationLink = mw.html.element('a', {
                    href: 'https://www.mediawiki.org/wiki/Special:MyLanguage/Content_translation',
                    target: '_blank',
                    class: 'cTA_link_ctl'
                }, 'ContentTranslation');

                displayMessage(
                    `This article was created using the ${contentTranslationLink} module and is correctly attributed to the source Wikipedia.`,
                    "cTA_info_talk"
                );
                return; // Skip all other checks if ContentTranslation tool was used
            }

            // The two requests are independent, so run them in parallel
            const [{ comments }, wikitext] = await Promise.all([
                fetchEditSummaries(),
                fetchWikitext()
            ]);

            // fetchEditSummaries only covers the most recent revisions, so on long-lived
            // pages the creation summary (where attribution is usually given) would be
            // missed; include it explicitly. Non-string entries are dropped defensively.
            const summaries = [...comments, firstComment].filter(summary => typeof summary === 'string');
            const { hasTranslatNoInterwiki, hasTranslatWithInterwiki } = classifyEditSummaries(summaries);

            const hasSuspiciousAccessDates = checkSuspiciousAccessDates(wikitext, firstRevisionDate);  // Use actual creation date

            // Display singular orange banner if there are suspicious access dates (no translation detected)
            if (hasSuspiciousAccessDates && !hasTranslatNoInterwiki && !hasTranslatWithInterwiki) {
                displayMessage(dateWarning, "cTA_warn_date");
                return; // Only show the orange banner and stop further checks
            }

            // If suspicious dates are found and the article is correctly attributed
            if (hasSuspiciousAccessDates && hasTranslatWithInterwiki) {
                displayMessage(
                    "Notice: Despite some citations having access dates before the article's creation, indicating possible copy-pasting or interwiki translation, proper attribution has been given.",
                    "cTA_info_date"
                );
            }
            // If suspicious dates are found and no proper attribution
            else if (hasSuspiciousAccessDates && hasTranslatNoInterwiki) {
                displayMessage(unattributedWarning, "cTA_warn_unattr");
                displayMessage(dateWarning, "cTA_warn_date");
            }
            // If there are no suspicious dates and the article is correctly attributed
            else if (!hasSuspiciousAccessDates && hasTranslatWithInterwiki) {
                const hasTranslatInTalkPage = await fetchTalkPageContent();
                if (!hasTranslatInTalkPage) {
                    displayMessage(
                        `Notice: This translated article has been correctly attributed. Consider optionally adding ${templateLink} to the talk page.`,
                        "cTA_info_talk"
                    );
                } else {
                    displayMessage(
                        "Notice: This translated article has been correctly attributed.",
                        "cTA_info_talk1"
                    );
                }
            }
            // If there are no suspicious dates and no proper attribution
            else if (!hasSuspiciousAccessDates && hasTranslatNoInterwiki) {
                displayMessage(unattributedWarning, "cTA_warn_unattr");
            }
        } catch (error) {
            console.error("Error checking translation attribution:", error);
        }
    }

    // Run the check. checkTranslationAttribution() wraps its whole body in try/catch and
    // logs any error itself, so the returned promise is intentionally not awaited here.
    checkTranslationAttribution();
})();