Jump to content

User:SuperHamster/rsp-to-json.js

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and use your browser's default print function instead.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/**
 * Reads the perennial sources table from the current document and builds one
 * plain object per data row.
 *
 * Rows exposing fewer than six `td` cells are skipped. When no element matches
 * the class, an error is logged and an empty array is returned.
 *
 * @param {string} tableClass - Class name (without the leading dot) of the table to parse.
 * @returns {Array<Object>} One object per parsed row, in table order.
 */
function parsePerennialSourcesTable(tableClass) {
    const table = document.querySelector(`.${tableClass}`);

    if (!table) {
        console.error(`[RSP-to-JSON] Table with class ${tableClass} not found`);
        return [];
    }

    const parsedRows = [];

    for (const row of table.querySelectorAll("tbody > tr")) {
        const cells = row.querySelectorAll("td");
        if (cells.length < 6) {
            continue;
        }

        // Column order: name, status, discussions, last discussed, summary, domains
        const [nameCell, statusCell, discussionCell, lastCell, summaryCell, domainsCell] = cells;

        parsedRows.push({
            name: findSourceName(nameCell),
            link: findSourceLink(nameCell),
            shortcuts: findShortcuts(nameCell),
            status: findStatus(statusCell),
            blacklisted: isBlacklisted(statusCell),
            discussions: parseDiscussions(discussionCell),
            lastDiscussed: lastCell.textContent.trim(),
            summary: summaryCell.textContent.trim(),
            summary_wikitext: convertHtmlToWikiMarkup(summaryCell),
            domains: findDomains(domainsCell),
        });
    }

    return parsedRows;
}

/**
 * Reports whether the given element, or any element reached by following
 * `parentElement` upwards, carries the given class.
 *
 * @param {Element} element - Element at which the upward walk starts.
 * @param {string} className - Class name to search for.
 * @returns {boolean} True as soon as a match is found; false once the chain ends.
 */
function hasAncestorWithClass(element, className) {
    for (let current = element; current; current = current.parentElement) {
        const { classList } = current;
        if (classList && classList.contains(className)) {
            return true;
        }
    }
    return false;
}

/**
 * Extracts the source name from a table cell.
 *
 * Text is collected from the cell's direct text nodes and, recursively, from
 * `<a>` and `<i>` elements; every other element contributes nothing. Each
 * fragment is trimmed, empty fragments are dropped at every nesting level, and
 * the remainder is joined with single spaces.
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} The extracted source name, or an empty string if no text was found.
 */
function findSourceName(cell) {
    // Joins the non-empty text fragments of a node list with single spaces.
    // Empties are filtered here, at every level, so whitespace-only text nodes
    // nested inside <a>/<i> cannot introduce double spaces into the name.
    const joinFragments = (nodes) => Array.from(nodes)
        .map((node) => extractTextFromNode(node))
        .filter(Boolean)
        .join(" ");

    function extractTextFromNode(node) {
        if (node.nodeType === Node.TEXT_NODE) {
            return node.textContent.trim();
        }
        if (node.nodeType === Node.ELEMENT_NODE && (node.tagName === "A" || node.tagName === "I")) {
            return joinFragments(node.childNodes);
        }
        return "";
    }

    return joinFragments(cell.childNodes);
}

/**
 * Returns the `href` of the first anchor in the cell that is not inside a
 * `wp-rsp-sc` element (as decided by `hasAncestorWithClass`).
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} The `href` of the first qualifying anchor, or an empty string if there is none.
 */
function findSourceLink(cell) {
    for (const anchor of cell.querySelectorAll("a")) {
        if (!hasAncestorWithClass(anchor, "wp-rsp-sc")) {
            return anchor.href;
        }
    }
    return "";
}

/**
 * Collects the trimmed text of every anchor matching `.wp-rsp-sc a` in the cell.
 *
 * @param {Element} cell - The table cell element.
 * @returns {Array<string>} Shortcut labels in document order (empty if none match).
 */
function findShortcuts(cell) {
    const shortcutAnchors = cell.querySelectorAll(".wp-rsp-sc a");
    return Array.from(shortcutAnchors, (anchor) => anchor.textContent.trim());
}

/**
 * Determines the status of a source from a table cell.
 *
 * The lower-cased `title` of every anchor in the cell is compared against the
 * known statuses. When several are present, the first match in this priority
 * order wins: deprecated, generally reliable, generally unreliable,
 * no consensus, blacklisted.
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} The matched status string, or 'unknown' when no anchor title matches.
 */
function findStatus(cell) {
    // Declared locally: assigning to undeclared names leaks globals in sloppy
    // mode and throws a ReferenceError in strict/module code.
    const titles = new Set(
        Array.from(cell.querySelectorAll("a"), (anchor) => anchor.title.toLowerCase())
    );

    // Order matters: earlier entries take precedence over later ones
    const statusPriority = [
        "deprecated",
        "generally reliable",
        "generally unreliable",
        "no consensus",
        "blacklisted",
    ];

    return statusPriority.find((status) => titles.has(status)) ?? "unknown";
}

/**
 * Tells whether the cell contains an anchor whose `title` attribute is exactly
 * 'Blacklisted'.
 *
 * @param {Element} cell - The table cell element.
 * @returns {boolean} True when such an anchor exists, false otherwise.
 */
function isBlacklisted(cell) {
    return Boolean(cell.querySelector("a[title='Blacklisted']"));
}

/**
 * Parses the discussions cell to extract discussion links and metadata.
 *
 * For each anchor in the cell:
 * - anchors without an `href` attribute are skipped;
 * - anchors pointing at `#cite_note-…` are expanded via `parseCitationLinks`;
 * - anchors with no text content are skipped (likely icons);
 * - every other anchor becomes an `inline` discussion entry whose type comes
 *   from the `alt` of an image inside the previous sibling element, falling
 *   back to "General".
 *
 * @param {Element} cell - The table cell element.
 * @returns {Array<Object>} Array of discussion objects (`link`, `type`, `display`, `label`).
 */
function parseDiscussions(cell) {
    const discussions = [];

    cell.querySelectorAll("a").forEach(link => {
        const discussionLink = link.getAttribute("href");

        // getAttribute returns null when the anchor has no href; such an anchor
        // cannot point at a discussion, so skip it instead of crashing on
        // null.startsWith further down.
        if (discussionLink == null) {
            return;
        }

        // If cite-note, fetch the links from the corresponding citation note
        if (discussionLink.startsWith("#cite_note-")) {
            const noteId = discussionLink.replace("#", "");
            discussions.push(...parseCitationLinks(noteId));
            return;
        }

        // Check that the link has text content
        // otherwise, it is likely an icon and can be skipped
        if (!link.textContent.length) {
            return;
        }

        const typeIcon = link.previousElementSibling?.querySelector("img[alt]");

        discussions.push({
            // Root-relative hrefs are made absolute against en.wikipedia.org
            link: discussionLink.startsWith("/") ? `https://en.wikipedia.org${discussionLink}` : discussionLink,
            type: typeIcon ? typeIcon.getAttribute("alt") : "General",
            display: "inline",
            label: link.textContent.trim()
        });
    });

    return discussions;
}

/**
 * Converts the HTML content of a cell to Wikipedia wikitext markup.
 *
 * Only the cell's direct child nodes are converted:
 * - `<a>` with an absolute (`http://`, `https://`) or protocol-relative (`//`)
 *   href becomes an external link: `[url text]`;
 * - `<a>` with any other href becomes `[[target|text]]`, with the first
 *   "/wiki/" removed from the href;
 * - `<a>` without an href contributes its text only;
 * - `<i>` becomes `''text''` and `<b>` becomes `'''text'''`;
 * - every other node contributes its `textContent` unchanged.
 *
 * @param {Element} cell - The table cell element.
 * @returns {string} The trimmed wikitext representation of the cell's content.
 */
function convertHtmlToWikiMarkup(cell) {
    const toWikitext = (node) => {
        const text = node.textContent;

        if (node.nodeType !== Node.ELEMENT_NODE) {
            return text;
        }

        switch (node.tagName) {
            case "A": {
                const href = node.getAttribute("href");
                // getAttribute returns null when there is no href; there is no
                // link target to emit, so keep just the visible text.
                if (href == null) {
                    return text;
                }
                // Full URLs are not valid inside [[...]]; use external-link syntax
                if (/^(?:https?:)?\/\//.test(href)) {
                    return `[${href} ${text}]`;
                }
                return `[[${href.replace("/wiki/", "")}|${text}]]`;
            }
            case "I":
                return `''${text}''`;
            case "B":
                return `'''${text}'''`;
            default:
                return text;
        }
    };

    return Array.from(cell.childNodes, (node) => toWikitext(node)).join("").trim();
}

/**
 * Extracts all domain strings from a domains cell.
 *
 * Each anchor's `href` is searched for `insource:%22…%22` (a percent-encoded
 * quoted `insource:` term); the text between the first pair of encoded quotes
 * is taken as the domain. Anchors without such a term are ignored.
 *
 * @param {Element} cell - The table cell element.
 * @returns {Array<string>} Array of domain strings, in anchor order.
 */
function findDomains(cell) {
    // The quotes are percent-encoded, so the character class alone never stops
    // at the closing quote; the lazy quantifier ends the capture at the FIRST
    // following %22 instead of the last one in the URL.
    const quotedInsource = /insource:%22([^"]+?)%22/;

    return Array.from(cell.querySelectorAll("a"))
        .map(link => link.href.match(quotedInsource)?.[1] ?? "")
        .filter(Boolean); // Remove anchors without an insource term
}

/**
 * Parses a citation note to extract discussion links and their context.
 *
 * The note element is looked up by ID and its `.reference-text` child is
 * scanned twice: first over its child nodes to find "context" labels (the text
 * between " of " and the next colon), then over its anchors to build one
 * discussion object per anchor. When more than one context was found, each
 * label is suffixed with the context in effect for that anchor.
 *
 * Anchors with no text content (likely icons) and anchors without an `href`
 * attribute are skipped. A missing note element is logged with a warning.
 *
 * @param {string} noteId - The ID of the citation note element.
 * @returns {Array<Object>} Array of discussion objects (`link`, `type`, `display: "footnote"`, `label`).
 */
function parseCitationLinks(noteId) {
    const citationLinks = [];
    const noteElement = document.getElementById(noteId);

    if (!noteElement) {
        console.warn(`[RSP-to-JSON] No element found for citation note ID: ${noteId}`);
        return citationLinks;
    }

    const referenceText = noteElement.querySelector(".reference-text");
    if (!referenceText) {
        return citationLinks;
    }

    const links = Array.from(referenceText.querySelectorAll("a"));
    const contextMatches = [];
    let currentContext = "";
    let accumulatingContext = false;

    referenceText.childNodes.forEach(node => {
        // Most citation notes have a structure like "See these discussions of <source>:",
        // from which we want to extract those links to discussions,
        // so we check for the existence of " of ":
        if (node.nodeType === Node.TEXT_NODE && node.textContent.includes(" of ")) {
            currentContext = "";
            accumulatingContext = true;
            const textAfterOf = node.textContent.split(" of ")[1] || "";

            // Take the text up to the colon as the context, if a colon exists
            if (textAfterOf) {
                const colonIndex = textAfterOf.indexOf(":");
                if (colonIndex !== -1) {
                    currentContext = textAfterOf.slice(0, colonIndex).trim();
                    contextMatches.push({ context: currentContext.trim(), node });
                    accumulatingContext = false;
                } else {
                    currentContext = textAfterOf.trim();
                }
            }

            // Some citation notes have multiple text nodes,
            // covering multiple contexts
            // e.g. arXiv and bioRxiv
            if (accumulatingContext) {
                let nextNode = node.nextSibling;
                while (nextNode && accumulatingContext) {
                    if (nextNode.nodeType === Node.TEXT_NODE) {
                        const colonIndex = nextNode.textContent.indexOf(":");
                        if (colonIndex !== -1) {
                            currentContext += " " + nextNode.textContent.slice(0, colonIndex).trim();
                            contextMatches.push({ context: currentContext.trim(), node: nextNode });
                            accumulatingContext = false;
                        } else {
                            currentContext += " " + nextNode.textContent.trim();
                        }
                    } else if (nextNode.nodeType === Node.ELEMENT_NODE && nextNode.tagName === "I") {
                        // Italic elements contribute their text to the context
                        currentContext += " " + nextNode.textContent.trim();
                    }
                    nextNode = nextNode.nextSibling;
                }
            }
        }
    });

    const multipleContexts = contextMatches.length > 1;
    let currentContextIndex = 0;
    currentContext = contextMatches[currentContextIndex]?.context.trim() || "";

    links.forEach(link => {
        // Check that the link has text content
        // otherwise, it is likely an icon and can be skipped
        if (!link.textContent.length) {
            return;
        }

        // Advance to the next context once its node precedes this link in the document
        const nextContextNode = contextMatches[currentContextIndex + 1]?.node;
        if (nextContextNode && (link.compareDocumentPosition(nextContextNode) & Node.DOCUMENT_POSITION_PRECEDING)) {
            currentContextIndex++;
            currentContext = contextMatches[currentContextIndex].context.trim();
        }

        const discussionLink = link.getAttribute("href");

        // getAttribute returns null when the anchor has no href; skip it rather
        // than crash on null.startsWith below. Done after the context advance so
        // the context tracking stays in step with the document position.
        if (discussionLink == null) {
            return;
        }

        let label = link.textContent.trim();
        if (multipleContexts && currentContext) {
            label += ` (${currentContext})`;
        }

        const typeIcon = link.previousElementSibling?.querySelector("img[alt]");
        const type = typeIcon ? typeIcon.getAttribute("alt") : "General";

        citationLinks.push({
            // Root-relative hrefs are made absolute against en.wikipedia.org
            link: discussionLink.startsWith("/") ? `https://en.wikipedia.org${discussionLink}` : discussionLink,
            type: type,
            display: "footnote",
            label: label
        });
    });

    return citationLinks;
}

/**
 * Produces copies of the given source objects with the `discussions` property
 * left out. The input array and its objects are not modified.
 *
 * @param {Array<Object>} sources - Array of source objects.
 * @returns {Array<Object>} New array of shallow copies without `discussions`.
 */
function filterOutDiscussions(sources) {
    return sources.map(({ discussions: _omitted, ...remaining }) => remaining);
}

/**
 * Builds the "Copy JSON..." dropdown above the `.perennial-sources` table and
 * wires it so that choosing an option parses the table and copies the
 * resulting JSON to the clipboard. Does nothing when the table is absent.
 *
 * The dropdown is tinted to signal success or failure and is reset to its
 * placeholder option two seconds after each selection.
 */
function init() {
    const table = document.querySelector('.perennial-sources');
    if (!table) {
        return;
    }

    // Right-floated wrapper holding the dropdown and the documentation link
    const container = document.createElement('div');
    Object.assign(container.style, {
        float: 'right',
        marginBottom: '10px',
        marginTop: '10px',
    });

    // The dropdown itself
    const select = document.createElement('select');
    select.classList = 'cdx-select';
    Object.assign(select.style, {
        padding: '8px',
        borderRadius: '2px',
    });

    // Disabled placeholder shown while nothing has been chosen
    const placeholder = document.createElement('option');
    Object.assign(placeholder, {
        value: '',
        textContent: 'Copy JSON...',
        disabled: true,
        selected: true,
    });
    select.appendChild(placeholder);

    // Selectable copy modes as [value, visible text]
    const copyModes = [
        ['with-discussions', 'Copy with discussions'],
        ['without-discussions', 'Copy without discussions'],
    ];
    for (const [value, text] of copyModes) {
        const choice = document.createElement('option');
        choice.value = value;
        choice.textContent = text;
        select.appendChild(choice);
    }

    container.appendChild(select);

    // Documentation link rendered underneath the dropdown
    const docLink = document.createElement('a');
    Object.assign(docLink, {
        href: 'https://en.wikipedia.org/wiki/User:SuperHamster/RSP-to-JSON',
        textContent: 'RSP-to-JSON Documentation',
        target: '_blank',
    });
    Object.assign(docLink.style, {
        display: 'block',
        fontSize: '11px',
        marginTop: '2px',
        color: '#3366cc',
        textDecoration: 'underline',
        textAlign: 'right',
    });
    container.appendChild(docLink);

    // Keep the table below the floated container, then place the container before it
    table.style.clear = 'both';
    table.parentNode.insertBefore(container, table);

    const successTint = '#dbf3ec';
    const failureTint = '#f9dde9';
    const tint = (color) => {
        select.style.backgroundColor = color;
    };

    select.addEventListener('change', async () => {
        try {
            const parsed = parsePerennialSourcesTable('perennial-sources');

            if (!parsed || parsed.length === 0) {
                console.error(`[RSP-to-JSON] Failed to produce JSON`);
                tint(failureTint);
            } else {
                const payload = select.value === 'without-discussions'
                    ? filterOutDiscussions(parsed)
                    : parsed;

                await navigator.clipboard.writeText(JSON.stringify(payload));
                tint(successTint);
            }
        } catch (error) {
            console.error('Failed to copy JSON to clipboard:', error);
            tint(failureTint);
        }

        // Clear the tint and return to the placeholder after 2 seconds
        setTimeout(() => {
            tint('');
            select.value = '';
        }, 2000);
    });
}

// Start right away when the document is past the 'loading' state;
// otherwise defer until DOMContentLoaded fires.
if (document.readyState !== 'loading') {
    init();
} else {
    document.addEventListener('DOMContentLoaded', init);
}