Jump to content

User:Bunnypranav/archiver.js

From Wikipedia, the free encyclopedia
This is the current revision of this page, as edited by Bunnypranav (talk | contribs) at 12:23, 25 December 2024 (create fork with changes being marked as minor). The present address (URL) is a permanent link to this version.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/*
 * <nowiki>
 * This script is a fork of https://en.wikipedia.org/w/index.php?title=User:Andrybak/Scripts/Archiver.js&oldid=1248814478
 * which was forked from https://en.wikipedia.org/w/index.php?title=User:Enterprisey/archiver.js&oldid=1113588553
 * which was forked from https://en.wikipedia.org/w/index.php?title=User:%CE%A3/Testing_facility/Archiver.js&oldid=1003561411
 */

/*
 * Documentation of CSS classes.
 *
 * .arky-span is the main custom class of the script.
 * Inside a .arky-span is an archive link, which triggers selection.
 * .arky-span tags also store data (not visible in the UI) associated with
 * corresponding sections: the index of the section and heading level
 * (i.e. ==2==, ===3===, etc)
 * Tags with .arky-span class are also called "archive spans".
 *
 * .arky-selected-section is put onto the whole semantic heading
 * of a section, selected by user for archiving.
 * During selection the class is used for styling (a light-blue background).
 * After clicking "archive ... selected threads" the class is used to
 * find all the archive spans, which live inside the semantic heading.
 */

// Human-readable name of this user script; used as the title of notifications
// and as part of the prefix of console messages.
const USERSCRIPT_NAME = "Archiver";

/*
 * Shows a persistent (not auto-hiding) notification bubble titled with the
 * name of this script.
 *
 * messageText - the text to show.
 * important   - when truthy, the text is preceded by a warning icon.
 */
function notifyUser(messageText, important) {
  let message = messageText;
  if (important) {
    const img = document.createElement("img");
    img.width = 20;
    img.height = 20;
    // The icon is purely decorative, the text carries the whole message.
    img.alt = "";
    // [[File:Ambox important.svg]]
    // Must be an absolute URL: a site-relative "/media/..." path does not
    // resolve on the wiki and leaves a broken image in the notification.
    img.src =
      "https://upload.wikimedia.org/wikipedia/en/thumb/b/b4/Ambox_important.svg/40px-Ambox_important.svg.png";
    const span = document.createElement("span");
    span.append(img, " ", messageText);
    message = span;
  }
  mw.notify(message, {
    title: USERSCRIPT_NAME,
    autoHide: false,
  });
}

// Prefix passed as the first argument of every console message of this script.
const LOG_PREFIX = `[${USERSCRIPT_NAME}]:`;

// Writes the given values to the console at "error" level, after the script prefix.
function error(...toLog) {
  const prefixedArguments = [LOG_PREFIX, ...toLog];
  console.error(...prefixedArguments);
}

// Writes the given values to the console at "warn" level, after the script prefix.
function warn(...toLog) {
  const prefixedArguments = [LOG_PREFIX, ...toLog];
  console.warn(...prefixedArguments);
}

// Writes the given values to the console at "info" level, after the script prefix.
function info(...toLog) {
  const prefixedArguments = [LOG_PREFIX, ...toLog];
  console.info(...prefixedArguments);
}

// Writes the given values to the console at "debug" level, after the script prefix.
function debug(...toLog) {
  const prefixedArguments = [LOG_PREFIX, ...toLog];
  console.debug(...prefixedArguments);
}

// Builds the edit summary that is sent to the API. At the moment the main
// summary is passed through unchanged (converted to a string).
function constructEditSummary(mainEditSummary) {
  const editSummary = `${mainEditSummary}`;
  return editSummary;
}

$.when(mw.loader.using(["mediawiki.util", "mediawiki.api"]), $.ready).done(
  function () {
    /*
     * Reference documentation about keys and values in mw.config:
     * https://www.mediawiki.org/wiki/Manual:Interface/JavaScript#mw.config
     */
    if (!mw.config.get("wgIsArticle")) {
      // This variable is badly named -- it is not related to a page being a main namespace "article".
      info("Not a wiki page. Aborting.");
      return;
    }
    if (
      mw.config.get("wgArticleId") === 0 ||
      mw.config.get("wgRevisionId") === 0
    ) {
      info(
        "Either the page does not exist yet or it is a diffonly=yes view. Aborting."
      );
      return;
    }
    if (
      mw.config.get("wgNamespaceNumber") % 2 == 0 &&
      mw.config.get("wgNamespaceNumber") != 4
    ) {
      // not a talk page and not project namespace
      info("Not a discussion namespace. Aborting.");
      return;
    }
    if (mw.config.get("wgNamespaceNumber") == -1) {
      // is a special page
      info('This is a "Special:" page. Aborting.');
      return;
    }
    const parserOutput = document.querySelector(
      "#mw-content-text .mw-parser-output"
    );
    if (!parserOutput || $(parserOutput).find(":header").length === 0) {
      info("Nothing to archive here. Aborting.");
      return;
    }
    if (
      mw.config.get("wgDiffNewId") != null ||
      mw.config.get("wgDiffOldId") != null
    ) {
      info("Detected diff view. Aborting.");
      return;
    }

    // Styles of the script: a highlighted background for the headings of
    // selected sections and a bold [archive] link inside of them.
    const archiverStyleRules = [
      ".arky-selected-section { background-color: color-mix(in srgb, var(--background-color-progressive-subtle, #D9E9FF) 90%, var(--background-color-progressive--hover, #D9E9FF)); }",
      ".arky-selected-section .arky-span a { font-weight:bold }",
    ];
    mw.util.addCSS(archiverStyleRules.join(""));

    // Offsets ({ start, end }) of the sections of the page, keyed by section
    // index. Populated by extractValidSections().
    const sectionCodepointOffsets = {};
    // Wikitext of the page, assigned once the "revisions" API query is done.
    let wikiText = "";
    // The timestamp when we originally got the page contents - we pass it to the "edit" API call for edit conflict detection
    let revStamp;

    // Link in the "p-cactions" portlet, which is used to enter/exit the archival process.
    const portletLink = mw.util.addPortletLink(
      "p-cactions",
      "#",
      "Archive",
      "ca-oecaAndrybak",
      "Enter/exit the archival process",
      null,
      null
    );
    // Button, which starts the archiving of the selected sections.
    const archiveButton = $("<button>");
    // Search for the target archive subpage; started on the first click on `portletLink`.
    let highestArchiveSubpagePromise = null;
    /*
     * Click on the "Archive" portlet link: enters or exits the archival mode
     * by toggling the visibility of the [archive] links and of the archive button.
     * The selection of sections is always cleared.
     */
    $(portletLink).click(function (e) {
      // The portlet link points to "#": keep the browser from following it,
      // which would scroll the page to the top.
      e.preventDefault();
      // removeClass() expects a bare class name. With a leading dot (as in a
      // selector) nothing is removed and the old selection would survive.
      $(".arky-selected-section").removeClass("arky-selected-section");
      // The selection is empty again, so the button goes back to its initial
      // label and state. This also gets rid of a leftover "Loading..." label.
      archiveButton
        .prop("disabled", false)
        .text("Archive all the selected threads");
      $(".arky-span").toggle();
      archiveButton.toggle();
      if (highestArchiveSubpagePromise == null) {
        /*
         * Start searching for the archive subpage with highest number immediately.
         * Then the click listener on `archiveButton` will wait for this `Promise`.
         */
        highestArchiveSubpagePromise = findHighestArchiveSubpage();
      }
    });

    // Prepare the archive button: it sticks to the bottom of the page,
    // spans the whole width and starts out hidden.
    archiveButton
      .attr("id", "arky-archive-button")
      .html("Archive all the selected threads")
      .css({
        position: "sticky",
        bottom: 0,
        width: "100%",
        "font-size": "200%",
      });
    archiveButton.appendTo(document.body);
    archiveButton.toggle();
    /*
     * Click on the archive button: collects the indices of the selected
     * sections, waits for the search of the target archive subpage
     * and then hands over to doArchive().
     */
    archiveButton.click(function (e) {
      const selectedSections = $(".arky-selected-section .arky-span")
        .map(function () {
          return $(this).data("section");
        })
        .toArray();
      if (selectedSections.length === 0) {
        return alert("No threads selected, aborting");
      }

      const LOADING_LABEL = "Loading...";
      const labelBeforeLoading = archiveButton.text();
      const timeoutId = setTimeout(() => {
        /*
         * In case highestArchiveSubpagePromise is taking a long time,
         * show to the user that stuff is happening.
         */
        archiveButton.text(LOADING_LABEL);
      }, 1000);
      // Needed on success AND on failure: otherwise the timer could still
      // fire later, or the button would be stuck on "Loading..." forever.
      function stopLoadingIndicator() {
        clearTimeout(timeoutId);
        if (
          archiveButton.text() === LOADING_LABEL &&
          labelBeforeLoading !== LOADING_LABEL
        ) {
          archiveButton.text(labelBeforeLoading);
        }
      }

      highestArchiveSubpagePromise.then(
        (result) => {
          stopLoadingIndicator();
          info("Successful highestArchiveSubpagePromise:", result);
          doArchive(selectedSections, result);
        },
        (rejection) => {
          stopLoadingIndicator();
          info("Failed highestArchiveSubpagePromise:", rejection);
          // The search failed: let the user fill in the number of the archive.
          const currentPageName = mw.config.get("wgPageName");
          doArchive(
            selectedSections,
            archiveSpacedSubpageName(currentPageName, "???")
          );
        }
      );
    }); // end of archiveButton click handler

    // addArchiveLinks is a function declaration further below; declarations
    // are hoisted, which is why it can already be called here.
    addArchiveLinks();

    // Returns the point halfway between `lower` and `upper`, rounded down.
    function midPoint(lower, upper) {
      const halfDistance = (upper - lower) / 2;
      return Math.floor(lower + halfDistance);
    }

    /*
     * Based on https://en.wikipedia.org/wiki/Module:Exponential_search
     *
     * Finds the highest number, for which `testFunc` passes: the candidate is
     * doubled until the test fails for the first time, then the range between
     * the highest passing and the lowest failing number is bisected.
     *
     * testFunc - (possibly async) function of a number with a truthy/falsy result.
     * i        - first number to test.
     * lower    - a number for which the test is assumed to pass.
     * upper    - a number for which the test is known to fail, or null when
     *            no such number is known yet.
     *
     * Returns a Promise of the highest passing number. The Promise is rejected
     * with an Error when the test fails for `lower` itself: the search cannot
     * make any progress then and used to repeat the same test forever.
     */
    async function exponentialSearch(testFunc, i, lower, upper) {
      let candidate = i;
      let highestPassing = lower;
      let lowestFailing = upper;
      for (;;) {
        if (await testFunc(candidate)) {
          if (candidate + 1 === lowestFailing) {
            return candidate;
          }
          highestPassing = candidate;
          candidate = lowestFailing
            ? midPoint(highestPassing, lowestFailing)
            : candidate * 2;
        } else {
          if (candidate === highestPassing) {
            throw new Error(
              `exponentialSearch: the test fails for the lower bound ${highestPassing}`
            );
          }
          lowestFailing = candidate;
          candidate = midPoint(highestPassing, lowestFailing);
        }
      }
    }

    // Name of an archive subpage in the "Page/Archive N" naming scheme.
    function archiveSpacedSubpageName(pageName, archiveNumber) {
      return `${pageName}/Archive ${archiveNumber}`;
    }

    // Name of an archive subpage in the "Page/Archive/N" naming scheme.
    function archiveSlashedSubpageName(pageName, archiveNumber) {
      return `${pageName}/Archive/${archiveNumber}`;
    }

    /*
     * Asks the API, whether the page with the given title exists.
     * Returns a Promise of `false`, if the API marks the page as "missing",
     * and of `true` otherwise.
     *
     * Based on https://en.wikipedia.org/wiki/Wikipedia_talk:User_scripts/Archive_7#nocreate-missing
     */
    async function pageExists(title) {
      const { query } = await new mw.Api().get({
        action: "query",
        format: "json",
        titles: title,
      });
      // Only a single title was requested, so only the first page is of interest.
      const [firstPage] = Object.values(query.pages);
      return !("missing" in firstPage);
    }

    /*
     * Find the subpage of this page, which will be used as destination/target of archiving.
     * It is just "Archive 1" by default, but can be increased by exponentialSearch.
     *
     * Returns a Promise of the name of the subpage. When the search fails, the
     * user is notified and the Promise is rejected with the original error.
     */
    function findHighestArchiveSubpage() {
      info("findHighestArchiveSubpage: start");

      async function searchArchiveSubpage() {
        const currentPageName = mw.config.get("wgPageName");
        const currentYear = new Date().getUTCFullYear();
        /*
         * Check if "current year" subpage is a good candidate for
         * pages with https://en.wikipedia.org/wiki/Template:Archived_annually
         * TODO: maybe implement checking if {{Archived annually}} is transcluded.
         */
        const previousYearExists = await pageExists(
          archiveSpacedSubpageName(currentPageName, currentYear - 1)
        );
        if (previousYearExists) {
          const nextYearExists = await pageExists(
            archiveSpacedSubpageName(currentPageName, currentYear + 1)
          );
          if (!nextYearExists) {
            return archiveSpacedSubpageName(currentPageName, currentYear);
          }
        }

        // Find out which of the two naming schemes is used for numbered archives.
        let subpageFunc;
        if (await pageExists(archiveSpacedSubpageName(currentPageName, 1))) {
          subpageFunc = archiveSpacedSubpageName;
        } else if (
          await pageExists(archiveSlashedSubpageName(currentPageName, 1))
        ) {
          subpageFunc = archiveSlashedSubpageName;
        } else {
          notifyUser("Cannot find the first archive subpage", true);
          info("Assuming zero archive subpages.");
          return archiveSpacedSubpageName(currentPageName, 1);
        }

        // see also https://en.wikipedia.org/wiki/Module:Highest_archive_number
        const highestNumber = await exponentialSearch(
          (archiveNumber) =>
            pageExists(subpageFunc(currentPageName, archiveNumber)),
          10,
          1,
          null
        );
        return subpageFunc(currentPageName, highestNumber);
      }

      return searchArchiveSubpage().catch((e) => {
        const msg = "Cannot find archive subpage with the highest number";
        error(msg, e);
        notifyUser(msg, true);
        throw e;
      });
    }

    /*
     * Moves the selected sections from the current page to an archive page.
     *
     * selectedSections      - indices of the sections to archive, in the order
     *                         of their appearance on the page.
     * highestArchiveSubpage - suggestion for the name of the archive page; the
     *                         user confirms or changes it in a prompt.
     *
     * First the sections are removed from the current page with one edit, and
     * only when that edit succeeds they are appended to the archive page with a
     * second edit. The page is reloaded afterwards.
     * Nothing is edited, if the wikitext of the page is not available, if the
     * user does not choose a target, or if the offsets of the sections do not
     * fit to the wikitext.
     */
    function doArchive(selectedSections, highestArchiveSubpage) {
      // returns `s` without the substring starting at `start` and ending at `end`
      function cut(s, start, end) {
        return s.substring(0, start) + s.substring(end);
      }

      // Without the wikitext every offset would be mapped to 0 and the "edit"
      // API call below would save an empty page. Refuse to continue.
      if (!wikiText || !revStamp) {
        return alert(
          "The wikitext of this page is not loaded (yet), aborting. Please try again in a moment."
        );
      }

      const currentPageName = mw.config.get("wgPageName");
      const archivePageName = prompt(
        "Archiving " +
          selectedSections.length +
          " threads: where should we move them to? The latest archive number seems to be:",
        highestArchiveSubpage
      );
      if (!archivePageName || archivePageName == currentPageName) {
        return alert("No archive target selected, aborting");
      }

      const codepointRanges = selectedSections.map(function (n) {
        return sectionCodepointOffsets[n];
      });

      // codepointToUtf16Idx maps codepoint idx (i.e. MediaWiki index into page text) to utf-16 idx (i.e. JavaScript index into wikiText)
      // `undefined` marks an "important" (= either a section start or end) offset,
      // which has not been reached in the wikitext yet.
      const codepointToUtf16Idx = Object.create(null);
      codepointRanges.forEach(function (range) {
        codepointToUtf16Idx[range.start] = undefined;
        codepointToUtf16Idx[range.end] = undefined;
      });
      codepointToUtf16Idx[Infinity] = Infinity; // Because sometimes we'll have Infinity as an "end" value

      // fill in our mapping from codepoints (MediaWiki indices) to utf-16 (i.e. JavaScript).
      // yes, this loops through every character in the wikitext. very unfortunate.
      let utf16Pos = 0;
      let codepointPos = 0;
      // Iterating over a string yields whole codepoints: a character outside of
      // the BMP has a length of 2 in utf-16, but counts as a single codepoint.
      for (const character of wikiText) {
        if (codepointPos in codepointToUtf16Idx) {
          codepointToUtf16Idx[codepointPos] = utf16Pos;
        }
        utf16Pos += character.length;
        codepointPos++;
      }

      // An offset, which was never reached, means that the section list does
      // not belong to this wikitext. Cutting with a made up index would remove
      // the wrong part of the page.
      const unmappedOffsets = Object.keys(codepointToUtf16Idx).filter(
        function (offset) {
          return codepointToUtf16Idx[offset] === undefined;
        }
      );
      if (unmappedOffsets.length > 0) {
        error(
          "doArchive: offsets of sections outside of the wikitext:",
          unmappedOffsets
        );
        return alert(
          "The sections do not match the wikitext of this page, aborting. Please reload the page and try again."
        );
      }

      const utf16Ranges = codepointRanges.map(function (range) {
        return {
          start: codepointToUtf16Idx[range.start],
          end: codepointToUtf16Idx[range.end],
        };
      });

      const newTextForArchivePage = utf16Ranges
        .map(function (range) {
          return wikiText.substring(range.start, range.end);
        })
        .join("");

      // go in reverse order so that we don't invalidate the offsets of earlier sections
      // (on a copy, the array of the caller stays untouched)
      const newWikiText = utf16Ranges
        .slice()
        .reverse()
        .reduce(function (text, range) {
          return cut(text, range.start, range.end);
        }, wikiText);

      info("archive this:" + newTextForArchivePage);
      info("revised page:" + newWikiText);
      const pluralizedThreads =
        selectedSections.length +
        " thread" +
        (selectedSections.length === 1 ? "" : "s");
      new mw.Api()
        .postWithToken("csrf", {
          action: "edit",
          minor: 1,
          title: currentPageName,
          text: newWikiText,
          summary: constructEditSummary(
            `Archiving ${pluralizedThreads}, will be on [[${archivePageName}]]`
          ),
          // both timestamps let the API detect edit conflicts
          basetimestamp: revStamp,
          starttimestamp: revStamp,
        })
        .done(function (res1) {
          alert("Successfully removed threads from talk page");
          info(res1);
          new mw.Api()
            .postWithToken("csrf", {
              action: "edit",
              minor: 1,
              title: archivePageName,
              appendtext: "\n" + newTextForArchivePage,
              summary: constructEditSummary(
                `Archiving ${pluralizedThreads} from [[${currentPageName}]]`
              ),
            })
            .done(() => alert("Successfully added threads to archive page"))
            .fail(() =>
              alert(
                "Failed to add threads to archive page. Manual inspection needed."
              )
            )
            .always(function (res2) {
              info(res2);
              window.location.reload();
            });
        })
        .fail(function (res1) {
          alert(
            "Failed to remove threads from talk page. Aborting archive process."
          );
          error(res1);
          window.location.reload();
        });
    } // end of doArchive()

    /*
     * Picks the sections of the page itself out of the result of the API query
     * and returns them as an object keyed by section index.
     * Plus, importantly, populates the global variable `sectionCodepointOffsets`.
     */
    function extractValidSections(apiResultSections) {
      const validSections = {};

      // For sections transcluded from other pages, the index will look
      // like T-1 instead of just 1. Skip those.
      for (const section of Array.from(apiResultSections || [])) {
        if (section.index == parseInt(section.index)) {
          validSections[section.index] = section;
        }
      }

      // record the offsets in the global variable
      Object.keys(validSections).forEach((key) => {
        const sectionIndex = parseInt(key);
        // A section ends where the next section begins; the last one has an open end.
        const hasNextSection = validSections.hasOwnProperty(sectionIndex + 1);
        // What MediaWiki calls "byteoffset" is actually a codepoint offset!! Drat!!
        sectionCodepointOffsets[sectionIndex] = {
          start: validSections[sectionIndex].byteoffset,
          end: hasNextSection
            ? validSections[sectionIndex + 1].byteoffset
            : Infinity,
        };
      });

      return validSections;
    }

    /*
     * The convoluted way of "header" vs "headerContainer" is needed, because
     * there are different HTML layouts for "headings" in different skins.
     * In Vector 2022, layout of ==Second level== versus ===Third level===
     * headings is different even for a _single_ skin.
     *
     * The HTML layout is either
     *    <div><h*></h*><edit section link /></div>
     * or
     *    <h*><edit section link /></h*>
     *
     * "headerContainer" is always the outer of the tags, it always contains the <edit section link /> tags.
     * "header" is always one of the <h*> tags.
     * Meaning that in some cases "header" and "headerContainer" are the same HTML element.
     *
     * arky-span, aka archiveSpans are put inside the "<edit section link />".
     *
     * For details, see:
     *   - https://www.mediawiki.org/w/index.php?title=Heading_HTML_changes&oldid=6538029
     *   - https://en.wikipedia.org/wiki/Wikipedia:Village_pump_(technical)/Archive_213#Tech_News_%E2%80%93_User%3AEnterprisey%2Farchiver.js
     */

    // Finds the element with the bracketed [edit] section link, which belongs
    // to the given <h*> tag.
    // Returns a plain HTMLElement, or null when there is no such element.
    function findEditSectionForHeader(header) {
      const EDIT_SECTION_SELECTOR = ".mw-editsection";
      // in Vector, the bracketed [edit] section link is located inside of the <h*> tag
      const editSectionInsideHeader = header.querySelector(
        EDIT_SECTION_SELECTOR
      );
      if (editSectionInsideHeader) {
        return editSectionInsideHeader;
      }
      // in other skins, the bracketed [edit] section link is a sibling of <h2> <h3> etc
      const wrapper = header.parentElement;
      const editSectionNextToHeader = wrapper.classList.contains("mw-heading")
        ? wrapper.querySelector(EDIT_SECTION_SELECTOR)
        : null;
      return editSectionNextToHeader || null;
    }

    // Finds the "header container" around the given archive span.
    // Returns a jQuery object, or null (after notifying the user) when
    // the span is neither inside of a .mw-heading nor inside of a <h*> tag.
    function findHeaderContainerForArchiveSpan(archiveSpan) {
      const jQueryArchiveSpan = $(archiveSpan);
      // The wrapping <div class="mw-heading"> wins over a plain <h*> tag.
      for (const containerSelector of [".mw-heading", ":header"]) {
        const matchingAncestors = jQueryArchiveSpan.parents(containerSelector);
        if (matchingAncestors.length > 0) {
          return matchingAncestors;
        }
      }
      notifyUser(
        "findHeaderContainerForArchiveSpan: Cannot parse section headings in this skin. Aborting.",
        true
      );
      error(
        "findHeaderContainerForArchiveSpan: Tags for bug report:",
        archiveSpan,
        archiveSpan.parentElement
      );
      return null;
    }

    /*
     * Gives the top-level element of the whole header for a <h*> tag:
     * the parent, when it is a <div class="mw-heading ...">,
     * and the <h2>, <h3>, etc tag itself in all the other cases.
     *
     * Returns a plain HTML element.
     */
    function getHeaderContainer(header) {
      const wrapper = header.parentElement;
      return wrapper.classList.contains("mw-heading") ? wrapper : header;
    }

    /*
     * Create the bracketed [archive] links next to the [edit] section links.
     * These [archive] links are used by a user to select sections for archival.
     *
     * The links are only created after both the list of sections and the
     * wikitext (with its timestamp) have been downloaded. This way a section
     * can never be selected while `wikiText` and `revStamp` are still unset.
     */
    function addArchiveLinks() {
      const SELECTED_CLASS = "arky-selected-section";
      const api = new mw.Api();

      // Returns the section number (a string of digits) from the [edit]
      // section link, or undefined when it cannot be found.
      function readSectionNumber(editSection) {
        const editSectionLink = editSection.querySelector("a");
        // Note: href may not be set.
        const sectionNumberMatch =
          editSectionLink &&
          editSectionLink.href &&
          editSectionLink.href.match(/&section=(\d+)/);
        return sectionNumberMatch ? sectionNumberMatch[1] : undefined;
      }

      // Brings the label and the state of the archive button in line with the selection.
      function refreshArchiveButton() {
        const selectedSectionCount = $("." + SELECTED_CLASS).length;
        archiveButton
          .prop("disabled", selectedSectionCount === 0)
          .text(
            "Archive " +
              selectedSectionCount +
              " selected thread" +
              (selectedSectionCount === 1 ? "" : "s")
          );
      }

      // Click handler of an [archive] link: toggles the selection of the
      // section of `header` and brings all of its sub-sections to the same state.
      function toggleSection(header, archiveLink) {
        const correspondingHeaderContainer = $(getHeaderContainer(header));
        correspondingHeaderContainer.toggleClass(SELECTED_CLASS);
        const isThisSectionSelected =
          correspondingHeaderContainer.hasClass(SELECTED_CLASS);

        const thisArchiveSpan = archiveLink.parents(".arky-span");
        const thisHeaderLevel = thisArchiveSpan.data("header-level");

        // starting from the section after the current one, loop through each section
        const allArchiveSpans = $(".arky-span");
        for (
          let i = allArchiveSpans.index(thisArchiveSpan) + 1;
          i < allArchiveSpans.length;
          i++
        ) {
          if ($(allArchiveSpans[i]).data("header-level") <= thisHeaderLevel) {
            // if this isn't a subsection, quit
            break;
          }
          const closestHeaderContainer = findHeaderContainerForArchiveSpan(
            allArchiveSpans[i]
          );
          if (!closestHeaderContainer) {
            // The heading could not be found and the user has been notified
            // about it. Stop here instead of failing on `null`.
            break;
          }
          if (
            closestHeaderContainer.hasClass(SELECTED_CLASS) !==
            isThisSectionSelected
          ) {
            // if this section needs toggling, toggle it
            closestHeaderContainer.toggleClass(SELECTED_CLASS);
          }
        }

        // finally, update button
        refreshArchiveButton();
      }

      // Puts an [archive] link next to the [edit] section link of `header`,
      // if the header belongs to one of the `validSections`.
      function addArchiveLink(header, validSections) {
        const editSection = findEditSectionForHeader(header);
        if (!editSection) {
          // e.g. an archived page ([edit] links are hidden with magic word __NOEDITSECTION__)
          return;
        }
        const sectionNumber = readSectionNumber(editSection);
        // if the check fails, it might be something like <h2>not a real section</h2>
        if (!validSections.hasOwnProperty(sectionNumber)) {
          return;
        }
        // "H2" -> 2, "H3" -> 3, etc
        const headerLevel = Number(header.tagName.slice(1));

        const archiveLink = $("<a>").text("archive");
        archiveLink.click(function () {
          toggleSection(header, archiveLink);
        });

        const arkySpan = $("<span>", { class: "arky-span" })
          .data({ "header-level": headerLevel, section: sectionNumber })
          .append(
            $("<span>", { class: "mw-editsection-bracket" }).text("["),
            archiveLink,
            $("<span>", { class: "mw-editsection-bracket" }).text("]")
          );
        // The links are hidden outside of the archival process. The process
        // may have been entered already while the downloads were running:
        // the archive button is visible in that case and so must be the links.
        if (!archiveButton.is(":visible")) {
          arkySpan.css({ display: "none" });
        }

        $(editSection).append("&nbsp;", arkySpan);
      }

      /*
       * The search for all section headings starts with
       * finding all <h*> tags, which aren't for the table of contents.
       * From the <h*> tags, we find the "[edit] section links" and
       * "header containers".
       */
      function addArchiveLinksToHeaders(validSections) {
        const allHeaders = $("#mw-content-text .mw-parser-output")
          .find(":header")
          .filter(":not(#mw-toc-heading)");
        if (allHeaders.length == 0) {
          warn(
            "Nothing to archive here. The script should have aborted earlier. Aborting."
          );
          return;
        }
        allHeaders.each(function (i, header) {
          addArchiveLink(header, validSections);
        });
      }

      // grab page sections and wikitext so we can add the "archive" links to appropriate sections
      api
        .get({ action: "parse", page: mw.config.get("wgPageName") })
        .done(function (parseApiResult) {
          const articleId = mw.config.get("wgArticleId");
          api
            .get({
              action: "query",
              pageids: articleId,
              prop: ["revisions"],
              rvprop: ["content", "timestamp"],
            })
            .done(function (revisionsApiResult) {
              const rv = revisionsApiResult.query.pages[articleId].revisions[0];
              wikiText = rv["*"];
              revStamp = rv["timestamp"];

              const validSections = extractValidSections(
                parseApiResult.parse.sections
              );
              addArchiveLinksToHeaders(validSections);
            })
            .fail(() =>
              warn(
                "addArchiveLinks: Cannot download wikitext of current page. Aborting."
              )
            );
        })
        .fail(() =>
          warn("addArchiveLinks: Cannot download current page. Aborting.")
        );
    }
  }
); // </nowiki>