User:Novem Linguae/Scripts/DraftCleaner.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page.
Documentation for this user script can be added at User:Novem Linguae/Scripts/DraftCleaner.
// <nowiki>
/* THIS SCRIPT IS STILL IN BETA AND IS BUGGY ABOUT 25% OF THE TIME. Be sure to check the diff that pops up before submitting.
- Adds "Run DraftCleaner" link to the left sidebar
- Top uses:
- remove extra line breaks
- in the first sentence, bold the title
- convert curly quotes to regular quotes
- put <ref>s after periods
- clean external links out of the main article area (turn them into references)
- More detailed list of uses:
- converts [inline external links] to <ref>s
- reduces more than 2 enters in a row, to 2 enters
- removes spaces in front of <ref>s
- get rid of any level 2 heading that contains the article's title
- bolds the first occurrence of the article title
- removes bold from headings
- converts =TitleHeading= to ==H2Heading==
- replaces Covid-19 with COVID-19
- removes enter characters between <ref>s
- trims whitespace at beginning and end
- remove self wikilinks to the article title
- convert ==Reference== to ==References==
- swap ref period with period ref
- turn bare URLs into references
- fix errant spaces at beginning of lines, which makes a blockquote looking thing
- add references section if missing
- delete whitespace at the end of lines
- convert smart quotes to regular quotes
- convert double spaces to single spaces
- remove blank heading
- in refs, turn short links into long links, so you can see the domain
- change year range dash to ndash
- if in draftspace, and draft in categories, disable the categories
- delete <br>. in drafts, these are usually poorly placed
- fix empty references section
- right align images
- remove whitespace if that is the only character on a line
- correct capitalization of see also, references, further reading, external links
- if article has headings but no lead, remove first heading
- replace unicode bullets with asterisks
This page was assembled from 3 files using my publish.php script. I have an offline test suite with around 100 unit tests for the DraftCleaner and StringFilter classes.
*/
$(function() {
	/**
	 * Fetch the current wikitext of a page through the action=parse API.
	 *
	 * @param {string} title Page name, including namespace.
	 * @returns {Promise<string>} The page's wikitext, or '' when wgCurRevisionId is falsy (e.g. deleted page).
	 * @throws {Error} When the API response does not contain parse.wikitext. A failed request
	 *   rejects with whatever $.ajax rejects with.
	 */
	async function getWikicode(title) {
		if ( ! mw.config.get('wgCurRevisionId') ) return ''; // if page is deleted, return blank

		// Same-origin API path built from wgScriptPath, instead of a hard-coded
		// https://en.wikipedia.org URL, which is a cross-origin request on any other host.
		// Passing the parameters as `data` lets jQuery do the URL encoding.
		const result = await $.ajax({
			url: mw.config.get('wgScriptPath') + '/api.php',
			data: {
				action: 'parse',
				page: title,
				prop: 'wikitext',
				formatversion: 2,
				format: 'json'
			},
			dataType: 'json'
		});

		// API errors come back as a payload without `parse`; surface them instead of
		// crashing on an undefined property.
		if ( ! result || ! result.parse || typeof result.parse.wikitext !== 'string' ) {
			throw new Error('DraftCleaner: API response did not contain wikitext');
		}
		return result.parse.wikitext;
	}

	/**
	 * Replace the entire content of a page through the action=edit API.
	 * Borrowed from [[Wikipedia:User scripts/Guide#Edit a page and other common actions]].
	 * Currently not called anywhere in this closure (its call site is commented out below).
	 *
	 * @param {string} articleName Page name to edit.
	 * @param {string} wikicode New wikitext; replaces the whole page.
	 * @param {string} summary Edit summary.
	 */
	function editPage(articleName, wikicode, summary) {
		$.ajax({
			url: mw.util.wikiScript('api'),
			type: 'POST',
			dataType: 'json',
			data: {
				format: 'json',
				action: 'edit',
				title: articleName,
				text: wikicode, // will replace entire page content
				summary: summary,
				token: mw.user.tokens.get('csrfToken')
			},
			// Synchronous on purpose: the (commented out) caller reloads the page right after.
			async: false
		});
	}

	/**
	 * Navigate to the edit form's "Show changes" diff by POSTing a hidden form to index.php.
	 * Nothing is saved; the user reviews the diff and submits manually.
	 *
	 * @param {string} titleWithNamespaceAndUnderscores Page name.
	 * @param {string} wikicode Proposed wikitext.
	 * @param {string} editSummary Pre-filled edit summary.
	 */
	function goToShowChangesScreen(titleWithNamespaceAndUnderscores, wikicode, editSummary) {
		const titleEncoded = encodeURIComponent(titleWithNamespaceAndUnderscores);
		const baseURL = mw.config.get('wgServer') + mw.config.get('wgScriptPath') + '/';

		// https://stackoverflow.com/a/12464290/3480193
		const $form = $(`<form action="${baseURL}index.php?title=${titleEncoded}&action=submit" method="POST"/>`);
		const hiddenFields = [
			['wpTextbox1', wikicode],
			['wpSummary', editSummary],
			['mode', 'preview'],
			['wpDiff', 'Show changes'],
			['wpUltimateParam', '1']
		];
		for ( const [name, value] of hiddenFields ) {
			$form.append($('<input type="hidden">').attr('name', name).val(value));
		}
		$form
			.appendTo($(document.body)) // it has to be added somewhere into the <body>
			.submit();
	}

	/** returns the pagename, including the namespace name, but with spaces replaced by underscores */
	function getArticleName() {
		return mw.config.get('wgPageName');
	}

	/** Hide the clickable sidebar entry and show a status message in the non-clickable entry instead. */
	function showMessage(messageText) {
		$('#DraftCleaner').hide();
		$('#DraftCleanerNoClick').empty().prepend(messageText).show();
	}

	/** Hide the status entry and show the clickable sidebar entry again. */
	function showClickableButton() {
		$('#DraftCleanerNoClick').hide();
		$('#DraftCleaner').show();
	}

	/**
	 * Reload the current page by re-assigning location.href.
	 * NOTE(review): this navigates to the same URL; it is not a cache-bypassing reload — confirm
	 * whether a hard refresh is really needed before relying on it.
	 */
	function hardRefresh() {
		// window.location.reload(true) is deprecated. use this instead
		window.location.href = window.location.href;
	}

	// don't run when not viewing articles
	if ( mw.config.get('wgAction') !== 'view' ) return;

	// don't run when viewing diffs
	if ( mw.config.get('wgDiffNewId') ) return;

	// Only run in mainspace, draftspace, and sandboxes
	const titleWithNamespaceAndUnderscores = getArticleName();
	const namespaceNumber = mw.config.get('wgNamespaceNumber');
	const isSandbox = /sandbox/i.test(titleWithNamespaceAndUnderscores);
	if ( ! [0, 118].includes(namespaceNumber) && ! isSandbox ) return;

	// Add DraftCleaner to left sidebar
	// Using two <li>s. One of the two is kept hidden at all times. This avoids having to delete #DraftCleanerLink, which would also delete the event listener.
	$('#p-navigation .vector-menu-content-list').append(`
		<li id="DraftCleaner">
			<a id="DraftCleanerLink">Run DraftCleaner</a>
		</li>
		<li id="DraftCleanerNoClick" style="display:none">
		</li>
	`);

	$('#DraftCleanerLink').on('click', async function() {
		// prevent running the script while script is already in progress
		showMessage('Editing. Please wait.');

		try {
			// get page wikicode
			const titleWithNamespaceAndSpaces = titleWithNamespaceAndUnderscores.replace(/_/g, ' ');
			const originalWikicode = await getWikicode(titleWithNamespaceAndUnderscores);

			const dc = new DraftCleaner();
			const wikicode = dc.cleanDraft(originalWikicode, namespaceNumber, titleWithNamespaceAndSpaces);

			// if changes to be made, leave the page for the diff screen
			if ( wikicode !== originalWikicode ) {
				const summary = 'clean up ([[User:Novem Linguae/Scripts/DraftCleaner.js|DraftCleaner]])';
				// editPage(titleWithNamespaceAndUnderscores, wikicode);
				// hardRefresh();
				goToShowChangesScreen(titleWithNamespaceAndUnderscores, wikicode, summary);
				return;
			}

			// else display "no changes needed", then reset
			showMessage('No changes needed.');
		} catch ( err ) {
			// Without this, a failed request or a cleaner exception would leave the sidebar
			// stuck on "Editing. Please wait." until the page is reloaded.
			console.error('DraftCleaner failed:', err);
			showMessage('DraftCleaner error. See browser console.');
		}

		setTimeout(showClickableButton, 2000);
	});
});
// </nowiki>
// <nowiki>
/**
 * Collection of regex-based cleanup passes for draft wikitext.
 * Every public method takes wikitext and returns (possibly modified) wikitext; nothing is mutated.
 * Methods that need to protect regions of the text rely on the StringFilter class defined in this file.
 */
class DraftCleaner {
	/**
	 * Run all cleanup passes in a fixed order.
	 *
	 * @param {string} wikicode Page wikitext.
	 * @param {number} namespaceNumber Namespace of the page; 118 (Draft) additionally disables categories.
	 * @param {string} titleWithNamespaceAndSpaces Page title including namespace, e.g. "Draft:Menna Shahin".
	 * @returns {string} Cleaned wikitext.
	 */
	cleanDraft(wikicode, namespaceNumber, titleWithNamespaceAndSpaces) {
		wikicode = this.fixWikilinksContainingURL(wikicode);
		wikicode = this.fixExternalLinksToWikipediaArticles(wikicode);
		wikicode = this.deleteWeirdUnicodeCharacters(wikicode);
		wikicode = this.trimEveryLine(wikicode);
		wikicode = this.convertH1ToH2(wikicode);
		wikicode = this.convertVeryLongHeadingToParagraph(wikicode);
		wikicode = this.bareURLToRef(wikicode);
		wikicode = this.refShortLinkToLongLink(wikicode);
		wikicode = this.inlineExternalLinksToRefs(wikicode); // buggy
		wikicode = this.deleteSpacesInFrontOfRefs(wikicode);
		wikicode = this.deleteHeadingsWithTitle(wikicode, titleWithNamespaceAndSpaces);
		wikicode = this.unlinkWikilinksToThisTitle(wikicode, titleWithNamespaceAndSpaces);
		wikicode = this.capitalizeCOVID19(wikicode);
		wikicode = this.removeBoldFromHeadings(wikicode);
		wikicode = this.deleteNewLinesBetweenRefs(wikicode);
		wikicode = this.convertReferenceToReferences(wikicode);
		wikicode = this.swapRefPeriodWithPeriodRef(wikicode); // buggy
		wikicode = this.addReferencesSectionIfMissing(wikicode); // buggy
		wikicode = this.fixEmptyReferencesSection(wikicode);
		wikicode = this.deleteWhitespaceAtEndOfLines(wikicode);
		wikicode = this.convertSmartQuotesToRegularQuotes(wikicode);
		wikicode = this.fixWordEmphasizedWithSingleQuotes(wikicode);
		wikicode = this.convertDoubleSpacesToSingleSpaces(wikicode);
		wikicode = this.deleteBlankHeadings(wikicode);
		wikicode = this.changeYearRangeDashToNDash(wikicode);
		wikicode = this.disableCategoriesInDraftspace(wikicode, namespaceNumber);
		wikicode = this.deleteBRTagsOutsideInfoboxes(wikicode); // buggy
		// wikicode = this.rightAlignImages(wikicode); // commenting out, too many false positives in featured articles
		wikicode = this.correctCapitalizationOfEndMatterHeaders(wikicode);
		wikicode = this.ifNoLeadSectionDeleteFirstHeading(wikicode);
		wikicode = this.deleteCopyPastedEditAndEditSource(wikicode);
		wikicode = this.replaceUnicodeBulletsWithAsterisks(wikicode);
		wikicode = this.fixDoublePeriod(wikicode);
		wikicode = this.deleteEmptySections(wikicode);
		wikicode = this.fixHeadingsInAllCaps(wikicode);
		wikicode = this.deleteDuplicateReferencesSection(wikicode);
		wikicode = this.deleteSomeHTMLTags(wikicode);
		wikicode = this.deleteBlankLinesBetweenBullets(wikicode);
		wikicode = this.deleteBigTag(wikicode);
		wikicode = this.removeUnderscoresFromWikilinks(wikicode);

		// Ideas not implemented yet:
		// delete empty sections. for example, empty ==See Also== section
		// all ==sections== should start with a capital letter
		// after swap, if citation has no spaces on either side, and is not touching two other citations, add a space on the right
		// strip [[File: from infobox's image field
		// example 1: | image = [[File:SAMIR 1626.png|thumb|Samir Mohanty]]
		// example 2: | image = [[File:SAMIR 1626.pngy]]
		// trim whitespace inside refs, e.g. <ref> abc </ref>
		// single quotes to double quotes, i.e. This 'thing' to This "thing"
		// replace unreliable sources with {{cn}}.
		// if adjacent to other sources, just delete
		// if ref is used multiple times, account for that
		// duplicate citation fixer
		// move refs that are below {{Reflist}}, to above {{Reflist}}
		// move refs out of headings
		// delete AFC submission templates located mid-article, they end up self-hiding then appear as inexplicable whitespace. example: {{AfC submission|t||ts=20211212134609|u=Doezdemir|ns=118|demo=}}<!-- Important, do not remove this line before article has been created. -->
		// delete <!-- Inline citations added to your article will automatically display here. See en.wikipedia.org/wiki/WP:REFB for instructions on how to add citations. -->
		// remove <big></big> tags. test: == '''''<big>Saroj Kumar Basu</big>''''' ==, '''''<big>Saroj Kumar Basu</big>'''''
		// in wikilinks, underscores to spaces
		// fix redundant wikilinks, e.g. [[Spotify|Spotify]]

		// These three run last so they see the output of every other pass.
		wikicode = this.boldArticleTitle(wikicode, titleWithNamespaceAndSpaces); // buggy
		wikicode = this.trimEmptyLines(wikicode);
		wikicode = this.deleteMoreThanTwoEntersInARow(wikicode);
		return wikicode;
	}

	/** Escape a string so it can be embedded literally in a RegExp source. */
	_escapeRegEx(string) {
		return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
	}

	// surround bare URL's with <ref></ref>
	// Useful for seeing all URL's in the reflist section, and for CiteHighlighter ref quality highlighting
	bareURLToRef(wikicode) {
		return wikicode.replace(/^(http[^\n ]*) {0,}$/gm, "<ref>$1</ref>");
	}

	// in refs, turn [short links] into long links, so you can see the domain
	// also fixes <ref> link </ref> issues with extra spaces in the ref
	refShortLinkToLongLink(wikicode) {
		return wikicode.replace(/(<ref[^>]*>) {0,}\[ {0,}([^\]]*) {0,}\] {0,}(<\/ref>)/gm, '$1$2$3');
	}

	// convert inline external links to references
	// Everything from the first "Further reading" / "External link(s)" heading onward is left untouched,
	// and so is text that already sits inside <ref> tags.
	inlineExternalLinksToRefs(wikicode) {
		const sf = new StringFilter();
		const inlineLink = /(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm;
		const convert = (text) => sf.surgicalReplaceOutsideTags(inlineLink, '$2<ref>$1</ref>', text, ['<ref'], ['</ref>', '/>']);

		const endMatterStart = wikicode.search(/== ?Further reading|== ?External link/i);
		if ( endMatterStart === -1 ) {
			return convert(wikicode);
		}
		return convert(wikicode.slice(0, endMatterStart)) + wikicode.slice(endMatterStart);
	}

	// get rid of spaces in front of <refs>
	deleteSpacesInFrontOfRefs(wikicode) {
		return wikicode.replace(/(?<!\|) {1,}<ref/gm, "<ref");
	}

	// get rid of any level 2 heading that contains the article's title
	// this takes care of 2 common cases: heading at the bottom next to the {{AFC Submission}} template, and heading at the top above the lead
	deleteHeadingsWithTitle(wikicode, titleWithNamespaceAndSpaces) {
		// First the title without the Draft: prefix, then the title exactly as given.
		const candidates = [
			titleWithNamespaceAndSpaces.replace(/^Draft:/, ''),
			titleWithNamespaceAndSpaces
		];
		for ( const candidate of candidates ) {
			const headingRegex = new RegExp("^== ?" + this._escapeRegEx(candidate) + " ?==\n", "gmi");
			wikicode = wikicode.replace(headingRegex, "");
		}
		return wikicode;
	}

	// remove wikilinks to article name
	// Example: if title is Draft:Menna Shahin, change [[Menna Shahin]] to Menna Shahin
	unlinkWikilinksToThisTitle(wikicode, titleWithNamespaceAndSpaces) {
		const bareTitle = this._escapeRegEx(titleWithNamespaceAndSpaces.replace(/^Draft:/, ''));
		const selfLink = new RegExp("\\[\\[(" + bareTitle + ")\\]\\]", "gm");
		return wikicode.replace(selfLink, "$1");
	}

	// bold the first occurrence of the article title
	// Does nothing when the text already contains any ''' markup.
	// WARNING: this is buggy sometimes
	boldArticleTitle(wikicode, titleWithNamespaceAndSpaces) {
		if ( wikicode.includes("'''") ) {
			return wikicode;
		}
		let titleToLookFor = titleWithNamespaceAndSpaces.replace(/^Draft:/, '');
		titleToLookFor = titleToLookFor.replace(/ \(.*?\)$/, ''); // drop a trailing " (disambiguator)"
		titleToLookFor = this._escapeRegEx(titleToLookFor);
		// Be pretty strict, to avoid adding ''' to image paths and infoboxes, which messes up the image.
		const firstMention = new RegExp("^(The )?(" + titleToLookFor + ")([ <,])", "mi");
		return wikicode.replace(firstMention, "$1'''$2'''$3");
	}

	// /covid-19/i -> COVID-19
	// Careful of this string in URLs. Only matches after a space or a newline for that reason.
	capitalizeCOVID19(wikicode) {
		wikicode = wikicode.replace(/ covid-19/gmi, ' COVID-19');
		wikicode = wikicode.replace(/\ncovid-19/gmi, "\nCOVID-19");
		return wikicode;
	}

	// remove bold from headings
	removeBoldFromHeadings(wikicode) {
		return wikicode.replace(/^(=.*)'''(.*)'''(.*=)$/gm, '$1$2$3');
	}

	// remove enter characters between <ref>s
	deleteNewLinesBetweenRefs(wikicode) {
		return wikicode.replace(/<\/ref>\n{1,}<ref>/gm, '<\/ref><ref>');
	}

	// convert ==Reference== to ==References==
	convertReferenceToReferences(wikicode) {
		return wikicode.replace(/^== ?Reference ?==$/gmi, '== References ==');
	}

	// TOOL - swap ref period with period ref
	swapRefPeriodWithPeriodRef(wikicode) {
		wikicode = wikicode.replace(/((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\. /gm, '.$1 ');
		wikicode = wikicode.replace(/((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\.\n/gm, ".$1\n");
		return wikicode;
	}

	// fix errant spaces at beginning of lines, which makes a blockquote looking thing (AFCH does it)
	trimEveryLine(wikicode) {
		return wikicode
			.split("\n")
			.map((line) => {
				const trimmed = line.trim();
				// don't trim lines that start with | or }. It is common in FAs to indent these a bit.
				const isTemplateLine = trimmed.startsWith('|') || trimmed.startsWith('}');
				return isTemplateLine ? line : trimmed;
			})
			.join("\n");
	}

	// add references section if missing
	// When the text ends with an {{AfC submission}} template, the section is inserted above it.
	addReferencesSectionIfMissing(wikicode) {
		const hasRefSection = /^== ?References ?==$/mi.test(wikicode);
		const hasReflist = /(?:{{Reflist|<references)/mi.test(wikicode);
		if ( hasRefSection || hasReflist ) {
			return wikicode;
		}
		const bottomAFCTemplate = /(\n{{AfC submission[^}]*}}\s*)$/;
		if ( bottomAFCTemplate.test(wikicode) ) {
			return wikicode.replace(bottomAFCTemplate, "\n\n== References ==\n{{Reflist}}$1");
		}
		return wikicode.replace(/$/, "\n\n== References ==\n{{Reflist}}");
	}

	// fix empty references section
	fixEmptyReferencesSection(wikicode) {
		const hasRefSection = /^== ?References ?==$/mi.test(wikicode);
		const hasReflist = /(?:{{Reflist|<references)/mi.test(wikicode);
		if ( hasRefSection && ! hasReflist ) {
			// zero-width match right after the heading, so the heading itself is kept
			wikicode = wikicode.replace(/(?<=== ?References ?==)/gmi, "\n{{Reflist}}");
		}
		return wikicode;
	}

	// delete whitespace at the end of lines
	// (?!\|)(?!\}\}) is to stop this from deleting spaces after = in infoboxes
	deleteWhitespaceAtEndOfLines(wikicode) {
		return wikicode.replace(/[ \t]+\n(?!\|)(?!\}\})/g, "\n");
	}

	// convert smart quotes to regular quotes, and the ellipsis character to three periods
	convertSmartQuotesToRegularQuotes(wikicode) {
		return wikicode
			.replace(/[\u201C\u201D]/g, '"') // curly double quotes
			.replace(/[\u2018\u2019]/g, "'") // curly single quotes
			.replace(/\u2026/g, "..."); // horizontal ellipsis
	}

	// convert double spaces to single spaces (only after a period)
	convertDoubleSpacesToSingleSpaces(wikicode) {
		return wikicode.replace(/\. {2,}/g, '. ');
	}

	// remove blank heading
	deleteBlankHeadings(wikicode) {
		return wikicode.replace(/\n={2,} {0,}={2,}\n/g, "\n");
	}

	// Change year range dash to ndash. Skip text inside of [[File:
	changeYearRangeDashToNDash(wikicode) {
		const sf = new StringFilter();
		const yearRangePatterns = [
			/(\(\d{4}) ?- ?(\d{4}\))/gm, // (1111-1111)
			/( \d{4}) ?- ?(\d{4}\))/gm // 1839 - 1926)
		];
		for ( const pattern of yearRangePatterns ) {
			wikicode = sf.surgicalReplaceOutsideTags(pattern, '$1\u2013$2', wikicode, ['[[File:'], [']]']);
		}
		return wikicode;
	}

	// if in draftspace, and draft has categories, disable the categories
	disableCategoriesInDraftspace(wikicode, namespace) {
		// Number() keeps accepting a numeric string, as the previous loose comparison did.
		const isDraft = Number(namespace) === 118;
		if ( ! isDraft ) {
			return wikicode;
		}
		return wikicode.replace(/:?(\[\[)(Category:[^\]]*\]\])/gm, '$1:$2');
	}

	// delete <br> in drafts, these are usually poorly placed
	deleteBRTagsOutsideInfoboxes(wikicode) {
		return wikicode
			.split("\n")
			.map((line) => {
				// Skip lines that start with { or |. This is the easiest way to detect infoboxes
				const looksLikeTemplate = line.startsWith('{') || line.startsWith('|');
				return looksLikeTemplate ? line : line.replace(/<br ?\/?>/g, '');
			})
			.join("\n");
	}

	// right align images
	rightAlignImages(wikicode) {
		return wikicode.replace(/(\[\[File:[^\]]*\|)left(\|[^\]]*\]\])/gm, '$1right$2');
	}

	// correct capitalization of see also, references, further reading, external links
	correctCapitalizationOfEndMatterHeaders(wikicode) {
		const headerNames = ['References', 'External links', 'Further reading', 'See also'];
		for ( const headerName of headerNames ) {
			const anyCase = new RegExp('^(== ?)' + headerName + '( ?==)$', 'gmi');
			wikicode = wikicode.replace(anyCase, '$1' + headerName + '$2');
		}
		return wikicode;
	}

	// if article has headings but no lead, remove first heading
	// Every heading line seen before the first line of body text is dropped.
	ifNoLeadSectionDeleteFirstHeading(wikicode) {
		const lines = wikicode.split("\n");
		const lastIndex = lines.length - 1;
		let output = '';
		let seenBodyText = false;
		for ( const [index, line] of lines.entries() ) {
			// empty lines, lines with templates, or lines with images do not count as body text
			const isIgnorable = line.startsWith('{') || line.length === 0 || line.startsWith('[[File:');
			if ( ! isIgnorable ) {
				if ( line.startsWith('==') && ! seenBodyText ) {
					continue; // delete this line (and its line break) by not putting it in the output
				}
				seenBodyText = true;
			}
			output += line;
			if ( index !== lastIndex ) {
				output += "\n";
			}
		}
		return output;
	}

	// delete [edit], [edit source], and [editar] from headings
	deleteCopyPastedEditAndEditSource(wikicode) {
		wikicode = wikicode.replace(/\[edit\]( ?={2,})$/gm, '$1');
		wikicode = wikicode.replace(/\[edit source\]( ?={2,})$/gm, '$1');
		wikicode = wikicode.replace(/\[editar\]( ?={2,})$/gm, '$1');
		return wikicode;
	}

	// at beginning of lines, replace unicode bullets with asterisks
	replaceUnicodeBulletsWithAsterisks(wikicode) {
		// U+00B7 middle dot, U+2022 bullet, U+25CF black circle
		return wikicode.replace(/^\s{0,}[\u00B7\u2022\u25CF]\s{0,}/gm, '* ');
	}

	// remove whitespace if that is the only character on a line
	trimEmptyLines(wikicode) {
		return wikicode.replace(/^\s*$/gm, '');
	}

	// no more than 2 enters in a row
	// Note: AFCH does this too
	deleteMoreThanTwoEntersInARow(wikicode) {
		return wikicode.replace(/\n{3,}/gm, "\n\n");
	}

	// convert =TitleHeading= to ==H2Heading==
	convertH1ToH2(wikicode) {
		// [^=\n] rather than [^=]: the heading text must stay on one line. Otherwise a stray
		// "= Title" line could pair up with a "=" ending a later line and swallow everything between.
		return wikicode.replace(/^= ?([^=\n]*) ?=$/gm, '== $1 ==');
	}

	// A heading line longer than 150 characters (and without a <ref) is turned into plain text.
	convertVeryLongHeadingToParagraph(wikicode) {
		return wikicode
			.split("\n")
			.map((line) => {
				const isVeryLongHeading = line.length > 150 && /^==.*==$/.test(line) && ! /<ref/.test(line);
				return isVeryLongHeading ? line.replace(/^={1,}\s*(.*?)\s*={1,}$/m, '$1') : line;
			})
			.join("\n");
	}

	// This 'word' -> This "word". Single words only, with a space on both sides.
	fixWordEmphasizedWithSingleQuotes(wikicode) {
		return wikicode.replace(/ '(\w+)' /g, ' "$1" ');
	}

	// "text.." -> "text." when followed by a <ref, a space, or a newline
	fixDoublePeriod(wikicode) {
		return wikicode.replace(/(?<=[A-Za-z\]])\.\.(?=<ref| |\n)/g, '.');
	}

	// [[https://...]] is not valid wikilink syntax; turn it into a wikilink or a single-bracket external link
	fixWikilinksContainingURL(wikicode) {
		// non-piped wikilink
		wikicode = wikicode.replace(/\[\[https?:\/\/en\.wikipedia\.org\/wiki\/([^|]*)\]\]/g, '[[$1]]');
		// piped wikilink
		wikicode = wikicode.replace(/\[\[https?:\/\/en\.wikipedia\.org\/wiki\/([^|]*)\|([^\]]*)\]\]/g, '[[$1|$2]]');
		// non-piped external link
		wikicode = wikicode.replace(/\[\[(http[^|]*)\]\]/g, '[$1]');
		// piped external link
		wikicode = wikicode.replace(/\[\[(http[^|]*)\|([^\]]*)\]\]/g, '[$1 $2]');
		return wikicode;
	}

	fixExternalLinksToWikipediaArticles(wikicode) {
		// [https://en.wikipedia.org/wiki/Article] and [https://en.wikipedia.org/wiki/Article Article name]
		return wikicode.replace(/(?<!\[)\[https?:\/\/en\.wikipedia\.org\/wiki\/([^ \]]*)( [^\]]*)?\]/gs, '[[$1]]');
	}

	// An empty line directly between two lines that both start with * is removed.
	deleteBlankLinesBetweenBullets(wikicode) {
		const lines = wikicode.split('\n');
		const lastIndex = lines.length - 1;
		const kept = lines.filter((current, i) => {
			const isBlankBetweenBullets =
				current === '' &&
				i > 0 &&
				i < lastIndex &&
				lines[i - 1].startsWith('*') &&
				lines[i + 1].startsWith('*');
			return ! isBlankBetweenBullets;
		});
		return kept.join('\n');
	}

	deleteWeirdUnicodeCharacters(wikicode) {
		// NOTE(review): the character class below appears empty in this copy of the file, and an
		// empty class never matches. It presumably held invisible characters — confirm against
		// the original source before editing.
		return wikicode.replace(/[]/g, '');
	}

	// Strips the tags themselves; the text between them is kept.
	deleteSomeHTMLTags(wikicode) {
		wikicode = wikicode.replace(/<\/?p>/g, '');
		wikicode = wikicode.replace(/<\/?strong>/g, '');
		wikicode = wikicode.replace(/<\/?em>/g, '');
		wikicode = wikicode.replace(/<\/?nowiki>/g, '');
		return wikicode;
	}

	/**
	 * If the text of ALL headings is upper case, convert every heading to sentence case.
	 * The heading level is preserved: the opening and closing runs of = are captured separately
	 * from the heading text, so "=== EARLY LIFE ===" becomes "=== Early life ===".
	 */
	fixHeadingsInAllCaps(wikicode) {
		// group 1: opening =s, group 2: heading text (never starts or ends with = or whitespace), group 3: closing =s
		const headingRegex = /(={2,}) *([^=\s](?:.*[^=\s])?) *(={2,})/g;

		// create a concatenated string with the text from every heading
		const allHeadingText = Array.from(wikicode.matchAll(headingRegex), (match) => match[2]).join('');

		// if string only contains caps
		if ( ! this._isUpperCase(allHeadingText) ) {
			return wikicode;
		}

		// convert all headings to sentence case
		return wikicode.replace(headingRegex, (whole, opening, text, closing) => {
			return opening + ' ' + this._toSentenceCase(text) + ' ' + closing;
		});
	}

	deleteBigTag(wikicode) {
		return wikicode.replace(/(?:<big>|<\/big>)/g, '');
	}

	deleteEmptySections(wikicode) {
		// TODO:
		return wikicode;
	}

	/**
	 * When there is more than one References heading, try removing the stock AFC boilerplate
	 * references section. The removal is only kept if exactly one References heading remains.
	 */
	deleteDuplicateReferencesSection(wikicode) {
		// match() returns null when nothing matches, so count through a fallback array
		const countReferencesHeadings = (text) => (text.match(/== ?References ?==/gi) || []).length;

		if ( countReferencesHeadings(wikicode) <= 1 ) {
			return wikicode;
		}

		// run regexes that are likely to delete the extra section
		const boilerplateSection =
			'== References ==\n' +
			'<!-- Inline citations added to your article will automatically display here. See en.wikipedia.org/wiki/WP:REFB for instructions on how to add citations. -->\n' +
			'{{reflist}}';
		const attempt = wikicode.replace(boilerplateSection, '');
		if ( countReferencesHeadings(attempt) === 1 ) {
			return attempt.trim();
		}
		return wikicode;
	}

	// Replaces underscores with spaces in every chunk of text that starts at a [[
	removeUnderscoresFromWikilinks(wikicode) {
		const sf = new StringFilter();
		return sf.surgicalReplaceInsideTags(/_/g, ' ', wikicode, ['[['], [']]']);
	}

	/** True when upper-casing the string leaves it unchanged (so '' and digit-only strings count as upper case). */
	_isUpperCase(str) {
		return str === str.toUpperCase();
	}

	/** First character upper-cased, everything after it lower-cased. */
	_toSentenceCase(string) {
		return string.charAt(0).toUpperCase() + string.slice(1).toLowerCase();
	}
}
/** Lets you use regex to specify what parts of a very long string you want to specify as "off limits", then you can do additional regex's and search/replace to the remaining parts of the string. */
/**
 * Regex search/replace restricted to chosen regions of a long string.
 * The string is cut into chunks at every opening or closing tag; a chunk that starts with an
 * opening tag is "inside", every other chunk is "outside".
 */
class StringFilter {
	/**
	 * Does a replace, but specifies areas of the file that should NOT be replaced.
	 * Chunks that start with one of the opening tags are off limits and copied through unchanged.
	 *
	 * @param {RegExp|string} regex Pattern handed to String.prototype.replace.
	 * @param {string|Function} replacement Replacement handed to String.prototype.replace.
	 * @param {string} haystack Text to process.
	 * @param {string[]} openingTags Literal strings that start a protected area.
	 * @param {string[]} closingTags Literal strings that end a protected area.
	 * @returns {string}
	 */
	surgicalReplaceOutsideTags(regex, replacement, haystack, openingTags, closingTags) {
		const parts = this._splitStringUsingMultiplePatterns(haystack, [...openingTags, ...closingTags]);
		return parts
			.map((part) => {
				const isProtected = openingTags.some((tag) => part.startsWith(tag));
				return isProtected ? part : part.replace(regex, replacement);
			})
			.join('');
	}

	/**
	 * Does a replace, but specifies areas of the file that SHOULD be replaced, then skips the rest.
	 * Only chunks that start with one of the opening tags are processed, and each such chunk is
	 * processed exactly once, even if it happens to start with more than one of the opening tags.
	 *
	 * @param {RegExp|string} regex Pattern handed to String.prototype.replace.
	 * @param {string|Function} replacement Replacement handed to String.prototype.replace.
	 * @param {string} haystack Text to process.
	 * @param {string[]} openingTags Literal strings that start an area to process.
	 * @param {string[]} closingTags Literal strings that end an area to process.
	 * @returns {string}
	 */
	surgicalReplaceInsideTags(regex, replacement, haystack, openingTags, closingTags) {
		const parts = this._splitStringUsingMultiplePatterns(haystack, [...openingTags, ...closingTags]);
		return parts
			.map((part) => {
				const isInside = openingTags.some((tag) => part.startsWith(tag));
				return isInside ? part.replace(regex, replacement) : part;
			})
			.join('');
	}

	/**
	 * Split a string in front of every occurrence of any of the given literal patterns.
	 * Also keeps the pattern in the result, unlike string.prototype.split. Algorithm isn't perfect, will fail with this pattern: <ref>Test/>Test</ref>. But should be good enough for DraftCleaner stuff.
	 *
	 * @param {string} string Text to split.
	 * @param {Array} patterns Literal strings marking where a new chunk begins.
	 * @returns {string[]} Chunks whose concatenation equals the input. Always has at least one element.
	 */
	_splitStringUsingMultiplePatterns(string, patterns) {
		const result = [];
		let positionOfLastMatch = 0;
		for ( let i = 0; i < string.length; i++ ) {
			// startsWith with a position checks in place, no per-character substring copy
			const atBoundary = patterns.some((pattern) => string.startsWith(pattern, i));
			if ( ! atBoundary ) continue;
			// if blank (happens if i=0 matches), continue instead of putting an empty "" into the array
			if ( i === positionOfLastMatch ) continue;
			result.push(string.slice(positionOfLastMatch, i));
			positionOfLastMatch = i;
		}
		// Don't forget the last chunk.
		result.push(string.substring(positionOfLastMatch));
		return result;
	}
}
// </nowiki>