Jump to content

User:Novem Linguae/Scripts/DraftCleaner.js

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Novem Linguae (talk | contribs) at 14:00, 15 December 2021 (fix edge cases in deleteDuplicateReferencesSection() (publish.php)). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// <nowiki>

/* THIS SCRIPT IS STILL IN BETA AND IS BUGGY ABOUT 25% OF THE TIME. Be sure to check the diff that pops up before submitting.

- Adds "Run DraftCleaner" link to the left sidebar

- Top uses:
	- remove extra line breaks
	- in the first sentence, bold the title
	- convert curly quotes to regular quotes
	- put <ref>s after periods
	- clean external links out of the main article area (turn them into references)

- More detailed list of uses:
	- converts [inline external links] to <ref>s
	- reduces more than 2 enters in a row, to 2 enters
	- removes spaces in front of <ref>s
	- get rid of any level 2 heading that contains the article's title
	- bolds the first occurrence of the article title
	- removes bold from headings
	- converts =TitleHeading= to ==H2Heading==
	- replaces Covid-19 with COVID-19
	- removes enter characters between <ref>s
	- trims whitespace at beginning and end
	- remove self wikilinks to the article title
	- convert ==Reference== to ==References==
	- swap ref period with period ref
	- turn bare URLs into references
	- fix errant spaces at beginning of lines, which makes a blockquote looking thing
	- add references section if missing
	- delete whitespace at the end of lines
	- convert smart quotes to regular quotes
	- convert double spaces to single spaces
	- remove blank heading
	- in refs, turn short links into long links, so you can see the domain
	- change year range dash to ndash
	- if in draftspace, and draft in categories, disable the categories
	- delete <br>. in drafts, these are usually poorly placed
	- fix empty references section
	- right align images
	- remove whitespace if that is the only character on a line
	- correct capitalization of see also, references, further reading, external links
	- if article has headings but no lead, remove first heading
	- replace unicode bullets with asterisks

This page was assembled from 3 files using my publish.php script. I have an offline test suite with around 100 unit tests for the DraftCleaner and StringFilter classes.
*/

$(function() {
	/**
	 * Fetches the current wikitext of a page through the action=parse API.
	 *
	 * @param {string} title Page name, including namespace.
	 * @returns {Promise<string>} The page wikitext, or '' when wgCurRevisionId is falsy (page deleted / does not exist).
	 * @throws {Error} When the API response does not contain parse.wikitext. A failed request rejects with whatever $.ajax rejects with.
	 */
	async function getWikicode(title) {
		if ( ! mw.config.get('wgCurRevisionId') ) return ''; // if page is deleted, return blank
		const encodedTitle = encodeURIComponent(title);
		const result = await $.ajax({
			url: 'https://en.wikipedia.org/w/api.php?action=parse&page='+encodedTitle+'&prop=wikitext&formatversion=2&format=json',
			dataType: "json",
		});
		// An API-level error (e.g. missing page) comes back as HTTP 200 without a "parse" key.
		// Fail loudly instead of treating the page as empty, which would make the cleaner
		// propose replacing the whole page.
		if ( ! result || ! result.parse || typeof result.parse.wikitext !== 'string' ) {
			throw new Error('DraftCleaner: API response did not contain wikitext for ' + title);
		}
		return result.parse.wikitext;
	}
	
	// borrowed from [[Wikipedia:User scripts/Guide#Edit a page and other common actions]]
	/**
	 * Saves new page content directly through the edit API, replacing the entire page.
	 * Currently only referenced from commented-out code in the click handler.
	 * NOTE(review): uses a synchronous request (async: false), which blocks the page while it runs.
	 *
	 * @param {string} articleName Page name to edit.
	 * @param {string} wikicode New full page content.
	 * @param {string} summary Edit summary.
	 */
	function editPage(articleName, wikicode, summary) {
		$.ajax({
			url: mw.util.wikiScript('api'),
			type: 'POST',
			dataType: 'json',
			data: {
				format: 'json',
				action: 'edit',
				title: articleName,
				text: wikicode, // will replace entire page content
				summary: summary,
				token: mw.user.tokens.get('csrfToken')
			},
			async: false
		});
	}

	/**
	 * Builds a hidden form containing the cleaned wikitext and submits it to index.php?action=submit
	 * with wpDiff set, so the user lands on the "Show changes" screen and can review before saving.
	 *
	 * @param {string} titleWithNamespaceAndUnderscores Page name as returned by wgPageName.
	 * @param {string} wikicode Cleaned wikitext to place in the edit box.
	 * @param {string} editSummary Edit summary to pre-fill.
	 */
	function goToShowChangesScreen(titleWithNamespaceAndUnderscores, wikicode, editSummary) {
		const titleEncoded = encodeURIComponent(titleWithNamespaceAndUnderscores);
		const baseURL = mw.config.get('wgServer') + mw.config.get('wgScriptPath') + '/';
		// https://stackoverflow.com/a/12464290/3480193
		// Values are set with .val() rather than interpolated into the HTML, so the wikitext cannot break the markup.
		$(`<form action="${baseURL}index.php?title=${titleEncoded}&action=submit" method="POST"/>`)
			.append($('<input type="hidden" name="wpTextbox1">').val(wikicode))
			.append($('<input type="hidden" name="wpSummary">').val(editSummary))
			.append($('<input type="hidden" name="mode">').val('preview'))
			.append($('<input type="hidden" name="wpDiff">').val('Show changes'))
			.append($('<input type="hidden" name="wpUltimateParam">').val('1'))
			.appendTo($(document.body)) //it has to be added somewhere into the <body>
			.submit();
	}

	/** returns the pagename, including the namespace name, but with spaces replaced by underscores */
	function getArticleName() {
		return mw.config.get('wgPageName');
	}
	
	/** Hides the clickable sidebar link and shows the given status text in the non-clickable item instead. */
	function showMessage(messageText) {
		$('#DraftCleaner').hide();
		$('#DraftCleanerNoClick').empty();
		$('#DraftCleanerNoClick').prepend(messageText);
		$('#DraftCleanerNoClick').show();
	}
	
	/** Hides the status item and shows the clickable sidebar link again. */
	function showClickableButton() {
		$('#DraftCleanerNoClick').hide();
		$('#DraftCleaner').show();
	}
	
	/**
	 * Reloads the page by re-assigning the current URL.
	 * Currently only referenced from commented-out code in the click handler.
	 */
	function hardRefresh() {
		// window.location.reload(true) is deprecated. use this instead
		window.location.href = window.location.href;
	}
	
	// don't run when not viewing articles
	const action = mw.config.get('wgAction');
	if ( action !== 'view' ) return;
	
	// don't run when viewing diffs
	const isDiff = mw.config.get('wgDiffNewId');
	if ( isDiff ) return;
	
	// Only run in mainspace, draftspace, and sandboxes
	const titleWithNamespaceAndUnderscores = getArticleName();
	const namespaceNumber = mw.config.get('wgNamespaceNumber');
	const sandbox = titleWithNamespaceAndUnderscores.match(/sandbox/i);
	if ( ! [0, 118].includes(namespaceNumber) && ! sandbox ) return;
	
	// Add DraftCleaner to left sidebar
	// Using two <li>s. One of the two is kept hidden at all times. This avoids having to delete #DraftCleanerLink, which would also delete the event listener.
	$('#p-navigation .vector-menu-content-list').append(`
		<li id="DraftCleaner">
			<a id="DraftCleanerLink">Run DraftCleaner</a>
		</li>
		
		<li id="DraftCleanerNoClick" style="display:none">
			
		</li>
	`);
	
	$('#DraftCleanerLink').on('click', async function() {
		// prevent running the script while script is already in progress
		showMessage('Editing. Please wait.');
		
		try {
			// get page wikicode
			const titleWithNamespaceAndSpaces = titleWithNamespaceAndUnderscores.replace(/_/g, ' ');
			const originalWikicode = await getWikicode(titleWithNamespaceAndUnderscores);

			const dc = new DraftCleaner();
			const wikicode = dc.cleanDraft(originalWikicode, namespaceNumber, titleWithNamespaceAndSpaces);

			// if changes to be made
			if ( wikicode !== originalWikicode ) {
				const summary = 'clean up ([[User:Novem Linguae/Scripts/DraftCleaner.js|DraftCleaner]])';
				// editPage(titleWithNamespaceAndUnderscores, wikicode);
				// hardRefresh();
				goToShowChangesScreen(titleWithNamespaceAndUnderscores, wikicode, summary);
			// else display "no changes needed", then reset
			} else {
				showMessage('No changes needed.');
				
				setTimeout(function (){
					showClickableButton();
				}, 2000);
			}
		} catch ( err ) {
			// Without this, a failed request or a cleaner exception would leave the sidebar stuck on
			// "Editing. Please wait." with the clickable link hidden until the page is reloaded.
			console.error('DraftCleaner failed', err);
			showMessage('DraftCleaner failed. See the browser console.');
			
			setTimeout(function (){
				showClickableButton();
			}, 2000);
		}
	});
});

// </nowiki>


// <nowiki>


/**
 * Collection of regex-based cleanup passes for draft wikitext.
 * Every public method takes wikitext (a string) and returns the transformed wikitext.
 * Methods that call StringFilter require that class to be defined when they are invoked.
 */
class DraftCleaner {
	/**
	 * Runs every cleanup pass, in a fixed order, and returns the cleaned wikitext.
	 * The order matters: later passes see the output of earlier ones.
	 *
	 * @param {string} wikicode Full page wikitext.
	 * @param {number} namespaceNumber Namespace number of the page (118 is treated as draftspace).
	 * @param {string} titleWithNamespaceAndSpaces Page title including namespace prefix, with spaces (not underscores).
	 * @returns {string} Cleaned wikitext.
	 */
	cleanDraft(wikicode, namespaceNumber, titleWithNamespaceAndSpaces) {
		wikicode = this.fixWikilinksContainingURL(wikicode);
		wikicode = this.fixExternalLinksToWikipediaArticles(wikicode);
		wikicode = this.deleteWeirdUnicodeCharacters(wikicode);
		wikicode = this.trimEveryLine(wikicode);
		wikicode = this.convertH1ToH2(wikicode);
		wikicode = this.convertVeryLongHeadingToParagraph(wikicode);
		wikicode = this.bareURLToRef(wikicode);
		wikicode = this.refShortLinkToLongLink(wikicode);
		wikicode = this.inlineExternalLinksToRefs(wikicode); // buggy
		wikicode = this.deleteSpacesInFrontOfRefs(wikicode);
		wikicode = this.deleteHeadingsWithTitle(wikicode, titleWithNamespaceAndSpaces);
		wikicode = this.unlinkWikilinksToThisTitle(wikicode, titleWithNamespaceAndSpaces);
		wikicode = this.capitalizeCOVID19(wikicode);
		wikicode = this.removeBoldFromHeadings(wikicode);
		wikicode = this.deleteNewLinesBetweenRefs(wikicode);
		wikicode = this.convertReferenceToReferences(wikicode);
		wikicode = this.swapRefPeriodWithPeriodRef(wikicode); // buggy
		wikicode = this.addReferencesSectionIfMissing(wikicode); // buggy
		wikicode = this.fixEmptyReferencesSection(wikicode);
		wikicode = this.deleteWhitespaceAtEndOfLines(wikicode);
		wikicode = this.convertSmartQuotesToRegularQuotes(wikicode);
		wikicode = this.fixWordEmphasizedWithSingleQuotes(wikicode);
		wikicode = this.convertDoubleSpacesToSingleSpaces(wikicode);
		wikicode = this.deleteBlankHeadings(wikicode);
		wikicode = this.changeYearRangeDashToNDash(wikicode);
		wikicode = this.disableCategoriesInDraftspace(wikicode, namespaceNumber);
		wikicode = this.deleteBRTagsOutsideInfoboxes(wikicode, namespaceNumber); // buggy
		// wikicode = this.rightAlignImages(wikicode); // commenting out, too many false positives in featured articles
		wikicode = this.correctCapitalizationOfEndMatterHeaders(wikicode);
		wikicode = this.ifNoLeadSectionDeleteFirstHeading(wikicode);
		wikicode = this.deleteCopyPastedEditAndEditSource(wikicode);
		wikicode = this.replaceUnicodeBulletsWithAsterisks(wikicode);
		wikicode = this.fixDoublePeriod(wikicode);
		wikicode = this.deleteEmptySections(wikicode);
		wikicode = this.fixHeadingsInAllCaps(wikicode);
		wikicode = this.deleteDuplicateReferencesSection(wikicode);
		wikicode = this.deleteSomeHTMLTags(wikicode);
		wikicode = this.deleteBlankLinesBetweenBullets(wikicode);
		wikicode = this.deleteBigTag(wikicode);
		wikicode = this.removeUnderscoresFromWikilinks(wikicode);
		// delete empty sections. for example, empty ==See Also== section
		// all ==sections== should start with a capital letter
		// after swap, if citation has no spaces on either side, and is not touching two other citations, add a space on the right
		// strip [[File: from infobox's image field
			// example 1: | image               = [[File:SAMIR 1626.png|thumb|Samir Mohanty]]
			// example 2: | image               = [[File:SAMIR 1626.pngy]]
		// trim whitespace inside refs, e.g. <ref> abc </ref>
		// single quotes to double quotes, i.e. This 'thing' to This "thing"
		// replace unreliable sources with {{cn}}.
			// if adjacent to other sources, just delete
			// if ref is used multiple times, account for that
		// duplicate citation fixer
		// move refs that are below {{Reflist}}, to above {{Reflist}}
		// move refs out of headings
		// delete AFC submission templates located mid-article, they end up self-hiding then appear as inexplicable whitespace. example: {{AfC submission|t||ts=20211212134609|u=Doezdemir|ns=118|demo=}}<!-- Important, do not remove this line before article has been created. -->
		// delete <!-- Inline citations added to your article will automatically display here. See en.wikipedia.org/wiki/WP:REFB for instructions on how to add citations. -->
		// remove <big></big> tags. test: == '''''<big>Saroj Kumar Basu</big>''''' ==, '''''<big>Saroj Kumar Basu</big>'''''
		// in wikilinks, underscores to spaces
		// fix redundant wikilinks, e.g. [[Spotify|Spotify]]
		wikicode = this.boldArticleTitle(wikicode, titleWithNamespaceAndSpaces); // buggy
		wikicode = this.trimEmptyLines(wikicode);
		wikicode = this.deleteMoreThanTwoEntersInARow(wikicode);
		return wikicode;
	}

	/** Escapes regex metacharacters so the string can be embedded literally in a RegExp. */
	_escapeRegEx(string) {
		return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
	}

	// surround bare URL's with <ref></ref>
	// Useful for seeing all URL's in the reflist section, and for CiteHighlighter ref quality highlighting
	/** Wraps a URL in <ref></ref> when it is the only thing on its line (trailing spaces allowed and dropped). */
	bareURLToRef(wikicode) {
		return wikicode.replace(/^(http[^\n ]*) {0,}$/gm, "<ref>$1</ref>");
	}
	
	// in refs, turn [short links] into long links, so you can see the domain
	// also fixes <ref> link </ref> issues with extra spaces in the ref
	/** Inside a <ref>, unwraps a single [bracketed] link so the bracket contents become the whole ref body. */
	refShortLinkToLongLink(wikicode) {
		return wikicode.replace(/(<ref[^>]*>) {0,}\[ {0,}([^\]]*) {0,}\] {0,}(<\/ref>)/gm, '$1$2$3');
	}
	
	// convert inline external links to references
	/**
	 * Turns [http://url label] in body text into label<ref>http://url</ref>.
	 * Text inside existing <ref> tags is skipped, and everything from the first
	 * "Further reading" / "External link" heading to the end of the page is left untouched.
	 */
	inlineExternalLinksToRefs(wikicode) {
		const inlineLinkRegex = /(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm;
		const refReplacement = '$2<ref>$1</ref>';
		const hasExternalLinksSection = wikicode.match(/== ?External link/i);
		const hasFurtherReadingSection = wikicode.match(/== ?Further reading/i);
		// let infoboxInArticle = wikicode.match(/{{Infobox/i);

		const sf = new StringFilter();

		// no end-matter link sections: process the whole page
		if ( ! hasExternalLinksSection && ! hasFurtherReadingSection ) {
			return sf.surgicalReplaceOutsideTags(inlineLinkRegex, refReplacement, wikicode, ['<ref'], ['</ref>', '/>']);
		}

		// skip external links and further reading sections
		const topHalf = wikicode.replace(/((== ?Further reading|== ?External link).*$)/is, '');
		const bottomHalf = wikicode.match(/((== ?Further reading|== ?External link).*$)/is)[1];
		const cleanedTopHalf = sf.surgicalReplaceOutsideTags(inlineLinkRegex, refReplacement, topHalf, ['<ref'], ['</ref>', '/>']);
		return cleanedTopHalf + bottomHalf;
	}
	
	// get rid of spaces in front of <refs>
	/** Deletes spaces directly before "<ref", unless the spaces are preceded by a pipe. */
	deleteSpacesInFrontOfRefs(wikicode) {
		return wikicode.replace(/(?<!\|) {1,}<ref/gm, "<ref");
	}
	
	// get rid of any level 2 heading that contains the article's title
	// this takes care of 2 common cases: heading at the bottom next to the {{AFC Submission}} template, and heading at the top above the lead
	/**
	 * Deletes level 2 heading lines whose text is the page title (case-insensitive),
	 * both without and with the "Draft:" prefix.
	 */
	deleteHeadingsWithTitle(wikicode, titleWithNamespaceAndSpaces) {
		const titleWithoutDraftPrefix = titleWithNamespaceAndSpaces.replace(/^Draft:/, '');
		// first the bare title, then titles that contain Draft: at the beginning, too
		for ( const headingName of [titleWithoutDraftPrefix, titleWithNamespaceAndSpaces] ) {
			const regEx = new RegExp("^== ?"+this._escapeRegEx(headingName)+" ?==\n", "gmi");
			wikicode = wikicode.replace(regEx, "");
		}
		return wikicode;
	}
	
	// remove wikilinks to article name
	// Example: if title is Draft:Menna Shahin, change [[Menna Shahin]] to Menna Shahin
	/** Unlinks unpiped wikilinks whose target is exactly the page title (minus any "Draft:" prefix). */
	unlinkWikilinksToThisTitle(wikicode, titleWithNamespaceAndSpaces) {
		const title = titleWithNamespaceAndSpaces.replace(/^Draft:/, '');
		const regEx = new RegExp("\\[\\[("+this._escapeRegEx(title)+")\\]\\]", "gm");
		return wikicode.replace(regEx, "$1");
	}
	
	// bold the first occurrence of the article title
	// WARNING: this is buggy sometimes
	/**
	 * If the page contains no ''' at all, bolds the first line-initial occurrence of the title
	 * (optionally preceded by "The "), ignoring a "Draft:" prefix and a trailing " (disambiguator)".
	 */
	boldArticleTitle(wikicode, titleWithNamespaceAndSpaces) {
		if ( wikicode.includes("'''") ) return wikicode; // something is already bold; leave the page alone

		let titleToLookFor = titleWithNamespaceAndSpaces;
		titleToLookFor = titleToLookFor.replace(/^Draft:/, '');
		titleToLookFor = titleToLookFor.replace(/ \(.*?\)$/, '');
		titleToLookFor = this._escapeRegEx(titleToLookFor);
		// Be pretty strict, to avoid adding ''' to image paths and infoboxes, which messes up the image.
		const regEx = new RegExp("^(The )?("+titleToLookFor+")([ <,])", "mi");
		return wikicode.replace(regEx, "$1'''$2'''$3");
	}
	
	// /covid-19/i -> COVID-19
	// Careful of this string in URLs.
	/** Upper-cases "covid-19" when it follows a space or a newline (which avoids most URLs). */
	capitalizeCOVID19(wikicode) {
		wikicode = wikicode.replace(/ covid-19/gmi, ' COVID-19');
		wikicode = wikicode.replace(/\ncovid-19/gmi, "\nCOVID-19");
		return wikicode;
	}
	
	// remove bold from headings
	/** Removes one pair of ''' from each line that starts and ends with "=". */
	removeBoldFromHeadings(wikicode) {
		return wikicode.replace(/^(=.*)'''(.*)'''(.*=)$/gm, '$1$2$3');
	}
	
	// remove enter characters between <ref>s
	/** Joins a closing </ref> and a following attribute-less <ref> that are separated only by newlines. */
	deleteNewLinesBetweenRefs(wikicode) {
		return wikicode.replace(/<\/ref>\n{1,}<ref>/gm, '<\/ref><ref>');
	}
	
	// convert ==Reference== to ==References==
	convertReferenceToReferences(wikicode) {
		return wikicode.replace(/^== ?Reference ?==$/gmi, '== References ==');
	}
	
	// TOOL  - swap ref period with period ref
	/** Moves a period that follows one or more <ref>...</ref> to the front of them, when followed by a space or newline. */
	swapRefPeriodWithPeriodRef(wikicode) {
		wikicode = wikicode.replace(/((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\. /gm, '.$1 ');
		wikicode = wikicode.replace(/((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\.\n/gm, ".$1\n");
		return wikicode;
	}
	
	// fix errant spaces at beginning of lines, which makes a blockquote looking thing (AFCH does it)
	/** Trims leading/trailing whitespace from every line, except lines whose first non-space character is | or }. */
	trimEveryLine(wikicode) {
		return wikicode
			.split("\n")
			.map((line) => {
				const trimmed = line.trim();
				// don't trim lines that start with | or }. It is common in FAs to indent these a bit.
				const isIndentedTemplateLine = trimmed.startsWith('|') || trimmed.startsWith('}');
				return isIndentedTemplateLine ? line : trimmed;
			})
			.join("\n");
	}
	
	// add references section if missing
	/**
	 * Appends "== References ==" plus {{Reflist}} when the page has neither a References heading
	 * nor a {{Reflist / <references tag. If the page ends with an {{AfC submission}} template,
	 * the section is inserted before that template instead of after it.
	 */
	addReferencesSectionIfMissing(wikicode) {
		const hasRefSection = wikicode.match(/^== ?References ?==$/mi);
		const hasReflist = wikicode.match(/(?:{{Reflist|<references)/mi);
		if ( hasRefSection || hasReflist ) return wikicode;

		const bottomAFCTemplateRegex = /(\n{{AfC submission[^}]*}}\s*)$/;
		if ( wikicode.match(bottomAFCTemplateRegex) ) {
			return wikicode.replace(bottomAFCTemplateRegex, "\n\n== References ==\n{{Reflist}}$1");
		}
		return wikicode.replace(/$/, "\n\n== References ==\n{{Reflist}}");
	}
	
	// fix empty references section
	/** When a References heading exists but no {{Reflist / <references tag does, inserts {{Reflist}} right after the heading. */
	fixEmptyReferencesSection(wikicode) {
		const hasRefSection = wikicode.match(/^== ?References ?==$/mi);
		const hasReflist = wikicode.match(/(?:{{Reflist|<references)/mi);
		if ( ! hasReflist && hasRefSection ) {
			wikicode = wikicode.replace(/(?<=== ?References ?==)/gmi, "\n{{Reflist}}");
		}
		return wikicode;
	}
	
	// delete whitespace at the end of lines
	// (?!\|)(?!\}\}) is to stop this from deleting spaces after = in infoboxes
	deleteWhitespaceAtEndOfLines(wikicode) {
		return wikicode.replace(/[ \t]+\n(?!\|)(?!\}\})/g, "\n");
	}
	
	// convert smart quotes to regular quotes
	/** Replaces curly double/single quotes with straight ones, and the ellipsis character with three periods. */
	convertSmartQuotesToRegularQuotes(wikicode) {
		wikicode = wikicode.replace(/”/g, '"');
		wikicode = wikicode.replace(/“/g, '"');
		wikicode = wikicode.replace(/‘/g, "'");
		wikicode = wikicode.replace(/’/g, "'");
		wikicode = wikicode.replace(/…/g, "...");
		return wikicode;
	}
	
	// convert double spaces to single spaces
	/** Collapses two or more spaces after a period into one. Spaces elsewhere are not touched. */
	convertDoubleSpacesToSingleSpaces(wikicode) {
		return wikicode.replace(/\. {2,}/g, '. ');
	}
	
	// remove blank heading
	deleteBlankHeadings(wikicode) {
		return wikicode.replace(/\n={2,} {0,}={2,}\n/g, "\n");
	}
	
	// Change year range dash to ndash. Skip text inside of [[File:
	changeYearRangeDashToNDash(wikicode) {
		const sf = new StringFilter();
		// (1111-1111)
		wikicode = sf.surgicalReplaceOutsideTags(/(\(\d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, ['[[File:'], [']]']);
		//  1839 - 1926)
		wikicode = sf.surgicalReplaceOutsideTags(/( \d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, ['[[File:'], [']]']);
		return wikicode;
	}
	
	// if in draftspace, and draft has categories, disable the categories
	/**
	 * In draftspace (namespace 118), turns [[Category:X]] into [[:Category:X]].
	 * Already-disabled [[:Category:X]] links do not match and are left alone.
	 * The pattern starts at "[[" so that a colon that merely precedes the link in the
	 * surrounding text (e.g. "Note:[[Category:X]]") is not swallowed.
	 */
	disableCategoriesInDraftspace(wikicode, namespace) {
		const draft = (namespace == 118);
		if ( draft ) {
			wikicode = wikicode.replace(/(\[\[)(Category:[^\]]*\]\])/gm, '$1:$2');
		}
		return wikicode;
	}

	// delete <br> in drafts, these are usually poorly placed
	/** Deletes <br>, <br/>, <br /> from every line that does not start with { or |. */
	deleteBRTagsOutsideInfoboxes(wikicode) {
		return wikicode
			.split("\n")
			.map((line) => {
				// Skip lines that start with { or |. This is the easiest way to detect infoboxes
				const looksLikeInfoboxLine = line.startsWith('{') || line.startsWith('|');
				return looksLikeInfoboxLine ? line : line.replace(/\<br ?\/?\>/gm, '');
			})
			.join("\n");
	}
	
	// right align images
	/** Changes the "left" parameter of [[File:...]] links to "right". Not called by cleanDraft (too many false positives). */
	rightAlignImages(wikicode) {
		return wikicode.replace(/(\[\[File:[^\]]*\|)left(\|[^\]]*\]\])/gm, '$1right$2');
	}
	
	// correct capitalization of see also, references, further reading, external links
	correctCapitalizationOfEndMatterHeaders(wikicode) {
		wikicode = wikicode.replace(/^(== ?)References( ?==)$/gmi, "$1References$2");
		wikicode = wikicode.replace(/^(== ?)External links( ?==)$/gmi, "$1External links$2");
		wikicode = wikicode.replace(/^(== ?)Further reading( ?==)$/gmi, "$1Further reading$2");
		wikicode = wikicode.replace(/^(== ?)See also( ?==)$/gmi, "$1See also$2");
		return wikicode;
	}
	
	// if article has headings but no lead, remove first heading
	/**
	 * Drops every "==" heading line that appears before the first line of text.
	 * Empty lines, lines starting with { and lines starting with [[File: do not count as text.
	 */
	ifNoLeadSectionDeleteFirstHeading(wikicode) {
		const lines = wikicode.split("\n");
		const lineCount = lines.length;
		let output = '';
		let textCount = 0;
		// Kept as an explicit loop: a dropped heading also drops its own newline, and
		// when the dropped line is the last one the preceding newline must remain.
		lines.forEach((line, index) => {
			const isSkippable = line.startsWith('{') || line.length === 0 || line.startsWith('[[File:');
			if ( ! isSkippable ) {
				if ( line.startsWith('==') && ! textCount ) {
					return; // delete this line by not putting it in the output string
				}
				textCount++;
			}
			output += line;
			if ( index + 1 !== lineCount ) {
				output += "\n";
			}
		});
		return output;
	}
	
	// delete [edit], [edit source], and [editar] from headings
	deleteCopyPastedEditAndEditSource(wikicode) {
		wikicode = wikicode.replace(/\[edit\]( ?={2,})$/gm, '$1');
		wikicode = wikicode.replace(/\[edit source\]( ?={2,})$/gm, '$1');
		wikicode = wikicode.replace(/\[editar\]( ?={2,})$/gm, '$1');
		return wikicode;
	}
	
	// at beginning of lines, replace unicode bullets with asterisks
	replaceUnicodeBulletsWithAsterisks(wikicode) {
		return wikicode.replace(/^\s{0,}[·•●]\s{0,}/gm, '* ');
	}
	
	// remove whitespace if that is the only character on a line
	trimEmptyLines(wikicode) {
		return wikicode.replace(/^\s*$/gm, '');
	}
	
	// no more than 2 enters in a row
	// Note: AFCH does this too
	deleteMoreThanTwoEntersInARow(wikicode) {
		return wikicode.replace(/\n{3,}/gm, "\n\n");
	}
	
	// convert =TitleHeading= to ==H2Heading==
	/**
	 * Converts a level 1 heading line to a level 2 heading with normalised spacing.
	 * The heading text is matched lazily and may not contain a newline, so "= Title ="
	 * becomes "== Title ==" (no doubled space) and a match can never span several lines.
	 */
	convertH1ToH2(wikicode) {
		return wikicode.replace(/^= ?([^=\n]*?) ?=$/gm, '== $1 ==');
	}

	/** Strips the heading markup from heading lines longer than 150 characters that contain no <ref. */
	convertVeryLongHeadingToParagraph(wikicode) {
		return wikicode
			.split("\n")
			.map((line) => {
				const isVeryLongHeading = line.length > 150 && line.match(/^==.*==$/gm) && ! line.match(/<ref/);
				return isVeryLongHeading ? line.replace(/^={1,}\s*(.*?)\s*={1,}$/m, '$1') : line;
			})
			.join("\n");
	}

	/** Changes a single word wrapped in single quotes and surrounded by spaces to use double quotes. */
	fixWordEmphasizedWithSingleQuotes(wikicode) {
		return wikicode.replace(/ '(\w+)' /g, ' "$1" ');
	}

	/** Collapses ".." to "." when it follows a letter or ] and is followed by <ref, a space, or a newline. */
	fixDoublePeriod(wikicode) {
		return wikicode.replace(/(?<=[A-Za-z\]])\.\.(?=<ref| |\n)/g, '.');
	}

	/** Repairs [[double bracket]] links whose target is a URL: en.wikipedia URLs become wikilinks, others become external links. */
	fixWikilinksContainingURL(wikicode) {
		// non-piped wikilink
		wikicode = wikicode.replace(/\[\[https?:\/\/en\.wikipedia\.org\/wiki\/([^|]*)\]\]/g, '[[$1]]');
		// piped wikilink
		wikicode = wikicode.replace(/\[\[https?:\/\/en\.wikipedia\.org\/wiki\/([^|]*)\|([^\]]*)\]\]/g, '[[$1|$2]]');
		// non-piped external link
		wikicode = wikicode.replace(/\[\[(http[^|]*)\]\]/g, '[$1]');
		// piped external link
		wikicode = wikicode.replace(/\[\[(http[^|]*)\|([^\]]*)\]\]/g, '[$1 $2]');
		return wikicode;
	}

	/** Converts external links pointing at en.wikipedia.org/wiki/Article into [[Article]]; any link label is dropped. */
	fixExternalLinksToWikipediaArticles(wikicode) {
		// [https://en.wikipedia.org/wiki/Article] and [https://en.wikipedia.org/wiki/Article Article name]
		return wikicode.replace(/(?<!\[)\[https?:\/\/en\.wikipedia\.org\/wiki\/([^ \]]*)( [^\]]*)?\]/gs, '[[$1]]');
	}

	/** Removes an empty line when both the line before it and the line after it start with *. */
	deleteBlankLinesBetweenBullets(wikicode) {
		const lines = wikicode.split('\n');
		const kept = lines.filter((current, i) => {
			const previous = lines[i-1];
			const next = lines[i+1];
			const isBlankBetweenBullets =
				typeof previous !== 'undefined' &&
				typeof next !== 'undefined' &&
				previous.startsWith('*') &&
				current === '' &&
				next.startsWith('*');
			return ! isBlankBetweenBullets;
		});
		return kept.join('\n');
	}

	/**
	 * Deletes every occurrence of the character(s) listed in the character class.
	 * NOTE(review): as written the class contains an en dash, so existing en dashes are stripped
	 * from the page. Presumably the original character was an invisible/control character that
	 * was altered by an encoding round trip - confirm against the author's source.
	 */
	deleteWeirdUnicodeCharacters(wikicode) {
		return wikicode.replace(/[–]/g, '');
	}

	/** Deletes opening and closing <p>, <strong>, <em> and <nowiki> tags (contents are kept). */
	deleteSomeHTMLTags(wikicode) {
		wikicode = wikicode.replace(/<\/?p>/g, '');
		wikicode = wikicode.replace(/<\/?strong>/g, '');
		wikicode = wikicode.replace(/<\/?em>/g, '');
		wikicode = wikicode.replace(/<\/?nowiki>/g, '');
		return wikicode;
	}

	/**
	 * If the text of every heading on the page is upper case, rewrites each heading in
	 * sentence case with normalised spacing ("== Heading ==").
	 *
	 * Headings are matched as whole lines and the run of "=" is captured and re-emitted,
	 * so level 3+ headings keep their level ("=== SUB ===" becomes "=== Sub ===").
	 */
	fixHeadingsInAllCaps(wikicode) {
		// group 1: the "=" run, group 2: heading text, group 3: trailing whitespace (preserved)
		const headingRegex = /^(={2,}) *(.+?) *\1([ \t]*)$/gm;

		// create a concatenated string with the text from every heading
		let headingString = '';
		for ( const match of wikicode.matchAll(headingRegex) ) {
			headingString += match[2];
		}

		// only act if that string contains no lower case characters
		if ( ! this._isUpperCase(headingString) ) return wikicode;

		// convert all headings to sentence case
		return wikicode.replace(headingRegex, (wholeMatch, equalsSigns, headingText, trailingWhitespace) => {
			const sentenceCase = this._toSentenceCase(headingText.trim());
			return equalsSigns + ' ' + sentenceCase + ' ' + equalsSigns + trailingWhitespace;
		});
	}

	/** Deletes <big> and </big> tags (contents are kept). */
	deleteBigTag(wikicode) {
		return wikicode.replace(/(?:<big>|<\/big>)/g, '');
	}

	/** Placeholder: currently returns the wikitext unchanged. */
	deleteEmptySections(wikicode) {
		// TODO: 
		return wikicode;
	}

	/**
	 * When the page has more than one References heading, tries to delete the boilerplate
	 * References section (heading + "Inline citations..." comment + {{reflist}}). The deletion is
	 * only kept if exactly one References heading remains afterwards; the result is then trimmed.
	 *
	 * A page with no References heading at all is returned unchanged (String.match with the g flag
	 * returns null in that case, which must not be dereferenced).
	 */
	deleteDuplicateReferencesSection(wikicode) {
		const countReferencesHeadings = (text) => (text.match(/== ?References ?==/gi) || []).length;
		if ( countReferencesHeadings(wikicode) <= 1 ) return wikicode;

		// exact boilerplate text to remove (string replace: first occurrence only, case-sensitive)
		const boilerplateSection =
			'== References ==\n' +
			'<!-- Inline citations added to your article will automatically display here. See en.wikipedia.org/wiki/WP:REFB for instructions on how to add citations. -->\n' +
			'{{reflist}}';
		const attempt = wikicode.replace(boilerplateSection, '');
		if ( countReferencesHeadings(attempt) === 1 ) {
			return attempt.trim();
		}
		return wikicode;
	}

	/** Replaces underscores with spaces in the chunks of text that start with [[. */
	removeUnderscoresFromWikilinks(wikicode) {
		const sf = new StringFilter();
		return sf.surgicalReplaceInsideTags(/_/g, ' ', wikicode, ['[['], [']]']);
	}

	/** True when upper-casing the string does not change it (so strings without letters count as upper case). */
	_isUpperCase(str) {
		return str === str.toUpperCase();
	}

	/** Upper-cases the first character and lower-cases the rest. */
	_toSentenceCase(string) {
		return string.charAt(0).toUpperCase() + string.slice(1).toLowerCase();
	}
}

/** Lets you use regex to specify what parts of a very long string you want to specify as "off limits", then you can do additional regex's and search/replace to the remaining parts of the string. */
/** Lets you use regex to specify what parts of a very long string you want to specify as "off limits", then you can do additional regex's and search/replace to the remaining parts of the string. */
class StringFilter {
	/**
	 * Does a replace, but specifies areas of the file that should NOT be replaced. Those areas are specified by providing an openingTag and a closingTag, and those areas are marked as off limits.
	 *
	 * The haystack is cut into chunks at every opening/closing tag; chunks that start with an
	 * opening tag are passed through untouched, all other chunks get the replacement applied.
	 *
	 * @param {RegExp|string} regex Pattern passed to String.prototype.replace.
	 * @param {string|Function} replacement Replacement passed to String.prototype.replace.
	 * @param {string} haystack Text to process.
	 * @param {string[]} openingTags Literal strings that start a protected area.
	 * @param {string[]} closingTags Literal strings that end a protected area.
	 * @returns {string}
	 */
	surgicalReplaceOutsideTags(regex, replacement, haystack, openingTags, closingTags) {
		const parts = this._splitStringUsingMultiplePatterns(haystack, [...openingTags, ...closingTags]);
		return parts
			.map((part) => {
				const isProtected = openingTags.some((tag) => part.startsWith(tag));
				return isProtected ? part : part.replace(regex, replacement);
			})
			.join('');
	}

	/**
	 * Does a replace, but specifies areas of the file that SHOULD be replaced, then skips the rest of the file. The area that should be replaced is specified by providing an openingTag and a closingTag.
	 *
	 * The replacement is applied at most once per chunk, even when the chunk starts with
	 * several of the opening tags (e.g. '[[' and '[[File:'), so non-idempotent replacements
	 * are not applied twice.
	 *
	 * @param {RegExp|string} regex Pattern passed to String.prototype.replace.
	 * @param {string|Function} replacement Replacement passed to String.prototype.replace.
	 * @param {string} haystack Text to process.
	 * @param {string[]} openingTags Literal strings that start an area to replace in.
	 * @param {string[]} closingTags Literal strings that end such an area.
	 * @returns {string}
	 */
	surgicalReplaceInsideTags(regex, replacement, haystack, openingTags, closingTags) {
		const parts = this._splitStringUsingMultiplePatterns(haystack, [...openingTags, ...closingTags]);
		return parts
			.map((part) => {
				const isInsideTag = openingTags.some((tag) => part.startsWith(tag));
				return isInsideTag ? part.replace(regex, replacement) : part;
			})
			.join('');
	}
	
	/**
	Splits a string in front of every occurrence of any of the given literal patterns.
	Also keeps the pattern in the result, unlike string.prototype.split. Algorithm isn't perfect, will fail with this pattern: <ref>Test/>Test</ref>. But should be good enough for DraftCleaner stuff.
	
	Joining the returned chunks with '' always reproduces the input. An empty input yields [''].
	
	@param {string} string
	@param {Array} patterns literal strings (not regexes)
	@returns {string[]}
	*/
	_splitStringUsingMultiplePatterns(string, patterns) {
		const result = [];
		let positionOfLastMatch = 0;
		for ( let i = 0; i < string.length; i++ ) {
			// startsWith(pattern, i) tests at offset i without building a new substring for every character
			const patternMatch = patterns.some((pattern) => string.startsWith(pattern, i));
			if ( ! patternMatch ) continue;
			// if the chunk would be blank (happens if i=0 matches), don't put an empty "" into the array
			if ( i === positionOfLastMatch ) continue;
			result.push(string.slice(positionOfLastMatch, i));
			positionOfLastMatch = i;
		}
		// Don't forget the last chunk.
		result.push(string.substring(positionOfLastMatch));
		return result;
	}
}

// </nowiki>