Jump to content

User:Full-date unlinking bot/code

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Harej (talk | contribs) at 10:10, 23 August 2009 (first version ever). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
<?php
/** fulldateunlinker.php -- Removes link tags from dates
 *  Beta Release 1
 *
 *  (c) 2009 James Hare - http://en.wikipedia.org/wiki/User:Harej
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *   
 *  Developers (add yourself here if you worked on the code):
 *    James Hare - [[User:Harej]] - Wrote initial code
 *                 [[User:Tcncv]] - Wrote the date-parsing regular expressions
 **/
// Show errors directly in the script output so problems are visible while the bot runs.
ini_set("display_errors", 1);
// Report every error level except notices (E_ALL with the E_NOTICE bit cleared).
error_reporting(E_ALL ^ E_NOTICE);
// NOTE(review): presumably this file provides the wikipedia class instantiated below -- confirm.
include("./public_html/botclasses.php");  // Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
// NOTE(review): presumably this file defines $botuser and $botpass used by the login call below -- confirm.
include("fdublogin.php");

// For the purposes of unambiguous documentation, the Month-Day-Year style of writing dates will be referred to as "American" and the Day-Month-Year style "British".
// I understand how not-right this is but I felt it was necessary to use two terms that could not be confused with each other.
// ("International" would be a good replacement for "British", but "i" could be confused for "1", plus "int" means "integer".)

// Create the shared wiki client and authenticate it with the bot's credentials,
// printing a progress message before and after the attempt.
print "Logging in...";
$objwiki = new wikipedia();
$objwiki->login($botuser, $botpass);
print " done.\n";

/**
 * Emergency stop: terminates the script if the manual override has been triggered.
 *
 * The override is triggered by placing the string "Joe Biden" anywhere on
 * [[User:Full-date unlinking bot/Manual override]]. The Vice President of the
 * United States was chosen as the "safety word" because it can't be triggered
 * accidentally.
 *
 * Returns nothing; calls die() (ending the whole script) when the override is set.
 */
function overridecheck() {
	// The wiki client is created at file scope, so it has to be imported into
	// the function's scope explicitly; without this $objwiki would be undefined here.
	global $objwiki;

	$overridepage = $objwiki->getpage("User:Full-date unlinking bot/Manual override");

	if (strpos($overridepage, "Joe Biden") !== false) {
		die("Manual override has been triggered. Shutting down.");
	}
}

/**
 * Decides whether $page should be processed by the bot.
 *
 * The title is first tested against the exclusion patterns (non-article
 * namespaces, Month-Day articles, year/century/millennium articles and the
 * per-year lists of sovereign states / state leaders). If the title passes,
 * the page text is fetched and checked for the comment marking a page as
 * already processed.
 *
 * @param string $page Title of the page to check.
 * @return bool false if the title is excluded or the page carries the
 *              "has processed this page" comment; true otherwise.
 */
function checktoprocess($page) {
	// The wiki client lives at file scope and must be imported into the function.
	global $objwiki;

	// Matches non-articles: the listed namespaces with or without a " talk"/"_talk" suffix, plus plain "Talk:".
	$regex1 = "/^((User|Wikipedia|Image|MediaWiki|Template|Help|Category|Portal)(( |_)talk)?|Talk):/i";
	// Matches titles beginning with Month-Date.
	$regex2 = "/^(January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2}/";
	// Matches year, century, and millennium articles, BC and AD, optionally "in <topic>".
	// All four ordinal suffixes are accepted so that e.g. "2nd century" is covered too.
	$regex3 = "/^\d{1,4}(st|nd|rd|th)?\s?(century|millennium)?( BC)?( in (architecture|art|aviation|comics|film|home video|literature|(country |British )?music|poetry|radio|science|television))?$/i";
	// Because the lists of sovereign states and state leaders decided to stick out.
	$regex4 = "/^List of (sovereign states|state leaders) in \d{1,4}( BC)?$/i";

	if (preg_match($regex1, $page) || preg_match($regex2, $page) || preg_match($regex3, $page) || preg_match($regex4, $page)) {
		return false;
	}

	$contents = $objwiki->getpage($page);

	// The page is eligible only if the "already processed" comment is absent.
	return strpos($contents, "<!-- [[User:Full-date unlinking bot]] has processed this page -->") === false;
}

/**
 * Expands a month name or abbreviation captured by the date patterns
 * (e.g. "Jan", "sept", "March") to its full English month name.
 *
 * @param string $month Month text as it appeared in the page.
 * @return string Full month name, or $month unchanged if it is not recognised.
 */
function fdu_fullmonth($month) {
	$names = array(
		"jan" => "January", "feb" => "February", "mar" => "March",
		"apr" => "April", "may" => "May", "jun" => "June",
		"jul" => "July", "aug" => "August", "sep" => "September",
		"oct" => "October", "nov" => "November", "dec" => "December"
	);
	$key = strtolower(substr($month, 0, 3));
	return isset($names[$key]) ? $names[$key] : $month;
}

/**
 * Removes link tags from full dates on one page and saves the result.
 *
 * Four passes are run over the page text, each contributing a code to the
 * edit summary when it changes something:
 *   AMreg - [[Month Day]], [[Year]]    brackets stripped, text otherwise kept
 *   BRreg - [[Day Month]] [[Year]]     brackets stripped, text otherwise kept
 *   AModd - [[Month]] [[Day]], [[Year]] and punctuation variants, rewritten as "Month Day, Year"
 *   BRodd - [[Day Month]] + odd punctuation + [[Year]], rewritten as "Day Month Year"
 *
 * The odd-punctuation passes rebuild the date from the captured pieces rather
 * than via strtotime()/date(): strtotime() cannot parse BC years and other
 * inputs, and date() would then silently emit 1 January 1970 into the article.
 *
 * Nothing is saved when no pass changed the text. Before saving, the manual
 * override is checked.
 *
 * NOTE(review): checktoprocess() skips pages containing the
 * "has processed this page" comment, but this function does not add that
 * comment -- confirm whether it should be appended here.
 *
 * @param string $link Title of the page to process.
 */
function unlinker($link) {
	// The wiki client lives at file scope and must be imported into the function.
	global $objwiki;

	$editsummary = "[[User:Full-date unlinking bot/Codes|Codes]]: ";

	// Shared pieces of the date patterns; each one is a single capture group.
	$month = '(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)';
	$day = '(\d{1,2})';
	$year = '(\d{1,4}(?:[\s_]BC)?)';

	$contents = $objwiki->getpage($link);
	$original = $contents; // kept to detect whether any pass changed the text
	$codes = array();

	// American dates with regular punctuation: [[Month Day]], [[Year]]
	$pattern = '/\[\[' . $month . '[\s_]' . $day . '\]\],\s\[\[' . $year . '\]\]/i';
	if (preg_match_all($pattern, $contents, $amReg) > 0) {
		foreach ($amReg[0] as $linked) {
			$unlinked = preg_replace('/[\[\]]/', '', $linked); // gets rid of link tags
			$contents = str_replace($linked, $unlinked, $contents);
		}
		$codes[] = "AMreg";
	}

	// British dates with regular punctuation: [[Day Month]] [[Year]]
	// The pattern starts at the opening "[[" so that the whole link is
	// unlinked and no dangling "[[" is left in front of the day.
	$pattern = '/\[\[' . $day . '[\s_]' . $month . '\]\]\s\[\[' . $year . '\]\]/i';
	if (preg_match_all($pattern, $contents, $brReg) > 0) {
		foreach ($brReg[0] as $linked) {
			$unlinked = preg_replace('/[\[\]]/', '', $linked);
			$contents = str_replace($linked, $unlinked, $contents);
		}
		$codes[] = "BRreg";
	}

	// American dates with odd punctuation: [[Month]] [[Day]], [[Year]] and variants
	$pattern = '/\[\[' . $month . '\]\]\s*\[\[' . $day . '\]\](?:\s*(?:,\s*)?)\[\[' . $year . '\]\]/i';
	if (preg_match_all($pattern, $contents, $amOdd, PREG_SET_ORDER) > 0) {
		$changed = false;
		foreach ($amOdd as $match) {
			$dayNumber = (int) $match[2];
			if ($dayNumber < 1 || $dayNumber > 31) {
				continue; // not a plausible day of the month; leave the text alone
			}
			$yearText = preg_replace('/[\s_]/', ' ', $match[3]); // "44_BC" -> "44 BC"
			$unlinked = fdu_fullmonth($match[1]) . " " . $dayNumber . ", " . $yearText;
			$contents = str_replace($match[0], $unlinked, $contents);
			$changed = true;
		}
		if ($changed) {
			$codes[] = "AModd";
		}
	}

	// British dates with odd punctuation: [[Day Month]], [[Year]] and variants
	// (the lookahead excludes the single-space form handled by the BRreg pass)
	$pattern = '/\[\[' . $day . '[\s_]' . $month . '\]\](?!\s\[)(?:\s*(?:,\s*)?)\[\[' . $year . '\]\]/i';
	if (preg_match_all($pattern, $contents, $brOdd, PREG_SET_ORDER) > 0) {
		$changed = false;
		foreach ($brOdd as $match) {
			$dayNumber = (int) $match[1];
			if ($dayNumber < 1 || $dayNumber > 31) {
				continue;
			}
			$yearText = preg_replace('/[\s_]/', ' ', $match[3]);
			$unlinked = $dayNumber . " " . fdu_fullmonth($match[2]) . " " . $yearText;
			$contents = str_replace($match[0], $unlinked, $contents);
			$changed = true;
		}
		if ($changed) {
			$codes[] = "BRodd";
		}
	}

	// Nothing matched (or the page text could not be loaded): do not save anything.
	if ($contents === $original) {
		return;
	}

	$editsummary .= implode(", ", $codes);

	overridecheck(); // checks if the manual override has been triggered
	$objwiki->edit($link,$contents,$editsummary,true,true); // posts the change. The two "true" parameters indicate that this is a bot edit and it is a minor edit
}

// All twelve month names. For every month, the pages linking to the month
// article itself and to each of its "Month Day" articles are visited.

$months = array("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December");

for ($i = 0; $i < count($months); $i++) { // for each month
	// Link targets for this month: the month page first, then "Month 1" .. "Month 31".
	// Day titles need a space between month and day ("January 1", not "January1").
	$targets = array($months[$i]);
	for ($d = 1; $d < 32; $d++) {
		$targets[] = $months[$i] . " " . $d;
	}

	foreach ($targets as $target) {
		$links = $objwiki->whatlinkshere($target); // every page that links to $target
		if (!is_array($links)) {
			continue; // nothing usable came back for this target
		}
		for ($j = 0; $j < count($links); $j++) {
			if (checktoprocess($links[$j])) { // only pages that pass the exclusion checks are edited
				unlinker($links[$j]);
			}
		}
	}
}

?>