User:Full-date unlinking bot/code
Appearance
<?php
/** fulldateunlinker.php -- Removes link tags from dates
 *  Beta Release 1
 *
 *  (c) 2009 James Hare - http://en.wikipedia.org/wiki/User:Harej
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  Developers (add your self here if you worked on the code):
 *    James Hare - [[User:Harej]] - Wrote initial code
 *    [[User:Tcncv]] - Wrote the date-parsing regular expressions
 **/

ini_set("display_errors", 1);
error_reporting(E_ALL ^ E_NOTICE);

include("./public_html/botclasses.php"); // Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
include("fdublogin.php"); // expected to define $botuser and $botpass, which are used for the login below

// For the purposes of unambiguous documentation, the Month-Day-Year style of writing dates will be referred to as "American" and the Day-Month-Year style "British".
// I understand how not-right this is but I felt it was necessary to use two terms that could not be confused with each other.
// ("International" would be a good replacement for "British", but "i" could be confused for "1", plus "int" means "integer".)

echo "Logging in...";
$objwiki = new wikipedia();
$objwiki->login($botuser, $botpass);
echo " done.\n";

/**
 * Stops the whole script if the manual override has been triggered.
 *
 * The override is triggered by placing the string "Joe Biden" anywhere on
 * [[User:Full-date unlinking bot/Manual override]]. The Vice President of the
 * United States was chosen as the "safety word" because it is unlikely to be
 * triggered accidentally.
 */
function overridecheck() {
	global $objwiki; // the logged-in client lives in the global scope; functions do not see it without this

	$overridepage = $objwiki->getpage("User:Full-date unlinking bot/Manual override");
	if (strpos($overridepage, "Joe Biden") !== false) {
		die("Manual override has been triggered. Shutting down.");
	}
}

/**
 * Decides whether $page should be processed.
 *
 * First the title is tested against the exclusion criteria (non-article
 * namespaces, Month-Day articles, year/century/millennium articles and the
 * per-year lists of sovereign states and state leaders). Then the page text
 * is fetched and searched for the bot's "has processed this page" comment.
 *
 * @param string $page Page title.
 * @return bool true if the page should be processed, false otherwise.
 */
function checktoprocess($page) {
	global $objwiki;

	// matches non-articles; the "talk" suffix is optional so that both "User:Foo" and "User talk:Foo" are excluded
	$regex1 = '/^((User|Wikipedia|Image|MediaWiki|Template|Help|Category|Portal)(( |_)talk)?|Talk):/i';
	// matches Month-Date
	$regex2 = '/^(January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2}/';
	// matches year, century, and millennium articles, BC and AD, in any sort of topic ("nd" covers e.g. "2nd century")
	$regex3 = '/^\d{1,4}(st|nd|rd|th)?\s?(century|millennium)?( BC)?( in (architecture|art|aviation|comics|film|home video|literature|(country |British )?music|poetry|radio|science|television))?$/i';
	// because the lists of sovereign states and state leaders decided to stick out
	$regex4 = '/^List of (sovereign states|state leaders) in \d{1,4}( BC)?$/i';

	if (preg_match($regex1, $page) || preg_match($regex2, $page) || preg_match($regex3, $page) || preg_match($regex4, $page)) {
		return false;
	}

	// NOTE(review): nothing in this file ever writes this marker to a page, so
	// this test only helps if the marker is added by some other means -- confirm.
	$contents = $objwiki->getpage($page);
	return strpos($contents, "<!-- [[User:Full-date unlinking bot]] has processed this page -->") === false;
}

/**
 * Expands a month name or abbreviation ("Jan", "Sept", "december") to the full English month name.
 * Returns the input unchanged if it is not recognised.
 */
function fdu_fullmonth($month) {
	static $names = array(
		'jan' => 'January', 'feb' => 'February', 'mar' => 'March', 'apr' => 'April',
		'may' => 'May', 'jun' => 'June', 'jul' => 'July', 'aug' => 'August',
		'sep' => 'September', 'oct' => 'October', 'nov' => 'November', 'dec' => 'December',
	);
	$key = strtolower(substr($month, 0, 3));
	return isset($names[$key]) ? $names[$key] : $month;
}

/**
 * Removes the link brackets from full dates on the page $link and saves the result.
 *
 * Four passes are made: American and British dates with regular punctuation
 * (brackets are simply stripped) and American and British dates with odd
 * punctuation (the date is rewritten as "Month D, Y" or "D Month Y").
 * Each pass that changes something adds its code to the edit summary.
 * The manual override is checked immediately before saving.
 *
 * @param string $link Title of the page to process.
 */
function unlinker($link) {
	global $objwiki;

	$month = '(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)';
	$year = '(\d{1,4}(?:[\s_]BC)?)';

	$contents = $objwiki->getpage($link); // the text that the passes below work on
	$original = $contents;
	$codes = array();

	// American dates with regular punctuation: [[January 5]], [[2009]]
	preg_match_all('/\[\[' . $month . '[\s_](\d{1,2})\]\],\s\[\[' . $year . '\]\]/i', $contents, $amReg);
	foreach ($amReg[0] as $linked) {
		$unlinked = preg_replace("/[\[\]]/", "", $linked); // gets rid of link tags
		$contents = str_replace($linked, $unlinked, $contents); // substitutes the linked date with the unlinked date
	}
	if (count($amReg[0]) > 0) { // if the "American-regular" de-linker actually has to do any work
		$codes[] = "AMreg";
	}

	// British dates with regular punctuation: [[5 January]] [[2009]]
	// The match has to begin at the opening "[[" so that stripping the brackets does not leave it behind.
	preg_match_all('/\[\[(\d{1,2})[\s_]' . $month . '\]\]\s\[\[' . $year . '\]\]/i', $contents, $brReg);
	foreach ($brReg[0] as $linked) {
		$unlinked = preg_replace("/[\[\]]/", "", $linked);
		$contents = str_replace($linked, $unlinked, $contents);
	}
	if (count($brReg[0]) > 0) {
		$codes[] = "BRreg";
	}

	// American dates with odd punctuation: [[January]] [[5]] [[2009]]
	// The date is rebuilt from the captured parts instead of going through strtotime(),
	// which returns false for BC years and mis-reads short years (date() would then write 1970).
	preg_match_all('/\[\[' . $month . '\]\]\s*\[\[(\d{1,2})\]\](?:\s*(?:,\s*)?)\[\[' . $year . '\]\]/i', $contents, $amOdd, PREG_SET_ORDER);
	$amOddDone = 0;
	foreach ($amOdd as $m) {
		$day = (int) $m[2];
		if ($day < 1 || $day > 31) {
			continue; // not a real day of the month; leave it alone
		}
		$unlinked = fdu_fullmonth($m[1]) . ' ' . $day . ', ' . preg_replace('/[\s_]+/', ' ', $m[3]);
		$contents = str_replace($m[0], $unlinked, $contents);
		$amOddDone++;
	}
	if ($amOddDone > 0) {
		$codes[] = "AModd";
	}

	// British dates with odd punctuation: [[5 January]], [[2009]]
	preg_match_all('/\[\[(\d{1,2})[\s_]' . $month . '\]\](?!\s\[)(?:\s*(?:,\s*)?)\[\[' . $year . '\]\]/i', $contents, $brOdd, PREG_SET_ORDER);
	$brOddDone = 0;
	foreach ($brOdd as $m) {
		$day = (int) $m[1];
		if ($day < 1 || $day > 31) {
			continue;
		}
		$unlinked = $day . ' ' . fdu_fullmonth($m[2]) . ' ' . preg_replace('/[\s_]+/', ' ', $m[3]);
		$contents = str_replace($m[0], $unlinked, $contents);
		$brOddDone++;
	}
	if ($brOddDone > 0) {
		$codes[] = "BRodd";
	}

	if ($contents === $original) {
		return; // nothing was unlinked, so there is nothing to save
	}

	$editsummary = "[[User:Full-date unlinking bot/Codes|Codes]]: " . implode(", ", $codes);

	overridecheck(); // checks if the manual override has been triggered
	$objwiki->edit($link, $contents, $editsummary, true, true); // posts the change. The two "true" parameters indicate that this is a bot edit and it is a minor edit
}

// The below array, $months, features all the possible months.
$months = array("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December");

foreach ($months as $monthname) { // for each month
	// First the month article itself, then every "Month D" article (e.g. "January 1").
	$targets = array($monthname);
	for ($d = 1; $d < 32; $d++) {
		$targets[] = $monthname . " " . $d;
	}

	foreach ($targets as $target) {
		$links = $objwiki->whatlinkshere($target); // all the pages that link to $target
		if (!is_array($links)) {
			continue; // nothing usable came back for this target
		}
		foreach ($links as $page) {
			if (checktoprocess($page)) { // if the checktoprocess function returns true
				unlinker($page);
			}
		}
	}
}
?>