User:Shadowbot2/Source
Appearance
This page contains a cleaned-up version of the Shadowbot2 source.
#!/usr/bin/perl
use Perlwikipedia;
use HTML::Entities;
#Main program: log in, build the list of pages transcluded on the Main Page,
#check each of them (plus their images and tomorrow's dated templates) for
#protection, e-mail the subscribers if anything is unprotected, and finally
#print some statistics about the run.
my $editor=Perlwikipedia->new;
my $starting_time=time; #What time did we start?
my $username='Shadowbot2';
my $password='**********'; #NOTE(review): presumably masked for publication -- supply the real password locally
$editor->login($username,$password);
#Filled in by scan(): titles found protected / unprotected
my (@protected,@unprotected);
$editor->purge_page('Main Page'); #Make sure we have a clean copy of the Main Page
my $mailing_list_text=$editor->get_text('User:Shadowbot2/Mailing list');
my @mailing_list=$mailing_list_text=~m/\* \[\[User:(.+?)\|.+?\]\]/g; #Parse for all users listed
#Fetch the rendered Main Page so the login check below has something to look at.
#(The HTML was previously never fetched, so the check always failed and the bot
#logged in a second time on every run.)
#NOTE(review): assumes the account name only shows up in the HTML while logged in -- confirm
my $main_page_html=$editor->{mech}->get('http://en.wikipedia.org/wiki/Main_Page')->content;
unless (defined $main_page_html && $main_page_html=~m/Shadowbot2/i) { #Make sure we're still logged in
    $editor->login($username,$password);
}
#Titles to scan. Declared here, before the subs further down are compiled, so
#that every sub shares this one list (the old declaration misspelled the name).
my @transclusion_list;
load_template_list();
do_main_page_scan();
do_images_scan();
do_tomorrow_templates_scan();
my $sent_email=0;
if (@unprotected) { #Send email if needed and mark as such
    send_mail();
    $sent_email=1;
}
my $ending_time=time; #Get various statistics about the run
my $total_time=$ending_time-$starting_time;
my $total_scanned=(scalar @protected)+(scalar @unprotected);
print "I just scanned $total_scanned pages/images. I detected " . (scalar @protected) ." protected pages and " . (scalar @unprotected) . " unprotected pages. The job took $total_time seconds to run.\n\n";
if($sent_email) {print "I sent an alert email to " . (scalar @mailing_list) . " users.\n\n";}
else {print "I did not send an alert email.\n\n";}
#Final report: one line per scanned page, grouped by protection status.
print "Pages I scanned and their protection status:\n\n";
#Header that write() emits at the top of each page of output
format STDOUT_TOP=
Name of page Result of scan
--------------------------------------------------------------------------------------
.
#Body line: left-justified page title followed by its scan result.
#The format reads the package variables $page and $result.
format STDOUT=
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<
$page, $result
.
#Protected pages are listed first, then the unprotected ones
foreach my $status_group (['Protected',\@protected],['Unprotected',\@unprotected]) {
    my ($status_label,$titles)=@$status_group;
    foreach $page (@$titles) {
        #Wrap the title in wiki-link brackets (this rewrites the list entry in place)
        $page="[[$page]]";
        $result=$status_label;
        write;
    }
}
#Scans every page currently in @transclusion_list, after first adding today's
#"POTD protected" subpage to the list. Results are recorded by scan().
sub do_main_page_scan {
    #The picture-of-the-day subpage is named after today's date, so build its
    #title directly instead of hunting for it
    my $potd_title="Template:POTD protected/$CURRENTYEAR-$CURRENTMONTH-$CURRENTDAY2";
    push @transclusion_list,$potd_title;

    #Skip anything that looks like it ends in a short extension (.js, .css)
    #NOTE(review): the pattern is unanchored, so a dot followed by 2-3 word
    #characters anywhere in the title also causes a skip -- confirm that is wanted
    for my $title (@transclusion_list) {
        next if $title=~/\.\w{2,3}/;
        scan($title);
    }
}
#Finds the images used by every page in @transclusion_list and checks each one
#with scan().
#
#Compared with the earlier version this
#  * collects every image link on a page, not just the first one;
#  * captures only the image title (the greedy pattern used for <includeonly>
#    text swallowed display options such as "|thumb" into the title);
#  * also accepts links without a pipe and the "File:" prefix;
#  * scans each distinct image once, even when several pages use it.
sub do_images_scan {
    my @images;
    my %already_queued;
    foreach my $title (@transclusion_list) {
        my $wikitext=$editor->get_text($title);
        next unless defined $wikitext;
        #Get rid of unrendered stuff
        $wikitext=~s/<!--.+?-->//gs;
        $wikitext=~s/<noinclude>.+?<\/noinclude>//gs;
        #The title runs up to the first pipe or closing bracket. Links wrapped in
        #<includeonly> are ordinary links in the wikitext, so this loop sees them too.
        while ($wikitext=~m/\[\[\s*((?:Image|File):[^|\]\n]+?)\s*(?:\||\]\])/g) {
            my $image=$1;
            #A title built from a template or parameter cannot be resolved here
            next if $image=~m/[{}]/;
            next if $already_queued{$image}++;
            push (@images,$image);
        }
    }
    foreach my $image (@images) {scan($image);}
}
#For every title in @transclusion_list that ends in a date ("March 5" or
#"March 5, 2007"), works out the title for the following day and checks that
#page with scan().
#
#Fixes over the earlier version:
#  * the year of "Month D, YYYY" titles is now captured and passed on (it used
#    to be read from an empty capture group and was therefore always missing);
#  * the date is replaced only at the end of the title, where it was matched;
#  * only real month names count as a date, so titles that merely end in
#    "word number" no longer produce made-up page names;
#  * a copy of the title is edited, so @transclusion_list itself is unchanged.
sub do_tomorrow_templates_scan {
    my $month_name=qr/(?:January|February|March|April|May|June|July|August|September|October|November|December)/;
    foreach my $title (@transclusion_list) {
        my $tomorrow_title=$title;
        if ($title=~/($month_name) (\d{1,2})$/) { #Check for one date format
            my ($month,$day)=do_date_increment("$1 $2");
            next unless $month && $day; #No usable next day (e.g. an impossible date)
            $tomorrow_title=~s/$month_name \d{1,2}$/$month $day/; #Substitute the new date
        }
        elsif ($title=~/($month_name) (\d{1,2}), (\d{4})$/) { #The other format
            my ($month,$day,$year)=do_date_increment("$1 $2 $3");
            next unless $month && $day && $year;
            $tomorrow_title=~s/$month_name \d{1,2}, \d{4}$/$month $day, $year/;
        }
        else {
            next; #Not a dated title
        }
        scan($tomorrow_title);
    }
}
#Checks whether a single page or image is protected and records the result.
#
#Parameter: the page title, including its namespace prefix.
#Effect:    the title is appended to @protected when the edit form's text box
#           is read-only for the bot's account, otherwise to @unprotected.
sub scan {
    my $page=shift;
    #Percent-encode the title before putting it into the query string; a raw
    #"&", "+", "#" or "%" in a title would otherwise change or cut off the request
    my $url_title=$page;
    utf8::encode($url_title) if utf8::is_utf8($url_title); #Encode characters as UTF-8 bytes
    $url_title=~s/([^A-Za-z0-9_.~:\/-])/sprintf('%%%02X',ord($1))/ge;
    my $edit_page_contents=$editor->{mech}->get("http://en.wikipedia.org/w/index.php?title=$url_title&action=edit")->content;
    #Check if the text is editable. Only the <textarea> tag itself is examined,
    #and the HTML is deliberately left undecoded: the page's own text is
    #entity-escaped inside the form, so it cannot imitate the tag.
    if ($edit_page_contents =~ m/<textarea [^>]+? readonly='readonly'/) {
        push(@protected,$page);
    }
    else {
        push(@unprotected,$page);
    }
}
#E-mails the list of unprotected pages to every user in @mailing_list by
#filling in and submitting the wiki's Special:Emailuser form.
#
#Spaces in titles are written as underscores inside URLs so that the links in
#the plain-text message are not cut off at the first space.
sub send_mail {
    my $email_text="This is an automated report of unprotected templates and images I detected during my scan of the Main Page. This list also includes tomorrow's templates, but not tomorrow's images.\n\n Unprotected templates/images I found were:\n\n";
    foreach my $title (@unprotected) { #Write out the unprotected templates
        (my $url_title=$title)=~tr/ /_/;
        $email_text.="[[$title]] -- http://en.wikipedia.org/wiki/$url_title\n";
    }
    $email_text.="\nThese templates/images should be protected immediately.";
    foreach my $user (@mailing_list) { #Perform for each user subscribed
        (my $url_user=$user)=~tr/ /_/;
        $editor->{mech}->get("http://en.wikipedia.org/wiki/Special:Emailuser/$url_user");
        #Fill out variables
        #NOTE(review): field() acts on the currently selected form -- presumably the
        #e-mail form is the first form on the page; confirm
        $editor->{mech}->field('wpSubject','Shadowbot2 report of unprotected templates');
        $editor->{mech}->field('wpText',$email_text);
        $editor->{mech}->click('wpSend'); #Send the email
    }
}
#Builds @transclusion_list: the titles of all pages transcluded on the Main Page.
#
#Also reads today's date values from User:Shadowbot2/Variables into the globals
#$CURRENTMONTHNAME, $CURRENTDAY, $CURRENTYEAR, $CURRENTMONTH and $CURRENTDAY2,
#which do_replace() and do_main_page_scan() rely on.
#
#Fixes over the earlier version:
#  * only the template name is kept -- anything after the first "|" is a
#    parameter, not part of the title;
#  * invocations spread over several lines are found as well;
#  * names already starting with "Template:" are not prefixed a second time;
#  * parser functions ("#if:" and friends) are ignored;
#  * each title is listed once.
sub load_template_list {
    my $main_page_wikitext=$editor->get_text("Main Page");
    my $variable_page_raw=$editor->{mech}->get("http://en.wikipedia.org/wiki/User:Shadowbot2/Variables")->content;
    #Remove text that is never rendered
    $main_page_wikitext=~s/<!--.+?-->//gs;
    $main_page_wikitext=~s/<noinclude>.+?<\/noinclude>//gs;
    #Grab the current MediaWiki variable values and mark them as global
    if($variable_page_raw=~m/CURRENTMONTHNAME=(\w+)/) {$CURRENTMONTHNAME=$1;}
    if($variable_page_raw=~m/CURRENTDAY=(\d+)/) {$CURRENTDAY=$1;}
    if($variable_page_raw=~m/CURRENTYEAR=(\d+)/) {$CURRENTYEAR=$1;}
    if($variable_page_raw=~m/CURRENTMONTH=(\d+)/) {$CURRENTMONTH=$1;}
    if($variable_page_raw=~m/CURRENTDAY2=(\d+)/) {$CURRENTDAY2=$1;}
    #Dated titles come out wrong without these values, so say so instead of failing silently
    unless (defined $CURRENTMONTHNAME && defined $CURRENTDAY && defined $CURRENTYEAR
            && defined $CURRENTMONTH && defined $CURRENTDAY2) {
        warn "Could not read all date variables from User:Shadowbot2/Variables\n";
    }
    #Do the replace
    $main_page_wikitext=do_replace($main_page_wikitext);
    my %listed=map { $_ => 1 } @transclusion_list;
    #Match the name inside double curly brackets, up to the first pipe or the
    #closing brackets. Triple brackets (template parameters) are left alone.
    while ($main_page_wikitext=~m/(?<!\{)\{\{(?!\{)\s*([^{}|]+?)\s*(?:\||\}\})/g) {
        my $link=$1;
        next if $link=~m/^[A-Z]+$/; #A variable we missed (all uppercase)
        next if $link=~m/^#/;       #A parser function, not a page
        if ($link !~ m/^(?:Wikipedia|Template):/) {$link="Template:".$link;} #Make sure it has a namespace
        push (@transclusion_list,$link) unless $listed{$link}++;
    }
    #Take care of other templates that would be difficult to regex for
    foreach my $extra ("Template:TFAfooter","Template:SelAnnivFooter","Template:*mp","Template:Click") {
        push (@transclusion_list,$extra) unless $listed{$extra}++;
    }
}
#Replaces the MediaWiki date variables {{CURRENTDAY}}, {{CURRENTYEAR}},
#{{CURRENTMONTH}}, {{CURRENTDAY2}} and {{CURRENTMONTHNAME}} in the given text
#with the values held in the globals of the same names.
#
#Parameter: the text to process.
#Returns:   a copy of the text with every occurrence substituted.
sub do_replace {
    my $text=shift;
    #Applied one after another, in this order
    my @substitutions=(
        [ qr/\{\{CURRENTDAY\}\}/,       $CURRENTDAY ],
        [ qr/\{\{CURRENTYEAR\}\}/,      $CURRENTYEAR ],
        [ qr/\{\{CURRENTMONTH\}\}/,     $CURRENTMONTH ],
        [ qr/\{\{CURRENTDAY2\}\}/,      $CURRENTDAY2 ],
        [ qr/\{\{CURRENTMONTHNAME\}\}/, $CURRENTMONTHNAME ],
    );
    for my $substitution (@substitutions) {
        my ($variable_pattern,$value)=@$substitution;
        $text=~s/$variable_pattern/$value/g;
    }
    return $text;
}
#Returns the day after the given date.
#
#Parameter: a string "Month D" or "Month D YYYY" (English month name, fields
#           separated by single spaces). A missing year means the current year (UTC).
#Returns:   the list (month name, day, year) of the following day, or an empty
#           list when no following day can be worked out (day missing or not a
#           number, day past the end of the month, or an unknown month at
#           month end).
#
#Fixes over the earlier version: the year is no longer forced to 2007 (neither
#as the default nor when December rolls over into January), and February is
#given 29 days in leap years.
sub do_date_increment {
    my $date = shift;

    my @month_names = qw(January February March April May June
                         July August September October November December);
    my %index_of;
    @index_of{@month_names} = (0 .. $#month_names);

    #Every month not listed here is treated as having 31 days
    my %days_in_short_month = ( April     => 30,
                                June      => 30,
                                September => 30,
                                November  => 30,
                                February  => 28
                              );

    my ($month, $day, $year) = split(/ /, $date);
    return unless defined $month && defined $day && $day =~ /^\d+$/;
    if (!defined $year || $year eq '') { $year = (gmtime)[5] + 1900; }

    my $last_day = exists $days_in_short_month{$month} ? $days_in_short_month{$month} : 31;
    if ($month eq 'February'
        && (($year % 4 == 0 && $year % 100 != 0) || $year % 400 == 0)) {
        $last_day = 29;
    }

    return if $day > $last_day; #Not a real date

    #Ordinary case: still inside the same month
    if ($day < $last_day) {
        return ($month, $day + 1, $year);
    }

    #Last day of the month: move on to the first of the next month
    return unless exists $index_of{$month}; #Unknown month name, no successor
    if ($month eq 'December') {
        return ('January', 1, $year + 1);
    }
    return ($month_names[ $index_of{$month} + 1 ], 1, $year);
}