User:DustaBot/source
Appearance
Between the powerful modules LWP::Simple, XML::Simple and MediaWiki, and the MediaWiki API, this script is able to do a hell of a lot in only 84 lines.
#!/usr/bin/perl
use strict;
use LWP::Simple;
use XML::Simple;
use URI::Escape;
use MediaWiki;
# Shared MediaWiki client; remove_cat_from_page() uses it to load and save pages.
my $c = login();

# API request templates. <CMSTART> and <USERLIST> are placeholders that are
# substituted for every request.
my $cat_list_url = 'http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmprop=title&cmlimit=20&cmtitle=Category:Wikipedian%20usernames%20editors%20have%20expressed%20concern%20over&cmsort=timestamp&cmdir=asc<CMSTART>&format=xml';
my $user_blocked_url = 'http://en.wikipedia.org/w/api.php?action=query&list=users&ususers=<USERLIST>&usprop=blockinfo&format=xml';

my $cmstart = '';    # continuation value from the previous response; empty on the first page
my $done    = 0;
my $count   = 0;     # number of category pages fetched

# Walk the category one page of members at a time. For every page, ask the API
# which of the listed users are blocked and remove the category from the pages
# of those who are.
until ( $done ) {
    # --- Step 1: fetch one page of category members -----------------------
    my $cmstart_param = $cmstart ? "&cmstart=$cmstart" : '';
    my $cat_url = $cat_list_url;
    $cat_url =~ s|<CMSTART>|$cmstart_param|;

    print "Fetching a page of 20 users in the category...\n";
    my $cat_body = get($cat_url);
    defined $cat_body
        or die "Failed to fetch category members from $cat_url";

    # ForceArray: without it XML::Simple returns a bare hash (not a list)
    # when the response contains exactly one <cm> element.
    my $cat_xml = XMLin($cat_body, ForceArray => ['cm']);
    $cmstart = $cat_xml->{'query-continue'}{'categorymembers'}{'cmstart'};
    my $members = $cat_xml->{'query'}{'categorymembers'}{'cm'} || [];

    # --- Step 2: turn page titles into URL-escaped user names -------------
    my @escaped_names;
    foreach my $member (@{$members}) {
        my $title = $member->{'title'};

        # Anchored so that the namespace separator is never captured as part
        # of the user name ("User:Foo" yields "Foo", not ":Foo").
        unless (defined $title && $title =~ m{\AUser(?: talk)?:(.+)\z}s) {
            warn 'Skipping category member that is not a user page: '
                . (defined $title ? $title : '(no title)') . "\n";
            next;
        }

        # uri_escape_utf8 rather than uri_escape: user names may contain
        # non-ASCII characters, which uri_escape cannot encode correctly.
        push @escaped_names, uri_escape_utf8($1);
    }

    # --- Step 3: look up block status and clean up blocked users ----------
    if (@escaped_names) {
        my $user_list   = join('|', @escaped_names);
        my $blocked_url = $user_blocked_url;
        $blocked_url =~ s|<USERLIST>|$user_list|;

        print "Checking if they are blocked...\n\n";
        my $blocked_body = get($blocked_url);
        defined $blocked_body
            or die "Failed to fetch block information from $blocked_url";

        # ForceArray again: a single <user> element must still be folded into
        # a hash keyed by its "name" attribute (XML::Simple's default KeyAttr).
        my $blocked_xml = XMLin($blocked_body, ForceArray => ['user']);
        my $rh_userlist = $blocked_xml->{'query'}{'users'}{'user'} || {};

        foreach my $userkey (keys %{$rh_userlist}) {
            my $rh_user = $rh_userlist->{$userkey};
            next unless ref $rh_user eq 'HASH' && $rh_user->{blockreason};

            my $summary = 'Removing cat, user blocked by [[User:'.$rh_user->{blockedby}.']] with the reason: '.$rh_user->{blockreason};
            remove_cat_from_page($userkey, $summary);
        }
    }

    # No continuation value means this was the last page of the category.
    $done = 1 unless ($cmstart);
    $count++;
}
print "Read $count pages\n";
# remove_cat_from_page($title, $summary)
#
# Removes the username-concern category link from a user's pages.
#
# Tries "User talk:$title" first and then "User:$title". The first page on
# which at least one category link is found is saved with $summary as the edit
# summary, and the sub returns immediately.
#
# Parameters:
#   $title   - user name without namespace prefix
#   $summary - edit summary used when saving the page
#
# Returns 1 if a page was changed and saved, 0 if neither page contained the
# category. Dies if a page cannot be loaded through the shared client $c.
sub remove_cat_from_page {
    my ($title, $summary) = @_;

    foreach my $prefix ('User talk:', 'User:') {
        my $page_name = "$prefix$title";
        print "Downloading: $page_name...\n";

        my $page = $c->get($page_name, 'rw')
            or die "Could not load '$page_name' for editing";

        # The optional "Wikipedian " prefix covers the category name this
        # script lists members of; the other two names were already handled.
        # ".*?" swallows an optional sort key ("|key") before the closing "]]".
        my $removed = $page->{'content'} =~ s/\[\[Category:((?:Wikipedian )?Usernames editors have expressed concern over|Wikipedia usernames with possible policy issues).*?\]\]//ig;

        unless ($removed) {
            print "I did not see the category on that page...\n\n";
            next;
        }

        # $1 still holds the category name captured by the last substitution.
        print "Succeeded in removing at least one instance of the category: $1\n";
        print "Saving...\n";
        $page->{'summary'} = $summary;
        $page->save();
        print "Done.\n\n";
        return 1;
    }

    return 0;
}
# login()
#
# Reads the bot credentials from the files "username" and "password" in the
# current working directory, creates a MediaWiki client and configures it for
# en.wikipedia.org.
#
# Returns the MediaWiki client object. Dies if a credential file cannot be
# opened; a failed setup() is only reported with a warning.
sub login {
    my $username = _read_credential('username');
    my $password = _read_credential('password');

    warn "Connecting to Wikipedia...\n";
    my $c = MediaWiki->new;

    # NOTE(review): a failed login is only warned about, so the script carries
    # on with whatever state the client is in - confirm this is intended.
    $c->setup
    ({
        'bot'  => {'user' => $username, 'pass' => $password},
        'wiki' => {'host' => 'en.wikipedia.org', 'path' => 'w'}
    }) || warn "Failed to log in\n";

    my $whoami = $c->user();
    warn "$whoami connected\n\n";
    return $c;
}

# _read_credential($path)
#
# Returns the whole content of the file $path with one trailing line ending
# removed, so that a newline added by an editor does not become part of the
# user name or password. Dies if the file cannot be opened.
sub _read_credential {
    my ($path) = @_;

    open(my $fh, '<', $path)
        or die "Cannot open credential file '$path': $!\n";
    my $value = do { local $/; <$fh> };    # slurp the whole file
    close($fh);

    $value = '' unless defined $value;
    $value =~ s/\r?\n\z//;
    return $value;
}