Jump to content

User:Joe's Null Bot/source

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
use MediaWiki::API;

   # Gently pruned from the standard exclusion code to hardcode $user and $opt
   sub allowBots {
    my($text) = @_;

    my $user = "Joe's Null Bot";

    return 0 if $text =~ /{{[nN]obots}}/;
    return 1 if $text =~ /{{[bB]ots}}/;
    if($text =~ /{{[bB]ots\s*\|\s*allow\s*=\s*(.*?)\s*}}/s){
        return 1 if $1 eq 'all';
        return 0 if $1 eq 'none';
        my @bots = split(/\s*,\s*/, $1);
        return (grep $_ eq $user, @bots)?1:0;
    }
    if($text =~ /{{[bB]ots\s*\|\s*deny\s*=\s*(.*?)\s*}}/s){
        return 0 if $1 eq 'all';
        return 1 if $1 eq 'none';
        my @bots = split(/\s*,\s*/, $1);
        return (grep $_ eq $user, @bots)?0:1;
    }
    return 1;
  }

  # Have the bot check in to see if it's run past it's "expiration date", typically of 86400 seconds
  # (that is, one day).  Mostly here to avoid ten copies of the bot running if nothing can run for 
  # ten days.
  $epoch = time();
  $listcount =0;
  $purgecount = 0;

  sub check_expirations() {
    my $secs = time() - $epoch;
    if ($secs > 86400) {
       die "Bot expired of old age.\n";
    }
    if ($purgecount > 250) {
       die "This category is looking disturbingly large. Quitting.\n";
    }
  }

  # Within a single MediaWiki call, we ask the API to make up to 5 attempts, 10 s apart, until
  # the worst-case server lag is better than 5s. 
  my $mw = MediaWiki::API->new();
  $mw->{config}->{api_url} = 'http://en.wikipedia.org/w/api.php';

  # Delay/retry parameters
 
  $mw->{config}->{max_lag}         = 5;        # Tell MediaWiki to put us off it there's a 5s+ db lag out there
  $mw->{config}->{max_lag_delay}   = 10;  # ..and to wait 10s between retries
  $mw->{config}->{max_lag_retries} = 4;    # ..and to only make 4 retries before dropping back to our code

  # Our own delay parameters
  $standardelay      = 15;  # Wait 15s or more between purge calls....
  $longdelay         = 900;  # ...if the API puts us off several times in a row, take a 15-minute break

  my $articles = null;

  # login
  while (1) { 
    if ($mw->login( { lgname => "Joe's Null Bot", lgpassword => 'REDACTED' } )) {
      last;
    }

    check_expirations();

    if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
      sleep $longdelay;   
    } else {
      die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
    } 
  } 

  # Get list of articles
  while (1) {
    check_expirations();

    $articles = $mw->list ( {
       action => 'query',
       list => 'categorymembers',
       cmtitle => 'Category:BLP articles proposed for deletion by days left',
       cmlimit => 'max'} );  

    if ($articles) { last; }

    if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
      sleep $longdelay;   
    } else {
      die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
    } 
  }

  # scan through the articles...
  foreach (@{$articles}) {
      my $thistitle = $_->{title};
      $listcount++;

      print  "T: " . $thistitle . "\n";

      while (1) {
         check_expirations();

         my $pagehash = $mw->get_page( { title => $thistitle } );
         if ($pagehash) { last; }

         if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
            sleep $longdelay;   
          } else {
            die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
          } 
      }

      sleep $standardelay;     # There's no hurry!
     
      if (allowBots($pagehash->{'*'})) {       
        $purgecount++;

        while (1) {
             check_expirations();

             # …and purge each one 
             my $apires = $mw->api( {
                 action => 'purge', titles => $thistitle, forcelinkupdate => 1} );
                      
             if ($apires) { last; }

             if ($mw->{error}->{details} =~ /Server has reported lag above the configure/) {
                sleep $longdelay;   
             } else {
                die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
             } 
         }
      } else {
        print "….DENIED\n";
      }
  }

  print $purgecount . " from a total list of " . $listcount . " articles in " . (time()-$epoch) . "seconds.\n";