User:ImageRemovalBot/removebot.pl
Appearance
This page contains the source code for ImageRemovalBot. It requires User:FairuseBot/Pearle.pm and User:FairuseBot/libBot.pm. The companion script User:ImageRemovalBot/removebot-followup.pl is used to follow up on removals that failed.
#!/usr/bin/perl

# RemoveBot
#
# A bot to remove deleted images from pages
#
# Overview of one run:
#   1. Log in and make sure no earlier copy is still running (PID file).
#   2. Load the timestamp of the last deletion-log entry handled.
#   3. Collect image titles: from images2.txt in test mode, otherwise from
#      the deletion log starting at that timestamp.
#   4. For each image that is still missing, is not a Commons image and is
#      still used somewhere, remove it from every page that uses it.
#   5. Re-query usage; anything left over is appended to followup.log.
#   6. Outside test mode, store the new timestamp in lastfile.log.
#
# The helpers config, botwarnlog, notelog, wikilog, DoIHaveMessages,
# GetPageList, MakeWikiRegex and RemoveImageFromPage, and the Pearle::*
# functions, are not defined in this script; they are presumably provided by
# libBot / Pearle -- TODO confirm.

use strict;
use warnings;

use lib '/home/mark/perllib';
use lib '/home/mark/Desktop/wikibots/common';

#use Date::Calc qw();
#use URI::Escape;
use Fcntl qw(:flock);
use libBot;

my $test = 1;
my $homedir = '/home/mark/Desktop/wikibots/removebot';
my $permit_interruptions = 0;    # Allow talkpage messages to stop the bot?

Pearle::init("ImageRemovalBot", "<INSERT PASSWORD HERE>", "$homedir/removebot.log","$homedir/cookies.txt");
Pearle::config(nullOK => 1);
config(username => "ImageRemovalBot");

if(!Pearle::login())
{
    exit;
}

# Check for a running copy
if(-e "$homedir/pid")
{
    # Possible other copy.  Compare PIDs
    if(open my $pid_in, "<", "$homedir/pid")
    {
        my $pid = <$pid_in>;
        close $pid_in;

        $pid = "" if(!defined($pid));
        $pid =~ s/^\s+|\s+$//g;

        # The value is interpolated into a shell command, so only accept a
        # plain number; anything else is treated as a stale/corrupt PID file.
        if($pid =~ /^\d+$/)
        {
            my $psresult = `ps -p $pid`;
            if(defined($psresult) and $psresult =~ /removebot\.pl/)
            {
                botwarnlog("*Previous run is taking longer than normal\n");
                exit;
            }
        }
    }
    else
    {
        warn "Unable to read $homedir/pid: $!\n";
    }
}

# Record our own PID for the next run's check
if(open my $pid_out, ">", "$homedir/pid")
{
    print {$pid_out} $$;
    close $pid_out or warn "Unable to finish writing $homedir/pid: $!\n";
}
else
{
    warn "Unable to write $homedir/pid: $!\n";
}

# Get the last log entry processed
my $last_date = "1970-01-01T00:00:01Z";    # Beginning of time
if(-e "$homedir/lastfile.log")
{
    open my $last_in, "<", "$homedir/lastfile.log"
        or do { unlink "$homedir/pid"; die "Unable to read $homedir/lastfile.log: $!\n"; };
    my $stored = <$last_in>;
    close $last_in;

    # An empty file (or empty first line) carries no usable timestamp; keep
    # the "beginning of time" default instead of passing on an undefined or
    # empty value.
    if(defined($stored))
    {
        chomp $stored;
        $last_date = $stored if($stored ne "");
    }
}

my $total_images = 0;
my $total_processed = 0;
my $i = 1;    # Only referenced by the disabled multi-batch loop below

#for($i = 1; $i <= 100; $i++)
{
    my @images = ();
    my $images_removed = 0;

    Pearle::myLog(2, "Beginning set at " . time() . "\n");

    # Get the log
    my @articles = ();
    if($test)
    {
        # Test mode: read the image titles from a local file, one per line
        open my $list_in, "<", "images2.txt"
            or do { unlink "$homedir/pid"; die "Unable to read images2.txt: $!\n"; };
        @images = <$list_in>;
        close $list_in;
        chomp @images;
    }
    else
    {
        @articles = Pearle::getLogArticles(log => 'delete', limit => 500, time => $last_date, dir => 'newer');
        foreach my $item (@articles)
        {
            push @images, $item->[0] if($item->[0] =~ /^image:/i);
        }
    }

    if($test)
    {
        $last_date = undef;
    }
    elsif(@articles and defined($articles[0]->[3]))
    {
        # Only move the resume point when the log actually returned entries;
        # otherwise the previous timestamp is kept so it is not overwritten
        # with an empty value.
        # NOTE(review): assumes entry 0 is the right entry to resume from and
        # that field 3 is its timestamp -- confirm against
        # Pearle::getLogArticles.
        $last_date = $articles[0]->[3];
    }

    print join "\n", @images;
    print "\n", scalar(@images), " images found\n";
    $total_processed += scalar(@images);

    # Process for deleted images
    if(scalar(@images) == 0)
    {
        Pearle::myLog(1, "*No images in log\n");
    }

    foreach my $image (@images)
    {
        my $image_regex = $image;
        my @pages = ();

        # Fetch image info
        my $image_data = Pearle::APIQuery(titles => [$image], prop => 'imageinfo', meta => 'userinfo', uiprop => ['hasmsg'],    # Basic data
                                          list => 'imageusage', iutitle => $image, iunamespace => [0, 10, 12, 14, 100], iulimit => 500);    # Image usage

        my $removal_prefix = "Missing image removed:";
        my $removal_comment = "Removing missing image";

        if($permit_interruptions and DoIHaveMessages($image_data))
        {
            Pearle::myLog(0, "Talkpage message found; exiting on image $image.\n");
            exit;
        }

        # Verify the image is still deleted
        if($image_data !~ /missing=""/)
        {
            Pearle::myLog(2, "*Image [[:$image]] has been re-uploaded.\n");
            next;
        }

        # Images from Commons.  May have been masked by the deleted version.
        if($image_data =~ /imagerepository="shared"/)
        {
            Pearle::myLog(2, "*Commons image [[:$image]] found\n");
            next;
        }

        @pages = GetPageList($image_data);
        if(scalar(@pages) == 0)
        {
            notelog("Image $image is already orphaned\n");
            next;
        }

        my ($raw_image) = $image =~ /Image:(.*)/;

        # Bail out before the name is used at all: an undefined name must not
        # reach MakeWikiRegex or be interpolated into the removal regex.
        if(!defined($raw_image))
        {
            botwarnlog("*Parse error on image [[:$image]] ()\n");
            next;
        }
        $raw_image = MakeWikiRegex($raw_image);

        # Sanity check
        if(!defined($raw_image) or $image !~ /$raw_image/)
        {
            my $shown = defined($raw_image) ? $raw_image : "";
            botwarnlog("*Parse error on image [[:$image]] ($shown)\n");
            next;
        }

        if($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i and $image !~ /^http:\/\//i)
        {
            $image_regex = "[ _]*(:?[Ii][Mm][Aa][Gg][Ee]|[Mm][Ee][Dd][Ii][Aa])[ _]*:[ _]*${raw_image}[ _]*";
            Pearle::myLog(2, "*Non-image media file [[:$image]] found.\n");
        }
        else
        {
            $image_regex = "[ _]*[Ii][Mm][Aa][Gg][Ee][ _]*:[ _]*${raw_image}[ _]*";
        }

        Pearle::myLog(3, "Image regex: $image_regex\n");

        # NOTE(review): the pattern below ends in an empty alternative
        # ("|\.svg|)"), so it matches every name and this branch never runs.
        # It is left as-is because the media regex built above suggests the
        # skip was disabled on purpose -- confirm before re-enabling.
        if($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg|)$/i and $image !~ /^http:\/\//i)
        {
            my $page_list = "( ";
            foreach my $page_entry (@pages)
            {
                $page_list .= "[[:$page_entry]] ";
            }
            $page_list .= ")";
            wikilog("User talk:ImageRemovalBot/media", "*[[:$image]] $page_list\n");
            next;    # Non-image files are too hard to work with
        }

        my $parsed_removal_comment = $removal_comment;
        $parsed_removal_comment =~ s/image/[[:$image|image]]/;

        foreach my $page (@pages)
        {
            my $hits = 0;
            notelog("Page for removal: $page\n");
            # Only rate-limit when the page was actually edited
            if($hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment))
            {
                Pearle::myLog(2, "Removed image $image from article $page ($hits times)\n");
                Pearle::limit();
            }
            $images_removed += $hits;
        }

        # Verify removal
        # Portal removal is too hard to get correct, and we don't really care about it.
        # Template removal isn't possible, and the template usage has already been logged.
        $image_data = Pearle::APIQuery(list => 'imageusage', iutitle => $image, iunamespace => [0, 12, 14], iulimit => 500);
        @pages = GetPageList($image_data);
        if(scalar(@pages) != 0)
        {
#            botwarnlog("*Unable to remove all instances of [[:$image]]\n");
            Pearle::myLog(2, "*Unable to remove all instances of [[:$image]], adding to followup log\n");
            if(open my $followup, ">>", "followup.log")
            {
                # Lock while appending the "<epoch seconds> <image>" record
                flock $followup, LOCK_EX;
                my $date = time;
                print {$followup} "$date $image\n";
                flock $followup, LOCK_UN;
                close $followup or warn "Unable to finish writing followup.log: $!\n";
            }
            else
            {
                warn "Unable to append to followup.log: $!\n";
            }
        }
        print "Images removed: $images_removed\n";
    }

    Pearle::myLog(2, "Finished with set. Removed $images_removed images.\n");
    $total_images += $images_removed;

    # Record the last log entry processed
    if(!$test)
    {
        if(open my $last_out, ">", "$homedir/lastfile.log")
        {
            print {$last_out} "$last_date\n";
            print "$last_date\n";
            close $last_out or warn "Unable to finish writing $homedir/lastfile.log: $!\n";
        }
        else
        {
            warn "Unable to write $homedir/lastfile.log: $!\n";
        }
    }
}

#print "Finished.  Total $total_images removed, $total_processed processed.\n";

unlink "$homedir/pid";