Jump to content

User:ImageRemovalBot/removebot.pl

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Carnildo (talk | contribs) at 06:46, 17 November 2008. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)

ImageRemovalBot's code. Requires User:FairuseBot/Pearle.pm and User:FairuseBot/libBot.pm. User:ImageRemovalBot/removebot-followup.pl is used to follow up on failed removals.

#!/usr/bin/perl


# RemoveBot
#
# A bot to remove deleted images from pages

use strict;
use warnings;

use lib '/home/mark/perllib';
use lib '/home/mark/Desktop/wikibots/common';
#use Date::Calc qw();
#use URI::Escape;
use Fcntl qw(:flock);

use libBot;

# --- Run-time configuration -------------------------------------------------

# Working directory holding the bot's log, cookie jar, PID file and
# "last processed" marker.
my $homedir = '/home/mark/Desktop/wikibots/removebot';

# When true, the image list is read from images2.txt instead of the deletion
# log, and the "last processed" marker is not written back.
my $test = 1;

# Allow talkpage messages to stop the bot?
my $permit_interruptions = 0;

# --- Session setup ----------------------------------------------------------

Pearle::init("ImageRemovalBot", "<INSERT PASSWORD HERE>", "$homedir/removebot.log", "$homedir/cookies.txt");
Pearle::config(nullOK => 1);
config(username => "ImageRemovalBot");

# Nothing useful can be done without a session, so stop quietly if login fails.
Pearle::login() or exit;

# Check for a running copy.
#
# The PID of the previous run is stored in "$homedir/pid".  If that file
# exists and the recorded process is still a removebot.pl instance, this run
# logs a warning and exits so that two copies never edit at the same time.
# A stale file (process gone, or PID reused by something else) is ignored and
# overwritten below.
if(-e "$homedir/pid")
{
	# Possible other copy.  Compare PIDs
	my $recorded_pid;
	if(open my $pid_in, "<", "$homedir/pid")
	{
		$recorded_pid = <$pid_in>;
		close $pid_in;
	}
	else
	{
		warn "Unable to read $homedir/pid: $!\n";
	}

	# The value is interpolated into a shell command, so only accept a plain
	# decimal number; anything else (empty file, garbage) is treated as stale.
	if(defined($recorded_pid) and $recorded_pid =~ /^\s*(\d+)\s*$/)
	{
		my $old_pid = $1;
		my $psresult = `ps -p $old_pid`;
		if(defined($psresult) and $psresult =~ /removebot\.pl/)
		{
			botwarnlog("*Previous run is taking longer than normal\n");
			exit;
		}
	}
}

# Record our own PID for the next run to find.  Without this file the
# single-instance guard above cannot work, so failure here is fatal.
open my $pid_out, ">", "$homedir/pid" or die "Unable to write $homedir/pid: $!\n";
print {$pid_out} $$;
close $pid_out or die "Unable to finish writing $homedir/pid: $!\n";

# Get the last log entry processed.
#
# "$homedir/lastfile.log" holds a single line: the timestamp of the last
# deletion-log entry handled by a previous run.  If the file is missing,
# unreadable or empty, start from the beginning of time instead of carrying
# an undefined/empty timestamp into the log query.
my ($last_date);
if(-e "$homedir/lastfile.log")
{
	if(open my $last_in, "<", "$homedir/lastfile.log")
	{
		$last_date = <$last_in>;
		close $last_in;
		chomp $last_date if(defined($last_date));
	}
	else
	{
		warn "Unable to read $homedir/lastfile.log: $!\n";
	}
}

if(!defined($last_date) or $last_date eq "")
{
	$last_date = "1970-01-01T00:00:01Z";	# Beginning of time
}

my $total_images = 0;
my $total_processed = 0;
my $i = 1;	# Only referenced by the disabled multi-set loop header below

# Process one "set" of deleted images:
#   1. build the list of deleted images (from images2.txt in test mode,
#      otherwise from the deletion log starting at $last_date);
#   2. for each image that is still missing locally and not served from the
#      shared repository, remove it from every page that still uses it;
#   3. re-query usage and append images that could not be fully removed to
#      followup.log;
#   4. outside test mode, record the new $last_date in lastfile.log.
#for($i = 1; $i <= 100; $i++)
{
	my @images;
	my $images_removed = 0;

	Pearle::myLog(2, "Beginning set at " . time() . "\n");

	# Get the log
	my @articles;
	if($test)
	{
		# Test mode: one image title per line in images2.txt (relative to the
		# current directory).  There is no log position to remember.
		if(open my $test_in, "<", "images2.txt")
		{
			@images = <$test_in>;
			close $test_in;
			chomp @images;
		}
		else
		{
			warn "Unable to read images2.txt: $!\n";
		}
		$last_date = undef;
	}
	else
	{
		@articles = Pearle::getLogArticles(log => 'delete', limit => 500, time => $last_date, dir => 'newer');
		foreach my $item (@articles)
		{
			push @images, $item->[0] if($item->[0] =~ /^image:/i);
		}

		# Only move the marker when the log actually returned something.
		# With an empty result, indexing $articles[0] would yield undef and
		# the saved position would be overwritten with an empty line.
		# NOTE(review): whether element 0 is the newest entry depends on the
		# order Pearle::getLogArticles returns for dir => 'newer' -- confirm.
		if(@articles and defined($articles[0]->[3]))
		{
			$last_date = $articles[0]->[3];
		}
	}

	print join "\n", @images;
	print "\n", scalar(@images), " images found\n";
	$total_processed += scalar(@images);

	# Process for deleted images

	if(scalar(@images) == 0)
	{
		Pearle::myLog(1, "*No images in log\n");
	}

	foreach my $image (@images)
	{
		# Fetch image info
		my $image_data = Pearle::APIQuery(titles => [$image], prop => 'imageinfo', meta => 'userinfo', uiprop => ['hasmsg'], 			# Basic data
		                                  list => 'imageusage', iutitle => $image, iunamespace => [0, 10, 12, 14, 100], iulimit => 500);	# Image usage

		my $removal_prefix = "Missing image removed:";
		my $removal_comment = "Removing missing image";

		if($permit_interruptions and DoIHaveMessages($image_data))
		{
			Pearle::myLog(0, "Talkpage message found; exiting on image $image.\n");
			exit;
		}

		# Verify the image is still deleted
		if($image_data !~ /missing=""/)
		{
			Pearle::myLog(2, "*Image [[:$image]] has been re-uploaded.\n");
			next;
		}

		# Images from Commons.  May have been masked by the deleted version.
		if($image_data =~ /imagerepository="shared"/)
		{
			Pearle::myLog(2, "*Commons image [[:$image]] found\n");
			next;
		}

		my @pages = GetPageList($image_data);

		if(scalar(@pages) == 0)
		{
			notelog("Image $image is already orphaned\n");
			next;
		}

		# Extract the bare file name and turn it into a regex fragment.
		my ($raw_image) = $image =~ /Image:(.*)/;
		if(defined($raw_image) and $raw_image ne "")
		{
			$raw_image = MakeWikiRegex($raw_image);
		}

		# Sanity check, done before the fragment is used anywhere.  An empty
		# fragment is rejected as well: it would make the removal regex match
		# any image link, and an empty pattern in a match reuses the last
		# successful regex instead of matching the empty string.
		if(!defined($raw_image) or $raw_image eq "" or $image !~ /$raw_image/)
		{
			my $shown = defined($raw_image) ? $raw_image : "";
			botwarnlog("*Parse error on image [[:$image]] ($shown)\n");
			next;
		}

		my $image_regex;
		if($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i and $image !~ /^http:\/\//i)
		{
			# NOTE(review): "(:?" is a capturing group with an optional
			# leading colon, not a non-capturing "(?:" group.  Left unchanged
			# because RemoveImageFromPage's use of captures is not visible
			# here -- confirm which was intended.
			$image_regex = "[ _]*(:?[Ii][Mm][Aa][Gg][Ee]|[Mm][Ee][Dd][Ii][Aa])[ _]*:[ _]*${raw_image}[ _]*";
			Pearle::myLog(2, "*Non-image media file [[:$image]] found.\n");
		}
		else
		{
			$image_regex = "[ _]*[Ii][Mm][Aa][Gg][Ee][ _]*:[ _]*${raw_image}[ _]*";
		}
		Pearle::myLog(3, "Image regex: $image_regex\n");

		# NOTE(review): the trailing empty alternative in the extension list
		# below makes the regex match every title, so this branch cannot run
		# as written and non-image media fall through to normal removal.
		# Kept as-is; confirm that is intentional before changing it.
		if($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg|)$/i and $image !~ /^http:\/\//i)
		{
			my $page_list = "( ";
			foreach my $page_entry (@pages)
			{
				$page_list .= "[[:$page_entry]] ";
			}
			$page_list .= ")";
			wikilog("User talk:ImageRemovalBot/media", "*[[:$image]] $page_list\n");
			next;	# Non-image files are too hard to work with
		}

		# Edit summary: link the first occurrence of "image" to the file.
		my $parsed_removal_comment = $removal_comment;
		$parsed_removal_comment =~ s/image/[[:$image|image]]/;
		foreach my $page (@pages)
		{
			notelog("Page for removal: $page\n");
			my $hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment);
			if($hits)
			{
				Pearle::myLog(2, "Removed image $image from article $page ($hits times)\n");
				# Pearle::limit() is only called when something was removed.
				Pearle::limit();
			}
			$images_removed += $hits || 0;
		}

		# Verify removal
		# Portal removal is too hard to get correct, and we don't really care about it.
		# Template removal isn't possible, and the template usage has already been logged.
		$image_data = Pearle::APIQuery(list => 'imageusage', iutitle => $image, iunamespace => [0, 12, 14], iulimit => 500);
		@pages = GetPageList($image_data);

		if(scalar(@pages) != 0)
		{
		#	botwarnlog("*Unable to remove all instances of [[:$image]]\n");
			Pearle::myLog(2, "*Unable to remove all instances of [[:$image]], adding to followup log\n");
			# Append "<epoch seconds> <image>" under an exclusive lock.
			if(open my $followup_out, ">>", "followup.log")
			{
				flock $followup_out, LOCK_EX;
				my $date = time;
				print {$followup_out} "$date $image\n";
				flock $followup_out, LOCK_UN;
				close $followup_out;
			}
			else
			{
				botwarnlog("*Unable to open followup.log for [[:$image]]: $!\n");
			}
		}

		print "Images removed: $images_removed\n";
	}
	Pearle::myLog(2, "Finished with set.  Removed $images_removed images.\n");
	$total_images += $images_removed;

	# Record the last log entry processed
	if(!$test and defined($last_date))
	{
		if(open my $last_out, ">", "$homedir/lastfile.log")
		{
			print {$last_out} "$last_date\n";
			print "$last_date\n";
			close $last_out or botwarnlog("*Unable to finish writing lastfile.log: $!\n");
		}
		else
		{
			botwarnlog("*Unable to write lastfile.log: $!\n");
		}
	}
}

#print "Finished.  Total $total_images removed, $total_processed processed.\n";

# Remove the PID file written at start-up so the next run does not have to
# fall back on the "is that process still alive?" check.  A failure is only
# reported, since a stale file is tolerated by that check.
unlink "$homedir/pid" or warn "Unable to remove $homedir/pid: $!\n";