Jump to content

User:ImageRemovalBot/removebot-badlinks.pl

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

ImageRemovalBot's code for removing invalid file links. Requires User:FairuseBot/Pearle.pm, User:FairuseBot/Pearle/WikiPage.pm and User:FairuseBot/libBot.pm.

#!/usr/bin/perl


# RemoveBot-badlinks
#
# A bot to remove bad image links (e.g. 'File:http://' or 'File:C:\')

use strict;
use warnings;

use lib '/home/bot/perllib';

use libBot;

#exit;

# When true, the main pass works on a short hard-coded list of pages
# instead of fetching the tracking category.
my $test = 0;

# Base directory for the log and cookie files passed to Pearle::init below.
my $homedir = '/home/bot/removebot';

# Bot account, password placeholder, log file path and cookie file path.
Pearle::init(
	"ImageRemovalBot",
	"<INSERT PASSWORD HERE>",
	$homedir . "/removebot-badlinks.log",
	$homedir . "/cookies-badlinks.txt",
);
Pearle::config(nullOK => 1, printlevel => 4, loglevel => 2);
config(username => "ImageRemovalBot");

# Without a successful login there is nothing to do, so stop here.
Pearle::login() or exit;

# Main pass.
#
# Collects the pages to inspect (the 'Category:Articles with missing files'
# members, or a fixed test list when $test is set), keeps only the image
# links on each page that look like an external URL or a Windows drive path
# ('File:Http://', 'File:Https://', 'File:C:\', 'File:D:\'), and asks
# RemoveImageFromPage to strip each of them from the page.
#
# Exits early when the category is empty or when an image name cannot be
# turned into a usable regex.  Exception 925 from the removal step is logged
# as "page is protected" and skipped; any other exception is re-raised.
{
	my @articles;
	my $images_removed = 0;

	# Loop-invariant arguments for RemoveImageFromPage.  The comment is
	# presumably used as (part of) the edit summary -- confirm in libBot.
	my $removal_prefix = undef;
	my $removal_comment = "Removing external link used as image";

	Pearle::myLog(2, "Beginning set at " . time() . "\n");

	# Get the list of pages to inspect
	if($test)
	{
		@articles = ("User:Carnildo/sandbox4", "Hussain Sagar");
	}
	else
	{
		@articles = Pearle::getCategoryArticles('Category:Articles with missing files');
	}

	if(scalar(@articles) == 0)
	{
		Pearle::myLog(2, "No files in category.\n");
		exit;
	}

	Pearle::myLog(4, join("\n", @articles) . "\n");
	Pearle::myLog(2, scalar(@articles) . " pages found\n");

	foreach my $article (@articles)
	{
		# Keep only the links whose "file name" is a URL or a drive path.
		my @images = grep {$_ =~ /(?:File:Https?:\/\/|File:(?:C|D):\\)/} Pearle::getPageImages($article);
		if(scalar(@images) == 0)
		{
			Pearle::myLog(4, "Article $article has no URL images.\n");
			next;
		}
		# Build one message string, as every other myLog call in this script does.
		Pearle::myLog(3, "External-linked images: " . join(", ", @images) . "\n");

		foreach my $image (@images)
		{
			# Strip the namespace prefix; validate the capture BEFORE handing it
			# to MakeWikiRegex so undef is never passed on or interpolated.
			my ($raw_name) = $image =~ /(?:Image|File):(.*)/;
			my $raw_image = defined($raw_name) ? MakeWikiRegex($raw_name) : undef;

			# Sanity check: the generated pattern must match the name it came from
			if(!defined($raw_image) or $image !~ /$raw_image/)
			{
				my $shown = defined($raw_image) ? $raw_image
				          : defined($raw_name)  ? $raw_name
				          :                       '';
				botwarnlog("\n*Parse error on image [[:$image]] ($shown)");
				exit;
			}

			# Case-insensitive "Image:"/"File:" prefix with optional spaces or
			# underscores around the colon and the name.
			my $image_regex = "[ _]*(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*${raw_image}[ _]*";
			Pearle::myLog(3, "Image regex: $image_regex\n");

			eval
			{
				Pearle::myLog(3, "Page for removal: $article\n");
				my $hits = RemoveImageFromPage($image, $article, $image_regex, $removal_prefix, $removal_comment);
				if($hits)
				{
					# Call Pearle::limit() only when the page was actually changed.
					Pearle::myLog(2, "Removed external link from article $article ($hits times)\n");
					Pearle::limit();
				}
				$images_removed += $hits;
			};
			# Save $@ at once: it is a global that any later eval may overwrite.
			my $err = $@;
			if($err)
			{
				# The exception may be a string such as "925 at FILE line N.", so
				# keep the original numeric comparison but silence the
				# "isn't numeric" warning it would otherwise emit.
				my $is_protected = do { no warnings 'numeric'; 925 == $err };
				if($is_protected)
				{
					botwarnlog("\n*Page [[:$article]] is protected removing external link.");
				}
				else
				{
					# Unknown failure: re-raise it explicitly.
					die $err;
				}
			}
		}
	}
	Pearle::myLog(2, "Finished with set.  Removed $images_removed images.\n");
}