User:VWBot/source

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import difflib
import simplejson as json # safely retrieve json objects (and correctly handle '/' in article titles)
import pickle # save arrays in files
import re
#import string # string.atoi - variable wait when lagged
import sys # read/write files
import time # what day is it?
import urllib # read/write websites



null = 0 
cj = None
ClientCookie = None
cookielib = None
try:
    import cookielib            
except ImportError:
    pass
else:
    import urllib2    
    urlopen = urllib2.urlopen
    cj = cookielib.LWPCookieJar()
    Request = urllib2.Request

if not cookielib:
    try:                                            
        import ClientCookie 
    except ImportError:
        import urllib2
        urlopen = urllib2.urlopen
        Request = urllib2.Request
    else:
        urlopen = ClientCookie.urlopen
        cj = ClientCookie.LWPCookieJar()
        Request = ClientCookie.Request

if cj != None:
    if cookielib:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
    else:
        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
        ClientCookie.install_opener(opener)

### LOWER-LEVEL URL INTERFACE ###
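# act() posts the url-encoded data in txdata to the given API endpoint (with a 'VWBot' User-agent) and
# returns the raw response body, sleeping 5 seconds and retrying indefinitely whenever an IOError is raised.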
def act (txdata, url = 'http://en.wikipedia.org/w/api.php', txheaders = {'User-agent' : 'VWBot'}):
	while True: # Keep retrying until the request reaches the server; this also rides out errors occasionally thrown by the server
		try:
			req = Request(url, txdata, txheaders)
			handle = urlopen(req)
		except IOError, e:
			#print 'We failed to open "%s".' % url
			#if hasattr(e, 'code'):
			#	print 'We failed with error code - %s.' % e.code
			#elif hasattr(e, 'reason'):
			#	print "The error object has the following 'reason' attribute :", e.reason
			#	print "This usually means the server doesn't exist, is down, or we don't have an internet connection."
			time.sleep(5)
		else:
			return handle.read()

# handle.info() returns headers, handle.read() returns the page, handle.geturl() returns the true url of the page fetched (in case urlopen has followed any redirects)


### THIS DOES NOT ACCOUNT FOR QUERY-CONTINUE RESULTS; THESE MUST BE RE-QUERIED LATER
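# action() is the higher-level wrapper around act(): it forces format=json and assert=bot, fetches an edit
# token plus start/base timestamps before any 'edit' action (so edit conflicts can be detected), logs every
# reply to a dated log file, and retries after 5 seconds on unexpected errors. Once the bot is logged in,
# a call such as action({'action': 'query', 'meta': 'siteinfo'}) returns the parsed JSON reply as a dict.
# As noted above, continuation is not followed; roughly, doing so would mean merging the reply's
# 'query-continue' parameters back into params and re-issuing the query until no continuation remains.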
def action (params):
	if 'url' in params:
		url = params['url']
		del params['url']
	else: url = 'http://en.wikipedia.org/w/api.php'
	while True: # Loop so that it passes all of the errors
		params['format'] = 'json'
		# An appropriate non-aggressive value is maxlag=5 (5 seconds), used by most of the server-side scripts.
		# Higher values mean more aggressive behaviour, lower values are nicer. 
		#params['maxlag'] = 2 - impractical due to number 
		params['assert'] = 'bot'
		# If we're trying to make an edit, get an edit token first and set the timestamps to recognize an edit conflict.
		if params['action'] == 'edit':
			page = action({'action': 'query', 'prop': 'info|revisions', 'intoken': 'edit', 'titles': params['title']})
			params['token'] = page['query']['pages'][page['query']['pages'].keys()[0]]['edittoken']
			params['starttimestamp'] = page['query']['pages'][page['query']['pages'].keys()[0]]['starttimestamp']
			if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]].keys():
				# page exists
				params['basetimestamp'] = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp']
			else:
				# page doesn't exist
				params['basetimestamp'] = params['starttimestamp']
		page = json.loads(act(urllib.urlencode(params), url))
		# log reply
		file = open(time.strftime('log %Y-%m-%d.txt', time.gmtime()), 'a')
		file.write(time.asctime(time.gmtime()) + '\t' + str(page) + '\n\n')
		file.close()
		# make sure we got a result
		if params['action'] in page.keys()[0]:
			# if 'continue' in params['action']:
			if params['action'] == 'edit': time.sleep(5)
			return page
		if page['error']['code'] == 'emptynewsection':
			return page
		# We've lagged: wait the duration of the lag (or a minimum of 5 seconds) and try again
		#if page['error']['code'] == 'maxlag':
		#	time.sleep(max(5,string.atoi(page['error']['info'][page['error']['info'].find(':')+2:page['error']['info'].find('seconds')-1])))
		# We've hit an edit conflict or some other unknown error.
		time.sleep(5)

#######################
##### @ 00:00 GMT #####
#######################
startTime = time.time()

##### 2-STEP LOGIN #####
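# MediaWiki logins take two steps: the first 'login' request returns a token, which is sent back as
# lgtoken to complete the login; if the reply is 'Throttled', the bot waits the requested number of
# seconds and tries again. foo and bar are not defined anywhere in this published copy of the source;
# they presumably held the bot's username and password and were stripped before posting.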
def login():
	page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar})
	page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar, 'lgtoken': page['login']['token']})
	if page['login']['result'] == 'Throttled':
		time.sleep(page['login']['wait'])
		login()

login()

##### TASK 1 #####
# TASK 2 - backlogSCV()
page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Suspected copyright violations', 'appendtext': time.strftime('\n{{/%Y-%m-%d}}', time.gmtime()), 'section': 0, 'summary': time.strftime('Adding new listing for %-d %B %Y ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])', time.gmtime())})


##### TASK 3 #####
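# Task 3 works by snapshot comparison: fetch every member of Category:Articles tagged for copyright
# problems, load the pickled list from the previous run, rotate the logs (today -> yesterday), and treat
# anything present now but absent last time as a newly blanked page. Tasks 5 and 10 below reuse the same
# pattern for pages transcluding {{Close paraphrasing}} and {{Copypaste}} respectively.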
page = action({'action': 'query', 'list': 'categorymembers', 'cmtitle': 'Category:Articles tagged for copyright problems', 'cmlimit': 'max'})
blankedPages = []
for i in page['query']['categorymembers']:
	blankedPages.append(i['title'])

file = open('todayLogCopyvio', 'rb') # pull up the previous run
alreadyBlankedPages = pickle.load(file)
file.close()
file = open('yesterdayLogCopyvio', 'wb') # overwrite yesterday's log with today's now that we have the change in articles
pickle.dump(alreadyBlankedPages, file)
file.close()
file = open('todayLogCopyvio', 'wb') # save log so it can be compared to the next run
pickle.dump(blankedPages, file)
file.close()
newBlankedPages = []
for x in blankedPages:
	if x not in alreadyBlankedPages:
		newBlankedPages.append(x) # now we have our list to run searches for

for i in newBlankedPages[:]: # iterate over a copy so that removing items doesn't skip entries
	if i[:5] == 'File:':
		newBlankedPages.remove(i) # also need to report elsewhere - list at [[WP:PUF?]]

##### TASK 5 #####
page = action({'action': 'query', 'list': 'embeddedin', 'eititle': 'Template:Close paraphrasing', 'eilimit': 'max'})
closeParaphrases = []
for i in page['query']['embeddedin']:
	closeParaphrases.append(i['title'])

file = open('todayLogCloseParaphrasing', 'rb') # pull up the previous run
oldCloseParaphrases = pickle.load(file)
file.close()
file = open('yesterdayLogCloseParaphrasing', 'wb') # overwrite yesterday's log with today's now that we have the change in articles
pickle.dump(oldCloseParaphrases, file)
file.close()
file = open('todayLogCloseParaphrasing', 'wb') # save log so it can be compared to the next run
pickle.dump(closeParaphrases, file)
file.close()

newCloseParaphrases = []
for x in closeParaphrases:
	if x not in oldCloseParaphrases:
		newCloseParaphrases.append(x) # now we have our list to run searches for

##### TASK 10 #####
page = action({'action': 'query', 'list': 'embeddedin', 'eititle': 'Template:Copypaste', 'eilimit': 'max'})
copyPastes = []
for i in page['query']['embeddedin']:
	copyPastes.append(i['title'])

file = open('todayLogCopypaste', 'rb') # pull up the previous run
oldCopyPastes = pickle.load(file)
file.close()
file = open('yesterdayLogCopypaste', 'wb') # overwrite yesterday's log with today's
pickle.dump(oldCopyPastes, file)
file.close()
file = open('todayLogCopypaste', 'wb') # save the new log so it can be compared to the next run tomorrow
pickle.dump(copyPastes, file)
file.close()

newCopyPastes = []
for x in copyPastes:
	if x not in oldCopyPastes:
		newCopyPastes.append(x) # now we have our list to run searches for

#######################
##### @ 00:10 GMT #####
#######################
while time.time() - startTime < 600: # no earlier than 00:10 GMT
	time.sleep(600 - (time.time() - startTime))

# always update NewListings - this is only needed so Zorglbot doesn't screw up; has no actual effect
page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems/NewListings', 'text': time.strftime('{{Wikipedia:Copyright problems/{{#time:Y F j|-7 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-6 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-5 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-4 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-3 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-2 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-1 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j}}}}<!--\n{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*168)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*144)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*120)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*96)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*72)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*48)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*24)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}', time.gmtime()), 'summary': time.strftime('Automatic addition of new listing for %-d %B %Y and archiving of listings older than 7 days ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])', time.gmtime())})

#######################
##### @ 00:20 GMT #####
#######################
while time.time() - startTime < 1200: # no earlier than 00:20 GMT
	time.sleep(1200 - (time.time() - startTime))

##### TASK 3 #####
p3 = re.compile('<!-- This is Cppage. Comment used by User:DumbBOT, do not remove  or change -->')
p4 = re.compile('====.*====')

page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'rvlimit': 1})

# group new page creation AND old page archival
if 'missing' in page['query']['pages'][page['query']['pages'].keys()[0]]:
	# CREATE AND POPULATE "BOT: Automatic creation of new daily page for copyright problems"
	page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'text': '{{subst:Cppage}}\n<!-- Add new listings at the bottom of the list with the following format:\n\n* {{subst:article-cv|ArticleName}} from [http://www.WhereItWasCopiedFrom.com]. ~~~~\n\n-->\n', 'summary': 'Automatic creation of new daily page for copyright problems including automated findings ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': 'Wikipedia:Copyright problems', 'rvlimit': 1})
	newtext = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*'].replace('\n\n===New listings===', time.strftime('\n{{Wikipedia:Copyright problems/%Y %B %-d}}\n\n===New listings===', time.gmtime(time.time()-60*60*192)))
	page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems', 'text': newtext.encode('utf-8'), 'summary': 'Automatic archiving of listings older than 7 days ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
elif not re.search(p3, page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']):
	# POPULATE "adding CorenSearchBot findings"
	page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'text': page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*'].replace(re.search(p4, page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']).group(),'{{subst:Cppage}}'), 'summary': 'Adding automated findings ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})

##### TASKS 3, 5, 7 and 10 #####
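# Both helpers below work from backlinks: first collect the redirects pointing at the title, then check
# the Wikipedia-namespace backlinks of the title and of each redirect. isAlreadyListed() returns True if
# any of them is linked from WP:Copyright problems, WP:Suspected copyright violations or the NewListings
# page; shouldBeRelisted() returns True if the page was linked from a dated SCV/CP subpage (so it was
# listed at some point) but no longer appears on the current listing pages.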
def isAlreadyListed(title):
	page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'blfilterredir': 'redirects'})
	page['query']['backlinks'].append({'title': title})
	for i in page['query']['backlinks']:
		page = action({'action': 'query', 'list': 'backlinks', 'bltitle': i['title'].encode('utf-8'), 'bllimit': 'max', 'blnamespace': '4'})
		for j in page['query']['backlinks']:
			if 'Wikipedia:Copyright problems' == j['title'] or 'Wikipedia:Suspected copyright violations' == j['title'] or 'Wikipedia:Copyright problems/NewListings' == j['title']:
				return True
	return False

# replace NewListings check with one for each of the 8 always-listed days ???

def shouldBeRelisted(title):
	page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'blfilterredir': 'redirects'})
	page['query']['backlinks'].append({'title': title})
	wasListed = False
	isListed = False
	for i in page['query']['backlinks']:
		page = action({'action': 'query', 'list': 'backlinks', 'bltitle': i['title'].encode('utf-8'), 'bllimit': 'max', 'blnamespace': '4'})
		for j in page['query']['backlinks']:
			if 'Wikipedia:Suspected copyright violations/' in j['title'] or 'Wikipedia:Copyright problems/' in j['title']:
				wasListed = True
			if 'Wikipedia:Copyright problems' == j['title'] or 'Wikipedia:Suspected copyright violations' == j['title'] or 'Wikipedia:Copyright problems/NewListings' == j['title']:
				isListed = True
	if wasListed and not isListed: return True
	return False

# replace NewListings check with one for each of the 8 always-listed days ???

addtext = ''
p0 = re.compile('{{Close paraphras.*?}}', re.IGNORECASE | re.DOTALL)
p1 = re.compile('{{Close paraphras.*?source.*?}}', re.IGNORECASE | re.DOTALL) # gets {{Close paraphrase}} and {{Close paraphrasing}}
p1a = re.compile('\|\W*free\W*=\W*yes', re.IGNORECASE | re.DOTALL) # is source free?
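
# Illustrative sketch only (not called by the bot): the chained find() slicing in the loops below pulls
# the value of a named template parameter ('source' here, 'url' further down) out of the matched template
# text, taking everything between the '=' and the next '|' or the closing braces. A helper along these
# lines expresses roughly the same idea and is included purely to document what that slicing does.
def exampleParamValue(tagText, name):
	match = re.search(name + r'\s*=\s*([^|}]*)', tagText, re.IGNORECASE | re.DOTALL)
	if match:
		return match.group(1).strip()
	return None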

for i in newCloseParaphrases:
	if not isAlreadyListed(i):
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			if re.search(p0, pageSource): # could be tag removed before it's analyzed
				temp = re.search(p0, pageSource).group()
				tag = re.search(p1, temp)
				if not re.search(p1a, temp): # only list at WP:CP if non-free
					if tag:
						if '|' in tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):]:
							addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tag.group()[tag.group().find('source') +\
							tag.group()[tag.group().find('source'):].find('=') + 1:tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') +\
							tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):].find('|')].strip() + '. ~~~~\n'
						else:
							addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' +\
							tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') + 1:-2].strip() + '. ~~~~\n'
					else: addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase. ~~~~\n'

moretext = ''
p2 = re.compile('{{Copyviocore.*?}}', re.IGNORECASE | re.DOTALL)

for i in newBlankedPages:
	if not isAlreadyListed(i):
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			tag = re.search(p2, pageSource)
			if tag:
				if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
					moretext += '* {{subst:article-cv|:' + i + '}} from ' + tag.group()[tag.group().find('url') +\
					tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Nomination completed by ~~~~\n'
				else:
					moretext += '* {{subst:article-cv|:' + i + '}} from ' +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Nomination completed by ~~~~\n'
			else: moretext += '* {{subst:article-cv|:' + i + '}} Nomination completed by ~~~~\n'


CopyPasteText = ''
p5 = re.compile('{{Copy.?past.*?}}|{{Copy\s*\|.*?}}|{{Copy\s*}}', re.IGNORECASE | re.DOTALL)
p6 = re.compile('{{Copy.?past.*?url.*?}}|{{Copy\s*\|.*?url.*?}}', re.IGNORECASE | re.DOTALL)

for i in newCopyPastes:
	if not isAlreadyListed(i):
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			if re.search(p5, pageSource): # could be tag removed before it's analyzed
				temp = re.search(p5, pageSource).group()
				tag = re.search(p6, temp)
				if tag:
					if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
						CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tag.group()[tag.group().find('url') +\
						tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
						tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. ~~~~\n'
					else:
						CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' +\
						tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. ~~~~\n'
				else: CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted. ~~~~\n'

### NOW FOR THE RELISTINGS ###
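# Pages that were already in yesterday's snapshots but whose listing has since been archived
# (shouldBeRelisted) are added again, marked 'Relisting.' so reviewers know they were overlooked.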
evenmoretext = ''
for i in blankedPages:
	if i in alreadyBlankedPages and shouldBeRelisted(i): # need to check alreadyBlankedPages as there is a delay between transclusion and backlinks
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			tag = re.search(p2, pageSource)
			if tag:
				if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
					evenmoretext += '* {{subst:article-cv|:' + i + '}} from ' + tag.group()[tag.group().find('url') +\
					tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
				else:
					evenmoretext += '* {{subst:article-cv|:' + i + '}} from ' +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
			else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Relisting. ~~~~\n'

for i in copyPastes:
	if i in oldCopyPastes and shouldBeRelisted(i):
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			tagSearch = re.search(p5, pageSource)
			if tagSearch is None: continue # tag may have been removed since the transclusion list was fetched
			temp = tagSearch.group()
			tag = re.search(p6, temp)
			if tag:
				if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
					evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tag.group()[tag.group().find('url') +\
					tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
				else:
					evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
			else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted. Relisting. ~~~~\n'


for i in closeParaphrases:
	if i in oldCloseParaphrases and shouldBeRelisted(i): # need to check oldCloseParaphrases as there is a delay between transclusion and backlinks
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			tagSearch = re.search(p0, pageSource)
			if tagSearch is None: continue # tag may have been removed since the transclusion list was fetched
			temp = tagSearch.group()
			tag = re.search(p1, temp)
			if not re.search(p1a, temp): # only list at WP:CP if non-free
				if tag:
					if '|' in tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):]:
						evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tag.group()[tag.group().find('source') +\
						tag.group()[tag.group().find('source'):].find('=') + 1:tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') +\
						tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
					else:
						evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' +\
						tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
				else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase. Relisting. ~~~~\n'

#addtext should be CloseParaphraseText
#moretext should be CopyvioText
#evenmoretext should be RelistText
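# The nested conditionals below assemble an edit summary naming whichever of the four buffers are
# non-empty: moretext (incomplete nominations), CopyPasteText (copy/pastes), addtext (close paraphrases)
# and evenmoretext (relistings).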

editsum = ''
if len(addtext) + len(moretext) + len(evenmoretext) + len(CopyPasteText):
	if len(addtext):
		if len(moretext):
			if len(evenmoretext):
				if len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes, close paraphrases and relisting overlooked pages'
				else: editsum = 'Adding incomplete nominations, close paraphrases and relisting overlooked pages'
			elif len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes and close paraphrases'
			else: editsum = 'Adding incomplete nominations and close paraphrases'
		elif len(evenmoretext):
			if len(CopyPasteText): editsum = 'Adding copy/pastes, close paraphrases and relisting overlooked pages'
			else: editsum = 'Adding close paraphrases and relisting overlooked pages'
		elif len(CopyPasteText): editsum = 'Adding copy/pastes and close paraphrases'
		else: editsum = 'Adding close paraphrases'
	elif len(moretext):
		if len(evenmoretext):
			if len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes and relisting overlooked pages'
			else: editsum = 'Adding incomplete nominations and relisting overlooked pages'
		elif len(CopyPasteText): editsum = 'Adding incomplete nominations and copy/pastes'
		else: editsum = 'Adding incomplete nominations'
	elif len(evenmoretext):
		if len(CopyPasteText): editsum = 'Adding copy/pastes and relisting overlooked pages'
		else: editsum = 'Relisting overlooked pages'
	else: editsum = 'Adding copy/pastes'

if len(editsum):
	page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime(time.time()-60*60*24)), 'appendtext': (u'\n' + moretext + CopyPasteText + addtext + evenmoretext).encode('utf-8'), 'section': 2, 'summary': editsum + ' ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})

############################
##### USERSPACE TRIALS #####
############################

##### TASK 4: notify authors that their pages have been blanked (by {{subst:copyvio}}) in case they aren't notified by the taggers, so that the pages don't get relisted for an extra week without any action being taken on them  #####
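# doNotify() decides whether the article's main contributor still needs to be told about the blanking. It
# skips the page if the article has been deleted, is tied to a CCI, a matching 'Copyright problem:' section
# already exists on a contributor's talk page, or the article's talk page carries an OTRS permission
# category. Otherwise it identifies the contributor who added the copied text (see the similarity check
# below) and still does nothing if the tagger left them a custom note or if they have edited the article,
# its talk page or the /Temp rewrite since tagging; only then does it report that a notification is needed.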

def doNotify(title):
	page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'prop': 'revisions|info', 'rvprop': 'timestamp|user', 'rvdir': 'newer', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'blredirect': 1}) # get backlinks and creation time/user as well as info to determine if it's deleted
	if 'missing' in page['query']['pages'][page['query']['pages'].keys()[0]]:
		return "'''Do Nothing''' Article has been deleted."
	for i in page['query']['backlinks']: # check for CCIs
		if i['title'][:47] == 'Wikipedia:Contributor copyright investigations/':
			return "'''Do Nothing''' [[" + i['title'] + '|CCI]]'
		elif i['title'][:14] == 'Wikipedia:CCI/':
			return "'''Do Nothing''' [[" + i['title'] + '|CCI]]'
		if 'redirlinks' in i:
			for j in i['redirlinks']:
				if j['title'][:47] == 'Wikipedia:Contributor copyright investigations/':
					return "'''Do Nothing''' [[" + j['title'] + '|CCI]]'
				elif j['title'][:14] == 'Wikipedia:CCI/':
					return "'''Do Nothing''' [[" + j['title'] + '|CCI]]'
	for i in page['query']['backlinks']: # parse talk pages to see if already notified
		if i['title'][:10] == 'User talk:':
			page2 = action({'action': 'parse', 'page': i['title'], 'prop': 'sections'})
			for j in page2['parse']['sections']:
				if j['line'] == 'Copyright problem: ' + title: # need to see if it matches a redirect title too... :(
					return "'''Do Nothing''' " + i['title'][10:] + ' already notified'
	page = action({'action': 'query', 'prop': 'categories', 'clcategories': 'Category:Items pending OTRS confirmation of permission|Category:Wikipedia pages with unconfirmed permission received by OTRS|Category:Wikipedia files with unconfirmed permission received by OTRS|Category:Items with OTRS permission confirmed', 'titles': 'Talk:'+title.encode('utf-8')})
	if 'categories' in page['query']['pages'][page['query']['pages'].keys()[0]]:
		return "'''Do Nothing''' OTRS tag"
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'ids|user', 'titles': title.encode('utf-8'), 'rvlimit': 'max'})
	articleRevisionIDs = []
	for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
		articleRevisionIDs.append(i['revid'])
	revisionMatch = []
	latest = ''
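	# Identify who added the copied text: walk the revisions (newest first) and compare each one against
	# the text preserved inside the {{copyvio}} div of the newest revision using difflib.SequenceMatcher.
	# The revision whose similarity drops the most relative to its parent is taken to be the edit that
	# introduced the copied material, and its author becomes the contributor to notify. The tagger defaults
	# to the newest revision's author and ends up as the author of the oldest revision containing {{Copyviocore}}.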
	for i in articleRevisionIDs:
		page = action({'action': 'query', 'prop': 'revisions', 'rvstartid': i, 'rvprop': 'content|user|timestamp', 'titles': title.encode('utf-8'), 'rvlimit': 1})
		if i == articleRevisionIDs[0]: # maybe ???
			tagger = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user'] # maybe ???
			tagtime = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp'] # maybe ??
		if '*' in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0].keys(): # ignore deleted revisions
			if latest == '': latest = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			if '{{Copyviocore' in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']: 
				tagger = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user']
				tagtime = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp']
			revisionMatch.append(difflib.SequenceMatcher(None, latest[latest.find('<!-- Do not use the "Copyviocore" template directly; the above line is generated by "subst:Copyvio|url" -->\n')+108:latest.find('</div>')], page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']).ratio())
	diffRevisionMatch = []
	for i in range(len(revisionMatch)):
		if i < len(revisionMatch)-1: diffRevisionMatch.append(round(revisionMatch[i]-revisionMatch[i+1], 6))
		else: diffRevisionMatch.append(round(revisionMatch[i], 6))
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'rvstartid': articleRevisionIDs[[i for i, x in enumerate(diffRevisionMatch) if x == max(diffRevisionMatch)][0]]})
	contributor = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user']
	# CHECK FOR CUSTOM NOTIFICATION
	#tagger at User talk:contributor > tagtime
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'User talk:' + contributor.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
	if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]: 
		for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
			if i['user'] == tagger: return "'''Do Nothing''' " + contributor + ' was left a custom notification'
	#contributor at Talk:Article/Temp page > tagtime
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8') + '/Temp', 'rvend': tagtime, 'rvlimit': 'max'})
	if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]: 
		for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
			if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' created the temporary page'
	#contributor at Talk:Article > tagtime
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
	if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]: 
		for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
			if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' edited the article talk page after it was tagged'
	#contributor at Article  > tagtime
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
	if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]: 
		for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
			if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' edited the article after it was tagged'
	return "'''Notify contributor''': """ + contributor + ' - tagged by ' + tagger

#narrowing with 'blnamespace': '3|4' breaks the blredirect parameter
# BETTER BUGFIX - try narrowed backlinks, then get list of redirects ONLY, then get backlinks for each redirect
# look for 'Copyright problem: <title or redirect>'

# list of all blanked pages
article = ''
for i in newBlankedPages:
	article += '*[[:' + i + ']] - ' + doNotify(i) + '\n'

page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial', 'text': (article + '\n').encode('utf-8'), 'section': 'new', 'summary': time.strftime('== %-d %B %Y ==', time.gmtime())})

##### TASK 6: flag when a contributor gets a CorenSearchBot/VWBot notice if he has had a significant amount before #####
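# Approach: collect every user talk page that CorenSearchBot or VWBot posted to yesterday, drop a
# hard-coded list of excluded users, then scan each talk page's recent history (up to the last 5,000
# revisions) for earlier notices from either bot. Anyone with more than four recorded notices is reported
# to the trial page with a collapsed list of their tagged articles.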
# CSBot's user talk contribs from 00:00:00 to 23:59:59 the previous day
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'CorenSearchBot', 'uclimit': 'max', 'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time()-60*60*24)), 'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time()-60*60*24)),  'ucnamespace': '3'})

users = {}
for i in page['query']['usercontribs']:
	users[i['title']] = []

# VWBot's user talk contribs from 00:00:00 to 23:59:59 the previous day
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'VWBot', 'uclimit': 'max', 'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time()-60*60*24)), 'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time()-60*60*24)),  'ucnamespace': '3'})

for i in page['query']['usercontribs']:
	users[i['title']] = []

for i in ['Merovingian', u'Leszek Jańczuk', 'Ganeshbot', 'Starzynka', 'Ser Amantio di Nicolao', 'Kumioko', 'Packerfansam', 'Alan Liefting']:
	try: del users['User talk:' + i]
	except KeyError: pass

for user in users.keys():
	# only checks last 5,000 edits
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'comment|timestamp|user', 'titles': user.encode('utf-8'), 'rvlimit': 'max'})
	for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
		if 'user' in i: # needed because RevDelete can return edits with no user field...apparently
			if i['user'] == 'VWBot' or i['user'] == 'CorenSearchBot':
				users[user].append([i['comment'][i['comment'].find('on')+3:], time.strftime('%Y %B %-d', time.strptime(i['timestamp'],'%Y-%m-%dT%H:%M:%SZ'))])

addition = u''
for user in users.keys():
	if len(users[user]) > 4:
		addition += '\n==== ' + str(len(users[user])) + ': {{User|1=' + user[10:] + '}} ====\n{{Collapse top|Tagged articles}}\n'
		for i in users[user]:
			addition += '* {{subst:article-cv|' + i[0] + '}} created on ' + i[1] + '\n'
		addition += '{{Collapse bottom}}\n'

if len(addition):
	page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial', 'appendtext': (u'\n\n=== Task 6 ===' + addition).encode('utf-8'), 'summary': 'Listing users who have had multiple articles tagged by CorenSearchBot/VWBot ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})