Jump to content

User:PointBot/source

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
# coding: utf-8
import urllib2, time, urllib
import random

#cookielib:
import cookielib
# Short aliases for the urllib2 entry points. The functions visible in this
# file call urllib2.urlopen / urllib2.Request directly rather than these.
urlopen = urllib2.urlopen
Request = urllib2.Request
# Cookie jar that collects the cookies set by the server's responses.
cj = cookielib.LWPCookieJar()
# Opener whose cookie processor stores cookies in `cj` and sends them back.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# Make it the default opener so every later urllib2.urlopen() call in this
# module goes through the cookie-aware opener.
urllib2.install_opener(opener)

#################################
#	Wikipedia functions	#
#################################

def parse(page, tag):
  """Return the value of the first complete tag="value" pair found in page.

  page is an iterable of text lines; tag is the attribute name to look for.
  Each line is searched for the text tag + '="' and, when found, the text up
  to the next double quote is returned.  A line where the value has no closing
  quote is skipped and the search continues with the following lines.
  Returns None when no line yields a value.

  The search is a plain substring match, so a tag such as 'token' also hits
  attributes whose names merely end with it (for example 'lgtoken').
  """
  marker = tag + '="'
  for line in page:
    start = line.find(marker)
    if start == -1:
      continue
    start += len(marker)
    end = line.find('"', start)
    if end != -1:
      return line[start:end]
	
def load(name):
  """Download the current wikitext of the page titled name.

  Posts a prop=revisions / rvprop=content query to the English Wikipedia API
  and cuts the revision text out of the XML reply: everything between the
  first '>' that follows the 'xml:space' marker and the first '</rev>'.

  The text is returned exactly as it appears in the XML (entities such as
  '&lt;' are not decoded).  Returns '' when the reply holds no revision
  content, e.g. when the 'xml:space' marker or the closing '</rev>' tag is
  missing.
  """
  query = urllib.urlencode({'format':'xml', 'action':'query','prop':'revisions', 'rvprop':'content', 'titles':name})
  response = urllib2.urlopen('http://en.wikipedia.org/w/api.php?', query)
  try:
    reply = response.read()
  finally:
    # Always release the connection, even if reading fails.
    response.close()
  marker = reply.find('xml:space')
  if marker == -1:
    # No revision element in the reply (the original returned '' here too,
    # but only by accident of negative slicing).
    return ''
  start = reply.find('>', marker) + 1
  end = reply.find('</rev>', start)
  if start == 0 or end == -1:
    # Without a closing tag there is no delimited content to return.
    return ''
  return reply[start:end]
  
def login():
  #Get token
  data=urllib.urlencode({'format':'xml', 'action':'login', 'lgname':'PointBot', 'lgpassword':password})
  loginString='http://en.wikipedia.org/w/api.php?'
  loginpage=urllib2.urlopen(loginString, data)
  loginpage=loginpage.readlines()
  token=parse(loginpage, 'token')
  cookieprefix=parse(loginpage, 'cookieprefix')
  sessionid=parse(loginpage, 'sessionid')
  #login
  data=urllib.urlencode({'enwiki_session':sessionid,'format':'xml', 'action':'login', 'lgname':'PointBot', 'lgpassword':password, 'lgtoken':token})
  loginString='http://en.wikipedia.org/w/api.php?'
  loginpage=urllib2.urlopen(loginString, data)
  loginpage=loginpage.readlines()
  lguserid=parse(loginpage, 'lguserid')
  lgtoken=parse(loginpage, 'lgtoken')
  sessionid=parse(loginpage, 'sessionid')
  print 'Login was: ', parse(loginpage, 'result')
  print lguserid, lgtoken, sessionid
  return lguserid, lgtoken, sessionid

def get_edit_token(name, lguserid, lgtoken, sessionid):
  """Fetch an edit token and a timestamp for the page titled name.

  Posts a prop=info|revisions query with intoken=edit for name and returns
  (timestamp, edittoken) as extracted by parse(); either value is None when
  the attribute is absent from the reply.

  lguserid, lgtoken and sessionid are accepted for backward compatibility but
  are not sent: the request relies on the cookie-aware opener installed at
  module level to carry the login session.

  NOTE(review): parse() matches attribute names by substring, so 'timestamp'
  may resolve to a 'starttimestamp' attribute if one precedes the revision
  timestamp in the reply -- confirm against a live API response.
  """
  # Ask about the requested page; the previous version always queried
  # 'Main Page' and silently ignored `name`.
  query = urllib.urlencode({'format':'xml', 'action':'query', 'prop':'info|revisions', 'intoken':'edit', 'titles':name})
  response = urllib2.urlopen(urllib2.Request('http://en.wikipedia.org/w/api.php?', query))
  try:
    reply = response.readlines()
  finally:
    # Always release the connection, even if reading fails.
    response.close()
  return parse(reply, 'timestamp'), parse(reply, 'edittoken')

def edit_full(name, newcontent, timestamp, edittoken, summary):
  """Post an edit that sets the text of page name to newcontent.

  timestamp is sent as 'basetimestamp', edittoken as 'token' and summary as
  the edit summary.  The server reply is neither read nor checked, so a
  rejected edit goes unnoticed.  Returns None.
  """
  api_url = 'http://en.wikipedia.org/w/api.php?'
  fields = {
    'format': 'xml',
    'action': 'edit',
    'title': name,
    'summary': summary,
    'text': newcontent,
    'basetimestamp': timestamp,
    'token': edittoken,
  }
  urllib2.urlopen(api_url, urllib.urlencode(fields))
  
  
def edit_add(name, newcontent, timestamp, edittoken, summary):
  """Post an edit to page name with section='new' and newcontent as its text.

  Takes the same arguments as edit_full(); the only difference in the request
  is the extra 'section': 'new' field.  The server reply is neither read nor
  checked.  Returns None.
  """
  api_url = 'http://en.wikipedia.org/w/api.php?'
  fields = {
    'format': 'xml',
    'action': 'edit',
    'title': name,
    'section': 'new',
    'summary': summary,
    'text': newcontent,
    'basetimestamp': timestamp,
    'token': edittoken,
  }
  urllib2.urlopen(api_url, urllib.urlencode(fields))
  
def setup():
  """Log in and fetch an initial edit token for the bot's log page.

  Returns (lguserid, lgtoken, sessionid, edittoken).  The timestamp that
  get_edit_token() also returns is discarded.
  """
  user_id, login_token, session = login()
  _, edit_token = get_edit_token('User:PointBot/log', user_id, login_token, session)
  return user_id, login_token, session, edit_token

#################################
#	Analysis functions	#
#################################

def findNextLink(page):
  """Return the target of a randomly chosen [[wiki link]] found in page.

  page is the wikitext as a string (any sequence of single characters
  works).  A link target is the text between '[[' and the first ']' or '|'
  that follows.  Targets containing ':' are skipped, which leaves out
  namespaced and interwiki links.  Useful for randomly surfing Wikipedia.

  Raises IndexError (from random.choice) when page has no usable link.
  """
  links = []
  size = len(page)
  for i in range(size - 1):
    if page[i] != '[' or page[i + 1] != '[':
      continue
    end = i + 2
    while end < size and page[end] not in (']', '|'):
      end += 1
    if end == size:
      # Unterminated '[[': nothing after this point can close a link either.
      # The previous version ran off the end of the page here and raised,
      # discarding the links it had already collected.
      break
    target = ''.join(page[i + 2:end])
    if ':' not in target:
      links.append(target)
  return random.choice(links)

def getFirstSentence(page):
  """Return page up to and including the period ending its first sentence.

  This is a rule-based heuristic.  A period ends the sentence only when all
  of the following hold:
    * it is outside every {}, [], (), <> and &lt; &gt; pair,
    * it is outside italics (a '' pair toggles italics on and off),
    * the character two positions before it is neither a space nor a
      period, and the period is not within the first two characters
      (skips one-letter abbreviations such as initials),
    * the two characters before the preceding one are not "''".

  Returns None when no such period exists.
  """
  depth = 0           # 0 at top level, negative while inside any bracket pair
  in_italics = False
  size = len(page)
  for i in range(size):
    char = page[i]
    if char in ('{', '[', '(', '<') or page[i:i + 4] == '&lt;':
      depth -= 1
    if char in ('}', ']', ')', '>') or page[i:i + 4] == '&gt;':
      depth += 1
    # Bounds check: an apostrophe as the very last character used to raise
    # IndexError on page[i + 1].
    if char == "'" and i + 1 < size and page[i + 1] == "'":
      in_italics = not in_italics
    if char != '.' or depth != 0 or in_italics:
      continue
    # i < 2 used to wrap around to the end of the page via negative indexing;
    # the start of the text is now treated like a word boundary.
    if i < 2 or page[i - 2] in (' ', '.'):
      continue
    if i >= 3 and page[i - 3:i - 1] == "''":
      continue
    return page[:i + 1]
  return None
	  
def verb_in_first_sentence(page):
  """Return 1 if the first sentence of page contains a defining verb, else 0.

  The first sentence comes from getFirstSentence().  It is split into
  alphabetic words and each word is compared, case-insensitively, with the
  verb list below.  Whole-word comparison is used because a substring test
  lets 'is' match inside words such as 'This' or 'history', which made the
  check pass for almost any sentence.

  Returns 0 when no first sentence could be found.
  """
  import re  # local import: the module header does not import re

  verbs = frozenset(['is', 'are', 'were', 'was', 'will', 'refers'])
  first = getFirstSentence(page)
  if not first:
    # No sentence found, so there is no verb (previously a TypeError).
    return 0
  for word in re.findall(r'[A-Za-z]+', first):
    if word.lower() in verbs:
      return 1
  return 0

def run(name, names, lguserid, lgtoken, sessionid, edittoken):
  page=load(name)
  print 'Checking: ', name
  if page!='':
    if '''{{disambiguation}}''' not in page and '''{{disambig}}''' not in page and page[0] != '#':#if it is not a disambugation page
      if verb_in_first_sentence(page) == 0:#if no correct verb is in first sentence
	timestamp, edittoken=get_edit_token(name, lguserid, lgtoken, sessionid)
	oldpage=load('User:PointBot/log')
	first=first=getFirstSentence(page)
	if name not in oldpage:
	  edit_full('User:PointBot/log', load('User:PointBot/log') + '\n\nArticle [[' + name + ']] lacks a proper descriptive introduction and could use some editing.\n' + str(time.time()), timestamp, edittoken, 'Verb report')
	print 'Article ' + name + ' lacks proper descriptive introduction', first
      try:
	nextname=findNextLink(page)
      except:nextname=random.choice(names)
    else:nextname=random.choice(names)#if page was disamb
  else:nextname=random.choice(names)#if page was blank
  return nextname



# Script entry point: log in, then follow random links from article to article
# forever, checking each article on the way.
if True:
  # Read by login() as a module-level global; replace the placeholder with
  # the real password.
  password='*********'
  #good example: urban design
  lguserid, lgtoken, sessionid, edittoken=setup()
  # Titles visited so far; used as the fallback pool when a page gives no
  # usable next link.  Titles are appended on every step, so repeats occur.
  names=['wiki']
  name=run('wiki', names, lguserid, lgtoken, sessionid, edittoken)
  while True:
    try:
      names.append(name)
      name=run(name, names, lguserid, lgtoken, sessionid, edittoken)
    except Exception:
      # Best effort: any failure on one page just restarts from a known title.
      # Catching Exception (not a bare except) keeps KeyboardInterrupt and
      # SystemExit working, so the bot can be stopped with Ctrl-C.
      name=random.choice(names)