Jump to content

User:InfoboxBot/wikipedia edit pages clean.py

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by JJMC89 (talk | contribs) at 05:26, 11 December 2017 (syntaxhighlight). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
import glob
import json
import os
import re
import time
import urllib
import urllib.parse
import urllib.request

import mwparserfromhell
import pymysql.cursors
import requests

# One HTTP session for every MediaWiki API call so login cookies persist.
s = requests.Session()

# Identify the bot per the Wikimedia User-Agent policy.
headers = {
	'User-Agent': 'enwiki Infobox power station editor by Garzfoth, v0.1a'
}

# Pull the worklist: every page whose infobox currently says status=Active.
# NOTE(review): credentials are blank here — presumably filled in before a run.
connection = pymysql.connect(
	host='localhost',
	user='',
	password='',
	db='infobox_power_station',
	charset='utf8mb4',
	cursorclass=pymysql.cursors.DictCursor,
)
try:
	with connection.cursor() as cursor:
		cursor.execute(
			"SELECT pageid, title FROM `data` "
			"WHERE `key` = 'status' AND `value` = 'Active'"
		)
		# List of dicts with 'pageid' and 'title' keys (DictCursor).
		result = cursor.fetchall()
finally:
	# Always release the DB connection, even if the query fails.
	connection.close()


# --- Log in to the MediaWiki API (two steps: fetch a login token, POST it) ---
baseurl = "https://en.wikipedia.org/w/api.php?"

# Step 1: request a login token.
query = {
	"action": "query",
	"format": "json",
	"meta": "tokens",
	"type": "login"
}
encodedquery = urllib.parse.urlencode(query)
login1 = s.get(baseurl + encodedquery, headers=headers)
# Parse the response once and reuse the token (was parsed twice before).
logintoken = login1.json()["query"]["tokens"]["logintoken"]
print("Login #1: " + logintoken)

# Step 2: POST the credentials together with the token.
# NOTE(review): lgname/lgpassword are blank — presumably filled in before a run.
querypost = {
	"action": "login",
	"format": "json",
	"lgname": "",
	"lgpassword": "",
	"lgtoken": logintoken
}
login2 = s.post("https://en.wikipedia.org/w/api.php", data=querypost, headers=headers)
print(login2.json())



# Template name prefixes (both capitalizations) that identify the target infobox.
INFOBOX_PREFIXES = (
	"{{Infobox power station", "{{infobox power station",
	"{{Infobox power plant", "{{infobox power plant",
	"{{Infobox wind farm", "{{infobox wind farm",
	"{{Infobox nuclear power station", "{{infobox nuclear power station",
)

# For each worklist page: fetch its wikitext, set the infobox parameter
# status=O, and save the edit through the API.
for item in result:
	print(item["title"] + " - " + str(item["pageid"]))

	# Fetch the latest revision content plus the timestamps needed for
	# edit-conflict detection (basetimestamp/starttimestamp below).
	query = {
		"action": "query",
		"format": "json",
		"curtimestamp": 1,
		"prop": "revisions",
		"pageids": item["pageid"],
		"rvprop": "content|timestamp"
	}
	encodedquery = urllib.parse.urlencode(query)
	print(baseurl + encodedquery)
	response = s.get(baseurl + encodedquery, headers=headers)
	# Parse the JSON once instead of re-parsing it for every field access.
	data = response.json()
	revision = data["query"]["pages"][str(item["pageid"])]["revisions"][0]

	wikicode = mwparserfromhell.parse(revision["*"])
	templates = wikicode.filter_templates()
	# str.startswith accepts a tuple, so one call covers all prefix variants.
	# Use a default of None so a page without a matching infobox is skipped
	# instead of crashing the whole run with StopIteration.
	tpl = next((x for x in templates if x.startswith(INFOBOX_PREFIXES)), None)
	if tpl is None:
		print("No matching infobox template found - skipping")
		time.sleep(10)
		continue
	tpl.add("status", "O")

	# Fetch a CSRF token for the edit.
	query = {
		"action": "query",
		"format": "json",
		"meta": "tokens",
		"type": "csrf"
	}
	encodedquery = urllib.parse.urlencode(query)
	csrf = s.get(baseurl + encodedquery, headers=headers)

	querypost = {
		"action": "edit",
		"assert": "user",
		"format": "json",
		"pageid": item["pageid"],
		"text": str(wikicode),
		"summary": "Automated edit: fixing infobox parameter \"status\"",
		"minor": 1,
		"basetimestamp": revision["timestamp"],
		"starttimestamp": data["curtimestamp"],
		"nocreate": 1,
		"watchlist": "nochange",
		"token": csrf.json()["query"]["tokens"]["csrftoken"]
	}
	finalresult = s.post("https://en.wikipedia.org/w/api.php", data=querypost, headers=headers)
	# Print the decoded API result (the bare Response repr was uninformative),
	# matching how the login response is reported above.
	print(finalresult.json())

	# Throttle to one edit per 10 seconds — presumably to respect bot policy.
	time.sleep(10)