# User:InfoboxBot/wikipedia edit pages clean.py

import urllib.request
import urllib
import json
import requests
import time
import glob, os
import mwparserfromhell
import re
import pymysql.cursors

# One shared HTTP session for every API call below, so the cookies set
# during login are sent along with the later token and edit requests.
s = requests.Session()

# Sent with every request to identify this bot to the server.
headers = {"User-Agent": "enwiki Infobox power station editor by Garzfoth, v0.1a"}

# Connect to the local `infobox_power_station` MySQL database.
connection = pymysql.connect(
	host='localhost',
	user='',
	password='',
	db='infobox_power_station',
	charset='utf8mb4',
	cursorclass=pymysql.cursors.DictCursor,  # rows are accessed by column name below
)
try:
	with connection.cursor() as cursor:
		# Every page whose stored "status" parameter is the literal value "Active".
		cursor.execute(
			"SELECT pageid, title FROM `data` WHERE `key` = 'status' AND `value` = 'Active'"
		)
		result = cursor.fetchall()
finally:
	# The rows are already in memory; the connection is not needed afterwards.
	connection.close()


# --- Log in to the MediaWiki API (two-step handshake) -----------------------
# Base URL for GET requests; the urlencoded query string is appended to it.
baseurl = "https://en.wikipedia.org/w/api.php?"

# Step 1: request a login token. The session `s` keeps the cookies that
# accompany this token so step 2 is recognised as the same client.
query = {
	"action": "query",
	"format": "json",
	"meta": "tokens",
	"type": "login"
}
encodedquery = urllib.parse.urlencode(query)
login1 = s.get(baseurl+encodedquery, headers=headers)
# Parse the response once and reuse the token for both the log line and the POST.
logintoken = login1.json()["query"]["tokens"]["logintoken"]
print("Login #1: " + logintoken)

# Step 2: POST the credentials together with the token from step 1.
querypost = {
	"action": "login",
	"format": "json",
	"lgname": "",
	"lgpassword": "",
	"lgtoken": logintoken
}
login2 = s.post("https://en.wikipedia.org/w/api.php", data=querypost, headers=headers)
loginresult = login2.json()
print(loginresult)

# A rejected login is reported inside the JSON body rather than through the
# HTTP status, so check it explicitly and stop before any edit is attempted.
if loginresult.get("login", {}).get("result") != "Success":
	raise SystemExit("Login failed; aborting before any edits are attempted.")



# Textual prefixes identifying the infobox templates this bot edits.
# str.startswith accepts a tuple, so one call replaces the chain of `or`s.
infobox_prefixes = (
	"{{Infobox power station",
	"{{infobox power station",
	"{{Infobox power plant",
	"{{infobox power plant",
	"{{Infobox wind farm",
	"{{infobox wind farm",
	"{{Infobox nuclear power station",
	"{{infobox nuclear power station",
)

# For every page selected from the database: fetch the current wikitext, set
# the infobox "status" parameter to "O", and save the page through the API.
for item in result:
	pageid = str(item["pageid"])
	print(item["title"] + " - " + pageid)

	# Fetch the latest revision's content and timestamp, plus the server's
	# current time (both timestamps are passed back with the edit below).
	query = {
		"action": "query",
		"format": "json",
		"curtimestamp": 1,
		"prop": "revisions",
		"pageids": item["pageid"],
		"rvprop": "content|timestamp"
	}
	encodedquery = urllib.parse.urlencode(query)
	print(baseurl+encodedquery)
	response = s.get(baseurl+encodedquery, headers=headers)
	# Parse the JSON body once instead of re-parsing it for every field.
	data = response.json()

	# A page that no longer exists comes back without a "revisions" list;
	# skip it rather than aborting the whole run with a KeyError.
	page = data.get("query", {}).get("pages", {}).get(pageid, {})
	revisions = page.get("revisions")
	if not revisions:
		print("Skipping: no revision content returned for this page")
		continue
	revision = revisions[0]

	wikicode = mwparserfromhell.parse(revision["*"])
	templates = wikicode.filter_templates()
	# Default of None so a page without a matching infobox is skipped
	# instead of raising StopIteration and ending the run.
	tpl = next((x for x in templates if x.startswith(infobox_prefixes)), None)
	if tpl is None:
		print("Skipping: no matching infobox template found")
		continue
	tpl.add("status", "O")

	# Request a CSRF token for the edit.
	query = {
		"action": "query",
		"format": "json",
		"meta": "tokens",
		"type": "csrf"
	}
	encodedquery = urllib.parse.urlencode(query)
	csrf = s.get(baseurl+encodedquery, headers=headers)

	querypost = {
		"action": "edit",
		"assert": "user",
		"format": "json",
		"pageid": item["pageid"],
		"text": str(wikicode),
		"summary": "Automated edit: fixing infobox parameter \"status\"",
		"minor": 1,
		# Timestamps of the fetched revision and of the fetch itself.
		"basetimestamp": revision["timestamp"],
		"starttimestamp": data["curtimestamp"],
		"nocreate": 1,
		"watchlist": "nochange",
		"token": csrf.json()["query"]["tokens"]["csrftoken"]
	}
	finalresult = s.post("https://en.wikipedia.org/w/api.php", data=querypost, headers=headers)
	print(finalresult)
	# The HTTP status alone does not show whether the edit was accepted;
	# the API reports success or errors in the JSON body, so print that too.
	print(finalresult.json())

	# Pause between edits to keep the edit rate low.
	time.sleep(10)