# Source page: User:InfoboxBot/wikipedia edit pages clean.py
import urllib.request
import urllib
import json
import requests
import time
import glob, os
import mwparserfromhell
import re
import pymysql.cursors
# User-Agent sent with every API request made by this script.
headers = {'User-Agent': 'enwiki Infobox power station editor by Garzfoth, v0.1a'}

# A single session is reused for the login and for all later API calls.
s = requests.Session()
# Load the work list from the local `infobox_power_station` database: every
# (pageid, title) row whose stored `status` value is 'Active'.
# DictCursor makes each row a dict, which the edit loop indexes by column name.
# NOTE: user and password are blank in this copy of the script.
connection = pymysql.connect(
    host='localhost',
    user='',
    password='',
    db='infobox_power_station',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
try:
    with connection.cursor() as cursor:
        cursor.execute(
            "SELECT pageid, title FROM `data` WHERE `key` = 'status' AND `value` = 'Active'"
        )
        result = cursor.fetchall()
finally:
    # Close the connection whether or not the query succeeded.
    connection.close()
# --- Log in to the MediaWiki API -------------------------------------------
# Two steps: fetch a login token, then post the credentials with that token.
# Both requests go through the shared session `s`.

# Base URL for GET requests; the edit loop further down reuses it.
baseurl = "https://en.wikipedia.org/w/api.php?"

# Step 1: ask the API for a login token.
query = {
    "action": "query",
    "format": "json",
    "meta": "tokens",
    "type": "login"
}
encodedquery = urllib.parse.urlencode(query)
login1 = s.get(baseurl+encodedquery, headers=headers)
login1.raise_for_status()
# Parse the response once and reuse the token for both the log line and the POST.
logintoken = login1.json()["query"]["tokens"]["logintoken"]
print("Login #1: " + logintoken)

# Step 2: post the credentials together with the token.
# NOTE: lgname and lgpassword are blank in this copy of the script.
querypost = {
    "action": "login",
    "format": "json",
    "lgname": "",
    "lgpassword": "",
    "lgtoken": logintoken
}
login2 = s.post("https://en.wikipedia.org/w/api.php", data=querypost, headers=headers)
login2.raise_for_status()
login2data = login2.json()
print(login2data)

# Stop here if the API did not confirm the login: every edit below asserts a
# logged-in user, so continuing would only produce a series of failed edits.
if login2data.get("login", {}).get("result") != "Success":
    raise SystemExit("Login failed; aborting before any edits are attempted")
# Opening text of the infobox templates this script edits. Both capitalisations
# of the first letter are listed because the check is a plain prefix match on
# the template's wikitext.
INFOBOX_PREFIXES = (
    "{{Infobox power station",
    "{{infobox power station",
    "{{Infobox power plant",
    "{{infobox power plant",
    "{{Infobox wind farm",
    "{{infobox wind farm",
    "{{Infobox nuclear power station",
    "{{infobox nuclear power station",
)

# For every page selected from the database: fetch the current wikitext, set
# the infobox "status" parameter to "O", and save the page as a minor edit.
for item in result:
    print(item["title"] + " - " + str(item["pageid"]))

    # Fetch the latest revision (content + timestamp) and the server's current
    # time; both timestamps are sent back with the edit request.
    query = {
        "action": "query",
        "format": "json",
        "curtimestamp": 1,
        "prop": "revisions",
        "pageids": item["pageid"],
        "rvprop": "content|timestamp"
    }
    encodedquery = urllib.parse.urlencode(query)
    print(baseurl+encodedquery)
    response = s.get(baseurl+encodedquery, headers=headers)
    # Parse the body once instead of re-decoding it for every field.
    data = response.json()
    try:
        revision = data["query"]["pages"][str(item["pageid"])]["revisions"][0]
    except (KeyError, IndexError):
        # The response carries no revision for this page id; skip it instead
        # of crashing the whole run.
        print("Skipping: no revision returned for this page")
        continue

    wikicode = mwparserfromhell.parse(revision["*"])
    templates = wikicode.filter_templates()
    # First template whose wikitext starts with one of the infobox prefixes.
    # A default of None avoids an uncaught StopIteration when nothing matches.
    tpl = next((x for x in templates if str(x).startswith(INFOBOX_PREFIXES)), None)
    if tpl is None:
        print("Skipping: no matching infobox template found")
        continue
    tpl.add("status", "O")

    # A CSRF token is required for the edit; it is fetched fresh for each page.
    query = {
        "action": "query",
        "format": "json",
        "meta": "tokens",
        "type": "csrf"
    }
    encodedquery = urllib.parse.urlencode(query)
    csrf = s.get(baseurl+encodedquery, headers=headers)

    querypost = {
        "action": "edit",
        "assert": "user",
        "format": "json",
        "pageid": item["pageid"],
        "text": str(wikicode),
        "summary": "Automated edit: fixing infobox parameter \"status\"",
        "minor": 1,
        # Timestamps of the fetched revision and of the fetch itself.
        "basetimestamp": revision["timestamp"],
        "starttimestamp": data["curtimestamp"],
        "nocreate": 1,
        "watchlist": "nochange",
        "token": csrf.json()["query"]["tokens"]["csrftoken"]
    }
    finalresult = s.post("https://en.wikipedia.org/w/api.php", data=querypost, headers=headers)
    print(finalresult)
    # Pause between edits.
    time.sleep(10)