User:InfoboxBot/wikipedia edit pages clean.py
Appearance
import urllib.request
import urllib
import urllib.parse
import json
import time
import glob
import os
import re

import requests
import mwparserfromhell
import pymysql.cursors

# Bot script: for every page recorded in the local `infobox_power_station`
# database with infobox parameter `status` = 'Active', fetch the current
# wikitext from the English Wikipedia, set the infobox's `status` parameter
# to "O", and save the page through the MediaWiki action API.
#
# NOTE(review): the database user/password and the wiki login name/password
# are empty strings in this copy -- presumably blanked before publication;
# they must be filled in before the script can run.

s = requests.Session()
headers = {
    'User-Agent': 'enwiki Infobox power station editor by Garzfoth, v0.1a'
}

# GET requests append an urlencoded query string to `baseurl`; POST requests
# go to the same endpoint without the trailing "?".
baseurl = "https://en.wikipedia.org/w/api.php?"
apiurl = "https://en.wikipedia.org/w/api.php"

# Template openings recognised as the infobox to edit. The match is
# case-sensitive, so both capitalisations of each name are listed.
INFOBOX_PREFIXES = (
    "{{Infobox power station",
    "{{infobox power station",
    "{{Infobox power plant",
    "{{infobox power plant",
    "{{Infobox wind farm",
    "{{infobox wind farm",
    "{{Infobox nuclear power station",
    "{{infobox nuclear power station",
)

# --- 1. Collect the pages to edit from the local database -----------------
connection = pymysql.connect(host='localhost',
                             user='',
                             password='',
                             db='infobox_power_station',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        sql = "SELECT pageid, title FROM `data` WHERE `key` = 'status' AND `value` = 'Active'"
        cursor.execute(sql)
        # DictCursor: each row is a dict with keys "pageid" and "title".
        result = cursor.fetchall()
finally:
    connection.close()

# --- 2. Log in: fetch a login token, then post the credentials ------------
query = {
    "action": "query",
    "format": "json",
    "meta": "tokens",
    "type": "login"
}
encodedquery = urllib.parse.urlencode(query)
login1 = s.get(baseurl + encodedquery, headers=headers)
logintoken = login1.json()["query"]["tokens"]["logintoken"]
print("Login #1: " + logintoken)

querypost = {
    "action": "login",
    "format": "json",
    "lgname": "",
    "lgpassword": "",
    "lgtoken": logintoken
}
login2 = s.post(apiurl, data=querypost, headers=headers)
print(login2.json())

# --- 3. Edit each page ----------------------------------------------------
for item in result:
    pageid = item["pageid"]
    print(item["title"] + " - " + str(pageid))

    # Fetch the latest revision's wikitext together with its timestamp and
    # the server's current timestamp (both are sent back with the edit).
    query = {
        "action": "query",
        "format": "json",
        "curtimestamp": 1,
        "prop": "revisions",
        "pageids": pageid,
        "rvprop": "content|timestamp"
    }
    encodedquery = urllib.parse.urlencode(query)
    print(baseurl + encodedquery)
    response = s.get(baseurl + encodedquery, headers=headers)
    payload = response.json()  # decode once, reuse below

    page = payload.get("query", {}).get("pages", {}).get(str(pageid), {})
    revisions = page.get("revisions")
    if not revisions:
        # No revision data came back for this page id; skip it rather than
        # abort the whole run with a KeyError.
        print("Skipping: no revision content returned for this page")
        continue
    revision = revisions[0]

    wikicode = mwparserfromhell.parse(revision["*"])
    # First template whose text starts with one of the known infobox names.
    tpl = next(
        (x for x in wikicode.filter_templates()
         if x.startswith(INFOBOX_PREFIXES)),
        None,
    )
    if tpl is None:
        # Without the default, next() would raise StopIteration here and
        # stop the batch at the first page lacking a matching infobox.
        print("Skipping: no matching infobox template found on this page")
        continue
    tpl.add("status", "O")

    # A CSRF token is requested before every edit.
    query = {
        "action": "query",
        "format": "json",
        "meta": "tokens",
        "type": "csrf"
    }
    encodedquery = urllib.parse.urlencode(query)
    csrf = s.get(baseurl + encodedquery, headers=headers)

    querypost = {
        "action": "edit",
        "assert": "user",
        "format": "json",
        "pageid": pageid,
        "text": str(wikicode),
        "summary": "Automated edit: fixing infobox parameter \"status\"",
        "minor": 1,
        # Timestamps of the fetched revision and of the fetch itself.
        "basetimestamp": revision["timestamp"],
        "starttimestamp": payload["curtimestamp"],
        "nocreate": 1,
        "watchlist": "nochange",
        "token": csrf.json()["query"]["tokens"]["csrftoken"]
    }
    finalresult = s.post(apiurl, data=querypost, headers=headers)
    # NOTE(review): this prints only the HTTP response object; the API
    # reports edit errors inside the JSON body, so a failure may still
    # show a 200 status here -- consider printing finalresult.json().
    print(finalresult)

    # Throttle: wait 10 seconds between edits.
    time.sleep(10)