# User:PharyngealImplosive7/IPAlinker.py
import pywikibot
from mwparserfromhell import parse, nodes
import os
import random
# Configuration
# Account name handed to pywikibot.Site() as ``user`` in main().
USERNAME = "PharyngealImplosive7"
# Category whose articles are scanned; passed to fast_category_loader().
CATEGORY_TITLE = "Category:Languages"
# Plain-text log, one page title per line, of pages handled by earlier runs
# (read by load_processed_pages(), appended to by save_processed_page()).
PROCESSED_FILE = "processed_pages.txt"
# Maximum number of pages examined per run; main() draws them at random from
# the first PAGE_LIMIT * 2 candidates that are not in the processed log.
PAGE_LIMIT = 5
# An {{IPA}} template is only converted when its first parameter, after
# stripping one pair of enclosing brackets, is at most this many characters.
MAX_IPA_LENGTH = 5
# Passed as ``total=`` to Category.articles() in fast_category_loader().
# NOTE(review): despite the name, ``total`` appears to cap how many pages are
# returned overall rather than set a request batch size - confirm intent.
API_CHUNK_SIZE = 50
def load_processed_pages(path=None):
    """Return the set of page titles recorded in the processed-pages log.

    Args:
        path: File to read, one title per line.  When omitted, the
            module-level ``PROCESSED_FILE`` is used.

    Returns:
        A set of title strings; an empty set when the file does not exist.
    """
    if path is None:
        path = PROCESSED_FILE
    try:
        # UTF-8 is forced so non-ASCII titles round-trip on every platform.
        # errors="replace" keeps a log written by an earlier run under some
        # other locale encoding from aborting the run; such titles simply
        # will not match and the page is offered again.
        with open(path, encoding="utf-8", errors="replace") as handle:
            return set(handle.read().splitlines())
    except FileNotFoundError:
        # First run: nothing has been processed yet.
        return set()


def save_processed_page(page_title, path=None):
    """Append *page_title* as a single line to the processed-pages log.

    Args:
        page_title: Title to record.
        path: File to append to.  When omitted, the module-level
            ``PROCESSED_FILE`` is used.
    """
    if path is None:
        path = PROCESSED_FILE
    # Same explicit encoding as the reader; the platform default may be
    # unable to encode some page titles.
    with open(path, 'a', encoding="utf-8") as handle:
        handle.write(f"{page_title}\n")
def fast_category_loader(site, category_name):
    """Return the article iterable for *category_name* on *site*.

    Wraps ``pywikibot.Category(...).articles()`` restricted to namespace 0,
    recursing into subcategories, with ``total=API_CHUNK_SIZE``.

    NOTE(review): in pywikibot ``total`` looks like an overall cap on the
    number of pages yielded, not a per-request batch size, so callers never
    see more than API_CHUNK_SIZE pages - confirm this is intended.
    """
    return pywikibot.Category(site, category_name).articles(
        namespaces=0,
        recurse=True,
        total=API_CHUNK_SIZE,
    )
def process_ipa_templates(wikicode):
    """Rewrite short ``{{IPA|...}}`` templates inside tables as ``{{IPA link|...}}``.

    A template is converted when all of the following hold:

    * its name is ``ipa`` (case-insensitive, surrounding whitespace ignored);
    * it has a table Tag node among its ancestors;
    * it has at least one parameter;
    * the first parameter, after removing one enclosing ``()``, ``[]`` or
      ``{}`` pair, is non-empty and at most ``MAX_IPA_LENGTH`` characters.

    When an enclosing bracket pair was present, the brackets are moved out of
    the template and re-inserted as plain text around it, e.g.
    ``{{IPA|[a]}}`` becomes ``[{{IPA link|a}}]``.

    Args:
        wikicode: A parsed ``mwparserfromhell`` Wikicode tree; modified in
            place.

    Returns:
        The number of templates that were converted.
    """
    bracket_pairs = {'(': ')', '[': ']', '{': '}'}
    changes = 0
    # filter_templates() returns a list, so the text nodes inserted below
    # cannot disturb the iteration.
    for template in wikicode.filter_templates():
        if template.name.strip().lower() != 'ipa':
            continue
        if not _has_table_ancestor(wikicode, template):
            continue
        if not template.params:
            continue

        raw_content = str(template.params[0].value).strip()
        content = raw_content
        opener = closer = ""
        if (
            len(raw_content) >= 2
            and raw_content[0] in bracket_pairs
            and raw_content[-1] == bracket_pairs[raw_content[0]]
        ):
            opener, closer = raw_content[0], raw_content[-1]
            content = raw_content[1:-1].strip()

        # Nothing to link, or too long to be a single sound/symbol.
        if not content or len(content) > MAX_IPA_LENGTH:
            continue

        template.name = "IPA link"
        if opener:
            template.params[0].value = content
            # Nodes carry no parent pointer, so the tree itself is asked to
            # place the brackets around the template.
            wikicode.insert_before(template, opener)
            wikicode.insert_after(template, closer)
        changes += 1
    return changes


def _has_table_ancestor(wikicode, node):
    """Return True when *node* is nested inside a table Tag within *wikicode*."""
    return any(
        isinstance(ancestor, nodes.Tag)
        and str(ancestor.tag).strip().lower() == 'table'
        for ancestor in wikicode.get_ancestors(node)
    )
def process_page(page):
    """Convert eligible IPA templates on *page* and save after confirmation.

    The page text is parsed, passed through ``process_ipa_templates()``, and,
    if anything changed, a diff is shown and the operator is asked whether to
    save.

    Args:
        page: A pywikibot page object.

    Returns:
        True only when changes were found, confirmed and saved.  False when
        the page is missing or a redirect, when nothing needed changing, or
        when the operator declined.
    """
    print(f"\nProcessing: {page.title()}")
    try:
        text = page.get()
    except (pywikibot.exceptions.NoPageError,
            pywikibot.exceptions.IsRedirectPageError):
        # page.get() raises for missing pages and for redirects; neither has
        # article text to edit, so skip instead of aborting the whole run.
        return False

    wikicode = parse(text)
    changes = process_ipa_templates(wikicode)
    if not changes:
        print("No changes needed")
        return False

    new_text = str(wikicode)
    print(f"🔧 Found {changes} IPA conversions")
    pywikibot.showDiff(text, new_text)
    # strip() so an answer such as "y " is still accepted.
    if input("Save changes? (y/n): ").strip().lower() != 'y':
        return False

    page.text = new_text
    # NOTE(review): newer pywikibot releases appear to rename ``botflag`` to
    # ``bot`` - confirm against the installed version.
    page.save(summary=f"BOT: IPA conversion ({changes} templates)", botflag=True)
    return True
def main():
    """Run one interactive batch of IPA-template conversions.

    Collects up to ``PAGE_LIMIT * 2`` category articles that are not in the
    processed log, shuffles them, and runs ``process_page()`` on at most
    ``PAGE_LIMIT`` of them.  Prints a summary of how many pages were saved.
    """
    from itertools import islice  # local import: only main() needs it

    site = pywikibot.Site('en', 'wikipedia', user=USERNAME)
    processed = load_processed_pages()
    print("🔍 Scanning category pages...")
    unprocessed = (
        page for page in fast_category_loader(site, CATEGORY_TITLE)
        if page.title() not in processed
    )
    # islice stops consuming the category listing as soon as enough
    # candidates are found instead of materialising all of it first.
    candidate_pages = list(islice(unprocessed, PAGE_LIMIT * 2))
    random.shuffle(candidate_pages)
    batch = candidate_pages[:PAGE_LIMIT]

    processed_count = 0
    # The header shows the position in the batch; the saved-page counter
    # would repeat the same number whenever a page is skipped.
    for position, page in enumerate(batch, start=1):
        print(f"\n=== Processing page {position}/{len(batch)} ===")
        if process_page(page):
            processed_count += 1
        # Every examined page is logged, changed or not; otherwise pages that
        # need no change would stay at the head of the candidate pool and be
        # re-examined on every run.
        save_processed_page(page.title())
    print(f"\nCompleted processing {processed_count} pages")


if __name__ == "__main__":
    main()