User:ChristieBot/app.py
Appearance
import pywikibot
from flask import Flask, render_template, request
import re
from datetime import date
from datetime import timedelta
from datetime import datetime
from datetime import datetime as dt
# NOTE: this rebinds the name ``datetime`` to the module; the class remains
# available as ``dt``.  The import order is kept as in the original file.
import datetime
import pymysql
import configparser
import os
import urllib.parse
import GA_nom
from operator import attrgetter

app = Flask(__name__)

# Section names offered in the "GAN_section" multi-select, in display order.
_GAN_SECTIONS = (
    'Agriculture, food and drink',
    'Albums',
    'Art and architecture',
    'Biology and medicine',
    'Chemistry and materials science',
    'Computing and engineering',
    'Culture, sociology and psychology',
    'Earth sciences',
    'Economics and business',
    'Education',
    'Film',
    'Geography',
    'Language and literature',
    'Law',
    'Magazines and print journalism',
    'Mathematics and mathematicians',
    'Media and drama',
    'Other music articles',
    'Philosophy and religion',
    'Physics and astronomy',
    'Places',
    'Politics and government',
    'Royalty, nobility and heraldry',
    'Songs',
    'Sports and recreation',
    'Television',
    'Transport',
    'Video games',
    'Warfare',
    'World history',
)

# Values the form can submit for "sort_order".  They are used as attribute
# names on nomination objects, so anything else is rejected before sorting.
_SORT_KEYS = (
    'custom_sort_1',
    'nom_reviews',
    'nom_GAs',
    'age_in_days',
    'nom_edits',
    'R_over_G',
    'R_minus_G',
    'status',
    'title',
)
_DEFAULT_SORT_KEY = 'custom_sort_1'

# Pairs of user names mapped in both directions; edit counts for both names
# are summed, and the mapping is passed to the GA_nom review/GA counters.
_NAME_CHANGES = {
    'Bennv123': 'Bennv3771',
    'Bennv3771': 'Bennv123',
    'Olympian': 'Nunuxxx',
    'Nunuxxx': 'Olympian',
    'Interaccoonale': 'Raccoozzy',
    'Raccoozzy': 'Interaccoonale',
}


@app.route("/")
def ganfilter_main():
    """Return the HTML search form for filtering Good Article nominations.

    The form submits via GET to ``process_gf``; the field names and default
    values here are the ones ``process_filters`` reads.
    """
    ########
    # Version Number
    ########
    version = '1.0.0'

    page_text = ['<html><head></head>']
    page_text.append('<h1>GAN filtering</h1>')
    page_text.append('This tool provides a way to filter the Good Articles nominations on the English Wikipedia. The tool is under development and may go down without warning. The following limitations should be noted:<br />')
    page_text.append('<ul><br />')
    page_text.append('<li>The count of previously successfully nominated GAs is taken from a database that is updated daily by SDZeroBot</li>')
    page_text.append('<li>The notes column is not currently populated</li>')
    page_text.append('<li>The count of reviews performed does not currently take into account user name changes</li>')
    page_text.append('</ul><br />')
    page_text.append('<b>Definitions</b>')
    page_text.append('<ul>')
    page_text.append('<li>R: the number of GA reviews performed by the nominator</li>')
    page_text.append('<li>G: the number of articles the nominator has had promoted to GA</li>')
    page_text.append('<li>R/G: the ratio of reviews performed by the nominator to GAs they have nominated which were promoted.</li>')
    page_text.append('<li>R - G: the number of reviews performed by the nominator minus the number of GAs they have nominated which were promoted.</li>')
    page_text.append('</ul><br />')
    page_text.append('Please contact <a href="/wiki/User_talk:Mike_Christie">Mike Christie</a>, the maintainer of the tool, with any bugs or suggestions for enhancements.<br />')
    page_text.append('<h2>Filter and sort the Good Article nominations on the English Wikipedia</h2>')
    page_text.append('<form method="GET" action="process_gf">')

    # Section filter: "All" plus one option per GAN section.
    page_text.append('Include nominations from section: <select name="GAN_section" id="GAN_section" multiple>')
    page_text.append(' <option value="All" selected>All</option>')
    for section in _GAN_SECTIONS:
        page_text.append(' <option value="{0}">{0}</option>'.format(section))
    page_text.append('</select><br />')

    # Status filter.
    page_text.append('Include nominations with these statuses: <select name="statuses" id="statuses" multiple>')
    page_text.append(' <option value="All" selected>All</option>')
    page_text.append(' <option value="W">Waiting for review</option>')
    page_text.append(' <option value="2">Waiting for 2nd opinion</option>')
    page_text.append(' <option value="H">On hold</option>')
    page_text.append(' <option value="R">Under review</option>')
    page_text.append('</select><br />')
    page_text.append('<br />')

    # Numeric range filters.
    page_text.append('Nominator has reviewed at least this many GAs: <input type="number" name="min_reviews" id="min_reviews" value="0" /><br />')
    page_text.append('Nominator has reviewed at most this many GAs: <input type="number" name="max_reviews" id="max_reviews" value="1000" /><br /><br />')
    page_text.append('Nominator has previously successfully nominated at least this many GAs: <input type="number" name="min_GAs_promoted" id="min_GAs_promoted" value="0" /><br />')
    page_text.append('Nominator has previously successfully nominated at most this many GAs: <input type="number" name="max_GAs_promoted" id="max_GAs_promoted" value="1000" /><br /><br />')
    page_text.append('Nominator\'s edit count is at least: <input type="number" name="min_edit_count" id="min_edit_count" value="0" /><br />')
    page_text.append('Nominator\'s edit count is at most: <input type="number" name="max_edit_count" id="max_edit_count" value="10000000" /><br /><br />')
    page_text.append('Nominator\'s R/G is at least: <input type="number" name="min_R_over_G" step="0.01" id="min_R_over_G" value="0" /><br />')
    page_text.append('Nominator\'s R/G is at most: <input type="number" name="max_R_over_G" step="0.01" id="max_R_over_G" value="1000" /><br /><br />')
    page_text.append('Nominator\'s R - G is at least: <input type="number" name="min_R_minus_G" id="min_R_minus_G" value="-1000" /><br />')
    page_text.append('Nominator\'s R - G is at most: <input type="number" name="max_R_minus_G" id="max_R_minus_G" value="1000" /><br /><br />')
    page_text.append('Nomination is at least this many days old: <input type="number" name="min_nom_age" id="min_nom_age" value="0" /><br />')
    page_text.append('Nomination is at most this many days old: <input type="number" name="max_nom_age" id="max_nom_age" value="1000" /><br /><br />')

    # Sorting and output options.
    page_text.append('Sort by: <select name="sort_order" id="sort_order">')
    page_text.append(' <option value="custom_sort_1" selected>Prioritize new nominators, then by reviews per promoted GA</option>')
    page_text.append(' <option value="nom_reviews">Number of reviews</option>')
    page_text.append(' <option value="nom_GAs">Number of promoted GAs</option>')
    page_text.append(' <option value="age_in_days">Age</option>')
    page_text.append(' <option value="nom_edits">Nominator edit count</option>')
    page_text.append(' <option value="R_over_G">Reviews per promoted GA</option>')
    page_text.append(' <option value="R_minus_G">Reviews minus promoted GAs</option>')
    page_text.append(' <option value="status">Review status</option>')
    page_text.append(' <option value="title">Article title</option>')
    page_text.append('</select><br />')
    page_text.append('Sort direction: <select name="sort_dir" id="sort_dir">')
    page_text.append(' <option value="asc" selected>Ascending</option>')
    page_text.append(' <option value="desc">Descending</option>')
    page_text.append('</select><br />')
    page_text.append('Output format: <select name="target" id="target">')
    page_text.append(' <option value="Web" selected>Web page</option>')
    page_text.append(' <option value="Wikipedia">Wikipedia markup</option>')
    page_text.append('</select><br />')
    page_text.append('<input type="checkbox" id="split_by_topic" name="split_by_topic" value="Split" checked>')
    page_text.append('<label for="split_by_topic"> Split output by topic?</label><br>')
    page_text.append('<br />')
    page_text.append('<input type="submit" value="Submit" />')
    page_text.append('</form>')
    page_text.append('</body>')
    page_text.append('</html>')

    return '\n'.join(page_text)


def _load_promoted_ga_counts():
    """Return a dict mapping nominator name to their row count in ``nominators``.

    Credentials are read from ``replica.my.cnf`` in the home directory.

    Raises:
        RuntimeError: if the credentials file cannot be read.
    """
    replica_path = os.path.expanduser('~/replica.my.cnf')
    config = configparser.ConfigParser()
    # ConfigParser.read() returns the list of files it managed to parse, so
    # an empty result means the credentials file is missing or unreadable.
    if not config.read(replica_path):
        raise RuntimeError('replica.my.cnf file not found')

    database = "s54328__goodarticles_p"
    conn = pymysql.connections.Connection(
        user=config['client']['user'],
        password=config['client']['password'],
        database=database,
        host='tools.db.svc.eqiad.wmflabs',
    )
    try:
        with conn.cursor() as cursor:
            sql = "select nominator, count(*) as GA_count from nominators group by nominator order by count(*) desc"
            cursor.execute(sql)
            return {row[0]: row[1] for row in cursor.fetchall()}
    finally:
        # Always release the connection, even if the query fails.
        conn.close()


def _int_arg(name, default):
    """Read query parameter *name* as an int; fall back to *default* if absent or invalid."""
    return request.args.get(name, default=default, type=int)


def _float_arg(name, default):
    """Read query parameter *name* as a float; fall back to *default* if absent or invalid."""
    return request.args.get(name, default=default, type=float)


@app.route("/process_gf", methods=['GET', 'POST'])
def process_filters():
    """Filter, sort and render the current Good Article nominations.

    Reads the filter settings produced by the search form from the query
    string, walks the 'Good article nominees' category, keeps the nominations
    accepted by ``GA_nom.GA_nom.c_filter_all`` and returns them as a single
    string: an HTML page when ``target`` is ``'Web'``, or text wrapped in
    ``<pre>`` when ``target`` is ``'Wikipedia'``.

    Missing or malformed numeric parameters fall back to the defaults shown
    on the search form.
    """
    site = pywikibot.Site('en', 'wikipedia')
    cat = pywikibot.Category(site, 'Good article nominees')
    GAbot_page = pywikibot.Page(site, 'User:GA bot/Stats')
    reviews_page = GA_nom.GAbot_Page(GAbot_page.text)
    reviews_page.parse()

    wbg_nominators = _load_promoted_ga_counts()
    name_changes = _NAME_CHANGES

    # --- Request parameters (defaults mirror the search form) -------------
    min_reviews = _int_arg('min_reviews', 0)
    max_reviews = _int_arg('max_reviews', 1000)
    min_GAs_promoted = _int_arg('min_GAs_promoted', 0)
    max_GAs_promoted = _int_arg('max_GAs_promoted', 1000)
    min_edit_count = _int_arg('min_edit_count', 0)
    max_edit_count = _int_arg('max_edit_count', 10000000)
    # The form allows two decimal places for R/G (step="0.01"), so these
    # bounds are parsed as floats; int() would raise on e.g. "0.5".
    # NOTE(review): assumes c_filter_all only compares these bounds
    # numerically -- confirm against GA_nom.
    min_R_over_G = _float_arg('min_R_over_G', 0)
    max_R_over_G = _float_arg('max_R_over_G', 1000)
    min_R_minus_G = _int_arg('min_R_minus_G', -1000)
    max_R_minus_G = _int_arg('max_R_minus_G', 1000)
    min_nom_age = _int_arg('min_nom_age', 0)
    max_nom_age = _int_arg('max_nom_age', 1000)

    # sort_order becomes an attribute name for attrgetter(); only accept the
    # values the form can actually send.
    sort_order = request.args.get('sort_order', _DEFAULT_SORT_KEY)
    if sort_order not in _SORT_KEYS:
        sort_order = _DEFAULT_SORT_KEY
    reverse_bool = request.args.get('sort_dir', 'asc') != 'asc'
    target = request.args.get('target', 'Web')
    # An unchecked checkbox is simply absent from the query string.
    split_by_topic = bool(request.args.get('split_by_topic'))

    # A multi-select with nothing selected sends no parameter at all.
    # NOTE(review): falls back to the form's "All" value, presumably treated
    # by c_filter_all as "no restriction" -- confirm against GA_nom.
    GAN_sections = request.args.getlist('GAN_section') or ['All']
    status_list = request.args.getlist('statuses') or ['All']

    # --- Output preamble ---------------------------------------------------
    print_list = []
    if target == 'Web':
        print_list = ['<html><head></head>']
        print_list.append('<h1>Filtered GAN results</h1>')
        print_list.append('<br /><a href = "https://ganfilter.toolforge.org">Return to search page</a><br /><br />')
        print_list.append('<style>')
        print_list.append('table, th, td {')
        print_list.append('border: 1px solid black;')
        print_list.append('border-collapse: collapse;')
        print_list.append('}')
        print_list.append('</style>')
    elif target == "Wikipedia":
        print_list = ['<pre>']

    # --- Collect the nominations that pass every filter --------------------
    GA_noms = GA_nom.GA_nom_list()
    for article in cat.articles():
        # Drop the first five characters of the page title
        # (presumably the "Talk:" namespace prefix -- TODO confirm).
        title = article.title()[5:]
        params = GA_nom.GA_utils.get_GA_params(article)
        if params['nominator'] is None or params['page'] is None:
            continue

        nominator = params['nominator']
        edits = pywikibot.User(site, nominator).editCount()
        if nominator in name_changes:
            # Add the edits made under the nominator's other user name.
            edits += pywikibot.User(site, name_changes[nominator]).editCount()

        reviews = GA_nom.GA_utils.get_review_count(nominator, reviews_page, name_changes)
        GAs = GA_nom.GA_utils.get_GA_count(nominator, wbg_nominators, name_changes)

        if not GA_nom.GA_nom.c_filter_all(
                nominator, params['timestamp'], min_nom_age, max_nom_age,
                reviews, min_reviews, max_reviews,
                GAs, min_GAs_promoted, max_GAs_promoted,
                edits, min_edit_count, max_edit_count,
                params['subtopic'], GAN_sections,
                params['status'], status_list,
                min_R_over_G, max_R_over_G,
                min_R_minus_G, max_R_minus_G):
            continue

        nom = GA_nom.GA_nom(params['subtopic'], title, params['status'], params['page'],
                            nominator, params['timestamp'], params['note'],
                            reviews, GAs, edits)
        GA_noms.add(nom)

    # --- Render ------------------------------------------------------------
    if split_by_topic:
        topics_in_list = GA_noms.get_topics()
        topics_in_list.sort()
        for topic in topics_in_list:
            GA_sub_list = GA_noms.filter_by_topic(topic)
            print_list.append(GA_sub_list.header(topic, target, 2))
            GA_subtopics = GA_sub_list.get_subtopics()
            GA_subtopics.sort()
            for subtopic in GA_subtopics:
                GA_sub2_list = GA_sub_list.filter_by_subtopic(subtopic)
                print_list.append(GA_sub2_list.header(subtopic, target, 3))
                print_list.append(GA_noms.table_top(target))
                # Honour the requested sort direction here too (this branch
                # previously always sorted ascending).
                GA_sub2_list.noms = sorted(GA_sub2_list.noms, key=attrgetter(sort_order), reverse=reverse_bool)
                for rank, sub2_GA in enumerate(GA_sub2_list.noms, start=1):
                    print_list.append(sub2_GA.print_row(rank, target))
                print_list.append(GA_noms.table_bottom(target))
    else:
        print_list.append(GA_nom.GA_nom_list.table_top(target, topic=True, subtopic=True))
        GA_noms.noms = sorted(GA_noms.noms, key=attrgetter(sort_order), reverse=reverse_bool)
        for rank, GA in enumerate(GA_noms.noms, start=1):
            print_list.append(GA.print_row(rank, target, topic=True, subtopic=True))
        print_list.append(GA_nom.GA_nom_list.table_bottom(target))

    if target == 'Web':
        print_list.append('</body>')
        print_list.append('</html>')
    elif target == 'Wikipedia':
        # Close the <pre> opened in the preamble.
        print_list.append('</pre>')

    return '\n'.join(print_list)