Jump to content

User:MastCell/dermimages.py

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by MastCell (talk | contribs) at 00:05, 11 May 2010 (create). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
# File: Dermimages.py
# By MastCell
# Released for any and all reuse and modification
# Use at your own risk.
# ========================
# This script does the following:
#  1. Load all articles linked to the Dermatology Task Force,
#     using Category:Dermatology task force articles.
#  2. Check each page for images, removing those which are part of
#     common templates
#  3. Output the results in a sortable wikitable which can be
#     cut-and-pasted onto Wikipedia.
#
# The goal is to assess how many dermatology-related articles currently
# lack images, and to assess the overall prevalence of images across
# derm-related articles.
# =========================

# MWclient module for Wikimedia API calls
import mwclient


# Global set of image names to exclude from the per-page image count.
# These are images that come from templates, featured-article stars, and
# similar decorations, which should not be counted as "content" images.
# Entries are compared against each image's page_title. Modify as needed.
global_exclude_list = {
    "Normal Epidermis and Dermis with Intradermal Nevus 10x.JPG",
    "LinkFA-star.png",
    "Featured article star.svg",
    "Symbol support vote.svg",
    "Rod of Asclepius2.svg",
    "Mergefrom.svg",
}


# Connect to the wiki and fetch the task-force category page.
# (Iterating the category will generally yield article *talk* pages, since
#  that's where the Derm task force template is typically placed.)
WIKI_HOST = 'en.wikipedia.org'
TASK_FORCE_CATEGORY = 'Category:Dermatology task force articles'

wpHandle = mwclient.Site(WIKI_HOST)
dermTalkPages = wpHandle.Pages[TASK_FORCE_CATEGORY]


# Main program loop: load and process each page
def main_program():
    """Write the whole results table: header, one row per page, footer.

    Relies on the module-level ``wpHandle`` and ``dermTalkPages`` objects and
    on ``outputFile`` being open (the row/header/footer helpers write to it).
    """
    setUpTable()
    for categoryMember in dermTalkPages:
        # Category members are typically talk pages; looking the page up
        # again by its page_title is meant to give the article page instead.
        article = wpHandle.Pages[categoryMember.page_title]

        # Fetch the article's images and emit its table row.
        processPage(article, article.images())
    closeTable()
    
# Helper: make a page/image name safe to write to the output file
def _wiki_safe(text):
    """Return *text* with every non-ASCII character replaced by an XML
    numeric character reference (e.g. ``&#233;``).

    The result is text, not bytes, and contains only ASCII characters, so it
    can be written to the text-mode output file whatever that file's
    encoding is.
    """
    return text.encode("ascii", "xmlcharrefreplace").decode("ascii")


# Page processing function
def processPage(page, imageList):
    """Write one wikitable row for *page* to the global ``outputFile``.

    The row has three cells: a link to the page, links to its images
    (separated by ``<br>``), and the number of images listed.

    page      -- object whose ``name`` attribute is the page title.
    imageList -- iterable of image objects; each must provide ``page_title``
                 (checked against ``global_exclude_list``) and ``name``
                 (used as the link target).

    Images whose ``page_title`` is in ``global_exclude_list`` are neither
    listed nor counted.
    """
    # Page names and image names go through the same escaping, so a
    # non-ASCII image name is handled exactly like a non-ASCII page name.
    imageLinks = ['[[:' + _wiki_safe(image.name) + ']]'
                  for image in imageList
                  if image.page_title not in global_exclude_list]

    outputFile.write("|-\n")
    outputFile.write("| [[" + _wiki_safe(page.name) + "]] ||")
    outputFile.write("<br>\n".join(imageLinks))
    outputFile.write(' || ' + str(len(imageLinks)) + "\n")


# Output the table header boilerplate
def setUpTable():
    """Write the opening of the sortable wikitable to the global ``outputFile``.

    Two lines are written: the table-start line and the header row
    (Page / Images / Number of images, with the Images column unsortable).
    """
    # Wiki table markup is line-based: the "{|" start, the "!" header row and
    # the "|-" row separator written by processPage must each begin a new
    # line, so both lines here end with a newline.
    outputFile.write('{| class="wikitable sortable" border="1"\n')
    outputFile.write('! Page !! class="unsortable" | Images !! Number of images\n')


# Output the table footer boilerplate
def closeTable():
    """Write the wikitable end marker to the global ``outputFile``."""
    tableEnd = '|}'
    outputFile.write(tableEnd)
    
    
########################################
# Main program
# ------------
# Opens a handle to the output file, then runs the main loop
########################################
# The functions above write to the module-level name ``outputFile``, which
# is bound here for the duration of the run and closed automatically.
OUTPUT_PATH = 'dermimages_output.txt'

with open(OUTPUT_PATH, 'w') as outputFile:
    main_program()