# Source page: User:MastCell/dermimages.py
# (wiki page residue: "Appearance")
# File: Dermimages.py
# By MastCell
# Released for any and all reuse and modification
# Use at your own risk.
# ========================
# This script does the following:
# 1. Load all articles linked to the Dermatology Task Force,
#    using Category:Dermatology task force articles.
# 2. Check each page for images, removing those which are part of
#    common templates
# 3. Output the results in a sortable wikitable which can be
#    cut-and-pasted onto Wikipedia.
#
# The goal is to assess how many dermatology-related articles currently
# lack images, and to assess the overall prevalence of images across
# derm-related articles.
# =========================

# MWclient module for Wikimedia API calls
import mwclient

# Global set of image names to exclude
# (include images from templates, featured article stars, etc
# which should not be counted as "content" images)
# Modify as needed.
global_exclude_list = {
    "Normal Epidermis and Dermis with Intradermal Nevus 10x.JPG",
    "LinkFA-star.png",
    "Featured article star.svg",
    "Symbol support vote.svg",
    "Rod of Asclepius2.svg",
    "Mergefrom.svg",
}

# Open the site and collect pages from category
# (Note that these will generally be article talk pages, since that's
# where the Derm task force template is typically placed)
wpHandle = mwclient.Site('en.wikipedia.org')
dermTalkPages = wpHandle.Pages['Category:Dermatology task force articles']


# Main program loop: load and process each page
def main_program():
    """Write the complete wikitable: header, one row per page, footer.

    Iterates over ``dermTalkPages`` and writes to the module-level
    ``outputFile``, which must already be open for text writing.
    """
    setUpTable()
    for talkPage in dermTalkPages:
        # Make sure we're dealing with the article page, rather than the
        # talk page: page_title is the title without its namespace prefix.
        articlePage = wpHandle.Pages[talkPage.page_title]
        # Load images and process them
        processPage(articlePage, articlePage.images())
    closeTable()


# Page processing function
def processPage(page, imageList):
    """Write one table row for ``page`` to the module-level ``outputFile``.

    The row holds a link to the page, a ``<br>``-separated list of links
    to its images, and the image count. Images whose ``page_title`` is in
    ``global_exclude_list`` are neither listed nor counted.

    Parameters:
        page: object exposing a ``name`` attribute (the full page title).
        imageList: iterable of objects exposing ``name`` and ``page_title``.
    """
    imageLinks = [
        '[[:' + image.name + ']]'
        for image in imageList
        if image.page_title not in global_exclude_list
    ]
    outputFile.write("|-\n")
    # Titles are written as text; the output file's own encoding/errors
    # settings turn unencodable characters into XML character references.
    # (Writing pre-encoded bytes to a text-mode file fails on Python 3.)
    outputFile.write("| [[" + page.name + "]] ||")
    outputFile.write("<br>\n".join(imageLinks))
    outputFile.write(' || ' + str(len(imageLinks)) + "\n")


# Output the table header boilerplate
def setUpTable():
    """Write the wikitable opening line and the header row.

    Each piece of table markup has to start on its own line, so both
    writes end with a newline.
    """
    outputFile.write('{| class="wikitable sortable" border="1"\n')
    outputFile.write(
        '! Page !! class="unsortable" | Images !! Number of images\n')


# Output the table footer boilerplate
def closeTable():
    """Write the wikitable closing marker, terminated by a newline."""
    outputFile.write('|}\n')


########################################
# Main program
# ------------
# Opens a handle to the output file, then runs the main loop
########################################
# The file is opened as ISO-8859-15 text with xmlcharrefreplace so that
# every string written (page titles and image names alike) has characters
# outside that charset replaced by XML character references.
with open('dermimages_output.txt', 'w', encoding='iso-8859-15',
          errors='xmlcharrefreplace') as outputFile:
    main_program()