# User:WatchlistBot/source.py

import catlib
import wikipedia
import codecs

# the maximum number of articles per page; Watchlist.writeList starts a new
# subpage once a batch of links exceeds this count, and splits the output
# when twice the total number of pages exceeds it
MAX = 10000
# should we write to file or directly to wikipedia? Read by writePage:
# False saves the page to Wikipedia, True writes a local .txt file instead
DBG = False
class Watchlist:
    """
    Collects the pages of one WikiProject, grouped by namespace, and writes
    them to a project page as lists of wiki links.

    Every page is stored twice: once in the list for its namespace and once
    (as its talk page) in the matching talk list.  writeList() lays these
    lists out on "Wikipedia:WikiProject <project>/<articleOut>", moving the
    bulk of the links onto numbered "/PageN" subpages when there are too many
    for one page.
    """

    # the name of the template used to tag articles, e.g., "Numismaticnotice"
    template = ""
    # the name of the project, e.g., "Numismatics"
    project = ""
    # the location of the article list (output) -- without prefix, so for
    # "Wikipedia:WikiProject Numismatics/Articles", use "Articles"
    articleOut = ""

    # NOTE: the class-level lists below are only placeholders; __init__ gives
    # every instance its own fresh lists.

    # a list for all articles
    articles = []
    # a list for all article talk pages
    articlesTalk = []
    # a list for all Wikipedia pages
    wikis = []
    # a list for all Wikipedia talk pages
    wikisTalk = []
    # a list for all templates
    templates = []
    # a list for all template talk pages
    templatesTalk = []
    # a list for all categories
    categories = []
    # a list for all category talk pages
    categoriesTalk = []
    # a list for all images
    images = []
    # a list for all image talk pages
    imagesTalk = []
    # a list for all portals
    portals = []
    # a list for all portal talk pages
    portalsTalk = []

    # certain pages need to be included explicitly (for example, if they share
    # a talk page)
    includePages = []

    def __init__(self, template, project, articleOut, includePages = None):
        """
        template is the tag template name, project the WikiProject name and
        articleOut the output subpage name.  includePages is an optional list
        of page names to add even though they are not tagged.
        """
        # None instead of a literal [] default, so that instances never share
        # one list object
        if includePages is None:
            includePages = []
        self.template = template
        self.project = project
        self.articleOut = articleOut
        self.articles = []
        self.articlesTalk = []
        self.wikis = []
        self.wikisTalk = []
        self.templates = []
        self.templatesTalk = []
        self.categories = []
        self.categoriesTalk = []
        self.images = []
        self.imagesTalk = []
        self.portals = []
        self.portalsTalk = []
        self.includePages = includePages

    def processPageName (self, name):
        """
        Process one page name, updating the lists as appropriate.

        The name may be either the subject page or its talk page; in both
        cases the subject page and the talk page are appended to the pair of
        lists for that namespace.  Names whose prefix is not one of the
        namespaces handled below (for example "User talk:...") are ignored.
        """
        # split on the first colon only, so that titles which themselves
        # contain a colon ("Talk:2001: A Space Odyssey") are kept whole
        pieces = name.split(":", 1)
        if (len(pieces) == 1):
            self.articles.append(name)
            self.articlesTalk.append("Talk:"+name)
            return

        prefix, title = pieces
        if (prefix == "Talk"):
            self.articles.append(title)
            self.articlesTalk.append("Talk:"+title)
        elif (prefix in ("Wikipedia talk", "Wikipedia")):
            self.wikis.append("Wikipedia:"+title)
            self.wikisTalk.append("Wikipedia talk:"+title)
        elif (prefix in ("Template talk", "Template")):
            self.templates.append("Template:"+title)
            self.templatesTalk.append("Template talk:"+title)
        elif (prefix in ("Category talk", "Category")):
            # leading colon: link to the category instead of joining it
            self.categories.append(":Category:"+title)
            self.categoriesTalk.append("Category talk:"+title)
        elif (prefix in ("Image talk", "Image")):
            # leading colon: link to the image instead of displaying it
            self.images.append(":Image:"+title)
            self.imagesTalk.append("Image talk:"+title)
        elif (prefix in ("Portal talk", "Portal")):
            self.portals.append("Portal:"+title)
            self.portalsTalk.append("Portal talk:"+title)

    def scanCat (self, catName, recurse):
        """
        Add every page of the category catName to the lists, then add the
        category itself.  recurse is handed on to catlib's articles().
        """
        # NOTE(review): catName is passed to catlib.Category as given but is
        # also prefixed with ":Category:" below -- confirm which form callers
        # are expected to supply
        cat = catlib.Category(wikipedia.getSite(), catName)
        for member in cat.articles(recurse):
            self.processPageName(member.title())
        self.categories.append(":Category:"+catName)
        self.categoriesTalk.append("Category talk:"+catName)

    def removeDuplicatesAndSort (self):
        """
        Replace each of the twelve page lists by a sorted list without
        duplicates.
        """
        for listName in ("articles", "articlesTalk",
                         "wikis", "wikisTalk",
                         "templates", "templatesTalk",
                         "categories", "categoriesTalk",
                         "images", "imagesTalk",
                         "portals", "portalsTalk"):
            setattr(self, listName, sorted(set(getattr(self, listName))))

    def getTaggedPages (self):
        """
        Get the pages that include the project template and add them to the
        appropriate lists, followed by the explicitly included pages.
        The lists are de-duplicated and sorted afterwards.
        """
        tagTemplate = wikipedia.Page(wikipedia.getSite(),
                                     "Template:" + self.template)
        for ref in tagTemplate.getReferences(onlyTemplateInclusion=True):
            self.processPageName(ref.title())

        # include the explicitly named pages
        for pageName in self.includePages:
            self.processPageName(pageName)

        # remove duplicates and sort the lists
        self.removeDuplicatesAndSort()

    def getPagesFromTaggedCategories (self):
        """
        Find the category talk pages that include the project template and
        add the members of those categories (as collected by
        findArticlesInCategory) to the appropriate lists.
        The lists are de-duplicated and sorted afterwards.
        """
        tagTemplate = wikipedia.Page(wikipedia.getSite(),
                                     "Template:" + self.template)
        refs = tagTemplate.getReferences(onlyTemplateInclusion=True)

        articles = []
        for ref in refs:
            # first colon only: category names may contain colons themselves
            pieces = ref.title().split(":", 1)
            if (pieces[0] == "Category talk"): # we expect this
                findArticlesInCategory("Category:" + pieces[1], articles)

        for pageName in sorted(set(articles)):
            self.processPageName(pageName)

        # remove duplicates and sort the lists
        self.removeDuplicatesAndSort()

    def _trackingIntro (self, lead, target):
        """
        Build the introduction paragraph shared by the main page and the
        subpages.  lead is the opening text up to (not including) the
        template link; target is the output page name, with underscores,
        relative to "Wikipedia:WikiProject ", used in both tracking links.
        """
        return lead + "{{tl|" + self.template + "}} " + \
               "on their talk page. It was generated by [[User:WatchlistBot|" + \
               "WatchlistBot]]. Its purpose is to be able to track " + \
               "the project history using ''[[Special:Recentchangeslinked/" + \
               "Wikipedia:WikiProject " + target + \
               "|related changes]]'' or ''[http://tools.wikimedia.de/~interiot/" + \
               "cgi-bin/offtoolserver/RC_firstonly?url=http%3A%2F%2Fen.wikipedia.org" + \
               "%2Fw%2Findex.php%3Ftitle%3DSpecial%3ARecentchangeslinked%26target" + \
               "%3DWikipedia:WikiProject_" + target + \
               "%26hideminor%3D0%26days%3D7%26limit%3D500 related watchlist]'' which " + \
               "only shows the last change for each article.\n" + \
               "\n"

    def _namespaceSections (self, sections, showCounts, splitting, pageNo):
        """
        Lay out the non-article sections.  sections is a list of
        (heading, pages) pairs.  Returns (mainPart, subPart): the text for
        the main page and the text for subpage number pageNo.  When not
        splitting, the links go on the main page and subPart stays empty;
        when splitting, the main page only links to the subpage sections.
        """
        mainPart = ""
        subPart = ""
        for heading, pages in sections:
            if (showCounts):
                mainPart += "===" + heading + " (count: " + \
                            str(len(pages)) + ")===\n"
            else:
                mainPart += "===" + heading + "===\n"
            links = "".join(["*[[" + s + "]]\n" for s in pages])
            if (splitting):
                subPart += "===" + heading + "===\n" + links
                mainPart += "*[[/Page" + str(pageNo) + "#" + heading + \
                            "|" + heading + "]]\n"
            else:
                mainPart += links
        return mainPart, subPart

    def writeList (self):
        """
        write the output to the specified page on Wikipedia

        The main page lists the regular content first and the talk pages
        second.  If twice the total number of pages exceeds MAX, the output
        is split: the first batch of articles stays on the main page and
        everything else is written to numbered "/PageN" subpages which the
        main page links to.
        """

        wikipedia.output(u"Preparing output")

        # the output page, without spaces
        output = self.project.replace(" ", "_") + "/" + \
                 self.articleOut.replace(" ", "_")

        totalArticles = len(self.articles) + len(self.wikis) + \
                        len(self.templates) + len(self.categories) + \
                        len(self.images) + len(self.portals)
        # double the number of articles because of talk pages
        splitting = (totalArticles*2 > MAX)
        if (splitting):
            lead = "There are too many articles in this project to list " + \
                   "them all on one page. This article contains the first " + \
                   str(MAX) + " articles and links to other articles which " + \
                   "contain "
        else:
            lead = "This article contains "
        lead += "links to all articles, categories, images, portal pages " + \
                "templates, and project pages with "
        mainText = self._trackingIntro(lead, output)

        mainText += "==Regular content (count: " + str(totalArticles) + ")==\n"

        # ---- regular content: articles ----
        # the number of articles listed in the current batch
        count = 0
        # the number of the next subpage
        pageNo = 1
        mainText += "===Articles (count: " + str(len(self.articles)) + ")===\n"
        # first letters of the current heading and of the current batch; None
        # never equals a real first letter, so the first article always gets
        # a heading (a fixed letter would swallow that letter's heading)
        prevChar = firstChar = None
        # the text for this batch (if no subpages, will just be on the main
        # page)
        subText = ""
        # make sure the first batch of articles goes to the main page
        firstBatch = True
        for s in self.articles:
            if (s[0] != prevChar):
                subText += "====" + s[0] + "====\n"
                prevChar = s[0]
                if (count == 0):
                    firstChar = prevChar
            subText += "*[[" + s + "]]\n"
            count = count+1
            if (count > MAX):
                count = 0
                if (firstBatch):
                    firstBatch = False
                    mainText += subText
                else:
                    mainText += "*[[/Page" + str(pageNo) + "|" + \
                                firstChar + "-" + prevChar + "]]\n"
                    subText = subText.replace("<range>", firstChar + " through " + \
                                              prevChar)
                    self.writeProjPage(self.articleOut + "/Page" + str(pageNo),
                                       subText)
                    pageNo = pageNo+1
                # the next batch continues under the current letter
                firstChar = prevChar
                subText = "===Articles <range>===\n" + \
                          "====" + prevChar + "====\n"
        if (splitting and not firstBatch):
            # the remainder after the last full batch gets its own subpage
            mainText += "*[[/Page" + str(pageNo) + "|" + \
                        firstChar + " through " + prevChar + "]]\n"
            subText = subText.replace("<range>", firstChar + " through " + prevChar)
            self.writeProjPage(self.articleOut + "/Page" + str(pageNo), subText)
            pageNo = pageNo+1
        else:
            mainText += subText

        # ---- regular content: all other namespaces ----
        mainPart, subPart = self._namespaceSections(
            [("Wikipedia", self.wikis), ("Templates", self.templates),
             ("Categories", self.categories), ("Portals", self.portals),
             ("Images", self.images)],
            True, splitting, pageNo)
        mainText += mainPart
        if (splitting):
            intro = self._trackingIntro(
                "This article contains links to templates, categories, portals, " + \
                "and images with ",
                output + "/Page" + str(pageNo))
            self.writeProjPage(self.articleOut + "/Page" + str(pageNo),
                               intro + subPart)
            pageNo = pageNo+1

        # ---- talk pages: articles ----
        mainText += "==Talk pages==\n"

        mainText += "===Articles===\n"
        talkLead = "This article contains links to some talk pages with "
        if (splitting):
            subText = self._trackingIntro(talkLead,
                                          output + "/Page" + str(pageNo)) + \
                      "===Articles <range>===\n"
        else:
            subText = ""
        count = 0
        firstChar = ""
        for s in self.articlesTalk:
            if (count == 0):
                # first letter of the title after the "Talk:" prefix
                firstChar = s.split(":")[1][0]
            subText += "*[[" + s + "]]\n"
            count = count+1
            if (count > MAX):
                count = 0
                endChar = s.split(":")[1][0]
                mainText += "*[[/Page" + str(pageNo) + "|" + \
                            firstChar + "-" + endChar + "]]\n"
                subText = subText.replace("<range>", firstChar + " through " + \
                                          endChar)
                self.writeProjPage(self.articleOut + "/Page" + str(pageNo), subText)
                pageNo = pageNo+1
                firstChar = endChar
                subText = "===Articles <range>===\n"
        if (splitting):
            # only write the last article talk subpage if there are article
            # talk pages at all; with an empty list there is no last entry
            # to take the end of the range from
            if (self.articlesTalk):
                endChar = self.articlesTalk[-1].split(":")[1][0]
                mainText += "*[[/Page" + str(pageNo) + "|" + \
                            firstChar + " through " + endChar + "]]\n"
                subText = subText.replace("<range>",
                                          firstChar + " through " + endChar)
                self.writeProjPage(self.articleOut + "/Page" + str(pageNo),
                                   subText)
                pageNo = pageNo+1
        else:
            mainText += subText

        # ---- talk pages: all other namespaces ----
        mainPart, subPart = self._namespaceSections(
            [("Wikipedia", self.wikisTalk), ("Templates", self.templatesTalk),
             ("Categories", self.categoriesTalk), ("Portals", self.portalsTalk),
             ("Images", self.imagesTalk)],
            False, splitting, pageNo)
        mainText += mainPart
        if (splitting):
            intro = self._trackingIntro(talkLead,
                                        output + "/Page" + str(pageNo))
            self.writeProjPage(self.articleOut + "/Page" + str(pageNo),
                               intro + subPart)
            pageNo = pageNo+1

        self.writeProjPage(self.articleOut, mainText)

    def writeProjPage (self, pageName, text):
        """
        Write text to the subpage pageName of this project's page
        ("Wikipedia:WikiProject <project>/<pageName>") via writePage.
        """
        pageName = "Wikipedia:WikiProject " + self.project + "/" + pageName
        comment = "full update by [[User:WatchlistBot|WatchlistBot]]"
        page = wikipedia.Page(wikipedia.getSite(), pageName)
        writePage(page, text, comment)

def getExcludedArticles (project):
    """
    get the list of pages which should not be tagged even though they're in
    tagged categories

    The list is read from the page "User:WatchlistBot/<project>": every
    [[wiki link]] below the "----" line names one excluded page.  Returns
    the talk page names (see getTalkVersion) of the linked pages, or an
    empty list if the page does not exist.
    """
    print("getExcludedArticles")
    listPage = wikipedia.Page(wikipedia.getSite(), "User:WatchlistBot/" + project)
    if (not listPage.exists()):
        return []

    text = listPage.get()
    # find the "----" the list of articles is below the line; if there is
    # no line, scan the whole page
    start = text.find("----\n")
    if (start != -1):
        text = text[start+4:]

    pages = []
    # the piece before the first "[[" is not inside a link, so skip it
    for piece in text.split("[[")[1:]:
        end = piece.find("]]")
        if (end != -1):
            # for a piped link [[target|label]] only the target is the page
            target = piece[:end].split("|")[0]
            pages.append(getTalkVersion(target))
    print(pages)
    return pages

def getTalkVersion (name):
    """
    given a page name, convert it to the associated talk page

    A name without a namespace prefix becomes "Talk:<name>"; a name whose
    prefix already contains "Talk" or "talk" is returned as is; any other
    "<prefix>:<title>" becomes "<prefix> talk:<title>".  A leading colon,
    as used when linking to categories and images ("[[:Category:X]]"), is
    dropped first.
    """
    if (name.startswith(":")):
        name = name[1:]
    # split on the first colon only so that a title which itself contains a
    # colon is not cut short
    pieces = name.split(":", 1)
    if (len(pieces) == 1):
        return "Talk:"+name
    prefix, title = pieces
    if ("Talk" in prefix or "talk" in prefix):
        return name
    return prefix + " talk:" + title

def writePage (page, text, comment):
    """
    Save text to the given page with the given edit comment.

    If DBG is set, nothing is saved to the wiki; the text is written to a
    local UTF-8 file instead.  The file name is the page title with
    everything up to the first "/" and then everything up to the first ":"
    removed, plus ".txt".
    """
    if (not DBG):
        page.put(text, comment, minorEdit=False)
        return

    pageName = page.title()
    start = pageName.find("/")
    if (start != -1):
        pageName = pageName[start+1:]
    start = pageName.find(":")
    if (start != -1):
        pageName = pageName[start+1:]
    wikipedia.output(u"Writing file " + pageName + u".txt")
    f = codecs.open(pageName + ".txt", mode="w", encoding="utf8")
    # close the file even if writing fails
    try:
        f.write(text)
    finally:
        f.close()

def untagPage (pageName, tag):
    """
    remove the tag from the given talk page, if it is there

    Pages that do not exist and redirect pages are left alone.  Only the
    plain form "{{<tag>}}" is looked for and removed.
    """
    page = wikipedia.Page(wikipedia.getSite(), pageName)
    if (not page.exists() or page.isRedirectPage()):
        return

    text = page.get()
    marker = "{{"+tag+"}}"
    if (text.find(marker) == -1):
        wikipedia.output("Page " + page.title() + " not tagged")
    else:
        # remove the tag itself (the marker built from tag, not from the
        # page text)
        text = wikipedia.replaceExceptMathNowikiAndComments(text, marker, "")
        writePage(page, text, "Removing " + tag)
                    
def tagPage (pageName, tag):
    """
    Put the tag on the talk page called pageName.

    A page that does not exist yet is created with just the tag; a redirect
    page is reported and left untouched.
    """
    talkPage = wikipedia.Page(wikipedia.getSite(), pageName)

    if not talkPage.exists():
        # we don't mind if the page doesn't exist yet, just create it
        tagIt(talkPage, "", tag)
        return

    if talkPage.isRedirectPage():
        wikipedia.output("Page " + talkPage.title() + " is a redirect")
        return

    currentText = talkPage.get()
    tagIt(talkPage, currentText, tag)

def tagIt (page, text, tag):
    """
    Save the page with "{{<tag>}}" and a blank line put in front of text.
    """
    tagged = "".join(["{{", tag, "}}\n\n", text])
    summary = "Adding " + tag
    writePage(page, tagged, summary)

def findArticlesInCategory (catName, articles, confirm = False):
    """
    find all the articles in the given category, and return a list
    If confirm is true, check each article with the user
    articles is the list so far

    For every page in the category the name of its talk page is appended to
    articles (pages that already are talk pages are appended as they are;
    user and user talk pages are ignored).  articles is extended and
    de-duplicated in place, keeping the order of first occurrence, and is
    also returned.
    """
    # talk namespace prefix for each subject namespace number handled here
    talkPrefixes = {
        10: "Template talk:",
        6: "Image talk:",
        100: "Portal talk:",
        4: "Wikipedia talk:",
    }
    # namespaces whose pages already are talk pages
    talkNamespaces = (1, 5, 7, 11, 101, 15)
    # user and user talk
    ignoredNamespaces = (2, 3)

    # get the category (don't tag it, since that's already been done separately)
    cat = catlib.Category(wikipedia.getSite(), catName)

    # collect all pages in this category
    for page in cat.articles():
        title = page.title()
        # pywikipedia bug: a bogus page called "Width=" can show up; never
        # collect it (and don't bother the user about it)
        if (title == "Width="):
            continue
        # if confirming, check
        if (confirm):
            response = wikipedia.input(u"Do you want to tag " + title + u"? (y for yes)")
            if (response != "y"):
                continue

        # add the appropriate prefix
        namespace = page.namespace()
        if (namespace == 0): # article
            articles.append("Talk:" + title)
        elif (namespace in talkPrefixes):
            articles.append(talkPrefixes[namespace] + page.titleWithoutNamespace())
        elif (namespace in talkNamespaces):
            articles.append(title)
        elif (namespace in ignoredNamespaces):
            pass
        else:
            print("Unexpected namespace on " + title + ": " + str(namespace))

    # remove duplicates; done in place so that the caller's list is changed
    seen = set()
    unique = []
    for name in articles:
        if (name not in seen):
            seen.add(name)
            unique.append(name)
    articles[:] = unique
    return articles
	            

def updateCategoryList (catList, catName, keywords):
    """
    Decide whether the category catName goes on catList, and append it if so.

    A category already on the list is skipped.  If one of the keywords
    occurs in the category name it is accepted without asking; otherwise
    the user is asked.  The answer "y" accepts the category and walks
    through its subcategories in the same way, "yn" accepts it without
    recursion, anything else rejects it.
    """
    cat = catlib.Category(wikipedia.getSite(), "Category:" + catName)
    if catName in catList:
        return

    answer = "n"
    for word in keywords:
        if word in catName:
            answer = "y"
            break

    # if we haven't found a keyword, ask the user
    if answer != "y":
        answer = wikipedia.input(u"Do you want to tag " + cat.title() + u"? (y for yes, yn for yes but no recursion)")

    # add the category to the list
    if answer in ("y", "yn"):
        catList.append(cat.titleWithoutNamespace())

    # recurse through subcategories
    if answer == "y":
        for child in cat.subcategories():
            updateCategoryList(catList, child.titleWithoutNamespace(),
                               keywords)

def tagCategories (catName, tag, keywords):
    """
    Tag the talk pages of the category catName and of its subcategories
    with the given tag (at the top of the page).

    Which categories are included is decided by updateCategoryList: the
    user is asked about each one, except for categories containing one of
    the keywords, which are accepted without confirmation.
    Returns the sorted list of the categories that were tagged.
    """
    wikipedia.put_throttle.setDelay(10, absolute = True)

    # get the category list
    chosen = []
    updateCategoryList(chosen, catName, keywords)

    # remove duplicates and sort
    catList = sorted(set(chosen))
    for name in catList:
        tagPage("Category talk:" + name, tag)
    return catList

def getTagged (tag, catList, articles):
    """
    get a list of categories and articles which contain the specified tag

    Every page that includes "Template:<tag>" is looked at: for a category
    talk page, "Category:<name>" is appended to catList; the title of any
    other page is appended to articles.  Both lists are changed in place.
    """
    tagTemplate = wikipedia.Page(wikipedia.getSite(), "Template:" + tag)
    refs = tagTemplate.getReferences(onlyTemplateInclusion=True)

    for ref in refs:
        name = ref.title()
        # split on the first colon only, so that category names which
        # contain a colon themselves are kept whole
        pieces = name.split(":", 1)
        if (pieces[0] == "Category talk"):
            catList.append("Category:"+pieces[1])
        else:
            articles.append(name)

def untag (catList = [], tag = "Numismaticnotice"):
    """
    Remove the tag from the talk pages of all articles in the categories
    named in catList (names without the "Category:" prefix).
    This is useful when the bot makes a mistake.
    """
    collected = []
    for catName in catList:
        findArticlesInCategory("Category:"+catName, collected, False)

    # each page once, in sorted order
    for pageName in sorted(set(collected)):
        untagPage(pageName, tag)

    wikipedia.stopme()

def addNA (catName="Unassessed numismatic articles", tag="Numismaticnotice"):
    """
    add "class=NA" to all non-article content which is not currently assessed

    The talk pages of the pages in the category catName are collected with
    findArticlesInCategory; on every one that is not an article talk page,
    "{{<tag>}}" is replaced by "{{<tag>|class=NA}}".  A page is only saved
    if the replacement changed its text.
    """
    articles = []
    findArticlesInCategory("Category:"+catName, articles, False)

    # visit each page once
    for article in sorted(set(articles)):
        # article talk pages are in the main project namespace: skip them
        if (article.startswith("Talk:")):
            continue
        page = wikipedia.Page(wikipedia.getSite(), article)
        text = page.get()
        newText = wikipedia.replaceExceptMathNowikiAndComments(
            text, "{{"+tag+"}}", "{{"+tag+"|class=NA}}")
        # don't save a page on which nothing was replaced
        if (newText != text):
            writePage(page, newText, "Assessment, class=NA")

def tag (tag = "Numismaticnotice", otherTag = "Exonumianotice",
         project = "Numismatics", confirm=False, catList = None):
    """
    tag articles in tagged categories
    if a page is already tagged with otherTag, skip it

    catList is the list of categories ("Category:<name>") to go through.  If
    it is empty or not given, the categories whose talk page carries the tag
    are used (an empty list passed by the caller is filled with them).
    Pages named on the project's exclusion page (see getExcludedArticles)
    are skipped as well.  If confirm is true the user is asked about every
    page.
    """
    # a fresh list for every call; a literal [] default would be shared
    # between calls and keep the categories found by an earlier call
    if (catList is None):
        catList = []

    taggedArticles = []
    if (len(catList) == 0):
        getTagged(tag, catList, taggedArticles)
    else:
        # the categories are given; only the tagged articles are needed
        getTagged(tag, [], taggedArticles)

    # add the articles tagged with otherTag to the list of taggedArticles
    getTagged(otherTag, [], taggedArticles)

    candidates = []
    for cat in catList:
        findArticlesInCategory(cat, candidates, confirm)

    # pages that must not be tagged: those already tagged, the excluded
    # ones, and the bogus page produced by a pywikipedia bug
    skip = set(taggedArticles)
    skip.update(getExcludedArticles(project))
    skip.add("Talk:Width=")

    # remove duplicates, sort, and drop the pages to skip
    untaggedArticles = [article for article in sorted(set(candidates))
                        if article not in skip]

    if (len(untaggedArticles) == 0):
        wikipedia.output(u"No untagged articles")

    # tag the articles
    for article in untaggedArticles:
        tagPage(article, tag)

    wikipedia.stopme()

def update ():
    """
    Rebuild the watchlist page of every project in the table below.
    """
    # one row per watchlist:
    #   (template, project, output page,
    #    pages to include even though they aren't tagged,
    #    True if we're getting tagged articles / False if we're getting
    #    articles in tagged categories)
    watchlists = [
        ("Numismaticnotice", "Numismatics", "Articles",
         ["Template:AfricanCurrencies", "Template:AmericanCurrencies",
          "Template:AsianCurrencies", "Template:EuropeanCurrencies"],
         True),
        ("Exonumianotice", "Numismatics", "Exonumia articles", [], True),
        ("WikiProject Hawaii", "Hawaii", "Hawaii recent changes", [], True),
        ("WikiProject Texas", "Texas", "Articles", [], True),
        ("Ice hockey", "Ice Hockey", "Articles", [], True),
        ("LouisvilleWikiProject", "Louisville", "Watchall", [], True),
        ("WikiProject Kentucky", "Kentucky", "Watchall", [], True),
        ("AutomobileWatch", "Automobiles", "Articles", [], False),
        ("WikiProject Cricket", "Cricket", "Watchlist", [], False),
    ]

    for template, project, articleOut, includePagesList, taggedPagesFlag in watchlists:
        print("Updating watchlist for: %s using template: %s. Saving to: %s"
              % (project, template, articleOut))
        wl = Watchlist(template, project, articleOut, includePagesList)
        if taggedPagesFlag:
            wl.getTaggedPages()
        else:
            wl.getPagesFromTaggedCategories()
        wl.writeList()

    wikipedia.stopme()