Jump to content

User:WatchlistBot/source.java

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

WatchlistBot.java

/**
 * Command-line entry point of the bot: logs in once, regenerates the
 * watchlist page(s) of every configured WikiProject in turn, and logs out
 * again -- even when one of the updates fails with an exception.
 */
class WatchlistBot {
    /** pages that belong to the Numismatics project although they are not tagged **/
    private static final String[] NUMISMATICS_INCLUDES = {
        "Template:Currencies of Africa",
        "Template:Currencies of Asia",
        "Template:Currencies of Europe",
        "Template:Currencies of Oceania",
        "Template:Currencies of the Americas"
    };

    /** used by every project that has no extra pages to include **/
    private static final String[] NO_INCLUDES = new String[0];

    /** run the bot
     * @param args ignored
     * @throws Exception if logging in or updating one of the projects fails
     **/
    public static void main (String[] args) throws Exception {
        WikiSessionManager sessionMgr = new WikiSessionManager();
        try {
            sessionMgr.userLogin(Private.username, Private.password);

            // projects whose watchlist is built from the tagged pages themselves
            updateProject(sessionMgr, "Numismatics", "Numismaticnotice", "Articles",
                          NUMISMATICS_INCLUDES, true);
            // exonumia (a second list inside the Numismatics project)
            updateProject(sessionMgr, "Numismatics", "Exonumianotice", "Exonumia articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Hawaii", "WPHawaii", "Hawaii recent changes",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Texas", "WikiProject Texas", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Ice Hockey", "Ice hockey", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Louisville", "WikiProject Louisville", "Watchall",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Kentucky", "WikiProject Kentucky", "Watchall",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Texas State Highways", "Texas State Highway WikiProject",
                          "Watchlist", NO_INCLUDES, true);
            updateProject(sessionMgr, "Dallas", "WikiProject Dallas", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Comics", "comicsproj", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Pittsburgh", "PittsburghWikiProject", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Baseball", "Baseball-WikiProject", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Bell Systems", "WikiProject Bell System", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "LGBT studies", "LGBTProject", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "San Francisco Bay Area", "SFBAProject", "Watchlist",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Africa", "AfricaProject", "Watchlist",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Electronics", "Electron", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Tennessee", "WikiProject Tennessee", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Hong Kong", "WikiProject Hong Kong", "Articles",
                          NO_INCLUDES, true);
            updateProject(sessionMgr, "Films", "Film", "Articles",
                          NO_INCLUDES, true);

            // projects whose watchlist is built from the members of tagged categories
            updateProject(sessionMgr, "Automobiles", "AutomobileWatch", "Articles",
                          NO_INCLUDES, false);
            updateProject(sessionMgr, "Cricket", "CricketWatch", "Articles",
                          NO_INCLUDES, false);

            System.out.println("finished");
        } finally {
            // userLogout() is a no-op when the login never succeeded
            sessionMgr.userLogout();
        }
    }

    /** build one project and regenerate its watchlist
     * @param sessionMgr the logged-in session
     * @param projectName the name of the project (without Wikipedia:WikiProject)
     * @param template the name of the project's tag template (without namespace)
     * @param articlePage the sub page the list is written to
     * @param includePages extra pages to list although they are not tagged
     * @param useTaggedPages true to list the tagged pages, false to list all
     *        pages in tagged categories
     * @throws Exception if the update fails
     **/
    private static void updateProject (WikiSessionManager sessionMgr, String projectName,
                                       String template, String articlePage,
                                       String[] includePages, boolean useTaggedPages)
            throws Exception {
        Project project = new Project(sessionMgr, projectName, template,
                                      articlePage, includePages);
        project.updateWatchlist(useTaggedPages);
    }
}

WikiSessionManager.java

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Arrays;
import java.net.URL;
import java.net.URLEncoder;
import java.net.URLConnection;
 
    /**
     * WikiSessionManager is a utility class that logs into the English
     * Wikipedia and facilitates making HTTP requests with cookies.
     *
     * This program is free software; you can redistribute it and/or modify
     * it under the terms of the GNU General Public License as published by
     * the Free Software Foundation; either version 2 of the License, or
     * (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
     * 
     * @author Gracenotes
     * @version 0.1
     **/
 
public class WikiSessionManager
{
    /** cookies received at login ("name=value; name=value"), the separate
     *  session cookie, and the name of the logged-in user (null if none) */
    private String cookie, sessionData, username;
    private boolean loggedIn;

    /** Creates a manager that is not logged in and holds no cookies. */
    public WikiSessionManager()
    {
        this.loggedIn = false;
        this.sessionData = "";
        this.cookie = "";
    }

    /**
     * Logs in through api.php, remembers the cookies the server hands out and
     * then loads an edit form once to pick up the session cookie.
     *
     * @param username the account name; surrounding whitespace is ignored
     * @param password the password; the array is overwritten with spaces once
     *        the login request has been attempted
     * @throws IllegalArgumentException if the user name or password is blank
     * @throws IOException if the server cannot be reached or no session cookie
     *         is received
     */
    public void userLogin(String username, char[] password) throws IOException
    {
        username = username.trim();
        if (username.length() == 0 || password.length == 0) throw new IllegalArgumentException("Blank parameter");

        URL url = new URL("http://en.wikipedia.org/w/api.php");
        URLConnection connection = url.openConnection();

        connection.setDoOutput(true);
        connection.setUseCaches(false);
        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        connection.connect();

        try
        {
            OutputStreamWriter output = new OutputStreamWriter(connection.getOutputStream(), "UTF-8");
            try
            {
                output.write("action=login" +
                             "&lgname=" + URLEncoder.encode(username, "UTF-8") +
                             "&lgpassword=" + URLEncoder.encode(new String(password).trim(), "UTF-8"));
                output.flush();
            }
            finally
            {
                output.close();
            }
        }
        finally
        {
            // wipe the caller's copy of the password even if the request failed
            Arrays.fill(password, ' ');
        }

        // collect the name=value part of every Set-Cookie header
        StringBuffer receivedCookie = new StringBuffer();
        String headerName;
        int i = 0;
        while ((headerName = connection.getHeaderFieldKey(++i)) != null)
        {
            if (headerName.equalsIgnoreCase("Set-Cookie"))
            {
                if (receivedCookie.length() != 0)
                    receivedCookie.append("; ");
                receivedCookie.append(firstCookiePair(connection.getHeaderField(i)));
            }
        }
        this.cookie = receivedCookie.toString();
        // a successful login is recognised by the presence of a "...Token=" cookie
        this.loggedIn = this.cookie.indexOf("Token=") != -1;
        this.username = this.loggedIn ? username : null;

        // IB edit (get the session data)
        url = new URL("http://en.wikipedia.org/w/index.php?title=Wikipedia:Sandbox&action=edit");
        connection = url.openConnection();
        addCookies(connection);
        connection.connect();
        if (!findSessionData(connection)) {
            throw new IOException("Could not load session data");
        }
        // end IB edit
    }

    /**
     * Requests Special:Userlogout and forgets all cookies. Does nothing when
     * nobody is logged in.
     *
     * @throws IOException if the server cannot be reached
     */
    public void userLogout() throws IOException
    {
        if (!this.loggedIn)
            return;
        URL url = new URL("http://en.wikipedia.org/w/index.php?title=Special:Userlogout");
        URLConnection connection = url.openConnection();
        this.addCookies(connection);
        connection.connect();

        this.loggedIn = false;
        this.cookie = "";
        this.sessionData = "";
        this.username = null;
    }

    /**
     * Indicates whether a user is logged in or not
     * 
     * @return A boolean showing whether a user is logged in or not
     */
    public boolean isLoggedIn()
    {
        return this.loggedIn;
    }

    /**
     * Attaches the stored cookies (and the user name as User-Agent) to a
     * connection that has not been opened yet. Does nothing when nobody is
     * logged in.
     *
     * @param connection the connection to decorate
     */
    public void addCookies(URLConnection connection)
    {
        if (!this.loggedIn)
            return;
        String header = this.cookie;
        // sessionData is "" (not null) until findSessionData() succeeds; only
        // add the separator when there is something to put after it
        if (this.sessionData != null && this.sessionData.length() != 0)
            header = header + "; " + this.sessionData;
        connection.setRequestProperty("Cookie", header);
        connection.setRequestProperty("User-Agent", this.username);
    }

    /**
     * Looks for a "..._session" cookie among the response headers of the
     * connection and stores it; when several are present the last one wins.
     * Any previously stored session cookie is discarded first.
     *
     * @param connection a connection whose response headers can be read
     * @return true if a session cookie was found
     */
    public boolean findSessionData(URLConnection connection)
    {
        this.sessionData = "";
        String headerName;
        int i = 0;
        while ((headerName = connection.getHeaderFieldKey(++i)) != null)
        {
            // header names are case-insensitive, same as in userLogin()
            if (!headerName.equalsIgnoreCase("Set-Cookie"))
                continue;
            String value = connection.getHeaderField(i);
            if (value != null && value.indexOf("_session") != -1)
                this.sessionData = firstCookiePair(value);
        }

        return this.sessionData.length() != 0;
    }

    /** Returns the part of a Set-Cookie value before the first ';'. */
    private static String firstCookiePair(String headerValue)
    {
        int end = headerValue.indexOf(';');
        return end == -1 ? headerValue : headerValue.substring(0, end);
    }
}

Project.java

import java.io.*;
import java.net.*;

/**
 * One WikiProject whose watchlist page(s) the bot maintains. Owns the
 * Watchlist that collects the pages and provides the single place through
 * which generated text is written out (to Wikipedia, or to local files when
 * DBG is switched on).
 */
public class Project {
    /** are we debugging (sends output to file instead of wikipedia) **/
    final static boolean DBG = false;

    /** the watchlist **/
    private Watchlist watchlist;

    /** the name of the project (without Wikipedia:WikiProject) **/
    private String projectName;

    /** the session manager (controls logging in, communication w/ wikipedia) **/
    private WikiSessionManager sessionMgr;

    /** create the project together with its watchlist
     * @param sessionMgr the session manager
     * @param projectName the name of the project (without Wikipedia:WikiProject)
     * @param template the name of the project's tag template (without namespace)
     * @param articlePage the sub page of the project the list is written to
     * @param includePages pages to list even though they are not tagged
     **/
    Project (WikiSessionManager sessionMgr, String projectName, String template,
             String articlePage, String[] includePages) {
        this.sessionMgr = sessionMgr;
        this.projectName = projectName;
        this.watchlist = new Watchlist(projectName, articlePage, template,
                                       sessionMgr, includePages, this);
    }

    /** update the watchlist
     * @param useTaggedPages are we including tagged pages (true), or all pages in
     *        tagged categories (false)
     **/
    void updateWatchlist (boolean useTaggedPages) throws UnsupportedEncodingException,
            IOException, MalformedURLException {
        watchlist.update(useTaggedPages);
        watchlist.write();
    }

    /** write a page in the project; failures are reported on standard output
     *  and otherwise ignored, so one bad page does not stop the run
     * @param subPageName the name of the subpage
     * @param text the text to write
     */
    void writePage (String subPageName, String text) {
        try {
            if (DBG) {
                // sub page names contain '/', which is not usable in a file name
                String fileName = subPageName.replaceAll("/", "_") + ".txt";
                FileWriter file = new FileWriter(fileName);
                try {
                    file.write(text);
                } finally {
                    file.close();
                }
            } else {
                String pageName = "Wikipedia:WikiProject " + projectName + "/" + subPageName;
                String comment = "full update by [[User:WatchlistBot|WatchlistBot]]";
                Page page = new Page(sessionMgr, pageName);
                page.put(text, comment, false);
            }
        } catch (Exception e) {
            System.out.println("Could not write " + projectName + "/" + subPageName + ": " + e);
        }
    }
}

Watchlist.java

import java.util.*;
import java.io.*;
import java.net.*;

/**
 * Collects the pages that belong to one WikiProject, sorted by namespace,
 * and renders them as wiki text: a main page plus "/PageN" sub pages when
 * the list is too long for a single page.
 */
public class Watchlist {
    /** the project **/
    private Project project;

    /** the template name (without namespace) **/
    private String template;

    /** the session manager **/
    private WikiSessionManager sessionMgr;

    /** does this watchlist use tagged pages (as opposed to pages in a category list) **/
    private boolean taggedPages = true;

    /** pages which should be included in the project even though they're not tagged
     * (maybe because they share a talk page)
     **/
    private String[] includePages;

    /** the name of the project (without Wikipedia:WikiProject) **/
    private String projectName;

    /** the name of the page where the article list goes **/
    private String articlePage;

    /** the article pages **/
    private TreeSet<String> articles;
    /** the article talk pages **/
    private TreeSet<String> articlesTalk;
    /** the wikipedia pages **/
    private TreeSet<String> wikis;
    /** the wikipedia talk pages **/
    private TreeSet<String> wikisTalk;
    /** the template pages **/
    private TreeSet<String> templates;
    /** the template talk pages **/
    private TreeSet<String> templatesTalk;
    /** the category pages **/
    private TreeSet<String> categories;
    /** the category talk pages **/
    private TreeSet<String> categoriesTalk;
    /** the image pages **/
    private TreeSet<String> images;
    /** the image talk pages **/
    private TreeSet<String> imagesTalk;
    /** the portal pages **/
    private TreeSet<String> portals;
    /** the portal talk pages **/
    private TreeSet<String> portalsTalk;

    /** the maximum number of articles to put on one page **/
    private static final int MAX_ARTICLES = 9000;

    /** the placeholder in a sub page heading that is later replaced by "A-C" etc. **/
    private static final String RANGE_MARK = "<range>";

    /** this one is for the top of all bot-created pages **/
    private static final String BOT_WARN =
                "<div class=\"notice\" " +
                "style=\"background:#ffe1a7; border:1px solid #AAA; " +
                "padding:0.2em; margin:0.5em auto;\"> " +
                "[[Image:Stop_hand.svg|left|20px]] This page is automatically " +
                "recreated from time to time. Accordingly, any changes you " +
                "make here will be overwritten. See below for details.</div>\n\n";
    /** this text is used to start the first page, if we're splitting (use SPLIT_INTRO for main page,
     * SPLIT_INTRO_NEXT for next pages)
     **/
    private static final String SPLIT_INTRO1 =
                "There are too many articles (more than " + MAX_ARTICLES + ") in this project " +
                "to list them all on one page. This page and the ones linked ";
    private static final String SPLIT_INTRO2 = "contain ";
    private static final String SPLIT_INTRO = SPLIT_INTRO1 + "below " + SPLIT_INTRO2;
    private static final String SPLIT_INTRO_NEXT = SPLIT_INTRO1 + "from the main page " + SPLIT_INTRO2;
    /** this text starts the first page, if we're not splitting **/
    private static final String ONE_PAGE_INTRO = "This page contains ";
    /** this text is the rest of the intro, in either case (use END_INTRO1 + tagText + END_INTRO2
     * + template + END_INTRO3 + pageName + END_INTRO4 + pageName + END_INTRO5)
     **/
    private static final String END_INTRO1 =
                "links to all articles, categories, images, portal pages, " +
                "templates, and project pages ";
    private static final String END_INTRO2 = "with {{tl|";
    private static final String END_INTRO3 = "}} on their talk page. It was " +
                "generated by [[User:WatchlistBot|" +
                "WatchlistBot]]. Its purpose is to be able to track " +
                "the project history using ''[[Special:Recentchangeslinked/" +
                "Wikipedia:WikiProject ";
    private static final String END_INTRO4 =
                "|related changes]]'' or ''[http://tools.wikimedia.de/~interiot/" +
                "cgi-bin/offtoolserver/RC_firstonly?url=http%3A%2F%2Fen.wikipedia.org" +
                "%2Fw%2Findex.php%3Ftitle%3DSpecial%3ARecentchangeslinked%26target" +
                "%3DWikipedia:WikiProject_";
    private static final String END_INTRO5 =
                "%26hideminor%3D0%26days%3D7%26limit%3D500 related watchlist]'' which " +
                "only shows the last change for each article.\n\n";

    /** the text to be put on the main page **/
    private StringBuilder mainText;
    /** the text to be put on a sub page **/
    private StringBuilder subText;
    /** the number of articles on the main page **/
    private int count = 0;
    /** are we still putting articles on the main page **/
    private boolean onMainPage = true;
    /** special text to use if we're not using tagged pages **/
    private String tagText = "";
    /** the page number for the current subpage **/
    private int pageNo = 1;
    /** the output page name, for putting in messages **/
    private String outputName;

    /** create an (empty) watchlist; call update() to fill it and write() to publish it
     * @param projectName the name of the project (without Wikipedia:WikiProject)
     * @param articlePage the name of the page where the article list goes
     * @param template the template name (without namespace)
     * @param sessionMgr the session manager
     * @param includePages pages to list even though they are not tagged
     * @param project the project used to write the generated pages
     **/
    Watchlist (String projectName, String articlePage, String template,
               WikiSessionManager sessionMgr, String[] includePages,
               Project project) {
        this.projectName = projectName;
        this.articlePage = articlePage;
        this.template = template;
        this.sessionMgr = sessionMgr;
        this.includePages = includePages;
        this.project = project;
    }

    /** update the watchlist
     * @param useTaggedPages are we including tagged pages (true), or all pages in
     *        tagged categories (false)
     **/
    void update (boolean useTaggedPages) throws UnsupportedEncodingException,
            IOException, MalformedURLException {
        // remember the mode: write() words the page introduction differently
        // for category-based lists
        this.taggedPages = useTaggedPages;
        // reinitialize lists
        initLists();
        // first find the pages which are linked
        Page page = new Page(sessionMgr, "Template:" + template);
        TreeSet<String> refs = page.getTransclusions();
        if (!useTaggedPages) {
            // the list of pages in tagged categories
            TreeSet<String> pages = new TreeSet<String>();
            for (String ref : refs) {
                if (ref.startsWith("Category talk:")) {
                    System.out.println("getting pages in " + ref + " pages: " + pages.size());
                    Page cat = new Page(sessionMgr, ref.replace(" talk", ""));
                    pages.addAll(cat.getMembers());
                }
            }
            // from here on the category members are the pages to list
            refs = pages;
        }
        for (String ref : refs) {
            processPageName(ref);
        }
    }

    /** empty all page lists, then seed them with the always-included pages **/
    void initLists () {
        articles = new TreeSet<String>();
        articlesTalk = new TreeSet<String>();
        wikis = new TreeSet<String>();
        wikisTalk = new TreeSet<String>();
        templates = new TreeSet<String>();
        templatesTalk = new TreeSet<String>();
        categories = new TreeSet<String>();
        categoriesTalk = new TreeSet<String>();
        images = new TreeSet<String>();
        imagesTalk = new TreeSet<String>();
        portals = new TreeSet<String>();
        portalsTalk = new TreeSet<String>();
        for (String page : includePages) {
            processPageName(page);
        }
    }


    /** process a page name -- that is, add the article and its talk
     *  page to the appropriate lists. The name may be that of the page
     *  itself or of its talk page; both end up in the same pair of lists.
     **/
    private void processPageName (String pageName) {
        int sep = pageName.indexOf(':');
        if (sep < 0) {
            // no namespace prefix at all: a plain article
            articles.add(pageName);
            articlesTalk.add("Talk:" + pageName);
            return;
        }
        // split at the FIRST colon only, so that the rest of a title such as
        // "Talk:Star Trek: The Next Generation" is kept in one piece
        String namespace = pageName.substring(0, sep);
        String name = pageName.substring(sep + 1);
        if (namespace.equals("Talk")) {
            articles.add(name);
            articlesTalk.add("Talk:" + name);
        } else if (namespace.startsWith("Wikipedia")) {
            wikis.add("Wikipedia:" + name);
            wikisTalk.add("Wikipedia talk:" + name);
        } else if (namespace.startsWith("Template")) {
            templates.add("Template:" + name);
            templatesTalk.add("Template talk:" + name);
        } else if (namespace.startsWith("Category")) {
            // the leading colon makes a link instead of categorising the list page
            categories.add(":Category:" + name);
            categoriesTalk.add("Category talk:" + name);
        } else if (namespace.startsWith("Image")) {
            // the leading colon makes a link instead of displaying the image
            images.add(":Image:" + name);
            imagesTalk.add("Image talk:" + name);
        } else if (namespace.startsWith("Portal")) {
            portals.add("Portal:" + name);
            portalsTalk.add("Portal talk:" + name);
        }
        // NOTE(review): any other prefix is skipped, as before. That covers
        // namespaces such as "User talk", but also untagged-mode article
        // titles that merely contain a colon -- telling the two apart would
        // need the ns value from the API result.
    }

    /** prepare the output and write to wikipedia **/
    void write () {
        // start from a clean slate so that a second call does not continue
        // the page numbering of the first
        count = 0;
        onMainPage = true;
        pageNo = 1;
        // if we're not using tagged pages, we need to update the output a bit
        tagText = taggedPages ? "" : "in categories ";
        // the page name of the output
        outputName = projectName.replace(" ", "_") + "/" +
            articlePage.replace(" ", "_");

        mainText = new StringBuilder(BOT_WARN);

        // count the number of articles
        int numArticles = articles.size() + wikis.size() + templates.size() +
            categories.size() + images.size() + portals.size();

        // figure out if we can fit everything on one page (double the
        // number of articles to count talk pages)
        boolean splitting = (numArticles*2 > MAX_ARTICLES);
        if (splitting) {
            mainText.append(SPLIT_INTRO);
        } else {
            mainText.append(ONE_PAGE_INTRO);
        }
        mainText.append(END_INTRO1 + tagText + END_INTRO2 + template + END_INTRO3 +
            outputName + END_INTRO4 + outputName + END_INTRO5);

        mainText.append("==Regular content (count: " + numArticles + ")==\n");

        mainText.append("===Articles (count: " + articles.size() + ")===\n");
        // '\0' cannot start a title, so the very first article always gets
        // its letter heading (the old 'Z' start value suppressed it for "Z...")
        char prevChar = '\0';
        char firstChar = 'Z';

        // the text for this subpage (if we're not splitting, this will be put
        // onto the main page)
        subText = new StringBuilder();

        for (String s : articles) {
            if (s.charAt(0) != prevChar) {
                subText.append("====" + s.charAt(0) + "====\n");
                prevChar = s.charAt(0);
                // if this is the first article
                if (count == 0) {
                    firstChar = prevChar;
                }
            }
            // put the article name
            subText.append("*[[" + s + "]]\n");
            count++;
            // if we've put all the articles we can on this page
            if (count > MAX_ARTICLES) {
                count = 0;
                if (onMainPage) {
                    onMainPage = false;
                    mainText.append(subText);
                } else {
                    mainText.append("====[[/Page" + pageNo + "|" +
                            firstChar + "-" + prevChar + "]]====\n");
                    fillRange(subText, firstChar, prevChar);
                    project.writePage(articlePage + "/Page" + pageNo, subText.toString());
                    pageNo++;
                }
                firstChar = prevChar;
                subText = new StringBuilder("===Articles " + RANGE_MARK + "===\n" +
                          "====" + firstChar + "====\n");
            }
        }
        // if we have too many articles, and we've already started the second
        // (or more) page
        if (splitting && !onMainPage) {
            mainText.append("====[[/Page" + pageNo + "|" +
                    firstChar + "-" + prevChar + "]]====\n");
            fillRange(subText, firstChar, prevChar);
            project.writePage(articlePage + "/Page" + pageNo, subText.toString());
            pageNo++;
        } else { // we only have one page or this is the first batch
            mainText.append(subText);
        }

        prepareArticleList("Wikipedia", wikis, true);
        prepareArticleList("Templates", templates, true);
        prepareArticleList("Portals", portals, true);
        prepareArticleList("Categories", categories, true);
        prepareArticleList("Images", images, true);

        mainText.append("==Talk pages==\n");

        mainText.append("===Articles===\n");
        firstChar = 'Z';
        if (splitting && subText.length() != 0) {
            // flush whatever prepareArticleList() left for the current sub page
            project.writePage(articlePage + "/Page" + pageNo, subText.toString());
            pageNo++;
            subText = new StringBuilder(BOT_WARN + SPLIT_INTRO_NEXT +
                                        END_INTRO1 + tagText + END_INTRO2 +
                                        template + END_INTRO3 + outputName +
                                        END_INTRO4 + outputName + END_INTRO5);
            subText.append("===Articles " + RANGE_MARK + "===\n");
        } else {
            subText = new StringBuilder();
        }
        count = 0;
        char endChar = 'Z';
        for (String s : articlesTalk) {
            // index 5 is the first character after the "Talk:" prefix
            if (count == 0) {
                firstChar = s.charAt(5);
            }
            subText.append("*[[" + s + "]]\n");
            count++;
            if (count > MAX_ARTICLES) {
                count = 0;
                endChar = s.charAt(5);
                mainText.append("*[[/Page" + pageNo + "|" +
                                firstChar + "-" + endChar + "]]\n");
                fillRange(subText, firstChar, endChar);
                project.writePage(articlePage + "/Page" + pageNo, subText.toString());
                pageNo++;
                firstChar = endChar;
                subText = new StringBuilder("===Articles " + RANGE_MARK + "===\n");
            }
            endChar = s.charAt(5);
        }
        if (splitting) {
            mainText.append("*[[/Page" + pageNo + "|" +
                            firstChar + "-" + endChar + "]]\n");
            fillRange(subText, firstChar, endChar);
            project.writePage(articlePage + "/Page" + pageNo, subText.toString());
            pageNo++;
        } else {
            mainText.append(subText);
        }

        prepareArticleList("Wikipedia", wikisTalk, false);
        prepareArticleList("Templates", templatesTalk, false);
        prepareArticleList("Portals", portalsTalk, false);
        prepareArticleList("Categories", categoriesTalk, false);
        prepareArticleList("Images", imagesTalk, false);

        project.writePage(articlePage, mainText.toString());
    }

    /** replace the range placeholder in a sub page heading (if there is one)
     *  with "first-last"
     **/
    private static void fillRange (StringBuilder text, char first, char last) {
        int index = text.indexOf(RANGE_MARK);
        if (index != -1) {
            text.replace(index, index + RANGE_MARK.length(), first + "-" + last);
        }
    }

    /** add one non-article section (heading plus list) to the output. The list
     *  goes onto the main page while it still fits; otherwise the main page only
     *  gets a link and the list is collected in subText for the current sub page.
     * @param title the section heading
     * @param pages the pages to list
     * @param includeCount should the heading show the number of pages
     **/
    private void prepareArticleList (String title, TreeSet<String> pages,
                                     boolean includeCount) {
        String countText = "";
        if (includeCount) {
            countText = " (count: " + pages.size() + ")";
        }
        mainText.append("===" + title + countText + "===\n");
        // if we need to put these articles on the next page (because we've
        // already started the second page, or we can't fit all these pages
        // on the main page)
        boolean pagesOnNext = !onMainPage || count + pages.size() > MAX_ARTICLES;
        if (pagesOnNext) {
            // NOTE(review): this starts subText afresh, so a list collected
            // by an earlier call that was not written out yet appears to be
            // dropped -- confirm whether the sections were meant to accumulate.
            subText = new StringBuilder(BOT_WARN + SPLIT_INTRO_NEXT +
                    END_INTRO1 + tagText + END_INTRO2 + template + END_INTRO3 +
                    outputName + "/Page" + pageNo + END_INTRO4 + outputName + "/Page" + pageNo +
                    END_INTRO5 +
                    "===" + title + "===\n");
            mainText.append("*[[/Page" + pageNo + "#" + title + "|" + title +"]]\n");
        } else {
            subText = new StringBuilder();
            count += pages.size();
        }
        for (String s : pages) {
            subText.append("*[[" + s + "]]\n");
        }
        // if these pages are going on the main page, put them there
        if (!pagesOnNext) {
            mainText.append(subText);
            subText = new StringBuilder();
        } else {
            onMainPage = false;
        }
    }
}

Page.java

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;
import org.apache.commons.lang.StringEscapeUtils;

/**
 * A single wiki page: reads its raw text, overwrites it through the edit
 * form of index.php, and lists the pages that transclude it or (for a
 * category) belong to it through api.php.
 */
public class Page {
    /** the title of the page (with namespace); URL-encoded by the constructor **/
    private String title;
    /** the title of the page (without namespace); URL-encoded by the constructor **/
    private String titleWithoutNamespace = null;

    /** the index.php URL (as a String) **/
    private final String strIndexURL = "http://en.wikipedia.org/w/index.php";

    /** the api.php URL (as a String) **/
    private final String strAPIURL = "http://en.wikipedia.org/w/api.php";

    /** the session manager (manages logging in, cookies, etc) **/
    private WikiSessionManager sessionMgr;

    /** how long to sleep if maxlag is > 5 -- start with 5 sec **/
    private static int sleepTime = 5000;
    /** the maximum time to sleep (after this much time, we quit) **/
    private final static int MAX_SLEEP_TIME = 160000;
    /** the last write time, so we can keep the bot slow **/
    private static long lastWriteTime = -1;
    /** the minimum delay between writes **/
    private final static int MIN_WRITE_DELAY = 10000;

    /** the form of the hidden fields on the edit form: group 1 is the value,
     *  group 2 the field name
     **/
    private static final Pattern HIDDEN_INPUT =
        Pattern.compile("<input type='hidden' value=\"(.*?)\" name=\"(.*?)\" />");

    /** the list of articles that we're building, for example,
     *  in @see getTransclusions()
     **/
    private TreeSet<String> articles;

    /** create the Page object and store its title (with namespace)
     * @param title the title of the page (with namespace)
     * @param sessionMgr the session manager (controls logging in and other interaction
     *        with wikipedia)
     * @throws UnsupportedEncodingException if there's a problem with the URL
     */
    Page (WikiSessionManager sessionMgr, String title)
            throws UnsupportedEncodingException {
        this.sessionMgr = sessionMgr;
        // the plain title is needed to derive the name without namespace;
        // both are stored URL-encoded afterwards
        this.title = title;
        this.titleWithoutNamespace = URLEncoder.encode(titleWithoutNamespace(), "UTF-8");
        this.title = URLEncoder.encode(title, "UTF-8");
    }

    /** get the title of this page without namespace (URL-encoded once the
     *  constructor has finished)
     **/
    String titleWithoutNamespace () {
        // if we've already gotten it once, don't do it again (because of encoding)
        if (titleWithoutNamespace != null) return titleWithoutNamespace;
        // we haven't called this yet -- means we're in the constructor.
        // Cut at the first colon only, so that a name which itself contains a
        // colon ("Category:Star Trek: Voyager") is not truncated.
        int sep = title.indexOf(':');
        if (sep < 0) return title;
        return title.substring(sep + 1);
    }

    /** get the contents of the page
     * @return the page contents
     * @throws IOException if something goes wrong (like the page doesn't exist)
     **/
    public String get () throws IOException {
        // get the URL & connection
        return urlRequest(strIndexURL + "?title=" + title + "&action=raw");
    }

    /** write the specified text to the page. Problems are reported on
     *  standard output; nothing is thrown to the caller.
     * @param text the text to put on the page
     * @param summary the edit summary
     * @param minor is this a minor edit
     **/
    void put (String text, String summary, boolean minor) {
        try {
            // process the existing edit form to find:
            // wpStarttime, wpEdittime, and wpEditToken. They're in lines of the
            // form given in HIDDEN_INPUT
            String startTime = "", editTime = "", editToken = "";
            BufferedReader reader = openEditForm();
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.indexOf("<input type='hidden'") == -1) continue;
                    Matcher matcher = HIDDEN_INPUT.matcher(line);
                    // a hidden field written in some other form is of no use to us
                    if (!matcher.find()) continue;
                    String value = matcher.group(1);
                    String name = matcher.group(2);
                    if (name.equals("wpStarttime")) {
                        startTime = value;
                    } else if (name.equals("wpEdittime")) {
                        editTime = value;
                    } else if (name.equals("wpEditToken")) {
                        editToken = value;
                        break; // we don't need anything else
                    }
                }
            } finally {
                reader.close();
            }
            if (editToken.length() == 0) {
                // without a token the submission cannot succeed
                System.out.println("No edit token found for " + titleWithoutNamespace +
                                   " -- page not written");
                return;
            }

            // send the data
            URL url = new URL(strIndexURL + "?title=" + title + "&action=submit");
            URLConnection connection = url.openConnection();

            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestProperty("Content-Type",
                    "application/x-www-form-urlencoded");
            sessionMgr.addCookies(connection);

            // keep the bot slow: leave at least MIN_WRITE_DELAY between writes
            long writeDelay = System.currentTimeMillis() - lastWriteTime;
            if (lastWriteTime != -1 && writeDelay < MIN_WRITE_DELAY) {
                System.out.println("Waiting " + (MIN_WRITE_DELAY-writeDelay)/1000 + " seconds");
                Thread.sleep(MIN_WRITE_DELAY-writeDelay);
            }
            System.out.println("Writing " + titleWithoutNamespace);
            OutputStreamWriter output = new OutputStreamWriter(connection
                    .getOutputStream(), "UTF-8");
            try {
                output.write("wpStarttime=" + startTime);
                output.write("&wpEdittime=" + editTime);
                output.write("&wpEditToken=" + URLEncoder.encode(editToken, "UTF-8"));
                output.write("&wpTextbox1=" + URLEncoder.encode(text, "UTF-8"));
                output.write("&wpSummary=" + URLEncoder.encode(summary, "UTF-8"));
                if (minor) {
                    output.write("&wpMinorEdit=1");
                }
                output.flush();
            } finally {
                output.close();
            }
            lastWriteTime = System.currentTimeMillis();

            // the edit does not go through unless the response is requested
            // (presumably because the request is only completed at that
            // point); the content itself is not examined
            BufferedReader input = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), "UTF-8"));
            try {
                input.readLine();
            } finally {
                input.close();
            }
        } catch (InterruptedException e) {
            // keep the interrupt visible to whoever runs this thread
            Thread.currentThread().interrupt();
            System.out.println(e);
        } catch (Exception e) {
            System.out.println(e);
        }
    }

    /** open the edit form of this page, waiting and trying again (with a
     *  fresh connection and a doubled delay each time) while the server
     *  answers 503 to the maxlag=5 request. Exits the program once the delay
     *  would exceed MAX_SLEEP_TIME.
     * @return a reader on the HTML of the edit form
     * @throws IOException if the form cannot be read for any other reason
     * @throws InterruptedException if interrupted while waiting
     */
    private BufferedReader openEditForm () throws IOException, InterruptedException {
        while (true) {
            URL url = new URL(strIndexURL + "?title=" + title + "&action=edit&maxlag=5");
            URLConnection connection = url.openConnection();
            sessionMgr.addCookies(connection);
            connection.connect();
            try {
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(connection.getInputStream(), "UTF-8"));
                sleepTime = 5000;
                return reader;
            } catch (IOException e) {
                // anything but "server too busy" is a real failure: report it
                // instead of trying again forever
                if (!isServiceUnavailable(connection)) throw e;
                System.out.println("Max lag -- sleeping for " + sleepTime/1000 + " seconds");
                Thread.sleep(sleepTime);
                sleepTime *= 2;
                if (sleepTime > MAX_SLEEP_TIME) {
                    System.out.println("Giving up");
                    System.exit(-1);
                }
            }
        }
    }

    /** did the server answer this request with HTTP status 503? **/
    private static boolean isServiceUnavailable (URLConnection connection) {
        if (!(connection instanceof HttpURLConnection)) return false;
        try {
            return ((HttpURLConnection) connection).getResponseCode()
                    == HttpURLConnection.HTTP_UNAVAILABLE;
        } catch (IOException e) {
            // no status line could be obtained at all, so it was not a 503
            return false;
        }
    }

    /** get the transclusions for this page
     * @return the list of all articles which transclude this page
     * @state articles is used to build the list, but it is initialized
     *        and then returned
     * @throws MalformedURLException
     * @throws IOException
     */
    TreeSet<String> getTransclusions ()
            throws MalformedURLException, IOException {
        // the article list
        articles = new TreeSet<String>();
        // the parameters to use in the URL
        final String urlParams = "action=query&list=embeddedin&eilimit=5000&format=xml";

        String result = urlRequest(strAPIURL + "?titles=" + title + "&" + urlParams);

        while (result != null) {
            processResult(result, "ei");
            String continueText = continueValue(result, "eicontinue");
            if (continueText == null) {
                result = null;
            } else {
                result = urlRequest(strAPIURL + "?" + urlParams + "&eicontinue=" +
                                    URLEncoder.encode(continueText, "UTF-8"));
            }
        }

        return articles;
    }

    /** get the articles in the category
     * @return the list of all articles in this category
     * @state articles is used to build the list, but it is initialized
     *        and then returned
     * @throws MalformedURLException
     * @throws IOException
     */
    TreeSet<String> getMembers ()
            throws MalformedURLException, IOException {
        // the article list
        articles = new TreeSet<String>();
        // the parameters to use in the URL
        final String urlParams = "?cmcategory=" + titleWithoutNamespace +
                                 "&action=query&list=categorymembers&cmlimit=5000&format=xml";
        String result = urlRequest(strAPIURL + urlParams);

        while (result != null) {
            processResult(result, "cm");
            String continueText = continueValue(result, "cmcontinue");
            if (continueText == null) {
                result = null;
            } else {
                result = urlRequest(strAPIURL + urlParams + "&cmcontinue=" +
                                    URLEncoder.encode(continueText, "UTF-8"));
            }
        }

        return articles;
    }

    /** find the continuation value in an API result
     * @param result the raw XML text
     * @param key the attribute name ("eicontinue", "cmcontinue")
     * @return the attribute value, or null if the result has no (complete)
     *         continuation attribute
     */
    private static String continueValue (String result, String key) {
        int index = result.indexOf(key);
        if (index == -1) return null;
        // skip the key itself and the =" which follows it
        int start = index + key.length() + 2;
        int end = result.indexOf("\"", start);
        if (end == -1) return null;
        // the value is XML text, so entities such as &amp; must be decoded
        // before it is sent back
        return StringEscapeUtils.unescapeXml(result.substring(start, end));
    }

    /** process the result -- this is a list of articles in XML format
     * @param result the raw text
     * @param id the id to use in the pattern (e.g., "ei" for embedded in, "cm" for
     *        for category members, etc.)
     * @state articles new article titles are added to articles
     */
    private void processResult (String result, String id) {
        Pattern pattern =
            Pattern.compile("<" + id + " pageid=\"(.*?)\" ns=\"(.*?)\" title=\"(.*?)\" />");
        Matcher matcher = pattern.matcher(result);

        while (matcher.find()) {
            String article = matcher.group(3);
            article = StringEscapeUtils.unescapeXml(article);
            articles.add(article);
        }
    }

    /** open a URL and read the page
     * @param http the full URL "http://whatever"
     * @return the text of the page, every line terminated by "\n"
     * @throws MalformedURLException
     * @throws IOException
     */
    private String urlRequest (String http) throws MalformedURLException,
            IOException {
        // get the URL & connection
        URL url = new URL(http);
        URLConnection connection = url.openConnection();
        sessionMgr.addCookies(connection);

        // convert the connection stream into a String
        StringBuilder sbResult = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                connection.getInputStream(), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                sbResult.append(line).append("\n");
            }
        } finally {
            reader.close();
        }

        return sbResult.toString();
    }
}