User:WatchlistBot/source.java
Appearance
WatchlistBot.java
class WatchlistBot {

    /**
     * Entry point: logs into Wikipedia, regenerates the watchlist pages for a
     * fixed set of WikiProjects, then logs out.
     *
     * @param args unused
     * @throws Exception if login, page retrieval, or page writing fails
     */
    public static void main(String[] args) throws Exception {
        WikiSessionManager sessionMgr = new WikiSessionManager();
        sessionMgr.userLogin(Private.username, Private.password);

        // most projects have no extra (untagged) pages to include
        String[] noExtraPages = new String[0];

        // Numismatics also watches the currency navigation templates, which
        // are not tagged themselves and so must be listed explicitly
        String[] numismaticsPages = {
            "Template:Currencies of Africa",
            "Template:Currencies of Asia",
            "Template:Currencies of Europe",
            "Template:Currencies of Oceania",
            "Template:Currencies of the Americas"};
        update(sessionMgr, "Numismatics", "Numismaticnotice", "Articles",
               numismaticsPages, true);

        // projects whose watchlists are built from individually tagged pages
        update(sessionMgr, "Numismatics", "Exonumianotice", "Exonumia articles", noExtraPages, true);
        update(sessionMgr, "Hawaii", "WPHawaii", "Hawaii recent changes", noExtraPages, true);
        update(sessionMgr, "Texas", "WikiProject Texas", "Articles", noExtraPages, true);
        update(sessionMgr, "Ice Hockey", "Ice hockey", "Articles", noExtraPages, true);
        update(sessionMgr, "Louisville", "WikiProject Louisville", "Watchall", noExtraPages, true);
        update(sessionMgr, "Kentucky", "WikiProject Kentucky", "Watchall", noExtraPages, true);
        update(sessionMgr, "Texas State Highways", "Texas State Highway WikiProject",
               "Watchlist", noExtraPages, true);
        update(sessionMgr, "Dallas", "WikiProject Dallas", "Articles", noExtraPages, true);
        update(sessionMgr, "Comics", "comicsproj", "Articles", noExtraPages, true);
        update(sessionMgr, "Pittsburgh", "PittsburghWikiProject", "Articles", noExtraPages, true);
        update(sessionMgr, "Baseball", "Baseball-WikiProject", "Articles", noExtraPages, true);
        update(sessionMgr, "Bell Systems", "WikiProject Bell System", "Articles", noExtraPages, true);
        update(sessionMgr, "LGBT studies", "LGBTProject", "Articles", noExtraPages, true);
        update(sessionMgr, "San Francisco Bay Area", "SFBAProject", "Watchlist", noExtraPages, true);
        update(sessionMgr, "Africa", "AfricaProject", "Watchlist", noExtraPages, true);
        update(sessionMgr, "Electronics", "Electron", "Articles", noExtraPages, true);
        update(sessionMgr, "Tennessee", "WikiProject Tennessee", "Articles", noExtraPages, true);
        update(sessionMgr, "Hong Kong", "WikiProject Hong Kong", "Articles", noExtraPages, true);
        update(sessionMgr, "Films", "Film", "Articles", noExtraPages, true);

        // these two projects tag categories rather than individual articles,
        // so their watchlists are built from category membership instead
        update(sessionMgr, "Automobiles", "AutomobileWatch", "Articles", noExtraPages, false);
        update(sessionMgr, "Cricket", "CricketWatch", "Articles", noExtraPages, false);

        System.out.println("finished");
        sessionMgr.userLogout();
    }

    /**
     * Builds and writes a single project's watchlist.
     *
     * @param sessionMgr the logged-in session manager
     * @param projectName project name without the "Wikipedia:WikiProject " prefix
     * @param template banner template name (without the Template: namespace)
     * @param articlePage subpage name where the article list is written
     * @param includePages extra pages to include even though they are untagged
     * @param useTaggedPages true to list tagged pages, false to list all
     *        pages in tagged categories
     * @throws Exception if retrieving or writing any page fails
     */
    private static void update(WikiSessionManager sessionMgr, String projectName,
                               String template, String articlePage,
                               String[] includePages, boolean useTaggedPages)
            throws Exception {
        Project project = new Project(sessionMgr, projectName, template,
                                      articlePage, includePages);
        project.updateWatchlist(useTaggedPages);
    }
}
WikiSessionManager.java
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Arrays;
import java.net.URL;
import java.net.URLEncoder;
import java.net.URLConnection;
/**
* WikiSessionManager is a utility class that logs into the English
* Wikipedia and facilitates making HTTP requests with cookies.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* @author Gracenotes
* @version 0.1
**/
public class WikiSessionManager
{
    /** login cookies ("name=value; ..."), the edit-session cookie, and the
     * logged-in account name (null when not logged in) */
    private String cookie, sessionData, username;
    /** whether a login has succeeded and not yet been logged out */
    private boolean loggedIn;

    /** Creates a manager with no active session. */
    public WikiSessionManager()
    {
        this.loggedIn = false;
        this.sessionData = "";
        this.cookie = "";
    }

    /**
     * Logs into the English Wikipedia via api.php, captures the login
     * cookies, then fetches an edit page to capture the session cookie.
     * The password array is scrubbed as soon as it has been sent.
     *
     * @param username the account name (surrounding whitespace is ignored)
     * @param password the account password; overwritten before returning
     * @throws IllegalArgumentException if username or password is blank
     * @throws IOException if the HTTP exchange fails or no session cookie
     *         could be obtained
     */
    public void userLogin(String username, char[] password) throws IOException
    {
        username = username.trim();
        if (username.length() == 0 || password.length == 0)
            throw new IllegalArgumentException("Blank parameter");
        URL url = new URL("http://en.wikipedia.org/w/api.php");
        URLConnection connection = url.openConnection();
        connection.setDoOutput(true);
        connection.setUseCaches(false);
        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        connection.connect();
        OutputStreamWriter output = new OutputStreamWriter(connection.getOutputStream(), "UTF-8");
        try {
            output.write("action=login" +
                "&lgname=" + URLEncoder.encode(username, "UTF-8") +
                "&lgpassword=" + URLEncoder.encode(new String(password).trim(), "UTF-8"));
            output.flush();
        } finally {
            // always release the stream, even if the write fails
            output.close();
        }
        // scrub the password from memory now that it has been sent
        Arrays.fill(password, ' ');
        // collect every Set-Cookie header, keeping only the name=value part
        // (header names are matched case-insensitively, per HTTP)
        StringBuilder receivedCookie = new StringBuilder();
        String headerName;
        int i = 0;
        while ((headerName = connection.getHeaderFieldKey(++i)) != null)
        {
            if (headerName.equalsIgnoreCase("Set-Cookie"))
            {
                if (receivedCookie.length() > 0)
                    receivedCookie.append("; ");
                receivedCookie.append(connection.getHeaderField(i).split(";")[0]);
            }
        }
        this.cookie = receivedCookie.toString();
        // a successful login sets a "*Token" cookie
        this.loggedIn = this.cookie.indexOf("Token=") != -1;
        this.username = this.loggedIn ? username : null;
        // IB edit (get the session data)
        url = new URL("http://en.wikipedia.org/w/index.php?title=Wikipedia:Sandbox&action=edit");
        connection = url.openConnection();
        addCookies(connection);
        connection.connect();
        if (!findSessionData(connection)) {
            throw new IOException("Could not load session data");
        }
        // end IB edit
    }

    /**
     * Logs out by requesting Special:Userlogout and clears all local
     * session state. A no-op when not logged in.
     *
     * @throws IOException if the logout request cannot be made
     */
    public void userLogout() throws IOException
    {
        if (!this.loggedIn)
            return;
        URL url = new URL("http://en.wikipedia.org/w/index.php?title=Special:Userlogout");
        URLConnection connection = url.openConnection();
        this.addCookies(connection);
        // NOTE(review): only connect() is called; the response body is never
        // read, so the server-side logout may not actually execute -- verify
        connection.connect();
        this.loggedIn = false;
        this.cookie = "";
        this.sessionData = "";
    }

    /**
     * Indicates whether a user is logged in or not
     *
     * @return A boolean showing whether a user is logged in or not
     */
    public boolean isLoggedIn()
    {
        return this.loggedIn;
    }

    /**
     * Attaches the stored login/session cookies (and a User-Agent naming the
     * bot account) to an outgoing request. A no-op when not logged in.
     *
     * @param connection the not-yet-connected request to decorate
     */
    public void addCookies(URLConnection connection)
    {
        if (!this.loggedIn)
            return;
        connection.setRequestProperty("Cookie", this.cookie +
            (this.sessionData != null ? "; " + this.sessionData : ""));
        connection.setRequestProperty("User-Agent", this.username);
    }

    /**
     * Scans a response's headers for the MediaWiki "_session" cookie and
     * stores it.
     *
     * @param connection a connected request whose headers are inspected
     * @return true if a session cookie was found
     */
    public boolean findSessionData(URLConnection connection)
    {
        sessionData = "";
        String headerName;
        int i = 0;
        while ((headerName = connection.getHeaderFieldKey(++i)) != null)
        {
            // case-insensitive match, consistent with userLogin (the old
            // code used equals() here and could miss "set-cookie")
            if (headerName.equalsIgnoreCase("Set-Cookie") &&
                    connection.getHeaderField(i).indexOf("_session") != -1)
                this.sessionData = connection.getHeaderField(i).split(";")[0];
        }
        return this.sessionData.length() != 0;
    }
}
Project.java
import java.io.*;
import java.net.*;
public class Project {
    /** are we debugging (sends output to file instead of wikipedia) **/
    final static boolean DBG = false;
    /** the watchlist this project maintains **/
    private Watchlist watchlist;
    /** the name of the project (without Wikipedia:WikiProject) **/
    private String projectName;
    /** the session manager (controls logging in, communication w/ wikipedia) **/
    private WikiSessionManager sessionMgr;

    /** create a project and its (not yet populated) watchlist
     * @param sessionMgr the session manager
     * @param projectName the project name (without Wikipedia:WikiProject)
     * @param template the banner template name (without namespace)
     * @param articlePage the subpage name the article list is written to
     * @param includePages pages to include even though they are not tagged
     **/
    Project (WikiSessionManager sessionMgr, String projectName, String template,
             String articlePage, String[] includePages) {
        this.sessionMgr = sessionMgr;
        this.projectName = projectName;
        this.watchlist = new Watchlist(projectName, articlePage, template,
                                       sessionMgr, includePages, this);
    }

    /** update the watchlist
     * @param useTaggedPages are we including tagged pages (true), or all pages in
     * tagged categories (false)
     **/
    void updateWatchlist (boolean useTaggedPages) throws UnsupportedEncodingException,
            IOException, MalformedURLException {
        watchlist.update(useTaggedPages);
        watchlist.write();
    }

    /** write a page in the project
     * @param subPageName the name of the subpage
     * @param text the text to write
     */
    void writePage (String subPageName, String text) {
        try {
            if (DBG) {
                // debug mode: dump to a local file named after the subpage
                subPageName = subPageName.replaceAll("/", "_");
                FileWriter file = new FileWriter(subPageName + ".txt");
                try {
                    file.write(text);
                } finally {
                    // always release the file handle, even if the write fails
                    // (the old code leaked it on exception)
                    file.close();
                }
            } else {
                String pageName = "Wikipedia:WikiProject " + projectName + "/" + subPageName;
                String comment = "full update by [[User:WatchlistBot|WatchlistBot]]";
                Page page = new Page(sessionMgr, pageName);
                page.put(text, comment, false);
            }
        } catch (Exception e) {
            // best-effort: one failed page must not abort the whole run
            System.out.println(e);
        }
    }
}
Watchlist.java
import java.util.*;
import java.io.*;
import java.net.*;
public class Watchlist {
    /** the project that owns this watchlist (used to write output pages) **/
    private Project project;
    /** the banner template name (without namespace) **/
    private String template;
    /** the session manager **/
    private WikiSessionManager sessionMgr;
    /** does this watchlist use tagged pages (as opposed to pages in tagged
     * categories); set by update() **/
    private boolean taggedPages = true;
    /** pages which should be included in the project even though they're not tagged
     * (maybe because they share a talk page)
     **/
    private String[] includePages;
    /** the name of the project (without Wikipedia:WikiProject) **/
    private String projectName;
    /** the name of the page where the article list goes **/
    private String articlePage;
    /** the article pages **/
    private TreeSet<String> articles;
    /** the article talk pages **/
    private TreeSet<String> articlesTalk;
    /** the wikipedia pages **/
    private TreeSet<String> wikis;
    /** the wikipedia talk pages **/
    private TreeSet<String> wikisTalk;
    /** the template pages **/
    private TreeSet<String> templates;
    /** the template talk pages **/
    private TreeSet<String> templatesTalk;
    /** the category pages **/
    private TreeSet<String> categories;
    /** the category talk pages **/
    private TreeSet<String> categoriesTalk;
    /** the image pages **/
    private TreeSet<String> images;
    /** the image talk pages **/
    private TreeSet<String> imagesTalk;
    /** the portal pages **/
    private TreeSet<String> portals;
    /** the portal talk pages **/
    private TreeSet<String> portalsTalk;
    /** the maximum number of articles to put on one page **/
    private static final int MAX_ARTICLES = 9000;
    /** warning banner placed at the top of all bot-created pages **/
    private static final String BOT_WARN =
        "<div class=\"notice\" " +
        "style=\"background:#ffe1a7; border:1px solid #AAA; " +
        "padding:0.2em; margin:0.5em auto;\"> " +
        "[[Image:Stop_hand.svg|left|20px]] This page is automatically " +
        "recreated from time to time. Accordingly, any changes you " +
        "make here will be overwritten. See below for details.</div>\n\n";
    /** this text is used to start the first page, if we're splitting (use SPLIT_INTRO for main page,
     * SPLIT_INTRO_NEXT for next pages)
     **/
    private static final String SPLIT_INTRO1 =
        "There are too many articles (more than " + MAX_ARTICLES + ") in this project " +
        "to list them all on one page. This page and the ones linked ";
    private static final String SPLIT_INTRO2 = "contain ";
    private static final String SPLIT_INTRO = SPLIT_INTRO1 + "below " + SPLIT_INTRO2;
    private static final String SPLIT_INTRO_NEXT = SPLIT_INTRO1 + "from the main page " + SPLIT_INTRO2;
    /** this text starts the first page, if we're not splitting **/
    private static final String ONE_PAGE_INTRO = "This page contains ";
    /** this text is the rest of the intro, in either case (use END_INTRO1 + tagText + END_INTRO2
     * + template + END_INTRO3 + pageName + END_INTRO4 + pageName + END_INTRO5)
     **/
    private static final String END_INTRO1 =
        "links to all articles, categories, images, portal pages, " +
        "templates, and project pages ";
    private static final String END_INTRO2 = "with {{tl|";
    private static final String END_INTRO3 = "}} on their talk page. It was " +
        "generated by [[User:WatchlistBot|" +
        "WatchlistBot]]. Its purpose is to be able to track " +
        "the project history using ''[[Special:Recentchangeslinked/" +
        "Wikipedia:WikiProject ";
    private static final String END_INTRO4 =
        "|related changes]]'' or ''[http://tools.wikimedia.de/~interiot/" +
        "cgi-bin/offtoolserver/RC_firstonly?url=http%3A%2F%2Fen.wikipedia.org" +
        "%2Fw%2Findex.php%3Ftitle%3DSpecial%3ARecentchangeslinked%26target" +
        "%3DWikipedia:WikiProject_";
    private static final String END_INTRO5 =
        "%26hideminor%3D0%26days%3D7%26limit%3D500 related watchlist]'' which " +
        "only shows the last change for each article.\n\n";
    /** the text to be put on the main page **/
    private StringBuilder mainText;
    /** the text to be put on a sub page **/
    private StringBuilder subText;
    /** the number of articles on the main page **/
    private int count = 0;
    /** are we still putting articles on the main page **/
    private boolean onMainPage = true;
    /** special text to use if we're not using tagged pages **/
    private String tagText = "";
    /** the page number for the current subpage **/
    private int pageNo = 1;
    /** the output page name, for putting in messages **/
    private String outputName;

    /** create a watchlist (call update() and write() to populate and emit it)
     * @param projectName the project name (without Wikipedia:WikiProject)
     * @param articlePage the subpage name the article list is written to
     * @param template the banner template name (without namespace)
     * @param sessionMgr the session manager
     * @param includePages pages to include even though they are not tagged
     * @param project the owning project
     **/
    Watchlist (String projectName, String articlePage, String template,
               WikiSessionManager sessionMgr, String[] includePages,
               Project project) {
        this.projectName = projectName;
        this.articlePage = articlePage;
        this.template = template;
        this.sessionMgr = sessionMgr;
        this.includePages = includePages;
        this.project = project;
    }

    /** update the watchlist
     * @param useTaggedPages are we including tagged pages (true), or all pages in
     * tagged categories (false)
     **/
    void update (boolean useTaggedPages) throws UnsupportedEncodingException,
            IOException, MalformedURLException {
        // remember the mode for write(): it adjusts the intro text. (This
        // assignment was missing, so the "in categories" wording never
        // appeared for category-based projects.)
        this.taggedPages = useTaggedPages;
        // reinitialize lists
        initLists();
        // first find the pages which transclude the project banner
        Page page = new Page(sessionMgr, "Template:" + template);
        TreeSet<String> refs = page.getTransclusions();
        if (!useTaggedPages) {
            // the project tags categories: collect the members of every
            // tagged category instead of the tagged pages themselves
            TreeSet<String> pages = new TreeSet<String>();
            for (String ref : refs) {
                if (ref.startsWith("Category talk:")) {
                    System.out.println("getting pages in " + ref + " pages: " + pages.size());
                    Page cat = new Page(sessionMgr, ref.replace(" talk", ""));
                    pages.addAll(cat.getMembers());
                }
            }
            // move the pages list into refs (so the loop below processes it)
            refs = pages;
        }
        for (String ref : refs) {
            processPageName(ref);
        }
    }

    /** reset all page sets and seed them with the explicitly included pages **/
    void initLists () {
        articles = new TreeSet<String>();
        articlesTalk = new TreeSet<String>();
        wikis = new TreeSet<String>();
        wikisTalk = new TreeSet<String>();
        templates = new TreeSet<String>();
        templatesTalk = new TreeSet<String>();
        categories = new TreeSet<String>();
        categoriesTalk = new TreeSet<String>();
        images = new TreeSet<String>();
        imagesTalk = new TreeSet<String>();
        portals = new TreeSet<String>();
        portalsTalk = new TreeSet<String>();
        for (String page : includePages) {
            processPageName(page);
        }
    }

    /** process a page name -- that is, add the article and its talk
     * page to the appropriate lists
     * @param pageName the page title, possibly with a namespace prefix
     **/
    private void processPageName (String pageName) {
        // limit 2 keeps titles that themselves contain a colon intact
        // (the old unlimited split truncated them at the second colon)
        String[] result = pageName.split(":", 2);
        if (result.length == 1) {
            articles.add(result[0]);
            articlesTalk.add("Talk:" + result[0]);
        } else if (result[0].equals("Talk")) {
            articles.add(result[1]);
            articlesTalk.add("Talk:" + result[1]);
        } else if (result[0].startsWith("Wikipedia")) {
            // startsWith also matches the "Wikipedia talk" namespace
            wikis.add("Wikipedia:" + result[1]);
            wikisTalk.add("Wikipedia talk:" + result[1]);
        } else if (result[0].startsWith("Template")) {
            templates.add("Template:" + result[1]);
            templatesTalk.add("Template talk:" + result[1]);
        } else if (result[0].startsWith("Category")) {
            // the leading colon makes the wiki link display instead of
            // categorizing the output page
            categories.add(":Category:" + result[1]);
            categoriesTalk.add("Category talk:" + result[1]);
        } else if (result[0].startsWith("Image")) {
            images.add(":Image:" + result[1]);
            imagesTalk.add("Image talk:" + result[1]);
        }
        // titles in any other namespace (or mainspace titles containing a
        // colon) are silently ignored, as before
    }

    /** prepare the output and write it to wikipedia **/
    void write () {
        // if we're building from categories, say so in the intro
        if (!taggedPages) {
            tagText = "in categories ";
        }
        // the page name of the output (used inside links, so spaces become _)
        outputName = projectName.replace(" ", "_") + "/" +
                     articlePage.replace(" ", "_");
        mainText = new StringBuilder(BOT_WARN);
        // count the number of articles
        int numArticles = articles.size() + wikis.size() + templates.size() +
                          categories.size() + images.size() + portals.size();
        // figure out if we can fit everything on one page (double the
        // number of articles to count talk pages)
        boolean splitting = (numArticles*2 > MAX_ARTICLES);
        if (splitting) {
            mainText.append(SPLIT_INTRO);
        } else {
            mainText.append(ONE_PAGE_INTRO);
        }
        mainText.append(END_INTRO1 + tagText + END_INTRO2 + template + END_INTRO3 +
                        outputName + END_INTRO4 + outputName + END_INTRO5);
        mainText.append("==Regular content (count: " + numArticles + ")==\n");
        mainText.append("===Articles (count: " + articles.size() + ")===\n");
        char prevChar = 'Z';
        char firstChar = prevChar; // initialize to something late in the alphabet
        // the text for this subpage (if we're not splitting, this will be put
        // onto the main page)
        subText = new StringBuilder();
        for (String s : articles) {
            // start a new alphabetical section when the first letter changes
            if (s.charAt(0) != prevChar) {
                subText.append("====" + s.charAt(0) + "====\n");
                prevChar = s.charAt(0);
                // if this is the first article on the page, remember its letter
                if (count == 0) {
                    firstChar = prevChar;
                }
            }
            // put the article name
            subText.append("*[[" + s + "]]\n");
            count++;
            // if we've put all the articles we can on this page
            if (count > MAX_ARTICLES) {
                count = 0;
                if (onMainPage) {
                    // the first overflowing batch stays on the main page
                    onMainPage = false;
                    mainText.append(subText);
                } else {
                    // flush the full subpage and link to it from the main page
                    mainText.append("====[[/Page" + pageNo + "|" +
                                    firstChar + "-" + prevChar + "]]====\n");
                    int index = subText.indexOf("<range>");
                    subText.replace(index, index+7, firstChar + "-" + prevChar);
                    project.writePage(articlePage + "/Page" + pageNo, subText.toString());
                    pageNo++;
                }
                firstChar = prevChar;
                subText = new StringBuilder("===Articles <range>===\n" +
                                            "====" + firstChar + "====\n");
            }
        }
        // if we have too many articles, and we've already started the second
        // (or more) page, flush the last partial subpage
        if (splitting && !onMainPage) {
            mainText.append("====[[/Page" + pageNo + "|" +
                            firstChar + "-" + prevChar + "]]====\n");
            int index = subText.indexOf("<range>");
            subText.replace(index, index+7, firstChar + "-" + prevChar);
            project.writePage(articlePage + "/Page" + pageNo, subText.toString());
            pageNo++;
        } else { // we only have one page or this is the first batch
            mainText.append(subText);
        }
        prepareArticleList("Wikipedia", wikis, true);
        prepareArticleList("Templates", templates, true);
        prepareArticleList("Portals", portals, true);
        prepareArticleList("Categories", categories, true);
        prepareArticleList("Images", images, true);
        mainText.append("==Talk pages==\n");
        mainText.append("===Articles===\n");
        prevChar = firstChar = 'Z';
        if (splitting && subText.length() != 0) {
            // flush whatever prepareArticleList left pending, then start a
            // fresh subpage for the talk pages
            project.writePage(articlePage + "/Page" + pageNo, subText.toString());
            pageNo++;
            subText = new StringBuilder(BOT_WARN + SPLIT_INTRO_NEXT +
                                        END_INTRO1 + tagText + END_INTRO2 +
                                        template + END_INTRO3 + outputName +
                                        END_INTRO4 + outputName + END_INTRO5);
            // the old code closed this heading with "==", producing an
            // unbalanced wiki heading after the <range> substitution
            subText.append("===Articles <range>===\n");
        } else {
            subText = new StringBuilder();
        }
        count = 0;
        char endChar = 'Z';
        for (String s : articlesTalk) {
            if (count == 0) {
                // charAt(5) skips the "Talk:" prefix
                firstChar = s.charAt(5);
            }
            subText.append("*[[" + s + "]]\n");
            count++;
            if (count > MAX_ARTICLES) {
                count = 0;
                endChar = s.charAt(5);
                mainText.append("*[[/Page" + pageNo + "|" +
                                firstChar + "-" + endChar + "]]\n");
                int index = subText.indexOf("<range>");
                subText.replace(index, index+7, firstChar + "-" + endChar);
                project.writePage(articlePage + "/Page" + pageNo, subText.toString());
                pageNo++;
                firstChar = endChar;
                subText = new StringBuilder("===Articles <range>===\n");
            }
            endChar = s.charAt(5);
        }
        if (splitting) {
            mainText.append("*[[/Page" + pageNo + "|" +
                            firstChar + "-" + endChar + "]]\n");
            int index = subText.indexOf("<range>");
            if (index != -1) {
                subText = subText.replace(index, index+7, firstChar + "-" + endChar);
            }
            project.writePage(articlePage + "/Page" + pageNo, subText.toString());
            pageNo++;
        } else {
            mainText.append(subText);
        }
        prepareArticleList("Wikipedia", wikisTalk, false);
        prepareArticleList("Templates", templatesTalk, false);
        prepareArticleList("Portals", portalsTalk, false);
        prepareArticleList("Categories", categoriesTalk, false);
        prepareArticleList("Images", imagesTalk, false);
        project.writePage(articlePage, mainText.toString());
    }

    /** append one namespace's list to the output, either directly on the main
     * page or deferred to a subpage (flushed later by write())
     * @param title the section title (e.g., "Templates")
     * @param pages the pages to list
     * @param includeCount whether to show the page count in the heading
     **/
    private void prepareArticleList (String title, TreeSet<String> pages,
                                     boolean includeCount) {
        String countText = "";
        if (includeCount) {
            countText = " (count: " + pages.size() + ")";
        }
        mainText.append("===" + title + countText + "===\n");
        // if we need to put these articles on the next page (because we've
        // already started the second page, or we can't fit all these pages
        // on the main page)
        boolean pagesOnNext = !onMainPage || count + pages.size() > MAX_ARTICLES;
        if (pagesOnNext) {
            // NOTE(review): this reassignment discards any content a previous
            // call left pending in subText without writing it -- if two
            // consecutive sections both overflow, the earlier one is lost;
            // verify against the generated pages before relying on this path
            subText = new StringBuilder(BOT_WARN + SPLIT_INTRO_NEXT +
                END_INTRO1 + tagText + END_INTRO2 + template + END_INTRO3 +
                outputName + "/Page" + pageNo + END_INTRO4 +
                // the old code built this second link as "/" + pageNo,
                // inconsistent with the "/Page" form used everywhere else
                outputName + "/Page" + pageNo +
                END_INTRO5 +
                "===" + title + "===\n");
            mainText.append("*[[/Page" + pageNo + "#" + title + "|" + title +"]]\n");
        } else {
            subText = new StringBuilder();
            count += pages.size();
        }
        for (String s : pages) {
            subText.append("*[[" + s + "]]\n");
        }
        // if these pages are going on the main page, put them there
        if (!pagesOnNext) {
            mainText.append(subText);
            subText = new StringBuilder();
        } else {
            onMainPage = false;
        }
    }
}
Page.java
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;
import org.apache.commons.lang.StringEscapeUtils;
public class Page {
    /** the title of the page (with namespace), URL-encoded by the constructor **/
    private String title;
    /** the title of the page (without namespace), URL-encoded by the constructor **/
    private String titleWithoutNamespace = null;
    /** the index.php URL (as a String) **/
    private final String strIndexURL = "http://en.wikipedia.org/w/index.php";
    /** the api.php URL (as a String) **/
    private final String strAPIURL = "http://en.wikipedia.org/w/api.php";
    /** the session manager (manages logging in, cookies, etc) **/
    private WikiSessionManager sessionMgr;
    /** how long to sleep if maxlag is > 5 -- starts at 5 sec, doubles per retry **/
    private static int sleepTime = 5000;
    /** the maximum time to sleep (after this much time, we quit) **/
    private final static int MAX_SLEEP_TIME = 160000;
    /** the last write time, so we can keep the bot slow **/
    private static long lastWriteTime = -1;
    /** the minimum delay between writes, in milliseconds **/
    private final static int MIN_WRITE_DELAY = 10000;
    /** the list of articles that we're building, for example,
     * in @see getTransclusions()
     **/
    private TreeSet<String> articles;

    /** create the Page object and store its title (with namespace)
     * @param sessionMgr the session manager (controls logging in and other
     *        interaction with wikipedia)
     * @param title the title of the page (with namespace)
     * @throws UnsupportedEncodingException if UTF-8 is unsupported (cannot
     *         happen on a conforming JVM)
     */
    Page (WikiSessionManager sessionMgr, String title)
            throws UnsupportedEncodingException {
        this.sessionMgr = sessionMgr;
        this.title = title;
        // compute the namespace-free title before encoding clobbers the colon
        this.titleWithoutNamespace = URLEncoder.encode(titleWithoutNamespace(), "UTF-8");
        this.title = URLEncoder.encode(title, "UTF-8");
    }

    /** get the title of this page without namespace
     **/
    String titleWithoutNamespace () {
        // if we've already gotten it once, don't do it again (the cached
        // value is already URL-encoded)
        if (titleWithoutNamespace != null) return titleWithoutNamespace;
        // we haven't called this yet -- means we're in the constructor
        String[] split = title.split(":");
        if (split.length == 1) return split[0];
        return split[1];
    }

    /** get the contents of the page
     * @return the page contents (raw wikitext)
     * @throws IOException if something goes wrong (like the page doesn't exist)
     **/
    public String get () throws IOException {
        // get the URL & connection
        return urlRequest(strIndexURL + "?title=" + title + "&action=raw");
    }

    /** write the specified text to the page, backing off on maxlag (HTTP 503)
     * and enforcing the minimum delay between bot writes; all errors are
     * caught and printed (best-effort semantics)
     * @param text the text to put on the page
     * @param summary the edit summary
     * @param minor is this a minor edit
     **/
    void put (String text, String summary, boolean minor) {
        try {
            // fetch the edit form so we can extract the edit tokens
            URL url = new URL(strIndexURL + "?title=" + title + "&action=edit&maxlag=5");
            URLConnection connection = url.openConnection();
            sessionMgr.addCookies(connection);
            connection.connect();
            // wpStarttime, wpEdittime, and wpEditToken live in hidden <input>
            // elements of the form given in the pattern
            Pattern pattern = Pattern.compile("<input type='hidden' value=\"(.*?)\" name=\"(.*?)\" />");
            Matcher matcher;
            String startTime = "", editTime = "", editToken = "";
            BufferedReader reader = null;
            boolean stillTrying = true;
            while (stillTrying) {
                try {
                    reader = new BufferedReader(
                        new InputStreamReader(connection.getInputStream()));
                    stillTrying = false;
                    sleepTime = 5000;  // success: reset the back-off
                } catch (IOException e) {
                    // MediaWiki signals maxlag with HTTP 503; back off and retry
                    if (e.toString().contains("503")) {
                        System.out.println("Max lag -- sleeping for " + sleepTime/1000 + " seconds");
                        Thread.sleep(sleepTime);
                        sleepTime *= 2;
                        if (sleepTime > MAX_SLEEP_TIME) {
                            System.out.println("Giving up");
                            System.exit(-1);
                        }
                    } else {
                        // any other failure will not cure itself by re-reading
                        // the same connection (the old code looped forever
                        // here); let the outer catch report it
                        throw e;
                    }
                }
            }
            String line = reader.readLine();
            while (line != null) {
                if (line.indexOf("<input type='hidden'") != -1) {
                    matcher = pattern.matcher(line);
                    matcher.find();
                    String name = matcher.group(2);
                    String value = matcher.group(1);
                    if (name.equals("wpStarttime")) {
                        startTime = value;
                    } else if (name.equals("wpEdittime")) {
                        editTime = value;
                    } else if (name.equals("wpEditToken")) {
                        editToken = value;
                        break; // we don't need anything else
                    }
                }
                line = reader.readLine();
            }
            reader.close();
            // send the data
            url = new URL(strIndexURL + "?title=" + title + "&action=submit");
            connection = url.openConnection();
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setUseCaches(false);
            connection.setRequestProperty("Content-Type",
                                          "application/x-www-form-urlencoded");
            sessionMgr.addCookies(connection);
            // throttle: wait so that writes are at least MIN_WRITE_DELAY apart
            long writeDelay = System.currentTimeMillis() - lastWriteTime;
            if (lastWriteTime != -1 && writeDelay < MIN_WRITE_DELAY) {
                System.out.println("Waiting " + (MIN_WRITE_DELAY-writeDelay)/1000 + " seconds");
                Thread.sleep(MIN_WRITE_DELAY-writeDelay);
            }
            System.out.println("Writing " + titleWithoutNamespace);
            OutputStreamWriter output = new OutputStreamWriter(connection
                .getOutputStream(), "UTF-8");
            output.write("wpStarttime=" + startTime);
            output.write("&wpEdittime=" + editTime);
            output.write("&wpEditToken=" + URLEncoder.encode(editToken, "UTF-8"));
            output.write("&wpTextbox1=" + URLEncoder.encode(text, "UTF-8"));
            output.write("&wpSummary=" + URLEncoder.encode(summary, "UTF-8"));
            if (minor) {
                output.write("&wpMinorEdit=1");
            }
            output.flush();
            output.close();
            lastWriteTime = System.currentTimeMillis();
            // reading the response forces the POST to complete; without this
            // read the edit was observed not to go through
            BufferedReader input = new BufferedReader(new InputStreamReader(
                connection.getInputStream()));
            line = input.readLine();
            /* could be used to check for errors
            while (line != null) {
                line = input.readLine();
            } */
        } catch (Exception e) {
            // best-effort: report and continue with the next page
            System.out.println(e);
        }
    }

    /** get the transclusions for this page
     * @return the list of all articles which transclude this page
     * @state articles is used to build the list, but it is initialized
     * and then returned
     * @throws MalformedURLException
     * @throws IOException
     */
    TreeSet<String> getTransclusions ()
            throws MalformedURLException, IOException {
        // the article list
        articles = new TreeSet<String>();
        // the parameters to use in the URL
        final String urlParams = "action=query&list=embeddedin&eilimit=5000&format=xml";
        String result = urlRequest(strAPIURL + "?titles=" + title + "&" + urlParams);
        int index = 0;
        while (index != -1) {
            processResult(result, "ei");
            // an eicontinue attribute means there are more results to fetch
            index = result.indexOf("eicontinue");
            if (index != -1) {
                // find the next " after eicontinue=" (12 chars long)
                int endIndex = result.indexOf("\"", index+12);
                String continueText = result.substring(index+12, endIndex);
                // keep the titles parameter on continuation requests -- the
                // old code dropped it, so every follow-up query was empty
                // (compare getMembers, where cmcategory is part of urlParams)
                result = urlRequest(strAPIURL + "?titles=" + title + "&" + urlParams +
                    "&eicontinue=" + URLEncoder.encode(continueText, "UTF-8"));
            }
        }
        return articles;
    }

    /** get the articles in the category
     * @return the list of all articles in this category
     * @state articles is used to build the list, but it is initialized
     * and then returned
     * @throws MalformedURLException
     * @throws IOException
     */
    TreeSet<String> getMembers ()
            throws MalformedURLException, IOException {
        // the article list
        articles = new TreeSet<String>();
        // the parameters to use in the URL (cmcategory rides along on
        // continuation requests because it is baked into urlParams)
        final String urlParams = "?cmcategory=" + titleWithoutNamespace +
            "&action=query&list=categorymembers&cmlimit=5000&format=xml";
        String result = urlRequest(strAPIURL + urlParams);
        int index = 0;
        while (index != -1) {
            processResult(result, "cm");
            index = result.indexOf("cmcontinue");
            if (index != -1) {
                // find the next " after cmcontinue=" (12 chars long)
                int endIndex = result.indexOf("\"", index+12);
                String continueText = result.substring(index+12, endIndex);
                result = urlRequest(strAPIURL + urlParams + "&cmcontinue=" +
                    URLEncoder.encode(continueText, "UTF-8"));
            }
        }
        return articles;
    }

    /** process the result -- this is a list of articles in XML format
     * @param result the raw text
     * @param id the id to use in the pattern (e.g., "ei" for embedded in, "cm"
     * for category members, etc.)
     * @state articles new article titles are added to articles
     */
    private void processResult (String result, String id) {
        Pattern pattern =
            Pattern.compile("<" + id + " pageid=\"(.*?)\" ns=\"(.*?)\" title=\"(.*?)\" />");
        Matcher matcher = pattern.matcher(result);
        while (matcher.find()) {
            String article = matcher.group(3);
            // titles arrive XML-escaped (&amp; etc.); unescape before storing
            article = StringEscapeUtils.unescapeXml(article);
            articles.add(article);
        }
    }

    /** open a URL and read the page
     * @param http the full URL "http://whatever"
     * @return the text of the page
     * @throws MalformedURLException
     * @throws IOException
     */
    private String urlRequest (String http) throws MalformedURLException,
            IOException {
        // get the URL & connection
        URL url = new URL(http);
        URLConnection connection = url.openConnection();
        sessionMgr.addCookies(connection);
        // convert the connection stream into a String
        StringBuilder sbResult = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(
            connection.getInputStream(), "UTF-8"));
        try {
            String line = reader.readLine();
            while (line != null) {
                sbResult.append(line + "\n");
                line = reader.readLine();
            }
        } finally {
            // always release the stream, even if a read fails
            reader.close();
        }
        return sbResult.toString();
    }
}