User:DavidBrooks/UndoRelinksModule
Appearance
This module should "unlink" any redlinks (wikilinks whose target does not exist) in the newly loaded article. It will only change pages in Article space, and will not change any link that points to another namespace. Let me know (via this talk page) if you have any problems or other suggestions.
To use:
- Before loading the first article, Tools Menu, click "Make Module"
- Paste the code below into the edit window
- Make sure "Enabled" is checked
- Click "Make Module". There should be a green-background "Module compiled and loaded" message
- Click "Close"
If you Save Settings, you won't need to repeat the above steps.
/// <summary>
/// Unlinks redlinks (wikilinks whose target page does not exist) in a mainspace article.
/// Link targets are collected from the article text, checked against the wiki's
/// api.php in batches, and every link whose target is reported as missing is
/// replaced by its display text (the piped label if present, otherwise the target).
/// </summary>
/// <param name="ArticleText">Wikitext of the article being processed.</param>
/// <param name="ArticleTitle">Title of the article (not used by this module).</param>
/// <param name="wikiNamespace">Namespace number; only 0 (mainspace) is edited.</param>
/// <param name="Summary">Set to "Redlink(s) removed" when at least one link was unlinked, else empty.</param>
/// <param name="Skip">Always set to false.</param>
/// <returns>The article text with redlinks unlinked, or the text so far if an API request fails.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Summary = String.Empty;
    Skip = false;

    if (wikiNamespace != 0)
    {
        return ArticleText; // Only edit Mainspace articles
    }

    // Links come back still wrapped in [[...]]; strip the brackets plus any
    // #section or |label, drop anything containing ':' (other namespaces,
    // interwiki), and upper-case the first letter so that "foo" and "Foo"
    // are only queried once.
    string[] allWikiLinks = WikiFunctions.Parse.Parsers.GetAllWikiLinks(ArticleText)
        .Where(l => !l.Contains(":"))
        .Select(l => Regex.Replace(l, @".*\[\[(.*?)[#|\]].*", "$1").Trim())
        .Where(l => l.Length > 0) // e.g. [[#Section]] or [[|x]] leave no title to look up
        .Select(l => Char.ToUpperInvariant(l[0]) + l.Substring(1))
        .Distinct()
        .ToArray();

    if (allWikiLinks.Length == 0)
    {
        return ArticleText;
    }

    string newtext = ArticleText;

    // The API has a limit of 50 titles, but to avoid overlong URLs it does no harm
    // to chop the list up. XML is used because JSON escapes non-ANSI characters.
    string apiAction = "https://" + awb.LangCode + "." + awb.Project +
        ".org/w/api.php?action=query&prop=pageprops&ppprop=displaytitle&format=xml&titles=";
    const int batchSize = 25;
    StringBuilder titleList = new StringBuilder(apiAction);

    for (int i = 0; i < allWikiLinks.Length; i++)
    {
        // Percent-encode each title: a raw '&', '+', '?' or '%' in a title would
        // otherwise be interpreted as URL syntax and corrupt the query.
        titleList.Append(Uri.EscapeDataString(allWikiLinks[i]));

        bool batchComplete = ((i + 1) % batchSize) == 0 || i == allWikiLinks.Length - 1;
        if (!batchComplete)
        {
            titleList.Append('|'); // title separator understood by the API
            continue;
        }

        string xmlResult;
        try
        {
            System.Net.HttpWebRequest req =
                (System.Net.HttpWebRequest)System.Net.WebRequest.Create(titleList.ToString());
            req.UserAgent = "AWB redlink remover";
            using (System.Net.WebResponse resp = req.GetResponse())
            using (System.IO.StreamReader sr = new System.IO.StreamReader(resp.GetResponseStream()))
            {
                xmlResult = sr.ReadToEnd();
            }
        }
        catch
        {
            // Deliberate best effort: if the lookup fails, keep whatever fixes
            // were already made from earlier batches rather than aborting.
            return newtext;
        }

        // Parsing the XML would look smarter but this is just as effective
        foreach (Match missingMatch in Regex.Matches(xmlResult, "title=\"([^\"]+)\" *missing="))
        {
            // The attribute value is XML-escaped (&amp;, &quot;, &#039; ...);
            // decode it so it can be compared with the raw wikitext.
            string redTitle = System.Net.WebUtility.HtmlDecode(missingMatch.Groups[1].Value);
            if (redTitle.Length == 0)
            {
                continue;
            }

            // This title is normalized, so matches need a case-independent first letter,
            // and a space in the title may appear as '_' in the wikitext.
            // The first character is escaped too: titles may start with '(' or '.'.
            // NB Regex.Escape puts a \ before a space.
            string matchPattern = @"\[\[ *((?i:" + Regex.Escape(redTitle.Substring(0, 1)) + ")" +
                Regex.Escape(redTitle.Substring(1)).Replace(@"\ ", "[_ ]") +
                @") *(?:\|(.+? *))?]]";

            string replaced = Regex.Replace(newtext, matchPattern, Replacer);
            if (replaced != newtext)
            {
                // Only claim an edit when a link was actually unlinked.
                newtext = replaced;
                Summary = "Redlink(s) removed";
            }
        }

        titleList.Length = apiAction.Length; // truncate back to the bare API URL for the next batch
    }

    return newtext;
}
// Match evaluator for an unlinked wikilink: yields the text that should remain
// in the article. Capture group 2 is the piped label (when the link had one);
// capture group 1 is the link target itself.
private string Replacer(Match linkMatch)
{
    GroupCollection captures = linkMatch.Groups;
    if (captures[2].Success)
    {
        return captures[2].Value;
    }
    return captures[1].Value;
}