User:Tom.Reding/Category:Pages using deprecated image syntax
Appearance
Source
/// <summary>
/// Converts a deprecated <c>image = [[File:Name.ext|NNNpx]]</c> infobox parameter into a bare
/// file name, adding a separate <c>image_size = NNNpx</c> line when a size was embedded.
/// </summary>
/// <remarks>
/// The signature matches an AutoWikiBrowser custom-module hook; presumably that is how this
/// method is invoked (confirm against the hosting tool). The page is skipped unless it contains
/// exactly one supported infobox that has no <c>image_size</c> parameter yet and whose
/// <c>image</c> parameter actually changes.
/// </remarks>
/// <param name="ArticleText">Wikitext of the page.</param>
/// <param name="ArticleTitle">Page title (not used by this method).</param>
/// <param name="wikiNamespace">Namespace number of the page (not used by this method).</param>
/// <param name="Summary">Receives the reason for skipping; left empty when the page was changed.</param>
/// <param name="Skip">Receives <c>true</c> when the page should be left alone.</param>
/// <returns>The rewritten wikitext, or <paramref name="ArticleText"/> unchanged when skipping.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Summary = "";
    Skip = false;

    // Placeholders that temporarily stand in for the braces of non-infobox templates, so the
    // infobox body can be matched with a simple "no braces inside" character class.
    const string tempOpenBraces = @"$R3PL4C3_0P3N1N6_8R4C35";
    const string tempCloseBraces = @"$R3PL4C3_CL051N6_8R4C35";
    const string imageTypesRegex = @"(?:jpe?g|gif|bmp|png|svg|tiff?|xcf)";

    // dictionaries /////////////////////////////////////////////////////////////
    // {{infobox}} aliases: parent infobox -> regex of template names that resolve to it
    Dictionary<string, string> infoboxAliases = new Dictionary<string, string>();
    infoboxAliases.Add(@"football club", @"(?:[Ff]ootball[ _]+club[ _]+infobox|[Ii]ndoor[ _]+Football[ _]+League[ _]+team|[Ii]nfobox[ _]+football[ _]+club|[Ii]nfobox[ _]+Football[ _]+Club|[Ii]nfobox[ _]+Football[ _]+club|[Ii]nfobox[ _]+football[ _]+team|[Mm]LS[ _]+Canada)");
    infoboxAliases.Add(@"military conflict", @"(?:[Bb]attle|[Ii]nfobox[ _]+battle|[Ii]nfobox[ _]+Military[ _]+Conflict[ _]+\(3[ _]+sided\)|[Ii]nfobox[ _]+military[ _]+conflict[ _]+timeline|[Ii]nfobox[ _]+military[ _]+conflict|[Ii]nfobox[ _]+Military[ _]+Conflict|[Ii]nfobox[ _]+Militärischer[ _]+Konflikt|[Ii]nfobox[ _]+Siege|[Ii]nfobox[ _]+War|[Ii]nfobox[ _]+war|[Mm]ultiWarbox|[Ww]arboxNew|[Ww]arbox|(?:[Ww]ikipedia:)?(?:[Ww]ikiProject[ _]+Military[ _]+history/Military[ _]+conflict[ _]+infobox))");
    infoboxAliases.Add(@"Olympic event", @"(?:Infobox[ _]+Olympic[ _]+event)");
    infoboxAliases.Add(@"university", @"(?:[Ii]nfobox[ _]+art[ _]+school|[Ii]nfobox[ _]+business[ _]+school|[Ii]nfobox[ _]+College|[Ii]nfobox[ _]+college|[Ii]nfobox[ _]+dental[ _]+school|[Ii]nfobox[ _]+faculty|[Ii]nfobox[ _]+graduate[ _]+school|[Ii]nfobox[ _]+institute[ _]+department|[Ii]nfobox[ _]+medical[ _]+college|[Ii]nfobox[ _]+music[ _]+school|[Ii]nfobox[ _]+seminary[ _]+school|[Ii]nfobox[ _]+university[ _]+faculty|[Ii]nfobox[ _]+University[ _]+School|[Ii]nfobox[ _]+university[ _]+school|[Ii]nfobox[ _]+university|[Ii]nfobox[ _]+University|(?:[Ww]ikipedia:)?UNI/BOX)");

    // "image" parameter aliases; the capturing group is read back below to learn which alias matched
    Dictionary<string, string> imageAliases = new Dictionary<string, string>();
    imageAliases.Add(@"football club", @"(image)");
    imageAliases.Add(@"military conflict", @"(image)");
    imageAliases.Add(@"Olympic event", @"(image)");
    imageAliases.Add(@"university", @"(image|image_name)");

    // "image_size" parameter aliases
    Dictionary<string, string> imageSizeAliases = new Dictionary<string, string>();
    imageSizeAliases.Add(@"football club", @"(?:image_size)");
    imageSizeAliases.Add(@"military conflict", @"(?:image_size)");
    imageSizeAliases.Add(@"Olympic event", @"(?:image_size)");
    imageSizeAliases.Add(@"university", @"(?:image_size)");

    // locate the infobox ///////////////////////////////////////////////////////
    string infoboxRegex = "";
    string imageRegex = "";
    string imageSizeRegex = "";
    int infoboxCount = 0;
    foreach (KeyValuePair<string, string> entry in infoboxAliases)
    {
        string openingRegex = @"\{\{\s*" + entry.Value + @"\s*\|";
        int hits = Regex.Matches(ArticleText, openingRegex, RegexOptions.IgnoreCase).Count;
        if (hits == 0)
        {
            continue;
        }

        infoboxRegex = entry.Value;
        imageRegex = imageAliases[entry.Key];
        imageSizeRegex = imageSizeAliases[entry.Key];
        infoboxCount += hits;
    }

    // auto-skip conditions /////////////////////////////////////////////////////
    if (infoboxCount != 1)
    {
        Summary = "Unexpected # of infoboxes: " + infoboxCount + ". ";
        Skip = true;
        return ArticleText;
    }

    // collapse non-IB templates ////////////////////////////////////////////////
    // The leading whitespace after "{{" is deliberately part of the captured group so that it
    // survives the collapse/reinstate round trip; the lookahead alone decides whether the
    // template is the infobox.
    string nonInfoboxRegex = @"\{\{(?!\s*" + infoboxRegex + @"\s*\|)([^\{\}]*)\}\}";
    // "$" is special in replacement patterns, so the placeholders' leading "$" is escaped as "$$".
    string collapseReplacement = tempOpenBraces.Replace("$", "$$") + "$1" + tempCloseBraces.Replace("$", "$$");
    string workingText = ArticleText;
    string previousText;
    do
    {
        // Each pass collapses the innermost remaining templates; repeat until nothing changes
        // so that any nesting depth is handled.
        previousText = workingText;
        workingText = Regex.Replace(workingText, nonInfoboxRegex, collapseReplacement, RegexOptions.IgnoreCase);
    }
    while (workingText != previousText);

    string infoboxOld = Regex.Match(workingText, @"\{\{\s*" + infoboxRegex + @"\s*\|[^\{\}]+\}\}", RegexOptions.IgnoreCase).Value;
    if (string.IsNullOrEmpty(infoboxOld))
    {
        // The infobox body could not be isolated (e.g. stray braces are still inside it).
        Summary = "Check IB for weird EoL pipes...";
        Skip = true;
        return ArticleText;
    }

    if (Regex.IsMatch(infoboxOld, @"\|\s*" + imageSizeRegex + @"\s*=", RegexOptions.IgnoreCase))
    {
        // TODO: accommodate empty image_size params
        Summary = "IB already contains an image_size param. ";
        Skip = true;
        return ArticleText;
    }

    // main /////////////////////////////////////////////////////////////////////
    // Work out the padding between "image_size" and "=" so the new line's "=" lines up with the
    // existing "image" line. "image_size" is 5 characters longer than "image", therefore:
    //   0-1 spaces after "image" -> keep as is
    //   2-6 spaces               -> 1 space
    //   7+  spaces               -> 5 fewer spaces
    // "image_name" has the same length as "image_size", so its padding is reused unchanged.
    Match imageParam = Regex.Match(infoboxOld, @"[\r\n]+[ ]*\|\s*" + imageRegex + @"(\s*)=(?!\s*[\|\}])", RegexOptions.IgnoreCase);
    string imageAlias = imageParam.Groups[1].Value;
    string padding = imageParam.Groups[2].Value;
    // The match above ignores case, so the alias comparison must as well.
    if (string.Equals(imageAlias, "image", StringComparison.OrdinalIgnoreCase))
    {
        if (padding.Length >= 7)
        {
            padding = new string(' ', padding.Length - 5);
        }
        else if (padding.Length >= 2)
        {
            padding = " ";
        }
    }

    // [[Image:....jpg|#px]] -> ....jpg | image_size = #px
    // Groups: 1 line break, 2 pipe + indent, 3 alias, 4 padding, 5 "=" + spacing, 6 file name, 7 size.
    string imageAndSizeRegex = @"([\r\n]+)([ ]*\|\s*)" + imageRegex + @"(\s*)(=\s*)\[\[(?:Image|File):([^\r\n\[\]\|]+?\." + imageTypesRegex + @")\s*\|\s*(\d+[a-z][a-z])[\|\s]*\]\][ ]*(?=[\r\n]?\s*[\|\}])";
    string imageAndSizeReplace = @"$1$2$3$4$5$6" + "\n" + @"$2image_size" + padding + @"$5${7}";
    string infoboxNew = Regex.Replace(infoboxOld, imageAndSizeRegex, imageAndSizeReplace, RegexOptions.IgnoreCase);

    // [[Image:....jpg]] -> ....jpg
    // Groups: 1 line break, 2 pipe + indent, 3 alias, 4 "=" with surrounding spacing, 5 file name.
    string imageOnlyRegex = @"([\r\n]+)([ ]*\|\s*)" + imageRegex + @"(\s*=\s*)\[\[(?:Image|File):([^\r\n\[\]\|]+?\." + imageTypesRegex + @")[\|\s]*\]\][ ]*(?=[\r\n]?\s*[\|\}])";
    string imageOnlyReplace = @"$1$2$3$4$5";
    infoboxNew = Regex.Replace(infoboxNew, imageOnlyRegex, imageOnlyReplace, RegexOptions.IgnoreCase);

    if (infoboxOld == infoboxNew)
    {
        Summary = "No infobox changes made (almost though).";
        Skip = true;
        return ArticleText;
    }

    // reinstate non-IB templates ///////////////////////////////////////////////
    workingText = workingText.Replace(infoboxOld, infoboxNew);
    workingText = workingText.Replace(tempOpenBraces, "{{");
    workingText = workingText.Replace(tempCloseBraces, "}}");
    return workingText;
}