Jump to content

User:Tom.Reding/Category:Pages using deprecated image syntax

From Wikipedia, the free encyclopedia

Source

[edit]
/// <summary>
/// Rewrites deprecated image syntax inside a page's infobox: an image parameter whose
/// value is a full file link such as "[[File:Foo.png|200px]]" is reduced to the bare
/// file name, and the pixel size (if present) is moved to a new "image_size" parameter
/// inserted on the following line.
/// </summary>
/// <remarks>
/// The page is only changed when it contains exactly one infobox from the alias tables
/// below and that infobox has no image_size parameter yet. All other templates are
/// temporarily replaced by brace-free placeholders so the infobox can be matched with a
/// simple "no braces inside" regex; the placeholders are turned back into braces before
/// returning, on every code path.
/// </remarks>
/// <param name="ArticleText">Wikitext of the page to process.</param>
/// <param name="ArticleTitle">Title of the page; not read by this method.</param>
/// <param name="wikiNamespace">Namespace number of the page; not read by this method.</param>
/// <param name="Summary">Receives the reason when the page is skipped; left empty otherwise.</param>
/// <param name="Skip">Set to true when the page should not be saved.</param>
/// <returns>The (possibly modified) wikitext.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
	// global switches //////////////////////////////////////////////////////////
	
	Skip = false;
	
	
	// globally-used vars ///////////////////////////////////////////////////////
	
	string Infobox_Regex   = ""; // will contain regexes for {{infobox}} aliases
	string Image_Regex     = ""; // will contain regexes for "image" parameter aliases
	string ImageSize_Regex = ""; // will contain regexes for "image_size" parameter aliases
	string InfoboxText_Old = "";
	string InfoboxText_New = "";
	string TempOpenBraces  = @"$R3PL4C3_0P3N1N6_8R4C35";
	string TempCloseBraces = @"$R3PL4C3_CL051N6_8R4C35";
	string ImageTypes_Regex = @"(?:jpe?g|gif|bmp|png|svg|tiff?|xcf)";
	Summary = "";
	
	
	// dictionaries /////////////////////////////////////////////////////////////
	
	// {{infobox}} aliases
	Dictionary<string, string> dIBAliases = new Dictionary<string, string>(); // parent infobox, regex aliases copy+pasta'd from Rdcheck
	dIBAliases.Add(@"football club", @"(?:[Ff]ootball[ _]+club[ _]+infobox|[Ii]ndoor[ _]+Football[ _]+League[ _]+team|[Ii]nfobox[ _]+football[ _]+club|[Ii]nfobox[ _]+Football[ _]+Club|[Ii]nfobox[ _]+Football[ _]+club|[Ii]nfobox[ _]+football[ _]+team|[Mm]LS[ _]+Canada)");
	dIBAliases.Add(@"military conflict", @"(?:[Bb]attle|[Ii]nfobox[ _]+battle|[Ii]nfobox[ _]+Military[ _]+Conflict[ _]+\(3[ _]+sided\)|[Ii]nfobox[ _]+military[ _]+conflict[ _]+timeline|[Ii]nfobox[ _]+military[ _]+conflict|[Ii]nfobox[ _]+Military[ _]+Conflict|[Ii]nfobox[ _]+Militärischer[ _]+Konflikt|[Ii]nfobox[ _]+Siege|[Ii]nfobox[ _]+War|[Ii]nfobox[ _]+war|[Mm]ultiWarbox|[Ww]arboxNew|[Ww]arbox|(?:[Ww]ikipedia:)?(?:[Ww]ikiProject[ _]+Military[ _]+history/Military[ _]+conflict[ _]+infobox))");
	dIBAliases.Add(@"Olympic event", @"(?:Infobox[ _]+Olympic[ _]+event)");
	dIBAliases.Add(@"university", @"(?:[Ii]nfobox[ _]+art[ _]+school|[Ii]nfobox[ _]+business[ _]+school|[Ii]nfobox[ _]+College|[Ii]nfobox[ _]+college|[Ii]nfobox[ _]+dental[ _]+school|[Ii]nfobox[ _]+faculty|[Ii]nfobox[ _]+graduate[ _]+school|[Ii]nfobox[ _]+institute[ _]+department|[Ii]nfobox[ _]+medical[ _]+college|[Ii]nfobox[ _]+music[ _]+school|[Ii]nfobox[ _]+seminary[ _]+school|[Ii]nfobox[ _]+university[ _]+faculty|[Ii]nfobox[ _]+University[ _]+School|[Ii]nfobox[ _]+university[ _]+school|[Ii]nfobox[ _]+university|[Ii]nfobox[ _]+University|(?:[Ww]ikipedia:)?UNI/BOX)");
	
	// "image" parameter aliases (exactly one capture group each; later group numbers rely on it)
	Dictionary<string, string> dImageAliases = new Dictionary<string, string>(); // parent infobox, regex aliases
	dImageAliases.Add(@"football club",     @"(image)");
	dImageAliases.Add(@"military conflict", @"(image)");
	dImageAliases.Add(@"Olympic event",     @"(image)");
	dImageAliases.Add(@"university",        @"(image|image_name)");
	
	// "image_size" parameter aliases
	Dictionary<string, string> dImageSizeAliases = new Dictionary<string, string>(); // parent infobox, regex aliases
	dImageSizeAliases.Add(@"football club",     @"(?:image_size)");
	dImageSizeAliases.Add(@"military conflict", @"(?:image_size)");
	dImageSizeAliases.Add(@"Olympic event",     @"(?:image_size)");
	dImageSizeAliases.Add(@"university",        @"(?:image_size)");
	
	// Count every supported infobox on the page; the regexes of the last family found are kept.
	int iIBs = 0;
	foreach (KeyValuePair<string, string> iba in dIBAliases)
	{
		string IBA_Regex = @"\{\{\s*" + iba.Value + @"\s*\|";
		// One scan gives both "was it found" and "how many times".
		int iFound = Regex.Matches(ArticleText, IBA_Regex, RegexOptions.IgnoreCase).Count;
		if (iFound > 0)
		{
			Infobox_Regex   = iba.Value;
			Image_Regex     = dImageAliases[iba.Key];
			ImageSize_Regex = dImageSizeAliases[iba.Key];
			iIBs += iFound;
		}
	}
	
	
	// auto-skip conditions /////////////////////////////////////////////////////
	
	if (iIBs != 1)
	{
		Summary = "Unexpected # of infoboxes: " + iIBs + ". ";
		Skip = true;
	}
	
	if (!Skip)
	{
		// collapse non-IB templates
		// Leading whitespace after "{{" is kept inside the capture group so that the
		// template text is restored byte-for-byte when the placeholders are reinstated.
		string NonInfobox_Regex  = @"\{\{(?!\s*" + Infobox_Regex + @"\s*\|)([^\{\}]*)\}\}";
		string CollapsedTemplate = TempOpenBraces + @"$1" + TempCloseBraces;
		
		// Each pass collapses the innermost remaining templates. Repeat until nothing
		// changes so that any nesting depth is handled; every pass removes brace pairs
		// and adds none, so the loop terminates.
		string Collapsed = ArticleText;
		do
		{
			ArticleText = Collapsed;
			Collapsed = Regex.Replace(ArticleText, NonInfobox_Regex, CollapsedTemplate, RegexOptions.IgnoreCase);
		}
		while (Collapsed != ArticleText);
		
		// With all inner templates collapsed the infobox contains no braces of its own.
		InfoboxText_Old = Regex.Match(ArticleText, @"\{\{\s*" + Infobox_Regex + @"\s*\|[^\{\}]+\}\}", RegexOptions.IgnoreCase).Value;
		InfoboxText_New = InfoboxText_Old;
		
		// search IB for aliases of "image_size"
		bool IBHasImageSize = Regex.IsMatch(InfoboxText_Old, @"\|\s*" + ImageSize_Regex + @"\s*=", RegexOptions.IgnoreCase);
		if (IBHasImageSize)
		{
			Summary = "IB already contains an image_size param. ";
			Skip = true;
			// TODO: accommodate null image_size params
		}
	}
	
	
	// main /////////////////////////////////////////////////////////////////////
	
	if (!Skip)
	{
		// determine whitespace#3 alignment
		// 
		//   image -> image_size                 image_name -> image_size
		// 0 space -> 0 space   |image=          x space(s) -> x space(s)    |image_name=
		//                      |image_size=                                 |image_size=
		// 
		// 1 space -> 1 space   |image =                     .                     .
		//                      |image_size =                .                     .
		// 
		// 2 spaces -> 1 space  |image  =                    .                     .
		//                      |image_size =                .                     .
		// 
		// 3 spaces -> 1 space  |image   =                   .                     .
		//                      |image_size =                .                     .
		// 
		// 4 spaces -> 1 space  |image    =                  .                     .
		//                      |image_size =                .                     .
		// 
		// 5 spaces -> 1 space  |image     =                 .                     .
		//                      |image_size =                .                     .
		// 
		// 6 spaces -> 1 space  |image      =                .                     .
		//                      |image_size =                .                     .
		// 
		// 7+ spaces -> (7+)-5  |image       =               .                     .
		//                      |image_size  =               .                     .
		Match mWS3 = Regex.Match(InfoboxText_New, @"[\r\n]+[ \t]*\|\s*" + Image_Regex + @"(\s*)=(?!\s*[\|\}])", RegexOptions.IgnoreCase); // 2 grps
		string WS3 = mWS3.Groups[2].Value;
		string ImageAlias = mWS3.Groups[1].Value;
		
		// The match above ignores case, so the alias comparison must as well; otherwise
		// "|Image   =" would be rewritten without its alignment being normalised.
		if (string.Equals(ImageAlias, "image", StringComparison.OrdinalIgnoreCase))
		{
			int WS3Len = WS3.Length;
			if (WS3Len >= 7)
				WS3 = new String(' ', WS3Len - 5); // "image_size" is 5 characters longer than "image"
			else if (WS3Len >= 2)
				WS3 = " ";
			// 0 or 1 whitespace characters: keep as-is
		}
		// "image_name" has the same length as "image_size", so its padding is kept as-is
		
		// [[Image:....jpg|#px]] -> ....jpg | image_size = #px
		// groups: 1 = line break(s), 2 = indent + pipe, 3 = image alias, 4 = padding before "=",
		//         5 = "=" + padding, 6 = file name, 7 = size
		string ImageAndSize_Regex   = @"([\r\n]+)([ \t]*\|\s*)" + Image_Regex + @"(\s*)(=\s*)\[\[(?:Image|File):([^\r\n\[\]\|]+?\." + ImageTypes_Regex + @")\s*\|\s*(\d+[a-z][a-z])[\|\s]*\]\][ \t]*(?=[\r\n]?\s*[\|\}])";
		string ImageAndSize_Replace = @"$1$2$3$4$5$6" + "\n" + @"$2image_size" + WS3 + @"$5${7}";
		InfoboxText_New = Regex.Replace(InfoboxText_New, ImageAndSize_Regex, ImageAndSize_Replace, RegexOptions.IgnoreCase);
		
		// [[Image:....jpg]] -> ....jpg
		// groups: 1 = line break(s), 2 = indent + pipe, 3 = image alias, 4 = padded "=", 5 = file name
		string ImageOnly_Regex = @"([\r\n]+)([ \t]*\|\s*)" + Image_Regex + @"(\s*=\s*)\[\[(?:Image|File):([^\r\n\[\]\|]+?\." + ImageTypes_Regex + @")[\|\s]*\]\][ \t]*(?=[\r\n]?\s*[\|\}])";
		string ImageOnly_Replace = @"$1$2$3$4$5";
		InfoboxText_New = Regex.Replace(InfoboxText_New, ImageOnly_Regex, ImageOnly_Replace, RegexOptions.IgnoreCase);
		
		if (string.IsNullOrEmpty(InfoboxText_New))
		{
			// The infobox could not be isolated (it still contained braces after collapsing).
			Summary = "Check IB for weird EoL pipes...";
			Skip = true;
		}
		else if (InfoboxText_Old != InfoboxText_New)
		{
			ArticleText = ArticleText.Replace(InfoboxText_Old, InfoboxText_New);
		}
		else
		{
			Summary = "No infobox changes made (almost though).";
			Skip = true;
		}
	}
	
	
	// reinstate non-IB templates ///////////////////////////////////////////////
	// Done unconditionally: the text may have been collapsed even when the page is skipped.
	ArticleText = ArticleText.Replace(TempOpenBraces,  "{{");
	ArticleText = ArticleText.Replace(TempCloseBraces, "}}");
	
	return ArticleText;
}