Jump to content

User:User A1/svgTinker.py

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by User A1 (talk | contribs) at 08:22, 19 July 2009 (modify more). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
#!/usr/bin/python


from BeautifulSoup import BeautifulStoneSoup, Tag
import string
import sys
import re

#Split all inkscape style=" " into known attributes for that tag
def splitInkscapeStyle(tag) :
	"""Expand an inline style="a:b;c:d" attribute into separate attributes.

	Every "name:value" declaration found in tag["style"] is written back as
	tag[name] = value (both sides stripped of surrounding whitespace), after
	which the style attribute itself is deleted.

	tag -- any object supporting tag[key] lookup, assignment and deletion.

	Returns False when the tag has no style attribute or an empty one (the
	tag is then left untouched); otherwise returns None.
	"""
	#Dict-style lookup of a missing key raises KeyError, so absence has
	#to be caught rather than tested for truthiness
	try:
		strStyle = tag["style"]
	except KeyError:
		return False

	if not strStyle:
		return False

	print("style is" + str(strStyle))

	for declaration in strStyle.split(";"):
		print("I is :  " + declaration)

		#Split on the first colon only: values such as url(http://...)
		#contain colons of their own
		name, colon, value = declaration.partition(":")
		name = name.strip()

		#Skip empty fragments (";;", trailing ";") and anything that is
		#not a name:value pair
		if not colon or not name:
			continue

		tag[name] = value.strip()

	del tag['style']

#By Peter Waller, BS: Replacing a tag with its contents, BeautifulSoup mailing list
def tagRemove(tag, tagname):
	"""Replace a tag by its own children, in place.

	The children of tag are inserted into tag.parent starting at the
	position tag currently occupies, keeping their order, and tag is then
	extracted.

	tag     -- the element to dissolve
	tagname -- accepted but not used by this function
	"""
	container = tag.parent

	#Where the tag sits among its siblings
	position = container.contents.index(tag)

	#Work from a snapshot of the children so that the iteration is not
	#affected if re-parenting a child removes it from tag.contents
	children = list(tag.contents)

	for child in children:
		container.insert(position, child)
		position += 1

	#Drop the tag itself now that its children have been moved out
	tag.extract()


def epsilon():
	"""Return the floating point machine epsilon.

	The result is the smallest power of two eps for which 1.0 + eps still
	compares greater than 1.0, found by repeated halving from 1.0.
	"""
	eps = 1.0

	#True division is required here: floor division (//) would turn 1.0
	#into 0.0 on the first step. Test the *next* candidate so the value
	#returned is the last one that is still distinguishable from zero
	#when added to 1.0
	while 1.0 + eps / 2.0 > 1.0:
		eps = eps / 2.0

	return eps


def hasFontFace(tag):
	"""Tell whether the text of a tag contains an "@font-face" rule.

	tag -- object whose .string attribute holds the text to inspect.

	Returns True when "@font-face" occurs anywhere in tag.string, and False
	otherwise, including when tag.string is None or empty.
	"""
	text = tag.string

	#Nothing to search in
	if not text:
		return False

	#Check for encoded font base64.
	#A membership test is used because find() returns -1 (truthy) when
	#the text is absent and 0 (falsy) when it is at the very start
	return "@font-face" in text

#Takes a stone-soup tag and applies various
#workaround fixes of dubious effectiveness
def fontFix(tag):
	"""Move bold/italic markers out of font-family and fix the DejaVu name.

	Looks at tag["font-family"] and, when needed, rewrites attributes of the
	tag in place:

	  * a family containing "-bold" (any case) gets font-weight set to
	    "Bold", unless the existing font-weight already mentions bold, and
	    the "-bold" part is removed from the family;
	  * a family containing "-italic" is handled the same way through
	    font-style / "Italic";
	  * a family starting with "dejavusans" (optionally preceded by a
	    single quote) is renamed to "DejaVu Sans".

	tag -- object supporting tag.get(key) and tag[key] = value.

	Returns None. Tags without a font-family, or with one needing none of
	the fixes above, are left untouched.
	"""
	family = tag.get("font-family")
	if not family:
		return

	#Case-insensitivity is requested through the flags argument; an inline
	#(?i) in the middle of a pattern is rejected by newer re versions
	bold = re.search("-bold", family, re.IGNORECASE)
	italic = re.search("-italic", family, re.IGNORECASE)
	dejavu = re.match("'?dejavusans", family, re.IGNORECASE)

	#if none of the above apply we can skip
	if not (bold or italic or dejavu):
		return

	problems = ""
	if bold:
		problems = problems + "bad bolding method "
	if italic:
		problems = problems + "bad italicising method "
	if dejavu:
		problems = problems + "wrong font name"

	print("Fixing tag : " + problems)
	print(tag)
	#Otherwise we have work to do!

	#Check for bold
	if bold:
		#Replace rather than append: "normal;Bold" is not a usable
		#font-weight value
		if not re.search("bold", tag.get("font-weight") or "", re.IGNORECASE):
			tag["font-weight"] = "Bold"
		family = re.compile("-bold", re.IGNORECASE).sub("", family)

	#Check for italics
	if italic:
		if not re.search("italic", tag.get("font-style") or "", re.IGNORECASE):
			tag["font-style"] = "Italic"
		family = re.compile("-italic", re.IGNORECASE).sub("", family)

	#Fix dejavu vs Deja Vu
	if dejavu:
		family = "DejaVu Sans"

	tag["font-family"] = family



#Reduce a lone matrix(...) or scale(...) transform to six coefficients
def _transformCoefficients(transform):
	"""Return the coefficients [a, b, c, d, e, f] of a transform, as strings.

	Only a transform attribute consisting of exactly one matrix(...) or one
	scale(...) is understood; scale(sx, sy) becomes [sx, 0, 0, sy, 0, 0] and
	scale(s) is treated as scale(s, s). Anything else (other functions,
	several functions in a row, wrong argument count) yields None.
	"""
	found = re.match(r"\s*(matrix|scale)\s*\(([^()]*)\)\s*$", transform, re.IGNORECASE)
	if not found:
		return None

	#Arguments may be separated by whitespace and/or commas
	args = [a for a in re.split(r"[\s,]+", found.group(2)) if a]

	if found.group(1).lower() == "matrix":
		if len(args) != 6:
			return None
		return args

	if len(args) == 1:
		args = args * 2
	if len(args) != 2:
		return None

	#construct m as a list in Mx+b form
	return [args[0], "0", "0", args[1], "0", "0"]


#Check to see if a small font is being used in conjunction with a
#magnifying transform, and if so fold the magnification into the font size
def fontSizeFix(tag):
	"""Trade a magnifying scale in tag["transform"] for a larger font-size.

	The font-size attribute is looked up on the tag itself and then on its
	ancestors (via .parent). If the transform is a single matrix()/scale()
	whose matrix is diagonal and whose dominant scale factor is greater
	than 1, the font-size found is multiplied by that factor (unit suffix
	kept) and both diagonal entries are divided by it. The transform is
	then written back in matrix(a b c d e f) form.

	tag -- object supporting tag.get(key), tag[key] = value and .parent.

	Returns False, leaving everything untouched, when the tag has no
	transform, no font-size can be found, or the transform is not
	understood; returns None otherwise.

	NOTE(review): the font-size that is changed may belong to an ancestor
	shared with other elements, and the translation part of the matrix is
	not rescaled -- confirm this is acceptable for the documents processed.
	"""
	#without a transformation there is nothing we can do
	transform = tag.get("transform")
	if transform is None:
		return False

	#Find the closest tag (starting with this one) carrying a font-size
	parentTag = tag
	while parentTag is not None and parentTag.get("font-size") is None:
		parentTag = parentTag.parent

	if parentTag is None:
		return False

	#Transform should be of the form y=Mx+b
	m = _transformCoefficients(transform)
	if m is None:
		return False
	print(m)

	try:
		m = [float(c) for c in m]
	except ValueError:
		return False
	print(m)

	EPSILON = 0.001
	if abs(m[1]) < EPSILON and abs(m[2]) < EPSILON:
		#OK, so M is a diagonal matrix
		print("so far so good")
		if abs(m[0]) > abs(m[3]):
			factor = m[0]
		else:
			factor = m[3]

		if factor > 1:
			#Split the size into number and unit, e.g. "12px"
			sizeMatch = re.match(
				r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*([A-Za-z%]*)\s*$",
				parentTag["font-size"])

			if sizeMatch:
				#Pump up the font size by factor, then reduce the
				#matrix. The two must happen together: shrinking
				#the matrix alone would change the rendered size
				fontSize = float(sizeMatch.group(1))
				parentTag["font-size"] = str(fontSize * factor) + sizeMatch.group(2)

				m[0] = m[0] / factor
				m[3] = m[3] / factor

	tag["transform"] = "matrix(" + " ".join([str(c) for c in m]) + ")"

	return


#Crappy font substitution routine
def fontSub(tag):
	"""Swap known font families for the preferred replacement.

	Each (name, value) pair in tag.attrs whose name is "font-family" is
	matched, case-insensitively and from the start of the value, against a
	small table; on the first hit tag["font-family"] is set to the
	replacement from that table row.

	tag -- object exposing .attrs as (name, value) pairs and supporting
	       tag[key] = value.

	Returns None.
	"""
	#(pattern, replacement) rows, tried in this order
	substitutions = (
		(re.compile("(?i)'?Arial.*"), "DejaVu Sans"),
		(re.compile("(?i)'?Times new roman.*"), "Times"),
	)

	for name, value in tag.attrs:
		if name != "font-family":
			continue

		#Substitute fonts from our preferred font table
		for pattern, replacement in substitutions:
			if pattern.match(value):
				tag["font-family"] = replacement
				break

	return



def main():
	"""Command line entry point: svgTinker.py inputFile outputFile.

	Reads the input file, parses it with BeautifulStoneSoup and then, in
	this order: splits every style="..." attribute into separate
	attributes, applies fontFix/fontSub (or fontSizeFix) to <text>
	elements, applies fontFix/fontSub to <g> elements, dissolves <tspan>
	elements into their parents, removes <style> elements for which
	hasFontFace() is true, and writes the result to the output file.

	Exits with status 1 on wrong usage or when either file cannot be
	opened.
	"""
	if len(sys.argv) != 3:
		print("Usage: svgTinker.py inputFile outputFile")
		sys.exit(1)

	#open() raises on failure instead of returning a false value
	try:
		f = open(sys.argv[1])
	except IOError:
		print("File does not exist or could not be read")
		sys.exit(1)

	try:
		xmlText = f.read()
	finally:
		f.close()

	soup = BeautifulStoneSoup(xmlText)

	#find all style="..." tags
	for i in soup.findAll(style=True):
		splitInkscapeStyle(i)

	#Correct all font tags.
	#Attributes are tested by exact name because that is the only form
	#fontFix ("font-family") and fontSizeFix ("transform") act upon
	for i in soup.findAll("text"):
		if i.get("font-family") is not None:
			fontFix(i)
			fontSub(i)
			#NOTE(review): as before, a tag with a font-family never
			#gets fontSizeFix -- confirm that this is intended
			continue

		if i.get("transform") is not None:
			fontSizeFix(i)

	#Fonts can also be stored in g elements.
	for i in soup.findAll("g"):
		if i.get("font-family") is not None:
			fontFix(i)
			fontSub(i)

	#Nuke the tspans, preserving children
	for i in soup.findAll("tspan"):
		tagRemove(i, "tspans")

	#Find base64 encoded data and destroy it.
	#extract() takes the element out of the tree, so no placeholder
	#element has to be left behind
	for i in soup.findAll("style"):
		if hasFontFace(i):
			i.extract()

	try:
		f = open(sys.argv[2], 'w')
	except IOError:
		print('Unable to open file for writing. aborting')
		sys.exit(1)

	#save modified svg data.
	#The earlier bare soup.prettify() call is gone: its result was
	#discarded, so the file always received str(soup), as it still does
	try:
		f.write(str(soup))
	finally:
		f.close()

	print("Wrote file : " + sys.argv[2])



#Run the conversion only when executed as a script, not when imported
if __name__ == "__main__":
	main()