# Originally published as the page "User:User A1/svgTinker.py".
#!/usr/bin/python
from BeautifulSoup import BeautifulStoneSoup, Tag
import string
import sys
import re
def splitInkscapeStyle(tag):
    """Split an inkscape ``style="a:b;c:d"`` attribute into separate attributes.

    Every ``name:value`` declaration found in the ``style`` attribute is
    copied onto the tag as ``tag[name] = value``; the ``style`` attribute is
    then deleted.

    tag -- attribute container indexed by attribute name; it must support
           ``get``, item assignment and item deletion.

    Returns False (leaving the tag untouched) when the tag has no style
    attribute or the attribute is empty; otherwise returns None.
    """
    strStyle = tag.get("style")
    if not strStyle:
        return False
    print("style is" + str(strStyle))
    for declaration in strStyle.split(";"):
        print("I is : " + declaration)
        # Split on the FIRST colon only so that values which themselves
        # contain a colon are kept whole.
        name, colon, value = declaration.partition(":")
        name = name.strip()
        if not colon or not name:
            # Empty piece (e.g. after a trailing ';') or a declaration with
            # no name/value separator: there is nothing to copy.
            continue
        # Whitespace around the name or value is not part of either.
        tag[name] = value.strip()
    del tag["style"]
#By Peter Waller, BS: Replacing a tag with its contents, BeautifulSoup mailing list
def tagRemove(tag, tagname):
    """Replace ``tag`` by its own children, keeping them in order.

    The children of ``tag`` are moved into ``tag.parent`` at the position
    ``tag`` occupied, and the emptied ``tag`` is then extracted.

    tag     -- node exposing ``parent``, ``contents`` and ``extract()``; its
               parent must expose ``contents`` and ``insert(index, node)``.
    tagname -- not used by this function; kept so existing calls still work.

    Raises ValueError if ``tag`` is not among its parent's contents.
    """
    parent = tag.parent
    # Find the tag by identity.  list.index() compares with ==, so a sibling
    # that merely compares equal to this tag (same name, attributes and
    # contents) could be found first, giving the wrong insertion point.
    origIndex = None
    for position, sibling in enumerate(parent.contents):
        if sibling is tag:
            origIndex = position
            break
    if origIndex is None:
        raise ValueError("tag is not among its parent's contents")
    # Iterate over a copy: tag.contents shrinks as each child is moved
    # out of 'tag' and into the parent.
    for offset, content in enumerate(list(tag.contents)):
        parent.insert(origIndex + offset, content)
    # Excise the now empty tag
    tag.extract()
def epsilon():
    """Return the floating point machine epsilon.

    This is the smallest power of two ``eps`` for which ``1.0 + eps`` is
    still greater than ``1.0``.
    """
    eps = 1.0
    # True division is required here: floor division (//) turns 1.0 into 0.0
    # on the first pass.  Testing the *next* halving stops the loop on the
    # last value that is still distinguishable from zero when added to 1.0.
    while 1.0 + eps / 2.0 > 1.0:
        eps = eps / 2.0
    return eps
def hasFontFace(tag):
    """Return True when the tag's text contains an ``@font-face`` rule.

    tag -- object whose ``string`` attribute holds its text, or None when
           it has no single text value.

    Returns False for a missing or empty string.
    """
    text = tag.string
    if not text:
        return False
    # Check for encoded font base64.
    # A membership test is used rather than the truth value of find():
    # find() yields -1 (truthy) when absent and 0 (falsy) for a match at the
    # very start, which is the opposite of what is wanted.
    return "@font-face" in text
#Takes a stone-soup tag and applies various
#workaround fixes of dubious effectiveness
def fontFix(tag):
    """Repair a ``font-family`` that has weight or slant baked into its name.

    Three problems are looked for in the tag's ``font-family`` value:

    * a ``-Bold`` marker: removed from the family name, and ``font-weight``
      is set to ``Bold`` unless it already mentions bold;
    * an ``-Italic`` marker: removed from the family name, and
      ``font-style`` is set to ``Italic`` unless it already mentions italic;
    * a name starting with ``DejaVuSans`` (optionally quoted): the family is
      replaced by ``DejaVu Sans``.

    All matching is case-insensitive.  A tag without a ``font-family``, or
    whose family shows none of the problems, is left untouched.

    tag -- attribute container indexed by attribute name; it must support
           ``get`` and item assignment.

    Returns None.
    """
    family = tag.get("font-family")
    if not family:
        return

    # The (?i) flag is placed at the very start of each pattern: newer
    # versions of the re module reject global flags anywhere else.
    bold = re.match(r"(?i).*-bold", family) is not None
    italic = re.match(r"(?i).*-italic", family) is not None
    dejavu = re.match(r"(?i)'?dejavusans", family) is not None

    #if none of the above apply we can skip
    if not (bold or italic or dejavu):
        return

    reasons = ""
    if bold:
        reasons = reasons + "bad bolding method "
    if italic:
        reasons = reasons + "bad italicising method "
    if dejavu:
        reasons = reasons + "wrong font name"
    print("Fixing tag : " + reasons)
    print(tag)

    #Otherwise we have work to do!
    if bold:
        weight = tag.get("font-weight")
        # Inspect the font-weight VALUE itself.  An existing weight that
        # does not mention bold is replaced, not extended: a ';'-joined
        # list is not a usable font-weight value.
        if not weight or not re.search(r"(?i)bold", weight):
            tag["font-weight"] = "Bold"
        family = re.sub(r"(?i)-bold", "", family)

    if italic:
        style = tag.get("font-style")
        if not style or not re.search(r"(?i)italic", style):
            tag["font-style"] = "Italic"
        # Strip the italic marker as well, otherwise the family name keeps
        # referring to a face that the font-style attribute now selects.
        family = re.sub(r"(?i)-italic", "", family)

    #Fix dejavu vs Deja Vu
    if dejavu:
        family = "DejaVu Sans"

    tag["font-family"] = family
#Check to see if a small font is being used in conjunction with
#an enlarging transformation, and if so move the scaling into the font size
def fontSizeFix(tag):
    """Fold an enlarging ``transform`` on ``tag`` into the font size.

    The nearest element carrying a ``font-size`` attribute is searched for,
    starting at ``tag`` itself and walking up through ``parent``.  When the
    tag's transform is a single ``matrix(a b c d e f)`` or ``scale(sx [sy])``
    whose ``b`` and ``c`` terms are (almost) zero, and the larger-magnitude
    diagonal term is greater than 1, then:

    * that element's ``font-size`` is multiplied by the factor (the unit
      suffix, if any, is kept), and
    * the tag's transform is rewritten as a ``matrix(...)`` whose diagonal
      terms have been divided by the factor.

    tag -- attribute container indexed by attribute name with a ``parent``
           attribute; it must support ``get`` and item assignment.

    Returns False when nothing could be done: no transform, no font-size
    on the tag or its ancestors, a transform that is not exactly one
    matrix()/scale() with numeric components, or a font-size that is not a
    number with an optional unit.  Otherwise returns None, whether or not
    anything was changed.
    """
    #without a transformation there is nothing we can do
    transform = tag.get("transform")
    if transform is None:
        return False

    #Find the parent tag with the font-size parameter
    parentTag = tag
    while parentTag is not None and parentTag.get("font-size") is None:
        parentTag = getattr(parentTag, "parent", None)
    if parentTag is None:
        return False

    # Accept only a transform consisting of exactly one matrix() or scale().
    # The whole attribute is overwritten further down, so anything else in
    # a transform list (translate, rotate, ...) would be silently lost.
    parsed = re.match(r"(?i)\s*(matrix|scale)\s*\(([^()]*)\)\s*$", transform)
    if parsed is None:
        return False
    components = [c for c in re.split(r"[\s,]+", parsed.group(2)) if c]

    if parsed.group(1).lower() == "scale":
        # scale(s) with a single argument scales both axes by s.
        if len(components) == 1:
            components = components * 2
        if len(components) != 2:
            return False
        #construct m as a list in Mx+b form
        components = [components[0], "0", "0", components[1], "0", "0"]

    #Transform should be of the form y=Mx+b
    if len(components) != 6:
        return False
    print(components)
    try:
        m = [float(c) for c in components]
    except ValueError:
        return False
    print(m)

    EPSILON = 0.001
    if abs(m[1]) < EPSILON and abs(m[2]) < EPSILON:
        #OK, so M is a diagonal matrix
        print("so far so good")
        if abs(m[0]) > abs(m[3]):
            factor = m[0]
        else:
            factor = m[3]
        if factor > 1:
            # Parse "<number><optional unit>" so the unit can be kept.
            size = re.match(
                r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)"
                r"\s*([A-Za-z%]*)\s*$",
                "%s" % (parentTag["font-size"],))
            if size is None:
                return False
            #Pump up the font size by factor, then reduce the matrix
            # NOTE(review): the font-size may sit on an ancestor shared with
            # other elements, which are then enlarged too -- confirm that
            # this is acceptable for the documents being processed.
            parentTag["font-size"] = str(float(size.group(1)) * factor) + size.group(2)
            m[0] = m[0] / factor
            m[3] = m[3] / factor
            tag["transform"] = "matrix(" + " ".join(str(v) for v in m) + ")"
    return
#Crappy font substitution routine
def fontSub(tag):
    """Swap known font families for the preferred replacement.

    Each ``font-family`` entry in ``tag.attrs`` is tested against the
    substitution table in order; on the first pattern that matches, the
    tag's ``font-family`` is set to the corresponding replacement.

    tag -- object with an ``attrs`` sequence of (name, value) pairs that
           supports item assignment by attribute name.

    Returns None.
    """
    # (pattern, replacement) pairs, tried in this order.
    substitutions = (
        (re.compile("(?i)'?Arial.*"), "DejaVu Sans"),
        (re.compile("(?i)'?Times new roman.*"), "Times"),
    )
    for attrName, attrValue in tag.attrs:
        if attrName != "font-family":
            continue
        #Substitute fonts from our preferred font table
        for pattern, replacement in substitutions:
            if pattern.match(attrValue):
                tag["font-family"] = replacement
                break
    return
def main():
    """Command line entry point: ``svgTinker.py inputFile outputFile``.

    Reads the input file, parses it with BeautifulStoneSoup, then:

    1. splits every ``style`` attribute into separate attributes;
    2. for each ``text`` element, runs fontFix/fontSub when it has a
       font-family attribute, otherwise fontSizeFix when it has a transform;
    3. runs fontFix/fontSub on each ``g`` element with a font-family;
    4. replaces every ``tspan`` by its contents;
    5. replaces every ``style`` element containing ``@font-face`` by an
       empty ``g`` element;

    and writes the resulting document to the output file.

    Exits with status 1 on wrong usage or when either file cannot be opened.
    """
    if len(sys.argv) != 3:
        print("Usage: svgTinker.py inputFile outputFile")
        sys.exit(1)

    # open() signals failure by raising, never by returning a false value.
    try:
        with open(sys.argv[1]) as f:
            xmlText = f.read()
    except IOError:
        print("File does not exist or could not be read")
        sys.exit(1)

    soup = BeautifulStoneSoup(xmlText)

    #find all style="..." tags
    for styledTag in soup.findAll(style=True):
        splitInkscapeStyle(styledTag)

    #Correct all font tags
    for textTag in soup.findAll("text"):
        # Both flags are reset for every element so that a result from a
        # previous element can never leak into this one.
        fontFamilyTag = False
        fontTransformTag = False
        for attr in textTag.attrs:
            #Check to see what attrs this guy has
            if re.match("(?i)font-family", attr[0]):
                fontFamilyTag = True
            elif re.match("(?i)transform", attr[0]):
                fontTransformTag = True
        if fontFamilyTag:
            fontFix(textTag)
            fontSub(textTag)
            # NOTE(review): an element with a font-family never reaches
            # fontSizeFix, even if it also has a transform -- kept as in
            # the original; confirm this is intended.
            continue
        if fontTransformTag:
            fontSizeFix(textTag)

    #Fonts can also be stored in g elements.
    for groupTag in soup.findAll("g"):
        if any(re.match("(?i)font-family", attr[0]) for attr in groupTag.attrs):
            fontFix(groupTag)
            fontSub(groupTag)

    #Nuke the tspans, preserving children
    for spanTag in soup.findAll("tspan"):
        tagRemove(spanTag, "tspans")

    #Find base64 encoded data and destroy it
    #FIXME: Not sure how to trick soup into inserting "" vs "<></>", so use <g></g> instead
    for styleTag in soup.findAll("style"):
        if hasFontFace(styleTag):
            # A fresh tag per replacement: one tag object can only live in
            # one place in the tree at a time.
            styleTag.replaceWith(Tag(soup, "g"))

    try:
        out = open(sys.argv[2], 'w')
    except IOError:
        print('Unable to open file for writing. aborting')
        sys.exit(1)

    #save modified svg data
    # NOTE(review): the original called soup.prettify() here and discarded
    # the returned string, so the file has always been written un-prettified.
    # That output is kept; write soup.prettify() instead if pretty-printing
    # is actually wanted.
    with out:
        out.write(str(soup))
    print("Wrote file : " + sys.argv[2])
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()