Jump to content

User:User A1/svgTinker.py

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#!/usr/bin/python


from BeautifulSoup import BeautifulStoneSoup, Tag
import sys
import re

#Split all inkscape style=" " into known attributes for that tag
def splitInkscapeStyle(tag) :
	"""Expand an inline style="name:value;..." attribute into separate attributes.

	Every ``name:value`` declaration found in ``tag["style"]`` is stored as
	``tag[name] = value`` and the ``style`` attribute itself is then deleted.

	tag -- any object supporting ``tag[key]`` get/set/delete (the script
	       passes BeautifulSoup tags that carry a ``style`` attribute).

	Returns False, leaving the tag untouched, when the style value is empty;
	otherwise returns None after rewriting the tag.
	"""
	strStyle = tag["style"]
	if not strStyle :
		return False

	print("style is" + str(strStyle))

	for declaration in strStyle.split(";") :
		print("I is :  " + declaration)

		# Split on the FIRST colon only: values such as
		# "url(http://example.com/x)" legitimately contain colons.
		name, colon, value = declaration.partition(":")
		name = name.strip()

		# Skip empty fragments (e.g. after a trailing ";") and anything
		# that is not a name:value pair instead of failing on it.
		if not colon or not name :
			continue

		tag[name] = value.strip()

	del tag['style']

#By Peter Waller, BS: Replacing a tag with its contents, BeautifulSoup mailing list
def tagRemove(tag, tagname):
	"""Replace a tag with its own contents, keeping the children in order.

	tag     -- the element to dissolve; its children are moved into
	           ``tag.parent`` at the position the tag occupied, then the
	           emptied tag is extracted from the tree.
	tagname -- unused; kept so existing callers keep working.

	Does nothing when the tag has no parent (there is nowhere to move the
	children to).
	"""
	parent = tag.parent
	if parent is None:
		return

	# Locate the tag by identity, so a look-alike sibling that merely
	# compares equal cannot be mistaken for it.
	origIndex = None
	for position, sibling in enumerate(parent.contents):
		if sibling is tag:
			origIndex = position
			break
	if origIndex is None:
		raise ValueError("tag is not among its parent's contents")

	# Iterate over a snapshot: inserting a child into the parent detaches
	# it from 'tag', so walking tag.contents directly would shrink the
	# list mid-loop and skip every other child.
	for offset, content in enumerate(list(tag.contents)):
		parent.insert(origIndex + offset, content)

	# Excise the now empty tag
	tag.extract()


def epsilon():
	"""Return the machine epsilon of the platform's float type.

	This is the smallest power of two ``eps`` for which ``1.0 + eps`` is
	still distinguishable from ``1.0``.
	"""
	eps = 1.0

	# Use true division (floor division would collapse eps to 0.0 on the
	# first pass) and test the NEXT candidate, so the value returned is the
	# last one that still changes 1.0 rather than the first that does not.
	while 1.0 + eps / 2.0 > 1.0 :
		eps /= 2.0

	return eps


def hasFontFace(tag):
	"""Tell whether the tag's text contains an ``@font-face`` rule.

	tag -- an object exposing its text as ``tag.string`` (may be None or
	       empty).

	Returns True when "@font-face" occurs anywhere in that text, False
	otherwise (including when there is no text at all).
	"""
	if not tag.string:
		return False

	# A membership test, not str.find(): find() yields -1 (truthy) when the
	# marker is absent and 0 (falsy) when it sits at the very start.
	return "@font-face" in tag.string

#Takes a stone-soup tag and applies various
#workaround fixes of dubious effectiveness
def fontFix(tag):
	"""Move bold/italic markers out of the font-family name and fix DejaVu naming.

	Looks at the tag's ``font-family`` attribute and, in place:

	* if the name contains "-bold", ensures ``font-weight`` is bold and
	  removes the "-bold" marker from the family name;
	* if the name contains "-italic", ensures ``font-style`` is italic and
	  removes the "-italic" marker from the family name;
	* if the name starts with "DejaVuSans" (optionally quoted), replaces
	  the family with "DejaVu Sans".

	All matching is case-insensitive.  Tags without a ``font-family``, or
	whose family needs none of these fixes, are left untouched.

	tag -- object with ``get(name)`` and ``tag[name] = value`` attribute
	       access (a BeautifulSoup tag in this script).

	Returns None.
	"""
	family = tag.get("font-family")
	if not family:
		return

	boldRe = re.compile("-bold", re.IGNORECASE)
	italicRe = re.compile("-italic", re.IGNORECASE)
	dejaVuRe = re.compile("'?dejavusans", re.IGNORECASE)

	bold = boldRe.search(family) is not None
	italic = italicRe.search(family) is not None
	dejavu = dejaVuRe.match(family) is not None

	#if none of the above apply we can skip
	if not (bold or italic or dejavu):
		return

	problems = ""
	if bold:
		problems += "bad bolding method "
	if italic:
		problems += "bad italicising method "
	if dejavu:
		problems += "wrong font name"

	print("Fixing tag : " + problems)
	print(tag)

	if bold:
		weight = tag.get("font-weight")
		# Leave an existing bold-ish weight alone; otherwise the face name
		# wins.  The value is replaced rather than extended because
		# "normal;Bold" is not a valid font-weight.
		if weight is None or not re.search("bold", weight, re.IGNORECASE):
			tag["font-weight"] = "Bold"
		family = boldRe.sub("", family)

	if italic:
		style = tag.get("font-style")
		if style is None or not re.search("italic", style, re.IGNORECASE):
			tag["font-style"] = "Italic"
		family = italicRe.sub("", family)

	#Fix dejavu vs Deja Vu
	if dejavu:
		family = "DejaVu Sans"

	tag["font-family"] = family



#Check to see if a small font is being used in conjunction with a magnifying
#transform; if so, enlarge the font size and shrink the transform to match
def fontSizeFix(tag):
	"""Trade a magnifying transform on a tag for a larger font size.

	When the tag's ``transform`` is a single ``matrix(a b c d e f)`` or
	``scale(sx [sy])`` whose off-diagonal terms are (nearly) zero and whose
	dominant diagonal term exceeds 1, the ``font-size`` found on the tag or
	its nearest ancestor is multiplied by that term and the diagonal of the
	matrix is divided by it.  The transform is always written back in
	``matrix(...)`` form.

	tag -- object with ``get(name)``, ``tag[name]`` access and a ``parent``
	       attribute that is None at the root (a BeautifulSoup tag here).

	Returns False, changing nothing, when the tag has no transform, no
	font-size can be found, or the transform / font-size cannot be parsed;
	returns True once the transform has been rewritten.

	NOTE(review): shrinking the diagonal also changes where the tag's own
	x/y coordinates land; those are not compensated here - confirm whether
	callers rely on the position staying put.
	"""
	#without a transformation there is nothing we can do
	# (look the attribute up explicitly: "in" on a BeautifulSoup 3 tag
	# searches its children, not its attributes)
	transform = tag.get("transform")
	if transform is None:
		return False

	#Find the nearest tag (this one or an ancestor) with a font-size
	parentTag = tag
	while parentTag is not None and parentTag.get("font-size") is None:
		parentTag = parentTag.parent
	if parentTag is None:
		return False

	# Accept exactly one matrix(...) or scale(...).  Anything else
	# (translate, rotate, a chain of several transforms) is left alone
	# rather than being mangled or silently truncated.
	parsed = re.match(r"\s*(matrix|scale)\s*\(([^)]*)\)\s*$", transform, re.IGNORECASE)
	if not parsed:
		return False

	try:
		m = [float(v) for v in re.split(r"[\s,]+", parsed.group(2).strip()) if v]
	except ValueError:
		return False

	if parsed.group(1).lower() == "scale":
		# scale(s) is shorthand for scale(s, s)
		if len(m) == 1:
			m = [m[0], m[0]]
		if len(m) != 2:
			return False
		#construct m as a list in Mx+b form
		m = [m[0], 0.0, 0.0, m[1], 0.0, 0.0]
	elif len(m) != 6:
		return False

	print(m)
	EPSILON=0.001
	if abs(m[1]) < EPSILON and abs(m[2]) < EPSILON:
		#OK, so M is a diagonal matrix
		print("so far so good")
		if abs(m[0]) > abs(m[3]) :
			factor=m[0]
		else:
			factor=m[3]

		if factor > 1:
			# Split "12px" into number and unit so the unit survives.
			size = re.match(
				r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*([a-zA-Z%]*)\s*$",
				str(parentTag["font-size"]))
			if not size:
				return False

			#Pump up the font size by factor, then reduce the matrix.
			# Both steps belong together: dividing the matrix without
			# enlarging the font (factor <= 1) would change the rendered
			# size, flip axes for negative factors, or divide by zero.
			parentTag["font-size"] = str(float(size.group(1)) * factor) + size.group(2)
			m[0] = m[0]/factor
			m[3] = m[3]/factor

	tag["transform"] = "matrix(" + " ".join([str(v) for v in m]) + ")"
	return True


#Crappy font substitution routine
def fontSub(tag):
	"""Replace a known font-family on the tag with its preferred substitute.

	The ``font-family`` value is matched, case-insensitively and from its
	start (an opening single quote is allowed), against a small table;
	the first entry that matches supplies the replacement family name.
	Tags without a ``font-family`` or with an unlisted one are unchanged.

	tag -- object with ``get(name)`` and ``tag[name] = value`` attribute
	       access (a BeautifulSoup tag in this script).

	Returns None.
	"""
	preferredFont = (
		(re.compile("'?Arial", re.IGNORECASE), "DejaVu Sans"),
		(re.compile("'?Times new roman", re.IGNORECASE), "Times"),
	)

	family = tag.get("font-family")
	if family is None:
		return

	#Substitute fonts from our preferred font table
	for pattern, replacement in preferredFont:
		if pattern.match(family):
			tag["font-family"] = replacement
			break



def main():
	"""Command-line entry point: svgTinker.py inputFile outputFile.

	Reads the input file, parses it with BeautifulStoneSoup and then:

	1. splits every style="..." attribute into separate attributes;
	2. on <text> elements runs fontFix/fontSub when a font-family
	   attribute is present, otherwise fontSizeFix when a transform
	   attribute is present;
	3. on <g> elements with a font-family attribute runs fontFix/fontSub;
	4. dissolves every <tspan>, keeping its children;
	5. replaces every <style> element flagged by hasFontFace with an
	   empty <g>;

	and writes the resulting document to the output file.

	Exits with status 1 on a wrong argument count or when either file
	cannot be opened.
	"""
	if len(sys.argv) != 3:
		print("Usage: svgTinker.py inputFile outputFile")
		sys.exit(1)

	# open() signals failure by raising, never by returning a false value.
	try:
		with open(sys.argv[1]) as f:
			xmlText = f.read()
	except IOError:
		print("File does not exist or could not be read")
		sys.exit(1)

	soup=BeautifulStoneSoup(xmlText)

	#find all style="..." tags
	for i in soup.findAll(style=True):
		splitInkscapeStyle(i)

	#Correct all font tags
	for i in soup.findAll("text"):
		if not i.attrs:
			continue

		#Check to see what attrs this guy has.  Both flags are computed
		#afresh for every tag so nothing leaks over from the previous one.
		names = [j[0] for j in i.attrs]
		fontFamilyTag = any(re.match("(?i)font-family", n) for n in names)
		fontTransformTag = any(re.match("(?i)transform", n) for n in names)

		if fontFamilyTag :
			fontFix(i)
			fontSub(i)
			continue

		if fontTransformTag :
			fontSizeFix(i)

	#Fonts can also be stored in g elements.
	for i in soup.findAll("g"):
		if not i.attrs:
			continue

		if any(re.match("(?i)font-family", j[0]) for j in i.attrs):
			fontFix(i)
			fontSub(i)

	#Nuke the tspans, preserving children
	for i in soup.findAll("tspan"):
		tagRemove(i,"tspans")

	#Find base64 encoded data and destroy it
	#FIXME: Not sure how to trick soup into inserting "" vs "<></>", so use <g></g> instead
	for i in soup.findAll("style"):
		if hasFontFace(i):
			# A fresh placeholder per replacement: a tag can live in only
			# one place, so reusing one would leave all but the last
			# replaced position empty.
			i.replaceWith(Tag(soup, "g"))

	try:
		out = open(sys.argv[2],'w')
	except IOError:
		print('Unable to open file for writing. aborting')
		sys.exit(1)

	#save modified svg data
	# (prettify() only returns a formatted copy; the document is written
	# as-is so whitespace inside text elements is not disturbed)
	with out:
		out.write(str(soup))

	print("Wrote file : " + sys.argv[2])



# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
	main()