# Source: wiki user page User:PhiLiP/ZhConversion.py
# -*- coding: utf-8 -*-
# file: ZhConversion.py
import zipfile
import codecs
def getUnihan(unihan_fname='Unihan.zip'):
    """Return the raw contents of ``Unihan.txt`` from a Unihan zip archive.

    Args:
        unihan_fname: Path of the zip archive to open.

    Returns:
        The undecoded contents of the ``Unihan.txt`` member, exactly as
        ``ZipFile.read`` returns them.

    Raises:
        KeyError: If the archive has no ``Unihan.txt`` member.
    """
    unihanzipfile = zipfile.ZipFile(unihan_fname, 'r')
    try:
        return unihanzipfile.read('Unihan.txt')
    finally:
        # Release the archive handle even when the member is missing or
        # unreadable; try/finally also works on interpreters where ZipFile
        # is not a context manager.
        unihanzipfile.close()
def getCodeFromUnihan(variant):
    """Build a character mapping from one field of the Unihan database.

    The data is loaded from ``Unihan.zip`` through ``getUnihan``. Every line
    of the form ``<code><TAB>variant<TAB><value> ...`` adds one entry, where
    ``<code>`` and ``<value>`` are a two-character prefix (``U+`` in Unihan)
    followed by hexadecimal digits. When a line lists several
    whitespace-separated values, only the first one is used.

    Args:
        variant: Name of the field to extract, e.g. ``'kSimplifiedVariant'``.

    Returns:
        A dict mapping each source character to its target character, both
        produced by ``ucs4chr``.

    Raises:
        ValueError: If a matching line carries a code or value whose digits
            are not valid hexadecimal.
    """
    data = getUnihan(unihan_fname='Unihan.zip')
    # ZipFile.read returns bytes; decode so the text separator below can be
    # matched on Python 3, where bytes and str cannot be mixed.
    if isinstance(data, bytes):
        data = data.decode('utf-8')
    sept = '\t' + variant + '\t'
    to = {}
    for elem in data.splitlines():
        left, sep, right = elem.partition(sept)
        if sep != sept:
            # Line does not carry the requested field.
            continue
        targets = right.split()
        if not targets:
            # Field present but empty: there is nothing to map to.
            continue
        # Skip the two-character prefix and parse the hex code point.
        to[ucs4chr(int(left[2:], 16))] = ucs4chr(int(targets[0][2:], 16))
    return to
def toHansCode():
    """Return the mapping built from the ``kSimplifiedVariant`` field."""
    return getCodeFromUnihan('kSimplifiedVariant')
def toHantCode():
    """Return the mapping built from the ``kTraditionalVariant`` field."""
    return getCodeFromUnihan('kTraditionalVariant')
def saveFile(toHans, toHant, out_fname='ZhConversion.php'):
    """Write both conversion tables to a UTF-8 encoded PHP source file.

    The file defines two PHP arrays: ``$zh2Hant`` (filled from ``toHant``)
    followed by ``$zh2Hans`` (filled from ``toHans``), with one
    ``"key" => "value",`` line per entry in dict iteration order.

    Keys and values are written verbatim; characters that are special
    inside PHP double-quoted strings are not escaped.

    Args:
        toHans: Dict of text keys to text values for the ``$zh2Hans`` array.
        toHant: Dict of text keys to text values for the ``$zh2Hant`` array.
        out_fname: Path of the file to write; defaults to
            ``'ZhConversion.php'`` in the current directory.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = [u'<?php\n$zh2Hant = array(\n']
    parts.extend(
        u'"' + left + u'" => "' + right + u'",\n'
        for left, right in toHant.items()
    )
    parts.append(u');\n\n$zh2Hans = array(\n')
    parts.extend(
        u'"' + left + u'" => "' + right + u'",\n'
        for left, right in toHans.items()
    )
    parts.append(u');')

    zhConversion = codecs.open(out_fname, 'w', 'utf-8')
    try:
        zhConversion.write(u''.join(parts))
    finally:
        # Close the file even if encoding or writing fails.
        zhConversion.close()
def ucs4chr(codepoint):
    """Return the text string for a Unicode code point.

    On interpreters whose character function cannot represent code points
    above U+FFFF directly, the result is the two-unit UTF-16 surrogate pair
    for the code point.

    Args:
        codepoint: Integer Unicode code point.

    Returns:
        A string of length 1, or of length 2 (high surrogate followed by low
        surrogate) when the surrogate fallback is taken.

    Raises:
        ValueError: If ``codepoint`` cannot be converted and lies outside
            0x10000..0x10FFFF, where no surrogate pair exists for it.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3 has no unichr; chr covers all of Unicode
    try:
        return to_char(codepoint)
    except ValueError:
        # Only supplementary-plane code points have a surrogate-pair form;
        # anything else is a genuinely invalid code point.
        if not 0x10000 <= codepoint <= 0x10FFFF:
            raise
        hi, lo = divmod(codepoint - 0x10000, 0x400)
        return to_char(0xd800 + hi) + to_char(0xdc00 + lo)
def ucs4ord(str):
    """Return the Unicode code point of a single character.

    Accepts either a one-unit string or a two-unit UTF-16 surrogate pair
    (high surrogate followed by low surrogate), the form ``ucs4chr`` yields
    when it falls back to surrogates.

    Args:
        str: The character to convert. The name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        The integer code point.

    Raises:
        TypeError: If the argument is neither a single unit nor a valid
            surrogate pair.
    """
    if len(str) == 1:
        return ord(str)
    if len(str) == 2:
        hi, lo = ord(str[0]) - 0xd800, ord(str[1]) - 0xdc00
        # Both offsets must fit in 10 bits for a well-formed pair.
        if 0 <= hi < 0x400 and 0 <= lo < 0x400:
            # The low surrogate contributes the bottom 10 bits.
            return hi * 0x400 + lo + 0x10000
    raise TypeError("ucs4ord() expected a valid ucs4 character")
# Script body: build both conversion tables, then write them out with
# saveFile. The bare numbers printed along the way appear to be progress
# markers for the three stages.
print(1)
toHant = toHantCode()
toHans = toHansCode()
# NOTE(review): this module-level CString is not read by any code visible in
# this file (saveFile does not use it) -- confirm before removing.
CString = ''
print(2)
saveFile(toHans, toHant)
print(3)