跳转到内容

User:PhiLiP/ZhConversion.py

维基百科,自由的百科全书

这是本页的一个历史版本,由PhiLiP(留言 | 贡献)在2008年6月23日 (一) 16:50(新页面,内容:<source lang="python"> # -*- coding: utf-8 -*- # file: ZhConversion.py import zipfile import codecs def getUnihan(unihan_fname='Unihan.zip'): unihanzipfile = z...)编辑。这可能和当前版本存在着巨大的差异。

(差异) ←上一修订 | 最后版本 (差异) | 下一修订→ (差异)
# -*- coding: utf-8  -*-
# file: ZhConversion.py

import zipfile
import codecs

def getUnihan(unihan_fname='Unihan.zip'):
	"""Return the contents of the 'Unihan.txt' member of a zip archive.

	unihan_fname -- path of the zip archive to open (default 'Unihan.zip').

	The data is returned exactly as ZipFile.read() yields it (a byte
	string); no decoding is done here.  Errors raised by zipfile while
	opening the archive or reading the member propagate to the caller.
	"""
	archive = zipfile.ZipFile(unihan_fname, 'r')
	# try/finally (rather than a `with` block) keeps this usable on old
	# interpreters while still guaranteeing the archive handle is released,
	# even when the member is missing or unreadable.
	try:
		return archive.read('Unihan.txt')
	finally:
		archive.close()
	
def getCodeFromUnihan(variant, unihan_fname='Unihan.zip'):
	"""Build a character-to-character mapping from one field of Unihan.txt.

	variant      -- field name to look for, e.g. 'kSimplifiedVariant'.
	unihan_fname -- zip archive handed to getUnihan() (default 'Unihan.zip').

	Every line containing '<TAB>variant<TAB>' contributes one entry: the
	text before the separator and the first whitespace-separated token after
	it are each stripped of their two-character prefix (the 'U+' of a
	'U+XXXX' code point), parsed as hexadecimal and turned into characters
	with ucs4chr().  Only the first token after the field is used; a line
	with nothing after the field is skipped.

	Returns a dict mapping source character -> target character.
	"""
	data = getUnihan(unihan_fname=unihan_fname)
	sept = '\t' + variant + '\t'
	# getUnihan() returns the archive member undecoded.  Where byte strings
	# and text strings are distinct types, partition() needs a separator of
	# the same type as the data, so convert the separator rather than the
	# (much larger) data.
	if not isinstance(sept, type(data)):
		sept = sept.encode('utf-8')
	to = {}
	for elem in data.splitlines():
		left, sep, right = elem.partition(sept)
		if sep != sept:
			# Line does not carry the requested field.
			continue
		targets = right.split()
		if not targets:
			# Field present but empty: nothing to map to.
			continue
		to[ucs4chr(int(left[2:], 16))] = ucs4chr(int(targets[0][2:], 16))
	return to

def toHansCode():
	"""Return the mapping built from the 'kSimplifiedVariant' field.

	Thin wrapper around getCodeFromUnihan(); the result is its dict,
	unchanged.
	"""
	return getCodeFromUnihan('kSimplifiedVariant')

def toHantCode():
	"""Return the mapping built from the 'kTraditionalVariant' field.

	Thin wrapper around getCodeFromUnihan(); the result is its dict,
	unchanged.
	"""
	return getCodeFromUnihan('kTraditionalVariant')

def saveFile(toHans, toHant, out_fname='ZhConversion.php'):
	"""Write two conversion tables as PHP arrays to a UTF-8 file.

	toHans    -- dict written as the entries of the PHP array $zh2Hans.
	toHant    -- dict written as the entries of the PHP array $zh2Hant.
	out_fname -- path of the file to (over)write (default
	             'ZhConversion.php').

	Each dict item becomes one line of the form  "key" => "value",  in the
	dict's own iteration order; $zh2Hant is emitted first.  Keys and values
	are written verbatim, without PHP string escaping.
	"""
	# Collect the fragments and join once instead of repeatedly extending
	# one ever-growing string.
	pieces = [u'<?php\n$zh2Hant = array(\n']
	for left, right in toHant.items():
		pieces.append(u'"' + left + u'" => "' + right + u'",\n')
	pieces.append(u');\n\n$zh2Hans = array(\n')
	for left, right in toHans.items():
		pieces.append(u'"' + left + u'" => "' + right + u'",\n')
	pieces.append(u');')
	content = u''.join(pieces)

	# The file is opened only after the content was built successfully, so a
	# failure above does not leave a truncated file behind; try/finally
	# guarantees the handle is closed even if the write itself fails.
	zhConversion = codecs.open(out_fname, 'w', 'utf-8')
	try:
		zhConversion.write(content)
	finally:
		zhConversion.close()

def ucs4chr(codepoint):
	"""Return the text string for a Unicode code point.

	codepoint -- integer code point.

	The interpreter's own code-point-to-character builtin is tried first.
	If it raises ValueError for a code point in 0x10000..0x10FFFF (as
	happens on interpreters whose builtin is limited to 16-bit values), the
	character is returned as a two-unit UTF-16 surrogate pair instead.

	Raises ValueError for code points the builtin rejects that are outside
	0x10000..0x10FFFF, rather than fabricating a bogus surrogate pair.
	"""
	try:
		to_char = unichr
	except NameError:
		# No unichr(): chr() is the text-character builtin there.
		to_char = chr
	try:
		return to_char(codepoint)
	except ValueError:
		# Only supplementary-plane code points can be expressed as a
		# surrogate pair; anything else is a genuine error.
		if not 0x10000 <= codepoint <= 0x10FFFF:
			raise
		hi, lo = divmod(codepoint - 0x10000, 0x400)
		return to_char(0xd800 + hi) + to_char(0xdc00 + lo)

def ucs4ord(str):
	"""Return the Unicode code point of a single character.

	str -- a one-unit string, or a two-unit UTF-16 surrogate pair (high
	       surrogate followed by low surrogate).  The parameter name
	       shadows the builtin but is kept so keyword callers still work.

	Returns the integer code point.  Raises TypeError for any other input:
	an empty string, more than two units, or two units that do not form a
	high/low surrogate pair.
	"""
	if len(str) == 1:
		return ord(str)
	if len(str) == 2:
		hi, lo = ord(str[0]) - 0xd800, ord(str[1]) - 0xdc00
		# Both offsets must fall inside the 10-bit surrogate ranges;
		# otherwise the two units are not a valid pair.
		if 0 <= hi < 0x400 and 0 <= lo < 0x400:
			# The low surrogate supplies the bottom 10 bits of the result.
			return 0x10000 + hi * 0x400 + lo
	raise TypeError("ucs4ord() expected a valid ucs4 character")

# Script body.  These are unguarded top-level statements, so they run when
# the file is executed and also whenever it is imported.
# NOTE(review): the numbered print() calls look like progress markers
# (before loading, after loading, after saving) -- confirm.
print(1)
# Build both mappings.  Each call goes through getCodeFromUnihan(), which
# calls getUnihan() again, so the archive is read once per mapping.
toHant = toHantCode()
toHans = toHansCode()
# NOTE(review): this module-level CString is not read by anything visible in
# this file; saveFile() builds its own local variable of the same name.
CString = ''
print(2)
# Write both tables out as PHP arrays.
saveFile(toHans, toHant)
print(3)