#!/usr/bin/env python
class CharSet:
    """Kana table for one multi-byte encoding.

    Each script (hiragana, katakana) is described by a lead byte and an
    iterable of integer trail values.  A trail value in 1..0xff is emitted
    as a single byte after the lead byte; any other value is emitted as two
    bytes (high byte first).
    """

    def __init__(self, charset, hiragana, katakana):
        """Store the charset name and the (lead byte, trail values) pairs.

        charset  -- name of the encoding, returned by getCharsetName().
        hiragana -- pair (lead_byte, iterable_of_trail_values).
        katakana -- pair (lead_byte, iterable_of_trail_values).
        """
        self.name = charset
        self.hiragana_prefix = chr(hiragana[0])
        self.hiragana_range = hiragana[1]
        self.katakana_prefix = chr(katakana[0])
        self.katakana_range = katakana[1]

    def getCharsetName(self):
        """Return the charset name given to the constructor."""
        return self.name

    @staticmethod
    def _encode(prefix, char_range):
        """Return the raw bytes of one output line for show().

        Characters are separated by a single space and the line ends with a
        newline, which is exactly what the original chain of
        ``print item,`` statements followed by a bare ``print`` produced.
        """
        # On Python 3, chr() yields a text code point rather than a byte;
        # latin-1 maps code points 0..255 straight back to that byte value.
        # On Python 2, ``bytes`` is ``str`` and the prefix passes through.
        if not isinstance(prefix, bytes):
            prefix = prefix.encode('latin-1')
        cells = []
        for c in char_range:
            if 0 < c <= 0xff:
                tail = bytearray([c])
            else:
                # Two-byte trail, high byte first.  Values outside
                # 0..0xffff raise ValueError, as chr() did before.
                tail = bytearray([c >> 8, c & 0xff])
            cells.append(prefix + bytes(tail))
        return b' '.join(cells) + b'\n'

    def show(self, prefix, char_range):
        """Write every character of char_range to standard output.

        prefix     -- lead byte, as produced by chr() in __init__.
        char_range -- iterable of integer trail values.

        The bytes are written undecoded so that the terminal (or a pipe)
        receives the target encoding itself.
        """
        import sys

        data = self._encode(prefix, char_range)
        binary_out = getattr(sys.stdout, 'buffer', None)
        if binary_out is None:
            # Python 2: sys.stdout already accepts byte strings.
            sys.stdout.write(data)
        else:
            # Python 3: bypass the text layer; flush first so earlier text
            # output keeps its order relative to these bytes.
            sys.stdout.flush()
            binary_out.write(data)
            binary_out.flush()

    def show_hiragana(self):
        """Write the hiragana of this charset to standard output."""
        self.show(self.hiragana_prefix, self.hiragana_range)

    def show_katakana(self):
        """Write the katakana of this charset to standard output."""
        self.show(self.katakana_prefix, self.katakana_range)
# Kana tables: (lead byte, trail values) for hiragana, then katakana.
# range() end points are exclusive, so each stop value is one past the last
# character wanted.  list(range(...)) keeps the concatenations valid on both
# Python 2 and Python 3.

# EUC-JP: hiragana 0xa4a1-0xa4f3, katakana 0xa5a1-0xa5f6.
EUCJP = CharSet('EUC-JP',
                (0xa4, range(0xa1, 0xf4)),
                (0xa5, range(0xa1, 0xf7)))

# Shift_JIS: hiragana 0x829f-0x82f1, katakana 0x8340-0x8396.
# 0x7f is never a valid Shift_JIS trail byte, so the katakana row jumps
# from 0x837e straight to 0x8380.
SJIS = CharSet('Shift_JIS',
               (0x82, range(0x9f, 0xf2)),
               (0x83, list(range(0x40, 0x7f)) + list(range(0x80, 0x97))))

# UTF-8: every kana starts with 0xe3; the remaining two bytes are given as
# one 16-bit value.  Hiragana U+3041-U+3093 is e3 81 81 .. e3 81 bf then
# e3 82 80 .. e3 82 93; katakana U+30A1-U+30F6 is e3 82 a1 .. e3 82 bf then
# e3 83 80 .. e3 83 b6.  The first run of each must include the ..bf code
# (U+307F and U+30BF), hence the 0x..c0 stop values.
UTF8 = CharSet('utf-8',
               (0xe3, list(range(0x8181, 0x81c0)) + list(range(0x8280, 0x8294))),
               (0xe3, list(range(0x82a1, 0x82c0)) + list(range(0x8380, 0x83b7))))
if __name__ == '__main__':
    # Script entry point: dump the UTF-8 hiragana line, then the katakana line.
    for show_table in (UTF8.show_hiragana, UTF8.show_katakana):
        show_table()
# Syntax highlighted by Code2HTML, v. 0.9.1