annotate bin/multicode @ 12254:616be78bd12e draft

<oerjan> revert
author HackEso <hackeso@esolangs.org>
date Fri, 06 Dec 2019 07:54:58 +0000
parents c989a1669243
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9075
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
1 #!/usr/bin/python
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
2
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
3
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
4 import os, glob, sys, unicodedata, locale, gzip, re, traceback, encodings
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
5 import urllib, webbrowser, textwrap
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
6
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# bz2 was introduced in 2.3, we want this to work also with earlier versions
try:
    import bz2
except ImportError:
    # callers must check for None before using bz2
    bz2 = None

# for python3: there is no separate ``unicode`` type, ``str`` takes its place
try:
    unicode
except NameError:
    unicode = str

# 'any' and 'all' were introduced in python2.5
# working replacement for older versions
try:
    all
except NameError:
    def all(iterable):
        "return True if every element of iterable is true (or iterable is empty)"
        for element in iterable:
            if not element:
                return False
        return True
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
25
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
PY3 = sys.version_info >= (3,)

if not PY3:  # python2

    # getoutput() and getstatusoutput() methods have
    # been moved from commands to the subprocess module
    # with Python >= 3.x
    import commands as cmd

    def is_ascii(s):
        """Return True if the byte string s decodes cleanly as ASCII (python 2)."""
        try:
            unicode(s, 'ascii')
        except UnicodeDecodeError:
            result = False
        else:
            result = True
        return result

    def out(*args):
        """Write each argument to stdout, encoded in the output charset."""
        for piece in args:
            encoded = piece.encode(options.iocharset, 'replace')
            sys.stdout.write(encoded)

    # elements of a byte string are 1-character strings in python2
    ord23 = ord

else:  # python3

    import subprocess as cmd

    def is_ascii(s):
        """Return True if the string s encodes cleanly as ASCII (python 3)."""
        try:
            s.encode('ascii')
        except UnicodeEncodeError:
            result = False
        else:
            result = True
        return result

    def out(*args):
        """Write each argument to stdout, encoded in the output charset."""
        for piece in args:
            # the text layer is flushed before every raw write to the
            # underlying binary buffer
            sys.stdout.flush()
            encoded = piece.encode(options.iocharset, 'replace')
            sys.stdout.buffer.write(encoded)

    # ord23 is used to convert elements of byte array in python3, which are integers
    def ord23(x):
        return x

    # unichr is not in python3
    unichr = chr
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
71
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
72
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
73
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
74 from optparse import OptionParser
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
75
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
VERSION = '0.9.7'


# list of terminals that support bidi
biditerms = ['mlterm']

# adopt the user's locale; keep the default "C" locale if it is unsupported
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

# guess terminal charset
try:
    iocharsetguess = locale.nl_langinfo(locale.CODESET) or "ascii"
except locale.Error:
    iocharsetguess = "ascii"
except AttributeError:
    # nl_langinfo()/CODESET are not available on every platform
    # (e.g. MS Windows); ask for the preferred encoding instead.
    # do_setlocale=False because setlocale() was already attempted above.
    iocharsetguess = locale.getpreferredencoding(False) or "ascii"

# on a bidi-capable terminal with a UTF charset, LTR holds the override
# character U+202D; everywhere else it is empty
if os.environ.get('TERM') in biditerms and iocharsetguess.lower().startswith('utf'):
    LTR = u'\u202d'  # left to right override
else:
    LTR = ''
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
97
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
98
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# terminal control strings keyed by name: ANSI "ESC [ <n> m" sequences
# for attributes, foreground and background colours, plus the bell
colours = {
    # attributes
    'none':       "",
    'default':    "\x1b[0m",
    'bold':       "\x1b[1m",
    'underline':  "\x1b[4m",
    'blink':      "\x1b[5m",
    'reverse':    "\x1b[7m",
    'concealed':  "\x1b[8m",

    # foreground colours
    'black':      "\x1b[30m",
    'red':        "\x1b[31m",
    'green':      "\x1b[32m",
    'yellow':     "\x1b[33m",
    'blue':       "\x1b[34m",
    'magenta':    "\x1b[35m",
    'cyan':       "\x1b[36m",
    'white':      "\x1b[37m",

    # background colours
    'on_black':   "\x1b[40m",
    'on_red':     "\x1b[41m",
    'on_green':   "\x1b[42m",
    'on_yellow':  "\x1b[43m",
    'on_blue':    "\x1b[44m",
    'on_magenta': "\x1b[45m",
    'on_cyan':    "\x1b[46m",
    'on_white':   "\x1b[47m",

    # terminal bell
    'beep':       "\a",
}
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
128
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
129
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# two-letter general category abbreviation -> human readable description
general_category = dict([
    # letters
    ('Lu', 'Letter, Uppercase'),
    ('Ll', 'Letter, Lowercase'),
    ('Lt', 'Letter, Titlecase'),
    ('Lm', 'Letter, Modifier'),
    ('Lo', 'Letter, Other'),
    # marks
    ('Mn', 'Mark, Non-Spacing'),
    ('Mc', 'Mark, Spacing Combining'),
    ('Me', 'Mark, Enclosing'),
    # numbers
    ('Nd', 'Number, Decimal Digit'),
    ('Nl', 'Number, Letter'),
    ('No', 'Number, Other'),
    # punctuation
    ('Pc', 'Punctuation, Connector'),
    ('Pd', 'Punctuation, Dash'),
    ('Ps', 'Punctuation, Open'),
    ('Pe', 'Punctuation, Close'),
    ('Pi', 'Punctuation, Initial quote'),
    ('Pf', 'Punctuation, Final quote'),
    ('Po', 'Punctuation, Other'),
    # symbols
    ('Sm', 'Symbol, Math'),
    ('Sc', 'Symbol, Currency'),
    ('Sk', 'Symbol, Modifier'),
    ('So', 'Symbol, Other'),
    # separators
    ('Zs', 'Separator, Space'),
    ('Zl', 'Separator, Line'),
    ('Zp', 'Separator, Paragraph'),
    # others
    ('Cc', 'Other, Control'),
    ('Cf', 'Other, Format'),
    ('Cs', 'Other, Surrogate'),
    ('Co', 'Other, Private Use'),
    ('Cn', 'Other, Not Assigned'),
])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
162
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# bidirectional class abbreviation -> human readable description
bidi_category = {
    'L'   : 'Left-to-Right',
    'LRE' : 'Left-to-Right Embedding',
    'LRO' : 'Left-to-Right Override',
    'R'   : 'Right-to-Left',
    'AL'  : 'Right-to-Left Arabic',
    'RLE' : 'Right-to-Left Embedding',
    'RLO' : 'Right-to-Left Override',
    'PDF' : 'Pop Directional Format',
    'EN'  : 'European Number',
    'ES'  : 'European Number Separator',
    'ET'  : 'European Number Terminator',
    'AN'  : 'Arabic Number',
    'CS'  : 'Common Number Separator',
    'NSM' : 'Non-Spacing Mark',
    'BN'  : 'Boundary Neutral',
    'B'   : 'Paragraph Separator',
    'S'   : 'Segment Separator',
    'WS'  : 'Whitespace',
    'ON'  : 'Other Neutrals',
    # directional isolate classes (U+2066..U+2069); unicodedata.bidirectional()
    # reports these on current Python versions
    'LRI' : 'Left-to-Right Isolate',
    'RLI' : 'Right-to-Left Isolate',
    'FSI' : 'First Strong Isolate',
    'PDI' : 'Pop Directional Isolate',
}
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
184
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# numeric combining class -> human readable description
comb_classes = {
    0:   'Spacing, split, enclosing, reordrant, and Tibetan subjoined',
    1:   'Overlays and interior',
    7:   'Nuktas',
    8:   'Hiragana/Katakana voicing marks',
    9:   'Viramas',

    # fixed position classes
    10:  'Start of fixed position classes',
    199: 'End of fixed position classes',

    # attached marks
    200: 'Below left attached',
    202: 'Below attached',
    204: 'Below right attached',
    208: 'Left attached (reordrant around single base character)',
    210: 'Right attached',
    212: 'Above left attached',
    214: 'Above attached',
    216: 'Above right attached',

    # non-attached marks
    218: 'Below left',
    220: 'Below',
    222: 'Below right',
    224: 'Left (reordrant around single base character)',
    226: 'Right',
    228: 'Above left',
    230: 'Above',
    232: 'Above right',

    # double marks and iota subscript
    233: 'Double below',
    234: 'Double above',
    240: 'Below (iota subscript)',
}
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
213
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
214
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
215
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_unicode_properties(ch):
    """Return a dict describing the Unicode properties of the character ch.

    When ch has an entry in the module-level ``linecache``, that entry is
    parsed as a semicolon-separated record; otherwise the values come from
    the ``unicodedata`` module. Both paths fill the same keys: codepoint,
    name, category, combining, bidi, decomposition, digit_value,
    numeric_value, mirrored, unicode1name, iso_comment, uppercase,
    lowercase and titlecase.
    """
    if ch not in linecache:
        # no cached record for this character: ask the interpreter's own tables
        return {
            'codepoint': '%04X' % ord(ch),
            'name': unicodedata.name(ch, ''),
            'category': unicodedata.category(ch),
            'combining': unicodedata.combining(ch),
            'bidi': unicodedata.bidirectional(ch),
            'decomposition': unicodedata.decomposition(ch),
            'digit_value': unicodedata.digit(ch, ''),
            'numeric_value': unicodedata.numeric(ch, ''),
            'mirrored': unicodedata.mirrored(ch),
            'unicode1name': '',
            'iso_comment': '',
            'uppercase': ch.upper(),
            'lowercase': ch.lower(),
            'titlecase': '',
        }

    record = linecache[ch].strip().split(';')
    column_names = ['codepoint', 'name', 'category', 'combining', 'bidi', 'decomposition', 'dummy', 'digit_value', 'numeric_value', 'mirrored', 'unicode1name', 'iso_comment', 'uppercase', 'lowercase', 'titlecase']
    properties = {}
    for index, column in enumerate(column_names):
        if column == 'dummy':
            # placeholder column, not exposed to callers
            continue
        properties[column] = record[index]

    # case mappings are stored as hexadecimal code points; turn the
    # non-empty ones into the characters themselves
    for case in ('lowercase', 'uppercase', 'titlecase'):
        if properties[case]:
            properties[case] = unichr(int(properties[case], 16))

    properties['combining'] = int(properties['combining'])
    properties['mirrored'] = properties['mirrored'] == 'Y'
    return properties
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
250
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
251
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def do_init():
    """Set the module globals listing where Unicode data files may live.

    UnicodeDataFileNames becomes a list of candidate UnicodeData.txt paths
    (the per-user one first); UnihanDataGlobs becomes a list of glob
    patterns for Unihan data files.
    """
    global UnicodeDataFileNames, UnihanDataGlobs

    home_dir = os.path.expanduser('~/.unicode')

    candidates = [
        os.path.join(home_dir, "UnicodeData.txt"),
        '/usr/share/unicode/UnicodeData.txt',
        '/usr/share/unidata/UnicodeData.txt',
        '/hackenv/share/UnicodeData.txt',
    ]
    # fixed paths above, then whatever these patterns match right now
    for pattern in ('/usr/share/unidata/UnicodeData*.txt',
                    '/usr/share/perl/*/unicore/UnicodeData.txt',
                    '/System/Library/Perl/*/unicore/UnicodeData.txt'):  # last one is for MacOSX
        candidates.extend(glob.glob(pattern))
    UnicodeDataFileNames = candidates

    UnihanDataGlobs = [
        os.path.join(home_dir, "Unihan*"),
        '/usr/share/unidata/Unihan*',
        '/usr/share/unicode/Unihan*',
        './Unihan*',
    ]
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
264
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
265
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_unihan_files():
    """Return the names of all files matching the patterns in UnihanDataGlobs.

    Matches are listed pattern by pattern, in the order of UnihanDataGlobs.
    """
    return [fname
            for pattern in UnihanDataGlobs
            for fname in glob.glob(pattern)]
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
272
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_unihan_properties_internal(ch):
    """Look up the Unihan properties of character ch by scanning the data files.

    Every file named in the module-level ``unihan_fs`` is opened with
    OpenGzip() and read line by line. Comment lines (starting with '#')
    and blank lines are skipped; every other line must consist of three
    tab-separated fields: code point ('U+XXXX'), key and value.

    Returns a dict mapping each key found for ch to its value as text.
    """
    properties = {}
    codepoint = ord(ch)
    for fname in unihan_fs:
        fo = OpenGzip(fname)
        try:
            for l in fo:
                # under python3 the file may yield bytes; work on text so
                # the string operations below behave the same either way
                if PY3 and isinstance(l, bytes):
                    l = l.decode('utf-8')
                if l.startswith('#'):
                    continue
                line = l.strip()
                if not line:
                    continue
                char, key, value = line.split('\t')
                current = int(char[2:], 16)  # drop the leading 'U+'
                if current == codepoint:
                    # same text handling as get_unihan_properties_zgrep
                    if PY3:
                        properties[key] = value
                    else:
                        properties[key] = unicode(value, 'utf-8')
                elif current > codepoint:
                    # relies on the file being sorted by code point:
                    # nothing further in this file can match
                    break
        finally:
            # do not leak the file handle, even when a line fails to parse
            close = getattr(fo, 'close', None)
            if close is not None:
                close()
    return properties
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
291
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_unihan_properties_zgrep(ch):
    """Look up the Unihan properties of character ch using external grep.

    For every file named in the module-level ``unihan_fs`` a grep command
    (zgrep for '.gz', bzgrep for '.bz2', plain grep otherwise) is run for
    lines starting with the code point of ch, and its output is parsed as
    tab-separated 'U+XXXX', key, value records.

    Returns a dict mapping each key found for ch to its value as text.
    """
    properties = {}
    ch = ord(ch)
    chs = 'U+%X' % ch
    for f in unihan_fs:
        if f.endswith('.gz'):
            grepcmd = 'zgrep'
        elif f.endswith('.bz2'):
            grepcmd = 'bzgrep'
        else:
            grepcmd = 'grep'
        # the command goes through a shell: single-quote the file name so
        # paths with spaces or other special characters survive intact
        quoted_file = "'" + f.replace("'", "'\\''") + "'"
        cmdline = grepcmd + " '^" + chs + r"\b' " + quoted_file
        # the exit status is deliberately ignored: grep also exits non-zero
        # when it merely finds no match
        output = cmd.getstatusoutput(cmdline)[1]
        for l in output.split('\n'):
            fields = l.strip().split('\t')
            if len(fields) != 3:
                # blank line or a diagnostic message, not a data record
                continue
            char, key, value = fields
            try:
                found = int(char[2:], 16)  # drop the leading 'U+'
            except ValueError:
                continue
            if found != ch:
                continue
            if PY3:
                properties[key] = value
            else:
                properties[key] = unicode(value, 'utf-8')
    return properties
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
319
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Basic sanity check: the grep-based lookup is only selected where /bin/grep
# exists (it will not if e.g. you run this on MS Windows).
get_unihan_properties = (get_unihan_properties_zgrep
                         if os.path.exists('/bin/grep')
                         else get_unihan_properties_internal)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
325
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
326
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def error(txt):
    """Write *txt* followed by a newline through out(), then exit with status 1."""
    for chunk in (txt, '\n'):
        out(chunk)
    sys.exit(1)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
331
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_gzip_filename(fname):
    """Return the first existing path among fname, fname.gz and fname.bz2.

    The .bz2 variant is only considered when the bz2 module is available.
    Returns None when none of the candidates exists.
    """
    candidates = [fname, fname + '.gz']
    if bz2 is not None:
        candidates.append(fname + '.bz2')
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
341
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
342
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def OpenGzip(fname):
    """Open fname, falling back to fname.gz or fname.bz2 if fname is missing.

    Returns a plain file object, a GzipFile or a BZ2File.  Returns None when
    no usable file exists - including when fname itself names a .gz/.bz2
    file that is missing, or a .bz2 file while the bz2 module is
    unavailable - so callers can simply try the next candidate.
    """
    compressed_name = fname.endswith('.gz') or fname.endswith('.bz2')
    if os.path.exists(fname) and not compressed_name:
        return open(fname)
    if os.path.exists(fname+'.gz'):
        fname = fname+'.gz'
    elif os.path.exists(fname+'.bz2') and bz2 is not None:
        fname = fname+'.bz2'
    elif not os.path.exists(fname):
        # nothing on disk under any of the names: report "not found"
        # instead of letting the opener below raise
        return None
    if fname.endswith('.gz'):
        return gzip.GzipFile(fname)
    if fname.endswith('.bz2') and bz2 is not None:
        return bz2.BZ2File(fname)
    return None
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
356
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def GrepInNames(pattern, fillcache=False):
    """Generate the characters whose UnicodeData record matches *pattern*.

    *pattern* is a regular expression, matched case-insensitively against
    each whole line of the first file from ``UnicodeDataFileNames`` that
    OpenGzip() can open.  Every matching line is stored in ``linecache``
    under its character.

    With fillcache true nothing is yielded; the generator only has to be
    exhausted to populate ``linecache``.  Without a data file (and fillcache
    false) a notice is written through out() and the names known to the
    unicodedata module are searched instead.
    """
    p = re.compile(pattern, re.I)
    f = None
    for name in UnicodeDataFileNames:
        f = OpenGzip(name)
        if f is not None:
            break
    if f is None:
        if fillcache:
            return  # no data file: there is nothing to cache from
        out( """
Cannot find UnicodeData.txt, please place it into
/usr/share/unidata/UnicodeData.txt,
/usr/share/unicode/UnicodeData.txt, ~/.unicode/ or current
working directory (optionally you can gzip it).
Without the file, searching will be much slower.

""" )
        for i in xrange(sys.maxunicode):
            try:
                name = unicodedata.name(unichr(i))
            except ValueError:
                continue  # code point without a name
            if p.search(name):
                yield myunichr(i)
        return
    try:
        for l in f:
            # The compressed readers hand out bytes on Python 3, which a
            # text pattern cannot be matched against; plain files (and every
            # reader on Python 2) already give str.
            if not isinstance(l, str):
                l = l.decode('utf-8')
            if p.search(l):
                r = myunichr(int(l.split(';')[0], 16))
                linecache[r] = l
                if not fillcache:
                    yield r
    finally:
        # also runs when the consumer abandons the generator early
        f.close()
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
395
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
396
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def valfromcp(n, cp=None):
    """Return the list of code points denoted by the integer *n*.

    Without *cp*, *n* is taken to be a code point already and ``[n]`` is
    returned.  With *cp* (a codec name), *n* is split into its big-endian
    bytes, the bytes are decoded from that code page and the code point of
    each decoded character is returned.

    Raises ValueError for a negative *n*, UnicodeDecodeError when the bytes
    are not valid in *cp* and LookupError for an unknown codec.
    """
    if not cp:
        return [n]
    xh = '%x' % n
    if len(xh) % 2:  # pad hexadecimal representation with a zero
        xh = '0' + xh
    # Build real bytes: a text string cannot be decoded on Python 3.
    octets = bytearray(int(xh[i:i+2], 16) for i in range(0, len(xh), 2))
    return [ord(x) for x in bytes(octets).decode(cp)]
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
423
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def myunichr(n):
    """Return unichr(n); on failure print the traceback and call error().

    OverflowError and ValueError from unichr() are both reported through
    error() with an explanatory message.
    """
    try:
        return unichr(n)
    except OverflowError:
        traceback.print_exc()
        error("The codepoint is too big - it does not fit into an int.")
    except ValueError:
        traceback.print_exc()
        if sys.maxunicode <= 0xffff:
            hint = "\nPerhaps your python interpreter is not compiled with wide unicode characters."
        else:
            hint = ""
        error("The codepoint is too big." + hint)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
437
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
438
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def guesstype(arg):
    """Guess how the command line argument *arg* is meant.

    Returns a ``(type, value)`` tuple.  *type* is one of 'empty string',
    'string', 'hexadecimal', 'binary' or 'regexp'.  For 'hexadecimal' the
    value holds the bare hexadecimal digits (any ``U+`` / ``U`` prefix
    removed) so that it can be passed straight to ``int(value, 16)``; for
    every other type the value is *arg* unchanged.
    """
    def hex_or_regexp(digits):
        # Digits that parse as a hexadecimal number no larger than
        # sys.maxunicode are a character code; otherwise the whole argument
        # is treated as a regular expression.
        try:
            val = int(digits, 16)
        except ValueError:
            return 'regexp', arg
        if val > sys.maxunicode:
            return 'regexp', arg
        return 'hexadecimal', digits

    if not arg:  # empty string
        return 'empty string', arg
    if not is_ascii(arg):
        return 'string', arg
    if arg[:2] in ('U+', 'u+'):  # it is hexadecimal number
        return hex_or_regexp(arg[2:])
    if arg[0] in "Uu" and len(arg) > 4:
        # strip the U here as well, the value has to be parseable as hex
        return hex_or_regexp(arg[1:])
    if len(arg) >= 4:
        if len(arg) in (8, 16, 24, 32) and all(x in '01' for x in arg):
            if int(arg, 2) <= sys.maxunicode:
                return 'binary', arg
        return hex_or_regexp(arg)
    return 'string', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
478
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def process(arglist, t, fromcp=None):
    """Turn the arguments in *arglist* into a list of characters.

    *t* is the argument type to assume for every argument, or None to let
    guesstype() decide for each one.  *fromcp* is handed to valfromcp() for
    numeric arguments.  Returns the list of resulting characters.
    """
    # Regexp arguments are collected first so that a query like
    # LATIN ALPHA searches for LATIN.*ALPHA and not for names that
    # contain either LATIN or ALPHA.
    number_bases = {'hexadecimal': 16, 'decimal': 10, 'octal': 8, 'binary': 2}
    result = []
    names_query = []  # reserved for queries in names - i.e. -r
    for arg_i in arglist:
        if t is None:
            tp, arg = guesstype(arg_i)
            if tp == 'regexp':
                # if the first argument is guessed to be a regexp, add
                # all the following arguments to the regular expression -
                # this is probably what you wanted, e.g.
                # 'unicode cyrillic be' will now search for the
                # 'cyrillic.*be' regular expression
                t = 'regexp'
        else:
            tp, arg = t, arg_i

        if tp in number_bases:
            val = int(arg, number_bases[tp])
            for val in valfromcp(val, fromcp):
                r = myunichr(val)
                # fill the table with character properties
                list(GrepInNames('%04X'%val, fillcache=True))
                result.append(r)
        elif tp == 'regexp':
            names_query.append(arg)
        elif tp == 'string':
            try:
                if PY3:
                    # argv is automatically decoded into unicode, even
                    # padded with bogus character if it is not encodable
                    unirepr = arg
                else:
                    unirepr = unicode(arg, options.iocharset)
            except UnicodeDecodeError:
                error ("Sequence %s is not valid in charset '%s'." % (repr(arg), options.iocharset))
            unireg = '|'.join(['%04X'%ord(x) for x in unirepr])
            list(GrepInNames(unireg, fillcache=True))
            for r in unirepr:
                result.append(r)
        # an 'empty string' argument contributes nothing

    if names_query:
        query = '.*'.join(names_query)
        for r in GrepInNames(query):
            result.append(r)
    return result
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
546
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def maybe_colours(colour):
    """Return colours[colour] when use_colour is set, otherwise an empty string."""
    return colours[colour] if use_colour else ""
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
552
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
553 # format key and value
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def printkv(*l):
    """Write the arguments, taken as alternating keys and values, via out().

    Each pair is written as ``key: value`` with the key part in the 'green'
    colour; pairs are separated from each other and the last one is
    followed by a newline.
    """
    count = len(l)
    for i in range(0, count, 2):
        key = l[i]
        value = l[i+1]
        out(maybe_colours('green'))
        out(key)
        out(": ")
        out(maybe_colours('default'))
        out(unicode(value))
        # NOTE(review): separator literal copied as it appears in the
        # annotate listing - confirm its width against the repository file.
        out(" " if i < count - 2 else "\n")
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
567
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
568 def print_characters(clist, maxcount, query_wiki=0):
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
569 """query_wiki - 0 - don't
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
570 1 - spawn browser
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
571 """
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
572 counter = 0
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
573 for c in clist:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
574
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
575 if query_wiki:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
576 ch = urllib.quote(c.encode('utf-8')) # wikipedia uses UTF-8 in names
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
577 wiki_url = 'http://en.wikipedia.org/wiki/'+ch
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
578 webbrowser.open(wiki_url)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
579 query_wiki = 0 # query only the very first character
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
580
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
581
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
582 if maxcount:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
583 counter += 1
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
584 if counter > options.maxcount:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
585 out("\nToo many characters to display, more than %s, use --max option to change it\n" % options.maxcount)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
586 return
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
587 properties = get_unicode_properties(c)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
588 out(maybe_colours('bold'))
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
589 out('U+%04X '% ord(c))
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
590 if properties['name']:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
591 out(properties['name'])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
592 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
593 out(maybe_colours('default'))
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
594 out(" - No such unicode character name in database")
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
595 out(maybe_colours('default'))
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
596 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
597
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
598 ar = ["UTF-8", ' '.join([("%02x" % ord23(x)) for x in c.encode('utf-8')]) ,
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
599 "UTF-16BE", ''.join([("%02x" % ord23(x)) for x in c.encode('utf-16be')]),
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
600 "Decimal", "&#%s;" % ord(c) ]
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
601 if options.addcharset:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
602 try:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
603 rep = ' '.join([("%02x" % ord(x)) for x in c.encode(options.addcharset)] )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
604 except UnicodeError:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
605 rep = "NONE"
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
606 ar.extend( [options.addcharset, rep] )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
607 printkv(*ar)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
608
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
609
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
610 if properties['combining']:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
611 pc = " "+c
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
612 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
613 pc = c
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
614 out(pc)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
615 uppercase = properties['uppercase']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
616 lowercase = properties['lowercase']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
617 if uppercase:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
618 out(" (%s)" % uppercase)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
619 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
620 printkv( "Uppercase", 'U+%04X'% ord(properties['uppercase']) )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
621 elif lowercase:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
622 out(" (%s)" % properties['lowercase'])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
623 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
624 printkv( "Lowercase", 'U+%04X'% ord(properties['lowercase']) )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
625 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
626 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
627 printkv( 'Category', properties['category']+ " (%s)" % general_category[properties['category']] )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
628
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
629 if properties['numeric_value']:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
630 printkv( 'Numeric value', properties['numeric_value'])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
631 if properties['digit_value']:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
632 printkv( 'Digit value', properties['digit_value'])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
633
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
634 bidi = properties['bidi']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
635 if bidi:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
636 printkv( 'Bidi', bidi+ " (%s)" % bidi_category[bidi] )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
637 mirrored = properties['mirrored']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
638 if mirrored:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
639 out('Character is mirrored\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
640 comb = properties['combining']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
641 if comb:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
642 printkv( 'Combining', str(comb)+ " (%s)" % (comb_classes.get(comb, '?')) )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
643 decomp = properties['decomposition']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
644 if decomp:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
645 printkv( 'Decomposition', decomp )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
646 if options.verbosity>0:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
647 uhp = get_unihan_properties(c)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
648 for key in uhp:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
649 printkv(key, uhp[key])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
650 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
651
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
652
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def print_block(block):
    """Print one block of 256 code points as a 16x16 table.

    ``block`` is the block number.  Rows are numbered ``block*16`` to
    ``block*16+15`` and the character in a given row and column is
    ``unichr(row*16 + column)``.  All output goes through ``out``.
    """
    # header: left margin followed by one label per column
    out(" "*10)
    for column in range(16):
        out(".%X " % column)
    out('\n')
    # body: one labelled row per group of 16 code points
    first_row = block*16
    for row in range(first_row, first_row+16):
        label = "%X" % row
        if len(label)>3:
            # long row numbers are zero-padded to seven hex digits and
            # split after the fourth digit
            padded = "%07X" % row
            label = padded[:4]+" "+padded[4:]
        else:
            label = " %03X" % row
        out(LTR+label+". ")
        row_base = row*16
        for column in range(16):
            char = unichr(row_base+column)
            if unicodedata.combining(char):
                # put a space in front of combining characters
                char = " "+char
            out(char)
            out(' ')
        out('\n')
    out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
676
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def print_blocks(blocks):
    """Call ``print_block`` for every block number in ``blocks``, in order."""
    for block_number in blocks:
        print_block(block_number)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
680
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def is_range(s, typ):
    """Interpret ``s`` as a ``low..high`` range and return the blocks it spans.

    Either endpoint may be omitted (``low..`` or ``..high``); the remaining
    endpoint is then used for both ends.  Each endpoint is resolved with
    ``process([endpoint], typ)`` and must produce exactly one character.

    Returns ``False`` when ``s`` is not such a range.  Otherwise returns
    ``range(first, last + 1)`` where ``first`` and ``last`` are the block
    numbers (code point // 256) of the lower and the higher endpoint.
    The endpoints may be given in either order.
    """
    sp = s.split('..')
    if len(sp) != 2:
        return False
    start, end = sp
    # an open-ended range collapses onto its only endpoint
    start = start or end
    end = end or start
    if not start:
        return False
    # intentionally no fromcp here, ranges are only of unicode characters
    low = list(process([start], typ))
    high = list(process([end], typ))
    if len(low) != 1 or len(high) != 1:
        return False
    low = ord(low[0])
    high = ord(high[0])
    if low > high:
        # Reversed endpoints in different blocks used to give an empty
        # (falsy) range, which callers take to mean "not a range".
        low, high = high, low
    return range(low // 256, high // 256 + 1)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
700
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
701
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
702
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Command-line option definitions.
parser = OptionParser(usage="usage: %prog [options] arg")

# Options selecting how the arguments are interpreted.  Each one stores a
# constant into options.type; --auto stores None.
for _short, _long, _const, _help in (
        ("-x", "--hexadecimal", 'hexadecimal', "Assume arg to be hexadecimal number"),
        ("-o", "--octal", 'octal', "Assume arg to be octal number"),
        ("-b", "--binary", 'binary', "Assume arg to be binary number"),
        ("-d", "--decimal", 'decimal', "Assume arg to be decimal number"),
        ("-r", "--regexp", 'regexp', "Assume arg to be regular expression"),
        ("-s", "--string", 'string', "Assume arg to be a sequence of characters"),
        ("-a", "--auto", None, "Try to guess arg type (default)"),
    ):
    parser.add_option(_short, _long,
            action="store_const", const=_const, dest="type",
            help=_help)

parser.add_option("-m", "--max",
        action="store", default=10, dest="maxcount", type="int",
        help="Maximal number of codepoints to display, default: 10; 0=unlimited")
parser.add_option("-i", "--io",
        action="store", default=iocharsetguess, dest="iocharset", type="string",
        help="I/O character set, I am guessing %s" % iocharsetguess)
parser.add_option("--fcp", "--fromcp",
        action="store", default='', dest="fromcp", type="string",
        help="Convert numerical arguments from this encoding, default: no conversion")
parser.add_option("-c", "--charset-add",
        action="store", dest="addcharset", type="string",
        help="Show hexadecimal representation in this additional charset")
# --colour and --color write to the same destination
parser.add_option("-C", "--colour",
        action="store", dest="use_colour", type="string",
        default="auto",
        help="Use colours, on, off or auto")
parser.add_option("--color",
        action="store", dest="use_colour", type="string",
        default="auto",
        help="synonym for --colour")
parser.add_option("-v", "--verbose",
        action="count", dest="verbosity",
        default=0,
        help="Increase verbosity (reads Unihan properties - slow!)")
parser.add_option("-w", "--wikipedia",
        action="count", dest="query_wiki",
        default=0,
        help="Query wikipedia for the character")
parser.add_option("--list",
        action="store_const", dest="list_all_encodings",
        const=True,
        help="List (approximately) all known encodings")
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
757
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
758
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Parse the command line; do_init() is defined elsewhere in this file.
(options, arguments) = parser.parse_args()

linecache = {}
do_init()


# --list: print the module names found in the encodings package directory
# (extensions stripped, duplicates removed, sorted) and exit.
if options.list_all_encodings:
    encodings_dir = os.path.dirname(encodings.__file__)
    all_encodings = sorted(set([os.path.splitext(entry)[0] for entry in os.listdir(encodings_dir)]))
    print (textwrap.fill(' '.join(all_encodings)))
    sys.exit()

# no arguments given: show the usage text and exit
if not arguments:
    parser.print_help()
    sys.exit()
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
776
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
777
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Decide whether output is coloured.  Explicit on/off values win; anything
# else (including the default "auto") colours only when stdout is a
# terminal and the platform is not win32.
# NOTE(review): the win32 check is taken to belong to the "auto" branch;
# confirm against the original indentation.
colour_choice = options.use_colour.lower()
if colour_choice in ("on", "1", "true", "yes"):
    use_colour = True
elif colour_choice in ("off", "0", "false", "no"):
    use_colour = False
else:
    use_colour = sys.stdout.isatty()
    if sys.platform == 'win32':
        use_colour = False
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
786
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
787
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Arguments that is_range() recognises are printed as block tables straight
# away; all others are collected and handled together below.
l_args = [] # list of non range arguments to process
for argum in arguments:
    blocks = is_range(argum, options.type)
    if not blocks:
        l_args.append(argum)
        continue
    print_blocks(blocks)

if l_args:
    unihan_fs = []
    if options.verbosity>0:
        # list of file names for Unihan data file(s), empty if not available
        unihan_fs = get_unihan_files()
        if not unihan_fs:
            out( """
Unihan_*.txt files not found. In order to view Unihan properties,
please place the file into /usr/share/unidata/,
/usr/share/unicode/, ~/.unicode/
or current working directory (optionally you can gzip or bzip2 them).
You can get the files by unpacking ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip
Warning, listing UniHan Properties is rather slow.

""")
            # without the data files there is nothing verbose to show
            options.verbosity = 0
    try:
        characters = process(l_args, options.type, options.fromcp)
        print_characters(characters, options.maxcount, options.query_wiki)
    except IOError: # e.g. broken pipe
        pass
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
815