annotate bin/multicode @ 10843:2d5ba2d44680

<Jafet> le/rn double dactyl//Curious spurious verse form, ostensibly catchy to hear but herculean to write. Sadly its bent on the sesquipedalian makes double dactyls pretentious and trite.
author HackBot
date Fri, 28 Apr 2017 02:49:16 +0000
parents c989a1669243
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9075
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
1 #!/usr/bin/python
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
2
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
3
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
4 import os, glob, sys, unicodedata, locale, gzip, re, traceback, encodings
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
5 import urllib, webbrowser, textwrap
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
6
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Compatibility shims: each one probes for a name that only exists in some
# Python versions and installs a fallback when it is missing.

# bz2 was introduced in 2.3, we want this to work also with earlier versions
try:
    import bz2
except ImportError:
    bz2 = None  # marks bz2 support as unavailable

# for python3, where the text type is called str
try:
    unicode
except NameError:
    unicode = str

# 'all' was introduced in python2.5; older versions get an equivalent
# implementation (the previous stand-in answered False for every input)
try:
    all
except NameError:
    def all(iterable):
        "return True if every element of iterable is true (or it is empty)"
        for element in iterable:
            if not element:
                return False
        return True
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
25
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
PY3 = sys.version_info[0] >= 3
if PY3:
    # getoutput() and getstatusoutput() live in subprocess on python 3
    import subprocess as cmd

    def is_ascii(s):
        "tell whether the string s is made of ascii characters only (python 3)"
        try:
            s.encode('ascii')
        except UnicodeEncodeError:
            return False
        else:
            return True

    def out(*args):
        "write every argument to stdout, encoded in the output charset"
        for piece in args:
            # presumably flushed so pending text-layer output stays ahead of
            # the bytes written straight to the underlying buffer
            sys.stdout.flush()
            encoded = piece.encode(options.iocharset, 'replace')
            sys.stdout.buffer.write(encoded)

    # ord23 converts elements of a byte array; in python3 these are
    # already integers, so nothing has to be done
    ord23 = lambda x: x

    # unichr is not in python3
    unichr = chr

else: # python2

    # getoutput() and getstatusoutput() are provided by the
    # commands module before Python 3.x
    import commands as cmd

    def is_ascii(s):
        "tell whether the string s is made of ascii characters only (python 2)"
        try:
            unicode(s, 'ascii')
        except UnicodeDecodeError:
            return False
        else:
            return True

    def out(*args):
        "write every argument to stdout, encoded in the output charset"
        for piece in args:
            encoded = piece.encode(options.iocharset, 'replace')
            sys.stdout.write(encoded)

    ord23 = ord
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
71
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
72
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
73
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
74 from optparse import OptionParser
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
75
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
VERSION='0.9.7'


# list of terminals that support bidi
biditerms = ['mlterm']

# switch to the locale configured in the environment; if that locale is
# not supported the current setting is simply kept
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

# guess terminal charset
try:
    iocharsetguess = locale.nl_langinfo(locale.CODESET) or "ascii"
except (AttributeError, locale.Error):
    # nl_langinfo()/CODESET do not exist on every platform; without them
    # fall back to the same conservative guess as for a locale error
    iocharsetguess = "ascii"

if os.environ.get('TERM') in biditerms and iocharsetguess.lower().startswith('utf'):
    LTR = u'\u202d' # left to right override
else:
    LTR = ''
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
97
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
98
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Terminal control sequences keyed by a symbolic name.
colours = {
    # text attributes
    'none': "",
    'default': "\033[0m",
    'bold': "\033[1m",
    'underline': "\033[4m",
    'blink': "\033[5m",
    'reverse': "\033[7m",
    'concealed': "\033[8m",

    # foreground colours
    'black': "\033[30m",
    'red': "\033[31m",
    'green': "\033[32m",
    'yellow': "\033[33m",
    'blue': "\033[34m",
    'magenta': "\033[35m",
    'cyan': "\033[36m",
    'white': "\033[37m",

    # background colours
    'on_black': "\033[40m",
    'on_red': "\033[41m",
    'on_green': "\033[42m",
    'on_yellow': "\033[43m",
    'on_blue': "\033[44m",
    'on_magenta': "\033[45m",
    'on_cyan': "\033[46m",
    'on_white': "\033[47m",

    # terminal bell
    'beep': "\007",
}
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
128
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
129
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Human-readable descriptions of the two-letter general category codes.
general_category = {
    # letters
    'Lu': 'Letter, Uppercase',
    'Ll': 'Letter, Lowercase',
    'Lt': 'Letter, Titlecase',
    'Lm': 'Letter, Modifier',
    'Lo': 'Letter, Other',
    # marks
    'Mn': 'Mark, Non-Spacing',
    'Mc': 'Mark, Spacing Combining',
    'Me': 'Mark, Enclosing',
    # numbers
    'Nd': 'Number, Decimal Digit',
    'Nl': 'Number, Letter',
    'No': 'Number, Other',
    # punctuation
    'Pc': 'Punctuation, Connector',
    'Pd': 'Punctuation, Dash',
    'Ps': 'Punctuation, Open',
    'Pe': 'Punctuation, Close',
    'Pi': 'Punctuation, Initial quote',
    'Pf': 'Punctuation, Final quote',
    'Po': 'Punctuation, Other',
    # symbols
    'Sm': 'Symbol, Math',
    'Sc': 'Symbol, Currency',
    'Sk': 'Symbol, Modifier',
    'So': 'Symbol, Other',
    # separators
    'Zs': 'Separator, Space',
    'Zl': 'Separator, Line',
    'Zp': 'Separator, Paragraph',
    # others
    'Cc': 'Other, Control',
    'Cf': 'Other, Format',
    'Cs': 'Other, Surrogate',
    'Co': 'Other, Private Use',
    'Cn': 'Other, Not Assigned',
}
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
162
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Human-readable descriptions of the bidirectional category codes.
bidi_category = {
    'L': 'Left-to-Right',
    'LRE': 'Left-to-Right Embedding',
    'LRO': 'Left-to-Right Override',
    'R': 'Right-to-Left',
    'AL': 'Right-to-Left Arabic',
    'RLE': 'Right-to-Left Embedding',
    'RLO': 'Right-to-Left Override',
    'PDF': 'Pop Directional Format',
    'EN': 'European Number',
    'ES': 'European Number Separator',
    'ET': 'European Number Terminator',
    'AN': 'Arabic Number',
    'CS': 'Common Number Separator',
    'NSM': 'Non-Spacing Mark',
    'BN': 'Boundary Neutral',
    'B': 'Paragraph Separator',
    'S': 'Segment Separator',
    'WS': 'Whitespace',
    'ON': 'Other Neutrals',
}
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
184
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Human-readable descriptions of the numeric combining class values.
comb_classes = {
    0: 'Spacing, split, enclosing, reordrant, and Tibetan subjoined',
    1: 'Overlays and interior',
    7: 'Nuktas',
    8: 'Hiragana/Katakana voicing marks',
    9: 'Viramas',
    10: 'Start of fixed position classes',
    199: 'End of fixed position classes',
    # attached marks
    200: 'Below left attached',
    202: 'Below attached',
    204: 'Below right attached',
    208: 'Left attached (reordrant around single base character)',
    210: 'Right attached',
    212: 'Above left attached',
    214: 'Above attached',
    216: 'Above right attached',
    # detached marks
    218: 'Below left',
    220: 'Below',
    222: 'Below right',
    224: 'Left (reordrant around single base character)',
    226: 'Right',
    228: 'Above left',
    230: 'Above',
    232: 'Above right',
    233: 'Double below',
    234: 'Double above',
    240: 'Below (iota subscript)',
}
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
213
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
214
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
215
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_unicode_properties(ch):
    """Collect the properties of the character ch into a dictionary.

    When ch has an entry in the global linecache, that entry is split on
    ';' and its fields are stored under the names listed in column_names
    (the 'dummy' column is dropped). The case mappings are converted from
    hexadecimal codepoints to characters, 'combining' to an int and
    'mirrored' to a bool.

    Otherwise the same keys are filled in from the unicodedata module and
    the string methods of ch.
    """
    if ch not in linecache:
        # no cached data line for this character: ask python itself
        return {
            'codepoint': '%04X' % ord(ch),
            'name': unicodedata.name(ch, ''),
            'category': unicodedata.category(ch),
            'combining': unicodedata.combining(ch),
            'bidi': unicodedata.bidirectional(ch),
            'decomposition': unicodedata.decomposition(ch),
            'digit_value': unicodedata.digit(ch, ''),
            'numeric_value': unicodedata.numeric(ch, ''),
            'mirrored': unicodedata.mirrored(ch),
            'unicode1name': '',
            'iso_comment': '',
            'uppercase': ch.upper(),
            'lowercase': ch.lower(),
            'titlecase': '',
        }

    record = linecache[ch].strip().split(';')
    column_names = ['codepoint', 'name', 'category', 'combining', 'bidi', 'decomposition', 'dummy', 'digit_value', 'numeric_value', 'mirrored', 'unicode1name', 'iso_comment', 'uppercase', 'lowercase', 'titlecase']
    properties = {}
    for index, column in enumerate(column_names):
        if column == 'dummy':
            continue
        properties[column] = record[index]

    # non-empty case mappings are hexadecimal codepoints
    for case_key in ('lowercase', 'uppercase', 'titlecase'):
        mapped = properties[case_key]
        if mapped:
            properties[case_key] = unichr(int(mapped, 16))

    properties['combining'] = int(properties['combining'])
    properties['mirrored'] = (properties['mirrored'] == 'Y')
    return properties
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
250
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
251
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def do_init():
    """Set up the global lists of data file locations.

    UnicodeDataFileNames receives the candidate paths of UnicodeData.txt,
    starting with the copy in ~/.unicode; UnihanDataGlobs receives the glob
    patterns under which Unihan data files are looked for.
    """
    global UnicodeDataFileNames, UnihanDataGlobs
    home_dir = os.path.expanduser('~/.unicode')

    candidates = [
        os.path.join(home_dir, "UnicodeData.txt"),
        '/usr/share/unicode/UnicodeData.txt',
        '/usr/share/unidata/UnicodeData.txt',
        '/hackenv/share/UnicodeData.txt',
    ]
    candidates.extend(glob.glob('/usr/share/unidata/UnicodeData*.txt'))
    candidates.extend(glob.glob('/usr/share/perl/*/unicore/UnicodeData.txt'))
    candidates.extend(glob.glob('/System/Library/Perl/*/unicore/UnicodeData.txt')) # for MacOSX
    UnicodeDataFileNames = candidates

    UnihanDataGlobs = [
        os.path.join(home_dir, "Unihan*"),
        '/usr/share/unidata/Unihan*',
        '/usr/share/unicode/Unihan*',
        './Unihan*',
    ]
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
264
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
265
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_unihan_files():
    """Return the names of all files matching the patterns in UnihanDataGlobs.

    The patterns are expanded in list order and the matches concatenated.
    """
    found = [] # names of the Unihan data file(s)
    for pattern in UnihanDataGlobs:
        found.extend(glob.glob(pattern))
    return found
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
272
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_unihan_properties_internal(ch):
    """Look up the Unihan properties of the character ch in pure python.

    Every file named in the global unihan_fs is opened with OpenGzip and
    read line by line. Lines starting with '#' and blank lines are skipped;
    every other line must consist of three tab-separated fields: the
    codepoint written as U+XXXX, a key and a value.

    Returns a dictionary mapping each key found for ch to its value; the
    dictionary is empty when no line matches. Byte-string values are
    decoded as UTF-8.

    Raises ValueError if a data line does not have exactly three fields.
    """
    properties = {}
    ch = ord(ch)
    global unihan_fs
    for f in unihan_fs:
        fo = OpenGzip(f)
        # assumes OpenGzip returns a file-like object with close() - TODO confirm
        try:
            for l in fo:
                if l.startswith('#'):
                    continue
                line = l.strip()
                if not line:
                    continue
                char, key, value = line.split('\t')
                codepoint = int(char[2:], 16)
                if codepoint == ch:
                    # values that are already text (python 3) must not be
                    # decoded again; the zgrep variant makes the same
                    # distinction
                    if not isinstance(value, unicode):
                        value = unicode(value, 'utf-8')
                    properties[key] = value
                elif codepoint > ch:
                    # stop reading this file once a larger codepoint shows up
                    break
        finally:
            # release the file even when a malformed line raises
            fo.close()
    return properties
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
291
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_unihan_properties_zgrep(ch):
    """Look up the Unihan properties of the character ch with grep.

    For every file named in the global unihan_fs the matching lines are
    extracted by zgrep (*.gz), bzgrep (*.bz2) or grep (anything else).
    The program is started directly with an argument list, so file names
    containing spaces or shell metacharacters are passed through intact.
    Only its standard output is parsed, which keeps diagnostics written
    to standard error out of the data, and the output is always decoded
    as UTF-8.

    Returns a dictionary mapping each key found for ch to its value as a
    text string; the dictionary is empty when nothing matches.
    """
    # imported here because python 2 only binds the 'commands' module as cmd
    import subprocess

    properties = {}
    global unihan_fs
    ch = ord(ch)
    # the line has to start with exactly this codepoint ('+' is literal in
    # a basic regular expression, \b rejects longer codepoints)
    pattern = ('^U+%X' % ch) + r'\b'
    for f in unihan_fs:
        if f.endswith('.gz'):
            grepcmd = 'zgrep'
        elif f.endswith('.bz2'):
            grepcmd = 'bzgrep'
        else:
            grepcmd = 'grep'
        try:
            proc = subprocess.Popen([grepcmd, pattern, f],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        except OSError:
            # the grep variant is not installed: this file yields nothing
            continue
        output = proc.communicate()[0].decode('utf-8')
        for l in output.split('\n'):
            fields = l.strip().split('\t')
            if len(fields) != 3:
                # empty line or something that is not a data record
                continue
            char, key, value = fields
            codepoint = int(char[2:], 16)
            if codepoint == ch:
                properties[key] = value
            elif codepoint > ch:
                break
    return properties
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
319
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Basic sanity check (e.g. when run on MS Windows): use the grep-based lookup
# only when /bin/grep exists, otherwise fall back to
# get_unihan_properties_internal.
get_unihan_properties = (
    get_unihan_properties_zgrep
    if os.path.exists('/bin/grep')
    else get_unihan_properties_internal
)
325
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
326
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def error(txt):
    "write txt followed by a newline through out(), then exit with status 1"
    for chunk in (txt, '\n'):
        out(chunk)
    sys.exit(1)
331
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def get_gzip_filename(fname):
    """Return the first existing path among fname, fname+'.gz', fname+'.bz2'.

    The '.bz2' candidate is only accepted when the bz2 module is available
    (bz2 is not None).  Returns None when no candidate qualifies.
    """
    candidates = (
        (fname, True),
        (fname + '.gz', True),
        (fname + '.bz2', bz2 is not None),
    )
    for path, usable in candidates:
        if usable and os.path.exists(path):
            return path
    return None
341
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
342
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def OpenGzip(fname):
    """Open fname, falling back to fname+'.gz' or fname+'.bz2'.

    Returns a plain file object for an existing uncompressed fname, a
    gzip.GzipFile for a '.gz' file, or a bz2.BZ2File for a '.bz2' file (only
    when the bz2 module is available).  Returns None when nothing suitable
    exists, so callers can simply try the next candidate name.
    """
    if os.path.exists(fname) and not fname.endswith(('.gz', '.bz2')):
        return open(fname)
    if os.path.exists(fname + '.gz'):
        fname = fname + '.gz'
    elif bz2 is not None and os.path.exists(fname + '.bz2'):
        fname = fname + '.bz2'
    # An explicitly compressed name that does not exist used to raise from
    # the GzipFile/BZ2File constructor; report "not found" like every other
    # missing file instead.
    if not os.path.exists(fname):
        return None
    if fname.endswith('.gz'):
        return gzip.GzipFile(fname)
    # Guard against bz2 being None (module unavailable) before touching it.
    if fname.endswith('.bz2') and bz2 is not None:
        return bz2.BZ2File(fname)
    return None
356
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def GrepInNames(pattern, fillcache=False):
    """Generator: search character data for the case-insensitive regexp pattern.

    The first name in UnicodeDataFileNames that OpenGzip can open is scanned
    line by line.  For every matching line the character whose hexadecimal
    code is the first ';'-separated field is stored in linecache (character
    -> line) and, unless fillcache is true, yielded.

    With fillcache true nothing is yielded; the generator must still be
    consumed (e.g. list(GrepInNames(..., fillcache=True))) for the cache to
    be filled.

    When no data file can be opened and fillcache is false, a notice is
    printed and the names known to the unicodedata module are searched
    instead; linecache is not touched in that case.
    """
    p = re.compile(pattern, re.I)
    f = None
    for name in UnicodeDataFileNames:
        f = OpenGzip(name)
        if f is not None:
            break

    if f is None:
        if fillcache:
            # nothing to fill the cache from
            return
        out( """
Cannot find UnicodeData.txt, please place it into
/usr/share/unidata/UnicodeData.txt,
/usr/share/unicode/UnicodeData.txt, ~/.unicode/ or current
working directory (optionally you can gzip it).
Without the file, searching will be much slower.

""" )
        # + 1 so that sys.maxunicode itself is inspected as well
        for i in xrange(sys.maxunicode + 1):
            try:
                name = unicodedata.name(unichr(i))
            except ValueError:
                # unicodedata has no name for this code point
                continue
            if p.search(name):
                yield myunichr(i)
        return

    # try/finally closes the file even when the consumer stops iterating
    # before the generator is exhausted.
    try:
        for l in f:
            if p.search(l):
                r = myunichr(int(l.split(';')[0], 16))
                linecache[r] = l
                if not fillcache:
                    yield r
    finally:
        f.close()
395
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
396
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def valfromcp(n, cp=None):
    """Convert number n into a list of Unicode code points.

    Without cp, n is already a code point and [n] is returned.

    With cp (a codec name), the big-endian bytes of n - its hexadecimal
    representation padded to an even number of digits - are decoded from
    that codepage, and the code points of the resulting characters are
    returned.  For example valfromcp(0xc3a9, 'utf-8') == [0xe9].

    Raises ValueError for a negative n and UnicodeDecodeError when the bytes
    are not valid in cp.
    """
    if not cp:
        return [n]
    xh = '%x' % n
    if len(xh) % 2:  # pad hexadecimal representation with a zero
        xh = '0' + xh
    # Build real bytes rather than a text string, so that decoding works
    # identically on Python 2 and Python 3.
    raw = bytes(bytearray(int(xh[i:i + 2], 16) for i in range(0, len(xh), 2)))
    return [ord(x) for x in raw.decode(cp)]
423
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def myunichr(n):
    """Return unichr(n); on failure print the traceback and abort via error().

    OverflowError and ValueError from unichr are both reported as a too-big
    codepoint; for ValueError a hint about narrow unicode builds is appended
    when sys.maxunicode does not exceed 0xffff.
    """
    try:
        return unichr(n)
    except OverflowError:
        traceback.print_exc()
        message = "The codepoint is too big - it does not fit into an int."
    except ValueError:
        traceback.print_exc()
        message = "The codepoint is too big."
        if not sys.maxunicode > 0xffff:
            message = message + "\nPerhaps your python interpreter is not compiled with wide unicode characters."
    error(message)
437
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
438
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
439 def guesstype(arg):
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
440 if not arg: # empty string
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
441 return 'empty string', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
442 elif not is_ascii(arg):
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
443 return 'string', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
444 elif arg[:2]=='U+' or arg[:2]=='u+': # it is hexadecimal number
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
445 try:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
446 val = int(arg[2:], 16)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
447 if val>sys.maxunicode:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
448 return 'regexp', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
449 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
450 return 'hexadecimal', arg[2:]
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
451 except ValueError:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
452 return 'regexp', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
453 elif arg[0] in "Uu" and len(arg)>4:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
454 try:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
455 val = int(arg[1:], 16)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
456 if val>sys.maxunicode:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
457 return 'regexp', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
458 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
459 return 'hexadecimal', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
460 except ValueError:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
461 return 'regexp', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
462 elif len(arg)>=4:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
463 if len(arg) in (8, 16, 24, 32):
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
464 if all(x in '01' for x in arg):
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
465 val = int(arg, 2)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
466 if val<=sys.maxunicode:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
467 return 'binary', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
468 try:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
469 val = int(arg, 16)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
470 if val>sys.maxunicode:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
471 return 'regexp', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
472 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
473 return 'hexadecimal', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
474 except ValueError:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
475 return 'regexp', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
476 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
477 return 'string', arg
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
478
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# radix used to parse the argument for each numeric query type
_NUMERIC_BASES = {'hexadecimal': 16, 'decimal': 10, 'octal': 8, 'binary': 2}


def process(arglist, t, fromcp=None):
    """Turn command line arguments into a list of characters.

    arglist -- the query arguments
    t       -- forced argument type ('hexadecimal', 'decimal', 'octal',
               'binary', 'regexp', 'string') or None to guess the type of
               each argument with guesstype()
    fromcp  -- optional codepage; numeric arguments are converted from it
               by valfromcp()

    Numeric and string arguments are converted to characters directly (and
    the cache of character data is filled for them through GrepInNames).
    Regexp arguments are collected and combined into one query, joined
    with '.*', whose matches are appended at the end.

    Returns the list of resulting characters.
    """
    # build a list of values, so that we can combine queries like
    # LATIN ALPHA and search for LATIN.*ALPHA and not names that
    # contain either LATIN or ALPHA
    result = []
    names_query = []  # reserved for queries in names - i.e. -r
    for arg_i in arglist:
        if t is None:
            tp, arg = guesstype(arg_i)
            if tp == 'regexp':
                # if the first argument is guessed to be a regexp, add
                # all the following arguments to the regular expression -
                # this is probably what you wanted, e.g.
                # 'unicode cyrillic be' will now search for the 'cyrillic.*be' regular expression
                t = 'regexp'
        else:
            tp, arg = t, arg_i

        if tp in _NUMERIC_BASES:
            number = int(arg, _NUMERIC_BASES[tp])
            for val in valfromcp(number, fromcp):
                r = myunichr(val)
                list(GrepInNames('%04X' % val, fillcache=True))  # fill the table with character properties
                result.append(r)
        elif tp == 'regexp':
            names_query.append(arg)
        elif tp == 'string':
            try:
                if PY3:  # argv is automatically decoded into unicode, even padded with bogus character if it is not encodable
                    unirepr = arg
                else:
                    unirepr = unicode(arg, options.iocharset)
            except UnicodeDecodeError:
                error("Sequence %s is not valid in charset '%s'." % (repr(arg), options.iocharset))
            unireg = '|'.join(['%04X' % ord(x) for x in unirepr])
            list(GrepInNames(unireg, fillcache=True))
            result.extend(unirepr)
        # an 'empty string' argument (or any other type) contributes nothing

    if names_query:
        query = '.*'.join(names_query)
        result.extend(GrepInNames(query))
    return result
546
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def maybe_colours(colour):
    "return colours[colour] if use_colour is set, otherwise an empty string"
    return colours[colour] if use_colour else ""
552
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
553 # format key and value
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def printkv(*l):
    """Print alternating key, value arguments as 'key: value' pairs.

    Keys are written in the 'green' colour and values in the 'default'
    colour (when colours are enabled); pairs are separated by a space and
    the last pair is terminated by a newline.
    """
    last_pair_start = len(l) - 2
    for pos in range(0, len(l), 2):
        terminator = " " if pos < last_pair_start else "\n"
        key = l[pos]
        value = l[pos + 1]
        out(maybe_colours('green'))
        out(key)
        out(": ")
        out(maybe_colours('default'))
        out(unicode(value))
        out(terminator)
567
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
568 def print_characters(clist, maxcount, query_wiki=0):
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
569 """query_wiki - 0 - don't
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
570 1 - spawn browser
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
571 """
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
572 counter = 0
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
573 for c in clist:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
574
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
575 if query_wiki:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
576 ch = urllib.quote(c.encode('utf-8')) # wikipedia uses UTF-8 in names
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
577 wiki_url = 'http://en.wikipedia.org/wiki/'+ch
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
578 webbrowser.open(wiki_url)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
579 query_wiki = 0 # query only the very first character
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
580
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
581
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
582 if maxcount:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
583 counter += 1
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
584 if counter > options.maxcount:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
585 out("\nToo many characters to display, more than %s, use --max option to change it\n" % options.maxcount)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
586 return
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
587 properties = get_unicode_properties(c)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
588 out(maybe_colours('bold'))
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
589 out('U+%04X '% ord(c))
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
590 if properties['name']:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
591 out(properties['name'])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
592 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
593 out(maybe_colours('default'))
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
594 out(" - No such unicode character name in database")
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
595 out(maybe_colours('default'))
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
596 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
597
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
598 ar = ["UTF-8", ' '.join([("%02x" % ord23(x)) for x in c.encode('utf-8')]) ,
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
599 "UTF-16BE", ''.join([("%02x" % ord23(x)) for x in c.encode('utf-16be')]),
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
600 "Decimal", "&#%s;" % ord(c) ]
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
601 if options.addcharset:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
602 try:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
603 rep = ' '.join([("%02x" % ord(x)) for x in c.encode(options.addcharset)] )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
604 except UnicodeError:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
605 rep = "NONE"
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
606 ar.extend( [options.addcharset, rep] )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
607 printkv(*ar)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
608
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
609
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
610 if properties['combining']:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
611 pc = " "+c
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
612 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
613 pc = c
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
614 out(pc)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
615 uppercase = properties['uppercase']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
616 lowercase = properties['lowercase']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
617 if uppercase:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
618 out(" (%s)" % uppercase)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
619 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
620 printkv( "Uppercase", 'U+%04X'% ord(properties['uppercase']) )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
621 elif lowercase:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
622 out(" (%s)" % properties['lowercase'])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
623 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
624 printkv( "Lowercase", 'U+%04X'% ord(properties['lowercase']) )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
625 else:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
626 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
627 printkv( 'Category', properties['category']+ " (%s)" % general_category[properties['category']] )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
628
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
629 if properties['numeric_value']:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
630 printkv( 'Numeric value', properties['numeric_value'])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
631 if properties['digit_value']:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
632 printkv( 'Digit value', properties['digit_value'])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
633
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
634 bidi = properties['bidi']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
635 if bidi:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
636 printkv( 'Bidi', bidi+ " (%s)" % bidi_category[bidi] )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
637 mirrored = properties['mirrored']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
638 if mirrored:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
639 out('Character is mirrored\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
640 comb = properties['combining']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
641 if comb:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
642 printkv( 'Combining', str(comb)+ " (%s)" % (comb_classes.get(comb, '?')) )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
643 decomp = properties['decomposition']
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
644 if decomp:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
645 printkv( 'Decomposition', decomp )
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
646 if options.verbosity>0:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
647 uhp = get_unihan_properties(c)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
648 for key in uhp:
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
649 printkv(key, uhp[key])
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
650 out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
651
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
652
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def print_block(block):
    """Print one 256-codepoint page as a 16x16 table of characters.

    block -- page number; the table covers codepoints block*256 up to
             block*256+255, sixteen codepoints per row.

    Output goes through out().  The header names the last hex digit of
    each column (".0" to ".F"); every row is labelled with the codepoint
    minus that last digit.  Combining characters get a leading space so
    they have a base to attach to.
    """
    #header
    out(" "*10)
    for i in range(16):
        out(".%X " % i)
    out('\n')
    #body
    for row in range(block*16, block*16+16):
        label = "%X" % row
        if len(label)>3:
            # long codepoints: zero-pad to 7 digits, split as "XXXX XXX"
            label = "%07X" % row
            label = label[:4]+" "+label[4:]
        else:
            # pad to the same 8 columns as the long form, so that label
            # plus ". " always fills the 10-column indent of the header
            label = "%8s" % ("%03X" % row)
        # LTR is defined elsewhere in the file (presumably a left-to-right
        # mark keeping the label in place next to RTL characters)
        out(LTR+label+". ")
        for col in range(16):
            c = unichr(row*16+col)
            if unicodedata.combining(c):
                c = " "+c
            out(c)
            # two spaces: each cell advances 3 columns, like ".X " above
            out('  ')
        out('\n')
    out('\n')
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
676
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def print_blocks(blocks):
    """Print a character table for every page number in `blocks`, in order."""
    for page in blocks:
        print_block(page)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
680
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
def is_range(s, typ):
    """Interpret `s` as a "LOW..HIGH" character range.

    s   -- command line argument
    typ -- argument type, handed to process() unchanged

    Returns False when `s` is not a range: no single '..' separator, both
    ends empty, or an end that does not resolve to exactly one character.
    Otherwise returns range() over the numbers of the 256-codepoint pages
    from the one holding LOW to the one holding HIGH, inclusive.  A missing
    end is taken to be the same as the other end.
    """
    ends = s.split('..')
    if len(ends) != 2:
        return False
    first, last = ends
    # an open end borrows the value of the other one
    first = first or last
    last = last or first
    if not first:
        return False
    # intentionally no fromcp here, ranges are only of unicode characters
    first_chars = list(process([first], typ))
    last_chars = list(process([last], typ))
    if not (len(first_chars) == 1 and len(last_chars) == 1):
        return False
    first_page = ord(first_chars[0]) // 256
    last_page = ord(last_chars[0]) // 256
    return range(first_page, last_page + 1)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
700
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
701
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
702
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Command line definition.
parser = OptionParser(usage="usage: %prog [options] arg")

# Argument type selectors: all of them store into options.type, and
# -a stores None, which means "guess".
parser.add_option("-x", "--hexadecimal",
                  action="store_const", const='hexadecimal', dest="type",
                  help="Assume arg to be hexadecimal number")
parser.add_option("-o", "--octal",
                  action="store_const", const='octal', dest="type",
                  help="Assume arg to be octal number")
parser.add_option("-b", "--binary",
                  action="store_const", const='binary', dest="type",
                  help="Assume arg to be binary number")
parser.add_option("-d", "--decimal",
                  action="store_const", const='decimal', dest="type",
                  help="Assume arg to be decimal number")
parser.add_option("-r", "--regexp",
                  action="store_const", const='regexp', dest="type",
                  help="Assume arg to be regular expression")
parser.add_option("-s", "--string",
                  action="store_const", const='string', dest="type",
                  help="Assume arg to be a sequence of characters")
parser.add_option("-a", "--auto",
                  action="store_const", const=None, dest="type",
                  help="Try to guess arg type (default)")

# Output limits and character sets.
parser.add_option("-m", "--max",
                  action="store", default=10, dest="maxcount", type="int",
                  help="Maximal number of codepoints to display, default: 10; 0=unlimited")
parser.add_option("-i", "--io",
                  action="store", default=iocharsetguess, dest="iocharset", type="string",
                  help="I/O character set, I am guessing %s" % iocharsetguess)
parser.add_option("--fcp", "--fromcp",
                  action="store", default='', dest="fromcp", type="string",
                  help="Convert numerical arguments from this encoding, default: no conversion")
parser.add_option("-c", "--charset-add",
                  action="store", dest="addcharset", type="string",
                  help="Show hexadecimal representation in this additional charset")

# Colour: both spellings write to options.use_colour.
parser.add_option("-C", "--colour",
                  action="store", dest="use_colour", type="string",
                  default="auto",
                  help="Use colours, on, off or auto")
parser.add_option('', "--color",
                  action="store", dest="use_colour", type="string",
                  default="auto",
                  help="synonym for --colour")

# Repeatable counters.
parser.add_option("-v", "--verbose",
                  action="count", dest="verbosity",
                  default=0,
                  help="Increase verbosity (reads Unihan properties - slow!)")
parser.add_option("-w", "--wikipedia",
                  action="count", dest="query_wiki",
                  default=0,
                  help="Query wikipedia for the character")

# Plain boolean flag; stays None unless given.
parser.add_option("--list",
                  action="store_true", dest="list_all_encodings",
                  help="List (approximately) all known encodings")
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
757
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
758
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Parse the command line; do_init() (defined elsewhere in this file) runs
# only after options and the linecache dict exist.
(options, arguments) = parser.parse_args()

linecache = {}
do_init()


# --list: show the module names found in the directory of the standard
# "encodings" package (hence only approximately all encodings) and quit.
if options.list_all_encodings:
    all_encodings = sorted(set(
        [os.path.splitext(x)[0]
         for x in os.listdir(os.path.dirname(encodings.__file__))]))
    print (textwrap.fill(' '.join(all_encodings)))
    sys.exit()

# Nothing to look up: show the usage text instead.
if not arguments:
    parser.print_help()
    sys.exit()
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
776
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
777
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Decide whether to colour the output.  An explicit on/off spelling wins;
# anything else (including the default "auto") means: colour only when
# stdout is a terminal, and never on win32.
use_colour = {
    "on": True, "1": True, "true": True, "yes": True,
    "off": False, "0": False, "false": False, "no": False,
}.get(options.use_colour.lower())
if use_colour is None:
    use_colour = sys.stdout.isatty() and sys.platform != 'win32'
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
786
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
787
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Split the arguments: "A..B" ranges are printed right away as character
# tables, everything else is collected in l_args.
l_args = [] # list of non range arguments to process
for argum in arguments:
    is_r = is_range(argum, options.type)
    if not is_r:
        l_args.append(argum)
        continue
    print_blocks(is_r)
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
795
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
# Report on the non-range arguments, if there are any.
if l_args:
    # list of file names for Unihan data file(s), empty if not available;
    # only looked up when verbose output was requested
    unihan_fs = []
    if options.verbosity > 0:
        unihan_fs = get_unihan_files()
    if options.verbosity > 0 and not unihan_fs:
        # verbose output is impossible without the data: explain, then
        # carry on with verbosity switched off
        out( """
Unihan_*.txt files not found. In order to view Unihan properties,
please place the file into /usr/share/unidata/,
/usr/share/unicode/, ~/.unicode/
or current working directory (optionally you can gzip or bzip2 them).
You can get the files by unpacking ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip
Warning, listing UniHan Properties is rather slow.

""")
        options.verbosity = 0
    try:
        print_characters(process(l_args, options.type, options.fromcp),
                         options.maxcount, options.query_wiki)
    except IOError: # e.g. broken pipe
        pass
c989a1669243 <fizzie> revert 58b9ee8f97a7
HackBot
parents:
diff changeset
815