Mercurial > repo
comparison bin/multicode @ 9075:c989a1669243
<fizzie> revert 58b9ee8f97a7
author | HackBot |
---|---|
date | Sun, 25 Sep 2016 20:31:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9074:560a73f4f0a4 | 9075:c989a1669243 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 | |
4 import os, glob, sys, unicodedata, locale, gzip, re, traceback, encodings | |
5 import urllib, webbrowser, textwrap | |
6 | |
7 # bz2 was introduced in 2.3, we want this to work also with earlier versions | |
8 try: | |
9 import bz2 | |
10 except ImportError: | |
11 bz2 = None | |
12 | |
13 # for python3 | |
14 try: | |
15 unicode | |
16 except NameError: | |
17 unicode = str | |
18 | |
19 # 'any' and 'all' were introduced in python2.5 | |
20 # dummy replacement for older versions | |
21 try: | |
22 all | |
23 except NameError: | |
24 all = lambda x: False | |
25 | |
# Python 2/3 compatibility layer: choose implementations once, at import time.
PY3 = sys.version_info[0] >= 3

if PY3:
    # getoutput()/getstatusoutput() moved from commands to subprocess in py3
    import subprocess as cmd

    def is_ascii(s):
        "Return True when string s consists entirely of ASCII characters (python 3)."
        try:
            s.encode('ascii')
            return True
        except UnicodeEncodeError:
            return False

    def out(*args):
        "Print args, converting them to the output charset."
        write = sys.stdout.buffer.write
        for chunk in args:
            sys.stdout.flush()
            write(chunk.encode(options.iocharset, 'replace'))

    # iterating bytes already yields integers on python 3
    ord23 = lambda x: x

    # unichr does not exist on python 3
    unichr = chr

else:  # python 2

    # on python 2 getoutput()/getstatusoutput() live in the commands module
    import commands as cmd

    def is_ascii(s):
        "Return True when string s consists entirely of ASCII characters (python 2)."
        try:
            unicode(s, 'ascii')
            return True
        except UnicodeDecodeError:
            return False

    def out(*args):
        "Print args, converting them to the output charset."
        for chunk in args:
            sys.stdout.write(chunk.encode(options.iocharset, 'replace'))

    # iterating a byte string yields 1-char strings on python 2
    ord23 = ord
72 | |
73 | |
from optparse import OptionParser

VERSION = '0.9.7'


# terminals known to render bidirectional text themselves
biditerms = ['mlterm']

# honour the user's locale settings; a misconfigured locale is not fatal
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

# guess the terminal charset from the locale, defaulting to ascii
try:
    iocharsetguess = locale.nl_langinfo(locale.CODESET) or "ascii"
except locale.Error:
    iocharsetguess = "ascii"

# emit an explicit LEFT-TO-RIGHT OVERRIDE only for terminals that both
# understand bidi and run in a UTF locale; otherwise use no marker at all
LTR = (u'\u202d'
       if os.environ.get('TERM') in biditerms
       and iocharsetguess.lower().startswith('utf')
       else '')
97 | |
98 | |
# ANSI escape sequences keyed by a human-readable name.  Built
# programmatically: attribute codes, then foreground 30-37 and the
# matching background 40-47 for the standard 8-colour palette.
colours = {'none': "", 'beep': "\007"}
for _name, _code in (('default', 0), ('bold', 1), ('underline', 4),
                     ('blink', 5), ('reverse', 7), ('concealed', 8)):
    colours[_name] = "\033[%dm" % _code
_palette = ('black', 'red', 'green', 'yellow',
            'blue', 'magenta', 'cyan', 'white')
for _i in range(len(_palette)):
    colours[_palette[_i]] = "\033[%dm" % (30 + _i)
    colours['on_' + _palette[_i]] = "\033[%dm" % (40 + _i)
del _name, _code, _palette, _i


# Unicode "General Category" two-letter codes -> human-readable names.
general_category = {
    # letters
    'Lu': 'Letter, Uppercase', 'Ll': 'Letter, Lowercase',
    'Lt': 'Letter, Titlecase', 'Lm': 'Letter, Modifier',
    'Lo': 'Letter, Other',
    # marks
    'Mn': 'Mark, Non-Spacing', 'Mc': 'Mark, Spacing Combining',
    'Me': 'Mark, Enclosing',
    # numbers
    'Nd': 'Number, Decimal Digit', 'Nl': 'Number, Letter',
    'No': 'Number, Other',
    # punctuation
    'Pc': 'Punctuation, Connector', 'Pd': 'Punctuation, Dash',
    'Ps': 'Punctuation, Open', 'Pe': 'Punctuation, Close',
    'Pi': 'Punctuation, Initial quote', 'Pf': 'Punctuation, Final quote',
    'Po': 'Punctuation, Other',
    # symbols
    'Sm': 'Symbol, Math', 'Sc': 'Symbol, Currency',
    'Sk': 'Symbol, Modifier', 'So': 'Symbol, Other',
    # separators
    'Zs': 'Separator, Space', 'Zl': 'Separator, Line',
    'Zp': 'Separator, Paragraph',
    # other
    'Cc': 'Other, Control', 'Cf': 'Other, Format',
    'Cs': 'Other, Surrogate', 'Co': 'Other, Private Use',
    'Cn': 'Other, Not Assigned',
}

# Unicode bidirectional category codes -> human-readable names.
bidi_category = {
    'L': 'Left-to-Right',
    'LRE': 'Left-to-Right Embedding',
    'LRO': 'Left-to-Right Override',
    'R': 'Right-to-Left',
    'AL': 'Right-to-Left Arabic',
    'RLE': 'Right-to-Left Embedding',
    'RLO': 'Right-to-Left Override',
    'PDF': 'Pop Directional Format',
    'EN': 'European Number',
    'ES': 'European Number Separator',
    'ET': 'European Number Terminator',
    'AN': 'Arabic Number',
    'CS': 'Common Number Separator',
    'NSM': 'Non-Spacing Mark',
    'BN': 'Boundary Neutral',
    'B': 'Paragraph Separator',
    'S': 'Segment Separator',
    'WS': 'Whitespace',
    'ON': 'Other Neutrals',
}

# Canonical combining class numbers -> human-readable descriptions.
comb_classes = {
    0: 'Spacing, split, enclosing, reordrant, and Tibetan subjoined',
    1: 'Overlays and interior',
    7: 'Nuktas',
    8: 'Hiragana/Katakana voicing marks',
    9: 'Viramas',
    10: 'Start of fixed position classes',
    199: 'End of fixed position classes',
    200: 'Below left attached',
    202: 'Below attached',
    204: 'Below right attached',
    208: 'Left attached (reordrant around single base character)',
    210: 'Right attached',
    212: 'Above left attached',
    214: 'Above attached',
    216: 'Above right attached',
    218: 'Below left',
    220: 'Below',
    222: 'Below right',
    224: 'Left (reordrant around single base character)',
    226: 'Right',
    228: 'Above left',
    230: 'Above',
    232: 'Above right',
    233: 'Double below',
    234: 'Double above',
    240: 'Below (iota subscript)',
}
213 | |
214 | |
215 | |
def get_unicode_properties(ch):
    """Return a dict of properties for character ch.

    Prefers the cached UnicodeData.txt record in the module-level
    linecache; falls back to the unicodedata module otherwise.
    """
    properties = {}
    if ch in linecache:
        # parse the semicolon-separated UnicodeData.txt record
        fields = linecache[ch].strip().split(';')
        proplist = ['codepoint', 'name', 'category', 'combining', 'bidi',
                    'decomposition', 'dummy', 'digit_value', 'numeric_value',
                    'mirrored', 'unicode1name', 'iso_comment', 'uppercase',
                    'lowercase', 'titlecase']
        for i, prop in enumerate(proplist):
            if prop != 'dummy':
                properties[prop] = fields[i]

        # case mappings are stored as hex codepoints; convert to characters
        for case_key in ('lowercase', 'uppercase', 'titlecase'):
            if properties[case_key]:
                properties[case_key] = unichr(int(properties[case_key], 16))

        properties['combining'] = int(properties['combining'])
        properties['mirrored'] = properties['mirrored'] == 'Y'
    else:
        # no cached record: ask (a possibly older) unicodedata module
        properties['codepoint'] = '%04X' % ord(ch)
        properties['name'] = unicodedata.name(ch, '')
        properties['category'] = unicodedata.category(ch)
        properties['combining'] = unicodedata.combining(ch)
        properties['bidi'] = unicodedata.bidirectional(ch)
        properties['decomposition'] = unicodedata.decomposition(ch)
        properties['digit_value'] = unicodedata.digit(ch, '')
        properties['numeric_value'] = unicodedata.numeric(ch, '')
        properties['mirrored'] = unicodedata.mirrored(ch)
        properties['unicode1name'] = ''
        properties['iso_comment'] = ''
        properties['uppercase'] = ch.upper()
        properties['lowercase'] = ch.lower()
        properties['titlecase'] = ''
    return properties
250 | |
251 | |
def do_init():
    "Compute the candidate locations of the UnicodeData.txt and Unihan files."
    home = os.path.expanduser('~/.unicode')

    global UnicodeDataFileNames
    UnicodeDataFileNames = [os.path.join(home, "UnicodeData.txt"),
                            '/usr/share/unicode/UnicodeData.txt',
                            '/usr/share/unidata/UnicodeData.txt',
                            '/hackenv/share/UnicodeData.txt']
    UnicodeDataFileNames += glob.glob('/usr/share/unidata/UnicodeData*.txt')
    UnicodeDataFileNames += glob.glob('/usr/share/perl/*/unicore/UnicodeData.txt')
    # MacOS X ships a copy inside the system Perl tree
    UnicodeDataFileNames += glob.glob('/System/Library/Perl/*/unicore/UnicodeData.txt')

    global UnihanDataGlobs
    UnihanDataGlobs = [os.path.join(home, "Unihan*"),
                       '/usr/share/unidata/Unihan*',
                       '/usr/share/unicode/Unihan*',
                       './Unihan*']
264 | |
265 | |
def get_unihan_files():
    "Expand UnihanDataGlobs into a list of existing Unihan data file names."
    found = []
    for pattern in UnihanDataGlobs:
        found.extend(glob.glob(pattern))
    return found
272 | |
def get_unihan_properties_internal(ch):
    """Scan the Unihan data files directly for properties of character ch.

    Pure-Python fallback used when no grep binary is available.  Returns a
    dict mapping Unihan property names to their (unicode) values.
    """
    properties = {}
    ch = ord(ch)
    global unihan_fs
    for f in unihan_fs:
        fo = OpenGzip(f)
        if fo is None:
            # file disappeared or unsupported compression - skip it
            continue
        for l in fo:
            if l.startswith('#'):
                continue
            line = l.strip()
            if not line:
                continue
            # records are "U+XXXX<TAB>key<TAB>value"
            char, key, value = line.split('\t')
            if int(char[2:], 16) == ch:
                # python 3 text files already yield str; only python 2
                # needs the explicit UTF-8 decode (mirrors the zgrep variant)
                if PY3:
                    properties[key] = value
                else:
                    properties[key] = unicode(value, 'utf-8')
            elif int(char[2:], 16) > ch:
                # the files are sorted by codepoint - stop reading early
                break
    return properties
291 | |
def get_unihan_properties_zgrep(ch):
    "Look up Unihan properties of ch by shelling out to grep/zgrep/bzgrep."
    properties = {}
    global unihan_fs
    ch = ord(ch)
    chs = 'U+%X' % ch
    for f in unihan_fs:
        # pick the grep flavour matching the file's compression
        if f.endswith('.gz'):
            grepcmd = 'zgrep'
        elif f.endswith('.bz2'):
            grepcmd = 'bzgrep'
        else:
            grepcmd = 'grep'
        # anchor at line start and require a word boundary after the codepoint
        cmdline = ' '.join([grepcmd, '^' + chs + r'\\b', f])
        status, output = cmd.getstatusoutput(cmdline)
        for l in output.split('\n'):
            if not l:
                continue
            char, key, value = l.strip().split('\t')
            codepoint = int(char[2:], 16)
            if codepoint == ch:
                properties[key] = value if PY3 else unicode(value, 'utf-8')
            elif codepoint > ch:
                break
    return properties
319 | |
# Prefer the external-grep implementation when a grep binary exists;
# basic sanity check for systems without one (e.g. MS Windows).
get_unihan_properties = (get_unihan_properties_zgrep
                         if os.path.exists('/bin/grep')
                         else get_unihan_properties_internal)
325 | |
326 | |
def error(txt):
    "Print txt followed by a newline and terminate with exit status 1."
    out(txt)
    out('\n')
    sys.exit(1)
331 | |
def get_gzip_filename(fname):
    """Return the first existing variant of fname.

    Tries fname itself, then fname.gz, then (only when the bz2 module is
    available) fname.bz2; returns None when none of them exist.
    """
    candidates = [fname, fname + '.gz']
    if bz2 is not None:
        candidates.append(fname + '.bz2')
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None
341 | |
342 | |
def OpenGzip(fname):
    """Open fname for reading, transparently trying fname.gz / fname.bz2
    when fname itself does not exist.

    Returns a file object yielding text lines, or None when no suitable
    file is found.  On Python 3 compressed files are opened in text mode
    ('rt') so that callers can match str regexps against the lines, the
    same as for the plain-file branch; on Python 2 the raw GzipFile /
    BZ2File byte streams are kept for backward compatibility.
    """
    if os.path.exists(fname) and not (fname.endswith('.gz') or fname.endswith('.bz2')):
        return open(fname)
    if os.path.exists(fname + '.gz'):
        fname = fname + '.gz'
    elif os.path.exists(fname + '.bz2') and bz2 is not None:
        fname = fname + '.bz2'
    if fname.endswith('.gz'):
        if not os.path.exists(fname):
            return None  # avoid FileNotFoundError from the gzip constructor
        if sys.version_info[0] >= 3:
            return gzip.open(fname, 'rt')
        return gzip.GzipFile(fname)
    elif fname.endswith('.bz2'):
        if not os.path.exists(fname) or bz2 is None:
            return None  # missing file or no bz2 support on this python
        if sys.version_info[0] >= 3:
            return bz2.open(fname, 'rt')
        return bz2.BZ2File(fname)
    return None
356 | |
def GrepInNames(pattern, fillcache=False):
    """Yield characters whose UnicodeData.txt line matches pattern
    (case-insensitively), caching each matching line in linecache.

    With fillcache=True nothing is yielded and only the cache is filled
    (callers drive the generator with list(...)).  When no UnicodeData.txt
    can be found, falls back to scanning unicodedata.name() over the whole
    codepoint range, which is much slower.
    """
    p = re.compile(pattern, re.I)
    f = None
    for name in UnicodeDataFileNames:
        f = OpenGzip(name)
        if f is not None:
            break
    if fillcache:
        if f:
            for l in f:
                if p.search(l):
                    r = myunichr(int(l.split(';')[0], 16))
                    linecache[r] = l
            f.close()
        return
    if not f:
        out("""
Cannot find UnicodeData.txt, please place it into
/usr/share/unidata/UnicodeData.txt,
/usr/share/unicode/UnicodeData.txt, ~/.unicode/ or current
working directory (optionally you can gzip it).
Without the file, searching will be much slower.

""")
        # xrange does not exist on python 3; pick the lazy range once so
        # the loop never materialises a sys.maxunicode-element list
        lazy_range = range if PY3 else xrange
        for i in lazy_range(sys.maxunicode):
            try:
                name = unicodedata.name(unichr(i))
                if p.search(name):
                    yield myunichr(i)
            except ValueError:
                pass
    else:
        for l in f:
            if p.search(l):
                r = myunichr(int(l.split(';')[0], 16))
                linecache[r] = l
                yield r
        f.close()
395 | |
396 | |
def valfromcp(n, cp=None):
    """Interpret the number n as a byte sequence in codepage cp and return
    the list of Unicode codepoints it decodes to.

    With cp unset/empty (the default) no conversion is done and [n] is
    returned.  Raises UnicodeDecodeError when the bytes are not valid in
    cp, and LookupError for an unknown codepage name.
    """
    if not cp:
        return [n]
    xh = '%x' % n
    if len(xh) % 2:  # pad hexadecimal representation to whole bytes
        xh = '0' + xh
    # turn the hex string into the raw byte string it spells; bytearray ->
    # bytes works identically on python 2 (str) and python 3 (bytes),
    # unlike the old unicode(str, cp) call which breaks on python 3
    raw = bytes(bytearray(int(xh[i:i + 2], 16)
                          for i in range(0, len(xh), 2)))
    decoded = raw.decode(cp)
    return [ord(x) for x in decoded]
423 | |
def myunichr(n):
    """Return the character for codepoint n, exiting via error() with a
    helpful message when n does not fit this interpreter's range."""
    try:
        return unichr(n)
    except OverflowError:
        traceback.print_exc()
        error("The codepoint is too big - it does not fit into an int.")
    except ValueError:
        traceback.print_exc()
        err = "The codepoint is too big."
        if sys.maxunicode <= 0xffff:
            # narrow (UCS-2) python build cannot represent astral characters
            err += "\nPerhaps your python interpreter is not compiled with wide unicode characters."
        error(err)
437 | |
438 | |
def guesstype(arg):
    """Guess how the command-line argument arg should be interpreted.

    Returns a (type, value) pair where type is one of 'empty string',
    'string', 'hexadecimal', 'binary' or 'regexp'.  For hexadecimal
    arguments value is the bare hex digits (any U+/u+/U/u prefix is
    stripped so later int(value, 16) calls succeed).
    """
    if not arg:  # empty string
        return 'empty string', arg
    elif not is_ascii(arg):
        return 'string', arg
    elif arg[:2] == 'U+' or arg[:2] == 'u+':  # explicit codepoint notation
        try:
            val = int(arg[2:], 16)
            if val > sys.maxunicode:
                return 'regexp', arg
            else:
                return 'hexadecimal', arg[2:]
        except ValueError:
            return 'regexp', arg
    elif arg[0] in "Uu" and len(arg) > 4:
        try:
            val = int(arg[1:], 16)
            if val > sys.maxunicode:
                return 'regexp', arg
            else:
                # strip the leading U/u: returning the full arg would make
                # process() crash on int(arg, 16)
                return 'hexadecimal', arg[1:]
        except ValueError:
            return 'regexp', arg
    elif len(arg) >= 4:
        # a string of exactly 1-4 bytes' worth of 0/1 digits may be binary
        if len(arg) in (8, 16, 24, 32):
            if all(x in '01' for x in arg):
                val = int(arg, 2)
                if val <= sys.maxunicode:
                    return 'binary', arg
        try:
            val = int(arg, 16)
            if val > sys.maxunicode:
                return 'regexp', arg
            else:
                return 'hexadecimal', arg
        except ValueError:
            return 'regexp', arg
    else:
        return 'string', arg
478 | |
def _append_numeric(arg, base, fromcp, result):
    "Parse arg as a number in the given base and append its character(s) to result."
    val = int(arg, base)
    for v in valfromcp(val, fromcp):
        r = myunichr(v)
        # fill the linecache with this character's properties
        list(GrepInNames('%04X' % v, fillcache=True))
        result.append(r)


def process(arglist, t, fromcp=None):
    """Turn the command-line arguments into the list of characters to show.

    t forces the argument type (None = guess each argument); fromcp
    optionally converts numeric arguments from that codepage.  We build a
    combined list of name-query values so that queries like LATIN ALPHA
    search for LATIN.*ALPHA and not for names that contain either LATIN
    or ALPHA.
    """
    result = []
    names_query = []  # reserved for queries in names - i.e. -r
    bases = {'hexadecimal': 16, 'decimal': 10, 'octal': 8, 'binary': 2}
    for arg_i in arglist:
        if t is None:
            tp, arg = guesstype(arg_i)
            if tp == 'regexp':
                # once one argument is guessed to be a regexp, treat all the
                # following ones as part of the same name query - this is
                # probably what you wanted, e.g. 'unicode cyrillic be'
                # searches for the 'cyrillic.*be' regular expression
                t = 'regexp'
        else:
            tp, arg = t, arg_i
        if tp in bases:
            _append_numeric(arg, bases[tp], fromcp, result)
        elif tp == 'regexp':
            names_query.append(arg)
        elif tp == 'string':
            try:
                if PY3:
                    # argv is automatically decoded into unicode, even
                    # padded with bogus characters if it is not encodable
                    unirepr = arg
                else:
                    unirepr = unicode(arg, options.iocharset)
            except UnicodeDecodeError:
                error("Sequence %s is not valid in charset '%s'." % (repr(arg), options.iocharset))
            # cache properties of every character of the string at once
            unireg = '|'.join('%04X' % ord(x) for x in unirepr)
            list(GrepInNames(unireg, fillcache=True))
            result.extend(unirepr)
        elif tp == 'empty string':
            pass  # do not do anything for an empty string
    if names_query:
        for r in GrepInNames('.*'.join(names_query)):
            result.append(r)
    return result
546 | |
def maybe_colours(colour):
    "Return the ANSI escape for colour when colour output is enabled, else ''."
    return colours[colour] if use_colour else ""
552 | |
# format and print key/value pairs taken from the flat argument list
def printkv(*l):
    """Print (key, value) pairs from the flat list l, colouring the keys
    green; pairs are separated by spaces and the last one ends the line."""
    pairs = list(zip(l[::2], l[1::2]))
    last = len(pairs) - 1
    for idx, (k, v) in enumerate(pairs):
        sep = "\n" if idx == last else " "
        out(maybe_colours('green'))
        out(k)
        out(": ")
        out(maybe_colours('default'))
        out(unicode(v))
        out(sep)
567 | |
def print_characters(clist, maxcount, query_wiki=0):
    """Print a detailed report for every character in clist.

    maxcount   - stop (with a notice) after options.maxcount characters;
                 0 means unlimited
    query_wiki - 0 - don't
                 1 - spawn browser for the very first character
    """
    counter = 0
    for c in clist:

        if query_wiki:
            # wikipedia uses UTF-8 in names; urllib.quote moved to
            # urllib.parse.quote in python 3
            if PY3:
                from urllib.parse import quote as url_quote
            else:
                url_quote = urllib.quote
            wiki_url = 'http://en.wikipedia.org/wiki/' + url_quote(c.encode('utf-8'))
            webbrowser.open(wiki_url)
            query_wiki = 0  # query only the very first character

        if maxcount:
            counter += 1
            if counter > options.maxcount:
                out("\nToo many characters to display, more than %s, use --max option to change it\n" % options.maxcount)
                return
        properties = get_unicode_properties(c)
        out(maybe_colours('bold'))
        out('U+%04X ' % ord(c))
        if properties['name']:
            out(properties['name'])
        else:
            out(maybe_colours('default'))
            out(" - No such unicode character name in database")
        out(maybe_colours('default'))
        out('\n')

        ar = ["UTF-8", ' '.join([("%02x" % ord23(x)) for x in c.encode('utf-8')]),
              "UTF-16BE", ''.join([("%02x" % ord23(x)) for x in c.encode('utf-16be')]),
              "Decimal", "&#%s;" % ord(c)]
        if options.addcharset:
            try:
                # ord23, not ord: python 3 yields ints when iterating bytes
                rep = ' '.join([("%02x" % ord23(x)) for x in c.encode(options.addcharset)])
            except UnicodeError:
                rep = "NONE"
            ar.extend([options.addcharset, rep])
        printkv(*ar)

        # prepend a space so a combining mark has something to attach to
        if properties['combining']:
            pc = " " + c
        else:
            pc = c
        out(pc)
        uppercase = properties['uppercase']
        lowercase = properties['lowercase']
        if uppercase:
            out(" (%s)" % uppercase)
            out('\n')
            printkv("Uppercase", 'U+%04X' % ord(properties['uppercase']))
        elif lowercase:
            out(" (%s)" % properties['lowercase'])
            out('\n')
            printkv("Lowercase", 'U+%04X' % ord(properties['lowercase']))
        else:
            out('\n')
        printkv('Category', properties['category'] + " (%s)" % general_category[properties['category']])

        if properties['numeric_value']:
            printkv('Numeric value', properties['numeric_value'])
        if properties['digit_value']:
            printkv('Digit value', properties['digit_value'])

        bidi = properties['bidi']
        if bidi:
            printkv('Bidi', bidi + " (%s)" % bidi_category[bidi])
        mirrored = properties['mirrored']
        if mirrored:
            out('Character is mirrored\n')
        comb = properties['combining']
        if comb:
            printkv('Combining', str(comb) + " (%s)" % (comb_classes.get(comb, '?')))
        decomp = properties['decomposition']
        if decomp:
            printkv('Decomposition', decomp)
        if options.verbosity > 0:
            # Unihan lookup is slow, only done with -v
            uhp = get_unihan_properties(c)
            for key in uhp:
                printkv(key, uhp[key])
        out('\n')
651 | |
652 | |
def print_block(block):
    "Print a 16x16 table of the 256 characters in the given block number."
    # header row: the low nibble of each column
    out(" " * 10)
    for col in range(16):
        out(".%X " % col)
    out('\n')
    # body: one row of 16 characters per line
    for row in range(block * 16, block * 16 + 16):
        hexi = "%X" % row
        if len(hexi) > 3:
            # wide codepoints: split the 7-digit form for readability
            hexi = "%07X" % row
            hexi = hexi[:4] + " " + hexi[4:]
        else:
            hexi = " %03X" % row
        out(LTR + hexi + ". ")
        for col in range(16):
            ch = unichr(row * 16 + col)
            if unicodedata.combining(ch):
                # give combining marks a base character to attach to
                ch = " " + ch
            out(ch)
            out(' ')
        out('\n')
    out('\n')
676 | |
def print_blocks(blocks):
    "Print every 256-character block in the iterable blocks."
    for blk in blocks:
        print_block(blk)
680 | |
def is_range(s, typ):
    """If s is a character range like 'a..b', return the range of
    256-character block numbers it covers, otherwise False.

    Either end may be omitted and then defaults to the other one.
    Intentionally no fromcp conversion here - ranges are only of unicode
    characters.
    """
    parts = s.split('..')
    if len(parts) != 2:
        return False
    lo_s, hi_s = parts
    if not hi_s:
        hi_s = lo_s
    elif not lo_s:
        lo_s = hi_s
    if not lo_s:
        return False
    low = list(process([lo_s], typ))
    high = list(process([hi_s], typ))
    if len(low) != 1 or len(high) != 1:
        return False
    return range(ord(low[0]) // 256, ord(high[0]) // 256 + 1)
700 | |
701 | |
702 | |
parser = OptionParser(usage="usage: %prog [options] arg")

# the type-forcing flags all store their constant into options.type
for _short, _long, _const, _help in [
        ("-x", "--hexadecimal", 'hexadecimal', "Assume arg to be hexadecimal number"),
        ("-o", "--octal", 'octal', "Assume arg to be octal number"),
        ("-b", "--binary", 'binary', "Assume arg to be binary number"),
        ("-d", "--decimal", 'decimal', "Assume arg to be decimal number"),
        ("-r", "--regexp", 'regexp', "Assume arg to be regular expression"),
        ("-s", "--string", 'string', "Assume arg to be a sequence of characters"),
        ("-a", "--auto", None, "Try to guess arg type (default)")]:
    parser.add_option(_short, _long, action="store_const", const=_const,
                      dest="type", help=_help)
del _short, _long, _const, _help

parser.add_option("-m", "--max",
                  action="store", default=10, dest="maxcount", type="int",
                  help="Maximal number of codepoints to display, default: 10; 0=unlimited")
parser.add_option("-i", "--io",
                  action="store", default=iocharsetguess, dest="iocharset", type="string",
                  help="I/O character set, I am guessing %s" % iocharsetguess)
parser.add_option("--fcp", "--fromcp",
                  action="store", default='', dest="fromcp", type="string",
                  help="Convert numerical arguments from this encoding, default: no conversion")
parser.add_option("-c", "--charset-add",
                  action="store", dest="addcharset", type="string",
                  help="Show hexadecimal reprezentation in this additional charset")
parser.add_option("-C", "--colour",
                  action="store", dest="use_colour", type="string", default="auto",
                  help="Use colours, on, off or auto")
parser.add_option('', "--color",
                  action="store", dest="use_colour", type="string", default="auto",
                  help="synonym for --colour")
parser.add_option("-v", "--verbose",
                  action="count", dest="verbosity", default=0,
                  help="Increase verbosity (reads Unihan properties - slow!)")
parser.add_option("-w", "--wikipedia",
                  action="count", dest="query_wiki", default=0,
                  help="Query wikipedia for the character")
parser.add_option("--list",
                  action="store_const", dest="list_all_encodings", const=True,
                  help="List (approximately) all known encodings")
758 | |
(options, arguments) = parser.parse_args()

linecache = {}
do_init()


# --list: dump the names of the bundled encoding modules and quit
if options.list_all_encodings:
    all_encodings = sorted(set(os.path.splitext(x)[0]
                               for x in os.listdir(os.path.dirname(encodings.__file__))))
    print(textwrap.fill(' '.join(all_encodings)))
    sys.exit()

if not arguments:
    parser.print_help()
    sys.exit()


# resolve the colour mode; anything other than an explicit on/off means
# "only when stdout is a terminal", and never on a plain win32 console
_colour_choice = options.use_colour.lower()
if _colour_choice in ("on", "1", "true", "yes"):
    use_colour = True
elif _colour_choice in ("off", "0", "false", "no"):
    use_colour = False
else:
    use_colour = sys.stdout.isatty()
if sys.platform == 'win32':
    use_colour = False


l_args = []  # list of non range arguments to process
for argum in arguments:
    block_range = is_range(argum, options.type)
    if block_range:
        print_blocks(block_range)
    else:
        l_args.append(argum)

if l_args:
    unihan_fs = []
    if options.verbosity > 0:
        # list of file names for Unihan data file(s), empty if not available
        unihan_fs = get_unihan_files()
        if not unihan_fs:
            out("""
Unihan_*.txt files not found. In order to view Unihan properties,
please place the file into /usr/share/unidata/,
/usr/share/unicode/, ~/.unicode/
or current working directory (optionally you can gzip or bzip2 them).
You can get the files by unpacking ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip
Warning, listing UniHan Properties is rather slow.

""")
            options.verbosity = 0
    try:
        print_characters(process(l_args, options.type, options.fromcp),
                         options.maxcount, options.query_wiki)
    except IOError:  # e.g. broken pipe
        pass
815 |