view bin/multicode @ 12320:9c7eb9899d95 draft

<fizzie> le/rn karma//All living beings have actions (karma) as their own, their inheritance, their congenital cause, their kinsman, their refuge. It is karma that differentiates beings into low and high states.
author HackEso <hackeso@esolangs.org>
date Fri, 06 Mar 2020 23:08:37 +0000
parents c989a1669243
children
line wrap: on
line source

#!/usr/bin/python


import os, glob, sys, unicodedata, locale, gzip, re, traceback, encodings
import urllib, webbrowser, textwrap

# bz2 was introduced in 2.3, we want this to work also with earlier versions
try:
    import bz2
except ImportError:
    bz2 = None

# for python3
try:
    unicode
except NameError:
    unicode = str

# 'all' was introduced in python2.5; older versions get an equivalent
# implementation (a stub that always answers False would silently disable
# the binary-number detection in guesstype())
try:
    all
except NameError:
    def all(iterable):
        "return True if every element of iterable is true (or if it is empty)"
        for element in iterable:
            if not element:
                return False
        return True

PY3 = sys.version_info[0] >= 3
if PY3:
    import subprocess as cmd

    def is_ascii(s):
        "tell whether the string s is made only of ascii characters (python 3)"
        try:
            s.encode('ascii')
        except UnicodeEncodeError:
            return False
        else:
            return True

    def out(*args):
        "write each argument to stdout, encoded into the output charset"
        for chunk in args:
            # flush the text layer before writing to the binary buffer below it
            sys.stdout.flush()
            encoded = chunk.encode(options.iocharset, 'replace')
            sys.stdout.buffer.write(encoded)

    def ord23(x):
        "elements of a python3 byte array are already integers, return x as is"
        return x

    # unichr is not in python3
    unichr = chr

else: # python2

    # getoutput() and getstatusoutput() methods have
    # been moved from commands to the subprocess module
    # with Python >= 3.x
    import commands as cmd

    def is_ascii(s):
        "tell whether the string s is made only of ascii characters (python 2)"
        try:
            unicode(s, 'ascii')
        except UnicodeDecodeError:
            return False
        else:
            return True

    def out(*args):
        "write each argument to stdout, encoded into the output charset"
        for chunk in args:
            encoded = chunk.encode(options.iocharset, 'replace')
            sys.stdout.write(encoded)

    # elements of a python2 byte string are characters, convert with ord
    ord23 = ord



from optparse import OptionParser

VERSION='0.9.7'


# list of terminals that support bidi
biditerms = ['mlterm']

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

# guess terminal charset
try:
    iocharsetguess = locale.nl_langinfo(locale.CODESET) or "ascii"
except (locale.Error, AttributeError):
    # AttributeError: nl_langinfo()/CODESET are not available on every
    # platform (e.g. MS Windows); fall back to plain ascii there as well
    iocharsetguess = "ascii"

# on a bidi capable terminal with a UTF charset, prefix lines with a
# left-to-right override character
if os.environ.get('TERM') in biditerms and iocharsetguess.lower().startswith('utf'):
    LTR = u'\u202d' # left to right override
else:
    LTR = ''


# escape sequences written to the terminal, keyed by colour/attribute name;
# all but 'none' and 'beep' have the form ESC [ <code> m
colours = {'none': ""}
colours.update(
    (name, "\033[%dm" % code)
    for name, code in (
        ('default', 0),
        ('bold', 1),
        ('underline', 4),
        ('blink', 5),
        ('reverse', 7),
        ('concealed', 8),

        ('black', 30),
        ('red', 31),
        ('green', 32),
        ('yellow', 33),
        ('blue', 34),
        ('magenta', 35),
        ('cyan', 36),
        ('white', 37),

        ('on_black', 40),
        ('on_red', 41),
        ('on_green', 42),
        ('on_yellow', 43),
        ('on_blue', 44),
        ('on_magenta', 45),
        ('on_cyan', 46),
        ('on_white', 47),
    )
)
colours['beep'] = "\007"


# descriptions of the two-letter general category codes,
# shown by print_characters() next to the code itself
general_category = dict(
    # letters
    Lu='Letter, Uppercase',
    Ll='Letter, Lowercase',
    Lt='Letter, Titlecase',
    Lm='Letter, Modifier',
    Lo='Letter, Other',
    # marks
    Mn='Mark, Non-Spacing',
    Mc='Mark, Spacing Combining',
    Me='Mark, Enclosing',
    # numbers
    Nd='Number, Decimal Digit',
    Nl='Number, Letter',
    No='Number, Other',
    # punctuation
    Pc='Punctuation, Connector',
    Pd='Punctuation, Dash',
    Ps='Punctuation, Open',
    Pe='Punctuation, Close',
    Pi='Punctuation, Initial quote',
    Pf='Punctuation, Final quote',
    Po='Punctuation, Other',
    # symbols
    Sm='Symbol, Math',
    Sc='Symbol, Currency',
    Sk='Symbol, Modifier',
    So='Symbol, Other',
    # separators
    Zs='Separator, Space',
    Zl='Separator, Line',
    Zp='Separator, Paragraph',
    # others
    Cc='Other, Control',
    Cf='Other, Format',
    Cs='Other, Surrogate',
    Co='Other, Private Use',
    Cn='Other, Not Assigned',
)

# descriptions of the bidirectional class codes,
# shown by print_characters() next to the code itself
bidi_category = {
     'L'   : 'Left-to-Right',
     'LRE' : 'Left-to-Right Embedding',
     'LRO' : 'Left-to-Right Override',
     'R'   : 'Right-to-Left',
     'AL'  : 'Right-to-Left Arabic',
     'RLE' : 'Right-to-Left Embedding',
     'RLO' : 'Right-to-Left Override',
     'PDF' : 'Pop Directional Format',
     'EN'  : 'European Number',
     'ES'  : 'European Number Separator',
     'ET'  : 'European Number Terminator',
     'AN'  : 'Arabic Number',
     'CS'  : 'Common Number Separator',
     'NSM' : 'Non-Spacing Mark',
     'BN'  : 'Boundary Neutral',
     'B'   : 'Paragraph Separator',
     'S'   : 'Segment Separator',
     'WS'  : 'Whitespace',
     'ON'  : 'Other Neutrals',
     # directional isolates (U+2066..U+2069); without these entries the
     # lookup in print_characters() fails for those characters
     'LRI' : 'Left-to-Right Isolate',
     'RLI' : 'Right-to-Left Isolate',
     'FSI' : 'First Strong Isolate',
     'PDI' : 'Pop Directional Isolate',
}

# descriptions of the canonical combining class numbers; print_characters()
# looks them up with .get(), so classes not listed here are shown as '?'
comb_classes = dict([
    (0, 'Spacing, split, enclosing, reordrant, and Tibetan subjoined'),
    (1, 'Overlays and interior'),
    (7, 'Nuktas'),
    (8, 'Hiragana/Katakana voicing marks'),
    (9, 'Viramas'),
    (10, 'Start of fixed position classes'),
    (199, 'End of fixed position classes'),
    (200, 'Below left attached'),
    (202, 'Below attached'),
    (204, 'Below right attached'),
    (208, 'Left attached (reordrant around single base character)'),
    (210, 'Right attached'),
    (212, 'Above left attached'),
    (214, 'Above attached'),
    (216, 'Above right attached'),
    (218, 'Below left'),
    (220, 'Below'),
    (222, 'Below right'),
    (224, 'Left (reordrant around single base character)'),
    (226, 'Right'),
    (228, 'Above left'),
    (230, 'Above'),
    (232, 'Above right'),
    (233, 'Double below'),
    (234, 'Double above'),
    (240, 'Below (iota subscript)'),
])



def get_unicode_properties(ch):
    """Return a dictionary of properties of the character ch.

    When a UnicodeData.txt line for ch has been stored in linecache, the
    properties are parsed from that line; otherwise they are taken from the
    unicodedata module.

    The dictionary has the keys codepoint, name, category, combining, bidi,
    decomposition, digit_value, numeric_value, mirrored, unicode1name,
    iso_comment, uppercase, lowercase and titlecase.  A case mapping is
    either a single character or an empty string.
    """
    properties = {}
    if ch in linecache:
        fields = linecache[ch].strip().split(';')
        proplist = ['codepoint', 'name', 'category', 'combining', 'bidi', 'decomposition', 'dummy', 'digit_value', 'numeric_value', 'mirrored', 'unicode1name', 'iso_comment', 'uppercase', 'lowercase', 'titlecase']
        for i, prop in enumerate(proplist):
            if prop!='dummy':
                properties[prop] = fields[i]

        # case mappings are stored as hexadecimal codepoints
        for case in ('lowercase', 'uppercase', 'titlecase'):
            if properties[case]:
                properties[case] = unichr(int(properties[case], 16))

        properties['combining'] = int(properties['combining'])
        properties['mirrored'] = properties['mirrored']=='Y'
    else:
        properties['codepoint'] = '%04X' % ord(ch)
        properties['name'] = unicodedata.name(ch, '')
        properties['category'] = unicodedata.category(ch)
        properties['combining'] = unicodedata.combining(ch)
        properties['bidi'] = unicodedata.bidirectional(ch)
        properties['decomposition'] = unicodedata.decomposition(ch)
        # keep the values as strings, like the fields read from the data
        # file, so that a value of zero is not mistaken for "no value"
        for key, lookup in (('digit_value', unicodedata.digit),
                            ('numeric_value', unicodedata.numeric)):
            value = lookup(ch, None)
            if value is None:
                properties[key] = ''
            else:
                properties[key] = str(value)
        properties['mirrored'] = unicodedata.mirrored(ch)
        properties['unicode1name'] = ''
        properties['iso_comment'] = ''
        # upper()/lower() return the character itself when there is no
        # mapping and may return several characters; the data file has an
        # empty field in the first case and only one-to-one mappings, so
        # keep only results that are a single, different character
        for case, mapped in (('uppercase', ch.upper()), ('lowercase', ch.lower())):
            if len(mapped) == 1 and mapped != ch:
                properties[case] = mapped
            else:
                properties[case] = ''
        properties['titlecase'] = ''
    return properties


def do_init():
    """Set up the global lists of places to look for the data files.

    UnicodeDataFileNames is the list of candidate UnicodeData.txt paths, in
    order of preference; UnihanDataGlobs is the list of glob patterns for
    the Unihan data files.
    """
    HomeDir = os.path.expanduser('~/.unicode')
    HomeUnicodeData = os.path.join(HomeDir, "UnicodeData.txt")
    global UnicodeDataFileNames
    # './UnicodeData.txt' is listed because the "Cannot find UnicodeData.txt"
    # message tells the user that the current working directory is searched
    UnicodeDataFileNames = [HomeUnicodeData, '/usr/share/unicode/UnicodeData.txt', '/usr/share/unidata/UnicodeData.txt', '/hackenv/share/UnicodeData.txt', './UnicodeData.txt'] + \
        glob.glob('/usr/share/unidata/UnicodeData*.txt') + \
        glob.glob('/usr/share/perl/*/unicore/UnicodeData.txt') + \
        glob.glob('/System/Library/Perl/*/unicore/UnicodeData.txt') # for MacOSX

    HomeUnihanData = os.path.join(HomeDir, "Unihan*")
    global UnihanDataGlobs
    UnihanDataGlobs = [HomeUnihanData, '/usr/share/unidata/Unihan*', '/usr/share/unicode/Unihan*', './Unihan*']


def get_unihan_files():
    "return the list of file names matching any of the UnihanDataGlobs patterns"
    return [fname for pattern in UnihanDataGlobs for fname in glob.glob(pattern)]

def get_unihan_properties_internal(ch):
    """Return a dictionary of Unihan properties of the character ch.

    Every file listed in unihan_fs is read line by line; lines have the
    form 'U+XXXX<tab>key<tab>value'.  Reading of a file stops at the first
    line whose codepoint is greater than the one of ch.
    """
    properties = {}
    ch = ord(ch)
    for f in unihan_fs:
        fo = OpenGzip(f)
        if fo is None:
            continue
        try:
            for l in fo:
                # depending on the python version and on the kind of file
                # the lines are either byte strings or text
                if not isinstance(l, unicode):
                    l = l.decode('utf-8')
                line = l.strip()
                if not line or line.startswith('#'):
                    continue
                char, key, value = line.split('\t', 2)
                codepoint = int(char[2:], 16)
                if codepoint == ch:
                    properties[key] = value
                elif codepoint > ch:
                    break
        finally:
            fo.close()
    return properties

def get_unihan_properties_zgrep(ch):
    """Return a dictionary of Unihan properties of the character ch.

    The lines for ch are extracted from every file listed in unihan_fs by
    running grep, zgrep or bzgrep (chosen by the file name extension).
    Files for which the command cannot be started are skipped.
    """
    import subprocess
    properties = {}
    ch = ord(ch)
    chs = 'U+%X' % ch
    for f in unihan_fs:
        if f.endswith('.gz'):
            grepcmd = 'zgrep'
        elif f.endswith('.bz2'):
            grepcmd = 'bzgrep'
        else:
            grepcmd = 'grep'
        # the arguments are passed as a list, without a shell, so that a
        # file name with spaces or shell metacharacters is harmless
        try:
            proc = subprocess.Popen([grepcmd, '^'+chs+r'\b', f], stdout=subprocess.PIPE)
            output = proc.communicate()[0]
        except OSError:
            continue
        # the Unihan data files are encoded in UTF-8
        for l in output.decode('utf-8', 'replace').split('\n'):
            fields = l.strip().split('\t', 2)
            if len(fields) != 3:
                continue
            char, key, value = fields
            try:
                codepoint = int(char[2:], 16)
            except ValueError:
                continue
            if codepoint == ch:
                properties[key] = value
    return properties

# basic sanity check, if e.g. you run this on MS Windows...
# without /bin/grep the data files are read by python itself
if not os.path.exists('/bin/grep'):
    get_unihan_properties = get_unihan_properties_internal
else:
    get_unihan_properties = get_unihan_properties_zgrep


def error(txt):
    "print txt followed by a newline and exit with status 1"
    out(txt, '\n')
    sys.exit(1)

def get_gzip_filename(fname):
    "return the first existing name of fname, fname+.gz and (if the bz2 module is available) fname+.bz2, or None if there is none"
    candidates = [fname, fname+'.gz']
    if bz2 is not None:
        candidates.append(fname+'.bz2')
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None


def OpenGzip(fname):
    """Open fname, or fname.gz or fname.bz2 if fname does not exist.

    Return an open file object, or None when there is no such file (or when
    it is a .bz2 file and the bz2 module is not available).  Under python 3
    the returned object always yields text lines decoded from UTF-8, also
    for the compressed files; under python 2 it yields byte strings.
    """
    import io
    text_mode = sys.version_info[0] >= 3
    if os.path.exists(fname) and not (fname.endswith('.gz') or fname.endswith('.bz2')):
        if text_mode:
            return io.open(fname, encoding='utf-8')
        return open(fname)
    if os.path.exists(fname+'.gz'):
        fname = fname+'.gz'
    elif os.path.exists(fname+'.bz2') and bz2 is not None:
        fname = fname+'.bz2'
    if not os.path.exists(fname):
        return None
    if fname.endswith('.gz'):
        raw = gzip.GzipFile(fname)
    elif fname.endswith('.bz2') and bz2 is not None:
        raw = bz2.BZ2File(fname)
    else:
        return None
    if text_mode:
        # the callers match the lines against text patterns and split them
        # with text separators, so decode the binary stream
        return io.TextIOWrapper(raw, encoding='utf-8')
    return raw

def GrepInNames(pattern, fillcache=False):
    """Generate the characters whose UnicodeData.txt line matches pattern.

    pattern is a regular expression, matched case insensitively against
    whole lines of the first data file from UnicodeDataFileNames that can
    be opened.  The line of every matching character is stored in linecache.

    With fillcache set, linecache is filled but nothing is generated (the
    generator still has to be run through).  Without a data file and without
    fillcache, a warning is printed and the names known to the unicodedata
    module are searched instead.
    """
    p = re.compile(pattern, re.I)
    f = None
    for name in UnicodeDataFileNames:
        f = OpenGzip(name)
        if f is not None:
            break
    if f is None:
        if not fillcache:
            out( """
Cannot find UnicodeData.txt, please place it into 
/usr/share/unidata/UnicodeData.txt,
/usr/share/unicode/UnicodeData.txt, ~/.unicode/ or current 
working directory (optionally you can gzip it).
Without the file, searching will be much slower.

""" )
            # xrange does not exist in python3, where range is lazy itself
            try:
                codepoints = xrange(sys.maxunicode)
            except NameError:
                codepoints = range(sys.maxunicode)
            for i in codepoints:
                try:
                    name = unicodedata.name(unichr(i))
                    if p.search(name):
                        yield myunichr(i)
                except ValueError:
                    # raised by unicodedata.name() for a character without a name
                    pass
        return
    for l in f:
        if p.search(l):
            r = myunichr(int(l.split(';')[0], 16))
            linecache[r] = l
            if not fillcache:
                yield r
    f.close()
 

def valfromcp(n, cp=None):
    """Return the list of codepoints that the number n stands for.

    Without cp, n is a codepoint already and [n] is returned.  With cp, the
    hexadecimal digits of n are taken as a sequence of bytes in the encoding
    cp; the bytes are decoded and the codepoints of the resulting characters
    are returned.
    """
    if not cp:
        return [n]
    import binascii
    xh = '%x' % n
    if len(xh) % 2: # pad hexadecimal representation with a zero
        xh = '0'+xh
    # build a real byte string, it can be decoded under python 2 and 3 alike
    raw = binascii.unhexlify(xh)
    return [ord(x) for x in raw.decode(cp)]

def myunichr(n):
    "return the character with codepoint n; if that fails, print the traceback and an explanation and exit"
    try:
        return unichr(n)
    except OverflowError:
        traceback.print_exc()
        error("The codepoint is too big - it does not fit into an int.")
    except ValueError:
        traceback.print_exc()
        hint = ""
        if sys.maxunicode <= 0xffff:
            hint = "\nPerhaps your python interpreter is not compiled with wide unicode characters."
        error("The codepoint is too big." + hint)


def guesstype(arg):
    """Guess what kind of argument arg is.

    Return a pair (type, value) where type is one of 'empty string',
    'string', 'hexadecimal', 'binary' and 'regexp' and value is the text to
    interpret as that type.  For the U+XXXX and UXXXX forms of a hexadecimal
    number the value is the number with the prefix removed, so that it can
    be converted with int(value, 16).
    """
    if not arg: # empty string
        return 'empty string', arg
    elif not is_ascii(arg):
        return 'string', arg
    elif arg[:2] in ('U+', 'u+'): # it is hexadecimal number
        digits = arg[2:]
    elif arg[0] in "Uu" and len(arg)>4:
        digits = arg[1:]
    elif len(arg)>=4:
        if len(arg) in (8, 16, 24, 32):
            if all(x in '01' for x in arg):
                val = int(arg, 2)
                if val<=sys.maxunicode:
                    return 'binary', arg
        digits = arg
    else:
        return 'string', arg
    # what is not a hexadecimal number of a possible codepoint is a regexp
    try:
        val = int(digits, 16)
    except ValueError:
        return 'regexp', arg
    if val<0 or val>sys.maxunicode:
        return 'regexp', arg
    return 'hexadecimal', digits

def process(arglist, t, fromcp=None):
    """Convert the command line arguments into a list of characters.

    arglist is the list of arguments; t is the type to interpret them as
    ('hexadecimal', 'decimal', 'octal', 'binary', 'regexp' or 'string'), or
    None to guess the type of each argument with guesstype(); fromcp is an
    optional encoding that numerical arguments are converted from, see
    valfromcp().

    The properties of the characters found are loaded into the line cache
    on the way.  An argument that is not a valid number of the requested
    type, or not a valid string in the I/O charset, ends the program with
    an error message.
    """
    # bases of the numerical argument types
    number_bases = {'hexadecimal': 16, 'decimal': 10, 'octal': 8, 'binary': 2}
    # build a list of values, so that we can combine queries like
    # LATIN ALPHA and search for LATIN.*ALPHA and not names that
    # contain either LATIN or ALPHA
    result = []
    names_query = [] # reserved for queries in names - i.e. -r
    for arg_i in arglist:
        if t is None:
            tp, arg = guesstype(arg_i)
            if tp == 'regexp':
                # if the first argument is guessed to be a regexp, add
                # all the following arguments to the regular expression -
                # this is probably what you wanted, e.g.
                # 'unicode cyrillic be' will now search for the 'cyrillic.*be' regular expression
                t = 'regexp'
        else:
            tp, arg = t, arg_i
        if tp in number_bases:
            try:
                val = int(arg, number_bases[tp])
            except ValueError:
                error("Cannot interpret %s as a %s number." % (repr(arg), tp))
            for val in valfromcp(val, fromcp):
                r = myunichr(val)
                list(GrepInNames('%04X'%val, fillcache=True)) # fill the table with character properties
                result.append(r)
        elif tp=='regexp':
            names_query.append(arg)
        elif tp=='string':
            try:
                if PY3: # argv is automatically decoded into unicode, even padded with bogus character if it is not encodable
                    unirepr = arg
                else:
                    unirepr = unicode(arg, options.iocharset)
            except UnicodeDecodeError:
                error ("Sequence %s is not valid in charset '%s'." % (repr(arg),  options.iocharset))
            unilist = ['%04X'%ord(x) for x in unirepr]
            unireg = '|'.join(unilist)
            list(GrepInNames(unireg, fillcache=True))
            result.extend(unirepr)
        elif tp=='empty string':
            pass # do not do anything for an empty string
    if names_query:
        query = '.*'.join(names_query)
        result.extend(GrepInNames(query))
    return result

def maybe_colours(colour):
    "return the escape sequence for colour, or an empty string when colours are switched off"
    if not use_colour:
        return ""
    return colours[colour]

# format key and value
def printkv(*l):
    for i in range(0, len(l), 2):
        if i<len(l)-2:
            sep = "  "
        else:
            sep = "\n"
        k, v = l[i], l[i+1]
        out(maybe_colours('green'))
        out(k)
        out(": ")
        out(maybe_colours('default'))
        out(unicode(v))
        out(sep)

def print_characters(clist, maxcount, query_wiki=0):
    """Print a description of every character of clist.

    maxcount   - maximal number of characters to describe, 0 means no limit;
                 when there are more, a notice is printed and the rest of
                 the characters is skipped
    query_wiki - 0 - don't
                 1 - spawn browser (for the very first character only)
    """
    counter = 0
    for c in clist:

        if query_wiki:
            # quote() lives in urllib.parse under python 3
            try:
                from urllib.parse import quote
            except ImportError:
                from urllib import quote
            ch = quote(c.encode('utf-8')) # wikipedia uses UTF-8 in names
            wiki_url = 'http://en.wikipedia.org/wiki/'+ch
            webbrowser.open(wiki_url)
            query_wiki = 0 # query only the very first character


        if maxcount:
            counter += 1
        if counter > maxcount:
            out("\nToo many characters to display, more than %s, use --max option to change it\n" % maxcount)
            return
        properties = get_unicode_properties(c)
        out(maybe_colours('bold'))
        out('U+%04X '% ord(c))
        if properties['name']:
            out(properties['name'])
        else:
            out(maybe_colours('default'))
            out(" - No such unicode character name in database")
        out(maybe_colours('default'))
        out('\n')

        ar = ["UTF-8", ' '.join([("%02x" % ord23(x)) for x in c.encode('utf-8')]) ,
              "UTF-16BE", ''.join([("%02x" % ord23(x)) for x in c.encode('utf-16be')]),
              "Decimal", "&#%s;" % ord(c) ]
        if options.addcharset:
            try:
                # ord23, because the elements of the encoded string are
                # integers under python 3
                rep = ' '.join([("%02x" % ord23(x)) for x in c.encode(options.addcharset)] )
            except UnicodeError:
                rep = "NONE"
            ar.extend( [options.addcharset, rep] )
        printkv(*ar)


        if properties['combining']:
            pc = " "+c
        else:
            pc = c
        out(pc)
        # a case mapping is shown only if it is one character different
        # from c, anything else cannot be printed as a single codepoint
        uppercase = properties['uppercase']
        lowercase = properties['lowercase']
        has_upper = uppercase and len(uppercase)==1 and uppercase!=c
        has_lower = lowercase and len(lowercase)==1 and lowercase!=c
        if has_upper:
            out(" (%s)" % uppercase)
            out('\n')
            printkv( "Uppercase", 'U+%04X'% ord(uppercase) )
        elif has_lower:
            out(" (%s)" % lowercase)
            out('\n')
            printkv( "Lowercase", 'U+%04X'% ord(lowercase) )
        else:
            out('\n')
        category = properties['category']
        printkv( 'Category', category+ " (%s)" % general_category.get(category, '?') )

        if properties['numeric_value']:
            printkv( 'Numeric value',  properties['numeric_value'])
        if properties['digit_value']:
            printkv( 'Digit value',  properties['digit_value'])

        bidi = properties['bidi']
        if bidi:
            # .get(), a class missing from the table must not stop the program
            printkv( 'Bidi', bidi+ " (%s)" % bidi_category.get(bidi, '?') )
        mirrored = properties['mirrored']
        if mirrored:
            out('Character is mirrored\n')
        comb = properties['combining']
        if comb:
            printkv( 'Combining', str(comb)+ " (%s)" % (comb_classes.get(comb, '?')) )
        decomp = properties['decomposition']
        if decomp:
            printkv( 'Decomposition', decomp )
        if options.verbosity>0:
            uhp = get_unihan_properties(c)
            for key in uhp:
                printkv(key, uhp[key])
        out('\n')


def print_block(block):
    "print a table of the 256 characters with codepoints from block*256, 16 characters per row"
    # header
    out(" "*10)
    for column in range(16):
        out(".%X " % column)
    out('\n')
    # body
    first_row = block*16
    for row in range(first_row, first_row+16):
        label = "%X" % row
        if len(label) <= 3:
            label = "     %03X" % row
        else:
            wide = "%07X" % row
            label = wide[:4]+" "+wide[4:]
        out(LTR+label+".  ")
        for column in range(16):
            glyph = unichr(row*16+column)
            # a combining character gets a space to combine with
            if unicodedata.combining(glyph):
                glyph = " "+glyph
            out(glyph)
            out('  ')
        out('\n')
    out('\n')

def print_blocks(blocks):
    "print the table of characters for every block number in blocks"
    for number in blocks:
        print_block(number)

def is_range(s, typ):
    """Return the range of block numbers for s of the form 'first..last', or False.

    's' is split at '..'; if one of the two ends is empty, the other one is
    used for both.  Each end is converted with process(), interpreted as
    typ, and has to give exactly one character.  The numbers returned are
    the codepoints divided by 256, for use with print_blocks().
    """
    parts = s.split('..')
    if len(parts) != 2:
        return False
    first, last = parts
    if not last:
        last = first
    elif not first:
        first = last
    if not first:
        return False
    # intentionally no fromcp here, ranges are only of unicode characters
    first_chars = list(process([first], typ))
    last_chars = list(process([last], typ))
    if not (len(first_chars) == 1 and len(last_chars) == 1):
        return False
    return range(ord(first_chars[0]) // 256, ord(last_chars[0]) // 256 + 1)



# Command line options. The options -x, -o, -b, -d, -r, -s and -a all store
# into the same destination, "type", which says how the arguments are to be
# interpreted (None, stored by -a, means to guess).
parser = OptionParser(usage="usage: %prog [options] arg")
parser.add_option("-x", "--hexadecimal",
      action="store_const", const='hexadecimal', dest="type", 
      help="Assume arg to be hexadecimal number")
parser.add_option("-o", "--octal",
      action="store_const", const='octal', dest="type", 
      help="Assume arg to be octal number")
parser.add_option("-b", "--binary",
      action="store_const", const='binary', dest="type", 
      help="Assume arg to be binary number")
parser.add_option("-d", "--decimal",
      action="store_const", const='decimal', dest="type",
      help="Assume arg to be decimal number")
parser.add_option("-r", "--regexp",
      action="store_const", const='regexp', dest="type",
      help="Assume arg to be regular expression")
parser.add_option("-s", "--string",
      action="store_const", const='string', dest="type",
      help="Assume arg to be a sequence of characters")
parser.add_option("-a", "--auto",
      action="store_const", const=None, dest="type",
      help="Try to guess arg type (default)")
# limit of the number of characters described
parser.add_option("-m", "--max",
      action="store", default=10, dest="maxcount", type="int",
      help="Maximal number of codepoints to display, default: 10; 0=unlimited")
# character sets: of the terminal, of the numerical arguments and
# an additional one to show the characters in
parser.add_option("-i", "--io",
      action="store", default=iocharsetguess, dest="iocharset", type="string",
      help="I/O character set, I am guessing %s" % iocharsetguess)
parser.add_option("--fcp", "--fromcp",
      action="store", default='', dest="fromcp", type="string",
      help="Convert numerical arguments from this encoding, default: no conversion")
parser.add_option("-c", "--charset-add",
      action="store", dest="addcharset", type="string",
      help="Show hexadecimal reprezentation in this additional charset")
# --colour and --color store into the same destination, "use_colour"
parser.add_option("-C", "--colour",
      action="store", dest="use_colour", type="string",
      default="auto",
      help="Use colours, on, off or auto")
parser.add_option('', "--color",
      action="store", dest="use_colour", type="string",
      default="auto",
      help="synonym for --colour")
# -v and -w are counted, they can be given several times
parser.add_option("-v", "--verbose",
      action="count", dest="verbosity",
      default=0,
      help="Increase verbosity (reads Unihan properties - slow!)")
parser.add_option("-w", "--wikipedia",
      action="count", dest="query_wiki",
      default=0,
      help="Query wikipedia for the character")
parser.add_option("--list",
      action="store_const", dest="list_all_encodings",
      const=True,
      help="List (approximately) all known encodings")


options, arguments = parser.parse_args()

# lines of UnicodeData.txt found so far, keyed by character
linecache = {}
do_init()


# --list: print the names of the files of the encodings package and exit
if options.list_all_encodings:
    all_encodings = sorted(set(
        os.path.splitext(x)[0]
        for x in os.listdir(os.path.dirname(encodings.__file__))))
    print (textwrap.fill(' '.join(all_encodings)))
    sys.exit()

# nothing to look up
if not arguments:
    parser.print_help()
    sys.exit()


# decide about colours: an explicit on or off, otherwise colours are
# used when writing to a terminal, but never on win32
if options.use_colour.lower() in ("on", "1", "true", "yes"):
    use_colour = True
elif options.use_colour.lower() in ("off", "0", "false", "no"):
    use_colour = False
elif sys.platform == 'win32':
    use_colour = False
else:
    use_colour = sys.stdout.isatty()

# ranges are printed as tables of characters right away,
# the other arguments are collected and processed together
l_args = [] # list of non range arguments to process
for argum in arguments:
    is_r = is_range(argum, options.type)
    if not is_r:
        l_args.append(argum)
        continue
    print_blocks(is_r)

if l_args:
    # list of file names for Unihan data file(s), empty if not available
    # (they are looked for only if verbose output was asked for)
    unihan_fs = []
    if options.verbosity>0:
        unihan_fs = get_unihan_files()
        if not unihan_fs:
            out( """
Unihan_*.txt files not found. In order to view Unihan properties, 
please place the file into /usr/share/unidata/, 
/usr/share/unicode/, ~/.unicode/
or current working directory (optionally you can gzip or bzip2 them).
You can get the files by unpacking ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip
Warning, listing UniHan Properties is rather slow.

""")
            options.verbosity = 0
    try:
        characters = process(l_args, options.type, options.fromcp)
        print_characters(characters, options.maxcount, options.query_wiki)
    except IOError: # e.g. broken pipe
        pass