Mercurial > repo
view interps/lambda/tokenizer.py @ 11562:6b0304dcec5c draft
<oerjan> ` cd bin; cp welcome \xd0\xb4\xd0\xbe\xd0\xb1\xd1\x80\xd0\xbe-\xd0\xbf\xd0\xbe\xd0\xb6\xd0\xb0\xd0\xbb\xd0\xbe\xd0\xb2\xd0\xb0\xd1\x82\xd1\x8c; sled \xd0\xb4\xd0\xbe\xd0\xb1\xd1\x80\xd0\xbe-\xd0\xbf\xd0\xbe\xd0\xb6\xd0\xb0\xd0\xbb\xd0\xbe\xd0\xb2\xd0\xb0\xd1\x82\xd1\x8c//s,welcome,welcome.ru,
author | HackEso <hackeso@esolangs.org> |
---|---|
date | Wed, 16 May 2018 04:46:17 +0100 |
parents | 859f9b4339e6 |
children |
line wrap: on
line source
# Single-character tokens recognized by the tokenizer (the backslash is the
# lambda-abstraction marker in this language).
symbols = '()\\.,=;'


class TokenizerException(Exception):
    """Raised when the tokenizer meets a character it cannot handle."""
    pass


def tokenize(s):
    """Split the source string ``s`` into a flat list of token strings.

    Token kinds produced:
      * names    -- a letter followed by letters, digits, '_' or '-'
      * symbols  -- single characters from ``symbols``
      * strings  -- double-quoted literals, returned WITH both quotes
      * specials -- '#' followed by letters (e.g. '#foo')
    ``{ ... }`` comments and whitespace are skipped.

    Raises:
        TokenizerException: on any character that fits none of the above.
    """
    tokens = []
    current = ''
    state = 'begin'
    index = 0
    s = s + ' '  # a cheesy way to simplify token termination conditions
    while index < len(s):
        c = s[index]
        index += 1
        if state == 'begin':
            current = c
            if c.isspace():
                continue
            if c.isalpha():
                state = 'name'
                continue
            if c == '{':
                state = 'comment'
                continue
            if c == '"':
                state = 'string'
                current = ''  # accumulate the string body without the quotes
                continue
            if c == '#':  # a "special function"
                state = 'special'
                continue
            if c in symbols:  # guaranteed single-character token
                tokens.append(c)
                continue
            raise TokenizerException("Tokenizer can't comprehend '" + c + "'")
        if state == 'name':
            # Exact isalpha()/isdigit() tests on purpose: isalnum() also
            # accepts Unicode numerics (e.g. vulgar fractions) and would
            # widen the accepted name alphabet.
            if c.isalpha() or c.isdigit() or c == '_' or c == '-':
                current += c
            else:
                # Name ended; re-examine this character in 'begin' state.
                tokens.append(current)
                state = 'begin'
                index -= 1
            continue
        if state == 'string':
            if c == '"':
                # BUG FIX: the original required ``current != '"'`` before
                # closing (the closing quote was first appended to the body),
                # which made the empty literal '""' unterminable -- it stayed
                # in 'string' state and swallowed the rest of the input.
                # Any '"' now closes the literal; non-empty string tokens are
                # byte-identical to the original's ('"' + body + '"').
                tokens.append('"' + current + '"')
                state = 'begin'
            else:
                current += c
            continue
        if state == 'comment':
            if c == '}':
                state = 'begin'
            continue
        if state == 'special':
            # NOTE(review): ``current == ''`` can never hold here -- 'begin'
            # sets current = '#' before entering this state -- so '##...'
            # tokens are not actually producible; condition kept as-is to
            # preserve behavior.
            if (c == '#' and current == '') or c.isalpha():
                current += c
            else:
                # Special token ended; re-examine c in 'begin' state.
                tokens.append(current)
                state = 'begin'
                index -= 1
    return tokens