Mercurial > repo
diff interps/lambda/tokenizer.py @ 996:859f9b4339e6
<Gregor> tar xf egobot.tar.xz
author | HackBot |
---|---|
date | Sun, 09 Dec 2012 19:30:08 +0000 |
parents | |
children |
line wrap: on
line diff
# Characters that always form a complete token on their own.
symbols = '()\\.,=;'


class TokenizerException(Exception):
    """Raised when the tokenizer meets a character that cannot start a token."""


def tokenize(s):
    """Split the source text ``s`` into a flat list of token strings.

    Token kinds, in the order they are recognised:

    * whitespace separates tokens and is discarded;
    * a name starts with an alphabetic character and continues with
      alphabetic characters, digits, ``_`` or ``-``;
    * ``{ ... }`` is a comment and produces no token (no nesting);
    * a string runs from ``"`` to the next ``"`` and is emitted with both
      quotes included, so the empty string yields the token ``""``;
    * ``#`` followed by zero or more alphabetic characters is a "special
      function" token, emitted with its leading ``#``;
    * each character in ``symbols`` is a single-character token.

    Input that ends inside a string or a comment produces no token for the
    unfinished part and raises no error.

    Args:
        s: The text to tokenize.

    Returns:
        The list of tokens, in source order.

    Raises:
        TokenizerException: If a character that cannot start any token is
            found where a new token is expected.
    """
    tokens = []
    current = ''
    state = 'begin'
    index = 0
    # A trailing blank guarantees that a name or special token touching the
    # end of the input is still terminated and flushed by the loop below.
    s = s + ' '
    while index < len(s):
        c = s[index]
        index += 1
        if state == 'begin':
            if c.isspace():
                continue
            if c.isalpha():
                current = c
                state = 'name'
            elif c == '{':
                state = 'comment'
            elif c == '"':
                # Keep the opening quote in the buffer; the token is emitted
                # verbatim once the closing quote arrives.
                current = c
                state = 'string'
            elif c == '#':  # a "Special function"
                current = c
                state = 'special'
            elif c in symbols:  # guaranteed single-character token
                tokens.append(c)
            else:
                raise TokenizerException(
                    "Tokenizer can't comprehend '" + c + "'")
        elif state == 'name':
            if c.isalpha() or c.isdigit() or c == '_' or c == '-':
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                index -= 1  # re-read the terminator as the start of a token
        elif state == 'string':
            current += c
            # Any quote seen here is the closing one, including the quote
            # that immediately follows the opening quote (empty string).
            if c == '"':
                tokens.append(current)
                state = 'begin'
        elif state == 'comment':
            if c == '}':
                state = 'begin'
        elif state == 'special':
            # Only letters extend a special token; a second '#' ends the
            # current token and starts a new one.
            if c.isalpha():
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                index -= 1  # re-read the terminator as the start of a token
    return tokens