Mercurial > repo
comparison interps/lambda/tokenizer.py @ 996:859f9b4339e6
<Gregor> tar xf egobot.tar.xz
author | HackBot |
---|---|
date | Sun, 09 Dec 2012 19:30:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
995:6883f5911eb7 | 996:859f9b4339e6 |
---|---|
# Characters that always form a complete, single-character token.
symbols = '()\\.,=;'


class TokenizerException(Exception):
    """Raised when the tokenizer meets input it cannot turn into tokens."""
    pass


def tokenize(s):
    """Split the source string ``s`` into a list of token strings.

    The tokenizer is a small state machine that recognizes:

    * names    -- a letter followed by letters, digits, ``_`` or ``-``;
    * symbols  -- any single character listed in ``symbols``;
    * strings  -- text between double quotes; the token keeps both quotes
                  (``"abc"`` yields ``'"abc"'``, ``""`` yields ``'""'``);
    * specials -- ``#`` followed by zero or more letters;
    * comments -- ``{`` up to the first ``}``; discarded, no nesting.

    Whitespace between tokens is skipped.

    Returns the tokens in source order.

    Raises TokenizerException if a token starts with a character that fits
    none of the categories above, or if a string literal is never closed.
    """
    tokens = []
    current = ''
    state = 'begin'
    index = 0
    # A trailing blank acts as a sentinel so that a name or special token
    # at the very end of the input is flushed by the normal termination
    # path instead of needing separate end-of-input handling.
    s = s + ' '
    while index < len(s):
        c = s[index]
        index += 1

        if state == 'begin':
            current = c
            if c.isspace():
                continue
            if c.isalpha():
                state = 'name'
            elif c == '{':
                state = 'comment'
            elif c == '"':
                state = 'string'
                # The opening quote is re-added when the token is emitted.
                current = ''
            elif c == '#':  # a "Special function"
                state = 'special'
            elif c in symbols:  # guaranteed single-character token
                tokens.append(c)
            else:
                raise TokenizerException(
                    "Tokenizer can't comprehend '" + c + "'")

        elif state == 'name':
            if c.isalpha() or c.isdigit() or c == '_' or c == '-':
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                # Re-read the terminating character as a fresh token start.
                index -= 1

        elif state == 'string':
            current += c
            # Any closing quote ends the literal, including the one that
            # immediately follows the opening quote (the empty string).
            if c == '"':
                tokens.append('"' + current)
                state = 'begin'

        elif state == 'comment':
            if c == '}':
                state = 'begin'

        elif state == 'special':
            # ``current`` already holds the leading '#', so only letters
            # can extend the token; a second '#' starts a new special.
            if c.isalpha():
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                # Re-read the terminating character as a fresh token start.
                index -= 1

    if state == 'string':
        # Report the error rather than silently dropping the literal.
        raise TokenizerException("Unterminated string literal")
    # An unterminated comment is deliberately ignored: nothing is lost.
    return tokens