comparison interps/lambda/tokenizer.py @ 996:859f9b4339e6

<Gregor> tar xf egobot.tar.xz
author HackBot
date Sun, 09 Dec 2012 19:30:08 +0000
parents
children
comparison
equal deleted inserted replaced
995:6883f5911eb7 996:859f9b4339e6
# Characters that are always emitted as single-character tokens:
# ( ) \ . , = ;   (the '\\' is one backslash, used for lambda abstraction)
symbols = '()\\.,=;'
2
class TokenizerException(Exception):
    """Raised when the input contains a character that cannot start any token."""
    pass
5
6
def tokenize(s):
    """Split *s* into a flat list of token strings.

    Token kinds produced:
      * names    -- a letter followed by letters, digits, '_' or '-'
      * strings  -- double-quoted; emitted with both quotes kept (e.g. '"ab"')
      * specials -- '#' followed by letters (e.g. '#foo')
      * symbols  -- any single character in the module-level ``symbols``
    Whitespace and ``{ ... }`` comments are skipped and produce no tokens.

    Raises TokenizerException when a character cannot start any token.
    An unterminated string or comment at end of input is silently dropped
    (pre-existing behavior, kept).
    """
    tokens = []
    current = ''
    state = 'begin'
    index = 0
    s = s + ' '  # a cheesy way to simplify token termination conditions
    while index < len(s):
        c = s[index]
        index += 1
        if state == 'begin':
            current = c
            if c.isspace():
                continue
            if c.isalpha():
                state = 'name'
                continue
            if c == '{':
                state = 'comment'
                continue
            if c == '"':
                state = 'string'
                current = ''  # quotes are re-added when the token is emitted
                continue
            if c == '#':  # a "Special function"
                state = 'special'
                continue
            if c in symbols:  # guaranteed single-character token
                tokens.append(c)
                continue
            raise TokenizerException("Tokenizer can't comprehend '" + c + "'")
        if state == 'name':
            if c.isalpha() or c.isdigit() or c == '_' or c == '-':
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                index -= 1  # reprocess the terminator in the 'begin' state
                continue
        if state == 'string':
            current += c
            if c == '"':
                # BUG FIX: the original also required current != '"', which
                # made the empty string literal "" unterminable -- it silently
                # swallowed everything up to the next quote (or end of input).
                # current already ends with the closing quote, so prepend only
                # the opening one.
                tokens.append('"' + current)
                state = 'begin'
        if state == 'comment':
            if c == '}':
                state = 'begin'
        if state == 'special':
            if (c == '#' and current == '') or c.isalpha():
                # NOTE(review): current == '' can never hold here ('begin'
                # sets current to '#'), so '##...' never forms one token --
                # possibly intended as current == '#'; behavior kept as-is.
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                index -= 1  # reprocess the terminator in the 'begin' state
    return tokens