annotate interps/lambda/tokenizer.py @ 12500:e48c08805365 draft default tip

<b_jonas> ` learn \'The password of the month is Cthulhuquagdonic Mothraquagdonic Narwhalicorn.\' # https://logs.esolangs.org/libera-esolangs/2024-04.html#lKE Infinite craft
author HackEso <hackeso@esolangs.org>
date Wed, 01 May 2024 06:39:10 +0000
parents 859f9b4339e6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
996
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
1 symbols = '()\\.,=;'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
2
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
3 class TokenizerException(Exception):
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
4 pass
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
5
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
6
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
7 def tokenize(s):
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
8 tokens = []
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
9 current = ''
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
10 state = 'begin'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
11 index = 0
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
12 s = s + ' ' # a cheesy way to simplify token termination conditions
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
13 while index < len(s):
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
14 c = s[index]
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
15 index += 1
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
16 if state == 'begin':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
17 current = c
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
18 if c.isspace():
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
19 continue
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
20 if c.isalpha():
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
21 state = 'name'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
22 continue
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
23 if c == '{':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
24 state = 'comment'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
25 continue
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
26 if c == '"':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
27 state = 'string'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
28 current = ''
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
29 continue
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
30 if c == '#': # a "Special function"
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
31 state = 'special'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
32 continue
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
33 if c in symbols: # guaranteed single-character token
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
34 tokens.append(c)
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
35 continue
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
36 raise TokenizerException("Tokenizer can't comprehend '" + c + "'")
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
37 if state == 'name':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
38 if c.isalpha() or c.isdigit() or c == '_' or c == '-':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
39 current += c
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
40 else:
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
41 tokens.append(current)
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
42 state = 'begin'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
43 index -= 1
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
44 continue
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
45 if state == 'string':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
46 current += c
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
47 if c == '"' and current != '"':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
48 tokens.append('"' + current)
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
49 state = 'begin'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
50 if state == 'comment':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
51 if c == '}':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
52 state = 'begin'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
53 if state == 'special':
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
54 if (c == '#' and current == '') or c.isalpha():
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
55 current += c
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
56 else:
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
57 tokens.append(current)
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
58 state = 'begin'
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
59 index -= 1
859f9b4339e6 <Gregor> tar xf egobot.tar.xz
HackBot
parents:
diff changeset
60 return tokens