996
|
# Characters that always stand alone as single-character tokens.
symbols = "()\\.,=;"
|
|
2
|
|
class TokenizerException(Exception):
    """Raised when the tokenizer meets a character it cannot classify."""
|
|
5
|
|
6
|
|
def tokenize(s, symbols='()\\.,=;'):
    """Split source string *s* into a flat list of string tokens.

    Token kinds produced:
      * names:    leading letter, then letters/digits/'_'/'-'
      * strings:  double-quoted; the returned token keeps both quotes
      * specials: '#' followed by letters (e.g. '#foo')
      * symbols:  any single character in *symbols*
    '{ ... }' comments and whitespace are discarded.

    *symbols* defaults to the module's symbol set; passing a different
    string generalizes the single-character token alphabet.

    Raises TokenizerException on any character it cannot classify.
    NOTE(review): an unterminated string or comment at end of input is
    silently dropped (original behavior, preserved).
    """
    tokens = []
    current = ''          # characters accumulated for the token in progress
    state = 'begin'
    index = 0
    s = s + ' '  # a cheesy way to simplify token termination conditions
    while index < len(s):
        c = s[index]
        index += 1
        if state == 'begin':
            current = c
            if c.isspace():
                continue
            if c.isalpha():
                state = 'name'
                continue
            if c == '{':
                state = 'comment'
                continue
            if c == '"':
                state = 'string'
                current = ''
                continue
            if c == '#':  # a "special function"
                state = 'special'
                continue
            if c in symbols:  # guaranteed single-character token
                tokens.append(c)
                continue
            raise TokenizerException("Tokenizer can't comprehend '" + c + "'")
        if state == 'name':
            if c.isalpha() or c.isdigit() or c == '_' or c == '-':
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                index -= 1  # re-scan the terminator from the 'begin' state
            continue
        if state == 'string':
            current += c
            if c == '"':
                # BUG FIX: the original guard was
                #   c == '"' and current != '"'
                # which made the empty string literal '""' impossible to
                # terminate — the tokenizer stayed in 'string' state and
                # silently mis-tokenized everything after it. Any closing
                # quote ends the string.
                tokens.append('"' + current)
                state = 'begin'
        if state == 'comment':
            if c == '}':
                state = 'begin'
        if state == 'special':
            # NOTE(review): the `c == '#' and current == ''` clause looks
            # unreachable — 'begin' always sets current = c = '#' before
            # entering this state — so '##name' yields ['#', '#name'].
            # Kept byte-identical; confirm intent before changing.
            if (c == '#' and current == '') or c.isalpha():
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                index -= 1  # re-scan the terminator from the 'begin' state
    return tokens
|