interps/lambda/tokenizer.py @ 12518:2d8fe55c6e65 (draft, default, tip)

description: <int-e> learn The password of the month is release incident pilot.
author:      HackEso <hackeso@esolangs.org>
date:        Sun, 03 Nov 2024 00:31:02 +0000
parents:     859f9b4339e6
children:    (none)
symbols = '()\\.,=;'

class TokenizerException(Exception):
    pass

def tokenize(s):
    tokens = []
    current = ''
    state = 'begin'
    index = 0
    s = s + ' '  # a cheesy way to simplify token termination conditions
    while index < len(s):
        c = s[index]
        index += 1
        if state == 'begin':
            current = c
            if c.isspace():
                continue
            if c.isalpha():
                state = 'name'
                continue
            if c == '{':
                state = 'comment'
                continue
            if c == '"':
                state = 'string'
                current = ''
                continue
            if c == '#':
                # a "Special function"
                state = 'special'
                continue
            if c in symbols:
                # guaranteed single-character token
                tokens.append(c)
                continue
            raise TokenizerException("Tokenizer can't comprehend '" + c + "'")
        if state == 'name':
            if c.isalpha() or c.isdigit() or c == '_' or c == '-':
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                index -= 1
                continue
        if state == 'string':
            current += c
            if c == '"' and current != '"':
                tokens.append('"' + current)
                state = 'begin'
        if state == 'comment':
            if c == '}':
                state = 'begin'
        if state == 'special':
            if (c == '#' and current == '') or c.isalpha():
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                index -= 1
    return tokens
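
A minimal usage sketch follows. It assumes this file is importable as `tokenizer`, and the sample expression is only an illustration of the syntax the state machine appears to accept (names, the symbols ( ) \ . , = ;, {...} comments, and double-quoted strings); it is not taken from the repository.

# Hypothetical example, not part of the repository.
from tokenizer import tokenize  # assumes this module is importable as `tokenizer`

source = 'id = \\x. x; {a comment} greet = "hi";'
print(tokenize(source))
# expected: ['id', '=', '\\', 'x', '.', 'x', ';', 'greet', '=', '"hi"', ';']

Note that string tokens keep their surrounding double quotes, text inside {...} never reaches the token list, and whitespace only separates tokens.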