Mercurial > repo
view interps/lambda/tokenizer.py @ 8509:272663b31668
<b_jonas> slashlearn block device/Block devices are a concession made in Unix to make raw hard disks and magnetic tape have a similar interface to regular files and terminals. Since magnetic tapes can\'t write individual bytes, only entire blocks, the interface isn\'t exactly the same, thus the dreaded dd obs= option was born.
author | HackBot |
---|---|
date | Fri, 17 Jun 2016 08:35:50 +0000 |
parents | 859f9b4339e6 |
children |
line wrap: on
line source
# Characters that always form a complete token on their own.
symbols = '()\\.,=;'


class TokenizerException(Exception):
    """Raised when a character cannot start any known kind of token."""


def tokenize(s):
    """Split the source text ``s`` into a flat list of token strings.

    The tokenizer is a small hand-written state machine that recognises:

    * names: an alphabetic character followed by any run of alphabetic
      characters, digits, ``_`` or ``-``;
    * symbols: each character listed in ``symbols`` is its own token;
    * string literals: text between double quotes; the token keeps both
      quotes (``"abc"`` yields ``'"abc"'``, ``""`` yields ``'""'``).
      There is no escape mechanism, so the next ``"`` always closes it;
    * special functions: ``#`` followed by any run of alphabetic
      characters; the token keeps the leading ``#`` (a lone ``#`` yields
      the token ``'#'``);
    * comments: everything from ``{`` up to the first ``}`` is dropped
      (comments do not nest);
    * whitespace between tokens is skipped.

    A string literal or comment that is still open when the input ends is
    silently discarded and produces no token.

    Args:
        s: The source text to tokenize.

    Returns:
        The list of tokens, in the order they appear in ``s``.

    Raises:
        TokenizerException: If a character that cannot start a token
            (a digit, for instance) is met outside a string or comment.
    """
    tokens = []
    current = ''
    state = 'begin'
    index = 0
    # Sentinel whitespace: guarantees that a name or special token sitting
    # at the very end of the input is flushed by the normal "terminator
    # character" logic instead of needing separate end-of-input handling.
    s = s + ' '
    while index < len(s):
        c = s[index]
        index += 1

        if state == 'begin':
            # Tentatively start the token with this character; the string
            # state overrides this because the opening quote is re-added
            # when the token is emitted.
            current = c
            if c.isspace():
                continue
            if c.isalpha():
                state = 'name'
            elif c == '{':
                state = 'comment'
            elif c == '"':
                state = 'string'
                current = ''
            elif c == '#':
                # A "special function".
                state = 'special'
            elif c in symbols:
                # Guaranteed single-character token.
                tokens.append(c)
            else:
                raise TokenizerException(
                    "Tokenizer can't comprehend '" + c + "'")

        elif state == 'name':
            if c.isalpha() or c.isdigit() or c == '_' or c == '-':
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                # The terminator may itself start a token: re-read it.
                index -= 1

        elif state == 'string':
            current += c
            # Any double quote closes the literal.  The opening quote was
            # consumed in the 'begin' state, so an immediately following
            # quote is a legitimate empty string, not the opener.
            if c == '"':
                tokens.append('"' + current)
                state = 'begin'

        elif state == 'comment':
            if c == '}':
                state = 'begin'

        elif state == 'special':
            # ``current`` already holds the leading '#'; only letters
            # extend the token, anything else (including another '#')
            # terminates it.
            if c.isalpha():
                current += c
            else:
                tokens.append(current)
                state = 'begin'
                # The terminator may itself start a token: re-read it.
                index -= 1

    return tokens