comparison ply-3.8/example/GardenSnake/GardenSnake.py @ 7267:343ff337a19b

<ais523> ` tar -xf ply-3.8.tar.gz
author HackBot
date Wed, 23 Mar 2016 02:40:16 +0000
parents
children
comparison
equal deleted inserted replaced
7266:61a39a120dee 7267:343ff337a19b
1 # GardenSnake - a parser generator demonstration program
2 #
3 # This implements a modified version of a subset of Python:
4 # - only 'def', 'return' and 'if' statements
5 # - 'if' only has 'then' clause (no elif nor else)
6 # - single-quoted strings only, content in raw format
7 # - numbers are decimal.Decimal instances (not integers or floats)
8 # - no print statement; use the built-in 'print' function
9 # - only < > == + - / * implemented (and unary + -)
10 # - assignment and tuple assignment work
11 # - no generators of any sort
12 # - no ... well, not quite a lot
13
14 # Why? I'm thinking about a new indentation-based configuration
15 # language for a project and wanted to figure out how to do it. Once
16 # I got that working I needed a way to test it out. My original AST
17 # was dumb so I decided to target Python's AST and compile it into
18 # Python code. Plus, it's pretty cool that it only took a day or so
19 # from sitting down with Ply to having working code.
20
21 # This uses David Beazley's Ply from http://www.dabeaz.com/ply/
22
23 # This work is hereby released into the Public Domain. To view a copy of
24 # the public domain dedication, visit
25 # http://creativecommons.org/licenses/publicdomain/ or send a letter to
26 # Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
27 # California, 94105, USA.
28 #
29 # Portions of this work are derived from Python's Grammar definition
30 # and may be covered under the Python copyright and license
31 #
32 # Andrew Dalke / Dalke Scientific Software, LLC
33 # 30 August 2006 / Cape Town, South Africa
34
35 # Changelog:
36 # 30 August - added link to CC license; removed the "swapcase" encoding
37
38 # Modifications for inclusion in PLY distribution
39 import sys
40 sys.path.insert(0,"../..")
41 from ply import *
42
43 ##### Lexer ######
44 #import lex
45 import decimal
46
# Token names for the GardenSnake lexer.  Keywords and literals come
# first, then operators/punctuation; the final four (WS aside) are
# synthetic tokens produced by the indentation filters rather than by
# Ply's regex engine itself.
tokens = (
    'DEF',
    'IF',
    'NAME',
    'NUMBER',     # Python decimals
    'STRING',     # single quoted strings only; syntax of raw strings
    'LPAR',
    'RPAR',
    'COLON',
    'EQ',
    'ASSIGN',
    'LT',
    'GT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'RETURN',
    'WS',         # leading whitespace (consumed by the indent filter)
    'NEWLINE',
    'COMMA',
    'SEMICOLON',
    'INDENT',     # synthesized by indentation_filter
    'DEDENT',     # synthesized by indentation_filter
    'ENDMARKER',  # synthesized by filter()
)
73
74 #t_NUMBER = r'\d+'
75 # taken from decimal.py but without the leading sign
def t_NUMBER(t):
    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
    # NOTE: Ply compiles token docstring regexes with re.VERBOSE, so the
    # stray space before \d+ in the exponent group is ignored and the
    # pattern still matches ordinary exponents like 1e10.
    # The lexeme text is converted to a decimal.Decimal token value.
    t.value = decimal.Decimal(t.value)
    return t
80
def t_STRING(t):
    r"'([^\\']+|\\'|\\\\)*'" # I think this is right ...
    # Strip the surrounding single quotes and interpret backslash
    # escapes.  NOTE: str.decode("string-escape") is Python 2 only.
    t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun
    return t
85
# Operator and punctuation tokens.  Ply orders simple string rules by
# decreasing regex length, so EQ ('==') is tried before ASSIGN ('=').
t_COLON = r':'
t_EQ = r'=='
t_ASSIGN = r'='
t_LT = r'<'
t_GT = r'>'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
t_COMMA = r','
t_SEMICOLON = r';'
97
98 # Ply nicely documented how to do this.
99
# Keywords are matched by t_NAME like any identifier and then
# reclassified here: each reserved word maps to its upper-cased token
# type ("def" -> "DEF", and so on).
RESERVED = dict((kw, kw.upper()) for kw in ("def", "if", "return"))
105
def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    # Identifier or keyword: reclassify reserved words via the RESERVED
    # map; everything else stays a NAME token.
    t.type = RESERVED.get(t.value, "NAME")
    return t
110
111 # Putting this before t_WS let it consume lines with only comments in
112 # them so the latter code never sees the WS part. Not consuming the
113 # newline. Needed for "if 1: #comment"
def t_comment(t):
    r"[ ]*\043[^\n]*" # \043 is '#'
    # Discard the comment (and any leading spaces).  The newline is left
    # in the stream for t_newline to handle.
    pass
117
118
119 # Whitespace
def t_WS(t):
    r' [ ]+ '
    # (Compiled with re.VERBOSE, so this matches one or more spaces; the
    # literal spaces around [ ]+ are ignored.)
    # Whitespace is significant only at the start of a line and outside
    # parentheses -- everywhere else it is silently dropped.
    if t.lexer.at_line_start and t.lexer.paren_count == 0:
        return t
124
125 # Don't generate newline tokens when inside of parenthesis, eg
126 # a = (1,
127 # 2, 3)
def t_newline(t):
    r'\n+'
    # Count every physical newline for error reporting, then emit a
    # single NEWLINE token -- but only outside parentheses, so that
    # bracketed expressions may span multiple lines.
    t.lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    if t.lexer.paren_count == 0:
        return t
134
def t_LPAR(t):
    r'\('
    # Track parenthesis nesting so t_WS/t_newline can be suppressed
    # inside bracketed expressions.
    t.lexer.paren_count += 1
    return t
139
def t_RPAR(t):
    r'\)'
    # check for underflow?  should be the job of the parser
    t.lexer.paren_count -= 1
    return t
145
146
def t_error(t):
    # Lexing is strict: any unrecognized character aborts immediately.
    # (The original also contained a "skip one char and continue" pair of
    # statements after this raise; they were unreachable dead code -- and
    # used a Python 2 print statement -- so they have been removed.)
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
151
152 ## I implemented INDENT / DEDENT generation as a post-processing filter
153
154 # The original lex token stream contains WS and NEWLINE characters.
155 # WS will only occur before any other tokens on a line.
156
157 # I have three filters. One tags tokens by adding two attributes.
158 # "must_indent" is True if the token must be indented from the
159 # previous code. The other is "at_line_start" which is True for WS
160 # and the first non-WS/non-NEWLINE on a line.  It flags the check to
161 # see if the new line has changed indentation level.
162
163 # Python's syntax has three INDENT states
164 # 0) no colon hence no need to indent
165 # 1) "if 1: go()" - simple statements have a COLON but no need for an indent
166 # 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
# Indentation state machine: a COLON makes an indent possible, and a
# COLON followed by NEWLINE makes one mandatory.
NO_INDENT = 0     # no colon seen: the next line must not indent
MAY_INDENT = 1    # colon seen: "if x: y()" style needs no indent
MUST_INDENT = 2   # colon + newline seen: a new block must indent

# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
    """Tag each token with .at_line_start and .must_indent.

    .at_line_start is True for WS tokens and for the first real token on
    a line; .must_indent is True only for the first real token after a
    COLON NEWLINE pair.  lexer.at_line_start mirrors the running flag.
    """
    at_start = True
    lexer.at_line_start = at_start
    state = NO_INDENT
    for tok in tokens:
        tok.at_line_start = at_start
        tok.must_indent = False
        kind = tok.type

        if kind == "COLON":
            at_start = False
            state = MAY_INDENT

        elif kind == "NEWLINE":
            at_start = True
            if state == MAY_INDENT:
                state = MUST_INDENT

        elif kind == "WS":
            # whitespace is only ever emitted at the start of a line
            assert tok.at_line_start == True
            at_start = True

        else:
            # A real token; it must be indented iff it directly follows
            # a COLON NEWLINE pair.
            tok.must_indent = (state == MUST_INDENT)
            at_start = False
            state = NO_INDENT

        yield tok
        lexer.at_line_start = at_start
206
def _new_token(type, lineno):
    """Manufacture a bare LexToken of the given type, with no value."""
    tok = lex.LexToken()
    tok.type, tok.value, tok.lineno = type, None, lineno
    return tok
213
214 # Synthesize a DEDENT tag
def DEDENT(lineno):
    # Synthesized (value-less) token closing one indentation level.
    return _new_token("DEDENT", lineno)
217
218 # Synthesize an INDENT tag
def INDENT(lineno):
    # Synthesized (value-less) token opening one indentation level.
    return _new_token("INDENT", lineno)
221
222
223 # Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
    # Consume the tagged stream from track_tokens_filter and replace the
    # WS bookkeeping with explicit INDENT/DEDENT tokens, maintaining a
    # stack of indentation column widths (like CPython's tokenizer).
    # A stack of indentation levels; will never pop item 0
    levels = [0]
    token = None
    depth = 0          # column width of the current line's leading WS
    prev_was_ws = False
##    if 1:
##        print "Process", token,
##        if token.at_line_start:
##            print "at_line_start",
##        if token.must_indent:
##            print "must_indent",
##        print
    for token in tokens:
        # WS only occurs at the start of the line
        # There may be WS followed by NEWLINE so
        # only track the depth here.  Don't indent/dedent
        # until there's something real.
        if token.type == "WS":
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            # WS tokens are never passed to the parser
            continue

        if token.type == "NEWLINE":
            depth = 0
            if prev_was_ws or token.at_line_start:
                # ignore blank lines
                continue
            # pass the other cases on through
            yield token
            continue

        # then it must be a real token (not WS, not NEWLINE)
        # which can affect the indentation level

        prev_was_ws = False
        if token.must_indent:
            # The current depth must be larger than the previous level
            if not (depth > levels[-1]):
                raise IndentationError("expected an indented block")

            levels.append(depth)
            yield INDENT(token.lineno)

        elif token.at_line_start:
            # Must be on the same level or one of the previous levels
            if depth == levels[-1]:
                # At the same level
                pass
            elif depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            else:
                # Back up; but only if it matches a previous level
                try:
                    i = levels.index(depth)
                except ValueError:
                    raise IndentationError("inconsistent indentation")
                # emit one DEDENT per level being closed
                for _ in range(i+1, len(levels)):
                    yield DEDENT(token.lineno)
                    levels.pop()

        yield token

    ### Finished processing ###

    # Must dedent any remaining levels at end of input
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)
297
298
299 # The top-level filter adds an ENDMARKER, if requested.
300 # Python's grammar uses it.
def filter(lexer, add_endmarker=True):
    """Yield the fully processed token stream for *lexer*.

    Chains the tagging and indentation filters over the raw Ply token
    stream and optionally appends the ENDMARKER token that the grammar's
    start rule expects.  NOTE: intentionally shadows the builtin filter().
    """
    token = None
    raw = iter(lexer.token, None)
    for token in indentation_filter(track_tokens_filter(lexer, raw)):
        yield token

    if add_endmarker:
        # reuse the last real token's line number when there is one
        yield _new_token("ENDMARKER", 1 if token is None else token.lineno)
313
314 # Combine Ply and my filters into a new lexer
315
class IndentLexer(object):
    # A lexer exposing the standard Ply token()/input() interface, but
    # whose stream has WS removed and INDENT/DEDENT/ENDMARKER added.
    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags)
        self.token_stream = None  # generator; set by input()
    def input(self, s, add_endmarker=True):
        # Fresh parenthesis counter per input chunk (t_LPAR/t_RPAR maintain it).
        self.lexer.paren_count = 0
        self.lexer.input(s)
        self.token_stream = filter(self.lexer, add_endmarker)
    def token(self):
        # Returns the next filtered token, or None at end of input
        # (Python 2 generator protocol: .next()).
        try:
            return self.token_stream.next()
        except StopIteration:
            return None
329
330 ########## Parser (tokens -> AST) ######
331
332 # also part of Ply
333 #import yacc
334
335 # I use the Python AST
336 from compiler import ast
337
338 # Helper function
def Assign(left, right):
    """Build an ast.Assign node for *left* = *right*.

    Supports a single Name target or a Tuple of Names (tuple unpacking);
    anything else raises SyntaxError.
    """
    if isinstance(left, ast.Name):
        # Single assignment on left
        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)

    if isinstance(left, ast.Tuple):
        # Tuple target - every element must itself be a Name node.
        targets = []
        for child in left.getChildren():
            if not isinstance(child, ast.Name):
                raise SyntaxError("that assignment not supported")
            targets.append(ast.AssName(child.name, 'OP_ASSIGN'))
        return ast.Assign([ast.AssTuple(targets)], right)

    raise SyntaxError("Can't do that yet")
355
356
357 # The grammar comments come from Python's Grammar/Grammar file
358
359 ## NB: compound_stmt in single_input is followed by extra NEWLINE!
360 # file_input: (NEWLINE | stmt)* ENDMARKER
def p_file_input_end(p):
    """file_input_end : file_input ENDMARKER"""
    # Wrap the accumulated statement list in a single Stmt node.
    p[0] = ast.Stmt(p[1])
def p_file_input(p):
    """file_input : file_input NEWLINE
                  | file_input stmt
                  | NEWLINE
                  | stmt"""
    # NEWLINE tokens carry their matched text (a string) while stmt
    # results are lists of nodes -- use that to tell the alternatives
    # apart.  (basestring is Python 2.)
    if isinstance(p[len(p)-1], basestring):
        if len(p) == 3:
            p[0] = p[1]
        else:
            p[0] = [] # p == 2 --> only a blank line
    else:
        if len(p) == 3:
            p[0] = p[1] + p[2]
        else:
            p[0] = p[1]
379
380
381 # funcdef: [decorators] 'def' NAME parameters ':' suite
382 # ignoring decorators
def p_funcdef(p):
    "funcdef : DEF NAME parameters COLON suite"
    # ast.Function(decorators, name, argnames, defaults, flags, doc, code)
    p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
386
387 # parameters: '(' [varargslist] ')'
def p_parameters(p):
    """parameters : LPAR RPAR
                  | LPAR varargslist RPAR"""
    # Empty parens give an empty parameter list; otherwise the
    # varargslist (a list of names) passes straight through.
    p[0] = [] if len(p) == 3 else p[2]
395
396
397 # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
398 # highly simplified
def p_varargslist(p):
    """varargslist : varargslist COMMA NAME
                   | NAME"""
    # Accumulate parameter names into a flat list.
    if len(p) == 4:
        # BUG FIX: the original did `p[1] + p[3]`, concatenating a list
        # with the NAME *string* -- a TypeError for any function with two
        # or more parameters.  The new name must be wrapped in a list
        # first (matching p_small_stmts / p_arglist).
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
406
407 # stmt: simple_stmt | compound_stmt
def p_stmt_simple(p):
    """stmt : simple_stmt"""
    # simple_stmt is already a list of nodes; pass it through unchanged
    p[0] = p[1]
412
def p_stmt_compound(p):
    """stmt : compound_stmt"""
    # compound_stmt is a single node; wrap it so stmt is always a list
    p[0] = [p[1]]
416
417 # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
def p_simple_stmt(p):
    """simple_stmt : small_stmts NEWLINE
                   | small_stmts SEMICOLON NEWLINE"""
    # A trailing semicolon before the newline is allowed; the list of
    # small statements passes through either way.
    p[0] = p[1]
422
def p_small_stmts(p):
    """small_stmts : small_stmts SEMICOLON small_stmt
                   | small_stmt"""
    # Collect semicolon-separated statements into one flat list.
    p[0] = [p[1]] if len(p) == 2 else p[1] + [p[3]]
430
431 # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
432 # import_stmt | global_stmt | exec_stmt | assert_stmt
def p_small_stmt(p):
    """small_stmt : flow_stmt
                  | expr_stmt"""
    # Only flow (return) and expression/assignment statements exist.
    p[0] = p[1]
437
438 # expr_stmt: testlist (augassign (yield_expr|testlist) |
439 # ('=' (yield_expr|testlist))*)
440 # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
441 # '<<=' | '>>=' | '**=' | '//=')
def p_expr_stmt(p):
    """expr_stmt : testlist ASSIGN testlist
                 | testlist """
    if len(p) == 2:
        # a bare expression: wrap in Discard so its value is dropped
        p[0] = ast.Discard(p[1])
    else:
        # assignment; the Assign() helper handles Name and Tuple targets
        p[0] = Assign(p[1], p[3])
450
def p_flow_stmt(p):
    "flow_stmt : return_stmt"
    # 'return' is the only flow-control statement implemented.
    p[0] = p[1]
454
455 # return_stmt: 'return' [testlist]
def p_return_stmt(p):
    "return_stmt : RETURN testlist"
    # Note: a bare 'return' with no expression is not accepted here.
    p[0] = ast.Return(p[2])
459
460
def p_compound_stmt(p):
    """compound_stmt : if_stmt
                     | funcdef"""
    # 'if' and 'def' are the only compound statements supported.
    p[0] = p[1]
465
def p_if_stmt(p):
    'if_stmt : IF test COLON suite'
    # ast.If takes a list of (condition, body) pairs plus an else body;
    # elif/else are unsupported, hence a single pair and None.
    p[0] = ast.If([(p[2], p[4])], None)
469
def p_suite(p):
    """suite : simple_stmt
             | NEWLINE INDENT stmts DEDENT"""
    # Either an inline statement list ("if x: y()") or an indented block.
    if len(p) == 2:
        p[0] = ast.Stmt(p[1])
    else:
        p[0] = ast.Stmt(p[3])
477
478
def p_stmts(p):
    """stmts : stmts stmt
             | stmt"""
    # Both sides deal in lists (a stmt result is already a list), so a
    # longer run simply concatenates.
    p[0] = p[1] + p[2] if len(p) == 3 else p[1]
486
487 ## Not using Python's approach because Ply supports precedence
488
489 # comparison: expr (comp_op expr)*
490 # arith_expr: term (('+'|'-') term)*
491 # term: factor (('*'|'/'|'%'|'//') factor)*
492 # factor: ('+'|'-'|'~') factor | power
493 # comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
494
# NOTE: these use Python 2 tuple-parameter syntax -- each helper takes a
# single (left, right) pair, matching how the binary_ops values are
# invoked in p_comparison.
def make_lt_compare((left, right)):
    return ast.Compare(left, [('<', right),])
def make_gt_compare((left, right)):
    return ast.Compare(left, [('>', right),])
def make_eq_compare((left, right)):
    return ast.Compare(left, [('==', right),])
501
502
# Map operator lexemes to AST constructors.  Every value is called with
# a single (left, right) tuple: the arithmetic ast.* classes accept one,
# and the comparison entries are the helper functions above.
binary_ops = {
    "+": ast.Add,
    "-": ast.Sub,
    "*": ast.Mul,
    "/": ast.Div,
    "<": make_lt_compare,
    ">": make_gt_compare,
    "==": make_eq_compare,
}
# Unary prefix operators; called with a single operand node.
unary_ops = {
    "+": ast.UnaryAdd,
    "-": ast.UnarySub,
}
# Ply precedence table, lowest binding first (comparisons bind loosest).
precedence = (
    ("left", "EQ", "GT", "LT"),
    ("left", "PLUS", "MINUS"),
    ("left", "MULT", "DIV"),
)
521
def p_comparison(p):
    """comparison : comparison PLUS comparison
                  | comparison MINUS comparison
                  | comparison MULT comparison
                  | comparison DIV comparison
                  | comparison LT comparison
                  | comparison EQ comparison
                  | comparison GT comparison
                  | PLUS comparison
                  | MINUS comparison
                  | power"""
    # Dispatch on arity: 4 -> binary operator, 3 -> unary prefix, 2 -> power.
    if len(p) == 4:
        # binary_ops values take a single (left, right) tuple argument
        p[0] = binary_ops[p[2]]((p[1], p[3]))
    elif len(p) == 3:
        p[0] = unary_ops[p[1]](p[2])
    else:
        p[0] = p[1]
539
540 # power: atom trailer* ['**' factor]
541 # trailers enables function calls. I only allow one level of calls
542 # so this is 'trailer'
def p_power(p):
    """power : atom
             | atom trailer"""
    # A trailer, when present, is the ("CALL", args) pair from p_trailer.
    if len(p) == 2:
        p[0] = p[1]
    else:
        if p[2][0] == "CALL":
            # CallFunc(node, args, star_args, dstar_args)
            p[0] = ast.CallFunc(p[1], p[2][1], None, None)
        else:
            raise AssertionError("not implemented")
553
def p_atom_name(p):
    """atom : NAME"""
    # Plain identifier reference.
    p[0] = ast.Name(p[1])
557
def p_atom_number(p):
    """atom : NUMBER
            | STRING"""
    # Literal constant: a decimal.Decimal or raw string content.
    p[0] = ast.Const(p[1])
562
def p_atom_tuple(p):
    """atom : LPAR testlist RPAR"""
    # Parenthesized expression or tuple; testlist already built the node.
    p[0] = p[2]
566
567 # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
def p_trailer(p):
    "trailer : LPAR arglist RPAR"
    # Only call trailers are supported (no subscripts or attributes).
    p[0] = ("CALL", p[2])
571
572 # testlist: test (',' test)* [',']
573 # Contains shift/reduce error
def p_testlist(p):
    """testlist : testlist_multi COMMA
                | testlist_multi """
    if len(p) == 2:
        p[0] = p[1]
    else:
        # May need to promote singleton to tuple
        # (a trailing comma always means a tuple, even for one element)
        if isinstance(p[1], list):
            p[0] = p[1]
        else:
            p[0] = [p[1]]
    # Convert into a tuple?  A list here -- from multiple elements or
    # from the trailing-comma promotion above -- becomes a Tuple node.
    if isinstance(p[0], list):
        p[0] = ast.Tuple(p[0])
588
def p_testlist_multi(p):
    """testlist_multi : testlist_multi COMMA test
                      | test"""
    # A single test passes through unchanged; commas build up a list,
    # promoting the initial singleton on the first comma.
    if len(p) == 2:
        p[0] = p[1]
    elif isinstance(p[1], list):
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1], p[3]]
601
602
603 # test: or_test ['if' or_test 'else' test] | lambdef
604 # as I don't support 'and', 'or', and 'not' this works down to 'comparison'
def p_test(p):
    "test : comparison"
    # No 'and'/'or'/'not' or lambdas, so test reduces to comparison.
    p[0] = p[1]
608
609
610
611 # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
612 # XXX INCOMPLETE: this doesn't allow the trailing comma
def p_arglist(p):
    """arglist : arglist COMMA argument
               | argument"""
    # Build the call-argument list left to right.
    p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
620
621 # argument: test [gen_for] | test '=' test # Really [keyword '='] test
def p_argument(p):
    "argument : test"
    # Keyword arguments and generator arguments are not supported.
    p[0] = p[1]
625
def p_error(p):
    """Ply error hook: abort parsing by raising on the offending token."""
    raise SyntaxError(p)
629
630
class GardenSnakeParser(object):
    # Front end: couples the IndentLexer with the yacc-generated parser
    # and produces a (Python 2) compiler.ast Module.
    def __init__(self, lexer = None):
        if lexer is None:
            lexer = IndentLexer()
        self.lexer = lexer
        # "file_input_end" is the start symbol (file_input + ENDMARKER)
        self.parser = yacc.yacc(start="file_input_end")

    def parse(self, code):
        # Feed the source through the indentation-aware lexer, then parse.
        self.lexer.input(code)
        result = self.parser.parse(lexer = self.lexer)
        return ast.Module(None, result)
642
643
644 ###### Code generation ######
645
646 from compiler import misc, syntax, pycodegen
647
class GardenSnakeCompiler(object):
    # Back end: parse GardenSnake source and generate real Python
    # bytecode via the (Python 2 only) compiler package.
    def __init__(self):
        self.parser = GardenSnakeParser()
    def compile(self, code, filename="<string>"):
        tree = self.parser.parse(code)
        #print tree
        # attach the filename to every node so tracebacks point here
        misc.set_filename(filename, tree)
        syntax.check(tree)
        gen = pycodegen.ModuleCodeGenerator(tree)
        code = gen.getCode()
        return code
659
660 ####### Test code #######
661
# Entry point used below.  NOTE: intentionally shadows the builtin compile().
compile = GardenSnakeCompiler().compile

# Sample GardenSnake program exercising strings, comments, functions,
# one-line and block 'if', multi-line tuples and Decimal arithmetic.
# (This raw string is program input -- its content, typos included, is
# part of the test and must not be edited.)
code = r"""

print('LET\'S TRY THIS \\OUT')

#Comment here
def x(a):
    print('called with',a)
    if a == 1:
        return 2
    if a*2 > 10: return 999 / 4
    # Another comment here

    return a+2*3

ints = (1, 2,
   3, 4,
   5)
print('mutiline-expression', ints)

t = 4+1/3*2+6*(9-5+1)
print('predence test; should be 34+2/3:', t, t==(34+2/3))

print('numbers', 1,2,3,4,5)
if 1:
 8
 a=9
 print(x(a))

print(x(1))
print(x(2))
print(x(8),'3')
print('this is decimal', 1/5)
print('BIG DECIMAL', 1.234567891234567e12345)

"""

# Set up the GardenSnake run-time environment
def print_(*args):
    # GardenSnake code calls print(...) as a function; on this Python 2
    # host we provide one and install it under the name 'print' below.
    print "-->", " ".join(map(str,args))

globals()["print"] = print_

compiled_code = compile(code)

# Run the generated bytecode in this module's namespace (Python 2 exec).
exec compiled_code in globals()
print "Done"