7267
|
1 # GardenSnake - a parser generator demonstration program
|
|
2 #
|
|
3 # This implements a modified version of a subset of Python:
|
|
4 # - only 'def', 'return' and 'if' statements
|
|
5 # - 'if' only has 'then' clause (no elif nor else)
|
|
6 # - single-quoted strings only, content in raw format
|
|
7 # - numbers are decimal.Decimal instances (not integers or floats)
|
|
# - no print statement; use the built-in 'print' function
|
|
9 # - only < > == + - / * implemented (and unary + -)
|
|
10 # - assignment and tuple assignment work
|
|
11 # - no generators of any sort
|
|
12 # - no ... well, no quite a lot
|
|
13
|
|
14 # Why? I'm thinking about a new indentation-based configuration
|
|
15 # language for a project and wanted to figure out how to do it. Once
|
|
16 # I got that working I needed a way to test it out. My original AST
|
|
17 # was dumb so I decided to target Python's AST and compile it into
|
|
18 # Python code. Plus, it's pretty cool that it only took a day or so
|
|
19 # from sitting down with Ply to having working code.
|
|
20
|
|
21 # This uses David Beazley's Ply from http://www.dabeaz.com/ply/
|
|
22
|
|
23 # This work is hereby released into the Public Domain. To view a copy of
|
|
24 # the public domain dedication, visit
|
|
25 # http://creativecommons.org/licenses/publicdomain/ or send a letter to
|
|
26 # Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
|
|
27 # California, 94105, USA.
|
|
28 #
|
|
29 # Portions of this work are derived from Python's Grammar definition
|
|
30 # and may be covered under the Python copyright and license
|
|
31 #
|
|
32 # Andrew Dalke / Dalke Scientific Software, LLC
|
|
33 # 30 August 2006 / Cape Town, South Africa
|
|
34
|
|
35 # Changelog:
|
|
36 # 30 August - added link to CC license; removed the "swapcase" encoding
|
|
37
|
|
38 # Modifications for inclusion in PLY distribution
|
|
39 import sys
|
|
40 sys.path.insert(0,"../..")
|
|
41 from ply import *
|
|
42
|
|
43 ##### Lexer ######
|
|
44 #import lex
|
|
45 import decimal
|
|
46
|
|
# Full token inventory for the GardenSnake lexer.  INDENT, DEDENT and
# ENDMARKER have no lex rules below: they are synthesized later by the
# post-processing filters.
tokens = (
    'DEF', 'IF', 'NAME',
    'NUMBER',    # Python decimals
    'STRING',    # single quoted strings only; syntax of raw strings
    'LPAR', 'RPAR', 'COLON',
    'EQ', 'ASSIGN', 'LT', 'GT',
    'PLUS', 'MINUS', 'MULT', 'DIV',
    'RETURN', 'WS', 'NEWLINE',
    'COMMA', 'SEMICOLON',
    'INDENT', 'DEDENT', 'ENDMARKER',
)
|
|
73
|
|
#t_NUMBER = r'\d+'
# taken from decimal.py but without the leading sign
|
|
def t_NUMBER(t):
    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
    # The docstring above is the token regex.  Convert the matched text in
    # place so the parser sees decimal.Decimal values, never raw strings.
    t.value = decimal.Decimal(t.value)
    return t
|
|
80
|
|
def t_STRING(t):
    r"'([^\\']+|\\'|\\\\)*'"
    # Drop the surrounding quotes and undo backslash escapes via the
    # Python 2 "string-escape" codec.
    t.value = t.value[1:-1].decode("string-escape")
    return t
|
|
85
|
|
# Fixed-text operator and punctuation tokens.  EQ ('==') is safe next to
# ASSIGN ('=') because ply sorts string rules by decreasing regex length.
t_COLON     = r':'
t_EQ        = r'=='
t_ASSIGN    = r'='
t_LT        = r'<'
t_GT        = r'>'
t_PLUS      = r'\+'
t_MINUS     = r'-'
t_MULT      = r'\*'
t_DIV       = r'/'
t_COMMA     = r','
t_SEMICOLON = r';'
|
|
97
|
|
# Ply nicely documented how to do this: match every identifier with one
# rule and reclassify the reserved words afterwards.

RESERVED = {
    "def": "DEF",
    "if": "IF",
    "return": "RETURN",
}

def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    # Keywords get their own token types instead of separate lex rules.
    t.type = RESERVED.get(t.value, "NAME")
    return t
|
|
110
|
|
# Defined before t_WS so a line holding only a comment is swallowed here
# and the whitespace rule never sees it.  The trailing newline is
# deliberately left unmatched -- needed for "if 1: #comment".
def t_comment(t):
    r"[ ]*\043[^\n]*"  # \043 is '#'
    pass
|
|
117
|
|
118
|
|
# Whitespace: only meaningful when it leads a physical line outside of
# any parenthesized expression; everywhere else it is discarded.
def t_WS(t):
    r' [ ]+ '
    # Swallow the token unless it is line-leading, top-level whitespace.
    if not t.lexer.at_line_start or t.lexer.paren_count != 0:
        return None
    return t
|
|
124
|
|
# Newlines advance the line counter but are suppressed inside
# parentheses, e.g.
#     a = (1,
#          2, 3)
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    # Emit the token only at paren depth zero.
    if t.lexer.paren_count != 0:
        return None
    return t
|
|
134
|
|
def t_LPAR(t):
    r'\('
    # Track nesting depth so t_WS/t_newline can mute themselves inside parens.
    t.lexer.paren_count = t.lexer.paren_count + 1
    return t
|
|
139
|
|
def t_RPAR(t):
    r'\)'
    # Underflow (too many closing parens) is left for the parser to report.
    t.lexer.paren_count = t.lexer.paren_count - 1
    return t
|
|
145
|
|
146
|
|
def t_error(t):
    """Lexer error hook: abort on the first unrecognized character.

    The skip-and-continue code that used to follow the raise was
    unreachable (and used a Python 2 print statement), so it is removed.
    """
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
|
|
151
|
|
152 ## I implemented INDENT / DEDENT generation as a post-processing filter
|
|
153
|
|
154 # The original lex token stream contains WS and NEWLINE characters.
|
|
155 # WS will only occur before any other tokens on a line.
|
|
156
|
|
157 # I have three filters. One tags tokens by adding two attributes.
|
|
158 # "must_indent" is True if the token must be indented from the
|
|
159 # previous code. The other is "at_line_start" which is True for WS
|
|
# and the first non-WS/non-NEWLINE token on a line.  It flags the check to
# see if the new line has changed indentation level.
|
|
162
|
|
# Python's syntax has three INDENT states
#  0) no colon hence no need to indent
#  1) "if 1: go()" - simple statements have a COLON but no need for an indent
#  2) "if 1:\n  go()" - complex statements have a COLON NEWLINE and must indent
NO_INDENT = 0
MAY_INDENT = 1
MUST_INDENT = 2


# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
    """Tag each token with 'at_line_start' and 'must_indent' attributes.

    'must_indent' becomes True on the first real token after a COLON
    NEWLINE pair; 'at_line_start' is True for WS tokens and for the first
    non-WS/non-NEWLINE token of a line.  The lexer's own at_line_start
    flag is kept in sync so t_WS can consult it.

    (The original had an unused local 'saw_colon'; removed.)
    """
    lexer.at_line_start = at_line_start = True
    indent = NO_INDENT
    for token in tokens:
        token.at_line_start = at_line_start

        if token.type == "COLON":
            at_line_start = False
            indent = MAY_INDENT
            token.must_indent = False

        elif token.type == "NEWLINE":
            at_line_start = True
            if indent == MAY_INDENT:
                indent = MUST_INDENT
            token.must_indent = False

        elif token.type == "WS":
            assert token.at_line_start == True
            at_line_start = True
            token.must_indent = False

        else:
            # A real token; only indent after COLON NEWLINE
            if indent == MUST_INDENT:
                token.must_indent = True
            else:
                token.must_indent = False
            at_line_start = False
            indent = NO_INDENT

        yield token
        lexer.at_line_start = at_line_start
|
|
206
|
|
def _new_token(type, lineno):
    """Manufacture a bare LexToken for tokens ply itself never produced
    (INDENT / DEDENT / ENDMARKER)."""
    token = lex.LexToken()
    token.type = type
    token.value = None
    token.lineno = lineno
    return token
|
|
213
|
|
def DEDENT(lineno):
    """Synthesize a DEDENT token at the given line."""
    return _new_token("DEDENT", lineno)


def INDENT(lineno):
    """Synthesize an INDENT token at the given line."""
    return _new_token("INDENT", lineno)
|
|
221
|
|
222
|
|
# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
    """Consume WS/NEWLINE bookkeeping tokens, emit INDENT/DEDENT events."""
    # Stack of indentation widths; slot 0 is module level and never pops.
    levels = [0]
    token = None
    depth = 0
    prev_was_ws = False
    for token in tokens:
        # WS only occurs at the start of a line, possibly followed by a
        # NEWLINE (blank line).  Just record the depth here; don't
        # indent/dedent until something real shows up.
        if token.type == "WS":
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            # WS tokens are never passed to the parser
            continue

        if token.type == "NEWLINE":
            depth = 0
            if prev_was_ws or token.at_line_start:
                # blank line -- swallow it
                continue
            # a significant NEWLINE: pass it through
            yield token
            continue

        # A real token (neither WS nor NEWLINE), which may change the
        # indentation level.
        prev_was_ws = False
        if token.must_indent:
            # First token of a new block: must be deeper than before.
            if not (depth > levels[-1]):
                raise IndentationError("expected an indented block")

            levels.append(depth)
            yield INDENT(token.lineno)

        elif token.at_line_start:
            # Must be on the same level or one of the previous levels
            if depth == levels[-1]:
                # same block continues
                pass
            elif depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            else:
                # Dedent -- but only to a depth seen before.
                try:
                    i = levels.index(depth)
                except ValueError:
                    raise IndentationError("inconsistent indentation")
                for _ in range(i + 1, len(levels)):
                    yield DEDENT(token.lineno)
                    levels.pop()

        yield token

    ### Finished processing ###

    # Close any blocks still open at end of input.
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)
|
|
297
|
|
298
|
|
# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def filter(lexer, add_endmarker = True):
    """Full pipeline: raw lex stream -> tagged -> INDENT/DEDENT -> ENDMARKER."""
    token = None
    tokens = iter(lexer.token, None)
    tokens = track_tokens_filter(lexer, tokens)
    for token in indentation_filter(tokens):
        yield token

    if add_endmarker:
        # Reuse the last token's line number; default to 1 for empty input.
        lineno = 1
        if token is not None:
            lineno = token.lineno
        yield _new_token("ENDMARKER", lineno)
|
|
313
|
|
# Combine Ply and my filters into a new lexer

class IndentLexer(object):
    """Wraps a ply lexer so .token() also yields INDENT/DEDENT/ENDMARKER."""

    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize,
                             lextab=lextab, reflags=reflags)
        self.token_stream = None

    def input(self, s, add_endmarker=True):
        # Fresh paren depth for each input; the filter generator is lazy.
        self.lexer.paren_count = 0
        self.lexer.input(s)
        self.token_stream = filter(self.lexer, add_endmarker)

    def token(self):
        # yacc expects None at end-of-stream rather than StopIteration.
        try:
            return self.token_stream.next()
        except StopIteration:
            return None
|
|
329
|
|
330 ########## Parser (tokens -> AST) ######
|
|
331
|
|
332 # also part of Ply
|
|
333 #import yacc
|
|
334
|
|
335 # I use the Python AST
|
|
336 from compiler import ast
|
|
337
|
|
# Helper function
def Assign(left, right):
    """Build an ast.Assign node for NAME = ... or (NAME, ...) = ... targets.

    Raises SyntaxError for any target other than a Name or a Tuple of
    Names.  (A dead 'names = []' that preceded the isinstance dispatch
    in the original has been removed.)
    """
    if isinstance(left, ast.Name):
        # Single assignment on left
        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
    elif isinstance(left, ast.Tuple):
        # List of things - make sure they are Name nodes
        names = []
        for child in left.getChildren():
            if not isinstance(child, ast.Name):
                raise SyntaxError("that assignment not supported")
            names.append(child.name)
        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
        return ast.Assign([ast.AssTuple(ass_list)], right)
    else:
        raise SyntaxError("Can't do that yet")
|
|
355
|
|
356
|
|
357 # The grammar comments come from Python's Grammar/Grammar file
|
|
358
|
|
## NB: compound_stmt in single_input is followed by extra NEWLINE!
# file_input: (NEWLINE | stmt)* ENDMARKER
def p_file_input_end(p):
    """file_input_end : file_input ENDMARKER"""
    p[0] = ast.Stmt(p[1])


def p_file_input(p):
    """file_input : file_input NEWLINE
                  | file_input stmt
                  | NEWLINE
                  | stmt"""
    # A NEWLINE arrives as a plain string; stmt results are node lists.
    if isinstance(p[len(p)-1], basestring):
        # Blank line: keep whatever statements we already have.
        if len(p) == 3:
            p[0] = p[1]
        else:
            p[0] = []  # lone blank line
    else:
        if len(p) == 3:
            p[0] = p[1] + p[2]
        else:
            p[0] = p[1]
|
|
379
|
|
380
|
|
# funcdef: [decorators] 'def' NAME parameters ':' suite
# ignoring decorators
def p_funcdef(p):
    "funcdef : DEF NAME parameters COLON suite"
    # Function(decorators, name, argnames, defaults, flags, doc, code)
    p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
|
|
386
|
|
# parameters: '(' [varargslist] ')'
def p_parameters(p):
    """parameters : LPAR RPAR
                  | LPAR varargslist RPAR"""
    if len(p) == 3:
        p[0] = []  # empty parameter list
    else:
        p[0] = p[2]
|
|
395
|
|
396
|
|
# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
# highly simplified
def p_varargslist(p):
    """varargslist : varargslist COMMA NAME
                   | NAME"""
    # Accumulate a flat list of parameter-name strings.
    if len(p) == 4:
        # BUG FIX: p[3] is a bare string; the original 'p[1] + p[3]'
        # concatenated list + str, raising TypeError for any def with
        # more than one parameter.  Wrap it in a list.
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
|
|
406
|
|
# stmt: simple_stmt | compound_stmt
def p_stmt_simple(p):
    """stmt : simple_stmt"""
    # simple_stmt is already a list of nodes
    p[0] = p[1]


def p_stmt_compound(p):
    """stmt : compound_stmt"""
    # wrap the single compound node in a list for uniformity
    p[0] = [p[1]]
|
|
416
|
|
# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
def p_simple_stmt(p):
    """simple_stmt : small_stmts NEWLINE
                   | small_stmts SEMICOLON NEWLINE"""
    # An optional trailing semicolon changes nothing.
    p[0] = p[1]
|
|
422
|
|
def p_small_stmts(p):
    """small_stmts : small_stmts SEMICOLON small_stmt
                   | small_stmt"""
    # Left-recursive accumulation of a statement list.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
|
|
430
|
|
# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
#             import_stmt | global_stmt | exec_stmt | assert_stmt
def p_small_stmt(p):
    """small_stmt : flow_stmt
                  | expr_stmt"""
    p[0] = p[1]
|
|
437
|
|
# expr_stmt: testlist (augassign (yield_expr|testlist) |
#                      ('=' (yield_expr|testlist))*)
# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
#             '<<=' | '>>=' | '**=' | '//=')
def p_expr_stmt(p):
    """expr_stmt : testlist ASSIGN testlist
                 | testlist """
    if len(p) == 2:
        # bare expression: evaluate and discard the result
        p[0] = ast.Discard(p[1])
    else:
        p[0] = Assign(p[1], p[3])
|
|
450
|
|
def p_flow_stmt(p):
    "flow_stmt : return_stmt"
    p[0] = p[1]


# return_stmt: 'return' [testlist]
def p_return_stmt(p):
    "return_stmt : RETURN testlist"
    # NB: the testlist is mandatory here; a bare 'return' is not supported.
    p[0] = ast.Return(p[2])
|
|
459
|
|
460
|
|
def p_compound_stmt(p):
    """compound_stmt : if_stmt
                     | funcdef"""
    p[0] = p[1]


def p_if_stmt(p):
    'if_stmt : IF test COLON suite'
    # No elif/else support: a single (test, body) pair with no else part.
    p[0] = ast.If([(p[2], p[4])], None)
|
|
469
|
|
def p_suite(p):
    """suite : simple_stmt
             | NEWLINE INDENT stmts DEDENT"""
    # Either an inline body ("if 1: go()") or an indented block.
    if len(p) == 2:
        p[0] = ast.Stmt(p[1])
    else:
        p[0] = ast.Stmt(p[3])
|
|
477
|
|
478
|
|
def p_stmts(p):
    """stmts : stmts stmt
             | stmt"""
    # stmt is always a list, so concatenation flattens the sequence.
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]
|
|
486
|
|
## Not using Python's layered-grammar approach because Ply supports precedence

# comparison: expr (comp_op expr)*
# arith_expr: term (('+'|'-') term)*
# term: factor (('*'|'/'|'%'|'//') factor)*
# factor: ('+'|'-'|'~') factor | power
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'

# Each builder takes a single (left, right) tuple -- see p_comparison.
def make_lt_compare(operands):
    left, right = operands
    return ast.Compare(left, [('<', right),])

def make_gt_compare(operands):
    left, right = operands
    return ast.Compare(left, [('>', right),])

def make_eq_compare(operands):
    left, right = operands
    return ast.Compare(left, [('==', right),])
|
|
501
|
|
502
|
|
# Map operator lexemes to AST builders.  The comparison entries are the
# tuple-taking helpers above; the arithmetic entries are compiler.ast
# node classes called the same way.
binary_ops = {
    "+": ast.Add,
    "-": ast.Sub,
    "*": ast.Mul,
    "/": ast.Div,
    "<": make_lt_compare,
    ">": make_gt_compare,
    "==": make_eq_compare,
}

unary_ops = {
    "+": ast.UnaryAdd,
    "-": ast.UnarySub,
}

# Lowest to highest binding; all left-associative.
precedence = (
    ("left", "EQ", "GT", "LT"),
    ("left", "PLUS", "MINUS"),
    ("left", "MULT", "DIV"),
)
|
|
521
|
|
def p_comparison(p):
    """comparison : comparison PLUS comparison
                  | comparison MINUS comparison
                  | comparison MULT comparison
                  | comparison DIV comparison
                  | comparison LT comparison
                  | comparison EQ comparison
                  | comparison GT comparison
                  | PLUS comparison
                  | MINUS comparison
                  | power"""
    # Three shapes: binary operator, unary operator, or a bare power term.
    if len(p) == 4:
        p[0] = binary_ops[p[2]]((p[1], p[3]))
    elif len(p) == 3:
        p[0] = unary_ops[p[1]](p[2])
    else:
        p[0] = p[1]
|
|
539
|
|
# power: atom trailer* ['**' factor]
# trailers enable function calls; only one level of calls is allowed,
# hence the singular 'trailer'.
def p_power(p):
    """power : atom
             | atom trailer"""
    if len(p) == 2:
        p[0] = p[1]
    else:
        # trailer is a ("CALL", args) pair produced by p_trailer
        if p[2][0] == "CALL":
            p[0] = ast.CallFunc(p[1], p[2][1], None, None)
        else:
            raise AssertionError("not implemented")
|
|
553
|
|
def p_atom_name(p):
    """atom : NAME"""
    p[0] = ast.Name(p[1])


def p_atom_number(p):
    """atom : NUMBER
            | STRING"""
    # Both lexer rules deliver ready-made constant values.
    p[0] = ast.Const(p[1])


def p_atom_tuple(p):
    """atom : LPAR testlist RPAR"""
    # The parentheses themselves produce no node.
    p[0] = p[2]
|
|
566
|
|
# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
# Only the call form is supported.
def p_trailer(p):
    "trailer : LPAR arglist RPAR"
    p[0] = ("CALL", p[2])
|
|
571
|
|
# testlist: test (',' test)* [',']
# Contains shift/reduce error
def p_testlist(p):
    """testlist : testlist_multi COMMA
                | testlist_multi """
    if len(p) == 2:
        p[0] = p[1]
    else:
        # A trailing comma promotes a singleton to a one-element list.
        if isinstance(p[1], list):
            p[0] = p[1]
        else:
            p[0] = [p[1]]
    # A list at this point denotes a tuple display.
    if isinstance(p[0], list):
        p[0] = ast.Tuple(p[0])
|
|
588
|
|
def p_testlist_multi(p):
    """testlist_multi : testlist_multi COMMA test
                      | test"""
    if len(p) == 2:
        # singleton passes through unchanged
        p[0] = p[1]
    else:
        if isinstance(p[1], list):
            p[0] = p[1] + [p[3]]
        else:
            # singleton -> two-element list
            p[0] = [p[1], p[3]]
|
|
601
|
|
602
|
|
# test: or_test ['if' or_test 'else' test] | lambdef
# with no 'and', 'or' or 'not' support this bottoms out at 'comparison'
def p_test(p):
    "test : comparison"
    p[0] = p[1]
|
|
608
|
|
609
|
|
610
|
|
# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
# XXX INCOMPLETE: this doesn't allow the trailing comma
def p_arglist(p):
    """arglist : arglist COMMA argument
               | argument"""
    # Left-recursive accumulation of call arguments.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
|
|
620
|
|
# argument: test [gen_for] | test '=' test  # Really [keyword '='] test
def p_argument(p):
    "argument : test"
    p[0] = p[1]
|
|
625
|
|
def p_error(p):
    # Abort on the first parse error; p is the offending token
    # (or None at unexpected end of input).
    raise SyntaxError(p)
|
|
629
|
|
630
|
|
class GardenSnakeParser(object):
    """Couples the IndentLexer with the yacc tables built from the p_* rules."""

    def __init__(self, lexer = None):
        if lexer is None:
            lexer = IndentLexer()
        self.lexer = lexer
        self.parser = yacc.yacc(start="file_input_end")

    def parse(self, code):
        """Parse GardenSnake source text into a compiler.ast Module."""
        self.lexer.input(code)
        result = self.parser.parse(lexer = self.lexer)
        return ast.Module(None, result)
|
|
642
|
|
643
|
|
644 ###### Code generation ######
|
|
645
|
|
646 from compiler import misc, syntax, pycodegen
|
|
647
|
|
class GardenSnakeCompiler(object):
    """Front door: GardenSnake source text -> AST -> Python code object."""

    def __init__(self):
        self.parser = GardenSnakeParser()

    def compile(self, code, filename="<string>"):
        tree = self.parser.parse(code)
        # compiler-package pipeline: attach filename, sanity-check, generate.
        misc.set_filename(filename, tree)
        syntax.check(tree)
        gen = pycodegen.ModuleCodeGenerator(tree)
        return gen.getCode()
|
|
659
|
|
660 ####### Test code #######
|
|
661
|
|
662 compile = GardenSnakeCompiler().compile
|
|
663
|
|
664 code = r"""
|
|
665
|
|
666 print('LET\'S TRY THIS \\OUT')
|
|
667
|
|
668 #Comment here
|
|
669 def x(a):
|
|
670 print('called with',a)
|
|
671 if a == 1:
|
|
672 return 2
|
|
673 if a*2 > 10: return 999 / 4
|
|
674 # Another comment here
|
|
675
|
|
676 return a+2*3
|
|
677
|
|
678 ints = (1, 2,
|
|
679 3, 4,
|
|
680 5)
|
|
681 print('mutiline-expression', ints)
|
|
682
|
|
683 t = 4+1/3*2+6*(9-5+1)
|
|
684 print('predence test; should be 34+2/3:', t, t==(34+2/3))
|
|
685
|
|
686 print('numbers', 1,2,3,4,5)
|
|
687 if 1:
|
|
688 8
|
|
689 a=9
|
|
690 print(x(a))
|
|
691
|
|
692 print(x(1))
|
|
693 print(x(2))
|
|
694 print(x(8),'3')
|
|
695 print('this is decimal', 1/5)
|
|
696 print('BIG DECIMAL', 1.234567891234567e12345)
|
|
697
|
|
698 """
|
|
699
|
|
700 # Set up the GardenSnake run-time environment
|
|
701 def print_(*args):
|
|
702 print "-->", " ".join(map(str,args))
|
|
703
|
|
704 globals()["print"] = print_
|
|
705
|
|
706 compiled_code = compile(code)
|
|
707
|
|
708 exec compiled_code in globals()
|
|
709 print "Done"
|