ply-3.8/ply/yacc.py @ 7267:343ff337a19b

author HackBot
date Wed, 23 Mar 2016 02:40:16 +0000
1 # -----------------------------------------------------------------------------
2 # ply: yacc.py
3 #
4 # Copyright (C) 2001-2015,
5 # David M. Beazley (Dabeaz LLC)
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are
10 # met:
11 #
12 # * Redistributions of source code must retain the above copyright notice,
13 # this list of conditions and the following disclaimer.
14 # * Redistributions in binary form must reproduce the above copyright notice,
15 # this list of conditions and the following disclaimer in the documentation
16 # and/or other materials provided with the distribution.
17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to
18 # endorse or promote products derived from this software without
19 # specific prior written permission.
20 #
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 # -----------------------------------------------------------------------------
33 #
34 # This implements an LR parser that is constructed from grammar rules defined
35 # as Python functions. The grammar is specified by supplying the BNF inside
36 # Python documentation strings. The inspiration for this technique was borrowed
37 # from John Aycock's Spark parsing system. PLY might be viewed as a cross between
38 # Spark and the GNU bison utility.
39 #
40 # The current implementation is only somewhat object-oriented. The
41 # LR parser itself is defined in terms of an object (which allows multiple
42 # parsers to co-exist). However, most of the variables used during table
43 # construction are defined in terms of global variables. Users shouldn't
44 # notice unless they are trying to define multiple parsers at the same
45 # time using threads (in which case they should have their head examined).
46 #
47 # This implementation supports both SLR and LALR(1) parsing. LALR(1)
48 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
51 # by the more efficient DeRemer and Pennello algorithm.
52 #
53 # :::::::: WARNING :::::::
54 #
55 # Construction of LR parsing tables is fairly complicated and expensive.
56 # To make this module run fast, a *LOT* of work has been put into
57 # optimization---often at the expense of readability and what one might
58 # consider to be good Python "coding style." Modify the code at your
59 # own risk!
60 # ----------------------------------------------------------------------------
61
62 import re
63 import types
64 import sys
65 import os.path
66 import inspect
67 import base64
68 import warnings
69
70 __version__ = '3.8'
71 __tabversion__ = '3.8'
72
73 #-----------------------------------------------------------------------------
74 # === User configurable parameters ===
75 #
76 # Change these to modify the default behavior of yacc (if you wish)
77 #-----------------------------------------------------------------------------
78
79 yaccdebug = True # Debugging mode. If set, yacc generates a
80 # 'parser.out' file in the current directory
81
82 debug_file = 'parser.out' # Default name of the debugging file
83 tab_module = 'parsetab' # Default name of the table module
84 default_lr = 'LALR' # Default LR table generation method
85
86 error_count = 3 # Number of symbols that must be shifted to leave recovery mode
87
88 yaccdevel = False # Set to True if developing yacc. This turns off optimized
89 # implementations of certain functions.
90
91 resultlimit = 40 # Size limit of results when running in debug mode.
92
93 pickle_protocol = 0 # Protocol to use when writing pickle files
94
95 # String type-checking compatibility
96 if sys.version_info[0] < 3:
97 string_types = basestring
98 else:
99 string_types = str
100
101 MAXINT = sys.maxsize
102
103 # This object is a stand-in for a logging object created by the
104 # logging module. PLY will use this by default to create things
105 # such as the parser.out file. If a user wants more detailed
106 # information, they can create their own logging object and pass
107 # it into PLY.
108
109 class PlyLogger(object):
110 def __init__(self, f):
111 self.f = f
112
113 def debug(self, msg, *args, **kwargs):
114 self.f.write((msg % args) + '\n')
115
116 info = debug
117
118 def warning(self, msg, *args, **kwargs):
119 self.f.write('WARNING: ' + (msg % args) + '\n')
120
121 def error(self, msg, *args, **kwargs):
122 self.f.write('ERROR: ' + (msg % args) + '\n')
123
124 critical = debug
125
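# ----------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module): any object providing
# debug/info/warning/error/critical methods can stand in for PlyLogger. A
# logging.Logger from the standard library satisfies this interface, so
# diagnostics can be routed through an existing logging configuration:
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debuglog=log, errorlog=log)
# ----------------------------------------------------------------------------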
126 # Null logger is used when no output is generated. Does nothing.
127 class NullLogger(object):
128 def __getattribute__(self, name):
129 return self
130
131 def __call__(self, *args, **kwargs):
132 return self
133
134 # Exception raised for yacc-related errors
135 class YaccError(Exception):
136 pass
137
138 # Format the result message that the parser produces when running in debug mode.
139 def format_result(r):
140 repr_str = repr(r)
141 if '\n' in repr_str:
142 repr_str = repr(repr_str)
143 if len(repr_str) > resultlimit:
144 repr_str = repr_str[:resultlimit] + ' ...'
145 result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
146 return result
147
148 # Format stack entries when the parser is running in debug mode
149 def format_stack_entry(r):
150 repr_str = repr(r)
151 if '\n' in repr_str:
152 repr_str = repr(repr_str)
153 if len(repr_str) < 16:
154 return repr_str
155 else:
156 return '<%s @ 0x%x>' % (type(r).__name__, id(r))
157
158 # Panic mode error recovery support. This feature is being reworked--much of the
159 # code here exists to provide a deprecation warning and a backwards-compatible transition
160
161 _errok = None
162 _token = None
163 _restart = None
164 _warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
165 Instead, invoke the methods on the associated parser instance:
166
167 def p_error(p):
168 ...
169 # Use parser.errok(), parser.token(), parser.restart()
170 ...
171
172 parser = yacc.yacc()
173 '''
174
175 def errok():
176 warnings.warn(_warnmsg)
177 return _errok()
178
179 def restart():
180 warnings.warn(_warnmsg)
181 return _restart()
182
183 def token():
184 warnings.warn(_warnmsg)
185 return _token()
186
187 # Utility function to call the p_error() function with some deprecation hacks
188 def call_errorfunc(errorfunc, token, parser):
189 global _errok, _token, _restart
190 _errok = parser.errok
191 _token = parser.token
192 _restart = parser.restart
193 r = errorfunc(token)
194 try:
195 del _errok, _token, _restart
196 except NameError:
197 pass
198 return r
199
200 #-----------------------------------------------------------------------------
201 # === LR Parsing Engine ===
202 #
203 # The following classes are used for the LR parser itself. These are not
204 # used during table construction and are independent of the actual LR
205 # table generation algorithm
206 #-----------------------------------------------------------------------------
207
208 # This class is used to hold non-terminal grammar symbols during parsing.
209 # It normally has the following attributes set:
210 # .type = Grammar symbol type
211 # .value = Symbol value
212 # .lineno = Starting line number
213 # .endlineno = Ending line number (optional, set automatically)
214 # .lexpos = Starting lex position
215 # .endlexpos = Ending lex position (optional, set automatically)
216
217 class YaccSymbol:
218 def __str__(self):
219 return self.type
220
221 def __repr__(self):
222 return str(self)
223
224 # This class is a wrapper around the objects actually passed to each
225 # grammar rule. Index lookup and assignment actually assign the
226 # .value attribute of the underlying YaccSymbol object.
227 # The lineno() method returns the line number of a given
228 # item (or 0 if not defined). The linespan() method returns
229 # a tuple of (startline,endline) representing the range of lines
230 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
231 # representing the range of positional information for a symbol.
232
233 class YaccProduction:
234 def __init__(self, s, stack=None):
235 self.slice = s
236 self.stack = stack
237 self.lexer = None
238 self.parser = None
239
240 def __getitem__(self, n):
241 if isinstance(n, slice):
242 return [s.value for s in self.slice[n]]
243 elif n >= 0:
244 return self.slice[n].value
245 else:
246 return self.stack[n].value
247
248 def __setitem__(self, n, v):
249 self.slice[n].value = v
250
251 def __getslice__(self, i, j):
252 return [s.value for s in self.slice[i:j]]
253
254 def __len__(self):
255 return len(self.slice)
256
257 def lineno(self, n):
258 return getattr(self.slice[n], 'lineno', 0)
259
260 def set_lineno(self, n, lineno):
261 self.slice[n].lineno = lineno
262
263 def linespan(self, n):
264 startline = getattr(self.slice[n], 'lineno', 0)
265 endline = getattr(self.slice[n], 'endlineno', startline)
266 return startline, endline
267
268 def lexpos(self, n):
269 return getattr(self.slice[n], 'lexpos', 0)
270
271 def lexspan(self, n):
272 startpos = getattr(self.slice[n], 'lexpos', 0)
273 endpos = getattr(self.slice[n], 'endlexpos', startpos)
274 return startpos, endpos
275
276 def error(self):
277 raise SyntaxError
278
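# ----------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module): inside a grammar rule,
# the argument p is a YaccProduction. p[0] is the result slot, p[1]..p[n] are
# the right-hand-side values, and the position methods above can be queried
# for any slice index:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#         startline, endline = p.linespan(3)   # line range of the term
#         startpos, endpos = p.lexspan(1)      # lex range of the left expr
# ----------------------------------------------------------------------------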
279 # -----------------------------------------------------------------------------
280 # == LRParser ==
281 #
282 # The LR Parsing engine.
283 # -----------------------------------------------------------------------------
284
285 class LRParser:
286 def __init__(self, lrtab, errorf):
287 self.productions = lrtab.lr_productions
288 self.action = lrtab.lr_action
289 self.goto = lrtab.lr_goto
290 self.errorfunc = errorf
291 self.set_defaulted_states()
292 self.errorok = True
293
294 def errok(self):
295 self.errorok = True
296
297 def restart(self):
298 del self.statestack[:]
299 del self.symstack[:]
300 sym = YaccSymbol()
301 sym.type = '$end'
302 self.symstack.append(sym)
303 self.statestack.append(0)
304
305 # Defaulted state support.
306 # This method identifies parser states where there is only one possible reduction action.
307 # For such states, the parser can choose to make a rule reduction without consuming
308 # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
309 # certain kinds of advanced parsing situations where the lexer and parser interact with
310 # each other or change states (i.e., manipulation of scope, lexer states, etc.).
311 #
312 # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
313 def set_defaulted_states(self):
314 self.defaulted_states = {}
315 for state, actions in self.action.items():
316 rules = list(actions.values())
317 if len(rules) == 1 and rules[0] < 0:
318 self.defaulted_states[state] = rules[0]
319
320 def disable_defaulted_states(self):
321 self.defaulted_states = {}
322
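# Usage sketch (illustrative): if lexing depends on the parser having consumed
# every preceding token (e.g., feedback between lexer and parser states),
# default reductions can be turned off so that a lookahead token is always
# fetched before each reduction:
#
#     parser = yacc.yacc()
#     parser.disable_defaulted_states()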
323 def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
324 if debug or yaccdevel:
325 if isinstance(debug, int):
326 debug = PlyLogger(sys.stderr)
327 return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
328 elif tracking:
329 return self.parseopt(input, lexer, debug, tracking, tokenfunc)
330 else:
331 return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
332
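# Usage sketch (illustrative): the flags above select one of the three
# generated engine variants, all reached through this same entry point:
#
#     result = parser.parse(data)                  # parseopt_notrack()
#     result = parser.parse(data, tracking=True)   # parseopt()
#     result = parser.parse(data, debug=log)       # parsedebug(), log a logger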
333
334 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
335 # parsedebug().
336 #
337 # This is the debugging enabled version of parse(). All changes made to the
338 # parsing engine should be made here. Optimized versions of this function
339 # are automatically created by the ply/ygen.py script. This script cuts out
340 # sections enclosed in markers such as this:
341 #
342 # #--! DEBUG
343 # statements
344 # #--! DEBUG
345 #
346 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
347
348 def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
349 #--! parsedebug-start
350 lookahead = None # Current lookahead symbol
351 lookaheadstack = [] # Stack of lookahead symbols
352 actions = self.action # Local reference to action table (to avoid lookup on self.)
353 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
354 prod = self.productions # Local reference to production list (to avoid lookup on self.)
355 defaulted_states = self.defaulted_states # Local reference to defaulted states
356 pslice = YaccProduction(None) # Production object passed to grammar rules
357 errorcount = 0 # Used during error recovery
358
359 #--! DEBUG
360 debug.info('PLY: PARSE DEBUG START')
361 #--! DEBUG
362
363 # If no lexer was given, we will try to use the lex module
364 if not lexer:
365 from . import lex
366 lexer = lex.lexer
367
368 # Set up the lexer and parser objects on pslice
369 pslice.lexer = lexer
370 pslice.parser = self
371
372 # If input was supplied, pass to lexer
373 if input is not None:
374 lexer.input(input)
375
376 if tokenfunc is None:
377 # Tokenize function
378 get_token = lexer.token
379 else:
380 get_token = tokenfunc
381
382 # Set the parser's token() method (sometimes used in error recovery)
383 self.token = get_token
384
385 # Set up the state and symbol stacks
386
387 statestack = [] # Stack of parsing states
388 self.statestack = statestack
389 symstack = [] # Stack of grammar symbols
390 self.symstack = symstack
391
392 pslice.stack = symstack # Put in the production
393 errtoken = None # Err token
394
395 # The start state is assumed to be (0,$end)
396
397 statestack.append(0)
398 sym = YaccSymbol()
399 sym.type = '$end'
400 symstack.append(sym)
401 state = 0
402 while True:
403 # Get the next symbol on the input. If a lookahead symbol
404 # is already set, we just use that. Otherwise, we'll pull
405 # the next token off of the lookaheadstack or from the lexer
406
407 #--! DEBUG
408 debug.debug('')
409 debug.debug('State : %s', state)
410 #--! DEBUG
411
412 if state not in defaulted_states:
413 if not lookahead:
414 if not lookaheadstack:
415 lookahead = get_token() # Get the next token
416 else:
417 lookahead = lookaheadstack.pop()
418 if not lookahead:
419 lookahead = YaccSymbol()
420 lookahead.type = '$end'
421
422 # Check the action table
423 ltype = lookahead.type
424 t = actions[state].get(ltype)
425 else:
426 t = defaulted_states[state]
427 #--! DEBUG
428 debug.debug('Defaulted state %s: Reduce using %d', state, -t)
429 #--! DEBUG
430
431 #--! DEBUG
432 debug.debug('Stack : %s',
433 ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
434 #--! DEBUG
435
436 if t is not None:
437 if t > 0:
438 # shift a symbol on the stack
439 statestack.append(t)
440 state = t
441
442 #--! DEBUG
443 debug.debug('Action : Shift and goto state %s', t)
444 #--! DEBUG
445
446 symstack.append(lookahead)
447 lookahead = None
448
449 # Decrease error count on successful shift
450 if errorcount:
451 errorcount -= 1
452 continue
453
454 if t < 0:
455 # reduce a symbol on the stack, emit a production
456 p = prod[-t]
457 pname = p.name
458 plen = p.len
459
460 # Get production function
461 sym = YaccSymbol()
462 sym.type = pname # Production name
463 sym.value = None
464
465 #--! DEBUG
466 if plen:
467 debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
468 '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
469 goto[statestack[-1-plen]][pname])
470 else:
471 debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
472 goto[statestack[-1]][pname])
473
474 #--! DEBUG
475
476 if plen:
477 targ = symstack[-plen-1:]
478 targ[0] = sym
479
480 #--! TRACKING
481 if tracking:
482 t1 = targ[1]
483 sym.lineno = t1.lineno
484 sym.lexpos = t1.lexpos
485 t1 = targ[-1]
486 sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
487 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
488 #--! TRACKING
489
490 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
491 # The code enclosed in this section is duplicated
492 # below as a performance optimization. Make sure
493 # changes get made in both locations.
494
495 pslice.slice = targ
496
497 try:
498 # Call the grammar rule with our special slice object
499 del symstack[-plen:]
500 del statestack[-plen:]
501 p.callable(pslice)
502 #--! DEBUG
503 debug.info('Result : %s', format_result(pslice[0]))
504 #--! DEBUG
505 symstack.append(sym)
506 state = goto[statestack[-1]][pname]
507 statestack.append(state)
508 except SyntaxError:
509 # If an error was set, enter error recovery state
510 lookaheadstack.append(lookahead)
511 symstack.pop()
512 statestack.pop()
513 state = statestack[-1]
514 sym.type = 'error'
515 lookahead = sym
516 errorcount = error_count
517 self.errorok = False
518 continue
519 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
520
521 else:
522
523 #--! TRACKING
524 if tracking:
525 sym.lineno = lexer.lineno
526 sym.lexpos = lexer.lexpos
527 #--! TRACKING
528
529 targ = [sym]
530
531 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
532 # The code enclosed in this section is duplicated
533 # above as a performance optimization. Make sure
534 # changes get made in both locations.
535
536 pslice.slice = targ
537
538 try:
539 # Call the grammar rule with our special slice object
540 p.callable(pslice)
541 #--! DEBUG
542 debug.info('Result : %s', format_result(pslice[0]))
543 #--! DEBUG
544 symstack.append(sym)
545 state = goto[statestack[-1]][pname]
546 statestack.append(state)
547 except SyntaxError:
548 # If an error was set, enter error recovery state
549 lookaheadstack.append(lookahead)
550 symstack.pop()
551 statestack.pop()
552 state = statestack[-1]
553 sym.type = 'error'
554 lookahead = sym
555 errorcount = error_count
556 self.errorok = False
557 continue
558 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
559
560 if t == 0:
561 n = symstack[-1]
562 result = getattr(n, 'value', None)
563 #--! DEBUG
564 debug.info('Done : Returning %s', format_result(result))
565 debug.info('PLY: PARSE DEBUG END')
566 #--! DEBUG
567 return result
568
569 if t is None:
570
571 #--! DEBUG
572 debug.error('Error : %s',
573 ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
574 #--! DEBUG
575
576 # We have some kind of parsing error here. To handle
577 # this, we are going to push the current token onto
578 # the tokenstack and replace it with an 'error' token.
579 # If there are any synchronization rules, they may
580 # catch it.
581 #
582 # In addition to pushing the error token, we call
583 # the user-defined p_error() function if this is the
584 # first syntax error. This function is only called if
585 # errorcount == 0.
586 if errorcount == 0 or self.errorok:
587 errorcount = error_count
588 self.errorok = False
589 errtoken = lookahead
590 if errtoken.type == '$end':
591 errtoken = None # End of file!
592 if self.errorfunc:
593 if errtoken and not hasattr(errtoken, 'lexer'):
594 errtoken.lexer = lexer
595 tok = call_errorfunc(self.errorfunc, errtoken, self)
596 if self.errorok:
597 # User must have done some kind of panic
598 # mode recovery on their own. The
599 # returned token is the next lookahead
600 lookahead = tok
601 errtoken = None
602 continue
603 else:
604 if errtoken:
605 if hasattr(errtoken, 'lineno'):
606 lineno = lookahead.lineno
607 else:
608 lineno = 0
609 if lineno:
610 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
611 else:
612 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
613 else:
614 sys.stderr.write('yacc: Parse error in input. EOF\n')
615 return
616
617 else:
618 errorcount = error_count
619
620 # case 1: the statestack only has 1 entry on it. If we're in this state, the
621 # entire parse has been rolled back and we're completely hosed. The token is
622 # discarded and we just keep going.
623
624 if len(statestack) <= 1 and lookahead.type != '$end':
625 lookahead = None
626 errtoken = None
627 state = 0
628 # Nuke the pushback stack
629 del lookaheadstack[:]
630 continue
631
632 # case 2: the statestack has a couple of entries on it, but we're
633 # at the end of the file. nuke the top entry and generate an error token
634
635 # Start nuking entries on the stack
636 if lookahead.type == '$end':
637 # Whoa. We're really hosed here. Bail out
638 return
639
640 if lookahead.type != 'error':
641 sym = symstack[-1]
642 if sym.type == 'error':
643 # Hmmm. Error is on top of stack, we'll just nuke input
644 # symbol and continue
645 #--! TRACKING
646 if tracking:
647 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
648 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
649 #--! TRACKING
650 lookahead = None
651 continue
652
653 # Create the error symbol for the first time and make it the new lookahead symbol
654 t = YaccSymbol()
655 t.type = 'error'
656
657 if hasattr(lookahead, 'lineno'):
658 t.lineno = t.endlineno = lookahead.lineno
659 if hasattr(lookahead, 'lexpos'):
660 t.lexpos = t.endlexpos = lookahead.lexpos
661 t.value = lookahead
662 lookaheadstack.append(lookahead)
663 lookahead = t
664 else:
665 sym = symstack.pop()
666 #--! TRACKING
667 if tracking:
668 lookahead.lineno = sym.lineno
669 lookahead.lexpos = sym.lexpos
670 #--! TRACKING
671 statestack.pop()
672 state = statestack[-1]
673
674 continue
675
676 # Call an error function here
677 raise RuntimeError('yacc: internal parser error!!!\n')
678
679 #--! parsedebug-end
680
681 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
682 # parseopt().
683 #
684 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
685 # This code is automatically generated by the ply/ygen.py script. Make
686 # changes to the parsedebug() method instead.
687 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
688
689 def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
690 #--! parseopt-start
691 lookahead = None # Current lookahead symbol
692 lookaheadstack = [] # Stack of lookahead symbols
693 actions = self.action # Local reference to action table (to avoid lookup on self.)
694 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
695 prod = self.productions # Local reference to production list (to avoid lookup on self.)
696 defaulted_states = self.defaulted_states # Local reference to defaulted states
697 pslice = YaccProduction(None) # Production object passed to grammar rules
698 errorcount = 0 # Used during error recovery
699
700
701 # If no lexer was given, we will try to use the lex module
702 if not lexer:
703 from . import lex
704 lexer = lex.lexer
705
706 # Set up the lexer and parser objects on pslice
707 pslice.lexer = lexer
708 pslice.parser = self
709
710 # If input was supplied, pass to lexer
711 if input is not None:
712 lexer.input(input)
713
714 if tokenfunc is None:
715 # Tokenize function
716 get_token = lexer.token
717 else:
718 get_token = tokenfunc
719
720 # Set the parser's token() method (sometimes used in error recovery)
721 self.token = get_token
722
723 # Set up the state and symbol stacks
724
725 statestack = [] # Stack of parsing states
726 self.statestack = statestack
727 symstack = [] # Stack of grammar symbols
728 self.symstack = symstack
729
730 pslice.stack = symstack # Put in the production
731 errtoken = None # Err token
732
733 # The start state is assumed to be (0,$end)
734
735 statestack.append(0)
736 sym = YaccSymbol()
737 sym.type = '$end'
738 symstack.append(sym)
739 state = 0
740 while True:
741 # Get the next symbol on the input. If a lookahead symbol
742 # is already set, we just use that. Otherwise, we'll pull
743 # the next token off of the lookaheadstack or from the lexer
744
745
746 if state not in defaulted_states:
747 if not lookahead:
748 if not lookaheadstack:
749 lookahead = get_token() # Get the next token
750 else:
751 lookahead = lookaheadstack.pop()
752 if not lookahead:
753 lookahead = YaccSymbol()
754 lookahead.type = '$end'
755
756 # Check the action table
757 ltype = lookahead.type
758 t = actions[state].get(ltype)
759 else:
760 t = defaulted_states[state]
761
762
763 if t is not None:
764 if t > 0:
765 # shift a symbol on the stack
766 statestack.append(t)
767 state = t
768
769
770 symstack.append(lookahead)
771 lookahead = None
772
773 # Decrease error count on successful shift
774 if errorcount:
775 errorcount -= 1
776 continue
777
778 if t < 0:
779 # reduce a symbol on the stack, emit a production
780 p = prod[-t]
781 pname = p.name
782 plen = p.len
783
784 # Get production function
785 sym = YaccSymbol()
786 sym.type = pname # Production name
787 sym.value = None
788
789
790 if plen:
791 targ = symstack[-plen-1:]
792 targ[0] = sym
793
794 #--! TRACKING
795 if tracking:
796 t1 = targ[1]
797 sym.lineno = t1.lineno
798 sym.lexpos = t1.lexpos
799 t1 = targ[-1]
800 sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
801 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
802 #--! TRACKING
803
804 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
805 # The code enclosed in this section is duplicated
806 # below as a performance optimization. Make sure
807 # changes get made in both locations.
808
809 pslice.slice = targ
810
811 try:
812 # Call the grammar rule with our special slice object
813 del symstack[-plen:]
814 del statestack[-plen:]
815 p.callable(pslice)
816 symstack.append(sym)
817 state = goto[statestack[-1]][pname]
818 statestack.append(state)
819 except SyntaxError:
820 # If an error was set, enter error recovery state
821 lookaheadstack.append(lookahead)
822 symstack.pop()
823 statestack.pop()
824 state = statestack[-1]
825 sym.type = 'error'
826 lookahead = sym
827 errorcount = error_count
828 self.errorok = False
829 continue
830 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
831
832 else:
833
834 #--! TRACKING
835 if tracking:
836 sym.lineno = lexer.lineno
837 sym.lexpos = lexer.lexpos
838 #--! TRACKING
839
840 targ = [sym]
841
842 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
843 # The code enclosed in this section is duplicated
844 # above as a performance optimization. Make sure
845 # changes get made in both locations.
846
847 pslice.slice = targ
848
849 try:
850 # Call the grammar rule with our special slice object
851 p.callable(pslice)
852 symstack.append(sym)
853 state = goto[statestack[-1]][pname]
854 statestack.append(state)
855 except SyntaxError:
856 # If an error was set, enter error recovery state
857 lookaheadstack.append(lookahead)
858 symstack.pop()
859 statestack.pop()
860 state = statestack[-1]
861 sym.type = 'error'
862 lookahead = sym
863 errorcount = error_count
864 self.errorok = False
865 continue
866 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
867
868 if t == 0:
869 n = symstack[-1]
870 result = getattr(n, 'value', None)
871 return result
872
873 if t is None:
874
875
876 # We have some kind of parsing error here. To handle
877 # this, we are going to push the current token onto
878 # the tokenstack and replace it with an 'error' token.
879 # If there are any synchronization rules, they may
880 # catch it.
881 #
882 # In addition to pushing the error token, we call
883 # the user-defined p_error() function if this is the
884 # first syntax error. This function is only called if
885 # errorcount == 0.
886 if errorcount == 0 or self.errorok:
887 errorcount = error_count
888 self.errorok = False
889 errtoken = lookahead
890 if errtoken.type == '$end':
891 errtoken = None # End of file!
892 if self.errorfunc:
893 if errtoken and not hasattr(errtoken, 'lexer'):
894 errtoken.lexer = lexer
895 tok = call_errorfunc(self.errorfunc, errtoken, self)
896 if self.errorok:
897 # User must have done some kind of panic
898 # mode recovery on their own. The
899 # returned token is the next lookahead
900 lookahead = tok
901 errtoken = None
902 continue
903 else:
904 if errtoken:
905 if hasattr(errtoken, 'lineno'):
906 lineno = lookahead.lineno
907 else:
908 lineno = 0
909 if lineno:
910 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
911 else:
912 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
913 else:
914 sys.stderr.write('yacc: Parse error in input. EOF\n')
915 return
916
917 else:
918 errorcount = error_count
919
920 # case 1: the statestack only has 1 entry on it. If we're in this state, the
921 # entire parse has been rolled back and we're completely hosed. The token is
922 # discarded and we just keep going.
923
924 if len(statestack) <= 1 and lookahead.type != '$end':
925 lookahead = None
926 errtoken = None
927 state = 0
928 # Nuke the pushback stack
929 del lookaheadstack[:]
930 continue
931
932 # case 2: the statestack has a couple of entries on it, but we're
933 # at the end of the file. nuke the top entry and generate an error token
934
935 # Start nuking entries on the stack
936 if lookahead.type == '$end':
937 # Whoa. We're really hosed here. Bail out
938 return
939
940 if lookahead.type != 'error':
941 sym = symstack[-1]
942 if sym.type == 'error':
943 # Hmmm. Error is on top of stack, we'll just nuke input
944 # symbol and continue
945 #--! TRACKING
946 if tracking:
947 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
948 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
949 #--! TRACKING
950 lookahead = None
951 continue
952
953 # Create the error symbol for the first time and make it the new lookahead symbol
954 t = YaccSymbol()
955 t.type = 'error'
956
957 if hasattr(lookahead, 'lineno'):
958 t.lineno = t.endlineno = lookahead.lineno
959 if hasattr(lookahead, 'lexpos'):
960 t.lexpos = t.endlexpos = lookahead.lexpos
961 t.value = lookahead
962 lookaheadstack.append(lookahead)
963 lookahead = t
964 else:
965 sym = symstack.pop()
966 #--! TRACKING
967 if tracking:
968 lookahead.lineno = sym.lineno
969 lookahead.lexpos = sym.lexpos
970 #--! TRACKING
971 statestack.pop()
972 state = statestack[-1]
973
974 continue
975
976 # Call an error function here
977 raise RuntimeError('yacc: internal parser error!!!\n')
978
979 #--! parseopt-end
980
981 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
982 # parseopt_notrack().
983 #
984 # Optimized version of parseopt() with line number tracking removed.
985 # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
986 # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
987 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
988
989 def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
990 #--! parseopt-notrack-start
991 lookahead = None # Current lookahead symbol
992 lookaheadstack = [] # Stack of lookahead symbols
993 actions = self.action # Local reference to action table (to avoid lookup on self.)
994 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
995 prod = self.productions # Local reference to production list (to avoid lookup on self.)
996 defaulted_states = self.defaulted_states # Local reference to defaulted states
997 pslice = YaccProduction(None) # Production object passed to grammar rules
998 errorcount = 0 # Used during error recovery
999
1000
1001 # If no lexer was given, we will try to use the lex module
1002 if not lexer:
1003 from . import lex
1004 lexer = lex.lexer
1005
1006 # Set up the lexer and parser objects on pslice
1007 pslice.lexer = lexer
1008 pslice.parser = self
1009
1010 # If input was supplied, pass to lexer
1011 if input is not None:
1012 lexer.input(input)
1013
1014 if tokenfunc is None:
1015 # Tokenize function
1016 get_token = lexer.token
1017 else:
1018 get_token = tokenfunc
1019
1020 # Set the parser's token() method (sometimes used in error recovery)
1021 self.token = get_token
1022
1023 # Set up the state and symbol stacks
1024
1025 statestack = [] # Stack of parsing states
1026 self.statestack = statestack
1027 symstack = [] # Stack of grammar symbols
1028 self.symstack = symstack
1029
1030 pslice.stack = symstack # Put in the production
1031 errtoken = None # Err token
1032
1033 # The start state is assumed to be (0,$end)
1034
1035 statestack.append(0)
1036 sym = YaccSymbol()
1037 sym.type = '$end'
1038 symstack.append(sym)
1039 state = 0
1040 while True:
1041 # Get the next symbol on the input. If a lookahead symbol
1042 # is already set, we just use that. Otherwise, we'll pull
1043 # the next token off of the lookaheadstack or from the lexer
1044
1045
1046 if state not in defaulted_states:
1047 if not lookahead:
1048 if not lookaheadstack:
1049 lookahead = get_token() # Get the next token
1050 else:
1051 lookahead = lookaheadstack.pop()
1052 if not lookahead:
1053 lookahead = YaccSymbol()
1054 lookahead.type = '$end'
1055
1056 # Check the action table
1057 ltype = lookahead.type
1058 t = actions[state].get(ltype)
1059 else:
1060 t = defaulted_states[state]
1061
1062
1063 if t is not None:
1064 if t > 0:
1065 # shift a symbol on the stack
1066 statestack.append(t)
1067 state = t
1068
1069
1070 symstack.append(lookahead)
1071 lookahead = None
1072
1073 # Decrease error count on successful shift
1074 if errorcount:
1075 errorcount -= 1
1076 continue
1077
1078 if t < 0:
1079 # reduce a symbol on the stack, emit a production
1080 p = prod[-t]
1081 pname = p.name
1082 plen = p.len
1083
1084 # Get production function
1085 sym = YaccSymbol()
1086 sym.type = pname # Production name
1087 sym.value = None
1088
1089
1090 if plen:
1091 targ = symstack[-plen-1:]
1092 targ[0] = sym
1093
1094
1095 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1096 # The code enclosed in this section is duplicated
1097 # below as a performance optimization. Make sure
1098 # changes get made in both locations.
1099
1100 pslice.slice = targ
1101
1102 try:
1103 # Call the grammar rule with our special slice object
1104 del symstack[-plen:]
1105 del statestack[-plen:]
1106 p.callable(pslice)
1107 symstack.append(sym)
1108 state = goto[statestack[-1]][pname]
1109 statestack.append(state)
1110 except SyntaxError:
1111 # If an error was set, enter error recovery state
1112 lookaheadstack.append(lookahead)
1113 symstack.pop()
1114 statestack.pop()
1115 state = statestack[-1]
1116 sym.type = 'error'
1117 lookahead = sym
1118 errorcount = error_count
1119 self.errorok = False
1120 continue
1121 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1122
1123 else:
1124
1125
1126 targ = [sym]
1127
1128 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1129 # The code enclosed in this section is duplicated
1130 # above as a performance optimization. Make sure
1131 # changes get made in both locations.
1132
1133 pslice.slice = targ
1134
1135 try:
1136 # Call the grammar rule with our special slice object
1137 p.callable(pslice)
1138 symstack.append(sym)
1139 state = goto[statestack[-1]][pname]
1140 statestack.append(state)
1141 except SyntaxError:
1142 # If an error was set, enter error recovery state
1143 lookaheadstack.append(lookahead)
1144 symstack.pop()
1145 statestack.pop()
1146 state = statestack[-1]
1147 sym.type = 'error'
1148 lookahead = sym
1149 errorcount = error_count
1150 self.errorok = False
1151 continue
1152 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1153
1154 if t == 0:
1155 n = symstack[-1]
1156 result = getattr(n, 'value', None)
1157 return result
1158
1159 if t is None:
1160
1161
1162 # We have some kind of parsing error here. To handle
1163 # this, we are going to push the current token onto
1164 # the tokenstack and replace it with an 'error' token.
1165 # If there are any synchronization rules, they may
1166 # catch it.
1167 #
1168 # In addition to pushing the error token, we call
1169 # the user-defined p_error() function if this is the
1170 # first syntax error. This function is only called if
1171 # errorcount == 0.
1172 if errorcount == 0 or self.errorok:
1173 errorcount = error_count
1174 self.errorok = False
1175 errtoken = lookahead
1176 if errtoken.type == '$end':
1177 errtoken = None # End of file!
1178 if self.errorfunc:
1179 if errtoken and not hasattr(errtoken, 'lexer'):
1180 errtoken.lexer = lexer
1181 tok = call_errorfunc(self.errorfunc, errtoken, self)
1182 if self.errorok:
1183 # User must have done some kind of panic
1184 # mode recovery on their own. The
1185 # returned token is the next lookahead
1186 lookahead = tok
1187 errtoken = None
1188 continue
1189 else:
1190 if errtoken:
1191 if hasattr(errtoken, 'lineno'):
1192 lineno = lookahead.lineno
1193 else:
1194 lineno = 0
1195 if lineno:
1196 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
1197 else:
1198 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
1199 else:
1200 sys.stderr.write('yacc: Parse error in input. EOF\n')
1201 return
1202
1203 else:
1204 errorcount = error_count
1205
1206 # case 1: the statestack only has 1 entry on it. If we're in this state, the
1207 # entire parse has been rolled back and we're completely hosed. The token is
1208 # discarded and we just keep going.
1209
1210 if len(statestack) <= 1 and lookahead.type != '$end':
1211 lookahead = None
1212 errtoken = None
1213 state = 0
1214 # Nuke the pushback stack
1215 del lookaheadstack[:]
1216 continue
1217
1218 # case 2: the statestack has a couple of entries on it, but we're
1219 # at the end of the file. nuke the top entry and generate an error token
1220
1221 # Start nuking entries on the stack
1222 if lookahead.type == '$end':
1223 # Whoa. We're really hosed here. Bail out
1224 return
1225
1226 if lookahead.type != 'error':
1227 sym = symstack[-1]
1228 if sym.type == 'error':
1229 # Hmmm. Error is on top of stack, we'll just nuke input
1230 # symbol and continue
1231 lookahead = None
1232 continue
1233
1234 # Create the error symbol for the first time and make it the new lookahead symbol
1235 t = YaccSymbol()
1236 t.type = 'error'
1237
1238 if hasattr(lookahead, 'lineno'):
1239 t.lineno = t.endlineno = lookahead.lineno
1240 if hasattr(lookahead, 'lexpos'):
1241 t.lexpos = t.endlexpos = lookahead.lexpos
1242 t.value = lookahead
1243 lookaheadstack.append(lookahead)
1244 lookahead = t
1245 else:
1246 sym = symstack.pop()
1247 statestack.pop()
1248 state = statestack[-1]
1249
1250 continue
1251
1252 # Call an error function here
1253 raise RuntimeError('yacc: internal parser error!!!\n')
1254
1255 #--! parseopt-notrack-end
1256
1257 # -----------------------------------------------------------------------------
1258 # === Grammar Representation ===
1259 #
1260 # The following functions, classes, and variables are used to represent and
1261 # manipulate the rules that make up a grammar.
1262 # -----------------------------------------------------------------------------
1263
1264 # regex matching identifiers
1265 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1266
1267 # -----------------------------------------------------------------------------
1268 # class Production:
1269 #
1270 # This class stores the raw information about a single production or grammar rule.
1271 # A grammar rule refers to a specification such as this:
1272 #
1273 # expr : expr PLUS term
1274 #
1275 # Here are the basic attributes defined on all productions
1276 #
1277 # name - Name of the production. For example 'expr'
1278 # prod - A list of symbols on the right side ['expr','PLUS','term']
1279 # prec - Production precedence level
1280 # number - Production number.
1281 # func - Function that executes on reduce
1282 # file - File where production function is defined
1283 # lineno - Line number where production function is defined
1284 #
1285 # The following additional attributes are also defined:
1286 #
1287 # len - Length of the production (number of symbols on right hand side)
1288 # usyms - List of unique symbols found in the production
1289 # -----------------------------------------------------------------------------
1290
1291 class Production(object):
1292 reduced = 0
1293 def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
1294 self.name = name
1295 self.prod = tuple(prod)
1296 self.number = number
1297 self.func = func
1298 self.callable = None
1299 self.file = file
1300 self.line = line
1301 self.prec = precedence
1302
1303 # Internal settings used during table construction
1304
1305 self.len = len(self.prod) # Length of the production
1306
1307 # Create a list of unique production symbols used in the production
1308 self.usyms = []
1309 for s in self.prod:
1310 if s not in self.usyms:
1311 self.usyms.append(s)
1312
1313 # List of all LR items for the production
1314 self.lr_items = []
1315 self.lr_next = None
1316
1317 # Create a string representation
1318 if self.prod:
1319 self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
1320 else:
1321 self.str = '%s -> <empty>' % self.name
1322
1323 def __str__(self):
1324 return self.str
1325
1326 def __repr__(self):
1327 return 'Production(' + str(self) + ')'
1328
1329 def __len__(self):
1330 return len(self.prod)
1331
1332 def __nonzero__(self):
1333 return 1
1334
1335 def __getitem__(self, index):
1336 return self.prod[index]
1337
1338 # Return the nth lr_item from the production (or None if at the end)
1339 def lr_item(self, n):
1340 if n > len(self.prod):
1341 return None
1342 p = LRItem(self, n)
1343 # Precompute the list of productions immediately following.
1344 try:
1345 p.lr_after = Prodnames[p.prod[n+1]]
1346 except (IndexError, KeyError):
1347 p.lr_after = []
1348 try:
1349 p.lr_before = p.prod[n-1]
1350 except IndexError:
1351 p.lr_before = None
1352 return p
1353
1354 # Bind the production function name to a callable
1355 def bind(self, pdict):
1356 if self.func:
1357 self.callable = pdict[self.func]
1358
1359 # This class serves as a minimal stand-in for Production objects when
1360 # reading table data from files. It only contains information
1361 # actually used by the LR parsing engine, plus some additional
1362 # debugging information.
1363 class MiniProduction(object):
1364 def __init__(self, str, name, len, func, file, line):
1365 self.name = name
1366 self.len = len
1367 self.func = func
1368 self.callable = None
1369 self.file = file
1370 self.line = line
1371 self.str = str
1372
1373 def __str__(self):
1374 return self.str
1375
1376 def __repr__(self):
1377 return 'MiniProduction(%s)' % self.str
1378
1379 # Bind the production function name to a callable
1380 def bind(self, pdict):
1381 if self.func:
1382 self.callable = pdict[self.func]
1383
1384
1385 # -----------------------------------------------------------------------------
1386 # class LRItem
1387 #
1388 # This class represents a specific stage of parsing a production rule. For
1389 # example:
1390 #
1391 # expr : expr . PLUS term
1392 #
1393 # In the above, the "." represents the current location of the parse. Here
1394 # are the basic attributes:
1395 #
1396 # name - Name of the production. For example 'expr'
1397 # prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
1398 # number - Production number.
1399 #
1400 # lr_next - Next LR item. For example, if we are 'expr -> expr . PLUS term'
1401 # then lr_next refers to 'expr -> expr PLUS . term'
1402 # lr_index - LR item index (location of the ".") in the prod list.
1403 # lookaheads - LALR lookahead symbols for this item
1404 # len - Length of the production (number of symbols on right hand side)
1405 # lr_after - List of all productions that immediately follow
1406 # lr_before - Grammar symbol immediately before
1407 # -----------------------------------------------------------------------------
1408
1409 class LRItem(object):
1410 def __init__(self, p, n):
1411 self.name = p.name
1412 self.prod = list(p.prod)
1413 self.number = p.number
1414 self.lr_index = n
1415 self.lookaheads = {}
1416 self.prod.insert(n, '.')
1417 self.prod = tuple(self.prod)
1418 self.len = len(self.prod)
1419 self.usyms = p.usyms
1420
1421 def __str__(self):
1422 if self.prod:
1423 s = '%s -> %s' % (self.name, ' '.join(self.prod))
1424 else:
1425 s = '%s -> <empty>' % self.name
1426 return s
1427
1428 def __repr__(self):
1429 return 'LRItem(' + str(self) + ')'
1430
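# Example (illustrative, not part of the module): for the production
# 'expr -> expr PLUS term', the LR item with the dot after the first symbol
# is built as follows:
#
#     p = Production(1, 'expr', ['expr', 'PLUS', 'term'])
#     item = LRItem(p, 1)
#     str(item)        # 'expr -> expr . PLUS term'
#     item.lr_index    # 1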
1431 # -----------------------------------------------------------------------------
1432 # rightmost_terminal()
1433 #
1434 # Return the rightmost terminal from a list of symbols. Used in add_production()
1435 # -----------------------------------------------------------------------------
1436 def rightmost_terminal(symbols, terminals):
1437 i = len(symbols) - 1
1438 while i >= 0:
1439 if symbols[i] in terminals:
1440 return symbols[i]
1441 i -= 1
1442 return None
1443
1444 # -----------------------------------------------------------------------------
1445 # === GRAMMAR CLASS ===
1446 #
1447 # The following class represents the contents of the specified grammar along
1448 # with various computed properties such as first sets, follow sets, LR items, etc.
1449 # This data is used for critical parts of the table generation process later.
1450 # -----------------------------------------------------------------------------
1451
1452 class GrammarError(YaccError):
1453 pass
1454
1455 class Grammar(object):
1456 def __init__(self, terminals):
1457 self.Productions = [None] # A list of all of the productions. The first
1458 # entry is always reserved for the purpose of
1459 # building an augmented grammar
1460
1461 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all
1462 # productions of that nonterminal.
1463
1464 self.Prodmap = {} # A dictionary that is only used to detect duplicate
1465 # productions.
1466
1467 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a
1468 # list of the rules where they are used.
1469
1470 for term in terminals:
1471 self.Terminals[term] = []
1472
1473 self.Terminals['error'] = []
1474
1475 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list
1476 # of rule numbers where they are used.
1477
1478 self.First = {} # A dictionary of precomputed FIRST(x) symbols
1479
1480 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols
1481
1482 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the
1483 # form ('right',level) or ('nonassoc', level) or ('left',level)
1484
1485 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
1486 # This is only used to provide error checking and to generate
1487 # a warning about unused precedence rules.
1488
1489 self.Start = None # Starting symbol for the grammar
1490
1491
1492 def __len__(self):
1493 return len(self.Productions)
1494
1495 def __getitem__(self, index):
1496 return self.Productions[index]
1497
1498 # -----------------------------------------------------------------------------
1499 # set_precedence()
1500 #
1501 # Sets the precedence for a given terminal. assoc is the associativity such as
1502 # 'left','right', or 'nonassoc'. level is a numeric level.
1503 #
1504 # -----------------------------------------------------------------------------
1505
1506 def set_precedence(self, term, assoc, level):
1507 assert self.Productions == [None], 'Must call set_precedence() before add_production()'
1508 if term in self.Precedence:
1509 raise GrammarError('Precedence already specified for terminal %r' % term)
1510 if assoc not in ['left', 'right', 'nonassoc']:
1511 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1512 self.Precedence[term] = (assoc, level)
1513
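# Example (illustrative, not part of the module): the familiar yacc-style
# precedence table
#
#     precedence = (('left', 'PLUS', 'MINUS'),
#                   ('left', 'TIMES', 'DIVIDE'))
#
# corresponds to one call per terminal, with the numeric level taken from the
# row position:
#
#     g = Grammar(['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'NUMBER'])
#     g.set_precedence('PLUS', 'left', 1)
#     g.set_precedence('MINUS', 'left', 1)
#     g.set_precedence('TIMES', 'left', 2)
#     g.set_precedence('DIVIDE', 'left', 2)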
1514 # -----------------------------------------------------------------------------
1515 # add_production()
1516 #
1517 # Given an action function, this function assembles a production rule and
1518 # computes its precedence level.
1519 #
1520 # The production rule is supplied as a list of symbols. For example,
1521 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1522 # symbols ['expr','PLUS','term'].
1523 #
1524 # Precedence is determined by the precedence of the rightmost terminal symbol
1525 # or the precedence of a terminal specified by %prec.
1526 #
1527 # A variety of error checks are performed to make sure production symbols
1528 # are valid and that %prec is used correctly.
1529 # -----------------------------------------------------------------------------
1530
1531 def add_production(self, prodname, syms, func=None, file='', line=0):
1532
1533 if prodname in self.Terminals:
1534 raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
1535 if prodname == 'error':
1536 raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
1537 if not _is_identifier.match(prodname):
1538 raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
1539
1540 # Look for literal tokens
1541 for n, s in enumerate(syms):
1542 if s[0] in "'\"":
1543 try:
1544 c = eval(s)
1545 if (len(c) > 1):
1546 raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
1547 (file, line, s, prodname))
1548 if c not in self.Terminals:
1549 self.Terminals[c] = []
1550 syms[n] = c
1551 continue
1552 except SyntaxError:
1553 pass
1554 if not _is_identifier.match(s) and s != '%prec':
1555 raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
1556
1557 # Determine the precedence level
1558 if '%prec' in syms:
1559 if syms[-1] == '%prec':
1560 raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
1561 if syms[-2] != '%prec':
1562 raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
1563 (file, line))
1564 precname = syms[-1]
1565 prodprec = self.Precedence.get(precname)
1566 if not prodprec:
1567 raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
1568 else:
1569 self.UsedPrecedence.add(precname)
1570 del syms[-2:] # Drop %prec from the rule
1571 else:
1572 # If no %prec, precedence is determined by the rightmost terminal symbol
1573 precname = rightmost_terminal(syms, self.Terminals)
1574 prodprec = self.Precedence.get(precname, ('right', 0))
1575
1576 # See if the rule is already in the rulemap
1577 map = '%s -> %s' % (prodname, syms)
1578 if map in self.Prodmap:
1579 m = self.Prodmap[map]
1580 raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
1581 'Previous definition at %s:%d' % (m.file, m.line))
1582
1583 # From this point on, everything is valid. Create a new Production instance
1584 pnumber = len(self.Productions)
1585 if prodname not in self.Nonterminals:
1586 self.Nonterminals[prodname] = []
1587
1588 # Add the production number to Terminals and Nonterminals
1589 for t in syms:
1590 if t in self.Terminals:
1591 self.Terminals[t].append(pnumber)
1592 else:
1593 if t not in self.Nonterminals:
1594 self.Nonterminals[t] = []
1595 self.Nonterminals[t].append(pnumber)
1596
1597 # Create a production and add it to the list of productions
1598 p = Production(pnumber, prodname, syms, prodprec, func, file, line)
1599 self.Productions.append(p)
1600 self.Prodmap[map] = p
1601
1602 # Add to the global productions list
1603 try:
1604 self.Prodnames[prodname].append(p)
1605 except KeyError:
1606 self.Prodnames[prodname] = [p]
1607
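# Example (illustrative, not part of the module): the rule
# 'expr : expr PLUS term' is added by passing the rule name and its
# right-hand-side symbols separately. A %prec override must name a terminal
# previously given to set_precedence():
#
#     g.add_production('expr', ['expr', 'PLUS', 'term'])
#     g.add_production('expr', ['MINUS', 'expr', '%prec', 'UMINUS'])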
1608 # -----------------------------------------------------------------------------
1609 # set_start()
1610 #
1611 # Sets the starting symbol and creates the augmented grammar. Production
1612 # rule 0 is S' -> start where start is the start symbol.
1613 # -----------------------------------------------------------------------------
1614
1615 def set_start(self, start=None):
1616 if not start:
1617 start = self.Productions[1].name
1618 if start not in self.Nonterminals:
1619 raise GrammarError('start symbol %s undefined' % start)
1620 self.Productions[0] = Production(0, "S'", [start])
1621 self.Nonterminals[start].append(0)
1622 self.Start = start
1623
1624 # -----------------------------------------------------------------------------
1625 # find_unreachable()
1626 #
1627 # Find all of the nonterminal symbols that can't be reached from the starting
1628 # symbol. Returns a list of nonterminals that can't be reached.
1629 # -----------------------------------------------------------------------------
1630
1631 def find_unreachable(self):
1632
1633 # Mark all symbols that are reachable from a symbol s
1634 def mark_reachable_from(s):
1635 if s in reachable:
1636 return
1637 reachable.add(s)
1638 for p in self.Prodnames.get(s, []):
1639 for r in p.prod:
1640 mark_reachable_from(r)
1641
1642 reachable = set()
1643 mark_reachable_from(self.Productions[0].prod[0])
1644 return [s for s in self.Nonterminals if s not in reachable]
1645
1646 # -----------------------------------------------------------------------------
1647 # infinite_cycles()
1648 #
1649 # This function looks at the various parsing rules and tries to detect
1650 # infinite recursion cycles (grammar rules where there is no possible way
1651 # to derive a string of only terminals).
1652 # -----------------------------------------------------------------------------
1653
1654 def infinite_cycles(self):
1655 terminates = {}
1656
1657 # Terminals:
1658 for t in self.Terminals:
1659 terminates[t] = True
1660
1661 terminates['$end'] = True
1662
1663 # Nonterminals:
1664
1665 # Initialize to false:
1666 for n in self.Nonterminals:
1667 terminates[n] = False
1668
1669 # Then propagate termination until no change:
1670 while True:
1671 some_change = False
1672 for (n, pl) in self.Prodnames.items():
1673 # Nonterminal n terminates iff any of its productions terminates.
1674 for p in pl:
1675 # Production p terminates iff all of its rhs symbols terminate.
1676 for s in p.prod:
1677 if not terminates[s]:
1678 # The symbol s does not terminate,
1679 # so production p does not terminate.
1680 p_terminates = False
1681 break
1682 else:
1683 # didn't break from the loop,
1684 # so every symbol s terminates
1685 # so production p terminates.
1686 p_terminates = True
1687
1688 if p_terminates:
1689 # symbol n terminates!
1690 if not terminates[n]:
1691 terminates[n] = True
1692 some_change = True
1693 # Don't need to consider any more productions for this n.
1694 break
1695
1696 if not some_change:
1697 break
1698
1699 infinite = []
1700 for (s, term) in terminates.items():
1701 if not term:
1702 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1703 # s is used-but-not-defined, and we've already warned of that,
1704 # so it would be overkill to say that it's also non-terminating.
1705 pass
1706 else:
1707 infinite.append(s)
1708
1709 return infinite
1710
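# Example (illustrative): a grammar whose only production for 'a' is
#
#     a : a A
#
# can never derive a string consisting solely of terminals, so
# infinite_cycles() would report 'a' in its result.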
1711 # -----------------------------------------------------------------------------
1712 # undefined_symbols()
1713 #
1714 # Find all symbols that were used in the grammar, but not defined as tokens or
1715 # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
1716 # and prod is the production where the symbol was used.
1717 # -----------------------------------------------------------------------------
1718 def undefined_symbols(self):
1719 result = []
1720 for p in self.Productions:
1721 if not p:
1722 continue
1723
1724 for s in p.prod:
1725 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1726 result.append((s, p))
1727 return result
1728
1729 # -----------------------------------------------------------------------------
1730 # unused_terminals()
1731 #
1732 # Find all terminals that were defined, but not used by the grammar. Returns
1733 # a list of the unused terminal names.
1734 # -----------------------------------------------------------------------------
1735 def unused_terminals(self):
1736 unused_tok = []
1737 for s, v in self.Terminals.items():
1738 if s != 'error' and not v:
1739 unused_tok.append(s)
1740
1741 return unused_tok
1742
1743 # ------------------------------------------------------------------------------
1744 # unused_rules()
1745 #
1746 # Find all grammar rules that were defined, but not used (perhaps because
1747 # they are unreachable). Returns a list of productions.
1748 # ------------------------------------------------------------------------------
1749
1750 def unused_rules(self):
1751 unused_prod = []
1752 for s, v in self.Nonterminals.items():
1753 if not v:
1754 p = self.Prodnames[s][0]
1755 unused_prod.append(p)
1756 return unused_prod
1757
1758 # -----------------------------------------------------------------------------
1759 # unused_precedence()
1760 #
1761 # Returns a list of tuples (term,precedence) corresponding to precedence
1762 # rules that were never used by the grammar. term is the name of the terminal
1763 # on which precedence was applied and precedence is a string such as 'left' or
1764 # 'right' corresponding to the type of precedence.
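#
# For example (a sketch), a precedence entry ('left', 'COMMA') would be
# reported as ('COMMA', 'left') if 'COMMA' is neither a declared terminal
# nor a name referenced through a %prec specifier.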
1765 # -----------------------------------------------------------------------------
1766
1767 def unused_precedence(self):
1768 unused = []
1769 for termname in self.Precedence:
1770 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1771 unused.append((termname, self.Precedence[termname][0]))
1772
1773 return unused
1774
1775 # -------------------------------------------------------------------------
1776 # _first()
1777 #
1778 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1779 #
1780 # During execution of compute_first(), the result may be incomplete.
1781 # Afterward (e.g., when called from compute_follow()), it will be complete.
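#
# For example (a sketch), if First['opt'] is ['<empty>'] and First['item']
# is ['NUMBER'], then _first(('opt', 'item')) skips past the nullable
# 'opt' and returns ['NUMBER'].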
1782 # -------------------------------------------------------------------------
1783 def _first(self, beta):
1784
1785 # We are computing First(x1,x2,x3,...,xn)
1786 result = []
1787 for x in beta:
1788 x_produces_empty = False
1789
1790 # Add all the non-<empty> symbols of First[x] to the result.
1791 for f in self.First[x]:
1792 if f == '<empty>':
1793 x_produces_empty = True
1794 else:
1795 if f not in result:
1796 result.append(f)
1797
1798 if x_produces_empty:
1799 # We have to consider the next x in beta,
1800 # i.e. stay in the loop.
1801 pass
1802 else:
1803 # We don't have to consider any further symbols in beta.
1804 break
1805 else:
1806 # There was no 'break' from the loop,
1807 # so x_produces_empty was true for all x in beta,
1808 # so beta produces empty as well.
1809 result.append('<empty>')
1810
1811 return result
1812
1813 # -------------------------------------------------------------------------
1814 # compute_first()
1815 #
1816 # Compute the value of FIRST1(X) for all symbols
1817 # -------------------------------------------------------------------------
1818 def compute_first(self):
1819 if self.First:
1820 return self.First
1821
1822 # Terminals:
1823 for t in self.Terminals:
1824 self.First[t] = [t]
1825
1826 self.First['$end'] = ['$end']
1827
1828 # Nonterminals:
1829
1830 # Initialize to the empty set:
1831 for n in self.Nonterminals:
1832 self.First[n] = []
1833
1834 # Then propagate symbols until no change:
1835 while True:
1836 some_change = False
1837 for n in self.Nonterminals:
1838 for p in self.Prodnames[n]:
1839 for f in self._first(p.prod):
1840 if f not in self.First[n]:
1841 self.First[n].append(f)
1842 some_change = True
1843 if not some_change:
1844 break
1845
1846 return self.First
1847
1848 # ---------------------------------------------------------------------
1849 # compute_follow()
1850 #
1851 # Computes all of the follow sets for every non-terminal symbol. The
1852 # follow set is the set of all symbols that might follow a given
1853 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
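#
# For example (a sketch), given the rule
#
#     statement : expression SEMI
#
# 'SEMI' is added to Follow['expression']; the start symbol always has
# '$end' in its follow set.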
1854 # ---------------------------------------------------------------------
1855 def compute_follow(self, start=None):
1856 # If already computed, return the result
1857 if self.Follow:
1858 return self.Follow
1859
1860 # If first sets not computed yet, do that first.
1861 if not self.First:
1862 self.compute_first()
1863
1864 # Add '$end' to the follow list of the start symbol
1865 for k in self.Nonterminals:
1866 self.Follow[k] = []
1867
1868 if not start:
1869 start = self.Productions[1].name
1870
1871 self.Follow[start] = ['$end']
1872
1873 while True:
1874 didadd = False
1875 for p in self.Productions[1:]:
1876 # Scan the right-hand side of this production
1877 for i, B in enumerate(p.prod):
1878 if B in self.Nonterminals:
1879 # Okay. We got a non-terminal in a production
1880 fst = self._first(p.prod[i+1:])
1881 hasempty = False
1882 for f in fst:
1883 if f != '<empty>' and f not in self.Follow[B]:
1884 self.Follow[B].append(f)
1885 didadd = True
1886 if f == '<empty>':
1887 hasempty = True
1888 if hasempty or i == (len(p.prod)-1):
1889 # Add elements of Follow(p.name) to Follow(B)
1890 for f in self.Follow[p.name]:
1891 if f not in self.Follow[B]:
1892 self.Follow[B].append(f)
1893 didadd = True
1894 if not didadd:
1895 break
1896 return self.Follow
1897
1898
1899 # -----------------------------------------------------------------------------
1900 # build_lritems()
1901 #
1902 # This function walks the list of productions and builds a complete set of the
1903 # LR items. The LR items are stored in two ways: first, as a list attached
1904 # to each production (p.lr_items), and second, as a linked list of LR items
1905 # chained through each item's lr_next attribute. For example:
1906 #
1907 # E -> E PLUS E
1908 #
1909 # Creates the list
1910 #
1911 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1912 # -----------------------------------------------------------------------------
1913
1914 def build_lritems(self):
1915 for p in self.Productions:
1916 lastlri = p
1917 i = 0
1918 lr_items = []
1919 while True:
1920 if i > len(p):
1921 lri = None
1922 else:
1923 lri = LRItem(p, i)
1924 # Precompute the list of productions immediately following
1925 try:
1926 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1927 except (IndexError, KeyError):
1928 lri.lr_after = []
1929 try:
1930 lri.lr_before = lri.prod[i-1]
1931 except IndexError:
1932 lri.lr_before = None
1933
1934 lastlri.lr_next = lri
1935 if not lri:
1936 break
1937 lr_items.append(lri)
1938 lastlri = lri
1939 i += 1
1940 p.lr_items = lr_items
1941
1942 # -----------------------------------------------------------------------------
1943 # == Class LRTable ==
1944 #
1945 # This class represents a basic table of LR parsing information.
1946 # Methods for generating the tables are not defined here. They are defined
1947 # in the derived class LRGeneratedTable.
1948 # -----------------------------------------------------------------------------
1949
1950 class VersionError(YaccError):
1951 pass
1952
1953 class LRTable(object):
1954 def __init__(self):
1955 self.lr_action = None
1956 self.lr_goto = None
1957 self.lr_productions = None
1958 self.lr_method = None
1959
1960 def read_table(self, module):
1961 if isinstance(module, types.ModuleType):
1962 parsetab = module
1963 else:
1964 exec('import %s' % module)
1965 parsetab = sys.modules[module]
1966
1967 if parsetab._tabversion != __tabversion__:
1968 raise VersionError('yacc table file version is out of date')
1969
1970 self.lr_action = parsetab._lr_action
1971 self.lr_goto = parsetab._lr_goto
1972
1973 self.lr_productions = []
1974 for p in parsetab._lr_productions:
1975 self.lr_productions.append(MiniProduction(*p))
1976
1977 self.lr_method = parsetab._lr_method
1978 return parsetab._lr_signature
1979
1980 def read_pickle(self, filename):
1981 try:
1982 import cPickle as pickle
1983 except ImportError:
1984 import pickle
1985
1986 if not os.path.exists(filename):
1987 raise ImportError
1988
1989 in_f = open(filename, 'rb')
1990
1991 tabversion = pickle.load(in_f)
1992 if tabversion != __tabversion__:
1993 raise VersionError('yacc table file version is out of date')
1994 self.lr_method = pickle.load(in_f)
1995 signature = pickle.load(in_f)
1996 self.lr_action = pickle.load(in_f)
1997 self.lr_goto = pickle.load(in_f)
1998 productions = pickle.load(in_f)
1999
2000 self.lr_productions = []
2001 for p in productions:
2002 self.lr_productions.append(MiniProduction(*p))
2003
2004 in_f.close()
2005 return signature
2006
2007 # Bind all production function names to callable objects in pdict
2008 def bind_callables(self, pdict):
2009 for p in self.lr_productions:
2010 p.bind(pdict)
2011
2012
2013 # -----------------------------------------------------------------------------
2014 # === LR Generator ===
2015 #
2016 # The following classes and functions are used to generate LR parsing tables on
2017 # a grammar.
2018 # -----------------------------------------------------------------------------
2019
2020 # -----------------------------------------------------------------------------
2021 # digraph()
2022 # traverse()
2023 #
2024 # The following two functions are used to compute set-valued functions
2025 # of the form:
2026 #
2027 # F(x) = F'(x) U U{F(y) | x R y}
2028 #
2029 # This is used to compute the values of Read() sets as well as FOLLOW sets
2030 # in LALR(1) generation.
2031 #
2032 # Inputs: X - An input set
2033 # R - A relation
2034 # FP - Set-valued function
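#
# As a rough illustration (not part of PLY's API; the graph is made up),
# taking F'(x) = {x} and R to be the edges of a small graph makes F a
# reachability function:
#
#     edges = {'a': ['b'], 'b': ['c'], 'c': ['a'], 'd': []}
#     F = digraph(list(edges),         # X:  the input set
#                 lambda x: edges[x],  # R:  the relation
#                 lambda x: [x])       # FP: F'(x) = {x}
#     # F['a'] == ['a', 'b', 'c'] -- the cycle a-b-c collapses to one set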
2035 # ------------------------------------------------------------------------------
2036
2037 def digraph(X, R, FP):
2038 N = {}
2039 for x in X:
2040 N[x] = 0
2041 stack = []
2042 F = {}
2043 for x in X:
2044 if N[x] == 0:
2045 traverse(x, N, stack, F, X, R, FP)
2046 return F
2047
2048 def traverse(x, N, stack, F, X, R, FP):
2049 stack.append(x)
2050 d = len(stack)
2051 N[x] = d
2052 F[x] = FP(x) # F(x) <- F'(x)
2053
2054 rel = R(x) # Get y's related to x
2055 for y in rel:
2056 if N[y] == 0:
2057 traverse(y, N, stack, F, X, R, FP)
2058 N[x] = min(N[x], N[y])
2059 for a in F.get(y, []):
2060 if a not in F[x]:
2061 F[x].append(a)
2062 if N[x] == d:
2063 N[stack[-1]] = MAXINT
2064 F[stack[-1]] = F[x]
2065 element = stack.pop()
2066 while element != x:
2067 N[stack[-1]] = MAXINT
2068 F[stack[-1]] = F[x]
2069 element = stack.pop()
2070
2071 class LALRError(YaccError):
2072 pass
2073
2074 # -----------------------------------------------------------------------------
2075 # == LRGeneratedTable ==
2076 #
2077 # This class implements the LR table generation algorithm. There are no
2078 # public methods except for write()
2079 # -----------------------------------------------------------------------------
2080
2081 class LRGeneratedTable(LRTable):
2082 def __init__(self, grammar, method='LALR', log=None):
2083 if method not in ['SLR', 'LALR']:
2084 raise LALRError('Unsupported method %s' % method)
2085
2086 self.grammar = grammar
2087 self.lr_method = method
2088
2089 # Set up the logger
2090 if not log:
2091 log = NullLogger()
2092 self.log = log
2093
2094 # Internal attributes
2095 self.lr_action = {} # Action table
2096 self.lr_goto = {} # Goto table
2097 self.lr_productions = grammar.Productions # Copy of grammar Production array
2098 self.lr_goto_cache = {} # Cache of computed gotos
2099 self.lr0_cidhash = {} # Cache of closures
2100
2101 self._add_count = 0 # Internal counter used to detect cycles
2102
2103 # Diagnostic information filled in by the table generator
2104 self.sr_conflict = 0
2105 self.rr_conflict = 0
2106 self.conflicts = [] # List of conflicts
2107
2108 self.sr_conflicts = []
2109 self.rr_conflicts = []
2110
2111 # Build the tables
2112 self.grammar.build_lritems()
2113 self.grammar.compute_first()
2114 self.grammar.compute_follow()
2115 self.lr_parse_table()
2116
2117 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2118
2119 def lr0_closure(self, I):
2120 self._add_count += 1
2121
2122 # Add everything in I to J
2123 J = I[:]
2124 didadd = True
2125 while didadd:
2126 didadd = False
2127 for j in J:
2128 for x in j.lr_after:
2129 if getattr(x, 'lr0_added', 0) == self._add_count:
2130 continue
2131 # Add B --> .G to J
2132 J.append(x.lr_next)
2133 x.lr0_added = self._add_count
2134 didadd = True
2135
2136 return J
2137
2138 # Compute the LR(0) goto function goto(I,X) where I is a set
2139 # of LR(0) items and X is a grammar symbol. This function is written
2140 # in a way that guarantees uniqueness of the generated goto sets
2141 # (i.e. the same goto set will never be returned as two different Python
2142 # objects). With uniqueness, we can later do fast set comparisons using
2143 # id(obj) instead of element-wise comparison.
2144
2145 def lr0_goto(self, I, x):
2146 # First we look for a previously cached entry
2147 g = self.lr_goto_cache.get((id(I), x))
2148 if g:
2149 return g
2150
2151 # Now we generate the goto set in a way that guarantees uniqueness
2152 # of the result
2153
2154 s = self.lr_goto_cache.get(x)
2155 if not s:
2156 s = {}
2157 self.lr_goto_cache[x] = s
2158
2159 gs = []
2160 for p in I:
2161 n = p.lr_next
2162 if n and n.lr_before == x:
2163 s1 = s.get(id(n))
2164 if not s1:
2165 s1 = {}
2166 s[id(n)] = s1
2167 gs.append(n)
2168 s = s1
2169 g = s.get('$end')
2170 if not g:
2171 if gs:
2172 g = self.lr0_closure(gs)
2173 s['$end'] = g
2174 else:
2175 s['$end'] = gs
2176 self.lr_goto_cache[(id(I), x)] = g
2177 return g
2178
2179 # Compute the LR(0) sets-of-items function
2180 def lr0_items(self):
2181 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2182 i = 0
2183 for I in C:
2184 self.lr0_cidhash[id(I)] = i
2185 i += 1
2186
2187 # Loop over the items in C and each grammar symbol
2188 i = 0
2189 while i < len(C):
2190 I = C[i]
2191 i += 1
2192
2193 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2194 asyms = {}
2195 for ii in I:
2196 for s in ii.usyms:
2197 asyms[s] = None
2198
2199 for x in asyms:
2200 g = self.lr0_goto(I, x)
2201 if not g or id(g) in self.lr0_cidhash:
2202 continue
2203 self.lr0_cidhash[id(g)] = len(C)
2204 C.append(g)
2205
2206 return C
2207
2208 # -----------------------------------------------------------------------------
2209 # ==== LALR(1) Parsing ====
2210 #
2211 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2212 # relying upon Follow() sets when performing reductions, a more selective
2213 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2214 # Thus, we mainly just have to focus on calculating the lookahead sets.
2215 #
2216 # The method used here is due to DeRemer and Pennello (1982).
2217 #
2218 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2219 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2220 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2221 #
2222 # Further details can also be found in:
2223 #
2224 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2225 # McGraw-Hill Book Company, (1985).
2226 #
2227 # -----------------------------------------------------------------------------
2228
2229 # -----------------------------------------------------------------------------
2230 # compute_nullable_nonterminals()
2231 #
2232 # Creates a set containing all of the non-terminals that might derive an
2233 # empty string.
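#
# For example (a sketch), given the rules
#
#     opt_semi : SEMI
#     opt_semi :
#
# the second rule has length 0, so 'opt_semi' is nullable; a nonterminal
# whose entire right-hand side is nullable symbols is nullable as well.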
2234 # -----------------------------------------------------------------------------
2235
2236 def compute_nullable_nonterminals(self):
2237 nullable = set()
2238 num_nullable = 0
2239 while True:
2240 for p in self.grammar.Productions[1:]:
2241 if p.len == 0:
2242 nullable.add(p.name)
2243 continue
2244 for t in p.prod:
2245 if t not in nullable:
2246 break
2247 else:
2248 nullable.add(p.name)
2249 if len(nullable) == num_nullable:
2250 break
2251 num_nullable = len(nullable)
2252 return nullable
2253
2254 # -----------------------------------------------------------------------------
2255 # find_nonterminal_transitions(C)
2256 #
2257 # Given a set of LR(0) items, this function finds all of the non-terminal
2258 # transitions. These are transitions in which a dot appears immediately before
2259 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2260 # is the state number and N is the nonterminal symbol.
2261 #
2262 # The input C is the set of LR(0) items.
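#
# For example (a sketch), an item 'statement : IF . expression THEN statement'
# in state 7 contributes the non-terminal transition (7, 'expression').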
2263 # -----------------------------------------------------------------------------
2264
2265 def find_nonterminal_transitions(self, C):
2266 trans = []
2267 for stateno, state in enumerate(C):
2268 for p in state:
2269 if p.lr_index < p.len - 1:
2270 t = (stateno, p.prod[p.lr_index+1])
2271 if t[1] in self.grammar.Nonterminals:
2272 if t not in trans:
2273 trans.append(t)
2274 return trans
2275
2276 # -----------------------------------------------------------------------------
2277 # dr_relation()
2278 #
2279 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2280 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2281 #
2282 # Returns a list of terminals.
2283 # -----------------------------------------------------------------------------
2284
2285 def dr_relation(self, C, trans, nullable):
2287 state, N = trans
2288 terms = []
2289
2290 g = self.lr0_goto(C[state], N)
2291 for p in g:
2292 if p.lr_index < p.len - 1:
2293 a = p.prod[p.lr_index+1]
2294 if a in self.grammar.Terminals:
2295 if a not in terms:
2296 terms.append(a)
2297
2298 # This extra bit is to handle the start state
2299 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2300 terms.append('$end')
2301
2302 return terms
2303
2304 # -----------------------------------------------------------------------------
2305 # reads_relation()
2306 #
2307 # Computes the READS() relation (p,A) READS (t,C).
2308 # -----------------------------------------------------------------------------
2309
2310 def reads_relation(self, C, trans, empty):
2311 # Look for empty transitions
2312 rel = []
2313 state, N = trans
2314
2315 g = self.lr0_goto(C[state], N)
2316 j = self.lr0_cidhash.get(id(g), -1)
2317 for p in g:
2318 if p.lr_index < p.len - 1:
2319 a = p.prod[p.lr_index + 1]
2320 if a in empty:
2321 rel.append((j, a))
2322
2323 return rel
2324
2325 # -----------------------------------------------------------------------------
2326 # compute_lookback_includes()
2327 #
2328 # Determines the lookback and includes relations
2329 #
2330 # LOOKBACK:
2331 #
2332 # This relation is determined by running the LR(0) state machine forward.
2333 # For example, starting with a production "N : . A B C", we run it forward
2334 # to obtain "N : A B C ." We then build a relationship between this final
2335 # state and the starting state. These relationships are stored in a dictionary
2336 # lookdict.
2337 #
2338 # INCLUDES:
2339 #
2340 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2341 #
2342 # This relation is used to determine non-terminal transitions that occur
2343 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2344 # if the following holds:
2345 #
2346 # B -> LAT, where T -> epsilon and p' -L-> p
2347 #
2348 # L is essentially a prefix (which may be empty), T is a suffix that must be
2349 # able to derive an empty string. State p' must lead to state p with the string L.
2350 #
2351 # -----------------------------------------------------------------------------
2352
2353 def compute_lookback_includes(self, C, trans, nullable):
2354 lookdict = {} # Dictionary of lookback relations
2355 includedict = {} # Dictionary of include relations
2356
2357 # Make a dictionary of non-terminal transitions
2358 dtrans = {}
2359 for t in trans:
2360 dtrans[t] = 1
2361
2362 # Loop over all transitions and compute lookbacks and includes
2363 for state, N in trans:
2364 lookb = []
2365 includes = []
2366 for p in C[state]:
2367 if p.name != N:
2368 continue
2369
2370 # Okay, we have a name match. We now follow the production all the way
2371 # through the state machine until we get the . on the right hand side
2372
2373 lr_index = p.lr_index
2374 j = state
2375 while lr_index < p.len - 1:
2376 lr_index = lr_index + 1
2377 t = p.prod[lr_index]
2378
2379 # Check to see if this symbol and state are a non-terminal transition
2380 if (j, t) in dtrans:
2381 # Yes. Okay, there is some chance that this is an includes relation
2382 # the only way to know for certain is whether the rest of the
2383 # production derives empty
2384
2385 li = lr_index + 1
2386 while li < p.len:
2387 if p.prod[li] in self.grammar.Terminals:
2388 break # No. Forget it.
2389 if p.prod[li] not in nullable:
2390 break
2391 li = li + 1
2392 else:
2393 # Appears to be a relation between (j,t) and (state,N)
2394 includes.append((j, t))
2395
2396 g = self.lr0_goto(C[j], t) # Go to next set
2397 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2398
2399 # When we get here, j is the final state; now we have to locate the production
2400 for r in C[j]:
2401 if r.name != p.name:
2402 continue
2403 if r.len != p.len:
2404 continue
2405 i = 0
2406 # This loop is comparing a production ". A B C" with "A B C ."
2407 while i < r.lr_index:
2408 if r.prod[i] != p.prod[i+1]:
2409 break
2410 i = i + 1
2411 else:
2412 lookb.append((j, r))
2413 for i in includes:
2414 if i not in includedict:
2415 includedict[i] = []
2416 includedict[i].append((state, N))
2417 lookdict[(state, N)] = lookb
2418
2419 return lookdict, includedict
2420
2421 # -----------------------------------------------------------------------------
2422 # compute_read_sets()
2423 #
2424 # Given a set of LR(0) items, this function computes the read sets.
2425 #
2426 # Inputs: C = Set of LR(0) items
2427 # ntrans = Set of nonterminal transitions
2428 # nullable = Set of empty transitions
2429 #
2430 # Returns a set containing the read sets
2431 # -----------------------------------------------------------------------------
2432
2433 def compute_read_sets(self, C, ntrans, nullable):
2434 FP = lambda x: self.dr_relation(C, x, nullable)
2435 R = lambda x: self.reads_relation(C, x, nullable)
2436 F = digraph(ntrans, R, FP)
2437 return F
2438
2439 # -----------------------------------------------------------------------------
2440 # compute_follow_sets()
2441 #
2442 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2443 # and an include set, this function computes the follow sets
2444 #
2445 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2446 #
2447 # Inputs:
2448 # ntrans = Set of nonterminal transitions
2449 # readsets = Readset (previously computed)
2450 # inclsets = Include sets (previously computed)
2451 #
2452 # Returns a set containing the follow sets
2453 # -----------------------------------------------------------------------------
2454
2455 def compute_follow_sets(self, ntrans, readsets, inclsets):
2456 FP = lambda x: readsets[x]
2457 R = lambda x: inclsets.get(x, [])
2458 F = digraph(ntrans, R, FP)
2459 return F
2460
2461 # -----------------------------------------------------------------------------
2462 # add_lookaheads()
2463 #
2464 # Attaches the lookahead symbols to grammar rules.
2465 #
2466 # Inputs: lookbacks - Set of lookback relations
2467 # followset - Computed follow set
2468 #
2469 # This function directly attaches the lookaheads to productions contained
2470 # in the lookbacks set
2471 # -----------------------------------------------------------------------------
2472
2473 def add_lookaheads(self, lookbacks, followset):
2474 for trans, lb in lookbacks.items():
2475 # Loop over productions in lookback
2476 for state, p in lb:
2477 if state not in p.lookaheads:
2478 p.lookaheads[state] = []
2479 f = followset.get(trans, [])
2480 for a in f:
2481 if a not in p.lookaheads[state]:
2482 p.lookaheads[state].append(a)
2483
2484 # -----------------------------------------------------------------------------
2485 # add_lalr_lookaheads()
2486 #
2487 # This function does all of the work of adding lookahead information for use
2488 # with LALR parsing
2489 # -----------------------------------------------------------------------------
2490
2491 def add_lalr_lookaheads(self, C):
2492 # Determine all of the nullable nonterminals
2493 nullable = self.compute_nullable_nonterminals()
2494
2495 # Find all non-terminal transitions
2496 trans = self.find_nonterminal_transitions(C)
2497
2498 # Compute read sets
2499 readsets = self.compute_read_sets(C, trans, nullable)
2500
2501 # Compute lookback/includes relations
2502 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2503
2504 # Compute LALR FOLLOW sets
2505 followsets = self.compute_follow_sets(trans, readsets, included)
2506
2507 # Add all of the lookaheads
2508 self.add_lookaheads(lookd, followsets)
2509
2510 # -----------------------------------------------------------------------------
2511 # lr_parse_table()
2512 #
2513 # This function constructs the parse tables for SLR or LALR
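#
# The action table entries built below are encoded as integers:
#
#     st_action[a] > 0       shift and go to state st_action[a]
#     st_action[a] < 0       reduce using rule number -st_action[a]
#     st_action[a] == 0      accept
#     st_action[a] is None   error (nonassociative conflict)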
2514 # -----------------------------------------------------------------------------
2515 def lr_parse_table(self):
2516 Productions = self.grammar.Productions
2517 Precedence = self.grammar.Precedence
2518 goto = self.lr_goto # Goto array
2519 action = self.lr_action # Action array
2520 log = self.log # Logger for output
2521
2522 actionp = {} # Action production array (temporary)
2523
2524 log.info('Parsing method: %s', self.lr_method)
2525
2526 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2527 # This determines the number of states
2528
2529 C = self.lr0_items()
2530
2531 if self.lr_method == 'LALR':
2532 self.add_lalr_lookaheads(C)
2533
2534 # Build the parser table, state by state
2535 st = 0
2536 for I in C:
2537 # Loop over each production in I
2538 actlist = [] # List of actions
2539 st_action = {}
2540 st_actionp = {}
2541 st_goto = {}
2542 log.info('')
2543 log.info('state %d', st)
2544 log.info('')
2545 for p in I:
2546 log.info(' (%d) %s', p.number, p)
2547 log.info('')
2548
2549 for p in I:
2550 if p.len == p.lr_index + 1:
2551 if p.name == "S'":
2552 # Start symbol. Accept!
2553 st_action['$end'] = 0
2554 st_actionp['$end'] = p
2555 else:
2556 # We are at the end of a production. Reduce!
2557 if self.lr_method == 'LALR':
2558 laheads = p.lookaheads[st]
2559 else:
2560 laheads = self.grammar.Follow[p.name]
2561 for a in laheads:
2562 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2563 r = st_action.get(a)
2564 if r is not None:
2565 # Whoa. Have a shift/reduce or reduce/reduce conflict
2566 if r > 0:
2567 # Need to decide on shift or reduce here
2568 # By default we favor shifting. Need to add
2569 # some precedence rules here.
2570 sprec, slevel = Productions[st_actionp[a].number].prec
2571 rprec, rlevel = Precedence.get(a, ('right', 0))
2572 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2573 # We really need to reduce here.
2574 st_action[a] = -p.number
2575 st_actionp[a] = p
2576 if not slevel and not rlevel:
2577 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2578 self.sr_conflicts.append((st, a, 'reduce'))
2579 Productions[p.number].reduced += 1
2580 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2581 st_action[a] = None
2582 else:
2583 # Hmmm. Guess we'll keep the shift
2584 if not rlevel:
2585 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2586 self.sr_conflicts.append((st, a, 'shift'))
2587 elif r < 0:
2588 # Reduce/reduce conflict. In this case, we favor the rule
2589 # that was defined first in the grammar file
2590 oldp = Productions[-r]
2591 pp = Productions[p.number]
2592 if oldp.line > pp.line:
2593 st_action[a] = -p.number
2594 st_actionp[a] = p
2595 chosenp, rejectp = pp, oldp
2596 Productions[p.number].reduced += 1
2597 Productions[oldp.number].reduced -= 1
2598 else:
2599 chosenp, rejectp = oldp, pp
2600 self.rr_conflicts.append((st, chosenp, rejectp))
2601 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2602 a, st_actionp[a].number, st_actionp[a])
2603 else:
2604 raise LALRError('Unknown conflict in state %d' % st)
2605 else:
2606 st_action[a] = -p.number
2607 st_actionp[a] = p
2608 Productions[p.number].reduced += 1
2609 else:
2610 i = p.lr_index
2611 a = p.prod[i+1] # Get symbol right after the "."
2612 if a in self.grammar.Terminals:
2613 g = self.lr0_goto(I, a)
2614 j = self.lr0_cidhash.get(id(g), -1)
2615 if j >= 0:
2616 # We are in a shift state
2617 actlist.append((a, p, 'shift and go to state %d' % j))
2618 r = st_action.get(a)
2619 if r is not None:
2620 # Whoa. Have a shift/reduce or shift/shift conflict
2621 if r > 0:
2622 if r != j:
2623 raise LALRError('Shift/shift conflict in state %d' % st)
2624 elif r < 0:
2625 # Do a precedence check.
2626 # - if precedence of reduce rule is higher, we reduce.
2627 # - if precedence of reduce is same and left assoc, we reduce.
2628 # - otherwise we shift
2629 rprec, rlevel = Productions[st_actionp[a].number].prec
2630 sprec, slevel = Precedence.get(a, ('right', 0))
2631 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2632 # We decide to shift here... highest precedence to shift
2633 Productions[st_actionp[a].number].reduced -= 1
2634 st_action[a] = j
2635 st_actionp[a] = p
2636 if not rlevel:
2637 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2638 self.sr_conflicts.append((st, a, 'shift'))
2639 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2640 st_action[a] = None
2641 else:
2642 # Hmmm. Guess we'll keep the reduce
2643 if not slevel and not rlevel:
2644 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2645 self.sr_conflicts.append((st, a, 'reduce'))
2646
2647 else:
2648 raise LALRError('Unknown conflict in state %d' % st)
2649 else:
2650 st_action[a] = j
2651 st_actionp[a] = p
2652
2653 # Print the actions associated with each terminal
2654 _actprint = {}
2655 for a, p, m in actlist:
2656 if a in st_action:
2657 if p is st_actionp[a]:
2658 log.info(' %-15s %s', a, m)
2659 _actprint[(a, m)] = 1
2660 log.info('')
2661 # Print the actions that were not used. (debugging)
2662 not_used = 0
2663 for a, p, m in actlist:
2664 if a in st_action:
2665 if p is not st_actionp[a]:
2666 if not (a, m) in _actprint:
2667 log.debug(' ! %-15s [ %s ]', a, m)
2668 not_used = 1
2669 _actprint[(a, m)] = 1
2670 if not_used:
2671 log.debug('')
2672
2673 # Construct the goto table for this state
2674
2675 nkeys = {}
2676 for ii in I:
2677 for s in ii.usyms:
2678 if s in self.grammar.Nonterminals:
2679 nkeys[s] = None
2680 for n in nkeys:
2681 g = self.lr0_goto(I, n)
2682 j = self.lr0_cidhash.get(id(g), -1)
2683 if j >= 0:
2684 st_goto[n] = j
2685 log.info(' %-30s shift and go to state %d', n, j)
2686
2687 action[st] = st_action
2688 actionp[st] = st_actionp
2689 goto[st] = st_goto
2690 st += 1
2691
2692 # -----------------------------------------------------------------------------
2693 # write()
2694 #
2695 # This function writes the LR parsing tables to a file
2696 # -----------------------------------------------------------------------------
2697
2698 def write_table(self, tabmodule, outputdir='', signature=''):
2699 if isinstance(tabmodule, types.ModuleType):
2700 raise IOError("Won't overwrite existing tabmodule")
2701
2702 basemodulename = tabmodule.split('.')[-1]
2703 filename = os.path.join(outputdir, basemodulename) + '.py'
2704 try:
2705 f = open(filename, 'w')
2706
2707 f.write('''
2708 # %s
2709 # This file is automatically generated. Do not edit.
2710 _tabversion = %r
2711
2712 _lr_method = %r
2713
2714 _lr_signature = %r
2715 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2716
2717 # Change smaller to 0 to go back to original tables
2718 smaller = 1
2719
2720 # Factor out names to try and make smaller
2721 if smaller:
2722 items = {}
2723
2724 for s, nd in self.lr_action.items():
2725 for name, v in nd.items():
2726 i = items.get(name)
2727 if not i:
2728 i = ([], [])
2729 items[name] = i
2730 i[0].append(s)
2731 i[1].append(v)
2732
2733 f.write('\n_lr_action_items = {')
2734 for k, v in items.items():
2735 f.write('%r:([' % k)
2736 for i in v[0]:
2737 f.write('%r,' % i)
2738 f.write('],[')
2739 for i in v[1]:
2740 f.write('%r,' % i)
2741
2742 f.write(']),')
2743 f.write('}\n')
2744
2745 f.write('''
2746 _lr_action = {}
2747 for _k, _v in _lr_action_items.items():
2748 for _x,_y in zip(_v[0],_v[1]):
2749 if not _x in _lr_action: _lr_action[_x] = {}
2750 _lr_action[_x][_k] = _y
2751 del _lr_action_items
2752 ''')
2753
2754 else:
2755 f.write('\n_lr_action = { ')
2756 for k, v in self.lr_action.items():
2757 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2758 f.write('}\n')
2759
2760 if smaller:
2761 # Factor out names to try and make smaller
2762 items = {}
2763
2764 for s, nd in self.lr_goto.items():
2765 for name, v in nd.items():
2766 i = items.get(name)
2767 if not i:
2768 i = ([], [])
2769 items[name] = i
2770 i[0].append(s)
2771 i[1].append(v)
2772
2773 f.write('\n_lr_goto_items = {')
2774 for k, v in items.items():
2775 f.write('%r:([' % k)
2776 for i in v[0]:
2777 f.write('%r,' % i)
2778 f.write('],[')
2779 for i in v[1]:
2780 f.write('%r,' % i)
2781
2782 f.write(']),')
2783 f.write('}\n')
2784
2785 f.write('''
2786 _lr_goto = {}
2787 for _k, _v in _lr_goto_items.items():
2788 for _x, _y in zip(_v[0], _v[1]):
2789 if not _x in _lr_goto: _lr_goto[_x] = {}
2790 _lr_goto[_x][_k] = _y
2791 del _lr_goto_items
2792 ''')
2793 else:
2794 f.write('\n_lr_goto = { ')
2795 for k, v in self.lr_goto.items():
2796 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2797 f.write('}\n')
2798
2799 # Write production table
2800 f.write('_lr_productions = [\n')
2801 for p in self.lr_productions:
2802 if p.func:
2803 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2804 p.func, os.path.basename(p.file), p.line))
2805 else:
2806 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2807 f.write(']\n')
2808 f.close()
2809
2810 except IOError:
2811 raise
2812
2813
2814 # -----------------------------------------------------------------------------
2815 # pickle_table()
2816 #
2817 # This function pickles the LR parsing tables to the named file
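#
# The stream written here is what LRTable.read_pickle() expects: table
# version, LR method, signature, action table, goto table, and production
# list, in that order. A sketch of reading it back ('parser.p' is a
# made-up filename):
#
#     lr = LRTable()
#     sig = lr.read_pickle('parser.p')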
2818 # -----------------------------------------------------------------------------
2819
2820 def pickle_table(self, filename, signature=''):
2821 try:
2822 import cPickle as pickle
2823 except ImportError:
2824 import pickle
2825 with open(filename, 'wb') as outf:
2826 pickle.dump(__tabversion__, outf, pickle_protocol)
2827 pickle.dump(self.lr_method, outf, pickle_protocol)
2828 pickle.dump(signature, outf, pickle_protocol)
2829 pickle.dump(self.lr_action, outf, pickle_protocol)
2830 pickle.dump(self.lr_goto, outf, pickle_protocol)
2831
2832 outp = []
2833 for p in self.lr_productions:
2834 if p.func:
2835 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2836 else:
2837 outp.append((str(p), p.name, p.len, None, None, None))
2838 pickle.dump(outp, outf, pickle_protocol)
2839
2840 # -----------------------------------------------------------------------------
2841 # === INTROSPECTION ===
2842 #
2843 # The following functions and classes are used to implement the PLY
2844 # introspection features followed by the yacc() function itself.
2845 # -----------------------------------------------------------------------------
2846
2847 # -----------------------------------------------------------------------------
2848 # get_caller_module_dict()
2849 #
2850 # This function returns a dictionary containing all of the symbols defined within
2851 # a caller further down the call stack. This is used to get the environment
2852 # associated with the yacc() call if none was provided.
2853 # -----------------------------------------------------------------------------
2854
2855 def get_caller_module_dict(levels):
2856 f = sys._getframe(levels)
2857 ldict = f.f_globals.copy()
2858 if f.f_globals != f.f_locals:
2859 ldict.update(f.f_locals)
2860 return ldict
2861
2862 # -----------------------------------------------------------------------------
2863 # parse_grammar()
2864 #
2865 # This takes a raw grammar rule string and parses it into production data
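#
# For example (a sketch), a docstring such as
#
#     expression : expression PLUS term
#                | term
#
# is parsed into two entries, roughly
#
#     (file, line, 'expression', ['expression', 'PLUS', 'term'])
#     (file, line, 'expression', ['term'])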
2866 # -----------------------------------------------------------------------------
2867 def parse_grammar(doc, file, line):
2868 grammar = []
2869 # Split the doc string into lines
2870 pstrings = doc.splitlines()
2871 lastp = None
2872 dline = line
2873 for ps in pstrings:
2874 dline += 1
2875 p = ps.split()
2876 if not p:
2877 continue
2878 try:
2879 if p[0] == '|':
2880 # This is a continuation of a previous rule
2881 if not lastp:
2882 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2883 prodname = lastp
2884 syms = p[1:]
2885 else:
2886 prodname = p[0]
2887 lastp = prodname
2888 syms = p[2:]
2889 assign = p[1]
2890 if assign != ':' and assign != '::=':
2891 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2892
2893 grammar.append((file, dline, prodname, syms))
2894 except SyntaxError:
2895 raise
2896 except Exception:
2897 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
2898
2899 return grammar
2900
2901 # -----------------------------------------------------------------------------
2902 # ParserReflect()
2903 #
2904 # This class represents information extracted for building a parser including
2905 # start symbol, error function, tokens, precedence list, action functions,
2906 # etc.
2907 # -----------------------------------------------------------------------------
2908 class ParserReflect(object):
2909 def __init__(self, pdict, log=None):
2910 self.pdict = pdict
2911 self.start = None
2912 self.error_func = None
2913 self.tokens = None
2914 self.modules = set()
2915 self.grammar = []
2916 self.error = False
2917
2918 if log is None:
2919 self.log = PlyLogger(sys.stderr)
2920 else:
2921 self.log = log
2922
2923 # Get all of the basic information
2924 def get_all(self):
2925 self.get_start()
2926 self.get_error_func()
2927 self.get_tokens()
2928 self.get_precedence()
2929 self.get_pfunctions()
2930
2931 # Validate all of the information
2932 def validate_all(self):
2933 self.validate_start()
2934 self.validate_error_func()
2935 self.validate_tokens()
2936 self.validate_precedence()
2937 self.validate_pfunctions()
2938 self.validate_modules()
2939 return self.error
2940
2941 # Compute a signature over the grammar
2942 def signature(self):
2943 try:
2944 from hashlib import md5
2945 except ImportError:
2946 from md5 import md5
2947 try:
2948 sig = md5()
2949 if self.start:
2950 sig.update(self.start.encode('latin-1'))
2951 if self.prec:
2952 sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1'))
2953 if self.tokens:
2954 sig.update(' '.join(self.tokens).encode('latin-1'))
2955 for f in self.pfuncs:
2956 if f[3]:
2957 sig.update(f[3].encode('latin-1'))
2958 except (TypeError, ValueError):
2959 pass
2960
2961 digest = base64.b16encode(sig.digest())
2962 if sys.version_info[0] >= 3:
2963 digest = digest.decode('latin-1')
2964 return digest
2965
2966 # -----------------------------------------------------------------------------
2967 # validate_modules()
2968 #
2969 # This method checks to see if there are duplicated p_rulename() functions
2970 # in the parser module file. Without this function, it is really easy for
2971 # users to make mistakes by cutting and pasting code fragments (and it's a real
2972 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2973 # we just do a little regular expression pattern matching of def statements
2974 # to try and detect duplicates.
2975 # -----------------------------------------------------------------------------
2976
2977 def validate_modules(self):
2978 # Match def p_funcname(
2979 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2980
2981 for module in self.modules:
2982 lines, linen = inspect.getsourcelines(module)
2983
2984 counthash = {}
2985 for linen, line in enumerate(lines):
2986 linen += 1
2987 m = fre.match(line)
2988 if m:
2989 name = m.group(1)
2990 prev = counthash.get(name)
2991 if not prev:
2992 counthash[name] = linen
2993 else:
2994 filename = inspect.getsourcefile(module)
2995 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
2996 filename, linen, name, prev)
2997
2998 # Get the start symbol
2999 def get_start(self):
3000 self.start = self.pdict.get('start')
3001
3002 # Validate the start symbol
3003 def validate_start(self):
3004 if self.start is not None:
3005 if not isinstance(self.start, string_types):
3006 self.log.error("'start' must be a string")
3007
3008 # Look for error handler
3009 def get_error_func(self):
3010 self.error_func = self.pdict.get('p_error')
3011
3012 # Validate the error function
3013 def validate_error_func(self):
3014 if self.error_func:
3015 if isinstance(self.error_func, types.FunctionType):
3016 ismethod = 0
3017 elif isinstance(self.error_func, types.MethodType):
3018 ismethod = 1
3019 else:
3020 self.log.error("'p_error' defined, but is not a function or method")
3021 self.error = True
3022 return
3023
3024 eline = self.error_func.__code__.co_firstlineno
3025 efile = self.error_func.__code__.co_filename
3026 module = inspect.getmodule(self.error_func)
3027 self.modules.add(module)
3028
3029 argcount = self.error_func.__code__.co_argcount - ismethod
3030 if argcount != 1:
3031 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3032 self.error = True
3033
3034 # Get the token list
3035 def get_tokens(self):
3036 tokens = self.pdict.get('tokens')
3037 if not tokens:
3038 self.log.error('No token list is defined')
3039 self.error = True
3040 return
3041
3042 if not isinstance(tokens, (list, tuple)):
3043 self.log.error('tokens must be a list or tuple')
3044 self.error = True
3045 return
3046
3047 if not tokens:
3048 self.log.error('tokens is empty')
3049 self.error = True
3050 return
3051
3052 self.tokens = tokens
3053
3054 # Validate the tokens
3055 def validate_tokens(self):
3056 # Validate the tokens.
3057 if 'error' in self.tokens:
3058 self.log.error("Illegal token name 'error'. Is a reserved word")
3059 self.error = True
3060 return
3061
3062 terminals = set()
3063 for n in self.tokens:
3064 if n in terminals:
3065 self.log.warning('Token %r multiply defined', n)
3066 terminals.add(n)
3067
3068 # Get the precedence table (if any)
3069 def get_precedence(self):
3070 self.prec = self.pdict.get('precedence')
3071
3072 # Validate and parse the precedence map
3073 def validate_precedence(self):
3074 preclist = []
3075 if self.prec:
3076 if not isinstance(self.prec, (list, tuple)):
3077 self.log.error('precedence must be a list or tuple')
3078 self.error = True
3079 return
3080 for level, p in enumerate(self.prec):
3081 if not isinstance(p, (list, tuple)):
3082 self.log.error('Bad precedence table')
3083 self.error = True
3084 return
3085
3086 if len(p) < 2:
3087 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3088 self.error = True
3089 return
3090 assoc = p[0]
3091 if not isinstance(assoc, string_types):
3092 self.log.error('precedence associativity must be a string')
3093 self.error = True
3094 return
3095 for term in p[1:]:
3096 if not isinstance(term, string_types):
3097 self.log.error('precedence items must be strings')
3098 self.error = True
3099 return
3100 preclist.append((term, assoc, level+1))
3101 self.preclist = preclist
3102
3103 # Get all p_functions from the grammar
3104 def get_pfunctions(self):
3105 p_functions = []
3106 for name, item in self.pdict.items():
3107 if not name.startswith('p_') or name == 'p_error':
3108 continue
3109 if isinstance(item, (types.FunctionType, types.MethodType)):
3110 line = item.__code__.co_firstlineno
3111 module = inspect.getmodule(item)
3112 p_functions.append((line, module, name, item.__doc__))
3113
3114 # Sort all of the actions by line number; make sure to stringify
3115 # modules to make them sortable, since `line` may not uniquely sort all
3116 # p functions
3117 p_functions.sort(key=lambda p_function: (
3118 p_function[0],
3119 str(p_function[1]),
3120 p_function[2],
3121 p_function[3]))
3122 self.pfuncs = p_functions
3123
3124 # Validate all of the p_functions
3125 def validate_pfunctions(self):
3126 grammar = []
3127 # Check for non-empty symbols
3128 if len(self.pfuncs) == 0:
3129 self.log.error('no rules of the form p_rulename are defined')
3130 self.error = True
3131 return
3132
3133 for line, module, name, doc in self.pfuncs:
3134 file = inspect.getsourcefile(module)
3135 func = self.pdict[name]
3136 if isinstance(func, types.MethodType):
3137 reqargs = 2
3138 else:
3139 reqargs = 1
3140 if func.__code__.co_argcount > reqargs:
3141 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3142 self.error = True
3143 elif func.__code__.co_argcount < reqargs:
3144 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3145 self.error = True
3146 elif not func.__doc__:
3147 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3148 file, line, func.__name__)
3149 else:
3150 try:
3151 parsed_g = parse_grammar(doc, file, line)
3152 for g in parsed_g:
3153 grammar.append((name, g))
3154 except SyntaxError as e:
3155 self.log.error(str(e))
3156 self.error = True
3157
3158 # Looks like a valid grammar rule
3159 # Record the module in which it was defined.
3160 self.modules.add(module)
3161
3162 # Secondary validation step that looks for p_ definitions that are not functions
3163 # or functions that look like they might be grammar rules.
3164
3165 for n, v in self.pdict.items():
3166 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3167 continue
3168 if n.startswith('t_'):
3169 continue
3170 if n.startswith('p_') and n != 'p_error':
3171 self.log.warning('%r not defined as a function', n)
3172 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3173 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3174 if v.__doc__:
3175 try:
3176 doc = v.__doc__.split(' ')
3177 if doc[1] == ':':
3178 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3179 v.__code__.co_filename, v.__code__.co_firstlineno, n)
3180 except IndexError:
3181 pass
3182
3183 self.grammar = grammar
3184
3185 # -----------------------------------------------------------------------------
3186 # yacc(module)
3187 #
3188 # Build a parser
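#
# Typical use (a sketch, assuming the calling module defines tokens,
# precedence, and p_ rules, and has already built a lexer with ply.lex):
#
#     import ply.yacc as yacc
#     parser = yacc.yacc()            # build (or load) the tables
#     result = parser.parse('2 + 3 * 4')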
3189 # -----------------------------------------------------------------------------
3190
3191 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3192 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3193 outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3194
3195 if tabmodule is None:
3196 tabmodule = tab_module
3197
3198 # Reference to the parsing method of the last built parser
3199 global parse
3200
3201 # If pickling is enabled, table files are not created
3202 if picklefile:
3203 write_tables = 0
3204
3205 if errorlog is None:
3206 errorlog = PlyLogger(sys.stderr)
3207
3208 # Get the module dictionary used for the parser
3209 if module:
3210 _items = [(k, getattr(module, k)) for k in dir(module)]
3211 pdict = dict(_items)
3212 # If no __file__ attribute is available, try to obtain it from the __module__ instead
3213 if '__file__' not in pdict:
3214 pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3215 else:
3216 pdict = get_caller_module_dict(2)
3217
3218 if outputdir is None:
3219 # If no output directory is set, the location of the output files
3220 # is determined according to the following rules:
3221 # - If tabmodule specifies a package, files go into that package directory
3222 # - Otherwise, files go in the same directory as the specifying module
3223 if isinstance(tabmodule, types.ModuleType):
3224 srcfile = tabmodule.__file__
3225 else:
3226 if '.' not in tabmodule:
3227 srcfile = pdict['__file__']
3228 else:
3229 parts = tabmodule.split('.')
3230 pkgname = '.'.join(parts[:-1])
3231 exec('import %s' % pkgname)
3232 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3233 outputdir = os.path.dirname(srcfile)
3234
3235 # Determine if the module is part of a package or not.
3236 # If so, fix the tabmodule setting so that tables load correctly
3237 pkg = pdict.get('__package__')
3238 if pkg and isinstance(tabmodule, str):
3239 if '.' not in tabmodule:
3240 tabmodule = pkg + '.' + tabmodule
3241 
3244 # Set start symbol if it's specified directly using an argument
3245 if start is not None:
3246 pdict['start'] = start
3247
3248 # Collect parser information from the dictionary
3249 pinfo = ParserReflect(pdict, log=errorlog)
3250 pinfo.get_all()
3251
3252 if pinfo.error:
3253 raise YaccError('Unable to build parser')
3254
3255 # Check signature against table files (if any)
3256 signature = pinfo.signature()
3257
3258 # Read the tables
3259 try:
3260 lr = LRTable()
3261 if picklefile:
3262 read_signature = lr.read_pickle(picklefile)
3263 else:
3264 read_signature = lr.read_table(tabmodule)
3265 if optimize or (read_signature == signature):
3266 try:
3267 lr.bind_callables(pinfo.pdict)
3268 parser = LRParser(lr, pinfo.error_func)
3269 parse = parser.parse
3270 return parser
3271 except Exception as e:
3272 errorlog.warning('There was a problem loading the table file: %r', e)
3273 except VersionError as e:
3274 errorlog.warning(str(e))
3275 except ImportError:
3276 pass
3277
3278 if debuglog is None:
3279 if debug:
3280 try:
3281 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3282 except IOError as e:
3283 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3284 debuglog = NullLogger()
3285 else:
3286 debuglog = NullLogger()
3287
3288 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3289
3290 errors = False
3291
3292 # Validate the parser information
3293 if pinfo.validate_all():
3294 raise YaccError('Unable to build parser')
3295
3296 if not pinfo.error_func:
3297 errorlog.warning('no p_error() function is defined')
3298
3299 # Create a grammar object
3300 grammar = Grammar(pinfo.tokens)
3301
3302 # Set precedence level for terminals
3303 for term, assoc, level in pinfo.preclist:
3304 try:
3305 grammar.set_precedence(term, assoc, level)
3306 except GrammarError as e:
3307 errorlog.warning('%s', e)
3308
3309 # Add productions to the grammar
3310 for funcname, gram in pinfo.grammar:
3311 file, line, prodname, syms = gram
3312 try:
3313 grammar.add_production(prodname, syms, funcname, file, line)
3314 except GrammarError as e:
3315 errorlog.error('%s', e)
3316 errors = True
3317
3318 # Set the grammar start symbols
3319 try:
3320 if start is None:
3321 grammar.set_start(pinfo.start)
3322 else:
3323 grammar.set_start(start)
3324 except GrammarError as e:
3325 errorlog.error(str(e))
3326 errors = True
3327
3328 if errors:
3329 raise YaccError('Unable to build parser')
3330
3331 # Verify the grammar structure
3332 undefined_symbols = grammar.undefined_symbols()
3333 for sym, prod in undefined_symbols:
3334 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3335 errors = True
3336
3337 unused_terminals = grammar.unused_terminals()
3338 if unused_terminals:
3339 debuglog.info('')
3340 debuglog.info('Unused terminals:')
3341 debuglog.info('')
3342 for term in unused_terminals:
3343 errorlog.warning('Token %r defined, but not used', term)
3344 debuglog.info(' %s', term)
3345
3346 # Print out all productions to the debug log
3347 if debug:
3348 debuglog.info('')
3349 debuglog.info('Grammar')
3350 debuglog.info('')
3351 for n, p in enumerate(grammar.Productions):
3352 debuglog.info('Rule %-5d %s', n, p)
3353
3354 # Find unused non-terminals
3355 unused_rules = grammar.unused_rules()
3356 for prod in unused_rules:
3357 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3358
3359 if len(unused_terminals) == 1:
3360 errorlog.warning('There is 1 unused token')
3361 if len(unused_terminals) > 1:
3362 errorlog.warning('There are %d unused tokens', len(unused_terminals))
3363
3364 if len(unused_rules) == 1:
3365 errorlog.warning('There is 1 unused rule')
3366 if len(unused_rules) > 1:
3367 errorlog.warning('There are %d unused rules', len(unused_rules))
3368
3369 if debug:
3370 debuglog.info('')
3371 debuglog.info('Terminals, with rules where they appear')
3372 debuglog.info('')
3373 terms = list(grammar.Terminals)
3374 terms.sort()
3375 for term in terms:
3376 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3377
3378 debuglog.info('')
3379 debuglog.info('Nonterminals, with rules where they appear')
3380 debuglog.info('')
3381 nonterms = list(grammar.Nonterminals)
3382 nonterms.sort()
3383 for nonterm in nonterms:
3384 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3385 debuglog.info('')
3386
3387 if check_recursion:
3388 unreachable = grammar.find_unreachable()
3389 for u in unreachable:
3390 errorlog.warning('Symbol %r is unreachable', u)
3391
3392 infinite = grammar.infinite_cycles()
3393 for inf in infinite:
3394 errorlog.error('Infinite recursion detected for symbol %r', inf)
3395 errors = True
3396
3397 unused_prec = grammar.unused_precedence()
3398 for term, assoc in unused_prec:
3399 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3400 errors = True
3401
3402 if errors:
3403 raise YaccError('Unable to build parser')
3404
3405 # Run the LRGeneratedTable on the grammar
3406 if debug:
3407 errorlog.debug('Generating %s tables', method)
3408
3409 lr = LRGeneratedTable(grammar, method, debuglog)
3410
3411 if debug:
3412 num_sr = len(lr.sr_conflicts)
3413
3414 # Report shift/reduce and reduce/reduce conflicts
3415 if num_sr == 1:
3416 errorlog.warning('1 shift/reduce conflict')
3417 elif num_sr > 1:
3418 errorlog.warning('%d shift/reduce conflicts', num_sr)
3419
3420 num_rr = len(lr.rr_conflicts)
3421 if num_rr == 1:
3422 errorlog.warning('1 reduce/reduce conflict')
3423 elif num_rr > 1:
3424 errorlog.warning('%d reduce/reduce conflicts', num_rr)
3425
3426 # Write out conflicts to the output file
3427 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3428 debuglog.warning('')
3429 debuglog.warning('Conflicts:')
3430 debuglog.warning('')
3431
3432 for state, tok, resolution in lr.sr_conflicts:
3433 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
3434
3435 already_reported = set()
3436 for state, rule, rejected in lr.rr_conflicts:
3437 if (state, id(rule), id(rejected)) in already_reported:
3438 continue
3439 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3440 debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3441 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3442 errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3443 already_reported.add((state, id(rule), id(rejected)))
3444
3445 warned_never = []
3446 for state, rule, rejected in lr.rr_conflicts:
3447 if not rejected.reduced and (rejected not in warned_never):
3448 debuglog.warning('Rule (%s) is never reduced', rejected)
3449 errorlog.warning('Rule (%s) is never reduced', rejected)
3450 warned_never.append(rejected)
3451
3452 # Write the table file if requested
3453 if write_tables:
3454 try:
3455 lr.write_table(tabmodule, outputdir, signature)
3456 except IOError as e:
3457 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3458
3459 # Write a pickled version of the tables
3460 if picklefile:
3461 try:
3462 lr.pickle_table(picklefile, signature)
3463 except IOError as e:
3464 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3465
3466 # Build the parser
3467 lr.bind_callables(pinfo.pdict)
3468 parser = LRParser(lr, pinfo.error_func)
3469
3470 parse = parser.parse
3471 return parser