7267
|
1 # -----------------------------------------------------------------------------
|
|
2 # ply: yacc.py
|
|
3 #
|
|
4 # Copyright (C) 2001-2015,
|
|
5 # David M. Beazley (Dabeaz LLC)
|
|
6 # All rights reserved.
|
|
7 #
|
|
8 # Redistribution and use in source and binary forms, with or without
|
|
9 # modification, are permitted provided that the following conditions are
|
|
10 # met:
|
|
11 #
|
|
12 # * Redistributions of source code must retain the above copyright notice,
|
|
13 # this list of conditions and the following disclaimer.
|
|
14 # * Redistributions in binary form must reproduce the above copyright notice,
|
|
15 # this list of conditions and the following disclaimer in the documentation
|
|
16 # and/or other materials provided with the distribution.
|
|
17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to
|
|
18 # endorse or promote products derived from this software without
|
|
19 # specific prior written permission.
|
|
20 #
|
|
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
32 # -----------------------------------------------------------------------------
|
|
33 #
|
|
34 # This implements an LR parser that is constructed from grammar rules defined
|
|
35 # as Python functions. The grammer is specified by supplying the BNF inside
|
|
36 # Python documentation strings. The inspiration for this technique was borrowed
|
|
37 # from John Aycock's Spark parsing system. PLY might be viewed as cross between
|
|
38 # Spark and the GNU bison utility.
|
|
39 #
|
|
40 # The current implementation is only somewhat object-oriented. The
|
|
41 # LR parser itself is defined in terms of an object (which allows multiple
|
|
42 # parsers to co-exist). However, most of the variables used during table
|
|
43 # construction are defined in terms of global variables. Users shouldn't
|
|
44 # notice unless they are trying to define multiple parsers at the same
|
|
45 # time using threads (in which case they should have their head examined).
|
|
46 #
|
|
47 # This implementation supports both SLR and LALR(1) parsing. LALR(1)
|
|
48 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
|
|
49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
|
|
50 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
|
|
51 # by the more efficient DeRemer and Pennello algorithm.
|
|
52 #
|
|
53 # :::::::: WARNING :::::::
|
|
54 #
|
|
55 # Construction of LR parsing tables is fairly complicated and expensive.
|
|
56 # To make this module run fast, a *LOT* of work has been put into
|
|
57 # optimization---often at the expensive of readability and what might
|
|
58 # consider to be good Python "coding style." Modify the code at your
|
|
59 # own risk!
|
|
60 # ----------------------------------------------------------------------------
|
|
61
|
|
62 import re
|
|
63 import types
|
|
64 import sys
|
|
65 import os.path
|
|
66 import inspect
|
|
67 import base64
|
|
68 import warnings
|
|
69
|
|
__version__ = '3.8'
__tabversion__ = '3.8'   # Version stamp written into generated parsetab files

#-----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = True             # Debugging mode.  If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'      # Default name of the table module
default_lr = 'LALR'          # Default LR table generation method

error_count = 3              # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False            # Set to True if developing yacc.  This turns off optimized
                             # implementations of certain functions.

resultlimit = 40             # Size limit of results when running in debug mode.

pickle_protocol = 0          # Protocol to use when writing pickle files

# String type-checking compatibility: on Python 2 grammar specs may be
# str or unicode (both covered by basestring); on Python 3 only str.
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize         # Used as an "infinity" sentinel during table construction
|
|
102
|
|
103 # This object is a stand-in for a logging object created by the
|
|
104 # logging module. PLY will use this by default to create things
|
|
105 # such as the parser.out file. If a user wants more detailed
|
|
106 # information, they can create their own logging object and pass
|
|
107 # it into PLY.
|
|
108
|
|
class PlyLogger(object):
    """Minimal stand-in for a logging.Logger that writes to a file object.

    PLY uses this by default to produce output such as the parser.out
    file.  Users wanting richer behavior can supply a real logging
    object instead.
    """

    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        # Messages are %-formatted lazily, mirroring the logging module.
        self.f.write((msg % args) + '\n')

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    # info- and critical-level output are indistinguishable from debug output.
    info = debug
    critical = debug
|
|
125
|
|
126 # Null logger is used when no output is generated. Does nothing.
|
|
class NullLogger(object):
    """Logger that silently discards everything.

    __getattribute__ returns the instance itself for every attribute
    lookup, and the instance is itself callable (again returning self),
    so any chain of logger method calls is a no-op.
    """
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self
133
|
|
134 # Exception raised for yacc-related errors
|
|
class YaccError(Exception):
    """Exception raised for yacc-related errors (grammar or table problems)."""
    pass
|
|
137
|
|
138 # Format the result message that the parser produces when running in debug mode.
|
|
def format_result(r):
    """Render a rule's result for the debug log: type, identity, truncated repr.

    Reprs containing newlines are re-escaped so the entry stays on one
    line; reprs longer than the module-level `resultlimit` are truncated.
    """
    text = repr(r)
    if '\n' in text:
        # Escape embedded newlines so the log entry occupies a single line.
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + ' ...'
    return '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), text)
|
|
147
|
|
148 # Format stack entries when the parser is running in debug mode
|
|
def format_stack_entry(r):
    """Render a symbol-stack entry compactly for debug traces.

    Short reprs (under 16 characters) are shown verbatim; anything longer
    is abbreviated to the value's type and identity.
    """
    text = repr(r)
    if '\n' in text:
        # Keep multi-line reprs on a single log line.
        text = repr(text)
    if len(text) >= 16:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
    return text
|
|
157
|
|
158 # Panic mode error recovery support. This feature is being reworked--much of the
|
|
159 # code here is to offer a deprecation/backwards compatible transition
|
|
160
|
|
# _errok/_token/_restart temporarily hold the bound errok()/token()/restart()
# methods of whichever parser is currently inside call_errorfunc(); they exist
# only to keep the deprecated module-level errok()/token()/restart() helpers
# below working during the transition period.
_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''
|
|
174
|
|
def errok():
    # Deprecated global form of parser.errok(); warns and forwards to the
    # parser currently inside call_errorfunc().  See _warnmsg.
    warnings.warn(_warnmsg)
    return _errok()
|
|
178
|
|
def restart():
    # Deprecated global form of parser.restart(); warns and forwards to the
    # parser currently inside call_errorfunc().  See _warnmsg.
    warnings.warn(_warnmsg)
    return _restart()
|
|
182
|
|
def token():
    # Deprecated global form of parser.token(); warns and forwards to the
    # parser currently inside call_errorfunc().  See _warnmsg.
    warnings.warn(_warnmsg)
    return _token()
|
|
186
|
|
187 # Utility function to call the p_error() function with some deprecation hacks
|
|
def call_errorfunc(errorfunc, token, parser):
    """Invoke the user's p_error() callback with deprecation support.

    While the callback runs, the module-level _errok/_token/_restart hooks
    point at *parser*'s bound methods so the deprecated global errok(),
    token(), and restart() functions keep working.  Returns whatever the
    callback returns.
    """
    global _errok, _token, _restart
    # Expose the parser's recovery methods through the legacy module globals
    # for the duration of the callback.
    _errok, _token, _restart = parser.errok, parser.token, parser.restart
    result = errorfunc(token)
    try:
        # Tear the hooks back down so a stale parser is not reachable later.
        del _errok, _token, _restart
    except NameError:
        pass
    return result
|
|
199
|
|
200 #-----------------------------------------------------------------------------
|
|
201 # === LR Parsing Engine ===
|
|
202 #
|
|
203 # The following classes are used for the LR parser itself. These are not
|
|
204 # used during table construction and are independent of the actual LR
|
|
205 # table generation algorithm
|
|
206 #-----------------------------------------------------------------------------
|
|
207
|
|
208 # This class is used to hold non-terminal grammar symbols during parsing.
|
|
209 # It normally has the following attributes set:
|
|
210 # .type = Grammar symbol type
|
|
211 # .value = Symbol value
|
|
212 # .lineno = Starting line number
|
|
213 # .endlineno = Ending line number (optional, set automatically)
|
|
214 # .lexpos = Starting lex position
|
|
215 # .endlexpos = Ending lex position (optional, set automatically)
|
|
216
|
|
class YaccSymbol:
    """A grammar symbol held on the parsing stack.

    Attributes assigned during parsing:
        type      -- grammar symbol name
        value     -- semantic value
        lineno    -- starting line number (when tracking is enabled)
        endlineno -- ending line number (optional, set automatically)
        lexpos    -- starting lex position (when tracking is enabled)
        endlexpos -- ending lex position (optional, set automatically)
    """

    def __str__(self):
        return self.type

    # repr and str render identically: just the symbol's type name.
    __repr__ = __str__
|
|
223
|
|
224 # This class is a wrapper around the objects actually passed to each
|
|
225 # grammar rule. Index lookup and assignment actually assign the
|
|
226 # .value attribute of the underlying YaccSymbol object.
|
|
227 # The lineno() method returns the line number of a given
|
|
228 # item (or 0 if not defined). The linespan() method returns
|
|
229 # a tuple of (startline,endline) representing the range of lines
|
|
230 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
|
|
231 # representing the range of positional information for a symbol.
|
|
232
|
|
class YaccProduction:
    """Wrapper around the symbols matched by a grammar rule.

    Index lookup and assignment read/write the .value attribute of the
    underlying YaccSymbol objects.  lineno() returns a symbol's starting
    line (0 if unknown); linespan() returns (startline, endline);
    lexspan() returns (lexpos, endlexpos).
    """

    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        # Slices yield a list of values; negative indices reach into the
        # parser's symbol stack (the state *before* this reduction).
        if isinstance(n, slice):
            return [sym.value for sym in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 only: legacy slicing protocol.
        return [sym.value for sym in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Starting line number of symbol n (0 if not recorded)."""
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        """Set the starting line number of symbol n."""
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """(startline, endline) tuple for symbol n."""
        start = getattr(self.slice[n], 'lineno', 0)
        return start, getattr(self.slice[n], 'endlineno', start)

    def lexpos(self, n):
        """Starting lexer position of symbol n (0 if not recorded)."""
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        """(lexpos, endlexpos) tuple for symbol n."""
        start = getattr(self.slice[n], 'lexpos', 0)
        return start, getattr(self.slice[n], 'endlexpos', start)

    def error(self):
        """Signal a syntax error from inside a grammar rule."""
        raise SyntaxError
|
|
278
|
|
279 # -----------------------------------------------------------------------------
|
|
280 # == LRParser ==
|
|
281 #
|
|
282 # The LR Parsing engine.
|
|
283 # -----------------------------------------------------------------------------
|
|
284
|
|
285 class LRParser:
|
|
    def __init__(self, lrtab, errorf):
        # lrtab:  table object providing lr_productions, lr_action, lr_goto
        # errorf: the user's p_error() callback (or None)
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        # Precompute states with a single forced reduction (see
        # set_defaulted_states below).
        self.set_defaulted_states()
        self.errorok = True
|
|
293
|
|
    def errok(self):
        # Signal that error recovery has succeeded; the engine resumes
        # normal parsing without reporting further errors.
        self.errorok = True
|
|
296
|
|
297 def restart(self):
|
|
298 del self.statestack[:]
|
|
299 del self.symstack[:]
|
|
300 sym = YaccSymbol()
|
|
301 sym.type = '$end'
|
|
302 self.symstack.append(sym)
|
|
303 self.statestack.append(0)
|
|
304
|
|
305 # Defaulted state support.
|
|
306 # This method identifies parser states where there is only one possible reduction action.
|
|
307 # For such states, the parser can make a choose to make a rule reduction without consuming
|
|
308 # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
|
|
309 # certain kinds of advanced parsing situations where the lexer and parser interact with
|
|
310 # each other or change states (i.e., manipulation of scope, lexer states, etc.).
|
|
311 #
|
|
312 # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
|
|
313 def set_defaulted_states(self):
|
|
314 self.defaulted_states = {}
|
|
315 for state, actions in self.action.items():
|
|
316 rules = list(actions.values())
|
|
317 if len(rules) == 1 and rules[0] < 0:
|
|
318 self.defaulted_states[state] = rules[0]
|
|
319
|
|
    def disable_defaulted_states(self):
        # Turn off the defaulted-state optimization: every state will then
        # consult the lookahead token before choosing an action.
        self.defaulted_states = {}
|
|
322
|
|
323 def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
|
|
324 if debug or yaccdevel:
|
|
325 if isinstance(debug, int):
|
|
326 debug = PlyLogger(sys.stderr)
|
|
327 return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
|
|
328 elif tracking:
|
|
329 return self.parseopt(input, lexer, debug, tracking, tokenfunc)
|
|
330 else:
|
|
331 return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
|
|
332
|
|
333
|
|
334 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
335 # parsedebug().
|
|
336 #
|
|
337 # This is the debugging enabled version of parse(). All changes made to the
|
|
338 # parsing engine should be made here. Optimized versions of this function
|
|
339 # are automatically created by the ply/ygen.py script. This script cuts out
|
|
340 # sections enclosed in markers such as this:
|
|
341 #
|
|
342 # #--! DEBUG
|
|
343 # statements
|
|
344 # #--! DEBUG
|
|
345 #
|
|
346 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
347
|
|
    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Reference implementation of the LR parsing loop (debug-instrumented).

        All engine changes must be made here: the optimized parseopt() and
        parseopt_notrack() variants are generated from this method by
        ply/ygen.py, which strips out the sections bracketed by
        #--! DEBUG and #--! TRACKING markers.
        """
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        # Lexer exhausted: synthesize the end-of-input marker.
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                # Defaulted state: reduce without consulting the lookahead.
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        # Non-empty production: the last plen stack symbols
                        # form the rule's right-hand side.
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            # Span of the new symbol covers the first through
                            # last right-hand-side symbols.
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:
                        # Empty production: nothing is popped off the stacks.

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the start symbol's value is the parse result.
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                # NOTE(review): reads lookahead.lineno, but
                                # errtoken is lookahead on this path, so the
                                # two are equivalent here.
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                # NOTE(review): unlike the message above, this
                                # one lacks a trailing '\n'.
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already an error token: pop a state and
                    # keep trying to find a state that can shift 'error'.
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end
|
|
680
|
|
681 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
682 # parseopt().
|
|
683 #
|
|
684 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
|
|
685 # This code is automatically generated by the ply/ygen.py script. Make
|
|
686 # changes to the parsedebug() method instead.
|
|
687 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
688
|
|
689 def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
|
|
690 #--! parseopt-start
|
|
691 lookahead = None # Current lookahead symbol
|
|
692 lookaheadstack = [] # Stack of lookahead symbols
|
|
693 actions = self.action # Local reference to action table (to avoid lookup on self.)
|
|
694 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
|
|
695 prod = self.productions # Local reference to production list (to avoid lookup on self.)
|
|
696 defaulted_states = self.defaulted_states # Local reference to defaulted states
|
|
697 pslice = YaccProduction(None) # Production object passed to grammar rules
|
|
698 errorcount = 0 # Used during error recovery
|
|
699
|
|
700
|
|
701 # If no lexer was given, we will try to use the lex module
|
|
702 if not lexer:
|
|
703 from . import lex
|
|
704 lexer = lex.lexer
|
|
705
|
|
706 # Set up the lexer and parser objects on pslice
|
|
707 pslice.lexer = lexer
|
|
708 pslice.parser = self
|
|
709
|
|
710 # If input was supplied, pass to lexer
|
|
711 if input is not None:
|
|
712 lexer.input(input)
|
|
713
|
|
714 if tokenfunc is None:
|
|
715 # Tokenize function
|
|
716 get_token = lexer.token
|
|
717 else:
|
|
718 get_token = tokenfunc
|
|
719
|
|
720 # Set the parser() token method (sometimes used in error recovery)
|
|
721 self.token = get_token
|
|
722
|
|
723 # Set up the state and symbol stacks
|
|
724
|
|
725 statestack = [] # Stack of parsing states
|
|
726 self.statestack = statestack
|
|
727 symstack = [] # Stack of grammar symbols
|
|
728 self.symstack = symstack
|
|
729
|
|
730 pslice.stack = symstack # Put in the production
|
|
731 errtoken = None # Err token
|
|
732
|
|
733 # The start state is assumed to be (0,$end)
|
|
734
|
|
735 statestack.append(0)
|
|
736 sym = YaccSymbol()
|
|
737 sym.type = '$end'
|
|
738 symstack.append(sym)
|
|
739 state = 0
|
|
740 while True:
|
|
741 # Get the next symbol on the input. If a lookahead symbol
|
|
742 # is already set, we just use that. Otherwise, we'll pull
|
|
743 # the next token off of the lookaheadstack or from the lexer
|
|
744
|
|
745
|
|
746 if state not in defaulted_states:
|
|
747 if not lookahead:
|
|
748 if not lookaheadstack:
|
|
749 lookahead = get_token() # Get the next token
|
|
750 else:
|
|
751 lookahead = lookaheadstack.pop()
|
|
752 if not lookahead:
|
|
753 lookahead = YaccSymbol()
|
|
754 lookahead.type = '$end'
|
|
755
|
|
756 # Check the action table
|
|
757 ltype = lookahead.type
|
|
758 t = actions[state].get(ltype)
|
|
759 else:
|
|
760 t = defaulted_states[state]
|
|
761
|
|
762
|
|
763 if t is not None:
|
|
764 if t > 0:
|
|
765 # shift a symbol on the stack
|
|
766 statestack.append(t)
|
|
767 state = t
|
|
768
|
|
769
|
|
770 symstack.append(lookahead)
|
|
771 lookahead = None
|
|
772
|
|
773 # Decrease error count on successful shift
|
|
774 if errorcount:
|
|
775 errorcount -= 1
|
|
776 continue
|
|
777
|
|
778 if t < 0:
|
|
779 # reduce a symbol on the stack, emit a production
|
|
780 p = prod[-t]
|
|
781 pname = p.name
|
|
782 plen = p.len
|
|
783
|
|
784 # Get production function
|
|
785 sym = YaccSymbol()
|
|
786 sym.type = pname # Production name
|
|
787 sym.value = None
|
|
788
|
|
789
|
|
790 if plen:
|
|
791 targ = symstack[-plen-1:]
|
|
792 targ[0] = sym
|
|
793
|
|
794 #--! TRACKING
|
|
795 if tracking:
|
|
796 t1 = targ[1]
|
|
797 sym.lineno = t1.lineno
|
|
798 sym.lexpos = t1.lexpos
|
|
799 t1 = targ[-1]
|
|
800 sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
|
|
801 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
|
|
802 #--! TRACKING
|
|
803
|
|
804 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
805 # The code enclosed in this section is duplicated
|
|
806 # below as a performance optimization. Make sure
|
|
807 # changes get made in both locations.
|
|
808
|
|
809 pslice.slice = targ
|
|
810
|
|
811 try:
|
|
812 # Call the grammar rule with our special slice object
|
|
813 del symstack[-plen:]
|
|
814 del statestack[-plen:]
|
|
815 p.callable(pslice)
|
|
816 symstack.append(sym)
|
|
817 state = goto[statestack[-1]][pname]
|
|
818 statestack.append(state)
|
|
819 except SyntaxError:
|
|
820 # If an error was set. Enter error recovery state
|
|
821 lookaheadstack.append(lookahead)
|
|
822 symstack.pop()
|
|
823 statestack.pop()
|
|
824 state = statestack[-1]
|
|
825 sym.type = 'error'
|
|
826 lookahead = sym
|
|
827 errorcount = error_count
|
|
828 self.errorok = False
|
|
829 continue
|
|
830 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
831
|
|
832 else:
|
|
833
|
|
834 #--! TRACKING
|
|
835 if tracking:
|
|
836 sym.lineno = lexer.lineno
|
|
837 sym.lexpos = lexer.lexpos
|
|
838 #--! TRACKING
|
|
839
|
|
840 targ = [sym]
|
|
841
|
|
842 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
843 # The code enclosed in this section is duplicated
|
|
844 # above as a performance optimization. Make sure
|
|
845 # changes get made in both locations.
|
|
846
|
|
847 pslice.slice = targ
|
|
848
|
|
849 try:
|
|
850 # Call the grammar rule with our special slice object
|
|
851 p.callable(pslice)
|
|
852 symstack.append(sym)
|
|
853 state = goto[statestack[-1]][pname]
|
|
854 statestack.append(state)
|
|
855 except SyntaxError:
|
|
856 # If an error was set. Enter error recovery state
|
|
857 lookaheadstack.append(lookahead)
|
|
858 symstack.pop()
|
|
859 statestack.pop()
|
|
860 state = statestack[-1]
|
|
861 sym.type = 'error'
|
|
862 lookahead = sym
|
|
863 errorcount = error_count
|
|
864 self.errorok = False
|
|
865 continue
|
|
866 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
867
|
|
868 if t == 0:
|
|
869 n = symstack[-1]
|
|
870 result = getattr(n, 'value', None)
|
|
871 return result
|
|
872
|
|
873 if t is None:
|
|
874
|
|
875
|
|
876 # We have some kind of parsing error here. To handle
|
|
877 # this, we are going to push the current token onto
|
|
878 # the tokenstack and replace it with an 'error' token.
|
|
879 # If there are any synchronization rules, they may
|
|
880 # catch it.
|
|
881 #
|
|
882 # In addition to pushing the error token, we call call
|
|
883 # the user defined p_error() function if this is the
|
|
884 # first syntax error. This function is only called if
|
|
885 # errorcount == 0.
|
|
886 if errorcount == 0 or self.errorok:
|
|
887 errorcount = error_count
|
|
888 self.errorok = False
|
|
889 errtoken = lookahead
|
|
890 if errtoken.type == '$end':
|
|
891 errtoken = None # End of file!
|
|
892 if self.errorfunc:
|
|
893 if errtoken and not hasattr(errtoken, 'lexer'):
|
|
894 errtoken.lexer = lexer
|
|
895 tok = call_errorfunc(self.errorfunc, errtoken, self)
|
|
896 if self.errorok:
|
|
897 # User must have done some kind of panic
|
|
898 # mode recovery on their own. The
|
|
899 # returned token is the next lookahead
|
|
900 lookahead = tok
|
|
901 errtoken = None
|
|
902 continue
|
|
903 else:
|
|
904 if errtoken:
|
|
905 if hasattr(errtoken, 'lineno'):
|
|
906 lineno = lookahead.lineno
|
|
907 else:
|
|
908 lineno = 0
|
|
909 if lineno:
|
|
910 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
|
|
911 else:
|
|
912 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
|
|
913 else:
|
|
914 sys.stderr.write('yacc: Parse error in input. EOF\n')
|
|
915 return
|
|
916
|
|
917 else:
|
|
918 errorcount = error_count
|
|
919
|
|
920 # case 1: the statestack only has 1 entry on it. If we're in this state, the
|
|
921 # entire parse has been rolled back and we're completely hosed. The token is
|
|
922 # discarded and we just keep going.
|
|
923
|
|
924 if len(statestack) <= 1 and lookahead.type != '$end':
|
|
925 lookahead = None
|
|
926 errtoken = None
|
|
927 state = 0
|
|
928 # Nuke the pushback stack
|
|
929 del lookaheadstack[:]
|
|
930 continue
|
|
931
|
|
932 # case 2: the statestack has a couple of entries on it, but we're
|
|
933 # at the end of the file. nuke the top entry and generate an error token
|
|
934
|
|
935 # Start nuking entries on the stack
|
|
936 if lookahead.type == '$end':
|
|
937 # Whoa. We're really hosed here. Bail out
|
|
938 return
|
|
939
|
|
940 if lookahead.type != 'error':
|
|
941 sym = symstack[-1]
|
|
942 if sym.type == 'error':
|
|
943 # Hmmm. Error is on top of stack, we'll just nuke input
|
|
944 # symbol and continue
|
|
945 #--! TRACKING
|
|
946 if tracking:
|
|
947 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
|
|
948 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
|
|
949 #--! TRACKING
|
|
950 lookahead = None
|
|
951 continue
|
|
952
|
|
953 # Create the error symbol for the first time and make it the new lookahead symbol
|
|
954 t = YaccSymbol()
|
|
955 t.type = 'error'
|
|
956
|
|
957 if hasattr(lookahead, 'lineno'):
|
|
958 t.lineno = t.endlineno = lookahead.lineno
|
|
959 if hasattr(lookahead, 'lexpos'):
|
|
960 t.lexpos = t.endlexpos = lookahead.lexpos
|
|
961 t.value = lookahead
|
|
962 lookaheadstack.append(lookahead)
|
|
963 lookahead = t
|
|
964 else:
|
|
965 sym = symstack.pop()
|
|
966 #--! TRACKING
|
|
967 if tracking:
|
|
968 lookahead.lineno = sym.lineno
|
|
969 lookahead.lexpos = sym.lexpos
|
|
970 #--! TRACKING
|
|
971 statestack.pop()
|
|
972 state = statestack[-1]
|
|
973
|
|
974 continue
|
|
975
|
|
976 # Call an error function here
|
|
977 raise RuntimeError('yacc: internal parser error!!!\n')
|
|
978
|
|
979 #--! parseopt-end
|
|
980
|
|
981 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
982 # parseopt_notrack().
|
|
983 #
|
|
984 # Optimized version of parseopt() with line number tracking removed.
|
|
985 # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
|
|
986 # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
|
|
987 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
988
|
|
    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Parse input and return the value attached to the start symbol.

        Optimized version of the LR parsing engine with debugging output and
        line/position tracking stripped out.  This body is generated from
        parsedebug() by the ply/ygen.py script -- do not edit it directly;
        make changes to parsedebug() and regenerate.

        input     - optional string handed to lexer.input()
        lexer     - lexer object (defaults to the module-level lex lexer)
        tokenfunc - optional replacement for lexer.token()
        Returns the value of the top symbol on success, None on an
        unrecovered parse error.
        """
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the start symbol's value is the parse result.
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end
|
|
1256
|
|
1257 # -----------------------------------------------------------------------------
|
|
1258 # === Grammar Representation ===
|
|
1259 #
|
|
1260 # The following functions, classes, and variables are used to represent and
|
|
1261 # manipulate the rules that make up a grammar.
|
|
1262 # -----------------------------------------------------------------------------
|
|
1263
|
|
# Regular expression used to validate grammar rule and symbol names.
# NOTE(review): it also admits names beginning with digits and containing
# '-' -- presumably deliberate, confirm before tightening.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
|
|
1266
|
|
1267 # -----------------------------------------------------------------------------
|
|
1268 # class Production:
|
|
1269 #
|
|
1270 # This class stores the raw information about a single production or grammar rule.
|
|
1271 # A grammar rule refers to a specification such as this:
|
|
1272 #
|
|
1273 # expr : expr PLUS term
|
|
1274 #
|
|
1275 # Here are the basic attributes defined on all productions
|
|
1276 #
|
|
1277 # name - Name of the production. For example 'expr'
|
|
1278 # prod - A list of symbols on the right side ['expr','PLUS','term']
|
|
1279 # prec - Production precedence level
|
|
1280 # number - Production number.
|
|
1281 # func - Function that executes on reduce
|
|
1282 # file - File where production function is defined
|
|
1283 # lineno - Line number where production function is defined
|
|
1284 #
|
|
1285 # The following attributes are defined or optional.
|
|
1286 #
|
|
1287 # len - Length of the production (number of symbols on right hand side)
|
|
1288 # usyms - Set of unique symbols found in the production
|
|
1289 # -----------------------------------------------------------------------------
|
|
1290
|
|
class Production(object):
    """Raw information about a single grammar rule such as 'expr : expr PLUS term'.

    Attributes:
      name     - left-hand-side name, e.g. 'expr'
      prod     - tuple of right-hand-side symbols, e.g. ('expr', 'PLUS', 'term')
      prec     - production precedence, a (assoc, level) tuple
      number   - production number
      func     - name of the function executed on reduce
      callable - bound callable (resolved later by bind())
      file     - file where the production function is defined
      line     - line number where the production function is defined
      len      - number of right-hand-side symbols
      usyms    - unique symbols appearing in the production (in first-seen order)
    """
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction
        self.len = len(self.prod)   # Length of the production

        # Unique production symbols, preserving first-seen order.
        seen = set()
        self.usyms = []
        for symbol in self.prod:
            if symbol not in seen:
                seen.add(symbol)
                self.usyms.append(symbol)

        # All LR items for this production (filled in during table generation)
        self.lr_items = []
        self.lr_next = None

        # Cached human-readable form used by __str__/__repr__.
        self.str = ('%s -> %s' % (self.name, ' '.join(self.prod))
                    if self.prod
                    else '%s -> <empty>' % self.name)

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production({0})'.format(self)

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truthiness: a Production is always considered true.
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        item = LRItem(self, n)
        # Precompute the productions that can appear right after the dot.
        # NOTE(review): 'Prodnames' is resolved as a module-level global
        # here; this looks like legacy code -- verify before relying on it.
        try:
            item.lr_after = Prodnames[item.prod[n+1]]
        except (IndexError, KeyError):
            item.lr_after = []
        try:
            item.lr_before = item.prod[n-1]
        except IndexError:
            item.lr_before = None
        return item

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
|
|
1358
|
|
1359 # This class serves as a minimal standin for Production objects when
|
|
1360 # reading table data from files. It only contains information
|
|
1361 # actually used by the LR parsing engine, plus some additional
|
|
1362 # debugging information.
|
|
class MiniProduction(object):
    """Minimal stand-in for Production objects read back from table files.

    Carries only what the LR parsing engine actually uses at parse time,
    plus a little debugging information.
    """

    def __init__(self, str, name, len, func, file, line):
        self.str = str
        self.name = name
        self.len = len
        self.func = func
        self.file = file
        self.line = line
        self.callable = None   # resolved later by bind()

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction({0})'.format(self.str)

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
|
|
1383
|
|
1384
|
|
1385 # -----------------------------------------------------------------------------
|
|
1386 # class LRItem
|
|
1387 #
|
|
1388 # This class represents a specific stage of parsing a production rule. For
|
|
1389 # example:
|
|
1390 #
|
|
1391 # expr : expr . PLUS term
|
|
1392 #
|
|
# In the above, the "." represents the current location of the parse.  Here
# are its basic attributes:
|
|
1395 #
|
|
1396 # name - Name of the production. For example 'expr'
|
|
1397 # prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
|
|
1398 # number - Production number.
|
|
1399 #
|
|
1400 # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
|
|
1401 # then lr_next refers to 'expr -> expr PLUS . term'
|
|
1402 # lr_index - LR item index (location of the ".") in the prod list.
|
|
1403 # lookaheads - LALR lookahead symbols for this item
|
|
1404 # len - Length of the production (number of symbols on right hand side)
|
|
1405 # lr_after - List of all productions that immediately follow
|
|
1406 # lr_before - Grammar symbol immediately before
|
|
1407 # -----------------------------------------------------------------------------
|
|
1408
|
|
class LRItem(object):
    """A production with a '.' marking the current parse position.

    For 'expr : expr . PLUS term' the dot sits at lr_index inside prod.
    lookaheads maps states to LALR lookahead symbols; usyms is shared with
    the originating Production.
    """

    def __init__(self, p, n):
        self.name = p.name
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        # Build the dotted right-hand side without mutating p.prod.
        dotted = list(p.prod)
        dotted.insert(n, '.')
        self.prod = tuple(dotted)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        return ('%s -> %s' % (self.name, ' '.join(self.prod))
                if self.prod
                else '%s -> <empty>' % self.name)

    def __repr__(self):
        return 'LRItem({0})'.format(self)
|
|
1430
|
|
1431 # -----------------------------------------------------------------------------
|
|
1432 # rightmost_terminal()
|
|
1433 #
|
|
1434 # Return the rightmost terminal from a list of symbols. Used in add_production()
|
|
1435 # -----------------------------------------------------------------------------
|
|
def rightmost_terminal(symbols, terminals):
    """Return the rightmost symbol of *symbols* found in *terminals*, or None.

    Used by add_production() to derive a rule's default precedence.
    """
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None
|
|
1443
|
|
1444 # -----------------------------------------------------------------------------
|
|
1445 # === GRAMMAR CLASS ===
|
|
1446 #
|
|
1447 # The following class represents the contents of the specified grammar along
|
|
1448 # with various computed properties such as first sets, follow sets, LR items, etc.
|
|
1449 # This data is used for critical parts of the table generation process later.
|
|
1450 # -----------------------------------------------------------------------------
|
|
1451
|
|
class GrammarError(YaccError):
    """Raised for problems in the grammar specification itself
    (illegal rule names, duplicate rules, bad precedence declarations, ...)."""
    pass
|
|
1454
|
|
1455 class Grammar(object):
|
|
    def __init__(self, terminals):
        """Initialize an empty grammar over the given iterable of terminal names."""
        self.Productions = [None]  # A list of all of the productions.  The first
                                   # entry is always reserved for the purpose of
                                   # building an augmented grammar

        self.Prodnames = {}        # A dictionary mapping the names of nonterminals to a list of all
                                   # productions of that nonterminal.

        self.Prodmap = {}          # A dictionary that is only used to detect duplicate
                                   # productions.

        self.Terminals = {}        # A dictionary mapping the names of terminal symbols to a
                                   # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        # 'error' is always a valid terminal (used for error recovery rules).
        self.Terminals['error'] = []

        self.Nonterminals = {}     # A dictionary mapping names of nonterminals to a list
                                   # of rule numbers where they are used.

        self.First = {}            # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}           # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}       # Precedence rules for each terminal. Contains tuples of the
                                   # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None          # Starting symbol for the grammar
|
|
1490
|
|
1491
|
|
    def __len__(self):
        # Number of entries in the production list (including the reserved
        # slot 0 for the augmented grammar rule).
        return len(self.Productions)
|
|
1494
|
|
    def __getitem__(self, index):
        # Index directly into the production list (entry 0 may be None until
        # set_start() installs the augmented rule).
        return self.Productions[index]
|
|
1497
|
|
1498 # -----------------------------------------------------------------------------
|
|
1499 # set_precedence()
|
|
1500 #
|
|
1501 # Sets the precedence for a given terminal. assoc is the associativity such as
|
|
1502 # 'left','right', or 'nonassoc'. level is a numeric level.
|
|
1503 #
|
|
1504 # -----------------------------------------------------------------------------
|
|
1505
|
|
    def set_precedence(self, term, assoc, level):
        """Record precedence for terminal *term*.

        assoc is one of 'left', 'right', or 'nonassoc'; level is the numeric
        precedence level.  Must be called before any add_production() call;
        raises GrammarError on a duplicate or malformed declaration.
        """
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
|
|
1513
|
|
1514 # -----------------------------------------------------------------------------
|
|
1515 # add_production()
|
|
1516 #
|
|
1517 # Given an action function, this function assembles a production rule and
|
|
1518 # computes its precedence level.
|
|
1519 #
|
|
1520 # The production rule is supplied as a list of symbols. For example,
|
|
1521 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
|
|
1522 # symbols ['expr','PLUS','term'].
|
|
1523 #
|
|
1524 # Precedence is determined by the precedence of the right-most non-terminal
|
|
1525 # or the precedence of a terminal specified by %prec.
|
|
1526 #
|
|
1527 # A variety of error checks are performed to make sure production symbols
|
|
1528 # are valid and that %prec is used correctly.
|
|
1529 # -----------------------------------------------------------------------------
|
|
1530
|
|
    def add_production(self, prodname, syms, func=None, file='', line=0):
        """Assemble one production rule and compute its precedence level.

        prodname  - rule name ('expr' in 'expr : expr PLUS term')
        syms      - list of right-hand-side symbols, e.g. ['expr','PLUS','term'];
                    mutated in place (literals normalized, %prec stripped)
        func      - name of the action function executed on reduce
        file/line - where the rule was defined (for error messages)

        Precedence comes from an explicit %prec terminal, otherwise from the
        rightmost terminal of the rule.  Raises GrammarError on any invalid
        name, literal, %prec usage, or duplicate rule.
        """
        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens such as 'x' or "x" and replace them with the
        # bare character, registering it as a terminal on first sight.
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    # NOTE: eval() is used to decode the quoted literal.  Input
                    # here comes from the grammar author's own rule strings.
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
|
|
1607
|
|
1608 # -----------------------------------------------------------------------------
|
|
1609 # set_start()
|
|
1610 #
|
|
1611 # Sets the starting symbol and creates the augmented grammar. Production
|
|
1612 # rule 0 is S' -> start where start is the start symbol.
|
|
1613 # -----------------------------------------------------------------------------
|
|
1614
|
|
    def set_start(self, start=None):
        """Set the start symbol and create the augmented grammar.

        If *start* is omitted, the left-hand side of the first rule added is
        used.  Installs production 0, S' -> start, and records the reference
        so the start symbol is never reported as unused.  Raises GrammarError
        if *start* is not a known nonterminal.
        """
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start
|
|
1623
|
|
1624 # -----------------------------------------------------------------------------
|
|
1625 # find_unreachable()
|
|
1626 #
|
|
1627 # Find all of the nonterminal symbols that can't be reached from the starting
|
|
1628 # symbol. Returns a list of nonterminals that can't be reached.
|
|
1629 # -----------------------------------------------------------------------------
|
|
1630
|
|
1631 def find_unreachable(self):
|
|
1632
|
|
1633 # Mark all symbols that are reachable from a symbol s
|
|
1634 def mark_reachable_from(s):
|
|
1635 if s in reachable:
|
|
1636 return
|
|
1637 reachable.add(s)
|
|
1638 for p in self.Prodnames.get(s, []):
|
|
1639 for r in p.prod:
|
|
1640 mark_reachable_from(r)
|
|
1641
|
|
1642 reachable = set()
|
|
1643 mark_reachable_from(self.Productions[0].prod[0])
|
|
1644 return [s for s in self.Nonterminals if s not in reachable]
|
|
1645
|
|
1646 # -----------------------------------------------------------------------------
|
|
1647 # infinite_cycles()
|
|
1648 #
|
|
1649 # This function looks at the various parsing rules and tries to detect
|
|
1650 # infinite recursion cycles (grammar rules where there is no possible way
|
|
1651 # to derive a string of only terminals).
|
|
1652 # -----------------------------------------------------------------------------
|
|
1653
|
|
    def infinite_cycles(self):
        """Detect nonterminals that can never derive a string of only terminals.

        Runs a fixed-point computation: a production terminates iff all of
        its right-hand-side symbols terminate, and a nonterminal terminates
        iff at least one of its productions does.  Returns the list of
        non-terminating symbols (excluding used-but-undefined ones, which are
        reported elsewhere).
        """
        terminates = {}

        # Terminals terminate trivially:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
|
|
1710
|
|
1711 # -----------------------------------------------------------------------------
|
|
1712 # undefined_symbols()
|
|
1713 #
|
|
# Find all symbols that were used in the grammar, but not defined as tokens or
# grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
# and prod is the production where the symbol was used.
|
|
1717 # -----------------------------------------------------------------------------
|
|
1718 def undefined_symbols(self):
|
|
1719 result = []
|
|
1720 for p in self.Productions:
|
|
1721 if not p:
|
|
1722 continue
|
|
1723
|
|
1724 for s in p.prod:
|
|
1725 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
|
|
1726 result.append((s, p))
|
|
1727 return result
|
|
1728
|
|
1729 # -----------------------------------------------------------------------------
|
|
1730 # unused_terminals()
|
|
1731 #
|
|
1732 # Find all terminals that were defined, but not used by the grammar. Returns
|
|
1733 # a list of all symbols.
|
|
1734 # -----------------------------------------------------------------------------
|
|
1735 def unused_terminals(self):
|
|
1736 unused_tok = []
|
|
1737 for s, v in self.Terminals.items():
|
|
1738 if s != 'error' and not v:
|
|
1739 unused_tok.append(s)
|
|
1740
|
|
1741 return unused_tok
|
|
1742
|
|
1743 # ------------------------------------------------------------------------------
|
|
1744 # unused_rules()
|
|
1745 #
|
|
1746 # Find all grammar rules that were defined, but not used (maybe not reachable)
|
|
1747 # Returns a list of productions.
|
|
1748 # ------------------------------------------------------------------------------
|
|
1749
|
|
def unused_rules(self):
    """Return one Production object for each non-terminal that was
    defined but is never referenced (possibly unreachable)."""
    return [self.Prodnames[name][0]
            for name, refs in self.Nonterminals.items() if not refs]
|
|
1757
|
|
1758 # -----------------------------------------------------------------------------
|
|
1759 # unused_precedence()
|
|
1760 #
|
|
1761 # Returns a list of tuples (term,precedence) corresponding to precedence
|
|
1762 # rules that were never used by the grammar. term is the name of the terminal
|
|
1763 # on which precedence was applied and precedence is a string such as 'left' or
|
|
1764 # 'right' corresponding to the type of precedence.
|
|
1765 # -----------------------------------------------------------------------------
|
|
1766
|
|
def unused_precedence(self):
    """Return [(term, assoc), ...] for every precedence declaration that
    is neither a known terminal nor marked as used, where assoc is
    'left', 'right', or 'nonassoc'."""
    result = []
    for term, prec in self.Precedence.items():
        # A declaration is "used" if it names a real terminal or was
        # consumed by a %prec directive.
        if term in self.Terminals or term in self.UsedPrecedence:
            continue
        result.append((term, prec[0]))
    return result
|
|
1774
|
|
1775 # -------------------------------------------------------------------------
|
|
1776 # _first()
|
|
1777 #
|
|
1778 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
|
|
1779 #
|
|
1780 # During execution of compute_first1, the result may be incomplete.
|
|
1781 # Afterward (e.g., when called from compute_follow()), it will be complete.
|
|
1782 # -------------------------------------------------------------------------
|
|
1783 def _first(self, beta):
|
|
1784
|
|
1785 # We are computing First(x1,x2,x3,...,xn)
|
|
1786 result = []
|
|
1787 for x in beta:
|
|
1788 x_produces_empty = False
|
|
1789
|
|
1790 # Add all the non-<empty> symbols of First[x] to the result.
|
|
1791 for f in self.First[x]:
|
|
1792 if f == '<empty>':
|
|
1793 x_produces_empty = True
|
|
1794 else:
|
|
1795 if f not in result:
|
|
1796 result.append(f)
|
|
1797
|
|
1798 if x_produces_empty:
|
|
1799 # We have to consider the next x in beta,
|
|
1800 # i.e. stay in the loop.
|
|
1801 pass
|
|
1802 else:
|
|
1803 # We don't have to consider any further symbols in beta.
|
|
1804 break
|
|
1805 else:
|
|
1806 # There was no 'break' from the loop,
|
|
1807 # so x_produces_empty was true for all x in beta,
|
|
1808 # so beta produces empty as well.
|
|
1809 result.append('<empty>')
|
|
1810
|
|
1811 return result
|
|
1812
|
|
1813 # -------------------------------------------------------------------------
|
|
1814 # compute_first()
|
|
1815 #
|
|
1816 # Compute the value of FIRST1(X) for all symbols
|
|
1817 # -------------------------------------------------------------------------
|
|
def compute_first(self):
    """Compute FIRST1(X) for every grammar symbol X and cache it in
    self.First.  Returns the (possibly previously cached) dictionary."""
    if self.First:
        return self.First

    # FIRST of a terminal is just the terminal itself.
    for term in self.Terminals:
        self.First[term] = [term]
    self.First['$end'] = ['$end']

    # Non-terminals start out empty ...
    for nt in self.Nonterminals:
        self.First[nt] = []

    # ... and accumulate symbols from their productions until a fixed
    # point is reached.
    changed = True
    while changed:
        changed = False
        for nt in self.Nonterminals:
            for prod in self.Prodnames[nt]:
                for sym in self._first(prod.prod):
                    if sym not in self.First[nt]:
                        self.First[nt].append(sym)
                        changed = True
    return self.First
|
|
1847
|
|
1848 # ---------------------------------------------------------------------
|
|
1849 # compute_follow()
|
|
1850 #
|
|
1851 # Computes all of the follow sets for every non-terminal symbol. The
|
|
1852 # follow set is the set of all symbols that might follow a given
|
|
1853 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
|
|
1854 # ---------------------------------------------------------------------
|
|
def compute_follow(self, start=None):
    """Compute FOLLOW(B) for every non-terminal B.

    FOLLOW(B) is the set of terminals that can appear immediately after
    B in some sentential form (Dragon book, 2nd Ed. p. 189).  start, if
    given, names the grammar start symbol; otherwise the name of
    Productions[1] is used.  Results are cached in self.Follow.
    """
    # If already computed, return the result
    if self.Follow:
        return self.Follow

    # If first sets not computed yet, do that first.
    if not self.First:
        self.compute_first()

    # Initialize every non-terminal's follow set to empty.
    for k in self.Nonterminals:
        self.Follow[k] = []

    if not start:
        start = self.Productions[1].name

    # Add '$end' to the follow list of the start symbol
    self.Follow[start] = ['$end']

    # Propagate until no follow set changes (fixed point).
    while True:
        didadd = False
        for p in self.Productions[1:]:
            # Scan the right-hand side for non-terminals
            for i, B in enumerate(p.prod):
                if B in self.Nonterminals:
                    # Everything in FIRST(rest-of-production) except
                    # '<empty>' goes into FOLLOW(B).
                    fst = self._first(p.prod[i+1:])
                    hasempty = False
                    for f in fst:
                        if f != '<empty>' and f not in self.Follow[B]:
                            self.Follow[B].append(f)
                            didadd = True
                        if f == '<empty>':
                            hasempty = True
                    if hasempty or i == (len(p.prod)-1):
                        # B can end the production (or the remainder can
                        # vanish): add FOLLOW(p.name) to FOLLOW(B).
                        for f in self.Follow[p.name]:
                            if f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
        if not didadd:
            break
    return self.Follow
|
|
1897
|
|
1898
|
|
1899 # -----------------------------------------------------------------------------
|
|
1900 # build_lritems()
|
|
1901 #
|
|
1902 # This function walks the list of productions and builds a complete set of the
|
|
1903 # LR items. The LR items are stored in two ways: First, they are uniquely
|
|
1904 # numbered and placed in the list _lritems. Second, a linked list of LR items
|
|
1905 # is built for each production. For example:
|
|
1906 #
|
|
1907 # E -> E PLUS E
|
|
1908 #
|
|
1909 # Creates the list
|
|
1910 #
|
|
1911 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
|
|
1912 # -----------------------------------------------------------------------------
|
|
1913
|
|
def build_lritems(self):
    """Walk every production and precompute its LR items.

    For each production p, builds p.lr_items (the list of LRItem objects
    for every dot position) and threads the items together through
    .lr_next, starting at p itself and terminated by None.  For example,
    E -> E PLUS E produces
    [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E .]
    Each item also gets:
      lr_after  - productions of the symbol right after the dot (or [])
      lr_before - symbol immediately before the dot (or None)
    """
    for p in self.Productions:
        lastlri = p
        i = 0
        lr_items = []
        while True:
            # i == len(p) puts the dot at the far right; one step past
            # that produces None, which terminates the lr_next chain.
            if i > len(p):
                lri = None
            else:
                lri = LRItem(p, i)
                # Precompute the list of productions immediately following
                try:
                    lri.lr_after = self.Prodnames[lri.prod[i+1]]
                except (IndexError, KeyError):
                    # Dot at the end, or next symbol is a terminal.
                    lri.lr_after = []
                try:
                    lri.lr_before = lri.prod[i-1]
                except IndexError:
                    lri.lr_before = None

            lastlri.lr_next = lri
            if not lri:
                break
            lr_items.append(lri)
            lastlri = lri
            i += 1
        p.lr_items = lr_items
|
|
1941
|
|
1942 # -----------------------------------------------------------------------------
|
|
1943 # == Class LRTable ==
|
|
1944 #
|
|
1945 # This basic class represents a basic table of LR parsing information.
|
|
1946 # Methods for generating the tables are not defined here. They are defined
|
|
1947 # in the derived class LRGeneratedTable.
|
|
1948 # -----------------------------------------------------------------------------
|
|
1949
|
|
class VersionError(YaccError):
    """Raised when a saved parser table file's version stamp does not
    match this version of ply (see the __tabversion__ checks in
    LRTable.read_table()/read_pickle())."""
    pass
|
|
1952
|
|
class LRTable(object):
    """Basic table of LR parsing information: the action table, the goto
    table, the production list, and the parsing method.

    This class only knows how to *load* previously generated tables
    (from a parsetab module or a pickle file); table generation lives in
    the derived class LRGeneratedTable.
    """
    def __init__(self):
        self.lr_action = None       # Action table
        self.lr_goto = None         # Goto table
        self.lr_productions = None  # List of MiniProduction objects
        self.lr_method = None       # Parsing method ('LALR' or 'SLR')

    def read_table(self, module):
        """Load tables from a parsetab module (a module object or an
        importable dotted module name).  Returns the table signature.

        Raises VersionError if the table was written by an incompatible
        version of ply.
        """
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            # Import by name.  importlib.import_module gives the same
            # result as the old exec('import %s' % module) but without
            # executing a dynamically built statement.
            import importlib
            importlib.import_module(module)
            parsetab = sys.modules[module]

        if parsetab._tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self, filename):
        """Load tables from a pickle file.  Returns the table signature.

        Raises ImportError if the file does not exist and VersionError
        if the table was written by an incompatible version of ply.
        """
        try:
            import cPickle as pickle    # Python 2 fast pickler
        except ImportError:
            import pickle               # Python 3

        if not os.path.exists(filename):
            raise ImportError

        # Use a context manager so the file is closed even if unpickling
        # raises (the previous code leaked the handle on error).
        with open(filename, 'rb') as in_f:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError('yacc table file version is out of date')
            self.lr_method = pickle.load(in_f)
            signature = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto = pickle.load(in_f)
            productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)
|
|
2011
|
|
2012
|
|
2013 # -----------------------------------------------------------------------------
|
|
2014 # === LR Generator ===
|
|
2015 #
|
|
2016 # The following classes and functions are used to generate LR parsing tables on
|
|
2017 # a grammar.
|
|
2018 # -----------------------------------------------------------------------------
|
|
2019
|
|
2020 # -----------------------------------------------------------------------------
|
|
2021 # digraph()
|
|
2022 # traverse()
|
|
2023 #
|
|
2024 # The following two functions are used to compute set valued functions
|
|
2025 # of the form:
|
|
2026 #
|
|
2027 # F(x) = F'(x) U U{F(y) | x R y}
|
|
2028 #
|
|
2029 # This is used to compute the values of Read() sets as well as FOLLOW sets
|
|
2030 # in LALR(1) generation.
|
|
2031 #
|
|
2032 # Inputs: X - An input set
|
|
2033 # R - A relation
|
|
2034 # FP - Set-valued function
|
|
2035 # ------------------------------------------------------------------------------
|
|
2036
|
|
def digraph(X, R, FP):
    """Compute the set-valued function F(x) = FP(x) U U{ F(y) | x R y }
    for all x in X using the DeRemer/Pennello digraph algorithm.

    X is the input set, R maps a node to its related nodes, and FP is
    the base set-valued function.  Returns F as a dictionary.
    """
    N = dict.fromkeys(X, 0)     # traversal numbers; 0 means unvisited
    stack = []
    F = {}
    for node in X:
        if N[node] == 0:
            traverse(node, N, stack, F, X, R, FP)
    return F
|
|
2047
|
|
def traverse(x, N, stack, F, X, R, FP):
    """One depth-first pass of digraph().

    Computes F[x] = FP(x) unioned with F[y] for every y with x R y,
    and collapses strongly connected components so that all members of
    an SCC end up sharing the root's F set.  N[v] holds the depth-first
    number of v (0 = unvisited, MAXINT = finished).
    """
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)        # F(X) <- F'(x)

    rel = R(x)          # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        # Propagate the lowest reachable depth-first number (SCC test).
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        # x is the root of a strongly connected component: pop the whole
        # component, marking each member finished (MAXINT) and giving it
        # the root's F set.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
|
|
2070
|
|
class LALRError(YaccError):
    """Raised for internal errors during LALR/SLR table construction
    (unsupported method, shift/shift conflicts, unknown conflicts)."""
    pass
|
|
2073
|
|
2074 # -----------------------------------------------------------------------------
|
|
2075 # == LRGeneratedTable ==
|
|
2076 #
|
|
2077 # This class implements the LR table generation algorithm. There are no
|
|
2078 # public methods except for write()
|
|
2079 # -----------------------------------------------------------------------------
|
|
2080
|
|
2081 class LRGeneratedTable(LRTable):
|
|
def __init__(self, grammar, method='LALR', log=None):
    """Build the LR parsing tables for *grammar*.

    method is 'SLR' or 'LALR' (raises LALRError otherwise).  log, if
    given, receives the human-readable table/conflict report; it
    defaults to a NullLogger that discards everything.  Construction
    happens eagerly: the tables are complete when __init__ returns.
    """
    if method not in ['SLR', 'LALR']:
        raise LALRError('Unsupported method %s' % method)

    self.grammar = grammar
    self.lr_method = method

    # Set up the logger
    if not log:
        log = NullLogger()
    self.log = log

    # Internal attributes
    self.lr_action = {}                  # Action table
    self.lr_goto = {}                    # Goto table
    self.lr_productions = grammar.Productions    # Copy of grammar Production array
    self.lr_goto_cache = {}              # Cache of computed gotos
    self.lr0_cidhash = {}                # Cache of closures

    self._add_count = 0                  # Internal counter used to detect cycles

    # Diagnostic information filled in by the table generator
    self.sr_conflict = 0
    self.rr_conflict = 0
    self.conflicts = []                  # List of conflicts

    self.sr_conflicts = []               # (state, token, resolution) tuples
    self.rr_conflicts = []               # (state, chosen rule, rejected rule) tuples

    # Build the tables
    self.grammar.build_lritems()
    self.grammar.compute_first()
    self.grammar.compute_follow()
    self.lr_parse_table()
|
|
2116
|
|
2117 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
|
|
2118
|
|
def lr0_closure(self, I):
    """Compute the LR(0) closure of I, a list of LR items.

    For every item with the dot before a non-terminal B, the items
    B -> . gamma are added (via the precomputed lr_after lists) until
    nothing new appears.  Returns a new list; I is not modified.
    """
    # Stamp this invocation so lr0_added marks left on production
    # objects by earlier closure calls are ignored.
    self._add_count += 1
    stamp = self._add_count

    J = list(I)
    changed = True
    while changed:
        changed = False
        # Note: J grows while we scan it; newly appended items are
        # picked up either in this pass or the next one.
        for item in J:
            for prod in item.lr_after:
                if getattr(prod, 'lr0_added', 0) == stamp:
                    continue
                # Add B -> . gamma to J
                J.append(prod.lr_next)
                prod.lr0_added = stamp
                changed = True

    return J
|
|
2137
|
|
2138 # Compute the LR(0) goto function goto(I,X) where I is a set
|
|
2139 # of LR(0) items and X is a grammar symbol. This function is written
|
|
2140 # in a way that guarantees uniqueness of the generated goto sets
|
|
2141 # (i.e. the same goto set will never be returned as two different Python
|
|
2142 # objects). With uniqueness, we can later do fast set comparisons using
|
|
2143 # id(obj) instead of element-wise comparison.
|
|
2144
|
|
def lr0_goto(self, I, x):
    """Compute goto(I, x) where I is a list of LR(0) items and x is a
    grammar symbol.

    Written so that identical goto sets are always returned as the
    *same* Python object, which lets later code compare sets with id()
    instead of element-wise comparison.
    """
    # First we look for a previously cached entry
    g = self.lr_goto_cache.get((id(I), x))
    if g:
        return g

    # Now we generate the goto set in a way that guarantees uniqueness
    # of the result.  lr_goto_cache[x] is the root of a trie keyed by
    # the id()s of the successor items; the leaf entry (under '$end')
    # holds the closure shared by every item set with the same
    # successors.
    s = self.lr_goto_cache.get(x)
    if not s:
        s = {}
        self.lr_goto_cache[x] = s

    gs = []
    for p in I:
        n = p.lr_next
        if n and n.lr_before == x:
            # Item has the dot right after x: descend one trie level.
            s1 = s.get(id(n))
            if not s1:
                s1 = {}
                s[id(n)] = s1
            gs.append(n)
            s = s1
    g = s.get('$end')
    if not g:
        if gs:
            g = self.lr0_closure(gs)
            s['$end'] = g
        else:
            # Empty goto set: cache the empty list itself.
            s['$end'] = gs
    self.lr_goto_cache[(id(I), x)] = g
    return g
|
|
2178
|
|
2179 # Compute the LR(0) sets of item function
|
|
def lr0_items(self):
    """Compute the canonical collection C of LR(0) item sets (the
    parser's states).  C[0] is the closure of the start production;
    state numbers are recorded in self.lr0_cidhash keyed by id(set)."""
    C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
    i = 0
    for I in C:
        self.lr0_cidhash[id(I)] = i
        i += 1

    # Loop over the items in C and each grammar symbols.
    # C grows while we scan it, hence the explicit while loop.
    i = 0
    while i < len(C):
        I = C[i]
        i += 1

        # Collect all of the symbols that could possibly be in the goto(I,X) sets
        asyms = {}
        for ii in I:
            for s in ii.usyms:
                asyms[s] = None

        for x in asyms:
            g = self.lr0_goto(I, x)
            # Skip empty gotos and sets we have already numbered.
            if not g or id(g) in self.lr0_cidhash:
                continue
            self.lr0_cidhash[id(g)] = len(C)
            C.append(g)

    return C
|
|
2207
|
|
2208 # -----------------------------------------------------------------------------
|
|
2209 # ==== LALR(1) Parsing ====
|
|
2210 #
|
|
2211 # LALR(1) parsing is almost exactly the same as SLR except that instead of
|
|
2212 # relying upon Follow() sets when performing reductions, a more selective
|
|
2213 # lookahead set that incorporates the state of the LR(0) machine is utilized.
|
|
2214 # Thus, we mainly just have to focus on calculating the lookahead sets.
|
|
2215 #
|
|
2216 # The method used here is due to DeRemer and Pennelo (1982).
|
|
2217 #
|
|
2218 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
|
|
2219 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
|
|
2220 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
|
|
2221 #
|
|
2222 # Further details can also be found in:
|
|
2223 #
|
|
2224 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
|
|
2225 # McGraw-Hill Book Company, (1985).
|
|
2226 #
|
|
2227 # -----------------------------------------------------------------------------
|
|
2228
|
|
2229 # -----------------------------------------------------------------------------
|
|
2230 # compute_nullable_nonterminals()
|
|
2231 #
|
|
2232 # Creates a dictionary containing all of the non-terminals that might produce
|
|
2233 # an empty production.
|
|
2234 # -----------------------------------------------------------------------------
|
|
2235
|
|
def compute_nullable_nonterminals(self):
    """Return the set of non-terminals that can derive the empty string."""
    nullable = set()
    while True:
        before = len(nullable)
        for p in self.grammar.Productions[1:]:
            # An empty production makes its name nullable outright.
            if p.len == 0:
                nullable.add(p.name)
                continue
            # Otherwise every right-hand-side symbol must be nullable.
            if all(sym in nullable for sym in p.prod):
                nullable.add(p.name)
        # Stop when a full pass adds nothing new (fixed point).
        if len(nullable) == before:
            return nullable
|
|
2253
|
|
2254 # -----------------------------------------------------------------------------
|
|
2255 # find_nonterminal_trans(C)
|
|
2256 #
|
|
# Given a set of LR(0) items, this function finds all of the non-terminal
|
|
2258 # transitions. These are transitions in which a dot appears immediately before
|
|
2259 # a non-terminal. Returns a list of tuples of the form (state,N) where state
|
|
2260 # is the state number and N is the nonterminal symbol.
|
|
2261 #
|
|
2262 # The input C is the set of LR(0) items.
|
|
2263 # -----------------------------------------------------------------------------
|
|
2264
|
|
def find_nonterminal_transitions(self, C):
    """Return every non-terminal transition in the LR(0) machine as a
    list of unique (state, N) tuples, where state is the state number
    and some item in that state has its dot immediately before the
    non-terminal N."""
    trans = []
    for stateno, state in enumerate(C):
        for item in state:
            # The dot must not already be at the end of the production.
            if item.lr_index >= item.len - 1:
                continue
            sym = item.prod[item.lr_index + 1]
            if sym in self.grammar.Nonterminals:
                candidate = (stateno, sym)
                if candidate not in trans:
                    trans.append(candidate)
    return trans
|
|
2275
|
|
2276 # -----------------------------------------------------------------------------
|
|
2277 # dr_relation()
|
|
2278 #
|
|
2279 # Computes the DR(p,A) relationships for non-terminal transitions. The input
|
|
2280 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
|
|
2281 #
|
|
2282 # Returns a list of terminals.
|
|
2283 # -----------------------------------------------------------------------------
|
|
2284
|
|
def dr_relation(self, C, trans, nullable):
    """Compute DR(p, A) for the non-terminal transition trans=(state, N):
    the terminals that can be directly read after taking the transition.

    C is the collection of LR(0) item sets; nullable is accepted for
    signature compatibility with the digraph machinery but is unused
    here.  Returns a list of terminals.
    """
    # (The original code created an unused local 'dr_set' here; removed.)
    state, N = trans
    terms = []

    g = self.lr0_goto(C[state], N)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index + 1]
            if a in self.grammar.Terminals:
                if a not in terms:
                    terms.append(a)

    # This extra bit is to handle the start state
    if state == 0 and N == self.grammar.Productions[0].prod[0]:
        terms.append('$end')

    return terms
|
|
2303
|
|
2304 # -----------------------------------------------------------------------------
|
|
2305 # reads_relation()
|
|
2306 #
|
|
2307 # Computes the READS() relation (p,A) READS (t,C).
|
|
2308 # -----------------------------------------------------------------------------
|
|
2309
|
|
def reads_relation(self, C, trans, empty):
    """Compute the READS relation for trans=(state, N): the list of
    (j, A) pairs where j is the state reached by the transition and A
    is a nullable symbol appearing right after the dot there."""
    state, N = trans
    succ = self.lr0_goto(C[state], N)
    j = self.lr0_cidhash.get(id(succ), -1)

    rel = []
    for item in succ:
        if item.lr_index < item.len - 1:
            sym = item.prod[item.lr_index + 1]
            if sym in empty:
                rel.append((j, sym))
    return rel
|
|
2324
|
|
2325 # -----------------------------------------------------------------------------
|
|
2326 # compute_lookback_includes()
|
|
2327 #
|
|
2328 # Determines the lookback and includes relations
|
|
2329 #
|
|
2330 # LOOKBACK:
|
|
2331 #
|
|
2332 # This relation is determined by running the LR(0) state machine forward.
|
|
2333 # For example, starting with a production "N : . A B C", we run it forward
|
|
2334 # to obtain "N : A B C ." We then build a relationship between this final
|
|
2335 # state and the starting state. These relationships are stored in a dictionary
|
|
2336 # lookdict.
|
|
2337 #
|
|
2338 # INCLUDES:
|
|
2339 #
|
|
2340 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
|
|
2341 #
|
|
2342 # This relation is used to determine non-terminal transitions that occur
|
|
2343 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
|
|
2344 # if the following holds:
|
|
2345 #
|
|
2346 # B -> LAT, where T -> epsilon and p' -L-> p
|
|
2347 #
|
|
2348 # L is essentially a prefix (which may be empty), T is a suffix that must be
|
|
2349 # able to derive an empty string. State p' must lead to state p with the string L.
|
|
2350 #
|
|
2351 # -----------------------------------------------------------------------------
|
|
2352
|
|
def compute_lookback_includes(self, C, trans, nullable):
    """Determine the LOOKBACK and INCLUDES relations.

    LOOKBACK: for each transition (state, N), run each item N : . alpha
    forward through the LR(0) machine to the state holding N : alpha .
    and relate the two.

    INCLUDES: (p, A) INCLUDES (p', B) when B -> L A T with T derivable
    to empty and p' leads to p on the string L.

    Returns (lookdict, includedict), both keyed by (state, N) tuples.
    """
    lookdict = {}          # Dictionary of lookback relations
    includedict = {}       # Dictionary of include relations

    # Make a dictionary of non-terminal transitions
    dtrans = {}
    for t in trans:
        dtrans[t] = 1

    # Loop over all transitions and compute lookbacks and includes
    for state, N in trans:
        lookb = []
        includes = []
        for p in C[state]:
            if p.name != N:
                continue

            # Okay, we have a name match.  We now follow the production all the way
            # through the state machine until we get the . on the right hand side

            lr_index = p.lr_index
            j = state
            while lr_index < p.len - 1:
                lr_index = lr_index + 1
                t = p.prod[lr_index]

                # Check to see if this symbol and state are a non-terminal transition
                if (j, t) in dtrans:
                    # Yes.  Okay, there is some chance that this is an includes relation
                    # the only way to know for certain is whether the rest of the
                    # production derives empty

                    li = lr_index + 1
                    while li < p.len:
                        if p.prod[li] in self.grammar.Terminals:
                            break      # No forget it
                        if p.prod[li] not in nullable:
                            break
                        li = li + 1
                    else:
                        # Appears to be a relation between (j,t) and (state,N)
                        includes.append((j, t))

                g = self.lr0_goto(C[j], t)               # Go to next set
                j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

            # When we get here, j is the final state, now we have to locate the production
            for r in C[j]:
                if r.name != p.name:
                    continue
                if r.len != p.len:
                    continue
                i = 0
                # This loop compares a production ". A B C" with "A B C ."
                # (r is the finished item, p the starting one).
                while i < r.lr_index:
                    if r.prod[i] != p.prod[i+1]:
                        break
                    i = i + 1
                else:
                    lookb.append((j, r))
        # Record the inverse of INCLUDES: each (j, t) found above
        # includes the current transition (state, N).
        for i in includes:
            if i not in includedict:
                includedict[i] = []
            includedict[i].append((state, N))
        lookdict[(state, N)] = lookb

    return lookdict, includedict
|
|
2420
|
|
2421 # -----------------------------------------------------------------------------
|
|
2422 # compute_read_sets()
|
|
2423 #
|
|
2424 # Given a set of LR(0) items, this function computes the read sets.
|
|
2425 #
|
|
2426 # Inputs: C = Set of LR(0) items
|
|
2427 # ntrans = Set of nonterminal transitions
|
|
2428 # nullable = Set of empty transitions
|
|
2429 #
|
|
2430 # Returns a set containing the read sets
|
|
2431 # -----------------------------------------------------------------------------
|
|
2432
|
|
def compute_read_sets(self, C, ntrans, nullable):
    """Compute the Read() sets for all non-terminal transitions.

    C is the collection of LR(0) item sets, ntrans the non-terminal
    transitions, nullable the nullable non-terminals.  Returns the
    dictionary produced by digraph().
    """
    def base(x):
        return self.dr_relation(C, x, nullable)

    def related(x):
        return self.reads_relation(C, x, nullable)

    return digraph(ntrans, related, base)
|
|
2438
|
|
2439 # -----------------------------------------------------------------------------
|
|
2440 # compute_follow_sets()
|
|
2441 #
|
|
2442 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
|
|
2443 # and an include set, this function computes the follow sets
|
|
2444 #
|
|
2445 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
|
|
2446 #
|
|
2447 # Inputs:
|
|
2448 # ntrans = Set of nonterminal transitions
|
|
2449 # readsets = Readset (previously computed)
|
|
2450 # inclsets = Include sets (previously computed)
|
|
2451 #
|
|
2452 # Returns a set containing the follow sets
|
|
2453 # -----------------------------------------------------------------------------
|
|
2454
|
|
def compute_follow_sets(self, ntrans, readsets, inclsets):
    """Compute the LALR FOLLOW sets:

        Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}

    ntrans are the non-terminal transitions; readsets and inclsets were
    computed previously.  Returns the dictionary produced by digraph().
    """
    def base(x):
        return readsets[x]

    def related(x):
        return inclsets.get(x, [])

    return digraph(ntrans, related, base)
|
|
2460
|
|
2461 # -----------------------------------------------------------------------------
|
|
2462 # add_lookaheads()
|
|
2463 #
|
|
2464 # Attaches the lookahead symbols to grammar rules.
|
|
2465 #
|
|
2466 # Inputs: lookbacks - Set of lookback relations
|
|
2467 # followset - Computed follow set
|
|
2468 #
|
|
2469 # This function directly attaches the lookaheads to productions contained
|
|
2470 # in the lookbacks set
|
|
2471 # -----------------------------------------------------------------------------
|
|
2472
|
|
def add_lookaheads(self, lookbacks, followset):
    """Attach lookahead symbols to grammar rules.

    For every transition in the lookbacks relation, the transition's
    follow set is appended (without duplicates) to the lookaheads of
    each (state, production) pair it relates to.
    """
    for trans, lb in lookbacks.items():
        follow = followset.get(trans, [])
        # Loop over productions in lookback
        for state, p in lb:
            la = p.lookaheads.setdefault(state, [])
            for terminal in follow:
                if terminal not in la:
                    la.append(terminal)
|
|
2483
|
|
2484 # -----------------------------------------------------------------------------
|
|
2485 # add_lalr_lookaheads()
|
|
2486 #
|
|
2487 # This function does all of the work of adding lookahead information for use
|
|
2488 # with LALR parsing
|
|
2489 # -----------------------------------------------------------------------------
|
|
2490
|
|
def add_lalr_lookaheads(self, C):
    """Run the full DeRemer/Pennello LALR(1) lookahead computation over
    the LR(0) item sets C and attach the resulting lookaheads to the
    grammar's productions."""
    nullable = self.compute_nullable_nonterminals()       # nullable non-terminals
    trans = self.find_nonterminal_transitions(C)          # all (state, N) transitions
    readsets = self.compute_read_sets(C, trans, nullable) # Read() sets
    lookd, included = self.compute_lookback_includes(C, trans, nullable)
    followsets = self.compute_follow_sets(trans, readsets, included)
    self.add_lookaheads(lookd, followsets)                # attach to productions
|
|
2509
|
|
2510 # -----------------------------------------------------------------------------
|
|
2511 # lr_parse_table()
|
|
2512 #
|
|
2513 # This function constructs the parse tables for SLR or LALR
|
|
2514 # -----------------------------------------------------------------------------
|
|
def lr_parse_table(self):
    """Construct the SLR or LALR parse tables.

    Fills in self.lr_action and self.lr_goto, one entry per state, and
    records shift/reduce and reduce/reduce conflicts in
    self.sr_conflicts / self.rr_conflicts.  Conflict resolution follows
    yacc convention: precedence/associativity first, otherwise prefer
    shift over reduce and the earlier-defined rule between two reduces.
    """
    Productions = self.grammar.Productions
    Precedence = self.grammar.Precedence
    goto = self.lr_goto         # Goto array
    action = self.lr_action     # Action array
    log = self.log              # Logger for output

    actionp = {}                # Action production array (temporary)

    log.info('Parsing method: %s', self.lr_method)

    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
    # This determines the number of states

    C = self.lr0_items()

    if self.lr_method == 'LALR':
        self.add_lalr_lookaheads(C)

    # Build the parser table, state by state
    st = 0
    for I in C:
        # Loop over each production in I
        actlist = []            # List of actions
        st_action = {}          # action[terminal] for this state
        st_actionp = {}         # production responsible for each action
        st_goto = {}            # goto[nonterminal] for this state
        log.info('')
        log.info('state %d', st)
        log.info('')
        for p in I:
            log.info('    (%d) %s', p.number, p)
        log.info('')

        for p in I:
            if p.len == p.lr_index + 1:
                # Dot at the far right of the production.
                if p.name == "S'":
                    # Start symbol. Accept!
                    st_action['$end'] = 0
                    st_actionp['$end'] = p
                else:
                    # We are at the end of a production.  Reduce!
                    # LALR uses the per-state lookaheads; SLR falls back
                    # to the grammar-wide FOLLOW sets.
                    if self.lr_method == 'LALR':
                        laheads = p.lookaheads[st]
                    else:
                        laheads = self.grammar.Follow[p.name]
                    for a in laheads:
                        actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                        r = st_action.get(a)
                        if r is not None:
                            # Whoa. Have a shift/reduce or reduce/reduce conflict
                            if r > 0:
                                # Existing entry is a shift.
                                # Need to decide on shift or reduce here
                                # By default we favor shifting. Need to add
                                # some precedence rules here.
                                sprec, slevel = Productions[st_actionp[a].number].prec
                                rprec, rlevel = Precedence.get(a, ('right', 0))
                                if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                    # We really need to reduce here.
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    if not slevel and not rlevel:
                                        log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                        self.sr_conflicts.append((st, a, 'reduce'))
                                    Productions[p.number].reduced += 1
                                elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                    # Nonassociative at equal precedence:
                                    # make the input an error.
                                    st_action[a] = None
                                else:
                                    # Hmmm. Guess we'll keep the shift
                                    if not rlevel:
                                        log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                        self.sr_conflicts.append((st, a, 'shift'))
                            elif r < 0:
                                # Reduce/reduce conflict.   In this case, we favor the rule
                                # that was defined first in the grammar file
                                oldp = Productions[-r]
                                pp = Productions[p.number]
                                if oldp.line > pp.line:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    chosenp, rejectp = pp, oldp
                                    Productions[p.number].reduced += 1
                                    Productions[oldp.number].reduced -= 1
                                else:
                                    chosenp, rejectp = oldp, pp
                                self.rr_conflicts.append((st, chosenp, rejectp))
                                log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                         a, st_actionp[a].number, st_actionp[a])
                            else:
                                raise LALRError('Unknown conflict in state %d' % st)
                        else:
                            # No conflict: plain reduce entry (negative
                            # numbers encode reductions).
                            st_action[a] = -p.number
                            st_actionp[a] = p
                            Productions[p.number].reduced += 1
            else:
                i = p.lr_index
                a = p.prod[i+1]       # Get symbol right after the "."
                if a in self.grammar.Terminals:
                    g = self.lr0_goto(I, a)
                    j = self.lr0_cidhash.get(id(g), -1)
                    if j >= 0:
                        # We are in a shift state
                        actlist.append((a, p, 'shift and go to state %d' % j))
                        r = st_action.get(a)
                        if r is not None:
                            # Whoa have a shift/reduce or shift/shift conflict
                            if r > 0:
                                if r != j:
                                    # Two different shift targets should
                                    # be impossible in a valid machine.
                                    raise LALRError('Shift/shift conflict in state %d' % st)
                            elif r < 0:
                                # Do a precedence check.
                                #   -  if precedence of reduce rule is higher, we reduce.
                                #   -  if precedence of reduce is same and left assoc, we reduce.
                                #   -  otherwise we shift
                                rprec, rlevel = Productions[st_actionp[a].number].prec
                                sprec, slevel = Precedence.get(a, ('right', 0))
                                if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                    # We decide to shift here... highest precedence to shift
                                    Productions[st_actionp[a].number].reduced -= 1
                                    st_action[a] = j
                                    st_actionp[a] = p
                                    if not rlevel:
                                        log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                        self.sr_conflicts.append((st, a, 'shift'))
                                elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                    st_action[a] = None
                                else:
                                    # Hmmm. Guess we'll keep the reduce
                                    if not slevel and not rlevel:
                                        log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                        self.sr_conflicts.append((st, a, 'reduce'))

                            else:
                                raise LALRError('Unknown conflict in state %d' % st)
                        else:
                            # Plain shift entry (positive = target state).
                            st_action[a] = j
                            st_actionp[a] = p

        # Print the actions associated with each terminal
        _actprint = {}
        for a, p, m in actlist:
            if a in st_action:
                if p is st_actionp[a]:
                    log.info('    %-15s %s', a, m)
                    _actprint[(a, m)] = 1
        log.info('')
        # Print the actions that were not used. (debugging)
        not_used = 0
        for a, p, m in actlist:
            if a in st_action:
                if p is not st_actionp[a]:
                    if not (a, m) in _actprint:
                        log.debug('  ! %-15s [ %s ]', a, m)
                        not_used = 1
                        _actprint[(a, m)] = 1
        if not_used:
            log.debug('')

        # Construct the goto table for this state

        nkeys = {}
        for ii in I:
            for s in ii.usyms:
                if s in self.grammar.Nonterminals:
                    nkeys[s] = None
        for n in nkeys:
            g = self.lr0_goto(I, n)
            j = self.lr0_cidhash.get(id(g), -1)
            if j >= 0:
                st_goto[n] = j
                log.info('    %-30s shift and go to state %d', n, j)

        action[st] = st_action
        actionp[st] = st_actionp
        goto[st] = st_goto
        st += 1
|
|
2691
|
|
2692 # -----------------------------------------------------------------------------
|
|
2693 # write()
|
|
2694 #
|
|
2695 # This function writes the LR parsing tables to a file
|
|
2696 # -----------------------------------------------------------------------------
|
|
2697
|
|
2698 def write_table(self, tabmodule, outputdir='', signature=''):
|
|
2699 if isinstance(tabmodule, types.ModuleType):
|
|
2700 raise IOError("Won't overwrite existing tabmodule")
|
|
2701
|
|
2702 basemodulename = tabmodule.split('.')[-1]
|
|
2703 filename = os.path.join(outputdir, basemodulename) + '.py'
|
|
2704 try:
|
|
2705 f = open(filename, 'w')
|
|
2706
|
|
2707 f.write('''
|
|
2708 # %s
|
|
2709 # This file is automatically generated. Do not edit.
|
|
2710 _tabversion = %r
|
|
2711
|
|
2712 _lr_method = %r
|
|
2713
|
|
2714 _lr_signature = %r
|
|
2715 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
|
|
2716
|
|
2717 # Change smaller to 0 to go back to original tables
|
|
2718 smaller = 1
|
|
2719
|
|
2720 # Factor out names to try and make smaller
|
|
2721 if smaller:
|
|
2722 items = {}
|
|
2723
|
|
2724 for s, nd in self.lr_action.items():
|
|
2725 for name, v in nd.items():
|
|
2726 i = items.get(name)
|
|
2727 if not i:
|
|
2728 i = ([], [])
|
|
2729 items[name] = i
|
|
2730 i[0].append(s)
|
|
2731 i[1].append(v)
|
|
2732
|
|
2733 f.write('\n_lr_action_items = {')
|
|
2734 for k, v in items.items():
|
|
2735 f.write('%r:([' % k)
|
|
2736 for i in v[0]:
|
|
2737 f.write('%r,' % i)
|
|
2738 f.write('],[')
|
|
2739 for i in v[1]:
|
|
2740 f.write('%r,' % i)
|
|
2741
|
|
2742 f.write(']),')
|
|
2743 f.write('}\n')
|
|
2744
|
|
2745 f.write('''
|
|
2746 _lr_action = {}
|
|
2747 for _k, _v in _lr_action_items.items():
|
|
2748 for _x,_y in zip(_v[0],_v[1]):
|
|
2749 if not _x in _lr_action: _lr_action[_x] = {}
|
|
2750 _lr_action[_x][_k] = _y
|
|
2751 del _lr_action_items
|
|
2752 ''')
|
|
2753
|
|
2754 else:
|
|
2755 f.write('\n_lr_action = { ')
|
|
2756 for k, v in self.lr_action.items():
|
|
2757 f.write('(%r,%r):%r,' % (k[0], k[1], v))
|
|
2758 f.write('}\n')
|
|
2759
|
|
2760 if smaller:
|
|
2761 # Factor out names to try and make smaller
|
|
2762 items = {}
|
|
2763
|
|
2764 for s, nd in self.lr_goto.items():
|
|
2765 for name, v in nd.items():
|
|
2766 i = items.get(name)
|
|
2767 if not i:
|
|
2768 i = ([], [])
|
|
2769 items[name] = i
|
|
2770 i[0].append(s)
|
|
2771 i[1].append(v)
|
|
2772
|
|
2773 f.write('\n_lr_goto_items = {')
|
|
2774 for k, v in items.items():
|
|
2775 f.write('%r:([' % k)
|
|
2776 for i in v[0]:
|
|
2777 f.write('%r,' % i)
|
|
2778 f.write('],[')
|
|
2779 for i in v[1]:
|
|
2780 f.write('%r,' % i)
|
|
2781
|
|
2782 f.write(']),')
|
|
2783 f.write('}\n')
|
|
2784
|
|
2785 f.write('''
|
|
2786 _lr_goto = {}
|
|
2787 for _k, _v in _lr_goto_items.items():
|
|
2788 for _x, _y in zip(_v[0], _v[1]):
|
|
2789 if not _x in _lr_goto: _lr_goto[_x] = {}
|
|
2790 _lr_goto[_x][_k] = _y
|
|
2791 del _lr_goto_items
|
|
2792 ''')
|
|
2793 else:
|
|
2794 f.write('\n_lr_goto = { ')
|
|
2795 for k, v in self.lr_goto.items():
|
|
2796 f.write('(%r,%r):%r,' % (k[0], k[1], v))
|
|
2797 f.write('}\n')
|
|
2798
|
|
2799 # Write production table
|
|
2800 f.write('_lr_productions = [\n')
|
|
2801 for p in self.lr_productions:
|
|
2802 if p.func:
|
|
2803 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
|
|
2804 p.func, os.path.basename(p.file), p.line))
|
|
2805 else:
|
|
2806 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
|
|
2807 f.write(']\n')
|
|
2808 f.close()
|
|
2809
|
|
2810 except IOError as e:
|
|
2811 raise
|
|
2812
|
|
2813
|
|
2814 # -----------------------------------------------------------------------------
|
|
2815 # pickle_table()
|
|
2816 #
|
|
2817 # This function pickles the LR parsing tables to a supplied file object
|
|
2818 # -----------------------------------------------------------------------------
|
|
2819
|
|
2820 def pickle_table(self, filename, signature=''):
|
|
2821 try:
|
|
2822 import cPickle as pickle
|
|
2823 except ImportError:
|
|
2824 import pickle
|
|
2825 with open(filename, 'wb') as outf:
|
|
2826 pickle.dump(__tabversion__, outf, pickle_protocol)
|
|
2827 pickle.dump(self.lr_method, outf, pickle_protocol)
|
|
2828 pickle.dump(signature, outf, pickle_protocol)
|
|
2829 pickle.dump(self.lr_action, outf, pickle_protocol)
|
|
2830 pickle.dump(self.lr_goto, outf, pickle_protocol)
|
|
2831
|
|
2832 outp = []
|
|
2833 for p in self.lr_productions:
|
|
2834 if p.func:
|
|
2835 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
|
|
2836 else:
|
|
2837 outp.append((str(p), p.name, p.len, None, None, None))
|
|
2838 pickle.dump(outp, outf, pickle_protocol)
|
|
2839
|
|
2840 # -----------------------------------------------------------------------------
|
|
2841 # === INTROSPECTION ===
|
|
2842 #
|
|
2843 # The following functions and classes are used to implement the PLY
|
|
2844 # introspection features followed by the yacc() function itself.
|
|
2845 # -----------------------------------------------------------------------------
|
|
2846
|
|
2847 # -----------------------------------------------------------------------------
|
|
2848 # get_caller_module_dict()
|
|
2849 #
|
|
2850 # This function returns a dictionary containing all of the symbols defined within
|
|
2851 # a caller further down the call stack. This is used to get the environment
|
|
2852 # associated with the yacc() call if none was provided.
|
|
2853 # -----------------------------------------------------------------------------
|
|
2854
|
|
def get_caller_module_dict(levels):
    """Return a dict of all symbols visible in a caller's frame.

    *levels* selects how far down the call stack to look (1 = the immediate
    caller).  The result merges that frame's globals with its locals, locals
    taking precedence; at module level globals and locals are the same
    object, so no merge is needed.
    """
    frame = sys._getframe(levels)
    symbols = dict(frame.f_globals)
    if frame.f_globals != frame.f_locals:
        symbols.update(frame.f_locals)
    return symbols
|
|
2861
|
|
2862 # -----------------------------------------------------------------------------
|
|
2863 # parse_grammar()
|
|
2864 #
|
|
2865 # This takes a raw grammar rule string and parses it into production data
|
|
2866 # -----------------------------------------------------------------------------
|
|
def parse_grammar(doc, file, line):
    """Parse a raw grammar-rule docstring into production tuples.

    *doc* is the docstring of a p_* rule function, *file* and *line* locate
    that function for error reporting.  Returns a list of
    (file, lineno, prodname, syms) tuples, one per production; a line
    beginning with '|' continues the most recent rule.  Raises SyntaxError
    on a misplaced '|', a missing ':' / '::=' separator, or any other
    malformed rule line.
    """
    productions = []
    previous_name = None
    lineno = line
    for raw in doc.splitlines():
        lineno += 1
        tokens = raw.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Continuation of the previous rule
                if not previous_name:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, lineno))
                name = previous_name
                symbols = tokens[1:]
            else:
                name = tokens[0]
                previous_name = name
                # Take the symbols before checking the separator, so that a
                # bare one-token line falls through to the generic error.
                symbols = tokens[2:]
                separator = tokens[1]
                if separator not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, lineno))

            productions.append((file, lineno, name, symbols))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, lineno, raw.strip()))

    return productions
|
|
2900
|
|
2901 # -----------------------------------------------------------------------------
|
|
2902 # ParserReflect()
|
|
2903 #
|
|
2904 # This class represents information extracted for building a parser including
|
|
2905 # start symbol, error function, tokens, precedence list, action functions,
|
|
2906 # etc.
|
|
2907 # -----------------------------------------------------------------------------
|
|
class ParserReflect(object):
    """Collects and validates parser-definition information for yacc().

    Harvests the start symbol, p_error() handler, token list, precedence
    table, and p_* rule functions from a symbol dictionary (typically a
    module's __dict__).  Call get_all() first, then validate_all(); problems
    are reported through ``log`` and flagged in ``self.error``.
    """

    def __init__(self, pdict, log=None):
        self.pdict = pdict         # symbol dictionary being introspected
        self.start = None          # start symbol ('start' entry of pdict)
        self.error_func = None     # p_error() function, if any
        self.tokens = None         # token list from pdict
        self.modules = set()       # modules in which rules/handlers are defined
        self.grammar = []          # parsed rules: (funcname, (file, line, prodname, syms))
        self.error = False         # set True whenever a validation step fails

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        """Harvest start symbol, error handler, tokens, precedence, and rules."""
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        """Run every validation step; return True if any error was found."""
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_modules()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        """Return a hex digest summarizing the grammar definition.

        Used to decide whether previously generated parser tables match the
        current grammar.  NOTE(review): reads self.prec and self.pfuncs, so
        get_all() must have been called first.
        """
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5      # Python 2 fallback
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(' '.join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    # f[3] is the rule function's docstring (the grammar text)
                    sig.update(f[3].encode('latin-1'))
        except (TypeError, ValueError):
            pass

        digest = base64.b16encode(sig.digest())
        if sys.version_info[0] >= 3:
            digest = digest.decode('latin-1')
        return digest

    # -----------------------------------------------------------------------------
    # validate_modules()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_modules(self):
        """Warn about duplicate p_* function definitions in each source module."""
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for module in self.modules:
            lines, linen = inspect.getsourcelines(module)

            counthash = {}
            for linen, line in enumerate(lines):
                linen += 1          # report 1-based line numbers
                m = fre.match(line)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        filename = inspect.getsourcefile(module)
                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
                                         filename, linen, name, prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start, string_types):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        """Check that p_error (if defined) is a function/method of one argument."""
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = True
                return

            eline = self.error_func.__code__.co_firstlineno
            efile = self.error_func.__code__.co_filename
            module = inspect.getmodule(self.error_func)
            self.modules.add(module)

            # A bound method carries an implicit self, so discount it
            argcount = self.error_func.__code__.co_argcount - ismethod
            if argcount != 1:
                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
                self.error = True

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get('tokens')
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = True
            return

        terminals = set()
        for n in self.tokens:
            if n in terminals:
                self.log.warning('Token %r multiply defined', n)
            terminals.add(n)

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get('precedence')

    # Validate and parse the precedence map
    def validate_precedence(self):
        """Flatten the precedence table into (term, assoc, level) tuples.

        Levels are numbered from 1 in declaration order; the result is
        stored in self.preclist.
        """
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error('precedence must be a list or tuple')
                self.error = True
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error('Bad precedence table')
                    self.error = True
                    return

                if len(p) < 2:
                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
                    self.error = True
                    return
                assoc = p[0]
                if not isinstance(assoc, string_types):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
                    if not isinstance(term, string_types):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        """Collect (line, module, name, docstring) for every p_* rule function."""
        p_functions = []
        for name, item in self.pdict.items():
            if not name.startswith('p_') or name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                line = item.__code__.co_firstlineno
                module = inspect.getmodule(item)
                p_functions.append((line, module, name, item.__doc__))

        # Sort all of the actions by line number; make sure to stringify
        # modules to make them sortable, since `line` may not uniquely sort all
        # p functions
        p_functions.sort(key=lambda p_function: (
            p_function[0],
            str(p_function[1]),
            p_function[2],
            p_function[3]))
        self.pfuncs = p_functions

    # Validate all of the p_functions
    def validate_pfunctions(self):
        """Check rule-function signatures and parse their grammar docstrings.

        Populates self.grammar with the successfully parsed rules and warns
        about likely mistakes (missing docstrings, p_ names that are not
        functions, rule-shaped functions lacking the p_ prefix).
        """
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error('no rules of the form p_rulename are defined')
            self.error = True
            return

        for line, module, name, doc in self.pfuncs:
            file = inspect.getsourcefile(module)
            func = self.pdict[name]
            # Bound methods take (self, p); plain functions take (p)
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func.__code__.co_argcount > reqargs:
                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
                self.error = True
            elif func.__code__.co_argcount < reqargs:
                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
                self.error = True
            elif not func.__doc__:
                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                 file, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError as e:
                    self.log.error(str(e))
                    self.error = True

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.modules.add(module)

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n, v in self.pdict.items():
            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                continue
            if n.startswith('t_'):
                continue
            if n.startswith('p_') and n != 'p_error':
                self.log.warning('%r not defined as a function', n)
            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
                    (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
                if v.__doc__:
                    try:
                        # Heuristic: a docstring of the form "name : ..." looks
                        # like a grammar rule missing its p_ prefix.
                        doc = v.__doc__.split(' ')
                        if doc[1] == ':':
                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
                    except IndexError:
                        pass

        self.grammar = grammar
|
|
3184
|
|
3185 # -----------------------------------------------------------------------------
|
|
3186 # yacc(module)
|
|
3187 #
|
|
3188 # Build a parser
|
|
3189 # -----------------------------------------------------------------------------
|
|
3190
|
|
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
         outputdir=None, debuglog=None, errorlog=None, picklefile=None):
    """Build and return an LRParser from grammar rules found by introspection.

    Rules are harvested from *module* (or the caller's namespace when no
    module is given).  Previously generated tables are reused when their
    signature matches the current grammar (or unconditionally when
    *optimize* is set); otherwise the tables are regenerated with *method*
    ('LALR' or 'SLR'), optionally written to *tabmodule* / *picklefile*,
    and diagnostics are sent to *errorlog* / *debuglog*.

    Raises YaccError when the grammar information is invalid or table
    construction fails.  Side effect: rebinds the module-level ``parse``
    to the new parser's parse method.
    """

    if tabmodule is None:
        tabmodule = tab_module

    # Reference to the parsing method of the last built parser
    global parse

    # If pickling is enabled, table files are not created
    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        pdict = dict(_items)
        # If no __file__ attribute is available, try to obtain it from the __module__ instead
        if '__file__' not in pdict:
            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
    else:
        pdict = get_caller_module_dict(2)

    if outputdir is None:
        # If no output directory is set, the location of the output files
        # is determined according to the following rules:
        #     - If tabmodule specifies a package, files go into that package directory
        #     - Otherwise, files go in the same directory as the specifying module
        if isinstance(tabmodule, types.ModuleType):
            srcfile = tabmodule.__file__
        else:
            if '.' not in tabmodule:
                srcfile = pdict['__file__']
            else:
                parts = tabmodule.split('.')
                pkgname = '.'.join(parts[:-1])
                # NOTE: exec of a constructed import statement; pkgname comes
                # from the caller-supplied tabmodule string.
                exec('import %s' % pkgname)
                srcfile = getattr(sys.modules[pkgname], '__file__', '')
        outputdir = os.path.dirname(srcfile)

    # Determine if the module is package of a package or not.
    # If so, fix the tabmodule setting so that tables load correctly
    pkg = pdict.get('__package__')
    if pkg and isinstance(tabmodule, str):
        if '.' not in tabmodule:
            tabmodule = pkg + '.' + tabmodule

    # Set start symbol if it's specified directly using an argument
    if start is not None:
        pdict['start'] = start

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError('Unable to build parser')

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables.  If they load cleanly and the signature matches (or
    # optimize mode skips the check), reuse them and return immediately.
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr, pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception as e:
                errorlog.warning('There was a problem loading the table file: %r', e)
    except VersionError as e:
        errorlog.warning(str(e))
    except ImportError:
        # No pre-generated tables available; fall through and build them
        pass

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info('    %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))

    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug('Generating %s tables', method)

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)

        # Each (state, rule, rejected) conflict is reported only once
        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser
|