ply-3.8/ply/yacc.py @ 7267:343ff337a19b

author HackBot
date Wed, 23 Mar 2016 02:40:16 +0000
1 # -----------------------------------------------------------------------------
2 # ply: yacc.py
3 #
4 # Copyright (C) 2001-2015,
5 # David M. Beazley (Dabeaz LLC)
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are
10 # met:
11 #
12 # * Redistributions of source code must retain the above copyright notice,
13 # this list of conditions and the following disclaimer.
14 # * Redistributions in binary form must reproduce the above copyright notice,
15 # this list of conditions and the following disclaimer in the documentation
16 # and/or other materials provided with the distribution.
17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to
18 # endorse or promote products derived from this software without
19 # specific prior written permission.
20 #
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 # -----------------------------------------------------------------------------
33 #
34 # This implements an LR parser that is constructed from grammar rules defined
35 # as Python functions. The grammar is specified by supplying the BNF inside
36 # Python documentation strings. The inspiration for this technique was borrowed
37 # from John Aycock's Spark parsing system. PLY might be viewed as a cross between
38 # Spark and the GNU bison utility.
39 #
40 # The current implementation is only somewhat object-oriented. The
41 # LR parser itself is defined in terms of an object (which allows multiple
42 # parsers to co-exist). However, most of the variables used during table
43 # construction are defined in terms of global variables. Users shouldn't
44 # notice unless they are trying to define multiple parsers at the same
45 # time using threads (in which case they should have their head examined).
46 #
47 # This implementation supports both SLR and LALR(1) parsing. LALR(1)
48 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50 # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
51 # by the more efficient DeRemer and Pennello algorithm.
52 #
53 # :::::::: WARNING :::::::
54 #
55 # Construction of LR parsing tables is fairly complicated and expensive.
56 # To make this module run fast, a *LOT* of work has been put into
57 # optimization---often at the expense of readability and what one might
58 # consider to be good Python "coding style." Modify the code at your
59 # own risk!
60 # ----------------------------------------------------------------------------
61
62 import re
63 import types
64 import sys
65 import os.path
66 import inspect
67 import base64
68 import warnings
69
70 __version__ = '3.8'
71 __tabversion__ = '3.8'
72
73 #-----------------------------------------------------------------------------
74 # === User configurable parameters ===
75 #
76 # Change these to modify the default behavior of yacc (if you wish)
77 #-----------------------------------------------------------------------------
78
79 yaccdebug = True # Debugging mode. If set, yacc generates a
80 # 'parser.out' file in the current directory
81
82 debug_file = 'parser.out' # Default name of the debugging file
83 tab_module = 'parsetab' # Default name of the table module
84 default_lr = 'LALR' # Default LR table generation method
85
86 error_count = 3 # Number of symbols that must be shifted to leave recovery mode
87
88 yaccdevel = False # Set to True if developing yacc. This turns off optimized
89 # implementations of certain functions.
90
91 resultlimit = 40 # Size limit of results when running in debug mode.
92
93 pickle_protocol = 0 # Protocol to use when writing pickle files
94
95 # String type-checking compatibility
96 if sys.version_info[0] < 3:
97 string_types = basestring
98 else:
99 string_types = str
100
101 MAXINT = sys.maxsize
102
103 # This object is a stand-in for a logging object created by the
104 # logging module. PLY will use this by default to create things
105 # such as the parser.out file. If a user wants more detailed
106 # information, they can create their own logging object and pass
107 # it into PLY.
108
109 class PlyLogger(object):
110 def __init__(self, f):
111 self.f = f
112
113 def debug(self, msg, *args, **kwargs):
114 self.f.write((msg % args) + '\n')
115
116 info = debug
117
118 def warning(self, msg, *args, **kwargs):
119 self.f.write('WARNING: ' + (msg % args) + '\n')
120
121 def error(self, msg, *args, **kwargs):
122 self.f.write('ERROR: ' + (msg % args) + '\n')
123
124 critical = debug
125
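# ----------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module): any object providing
# debug/info/warning/error/critical methods can stand in for PlyLogger. A
# logging.Logger from the standard library satisfies this interface, so
# diagnostics can be routed through an existing logging configuration:
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debuglog=log, errorlog=log)
# ----------------------------------------------------------------------------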
126 # Null logger is used when no output is generated. Does nothing.
127 class NullLogger(object):
128 def __getattribute__(self, name):
129 return self
130
131 def __call__(self, *args, **kwargs):
132 return self
133
134 # Exception raised for yacc-related errors
135 class YaccError(Exception):
136 pass
137
138 # Format the result message that the parser produces when running in debug mode.
139 def format_result(r):
140 repr_str = repr(r)
141 if '\n' in repr_str:
142 repr_str = repr(repr_str)
143 if len(repr_str) > resultlimit:
144 repr_str = repr_str[:resultlimit] + ' ...'
145 result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
146 return result
147
148 # Format stack entries when the parser is running in debug mode
149 def format_stack_entry(r):
150 repr_str = repr(r)
151 if '\n' in repr_str:
152 repr_str = repr(repr_str)
153 if len(repr_str) < 16:
154 return repr_str
155 else:
156 return '<%s @ 0x%x>' % (type(r).__name__, id(r))
157
158 # Panic mode error recovery support. This feature is being reworked--much of the
159 # code here exists to provide a deprecation warning and a backwards-compatible transition
160
161 _errok = None
162 _token = None
163 _restart = None
164 _warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
165 Instead, invoke the methods on the associated parser instance:
166
167 def p_error(p):
168 ...
169 # Use parser.errok(), parser.token(), parser.restart()
170 ...
171
172 parser = yacc.yacc()
173 '''
174
175 def errok():
176 warnings.warn(_warnmsg)
177 return _errok()
178
179 def restart():
180 warnings.warn(_warnmsg)
181 return _restart()
182
183 def token():
184 warnings.warn(_warnmsg)
185 return _token()
186
187 # Utility function to call the p_error() function with some deprecation hacks
188 def call_errorfunc(errorfunc, token, parser):
189 global _errok, _token, _restart
190 _errok = parser.errok
191 _token = parser.token
192 _restart = parser.restart
193 r = errorfunc(token)
194 try:
195 del _errok, _token, _restart
196 except NameError:
197 pass
198 return r
199
200 #-----------------------------------------------------------------------------
201 # === LR Parsing Engine ===
202 #
203 # The following classes are used for the LR parser itself. These are not
204 # used during table construction and are independent of the actual LR
205 # table generation algorithm
206 #-----------------------------------------------------------------------------
207
208 # This class is used to hold non-terminal grammar symbols during parsing.
209 # It normally has the following attributes set:
210 # .type = Grammar symbol type
211 # .value = Symbol value
212 # .lineno = Starting line number
213 # .endlineno = Ending line number (optional, set automatically)
214 # .lexpos = Starting lex position
215 # .endlexpos = Ending lex position (optional, set automatically)
216
217 class YaccSymbol:
218 def __str__(self):
219 return self.type
220
221 def __repr__(self):
222 return str(self)
223
224 # This class is a wrapper around the objects actually passed to each
225 # grammar rule. Index lookup and assignment actually assign the
226 # .value attribute of the underlying YaccSymbol object.
227 # The lineno() method returns the line number of a given
228 # item (or 0 if not defined). The linespan() method returns
229 # a tuple of (startline,endline) representing the range of lines
230 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
231 # representing the range of positional information for a symbol.
232
233 class YaccProduction:
234 def __init__(self, s, stack=None):
235 self.slice = s
236 self.stack = stack
237 self.lexer = None
238 self.parser = None
239
240 def __getitem__(self, n):
241 if isinstance(n, slice):
242 return [s.value for s in self.slice[n]]
243 elif n >= 0:
244 return self.slice[n].value
245 else:
246 return self.stack[n].value
247
248 def __setitem__(self, n, v):
249 self.slice[n].value = v
250
251 def __getslice__(self, i, j):
252 return [s.value for s in self.slice[i:j]]
253
254 def __len__(self):
255 return len(self.slice)
256
257 def lineno(self, n):
258 return getattr(self.slice[n], 'lineno', 0)
259
260 def set_lineno(self, n, lineno):
261 self.slice[n].lineno = lineno
262
263 def linespan(self, n):
264 startline = getattr(self.slice[n], 'lineno', 0)
265 endline = getattr(self.slice[n], 'endlineno', startline)
266 return startline, endline
267
268 def lexpos(self, n):
269 return getattr(self.slice[n], 'lexpos', 0)
270
271 def lexspan(self, n):
272 startpos = getattr(self.slice[n], 'lexpos', 0)
273 endpos = getattr(self.slice[n], 'endlexpos', startpos)
274 return startpos, endpos
275
276 def error(self):
277 raise SyntaxError
278
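# ----------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the module): inside a grammar rule,
# the argument p is a YaccProduction. p[0] is the result slot, p[1]..p[n] are
# the right-hand-side values, and the position methods above can be queried
# for any slice index:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#         startline, endline = p.linespan(3)   # line range of the term
#         startpos, endpos = p.lexspan(1)      # lex range of the left expr
# ----------------------------------------------------------------------------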
279 # -----------------------------------------------------------------------------
280 # == LRParser ==
281 #
282 # The LR Parsing engine.
283 # -----------------------------------------------------------------------------
284
285 class LRParser:
286 def __init__(self, lrtab, errorf):
287 self.productions = lrtab.lr_productions
288 self.action = lrtab.lr_action
289 self.goto = lrtab.lr_goto
290 self.errorfunc = errorf
291 self.set_defaulted_states()
292 self.errorok = True
293
294 def errok(self):
295 self.errorok = True
296
297 def restart(self):
298 del self.statestack[:]
299 del self.symstack[:]
300 sym = YaccSymbol()
301 sym.type = '$end'
302 self.symstack.append(sym)
303 self.statestack.append(0)
304
305 # Defaulted state support.
306 # This method identifies parser states where there is only one possible reduction action.
307 # For such states, the parser can choose to make a rule reduction without consuming
308 # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
309 # certain kinds of advanced parsing situations where the lexer and parser interact with
310 # each other or change states (i.e., manipulation of scope, lexer states, etc.).
311 #
312 # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
313 def set_defaulted_states(self):
314 self.defaulted_states = {}
315 for state, actions in self.action.items():
316 rules = list(actions.values())
317 if len(rules) == 1 and rules[0] < 0:
318 self.defaulted_states[state] = rules[0]
319
320 def disable_defaulted_states(self):
321 self.defaulted_states = {}
322
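# Usage sketch (illustrative): if lexing depends on the parser having consumed
# every preceding token (e.g., feedback between lexer and parser states),
# default reductions can be turned off so that a lookahead token is always
# fetched before each reduction:
#
#     parser = yacc.yacc()
#     parser.disable_defaulted_states()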
323 def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
324 if debug or yaccdevel:
325 if isinstance(debug, int):
326 debug = PlyLogger(sys.stderr)
327 return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
328 elif tracking:
329 return self.parseopt(input, lexer, debug, tracking, tokenfunc)
330 else:
331 return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
332
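# Usage sketch (illustrative): the flags above select one of the three
# generated engine variants, all reached through this same entry point:
#
#     result = parser.parse(data)                  # parseopt_notrack()
#     result = parser.parse(data, tracking=True)   # parseopt()
#     result = parser.parse(data, debug=log)       # parsedebug(), log a logger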
333
334 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
335 # parsedebug().
336 #
337 # This is the debugging enabled version of parse(). All changes made to the
338 # parsing engine should be made here. Optimized versions of this function
339 # are automatically created by the ply/ygen.py script. This script cuts out
340 # sections enclosed in markers such as this:
341 #
342 # #--! DEBUG
343 # statements
344 # #--! DEBUG
345 #
346 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
347
348 def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
349 #--! parsedebug-start
350 lookahead = None # Current lookahead symbol
351 lookaheadstack = [] # Stack of lookahead symbols
352 actions = self.action # Local reference to action table (to avoid lookup on self.)
353 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
354 prod = self.productions # Local reference to production list (to avoid lookup on self.)
355 defaulted_states = self.defaulted_states # Local reference to defaulted states
356 pslice = YaccProduction(None) # Production object passed to grammar rules
357 errorcount = 0 # Used during error recovery
358
359 #--! DEBUG
360 debug.info('PLY: PARSE DEBUG START')
361 #--! DEBUG
362
363 # If no lexer was given, we will try to use the lex module
364 if not lexer:
365 from . import lex
366 lexer = lex.lexer
367
368 # Set up the lexer and parser objects on pslice
369 pslice.lexer = lexer
370 pslice.parser = self
371
372 # If input was supplied, pass to lexer
373 if input is not None:
374 lexer.input(input)
375
376 if tokenfunc is None:
377 # Tokenize function
378 get_token = lexer.token
379 else:
380 get_token = tokenfunc
381
382 # Set the parser's token() method (sometimes used in error recovery)
383 self.token = get_token
384
385 # Set up the state and symbol stacks
386
387 statestack = [] # Stack of parsing states
388 self.statestack = statestack
389 symstack = [] # Stack of grammar symbols
390 self.symstack = symstack
391
392 pslice.stack = symstack # Put in the production
393 errtoken = None # Err token
394
395 # The start state is assumed to be (0,$end)
396
397 statestack.append(0)
398 sym = YaccSymbol()
399 sym.type = '$end'
400 symstack.append(sym)
401 state = 0
402 while True:
403 # Get the next symbol on the input. If a lookahead symbol
404 # is already set, we just use that. Otherwise, we'll pull
405 # the next token off of the lookaheadstack or from the lexer
406
407 #--! DEBUG
408 debug.debug('')
409 debug.debug('State : %s', state)
410 #--! DEBUG
411
412 if state not in defaulted_states:
413 if not lookahead:
414 if not lookaheadstack:
415 lookahead = get_token() # Get the next token
416 else:
417 lookahead = lookaheadstack.pop()
418 if not lookahead:
419 lookahead = YaccSymbol()
420 lookahead.type = '$end'
421
422 # Check the action table
423 ltype = lookahead.type
424 t = actions[state].get(ltype)
425 else:
426 t = defaulted_states[state]
427 #--! DEBUG
428 debug.debug('Defaulted state %s: Reduce using %d', state, -t)
429 #--! DEBUG
430
431 #--! DEBUG
432 debug.debug('Stack : %s',
433 ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
434 #--! DEBUG
435
436 if t is not None:
437 if t > 0:
438 # shift a symbol on the stack
439 statestack.append(t)
440 state = t
441
442 #--! DEBUG
443 debug.debug('Action : Shift and goto state %s', t)
444 #--! DEBUG
445
446 symstack.append(lookahead)
447 lookahead = None
448
449 # Decrease error count on successful shift
450 if errorcount:
451 errorcount -= 1
452 continue
453
454 if t < 0:
455 # reduce a symbol on the stack, emit a production
456 p = prod[-t]
457 pname = p.name
458 plen = p.len
459
460 # Get production function
461 sym = YaccSymbol()
462 sym.type = pname # Production name
463 sym.value = None
464
465 #--! DEBUG
466 if plen:
467 debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
468 '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
469 goto[statestack[-1-plen]][pname])
470 else:
471 debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
472 goto[statestack[-1]][pname])
473
474 #--! DEBUG
475
476 if plen:
477 targ = symstack[-plen-1:]
478 targ[0] = sym
479
480 #--! TRACKING
481 if tracking:
482 t1 = targ[1]
483 sym.lineno = t1.lineno
484 sym.lexpos = t1.lexpos
485 t1 = targ[-1]
486 sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
487 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
488 #--! TRACKING
489
490 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
491 # The code enclosed in this section is duplicated
492 # below as a performance optimization. Make sure
493 # changes get made in both locations.
494
495 pslice.slice = targ
496
497 try:
498 # Call the grammar rule with our special slice object
499 del symstack[-plen:]
500 del statestack[-plen:]
501 p.callable(pslice)
502 #--! DEBUG
503 debug.info('Result : %s', format_result(pslice[0]))
504 #--! DEBUG
505 symstack.append(sym)
506 state = goto[statestack[-1]][pname]
507 statestack.append(state)
508 except SyntaxError:
509 # If an error was set, enter error recovery state
510 lookaheadstack.append(lookahead)
511 symstack.pop()
512 statestack.pop()
513 state = statestack[-1]
514 sym.type = 'error'
515 lookahead = sym
516 errorcount = error_count
517 self.errorok = False
518 continue
519 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
520
521 else:
522
523 #--! TRACKING
524 if tracking:
525 sym.lineno = lexer.lineno
526 sym.lexpos = lexer.lexpos
527 #--! TRACKING
528
529 targ = [sym]
530
531 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
532 # The code enclosed in this section is duplicated
533 # above as a performance optimization. Make sure
534 # changes get made in both locations.
535
536 pslice.slice = targ
537
538 try:
539 # Call the grammar rule with our special slice object
540 p.callable(pslice)
541 #--! DEBUG
542 debug.info('Result : %s', format_result(pslice[0]))
543 #--! DEBUG
544 symstack.append(sym)
545 state = goto[statestack[-1]][pname]
546 statestack.append(state)
547 except SyntaxError:
548 # If an error was set, enter error recovery state
549 lookaheadstack.append(lookahead)
550 symstack.pop()
551 statestack.pop()
552 state = statestack[-1]
553 sym.type = 'error'
554 lookahead = sym
555 errorcount = error_count
556 self.errorok = False
557 continue
558 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
559
560 if t == 0:
561 n = symstack[-1]
562 result = getattr(n, 'value', None)
563 #--! DEBUG
564 debug.info('Done : Returning %s', format_result(result))
565 debug.info('PLY: PARSE DEBUG END')
566 #--! DEBUG
567 return result
568
569 if t is None:
570
571 #--! DEBUG
572 debug.error('Error : %s',
573 ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
574 #--! DEBUG
575
576 # We have some kind of parsing error here. To handle
577 # this, we are going to push the current token onto
578 # the tokenstack and replace it with an 'error' token.
579 # If there are any synchronization rules, they may
580 # catch it.
581 #
582 # In addition to pushing the error token, we call
583 # the user-defined p_error() function if this is the
584 # first syntax error. This function is only called if
585 # errorcount == 0.
586 if errorcount == 0 or self.errorok:
587 errorcount = error_count
588 self.errorok = False
589 errtoken = lookahead
590 if errtoken.type == '$end':
591 errtoken = None # End of file!
592 if self.errorfunc:
593 if errtoken and not hasattr(errtoken, 'lexer'):
594 errtoken.lexer = lexer
595 tok = call_errorfunc(self.errorfunc, errtoken, self)
596 if self.errorok:
597 # User must have done some kind of panic
598 # mode recovery on their own. The
599 # returned token is the next lookahead
600 lookahead = tok
601 errtoken = None
602 continue
603 else:
604 if errtoken:
605 if hasattr(errtoken, 'lineno'):
606 lineno = lookahead.lineno
607 else:
608 lineno = 0
609 if lineno:
610 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
611 else:
612 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
613 else:
614 sys.stderr.write('yacc: Parse error in input. EOF\n')
615 return
616
617 else:
618 errorcount = error_count
619
620 # case 1: the statestack only has 1 entry on it. If we're in this state, the
621 # entire parse has been rolled back and we're completely hosed. The token is
622 # discarded and we just keep going.
623
624 if len(statestack) <= 1 and lookahead.type != '$end':
625 lookahead = None
626 errtoken = None
627 state = 0
628 # Nuke the pushback stack
629 del lookaheadstack[:]
630 continue
631
632 # case 2: the statestack has a couple of entries on it, but we're
633 # at the end of the file. nuke the top entry and generate an error token
634
635 # Start nuking entries on the stack
636 if lookahead.type == '$end':
637 # Whoa. We're really hosed here. Bail out
638 return
639
640 if lookahead.type != 'error':
641 sym = symstack[-1]
642 if sym.type == 'error':
643 # Hmmm. Error is on top of stack, we'll just nuke input
644 # symbol and continue
645 #--! TRACKING
646 if tracking:
647 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
648 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
649 #--! TRACKING
650 lookahead = None
651 continue
652
653 # Create the error symbol for the first time and make it the new lookahead symbol
654 t = YaccSymbol()
655 t.type = 'error'
656
657 if hasattr(lookahead, 'lineno'):
658 t.lineno = t.endlineno = lookahead.lineno
659 if hasattr(lookahead, 'lexpos'):
660 t.lexpos = t.endlexpos = lookahead.lexpos
661 t.value = lookahead
662 lookaheadstack.append(lookahead)
663 lookahead = t
664 else:
665 sym = symstack.pop()
666 #--! TRACKING
667 if tracking:
668 lookahead.lineno = sym.lineno
669 lookahead.lexpos = sym.lexpos
670 #--! TRACKING
671 statestack.pop()
672 state = statestack[-1]
673
674 continue
675
676 # Call an error function here
677 raise RuntimeError('yacc: internal parser error!!!\n')
678
679 #--! parsedebug-end
680
681 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
682 # parseopt().
683 #
684 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
685 # This code is automatically generated by the ply/ygen.py script. Make
686 # changes to the parsedebug() method instead.
687 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
688
689 def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
690 #--! parseopt-start
691 lookahead = None # Current lookahead symbol
692 lookaheadstack = [] # Stack of lookahead symbols
693 actions = self.action # Local reference to action table (to avoid lookup on self.)
694 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
695 prod = self.productions # Local reference to production list (to avoid lookup on self.)
696 defaulted_states = self.defaulted_states # Local reference to defaulted states
697 pslice = YaccProduction(None) # Production object passed to grammar rules
698 errorcount = 0 # Used during error recovery
699
700
701 # If no lexer was given, we will try to use the lex module
702 if not lexer:
703 from . import lex
704 lexer = lex.lexer
705
706 # Set up the lexer and parser objects on pslice
707 pslice.lexer = lexer
708 pslice.parser = self
709
710 # If input was supplied, pass to lexer
711 if input is not None:
712 lexer.input(input)
713
714 if tokenfunc is None:
715 # Tokenize function
716 get_token = lexer.token
717 else:
718 get_token = tokenfunc
719
720 # Set the parser's token() method (sometimes used in error recovery)
721 self.token = get_token
722
723 # Set up the state and symbol stacks
724
725 statestack = [] # Stack of parsing states
726 self.statestack = statestack
727 symstack = [] # Stack of grammar symbols
728 self.symstack = symstack
729
730 pslice.stack = symstack # Put in the production
731 errtoken = None # Err token
732
733 # The start state is assumed to be (0,$end)
734
735 statestack.append(0)
736 sym = YaccSymbol()
737 sym.type = '$end'
738 symstack.append(sym)
739 state = 0
740 while True:
741 # Get the next symbol on the input. If a lookahead symbol
742 # is already set, we just use that. Otherwise, we'll pull
743 # the next token off of the lookaheadstack or from the lexer
744
745
746 if state not in defaulted_states:
747 if not lookahead:
748 if not lookaheadstack:
749 lookahead = get_token() # Get the next token
750 else:
751 lookahead = lookaheadstack.pop()
752 if not lookahead:
753 lookahead = YaccSymbol()
754 lookahead.type = '$end'
755
756 # Check the action table
757 ltype = lookahead.type
758 t = actions[state].get(ltype)
759 else:
760 t = defaulted_states[state]
761
762
763 if t is not None:
764 if t > 0:
765 # shift a symbol on the stack
766 statestack.append(t)
767 state = t
768
769
770 symstack.append(lookahead)
771 lookahead = None
772
773 # Decrease error count on successful shift
774 if errorcount:
775 errorcount -= 1
776 continue
777
778 if t < 0:
779 # reduce a symbol on the stack, emit a production
780 p = prod[-t]
781 pname = p.name
782 plen = p.len
783
784 # Get production function
785 sym = YaccSymbol()
786 sym.type = pname # Production name
787 sym.value = None
788
789
790 if plen:
791 targ = symstack[-plen-1:]
792 targ[0] = sym
793
794 #--! TRACKING
795 if tracking:
796 t1 = targ[1]
797 sym.lineno = t1.lineno
798 sym.lexpos = t1.lexpos
799 t1 = targ[-1]
800 sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
801 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
802 #--! TRACKING
803
804 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
805 # The code enclosed in this section is duplicated
806 # below as a performance optimization. Make sure
807 # changes get made in both locations.
808
809 pslice.slice = targ
810
811 try:
812 # Call the grammar rule with our special slice object
813 del symstack[-plen:]
814 del statestack[-plen:]
815 p.callable(pslice)
816 symstack.append(sym)
817 state = goto[statestack[-1]][pname]
818 statestack.append(state)
819 except SyntaxError:
820 # If an error was set, enter error recovery state
821 lookaheadstack.append(lookahead)
822 symstack.pop()
823 statestack.pop()
824 state = statestack[-1]
825 sym.type = 'error'
826 lookahead = sym
827 errorcount = error_count
828 self.errorok = False
829 continue
830 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
831
832 else:
833
834 #--! TRACKING
835 if tracking:
836 sym.lineno = lexer.lineno
837 sym.lexpos = lexer.lexpos
838 #--! TRACKING
839
840 targ = [sym]
841
842 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
843 # The code enclosed in this section is duplicated
844 # above as a performance optimization. Make sure
845 # changes get made in both locations.
846
847 pslice.slice = targ
848
849 try:
850 # Call the grammar rule with our special slice object
851 p.callable(pslice)
852 symstack.append(sym)
853 state = goto[statestack[-1]][pname]
854 statestack.append(state)
855 except SyntaxError:
856 # If an error was set, enter error recovery state
857 lookaheadstack.append(lookahead)
858 symstack.pop()
859 statestack.pop()
860 state = statestack[-1]
861 sym.type = 'error'
862 lookahead = sym
863 errorcount = error_count
864 self.errorok = False
865 continue
866 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
867
868 if t == 0:
869 n = symstack[-1]
870 result = getattr(n, 'value', None)
871 return result
872
873 if t is None:
874
875
876 # We have some kind of parsing error here. To handle
877 # this, we are going to push the current token onto
878 # the tokenstack and replace it with an 'error' token.
879 # If there are any synchronization rules, they may
880 # catch it.
881 #
882 # In addition to pushing the error token, we call
883 # the user-defined p_error() function if this is the
884 # first syntax error. This function is only called if
885 # errorcount == 0.
886 if errorcount == 0 or self.errorok:
887 errorcount = error_count
888 self.errorok = False
889 errtoken = lookahead
890 if errtoken.type == '$end':
891 errtoken = None # End of file!
892 if self.errorfunc:
893 if errtoken and not hasattr(errtoken, 'lexer'):
894 errtoken.lexer = lexer
895 tok = call_errorfunc(self.errorfunc, errtoken, self)
896 if self.errorok:
897 # User must have done some kind of panic
898 # mode recovery on their own. The
899 # returned token is the next lookahead
900 lookahead = tok
901 errtoken = None
902 continue
903 else:
904 if errtoken:
905 if hasattr(errtoken, 'lineno'):
906 lineno = lookahead.lineno
907 else:
908 lineno = 0
909 if lineno:
910 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
911 else:
912 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
913 else:
914 sys.stderr.write('yacc: Parse error in input. EOF\n')
915 return
916
917 else:
918 errorcount = error_count
919
920 # case 1: the statestack only has 1 entry on it. If we're in this state, the
921 # entire parse has been rolled back and we're completely hosed. The token is
922 # discarded and we just keep going.
923
924 if len(statestack) <= 1 and lookahead.type != '$end':
925 lookahead = None
926 errtoken = None
927 state = 0
928 # Nuke the pushback stack
929 del lookaheadstack[:]
930 continue
931
932 # case 2: the statestack has a couple of entries on it, but we're
933 # at the end of the file. nuke the top entry and generate an error token
934
935 # Start nuking entries on the stack
936 if lookahead.type == '$end':
937 # Whoa. We're really hosed here. Bail out
938 return
939
940 if lookahead.type != 'error':
941 sym = symstack[-1]
942 if sym.type == 'error':
943 # Hmmm. Error is on top of stack, we'll just nuke input
944 # symbol and continue
945 #--! TRACKING
946 if tracking:
947 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
948 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
949 #--! TRACKING
950 lookahead = None
951 continue
952
953 # Create the error symbol for the first time and make it the new lookahead symbol
954 t = YaccSymbol()
955 t.type = 'error'
956
957 if hasattr(lookahead, 'lineno'):
958 t.lineno = t.endlineno = lookahead.lineno
959 if hasattr(lookahead, 'lexpos'):
960 t.lexpos = t.endlexpos = lookahead.lexpos
961 t.value = lookahead
962 lookaheadstack.append(lookahead)
963 lookahead = t
964 else:
965 sym = symstack.pop()
966 #--! TRACKING
967 if tracking:
968 lookahead.lineno = sym.lineno
969 lookahead.lexpos = sym.lexpos
970 #--! TRACKING
971 statestack.pop()
972 state = statestack[-1]
973
974 continue
975
976 # Call an error function here
977 raise RuntimeError('yacc: internal parser error!!!\n')
978
979 #--! parseopt-end
980
981 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
982 # parseopt_notrack().
983 #
984 # Optimized version of parseopt() with line number tracking removed.
985 # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
986 # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
987 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
988
989 def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
990 #--! parseopt-notrack-start
991 lookahead = None # Current lookahead symbol
992 lookaheadstack = [] # Stack of lookahead symbols
993 actions = self.action # Local reference to action table (to avoid lookup on self.)
994 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
995 prod = self.productions # Local reference to production list (to avoid lookup on self.)
996 defaulted_states = self.defaulted_states # Local reference to defaulted states
997 pslice = YaccProduction(None) # Production object passed to grammar rules
998 errorcount = 0 # Used during error recovery
999
1000
1001 # If no lexer was given, we will try to use the lex module
1002 if not lexer:
1003 from . import lex
1004 lexer = lex.lexer
1005
1006 # Set up the lexer and parser objects on pslice
1007 pslice.lexer = lexer
1008 pslice.parser = self
1009
1010 # If input was supplied, pass to lexer
1011 if input is not None:
1012 lexer.input(input)
1013
1014 if tokenfunc is None:
1015 # Tokenize function
1016 get_token = lexer.token
1017 else:
1018 get_token = tokenfunc
1019
1020 # Set the parser's token() method (sometimes used in error recovery)
1021 self.token = get_token
1022
1023 # Set up the state and symbol stacks
1024
1025 statestack = [] # Stack of parsing states
1026 self.statestack = statestack
1027 symstack = [] # Stack of grammar symbols
1028 self.symstack = symstack
1029
1030 pslice.stack = symstack # Put in the production
1031 errtoken = None # Err token
1032
1033 # The start state is assumed to be (0,$end)
1034
1035 statestack.append(0)
1036 sym = YaccSymbol()
1037 sym.type = '$end'
1038 symstack.append(sym)
1039 state = 0
1040 while True:
1041 # Get the next symbol on the input. If a lookahead symbol
1042 # is already set, we just use that. Otherwise, we'll pull
1043 # the next token off of the lookaheadstack or from the lexer
1044
1045
1046 if state not in defaulted_states:
1047 if not lookahead:
1048 if not lookaheadstack:
1049 lookahead = get_token() # Get the next token
1050 else:
1051 lookahead = lookaheadstack.pop()
1052 if not lookahead:
1053 lookahead = YaccSymbol()
1054 lookahead.type = '$end'
1055
1056 # Check the action table
1057 ltype = lookahead.type
1058 t = actions[state].get(ltype)
1059 else:
1060 t = defaulted_states[state]
1061
1062
1063 if t is not None:
1064 if t > 0:
1065 # shift a symbol on the stack
1066 statestack.append(t)
1067 state = t
1068
1069
1070 symstack.append(lookahead)
1071 lookahead = None
1072
1073 # Decrease error count on successful shift
1074 if errorcount:
1075 errorcount -= 1
1076 continue
1077
1078 if t < 0:
1079 # reduce a symbol on the stack, emit a production
1080 p = prod[-t]
1081 pname = p.name
1082 plen = p.len
1083
1084 # Get production function
1085 sym = YaccSymbol()
1086 sym.type = pname # Production name
1087 sym.value = None
1088
1089
1090 if plen:
1091 targ = symstack[-plen-1:]
1092 targ[0] = sym
1093
1094
1095 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1096 # The code enclosed in this section is duplicated
1097 # below as a performance optimization. Make sure
1098 # changes get made in both locations.
1099
1100 pslice.slice = targ
1101
1102 try:
1103 # Call the grammar rule with our special slice object
1104 del symstack[-plen:]
1105 del statestack[-plen:]
1106 p.callable(pslice)
1107 symstack.append(sym)
1108 state = goto[statestack[-1]][pname]
1109 statestack.append(state)
1110 except SyntaxError:
1111 # If an error was set, enter error recovery state
1112 lookaheadstack.append(lookahead)
1113 symstack.pop()
1114 statestack.pop()
1115 state = statestack[-1]
1116 sym.type = 'error'
1117 lookahead = sym
1118 errorcount = error_count
1119 self.errorok = False
1120 continue
1121 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1122
1123 else:
1124
1125
1126 targ = [sym]
1127
1128 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1129 # The code enclosed in this section is duplicated
1130 # above as a performance optimization. Make sure
1131 # changes get made in both locations.
1132
1133 pslice.slice = targ
1134
1135 try:
1136 # Call the grammar rule with our special slice object
1137 p.callable(pslice)
1138 symstack.append(sym)
1139 state = goto[statestack[-1]][pname]
1140 statestack.append(state)
1141 except SyntaxError:
1142 # If an error was set, enter error recovery state
1143 lookaheadstack.append(lookahead)
1144 symstack.pop()
1145 statestack.pop()
1146 state = statestack[-1]
1147 sym.type = 'error'
1148 lookahead = sym
1149 errorcount = error_count
1150 self.errorok = False
1151 continue
1152 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1153
1154 if t == 0:
1155 n = symstack[-1]
1156 result = getattr(n, 'value', None)
1157 return result
1158
1159 if t is None:
1160
1161
1162 # We have some kind of parsing error here. To handle
1163 # this, we are going to push the current token onto
1164 # the tokenstack and replace it with an 'error' token.
1165 # If there are any synchronization rules, they may
1166 # catch it.
1167 #
1168 # In addition to pushing the error token, we call
1169 # the user-defined p_error() function if this is the
1170 # first syntax error. This function is only called if
1171 # errorcount == 0.
1172 if errorcount == 0 or self.errorok:
1173 errorcount = error_count
1174 self.errorok = False
1175 errtoken = lookahead
1176 if errtoken.type == '$end':
1177 errtoken = None # End of file!
1178 if self.errorfunc:
1179 if errtoken and not hasattr(errtoken, 'lexer'):
1180 errtoken.lexer = lexer
1181 tok = call_errorfunc(self.errorfunc, errtoken, self)
1182 if self.errorok:
1183 # User must have done some kind of panic
1184 # mode recovery on their own. The
1185 # returned token is the next lookahead
1186 lookahead = tok
1187 errtoken = None
1188 continue
1189 else:
1190 if errtoken:
1191 if hasattr(errtoken, 'lineno'):
1192 lineno = lookahead.lineno
1193 else:
1194 lineno = 0
1195 if lineno:
1196 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
1197 else:
1198 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
1199 else:
1200 sys.stderr.write('yacc: Parse error in input. EOF\n')
1201 return
1202
1203 else:
1204 errorcount = error_count
1205
1206 # case 1: the statestack only has 1 entry on it. If we're in this state, the
1207 # entire parse has been rolled back and we're completely hosed. The token is
1208 # discarded and we just keep going.
1209
1210 if len(statestack) <= 1 and lookahead.type != '$end':
1211 lookahead = None
1212 errtoken = None
1213 state = 0
1214 # Nuke the pushback stack
1215 del lookaheadstack[:]
1216 continue
1217
1218 # case 2: the statestack has a couple of entries on it, but we're
1219 # at the end of the file. nuke the top entry and generate an error token
1220
1221 # Start nuking entries on the stack
1222 if lookahead.type == '$end':
1223 # Whoa. We're really hosed here. Bail out
1224 return
1225
1226 if lookahead.type != 'error':
1227 sym = symstack[-1]
1228 if sym.type == 'error':
1229 # Hmmm. Error is on top of stack, we'll just nuke input
1230 # symbol and continue
1231 lookahead = None
1232 continue
1233
1234 # Create the error symbol for the first time and make it the new lookahead symbol
1235 t = YaccSymbol()
1236 t.type = 'error'
1237
1238 if hasattr(lookahead, 'lineno'):
1239 t.lineno = t.endlineno = lookahead.lineno
1240 if hasattr(lookahead, 'lexpos'):
1241 t.lexpos = t.endlexpos = lookahead.lexpos
1242 t.value = lookahead
1243 lookaheadstack.append(lookahead)
1244 lookahead = t
1245 else:
1246 sym = symstack.pop()
1247 statestack.pop()
1248 state = statestack[-1]
1249
1250 continue
1251
1252 # Call an error function here
1253 raise RuntimeError('yacc: internal parser error!!!\n')
1254
1255 #--! parseopt-notrack-end
1256
1257 # -----------------------------------------------------------------------------
1258 # === Grammar Representation ===
1259 #
1260 # The following functions, classes, and variables are used to represent and
1261 # manipulate the rules that make up a grammar.
1262 # -----------------------------------------------------------------------------
1263
1264 # regex matching identifiers
1265 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1266
1267 # -----------------------------------------------------------------------------
1268 # class Production:
1269 #
1270 # This class stores the raw information about a single production or grammar rule.
1271 # A grammar rule refers to a specification such as this:
1272 #
1273 # expr : expr PLUS term
1274 #
1275 # Here are the basic attributes defined on all productions
1276 #
1277 # name - Name of the production. For example 'expr'
1278 # prod - A list of symbols on the right side ['expr','PLUS','term']
1279 # prec - Production precedence level
1280 # number - Production number.
1281 # func - Function that executes on reduce
1282 # file - File where production function is defined
1283 # lineno - Line number where production function is defined
1284 #
1285 # The following additional attributes are also defined:
1286 #
1287 # len - Length of the production (number of symbols on right hand side)
1288 # usyms - List of unique symbols found in the production
1289 # -----------------------------------------------------------------------------
1290
1291 class Production(object):
1292 reduced = 0
1293 def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
1294 self.name = name
1295 self.prod = tuple(prod)
1296 self.number = number
1297 self.func = func
1298 self.callable = None
1299 self.file = file
1300 self.line = line
1301 self.prec = precedence
1302
1303 # Internal settings used during table construction
1304
1305 self.len = len(self.prod) # Length of the production
1306
1307 # Create a list of unique production symbols used in the production
1308 self.usyms = []
1309 for s in self.prod:
1310 if s not in self.usyms:
1311 self.usyms.append(s)
1312
1313 # List of all LR items for the production
1314 self.lr_items = []
1315 self.lr_next = None
1316
1317 # Create a string representation
1318 if self.prod:
1319 self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
1320 else:
1321 self.str = '%s -> <empty>' % self.name
1322
1323 def __str__(self):
1324 return self.str
1325
1326 def __repr__(self):
1327 return 'Production(' + str(self) + ')'
1328
1329 def __len__(self):
1330 return len(self.prod)
1331
1332 def __nonzero__(self):
1333 return 1
1334
1335 def __getitem__(self, index):
1336 return self.prod[index]
1337
1338 # Return the nth lr_item from the production (or None if at the end)
1339 def lr_item(self, n):
1340 if n > len(self.prod):
1341 return None
1342 p = LRItem(self, n)
1343 # Precompute the list of productions immediately following.
1344 try:
1345 p.lr_after = Prodnames[p.prod[n+1]]
1346 except (IndexError, KeyError):
1347 p.lr_after = []
1348 try:
1349 p.lr_before = p.prod[n-1]
1350 except IndexError:
1351 p.lr_before = None
1352 return p
1353
1354 # Bind the production function name to a callable
1355 def bind(self, pdict):
1356 if self.func:
1357 self.callable = pdict[self.func]
1358
1359 # This class serves as a minimal stand-in for Production objects when
1360 # reading table data from files. It only contains information
1361 # actually used by the LR parsing engine, plus some additional
1362 # debugging information.
1363 class MiniProduction(object):
1364 def __init__(self, str, name, len, func, file, line):
1365 self.name = name
1366 self.len = len
1367 self.func = func
1368 self.callable = None
1369 self.file = file
1370 self.line = line
1371 self.str = str
1372
1373 def __str__(self):
1374 return self.str
1375
1376 def __repr__(self):
1377 return 'MiniProduction(%s)' % self.str
1378
1379 # Bind the production function name to a callable
1380 def bind(self, pdict):
1381 if self.func:
1382 self.callable = pdict[self.func]
1383
1384
1385 # -----------------------------------------------------------------------------
1386 # class LRItem
1387 #
1388 # This class represents a specific stage of parsing a production rule. For
1389 # example:
1390 #
1391 # expr : expr . PLUS term
1392 #
1393 # In the above, the "." represents the current location of the parse. Here
1394 # are the basic attributes:
1395 #
1396 # name - Name of the production. For example 'expr'
1397 # prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
1398 # number - Production number.
1399 #
1400 # lr_next - Next LR item. For example, if we are 'expr -> expr . PLUS term'
1401 # then lr_next refers to 'expr -> expr PLUS . term'
1402 # lr_index - LR item index (location of the ".") in the prod list.
1403 # lookaheads - LALR lookahead symbols for this item
1404 # len - Length of the production (number of symbols on right hand side)
1405 # lr_after - List of all productions that immediately follow
1406 # lr_before - Grammar symbol immediately before
1407 # -----------------------------------------------------------------------------
1408
1409 class LRItem(object):
1410 def __init__(self, p, n):
1411 self.name = p.name
1412 self.prod = list(p.prod)
1413 self.number = p.number
1414 self.lr_index = n
1415 self.lookaheads = {}
1416 self.prod.insert(n, '.')
1417 self.prod = tuple(self.prod)
1418 self.len = len(self.prod)
1419 self.usyms = p.usyms
1420
1421 def __str__(self):
1422 if self.prod:
1423 s = '%s -> %s' % (self.name, ' '.join(self.prod))
1424 else:
1425 s = '%s -> <empty>' % self.name
1426 return s
1427
1428 def __repr__(self):
1429 return 'LRItem(' + str(self) + ')'
1430
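# Example (illustrative, not part of the module): for the production
# 'expr -> expr PLUS term', the LR item with the dot after the first symbol
# is built as follows:
#
#     p = Production(1, 'expr', ['expr', 'PLUS', 'term'])
#     item = LRItem(p, 1)
#     str(item)        # 'expr -> expr . PLUS term'
#     item.lr_index    # 1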
1431 # -----------------------------------------------------------------------------
1432 # rightmost_terminal()
1433 #
1434 # Return the rightmost terminal from a list of symbols. Used in add_production()
1435 # -----------------------------------------------------------------------------
1436 def rightmost_terminal(symbols, terminals):
1437 i = len(symbols) - 1
1438 while i >= 0:
1439 if symbols[i] in terminals:
1440 return symbols[i]
1441 i -= 1
1442 return None
1443
1444 # -----------------------------------------------------------------------------
1445 # === GRAMMAR CLASS ===
1446 #
1447 # The following class represents the contents of the specified grammar along
1448 # with various computed properties such as first sets, follow sets, LR items, etc.
1449 # This data is used for critical parts of the table generation process later.
1450 # -----------------------------------------------------------------------------
1451
1452 class GrammarError(YaccError):
1453 pass
1454
1455 class Grammar(object):
1456 def __init__(self, terminals):
1457 self.Productions = [None] # A list of all of the productions. The first
1458 # entry is always reserved for the purpose of
1459 # building an augmented grammar
1460
1461 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all
1462 # productions of that nonterminal.
1463
1464 self.Prodmap = {} # A dictionary that is only used to detect duplicate
1465 # productions.
1466
1467 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a
1468 # list of the rules where they are used.
1469
1470 for term in terminals:
1471 self.Terminals[term] = []
1472
1473 self.Terminals['error'] = []
1474
1475 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list
1476 # of rule numbers where they are used.
1477
1478 self.First = {} # A dictionary of precomputed FIRST(x) symbols
1479
1480 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols
1481
1482 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the
1483 # form ('right',level) or ('nonassoc', level) or ('left',level)
1484
1485 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
1486 # This is only used to provide error checking and to generate
1487 # a warning about unused precedence rules.
1488
1489 self.Start = None # Starting symbol for the grammar
1490
1491
1492 def __len__(self):
1493 return len(self.Productions)
1494
1495 def __getitem__(self, index):
1496 return self.Productions[index]
1497
1498 # -----------------------------------------------------------------------------
1499 # set_precedence()
1500 #
1501 # Sets the precedence for a given terminal. assoc is the associativity such as
1502 # 'left','right', or 'nonassoc'. level is a numeric level.
1503 #
1504 # -----------------------------------------------------------------------------
1505
1506 def set_precedence(self, term, assoc, level):
1507 assert self.Productions == [None], 'Must call set_precedence() before add_production()'
1508 if term in self.Precedence:
1509 raise GrammarError('Precedence already specified for terminal %r' % term)
1510 if assoc not in ['left', 'right', 'nonassoc']:
1511 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1512 self.Precedence[term] = (assoc, level)
1513
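# Example (illustrative, not part of the module): the familiar yacc-style
# precedence table
#
#     precedence = (('left', 'PLUS', 'MINUS'),
#                   ('left', 'TIMES', 'DIVIDE'))
#
# corresponds to one call per terminal, with the numeric level taken from the
# row position:
#
#     g = Grammar(['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'NUMBER'])
#     g.set_precedence('PLUS', 'left', 1)
#     g.set_precedence('MINUS', 'left', 1)
#     g.set_precedence('TIMES', 'left', 2)
#     g.set_precedence('DIVIDE', 'left', 2)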
1514 # -----------------------------------------------------------------------------
1515 # add_production()
1516 #
1517 # Given an action function, this function assembles a production rule and
1518 # computes its precedence level.
1519 #
1520 # The production rule is supplied as a list of symbols. For example,
1521 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1522 # symbols ['expr','PLUS','term'].
1523 #
1524 # Precedence is determined by the precedence of the rightmost terminal symbol
1525 # or the precedence of a terminal specified by %prec.
1526 #
1527 # A variety of error checks are performed to make sure production symbols
1528 # are valid and that %prec is used correctly.
1529 # -----------------------------------------------------------------------------
1530
1531 def add_production(self, prodname, syms, func=None, file='', line=0):
1532
1533 if prodname in self.Terminals:
1534 raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
1535 if prodname == 'error':
1536 raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
1537 if not _is_identifier.match(prodname):
1538 raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
1539
1540 # Look for literal tokens
1541 for n, s in enumerate(syms):
1542 if s[0] in "'\"":
1543 try:
1544 c = eval(s)
1545 if (len(c) > 1):
1546 raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
1547 (file, line, s, prodname))
1548 if c not in self.Terminals:
1549 self.Terminals[c] = []
1550 syms[n] = c
1551 continue
1552 except SyntaxError:
1553 pass
1554 if not _is_identifier.match(s) and s != '%prec':
1555 raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
1556
1557 # Determine the precedence level
1558 if '%prec' in syms:
1559 if syms[-1] == '%prec':
1560 raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
1561 if syms[-2] != '%prec':
1562 raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
1563 (file, line))
1564 precname = syms[-1]
1565 prodprec = self.Precedence.get(precname)
1566 if not prodprec:
1567 raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
1568 else:
1569 self.UsedPrecedence.add(precname)
1570 del syms[-2:] # Drop %prec from the rule
1571 else:
1572 # If no %prec, precedence is determined by the rightmost terminal symbol
1573 precname = rightmost_terminal(syms, self.Terminals)
1574 prodprec = self.Precedence.get(precname, ('right', 0))
1575
1576 # See if the rule is already in the rulemap
1577 map = '%s -> %s' % (prodname, syms)
1578 if map in self.Prodmap:
1579 m = self.Prodmap[map]
1580 raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
1581 'Previous definition at %s:%d' % (m.file, m.line))
1582
1583 # From this point on, everything is valid. Create a new Production instance
1584 pnumber = len(self.Productions)
1585 if prodname not in self.Nonterminals:
1586 self.Nonterminals[prodname] = []
1587
1588 # Add the production number to Terminals and Nonterminals
1589 for t in syms:
1590 if t in self.Terminals:
1591 self.Terminals[t].append(pnumber)
1592 else:
1593 if t not in self.Nonterminals:
1594 self.Nonterminals[t] = []
1595 self.Nonterminals[t].append(pnumber)
1596
1597 # Create a production and add it to the list of productions
1598 p = Production(pnumber, prodname, syms, prodprec, func, file, line)
1599 self.Productions.append(p)
1600 self.Prodmap[map] = p
1601
1602 # Add to the global productions list
1603 try:
1604 self.Prodnames[prodname].append(p)
1605 except KeyError:
1606 self.Prodnames[prodname] = [p]
1607
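# Example (illustrative, not part of the module): the rule
# 'expr : expr PLUS term' is added by passing the rule name and its
# right-hand-side symbols separately. A %prec override must name a terminal
# previously given to set_precedence():
#
#     g.add_production('expr', ['expr', 'PLUS', 'term'])
#     g.add_production('expr', ['MINUS', 'expr', '%prec', 'UMINUS'])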
1608 # -----------------------------------------------------------------------------
1609 # set_start()
1610 #
1611 # Sets the starting symbol and creates the augmented grammar. Production
1612 # rule 0 is S' -> start where start is the start symbol.
1613 # -----------------------------------------------------------------------------
1614
1615 def set_start(self, start=None):
1616 if not start:
1617 start = self.Productions[1].name
1618 if start not in self.Nonterminals:
1619 raise GrammarError('start symbol %s undefined' % start)
1620 self.Productions[0] = Production(0, "S'", [start])
1621 self.Nonterminals[start].append(0)
1622 self.Start = start
1623
1624 # -----------------------------------------------------------------------------
1625 # find_unreachable()
1626 #
1627 # Find all of the nonterminal symbols that can't be reached from the starting
1628 # symbol. Returns a list of nonterminals that can't be reached.
1629 # -----------------------------------------------------------------------------
1630
1631 def find_unreachable(self):
1632
1633 # Mark all symbols that are reachable from a symbol s
1634 def mark_reachable_from(s):
1635 if s in reachable:
1636 return
1637 reachable.add(s)
1638 for p in self.Prodnames.get(s, []):
1639 for r in p.prod:
1640 mark_reachable_from(r)
1641
1642 reachable = set()
1643 mark_reachable_from(self.Productions[0].prod[0])
1644 return [s for s in self.Nonterminals if s not in reachable]
1645
1646 # -----------------------------------------------------------------------------
1647 # infinite_cycles()
1648 #
1649 # This function looks at the various parsing rules and tries to detect
1650 # infinite recursion cycles (grammar rules where there is no possible way
1651 # to derive a string of only terminals).
1652 # -----------------------------------------------------------------------------
1653
1654 def infinite_cycles(self):
1655 terminates = {}
1656
1657 # Terminals:
1658 for t in self.Terminals:
1659 terminates[t] = True
1660
1661 terminates['$end'] = True
1662
1663 # Nonterminals:
1664
1665 # Initialize to false:
1666 for n in self.Nonterminals:
1667 terminates[n] = False
1668
1669 # Then propagate termination until no change:
1670 while True:
1671 some_change = False
1672 for (n, pl) in self.Prodnames.items():
1673 # Nonterminal n terminates iff any of its productions terminates.
1674 for p in pl:
1675 # Production p terminates iff all of its rhs symbols terminate.
1676 for s in p.prod:
1677 if not terminates[s]:
1678 # The symbol s does not terminate,
1679 # so production p does not terminate.
1680 p_terminates = False
1681 break
1682 else:
1683 # didn't break from the loop,
1684 # so every symbol s terminates
1685 # so production p terminates.
1686 p_terminates = True
1687
1688 if p_terminates:
1689 # symbol n terminates!
1690 if not terminates[n]:
1691 terminates[n] = True
1692 some_change = True
1693 # Don't need to consider any more productions for this n.
1694 break
1695
1696 if not some_change:
1697 break
1698
1699 infinite = []
1700 for (s, term) in terminates.items():
1701 if not term:
1702 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1703 # s is used-but-not-defined, and we've already warned of that,
1704 # so it would be overkill to say that it's also non-terminating.
1705 pass
1706 else:
1707 infinite.append(s)
1708
1709 return infinite
1710
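# Example (illustrative): a grammar whose only production for 'a' is
#
#     a : a A
#
# can never derive a string consisting solely of terminals, so
# infinite_cycles() would report 'a' in its result.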
1711 # -----------------------------------------------------------------------------
1712 # undefined_symbols()
1713 #
1714 # Find all symbols that were used in the grammar, but not defined as tokens or
1715 # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
1716 # and prod is the production where the symbol was used.
1717 # -----------------------------------------------------------------------------
1718 def undefined_symbols(self):
1719 result = []
1720 for p in self.Productions:
1721 if not p:
1722 continue
1723
1724 for s in p.prod:
1725 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1726 result.append((s, p))
1727 return result
1728
1729 # -----------------------------------------------------------------------------
1730 # unused_terminals()
1731 #
1732 # Find all terminals that were defined, but not used by the grammar. Returns
1733 # a list of the unused terminal names.
1734 # -----------------------------------------------------------------------------
1735 def unused_terminals(self):
1736 unused_tok = []
1737 for s, v in self.Terminals.items():
1738 if s != 'error' and not v:
1739 unused_tok.append(s)
1740
1741 return unused_tok
1742
1743 # ------------------------------------------------------------------------------
1744 # unused_rules()
1745 #
1746 # Find all grammar rules that were defined, but not used (perhaps because
1747 # they are unreachable). Returns a list of productions.
1748 # ------------------------------------------------------------------------------
1749
1750 def unused_rules(self):
1751 unused_prod = []
1752 for s, v in self.Nonterminals.items():
1753 if not v:
1754 p = self.Prodnames[s][0]
1755 unused_prod.append(p)
1756 return unused_prod
1757
1758 # -----------------------------------------------------------------------------
1759 # unused_precedence()
1760 #
1761 # Returns a list of tuples (term,precedence) corresponding to precedence
1762 # rules that were never used by the grammar. term is the name of the terminal
1763 # on which precedence was applied and precedence is a string such as 'left' or
1764 # 'right' corresponding to the type of precedence.
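#
# For example (a sketch), a precedence entry ('left', 'COMMA') would be
# reported as ('COMMA', 'left') if 'COMMA' is neither a declared terminal
# nor a name referenced through a %prec specifier.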
1765 # -----------------------------------------------------------------------------
1766
1767 def unused_precedence(self):
1768 unused = []
1769 for termname in self.Precedence:
1770 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1771 unused.append((termname, self.Precedence[termname][0]))
1772
1773 return unused
1774
1775 # -------------------------------------------------------------------------
1776 # _first()
1777 #
1778 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1779 #
1780 # During execution of compute_first(), the result may be incomplete.
1781 # Afterward (e.g., when called from compute_follow()), it will be complete.
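#
# For example (a sketch), if First['opt'] is ['<empty>'] and First['item']
# is ['NUMBER'], then _first(('opt', 'item')) skips past the nullable
# 'opt' and returns ['NUMBER'].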
1782 # -------------------------------------------------------------------------
1783 def _first(self, beta):
1784
1785 # We are computing First(x1,x2,x3,...,xn)
1786 result = []
1787 for x in beta:
1788 x_produces_empty = False
1789
1790 # Add all the non-<empty> symbols of First[x] to the result.
1791 for f in self.First[x]:
1792 if f == '<empty>':
1793 x_produces_empty = True
1794 else:
1795 if f not in result:
1796 result.append(f)
1797
1798 if x_produces_empty:
1799 # We have to consider the next x in beta,
1800 # i.e. stay in the loop.
1801 pass
1802 else:
1803 # We don't have to consider any further symbols in beta.
1804 break
1805 else:
1806 # There was no 'break' from the loop,
1807 # so x_produces_empty was true for all x in beta,
1808 # so beta produces empty as well.
1809 result.append('<empty>')
1810
1811 return result
1812
1813 # -------------------------------------------------------------------------
1814 # compute_first()
1815 #
1816 # Compute the value of FIRST1(X) for all symbols
1817 # -------------------------------------------------------------------------
1818 def compute_first(self):
1819 if self.First:
1820 return self.First
1821
1822 # Terminals:
1823 for t in self.Terminals:
1824 self.First[t] = [t]
1825
1826 self.First['$end'] = ['$end']
1827
1828 # Nonterminals:
1829
1830 # Initialize to the empty set:
1831 for n in self.Nonterminals:
1832 self.First[n] = []
1833
1834 # Then propagate symbols until no change:
1835 while True:
1836 some_change = False
1837 for n in self.Nonterminals:
1838 for p in self.Prodnames[n]:
1839 for f in self._first(p.prod):
1840 if f not in self.First[n]:
1841 self.First[n].append(f)
1842 some_change = True
1843 if not some_change:
1844 break
1845
1846 return self.First
1847
1848 # ---------------------------------------------------------------------
1849 # compute_follow()
1850 #
1851 # Computes all of the follow sets for every non-terminal symbol. The
1852 # follow set is the set of all symbols that might follow a given
1853 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
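#
# For example (a sketch), given the rule
#
#     statement : expression SEMI
#
# 'SEMI' is added to Follow['expression']; the start symbol always has
# '$end' in its follow set.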
1854 # ---------------------------------------------------------------------
1855 def compute_follow(self, start=None):
1856 # If already computed, return the result
1857 if self.Follow:
1858 return self.Follow
1859
1860 # If first sets not computed yet, do that first.
1861 if not self.First:
1862 self.compute_first()
1863
1864 # Add '$end' to the follow list of the start symbol
1865 for k in self.Nonterminals:
1866 self.Follow[k] = []
1867
1868 if not start:
1869 start = self.Productions[1].name
1870
1871 self.Follow[start] = ['$end']
1872
1873 while True:
1874 didadd = False
1875 for p in self.Productions[1:]:
1876 # Scan the right-hand side of this production
1877 for i, B in enumerate(p.prod):
1878 if B in self.Nonterminals:
1879 # Okay. We got a non-terminal in a production
1880 fst = self._first(p.prod[i+1:])
1881 hasempty = False
1882 for f in fst:
1883 if f != '<empty>' and f not in self.Follow[B]:
1884 self.Follow[B].append(f)
1885 didadd = True
1886 if f == '<empty>':
1887 hasempty = True
1888 if hasempty or i == (len(p.prod)-1):
1889 # Add elements of Follow(p.name) to Follow(B)
1890 for f in self.Follow[p.name]:
1891 if f not in self.Follow[B]:
1892 self.Follow[B].append(f)
1893 didadd = True
1894 if not didadd:
1895 break
1896 return self.Follow
1897
1898
1899 # -----------------------------------------------------------------------------
1900 # build_lritems()
1901 #
1902 # This function walks the list of productions and builds a complete set of the
1903 # LR items. The LR items are stored in two ways: first, as a list attached
1904 # to each production (p.lr_items), and second, as a linked list of LR items
1905 # chained through each item's lr_next attribute. For example:
1906 #
1907 # E -> E PLUS E
1908 #
1909 # Creates the list
1910 #
1911 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1912 # -----------------------------------------------------------------------------
1913
1914 def build_lritems(self):
1915 for p in self.Productions:
1916 lastlri = p
1917 i = 0
1918 lr_items = []
1919 while True:
1920 if i > len(p):
1921 lri = None
1922 else:
1923 lri = LRItem(p, i)
1924 # Precompute the list of productions immediately following
1925 try:
1926 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1927 except (IndexError, KeyError):
1928 lri.lr_after = []
1929 try:
1930 lri.lr_before = lri.prod[i-1]
1931 except IndexError:
1932 lri.lr_before = None
1933
1934 lastlri.lr_next = lri
1935 if not lri:
1936 break
1937 lr_items.append(lri)
1938 lastlri = lri
1939 i += 1
1940 p.lr_items = lr_items
1941
1942 # -----------------------------------------------------------------------------
1943 # == Class LRTable ==
1944 #
1945 # This class represents a basic table of LR parsing information.
1946 # Methods for generating the tables are not defined here. They are defined
1947 # in the derived class LRGeneratedTable.
1948 # -----------------------------------------------------------------------------
1949
1950 class VersionError(YaccError):
1951 pass
1952
1953 class LRTable(object):
1954 def __init__(self):
1955 self.lr_action = None
1956 self.lr_goto = None
1957 self.lr_productions = None
1958 self.lr_method = None
1959
1960 def read_table(self, module):
1961 if isinstance(module, types.ModuleType):
1962 parsetab = module
1963 else:
1964 exec('import %s' % module)
1965 parsetab = sys.modules[module]
1966
1967 if parsetab._tabversion != __tabversion__:
1968 raise VersionError('yacc table file version is out of date')
1969
1970 self.lr_action = parsetab._lr_action
1971 self.lr_goto = parsetab._lr_goto
1972
1973 self.lr_productions = []
1974 for p in parsetab._lr_productions:
1975 self.lr_productions.append(MiniProduction(*p))
1976
1977 self.lr_method = parsetab._lr_method
1978 return parsetab._lr_signature
1979
1980 def read_pickle(self, filename):
1981 try:
1982 import cPickle as pickle
1983 except ImportError:
1984 import pickle
1985
1986 if not os.path.exists(filename):
1987 raise ImportError
1988
1989 in_f = open(filename, 'rb')
1990
1991 tabversion = pickle.load(in_f)
1992 if tabversion != __tabversion__:
1993 raise VersionError('yacc table file version is out of date')
1994 self.lr_method = pickle.load(in_f)
1995 signature = pickle.load(in_f)
1996 self.lr_action = pickle.load(in_f)
1997 self.lr_goto = pickle.load(in_f)
1998 productions = pickle.load(in_f)
1999
2000 self.lr_productions = []
2001 for p in productions:
2002 self.lr_productions.append(MiniProduction(*p))
2003
2004 in_f.close()
2005 return signature
2006
2007 # Bind all production function names to callable objects in pdict
2008 def bind_callables(self, pdict):
2009 for p in self.lr_productions:
2010 p.bind(pdict)
2011
2012
2013 # -----------------------------------------------------------------------------
2014 # === LR Generator ===
2015 #
2016 # The following classes and functions are used to generate LR parsing tables on
2017 # a grammar.
2018 # -----------------------------------------------------------------------------
2019
2020 # -----------------------------------------------------------------------------
2021 # digraph()
2022 # traverse()
2023 #
2024 # The following two functions are used to compute set-valued functions
2025 # of the form:
2026 #
2027 # F(x) = F'(x) U U{F(y) | x R y}
2028 #
2029 # This is used to compute the values of Read() sets as well as FOLLOW sets
2030 # in LALR(1) generation.
2031 #
2032 # Inputs: X - An input set
2033 # R - A relation
2034 # FP - Set-valued function
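#
# As a rough illustration (not part of PLY's API; the graph is made up),
# taking F'(x) = {x} and R to be the edges of a small graph makes F a
# reachability function:
#
#     edges = {'a': ['b'], 'b': ['c'], 'c': ['a'], 'd': []}
#     F = digraph(list(edges),         # X:  the input set
#                 lambda x: edges[x],  # R:  the relation
#                 lambda x: [x])       # FP: F'(x) = {x}
#     # F['a'] == ['a', 'b', 'c'] -- the cycle a-b-c collapses to one set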
2035 # ------------------------------------------------------------------------------
2036
2037 def digraph(X, R, FP):
2038 N = {}
2039 for x in X:
2040 N[x] = 0
2041 stack = []
2042 F = {}
2043 for x in X:
2044 if N[x] == 0:
2045 traverse(x, N, stack, F, X, R, FP)
2046 return F
2047
2048 def traverse(x, N, stack, F, X, R, FP):
2049 stack.append(x)
2050 d = len(stack)
2051 N[x] = d
2052 F[x] = FP(x) # F(x) <- F'(x)
2053
2054 rel = R(x) # Get y's related to x
2055 for y in rel:
2056 if N[y] == 0:
2057 traverse(y, N, stack, F, X, R, FP)
2058 N[x] = min(N[x], N[y])
2059 for a in F.get(y, []):
2060 if a not in F[x]:
2061 F[x].append(a)
2062 if N[x] == d:
2063 N[stack[-1]] = MAXINT
2064 F[stack[-1]] = F[x]
2065 element = stack.pop()
2066 while element != x:
2067 N[stack[-1]] = MAXINT
2068 F[stack[-1]] = F[x]
2069 element = stack.pop()
2070
2071 class LALRError(YaccError):
2072 pass
2073
2074 # -----------------------------------------------------------------------------
2075 # == LRGeneratedTable ==
2076 #
2077 # This class implements the LR table generation algorithm. There are no
2078 # public methods except for write()
2079 # -----------------------------------------------------------------------------
2080
2081 class LRGeneratedTable(LRTable):
2082 def __init__(self, grammar, method='LALR', log=None):
2083 if method not in ['SLR', 'LALR']:
2084 raise LALRError('Unsupported method %s' % method)
2085
2086 self.grammar = grammar
2087 self.lr_method = method
2088
2089 # Set up the logger
2090 if not log:
2091 log = NullLogger()
2092 self.log = log
2093
2094 # Internal attributes
2095 self.lr_action = {} # Action table
2096 self.lr_goto = {} # Goto table
2097 self.lr_productions = grammar.Productions # Copy of grammar Production array
2098 self.lr_goto_cache = {} # Cache of computed gotos
2099 self.lr0_cidhash = {} # Cache of closures
2100
2101 self._add_count = 0 # Internal counter used to detect cycles
2102
2103 # Diagnostic information filled in by the table generator
2104 self.sr_conflict = 0
2105 self.rr_conflict = 0
2106 self.conflicts = [] # List of conflicts
2107
2108 self.sr_conflicts = []
2109 self.rr_conflicts = []
2110
2111 # Build the tables
2112 self.grammar.build_lritems()
2113 self.grammar.compute_first()
2114 self.grammar.compute_follow()
2115 self.lr_parse_table()
2116
2117 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2118
2119 def lr0_closure(self, I):
2120 self._add_count += 1
2121
2122 # Add everything in I to J
2123 J = I[:]
2124 didadd = True
2125 while didadd:
2126 didadd = False
2127 for j in J:
2128 for x in j.lr_after:
2129 if getattr(x, 'lr0_added', 0) == self._add_count:
2130 continue
2131 # Add B --> .G to J
2132 J.append(x.lr_next)
2133 x.lr0_added = self._add_count
2134 didadd = True
2135
2136 return J
2137
2138 # Compute the LR(0) goto function goto(I,X) where I is a set
2139 # of LR(0) items and X is a grammar symbol. This function is written
2140 # in a way that guarantees uniqueness of the generated goto sets
2141 # (i.e. the same goto set will never be returned as two different Python
2142 # objects). With uniqueness, we can later do fast set comparisons using
2143 # id(obj) instead of element-wise comparison.
2144
2145 def lr0_goto(self, I, x):
2146 # First we look for a previously cached entry
2147 g = self.lr_goto_cache.get((id(I), x))
2148 if g:
2149 return g
2150
2151 # Now we generate the goto set in a way that guarantees uniqueness
2152 # of the result
2153
2154 s = self.lr_goto_cache.get(x)
2155 if not s:
2156 s = {}
2157 self.lr_goto_cache[x] = s
2158
2159 gs = []
2160 for p in I:
2161 n = p.lr_next
2162 if n and n.lr_before == x:
2163 s1 = s.get(id(n))
2164 if not s1:
2165 s1 = {}
2166 s[id(n)] = s1
2167 gs.append(n)
2168 s = s1
2169 g = s.get('$end')
2170 if not g:
2171 if gs:
2172 g = self.lr0_closure(gs)
2173 s['$end'] = g
2174 else:
2175 s['$end'] = gs
2176 self.lr_goto_cache[(id(I), x)] = g
2177 return g
2178
2179 # Compute the LR(0) sets-of-items function
2180 def lr0_items(self):
2181 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2182 i = 0
2183 for I in C:
2184 self.lr0_cidhash[id(I)] = i
2185 i += 1
2186
2187 # Loop over the items in C and each grammar symbol
2188 i = 0
2189 while i < len(C):
2190 I = C[i]
2191 i += 1
2192
2193 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2194 asyms = {}
2195 for ii in I:
2196 for s in ii.usyms:
2197 asyms[s] = None
2198
2199 for x in asyms:
2200 g = self.lr0_goto(I, x)
2201 if not g or id(g) in self.lr0_cidhash:
2202 continue
2203 self.lr0_cidhash[id(g)] = len(C)
2204 C.append(g)
2205
2206 return C
2207
2208 # -----------------------------------------------------------------------------
2209 # ==== LALR(1) Parsing ====
2210 #
2211 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2212 # relying upon Follow() sets when performing reductions, a more selective
2213 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2214 # Thus, we mainly just have to focus on calculating the lookahead sets.
2215 #
2216 # The method used here is due to DeRemer and Pennello (1982).
2217 #
2218 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2219 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2220 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2221 #
2222 # Further details can also be found in:
2223 #
2224 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2225 # McGraw-Hill Book Company, (1985).
2226 #
2227 # -----------------------------------------------------------------------------
2228
2229 # -----------------------------------------------------------------------------
2230 # compute_nullable_nonterminals()
2231 #
2232 # Creates a set containing all of the non-terminals that might derive an
2233 # empty string.
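#
# For example (a sketch), given the rules
#
#     opt_semi : SEMI
#     opt_semi :
#
# the second rule has length 0, so 'opt_semi' is nullable; a nonterminal
# whose entire right-hand side is nullable symbols is nullable as well.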
2234 # -----------------------------------------------------------------------------
2235
2236 def compute_nullable_nonterminals(self):
2237 nullable = set()
2238 num_nullable = 0
2239 while True:
2240 for p in self.grammar.Productions[1:]:
2241 if p.len == 0:
2242 nullable.add(p.name)
2243 continue
2244 for t in p.prod:
2245 if t not in nullable:
2246 break
2247 else:
2248 nullable.add(p.name)
2249 if len(nullable) == num_nullable:
2250 break
2251 num_nullable = len(nullable)
2252 return nullable
2253
2254 # -----------------------------------------------------------------------------
2255 # find_nonterminal_transitions(C)
2256 #
2257 # Given a set of LR(0) items, this function finds all of the non-terminal
2258 # transitions. These are transitions in which a dot appears immediately before
2259 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2260 # is the state number and N is the nonterminal symbol.
2261 #
2262 # The input C is the set of LR(0) items.
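#
# For example (a sketch), an item 'statement : IF . expression THEN statement'
# in state 7 contributes the non-terminal transition (7, 'expression').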
2263 # -----------------------------------------------------------------------------
2264
2265 def find_nonterminal_transitions(self, C):
2266 trans = []
2267 for stateno, state in enumerate(C):
2268 for p in state:
2269 if p.lr_index < p.len - 1:
2270 t = (stateno, p.prod[p.lr_index+1])
2271 if t[1] in self.grammar.Nonterminals:
2272 if t not in trans:
2273 trans.append(t)
2274 return trans
2275
2276 # -----------------------------------------------------------------------------
2277 # dr_relation()
2278 #
2279 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2280 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2281 #
2282 # Returns a list of terminals.
2283 # -----------------------------------------------------------------------------
2284
2285 def dr_relation(self, C, trans, nullable):
2287 state, N = trans
2288 terms = []
2289
2290 g = self.lr0_goto(C[state], N)
2291 for p in g:
2292 if p.lr_index < p.len - 1:
2293 a = p.prod[p.lr_index+1]
2294 if a in self.grammar.Terminals:
2295 if a not in terms:
2296 terms.append(a)
2297
2298 # This extra bit is to handle the start state
2299 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2300 terms.append('$end')
2301
2302 return terms
2303
2304 # -----------------------------------------------------------------------------
2305 # reads_relation()
2306 #
2307 # Computes the READS() relation (p,A) READS (t,C).
2308 # -----------------------------------------------------------------------------
2309
2310 def reads_relation(self, C, trans, empty):
2311 # Look for empty transitions
2312 rel = []
2313 state, N = trans
2314
2315 g = self.lr0_goto(C[state], N)
2316 j = self.lr0_cidhash.get(id(g), -1)
2317 for p in g:
2318 if p.lr_index < p.len - 1:
2319 a = p.prod[p.lr_index + 1]
2320 if a in empty:
2321 rel.append((j, a))
2322
2323 return rel
2324
2325 # -----------------------------------------------------------------------------
2326 # compute_lookback_includes()
2327 #
2328 # Determines the lookback and includes relations
2329 #
2330 # LOOKBACK:
2331 #
2332 # This relation is determined by running the LR(0) state machine forward.
2333 # For example, starting with a production "N : . A B C", we run it forward
2334 # to obtain "N : A B C ." We then build a relationship between this final
2335 # state and the starting state. These relationships are stored in a dictionary
2336 # lookdict.
2337 #
2338 # INCLUDES:
2339 #
2340 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2341 #
2342 # This relation is used to determine non-terminal transitions that occur
2343 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2344 # if the following holds:
2345 #
2346 # B -> LAT, where T -> epsilon and p' -L-> p
2347 #
2348 # L is essentially a prefix (which may be empty), T is a suffix that must be
2349 # able to derive an empty string. State p' must lead to state p with the string L.
2350 #
2351 # -----------------------------------------------------------------------------
2352
2353 def compute_lookback_includes(self, C, trans, nullable):
2354 lookdict = {} # Dictionary of lookback relations
2355 includedict = {} # Dictionary of include relations
2356
2357 # Make a dictionary of non-terminal transitions
2358 dtrans = {}
2359 for t in trans:
2360 dtrans[t] = 1
2361
2362 # Loop over all transitions and compute lookbacks and includes
2363 for state, N in trans:
2364 lookb = []
2365 includes = []
2366 for p in C[state]:
2367 if p.name != N:
2368 continue
2369
2370 # Okay, we have a name match. We now follow the production all the way
2371 # through the state machine until we get the . on the right hand side
2372
2373 lr_index = p.lr_index
2374 j = state
2375 while lr_index < p.len - 1:
2376 lr_index = lr_index + 1
2377 t = p.prod[lr_index]
2378
2379 # Check to see if this symbol and state are a non-terminal transition
2380 if (j, t) in dtrans:
2381 # Yes. Okay, there is some chance that this is an includes relation
2382 # the only way to know for certain is whether the rest of the
2383 # production derives empty
2384
2385 li = lr_index + 1
2386 while li < p.len:
2387 if p.prod[li] in self.grammar.Terminals:
2388 break # No. Forget it.
2389 if p.prod[li] not in nullable:
2390 break
2391 li = li + 1
2392 else:
2393 # Appears to be a relation between (j,t) and (state,N)
2394 includes.append((j, t))
2395
2396 g = self.lr0_goto(C[j], t) # Go to next set
2397 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2398
2399 # When we get here, j is the final state; now we have to locate the production
2400 for r in C[j]:
2401 if r.name != p.name:
2402 continue
2403 if r.len != p.len:
2404 continue
2405 i = 0
2406 # This loop is comparing a production ". A B C" with "A B C ."
2407 while i < r.lr_index:
2408 if r.prod[i] != p.prod[i+1]:
2409 break
2410 i = i + 1
2411 else:
2412 lookb.append((j, r))
2413 for i in includes:
2414 if i not in includedict:
2415 includedict[i] = []
2416 includedict[i].append((state, N))
2417 lookdict[(state, N)] = lookb
2418
2419 return lookdict, includedict
2420
2421 # -----------------------------------------------------------------------------
2422 # compute_read_sets()
2423 #
2424 # Given a set of LR(0) items, this function computes the read sets.
2425 #
2426 # Inputs: C = Set of LR(0) items
2427 # ntrans = Set of nonterminal transitions
2428 # nullable = Set of empty transitions
2429 #
2430 # Returns a set containing the read sets
2431 # -----------------------------------------------------------------------------
2432
2433 def compute_read_sets(self, C, ntrans, nullable):
2434 FP = lambda x: self.dr_relation(C, x, nullable)
2435 R = lambda x: self.reads_relation(C, x, nullable)
2436 F = digraph(ntrans, R, FP)
2437 return F
2438
2439 # -----------------------------------------------------------------------------
2440 # compute_follow_sets()
2441 #
2442 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2443 # and an include set, this function computes the follow sets
2444 #
2445 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2446 #
2447 # Inputs:
2448 # ntrans = Set of nonterminal transitions
2449 # readsets = Readset (previously computed)
2450 # inclsets = Include sets (previously computed)
2451 #
2452 # Returns a set containing the follow sets
2453 # -----------------------------------------------------------------------------
2454
2455 def compute_follow_sets(self, ntrans, readsets, inclsets):
2456 FP = lambda x: readsets[x]
2457 R = lambda x: inclsets.get(x, [])
2458 F = digraph(ntrans, R, FP)
2459 return F
2460
2461 # -----------------------------------------------------------------------------
2462 # add_lookaheads()
2463 #
2464 # Attaches the lookahead symbols to grammar rules.
2465 #
2466 # Inputs: lookbacks - Set of lookback relations
2467 # followset - Computed follow set
2468 #
2469 # This function directly attaches the lookaheads to productions contained
2470 # in the lookbacks set
2471 # -----------------------------------------------------------------------------
2472
2473 def add_lookaheads(self, lookbacks, followset):
2474 for trans, lb in lookbacks.items():
2475 # Loop over productions in lookback
2476 for state, p in lb:
2477 if state not in p.lookaheads:
2478 p.lookaheads[state] = []
2479 f = followset.get(trans, [])
2480 for a in f:
2481 if a not in p.lookaheads[state]:
2482 p.lookaheads[state].append(a)
2483
2484 # -----------------------------------------------------------------------------
2485 # add_lalr_lookaheads()
2486 #
2487 # This function does all of the work of adding lookahead information for use
2488 # with LALR parsing
2489 # -----------------------------------------------------------------------------
2490
2491 def add_lalr_lookaheads(self, C):
2492 # Determine all of the nullable nonterminals
2493 nullable = self.compute_nullable_nonterminals()
2494
2495 # Find all non-terminal transitions
2496 trans = self.find_nonterminal_transitions(C)
2497
2498 # Compute read sets
2499 readsets = self.compute_read_sets(C, trans, nullable)
2500
2501 # Compute lookback/includes relations
2502 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2503
2504 # Compute LALR FOLLOW sets
2505 followsets = self.compute_follow_sets(trans, readsets, included)
2506
2507 # Add all of the lookaheads
2508 self.add_lookaheads(lookd, followsets)
2509
2510 # -----------------------------------------------------------------------------
2511 # lr_parse_table()
2512 #
2513 # This function constructs the parse tables for SLR or LALR
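#
# The action table entries built below are encoded as integers:
#
#     st_action[a] > 0       shift and go to state st_action[a]
#     st_action[a] < 0       reduce using rule number -st_action[a]
#     st_action[a] == 0      accept
#     st_action[a] is None   error (nonassociative conflict)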
2514 # -----------------------------------------------------------------------------
2515 def lr_parse_table(self):
2516 Productions = self.grammar.Productions
2517 Precedence = self.grammar.Precedence
2518 goto = self.lr_goto # Goto array
2519 action = self.lr_action # Action array
2520 log = self.log # Logger for output
2521
2522 actionp = {} # Action production array (temporary)
2523
2524 log.info('Parsing method: %s', self.lr_method)
2525
2526 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2527 # This determines the number of states
2528
2529 C = self.lr0_items()
2530
2531 if self.lr_method == 'LALR':
2532 self.add_lalr_lookaheads(C)
2533
2534 # Build the parser table, state by state
2535 st = 0
2536 for I in C:
2537 # Loop over each production in I
2538 actlist = [] # List of actions
2539 st_action = {}
2540 st_actionp = {}
2541 st_goto = {}
2542 log.info('')
2543 log.info('state %d', st)
2544 log.info('')
2545 for p in I:
2546 log.info(' (%d) %s', p.number, p)
2547 log.info('')
2548
2549 for p in I:
2550 if p.len == p.lr_index + 1:
2551 if p.name == "S'":
2552 # Start symbol. Accept!
2553 st_action['$end'] = 0
2554 st_actionp['$end'] = p
2555 else:
2556 # We are at the end of a production. Reduce!
2557 if self.lr_method == 'LALR':
2558 laheads = p.lookaheads[st]
2559 else:
2560 laheads = self.grammar.Follow[p.name]
2561 for a in laheads:
2562 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2563 r = st_action.get(a)
2564 if r is not None:
2565 # Whoa. Have a shift/reduce or reduce/reduce conflict
2566 if r > 0:
2567 # Need to decide on shift or reduce here
2568 # By default we favor shifting. Need to add
2569 # some precedence rules here.
2570 sprec, slevel = Productions[st_actionp[a].number].prec
2571 rprec, rlevel = Precedence.get(a, ('right', 0))
2572 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2573 # We really need to reduce here.
2574 st_action[a] = -p.number
2575 st_actionp[a] = p
2576 if not slevel and not rlevel:
2577 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2578 self.sr_conflicts.append((st, a, 'reduce'))
2579 Productions[p.number].reduced += 1
2580 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2581 st_action[a] = None
2582 else:
2583 # Hmmm. Guess we'll keep the shift
2584 if not rlevel:
2585 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2586 self.sr_conflicts.append((st, a, 'shift'))
2587 elif r < 0:
2588 # Reduce/reduce conflict. In this case, we favor the rule
2589 # that was defined first in the grammar file
2590 oldp = Productions[-r]
2591 pp = Productions[p.number]
2592 if oldp.line > pp.line:
2593 st_action[a] = -p.number
2594 st_actionp[a] = p
2595 chosenp, rejectp = pp, oldp
2596 Productions[p.number].reduced += 1
2597 Productions[oldp.number].reduced -= 1
2598 else:
2599 chosenp, rejectp = oldp, pp
2600 self.rr_conflicts.append((st, chosenp, rejectp))
2601 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2602 a, st_actionp[a].number, st_actionp[a])
2603 else:
2604 raise LALRError('Unknown conflict in state %d' % st)
2605 else:
2606 st_action[a] = -p.number
2607 st_actionp[a] = p
2608 Productions[p.number].reduced += 1
2609 else:
2610 i = p.lr_index
2611 a = p.prod[i+1] # Get symbol right after the "."
2612 if a in self.grammar.Terminals:
2613 g = self.lr0_goto(I, a)
2614 j = self.lr0_cidhash.get(id(g), -1)
2615 if j >= 0:
2616 # We are in a shift state
2617 actlist.append((a, p, 'shift and go to state %d' % j))
2618 r = st_action.get(a)
2619 if r is not None:
2620 # Whoa. Have a shift/reduce or shift/shift conflict
2621 if r > 0:
2622 if r != j:
2623 raise LALRError('Shift/shift conflict in state %d' % st)
2624 elif r < 0:
2625 # Do a precedence check.
2626 # - if precedence of reduce rule is higher, we reduce.
2627 # - if precedence of reduce is same and left assoc, we reduce.
2628 # - otherwise we shift
2629 rprec, rlevel = Productions[st_actionp[a].number].prec
2630 sprec, slevel = Precedence.get(a, ('right', 0))
2631 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2632 # We decide to shift here... highest precedence to shift
2633 Productions[st_actionp[a].number].reduced -= 1
2634 st_action[a] = j
2635 st_actionp[a] = p
2636 if not rlevel:
2637 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2638 self.sr_conflicts.append((st, a, 'shift'))
2639 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2640 st_action[a] = None
2641 else:
2642 # Hmmm. Guess we'll keep the reduce
2643 if not slevel and not rlevel:
2644 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2645 self.sr_conflicts.append((st, a, 'reduce'))
2646
2647 else:
2648 raise LALRError('Unknown conflict in state %d' % st)
2649 else:
2650 st_action[a] = j
2651 st_actionp[a] = p
2652
2653 # Print the actions associated with each terminal
2654 _actprint = {}
2655 for a, p, m in actlist:
2656 if a in st_action:
2657 if p is st_actionp[a]:
2658 log.info(' %-15s %s', a, m)
2659 _actprint[(a, m)] = 1
2660 log.info('')
2661 # Print the actions that were not used. (debugging)
2662 not_used = 0
2663 for a, p, m in actlist:
2664 if a in st_action:
2665 if p is not st_actionp[a]:
2666 if not (a, m) in _actprint:
2667 log.debug(' ! %-15s [ %s ]', a, m)
2668 not_used = 1
2669 _actprint[(a, m)] = 1
2670 if not_used:
2671 log.debug('')
2672
2673 # Construct the goto table for this state
2674
2675 nkeys = {}
2676 for ii in I:
2677 for s in ii.usyms:
2678 if s in self.grammar.Nonterminals:
2679 nkeys[s] = None
2680 for n in nkeys:
2681 g = self.lr0_goto(I, n)
2682 j = self.lr0_cidhash.get(id(g), -1)
2683 if j >= 0:
2684 st_goto[n] = j
2685 log.info(' %-30s shift and go to state %d', n, j)
2686
2687 action[st] = st_action
2688 actionp[st] = st_actionp
2689 goto[st] = st_goto
2690 st += 1
2691
2692 # -----------------------------------------------------------------------------
2693 # write()
2694 #
2695 # This function writes the LR parsing tables to a file
2696 # -----------------------------------------------------------------------------
2697
2698 def write_table(self, tabmodule, outputdir='', signature=''):
2699 if isinstance(tabmodule, types.ModuleType):
2700 raise IOError("Won't overwrite existing tabmodule")
2701
2702 basemodulename = tabmodule.split('.')[-1]
2703 filename = os.path.join(outputdir, basemodulename) + '.py'
2704 try:
2705 f = open(filename, 'w')
2706
2707 f.write('''
2708 # %s
2709 # This file is automatically generated. Do not edit.
2710 _tabversion = %r
2711
2712 _lr_method = %r
2713
2714 _lr_signature = %r
2715 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2716
2717 # Change smaller to 0 to go back to original tables
2718 smaller = 1
2719
2720 # Factor out names to try and make smaller
2721 if smaller:
2722 items = {}
2723
2724 for s, nd in self.lr_action.items():
2725 for name, v in nd.items():
2726 i = items.get(name)
2727 if not i:
2728 i = ([], [])
2729 items[name] = i
2730 i[0].append(s)
2731 i[1].append(v)
2732
2733 f.write('\n_lr_action_items = {')
2734 for k, v in items.items():
2735 f.write('%r:([' % k)
2736 for i in v[0]:
2737 f.write('%r,' % i)
2738 f.write('],[')
2739 for i in v[1]:
2740 f.write('%r,' % i)
2741
2742 f.write(']),')
2743 f.write('}\n')
2744
2745 f.write('''
2746 _lr_action = {}
2747 for _k, _v in _lr_action_items.items():
2748 for _x,_y in zip(_v[0],_v[1]):
2749 if not _x in _lr_action: _lr_action[_x] = {}
2750 _lr_action[_x][_k] = _y
2751 del _lr_action_items
2752 ''')
2753
2754 else:
2755 f.write('\n_lr_action = { ')
2756 for k, v in self.lr_action.items():
2757 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2758 f.write('}\n')
2759
2760 if smaller:
2761 # Factor out names to try and make smaller
2762 items = {}
2763
2764 for s, nd in self.lr_goto.items():
2765 for name, v in nd.items():
2766 i = items.get(name)
2767 if not i:
2768 i = ([], [])
2769 items[name] = i
2770 i[0].append(s)
2771 i[1].append(v)
2772
2773 f.write('\n_lr_goto_items = {')
2774 for k, v in items.items():
2775 f.write('%r:([' % k)
2776 for i in v[0]:
2777 f.write('%r,' % i)
2778 f.write('],[')
2779 for i in v[1]:
2780 f.write('%r,' % i)
2781
2782 f.write(']),')
2783 f.write('}\n')
2784
2785 f.write('''
2786 _lr_goto = {}
2787 for _k, _v in _lr_goto_items.items():
2788 for _x, _y in zip(_v[0], _v[1]):
2789 if not _x in _lr_goto: _lr_goto[_x] = {}
2790 _lr_goto[_x][_k] = _y
2791 del _lr_goto_items
2792 ''')
2793 else:
2794 f.write('\n_lr_goto = { ')
2795 for k, v in self.lr_goto.items():
2796 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2797 f.write('}\n')
2798
2799 # Write production table
2800 f.write('_lr_productions = [\n')
2801 for p in self.lr_productions:
2802 if p.func:
2803 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2804 p.func, os.path.basename(p.file), p.line))
2805 else:
2806 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2807 f.write(']\n')
2808 f.close()
2809
2810 except IOError:
2811 raise
2812
2813
2814 # -----------------------------------------------------------------------------
2815 # pickle_table()
2816 #
2817 # This function pickles the LR parsing tables to the named file
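#
# The stream written here is what LRTable.read_pickle() expects: table
# version, LR method, signature, action table, goto table, and production
# list, in that order. A sketch of reading it back ('parser.p' is a
# made-up filename):
#
#     lr = LRTable()
#     sig = lr.read_pickle('parser.p')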
2818 # -----------------------------------------------------------------------------
2819
2820 def pickle_table(self, filename, signature=''):
2821 try:
2822 import cPickle as pickle
2823 except ImportError:
2824 import pickle
2825 with open(filename, 'wb') as outf:
2826 pickle.dump(__tabversion__, outf, pickle_protocol)
2827 pickle.dump(self.lr_method, outf, pickle_protocol)
2828 pickle.dump(signature, outf, pickle_protocol)
2829 pickle.dump(self.lr_action, outf, pickle_protocol)
2830 pickle.dump(self.lr_goto, outf, pickle_protocol)
2831
2832 outp = []
2833 for p in self.lr_productions:
2834 if p.func:
2835 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2836 else:
2837 outp.append((str(p), p.name, p.len, None, None, None))
2838 pickle.dump(outp, outf, pickle_protocol)
2839
2840 # -----------------------------------------------------------------------------
2841 # === INTROSPECTION ===
2842 #
2843 # The following functions and classes are used to implement the PLY
2844 # introspection features followed by the yacc() function itself.
2845 # -----------------------------------------------------------------------------
2846
2847 # -----------------------------------------------------------------------------
2848 # get_caller_module_dict()
2849 #
2850 # This function returns a dictionary containing all of the symbols defined within
2851 # a caller further down the call stack. This is used to get the environment
2852 # associated with the yacc() call if none was provided.
2853 # -----------------------------------------------------------------------------
2854
2855 def get_caller_module_dict(levels):
2856 f = sys._getframe(levels)
2857 ldict = f.f_globals.copy()
2858 if f.f_globals != f.f_locals:
2859 ldict.update(f.f_locals)
2860 return ldict
2861
2862 # -----------------------------------------------------------------------------
2863 # parse_grammar()
2864 #
2865 # This takes a raw grammar rule string and parses it into production data
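#
# For example (a sketch), a docstring such as
#
#     expression : expression PLUS term
#                | term
#
# is parsed into two entries, roughly
#
#     (file, line, 'expression', ['expression', 'PLUS', 'term'])
#     (file, line, 'expression', ['term'])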
2866 # -----------------------------------------------------------------------------
2867 def parse_grammar(doc, file, line):
2868 grammar = []
2869 # Split the doc string into lines
2870 pstrings = doc.splitlines()
2871 lastp = None
2872 dline = line
2873 for ps in pstrings:
2874 dline += 1
2875 p = ps.split()
2876 if not p:
2877 continue
2878 try:
2879 if p[0] == '|':
2880 # This is a continuation of a previous rule
2881 if not lastp:
2882 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2883 prodname = lastp
2884 syms = p[1:]
2885 else:
2886 prodname = p[0]
2887 lastp = prodname
2888 syms = p[2:]
2889 assign = p[1]
2890 if assign != ':' and assign != '::=':
2891 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2892
2893 grammar.append((file, dline, prodname, syms))
2894 except SyntaxError:
2895 raise
2896 except Exception:
2897 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
2898
2899 return grammar
2900
2901 # -----------------------------------------------------------------------------
2902 # ParserReflect()
2903 #
2904 # This class represents information extracted for building a parser including
2905 # start symbol, error function, tokens, precedence list, action functions,
2906 # etc.
2907 # -----------------------------------------------------------------------------
2908 class ParserReflect(object):
2909 def __init__(self, pdict, log=None):
2910 self.pdict = pdict
2911 self.start = None
2912 self.error_func = None
2913 self.tokens = None
2914 self.modules = set()
2915 self.grammar = []
2916 self.error = False
2917
2918 if log is None:
2919 self.log = PlyLogger(sys.stderr)
2920 else:
2921 self.log = log
2922
2923 # Get all of the basic information
2924 def get_all(self):
2925 self.get_start()
2926 self.get_error_func()
2927 self.get_tokens()
2928 self.get_precedence()
2929 self.get_pfunctions()
2930
2931 # Validate all of the information
2932 def validate_all(self):
2933 self.validate_start()
2934 self.validate_error_func()
2935 self.validate_tokens()
2936 self.validate_precedence()
2937 self.validate_pfunctions()
2938 self.validate_modules()
2939 return self.error
2940
2941 # Compute a signature over the grammar
2942 def signature(self):
2943 try:
2944 from hashlib import md5
2945 except ImportError:
2946 from md5 import md5
2947 try:
2948 sig = md5()
2949 if self.start:
2950 sig.update(self.start.encode('latin-1'))
2951 if self.prec:
2952 sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1'))
2953 if self.tokens:
2954 sig.update(' '.join(self.tokens).encode('latin-1'))
2955 for f in self.pfuncs:
2956 if f[3]:
2957 sig.update(f[3].encode('latin-1'))
2958 except (TypeError, ValueError):
2959 pass
2960
2961 digest = base64.b16encode(sig.digest())
2962 if sys.version_info[0] >= 3:
2963 digest = digest.decode('latin-1')
2964 return digest
2965
2966 # -----------------------------------------------------------------------------
2967 # validate_modules()
2968 #
2969 # This method checks to see if there are duplicated p_rulename() functions
2970 # in the parser module file. Without this function, it is really easy for
2971 # users to make mistakes by cutting and pasting code fragments (and it's a real
2972 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2973 # we just do a little regular expression pattern matching of def statements
2974 # to try and detect duplicates.
2975 # -----------------------------------------------------------------------------
2976
2977 def validate_modules(self):
2978 # Match def p_funcname(
2979 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2980
2981 for module in self.modules:
2982 lines, linen = inspect.getsourcelines(module)
2983
2984 counthash = {}
2985 for linen, line in enumerate(lines):
2986 linen += 1
2987 m = fre.match(line)
2988 if m:
2989 name = m.group(1)
2990 prev = counthash.get(name)
2991 if not prev:
2992 counthash[name] = linen
2993 else:
2994 filename = inspect.getsourcefile(module)
2995 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
2996 filename, linen, name, prev)
2997
2998 # Get the start symbol
2999 def get_start(self):
3000 self.start = self.pdict.get('start')
3001
3002 # Validate the start symbol
3003 def validate_start(self):
3004 if self.start is not None:
3005 if not isinstance(self.start, string_types):
3006 self.log.error("'start' must be a string")
3007
3008 # Look for error handler
3009 def get_error_func(self):
3010 self.error_func = self.pdict.get('p_error')
3011
3012 # Validate the error function
3013 def validate_error_func(self):
3014 if self.error_func:
3015 if isinstance(self.error_func, types.FunctionType):
3016 ismethod = 0
3017 elif isinstance(self.error_func, types.MethodType):
3018 ismethod = 1
3019 else:
3020 self.log.error("'p_error' defined, but is not a function or method")
3021 self.error = True
3022 return
3023
3024 eline = self.error_func.__code__.co_firstlineno
3025 efile = self.error_func.__code__.co_filename
3026 module = inspect.getmodule(self.error_func)
3027 self.modules.add(module)
3028
3029 argcount = self.error_func.__code__.co_argcount - ismethod
3030 if argcount != 1:
3031 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3032 self.error = True
3033
3034 # Get the token list
3035 def get_tokens(self):
3036 tokens = self.pdict.get('tokens')
3037 if not tokens:
3038 self.log.error('No token list is defined')
3039 self.error = True
3040 return
3041
3042 if not isinstance(tokens, (list, tuple)):
3043 self.log.error('tokens must be a list or tuple')
3044 self.error = True
3045 return
3046
3047 if not tokens:
3048 self.log.error('tokens is empty')
3049 self.error = True
3050 return
3051
3052 self.tokens = tokens
3053
3054 # Validate the tokens
3055 def validate_tokens(self):
3056 # Validate the tokens.
3057 if 'error' in self.tokens:
3058 self.log.error("Illegal token name 'error'. Is a reserved word")
3059 self.error = True
3060 return
3061
3062 terminals = set()
3063 for n in self.tokens:
3064 if n in terminals:
3065 self.log.warning('Token %r multiply defined', n)
3066 terminals.add(n)
3067
3068 # Get the precedence table (if any)
3069 def get_precedence(self):
3070 self.prec = self.pdict.get('precedence')
3071
3072 # Validate and parse the precedence map
3073 def validate_precedence(self):
3074 preclist = []
3075 if self.prec:
3076 if not isinstance(self.prec, (list, tuple)):
3077 self.log.error('precedence must be a list or tuple')
3078 self.error = True
3079 return
3080 for level, p in enumerate(self.prec):
3081 if not isinstance(p, (list, tuple)):
3082 self.log.error('Bad precedence table')
3083 self.error = True
3084 return
3085
3086 if len(p) < 2:
3087 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3088 self.error = True
3089 return
3090 assoc = p[0]
3091 if not isinstance(assoc, string_types):
3092 self.log.error('precedence associativity must be a string')
3093 self.error = True
3094 return
3095 for term in p[1:]:
3096 if not isinstance(term, string_types):
3097 self.log.error('precedence items must be strings')
3098 self.error = True
3099 return
3100 preclist.append((term, assoc, level+1))
3101 self.preclist = preclist
3102
3103 # Get all p_functions from the grammar
3104 def get_pfunctions(self):
3105 p_functions = []
3106 for name, item in self.pdict.items():
3107 if not name.startswith('p_') or name == 'p_error':
3108 continue
3109 if isinstance(item, (types.FunctionType, types.MethodType)):
3110 line = item.__code__.co_firstlineno
3111 module = inspect.getmodule(item)
3112 p_functions.append((line, module, name, item.__doc__))
3113
3114 # Sort all of the actions by line number; make sure to stringify
3115 # modules to make them sortable, since `line` may not uniquely sort all
3116 # p functions
3117 p_functions.sort(key=lambda p_function: (
3118 p_function[0],
3119 str(p_function[1]),
3120 p_function[2],
3121 p_function[3]))
3122 self.pfuncs = p_functions
3123
3124 # Validate all of the p_functions
3125 def validate_pfunctions(self):
3126 grammar = []
3127 # Check for non-empty symbols
3128 if len(self.pfuncs) == 0:
3129 self.log.error('no rules of the form p_rulename are defined')
3130 self.error = True
3131 return
3132
3133 for line, module, name, doc in self.pfuncs:
3134 file = inspect.getsourcefile(module)
3135 func = self.pdict[name]
3136 if isinstance(func, types.MethodType):
3137 reqargs = 2
3138 else:
3139 reqargs = 1
3140 if func.__code__.co_argcount > reqargs:
3141 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3142 self.error = True
3143 elif func.__code__.co_argcount < reqargs:
3144 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3145 self.error = True
3146 elif not func.__doc__:
3147 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3148 file, line, func.__name__)
3149 else:
3150 try:
3151 parsed_g = parse_grammar(doc, file, line)
3152 for g in parsed_g:
3153 grammar.append((name, g))
3154 except SyntaxError as e:
3155 self.log.error(str(e))
3156 self.error = True
3157
3158 # Looks like a valid grammar rule
3159 # Record the module in which it was defined.
3160 self.modules.add(module)
3161
3162 # Secondary validation step that looks for p_ definitions that are not functions
3163 # or functions that look like they might be grammar rules.
3164
3165 for n, v in self.pdict.items():
3166 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3167 continue
3168 if n.startswith('t_'):
3169 continue
3170 if n.startswith('p_') and n != 'p_error':
3171 self.log.warning('%r not defined as a function', n)
3172 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3173 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3174 if v.__doc__:
3175 try:
3176 doc = v.__doc__.split(' ')
3177 if doc[1] == ':':
3178 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3179 v.__code__.co_filename, v.__code__.co_firstlineno, n)
3180 except IndexError:
3181 pass
3182
3183 self.grammar = grammar
3184
3185 # -----------------------------------------------------------------------------
3186 # yacc(module)
3187 #
3188 # Build a parser
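#
# Typical use (a sketch, assuming the calling module defines tokens,
# precedence, and p_ rules, and has already built a lexer with ply.lex):
#
#     import ply.yacc as yacc
#     parser = yacc.yacc()            # build (or load) the tables
#     result = parser.parse('2 + 3 * 4')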
3189 # -----------------------------------------------------------------------------
3190
3191 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3192 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3193 outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3194
3195 if tabmodule is None:
3196 tabmodule = tab_module
3197
3198 # Reference to the parsing method of the last built parser
3199 global parse
3200
3201 # If pickling is enabled, table files are not created
3202 if picklefile:
3203 write_tables = 0
3204
3205 if errorlog is None:
3206 errorlog = PlyLogger(sys.stderr)
3207
3208 # Get the module dictionary used for the parser
3209 if module:
3210 _items = [(k, getattr(module, k)) for k in dir(module)]
3211 pdict = dict(_items)
3212 # If no __file__ attribute is available, try to obtain it from the __module__ instead
3213 if '__file__' not in pdict:
3214 pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3215 else:
3216 pdict = get_caller_module_dict(2)
3217
3218 if outputdir is None:
3219 # If no output directory is set, the location of the output files
3220 # is determined according to the following rules:
3221 # - If tabmodule specifies a package, files go into that package directory
3222 # - Otherwise, files go in the same directory as the specifying module
3223 if isinstance(tabmodule, types.ModuleType):
3224 srcfile = tabmodule.__file__
3225 else:
3226 if '.' not in tabmodule:
3227 srcfile = pdict['__file__']
3228 else:
3229 parts = tabmodule.split('.')
3230 pkgname = '.'.join(parts[:-1])
3231 exec('import %s' % pkgname)
3232 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3233 outputdir = os.path.dirname(srcfile)
3234
3235 # Determine if the module is part of a package or not.
3236 # If so, fix the tabmodule setting so that tables load correctly
3237 pkg = pdict.get('__package__')
3238 if pkg and isinstance(tabmodule, str):
3239 if '.' not in tabmodule:
3240 tabmodule = pkg + '.' + tabmodule
3241 
3244 # Set start symbol if it's specified directly using an argument
3245 if start is not None:
3246 pdict['start'] = start
3247
3248 # Collect parser information from the dictionary
3249 pinfo = ParserReflect(pdict, log=errorlog)
3250 pinfo.get_all()
3251
3252 if pinfo.error:
3253 raise YaccError('Unable to build parser')
3254
3255 # Check signature against table files (if any)
3256 signature = pinfo.signature()
3257
3258 # Read the tables
3259 try:
3260 lr = LRTable()
3261 if picklefile:
3262 read_signature = lr.read_pickle(picklefile)
3263 else:
3264 read_signature = lr.read_table(tabmodule)
3265 if optimize or (read_signature == signature):
3266 try:
3267 lr.bind_callables(pinfo.pdict)
3268 parser = LRParser(lr, pinfo.error_func)
3269 parse = parser.parse
3270 return parser
3271 except Exception as e:
3272 errorlog.warning('There was a problem loading the table file: %r', e)
3273 except VersionError as e:
3274 errorlog.warning(str(e))
3275 except ImportError:
3276 pass
3277
3278 if debuglog is None:
3279 if debug:
3280 try:
3281 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3282 except IOError as e:
3283 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3284 debuglog = NullLogger()
3285 else:
3286 debuglog = NullLogger()
3287
3288 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3289
3290 errors = False
3291
3292 # Validate the parser information
3293 if pinfo.validate_all():
3294 raise YaccError('Unable to build parser')
3295
3296 if not pinfo.error_func:
3297 errorlog.warning('no p_error() function is defined')
3298
3299 # Create a grammar object
3300 grammar = Grammar(pinfo.tokens)
3301
3302 # Set precedence level for terminals
3303 for term, assoc, level in pinfo.preclist:
3304 try:
3305 grammar.set_precedence(term, assoc, level)
3306 except GrammarError as e:
3307 errorlog.warning('%s', e)
3308
3309 # Add productions to the grammar
3310 for funcname, gram in pinfo.grammar:
3311 file, line, prodname, syms = gram
3312 try:
3313 grammar.add_production(prodname, syms, funcname, file, line)
3314 except GrammarError as e:
3315 errorlog.error('%s', e)
3316 errors = True
3317
3318 # Set the grammar start symbols
3319 try:
3320 if start is None:
3321 grammar.set_start(pinfo.start)
3322 else:
3323 grammar.set_start(start)
3324 except GrammarError as e:
3325 errorlog.error(str(e))
3326 errors = True
3327
3328 if errors:
3329 raise YaccError('Unable to build parser')
3330
3331 # Verify the grammar structure
3332 undefined_symbols = grammar.undefined_symbols()
3333 for sym, prod in undefined_symbols:
3334 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3335 errors = True
3336
3337 unused_terminals = grammar.unused_terminals()
3338 if unused_terminals:
3339 debuglog.info('')
3340 debuglog.info('Unused terminals:')
3341 debuglog.info('')
3342 for term in unused_terminals:
3343 errorlog.warning('Token %r defined, but not used', term)
3344 debuglog.info(' %s', term)
3345
3346 # Print out all productions to the debug log
3347 if debug:
3348 debuglog.info('')
3349 debuglog.info('Grammar')
3350 debuglog.info('')
3351 for n, p in enumerate(grammar.Productions):
3352 debuglog.info('Rule %-5d %s', n, p)
3353
3354 # Find unused non-terminals
3355 unused_rules = grammar.unused_rules()
3356 for prod in unused_rules:
3357 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3358
3359 if len(unused_terminals) == 1:
3360 errorlog.warning('There is 1 unused token')
3361 if len(unused_terminals) > 1:
3362 errorlog.warning('There are %d unused tokens', len(unused_terminals))
3363
3364 if len(unused_rules) == 1:
3365 errorlog.warning('There is 1 unused rule')
3366 if len(unused_rules) > 1:
3367 errorlog.warning('There are %d unused rules', len(unused_rules))
3368
3369 if debug:
3370 debuglog.info('')
3371 debuglog.info('Terminals, with rules where they appear')
3372 debuglog.info('')
3373 terms = list(grammar.Terminals)
3374 terms.sort()
3375 for term in terms:
3376 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3377
3378 debuglog.info('')
3379 debuglog.info('Nonterminals, with rules where they appear')
3380 debuglog.info('')
3381 nonterms = list(grammar.Nonterminals)
3382 nonterms.sort()
3383 for nonterm in nonterms:
3384 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3385 debuglog.info('')
3386
3387 if check_recursion:
3388 unreachable = grammar.find_unreachable()
3389 for u in unreachable:
3390 errorlog.warning('Symbol %r is unreachable', u)
3391
3392 infinite = grammar.infinite_cycles()
3393 for inf in infinite:
3394 errorlog.error('Infinite recursion detected for symbol %r', inf)
3395 errors = True
3396
3397 unused_prec = grammar.unused_precedence()
3398 for term, assoc in unused_prec:
3399 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3400 errors = True
3401
3402 if errors:
3403 raise YaccError('Unable to build parser')
3404
3405 # Run the LRGeneratedTable on the grammar
3406 if debug:
3407 errorlog.debug('Generating %s tables', method)
3408
3409 lr = LRGeneratedTable(grammar, method, debuglog)
3410
3411 if debug:
3412 num_sr = len(lr.sr_conflicts)
3413
3414 # Report shift/reduce and reduce/reduce conflicts
3415 if num_sr == 1:
3416 errorlog.warning('1 shift/reduce conflict')
3417 elif num_sr > 1:
3418 errorlog.warning('%d shift/reduce conflicts', num_sr)
3419
3420 num_rr = len(lr.rr_conflicts)
3421 if num_rr == 1:
3422 errorlog.warning('1 reduce/reduce conflict')
3423 elif num_rr > 1:
3424 errorlog.warning('%d reduce/reduce conflicts', num_rr)
3425
3426 # Write out conflicts to the output file
3427 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3428 debuglog.warning('')
3429 debuglog.warning('Conflicts:')
3430 debuglog.warning('')
3431
3432 for state, tok, resolution in lr.sr_conflicts:
3433 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
3434
3435 already_reported = set()
3436 for state, rule, rejected in lr.rr_conflicts:
3437 if (state, id(rule), id(rejected)) in already_reported:
3438 continue
3439 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3440 debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3441 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3442 errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3443 already_reported.add((state, id(rule), id(rejected)))
3444
3445 warned_never = []
3446 for state, rule, rejected in lr.rr_conflicts:
3447 if not rejected.reduced and (rejected not in warned_never):
3448 debuglog.warning('Rule (%s) is never reduced', rejected)
3449 errorlog.warning('Rule (%s) is never reduced', rejected)
3450 warned_never.append(rejected)
3451
3452 # Write the table file if requested
3453 if write_tables:
3454 try:
3455 lr.write_table(tabmodule, outputdir, signature)
3456 except IOError as e:
3457 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3458
3459 # Write a pickled version of the tables
3460 if picklefile:
3461 try:
3462 lr.pickle_table(picklefile, signature)
3463 except IOError as e:
3464 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3465
3466 # Build the parser
3467 lr.bind_callables(pinfo.pdict)
3468 parser = LRParser(lr, pinfo.error_func)
3469
3470 parse = parser.parse
3471 return parser