7267
|
1 # parser for Unix yacc-based grammars
|
|
2 #
|
|
3 # Author: David Beazley (dave@dabeaz.com)
|
|
4 # Date : October 2, 2006
|
|
5
|
|
6 import ylex
|
|
7 tokens = ylex.tokens
|
|
8
|
|
9 from ply import *
|
|
10
|
|
# Names collected from token/precedence declarations, and the precedence
# tuples built from the associativity declarations.  Both are filled in by
# p_definition_token() and printed by p_defsection().
tokenlist, preclist = [], []

# When false, print_code() emits nothing.
emit_code = 1
|
|
15
|
|
# Start rule: a definitions section followed by a rules section.
# The docstring is the grammar specification read by PLY; no action is taken.
def p_yacc(p):
    """yacc : defsection rulesection"""
|
|
18
|
|
# End of the definitions section: emit the collected token and precedence
# tables, followed by the banner that introduces the translated rules.
# The docstring is the grammar specification read by PLY.
def p_defsection(p):
    """defsection : definitions SECTION
                  | SECTION"""
    # NOTE(review): presumably consulted by the lexer in ylex once the first
    # section separator has been seen -- confirm against ylex.
    p.lexer.lastsection = 1
    # Single-argument print() calls produce the same output under Python 2
    # and Python 3.  The two spaces after "=" reproduce the separator that
    # the former two-item print statements inserted.
    print("tokens =  %s" % repr(tokenlist))
    print("")
    print("precedence =  %s" % repr(preclist))
    print("")
    print("# -------------- RULES ----------------")
    print("")
|
|
29
|
|
# End of the rules section: print the closing banner, then echo the value of
# the terminating SECTION token as comment lines via print_code().
# The docstring is the grammar specification read by PLY.
def p_rulesection(p):
    """rulesection : rules SECTION"""
    # Single-argument print() works identically on Python 2 and Python 3.
    print("# -------------- RULES END ----------------")
    # NOTE(review): p[2] is presumably the text that follows the second
    # section separator -- confirm against ylex.
    print_code(p[2], 0)
|
|
35
|
|
# One or more definitions.  Each definition rule does its own work, so this
# list rule has no action.  The docstring is the grammar read by PLY.
def p_definitions(p):
    """definitions : definitions definition
                   | definition"""
|
|
39
|
|
# A LITERAL block in the definitions section is echoed as comment lines
# at column zero.  The docstring is the grammar read by PLY.
def p_definition_literal(p):
    """definition : LITERAL"""
    literal_text = p[1]
    print_code(literal_text, 0)
|
|
43
|
|
# A start declaration: emit the equivalent ``start = '<name>'`` assignment.
# The docstring is the grammar specification read by PLY.
def p_definition_start(p):
    """definition : START ID"""
    # Single-argument print() works identically on Python 2 and Python 3.
    print("start = '%s'" % p[2])
|
|
47
|
|
# A token or associativity declaration.  Every listed name that does not
# start with a quote character is recorded in tokenlist; for the three
# associativity directives a precedence tuple is appended to preclist.
# The docstring is the grammar specification read by PLY.
def p_definition_token(p):
    """definition : toktype opttype idlist optsemi """
    directive, names = p[1], p[3]

    # Quoted literals are skipped; only bare names go into the token list.
    tokenlist.extend(name for name in names if name[0] not in "'\"")

    associativity = {
        '%left': 'left',
        '%right': 'right',
        '%nonassoc': 'nonassoc',
    }.get(directive)
    if associativity is not None:
        # The precedence entry keeps every listed name, literals included.
        preclist.append((associativity,) + tuple(names))
|
|
59
|
|
# The declaration keyword; its token value is passed up unchanged.
# The docstring is the grammar specification read by PLY.
def p_toktype(p):
    """toktype : TOKEN
               | LEFT
               | RIGHT
               | NONASSOC"""
    directive = p[1]
    p[0] = directive
|
|
66
|
|
# Optional <type> tag on a declaration.  No action: the tag is discarded.
# The docstring is the grammar specification read by PLY.
def p_opttype(p):
    """opttype : '<' ID '>'
               | empty"""
|
|
70
|
|
# A list of token ids, with optional commas between them.  The value is a
# Python list that grows in place as further ids are reduced.
# The docstring is the grammar specification read by PLY.
def p_idlist(p):
    """idlist : idlist optcomma tokenid
              | tokenid"""
    if len(p) == 2:
        # First id: start a new list.
        p[0] = [p[1]]
        return
    # Otherwise reuse the list built so far and add the new id to it.
    ids = p[0] = p[1]
    ids.append(p[3])
|
|
79
|
|
# A single declared name or quoted literal.  A NUMBER that follows it is
# accepted by the grammar but not used: only the first symbol is passed up.
# The docstring is the grammar specification read by PLY.
def p_tokenid(p):
    """tokenid : ID
               | ID NUMBER
               | QLITERAL
               | QLITERAL NUMBER"""
    name = p[1]
    p[0] = name
|
|
86
|
|
# Optional trailing semicolon; it carries no value and has no action.
# The docstring is the grammar specification read by PLY.
def p_optsemi(p):
    """optsemi : ';'
               | empty"""
|
|
90
|
|
# Optional comma between list items; it carries no value and has no action.
# The docstring is the grammar specification read by PLY.
def p_optcomma(p):
    """optcomma : ','
                | empty"""
|
|
94
|
|
# A type declaration.  It is parsed so the input is accepted, but it is
# ignored: nothing is emitted and nothing is recorded.
# The docstring is the grammar specification read by PLY.
def p_definition_type(p):
    """definition : TYPE '<' ID '>' namelist optsemi"""
|
|
98
|
|
# A list of names with optional commas between them.  No value is built
# and there is no action.  The docstring is the grammar read by PLY.
def p_namelist(p):
    """namelist : namelist optcomma ID
                | ID"""
|
|
102
|
|
# A union declaration.  It is parsed so the input is accepted, but it is
# ignored: the code block is neither emitted nor recorded.
# The docstring is the grammar specification read by PLY.
def p_definition_union(p):
    """definition : UNION CODE optsemi"""
|
|
106
|
|
# Translate each parsed rule into Python source on stdout.  Every
# alternative of the rule becomes a ``p_<name>_<n>`` function whose
# docstring is the production; action code is echoed as comments through
# print_code().  A code block that is not the last item of an alternative
# (an embedded action) is replaced in the production by a generated
# ``_embed<k>_<name>`` symbol, which gets its own empty production.
# The docstring is the grammar specification read by PLY.
def p_rules(p):
    """rules : rules rule
             | rule"""
    # The rule just reduced is always the last symbol of the production.
    rule = p[1] if len(p) == 2 else p[2]
    rulename, alternatives = rule[0], rule[1]

    # (symbol name, code) pairs for embedded actions; its length also
    # serves as the running counter used in the generated names.
    embedded = []

    for rulecount, r in enumerate(alternatives, 1):
        # r contains one of the rule possibilities.
        # Single-argument print() behaves the same on Python 2 and 3.
        print("def p_%s_%d(p):" % (rulename, rulecount))
        prod = []
        prodcode = ""
        last_index = len(r) - 1
        for i, item in enumerate(r):
            if not item.startswith('{'):
                # Ordinary grammar symbol.
                prod.append(item)
            elif i == last_index:
                # A trailing code block is this production's action.
                prodcode = item
            else:
                # An embedded action: stand-in symbol now, body later.
                embed_name = "_embed%d_%s" % (len(embedded), rulename)
                prod.append(embed_name)
                embedded.append((embed_name, item))
        # The emitted docstring is indented four spaces so it lines up
        # with the action comments emitted by print_code(..., 4).
        print("    '''%s : %s'''" % (rulename, " ".join(prod)))
        print_code(prodcode, 4)
        print("")

    for embed_name, code in embedded:
        print("def p_%s(p):" % embed_name)
        print("    '''%s : '''" % embed_name)
        print_code(code, 4)
        print("")
|
|
152
|
|
# A rule with exactly one non-empty alternative.  The value is the pair
# (rule name, list of alternatives), each alternative being a list of items.
# The docstring is the grammar specification read by PLY.
def p_rule(p):
    """rule : ID ':' rulelist ';' """
    name, only_alternative = p[1], p[3]
    p[0] = (name, [only_alternative])
|
|
156
|
|
# A rule whose first alternative is non-empty and is followed by further
# '|' alternatives.  The first alternative is prepended, in place, to the
# list built by morerules.  The docstring is the grammar read by PLY.
def p_rule2(p):
    """rule : ID ':' rulelist morerules ';' """
    name, first, alternatives = p[1], p[3], p[4]
    alternatives[:0] = [first]
    p[0] = (name, alternatives)
|
|
161
|
|
# A rule whose only alternative is empty.  The value is the rule name
# paired with a one-element list holding an empty alternative.
# The docstring is the grammar specification read by PLY.
def p_rule_empty(p):
    """rule : ID ':' ';' """
    empty_alternative = []
    p[0] = (p[1], [empty_alternative])
|
|
165
|
|
# A rule whose first alternative is empty and is followed by further '|'
# alternatives.  An empty alternative is prepended, in place, to the list
# built by morerules.  The docstring is the grammar read by PLY.
def p_rule_empty2(p):
    """rule : ID ':' morerules ';' """
    name, alternatives = p[1], p[3]
    alternatives[:0] = [[]]
    p[0] = (name, alternatives)
|
|
171
|
|
# The '|'-separated alternatives that follow the first one.  The value is
# a list of alternatives; a bare '|' contributes an empty alternative.
# The docstring is the grammar specification read by PLY.
def p_morerules(p):
    """morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|' """
    symbol_count = len(p)
    if symbol_count == 2:
        # A lone '|': a single empty alternative.
        p[0] = [[]]
        return
    if symbol_count == 3:
        # '|' rulelist: start the list with this alternative.
        p[0] = [p[2]]
        return
    # morerules '|' rulelist: extend the existing list in place.
    alternatives = p[0] = p[1]
    alternatives.append(p[3])
|
|
186
|
|
# The items that make up one alternative of a rule.  The value is a Python
# list that grows in place as further items are reduced.
# The docstring is the grammar specification read by PLY.
def p_rulelist(p):
    """rulelist : rulelist ruleitem
                | ruleitem"""
    if len(p) == 2:
        # First item: start a new list.
        p[0] = [p[1]]
        return
    # Otherwise reuse the list built so far and add the new item to it.
    items = p[0] = p[1]
    items.append(p[2])
|
|
196
|
|
# One item of an alternative; its token value is passed up unchanged.
# The docstring is the grammar specification read by PLY.
def p_ruleitem(p):
    """ruleitem : ID
                | QLITERAL
                | CODE
                | PREC"""
    item = p[1]
    p[0] = item
|
|
203
|
|
# The empty production used by the optional rules; it has no action.
# The docstring is the grammar specification read by PLY.
def p_empty(p):
    """empty : """
|
|
206
|
|
def p_error(p):
    """Report a syntax error on stderr instead of silently ignoring it.

    p is the offending token, or None when the error is detected at the
    end of the input.  The message goes to stderr because stdout carries
    the generated Python code.  Nothing is raised and nothing is returned,
    so the parser's own error handling continues exactly as before.
    """
    import sys
    if p is None:
        sys.stderr.write("yply: syntax error at end of input\n")
        return
    sys.stderr.write("yply: syntax error at line %s near %r (%s)\n"
                     % (p.lineno, p.value, p.type))
|
|
209
|
|
# Build the parser from the p_* rule functions defined in this module,
# with PLY's debug option switched off.
yacc.yacc(debug=0)
|
|
211
|
|
def print_code(code, indent):
    """Echo *code* to stdout as comment lines.

    Each line of *code* is printed as ``# <line>`` preceded by *indent*
    spaces.  Nothing is printed when the module-level ``emit_code`` flag is
    false, or when *code* contains no lines.
    """
    if not emit_code:
        return
    prefix = " " * indent
    for line in code.splitlines():
        # Single-argument print() works identically on Python 2 and 3.
        print("%s# %s" % (prefix, line))
|
|
217
|