7267
|
1 # -----------------------------------------------------------------------------
|
|
2 # cpp.py
|
|
3 #
|
|
4 # Author: David Beazley (http://www.dabeaz.com)
|
|
5 # Copyright (C) 2007
|
|
6 # All rights reserved
|
|
7 #
|
|
8 # This module implements an ANSI-C style lexical preprocessor for PLY.
|
|
9 # -----------------------------------------------------------------------------
|
|
10 from __future__ import generators
|
|
11
|
|
12 # -----------------------------------------------------------------------------
|
|
13 # Default preprocessor lexer definitions. These tokens are enough to get
|
|
14 # a basic preprocessor working. Other modules may import these if they want
|
|
15 # -----------------------------------------------------------------------------
|
|
16
|
|
17 tokens = (
|
|
18 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
|
|
19 )
|
|
20
|
|
21 literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
|
|
22
|
|
23 # Whitespace
|
|
def t_CPP_WS(t):
    r'\s+'
    # NOTE: the string above is the token's regular expression, not a docstring.
    # Keep the lexer's line counter in step with every newline in the run.
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count
    return t
|
|
28
|
|
# Stringizing (#) and token-pasting (##) operators
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier: a letter or underscore followed by word characters
t_CPP_ID = r'[A-Za-z_][\w_]*'
|
|
34
|
|
35 # Integer literal
|
|
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    # NOTE: the string above is the token's regular expression (hex or
    # decimal digits with an optional u/l suffix); the token passes through
    # unchanged.
    return t


# Expose the rule under the t_ name as well.
t_CPP_INTEGER = CPP_INTEGER
|
|
41
|
|
42 # Floating literal
|
|
# Floating literal: "digits.digits[exponent]" or "digits exponent", with an
# optional l/L/f/F suffix.  The blanks around "|" are part of the pattern
# text and are reproduced exactly.
t_CPP_FLOAT = (
    r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))'
    r'([lL]|[fF])?'
)
|
|
44
|
|
45 # String literal
|
|
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    # NOTE: the string above is the token's regular expression.
    # A literal can span lines through backslash-newline, so count newlines.
    lines_spanned = t.value.count("\n")
    t.lexer.lineno += lines_spanned
    return t
|
|
50
|
|
51 # Character constant 'c' or L'c'
|
|
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    # NOTE: the string above is the token's regular expression ('c' or L'c').
    # Count any newlines inside the constant so line numbers stay correct.
    lines_spanned = t.value.count("\n")
    t.lexer.lineno += lines_spanned
    return t
|
|
56
|
|
57 # Comment
|
|
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    # NOTE: the string above is the token's regular expression.
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count
    # A block comment becomes whitespace: as many newlines as it spanned,
    # or a single space when it sat on one line.
    t.type = 'CPP_WS'
    if newline_count:
        t.value = '\n' * newline_count
    else:
        t.value = ' '
    return t
|
|
65
|
|
66 # Line comment
|
|
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # NOTE: the string above is the token's regular expression.
    # Account for the newline the comment consumed (none when it ends at EOF).
    t.lexer.lineno += t.value.count("\n")
    # Replace the comment with a single newline whitespace token.
    t.type = 'CPP_WS'
    t.value = '\n'
    # The token must be returned: without it the line break is dropped and
    # the following line is merged into the current one.
    return t
|
|
71
|
|
def t_error(t):
    # An unmatched character is not an error for the preprocessor: hand it
    # back as a one-character token whose type is the character itself.
    first_char = t.value[0]
    t.type = first_char
    t.value = first_char
    t.lexer.skip(1)
    return t
|
|
77
|
|
78 import re
|
|
79 import copy
|
|
80 import time
|
|
81 import os.path
|
|
82
|
|
83 # -----------------------------------------------------------------------------
|
|
84 # trigraph()
|
|
85 #
|
|
86 # Given an input string, this function replaces all trigraph sequences.
|
|
87 # The following mapping is used:
|
|
88 #
|
|
89 # ??= #
|
|
90 # ??/ \
|
|
91 # ??' ^
|
|
92 # ??( [
|
|
93 # ??) ]
|
|
94 # ??! |
|
|
95 # ??< {
|
|
96 # ??> }
|
|
97 # ??- ~
|
|
98 # -----------------------------------------------------------------------------
|
|
99
|
|
# Matches any of the nine trigraph sequences listed above.
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')

# Final trigraph character -> replacement character.
_trigraph_rep = {
    '=': '#',
    '/': '\\',
    "'": '^',
    '(': '[',
    ')': ']',
    '!': '|',
    '<': '{',
    '>': '}',
    '-': '~',
}


def trigraph(input):
    """Return *input* with every trigraph sequence replaced by its character."""
    def _substitute(match):
        # The last character of the three-character match selects the result.
        return _trigraph_rep[match.group()[-1]]

    return _trigraph_pat.sub(_substitute, input)
|
|
115
|
|
116 # ------------------------------------------------------------------
|
|
117 # Macro object
|
|
118 #
|
|
119 # This object holds information about preprocessor macros
|
|
120 #
|
|
121 # .name - Macro name (string)
|
|
122 # .value - Macro value (a list of tokens)
|
|
123 # .arglist - List of argument names
|
|
124 # .variadic - Boolean indicating whether or not variadic macro
|
|
125 # .vararg - Name of the variadic parameter
|
|
126 #
|
|
127 # When a macro is created, the macro replacement token sequence is
|
|
128 # pre-scanned and used to create patch lists that are later used
|
|
129 # during macro expansion
|
|
130 # ------------------------------------------------------------------
|
|
131
|
|
class Macro(object):
    """Preprocessor macro definition.

    Attributes:
        name:     macro name (string)
        value:    replacement token list
        arglist:  list of parameter names, or None when none was given
        variadic: True when the macro accepts a variable argument list
        vararg:   name of the variadic parameter (last entry of arglist),
                  or None for non-variadic macros
        source:   initialised to None
    """

    def __init__(self, name, value, arglist=None, variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        # Always define vararg so the attribute exists on every macro.
        self.vararg = arglist[-1] if variadic else None
        self.source = None
|
|
141
|
|
142 # ------------------------------------------------------------------
|
|
143 # Preprocessor object
|
|
144 #
|
|
145 # Object representing a preprocessor. Contains macro definitions,
|
|
146 # include directories, and other information
|
|
147 # ------------------------------------------------------------------
|
|
148
|
|
149 class Preprocessor(object):
|
|
def __init__(self, lexer=None):
    """Create a preprocessor around *lexer*.

    When *lexer* is None, the most recently built PLY lexer (``lex.lexer``)
    is used.  The constructor probes the lexer for token types and defines
    ``__DATE__`` and ``__TIME__`` from the current local time.
    """
    if lexer is None:
        # "lex" is only a module global when this file runs as a script;
        # otherwise import it here so the default does not raise NameError.
        try:
            lex_module = lex
        except NameError:
            import ply.lex as lex_module
        lexer = lex_module.lexer
    self.lexer = lexer
    self.macros = {}
    self.path = []
    self.temp_path = []

    # Probe the lexer for selected tokens
    self.lexprobe()

    tm = time.localtime()
    self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y", tm))
    self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S", tm))
    self.parser = None
|
|
165
|
|
166 # -----------------------------------------------------------------------------
|
|
167 # tokenize()
|
|
168 #
|
|
169 # Utility function. Given a string of text, tokenize into a list of tokens
|
|
170 # -----------------------------------------------------------------------------
|
|
171
|
|
def tokenize(self, text):
    """Return the list of tokens the lexer produces for *text*."""
    lexer = self.lexer
    lexer.input(text)
    collected = []
    tok = lexer.token()
    # The lexer signals end of input with a false value.
    while tok:
        collected.append(tok)
        tok = lexer.token()
    return collected
|
|
180
|
|
181 # ---------------------------------------------------------------------
|
|
182 # error()
|
|
183 #
|
|
184 # Report a preprocessor error/warning of some kind
|
|
185 # ----------------------------------------------------------------------
|
|
186
|
|
def error(self, file, line, msg):
    """Print a diagnostic in the form ``file:line message``."""
    report = "%s:%d %s" % (file, line, msg)
    print(report)
|
|
189
|
|
190 # ----------------------------------------------------------------------
|
|
191 # lexprobe()
|
|
192 #
|
|
193 # This method probes the preprocessor lexer object to discover
|
|
194 # the token types of symbols that are important to the preprocessor.
|
|
195 # If this works right, the preprocessor will simply "work"
|
|
196 # with any suitable lexer regardless of how tokens have been named.
|
|
197 # ----------------------------------------------------------------------
|
|
198
|
|
def lexprobe(self):
    """Probe the lexer to learn which token types the preprocessor needs.

    Sets t_ID, t_INTEGER, t_INTEGER_TYPE, t_STRING, t_SPACE, t_NEWLINE and
    t_WS.  A failed probe prints a message and leaves the attribute set to
    None (t_INTEGER_TYPE falls back to str), so later comparisons do not
    raise AttributeError.
    """
    # Determine the token type for identifiers
    self.lexer.input("identifier")
    tok = self.lexer.token()
    if not tok or tok.value != "identifier":
        self.t_ID = None
        print("Couldn't determine identifier type")
    else:
        self.t_ID = tok.type

    # Determine the token type for integers
    self.lexer.input("12345")
    tok = self.lexer.token()
    try:
        integer_ok = bool(tok) and int(tok.value) == 12345
    except (TypeError, ValueError):
        # The lexer produced something that is not numeric at all.
        integer_ok = False
    if not integer_ok:
        self.t_INTEGER = None
        self.t_INTEGER_TYPE = str
        print("Couldn't determine integer type")
    else:
        self.t_INTEGER = tok.type
        self.t_INTEGER_TYPE = type(tok.value)

    # Determine the token type for strings enclosed in double quotes
    self.lexer.input("\"filename\"")
    tok = self.lexer.token()
    if not tok or tok.value != "\"filename\"":
        self.t_STRING = None
        print("Couldn't determine string type")
    else:
        self.t_STRING = tok.type

    # Determine the token type for whitespace--if any
    self.lexer.input("  ")
    tok = self.lexer.token()
    if not tok or tok.value != "  ":
        self.t_SPACE = None
    else:
        self.t_SPACE = tok.type

    # Determine the token type for newlines
    self.lexer.input("\n")
    tok = self.lexer.token()
    if not tok or tok.value != "\n":
        self.t_NEWLINE = None
        print("Couldn't determine token for newlines")
    else:
        self.t_NEWLINE = tok.type

    self.t_WS = (self.t_SPACE, self.t_NEWLINE)

    # Check for other characters used by the preprocessor
    chars = ['<', '>', '#', '##', '\\', '(', ')', ',', '.']
    for c in chars:
        self.lexer.input(c)
        tok = self.lexer.token()
        if not tok or tok.value != c:
            print("Unable to lex '%s' required for preprocessor" % c)
|
|
252
|
|
253 # ----------------------------------------------------------------------
|
|
254 # add_path()
|
|
255 #
|
|
256 # Adds a search path to the preprocessor.
|
|
257 # ----------------------------------------------------------------------
|
|
258
|
|
def add_path(self, path):
    """Append *path* to the list of include search directories."""
    search_dirs = self.path
    search_dirs.append(path)
|
|
261
|
|
262 # ----------------------------------------------------------------------
|
|
263 # group_lines()
|
|
264 #
|
|
265 # Given an input string, this function splits it into lines. Trailing whitespace
|
|
266 # is removed. Any line ending with \ is grouped with the next line. This
|
|
267 # function forms the lowest level of the preprocessor---grouping text into
|
|
268 # a line-by-line format.
|
|
269 # ----------------------------------------------------------------------
|
|
270
|
|
def group_lines(self, input):
    """Yield the tokens of *input* one logical line at a time.

    Trailing whitespace is stripped from each physical line and a line
    ending in a backslash is joined with the following line(s).  The joined
    lines are replaced by empty lines, so the newline count is preserved.
    Each yielded item is a list of tokens ending with the whitespace token
    that contains the line break (the last line may lack one).
    """
    lexer = self.lexer.clone()
    lines = [x.rstrip() for x in input.splitlines()]
    # range() instead of xrange(): xrange does not exist on Python 3.
    for i in range(len(lines)):
        j = i + 1
        while lines[i].endswith('\\') and (j < len(lines)):
            lines[i] = lines[i][:-1] + lines[j]
            lines[j] = ""
            j += 1

    input = "\n".join(lines)
    lexer.input(input)
    lexer.lineno = 1

    current_line = []
    while True:
        tok = lexer.token()
        if not tok:
            break
        current_line.append(tok)
        if tok.type in self.t_WS and '\n' in tok.value:
            yield current_line
            current_line = []

    if current_line:
        yield current_line
|
|
297
|
|
298 # ----------------------------------------------------------------------
|
|
299 # tokenstrip()
|
|
300 #
|
|
301 # Remove leading/trailing whitespace tokens from a token list
|
|
302 # ----------------------------------------------------------------------
|
|
303
|
|
def tokenstrip(self, tokens):
    """Remove leading and trailing whitespace tokens from *tokens* in place.

    Returns the same list object.
    """
    whitespace = self.t_WS
    # Index of the first non-whitespace token (or len(tokens) if none).
    lead = next(
        (pos for pos, tok in enumerate(tokens) if tok.type not in whitespace),
        len(tokens),
    )
    del tokens[:lead]
    # Shrink from the right while the last kept token is whitespace.
    keep = len(tokens)
    while keep > 0 and tokens[keep - 1].type in whitespace:
        keep -= 1
    del tokens[keep:]
    return tokens
|
|
314
|
|
315
|
|
316 # ----------------------------------------------------------------------
|
|
317 # collect_args()
|
|
318 #
|
|
319 # Collects comma separated arguments from a list of tokens. The arguments
|
|
320 # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions)
|
|
321 # where tokencount is the number of tokens consumed, args is a list of arguments,
|
|
322 # and positions is a list of integers containing the starting index of each
|
|
323 # argument. Each argument is represented by a list of tokens.
|
|
324 #
|
|
325 # When collecting arguments, leading and trailing whitespace is removed
|
|
326 # from each argument.
|
|
327 #
|
|
328 # This function properly handles nested parenthesis and commas---these do not
|
|
329 # define new arguments.
|
|
330 # ----------------------------------------------------------------------
|
|
331
|
|
def collect_args(self, tokenlist):
    """Collect the comma-separated arguments of a parenthesised list.

    Returns ``(tokencount, args, positions)``: the number of tokens
    consumed, the argument token lists (whitespace-stripped) and the
    recorded token positions.  Nested parentheses and the commas inside
    them do not start new arguments.  On a missing '(' or ')' an error is
    reported and ``(0, [], [])`` is returned.
    """
    args = []
    positions = []
    pending = []          # tokens of the argument being collected
    depth = 1
    total = len(tokenlist)

    # Skip whitespace in front of the opening '('.
    idx = 0
    while idx < total and tokenlist[idx].type in self.t_WS:
        idx += 1

    if idx >= total or tokenlist[idx].value != '(':
        self.error(self.source, tokenlist[0].lineno, "Missing '(' in macro arguments")
        return 0, [], []
    positions.append(idx + 1)

    for idx in range(idx + 1, total):
        tok = tokenlist[idx]
        value = tok.value
        if value == '(':
            pending.append(tok)
            depth += 1
        elif value == ')':
            depth -= 1
            if depth == 0:
                # Closing parenthesis of the argument list itself.
                if pending:
                    args.append(self.tokenstrip(pending))
                    positions.append(idx)
                return idx + 1, args, positions
            pending.append(tok)
        elif value == ',' and depth == 1:
            args.append(self.tokenstrip(pending))
            positions.append(idx + 1)
            pending = []
        else:
            pending.append(tok)

    # Missing end argument
    self.error(self.source, tokenlist[-1].lineno, "Missing ')' in macro arguments")
    return 0, [], []
|
|
376
|
|
377 # ----------------------------------------------------------------------
|
|
378 # macro_prescan()
|
|
379 #
|
|
380 # Examine the macro value (token sequence) and identify patch points
|
|
381 # This is used to speed up macro expansion later on---we'll know
|
|
382 # right away where to apply patches to the value to form the expansion
|
|
383 # ----------------------------------------------------------------------
|
|
384
|
|
def macro_prescan(self, macro):
    """Pre-scan a macro body and record where arguments must be patched in.

    Fills ``macro.patch`` (entries ``('c'|'e', argnum, position)``, sorted
    by descending position), ``macro.str_patch`` (``(argnum, position)``
    for ``#arg``) and ``macro.var_comma_patch`` (positions of a ',' in
    front of ``## <vararg>``).  The ``#`` and ``##`` operator tokens that
    precede an argument are removed from ``macro.value`` in place.
    """
    macro.patch = []             # Standard macro arguments
    macro.str_patch = []         # String conversion expansion
    macro.var_comma_patch = []   # Variadic macro comma patch
    body = macro.value           # same list object; edited in place
    pos = 0
    while pos < len(body):
        tok = body[pos]
        if tok.type == self.t_ID and tok.value in macro.arglist:
            argnum = macro.arglist.index(tok.value)
            if pos > 0 and body[pos - 1].value == '#':
                # Conversion of argument to a string: retag a copy of the
                # token and drop the '#'.  pos is not advanced because the
                # deletion shifted the next token into this slot.
                stringized = copy.copy(tok)
                stringized.type = self.t_STRING
                body[pos] = stringized
                del body[pos - 1]
                macro.str_patch.append((argnum, pos - 1))
                continue
            if pos > 0 and body[pos - 1].value == '##':
                # Concatenation with the preceding token
                macro.patch.append(('c', argnum, pos - 1))
                del body[pos - 1]
                continue
            if (pos + 1) < len(body) and body[pos + 1].value == '##':
                # Concatenation with the following token
                macro.patch.append(('c', argnum, pos))
                pos += 1
                continue
            # Standard expansion
            macro.patch.append(('e', argnum, pos))
        elif tok.value == '##':
            follows_comma = pos > 0 and body[pos - 1].value == ','
            if (macro.variadic and follows_comma
                    and (pos + 1) < len(body)
                    and body[pos + 1].type == self.t_ID
                    and body[pos + 1].value == macro.vararg):
                macro.var_comma_patch.append(pos - 1)
        pos += 1
    macro.patch.sort(key=lambda entry: entry[2], reverse=True)
|
|
419
|
|
420 # ----------------------------------------------------------------------
|
|
421 # macro_expand_args()
|
|
422 #
|
|
423 # Given a Macro and list of arguments (each a token list), this method
|
|
424 # returns an expanded version of a macro. The return value is a token sequence
|
|
425 # representing the replacement macro tokens
|
|
426 # ----------------------------------------------------------------------
|
|
427
|
|
def macro_expand_args(self, macro, args):
    """Return the replacement token list of *macro* applied to *args*.

    *args* is a list of token lists, one per macro parameter.  The patch
    lists prepared by macro_prescan() say where each argument goes.
    """
    # Work on a copy of the macro token sequence
    rep = [copy.copy(tok) for tok in macro.value]

    # String expansion patches.  These do not alter the length of the
    # replacement sequence.
    stringized = {}
    for argnum, pos in macro.str_patch:
        if argnum not in stringized:
            text = "".join([x.value for x in args[argnum]])
            stringized[argnum] = ('"%s"' % text).replace("\\", "\\\\")
        rep[pos] = copy.copy(rep[pos])
        rep[pos].value = stringized[argnum]

    # Variadic comma patch: when the variadic argument is empty, the comma
    # in front of it is blanked out here and removed at the end.
    comma_patch = bool(macro.variadic and not args[-1] and macro.var_comma_patch)
    if comma_patch:
        for pos in macro.var_comma_patch:
            rep[pos] = None

    # All other patches.  The patch list is sorted by descending position,
    # so a replacement that changes the length of the sequence does not
    # shift the positions still to be patched.
    expanded = {}
    for ptype, argnum, pos in macro.patch:
        if ptype == 'c':
            # Concatenation.  Argument is left unexpanded
            rep[pos:pos + 1] = args[argnum]
        elif ptype == 'e':
            # Normal expansion.  Argument is macro expanded first
            if argnum not in expanded:
                expanded[argnum] = self.expand_macros(args[argnum])
            rep[pos:pos + 1] = expanded[argnum]

    # Get rid of removed comma if necessary
    if comma_patch:
        rep = [tok for tok in rep if tok]

    return rep
|
|
468
|
|
469
|
|
470 # ----------------------------------------------------------------------
|
|
471 # expand_macros()
|
|
472 #
|
|
473 # Given a list of tokens, this function performs macro expansion.
|
|
474 # The expanded argument is a dictionary that contains macros already
|
|
475 # expanded. This is used to prevent infinite recursion.
|
|
476 # ----------------------------------------------------------------------
|
|
477
|
|
def expand_macros(self, tokens, expanded=None):
    """Expand all macros in *tokens* (in place) and return the list.

    *expanded* maps the names of macros currently being expanded to True;
    it prevents a macro from recursively expanding itself.  ``__LINE__`` is
    replaced by the token's line number.
    """
    if expanded is None:
        expanded = {}
    i = 0
    while i < len(tokens):
        t = tokens[i]
        if t.type == self.t_ID:
            if t.value in self.macros and t.value not in expanded:
                # Yes, we found a macro match
                expanded[t.value] = True

                m = self.macros[t.value]
                if not m.arglist:
                    # A simple macro
                    ex = self.expand_macros([copy.copy(_x) for _x in m.value], expanded)
                    for e in ex:
                        e.lineno = t.lineno
                    tokens[i:i+1] = ex
                    i += len(ex)
                else:
                    # A macro with arguments
                    j = i + 1
                    while j < len(tokens) and tokens[j].type in self.t_WS:
                        j += 1
                    # Bounds check: the macro name may be the last token.
                    if j < len(tokens) and tokens[j].value == '(':
                        tokcount, args, positions = self.collect_args(tokens[j:])
                        if not m.variadic and len(args) != len(m.arglist):
                            self.error(self.source, t.lineno, "Macro %s requires %d arguments" % (t.value, len(m.arglist)))
                            i = j + tokcount
                        elif m.variadic and len(args) < len(m.arglist)-1:
                            if len(m.arglist) > 2:
                                self.error(self.source, t.lineno, "Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                            else:
                                self.error(self.source, t.lineno, "Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                            i = j + tokcount
                        else:
                            if m.variadic:
                                if len(args) == len(m.arglist)-1:
                                    args.append([])
                                else:
                                    # Everything from the variadic position to
                                    # the closing ')' forms the last argument.
                                    args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                    del args[len(m.arglist):]

                            # Get macro replacement text
                            rep = self.macro_expand_args(m, args)
                            rep = self.expand_macros(rep, expanded)
                            for r in rep:
                                r.lineno = t.lineno
                            tokens[i:j+tokcount] = rep
                            i += len(rep)
                    else:
                        # Not an invocation: the name of a function-like
                        # macro without '(' is an ordinary word.  Step past
                        # it, otherwise this loop would never terminate.
                        i += 1
                del expanded[t.value]
                continue
            elif t.value == '__LINE__':
                t.type = self.t_INTEGER
                t.value = self.t_INTEGER_TYPE(t.lineno)

        i += 1
    return tokens
|
|
536
|
|
537 # ----------------------------------------------------------------------
|
|
538 # evalexpr()
|
|
539 #
|
|
540 # Evaluate an expression token sequence for the purposes of evaluating
|
|
541 # integral expressions.
|
|
542 # ----------------------------------------------------------------------
|
|
543
|
|
def evalexpr(self, tokens):
    """Evaluate the token list of an #if/#elif expression.

    ``defined NAME`` and ``defined(NAME)`` are replaced by 1 or 0, macros
    are expanded, remaining identifiers become 0, integer suffixes are
    stripped, and the C operators ``&&``, ``||`` and ``!`` are translated
    before the text is evaluated.  Returns the result, or 0 (after
    reporting an error) when the expression cannot be evaluated.
    """
    # Search for defined macros
    i = 0
    while i < len(tokens):
        if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
            j = i + 1
            needparen = False
            # Plain "0"/"1": a trailing "L" is rejected by integer-typed
            # lexers and would be stripped again below anyway.
            result = "0"
            while j < len(tokens):
                if tokens[j].type in self.t_WS:
                    j += 1
                    continue
                elif tokens[j].type == self.t_ID:
                    if tokens[j].value in self.macros:
                        result = "1"
                    else:
                        result = "0"
                    if not needparen:
                        break
                elif tokens[j].value == '(':
                    needparen = True
                elif tokens[j].value == ')':
                    break
                else:
                    self.error(self.source, tokens[i].lineno, "Malformed defined()")
                j += 1
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE(result)
            del tokens[i+1:j+1]
        i += 1
    tokens = self.expand_macros(tokens)
    for i, t in enumerate(tokens):
        if t.type == self.t_ID:
            # Identifiers left over after expansion evaluate to 0.
            tokens[i] = copy.copy(t)
            tokens[i].type = self.t_INTEGER
            tokens[i].value = self.t_INTEGER_TYPE("0")
        elif t.type == self.t_INTEGER:
            tokens[i] = copy.copy(t)
            # Strip off any trailing suffixes
            tokens[i].value = str(tokens[i].value)
            while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                tokens[i].value = tokens[i].value[:-1]

    expr = "".join([str(x.value) for x in tokens])
    expr = expr.replace("&&", " and ")
    expr = expr.replace("||", " or ")
    # Translate logical negation only; "!=" must survive unchanged.
    expr = re.sub(r'!(?!=)', ' not ', expr)
    try:
        # NOTE(review): eval() runs text derived from the preprocessed
        # source; do not feed this preprocessor untrusted input.
        result = eval(expr)
    except Exception:
        lineno = tokens[0].lineno if tokens else 0
        self.error(self.source, lineno, "Couldn't evaluate expression")
        result = 0
    return result
|
|
597
|
|
598 # ----------------------------------------------------------------------
|
|
599 # parsegen()
|
|
600 #
|
|
601 # Parse an input string, yielding preprocessed tokens.
|
|
602 # ----------------------------------------------------------------------
|
|
def parsegen(self, input, source=None):
    """Generator: preprocess *input* and yield the resulting tokens.

    *source* is the name used for ``__FILE__`` and in error messages.
    Ordinary lines are buffered in a chunk that is macro-expanded and
    flushed whenever a #define, #include or #undef changes the macro table,
    and once more at the end.  ``enable`` says whether the current
    conditional region is active; ``iftrigger`` records whether a branch of
    the current #if group has already been taken; ``ifstack`` saves both
    for the enclosing groups.
    """
    # Replace trigraph sequences
    t = trigraph(input)
    lines = self.group_lines(t)

    if not source:
        source = ""

    self.define("__FILE__ \"%s\"" % source)

    self.source = source
    chunk = []
    enable = True
    iftrigger = False
    ifstack = []

    for x in lines:
        # Find the first non-whitespace token of the line.
        for i, tok in enumerate(x):
            if tok.type not in self.t_WS:
                break
        if tok.value == '#':
            # Preprocessor directive

            # insert necessary whitespace instead of eaten tokens
            for tok in x:
                if tok.type in self.t_WS and '\n' in tok.value:
                    chunk.append(tok)

            dirtokens = self.tokenstrip(x[i+1:])
            if dirtokens:
                name = dirtokens[0].value
                args = self.tokenstrip(dirtokens[1:])
            else:
                name = ""
                args = []

            if name == 'define':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    self.define(args)
            elif name == 'include':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    # The included file redefines __FILE__ and self.source;
                    # restore both afterwards.
                    oldfile = self.macros['__FILE__']
                    for tok in self.include(args):
                        yield tok
                    self.macros['__FILE__'] = oldfile
                    self.source = source
            elif name == 'undef':
                if enable:
                    for tok in self.expand_macros(chunk):
                        yield tok
                    chunk = []
                    self.undef(args)
            elif name == 'ifdef':
                ifstack.append((enable, iftrigger))
                if enable:
                    if not args:
                        # A bare "#ifdef" used to raise IndexError.
                        self.error(self.source, dirtokens[0].lineno, "Missing macro name in #ifdef")
                    if not args or args[0].value not in self.macros:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'ifndef':
                ifstack.append((enable, iftrigger))
                if enable:
                    if not args:
                        # A bare "#ifndef" used to raise IndexError.
                        self.error(self.source, dirtokens[0].lineno, "Missing macro name in #ifndef")
                    if args and args[0].value in self.macros:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'if':
                ifstack.append((enable, iftrigger))
                if enable:
                    result = self.evalexpr(args)
                    if not result:
                        enable = False
                        iftrigger = False
                    else:
                        iftrigger = True
            elif name == 'elif':
                if ifstack:
                    if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
                        if enable:         # If already true, we flip enable False
                            enable = False
                        elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                            result = self.evalexpr(args)
                            if result:
                                enable = True
                                iftrigger = True
                else:
                    self.error(self.source, dirtokens[0].lineno, "Misplaced #elif")

            elif name == 'else':
                if ifstack:
                    if ifstack[-1][0]:
                        if enable:
                            enable = False
                        elif not iftrigger:
                            enable = True
                            iftrigger = True
                else:
                    self.error(self.source, dirtokens[0].lineno, "Misplaced #else")

            elif name == 'endif':
                if ifstack:
                    enable, iftrigger = ifstack.pop()
                else:
                    self.error(self.source, dirtokens[0].lineno, "Misplaced #endif")
            else:
                # Unknown preprocessor directive
                pass

        else:
            # Normal text
            if enable:
                chunk.extend(x)

    for tok in self.expand_macros(chunk):
        yield tok
    chunk = []
|
|
727
|
|
728 # ----------------------------------------------------------------------
|
|
729 # include()
|
|
730 #
|
|
731 # Implementation of file-inclusion
|
|
732 # ----------------------------------------------------------------------
|
|
733
|
|
def include(self, tokens):
    """Generator: process ``#include`` and yield the included file's tokens.

    *tokens* are the directive's arguments.  ``<name>`` searches the
    configured paths first, ``"name"`` searches the directories of the
    files currently being included first.  If the argument is neither
    form, it is macro-expanded and checked again.  Problems are reported
    with print() and yield nothing.
    """
    # Try to extract the filename and then process an include file
    if not tokens:
        return
    if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
        tokens = self.expand_macros(tokens)
        if not tokens:
            # Expansion left nothing to look at.
            print("Malformed #include statement")
            return

    if tokens[0].value == '<':
        # Include <...>
        i = 1
        while i < len(tokens):
            if tokens[i].value == '>':
                break
            i += 1
        else:
            print("Malformed #include <...>")
            return
        filename = "".join([x.value for x in tokens[1:i]])
        path = self.path + [""] + self.temp_path
    elif tokens[0].type == self.t_STRING:
        filename = tokens[0].value[1:-1]
        path = self.temp_path + [""] + self.path
    else:
        print("Malformed #include statement")
        return

    for p in path:
        iname = os.path.join(p, filename)
        # Only the attempt to read the file decides "not found here";
        # errors raised while parsing it must not be swallowed.
        try:
            with open(iname, "r") as f:
                data = f.read()
        except IOError:
            continue
        dname = os.path.dirname(iname)
        if dname:
            self.temp_path.insert(0, dname)
        try:
            for tok in self.parsegen(data, filename):
                yield tok
        finally:
            # Restore the search path even if parsing is abandoned.
            if dname:
                del self.temp_path[0]
        break
    else:
        print("Couldn't find '%s'" % filename)
|
|
776
|
|
777 # ----------------------------------------------------------------------
|
|
778 # define()
|
|
779 #
|
|
780 # Define a new macro
|
|
781 # ----------------------------------------------------------------------
|
|
782
|
|
def define(self, tokens):
    """Define a macro from *tokens* (a token list or a definition string).

    Handles ``NAME``, ``NAME value`` and ``NAME(args) value``, including
    variadic parameters written as ``...`` (named ``__VA_ARGS__``) or
    ``name...``.  Malformed definitions are reported with print() and
    leave the macro table unchanged.
    """
    # "unicode" only exists on Python 2; naming it unguarded would raise
    # NameError on every call under Python 3.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    if isinstance(tokens, string_types):
        tokens = self.tokenize(tokens)

    linetok = tokens
    try:
        name = linetok[0]
        if len(linetok) > 1:
            mtype = linetok[1]
        else:
            mtype = None
        if not mtype:
            m = Macro(name.value, [])
            self.macros[name.value] = m
        elif mtype.type in self.t_WS:
            # A normal macro
            m = Macro(name.value, self.tokenstrip(linetok[2:]))
            self.macros[name.value] = m
        elif mtype.value == '(':
            # A macro with arguments
            tokcount, args, positions = self.collect_args(linetok[1:])
            variadic = False
            for a in args:
                if variadic:
                    print("No more arguments may follow a variadic argument")
                    break
                astr = "".join([str(_i.value) for _i in a])
                if astr == "...":
                    # Anonymous variadic parameter: name it __VA_ARGS__.
                    variadic = True
                    a[0].type = self.t_ID
                    a[0].value = '__VA_ARGS__'
                    del a[1:]
                    continue
                elif astr[-3:] == "..." and a[0].type == self.t_ID:
                    variadic = True
                    del a[1:]
                    # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                    # of macro expansion
                    if a[0].value[-3:] == '...':
                        a[0].value = a[0].value[:-3]
                    continue
                if len(a) > 1 or a[0].type != self.t_ID:
                    print("Invalid macro argument")
                    break
            else:
                # Every parameter was valid: build the macro body.
                mvalue = self.tokenstrip(linetok[1+tokcount:])
                # Drop whitespace on either side of '##'.
                i = 0
                while i < len(mvalue):
                    if i+1 < len(mvalue):
                        if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                            del mvalue[i]
                            continue
                        elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                            del mvalue[i+1]
                    i += 1
                m = Macro(name.value, mvalue, [x[0].value for x in args], variadic)
                self.macro_prescan(m)
                self.macros[name.value] = m
        else:
            print("Bad macro definition")
    except LookupError:
        print("Bad macro definition")
|
|
846
|
|
847 # ----------------------------------------------------------------------
|
|
848 # undef()
|
|
849 #
|
|
850 # Undefine a macro
|
|
851 # ----------------------------------------------------------------------
|
|
852
|
|
def undef(self, tokens):
    """Remove the macro named by the first token of *tokens*.

    An unknown macro name, or a directive without a name, is ignored.
    """
    if not tokens:
        # "#undef" without a name used to raise IndexError.
        return
    macro_name = tokens[0].value
    self.macros.pop(macro_name, None)
|
|
859
|
|
860 # ----------------------------------------------------------------------
|
|
861 # parse()
|
|
862 #
|
|
863 # Parse input text.
|
|
864 # ----------------------------------------------------------------------
|
|
def parse(self, input, source=None, ignore=None):
    """Start preprocessing *input*; fetch the results with token().

    *ignore* is a container of token types that token() skips.  The
    default is a fresh empty dict per call, not one shared between calls.
    """
    if ignore is None:
        ignore = {}
    self.ignore = ignore
    self.parser = self.parsegen(input, source)
|
|
868
|
|
869 # ----------------------------------------------------------------------
|
|
870 # token()
|
|
871 #
|
|
872 # Method to return individual tokens
|
|
873 # ----------------------------------------------------------------------
|
|
def token(self):
    """Return the next preprocessed token, or None when there are no more.

    Tokens whose type is in ``self.ignore`` are skipped.  Calling this
    before parse(), or again after the input is exhausted, returns None.
    """
    if self.parser is None:
        # Nothing to read: parse() not called yet, or input already used up.
        return None
    for tok in self.parser:
        if tok.type not in self.ignore:
            return tok
    self.parser = None
    return None
|
|
882
|
|
if __name__ == '__main__':
    import sys

    import ply.lex as lex

    # Kept at module level: the lexer is built from the t_* rules defined in
    # this module, and Preprocessor() falls back to the global name "lex".
    lexer = lex.lex()

    # Run a preprocessor on the file named on the command line.
    with open(sys.argv[1]) as f:
        text = f.read()

    p = Preprocessor(lexer)
    p.parse(text, sys.argv[1])
    while True:
        tok = p.token()
        if not tok:
            break
        print(p.source, tok)
|
|
898
|
|
899
|
|
900
|
|
901
|
|
902
|
|
903
|
|
904
|
|
905
|
|
906
|
|
907
|
|
908
|