Mercurial > repo
comparison ply-3.8/build/lib.linux-x86_64-2.7/ply/cpp.py @ 7268:a1845676eaa0
<ais523> ` (cd ply-3.8; python setup.py build)
author | HackBot |
---|---|
date | Wed, 23 Mar 2016 02:43:21 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
7267:343ff337a19b | 7268:a1845676eaa0 |
---|---|
1 # ----------------------------------------------------------------------------- | |
2 # cpp.py | |
3 # | |
4 # Author: David Beazley (http://www.dabeaz.com) | |
5 # Copyright (C) 2007 | |
6 # All rights reserved | |
7 # | |
8 # This module implements an ANSI-C style lexical preprocessor for PLY. | |
9 # ----------------------------------------------------------------------------- | |
10 from __future__ import generators | |
11 | |
12 # ----------------------------------------------------------------------------- | |
13 # Default preprocessor lexer definitions. These tokens are enough to get | |
14 # a basic preprocessor working. Other modules may import these if they want | |
15 # ----------------------------------------------------------------------------- | |
16 | |
# Token type names produced by the default preprocessor lexer.
tokens = (
    'CPP_ID',
    'CPP_INTEGER',
    'CPP_FLOAT',
    'CPP_STRING',
    'CPP_CHAR',
    'CPP_WS',
    'CPP_COMMENT1',
    'CPP_COMMENT2',
    'CPP_POUND',
    'CPP_DPOUND',
)

# Single characters that the lexer hands back as literal tokens.
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
22 | |
23 # Whitespace | |
def t_CPP_WS(t):
    r'\s+'
    # The string above is the PLY token regex, not documentation.
    # Keep the lexer's line counter in step with newlines swallowed as whitespace.
    newline_count = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + newline_count
    return t
28 | |
# Stringizing operator '#' (also introduces preprocessor directives).
t_CPP_POUND = r'\#'

# Token-pasting operator '##'.
t_CPP_DPOUND = r'\#\#'

# Identifier: a letter or underscore followed by word characters.
t_CPP_ID = r'[A-Za-z_][\w_]*'
34 | |
35 # Integer literal | |
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    # The string above is the PLY token regex: hex or decimal digits with an
    # optional u/l suffix.  The token is passed through untouched.
    return t


# Register the rule under the name PLY looks for (t_<TOKEN>).
t_CPP_INTEGER = CPP_INTEGER
41 | |
42 # Floating literal | |
# Floating literal: digits with a fraction and optional exponent, or digits
# with a mandatory exponent, plus an optional l/f suffix.  The spaces around
# '|' are insignificant because PLY compiles rules in verbose mode.
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
44 | |
45 # String literal | |
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    # The string above is the PLY token regex for a double-quoted literal.
    # A literal can span lines through backslash-newline; count those lines.
    embedded_newlines = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + embedded_newlines
    return t
50 | |
51 # Character constant 'c' or L'c' | |
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    # The string above is the PLY token regex for 'c' and L'c' constants.
    # Account for any newline embedded through a backslash escape.
    embedded_newlines = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + embedded_newlines
    return t
56 | |
57 # Comment | |
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    # The string above is the PLY token regex for a /* ... */ comment.
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count

    # The comment is handed on as whitespace: as many newlines as it spanned,
    # or a single space when it sat on one line.
    t.type = 'CPP_WS'
    if newline_count:
        t.value = '\n' * newline_count
    else:
        t.value = ' '
    return t
65 | |
66 # Line comment | |
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # The string above is the PLY token regex for a // line comment; it also
    # consumes the newline that terminates the comment (if there is one).

    # Keep the line counter correct for the newline swallowed by the regex.
    t.lexer.lineno += t.value.count("\n")

    # Replace the comment with a single '\n' whitespace token.
    t.type = 'CPP_WS'
    t.value = '\n'

    # The token must be returned: a rule that returns nothing makes PLY drop
    # the token, which would lose the line break and merge the next source
    # line into the current one.
    return t
71 | |
def t_error(t):
    # Unrecognised input: emit the offending character as its own token
    # (type and value are both that character) and step past it.
    bad_char = t.value[0]
    t.type = bad_char
    t.value = bad_char
    t.lexer.skip(1)
    return t
77 | |
78 import re | |
79 import copy | |
80 import time | |
81 import os.path | |
82 | |
83 # ----------------------------------------------------------------------------- | |
84 # trigraph() | |
85 # | |
86 # Given an input string, this function replaces all trigraph sequences. | |
87 # The following mapping is used: | |
88 # | |
89 # ??= # | |
90 # ??/ \ | |
91 # ??' ^ | |
92 # ??( [ | |
93 # ??) ] | |
94 # ??! | | |
95 # ??< { | |
96 # ??> } | |
97 # ??- ~ | |
98 # ----------------------------------------------------------------------------- | |
99 | |
# Matches '??' followed by one of the nine trigraph selector characters.
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')

# Trigraph selector character -> the character it stands for.
_trigraph_rep = dict([
    ('=', '#'),
    ('/', '\\'),
    ("'", '^'),
    ('(', '['),
    (')', ']'),
    ('!', '|'),
    ('<', '{'),
    ('>', '}'),
    ('-', '~'),
])

def trigraph(input):
    """Return *input* with every trigraph sequence replaced by its character."""
    def _replacement(match):
        # The last character of the matched '??x' selects the replacement.
        selector = match.group()[-1]
        return _trigraph_rep[selector]

    return _trigraph_pat.sub(_replacement, input)
115 | |
116 # ------------------------------------------------------------------ | |
117 # Macro object | |
118 # | |
119 # This object holds information about preprocessor macros | |
120 # | |
121 # .name - Macro name (string) | |
122 # .value - Macro value (a list of tokens) | |
123 # .arglist - List of argument names | |
124 # .variadic - Boolean indicating whether or not variadic macro | |
125 # .vararg - Name of the variadic parameter | |
126 # | |
127 # When a macro is created, the macro replacement token sequence is | |
128 # pre-scanned and used to create patch lists that are later used | |
129 # during macro expansion | |
130 # ------------------------------------------------------------------ | |
131 | |
class Macro(object):
    """Record describing one preprocessor macro.

    Attributes set here: ``name``, ``value`` (replacement token list),
    ``arglist`` (parameter names or None), ``variadic``, ``source`` (always
    initialised to None) and, for variadic macros only, ``vararg`` (the last
    name in ``arglist``).
    """

    def __init__(self,name,value,arglist=None,variadic=False):
        self.name, self.value = name, value
        self.arglist, self.variadic = arglist, variadic
        self.source = None
        if self.variadic:
            # The variadic parameter is the final entry of the argument list.
            self.vararg = arglist[-1]
141 | |
142 # ------------------------------------------------------------------ | |
143 # Preprocessor object | |
144 # | |
145 # Object representing a preprocessor. Contains macro definitions, | |
146 # include directories, and other information | |
147 # ------------------------------------------------------------------ | |
148 | |
class Preprocessor(object):
    """ANSI-C style preprocessor driven by a PLY-compatible lexer.

    Holds the macro table (``macros``), the include search path (``path``),
    the stack of directories of files currently being included
    (``temp_path``) and the token generator of the current parse (``parser``).
    """

    # String types accepted by define().  ``unicode`` only exists on Python 2.
    try:
        _STRING_TYPES = (str, unicode)
    except NameError:
        _STRING_TYPES = (str,)

    def __init__(self,lexer=None):
        """Create a preprocessor around *lexer* (default: the last lexer built by ply.lex)."""
        if lexer is None:
            # Imported here because the module never imports ply.lex at top
            # level; without this the default would raise NameError.
            import ply.lex as lex
            lexer = lex.lexer
        self.lexer = lexer
        self.macros = { }
        self.path = []
        self.temp_path = []
        # Set properly by parsegen()/parse(); initialised so that error() and
        # token() are usable before the first parse.
        self.source = None
        self.ignore = {}

        # Probe the lexer for selected tokens
        self.lexprobe()

        tm = time.localtime()
        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
        self.parser = None

    # -----------------------------------------------------------------------------
    # tokenize()
    # -----------------------------------------------------------------------------

    def tokenize(self,text):
        """Utility function: run the lexer over *text* and return the list of tokens."""
        tokens = []
        self.lexer.input(text)
        while True:
            tok = self.lexer.token()
            if not tok: break
            tokens.append(tok)
        return tokens

    # ----------------------------------------------------------------------
    # error()
    # ----------------------------------------------------------------------

    def error(self,file,line,msg):
        """Report a preprocessor error/warning by printing ``file:line msg``."""
        print("%s:%d %s" % (file,line,msg))

    # ----------------------------------------------------------------------
    # lexprobe()
    # ----------------------------------------------------------------------

    def lexprobe(self):
        """Probe the lexer to discover the token types the preprocessor relies on.

        Sets ``t_ID``, ``t_INTEGER``, ``t_INTEGER_TYPE``, ``t_STRING``,
        ``t_SPACE``, ``t_NEWLINE`` and ``t_WS`` by lexing sample inputs, so
        that the preprocessor works regardless of how tokens are named.
        Problems are reported with print(); nothing is raised.
        """

        # Determine the token type for identifiers
        self.lexer.input("identifier")
        tok = self.lexer.token()
        if not tok or tok.value != "identifier":
            print("Couldn't determine identifier type")
        else:
            self.t_ID = tok.type

        # Determine the token type for integers
        self.lexer.input("12345")
        tok = self.lexer.token()
        if not tok or int(tok.value) != 12345:
            print("Couldn't determine integer type")
        else:
            self.t_INTEGER = tok.type
            self.t_INTEGER_TYPE = type(tok.value)

        # Determine the token type for strings enclosed in double quotes
        self.lexer.input("\"filename\"")
        tok = self.lexer.token()
        if not tok or tok.value != "\"filename\"":
            print("Couldn't determine string type")
        else:
            self.t_STRING = tok.type

        # Determine the token type for whitespace--if any
        self.lexer.input("  ")
        tok = self.lexer.token()
        if not tok or tok.value != "  ":
            self.t_SPACE = None
        else:
            self.t_SPACE = tok.type

        # Determine the token type for newlines
        self.lexer.input("\n")
        tok = self.lexer.token()
        if not tok or tok.value != "\n":
            self.t_NEWLINE = None
            print("Couldn't determine token for newlines")
        else:
            self.t_NEWLINE = tok.type

        self.t_WS = (self.t_SPACE, self.t_NEWLINE)

        # Check for other characters used by the preprocessor
        chars = [ '<','>','#','##','\\','(',')',',','.']
        for c in chars:
            self.lexer.input(c)
            tok = self.lexer.token()
            if not tok or tok.value != c:
                print("Unable to lex '%s' required for preprocessor" % c)

    # ----------------------------------------------------------------------
    # add_path()
    # ----------------------------------------------------------------------

    def add_path(self,path):
        """Append *path* to the include search path."""
        self.path.append(path)

    # ----------------------------------------------------------------------
    # group_lines()
    # ----------------------------------------------------------------------

    def group_lines(self,input):
        """Yield the tokens of *input* one logical line at a time.

        Trailing whitespace is removed from every physical line and a line
        ending in a backslash is joined with the following line(s).  Each
        yielded list ends with the whitespace token containing the newline,
        except possibly the last one.
        """
        line_lexer = self.lexer.clone()
        lines = [x.rstrip() for x in input.splitlines()]
        # range() rather than the Python 2-only xrange()
        for i in range(len(lines)):
            j = i+1
            while lines[i].endswith('\\') and (j < len(lines)):
                lines[i] = lines[i][:-1]+lines[j]
                # Leave an empty line behind so line numbers stay aligned
                lines[j] = ""
                j += 1

        input = "\n".join(lines)
        line_lexer.input(input)
        line_lexer.lineno = 1

        current_line = []
        while True:
            tok = line_lexer.token()
            if not tok:
                break
            current_line.append(tok)
            if tok.type in self.t_WS and '\n' in tok.value:
                yield current_line
                current_line = []

        if current_line:
            yield current_line

    # ----------------------------------------------------------------------
    # tokenstrip()
    # ----------------------------------------------------------------------

    def tokenstrip(self,tokens):
        """Remove leading/trailing whitespace tokens from *tokens* in place and return it."""
        i = 0
        while i < len(tokens) and tokens[i].type in self.t_WS:
            i += 1
        del tokens[:i]
        i = len(tokens)-1
        while i >= 0 and tokens[i].type in self.t_WS:
            i -= 1
        del tokens[i+1:]
        return tokens


    # ----------------------------------------------------------------------
    # collect_args()
    # ----------------------------------------------------------------------

    def collect_args(self,tokenlist):
        """Collect comma separated, parenthesised arguments from *tokenlist*.

        Returns a tuple ``(tokencount, args, positions)`` where tokencount is
        the number of tokens consumed, args is a list of arguments (each a
        whitespace-stripped list of tokens) and positions holds the starting
        index of each argument followed by the index of the closing ')'.
        Nested parentheses and the commas inside them do not start new
        arguments.  On a missing '(' or ')' an error is reported and
        ``(0, [], [])`` is returned.
        """
        args = []
        positions = []
        current_arg = []
        nesting = 1
        tokenlen = len(tokenlist)

        # Search for the opening '('.
        i = 0
        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
            i += 1

        if (i < tokenlen) and (tokenlist[i].value == '('):
            positions.append(i+1)
        else:
            self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
            return 0, [], []

        i += 1

        while i < tokenlen:
            t = tokenlist[i]
            if t.value == '(':
                current_arg.append(t)
                nesting += 1
            elif t.value == ')':
                nesting -= 1
                if nesting == 0:
                    if current_arg:
                        args.append(self.tokenstrip(current_arg))
                        positions.append(i)
                    return i+1,args,positions
                current_arg.append(t)
            elif t.value == ',' and nesting == 1:
                args.append(self.tokenstrip(current_arg))
                positions.append(i+1)
                current_arg = []
            else:
                current_arg.append(t)
            i += 1

        # Missing end argument
        self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
        return 0, [],[]

    # ----------------------------------------------------------------------
    # macro_prescan()
    # ----------------------------------------------------------------------

    def macro_prescan(self,macro):
        """Scan a macro's replacement tokens and record where arguments get patched in.

        Fills ``macro.patch`` (entries ``('c'|'e', argnum, index)`` sorted by
        descending index), ``macro.str_patch`` (``(argnum, index)`` for
        '#arg' stringizing) and ``macro.var_comma_patch`` (indices of a ','
        preceding '##' and the variadic parameter).  The '#' and the '##'
        preceding an argument are removed from ``macro.value``.
        """
        macro.patch     = []             # Standard macro arguments
        macro.str_patch = []             # String conversion expansion
        macro.var_comma_patch = []       # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i-1]
                    macro.str_patch.append((argnum,i-1))
                    # After the deletion, index i already refers to the next token
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    macro.patch.append(('c',argnum,i-1))
                    del macro.value[i-1]
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    macro.patch.append(('c',argnum,i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e',argnum,i))
            elif macro.value[i].value == '##':
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        macro.patch.sort(key=lambda x: x[2],reverse=True)

    # ----------------------------------------------------------------------
    # macro_expand_args()
    # ----------------------------------------------------------------------

    def macro_expand_args(self,macro,args):
        """Return the replacement token sequence of *macro* with *args* substituted.

        *args* is a list of token lists, one per macro parameter.  The patch
        lists computed by macro_prescan() drive the substitution.
        """
        # Make a copy of the macro token sequence
        rep = [copy.copy(_x) for _x in macro.value]

        # Make string expansion patches.  These do not alter the length of the replacement sequence

        str_expansion = {}
        for argnum, i in macro.str_patch:
            if argnum not in str_expansion:
                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
            rep[i] = copy.copy(rep[i])
            rep[i].value = str_expansion[argnum]

        # Make the variadic macro comma patch.  If the variadic macro argument is empty,
        # the comma in front of it is removed as well.
        comma_patch = False
        if macro.variadic and not args[-1]:
            for i in macro.var_comma_patch:
                rep[i] = None
                comma_patch = True

        # Make all other patches.   The order of these matters.  It is assumed that the patch list
        # has been sorted in reverse order of patch location since replacements will cause the
        # size of the replacement sequence to expand from the patch point.

        expanded = { }
        for ptype, argnum, i in macro.patch:
            # Concatenation.   Argument is left unexpanded
            if ptype == 'c':
                rep[i:i+1] = args[argnum]
            # Normal expansion.  Argument is macro expanded first
            elif ptype == 'e':
                if argnum not in expanded:
                    expanded[argnum] = self.expand_macros(args[argnum])
                rep[i:i+1] = expanded[argnum]

        # Get rid of removed comma if necessary
        if comma_patch:
            rep = [_i for _i in rep if _i]

        return rep


    # ----------------------------------------------------------------------
    # expand_macros()
    # ----------------------------------------------------------------------

    def expand_macros(self,tokens,expanded=None):
        """Macro-expand *tokens* in place and return the list.

        *expanded* is a dictionary of macro names currently being expanded;
        it is used to prevent infinite recursion.
        """
        if expanded is None:
            expanded = {}
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.type == self.t_ID:
                if t.value in self.macros and t.value not in expanded:
                    # Yes, we found a macro match
                    expanded[t.value] = True

                    m = self.macros[t.value]
                    if not m.arglist:
                        # A simple macro
                        ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
                        for e in ex:
                            e.lineno = t.lineno
                        tokens[i:i+1] = ex
                        i += len(ex)
                    else:
                        # A macro with arguments
                        j = i + 1
                        while j < len(tokens) and tokens[j].type in self.t_WS:
                            j += 1
                        if j == len(tokens) or tokens[j].value != '(':
                            # A function-like macro name without an argument
                            # list is not an invocation: leave it untouched
                            # and move on.  (Without this, the name at the
                            # end of the input raised IndexError and anywhere
                            # else the loop never advanced.)
                            i = j
                        else:
                            tokcount,args,positions = self.collect_args(tokens[j:])
                            if not m.variadic and len(args) !=  len(m.arglist):
                                self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
                                i = j + tokcount
                            elif m.variadic and len(args) < len(m.arglist)-1:
                                if len(m.arglist) > 2:
                                    self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                                else:
                                    self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                                i = j + tokcount
                            else:
                                if m.variadic:
                                    if len(args) == len(m.arglist)-1:
                                        args.append([])
                                    else:
                                        # Everything from the start of the variadic argument up to
                                        # the closing ')' becomes one argument, commas included
                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                        del args[len(m.arglist):]

                                # Get macro replacement text
                                rep = self.macro_expand_args(m,args)
                                rep = self.expand_macros(rep,expanded)
                                for r in rep:
                                    r.lineno = t.lineno
                                tokens[i:j+tokcount] = rep
                                i += len(rep)
                    del expanded[t.value]
                    continue
                elif t.value == '__LINE__':
                    t.type = self.t_INTEGER
                    t.value = self.t_INTEGER_TYPE(t.lineno)

            i += 1
        return tokens

    # ----------------------------------------------------------------------
    # evalexpr()
    # ----------------------------------------------------------------------

    def evalexpr(self,tokens):
        """Evaluate the token sequence of an #if/#elif expression and return the result.

        ``defined NAME`` / ``defined(NAME)`` are replaced by 1 or 0, macros
        are expanded, remaining identifiers become 0 and integer suffixes are
        dropped.  If the expression cannot be evaluated an error is reported
        and 0 is returned.
        """
        # Search for defined macros
        i = 0
        while i < len(tokens):
            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                j = i + 1
                needparen = False
                # Plain "0"/"1": the Python 2 long literals "0L"/"1L" are
                # neither valid Python 3 nor accepted by int().
                result = "0"
                while j < len(tokens):
                    if tokens[j].type in self.t_WS:
                        j += 1
                        continue
                    elif tokens[j].type == self.t_ID:
                        if tokens[j].value in self.macros:
                            result = "1"
                        else:
                            result = "0"
                        if not needparen: break
                    elif tokens[j].value == '(':
                        needparen = True
                    elif tokens[j].value == ')':
                        break
                    else:
                        self.error(self.source,tokens[i].lineno,"Malformed defined()")
                    j += 1
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE(result)
                del tokens[i+1:j+1]
            i += 1
        tokens = self.expand_macros(tokens)
        for i,t in enumerate(tokens):
            if t.type == self.t_ID:
                # Identifiers that survive macro expansion evaluate to 0
                tokens[i] = copy.copy(t)
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE("0")
            elif t.type == self.t_INTEGER:
                tokens[i] = copy.copy(t)
                # Strip off any trailing suffixes
                tokens[i].value = str(tokens[i].value)
                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                    tokens[i].value = tokens[i].value[:-1]

        expr = "".join([str(x.value) for x in tokens])
        expr = expr.replace("&&"," and ")
        expr = expr.replace("||"," or ")
        expr = expr.replace("!"," not ")
        # The previous replacement also rewrites "!=" -- put it back.
        expr = expr.replace(" not ="," !=")
        try:
            # NOTE(review): eval() runs text derived from the file being
            # preprocessed.  Do not feed untrusted sources through #if/#elif
            # without replacing this with a real expression evaluator.
            result = eval(expr)
        except Exception:
            # Exception rather than the Python 2-only StandardError
            lineno = tokens[0].lineno if tokens else 0
            self.error(self.source,lineno,"Couldn't evaluate expression")
            result = 0
        return result

    # ----------------------------------------------------------------------
    # parsegen()
    # ----------------------------------------------------------------------
    def parsegen(self,input,source=None):
        """Generator that preprocesses the string *input* and yields the resulting tokens.

        *source* is the name used for ``__FILE__`` and in error messages.
        Handles #define, #include, #undef, #ifdef, #ifndef, #if, #elif, #else
        and #endif; unknown directives are ignored.
        """

        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []
        enable = True
        iftrigger = False
        ifstack = []

        for x in lines:
            # Find the first non-whitespace token of the line
            for i,tok in enumerate(x):
                if tok.type not in self.t_WS: break
            if tok.value == '#':
                # Preprocessor directive

                # insert necessary whitespace instead of eaten tokens
                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        # The included file redefined __FILE__ and self.source
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if not args:
                            # A missing name is reported and treated as "not defined"
                            self.error(self.source,dirtokens[0].lineno,"Missing macro name in #ifdef")
                        if not args or args[0].value not in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if not args:
                            # A missing name is reported and treated as "not defined"
                            self.error(self.source,dirtokens[0].lineno,"Missing macro name in #ifndef")
                        if args and args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
                            if enable:         # If already true, we flip enable False
                                enable = False
                            elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable  = True
                                    iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")

                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #else")

                elif name == 'endif':
                    if ifstack:
                        enable,iftrigger = ifstack.pop()
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass

            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []

    # ----------------------------------------------------------------------
    # include()
    # ----------------------------------------------------------------------

    def include(self,tokens):
        """Generator implementing #include: yield the tokens of the included file.

        *tokens* are the directive's arguments.  ``<name>`` searches ``path``
        first, ``"name"`` searches the directories of the including files
        first.  Problems are reported with print().
        """
        # Try to extract the filename and then process an include file
        if not tokens:
            return
        if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
            tokens = self.expand_macros(tokens)
        if not tokens:
            # The argument was a macro that expanded to nothing
            print("Malformed #include statement")
            return

        if tokens[0].value == '<':
            # Include <...>
            i = 1
            while i < len(tokens):
                if tokens[i].value == '>':
                    break
                i += 1
            else:
                print("Malformed #include <...>")
                return
            filename = "".join([x.value for x in tokens[1:i]])
            path = self.path + [""] + self.temp_path
        elif tokens[0].type == self.t_STRING:
            filename = tokens[0].value[1:-1]
            path = self.temp_path + [""] + self.path
        else:
            print("Malformed #include statement")
            return
        for p in path:
            iname = os.path.join(p,filename)
            try:
                # 'with' closes the file even if reading fails
                with open(iname,"r") as f:
                    data = f.read()
            except IOError:
                # Not found/readable here: try the next directory
                continue
            dname = os.path.dirname(iname)
            if dname:
                self.temp_path.insert(0,dname)
            try:
                for tok in self.parsegen(data,filename):
                    yield tok
            finally:
                # Always pop the directory pushed above
                if dname:
                    del self.temp_path[0]
            break
        else:
            print("Couldn't find '%s'" % filename)

    # ----------------------------------------------------------------------
    # define()
    # ----------------------------------------------------------------------

    def define(self,tokens):
        """Define a new macro from *tokens* (a token list or a string such as ``"NAME value"``).

        Malformed definitions are reported with print() and ignored.
        """
        if isinstance(tokens,self._STRING_TYPES):
            tokens = self.tokenize(tokens)

        linetok = tokens
        try:
            name = linetok[0]
            if len(linetok) > 1:
                mtype = linetok[1]
            else:
                mtype = None
            if not mtype:
                m = Macro(name.value,[])
                self.macros[name.value] = m
            elif mtype.type in self.t_WS:
                # A normal macro
                m = Macro(name.value,self.tokenstrip(linetok[2:]))
                self.macros[name.value] = m
            elif mtype.value == '(':
                # A macro with arguments
                tokcount, args, positions = self.collect_args(linetok[1:])
                variadic = False
                for a in args:
                    if variadic:
                        print("No more arguments may follow a variadic argument")
                        break
                    astr = "".join([str(_i.value) for _i in a])
                    if astr == "...":
                        variadic = True
                        a[0].type = self.t_ID
                        a[0].value = '__VA_ARGS__'
                        del a[1:]
                        continue
                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
                        variadic = True
                        del a[1:]
                        # If, for some reason, "." is part of the identifier, strip off the name for the purposes
                        # of macro expansion
                        if a[0].value[-3:] == '...':
                            a[0].value = a[0].value[:-3]
                        continue
                    if len(a) > 1 or a[0].type != self.t_ID:
                        print("Invalid macro argument")
                        break
                else:
                    # Only reached when every argument was accepted
                    mvalue = self.tokenstrip(linetok[1+tokcount:])
                    # Drop whitespace on either side of '##'
                    i = 0
                    while i < len(mvalue):
                        if i+1 < len(mvalue):
                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                                del mvalue[i]
                                continue
                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                                del mvalue[i+1]
                        i += 1
                    m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
                    self.macro_prescan(m)
                    self.macros[name.value] = m
            else:
                print("Bad macro definition")
        except LookupError:
            print("Bad macro definition")

    # ----------------------------------------------------------------------
    # undef()
    # ----------------------------------------------------------------------

    def undef(self,tokens):
        """Undefine the macro named by the first token; unknown or missing names are ignored."""
        if not tokens:
            return
        try:
            del self.macros[tokens[0].value]
        except LookupError:
            pass

    # ----------------------------------------------------------------------
    # parse()
    # ----------------------------------------------------------------------
    def parse(self,input,source=None,ignore=None):
        """Start preprocessing *input*; tokens are then retrieved with token().

        *ignore* is a container of token types that token() skips
        (default: nothing is skipped).
        """
        # None instead of a shared mutable {} default
        self.ignore = {} if ignore is None else ignore
        self.parser = self.parsegen(input,source)

    # ----------------------------------------------------------------------
    # token()
    # ----------------------------------------------------------------------
    def token(self):
        """Return the next token not listed in ``ignore``, or None when input is exhausted."""
        if self.parser is None:
            # parse() has not been called, or the input is already used up
            return None
        try:
            while True:
                tok = next(self.parser)
                if tok.type not in self.ignore: return tok
        except StopIteration:
            self.parser = None
            return None
882 | |
if __name__ == '__main__':
    # Command-line driver: preprocess the file named by the first argument
    # and print every resulting token together with its source name.
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    # 'with' makes sure the input file is closed once it has been read
    with open(sys.argv[1]) as f:
        input = f.read()

    p = Preprocessor(lexer)
    p.parse(input,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print(p.source, tok)
898 | |
899 | |
900 | |
901 | |
902 | |
903 | |
904 | |
905 | |
906 | |
907 | |
908 |