7267
|
1 # ----------------------------------------------------------------------
|
|
2 # clex.py
|
|
3 #
|
|
4 # A lexer for ANSI C.
|
|
5 # ----------------------------------------------------------------------
|
|
6
|
|
7 import sys
|
|
8 sys.path.insert(0,"../..")
|
|
9
|
|
10 import ply.lex as lex
|
|
11
|
|
12 # Reserved words
|
|
# Reserved words of the language, written as upper-case token type names.
# The order is kept as-is because ``tokens`` is built by extending this tuple.
reserved = (
    "AUTO", "BREAK", "CASE", "CHAR",
    "CONST", "CONTINUE", "DEFAULT", "DO",
    "DOUBLE", "ELSE", "ENUM", "EXTERN",
    "FLOAT", "FOR", "GOTO", "IF",
    "INT", "LONG", "REGISTER", "RETURN",
    "SHORT", "SIGNED", "SIZEOF", "STATIC",
    "STRUCT", "SWITCH", "TYPEDEF", "UNION",
    "UNSIGNED", "VOID", "VOLATILE", "WHILE",
)
|
|
19
|
|
# Complete list of token type names: every reserved word followed by the
# literal, operator and punctuation tokens.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant,
    # char const). TYPEID is declared here as well.
    "ID", "TYPEID", "ICONST", "FCONST", "SCONST", "CCONST",

    # Operators (+, -, *, /, %)
    "PLUS", "MINUS", "TIMES", "DIVIDE", "MOD",
    # Operators (|, &, ~, ^, <<, >>)
    "OR", "AND", "NOT", "XOR", "LSHIFT", "RSHIFT",
    # Operators (||, &&, !)
    "LOR", "LAND", "LNOT",
    # Operators (<, <=, >, >=, ==, !=)
    "LT", "LE", "GT", "GE", "EQ", "NE",

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    "EQUALS", "TIMESEQUAL", "DIVEQUAL", "MODEQUAL", "PLUSEQUAL", "MINUSEQUAL",
    "LSHIFTEQUAL", "RSHIFTEQUAL", "ANDEQUAL", "XOREQUAL", "OREQUAL",

    # Increment/decrement (++, --)
    "PLUSPLUS", "MINUSMINUS",

    # Structure dereference (->)
    "ARROW",

    # Conditional operator (?)
    "CONDOP",

    # Delimiters ( ) [ ] { } , . ; :
    "LPAREN", "RPAREN",
    "LBRACKET", "RBRACKET",
    "LBRACE", "RBRACE",
    "COMMA", "PERIOD", "SEMI", "COLON",

    # Ellipsis (...)
    "ELLIPSIS",
)
|
|
52
|
|
# Characters dropped between tokens: space, horizontal tab and form feed (\x0c).
t_ignore = " \t\x0c"
|
|
55
|
|
56 # Newlines
|
|
def t_NEWLINE(t):
    r'\n+'
    # Advance the lexer's line counter by the number of newlines in the match.
    # Nothing is returned, so the function hands back None for this match.
    newline_total = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + newline_total
|
|
60
|
|
# Operators.
# Each name below is a regular-expression string for one operator token;
# characters that are regex metacharacters are backslash-escaped.

# Arithmetic: + - * / %
t_PLUS = r"\+"
t_MINUS = r"-"
t_TIMES = r"\*"
t_DIVIDE = r"/"
t_MOD = r"%"

# Bitwise: | & ~ ^ << >>
t_OR = r"\|"
t_AND = r"&"
t_NOT = r"~"
t_XOR = r"\^"
t_LSHIFT = r"<<"
t_RSHIFT = r">>"

# Logical: || && !
t_LOR = r"\|\|"
t_LAND = r"&&"
t_LNOT = r"!"

# Comparison: < > <= >= == !=
t_LT = r"<"
t_GT = r">"
t_LE = r"<="
t_GE = r">="
t_EQ = r"=="
t_NE = r"!="
|
|
82
|
|
# Assignment operators: plain "=" followed by the compound forms.

t_EQUALS = r"="

# Arithmetic compound assignment: *= /= %= += -=
t_TIMESEQUAL = r"\*="
t_DIVEQUAL = r"/="
t_MODEQUAL = r"%="
t_PLUSEQUAL = r"\+="
t_MINUSEQUAL = r"-="

# Shift and bitwise compound assignment: <<= >>= &= |= ^=
t_LSHIFTEQUAL = r"<<="
t_RSHIFTEQUAL = r">>="
t_ANDEQUAL = r"&="
t_OREQUAL = r"\|="
t_XOREQUAL = r"\^="
|
|
96
|
|
# Increment / decrement: ++ and --
t_PLUSPLUS = r"\+\+"
t_MINUSMINUS = r"--"

# Structure dereference: ->
t_ARROW = r"->"

# Conditional operator: ? (escaped because "?" is a regex quantifier)
t_CONDOP = r"\?"
|
|
106
|
|
# Delimiters: ( ) [ ] { } , . ; : and the three-dot ellipsis.

# Bracketing pairs
t_LPAREN = r"\("
t_RPAREN = r"\)"
t_LBRACKET = r"\["
t_RBRACKET = r"\]"
t_LBRACE = r"\{"
t_RBRACE = r"\}"

# Separators and punctuation
t_COMMA = r","
t_PERIOD = r"\."
t_SEMI = r";"
t_COLON = r":"
t_ELLIPSIS = r"\.\.\."
|
|
119
|
|
120 # Identifiers and reserved words
|
|
121
|
|
# Maps the lower-case spelling of every reserved word to its token type
# name as listed in ``reserved`` (for example "while" -> "WHILE").
reserved_map = {keyword.lower(): keyword for keyword in reserved}
|
|
125
|
|
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    # A matched word is a keyword token when its exact text is a key of
    # reserved_map; any other word is a plain identifier ("ID").
    keyword_type = reserved_map.get(t.value)
    if keyword_type is None:
        t.type = "ID"
    else:
        t.type = keyword_type
    return t
|
|
130
|
|
# Integer literal: a hexadecimal constant (0x... / 0X...) or a run of decimal
# digits, followed by an optional unsigned/long suffix.
# The two-letter suffixes are listed before the one-letter ones because regex
# alternation takes the first alternative that matches; with the short forms
# first, "10UL" would match only "10U" and leave a stray "L" behind.
t_ICONST = r'(0[xX][0-9a-fA-F]+|\d+)([uU][lL]|[lL][uU]|[uU]|[lL])?'

# Floating literal: digits with a fractional part and optional exponent, or
# digits with a mandatory exponent; optional l/L/f/F suffix. The exponent
# marker may be "e" or "E".
# The spaces around the top-level "|" are preserved from the original pattern;
# they are insignificant only when the pattern is compiled in verbose mode.
# NOTE(review): PLY's documentation says string-defined rules are tried in
# order of decreasing pattern length; this pattern is kept longer than
# t_ICONST so "1.5" is not split after the leading digits -- confirm if either
# pattern is edited.
t_FCONST = r'((\d+)(\.\d+)([eE](\+|-)?(\d+))? | (\d+)[eE](\+|-)?(\d+))([lL]|[fF])?'
|
|
136
|
|
# String literal: a double-quoted run in which each element is either a
# single character other than backslash/newline, or a backslash followed by
# one character (an escape). The match is non-greedy, so it ends at the first
# closing quote that is not part of an escape.
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c': same body as a string literal but between
# single quotes, with an optional leading L.
# NOTE(review): t_ID is a function rule and also matches a bare "L"; if
# function rules are tried before string rules (as PLY documents), the L
# prefix here is presumably never reached -- confirm.
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
|
|
142
|
|
143 # Comments
|
|
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    # A block comment may span several lines; add the newlines it contains to
    # the lexer's line counter. The function returns None for this match.
    lines_spanned = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + lines_spanned
|
|
147
|
|
148 # Preprocessor directive (ignored)
|
|
def t_preprocessor(t):
    r'\#(\\(.|\n)|[^\\\n])*\n'
    # Preprocessor directive (ignored): consumes from "#" up to and including
    # the terminating newline, and returns None.
    #
    # A backslash is always consumed together with the character after it, so
    # a backslash-newline pair (line continuation) does not end the directive.
    # Previously the match stopped at the first newline and the continuation
    # lines of a multi-line "#define" were lexed as ordinary tokens.
    #
    # The directive can now span several physical lines, so the line counter
    # is advanced by the number of newlines actually matched instead of by 1.
    t.lexer.lineno += t.value.count('\n')
|
|
152
|
|
def t_error(t):
    # Report the first character of the unmatched input, then tell the lexer
    # to step over that single character.
    offending_char = t.value[0]
    message = "Illegal character %s" % repr(offending_char)
    print(message)
    t.lexer.skip(1)
|
|
156
|
|
# Build the lexer object. This call stays at module level, next to the
# token rules defined in this file.
lexer = lex.lex()

# When the file is executed as a script, pass the lexer to lex.runmain.
if __name__ == "__main__":
    lex.runmain(lexer)
|
|
160
|
|
161
|
|
162
|
|
163
|
|
164
|