7267
|
# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++.  This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------
|
|
7
|
|
# Token names (the list below declares no reserved-word tokens).
# NOTE(review): 'TYPEID' is declared but no t_TYPEID rule is visible in this
# file -- presumably the importing tokenizer supplies it; confirm.
tokens = [
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
]
|
|
42
|
|
# Operators
# Each t_<NAME> string is the regular expression for token <NAME>.
# t_NOT is the bitwise complement (~); logical negation (!) is t_LNOT.
# When used with PLY, string-defined rules are tried in order of decreasing
# pattern length, so e.g. '<<' and '<=' are attempted before '<'.
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MODULO           = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='
|
|
64
|
|
# Assignment operators
# Plain '=' plus the compound forms; regex metacharacters (* + | ^) are escaped.
t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
t_XOREQUAL         = r'\^='
|
|
78
|
|
# Increment/decrement (++, --)
t_INCREMENT        = r'\+\+'
t_DECREMENT        = r'--'

# Structure dereference (->)
t_ARROW            = r'->'

# Ternary operator (?)
t_TERNARY          = r'\?'
|
|
88
|
|
# Delimiters ( ) [ ] { } , . ; : and the ellipsis (...)
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'
|
|
101
|
|
# Identifiers
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal: decimal digits with an optional u/l/ul/lu suffix (any case)
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: either digits '.' digits with an optional exponent, or
# digits followed by a mandatory exponent; optional l/L/f/F suffix.
# The exponent marker accepts both 'e' and 'E' (C allows either case).
# The spaces around '|' are insignificant only because PLY compiles rule
# patterns with re.VERBOSE; compile with that flag if used outside PLY.
t_FLOAT = r'((\d+)(\.\d+)([eE](\+|-)?(\d+))? | (\d+)[eE](\+|-)?(\d+))([lL]|[fF])?'

# String literal: double-quoted, backslash escapes allowed, no raw newline
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
|
|
116
|
|
# Comment (C-Style)
# The raw string on the first line of the body is the rule's regular
# expression (read from the function's docstring), so it must stay first.
# NOTE(review): the token is returned, yet 'COMMENT' is not in the `tokens`
# list visible in this file -- confirm the importing tokenizer declares it.
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # A block comment may span several lines; keep the line counter in step.
    t.lexer.lineno += t.value.count('\n')
    return t
|
|
122
|
|
# Comment (C++-Style)
# The raw string on the first line of the body is the rule's regular
# expression (read from the function's docstring), so it must stay first.
# The trailing newline is optional so that a '//' comment on the last line of
# input (no final newline) is still recognised as a comment.
# NOTE(review): the token is returned, yet 'CPPCOMMENT' is not in the `tokens`
# list visible in this file -- confirm the importing tokenizer declares it.
def t_CPPCOMMENT(t):
    r'//.*\n?'
    # Advance the line counter only by the newlines actually consumed
    # (one normally, zero for a comment that ends at end-of-input).
    t.lexer.lineno += t.value.count('\n')
    return t
|
|
128
|
|
129
|
|
130
|
|
131
|
|
132
|
|
133
|