Mercurial > repo
comparison interps/brachylog/brachylog/Brachylog-master/src/tokenize.pl @ 11868:70dedbc831e9 draft
<ais523> ` mv ibin/brachylog interps/brachylog
author | HackEso <hackeso@esolangs.org> |
---|---|
date | Tue, 16 Jul 2019 21:39:11 +0000 |
parents | ibin/brachylog/Brachylog-master/src/tokenize.pl@318de151d0ec |
children |
comparison
equal
deleted
inserted
replaced
11867:b0414b6b332f | 11868:70dedbc831e9 |
---|---|
1 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
2 ____ ____ | |
3 \ \ / / | |
4 \ \ ____ / / | |
5 \ \/ \/ / | |
6 \ /\ / BRACHYLOG | |
7 \ / \ / A terse declarative logic programming language | |
8 / \ / \ | |
9 / \/ \ Written by Julien Cumin - 2017 | |
10 / /\____/\ \ https://github.com/JCumin/Brachylog | |
11 / / ___ \ \ | |
12 /___/ /__/ \___\ | |
13 | |
14 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
15 | |
16 | |
17 :- module(tokenize, [tokenize/2]). | |
18 | |
19 :- use_module(symbols). | |
20 | |
21 | |
22 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
23 TOKENIZE | |
24 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
% tokenize(+Chars, -Tokens)
%
% Converts a list of single-character atoms into a list of tokens.
% Each clause recognises one kind of lexeme at the head of Chars, emits
% the matching token (if any) and recurses on the remaining characters.
% Token shapes produced here:
%   'variable':Name:'sup':Sup      named / dotted-above / input / output variables
%   'variable':Value               dotted-below variables, constants, strings,
%                                  numbers ('variable':Type:N) and lists
%   'predicate':Name:Sub           predicates with their subscript
%   'metapredicate':Name:Sup       metapredicates with their superscript
%   'control':Char                 control characters
% token_variable/2, token_predicate/2 and token_metapredicate/2 are not
% defined in this file (presumably they come from the symbols module).
tokenize([], []).
% A space produces no token.
tokenize([' '|Chars], Tokens) :-
    tokenize(Chars, Tokens).
% A run of variable characters, followed by an optional superscript.
tokenize([Char|Chars], ['variable':Name:'sup':Sup|Tokens]) :-
    is_variable_character(Char),
    tokenize_variable([Char|Chars], AfterName, Name),
    tokenize_superscript(AfterName, AfterSup, Sup),
    tokenize(AfterSup, Tokens).
% A dotted variable character: dot-above ones take a superscript,
% dot-below ones do not. Any other character makes this clause fail.
tokenize([Char|Chars], ['variable':Var|Tokens]) :-
    (   is_variable_character_dot_above(Char)
    ->  token_variable(Char, RealVar),
        tokenize_superscript(Chars, Remaining, Sup),
        Var = RealVar:'sup':Sup
    ;   is_variable_character_dot_below(Char)
    ->  token_variable(Char, Var),
        Remaining = Chars
    ),
    tokenize(Remaining, Tokens).
% A math constant character.
tokenize([Char|Chars], ['variable':RealVar|Tokens]) :-
    is_math_constant_character(Char),
    token_variable(Char, RealVar),
    tokenize(Chars, Tokens).
% The input character, followed by an optional superscript.
tokenize([Char|Chars], ['variable':'Input':'sup':Sup|Tokens]) :-
    is_input_character(Char),
    tokenize_superscript(Chars, Remaining, Sup),
    tokenize(Remaining, Tokens).
% The output character, followed by an optional superscript.
tokenize([Char|Chars], ['variable':'Output':'sup':Sup|Tokens]) :-
    is_output_character(Char),
    tokenize_superscript(Chars, Remaining, Sup),
    tokenize(Remaining, Tokens).
% A modifier followed by a non-variable character: two-character predicate.
tokenize([Modifier,Char|Chars], ['predicate':PredName:Sub|Tokens]) :-
    is_modifier_character(Modifier),
    \+ is_variable_character(Char),
    atomic_list_concat([Modifier,Char], Symbol),
    token_predicate(Symbol, PredName),
    tokenize_subscript(Chars, Remaining, Sub),
    tokenize(Remaining, Tokens).
% A single-character predicate, followed by an optional subscript.
tokenize([Char|Chars], ['predicate':PredName:Sub|Tokens]) :-
    is_predicate_character(Char),
    token_predicate(Char, PredName),
    tokenize_subscript(Chars, Remaining, Sub),
    tokenize(Remaining, Tokens).
% A metapredicate character, followed by an optional superscript.
tokenize([Char|Chars], ['metapredicate':PredName:Sup|Tokens]) :-
    is_metapredicate_character(Char),
    token_metapredicate(Char, PredName),
    tokenize_superscript(Chars, Remaining, Sup),
    tokenize(Remaining, Tokens).
% A double-quoted string.
tokenize(['"'|Chars], ['variable':String|Tokens]) :-
    tokenize_string(['"'|Chars], Remaining, String),
    tokenize(Remaining, Tokens).
% An underscore directly before a digit: negated number.
tokenize(['_',Digit|Chars], ['variable':Type:Negated|Tokens]) :-
    is_digit_character(Digit),
    tokenize_number([Digit|Chars], Remaining, Type:Magnitude),
    Negated is -Magnitude,
    tokenize(Remaining, Tokens).
% Two consecutive underscores produce no token.
tokenize(['_','_'|Chars], Tokens) :-
    tokenize(Chars, Tokens).
% An unsigned number.
tokenize([Digit|Chars], ['variable':Type:Number|Tokens]) :-
    is_digit_character(Digit),
    tokenize_number([Digit|Chars], Remaining, Type:Number),
    tokenize(Remaining, Tokens).
% A bracketed list; its content is tokenized by tokenize_list/3.
tokenize(['['|Chars], ['variable':List|Tokens]) :-
    tokenize_list(['['|Chars], Remaining, List),
    tokenize(Remaining, Tokens).
% A modifier followed by a variable character.
tokenize([Modifier,Letter|Chars], ['variable':RealVar|Tokens]) :-
    is_modifier_character(Modifier),
    is_variable_character(Letter),
    token_variable(Modifier:Letter, RealVar),
    tokenize(Chars, Tokens).
% A control character is passed through as-is.
tokenize([Char|Chars], ['control':Char|Tokens]) :-
    is_control_character(Char),
    tokenize(Chars, Tokens).
97 | |
98 | |
99 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
100 TOKENIZE_VARIABLE | |
101 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
% tokenize_variable(+Chars, -Rest, -Name)
%
% Consumes the longest prefix of Chars made of variable characters.
% Name is the atom formed by those characters ('' when there are none)
% and Rest is the list of characters left over.
tokenize_variable([], [], '').
tokenize_variable([Char|Chars], Rest, Name) :-
    (   is_variable_character(Char)
    ->  tokenize_variable(Chars, Rest, Suffix),
        atom_concat(Char, Suffix, Name)
    ;   % First non-variable character: stop and hand everything back.
        Name = '',
        Rest = [Char|Chars]
    ).
110 | |
111 | |
112 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
113 TOKENIZE_STRING | |
114 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
% tokenize_string(+Chars, -Rest, -Token)
%
% Chars must start with a double quote. Token is 'string':Content, where
% Content is the list of characters of the string body, and Rest is what
% follows the closing quote.
tokenize_string(['"'|Body], Rest, 'string':Content) :-
    tokenize_string_(Body, Rest, Content).

% tokenize_string_(+Chars, -Rest, -Content)
%
% Reads the body of a string up to its closing quote. Inside the body,
% both \" and "" stand for one literal double quote. Running out of
% characters before a closing quote also ends the string.
tokenize_string_([], [], []).
% Last character of the body: an ordinary character followed by a quote
% that is not itself doubled. Commit to closing the string here.
tokenize_string_([Char,'"'|Rest], Rest, [Char]) :-
    Char \= '\\',
    Char \= '"',
    Rest \= ['"'|_],
    !.
% Backslash-escaped quote.
tokenize_string_(['\\','"'|Chars], Rest, ['"'|Content]) :-
    tokenize_string_(Chars, Rest, Content).
% Doubled quote.
tokenize_string_(['"','"'|Chars], Rest, ['"'|Content]) :-
    tokenize_string_(Chars, Rest, Content).
% Any other character is copied; a lone quote closes the string.
tokenize_string_([Char|Chars], Rest, Content) :-
    (   Char \= '"'
    ->  Content = [Char|MoreContent],
        tokenize_string_(Chars, Rest, MoreContent)
    ;   Rest = Chars,
        Content = []
    ).
135 | |
136 | |
137 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
138 TOKENIZE_NUMBER | |
139 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
% tokenize_number(+Chars, -Rest, -Type:Number)
%
% Reads a number from the front of Chars. Type is 'float' when the
% consumed part contains a '.', otherwise 'integer'. Number is the
% corresponding numeric value and Rest the unconsumed characters.
tokenize_number(Chars, Rest, Type:Number) :-
    tokenize_number_(Chars, Rest, Parts),
    (   memberchk('.', Parts)
    ->  Type = 'float'
    ;   Type = 'integer'
    ),
    atomic_list_concat(Parts, Atom),
    atom_number(Atom, Number).

% tokenize_number_(+Chars, -Rest, -Parts)
%
% Parts is the list of digits (as numbers) of the integer part,
% followed by '.' and the fractional digits when a '.' is immediately
% followed by a digit. A '.' not followed by a digit is left in Rest.
tokenize_number_([], [], []).
% Decimal point followed by a digit: switch to the fractional part.
tokenize_number_(['.',Char|Chars], Rest, ['.',Digit|Parts]) :-
    is_digit_character(Char),
    atom_number(Char, Digit),
    tokenize_integer(Chars, Rest, Parts).
% Trailing '.' at the very end of the input: not part of the number.
tokenize_number_(['.'], ['.'], []).
% '.' followed by a non-digit: not part of the number.
tokenize_number_(['.',Char|Chars], ['.',Char|Chars], []) :-
    \+ is_digit_character(Char).
% Any other non-digit ends the number.
tokenize_number_([Char|Chars], [Char|Chars], []) :-
    \+ is_digit_character(Char),
    Char \= '.'.
% A digit of the integer part.
tokenize_number_([Char|Chars], Rest, [Digit|Parts]) :-
    is_digit_character(Char),
    atom_number(Char, Digit),
    tokenize_number_(Chars, Rest, Parts).

% tokenize_integer(+Chars, -Rest, -Digits)
%
% Consumes leading digit characters only; Digits holds them as numbers
% and Rest starts at the first non-digit character.
tokenize_integer([], [], []).
tokenize_integer([Char|Chars], Rest, [Digit|Digits]) :-
    is_digit_character(Char),
    atom_number(Char, Digit),
    tokenize_integer(Chars, Rest, Digits).
tokenize_integer([Char|Chars], [Char|Chars], []) :-
    \+ is_digit_character(Char).
173 | |
174 | |
175 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
176 TOKENIZE_LIST | |
177 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
% tokenize_list(+Chars, -Rest, -List)
%
% Chars must start with '['. The characters up to the matching ']' are
% isolated and tokenized with tokenize/2 into List; Rest is whatever
% follows the matching ']'.
tokenize_list(['['|Chars], Rest, List) :-
    isolate_list(Chars, Inner, Rest),
    tokenize(Inner, List).

% isolate_list(+Chars, -Inner, -Rest)
%
% Chars is the input just after an opening '['. Inner is the content up
% to (excluding) the matching ']', nested brackets included; Rest is the
% input after that ']'.
isolate_list(Chars, Inner, Rest) :-
    isolate_list(Chars, 1, [], Reversed, Rest),
    reverse(Reversed, Inner).

% isolate_list(+Chars, +Depth, +Acc, -Reversed, -Rest)
%
% Worker with a bracket nesting depth and a reversed accumulator.
% Input that ends before the bracket is closed is accepted as-is.
isolate_list([], _, Acc, Acc, []).
% Closing bracket at depth 1 ends the list.
isolate_list([']'|Chars], 1, Acc, Acc, Chars).
% Closing bracket of a nested list.
isolate_list([']'|Chars], Depth, Acc, Reversed, Rest) :-
    Depth > 1,
    Shallower is Depth - 1,
    isolate_list(Chars, Shallower, [']'|Acc], Reversed, Rest).
% Opening bracket of a nested list.
isolate_list(['['|Chars], Depth, Acc, Reversed, Rest) :-
    Deeper is Depth + 1,
    isolate_list(Chars, Deeper, ['['|Acc], Reversed, Rest).
% Any non-bracket character is copied.
isolate_list([Char|Chars], Depth, Acc, Reversed, Rest) :-
    \+ memberchk(Char, ['[', ']']),
    isolate_list(Chars, Depth, [Char|Acc], Reversed, Rest).
198 | |
199 | |
200 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
201 TOKENIZE_SUBSCRIPT | |
202 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
% tokenize_subscript(+Chars, -Rest, -Sub)
%
% Reads an optional subscript from the front of Chars. Sub is:
%   'first' / 'last'   for a subscript parenthesis,
%   'default'          when there is no subscript,
%   'integer:N' (an atom built with term_to_atom/2) for subscript digits.
tokenize_subscript(Chars, Rest, Sub) :-
    tokenize_subscript_(Chars, Rest, Raw),
    (   memberchk(Raw, ['first', 'last'])
    ->  Sub = Raw
    ;   Raw = []
    ->  Sub = 'default'
    ;   % Raw is a list of digits: glue them into a single integer.
        maplist(number_codes, Raw, DigitCodes),
        append(DigitCodes, Codes),
        number_codes(Index, Codes),
        term_to_atom('integer':Index, Sub)
    ).

% tokenize_subscript_(+Chars, -Rest, -Raw)
%
% Raw is the list of leading subscript digits (as integers), or the
% marker atom of a subscript parenthesis, which also ends the scan.
tokenize_subscript_([], [], []).
tokenize_subscript_([Char|Chars], Rest, Raw) :-
    (   is_subscript_character(Char, Digit)
    ->  tokenize_subscript_(Chars, Rest, MoreDigits),
        Raw = [Digit|MoreDigits]
    ;   is_subscript_parenthesis(Char, Marker)
    ->  Rest = Chars,
        Raw = Marker
    ;   Rest = [Char|Chars],
        Raw = []
    ).
228 | |
229 | |
230 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
231 TOKENIZE_SUPERSCRIPT | |
232 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
% tokenize_superscript(+Chars, -Rest, -Sup)
%
% Reads an optional superscript from the front of Chars. Sup is:
%   'first' / 'last'   for a superscript parenthesis,
%   'default'          when there is no superscript,
%   'integer:N' (an atom built with term_to_atom/2) for superscript digits.
tokenize_superscript(Chars, Rest, Sup) :-
    tokenize_superscript_(Chars, Rest, Raw),
    (   memberchk(Raw, ['first', 'last'])
    ->  Sup = Raw
    ;   Raw = []
    ->  Sup = 'default'
    ;   % Raw is a list of digits: glue them into a single integer.
        maplist(number_codes, Raw, DigitCodes),
        append(DigitCodes, Codes),
        number_codes(Index, Codes),
        term_to_atom('integer':Index, Sup)
    ).

% tokenize_superscript_(+Chars, -Rest, -Raw)
%
% Raw is the list of leading superscript digits (as integers), or the
% marker atom of a superscript parenthesis, which also ends the scan.
tokenize_superscript_([], [], []).
tokenize_superscript_([Char|Chars], Rest, Raw) :-
    (   is_superscript_character(Char, Digit)
    ->  tokenize_superscript_(Chars, Rest, MoreDigits),
        Raw = [Digit|MoreDigits]
    ;   is_superscript_parenthesis(Char, Marker)
    ->  Rest = Chars,
        Raw = Marker
    ;   Rest = [Char|Chars],
        Raw = []
    ).
258 | |
259 | |
260 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
261 IS_X_CHARACTER | |
262 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
% is_variable_character(?Char)
%
% Char is one of the 26 uppercase ASCII letters, enumerated A to Z.
is_variable_character(X) :-
    atom_chars('ABCDEFGHIJKLMNOPQRSTUVWXYZ', Letters),
    member(X, Letters).

% is_variable_character_dot_below(?Char)
%
% Char is one of the uppercase letters written with a dot below.
is_variable_character_dot_below(X) :-
    member(X, [
        'Ạ', 'Ḅ', 'Ḍ', 'Ẹ', 'Ḥ', 'Ị', 'Ḳ', 'Ḷ', 'Ṃ', 'Ṇ',
        'Ọ', 'Ṛ', 'Ṣ', 'Ṭ', 'Ụ', 'Ṿ', 'Ẉ', 'Ỵ', 'Ẓ'
    ]).

% is_variable_character_dot_above(?Char)
%
% Char is one of the uppercase letters written with a dot above.
is_variable_character_dot_above(X) :-
    member(X, [
        'Ȧ', 'Ḃ', 'Ċ', 'Ḋ', 'Ė', 'Ḟ', 'Ġ', 'Ḣ', 'İ', 'Ṁ',
        'Ṅ', 'Ȯ', 'Ṗ', 'Ṙ', 'Ṡ', 'Ṫ', 'Ẇ', 'Ẋ', 'Ẏ', 'Ż'
    ]).

% is_digit_character(?Char)
%
% Char is one of the ASCII digit characters '0'..'9'.
is_digit_character(X) :-
    atom_chars('0123456789', Digits),
    member(X, Digits).
286 | |
% is_predicate_character(?Char)
%
% Char is one of the characters that tokenize/2 treats as a
% single-character predicate. The comments below only group the table
% visually; the order of the elements is significant when Char is
% unbound and is kept as it was.
is_predicate_character(X) :-
    member(X, [
        % Comparison, membership and inclusion symbols
        '≤', '≥', '∈', '∋', '⊆', '⊇',
        % Arrows
        '↔', '↰', '↺', '↻',
        % Other non-ASCII symbols
        '√', '⌉', '⌋', '⟦', '⟧',
        'ℕ', 'ℤ', 'ℝ', '∅', '≠', '≡',
        '÷', '×',
        % ASCII symbols
        '%', '*', '+', '-', '/', '<', '=', '>', '\\', '^',
        % Lowercase ASCII letters
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        % Lowercase letters with a dot below
        'ạ', 'ḅ', 'ḍ', 'ẹ', 'ḥ', 'ị', 'ḳ', 'ḷ', 'ṃ', 'ṇ',
        'ọ', 'ṛ', 'ṣ', 'ṭ', 'ụ', 'ṿ', 'ẉ', 'ỵ', 'ẓ',
        % Lowercase letters with a dot above
        'ȧ', 'ḃ', 'ċ', 'ḋ', 'ė', 'ḟ', 'ġ', 'ḣ', 'ṁ', 'ṅ',
        'ȯ', 'ṗ', 'ṙ', 'ṡ', 'ṫ', 'ẇ', 'ẋ', 'ẏ', 'ż',
        % Trailing symbol
        '≜'
    ]).
305 | |
% is_math_constant_character(?Char)
%
% Char is one of the characters tokenized as a math constant.
is_math_constant_character('π').
is_math_constant_character('φ').

% is_modifier_character(?Char)
%
% Char is one of the prefix characters that combine with the next
% character to form a two-character predicate or variable.
is_modifier_character('$').
is_modifier_character('@').
is_modifier_character('#').

% is_input_character(?Char)
%
% The character tokenized as the 'Input' variable.
is_input_character('?').

% is_output_character(?Char)
%
% The character tokenized as the 'Output' variable.
is_output_character('.').

% is_metapredicate_character(?Char)
%
% Char is one of the superscript letters tokenized as a metapredicate.
is_metapredicate_character(X) :-
    member(X, [
        'ᵃ', 'ᵇ', 'ᶜ', 'ᵈ', 'ᵉ', 'ᶠ', 'ᵍ', 'ʰ', 'ⁱ', 'ʲ',
        'ᵏ', 'ˡ', 'ᵐ', 'ⁿ', 'ᵒ', 'ᵖ', 'ʳ', 'ˢ', 'ᵗ', 'ᵘ',
        'ᵛ', 'ʷ', 'ˣ', 'ʸ', 'ᶻ'
    ]).
322 | |
% is_subscript_character(?Char, ?Digit)
%
% Char is the subscript digit character whose value is Digit (0..9);
% the value is the zero-based position of Char in the table.
is_subscript_character(C, D) :-
    SubscriptDigits = ['₀', '₁', '₂', '₃', '₄', '₅', '₆', '₇', '₈', '₉'],
    nth0(D, SubscriptDigits, C).

% is_subscript_parenthesis(?Char, ?Marker)
%
% The opening subscript parenthesis maps to 'first', the closing one
% to 'last'.
is_subscript_parenthesis('₍', 'first').
is_subscript_parenthesis('₎', 'last').

% is_superscript_character(?Char, ?Digit)
%
% Char is the superscript digit character whose value is Digit (0..9);
% the value is the zero-based position of Char in the table.
is_superscript_character(C, D) :-
    SuperscriptDigits = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'],
    nth0(D, SuperscriptDigits, C).

% is_superscript_parenthesis(?Char, ?Marker)
%
% The opening superscript parenthesis maps to 'first', the closing one
% to 'last'.
is_superscript_parenthesis('⁽', 'first').
is_superscript_parenthesis('⁾', 'last').
336 | |
% is_control_character(?Char)
%
% Char is one of the characters that tokenize/2 passes through as a
% 'control':Char token.
%
% Every character is listed exactly once: a repeated entry would make
% member/2 succeed more than once for that character, and tokenize/2
% would then return the same token list again on backtracking.
is_control_character(X) :-
    member(X, ['∧', '∨', '⊥', '\n', '!', '↖', '↙',
               '\'', '(', ')', ',', ':',
               '|', '{', '}', '`',
               '¬', '~', ';', '&', '⟨', '⟩']).