comparison interps/brachylog/brachylog/Brachylog-master/src/tokenize.pl @ 11868:70dedbc831e9 draft

<ais523> ` mv ibin/brachylog interps/brachylog
author HackEso <hackeso@esolangs.org>
date Tue, 16 Jul 2019 21:39:11 +0000
parents ibin/brachylog/Brachylog-master/src/tokenize.pl@318de151d0ec
children
comparison
equal deleted inserted replaced
11867:b0414b6b332f 11868:70dedbc831e9
1 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2 ____ ____
3 \ \ / /
4 \ \ ____ / /
5 \ \/ \/ /
6 \ /\ / BRACHYLOG
7 \ / \ / A terse declarative logic programming language
8 / \ / \
9 / \/ \ Written by Julien Cumin - 2017
10 / /\____/\ \ https://github.com/JCumin/Brachylog
11 / / ___ \ \
12 /___/ /__/ \___\
13
14 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
15
16
17 :- module(tokenize, [tokenize/2]).
18
19 :- use_module(symbols).
20
21
22 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
23 TOKENIZE
24 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  tokenize(+Chars, -Tokens)
%
%   Turns a list of one-character atoms into a list of tokens. Clauses are
%   tried in order; each one recognises a kind of token at the head of
%   Chars, consumes it, and recurses on what is left. A character that no
%   clause recognises makes the whole call fail.
%
%   token_variable/2, token_predicate/2 and token_metapredicate/2 are not
%   defined in this file (presumably imported from module symbols).

% End of input: no more tokens.
tokenize([], []).
% A space yields no token.
tokenize([' '|Chars], Tokens) :-
    tokenize(Chars, Tokens).
% A run of A-Z characters is a named variable, optionally followed by a
% superscript.
tokenize([Char|Chars], ['variable':Name:'sup':Sup|Tokens]) :-
    is_variable_character(Char),
    tokenize_variable([Char|Chars], AfterName, Name),
    tokenize_superscript(AfterName, AfterSup, Sup),
    tokenize(AfterSup, Tokens).
% Dotted capital letters are single-character variables. Only the
% dot-above form reads a trailing superscript.
tokenize([Char|Chars], ['variable':Token|Tokens]) :-
    (   is_variable_character_dot_above(Char)
    ->  token_variable(Char, Expanded),
        tokenize_superscript(Chars, Remaining, Sup),
        Token = Expanded:'sup':Sup
    ;   is_variable_character_dot_below(Char)
    ->  token_variable(Char, Token),
        Remaining = Chars
    ),
    tokenize(Remaining, Tokens).
% Math constant characters map to a variable through token_variable/2.
tokenize([Char|Chars], ['variable':Constant|Tokens]) :-
    is_math_constant_character(Char),
    token_variable(Char, Constant),
    tokenize(Chars, Tokens).
% The input character, with an optional superscript.
tokenize([Char|Chars], ['variable':'Input':'sup':Sup|Tokens]) :-
    is_input_character(Char),
    tokenize_superscript(Chars, Remaining, Sup),
    tokenize(Remaining, Tokens).
% The output character, with an optional superscript.
tokenize([Char|Chars], ['variable':'Output':'sup':Sup|Tokens]) :-
    is_output_character(Char),
    tokenize_superscript(Chars, Remaining, Sup),
    tokenize(Remaining, Tokens).
% A modifier followed by anything that is not an A-Z character is a
% two-character predicate, optionally followed by a subscript.
tokenize([Modifier,Char|Chars], ['predicate':PredName:Sub|Tokens]) :-
    is_modifier_character(Modifier),
    \+ (is_variable_character(Char)),
    atomic_list_concat([Modifier,Char], Symbol),
    token_predicate(Symbol, PredName),
    tokenize_subscript(Chars, Remaining, Sub),
    tokenize(Remaining, Tokens).
% A single-character predicate, optionally followed by a subscript.
tokenize([Char|Chars], ['predicate':PredName:Sub|Tokens]) :-
    is_predicate_character(Char),
    token_predicate(Char, PredName),
    tokenize_subscript(Chars, Remaining, Sub),
    tokenize(Remaining, Tokens).
% A metapredicate character, optionally followed by a superscript.
tokenize([Char|Chars], ['metapredicate':PredName:Sup|Tokens]) :-
    is_metapredicate_character(Char),
    token_metapredicate(Char, PredName),
    tokenize_superscript(Chars, Remaining, Sup),
    tokenize(Remaining, Tokens).
% A double quote opens a string literal.
tokenize(['"'|Chars], ['variable':String|Tokens]) :-
    tokenize_string(['"'|Chars], Remaining, String),
    tokenize(Remaining, Tokens).
% An underscore directly before a digit negates the number that follows.
tokenize(['_',Digit|Chars], ['variable':Type:Negated|Tokens]) :-
    is_digit_character(Digit),
    tokenize_number([Digit|Chars], Remaining, Type:Value),
    Negated is -Value,
    tokenize(Remaining, Tokens).
% Two consecutive underscores yield no token.
tokenize(['_','_'|Chars], Tokens) :-
    tokenize(Chars, Tokens).
% A digit starts an integer or float literal.
tokenize([Digit|Chars], ['variable':Type:Value|Tokens]) :-
    is_digit_character(Digit),
    tokenize_number([Digit|Chars], Remaining, Type:Value),
    tokenize(Remaining, Tokens).
% An opening bracket starts a list literal.
tokenize(['['|Chars], ['variable':List|Tokens]) :-
    tokenize_list(['['|Chars], Remaining, List),
    tokenize(Remaining, Tokens).
% A modifier followed by an A-Z character is looked up as
% Modifier:Variable through token_variable/2.
tokenize([Modifier,Char|Chars], ['variable':Expanded|Tokens]) :-
    is_modifier_character(Modifier),
    is_variable_character(Char),
    token_variable(Modifier:Char, Expanded),
    tokenize(Chars, Tokens).
% Control characters are passed through unchanged.
tokenize([Char|Chars], ['control':Char|Tokens]) :-
    is_control_character(Char),
    tokenize(Chars, Tokens).
97
98
99 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
100 TOKENIZE_VARIABLE
101 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  tokenize_variable(+Chars, -Rest, -Name)
%
%   Consumes the longest leading run of characters accepted by
%   is_variable_character/1. Name is the atom made of those characters
%   ('' when there are none) and Rest is the input after the run.
tokenize_variable([], [], '').
tokenize_variable([Char|Chars], Rest, Name) :-
    (   is_variable_character(Char)
    ->  tokenize_variable(Chars, Rest, Suffix),
        atomic_list_concat([Char, Suffix], Name)
    ;   % First character outside the run: stop and hand everything back.
        Rest = [Char|Chars],
        Name = ''
    ).
110
111
112 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
113 TOKENIZE_STRING
114 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  tokenize_string(+Chars, -Rest, -String)
%
%   Chars must start with a double quote. String is 'string':Content where
%   Content is the list of characters of the literal, and Rest is the input
%   after the closing quote.
tokenize_string(['"'|Chars], Rest, 'string':Content) :-
    tokenize_string_(Chars, Rest, Content).

%   tokenize_string_(+Chars, -Rest, -Content)
%
%   Reads the body of a string literal, after its opening quote. Clause
%   order matters: the escape clauses must be tried before the generic one.

% Input exhausted before a closing quote: accept what was read.
tokenize_string_([], [], []).
% Last ordinary character directly before a closing quote that is not
% itself the first half of a doubled quote; commit to ending the string.
tokenize_string_([Char,'"'|Rest], Rest, [Char]) :-
    Char \= '\\',
    Char \= '"',
    Rest \= ['"'|_],
    !.
% Backslash-quote stands for a literal quote.
tokenize_string_(['\\','"'|Chars], Rest, ['"'|Content]) :-
    tokenize_string_(Chars, Rest, Content).
% A doubled quote stands for a literal quote.
tokenize_string_(['"','"'|Chars], Rest, ['"'|Content]) :-
    tokenize_string_(Chars, Rest, Content).
% Any other character is copied; a lone quote closes the string.
tokenize_string_([Char|Chars], Rest, Content) :-
    (   Char \= '"'
    ->  Content = [Char|More],
        tokenize_string_(Chars, Rest, More)
    ;   Rest = Chars,
        Content = []
    ).
135
136
137 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
138 TOKENIZE_NUMBER
139 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  tokenize_number(+Chars, -Rest, -TypedNumber)
%
%   Reads a numeric literal from the front of Chars. TypedNumber is
%   Type:Number, where Type is 'float' when the literal contains a '.' and
%   'integer' otherwise. Rest is the input after the literal.
tokenize_number(Chars, Rest, Type:Number) :-
    tokenize_number_(Chars, Rest, Parts),
    (   member('.', Parts)
    ->  Type = 'float'
    ;   Type = 'integer'
    ),
    % Parts holds one-digit integers and possibly '.'; glue them together
    % and parse the result as a number.
    atomic_list_concat(Parts, Text),
    atom_number(Text, Number).
149
%   tokenize_number_(+Chars, -Rest, -Parts)
%
%   Collects the pieces of a numeric literal: Parts is a list of one-digit
%   integers with at most one '.' atom. A '.' only belongs to the number
%   when a digit follows it; otherwise it is left in Rest.

% End of input.
tokenize_number_([], [], []).
% Decimal point followed by a digit: the fractional part starts here and
% continues with digits only.
tokenize_number_(['.',Char|Chars], Rest, ['.',Digit|Parts]) :-
    is_digit_character(Char),
    atom_number(Char, Digit),
    tokenize_integer(Chars, Rest, Parts).
% A trailing '.' is not part of the number.
tokenize_number_(['.'], ['.'], []).
% A '.' followed by a non-digit is not part of the number.
tokenize_number_(['.',Char|Chars], ['.',Char|Chars], []) :-
    \+ (is_digit_character(Char)).
% Any other non-digit ends the number.
tokenize_number_([Char|Chars], [Char|Chars], []) :-
    \+ (is_digit_character(Char)),
    Char \= '.'.
% A digit of the integer part.
tokenize_number_([Char|Chars], Rest, [Digit|Parts]) :-
    is_digit_character(Char),
    atom_number(Char, Digit),
    tokenize_number_(Chars, Rest, Parts).
165
%   tokenize_integer(+Chars, -Rest, -Digits)
%
%   Consumes the leading run of digit characters of Chars. Digits is the
%   list of their integer values; Rest starts at the first non-digit.
tokenize_integer([], [], []).
tokenize_integer([Char|Chars], Rest, [Digit|Digits]) :-
    is_digit_character(Char),
    atom_number(Char, Digit),
    tokenize_integer(Chars, Rest, Digits).
tokenize_integer([Char|Chars], [Char|Chars], []) :-
    \+ (is_digit_character(Char)).
173
174
175 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
176 TOKENIZE_LIST
177 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  tokenize_list(+Chars, -Rest, -Tokens)
%
%   Chars must start with '['. The characters up to the matching ']' are
%   cut out with isolate_list/3 and tokenized on their own; Rest is the
%   input after that closing bracket.
tokenize_list(['['|Chars], Rest, Tokens) :-
    isolate_list(Chars, Inner, Rest),
    tokenize(Inner, Tokens).
181
%!  isolate_list(+Chars, -Inner, -Rest)
%
%   Chars is the input just after an opening '['. Inner is every character
%   up to, but not including, the matching ']'; Rest is what follows it.
%   Nested brackets are kept in Inner. If the input ends before the
%   bracket is closed, everything read so far is returned and Rest is [].
isolate_list(Chars, Inner, Rest) :-
    isolate_list(Chars, 1, [], Reversed, Rest),
    reverse(Reversed, Inner).

%   isolate_list(+Chars, +Depth, +Acc, -Reversed, -Rest)
%
%   Worker: Depth counts currently open brackets and Acc collects the
%   characters seen so far in reverse order.

% Input exhausted.
isolate_list([], _, Acc, Acc, []).
% Closing bracket at depth 1: this is the matching one.
isolate_list([']'|Chars], 1, Acc, Acc, Chars).
% Closing bracket of a nested list: keep it and go up one level.
isolate_list([']'|Chars], Depth, Acc, Reversed, Rest) :-
    Depth > 1,
    Shallower is Depth - 1,
    isolate_list(Chars, Shallower, [']'|Acc], Reversed, Rest).
% Opening bracket of a nested list: keep it and go down one level.
isolate_list(['['|Chars], Depth, Acc, Reversed, Rest) :-
    Deeper is Depth + 1,
    isolate_list(Chars, Deeper, ['['|Acc], Reversed, Rest).
% Any other character is kept as is.
isolate_list([Char|Chars], Depth, Acc, Reversed, Rest) :-
    Char \= '[',
    Char \= ']',
    isolate_list(Chars, Depth, [Char|Acc], Reversed, Rest).
198
199
200 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
201 TOKENIZE_SUBSCRIPT
202 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  tokenize_subscript(+Chars, -Rest, -Sub)
%
%   Reads an optional subscript from the front of Chars. Sub is 'first' or
%   'last' for a subscript parenthesis, 'default' when there is no
%   subscript, and otherwise the atom form of 'integer':N, where N is the
%   number spelled by the subscript digits. Rest is the remaining input.
tokenize_subscript(Chars, Rest, Sub) :-
    tokenize_subscript_(Chars, Rest, Raw),
    (   Raw = 'first'
    ->  Sub = 'first'
    ;   Raw = 'last'
    ->  Sub = 'last'
    ;   Raw = []
    ->  Sub = 'default'
    ;   % Raw is a list of one-digit integers: join their codes and parse
    %   them back as a single number.
        maplist(number_codes, Raw, DigitCodes),
        append(DigitCodes, Codes),
        number_codes(Index, Codes),
        term_to_atom('integer':Index, Sub)
    ).

%   tokenize_subscript_(+Chars, -Rest, -Raw)
%
%   Raw is the list of digit values of the leading subscript digits, or the
%   atom given by is_subscript_parenthesis/2 when a subscript parenthesis
%   is met.
tokenize_subscript_([], [], []).
tokenize_subscript_([Char|Chars], Rest, Raw) :-
    (   is_subscript_character(Char, Digit)
    ->  tokenize_subscript_(Chars, Rest, MoreDigits),
        Raw = [Digit|MoreDigits]
    ;   is_subscript_parenthesis(Char, Marker)
    ->  Rest = Chars,
        Raw = Marker
    ;   Rest = [Char|Chars],
        Raw = []
    ).
228
229
230 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
231 TOKENIZE_SUPERSCRIPT
232 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  tokenize_superscript(+Chars, -Rest, -Sup)
%
%   Reads an optional superscript from the front of Chars. Sup is 'first'
%   or 'last' for a superscript parenthesis, 'default' when there is no
%   superscript, and otherwise the atom form of 'integer':N, where N is the
%   number spelled by the superscript digits. Rest is the remaining input.
tokenize_superscript(Chars, Rest, Sup) :-
    tokenize_superscript_(Chars, Rest, Raw),
    (   Raw = 'first'
    ->  Sup = 'first'
    ;   Raw = 'last'
    ->  Sup = 'last'
    ;   Raw = []
    ->  Sup = 'default'
    ;   % Raw is a list of one-digit integers: join their codes and parse
    %   them back as a single number.
        maplist(number_codes, Raw, DigitCodes),
        append(DigitCodes, Codes),
        number_codes(Index, Codes),
        term_to_atom('integer':Index, Sup)
    ).

%   tokenize_superscript_(+Chars, -Rest, -Raw)
%
%   Raw is the list of digit values of the leading superscript digits, or
%   the atom given by is_superscript_parenthesis/2 when a superscript
%   parenthesis is met.
tokenize_superscript_([], [], []).
tokenize_superscript_([Char|Chars], Rest, Raw) :-
    (   is_superscript_character(Char, Digit)
    ->  tokenize_superscript_(Chars, Rest, MoreDigits),
        Raw = [Digit|MoreDigits]
    ;   is_superscript_parenthesis(Char, Marker)
    ->  Rest = Chars,
        Raw = Marker
    ;   Rest = [Char|Chars],
        Raw = []
    ).
258
259
260 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
261 IS_X_CHARACTER
262 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  is_variable_character(?Char)
%
%   True when Char is one of the uppercase ASCII letters 'A'..'Z'.
%   Enumerates them in alphabetical order when Char is unbound.
is_variable_character(Char) :-
    atom_chars('ABCDEFGHIJKLMNOPQRSTUVWXYZ', Letters),
    member(Char, Letters).
269
%!  is_variable_character_dot_below(?Char)
%
%   True when Char is one of the capital letters written with a dot below
%   that are listed here.
is_variable_character_dot_below(Char) :-
    DotBelow = [
        'Ạ', 'Ḅ', 'Ḍ', 'Ẹ', 'Ḥ', 'Ị', 'Ḳ',
        'Ḷ', 'Ṃ', 'Ṇ', 'Ọ', 'Ṛ', 'Ṣ', 'Ṭ',
        'Ụ', 'Ṿ', 'Ẉ', 'Ỵ', 'Ẓ'
    ],
    member(Char, DotBelow).
276
%!  is_variable_character_dot_above(?Char)
%
%   True when Char is one of the capital letters written with a dot above
%   that are listed here.
is_variable_character_dot_above(Char) :-
    DotAbove = [
        'Ȧ', 'Ḃ', 'Ċ', 'Ḋ', 'Ė', 'Ḟ', 'Ġ',
        'Ḣ', 'İ', 'Ṁ', 'Ṅ', 'Ȯ', 'Ṗ', 'Ṙ',
        'Ṡ', 'Ṫ', 'Ẇ', 'Ẋ', 'Ẏ', 'Ż'
    ],
    member(Char, DotAbove).
282
%!  is_digit_character(?Char)
%
%   True when Char is one of the one-character atoms '0'..'9'.
is_digit_character(Char) :-
    atom_chars('0123456789', Digits),
    member(Char, Digits).
286
%!  is_predicate_character(?Char)
%
%   True when Char is one of the single characters treated as a predicate
%   symbol: the operator-like symbols, the lowercase ASCII letters, the
%   lowercase letters with a dot below or above listed here, and '≜'.
%   The groups are joined in the order shown, which is also the order of
%   enumeration when Char is unbound.
is_predicate_character(Char) :-
    Symbols = [
        '≤', '≥', '∈', '∋', '⊆', '⊇', '↔', '↰', '↺', '↻', '√',
        '⌉', '⌋', '⟦', '⟧', 'ℕ', 'ℤ', 'ℝ', '∅', '≠', '≡', '÷',
        '×', '%', '*', '+', '-', '/', '<', '=', '>', '\\', '^'
    ],
    atom_chars('abcdefghijklmnopqrstuvwxyz', Lowercase),
    DotBelow = [
        'ạ', 'ḅ', 'ḍ', 'ẹ', 'ḥ', 'ị', 'ḳ', 'ḷ', 'ṃ', 'ṇ',
        'ọ', 'ṛ', 'ṣ', 'ṭ', 'ụ', 'ṿ', 'ẉ', 'ỵ', 'ẓ'
    ],
    DotAbove = [
        'ȧ', 'ḃ', 'ċ', 'ḋ', 'ė', 'ḟ', 'ġ', 'ḣ', 'ṁ', 'ṅ',
        'ȯ', 'ṗ', 'ṙ', 'ṡ', 'ṫ', 'ẇ', 'ẋ', 'ẏ', 'ż'
    ],
    append([Symbols, Lowercase, DotBelow, DotAbove, ['≜']], All),
    member(Char, All).
305
%!  is_math_constant_character(?Char)
%
%   True when Char is 'π' or 'φ'.
is_math_constant_character('π').
is_math_constant_character('φ').
308
%!  is_modifier_character(?Char)
%
%   True when Char is one of the modifier characters '$', '@' or '#'.
is_modifier_character('$').
is_modifier_character('@').
is_modifier_character('#').
311
%!  is_input_character(?Char)
%
%   True when Char is '?', the character tokenized as the Input variable.
is_input_character(Char) :-
    Char = '?'.

%!  is_output_character(?Char)
%
%   True when Char is '.', the character tokenized as the Output variable.
is_output_character(Char) :-
    Char = '.'.
315
%!  is_metapredicate_character(?Char)
%
%   True when Char is one of the superscript letters listed here, which
%   tokenize/2 turns into metapredicate tokens.
is_metapredicate_character(Char) :-
    SuperscriptLetters = [
        'ᵃ', 'ᵇ', 'ᶜ', 'ᵈ', 'ᵉ', 'ᶠ', 'ᵍ', 'ʰ', 'ⁱ',
        'ʲ', 'ᵏ', 'ˡ', 'ᵐ', 'ⁿ', 'ᵒ', 'ᵖ', 'ʳ', 'ˢ',
        'ᵗ', 'ᵘ', 'ᵛ', 'ʷ', 'ˣ', 'ʸ', 'ᶻ'
    ],
    member(Char, SuperscriptLetters).
322
%!  is_subscript_character(?Char, ?Digit)
%
%   Relates a subscript digit character to its integer value 0..9, which
%   is its zero-based position in the list below.
is_subscript_character(Char, Digit) :-
    SubscriptDigits = ['₀', '₁', '₂', '₃', '₄', '₅', '₆', '₇', '₈', '₉'],
    nth0(Digit, SubscriptDigits, Char).

%!  is_subscript_parenthesis(?Char, ?Marker)
%
%   Maps the subscript opening parenthesis to 'first' and the subscript
%   closing parenthesis to 'last'.
is_subscript_parenthesis('₍', 'first').
is_subscript_parenthesis('₎', 'last').
329
%!  is_superscript_character(?Char, ?Digit)
%
%   Relates a superscript digit character to its integer value 0..9, which
%   is its zero-based position in the list below.
is_superscript_character(Char, Digit) :-
    SuperscriptDigits = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'],
    nth0(Digit, SuperscriptDigits, Char).

%!  is_superscript_parenthesis(?Char, ?Marker)
%
%   Maps the superscript opening parenthesis to 'first' and the superscript
%   closing parenthesis to 'last'.
is_superscript_parenthesis('⁽', 'first').
is_superscript_parenthesis('⁾', 'last').
336
%!  is_control_character(?Char)
%
%   True when Char is one of the control-flow characters listed here.
%   tokenize/2 passes such a character through as 'control':Char.
%
%   Every character appears exactly once in the list. A repeated entry
%   would make this predicate succeed twice for that character, and
%   tokenize/2 would then produce the same token list twice on
%   backtracking.
is_control_character(X) :-
    member(X, ['∧', '∨', '⊥', '\n', '!', '↖', '↙',
               '\'', '(', ')', ',', ':',
               '|', '{', '}', '`',
               '¬', '~', ';', '&', '⟨', '⟩']).