Mercurial > repo
diff ibin/brachylog/Brachylog-master/src/tokenize.pl @ 11865:318de151d0ec draft
<b_jonas> python3 -cimport os,zipfile; os.chdir("ibin/brachylog"); zipfile.ZipFile("master.zip").extractall()
author | HackEso <hackeso@esolangs.org> |
---|---|
date | Tue, 16 Jul 2019 21:37:27 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ibin/brachylog/Brachylog-master/src/tokenize.pl Tue Jul 16 21:37:27 2019 +0000 @@ -0,0 +1,341 @@ +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +____ ____ +\ \ / / + \ \ ____ / / + \ \/ \/ / + \ /\ / BRACHYLOG + \ / \ / A terse declarative logic programming language + / \ / \ + / \/ \ Written by Julien Cumin - 2017 + / /\____/\ \ https://github.com/JCumin/Brachylog + / / ___ \ \ +/___/ /__/ \___\ + +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + + +:- module(tokenize, [tokenize/2]). + +:- use_module(symbols). + + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + TOKENIZE +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +tokenize([], []). +tokenize([' '|T], T2) :- + tokenize(T, T2). +tokenize([Variable|T], ['variable':VariableName:'sup':Sup|T2]) :- + is_variable_character(Variable), + tokenize_variable([Variable|T], Rest, VariableName), + tokenize_superscript(Rest, Rest2, Sup), + tokenize(Rest2, T2). +tokenize([Variable|T], ['variable':R|T2]) :- + ( is_variable_character_dot_above(Variable) + -> token_variable(Variable, RealVariable), + tokenize_superscript(T, Rest, Sup), + R = RealVariable:'sup':Sup + ; is_variable_character_dot_below(Variable) + -> token_variable(Variable, R), + Rest = T + ), + tokenize(Rest, T2). +tokenize([Variable|T], ['variable':RealVariable|T2]) :- + is_math_constant_character(Variable), + token_variable(Variable, RealVariable), + tokenize(T, T2). +tokenize([H|T], ['variable':'Input':'sup':Sup|T2]) :- + is_input_character(H), + tokenize_superscript(T, Rest, Sup), + tokenize(Rest, T2). +tokenize([H|T], ['variable':'Output':'sup':Sup|T2]) :- + is_output_character(H), + tokenize_superscript(T, Rest, Sup), + tokenize(Rest, T2). +tokenize([Modifier,Predicate|T], ['predicate':PredName:Sub|T2]) :- + is_modifier_character(Modifier), + \+ (is_variable_character(Predicate)), + atomic_list_concat([Modifier,Predicate], Pred), + token_predicate(Pred, PredName), + tokenize_subscript(T, Rest, Sub), + tokenize(Rest, T2). +tokenize([Predicate|T], ['predicate':PredName:Sub|T2]) :- + is_predicate_character(Predicate), + token_predicate(Predicate, PredName), + tokenize_subscript(T, Rest, Sub), + tokenize(Rest, T2). +tokenize([MetaPred|T], ['metapredicate':PredName:Sup|T2]) :- + is_metapredicate_character(MetaPred), + token_metapredicate(MetaPred, PredName), + tokenize_superscript(T, Rest, Sup), + tokenize(Rest, T2). +tokenize(['"'|T], ['variable':Variable|T2]) :- + tokenize_string(['"'|T], Rest, Variable), + tokenize(Rest, T2). +tokenize(['_',Digit|T], ['variable':Type:N|T2]) :- + is_digit_character(Digit), + tokenize_number([Digit|T] ,Rest, Type:X), + N is -X, + tokenize(Rest, T2). +tokenize(['_','_'|T], T2) :- + tokenize(T, T2). +tokenize([Digit|T], ['variable':Type:X|T2]) :- + is_digit_character(Digit), + tokenize_number([Digit|T], Rest, Type:X), + tokenize(Rest, T2). +tokenize(['['|T], ['variable':List|T2]) :- + tokenize_list(['['|T], Rest, List), + tokenize(Rest, T2). +tokenize([Modifier,Variable|T], ['variable':RealVariable|T2]) :- + is_modifier_character(Modifier), + is_variable_character(Variable), + token_variable(Modifier:Variable, RealVariable), + tokenize(T, T2). +tokenize([ControlFlow|T], ['control':ControlFlow|T2]) :- + is_control_character(ControlFlow), + tokenize(T, T2). + + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + TOKENIZE_VARIABLE +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +tokenize_variable([], [], ''). +tokenize_variable([H|T], R, Name) :- + ( is_variable_character(H) -> + tokenize_variable(T, R, TName), + atomic_list_concat([H, TName], Name) + ; Name = '', + R = [H|T] + ). + + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + TOKENIZE_STRING +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +tokenize_string(['"'|T], Rest, 'string':T2) :- + tokenize_string_(T, Rest, T2). + +tokenize_string_([], [], []). +tokenize_string_([X,'"'|Rest], Rest, [X]) :- + X \= '\\', + X \= '"', + Rest \= ['"'|_], + !. +tokenize_string_(['\\','"'|T], Rest, ['"'|T2]) :- + tokenize_string_(T, Rest, T2). +tokenize_string_(['"','"'|T], Rest, ['"'|T2]) :- + tokenize_string_(T, Rest, T2). +tokenize_string_([X|T], Rest, L) :- + ( X \= '"' -> + L = [X|T2], + tokenize_string_(T, Rest, T2) + ; Rest = T, + L = [] + ). + + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + TOKENIZE_NUMBER +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +tokenize_number(N, Rest, Type:Number) :- + tokenize_number_(N, Rest, T2), + ( member('.', T2), + !, + Type = 'float' + ; Type = 'integer' + ), + atomic_list_concat(T2, A), + atom_number(A, Number). + +tokenize_number_([], [], []). +tokenize_number_(['.',I|T], Rest, ['.',J|T2]) :- + is_digit_character(I), + atom_number(I, J), + tokenize_integer(T, Rest, T2). +tokenize_number_(['.'], ['.'], []). +tokenize_number_(['.',X|T], ['.',X|T], []) :- + \+ (is_digit_character(X)). +tokenize_number_([X|T], [X|T], []) :- + \+ (is_digit_character(X)), + X \= '.'. +tokenize_number_([I|T], Rest, [J|T2]) :- + is_digit_character(I), + atom_number(I, J), + tokenize_number_(T, Rest, T2). + +tokenize_integer([], [], []). +tokenize_integer([I|T], Rest, [J|T2]) :- + is_digit_character(I), + atom_number(I, J), + tokenize_integer(T, Rest, T2). +tokenize_integer([X|T], [X|T], []) :- + \+ (is_digit_character(X)). + + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + TOKENIZE_LIST +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +tokenize_list(['['|T], Rest, List) :- + isolate_list(T, L, Rest), + tokenize(L, List). + +isolate_list(T, List, Rest) :- + isolate_list(T, 1, [], L, Rest), + reverse(L, List). +isolate_list([], _, L, L, []). +isolate_list([']'|T], 1, L, L, T). +isolate_list([']'|T], X, L, M, Rest) :- + X > 1, + Y is X - 1, + isolate_list(T, Y, [']'|L], M, Rest). +isolate_list(['['|T], X, L, M, Rest) :- + Y is X + 1, + isolate_list(T, Y, ['['|L], M, Rest). +isolate_list([H|T], X, L, M, Rest) :- + H \= '[', + H \= ']', + isolate_list(T, X, [H|L], M, Rest). + + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + TOKENIZE_SUBSCRIPT +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +tokenize_subscript(L, Rest, Sub) :- + tokenize_subscript_(L, Rest, LSub), + ( LSub = 'first' -> + Sub = LSub + ; LSub = 'last' -> + Sub = 'last' + ; LSub = [] -> + Sub = 'default' + ; maplist(number_codes, LSub, LC), + append(LC, C), + number_codes(ISub, C), + term_to_atom('integer':ISub, Sub) + ). + +tokenize_subscript_([], [], []). +tokenize_subscript_([H|T], Rest, Ds) :- + ( is_subscript_character(H, D) -> + tokenize_subscript_(T, Rest, TDs), + Ds = [D|TDs] + ; is_subscript_parenthesis(H, D) -> + Rest = T, + Ds = D + ; Rest = [H|T], + Ds = [] + ). + + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + TOKENIZE_SUPERSCRIPT +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +tokenize_superscript(L, Rest, Sup) :- + tokenize_superscript_(L, Rest, LSup), + ( LSup = 'first' -> + Sup = LSup + ; LSup = 'last' -> + Sup = 'last' + ; LSup = [] -> + Sup = 'default' + ; maplist(number_codes, LSup, LC), + append(LC, C), + number_codes(ISup, C), + term_to_atom('integer':ISup, Sup) + ). + +tokenize_superscript_([], [], []). +tokenize_superscript_([H|T], Rest, Ds) :- + ( is_superscript_character(H, D) -> + tokenize_superscript_(T, Rest, TDs), + Ds = [D|TDs] + ; is_superscript_parenthesis(H, D) -> + Rest = T, + Ds = D + ; Rest = [H|T], + Ds = [] + ). + + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + IS_X_CHARACTER +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ +is_variable_character(X) :- + member(X, ['A', 'B', 'C', 'D', 'E', + 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z']). + +is_variable_character_dot_below(X) :- + member(X, ['Ạ', 'Ḅ', 'Ḍ', 'Ẹ', + 'Ḥ', 'Ị', 'Ḳ', 'Ḷ', + 'Ṃ', 'Ṇ', 'Ọ', 'Ṛ', + 'Ṣ', 'Ṭ', 'Ụ', 'Ṿ', + 'Ẉ', 'Ỵ', 'Ẓ']). + +is_variable_character_dot_above(X) :- + member(X, ['Ȧ', 'Ḃ', 'Ċ', 'Ḋ', 'Ė', + 'Ḟ', 'Ġ', 'Ḣ', 'İ', 'Ṁ', + 'Ṅ', 'Ȯ', 'Ṗ', 'Ṙ', 'Ṡ', + 'Ṫ', 'Ẇ', 'Ẋ', 'Ẏ', 'Ż']). + +is_digit_character(X) :- + member(X, ['0', '1', '2', '3', '4', + '5', '6', '7', '8', '9']). + +is_predicate_character(X) :- + member(X, ['≤', '≥', '∈', '∋', '⊆', '⊇', + '↔', '↰', '↺', + '↻', '√', '⌉', '⌋', '⟦', '⟧', + 'ℕ', 'ℤ', 'ℝ', '∅', '≠', '≡', + '÷', '×', '%', '*', '+', + '-', '/', '<', '=', '>', '\\', + '^', 'a', 'b', 'c', 'd', 'e', + 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', + 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', 'ạ', 'ḅ', 'ḍ', + 'ẹ', 'ḥ', 'ị', 'ḳ', 'ḷ', 'ṃ', + 'ṇ', 'ọ', 'ṛ', 'ṣ', 'ṭ', 'ụ', + 'ṿ', 'ẉ', 'ỵ', 'ẓ', 'ȧ', 'ḃ', + 'ċ', 'ḋ', 'ė', 'ḟ', 'ġ', 'ḣ', + 'ṁ', 'ṅ', 'ȯ', 'ṗ', 'ṙ', 'ṡ', + 'ṫ', 'ẇ', 'ẋ', 'ẏ', 'ż', '≜']). + +is_math_constant_character(X) :- + member(X, ['π', 'φ']). + +is_modifier_character(X) :- + member(X, ['$', '@', '#']). + +is_input_character('?'). + +is_output_character('.'). + +is_metapredicate_character(X) :- + member(X, ['ᵃ', 'ᵇ', 'ᶜ', 'ᵈ', 'ᵉ', + 'ᶠ', 'ᵍ', 'ʰ', 'ⁱ', 'ʲ', + 'ᵏ', 'ˡ', 'ᵐ', 'ⁿ', 'ᵒ', + 'ᵖ', 'ʳ', 'ˢ', 'ᵗ', 'ᵘ', + 'ᵛ', 'ʷ', 'ˣ', 'ʸ', 'ᶻ']). + +is_subscript_character(C, D) :- + nth0(D, ['₀','₁','₂','₃','₄', + '₅','₆','₇','₈','₉'], C). + +is_subscript_parenthesis('₍', 'first'). +is_subscript_parenthesis('₎', 'last'). + +is_superscript_character(C, D) :- + nth0(D, ['⁰','¹','²','³','⁴', + '⁵','⁶','⁷','⁸','⁹'], C). + +is_superscript_parenthesis('⁽', 'first'). +is_superscript_parenthesis('⁾', 'last'). + +is_control_character(X) :- + member(X, ['∧', '∨', '⊥', '\n', '!', '↖', '↙', + '\'', '(', ')', ',', ':', + ':', '|', '{', '}', '`', + '¬', '~', ';', '&', '⟨', '⟩']).