Mercurial > repo
view ibin/brachylog/Brachylog-master/src/tokenize.pl @ 11865:318de151d0ec draft
<b_jonas> python3 -cimport os,zipfile; os.chdir("ibin/brachylog"); zipfile.ZipFile("master.zip").extractall()
author | HackEso <hackeso@esolangs.org> |
---|---|
date | Tue, 16 Jul 2019 21:37:27 +0000 |
parents | |
children |
line wrap: on
line source
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ____ ____ \ \ / / \ \ ____ / / \ \/ \/ / \ /\ / BRACHYLOG \ / \ / A terse declarative logic programming language / \ / \ / \/ \ Written by Julien Cumin - 2017 / /\____/\ \ https://github.com/JCumin/Brachylog / / ___ \ \ /___/ /__/ \___\ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ :- module(tokenize, [tokenize/2]). :- use_module(symbols). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TOKENIZE - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ tokenize([], []). tokenize([' '|T], T2) :- tokenize(T, T2). tokenize([Variable|T], ['variable':VariableName:'sup':Sup|T2]) :- is_variable_character(Variable), tokenize_variable([Variable|T], Rest, VariableName), tokenize_superscript(Rest, Rest2, Sup), tokenize(Rest2, T2). tokenize([Variable|T], ['variable':R|T2]) :- ( is_variable_character_dot_above(Variable) -> token_variable(Variable, RealVariable), tokenize_superscript(T, Rest, Sup), R = RealVariable:'sup':Sup ; is_variable_character_dot_below(Variable) -> token_variable(Variable, R), Rest = T ), tokenize(Rest, T2). tokenize([Variable|T], ['variable':RealVariable|T2]) :- is_math_constant_character(Variable), token_variable(Variable, RealVariable), tokenize(T, T2). tokenize([H|T], ['variable':'Input':'sup':Sup|T2]) :- is_input_character(H), tokenize_superscript(T, Rest, Sup), tokenize(Rest, T2). tokenize([H|T], ['variable':'Output':'sup':Sup|T2]) :- is_output_character(H), tokenize_superscript(T, Rest, Sup), tokenize(Rest, T2). tokenize([Modifier,Predicate|T], ['predicate':PredName:Sub|T2]) :- is_modifier_character(Modifier), \+ (is_variable_character(Predicate)), atomic_list_concat([Modifier,Predicate], Pred), token_predicate(Pred, PredName), tokenize_subscript(T, Rest, Sub), tokenize(Rest, T2). tokenize([Predicate|T], ['predicate':PredName:Sub|T2]) :- is_predicate_character(Predicate), token_predicate(Predicate, PredName), tokenize_subscript(T, Rest, Sub), tokenize(Rest, T2). tokenize([MetaPred|T], ['metapredicate':PredName:Sup|T2]) :- is_metapredicate_character(MetaPred), token_metapredicate(MetaPred, PredName), tokenize_superscript(T, Rest, Sup), tokenize(Rest, T2). tokenize(['"'|T], ['variable':Variable|T2]) :- tokenize_string(['"'|T], Rest, Variable), tokenize(Rest, T2). tokenize(['_',Digit|T], ['variable':Type:N|T2]) :- is_digit_character(Digit), tokenize_number([Digit|T] ,Rest, Type:X), N is -X, tokenize(Rest, T2). tokenize(['_','_'|T], T2) :- tokenize(T, T2). tokenize([Digit|T], ['variable':Type:X|T2]) :- is_digit_character(Digit), tokenize_number([Digit|T], Rest, Type:X), tokenize(Rest, T2). tokenize(['['|T], ['variable':List|T2]) :- tokenize_list(['['|T], Rest, List), tokenize(Rest, T2). tokenize([Modifier,Variable|T], ['variable':RealVariable|T2]) :- is_modifier_character(Modifier), is_variable_character(Variable), token_variable(Modifier:Variable, RealVariable), tokenize(T, T2). tokenize([ControlFlow|T], ['control':ControlFlow|T2]) :- is_control_character(ControlFlow), tokenize(T, T2). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TOKENIZE_VARIABLE - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ tokenize_variable([], [], ''). tokenize_variable([H|T], R, Name) :- ( is_variable_character(H) -> tokenize_variable(T, R, TName), atomic_list_concat([H, TName], Name) ; Name = '', R = [H|T] ). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TOKENIZE_STRING - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ tokenize_string(['"'|T], Rest, 'string':T2) :- tokenize_string_(T, Rest, T2). tokenize_string_([], [], []). tokenize_string_([X,'"'|Rest], Rest, [X]) :- X \= '\\', X \= '"', Rest \= ['"'|_], !. tokenize_string_(['\\','"'|T], Rest, ['"'|T2]) :- tokenize_string_(T, Rest, T2). tokenize_string_(['"','"'|T], Rest, ['"'|T2]) :- tokenize_string_(T, Rest, T2). tokenize_string_([X|T], Rest, L) :- ( X \= '"' -> L = [X|T2], tokenize_string_(T, Rest, T2) ; Rest = T, L = [] ). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TOKENIZE_NUMBER - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ tokenize_number(N, Rest, Type:Number) :- tokenize_number_(N, Rest, T2), ( member('.', T2), !, Type = 'float' ; Type = 'integer' ), atomic_list_concat(T2, A), atom_number(A, Number). tokenize_number_([], [], []). tokenize_number_(['.',I|T], Rest, ['.',J|T2]) :- is_digit_character(I), atom_number(I, J), tokenize_integer(T, Rest, T2). tokenize_number_(['.'], ['.'], []). tokenize_number_(['.',X|T], ['.',X|T], []) :- \+ (is_digit_character(X)). tokenize_number_([X|T], [X|T], []) :- \+ (is_digit_character(X)), X \= '.'. tokenize_number_([I|T], Rest, [J|T2]) :- is_digit_character(I), atom_number(I, J), tokenize_number_(T, Rest, T2). tokenize_integer([], [], []). tokenize_integer([I|T], Rest, [J|T2]) :- is_digit_character(I), atom_number(I, J), tokenize_integer(T, Rest, T2). tokenize_integer([X|T], [X|T], []) :- \+ (is_digit_character(X)). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TOKENIZE_LIST - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ tokenize_list(['['|T], Rest, List) :- isolate_list(T, L, Rest), tokenize(L, List). isolate_list(T, List, Rest) :- isolate_list(T, 1, [], L, Rest), reverse(L, List). isolate_list([], _, L, L, []). isolate_list([']'|T], 1, L, L, T). isolate_list([']'|T], X, L, M, Rest) :- X > 1, Y is X - 1, isolate_list(T, Y, [']'|L], M, Rest). isolate_list(['['|T], X, L, M, Rest) :- Y is X + 1, isolate_list(T, Y, ['['|L], M, Rest). isolate_list([H|T], X, L, M, Rest) :- H \= '[', H \= ']', isolate_list(T, X, [H|L], M, Rest). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TOKENIZE_SUBSCRIPT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ tokenize_subscript(L, Rest, Sub) :- tokenize_subscript_(L, Rest, LSub), ( LSub = 'first' -> Sub = LSub ; LSub = 'last' -> Sub = 'last' ; LSub = [] -> Sub = 'default' ; maplist(number_codes, LSub, LC), append(LC, C), number_codes(ISub, C), term_to_atom('integer':ISub, Sub) ). tokenize_subscript_([], [], []). tokenize_subscript_([H|T], Rest, Ds) :- ( is_subscript_character(H, D) -> tokenize_subscript_(T, Rest, TDs), Ds = [D|TDs] ; is_subscript_parenthesis(H, D) -> Rest = T, Ds = D ; Rest = [H|T], Ds = [] ). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TOKENIZE_SUPERSCRIPT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ tokenize_superscript(L, Rest, Sup) :- tokenize_superscript_(L, Rest, LSup), ( LSup = 'first' -> Sup = LSup ; LSup = 'last' -> Sup = 'last' ; LSup = [] -> Sup = 'default' ; maplist(number_codes, LSup, LC), append(LC, C), number_codes(ISup, C), term_to_atom('integer':ISup, Sup) ). tokenize_superscript_([], [], []). tokenize_superscript_([H|T], Rest, Ds) :- ( is_superscript_character(H, D) -> tokenize_superscript_(T, Rest, TDs), Ds = [D|TDs] ; is_superscript_parenthesis(H, D) -> Rest = T, Ds = D ; Rest = [H|T], Ds = [] ). /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - IS_X_CHARACTER - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ is_variable_character(X) :- member(X, ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']). is_variable_character_dot_below(X) :- member(X, ['Ạ', 'Ḅ', 'Ḍ', 'Ẹ', 'Ḥ', 'Ị', 'Ḳ', 'Ḷ', 'Ṃ', 'Ṇ', 'Ọ', 'Ṛ', 'Ṣ', 'Ṭ', 'Ụ', 'Ṿ', 'Ẉ', 'Ỵ', 'Ẓ']). is_variable_character_dot_above(X) :- member(X, ['Ȧ', 'Ḃ', 'Ċ', 'Ḋ', 'Ė', 'Ḟ', 'Ġ', 'Ḣ', 'İ', 'Ṁ', 'Ṅ', 'Ȯ', 'Ṗ', 'Ṙ', 'Ṡ', 'Ṫ', 'Ẇ', 'Ẋ', 'Ẏ', 'Ż']). is_digit_character(X) :- member(X, ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']). is_predicate_character(X) :- member(X, ['≤', '≥', '∈', '∋', '⊆', '⊇', '↔', '↰', '↺', '↻', '√', '⌉', '⌋', '⟦', '⟧', 'ℕ', 'ℤ', 'ℝ', '∅', '≠', '≡', '÷', '×', '%', '*', '+', '-', '/', '<', '=', '>', '\\', '^', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ạ', 'ḅ', 'ḍ', 'ẹ', 'ḥ', 'ị', 'ḳ', 'ḷ', 'ṃ', 'ṇ', 'ọ', 'ṛ', 'ṣ', 'ṭ', 'ụ', 'ṿ', 'ẉ', 'ỵ', 'ẓ', 'ȧ', 'ḃ', 'ċ', 'ḋ', 'ė', 'ḟ', 'ġ', 'ḣ', 'ṁ', 'ṅ', 'ȯ', 'ṗ', 'ṙ', 'ṡ', 'ṫ', 'ẇ', 'ẋ', 'ẏ', 'ż', '≜']). is_math_constant_character(X) :- member(X, ['π', 'φ']). is_modifier_character(X) :- member(X, ['$', '@', '#']). is_input_character('?'). is_output_character('.'). is_metapredicate_character(X) :- member(X, ['ᵃ', 'ᵇ', 'ᶜ', 'ᵈ', 'ᵉ', 'ᶠ', 'ᵍ', 'ʰ', 'ⁱ', 'ʲ', 'ᵏ', 'ˡ', 'ᵐ', 'ⁿ', 'ᵒ', 'ᵖ', 'ʳ', 'ˢ', 'ᵗ', 'ᵘ', 'ᵛ', 'ʷ', 'ˣ', 'ʸ', 'ᶻ']). is_subscript_character(C, D) :- nth0(D, ['₀','₁','₂','₃','₄', '₅','₆','₇','₈','₉'], C). is_subscript_parenthesis('₍', 'first'). is_subscript_parenthesis('₎', 'last'). is_superscript_character(C, D) :- nth0(D, ['⁰','¹','²','³','⁴', '⁵','⁶','⁷','⁸','⁹'], C). is_superscript_parenthesis('⁽', 'first'). is_superscript_parenthesis('⁾', 'last'). is_control_character(X) :- member(X, ['∧', '∨', '⊥', '\n', '!', '↖', '↙', '\'', '(', ')', ',', ':', ':', '|', '{', '}', '`', '¬', '~', ';', '&', '⟨', '⟩']).