diff interps/lazyk/lazy.cpp @ 996:859f9b4339e6

<Gregor> tar xf egobot.tar.xz
author HackBot
date Sun, 09 Dec 2012 19:30:08 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/interps/lazyk/lazy.cpp	Sun Dec 09 19:30:08 2012 +0000
@@ -0,0 +1,612 @@
+// Lazy K interpreter in C++.
+// For usage see usage() function below.
+// Copyright 2002 Ben Rudiak-Gould. Distributed under the GPL.
+//
+// Implementation notes:
+//  - When Sxyz is reduced to (xz)(yz), both "copies" of z
+//    point to the same expression tree. When z (or any of
+//    its subexpressions) is reduced, the old tree nodes are
+//    overwritten with their newly reduced versions, so that
+//    any other pointers to the node get the benefit of the
+//    change. This is critical to the performance of any
+//    lazy evaluator. Despite this destructive update, the
+//    meaning (i.e. behavior) of the function described by
+//    any subtree never changes (until the nodes are
+//    garbage-collected and reassigned, that is).
+//  - I actually got stack overflows in the evaluator when
+//    running complicated programs (e.g. prime_numbers.unl
+//    inside the Unlambda interpreter), so I rewrote it to
+//    eliminate recursion from partial_eval() and free().
+//    These functions now use relatively abstruse iterative
+//    algorithms which borrow expression tree pointers for
+//    temporary storage, and restore the original values
+//    where necessary before returning. Other than that, the
+//    interpreter is pretty simple to understand. The only
+//    recursion left (I think) is in the parser and in the
+//    Inc case of partial_eval_primitive_application; the
+//    former will only bite you if you have really deep
+//    nesting in your source code, and the latter only if
+//    you return a ridiculously large number in the output
+//    stream.
+//
+
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifdef __WIN32
+#include <io.h>
+#else
+#define O_BINARY 0
+#endif
+
+class Expr {
+private:
+	int refcnt;
+	union {
+		Expr* arg1;
+		int numeric_arg1;
+	};
+	Expr* arg2;
+
+	static Expr* free_list;
+	static Expr* alloc();
+	void free();
+
+	void partial_eval_primitive_application();
+
+public:
+	enum Type { A, K, K1, S, S1, S2, I1, LazyRead, Inc, Num, Free } type;
+
+	static void* operator new(size_t) {
+		Expr* result = free_list;
+		if (result) {
+			free_list = result->arg1;
+			return result;
+		} else {
+			return alloc();
+		}
+	}
+
+	// caller keeps original ref plus returned ref
+	Expr* dup() {
+		++refcnt;
+		return this;
+	}
+	// caller loses original ref
+	void deref() {
+		if (--refcnt == 0) {
+			free();
+		}
+	}
+
+	// caller keeps original ref
+	Type gettype() { return type; }
+
+	// caller loses refs to a1 and a2, gets ref to new object
+	Expr(Type t, Expr* a1 =0, Expr* a2 =0) {
+		refcnt = 1;
+		type = t;
+		arg1 = a1; arg2 = a2;
+	}
+
+	// caller loses original ref, gets returned ref
+	Expr* partial_eval();
+
+	// caller loses original ref, gets returned ref
+	static Expr* partial_apply(Expr* lhs, Expr* rhs) {
+		// You could do something more complicated here,
+		// but I tried it and it didn't seem to improve
+		// execution speed.
+		return new Expr(A, lhs, rhs);
+	}
+
+	// caller loses original ref
+	int to_number() {
+		int result = (type == Num) ? numeric_arg1 : -1;
+		deref();
+		return result;
+	}
+#if 0
+	void print(Expr*);
+#endif
+	// caller loses original ref, gets returned ref
+	Expr* drop_i1() {
+		Expr* cur = this;
+		if (type == I1) {
+			do {
+				cur = cur->arg1;
+			} while (cur->type == I1);
+			cur = cur->dup();
+			this->deref();
+		}
+		return cur;
+	}
+};
+
+
+Expr* Expr::free_list = 0;
+
+
+Expr K(Expr::K);
+Expr S(Expr::S);
+Expr I(Expr::S2, &K, &K);
+Expr KI(Expr::K1, &I);
+
+Expr SI(Expr::S1, &I);
+Expr KS(Expr::K1, &S);
+Expr KK(Expr::K1, &K);
+Expr SKSK(Expr::S2, &KS, &K);
+Expr SIKS(Expr::S2, &I, &KS);
+Expr Iota(Expr::S2, &SIKS, &KK);
+
+Expr Inc(Expr::Inc);
+Expr Zero(Expr::Num);
+
+
+Expr* Expr::alloc() {
+	enum { blocksize = 10000 };
+	static Expr* p = 0;
+	static Expr* end = 0;
+	if (p >= end) {
+		p = (Expr*)malloc(blocksize*sizeof(Expr));
+		if (p == 0) {
+			fputs("Out of memory!\n", stderr);
+			exit(2);
+		}
+		end = p + blocksize;
+	}
+	return p++;
+}
+
+#if 0
+void Expr::print(Expr* highlight) {
+	if (this == highlight) {
+		fputs("###", stdout);
+	}
+	switch (type) {
+		case A:
+			putchar('(');
+			arg1->print(highlight);
+			putchar(' ');
+			arg2->print(highlight);
+			putchar(')');
+			break;
+		case K:
+			putchar('K');
+			break;
+		case K1:
+			fputs("[K ", stdout);
+			arg1->print(highlight);
+			putchar(']');
+			break;
+		case S:
+			putchar('S');
+			break;
+		case S1:
+			fputs("[s ", stdout);
+			arg1->print(highlight);
+			putchar(']');
+			break;
+		case S2:
+			fputs("[S ", stdout);
+			arg1->print(highlight);
+			putchar(' ');
+			arg2->print(highlight);
+			putchar(']');
+			break;
+		case I1:
+			putchar('.');
+			arg1->print(highlight);
+			break;
+		case LazyRead:
+			fputs("LazyRead", stdout);
+			break;
+		case Inc:
+			fputs("Inc", stdout);
+			break;
+		case Num:
+			printf("%d", numeric_arg1);
+			break;
+		default:
+			putchar('?');
+	}
+	if (this == highlight) {
+		fputs("###", stdout);
+	}
+}
+#endif
+
+Expr* make_church_char(int ch) {
+	if (ch < 0 || ch > 256) {
+		ch = 256;
+	}
+
+	static Expr* cached_church_chars[257] = { KI.dup(), I.dup() };
+
+	if (cached_church_chars[ch] == 0) {
+		cached_church_chars[ch] = new Expr(Expr::S2, SKSK.dup(), make_church_char(ch-1));
+	}
+	return cached_church_chars[ch]->dup();
+}
+
+
+Expr* g_expr;
+
+
+// This function modifies the object in-place so that
+// all references to it see the new version.
+
+void Expr::partial_eval_primitive_application() {
+	Expr* lhs = arg1;
+	Expr* rhs = arg2->drop_i1();
+
+	// arg1 and arg2 are now uninitialized space
+
+	switch (lhs->type) {
+	case K:
+		type = K1;
+		arg1 = rhs;
+		arg2 = 0;
+		break;
+	case K1:
+		type = I1;
+		arg1 = lhs->arg1->dup();
+		arg2 = 0;
+		rhs->deref();
+		break;
+	case S:
+		type = S1;
+		arg1 = rhs;
+		arg2 = 0;
+		break;
+	case S1:
+		type = S2;
+		arg1 = lhs->arg1->dup();
+		arg2 = rhs;
+		break;
+	case LazyRead:
+		lhs->type = S2;
+		lhs->arg1 = new Expr(S2, I.dup(), new Expr(K1, make_church_char(getchar())));
+		lhs->arg2 = new Expr(K1, new Expr(LazyRead));
+		// fall thru
+	case S2:
+		//type = A;
+		arg1 = partial_apply(lhs->arg1->dup(), rhs->dup());
+		arg2 = partial_apply(lhs->arg2->dup(), rhs);
+		break;
+	case Inc:
+		rhs = rhs->partial_eval();
+		type = Num;
+		numeric_arg1 = rhs->to_number() + 1;
+		if (numeric_arg1 == 0) {
+			fputs("Runtime error: invalid output format (attempted to apply inc to a non-number)\n", stderr);
+			exit(3);
+		}
+		arg2 = 0;
+		break;
+	case Num:
+		fputs("Runtime error: invalid output format (attempted to apply a number)\n", stderr);
+		exit(3);
+	default:
+		fprintf(stderr, "INTERNAL ERROR: invalid type in partial_eval_primitive_application (%d)\n", lhs->type);
+		exit(4);
+	}
+	lhs->deref();
+}
+
+
+Expr* Expr::partial_eval() {
+	Expr* prev = 0;
+	Expr* cur = this;
+	for (;;) {
+		cur = cur->drop_i1();
+		while (cur->type == A) {
+			Expr* next = cur->arg1->drop_i1();
+			cur->arg1 = prev;
+			prev = cur; cur = next;
+		}
+		if (!prev) {
+			return cur;
+		}
+		Expr* next = cur; cur = prev;
+		prev = cur->arg1;
+		cur->arg1 = next;
+		cur->partial_eval_primitive_application();
+	}
+}
+
+
+/*
+void Expr::free() {
+	if (type != Num) {
+		if (arg1) arg1->deref();
+		if (arg2) arg2->deref();
+	}
+	type = Free;
+	arg1 = free_list;
+	free_list = this;
+}
+*/
+
+void Expr::free() {
+	Expr* cur = this;
+	Expr* partially_free_list = 0;
+	for (;;) {
+		while (--cur->refcnt <= 0 && cur->arg1 != 0 && cur->type != Num) {
+			Expr* next = cur->arg1;
+			if (cur->arg2 != 0) {
+				cur->arg1 = partially_free_list;
+				partially_free_list = cur;
+			} else {
+				cur->arg1 = free_list;
+				free_list = cur;
+			}
+			cur = next;
+		}
+		if (partially_free_list == 0) {
+			break;
+		}
+		cur = partially_free_list;
+		partially_free_list = partially_free_list->arg1;
+		cur->arg1 = free_list;
+		free_list = cur;
+		cur = cur->arg2;
+	}
+}
+
+
+class Stream {
+public:
+	virtual int getch() = 0;
+	virtual void ungetch(int ch) = 0;
+	virtual void error(const char* msg) = 0;
+};
+
+class File : public Stream {
+	FILE* f;
+	const char* filename;
+	enum { circular_buf_size = 256 };
+	char circular_buf[circular_buf_size];
+	int last_newline, cur_pos;
+public:
+	File(FILE* _f, const char* _filename) {
+		f = _f; filename = _filename;
+		last_newline = cur_pos = 0;
+	}
+	int getch();
+	void ungetch(int ch);
+	void error(const char* msg);
+};
+
+int File::getch() {
+	int ch;
+	do {
+		ch = getc(f);
+		circular_buf[(cur_pos++)%circular_buf_size] = ch;
+		if (ch == '#') {
+			do {
+				ch = getc(f);
+			} while (ch != '\n' && ch != EOF);
+		}
+		if (ch == '\n') {
+			last_newline = cur_pos;
+		}
+	} while (isspace(ch));
+	return ch;
+}
+
+void File::ungetch(int ch) {
+	ungetc(ch, f);
+	--cur_pos;
+}
+
+void File::error(const char* msg) {
+	fprintf(stderr, "While parsing \"%s\": %s\n", filename, msg);
+	int from;
+	if (cur_pos-last_newline < circular_buf_size) {
+		from = last_newline;
+	} else {
+		from = cur_pos-circular_buf_size+1;
+		fputs("...", stdout);
+	}
+	for (int i=from; i < cur_pos; ++i) {
+		putc(circular_buf[i%circular_buf_size], stderr);
+	}
+	fputs(" <--\n", stderr);
+	exit(1);
+}
+
+class StringStream : public Stream {
+	const char* str;
+	const char* p;
+public:
+	StringStream(const char* s) {
+		str = s; p = s;
+	}
+	int getch() {
+		return *p ? *p++ : EOF;
+	}
+	void ungetch(int ch) {
+		if (ch != EOF) --p;
+	}
+	void error(const char* msg) {
+		fprintf(stderr, "While parsing command line: %s\n%s\n", msg, str);
+		for (const char* q = str+1; q < p; ++q) {
+			putc(' ', stderr);
+		}
+		fputs("^\n", stderr);
+		exit(1);
+	}
+};
+
+
+Expr* parse_expr(Stream* f, int ch, bool i_is_iota);
+
+Expr* parse_manual_close(Stream* f, int expected_terminator);
+
+
+Expr* parse_expr(Stream* f, int ch, bool i_is_iota) {
+	switch (ch) {
+	case '`': case '*':
+	{
+		Expr* p = parse_expr(f, f->getch(), ch=='*');
+		Expr* q = parse_expr(f, f->getch(), ch=='*');
+		return Expr::partial_apply(p, q);
+	}
+	case '(':
+		return parse_manual_close(f, ')');
+	case ')':
+		f->error("Mismatched close-parenthesis!");
+	case 'k': case 'K':
+		return K.dup();
+	case 's': case 'S':
+		return S.dup();
+	case 'i':
+		if (i_is_iota)
+			return Iota.dup();
+		// else fall thru
+	case 'I':
+		return I.dup();
+	case '0': case '1':
+	{
+		Expr* e = I.dup();
+		do {
+			if (ch == '0') {
+				e = Expr::partial_apply(Expr::partial_apply(e, S.dup()), K.dup());
+			} else {
+				e = Expr::partial_apply(S.dup(), Expr::partial_apply(K.dup(), e));
+			}
+			ch = f->getch();
+		} while (ch == '0' || ch == '1');
+		f->ungetch(ch);
+		return e;
+	}
+	default:
+		f->error("Invalid character!");
+	}
+	return 0;
+}
+
+
+Expr* parse_manual_close(Stream* f, int expected_terminator) {
+	Expr* e = 0;
+	int peek;
+	while (peek = f->getch(), peek != ')' && peek != EOF) {
+		Expr* e2 = parse_expr(f, peek, false);
+		e = e ? Expr::partial_apply(e, e2) : e2;
+	}
+	if (peek != expected_terminator) {
+		f->error(peek == EOF ? "Premature end of program!" : "Unmatched trailing close-parenthesis!");
+	}
+	if (e == 0) {
+		e = I.dup();
+	}
+	return e;
+}
+
+
+static Expr* car(Expr* list) {
+	return Expr::partial_apply(list, K.dup());
+}
+
+static Expr* cdr(Expr* list) {
+	return Expr::partial_apply(list, KI.dup());
+}
+
+static int church2int(Expr* church) {
+	Expr* e = Expr::partial_apply(Expr::partial_apply(church, Inc.dup()), Zero.dup());
+	g_expr = e;
+	int result = e->partial_eval()->to_number();
+	if (result == -1) {
+		fputs("Runtime error: invalid output format (result was not a number)\n", stderr);
+		exit(3);
+	}
+	return result;
+}
+
+
+Expr* compose(Expr* f, Expr* g) {
+	return new Expr(Expr::S2, new Expr(Expr::K1, f), g);
+}
+
+
+Expr* append_program(Expr* old, Stream* stream) {
+	return compose(parse_manual_close(stream, EOF), old);
+}
+
+
+void usage() {
+	fputs(
+		"usage: lazy [-b] { -e program | program-file.lazy } *\n"
+		"\n"
+		"   -b           puts stdin and stdout into binary mode on systems that care\n"
+		"                (i.e. Windows)\n"
+		"\n"
+		"   -e program   takes program code from the command line (like Perl's -e\n"
+		"                switch)\n"
+		"\n"
+		"   program-file.lazy   name of file containing program code\n"
+		"\n"
+		" If more than one -e or filename argument is given, the programs will be\n"
+		" combined by functional composition (but in Unix pipe order, not mathematical-\n"
+		" notation order). If no -e or filename argument is given, the result is a\n"
+		" degenerate composition, i.e. the identity function.\n", stdout);
+	exit(0);
+}
+
+
+int main(int argc, char** argv) {
+	Expr* e = I.dup();
+	for (int i=1; i<argc; ++i) {
+		if (argv[i][0] == '-') {
+			switch (argv[i][1]) {
+			case 0:
+                        {
+                                File f = File(stdin, "(standard input)");
+				e = append_program(e, &f);
+				break;
+                        }
+			case 'b':
+#ifdef __WIN32
+				setmode(fileno(stdin), O_BINARY);
+				setmode(fileno(stdout), O_BINARY);
+#endif
+				break;
+			case 'e':
+                        {
+				++i;
+				if (i == argc) {
+					usage();
+				}
+                                StringStream s = StringStream(argv[i]);
+				e = append_program(e, &s);
+				break;
+                        }
+			default:
+				usage();
+			}
+		} else {
+			FILE* f = fopen(argv[i], "r");
+			if (!f) {
+				fprintf(stderr, "Unable to open the file \"%s\".\n", argv[i]);
+				exit(1);
+			}
+                        File ff = File(f, argv[i]);
+			e = append_program(e, &ff);
+		}
+	}
+	e = Expr::partial_apply(e, new Expr(Expr::LazyRead));
+	for (;;) {
+		int ch = church2int(car(e->dup()));
+		if (ch >= 256)
+			return ch-256;
+		putchar(ch);
+		e = cdr(e);
+	}
+}