diff interps/unlambda/unlambda.c @ 996:859f9b4339e6

<Gregor> tar xf egobot.tar.xz
author HackBot
date Sun, 09 Dec 2012 19:30:08 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/interps/unlambda/unlambda.c	Sun Dec 09 19:30:08 2012 +0000
@@ -0,0 +1,761 @@
+/*
+ * This is an implementation of the Unlambda programming language
+ *
+ * Copyright (C) 2000 Bertram Felgenhauer <b.f.@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * This is more or less a translation of the Haskell version, which is a lot
+ * easier to read than this code; most comments here are actually Haskell code.
+ *
+ * history:
+ * 13-10-2000: initial version, in Haskell
+ * 14-10-2000: initial version in C
+ * xx-10-2000: optimised; gcc is weird: removing code (which was never
+ *             executed anyway) can make programs slower. duh.
+ * 31-10-2000: added '/' (use -DUSE_SLASH), which is similar to '|' but
+ *             with ?<char> instead of .<char>
+ * 21-12-2000: added some fflush(stdout) code...
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+// note: Be sure to compile with -DNDEBUG if you want optimal performance.
+#include <assert.h>
+
+#define BLOCK_SIZE 4096  // allocate BLOCK_SIZE info structs each malloc
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+
+/*
+ * data Prog = App Prog Prog | At | C | C1 Cont | D | D1 Prog | Dot Char | E
+ *      | I | K | K1 Prog | Que Char | S | S1 Prog | S2 Prog Prog | V | VBar
+ * data Cont = App1 Prog Cont | App2 Prog Cont
+ */
+
+// type tags:
+enum { 
+    At, C, D, Dot, E, I, K, Que, S,
+#if defined USE_SLASH
+    Slash,
+#endif
+    V, VBar,                        T0=VBar, // 0 links
+    C1, D1, K1, S1,                 T1=S1,   // 1 link
+    App, S2, App1, App2                      // 2 links
+};
+
+#if defined DEBUG_REFCOUNT
+char* tags[] = {"At", "C", "D", "Dot", "E", "I", "K", "Que", "S", 
+#if defined USE_SLASH
+                "Slash",
+#endif
+		"V", "VBar",
+		"C1", "D1", "K1", "S1", "App", "S2", "App1", "App2"};
+#endif
+
+typedef struct info {
+    int tag; // type tag
+    int ref; // reference count, minus one
+    struct info *l;
+    union {
+	struct info *r;
+	char ch;
+    } u;
+} info;
+
+#define L  l
+#define R  u.r
+#define CH u.ch
+
+// All info records with no link fields can be allocated statically.
+// "eval" is a continuation which applies its argument to e, which
+// ends the program, and is used in exec()
+info at={At}, c={C}, d={D}, e={E}, i={I}, k={K}, vbar={VBar}, s={S}, v={V},
+#if defined USE_SLASH
+     slash={Slash},
+#endif
+     eval = {App2, 0, &e, {&e}}, dot[256], que[256];
+
+info *heap;
+int  heap_idx = 0;
+info *avail = 0;
+
+#if defined DEBUG_REFCOUNT
+int allocated, freed;
+#endif
+
+// Get a free info record, with ref field == 0 (reference count == 1).
+inline info *new_info()
+{
+#if defined DEBUG_REFCOUNT
+    allocated++;
+#endif
+    if (avail) {
+	info *t;
+	t = avail;
+	avail = avail->L;
+#if defined DEBUG_REFCOUNT
+	t->ref++;
+#endif
+	assert(t->ref == 0);
+	return t;
+    } else {
+	if (!heap_idx) {
+	    if (!(heap = malloc(BLOCK_SIZE * sizeof(info))))
+		perror("new_info"), exit(EXIT_FAILURE);
+	    heap_idx = BLOCK_SIZE;
+	}
+	heap[--heap_idx].ref = 0; // Would it be better to use calloc above?
+	return heap + heap_idx;
+    }
+}
+
+// Increment the reference count on an info struct.
+#define add_ref(i) ((i)->ref++)
+
+// Free an info record without checking any reference counts.
+inline void free_info_no_ref(info *i)
+{
+    assert(i->ref == 0);
+#if defined DEBUG_REFCOUNT
+    freed++;
+    i->ref--;
+#endif
+    i->L = avail;
+    avail = i;
+}
+
+// Free a reference counted info record.
+inline void free_info(info *i)
+{
+    assert(i);
+    if (i->ref) 
+	i->ref--;
+    else {
+	if (i->tag > T0) {
+	    free_info(i->L);
+	    if (i->tag > T1)
+		free_info(i->R);
+	}
+	free_info_no_ref(i);
+    }
+}
+
+// Initialise the dot[] and que[] arrays.
+void init()
+{
+    int i;
+    for (i=0; i<256; i++) {
+	dot[i].tag = Dot;
+	dot[i].CH = i;
+	que[i].tag = Que;
+	que[i].CH = i;
+    }
+}
+
+// Execute program.
+void exec(info *prog)
+{
+    info *cont, *p, *q, *tmp;
+    int current_char;
+
+    current_char = EOF;
+    tmp = new_info();
+    add_ref(cont = &eval);
+    add_ref(prog);
+    for (;;) {
+        // exec cont prog
+	assert(tmp);
+	assert(tmp->ref == 0);
+	if (prog->tag == App) {
+	    // exec c (App p1 p2) = exec (App1 p2 c) p1
+	    tmp->tag = App1;
+	    tmp->R = cont;
+	    cont = tmp;
+	    p = prog->L;
+	    tmp->L = prog->R;
+	    if (prog->ref) {
+		prog->ref--;
+		add_ref(prog->L);
+		add_ref(prog->R);
+		tmp = new_info();
+	    } else
+		tmp = prog;
+	    prog = p;
+	    continue;
+	}
+	if (cont->tag == App1) {
+	    if (prog->tag == D) {
+		// exec (App1 p c) D = exec c (D1 p)
+#if 1
+                assert(prog == &d);
+                d.ref--; // free_info(prog);
+		assert(!cont->ref); // all references were inside prog
+		cont->tag = D1;
+		prog = cont;
+		cont = cont->R;
+#else
+		tmp->tag = D1;
+                assert(prog == &d);
+                d.ref--; // free_info(prog);
+		prog = tmp;
+		tmp->L = cont->L;
+		p = cont->R;
+                if (cont->ref) {
+		    cont->ref--;
+		    add_ref(cont->L);
+		    add_ref(cont->R);
+		    tmp = new_info();
+		} else 
+		    tmp = cont;
+		cont = p;
+#endif
+	    } else {
+		// exec (App1 p1 c) p2 = exec (App2 p2 c) p1
+#if 0
+		tmp->tag = App2;
+		tmp->L = prog;
+		tmp->R = cont->R;
+		prog = cont->L;
+		p = tmp;
+                if (cont->ref) {
+		    cont->ref--;
+		    add_ref(cont->L);
+		    add_ref(cont->R);
+		    tmp = new_info();
+		} else
+		    tmp = cont;
+		cont = p;
+#else
+		if (cont->ref) {
+		    cont->ref--;
+		    tmp->tag = App2;
+		    tmp->L = prog;
+		    add_ref(tmp->R = cont->R);
+		    add_ref(prog = cont->L);
+		    cont = tmp;
+		    tmp = new_info();
+		} else {
+		    cont->tag = App2;
+		    p = cont->L;
+		    cont->L = prog;
+		    prog = p;
+		}
+#endif
+	    }
+	    continue;
+	}
+	assert(cont->tag == App2);
+	// exec (App2 p1 c) p = case p1 of
+	switch ((q=cont->L)->tag) {
+	case S:
+	    // S -> exec c (S1 p)
+	    tmp->tag = S1;
+	    tmp->L = prog;
+	    prog = tmp;
+	    p = cont->R;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+		tmp = new_info();
+	    } else {
+		assert(cont->L == &s);
+		s.ref--; // free_info(q);
+		tmp = cont;
+	    }
+	    cont = p;
+	    break;
+	case S1:
+	    // S1 p1 -> exec c (S2 p1 p)
+	    tmp->tag = S2;
+	    tmp->L = q->L;
+	    tmp->R = prog;
+	    prog = tmp;
+	    p = cont->R;
+            if (cont->ref) {
+		cont->ref--;
+		add_ref(q->L);
+		add_ref(cont->R);
+		tmp = new_info();
+	    } else {
+		if (q->ref) {
+		    q->ref--;
+		    add_ref(q->L);
+		} else
+		    free_info_no_ref(q);
+		tmp = cont;
+	    }
+	    cont = p;
+	    break;
+	case S2:
+	    // S2 p1 p2 -> exec (App1 p (App1 (App p2 p) c)) p1
+	    // **NOTE** This wastes a bit of time, as p1,p,p2,p are evaluated
+	    // again, or better, their tag is checked just to find out that
+	    // it's not App, which is not necessary as we already knew that.
+	    // Some options:
+	    // - introduce a tag "App3" meaning "works exactly as App1, but
+	    //   with all arguments already evaluated"
+	    // - consider gotos (yuk) to avoid the prog->tag==App check at the
+	    //   beginning of the loop
+	    // - introduce an AppE program tag meaning "like App, but both
+	    //   parts already evaluated, transform directly to App3"
+	    // - (does not really belong here) swap the two fields of App1 -
+            //   to make the right side of an App always go to the ->R field
+	    //   and maybe save some data movement.
+	    // **TODO**: Implement this and check whether it actually makes
+	    // the program run faster; I'm not sure about this.
+	    p = new_info();
+	    p->tag = App;
+	    p->L = q->R;
+	    add_ref(p->R = prog);
+	    tmp->tag = App1;
+	    tmp->L = p;
+	    tmp->R = cont->R;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+		cont = new_info();
+		cont->tag = App1;
+		cont->L = prog;
+		cont->R = tmp;
+		prog = q->L;
+		add_ref(q->R);
+		add_ref(q->L);
+		tmp = new_info();
+	    } else {
+		cont->tag = App1;
+		cont->L = prog;
+		cont->R = tmp;
+		prog = q->L;
+		if (q->ref) {
+		    q->ref--;
+		    add_ref(q->R);
+		    add_ref(q->L);
+		    tmp = new_info();
+		} else
+		    tmp = q;
+	    }
+	    break;
+	case K:
+	    // K -> exec c (K1 p)
+	    tmp->tag = K1;
+	    tmp->L = prog;
+	    prog = tmp;
+	    p = cont->R;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+		tmp = new_info();
+	    } else {
+		assert(cont->L == &k);
+		k.ref--; // free_info(q);
+		tmp = cont;
+	    }
+	    cont = p;
+	    break;
+	case K1:
+	    // K1 p1 -> exec c p1
+	    free_info(prog);
+	    prog = q->L;
+	    p = cont->R;
+	    assert(!cont->ref); // all references were inside prog
+	    if (0 && cont->ref) {
+		cont->ref--;
+		add_ref(q->L);
+		add_ref(cont->R);
+	    } else {
+		free_info_no_ref(cont);
+		if (q->ref) {
+		    q->ref--;
+		    add_ref(q->L);
+		} else
+		    free_info_no_ref(q);
+	    }
+	    cont = p;
+	    break;
+	case Dot:
+	    // Dot char -> putChar char >> exec c p
+#if !defined SILENT
+	    putchar(q->CH);
+//	    fflush(stdout); // slow :-|
+#endif
+	    /* fall through */
+	case I:
+	    // I -> exec c p
+	    p = cont->R;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+	    } else {
+		assert(cont->L == &i || (unsigned)(cont->L - dot) < 256);
+		q->ref--; // free_info(q);
+		free_info_no_ref(cont);
+	    }
+	    cont = p;
+	    break;
+	case V:
+	    // V -> exec c V
+	    free_info(prog);
+	    prog = q;
+	    p = cont->R;
+	    assert(!cont->ref); // all references were inside prog
+	    if (0 && cont->ref) {
+		cont->ref--;
+		add_ref(cont->L);
+		add_ref(cont->R);
+	    } else
+		free_info_no_ref(cont);
+	    cont = p;
+	    break;
+	case At:
+	    // At -> isEOF >>= \eof ->
+	    //       if (eof) then current_char=EOF `seq`
+	    //                     exec (App1 V c) p
+	    //                else getChar >>= \ch ->
+	    //                     current_char=ch `seq`
+	    //                     exec (App1 I c) p
+	    // **NOTE** See comment at S2.
+	    current_char = getchar();
+	    tmp->tag = App1;
+	    tmp->R = cont->R;
+	    add_ref(tmp->L = (current_char!=EOF) ? &i : &v);
+	    p = tmp;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+		tmp = new_info();
+	    } else {
+		assert(cont->L == &at);
+		at.ref--; // free_info(q);
+		tmp = cont;
+	    }
+	    cont = p;
+	    break;
+	case VBar:
+	    // VBar -> exec (App1 (maybe V Dot current_char) c) p
+	    // **NOTE** See comment at S2.
+	    tmp->tag = App1;
+	    tmp->R = cont->R;
+	    add_ref(tmp->L = (current_char!=EOF) ? dot+current_char : &v);
+	    p = tmp;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+		tmp = new_info();
+	    } else {
+		assert(cont->L == &vbar);
+		vbar.ref--; // free_info(q);
+		tmp = cont;
+	    }
+	    cont = p;
+	    break;
+#if defined USE_SLASH
+	case Slash:
+	    // Slash -> exec (App1 (maybe V Que current_char) c) p
+	    // **NOTE** See comment at S2.
+	    tmp->tag = App1;
+	    tmp->R = cont->R;
+	    add_ref(tmp->L = (current_char!=EOF) ? que+current_char : &v);
+	    p = tmp;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+		tmp = new_info();
+	    } else {
+		assert(cont->L == &slash);
+		slash.ref--; // free_info(q);
+		tmp = cont;
+	    }
+	    cont = p;
+	    break;
+#endif
+	case Que:
+	    // Que char -> exec (App1 (if (Just char) == ch then I else V) c) p
+	    // **NOTE** See comment at S2.
+	    tmp->tag = App1;
+	    tmp->R = cont->R;
+	    add_ref(tmp->L = (current_char==q->CH) ? &i : &v);
+	    p = tmp;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+		tmp = new_info();
+	    } else {
+		assert((unsigned)(cont->L - que) < 256);
+		q->ref--; // free_info(q);
+		tmp = cont;
+	    }
+	    cont = p;
+	    break;
+	case D1:
+	    // D1 p1 -> exec (App1 p c) p1
+	    // **NOTE** See comment at S2.
+	    tmp->tag = App1;
+	    tmp->L = prog;
+	    tmp->R = cont->R;
+	    prog = q->L;
+	    p = tmp;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(cont->R);
+		add_ref(q->L);
+		tmp = new_info();
+	    } else {
+		if (q->ref) {
+		    q->ref--;
+		    add_ref(q->L);
+		} else
+		    free_info_no_ref(q);
+		tmp = cont;
+	    }
+	    cont = p;
+	    break;
+	case C:
+	    // C -> exec (App1 (C1 c) c) p
+	    // **NOTE** See comment at S2.
+	    tmp->tag = C1;
+	    tmp->L = cont->R;
+	    if (cont->ref) {
+		cont->ref--;
+		cont->R->ref += 2; // add_ref, twice
+		cont = new_info();
+	    } else {
+		add_ref(cont->R);
+		assert(cont->L == &c);
+		c.ref--;
+	    }
+	    cont->tag = App1;
+	    cont->L = tmp;
+	    cont->R = tmp->L;
+	    tmp = new_info();
+	    break;
+	case C1:
+	    // C1 c1 -> exec c1 p
+#if 0
+	    // *note*: cont == (App2 (C1 c) c') is equivalent to cont == c,
+	    // so we just copy c's values to cont (instead of using c as
+	    // the new continuation), thus affecting all references to cont.
+	    // This is a clear win for ``ci`.*`cr, regarding both memory
+            // consumption and speed, and it should not hurt most other
+	    // programs.
+	    // Also, we can now execute ``ci`ci in a limited amount of memory.
+	    // *note*: this assumes that all continuations contain two links.
+	    free_info(cont->R);
+	    cont->tag = q->L->tag;
+	    cont->L = q->L->L;
+	    cont->R = q->L->R;
+	    if (q->ref) {
+		q->ref--;
+		add_ref(q->L->L);
+		add_ref(q->L->R);
+	    } else {
+		if (q->L->ref) {
+		    q->L->ref--;
+		    add_ref(q->L->L);
+		    add_ref(q->L->R);
+		} else
+		    free_info_no_ref(q->L);
+		free_info_no_ref(q);
+	    }
+#else
+	    p = q->L;
+	    if (cont->ref) {
+		cont->ref--;
+		add_ref(q->L);
+	    } else {
+		if (q->ref) {
+		    q->ref--;
+		    add_ref(q->L);
+		} else
+		    free_info_no_ref(q);
+		free_info(cont->R);
+		free_info_no_ref(cont);
+	    }
+	    cont = p;
+#endif
+	    break;
+	case E:
+	    // E -> return ()
+#if defined DEBUG_REFCOUNT || defined FREE_ALL
+	    free_info(prog);
+	    free_info(cont);
+	    free_info_no_ref(tmp);
+#endif
+	    return;
+	default:
+	    assert(!App & !D);
+	}
+    }
+}
+
+// Parse input from file f and return parsed program.
+info *parse(FILE *f)
+{
+    info *p, *q, *t;
+    int ch;
+
+    do { // Skip whitespace and comments.
+	while ((ch = getc(f)) == '#')
+	    while ((ch = getc(f)) != '\n')
+		;
+    } while (isspace(ch));
+
+    switch (tolower(ch)) {
+    case '@':
+	// '@' -> (At, s)
+	at.ref++;
+	return &at;
+    case 'c':
+	// 'c' -> (C, s)
+	c.ref++;
+	return &c;
+    case 'd':
+	// 'd' -> (D, s)
+	d.ref++;
+	return &d;
+    case 'e':
+	// 'e' -> (E, s)
+	e.ref++;
+	return &e;
+    case 'i':
+	// 'i' -> (I, s)
+	i.ref++;
+	return &i;
+    case 'k':
+	// 'k' -> (K, s)
+	k.ref++;
+	return &k;
+    case '|':
+	// '|' -> (VBar, s)
+	vbar.ref++;
+	return &vbar;
+#ifdef USE_SLASH
+    case '/':
+	// '/' -> (Slash, s)
+	slash.ref++;
+	return &slash;
+#endif
+    case 's':
+	// 's' -> (S, s)
+	s.ref++;
+	return &s;
+    case 'v':
+	// 'v' -> (V, s)
+	v.ref++;
+	return &v;
+    case 'r':
+	// 'r' -> (Dot '\n', s)
+	dot['\n'].ref++;
+	return dot+'\n';
+    case '.':
+	// '.' -> let (c:s1) = s in (Dot c, s1)
+	if ((ch = getc(f))==EOF)
+	    return 0;
+	dot[ch].ref++;
+	return dot+ch;
+    case '?':
+	// '?' -> let (c:s1) = s in (Que c, s1)
+	if ((ch = getc(f))==EOF)
+	    return 0;
+	que[ch].ref++;
+	return que+ch;
+    case '`':
+	// '`' -> let (p1, s1) = parse_ s in
+	//        let (p2, s2) = parse_ s1 in (App p1 p2, s2)
+	if (!(p=parse(f)))
+	    return 0;
+	if (!(q=parse(f))) {
+	    free_info(p);
+	    return 0;
+	}
+	t = new_info();
+	assert(t);
+	t->tag = App;
+	t->L = p;
+	t->R = q;
+	return t;
+    default:
+	// otherwise -> error "Unlambda.parse: parse error"
+	return 0;
+    }
+}
+
+
+int main(int argc, char **argv)
+{
+    FILE *f;
+    info *p;
+
+    init();
+    if (argc<2 || !(f=fopen(argv[1], "r"))) {
+	fprintf(stderr, "Usage: %s <file name>\n", argv[0]);
+	exit(1);
+    }
+    if (!(p=parse(f))) {
+	fprintf(stderr, "%s: file %s: parse error\n", argv[0], argv[1]);
+	fclose(f);
+	exit(1);
+    }
+    fclose(f);
+
+    exec(p);
+
+#if defined DEBUG_REFCOUNT || defined FREE_ALL
+    free_info(p);
+#endif
+#if defined DEBUG_REFCOUNT
+    fflush(stdout);
+    fprintf(stderr, "\n%s: %d allocated; %d freed\n",
+            argv[0], allocated, freed);
+
+#define CHECK_ATOM(x) \
+    if (x.ref != 0) \
+        fprintf(stderr, #x " ref count is %d\n", x.ref);
+
+    CHECK_ATOM(at);
+    CHECK_ATOM(c);
+    CHECK_ATOM(d);
+    CHECK_ATOM(e);
+    CHECK_ATOM(i);
+    CHECK_ATOM(k);
+    CHECK_ATOM(vbar);
+    CHECK_ATOM(s);
+    CHECK_ATOM(v);
+    {
+	int i;
+	for (i=0; i<256; i++)
+	    if (dot[i].ref != 0)
+		fprintf(stderr, ".%c ref count is %d\n", i, dot[i].ref);
+    }
+    while (avail) {
+	avail->ref = -1;
+	avail = avail->L;
+    }
+    // check heap (only last allocated block!) for lost items 
+    for (;heap_idx<BLOCK_SIZE;heap_idx++)
+	if (heap[heap_idx].ref != -1) {
+	    fprintf(stderr, "heap item left: tag =%2d (%4s), ref =%2d\n",
+		    heap[heap_idx].tag, tags[heap[heap_idx].tag],
+		    heap[heap_idx].ref);
+	}
+#endif
+
+    return 0;
+}