Mercurial > repo
diff interps/unlambda/unlambda.c @ 996:859f9b4339e6
<Gregor> tar xf egobot.tar.xz
author | HackBot |
---|---|
date | Sun, 09 Dec 2012 19:30:08 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interps/unlambda/unlambda.c Sun Dec 09 19:30:08 2012 +0000 @@ -0,0 +1,761 @@ +/* + * This is an implementation of the Unlambda programming language + * + * Copyright (C) 2000 Bertram Felgenhauer <b.f.@gmx.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * This is more or less a translation of the Haskell version, which is a lot + * easier to read than this code; most comments here are actually Haskell code. + * + * history: + * 13-10-2000: initial version, in Haskell + * 14-10-2000: initial version in C + * xx-10-2000: optimised; gcc is weird: removing code (which was never + * executed anyway) can make programs slower. duh. + * 31-10-2000: added '/' (use -DUSE_SLASH), which is similar to '|' but + * with ?<char> instead of .<char> + * 21-12-2000: added some fflush(stdout) code... + */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> + +// note: Be sure to compile with -DNDEBUG if you want optimal performance. +#include <assert.h> + +#define BLOCK_SIZE 4096 // allocate BLOCK_SIZE info structs each malloc + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * data Prog = App Prog Prog | At | C | C1 Cont | D | D1 Prog | Dot Char | E + * | I | K | K1 Prog | Que Char | S | S1 Prog | S2 Prog Prog | V | VBar + * data Cont = App1 Prog Cont | App2 Prog Cont + */ + +// type tags: +enum { + At, C, D, Dot, E, I, K, Que, S, +#if defined USE_SLASH + Slash, +#endif + V, VBar, T0=VBar, // 0 links + C1, D1, K1, S1, T1=S1, // 1 link + App, S2, App1, App2 // 2 links +}; + +#if defined DEBUG_REFCOUNT +char* tags[] = {"At", "C", "D", "Dot", "E", "I", "K", "Que", "S", +#if defined USE_SLASH + "Slash", +#endif + "V", "VBar", + "C1", "D1", "K1", "S1", "App", "S2", "App1", "App2"}; +#endif + +typedef struct info { + int tag; // type tag + int ref; // reference count, minus one + struct info *l; + union { + struct info *r; + char ch; + } u; +} info; + +#define L l +#define R u.r +#define CH u.ch + +// All info records with no link fields can be allocated statically. +// "eval" is a continuation which applies its argument to e, which +// ends the program, and is used in exec() +info at={At}, c={C}, d={D}, e={E}, i={I}, k={K}, vbar={VBar}, s={S}, v={V}, +#if defined USE_SLASH + slash={Slash}, +#endif + eval = {App2, 0, &e, {&e}}, dot[256], que[256]; + +info *heap; +int heap_idx = 0; +info *avail = 0; + +#if defined DEBUG_REFCOUNT +int allocated, freed; +#endif + +// Get a free info record, with ref field == 0 (reference count == 1). +inline info *new_info() +{ +#if defined DEBUG_REFCOUNT + allocated++; +#endif + if (avail) { + info *t; + t = avail; + avail = avail->L; +#if defined DEBUG_REFCOUNT + t->ref++; +#endif + assert(t->ref == 0); + return t; + } else { + if (!heap_idx) { + if (!(heap = malloc(BLOCK_SIZE * sizeof(info)))) + perror("new_info"), exit(EXIT_FAILURE); + heap_idx = BLOCK_SIZE; + } + heap[--heap_idx].ref = 0; // Would it be better to use calloc above? + return heap + heap_idx; + } +} + +// Increment the reference count on an info struct. +#define add_ref(i) ((i)->ref++) + +// Free an info record without checking any reference counts. +inline void free_info_no_ref(info *i) +{ + assert(i->ref == 0); +#if defined DEBUG_REFCOUNT + freed++; + i->ref--; +#endif + i->L = avail; + avail = i; +} + +// Free a reference counted info record. +inline void free_info(info *i) +{ + assert(i); + if (i->ref) + i->ref--; + else { + if (i->tag > T0) { + free_info(i->L); + if (i->tag > T1) + free_info(i->R); + } + free_info_no_ref(i); + } +} + +// Initialise the dot[] and que[] arrays. +void init() +{ + int i; + for (i=0; i<256; i++) { + dot[i].tag = Dot; + dot[i].CH = i; + que[i].tag = Que; + que[i].CH = i; + } +} + +// Execute program. +void exec(info *prog) +{ + info *cont, *p, *q, *tmp; + int current_char; + + current_char = EOF; + tmp = new_info(); + add_ref(cont = &eval); + add_ref(prog); + for (;;) { + // exec cont prog + assert(tmp); + assert(tmp->ref == 0); + if (prog->tag == App) { + // exec c (App p1 p2) = exec (App1 p2 c) p1 + tmp->tag = App1; + tmp->R = cont; + cont = tmp; + p = prog->L; + tmp->L = prog->R; + if (prog->ref) { + prog->ref--; + add_ref(prog->L); + add_ref(prog->R); + tmp = new_info(); + } else + tmp = prog; + prog = p; + continue; + } + if (cont->tag == App1) { + if (prog->tag == D) { + // exec (App1 p c) D = exec c (D1 p) +#if 1 + assert(prog == &d); + d.ref--; // free_info(prog); + assert(!cont->ref); // all references were inside prog + cont->tag = D1; + prog = cont; + cont = cont->R; +#else + tmp->tag = D1; + assert(prog == &d); + d.ref--; // free_info(prog); + prog = tmp; + tmp->L = cont->L; + p = cont->R; + if (cont->ref) { + cont->ref--; + add_ref(cont->L); + add_ref(cont->R); + tmp = new_info(); + } else + tmp = cont; + cont = p; +#endif + } else { + // exec (App1 p1 c) p2 = exec (App2 p2 c) p1 +#if 0 + tmp->tag = App2; + tmp->L = prog; + tmp->R = cont->R; + prog = cont->L; + p = tmp; + if (cont->ref) { + cont->ref--; + add_ref(cont->L); + add_ref(cont->R); + tmp = new_info(); + } else + tmp = cont; + cont = p; +#else + if (cont->ref) { + cont->ref--; + tmp->tag = App2; + tmp->L = prog; + add_ref(tmp->R = cont->R); + add_ref(prog = cont->L); + cont = tmp; + tmp = new_info(); + } else { + cont->tag = App2; + p = cont->L; + cont->L = prog; + prog = p; + } +#endif + } + continue; + } + assert(cont->tag == App2); + // exec (App2 p1 c) p = case p1 of + switch ((q=cont->L)->tag) { + case S: + // S -> exec c (S1 p) + tmp->tag = S1; + tmp->L = prog; + prog = tmp; + p = cont->R; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + tmp = new_info(); + } else { + assert(cont->L == &s); + s.ref--; // free_info(q); + tmp = cont; + } + cont = p; + break; + case S1: + // S1 p1 -> exec c (S2 p1 p) + tmp->tag = S2; + tmp->L = q->L; + tmp->R = prog; + prog = tmp; + p = cont->R; + if (cont->ref) { + cont->ref--; + add_ref(q->L); + add_ref(cont->R); + tmp = new_info(); + } else { + if (q->ref) { + q->ref--; + add_ref(q->L); + } else + free_info_no_ref(q); + tmp = cont; + } + cont = p; + break; + case S2: + // S2 p1 p2 -> exec (App1 p (App1 (App p2 p) c)) p1 + // **NOTE** This wastes a bit of time, as p1,p,p2,p are evaluated + // again, or better, their tag is checked just to find out that + // it's not App, which is not necessary as we already knew that. + // Some options: + // - introduce a tag "App3" meaning "works exactly as App1, but + // with all arguments already evaluated" + // - consider gotos (yuk) to avoid the prog->tag==App check at the + // beginning of the loop + // - introduce an AppE program tag meaning "like App, but both + // parts already evaluated, transform directly to App3" + // - (does not really belong here) swap the two fields of App1 - + // to make the right side of an App always go to the ->R field + // and maybe save some data movement. + // **TODO**: Implement this and check whether it actually makes + // the program run faster; I'm not sure about this. + p = new_info(); + p->tag = App; + p->L = q->R; + add_ref(p->R = prog); + tmp->tag = App1; + tmp->L = p; + tmp->R = cont->R; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + cont = new_info(); + cont->tag = App1; + cont->L = prog; + cont->R = tmp; + prog = q->L; + add_ref(q->R); + add_ref(q->L); + tmp = new_info(); + } else { + cont->tag = App1; + cont->L = prog; + cont->R = tmp; + prog = q->L; + if (q->ref) { + q->ref--; + add_ref(q->R); + add_ref(q->L); + tmp = new_info(); + } else + tmp = q; + } + break; + case K: + // K -> exec c (K1 p) + tmp->tag = K1; + tmp->L = prog; + prog = tmp; + p = cont->R; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + tmp = new_info(); + } else { + assert(cont->L == &k); + k.ref--; // free_info(q); + tmp = cont; + } + cont = p; + break; + case K1: + // K1 p1 -> exec c p1 + free_info(prog); + prog = q->L; + p = cont->R; + assert(!cont->ref); // all references were inside prog + if (0 && cont->ref) { + cont->ref--; + add_ref(q->L); + add_ref(cont->R); + } else { + free_info_no_ref(cont); + if (q->ref) { + q->ref--; + add_ref(q->L); + } else + free_info_no_ref(q); + } + cont = p; + break; + case Dot: + // Dot char -> putChar char >> exec c p +#if !defined SILENT + putchar(q->CH); +// fflush(stdout); // slow :-| +#endif + /* fall through */ + case I: + // I -> exec c p + p = cont->R; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + } else { + assert(cont->L == &i || (unsigned)(cont->L - dot) < 256); + q->ref--; // free_info(q); + free_info_no_ref(cont); + } + cont = p; + break; + case V: + // V -> exec c V + free_info(prog); + prog = q; + p = cont->R; + assert(!cont->ref); // all references were inside prog + if (0 && cont->ref) { + cont->ref--; + add_ref(cont->L); + add_ref(cont->R); + } else + free_info_no_ref(cont); + cont = p; + break; + case At: + // At -> isEOF >>= \eof -> + // if (eof) then current_char=EOF `seq` + // exec (App1 V c) p + // else getChar >>= \ch -> + // current_char=ch `seq` + // exec (App1 I c) p + // **NOTE** See comment at S2. + current_char = getchar(); + tmp->tag = App1; + tmp->R = cont->R; + add_ref(tmp->L = (current_char!=EOF) ? &i : &v); + p = tmp; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + tmp = new_info(); + } else { + assert(cont->L == &at); + at.ref--; // free_info(q); + tmp = cont; + } + cont = p; + break; + case VBar: + // VBar -> exec (App1 (maybe V Dot current_char) c) p + // **NOTE** See comment at S2. + tmp->tag = App1; + tmp->R = cont->R; + add_ref(tmp->L = (current_char!=EOF) ? dot+current_char : &v); + p = tmp; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + tmp = new_info(); + } else { + assert(cont->L == &vbar); + vbar.ref--; // free_info(q); + tmp = cont; + } + cont = p; + break; +#if defined USE_SLASH + case Slash: + // Slash -> exec (App1 (maybe V Que current_char) c) p + // **NOTE** See comment at S2. + tmp->tag = App1; + tmp->R = cont->R; + add_ref(tmp->L = (current_char!=EOF) ? que+current_char : &v); + p = tmp; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + tmp = new_info(); + } else { + assert(cont->L == &slash); + slash.ref--; // free_info(q); + tmp = cont; + } + cont = p; + break; +#endif + case Que: + // Que char -> exec (App1 (if (Just char) == ch then I else V) c) p + // **NOTE** See comment at S2. + tmp->tag = App1; + tmp->R = cont->R; + add_ref(tmp->L = (current_char==q->CH) ? &i : &v); + p = tmp; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + tmp = new_info(); + } else { + assert((unsigned)(cont->L - que) < 256); + q->ref--; // free_info(q); + tmp = cont; + } + cont = p; + break; + case D1: + // D1 p1 -> exec (App1 p c) p1 + // **NOTE** See comment at S2. + tmp->tag = App1; + tmp->L = prog; + tmp->R = cont->R; + prog = q->L; + p = tmp; + if (cont->ref) { + cont->ref--; + add_ref(cont->R); + add_ref(q->L); + tmp = new_info(); + } else { + if (q->ref) { + q->ref--; + add_ref(q->L); + } else + free_info_no_ref(q); + tmp = cont; + } + cont = p; + break; + case C: + // C -> exec (App1 (C1 c) c) p + // **NOTE** See comment at S2. + tmp->tag = C1; + tmp->L = cont->R; + if (cont->ref) { + cont->ref--; + cont->R->ref += 2; // add_ref, twice + cont = new_info(); + } else { + add_ref(cont->R); + assert(cont->L == &c); + c.ref--; + } + cont->tag = App1; + cont->L = tmp; + cont->R = tmp->L; + tmp = new_info(); + break; + case C1: + // C1 c1 -> exec c1 p +#if 0 + // *note*: cont == (App2 (C1 c) c') is equivalent to cont == c, + // so we just copy c's values to cont (instead of using c as + // the new continuation), thus affecting all references to cont. + // This is a clear win for ``ci`.*`cr, regarding both memory + // consumption and speed, and it should not hurt most other + // programs. + // Also, we can now execute ``ci`ci in a limited amount of memory. + // *note*: this assumes that all continuations contain two links. + free_info(cont->R); + cont->tag = q->L->tag; + cont->L = q->L->L; + cont->R = q->L->R; + if (q->ref) { + q->ref--; + add_ref(q->L->L); + add_ref(q->L->R); + } else { + if (q->L->ref) { + q->L->ref--; + add_ref(q->L->L); + add_ref(q->L->R); + } else + free_info_no_ref(q->L); + free_info_no_ref(q); + } +#else + p = q->L; + if (cont->ref) { + cont->ref--; + add_ref(q->L); + } else { + if (q->ref) { + q->ref--; + add_ref(q->L); + } else + free_info_no_ref(q); + free_info(cont->R); + free_info_no_ref(cont); + } + cont = p; +#endif + break; + case E: + // E -> return () +#if defined DEBUG_REFCOUNT || defined FREE_ALL + free_info(prog); + free_info(cont); + free_info_no_ref(tmp); +#endif + return; + default: + assert(!App & !D); + } + } +} + +// Parse input from file f and return parsed program. +info *parse(FILE *f) +{ + info *p, *q, *t; + int ch; + + do { // Skip whitespace and comments. + while ((ch = getc(f)) == '#') + while ((ch = getc(f)) != '\n') + ; + } while (isspace(ch)); + + switch (tolower(ch)) { + case '@': + // '@' -> (At, s) + at.ref++; + return &at; + case 'c': + // 'c' -> (C, s) + c.ref++; + return &c; + case 'd': + // 'd' -> (D, s) + d.ref++; + return &d; + case 'e': + // 'e' -> (E, s) + e.ref++; + return &e; + case 'i': + // 'i' -> (I, s) + i.ref++; + return &i; + case 'k': + // 'k' -> (K, s) + k.ref++; + return &k; + case '|': + // '|' -> (VBar, s) + vbar.ref++; + return &vbar; +#ifdef USE_SLASH + case '/': + // '/' -> (Slash, s) + slash.ref++; + return &slash; +#endif + case 's': + // 's' -> (S, s) + s.ref++; + return &s; + case 'v': + // 'v' -> (V, s) + v.ref++; + return &v; + case 'r': + // 'r' -> (Dot '\n', s) + dot['\n'].ref++; + return dot+'\n'; + case '.': + // '.' -> let (c:s1) = s in (Dot c, s1) + if ((ch = getc(f))==EOF) + return 0; + dot[ch].ref++; + return dot+ch; + case '?': + // '?' -> let (c:s1) = s in (Que c, s1) + if ((ch = getc(f))==EOF) + return 0; + que[ch].ref++; + return que+ch; + case '`': + // '`' -> let (p1, s1) = parse_ s in + // let (p2, s2) = parse_ s1 in (App p1 p2, s2) + if (!(p=parse(f))) + return 0; + if (!(q=parse(f))) { + free_info(p); + return 0; + } + t = new_info(); + assert(t); + t->tag = App; + t->L = p; + t->R = q; + return t; + default: + // otherwise -> error "Unlambda.parse: parse error" + return 0; + } +} + + +int main(int argc, char **argv) +{ + FILE *f; + info *p; + + init(); + if (argc<2 || !(f=fopen(argv[1], "r"))) { + fprintf(stderr, "Usage: %s <file name>\n", argv[0]); + exit(1); + } + if (!(p=parse(f))) { + fprintf(stderr, "%s: file %s: parse error\n", argv[0], argv[1]); + fclose(f); + exit(1); + } + fclose(f); + + exec(p); + +#if defined DEBUG_REFCOUNT || defined FREE_ALL + free_info(p); +#endif +#if defined DEBUG_REFCOUNT + fflush(stdout); + fprintf(stderr, "\n%s: %d allocated; %d freed\n", + argv[0], allocated, freed); + +#define CHECK_ATOM(x) \ + if (x.ref != 0) \ + fprintf(stderr, #x " ref count is %d\n", x.ref); + + CHECK_ATOM(at); + CHECK_ATOM(c); + CHECK_ATOM(d); + CHECK_ATOM(e); + CHECK_ATOM(i); + CHECK_ATOM(k); + CHECK_ATOM(vbar); + CHECK_ATOM(s); + CHECK_ATOM(v); + { + int i; + for (i=0; i<256; i++) + if (dot[i].ref != 0) + fprintf(stderr, ".%c ref count is %d\n", i, dot[i].ref); + } + while (avail) { + avail->ref = -1; + avail = avail->L; + } + // check heap (only last allocated block!) for lost items + for (;heap_idx<BLOCK_SIZE;heap_idx++) + if (heap[heap_idx].ref != -1) { + fprintf(stderr, "heap item left: tag =%2d (%4s), ref =%2d\n", + heap[heap_idx].tag, tags[heap[heap_idx].tag], + heap[heap_idx].ref); + } +#endif + + return 0; +}