view interps/unlambda/unlambda.c @ 9554:23f43464694e

<Zarutian> le/rn Frams\xc3\xb3knarflokkurinn/A, now defunct, political party in Iceland. Like its sister party Sj\xc3\xa1lfst\xc3\xa6\xc3\xb0isflokkurinn it is named by the antonym of what it is. (The name means the Progressive Party but they have nearly always been highly regressive). Think dumb Hill-Billies in ill fitting suits and you get their constiuents.
author HackBot
date Sun, 30 Oct 2016 14:33:24 +0000
parents 859f9b4339e6
children
line wrap: on
line source

/*
 * This is an implementation of the Unlambda programming language
 *
 * Copyright (C) 2000 Bertram Felgenhauer <b.f.@gmx.de>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * This is more or less a translation of the Haskell version, which is a lot
 * easier to read than this code; most comments here are actually Haskell code.
 *
 * history:
 * 13-10-2000: initial version, in Haskell
 * 14-10-2000: initial version in C
 * xx-10-2000: optimised; gcc is weird: removing code (which was never
 *             executed anyway) can make programs slower. duh.
 * 31-10-2000: added '/' (use -DUSE_SLASH), which is similar to '|' but
 *             with ?<char> instead of .<char>
 * 21-12-2000: added some fflush(stdout) code...
 */

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

// note: Be sure to compile with -DNDEBUG if you want optimal performance.
#include <assert.h>

#define BLOCK_SIZE 4096  // allocate BLOCK_SIZE info structs each malloc

#define _STR(x) #x
#define STR(x) _STR(x)

/*
 * data Prog = App Prog Prog | At | C | C1 Cont | D | D1 Prog | Dot Char | E
 *      | I | K | K1 Prog | Que Char | S | S1 Prog | S2 Prog Prog | V | VBar
 * data Cont = App1 Prog Cont | App2 Prog Cont
 */

// type tags:
enum { 
    At, C, D, Dot, E, I, K, Que, S,
#if defined USE_SLASH
    Slash,
#endif
    V, VBar,                        T0=VBar, // 0 links
    C1, D1, K1, S1,                 T1=S1,   // 1 link
    App, S2, App1, App2                      // 2 links
};

#if defined DEBUG_REFCOUNT
char* tags[] = {"At", "C", "D", "Dot", "E", "I", "K", "Que", "S", 
#if defined USE_SLASH
                "Slash",
#endif
		"V", "VBar",
		"C1", "D1", "K1", "S1", "App", "S2", "App1", "App2"};
#endif

typedef struct info {
    int tag; // type tag
    int ref; // reference count, minus one
    struct info *l;
    union {
	struct info *r;
	char ch;
    } u;
} info;

#define L  l
#define R  u.r
#define CH u.ch

// All info records with no link fields can be allocated statically.
// "eval" is a continuation which applies its argument to e, which
// ends the program, and is used in exec()
info at={At}, c={C}, d={D}, e={E}, i={I}, k={K}, vbar={VBar}, s={S}, v={V},
#if defined USE_SLASH
     slash={Slash},
#endif
     eval = {App2, 0, &e, {&e}}, dot[256], que[256];

info *heap;
int  heap_idx = 0;
info *avail = 0;

#if defined DEBUG_REFCOUNT
int allocated, freed;
#endif

// Get a free info record, with ref field == 0 (reference count == 1).
inline info *new_info()
{
#if defined DEBUG_REFCOUNT
    allocated++;
#endif
    if (avail) {
	info *t;
	t = avail;
	avail = avail->L;
#if defined DEBUG_REFCOUNT
	t->ref++;
#endif
	assert(t->ref == 0);
	return t;
    } else {
	if (!heap_idx) {
	    if (!(heap = malloc(BLOCK_SIZE * sizeof(info))))
		perror("new_info"), exit(EXIT_FAILURE);
	    heap_idx = BLOCK_SIZE;
	}
	heap[--heap_idx].ref = 0; // Would it be better to use calloc above?
	return heap + heap_idx;
    }
}

// Increment the reference count on an info struct.
#define add_ref(i) ((i)->ref++)

// Free an info record without checking any reference counts.
inline void free_info_no_ref(info *i)
{
    assert(i->ref == 0);
#if defined DEBUG_REFCOUNT
    freed++;
    i->ref--;
#endif
    i->L = avail;
    avail = i;
}

// Free a reference counted info record.
inline void free_info(info *i)
{
    assert(i);
    if (i->ref) 
	i->ref--;
    else {
	if (i->tag > T0) {
	    free_info(i->L);
	    if (i->tag > T1)
		free_info(i->R);
	}
	free_info_no_ref(i);
    }
}

// Initialise the dot[] and que[] arrays.
void init()
{
    int i;
    for (i=0; i<256; i++) {
	dot[i].tag = Dot;
	dot[i].CH = i;
	que[i].tag = Que;
	que[i].CH = i;
    }
}

// Execute program.
void exec(info *prog)
{
    info *cont, *p, *q, *tmp;
    int current_char;

    current_char = EOF;
    tmp = new_info();
    add_ref(cont = &eval);
    add_ref(prog);
    for (;;) {
        // exec cont prog
	assert(tmp);
	assert(tmp->ref == 0);
	if (prog->tag == App) {
	    // exec c (App p1 p2) = exec (App1 p2 c) p1
	    tmp->tag = App1;
	    tmp->R = cont;
	    cont = tmp;
	    p = prog->L;
	    tmp->L = prog->R;
	    if (prog->ref) {
		prog->ref--;
		add_ref(prog->L);
		add_ref(prog->R);
		tmp = new_info();
	    } else
		tmp = prog;
	    prog = p;
	    continue;
	}
	if (cont->tag == App1) {
	    if (prog->tag == D) {
		// exec (App1 p c) D = exec c (D1 p)
#if 1
                assert(prog == &d);
                d.ref--; // free_info(prog);
		assert(!cont->ref); // all references were inside prog
		cont->tag = D1;
		prog = cont;
		cont = cont->R;
#else
		tmp->tag = D1;
                assert(prog == &d);
                d.ref--; // free_info(prog);
		prog = tmp;
		tmp->L = cont->L;
		p = cont->R;
                if (cont->ref) {
		    cont->ref--;
		    add_ref(cont->L);
		    add_ref(cont->R);
		    tmp = new_info();
		} else 
		    tmp = cont;
		cont = p;
#endif
	    } else {
		// exec (App1 p1 c) p2 = exec (App2 p2 c) p1
#if 0
		tmp->tag = App2;
		tmp->L = prog;
		tmp->R = cont->R;
		prog = cont->L;
		p = tmp;
                if (cont->ref) {
		    cont->ref--;
		    add_ref(cont->L);
		    add_ref(cont->R);
		    tmp = new_info();
		} else
		    tmp = cont;
		cont = p;
#else
		if (cont->ref) {
		    cont->ref--;
		    tmp->tag = App2;
		    tmp->L = prog;
		    add_ref(tmp->R = cont->R);
		    add_ref(prog = cont->L);
		    cont = tmp;
		    tmp = new_info();
		} else {
		    cont->tag = App2;
		    p = cont->L;
		    cont->L = prog;
		    prog = p;
		}
#endif
	    }
	    continue;
	}
	assert(cont->tag == App2);
	// exec (App2 p1 c) p = case p1 of
	switch ((q=cont->L)->tag) {
	case S:
	    // S -> exec c (S1 p)
	    tmp->tag = S1;
	    tmp->L = prog;
	    prog = tmp;
	    p = cont->R;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &s);
		s.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case S1:
	    // S1 p1 -> exec c (S2 p1 p)
	    tmp->tag = S2;
	    tmp->L = q->L;
	    tmp->R = prog;
	    prog = tmp;
	    p = cont->R;
            if (cont->ref) {
		cont->ref--;
		add_ref(q->L);
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		if (q->ref) {
		    q->ref--;
		    add_ref(q->L);
		} else
		    free_info_no_ref(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case S2:
	    // S2 p1 p2 -> exec (App1 p (App1 (App p2 p) c)) p1
	    // **NOTE** This wastes a bit of time, as p1,p,p2,p are evaluated
	    // again, or better, their tag is checked just to find out that
	    // it's not App, which is not necessary as we already knew that.
	    // Some options:
	    // - introduce a tag "App3" meaning "works exactly as App1, but
	    //   with all arguments already evaluated"
	    // - consider gotos (yuk) to avoid the prog->tag==App check at the
	    //   beginning of the loop
	    // - introduce an AppE program tag meaning "like App, but both
	    //   parts already evaluated, transform directly to App3"
	    // - (does not really belong here) swap the two fields of App1 -
            //   to make the right side of an App always go to the ->R field
	    //   and maybe save some data movement.
	    // **TODO**: Implement this and check whether it actually makes
	    // the program run faster; I'm not sure about this.
	    p = new_info();
	    p->tag = App;
	    p->L = q->R;
	    add_ref(p->R = prog);
	    tmp->tag = App1;
	    tmp->L = p;
	    tmp->R = cont->R;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		cont = new_info();
		cont->tag = App1;
		cont->L = prog;
		cont->R = tmp;
		prog = q->L;
		add_ref(q->R);
		add_ref(q->L);
		tmp = new_info();
	    } else {
		cont->tag = App1;
		cont->L = prog;
		cont->R = tmp;
		prog = q->L;
		if (q->ref) {
		    q->ref--;
		    add_ref(q->R);
		    add_ref(q->L);
		    tmp = new_info();
		} else
		    tmp = q;
	    }
	    break;
	case K:
	    // K -> exec c (K1 p)
	    tmp->tag = K1;
	    tmp->L = prog;
	    prog = tmp;
	    p = cont->R;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &k);
		k.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case K1:
	    // K1 p1 -> exec c p1
	    free_info(prog);
	    prog = q->L;
	    p = cont->R;
	    assert(!cont->ref); // all references were inside prog
	    if (0 && cont->ref) {
		cont->ref--;
		add_ref(q->L);
		add_ref(cont->R);
	    } else {
		free_info_no_ref(cont);
		if (q->ref) {
		    q->ref--;
		    add_ref(q->L);
		} else
		    free_info_no_ref(q);
	    }
	    cont = p;
	    break;
	case Dot:
	    // Dot char -> putChar char >> exec c p
#if !defined SILENT
	    putchar(q->CH);
//	    fflush(stdout); // slow :-|
#endif
	    /* fall through */
	case I:
	    // I -> exec c p
	    p = cont->R;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
	    } else {
		assert(cont->L == &i || (unsigned)(cont->L - dot) < 256);
		q->ref--; // free_info(q);
		free_info_no_ref(cont);
	    }
	    cont = p;
	    break;
	case V:
	    // V -> exec c V
	    free_info(prog);
	    prog = q;
	    p = cont->R;
	    assert(!cont->ref); // all references were inside prog
	    if (0 && cont->ref) {
		cont->ref--;
		add_ref(cont->L);
		add_ref(cont->R);
	    } else
		free_info_no_ref(cont);
	    cont = p;
	    break;
	case At:
	    // At -> isEOF >>= \eof ->
	    //       if (eof) then current_char=EOF `seq`
	    //                     exec (App1 V c) p
	    //                else getChar >>= \ch ->
	    //                     current_char=ch `seq`
	    //                     exec (App1 I c) p
	    // **NOTE** See comment at S2.
	    current_char = getchar();
	    tmp->tag = App1;
	    tmp->R = cont->R;
	    add_ref(tmp->L = (current_char!=EOF) ? &i : &v);
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &at);
		at.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case VBar:
	    // VBar -> exec (App1 (maybe V Dot current_char) c) p
	    // **NOTE** See comment at S2.
	    tmp->tag = App1;
	    tmp->R = cont->R;
	    add_ref(tmp->L = (current_char!=EOF) ? dot+current_char : &v);
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &vbar);
		vbar.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
#if defined USE_SLASH
	case Slash:
	    // Slash -> exec (App1 (maybe V Que current_char) c) p
	    // **NOTE** See comment at S2.
	    tmp->tag = App1;
	    tmp->R = cont->R;
	    add_ref(tmp->L = (current_char!=EOF) ? que+current_char : &v);
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &slash);
		slash.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
#endif
	case Que:
	    // Que char -> exec (App1 (if (Just char) == ch then I else V) c) p
	    // **NOTE** See comment at S2.
	    tmp->tag = App1;
	    tmp->R = cont->R;
	    add_ref(tmp->L = (current_char==q->CH) ? &i : &v);
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert((unsigned)(cont->L - que) < 256);
		q->ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case D1:
	    // D1 p1 -> exec (App1 p c) p1
	    // **NOTE** See comment at S2.
	    tmp->tag = App1;
	    tmp->L = prog;
	    tmp->R = cont->R;
	    prog = q->L;
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		add_ref(q->L);
		tmp = new_info();
	    } else {
		if (q->ref) {
		    q->ref--;
		    add_ref(q->L);
		} else
		    free_info_no_ref(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case C:
	    // C -> exec (App1 (C1 c) c) p
	    // **NOTE** See comment at S2.
	    tmp->tag = C1;
	    tmp->L = cont->R;
	    if (cont->ref) {
		cont->ref--;
		cont->R->ref += 2; // add_ref, twice
		cont = new_info();
	    } else {
		add_ref(cont->R);
		assert(cont->L == &c);
		c.ref--;
	    }
	    cont->tag = App1;
	    cont->L = tmp;
	    cont->R = tmp->L;
	    tmp = new_info();
	    break;
	case C1:
	    // C1 c1 -> exec c1 p
#if 0
	    // *note*: cont == (App2 (C1 c) c') is equivalent to cont == c,
	    // so we just copy c's values to cont (instead of using c as
	    // the new continuation), thus affecting all references to cont.
	    // This is a clear win for ``ci`.*`cr, regarding both memory
            // consumption and speed, and it should not hurt most other
	    // programs.
	    // Also, we can now execute ``ci`ci in a limited amount of memory.
	    // *note*: this assumes that all continuations contain two links.
	    free_info(cont->R);
	    cont->tag = q->L->tag;
	    cont->L = q->L->L;
	    cont->R = q->L->R;
	    if (q->ref) {
		q->ref--;
		add_ref(q->L->L);
		add_ref(q->L->R);
	    } else {
		if (q->L->ref) {
		    q->L->ref--;
		    add_ref(q->L->L);
		    add_ref(q->L->R);
		} else
		    free_info_no_ref(q->L);
		free_info_no_ref(q);
	    }
#else
	    p = q->L;
	    if (cont->ref) {
		cont->ref--;
		add_ref(q->L);
	    } else {
		if (q->ref) {
		    q->ref--;
		    add_ref(q->L);
		} else
		    free_info_no_ref(q);
		free_info(cont->R);
		free_info_no_ref(cont);
	    }
	    cont = p;
#endif
	    break;
	case E:
	    // E -> return ()
#if defined DEBUG_REFCOUNT || defined FREE_ALL
	    free_info(prog);
	    free_info(cont);
	    free_info_no_ref(tmp);
#endif
	    return;
	default:
	    assert(!App & !D);
	}
    }
}

// Parse input from file f and return parsed program.
info *parse(FILE *f)
{
    info *p, *q, *t;
    int ch;

    do { // Skip whitespace and comments.
	while ((ch = getc(f)) == '#')
	    while ((ch = getc(f)) != '\n')
		;
    } while (isspace(ch));

    switch (tolower(ch)) {
    case '@':
	// '@' -> (At, s)
	at.ref++;
	return &at;
    case 'c':
	// 'c' -> (C, s)
	c.ref++;
	return &c;
    case 'd':
	// 'd' -> (D, s)
	d.ref++;
	return &d;
    case 'e':
	// 'e' -> (E, s)
	e.ref++;
	return &e;
    case 'i':
	// 'i' -> (I, s)
	i.ref++;
	return &i;
    case 'k':
	// 'k' -> (K, s)
	k.ref++;
	return &k;
    case '|':
	// '|' -> (VBar, s)
	vbar.ref++;
	return &vbar;
#ifdef USE_SLASH
    case '/':
	// '/' -> (Slash, s)
	slash.ref++;
	return &slash;
#endif
    case 's':
	// 's' -> (S, s)
	s.ref++;
	return &s;
    case 'v':
	// 'v' -> (V, s)
	v.ref++;
	return &v;
    case 'r':
	// 'r' -> (Dot '\n', s)
	dot['\n'].ref++;
	return dot+'\n';
    case '.':
	// '.' -> let (c:s1) = s in (Dot c, s1)
	if ((ch = getc(f))==EOF)
	    return 0;
	dot[ch].ref++;
	return dot+ch;
    case '?':
	// '?' -> let (c:s1) = s in (Que c, s1)
	if ((ch = getc(f))==EOF)
	    return 0;
	que[ch].ref++;
	return que+ch;
    case '`':
	// '`' -> let (p1, s1) = parse_ s in
	//        let (p2, s2) = parse_ s1 in (App p1 p2, s2)
	if (!(p=parse(f)))
	    return 0;
	if (!(q=parse(f))) {
	    free_info(p);
	    return 0;
	}
	t = new_info();
	assert(t);
	t->tag = App;
	t->L = p;
	t->R = q;
	return t;
    default:
	// otherwise -> error "Unlambda.parse: parse error"
	return 0;
    }
}


int main(int argc, char **argv)
{
    FILE *f;
    info *p;

    init();
    if (argc<2 || !(f=fopen(argv[1], "r"))) {
	fprintf(stderr, "Usage: %s <file name>\n", argv[0]);
	exit(1);
    }
    if (!(p=parse(f))) {
	fprintf(stderr, "%s: file %s: parse error\n", argv[0], argv[1]);
	fclose(f);
	exit(1);
    }
    fclose(f);

    exec(p);

#if defined DEBUG_REFCOUNT || defined FREE_ALL
    free_info(p);
#endif
#if defined DEBUG_REFCOUNT
    fflush(stdout);
    fprintf(stderr, "\n%s: %d allocated; %d freed\n",
            argv[0], allocated, freed);

#define CHECK_ATOM(x) \
    if (x.ref != 0) \
        fprintf(stderr, #x " ref count is %d\n", x.ref);

    CHECK_ATOM(at);
    CHECK_ATOM(c);
    CHECK_ATOM(d);
    CHECK_ATOM(e);
    CHECK_ATOM(i);
    CHECK_ATOM(k);
    CHECK_ATOM(vbar);
    CHECK_ATOM(s);
    CHECK_ATOM(v);
    {
	int i;
	for (i=0; i<256; i++)
	    if (dot[i].ref != 0)
		fprintf(stderr, ".%c ref count is %d\n", i, dot[i].ref);
    }
    while (avail) {
	avail->ref = -1;
	avail = avail->L;
    }
    // check heap (only last allocated block!) for lost items 
    for (;heap_idx<BLOCK_SIZE;heap_idx++)
	if (heap[heap_idx].ref != -1) {
	    fprintf(stderr, "heap item left: tag =%2d (%4s), ref =%2d\n",
		    heap[heap_idx].tag, tags[heap[heap_idx].tag],
		    heap[heap_idx].ref);
	}
#endif

    return 0;
}