view interps/unlambda/unlambda.c @ 12518:2d8fe55c6e65 draft default tip

<int-e> learn The password of the month is release incident pilot.
author HackEso <hackeso@esolangs.org>
date Sun, 03 Nov 2024 00:31:02 +0000
parents 859f9b4339e6
children
line wrap: on
line source

/*
 * This is an implementation of the Unlambda programming language
 *
 * Copyright (C) 2000 Bertram Felgenhauer <b.f.@gmx.de>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * This is more or less a translation of the Haskell version, which is a lot
 * easier to read than this code; most comments here are actually Haskell code.
 *
 * history:
 * 13-10-2000: initial version, in Haskell
 * 14-10-2000: initial version in C
 * xx-10-2000: optimised; gcc is weird: removing code (which was never
 *             executed anyway) can make programs slower. duh.
 * 31-10-2000: added '/' (use -DUSE_SLASH), which is similar to '|' but
 *             with ?<char> instead of .<char>
 * 21-12-2000: added some fflush(stdout) code...
 */

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

// note: Be sure to compile with -DNDEBUG if you want optimal performance.
#include <assert.h>

#define BLOCK_SIZE 4096  // allocate BLOCK_SIZE info structs each malloc

#define _STR(x) #x
#define STR(x) _STR(x)

/*
 * data Prog = App Prog Prog | At | C | C1 Cont | D | D1 Prog | Dot Char | E
 *      | I | K | K1 Prog | Que Char | S | S1 Prog | S2 Prog Prog | V | VBar
 * data Cont = App1 Prog Cont | App2 Prog Cont
 */

// type tags:
enum { 
    At, C, D, Dot, E, I, K, Que, S,
#if defined USE_SLASH
    Slash,
#endif
    V, VBar,                        T0=VBar, // 0 links
    C1, D1, K1, S1,                 T1=S1,   // 1 link
    App, S2, App1, App2                      // 2 links
};

#if defined DEBUG_REFCOUNT
char* tags[] = {"At", "C", "D", "Dot", "E", "I", "K", "Que", "S", 
#if defined USE_SLASH
                "Slash",
#endif
		"V", "VBar",
		"C1", "D1", "K1", "S1", "App", "S2", "App1", "App2"};
#endif

typedef struct info {
    int tag; // type tag
    int ref; // reference count, minus one
    struct info *l;
    union {
	struct info *r;
	char ch;
    } u;
} info;

#define L  l
#define R  u.r
#define CH u.ch

// All info records with no link fields can be allocated statically.
// "eval" is a continuation which applies its argument to e, which
// ends the program, and is used in exec()
info at={At}, c={C}, d={D}, e={E}, i={I}, k={K}, vbar={VBar}, s={S}, v={V},
#if defined USE_SLASH
     slash={Slash},
#endif
     eval = {App2, 0, &e, {&e}}, dot[256], que[256];

info *heap;
int  heap_idx = 0;
info *avail = 0;

#if defined DEBUG_REFCOUNT
int allocated, freed;
#endif

// Get a free info record, with ref field == 0 (reference count == 1).
inline info *new_info()
{
#if defined DEBUG_REFCOUNT
    allocated++;
#endif
    if (avail) {
	info *t;
	t = avail;
	avail = avail->L;
#if defined DEBUG_REFCOUNT
	t->ref++;
#endif
	assert(t->ref == 0);
	return t;
    } else {
	if (!heap_idx) {
	    if (!(heap = malloc(BLOCK_SIZE * sizeof(info))))
		perror("new_info"), exit(EXIT_FAILURE);
	    heap_idx = BLOCK_SIZE;
	}
	heap[--heap_idx].ref = 0; // Would it be better to use calloc above?
	return heap + heap_idx;
    }
}

// Increment the reference count on an info struct.
#define add_ref(i) ((i)->ref++)

// Free an info record without checking any reference counts.
inline void free_info_no_ref(info *i)
{
    assert(i->ref == 0);
#if defined DEBUG_REFCOUNT
    freed++;
    i->ref--;
#endif
    i->L = avail;
    avail = i;
}

// Free a reference counted info record.
inline void free_info(info *i)
{
    assert(i);
    if (i->ref) 
	i->ref--;
    else {
	if (i->tag > T0) {
	    free_info(i->L);
	    if (i->tag > T1)
		free_info(i->R);
	}
	free_info_no_ref(i);
    }
}

// Initialise the dot[] and que[] arrays.
void init()
{
    int i;
    for (i=0; i<256; i++) {
	dot[i].tag = Dot;
	dot[i].CH = i;
	que[i].tag = Que;
	que[i].CH = i;
    }
}

// Execute program.
void exec(info *prog)
{
    info *cont, *p, *q, *tmp;
    int current_char;

    current_char = EOF;
    tmp = new_info();
    add_ref(cont = &eval);
    add_ref(prog);
    for (;;) {
        // exec cont prog
	assert(tmp);
	assert(tmp->ref == 0);
	if (prog->tag == App) {
	    // exec c (App p1 p2) = exec (App1 p2 c) p1
	    tmp->tag = App1;
	    tmp->R = cont;
	    cont = tmp;
	    p = prog->L;
	    tmp->L = prog->R;
	    if (prog->ref) {
		prog->ref--;
		add_ref(prog->L);
		add_ref(prog->R);
		tmp = new_info();
	    } else
		tmp = prog;
	    prog = p;
	    continue;
	}
	if (cont->tag == App1) {
	    if (prog->tag == D) {
		// exec (App1 p c) D = exec c (D1 p)
#if 1
                assert(prog == &d);
                d.ref--; // free_info(prog);
		assert(!cont->ref); // all references were inside prog
		cont->tag = D1;
		prog = cont;
		cont = cont->R;
#else
		tmp->tag = D1;
                assert(prog == &d);
                d.ref--; // free_info(prog);
		prog = tmp;
		tmp->L = cont->L;
		p = cont->R;
                if (cont->ref) {
		    cont->ref--;
		    add_ref(cont->L);
		    add_ref(cont->R);
		    tmp = new_info();
		} else 
		    tmp = cont;
		cont = p;
#endif
	    } else {
		// exec (App1 p1 c) p2 = exec (App2 p2 c) p1
#if 0
		tmp->tag = App2;
		tmp->L = prog;
		tmp->R = cont->R;
		prog = cont->L;
		p = tmp;
                if (cont->ref) {
		    cont->ref--;
		    add_ref(cont->L);
		    add_ref(cont->R);
		    tmp = new_info();
		} else
		    tmp = cont;
		cont = p;
#else
		if (cont->ref) {
		    cont->ref--;
		    tmp->tag = App2;
		    tmp->L = prog;
		    add_ref(tmp->R = cont->R);
		    add_ref(prog = cont->L);
		    cont = tmp;
		    tmp = new_info();
		} else {
		    cont->tag = App2;
		    p = cont->L;
		    cont->L = prog;
		    prog = p;
		}
#endif
	    }
	    continue;
	}
	assert(cont->tag == App2);
	// exec (App2 p1 c) p = case p1 of
	switch ((q=cont->L)->tag) {
	case S:
	    // S -> exec c (S1 p)
	    tmp->tag = S1;
	    tmp->L = prog;
	    prog = tmp;
	    p = cont->R;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &s);
		s.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case S1:
	    // S1 p1 -> exec c (S2 p1 p)
	    tmp->tag = S2;
	    tmp->L = q->L;
	    tmp->R = prog;
	    prog = tmp;
	    p = cont->R;
            if (cont->ref) {
		cont->ref--;
		add_ref(q->L);
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		if (q->ref) {
		    q->ref--;
		    add_ref(q->L);
		} else
		    free_info_no_ref(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case S2:
	    // S2 p1 p2 -> exec (App1 p (App1 (App p2 p) c)) p1
	    // **NOTE** This wastes a bit of time, as p1,p,p2,p are evaluated
	    // again, or better, their tag is checked just to find out that
	    // it's not App, which is not necessary as we already knew that.
	    // Some options:
	    // - introduce a tag "App3" meaning "works exactly as App1, but
	    //   with all arguments already evaluated"
	    // - consider gotos (yuk) to avoid the prog->tag==App check at the
	    //   beginning of the loop
	    // - introduce an AppE program tag meaning "like App, but both
	    //   parts already evaluated, transform directly to App3"
	    // - (does not really belong here) swap the two fields of App1 -
            //   to make the right side of an App always go to the ->R field
	    //   and maybe save some data movement.
	    // **TODO**: Implement this and check whether it actually makes
	    // the program run faster; I'm not sure about this.
	    p = new_info();
	    p->tag = App;
	    p->L = q->R;
	    add_ref(p->R = prog);
	    tmp->tag = App1;
	    tmp->L = p;
	    tmp->R = cont->R;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		cont = new_info();
		cont->tag = App1;
		cont->L = prog;
		cont->R = tmp;
		prog = q->L;
		add_ref(q->R);
		add_ref(q->L);
		tmp = new_info();
	    } else {
		cont->tag = App1;
		cont->L = prog;
		cont->R = tmp;
		prog = q->L;
		if (q->ref) {
		    q->ref--;
		    add_ref(q->R);
		    add_ref(q->L);
		    tmp = new_info();
		} else
		    tmp = q;
	    }
	    break;
	case K:
	    // K -> exec c (K1 p)
	    tmp->tag = K1;
	    tmp->L = prog;
	    prog = tmp;
	    p = cont->R;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &k);
		k.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case K1:
	    // K1 p1 -> exec c p1
	    free_info(prog);
	    prog = q->L;
	    p = cont->R;
	    assert(!cont->ref); // all references were inside prog
	    if (0 && cont->ref) {
		cont->ref--;
		add_ref(q->L);
		add_ref(cont->R);
	    } else {
		free_info_no_ref(cont);
		if (q->ref) {
		    q->ref--;
		    add_ref(q->L);
		} else
		    free_info_no_ref(q);
	    }
	    cont = p;
	    break;
	case Dot:
	    // Dot char -> putChar char >> exec c p
#if !defined SILENT
	    putchar(q->CH);
//	    fflush(stdout); // slow :-|
#endif
	    /* fall through */
	case I:
	    // I -> exec c p
	    p = cont->R;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
	    } else {
		assert(cont->L == &i || (unsigned)(cont->L - dot) < 256);
		q->ref--; // free_info(q);
		free_info_no_ref(cont);
	    }
	    cont = p;
	    break;
	case V:
	    // V -> exec c V
	    free_info(prog);
	    prog = q;
	    p = cont->R;
	    assert(!cont->ref); // all references were inside prog
	    if (0 && cont->ref) {
		cont->ref--;
		add_ref(cont->L);
		add_ref(cont->R);
	    } else
		free_info_no_ref(cont);
	    cont = p;
	    break;
	case At:
	    // At -> isEOF >>= \eof ->
	    //       if (eof) then current_char=EOF `seq`
	    //                     exec (App1 V c) p
	    //                else getChar >>= \ch ->
	    //                     current_char=ch `seq`
	    //                     exec (App1 I c) p
	    // **NOTE** See comment at S2.
	    current_char = getchar();
	    tmp->tag = App1;
	    tmp->R = cont->R;
	    add_ref(tmp->L = (current_char!=EOF) ? &i : &v);
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &at);
		at.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case VBar:
	    // VBar -> exec (App1 (maybe V Dot current_char) c) p
	    // **NOTE** See comment at S2.
	    tmp->tag = App1;
	    tmp->R = cont->R;
	    add_ref(tmp->L = (current_char!=EOF) ? dot+current_char : &v);
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &vbar);
		vbar.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
#if defined USE_SLASH
	case Slash:
	    // Slash -> exec (App1 (maybe V Que current_char) c) p
	    // **NOTE** See comment at S2.
	    tmp->tag = App1;
	    tmp->R = cont->R;
	    add_ref(tmp->L = (current_char!=EOF) ? que+current_char : &v);
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert(cont->L == &slash);
		slash.ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
#endif
	case Que:
	    // Que char -> exec (App1 (if (Just char) == ch then I else V) c) p
	    // **NOTE** See comment at S2.
	    tmp->tag = App1;
	    tmp->R = cont->R;
	    add_ref(tmp->L = (current_char==q->CH) ? &i : &v);
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		tmp = new_info();
	    } else {
		assert((unsigned)(cont->L - que) < 256);
		q->ref--; // free_info(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case D1:
	    // D1 p1 -> exec (App1 p c) p1
	    // **NOTE** See comment at S2.
	    tmp->tag = App1;
	    tmp->L = prog;
	    tmp->R = cont->R;
	    prog = q->L;
	    p = tmp;
	    if (cont->ref) {
		cont->ref--;
		add_ref(cont->R);
		add_ref(q->L);
		tmp = new_info();
	    } else {
		if (q->ref) {
		    q->ref--;
		    add_ref(q->L);
		} else
		    free_info_no_ref(q);
		tmp = cont;
	    }
	    cont = p;
	    break;
	case C:
	    // C -> exec (App1 (C1 c) c) p
	    // **NOTE** See comment at S2.
	    tmp->tag = C1;
	    tmp->L = cont->R;
	    if (cont->ref) {
		cont->ref--;
		cont->R->ref += 2; // add_ref, twice
		cont = new_info();
	    } else {
		add_ref(cont->R);
		assert(cont->L == &c);
		c.ref--;
	    }
	    cont->tag = App1;
	    cont->L = tmp;
	    cont->R = tmp->L;
	    tmp = new_info();
	    break;
	case C1:
	    // C1 c1 -> exec c1 p
#if 0
	    // *note*: cont == (App2 (C1 c) c') is equivalent to cont == c,
	    // so we just copy c's values to cont (instead of using c as
	    // the new continuation), thus affecting all references to cont.
	    // This is a clear win for ``ci`.*`cr, regarding both memory
            // consumption and speed, and it should not hurt most other
	    // programs.
	    // Also, we can now execute ``ci`ci in a limited amount of memory.
	    // *note*: this assumes that all continuations contain two links.
	    free_info(cont->R);
	    cont->tag = q->L->tag;
	    cont->L = q->L->L;
	    cont->R = q->L->R;
	    if (q->ref) {
		q->ref--;
		add_ref(q->L->L);
		add_ref(q->L->R);
	    } else {
		if (q->L->ref) {
		    q->L->ref--;
		    add_ref(q->L->L);
		    add_ref(q->L->R);
		} else
		    free_info_no_ref(q->L);
		free_info_no_ref(q);
	    }
#else
	    p = q->L;
	    if (cont->ref) {
		cont->ref--;
		add_ref(q->L);
	    } else {
		if (q->ref) {
		    q->ref--;
		    add_ref(q->L);
		} else
		    free_info_no_ref(q);
		free_info(cont->R);
		free_info_no_ref(cont);
	    }
	    cont = p;
#endif
	    break;
	case E:
	    // E -> return ()
#if defined DEBUG_REFCOUNT || defined FREE_ALL
	    free_info(prog);
	    free_info(cont);
	    free_info_no_ref(tmp);
#endif
	    return;
	default:
	    assert(!App & !D);
	}
    }
}

// Parse input from file f and return parsed program.
info *parse(FILE *f)
{
    info *p, *q, *t;
    int ch;

    do { // Skip whitespace and comments.
	while ((ch = getc(f)) == '#')
	    while ((ch = getc(f)) != '\n')
		;
    } while (isspace(ch));

    switch (tolower(ch)) {
    case '@':
	// '@' -> (At, s)
	at.ref++;
	return &at;
    case 'c':
	// 'c' -> (C, s)
	c.ref++;
	return &c;
    case 'd':
	// 'd' -> (D, s)
	d.ref++;
	return &d;
    case 'e':
	// 'e' -> (E, s)
	e.ref++;
	return &e;
    case 'i':
	// 'i' -> (I, s)
	i.ref++;
	return &i;
    case 'k':
	// 'k' -> (K, s)
	k.ref++;
	return &k;
    case '|':
	// '|' -> (VBar, s)
	vbar.ref++;
	return &vbar;
#ifdef USE_SLASH
    case '/':
	// '/' -> (Slash, s)
	slash.ref++;
	return &slash;
#endif
    case 's':
	// 's' -> (S, s)
	s.ref++;
	return &s;
    case 'v':
	// 'v' -> (V, s)
	v.ref++;
	return &v;
    case 'r':
	// 'r' -> (Dot '\n', s)
	dot['\n'].ref++;
	return dot+'\n';
    case '.':
	// '.' -> let (c:s1) = s in (Dot c, s1)
	if ((ch = getc(f))==EOF)
	    return 0;
	dot[ch].ref++;
	return dot+ch;
    case '?':
	// '?' -> let (c:s1) = s in (Que c, s1)
	if ((ch = getc(f))==EOF)
	    return 0;
	que[ch].ref++;
	return que+ch;
    case '`':
	// '`' -> let (p1, s1) = parse_ s in
	//        let (p2, s2) = parse_ s1 in (App p1 p2, s2)
	if (!(p=parse(f)))
	    return 0;
	if (!(q=parse(f))) {
	    free_info(p);
	    return 0;
	}
	t = new_info();
	assert(t);
	t->tag = App;
	t->L = p;
	t->R = q;
	return t;
    default:
	// otherwise -> error "Unlambda.parse: parse error"
	return 0;
    }
}


int main(int argc, char **argv)
{
    FILE *f;
    info *p;

    init();
    if (argc<2 || !(f=fopen(argv[1], "r"))) {
	fprintf(stderr, "Usage: %s <file name>\n", argv[0]);
	exit(1);
    }
    if (!(p=parse(f))) {
	fprintf(stderr, "%s: file %s: parse error\n", argv[0], argv[1]);
	fclose(f);
	exit(1);
    }
    fclose(f);

    exec(p);

#if defined DEBUG_REFCOUNT || defined FREE_ALL
    free_info(p);
#endif
#if defined DEBUG_REFCOUNT
    fflush(stdout);
    fprintf(stderr, "\n%s: %d allocated; %d freed\n",
            argv[0], allocated, freed);

#define CHECK_ATOM(x) \
    if (x.ref != 0) \
        fprintf(stderr, #x " ref count is %d\n", x.ref);

    CHECK_ATOM(at);
    CHECK_ATOM(c);
    CHECK_ATOM(d);
    CHECK_ATOM(e);
    CHECK_ATOM(i);
    CHECK_ATOM(k);
    CHECK_ATOM(vbar);
    CHECK_ATOM(s);
    CHECK_ATOM(v);
    {
	int i;
	for (i=0; i<256; i++)
	    if (dot[i].ref != 0)
		fprintf(stderr, ".%c ref count is %d\n", i, dot[i].ref);
    }
    while (avail) {
	avail->ref = -1;
	avail = avail->L;
    }
    // check heap (only last allocated block!) for lost items 
    for (;heap_idx<BLOCK_SIZE;heap_idx++)
	if (heap[heap_idx].ref != -1) {
	    fprintf(stderr, "heap item left: tag =%2d (%4s), ref =%2d\n",
		    heap[heap_idx].tag, tags[heap[heap_idx].tag],
		    heap[heap_idx].ref);
	}
#endif

    return 0;
}