diff interps/underload/underload.c @ 996:859f9b4339e6

<Gregor> tar xf egobot.tar.xz
author HackBot
date Sun, 09 Dec 2012 19:30:08 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/interps/underload/underload.c	Sun Dec 09 19:30:08 2012 +0000
@@ -0,0 +1,383 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#define CAT   -1  /* inner should be treated normally, except continue with
+		    this el's next once it's evaluated */
+#define INNER -2  /* inner is a quotation */
+#define STACK -3  /* inner is a stack element */
+
+typedef uint_fast8_t fint;
+
+typedef struct tag_el {
+  int_least32_t val;    /* character, or a special value if negative */
+  size_t refcount;      /* reference count from inner, next, or globals */
+  /* this counts the number of locations referencing, even if they're
+     referenced multiple times themselves they only count as 1 */
+  struct tag_el* next;  /* the element after this one */
+  struct tag_el* inner; /* the element inside this one, if val is negative */
+} el;
+
+enum {unknown, underload, underlambda} lang = unknown;
+
+int debuglevel = 0;
+
+void serialize(el* e, FILE* f, fint stackdir);
+
+void error(const char* errstr) {
+  fprintf(stderr,"Error: %s\n",errstr);
+  exit(EXIT_FAILURE); }
+
+el* malloc_el(void) {
+  el* x = malloc(sizeof (el));
+  if(!x) error("Out of memory");
+  return x; }
+void checkfree(el* e) {
+  if(!e) return;
+  if(!(e->refcount)) {
+    e->refcount=1;
+    if(debuglevel >= 6) fprintf(stderr,"[free] ");
+    if(debuglevel >= 6) serialize(e,stderr,1);
+    if(debuglevel >= 6) fputc('\n',stderr);
+    if(e->next) e->next->refcount--;
+    checkfree(e->next);
+    if(e->inner) e->inner->refcount--;
+    checkfree(e->inner);
+    free(e); }}
+
+void uniputc(int_least32_t c, FILE* f) {
+  if     (c <          0)
+    error("Attempt to output a negative-coded character");
+  if     (c <       0x80)
+    putc(c,f);
+  else if(c <      0x800) {
+    putc(0xc0| (c>> 6),f);
+    putc(0x80|( c     &0x3f),f); }
+  else if(c <    0x10000) {
+    putc(0xd0| (c>>12),f);
+    putc(0x80|((c>> 6)&0x3f),f);
+    putc(0x80| (c     &0x3f),f); }
+  else if(c <   0x200000) {
+    putc(0xf0| (c>>18),f);
+    putc(0x80|((c>>12)&0x3f),f);
+    putc(0x80|((c>> 6)&0x3f),f);
+    putc(0x80|( c     &0x3f),f); }
+  else if(c <  0x4000000) {
+    putc(0xf8| (c>>24),f);
+    putc(0x80|((c>>18)&0x3f),f);
+    putc(0x80|((c>>12)&0x3f),f);
+    putc(0x80|((c>> 6)&0x3f),f);
+    putc(0x80|( c     &0x3f),f); }
+  else {
+    putc(0xfc| (c>>30),f);
+    putc(0x80|((c>>24)&0x3f),f);
+    putc(0x80|((c>>18)&0x3f),f);
+    putc(0x80|((c>>12)&0x3f),f);
+    putc(0x80|((c>> 6)&0x3f),f);
+    putc(0x80|( c     &0x3f),f); }}
+int_least32_t unigetc(FILE* f) {
+  int_least32_t rv=0;
+  int_least32_t min=0x40;
+  fint count=0;
+  int c=getc(f);
+  unsigned char mask = 0xc0;
+  if(c < 0x80) return c;
+  while(c & mask) {count++; c &= ~mask; mask>>=1;}
+  if(count < 1) error("UTF-8 input contains invalid first byte in character");
+  if(count > 1) min <<= 1;
+  rv = c;
+  while(count--) {
+    rv *= 1<<6; 
+    min <<= 5;
+    c = getc(f);
+    if((c&0xc0) != 0x80)
+      error("UTF-8 input contains invalid subsequent byte in character");
+    rv += (c & 0x3f); }
+  if(rv < min) error("UTF-8 character has a non-shortest representation");
+  return rv; }
+
+el* popstack(el** stack, const char* errmsg) {
+  el* rv;
+  el* temp;
+  if(!*stack) error(errmsg);
+  if((**stack).val != STACK) error("Internal error: Stack is not a stack");
+  if((**stack).refcount != 1) error("Internal error: Overreferenced stack");
+  rv = (**stack).inner;
+  temp = (**stack).next;
+  free(*stack);
+  /* temp.refcount stays the same */
+  *stack = temp;
+  if(rv) rv->refcount--;
+  return rv; }
+void pushstack(el** stack, el* e) {
+  el* temp = malloc_el();
+  if(e) e->refcount++;
+  temp->val = STACK;
+  temp->refcount = 1;
+  temp->inner = e;
+  temp->next = *stack;
+  /* *stack.refcount stays the same */
+  *stack = temp; }
+
+void serialize_underload(el* e, FILE* f, fint stackdir) {
+serialise_tco: /* tail-call optimisation of this function by hand */;
+  if(!e) return;
+  if(debuglevel >= 6) fprintf(f,"[%lu]",(unsigned long)e->refcount);
+  switch(e->val) {
+  case STACK:
+    if(!stackdir) {
+      if(debuglevel >+ 6) fprintf(f,"[z]");
+      if(e->inner) serialize_underload(e->inner,f,stackdir);
+      break; }
+    if(e->next) serialize_underload(e->next,f,stackdir);
+    putc('{',f);
+    if(e->inner) serialize_underload(e->inner,f,stackdir);
+    fputs("} ",f);
+    return;
+  case INNER:
+    putc('(',f);
+    if(e->inner) serialize_underload(e->inner,f,stackdir);
+    putc(')',f);
+    break;
+  case CAT:
+    serialize_underload(e->inner,f,stackdir); break;
+  default:
+    uniputc(e->val,f); break; }
+  if(debuglevel >= 5 && !e->refcount)
+    error("Sanity check failed: This should be free by now");
+  e=e->next; goto serialise_tco; }
+void serialize(el* e, FILE* f, fint stackdir) {
+  /* //tk: Efficient serialization for Underlambda */
+  serialize_underload(e, f, stackdir); }
+
+el  *stack;     /* the stack */
+el  *ip;        /* current point in the program we're executing */
+el  *zerostack; /* stack of places to go if we hit null next pointers */
+int main(int argc, char **argv) {
+  FILE* in;
+  int usingstdin;
+  el **working;
+  el  *temp, *temp2, *temp3;
+  int_least32_t c;
+  if(!argc||!argv[0]) error("Could not determine executable name");
+  char* a0 = argv[0];
+  if(strrchr(argv[0],'/')) a0=strrchr(argv[0],'/')+1;
+  if(!strcmp(a0,"derlo")) lang=underload;
+  if(!strcmp(a0,"derla")) lang=underlambda;
+  while(argc>=2) {
+    if(!strcmp(argv[1],"-o")) {lang=underload; argv++; argc--;}
+    else if(!strcmp(argv[1],"-a")) {lang=underlambda; argv++; argc--;}
+    else if(!strncmp(argv[1],"-d",2)) {
+      debuglevel=argv[1][2]-'0';
+      if(debuglevel < 0 || debuglevel > 7) debuglevel = 0;
+      argv++; argc--; }
+    else break; }
+  if(lang==unknown || argc > 2) {
+    puts("Usage: derl (-o|-a) [inputfile]");
+    puts("Options:");
+    puts("    -o   Interpret input as Underload");
+    puts("    -a   Interpret input as Underlambda");
+    puts("    -d0  No debugging");
+    puts("    -d1: enable warnings");
+    puts("    -d2: free all used resources at program exit");
+    puts("    -d3: show program and stack every step");
+    puts("    -d5: enable internal sanity checks");
+    puts("    -d6: debug memory allocation");
+    puts("    -d7: debug internal interpreter program flow");
+    puts("Instead of using the -o or -a option, you can invoke this program");
+    puts("with the name 'derlo' or 'derla'. All -d options include all");
+    puts("previous -d options except -d0. -d0 is the default.");
+    return argc==1 ? 0 : EXIT_FAILURE; }
+  if(argc==2) {
+    in = fopen(argv[1],"rb");
+    if(!in) {perror(argv[1]); return EXIT_FAILURE;}
+  } else {in = stdin; usingstdin = 1;}
+  stack = 0;
+  ip = 0;
+  zerostack = 0;
+  working = &ip;
+  while(((c=unigetc(in)))!=EOF) {
+    if(c==')') {
+      temp = popstack(&zerostack,"Unmatched )");
+      working = &(temp->next);
+      continue; }
+    if(c=='(') {
+      *working = malloc_el();
+      (**working).refcount = 1;
+      (**working).val = INNER;
+      (**working).inner = 0;
+      (**working).next = 0;
+      pushstack(&zerostack,*working);
+      working = &((**working).inner);
+      continue; }
+    /* //tk: string parsing for Underlambda */
+    /* //tk: EOF parsing for Underlambda */
+    *working = malloc_el();
+    (**working).refcount = 1;
+    (**working).val = c;
+    (**working).inner = 0;
+    (**working).next = 0;
+    working = &((**working).next); }
+  if(zerostack) error("Expected ) at end of input");
+
+  while(ip || zerostack) {
+    if(!ip) {
+      if(debuglevel >= 7) fprintf(stderr,"Moving back in zerostack.\n");
+      ip = popstack(&zerostack,"Internal error: Zerostack disappeared");
+      if(ip) ip->refcount++;
+      continue; }
+    if(debuglevel >= 3) {
+      putc('\t',stderr);
+      serialize(stack,stderr,1);
+      serialize(ip,stderr,0);
+      serialize(zerostack,stderr,0);
+      putc('\n',stderr); }
+    switch(ip->val) {
+    case STACK:
+      error("Internal error: Stack element found in program");
+    case CAT:
+      /* not a command, but an optimisation to avoid duplicating things;
+	 put ip->next on the zerostack so we can go back to it later, and
+	 continue with ip->inner */
+      if(debuglevel >= 7) fprintf(stderr,"Evaluating a lazy cat.\n");
+      if(!ip->inner) break; /* continue with ip->next */
+      if(debuglevel >= 7) fprintf(stderr,"The cat was nontrivial.\n");
+      pushstack(&zerostack,ip->next);
+      temp = ip->inner;
+      ip->refcount--;
+      temp->refcount++;
+      checkfree(ip);
+      ip = temp;
+      continue;
+    case INNER:
+      if(debuglevel >= 7) fprintf(stderr,"Pushing to the stack.\n");
+      if(ip->inner)
+	pushstack(&stack,ip->inner);
+      else
+	pushstack(&stack,0);
+      break;
+    case '!':
+      if(debuglevel >= 7) fprintf(stderr,"Popping the stack.\n");
+      temp = popstack(&stack,"Stack underflow in !");
+      checkfree(temp);
+      break;
+    case 'S':
+      if(debuglevel >= 7) fprintf(stderr,"Serialising TOS.\n");
+      temp = popstack(&stack,"Stack underflow in S");
+      if(temp) temp->refcount++;
+      serialize(temp,stdout,0);
+      fflush(stdout);
+      if(temp) temp->refcount--;
+      if(debuglevel >= 3) putc('\n',stdout);
+      checkfree(temp);
+      break;
+    case '~':
+      if(debuglevel >= 7) fprintf(stderr,"Swapping stack elements.\n");
+      temp  = popstack(&stack,"Stack underflow in ~");
+      temp2 = popstack(&stack,"Stack underflow in ~");
+      pushstack(&stack,temp);
+      pushstack(&stack,temp2);
+      break;
+    case 'a':
+      if(debuglevel >= 7) fprintf(stderr,"Wrapping the top stack element.\n");
+      temp = malloc_el();
+      temp->refcount = 0;
+      temp->inner = popstack(&stack,"Stack underflow in a");
+      if(temp->inner) temp->inner->refcount++;
+      temp->val = INNER;
+      temp->next = 0;
+      pushstack(&stack,temp);
+      break;
+    case ':':
+      if(debuglevel >= 7) fprintf(stderr,"Duplicating the top stack element.\n");
+      temp = popstack(&stack,"Stack underflow in :");
+      pushstack(&stack,temp);
+      pushstack(&stack,temp);
+      break;
+    case '*':
+      if(debuglevel >= 7) fprintf(stderr,"Catting two stack elements.\n");
+      temp  = popstack(&stack,"Stack underflow in *");
+      temp2 = popstack(&stack,"Stack underflow in *");
+      /* The degenerate cases of catting a null string need to be handled */
+      if(!temp ) {pushstack(&stack,temp2); break;}
+      if(!temp2) {pushstack(&stack,temp ); break;}
+      temp->refcount++; /* if temp and temp2 are aliased, this correctly
+			   prevents the optimisation happening */
+      if(temp2->refcount == 0) {
+	/* Tailcat optimisation: if nothing else is using the stack
+	   element we're catting to, just change its next pointer to do
+	   the catting directly. */
+	if(debuglevel >= 7) fprintf(stderr,"Trying tailcat optimisation.\n");
+	temp3 = temp2;
+	while(temp3->next) {
+	  if(temp3->refcount > 1) goto cantoptimise_star;
+	  temp3 = temp3->next; }
+	if(temp3->refcount > 1) goto cantoptimise_star;
+	if(debuglevel >= 7) fprintf(stderr,"Tailcat optimisation succeeded.\n");
+	temp3->next = temp;
+	temp3 = temp2; }
+      else {
+      cantoptimise_star: ;
+	temp3 = malloc_el();
+	temp3->refcount = 0;
+	temp3->next = temp;
+	temp3->inner = temp2;
+	temp2->refcount++;
+	temp3->val = CAT; }
+      pushstack(&stack,temp3);
+      break;
+    case '^':
+      if(debuglevel >= 7) fprintf(stderr,"Evalling the stack.\n");
+      temp = popstack(&stack,"Stack underflow in ^");
+      if(!temp) break; /* continue with ip->next */
+      while(ip->refcount == 1 && !(ip->next) && zerostack) {
+	/* Tailcall optimisation. */
+	if(debuglevel >= 7) fprintf(stderr,"Tailcall optimisation.\n");
+	ip->next = popstack(&zerostack,
+			    "Internal error: Zerostack disappeared"); }
+      if(ip->next) ip->next->refcount++;
+      if(temp->refcount == 0) {
+	/* Tailcat optimisation on the call stack; replace the return at
+	   the end of the called code with a goto, as long as it isn't
+	   used anywhere else */
+	if(debuglevel >= 7) fprintf(stderr,"Trying tailcat optimisation.\n");
+	temp3 = temp;
+	while(temp3->next) {
+	  if(temp3->refcount > 1) goto cantoptimise_caret;
+	  temp3 = temp3->next; }
+	if(temp3->refcount > 1) goto cantoptimise_caret;
+	temp3->next = ip->next;
+	if(debuglevel >= 7) fprintf(stderr,"Tailcat optimisation succeeded.\n");
+	temp2 = temp; }
+      else {
+      cantoptimise_caret: ;
+	temp2 = malloc_el();
+	temp2->refcount = 0;
+	temp2->next = ip->next;
+	temp2->inner = temp;
+	temp->refcount++;
+	temp2->val = CAT; }
+      ip->refcount--;
+      if(temp2) temp2->refcount++;
+      checkfree(ip);
+      ip = temp2;
+      continue;
+    default:
+      fprintf(stderr, "Attempt to execute unknown command %d\n",ip->val);
+      return EXIT_FAILURE; }
+    temp = ip->next;
+    ip->refcount--;
+    if(temp) temp->refcount++;
+    checkfree(ip);
+    ip = temp; }
+  if(!usingstdin) fclose(in);
+  if(debuglevel >= 1 && stack)
+    fprintf(stderr, "Warning: Stack was not empty at program exit\n");
+  if(debuglevel >= 3) {
+    serialize(stack,stderr,1);
+    putc('\n',stderr); }
+  if(debuglevel >= 2)
+    while(stack)
+      checkfree(popstack(&stack,"Internal error: Stack has inconsistent size"));
+  return 0; }
\ No newline at end of file