changeset 4273:5fddb59b5d15

<zzo38> fetch http://sprunge.us/UNPA
author HackBot
date Tue, 07 Jan 2014 03:54:14 +0000
parents 0ea5d8e5b787
children 7d50abd2515f
files UNPA
diffstat 1 files changed, 448 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/UNPA	Tue Jan 07 03:54:14 2014 +0000
@@ -0,0 +1,448 @@
+/*
+  UTF-to-VLQ
+  Public domain
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef _WIN32
+#include <fcntl.h>
+#endif
+
+/* One raw octet as read from stdin or written to stdout. */
+typedef unsigned char byte;
+/* Unsigned 64-bit-or-wider integer; every decoded value travels in this type. */
+typedef unsigned long long ULL;
+
+/* Decoder signature: takes nothing, returns the next decoded value. */
+typedef ULL(*in_func_t)(void);
+/* Encoder signature: takes one value and writes it out. */
+typedef void(*out_func_t)(ULL);
+
+/* Input/output format letters; main copies them from argv[1][0] and argv[1][1]. */
+char in_mode;
+char out_mode;
+/* Flag per option letter; main sets options[c&127]=1 for each extra character of argv[1]. */
+int options[128];
+/* Byte-to-value table used by read_translate/write_translate; main fills it from the file named by argv[2]. */
+ULL translation[256];
+
+/*
+  Named option flags (all tested in main):
+    conv_lf   - a value 13 is replaced by 10 and a value 10 directly after it is dropped
+    conv_cr   - a value 13 is written before every value 10
+    bom_in    - the first input value must be 0xFEFF and is not copied to the output
+    bom_out   - 0xFEFF is written before anything else
+    trans_le  - multi-byte translation table entries are stored low byte first
+*/
+#define conv_lf options['L']
+#define conv_cr options['c']
+#define bom_in options['b']
+#define bom_out options['B']
+#define trans_le options['t']
+
+/*
+  Fetch the next octet from stdin.
+  When stdin is exhausted the whole program terminates with status 0,
+  so callers never observe an end-of-file condition.
+*/
+byte getb(void) {
+  const int c=fgetc(stdin);
+  if(c!=EOF) return c;
+  exit(0);
+}
+
+/*
+  Sign-extend x, treating bit number y as the sign bit.
+  If bit y is set, every bit from y upward is set in the result;
+  otherwise x is returned unchanged.
+  y must be in the range 0..63.
+
+  Declared static: a plain "inline" definition provides no external
+  definition in C99/C11 and can fail to link when the call is not inlined.
+  The masks are built from unsigned constants because left-shifting a
+  negative value is undefined behaviour.
+*/
+static inline ULL sign_extend(ULL x,int y) {
+  const ULL upper=~0ULL<<y;
+  return (x&(1ULL<<y))?(x|upper):x;
+}
+
+/* Read one octet from stdin and return it as a value 0..255. */
+ULL read_8bit_raw(void) {
+  const byte octet=getb();
+  return octet;
+}
+
+/* Read a 16-bit value from stdin, least significant octet first. */
+ULL read_16bit_le_raw(void) {
+  const ULL low=getb();
+  const ULL high=getb();
+  return (high<<8)|low;
+}
+
+/* Read a 16-bit value from stdin, most significant octet first. */
+ULL read_16bit_be_raw(void) {
+  const ULL high=getb();
+  const ULL low=getb();
+  return (high<<8)|low;
+}
+
+/*
+  Read a 32-bit value from stdin, least significant octet first.
+  Each octet is widened to ULL before shifting: shifting a promoted
+  int left by 24 overflows for octets >= 0x80 and would sign-extend
+  into the upper 32 bits of the result.
+*/
+ULL read_32bit_le_raw(void) {
+  ULL x=0;
+  int shift;
+  for(shift=0;shift<32;shift+=8) x|=((ULL)getb())<<shift;
+  return x;
+}
+
+/*
+  Read a 32-bit value from stdin, most significant octet first.
+  Each octet is widened to ULL before shifting: shifting a promoted
+  int left by 24 overflows for octets >= 0x80 and would sign-extend
+  into the upper 32 bits of the result.
+*/
+ULL read_32bit_be_raw(void) {
+  ULL x=0;
+  int shift;
+  for(shift=24;shift>=0;shift-=8) x|=((ULL)getb())<<shift;
+  return x;
+}
+
+/* Read a 64-bit value from stdin, least significant octet first. */
+ULL read_64bit_le_raw(void) {
+  ULL value=0;
+  int shift;
+  for(shift=0;shift<64;shift+=8) {
+    const ULL octet=getb();
+    value|=octet<<shift;
+  }
+  return value;
+}
+
+/*
+  Read a 64-bit value from stdin, most significant octet first.
+  All eight octets are consumed; the loop runs the shift down to 0 so
+  the least significant octet is included.
+*/
+ULL read_64bit_be_raw(void) {
+  ULL x=0;
+  int shift;
+  for(shift=56;shift>=0;shift-=8) x|=((ULL)getb())<<shift;
+  return x;
+}
+
+/*
+  Decode one UTF-8 style sequence from stdin.
+  The lead octet selects how many continuation octets follow (1 to 6)
+  and how many payload bits the lead itself contributes; each
+  continuation octet contributes its low 6 bits.  Lead octets 0xFE and
+  0xFF are both treated as introducing six continuation octets, with
+  the lowest bit of the lead kept as payload.
+  Any other lead octet (0x00..0xBF) is returned unchanged.
+  Continuation octets are not validated.
+*/
+ULL read_utf8(void) {
+  ULL value=getb();
+  ULL payload_mask;
+  int trailing;
+  if((value&0xE0)==0xC0) { trailing=1; payload_mask=0x1F; }
+  else if((value&0xF0)==0xE0) { trailing=2; payload_mask=0x0F; }
+  else if((value&0xF8)==0xF0) { trailing=3; payload_mask=0x07; }
+  else if((value&0xFC)==0xF8) { trailing=4; payload_mask=0x03; }
+  else if((value&0xFE)==0xFC) { trailing=5; payload_mask=0x01; }
+  else if(value>=0xFE) { trailing=6; payload_mask=0x01; }
+  else return value;
+  value&=payload_mask;
+  while(trailing-->0) value=(value<<6)|(getb()&0x3F);
+  return value;
+}
+
+/*
+  Decode one big-endian variable-length quantity from stdin.
+  Every octet carries 7 payload bits, most significant group first;
+  bit 7 set means another octet follows.
+  The accumulator is shifted before every group, including the last,
+  so e.g. 0x81 0x00 decodes to 128.
+*/
+ULL read_vlq8(void) {
+  ULL r=0;
+  byte x;
+  do {
+    x=getb();
+    r=(r<<7)|(x&0x7F);
+  } while(x&0x80);
+  return r;
+}
+
+/*
+  Decode one unsigned LEB128 value from stdin.
+  Every octet carries 7 payload bits, least significant group first;
+  bit 7 set means another octet follows.
+  Groups are widened to ULL before shifting so values above 28 bits do
+  not overflow int.  Groups that would land at bit 64 or above are
+  read but ignored, which keeps the shift count in range.
+*/
+ULL read_leb128(void) {
+  ULL r=0;
+  int shift=0;
+  byte x;
+  do {
+    x=getb();
+    if(shift<64) {
+      r|=((ULL)(x&0x7F))<<shift;
+      shift+=7;
+    }
+  } while(x&0x80);
+  return r;
+}
+
+/*
+  Decode one code point from little-endian UTF-16 on stdin.
+  A 16-bit unit is assembled low octet first.  If it is a high
+  surrogate (0xD800..0xDBFF) a second unit is read and the pair is
+  combined into a value in 0x10000..0x10FFFF; the second unit is not
+  checked for being a low surrogate.  Any other unit is returned as is.
+*/
+ULL read_utf16_le(void) {
+  ULL r=getb();
+  r|=((ULL)getb())<<8;
+  if(r>=0xD800 && r<0xDC00) {
+    ULL x=getb();
+    x|=((ULL)getb())<<8;
+    return (((r&0x3FF)<<10)|(x&0x3FF))+0x10000ULL;
+  }
+  return r;
+}
+
+/*
+  Decode one code point from big-endian UTF-16 on stdin.
+  A 16-bit unit is assembled high octet first.  If it is a high
+  surrogate (0xD800..0xDBFF) a second unit is read and the pair is
+  combined into a value in 0x10000..0x10FFFF; the second unit is not
+  checked for being a low surrogate.  Any other unit is returned as is.
+*/
+ULL read_utf16_be(void) {
+  ULL r=((ULL)getb())<<8;
+  r|=getb();
+  if(r>=0xD800 && r<0xDC00) {
+    ULL x=((ULL)getb())<<8;
+    x|=getb();
+    return (((r&0x3FF)<<10)|(x&0x3FF))+0x10000ULL;
+  }
+  return r;
+}
+
+/* Read one octet from stdin and map it through the translation table. */
+ULL read_translate(void) {
+  const byte index=getb();
+  return translation[index];
+}
+
+/*
+  Read the next numeric value from a MessagePack stream on stdin.
+
+  Integers (fixint and the 8/16/32/64-bit forms) are returned directly,
+  signed ones sign-extended to 64 bits.  false yields 0 and true yields
+  all-ones.  Floats are converted to an integer by a C cast.
+  Type bytes that carry no number are consumed and skipped: nil, and
+  array/map headers (so their elements are then read as a flat
+  sequence).  String payloads (fixstr, and the 0xDA/0xDB forms) are
+  copied byte-for-byte to stdout and then skipped.
+  Any other type byte terminates the program with status 1.
+
+  The case ranges are a GNU C extension.
+*/
+ULL read_messagepack(void) {
+  byte x;
+  ULL s;
+  /* Unions reinterpret the wire bits as floating point without aliasing
+     violations.  Assumes float is 32 bits wide like unsigned int and
+     double is 64 bits wide like ULL - not completely portable. */
+  union { float f; unsigned int bits; } f32;
+  union { double d; ULL bits; } f64;
+  for(;;) {
+    x=getb();
+    switch(x) {
+      case 0x00 ... 0x7F: return x; /* positive fixint */
+      case 0x80 ... 0x9F: continue; /* fixmap / fixarray header */
+      case 0xA0 ... 0xBF: /* fixstr: low 5 bits are the length */
+        while(x-->0xA0) putchar(getb());
+        continue;
+      case 0xC0: continue; /* nil */
+      case 0xC2: return 0; /* false */
+      case 0xC3: return -1LL; /* true */
+      case 0xCA: /* float 32: four big-endian octets */
+        f32.bits=(unsigned int)read_32bit_be_raw();
+        return (ULL)f32.f;
+      case 0xCB: /* float 64: eight big-endian octets */
+        f64.bits=read_64bit_be_raw();
+        return (ULL)f64.d;
+      case 0xCC: return read_8bit_raw();
+      case 0xCD: return read_16bit_be_raw();
+      case 0xCE: return read_32bit_be_raw();
+      case 0xCF: return read_64bit_be_raw();
+      case 0xD0: return sign_extend(read_8bit_raw(),7);
+      case 0xD1: return sign_extend(read_16bit_be_raw(),15);
+      case 0xD2: return sign_extend(read_32bit_be_raw(),31);
+      case 0xD3: return read_64bit_be_raw();
+      case 0xDA: /* string with 16-bit length */
+        s=read_16bit_be_raw();
+        while(s--) putchar(getb());
+        continue;
+      case 0xDB: /* string with 32-bit length */
+        s=read_32bit_be_raw();
+        while(s--) putchar(getb());
+        continue;
+      case 0xDC: read_16bit_be_raw(); continue; /* array 16 header */
+      case 0xDD: read_32bit_be_raw(); continue; /* array 32 header */
+      case 0xDE: read_16bit_be_raw(); continue; /* map 16 header */
+      case 0xDF: read_32bit_be_raw(); continue; /* map 32 header */
+      case 0xE0 ... 0xFF: return x|~31LL; /* negative fixint */
+      default: exit(1);
+    }
+  }
+}
+
+/*
+  Read two hexadecimal digits from stdin and return the octet they form.
+  Characters that compare <= ' ' are skipped before each digit.
+  A digit's value is its low nibble, plus 9 when the character is 'A'
+  or above; the characters are not otherwise validated.
+*/
+ULL read_hex(void) {
+  char hi,lo;
+  int hi_val,lo_val;
+  for(;;) {
+    hi=getb();
+    if(hi>' ') break;
+  }
+  for(;;) {
+    lo=getb();
+    if(lo>' ') break;
+  }
+  hi_val=(hi&15)+(hi>='A'?9:0);
+  lo_val=(lo&15)+(lo>='A'?9:0);
+  return (hi_val<<4)|lo_val;
+}
+
+/* Write x to stdout as a single octet. */
+void write_8bit_raw(ULL x) {
+  const int octet=x;
+  putchar(octet);
+}
+
+/* Write x to stdout as two octets, least significant first. */
+void write_16bit_le_raw(ULL x) {
+  const ULL low=x&255;
+  const ULL high=x>>8;
+  putchar(low);
+  putchar(high);
+}
+
+/* Write x to stdout as two octets, most significant first. */
+void write_16bit_be_raw(ULL x) {
+  const ULL high=x>>8;
+  const ULL low=x&255;
+  putchar(high);
+  putchar(low);
+}
+
+/* Write the low 32 bits of x to stdout as four octets, least significant first. */
+void write_32bit_le_raw(ULL x) {
+  int shift;
+  for(shift=0;shift<32;shift+=8) putchar((int)((x>>shift)&255));
+}
+
+/* Write the low 32 bits of x to stdout as four octets, most significant first. */
+void write_32bit_be_raw(ULL x) {
+  int shift;
+  for(shift=24;shift>=0;shift-=8) putchar((int)((x>>shift)&255));
+}
+
+/* Write x to stdout as eight octets, least significant first. */
+void write_64bit_le_raw(ULL x) {
+  int shift;
+  for(shift=0;shift<64;shift+=8) putchar((int)((x>>shift)&255));
+}
+
+/* Write x to stdout as eight octets, most significant first. */
+void write_64bit_be_raw(ULL x) {
+  int shift;
+  for(shift=56;shift>=0;shift-=8) putchar((int)((x>>shift)&255));
+}
+
+/*
+  Encode x to stdout as a UTF-8 style sequence.
+  Values below 0x80 are a single octet; larger values use a lead octet
+  followed by 1 to 6 continuation octets of 6 payload bits each, the
+  longest form (lead 0xFE) covering values below 0x1000000000.
+  When out_mode is '0', the value 0 is written as the two-octet
+  sequence C0 80 instead of a zero octet.
+  Values of 0x1000000000 and above terminate the program with status 1.
+*/
+void write_utf8(ULL x) {
+  int extra; /* number of continuation octets */
+  int lead;  /* marker bits of the lead octet */
+  if(out_mode=='0' && x==0) {
+    putchar(0xC0);
+    putchar(0x80);
+    return;
+  }
+  if(x<0x80ULL) {
+    putchar(x);
+    return;
+  }
+  if(x<0x800ULL) { extra=1; lead=0xC0; }
+  else if(x<0x10000ULL) { extra=2; lead=0xE0; }
+  else if(x<0x200000ULL) { extra=3; lead=0xF0; }
+  else if(x<0x4000000ULL) { extra=4; lead=0xF8; }
+  else if(x<0x80000000ULL) { extra=5; lead=0xFC; }
+  else if(x<0x1000000000ULL) { extra=6; lead=0xFE; }
+  else exit(1);
+  putchar(lead|(x>>(6*extra)));
+  /* Remaining groups, highest first; extra counts down to shift 0. */
+  while(extra-->0) putchar(0x80|((x>>(6*extra))&0x3F));
+}
+
+/*
+  Encode x to stdout as a big-endian variable-length quantity:
+  7 payload bits per octet, most significant group first, bit 7 set on
+  every octet except the last.  Leading all-zero groups are omitted.
+
+  "x>>i" being non-zero is the test for "some bit at position i or
+  above is set"; it avoids building the mask from 1LL<<63, which
+  overflows a signed long long.
+*/
+void write_vlq8(ULL x) {
+  int i;
+  for(i=63;i>0;i-=7) if(x>>i) putchar(0x80|(int)((x>>i)&0x7F));
+  putchar((int)(x&0x7F));
+}
+
+/*
+  Encode x to stdout as unsigned LEB128: 7 payload bits per octet,
+  least significant group first, bit 7 set on every octet except the last.
+*/
+void write_leb128(ULL x) {
+  for(;x>0x7FULL;x>>=7) putchar(0x80|(int)(x&0x7F));
+  putchar(x);
+}
+
+/*
+  Encode x to stdout as little-endian UTF-16.
+  Values below 0x10000 are written as one 16-bit unit; values up to
+  0x10FFFF as a surrogate pair.  Larger values terminate the program
+  with status 1.
+*/
+void write_utf16_le(ULL x) {
+  if(x>0x10FFFFULL) exit(1);
+  if(x<0x10000ULL) {
+    write_16bit_le_raw(x);
+    return;
+  }
+  x-=0x10000ULL;
+  write_16bit_le_raw(0xD800|(x>>10));
+  write_16bit_le_raw(0xDC00|(x&0x3FF));
+}
+
+/*
+  Encode x to stdout as big-endian UTF-16.
+  Values below 0x10000 are written as one 16-bit unit; values up to
+  0x10FFFF as a surrogate pair.  Larger values terminate the program
+  with status 1.
+*/
+void write_utf16_be(ULL x) {
+  if(x>0x10FFFFULL) exit(1);
+  if(x<0x10000ULL) {
+    write_16bit_be_raw(x);
+    return;
+  }
+  x-=0x10000ULL;
+  write_16bit_be_raw(0xD800|(x>>10));
+  write_16bit_be_raw(0xDC00|(x&0x3FF));
+}
+
+/*
+  Write the octet whose translation table entry equals x.
+  The table is searched from index 0 upward and only the first match
+  is written, so one input value never produces more than one octet
+  even if the table holds duplicate entries.
+  Nothing is written when no entry matches.
+*/
+void write_translate(ULL x) {
+  int i;
+  for(i=0;i<256;i++) {
+    if(translation[i]==x) {
+      putchar(i);
+      break;
+    }
+  }
+}
+
+/* Write x (converted to int) to stdout as upper-case hexadecimal, zero-padded to at least two digits. */
+void write_hex(ULL x) {
+  const int value=(int)x;
+  printf("%02X",value);
+}
+
+/*
+  Decoder lookup table, indexed by the input format letter.
+  Letters without an entry are null pointers, which main treats as an
+  unsupported format.  '1' and '0' share the same UTF-8 decoder.
+*/
+const in_func_t in_func[128]={
+  ['8']=read_8bit_raw,
+  ['w']=read_16bit_le_raw,
+  ['W']=read_16bit_be_raw,
+  ['d']=read_32bit_le_raw,
+  ['D']=read_32bit_be_raw,
+  ['q']=read_64bit_le_raw,
+  ['Q']=read_64bit_be_raw,
+  ['1']=read_utf8,
+  ['0']=read_utf8,
+  ['V']=read_vlq8,
+  ['v']=read_leb128,
+  ['u']=read_utf16_le,
+  ['U']=read_utf16_be,
+  ['T']=read_translate,
+  ['M']=read_messagepack,
+  ['4']=read_hex,
+  [0]=0
+};
+
+/*
+  Encoder lookup table, indexed by the output format letter.
+  Letters without an entry are null pointers, which main treats as an
+  unsupported format; there is no encoder for 'M'.
+  '1' and '0' share write_utf8, which checks out_mode to decide how to
+  write the value 0.
+*/
+const out_func_t out_func[128]={
+  ['8']=write_8bit_raw,
+  ['w']=write_16bit_le_raw,
+  ['W']=write_16bit_be_raw,
+  ['d']=write_32bit_le_raw,
+  ['D']=write_32bit_be_raw,
+  ['q']=write_64bit_le_raw,
+  ['Q']=write_64bit_be_raw,
+  ['1']=write_utf8,
+  ['0']=write_utf8,
+  ['V']=write_vlq8,
+  ['v']=write_leb128,
+  ['u']=write_utf16_le,
+  ['U']=write_utf16_be,
+  ['T']=write_translate,
+  ['4']=write_hex,
+  [0]=0
+};
+
+/*
+  Entry point: convert stdin to stdout between two encodings.
+
+  argv[1] - mode string: first character is the input format, second
+            the output format, any further characters are option
+            letters (see the option macros).
+  argv[2] - optional translation table file holding 256 entries; the
+            entry width in bytes is the file size divided by 256, and
+            at most four bytes per entry are read.
+
+  Returns 1 for a missing or unsupported mode, an unreadable table
+  file, or a missing byte-order mark when bom_in is set.  Normal
+  termination happens inside getb, which exits with status 0 at end of
+  input.
+*/
+int main(int argc,char**argv) {
+  int b;
+  int is_lf=0;
+  ULL x;
+#ifdef _WIN32
+  _setmode(_fileno(stdin),_O_BINARY);
+  _setmode(_fileno(stdout),_O_BINARY);
+#endif
+  if(argc<2 || !argv[1][0]) return 1;
+  in_mode=argv[1][0];
+  out_mode=argv[1][1];
+  /* Both tables have 128 slots; refuse anything outside 0..127 before
+     indexing (char may be signed, so non-ASCII can be negative). */
+  if((in_mode&~127) || (out_mode&~127)) return 1;
+  if(!in_func[(int)in_mode] || !out_func[(int)out_mode]) return 1;
+  for(b=2;argv[1][b];b++) options[argv[1][b]&127]=1;
+  if(argc>2) {
+    FILE*fp=fopen(argv[2],"rb");
+    int i,j;
+    if(!fp) return 1;
+    fseek(fp,0,SEEK_END);
+    b=ftell(fp)>>8; /* bytes per table entry */
+    rewind(fp);
+    for(i=0;i<256;i++) {
+      translation[i]=(ULL)fgetc(fp);
+      /* Additional bytes of the entry, up to four in total.  Widened to
+         ULL before shifting so a shift by 24 cannot overflow int. */
+      for(j=1;j<b && j<4;j++) {
+        const ULL c=(ULL)fgetc(fp);
+        if(trans_le) translation[i]|=c<<(8*j);
+        else translation[i]=(translation[i]<<8)|c;
+      }
+    }
+    fclose(fp);
+  }
+  if(bom_out) out_func[out_mode&127](0xFEFF);
+  while(!feof(stdin)) {
+    x=in_func[in_mode&127]();
+    if(bom_in && x!=0xFEFF) return 1;
+    /* Swallow the LF of a CR LF pair whose CR was already converted. */
+    if(is_lf && x==10) {
+      is_lf=0;
+      continue;
+    }
+    is_lf=(conv_lf && x==13);
+    if(is_lf) x=10;
+    if(conv_cr && x==10) out_func[out_mode&127](13);
+    /* While bom_in is still set, x is the byte-order mark: drop it. */
+    if(!bom_in) out_func[out_mode&127](x);
+    bom_in=0;
+  }
+  return 0;
+}
+