# HG changeset patch # User HackBot # Date 1389066854 0 # Node ID 5fddb59b5d15c6a7921f94e1a2227ac1973a92f1 # Parent 0ea5d8e5b787e45e391cc82b46e6b3deb4245d1a fetch http://sprunge.us/UNPA diff -r 0ea5d8e5b787 -r 5fddb59b5d15 UNPA --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/UNPA Tue Jan 07 03:54:14 2014 +0000 @@ -0,0 +1,448 @@ +/* + UTF-to-VLQ + Public domain +*/ + /* NOTE(review): this text looks damaged in transit: the #include lines name no header, code is missing between sign_extend and the tail of the UTF-16 reader below, and again inside write_vlq8. Confirm against a clean copy before building. */+#include +#include + +#ifdef _WIN32 +#include +#endif + +typedef unsigned char byte; +typedef unsigned long long ULL; + /* A reader takes no argument and returns one decoded value; a writer takes one value to emit. */+typedef ULL(*in_func_t)(void); +typedef void(*out_func_t)(ULL); + /* main() sets in_mode and out_mode from the first two characters of argv[1], sets options[c&127] for each later character c, and loads translation[] from the file named by argv[2]. */+char in_mode; +char out_mode; +int options[128]; +ULL translation[256]; + /* Option flags, each stored at the index of its option letter. */+#define conv_lf options['L'] +#define conv_cr options['c'] +#define bom_in options['b'] +#define bom_out options['B'] +#define trans_le options['t'] + /* Return the next byte of stdin; at EOF the process exits with status 0. */+byte getb(void) { + int x=fgetc(stdin); + if(x==EOF) exit(0); + return x; +} + /* NOTE(review): the body below is cut off mid-expression and runs into the end of a UTF-16 reader (surrogate handling); the functions in between are not visible here. */+inline ULL sign_extend(ULL x,int y) { + return x|((x&(1LL<=0xD800 && r<0xDC00) { + int x=getb()<<16; + x|=getb(); + return (((r&0x3FF)<<10)|(x&0x3FF))+0x10000ULL; + } else { + return r; + } +} + /* Read two bytes as one UTF-16 unit; if it is in 0xD800-0xDBFF, read two more bytes and return the combined value plus 0x10000, otherwise return the unit. NOTE(review): the second byte is shifted left by 16, not 8, and the first byte is kept as the low byte; both look inconsistent with a big-endian 16-bit read - confirm. */+ULL read_utf16_be(void) { + ULL r=getb(); + r|=getb()<<16; + if(r>=0xD800 && r<0xDC00) { + int x=getb(); + x|=getb()<<16; + return (((r&0x3FF)<<10)|(x&0x3FF))+0x10000ULL; + } else { + return r; + } +} + /* Look up the next input byte in translation[]. */+ULL read_translate(void) { + return translation[getb()]; +} + /* Scan type bytes until one yields a value and return it. Bytes 0x80-0x9F and 0xC0 are skipped, 0xDC-0xDF skip a 16- or 32-bit count, raw payloads (0xA0-0xBF, 0xDA, 0xDB) are copied to stdout, and an unlisted byte exits with status 1. */+ULL read_messagepack(void) { + byte x; + ULL s; + float f; + double d; + for(;;) { + x=getb(); + switch(x) { + case 0x00 ... 0x7F: return x; + case 0x80 ... 0x9F: continue; + case 0xA0 ... 
0xBF: + while(x-->0xA0) putchar(getb()); + continue; + case 0xC0: continue; + case 0xC2: return 0; + case 0xC3: return -1LL; + case 0xCA: + *(short*)&f=read_16bit_be_raw(); // Not completely portable + return (ULL)f; + case 0xCB: /* NOTE(review): this case stores into d but returns f - confirm. */+ *(int*)&d=read_32bit_be_raw(); // Not completely portable + return (ULL)f; + case 0xCC: return read_8bit_raw(); + case 0xCD: return read_16bit_be_raw(); + case 0xCE: return read_32bit_be_raw(); + case 0xCF: return read_64bit_be_raw(); + case 0xD0: return sign_extend(read_8bit_raw(),7); + case 0xD1: return sign_extend(read_16bit_be_raw(),15); + case 0xD2: return sign_extend(read_32bit_be_raw(),31); + case 0xD3: return read_64bit_be_raw(); + case 0xDA: + s=read_16bit_be_raw(); + while(s--) putchar(getb()); + continue; + case 0xDB: + s=read_32bit_be_raw(); + while(s--) putchar(getb()); + continue; + case 0xDC: read_16bit_be_raw(); continue; + case 0xDD: read_32bit_be_raw(); continue; + case 0xDE: read_16bit_be_raw(); continue; + case 0xDF: read_32bit_be_raw(); continue; + case 0xE0 ... 
0xFF: return x|~31LL; + default: exit(1); + } + } +} + /* Read two characters, skipping any at or below the space character, and combine them as the high and low hex digits of one byte value. */+ULL read_hex(void) { + char a,b; + do a=getb(); while(a<=' '); + do b=getb(); while(b<=' '); + return (((a&15)+(a>='A'?9:0))<<4)|((b&15)+(b>='A'?9:0)); +} + /* Fixed-width writers: each putchar emits one byte of x, least significant first for _le and most significant first for _be. */+void write_8bit_raw(ULL x) { + putchar(x); +} + +void write_16bit_le_raw(ULL x) { + putchar(x&255); + putchar(x>>8); +} + +void write_16bit_be_raw(ULL x) { + putchar(x>>8); + putchar(x&255); +} + +void write_32bit_le_raw(ULL x) { + putchar(x&255); + putchar(x>>8); + putchar(x>>16); + putchar(x>>24); +} + +void write_32bit_be_raw(ULL x) { + putchar(x>>24); + putchar(x>>16); + putchar(x>>8); + putchar(x&255); +} + +void write_64bit_le_raw(ULL x) { + putchar(x&255); + putchar(x>>8); + putchar(x>>16); + putchar(x>>24); + putchar(x>>32); + putchar(x>>40); + putchar(x>>48); + putchar(x>>56); +} + +void write_64bit_be_raw(ULL x) { + putchar(x>>56); + putchar(x>>48); + putchar(x>>40); + putchar(x>>32); + putchar(x>>24); + putchar(x>>16); + putchar(x>>8); + putchar(x&255); +} + /* Write x as a UTF-8 style sequence of 1 to 7 bytes; values of 2^36 or more exit with status 1. When out_mode is the character 0, a zero value is written as the two bytes C0 80. */+void write_utf8(ULL x) { + if(out_mode=='0' && !x) { + putchar(0xC0); + putchar(0x80); + } else if(x<0x80ULL) { + putchar(x); + } else if(x<0x800ULL) { + putchar(0xC0|(x>>6)); + putchar(0x80|(x)&0xBF); + } else if(x<0x10000ULL) { + putchar(0xE0|(x>>12)); + putchar(0x80|(x>>6)&0xBF); + putchar(0x80|(x)&0xBF); + } else if(x<0x200000ULL) { + putchar(0xF0|(x>>18)); + putchar(0x80|(x>>12)&0xBF); + putchar(0x80|(x>>6)&0xBF); + putchar(0x80|(x)&0xBF); + } else if(x<0x4000000ULL) { + putchar(0xF8|(x>>24)); + putchar(0x80|(x>>18)&0xBF); + putchar(0x80|(x>>12)&0xBF); + putchar(0x80|(x>>6)&0xBF); + putchar(0x80|(x)&0xBF); + } else if(x<0x80000000ULL) { + putchar(0xFC|(x>>30)); + putchar(0x80|(x>>24)&0xBF); + putchar(0x80|(x>>18)&0xBF); + putchar(0x80|(x>>12)&0xBF); + putchar(0x80|(x>>6)&0xBF); + putchar(0x80|(x)&0xBF); + } else if(x<0x1000000000ULL) { + putchar(0xFE|(x>>36)); + putchar(0x80|(x>>30)&0xBF); + putchar(0x80|(x>>24)&0xBF); + putchar(0x80|(x>>18)&0xBF); + putchar(0x80|(x>>12)&0xBF); + 
putchar(0x80|(x>>6)&0xBF); + putchar(0x80|(x)&0xBF); + } else { + exit(1); + } +} + /* NOTE(review): the loop line in this function has unbalanced parentheses; part of it appears to be missing - confirm against a clean copy. */+void write_vlq8(ULL x) { + int i; + for(i=63;i;i-=7) if(x&-(1LL<>i)&0xFF); + putchar(x&0x7F); +} + /* Write x seven bits at a time, low group first; every byte but the last has bit 7 set. */+void write_leb128(ULL x) { + while(x&~0x7FULL) { + putchar(0x80|x&0xFF); + x>>=7; + } + putchar(x); +} + /* UTF-16 writers: exit with status 1 above 0x10FFFF; a value with any of bits 16-20 set is written as a surrogate pair, anything else as one 16-bit unit. */+void write_utf16_le(ULL x) { + if(x>0x10FFFFULL) exit(1); + if(x&0x1F0000ULL) { + x-=0x10000ULL; + write_16bit_le_raw((x>>10)|0xD800); + write_16bit_le_raw((x&0x3FF)|0xDC00); + } else { + write_16bit_le_raw(x); + } +} + +void write_utf16_be(ULL x) { + if(x>0x10FFFFULL) exit(1); + if(x&0x1F0000ULL) { + x-=0x10000ULL; + write_16bit_be_raw((x>>10)|0xD800); + write_16bit_be_raw((x&0x3FF)|0xDC00); + } else { + write_16bit_be_raw(x); + } +} + /* Write every index i in 0..255 whose translation[i] equals x; writes nothing when no entry matches. */+void write_translate(ULL x) { + int i; + for(i=0;i<256;i++) if(translation[i]==x) putchar(i); +} + /* Print x, cast to int, with printf format %02X. */+void write_hex(ULL x) { + printf("%02X",(int)x); +} + /* Reader for each input mode character; unlisted entries are null. */+const in_func_t in_func[128]={ + ['8']=read_8bit_raw, + ['w']=read_16bit_le_raw, + ['W']=read_16bit_be_raw, + ['d']=read_32bit_le_raw, + ['D']=read_32bit_be_raw, + ['q']=read_64bit_le_raw, + ['Q']=read_64bit_be_raw, + ['1']=read_utf8, + ['0']=read_utf8, + ['V']=read_vlq8, + ['v']=read_leb128, + ['u']=read_utf16_le, + ['U']=read_utf16_be, + ['T']=read_translate, + ['M']=read_messagepack, + ['4']=read_hex, + [0]=0 +}; + /* Writer for each output mode character; unlisted entries (including M) are null. */+const out_func_t out_func[128]={ + ['8']=write_8bit_raw, + ['w']=write_16bit_le_raw, + ['W']=write_16bit_be_raw, + ['d']=write_32bit_le_raw, + ['D']=write_32bit_be_raw, + ['q']=write_64bit_le_raw, + ['Q']=write_64bit_be_raw, + ['1']=write_utf8, + ['0']=write_utf8, + ['V']=write_vlq8, + ['v']=write_leb128, + ['u']=write_utf16_le, + ['U']=write_utf16_be, + ['T']=write_translate, + ['4']=write_hex, + [0]=0 +}; + /* Per the code below: argv[1] is the input mode character, the output mode character, then option letters; argv[2], if present, names the translation table file. Returns 1 on a missing or unknown mode or an unopenable file; the conversion loop ends when getb() exits at EOF. */+int main(int argc,char**argv) { + int b; + int is_lf=0; + ULL x; +#ifdef _WIN32 + _setmode(_fileno(stdin),_O_BINARY); + _setmode(_fileno(stdout),_O_BINARY); +#endif + if(argc<2 || !argv[1][0] || !in_func[argv[1][0]] || !out_func[argv[1][1]]) return 1; + in_mode=argv[1][0]; 
+ out_mode=argv[1][1]; + for(b=2;argv[1][b];b++) options[argv[1][b]&127]=1; + if(argc>2) { + FILE*fp=fopen(argv[2],"rb"); + int i; + if(!fp) return 1; + fseek(fp,0,SEEK_END); + b=ftell(fp)>>8; + rewind(fp); + /* b is the file size divided by 256 and selects 1 to 4 bytes per entry. NOTE(review): the loop bound is 255, so translation[255] is never loaded - confirm. */for(i=0;i<255;i++) { + translation[i]=fgetc(fp); + if(b>1) translation[i]=trans_le?(translation[i]|(fgetc(fp)<<8)):((translation[i]<<8)|fgetc(fp)); + if(b>2) translation[i]=trans_le?(translation[i]|(fgetc(fp)<<16)):((translation[i]<<8)|fgetc(fp)); + if(b>3) translation[i]=trans_le?(translation[i]|(fgetc(fp)<<24)):((translation[i]<<8)|fgetc(fp)); + } + fclose(fp); + } + if(bom_out) out_func[out_mode&127](0xFEFF); + /* With bom_in set, the first value must be 0xFEFF (else return 1) and is not written. conv_lf turns 13 and 13,10 into 10; conv_cr writes 13 before each 10. */while(!feof(stdin)) { + x=in_func[in_mode&127](); + if(bom_in && x!=0xFEFF) return 1; + if(is_lf && x==10) { + is_lf=0; + continue; + } + if(is_lf=(conv_lf && x==13)) x=10; + if(conv_cr && x==10) out_func[out_mode&127](13); + if(!bom_in) out_func[out_mode&127](x); + bom_in=0; + } + return 0; +} +