Mercurial > repo
view UNPA @ 4292:03ef871e702e
<oerjan> learn Nooodles are the invention of the chinese. They were brought to Europe by Marco Polo, a distant ancestor of Taneb.
author | HackBot |
---|---|
date | Tue, 14 Jan 2014 01:15:45 +0000 |
parents | 5fddb59b5d15 |
children |
line wrap: on
line source
/* UTF-to-VLQ Public domain */
#include <stdio.h>
#include <stdlib.h>
#ifdef _WIN32
#include <fcntl.h>
#endif

typedef unsigned char byte;
typedef unsigned long long ULL;
typedef ULL(*in_func_t)(void);   // decoder: reads one value from stdin
typedef void(*out_func_t)(ULL);  // encoder: writes one value to stdout

char in_mode;          // format letter selecting the decoder
char out_mode;         // format letter selecting the encoder
int options[128];      // option flags, indexed by option letter
ULL translation[256];  // byte -> value table used by the 'T' format

#define conv_lf options['L']
#define conv_cr options['c']
#define bom_in options['b']
#define bom_out options['B']
#define trans_le options['t']

/*
 * Read one byte from stdin.
 * End of input is the normal way for the program to finish, so EOF
 * terminates the process with status 0 instead of being reported.
 */
byte getb(void) {
  int c = fgetc(stdin);
  if (c == EOF) exit(0);
  return (byte)c;
}

/*
 * Treat bit `y` of `x` as a sign bit and replicate it into all higher bits.
 * `y` must be in 0..63.
 * Declared `static inline` so that a definition always exists in this
 * translation unit (a plain `inline` definition provides no external one in
 * C99/C11), and written with unsigned shifts to avoid shifting a negative
 * value.
 */
static inline ULL sign_extend(ULL x, int y) {
  return (x & (1ULL << y)) ? (x | (~0ULL << y)) : x;
}

/* Read `count` bytes, most significant first. */
static ULL read_be(int count) {
  ULL value = 0;
  while (count-- > 0) value = (value << 8) | getb();
  return value;
}

/* Read `count` bytes, least significant first. */
static ULL read_le(int count) {
  ULL value = 0;
  int shift;
  // The cast keeps the shift in 64 bits; shifting the promoted int would
  // overflow (and sign-extend) once the byte reaches bit 31.
  for (shift = 0; shift < 8 * count; shift += 8) value |= (ULL)getb() << shift;
  return value;
}

/* Fixed-width raw integers: 8/16/32/64 bits, little or big endian. */
ULL read_8bit_raw(void) { return getb(); }
ULL read_16bit_le_raw(void) { return read_le(2); }
ULL read_16bit_be_raw(void) { return read_be(2); }
ULL read_32bit_le_raw(void) { return read_le(4); }
ULL read_32bit_be_raw(void) { return read_be(4); }
ULL read_64bit_le_raw(void) { return read_le(8); }
ULL read_64bit_be_raw(void) { return read_be(8); }  // all eight bytes are consumed

/*
 * Decode one UTF-8 style sequence.
 * Beyond the usual 1..4 byte forms this accepts the 5 and 6 byte forms and
 * lead bytes 0xFE/0xFF followed by six continuation bytes. Bytes 0x00..0xBF
 * (ASCII and stray continuation bytes) are returned unchanged. Continuation
 * bytes are not validated; only their low six bits are used.
 */
ULL read_utf8(void) {
  ULL x = getb();
  int extra;  // number of continuation bytes that follow the lead byte

  if ((x & 0xE0) == 0xC0) { x &= 0x1F; extra = 1; }
  else if ((x & 0xF0) == 0xE0) { x &= 0x0F; extra = 2; }
  else if ((x & 0xF8) == 0xF0) { x &= 0x07; extra = 3; }
  else if ((x & 0xFC) == 0xF8) { x &= 0x03; extra = 4; }
  else if ((x & 0xFE) == 0xFC) { x &= 0x01; extra = 5; }
  else if (x == 0xFE || x == 0xFF) { x &= 0x01; extra = 6; }
  else return x;

  while (extra-- > 0) x = (x << 6) | (getb() & 0x3F);
  return x;
}

/*
 * Decode a big-endian base-128 quantity (VLQ): every byte carries seven
 * payload bits, and the top bit is set on all bytes except the last.
 */
ULL read_vlq8(void) {
  ULL result = 0;
  byte b = getb();
  while (b & 0x80) {
    result = (result << 7) | (b & 0x7F);
    b = getb();
  }
  // The final byte is a payload group too, so the accumulator has to be
  // shifted before it is merged in.
  return (result << 7) | b;
}

/*
 * Decode an unsigned LEB128 quantity: like VLQ but least significant
 * group first.
 */
ULL read_leb128(void) {
  ULL result = 0;
  int shift = 0;
  byte b = getb();
  while (b & 0x80) {
    if (shift < 64) result |= (ULL)(b & 0x7F) << shift;  // ignore groups beyond 64 bits
    shift += 7;
    b = getb();
  }
  if (shift < 64) result |= (ULL)b << shift;
  return result;
}

/*
 * Decode one UTF-16LE code point. A high surrogate (0xD800..0xDBFF) is
 * combined with the following code unit; that second unit is not checked
 * for being a low surrogate.
 */
ULL read_utf16_le(void) {
  ULL unit = read_le(2);
  if (unit >= 0xD800 && unit < 0xDC00) {
    ULL low = read_le(2);
    return (((unit & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000ULL;
  }
  return unit;
}

/* Decode one UTF-16BE code point; see read_utf16_le for surrogate handling. */
ULL read_utf16_be(void) {
  ULL unit = read_be(2);
  if (unit >= 0xD800 && unit < 0xDC00) {
    ULL low = read_be(2);
    return (((unit & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000ULL;
  }
  return unit;
}

/* Map one input byte through the translation table. */
ULL read_translate(void) { return translation[getb()]; }

/*
 * Convert a decoded floating-point value to the integer stream value.
 * Negative values are converted through a signed type so they wrap the same
 * way the integer formats do. NaN and values that fit neither signed nor
 * unsigned 64 bits are rejected with exit status 1.
 */
static ULL code_from_double(double v) {
  if (v >= 0.0 && v < 18446744073709551616.0) return (ULL)v;
  if (v < 0.0 && v >= -9223372036854775808.0) return (ULL)(long long)v;
  exit(1);
}

/*
 * Read the next scalar from a MessagePack stream.
 * Containers are flattened: array and map headers are skipped, so their
 * elements are returned one by one by subsequent calls. Raw/string payloads
 * are copied verbatim to stdout and nil is skipped. false yields 0 and
 * true yields all-ones (-1). Unsupported type bytes terminate the program
 * with status 1.
 */
ULL read_messagepack(void) {
  for (;;) {
    byte x = getb();
    ULL len;

    if (x <= 0x7F) return x;   // positive fixint
    if (x <= 0x9F) continue;   // 0x80..0x9F: fixmap / fixarray header
    if (x <= 0xBF) {           // 0xA0..0xBF: fixraw, length in the low five bits
      for (len = x - 0xA0; len; len--) putchar(getb());
      continue;
    }
    if (x >= 0xE0) return x | ~31ULL;  // negative fixint, sign-extended

    switch (x) {
      case 0xC0: continue;       // nil
      case 0xC2: return 0;       // false
      case 0xC3: return ~0ULL;   // true
      case 0xCA: {
        // float32: reinterpret the four big-endian bytes through a union.
        // Assumes a 32-bit unsigned int and IEEE 754 float.
        union { unsigned int bits; float value; } pun;
        pun.bits = (unsigned int)read_32bit_be_raw();
        return code_from_double(pun.value);
      }
      case 0xCB: {
        // float64: assumes IEEE 754 double with 64-bit unsigned long long.
        union { ULL bits; double value; } pun;
        pun.bits = read_64bit_be_raw();
        return code_from_double(pun.value);
      }
      case 0xCC: return read_8bit_raw();
      case 0xCD: return read_16bit_be_raw();
      case 0xCE: return read_32bit_be_raw();
      case 0xCF: return read_64bit_be_raw();
      case 0xD0: return sign_extend(read_8bit_raw(), 7);
      case 0xD1: return sign_extend(read_16bit_be_raw(), 15);
      case 0xD2: return sign_extend(read_32bit_be_raw(), 31);
      case 0xD3: return read_64bit_be_raw();
      case 0xDA:  // raw16
        for (len = read_16bit_be_raw(); len; len--) putchar(getb());
        continue;
      case 0xDB:  // raw32
        for (len = read_32bit_be_raw(); len; len--) putchar(getb());
        continue;
      case 0xDC: read_16bit_be_raw(); continue;  // array16 header
      case 0xDD: read_32bit_be_raw(); continue;  // array32 header
      case 0xDE: read_16bit_be_raw(); continue;  // map16 header
      case 0xDF: read_32bit_be_raw(); continue;  // map32 header
      default: exit(1);
    }
  }
}

/*
 * Read two hexadecimal digits, skipping bytes that compare <= ' ' before
 * each digit, and return the byte they encode. Digits are not validated:
 * any character >= 'A' is treated as a letter digit.
 */
ULL read_hex(void) {
  char hi, lo;
  do hi = getb(); while (hi <= ' ');
  do lo = getb(); while (lo <= ' ');
  return (((hi & 15) + (hi >= 'A' ? 9 : 0)) << 4) | ((lo & 15) + (lo >= 'A' ? 9 : 0));
}

/* Write the low eight bits of `v` to stdout. */
static void put_byte(ULL v) { putchar((int)(v & 0xFF)); }

/* Fixed-width raw integers; bits above the chosen width are discarded. */
void write_8bit_raw(ULL x) { put_byte(x); }

void write_16bit_le_raw(ULL x) {
  put_byte(x);
  put_byte(x >> 8);
}

void write_16bit_be_raw(ULL x) {
  put_byte(x >> 8);
  put_byte(x);
}

void write_32bit_le_raw(ULL x) {
  int shift;
  for (shift = 0; shift < 32; shift += 8) put_byte(x >> shift);
}

void write_32bit_be_raw(ULL x) {
  int shift;
  for (shift = 24; shift >= 0; shift -= 8) put_byte(x >> shift);
}

void write_64bit_le_raw(ULL x) {
  int shift;
  for (shift = 0; shift < 64; shift += 8) put_byte(x >> shift);
}

void write_64bit_be_raw(ULL x) {
  int shift;
  for (shift = 56; shift >= 0; shift -= 8) put_byte(x >> shift);
}

/* Emit a lead byte followed by `extra` six-bit continuation bytes. */
static void put_utf8_seq(ULL x, int lead, int extra) {
  put_byte((ULL)lead | (x >> (6 * extra)));
  while (extra-- > 0) put_byte(0x80 | ((x >> (6 * extra)) & 0x3F));
}

/*
 * Encode `x` as a UTF-8 style sequence of one to seven bytes.
 * In output mode '0' the value zero is written as the overlong pair C0 80
 * so the output never contains a NUL byte. Values of 2^36 or more cannot
 * be encoded and terminate the program with status 1.
 * NOTE(review): read_utf8 accepts a 0xFF lead byte (bit 36 set) which this
 * encoder never produces - confirm whether the limit should be 2^37.
 */
void write_utf8(ULL x) {
  if (out_mode == '0' && !x) {
    putchar(0xC0);
    putchar(0x80);
  }
  else if (x < 0x80ULL) put_byte(x);
  else if (x < 0x800ULL) put_utf8_seq(x, 0xC0, 1);
  else if (x < 0x10000ULL) put_utf8_seq(x, 0xE0, 2);
  else if (x < 0x200000ULL) put_utf8_seq(x, 0xF0, 3);
  else if (x < 0x4000000ULL) put_utf8_seq(x, 0xF8, 4);
  else if (x < 0x80000000ULL) put_utf8_seq(x, 0xFC, 5);
  else if (x < 0x1000000000ULL) put_utf8_seq(x, 0xFE, 6);
  else exit(1);
}

/*
 * Encode `x` as a big-endian base-128 quantity (VLQ) using the fewest
 * bytes: leading all-zero groups are omitted.
 */
void write_vlq8(ULL x) {
  int shift;
  // 63 = 9 * 7, so the first group holds only the top bit of a 64-bit value.
  for (shift = 63; shift > 0; shift -= 7) {
    if (x >> shift) put_byte(0x80 | ((x >> shift) & 0x7F));
  }
  put_byte(x & 0x7F);
}
void write_leb128(ULL x) { while(x&~0x7FULL) { putchar(0x80|x&0xFF); x>>=7; } putchar(x); } void write_utf16_le(ULL x) { if(x>0x10FFFFULL) exit(1); if(x&0x1F0000ULL) { x-=0x10000ULL; write_16bit_le_raw((x>>10)|0xD800); write_16bit_le_raw((x&0x3FF)|0xDC00); } else { write_16bit_le_raw(x); } } void write_utf16_be(ULL x) { if(x>0x10FFFFULL) exit(1); if(x&0x1F0000ULL) { x-=0x10000ULL; write_16bit_be_raw((x>>10)|0xD800); write_16bit_be_raw((x&0x3FF)|0xDC00); } else { write_16bit_be_raw(x); } } void write_translate(ULL x) { int i; for(i=0;i<256;i++) if(translation[i]==x) putchar(i); } void write_hex(ULL x) { printf("%02X",(int)x); } const in_func_t in_func[128]={ ['8']=read_8bit_raw, ['w']=read_16bit_le_raw, ['W']=read_16bit_be_raw, ['d']=read_32bit_le_raw, ['D']=read_32bit_be_raw, ['q']=read_64bit_le_raw, ['Q']=read_64bit_be_raw, ['1']=read_utf8, ['0']=read_utf8, ['V']=read_vlq8, ['v']=read_leb128, ['u']=read_utf16_le, ['U']=read_utf16_be, ['T']=read_translate, ['M']=read_messagepack, ['4']=read_hex, [0]=0 }; const out_func_t out_func[128]={ ['8']=write_8bit_raw, ['w']=write_16bit_le_raw, ['W']=write_16bit_be_raw, ['d']=write_32bit_le_raw, ['D']=write_32bit_be_raw, ['q']=write_64bit_le_raw, ['Q']=write_64bit_be_raw, ['1']=write_utf8, ['0']=write_utf8, ['V']=write_vlq8, ['v']=write_leb128, ['u']=write_utf16_le, ['U']=write_utf16_be, ['T']=write_translate, ['4']=write_hex, [0]=0 }; int main(int argc,char**argv) { int b; int is_lf=0; ULL x; #ifdef _WIN32 _setmode(_fileno(stdin),_O_BINARY); _setmode(_fileno(stdout),_O_BINARY); #endif if(argc<2 || !argv[1][0] || !in_func[argv[1][0]] || !out_func[argv[1][1]]) return 1; in_mode=argv[1][0]; out_mode=argv[1][1]; for(b=2;argv[1][b];b++) options[argv[1][b]&127]=1; if(argc>2) { FILE*fp=fopen(argv[2],"rb"); int i; if(!fp) return 1; fseek(fp,0,SEEK_END); b=ftell(fp)>>8; rewind(fp); for(i=0;i<255;i++) { translation[i]=fgetc(fp); if(b>1) translation[i]=trans_le?(translation[i]|(fgetc(fp)<<8)):((translation[i]<<8)|fgetc(fp)); if(b>2) 
translation[i]=trans_le?(translation[i]|(fgetc(fp)<<16)):((translation[i]<<8)|fgetc(fp)); if(b>3) translation[i]=trans_le?(translation[i]|(fgetc(fp)<<24)):((translation[i]<<8)|fgetc(fp)); } fclose(fp); } if(bom_out) out_func[out_mode&127](0xFEFF); while(!feof(stdin)) { x=in_func[in_mode&127](); if(bom_in && x!=0xFEFF) return 1; if(is_lf && x==10) { is_lf=0; continue; } if(is_lf=(conv_lf && x==13)) x=10; if(conv_cr && x==10) out_func[out_mode&127](13); if(!bom_in) out_func[out_mode&127](x); bom_in=0; } return 0; }