Mercurial > repo
diff nasmbuild/nasm-2.13rc9/asm/quote.c @ 10554:587a0a262d22
<moonythedwarf> ` cd nasmbuild; tar -xf nasm.tar.gz
author | HackBot |
---|---|
date | Thu, 30 Mar 2017 20:58:41 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nasmbuild/nasm-2.13rc9/asm/quote.c Thu Mar 30 20:58:41 2017 +0000 @@ -0,0 +1,479 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 1996-2016 The NASM Authors - All Rights Reserved + * See the file AUTHORS included with the NASM distribution for + * the specific copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ----------------------------------------------------------------------- */ + +/* + * quote.c + */ + +#include "compiler.h" + +#include <stdlib.h> + +#include "nasmlib.h" +#include "quote.h" + +char *nasm_quote(const char *str, size_t len) +{ + const char *p, *ep; + char c, c1, *q, *nstr; + unsigned char uc; + bool sq_ok, dq_ok; + size_t qlen; + + sq_ok = dq_ok = true; + ep = str+len; + qlen = 0; /* Length if we need `...` quotes */ + for (p = str; p < ep; p++) { + c = *p; + switch (c) { + case '\'': + sq_ok = false; + qlen++; + break; + case '\"': + dq_ok = false; + qlen++; + break; + case '`': + case '\\': + qlen += 2; + break; + default: + if (c < ' ' || c > '~') { + sq_ok = dq_ok = false; + switch (c) { + case '\a': + case '\b': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + case 27: + qlen += 2; + break; + default: + c1 = (p+1 < ep) ? p[1] : 0; + if (c1 >= '0' && c1 <= '7') + uc = 0377; /* Must use the full form */ + else + uc = c; + if (uc > 077) + qlen++; + if (uc > 07) + qlen++; + qlen += 2; + break; + } + } else { + qlen++; + } + break; + } + } + + if (sq_ok || dq_ok) { + /* Use '...' or "..." */ + nstr = nasm_malloc(len+3); + nstr[0] = nstr[len+1] = sq_ok ? '\'' : '\"'; + nstr[len+2] = '\0'; + if (len > 0) + memcpy(nstr+1, str, len); + } else { + /* Need to use `...` quoted syntax */ + nstr = nasm_malloc(qlen+3); + q = nstr; + *q++ = '`'; + for (p = str; p < ep; p++) { + c = *p; + switch (c) { + case '`': + case '\\': + *q++ = '\\'; + *q++ = c; + break; + case 7: + *q++ = '\\'; + *q++ = 'a'; + break; + case 8: + *q++ = '\\'; + *q++ = 'b'; + break; + case 9: + *q++ = '\\'; + *q++ = 't'; + break; + case 10: + *q++ = '\\'; + *q++ = 'n'; + break; + case 11: + *q++ = '\\'; + *q++ = 'v'; + break; + case 12: + *q++ = '\\'; + *q++ = 'f'; + break; + case 13: + *q++ = '\\'; + *q++ = 'r'; + break; + case 27: + *q++ = '\\'; + *q++ = 'e'; + break; + default: + if (c < ' ' || c > '~') { + c1 = (p+1 < ep) ? p[1] : 0; + if (c1 >= '0' && c1 <= '7') + uc = 0377; /* Must use the full form */ + else + uc = c; + *q++ = '\\'; + if (uc > 077) + *q++ = ((unsigned char)c >> 6) + '0'; + if (uc > 07) + *q++ = (((unsigned char)c >> 3) & 7) + '0'; + *q++ = ((unsigned char)c & 7) + '0'; + break; + } else { + *q++ = c; + } + break; + } + } + *q++ = '`'; + *q++ = '\0'; + nasm_assert((size_t)(q-nstr) == qlen+3); + } + return nstr; +} + +static char *emit_utf8(char *q, int32_t v) +{ + if (v < 0) { + /* Impossible - do nothing */ + } else if (v <= 0x7f) { + *q++ = v; + } else if (v <= 0x000007ff) { + *q++ = 0xc0 | (v >> 6); + *q++ = 0x80 | (v & 63); + } else if (v <= 0x0000ffff) { + *q++ = 0xe0 | (v >> 12); + *q++ = 0x80 | ((v >> 6) & 63); + *q++ = 0x80 | (v & 63); + } else if (v <= 0x001fffff) { + *q++ = 0xf0 | (v >> 18); + *q++ = 0x80 | ((v >> 12) & 63); + *q++ = 0x80 | ((v >> 6) & 63); + *q++ = 0x80 | (v & 63); + } else if (v <= 0x03ffffff) { + *q++ = 0xf8 | (v >> 24); + *q++ = 0x80 | ((v >> 18) & 63); + *q++ = 0x80 | ((v >> 12) & 63); + *q++ = 0x80 | ((v >> 6) & 63); + *q++ = 0x80 | (v & 63); + } else { + *q++ = 0xfc | (v >> 30); + *q++ = 0x80 | ((v >> 24) & 63); + *q++ = 0x80 | ((v >> 18) & 63); + *q++ = 0x80 | ((v >> 12) & 63); + *q++ = 0x80 | ((v >> 6) & 63); + *q++ = 0x80 | (v & 63); + } + return q; +} + +/* + * Do an *in-place* dequoting of the specified string, returning the + * resulting length (which may be containing embedded nulls.) + * + * In-place replacement is possible since the unquoted length is always + * shorter than or equal to the quoted length. + * + * *ep points to the final quote, or to the null if improperly quoted. + */ +size_t nasm_unquote(char *str, char **ep) +{ + char bq; + char *p, *q; + char *escp = NULL; + char c; + enum unq_state { + st_start, + st_backslash, + st_hex, + st_oct, + st_ucs + } state; + int ndig = 0; + int32_t nval = 0; + + p = q = str; + + bq = *p++; + if (!bq) + return 0; + + switch (bq) { + case '\'': + case '\"': + /* '...' or "..." string */ + while ((c = *p) && c != bq) { + p++; + *q++ = c; + } + *q = '\0'; + break; + + case '`': + /* `...` string */ + state = st_start; + + while ((c = *p)) { + p++; + switch (state) { + case st_start: + switch (c) { + case '\\': + state = st_backslash; + break; + case '`': + p--; + goto out; + default: + *q++ = c; + break; + } + break; + + case st_backslash: + state = st_start; + escp = p; /* Beginning of argument sequence */ + nval = 0; + switch (c) { + case 'a': + *q++ = 7; + break; + case 'b': + *q++ = 8; + break; + case 'e': + *q++ = 27; + break; + case 'f': + *q++ = 12; + break; + case 'n': + *q++ = 10; + break; + case 'r': + *q++ = 13; + break; + case 't': + *q++ = 9; + break; + case 'u': + state = st_ucs; + ndig = 4; + break; + case 'U': + state = st_ucs; + ndig = 8; + break; + case 'v': + *q++ = 11; + break; + case 'x': + case 'X': + state = st_hex; + ndig = 2; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + state = st_oct; + ndig = 2; /* Up to two more digits */ + nval = c - '0'; + break; + default: + *q++ = c; + break; + } + break; + + case st_oct: + if (c >= '0' && c <= '7') { + nval = (nval << 3) + (c - '0'); + if (!--ndig) { + *q++ = nval; + state = st_start; + } + } else { + p--; /* Process this character again */ + *q++ = nval; + state = st_start; + } + break; + + case st_hex: + if ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f')) { + nval = (nval << 4) + numvalue(c); + if (!--ndig) { + *q++ = nval; + state = st_start; + } + } else { + p--; /* Process this character again */ + *q++ = (p > escp) ? nval : escp[-1]; + state = st_start; + } + break; + + case st_ucs: + if ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f')) { + nval = (nval << 4) + numvalue(c); + if (!--ndig) { + q = emit_utf8(q, nval); + state = st_start; + } + } else { + p--; /* Process this character again */ + if (p > escp) + q = emit_utf8(q, nval); + else + *q++ = escp[-1]; + state = st_start; + } + break; + } + } + switch (state) { + case st_start: + case st_backslash: + break; + case st_oct: + *q++ = nval; + break; + case st_hex: + *q++ = (p > escp) ? nval : escp[-1]; + break; + case st_ucs: + if (p > escp) + q = emit_utf8(q, nval); + else + *q++ = escp[-1]; + break; + } + out: + break; + + default: + /* Not a quoted string, just return the input... */ + p = q = strchr(str, '\0'); + break; + } + + if (ep) + *ep = p; + return q-str; +} + +/* + * Find the end of a quoted string; returns the pointer to the terminating + * character (either the ending quote or the null character, if unterminated.) + */ +char *nasm_skip_string(char *str) +{ + char bq; + char *p; + char c; + enum unq_state { + st_start, + st_backslash + } state; + + bq = str[0]; + if (bq == '\'' || bq == '\"') { + /* '...' or "..." string */ + for (p = str+1; *p && *p != bq; p++) + ; + return p; + } else if (bq == '`') { + /* `...` string */ + state = st_start; + p = str+1; + if (!*p) + return p; + + while ((c = *p++)) { + switch (state) { + case st_start: + switch (c) { + case '\\': + state = st_backslash; + break; + case '`': + return p-1; /* Found the end */ + default: + break; + } + break; + + case st_backslash: + /* + * Note: for the purpose of finding the end of the string, + * all successor states to st_backslash are functionally + * equivalent to st_start, since either a backslash or + * a backquote will force a return to the st_start state. + */ + state = st_start; + break; + } + } + return p-1; /* Unterminated string... */ + } else { + return str; /* Not a string... */ + } +}