Mercurial > repo
comparison nasmbuild/nasm-2.13rc9/asm/parser.c @ 10554:587a0a262d22
<moonythedwarf> ` cd nasmbuild; tar -xf nasm.tar.gz
author | HackBot |
---|---|
date | Thu, 30 Mar 2017 20:58:41 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
10553:93dc2a984de0 | 10554:587a0a262d22 |
---|---|
1 /* ----------------------------------------------------------------------- * | |
2 * | |
3 * Copyright 1996-2017 The NASM Authors - All Rights Reserved | |
4 * See the file AUTHORS included with the NASM distribution for | |
5 * the specific copyright holders. | |
6 * | |
7 * Redistribution and use in source and binary forms, with or without | |
8 * modification, are permitted provided that the following | |
9 * conditions are met: | |
10 * | |
11 * * Redistributions of source code must retain the above copyright | |
12 * notice, this list of conditions and the following disclaimer. | |
13 * * Redistributions in binary form must reproduce the above | |
14 * copyright notice, this list of conditions and the following | |
15 * disclaimer in the documentation and/or other materials provided | |
16 * with the distribution. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND | |
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, | |
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | |
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
31 * | |
32 * ----------------------------------------------------------------------- */ | |
33 | |
34 /* | |
35 * parser.c source line parser for the Netwide Assembler | |
36 */ | |
37 | |
38 #include "compiler.h" | |
39 | |
40 #include <stdio.h> | |
41 #include <stdlib.h> | |
42 #include <stddef.h> | |
43 #include <string.h> | |
44 #include <ctype.h> | |
45 | |
46 #include "nasm.h" | |
47 #include "insns.h" | |
48 #include "nasmlib.h" | |
49 #include "error.h" | |
50 #include "stdscan.h" | |
51 #include "eval.h" | |
52 #include "parser.h" | |
53 #include "float.h" | |
54 #include "assemble.h" | |
55 #include "tables.h" | |
56 | |
57 | |
58 static int is_comma_next(void); | |
59 | |
60 static struct tokenval tokval; | |
61 | |
62 static int prefix_slot(int prefix) | |
63 { | |
64 switch (prefix) { | |
65 case P_WAIT: | |
66 return PPS_WAIT; | |
67 case R_CS: | |
68 case R_DS: | |
69 case R_SS: | |
70 case R_ES: | |
71 case R_FS: | |
72 case R_GS: | |
73 return PPS_SEG; | |
74 case P_LOCK: | |
75 return PPS_LOCK; | |
76 case P_REP: | |
77 case P_REPE: | |
78 case P_REPZ: | |
79 case P_REPNE: | |
80 case P_REPNZ: | |
81 case P_XACQUIRE: | |
82 case P_XRELEASE: | |
83 case P_BND: | |
84 case P_NOBND: | |
85 return PPS_REP; | |
86 case P_O16: | |
87 case P_O32: | |
88 case P_O64: | |
89 case P_OSP: | |
90 return PPS_OSIZE; | |
91 case P_A16: | |
92 case P_A32: | |
93 case P_A64: | |
94 case P_ASP: | |
95 return PPS_ASIZE; | |
96 case P_EVEX: | |
97 case P_VEX3: | |
98 case P_VEX2: | |
99 return PPS_VEX; | |
100 default: | |
101 nasm_panic(0, "Invalid value %d passed to prefix_slot()", prefix); | |
102 return -1; | |
103 } | |
104 } | |
105 | |
106 static void process_size_override(insn *result, operand *op) | |
107 { | |
108 if (tasm_compatible_mode) { | |
109 switch (tokval.t_integer) { | |
110 /* For TASM compatibility a size override inside the | |
111 * brackets changes the size of the operand, not the | |
112 * address type of the operand as it does in standard | |
113 * NASM syntax. Hence: | |
114 * | |
115 * mov eax,[DWORD val] | |
116 * | |
117 * is valid syntax in TASM compatibility mode. Note that | |
118 * you lose the ability to override the default address | |
119 * type for the instruction, but we never use anything | |
120 * but 32-bit flat model addressing in our code. | |
121 */ | |
122 case S_BYTE: | |
123 op->type |= BITS8; | |
124 break; | |
125 case S_WORD: | |
126 op->type |= BITS16; | |
127 break; | |
128 case S_DWORD: | |
129 case S_LONG: | |
130 op->type |= BITS32; | |
131 break; | |
132 case S_QWORD: | |
133 op->type |= BITS64; | |
134 break; | |
135 case S_TWORD: | |
136 op->type |= BITS80; | |
137 break; | |
138 case S_OWORD: | |
139 op->type |= BITS128; | |
140 break; | |
141 default: | |
142 nasm_error(ERR_NONFATAL, | |
143 "invalid operand size specification"); | |
144 break; | |
145 } | |
146 } else { | |
147 /* Standard NASM compatible syntax */ | |
148 switch (tokval.t_integer) { | |
149 case S_NOSPLIT: | |
150 op->eaflags |= EAF_TIMESTWO; | |
151 break; | |
152 case S_REL: | |
153 op->eaflags |= EAF_REL; | |
154 break; | |
155 case S_ABS: | |
156 op->eaflags |= EAF_ABS; | |
157 break; | |
158 case S_BYTE: | |
159 op->disp_size = 8; | |
160 op->eaflags |= EAF_BYTEOFFS; | |
161 break; | |
162 case P_A16: | |
163 case P_A32: | |
164 case P_A64: | |
165 if (result->prefixes[PPS_ASIZE] && | |
166 result->prefixes[PPS_ASIZE] != tokval.t_integer) | |
167 nasm_error(ERR_NONFATAL, | |
168 "conflicting address size specifications"); | |
169 else | |
170 result->prefixes[PPS_ASIZE] = tokval.t_integer; | |
171 break; | |
172 case S_WORD: | |
173 op->disp_size = 16; | |
174 op->eaflags |= EAF_WORDOFFS; | |
175 break; | |
176 case S_DWORD: | |
177 case S_LONG: | |
178 op->disp_size = 32; | |
179 op->eaflags |= EAF_WORDOFFS; | |
180 break; | |
181 case S_QWORD: | |
182 op->disp_size = 64; | |
183 op->eaflags |= EAF_WORDOFFS; | |
184 break; | |
185 default: | |
186 nasm_error(ERR_NONFATAL, "invalid size specification in" | |
187 " effective address"); | |
188 break; | |
189 } | |
190 } | |
191 } | |
192 | |
193 /* | |
194 * when two or more decorators follow a register operand, | |
195 * consecutive decorators are parsed here. | |
196 * opmask and zeroing decorators can be placed in any order. | |
197 * e.g. zmm1 {k2}{z} or zmm2 {z}{k3} | |
198 * decorator(s) are placed at the end of an operand. | |
199 */ | |
200 static bool parse_braces(decoflags_t *decoflags) | |
201 { | |
202 int i; | |
203 bool recover = false; | |
204 | |
205 i = tokval.t_type; | |
206 do { | |
207 if (i == TOKEN_OPMASK) { | |
208 if (*decoflags & OPMASK_MASK) { | |
209 nasm_error(ERR_NONFATAL, "opmask k%"PRIu64" is already set", | |
210 *decoflags & OPMASK_MASK); | |
211 *decoflags &= ~OPMASK_MASK; | |
212 } | |
213 *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]); | |
214 } else if (i == TOKEN_DECORATOR) { | |
215 switch (tokval.t_integer) { | |
216 case BRC_Z: | |
217 /* | |
218 * according to AVX512 spec, only zeroing/merging decorator | |
219 * is supported with opmask | |
220 */ | |
221 *decoflags |= GEN_Z(0); | |
222 break; | |
223 default: | |
224 nasm_error(ERR_NONFATAL, "{%s} is not an expected decorator", | |
225 tokval.t_charptr); | |
226 break; | |
227 } | |
228 } else if (i == ',' || i == TOKEN_EOS){ | |
229 break; | |
230 } else { | |
231 nasm_error(ERR_NONFATAL, "only a series of valid decorators" | |
232 " expected"); | |
233 recover = true; | |
234 break; | |
235 } | |
236 i = stdscan(NULL, &tokval); | |
237 } while(1); | |
238 | |
239 return recover; | |
240 } | |
241 | |
242 static int parse_mref(operand *op, const expr *e) | |
243 { | |
244 int b, i, s; /* basereg, indexreg, scale */ | |
245 int64_t o; /* offset */ | |
246 | |
247 b = i = -1; | |
248 o = s = 0; | |
249 op->segment = op->wrt = NO_SEG; | |
250 | |
251 if (e->type && e->type <= EXPR_REG_END) { /* this bit's a register */ | |
252 bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]); | |
253 | |
254 if (is_gpr && e->value == 1) | |
255 b = e->type; /* It can be basereg */ | |
256 else /* No, it has to be indexreg */ | |
257 i = e->type, s = e->value; | |
258 e++; | |
259 } | |
260 if (e->type && e->type <= EXPR_REG_END) { /* it's a 2nd register */ | |
261 bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]); | |
262 | |
263 if (b != -1) /* If the first was the base, ... */ | |
264 i = e->type, s = e->value; /* second has to be indexreg */ | |
265 | |
266 else if (!is_gpr || e->value != 1) { | |
267 /* If both want to be index */ | |
268 nasm_error(ERR_NONFATAL, | |
269 "invalid effective address: two index registers"); | |
270 return -1; | |
271 } else | |
272 b = e->type; | |
273 e++; | |
274 } | |
275 | |
276 if (e->type) { /* is there an offset? */ | |
277 if (e->type <= EXPR_REG_END) { /* in fact, is there an error? */ | |
278 nasm_error(ERR_NONFATAL, | |
279 "invalid effective address: impossible register"); | |
280 return -1; | |
281 } else { | |
282 if (e->type == EXPR_UNKNOWN) { | |
283 op->opflags |= OPFLAG_UNKNOWN; | |
284 o = 0; /* doesn't matter what */ | |
285 while (e->type) | |
286 e++; /* go to the end of the line */ | |
287 } else { | |
288 if (e->type == EXPR_SIMPLE) { | |
289 o = e->value; | |
290 e++; | |
291 } | |
292 if (e->type == EXPR_WRT) { | |
293 op->wrt = e->value; | |
294 e++; | |
295 } | |
296 /* | |
297 * Look for a segment base type. | |
298 */ | |
299 for (; e->type; e++) { | |
300 if (!e->value) | |
301 continue; | |
302 | |
303 if (e->type <= EXPR_REG_END) { | |
304 nasm_error(ERR_NONFATAL, | |
305 "invalid effective address: too many registers"); | |
306 return -1; | |
307 } else if (e->type < EXPR_SEGBASE) { | |
308 nasm_error(ERR_NONFATAL, | |
309 "invalid effective address: bad subexpression type"); | |
310 return -1; | |
311 } else if (e->value == 1) { | |
312 if (op->segment != NO_SEG) { | |
313 nasm_error(ERR_NONFATAL, | |
314 "invalid effective address: multiple base segments"); | |
315 return -1; | |
316 } | |
317 op->segment = e->type - EXPR_SEGBASE; | |
318 } else if (e->value == -1 && | |
319 e->type == location.segment + EXPR_SEGBASE && | |
320 !(op->opflags & OPFLAG_RELATIVE)) { | |
321 op->opflags |= OPFLAG_RELATIVE; | |
322 } else { | |
323 nasm_error(ERR_NONFATAL, | |
324 "invalid effective address: impossible segment base multiplier"); | |
325 return -1; | |
326 } | |
327 } | |
328 } | |
329 } | |
330 } | |
331 | |
332 nasm_assert(!e->type); /* We should be at the end */ | |
333 | |
334 op->basereg = b; | |
335 op->indexreg = i; | |
336 op->scale = s; | |
337 op->offset = o; | |
338 return 0; | |
339 } | |
340 | |
341 static void mref_set_optype(operand *op) | |
342 { | |
343 int b = op->basereg; | |
344 int i = op->indexreg; | |
345 int s = op->scale; | |
346 | |
347 /* It is memory, but it can match any r/m operand */ | |
348 op->type |= MEMORY_ANY; | |
349 | |
350 if (b == -1 && (i == -1 || s == 0)) { | |
351 int is_rel = globalbits == 64 && | |
352 !(op->eaflags & EAF_ABS) && | |
353 ((globalrel && | |
354 !(op->eaflags & EAF_FSGS)) || | |
355 (op->eaflags & EAF_REL)); | |
356 | |
357 op->type |= is_rel ? IP_REL : MEM_OFFS; | |
358 } | |
359 | |
360 if (i != -1) { | |
361 opflags_t iclass = nasm_reg_flags[i]; | |
362 | |
363 if (is_class(XMMREG,iclass)) | |
364 op->type |= XMEM; | |
365 else if (is_class(YMMREG,iclass)) | |
366 op->type |= YMEM; | |
367 else if (is_class(ZMMREG,iclass)) | |
368 op->type |= ZMEM; | |
369 } | |
370 } | |
371 | |
372 /* | |
373 * Convert an expression vector returned from evaluate() into an | |
374 * extop structure. Return zero on success. | |
375 */ | |
376 static int value_to_extop(expr * vect, extop *eop, int32_t myseg) | |
377 { | |
378 eop->type = EOT_DB_NUMBER; | |
379 eop->offset = 0; | |
380 eop->segment = eop->wrt = NO_SEG; | |
381 eop->relative = false; | |
382 | |
383 for (; vect->type; vect++) { | |
384 if (!vect->value) /* zero term, safe to ignore */ | |
385 continue; | |
386 | |
387 if (vect->type <= EXPR_REG_END) /* false if a register is present */ | |
388 return -1; | |
389 | |
390 if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */ | |
391 return 0; | |
392 | |
393 if (vect->type == EXPR_SIMPLE) { | |
394 /* Simple number expression */ | |
395 eop->offset += vect->value; | |
396 continue; | |
397 } | |
398 if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) { | |
399 /* WRT term */ | |
400 eop->wrt = vect->value; | |
401 continue; | |
402 } | |
403 | |
404 if (!eop->relative && | |
405 vect->type == EXPR_SEGBASE + myseg && vect->value == -1) { | |
406 /* Expression of the form: foo - $ */ | |
407 eop->relative = true; | |
408 continue; | |
409 } | |
410 | |
411 if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE && | |
412 vect->value == 1) { | |
413 eop->segment = vect->type - EXPR_SEGBASE; | |
414 continue; | |
415 } | |
416 | |
417 /* Otherwise, badness */ | |
418 return -1; | |
419 } | |
420 | |
421 /* We got to the end and it was all okay */ | |
422 return 0; | |
423 } | |
424 | |
425 insn *parse_line(int pass, char *buffer, insn *result, ldfunc ldef) | |
426 { | |
427 bool insn_is_label = false; | |
428 struct eval_hints hints; | |
429 int opnum; | |
430 int critical; | |
431 bool first; | |
432 bool recover; | |
433 int i; | |
434 | |
435 restart_parse: | |
436 first = true; | |
437 result->forw_ref = false; | |
438 | |
439 stdscan_reset(); | |
440 stdscan_set(buffer); | |
441 i = stdscan(NULL, &tokval); | |
442 | |
443 result->label = NULL; /* Assume no label */ | |
444 result->eops = NULL; /* must do this, whatever happens */ | |
445 result->operands = 0; /* must initialize this */ | |
446 result->evex_rm = 0; /* Ensure EVEX rounding mode is reset */ | |
447 result->evex_brerop = -1; /* Reset EVEX broadcasting/ER op position */ | |
448 | |
449 /* Ignore blank lines */ | |
450 if (i == TOKEN_EOS) | |
451 goto fail; | |
452 | |
453 if (i != TOKEN_ID && | |
454 i != TOKEN_INSN && | |
455 i != TOKEN_PREFIX && | |
456 (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) { | |
457 nasm_error(ERR_NONFATAL, | |
458 "label or instruction expected at start of line"); | |
459 goto fail; | |
460 } | |
461 | |
462 if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) { | |
463 /* there's a label here */ | |
464 first = false; | |
465 result->label = tokval.t_charptr; | |
466 i = stdscan(NULL, &tokval); | |
467 if (i == ':') { /* skip over the optional colon */ | |
468 i = stdscan(NULL, &tokval); | |
469 } else if (i == 0) { | |
470 nasm_error(ERR_WARNING | ERR_WARN_OL | ERR_PASS1, | |
471 "label alone on a line without a colon might be in error"); | |
472 } | |
473 if (i != TOKEN_INSN || tokval.t_integer != I_EQU) { | |
474 /* | |
475 * FIXME: location.segment could be NO_SEG, in which case | |
476 * it is possible we should be passing 'absolute.segment'. Look into this. | |
477 * Work out whether that is *really* what we should be doing. | |
478 * Generally fix things. I think this is right as it is, but | |
479 * am still not certain. | |
480 */ | |
481 ldef(result->label, in_absolute ? absolute.segment : location.segment, | |
482 location.offset, NULL, true, false); | |
483 } | |
484 } | |
485 | |
486 /* Just a label here */ | |
487 if (i == TOKEN_EOS) | |
488 goto fail; | |
489 | |
490 nasm_static_assert(P_none == 0); | |
491 memset(result->prefixes, P_none, sizeof(result->prefixes)); | |
492 result->times = 1L; | |
493 | |
494 while (i == TOKEN_PREFIX || | |
495 (i == TOKEN_REG && IS_SREG(tokval.t_integer))) { | |
496 first = false; | |
497 | |
498 /* | |
499 * Handle special case: the TIMES prefix. | |
500 */ | |
501 if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) { | |
502 expr *value; | |
503 | |
504 i = stdscan(NULL, &tokval); | |
505 value = evaluate(stdscan, NULL, &tokval, NULL, pass0, NULL); | |
506 i = tokval.t_type; | |
507 if (!value) /* Error in evaluator */ | |
508 goto fail; | |
509 if (!is_simple(value)) { | |
510 nasm_error(ERR_NONFATAL, | |
511 "non-constant argument supplied to TIMES"); | |
512 result->times = 1L; | |
513 } else { | |
514 result->times = value->value; | |
515 if (value->value < 0 && pass0 == 2) { | |
516 nasm_error(ERR_NONFATAL, "TIMES value %"PRId64" is negative", | |
517 value->value); | |
518 result->times = 0; | |
519 } | |
520 } | |
521 } else { | |
522 int slot = prefix_slot(tokval.t_integer); | |
523 if (result->prefixes[slot]) { | |
524 if (result->prefixes[slot] == tokval.t_integer) | |
525 nasm_error(ERR_WARNING | ERR_PASS1, | |
526 "instruction has redundant prefixes"); | |
527 else | |
528 nasm_error(ERR_NONFATAL, | |
529 "instruction has conflicting prefixes"); | |
530 } | |
531 result->prefixes[slot] = tokval.t_integer; | |
532 i = stdscan(NULL, &tokval); | |
533 } | |
534 } | |
535 | |
536 if (i != TOKEN_INSN) { | |
537 int j; | |
538 enum prefixes pfx; | |
539 | |
540 for (j = 0; j < MAXPREFIX; j++) { | |
541 if ((pfx = result->prefixes[j]) != P_none) | |
542 break; | |
543 } | |
544 | |
545 if (i == 0 && pfx != P_none) { | |
546 /* | |
547 * Instruction prefixes are present, but no actual | |
548 * instruction. This is allowed: at this point we | |
549 * invent a notional instruction of RESB 0. | |
550 */ | |
551 result->opcode = I_RESB; | |
552 result->operands = 1; | |
553 result->oprs[0].type = IMMEDIATE; | |
554 result->oprs[0].offset = 0L; | |
555 result->oprs[0].segment = result->oprs[0].wrt = NO_SEG; | |
556 return result; | |
557 } else { | |
558 nasm_error(ERR_NONFATAL, "parser: instruction expected"); | |
559 goto fail; | |
560 } | |
561 } | |
562 | |
563 result->opcode = tokval.t_integer; | |
564 result->condition = tokval.t_inttwo; | |
565 | |
566 /* | |
567 * INCBIN cannot be satisfied with incorrectly | |
568 * evaluated operands, since the correct values _must_ be known | |
569 * on the first pass. Hence, even in pass one, we set the | |
570 * `critical' flag on calling evaluate(), so that it will bomb | |
571 * out on undefined symbols. | |
572 */ | |
573 if (result->opcode == I_INCBIN) { | |
574 critical = (pass0 < 2 ? 1 : 2); | |
575 | |
576 } else | |
577 critical = (pass == 2 ? 2 : 0); | |
578 | |
579 if (result->opcode == I_DB || result->opcode == I_DW || | |
580 result->opcode == I_DD || result->opcode == I_DQ || | |
581 result->opcode == I_DT || result->opcode == I_DO || | |
582 result->opcode == I_DY || result->opcode == I_DZ || | |
583 result->opcode == I_INCBIN) { | |
584 extop *eop, **tail = &result->eops, **fixptr; | |
585 int oper_num = 0; | |
586 int32_t sign; | |
587 | |
588 result->eops_float = false; | |
589 | |
590 /* | |
591 * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands. | |
592 */ | |
593 while (1) { | |
594 i = stdscan(NULL, &tokval); | |
595 if (i == TOKEN_EOS) | |
596 break; | |
597 else if (first && i == ':') { | |
598 insn_is_label = true; | |
599 goto restart_parse; | |
600 } | |
601 first = false; | |
602 fixptr = tail; | |
603 eop = *tail = nasm_malloc(sizeof(extop)); | |
604 tail = &eop->next; | |
605 eop->next = NULL; | |
606 eop->type = EOT_NOTHING; | |
607 oper_num++; | |
608 sign = +1; | |
609 | |
610 /* | |
611 * is_comma_next() here is to distinguish this from | |
612 * a string used as part of an expression... | |
613 */ | |
614 if (i == TOKEN_STR && is_comma_next()) { | |
615 eop->type = EOT_DB_STRING; | |
616 eop->stringval = tokval.t_charptr; | |
617 eop->stringlen = tokval.t_inttwo; | |
618 i = stdscan(NULL, &tokval); /* eat the comma */ | |
619 } else if (i == TOKEN_STRFUNC) { | |
620 bool parens = false; | |
621 const char *funcname = tokval.t_charptr; | |
622 enum strfunc func = tokval.t_integer; | |
623 i = stdscan(NULL, &tokval); | |
624 if (i == '(') { | |
625 parens = true; | |
626 i = stdscan(NULL, &tokval); | |
627 } | |
628 if (i != TOKEN_STR) { | |
629 nasm_error(ERR_NONFATAL, | |
630 "%s must be followed by a string constant", | |
631 funcname); | |
632 eop->type = EOT_NOTHING; | |
633 } else { | |
634 eop->type = EOT_DB_STRING_FREE; | |
635 eop->stringlen = | |
636 string_transform(tokval.t_charptr, tokval.t_inttwo, | |
637 &eop->stringval, func); | |
638 if (eop->stringlen == (size_t)-1) { | |
639 nasm_error(ERR_NONFATAL, "invalid string for transform"); | |
640 eop->type = EOT_NOTHING; | |
641 } | |
642 } | |
643 if (parens && i && i != ')') { | |
644 i = stdscan(NULL, &tokval); | |
645 if (i != ')') { | |
646 nasm_error(ERR_NONFATAL, "unterminated %s function", | |
647 funcname); | |
648 } | |
649 } | |
650 if (i && i != ',') | |
651 i = stdscan(NULL, &tokval); | |
652 } else if (i == '-' || i == '+') { | |
653 char *save = stdscan_get(); | |
654 int token = i; | |
655 sign = (i == '-') ? -1 : 1; | |
656 i = stdscan(NULL, &tokval); | |
657 if (i != TOKEN_FLOAT) { | |
658 stdscan_set(save); | |
659 i = tokval.t_type = token; | |
660 goto is_expression; | |
661 } else { | |
662 goto is_float; | |
663 } | |
664 } else if (i == TOKEN_FLOAT) { | |
665 is_float: | |
666 eop->type = EOT_DB_STRING; | |
667 result->eops_float = true; | |
668 | |
669 eop->stringlen = idata_bytes(result->opcode); | |
670 if (eop->stringlen > 16) { | |
671 nasm_error(ERR_NONFATAL, "floating-point constant" | |
672 " encountered in DY or DZ instruction"); | |
673 eop->stringlen = 0; | |
674 } else if (eop->stringlen < 1) { | |
675 nasm_error(ERR_NONFATAL, "floating-point constant" | |
676 " encountered in unknown instruction"); | |
677 /* | |
678 * fix suggested by Pedro Gimeno... original line was: | |
679 * eop->type = EOT_NOTHING; | |
680 */ | |
681 eop->stringlen = 0; | |
682 } | |
683 | |
684 eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen); | |
685 tail = &eop->next; | |
686 *fixptr = eop; | |
687 eop->stringval = (char *)eop + sizeof(extop); | |
688 if (!eop->stringlen || | |
689 !float_const(tokval.t_charptr, sign, | |
690 (uint8_t *)eop->stringval, eop->stringlen)) | |
691 eop->type = EOT_NOTHING; | |
692 i = stdscan(NULL, &tokval); /* eat the comma */ | |
693 } else { | |
694 /* anything else, assume it is an expression */ | |
695 expr *value; | |
696 | |
697 is_expression: | |
698 value = evaluate(stdscan, NULL, &tokval, NULL, | |
699 critical, NULL); | |
700 i = tokval.t_type; | |
701 if (!value) /* Error in evaluator */ | |
702 goto fail; | |
703 if (value_to_extop(value, eop, location.segment)) { | |
704 nasm_error(ERR_NONFATAL, | |
705 "operand %d: expression is not simple or relocatable", | |
706 oper_num); | |
707 } | |
708 } | |
709 | |
710 /* | |
711 * We're about to call stdscan(), which will eat the | |
712 * comma that we're currently sitting on between | |
713 * arguments. However, we'd better check first that it | |
714 * _is_ a comma. | |
715 */ | |
716 if (i == TOKEN_EOS) /* also could be EOL */ | |
717 break; | |
718 if (i != ',') { | |
719 nasm_error(ERR_NONFATAL, "comma expected after operand %d", | |
720 oper_num); | |
721 goto fail; | |
722 } | |
723 } | |
724 | |
725 if (result->opcode == I_INCBIN) { | |
726 /* | |
727 * Correct syntax for INCBIN is that there should be | |
728 * one string operand, followed by one or two numeric | |
729 * operands. | |
730 */ | |
731 if (!result->eops || result->eops->type != EOT_DB_STRING) | |
732 nasm_error(ERR_NONFATAL, "`incbin' expects a file name"); | |
733 else if (result->eops->next && | |
734 result->eops->next->type != EOT_DB_NUMBER) | |
735 nasm_error(ERR_NONFATAL, "`incbin': second parameter is" | |
736 " non-numeric"); | |
737 else if (result->eops->next && result->eops->next->next && | |
738 result->eops->next->next->type != EOT_DB_NUMBER) | |
739 nasm_error(ERR_NONFATAL, "`incbin': third parameter is" | |
740 " non-numeric"); | |
741 else if (result->eops->next && result->eops->next->next && | |
742 result->eops->next->next->next) | |
743 nasm_error(ERR_NONFATAL, | |
744 "`incbin': more than three parameters"); | |
745 else | |
746 return result; | |
747 /* | |
748 * If we reach here, one of the above errors happened. | |
749 * Throw the instruction away. | |
750 */ | |
751 goto fail; | |
752 } else /* DB ... */ if (oper_num == 0) | |
753 nasm_error(ERR_WARNING | ERR_PASS1, | |
754 "no operand for data declaration"); | |
755 else | |
756 result->operands = oper_num; | |
757 | |
758 return result; | |
759 } | |
760 | |
761 /* | |
762 * Now we begin to parse the operands. There may be up to four | |
763 * of these, separated by commas, and terminated by a zero token. | |
764 */ | |
765 | |
766 for (opnum = 0; opnum < MAX_OPERANDS; opnum++) { | |
767 operand *op = &result->oprs[opnum]; | |
768 expr *value; /* used most of the time */ | |
769 bool mref; /* is this going to be a memory ref? */ | |
770 bool bracket; /* is it a [] mref, or a & mref? */ | |
771 bool mib; /* compound (mib) mref? */ | |
772 int setsize = 0; | |
773 decoflags_t brace_flags = 0; /* flags for decorators in braces */ | |
774 | |
775 op->disp_size = 0; /* have to zero this whatever */ | |
776 op->eaflags = 0; /* and this */ | |
777 op->opflags = 0; | |
778 op->decoflags = 0; | |
779 | |
780 i = stdscan(NULL, &tokval); | |
781 if (i == TOKEN_EOS) | |
782 break; /* end of operands: get out of here */ | |
783 else if (first && i == ':') { | |
784 insn_is_label = true; | |
785 goto restart_parse; | |
786 } | |
787 first = false; | |
788 op->type = 0; /* so far, no override */ | |
789 while (i == TOKEN_SPECIAL) { /* size specifiers */ | |
790 switch (tokval.t_integer) { | |
791 case S_BYTE: | |
792 if (!setsize) /* we want to use only the first */ | |
793 op->type |= BITS8; | |
794 setsize = 1; | |
795 break; | |
796 case S_WORD: | |
797 if (!setsize) | |
798 op->type |= BITS16; | |
799 setsize = 1; | |
800 break; | |
801 case S_DWORD: | |
802 case S_LONG: | |
803 if (!setsize) | |
804 op->type |= BITS32; | |
805 setsize = 1; | |
806 break; | |
807 case S_QWORD: | |
808 if (!setsize) | |
809 op->type |= BITS64; | |
810 setsize = 1; | |
811 break; | |
812 case S_TWORD: | |
813 if (!setsize) | |
814 op->type |= BITS80; | |
815 setsize = 1; | |
816 break; | |
817 case S_OWORD: | |
818 if (!setsize) | |
819 op->type |= BITS128; | |
820 setsize = 1; | |
821 break; | |
822 case S_YWORD: | |
823 if (!setsize) | |
824 op->type |= BITS256; | |
825 setsize = 1; | |
826 break; | |
827 case S_ZWORD: | |
828 if (!setsize) | |
829 op->type |= BITS512; | |
830 setsize = 1; | |
831 break; | |
832 case S_TO: | |
833 op->type |= TO; | |
834 break; | |
835 case S_STRICT: | |
836 op->type |= STRICT; | |
837 break; | |
838 case S_FAR: | |
839 op->type |= FAR; | |
840 break; | |
841 case S_NEAR: | |
842 op->type |= NEAR; | |
843 break; | |
844 case S_SHORT: | |
845 op->type |= SHORT; | |
846 break; | |
847 default: | |
848 nasm_error(ERR_NONFATAL, "invalid operand size specification"); | |
849 } | |
850 i = stdscan(NULL, &tokval); | |
851 } | |
852 | |
853 if (i == '[' || i == '&') { /* memory reference */ | |
854 mref = true; | |
855 bracket = (i == '['); | |
856 i = stdscan(NULL, &tokval); /* then skip the colon */ | |
857 while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) { | |
858 process_size_override(result, op); | |
859 i = stdscan(NULL, &tokval); | |
860 } | |
861 /* when a comma follows an opening bracket - [ , eax*4] */ | |
862 if (i == ',') { | |
863 /* treat as if there is a zero displacement virtually */ | |
864 tokval.t_type = TOKEN_NUM; | |
865 tokval.t_integer = 0; | |
866 stdscan_set(stdscan_get() - 1); /* rewind the comma */ | |
867 } | |
868 } else { /* immediate operand, or register */ | |
869 mref = false; | |
870 bracket = false; /* placate optimisers */ | |
871 } | |
872 | |
873 if ((op->type & FAR) && !mref && | |
874 result->opcode != I_JMP && result->opcode != I_CALL) { | |
875 nasm_error(ERR_NONFATAL, "invalid use of FAR operand specifier"); | |
876 } | |
877 | |
878 value = evaluate(stdscan, NULL, &tokval, | |
879 &op->opflags, critical, &hints); | |
880 i = tokval.t_type; | |
881 if (op->opflags & OPFLAG_FORWARD) { | |
882 result->forw_ref = true; | |
883 } | |
884 if (!value) /* Error in evaluator */ | |
885 goto fail; | |
886 if (i == ':' && mref) { /* it was seg:offset */ | |
887 /* | |
888 * Process the segment override. | |
889 */ | |
890 if (value[1].type != 0 || | |
891 value->value != 1 || | |
892 !IS_SREG(value->type)) | |
893 nasm_error(ERR_NONFATAL, "invalid segment override"); | |
894 else if (result->prefixes[PPS_SEG]) | |
895 nasm_error(ERR_NONFATAL, | |
896 "instruction has conflicting segment overrides"); | |
897 else { | |
898 result->prefixes[PPS_SEG] = value->type; | |
899 if (IS_FSGS(value->type)) | |
900 op->eaflags |= EAF_FSGS; | |
901 } | |
902 | |
903 i = stdscan(NULL, &tokval); /* then skip the colon */ | |
904 while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) { | |
905 process_size_override(result, op); | |
906 i = stdscan(NULL, &tokval); | |
907 } | |
908 value = evaluate(stdscan, NULL, &tokval, | |
909 &op->opflags, critical, &hints); | |
910 i = tokval.t_type; | |
911 if (op->opflags & OPFLAG_FORWARD) { | |
912 result->forw_ref = true; | |
913 } | |
914 /* and get the offset */ | |
915 if (!value) /* Error in evaluator */ | |
916 goto fail; | |
917 } | |
918 | |
919 mib = false; | |
920 if (mref && bracket && i == ',') { | |
921 /* [seg:base+offset,index*scale] syntax (mib) */ | |
922 | |
923 operand o1, o2; /* Partial operands */ | |
924 | |
925 if (parse_mref(&o1, value)) | |
926 goto fail; | |
927 | |
928 i = stdscan(NULL, &tokval); /* Eat comma */ | |
929 value = evaluate(stdscan, NULL, &tokval, &op->opflags, | |
930 critical, &hints); | |
931 i = tokval.t_type; | |
932 if (!value) | |
933 goto fail; | |
934 | |
935 if (parse_mref(&o2, value)) | |
936 goto fail; | |
937 | |
938 if (o2.basereg != -1 && o2.indexreg == -1) { | |
939 o2.indexreg = o2.basereg; | |
940 o2.scale = 1; | |
941 o2.basereg = -1; | |
942 } | |
943 | |
944 if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 || | |
945 o2.segment != NO_SEG || o2.wrt != NO_SEG) { | |
946 nasm_error(ERR_NONFATAL, "invalid mib expression"); | |
947 goto fail; | |
948 } | |
949 | |
950 op->basereg = o1.basereg; | |
951 op->indexreg = o2.indexreg; | |
952 op->scale = o2.scale; | |
953 op->offset = o1.offset; | |
954 op->segment = o1.segment; | |
955 op->wrt = o1.wrt; | |
956 | |
957 if (op->basereg != -1) { | |
958 op->hintbase = op->basereg; | |
959 op->hinttype = EAH_MAKEBASE; | |
960 } else if (op->indexreg != -1) { | |
961 op->hintbase = op->indexreg; | |
962 op->hinttype = EAH_NOTBASE; | |
963 } else { | |
964 op->hintbase = -1; | |
965 op->hinttype = EAH_NOHINT; | |
966 } | |
967 | |
968 mib = true; | |
969 } | |
970 | |
971 recover = false; | |
972 if (mref && bracket) { /* find ] at the end */ | |
973 if (i != ']') { | |
974 nasm_error(ERR_NONFATAL, "parser: expecting ]"); | |
975 recover = true; | |
976 } else { /* we got the required ] */ | |
977 i = stdscan(NULL, &tokval); | |
978 if ((i == TOKEN_DECORATOR) || (i == TOKEN_OPMASK)) { | |
979 /* | |
980 * according to AVX512 spec, broadcast or opmask decorator | |
981 * is expected for memory reference operands | |
982 */ | |
983 if (tokval.t_flag & TFLAG_BRDCAST) { | |
984 brace_flags |= GEN_BRDCAST(0) | | |
985 VAL_BRNUM(tokval.t_integer - BRC_1TO2); | |
986 i = stdscan(NULL, &tokval); | |
987 } else if (i == TOKEN_OPMASK) { | |
988 brace_flags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]); | |
989 i = stdscan(NULL, &tokval); | |
990 } else { | |
991 nasm_error(ERR_NONFATAL, "broadcast or opmask " | |
992 "decorator expected inside braces"); | |
993 recover = true; | |
994 } | |
995 } | |
996 | |
997 if (i != 0 && i != ',') { | |
998 nasm_error(ERR_NONFATAL, "comma or end of line expected"); | |
999 recover = true; | |
1000 } | |
1001 } | |
1002 } else { /* immediate operand */ | |
1003 if (i != 0 && i != ',' && i != ':' && | |
1004 i != TOKEN_DECORATOR && i != TOKEN_OPMASK) { | |
1005 nasm_error(ERR_NONFATAL, "comma, colon, decorator or end of " | |
1006 "line expected after operand"); | |
1007 recover = true; | |
1008 } else if (i == ':') { | |
1009 op->type |= COLON; | |
1010 } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) { | |
1011 /* parse opmask (and zeroing) after an operand */ | |
1012 recover = parse_braces(&brace_flags); | |
1013 } | |
1014 } | |
1015 if (recover) { | |
1016 do { /* error recovery */ | |
1017 i = stdscan(NULL, &tokval); | |
1018 } while (i != 0 && i != ','); | |
1019 } | |
1020 | |
1021 /* | |
1022 * now convert the exprs returned from evaluate() | |
1023 * into operand descriptions... | |
1024 */ | |
1025 op->decoflags |= brace_flags; | |
1026 | |
1027 if (mref) { /* it's a memory reference */ | |
1028 /* A mib reference was fully parsed already */ | |
1029 if (!mib) { | |
1030 if (parse_mref(op, value)) | |
1031 goto fail; | |
1032 op->hintbase = hints.base; | |
1033 op->hinttype = hints.type; | |
1034 } | |
1035 mref_set_optype(op); | |
1036 } else { /* it's not a memory reference */ | |
1037 if (is_just_unknown(value)) { /* it's immediate but unknown */ | |
1038 op->type |= IMMEDIATE; | |
1039 op->opflags |= OPFLAG_UNKNOWN; | |
1040 op->offset = 0; /* don't care */ | |
1041 op->segment = NO_SEG; /* don't care again */ | |
1042 op->wrt = NO_SEG; /* still don't care */ | |
1043 | |
1044 if(optimizing >= 0 && !(op->type & STRICT)) { | |
1045 /* Be optimistic */ | |
1046 op->type |= | |
1047 UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD; | |
1048 } | |
1049 } else if (is_reloc(value)) { /* it's immediate */ | |
1050 uint64_t n = reloc_value(value); | |
1051 | |
1052 op->type |= IMMEDIATE; | |
1053 op->offset = n; | |
1054 op->segment = reloc_seg(value); | |
1055 op->wrt = reloc_wrt(value); | |
1056 op->opflags |= is_self_relative(value) ? OPFLAG_RELATIVE : 0; | |
1057 | |
1058 if (is_simple(value)) { | |
1059 if (n == 1) | |
1060 op->type |= UNITY; | |
1061 if (optimizing >= 0 && !(op->type & STRICT)) { | |
1062 if ((uint32_t) (n + 128) <= 255) | |
1063 op->type |= SBYTEDWORD; | |
1064 if ((uint16_t) (n + 128) <= 255) | |
1065 op->type |= SBYTEWORD; | |
1066 if (n <= UINT64_C(0xFFFFFFFF)) | |
1067 op->type |= UDWORD; | |
1068 if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF)) | |
1069 op->type |= SDWORD; | |
1070 } | |
1071 } | |
1072 } else if (value->type == EXPR_RDSAE) { | |
1073 /* | |
1074 * it's not an operand but a rounding or SAE decorator. | |
1075 * put the decorator information in the (opflag_t) type field | |
1076 * of previous operand. | |
1077 */ | |
1078 opnum--; op--; | |
1079 switch (value->value) { | |
1080 case BRC_RN: | |
1081 case BRC_RU: | |
1082 case BRC_RD: | |
1083 case BRC_RZ: | |
1084 case BRC_SAE: | |
1085 op->decoflags |= (value->value == BRC_SAE ? SAE : ER); | |
1086 result->evex_rm = value->value; | |
1087 break; | |
1088 default: | |
1089 nasm_error(ERR_NONFATAL, "invalid decorator"); | |
1090 break; | |
1091 } | |
1092 } else { /* it's a register */ | |
1093 opflags_t rs; | |
1094 | |
1095 if (value->type >= EXPR_SIMPLE || value->value != 1) { | |
1096 nasm_error(ERR_NONFATAL, "invalid operand type"); | |
1097 goto fail; | |
1098 } | |
1099 | |
1100 /* | |
1101 * check that it's only 1 register, not an expression... | |
1102 */ | |
1103 for (i = 1; value[i].type; i++) | |
1104 if (value[i].value) { | |
1105 nasm_error(ERR_NONFATAL, "invalid operand type"); | |
1106 goto fail; | |
1107 } | |
1108 | |
1109 /* clear overrides, except TO which applies to FPU regs */ | |
1110 if (op->type & ~TO) { | |
1111 /* | |
1112 * we want to produce a warning iff the specified size | |
1113 * is different from the register size | |
1114 */ | |
1115 rs = op->type & SIZE_MASK; | |
1116 } else | |
1117 rs = 0; | |
1118 | |
1119 op->type &= TO; | |
1120 op->type |= REGISTER; | |
1121 op->type |= nasm_reg_flags[value->type]; | |
1122 op->decoflags |= brace_flags; | |
1123 op->basereg = value->type; | |
1124 | |
1125 if (rs && (op->type & SIZE_MASK) != rs) | |
1126 nasm_error(ERR_WARNING | ERR_PASS1, | |
1127 "register size specification ignored"); | |
1128 } | |
1129 } | |
1130 | |
1131 /* remember the position of operand having broadcasting/ER mode */ | |
1132 if (op->decoflags & (BRDCAST_MASK | ER | SAE)) | |
1133 result->evex_brerop = opnum; | |
1134 } | |
1135 | |
1136 result->operands = opnum; /* set operand count */ | |
1137 | |
1138 /* clear remaining operands */ | |
1139 while (opnum < MAX_OPERANDS) | |
1140 result->oprs[opnum++].type = 0; | |
1141 | |
1142 /* | |
1143 * Transform RESW, RESD, RESQ, REST, RESO, RESY, RESZ into RESB. | |
1144 */ | |
1145 switch (result->opcode) { | |
1146 case I_RESW: | |
1147 result->opcode = I_RESB; | |
1148 result->oprs[0].offset *= 2; | |
1149 break; | |
1150 case I_RESD: | |
1151 result->opcode = I_RESB; | |
1152 result->oprs[0].offset *= 4; | |
1153 break; | |
1154 case I_RESQ: | |
1155 result->opcode = I_RESB; | |
1156 result->oprs[0].offset *= 8; | |
1157 break; | |
1158 case I_REST: | |
1159 result->opcode = I_RESB; | |
1160 result->oprs[0].offset *= 10; | |
1161 break; | |
1162 case I_RESO: | |
1163 result->opcode = I_RESB; | |
1164 result->oprs[0].offset *= 16; | |
1165 break; | |
1166 case I_RESY: | |
1167 result->opcode = I_RESB; | |
1168 result->oprs[0].offset *= 32; | |
1169 break; | |
1170 case I_RESZ: | |
1171 result->opcode = I_RESB; | |
1172 result->oprs[0].offset *= 64; | |
1173 break; | |
1174 default: | |
1175 break; | |
1176 } | |
1177 | |
1178 return result; | |
1179 | |
1180 fail: | |
1181 result->opcode = I_none; | |
1182 return result; | |
1183 } | |
1184 | |
1185 static int is_comma_next(void) | |
1186 { | |
1187 struct tokenval tv; | |
1188 char *p; | |
1189 int i; | |
1190 | |
1191 p = stdscan_get(); | |
1192 i = stdscan(NULL, &tv); | |
1193 stdscan_set(p); | |
1194 | |
1195 return (i == ',' || i == ';' || !i); | |
1196 } | |
1197 | |
1198 void cleanup_insn(insn * i) | |
1199 { | |
1200 extop *e; | |
1201 | |
1202 while ((e = i->eops)) { | |
1203 i->eops = e->next; | |
1204 if (e->type == EOT_DB_STRING_FREE) | |
1205 nasm_free(e->stringval); | |
1206 nasm_free(e); | |
1207 } | |
1208 } |