/*
 *  i386 specific functions for TCC assembler
 *
 *  Copyright (c) 2001, 2002 Fabrice Bellard
 *  Copyright (c) 2009 Frédéric Feret (x86_64 support)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "tcc.h"

/* #define NB_ASM_REGS 8 */
#define MAX_OPERANDS 3
#define NB_SAVED_REGS 3

#define TOK_ASM_first TOK_ASM_clc
#define TOK_ASM_last TOK_ASM_emms
#define TOK_ASM_alllast TOK_ASM_subps

#define OPC_B          0x01 /* only used with OPC_WL */
#define OPC_WL         0x02 /* accepts w, l or no suffix */
#define OPC_BWL        (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */
#define OPC_REG        0x04 /* register is added to opcode */
#define OPC_MODRM      0x08 /* modrm encoding */

#define OPCT_MASK      0x70
#define OPC_FWAIT      0x10 /* add fwait opcode */
#define OPC_SHIFT      0x20 /* shift opcodes */
#define OPC_ARITH      0x30 /* arithmetic opcodes */
#define OPC_FARITH     0x40 /* FPU arithmetic opcodes */
#define OPC_TEST       0x50 /* test opcodes */
#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i))

#define OPC_0F        0x100 /* Is secondary map (0x0f prefix) */
#ifdef TCC_TARGET_X86_64
# define OPC_WLQ     0x1000 /* accepts w, l, q or no suffix */
# define OPC_BWLQ    (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */
# define OPC_WLX     OPC_WLQ
# define OPC_BWLX    OPC_BWLQ
#else
# define OPC_WLX     OPC_WL
# define OPC_BWLX    OPC_BWL
#endif

/* number of mnemonic size variants per base opcode: b, w, l,
   (q on x86_64,) plus the suffix-less form */
#ifdef TCC_TARGET_X86_64
# define NBWLX 5
#else
# define NBWLX 4
#endif

#define OPC_GROUP_SHIFT 13

/* in order to compress the operand type, we use specific operands and
   we or only with EA */
enum {
    OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG16,  /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_REG32,  /* warning: value is hardcoded from TOK_ASM_xxx */
#ifdef TCC_TARGET_X86_64
    OPT_REG64,  /* warning: value is hardcoded from TOK_ASM_xxx */
#endif
    OPT_MMX,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SSE,    /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_CR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_TR,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_DB,     /* warning: value is hardcoded from TOK_ASM_xxx */
    OPT_SEG,
    OPT_ST,
    OPT_IM8,
    OPT_IM8S,
    OPT_IM16,
    OPT_IM32,
#ifdef TCC_TARGET_X86_64
    OPT_IM64,
#endif
    OPT_EAX,    /* %al, %ax, %eax or %rax register */
    OPT_ST0,    /* %st(0) register */
    OPT_CL,     /* %cl register */
    OPT_DX,     /* %dx register */
    OPT_ADDR,   /* OP_EA with only offset */
    OPT_INDIR,  /* *(expr) */
    /* composite types */
    OPT_COMPOSITE_FIRST,
    OPT_IM,     /* IM8 | IM16 | IM32 */
    OPT_REG,    /* REG8 | REG16 | REG32 | REG64 */
    OPT_REGW,   /* REG16 | REG32 | REG64 */
    OPT_IMW,    /* IM16 | IM32 */
    OPT_MMXSSE, /* MMX | SSE */
    OPT_DISP,   /* Like OPT_ADDR, but emitted as displacement (for jumps) */
    OPT_DISP8,  /* Like OPT_ADDR, but only 8bit (short jumps) */
    /* can be ored with any OPT_xxx */
    OPT_EA = 0x80
};

#define OP_REG8   (1 << OPT_REG8)
#define OP_REG16  (1 << OPT_REG16)
#define OP_REG32  (1 << OPT_REG32)
#define OP_MMX    (1 << OPT_MMX)
#define OP_SSE    (1 << OPT_SSE)
#define OP_CR     (1 << OPT_CR)
#define OP_TR     (1 << OPT_TR)
#define OP_DB     (1 << OPT_DB)
#define OP_SEG    (1 << OPT_SEG)
#define OP_ST     (1 << OPT_ST)
#define OP_IM8    (1 << OPT_IM8)
#define OP_IM8S   (1 << OPT_IM8S)
#define OP_IM16   (1 << OPT_IM16)
#define OP_IM32   (1 << OPT_IM32)
#define OP_EAX    (1 << OPT_EAX)
#define OP_ST0    (1 << OPT_ST0)
#define OP_CL     (1 << OPT_CL)
#define OP_DX     (1 << OPT_DX)
#define OP_ADDR   (1 << OPT_ADDR)
#define OP_INDIR  (1 << OPT_INDIR)
#ifdef TCC_TARGET_X86_64
# define OP_REG64 (1 << OPT_REG64)
# define OP_IM64  (1 << OPT_IM64)
# define OP_EA32  (OP_EA << 1) /* EA computed with 32bit regs (addr32 prefix) */
#else
# define OP_REG64 0
# define OP_IM64  0
# define OP_EA32  0
#endif

#define OP_EA     0x40000000
#define OP_REG    (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64)
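/* A parsed operand is thus described by a bitmask of OP_xxx flags and
   matched against an instruction-table entry with a simple AND.  For
   example, "%cl" parses to OP_REG8 | OP_CL, while "$4" parses to
   OP_IM32 | OP_IM16 | OP_IM8 | OP_IM8S because a small immediate
   satisfies every immediate width; an entry that accepts any 8bit
   immediate then just tests ops[i].type against (1 << OPT_IM8). */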
#ifdef TCC_TARGET_X86_64
# define TREG_XAX   TREG_RAX
# define TREG_XCX   TREG_RCX
# define TREG_XDX   TREG_RDX
#else
# define TREG_XAX   TREG_EAX
# define TREG_XCX   TREG_ECX
# define TREG_XDX   TREG_EDX
#endif

typedef struct ASMInstr {
    uint16_t sym;
    uint16_t opcode;
    uint16_t instr_type;
    uint8_t nb_ops;
    uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */
} ASMInstr;

typedef struct Operand {
    uint32_t type;
    int8_t  reg; /* register, -1 if none */
    int8_t  reg2; /* second register, -1 if none */
    uint8_t shift;
    ExprValue e;
} Operand;

static const uint8_t reg_to_size[9] = {
/*
    [OP_REG8] = 0,
    [OP_REG16] = 1,
    [OP_REG32] = 2,
#ifdef TCC_TARGET_X86_64
    [OP_REG64] = 3,
#endif
*/
    0, 0, 1, 0, 2, 0, 0, 0, 3
};

#define NB_TEST_OPCODES 30

static const uint8_t test_bits[NB_TEST_OPCODES] = {
 0x00, /* o */
 0x01, /* no */
 0x02, /* b */
 0x02, /* c */
 0x02, /* nae */
 0x03, /* nb */
 0x03, /* nc */
 0x03, /* ae */
 0x04, /* e */
 0x04, /* z */
 0x05, /* ne */
 0x05, /* nz */
 0x06, /* be */
 0x06, /* na */
 0x07, /* nbe */
 0x07, /* a */
 0x08, /* s */
 0x09, /* ns */
 0x0a, /* p */
 0x0a, /* pe */
 0x0b, /* np */
 0x0b, /* po */
 0x0c, /* l */
 0x0c, /* nge */
 0x0d, /* nl */
 0x0d, /* ge */
 0x0e, /* le */
 0x0e, /* ng */
 0x0f, /* nle */
 0x0f, /* g */
};

static const uint8_t segment_prefixes[] = {
 0x26, /* es */
 0x2e, /* cs */
 0x36, /* ss */
 0x3e, /* ds */
 0x64, /* fs */
 0x65  /* gs */
};
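/* The instruction table below is kept compact: 'opcode' is only 16
   bits wide, so for instructions in the secondary (two-byte) map the
   O() macro strips the leading 0x0f byte and T() records the OPC_0F
   flag in instr_type instead.  asm_opcode() re-expands this before
   emission; e.g. the two-operand imul (0f af) is stored as 0xaf with
   OPC_0F set and emitted again as the bytes 0f af. */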
static const ASMInstr asm_instrs[] = {
#define ALT(x) x
/* This removes a 0x0f in the second byte */
#define O(o) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o))
/* This constructs instr_type from opcode, type and group.  */
#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? OPC_0F : 0))
#define DEF_ASM_OP0(name, opcode)
#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0 },
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }},
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }},
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }},
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
    /* last operation */
    { 0, },
};

static const uint16_t op0_codes[] = {
#define ALT(x)
#define DEF_ASM_OP0(x, opcode) opcode,
#define DEF_ASM_OP0L(name, opcode, group, instr_type)
#define DEF_ASM_OP1(name, opcode, group, instr_type, op0)
#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1)
#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2)
#ifdef TCC_TARGET_X86_64
# include "x86_64-asm.h"
#else
# include "i386-asm.h"
#endif
};
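/* Both tables are generated from the same description header with the
   "X macro" technique: x86_64-asm.h / i386-asm.h is included twice
   with different definitions of the DEF_ASM_OPn macros.  The first
   pass expands every description into a full ASMInstr record; the
   second pass keeps only the operand-less instructions, so that
   op0_codes[] can be indexed directly with opcode - TOK_ASM_first
   (the tokens of those mnemonics are laid out consecutively). */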
static inline int get_reg_shift(TCCState *s1)
{
    int shift, v;

    v = asm_int_expr(s1);
    switch(v) {
    case 1:
        shift = 0;
        break;
    case 2:
        shift = 1;
        break;
    case 4:
        shift = 2;
        break;
    case 8:
        shift = 3;
        break;
    default:
        expect("1, 2, 4 or 8 constant");
        shift = 0;
        break;
    }
    return shift;
}

static int asm_parse_reg(int *type)
{
    int reg = 0;
    *type = 0;
    if (tok != '%')
        goto error_32;
    next();
    if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) {
        reg = tok - TOK_ASM_eax;
#ifdef TCC_TARGET_X86_64
        *type = OP_EA32;
    } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) {
        reg = tok - TOK_ASM_rax;
    } else if (tok == TOK_ASM_rip) {
        reg = 8;
#endif
    } else {
    error_32:
        expect("register");
    }
    next();
    return reg;
}

static void parse_operand(TCCState *s1, Operand *op)
{
    ExprValue e;
    int reg, indir;
    const char *p;

    indir = 0;
    if (tok == '*') {
        next();
        indir = OP_INDIR;
    }

    if (tok == '%') {
        next();
        if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) {
            reg = tok - TOK_ASM_al;
            op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */
            op->reg = reg & 7;
            if ((op->type & OP_REG) && op->reg == TREG_XAX)
                op->type |= OP_EAX;
            else if (op->type == OP_REG8 && op->reg == TREG_XCX)
                op->type |= OP_CL;
            else if (op->type == OP_REG16 && op->reg == TREG_XDX)
                op->type |= OP_DX;
        } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) {
            op->type = OP_DB;
            op->reg = tok - TOK_ASM_dr0;
        } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) {
            op->type = OP_SEG;
            op->reg = tok - TOK_ASM_es;
        } else if (tok == TOK_ASM_st) {
            op->type = OP_ST;
            op->reg = 0;
            next();
            if (tok == '(') {
                next();
                if (tok != TOK_PPNUM)
                    goto reg_error;
                p = tokc.str.data;
                reg = p[0] - '0';
                if ((unsigned)reg >= 8 || p[1] != '\0')
                    goto reg_error;
                op->reg = reg;
                next();
                skip(')');
            }
            if (op->reg == 0)
                op->type |= OP_ST0;
            goto no_skip;
        } else {
        reg_error:
            tcc_error("unknown register");
        }
        next();
    no_skip: ;
    } else if (tok == '$') {
        /* constant value */
        next();
        asm_expr(s1, &e);
        op->type = OP_IM32;
        op->e = e;
        if (!op->e.sym) {
            if (op->e.v == (uint8_t)op->e.v)
                op->type |= OP_IM8;
            if (op->e.v == (int8_t)op->e.v)
                op->type |= OP_IM8S;
            if (op->e.v == (uint16_t)op->e.v)
                op->type |= OP_IM16;
#ifdef TCC_TARGET_X86_64
            if (op->e.v != (int32_t)op->e.v)
                op->type = OP_IM64;
#endif
        }
    } else {
        /* address(reg,reg2,shift) with all variants */
        op->type = OP_EA;
        op->reg = -1;
        op->reg2 = -1;
        op->shift = 0;
        if (tok != '(') {
            asm_expr(s1, &e);
            op->e = e;
        } else {
            next();
            if (tok == '%') {
                unget_tok('(');
                op->e.v = 0;
                op->e.sym = NULL;
            } else {
                /* bracketed offset expression */
                asm_expr(s1, &e);
                if (tok != ')')
                    expect(")");
                next();
                op->e.v = e.v;
                op->e.sym = e.sym;
            }
            op->e.pcrel = 0;
        }
        if (tok == '(') {
            int type = 0;
            next();
            if (tok != ',') {
                op->reg = asm_parse_reg(&type);
            }
            if (tok == ',') {
                next();
                if (tok != ',') {
                    op->reg2 = asm_parse_reg(&type);
                }
                if (tok == ',') {
                    next();
                    op->shift = get_reg_shift(s1);
                }
            }
            if (type & OP_EA32)
                op->type |= OP_EA32;
            skip(')');
        }
        if (op->reg == -1 && op->reg2 == -1)
            op->type |= OP_ADDR;
    }
    op->type |= indir;
}

/* XXX: unify with C code output ? */
ST_FUNC void gen_expr32(ExprValue *pe)
{
    if (pe->pcrel)
        /* If PC-relative, always set VT_SYM, even without symbol,
           so as to force a relocation to be emitted.  */
        gen_addrpc32(VT_SYM, pe->sym, pe->v);
    else
        gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}

#ifdef TCC_TARGET_X86_64
ST_FUNC void gen_expr64(ExprValue *pe)
{
    gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
}
#endif

/* XXX: unify with C code output ? */
static void gen_disp32(ExprValue *pe)
{
    Sym *sym = pe->sym;
    if (sym && sym->r == cur_text_section->sh_num) {
        /* same section: we can output an absolute value. Note
           that the TCC compiler behaves differently here because
           it always outputs a relocation to ease (future) code
           elimination in the linker */
        gen_le32(pe->v + sym->jnext - ind - 4);
    } else {
        if (sym && sym->type.t == VT_VOID) {
            sym->type.t = VT_FUNC;
            sym->type.ref = NULL;
        }
        gen_addrpc32(VT_SYM, sym, pe->v);
    }
}
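/* Worked example of the ModRM/SIB encoding performed by asm_modrm()
   below: for "movl %eax, 4(%ebx,%ecx,2)" the /r register field is
   %eax (0), the EA has base %ebx (3), index %ecx (1) and scale 2
   (shift 1), and the displacement 4 fits in a signed byte, so
   mod = 0x40 and rm = 4 (SIB follows).  That yields ModRM 0x44,
   SIB 0x4b and disp8 0x04, i.e. the bytes 89 44 4b 04 once the 0x89
   opcode is emitted. */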
/* generate the modrm operand */
static inline int asm_modrm(int reg, Operand *op)
{
    int mod, reg1, reg2, sib_reg1;

    if (op->type & (OP_REG | OP_MMX | OP_SSE)) {
        g(0xc0 + (reg << 3) + op->reg);
    } else if (op->reg == -1 && op->reg2 == -1) {
        /* displacement only */
#ifdef TCC_TARGET_X86_64
        g(0x04 + (reg << 3));
        g(0x25);
#else
        g(0x05 + (reg << 3));
#endif
        gen_expr32(&op->e);
#ifdef TCC_TARGET_X86_64
    } else if (op->reg == 8) {
        ExprValue *pe = &op->e;
        g(0x05 + (reg << 3));
        gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v);
        return ind;
#endif
    } else {
        sib_reg1 = op->reg;
        /* first compute displacement encoding */
        if (sib_reg1 == -1) {
            sib_reg1 = 5;
            mod = 0x00;
        } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) {
            mod = 0x00;
        } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) {
            mod = 0x40;
        } else {
            mod = 0x80;
        }
        /* compute if sib byte needed */
        reg1 = op->reg;
        if (op->reg2 != -1)
            reg1 = 4;
        g(mod + (reg << 3) + reg1);
        if (reg1 == 4) {
            /* add sib byte */
            reg2 = op->reg2;
            if (reg2 == -1)
                reg2 = 4; /* indicate no index */
            g((op->shift << 6) + (reg2 << 3) + sib_reg1);
        }
        /* add offset */
        if (mod == 0x40) {
            g(op->e.v);
        } else if (mod == 0x80 || op->reg == -1) {
            gen_expr32(&op->e);
        }
    }
    return 0;
}

static void maybe_print_stats(void)
{
    static int already = 1;

    if (!already)
        /* print stats about opcodes */
    {
        const struct ASMInstr *pa;
        int freq[4];
        int op_vals[500];
        int nb_op_vals, i, j;

        already = 1;
        nb_op_vals = 0;
        memset(freq, 0, sizeof(freq));
        for(pa = asm_instrs; pa->sym != 0; pa++) {
            freq[pa->nb_ops]++;
            //for(i=0;i<pa->nb_ops;i++) {
            for(j=0;j<nb_op_vals;j++) {
                //if (pa->op_type[i] == op_vals[j])
                if (pa->instr_type == op_vals[j])
                    goto found;
            }
            //op_vals[nb_op_vals++] = pa->op_type[i];
            op_vals[nb_op_vals++] = pa->instr_type;
        found: ;
            //}
        }
        for(i=0;i<nb_op_vals;i++)
            printf("%3d: %08x\n", i, op_vals[i]);
        printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n",
               (int)sizeof(asm_instrs),
               (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr),
               freq[0], freq[1], freq[2], freq[3]);
    }
}

ST_FUNC void asm_opcode(TCCState *s1, int opcode)
{
    const ASMInstr *pa;
    int i, modrm_index, reg, v, op1, seg_prefix, pc;
    int nb_ops, s;
    Operand ops[MAX_OPERANDS], *pop;
    int op_type[3]; /* decoded op type */
    int alltypes;   /* OR of all operand types */
    int autosize;
    int p66;

    maybe_print_stats();
    /* force synthetic ';' after prefix instruction, so we can handle
       one-line things like "rep stosb" instead of only "rep\n stosb" */
    if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz)
        unget_tok(';');

    /* get operands */
    pop = ops;
    nb_ops = 0;
    seg_prefix = 0;
    alltypes = 0;
    for(;;) {
        if (tok == ';' || tok == TOK_LINEFEED)
            break;
        if (nb_ops >= MAX_OPERANDS) {
            tcc_error("incorrect number of operands");
        }
        parse_operand(s1, pop);
        if (tok == ':') {
            if (pop->type != OP_SEG || seg_prefix)
                tcc_error("incorrect prefix");
            seg_prefix = segment_prefixes[pop->reg];
            next();
            parse_operand(s1, pop);
            if (!(pop->type & OP_EA)) {
                tcc_error("segment prefix must be followed by memory reference");
            }
        }
        pop++;
        nb_ops++;
        if (tok != ',')
            break;
        next();
    }

    s = 0; /* avoid warning */
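    /* Instruction lookup relies on the layout of the suffixed
       mnemonic tokens: the tokens for one base operation are
       consecutive (b, w, l, (q,) then the bare name), so
       opcode - pa->sym encodes both the operation (division by NBWLX)
       and the size suffix (modulo NBWLX).  For instance, on i386
       (NBWLX == 4) "xorl" is TOK_ASM_addb + 6*NBWLX + 2: the
       OPC_ARITH entry matches with s == 2, and the arith case further
       down adds (6 << 3) to the 0x00 base opcode, which together with
       the OPC_B width bit gives the familiar 0x31 encoding. */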
    /* optimize matching by using a lookup table (no hashing is needed !) */
    for(pa = asm_instrs; pa->sym != 0; pa++) {
        int it = pa->instr_type & OPCT_MASK;
        s = 0;
        if (it == OPC_FARITH) {
            v = opcode - pa->sym;
            if (!((unsigned)v < 8 * 6 && (v % 6) == 0))
                continue;
        } else if (it == OPC_ARITH) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
            if ((pa->instr_type & OPC_BWLX) == OPC_WLX) {
                /* We need to reject the xxxb opcodes that we accepted
                   above.  Note that pa->sym for WLX opcodes is the 'w'
                   token; to get the 'b' token subtract one.  */
                if (((opcode - pa->sym + 1) % NBWLX) == 0)
                    continue;
                s++;
            }
        } else if (it == OPC_SHIFT) {
            if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX))
                continue;
            s = (opcode - pa->sym) % NBWLX;
        } else if (it == OPC_TEST) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES))
                continue;
            /* cmovxx is a test opcode but accepts multiple sizes.
               TCC doesn't accept the suffixed mnemonic, instead we
               simply force size autodetection always.  */
            if (pa->instr_type & OPC_WLX)
                s = NBWLX - 1;
        } else if (pa->instr_type & OPC_B) {
#ifdef TCC_TARGET_X86_64
            /* Some instructions don't have the full size but only
               bwl form.  insb e.g. */
            if ((pa->instr_type & OPC_WLQ) != OPC_WLQ
                && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
#endif
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX))
                continue;
            s = opcode - pa->sym;
        } else if (pa->instr_type & OPC_WLX) {
            if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1))
                continue;
            s = opcode - pa->sym + 1;
        } else {
            if (pa->sym != opcode)
                continue;
        }
        if (pa->nb_ops != nb_ops)
            continue;
#ifdef TCC_TARGET_X86_64
        /* Special case for moves.  Selecting the IM64->REG64 form
           should only be done if we really have an >32bit imm64, and
           that is hardcoded.  Ignore it here.  */
        if (pa->opcode == 0xb0 && ops[0].type != OP_IM64 &&
            ops[1].type == OP_REG64 &&
            !(pa->instr_type & OPC_0F))
            continue;
#endif
        /* now decode and check each operand */
        alltypes = 0;
        for(i = 0; i < nb_ops; i++) {
            int op1, op2;
            op1 = pa->op_type[i];
            op2 = op1 & 0x1f;
            switch(op2) {
            case OPT_IM:
                v = OP_IM8 | OP_IM16 | OP_IM32;
                break;
            case OPT_REG:
                v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_REGW:
                v = OP_REG16 | OP_REG32 | OP_REG64;
                break;
            case OPT_IMW:
                v = OP_IM16 | OP_IM32;
                break;
            case OPT_MMXSSE:
                v = OP_MMX | OP_SSE;
                break;
            case OPT_DISP:
            case OPT_DISP8:
                v = OP_ADDR;
                break;
            default:
                v = 1 << op2;
                break;
            }
            if (op1 & OPT_EA)
                v |= OP_EA;
            op_type[i] = v;
            if ((ops[i].type & v) == 0)
                goto next;
            alltypes |= ops[i].type;
        }
        /* all is matching ! */
        break;
    next: ;
    }
    if (pa->sym == 0) {
        if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) {
            int b;
            b = op0_codes[opcode - TOK_ASM_first];
            if (b & 0xff00)
                g(b >> 8);
            g(b);
            return;
        } else if (opcode <= TOK_ASM_alllast) {
            tcc_error("bad operand with opcode '%s'",
                      get_tok_str(opcode, NULL));
        } else {
            tcc_error("unknown opcode '%s'",
                      get_tok_str(opcode, NULL));
        }
    }
    /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */
    autosize = NBWLX-1;
#ifdef TCC_TARGET_X86_64
    /* XXX the autosize should rather be zero, to not have to adjust this
       all the time. */
    if ((pa->instr_type & OPC_BWLQ) == OPC_B)
        autosize = NBWLX-2;
#endif
    if (s == autosize) {
        for(i = 0; s == autosize && i < nb_ops; i++) {
            if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX)))
                s = reg_to_size[ops[i].type & OP_REG];
        }
        if (s == autosize) {
            if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) &&
                (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32)))
                s = 2;
            else
                tcc_error("cannot infer opcode suffix");
        }
    }

#ifdef TCC_TARGET_X86_64
    /* Generate addr32 prefix if needed */
    for(i = 0; i < nb_ops; i++) {
        if (ops[i].type & OP_EA32) {
            g(0x67);
            break;
        }
    }
#endif
    /* generate data16 prefix if needed */
    p66 = 0;
    if (s == 1)
        p66 = 1;
    else {
        /* accepting mmx+sse in all operands --> needs 0x66 to
           switch to sse mode.  Accepting only sse in an operand --> is
           already an SSE insn and needs 0x66/f2/f3 handling. */
        for (i = 0; i < nb_ops; i++)
            if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE)
                && ops[i].type & OP_SSE)
                p66 = 1;
    }
    if (p66)
        g(0x66);
#ifdef TCC_TARGET_X86_64
    if (s == 3 || (alltypes & OP_REG64)) {
        /* generate REX prefix */
        int default64 = 0;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] == OP_REG64) {
                /* If only 64bit regs are accepted in one operand
                   this is a default64 instruction without need for
                   REX prefixes.  */
                default64 = 1;
                break;
            }
        }
        /* XXX find better encoding for the default64 instructions.  */
        if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop
              && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl
              && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw
              && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq
              && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp))
            && !default64)
            g(0x48);
    }
#endif
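    /* At this point the address-size, operand-size and REX prefixes
       have been emitted.  For example "movw %ax, %bx" matches the
       0x89 mov entry with s == 1, so the data16 prefix is generated
       and the final encoding is 66 89 c3; on x86_64, "movq %rax,
       %rbx" takes the s == 3 path instead and becomes 48 89 c3
       (REX.W). */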
    /* now generate the operation */
    if (OPCT_IS(pa->instr_type, OPC_FWAIT))
        g(0x9b);
    if (seg_prefix)
        g(seg_prefix);

    v = pa->opcode;
    if (pa->instr_type & OPC_0F)
        v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff);
    if ((v == 0x69 || v == 0x6b) && nb_ops == 2) {
        /* kludge for imul $im, %reg */
        nb_ops = 3;
        ops[2] = ops[1];
        op_type[2] = op_type[1];
    } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) {
        v--; /* int $3 case */
        nb_ops = 0;
    } else if (v == 0x06 || v == 0x07) {
        if (ops[0].reg >= 4) {
            /* push/pop %fs or %gs */
            v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3);
        } else {
            v += ops[0].reg << 3;
        }
        nb_ops = 0;
    } else if (v <= 0x05) {
        /* arith case */
        v += ((opcode - TOK_ASM_addb) / NBWLX) << 3;
    } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) {
        /* fpu arith case */
        v += ((opcode - pa->sym) / 6) << 3;
    }

    if (pa->instr_type & OPC_REG) {
        /* mov $im, %reg case */
        if (v == 0xb0 && s >= 1)
            v += 7;
        for(i = 0; i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_ST)) {
                v += ops[i].reg;
                break;
            }
        }
    }
    if (pa->instr_type & OPC_B)
        v += s >= 1;

    if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) {
        Sym *sym;
        int jmp_disp;

        /* see if we can really generate the jump with a byte offset */
        sym = ops[0].e.sym;
        if (!sym)
            goto no_short_jump;
        if (sym->r != cur_text_section->sh_num)
            goto no_short_jump;
        jmp_disp = ops[0].e.v + sym->jnext - ind - 2 - (v >= 0xff);
        if (jmp_disp == (int8_t)jmp_disp) {
            /* OK to generate jump */
            ops[0].e.sym = 0;
            ops[0].e.v = jmp_disp;
            op_type[0] = OP_IM8S;
        } else {
        no_short_jump:
            /* long jump will be allowed. need to modify the
               opcode slightly */
            if (v == 0xeb) /* jmp */
                v = 0xe9;
            else if (v == 0x70) /* jcc */
                v += 0x0f10;
            else
                tcc_error("invalid displacement");
        }
    }
    if (OPCT_IS(pa->instr_type, OPC_TEST))
        v += test_bits[opcode - pa->sym];
    op1 = v >> 16;
    if (op1)
        g(op1);
    op1 = (v >> 8) & 0xff;
    if (op1)
        g(op1);
    g(v);
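    /* Short-jump selection above: a "jmp" whose target is known to
       lie within the signed-byte range keeps the two-byte eb <disp8>
       form; otherwise 0xeb is rewritten to the near form e9 <rel32>.
       A conditional jump (base 0x70, condition bits added via
       test_bits[]) similarly grows into the two-byte-opcode form
       0f 8x <rel32> through the v += 0x0f10 adjustment and the
       multi-byte opcode emission just above. */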
    /* search which operand will be used for modrm */
    modrm_index = 0;
    if (OPCT_IS(pa->instr_type, OPC_SHIFT)) {
        reg = (opcode - pa->sym) / NBWLX;
        if (reg == 6)
            reg = 7;
    } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) {
        reg = (opcode - pa->sym) / NBWLX;
    } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) {
        reg = (opcode - pa->sym) / 6;
    } else {
        reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7;
    }

    pc = 0;
    if (pa->instr_type & OPC_MODRM) {
        if (!nb_ops) {
            /* A modrm opcode without operands is a special case
               (e.g. mfence).  It has a group and acts as if there's
               a register operand 0 (ax).  */
            i = 0;
            ops[i].type = OP_REG;
            ops[i].reg = 0;
            goto modrm_found;
        }
        /* first look for an ea operand */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & OP_EA)
                goto modrm_found;
        }
        /* then if not found, a register or indirection (shift instructions) */
        for(i = 0;i < nb_ops; i++) {
            if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR))
                goto modrm_found;
        }
#ifdef ASM_DEBUG
        tcc_error("bad op table");
#endif
    modrm_found:
        modrm_index = i;
        /* if a register is used in another operand then it is
           used instead of group */
        for(i = 0;i < nb_ops; i++) {
            v = op_type[i];
            if (i != modrm_index &&
                (v & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) {
                reg = ops[i].reg;
                break;
            }
        }
        pc = asm_modrm(reg, &ops[modrm_index]);
    }

    /* emit constants */
#ifndef TCC_TARGET_X86_64
    if (!(pa->instr_type & OPC_0F)
        && (pa->opcode == 0x9a || pa->opcode == 0xea)) {
        /* ljmp or lcall kludge */
        gen_expr32(&ops[1].e);
        if (ops[0].e.sym)
            tcc_error("cannot relocate");
        gen_le16(ops[0].e.v);
        return;
    }
#endif
    for(i = 0;i < nb_ops; i++) {
        v = op_type[i];
        if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) {
            /* if multiple sizes are given it means we must look
               at the op size */
            if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) {
                if (s == 0)
                    v = OP_IM8;
                else if (s == 1)
                    v = OP_IM16;
                else if (s == 2 || (v & OP_IM64) == 0)
                    v = OP_IM32;
                else
                    v = OP_IM64;
            }
            if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym)
                tcc_error("cannot relocate");
            if (v & (OP_IM8 | OP_IM8S)) {
                g(ops[i].e.v);
            } else if (v & OP_IM16) {
                gen_le16(ops[i].e.v);
#ifdef TCC_TARGET_X86_64
            } else if (v & OP_IM64) {
                gen_expr64(&ops[i].e);
#endif
            } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) {
                gen_disp32(&ops[i].e);
            } else {
                gen_expr32(&ops[i].e);
            }
        }
    }

    /* after immediate operands, adjust pc-relative address */
    if (pc)
        add32le(text_section->data + pc - 4, pc - ind);
}

/* return the constraint priority (we allocate first the lowest
   numbered constraints) */
static inline int constraint_priority(const char *str)
{
    int priority, c, pr;

    /* we take the lowest priority */
    priority = 0;
    for(;;) {
        c = *str;
        if (c == '\0')
            break;
        str++;
        switch(c) {
        case 'A':
            pr = 0;
            break;
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'S':
        case 'D':
            pr = 1;
            break;
        case 'q':
            pr = 2;
            break;
        case 'r':
        case 'p':
            pr = 3;
            break;
        case 'N':
        case 'M':
        case 'I':
        case 'e':
        case 'i':
        case 'm':
        case 'g':
            pr = 4;
            break;
        default:
            tcc_error("unknown constraint '%c'", c);
            pr = 0;
        }
        if (pr > priority)
            priority = pr;
    }
    return priority;
}
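/* The priorities order operands from most to least restrictive: 'A'
   (the eax:edx pair) first, then single-register constraints such as
   "a" or "D", then "q" (any of eax..edx), then "r"/"p", and finally
   the memory/immediate classes that need no register at all.
   Allocating in that order keeps a generic "r" operand from stealing
   the one register a later "c" operand would have required. */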
static const char *skip_constraint_modifiers(const char *p)
{
    while (*p == '=' || *p == '&' || *p == '+' || *p == '%')
        p++;
    return p;
}

#define REG_OUT_MASK 0x01
#define REG_IN_MASK  0x02

#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask)

ST_FUNC void asm_compute_constraints(ASMOperand *operands,
                                     int nb_operands, int nb_outputs,
                                     const uint8_t *clobber_regs,
                                     int *pout_reg)
{
    ASMOperand *op;
    int sorted_op[MAX_ASM_OPERANDS];
    int i, j, k, p1, p2, tmp, reg, c, reg_mask;
    const char *str;
    uint8_t regs_allocated[NB_ASM_REGS];

    /* init fields */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        op->input_index = -1;
        op->ref_index = -1;
        op->reg = -1;
        op->is_memory = 0;
        op->is_rw = 0;
    }
    /* compute constraint priority and evaluate references to output
       constraints if input constraints */
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        str = op->constraint;
        str = skip_constraint_modifiers(str);
        if (isnum(*str) || *str == '[') {
            /* this is a reference to another constraint */
            k = find_constraint(operands, nb_operands, str, NULL);
            if ((unsigned)k >= i || i < nb_outputs)
                tcc_error("invalid reference in constraint %d ('%s')",
                          i, str);
            op->ref_index = k;
            if (operands[k].input_index >= 0)
                tcc_error("cannot reference twice the same operand");
            operands[k].input_index = i;
            op->priority = 5;
        } else {
            op->priority = constraint_priority(str);
        }
    }

    /* sort operands according to their priority */
    for(i=0;i<nb_operands;i++)
        sorted_op[i] = i;
    for(i=0;i<nb_operands - 1;i++) {
        for(j=i+1;j<nb_operands;j++) {
            p1 = operands[sorted_op[i]].priority;
            p2 = operands[sorted_op[j]].priority;
            if (p2 < p1) {
                tmp = sorted_op[i];
                sorted_op[i] = sorted_op[j];
                sorted_op[j] = tmp;
            }
        }
    }

    for(i = 0;i < NB_ASM_REGS; i++) {
        if (clobber_regs[i])
            regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK;
        else
            regs_allocated[i] = 0;
    }

    /* allocate registers and generate corresponding asm moves */
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        str = op->constraint;
        /* no need to allocate references */
        if (op->ref_index >= 0)
            continue;
        /* select if register is used for output, input or both */
        if (op->input_index >= 0) {
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
        } else if (j < nb_outputs) {
            reg_mask = REG_OUT_MASK;
        } else {
            reg_mask = REG_IN_MASK;
        }
    try_next:
        c = *str++;
        switch(c) {
        case '=':
            goto try_next;
        case '+':
            op->is_rw = 1;
            /* FALL THRU */
        case '&':
            if (j >= nb_outputs)
                tcc_error("'%c' modifier can only be applied to outputs", c);
            reg_mask = REG_IN_MASK | REG_OUT_MASK;
            goto try_next;
        case 'A':
            /* allocate both eax and edx */
            if (is_reg_allocated(TREG_XAX) ||
                is_reg_allocated(TREG_XDX))
                goto try_next;
            op->is_llong = 1;
            op->reg = TREG_XAX;
            regs_allocated[TREG_XAX] |= reg_mask;
            regs_allocated[TREG_XDX] |= reg_mask;
            break;
        case 'a':
            reg = TREG_XAX;
            goto alloc_reg;
        case 'b':
            reg = 3;
            goto alloc_reg;
        case 'c':
            reg = TREG_XCX;
            goto alloc_reg;
        case 'd':
            reg = TREG_XDX;
            goto alloc_reg;
        case 'S':
            reg = 6;
            goto alloc_reg;
        case 'D':
            reg = 7;
        alloc_reg:
            if (is_reg_allocated(reg))
                goto try_next;
            goto reg_found;
        case 'q':
            /* eax, ebx, ecx or edx */
            for(reg = 0; reg < 4; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        case 'r':
        case 'p': /* A general address, for x86(64) any register is acceptable */
            /* any general register */
            for(reg = 0; reg < 8; reg++) {
                if (!is_reg_allocated(reg))
                    goto reg_found;
            }
            goto try_next;
        reg_found:
            /* now we can reload in the register */
            op->is_llong = 0;
            op->reg = reg;
            regs_allocated[reg] |= reg_mask;
            break;
        case 'e':
        case 'i':
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST))
                goto try_next;
            break;
        case 'I':
        case 'N':
        case 'M':
            if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST))
                goto try_next;
            break;
        case 'm':
        case 'g':
            /* nothing special to do because the operand is already in
               memory, except if the pointer itself is stored in a
               memory variable (VT_LLOCAL case) */
            /* XXX: fix constant case */
            /* if it is a reference to a memory zone, it must lie
               in a register, so we reserve the register in the
               input registers and a load will be generated later */
            if (j < nb_outputs || c == 'm') {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    /* any general register */
                    for(reg = 0; reg < 8; reg++) {
                        if (!(regs_allocated[reg] & REG_IN_MASK))
                            goto reg_found1;
                    }
                    goto try_next;
                reg_found1:
                    /* now we can reload in the register */
                    regs_allocated[reg] |= REG_IN_MASK;
                    op->reg = reg;
                    op->is_memory = 1;
                }
            }
            break;
        default:
            tcc_error("asm constraint %d ('%s') could not be satisfied",
                      j, op->constraint);
            break;
        }
        /* if a reference is present for that operand, we assign it too */
        if (op->input_index >= 0) {
            operands[op->input_index].reg = op->reg;
            operands[op->input_index].is_llong = op->is_llong;
        }
    }

    /* compute out_reg. It is used to store output registers to memory
       locations referenced by pointers (VT_LLOCAL case) */
    *pout_reg = -1;
    for(i=0;i<nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0 &&
            (op->vt->r & VT_VALMASK) == VT_LLOCAL &&
            !op->is_memory) {
            for(reg = 0; reg < 8; reg++) {
                if (!(regs_allocated[reg] & REG_OUT_MASK))
                    goto reg_found2;
            }
            tcc_error("could not find free output register for reloading");
        reg_found2:
            *pout_reg = reg;
            break;
        }
    }

    /* print sorted constraints */
#ifdef ASM_DEBUG
    for(i=0;i<nb_operands;i++) {
        j = sorted_op[i];
        op = &operands[j];
        printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n",
               i,
               op->id ? get_tok_str(op->id, NULL) : "",
               op->constraint,
               op->vt->r,
               op->reg);
    }
    if (*pout_reg >= 0)
        printf("out_reg=%d\n", *pout_reg);
#endif
}
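/* As a usage illustration (hypothetical source, not part of this
   file): for
       __asm__("addl %1, %0" : "=r"(x) : "c"(y));
   the "c" input is pinned to %ecx, while the "=r" output receives
   whichever general register is still free once all lower-numbered
   priorities have been placed.  A matching constraint such as "0"
   would instead reuse the register already chosen for operand 0,
   via the ref_index/input_index links set up above. */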
ST_FUNC void subst_asm_operand(CString *add_str,
                               SValue *sv, int modifier)
{
    int r, reg, size, val;
    char buf[64];

    r = sv->r;
    if ((r & VT_VALMASK) == VT_CONST) {
        if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' &&
            modifier != 'P')
            cstr_ccat(add_str, '$');
        if (r & VT_SYM) {
            cstr_cat(add_str, get_tok_str(sv->sym->v, NULL), -1);
            if ((uint32_t)sv->c.i == 0)
                goto no_offset;
            cstr_ccat(add_str, '+');
        }
        val = sv->c.i;
        if (modifier == 'n')
            val = -val;
        /* print val (not sv->c.i) so the 'n' modifier takes effect */
        snprintf(buf, sizeof(buf), "%d", val);
        cstr_cat(add_str, buf, -1);
    no_offset:;
#ifdef TCC_TARGET_X86_64
        if (r & VT_LVAL)
            cstr_cat(add_str, "(%rip)", -1);
#endif
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
#ifdef TCC_TARGET_X86_64
        snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i);
#else
        snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i);
#endif
        cstr_cat(add_str, buf, -1);
    } else if (r & VT_LVAL) {
        reg = r & VT_VALMASK;
        if (reg >= VT_CONST)
            tcc_error("internal compiler error");
        snprintf(buf, sizeof(buf), "(%%%s)",
#ifdef TCC_TARGET_X86_64
                 get_tok_str(TOK_ASM_rax + reg, NULL)
#else
                 get_tok_str(TOK_ASM_eax + reg, NULL)
#endif
                 );
        cstr_cat(add_str, buf, -1);
    } else {
        /* register case */
        reg = r & VT_VALMASK;
        if (reg >= VT_CONST)
            tcc_error("internal compiler error");

        /* choose register operand size */
        if ((sv->type.t & VT_BTYPE) == VT_BYTE)
            size = 1;
        else if ((sv->type.t & VT_BTYPE) == VT_SHORT)
            size = 2;
#ifdef TCC_TARGET_X86_64
        else if ((sv->type.t & VT_BTYPE) == VT_LLONG)
            size = 8;
#endif
        else
            size = 4;
        if (size == 1 && reg >= 4)
            size = 4;

        if (modifier == 'b') {
            if (reg >= 4)
                tcc_error("cannot use byte register");
            size = 1;
        } else if (modifier == 'h') {
            if (reg >= 4)
                tcc_error("cannot use byte register");
            size = -1;
        } else if (modifier == 'w') {
            size = 2;
        } else if (modifier == 'k') {
            size = 4;
#ifdef TCC_TARGET_X86_64
        } else if (modifier == 'q') {
            size = 8;
#endif
        }

        switch(size) {
        case -1:
            reg = TOK_ASM_ah + reg;
            break;
        case 1:
            reg = TOK_ASM_al + reg;
            break;
        case 2:
            reg = TOK_ASM_ax + reg;
            break;
        default:
            reg = TOK_ASM_eax + reg;
            break;
#ifdef TCC_TARGET_X86_64
        case 8:
            reg = TOK_ASM_rax + reg;
            break;
#endif
        }
        snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL));
        cstr_cat(add_str, buf, -1);
    }
}
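/* Modifier examples for the substitution above: with operand 0
   constrained to the a register, "%0" expands to %eax (or %rax for a
   long long on x86_64), "%b0" to %al, "%h0" to %ah, "%w0" to %ax and
   "%k0" to %eax; for a constant operand, "%n0" prints the negated
   value, and the 'c'/'n'/'P' modifiers suppress the leading '$'. */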
/* generate prolog and epilog code for asm statement */
ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands,
                          int nb_outputs, int is_output,
                          uint8_t *clobber_regs,
                          int out_reg)
{
    uint8_t regs_allocated[NB_ASM_REGS];
    ASMOperand *op;
    int i, reg;
    static uint8_t reg_saved[NB_SAVED_REGS] = { 3, 6, 7 }; /* ebx, esi, edi */

    /* mark all used registers */
    memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated));
    for(i = 0; i < nb_operands;i++) {
        op = &operands[i];
        if (op->reg >= 0)
            regs_allocated[op->reg] = 1;
    }
    if (!is_output) {
        /* generate reg save code */
        for(i = 0; i < NB_SAVED_REGS; i++) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                g(0x50 + reg); /* push */
            }
        }

        /* generate load code */
        for(i = 0; i < nb_operands; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL &&
                    op->is_memory) {
                    /* memory reference case (for both input and
                       output cases) */
                    SValue sv;
                    sv = *op->vt;
                    sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL;
                    sv.type.t = VT_PTR;
                    load(op->reg, &sv);
                } else if (i >= nb_outputs || op->is_rw) {
                    /* load value in register */
                    load(op->reg, op->vt);
                    if (op->is_llong) {
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        load(TREG_XDX, &sv);
                    }
                }
            }
        }
    } else {
        /* generate save code */
        for(i = 0 ; i < nb_outputs; i++) {
            op = &operands[i];
            if (op->reg >= 0) {
                if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) {
                    if (!op->is_memory) {
                        SValue sv;
                        sv = *op->vt;
                        sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL;
                        load(out_reg, &sv);

                        sv.r = (sv.r & ~VT_VALMASK) | out_reg;
                        store(op->reg, &sv);
                    }
                } else {
                    store(op->reg, op->vt);
                    if (op->is_llong) {
                        SValue sv;
                        sv = *op->vt;
                        sv.c.i += 4;
                        store(TREG_XDX, &sv);
                    }
                }
            }
        }
        /* generate reg restore code */
        for(i = NB_SAVED_REGS - 1; i >= 0; i--) {
            reg = reg_saved[i];
            if (regs_allocated[reg]) {
                g(0x58 + reg); /* pop */
            }
        }
    }
}

ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str)
{
    int reg;
    TokenSym *ts;

    if (!strcmp(str, "memory") ||
        !strcmp(str, "cc"))
        return;
    ts = tok_alloc(str, strlen(str));
    reg = ts->tok;
    if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) {
        reg -= TOK_ASM_eax;
    } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) {
        reg -= TOK_ASM_ax;
#ifdef TCC_TARGET_X86_64
    } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) {
        reg -= TOK_ASM_rax;
    } else if (str[0] == 'r' &&
               (((str[1] == '8' || str[1] == '9') && str[2] == 0) ||
                (str[1] == '1' && str[2] >= '0' && str[2] <= '5' && str[3] == 0))) {
        /* Do nothing for now.  We can't parse the high registers.  */
        goto end;
#endif
    } else {
        tcc_error("invalid clobber register '%s'", str);
    }
    clobber_regs[reg] = 1;
 end: ;
}