From 8e1c85e37277be8e0f1e273297a1dc19eb898a01 Mon Sep 17 00:00:00 2001 From: bellard Date: Wed, 21 May 2008 19:16:45 +0000 Subject: [PATCH] converted conditional jumps, SET and CMOVx to TCG git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4518 c046a42c-6fe2-441c-8c8c-71466251a162 --- target-i386/op.c | 222 ---------- target-i386/opreg_template.h | 50 --- target-i386/ops_template.h | 176 -------- target-i386/translate.c | 813 ++++++++++++++++++----------------- 4 files changed, 421 insertions(+), 840 deletions(-) delete mode 100644 target-i386/op.c delete mode 100644 target-i386/opreg_template.h delete mode 100644 target-i386/ops_template.h diff --git a/target-i386/op.c b/target-i386/op.c deleted file mode 100644 index 392f06e013..0000000000 --- a/target-i386/op.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * i386 micro operations - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#define ASM_SOFTMMU -#include "exec.h" - -/* we define the various pieces of code used by the JIT */ - -#define REG EAX -#define REGNAME _EAX -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG ECX -#define REGNAME _ECX -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG EDX -#define REGNAME _EDX -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG EBX -#define REGNAME _EBX -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG ESP -#define REGNAME _ESP -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG EBP -#define REGNAME _EBP -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG ESI -#define REGNAME _ESI -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG EDI -#define REGNAME _EDI -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#ifdef TARGET_X86_64 - -#define REG (env->regs[8]) -#define REGNAME _R8 -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG (env->regs[9]) -#define REGNAME _R9 -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG (env->regs[10]) -#define REGNAME _R10 -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG (env->regs[11]) -#define REGNAME _R11 -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG (env->regs[12]) -#define REGNAME _R12 -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG (env->regs[13]) -#define REGNAME _R13 -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG (env->regs[14]) -#define REGNAME _R14 -#include "opreg_template.h" -#undef REG -#undef REGNAME - -#define REG (env->regs[15]) -#define REGNAME _R15 -#include "opreg_template.h" -#undef REG 
-#undef REGNAME - -#endif - -/* multiple size ops */ - -#define ldul ldl - -#define SHIFT 0 -#include "ops_template.h" -#undef SHIFT - -#define SHIFT 1 -#include "ops_template.h" -#undef SHIFT - -#define SHIFT 2 -#include "ops_template.h" -#undef SHIFT - -#ifdef TARGET_X86_64 - -#define SHIFT 3 -#include "ops_template.h" -#undef SHIFT - -#endif - -/* flags handling */ - -void OPPROTO op_jmp_label(void) -{ - GOTO_LABEL_PARAM(1); -} - -void OPPROTO op_jnz_T0_label(void) -{ - if (T0) - GOTO_LABEL_PARAM(1); - FORCE_RET(); -} - -/* slow set cases (compute x86 flags) */ -void OPPROTO op_seto_T0_cc(void) -{ - int eflags; - eflags = cc_table[CC_OP].compute_all(); - T0 = (eflags >> 11) & 1; -} - -void OPPROTO op_setb_T0_cc(void) -{ - T0 = cc_table[CC_OP].compute_c(); -} - -void OPPROTO op_setz_T0_cc(void) -{ - int eflags; - eflags = cc_table[CC_OP].compute_all(); - T0 = (eflags >> 6) & 1; -} - -void OPPROTO op_setbe_T0_cc(void) -{ - int eflags; - eflags = cc_table[CC_OP].compute_all(); - T0 = (eflags & (CC_Z | CC_C)) != 0; -} - -void OPPROTO op_sets_T0_cc(void) -{ - int eflags; - eflags = cc_table[CC_OP].compute_all(); - T0 = (eflags >> 7) & 1; -} - -void OPPROTO op_setp_T0_cc(void) -{ - int eflags; - eflags = cc_table[CC_OP].compute_all(); - T0 = (eflags >> 2) & 1; -} - -void OPPROTO op_setl_T0_cc(void) -{ - int eflags; - eflags = cc_table[CC_OP].compute_all(); - T0 = ((eflags ^ (eflags >> 4)) >> 7) & 1; -} - -void OPPROTO op_setle_T0_cc(void) -{ - int eflags; - eflags = cc_table[CC_OP].compute_all(); - T0 = (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z)) != 0; -} - -void OPPROTO op_xor_T0_1(void) -{ - T0 ^= 1; -} diff --git a/target-i386/opreg_template.h b/target-i386/opreg_template.h deleted file mode 100644 index 67bee8804a..0000000000 --- a/target-i386/opreg_template.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * i386 micro operations (templates for various register related - * operations) - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free 
software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* mov T1 to REG if T0 is true */ -void OPPROTO glue(glue(op_cmovw,REGNAME),_T1_T0)(void) -{ - if (T0) - REG = (REG & ~0xffff) | (T1 & 0xffff); - FORCE_RET(); -} - -void OPPROTO glue(glue(op_cmovl,REGNAME),_T1_T0)(void) -{ -#ifdef TARGET_X86_64 - if (T0) - REG = (uint32_t)T1; - else - REG = (uint32_t)REG; -#else - if (T0) - REG = (uint32_t)T1; -#endif - FORCE_RET(); -} - -#ifdef TARGET_X86_64 -void OPPROTO glue(glue(op_cmovq,REGNAME),_T1_T0)(void) -{ - if (T0) - REG = T1; - FORCE_RET(); -} -#endif diff --git a/target-i386/ops_template.h b/target-i386/ops_template.h deleted file mode 100644 index f271e7ed20..0000000000 --- a/target-i386/ops_template.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * i386 micro operations (included several times to generate - * different operand sizes) - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#define DATA_BITS (1 << (3 + SHIFT)) -#define SHIFT_MASK (DATA_BITS - 1) -#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1)) -#if DATA_BITS <= 32 -#define SHIFT1_MASK 0x1f -#else -#define SHIFT1_MASK 0x3f -#endif - -#if DATA_BITS == 8 -#define SUFFIX b -#define DATA_TYPE uint8_t -#define DATA_STYPE int8_t -#define DATA_MASK 0xff -#elif DATA_BITS == 16 -#define SUFFIX w -#define DATA_TYPE uint16_t -#define DATA_STYPE int16_t -#define DATA_MASK 0xffff -#elif DATA_BITS == 32 -#define SUFFIX l -#define DATA_TYPE uint32_t -#define DATA_STYPE int32_t -#define DATA_MASK 0xffffffff -#elif DATA_BITS == 64 -#define SUFFIX q -#define DATA_TYPE uint64_t -#define DATA_STYPE int64_t -#define DATA_MASK 0xffffffffffffffffULL -#else -#error unhandled operand size -#endif - -/* various optimized jumps cases */ - -void OPPROTO glue(op_jb_sub, SUFFIX)(void) -{ - target_long src1, src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - - if ((DATA_TYPE)src1 < (DATA_TYPE)src2) - GOTO_LABEL_PARAM(1); - FORCE_RET(); -} - -void OPPROTO glue(op_jz_sub, SUFFIX)(void) -{ - if ((DATA_TYPE)CC_DST == 0) - GOTO_LABEL_PARAM(1); - FORCE_RET(); -} - -void OPPROTO glue(op_jnz_sub, SUFFIX)(void) -{ - if ((DATA_TYPE)CC_DST != 0) - GOTO_LABEL_PARAM(1); - FORCE_RET(); -} - -void OPPROTO glue(op_jbe_sub, SUFFIX)(void) -{ - target_long src1, src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - - if ((DATA_TYPE)src1 <= (DATA_TYPE)src2) - GOTO_LABEL_PARAM(1); - FORCE_RET(); -} - -void OPPROTO glue(op_js_sub, SUFFIX)(void) -{ - if 
(CC_DST & SIGN_MASK) - GOTO_LABEL_PARAM(1); - FORCE_RET(); -} - -void OPPROTO glue(op_jl_sub, SUFFIX)(void) -{ - target_long src1, src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - - if ((DATA_STYPE)src1 < (DATA_STYPE)src2) - GOTO_LABEL_PARAM(1); - FORCE_RET(); -} - -void OPPROTO glue(op_jle_sub, SUFFIX)(void) -{ - target_long src1, src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - - if ((DATA_STYPE)src1 <= (DATA_STYPE)src2) - GOTO_LABEL_PARAM(1); - FORCE_RET(); -} - -/* various optimized set cases */ - -void OPPROTO glue(op_setb_T0_sub, SUFFIX)(void) -{ - target_long src1, src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - - T0 = ((DATA_TYPE)src1 < (DATA_TYPE)src2); -} - -void OPPROTO glue(op_setz_T0_sub, SUFFIX)(void) -{ - T0 = ((DATA_TYPE)CC_DST == 0); -} - -void OPPROTO glue(op_setbe_T0_sub, SUFFIX)(void) -{ - target_long src1, src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - - T0 = ((DATA_TYPE)src1 <= (DATA_TYPE)src2); -} - -void OPPROTO glue(op_sets_T0_sub, SUFFIX)(void) -{ - T0 = lshift(CC_DST, -(DATA_BITS - 1)) & 1; -} - -void OPPROTO glue(op_setl_T0_sub, SUFFIX)(void) -{ - target_long src1, src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - - T0 = ((DATA_STYPE)src1 < (DATA_STYPE)src2); -} - -void OPPROTO glue(op_setle_T0_sub, SUFFIX)(void) -{ - target_long src1, src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - - T0 = ((DATA_STYPE)src1 <= (DATA_STYPE)src2); -} - -#undef DATA_BITS -#undef SHIFT_MASK -#undef SHIFT1_MASK -#undef SIGN_MASK -#undef DATA_TYPE -#undef DATA_STYPE -#undef DATA_MASK -#undef SUFFIX diff --git a/target-i386/translate.c b/target-i386/translate.c index 4da839cc06..c668722349 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -133,6 +133,17 @@ enum { OP_SAR = 7, }; +enum { + JCC_O, + JCC_B, + JCC_Z, + JCC_BE, + JCC_S, + JCC_P, + JCC_L, + JCC_LE, +}; + /* operand size */ enum { OT_BYTE = 0, @@ -228,38 +239,10 @@ static inline void gen_op_andl_A0_ffff(void) #define NB_OP_SIZES 4 -#define DEF_REGS(prefix, suffix) \ - 
prefix ## EAX ## suffix,\ - prefix ## ECX ## suffix,\ - prefix ## EDX ## suffix,\ - prefix ## EBX ## suffix,\ - prefix ## ESP ## suffix,\ - prefix ## EBP ## suffix,\ - prefix ## ESI ## suffix,\ - prefix ## EDI ## suffix,\ - prefix ## R8 ## suffix,\ - prefix ## R9 ## suffix,\ - prefix ## R10 ## suffix,\ - prefix ## R11 ## suffix,\ - prefix ## R12 ## suffix,\ - prefix ## R13 ## suffix,\ - prefix ## R14 ## suffix,\ - prefix ## R15 ## suffix, - #else /* !TARGET_X86_64 */ #define NB_OP_SIZES 3 -#define DEF_REGS(prefix, suffix) \ - prefix ## EAX ## suffix,\ - prefix ## ECX ## suffix,\ - prefix ## EDX ## suffix,\ - prefix ## EBX ## suffix,\ - prefix ## ESP ## suffix,\ - prefix ## EBP ## suffix,\ - prefix ## ESI ## suffix,\ - prefix ## EDI ## suffix, - #endif /* !TARGET_X86_64 */ #if defined(WORDS_BIGENDIAN) @@ -510,20 +493,6 @@ static inline void gen_op_addq_A0_reg_sN(int shift, int reg) } #endif -static GenOpFunc *gen_op_cmov_reg_T1_T0[NB_OP_SIZES - 1][CPU_NB_REGS] = { - [0] = { - DEF_REGS(gen_op_cmovw_, _T1_T0) - }, - [1] = { - DEF_REGS(gen_op_cmovl_, _T1_T0) - }, -#ifdef TARGET_X86_64 - [2] = { - DEF_REGS(gen_op_cmovq_, _T1_T0) - }, -#endif -}; - static inline void gen_op_lds_T0_A0(int idx) { int mem_index = (idx >> 2) - 1; @@ -743,21 +712,6 @@ static inline void gen_op_jz_ecx(int size, int label1) tcg_gen_brcond_tl(TCG_COND_EQ, cpu_tmp0, tcg_const_tl(0), label1); } -static GenOpFunc1 *gen_op_string_jnz_sub[2][4] = { - { - gen_op_jnz_subb, - gen_op_jnz_subw, - gen_op_jnz_subl, - X86_64_ONLY(gen_op_jnz_subq), - }, - { - gen_op_jz_subb, - gen_op_jz_subw, - gen_op_jz_subl, - X86_64_ONLY(gen_op_jz_subq), - }, -}; - static void *helper_in_func[3] = { helper_inb, helper_inw, @@ -858,6 +812,352 @@ static void gen_op_update_neg_cc(void) tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } +/* compute eflags.C to reg */ +static void gen_compute_eflags_c(TCGv reg) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_shli_i32(cpu_tmp2_i32, cpu_cc_op, 3); + tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 
+ (long)cc_table + offsetof(CCTable, compute_c)); + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0); + tcg_gen_call(&tcg_ctx, cpu_tmp2_i32, TCG_CALL_PURE, + 1, &cpu_tmp2_i32, 0, NULL); +#else + tcg_gen_extu_i32_tl(cpu_tmp1_i64, cpu_cc_op); + tcg_gen_shli_i64(cpu_tmp1_i64, cpu_tmp1_i64, 4); + tcg_gen_addi_i64(cpu_tmp1_i64, cpu_tmp1_i64, + (long)cc_table + offsetof(CCTable, compute_c)); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_tmp1_i64, 0); + tcg_gen_call(&tcg_ctx, cpu_tmp1_i64, TCG_CALL_PURE, + 1, &cpu_tmp2_i32, 0, NULL); +#endif + tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); +} + +/* compute all eflags to cc_src */ +static void gen_compute_eflags(TCGv reg) +{ +#if TCG_TARGET_REG_BITS == 32 + tcg_gen_shli_i32(cpu_tmp2_i32, cpu_cc_op, 3); + tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32, + (long)cc_table + offsetof(CCTable, compute_all)); + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0); + tcg_gen_call(&tcg_ctx, cpu_tmp2_i32, TCG_CALL_PURE, + 1, &cpu_tmp2_i32, 0, NULL); +#else + tcg_gen_extu_i32_tl(cpu_tmp1_i64, cpu_cc_op); + tcg_gen_shli_i64(cpu_tmp1_i64, cpu_tmp1_i64, 4); + tcg_gen_addi_i64(cpu_tmp1_i64, cpu_tmp1_i64, + (long)cc_table + offsetof(CCTable, compute_all)); + tcg_gen_ld_i64(cpu_tmp1_i64, cpu_tmp1_i64, 0); + tcg_gen_call(&tcg_ctx, cpu_tmp1_i64, TCG_CALL_PURE, + 1, &cpu_tmp2_i32, 0, NULL); +#endif + tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); +} + +static inline void gen_setcc_slow_T0(int op) +{ + switch(op) { + case JCC_O: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_B: + gen_compute_eflags_c(cpu_T[0]); + break; + case JCC_Z: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_BE: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_S: + gen_compute_eflags(cpu_T[0]); + 
tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_P: + gen_compute_eflags(cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + case JCC_L: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ + tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */ + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + default: + case JCC_LE: + gen_compute_eflags(cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ + tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */ + tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */ + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + break; + } +} + +/* return true if setcc_slow is not needed (WARNING: must be kept in + sync with gen_jcc1) */ +static int is_fast_jcc_case(DisasContext *s, int b) +{ + int jcc_op; + jcc_op = (b >> 1) & 7; + switch(s->cc_op) { + /* we optimize the cmp/jcc case */ + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + case CC_OP_SUBQ: + if (jcc_op == JCC_O || jcc_op == JCC_P) + goto slow_jcc; + break; + + /* some jumps are easy to compute */ + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + case CC_OP_ADDQ: + + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_LOGICQ: + + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_INCQ: + + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_DECQ: + + case CC_OP_SHLB: + case CC_OP_SHLW: + case CC_OP_SHLL: + case CC_OP_SHLQ: + if (jcc_op != JCC_Z && jcc_op != JCC_S) + goto slow_jcc; + break; + default: + slow_jcc: + return 0; + } + return 1; +} + +/* generate a conditional jump to label 'l1' according to jump opcode + value 'b'. In the fast case, T0 is guaranteed not to be used.
*/ +static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) +{ + int inv, jcc_op, size, cond; + TCGv t0; + + inv = b & 1; + jcc_op = (b >> 1) & 7; + + switch(cc_op) { + /* we optimize the cmp/jcc case */ + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + case CC_OP_SUBQ: + + size = cc_op - CC_OP_SUBB; + switch(jcc_op) { + case JCC_Z: + fast_jcc_z: + switch(size) { + case 0: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff); + t0 = cpu_tmp0; + break; + case 1: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff); + t0 = cpu_tmp0; + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff); + t0 = cpu_tmp0; + break; +#endif + default: + t0 = cpu_cc_dst; + break; + } + tcg_gen_brcond_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, + tcg_const_tl(0), l1); + break; + case JCC_S: + fast_jcc_s: + switch(size) { + case 0: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80); + tcg_gen_brcond_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + tcg_const_tl(0), l1); + break; + case 1: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000); + tcg_gen_brcond_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + tcg_const_tl(0), l1); + break; +#ifdef TARGET_X86_64 + case 2: + tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000); + tcg_gen_brcond_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, + tcg_const_tl(0), l1); + break; +#endif + default: + tcg_gen_brcond_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, + tcg_const_tl(0), l1); + break; + } + break; + + case JCC_B: + cond = inv ? TCG_COND_GEU : TCG_COND_LTU; + goto fast_jcc_b; + case JCC_BE: + cond = inv ? 
TCG_COND_GTU : TCG_COND_LEU; + fast_jcc_b: + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + switch(size) { + case 0: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xff); + break; + case 1: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff); + break; +#ifdef TARGET_X86_64 + case 2: + t0 = cpu_tmp0; + tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff); + tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff); + break; +#endif + default: + t0 = cpu_cc_src; + break; + } + tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + break; + + case JCC_L: + cond = inv ? TCG_COND_GE : TCG_COND_LT; + goto fast_jcc_l; + case JCC_LE: + cond = inv ? TCG_COND_GT : TCG_COND_LE; + fast_jcc_l: + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + switch(size) { + case 0: + t0 = cpu_tmp0; + tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext8s_tl(t0, cpu_cc_src); + break; + case 1: + t0 = cpu_tmp0; + tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext16s_tl(t0, cpu_cc_src); + break; +#ifdef TARGET_X86_64 + case 2: + t0 = cpu_tmp0; + tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4); + tcg_gen_ext32s_tl(t0, cpu_cc_src); + break; +#endif + default: + t0 = cpu_cc_src; + break; + } + tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + break; + + default: + goto slow_jcc; + } + break; + + /* some jumps are easy to compute */ + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + case CC_OP_ADDQ: + + case CC_OP_ADCB: + case CC_OP_ADCW: + case CC_OP_ADCL: + case CC_OP_ADCQ: + + case CC_OP_SBBB: + case CC_OP_SBBW: + case CC_OP_SBBL: + case CC_OP_SBBQ: + + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_LOGICQ: + + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_INCQ: + + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_DECQ: + + case CC_OP_SHLB: + case CC_OP_SHLW: + case CC_OP_SHLL: + case CC_OP_SHLQ: + + case CC_OP_SARB: + case CC_OP_SARW: + case CC_OP_SARL: 
+ case CC_OP_SARQ: + switch(jcc_op) { + case JCC_Z: + size = (cc_op - CC_OP_ADDB) & 3; + goto fast_jcc_z; + case JCC_S: + size = (cc_op - CC_OP_ADDB) & 3; + goto fast_jcc_s; + default: + goto slow_jcc; + } + break; + default: + slow_jcc: + gen_setcc_slow_T0(jcc_op); + tcg_gen_brcond_tl(inv ? TCG_COND_EQ : TCG_COND_NE, + cpu_T[0], tcg_const_tl(0), l1); + break; + } +} + /* XXX: does not work with gdbstub "ice" single step - not a serious problem */ static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) @@ -974,7 +1274,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \ gen_ ## op(s, ot); \ gen_op_add_reg_im(s->aflag, R_ECX, -1); \ gen_op_set_cc_op(CC_OP_SUBB + ot); \ - gen_op_string_jnz_sub[nz][ot](l2);\ + gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \ if (!s->jmp_opt) \ gen_op_jz_ecx(s->aflag, l2); \ gen_jmp(s, cur_eip); \ @@ -988,118 +1288,6 @@ GEN_REPZ(outs) GEN_REPZ2(scas) GEN_REPZ2(cmps) -enum { - JCC_O, - JCC_B, - JCC_Z, - JCC_BE, - JCC_S, - JCC_P, - JCC_L, - JCC_LE, -}; - -static GenOpFunc1 *gen_jcc_sub[4][8] = { - [OT_BYTE] = { - NULL, - gen_op_jb_subb, - gen_op_jz_subb, - gen_op_jbe_subb, - gen_op_js_subb, - NULL, - gen_op_jl_subb, - gen_op_jle_subb, - }, - [OT_WORD] = { - NULL, - gen_op_jb_subw, - gen_op_jz_subw, - gen_op_jbe_subw, - gen_op_js_subw, - NULL, - gen_op_jl_subw, - gen_op_jle_subw, - }, - [OT_LONG] = { - NULL, - gen_op_jb_subl, - gen_op_jz_subl, - gen_op_jbe_subl, - gen_op_js_subl, - NULL, - gen_op_jl_subl, - gen_op_jle_subl, - }, -#ifdef TARGET_X86_64 - [OT_QUAD] = { - NULL, - BUGGY_64(gen_op_jb_subq), - gen_op_jz_subq, - BUGGY_64(gen_op_jbe_subq), - gen_op_js_subq, - NULL, - BUGGY_64(gen_op_jl_subq), - BUGGY_64(gen_op_jle_subq), - }, -#endif -}; - -static GenOpFunc *gen_setcc_slow[8] = { - gen_op_seto_T0_cc, - gen_op_setb_T0_cc, - gen_op_setz_T0_cc, - gen_op_setbe_T0_cc, - gen_op_sets_T0_cc, - gen_op_setp_T0_cc, - gen_op_setl_T0_cc, - gen_op_setle_T0_cc, -}; - -static GenOpFunc *gen_setcc_sub[4][8] = 
{ - [OT_BYTE] = { - NULL, - gen_op_setb_T0_subb, - gen_op_setz_T0_subb, - gen_op_setbe_T0_subb, - gen_op_sets_T0_subb, - NULL, - gen_op_setl_T0_subb, - gen_op_setle_T0_subb, - }, - [OT_WORD] = { - NULL, - gen_op_setb_T0_subw, - gen_op_setz_T0_subw, - gen_op_setbe_T0_subw, - gen_op_sets_T0_subw, - NULL, - gen_op_setl_T0_subw, - gen_op_setle_T0_subw, - }, - [OT_LONG] = { - NULL, - gen_op_setb_T0_subl, - gen_op_setz_T0_subl, - gen_op_setbe_T0_subl, - gen_op_sets_T0_subl, - NULL, - gen_op_setl_T0_subl, - gen_op_setle_T0_subl, - }, -#ifdef TARGET_X86_64 - [OT_QUAD] = { - NULL, - gen_op_setb_T0_subq, - gen_op_setz_T0_subq, - gen_op_setbe_T0_subq, - gen_op_sets_T0_subq, - NULL, - gen_op_setl_T0_subq, - gen_op_setle_T0_subq, - }, -#endif -}; - static void *helper_fp_arith_ST0_FT0[8] = { helper_fadd_ST0_FT0, helper_fmul_ST0_FT0, @@ -1123,50 +1311,6 @@ static void *helper_fp_arith_STN_ST0[8] = { helper_fdiv_STN_ST0, }; -/* compute eflags.C to reg */ -static void gen_compute_eflags_c(TCGv reg) -{ -#if TCG_TARGET_REG_BITS == 32 - tcg_gen_shli_i32(cpu_tmp2_i32, cpu_cc_op, 3); - tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32, - (long)cc_table + offsetof(CCTable, compute_c)); - tcg_gen_ld_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0); - tcg_gen_call(&tcg_ctx, cpu_tmp2_i32, TCG_CALL_PURE, - 1, &cpu_tmp2_i32, 0, NULL); -#else - tcg_gen_extu_i32_tl(cpu_tmp1_i64, cpu_cc_op); - tcg_gen_shli_i64(cpu_tmp1_i64, cpu_tmp1_i64, 4); - tcg_gen_addi_i64(cpu_tmp1_i64, cpu_tmp1_i64, - (long)cc_table + offsetof(CCTable, compute_c)); - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_tmp1_i64, 0); - tcg_gen_call(&tcg_ctx, cpu_tmp1_i64, TCG_CALL_PURE, - 1, &cpu_tmp2_i32, 0, NULL); -#endif - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); -} - -/* compute all eflags to cc_src */ -static void gen_compute_eflags(TCGv reg) -{ -#if TCG_TARGET_REG_BITS == 32 - tcg_gen_shli_i32(cpu_tmp2_i32, cpu_cc_op, 3); - tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32, - (long)cc_table + offsetof(CCTable, compute_all)); - tcg_gen_ld_i32(cpu_tmp2_i32, 
cpu_tmp2_i32, 0); - tcg_gen_call(&tcg_ctx, cpu_tmp2_i32, TCG_CALL_PURE, - 1, &cpu_tmp2_i32, 0, NULL); -#else - tcg_gen_extu_i32_tl(cpu_tmp1_i64, cpu_cc_op); - tcg_gen_shli_i64(cpu_tmp1_i64, cpu_tmp1_i64, 4); - tcg_gen_addi_i64(cpu_tmp1_i64, cpu_tmp1_i64, - (long)cc_table + offsetof(CCTable, compute_all)); - tcg_gen_ld_i64(cpu_tmp1_i64, cpu_tmp1_i64, 0); - tcg_gen_call(&tcg_ctx, cpu_tmp1_i64, TCG_CALL_PURE, - 1, &cpu_tmp2_i32, 0, NULL); -#endif - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); -} - /* if d == OR_TMP0, it means memory operand (address in A0) */ static void gen_op(DisasContext *s1, int op, int ot, int d) { @@ -1974,125 +2118,31 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) static inline void gen_jcc(DisasContext *s, int b, target_ulong val, target_ulong next_eip) { - TranslationBlock *tb; - int inv, jcc_op; - GenOpFunc1 *func; - target_ulong tmp; - int l1, l2; - - inv = b & 1; - jcc_op = (b >> 1) & 7; + int l1, l2, cc_op; + cc_op = s->cc_op; + if (s->cc_op != CC_OP_DYNAMIC) { + gen_op_set_cc_op(s->cc_op); + s->cc_op = CC_OP_DYNAMIC; + } if (s->jmp_opt) { - switch(s->cc_op) { - /* we optimize the cmp/jcc case */ - case CC_OP_SUBB: - case CC_OP_SUBW: - case CC_OP_SUBL: - case CC_OP_SUBQ: - func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op]; - break; - - /* some jumps are easy to compute */ - case CC_OP_ADDB: - case CC_OP_ADDW: - case CC_OP_ADDL: - case CC_OP_ADDQ: - - case CC_OP_ADCB: - case CC_OP_ADCW: - case CC_OP_ADCL: - case CC_OP_ADCQ: - - case CC_OP_SBBB: - case CC_OP_SBBW: - case CC_OP_SBBL: - case CC_OP_SBBQ: - - case CC_OP_LOGICB: - case CC_OP_LOGICW: - case CC_OP_LOGICL: - case CC_OP_LOGICQ: - - case CC_OP_INCB: - case CC_OP_INCW: - case CC_OP_INCL: - case CC_OP_INCQ: - - case CC_OP_DECB: - case CC_OP_DECW: - case CC_OP_DECL: - case CC_OP_DECQ: - - case CC_OP_SHLB: - case CC_OP_SHLW: - case CC_OP_SHLL: - case CC_OP_SHLQ: - - case CC_OP_SARB: - case CC_OP_SARW: - case CC_OP_SARL: - case CC_OP_SARQ: - switch(jcc_op) { - 
case JCC_Z: - func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 4][jcc_op]; - break; - case JCC_S: - func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 4][jcc_op]; - break; - default: - func = NULL; - break; - } - break; - default: - func = NULL; - break; - } - - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - s->cc_op = CC_OP_DYNAMIC; - } - - if (!func) { - gen_setcc_slow[jcc_op](); - func = gen_op_jnz_T0_label; - } - - if (inv) { - tmp = val; - val = next_eip; - next_eip = tmp; - } - tb = s->tb; - l1 = gen_new_label(); - func(l1); - + gen_jcc1(s, cc_op, b, l1); + gen_goto_tb(s, 0, next_eip); gen_set_label(l1); gen_goto_tb(s, 1, val); - s->is_jmp = 3; } else { - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - s->cc_op = CC_OP_DYNAMIC; - } - gen_setcc_slow[jcc_op](); - if (inv) { - tmp = val; - val = next_eip; - next_eip = tmp; - } l1 = gen_new_label(); l2 = gen_new_label(); - gen_op_jnz_T0_label(l1); + gen_jcc1(s, cc_op, b, l1); + gen_jmp_im(next_eip); - gen_op_jmp_label(l2); + tcg_gen_br(l2); + gen_set_label(l1); gen_jmp_im(val); gen_set_label(l2); @@ -2102,68 +2152,27 @@ static inline void gen_jcc(DisasContext *s, int b, static void gen_setcc(DisasContext *s, int b) { - int inv, jcc_op; - GenOpFunc *func; + int inv, jcc_op, l1; - inv = b & 1; - jcc_op = (b >> 1) & 7; - switch(s->cc_op) { - /* we optimize the cmp/jcc case */ - case CC_OP_SUBB: - case CC_OP_SUBW: - case CC_OP_SUBL: - case CC_OP_SUBQ: - func = gen_setcc_sub[s->cc_op - CC_OP_SUBB][jcc_op]; - if (!func) - goto slow_jcc; - break; - - /* some jumps are easy to compute */ - case CC_OP_ADDB: - case CC_OP_ADDW: - case CC_OP_ADDL: - case CC_OP_ADDQ: - - case CC_OP_LOGICB: - case CC_OP_LOGICW: - case CC_OP_LOGICL: - case CC_OP_LOGICQ: - - case CC_OP_INCB: - case CC_OP_INCW: - case CC_OP_INCL: - case CC_OP_INCQ: - - case CC_OP_DECB: - case CC_OP_DECW: - case CC_OP_DECL: - case CC_OP_DECQ: - - case CC_OP_SHLB: - case CC_OP_SHLW: - case CC_OP_SHLL: - case CC_OP_SHLQ: - switch(jcc_op) { - 
case JCC_Z: - func = gen_setcc_sub[(s->cc_op - CC_OP_ADDB) % 4][jcc_op]; - break; - case JCC_S: - func = gen_setcc_sub[(s->cc_op - CC_OP_ADDB) % 4][jcc_op]; - break; - default: - goto slow_jcc; - } - break; - default: - slow_jcc: + if (is_fast_jcc_case(s, b)) { + /* nominal case: we use a jump */ + tcg_gen_movi_tl(cpu_T[0], 0); + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + tcg_gen_movi_tl(cpu_T[0], 1); + gen_set_label(l1); + } else { + /* slow case: it is more efficient not to generate a jump, + although it is questionable whether this optimization is + worth it */ + inv = b & 1; + jcc_op = (b >> 1) & 7; if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); - func = gen_setcc_slow[jcc_op]; - break; - } - func(); - if (inv) { - gen_op_xor_T0_1(); + gen_setcc_slow_T0(jcc_op); + if (inv) { + tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); + } } } @@ -5708,19 +5717,39 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1); break; case 0x140 ... 0x14f: /* cmov Gv, Ev */ - ot = dflag + OT_WORD; - modrm = ldub_code(s->pc++); - reg = ((modrm >> 3) & 7) | rex_r; - mod = (modrm >> 6) & 3; - gen_setcc(s, b); - if (mod != 3) { - gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); - gen_op_ld_T1_A0(ot + s->mem_index); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_TN_reg(ot, 1, rm); + { + int l1; + ot = dflag + OT_WORD; + modrm = ldub_code(s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + mod = (modrm >> 6) & 3; + if (mod != 3) { + gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); + gen_op_ld_T1_A0(ot + s->mem_index); + } else { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_TN_reg(ot, 1, rm); + } + if (s->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s->cc_op); +#ifdef TARGET_X86_64 + if (ot == OT_LONG) { + /* XXX: specific Intel behaviour ?
*/ + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUState, regs[reg]) + REG_L_OFFSET); + gen_set_label(l1); + tcg_gen_movi_tl(cpu_tmp0, 0); + tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + REG_LH_OFFSET); + } else +#endif + { + l1 = gen_new_label(); + gen_jcc1(s, s->cc_op, b ^ 1, l1); + gen_op_mov_reg_T1(ot, reg); + gen_set_label(l1); + } } - gen_op_cmov_reg_T1_T0[ot - OT_WORD][reg](); break; /************************/ @@ -6191,7 +6220,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) gen_set_label(l3); gen_jmp_im(next_eip); - gen_op_jmp_label(l2); + tcg_gen_br(l2); gen_set_label(l1); gen_jmp_im(tval);