target/i386: convert LZCNT/TZCNT/BSF/BSR/POPCNT to new decoder
Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
6476902740
commit
11ffaf8c73
@ -450,6 +450,50 @@ static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
|
|||||||
*entry = *decode_by_prefix(s, opcodes_0F7F);
|
*entry = *decode_by_prefix(s, opcodes_0F7F);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Decode opcode 0F B8.
 *
 * With the REPZ prefix the entry is POPCNT (subject to the POPCNT CPUID
 * check carried by the table entry).  Without it, *entry is cleared to
 * all zeroes.
 */
static void decode_0FB8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    static const X86OpEntry popcnt_entry =
        X86_OP_ENTRYwr(POPCNT, G,v, E,v, cpuid(POPCNT) zextT0);

    /* No REPZ prefix: hand back an empty entry. */
    if (!(s->prefix & PREFIX_REPZ)) {
        memset(entry, 0, sizeof(*entry));
        return;
    }

    *entry = popcnt_entry;
}
|
||||||
|
|
||||||
|
/*
 * Decode opcode 0F BC: BSF, or TZCNT for the 0xf3-prefixed form.
 *
 * TZCNT is part of BMI1 (CPUID.(EAX=7,ECX=0):EBX.BMI1), not ABM.  When the
 * guest CPU does not advertise BMI1 the prefix is ignored and every
 * encoding decodes as BSF, which is what the unprefixed table slot holds.
 */
static void decode_0FBC(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /* For BSF, pass 2op as the third operand so that we can use zextT0 */
    static const X86OpEntry opcodes_0FBC[4] = {
        X86_OP_ENTRY3(BSF,     G,v, E,v, 2op,v, zextT0),
        X86_OP_ENTRY3(BSF,     G,v, E,v, 2op,v, zextT0), /* 0x66 */
        X86_OP_ENTRYwr(TZCNT,  G,v, E,v,        zextT0), /* 0xf3 */
        X86_OP_ENTRY3(BSF,     G,v, E,v, 2op,v, zextT0), /* 0xf2 */
    };

    if (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) {
        *entry = *decode_by_prefix(s, opcodes_0FBC);
    } else {
        /* No BMI1: the 0xf3 prefix does not select TZCNT. */
        *entry = opcodes_0FBC[0];
    }
}
|
||||||
|
|
||||||
|
/*
 * Decode opcode 0F BD: BSR, or LZCNT for the 0xf3-prefixed form.
 *
 * LZCNT is enumerated by the ABM bit (CPUID.80000001H:ECX), not by BMI1.
 * When the guest CPU does not advertise ABM the prefix is ignored and
 * every encoding decodes as BSR, which is what the unprefixed table slot
 * holds.
 */
static void decode_0FBD(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
    /* For BSR, pass 2op as the third operand so that we can use zextT0 */
    static const X86OpEntry opcodes_0FBD[4] = {
        X86_OP_ENTRY3(BSR,     G,v, E,v, 2op,v, zextT0),
        X86_OP_ENTRY3(BSR,     G,v, E,v, 2op,v, zextT0), /* 0x66 */
        X86_OP_ENTRYwr(LZCNT,  G,v, E,v,        zextT0), /* 0xf3 */
        X86_OP_ENTRY3(BSR,     G,v, E,v, 2op,v, zextT0), /* 0xf2 */
    };

    if (s->cpuid_ext3_features & CPUID_EXT3_ABM) {
        *entry = *decode_by_prefix(s, opcodes_0FBD);
    } else {
        /* No ABM: the 0xf3 prefix does not select LZCNT. */
        *entry = opcodes_0FBD[0];
    }
}
|
||||||
|
|
||||||
static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
|
static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
|
||||||
{
|
{
|
||||||
static const X86OpEntry movq[4] = {
|
static const X86OpEntry movq[4] = {
|
||||||
@ -1255,8 +1299,11 @@ static const X86OpEntry opcodes_0F[256] = {
|
|||||||
*/
|
*/
|
||||||
[0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0),
|
[0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0),
|
||||||
|
|
||||||
|
[0xb8] = X86_OP_GROUP0(0FB8),
|
||||||
/* decoded as modrm, which is visible as a difference between page fault and #UD */
|
/* decoded as modrm, which is visible as a difference between page fault and #UD */
|
||||||
[0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */
|
[0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */
|
||||||
|
[0xbc] = X86_OP_GROUP0(0FBC),
|
||||||
|
[0xbd] = X86_OP_GROUP0(0FBD),
|
||||||
[0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */
|
[0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */
|
||||||
[0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */
|
[0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */
|
||||||
|
|
||||||
@ -2158,6 +2205,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
|
|||||||
return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
|
return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
|
||||||
case X86_FEAT_PCLMULQDQ:
|
case X86_FEAT_PCLMULQDQ:
|
||||||
return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
|
return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
|
||||||
|
case X86_FEAT_POPCNT:
|
||||||
|
return (s->cpuid_ext_features & CPUID_EXT_POPCNT);
|
||||||
case X86_FEAT_SSE:
|
case X86_FEAT_SSE:
|
||||||
return (s->cpuid_features & CPUID_SSE);
|
return (s->cpuid_features & CPUID_SSE);
|
||||||
case X86_FEAT_SSE2:
|
case X86_FEAT_SSE2:
|
||||||
@ -2548,8 +2597,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu)
|
|||||||
case 0xab: /* bts */
|
case 0xab: /* bts */
|
||||||
case 0xb0 ... 0xb1: /* cmpxchg */
|
case 0xb0 ... 0xb1: /* cmpxchg */
|
||||||
case 0xb3: /* btr */
|
case 0xb3: /* btr */
|
||||||
case 0xb8: /* integer ops */
|
case 0xba ... 0xbb: /* grp8, btc */
|
||||||
case 0xba ... 0xbd: /* integer ops */
|
|
||||||
case 0xc0 ... 0xc1: /* xadd */
|
case 0xc0 ... 0xc1: /* xadd */
|
||||||
case 0xc7: /* grp9 */
|
case 0xc7: /* grp9 */
|
||||||
disas_insn_old(s, cpu, b + 0x100);
|
disas_insn_old(s, cpu, b + 0x100);
|
||||||
|
@ -120,6 +120,7 @@ typedef enum X86CPUIDFeature {
|
|||||||
X86_FEAT_FXSR,
|
X86_FEAT_FXSR,
|
||||||
X86_FEAT_MOVBE,
|
X86_FEAT_MOVBE,
|
||||||
X86_FEAT_PCLMULQDQ,
|
X86_FEAT_PCLMULQDQ,
|
||||||
|
X86_FEAT_POPCNT,
|
||||||
X86_FEAT_SHA_NI,
|
X86_FEAT_SHA_NI,
|
||||||
X86_FEAT_SSE,
|
X86_FEAT_SSE,
|
||||||
X86_FEAT_SSE2,
|
X86_FEAT_SSE2,
|
||||||
|
@ -1333,6 +1333,47 @@ static void gen_BOUND(DisasContext *s, X86DecodedInsn *decode)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Non-standard convention - on entry T0 is zero-extended input, T1 is the output. */
|
||||||
|
static void gen_BSF(DisasContext *s, X86DecodedInsn *decode)
|
||||||
|
{
|
||||||
|
MemOp ot = decode->op[0].ot;
|
||||||
|
|
||||||
|
/* Only the Z bit is defined and it is related to the input. */
|
||||||
|
decode->cc_dst = tcg_temp_new();
|
||||||
|
decode->cc_op = CC_OP_LOGICB + ot;
|
||||||
|
tcg_gen_mov_tl(decode->cc_dst, s->T0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The manual says that the output is undefined when the
|
||||||
|
* input is zero, but real hardware leaves it unchanged, and
|
||||||
|
* real programs appear to depend on that. Accomplish this
|
||||||
|
* by passing the output as the value to return upon zero.
|
||||||
|
*/
|
||||||
|
tcg_gen_ctz_tl(s->T0, s->T0, s->T1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Non-standard convention - on entry T0 is zero-extended input, T1 is the output. */
|
||||||
|
static void gen_BSR(DisasContext *s, X86DecodedInsn *decode)
|
||||||
|
{
|
||||||
|
MemOp ot = decode->op[0].ot;
|
||||||
|
|
||||||
|
/* Only the Z bit is defined and it is related to the input. */
|
||||||
|
decode->cc_dst = tcg_temp_new();
|
||||||
|
decode->cc_op = CC_OP_LOGICB + ot;
|
||||||
|
tcg_gen_mov_tl(decode->cc_dst, s->T0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The manual says that the output is undefined when the
|
||||||
|
* input is zero, but real hardware leaves it unchanged, and
|
||||||
|
* real programs appear to depend on that. Accomplish this
|
||||||
|
* by passing the output as the value to return upon zero.
|
||||||
|
* Plus, return the bit index of the first 1 bit.
|
||||||
|
*/
|
||||||
|
tcg_gen_xori_tl(s->T1, s->T1, TARGET_LONG_BITS - 1);
|
||||||
|
tcg_gen_clz_tl(s->T0, s->T0, s->T1);
|
||||||
|
tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
|
||||||
|
}
|
||||||
|
|
||||||
static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
|
static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
|
||||||
{
|
{
|
||||||
#ifdef TARGET_X86_64
|
#ifdef TARGET_X86_64
|
||||||
@ -2134,6 +2175,24 @@ static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
|
|||||||
gen_lxx_seg(s, decode, R_SS);
|
gen_lxx_seg(s, decode, R_SS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Emit LZCNT.  T0 holds the input on entry and the count on exit.
 */
static void gen_LZCNT(DisasContext *s, X86DecodedInsn *decode)
{
    MemOp size = decode->op[0].ot;
    int operand_bits = 8 << size;

    /*
     * cc_src (used for the C bit) gets a copy of the input, taken before
     * T0 is overwritten; cc_dst is T0 itself.
     */
    decode->cc_src = tcg_temp_new();
    decode->cc_dst = s->T0;
    decode->cc_op = CC_OP_BMILGB + size;
    tcg_gen_mov_tl(decode->cc_src, s->T0);

    /*
     * Count over the whole target_ulong, then subtract the zeros that are
     * expected above the operand's own width.
     */
    tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
    tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - operand_bits);
}
|
||||||
|
|
||||||
static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode)
|
static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode)
|
||||||
{
|
{
|
||||||
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
|
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
|
||||||
@ -2692,6 +2751,15 @@ static void gen_POPA(DisasContext *s, X86DecodedInsn *decode)
|
|||||||
gen_popa(s);
|
gen_popa(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Emit POPCNT.  T0 holds the input on entry and the population count on
 * exit.
 */
static void gen_POPCNT(DisasContext *s, X86DecodedInsn *decode)
{
    decode->cc_op = CC_OP_POPCNT;

    /* Keep a copy of the input in cc_src before T0 is overwritten. */
    decode->cc_src = tcg_temp_new();
    tcg_gen_mov_tl(decode->cc_src, s->T0);

    tcg_gen_ctpop_tl(s->T0, s->T0);
}
|
||||||
|
|
||||||
static void gen_POPF(DisasContext *s, X86DecodedInsn *decode)
|
static void gen_POPF(DisasContext *s, X86DecodedInsn *decode)
|
||||||
{
|
{
|
||||||
MemOp ot;
|
MemOp ot;
|
||||||
@ -3773,6 +3841,20 @@ static void gen_SYSRET(DisasContext *s, X86DecodedInsn *decode)
|
|||||||
s->base.is_jmp = DISAS_EOB_RECHECK_TF;
|
s->base.is_jmp = DISAS_EOB_RECHECK_TF;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Emit TZCNT.  T0 holds the input on entry and the count on exit.
 */
static void gen_TZCNT(DisasContext *s, X86DecodedInsn *decode)
{
    MemOp size = decode->op[0].ot;
    int operand_bits = 8 << size;

    /*
     * cc_src (used for the C bit) gets a copy of the input, taken before
     * T0 is overwritten; cc_dst is T0 itself.
     */
    decode->cc_src = tcg_temp_new();
    decode->cc_dst = s->T0;
    decode->cc_op = CC_OP_BMILGB + size;
    tcg_gen_mov_tl(decode->cc_src, s->T0);

    /* For a zero input the result is the operand size in bits. */
    tcg_gen_ctzi_tl(s->T0, s->T0, operand_bits);
}
|
||||||
|
|
||||||
static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
|
static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
|
||||||
{
|
{
|
||||||
gen_illegal_opcode(s);
|
gen_illegal_opcode(s);
|
||||||
|
@ -823,11 +823,6 @@ static void gen_movs(DisasContext *s, MemOp ot)
|
|||||||
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
|
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy the current value of T0 into cpu_cc_dst. */
static void gen_op_update1_cc(DisasContext *s)
|
|
||||||
{
|
|
||||||
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void gen_op_update2_cc(DisasContext *s)
|
static void gen_op_update2_cc(DisasContext *s)
|
||||||
{
|
{
|
||||||
tcg_gen_mov_tl(cpu_cc_src, s->T1);
|
tcg_gen_mov_tl(cpu_cc_src, s->T1);
|
||||||
@ -3311,56 +3306,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 0x1bc: /* bsf / tzcnt */
|
|
||||||
case 0x1bd: /* bsr / lzcnt */
|
|
||||||
ot = dflag;
|
|
||||||
modrm = x86_ldub_code(env, s);
|
|
||||||
reg = ((modrm >> 3) & 7) | REX_R(s);
|
|
||||||
gen_ld_modrm(env, s, modrm, ot);
|
|
||||||
gen_extu(ot, s->T0);
|
|
||||||
|
|
||||||
/* Note that lzcnt and tzcnt are in different extensions. */
|
|
||||||
if ((prefixes & PREFIX_REPZ)
|
|
||||||
&& (b & 1
|
|
||||||
? s->cpuid_ext3_features & CPUID_EXT3_ABM
|
|
||||||
: s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
|
|
||||||
int size = 8 << ot;
|
|
||||||
/* For lzcnt/tzcnt, C bit is defined related to the input. */
|
|
||||||
tcg_gen_mov_tl(cpu_cc_src, s->T0);
|
|
||||||
if (b & 1) {
|
|
||||||
/* For lzcnt, reduce the target_ulong result by the
|
|
||||||
number of zeros that we expect to find at the top. */
|
|
||||||
tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
|
|
||||||
tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
|
|
||||||
} else {
|
|
||||||
/* For tzcnt, a zero input must return the operand size. */
|
|
||||||
tcg_gen_ctzi_tl(s->T0, s->T0, size);
|
|
||||||
}
|
|
||||||
/* For lzcnt/tzcnt, Z bit is defined related to the result. */
|
|
||||||
gen_op_update1_cc(s);
|
|
||||||
set_cc_op(s, CC_OP_BMILGB + ot);
|
|
||||||
} else {
|
|
||||||
/* For bsr/bsf, only the Z bit is defined and it is related
|
|
||||||
to the input and not the result. */
|
|
||||||
tcg_gen_mov_tl(cpu_cc_dst, s->T0);
|
|
||||||
set_cc_op(s, CC_OP_LOGICB + ot);
|
|
||||||
|
|
||||||
/* ??? The manual says that the output is undefined when the
|
|
||||||
input is zero, but real hardware leaves it unchanged, and
|
|
||||||
real programs appear to depend on that. Accomplish this
|
|
||||||
by passing the output as the value to return upon zero. */
|
|
||||||
if (b & 1) {
|
|
||||||
/* For bsr, return the bit index of the first 1 bit,
|
|
||||||
not the count of leading zeros. */
|
|
||||||
tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
|
|
||||||
tcg_gen_clz_tl(s->T0, s->T0, s->T1);
|
|
||||||
tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
|
|
||||||
} else {
|
|
||||||
tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
gen_op_mov_reg_v(s, ot, reg, s->T0);
|
|
||||||
break;
|
|
||||||
case 0x100:
|
case 0x100:
|
||||||
modrm = x86_ldub_code(env, s);
|
modrm = x86_ldub_code(env, s);
|
||||||
mod = (modrm >> 6) & 3;
|
mod = (modrm >> 6) & 3;
|
||||||
@ -3955,25 +3900,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b)
|
|||||||
}
|
}
|
||||||
gen_nop_modrm(env, s, modrm);
|
gen_nop_modrm(env, s, modrm);
|
||||||
break;
|
break;
|
||||||
case 0x1b8: /* SSE4.2 popcnt */
|
|
||||||
if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
|
|
||||||
PREFIX_REPZ)
|
|
||||||
goto illegal_op;
|
|
||||||
if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
|
|
||||||
goto illegal_op;
|
|
||||||
|
|
||||||
modrm = x86_ldub_code(env, s);
|
|
||||||
reg = ((modrm >> 3) & 7) | REX_R(s);
|
|
||||||
|
|
||||||
ot = dflag;
|
|
||||||
gen_ld_modrm(env, s, modrm, ot);
|
|
||||||
gen_extu(ot, s->T0);
|
|
||||||
tcg_gen_mov_tl(cpu_cc_src, s->T0);
|
|
||||||
tcg_gen_ctpop_tl(s->T0, s->T0);
|
|
||||||
gen_op_mov_reg_v(s, ot, reg, s->T0);
|
|
||||||
|
|
||||||
set_cc_op(s, CC_OP_POPCNT);
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user