target-i386: Implement tzcnt and fix lzcnt
We weren't computing flags for lzcnt at all. At the same time, adjust the implementation of bsf/bsr to avoid the local branch, using movcond instead.

Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
f1300734cb
commit
321c535105
@@ -195,9 +195,8 @@ DEF_HELPER_3(frstor, void, env, tl, int)
|
|||||||
DEF_HELPER_3(fxsave, void, env, tl, int)
|
DEF_HELPER_3(fxsave, void, env, tl, int)
|
||||||
DEF_HELPER_3(fxrstor, void, env, tl, int)
|
DEF_HELPER_3(fxrstor, void, env, tl, int)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_1(bsf, TCG_CALL_NO_RWG_SE, tl, tl)
|
DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
|
||||||
DEF_HELPER_FLAGS_1(bsr, TCG_CALL_NO_RWG_SE, tl, tl)
|
DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
|
||||||
DEF_HELPER_FLAGS_2(lzcnt, TCG_CALL_NO_RWG_SE, tl, tl, int)
|
|
||||||
DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
|
DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl)
|
||||||
DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
|
DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
|
||||||
|
|
||||||
|
@@ -456,19 +456,14 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* bit operations */
|
/* bit operations */
|
||||||
target_ulong helper_bsf(target_ulong t0)
|
target_ulong helper_ctz(target_ulong t0)
|
||||||
{
|
{
|
||||||
return ctztl(t0);
|
return ctztl(t0);
|
||||||
}
|
}
|
||||||
|
|
||||||
target_ulong helper_lzcnt(target_ulong t0, int wordsize)
|
target_ulong helper_clz(target_ulong t0)
|
||||||
{
|
{
|
||||||
return clztl(t0) - (TARGET_LONG_BITS - wordsize);
|
return clztl(t0);
|
||||||
}
|
|
||||||
|
|
||||||
target_ulong helper_bsr(target_ulong t0)
|
|
||||||
{
|
|
||||||
return clztl(t0) ^ (TARGET_LONG_BITS - 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
target_ulong helper_pdep(target_ulong src, target_ulong mask)
|
target_ulong helper_pdep(target_ulong src, target_ulong mask)
|
||||||
|
@@ -7157,46 +7157,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
|
|||||||
tcg_gen_movi_tl(cpu_cc_dst, 0);
|
tcg_gen_movi_tl(cpu_cc_dst, 0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 0x1bc: /* bsf */
|
case 0x1bc: /* bsf / tzcnt */
|
||||||
case 0x1bd: /* bsr */
|
case 0x1bd: /* bsr / lzcnt */
|
||||||
{
|
ot = dflag + OT_WORD;
|
||||||
int label1;
|
modrm = cpu_ldub_code(env, s->pc++);
|
||||||
TCGv t0;
|
reg = ((modrm >> 3) & 7) | rex_r;
|
||||||
|
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
|
||||||
|
gen_extu(ot, cpu_T[0]);
|
||||||
|
|
||||||
ot = dflag + OT_WORD;
|
/* Note that lzcnt and tzcnt are in different extensions. */
|
||||||
modrm = cpu_ldub_code(env, s->pc++);
|
if ((prefixes & PREFIX_REPZ)
|
||||||
reg = ((modrm >> 3) & 7) | rex_r;
|
&& (b & 1
|
||||||
gen_ldst_modrm(env, s,modrm, ot, OR_TMP0, 0);
|
? s->cpuid_ext3_features & CPUID_EXT3_ABM
|
||||||
gen_extu(ot, cpu_T[0]);
|
: s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
|
||||||
t0 = tcg_temp_local_new();
|
int size = 8 << ot;
|
||||||
tcg_gen_mov_tl(t0, cpu_T[0]);
|
tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
|
||||||
if ((b & 1) && (prefixes & PREFIX_REPZ) &&
|
if (b & 1) {
|
||||||
(s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
|
/* For lzcnt, reduce the target_ulong result by the
|
||||||
switch(ot) {
|
number of zeros that we expect to find at the top. */
|
||||||
case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
|
gen_helper_clz(cpu_T[0], cpu_T[0]);
|
||||||
tcg_const_i32(16)); break;
|
tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size);
|
||||||
case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
|
|
||||||
tcg_const_i32(32)); break;
|
|
||||||
case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
|
|
||||||
tcg_const_i32(64)); break;
|
|
||||||
}
|
|
||||||
gen_op_mov_reg_T0(ot, reg);
|
|
||||||
} else {
|
} else {
|
||||||
label1 = gen_new_label();
|
/* For tzcnt, a zero input must return the operand size:
|
||||||
tcg_gen_movi_tl(cpu_cc_dst, 0);
|
force all bits outside the operand size to 1. */
|
||||||
tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
|
target_ulong mask = (target_ulong)-2 << (size - 1);
|
||||||
if (b & 1) {
|
tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask);
|
||||||
gen_helper_bsr(cpu_T[0], t0);
|
gen_helper_ctz(cpu_T[0], cpu_T[0]);
|
||||||
} else {
|
|
||||||
gen_helper_bsf(cpu_T[0], t0);
|
|
||||||
}
|
|
||||||
gen_op_mov_reg_T0(ot, reg);
|
|
||||||
tcg_gen_movi_tl(cpu_cc_dst, 1);
|
|
||||||
gen_set_label(label1);
|
|
||||||
set_cc_op(s, CC_OP_LOGICB + ot);
|
|
||||||
}
|
}
|
||||||
tcg_temp_free(t0);
|
/* For lzcnt/tzcnt, C and Z bits are defined and are
|
||||||
|
related to the result. */
|
||||||
|
gen_op_update1_cc();
|
||||||
|
set_cc_op(s, CC_OP_BMILGB + ot);
|
||||||
|
} else {
|
||||||
|
/* For bsr/bsf, only the Z bit is defined and it is related
|
||||||
|
to the input and not the result. */
|
||||||
|
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
|
||||||
|
set_cc_op(s, CC_OP_LOGICB + ot);
|
||||||
|
if (b & 1) {
|
||||||
|
/* For bsr, return the bit index of the first 1 bit,
|
||||||
|
not the count of leading zeros. */
|
||||||
|
gen_helper_clz(cpu_T[0], cpu_T[0]);
|
||||||
|
tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1);
|
||||||
|
} else {
|
||||||
|
gen_helper_ctz(cpu_T[0], cpu_T[0]);
|
||||||
|
}
|
||||||
|
/* ??? The manual says that the output is undefined when the
|
||||||
|
input is zero, but real hardware leaves it unchanged, and
|
||||||
|
real programs appear to depend on that. */
|
||||||
|
tcg_gen_movi_tl(cpu_tmp0, 0);
|
||||||
|
tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0,
|
||||||
|
cpu_regs[reg], cpu_T[0]);
|
||||||
}
|
}
|
||||||
|
gen_op_mov_reg_T0(ot, reg);
|
||||||
break;
|
break;
|
||||||
/************************/
|
/************************/
|
||||||
/* bcd */
|
/* bcd */
|
||||||
|
Loading…
Reference in New Issue
Block a user