target/i386: implement CMPccXADD
The main difficulty here is that a page fault when writing to the destination must not overwrite the flags. Therefore, the flags computation must be inlined instead of using gen_jcc1*. For simplicity, I am using an unconditional cmpxchg operation that becomes a NOP if the comparison fails. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
e7bbb7cb71
commit
405c7c0708
@ -738,7 +738,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
|
|||||||
#define TCG_7_0_EDX_FEATURES (CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_KERNEL_FEATURES)
|
#define TCG_7_0_EDX_FEATURES (CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_KERNEL_FEATURES)
|
||||||
|
|
||||||
#define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \
|
#define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \
|
||||||
CPUID_7_1_EAX_FSRC)
|
CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD)
|
||||||
#define TCG_7_1_EDX_FEATURES 0
|
#define TCG_7_1_EDX_FEATURES 0
|
||||||
#define TCG_7_2_EDX_FEATURES 0
|
#define TCG_7_2_EDX_FEATURES 0
|
||||||
#define TCG_APM_FEATURES 0
|
#define TCG_APM_FEATURES 0
|
||||||
|
@ -538,6 +538,28 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = {
|
|||||||
[0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
|
[0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
|
||||||
[0xde] = X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
|
[0xde] = X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
|
||||||
[0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
|
[0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
|
||||||
|
|
||||||
|
/*
|
||||||
|
* REG selects srcdest2 operand, VEX.vvvv selects src3. VEX class not found
|
||||||
|
* in manual, assumed to be 13 from the VEX.L0 constraint.
|
||||||
|
*/
|
||||||
|
[0xe0] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xe1] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xe2] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xe3] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xe4] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xe5] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xe6] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xe7] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
|
||||||
|
[0xe8] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xe9] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xea] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xeb] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xec] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xed] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xee] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
|
[0xef] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66),
|
||||||
};
|
};
|
||||||
|
|
||||||
/* five rows for no prefix, 66, F3, F2, 66+F2 */
|
/* five rows for no prefix, 66, F3, F2, 66+F2 */
|
||||||
@ -1503,6 +1525,9 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
|
|||||||
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
|
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
|
||||||
case X86_FEAT_SHA_NI:
|
case X86_FEAT_SHA_NI:
|
||||||
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
|
return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
|
||||||
|
|
||||||
|
case X86_FEAT_CMPCCXADD:
|
||||||
|
return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
|
||||||
}
|
}
|
||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
|
@ -104,6 +104,7 @@ typedef enum X86CPUIDFeature {
|
|||||||
X86_FEAT_AVX2,
|
X86_FEAT_AVX2,
|
||||||
X86_FEAT_BMI1,
|
X86_FEAT_BMI1,
|
||||||
X86_FEAT_BMI2,
|
X86_FEAT_BMI2,
|
||||||
|
X86_FEAT_CMPCCXADD,
|
||||||
X86_FEAT_F16C,
|
X86_FEAT_F16C,
|
||||||
X86_FEAT_FMA,
|
X86_FEAT_FMA,
|
||||||
X86_FEAT_MOVBE,
|
X86_FEAT_MOVBE,
|
||||||
|
@ -1190,6 +1190,110 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
|
|||||||
prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
|
prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Emit TCG ops for CMPccXADD.
 *
 * The memory operand at s->A0 is compared with the register operand
 * (decode->op[1]).  When the condition encoded in the low four bits of the
 * opcode byte holds, s->T1 is added to memory.  The value that was read from
 * memory is written back to operand 1, and the flag state is set up as for
 * the subtraction "memory - register" (CC_OP_SUBB + ot).
 *
 * The update is a load / compute / cmpxchg loop.  The cmpxchg is issued
 * unconditionally; when the condition is false it stores back the value that
 * was just loaded.  The condition is evaluated by hand so that cc_* is left
 * alone until the store has gone through.
 */
static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    /* TCG condition to apply to (cmp_lhs, cmp_rhs) for each Jcc encoding. */
    static const TCGCond tcg_cond_for_jcc[8] = {
        [JCC_O]  = TCG_COND_LT,   /* sign bit tested via signed compare with 0 */
        [JCC_B]  = TCG_COND_LTU,
        [JCC_Z]  = TCG_COND_EQ,
        [JCC_BE] = TCG_COND_LEU,
        [JCC_S]  = TCG_COND_LT,   /* sign bit tested via signed compare with 0 */
        [JCC_P]  = TCG_COND_EQ,   /* low bit of popcount is 0 for even parity */
        [JCC_L]  = TCG_COND_LT,
        [JCC_LE] = TCG_COND_LE,
    };

    TCGLabel *retry = gen_new_label();
    TCGLabel *done = gen_new_label();
    TCGv seen = tcg_temp_new();        /* value returned by the cmpxchg */
    TCGv store_val = tcg_temp_new();   /* value the cmpxchg tries to store */
    TCGv rhs = tcg_temp_new();         /* extended register operand */

    const int jcc_op = (decode->b >> 1) & 7;
    const MemOp ot = decode->op[0].ot;
    const int width = 8 << ot;
    MemOp ot_full = ot | MO_LE;
    TCGCond cond = tcg_cond_for_jcc[jcc_op];
    TCGv cmp_lhs;
    TCGv cmp_rhs;

    /* Bit 0 of the opcode selects the negated form of the condition. */
    if (decode->b & 1) {
        cond = tcg_invert_cond(cond);
    }

    /*
     * L and LE (and their inverses) compare sign-extended operands.  S and P
     * take this path as well, but for them it makes no difference whether the
     * operands are zero- or sign-extended.
     */
    if (jcc_op >= JCC_S) {
        ot_full |= MO_SIGN;
    }

    /*
     * rhs only becomes cc_src *after* cpu_regs[] has been written back, hence
     * tcg_gen_ext_tl into our own temporary rather than gen_ext_tl.
     */
    tcg_gen_ext_tl(rhs, cpu_regs[decode->op[1].n], ot_full);

    /*
     * Top of the cmpxchg loop.  Live values:
     * - s->T1: addition operand (from decoder)
     * - s->A0: dest address (from decoder)
     * - s->cc_srcT: memory operand (lhs for comparison)
     * - rhs: rhs for comparison
     */
    gen_set_label(retry);
    gen_op_ld_v(s, ot_full, s->cc_srcT, s->A0);
    tcg_gen_sub_tl(s->T0, s->cc_srcT, rhs);

    /* Work out the comparison operands by hand; cc_* must not be clobbered. */
    switch (jcc_op) {
    case JCC_S:
        /* Sign of the difference, at operand width. */
        tcg_gen_sextract_tl(s->tmp0, s->T0, 0, width);
        cmp_lhs = s->tmp0;
        cmp_rhs = tcg_constant_tl(0);
        break;

    case JCC_P:
        /* Low bit of the population count of the difference's low byte. */
        tcg_gen_ext8u_tl(s->tmp0, s->T0);
        tcg_gen_ctpop_tl(s->tmp0, s->tmp0);
        tcg_gen_andi_tl(s->tmp0, s->tmp0, 1);
        cmp_lhs = s->tmp0;
        cmp_rhs = tcg_constant_tl(0);
        break;

    case JCC_O:
        /*
         * (src1 ^ src2) & (src1 ^ dst), sign-extended from operand width.
         * store_val is borrowed as scratch; it is recomputed below.
         */
        tcg_gen_xor_tl(store_val, s->cc_srcT, s->T0);
        tcg_gen_xor_tl(s->tmp0, s->cc_srcT, rhs);
        tcg_gen_and_tl(s->tmp0, s->tmp0, store_val);
        tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, width);
        cmp_lhs = s->tmp0;
        cmp_rhs = tcg_constant_tl(0);
        break;

    default:
        /* B, Z, BE, L, LE: compare the two operands directly. */
        cmp_lhs = s->cc_srcT;
        cmp_rhs = rhs;
        break;
    }

    /* New value is the sum if the condition holds, else the loaded value. */
    tcg_gen_add_tl(store_val, s->cc_srcT, s->T1);
    tcg_gen_movcond_tl(cond, store_val, cmp_lhs, cmp_rhs, store_val, s->cc_srcT);
    tcg_gen_atomic_cmpxchg_tl(seen, s->A0, s->cc_srcT, store_val,
                              s->mem_index, ot_full);

    /* The cmpxchg went through: leave the loop unconditionally. */
    tcg_gen_brcond_tl(TCG_COND_EQ, seen, s->cc_srcT, done);

    /* It did not: go around again only if there was a real store to make. */
    tcg_gen_brcond_tl(cond, cmp_lhs, cmp_rhs, retry);
    gen_set_label(done);

    /* The register only receives the old value once the loop has exited. */
    gen_writeback(s, decode, 1, s->cc_srcT);

    /* Flags are those of the subtraction computed in the final iteration. */
    decode->cc_dst = s->T0;
    decode->cc_src = rhs;
    decode->cc_op = CC_OP_SUBB + ot;
}
|
||||||
|
|
||||||
static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
|
static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
|
||||||
{
|
{
|
||||||
MemOp ot = decode->op[2].ot;
|
MemOp ot = decode->op[2].ot;
|
||||||
|
@ -122,6 +122,7 @@ typedef struct DisasContext {
|
|||||||
int cpuid_ext3_features;
|
int cpuid_ext3_features;
|
||||||
int cpuid_7_0_ebx_features;
|
int cpuid_7_0_ebx_features;
|
||||||
int cpuid_7_0_ecx_features;
|
int cpuid_7_0_ecx_features;
|
||||||
|
int cpuid_7_1_eax_features;
|
||||||
int cpuid_xsave_features;
|
int cpuid_xsave_features;
|
||||||
|
|
||||||
/* TCG local temps */
|
/* TCG local temps */
|
||||||
@ -6963,6 +6964,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
|
|||||||
dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
|
dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
|
||||||
dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
|
dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
|
||||||
dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX];
|
dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX];
|
||||||
|
dc->cpuid_7_1_eax_features = env->features[FEAT_7_1_EAX];
|
||||||
dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
|
dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
|
||||||
dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
|
dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
|
||||||
(flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
|
(flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
|
||||||
|
Loading…
Reference in New Issue
Block a user