target/i386: fix memory operand size for CVTPS2PD

CVTPS2PD only loads a half-register for memory, unlike the other
operations under 0x0F 0x5A.  "Unpack" the group into separate
emission functions instead of using gen_unary_fp_sse.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit abd41884c5)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
This commit is contained in:
Paolo Bonzini 2023-08-29 18:28:33 +02:00 committed by Michael Tokarev
parent 796468c24a
commit 755cf29ead
2 changed files with 37 additions and 7 deletions

View File

@ -805,10 +805,20 @@ static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entr
case 0x51: entry->gen = gen_VSQRT; break;
case 0x52: entry->gen = gen_VRSQRT; break;
case 0x53: entry->gen = gen_VRCP; break;
case 0x5A: entry->gen = gen_VCVTfp2fp; break;
}
}
static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry opcodes_0F5A[4] = {
X86_OP_ENTRY2(VCVTPS2PD, V,x, W,xh, vex2), /* VCVTPS2PD */
X86_OP_ENTRY2(VCVTPD2PS, V,x, W,x, vex2), /* VCVTPD2PS */
X86_OP_ENTRY3(VCVTSS2SD, V,x, H,x, W,x, vex2_rep3), /* VCVTSS2SD */
X86_OP_ENTRY3(VCVTSD2SS, V,x, H,x, W,x, vex2_rep3), /* VCVTSD2SS */
};
*entry = *decode_by_prefix(s, opcodes_0F5A);
}
static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
{
static const X86OpEntry opcodes_0F5B[4] = {
@ -891,7 +901,7 @@ static const X86OpEntry opcodes_0F[256] = {
[0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* CVTPS2PD */
[0x5a] = X86_OP_GROUP0(0F5A),
[0x5b] = X86_OP_GROUP0(0F5B),
[0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
[0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),

View File

@ -1917,12 +1917,22 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
set_cc_op(s, CC_OP_EFLAGS);
}
static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_unary_fp_sse(s, env, decode,
gen_helper_cvtpd2ps_xmm, gen_helper_cvtps2pd_xmm,
gen_helper_cvtpd2ps_ymm, gen_helper_cvtps2pd_ymm,
gen_helper_cvtsd2ss, gen_helper_cvtss2sd);
if (s->vex_l) {
gen_helper_cvtpd2ps_ymm(cpu_env, OP_PTR0, OP_PTR2);
} else {
gen_helper_cvtpd2ps_xmm(cpu_env, OP_PTR0, OP_PTR2);
}
}
static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
if (s->vex_l) {
gen_helper_cvtps2pd_ymm(cpu_env, OP_PTR0, OP_PTR2);
} else {
gen_helper_cvtps2pd_xmm(cpu_env, OP_PTR0, OP_PTR2);
}
}
static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@ -1939,6 +1949,16 @@ static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
}
}
static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_helper_cvtsd2ss(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2);
}
static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
gen_helper_cvtss2sd(cpu_env, OP_PTR0, OP_PTR1, OP_PTR2);
}
static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
int vec_len = vector_len(s, decode);