target/i386: fix phadd* with identical destination and source register

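Detected by asm test suite failures in dav1d
(https://code.videolan.org/videolan/dav1d). Can be reproduced by
`qemu-x86_64 -cpu core2duo ./tests/checkasm --test=mc_8bpc 1659890620`.

The helpers stored each horizontal sum directly into the destination
register. When destination and source are the same register, the low
lanes were therefore overwritten before the source half of the result
was computed from them. Build the result in a temporary Reg and copy it
to the destination once all lanes have been read.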

Signed-off-by: Janne Grunau <j@jannau.net>
Message-Id: <20200401225253.30745-1-j@jannau.net>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Janne Grunau 2020-04-02 00:52:53 +02:00 committed by Paolo Bonzini
parent cac9edfc4d
commit 2dfbea1a87

View File

@@ -1435,34 +1435,47 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{ {
d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); Reg r;
XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
*d = r;
} }
void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{ {
d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); Reg r;
XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
*d = r;
} }
void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{ {
d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); Reg r;
d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
*d = r;
} }
void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)