target/arm: Replace tcg_gen_dupi_vec with constants in translate-sve.c

Instead of copying a constant into a temporary with dupi,
use a vector constant directly.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240912024114.1097832-3-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2024-09-11 19:40:47 -07:00 committed by Peter Maydell
parent 143e179c84
commit 04e824eac9
1 changed files with 49 additions and 79 deletions

View File

@ -6081,9 +6081,9 @@ static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
if (top) {
if (shl == halfbits) {
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
tcg_gen_and_vec(vece, d, n, t);
tcg_gen_and_vec(vece, d, n,
tcg_constant_vec_matching(d, vece,
MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_sari_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
@ -6138,18 +6138,18 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
if (top) {
if (shl == halfbits) {
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
tcg_gen_and_vec(vece, d, n, t);
tcg_gen_and_vec(vece, d, n,
tcg_constant_vec_matching(d, vece,
MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_shri_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
}
} else {
if (shl == 0) {
TCGv_vec t = tcg_temp_new_vec_matching(d);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_and_vec(vece, d, n, t);
tcg_gen_and_vec(vece, d, n,
tcg_constant_vec_matching(d, vece,
MAKE_64BIT_MASK(0, halfbits)));
} else {
tcg_gen_shli_vec(vece, d, n, halfbits);
tcg_gen_shri_vec(vece, d, d, halfbits - shl);
@ -6317,18 +6317,14 @@ static const TCGOpcode sqxtn_list[] = {
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
tcg_gen_dupi_vec(vece, t, min);
tcg_gen_smax_vec(vece, d, n, t);
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_smin_vec(vece, d, d, t);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_and_vec(vece, d, d, t);
tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min));
tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask));
}
static const GVecGen2 sqxtnb_ops[3] = {
@ -6349,19 +6345,15 @@ TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
tcg_gen_dupi_vec(vece, t, min);
tcg_gen_smax_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_smin_vec(vece, n, n, t);
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_bitsel_vec(vece, d, t, d, n);
tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const GVecGen2 sqxtnt_ops[3] = {
@ -6389,12 +6381,10 @@ static const TCGOpcode uqxtn_list[] = {
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_umin_vec(vece, d, n, t);
tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 uqxtnb_ops[3] = {
@ -6415,14 +6405,13 @@ TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_umin_vec(vece, n, n, t);
tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_bitsel_vec(vece, d, t, d, n);
tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 uqxtnt_ops[3] = {
@ -6450,14 +6439,11 @@ static const TCGOpcode sqxtun_list[] = {
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
tcg_gen_dupi_vec(vece, t, 0);
tcg_gen_smax_vec(vece, d, n, t);
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_umin_vec(vece, d, d, t);
tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0));
tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 sqxtunb_ops[3] = {
@ -6478,16 +6464,14 @@ TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_dupi_vec(vece, t, 0);
tcg_gen_smax_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_umin_vec(vece, n, n, t);
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_bitsel_vec(vece, d, t, d, n);
tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 sqxtunt_ops[3] = {
@ -6551,13 +6535,11 @@ static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_and_vec(vece, d, n, t);
tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
@ -6609,13 +6591,11 @@ static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shli_vec(vece, n, n, halfbits - shr);
tcg_gen_dupi_vec(vece, t, mask);
tcg_gen_bitsel_vec(vece, d, t, d, n);
tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
@ -6658,14 +6638,12 @@ TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, 0);
tcg_gen_smax_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_umin_vec(vece, d, n, t);
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode sqshrunb_vec_list[] = {
@ -6690,16 +6668,15 @@ TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t max = MAKE_64BIT_MASK(0, halfbits);
TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_sari_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, 0);
tcg_gen_smax_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_umin_vec(vece, n, n, t);
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_bitsel_vec(vece, d, t, d, n);
tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode sqshrunt_vec_list[] = {
@ -6742,18 +6719,15 @@ TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, min);
tcg_gen_smax_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_smin_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_and_vec(vece, d, n, t);
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode sqshrnb_vec_list[] = {
@ -6778,19 +6752,16 @@ TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, min);
tcg_gen_smax_vec(vece, n, n, t);
tcg_gen_dupi_vec(vece, t, max);
tcg_gen_smin_vec(vece, n, n, t);
tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_bitsel_vec(vece, d, t, d, n);
tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode sqshrnt_vec_list[] = {
@ -6833,12 +6804,11 @@ TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_umin_vec(vece, d, n, t);
tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode uqshrnb_vec_list[] = {
@ -6863,14 +6833,14 @@ TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits);
TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_shri_vec(vece, n, n, shr);
tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
tcg_gen_umin_vec(vece, n, n, t);
tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
tcg_gen_bitsel_vec(vece, d, t, d, n);
tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode uqshrnt_vec_list[] = {