target/arm: Convert SUQADD and USQADD to gvec
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240528203044.612851-5-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
01d5665bc3
commit
8f6343ae18
@ -836,6 +836,22 @@ DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
/*
 * Out-of-line gvec helpers for USQADD and SUQADD, one declaration per
 * element-size suffix (_b, _h, _s, _d).  Every helper is declared as
 * returning void and taking four pointers followed by an i32, with the
 * TCG_CALL_NO_RWG call flag.
 */
DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_usqadd_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_usqadd_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG,
|
||||
void, ptr, ptr, ptr, ptr, i32)
|
||||
|
@ -188,3 +188,113 @@ void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
|
||||
tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
|
||||
}
|
||||
|
||||
/*
 * Inline vector expansion of SUQADD.
 *
 * @vece: log2 of the element size in bytes (element width is 8 << vece bits)
 * @t:    receives the saturated sum
 * @qc:   sticky saturation accumulator; a non-zero value is OR-ed into
 *        every lane whose addend had to be clamped
 * @a:    addend whose headroom below the signed maximum limits the sum
 * @b:    addend that is clamped, using an unsigned minimum
 */
static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    /* Bit index of the sign bit; all bits below it set is the signed max. */
    unsigned sign_pos = (8 << vece) - 1;
    TCGv_vec smax = tcg_constant_vec_matching(t, vece, (1ull << sign_pos) - 1);
    TCGv_vec addend = tcg_temp_new_vec_matching(t);

    /* Headroom: the most that may be added to @a without overflow. */
    tcg_gen_sub_vec(vece, addend, smax, a);

    /* Clamp @b to the headroom so that the sum below cannot overflow. */
    tcg_gen_umin_vec(vece, addend, addend, b);
    tcg_gen_add_vec(vece, t, addend, a);

    /* A clamped addend differs from @b; fold that difference into QC. */
    tcg_gen_xor_vec(vece, addend, addend, b);
    tcg_gen_or_vec(vece, qc, qc, addend);
}
|
||||
|
||||
void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
|
||||
uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t opr_sz, uint32_t max_sz)
|
||||
{
|
||||
static const TCGOpcode vecop_list[] = {
|
||||
INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0
|
||||
};
|
||||
static const GVecGen4 ops[4] = {
|
||||
{ .fniv = gen_suqadd_vec,
|
||||
.fno = gen_helper_gvec_suqadd_b,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_suqadd_vec,
|
||||
.fno = gen_helper_gvec_suqadd_h,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_suqadd_vec,
|
||||
.fno = gen_helper_gvec_suqadd_s,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_suqadd_vec,
|
||||
.fno = gen_helper_gvec_suqadd_d,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
|
||||
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
|
||||
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
|
||||
}
|
||||
|
||||
/*
 * Inline vector expansion of USQADD.
 *
 * @vece: element size selector passed through to every vector op
 * @t:    receives the saturated result (also used as scratch for -b)
 * @qc:   sticky saturation accumulator; a non-zero value is OR-ed into
 *        every lane whose saturated result differs from the plain sum
 * @a:    addend that is saturated as unsigned
 * @b:    addend whose sign selects between the add and subtract results
 */
static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
                           TCGv_vec a, TCGv_vec b)
{
    TCGv_vec scratch = tcg_temp_new_vec_matching(t);
    TCGv_vec zero = tcg_constant_vec_matching(t, vece, 0);

    /*
     * Produce both candidates: a saturating unsigned add of @b (for
     * b >= 0) in scratch, and a saturating unsigned subtract of -b
     * (for b < 0) in @t.
     */
    tcg_gen_neg_vec(vece, t, b);
    tcg_gen_usadd_vec(vece, scratch, a, b);
    tcg_gen_ussub_vec(vece, t, a, t);

    /* Lanes with b < 0 keep the subtract result; the rest take the add. */
    tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, zero, t, scratch);

    /* The wrapping sum differs from @t exactly where saturation occurred. */
    tcg_gen_add_vec(vece, scratch, a, b);
    tcg_gen_xor_vec(vece, scratch, scratch, t);
    tcg_gen_or_vec(vece, qc, qc, scratch);
}
|
||||
|
||||
void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
|
||||
uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t opr_sz, uint32_t max_sz)
|
||||
{
|
||||
static const TCGOpcode vecop_list[] = {
|
||||
INDEX_op_neg_vec, INDEX_op_add_vec,
|
||||
INDEX_op_usadd_vec, INDEX_op_ussub_vec,
|
||||
INDEX_op_cmpsel_vec, 0
|
||||
};
|
||||
static const GVecGen4 ops[4] = {
|
||||
{ .fniv = gen_usqadd_vec,
|
||||
.fno = gen_helper_gvec_usqadd_b,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_usqadd_vec,
|
||||
.fno = gen_helper_gvec_usqadd_h,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_usqadd_vec,
|
||||
.fno = gen_helper_gvec_usqadd_s,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_usqadd_vec,
|
||||
.fno = gen_helper_gvec_usqadd_d,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc));
|
||||
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
|
||||
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
|
||||
}
|
||||
|
@ -9983,83 +9983,68 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
|
||||
|
||||
/* Remaining saturating accumulating ops */
|
||||
static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
|
||||
bool is_q, int size, int rn, int rd)
|
||||
bool is_q, unsigned size, int rn, int rd)
|
||||
{
|
||||
bool is_double = (size == 3);
|
||||
if (!is_scalar) {
|
||||
gen_gvec_fn3(s, is_q, rd, rd, rn,
|
||||
is_u ? gen_gvec_usqadd_qc : gen_gvec_suqadd_qc, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_double) {
|
||||
if (size == 3) {
|
||||
TCGv_i64 tcg_rn = tcg_temp_new_i64();
|
||||
TCGv_i64 tcg_rd = tcg_temp_new_i64();
|
||||
int pass;
|
||||
|
||||
for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
|
||||
read_vec_element(s, tcg_rn, rn, pass, MO_64);
|
||||
read_vec_element(s, tcg_rd, rd, pass, MO_64);
|
||||
read_vec_element(s, tcg_rn, rn, 0, MO_64);
|
||||
read_vec_element(s, tcg_rd, rd, 0, MO_64);
|
||||
|
||||
if (is_u) { /* USQADD */
|
||||
gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
} else { /* SUQADD */
|
||||
gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
}
|
||||
write_vec_element(s, tcg_rd, rd, pass, MO_64);
|
||||
if (is_u) { /* USQADD */
|
||||
gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
} else { /* SUQADD */
|
||||
gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
}
|
||||
clear_vec_high(s, !is_scalar, rd);
|
||||
write_vec_element(s, tcg_rd, rd, 0, MO_64);
|
||||
clear_vec_high(s, false, rd);
|
||||
} else {
|
||||
TCGv_i32 tcg_rn = tcg_temp_new_i32();
|
||||
TCGv_i32 tcg_rd = tcg_temp_new_i32();
|
||||
int pass, maxpasses;
|
||||
|
||||
if (is_scalar) {
|
||||
maxpasses = 1;
|
||||
} else {
|
||||
maxpasses = is_q ? 4 : 2;
|
||||
read_vec_element_i32(s, tcg_rn, rn, 0, size);
|
||||
read_vec_element_i32(s, tcg_rd, rd, 0, size);
|
||||
|
||||
if (is_u) { /* USQADD */
|
||||
switch (size) {
|
||||
case 0:
|
||||
gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
case 1:
|
||||
gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
case 2:
|
||||
gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
} else { /* SUQADD */
|
||||
switch (size) {
|
||||
case 0:
|
||||
gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
case 1:
|
||||
gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
case 2:
|
||||
gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
||||
for (pass = 0; pass < maxpasses; pass++) {
|
||||
if (is_scalar) {
|
||||
read_vec_element_i32(s, tcg_rn, rn, pass, size);
|
||||
read_vec_element_i32(s, tcg_rd, rd, pass, size);
|
||||
} else {
|
||||
read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
|
||||
read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
|
||||
}
|
||||
|
||||
if (is_u) { /* USQADD */
|
||||
switch (size) {
|
||||
case 0:
|
||||
gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
case 1:
|
||||
gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
case 2:
|
||||
gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
} else { /* SUQADD */
|
||||
switch (size) {
|
||||
case 0:
|
||||
gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
case 1:
|
||||
gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
case 2:
|
||||
gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
||||
if (is_scalar) {
|
||||
write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
|
||||
}
|
||||
write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
|
||||
}
|
||||
clear_vec_high(s, is_q, rd);
|
||||
write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
|
||||
write_vec_element_i32(s, tcg_rd, rd, 0, MO_32);
|
||||
clear_vec_high(s, false, rd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -197,6 +197,12 @@ void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
|
||||
uint32_t a, uint32_t oprsz, uint32_t maxsz);
|
||||
void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
|
||||
uint32_t a, uint32_t oprsz, uint32_t maxsz);
|
||||
/*
 * gvec expanders for SUQADD and USQADD.
 *
 * @vece:   element size selector
 * @rd_ofs: offset of the destination
 * @rn_ofs: offset of the first source
 * @rm_ofs: offset of the second source
 * @opr_sz: operation size
 * @max_sz: maximum size
 *
 * NOTE(review): offsets are presumably relative to CPUARMState and sizes
 * in bytes, as for the neighbouring gen_gvec_* declarations -- confirm.
 */
void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs,
|
||||
uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t opr_sz, uint32_t max_sz);
|
||||
void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
|
||||
uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t opr_sz, uint32_t max_sz);
|
||||
|
||||
void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
|
||||
void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
|
||||
|
@ -1555,6 +1555,14 @@ DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX)
|
||||
DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX)
|
||||
DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX)
|
||||
|
||||
/*
 * 8-, 16- and 32-bit USQADD helpers: first operand type unsigned, second
 * operand type signed, bounds 0 .. UINTn_MAX.
 * NOTE(review): DO_SAT's definition is not visible here; the argument order
 * (name, wide type, n type, m type, operator, min, max) is inferred from
 * the invocations -- confirm against the macro.
 */
DO_SAT(gvec_usqadd_b, int, uint8_t, int8_t, +, 0, UINT8_MAX)
|
||||
DO_SAT(gvec_usqadd_h, int, uint16_t, int16_t, +, 0, UINT16_MAX)
|
||||
DO_SAT(gvec_usqadd_s, int64_t, uint32_t, int32_t, +, 0, UINT32_MAX)
|
||||
|
||||
/*
 * 8-, 16- and 32-bit SUQADD helpers: first operand type signed, second
 * operand type unsigned, bounds INTn_MIN .. INTn_MAX.
 */
DO_SAT(gvec_suqadd_b, int, int8_t, uint8_t, +, INT8_MIN, INT8_MAX)
|
||||
DO_SAT(gvec_suqadd_h, int, int16_t, uint16_t, +, INT16_MIN, INT16_MAX)
|
||||
DO_SAT(gvec_suqadd_s, int64_t, int32_t, uint32_t, +, INT32_MIN, INT32_MAX)
|
||||
|
||||
#undef DO_SAT
|
||||
|
||||
void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn,
|
||||
@ -1645,6 +1653,62 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
|
||||
clear_tail(d, oprsz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
void HELPER(gvec_usqadd_d)(void *vd, void *vq, void *vn,
|
||||
void *vm, uint32_t desc)
|
||||
{
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
uint64_t *d = vd, *n = vn, *m = vm;
|
||||
bool q = false;
|
||||
|
||||
for (i = 0; i < oprsz / 8; i++) {
|
||||
uint64_t nn = n[i];
|
||||
int64_t mm = m[i];
|
||||
uint64_t dd = nn + mm;
|
||||
|
||||
if (mm < 0) {
|
||||
if (nn < (uint64_t)-mm) {
|
||||
dd = 0;
|
||||
q = true;
|
||||
}
|
||||
} else {
|
||||
if (dd < nn) {
|
||||
dd = UINT64_MAX;
|
||||
q = true;
|
||||
}
|
||||
}
|
||||
d[i] = dd;
|
||||
}
|
||||
if (q) {
|
||||
uint32_t *qc = vq;
|
||||
qc[0] = 1;
|
||||
}
|
||||
clear_tail(d, oprsz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
void HELPER(gvec_suqadd_d)(void *vd, void *vq, void *vn,
|
||||
void *vm, uint32_t desc)
|
||||
{
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
uint64_t *d = vd, *n = vn, *m = vm;
|
||||
bool q = false;
|
||||
|
||||
for (i = 0; i < oprsz / 8; i++) {
|
||||
int64_t nn = n[i];
|
||||
uint64_t mm = m[i];
|
||||
int64_t dd = nn + mm;
|
||||
|
||||
if (mm > (uint64_t)(INT64_MAX - nn)) {
|
||||
dd = INT64_MAX;
|
||||
q = true;
|
||||
}
|
||||
d[i] = dd;
|
||||
}
|
||||
if (q) {
|
||||
uint32_t *qc = vq;
|
||||
qc[0] = 1;
|
||||
}
|
||||
clear_tail(d, oprsz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
#define DO_SRA(NAME, TYPE) \
|
||||
void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
|
||||
|
Loading…
Reference in New Issue
Block a user