target/arm: Inline scalar SQADD, UQADD, SQSUB, UQSUB
This eliminates the last uses of these neon helpers.
Incorporate the MO_64 expanders as an option to the vector expander.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240528203044.612851-7-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
1217edace8
commit
f4fa83d614
@ -268,23 +268,6 @@ DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, ptr)
|
||||
DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32)
|
||||
|
||||
/* neon_helper.c */
|
||||
DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(neon_qadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(neon_qadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(neon_qadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(neon_qadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qsub_u32, i32, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qsub_s32, i32, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qadd_u64, i64, env, i64, i64)
|
||||
DEF_HELPER_3(neon_qadd_s64, i64, env, i64, i64)
|
||||
DEF_HELPER_3(neon_qsub_u64, i64, env, i64, i64)
|
||||
DEF_HELPER_3(neon_qsub_s64, i64, env, i64, i64)
|
||||
|
||||
DEF_HELPER_2(neon_hadd_s8, i32, i32, i32)
|
||||
DEF_HELPER_2(neon_hadd_u8, i32, i32, i32)
|
||||
DEF_HELPER_2(neon_hadd_s16, i32, i32, i32)
|
||||
|
@ -1218,6 +1218,28 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
|
||||
}
|
||||
|
||||
/*
 * Emit TCG ops for an unsigned saturating add of a narrow element held in
 * 64-bit temporaries.
 *
 * res receives a + b clamped (unsigned minimum) to the all-ones value of
 * an (8 << esz)-bit element.  The bits in which the raw sum and the clamped
 * sum differ are ORed into qc, so qc only gains bits when clamping changed
 * the value.
 *
 * NOTE(review): presumably a and b arrive zero-extended from the element
 * width, so the 64-bit sum cannot wrap -- confirm against the callers.
 */
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 sum = tcg_temp_new_i64();
    /* Largest value representable in an unsigned (8 << esz)-bit element. */
    TCGv_i64 limit = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz));

    tcg_gen_add_i64(sum, a, b);
    tcg_gen_umin_i64(res, sum, limit);

    /* raw ^ clamped is nonzero exactly when the clamp took effect. */
    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
|
||||
|
||||
/*
 * Emit TCG ops for a 64-bit unsigned saturating add.
 *
 * res receives a + b, replaced by UINT64_MAX when the wrapped sum compares
 * unsigned-less-than a.  The bits in which the wrapped sum and res differ
 * are ORed into qc.
 */
void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 sum = tcg_temp_new_i64();
    TCGv_i64 all_ones = tcg_constant_i64(UINT64_MAX);

    tcg_gen_add_i64(sum, a, b);

    /* sum <u a  ->  the addition wrapped, so pick the saturated value. */
    tcg_gen_movcond_i64(TCG_COND_LTU, res, sum, a, all_ones, sum);

    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
|
||||
|
||||
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
@ -1251,6 +1273,7 @@ void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
.opt_opc = vecop_list,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fni8 = gen_uqadd_d,
|
||||
.fno = gen_helper_gvec_uqadd_d,
|
||||
.write_aofs = true,
|
||||
.opt_opc = vecop_list,
|
||||
@ -1262,6 +1285,41 @@ void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
|
||||
}
|
||||
|
||||
/*
 * Emit TCG ops for a signed saturating add of a narrow element held in
 * 64-bit temporaries.
 *
 * res receives a + b clamped into [lo, hi], where hi has the low
 * (8 << esz) - 1 bits set and lo is -1 - hi.  The bits in which the raw sum
 * and the clamped sum differ are ORed into qc.
 *
 * NOTE(review): presumably a and b arrive sign-extended from the element
 * width -- confirm against the callers.
 */
void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    const int64_t hi = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    const int64_t lo = -1ll - hi;
    TCGv_i64 sum = tcg_temp_new_i64();

    tcg_gen_add_i64(sum, a, b);

    /* Clamp from above first, then from below. */
    tcg_gen_smin_i64(res, sum, tcg_constant_i64(hi));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(lo));

    /* raw ^ clamped is nonzero exactly when a clamp took effect. */
    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
|
||||
|
||||
/*
 * Emit TCG ops for a 64-bit signed saturating add.
 *
 * res receives a + b unless the addition overflowed, in which case it
 * receives INT64_MAX or INT64_MIN depending on the sign of a.  The bits in
 * which the wrapped sum and res differ are ORed into qc.
 */
void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 sum = tcg_temp_new_i64();
    TCGv_i64 ovf = tcg_temp_new_i64();
    TCGv_i64 sat = tcg_temp_new_i64();

    tcg_gen_add_i64(sum, a, b);

    /*
     * Overflow indication, carried in the sign bit of ovf:
     * (sum ^ a) & ~(a ^ b), i.e. the result's sign differs from a while
     * a and b had the same sign.
     */
    tcg_gen_xor_i64(ovf, a, b);
    tcg_gen_xor_i64(sat, sum, a);
    tcg_gen_andc_i64(ovf, sat, ovf);

    /*
     * Saturated value: (a >> 63) ^ INT64_MAX yields INT64_MIN for a
     * negative a and INT64_MAX otherwise.
     */
    tcg_gen_sari_i64(sat, a, 63);
    tcg_gen_xori_i64(sat, sat, INT64_MAX);

    /* ovf < 0 means its sign bit is set: select the saturated value. */
    tcg_gen_movcond_i64(TCG_COND_LT, res, ovf, tcg_constant_i64(0), sat, sum);

    tcg_gen_xor_i64(sum, sum, res);
    tcg_gen_or_i64(qc, qc, sum);
}
|
||||
|
||||
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
@ -1295,6 +1353,7 @@ void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fni8 = gen_sqadd_d,
|
||||
.fno = gen_helper_gvec_sqadd_d,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
@ -1306,6 +1365,26 @@ void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
|
||||
}
|
||||
|
||||
/*
 * Emit TCG ops for an unsigned saturating subtract of a narrow element
 * held in 64-bit temporaries.
 *
 * res receives a - b clamped from below at zero (signed maximum with 0).
 * The bits in which the raw difference and the clamped difference differ
 * are ORed into qc.  esz is not used by this expander.
 *
 * NOTE(review): presumably a and b arrive zero-extended from the element
 * width, so a negative 64-bit difference means underflow -- confirm
 * against the callers.
 */
void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    TCGv_i64 diff = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);

    tcg_gen_sub_i64(diff, a, b);
    tcg_gen_smax_i64(res, diff, zero);

    /* raw ^ clamped is nonzero exactly when the clamp took effect. */
    tcg_gen_xor_i64(diff, diff, res);
    tcg_gen_or_i64(qc, qc, diff);
}
|
||||
|
||||
/*
 * Emit TCG ops for a 64-bit unsigned saturating subtract.
 *
 * res receives a - b, replaced by 0 when a is unsigned-less-than b.  The
 * bits in which the wrapped difference and res differ are ORed into qc.
 */
void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 diff = tcg_temp_new_i64();
    TCGv_i64 zero = tcg_constant_i64(0);

    tcg_gen_sub_i64(diff, a, b);

    /* a <u b  ->  the subtraction would underflow, so pick zero. */
    tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, zero, diff);

    tcg_gen_xor_i64(diff, diff, res);
    tcg_gen_or_i64(qc, qc, diff);
}
|
||||
|
||||
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
@ -1339,6 +1418,7 @@ void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fni8 = gen_uqsub_d,
|
||||
.fno = gen_helper_gvec_uqsub_d,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
@ -1350,6 +1430,41 @@ void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
|
||||
}
|
||||
|
||||
/*
 * Emit TCG ops for a signed saturating subtract of a narrow element held
 * in 64-bit temporaries.
 *
 * res receives a - b clamped into [lo, hi], where hi has the low
 * (8 << esz) - 1 bits set and lo is -1 - hi.  The bits in which the raw
 * difference and the clamped difference differ are ORed into qc.
 *
 * NOTE(review): presumably a and b arrive sign-extended from the element
 * width -- confirm against the callers.
 */
void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
    const int64_t hi = MAKE_64BIT_MASK(0, (8 << esz) - 1);
    const int64_t lo = -1ll - hi;
    TCGv_i64 diff = tcg_temp_new_i64();

    tcg_gen_sub_i64(diff, a, b);

    /* Clamp from above first, then from below. */
    tcg_gen_smin_i64(res, diff, tcg_constant_i64(hi));
    tcg_gen_smax_i64(res, res, tcg_constant_i64(lo));

    /* raw ^ clamped is nonzero exactly when a clamp took effect. */
    tcg_gen_xor_i64(diff, diff, res);
    tcg_gen_or_i64(qc, qc, diff);
}
|
||||
|
||||
/*
 * Emit TCG ops for a 64-bit signed saturating subtract.
 *
 * res receives a - b unless the subtraction overflowed, in which case it
 * receives INT64_MAX or INT64_MIN depending on the sign of a.  The bits in
 * which the wrapped difference and res differ are ORed into qc.
 */
void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 diff = tcg_temp_new_i64();
    TCGv_i64 ovf = tcg_temp_new_i64();
    TCGv_i64 sat = tcg_temp_new_i64();

    tcg_gen_sub_i64(diff, a, b);

    /*
     * Overflow indication, carried in the sign bit of ovf:
     * (a ^ b) & (diff ^ a), i.e. a and b had different signs and the
     * result's sign differs from a.
     */
    tcg_gen_xor_i64(ovf, a, b);
    tcg_gen_xor_i64(sat, diff, a);
    tcg_gen_and_i64(ovf, ovf, sat);

    /*
     * Saturated value: (a >> 63) ^ INT64_MAX yields INT64_MIN for a
     * negative a and INT64_MAX otherwise.
     */
    tcg_gen_sari_i64(sat, a, 63);
    tcg_gen_xori_i64(sat, sat, INT64_MAX);

    /* ovf < 0 means its sign bit is set: select the saturated value. */
    tcg_gen_movcond_i64(TCG_COND_LT, res, ovf, tcg_constant_i64(0), sat, diff);

    tcg_gen_xor_i64(diff, diff, res);
    tcg_gen_or_i64(qc, qc, diff);
}
|
||||
|
||||
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
@ -1383,6 +1498,7 @@ void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fni8 = gen_sqsub_d,
|
||||
.fno = gen_helper_gvec_sqsub_d,
|
||||
.opt_opc = vecop_list,
|
||||
.write_aofs = true,
|
||||
|
@ -155,168 +155,6 @@ uint32_t HELPER(glue(neon_,name))(uint32_t arg) \
|
||||
return arg; \
|
||||
}
|
||||
|
||||
|
||||
#define NEON_USAT(dest, src1, src2, type) do { \
|
||||
uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
|
||||
if (tmp != (type)tmp) { \
|
||||
SET_QC(); \
|
||||
dest = ~0; \
|
||||
} else { \
|
||||
dest = tmp; \
|
||||
}} while(0)
|
||||
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
||||
NEON_VOP_ENV(qadd_u8, neon_u8, 4)
|
||||
#undef NEON_FN
|
||||
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
||||
NEON_VOP_ENV(qadd_u16, neon_u16, 2)
|
||||
#undef NEON_FN
|
||||
#undef NEON_USAT
|
||||
|
||||
/*
 * 32-bit unsigned saturating add.  Returns a + b, or all ones after
 * invoking SET_QC() when the 32-bit sum wrapped (sum below a).
 */
uint32_t HELPER(neon_qadd_u32)(CPUARMState *env, uint32_t a, uint32_t b)
{
    uint32_t sum = a + b;

    if (sum >= a) {
        /* No wrap-around: the plain sum is the answer. */
        return sum;
    }

    SET_QC();
    return ~(uint32_t)0;
}
|
||||
|
||||
/*
 * 64-bit unsigned saturating add.  Returns src1 + src2, or all ones after
 * invoking SET_QC() when the 64-bit sum wrapped (sum below src1).
 */
uint64_t HELPER(neon_qadd_u64)(CPUARMState *env, uint64_t src1, uint64_t src2)
{
    uint64_t sum = src1 + src2;

    if (sum >= src1) {
        /* No wrap-around: the plain sum is the answer. */
        return sum;
    }

    SET_QC();
    return ~(uint64_t)0;
}
|
||||
|
||||
#define NEON_SSAT(dest, src1, src2, type) do { \
|
||||
int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \
|
||||
if (tmp != (type)tmp) { \
|
||||
SET_QC(); \
|
||||
if (src2 > 0) { \
|
||||
tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
|
||||
} else { \
|
||||
tmp = 1 << (sizeof(type) * 8 - 1); \
|
||||
} \
|
||||
} \
|
||||
dest = tmp; \
|
||||
} while(0)
|
||||
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
||||
NEON_VOP_ENV(qadd_s8, neon_s8, 4)
|
||||
#undef NEON_FN
|
||||
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
||||
NEON_VOP_ENV(qadd_s16, neon_s16, 2)
|
||||
#undef NEON_FN
|
||||
#undef NEON_SSAT
|
||||
|
||||
/*
 * 32-bit signed saturating add on values passed as uint32_t.  Returns the
 * wrapped sum unless the result's SIGNBIT differs from a's while a and b
 * agree in SIGNBIT; in that case SET_QC() is invoked and the value
 * ~((a >> 31, arithmetic) ^ SIGNBIT) is returned instead.
 */
uint32_t HELPER(neon_qadd_s32)(CPUARMState *env, uint32_t a, uint32_t b)
{
    uint32_t sum = a + b;

    /* No overflow if the result kept a's sign or the operands' signs differ. */
    if (!((sum ^ a) & SIGNBIT) || ((a ^ b) & SIGNBIT)) {
        return sum;
    }

    SET_QC();
    return ~(((int32_t)a >> 31) ^ SIGNBIT);
}
|
||||
|
||||
/*
 * 64-bit signed saturating add on values passed as uint64_t.  Returns the
 * wrapped sum unless the result's SIGNBIT64 differs from src1's while src1
 * and src2 agree in SIGNBIT64; in that case SET_QC() is invoked and the
 * value (src1 >> 63, arithmetic) ^ ~SIGNBIT64 is returned instead.
 */
uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
{
    uint64_t sum = src1 + src2;

    /* No overflow if the result kept src1's sign or the signs differ. */
    if (!((sum ^ src1) & SIGNBIT64) || ((src1 ^ src2) & SIGNBIT64)) {
        return sum;
    }

    SET_QC();
    return ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
}
|
||||
|
||||
#define NEON_USAT(dest, src1, src2, type) do { \
|
||||
uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
|
||||
if (tmp != (type)tmp) { \
|
||||
SET_QC(); \
|
||||
dest = 0; \
|
||||
} else { \
|
||||
dest = tmp; \
|
||||
}} while(0)
|
||||
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t)
|
||||
NEON_VOP_ENV(qsub_u8, neon_u8, 4)
|
||||
#undef NEON_FN
|
||||
#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t)
|
||||
NEON_VOP_ENV(qsub_u16, neon_u16, 2)
|
||||
#undef NEON_FN
|
||||
#undef NEON_USAT
|
||||
|
||||
/*
 * 32-bit unsigned saturating subtract.  Returns a - b, or zero after
 * invoking SET_QC() when the 32-bit difference wrapped (difference above a).
 */
uint32_t HELPER(neon_qsub_u32)(CPUARMState *env, uint32_t a, uint32_t b)
{
    uint32_t diff = a - b;

    if (diff <= a) {
        /* No wrap-around: the plain difference is the answer. */
        return diff;
    }

    SET_QC();
    return 0;
}
|
||||
|
||||
/*
 * 64-bit unsigned saturating subtract.  Returns src1 - src2 when src1 is at
 * least src2; otherwise invokes SET_QC() and returns zero.
 */
uint64_t HELPER(neon_qsub_u64)(CPUARMState *env, uint64_t src1, uint64_t src2)
{
    if (src1 >= src2) {
        return src1 - src2;
    }

    /* The true difference would be negative: saturate at zero. */
    SET_QC();
    return 0;
}
|
||||
|
||||
#define NEON_SSAT(dest, src1, src2, type) do { \
|
||||
int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
|
||||
if (tmp != (type)tmp) { \
|
||||
SET_QC(); \
|
||||
if (src2 < 0) { \
|
||||
tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \
|
||||
} else { \
|
||||
tmp = 1 << (sizeof(type) * 8 - 1); \
|
||||
} \
|
||||
} \
|
||||
dest = tmp; \
|
||||
} while(0)
|
||||
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t)
|
||||
NEON_VOP_ENV(qsub_s8, neon_s8, 4)
|
||||
#undef NEON_FN
|
||||
#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t)
|
||||
NEON_VOP_ENV(qsub_s16, neon_s16, 2)
|
||||
#undef NEON_FN
|
||||
#undef NEON_SSAT
|
||||
|
||||
/*
 * 32-bit signed saturating subtract on values passed as uint32_t.  Returns
 * the wrapped difference unless the result's SIGNBIT differs from a's and
 * a and b also differ in SIGNBIT; in that case SET_QC() is invoked and the
 * value ~((a >> 31, arithmetic) ^ SIGNBIT) is returned instead.
 */
uint32_t HELPER(neon_qsub_s32)(CPUARMState *env, uint32_t a, uint32_t b)
{
    uint32_t diff = a - b;

    /* No overflow if the result kept a's sign or the operands' signs agree. */
    if (!((diff ^ a) & SIGNBIT) || !((a ^ b) & SIGNBIT)) {
        return diff;
    }

    SET_QC();
    return ~(((int32_t)a >> 31) ^ SIGNBIT);
}
|
||||
|
||||
/*
 * 64-bit signed saturating subtract on values passed as uint64_t.  Returns
 * the wrapped difference unless the result's SIGNBIT64 differs from src1's
 * and src1 and src2 also differ in SIGNBIT64; in that case SET_QC() is
 * invoked and the value (src1 >> 63, arithmetic) ^ ~SIGNBIT64 is returned.
 */
uint64_t HELPER(neon_qsub_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
{
    uint64_t diff = src1 - src2;

    /* No overflow if the result kept src1's sign or the signs agree. */
    if (!((diff ^ src1) & SIGNBIT64) || !((src1 ^ src2) & SIGNBIT64)) {
        return diff;
    }

    SET_QC();
    return ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
}
|
||||
|
||||
#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1
|
||||
NEON_VOP(hadd_s8, neon_s8, 4)
|
||||
NEON_VOP(hadd_u8, neon_u8, 4)
|
||||
|
@ -9291,21 +9291,28 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
|
||||
* or scalar-three-reg-same groups.
|
||||
*/
|
||||
TCGCond cond;
|
||||
TCGv_i64 qc;
|
||||
|
||||
switch (opcode) {
|
||||
case 0x1: /* SQADD */
|
||||
qc = tcg_temp_new_i64();
|
||||
tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
|
||||
if (u) {
|
||||
gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
|
||||
gen_uqadd_d(tcg_rd, qc, tcg_rn, tcg_rm);
|
||||
} else {
|
||||
gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
|
||||
gen_sqadd_d(tcg_rd, qc, tcg_rn, tcg_rm);
|
||||
}
|
||||
tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
|
||||
break;
|
||||
case 0x5: /* SQSUB */
|
||||
qc = tcg_temp_new_i64();
|
||||
tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
|
||||
if (u) {
|
||||
gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
|
||||
gen_uqsub_d(tcg_rd, qc, tcg_rn, tcg_rm);
|
||||
} else {
|
||||
gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
|
||||
gen_sqsub_d(tcg_rd, qc, tcg_rn, tcg_rm);
|
||||
}
|
||||
tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
|
||||
break;
|
||||
case 0x6: /* CMGT, CMHI */
|
||||
cond = u ? TCG_COND_GTU : TCG_COND_GT;
|
||||
@ -9425,35 +9432,16 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
|
||||
* OPTME: special-purpose helpers would avoid doing some
|
||||
* unnecessary work in the helper for the 8 and 16 bit cases.
|
||||
*/
|
||||
NeonGenTwoOpEnvFn *genenvfn;
|
||||
TCGv_i32 tcg_rn = tcg_temp_new_i32();
|
||||
TCGv_i32 tcg_rm = tcg_temp_new_i32();
|
||||
TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
|
||||
|
||||
read_vec_element_i32(s, tcg_rn, rn, 0, size);
|
||||
read_vec_element_i32(s, tcg_rm, rm, 0, size);
|
||||
NeonGenTwoOpEnvFn *genenvfn = NULL;
|
||||
void (*genfn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp) = NULL;
|
||||
|
||||
switch (opcode) {
|
||||
case 0x1: /* SQADD, UQADD */
|
||||
{
|
||||
static NeonGenTwoOpEnvFn * const fns[3][2] = {
|
||||
{ gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
|
||||
{ gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
|
||||
{ gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
|
||||
};
|
||||
genenvfn = fns[size][u];
|
||||
genfn = u ? gen_uqadd_bhs : gen_sqadd_bhs;
|
||||
break;
|
||||
}
|
||||
case 0x5: /* SQSUB, UQSUB */
|
||||
{
|
||||
static NeonGenTwoOpEnvFn * const fns[3][2] = {
|
||||
{ gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
|
||||
{ gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
|
||||
{ gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
|
||||
};
|
||||
genenvfn = fns[size][u];
|
||||
genfn = u ? gen_uqsub_bhs : gen_sqsub_bhs;
|
||||
break;
|
||||
}
|
||||
case 0x9: /* SQSHL, UQSHL */
|
||||
{
|
||||
static NeonGenTwoOpEnvFn * const fns[3][2] = {
|
||||
@ -9488,8 +9476,29 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm);
|
||||
tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
|
||||
if (genenvfn) {
|
||||
TCGv_i32 tcg_rn = tcg_temp_new_i32();
|
||||
TCGv_i32 tcg_rm = tcg_temp_new_i32();
|
||||
|
||||
read_vec_element_i32(s, tcg_rn, rn, 0, size);
|
||||
read_vec_element_i32(s, tcg_rm, rm, 0, size);
|
||||
genenvfn(tcg_rn, tcg_env, tcg_rn, tcg_rm);
|
||||
tcg_gen_extu_i32_i64(tcg_rd, tcg_rn);
|
||||
} else {
|
||||
TCGv_i64 tcg_rn = tcg_temp_new_i64();
|
||||
TCGv_i64 tcg_rm = tcg_temp_new_i64();
|
||||
TCGv_i64 qc = tcg_temp_new_i64();
|
||||
|
||||
read_vec_element(s, tcg_rn, rn, 0, size | (u ? 0 : MO_SIGN));
|
||||
read_vec_element(s, tcg_rm, rm, 0, size | (u ? 0 : MO_SIGN));
|
||||
tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
|
||||
genfn(tcg_rd, qc, tcg_rn, tcg_rm, size);
|
||||
tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
|
||||
if (!u) {
|
||||
/* Truncate signed 64-bit result for writeback. */
|
||||
tcg_gen_ext_i64(tcg_rd, tcg_rd, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
write_fp_dreg(s, rd, tcg_rd);
|
||||
|
@ -466,12 +466,27 @@ void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
|
||||
void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
|
||||
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
|
||||
TCGv_i64 a, TCGv_i64 b, MemOp esz);
|
||||
void gen_uqadd_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b);
|
||||
void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
|
||||
|
||||
void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc,
|
||||
TCGv_i64 a, TCGv_i64 b, MemOp esz);
|
||||
void gen_sqadd_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b);
|
||||
void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
|
||||
|
||||
void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc,
|
||||
TCGv_i64 a, TCGv_i64 b, MemOp esz);
|
||||
void gen_uqsub_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b);
|
||||
void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
|
||||
|
||||
void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc,
|
||||
TCGv_i64 a, TCGv_i64 b, MemOp esz);
|
||||
void gen_sqsub_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b);
|
||||
void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user