target/arm: Convert VQSHL, VQSHLU to gvec

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20240912024114.1097832-26-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2024-09-11 19:41:10 -07:00 committed by Peter Maydell
parent 7e5d5a3d8c
commit ef2b80eb21
6 changed files with 94 additions and 110 deletions

View File

@ -324,6 +324,18 @@ DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32
DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_sqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_sqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_sqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_sqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_uqshli_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_uqshli_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_uqshli_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_uqshli_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_sqshlui_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_sqshlui_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_sqshlui_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_sqshlui_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

View File

@ -1313,6 +1313,42 @@ void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
opr_sz, max_sz, 0, fns[vece]);
}
void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
int64_t c, uint32_t opr_sz, uint32_t max_sz)
{
static gen_helper_gvec_2_ptr * const fns[] = {
gen_helper_neon_sqshli_b, gen_helper_neon_sqshli_h,
gen_helper_neon_sqshli_s, gen_helper_neon_sqshli_d,
};
tcg_debug_assert(vece <= MO_64);
tcg_debug_assert(c >= 0 && c <= (8 << vece));
tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
}
void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
int64_t c, uint32_t opr_sz, uint32_t max_sz)
{
static gen_helper_gvec_2_ptr * const fns[] = {
gen_helper_neon_uqshli_b, gen_helper_neon_uqshli_h,
gen_helper_neon_uqshli_s, gen_helper_neon_uqshli_d,
};
tcg_debug_assert(vece <= MO_64);
tcg_debug_assert(c >= 0 && c <= (8 << vece));
tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
}
void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
int64_t c, uint32_t opr_sz, uint32_t max_sz)
{
static gen_helper_gvec_2_ptr * const fns[] = {
gen_helper_neon_sqshlui_b, gen_helper_neon_sqshlui_h,
gen_helper_neon_sqshlui_s, gen_helper_neon_sqshlui_d,
};
tcg_debug_assert(vece <= MO_64);
tcg_debug_assert(c >= 0 && c <= (8 << vece));
tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
}
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);

View File

@ -291,17 +291,17 @@ VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h
VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b
VQSHLU_64_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_s
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_h
VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_b
VQSHL_S_64_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b

View File

@ -141,6 +141,19 @@ void HELPER(name)(void *vd, void *vn, void *vm, void *venv, uint32_t desc) \
clear_tail(d, opr_sz, simd_maxsz(desc)); \
}
#define NEON_GVEC_VOP2i_ENV(name, vtype) \
void HELPER(name)(void *vd, void *vn, void *venv, uint32_t desc) \
{ \
intptr_t i, opr_sz = simd_oprsz(desc); \
int imm = simd_data(desc); \
vtype *d = vd, *n = vn; \
CPUARMState *env = venv; \
for (i = 0; i < opr_sz / sizeof(vtype); i++) { \
NEON_FN(d[i], n[i], imm); \
} \
clear_tail(d, opr_sz, simd_maxsz(desc)); \
}
/* Pairwise operations. */
/* For 32-bit elements each segment only contains a single element, so
the elementwise and pairwise operations are the same. */
@ -271,22 +284,26 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shift)
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
NEON_GVEC_VOP2_ENV(neon_uqshl_b, uint8_t)
NEON_GVEC_VOP2i_ENV(neon_uqshli_b, uint8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
NEON_GVEC_VOP2_ENV(neon_uqshl_h, uint16_t)
NEON_GVEC_VOP2i_ENV(neon_uqshli_h, uint16_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_uqshl_s, uint32_t)
NEON_GVEC_VOP2i_ENV(neon_uqshli_s, uint32_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_uqshl_d, uint64_t)
NEON_GVEC_VOP2i_ENV(neon_uqshli_d, uint64_t)
#undef NEON_FN
uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift)
@ -303,22 +320,26 @@ uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift)
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
NEON_GVEC_VOP2_ENV(neon_sqshl_b, int8_t)
NEON_GVEC_VOP2i_ENV(neon_sqshli_b, int8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
NEON_GVEC_VOP2_ENV(neon_sqshl_h, int16_t)
NEON_GVEC_VOP2i_ENV(neon_sqshli_h, int16_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_sqshl_s, int32_t)
NEON_GVEC_VOP2i_ENV(neon_sqshli_s, int32_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_sqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
NEON_GVEC_VOP2_ENV(neon_sqshl_d, int64_t)
NEON_GVEC_VOP2i_ENV(neon_sqshli_d, int64_t)
#undef NEON_FN
uint32_t HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
@ -334,11 +355,13 @@ uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
#define NEON_FN(dest, src1, src2) \
(dest = do_suqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshlu_s8, neon_s8, 4)
NEON_GVEC_VOP2i_ENV(neon_sqshlui_b, int8_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_suqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshlu_s16, neon_s16, 2)
NEON_GVEC_VOP2i_ENV(neon_sqshlui_h, int16_t)
#undef NEON_FN
uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
@ -351,6 +374,16 @@ uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
return do_suqrshl_d(val, (int8_t)shift, false, env->vfp.qc);
}
#define NEON_FN(dest, src1, src2) \
(dest = do_suqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc))
NEON_GVEC_VOP2i_ENV(neon_sqshlui_s, int32_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_suqrshl_d(src1, (int8_t)src2, false, env->vfp.qc))
NEON_GVEC_VOP2i_ENV(neon_sqshlui_d, int64_t)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)

View File

@ -1101,113 +1101,9 @@ DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)
DO_2SH(VSHR_S, gen_gvec_sshr)
DO_2SH(VSHR_U, gen_gvec_ushr)
static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
NeonGenTwo64OpEnvFn *fn)
{
/*
* 2-reg-and-shift operations, size == 3 case, where the
* function needs to be passed tcg_env.
*/
TCGv_i64 constimm;
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vm) & 0x10)) {
return false;
}
if ((a->vm | a->vd) & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
/*
* To avoid excessive duplication of ops we implement shift
* by immediate using the variable shift operations.
*/
constimm = tcg_constant_i64(dup_const(a->size, a->shift));
for (pass = 0; pass < a->q + 1; pass++) {
TCGv_i64 tmp = tcg_temp_new_i64();
read_neon_element64(tmp, a->vm, pass, MO_64);
fn(tmp, tcg_env, tmp, constimm);
write_neon_element64(tmp, a->vd, pass, MO_64);
}
return true;
}
static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
NeonGenTwoOpEnvFn *fn)
{
/*
* 2-reg-and-shift operations, size < 3 case, where the
* helper needs to be passed tcg_env.
*/
TCGv_i32 constimm, tmp;
int pass;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vm) & 0x10)) {
return false;
}
if ((a->vm | a->vd) & a->q) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
/*
* To avoid excessive duplication of ops we implement shift
* by immediate using the variable shift operations.
*/
constimm = tcg_constant_i32(dup_const(a->size, a->shift));
tmp = tcg_temp_new_i32();
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
read_neon_element32(tmp, a->vm, pass, MO_32);
fn(tmp, tcg_env, tmp, constimm);
write_neon_element32(tmp, a->vd, pass, MO_32);
}
return true;
}
#define DO_2SHIFT_ENV(INSN, FUNC) \
static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
{ \
return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
} \
static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
{ \
static NeonGenTwoOpEnvFn * const fns[] = { \
gen_helper_neon_##FUNC##8, \
gen_helper_neon_##FUNC##16, \
gen_helper_neon_##FUNC##32, \
}; \
assert(a->size < ARRAY_SIZE(fns)); \
return do_2shift_env_32(s, a, fns[a->size]); \
}
DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)
DO_2SH(VQSHLU, gen_neon_sqshlui)
DO_2SH(VQSHL_U, gen_neon_uqshli)
DO_2SH(VQSHL_S, gen_neon_sqshli)
static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
NeonGenTwo64OpFn *shiftfn,

View File

@ -471,6 +471,13 @@ void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
int64_t c, uint32_t opr_sz, uint32_t max_sz);
void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
int64_t c, uint32_t opr_sz, uint32_t max_sz);
void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
int64_t c, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,