target/arm: Implement SVE2 bitwise ternary operations

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525010358.152808-33-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2021-05-24 18:02:58 -07:00 committed by Peter Maydell
parent 14f6dad168
commit 911cdc6d79
4 changed files with 281 additions and 0 deletions

View File

@ -2543,3 +2543,9 @@ DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32) void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_d, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32) void, ptr, ptr, ptr, ptr, ptr, i32)
/*
 * SVE2 bitwise ternary helpers.  Each one is declared with
 * TCG_CALL_NO_RWG, returns void, and takes four pointer arguments
 * followed by one i32 argument.
 */
DEF_HELPER_FLAGS_5(sve2_eor3, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve2_bcax, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve2_bsl1n, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve2_bsl2n, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve2_nbsl, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

View File

@ -124,6 +124,10 @@
@rda_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 \ @rda_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 \
&rrrr_esz ra=%reg_movprfx &rrrr_esz ra=%reg_movprfx
# Four operand with unused vector element size
# esz is forced to 0.  rm, ra and rd have their own bit ranges, while rn
# is taken from the %reg_movprfx field (defined elsewhere; presumably the
# same bits as rd -- confirm).
@rdn_ra_rm_e0 ........ ... rm:5 ... ... ra:5 rd:5 \
&rrrr_esz esz=0 rn=%reg_movprfx
# Three operand with "memory" size, aka immediate left shift # Three operand with "memory" size, aka immediate left shift
@rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri @rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri
@ -379,6 +383,14 @@ ORR_zzz 00000100 01 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
EOR_zzz 00000100 10 1 ..... 001 100 ..... ..... @rd_rn_rm_e0 EOR_zzz 00000100 10 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
BIC_zzz 00000100 11 1 ..... 001 100 ..... ..... @rd_rn_rm_e0 BIC_zzz 00000100 11 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
# SVE2 bitwise ternary operations
# All six patterns share the @rdn_ra_rm_e0 format; their fixed bits
# differ only in bits [23:22] and bit [10].
EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0
BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0
BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
### SVE Index Generation Group
# SVE index generation (immediate start, immediate increment)

View File

@ -6797,3 +6797,53 @@ DO_ST1_ZPZ_D(dd_be, zd, MO_64)
#undef DO_ST1_ZPZ_S #undef DO_ST1_ZPZ_S
#undef DO_ST1_ZPZ_D #undef DO_ST1_ZPZ_D
void HELPER(sve2_eor3)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
for (i = 0; i < opr_sz; ++i) {
d[i] = n[i] ^ m[i] ^ k[i];
}
}
void HELPER(sve2_bcax)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
for (i = 0; i < opr_sz; ++i) {
d[i] = n[i] ^ (m[i] & ~k[i]);
}
}
void HELPER(sve2_bsl1n)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
for (i = 0; i < opr_sz; ++i) {
d[i] = (~n[i] & k[i]) | (m[i] & ~k[i]);
}
}
void HELPER(sve2_bsl2n)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
for (i = 0; i < opr_sz; ++i) {
d[i] = (n[i] & k[i]) | (~m[i] & ~k[i]);
}
}
void HELPER(sve2_nbsl)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
for (i = 0; i < opr_sz; ++i) {
d[i] = ~((n[i] & k[i]) | (m[i] & ~k[i]));
}
}

View File

@ -217,6 +217,17 @@ static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
vec_full_reg_offset(s, rm), vsz, vsz); vec_full_reg_offset(s, rm), vsz, vsz);
} }
/*
 * Invoke a vector expander on four Zregs.
 *
 * Each register number is turned into an offset with
 * vec_full_reg_offset(); the full register size from vec_full_reg_size()
 * is passed as both of the trailing size arguments.
 */
static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
                             int esz, int rd, int rn, int rm, int ra)
{
    const unsigned vsz = vec_full_reg_size(s);
    const uint32_t dofs = vec_full_reg_offset(s, rd);
    const uint32_t nofs = vec_full_reg_offset(s, rn);
    const uint32_t mofs = vec_full_reg_offset(s, rm);
    const uint32_t aofs = vec_full_reg_offset(s, ra);

    gvec_fn(esz, dofs, nofs, mofs, aofs, vsz, vsz);
}
/* Invoke a vector move on two Zregs. */ /* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn) static bool do_mov_z(DisasContext *s, int rd, int rn)
{ {
@ -329,6 +340,208 @@ static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
return do_zzz_fn(s, a, tcg_gen_gvec_andc); return do_zzz_fn(s, a, tcg_gen_gvec_andc);
} }
/*
 * Common translation path for the four-operand SVE2 vector operations.
 *
 * Returns false when the aa64_sve2 feature is absent.  Otherwise returns
 * true, and expands the operation with fn only if sve_access_check()
 * succeeds.
 */
static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
{
    bool have_sve2 = dc_isar_feature(aa64_sve2, s);

    /* The access check is only reached once the feature test has passed. */
    if (have_sve2 && sve_access_check(s)) {
        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
    }
    return have_sve2;
}
/* Emit d = n ^ m ^ k on 64-bit TCG values, as two chained XORs. */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
tcg_gen_xor_i64(d, n, m);
tcg_gen_xor_i64(d, d, k);
}
/* Emit d = n ^ m ^ k on TCG vector values, as two chained XORs. */
static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
TCGv_vec m, TCGv_vec k)
{
tcg_gen_xor_vec(vece, d, n, m);
tcg_gen_xor_vec(vece, d, d, k);
}
/*
 * Expand EOR3 over the given vector offsets through tcg_gen_gvec_4.
 *
 * The descriptor always uses MO_64, so the vece argument is not
 * consulted.  It supplies a 64-bit integer expander, a vector expander
 * and the out-of-line helper.
 */
static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 eor3_expander = {
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
        .fniv = gen_eor3_vec,
        .fni8 = gen_eor3_i64,
        .fno = gen_helper_sve2_eor3,
    };

    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &eor3_expander);
}
/* Translate EOR3: delegate to do_sve2_zzzz_fn with gen_eor3 as expander. */
static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
return do_sve2_zzzz_fn(s, a, gen_eor3);
}
/* Emit d = n ^ (m & ~k) on 64-bit TCG values. */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
/* d = m & ~k */
tcg_gen_andc_i64(d, m, k);
tcg_gen_xor_i64(d, d, n);
}
/* Emit d = n ^ (m & ~k) on TCG vector values. */
static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
TCGv_vec m, TCGv_vec k)
{
/* d = m & ~k */
tcg_gen_andc_vec(vece, d, m, k);
tcg_gen_xor_vec(vece, d, d, n);
}
/*
 * Expand BCAX over the given vector offsets through tcg_gen_gvec_4.
 *
 * The descriptor always uses MO_64, so the vece argument is not
 * consulted.  It supplies a 64-bit integer expander, a vector expander
 * and the out-of-line helper.
 */
static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 bcax_expander = {
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
        .fniv = gen_bcax_vec,
        .fni8 = gen_bcax_i64,
        .fno = gen_helper_sve2_bcax,
    };

    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &bcax_expander);
}
/* Translate BCAX: delegate to do_sve2_zzzz_fn with gen_bcax as expander. */
static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
return do_sve2_zzzz_fn(s, a, gen_bcax);
}
/*
 * Expand BSL by forwarding to tcg_gen_gvec_bitsel with the source
 * operands reordered: a is passed ahead of n and m.
 */
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
/* BSL differs from the generic bitsel in argument ordering. */
/* NOTE(review): a presumably lands in bitsel's selector slot -- confirm. */
tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}
/* Translate BSL: delegate to do_sve2_zzzz_fn with gen_bsl as expander. */
static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
{
return do_sve2_zzzz_fn(s, a, gen_bsl);
}
/*
 * Emit d = (~n & k) | (m & ~k) on 64-bit TCG values.
 *
 * The n and m operands are overwritten with intermediate results.
 * NOTE(review): this presumably relies on the gvec expander handing in
 * scratch temporaries for the inputs -- confirm against tcg_gen_gvec_4.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
/* n = k & ~n */
tcg_gen_andc_i64(n, k, n);
/* m = m & ~k */
tcg_gen_andc_i64(m, m, k);
tcg_gen_or_i64(d, n, m);
}
/*
 * Emit the BSL1N operation on TCG vector values.
 *
 * When TCG_TARGET_HAS_bitsel_vec is set, n is inverted in place and a
 * single bitsel is emitted with k, n, m as its sources; otherwise the
 * same andc/andc/or sequence as the 64-bit integer expander is used.
 * Either way n (and, in the fallback, m) is overwritten.
 */
static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
TCGv_vec m, TCGv_vec k)
{
if (TCG_TARGET_HAS_bitsel_vec) {
tcg_gen_not_vec(vece, n, n);
tcg_gen_bitsel_vec(vece, d, k, n, m);
} else {
/* n = k & ~n; m = m & ~k; d = n | m */
tcg_gen_andc_vec(vece, n, k, n);
tcg_gen_andc_vec(vece, m, m, k);
tcg_gen_or_vec(vece, d, n, m);
}
}
/*
 * Expand BSL1N over the given vector offsets through tcg_gen_gvec_4.
 *
 * The descriptor always uses MO_64, so the vece argument is not
 * consulted.  It supplies a 64-bit integer expander, a vector expander
 * and the out-of-line helper.
 */
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 bsl1n_expander = {
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
        .fniv = gen_bsl1n_vec,
        .fni8 = gen_bsl1n_i64,
        .fno = gen_helper_sve2_bsl1n,
    };

    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &bsl1n_expander);
}
/* Translate BSL1N: delegate to do_sve2_zzzz_fn with gen_bsl1n as expander. */
static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}
/*
 * Emit the BSL2N operation on 64-bit TCG values.
 *
 * The n and m operands are overwritten with intermediate results.  The
 * final combine uses or-with-complement when TCG_TARGET_HAS_orc_i64 is
 * set, and nor followed by or otherwise.
 */
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
/*
 * Z[dn] = (n & k) | (~m & ~k)
 *       = (n & k) | ~(m | k)
 */
tcg_gen_and_i64(n, n, k);
if (TCG_TARGET_HAS_orc_i64) {
/* m = m | k; d = n | ~m */
tcg_gen_or_i64(m, m, k);
tcg_gen_orc_i64(d, n, m);
} else {
/* m = ~(m | k); d = n | m */
tcg_gen_nor_i64(m, m, k);
tcg_gen_or_i64(d, n, m);
}
}
/*
 * Emit the BSL2N operation on TCG vector values.
 *
 * When TCG_TARGET_HAS_bitsel_vec is set, m is inverted in place and a
 * single bitsel is emitted with k, n, m as its sources; otherwise the
 * result is built as (n & k) | ~(m | k).  The inputs that are written
 * to (m, and n in the fallback) are overwritten.
 */
static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
TCGv_vec m, TCGv_vec k)
{
if (TCG_TARGET_HAS_bitsel_vec) {
tcg_gen_not_vec(vece, m, m);
tcg_gen_bitsel_vec(vece, d, k, n, m);
} else {
/* n = n & k; m = m | k; d = n | ~m */
tcg_gen_and_vec(vece, n, n, k);
tcg_gen_or_vec(vece, m, m, k);
tcg_gen_orc_vec(vece, d, n, m);
}
}
/*
 * Expand BSL2N over the given vector offsets through tcg_gen_gvec_4.
 *
 * The descriptor always uses MO_64, so the vece argument is not
 * consulted.  It supplies a 64-bit integer expander, a vector expander
 * and the out-of-line helper.
 */
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 bsl2n_expander = {
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
        .fniv = gen_bsl2n_vec,
        .fni8 = gen_bsl2n_i64,
        .fno = gen_helper_sve2_bsl2n,
    };

    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &bsl2n_expander);
}
/* Translate BSL2N: delegate to do_sve2_zzzz_fn with gen_bsl2n as expander. */
static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
/*
 * Emit d = ~((n & k) | (m & ~k)) on 64-bit TCG values.
 *
 * The n and m operands are overwritten with intermediate results.
 */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
/* n = n & k */
tcg_gen_and_i64(n, n, k);
/* m = m & ~k */
tcg_gen_andc_i64(m, m, k);
/* d = ~(n | m) */
tcg_gen_nor_i64(d, n, m);
}
/*
 * Emit the NBSL operation on TCG vector values: a bitsel with k, n, m
 * as its sources, followed by an in-place inversion of the result.
 */
static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
TCGv_vec m, TCGv_vec k)
{
tcg_gen_bitsel_vec(vece, d, k, n, m);
tcg_gen_not_vec(vece, d, d);
}
/*
 * Expand NBSL over the given vector offsets through tcg_gen_gvec_4.
 *
 * The descriptor always uses MO_64, so the vece argument is not
 * consulted.  It supplies a 64-bit integer expander, a vector expander
 * and the out-of-line helper.
 */
static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 nbsl_expander = {
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
        .fniv = gen_nbsl_vec,
        .fni8 = gen_nbsl_i64,
        .fno = gen_helper_sve2_nbsl,
    };

    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &nbsl_expander);
}
/* Translate NBSL: delegate to do_sve2_zzzz_fn with gen_nbsl as expander. */
static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */