target/ppc: Move VAVG[SU][BHW] to decodetree and use gvec

Moved the instructions VAVGUB, VAVGUH, VAVGUW, VAVGSB, VAVGSH, VAVGSW,
to decodetree and use gvec with them. For these one the right shift
had to be made before the sum as to avoid an overflow, so add 1 at the
end if any of the entries had 1 in its LSB as to replicate the "+ 1"
before the shift described by the ISA.

vavgub:
rept    loop    master             patch
8       12500   0,02616600         0,00754200 (-71.2%)
25      4000    0,02530000         0,00637700 (-74.8%)
100     1000    0,02604600         0,00790100 (-69.7%)
500     200     0,03189300         0,01838400 (-42.4%)
2500    40      0,06006900         0,06851000 (+14.1%)
8000    12      0,13941000         0,20548500 (+47.4%)

vavguh:
rept    loop    master             patch
8       12500   0,01818200         0,00780600 (-57.1%)
25      4000    0,01789300         0,00641600 (-64.1%)
100     1000    0,01899100         0,00787200 (-58.5%)
500     200     0,02527200         0,01828400 (-27.7%)
2500    40      0,05361800         0,06773000 (+26.3%)
8000    12      0,12886600         0,20291400 (+57.5%)

vavguw:
rept    loop    master             patch
8       12500   0,01423100         0,00776600 (-45.4%)
25      4000    0,01780800         0,00638600 (-64.1%)
100     1000    0,02085500         0,00787000 (-62.3%)
500     200     0,02737100         0,01828800 (-33.2%)
2500    40      0,05572600         0,06774200 (+21.6%)
8000    12      0,13101700         0,20311600 (+55.0%)

vavgsb:
rept    loop    master             patch
8       12500   0,03006000         0,00788600 (-73.8%)
25      4000    0,02882200         0,00637800 (-77.9%)
100     1000    0,02958000         0,00791400 (-73.2%)
500     200     0,03548800         0,01860400 (-47.6%)
2500    40      0,06360000         0,06850800 (+7.7%)
8000    12      0,13816500         0,20550300 (+48.7%)

vavgsh:
rept    loop    master             patch
8       12500   0,01965900         0,00776600 (-60.5%)
25      4000    0,01875400         0,00638700 (-65.9%)
100     1000    0,01952200         0,00786900 (-59.7%)
500     200     0,02562000         0,01760300 (-31.3%)
2500    40      0,05384300         0,06742800 (+25.2%)
8000    12      0,13240800         0,20330000 (+53.5%)

vavgsw:
rept    loop    master             patch
8       12500   0,01407700         0,00775600 (-44.9%)
25      4000    0,01762300         0,00640000 (-63.7%)
100     1000    0,02046500         0,00788500 (-61.5%)
500     200     0,02745600         0,01843000 (-32.9%)
2500    40      0,05375500         0,06820500 (+26.9%)
8000    12      0,13068300         0,20304900 (+55.4%)

These results to me seems to indicate that with gvec the results have a
slower translation but faster execution.

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-7-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
This commit is contained in:
Lucas Mateus Castro (alqotel) 2022-10-19 09:50:34 -03:00 committed by Daniel Henrique Barboza
parent d57fbd8fd9
commit c85929b2dd
5 changed files with 127 additions and 41 deletions

View File

@ -143,15 +143,15 @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64)
#define dh_ctype_acc ppc_acc_t *
#define dh_typecode_acc dh_typecode_ptr
DEF_HELPER_FLAGS_3(vavgub, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vavguh, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vavguw, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_4(VAVGUB, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
DEF_HELPER_FLAGS_4(VAVGUH, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
DEF_HELPER_FLAGS_4(VAVGUW, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
DEF_HELPER_FLAGS_3(vabsdub, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vabsduh, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vabsduw, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vavgsb, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vavgsh, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_3(vavgsw, TCG_CALL_NO_RWG, void, avr, avr, avr)
DEF_HELPER_FLAGS_4(VAVGSB, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
DEF_HELPER_FLAGS_4(VAVGSH, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
DEF_HELPER_FLAGS_4(VAVGSW, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
DEF_HELPER_4(vcmpeqfp, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpgefp, void, env, avr, avr, avr)
DEF_HELPER_4(vcmpgtfp, void, env, avr, avr, avr)

View File

@ -519,6 +519,15 @@ VCMPNEZW 000100 ..... ..... ..... . 0110000111 @VC
VCMPSQ 000100 ... -- ..... ..... 00101000001 @VX_bf
VCMPUQ 000100 ... -- ..... ..... 00100000001 @VX_bf
## Vector Integer Average Instructions
VAVGSB 000100 ..... ..... ..... 10100000010 @VX
VAVGSH 000100 ..... ..... ..... 10101000010 @VX
VAVGSW 000100 ..... ..... ..... 10110000010 @VX
VAVGUB 000100 ..... ..... ..... 10000000010 @VX
VAVGUH 000100 ..... ..... ..... 10001000010 @VX
VAVGUW 000100 ..... ..... ..... 10010000010 @VX
## Vector Bit Manipulation Instruction
VGNB 000100 ..... -- ... ..... 10011001100 @VX_n

View File

@ -570,25 +570,23 @@ VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED
#define VAVG_DO(name, element, etype) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
int i; \
\
for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
r->element[i] = x >> 1; \
} \
#define VAVG(name, element, etype) \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
{ \
int i; \
\
for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
r->element[i] = x >> 1; \
} \
}
#define VAVG(type, signed_element, signed_type, unsigned_element, \
unsigned_type) \
VAVG_DO(avgs##type, signed_element, signed_type) \
VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
VAVG(VAVGSB, s8, int16_t)
VAVG(VAVGUB, u8, uint16_t)
VAVG(VAVGSH, s16, int32_t)
VAVG(VAVGUH, u16, uint32_t)
VAVG(VAVGSW, s32, int64_t)
VAVG(VAVGUW, u32, uint64_t)
#undef VAVG
#define VABSDU_DO(name, element) \

View File

@ -431,21 +431,9 @@ GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12);
GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13);
GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14);
GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15);
GEN_VXFORM(vavgub, 1, 16);
GEN_VXFORM(vabsdub, 1, 16);
GEN_VXFORM_DUAL(vavgub, PPC_ALTIVEC, PPC_NONE, \
vabsdub, PPC_NONE, PPC2_ISA300)
GEN_VXFORM(vavguh, 1, 17);
GEN_VXFORM(vabsduh, 1, 17);
GEN_VXFORM_DUAL(vavguh, PPC_ALTIVEC, PPC_NONE, \
vabsduh, PPC_NONE, PPC2_ISA300)
GEN_VXFORM(vavguw, 1, 18);
GEN_VXFORM(vabsduw, 1, 18);
GEN_VXFORM_DUAL(vavguw, PPC_ALTIVEC, PPC_NONE, \
vabsduw, PPC_NONE, PPC2_ISA300)
GEN_VXFORM(vavgsb, 1, 20);
GEN_VXFORM(vavgsh, 1, 21);
GEN_VXFORM(vavgsw, 1, 22);
GEN_VXFORM(vmrghb, 6, 0);
GEN_VXFORM(vmrghh, 6, 1);
GEN_VXFORM(vmrghw, 6, 2);
@ -3373,6 +3361,100 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
void (*gen_shr_vec)(unsigned, TCGv_vec, TCGv_vec, int64_t))
{
TCGv_vec tmp = tcg_temp_new_vec_matching(t);
tcg_gen_or_vec(vece, tmp, a, b);
tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
gen_shr_vec(vece, a, a, 1);
gen_shr_vec(vece, b, b, 1);
tcg_gen_add_vec(vece, t, a, b);
tcg_gen_add_vec(vece, t, t, tmp);
tcg_temp_free_vec(tmp);
}
QEMU_FLATTEN
static void gen_vavgu(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
do_vavg(vece, t, a, b, tcg_gen_shri_vec);
}
QEMU_FLATTEN
static void gen_vavgs(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
do_vavg(vece, t, a, b, tcg_gen_sari_vec);
}
static bool do_vx_vavg(DisasContext *ctx, arg_VX *a, int sign, int vece)
{
static const TCGOpcode vecop_list_s[] = {
INDEX_op_add_vec, INDEX_op_sari_vec, 0
};
static const TCGOpcode vecop_list_u[] = {
INDEX_op_add_vec, INDEX_op_shri_vec, 0
};
static const GVecGen3 op[2][3] = {
{
{
.fniv = gen_vavgu,
.fno = gen_helper_VAVGUB,
.opt_opc = vecop_list_u,
.vece = MO_8
},
{
.fniv = gen_vavgu,
.fno = gen_helper_VAVGUH,
.opt_opc = vecop_list_u,
.vece = MO_16
},
{
.fniv = gen_vavgu,
.fno = gen_helper_VAVGUW,
.opt_opc = vecop_list_u,
.vece = MO_32
},
},
{
{
.fniv = gen_vavgs,
.fno = gen_helper_VAVGSB,
.opt_opc = vecop_list_s,
.vece = MO_8
},
{
.fniv = gen_vavgs,
.fno = gen_helper_VAVGSH,
.opt_opc = vecop_list_s,
.vece = MO_16
},
{
.fniv = gen_vavgs,
.fno = gen_helper_VAVGSW,
.opt_opc = vecop_list_s,
.vece = MO_32
},
},
};
REQUIRE_VECTOR(ctx);
tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
avr_full_offset(a->vrb), 16, 16, &op[sign][vece]);
return true;
}
TRANS_FLAGS(ALTIVEC, VAVGSB, do_vx_vavg, 1, MO_8)
TRANS_FLAGS(ALTIVEC, VAVGSH, do_vx_vavg, 1, MO_16)
TRANS_FLAGS(ALTIVEC, VAVGSW, do_vx_vavg, 1, MO_32)
TRANS_FLAGS(ALTIVEC, VAVGUB, do_vx_vavg, 0, MO_8)
TRANS_FLAGS(ALTIVEC, VAVGUH, do_vx_vavg, 0, MO_16)
TRANS_FLAGS(ALTIVEC, VAVGUW, do_vx_vavg, 0, MO_32)
static bool do_vdiv_vmod(DisasContext *ctx, arg_VX *a, const int vece,
void (*func_32)(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b),
void (*func_64)(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b))

View File

@ -83,12 +83,9 @@ GEN_VXFORM(vminsb, 1, 12),
GEN_VXFORM(vminsh, 1, 13),
GEN_VXFORM(vminsw, 1, 14),
GEN_VXFORM_207(vminsd, 1, 15),
GEN_VXFORM_DUAL(vavgub, vabsdub, 1, 16, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM_DUAL(vavguh, vabsduh, 1, 17, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM_DUAL(vavguw, vabsduw, 1, 18, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM(vavgsb, 1, 20),
GEN_VXFORM(vavgsh, 1, 21),
GEN_VXFORM(vavgsw, 1, 22),
GEN_VXFORM(vabsdub, 1, 16),
GEN_VXFORM(vabsduh, 1, 17),
GEN_VXFORM(vabsduw, 1, 18),
GEN_VXFORM(vmrghb, 6, 0),
GEN_VXFORM(vmrghh, 6, 1),
GEN_VXFORM(vmrghw, 6, 2),