target-ppc: Altivec 2.07: Vector Polynomial Multiply Sum
This patch adds the Vector Polynomial Multiply Sum instructions introduced in Power ISA Version 2.07: - Vector Polynomial Multiply Sum Byte (vpmsumb) - Vector Polynomial Multiply Sum Halfword (vpmsumh) - Vector Polynomial Multiply Sum Word (vpmsumw) - Vector Polynomial Multiply Sum Doubleword (vpmsumd) Signed-off-by: Tom Musta <tommusta@gmail.com> Signed-off-by: Alexander Graf <agraf@suse.de>
This commit is contained in:
parent
f1064f612c
commit
b8476fc7c6
@ -311,6 +311,10 @@ DEF_HELPER_2(vpopcntw, void, avr, avr)
|
||||
DEF_HELPER_2(vpopcntd, void, avr, avr)
|
||||
DEF_HELPER_3(vbpermq, void, avr, avr, avr)
|
||||
DEF_HELPER_2(vgbbd, void, avr, avr)
|
||||
DEF_HELPER_3(vpmsumb, void, avr, avr, avr)
|
||||
DEF_HELPER_3(vpmsumh, void, avr, avr, avr)
|
||||
DEF_HELPER_3(vpmsumw, void, avr, avr, avr)
|
||||
DEF_HELPER_3(vpmsumd, void, avr, avr, avr)
|
||||
|
||||
DEF_HELPER_2(xsadddp, void, env, i32)
|
||||
DEF_HELPER_2(xssubdp, void, env, i32)
|
||||
|
@ -1351,6 +1351,76 @@ void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
|
||||
r->u64[1] = t[1];
|
||||
}
|
||||
|
||||
#define PMSUM(name, srcfld, trgfld, trgtyp) \
|
||||
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
|
||||
{ \
|
||||
int i, j; \
|
||||
trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
|
||||
\
|
||||
VECTOR_FOR_INORDER_I(i, srcfld) { \
|
||||
prod[i] = 0; \
|
||||
for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
|
||||
if (a->srcfld[i] & (1ull<<j)) { \
|
||||
prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
VECTOR_FOR_INORDER_I(i, trgfld) { \
|
||||
r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
|
||||
} \
|
||||
}
|
||||
|
||||
PMSUM(vpmsumb, u8, u16, uint16_t)
|
||||
PMSUM(vpmsumh, u16, u32, uint32_t)
|
||||
PMSUM(vpmsumw, u32, u64, uint64_t)
|
||||
|
||||
/*
 * Vector Polynomial Multiply-Sum Doubleword.
 *
 * For each of the two 64-bit elements, the element of 'a' is multiplied with
 * the matching element of 'b' by XOR-ing shifted copies of the 'b' element
 * (one per set bit of the 'a' element), giving a 128-bit carry-less product.
 * The two products are XOR-ed together and stored as the 128-bit result.
 *
 * The result is accumulated in locals and 'r' is written only at the end,
 * after 'a' and 'b' have been fully read.
 */
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, bit;
#ifdef CONFIG_INT128
    /* Host has a native 128-bit integer: accumulate directly. */
    __uint128_t sum = 0;

    VECTOR_FOR_INORDER_I(i, u64) {
        const __uint128_t multiplicand = b->u64[i];

        for (bit = 0; bit < 64; bit++) {
            if (!(a->u64[i] & (1ull << bit))) {
                continue;
            }
            sum ^= multiplicand << bit;
        }
    }

    r->u128 = sum;
#else
    /* No 128-bit integer: keep the accumulator as two 64-bit halves. */
    uint64_t hi = 0;
    uint64_t lo = 0;

    VECTOR_FOR_INORDER_I(i, u64) {
        const uint64_t multiplier = a->u64[i];
        const uint64_t multiplicand = b->u64[i];

        for (bit = 0; bit < 64; bit++) {
            if (!(multiplier & (1ull << bit))) {
                continue;
            }
            lo ^= multiplicand << bit;
            /*
             * A zero shift spills nothing into the upper half; skipping it
             * also avoids a right shift by the full 64-bit width.
             */
            if (bit != 0) {
                hi ^= multiplicand >> (64 - bit);
            }
        }
    }

    r->u64[HI_IDX] = hi;
    r->u64[LO_IDX] = lo;
#endif
}
|
||||
|
||||
|
||||
#if defined(HOST_WORDS_BIGENDIAN)
|
||||
#define PKBIG 1
|
||||
#else
|
||||
|
@ -7372,6 +7372,10 @@ GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \
|
||||
vpopcntd, PPC_NONE, PPC2_ALTIVEC_207)
|
||||
GEN_VXFORM(vbpermq, 6, 21);
|
||||
GEN_VXFORM_NOA(vgbbd, 6, 20);
|
||||
GEN_VXFORM(vpmsumb, 4, 16)
|
||||
GEN_VXFORM(vpmsumh, 4, 17)
|
||||
GEN_VXFORM(vpmsumw, 4, 18)
|
||||
GEN_VXFORM(vpmsumd, 4, 19)
|
||||
|
||||
/*** VSX extension ***/
|
||||
|
||||
@ -10623,6 +10627,10 @@ GEN_VXFORM_DUAL(vclzd, vpopcntd, 1, 31, PPC_NONE, PPC2_ALTIVEC_207),
|
||||
|
||||
GEN_VXFORM_207(vbpermq, 6, 21),
|
||||
GEN_VXFORM_207(vgbbd, 6, 20),
|
||||
GEN_VXFORM_207(vpmsumb, 4, 16),
|
||||
GEN_VXFORM_207(vpmsumh, 4, 17),
|
||||
GEN_VXFORM_207(vpmsumw, 4, 18),
|
||||
GEN_VXFORM_207(vpmsumd, 4, 19),
|
||||
|
||||
GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX),
|
||||
GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207),
|
||||
|
Loading…
Reference in New Issue
Block a user