avx512 move functions introduced

Author: Stanislav Shwartsman
Date:   2013-11-29 11:10:34 +00:00
Commit: 21bb1363ac
Parent: 1a735e9fdf
5 changed files with 68 additions and 59 deletions

Changed file 1 of 5

@@ -31,41 +31,47 @@
 #include "simd_int.h"
 #include "simd_compare.h"
 
-BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsR(bxInstruction_c *i)
-{
-  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
-  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
+BX_CPP_INLINE void BX_CPU_C::avx512_write_regd_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned mask)
+{
   unsigned len = i->getVL();
 
   if (i->isZeroMasking()) {
     for (unsigned n=0; n < len; n++, mask >>= 4)
-      xmm_zero_blendps(&op.vmm128(n), mask);
+      xmm_zero_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), mask);
   }
   else {
     for (unsigned n=0; n < len; n++, mask >>= 4)
-      xmm_blendps(&op.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask);
+      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), mask);
   }
+}
+
+BX_CPP_INLINE void BX_CPU_C::avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned mask)
+{
+  unsigned len = i->getVL();
+
+  if (i->isZeroMasking()) {
+    for (unsigned n=0; n < len; n++, mask >>= 4)
+      xmm_zero_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), mask);
+  }
+  else {
+    for (unsigned n=0; n < len; n++, mask >>= 4)
+      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), mask);
+  }
 
-  BX_WRITE_AVX_REGZ(i->dst(), op, len);
+  BX_CLEAR_AVX_REGZ(i->dst(), len);
+}
+
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsR(bxInstruction_c *i)
+{
+  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
+
+  avx512_write_regd_masked(i, &op, BX_READ_16BIT_OPMASK(i->opmask()));
 
   BX_NEXT_INSTR(i);
 }
 
 BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdR(bxInstruction_c *i)
 {
   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
-  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
-  unsigned len = i->getVL();
-
-  if (i->isZeroMasking()) {
-    for (unsigned n=0; n < len; n++, mask >>= 2)
-      xmm_zero_blendpd(&op.vmm128(n), mask);
-  }
-  else {
-    for (unsigned n=0; n < len; n++, mask >>= 2)
-      xmm_blendpd(&op.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask);
-  }
-
-  BX_WRITE_AVX_REGZ(i->dst(), op, len);
+  avx512_write_regq_masked(i, &op, BX_READ_8BIT_OPMASK(i->opmask()));
 
   BX_NEXT_INSTR(i);
 }
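For readers unfamiliar with AVX-512 write masking, here is a rough standalone sketch of the dword semantics the new avx512_write_regd_masked helper centralizes: the opmask contributes 4 bits per 128-bit lane, merge-masking keeps the old destination element where a bit is clear, and zero-masking clears it. Plain arrays and the hypothetical name write_dword_masked are used instead of Bochs' BxPackedAvxRegister; this is illustrative, not code from the commit.

#include <cstdio>
#include <cstdint>

// dst/src: 'len' lanes of four 32-bit elements; one opmask bit per element.
static void write_dword_masked(uint32_t dst[][4], const uint32_t src[][4],
                               unsigned len, unsigned mask, bool zero_masking)
{
  for (unsigned n = 0; n < len; n++, mask >>= 4) {
    for (unsigned e = 0; e < 4; e++) {
      if (mask & (1u << e))
        dst[n][e] = src[n][e];   // element selected by the opmask
      else if (zero_masking)
        dst[n][e] = 0;           // zero-masking: clear unselected element
      // merge-masking: unselected element keeps its old value
    }
  }
}

int main()
{
  uint32_t dst[2][4] = { {1,1,1,1}, {2,2,2,2} };
  const uint32_t src[2][4] = { {10,20,30,40}, {50,60,70,80} };

  write_dword_masked(dst, src, 2, 0x3C /* k = 0b0000000000111100 */, false);

  for (unsigned n = 0; n < 2; n++)
    std::printf("%u %u %u %u\n", dst[n][0], dst[n][1], dst[n][2], dst[n][3]);
  return 0;
}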
@@ -97,22 +103,13 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_WpdVpdM(bxInstruction
 BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER (bxInstruction_c *i) \
 { \
   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
-  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask()); \
   unsigned len = i->getVL(); \
   \
   for (unsigned n=0; n < len; n++) \
     (func)(&op1.vmm128(n), &op2.vmm128(n)); \
   \
-  if (i->isZeroMasking()) { \
-    for (unsigned n=0; n < len; n++, mask >>= 4) \
-      xmm_zero_blendps(&op1.vmm128(n), mask); \
-  } \
-  else { \
-    for (unsigned n=0; n < len; n++, mask >>= 4) \
-      xmm_blendps(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
-  } \
-  \
-  BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
+  avx512_write_regd_masked(i, &op1, BX_READ_16BIT_OPMASK(i->opmask())); \
   \
   BX_NEXT_INSTR(i); \
 }
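For reference, hand-expanding the reworked DWORD macro for the instantiation named in the next hunk header, AVX512_2OP_DWORD_EL(VPMULLD_MASK_VdqHdqWdqR, xmm_pmulld), gives roughly the handler below. This is a sketch of the preprocessor output written in Bochs' own types, not code that appears verbatim in the commit:

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMULLD_MASK_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();

  // element-wise 32-bit multiply on each 128-bit lane
  for (unsigned n=0; n < len; n++)
    xmm_pmulld(&op1.vmm128(n), &op2.vmm128(n));

  // the whole masked write-back is now a single helper call
  avx512_write_regd_masked(i, &op1, BX_READ_16BIT_OPMASK(i->opmask()));

  BX_NEXT_INSTR(i);
}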
@@ -135,22 +132,13 @@ AVX512_2OP_DWORD_EL(VPMULLD_MASK_VdqHdqWdqR, xmm_pmulld)
 BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER (bxInstruction_c *i) \
 { \
   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
-  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask()); \
   unsigned len = i->getVL(); \
   \
   for (unsigned n=0; n < len; n++) \
     (func)(&op1.vmm128(n), &op2.vmm128(n)); \
   \
-  if (i->isZeroMasking()) { \
-    for (unsigned n=0; n < len; n++, mask >>= 2) \
-      xmm_zero_blendpd(&op1.vmm128(n), mask); \
-  } \
-  else { \
-    for (unsigned n=0; n < len; n++, mask >>= 2) \
-      xmm_blendpd(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
-  } \
-  \
-  BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
+  avx512_write_regq_masked(i, &op1, BX_READ_8BIT_OPMASK(i->opmask())); \
   \
   BX_NEXT_INSTR(i); \
 }

Changed file 2 of 5

@@ -57,10 +57,14 @@ extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mx
   \
   if (! i->isZeroMasking()) { \
     for (unsigned n=0; n < len; n++, mask >>= 4) \
-      xmm_blendps(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
+      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
+    \
+    BX_CLEAR_AVX_REGZ(i->dst(), len); \
   } \
+  else { \
+    BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
+  } \
   \
-  BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
   BX_NEXT_INSTR(i); \
 }
@@ -91,10 +95,14 @@ EVEX_FMA_PACKED_SINGLE(VFNMSUBPS_MASK_VpsHpsWpsR, xmm_fnmsubps_mask)
   \
   if (! i->isZeroMasking()) { \
     for (unsigned n=0; n < len; n++, mask >>= 2) \
-      xmm_blendpd(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
+      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
+    \
+    BX_CLEAR_AVX_REGZ(i->dst(), len); \
   } \
+  else { \
+    BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
+  } \
   \
-  BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
   BX_NEXT_INSTR(i); \
 }
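The zero-masking branch of the new tail stores op1 unchanged, which only works because the masked arithmetic step above it (not shown in this hunk) has presumably already zeroed the deselected elements; merge-masking instead blends into the previous destination value and then clears the lanes above the current vector length. A standalone sketch of that store decision for one 128-bit dword lane, using plain arrays and the hypothetical name store_lane_masked rather than Bochs code:

#include <cassert>
#include <cstdint>

// One 128-bit lane of dword results: op1 is assumed to already have its
// masked-off elements zeroed by the masked arithmetic step.
static void store_lane_masked(uint32_t dst[4], const uint32_t op1[4],
                              unsigned mask, bool zero_masking)
{
  if (! zero_masking) {
    for (unsigned e = 0; e < 4; e++)          // merge-masking: blend under the opmask
      if (mask & (1u << e)) dst[e] = op1[e];
  }
  else {
    for (unsigned e = 0; e < 4; e++)          // zero-masking: op1 is already correct
      dst[e] = op1[e];
  }
}

int main()
{
  uint32_t dst[4] = { 7, 7, 7, 7 };
  const uint32_t res[4] = { 10, 0, 30, 0 };   // masked result, opmask = 0b0101
  store_lane_masked(dst, res, 0x5, false);    // merge-masking
  assert(dst[0] == 10 && dst[1] == 7 && dst[2] == 30 && dst[3] == 7);
  return 0;
}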

Changed file 3 of 5

@@ -52,10 +52,14 @@ extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mx
   \
   if (! i->isZeroMasking()) { \
     for (unsigned n=0; n < len; n++, mask >>= 4) \
-      xmm_blendps(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
+      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
+    \
+    BX_CLEAR_AVX_REGZ(i->dst(), len); \
   } \
+  else { \
+    BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
+  } \
   \
-  BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
   BX_NEXT_INSTR(i); \
 }
@@ -84,10 +88,14 @@ EVEX_OP_PACKED_SINGLE(VMINPS_MASK_VpsHpsWpsR, xmm_minps_mask)
   \
   if (! i->isZeroMasking()) { \
     for (unsigned n=0; n < len; n++, mask >>= 2) \
-      xmm_blendpd(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
+      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
+    \
+    BX_CLEAR_AVX_REGZ(i->dst(), len); \
   } \
+  else { \
+    BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
+  } \
   \
-  BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
   BX_NEXT_INSTR(i); \
 }

Changed file 4 of 5

@@ -4305,6 +4305,11 @@ public: // for now...
   BX_SMF void handleAvxModeChange(void);
 #endif
 
+#if BX_SUPPORT_EVEX
+  BX_SMF void avx512_write_regd_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned mask);
+  BX_SMF void avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned mask);
+#endif
+
 #if BX_CPU_LEVEL >= 5
   BX_SMF bx_bool rdmsr(Bit32u index, Bit64u *val_64) BX_CPP_AttrRegparmN(2);
   BX_SMF bx_bool handle_unknown_rdmsr(Bit32u index, Bit64u *val_64) BX_CPP_AttrRegparmN(2);

Changed file 5 of 5

@@ -504,12 +504,12 @@ BX_CPP_INLINE void xmm_blendps(BxPackedXmmRegister *op1, const BxPackedXmmRegist
 }
 
 #if BX_SUPPORT_EVEX
-BX_CPP_INLINE void xmm_zero_blendps(BxPackedXmmRegister *op, unsigned mask)
+BX_CPP_INLINE void xmm_zero_blendps(BxPackedXmmRegister *dst, const BxPackedXmmRegister *op, unsigned mask)
 {
-  if ((mask & 0x1) == 0) op->xmm32u(0) = 0;
-  if ((mask & 0x2) == 0) op->xmm32u(1) = 0;
-  if ((mask & 0x4) == 0) op->xmm32u(2) = 0;
-  if ((mask & 0x8) == 0) op->xmm32u(3) = 0;
+  dst->xmm32u(0) = (mask & 0x1) ? op->xmm32u(0) : 0;
+  dst->xmm32u(1) = (mask & 0x2) ? op->xmm32u(1) : 0;
+  dst->xmm32u(2) = (mask & 0x4) ? op->xmm32u(2) : 0;
+  dst->xmm32u(3) = (mask & 0x8) ? op->xmm32u(3) : 0;
 }
 #endif
@@ -520,10 +520,10 @@ BX_CPP_INLINE void xmm_blendpd(BxPackedXmmRegister *op1, const BxPackedXmmRegist
 }
 
 #if BX_SUPPORT_EVEX
-BX_CPP_INLINE void xmm_zero_blendpd(BxPackedXmmRegister *op, unsigned mask)
+BX_CPP_INLINE void xmm_zero_blendpd(BxPackedXmmRegister *dst, const BxPackedXmmRegister *op, unsigned mask)
 {
-  if ((mask & 0x1) == 0) op->xmm64u(0) = 0;
-  if ((mask & 0x2) == 0) op->xmm64u(1) = 0;
+  dst->xmm64u(0) = (mask & 0x1) ? op->xmm64u(0) : 0;
+  dst->xmm64u(1) = (mask & 0x2) ? op->xmm64u(1) : 0;
 }
 #endif
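The signature change in this last file is what lets the call sites above write the blended result straight into the destination register's lane: the old helpers zeroed elements in place inside the source copy, the new ones produce the result directly in dst and leave the source untouched. A standalone before/after sketch of the two calling conventions, using a plain struct and a loop instead of BxPackedXmmRegister and the unrolled Bochs helpers (illustrative only):

#include <cassert>
#include <cstdint>

struct Xmm { uint32_t u32[4]; };

// old style: zero unselected elements in place; the caller then copies 'op' out
static void zero_blendps_inplace(Xmm *op, unsigned mask)
{
  for (unsigned e = 0; e < 4; e++)
    if ((mask & (1u << e)) == 0) op->u32[e] = 0;
}

// new style: write the blended result directly into dst, 'op' stays intact
static void zero_blendps(Xmm *dst, const Xmm *op, unsigned mask)
{
  for (unsigned e = 0; e < 4; e++)
    dst->u32[e] = (mask & (1u << e)) ? op->u32[e] : 0;
}

int main()
{
  Xmm src = {{ 1, 2, 3, 4 }}, dst = {{ 9, 9, 9, 9 }}, tmp = src;

  zero_blendps_inplace(&tmp, 0x6);     // old: clobbers the temporary copy
  zero_blendps(&dst, &src, 0x6);       // new: source register stays untouched

  for (unsigned e = 0; e < 4; e++)
    assert(tmp.u32[e] == dst.u32[e]);  // both yield {0, 2, 3, 0}
  return 0;
}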