avx512 move functions introduced
This commit is contained in:
parent
1a735e9fdf
commit
21bb1363ac
@ -31,41 +31,47 @@
|
||||
#include "simd_int.h"
|
||||
#include "simd_compare.h"
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsR(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
|
||||
unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
|
||||
// Store *op into the destination register of instruction i under a
// per-dword opmask, working on one 128-bit lane at a time.
//
//   i    - decoded instruction; supplies dst(), getVL() and isZeroMasking()
//   op   - full-width value to be stored
//   mask - opmask bits; each 128-bit lane consumes 4 bits (one per dword)
//
// With zero-masking, xmm_zero_blendps writes op's dword where the mask bit is
// set and 0 where it is clear. Otherwise xmm_blendps is applied against the
// current destination lane (presumably merge-masking: dwords whose mask bit is
// clear keep their previous destination value - confirm in xmm_blendps).
BX_CPP_INLINE void BX_CPU_C::avx512_write_regd_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned mask)
{
  unsigned len = i->getVL();

  if (i->isZeroMasking()) {
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_zero_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), mask);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), mask);
  }

  // The lane-wise stores above only touch lanes [0, len). Finish the same way
  // avx512_write_regq_masked and the EVEX arithmetic macros do, so that the
  // part of the destination beyond the vector length is handled uniformly
  // (presumably zeroed by BX_CLEAR_AVX_REGZ).
  BX_CLEAR_AVX_REGZ(i->dst(), len);
}
|
||||
|
||||
// Store *op into the destination register of instruction i under a
// per-qword opmask, working on one 128-bit lane at a time.
//
//   i    - decoded instruction; supplies dst(), getVL() and isZeroMasking()
//   op   - full-width value to be stored
//   mask - opmask bits; each 128-bit lane consumes 2 bits (one per qword)
//
// With zero-masking, xmm_zero_blendpd writes op's qword where the mask bit is
// set and 0 where it is clear. Otherwise xmm_blendpd is applied against the
// current destination lane (presumably merge-masking: qwords whose mask bit is
// clear keep their previous destination value - confirm in xmm_blendpd).
BX_CPP_INLINE void BX_CPU_C::avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned mask)
{
  unsigned len = i->getVL();

  // Callers pass an 8-bit opmask with one bit per 64-bit element, so the
  // qword blends must be used and the mask advanced by 2 bits per lane.
  // Using the dword (ps) blends with a 4-bit step would interpret the mask
  // per 32-bit element and exhaust it after two lanes.
  if (i->isZeroMasking()) {
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_zero_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), mask);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), mask);
  }

  // Lanes [0, len) were written above; let BX_CLEAR_AVX_REGZ take care of the
  // remainder of the destination (presumably zeroing it).
  BX_CLEAR_AVX_REGZ(i->dst(), len);
}
|
||||
|
||||
// Masked VMOVAPS, register source form: copies the source AVX register and
// hands it to avx512_write_regd_masked together with the instruction's
// 16-bit opmask, which performs the masked store into the destination.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned opmask = BX_READ_16BIT_OPMASK(i->opmask());

  avx512_write_regd_masked(i, &src, opmask);

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// Masked VMOVAPD, register source form: copies the source AVX register and
// hands it to avx512_write_regq_masked together with the instruction's
// 8-bit opmask (one bit per 64-bit element), which performs the masked store
// into the destination.
//
// The masking and the destination write live entirely in the helper; the
// handler must not additionally blend into the local copy or write the
// destination itself, mirroring VMOVAPS_MASK_VpsWpsR.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  avx512_write_regq_masked(i, &op, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
@ -97,22 +103,13 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_WpdVpdM(bxInstruction
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER (bxInstruction_c *i) \
|
||||
{ \
|
||||
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
|
||||
unsigned mask = BX_READ_16BIT_OPMASK(i->opmask()); \
|
||||
unsigned len = i->getVL(); \
|
||||
\
|
||||
for (unsigned n=0; n < len; n++) \
|
||||
(func)(&op1.vmm128(n), &op2.vmm128(n)); \
|
||||
\
|
||||
if (i->isZeroMasking()) { \
|
||||
for (unsigned n=0; n < len; n++, mask >>= 4) \
|
||||
xmm_zero_blendps(&op1.vmm128(n), mask); \
|
||||
} \
|
||||
else { \
|
||||
for (unsigned n=0; n < len; n++, mask >>= 4) \
|
||||
xmm_blendps(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
|
||||
} \
|
||||
\
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
avx512_write_regd_masked(i, &op1, BX_READ_16BIT_OPMASK(i->opmask())); \
|
||||
\
|
||||
BX_NEXT_INSTR(i); \
|
||||
}
|
||||
|
||||
@ -135,22 +132,13 @@ AVX512_2OP_DWORD_EL(VPMULLD_MASK_VdqHdqWdqR, xmm_pmulld)
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER (bxInstruction_c *i) \
|
||||
{ \
|
||||
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
|
||||
unsigned mask = BX_READ_8BIT_OPMASK(i->opmask()); \
|
||||
unsigned len = i->getVL(); \
|
||||
\
|
||||
for (unsigned n=0; n < len; n++) \
|
||||
(func)(&op1.vmm128(n), &op2.vmm128(n)); \
|
||||
\
|
||||
if (i->isZeroMasking()) { \
|
||||
for (unsigned n=0; n < len; n++, mask >>= 2) \
|
||||
xmm_zero_blendpd(&op1.vmm128(n), mask); \
|
||||
} \
|
||||
else { \
|
||||
for (unsigned n=0; n < len; n++, mask >>= 2) \
|
||||
xmm_blendpd(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
|
||||
} \
|
||||
\
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
avx512_write_regq_masked(i, &op1, BX_READ_8BIT_OPMASK(i->opmask())); \
|
||||
\
|
||||
BX_NEXT_INSTR(i); \
|
||||
}
|
||||
|
||||
|
@ -57,10 +57,14 @@ extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mx
|
||||
\
|
||||
if (! i->isZeroMasking()) { \
|
||||
for (unsigned n=0; n < len; n++, mask >>= 4) \
|
||||
xmm_blendps(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
|
||||
xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
|
||||
\
|
||||
BX_CLEAR_AVX_REGZ(i->dst(), len); \
|
||||
} \
|
||||
else { \
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
} \
|
||||
\
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
BX_NEXT_INSTR(i); \
|
||||
}
|
||||
|
||||
@ -91,10 +95,14 @@ EVEX_FMA_PACKED_SINGLE(VFNMSUBPS_MASK_VpsHpsWpsR, xmm_fnmsubps_mask)
|
||||
\
|
||||
if (! i->isZeroMasking()) { \
|
||||
for (unsigned n=0; n < len; n++, mask >>= 2) \
|
||||
xmm_blendpd(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
|
||||
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
|
||||
\
|
||||
BX_CLEAR_AVX_REGZ(i->dst(), len); \
|
||||
} \
|
||||
else { \
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
} \
|
||||
\
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
BX_NEXT_INSTR(i); \
|
||||
}
|
||||
|
||||
|
@ -52,10 +52,14 @@ extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mx
|
||||
\
|
||||
if (! i->isZeroMasking()) { \
|
||||
for (unsigned n=0; n < len; n++, mask >>= 4) \
|
||||
xmm_blendps(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
|
||||
xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
|
||||
\
|
||||
BX_CLEAR_AVX_REGZ(i->dst(), len); \
|
||||
} \
|
||||
else { \
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
} \
|
||||
\
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
BX_NEXT_INSTR(i); \
|
||||
}
|
||||
|
||||
@ -84,10 +88,14 @@ EVEX_OP_PACKED_SINGLE(VMINPS_MASK_VpsHpsWpsR, xmm_minps_mask)
|
||||
\
|
||||
if (! i->isZeroMasking()) { \
|
||||
for (unsigned n=0; n < len; n++, mask >>= 2) \
|
||||
xmm_blendpd(&op1.vmm128(n), &BX_READ_AVX_REG_LANE(i->dst(), n), ~mask); \
|
||||
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
|
||||
\
|
||||
BX_CLEAR_AVX_REGZ(i->dst(), len); \
|
||||
} \
|
||||
else { \
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
} \
|
||||
\
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
|
||||
BX_NEXT_INSTR(i); \
|
||||
}
|
||||
|
||||
|
@ -4305,6 +4305,11 @@ public: // for now...
|
||||
BX_SMF void handleAvxModeChange(void);
|
||||
#endif
|
||||
|
||||
#if BX_SUPPORT_EVEX
|
||||
BX_SMF void avx512_write_regd_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned mask);
|
||||
BX_SMF void avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned mask);
|
||||
#endif
|
||||
|
||||
#if BX_CPU_LEVEL >= 5
|
||||
BX_SMF bx_bool rdmsr(Bit32u index, Bit64u *val_64) BX_CPP_AttrRegparmN(2);
|
||||
BX_SMF bx_bool handle_unknown_rdmsr(Bit32u index, Bit64u *val_64) BX_CPP_AttrRegparmN(2);
|
||||
|
@ -504,12 +504,12 @@ BX_CPP_INLINE void xmm_blendps(BxPackedXmmRegister *op1, const BxPackedXmmRegist
|
||||
}
|
||||
|
||||
#if BX_SUPPORT_EVEX
|
||||
// Zero-masking blend of four dword elements.
//
//   dst  - receives the result
//   op   - source elements (not modified)
//   mask - bits 0..3 select dwords 0..3
//
// For every dword: if its mask bit is set, dst gets the dword from op;
// otherwise dst gets 0. Bits above bit 3 are ignored, so callers may pass a
// wider mask that has been shifted into position.
BX_CPP_INLINE void xmm_zero_blendps(BxPackedXmmRegister *dst, const BxPackedXmmRegister *op, unsigned mask)
{
  dst->xmm32u(0) = (mask & 0x1) ? op->xmm32u(0) : 0;
  dst->xmm32u(1) = (mask & 0x2) ? op->xmm32u(1) : 0;
  dst->xmm32u(2) = (mask & 0x4) ? op->xmm32u(2) : 0;
  dst->xmm32u(3) = (mask & 0x8) ? op->xmm32u(3) : 0;
}
|
||||
#endif
|
||||
|
||||
@ -520,10 +520,10 @@ BX_CPP_INLINE void xmm_blendpd(BxPackedXmmRegister *op1, const BxPackedXmmRegist
|
||||
}
|
||||
|
||||
#if BX_SUPPORT_EVEX
|
||||
// Zero-masking blend of two qword elements.
//
//   dst  - receives the result
//   op   - source elements (not modified)
//   mask - bits 0..1 select qwords 0..1
//
// For every qword: if its mask bit is set, dst gets the qword from op;
// otherwise dst gets 0. Bits above bit 1 are ignored, so callers may pass a
// wider mask that has been shifted into position.
BX_CPP_INLINE void xmm_zero_blendpd(BxPackedXmmRegister *dst, const BxPackedXmmRegister *op, unsigned mask)
{
  dst->xmm64u(0) = (mask & 0x1) ? op->xmm64u(0) : 0;
  dst->xmm64u(1) = (mask & 0x2) ? op->xmm64u(1) : 0;
}
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user