Infrastructure change for several AVX handlers to support any VL, not only 128/256

This commit is contained in:
Stanislav Shwartsman 2013-09-21 20:40:57 +00:00
parent 404b8b1475
commit f791802286
5 changed files with 166 additions and 166 deletions

View File

@ -35,13 +35,13 @@
/* AVX instruction with two src operands */ \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER (bxInstruction_c *i) \
{ \
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2()); \
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
unsigned len = i->getVL(); \
\
for (unsigned n=0; n < len; n++) \
(func)(&op1.ymm128(n), &op2.ymm128(n)); \
(func)(&op1.vmm128(n), &op2.vmm128(n)); \
\
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len); \
BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
\
BX_NEXT_INSTR(i); \
}
@ -143,13 +143,13 @@ AVX_2OP(VPSRLVQ_VdqHdqWdqR, xmm_psrlvq)
/* AVX instruction with single src operand */ \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER (bxInstruction_c *i) \
{ \
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); \
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); \
unsigned len = i->getVL(); \
\
for (unsigned n=0; n < len; n++) \
(func)(&op.ymm128(n)); \
(func)(&op.vmm128(n)); \
\
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len); \
BX_WRITE_AVX_REGZ(i->dst(), op, len); \
\
BX_NEXT_INSTR(i); \
}
@ -162,13 +162,13 @@ AVX_1OP(VPABSD_VdqWdqR, xmm_pabsd)
/* AVX packed shift instruction */ \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src1()); \
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src1()); \
unsigned len = i->getVL(); \
\
for (unsigned n=0; n < len; n++) \
(func)(&op.ymm128(n), BX_READ_XMM_REG_LO_QWORD(i->src2())); \
(func)(&op.vmm128(n), BX_READ_XMM_REG_LO_QWORD(i->src2())); \
\
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len); \
BX_WRITE_AVX_REGZ(i->dst(), op, len); \
\
BX_NEXT_INSTR(i); \
}
@ -186,13 +186,13 @@ AVX_PSHIFT(VPSLLQ_VdqHdqWdqR, xmm_psllq);
/* AVX packed shift with imm8 instruction */ \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); \
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); \
unsigned len = i->getVL(); \
\
for (unsigned n=0; n < len; n++) \
(func)(&op.ymm128(n), i->Ib()); \
(func)(&op.vmm128(n), i->Ib()); \
\
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len); \
BX_WRITE_AVX_REGZ(i->dst(), op, len); \
\
BX_NEXT_INSTR(i); \
}
@ -211,75 +211,75 @@ AVX_PSHIFT_IMM(VPSLLDQ_UdqIb, xmm_pslldq);
// VPSHUFHW handler: reads the source register, runs xmm_pshufhw once per lane
// index n in [0, len) with the instruction's imm8 (i->Ib()) as the shuffle
// order, and writes the collected lanes in `result` to the destination.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHUFHW_VdqWdqIbR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()), result;
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
Bit8u order = i->Ib();
// len is taken from the instruction's vector length and bounds the lane loop.
unsigned len = i->getVL();
for (unsigned n=0; n < len; n++)
xmm_pshufhw(&result.ymm128(n), &op.ymm128(n), order);
xmm_pshufhw(&result.vmm128(n), &op.vmm128(n), order);
// presumably the REGZ write also clears the destination above `len` lanes -- TODO confirm
BX_WRITE_YMM_REGZ_VLEN(i->dst(), result, len);
BX_WRITE_AVX_REGZ(i->dst(), result, len);
BX_NEXT_INSTR(i);
}
// VPSHUFLW handler: same shape as the VPSHUFHW handler but delegates to
// xmm_pshuflw. The source register is read once, each lane index n in
// [0, len) is shuffled into `result` using imm8 (i->Ib()) as the order, and
// `result` is written to the destination.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHUFLW_VdqWdqIbR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()), result;
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
Bit8u order = i->Ib();
// len is taken from the instruction's vector length and bounds the lane loop.
unsigned len = i->getVL();
for (unsigned n=0; n < len; n++)
xmm_pshuflw(&result.ymm128(n), &op.ymm128(n), order);
xmm_pshuflw(&result.vmm128(n), &op.vmm128(n), order);
// presumably the REGZ write also clears the destination above `len` lanes -- TODO confirm
BX_WRITE_YMM_REGZ_VLEN(i->dst(), result, len);
BX_WRITE_AVX_REGZ(i->dst(), result, len);
BX_NEXT_INSTR(i);
}
// VPSHUFB handler: reads two source registers (src1, src2) and, for each lane
// index n in [0, len), calls xmm_pshufb with the output lane of `result` and
// the matching lanes of op1 and op2. `result` is then written to the
// destination. Unlike the imm8 shuffles, no immediate is consulted here.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHUFB_VdqHdqWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2()), result;
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
// len is taken from the instruction's vector length and bounds the lane loop.
unsigned len = i->getVL();
for (unsigned n=0; n < len; n++)
xmm_pshufb(&result.ymm128(n), &op1.ymm128(n), &op2.ymm128(n));
xmm_pshufb(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n));
// presumably the REGZ write also clears the destination above `len` lanes -- TODO confirm
BX_WRITE_YMM_REGZ_VLEN(i->dst(), result, len);
BX_WRITE_AVX_REGZ(i->dst(), result, len);
BX_NEXT_INSTR(i);
}
// VMPSADBW handler: reads two source registers and, for each lane index n in
// [0, len), calls xmm_mpsadbw with the low three bits of the imm8 control
// byte. The control byte is shifted right by three after every lane, so
// successive lanes consume successive 3-bit fields of the immediate.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMPSADBW_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2()), result;
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
// Local copy of imm8; it is consumed destructively by the loop below.
Bit8u control = i->Ib();
unsigned len = i->getVL();
for (unsigned n=0; n < len; n++) {
xmm_mpsadbw(&result.ymm128(n), &op1.ymm128(n), &op2.ymm128(n), control & 0x7);
xmm_mpsadbw(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n), control & 0x7);
// Advance to the next 3-bit control field for the following lane.
// NOTE(review): control is a Bit8u, so from the third lane onward fewer than
// three (then zero) immediate bits remain -- confirm this is intended for len > 2.
control >>= 3;
}
// presumably the REGZ write also clears the destination above `len` lanes -- TODO confirm
BX_WRITE_YMM_REGZ_VLEN(i->dst(), result, len);
BX_WRITE_AVX_REGZ(i->dst(), result, len);
BX_NEXT_INSTR(i);
}
// VPBLENDW handler: reads two source registers and, for each lane index n in
// [0, len), calls xmm_pblendw on the matching lanes of op1 and op2 with the
// same imm8 mask for every lane. op1 is what gets written to the destination,
// so xmm_pblendw presumably blends in place into its first argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBLENDW_VdqHdqWdqIbR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
unsigned len = i->getVL();
// The mask is not shifted between lanes: each lane sees the full imm8.
Bit8u mask = i->Ib();
for (unsigned n=0; n < len; n++)
xmm_pblendw(&op1.ymm128(n), &op2.ymm128(n), mask);
xmm_pblendw(&op1.vmm128(n), &op2.vmm128(n), mask);
// presumably the REGZ write also clears the destination above `len` lanes -- TODO confirm
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}

View File

@ -42,7 +42,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDPS_MASK_VpsHpsWpsR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4) {
xmm_addps_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -67,7 +67,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDPD_MASK_VpdHpdWpdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2) {
xmm_addpd_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -94,7 +94,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSS_MASK_VssHpsWssR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float32_add(op1.xmm32u(0), op2, status);
@ -122,7 +122,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSD_MASK_VsdHpdWsdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float64_add(op1.xmm64u(0), op2, status);
@ -148,7 +148,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBPS_MASK_VpsHpsWpsR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4) {
xmm_subps_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -173,7 +173,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBPD_MASK_VpdHpdWpdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2) {
xmm_subpd_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -200,7 +200,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBSS_MASK_VssHpsWssR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float32_sub(op1.xmm32u(0), op2, status);
@ -228,7 +228,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBSD_MASK_VsdHpdWsdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float64_sub(op1.xmm64u(0), op2, status);
@ -254,7 +254,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULPS_MASK_VpsHpsWpsR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4) {
xmm_mulps_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -279,7 +279,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULPD_MASK_VpdHpdWpdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2) {
xmm_mulpd_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -306,7 +306,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSS_MASK_VssHpsWssR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float32_mul(op1.xmm32u(0), op2, status);
@ -334,7 +334,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSD_MASK_VsdHpdWsdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float64_mul(op1.xmm64u(0), op2, status);
@ -360,7 +360,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVPS_MASK_VpsHpsWpsR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4) {
xmm_divps_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -385,7 +385,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVPD_MASK_VpdHpdWpdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2) {
xmm_divpd_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -412,7 +412,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVSS_MASK_VssHpsWssR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float32_div(op1.xmm32u(0), op2, status);
@ -440,7 +440,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVSD_MASK_VsdHpdWsdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float64_div(op1.xmm64u(0), op2, status);
@ -466,7 +466,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINPS_MASK_VpsHpsWpsR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4) {
xmm_minps_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -491,7 +491,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINPD_MASK_VpdHpdWpdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2) {
xmm_minpd_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -517,7 +517,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINSS_MASK_VssHpsWssR(bxInstructi
float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float32_min(op1.xmm32u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
}
@ -542,7 +542,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINSD_MASK_VsdHpdWsdR(bxInstructi
float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float64_min(op1.xmm64u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
}
@ -566,7 +566,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXPS_MASK_VpsHpsWpsR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4) {
xmm_maxps_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -591,7 +591,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXPD_MASK_VpdHpdWpdR(bxInstructi
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);;
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2) {
xmm_maxpd_mask(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask);
@ -617,7 +617,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSS_MASK_VssHpsWssR(bxInstructi
float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float32_max(op1.xmm32u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
}
@ -642,7 +642,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSD_MASK_VsdHpdWsdR(bxInstructi
float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float64_max(op1.xmm64u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
}

View File

@ -39,20 +39,20 @@ extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mx
// FMADDPD
// VFMADDPD handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fmaddpd for each lane index n in [0, len) with the
// matching lanes of op1/op2/op3 and the shared status. op1 is written to the
// destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADDPD_VpdHpdWpdR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fmaddpd(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fmaddpd(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -60,20 +60,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADDPD_VpdHpdWpdR(bxInstruction_
// FMADDPS
// VFMADDPS handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fmaddps for each lane index n in [0, len) with the
// matching lanes of op1/op2/op3 and the shared status. op1 is written to the
// destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADDPS_VpsHpsWpsR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fmaddps(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fmaddps(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -117,20 +117,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADDSS_VpsHssWssR(bxInstruction_
// FMADDSUBPD
// VFMADDSUBPD handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fmaddsubpd for each lane index n in [0, len) with
// the matching lanes of op1/op2/op3 and the shared status. op1 is written to
// the destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADDSUBPD_VpdHpdWpdR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fmaddsubpd(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fmaddsubpd(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -138,20 +138,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADDSUBPD_VpdHpdWpdR(bxInstructi
// FMADDSUBPS
// VFMADDSUBPS handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fmaddsubps for each lane index n in [0, len) with
// the matching lanes of op1/op2/op3 and the shared status. op1 is written to
// the destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADDSUBPS_VpsHpsWpsR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fmaddsubps(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fmaddsubps(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -159,20 +159,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMADDSUBPS_VpsHpsWpsR(bxInstructi
// FMSUBADDPD
// VFMSUBADDPD handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fmsubaddpd for each lane index n in [0, len) with
// the matching lanes of op1/op2/op3 and the shared status. op1 is written to
// the destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADDPD_VpdHpdWpdR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fmsubaddpd(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fmsubaddpd(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -180,20 +180,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADDPD_VpdHpdWpdR(bxInstructi
// FMSUBADDPS
// VFMSUBADDPS handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fmsubaddps for each lane index n in [0, len) with
// the matching lanes of op1/op2/op3 and the shared status. op1 is written to
// the destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADDPS_VpsHpsWpsR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fmsubaddps(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fmsubaddps(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -201,20 +201,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBADDPS_VpsHpsWpsR(bxInstructi
// FMSUBPD
// VFMSUBPD handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fmsubpd for each lane index n in [0, len) with the
// matching lanes of op1/op2/op3 and the shared status. op1 is written to the
// destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBPD_VpdHpdWpdR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fmsubpd(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fmsubpd(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -222,20 +222,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBPD_VpdHpdWpdR(bxInstruction_
// FMSUBPS
// VFMSUBPS handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fmsubps for each lane index n in [0, len) with the
// matching lanes of op1/op2/op3 and the shared status. op1 is written to the
// destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBPS_VpsHpsWpsR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fmsubps(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fmsubps(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -279,20 +279,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFMSUBSS_VpsHssWssR(bxInstruction_
// FNMADDPD
// VFNMADDPD handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fnmaddpd for each lane index n in [0, len) with
// the matching lanes of op1/op2/op3 and the shared status. op1 is written to
// the destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMADDPD_VpdHpdWpdR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fnmaddpd(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fnmaddpd(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -300,20 +300,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMADDPD_VpdHpdWpdR(bxInstruction
// FNMADDPS
// VFNMADDPS handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fnmaddps for each lane index n in [0, len) with
// the matching lanes of op1/op2/op3 and the shared status. op1 is written to
// the destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMADDPS_VpsHpsWpsR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fnmaddps(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fnmaddps(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -357,20 +357,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMADDSS_VpsHssWssR(bxInstruction
// FNMSUBPD
// VFNMSUBPD handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fnmsubpd for each lane index n in [0, len) with
// the matching lanes of op1/op2/op3 and the shared status. op1 is written to
// the destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMSUBPD_VpdHpdWpdR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fnmsubpd(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fnmsubpd(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -378,20 +378,20 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMSUBPD_VpdHpdWpdR(bxInstruction
// FNMSUBPS
// VFNMSUBPS handler: reads three source registers, builds a softfloat status
// from MXCSR, and calls xmm_fnmsubps for each lane index n in [0, len) with
// the matching lanes of op1/op2/op3 and the shared status. op1 is written to
// the destination, so the helper presumably stores its result in its first
// argument -- confirm.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFNMSUBPS_VpsHpsWpsR(bxInstruction_c *i)
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
BxPackedAvxRegister op3 = BX_READ_AVX_REG(i->src3());
unsigned len = i->getVL();
// One status object is shared by all lanes, so exception flags accumulate.
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < len; n++)
xmm_fnmsubps(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), status);
xmm_fnmsubps(&op1.vmm128(n), &op2.vmm128(n), &op3.vmm128(n), status);
// Flags are checked once, after all lanes and before the destination write.
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}

View File

@ -294,13 +294,13 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTSS_VssHpsWssR(bxInstruction_
/* Opcode: VEX.0F 53 (VEX.W ignore, VEX.VVV #UD) */
// VRCPPS handler: reads the source register and replaces every 32-bit element
// with approximate_rcp() of itself, in place, then writes the register to the
// destination. No softfloat status is set up and no exception check is made
// in this handler.
// NOTE(review): every Ymm-named line is directly followed by an Avx-named twin;
// this looks like the before/after pair of the change listing -- confirm which
// of the two belongs in the final source.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPPS_VpsWpsR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL();
// The loop is element-wise rather than lane-wise: it visits 4*len elements,
// presumably four 32-bit values per 128-bit unit of vector length -- confirm.
for (unsigned n=0; n < (4*len); n++)
op.ymm32u(n) = approximate_rcp(op.ymm32u(n));
op.vmm32u(n) = approximate_rcp(op.vmm32u(n));
// presumably the REGZ write also clears the destination above `len` lanes -- TODO confirm
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_NEXT_INSTR(i);
}
@ -327,7 +327,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDPS_VpsHpsWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -350,7 +350,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDPD_VpdHpdWpdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -373,7 +373,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSS_VssHpsWssR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm32u(0) = float32_add(op1.xmm32u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -391,7 +391,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSD_VsdHpdWsdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm64u(0) = float64_add(op1.xmm64u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -409,7 +409,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULPS_VpsHpsWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -432,7 +432,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULPD_VpdHpdWpdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -455,7 +455,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSS_VssHpsWssR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm32u(0) = float32_mul(op1.xmm32u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -473,7 +473,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSD_VsdHpdWsdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm64u(0) = float64_mul(op1.xmm64u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -626,7 +626,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBPS_VpsHpsWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -649,7 +649,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBPD_VpdHpdWpdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -672,7 +672,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBSS_VssHpsWssR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm32u(0) = float32_sub(op1.xmm32u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -690,7 +690,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBSD_VsdHpdWsdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm64u(0) = float64_sub(op1.xmm64u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -708,7 +708,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINPS_VpsHpsWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -731,7 +731,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINPD_VpdHpdWpdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -754,7 +754,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINSS_VssHpsWssR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm32u(0) = float32_min(op1.xmm32u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -773,7 +773,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINSD_VsdHpdWsdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm64u(0) = float64_min(op1.xmm64u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -792,7 +792,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVPS_VpsHpsWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -815,7 +815,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVPD_VpdHpdWpdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -838,7 +838,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVSS_VssHpsWssR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm32u(0) = float32_div(op1.xmm32u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -856,7 +856,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVSD_VsdHpdWsdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm64u(0) = float64_div(op1.xmm64u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -874,7 +874,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXPS_VpsHpsWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -897,7 +897,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXPD_VpdHpdWpdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
for (unsigned n=0; n < len; n++) {
@ -920,7 +920,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSS_VssHpsWssR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm32u(0) = float32_max(op1.xmm32u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -939,7 +939,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSD_VsdHpdWsdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
evex_softfloat_status_word_override(status, i, BX_VL512);
softfloat_status_word_rc_override(status, i);
#endif
op1.xmm64u(0) = float64_max(op1.xmm64u(0), op2, status);
check_exceptionsSSE(status.float_exception_flags);
@ -952,22 +952,22 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSD_VsdHpdWsdR(bxInstruction_c
/* Opcode: VEX.NDS.66.0F 7C (VEX.W ignore) */
/*
 * VHADDPD handler, register-source form: both operands are fetched from
 * registers (src1, src2), xmm_haddpd is applied to each of the i->getVL()
 * 128-bit chunks, and the result held in op1 is written to dst.
 *
 * NOTE(review): this listing carries two spellings of every line the commit
 * touched (Ymm/ymm128/BX_WRITE_YMM_REGZ_VLEN/evex_softfloat_status_word_override
 * next to Avx/vmm128/BX_WRITE_AVX_REGZ/softfloat_status_word_rc_override) -
 * presumably the pre-change line followed by its replacement; confirm
 * against the real file before treating both as live code.
 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VHADDPD_VpdHpdWpdR(bxInstruction_c *i)
{
/* local copies of both sources; op1 doubles as the result accumulator */
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
/* len bounds the loop below and is also passed to the register write */
unsigned len = i->getVL();
float_status_t status;
/* start from the status derived from the current MXCSR */
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
/* EVEX-only adjustment of the status word (rounding-control override, going by the helper name) */
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
/* one xmm_haddpd call per chunk; the same status object is shared by every iteration */
for (unsigned n=0; n < len; n++) {
xmm_haddpd(&op1.ymm128(n), &op2.ymm128(n), status);
xmm_haddpd(&op1.vmm128(n), &op2.vmm128(n), status);
}
/* exception flags are checked before the destination is written */
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -975,22 +975,22 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VHADDPD_VpdHpdWpdR(bxInstruction_c
/* Opcode: VEX.NDS.F2.0F 7C (VEX.W ignore) */
/*
 * VHADDPS handler, register-source form: both operands are fetched from
 * registers (src1, src2), xmm_haddps is applied to each of the i->getVL()
 * 128-bit chunks, and the result held in op1 is written to dst.
 *
 * NOTE(review): lines that differ only in Ymm/ymm128/..._VLEN/evex_... versus
 * Avx/vmm128/BX_WRITE_AVX_REGZ/softfloat_status_word_rc_override look like
 * the before/after pair of a single changed line - confirm against the
 * real file.
 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VHADDPS_VpsHpsWpsR(bxInstruction_c *i)
{
/* local copies of both sources; op1 doubles as the result accumulator */
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
/* len bounds the loop below and is also passed to the register write */
unsigned len = i->getVL();
float_status_t status;
/* start from the status derived from the current MXCSR */
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
/* EVEX-only adjustment of the status word (rounding-control override, going by the helper name) */
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
/* one xmm_haddps call per chunk; the same status object is shared by every iteration */
for (unsigned n=0; n < len; n++) {
xmm_haddps(&op1.ymm128(n), &op2.ymm128(n), status);
xmm_haddps(&op1.vmm128(n), &op2.vmm128(n), status);
}
/* exception flags are checked before the destination is written */
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -998,22 +998,22 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VHADDPS_VpsHpsWpsR(bxInstruction_c
/* Opcode: VEX.NDS.66.0F 7D (VEX.W ignore) */
/*
 * VHSUBPD handler, register-source form: both operands are fetched from
 * registers (src1, src2), xmm_hsubpd is applied to each of the i->getVL()
 * 128-bit chunks, and the result held in op1 is written to dst.
 *
 * NOTE(review): lines that differ only in Ymm/ymm128/..._VLEN/evex_... versus
 * Avx/vmm128/BX_WRITE_AVX_REGZ/softfloat_status_word_rc_override look like
 * the before/after pair of a single changed line - confirm against the
 * real file.
 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VHSUBPD_VpdHpdWpdR(bxInstruction_c *i)
{
/* local copies of both sources; op1 doubles as the result accumulator */
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
/* len bounds the loop below and is also passed to the register write */
unsigned len = i->getVL();
float_status_t status;
/* start from the status derived from the current MXCSR */
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
/* EVEX-only adjustment of the status word (rounding-control override, going by the helper name) */
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
/* one xmm_hsubpd call per chunk; the same status object is shared by every iteration */
for (unsigned n=0; n < len; n++) {
xmm_hsubpd(&op1.ymm128(n), &op2.ymm128(n), status);
xmm_hsubpd(&op1.vmm128(n), &op2.vmm128(n), status);
}
/* exception flags are checked before the destination is written */
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -1021,22 +1021,22 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VHSUBPD_VpdHpdWpdR(bxInstruction_c
/* Opcode: VEX.NDS.F2.0F 7D (VEX.W ignore) */
/*
 * VHSUBPS handler, register-source form: both operands are fetched from
 * registers (src1, src2), xmm_hsubps is applied to each of the i->getVL()
 * 128-bit chunks, and the result held in op1 is written to dst.
 *
 * NOTE(review): lines that differ only in Ymm/ymm128/..._VLEN/evex_... versus
 * Avx/vmm128/BX_WRITE_AVX_REGZ/softfloat_status_word_rc_override look like
 * the before/after pair of a single changed line - confirm against the
 * real file.
 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VHSUBPS_VpsHpsWpsR(bxInstruction_c *i)
{
/* local copies of both sources; op1 doubles as the result accumulator */
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
/* len bounds the loop below and is also passed to the register write */
unsigned len = i->getVL();
float_status_t status;
/* start from the status derived from the current MXCSR */
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
/* EVEX-only adjustment of the status word (rounding-control override, going by the helper name) */
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
/* one xmm_hsubps call per chunk; the same status object is shared by every iteration */
for (unsigned n=0; n < len; n++) {
xmm_hsubps(&op1.ymm128(n), &op2.ymm128(n), status);
xmm_hsubps(&op1.vmm128(n), &op2.vmm128(n), status);
}
/* exception flags are checked before the destination is written */
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -1129,22 +1129,22 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPSS_VssHpsWssIbR(bxInstruction_
/* Opcode: VEX.NDS.66.0F D0 (VEX.W ignore) */
/*
 * VADDSUBPD handler, register-source form: both operands are fetched from
 * registers (src1, src2), xmm_addsubpd is applied to each of the i->getVL()
 * 128-bit chunks, and the result held in op1 is written to dst.
 *
 * The opcode line above previously read F2.0F D0, which is the encoding of
 * the packed-single sibling VADDSUBPS; the packed-double form uses the 66
 * prefix.
 *
 * NOTE(review): lines that differ only in Ymm/ymm128/..._VLEN/evex_... versus
 * Avx/vmm128/BX_WRITE_AVX_REGZ/softfloat_status_word_rc_override look like
 * the before/after pair of a single changed line - confirm against the
 * real file.
 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSUBPD_VpdHpdWpdR(bxInstruction_c *i)
{
/* local copies of both sources; op1 doubles as the result accumulator */
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
/* len bounds the loop below and is also passed to the register write */
unsigned len = i->getVL();
float_status_t status;
/* start from the status derived from the current MXCSR */
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
/* EVEX-only adjustment of the status word (rounding-control override, going by the helper name) */
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
/* one xmm_addsubpd call per chunk; the same status object is shared by every iteration */
for (unsigned n=0; n < len; n++) {
xmm_addsubpd(&op1.ymm128(n), &op2.ymm128(n), status);
xmm_addsubpd(&op1.vmm128(n), &op2.vmm128(n), status);
}
/* exception flags are checked before the destination is written */
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
@ -1152,22 +1152,22 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSUBPD_VpdHpdWpdR(bxInstruction
/* Opcode: VEX.NDS.F2.0F D0 (VEX.W ignore) */
/*
 * VADDSUBPS handler, register-source form: both operands are fetched from
 * registers (src1, src2), xmm_addsubps is applied to each of the i->getVL()
 * 128-bit chunks, and the result held in op1 is written to dst.
 *
 * NOTE(review): lines that differ only in Ymm/ymm128/..._VLEN/evex_... versus
 * Avx/vmm128/BX_WRITE_AVX_REGZ/softfloat_status_word_rc_override look like
 * the before/after pair of a single changed line - confirm against the
 * real file.
 */
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSUBPS_VpsHpsWpsR(bxInstruction_c *i)
{
/* local copies of both sources; op1 doubles as the result accumulator */
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
/* len bounds the loop below and is also passed to the register write */
unsigned len = i->getVL();
float_status_t status;
/* start from the status derived from the current MXCSR */
mxcsr_to_softfloat_status_word(status, MXCSR);
#if BX_SUPPORT_EVEX
/* EVEX-only adjustment of the status word (rounding-control override, going by the helper name) */
evex_softfloat_status_word_override(status, i, len);
softfloat_status_word_rc_override(status, i);
#endif
/* one xmm_addsubps call per chunk; the same status object is shared by every iteration */
for (unsigned n=0; n < len; n++) {
xmm_addsubps(&op1.ymm128(n), &op2.ymm128(n), status);
xmm_addsubps(&op1.vmm128(n), &op2.vmm128(n), status);
}
/* exception flags are checked before the destination is written */
check_exceptionsSSE(status.float_exception_flags);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}

View File

@ -378,10 +378,10 @@ typedef BxPackedYmmRegister BxPackedAvxRegister;
#if BX_SUPPORT_EVEX
// implement SAE and EVEX encoded rounding control
BX_CPP_INLINE void evex_softfloat_status_word_override(float_status_t &status, bxInstruction_c *i, unsigned vl)
BX_CPP_INLINE void softfloat_status_word_rc_override(float_status_t &status, bxInstruction_c *i)
{
/* must be VL512 otherwise EVEX.LL encodes vector length */
if (vl == BX_VL512 && i->modC0() && i->getEvexb()) {
if (i->modC0() && i->getEvexb()) {
status.float_rounding_mode = i->getRC();
status.float_exception_masks = float_all_exceptions_mask;
}