Fixes to the VSCALEF instruction, plus one more step of its implementation in softfloat
This commit is contained in:
parent
4a78dffb24
commit
d10fa93d89
@ -172,7 +172,7 @@ EVEX_OP_SCALAR_DOUBLE(VMULSD_MASK_VsdHpdWsdR, float64_mul)
|
|||||||
EVEX_OP_SCALAR_DOUBLE(VDIVSD_MASK_VsdHpdWsdR, float64_div)
|
EVEX_OP_SCALAR_DOUBLE(VDIVSD_MASK_VsdHpdWsdR, float64_div)
|
||||||
EVEX_OP_SCALAR_DOUBLE(VMINSD_MASK_VsdHpdWsdR, float64_min)
|
EVEX_OP_SCALAR_DOUBLE(VMINSD_MASK_VsdHpdWsdR, float64_min)
|
||||||
EVEX_OP_SCALAR_DOUBLE(VMAXSD_MASK_VsdHpdWsdR, float64_max)
|
EVEX_OP_SCALAR_DOUBLE(VMAXSD_MASK_VsdHpdWsdR, float64_max)
|
||||||
EVEX_OP_SCALAR_SINGLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
|
EVEX_OP_SCALAR_DOUBLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
|
||||||
|
|
||||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_MASK_VpsWpsR(bxInstruction_c *i)
|
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_MASK_VpsWpsR(bxInstruction_c *i)
|
||||||
{
|
{
|
||||||
@ -1114,4 +1114,80 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESD_MASK_VsdHpdWsdIbR(bxIn
|
|||||||
BX_NEXT_INSTR(i);
|
BX_NEXT_INSTR(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// scalef
|
||||||
|
|
||||||
|
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_VpsHpsWpsR(bxInstruction_c *i)
|
||||||
|
{
|
||||||
|
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
|
||||||
|
unsigned len = i->getVL();
|
||||||
|
|
||||||
|
float_status_t status;
|
||||||
|
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||||
|
softfloat_status_word_rc_override(status, i);
|
||||||
|
|
||||||
|
for (unsigned n=0; n < len; n++) {
|
||||||
|
xmm_scalefps(&op1.vmm128(n), &op2.vmm128(n), status);
|
||||||
|
}
|
||||||
|
|
||||||
|
check_exceptionsSSE(get_exception_flags(status));
|
||||||
|
|
||||||
|
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
|
||||||
|
|
||||||
|
BX_NEXT_INSTR(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_VpdHpdWpdR(bxInstruction_c *i)
|
||||||
|
{
|
||||||
|
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
|
||||||
|
unsigned len = i->getVL();
|
||||||
|
|
||||||
|
float_status_t status;
|
||||||
|
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||||
|
softfloat_status_word_rc_override(status, i);
|
||||||
|
|
||||||
|
for (unsigned n=0; n < len; n++) {
|
||||||
|
xmm_scalefpd(&op1.vmm128(n), &op2.vmm128(n), status);
|
||||||
|
}
|
||||||
|
|
||||||
|
check_exceptionsSSE(get_exception_flags(status));
|
||||||
|
|
||||||
|
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
|
||||||
|
|
||||||
|
BX_NEXT_INSTR(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_VssHpsWssR(bxInstruction_c *i)
|
||||||
|
{
|
||||||
|
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
|
||||||
|
float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
|
||||||
|
|
||||||
|
float_status_t status;
|
||||||
|
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||||
|
softfloat_status_word_rc_override(status, i);
|
||||||
|
|
||||||
|
op1.xmm32u(0) = float32_scalef(op1.xmm32u(0), op2, status);
|
||||||
|
|
||||||
|
check_exceptionsSSE(get_exception_flags(status));
|
||||||
|
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
||||||
|
|
||||||
|
BX_NEXT_INSTR(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSD_VsdHpdWsdR(bxInstruction_c *i)
|
||||||
|
{
|
||||||
|
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
|
||||||
|
float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
|
||||||
|
|
||||||
|
float_status_t status;
|
||||||
|
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||||
|
softfloat_status_word_rc_override(status, i);
|
||||||
|
|
||||||
|
op1.xmm64u(0) = float64_scalef(op1.xmm64u(0), op2, status);
|
||||||
|
|
||||||
|
check_exceptionsSSE(get_exception_flags(status));
|
||||||
|
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
||||||
|
|
||||||
|
BX_NEXT_INSTR(i);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -3261,6 +3261,11 @@ public: // for now...
|
|||||||
BX_SMF BX_INSF_TYPE VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
BX_SMF BX_INSF_TYPE VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
|
||||||
|
BX_SMF BX_INSF_TYPE VSCALEFPS_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
BX_SMF BX_INSF_TYPE VSCALEFPD_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
BX_SMF BX_INSF_TYPE VSCALEFSS_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
BX_SMF BX_INSF_TYPE VSCALEFSD_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
|
||||||
BX_SMF BX_INSF_TYPE VSCALEFPS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VSCALEFPS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
BX_SMF BX_INSF_TYPE VSCALEFPD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VSCALEFPD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
BX_SMF BX_INSF_TYPE VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
@ -1250,9 +1250,9 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
|||||||
/* 2A */ { 0, BX_IA_ERROR }, // #UD
|
/* 2A */ { 0, BX_IA_ERROR }, // #UD
|
||||||
/* 2B k0 */ { 0, BX_IA_ERROR },
|
/* 2B k0 */ { 0, BX_IA_ERROR },
|
||||||
/* 2B */ { 0, BX_IA_ERROR },
|
/* 2B */ { 0, BX_IA_ERROR },
|
||||||
/* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
|
/* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps },
|
||||||
/* 2C */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
|
/* 2C */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
|
||||||
/* 2D k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
|
/* 2D k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss },
|
||||||
/* 2D */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
|
/* 2D */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
|
||||||
/* 2E k0 */ { 0, BX_IA_ERROR },
|
/* 2E k0 */ { 0, BX_IA_ERROR },
|
||||||
/* 2E */ { 0, BX_IA_ERROR },
|
/* 2E */ { 0, BX_IA_ERROR },
|
||||||
|
@ -213,8 +213,7 @@ Bit32s float32_to_int32(float32 a, float_status_t &status)
|
|||||||
if (get_denormals_are_zeros(status)) aSig = 0;
|
if (get_denormals_are_zeros(status)) aSig = 0;
|
||||||
}
|
}
|
||||||
int shiftCount = 0xAF - aExp;
|
int shiftCount = 0xAF - aExp;
|
||||||
Bit64u aSig64 = aSig;
|
Bit64u aSig64 = Bit64u(aSig) << 32;
|
||||||
aSig64 <<= 32;
|
|
||||||
if (0 < shiftCount) aSig64 = shift64RightJamming(aSig64, shiftCount);
|
if (0 < shiftCount) aSig64 = shift64RightJamming(aSig64, shiftCount);
|
||||||
return roundAndPackInt32(aSign, aSig64, status);
|
return roundAndPackInt32(aSign, aSig64, status);
|
||||||
}
|
}
|
||||||
@ -760,12 +759,19 @@ float32 float32_scalef(float32 a, float32 b, float_status_t &status)
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((bExp | bSig) == 0) return a;
|
||||||
|
|
||||||
if (bExp == 0xFF) {
|
if (bExp == 0xFF) {
|
||||||
if (bSign) return packFloat32(aSign, 0, 0);
|
if (bSign) return packFloat32(aSign, 0, 0);
|
||||||
return packFloat32(aSign, 0xFF, 0);
|
return packFloat32(aSign, 0xFF, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0; // fixme
|
if (bExp > FLOAT32_EXP_BIAS + 16) {
|
||||||
|
// handle obvious overflow/underflow result
|
||||||
|
return roundAndPackFloat32(aSign, bSign ? -0x7F : 0xFF, aSig, status);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0xdeadbeef; // fixme
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
@ -1926,12 +1932,19 @@ float64 float64_scalef(float64 a, float64 b, float_status_t &status)
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((bExp | bSig) == 0) return a;
|
||||||
|
|
||||||
if (bExp == 0x7FF) {
|
if (bExp == 0x7FF) {
|
||||||
if (bSign) return packFloat64(aSign, 0, 0);
|
if (bSign) return packFloat64(aSign, 0, 0);
|
||||||
return packFloat64(aSign, 0x7FF, 0);
|
return packFloat64(aSign, 0x7FF, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0; // fixme
|
if (bExp > FLOAT64_EXP_BIAS + 16) {
|
||||||
|
// handle obvious overflow/underflow result
|
||||||
|
return roundAndPackFloat64(aSign, bSign ? -0x3FF : 0x7FF, aSig, status);
|
||||||
|
}
|
||||||
|
|
||||||
|
return BX_CONST64(0xdeadbeefdeadbeef); // fixme
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
|
@ -2952,6 +2952,11 @@ bx_define_opcode(BX_IA_V512_VGETMANTPD_VpdWpdIb_Kmask, &BX_CPU_C::LOAD_BROADCAST
|
|||||||
bx_define_opcode(BX_IA_V512_VGETMANTSS_VssHpsWssIb_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VGETMANTSS_MASK_VssHpsWssIbR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
|
bx_define_opcode(BX_IA_V512_VGETMANTSS_VssHpsWssIb_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VGETMANTSS_MASK_VssHpsWssIbR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
bx_define_opcode(BX_IA_V512_VGETMANTSD_VsdHpdWsdIb_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VGETMANTSD_MASK_VsdHpdWsdIbR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
|
bx_define_opcode(BX_IA_V512_VGETMANTSD_VsdHpdWsdIb_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VGETMANTSD_MASK_VsdHpdWsdIbR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
|
|
||||||
|
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VSCALEFPS_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
|
||||||
|
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VSCALEFPD_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
|
||||||
|
bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
|
bx_define_opcode(BX_IA_V512_VSCALEFSD_VsdHpdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VSCALEFSD_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
|
|
||||||
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSCALEFPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
|
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSCALEFPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
|
||||||
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSCALEFPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
|
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSCALEFPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
|
||||||
bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
|
Loading…
Reference in New Issue
Block a user