fixed to VSCALEF instruction + one more step in the implementation in the softfloat

This commit is contained in:
Stanislav Shwartsman 2014-03-14 20:26:50 +00:00
parent 4a78dffb24
commit d10fa93d89
5 changed files with 106 additions and 7 deletions

View File

@ -172,7 +172,7 @@ EVEX_OP_SCALAR_DOUBLE(VMULSD_MASK_VsdHpdWsdR, float64_mul)
EVEX_OP_SCALAR_DOUBLE(VDIVSD_MASK_VsdHpdWsdR, float64_div)
EVEX_OP_SCALAR_DOUBLE(VMINSD_MASK_VsdHpdWsdR, float64_min)
EVEX_OP_SCALAR_DOUBLE(VMAXSD_MASK_VsdHpdWsdR, float64_max)
EVEX_OP_SCALAR_SINGLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
EVEX_OP_SCALAR_DOUBLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_MASK_VpsWpsR(bxInstruction_c *i)
{
@ -1114,4 +1114,80 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESD_MASK_VsdHpdWsdIbR(bxIn
BX_NEXT_INSTR(i);
}
// scalef
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_VpsHpsWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < len; n++) {
xmm_scalefps(&op1.vmm128(n), &op2.vmm128(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_VpdHpdWpdR(bxInstruction_c *i)
{
BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < len; n++) {
xmm_scalefpd(&op1.vmm128(n), &op2.vmm128(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op1, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_VssHpsWssR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float32_scalef(op1.xmm32u(0), op2, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSD_VsdHpdWsdR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float64_scalef(op1.xmm64u(0), op2, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
#endif

View File

@ -3261,6 +3261,11 @@ public: // for now...
BX_SMF BX_INSF_TYPE VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFPS_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFPD_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFSS_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFSD_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFPS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFPD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -1250,9 +1250,9 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 2A */ { 0, BX_IA_ERROR }, // #UD
/* 2B k0 */ { 0, BX_IA_ERROR },
/* 2B */ { 0, BX_IA_ERROR },
/* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
/* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps },
/* 2C */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
/* 2D k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
/* 2D k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss },
/* 2D */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
/* 2E k0 */ { 0, BX_IA_ERROR },
/* 2E */ { 0, BX_IA_ERROR },

View File

@ -213,8 +213,7 @@ Bit32s float32_to_int32(float32 a, float_status_t &status)
if (get_denormals_are_zeros(status)) aSig = 0;
}
int shiftCount = 0xAF - aExp;
Bit64u aSig64 = aSig;
aSig64 <<= 32;
Bit64u aSig64 = Bit64u(aSig) << 32;
if (0 < shiftCount) aSig64 = shift64RightJamming(aSig64, shiftCount);
return roundAndPackInt32(aSign, aSig64, status);
}
@ -760,12 +759,19 @@ float32 float32_scalef(float32 a, float32 b, float_status_t &status)
return a;
}
if ((bExp | bSig) == 0) return a;
if (bExp == 0xFF) {
if (bSign) return packFloat32(aSign, 0, 0);
return packFloat32(aSign, 0xFF, 0);
}
return 0; // fixme
if (bExp > FLOAT32_EXP_BIAS + 16) {
// handle obvious overflow/underflow result
return roundAndPackFloat32(aSign, bSign ? -0x7F : 0xFF, aSig, status);
}
return 0xdeadbeef; // fixme
}
/*----------------------------------------------------------------------------
@ -1926,12 +1932,19 @@ float64 float64_scalef(float64 a, float64 b, float_status_t &status)
return a;
}
if ((bExp | bSig) == 0) return a;
if (bExp == 0x7FF) {
if (bSign) return packFloat64(aSign, 0, 0);
return packFloat64(aSign, 0x7FF, 0);
}
return 0; // fixme
if (bExp > FLOAT64_EXP_BIAS + 16) {
// handle obvious overflow/underflow result
return roundAndPackFloat64(aSign, bSign ? -0x3FF : 0x7FF, aSig, status);
}
return BX_CONST64(0xdeadbeefdeadbeef); // fixme
}
/*----------------------------------------------------------------------------

View File

@ -2952,6 +2952,11 @@ bx_define_opcode(BX_IA_V512_VGETMANTPD_VpdWpdIb_Kmask, &BX_CPU_C::LOAD_BROADCAST
bx_define_opcode(BX_IA_V512_VGETMANTSS_VssHpsWssIb_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VGETMANTSS_MASK_VssHpsWssIbR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VGETMANTSD_VsdHpdWsdIb_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VGETMANTSD_MASK_VsdHpdWsdIbR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VSCALEFPS_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VSCALEFPD_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VSCALEFSD_VsdHpdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VSCALEFSD_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSCALEFPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSCALEFPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)