From d10fa93d89f3f094b0171f5daf730fed38566781 Mon Sep 17 00:00:00 2001
From: Stanislav Shwartsman
Date: Fri, 14 Mar 2014 20:26:50 +0000
Subject: [PATCH] fixed to VSCALEF instruction + one more step in the implementation in the softfloat

---
Review notes (text between the "---" line and the first "diff --git" is not
part of the commit message):

* bochs/cpu/avx512_pfp.cc
  - VSCALEFSD_MASK_VsdHpdWsdR wraps float64_scalef; it is now instantiated
    with EVEX_OP_SCALAR_DOUBLE instead of EVEX_OP_SCALAR_SINGLE.
  - Adds four handlers without the _MASK infix.  Each one builds a
    float_status_t from MXCSR, applies softfloat_status_word_rc_override, and
    calls check_exceptionsSSE(get_exception_flags(status)) before writing the
    destination.
      VSCALEFPS / VSCALEFPD: call xmm_scalefps / xmm_scalefpd once per 128-bit
        lane for i->getVL() lanes, then store with BX_WRITE_AVX_REGZ.
      VSCALEFSS / VSCALEFSD: replace only element 0 of the src1 register with
        float32_scalef / float64_scalef, then store with
        BX_WRITE_XMM_REG_CLEAR_HIGH.
* bochs/cpu/cpu.h declares those four handlers.
* bochs/cpu/fetchdecode_evex.h: the "2C k0" and "2D k0" rows now select the
  opcodes without the _Kmask suffix; the plain 2C / 2D rows still select the
  _Kmask opcodes.
* bochs/cpu/ia_opcodes.h defines the four opcodes without the _Kmask suffix.
* bochs/cpu/fpu/softfloat.cc
  - float32_to_int32: the widening and the shift of aSig are merged into one
    initializer.
  - float32_scalef / float64_scalef: return `a` when (bExp | bSig) == 0, and
    pass the case bExp > bias + 16 to roundAndPackFloat32 / roundAndPackFloat64
    with an exponent argument selected by bSign.
    NOTE(review): the remaining path still returns a 0xdeadbeef placeholder
    marked "fixme", so scalef is not finished by this patch.

NOTE(review): indentation inside the hunks below follows brace nesting and is
an assumption; confirm whitespace against the target tree, or apply with
--ignore-whitespace.

 bochs/cpu/avx512_pfp.cc      | 78 +++++++++++++++++++++++++++++++++++-
 bochs/cpu/cpu.h              |  5 +++
 bochs/cpu/fetchdecode_evex.h |  4 +-
 bochs/cpu/fpu/softfloat.cc   | 21 ++++++++--
 bochs/cpu/ia_opcodes.h       |  5 +++
 5 files changed, 106 insertions(+), 7 deletions(-)

diff --git a/bochs/cpu/avx512_pfp.cc b/bochs/cpu/avx512_pfp.cc
index f29d22909..c991ab866 100644
--- a/bochs/cpu/avx512_pfp.cc
+++ b/bochs/cpu/avx512_pfp.cc
@@ -172,7 +172,7 @@ EVEX_OP_SCALAR_DOUBLE(VMULSD_MASK_VsdHpdWsdR, float64_mul)
 EVEX_OP_SCALAR_DOUBLE(VDIVSD_MASK_VsdHpdWsdR, float64_div)
 EVEX_OP_SCALAR_DOUBLE(VMINSD_MASK_VsdHpdWsdR, float64_min)
 EVEX_OP_SCALAR_DOUBLE(VMAXSD_MASK_VsdHpdWsdR, float64_max)
-EVEX_OP_SCALAR_SINGLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
+EVEX_OP_SCALAR_DOUBLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)

 BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_MASK_VpsWpsR(bxInstruction_c *i)
 {
@@ -1114,4 +1114,80 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESD_MASK_VsdHpdWsdIbR(bxIn
   BX_NEXT_INSTR(i);
 }

+// scalef
+
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_VpsHpsWpsR(bxInstruction_c *i)
+{
+  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
+  unsigned len = i->getVL();
+
+  float_status_t status;
+  mxcsr_to_softfloat_status_word(status, MXCSR);
+  softfloat_status_word_rc_override(status, i);
+
+  for (unsigned n=0; n < len; n++) {
+    xmm_scalefps(&op1.vmm128(n), &op2.vmm128(n), status);
+  }
+
+  check_exceptionsSSE(get_exception_flags(status));
+
+  BX_WRITE_AVX_REGZ(i->dst(), op1, len);
+
+  BX_NEXT_INSTR(i);
+}
+
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_VpdHpdWpdR(bxInstruction_c *i)
+{
+  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
+  unsigned len = i->getVL();
+
+  float_status_t status;
+  mxcsr_to_softfloat_status_word(status, MXCSR);
+  softfloat_status_word_rc_override(status, i);
+
+  for (unsigned n=0; n < len; n++) {
+    xmm_scalefpd(&op1.vmm128(n), &op2.vmm128(n), status);
+  }
+
+  check_exceptionsSSE(get_exception_flags(status));
+
+  BX_WRITE_AVX_REGZ(i->dst(), op1, len);
+
+  BX_NEXT_INSTR(i);
+}
+
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_VssHpsWssR(bxInstruction_c *i)
+{
+  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
+  float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
+
+  float_status_t status;
+  mxcsr_to_softfloat_status_word(status, MXCSR);
+  softfloat_status_word_rc_override(status, i);
+
+  op1.xmm32u(0) = float32_scalef(op1.xmm32u(0), op2, status);
+
+  check_exceptionsSSE(get_exception_flags(status));
+  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
+
+  BX_NEXT_INSTR(i);
+}
+
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSD_VsdHpdWsdR(bxInstruction_c *i)
+{
+  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
+  float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
+
+  float_status_t status;
+  mxcsr_to_softfloat_status_word(status, MXCSR);
+  softfloat_status_word_rc_override(status, i);
+
+  op1.xmm64u(0) = float64_scalef(op1.xmm64u(0), op2, status);
+
+  check_exceptionsSSE(get_exception_flags(status));
+  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
+
+  BX_NEXT_INSTR(i);
+}
+
 #endif
diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h
index fc7e49817..2343e7f3d 100644
--- a/bochs/cpu/cpu.h
+++ b/bochs/cpu/cpu.h
@@ -3261,6 +3261,11 @@ public: // for now...
   BX_SMF BX_INSF_TYPE VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF BX_INSF_TYPE VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

+  BX_SMF BX_INSF_TYPE VSCALEFPS_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VSCALEFPD_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VSCALEFSS_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VSCALEFSD_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+
   BX_SMF BX_INSF_TYPE VSCALEFPS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF BX_INSF_TYPE VSCALEFPD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF BX_INSF_TYPE VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
diff --git a/bochs/cpu/fetchdecode_evex.h b/bochs/cpu/fetchdecode_evex.h
index e8b8ba2d7..9aa4bb88e 100644
--- a/bochs/cpu/fetchdecode_evex.h
+++ b/bochs/cpu/fetchdecode_evex.h
@@ -1250,9 +1250,9 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 2A */ { 0, BX_IA_ERROR }, // #UD
   /* 2B k0 */ { 0, BX_IA_ERROR },
   /* 2B */ { 0, BX_IA_ERROR },
-  /* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
+  /* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps },
   /* 2C */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
-  /* 2D k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
+  /* 2D k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss },
   /* 2D */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
   /* 2E k0 */ { 0, BX_IA_ERROR },
   /* 2E */ { 0, BX_IA_ERROR },
diff --git a/bochs/cpu/fpu/softfloat.cc b/bochs/cpu/fpu/softfloat.cc
index 9c22f00be..a601dc977 100644
--- a/bochs/cpu/fpu/softfloat.cc
+++ b/bochs/cpu/fpu/softfloat.cc
@@ -213,8 +213,7 @@ Bit32s float32_to_int32(float32 a, float_status_t &status)
         if (get_denormals_are_zeros(status)) aSig = 0;
     }
     int shiftCount = 0xAF - aExp;
-    Bit64u aSig64 = aSig;
-    aSig64 <<= 32;
+    Bit64u aSig64 = Bit64u(aSig) << 32;
     if (0 < shiftCount) aSig64 = shift64RightJamming(aSig64, shiftCount);
     return roundAndPackInt32(aSign, aSig64, status);
 }
@@ -760,12 +759,19 @@ float32 float32_scalef(float32 a, float32 b, float_status_t &status)
         return a;
     }

+    if ((bExp | bSig) == 0) return a;
+
     if (bExp == 0xFF) {
         if (bSign) return packFloat32(aSign, 0, 0);
         return packFloat32(aSign, 0xFF, 0);
     }

-    return 0; // fixme
+    if (bExp > FLOAT32_EXP_BIAS + 16) {
+        // handle obvious overflow/underflow result
+        return roundAndPackFloat32(aSign, bSign ? -0x7F : 0xFF, aSig, status);
+    }
+
+    return 0xdeadbeef; // fixme
 }

 /*----------------------------------------------------------------------------
@@ -1926,12 +1932,19 @@ float64 float64_scalef(float64 a, float64 b, float_status_t &status)
         return a;
     }

+    if ((bExp | bSig) == 0) return a;
+
     if (bExp == 0x7FF) {
         if (bSign) return packFloat64(aSign, 0, 0);
         return packFloat64(aSign, 0x7FF, 0);
     }

-    return 0; // fixme
+    if (bExp > FLOAT64_EXP_BIAS + 16) {
+        // handle obvious overflow/underflow result
+        return roundAndPackFloat64(aSign, bSign ? -0x3FF : 0x7FF, aSig, status);
+    }
+
+    return BX_CONST64(0xdeadbeefdeadbeef); // fixme
 }

 /*----------------------------------------------------------------------------
diff --git a/bochs/cpu/ia_opcodes.h b/bochs/cpu/ia_opcodes.h
index b2f75e894..102f3e524 100644
--- a/bochs/cpu/ia_opcodes.h
+++ b/bochs/cpu/ia_opcodes.h
@@ -2952,6 +2952,11 @@ bx_define_opcode(BX_IA_V512_VGETMANTPD_VpdWpdIb_Kmask, &BX_CPU_C::LOAD_BROADCAST
 bx_define_opcode(BX_IA_V512_VGETMANTSS_VssHpsWssIb_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VGETMANTSS_MASK_VssHpsWssIbR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
 bx_define_opcode(BX_IA_V512_VGETMANTSD_VsdHpdWsdIb_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VGETMANTSD_MASK_VsdHpdWsdIbR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)

+bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VSCALEFPS_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
+bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VSCALEFPD_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
+bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VSCALEFSD_VsdHpdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VSCALEFSD_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
+
 bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSCALEFPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
 bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSCALEFPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
 bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)