diff --git a/bochs/cpu/avx512_pfp.cc b/bochs/cpu/avx512_pfp.cc index b28928e91..f9daee52a 100644 --- a/bochs/cpu/avx512_pfp.cc +++ b/bochs/cpu/avx512_pfp.cc @@ -367,6 +367,169 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPSS_MASK_KGbHssWssIbR(bxInstruc // convert +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2USI_GdWssR(bxInstruction_c *i) +{ + float32 op = BX_READ_XMM_REG_LO_DWORD(i->src()); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + Bit32u result = float32_to_uint32(op, status); + check_exceptionsSSE(get_exception_flags(status)); + + BX_WRITE_32BIT_REGZ(i->dst(), result); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2USI_GqWssR(bxInstruction_c *i) +{ + float32 op = BX_READ_XMM_REG_LO_DWORD(i->src()); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + Bit64u result = float32_to_uint64(op, status); + check_exceptionsSSE(get_exception_flags(status)); + + BX_WRITE_64BIT_REG(i->dst(), result); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2USI_GdWsdR(bxInstruction_c *i) +{ + float64 op = BX_READ_XMM_REG_LO_QWORD(i->src()); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + Bit32u result = float64_to_uint32(op, status); + check_exceptionsSSE(get_exception_flags(status)); + + BX_WRITE_32BIT_REGZ(i->dst(), result); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2USI_GqWsdR(bxInstruction_c *i) +{ + float64 op = BX_READ_XMM_REG_LO_QWORD(i->src()); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + Bit64u result = float64_to_uint64(op, status); + check_exceptionsSSE(get_exception_flags(status)); + + BX_WRITE_64BIT_REG(i->dst(), result); + BX_NEXT_INSTR(i); +} + 
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2UDQ_VdqWpsR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); + unsigned len = i->getVL(); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { + op.vmm32u(n) = float32_to_uint32(op.vmm32u(n), status); + } + + check_exceptionsSSE(get_exception_flags(status)); + BX_WRITE_AVX_REGZ(i->dst(), op, len); + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2UDQ_MASK_VdqWpsR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); + unsigned len = i->getVL(), num_elements = DWORD_ELEMENTS(len); + + Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask()); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + + for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) { + if (opmask & mask) + op.vmm32u(n) = float32_to_uint32(op.vmm32u(n), status); + else + op.vmm32u(n) = 0; + } + + check_exceptionsSSE(get_exception_flags(status)); + + if (! 
i->isZeroMasking()) { + for (unsigned n=0; n < len; n++, opmask >>= 4) + xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask); + BX_CLEAR_AVX_REGZ(i->dst(), len); + } + else { + BX_WRITE_AVX_REGZ(i->dst(), op, len); + } + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2UDQ_VdqWpdR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result = BX_READ_AVX_REG(i->dst()); + unsigned len = i->getVL(); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { + result.vmm32u(n) = float64_to_uint32(op.vmm64u(n), status); + } + + check_exceptionsSSE(get_exception_flags(status)); + + if (len == BX_VL128) { + result.vmm64u(1) = 0; + BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0)); + } + else { + BX_WRITE_AVX_REGZ(i->dst(), result, len-1); // write half vector + } + + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2UDQ_MASK_VdqWpdR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result = BX_READ_AVX_REG(i->dst()); + unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask()); + unsigned len = i->getVL(); + + float_status_t status; + mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); + + for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, opmask >>= 1) { + if (opmask & 0x1) + result.vmm32u(n) = float64_to_uint32(op.vmm64u(n), status); + else if (i->isZeroMasking()) + result.vmm32u(n) = 0; + } + + check_exceptionsSSE(get_exception_flags(status)); + + if (len == BX_VL128) { + result.vmm64u(1) = 0; + BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0)); + } + else { + BX_WRITE_AVX_REGZ(i->dst(), result, len-1); // write half vector + } + + BX_NEXT_INSTR(i); +} + BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEdR(bxInstruction_c *i) { BxPackedXmmRegister op1 = 
BX_READ_XMM_REG(i->src1()); @@ -427,12 +590,13 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SD_VsdEqR(bxInstruction_c BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_VpsWdqR(bxInstruction_c *i) { BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); - unsigned len = i->getVL(), num_elements = DWORD_ELEMENTS(len); + unsigned len = i->getVL(); float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); - for (unsigned n=0; n < num_elements; n++) { + for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { op.vmm32u(n) = uint32_to_float32(op.vmm32u(n), status); } @@ -451,14 +615,13 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR(bxInstruct float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); + softfloat_status_word_rc_override(status, i); for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) { - if (opmask & mask) { + if (opmask & mask) op.vmm32u(n) = uint32_to_float32(op.vmm32u(n), status); - } - else { + else op.vmm32u(n) = 0; - } } check_exceptionsSSE(get_exception_flags(status)); @@ -498,12 +661,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruct Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask()); for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) { - if (opmask & mask) { + if (opmask & mask) result.vmm64u(n) = uint32_to_float64(op.ymm32u(n)); - } - else { + else result.vmm64u(n) = 0; - } } if (! 
i->isZeroMasking()) { diff --git a/bochs/cpu/avx_pfp.cc b/bochs/cpu/avx_pfp.cc index 7505ff83a..1669b28f4 100644 --- a/bochs/cpu/avx_pfp.cc +++ b/bochs/cpu/avx_pfp.cc @@ -142,7 +142,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SS_VssEdR(bxInstruction_c * check_exceptionsSSE(get_exception_flags(status)); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); - BX_NEXT_INSTR(i); } @@ -160,7 +159,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SS_VssEqR(bxInstruction_c * check_exceptionsSSE(get_exception_flags(status)); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); - BX_NEXT_INSTR(i); } @@ -188,7 +186,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SD_VsdEqR(bxInstruction_c * check_exceptionsSSE(get_exception_flags(status)); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); - BX_NEXT_INSTR(i); } @@ -243,7 +240,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSS_VssHpsWssR(bxInstruction_c check_exceptionsSSE(get_exception_flags(status)); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); - BX_NEXT_INSTR(i); } @@ -260,7 +256,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSD_VsdHpdWsdR(bxInstruction_c check_exceptionsSSE(get_exception_flags(status)); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); - BX_NEXT_INSTR(i); } @@ -287,7 +282,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTSS_VssHpsWssR(bxInstruction_ op1.xmm32u(0) = approximate_rsqrt(op2); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); - BX_NEXT_INSTR(i); } @@ -314,7 +308,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPSS_VssHpsWssR(bxInstruction_c op1.xmm32u(0) = approximate_rcp(op2); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); - BX_NEXT_INSTR(i); } @@ -477,8 +470,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSD_VsdHpdWsdR(bxInstruction_c /* Opcode: VEX.0F 5A (VEX.W ignore, VEX.VVV #UD) */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_VpdWpsR(bxInstruction_c *i) { - BxPackedYmmRegister result; - BxPackedXmmRegister op = BX_READ_XMM_REG(i->src()); + 
BxPackedAvxRegister result; + BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); unsigned len = i->getVL(); float_status_t status; @@ -486,35 +479,38 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_VpdWpsR(bxInstruction_c softfloat_status_word_rc_override(status, i); for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { - result.ymm64u(n) = float32_to_float64(op.xmm32u(n), status); + result.vmm64u(n) = float32_to_float64(op.ymm32u(n), status); } check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_YMM_REGZ_VLEN(i->dst(), result, len); - + BX_WRITE_AVX_REGZ(i->dst(), result, len); BX_NEXT_INSTR(i); } /* Opcode: VEX.66.0F 5A (VEX.W ignore, VEX.VVV #UD) */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_VpsWpdR(bxInstruction_c *i) { - BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); - BxPackedXmmRegister result; + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; unsigned len = i->getVL(); - result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */ - float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { - result.xmm32u(n) = float64_to_float32(op.ymm64u(n), status); + result.vmm32u(n) = float64_to_float32(op.vmm64u(n), status); } check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result); + + if (len == BX_VL128) { + result.vmm64u(1) = 0; + BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0)); + } + else { + BX_WRITE_AVX_REGZ(i->dst(), result, len-1); // write half vector + } BX_NEXT_INSTR(i); } @@ -556,7 +552,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_VssWsdR(bxInstruction_c /* Opcode: VEX.NDS.0F 5B (VEX.W ignore, VEX.VVV #UD) */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PS_VpsWdqR(bxInstruction_c *i) { - BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); unsigned len = 
i->getVL(); float_status_t status; @@ -564,20 +560,19 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PS_VpsWdqR(bxInstruction_c softfloat_status_word_rc_override(status, i); for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { - op.ymm32u(n) = int32_to_float32(op.ymm32u(n), status); + op.vmm32u(n) = int32_to_float32(op.vmm32u(n), status); } check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len); - + BX_WRITE_AVX_REGZ(i->dst(), op, len); BX_NEXT_INSTR(i); } /* Opcode: VEX.NDS.66.0F 5B (VEX.W ignore, VEX.VVV #UD) */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2DQ_VdqWpsR(bxInstruction_c *i) { - BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); unsigned len = i->getVL(); float_status_t status; @@ -585,20 +580,19 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2DQ_VdqWpsR(bxInstruction_c softfloat_status_word_rc_override(status, i); for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { - op.ymm32u(n) = float32_to_int32(op.ymm32u(n), status); + op.vmm32s(n) = float32_to_int32(op.vmm32u(n), status); } check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len); - + BX_WRITE_AVX_REGZ(i->dst(), op, len); BX_NEXT_INSTR(i); } /* Opcode: VEX.NDS.F3.0F 5B (VEX.W ignore, VEX.VVV #UD) */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2DQ_VdqWpsR(bxInstruction_c *i) { - BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); unsigned len = i->getVL(); float_status_t status; @@ -606,13 +600,12 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2DQ_VdqWpsR(bxInstruction_c softfloat_status_word_rc_override(status, i); for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { - op.ymm32u(n) = float32_to_int32_round_to_zero(op.ymm32u(n), status); + op.vmm32s(n) = float32_to_int32_round_to_zero(op.vmm32u(n), status); } check_exceptionsSSE(get_exception_flags(status)); - 
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len); - + BX_WRITE_AVX_REGZ(i->dst(), op, len); BX_NEXT_INSTR(i); } @@ -1146,22 +1139,26 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSUBPS_VpsHpsWpsR(bxInstruction /* Opcode: VEX.66.0F.E6 (VEX.W ignore, VEX.VVV #UD) */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2DQ_VqWpdR(bxInstruction_c *i) { - BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); - BxPackedXmmRegister result; + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; unsigned len = i->getVL(); - result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */ - float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { - result.xmm32u(n) = float64_to_int32_round_to_zero(op.ymm64u(n), status); + result.vmm32s(n) = float64_to_int32_round_to_zero(op.vmm64u(n), status); } check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result); + + if (len == BX_VL128) { + result.vmm64u(1) = 0; + BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0)); + } + else { + BX_WRITE_AVX_REGZ(i->dst(), result, len-1); // write half vector + } BX_NEXT_INSTR(i); } @@ -1169,22 +1166,26 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2DQ_VqWpdR(bxInstruction_c /* Opcode: VEX.F2.0F.E6 (VEX.W ignore, VEX.VVV #UD) */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2DQ_VqWpdR(bxInstruction_c *i) { - BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); - BxPackedXmmRegister result; + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result; unsigned len = i->getVL(); - result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */ - float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { - result.xmm32u(n) = float64_to_int32(op.ymm64u(n), status); + result.vmm32s(n) = 
float64_to_int32(op.vmm64u(n), status); } check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result); + + if (len == BX_VL128) { + result.vmm64u(1) = 0; + BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0)); + } + else { + BX_WRITE_AVX_REGZ(i->dst(), result, len-1); // write half vector + } BX_NEXT_INSTR(i); } @@ -1192,16 +1193,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2DQ_VqWpdR(bxInstruction_c * /* Opcode: VEX.F3.0F.E6 (VEX.W ignore, VEX.VVV #UD) */ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_VpdWqR(bxInstruction_c *i) { - BxPackedYmmRegister result; - BxPackedXmmRegister op = BX_READ_XMM_REG(i->src()); + BxPackedAvxRegister result; + BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()); unsigned len = i->getVL(); for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { - result.ymm64u(n) = int32_to_float64(op.xmm32u(n)); + result.vmm64u(n) = int32_to_float64(op.ymm32u(n)); } - BX_WRITE_YMM_REGZ_VLEN(i->dst(), result, len); - + BX_WRITE_AVX_REGZ(i->dst(), result, len); BX_NEXT_INSTR(i); } diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index a6fb762ae..99c00a933 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -3230,11 +3230,21 @@ public: // for now... 
BX_SMF BX_INSF_TYPE VSQRTSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VSQRTSD_MASK_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTSS2USI_GdWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTSS2USI_GqWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTSD2USI_GdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTSD2USI_GqWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTUSI2SD_VsdEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VCVTUSI2SS_VssEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VCVTUSI2SD_VsdEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VCVTUSI2SS_VssEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTPS2UDQ_VdqWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTPS2UDQ_MASK_VdqWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTPD2UDQ_VdqWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTPD2UDQ_MASK_VdqWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VCVTUDQ2PS_VpsWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VCVTUDQ2PS_MASK_VpsWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VCVTUDQ2PD_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); diff --git a/bochs/cpu/fetchdecode_evex.h b/bochs/cpu/fetchdecode_evex.h index be86455a8..9b295d95e 100644 --- a/bochs/cpu/fetchdecode_evex.h +++ b/bochs/cpu/fetchdecode_evex.h @@ -329,6 +329,13 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f6f_Mask[3] = { /* F2 */ { 0, BX_IA_ERROR } }; +static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f79[4] = { + /* -- */ { BxAliasVexW, BX_IA_V512_VCVTPS2UDQ_VdqWps }, + /* 66 */ { 0, BX_IA_ERROR }, + /* F3 */ { BxAliasVexW64, BX_IA_V512_VCVTSS2USI_GdWss }, + /* F2 */ { BxAliasVexW64, BX_IA_V512_VCVTSD2USI_GdWsd } +}; + 
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7a[3] = { /* 66 */ { 0, BX_IA_ERROR }, /* F3 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PD_VpdWdq }, @@ -712,8 +719,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = { /* 77 */ { 0, BX_IA_ERROR }, /* 78 k0 */ { 0, BX_IA_ERROR }, /* 78 */ { 0, BX_IA_ERROR }, - /* 79 k0 */ { 0, BX_IA_ERROR }, - /* 79 */ { 0, BX_IA_ERROR }, + /* 79 k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f79 }, + /* 79 */ { BxPrefixSSE, BX_IA_V512_VCVTPS2UDQ_VdqWps_Kmask, BxOpcodeGroupSSE_ERR }, /* 7A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7a }, /* 7A */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7a_Mask }, /* 7B k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7b }, diff --git a/bochs/cpu/ia_opcodes.h b/bochs/cpu/ia_opcodes.h index 28d0e7dd8..d86ef6add 100644 --- a/bochs/cpu/ia_opcodes.h +++ b/bochs/cpu/ia_opcodes.h @@ -2565,7 +2565,6 @@ bx_define_opcode(BX_IA_V512_VCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::COMI bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX) - bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX) @@ -2869,6 +2868,11 @@ bx_define_opcode(BX_IA_V512_VMOVDQA64_WdqVdq, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CP bx_define_opcode(BX_IA_V512_VMOVDQA32_WdqVdq_Kmask, &BX_CPU_C::VMOVAPS_MASK_WpsVpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Wdq, OP_Vdq, OP_NONE, 
OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VMOVDQA64_WdqVdq_Kmask, &BX_CPU_C::VMOVAPD_MASK_WpdVpdM, &BX_CPU_C::VMOVAPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Wdq, OP_Vdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VCVTPS2UDQ_VdqWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTPS2UDQ_VdqWpsR, BX_ISA_AVX512, OP_Vdq, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX) +bx_define_opcode(BX_IA_V512_VCVTPD2UDQ_VdqWpd, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VCVTPD2UDQ_VdqWpdR, BX_ISA_AVX512, OP_Vdq, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX) +bx_define_opcode(BX_IA_V512_VCVTPS2UDQ_VdqWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VCVTPS2UDQ_MASK_VdqWpsR, BX_ISA_AVX512, OP_Vdq, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX) /* masked form: register handler must be the opmask-aware _MASK_ variant */ +bx_define_opcode(BX_IA_V512_VCVTPD2UDQ_VdqWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VCVTPD2UDQ_MASK_VdqWpdR, BX_ISA_AVX512, OP_Vdq, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX) /* masked form: register handler must be the opmask-aware _MASK_ variant */ + bx_define_opcode(BX_IA_V512_VFMADD132PS_VpsHpsWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VFMADDPS_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Vps, OP_Wps, OP_Hps, BX_PREPARE_EVEX) bx_define_opcode(BX_IA_V512_VFMADD132PD_VpdHpdWpd, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VFMADDPD_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Vpd, OP_Wpd, OP_Hpd, BX_PREPARE_EVEX) bx_define_opcode(BX_IA_V512_VFMADD213PS_VpsHpsWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VFMADDPS_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Vps, OP_Wps, BX_PREPARE_EVEX) @@ -3113,6 +3117,11 @@ bx_define_opcode(BX_IA_V512_VCVTUSI2SS_VssEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::VCV bx_define_opcode(BX_IA_V512_VCVTUSI2SS_VssEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::VCVTUSI2SS_VssEqR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_Eq, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VCVTUSI2SD_VsdEd, &BX_CPU_C::LOAD_Ed, &BX_CPU_C::VCVTUSI2SD_VsdEdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_Ed, OP_NONE, 
BX_PREPARE_EVEX_NO_BROADCAST | BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VCVTUSI2SD_VsdEq, &BX_CPU_C::LOAD_Eq, &BX_CPU_C::VCVTUSI2SD_VsdEqR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_Eq, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) + +bx_define_opcode(BX_IA_V512_VCVTSS2USI_GdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2USI_GdWssR, BX_ISA_AVX512, OP_Gd, OP_Wss, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VCVTSS2USI_GqWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2USI_GqWssR, BX_ISA_AVX512, OP_Gq, OP_Wss, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VCVTSD2USI_GdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2USI_GdWsdR, BX_ISA_AVX512, OP_Gd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VCVTSD2USI_GqWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2USI_GqWsdR, BX_ISA_AVX512, OP_Gq, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) // VexW64 aliased #endif // BX_SUPPORT_EVEX diff --git a/bochs/cpu/sse_pfp.cc b/bochs/cpu/sse_pfp.cc index 1ae883a29..9fefa6043 100644 --- a/bochs/cpu/sse_pfp.cc +++ b/bochs/cpu/sse_pfp.cc @@ -2,7 +2,7 @@ // $Id$ ///////////////////////////////////////////////////////////////////////// // -// Copyright (c) 2003-2013 Stanislav Shwartsman +// Copyright (c) 2003-2014 Stanislav Shwartsman // Written by Stanislav Shwartsman [sshwarts at sourceforge net] // // This library is free software; you can redistribute it and/or @@ -277,8 +277,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2PI_PqWps(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - MMXUD0(op) = float32_to_int32_round_to_zero(MMXUD0(op), status); - MMXUD1(op) = float32_to_int32_round_to_zero(MMXUD1(op), status); + MMXSD0(op) = float32_to_int32_round_to_zero(MMXUD0(op), status); + MMXSD1(op) = float32_to_int32_round_to_zero(MMXUD1(op), status); prepareFPU2MMX(); /* cause FPU2MMX state transition */ check_exceptionsSSE(get_exception_flags(status)); 
@@ -319,8 +319,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2PI_PqWpd(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - MMXUD0(result) = float64_to_int32_round_to_zero(op.xmm64u(0), status); - MMXUD1(result) = float64_to_int32_round_to_zero(op.xmm64u(1), status); + MMXSD0(result) = float64_to_int32_round_to_zero(op.xmm64u(0), status); + MMXSD1(result) = float64_to_int32_round_to_zero(op.xmm64u(1), status); prepareFPU2MMX(); /* cause FPU2MMX state transition */ check_exceptionsSSE(get_exception_flags(status)); @@ -344,11 +344,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSD2SI_GdWsdR(bxInstruction_c * float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - - Bit32u result = float64_to_int32_round_to_zero(op, status); - + Bit32s result = float64_to_int32_round_to_zero(op, status); check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_32BIT_REGZ(i->dst(), result); + + BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result); #endif BX_NEXT_INSTR(i); @@ -362,12 +361,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSD2SI_GqWsdR(bxInstruction_c * float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - - Bit64u result = float64_to_int64_round_to_zero(op, status); - + Bit64s result = float64_to_int64_round_to_zero(op, status); check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_64BIT_REG(i->dst(), result); + BX_WRITE_64BIT_REG(i->dst(), (Bit64u) result); BX_NEXT_INSTR(i); } #endif @@ -386,11 +383,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSS2SI_GdWssR(bxInstruction_c * float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - - Bit32u result = float32_to_int32_round_to_zero(op, status); - + Bit32s result = float32_to_int32_round_to_zero(op, status); check_exceptionsSSE(get_exception_flags(status)); - 
BX_WRITE_32BIT_REGZ(i->dst(), result); + + BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result); #endif BX_NEXT_INSTR(i); @@ -404,12 +400,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSS2SI_GqWssR(bxInstruction_c * float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - - Bit64u result = float32_to_int64_round_to_zero(op, status); - + Bit64s result = float32_to_int64_round_to_zero(op, status); check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_64BIT_REG(i->dst(), result); + BX_WRITE_64BIT_REG(i->dst(), (Bit64u) result); BX_NEXT_INSTR(i); } #endif @@ -442,8 +436,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PI_PqWps(bxInstruction_c *i) float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - MMXUD0(op) = float32_to_int32(MMXUD0(op), status); - MMXUD1(op) = float32_to_int32(MMXUD1(op), status); + MMXSD0(op) = float32_to_int32(MMXUD0(op), status); + MMXSD1(op) = float32_to_int32(MMXUD1(op), status); prepareFPU2MMX(); /* cause FPU2MMX state transition */ check_exceptionsSSE(get_exception_flags(status)); @@ -487,8 +481,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PI_PqWpd(bxInstruction_c *i) float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - MMXUD0(result) = float64_to_int32(op.xmm64u(0), status); - MMXUD1(result) = float64_to_int32(op.xmm64u(1), status); + MMXSD0(result) = float64_to_int32(op.xmm64u(0), status); + MMXSD1(result) = float64_to_int32(op.xmm64u(1), status); prepareFPU2MMX(); /* cause FPU2MMX state transition */ check_exceptionsSSE(get_exception_flags(status)); @@ -513,11 +507,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SI_GdWsdR(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - - Bit32u result = float64_to_int32(op, status); - + Bit32s result = float64_to_int32(op, status); check_exceptionsSSE(get_exception_flags(status)); - 
BX_WRITE_32BIT_REGZ(i->dst(), result); + + BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result); #endif BX_NEXT_INSTR(i); @@ -531,12 +524,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SI_GqWsdR(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - - Bit64u result = float64_to_int64(op, status); - + Bit64s result = float64_to_int64(op, status); check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_64BIT_REG(i->dst(), result); + BX_WRITE_64BIT_REG(i->dst(), (Bit64u) result); BX_NEXT_INSTR(i); } #endif @@ -556,11 +547,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SI_GdWssR(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - - Bit32u result = float32_to_int32(op, status); - + Bit32s result = float32_to_int32(op, status); check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_32BIT_REGZ(i->dst(), result); + + BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result); #endif BX_NEXT_INSTR(i); @@ -574,12 +564,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SI_GqWssR(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); softfloat_status_word_rc_override(status, i); - - Bit64u result = float32_to_int64(op, status); - + Bit64s result = float32_to_int64(op, status); check_exceptionsSSE(get_exception_flags(status)); - BX_WRITE_64BIT_REG(i->dst(), result); + BX_WRITE_64BIT_REG(i->dst(), (Bit64u) result); BX_NEXT_INSTR(i); } #endif @@ -721,10 +709,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2DQ_VdqWpsR(bxInstruction_c * float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - op.xmm32u(0) = float32_to_int32(op.xmm32u(0), status); - op.xmm32u(1) = float32_to_int32(op.xmm32u(1), status); - op.xmm32u(2) = float32_to_int32(op.xmm32u(2), status); - op.xmm32u(3) = float32_to_int32(op.xmm32u(3), status); + op.xmm32s(0) = 
float32_to_int32(op.xmm32u(0), status); + op.xmm32s(1) = float32_to_int32(op.xmm32u(1), status); + op.xmm32s(2) = float32_to_int32(op.xmm32u(2), status); + op.xmm32s(3) = float32_to_int32(op.xmm32u(3), status); check_exceptionsSSE(get_exception_flags(status)); BX_WRITE_XMM_REG(i->dst(), op); @@ -747,10 +735,10 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2DQ_VdqWpsR(bxInstruction_c float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - op.xmm32u(0) = float32_to_int32_round_to_zero(op.xmm32u(0), status); - op.xmm32u(1) = float32_to_int32_round_to_zero(op.xmm32u(1), status); - op.xmm32u(2) = float32_to_int32_round_to_zero(op.xmm32u(2), status); - op.xmm32u(3) = float32_to_int32_round_to_zero(op.xmm32u(3), status); + op.xmm32s(0) = float32_to_int32_round_to_zero(op.xmm32u(0), status); + op.xmm32s(1) = float32_to_int32_round_to_zero(op.xmm32u(1), status); + op.xmm32s(2) = float32_to_int32_round_to_zero(op.xmm32u(2), status); + op.xmm32s(3) = float32_to_int32_round_to_zero(op.xmm32u(3), status); check_exceptionsSSE(get_exception_flags(status)); BX_WRITE_XMM_REG(i->dst(), op); @@ -773,8 +761,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2DQ_VqWpdR(bxInstruction_c * float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - op.xmm32u(0) = float64_to_int32_round_to_zero(op.xmm64u(0), status); - op.xmm32u(1) = float64_to_int32_round_to_zero(op.xmm64u(1), status); + op.xmm32s(0) = float64_to_int32_round_to_zero(op.xmm64u(0), status); + op.xmm32s(1) = float64_to_int32_round_to_zero(op.xmm64u(1), status); op.xmm64u(1) = 0; check_exceptionsSSE(get_exception_flags(status)); @@ -799,8 +787,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2DQ_VqWpdR(bxInstruction_c *i float_status_t status; mxcsr_to_softfloat_status_word(status, MXCSR); - op.xmm32u(0) = float64_to_int32(op.xmm64u(0), status); - op.xmm32u(1) = float64_to_int32(op.xmm64u(1), status); + op.xmm32s(0) = float64_to_int32(op.xmm64u(0), status); + op.xmm32s(1) = 
float64_to_int32(op.xmm64u(1), status); op.xmm64u(1) = 0; check_exceptionsSSE(get_exception_flags(status));