Bochs/bochs/cpu/avx512_cvt.cc
Stanislav Shwartsman 7775483d5e Implemented VCVTPS2PH AVX-512 instruction
Now only missed AVX-512 opcodes now are:

512.66.0F38.W0 2C VSCALEFPS
512.66.0F38.W1 2C VSCALEFPD
NDS.LIG.66.0F38.W0 2D VSCALESS
NDS.LIG.66.0F38.W1 2D VSCALESD

512.66.0F38.W0 4C VRCP14PS
512.66.0F38.W1 4C VRCP14PD
NDS.LIG.66.0F38.W0 4D VRCP14SS
NDS.LIG.66.0F38.W1 4D VRCP14SD
512.66.0F38.W0 4E VRSQRT14PS
512.66.0F38.W1 4E VRSQRT14PD
NDS.LIG.66.0F38.W0 4F VRSQRT14SS
NDS.LIG.66.0F38.W1 4F VRSQRT14SD

512.66.0F3A.W0 08 VRNDSCALEPS
512.66.0F3A.W1 09 VRNDSCALEPD
NDS.LIG.66.0F3A.W1 0A VRNDSCALESS
NDS.LIG.66.0F3A.W1 0B VRNDSCALESD
2014-02-15 19:21:08 +00:00

638 lines
22 KiB
C++

/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2013-2014 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_EVEX
extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mxcsr);
#include "simd_int.h"
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2USI_GdWssR(bxInstruction_c *i)
{
float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit32u result = float32_to_uint32(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_32BIT_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2USI_GqWssR(bxInstruction_c *i)
{
float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit64u result = float32_to_uint64(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_64BIT_REG(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTSS2USI_GdWssR(bxInstruction_c *i)
{
float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit32u result = float32_to_uint32_round_to_zero(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_32BIT_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTSS2USI_GqWssR(bxInstruction_c *i)
{
float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit64u result = float32_to_uint64_round_to_zero(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_64BIT_REG(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2USI_GdWsdR(bxInstruction_c *i)
{
float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit32u result = float64_to_uint32(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_32BIT_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2USI_GqWsdR(bxInstruction_c *i)
{
float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit64u result = float64_to_uint64(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_64BIT_REG(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTSD2USI_GdWsdR(bxInstruction_c *i)
{
float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit32u result = float64_to_uint32_round_to_zero(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_32BIT_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTSD2USI_GqWsdR(bxInstruction_c *i)
{
float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit64u result = float64_to_uint64_round_to_zero(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_64BIT_REG(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEdR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = uint32_to_float32(BX_READ_32BIT_REG(i->src2()), status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEqR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = uint64_to_float32(BX_READ_64BIT_REG(i->src2()), status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SD_VsdEdR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
op1.xmm64u(0) = uint32_to_float64(BX_READ_32BIT_REG(i->src2()));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SD_VsdEqR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = uint64_to_float64(BX_READ_64BIT_REG(i->src2()), status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_MASK_VssWsdR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float64_to_float32(op2, status);
check_exceptionsSSE(get_exception_flags(status));
}
else {
if (i->isZeroMasking())
op1.xmm32u(0) = 0;
else
op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_MASK_VsdWssR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float32_to_float64(op2, status);
check_exceptionsSSE(get_exception_flags(status));
}
else {
if (i->isZeroMasking())
op1.xmm64u(0) = 0;
else
op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
#define AVX512_CVT64_TO_32(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result = BX_READ_AVX_REG(i->dst()); \
unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask()); \
unsigned len = i->getVL(); \
\
float_status_t status; \
mxcsr_to_softfloat_status_word(status, MXCSR); \
softfloat_status_word_rc_override(status, i); \
\
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, opmask >>= 1) { \
if (opmask & 0x1) \
result.vmm32u(n) = (func)(op.vmm64u(n), status); \
else if (i->isZeroMasking()) \
result.vmm32u(n) = 0; \
} \
\
check_exceptionsSSE(get_exception_flags(status)); \
\
if (len == BX_VL128) { \
BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0)); \
} \
else { \
BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); /* write half vector */ \
} \
\
BX_NEXT_INSTR(i); \
}
AVX512_CVT64_TO_32(VCVTPD2PS_MASK_VpsWpdR, float64_to_float32)
AVX512_CVT64_TO_32(VCVTPD2DQ_MASK_VdqWpdR, float64_to_int32)
AVX512_CVT64_TO_32(VCVTTPD2DQ_MASK_VdqWpdR, float64_to_int32_round_to_zero)
AVX512_CVT64_TO_32(VCVTPD2UDQ_MASK_VdqWpdR, float64_to_uint32)
AVX512_CVT64_TO_32(VCVTTPD2UDQ_MASK_VdqWpdR, float64_to_uint32_round_to_zero)
#define AVX512_CVT32_TO_32(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); \
unsigned len = i->getVL(); \
\
Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask()); \
\
float_status_t status; \
mxcsr_to_softfloat_status_word(status, MXCSR); \
softfloat_status_word_rc_override(status, i); \
\
for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) { \
if (opmask & mask) \
op.vmm32u(n) = (func)(op.vmm32u(n), status); \
else \
op.vmm32u(n) = 0; \
} \
\
check_exceptionsSSE(get_exception_flags(status)); \
\
if (! i->isZeroMasking()) { \
for (unsigned n=0; n < len; n++, opmask >>= 4) \
xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask); \
BX_CLEAR_AVX_REGZ(i->dst(), len); \
} \
else { \
BX_WRITE_AVX_REGZ(i->dst(), op, len); \
} \
\
BX_NEXT_INSTR(i); \
}
AVX512_CVT32_TO_32(VCVTPS2DQ_MASK_VdqWpsR, float32_to_int32)
AVX512_CVT32_TO_32(VCVTTPS2DQ_MASK_VdqWpsR, float32_to_int32_round_to_zero)
AVX512_CVT32_TO_32(VCVTPS2UDQ_MASK_VdqWpsR, float32_to_uint32)
AVX512_CVT32_TO_32(VCVTTPS2UDQ_MASK_VdqWpsR, float32_to_uint32_round_to_zero)
AVX512_CVT32_TO_32(VCVTDQ2PS_MASK_VpsWdqR, int32_to_float32)
AVX512_CVT32_TO_32(VCVTUDQ2PS_MASK_VpsWdqR, uint32_to_float32)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2UDQ_VdqWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.vmm32u(n) = float32_to_uint32(op.vmm32u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2UDQ_VdqWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.vmm32u(n) = float32_to_uint32_round_to_zero(op.vmm32u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2UDQ_VdqWpdR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.vmm32u(n) = float64_to_uint32(op.vmm64u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
if (len == BX_VL128) {
BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2UDQ_VdqWpdR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.vmm32u(n) = float64_to_uint32_round_to_zero(op.vmm64u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
if (len == BX_VL128) {
BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_VpsWdqR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.vmm32u(n) = uint32_to_float32(op.vmm32u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_VpdWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister result;
unsigned len = i->getVL();
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.vmm64u(n) = uint32_to_float64(op.ymm32u(n));
}
BX_WRITE_AVX_REGZ(i->dst(), result, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister result;
unsigned len = i->getVL();
Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());
for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
if (opmask & mask)
result.vmm64u(n) = uint32_to_float64(op.ymm32u(n));
else
result.vmm64u(n) = 0;
}
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister result;
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0, tmp_mask = opmask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) {
if (tmp_mask & 0x1)
result.vmm64u(n) = float32_to_float64(op.ymm32u(n), status);
else
result.vmm64u(n) = 0;
}
check_exceptionsSSE(get_exception_flags(status));
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister result;
unsigned len = i->getVL();
Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());
for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
if (opmask & mask)
result.vmm64u(n) = int32_to_float64(op.ymm32s(n));
else
result.vmm64u(n) = 0;
}
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister result;
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
status.denormals_are_zeros = 0; // ignore MXCSR.DAZ
// no denormal exception is reported on MXCSR
status.float_suppress_exception = float_flag_denormal;
Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask());
for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
if (opmask & mask)
result.vmm32u(n) = float16_to_float32(op.ymm16u(n), status);
else
result.vmm32u(n) = 0;
}
check_exceptionsSSE(get_exception_flags(status));
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 4)
xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), dst = BX_READ_AVX_REG(i->dst());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
unsigned len = i->getVL();
Bit8u control = i->Ib();
status.flush_underflow_to_zero = 0; // ignore MXCSR.FUZ
// override MXCSR rounding mode with control coming from imm8
if ((control & 0x4) == 0)
status.float_rounding_mode = control & 0x3;
Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask());
for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
if (opmask & mask)
dst.vmm16u(n) = float32_to_float16(op.vmm32u(n), status);
else if (i->isZeroMasking())
dst.vmm16u(n) = 0;
}
check_exceptionsSSE(get_exception_flags(status));
if (len == BX_VL128) {
BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), dst.vmm64u(0));
}
else {
BX_WRITE_AVX_REGZ(i->dst(), dst, len >> 1); // write half vector
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbM(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
unsigned len = i->getVL();
Bit8u control = i->Ib();
status.flush_underflow_to_zero = 0; // ignore MXCSR.FUZ
// override MXCSR rounding mode with control coming from imm8
if ((control & 0x4) == 0)
status.float_rounding_mode = control & 0x3;
Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask());
opmask &= (1 << DWORD_ELEMENTS(len)) - 1;
for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
if (opmask & mask)
result.vmm16u(n) = float32_to_float16(op.vmm32u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
avx_masked_store16(i, eaddr, &result, opmask);
BX_NEXT_INSTR(i);
}
#endif