Implemented a few more AVX-512 floating-point convert instructions
This commit is contained in:
parent
78509d637b
commit
ba52890538
@ -518,20 +518,66 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruct
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
#if 0
|
||||
// VCVTSD2SS, opmask-controlled, register source.
//
// The result starts as a copy of the first source operand (src1); only its
// lowest 32-bit element is replaced:
//   - scalar mask bit set : low float64 of src2 narrowed to float32
//   - mask clear, zeroing : 0
//   - mask clear, merging : previous low dword of the destination
// Floating-point exceptions are only evaluated when the conversion is
// actually performed.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_MASK_VssWsdR(bxInstruction_c *i)
{
  BxPackedXmmRegister dest = BX_READ_XMM_REG(i->src1());

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    // Build the softfloat status from MXCSR, then let the instruction
    // apply its own override (presumably EVEX embedded rounding - confirm
    // against softfloat_status_word_rc_override).
    float_status_t fp_status;
    mxcsr_to_softfloat_status_word(fp_status, MXCSR);
    softfloat_status_word_rc_override(fp_status, i);

    float64 src = BX_READ_XMM_REG_LO_QWORD(i->src2());
    dest.xmm32u(0) = float64_to_float32(src, fp_status);

    check_exceptionsSSE(get_exception_flags(fp_status));
  }
  else if (i->isZeroMasking()) {
    dest.xmm32u(0) = 0;
  }
  else {
    dest.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dest);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VCVTSS2SD, opmask-controlled, register source.
//
// The result starts as a copy of the first source operand (src1); only its
// lowest 64-bit element is replaced:
//   - scalar mask bit set : low float32 of src2 widened to float64
//   - mask clear, zeroing : 0
//   - mask clear, merging : previous low qword of the destination
// Floating-point exceptions are only evaluated when the conversion is
// actually performed.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_MASK_VsdWssR(bxInstruction_c *i)
{
  BxPackedXmmRegister dest = BX_READ_XMM_REG(i->src1());

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    // Build the softfloat status from MXCSR, then let the instruction
    // apply its own override (presumably EVEX embedded rounding - confirm
    // against softfloat_status_word_rc_override).
    float_status_t fp_status;
    mxcsr_to_softfloat_status_word(fp_status, MXCSR);
    softfloat_status_word_rc_override(fp_status, i);

    float32 src = BX_READ_XMM_REG_LO_DWORD(i->src2());
    dest.xmm64u(0) = float32_to_float64(src, fp_status);

    check_exceptionsSSE(get_exception_flags(fp_status));
  }
  else if (i->isZeroMasking()) {
    dest.xmm64u(0) = 0;
  }
  else {
    dest.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dest);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR(bxInstruction_c *i)
|
||||
{
|
||||
BxPackedAvxRegister result;
|
||||
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
|
||||
unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
|
||||
unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask());
|
||||
unsigned len = i->getVL();
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
|
||||
for (unsigned n=0, tmp_mask = mask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) {
|
||||
for (unsigned n=0, tmp_mask = opmask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) {
|
||||
if (tmp_mask & 0x1)
|
||||
result.vmm64u(n) = float32_to_float64(op.ymm32u(n), status);
|
||||
else
|
||||
@ -541,18 +587,46 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR(bxInstructi
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
if (! i->isZeroMasking()) {
|
||||
for (unsigned n=0; n < len; n++, mask >>= 2)
|
||||
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask);
|
||||
for (unsigned n=0; n < len; n++, opmask >>= 2)
|
||||
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
|
||||
BX_CLEAR_AVX_REGZ(i->dst(), len);
|
||||
}
|
||||
else {
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op, len);
|
||||
BX_WRITE_AVX_REGZ(i->dst(), result, len);
|
||||
}
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
#endif
|
||||
// VCVTPD2PS, opmask-controlled, register source.
//
// Narrows each float64 element of the source to a float32 element of the
// result, so the result occupies only half of the source vector width.
// 'result' is seeded with the current destination so that, with merge
// masking, elements whose mask bit is clear keep their previous value; with
// zero masking those elements are cleared instead.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_MASK_VpsWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result = BX_READ_AVX_REG(i->dst());
  unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask());
  unsigned len = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);
  softfloat_status_word_rc_override(status, i);

  // One mask bit per source element; bit 0 controls element 0.
  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, opmask >>= 1) {
    if (opmask & 0x1)
      result.vmm32u(n) = float64_to_float32(op.vmm64u(n), status);
    else if (i->isZeroMasking())
      result.vmm32u(n) = 0;
  }

  check_exceptionsSSE(get_exception_flags(status));

  if (len == BX_VL128) {
    // Only the low two float32 results are valid for a 128-bit source:
    // clear the upper qword of the low lane before writing it back.
    result.vmm64u(1) = 0;
    BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0));
  }
  else {
    // The vector length counts 128-bit lanes (1, 2 or 4), so the half-width
    // result is len/2 lanes. 'len-1' only matches that for the 256-bit case;
    // for a 512-bit source it yields 3, which is not a vector length.
    BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
  }

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// fixup
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2011-2013 Stanislav Shwartsman
|
||||
// Copyright (c) 2011-2014 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
@ -270,7 +270,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTPS_VpsWpsR(bxInstruction_c *
|
||||
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
|
||||
unsigned len = i->getVL();
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++)
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
|
||||
op.ymm32u(n) = approximate_rsqrt(op.ymm32u(n));
|
||||
|
||||
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
|
||||
@ -297,7 +297,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPPS_VpsWpsR(bxInstruction_c *i)
|
||||
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
|
||||
unsigned len = i->getVL();
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++)
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
|
||||
op.vmm32u(n) = approximate_rcp(op.vmm32u(n));
|
||||
|
||||
BX_WRITE_AVX_REGZ(i->dst(), op, len);
|
||||
@ -483,8 +483,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_VpdWpsR(bxInstruction_c
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
result.ymm64u(n) = float32_to_float64(op.xmm32u(n), status);
|
||||
}
|
||||
|
||||
@ -500,14 +501,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_VpsWpdR(bxInstruction_c
|
||||
{
|
||||
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
|
||||
BxPackedXmmRegister result;
|
||||
unsigned len = i->getVL();
|
||||
|
||||
result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
unsigned len = i->getVL();
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
result.xmm32u(n) = float64_to_float32(op.ymm64u(n), status);
|
||||
}
|
||||
|
||||
@ -525,6 +527,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_VsdWssR(bxInstruction_c
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
op1.xmm64u(0) = float32_to_float64(op2, status);
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
@ -541,6 +544,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_VssWsdR(bxInstruction_c
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
op1.xmm32u(0) = float64_to_float32(op2, status);
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
@ -557,8 +561,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PS_VpsWdqR(bxInstruction_c
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
|
||||
op.ymm32u(n) = int32_to_float32(op.ymm32u(n), status);
|
||||
}
|
||||
|
||||
@ -577,8 +582,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2DQ_VdqWpsR(bxInstruction_c
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
|
||||
op.ymm32u(n) = float32_to_int32(op.ymm32u(n), status);
|
||||
}
|
||||
|
||||
@ -597,8 +603,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2DQ_VdqWpsR(bxInstruction_c
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
|
||||
op.ymm32u(n) = float32_to_int32_round_to_zero(op.ymm32u(n), status);
|
||||
}
|
||||
|
||||
@ -1019,7 +1026,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPS_VpsHpsWpsIbR(bxInstruction_
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
int ib = i->Ib() & 0x1F;
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
|
||||
op1.ymm32u(n) = avx_compare32[ib](op1.ymm32u(n), op2.ymm32u(n), status) ? 0xFFFFFFFF : 0;
|
||||
}
|
||||
|
||||
@ -1039,7 +1046,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPD_VpdHpdWpdIbR(bxInstruction_
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
int ib = i->Ib() & 0x1F;
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
op1.ymm64u(n) = avx_compare64[ib](op1.ymm64u(n), op2.ymm64u(n), status) ?
|
||||
BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
|
||||
}
|
||||
@ -1141,14 +1148,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2DQ_VqWpdR(bxInstruction_c
|
||||
{
|
||||
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
|
||||
BxPackedXmmRegister result;
|
||||
unsigned len = i->getVL();
|
||||
|
||||
result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
unsigned len = i->getVL();
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
result.xmm32u(n) = float64_to_int32_round_to_zero(op.ymm64u(n), status);
|
||||
}
|
||||
|
||||
@ -1163,14 +1171,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2DQ_VqWpdR(bxInstruction_c *
|
||||
{
|
||||
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
|
||||
BxPackedXmmRegister result;
|
||||
unsigned len = i->getVL();
|
||||
|
||||
result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */
|
||||
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
unsigned len = i->getVL();
|
||||
softfloat_status_word_rc_override(status, i);
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
result.xmm32u(n) = float64_to_int32(op.ymm64u(n), status);
|
||||
}
|
||||
|
||||
@ -1187,7 +1196,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_VpdWqR(bxInstruction_c *
|
||||
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
|
||||
unsigned len = i->getVL();
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
result.ymm64u(n) = int32_to_float64(op.xmm32u(n));
|
||||
}
|
||||
|
||||
@ -1204,7 +1213,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPS_VpsWpsR(bxInstruction_c *i
|
||||
|
||||
unsigned result = EFlagsZFMask | EFlagsCFMask;
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
if ((op2.ymm64u(n) & op1.ymm64u(n) & BX_CONST64(0x8000000080000000)) != 0)
|
||||
result &= ~EFlagsZFMask;
|
||||
|
||||
@ -1225,7 +1234,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPD_VpdWpdR(bxInstruction_c *i
|
||||
|
||||
unsigned result = EFlagsZFMask | EFlagsCFMask;
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
if ((op2.ymm64u(n) & op1.ymm64u(n) & BX_CONST64(0x8000000000000000)) != 0)
|
||||
result &= ~EFlagsZFMask;
|
||||
|
||||
@ -1255,7 +1264,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPS_VpsWpsIbR(bxInstruction_c
|
||||
if (control & 0x8)
|
||||
status.float_suppress_exception |= float_flag_inexact;
|
||||
|
||||
for(unsigned n=0; n < (4*len); n++) {
|
||||
for(unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
|
||||
op.ymm32u(n) = float32_round_to_int(op.ymm32u(n), status);
|
||||
}
|
||||
|
||||
@ -1283,7 +1292,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPD_VpdWpdIbR(bxInstruction_c
|
||||
if (control & 0x8)
|
||||
status.float_suppress_exception |= float_flag_inexact;
|
||||
|
||||
for(unsigned n=0; n < (2*len); n++) {
|
||||
for(unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
op.ymm64u(n) = float64_round_to_int(op.ymm64u(n), status);
|
||||
}
|
||||
|
||||
@ -1401,7 +1410,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_VpsWpsR(bxInstruction_c
|
||||
// no denormal exception is reported on MXCSR
|
||||
status.float_suppress_exception = float_flag_denormal;
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
|
||||
result.ymm32u(n) = float16_to_float32(op.xmm16u(n), status);
|
||||
}
|
||||
|
||||
@ -1431,7 +1440,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_WpsVpsIb(bxInstruction_c
|
||||
if ((control & 0x4) == 0)
|
||||
status.float_rounding_mode = control & 0x3;
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
|
||||
result.xmm16u(n) = float32_to_float16(op.ymm32u(n), status);
|
||||
}
|
||||
|
||||
|
@ -3240,6 +3240,11 @@ public: // for now...
|
||||
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF BX_INSF_TYPE VCVTPD2PS_MASK_VpsWpdR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VCVTPS2PD_MASK_VpdWpsR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VCVTSS2SD_MASK_VsdWssR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VCVTSD2SS_MASK_VssWsdR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPANDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001-2013 The Bochs Project
|
||||
// Copyright (C) 2001-2014 The Bochs Project
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (C) 2001-2013 The Bochs Project
|
||||
// Copyright (C) 2001-2014 The Bochs Project
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU Lesser General Public
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2013 Stanislav Shwartsman
|
||||
// Copyright (c) 2013-2014 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
@ -247,6 +247,20 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f59_Mask[4] = {
|
||||
/* F2 */ { BxVexW1, BX_IA_V512_VMULSD_VsdHpdWsd_Kmask }
|
||||
};
|
||||
|
||||
// EVEX opcode 0F 5A, "k0" slot of the main EVEX opcode table.
// One entry per SSE prefix, in the order: none, 66, F3, F2.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5a[4] = {
  /* -- */ { BxVexW0, BX_IA_V512_VCVTPS2PD_VpdWps },
  /* 66 */ { BxVexW1, BX_IA_V512_VCVTPD2PS_VpsWpd },
  /* F3 */ { BxVexW0, BX_IA_V512_VCVTSS2SD_VsdWss },
  /* F2 */ { BxVexW1, BX_IA_V512_VCVTSD2SS_VssWsd }
};
|
||||
|
||||
// EVEX opcode 0F 5A, non-"k0" slot of the main EVEX opcode table: selects
// the *_Kmask opcode variants.
// One entry per SSE prefix, in the order: none, 66, F3, F2.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5a_Mask[4] = {
  /* -- */ { BxVexW0, BX_IA_V512_VCVTPS2PD_VpdWps_Kmask },
  /* 66 */ { BxVexW1, BX_IA_V512_VCVTPD2PS_VpsWpd_Kmask },
  /* F3 */ { BxVexW0, BX_IA_V512_VCVTSS2SD_VsdWss_Kmask },
  /* F2 */ { BxVexW1, BX_IA_V512_VCVTSD2SS_VssWsd_Kmask }
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5c[4] = {
|
||||
/* -- */ { BxVexW0, BX_IA_V512_VSUBPS_VpsHpsWps },
|
||||
/* 66 */ { BxVexW1, BX_IA_V512_VSUBPD_VpdHpdWpd },
|
||||
@ -636,8 +650,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
||||
/* 58 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f58_Mask },
|
||||
/* 59 k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f59 },
|
||||
/* 59 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f59_Mask },
|
||||
/* 5A k0 */ { 0, BX_IA_ERROR },
|
||||
/* 5A */ { 0, BX_IA_ERROR },
|
||||
/* 5A k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5a },
|
||||
/* 5A */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5a_Mask },
|
||||
/* 5B k0 */ { 0, BX_IA_ERROR },
|
||||
/* 5B */ { 0, BX_IA_ERROR },
|
||||
/* 5C k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5c },
|
||||
|
@ -271,7 +271,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERDPS_MASK_VpsVSib(bxInstruct
|
||||
// 256 bit => 8
|
||||
// 512 bit => 16
|
||||
|
||||
unsigned n, len = i->getVL(), num_elements = 4 * len;
|
||||
unsigned n, len = i->getVL(), num_elements = DWORD_ELEMENTS(len);
|
||||
|
||||
#if BX_SUPPORT_ALIGNMENT_CHECK
|
||||
unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
|
||||
@ -312,7 +312,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERQPS_MASK_VpsVSib(bxInstruct
|
||||
BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst());
|
||||
Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask;
|
||||
|
||||
unsigned n, len = i->getVL(), num_elements = 2 * len;
|
||||
unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len);
|
||||
|
||||
#if BX_SUPPORT_ALIGNMENT_CHECK
|
||||
unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
|
||||
@ -357,7 +357,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERDPD_MASK_VpdVSib(bxInstruct
|
||||
BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst());
|
||||
Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask;
|
||||
|
||||
unsigned n, len = i->getVL(), num_elements = 2 * len;
|
||||
unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len);
|
||||
|
||||
#if BX_SUPPORT_ALIGNMENT_CHECK
|
||||
unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
|
||||
@ -398,7 +398,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERQPD_MASK_VpdVSib(bxInstruct
|
||||
BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst());
|
||||
Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask;
|
||||
|
||||
unsigned n, len = i->getVL(), num_elements = 2 * len;
|
||||
unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len);
|
||||
|
||||
#if BX_SUPPORT_ALIGNMENT_CHECK
|
||||
unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2008-2013 Stanislav Shwartsman
|
||||
// Copyright (c) 2008-2014 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
@ -2060,8 +2060,8 @@ bx_define_opcode(BX_IA_VCVTTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVT
|
||||
bx_define_opcode(BX_IA_VCVTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2DQ_VqWpdR, BX_ISA_AVX, OP_Vq, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, OP_Vpd, OP_Wq, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, OP_Vss, OP_Hpd, OP_Wsd, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, OP_Vsd, OP_Hps, OP_Wss, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTDQ2PS_VpsWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTDQ2PS_VpsWdqR, BX_ISA_AVX, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTPS2DQ_VdqWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPS2DQ_VdqWpsR, BX_ISA_AVX, OP_Vdq, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
bx_define_opcode(BX_IA_VCVTTPS2DQ_VdqWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTTPS2DQ_VdqWpsR, BX_ISA_AVX, OP_Vdq, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_AVX)
|
||||
@ -2569,6 +2569,16 @@ bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD
|
||||
bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VCVTPS2PD_VpdWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX512, OP_Vpd, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
bx_define_opcode(BX_IA_V512_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX512, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VCVTSS2SD_VsdWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_MASK_VsdWssR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VCVTSD2SS_VssWsd_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_MASK_VssWsdR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VCVTPS2PD_VpdWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR, BX_ISA_AVX512, OP_Vpd, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
bx_define_opcode(BX_IA_V512_VCVTPD2PS_VpsWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VCVTPD2PS_MASK_VpsWpdR, BX_ISA_AVX512, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Wps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2011-2013 Stanislav Shwartsman
|
||||
// Copyright (c) 2011-2014 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
@ -614,7 +614,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPS_VpsWpsR(bxInstruction_c *i
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
|
||||
for (unsigned n=0; n < (4*len); n++) {
|
||||
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
|
||||
op.ymm32u(n) = float32_frc(op.ymm32u(n), status);
|
||||
}
|
||||
|
||||
@ -632,7 +632,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPD_VpdWpdR(bxInstruction_c *i
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
|
||||
for (unsigned n=0; n < (2*len); n++) {
|
||||
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
|
||||
op.ymm64u(n) = float64_frc(op.ymm64u(n), status);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user