Implemented a few more AVX-512 floating-point convert instructions

This commit is contained in:
Stanislav Shwartsman 2014-01-18 20:10:05 +00:00
parent 78509d637b
commit ba52890538
9 changed files with 156 additions and 44 deletions

View File

@ -518,20 +518,66 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruct
BX_NEXT_INSTR(i);
}
#if 0
// Masked scalar float64 -> float32 convert, register-source form.
//
// The result starts as a copy of the src1 register; only its low dword is
// replaced:
//   - scalar opmask bit set   : float64_to_float32() of the low qword of src2
//   - clear, zero-masking     : 0
//   - clear, otherwise        : the current low dword of the destination
// The register is then stored to dst through BX_WRITE_XMM_REG_CLEAR_HIGH.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_MASK_VssWsdR(bxInstruction_c *i)
{
  BxPackedXmmRegister merged = BX_READ_XMM_REG(i->src1());

  if (!(BX_SCALAR_ELEMENT_MASK(i->opmask()))) {
    // element is masked off: no conversion is performed and no FP flags are raised
    merged.xmm32u(0) = i->isZeroMasking() ? 0 : BX_READ_XMM_REG_LO_DWORD(i->dst());
  }
  else {
    float64 source = BX_READ_XMM_REG_LO_QWORD(i->src2());

    // softfloat status is seeded from MXCSR, then adjusted per instruction
    // (presumably the embedded rounding-control override - confirm in helper)
    float_status_t fp_status;
    mxcsr_to_softfloat_status_word(fp_status, MXCSR);
    softfloat_status_word_rc_override(fp_status, i);

    merged.xmm32u(0) = float64_to_float32(source, fp_status);

    // exceptions are checked before the destination register is written
    check_exceptionsSSE(get_exception_flags(fp_status));
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), merged);

  BX_NEXT_INSTR(i);
}
// Masked scalar float32 -> float64 convert, register-source form.
//
// The result starts as a copy of the src1 register; only its low qword is
// replaced:
//   - scalar opmask bit set   : float32_to_float64() of the low dword of src2
//   - clear, zero-masking     : 0
//   - clear, otherwise        : the current low qword of the destination
// The register is then stored to dst through BX_WRITE_XMM_REG_CLEAR_HIGH.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_MASK_VsdWssR(bxInstruction_c *i)
{
  BxPackedXmmRegister merged = BX_READ_XMM_REG(i->src1());

  if (!(BX_SCALAR_ELEMENT_MASK(i->opmask()))) {
    // element is masked off: no conversion is performed and no FP flags are raised
    merged.xmm64u(0) = i->isZeroMasking() ? 0 : BX_READ_XMM_REG_LO_QWORD(i->dst());
  }
  else {
    float32 source = BX_READ_XMM_REG_LO_DWORD(i->src2());

    // softfloat status is seeded from MXCSR, then adjusted per instruction
    // (presumably the embedded rounding-control override - confirm in helper)
    float_status_t fp_status;
    mxcsr_to_softfloat_status_word(fp_status, MXCSR);
    softfloat_status_word_rc_override(fp_status, i);

    merged.xmm64u(0) = float32_to_float64(source, fp_status);

    // exceptions are checked before the destination register is written
    check_exceptionsSSE(get_exception_flags(fp_status));
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), merged);

  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister result;
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0, tmp_mask = mask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) {
for (unsigned n=0, tmp_mask = opmask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) {
if (tmp_mask & 0x1)
result.vmm64u(n) = float32_to_float64(op.ymm32u(n), status);
else
@ -541,18 +587,46 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR(bxInstructi
check_exceptionsSSE(get_exception_flags(status));
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, mask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask);
for (unsigned n=0; n < len; n++, opmask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i);
}
#endif
// Masked packed float64 -> float32 convert, register-source form.
//
// Each of the QWORD_ELEMENTS(len) float64 source elements is narrowed to a
// float32 and stored in the matching dword slot of the result, so the result
// occupies only half of the source vector width. Per-element opmask handling:
//   - bit set                 : converted value
//   - clear, zero-masking     : 0
//   - clear, otherwise        : the destination's previous dword is kept
// SSE floating-point exceptions are checked before the destination is written.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_MASK_VpsWpdR(bxInstruction_c *i)
{
  // 'result' is preloaded with the destination so that masked-off elements
  // retain their old contents when zero-masking is not requested
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result = BX_READ_AVX_REG(i->dst());
  unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask());
  unsigned len = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);
  softfloat_status_word_rc_override(status, i);

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, opmask >>= 1) {
    if (opmask & 0x1)
      result.vmm32u(n) = float64_to_float32(op.vmm64u(n), status);
    else if (i->isZeroMasking())
      result.vmm32u(n) = 0;
  }

  check_exceptionsSSE(get_exception_flags(status));

  if (len == BX_VL128) {
    // only the low 64 bits carry results; clear the rest of the low lane
    result.vmm64u(1) = 0;
    BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0));
  }
  else {
    // write half vector: 'len' counts 128-bit lanes, so half of the source
    // width is len/2 lanes. The previous 'len-1' matched that only for the
    // two-lane (256-bit) case and produced 3 for a four-lane (512-bit) source.
    BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1);
  }

  BX_NEXT_INSTR(i);
}
// fixup

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2011-2013 Stanislav Shwartsman
// Copyright (c) 2011-2014 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -270,7 +270,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTPS_VpsWpsR(bxInstruction_c *
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
unsigned len = i->getVL();
for (unsigned n=0; n < (4*len); n++)
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
op.ymm32u(n) = approximate_rsqrt(op.ymm32u(n));
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
@ -297,7 +297,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPPS_VpsWpsR(bxInstruction_c *i)
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL();
for (unsigned n=0; n < (4*len); n++)
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
op.vmm32u(n) = approximate_rcp(op.vmm32u(n));
BX_WRITE_AVX_REGZ(i->dst(), op, len);
@ -483,8 +483,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_VpdWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.ymm64u(n) = float32_to_float64(op.xmm32u(n), status);
}
@ -500,14 +501,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_VpsWpdR(bxInstruction_c
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedXmmRegister result;
unsigned len = i->getVL();
result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
unsigned len = i->getVL();
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.xmm32u(n) = float64_to_float32(op.ymm64u(n), status);
}
@ -525,6 +527,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_VsdWssR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float32_to_float64(op2, status);
check_exceptionsSSE(get_exception_flags(status));
@ -541,6 +544,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_VssWsdR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float64_to_float32(op2, status);
check_exceptionsSSE(get_exception_flags(status));
@ -557,8 +561,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PS_VpsWdqR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < (4*len); n++) {
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.ymm32u(n) = int32_to_float32(op.ymm32u(n), status);
}
@ -577,8 +582,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2DQ_VdqWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < (4*len); n++) {
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.ymm32u(n) = float32_to_int32(op.ymm32u(n), status);
}
@ -597,8 +603,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2DQ_VdqWpsR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < (4*len); n++) {
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.ymm32u(n) = float32_to_int32_round_to_zero(op.ymm32u(n), status);
}
@ -1019,7 +1026,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPS_VpsHpsWpsIbR(bxInstruction_
mxcsr_to_softfloat_status_word(status, MXCSR);
int ib = i->Ib() & 0x1F;
for (unsigned n=0; n < (4*len); n++) {
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op1.ymm32u(n) = avx_compare32[ib](op1.ymm32u(n), op2.ymm32u(n), status) ? 0xFFFFFFFF : 0;
}
@ -1039,7 +1046,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPD_VpdHpdWpdIbR(bxInstruction_
mxcsr_to_softfloat_status_word(status, MXCSR);
int ib = i->Ib() & 0x1F;
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
op1.ymm64u(n) = avx_compare64[ib](op1.ymm64u(n), op2.ymm64u(n), status) ?
BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
}
@ -1141,14 +1148,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2DQ_VqWpdR(bxInstruction_c
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedXmmRegister result;
unsigned len = i->getVL();
result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
unsigned len = i->getVL();
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.xmm32u(n) = float64_to_int32_round_to_zero(op.ymm64u(n), status);
}
@ -1163,14 +1171,15 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2DQ_VqWpdR(bxInstruction_c *
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedXmmRegister result;
unsigned len = i->getVL();
result.xmm64u(1) = 0; /* clear upper part of the result for case of VL128 */
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
unsigned len = i->getVL();
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.xmm32u(n) = float64_to_int32(op.ymm64u(n), status);
}
@ -1187,7 +1196,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_VpdWqR(bxInstruction_c *
BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
unsigned len = i->getVL();
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.ymm64u(n) = int32_to_float64(op.xmm32u(n));
}
@ -1204,7 +1213,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPS_VpsWpsR(bxInstruction_c *i
unsigned result = EFlagsZFMask | EFlagsCFMask;
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
if ((op2.ymm64u(n) & op1.ymm64u(n) & BX_CONST64(0x8000000080000000)) != 0)
result &= ~EFlagsZFMask;
@ -1225,7 +1234,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPD_VpdWpdR(bxInstruction_c *i
unsigned result = EFlagsZFMask | EFlagsCFMask;
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
if ((op2.ymm64u(n) & op1.ymm64u(n) & BX_CONST64(0x8000000000000000)) != 0)
result &= ~EFlagsZFMask;
@ -1255,7 +1264,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPS_VpsWpsIbR(bxInstruction_c
if (control & 0x8)
status.float_suppress_exception |= float_flag_inexact;
for(unsigned n=0; n < (4*len); n++) {
for(unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.ymm32u(n) = float32_round_to_int(op.ymm32u(n), status);
}
@ -1283,7 +1292,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPD_VpdWpdIbR(bxInstruction_c
if (control & 0x8)
status.float_suppress_exception |= float_flag_inexact;
for(unsigned n=0; n < (2*len); n++) {
for(unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
op.ymm64u(n) = float64_round_to_int(op.ymm64u(n), status);
}
@ -1401,7 +1410,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_VpsWpsR(bxInstruction_c
// no denormal exception is reported on MXCSR
status.float_suppress_exception = float_flag_denormal;
for (unsigned n=0; n < (4*len); n++) {
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
result.ymm32u(n) = float16_to_float32(op.xmm16u(n), status);
}
@ -1431,7 +1440,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_WpsVpsIb(bxInstruction_c
if ((control & 0x4) == 0)
status.float_rounding_mode = control & 0x3;
for (unsigned n=0; n < (4*len); n++) {
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
result.xmm16u(n) = float32_to_float16(op.ymm32u(n), status);
}

View File

@ -3240,6 +3240,11 @@ public: // for now...
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTPD2PS_MASK_VpsWpdR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTPS2PD_MASK_VpdWpsR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTSS2SD_MASK_VsdWssR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTSD2SS_MASK_VssWsdR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPANDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001-2013 The Bochs Project
// Copyright (C) 2001-2014 The Bochs Project
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001-2013 The Bochs Project
// Copyright (C) 2001-2014 The Bochs Project
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2013 Stanislav Shwartsman
// Copyright (c) 2013-2014 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -247,6 +247,20 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f59_Mask[4] = {
/* F2 */ { BxVexW1, BX_IA_V512_VMULSD_VsdHpdWsd_Kmask }
};
// EVEX opcode 0F 5A group (floating point convert), forms without the
// "_Kmask" suffix. One row per SSE prefix as labelled on each line
// (none, 66, F3, F2); each row pairs a BxVexW0/BxVexW1 attribute with the
// opcode id to decode to.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5a[4] = {
/* -- */ { BxVexW0, BX_IA_V512_VCVTPS2PD_VpdWps },
/* 66 */ { BxVexW1, BX_IA_V512_VCVTPD2PS_VpsWpd },
/* F3 */ { BxVexW0, BX_IA_V512_VCVTSS2SD_VsdWss },
/* F2 */ { BxVexW1, BX_IA_V512_VCVTSD2SS_VssWsd }
};
// EVEX opcode 0F 5A group (floating point convert), "_Kmask" forms.
// One row per SSE prefix as labelled on each line (none, 66, F3, F2);
// each row pairs a BxVexW0/BxVexW1 attribute with the opcode id of the
// opmask-aware variant.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5a_Mask[4] = {
/* -- */ { BxVexW0, BX_IA_V512_VCVTPS2PD_VpdWps_Kmask },
/* 66 */ { BxVexW1, BX_IA_V512_VCVTPD2PS_VpsWpd_Kmask },
/* F3 */ { BxVexW0, BX_IA_V512_VCVTSS2SD_VsdWss_Kmask },
/* F2 */ { BxVexW1, BX_IA_V512_VCVTSD2SS_VssWsd_Kmask }
};
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f5c[4] = {
/* -- */ { BxVexW0, BX_IA_V512_VSUBPS_VpsHpsWps },
/* 66 */ { BxVexW1, BX_IA_V512_VSUBPD_VpdHpdWpd },
@ -636,8 +650,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 58 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f58_Mask },
/* 59 k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f59 },
/* 59 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f59_Mask },
/* 5A k0 */ { 0, BX_IA_ERROR },
/* 5A */ { 0, BX_IA_ERROR },
/* 5A k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5a },
/* 5A */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5a_Mask },
/* 5B k0 */ { 0, BX_IA_ERROR },
/* 5B */ { 0, BX_IA_ERROR },
/* 5C k0 */ { BxPrefixSSE4, BX_IA_ERROR, BxOpcodeGroupEVEX_0f5c },

View File

@ -271,7 +271,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERDPS_MASK_VpsVSib(bxInstruct
// 256 bit => 8
// 512 bit => 16
unsigned n, len = i->getVL(), num_elements = 4 * len;
unsigned n, len = i->getVL(), num_elements = DWORD_ELEMENTS(len);
#if BX_SUPPORT_ALIGNMENT_CHECK
unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
@ -312,7 +312,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERQPS_MASK_VpsVSib(bxInstruct
BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst());
Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask;
unsigned n, len = i->getVL(), num_elements = 2 * len;
unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len);
#if BX_SUPPORT_ALIGNMENT_CHECK
unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
@ -357,7 +357,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERDPD_MASK_VpdVSib(bxInstruct
BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst());
Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask;
unsigned n, len = i->getVL(), num_elements = 2 * len;
unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len);
#if BX_SUPPORT_ALIGNMENT_CHECK
unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
@ -398,7 +398,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGATHERQPD_MASK_VpdVSib(bxInstruct
BxPackedAvxRegister *dest = &BX_AVX_REG(i->dst());
Bit64u opmask = BX_READ_OPMASK(i->opmask()), mask;
unsigned n, len = i->getVL(), num_elements = 2 * len;
unsigned n, len = i->getVL(), num_elements = QWORD_ELEMENTS(len);
#if BX_SUPPORT_ALIGNMENT_CHECK
unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008-2013 Stanislav Shwartsman
// Copyright (c) 2008-2014 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -2060,8 +2060,8 @@ bx_define_opcode(BX_IA_VCVTTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVT
bx_define_opcode(BX_IA_VCVTPD2DQ_VqWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2DQ_VqWpdR, BX_ISA_AVX, OP_Vq, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTDQ2PD_VpdWq, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTDQ2PD_VpdWqR, BX_ISA_AVX, OP_Vpd, OP_Wq, OP_NONE, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, OP_Vss, OP_Hpd, OP_Wsd, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, OP_Vsd, OP_Hps, OP_Wss, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTDQ2PS_VpsWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTDQ2PS_VpsWdqR, BX_ISA_AVX, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTPS2DQ_VdqWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTPS2DQ_VdqWpsR, BX_ISA_AVX, OP_Vdq, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VCVTTPS2DQ_VdqWps, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VCVTTPS2DQ_VdqWpsR, BX_ISA_AVX, OP_Vdq, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_AVX)
@ -2569,6 +2569,16 @@ bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD
bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VCVTSS2SD_VsdWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_VsdWssR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTSD2SS_VssWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_VssWsdR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTPS2PD_VpdWps, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTPS2PD_VpdWpsR, BX_ISA_AVX512, OP_Vpd, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VCVTPD2PS_VpsWpd, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VCVTPD2PS_VpsWpdR, BX_ISA_AVX512, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VCVTSS2SD_VsdWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VCVTSS2SD_MASK_VsdWssR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_Wss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTSD2SS_VssWsd_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VCVTSD2SS_MASK_VssWsdR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_Wsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTPS2PD_VpdWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR, BX_ISA_AVX512, OP_Vpd, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VCVTPD2PS_VpsWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VCVTPD2PS_MASK_VpsWpdR, BX_ISA_AVX512, OP_Vps, OP_Wpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Wps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2011-2013 Stanislav Shwartsman
// Copyright (c) 2011-2014 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -614,7 +614,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPS_VpsWpsR(bxInstruction_c *i
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < (4*len); n++) {
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.ymm32u(n) = float32_frc(op.ymm32u(n), status);
}
@ -632,7 +632,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPD_VpdWpdR(bxInstruction_c *i
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < (2*len); n++) {
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
op.ymm64u(n) = float64_frc(op.ymm64u(n), status);
}