Bochs/bochs/cpu/avx512_cvt.cc
Stanislav Shwartsman 36ba25847f Implemented last missed AVX-512 unsigned convert instructions
The only missed AVX-512 opcodes now are:

512.66.0F38.W0 13 VCVTPH2PS
512.66.0F3A.W0 1D VCVTPS2PH

512.66.0F38.WIG 21 VPMOVSXBD
512.66.0F38.WIG 22 VPMOVSXBQ
512.66.0F38.WIG 23 VPMOVSXWD
512.66.0F38.WIG 24 VPMOVSXWQ
512.66.0F38.W0  25 VPMOVSXDQ

512.66.0F38.WIG 31 VPMOVSZBD
512.66.0F38.WIG 32 VPMOVSZBQ
512.66.0F38.WIG 33 VPMOVSZWD
512.66.0F38.WIG 34 VPMOVSZWQ
512.66.0F38.W0  35 VPMOVSzDQ

512.66.0F38.W0 2C VSCALEFPS
512.66.0F38.W1 2C VSCALEFPD
NDS.LIG.66.0F38.W0 2D VSCALESS
NDS.LIG.66.0F38.W1 2D VSCALESD

512.66.0F38.W0 4C VRCP14PS
 512.66.0F38.W1 4C VRCP14PD
NDS.LIG.66.0F38.W0 4D VRCP14SS
 NDS.LIG.66.0F38.W1 4D VRCP14SD
512.66.0F38.W0 4E VRSQRT14PS
 512.66.0F38.W1 4E VRSQRT14PD
NDS.LIG.66.0F38.W0 4F VRSQRT14SS
 NDS.LIG.66.0F38.W1 4F VRSQRT14SD

NDS.512.66.0F3A.W0 03 VALIGND
NDS.512.66.0F3A.W1 03 VALIGNQ

512.66.0F3A.W0 08 VRNDSCALEPS
512.66.0F3A.W1 09 VRNDSCALEPD
NDS.LIG.66.0F3A.W1 0A VRNDSCALESS
NDS.LIG.66.0F3A.W1 0B VRNDSCALESD

512.66.0F3A.W0 19 VEXTRACTF32x4
512.66.0F3A.W1 1B VEXTRACTF64x4
512.66.0F3A.W0 39 VEXTRACTI32x4
512.66.0F3A.W1 3B VEXTRACTI64x4

512.66.0F3A.W0 26 VGETMANTPS
 512.66.0F3A.W1 26 VGETMANTPD
NDS.LIG.66.0F3A.W0 27 VGETMANTSS
 NDS.LIG.66.0F3A.W1 27 VGETMANTSD
2014-01-28 12:57:38 +00:00

539 lines
20 KiB
C++

/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2013-2014 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_EVEX
extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mxcsr);
#include "simd_int.h"
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2USI_GdWssR(bxInstruction_c *i)
{
float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit32u result = float32_to_uint32(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_32BIT_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2USI_GqWssR(bxInstruction_c *i)
{
float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit64u result = float32_to_uint64(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_64BIT_REG(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTSS2USI_GdWssR(bxInstruction_c *i)
{
float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit32u result = float32_to_uint32_round_to_zero(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_32BIT_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTSS2USI_GqWssR(bxInstruction_c *i)
{
float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit64u result = float32_to_uint64_round_to_zero(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_64BIT_REG(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2USI_GdWsdR(bxInstruction_c *i)
{
float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit32u result = float64_to_uint32(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_32BIT_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2USI_GqWsdR(bxInstruction_c *i)
{
float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit64u result = float64_to_uint64(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_64BIT_REG(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTSD2USI_GdWsdR(bxInstruction_c *i)
{
float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit32u result = float64_to_uint32_round_to_zero(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_32BIT_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTSD2USI_GqWsdR(bxInstruction_c *i)
{
float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
Bit64u result = float64_to_uint64_round_to_zero(op, status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_64BIT_REG(i->dst(), result);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEdR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = uint32_to_float32(BX_READ_32BIT_REG(i->src2()), status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEqR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = uint64_to_float32(BX_READ_64BIT_REG(i->src2()), status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SD_VsdEdR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
op1.xmm64u(0) = uint32_to_float64(BX_READ_32BIT_REG(i->src2()));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SD_VsdEqR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = uint64_to_float64(BX_READ_64BIT_REG(i->src2()), status);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_MASK_VssWsdR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm32u(0) = float64_to_float32(op2, status);
check_exceptionsSSE(get_exception_flags(status));
}
else {
if (i->isZeroMasking())
op1.xmm32u(0) = 0;
else
op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_MASK_VsdWssR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
op1.xmm64u(0) = float32_to_float64(op2, status);
check_exceptionsSSE(get_exception_flags(status));
}
else {
if (i->isZeroMasking())
op1.xmm64u(0) = 0;
else
op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
#define AVX512_CVT64_TO_32(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result = BX_READ_AVX_REG(i->dst()); \
unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask()); \
unsigned len = i->getVL(); \
\
float_status_t status; \
mxcsr_to_softfloat_status_word(status, MXCSR); \
softfloat_status_word_rc_override(status, i); \
\
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, opmask >>= 1) { \
if (opmask & 0x1) \
result.vmm32u(n) = (func)(op.vmm64u(n), status); \
else if (i->isZeroMasking()) \
result.vmm32u(n) = 0; \
} \
\
check_exceptionsSSE(get_exception_flags(status)); \
\
if (len == BX_VL128) { \
result.vmm64u(1) = 0; \
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0)); \
} \
else { \
BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); /* write half vector */ \
} \
\
BX_NEXT_INSTR(i); \
}
AVX512_CVT64_TO_32(VCVTPD2PS_MASK_VpsWpdR, float64_to_float32)
AVX512_CVT64_TO_32(VCVTPD2DQ_MASK_VdqWpdR, float64_to_int32)
AVX512_CVT64_TO_32(VCVTTPD2DQ_MASK_VdqWpdR, float64_to_int32_round_to_zero)
AVX512_CVT64_TO_32(VCVTPD2UDQ_MASK_VdqWpdR, float64_to_uint32)
AVX512_CVT64_TO_32(VCVTTPD2UDQ_MASK_VdqWpdR, float64_to_uint32_round_to_zero)
#define AVX512_CVT32_TO_32(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); \
unsigned len = i->getVL(); \
\
Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask()); \
\
float_status_t status; \
mxcsr_to_softfloat_status_word(status, MXCSR); \
softfloat_status_word_rc_override(status, i); \
\
for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) { \
if (opmask & mask) \
op.vmm32u(n) = (func)(op.vmm32u(n), status); \
else \
op.vmm32u(n) = 0; \
} \
\
check_exceptionsSSE(get_exception_flags(status)); \
\
if (! i->isZeroMasking()) { \
for (unsigned n=0; n < len; n++, opmask >>= 4) \
xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask); \
BX_CLEAR_AVX_REGZ(i->dst(), len); \
} \
else { \
BX_WRITE_AVX_REGZ(i->dst(), op, len); \
} \
\
BX_NEXT_INSTR(i); \
}
AVX512_CVT32_TO_32(VCVTPS2DQ_MASK_VdqWpsR, float32_to_int32)
AVX512_CVT32_TO_32(VCVTTPS2DQ_MASK_VdqWpsR, float32_to_int32_round_to_zero)
AVX512_CVT32_TO_32(VCVTPS2UDQ_MASK_VdqWpsR, float32_to_uint32)
AVX512_CVT32_TO_32(VCVTTPS2UDQ_MASK_VdqWpsR, float32_to_uint32_round_to_zero)
AVX512_CVT32_TO_32(VCVTDQ2PS_MASK_VpsWdqR, int32_to_float32)
AVX512_CVT32_TO_32(VCVTUDQ2PS_MASK_VpsWdqR, uint32_to_float32)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2UDQ_VdqWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.vmm32u(n) = float32_to_uint32(op.vmm32u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2UDQ_VdqWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.vmm32u(n) = float32_to_uint32_round_to_zero(op.vmm32u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2UDQ_VdqWpdR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.vmm32u(n) = float64_to_uint32(op.vmm64u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
if (len == BX_VL128) {
result.vmm64u(1) = 0;
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0));
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2UDQ_VdqWpdR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.vmm32u(n) = float64_to_uint32_round_to_zero(op.vmm64u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
if (len == BX_VL128) {
result.vmm64u(1) = 0;
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result.vmm128(0));
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_VpsWdqR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
op.vmm32u(n) = uint32_to_float32(op.vmm32u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_VpdWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister result;
unsigned len = i->getVL();
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
result.vmm64u(n) = uint32_to_float64(op.ymm32u(n));
}
BX_WRITE_AVX_REGZ(i->dst(), result, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister result;
unsigned len = i->getVL();
Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());
for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
if (opmask & mask)
result.vmm64u(n) = uint32_to_float64(op.ymm32u(n));
else
result.vmm64u(n) = 0;
}
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_MASK_VpdWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister result;
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask());
unsigned len = i->getVL();
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0, tmp_mask = opmask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) {
if (tmp_mask & 0x1)
result.vmm64u(n) = float32_to_float64(op.ymm32u(n), status);
else
result.vmm64u(n) = 0;
}
check_exceptionsSSE(get_exception_flags(status));
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister result;
unsigned len = i->getVL();
Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());
for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
if (opmask & mask)
result.vmm64u(n) = int32_to_float64(op.ymm32s(n));
else
result.vmm64u(n) = 0;
}
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i);
}
#endif