Commit ac06ee46ae

Now the only missed AVX-512 opcodes are:

  512.66.0F38.W0 13      VCVTPH2PS
  512.66.0F3A.W0 1D      VCVTPS2PH
  512.66.0F38.W0 2C      VSCALEFPS
  512.66.0F38.W1 2C      VSCALEFPD
  NDS.LIG.66.0F38.W0 2D  VSCALESS
  NDS.LIG.66.0F38.W1 2D  VSCALESD
  512.66.0F38.W0 4C      VRCP14PS
  512.66.0F38.W1 4C      VRCP14PD
  NDS.LIG.66.0F38.W0 4D  VRCP14SS
  NDS.LIG.66.0F38.W1 4D  VRCP14SD
  512.66.0F38.W0 4E      VRSQRT14PS
  512.66.0F38.W1 4E      VRSQRT14PD
  NDS.LIG.66.0F38.W0 4F  VRSQRT14SS
  NDS.LIG.66.0F38.W1 4F  VRSQRT14SD
  512.66.0F3A.W0 08      VRNDSCALEPS
  512.66.0F3A.W1 09      VRNDSCALEPD
  NDS.LIG.66.0F3A.W1 0A  VRNDSCALESS
  NDS.LIG.66.0F3A.W1 0B  VRNDSCALESD
  512.66.0F3A.W0 26      VGETMANTPS
  512.66.0F3A.W1 26      VGETMANTPD
  NDS.LIG.66.0F3A.W0 27  VGETMANTSS
  NDS.LIG.66.0F3A.W1 27  VGETMANTSD
/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2013-2014 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////

#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR

#if BX_SUPPORT_AVX
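
// The avx_masked_load32/64 helpers below implement masked element-wise vector
// loads. In 64-bit mode they first probe every active element for a canonical
// linear address (so a fault is raised before any destination element is
// written), then load the active elements and zero the inactive ones.
// Example (illustrative only, not from the original source): a dword load with
// mask 0b0101 fetches elements 0 and 2 from memory and forces elements 1 and 3
// of *op to zero.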
void BX_CPU_C::avx_masked_load32(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(get_laddr64(i->seg(), eaddr + 4*n)))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  for (int n=DWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
      op->vmm32u(n) = read_virtual_dword(i->seg(), eaddr + 4*n);
    else
      op->vmm32u(n) = 0;
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_load64(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(get_laddr64(i->seg(), eaddr + 8*n)))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  for (int n=QWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
      op->vmm64u(n) = read_virtual_qword(i->seg(), eaddr + 8*n);
    else
      op->vmm64u(n) = 0;
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

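// The masked store helpers mirror the loads, with one extra step: before any
// memory is modified, every active element is probed with a read-modify-write
// access ("see if you can successfully write all the elements first"), so a
// fault on a later element cannot leave a partially updated destination.
// Alignment checking (#AC) is temporarily disabled around the per-element
// accesses, matching the load helpers.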
void BX_CPU_C::avx_masked_store8(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit64u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
      if (mask & (BX_CONST64(1)<<n)) {
        if (! IsCanonical(get_laddr64(i->seg(), eaddr + n)))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // see if you can successfully write all the elements first
  for (int n=BYTE_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (BX_CONST64(1)<<n))
      read_RMW_virtual_byte(i->seg(), eaddr + n);
  }

  for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
    if (mask & (BX_CONST64(1)<<n))
      write_virtual_byte(i->seg(), eaddr + n, op->vmmubyte(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_store16(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(get_laddr64(i->seg(), eaddr + 2*n)))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // see if you can successfully write all the elements first
  for (int n=WORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
      read_RMW_virtual_word(i->seg(), eaddr + 2*n);
  }

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
      write_virtual_word(i->seg(), eaddr + 2*n, op->vmm16u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_store32(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(get_laddr64(i->seg(), eaddr + 4*n)))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // see if you can successfully write all the elements first
  for (int n=DWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
      read_RMW_virtual_dword(i->seg(), eaddr + 4*n);
  }

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
      write_virtual_dword(i->seg(), eaddr + 4*n, op->vmm32u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_store64(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(get_laddr64(i->seg(), eaddr + 8*n)))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // see if you can successfully write all the elements first
  for (int n=QWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
      read_RMW_virtual_qword(i->seg(), eaddr + 8*n);
  }

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
      write_virtual_qword(i->seg(), eaddr + 8*n, op->vmm64u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}
#endif // BX_SUPPORT_AVX

#if BX_SUPPORT_EVEX

#include "simd_int.h"

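// avx512_write_regd_masked()/avx512_write_regq_masked() commit a computed
// result to the destination register under an opmask. Each 128-bit lane is
// blended using 4 dword mask bits (or 2 qword mask bits), hence the opmask is
// shifted by 4 (or 2) per lane. Zero-masking clears inactive elements, while
// merge-masking keeps the old destination values. Example (illustrative only):
// committing a 512-bit dword result under opmask 0x00FF updates elements 0..7
// and leaves (or zeroes) elements 8..15.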
void BX_CPU_C::avx512_write_regd_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
{
  if (i->isZeroMasking()) {
    for (unsigned n=0; n < len; n++, opmask >>= 4)
      xmm_zero_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }
  else {
    for (unsigned n=0; n < len; n++, opmask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }

  BX_CLEAR_AVX_REGZ(i->dst(), len);
}

void BX_CPU_C::avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
{
  if (i->isZeroMasking()) {
    for (unsigned n=0; n < len; n++, opmask >>= 2)
      xmm_zero_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }
  else {
    for (unsigned n=0; n < len; n++, opmask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }

  BX_CLEAR_AVX_REGZ(i->dst(), len);
}

//////////////////////////
// masked register move //
//////////////////////////

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  avx512_write_regd_masked(i, &op, i->getVL(), BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  avx512_write_regq_masked(i, &op, i->getVL(), BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

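// Note on opmask widths used throughout this file: packed single / dword forms
// consume up to 16 mask bits (BX_READ_16BIT_OPMASK), while packed double /
// qword forms consume up to 8 (BX_READ_8BIT_OPMASK), one bit per destination
// element at the 512-bit vector length.
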
/////////////////////////////////
// masked load/store - aligned //
/////////////////////////////////

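// The aligned forms below differ from the unaligned (VMOVUPS/VMOVUPD) ones in
// a single respect: the linear address must be aligned to the full vector
// length (16, 32 or 64 bytes), and a misaligned access raises #GP even when
// some elements are masked off.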
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len = i->getVL(), len_in_bytes = 16 * len;
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked read len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_load32(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len = i->getVL(), len_in_bytes = 16 * len;
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked read len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_load64(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len_in_bytes = 16 * i->getVL();
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked write len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_16BIT_OPMASK(i->opmask()));

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_WpdVpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len_in_bytes = 16 * i->getVL();
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked write len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_8BIT_OPMASK(i->opmask()));

  BX_NEXT_INSTR(i);
}

//////////////////////////////
// masked packed load/store //
//////////////////////////////

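// Unaligned masked forms: identical to the aligned handlers above minus the
// vector-length alignment check; element accesses still fault normally through
// the masked load/store helpers.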
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_load32(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_load64(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_MASK_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPD_MASK_WpdVpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

//////////////////////////////
// masked scalar load/store //
//////////////////////////////

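// Scalar masked moves only look at bit 0 of the opmask
// (BX_SCALAR_ELEMENT_MASK). When the bit is clear, the load forms make no
// memory access at all: the low element is either preserved (merge-masking) or
// zeroed (zero-masking) and the upper part of the destination is cleared; the
// store forms simply skip the write.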
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_VsdWsdM(bxInstruction_c *i)
{
  BxPackedXmmRegister op;

  op.xmm64u(1) = 0;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
    op.xmm64u(0) = read_virtual_qword(i->seg(), eaddr);
  }
  else {
    if (! i->isZeroMasking()) {
      op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
    }
    else {
      op.xmm64u(0) = 0;
    }
  }

  BX_WRITE_XMM_REGZ(i->dst(), op, i->getVL());
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssWssM(bxInstruction_c *i)
{
  BxPackedXmmRegister op;

  op.xmm64u(1) = 0;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
    op.xmm64u(0) = (Bit64u) read_virtual_dword(i->seg(), eaddr);
  }
  else {
    if (! i->isZeroMasking()) {
      op.xmm64u(0) = (Bit64u) BX_READ_XMM_REG_LO_DWORD(i->dst());
    }
    else {
      op.xmm64u(0) = 0;
    }
  }

  BX_WRITE_XMM_REGZ(i->dst(), op, i->getVL());
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_WsdVsdM(bxInstruction_c *i)
{
  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
    write_virtual_qword(i->seg(), eaddr, BX_READ_XMM_REG_LO_QWORD(i->src()));
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_WssVssM(bxInstruction_c *i)
{
  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
    write_virtual_dword(i->seg(), eaddr, BX_READ_XMM_REG_LO_DWORD(i->src()));
  }

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
{
  BxPackedXmmRegister op;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src2());
  }
  else {
    if (! i->isZeroMasking()) {
      op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
    }
    else {
      op.xmm64u(0) = 0;
    }
  }
  op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssHpsWssR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src1());

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->src2());
  }
  else {
    if (! i->isZeroMasking()) {
      op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
    }
    else {
      op.xmm32u(0) = 0;
    }
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

////////////////////////////////////
// masked store with down convert //
////////////////////////////////////

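// Naming scheme for the down-converting stores: VPMOV<src><dst> truncates
// (e.g. VPMOVQB: qword elements to bytes), VPMOVS* converts with signed
// saturation and VPMOVUS* with unsigned saturation. Each instruction has three
// handlers: a masked store-to-memory form (*_MASK_WdqVdqM), a plain register
// form (*_WdqVdqR) and a masked register form (*_MASK_WdqVdqR).
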
// quad-word to byte
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = (Bit8u) src.vmm64u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = (Bit8u) src.vmm64u(n);
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

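// The register forms in this section produce a result that is only 1/8, 1/4 or
// 1/2 as wide as the source vector, so the destination bytes above the
// converted elements are explicitly zeroed (the xmm16u(1)/xmm32u(1)/xmm64u(1)
// assignments) before the remaining high lanes are cleared by
// BX_WRITE_XMM_REG_CLEAR_HIGH.
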
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = (Bit8u) src.vmm64u(n);
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &dst, opmask);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

// double-word to byte
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = (Bit8u) src.vmm32u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << DWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = (Bit8u) src.vmm32u(n);
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = (Bit8u) src.vmm32u(n);
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << DWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.xmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << DWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

// double-word to word
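// Down-conversions whose result can exceed 128 bits (dword-to-word and
// qword-to-dword produce a 256-bit result at the 512-bit vector length) build
// the result in a BxPackedYmmRegister and commit it with BX_WRITE_YMM_REGZ
// instead of the XMM helpers used above.
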
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmm16u(n) = (Bit16u) src.vmm32u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << DWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.ymm16u(n) = (Bit16u) src.vmm32u(n);
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm16u(n) = (Bit16u) src.vmm32u(n);
    else
      if (i->isZeroMasking()) dst.ymm16u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << DWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.ymm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
    else
      if (i->isZeroMasking()) dst.ymm16u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << DWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.ymm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
    else
      if (i->isZeroMasking()) dst.ymm16u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

// quad-word to word
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm16u(n) = (Bit16u) src.vmm64u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmm16u(n) = (Bit16u) src.vmm64u(n);
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmm16u(n) = (Bit16u) src.vmm64u(n);
    else
      if (i->isZeroMasking()) dst.xmm16u(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
    else
      if (i->isZeroMasking()) dst.xmm16u(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
    else
      if (i->isZeroMasking()) dst.xmm16u(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

// quad-word to double-word
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm32u(n) = (Bit32u) src.vmm64u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store32(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.ymm32u(n) = (Bit32u) src.vmm64u(n);
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm32u(n) = (Bit32u) src.vmm64u(n);
    else
      if (i->isZeroMasking()) dst.ymm32u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store32(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.ymm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
    else
      if (i->isZeroMasking()) dst.ymm32u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= (1 << QWORD_ELEMENTS(len)) - 1;

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store32(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.ymm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
    else
      if (i->isZeroMasking()) dst.ymm32u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

//////////////////////////
// load with up convert //
//////////////////////////

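// The up-converting handlers widen each source element (sign extension for the
// VPMOVSX* forms, zero extension for VPMOVZX*) into a temporary AVX register
// and commit it through avx512_write_regd_masked/avx512_write_regq_masked, so
// merge- and zero-masking behave exactly as for the masked register moves
// above. Only the register-source (R) forms appear in this part of the file.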
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBD_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32s(n) = (Bit32s) op.xmmsbyte(n);

  avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.xmmsbyte(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXWD_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32s(n) = (Bit32s) op.ymm16s(n);

  avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXWQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.xmm16s(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXDQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.ymm32s(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBD_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32u(n) = (Bit32u) op.xmmubyte(n);

  avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.xmmubyte(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXWD_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32u(n) = (Bit32u) op.ymm16u(n);

  avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXWQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.xmm16u(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXDQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.ymm32u(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

#endif