2013-11-30 00:22:31 +04:00
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
// $Id$
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
2014-01-10 23:40:38 +04:00
|
|
|
// Copyright (c) 2013-2014 Stanislav Shwartsman
|
2013-11-30 00:22:31 +04:00
|
|
|
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
|
|
|
//
|
|
|
|
// This library is free software; you can redistribute it and/or
|
|
|
|
// modify it under the terms of the GNU Lesser General Public
|
|
|
|
// License as published by the Free Software Foundation; either
|
|
|
|
// version 2 of the License, or (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This library is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
// Lesser General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Lesser General Public
|
|
|
|
// License along with this library; if not, write to the Free Software
|
|
|
|
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
//
|
|
|
|
/////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
#define NEED_CPU_REG_SHORTCUTS 1
|
|
|
|
#include "bochs.h"
|
|
|
|
#include "cpu.h"
|
|
|
|
#define LOG_THIS BX_CPU_THIS_PTR
|
|
|
|
|
|
|
|
#if BX_SUPPORT_AVX
|
2014-01-10 23:40:38 +04:00
|
|
|
// Masked load of dword elements from memory starting at 'eaddr' into 'op'.
// Element n is read from memory only when bit n of 'mask' is set; every other
// element (up to the element count implied by the instruction's vector length)
// is written as zero.
void BX_CPU_C::avx_masked_load32(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  // With 64-bit addressing, validate every selected element address first and
  // raise the segment-dependent exception on a non-canonical address.
  // Guarded the same way as in the masked store helpers.
  if (i->as64L()) {
    for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
       if (mask & (1<<n)) {
          if (! IsCanonical(get_laddr64(i->seg(), eaddr + 4*n)))
             exception(int_number(i->seg()), 0);
       }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // The alignment check mask is cleared around the element accesses and put
  // back afterwards (presumably so individual elements never raise #AC).
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // Elements are accessed from the highest index down to element 0.
  for (int n=DWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
       op->vmm32u(n) = read_virtual_dword(i->seg(), eaddr + 4*n);
    else
       op->vmm32u(n) = 0;
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}
|
|
|
|
|
2014-01-10 23:40:38 +04:00
|
|
|
// Masked load of qword elements from memory starting at 'eaddr' into 'op'.
// Element n is read from memory only when bit n of 'mask' is set; every other
// element (up to the element count implied by the instruction's vector length)
// is written as zero.
void BX_CPU_C::avx_masked_load64(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  // With 64-bit addressing, validate every selected element address first and
  // raise the segment-dependent exception on a non-canonical address.
  // Guarded the same way as in the masked store helpers.
  if (i->as64L()) {
    for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
       if (mask & (1<<n)) {
          if (! IsCanonical(get_laddr64(i->seg(), eaddr + 8*n)))
             exception(int_number(i->seg()), 0);
       }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // The alignment check mask is cleared around the element accesses and put
  // back afterwards (presumably so individual elements never raise #AC).
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // Elements are accessed from the highest index down to element 0.
  for (int n=QWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
       op->vmm64u(n) = read_virtual_qword(i->seg(), eaddr + 8*n);
    else
       op->vmm64u(n) = 0;
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}
|
|
|
|
|
2014-01-10 23:40:38 +04:00
|
|
|
// Masked store of byte elements of 'op' to memory starting at 'eaddr'.
// Byte n is written only when bit n of the 64-bit 'mask' is set.
void BX_CPU_C::avx_masked_store8(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit64u mask)
{
  const unsigned vl = i->getVL();
  const unsigned num_elements = BYTE_ELEMENTS(vl);

#if BX_SUPPORT_X86_64
  // 64-bit addressing: reject non-canonical addresses of selected elements
  if (i->as64L()) {
    for (unsigned idx = 0; idx < num_elements; idx++) {
      const bool selected = ((mask >> idx) & 1) != 0;
      if (selected && ! IsCanonical(get_laddr64(i->seg(), eaddr + idx)))
        exception(int_number(i->seg()), 0);
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // alignment check mask is cleared for the element accesses, restored at the end
  const unsigned saved_ac_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // first pass (highest element first): touch every selected location with a
  // read-modify-write access to find out whether all writes can succeed
  for (unsigned idx = num_elements; idx-- > 0; ) {
    if ((mask >> idx) & 1)
      read_RMW_virtual_byte(i->seg(), eaddr + idx);
  }

  // second pass (lowest element first): perform the actual writes
  for (unsigned idx = 0; idx < num_elements; idx++) {
    if ((mask >> idx) & 1)
      write_virtual_byte(i->seg(), eaddr + idx, op->vmmubyte(idx));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = saved_ac_mask;
#endif
}
|
|
|
|
|
|
|
|
// Masked store of word elements of 'op' to memory starting at 'eaddr'.
// Word n is written only when bit n of 'mask' is set.
//
// The bit tests use an unsigned shift: the element index can reach 31 when the
// vector length gives 32 word elements, and shifting a signed 1 by 31 is not
// well defined for 'int'.
void BX_CPU_C::avx_masked_store16(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  // 64-bit addressing: reject non-canonical addresses of selected elements
  if (i->as64L()) {
    for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
       if (mask & ((Bit32u) 1 << n)) {
          if (! IsCanonical(get_laddr64(i->seg(), eaddr + 2*n)))
             exception(int_number(i->seg()), 0);
       }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // alignment check mask is cleared for the element accesses, restored at the end
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // see if you can successfully write all the elements first
  for (int n=WORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & ((Bit32u) 1 << n))
       read_RMW_virtual_word(i->seg(), eaddr + 2*n);
  }

  // all selected locations were accessible: perform the actual writes
  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    if (mask & ((Bit32u) 1 << n))
       write_virtual_word(i->seg(), eaddr + 2*n, op->vmm16u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}
|
|
|
|
|
|
|
|
// Masked store of dword elements of 'op' to memory starting at 'eaddr'.
// Dword n is written only when bit n of 'mask' is set.
void BX_CPU_C::avx_masked_store32(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  const unsigned vl = i->getVL();
  const unsigned num_elements = DWORD_ELEMENTS(vl);

#if BX_SUPPORT_X86_64
  // 64-bit addressing: reject non-canonical addresses of selected elements
  if (i->as64L()) {
    for (unsigned idx = 0; idx < num_elements; idx++) {
      const bool selected = ((mask >> idx) & 1) != 0;
      if (selected && ! IsCanonical(get_laddr64(i->seg(), eaddr + 4*idx)))
        exception(int_number(i->seg()), 0);
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // alignment check mask is cleared for the element accesses, restored at the end
  const unsigned saved_ac_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // first pass (highest element first): touch every selected location with a
  // read-modify-write access to find out whether all writes can succeed
  for (unsigned idx = num_elements; idx-- > 0; ) {
    if ((mask >> idx) & 1)
      read_RMW_virtual_dword(i->seg(), eaddr + 4*idx);
  }

  // second pass (lowest element first): perform the actual writes
  for (unsigned idx = 0; idx < num_elements; idx++) {
    if ((mask >> idx) & 1)
      write_virtual_dword(i->seg(), eaddr + 4*idx, op->vmm32u(idx));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = saved_ac_mask;
#endif
}
|
|
|
|
|
2014-01-10 23:40:38 +04:00
|
|
|
// Masked store of qword elements of 'op' to memory starting at 'eaddr'.
// Qword n is written only when bit n of 'mask' is set.
void BX_CPU_C::avx_masked_store64(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  const unsigned vl = i->getVL();
  const unsigned num_elements = QWORD_ELEMENTS(vl);

#if BX_SUPPORT_X86_64
  // 64-bit addressing: reject non-canonical addresses of selected elements
  if (i->as64L()) {
    for (unsigned idx = 0; idx < num_elements; idx++) {
      const bool selected = ((mask >> idx) & 1) != 0;
      if (selected && ! IsCanonical(get_laddr64(i->seg(), eaddr + 8*idx)))
        exception(int_number(i->seg()), 0);
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // alignment check mask is cleared for the element accesses, restored at the end
  const unsigned saved_ac_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // first pass (highest element first): touch every selected location with a
  // read-modify-write access to find out whether all writes can succeed
  for (unsigned idx = num_elements; idx-- > 0; ) {
    if ((mask >> idx) & 1)
      read_RMW_virtual_qword(i->seg(), eaddr + 8*idx);
  }

  // second pass (lowest element first): perform the actual writes
  for (unsigned idx = 0; idx < num_elements; idx++) {
    if ((mask >> idx) & 1)
      write_virtual_qword(i->seg(), eaddr + 8*idx, op->vmm64u(idx));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = saved_ac_mask;
#endif
}
|
|
|
|
#endif // BX_SUPPORT_AVX
|
|
|
|
|
|
|
|
#if BX_SUPPORT_EVEX
|
|
|
|
|
|
|
|
#include "simd_int.h"
|
|
|
|
|
2013-12-11 01:09:46 +04:00
|
|
|
// Write 'op' into the destination register of 'i' under a dword-granular
// opmask. Each of the 'len' 128-bit lanes is blended with the blendps helper
// (the zeroing variant when the instruction uses zero-masking), consuming
// four mask bits per lane; BX_CLEAR_AVX_REGZ is then applied to the
// destination for 'len'.
void BX_CPU_C::avx512_write_regd_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
{
  const bool zeroing = i->isZeroMasking();

  for (unsigned lane = 0; lane < len; lane++) {
    if (zeroing)
      xmm_zero_blendps(&BX_READ_AVX_REG_LANE(i->dst(), lane), &op->vmm128(lane), opmask);
    else
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), lane), &op->vmm128(lane), opmask);

    opmask >>= 4; // next lane uses the next four mask bits
  }

  BX_CLEAR_AVX_REGZ(i->dst(), len);
}
|
|
|
|
|
2013-12-11 01:09:46 +04:00
|
|
|
// Write 'op' into the destination register of 'i' under a qword-granular
// opmask. Each of the 'len' 128-bit lanes is blended with the blendpd helper
// (the zeroing variant when the instruction uses zero-masking), consuming
// two mask bits per lane; BX_CLEAR_AVX_REGZ is then applied to the
// destination for 'len'.
void BX_CPU_C::avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
{
  const bool zeroing = i->isZeroMasking();

  for (unsigned lane = 0; lane < len; lane++) {
    if (zeroing)
      xmm_zero_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), lane), &op->vmm128(lane), opmask);
    else
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), lane), &op->vmm128(lane), opmask);

    opmask >>= 2; // next lane uses the next two mask bits
  }

  BX_CLEAR_AVX_REGZ(i->dst(), len);
}
|
|
|
|
|
|
|
|
//////////////////////////
|
|
|
|
// masked register move //
|
|
|
|
//////////////////////////
|
|
|
|
|
|
|
|
// Masked register-to-register move with dword granularity: the source
// register is written to the destination under the 16-bit opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const Bit32u k = BX_READ_16BIT_OPMASK(i->opmask());
  BxPackedAvxRegister value = BX_READ_AVX_REG(i->src());

  avx512_write_regd_masked(i, &value, vl, k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked register-to-register move with qword granularity: the source
// register is written to the destination under the 8-bit opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const Bit32u k = BX_READ_8BIT_OPMASK(i->opmask());
  BxPackedAvxRegister value = BX_READ_AVX_REG(i->src());

  avx512_write_regq_masked(i, &value, vl, k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
/////////////////////////////////
|
|
|
|
// masked load/store - aligned //
|
|
|
|
/////////////////////////////////
|
|
|
|
|
|
|
|
// Aligned masked load with dword granularity. The linear address must be a
// multiple of the vector length in bytes, otherwise #GP(0) is raised. Selected
// elements are loaded from memory; the destination is then either fully
// replaced (zero-masking) or blended lane by lane with its previous contents.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len = i->getVL(), len_in_bytes = 16 * len;
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked read len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  // temporary that receives the masked load result (unselected elements are zero)
  BxPackedAvxRegister loaded;
  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_load32(i, eaddr, &loaded, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), loaded, len);
  }
  else {
    // merge-masking: four mask bits per 128-bit lane
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &loaded.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Aligned masked load with qword granularity. The linear address must be a
// multiple of the vector length in bytes, otherwise #GP(0) is raised. Selected
// elements are loaded from memory; the destination is then either fully
// replaced (zero-masking) or blended lane by lane with its previous contents.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len = i->getVL(), len_in_bytes = 16 * len;
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked read len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  // temporary that receives the masked load result (unselected elements are zero)
  BxPackedAvxRegister loaded;
  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_load64(i, eaddr, &loaded, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), loaded, len);
  }
  else {
    // merge-masking: two mask bits per 128-bit lane
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &loaded.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Aligned masked store with dword granularity. Raises #GP(0) when the linear
// address is not a multiple of the vector length in bytes; otherwise stores
// the selected dwords of the source register.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  const bx_address laddr = get_laddr(i->seg(), eaddr);

  const unsigned len_in_bytes = 16 * i->getVL();
  const unsigned misalign_bits = len_in_bytes - 1;

  if ((laddr & misalign_bits) != 0) {
    BX_ERROR(("AVX masked write len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  const Bit32u k = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src()), k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Aligned masked store with qword granularity. Raises #GP(0) when the linear
// address is not a multiple of the vector length in bytes; otherwise stores
// the selected qwords of the source register.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_WpdVpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  const bx_address laddr = get_laddr(i->seg(), eaddr);

  const unsigned len_in_bytes = 16 * i->getVL();
  const unsigned misalign_bits = len_in_bytes - 1;

  if ((laddr & misalign_bits) != 0) {
    BX_ERROR(("AVX masked write len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  const Bit32u k = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src()), k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
2013-12-01 23:39:18 +04:00
|
|
|
//////////////////////////////
|
|
|
|
// masked packed load/store //
|
|
|
|
//////////////////////////////
|
2013-11-30 00:22:31 +04:00
|
|
|
|
|
|
|
// Unaligned masked load with dword granularity. Selected elements are loaded
// from memory; the destination is then either fully replaced (zero-masking)
// or blended lane by lane with its previous contents.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  unsigned len = i->getVL();

  // temporary that receives the masked load result (unselected elements are zero)
  BxPackedAvxRegister loaded;
  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_load32(i, eaddr, &loaded, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), loaded, len);
  }
  else {
    // merge-masking: four mask bits per 128-bit lane
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &loaded.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Unaligned masked load with qword granularity. Selected elements are loaded
// from memory; the destination is then either fully replaced (zero-masking)
// or blended lane by lane with its previous contents.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  unsigned len = i->getVL();

  // temporary that receives the masked load result (unselected elements are zero)
  BxPackedAvxRegister loaded;
  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_load64(i, eaddr, &loaded, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), loaded, len);
  }
  else {
    // merge-masking: two mask bits per 128-bit lane
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &loaded.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Unaligned masked store with dword granularity: stores the dwords of the
// source register that are selected by the 16-bit opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_MASK_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  const Bit32u k = BX_READ_16BIT_OPMASK(i->opmask());

  avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src()), k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Unaligned masked store with qword granularity: stores the qwords of the
// source register that are selected by the 8-bit opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPD_MASK_WpdVpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  const Bit32u k = BX_READ_8BIT_OPMASK(i->opmask());

  avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src()), k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
2013-12-01 23:39:18 +04:00
|
|
|
//////////////////////////////
|
|
|
|
// masked scalar load/store //
|
|
|
|
//////////////////////////////
|
|
|
|
|
|
|
|
// Masked scalar qword load. The low qword of the result comes from memory
// when the scalar mask bit is set; otherwise it is zero (zero-masking) or the
// destination's current low qword (merge-masking). Memory is not accessed
// when the mask bit is clear. The upper qword of the result is zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_VsdWsdM(bxInstruction_c *i)
{
  Bit64u low;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
    low = read_virtual_qword(i->seg(), eaddr);
  }
  else if (i->isZeroMasking()) {
    low = 0;
  }
  else {
    low = BX_READ_XMM_REG_LO_QWORD(i->dst());
  }

  BxPackedXmmRegister result;
  result.xmm64u(0) = low;
  result.xmm64u(1) = 0;

  BX_WRITE_XMM_REGZ(i->dst(), result, i->getVL());
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked scalar dword load. The low dword of the result comes from memory
// when the scalar mask bit is set; otherwise it is zero (zero-masking) or the
// destination's current low dword (merge-masking). Memory is not accessed
// when the mask bit is clear. All bits above the low dword are zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssWssM(bxInstruction_c *i)
{
  Bit64u low; // dword value zero-extended to 64 bits

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
    low = (Bit64u) read_virtual_dword(i->seg(), eaddr);
  }
  else if (i->isZeroMasking()) {
    low = 0;
  }
  else {
    low = (Bit64u) BX_READ_XMM_REG_LO_DWORD(i->dst());
  }

  BxPackedXmmRegister result;
  result.xmm64u(0) = low;
  result.xmm64u(1) = 0;

  BX_WRITE_XMM_REGZ(i->dst(), result, i->getVL());
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked scalar qword store: the low qword of the source register is written
// to memory only when the scalar mask bit is set; otherwise nothing is stored.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_WsdVsdM(bxInstruction_c *i)
{
  const bool store_enabled = BX_SCALAR_ELEMENT_MASK(i->opmask()) != 0;

  if (store_enabled) {
    bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
    const Bit64u value = BX_READ_XMM_REG_LO_QWORD(i->src());
    write_virtual_qword(i->seg(), eaddr, value);
  }

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked scalar dword store: the low dword of the source register is written
// to memory only when the scalar mask bit is set; otherwise nothing is stored.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_WssVssM(bxInstruction_c *i)
{
  const bool store_enabled = BX_SCALAR_ELEMENT_MASK(i->opmask()) != 0;

  if (store_enabled) {
    bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
    const Bit32u value = BX_READ_XMM_REG_LO_DWORD(i->src());
    write_virtual_dword(i->seg(), eaddr, value);
  }

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked scalar qword register move (three-operand form). The low qword of
// the result is taken from src2 when the scalar mask bit is set; otherwise it
// is zero (zero-masking) or the destination's current low qword
// (merge-masking). The high qword always comes from src1.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
{
  Bit64u low;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask()))
    low = BX_READ_XMM_REG_LO_QWORD(i->src2());
  else if (i->isZeroMasking())
    low = 0;
  else
    low = BX_READ_XMM_REG_LO_QWORD(i->dst());

  BxPackedXmmRegister result;
  result.xmm64u(0) = low;
  result.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked scalar dword register move (three-operand form). The result starts
// as a copy of src1; its low dword is replaced by the low dword of src2 when
// the scalar mask bit is set, and otherwise by zero (zero-masking) or by the
// destination's current low dword (merge-masking).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssHpsWssR(bxInstruction_c *i)
{
  Bit32u low;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask()))
    low = BX_READ_XMM_REG_LO_DWORD(i->src2());
  else if (i->isZeroMasking())
    low = 0;
  else
    low = BX_READ_XMM_REG_LO_DWORD(i->dst());

  BxPackedXmmRegister result = BX_READ_XMM_REG(i->src1());
  result.xmm32u(0) = low;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
2014-01-12 13:31:22 +04:00
|
|
|
///////////////////////////////////////
|
|
|
|
// masked store with down conversion //
|
|
|
|
///////////////////////////////////////
|
|
|
|
|
2014-01-12 17:08:16 +04:00
|
|
|
// quad-word to byte
|
|
|
|
// Down-convert each source qword to a byte by truncation and store the bytes
// to memory under the opmask. When the instruction has no opmask register
// (opmask() == 0) all elements are stored.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister packed; // only the low 'count' bytes are filled in

  for (unsigned e = 0; e < count; e++)
    packed.vmmubyte(e) = (Bit8u) wide.vmm64u(e);

  Bit32u k = (Bit32u) -1;
  if (i->opmask())
    k = BX_READ_8BIT_OPMASK(i->opmask());
  k &= (1 << count) - 1; // keep one mask bit per converted element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &packed, k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Down-convert each source qword to a byte by truncation and place the bytes
// in the low part of the destination XMM register; all remaining bytes of
// the 128-bit result are zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  narrow.xmm64u(1) = 0;

  for (unsigned e = 0; e < count; e++)
    narrow.xmmubyte(e) = (Bit8u) wide.vmm64u(e);

  // clear the part of the low qword not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm16u(1) = 0;
    narrow.xmm32u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked qword-to-byte truncating down-conversion into a register. Selected
// elements receive the truncated source value; unselected elements are zeroed
// with zero-masking and otherwise keep the destination's previous byte. Bytes
// beyond the converted elements are zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);
  const unsigned k = BX_READ_8BIT_OPMASK(i->opmask());
  const bool zeroing = i->isZeroMasking();

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  narrow.xmm64u(1) = 0;

  for (unsigned e = 0; e < count; e++) {
    if ((k >> e) & 0x1)
      narrow.xmmubyte(e) = (Bit8u) wide.vmm64u(e);
    else if (zeroing)
      narrow.xmmubyte(e) = 0;
  }

  // clear the part of the low qword not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm16u(1) = 0;
    narrow.xmm32u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Down-convert each signed source qword to a signed byte through
// SaturateQwordSToByteS and store the bytes to memory under the opmask. When
// the instruction has no opmask register (opmask() == 0) all elements are
// stored.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister packed; // only the low 'count' bytes are filled in

  for (unsigned e = 0; e < count; e++)
    packed.vmmsbyte(e) = SaturateQwordSToByteS(wide.vmm64s(e));

  Bit32u k = (Bit32u) -1;
  if (i->opmask())
    k = BX_READ_8BIT_OPMASK(i->opmask());
  k &= (1 << count) - 1; // keep one mask bit per converted element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &packed, k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Down-convert each signed source qword to a signed byte through
// SaturateQwordSToByteS and place the bytes in the low part of the
// destination XMM register; all remaining bytes of the 128-bit result are
// zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  narrow.xmm64u(1) = 0;

  for (unsigned e = 0; e < count; e++)
    narrow.xmmsbyte(e) = SaturateQwordSToByteS(wide.vmm64s(e));

  // clear the part of the low qword not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm16u(1) = 0;
    narrow.xmm32u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked signed qword-to-byte down-conversion (via SaturateQwordSToByteS)
// into a register. Selected elements receive the converted value; unselected
// elements are zeroed with zero-masking and otherwise keep the destination's
// previous byte. Bytes beyond the converted elements are zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);
  const unsigned k = BX_READ_8BIT_OPMASK(i->opmask());
  const bool zeroing = i->isZeroMasking();

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  narrow.xmm64u(1) = 0;

  for (unsigned e = 0; e < count; e++) {
    if ((k >> e) & 0x1)
      narrow.xmmsbyte(e) = SaturateQwordSToByteS(wide.vmm64s(e));
    else if (zeroing)
      narrow.xmmubyte(e) = 0;
  }

  // clear the part of the low qword not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm16u(1) = 0;
    narrow.xmm32u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Down-convert each unsigned source qword to an unsigned byte through
// SaturateQwordUToByteU and store the bytes to memory under the opmask. When
// the instruction has no opmask register (opmask() == 0) all elements are
// stored.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister packed; // only the low 'count' bytes are filled in

  for (unsigned e = 0; e < count; e++)
    packed.vmmubyte(e) = SaturateQwordUToByteU(wide.vmm64u(e));

  Bit32u k = (Bit32u) -1;
  if (i->opmask())
    k = BX_READ_8BIT_OPMASK(i->opmask());
  k &= (1 << count) - 1; // keep one mask bit per converted element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &packed, k);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Down-convert each unsigned source qword to an unsigned byte through
// SaturateQwordUToByteU and place the bytes in the low part of the
// destination XMM register; all remaining bytes of the 128-bit result are
// zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  narrow.xmm64u(1) = 0;

  for (unsigned e = 0; e < count; e++)
    narrow.xmmubyte(e) = SaturateQwordUToByteU(wide.vmm64u(e));

  // clear the part of the low qword not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm16u(1) = 0;
    narrow.xmm32u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked unsigned qword-to-byte down-conversion (via SaturateQwordUToByteU)
// into a register. Selected elements receive the converted value; unselected
// elements are zeroed with zero-masking and otherwise keep the destination's
// previous byte. Bytes beyond the converted elements are zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = QWORD_ELEMENTS(vl);
  const unsigned k = BX_READ_8BIT_OPMASK(i->opmask());
  const bool zeroing = i->isZeroMasking();

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  narrow.xmm64u(1) = 0;

  for (unsigned e = 0; e < count; e++) {
    if ((k >> e) & 0x1)
      narrow.xmmubyte(e) = SaturateQwordUToByteU(wide.vmm64u(e));
    else if (zeroing)
      narrow.xmmubyte(e) = 0;
  }

  // clear the part of the low qword not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm16u(1) = 0;
    narrow.xmm32u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// double-word to byte
|
|
|
|
// Down-convert each source dword to a byte by truncation and store the bytes
// to memory under the opmask. When the instruction has no opmask register
// (opmask() == 0) all elements are stored.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister packed; // only the low 'count' bytes are filled in

  for (unsigned e = 0; e < count; e++)
    packed.vmmubyte(e) = (Bit8u) wide.vmm32u(e);

  Bit32u k = (Bit32u) -1;
  if (i->opmask())
    k = BX_READ_16BIT_OPMASK(i->opmask());
  k &= (1 << count) - 1; // keep one mask bit per converted element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &packed, k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Down-convert each source dword to a byte by truncation and place the bytes
// in the low part of the destination XMM register; all remaining bytes of
// the 128-bit result are zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  for (unsigned e = 0; e < count; e++)
    narrow.xmmubyte(e) = (Bit8u) wide.vmm32u(e);

  // clear the part of the register not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm32u(1) = 0;
    narrow.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked dword-to-byte truncating down-conversion into a register. Selected
// elements receive the truncated source value; unselected elements are zeroed
// with zero-masking and otherwise keep the destination's previous byte. Bytes
// beyond the converted elements are zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = DWORD_ELEMENTS(vl);
  const unsigned k = BX_READ_16BIT_OPMASK(i->opmask());
  const bool zeroing = i->isZeroMasking();

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  for (unsigned e = 0; e < count; e++) {
    if ((k >> e) & 0x1)
      narrow.xmmubyte(e) = (Bit8u) wide.vmm32u(e);
    else if (zeroing)
      narrow.xmmubyte(e) = 0;
  }

  // clear the part of the register not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm32u(1) = 0;
    narrow.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Down-convert each signed source dword to a signed byte through
// SaturateDwordSToByteS and store the bytes to memory under the opmask. When
// the instruction has no opmask register (opmask() == 0) all elements are
// stored.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister packed; // only the low 'count' bytes are filled in

  for (unsigned e = 0; e < count; e++)
    packed.vmmsbyte(e) = SaturateDwordSToByteS(wide.vmm32s(e));

  Bit32u k = (Bit32u) -1;
  if (i->opmask())
    k = BX_READ_16BIT_OPMASK(i->opmask());
  k &= (1 << count) - 1; // keep one mask bit per converted element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &packed, k);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Down-convert each signed source dword to a signed byte through
// SaturateDwordSToByteS and place the bytes in the low part of the
// destination XMM register; all remaining bytes of the 128-bit result are
// zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  for (unsigned e = 0; e < count; e++)
    narrow.xmmsbyte(e) = SaturateDwordSToByteS(wide.vmm32s(e));

  // clear the part of the register not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm32u(1) = 0;
    narrow.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// Masked signed dword-to-byte down-conversion (via SaturateDwordSToByteS)
// into a register. Selected elements receive the converted value; unselected
// elements are zeroed with zero-masking and otherwise keep the destination's
// previous byte. Bytes beyond the converted elements are zero.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned count = DWORD_ELEMENTS(vl);
  const unsigned k = BX_READ_16BIT_OPMASK(i->opmask());
  const bool zeroing = i->isZeroMasking();

  BxPackedAvxRegister wide = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister narrow = BX_READ_XMM_REG(i->dst());

  for (unsigned e = 0; e < count; e++) {
    if ((k >> e) & 0x1)
      narrow.xmmsbyte(e) = SaturateDwordSToByteS(wide.vmm32s(e));
    else if (zeroing)
      narrow.xmmubyte(e) = 0;
  }

  // clear the part of the register not covered by converted elements
  if (vl != BX_VL512) {
    if (vl == BX_VL128) narrow.xmm32u(1) = 0;
    narrow.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), narrow);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSDB, memory destination: every unsigned dword element of the source is
// converted with SaturateDwordUToByteU and the resulting bytes are handed to
// avx_masked_store8 together with the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmmubyte(elem) = SaturateDwordUToByteU(op.vmm32u(elem));

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_16BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store8(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSDB, register destination, no masking: converts each unsigned source
// dword with SaturateDwordUToByteU into the bytes of an XMM-sized result, then
// writes it back with BX_WRITE_XMM_REG_CLEAR_HIGH.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister result = BX_READ_XMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.xmmubyte(elem) = SaturateDwordUToByteU(op.vmm32u(elem));

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.xmm32u(1) = 0;
    result.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSDB, register destination, opmask controlled: for each element whose
// mask bit is set the source dword is converted with SaturateDwordUToByteU;
// otherwise the destination byte is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedXmmRegister result = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.xmmubyte(elem) = SaturateDwordUToByteU(op.vmm32u(elem));
    }
    else if (i->isZeroMasking()) {
      result.xmmubyte(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.xmm32u(1) = 0;
    result.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// double-word to word
|
|
|
|
// VPMOVDW, memory destination: every source dword is truncated by a (Bit16u)
// cast and the resulting words are handed to avx_masked_store16 together with
// the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm16u(elem) = (Bit16u) op.vmm32u(elem);

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_16BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVDW, register destination, no masking: truncates each source dword with
// a (Bit16u) cast into the words of a YMM-sized result, then writes it back
// with BX_WRITE_YMM_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.ymm16u(elem) = (Bit16u) op.vmm32u(elem);

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVDW, register destination, opmask controlled: for each element whose
// mask bit is set the source dword is truncated with a (Bit16u) cast;
// otherwise the destination word is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.ymm16u(elem) = (Bit16u) op.vmm32u(elem);
    }
    else if (i->isZeroMasking()) {
      result.ymm16u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSDW, memory destination: every signed source dword is converted with
// SaturateDwordSToWordS and the resulting words are handed to
// avx_masked_store16 together with the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm16s(elem) = SaturateDwordSToWordS(op.vmm32s(elem));

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_16BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSDW, register destination, no masking: converts each signed source
// dword with SaturateDwordSToWordS into the words of a YMM-sized result, then
// writes it back with BX_WRITE_YMM_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.ymm16s(elem) = SaturateDwordSToWordS(op.vmm32s(elem));

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSDW, register destination, opmask controlled: for each element whose
// mask bit is set the source dword is converted with SaturateDwordSToWordS;
// otherwise the destination word is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.ymm16s(elem) = SaturateDwordSToWordS(op.vmm32s(elem));
    }
    else if (i->isZeroMasking()) {
      result.ymm16u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSDW, memory destination: every unsigned source dword is converted with
// SaturateDwordUToWordU and the resulting words are handed to
// avx_masked_store16 together with the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm16u(elem) = SaturateDwordUToWordU(op.vmm32u(elem));

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_16BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSDW, register destination, no masking: converts each unsigned source
// dword with SaturateDwordUToWordU into the words of a YMM-sized result, then
// writes it back with BX_WRITE_YMM_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.ymm16u(elem) = SaturateDwordUToWordU(op.vmm32u(elem));

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSDW, register destination, opmask controlled: for each element whose
// mask bit is set the source dword is converted with SaturateDwordUToWordU;
// otherwise the destination word is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = DWORD_ELEMENTS(vl);

  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.ymm16u(elem) = SaturateDwordUToWordU(op.vmm32u(elem));
    }
    else if (i->isZeroMasking()) {
      result.ymm16u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// quad-word to word
|
|
|
|
// VPMOVQW, memory destination: every source qword is truncated by a (Bit16u)
// cast and the resulting words are handed to avx_masked_store16 together with
// the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm16u(elem) = (Bit16u) op.vmm64u(elem);

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_8BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVQW, register destination, no masking: truncates each source qword with
// a (Bit16u) cast into the words of an XMM-sized result, then writes it back
// with BX_WRITE_XMM_REG_CLEAR_HIGH.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister result = BX_READ_XMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.xmm16u(elem) = (Bit16u) op.vmm64u(elem);

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.xmm32u(1) = 0;
    result.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVQW, register destination, opmask controlled: for each element whose
// mask bit is set the source qword is truncated with a (Bit16u) cast;
// otherwise the destination word is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedXmmRegister result = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.xmm16u(elem) = (Bit16u) op.vmm64u(elem);
    }
    else if (i->isZeroMasking()) {
      result.xmm16u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.xmm32u(1) = 0;
    result.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSQW, memory destination: every signed source qword is converted with
// SaturateQwordSToWordS and the resulting words are handed to
// avx_masked_store16 together with the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm16s(elem) = SaturateQwordSToWordS(op.vmm64s(elem));

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_8BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSQW, register destination, no masking: converts each signed source
// qword with SaturateQwordSToWordS into the words of an XMM-sized result, then
// writes it back with BX_WRITE_XMM_REG_CLEAR_HIGH.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister result = BX_READ_XMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.xmm16s(elem) = SaturateQwordSToWordS(op.vmm64s(elem));

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.xmm32u(1) = 0;
    result.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSQW, register destination, opmask controlled: for each element whose
// mask bit is set the source qword is converted with SaturateQwordSToWordS;
// otherwise the destination word is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedXmmRegister result = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.xmm16s(elem) = SaturateQwordSToWordS(op.vmm64s(elem));
    }
    else if (i->isZeroMasking()) {
      result.xmm16u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.xmm32u(1) = 0;
    result.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSQW, memory destination: every unsigned source qword is converted with
// SaturateQwordUToWordU and the resulting words are handed to
// avx_masked_store16 together with the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm16u(elem) = SaturateQwordUToWordU(op.vmm64u(elem));

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_8BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSQW, register destination, no masking: converts each unsigned source
// qword with SaturateQwordUToWordU into the words of an XMM-sized result, then
// writes it back with BX_WRITE_XMM_REG_CLEAR_HIGH.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedXmmRegister result = BX_READ_XMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.xmm16u(elem) = SaturateQwordUToWordU(op.vmm64u(elem));

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.xmm32u(1) = 0;
    result.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSQW, register destination, opmask controlled: for each element whose
// mask bit is set the source qword is converted with SaturateQwordUToWordU;
// otherwise the destination word is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedXmmRegister result = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.xmm16u(elem) = SaturateQwordUToWordU(op.vmm64u(elem));
    }
    else if (i->isZeroMasking()) {
      result.xmm16u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.xmm32u(1) = 0;
    result.xmm64u(1) = 0;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// quad-word to double-word
|
|
|
|
// VPMOVQD, memory destination: every source qword is truncated by a (Bit32u)
// cast and the resulting dwords are handed to avx_masked_store32 together with
// the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm32u(elem) = (Bit32u) op.vmm64u(elem);

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_8BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store32(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVQD, register destination, no masking: truncates each source qword with
// a (Bit32u) cast into the dwords of a YMM-sized result, then writes it back
// with BX_WRITE_YMM_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.ymm32u(elem) = (Bit32u) op.vmm64u(elem);

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVQD, register destination, opmask controlled: for each element whose
// mask bit is set the source qword is truncated with a (Bit32u) cast;
// otherwise the destination dword is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.ymm32u(elem) = (Bit32u) op.vmm64u(elem);
    }
    else if (i->isZeroMasking()) {
      result.ymm32u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSQD, memory destination: every signed source qword is converted with
// SaturateQwordSToDwordS and the resulting dwords are handed to
// avx_masked_store32 together with the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm32s(elem) = SaturateQwordSToDwordS(op.vmm64s(elem));

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_8BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store32(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSQD, register destination, no masking: converts each signed source
// qword with SaturateQwordSToDwordS into the dwords of a YMM-sized result,
// then writes it back with BX_WRITE_YMM_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.ymm32s(elem) = SaturateQwordSToDwordS(op.vmm64s(elem));

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVSQD, register destination, opmask controlled: for each element whose
// mask bit is set the source qword is converted with SaturateQwordSToDwordS;
// otherwise the destination dword is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.ymm32s(elem) = SaturateQwordSToDwordS(op.vmm64s(elem));
    }
    else if (i->isZeroMasking()) {
      result.ymm32u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSQD, memory destination: every unsigned source qword is converted with
// SaturateQwordUToDwordU and the resulting dwords are handed to
// avx_masked_store32 together with the opmask.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_MASK_WdqVdqM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister result;

  // All elements are converted here; the mask is only passed on to the store helper.
  for (unsigned elem = 0; elem < nelem; elem++)
    result.vmm32u(elem) = SaturateQwordUToDwordU(op.vmm64u(elem));

  // When i->opmask() is zero every mask bit is enabled.
  Bit32u kmask = (Bit32u) -1;
  if (i->opmask())
    kmask = BX_READ_8BIT_OPMASK(i->opmask());
  kmask &= (1 << nelem) - 1; // keep one mask bit per element

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store32(i, eaddr, &result, kmask);

  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSQD, register destination, no masking: converts each unsigned source
// qword with SaturateQwordUToDwordU into the dwords of a YMM-sized result,
// then writes it back with BX_WRITE_YMM_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());

  for (unsigned elem = 0; elem < nelem; elem++)
    result.ymm32u(elem) = SaturateQwordUToDwordU(op.vmm64u(elem));

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not fill).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
|
|
|
// VPMOVUSQD, register destination, opmask controlled: for each element whose
// mask bit is set the source qword is converted with SaturateQwordUToDwordU;
// otherwise the destination dword is zeroed when i->isZeroMasking() is true
// and left as read from the destination register when it is not.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned nelem = QWORD_ELEMENTS(vl);

  BxPackedYmmRegister result = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());

  const unsigned kmask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned elem = 0; elem < nelem; elem++) {
    if ((kmask >> elem) & 0x1) {
      result.ymm32u(elem) = SaturateQwordUToDwordU(op.vmm64u(elem));
    }
    else if (i->isZeroMasking()) {
      result.ymm32u(elem) = 0;
    }
  }

  // Below 512-bit vector length, zero the result fields past the converted
  // elements (presumably the lanes the loop did not cover).
  if (vl != BX_VL512) {
    if (vl == BX_VL128) result.ymm64u(1) = 0;
    result.ymm128(1).clear();
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
|
|
|
|
2013-11-30 00:22:31 +04:00
|
|
|
#endif
|