Bochs/bochs/cpu/xop.cc

989 lines
30 KiB
C++
Executable File

/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2011 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////
#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR
#if BX_SUPPORT_AVX
extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mxcsr);
#include "simd_int.h"
#include "simd_compare.h"
// Common signature of the packed-integer compare helpers used by the tables
// below. The VPCOM* handlers in this file write op1 to the destination after
// the call, so the helpers presumably store their result through the first
// pointer - confirm against simd_compare.h.
typedef void (*simd_compare_method)(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2);
// Every table is indexed with (immediate & 7) by its VPCOM* handler; going by
// the helper names the slots are:
//   0 = lt, 1 = le, 2 = gt, 3 = ge, 4 = eq, 5 = ne, 6 = false, 7 = true
// The "u" tables differ from their plain counterparts only in slots 0..3.
// comparison predicate for PCOMB
static simd_compare_method compare8[8] = {
sse_pcmpltb,
sse_pcmpleb,
sse_pcmpgtb,
sse_pcmpgeb,
sse_pcmpeqb,
sse_pcmpneb,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMUB
static simd_compare_method compare8u[8] = {
sse_pcmpltub,
sse_pcmpleub,
sse_pcmpgtub,
sse_pcmpgeub,
sse_pcmpeqb,
sse_pcmpneb,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMW
static simd_compare_method compare16[8] = {
sse_pcmpltw,
sse_pcmplew,
sse_pcmpgtw,
sse_pcmpgew,
sse_pcmpeqw,
sse_pcmpnew,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMUW
static simd_compare_method compare16u[8] = {
sse_pcmpltuw,
sse_pcmpleuw,
sse_pcmpgtuw,
sse_pcmpgeuw,
sse_pcmpeqw,
sse_pcmpnew,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMD
static simd_compare_method compare32[8] = {
sse_pcmpltd,
sse_pcmpled,
sse_pcmpgtd,
sse_pcmpged,
sse_pcmpeqd,
sse_pcmpned,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMUD
static simd_compare_method compare32u[8] = {
sse_pcmpltud,
sse_pcmpleud,
sse_pcmpgtud,
sse_pcmpgeud,
sse_pcmpeqd,
sse_pcmpned,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMQ
static simd_compare_method compare64[8] = {
sse_pcmpltq,
sse_pcmpleq,
sse_pcmpgtq,
sse_pcmpgeq,
sse_pcmpeqq,
sse_pcmpneq,
sse_pcmpfalse,
sse_pcmptrue
};
// comparison predicate for PCOMUQ
static simd_compare_method compare64u[8] = {
sse_pcmpltuq,
sse_pcmpleuq,
sse_pcmpgtuq,
sse_pcmpgeuq,
sse_pcmpeqq,
sse_pcmpneq,
sse_pcmpfalse,
sse_pcmptrue
};
// Signature of the per-byte post-processing steps applied by VPPERM.
typedef Bit8u (*vpperm_operation)(Bit8u byte);

// Return v8 with its eight bits in reverse order (bit 0 <-> bit 7,
// bit 1 <-> bit 6, and so on).
BX_CPP_INLINE Bit8u vpperm_bit_reverse(Bit8u v8)
{
  Bit8u reversed = 0;

  for (unsigned bit = 0; bit < 8; bit++) {
    // source bit 'bit' lands at position (7 - bit)
    if (v8 & (1u << bit))
      reversed |= (Bit8u) (0x80u >> bit);
  }

  return reversed;
}
// Per-byte operations selectable through the vpperm_op[] table.

// Pass the byte through unchanged.
BX_CPP_INLINE Bit8u vpperm_noop(Bit8u v8)
{
  return v8;
}

// Flip every bit of the byte.
BX_CPP_INLINE Bit8u vpperm_invert(Bit8u v8)
{
  return (Bit8u) (v8 ^ 0xff);
}

// Flip every bit, then reverse the bit order.
BX_CPP_INLINE Bit8u vpperm_invert_bit_reverse(Bit8u v8)
{
  Bit8u inverted = (Bit8u) ~v8;
  return vpperm_bit_reverse(inverted);
}

// Ignore the input and produce 0x00.
BX_CPP_INLINE Bit8u vpperm_zeros(Bit8u v8)
{
  return 0;
}

// Ignore the input and produce 0xFF.
BX_CPP_INLINE Bit8u vpperm_ones(Bit8u v8)
{
  return 0xff;
}

// Copy bit 7 of the byte into all eight bit positions.
BX_CPP_INLINE Bit8u vpperm_replicate_msb(Bit8u v8)
{
  return (v8 & 0x80) ? 0xff : 0x00;
}

// Flip every bit, then copy the resulting bit 7 into all positions.
BX_CPP_INLINE Bit8u vpperm_invert_replicate_msb(Bit8u v8)
{
  Bit8u inverted = (Bit8u) ~v8;
  return vpperm_replicate_msb(inverted);
}
// logical operation for VPPERM
// Indexed by the top three bits of each control byte (control >> 5) in
// VPPERM_VdqHdqWdqVIb; the selected function post-processes the byte that the
// low five control bits picked.
static vpperm_operation vpperm_op[8] = {
vpperm_noop,
vpperm_invert,
vpperm_bit_reverse,
vpperm_invert_bit_reverse,
vpperm_zeros,
vpperm_ones,
vpperm_replicate_msb,
vpperm_invert_replicate_msb
};
// VPCMOV handler. The first operand is read from vvv(); getVexW() decides
// whether rm() or Ib() names the second operand, the other one naming the
// third. Each 128-bit lane is handed to sse_pselect() and the first operand
// is then written to nnn() through BX_WRITE_AVX_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCMOV_VdqHdqWdqVIb(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->vvv());
  BxPackedAvxRegister op2, op3;

  if (! i->getVexW()) {
    op2 = BX_READ_AVX_REG(i->rm());
    op3 = BX_READ_AVX_REG(i->Ib());
  }
  else {
    op2 = BX_READ_AVX_REG(i->Ib());
    op3 = BX_READ_AVX_REG(i->rm());
  }

  unsigned lanes = i->getVL();
  unsigned lane = 0;
  while (lane < lanes) {
    sse_pselect(&op1.avx128(lane), &op2.avx128(lane), &op3.avx128(lane));
    lane++;
  }

  BX_WRITE_AVX_REGZ(i->nnn(), op1, lanes);

  BX_NEXT_INSTR(i);
}
// VPPERM handler. For every destination byte a control byte is read from the
// third operand:
//   bits 3:0 - index of the source byte
//   bit  4   - source register: set -> first operand (vvv), clear -> second
//   bits 7:5 - index into vpperm_op[] applied to the chosen byte
// getVexW() decides whether rm() or Ib() names the second operand; the other
// one names the control operand.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPPERM_VdqHdqWdqVIb(bxInstruction_c *i)
{
  BxPackedXmmRegister src1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister src2, selector, result;

  if (! i->getVexW()) {
    src2 = BX_READ_XMM_REG(i->rm());
    selector = BX_READ_XMM_REG(i->Ib());
  }
  else {
    src2 = BX_READ_XMM_REG(i->Ib());
    selector = BX_READ_XMM_REG(i->rm());
  }

  for (unsigned n = 0; n < 16; n++) {
    unsigned control = selector.xmmubyte(n);
    unsigned index = control & 0xf;
    Bit8u picked;

    if (control & 0x10)
      picked = src1.xmmubyte(index);
    else
      picked = src2.xmmubyte(index);

    result.xmmubyte(n) = vpperm_op[control >> 5](picked);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), result);

  BX_NEXT_INSTR(i);
}
// Generates one BX_CPU_C handler named HANDLER for a register-controlled XOP
// shift/rotate. The handler loads two XMM operands - getVexW() decides which
// of rm() / vvv() becomes op1 and which becomes op2 - calls func(&op1, &op2),
// and writes op1 to nnn() with the upper part of the register cleared.
// NOTE(review): func is expected to leave its result in *op1; the sse_*
// helpers themselves are defined outside this file.
#define XOP_SHIFT_ROTATE(HANDLER, func) \
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
{ \
BxPackedXmmRegister op1, op2; \
if (i->getVexW()) { \
op1 = BX_READ_XMM_REG(i->rm()); \
op2 = BX_READ_XMM_REG(i->vvv()); \
} \
else { \
op1 = BX_READ_XMM_REG(i->vvv()); \
op2 = BX_READ_XMM_REG(i->rm()); \
} \
\
(func)(&op1, &op2); \
\
BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1); \
\
BX_NEXT_INSTR(i); \
}
// One handler per element width (B/W/D/Q) for each of the three helper
// families: sse_psha*, sse_pshl* and sse_prot*.
XOP_SHIFT_ROTATE(VPSHAB_VdqWdqHdq, sse_pshab);
XOP_SHIFT_ROTATE(VPSHAW_VdqWdqHdq, sse_pshaw);
XOP_SHIFT_ROTATE(VPSHAD_VdqWdqHdq, sse_pshad);
XOP_SHIFT_ROTATE(VPSHAQ_VdqWdqHdq, sse_pshaq);
XOP_SHIFT_ROTATE(VPSHLB_VdqWdqHdq, sse_pshlb);
XOP_SHIFT_ROTATE(VPSHLW_VdqWdqHdq, sse_pshlw);
XOP_SHIFT_ROTATE(VPSHLD_VdqWdqHdq, sse_pshld);
XOP_SHIFT_ROTATE(VPSHLQ_VdqWdqHdq, sse_pshlq);
XOP_SHIFT_ROTATE(VPROTB_VdqWdqHdq, sse_protb);
XOP_SHIFT_ROTATE(VPROTW_VdqWdqHdq, sse_protw);
XOP_SHIFT_ROTATE(VPROTD_VdqWdqHdq, sse_protd);
XOP_SHIFT_ROTATE(VPROTQ_VdqWdqHdq, sse_protq);
// VPMACSSWW handler: for each of the eight signed words computes
// op1 * op2 + op3 in 32-bit arithmetic and stores the result saturated to a
// signed word (SaturateDwordSToWordS). Operands come from vvv(), rm(), Ib().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSWW_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  for (unsigned n = 0; n < 8; n++) {
    Bit32s product = (Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n);
    Bit32s sum = product + (Bit32s) op3.xmm16s(n);
    op1.xmm16s(n) = SaturateDwordSToWordS(sum);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPMACSSWD handler: multiplies the odd-numbered signed words (1, 3, 5, 7) of
// op1 and op2, adds the matching signed dword of op3 in 64-bit arithmetic and
// stores the result saturated to a signed dword (SaturateQwordSToDwordS).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  for (unsigned n = 0; n < 4; n++) {
    // word (2n+1) lives inside dword n, so it is read before dword n is stored
    unsigned word = 2*n + 1;
    Bit32s product = (Bit32s) op1.xmm16s(word) * (Bit32s) op2.xmm16s(word);
    op1.xmm32s(n) = SaturateQwordSToDwordS((Bit64s) product + (Bit64s) op3.xmm32s(n));
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// Non-zero when adding op1 and op2 produced 'result' with signed overflow:
// both operands have a sign (selected by 'mask') that differs from the
// result's sign.
#define GET_ADD_OVERFLOW(op1, op2, result, mask) \
  ((((op1) ^ (result)) & ((op2) ^ (result))) & (mask))

// Signed 64-bit addition with saturation.
// Returns a + b when the sum is representable; otherwise returns
// 0x7FFFFFFFFFFFFFFF for positive overflow or 0x8000000000000000 for
// negative overflow.
BX_CPP_INLINE Bit64s add_saturate64(Bit64s a, Bit64s b)
{
  // Compute the wrapped sum in unsigned arithmetic: signed overflow is
  // undefined behaviour and would let the compiler drop the check below.
  Bit64s r = (Bit64s) ((Bit64u) a + (Bit64u) b);
  Bit64u overflow = GET_ADD_OVERFLOW(a, b, r, BX_CONST64(0x8000000000000000));
  if (! overflow) return r;
  // signed overflow detected, saturate.
  // 'overflow' is 0x8000000000000000 here (the most negative value);
  // subtracting one turns it into the most positive value.
  if (a > 0) overflow--;
  return (Bit64s) overflow;
}
// VPMACSSDQL handler: multiplies the even-numbered signed dwords (0 and 2) of
// op1 and op2 into 64-bit products and adds the matching qword of op3 with
// signed saturation (add_saturate64).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDQL_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
  BxPackedXmmRegister result;

  Bit64s low_product  = (Bit64s) op1.xmm32s(0) * (Bit64s) op2.xmm32s(0);
  Bit64s high_product = (Bit64s) op1.xmm32s(2) * (Bit64s) op2.xmm32s(2);

  result.xmm64s(0) = add_saturate64(low_product,  op3.xmm64s(0));
  result.xmm64s(1) = add_saturate64(high_product, op3.xmm64s(1));

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), result);

  BX_NEXT_INSTR(i);
}
// VPMACSSDD handler: for each of the four signed dwords computes
// op1 * op2 + op3 in 64-bit arithmetic and stores the result saturated to a
// signed dword (SaturateQwordSToDwordS).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  for (unsigned n = 0; n < 4; n++) {
    Bit64s product = (Bit64s) op1.xmm32s(n) * (Bit64s) op2.xmm32s(n);
    Bit64s sum = product + (Bit64s) op3.xmm32s(n);
    op1.xmm32s(n) = SaturateQwordSToDwordS(sum);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPMACSSDQH handler: multiplies the odd-numbered signed dwords (1 and 3) of
// op1 and op2 into 64-bit products and adds the matching qword of op3 with
// signed saturation (add_saturate64).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDQH_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());
  BxPackedXmmRegister result;

  Bit64s low_product  = (Bit64s) op1.xmm32s(1) * (Bit64s) op2.xmm32s(1);
  Bit64s high_product = (Bit64s) op1.xmm32s(3) * (Bit64s) op2.xmm32s(3);

  result.xmm64s(0) = add_saturate64(low_product,  op3.xmm64s(0));
  result.xmm64s(1) = add_saturate64(high_product, op3.xmm64s(1));

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), result);

  BX_NEXT_INSTR(i);
}
// VPMACSWW handler: for each of the eight signed words computes
// op1 * op2 + op3 in 32-bit arithmetic and keeps only the low 16 bits
// (no saturation).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSWW_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  for (unsigned n = 0; n < 8; n++) {
    Bit32s product = (Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n);
    Bit32s sum = product + (Bit32s) op3.xmm16s(n);
    op1.xmm16s(n) = sum;   // truncated to the low word
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPMACSWD handler: multiplies the odd-numbered signed words (1, 3, 5, 7) of
// op1 and op2, adds the matching signed dword of op3 in 64-bit arithmetic and
// keeps only the low 32 bits (no saturation).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  for (unsigned n = 0; n < 4; n++) {
    // word (2n+1) lives inside dword n, so it is read before dword n is stored
    unsigned word = 2*n + 1;
    Bit32s product = (Bit32s) op1.xmm16s(word) * (Bit32s) op2.xmm16s(word);
    Bit64s sum = product + (Bit64s) op3.xmm32s(n);
    op1.xmm32s(n) = sum;   // truncated to the low dword
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPMACSDQL handler: multiplies the even-numbered signed dwords (0 and 2) of
// op1 and op2 into 64-bit products and adds the matching qword of op3,
// wrapping modulo 2^64 (no saturation).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDQL_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  Bit64s product1 = (Bit64s) op1.xmm32s(0) * (Bit64s) op2.xmm32s(0);
  Bit64s product2 = (Bit64s) op1.xmm32s(2) * (Bit64s) op2.xmm32s(2);

  // The accumulate may exceed the signed 64-bit range. Do it in unsigned
  // arithmetic, where wrap-around is well defined, instead of relying on
  // signed overflow (undefined behaviour); the stored bit pattern is the
  // same two's complement result.
  op1.xmm64u(0) = (Bit64u) product1 + op3.xmm64u(0);
  op1.xmm64u(1) = (Bit64u) product2 + op3.xmm64u(1);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPMACSDD handler: for each of the four signed dwords computes
// op1 * op2 + op3 in 64-bit arithmetic and keeps only the low 32 bits
// (no saturation).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  for (unsigned n = 0; n < 4; n++) {
    Bit64s product = (Bit64s) op1.xmm32s(n) * (Bit64s) op2.xmm32s(n);
    Bit64s sum = product + (Bit64s) op3.xmm32s(n);
    op1.xmm32s(n) = sum;   // truncated to the low dword
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPMACSDQH handler: multiplies the odd-numbered signed dwords (1 and 3) of
// op1 and op2 into 64-bit products and adds the matching qword of op3,
// wrapping modulo 2^64 (no saturation).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDQH_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  Bit64s product1 = (Bit64s) op1.xmm32s(1) * (Bit64s) op2.xmm32s(1);
  Bit64s product2 = (Bit64s) op1.xmm32s(3) * (Bit64s) op2.xmm32s(3);

  // The accumulate may exceed the signed 64-bit range. Do it in unsigned
  // arithmetic, where wrap-around is well defined, instead of relying on
  // signed overflow (undefined behaviour); the stored bit pattern is the
  // same two's complement result.
  op1.xmm64u(0) = (Bit64u) product1 + op3.xmm64u(0);
  op1.xmm64u(1) = (Bit64u) product2 + op3.xmm64u(1);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPMADCSSWD handler: for each dword lane n, multiplies signed words 2n and
// 2n+1 of op1 and op2, adds both products and the signed dword n of op3 in
// 64-bit arithmetic and stores the result saturated to a signed dword.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMADCSSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  for (unsigned n = 0; n < 4; n++) {
    // both source words live inside dword n and are read before it is stored
    Bit32s even_product = (Bit32s) op1.xmm16s(2*n)     * (Bit32s) op2.xmm16s(2*n);
    Bit32s odd_product  = (Bit32s) op1.xmm16s(2*n + 1) * (Bit32s) op2.xmm16s(2*n + 1);
    Bit64s sum = (Bit64s) even_product + (Bit64s) odd_product + (Bit64s) op3.xmm32s(n);
    op1.xmm32s(n) = SaturateQwordSToDwordS(sum);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPMADCSWD handler: for each dword lane n, multiplies signed words 2n and
// 2n+1 of op1 and op2, adds both products and dword n of op3, wrapping modulo
// 2^32 (no saturation).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMADCSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->rm());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->Ib());

  // A single 16x16 product always fits in 32 bits.
  Bit32s product[8];
  for (unsigned n = 0; n < 8; n++)
    product[n] = (Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n);

  // The sum of two products plus the accumulator can exceed the signed 32-bit
  // range (e.g. two products of 0x40000000). Add in unsigned arithmetic, where
  // wrap-around is well defined, instead of relying on signed overflow
  // (undefined behaviour); the stored bit pattern is the same two's
  // complement result.
  for (unsigned n = 0; n < 4; n++)
    op1.xmm32u(n) = (Bit32u) product[2*n] + (Bit32u) product[2*n + 1] + op3.xmm32u(n);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op1);

  BX_NEXT_INSTR(i);
}
// VPROTB with an immediate count: rotates the operand read from rm() with
// sse_prolb() for a positive count or sse_prorb() for a negative count, and
// writes it to nnn(). A zero count leaves the operand unchanged.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTB_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // Sign-extend the immediate byte. If Ib() returns an unsigned byte, a plain
  // conversion to int can never be negative and the rotate-right branch below
  // would be unreachable.
  // NOTE(review): assumes Ib() yields the raw 8-bit immediate - confirm.
  int count = (Bit8s) i->Ib();

  if (count > 0) {
    // rotate left
    sse_prolb(&op, count);
  }
  else if (count < 0) {
    // rotate right
    sse_prorb(&op, -count);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPROTW with an immediate count: rotates the operand read from rm() with
// sse_prolw() for a positive count or sse_prorw() for a negative count, and
// writes it to nnn(). A zero count leaves the operand unchanged.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTW_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // Sign-extend the immediate byte. If Ib() returns an unsigned byte, a plain
  // conversion to int can never be negative and the rotate-right branch below
  // would be unreachable.
  // NOTE(review): assumes Ib() yields the raw 8-bit immediate - confirm.
  int count = (Bit8s) i->Ib();

  if (count > 0) {
    // rotate left
    sse_prolw(&op, count);
  }
  else if (count < 0) {
    // rotate right
    sse_prorw(&op, -count);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPROTD with an immediate count: rotates the operand read from rm() with
// sse_prold() for a positive count or sse_prord() for a negative count, and
// writes it to nnn(). A zero count leaves the operand unchanged.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTD_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // Sign-extend the immediate byte. If Ib() returns an unsigned byte, a plain
  // conversion to int can never be negative and the rotate-right branch below
  // would be unreachable.
  // NOTE(review): assumes Ib() yields the raw 8-bit immediate - confirm.
  int count = (Bit8s) i->Ib();

  if (count > 0) {
    // rotate left
    sse_prold(&op, count);
  }
  else if (count < 0) {
    // rotate right
    sse_prord(&op, -count);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPROTQ with an immediate count: rotates the operand read from rm() with
// sse_prolq() for a positive count or sse_prorq() for a negative count, and
// writes it to nnn(). A zero count leaves the operand unchanged.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTQ_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // Sign-extend the immediate byte. If Ib() returns an unsigned byte, a plain
  // conversion to int can never be negative and the rotate-right branch below
  // would be unreachable.
  // NOTE(review): assumes Ib() yields the raw 8-bit immediate - confirm.
  int count = (Bit8s) i->Ib();

  if (count > 0) {
    // rotate left
    sse_prolq(&op, count);
  }
  else if (count < 0) {
    // rotate right
    sse_prorq(&op, -count);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPCOMB handler: runs the compare8[] predicate chosen by the low three bits
// of the immediate on the operands from vvv() and rm(), then writes the first
// operand to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMB_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->rm());

  simd_compare_method predicate = compare8[i->Ib() & 7];
  predicate(&lhs, &rhs);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), lhs);

  BX_NEXT_INSTR(i);
}
// VPCOMW handler: runs the compare16[] predicate chosen by the low three bits
// of the immediate on the operands from vvv() and rm(), then writes the first
// operand to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMW_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->rm());

  simd_compare_method predicate = compare16[i->Ib() & 7];
  predicate(&lhs, &rhs);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), lhs);

  BX_NEXT_INSTR(i);
}
// VPCOMD handler: runs the compare32[] predicate chosen by the low three bits
// of the immediate on the operands from vvv() and rm(), then writes the first
// operand to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->rm());

  simd_compare_method predicate = compare32[i->Ib() & 7];
  predicate(&lhs, &rhs);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), lhs);

  BX_NEXT_INSTR(i);
}
// VPCOMQ handler: runs the compare64[] predicate chosen by the low three bits
// of the immediate on the operands from vvv() and rm(), then writes the first
// operand to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->rm());

  simd_compare_method predicate = compare64[i->Ib() & 7];
  predicate(&lhs, &rhs);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), lhs);

  BX_NEXT_INSTR(i);
}
// VPCOMUB handler: runs the compare8u[] predicate chosen by the low three
// bits of the immediate on the operands from vvv() and rm(), then writes the
// first operand to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUB_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->rm());

  simd_compare_method predicate = compare8u[i->Ib() & 7];
  predicate(&lhs, &rhs);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), lhs);

  BX_NEXT_INSTR(i);
}
// VPCOMUW handler: runs the compare16u[] predicate chosen by the low three
// bits of the immediate on the operands from vvv() and rm(), then writes the
// first operand to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUW_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->rm());

  simd_compare_method predicate = compare16u[i->Ib() & 7];
  predicate(&lhs, &rhs);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), lhs);

  BX_NEXT_INSTR(i);
}
// VPCOMUD handler: runs the compare32u[] predicate chosen by the low three
// bits of the immediate on the operands from vvv() and rm(), then writes the
// first operand to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->rm());

  simd_compare_method predicate = compare32u[i->Ib() & 7];
  predicate(&lhs, &rhs);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), lhs);

  BX_NEXT_INSTR(i);
}
// VPCOMUQ handler: runs the compare64u[] predicate chosen by the low three
// bits of the immediate on the operands from vvv() and rm(), then writes the
// first operand to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister lhs = BX_READ_XMM_REG(i->vvv());
  BxPackedXmmRegister rhs = BX_READ_XMM_REG(i->rm());

  simd_compare_method predicate = compare64u[i->Ib() & 7];
  predicate(&lhs, &rhs);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), lhs);

  BX_NEXT_INSTR(i);
}
// VFRCZPS handler: applies float32_frc() to each of the 4*VL packed 32-bit
// elements of the operand read from rm(), using a softfloat status derived
// from MXCSR. Accumulated exception flags go to check_exceptionsSSE() before
// the result is written to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPS_VpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->rm());
  unsigned len = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  unsigned elements = 4*len;
  unsigned n = 0;
  while (n < elements) {
    op.avx32u(n) = float32_frc(op.avx32u(n), status);
    n++;
  }

  check_exceptionsSSE(status.float_exception_flags);

  BX_WRITE_AVX_REGZ(i->nnn(), op, len);

  BX_NEXT_INSTR(i);
}
// VFRCZPD handler: applies float64_frc() to each of the 2*VL packed 64-bit
// elements of the operand read from rm(), using a softfloat status derived
// from MXCSR. Accumulated exception flags go to check_exceptionsSSE() before
// the result is written to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPD_VpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->rm());
  unsigned len = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  unsigned elements = 2*len;
  unsigned n = 0;
  while (n < elements) {
    op.avx64u(n) = float64_frc(op.avx64u(n), status);
    n++;
  }

  check_exceptionsSSE(status.float_exception_flags);

  BX_WRITE_AVX_REGZ(i->nnn(), op, len);

  BX_NEXT_INSTR(i);
}
// VFRCZSS handler: applies float32_frc() to the low dword of the register
// named by rm(); every other bit of the 128-bit result is zero. Exception
// flags go to check_exceptionsSSE() before the result is written to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZSS_VssWssR(bxInstruction_c *i)
{
  float32 src = BX_READ_XMM_REG_LO_DWORD(i->rm());

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  BxPackedXmmRegister result;
  result.xmm64u(1) = 0;
  result.xmm32u(1) = 0;
  result.xmm32u(0) = float32_frc(src, status);

  check_exceptionsSSE(status.float_exception_flags);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), result);

  BX_NEXT_INSTR(i);
}
// VFRCZSD handler: applies float64_frc() to the low qword of the register
// named by rm(); the upper qword of the 128-bit result is zero. Exception
// flags go to check_exceptionsSSE() before the result is written to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZSD_VsdWsdR(bxInstruction_c *i)
{
  float64 src = BX_READ_XMM_REG_LO_QWORD(i->rm());

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  BxPackedXmmRegister result;
  result.xmm64u(1) = 0;
  result.xmm64u(0) = float64_frc(src, status);

  check_exceptionsSSE(status.float_exception_flags);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), result);

  BX_NEXT_INSTR(i);
}
// VPHADDBW handler: each signed word n of the result is the sum of signed
// bytes 2n and 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBW_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: word n overlaps only the two bytes it is computed from.
  for (unsigned n = 0; n < 8; n++) {
    Bit16s even_byte = op.xmmsbyte(2*n);
    Bit16s odd_byte  = op.xmmsbyte(2*n + 1);
    op.xmm16s(n) = even_byte + odd_byte;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDBD handler: each signed dword n of the result is the sum of signed
// bytes 4n .. 4n+3 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: dword n overlaps only the four bytes it is computed from.
  for (unsigned n = 0; n < 4; n++) {
    Bit32s sum = 0;
    for (unsigned k = 0; k < 4; k++)
      sum += (Bit32s) op.xmmsbyte(4*n + k);
    op.xmm32s(n) = sum;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDBQ handler: each signed qword n of the result is the sum of signed
// bytes 8n .. 8n+7 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: qword n overlaps only the eight bytes it is computed from.
  for (unsigned n = 0; n < 2; n++) {
    Bit32s sum = 0;
    for (unsigned k = 0; k < 8; k++)
      sum += (Bit32s) op.xmmsbyte(8*n + k);
    op.xmm64s(n) = sum;   // sign-extended to 64 bits
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDWD handler: each signed dword n of the result is the sum of signed
// words 2n and 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDWD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: dword n overlaps only the two words it is computed from.
  for (unsigned n = 0; n < 4; n++) {
    Bit32s even_word = op.xmm16s(2*n);
    Bit32s odd_word  = op.xmm16s(2*n + 1);
    op.xmm32s(n) = even_word + odd_word;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDWQ handler: each signed qword n of the result is the sum of signed
// words 4n .. 4n+3 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDWQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: qword n overlaps only the four words it is computed from.
  for (unsigned n = 0; n < 2; n++) {
    Bit32s sum = 0;
    for (unsigned k = 0; k < 4; k++)
      sum += (Bit32s) op.xmm16s(4*n + k);
    op.xmm64s(n) = sum;   // sign-extended to 64 bits
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDDQ handler: each signed qword n of the result is the sum of signed
// dwords 2n and 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDDQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: qword n overlaps only the two dwords it is computed from.
  for (unsigned n = 0; n < 2; n++) {
    Bit64s even_dword = op.xmm32s(2*n);
    Bit64s odd_dword  = op.xmm32s(2*n + 1);
    op.xmm64s(n) = even_dword + odd_dword;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDUBW handler: each unsigned word n of the result is the sum of
// unsigned bytes 2n and 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBW_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: word n overlaps only the two bytes it is computed from.
  for (unsigned n = 0; n < 8; n++) {
    Bit16u even_byte = op.xmmubyte(2*n);
    Bit16u odd_byte  = op.xmmubyte(2*n + 1);
    op.xmm16u(n) = even_byte + odd_byte;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDUBD handler: each unsigned dword n of the result is the sum of
// unsigned bytes 4n .. 4n+3 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: dword n overlaps only the four bytes it is computed from.
  // All terms are zero-extended bytes, so the sum is done entirely in Bit32u.
  for (unsigned n = 0; n < 4; n++) {
    Bit32u sum = 0;
    for (unsigned k = 0; k < 4; k++)
      sum += (Bit32u) op.xmmubyte(4*n + k);
    op.xmm32u(n) = sum;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDUBQ handler: each unsigned qword n of the result is the sum of
// unsigned bytes 8n .. 8n+7 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: qword n overlaps only the eight bytes it is computed from.
  for (unsigned n = 0; n < 2; n++) {
    Bit32u sum = 0;
    for (unsigned k = 0; k < 8; k++)
      sum += (Bit32u) op.xmmubyte(8*n + k);
    op.xmm64u(n) = sum;   // zero-extended to 64 bits
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDUWD handler: each unsigned dword n of the result is the sum of
// unsigned words 2n and 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUWD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: dword n overlaps only the two words it is computed from.
  for (unsigned n = 0; n < 4; n++) {
    Bit32u even_word = op.xmm16u(2*n);
    Bit32u odd_word  = op.xmm16u(2*n + 1);
    op.xmm32u(n) = even_word + odd_word;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDUWQ handler: each unsigned qword n of the result is the sum of
// unsigned words 4n .. 4n+3 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUWQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: qword n overlaps only the four words it is computed from.
  for (unsigned n = 0; n < 2; n++) {
    Bit32u sum = 0;
    for (unsigned k = 0; k < 4; k++)
      sum += (Bit32u) op.xmm16u(4*n + k);
    op.xmm64u(n) = sum;   // zero-extended to 64 bits
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHADDUDQ handler: each unsigned qword n of the result is the sum of
// unsigned dwords 2n and 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUDQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: qword n overlaps only the two dwords it is computed from.
  for (unsigned n = 0; n < 2; n++) {
    Bit64u even_dword = op.xmm32u(2*n);
    Bit64u odd_dword  = op.xmm32u(2*n + 1);
    op.xmm64u(n) = even_dword + odd_dword;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHSUBBW handler: each signed word n of the result is signed byte 2n minus
// signed byte 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBBW_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: word n overlaps only the two bytes it is computed from.
  for (unsigned n = 0; n < 8; n++) {
    Bit16s minuend    = op.xmmsbyte(2*n);
    Bit16s subtrahend = op.xmmsbyte(2*n + 1);
    op.xmm16s(n) = minuend - subtrahend;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHSUBWD handler: each signed dword n of the result is signed word 2n minus
// signed word 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBWD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: dword n overlaps only the two words it is computed from.
  for (unsigned n = 0; n < 4; n++) {
    Bit32s minuend    = op.xmm16s(2*n);
    Bit32s subtrahend = op.xmm16s(2*n + 1);
    op.xmm32s(n) = minuend - subtrahend;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPHSUBDQ handler: each signed qword n of the result is signed dword 2n
// minus signed dword 2n+1 of the operand read from rm().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBDQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->rm());

  // In place: qword n overlaps only the two dwords it is computed from.
  for (unsigned n = 0; n < 2; n++) {
    Bit64s minuend    = op.xmm32s(2*n);
    Bit64s subtrahend = op.xmm32s(2*n + 1);
    op.xmm64s(n) = minuend - subtrahend;
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->nnn(), op);

  BX_NEXT_INSTR(i);
}
// VPERMIL2PS handler. The immediate byte carries two fields: bits 7:4 name a
// register operand (reduced to three bits when long64_mode() is false) and
// bits 1:0 are passed to sse_permil2ps() as its last argument. getVexW()
// decides whether the rm() register or the immediate-encoded register is the
// second operand; the other one is the third. Every 128-bit lane is processed
// separately and the result is written to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMIL2PS_VdqHdqWdqIbR(bxInstruction_c *i)
{
  int imm_reg = i->Ib() >> 4;
  if (! long64_mode())
    imm_reg &= 0x7;

  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->vvv());
  BxPackedAvxRegister op2, op3, result;

  if (! i->getVexW()) {
    op2 = BX_READ_AVX_REG(i->rm());
    op3 = BX_READ_AVX_REG(imm_reg);
  }
  else {
    op2 = BX_READ_AVX_REG(imm_reg);
    op3 = BX_READ_AVX_REG(i->rm());
  }

  unsigned len = i->getVL();
  unsigned lane = 0;
  while (lane < len) {
    sse_permil2ps(&result.avx128(lane), &op1.avx128(lane), &op2.avx128(lane), &op3.avx128(lane), i->Ib() & 3);
    lane++;
  }

  BX_WRITE_AVX_REGZ(i->nnn(), result, len);

  BX_NEXT_INSTR(i);
}
// VPERMIL2PD handler. The immediate byte carries two fields: bits 7:4 name a
// register operand (reduced to three bits when long64_mode() is false) and
// bits 1:0 are passed to sse_permil2pd() as its last argument. getVexW()
// decides whether the rm() register or the immediate-encoded register is the
// second operand; the other one is the third. Every 128-bit lane is processed
// separately and the result is written to nnn().
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMIL2PD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  int imm_reg = i->Ib() >> 4;
  if (! long64_mode())
    imm_reg &= 0x7;

  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->vvv());
  BxPackedAvxRegister op2, op3, result;

  if (! i->getVexW()) {
    op2 = BX_READ_AVX_REG(i->rm());
    op3 = BX_READ_AVX_REG(imm_reg);
  }
  else {
    op2 = BX_READ_AVX_REG(imm_reg);
    op3 = BX_READ_AVX_REG(i->rm());
  }

  unsigned len = i->getVL();
  unsigned lane = 0;
  while (lane < len) {
    sse_permil2pd(&result.avx128(lane), &op1.avx128(lane), &op2.avx128(lane), &op3.avx128(lane), i->Ib() & 3);
    lane++;
  }

  BX_WRITE_AVX_REGZ(i->nnn(), result, len);

  BX_NEXT_INSTR(i);
}
#endif