/////////////////////////////////////////////////////////////////////////
// $Id$
/////////////////////////////////////////////////////////////////////////
//
//   Copyright (c) 2011-2013 Stanislav Shwartsman
//          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public
//  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////

#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR

#if BX_SUPPORT_AVX

extern void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mxcsr);

#include "simd_int.h"
#include "simd_compare.h"

typedef void (*simd_compare_method)(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2);
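
// Every XOP PCOM*/PCOMU* form encodes its comparison predicate in the low
// three bits of the immediate byte. The tables below map that 3-bit value
// to a packed-compare helper: 0=LT, 1=LE, 2=GT, 3=GE, 4=EQ, 5=NE,
// 6=FALSE (all zeros), 7=TRUE (all ones). Equality and inequality are
// sign-agnostic, so the unsigned tables reuse the signed EQ/NE helpers.
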
// comparison predicate for PCOMB
static simd_compare_method xop_compare8[8] = {
  xmm_pcmpltb,
  xmm_pcmpleb,
  xmm_pcmpgtb,
  xmm_pcmpgeb,
  xmm_pcmpeqb,
  xmm_pcmpneb,
  xmm_pcmpfalse,
  xmm_pcmptrue
};

// comparison predicate for PCOMUB
static simd_compare_method xop_compare8u[8] = {
  xmm_pcmpltub,
  xmm_pcmpleub,
  xmm_pcmpgtub,
  xmm_pcmpgeub,
  xmm_pcmpeqb,
  xmm_pcmpneb,
  xmm_pcmpfalse,
  xmm_pcmptrue
};

// comparison predicate for PCOMW
static simd_compare_method xop_compare16[8] = {
  xmm_pcmpltw,
  xmm_pcmplew,
  xmm_pcmpgtw,
  xmm_pcmpgew,
  xmm_pcmpeqw,
  xmm_pcmpnew,
  xmm_pcmpfalse,
  xmm_pcmptrue
};

// comparison predicate for PCOMUW
static simd_compare_method xop_compare16u[8] = {
  xmm_pcmpltuw,
  xmm_pcmpleuw,
  xmm_pcmpgtuw,
  xmm_pcmpgeuw,
  xmm_pcmpeqw,
  xmm_pcmpnew,
  xmm_pcmpfalse,
  xmm_pcmptrue
};

// comparison predicate for PCOMD
static simd_compare_method xop_compare32[8] = {
  xmm_pcmpltd,
  xmm_pcmpled,
  xmm_pcmpgtd,
  xmm_pcmpged,
  xmm_pcmpeqd,
  xmm_pcmpned,
  xmm_pcmpfalse,
  xmm_pcmptrue
};

// comparison predicate for PCOMUD
static simd_compare_method xop_compare32u[8] = {
  xmm_pcmpltud,
  xmm_pcmpleud,
  xmm_pcmpgtud,
  xmm_pcmpgeud,
  xmm_pcmpeqd,
  xmm_pcmpned,
  xmm_pcmpfalse,
  xmm_pcmptrue
};

// comparison predicate for PCOMQ
static simd_compare_method xop_compare64[8] = {
  xmm_pcmpltq,
  xmm_pcmpleq,
  xmm_pcmpgtq,
  xmm_pcmpgeq,
  xmm_pcmpeqq,
  xmm_pcmpneq,
  xmm_pcmpfalse,
  xmm_pcmptrue
};

// comparison predicate for PCOMUQ
static simd_compare_method xop_compare64u[8] = {
  xmm_pcmpltuq,
  xmm_pcmpleuq,
  xmm_pcmpgtuq,
  xmm_pcmpgeuq,
  xmm_pcmpeqq,
  xmm_pcmpneq,
  xmm_pcmpfalse,
  xmm_pcmptrue
};
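
// Every destination byte produced by VPPERM is passed through one of eight
// post-operations selected by bits 7:5 of the corresponding control byte;
// the inline helpers below implement those operations on a single byte and
// are gathered into the vpperm_op dispatch table.
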
typedef Bit8u (*vpperm_operation)(Bit8u byte);

BX_CPP_INLINE Bit8u vpperm_bit_reverse(Bit8u v8)
{
  return (v8 >> 7) |
        ((v8 >> 5) & 0x02) |
        ((v8 >> 3) & 0x04) |
        ((v8 >> 1) & 0x08) |
        ((v8 << 1) & 0x10) |
        ((v8 << 3) & 0x20) |
        ((v8 << 5) & 0x40) |
         (v8 << 7);
}

BX_CPP_INLINE Bit8u vpperm_noop(Bit8u v8) { return v8; }
BX_CPP_INLINE Bit8u vpperm_invert(Bit8u v8) { return ~v8; }
BX_CPP_INLINE Bit8u vpperm_invert_bit_reverse(Bit8u v8) { return vpperm_bit_reverse(~v8); }
BX_CPP_INLINE Bit8u vpperm_zeros(Bit8u v8) { return 0; }
BX_CPP_INLINE Bit8u vpperm_ones(Bit8u v8) { return 0xff; }
BX_CPP_INLINE Bit8u vpperm_replicate_msb(Bit8u v8) { return (((Bit8s) v8) >> 7); }
BX_CPP_INLINE Bit8u vpperm_invert_replicate_msb(Bit8u v8) { return vpperm_replicate_msb(~v8); }

// logical operation for VPPERM
static vpperm_operation vpperm_op[8] = {
  vpperm_noop,
  vpperm_invert,
  vpperm_bit_reverse,
  vpperm_invert_bit_reverse,
  vpperm_zeros,
  vpperm_ones,
  vpperm_replicate_msb,
  vpperm_invert_replicate_msb
};
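
// VPCMOV is a bit-wise select: for every bit position, the result takes the
// bit from the first source where the selector (third source) bit is set
// and from the second source where it is clear; xmm_pselect performs the
// selection on each 128-bit lane in place.
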
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCMOV_VdqHdqWdqVIb(bxInstruction_c *i)
{
  BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
  BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
  BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3());
  unsigned len = i->getVL();

  for (unsigned n=0; n < len; n++) {
    xmm_pselect(&op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n));
  }

  BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);

  BX_NEXT_INSTR(i);
}
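
// VPPERM builds each destination byte from a per-byte control in the third
// source operand: bits 3:0 index a byte, bit 4 chooses the first source
// (set) or the second source (clear), and bits 7:5 select the vpperm_op
// post-operation applied to the selected byte.
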
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPPERM_VdqHdqWdqVIb(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3()), dst;

  for (unsigned n=0;n<16;n++) {
    unsigned control = op3.xmmubyte(n);

    if (control & 0x10)
      dst.xmmubyte(n) = op1.xmmubyte(control & 0xf);
    else
      dst.xmmubyte(n) = op2.xmmubyte(control & 0xf);

    dst.xmmubyte(n) = vpperm_op[control >> 5](dst.xmmubyte(n));
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);

  BX_NEXT_INSTR(i);
}
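
// The register forms of the XOP per-element shifts (VPSHA* arithmetic,
// VPSHL* logical) and rotates (VPROT*) all share the same handler shape:
// the first source supplies the data, the second source supplies the
// per-element counts (interpreted inside the xmm_* helpers), and the upper
// part of the destination register is cleared. The macro below generates
// one such handler per instruction.
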
#define XOP_SHIFT_ROTATE(HANDLER, func) \
  BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
  { \
    BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2()); \
    \
    (func)(&op1, &op2); \
    \
    BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); \
    \
    BX_NEXT_INSTR(i); \
  }

XOP_SHIFT_ROTATE(VPSHAB_VdqWdqHdq, xmm_pshab);
XOP_SHIFT_ROTATE(VPSHAW_VdqWdqHdq, xmm_pshaw);
XOP_SHIFT_ROTATE(VPSHAD_VdqWdqHdq, xmm_pshad);
XOP_SHIFT_ROTATE(VPSHAQ_VdqWdqHdq, xmm_pshaq);

XOP_SHIFT_ROTATE(VPSHLB_VdqWdqHdq, xmm_pshlb);
XOP_SHIFT_ROTATE(VPSHLW_VdqWdqHdq, xmm_pshlw);
XOP_SHIFT_ROTATE(VPSHLD_VdqWdqHdq, xmm_pshld);
XOP_SHIFT_ROTATE(VPSHLQ_VdqWdqHdq, xmm_pshlq);

XOP_SHIFT_ROTATE(VPROTB_VdqWdqHdq, xmm_protb);
XOP_SHIFT_ROTATE(VPROTW_VdqWdqHdq, xmm_protw);
XOP_SHIFT_ROTATE(VPROTD_VdqWdqHdq, xmm_protd);
XOP_SHIFT_ROTATE(VPROTQ_VdqWdqHdq, xmm_protq);
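
// VPMACS* family: packed multiply-accumulate. Elements of the first two
// sources are multiplied at double width and the corresponding element of
// the third source is added in; the 'SS' forms saturate the result to the
// destination element size, the plain forms simply truncate (wrap).
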
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSWW_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  for(unsigned n=0;n<8;n++) {
    op1.xmm16s(n) = SaturateDwordSToWordS(((Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n)) + (Bit32s) op3.xmm16s(n));
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  op1.xmm32s(0) = SaturateQwordSToDwordS(((Bit32s) op1.xmm16s(1) * (Bit32s) op2.xmm16s(1)) + (Bit64s) op3.xmm32s(0));
  op1.xmm32s(1) = SaturateQwordSToDwordS(((Bit32s) op1.xmm16s(3) * (Bit32s) op2.xmm16s(3)) + (Bit64s) op3.xmm32s(1));
  op1.xmm32s(2) = SaturateQwordSToDwordS(((Bit32s) op1.xmm16s(5) * (Bit32s) op2.xmm16s(5)) + (Bit64s) op3.xmm32s(2));
  op1.xmm32s(3) = SaturateQwordSToDwordS(((Bit32s) op1.xmm16s(7) * (Bit32s) op2.xmm16s(7)) + (Bit64s) op3.xmm32s(3));

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}
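
// Signed 64-bit addition with saturation, used by the 32x32->64
// multiply-accumulate forms below. GET_ADD_OVERFLOW yields the mask value
// 0x8000000000000000 when the signed add overflows; decrementing it when
// the first operand is positive turns it into 0x7FFFFFFFFFFFFFFF, so the
// function returns INT64_MAX on positive overflow and INT64_MIN on
// negative overflow.
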
BX_CPP_INLINE Bit64s add_saturate64(Bit64s a, Bit64s b)
{
  Bit64s r = a + b;
  Bit64u overflow = GET_ADD_OVERFLOW(a, b, r, BX_CONST64(0x8000000000000000));
  if (! overflow) return r;
  // signed overflow detected, saturate
  if (a > 0) overflow--;
  return (Bit64s) overflow;
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDQL_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  Bit64s product1 = (Bit64s) op1.xmm32s(0) * (Bit64s) op2.xmm32s(0);
  Bit64s product2 = (Bit64s) op1.xmm32s(2) * (Bit64s) op2.xmm32s(2);

  op1.xmm64s(0) = add_saturate64(product1, op3.xmm64s(0));
  op1.xmm64s(1) = add_saturate64(product2, op3.xmm64s(1));

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  for(unsigned n=0;n<4;n++) {
    op1.xmm32s(n) = SaturateQwordSToDwordS(((Bit64s) op1.xmm32s(n) * (Bit64s) op2.xmm32s(n)) + (Bit64s) op3.xmm32s(n));
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSSDQH_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  Bit64s product1 = (Bit64s) op1.xmm32s(1) * (Bit64s) op2.xmm32s(1);
  Bit64s product2 = (Bit64s) op1.xmm32s(3) * (Bit64s) op2.xmm32s(3);

  op1.xmm64s(0) = add_saturate64(product1, op3.xmm64s(0));
  op1.xmm64s(1) = add_saturate64(product2, op3.xmm64s(1));

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSWW_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  for(unsigned n=0;n<8;n++) {
    op1.xmm16s(n) = ((Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n)) + (Bit32s) op3.xmm16s(n);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  op1.xmm32s(0) = ((Bit32s) op1.xmm16s(1) * (Bit32s) op2.xmm16s(1)) + (Bit64s) op3.xmm32s(0);
  op1.xmm32s(1) = ((Bit32s) op1.xmm16s(3) * (Bit32s) op2.xmm16s(3)) + (Bit64s) op3.xmm32s(1);
  op1.xmm32s(2) = ((Bit32s) op1.xmm16s(5) * (Bit32s) op2.xmm16s(5)) + (Bit64s) op3.xmm32s(2);
  op1.xmm32s(3) = ((Bit32s) op1.xmm16s(7) * (Bit32s) op2.xmm16s(7)) + (Bit64s) op3.xmm32s(3);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDQL_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  Bit64s product1 = (Bit64s) op1.xmm32s(0) * (Bit64s) op2.xmm32s(0);
  Bit64s product2 = (Bit64s) op1.xmm32s(2) * (Bit64s) op2.xmm32s(2);

  op1.xmm64s(0) = product1 + op3.xmm64s(0);
  op1.xmm64s(1) = product2 + op3.xmm64s(1);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  for(unsigned n=0;n<4;n++) {
    op1.xmm32s(n) = ((Bit64s) op1.xmm32s(n) * (Bit64s) op2.xmm32s(n)) + (Bit64s) op3.xmm32s(n);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMACSDQH_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  Bit64s product1 = (Bit64s) op1.xmm32s(1) * (Bit64s) op2.xmm32s(1);
  Bit64s product2 = (Bit64s) op1.xmm32s(3) * (Bit64s) op2.xmm32s(3);

  op1.xmm64s(0) = product1 + op3.xmm64s(0);
  op1.xmm64s(1) = product2 + op3.xmm64s(1);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMADCSSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  Bit32s product[8];

  for(unsigned n=0;n < 8;n++)
    product[n] = (Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n);

  op1.xmm32s(0) = SaturateQwordSToDwordS((Bit64s) product[0] + (Bit64s) product[1] + (Bit64s) op3.xmm32s(0));
  op1.xmm32s(1) = SaturateQwordSToDwordS((Bit64s) product[2] + (Bit64s) product[3] + (Bit64s) op3.xmm32s(1));
  op1.xmm32s(2) = SaturateQwordSToDwordS((Bit64s) product[4] + (Bit64s) product[5] + (Bit64s) op3.xmm32s(2));
  op1.xmm32s(3) = SaturateQwordSToDwordS((Bit64s) product[6] + (Bit64s) product[7] + (Bit64s) op3.xmm32s(3));

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMADCSWD_VdqHdqWdqVIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
  BxPackedXmmRegister op3 = BX_READ_XMM_REG(i->src3());

  Bit32s product[8];

  for(unsigned n=0;n < 8;n++)
    product[n] = (Bit32s) op1.xmm16s(n) * (Bit32s) op2.xmm16s(n);

  op1.xmm32s(0) = product[0] + product[1] + op3.xmm32s(0);
  op1.xmm32s(1) = product[2] + product[3] + op3.xmm32s(1);
  op1.xmm32s(2) = product[4] + product[5] + op3.xmm32s(2);
  op1.xmm32s(3) = product[6] + product[7] + op3.xmm32s(3);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}
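
// Immediate forms of VPROT*: the handlers below treat a positive count as a
// rotate left of every element and a negative count as a rotate right by
// the magnitude; a zero count leaves the source unchanged.
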
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTB_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  int count = i->Ib();

  if (count > 0) {
    // rotate left
    xmm_prolb(&op, count);
  }
  else if (count < 0) {
    // rotate right
    xmm_prorb(&op, -count);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTW_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  int count = i->Ib();

  if (count > 0) {
    // rotate left
    xmm_prolw(&op, count);
  }
  else if (count < 0) {
    // rotate right
    xmm_prorw(&op, -count);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTD_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  int count = i->Ib();

  if (count > 0) {
    // rotate left
    xmm_prold(&op, count);
  }
  else if (count < 0) {
    // rotate right
    xmm_prord(&op, -count);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPROTQ_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  int count = i->Ib();

  if (count > 0) {
    // rotate left
    xmm_prolq(&op, count);
  }
  else if (count < 0) {
    // rotate right
    xmm_prorq(&op, -count);
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}
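
// VPCOM*/VPCOMU* immediate forms: dispatch on the low three bits of the
// immediate through the predicate tables defined at the top of this file.
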
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMB_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2());

  xop_compare8[i->Ib() & 7](&op1, &op2);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMW_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2());

  xop_compare16[i->Ib() & 7](&op1, &op2);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2());

  xop_compare32[i->Ib() & 7](&op1, &op2);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2());

  xop_compare64[i->Ib() & 7](&op1, &op2);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUB_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2());

  xop_compare8u[i->Ib() & 7](&op1, &op2);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUW_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2());

  xop_compare16u[i->Ib() & 7](&op1, &op2);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2());

  xop_compare32u[i->Ib() & 7](&op1, &op2);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPCOMUQ_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()), op2 = BX_READ_XMM_REG(i->src2());

  xop_compare64u[i->Ib() & 7](&op1, &op2);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);

  BX_NEXT_INSTR(i);
}
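
// VFRCZ* extract the fractional part of each floating-point element via
// float32_frc/float64_frc, using the rounding mode and exception masks
// derived from MXCSR; SIMD floating-point exceptions are checked before
// the result is committed to the destination.
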
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPS_VpsWpsR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  unsigned len = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  for (unsigned n=0; n < (4*len); n++) {
    op.ymm32u(n) = float32_frc(op.ymm32u(n), status);
  }

  check_exceptionsSSE(get_exception_flags(status));
  BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZPD_VpdWpdR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  unsigned len = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  for (unsigned n=0; n < (2*len); n++) {
    op.ymm64u(n) = float64_frc(op.ymm64u(n), status);
  }

  check_exceptionsSSE(get_exception_flags(status));

  BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZSS_VssWssR(bxInstruction_c *i)
{
  float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
  BxPackedXmmRegister r;

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  r.xmm64u(0) = (Bit64u) float32_frc(op, status);
  r.xmm64u(1) = 0;

  check_exceptionsSSE(get_exception_flags(status));
  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), r);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VFRCZSD_VsdWsdR(bxInstruction_c *i)
{
  float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
  BxPackedXmmRegister r;

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);

  r.xmm64u(0) = float64_frc(op, status);
  r.xmm64u(1) = 0;

  check_exceptionsSSE(get_exception_flags(status));
  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), r);

  BX_NEXT_INSTR(i);
}
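
// VPHADD* horizontal adds: adjacent groups of elements of the single
// source operand are sign-extended (or zero-extended in the U forms) and
// summed into the next wider element of the destination.
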
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBW_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm16s(0) = (Bit16s) op.xmmsbyte(0x0) + (Bit16s) op.xmmsbyte(0x1);
  op.xmm16s(1) = (Bit16s) op.xmmsbyte(0x2) + (Bit16s) op.xmmsbyte(0x3);
  op.xmm16s(2) = (Bit16s) op.xmmsbyte(0x4) + (Bit16s) op.xmmsbyte(0x5);
  op.xmm16s(3) = (Bit16s) op.xmmsbyte(0x6) + (Bit16s) op.xmmsbyte(0x7);
  op.xmm16s(4) = (Bit16s) op.xmmsbyte(0x8) + (Bit16s) op.xmmsbyte(0x9);
  op.xmm16s(5) = (Bit16s) op.xmmsbyte(0xA) + (Bit16s) op.xmmsbyte(0xB);
  op.xmm16s(6) = (Bit16s) op.xmmsbyte(0xC) + (Bit16s) op.xmmsbyte(0xD);
  op.xmm16s(7) = (Bit16s) op.xmmsbyte(0xE) + (Bit16s) op.xmmsbyte(0xF);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm32s(0) = (Bit32s) op.xmmsbyte(0x0) + (Bit32s) op.xmmsbyte(0x1) +
                 (Bit32s) op.xmmsbyte(0x2) + (Bit32s) op.xmmsbyte(0x3);
  op.xmm32s(1) = (Bit32s) op.xmmsbyte(0x4) + (Bit32s) op.xmmsbyte(0x5) +
                 (Bit32s) op.xmmsbyte(0x6) + (Bit32s) op.xmmsbyte(0x7);
  op.xmm32s(2) = (Bit32s) op.xmmsbyte(0x8) + (Bit32s) op.xmmsbyte(0x9) +
                 (Bit32s) op.xmmsbyte(0xA) + (Bit32s) op.xmmsbyte(0xB);
  op.xmm32s(3) = (Bit32s) op.xmmsbyte(0xC) + (Bit32s) op.xmmsbyte(0xD) +
                 (Bit32s) op.xmmsbyte(0xE) + (Bit32s) op.xmmsbyte(0xF);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDBQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm64s(0) = (Bit32s) op.xmmsbyte(0x0) + (Bit32s) op.xmmsbyte(0x1) +
                 (Bit32s) op.xmmsbyte(0x2) + (Bit32s) op.xmmsbyte(0x3) +
                 (Bit32s) op.xmmsbyte(0x4) + (Bit32s) op.xmmsbyte(0x5) +
                 (Bit32s) op.xmmsbyte(0x6) + (Bit32s) op.xmmsbyte(0x7);
  op.xmm64s(1) = (Bit32s) op.xmmsbyte(0x8) + (Bit32s) op.xmmsbyte(0x9) +
                 (Bit32s) op.xmmsbyte(0xA) + (Bit32s) op.xmmsbyte(0xB) +
                 (Bit32s) op.xmmsbyte(0xC) + (Bit32s) op.xmmsbyte(0xD) +
                 (Bit32s) op.xmmsbyte(0xE) + (Bit32s) op.xmmsbyte(0xF);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDWD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm32s(0) = (Bit32s) op.xmm16s(0) + (Bit32s) op.xmm16s(1);
  op.xmm32s(1) = (Bit32s) op.xmm16s(2) + (Bit32s) op.xmm16s(3);
  op.xmm32s(2) = (Bit32s) op.xmm16s(4) + (Bit32s) op.xmm16s(5);
  op.xmm32s(3) = (Bit32s) op.xmm16s(6) + (Bit32s) op.xmm16s(7);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDWQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm64s(0) = (Bit32s) op.xmm16s(0) + (Bit32s) op.xmm16s(1) +
                 (Bit32s) op.xmm16s(2) + (Bit32s) op.xmm16s(3);
  op.xmm64s(1) = (Bit32s) op.xmm16s(4) + (Bit32s) op.xmm16s(5) +
                 (Bit32s) op.xmm16s(6) + (Bit32s) op.xmm16s(7);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDDQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm64s(0) = (Bit64s) op.xmm32s(0) + (Bit64s) op.xmm32s(1);
  op.xmm64s(1) = (Bit64s) op.xmm32s(2) + (Bit64s) op.xmm32s(3);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBW_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm16u(0) = (Bit16u) op.xmmubyte(0x0) + (Bit16u) op.xmmubyte(0x1);
  op.xmm16u(1) = (Bit16u) op.xmmubyte(0x2) + (Bit16u) op.xmmubyte(0x3);
  op.xmm16u(2) = (Bit16u) op.xmmubyte(0x4) + (Bit16u) op.xmmubyte(0x5);
  op.xmm16u(3) = (Bit16u) op.xmmubyte(0x6) + (Bit16u) op.xmmubyte(0x7);
  op.xmm16u(4) = (Bit16u) op.xmmubyte(0x8) + (Bit16u) op.xmmubyte(0x9);
  op.xmm16u(5) = (Bit16u) op.xmmubyte(0xA) + (Bit16u) op.xmmubyte(0xB);
  op.xmm16u(6) = (Bit16u) op.xmmubyte(0xC) + (Bit16u) op.xmmubyte(0xD);
  op.xmm16u(7) = (Bit16u) op.xmmubyte(0xE) + (Bit16u) op.xmmubyte(0xF);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm32u(0) = (Bit32u) op.xmmubyte(0x0) + (Bit32u) op.xmmubyte(0x1) +
                 (Bit32u) op.xmmubyte(0x2) + (Bit32u) op.xmmubyte(0x3);
  op.xmm32u(1) = (Bit32u) op.xmmubyte(0x4) + (Bit32u) op.xmmubyte(0x5) +
                 (Bit32u) op.xmmubyte(0x6) + (Bit32u) op.xmmubyte(0x7);
  op.xmm32u(2) = (Bit32u) op.xmmubyte(0x8) + (Bit32u) op.xmmubyte(0x9) +
                 (Bit32u) op.xmmubyte(0xA) + (Bit32u) op.xmmubyte(0xB);
  op.xmm32u(3) = (Bit32u) op.xmmubyte(0xC) + (Bit32u) op.xmmubyte(0xD) +
                 (Bit32u) op.xmmubyte(0xE) + (Bit32u) op.xmmubyte(0xF);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUBQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm64u(0) = (Bit32u) op.xmmubyte(0x0) + (Bit32u) op.xmmubyte(0x1) +
                 (Bit32u) op.xmmubyte(0x2) + (Bit32u) op.xmmubyte(0x3) +
                 (Bit32u) op.xmmubyte(0x4) + (Bit32u) op.xmmubyte(0x5) +
                 (Bit32u) op.xmmubyte(0x6) + (Bit32u) op.xmmubyte(0x7);
  op.xmm64u(1) = (Bit32u) op.xmmubyte(0x8) + (Bit32u) op.xmmubyte(0x9) +
                 (Bit32u) op.xmmubyte(0xA) + (Bit32u) op.xmmubyte(0xB) +
                 (Bit32u) op.xmmubyte(0xC) + (Bit32u) op.xmmubyte(0xD) +
                 (Bit32u) op.xmmubyte(0xE) + (Bit32u) op.xmmubyte(0xF);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUWD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm32u(0) = (Bit32u) op.xmm16u(0) + (Bit32u) op.xmm16u(1);
  op.xmm32u(1) = (Bit32u) op.xmm16u(2) + (Bit32u) op.xmm16u(3);
  op.xmm32u(2) = (Bit32u) op.xmm16u(4) + (Bit32u) op.xmm16u(5);
  op.xmm32u(3) = (Bit32u) op.xmm16u(6) + (Bit32u) op.xmm16u(7);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUWQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm64u(0) = (Bit32u) op.xmm16u(0) + (Bit32u) op.xmm16u(1) +
                 (Bit32u) op.xmm16u(2) + (Bit32u) op.xmm16u(3);
  op.xmm64u(1) = (Bit32u) op.xmm16u(4) + (Bit32u) op.xmm16u(5) +
                 (Bit32u) op.xmm16u(6) + (Bit32u) op.xmm16u(7);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHADDUDQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm64u(0) = (Bit64u) op.xmm32u(0) + (Bit64u) op.xmm32u(1);
  op.xmm64u(1) = (Bit64u) op.xmm32u(2) + (Bit64u) op.xmm32u(3);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}
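
// VPHSUB* horizontal subtracts: for each adjacent pair of elements the odd
// element is subtracted from the even one, widened to the next element size.
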
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBBW_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm16s(0) = (Bit16s) op.xmmsbyte(0x0) - (Bit16s) op.xmmsbyte(0x1);
  op.xmm16s(1) = (Bit16s) op.xmmsbyte(0x2) - (Bit16s) op.xmmsbyte(0x3);
  op.xmm16s(2) = (Bit16s) op.xmmsbyte(0x4) - (Bit16s) op.xmmsbyte(0x5);
  op.xmm16s(3) = (Bit16s) op.xmmsbyte(0x6) - (Bit16s) op.xmmsbyte(0x7);
  op.xmm16s(4) = (Bit16s) op.xmmsbyte(0x8) - (Bit16s) op.xmmsbyte(0x9);
  op.xmm16s(5) = (Bit16s) op.xmmsbyte(0xA) - (Bit16s) op.xmmsbyte(0xB);
  op.xmm16s(6) = (Bit16s) op.xmmsbyte(0xC) - (Bit16s) op.xmmsbyte(0xD);
  op.xmm16s(7) = (Bit16s) op.xmmsbyte(0xE) - (Bit16s) op.xmmsbyte(0xF);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBWD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm32s(0) = (Bit32s) op.xmm16s(0) - (Bit32s) op.xmm16s(1);
  op.xmm32s(1) = (Bit32s) op.xmm16s(2) - (Bit32s) op.xmm16s(3);
  op.xmm32s(2) = (Bit32s) op.xmm16s(4) - (Bit32s) op.xmm16s(5);
  op.xmm32s(3) = (Bit32s) op.xmm16s(6) - (Bit32s) op.xmm16s(7);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBDQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());

  op.xmm64s(0) = (Bit64s) op.xmm32s(0) - (Bit64s) op.xmm32s(1);
  op.xmm64s(1) = (Bit64s) op.xmm32s(2) - (Bit64s) op.xmm32s(3);

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}
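
// VPERMIL2PS/VPERMIL2PD: two-source permute with per-element selectors taken
// from the third source operand. The low two bits of the immediate are
// forwarded to the xmm_permil2ps/xmm_permil2pd helpers; per the XOP
// definition this is the m2z field, which controls selective zeroing of
// result elements.
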
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMIL2PS_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
  BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
  BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3()), result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < len; n++) {
    xmm_permil2ps(&result.ymm128(n), &op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), i->Ib() & 3);
  }

  BX_WRITE_YMM_REGZ_VLEN(i->dst(), result, len);

  BX_NEXT_INSTR(i);
}

BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMIL2PD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
  BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2());
  BxPackedYmmRegister op3 = BX_READ_YMM_REG(i->src3()), result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < len; n++) {
    xmm_permil2pd(&result.ymm128(n), &op1.ymm128(n), &op2.ymm128(n), &op3.ymm128(n), i->Ib() & 3);
  }

  BX_WRITE_YMM_REGZ_VLEN(i->dst(), result, len);

  BX_NEXT_INSTR(i);
}

#endif