Implemented VPERMIL2PS/VPERMIL2PD XOP instructions

This commit is contained in:
Stanislav Shwartsman 2011-10-20 17:37:57 +00:00
parent e4b92b55bb
commit b1a6b34616
5 changed files with 82 additions and 4 deletions

View File

@ -2978,6 +2978,8 @@ public: // for now...
BX_SMF BX_INSF_TYPE VPHSUBBW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHSUBWD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPHSUBDQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPERMIL2PS_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPERMIL2PD_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
/* XOP (AMD) */
/* TBM (AMD) */

View File

@ -1325,8 +1325,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 45 /0 */ { 0, BX_IA_ERROR },
/* 46 /0 */ { 0, BX_IA_ERROR },
/* 47 /0 */ { 0, BX_IA_ERROR },
/* 48 /0 */ { 0, BX_IA_ERROR },
/* 49 /0 */ { 0, BX_IA_ERROR },
/* 48 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPERMIL2PS_VdqHdqWdqIb },
/* 49 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPERMIL2PD_VdqHdqWdqIb },
/* 4A /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPS_VpsHpsWpsIb },
/* 4B /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPD_VpdHpdWpdIb },
/* 4C /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_V128_VPBLENDVB_VdqHdqWdqIb },
@ -2099,8 +2099,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
/* 45 /1 */ { 0, BX_IA_ERROR },
/* 46 /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib, BX_IA_V256_VPERM2I128_VdqHdqWdqIb },
/* 47 /1 */ { 0, BX_IA_ERROR },
/* 48 /1 */ { 0, BX_IA_ERROR },
/* 49 /1 */ { 0, BX_IA_ERROR },
/* 48 /1 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPERMIL2PS_VdqHdqWdqIb },
/* 49 /1 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPERMIL2PD_VdqHdqWdqIb },
/* 4A /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPS_VpsHpsWpsIb },
/* 4B /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPD_VpdHpdWpdIb },
/* 4C /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_V256_VPBLENDVB_VdqHdqWdqIb },

View File

@ -2015,6 +2015,8 @@ bx_define_opcode(BX_IA_VPHADDUDQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADD
bx_define_opcode(BX_IA_VPHSUBBW_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBBW_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHSUBWD_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBWD_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPHSUBDQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBDQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
bx_define_opcode(BX_IA_VPERMIL2PS_VdqHdqWdqIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPERMIL2PS_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
bx_define_opcode(BX_IA_VPERMIL2PD_VdqHdqWdqIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPERMIL2PD_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
// XOP (AMD)
// TBM (AMD)

View File

@ -357,6 +357,28 @@ BX_CPP_INLINE void sse_permilpd(BxPackedXmmRegister *r, const BxPackedXmmRegiste
r->xmm64u(1) = op1->xmm64u((op2->xmm32u(2) >> 1) & 0x1);
}
// Two-source permute of four packed 32-bit elements (one 128-bit lane of
// VPERMIL2PS).
//
//   r   - destination, every dword is overwritten
//   op1 - first data source
//   op2 - second data source
//   op3 - per-element control, one control dword per destination dword
//   m2z - 2-bit match-to-zero mode
//
// Control dword layout for destination element n (taken from op3 dword n):
//   bits [1:0] - index of the source dword to copy
//   bit  [2]   - source select: 0 = op1, 1 = op2
//   bit  [3]   - match bit, evaluated together with m2z
//
// The element is zeroed when (m2z == 2 and match bit == 1) or
// (m2z == 3 and match bit == 0); m2z values 0 and 1 never zero anything.
BX_CPP_INLINE void sse_permil2ps(BxPackedXmmRegister *r, const BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3, unsigned m2z)
{
  for(unsigned n=0; n < 4; n++) {
    Bit32u ctrl = op3->xmm32u(n);
    // m2z ^ match == 3 only for the two zeroing combinations listed above
    if ((m2z ^ ((ctrl >> 3) & 0x1)) == 0x3)
      r->xmm32u(n) = 0;
    else
      // selector values 0..3 address the first source, 4..7 the second one
      // (the two sources used to be swapped here)
      r->xmm32u(n) = (ctrl & 0x4) ? op2->xmm32u(ctrl & 0x3) : op1->xmm32u(ctrl & 0x3);
  }
}
// Two-source permute of two packed 64-bit elements (one 128-bit lane of
// VPERMIL2PD).
//
//   r   - destination, both qwords are overwritten
//   op1 - first data source
//   op2 - second data source
//   op3 - per-element control, read from the low dword of each qword
//   m2z - 2-bit match-to-zero mode
//
// Control layout for destination element n (low dword of op3 qword n):
//   bit [1] - index of the source qword to copy
//   bit [2] - source select: 0 = op1, 1 = op2
//   bit [3] - match bit, evaluated together with m2z
// Bit 0 is not examined.
//
// The element is zeroed when (m2z == 2 and match bit == 1) or
// (m2z == 3 and match bit == 0); m2z values 0 and 1 never zero anything.
BX_CPP_INLINE void sse_permil2pd(BxPackedXmmRegister *r, const BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3, unsigned m2z)
{
  for(unsigned n=0; n < 2; n++) {
    Bit32u ctrl = op3->xmm32u(n*2);
    // m2z ^ match == 3 only for the two zeroing combinations listed above
    if ((m2z ^ ((ctrl >> 3) & 0x1)) == 0x3)
      r->xmm64u(n) = 0;
    else
      // selector values 0..1 address the first source, 2..3 the second one
      // (the two sources used to be swapped here)
      r->xmm64u(n) = (ctrl & 0x4) ? op2->xmm64u((ctrl >> 1) & 0x1) : op1->xmm64u((ctrl >> 1) & 0x1);
  }
}
// sign
BX_CPP_INLINE void sse_psignb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)

View File

@ -887,4 +887,56 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBDQ_VdqWdqR(bxInstruction_c *
BX_NEXT_INSTR(i);
}
// VPERMIL2PS handler.
// Operands: destination = nnn, first data source = vvv. The remaining two
// operands come from rm and from the register named by imm8[7:4]; VEX.W
// chooses which of them is the second data source and which one holds the
// per-element control. imm8[1:0] is handed to sse_permil2ps as its m2z mode.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMIL2PS_VdqHdqWdqIbR(bxInstruction_c *i)
{
  const unsigned imm8 = i->Ib();

  // register number encoded in imm8[7:4]; only 3 bits are kept outside 64-bit mode
  unsigned imm_src = imm8 >> 4;
  if (! long64_mode())
    imm_src &= 0x7;

  // VEX.W=0: data from rm, control from imm8[7:4]; VEX.W=1: the other way round
  unsigned data_src = i->rm(), ctrl_src = imm_src;
  if (i->getVexW()) {
    data_src = imm_src;
    ctrl_src = i->rm();
  }

  BxPackedAvxRegister src1 = BX_READ_AVX_REG(i->vvv());
  BxPackedAvxRegister src2 = BX_READ_AVX_REG(data_src);
  BxPackedAvxRegister ctrl = BX_READ_AVX_REG(ctrl_src);
  BxPackedAvxRegister dst;

  const unsigned lanes = i->getVL();
  const unsigned m2z = imm8 & 3;

  // each 128-bit lane is permuted independently
  for (unsigned lane = 0; lane < lanes; lane++) {
    sse_permil2ps(&dst.avx128(lane), &src1.avx128(lane), &src2.avx128(lane), &ctrl.avx128(lane), m2z);
  }

  BX_WRITE_AVX_REGZ(i->nnn(), dst, lanes);

  BX_NEXT_INSTR(i);
}
// VPERMIL2PD handler.
// Operands: destination = nnn, first data source = vvv. The remaining two
// operands come from rm and from the register named by imm8[7:4]; VEX.W
// chooses which of them is the second data source and which one holds the
// per-element control. imm8[1:0] is handed to sse_permil2pd as its m2z mode.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMIL2PD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  const unsigned imm8 = i->Ib();

  // register number encoded in imm8[7:4]; only 3 bits are kept outside 64-bit mode
  unsigned imm_src = imm8 >> 4;
  if (! long64_mode())
    imm_src &= 0x7;

  // VEX.W=0: data from rm, control from imm8[7:4]; VEX.W=1: the other way round
  unsigned data_src = i->rm(), ctrl_src = imm_src;
  if (i->getVexW()) {
    data_src = imm_src;
    ctrl_src = i->rm();
  }

  BxPackedAvxRegister src1 = BX_READ_AVX_REG(i->vvv());
  BxPackedAvxRegister src2 = BX_READ_AVX_REG(data_src);
  BxPackedAvxRegister ctrl = BX_READ_AVX_REG(ctrl_src);
  BxPackedAvxRegister dst;

  const unsigned lanes = i->getVL();
  const unsigned m2z = imm8 & 3;

  // each 128-bit lane is permuted independently
  for (unsigned lane = 0; lane < lanes; lane++) {
    sse_permil2pd(&dst.avx128(lane), &src1.avx128(lane), &src2.avx128(lane), &ctrl.avx128(lane), m2z);
  }

  BX_WRITE_AVX_REGZ(i->nnn(), dst, lanes);

  BX_NEXT_INSTR(i);
}
#endif