Implemented the VPERMIL2PS/VPERMIL2PD XOP instructions
This commit is contained in:
parent
e4b92b55bb
commit
b1a6b34616
@ -2978,6 +2978,8 @@ public: // for now...
|
|||||||
BX_SMF BX_INSF_TYPE VPHSUBBW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VPHSUBBW_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
BX_SMF BX_INSF_TYPE VPHSUBWD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VPHSUBWD_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
BX_SMF BX_INSF_TYPE VPHSUBDQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VPHSUBDQ_VdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
/* VPERMIL2PS execution handler; takes the decoded instruction as its only argument. */
BX_SMF BX_INSF_TYPE VPERMIL2PS_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
/* VPERMIL2PD execution handler; takes the decoded instruction as its only argument. */
BX_SMF BX_INSF_TYPE VPERMIL2PD_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
/* XOP (AMD) */
|
/* XOP (AMD) */
|
||||||
|
|
||||||
/* TBM (AMD) */
|
/* TBM (AMD) */
|
||||||
|
@ -1325,8 +1325,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
|
|||||||
/* 45 /0 */ { 0, BX_IA_ERROR },
|
/* 45 /0 */ { 0, BX_IA_ERROR },
|
||||||
/* 46 /0 */ { 0, BX_IA_ERROR },
|
/* 46 /0 */ { 0, BX_IA_ERROR },
|
||||||
/* 47 /0 */ { 0, BX_IA_ERROR },
|
/* 47 /0 */ { 0, BX_IA_ERROR },
|
||||||
/* 48 /0 */ { 0, BX_IA_ERROR },
|
/* 48 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPERMIL2PS_VdqHdqWdqIb },
|
||||||
/* 49 /0 */ { 0, BX_IA_ERROR },
|
/* 49 /0 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPERMIL2PD_VdqHdqWdqIb },
|
||||||
/* 4A /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPS_VpsHpsWpsIb },
|
/* 4A /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPS_VpsHpsWpsIb },
|
||||||
/* 4B /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPD_VpdHpdWpdIb },
|
/* 4B /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPD_VpdHpdWpdIb },
|
||||||
/* 4C /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_V128_VPBLENDVB_VdqHdqWdqIb },
|
/* 4C /0 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_V128_VPBLENDVB_VdqHdqWdqIb },
|
||||||
@ -2099,8 +2099,8 @@ static const BxOpcodeInfo_t BxOpcodeTableAVX[256*3*2] = {
|
|||||||
/* 45 /1 */ { 0, BX_IA_ERROR },
|
/* 45 /1 */ { 0, BX_IA_ERROR },
|
||||||
/* 46 /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib, BX_IA_V256_VPERM2I128_VdqHdqWdqIb },
|
/* 46 /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib, BX_IA_V256_VPERM2I128_VdqHdqWdqIb },
|
||||||
/* 47 /1 */ { 0, BX_IA_ERROR },
|
/* 47 /1 */ { 0, BX_IA_ERROR },
|
||||||
/* 48 /1 */ { 0, BX_IA_ERROR },
|
/* 48 /1 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPERMIL2PS_VdqHdqWdqIb },
|
||||||
/* 49 /1 */ { 0, BX_IA_ERROR },
|
/* 49 /1 */ { BxPrefixSSE66 | BxImmediate_Ib, BX_IA_VPERMIL2PD_VdqHdqWdqIb },
|
||||||
/* 4A /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPS_VpsHpsWpsIb },
|
/* 4A /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPS_VpsHpsWpsIb },
|
||||||
/* 4B /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPD_VpdHpdWpdIb },
|
/* 4B /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_VBLENDVPD_VpdHpdWpdIb },
|
||||||
/* 4C /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_V256_VPBLENDVB_VdqHdqWdqIb },
|
/* 4C /1 */ { BxPrefixSSE66 | BxVexW0 | BxImmediate_Ib4, BX_IA_V256_VPBLENDVB_VdqHdqWdqIb },
|
||||||
|
@ -2015,6 +2015,8 @@ bx_define_opcode(BX_IA_VPHADDUDQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHADD
|
|||||||
bx_define_opcode(BX_IA_VPHSUBBW_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBBW_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
|
bx_define_opcode(BX_IA_VPHSUBBW_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBBW_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
|
||||||
bx_define_opcode(BX_IA_VPHSUBWD_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBWD_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
|
bx_define_opcode(BX_IA_VPHSUBWD_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBWD_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
|
||||||
bx_define_opcode(BX_IA_VPHSUBDQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBDQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
|
bx_define_opcode(BX_IA_VPHSUBDQ_VdqWdq, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VPHSUBDQ_VdqWdqR, BX_ISA_XOP, BX_PREPARE_AVX | BX_VEX_NO_VVV)
|
||||||
|
// VPERMIL2PS: pairs LOAD_Vector with the VPERMIL2PS_VdqHdqWdqIbR handler, tagged BX_ISA_XOP / BX_PREPARE_AVX.
// NOTE(review): presumably LOAD_Vector serves the memory form and the ...R handler the register form - confirm against bx_define_opcode.
bx_define_opcode(BX_IA_VPERMIL2PS_VdqHdqWdqIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPERMIL2PS_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
|
||||||
|
// VPERMIL2PD: pairs LOAD_Vector with the VPERMIL2PD_VdqHdqWdqIbR handler, tagged BX_ISA_XOP / BX_PREPARE_AVX.
// NOTE(review): presumably LOAD_Vector serves the memory form and the ...R handler the register form - confirm against bx_define_opcode.
bx_define_opcode(BX_IA_VPERMIL2PD_VdqHdqWdqIb, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPERMIL2PD_VdqHdqWdqIbR, BX_ISA_XOP, BX_PREPARE_AVX)
|
||||||
// XOP (AMD)
|
// XOP (AMD)
|
||||||
|
|
||||||
// TBM (AMD)
|
// TBM (AMD)
|
||||||
|
@ -357,6 +357,28 @@ BX_CPP_INLINE void sse_permilpd(BxPackedXmmRegister *r, const BxPackedXmmRegiste
|
|||||||
r->xmm64u(1) = op1->xmm64u((op2->xmm32u(2) >> 1) & 0x1);
|
r->xmm64u(1) = op1->xmm64u((op2->xmm32u(2) >> 1) & 0x1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Two-source permute of four packed 32-bit elements.
//
//   r    - destination; every dword is overwritten
//   op1  - first data source
//   op2  - second data source
//   op3  - per-element control: dword n of op3 steers dword n of r
//   m2z  - zeroing mode, XOR'ed with control bit 3 (see below)
//
// Control dword layout, as used here:
//   bits 1:0 - index of the dword to fetch from the chosen source
//   bit  2   - set: fetch from op1, clear: fetch from op2
//   bit  3   - "match" bit; the element is forced to zero when
//              (m2z ^ match) == 3, i.e. for m2z == 2 with match set
//              or m2z == 3 with match clear
//
// NOTE(review): which source bit 2 selects should be confirmed against the
// AMD XOP specification.
BX_CPP_INLINE void sse_permil2ps(BxPackedXmmRegister *r, const BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3, unsigned m2z)
{
  unsigned lane = 0;

  while (lane < 4) {
    const Bit32u selector = op3->xmm32u(lane);
    const unsigned match = (selector >> 3) & 0x1;
    const unsigned index = selector & 0x3;

    if ((m2z ^ match) == 0x3) {
      r->xmm32u(lane) = 0;
    }
    else if (selector & 0x4) {
      r->xmm32u(lane) = op1->xmm32u(index);
    }
    else {
      r->xmm32u(lane) = op2->xmm32u(index);
    }

    lane++;
  }
}
|
||||||
|
|
||||||
|
// Two-source permute of two packed 64-bit elements.
//
//   r    - destination; both qwords are overwritten
//   op1  - first data source
//   op2  - second data source
//   op3  - per-element control: the low dword of qword n of op3
//          (dword index n*2) steers qword n of r
//   m2z  - zeroing mode, XOR'ed with control bit 3 (see below)
//
// Control dword layout, as used here:
//   bit 1 - index of the qword to fetch from the chosen source
//   bit 2 - set: fetch from op1, clear: fetch from op2
//   bit 3 - "match" bit; the element is forced to zero when
//           (m2z ^ match) == 3, i.e. for m2z == 2 with match set
//           or m2z == 3 with match clear
//
// NOTE(review): which source bit 2 selects should be confirmed against the
// AMD XOP specification.
BX_CPP_INLINE void sse_permil2pd(BxPackedXmmRegister *r, const BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *op3, unsigned m2z)
{
  unsigned qword = 0;

  while (qword < 2) {
    const unsigned ctrl_dword = qword * 2;
    const Bit32u selector = op3->xmm32u(ctrl_dword);
    const unsigned match = (selector >> 3) & 0x1;
    const unsigned index = (selector >> 1) & 0x1;

    if ((m2z ^ match) == 0x3) {
      r->xmm64u(qword) = 0;
    }
    else if (selector & 0x4) {
      r->xmm64u(qword) = op1->xmm64u(index);
    }
    else {
      r->xmm64u(qword) = op2->xmm64u(index);
    }

    qword++;
  }
}
|
||||||
|
|
||||||
// sign
|
// sign
|
||||||
|
|
||||||
BX_CPP_INLINE void sse_psignb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
|
BX_CPP_INLINE void sse_psignb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2)
|
||||||
|
@ -887,4 +887,56 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPHSUBDQ_VdqWdqR(bxInstruction_c *
|
|||||||
BX_NEXT_INSTR(i);
|
BX_NEXT_INSTR(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VPERMIL2PS handler operating on register operands.
//
// Operands:
//   op1 - register vvv
//   op2 - second data source
//   op3 - per-element control words
// One of op2/op3 comes from register rm and the other from the register
// encoded in bits 7:4 of the immediate; VEX.W picks which is which.
// Bits 1:0 of the immediate are handed to sse_permil2ps() as the zeroing mode.
// The permute is applied to each 128-bit lane up to getVL() and the result is
// written to register nnn through BX_WRITE_AVX_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMIL2PS_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->vvv());
  BxPackedAvxRegister op2, op3, result;

  // Register number carried in the upper nibble of imm8; only three bits of
  // it are honoured outside 64-bit mode.
  int is4_reg = i->Ib() >> 4;
  if (! long64_mode())
    is4_reg &= 0x7;

  if (! i->getVexW()) {
    // W=0: rm is the data source, the imm-encoded register is the control.
    op2 = BX_READ_AVX_REG(i->rm());
    op3 = BX_READ_AVX_REG(is4_reg);
  }
  else {
    // W=1: the imm-encoded register is the data source, rm is the control.
    op2 = BX_READ_AVX_REG(is4_reg);
    op3 = BX_READ_AVX_REG(i->rm());
  }

  unsigned len = i->getVL();

  for (unsigned lane = 0; lane < len; lane++) {
    sse_permil2ps(&result.avx128(lane), &op1.avx128(lane), &op2.avx128(lane), &op3.avx128(lane), i->Ib() & 3);
  }

  BX_WRITE_AVX_REGZ(i->nnn(), result, len);

  BX_NEXT_INSTR(i);
}
|
||||||
|
|
||||||
|
// VPERMIL2PD handler operating on register operands.
//
// Operands:
//   op1 - register vvv
//   op2 - second data source
//   op3 - per-element control words
// One of op2/op3 comes from register rm and the other from the register
// encoded in bits 7:4 of the immediate; VEX.W picks which is which.
// Bits 1:0 of the immediate are handed to sse_permil2pd() as the zeroing mode.
// The permute is applied to each 128-bit lane up to getVL() and the result is
// written to register nnn through BX_WRITE_AVX_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMIL2PD_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->vvv());
  BxPackedAvxRegister op2, op3, result;

  // Register number carried in the upper nibble of imm8; only three bits of
  // it are honoured outside 64-bit mode.
  int is4_reg = i->Ib() >> 4;
  if (! long64_mode())
    is4_reg &= 0x7;

  if (! i->getVexW()) {
    // W=0: rm is the data source, the imm-encoded register is the control.
    op2 = BX_READ_AVX_REG(i->rm());
    op3 = BX_READ_AVX_REG(is4_reg);
  }
  else {
    // W=1: the imm-encoded register is the data source, rm is the control.
    op2 = BX_READ_AVX_REG(is4_reg);
    op3 = BX_READ_AVX_REG(i->rm());
  }

  unsigned len = i->getVL();

  for (unsigned lane = 0; lane < len; lane++) {
    sse_permil2pd(&result.avx128(lane), &op1.avx128(lane), &op2.avx128(lane), &op3.avx128(lane), i->Ib() & 3);
  }

  BX_WRITE_AVX_REGZ(i->nnn(), result, len);

  BX_NEXT_INSTR(i);
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user