implemented vperm* avx-512 opcodes

This commit is contained in:
Stanislav Shwartsman 2014-01-24 19:23:19 +00:00
parent a5984f7cd0
commit 7beb85b850
5 changed files with 51 additions and 5 deletions

View File

@ -587,7 +587,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMD_VdqHdqWdqR(bxInstruction_c
result.ymm32u(n) = op2.ymm32u(op1.ymm32u(n) & 0x7);
BX_WRITE_YMM_REGZ(i->dst(), result);
BX_NEXT_INSTR(i);
}

View File

@ -623,6 +623,44 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR(bxInstru
}
}
// Variable permute of 32-bit elements, with optional opmask.
//
// For every dword lane n of the current vector length, the dword found in
// lane n of src1 is used as a selector: it is ANDed with (element count - 1)
// and the result picks which dword of src2 is copied into lane n of the
// destination.
//
// When the instruction carries a non-zero opmask the result is committed
// through avx512_write_regd_masked() using the 16-bit view of that opmask;
// otherwise it is written with BX_WRITE_AVX_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR(bxInstruction_c *i)
{
  const unsigned len = i->getVL();
  const unsigned num_dwords = DWORD_ELEMENTS(len);
  // Keeps every selector inside the range of lanes valid for this length.
  const unsigned selector_mask = num_dwords - 1;

  BxPackedAvxRegister selectors = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister table = BX_READ_AVX_REG(i->src2());
  BxPackedAvxRegister permuted;

  for (unsigned lane = 0; lane < num_dwords; lane++) {
    const unsigned pick = selectors.vmm32u(lane) & selector_mask;
    permuted.vmm32u(lane) = table.vmm32u(pick);
  }

  if (! i->opmask()) {
    BX_WRITE_AVX_REGZ(i->dst(), permuted, len);
  }
  else {
    avx512_write_regd_masked(i, &permuted, len, BX_READ_16BIT_OPMASK(i->opmask()));
  }

  BX_NEXT_INSTR(i);
}
// Variable permute of 64-bit elements, with optional opmask.
//
// For every qword lane n of the current vector length, the qword found in
// lane n of src1 is used as a selector: it is ANDed with (element count - 1)
// and the result picks which qword of src2 is copied into lane n of the
// destination.
//
// When the instruction carries a non-zero opmask the result is committed
// through avx512_write_regq_masked() using the 8-bit view of that opmask;
// otherwise it is written with BX_WRITE_AVX_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR(bxInstruction_c *i)
{
  const unsigned len = i->getVL();
  const unsigned num_qwords = QWORD_ELEMENTS(len);
  // Keeps every selector inside the range of lanes valid for this length.
  const unsigned selector_mask = num_qwords - 1;

  BxPackedAvxRegister selectors = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister table = BX_READ_AVX_REG(i->src2());
  BxPackedAvxRegister permuted;

  for (unsigned lane = 0; lane < num_qwords; lane++) {
    permuted.vmm64u(lane) = table.vmm64u(selectors.vmm64u(lane) & selector_mask);
  }

  if (! i->opmask()) {
    BX_WRITE_AVX_REGZ(i->dst(), permuted, len);
  }
  else {
    avx512_write_regq_masked(i, &permuted, len, BX_READ_8BIT_OPMASK(i->opmask()));
  }

  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *i)
{
unsigned len = i->getVL();

View File

@ -3413,6 +3413,9 @@ public: // for now...
BX_SMF BX_INSF_TYPE VPERMT2PS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPERMT2PD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// Variable permute handlers: each destination element is selected from src2
// by the index held in the matching element of src1 (32-bit elements for the
// PS form, 64-bit elements for the PD form); the result is written through
// the opmask when one is specified.
BX_SMF BX_INSF_TYPE VPERMPS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPERMPD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTI32x4_MASK_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTF64x4_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -1061,8 +1061,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 14 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3814_Mask },
/* 15 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3815 },
/* 15 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3815_Mask },
/* 16 k0 */ { 0, BX_IA_ERROR },
/* 16 */ { 0, BX_IA_ERROR },
/* 16 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMPS_VpsHpsWps_Kmask },
/* 16 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMPS_VpsHpsWps_Kmask },
/* 17 k0 */ { 0, BX_IA_ERROR },
/* 17 */ { 0, BX_IA_ERROR },
/* 18 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTSS_VpsWss },
@ -1125,8 +1125,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 34 */ { BxVexW0 | BxPrefixSSEF3, BX_IA_V512_VPMOVQW_WdqVdq_Kmask },
/* 35 k0 */ { BxVexW0 | BxPrefixSSEF3, BX_IA_V512_VPMOVQD_WdqVdq },
/* 35 */ { BxVexW0 | BxPrefixSSEF3, BX_IA_V512_VPMOVQD_WdqVdq_Kmask },
/* 36 k0 */ { 0, BX_IA_ERROR },
/* 36 */ { 0, BX_IA_ERROR },
/* 36 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMD_VdqHdqWdq_Kmask },
/* 36 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMD_VdqHdqWdq_Kmask },
/* 37 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq },
/* 37 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq },
/* 38 k0 */ { 0, BX_IA_ERROR },

View File

@ -3113,6 +3113,12 @@ bx_define_opcode(BX_IA_V512_VPERMT2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST
bx_define_opcode(BX_IA_V512_VPERMT2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMT2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMI2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
// Variable permutes. The integer forms (VPERMD/VPERMQ) are bound to the same
// execute handlers as the floating-point forms (VPERMPS/VPERMPD); the entries
// differ only in opcode id and operand type tags (OP_Vdq/Hdq/Wdq versus
// OP_Vps/Hps/Wps and OP_Vpd/Hpd/Wpd).
// NOTE(review): the D->Q and PS->PD ordering looks like it is relied upon by
// the BxAliasVexW entries in the EVEX opcode table - confirm before reordering.
bx_define_opcode(BX_IA_V512_VPERMD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
// VexW alias
// VexW64 aliased