implemented vperm* avx-512 opcodes
This commit is contained in:
parent
a5984f7cd0
commit
7beb85b850
@ -587,7 +587,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMD_VdqHdqWdqR(bxInstruction_c
|
||||
result.ymm32u(n) = op2.ymm32u(op1.ymm32u(n) & 0x7);
|
||||
|
||||
BX_WRITE_YMM_REGZ(i->dst(), result);
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
|
@ -623,6 +623,44 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR(bxInstru
|
||||
}
|
||||
}
|
||||
|
||||
// Handler for the EVEX VPERMPS register form (the opcode definitions also
// route VPERMD through it). Every dword of the result is taken from the
// src2 register, at the position named by the matching dword of the src1
// register.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister index_reg = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister table_reg = BX_READ_AVX_REG(i->src2());
  BxPackedAvxRegister permuted;

  unsigned vl = i->getVL();
  unsigned num_dwords = DWORD_ELEMENTS(vl);

  // Each index is reduced with (element count - 1) used as a bit mask, so a
  // selector always lands inside the current vector length.
  // NOTE(review): relies on DWORD_ELEMENTS() yielding a power of two - confirm.
  unsigned index_mask = num_dwords - 1;

  for (unsigned elem = 0; elem < num_dwords; elem++) {
    unsigned selector = index_reg.vmm32u(elem) & index_mask;
    permuted.vmm32u(elem) = table_reg.vmm32u(selector);
  }

  if (! i->opmask()) {
    // No opmask selected: store the whole result for this vector length.
    BX_WRITE_AVX_REGZ(i->dst(), permuted, vl);
  }
  else {
    // Opmask selected: per-dword masked store, driven by a 16-bit opmask read.
    avx512_write_regd_masked(i, &permuted, vl, BX_READ_16BIT_OPMASK(i->opmask()));
  }

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// Handler for the EVEX VPERMPD register form (the opcode definitions also
// route VPERMQ through it). Every qword of the result is taken from the
// src2 register, at the position named by the matching qword of the src1
// register.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister index_reg = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister table_reg = BX_READ_AVX_REG(i->src2());
  BxPackedAvxRegister permuted;

  unsigned vl = i->getVL();
  unsigned num_qwords = QWORD_ELEMENTS(vl);

  // Each index is reduced with (element count - 1) used as a bit mask, so a
  // selector always lands inside the current vector length.
  // NOTE(review): relies on QWORD_ELEMENTS() yielding a power of two - confirm.
  unsigned index_mask = num_qwords - 1;

  for (unsigned elem = 0; elem < num_qwords; elem++) {
    permuted.vmm64u(elem) = table_reg.vmm64u(index_reg.vmm64u(elem) & index_mask);
  }

  if (! i->opmask()) {
    // No opmask selected: store the whole result for this vector length.
    BX_WRITE_AVX_REGZ(i->dst(), permuted, vl);
  }
  else {
    // Opmask selected: per-qword masked store, driven by an 8-bit opmask read.
    avx512_write_regq_masked(i, &permuted, vl, BX_READ_8BIT_OPMASK(i->opmask()));
  }

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *i)
|
||||
{
|
||||
unsigned len = i->getVL();
|
||||
|
@ -3413,6 +3413,9 @@ public: // for now...
|
||||
BX_SMF BX_INSF_TYPE VPERMT2PS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPERMT2PD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF BX_INSF_TYPE VPERMPS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPERMPD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF BX_INSF_TYPE VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VINSERTI32x4_MASK_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VINSERTF64x4_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -1061,8 +1061,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
||||
/* 14 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3814_Mask },
|
||||
/* 15 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3815 },
|
||||
/* 15 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3815_Mask },
|
||||
/* 16 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 16 */ { 0, BX_IA_ERROR },
|
||||
/* 16 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMPS_VpsHpsWps_Kmask },
|
||||
/* 16 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMPS_VpsHpsWps_Kmask },
|
||||
/* 17 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 17 */ { 0, BX_IA_ERROR },
|
||||
/* 18 k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTSS_VpsWss },
|
||||
@ -1125,8 +1125,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
||||
/* 34 */ { BxVexW0 | BxPrefixSSEF3, BX_IA_V512_VPMOVQW_WdqVdq_Kmask },
|
||||
/* 35 k0 */ { BxVexW0 | BxPrefixSSEF3, BX_IA_V512_VPMOVQD_WdqVdq },
|
||||
/* 35 */ { BxVexW0 | BxPrefixSSEF3, BX_IA_V512_VPMOVQD_WdqVdq_Kmask },
|
||||
/* 36 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 36 */ { 0, BX_IA_ERROR },
|
||||
/* 36 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMD_VdqHdqWdq_Kmask },
|
||||
/* 36 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMD_VdqHdqWdq_Kmask },
|
||||
/* 37 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq },
|
||||
/* 37 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq },
|
||||
/* 38 k0 */ { 0, BX_IA_ERROR },
|
||||
|
@ -3113,6 +3113,12 @@ bx_define_opcode(BX_IA_V512_VPERMT2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST
|
||||
bx_define_opcode(BX_IA_V512_VPERMT2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMT2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
bx_define_opcode(BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
bx_define_opcode(BX_IA_V512_VPERMI2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VPERMD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
bx_define_opcode(BX_IA_V512_VPERMQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VPERMPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
bx_define_opcode(BX_IA_V512_VPERMPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
// VexW alias
|
||||
|
||||
// VexW64 aliased
|
||||
|
Loading…
Reference in New Issue
Block a user