Implemented some more AVX-512 opcodes (VPERMT2D/Q/PS/PD and VPERMI2D/Q/PS/PD)

This commit is contained in:
Stanislav Shwartsman 2014-01-24 12:02:47 +00:00
parent fa60a654c8
commit 407681c98a
4 changed files with 107 additions and 8 deletions

View File

@ -539,6 +539,90 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMQ_MASK_VdqWdqIbR(bxInstructio
BX_NEXT_INSTR(i);
}
// Two-table permute of 32-bit elements, register form, with optional opmask.
//
// Operand roles as used below:
//   src1 (op1) - per-element index vector
//   dst        - first table (also the destination, overwritten by the result)
//   src2 (op2) - second table
//
// For every result element n the low bits of op1[n] (masked with
// elements-1) pick an element position, and the bit whose value equals
// 'elements' picks the table: set -> op2, clear -> dst.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMT2PS_MASK_VpsHpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), result;
  unsigned len = i->getVL(), elements = DWORD_ELEMENTS(len);
  // NOTE(review): relies on 'elements' being a power of two - confirm DWORD_ELEMENTS
  unsigned shuffle_control_mask = elements - 1;

  // All reads come from the register copies taken above, so building the
  // result element by element cannot be disturbed by the later write to dst.
  for (unsigned n=0; n < elements; n++) {
    unsigned shuffle_control = (unsigned) (op1.vmm32u(n) & shuffle_control_mask);
    result.vmm32u(n) = (op1.vmm32u(n) & elements) ? op2.vmm32u(shuffle_control) : dst.vmm32u(shuffle_control);
  }

  if (i->opmask()) {
    // a non-zero opmask selector: merge/zero per element through the masked writer
    avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  }
  else {
    BX_WRITE_AVX_REGZ(i->dst(), result, len);
  }

  // Finish the instruction the same way the other handlers in this file do;
  // the original body fell off the end without this step.
  BX_NEXT_INSTR(i);
}
// Two-table permute of 64-bit elements, register form, with optional opmask.
//
// Operand roles as used below:
//   src1 (op1) - per-element index vector
//   dst        - first table (also the destination, overwritten by the result)
//   src2 (op2) - second table
//
// For every result element n the low bits of op1[n] (masked with
// elements-1) pick an element position, and the bit whose value equals
// 'elements' picks the table: set -> op2, clear -> dst.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMT2PD_MASK_VpdHpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), result;
  unsigned len = i->getVL(), elements = QWORD_ELEMENTS(len);
  // NOTE(review): relies on 'elements' being a power of two - confirm QWORD_ELEMENTS
  unsigned shuffle_control_mask = elements - 1;

  // All reads come from the register copies taken above, so building the
  // result element by element cannot be disturbed by the later write to dst.
  for (unsigned n=0; n < elements; n++) {
    unsigned shuffle_control = (unsigned) (op1.vmm64u(n) & shuffle_control_mask);
    result.vmm64u(n) = (op1.vmm64u(n) & elements) ? op2.vmm64u(shuffle_control) : dst.vmm64u(shuffle_control);
  }

  if (i->opmask()) {
    // a non-zero opmask selector: merge/zero per element through the masked writer
    avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  }
  else {
    BX_WRITE_AVX_REGZ(i->dst(), result, len);
  }

  // Finish the instruction the same way the other handlers in this file do;
  // the original body fell off the end without this step.
  BX_NEXT_INSTR(i);
}
// Two-table permute of 32-bit elements where the destination register
// supplies the indices; register form, with optional opmask.
//
// Operand roles as used below:
//   dst        - per-element index vector (overwritten by the result)
//   src1 (op1) - first table
//   src2 (op2) - second table
//
// For every result element n the low bits of dst[n] (masked with
// elements-1) pick an element position, and the bit whose value equals
// 'elements' picks the table: set -> op2, clear -> op1.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), result;
  unsigned len = i->getVL(), elements = DWORD_ELEMENTS(len);
  // NOTE(review): relies on 'elements' being a power of two - confirm DWORD_ELEMENTS
  unsigned shuffle_control_mask = elements - 1;

  // Indices are taken from the copy of dst read above, so the result can be
  // written back to the same register afterwards without losing them.
  for (unsigned n=0; n < elements; n++) {
    unsigned shuffle_control = (unsigned) (dst.vmm32u(n) & shuffle_control_mask);
    result.vmm32u(n) = (dst.vmm32u(n) & elements) ? op2.vmm32u(shuffle_control) : op1.vmm32u(shuffle_control);
  }

  if (i->opmask()) {
    // a non-zero opmask selector: merge/zero per element through the masked writer
    avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  }
  else {
    BX_WRITE_AVX_REGZ(i->dst(), result, len);
  }

  // Finish the instruction the same way the other handlers in this file do;
  // the original body fell off the end without this step.
  BX_NEXT_INSTR(i);
}
// Two-table permute of 64-bit elements where the destination register
// supplies the indices; register form, with optional opmask.
//
// Operand roles as used below:
//   dst        - per-element index vector (overwritten by the result)
//   src1 (op1) - first table
//   src2 (op2) - second table
//
// For every result element n the low bits of dst[n] (masked with
// elements-1) pick an element position, and the bit whose value equals
// 'elements' picks the table: set -> op2, clear -> op1.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), result;
  unsigned len = i->getVL(), elements = QWORD_ELEMENTS(len);
  // NOTE(review): relies on 'elements' being a power of two - confirm QWORD_ELEMENTS
  unsigned shuffle_control_mask = elements - 1;

  // Indices are taken from the copy of dst read above, so the result can be
  // written back to the same register afterwards without losing them.
  for (unsigned n=0; n < elements; n++) {
    unsigned shuffle_control = (unsigned) (dst.vmm64u(n) & shuffle_control_mask);
    result.vmm64u(n) = (dst.vmm64u(n) & elements) ? op2.vmm64u(shuffle_control) : op1.vmm64u(shuffle_control);
  }

  if (i->opmask()) {
    // a non-zero opmask selector: merge/zero per element through the masked writer
    avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  }
  else {
    BX_WRITE_AVX_REGZ(i->dst(), result, len);
  }

  // Finish the instruction the same way the other handlers in this file do;
  // the original body fell off the end without this step.
  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *i)
{
unsigned len = i->getVL();

View File

@ -3408,6 +3408,11 @@ public: // for now...
BX_SMF BX_INSF_TYPE VSHUFF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSHUFF64x2_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// Two-table permute handlers (register form, opmask-aware):
// VPERMI2* takes its indices from the destination register,
// VPERMT2* takes them from the first source register.
BX_SMF BX_INSF_TYPE VPERMI2PS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPERMI2PD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPERMT2PS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPERMT2PD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTI32x4_MASK_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTF64x4_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -1253,10 +1253,10 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 74 */ { 0, BX_IA_ERROR },
/* 75 k0 */ { 0, BX_IA_ERROR },
/* 75 */ { 0, BX_IA_ERROR },
/* 76 k0 */ { 0, BX_IA_ERROR },
/* 76 */ { 0, BX_IA_ERROR },
/* 77 k0 */ { 0, BX_IA_ERROR },
/* 77 */ { 0, BX_IA_ERROR },
/* 76 k0 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMI2D_VdqHdqWdq_Kmask },
/* 76 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMI2D_VdqHdqWdq_Kmask },
/* 77 k0 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask },
/* 77 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask },
/* 78 k0 */ { 0, BX_IA_ERROR },
/* 78 */ { 0, BX_IA_ERROR },
/* 79 k0 */ { 0, BX_IA_ERROR },
@ -1269,10 +1269,10 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 7C */ { BxPrefixSSE66 | BxAliasVexW64, BX_IA_V512_VPBROADCASTD_VdqEd_Kmask },
/* 7D k0 */ { 0, BX_IA_ERROR },
/* 7D */ { 0, BX_IA_ERROR },
/* 7E k0 */ { 0, BX_IA_ERROR },
/* 7E */ { 0, BX_IA_ERROR },
/* 7F k0 */ { 0, BX_IA_ERROR },
/* 7F */ { 0, BX_IA_ERROR },
/* 7E k0 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMT2D_VdqHdqWdq_Kmask },
/* 7E */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMT2D_VdqHdqWdq_Kmask },
/* 7F k0 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMT2PS_VpsHpsWps_Kmask },
/* 7F */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMT2PS_VpsHpsWps_Kmask },
/* 80 k0 */ { 0, BX_IA_ERROR },
/* 80 */ { 0, BX_IA_ERROR },
/* 81 k0 */ { 0, BX_IA_ERROR },

View File

@ -3103,6 +3103,16 @@ bx_define_opcode(BX_IA_V512_VFIXUPIMMPS_VpsHpsWpsIb, &BX_CPU_C::LOAD_BROADCAST_V
bx_define_opcode(BX_IA_V512_VFIXUPIMMPD_VpdHpdWpdIb, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VFIXUPIMMPD_VpdHpdWpdIbR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_Ib, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VFIXUPIMMPS_VpsHpsWpsIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VFIXUPIMMPS_MASK_VpsHpsWpsIbR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_Ib, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VFIXUPIMMPD_VpdHpdWpdIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VFIXUPIMMPD_MASK_VpdHpdWpdIbR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_Ib, BX_PREPARE_EVEX)
// Two-table permutes. The integer forms (D/Q) are bound to the same execution
// handlers as the floating-point forms (PS/PD); only the operand descriptors differ.
// Each 32-bit-element entry is immediately followed by its 64-bit-element
// counterpart - NOTE(review): presumably required by the VexW aliasing used in
// the EVEX opcode table; confirm before reordering.
bx_define_opcode(BX_IA_V512_VPERMT2D_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMT2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMT2Q_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMT2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMI2D_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMI2Q_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMT2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMT2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMT2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMT2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPERMI2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
// VexW alias
// VexW64 aliased