From 407681c98adb48137bd66a44267c2da49ba44560 Mon Sep 17 00:00:00 2001
From: Stanislav Shwartsman
Date: Fri, 24 Jan 2014 12:02:47 +0000
Subject: [PATCH] implemented some more avx-512 opcodes

Adds the two-table permutes VPERMT2D/Q/PS/PD and VPERMI2D/Q/PS/PD:
execution handlers, their declarations, EVEX decode table entries for
opcodes 76/77/7E/7F and the matching opcode definitions.

Every new handler ends with BX_NEXT_INSTR(i), like the neighbouring
handlers in avx512.cc.
---
 bochs/cpu/avx512.cc          | 104 +++++++++++++++++++++++++++++++++++
 bochs/cpu/cpu.h              |   5 ++
 bochs/cpu/fetchdecode_evex.h |  16 +++---
 bochs/cpu/ia_opcodes.h       |  10 ++++
 4 files changed, 127 insertions(+), 8 deletions(-)

diff --git a/bochs/cpu/avx512.cc b/bochs/cpu/avx512.cc
index 5ddc29021..b80bae3ca 100644
--- a/bochs/cpu/avx512.cc
+++ b/bochs/cpu/avx512.cc
@@ -539,6 +539,110 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMQ_MASK_VdqWdqIbR(bxInstructio
   BX_NEXT_INSTR(i);
 }
 
+// VPERMT2D/VPERMT2PS, register form: dword permute from two tables, indexed by
+// src1. The index bit just above the element mask selects src2 (set) or the old
+// destination (clear); the masked low bits select the element in that table.
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMT2PS_MASK_VpsHpsWpsR(bxInstruction_c *i)
+{
+  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
+  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
+  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), result;
+  unsigned len = i->getVL(), elements = DWORD_ELEMENTS(len);
+  unsigned shuffle_control_mask = elements - 1;
+
+  for (unsigned n=0; n < elements; n++) {
+    unsigned shuffle_control = (unsigned) (op1.vmm32u(n) & shuffle_control_mask);
+    result.vmm32u(n) = (op1.vmm32u(n) & elements) ? op2.vmm32u(shuffle_control) : dst.vmm32u(shuffle_control);
+  }
+
+  if (i->opmask()) {
+    avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
+  }
+  else {
+    BX_WRITE_AVX_REGZ(i->dst(), result, len);
+  }
+
+  BX_NEXT_INSTR(i);
+}
+
+// VPERMT2Q/VPERMT2PD, register form: qword permute from two tables, indexed by
+// src1. The index bit just above the element mask selects src2 (set) or the old
+// destination (clear); the masked low bits select the element in that table.
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMT2PD_MASK_VpdHpdWpdR(bxInstruction_c *i)
+{
+  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
+  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
+  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), result;
+  unsigned len = i->getVL(), elements = QWORD_ELEMENTS(len);
+  unsigned shuffle_control_mask = elements - 1;
+
+  for (unsigned n=0; n < elements; n++) {
+    unsigned shuffle_control = (unsigned) (op1.vmm64u(n) & shuffle_control_mask);
+    result.vmm64u(n) = (op1.vmm64u(n) & elements) ? op2.vmm64u(shuffle_control) : dst.vmm64u(shuffle_control);
+  }
+
+  if (i->opmask()) {
+    avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
+  }
+  else {
+    BX_WRITE_AVX_REGZ(i->dst(), result, len);
+  }
+
+  BX_NEXT_INSTR(i);
+}
+
+// VPERMI2D/VPERMI2PS, register form: dword permute from two tables, indexed by
+// the old destination. The index bit just above the element mask selects src2
+// (set) or src1 (clear); the masked low bits select the element in that table.
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR(bxInstruction_c *i)
+{
+  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
+  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
+  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), result;
+  unsigned len = i->getVL(), elements = DWORD_ELEMENTS(len);
+  unsigned shuffle_control_mask = elements - 1;
+
+  for (unsigned n=0; n < elements; n++) {
+    unsigned shuffle_control = (unsigned) (dst.vmm32u(n) & shuffle_control_mask);
+    result.vmm32u(n) = (dst.vmm32u(n) & elements) ? op2.vmm32u(shuffle_control) : op1.vmm32u(shuffle_control);
+  }
+
+  if (i->opmask()) {
+    avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
+  }
+  else {
+    BX_WRITE_AVX_REGZ(i->dst(), result, len);
+  }
+
+  BX_NEXT_INSTR(i);
+}
+
+// VPERMI2Q/VPERMI2PD, register form: qword permute from two tables, indexed by
+// the old destination. The index bit just above the element mask selects src2
+// (set) or src1 (clear); the masked low bits select the element in that table.
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR(bxInstruction_c *i)
+{
+  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
+  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2());
+  BxPackedAvxRegister dst = BX_READ_AVX_REG(i->dst()), result;
+  unsigned len = i->getVL(), elements = QWORD_ELEMENTS(len);
+  unsigned shuffle_control_mask = elements - 1;
+
+  for (unsigned n=0; n < elements; n++) {
+    unsigned shuffle_control = (unsigned) (dst.vmm64u(n) & shuffle_control_mask);
+    result.vmm64u(n) = (dst.vmm64u(n) & elements) ? op2.vmm64u(shuffle_control) : op1.vmm64u(shuffle_control);
+  }
+
+  if (i->opmask()) {
+    avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
+  }
+  else {
+    BX_WRITE_AVX_REGZ(i->dst(), result, len);
+  }
+
+  BX_NEXT_INSTR(i);
+}
+
 BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *i)
 {
   unsigned len = i->getVL();
diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h
index 667a8ac1e..9eb8bf0c1 100644
--- a/bochs/cpu/cpu.h
+++ b/bochs/cpu/cpu.h
@@ -3408,6 +3408,11 @@ public: // for now...
   BX_SMF BX_INSF_TYPE VSHUFF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF BX_INSF_TYPE VSHUFF64x2_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
 
+  BX_SMF BX_INSF_TYPE VPERMI2PS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VPERMI2PD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VPERMT2PS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VPERMT2PD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+
   BX_SMF BX_INSF_TYPE VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF BX_INSF_TYPE VINSERTI32x4_MASK_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF BX_INSF_TYPE VINSERTF64x4_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
diff --git a/bochs/cpu/fetchdecode_evex.h b/bochs/cpu/fetchdecode_evex.h
index 647a600a8..c0a6851fe 100644
--- a/bochs/cpu/fetchdecode_evex.h
+++ b/bochs/cpu/fetchdecode_evex.h
@@ -1253,10 +1253,10 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 74 */ { 0, BX_IA_ERROR },
   /* 75 k0 */ { 0, BX_IA_ERROR },
   /* 75 */ { 0, BX_IA_ERROR },
-  /* 76 k0 */ { 0, BX_IA_ERROR },
-  /* 76 */ { 0, BX_IA_ERROR },
-  /* 77 k0 */ { 0, BX_IA_ERROR },
-  /* 77 */ { 0, BX_IA_ERROR },
+  /* 76 k0 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMI2D_VdqHdqWdq_Kmask },
+  /* 76 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMI2D_VdqHdqWdq_Kmask },
+  /* 77 k0 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask },
+  /* 77 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask },
   /* 78 k0 */ { 0, BX_IA_ERROR },
   /* 78 */ { 0, BX_IA_ERROR },
   /* 79 k0 */ { 0, BX_IA_ERROR },
@@ -1269,10 +1269,10 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 7C */ { BxPrefixSSE66 | BxAliasVexW64, BX_IA_V512_VPBROADCASTD_VdqEd_Kmask },
   /* 7D k0 */ { 0, BX_IA_ERROR },
   /* 7D */ { 0, BX_IA_ERROR },
-  /* 7E k0 */ { 0, BX_IA_ERROR },
-  /* 7E */ { 0, BX_IA_ERROR },
-  /* 7F k0 */ { 0, BX_IA_ERROR },
-  /* 7F */ { 0, BX_IA_ERROR },
+  /* 7E k0 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMT2D_VdqHdqWdq_Kmask },
+  /* 7E */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMT2D_VdqHdqWdq_Kmask },
+  /* 7F k0 */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMT2PS_VpsHpsWps_Kmask },
+  /* 7F */ { BxPrefixSSE66 | BxAliasVexW, BX_IA_V512_VPERMT2PS_VpsHpsWps_Kmask },
   /* 80 k0 */ { 0, BX_IA_ERROR },
   /* 80 */ { 0, BX_IA_ERROR },
   /* 81 k0 */ { 0, BX_IA_ERROR },
diff --git a/bochs/cpu/ia_opcodes.h b/bochs/cpu/ia_opcodes.h
index 74d3a4dee..c69f1dc5b 100644
--- a/bochs/cpu/ia_opcodes.h
+++ b/bochs/cpu/ia_opcodes.h
@@ -3103,6 +3103,16 @@ bx_define_opcode(BX_IA_V512_VFIXUPIMMPS_VpsHpsWpsIb, &BX_CPU_C::LOAD_BROADCAST_V
 bx_define_opcode(BX_IA_V512_VFIXUPIMMPD_VpdHpdWpdIb, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VFIXUPIMMPD_VpdHpdWpdIbR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_Ib, BX_PREPARE_EVEX)
 bx_define_opcode(BX_IA_V512_VFIXUPIMMPS_VpsHpsWpsIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VFIXUPIMMPS_MASK_VpsHpsWpsIbR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_Ib, BX_PREPARE_EVEX)
 bx_define_opcode(BX_IA_V512_VFIXUPIMMPD_VpdHpdWpdIb_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VFIXUPIMMPD_MASK_VpdHpdWpdIbR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_Ib, BX_PREPARE_EVEX)
+
+bx_define_opcode(BX_IA_V512_VPERMT2D_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMT2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
+bx_define_opcode(BX_IA_V512_VPERMT2Q_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMT2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
+bx_define_opcode(BX_IA_V512_VPERMI2D_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
+bx_define_opcode(BX_IA_V512_VPERMI2Q_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
+
+bx_define_opcode(BX_IA_V512_VPERMT2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMT2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
+bx_define_opcode(BX_IA_V512_VPERMT2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMT2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
+bx_define_opcode(BX_IA_V512_VPERMI2PS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPERMI2PS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
+bx_define_opcode(BX_IA_V512_VPERMI2PD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPERMI2PD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
 // VexW alias
 
 // VexW64 aliased