From c4c8652a3b324959d7b05a090f69fe6c18314ce9 Mon Sep 17 00:00:00 2001 From: Stanislav Shwartsman Date: Tue, 22 Jul 2014 20:36:55 +0000 Subject: [PATCH] Implemented VPMOV?2? and VPMIN* AVX512 instructions The only missing AVX512BW/AVX512DQ opcodes are now: "512.66.0F38.W1 10 VPSRLVW" "512.66.0F38.W1 11 VPSRAVW" "512.66.0F38.W1 12 VPSLLVW" "512.66.0F3A.W1 0F VPALIGNR" "NDS.66.0F3A.W0 42 VDBPSADBW" "NDS.512.66.0F3A.W0 50 VRANGEPS NDS.512.66.0F3A.W1 50 VRANGEPD" "NDS.512.66.0F3A.W0 51 VRANGESS NDS.512.66.0F3A.W1 51 VRANGESD" "NDS.512.66.0F3A.W0 56 VREDUCEPS NDS.512.66.0F3A.W1 56 VREDUCEPD" "NDS.512.66.0F3A.W0 57 VREDUCESS NDS.512.66.0F3A.W1 57 VREDUCESD" --- bochs/cpu/avx512.cc | 116 +++++++++++++++++++++++++++++++++++ bochs/cpu/cpu.h | 12 ++++ bochs/cpu/fetchdecode_evex.h | 44 ++++++++++--- bochs/cpu/ia_opcodes.h | 14 +++++ bochs/cpu/simd_int.h | 50 ++++++++++++++- 5 files changed, 226 insertions(+), 10 deletions(-) diff --git a/bochs/cpu/avx512.cc b/bochs/cpu/avx512.cc index ad10bce1c..0a5800ad1 100644 --- a/bochs/cpu/avx512.cc +++ b/bochs/cpu/avx512.cc @@ -472,6 +472,7 @@ AVX512_2OP_WORD_EL(VPSUBW_MASK_VdqHdqWdqR, xmm_psubw) AVX512_2OP_WORD_EL(VPSUBSW_MASK_VdqHdqWdqR, xmm_psubsw) AVX512_2OP_WORD_EL(VPSUBUSW_MASK_VdqHdqWdqR, xmm_psubusw) AVX512_2OP_WORD_EL(VPMINSW_MASK_VdqHdqWdqR, xmm_pminsw) +AVX512_2OP_WORD_EL(VPMINUW_MASK_VdqHdqWdqR, xmm_pminuw) AVX512_2OP_WORD_EL(VPMAXSW_MASK_VdqHdqWdqR, xmm_pmaxsw) AVX512_2OP_WORD_EL(VPMAXUW_MASK_VdqHdqWdqR, xmm_pmaxuw) AVX512_2OP_WORD_EL(VPMADDUBSW_MASK_VdqHdqWdqR, xmm_pmaddubsw) @@ -506,6 +507,7 @@ AVX512_2OP_BYTE_EL(VPADDUSB_MASK_VdqHdqWdqR, xmm_paddusb) AVX512_2OP_BYTE_EL(VPSUBB_MASK_VdqHdqWdqR, xmm_psubb) AVX512_2OP_BYTE_EL(VPSUBSB_MASK_VdqHdqWdqR, xmm_psubsb) AVX512_2OP_BYTE_EL(VPSUBUSB_MASK_VdqHdqWdqR, xmm_psubusb) +AVX512_2OP_BYTE_EL(VPMINSB_MASK_VdqHdqWdqR, xmm_pminsb) AVX512_2OP_BYTE_EL(VPMINUB_MASK_VdqHdqWdqR, xmm_pminub) AVX512_2OP_BYTE_EL(VPMAXUB_MASK_VdqHdqWdqR, xmm_pmaxub) 
AVX512_2OP_BYTE_EL(VPMAXSB_MASK_VdqHdqWdqR, xmm_pmaxsb) @@ -1975,4 +1977,118 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCOMPRESSPD_MASK_WpdVpd(bxInstruct BX_NEXT_INSTR(i); } +// convert mask + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2B_VdqKEqR(bxInstruction_c *i) +{ + Bit64u opmask = BX_READ_OPMASK(i->src()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < len; n++) { + xmm_pmovm2b(&BX_READ_AVX_REG_LANE(i->dst(), n), (Bit32u) opmask); + opmask >>= 16; + } + + BX_CLEAR_AVX_REGZ(i->dst(), len); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2W_VdqKEdR(bxInstruction_c *i) +{ + Bit32u opmask = BX_READ_32BIT_OPMASK(i->src()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < len; n++) { + xmm_pmovm2w(&BX_READ_AVX_REG_LANE(i->dst(), n), opmask); + opmask >>= 8; + } + + BX_CLEAR_AVX_REGZ(i->dst(), len); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2D_VdqKEwR(bxInstruction_c *i) +{ + Bit32u opmask = (Bit32u) BX_READ_16BIT_OPMASK(i->src()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < len; n++) { + xmm_pmovm2d(&BX_READ_AVX_REG_LANE(i->dst(), n), opmask); + opmask >>= 4; + } + + BX_CLEAR_AVX_REGZ(i->dst(), len); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2Q_VdqKEbR(bxInstruction_c *i) +{ + Bit32u opmask = (Bit32u) BX_READ_8BIT_OPMASK(i->src()); + unsigned len = i->getVL(); + + for (unsigned n=0; n < len; n++) { + xmm_pmovm2q(&BX_READ_AVX_REG_LANE(i->dst(), n), opmask); + opmask >>= 2; + } + + BX_CLEAR_AVX_REGZ(i->dst(), len); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVB2M_KGqWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); + unsigned len = i->getVL(); + Bit64u mask = 0; + + for (unsigned n=0; n < len; n++) + mask |= ((Bit64u) xmm_pmovmskb(&op.vmm128(n))) << (16*n); + + BX_WRITE_OPMASK(i->dst(), mask); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVW2M_KGdWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); + unsigned len = i->getVL(); + Bit32u mask = 0; + + for (unsigned n=0; n < len; n++) + mask |= xmm_pmovmskw(&op.vmm128(n)) << (8*n); + + BX_WRITE_OPMASK(i->dst(), mask); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVD2M_KGwWdqR(bxInstruction_c *i) +{ + 
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); + unsigned len = i->getVL(); + Bit32u mask = 0; + + for (unsigned n=0; n < len; n++) + mask |= xmm_pmovmskd(&op.vmm128(n)) << (4*n); + + BX_WRITE_OPMASK(i->dst(), mask); + BX_NEXT_INSTR(i); +} + +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQ2M_KGbWdqR(bxInstruction_c *i) +{ + BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()); + unsigned len = i->getVL(); + Bit32u mask = 0; + + for (unsigned n=0; n < len; n++) + mask |= xmm_pmovmskq(&op.vmm128(n)) << (2*n); + + BX_WRITE_OPMASK(i->dst(), mask); + BX_NEXT_INSTR(i); +} + #endif diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index d0a8086c4..df239c7cd 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -3497,10 +3497,12 @@ public: // for now... BX_SMF BX_INSF_TYPE VPADDSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPADDUSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMINSB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPMINUB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPMAXUB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPMAXSB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPMINSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMINUW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPMAXSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPMAXUW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); @@ -3803,6 +3805,16 @@ public: // for now... 
BX_SMF BX_INSF_TYPE VPBROADCASTMB2Q_VdqKEbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPBROADCASTMW2D_VdqKEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + + BX_SMF BX_INSF_TYPE VPMOVM2B_VdqKEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMOVM2W_VdqKEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMOVM2D_VdqKEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMOVM2Q_VdqKEbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + + BX_SMF BX_INSF_TYPE VPMOVB2M_KGqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMOVW2M_KGdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMOVD2M_KGwWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); + BX_SMF BX_INSF_TYPE VPMOVQ2M_KGbWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); #endif BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); diff --git a/bochs/cpu/fetchdecode_evex.h b/bochs/cpu/fetchdecode_evex.h index af4b36085..be2dac091 100644 --- a/bochs/cpu/fetchdecode_evex.h +++ b/bochs/cpu/fetchdecode_evex.h @@ -604,6 +604,18 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3827[3] = { /* F2 */ { 0, BX_IA_ERROR } }; +static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3828[3] = { + /* 66 */ { BxVexW1, BX_IA_V512_VPMULDQ_VdqHdqWdq }, + /* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVM2B_VdqKEq }, + /* F2 */ { 0, BX_IA_ERROR } +}; + +static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3829[3] = { + /* 66 */ { BxVexW1, BX_IA_V512_VPCMPEQQ_KGbHdqWdq }, + /* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVB2M_KGqWdq }, + /* F2 */ { 0, BX_IA_ERROR } +}; + static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f382a[3] = { /* 66 */ { BxVexW0, BX_IA_V512_VMOVNTDQA_VdqMdq }, /* F3 */ { BxVexW1, BX_IA_V512_VPBROADCASTMB2Q_VdqKEb }, @@ -682,6 +694,24 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3835_Mask[3] = { /* F2 */ { 0, BX_IA_ERROR } }; +static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3838[3] = { + /* 66 */ { 0, 
BX_IA_V512_VPMINSB_VdqHdqWdq }, + /* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVM2D_VdqKEw }, + /* F2 */ { 0, BX_IA_ERROR } +}; + +static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3839[3] = { + /* 66 */ { BxAliasVexW, BX_IA_V512_VPMINSD_VdqHdqWdq }, + /* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVD2M_KGwWdq }, + /* F2 */ { 0, BX_IA_ERROR } +}; + +static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f383a[3] = { + /* 66 */ { 0, BX_IA_V512_VPMINUW_VdqHdqWdq }, + /* F3 */ { BxVexW1, BX_IA_V512_VPBROADCASTMW2D_VdqKEw }, + /* F2 */ { 0, BX_IA_ERROR } +}; + /* ************************************************************************ */ /* ******** */ @@ -1360,9 +1390,9 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = { /* 26 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3826 }, /* 27 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3827 }, /* 27 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3827 }, - /* 28 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPMULDQ_VdqHdqWdq }, + /* 28 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3828 }, /* 28 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPMULDQ_VdqHdqWdq_Kmask }, - /* 29 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPEQQ_KGbHdqWdq }, + /* 29 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3829 }, /* 29 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPEQQ_KGbHdqWdq }, /* 2A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f382a }, /* 2A */ { 0, BX_IA_ERROR }, // #UD @@ -1392,12 +1422,12 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = { /* 36 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMD_VdqHdqWdq_Kmask }, /* 37 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq }, /* 37 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq }, - /* 38 k0 */ { 0, BX_IA_ERROR }, - /* 38 */ { 0, BX_IA_ERROR }, - /* 39 k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINSD_VdqHdqWdq }, + /* 38 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3838 }, + /* 38 */ { BxPrefixSSE66, 
BX_IA_V512_VPMINSB_VdqHdqWdq_Kmask }, + /* 39 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3839 }, /* 39 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINSD_VdqHdqWdq_Kmask }, - /* 3A k0 */ { BxVexW1 | BxPrefixSSEF3, BX_IA_V512_VPBROADCASTMW2D_VdqKEw }, - /* 3A */ { 0, BX_IA_ERROR }, // #UD + /* 3A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f383a }, + /* 3A */ { BxPrefixSSE66, BX_IA_V512_VPMINUW_VdqHdqWdq_Kmask }, /* 3B k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINUD_VdqHdqWdq }, /* 3B */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINUD_VdqHdqWdq_Kmask }, /* 3C k0 */ { BxPrefixSSE66, BX_IA_V512_VPMAXSB_VdqHdqWdq }, diff --git a/bochs/cpu/ia_opcodes.h b/bochs/cpu/ia_opcodes.h index 1506a9938..d480704c6 100644 --- a/bochs/cpu/ia_opcodes.h +++ b/bochs/cpu/ia_opcodes.h @@ -2926,17 +2926,21 @@ bx_define_opcode(BX_IA_V512_VPADDW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_ bx_define_opcode(BX_IA_V512_VPADDSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPADDSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPADDUSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPADDUSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMINSB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMINUB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMAXUB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) 
bx_define_opcode(BX_IA_V512_VPMAXSB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMINSW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMINUW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMAXSW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMAXUW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMINSB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMINUB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMAXUB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMAXSB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMINSW_VdqHdqWdq_Kmask, 
&BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMINUW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMAXSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPMAXUW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) @@ -3537,6 +3541,16 @@ bx_define_opcode(BX_IA_V512_VPCONFLICTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_ bx_define_opcode(BX_IA_V512_VPLZCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPLZCNTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPLZCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPLZCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE) + +bx_define_opcode(BX_IA_V512_VPMOVM2B_VdqKEq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2B_VdqKEqR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMOVM2W_VdqKEd, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2W_VdqKEdR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMOVM2D_VdqKEw, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2D_VdqKEwR, BX_ISA_AVX512_DQ, OP_Vdq, OP_KEw, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMOVM2Q_VdqKEb, 
&BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2Q_VdqKEbR, BX_ISA_AVX512_DQ, OP_Vdq, OP_KEb, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) + +bx_define_opcode(BX_IA_V512_VPMOVB2M_KGqWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVB2M_KGqWdqR, BX_ISA_AVX512_BW, OP_KGq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMOVW2M_KGdWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVW2M_KGdWdqR, BX_ISA_AVX512_BW, OP_KGd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMOVD2M_KGwWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVD2M_KGwWdqR, BX_ISA_AVX512_DQ, OP_KGw, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPMOVQ2M_KGbWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVQ2M_KGbWdqR, BX_ISA_AVX512_DQ, OP_KGb, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) // VexW alias // VexW64 aliased diff --git a/bochs/cpu/simd_int.h b/bochs/cpu/simd_int.h index 0f0fc1dde..00fcf4498 100644 --- a/bochs/cpu/simd_int.h +++ b/bochs/cpu/simd_int.h @@ -454,7 +454,7 @@ BX_CPP_INLINE void xmm_psignd(BxPackedXmmRegister *op1, const BxPackedXmmRegiste BX_CPP_INLINE Bit32u xmm_pmovmskb(const BxPackedXmmRegister *op) { - unsigned mask = 0; + Bit32u mask = 0; if(op->xmmsbyte(0x0) < 0) mask |= 0x0001; if(op->xmmsbyte(0x1) < 0) mask |= 0x0002; @@ -476,9 +476,25 @@ BX_CPP_INLINE Bit32u xmm_pmovmskb(const BxPackedXmmRegister *op) return mask; } +BX_CPP_INLINE Bit32u xmm_pmovmskw(const BxPackedXmmRegister *op) +{ + Bit32u mask = 0; + + if(op->xmm16s(0) < 0) mask |= 0x01; + if(op->xmm16s(1) < 0) mask |= 0x02; + if(op->xmm16s(2) < 0) mask |= 0x04; + if(op->xmm16s(3) < 0) mask |= 0x08; + if(op->xmm16s(4) < 0) mask |= 0x10; + if(op->xmm16s(5) < 0) mask |= 0x20; + if(op->xmm16s(6) < 0) mask |= 0x40; + if(op->xmm16s(7) < 0) mask |= 0x80; + + return mask; +} + BX_CPP_INLINE Bit32u xmm_pmovmskd(const 
BxPackedXmmRegister *op) { - unsigned mask = 0; + Bit32u mask = 0; if(op->xmm32s(0) < 0) mask |= 0x1; if(op->xmm32s(1) < 0) mask |= 0x2; @@ -490,7 +506,7 @@ BX_CPP_INLINE Bit32u xmm_pmovmskd(const BxPackedXmmRegister *op) BX_CPP_INLINE Bit32u xmm_pmovmskq(const BxPackedXmmRegister *op) { - unsigned mask = 0; + Bit32u mask = 0; if(op->xmm32s(1) < 0) mask |= 0x1; if(op->xmm32s(3) < 0) mask |= 0x2; @@ -498,6 +514,34 @@ BX_CPP_INLINE Bit32u xmm_pmovmskq(const BxPackedXmmRegister *op) return mask; } +BX_CPP_INLINE void xmm_pmovm2b(BxPackedXmmRegister *dst, Bit32u mask) +{ + for (unsigned n=0; n < 16; n++, mask >>= 1) { + dst->xmmsbyte(n) = - (mask & 0x1); + } +} + +BX_CPP_INLINE void xmm_pmovm2w(BxPackedXmmRegister *dst, Bit32u mask) +{ + for (unsigned n=0; n < 8; n++, mask >>= 1) { + dst->xmm16s(n) = - (mask & 0x1); + } +} + +BX_CPP_INLINE void xmm_pmovm2d(BxPackedXmmRegister *dst, Bit32u mask) +{ + for (unsigned n=0; n < 4; n++, mask >>= 1) { + dst->xmm32s(n) = - (mask & 0x1); + } +} + +BX_CPP_INLINE void xmm_pmovm2q(BxPackedXmmRegister *dst, Bit32u mask) +{ + for (unsigned n=0; n < 2; n++, mask >>= 1) { + dst->xmm64s(n) = - (mask & 0x1); + } +} + // blend BX_CPP_INLINE void xmm_pblendb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, Bit32u mask)