diff --git a/bochs/cpu/avx.cc b/bochs/cpu/avx.cc
index 613fb6fc6..ae2770b95 100644
--- a/bochs/cpu/avx.cc
+++ b/bochs/cpu/avx.cc
@@ -390,13 +390,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VBROADCASTF128_VdqMdq(bxInstructio
   BxPackedAvxRegister dst;
   BxPackedXmmRegister src;
   unsigned len = i->getVL();
-
-#if BX_SUPPORT_EVEX
-  if (len == BX_VL128) {
-    BX_ERROR(("%s: vector length must be >= 256 bit", i->getIaOpcodeNameShort()));
-    exception(BX_UD_EXCEPTION, 0);
-  }
-#endif
 
   bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
   read_virtual_xmmword(i->seg(), eaddr, (Bit8u*) &src);
@@ -497,7 +490,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF128_VdqHdqWdqIbR(bxInstruc
 {
   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
 
-  op1.ymm128(i->Ib() & 1) = BX_READ_XMM_REG(i->src2());
+  op1.ymm128(i->Ib() & 0x1) = BX_READ_XMM_REG(i->src2());
 
   BX_WRITE_YMM_REGZ(i->dst(), op1);
 
diff --git a/bochs/cpu/avx512.cc b/bochs/cpu/avx512.cc
index bbf069be8..c99ee26bc 100644
--- a/bochs/cpu/avx512.cc
+++ b/bochs/cpu/avx512.cc
@@ -519,6 +519,54 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHUFF64x2_MASK_VpdHpdWpdIbR(bxIns
   BX_NEXT_INSTR(i);
 }
 
+// VINSERTF32x4 / VINSERTI32x4: #UD unless VL is 512. Copies src1, replaces the 128-bit lane
+// picked by Ib[1:0] with src2, then writes dst via avx512_write_regd_masked (16-bit opmask) or whole when opmask is k0.
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *i)
+{
+  unsigned len = i->getVL();
+  if (len != BX_VL512) {
+    BX_ERROR(("%s: vector length must be 512 bit", i->getIaOpcodeNameShort()));
+    exception(BX_UD_EXCEPTION, 0);
+  }
+
+  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
+
+  op1.vmm128(i->Ib() & 0x3) = BX_READ_XMM_REG(i->src2());
+
+  if (i->opmask()) {
+    avx512_write_regd_masked(i, &op1, len, BX_READ_16BIT_OPMASK(i->opmask()));
+  }
+  else {
+    BX_WRITE_AVX_REGZ(i->dst(), op1, len);
+  }
+
+  BX_NEXT_INSTR(i);
+}
+
+// VINSERTF64x4 / VINSERTI64x4: #UD unless VL is 512. Copies src1, replaces the 256-bit half
+// picked by Ib[0] with src2, then writes dst via avx512_write_regq_masked (8-bit opmask) or whole when opmask is k0.
+BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF64x4_MASK_VpdHpdWpdIbR(bxInstruction_c *i)
+{
+  unsigned len = i->getVL();
+  if (len != BX_VL512) {
+    BX_ERROR(("%s: vector length must be 512 bit", i->getIaOpcodeNameShort()));
+    exception(BX_UD_EXCEPTION, 0);
+  }
+
+  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
+
+  op1.vmm256(i->Ib() & 0x1) = BX_READ_YMM_REG(i->src2());
+
+  if (i->opmask()) {
+    avx512_write_regq_masked(i, &op1, len, BX_READ_8BIT_OPMASK(i->opmask()));
+  }
+  else {
+    BX_WRITE_AVX_REGZ(i->dst(), op1, len);
+  }
+
+  BX_NEXT_INSTR(i);
+}
+
 // broadcast
 
 BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTD_MASK_VdqWdR(bxInstruction_c *i)
@@ -589,13 +637,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VBROADCASTF32x4_MASK_VpsMps(bxInst
   BxPackedXmmRegister src;
   unsigned len = i->getVL();
 
-#if BX_SUPPORT_EVEX
-  if (len == BX_VL128) {
-    BX_ERROR(("%s: vector length must be >= 256 bit", i->getIaOpcodeNameShort()));
-    exception(BX_UD_EXCEPTION, 0);
-  }
-#endif
-
   Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask());
   if (opmask != 0) {
     bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h
index 5db83b4d6..bece299bf 100644
--- a/bochs/cpu/cpu.h
+++ b/bochs/cpu/cpu.h
@@ -3374,6 +3374,11 @@ public: // for now...
   BX_SMF BX_INSF_TYPE VSHUFF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF BX_INSF_TYPE VSHUFF64x2_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
 
+  BX_SMF BX_INSF_TYPE VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VINSERTI32x4_MASK_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VINSERTF64x4_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF BX_INSF_TYPE VINSERTI64x4_MASK_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+
   BX_SMF BX_INSF_TYPE VPBROADCASTD_MASK_VdqWdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF BX_INSF_TYPE VPBROADCASTQ_MASK_VdqWqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
 
diff --git a/bochs/cpu/fetchdecode_evex.h b/bochs/cpu/fetchdecode_evex.h
index ddf36e2e1..850a378e9 100644
--- a/bochs/cpu/fetchdecode_evex.h
+++ b/bochs/cpu/fetchdecode_evex.h
@@ -956,10 +956,10 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 18 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTSS_VpsWss_Kmask },
   /* 19 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTSD_VpdWsd },
   /* 19 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTSD_VpdWsd_Kmask },
-  /* 1A k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF32x4_VpsWps },
-  /* 1A */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF32x4_VpsWps_Kmask },
-  /* 1B k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF64x4_VpdWpd },
-  /* 1B */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF64x4_VpdWpd_Kmask },
+  /* 1A k0 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF32x4_VpsWps },
+  /* 1A */ { BxVexW0 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF32x4_VpsWps_Kmask },
+  /* 1B k0 */ { BxVexW1 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF64x4_VpdWpd },
+  /* 1B */ { BxVexW1 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF64x4_VpdWpd_Kmask },
   /* 1C k0 */ { 0, BX_IA_ERROR },
   /* 1C */ { 0, BX_IA_ERROR },
   /* 1D k0 */ { 0, BX_IA_ERROR },
@@ -1084,10 +1084,10 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 58 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPBROADCASTD_VdqWd_Kmask },
   /* 59 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPBROADCASTQ_VdqWq },
   /* 59 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPBROADCASTQ_VdqWq_Kmask },
-  /* 5A k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI32x4_VdqWdq },
-  /* 5A */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI32x4_VdqWdq_Kmask },
-  /* 5B k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI64x4_VdqWdq },
-  /* 5B */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI64x4_VdqWdq_Kmask },
+  /* 5A k0 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI32x4_VdqWdq },
+  /* 5A */ { BxVexW0 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI32x4_VdqWdq_Kmask },
+  /* 5B k0 */ { BxVexW1 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI64x4_VdqWdq },
+  /* 5B */ { BxVexW1 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI64x4_VdqWdq_Kmask },
   /* 5C k0 */ { 0, BX_IA_ERROR },
   /* 5C */ { 0, BX_IA_ERROR },
   /* 5D k0 */ { 0, BX_IA_ERROR },
@@ -1466,12 +1466,12 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 16 */ { 0, BX_IA_ERROR },
   /* 17 k0 */ { BxVexW0 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VEXTRACTPS_EdVpsIb },
   /* 17 */ { 0, BX_IA_ERROR }, // #UD
-  /* 18 k0 */ { 0, BX_IA_ERROR },
-  /* 18 */ { 0, BX_IA_ERROR },
+  /* 18 k0 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTF32x4_VpsHpsWpsIb_Kmask },
+  /* 18 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTF32x4_VpsHpsWpsIb_Kmask },
   /* 19 k0 */ { 0, BX_IA_ERROR },
   /* 19 */ { 0, BX_IA_ERROR },
-  /* 1A k0 */ { 0, BX_IA_ERROR },
-  /* 1A */ { 0, BX_IA_ERROR },
+  /* 1A k0 */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTF64x4_VpdHpdWpdIb_Kmask },
+  /* 1A */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTF64x4_VpdHpdWpdIb_Kmask },
   /* 1B k0 */ { 0, BX_IA_ERROR },
   /* 1B */ { 0, BX_IA_ERROR },
   /* 1C k0 */ { 0, BX_IA_ERROR },
@@ -1488,8 +1488,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 21 */ { 0, BX_IA_ERROR }, // #UD
   /* 22 k0 */ { 0, BX_IA_ERROR },
   /* 22 */ { 0, BX_IA_ERROR },
-  /* 23 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFF32x4_VpsHpsWpsIb_Kmask },
-  /* 23 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFF32x4_VpsHpsWpsIb_Kmask },
+  /* 23 k0 */ { BxAliasVexW | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFF32x4_VpsHpsWpsIb_Kmask },
+  /* 23 */ { BxAliasVexW | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFF32x4_VpsHpsWpsIb_Kmask },
   /* 24 k0 */ { 0, BX_IA_ERROR },
   /* 24 */ { 0, BX_IA_ERROR },
   /* 25 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPTERNLOGD_VdqHdqWdqIb },
@@ -1530,12 +1530,12 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 36 */ { 0, BX_IA_ERROR },
   /* 37 k0 */ { 0, BX_IA_ERROR },
   /* 37 */ { 0, BX_IA_ERROR },
-  /* 38 k0 */ { 0, BX_IA_ERROR },
-  /* 38 */ { 0, BX_IA_ERROR },
+  /* 38 k0 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTI32x4_VdqHdqWdqIb_Kmask },
+  /* 38 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTI32x4_VdqHdqWdqIb_Kmask },
   /* 39 k0 */ { 0, BX_IA_ERROR },
   /* 39 */ { 0, BX_IA_ERROR },
-  /* 3A k0 */ { 0, BX_IA_ERROR },
-  /* 3A */ { 0, BX_IA_ERROR },
+  /* 3A k0 */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTI64x4_VdqHdqWdqIb_Kmask },
+  /* 3A */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTI64x4_VdqHdqWdqIb_Kmask },
   /* 3B k0 */ { 0, BX_IA_ERROR },
   /* 3B */ { 0, BX_IA_ERROR },
   /* 3C k0 */ { 0, BX_IA_ERROR },
@@ -1552,8 +1552,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
   /* 41 */ { 0, BX_IA_ERROR },
   /* 42 k0 */ { 0, BX_IA_ERROR },
   /* 42 */ { 0, BX_IA_ERROR },
-  /* 43 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFI32x4_VdqHdqWdqIb_Kmask },
-  /* 43 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFI32x4_VdqHdqWdqIb_Kmask },
+  /* 43 k0 */ { BxAliasVexW | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFI32x4_VdqHdqWdqIb_Kmask },
+  /* 43 */ { BxAliasVexW | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFI32x4_VdqHdqWdqIb_Kmask },
   /* 44 k0 */ { 0, BX_IA_ERROR },
   /* 44 */ { 0, BX_IA_ERROR },
   /* 45 k0 */ { 0, BX_IA_ERROR },
diff --git a/bochs/cpu/ia_opcodes.h b/bochs/cpu/ia_opcodes.h
index f346141b5..074e33e3b 100644
--- a/bochs/cpu/ia_opcodes.h
+++ b/bochs/cpu/ia_opcodes.h
@@ -2682,6 +2682,12 @@ bx_define_opcode(BX_IA_V512_VMOVQ_VqWq, &BX_CPU_C::MOVSD_VsdWsdM, &BX_CPU_C::MOV
 bx_define_opcode(BX_IA_V512_VINSERTPS_VpsWssIb, &BX_CPU_C::INSERTPS_VpsHpsWssIb, &BX_CPU_C::INSERTPS_VpsHpsWssIb, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wss, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 bx_define_opcode(BX_IA_V512_VEXTRACTPS_EdVpsIb, &BX_CPU_C::EXTRACTPS_EdVpsIbM, &BX_CPU_C::EXTRACTPS_EdVpsIbR, BX_ISA_AVX512, OP_Ed, OP_Vps, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 
+bx_define_opcode(BX_IA_V512_VINSERTF32x4_VpsHpsWpsIb_Kmask, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VINSERTF64x4_VpdHpdWpdIb_Kmask, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VINSERTF64x4_MASK_VpdHpdWpdIbR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+
+bx_define_opcode(BX_IA_V512_VINSERTI32x4_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VINSERTI64x4_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VINSERTF64x4_MASK_VpdHpdWpdIbR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+
 // VexW alias
 bx_define_opcode(BX_IA_V512_VPADDD_VdqHdqWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPADDD_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
 bx_define_opcode(BX_IA_V512_VPADDQ_VdqHdqWdq, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPADDQ_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)