diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h
index e278cc58f..5a3227475 100644
--- a/bochs/cpu/cpu.h
+++ b/bochs/cpu/cpu.h
@@ -1954,8 +1954,14 @@ public: // for now...
 #if BX_SUPPORT_AVX
   BX_SMF void LOAD_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF void LOAD_Half_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF void LOAD_MASK_Half_VectorB(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF void LOAD_MASK_Half_VectorW(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF void LOAD_MASK_Half_VectorD(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF void LOAD_Quarter_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF void LOAD_MASK_Quarter_VectorB(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF void LOAD_MASK_Quarter_VectorW(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
   BX_SMF void LOAD_Eighth_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
+  BX_SMF void LOAD_MASK_Eighth_VectorB(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
 #endif
 #if BX_SUPPORT_EVEX
   BX_SMF void LOAD_MASK_Wss(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
diff --git a/bochs/cpu/decoder/ia_opcodes.def b/bochs/cpu/decoder/ia_opcodes.def
index 27a213eb9..8983c050b 100644
--- a/bochs/cpu/decoder/ia_opcodes.def
+++ b/bochs/cpu/decoder/ia_opcodes.def
@@ -2936,7 +2936,7 @@ bx_define_opcode(BX_IA_V512_VCVTTPD2DQ_VdqWpd, "vcvttpd2dq", "vcvttpd2dq", &BX_C
 bx_define_opcode(BX_IA_V512_VCVTTPD2DQ_VdqWpd_Kmask, "vcvttpd2dq", "vcvttpd2dq", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VCVTTPD2DQ_MASK_VdqWpdR, BX_ISA_AVX512, OP_Vdq, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
 
 bx_define_opcode(BX_IA_V512_VCVTPH2PS_VpsWps, "vcvtph2ps", "vcvtph2ps", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VCVTPH2PS_VpsWps_Kmask, "vcvtph2ps", "vcvtph2ps", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VCVTPH2PS_VpsWps_Kmask, "vcvtph2ps", "vcvtph2ps", &BX_CPU_C::LOAD_MASK_Half_VectorW, &BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
 bx_define_opcode(BX_IA_V512_VCVTPS2PH_WpsVpsIb, "vcvtps2ph", "vcvtps2ph", &BX_CPU_C::VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, BX_ISA_AVX512, OP_mVHV, OP_Vps, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
 bx_define_opcode(BX_IA_V512_VCVTPS2PH_WpsVpsIb_Kmask, "vcvtps2ph", "vcvtps2ph", &BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbM, &BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbR, BX_ISA_AVX512, OP_mVHV, OP_Vps, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
 
@@ -3168,12 +3168,12 @@ bx_define_opcode(BX_IA_V512_VPMOVSXWD_VdqWdq, "vpmovsxwd", "vpmovsxwd", &BX_CPU_
 bx_define_opcode(BX_IA_V512_VPMOVSXWQ_VdqWdq, "vpmovsxwq", "vpmovsxwq", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVSXWQ_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 bx_define_opcode(BX_IA_V512_VPMOVSXDQ_VdqWdq, "vpmovsxdq", "vpmovsxdq", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVSXDQ_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 
-bx_define_opcode(BX_IA_V512_VPMOVSXBW_VdqWdq_Kmask, "vpmovsxbw", "vpmovsxbw", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVSXBW_MASK_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVSXBD_VdqWdq_Kmask, "vpmovsxbd", "vpmovsxbd", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVSXBD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVSXBQ_VdqWdq_Kmask, "vpmovsxbq", "vpmovsxbq", &BX_CPU_C::LOAD_Eighth_Vector, &BX_CPU_C::VPMOVSXBQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVOV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVSXWD_VdqWdq_Kmask, "vpmovsxwd", "vpmovsxwd", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVSXWD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVSXWQ_VdqWdq_Kmask, "vpmovsxwq", "vpmovsxwq", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVSXWQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVSXDQ_VdqWdq_Kmask, "vpmovsxdq", "vpmovsxdq", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVSXDQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVSXBW_VdqWdq_Kmask, "vpmovsxbw", "vpmovsxbw", &BX_CPU_C::LOAD_MASK_Half_VectorB, &BX_CPU_C::VPMOVSXBW_MASK_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVSXBD_VdqWdq_Kmask, "vpmovsxbd", "vpmovsxbd", &BX_CPU_C::LOAD_MASK_Quarter_VectorB, &BX_CPU_C::VPMOVSXBD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVSXBQ_VdqWdq_Kmask, "vpmovsxbq", "vpmovsxbq", &BX_CPU_C::LOAD_MASK_Eighth_VectorB, &BX_CPU_C::VPMOVSXBQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVOV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVSXWD_VdqWdq_Kmask, "vpmovsxwd", "vpmovsxwd", &BX_CPU_C::LOAD_MASK_Half_VectorW, &BX_CPU_C::VPMOVSXWD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVSXWQ_VdqWdq_Kmask, "vpmovsxwq", "vpmovsxwq", &BX_CPU_C::LOAD_MASK_Quarter_VectorW, &BX_CPU_C::VPMOVSXWQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVSXDQ_VdqWdq_Kmask, "vpmovsxdq", "vpmovsxdq", &BX_CPU_C::LOAD_MASK_Half_VectorD, &BX_CPU_C::VPMOVSXDQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 
 bx_define_opcode(BX_IA_V512_VPMOVZXBW_VdqWdq, "vpmovzxbw", "vpmovzxbw", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXBW_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 bx_define_opcode(BX_IA_V512_VPMOVZXBD_VdqWdq, "vpmovzxbd", "vpmovzxbd", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVZXBD_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
@@ -3182,12 +3182,12 @@ bx_define_opcode(BX_IA_V512_VPMOVZXWD_VdqWdq, "vpmovzxwd", "vpmovzxwd", &BX_CPU_
 bx_define_opcode(BX_IA_V512_VPMOVZXWQ_VdqWdq, "vpmovzxwq", "vpmovzxwq", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVZXWQ_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 bx_define_opcode(BX_IA_V512_VPMOVZXDQ_VdqWdq, "vpmovzxdq", "vpmovzxdq", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXDQ_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 
-bx_define_opcode(BX_IA_V512_VPMOVZXBW_VdqWdq_Kmask, "vpmovzxbw", "vpmovzxbw", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXBW_MASK_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVZXBD_VdqWdq_Kmask, "vpmovzxbd", "vpmovzxbd", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVZXBD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVZXBQ_VdqWdq_Kmask, "vpmovzxbq", "vpmovzxbq", &BX_CPU_C::LOAD_Eighth_Vector, &BX_CPU_C::VPMOVZXBQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVOV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVZXWD_VdqWdq_Kmask, "vpmovzxwd", "vpmovzxwd", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXWD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVZXWQ_VdqWdq_Kmask, "vpmovzxwq", "vpmovzxwq", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVZXWQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
-bx_define_opcode(BX_IA_V512_VPMOVZXDQ_VdqWdq_Kmask, "vpmovzxdq", "vpmovzxdq", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXDQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVZXBW_VdqWdq_Kmask, "vpmovzxbw", "vpmovzxbw", &BX_CPU_C::LOAD_MASK_Half_VectorB, &BX_CPU_C::VPMOVZXBW_MASK_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVZXBD_VdqWdq_Kmask, "vpmovzxbd", "vpmovzxbd", &BX_CPU_C::LOAD_MASK_Quarter_VectorB, &BX_CPU_C::VPMOVZXBD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVZXBQ_VdqWdq_Kmask, "vpmovzxbq", "vpmovzxbq", &BX_CPU_C::LOAD_MASK_Eighth_VectorB, &BX_CPU_C::VPMOVZXBQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVOV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVZXWD_VdqWdq_Kmask, "vpmovzxwd", "vpmovzxwd", &BX_CPU_C::LOAD_MASK_Half_VectorW, &BX_CPU_C::VPMOVZXWD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVZXWQ_VdqWdq_Kmask, "vpmovzxwq", "vpmovzxwq", &BX_CPU_C::LOAD_MASK_Quarter_VectorW, &BX_CPU_C::VPMOVZXWQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
+bx_define_opcode(BX_IA_V512_VPMOVZXDQ_VdqWdq_Kmask, "vpmovzxdq", "vpmovzxdq", &BX_CPU_C::LOAD_MASK_Half_VectorD, &BX_CPU_C::VPMOVZXDQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 
 bx_define_opcode(BX_IA_V512_VPSUBB_VdqHdqWdq, "vpsubb", "vpsubb", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSUBB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
 bx_define_opcode(BX_IA_V512_VPSUBSB_VdqHdqWdq, "vpsubsb", "vpsubsb", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSUBSB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
diff --git a/bochs/cpu/load.cc b/bochs/cpu/load.cc
index b00b4a824..3212af05c 100644
--- a/bochs/cpu/load.cc
+++ b/bochs/cpu/load.cc
@@ -201,6 +201,57 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Half_Vector(bxInstruction_c *i)
   BX_CPU_CALL_METHOD(i->execute2(), (i));
 }
 
+void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Half_VectorB(bxInstruction_c *i)
+{
+  unsigned len = i->getVL();
+  Bit32u opmask = (i->opmask() != 0) ? BX_READ_32BIT_OPMASK(i->opmask()) : 0xffffffff;
+  opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));
+
+  if (opmask == 0) {
+    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
+    return;
+  }
+
+  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
+  avx_masked_load8(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
+
+  BX_CPU_CALL_METHOD(i->execute2(), (i));
+}
+
+void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Half_VectorW(bxInstruction_c *i)
+{
+  unsigned len = i->getVL();
+  Bit32u opmask = (i->opmask() != 0) ? BX_READ_16BIT_OPMASK(i->opmask()) : 0xffff;
+  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));
+
+  if (opmask == 0) {
+    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
+    return;
+  }
+
+  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
+  avx_masked_load16(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
+
+  BX_CPU_CALL_METHOD(i->execute2(), (i));
+}
+
+void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Half_VectorD(bxInstruction_c *i)
+{
+  unsigned len = i->getVL();
+  Bit32u opmask = (i->opmask() != 0) ? BX_READ_8BIT_OPMASK(i->opmask()) : 0xff;
+  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
+
+  if (opmask == 0) {
+    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
+    return;
+  }
+
+  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
+  avx_masked_load32(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
+
+  BX_CPU_CALL_METHOD(i->execute2(), (i));
+}
+
 void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Quarter_Vector(bxInstruction_c *i)
 {
   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
@@ -226,6 +277,40 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Quarter_Vector(bxInstruction_c *i)
   BX_CPU_CALL_METHOD(i->execute2(), (i));
 }
 
+void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Quarter_VectorB(bxInstruction_c *i)
+{
+  unsigned len = i->getVL();
+  Bit32u opmask = (i->opmask() != 0) ? BX_READ_16BIT_OPMASK(i->opmask()) : 0xffff;
+  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));
+
+  if (opmask == 0) {
+    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
+    return;
+  }
+
+  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
+  avx_masked_load8(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
+
+  BX_CPU_CALL_METHOD(i->execute2(), (i));
+}
+
+void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Quarter_VectorW(bxInstruction_c *i)
+{
+  unsigned len = i->getVL();
+  Bit32u opmask = (i->opmask() != 0) ? BX_READ_8BIT_OPMASK(i->opmask()) : 0xff;
+  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
+
+  if (opmask == 0) {
+    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
+    return;
+  }
+
+  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
+  avx_masked_load16(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
+
+  BX_CPU_CALL_METHOD(i->execute2(), (i));
+}
+
 void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Eighth_Vector(bxInstruction_c *i)
 {
   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
@@ -252,6 +337,23 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Eighth_Vector(bxInstruction_c *i)
   BX_CPU_CALL_METHOD(i->execute2(), (i));
 }
 
+void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Eighth_VectorB(bxInstruction_c *i)
+{
+  unsigned len = i->getVL();
+  Bit32u opmask = (i->opmask() != 0) ? BX_READ_8BIT_OPMASK(i->opmask()) : 0xff;
+  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
+
+  if (opmask == 0) {
+    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
+    return;
+  }
+
+  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
+  avx_masked_load8(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
+
+  BX_CPU_CALL_METHOD(i->execute2(), (i));
+}
+
 #endif
 
 #if BX_SUPPORT_EVEX
@@ -445,9 +547,9 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_Half_VectorD(bxInstruction_
 // load half vector of dwords, support broadcast and masked fault suppression
 void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_Half_VectorD(bxInstruction_c *i)
 {
   unsigned len = i->getVL();
-  Bit32u opmask = (i->opmask() != 0) ? BX_READ_16BIT_OPMASK(i->opmask()) : 0xffff;
-  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len)-1);
+  Bit32u opmask = (i->opmask() != 0) ? BX_READ_8BIT_OPMASK(i->opmask()) : 0xff;
+  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
 
   if (opmask == 0) {
     BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
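Not part of the patch: a minimal standalone sketch for sanity-checking the opmask widths used by the LOAD_MASK_* helpers above. It assumes the usual Bochs conventions (getVL() counts 128-bit lanes; WORD/DWORD/QWORD_ELEMENTS(len) expand to len*8, len*4 and len*2; CUT_OPMASK_TO(n) keeps the low n bits); the helpers are re-declared locally only so the example compiles on its own. The point it illustrates: each partial-vector masked load keeps exactly as many mask bits as the destination register has elements, which is also the number of elements read from memory.

```cpp
// Standalone check of the mask widths used by the new LOAD_MASK_* loaders.
// The *_ELEMENTS and CUT_OPMASK_TO helpers below mirror what the Bochs macros
// are assumed to compute; they are redefined here only to keep this self-contained.
#include <cassert>
#include <cstdint>

static inline unsigned WORD_ELEMENTS (unsigned vl) { return vl * 8; } // 16-bit elements per vector
static inline unsigned DWORD_ELEMENTS(unsigned vl) { return vl * 4; } // 32-bit elements per vector
static inline unsigned QWORD_ELEMENTS(unsigned vl) { return vl * 2; } // 64-bit elements per vector

// keep only the low 'n' opmask bits
static inline uint64_t CUT_OPMASK_TO(unsigned n) { return (UINT64_C(1) << n) - 1; }

int main()
{
  const unsigned vl512 = 4; // a 512-bit operation spans four 128-bit lanes

  // LOAD_MASK_Half_VectorB (VPMOVSXBW/VPMOVZXBW): 32 word destination elements
  assert(CUT_OPMASK_TO(WORD_ELEMENTS(vl512))  == 0xffffffff);
  // LOAD_MASK_Half_VectorW / Quarter_VectorB (VPMOVxXWD, VCVTPH2PS, VPMOVxXBD): 16 dword elements
  assert(CUT_OPMASK_TO(DWORD_ELEMENTS(vl512)) == 0xffff);
  // LOAD_MASK_Half_VectorD / Quarter_VectorW / Eighth_VectorB (VPMOVxXDQ/WQ/BQ): 8 qword elements
  assert(CUT_OPMASK_TO(QWORD_ELEMENTS(vl512)) == 0xff);
  return 0;
}
```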