implemented proper masked load for VPMOVSX/ZX instructions + bugfix

Shwartsman 2023-10-20 20:13:29 +03:00
parent 5103d97ece
commit 02c4f85a89
3 changed files with 123 additions and 15 deletions
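Context for the diff below: a proper masked vector load has to honor fault suppression, i.e. elements whose opmask bit is clear are never read from memory, so a masked-off element that lands in an unmapped page cannot raise a fault. Before this commit the Kmask forms of VPMOVSX/ZX reused the plain LOAD_Half/Quarter/Eighth_Vector helpers, which read the whole partial-width operand regardless of the mask. The avx_masked_load8/16/32 helpers called by the new code are assumed to behave roughly like this standalone sketch (the name and the element loop are illustrative, not the Bochs implementation):

#include <cstdint>

// Sketch only: read up to 'nelem' byte elements from 'src', touching memory
// solely for elements whose mask bit is set. Skipped elements keep their old
// value here; zeroing vs. merging is decided later by the execute handler.
static void masked_load8_sketch(uint8_t *dst, const uint8_t *src,
                                unsigned nelem, uint32_t opmask)
{
  for (unsigned n = 0; n < nelem; n++) {
    if (opmask & (1u << n))
      dst[n] = src[n];   // the only memory access, hence the only possible fault
  }
}

int main()
{
  uint8_t mem[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint8_t dst[8] = {0};
  masked_load8_sketch(dst, mem, 8, 0x0F);  // only elements 0..3 are touched
  return (dst[3] == 4 && dst[4] == 0) ? 0 : 1;
}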


@@ -1954,8 +1954,14 @@ public: // for now...
#if BX_SUPPORT_AVX
  BX_SMF void LOAD_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_Half_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_MASK_Half_VectorB(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_MASK_Half_VectorW(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_MASK_Half_VectorD(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_Quarter_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_MASK_Quarter_VectorB(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_MASK_Quarter_VectorW(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_Eighth_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
  BX_SMF void LOAD_MASK_Eighth_VectorB(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
#if BX_SUPPORT_EVEX
  BX_SMF void LOAD_MASK_Wss(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);


@@ -2936,7 +2936,7 @@ bx_define_opcode(BX_IA_V512_VCVTTPD2DQ_VdqWpd, "vcvttpd2dq", "vcvttpd2dq", &BX_C
bx_define_opcode(BX_IA_V512_VCVTTPD2DQ_VdqWpd_Kmask, "vcvttpd2dq", "vcvttpd2dq", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VCVTTPD2DQ_MASK_VdqWpdR, BX_ISA_AVX512, OP_Vdq, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VCVTPH2PS_VpsWps, "vcvtph2ps", "vcvtph2ps", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTPH2PS_VpsWps_Kmask, "vcvtph2ps", "vcvtph2ps", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTPH2PS_VpsWps_Kmask, "vcvtph2ps", "vcvtph2ps", &BX_CPU_C::LOAD_MASK_Half_VectorW, &BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTPS2PH_WpsVpsIb, "vcvtps2ph", "vcvtps2ph", &BX_CPU_C::VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, BX_ISA_AVX512, OP_mVHV, OP_Vps, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTPS2PH_WpsVpsIb_Kmask, "vcvtps2ph", "vcvtps2ph", &BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbM, &BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbR, BX_ISA_AVX512, OP_mVHV, OP_Vps, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
@@ -3168,12 +3168,12 @@ bx_define_opcode(BX_IA_V512_VPMOVSXWD_VdqWdq, "vpmovsxwd", "vpmovsxwd", &BX_CPU_
bx_define_opcode(BX_IA_V512_VPMOVSXWQ_VdqWdq, "vpmovsxwq", "vpmovsxwq", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVSXWQ_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXDQ_VdqWdq, "vpmovsxdq", "vpmovsxdq", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVSXDQ_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXBW_VdqWdq_Kmask, "vpmovsxbw", "vpmovsxbw", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVSXBW_MASK_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXBD_VdqWdq_Kmask, "vpmovsxbd", "vpmovsxbd", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVSXBD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXBQ_VdqWdq_Kmask, "vpmovsxbq", "vpmovsxbq", &BX_CPU_C::LOAD_Eighth_Vector, &BX_CPU_C::VPMOVSXBQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVOV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXWD_VdqWdq_Kmask, "vpmovsxwd", "vpmovsxwd", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVSXWD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXWQ_VdqWdq_Kmask, "vpmovsxwq", "vpmovsxwq", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVSXWQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXDQ_VdqWdq_Kmask, "vpmovsxdq", "vpmovsxdq", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVSXDQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXBW_VdqWdq_Kmask, "vpmovsxbw", "vpmovsxbw", &BX_CPU_C::LOAD_MASK_Half_VectorB, &BX_CPU_C::VPMOVSXBW_MASK_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXBD_VdqWdq_Kmask, "vpmovsxbd", "vpmovsxbd", &BX_CPU_C::LOAD_MASK_Quarter_VectorB, &BX_CPU_C::VPMOVSXBD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXBQ_VdqWdq_Kmask, "vpmovsxbq", "vpmovsxbq", &BX_CPU_C::LOAD_MASK_Eighth_VectorB, &BX_CPU_C::VPMOVSXBQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVOV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXWD_VdqWdq_Kmask, "vpmovsxwd", "vpmovsxwd", &BX_CPU_C::LOAD_MASK_Half_VectorW, &BX_CPU_C::VPMOVSXWD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXWQ_VdqWdq_Kmask, "vpmovsxwq", "vpmovsxwq", &BX_CPU_C::LOAD_MASK_Quarter_VectorW, &BX_CPU_C::VPMOVSXWQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVSXDQ_VdqWdq_Kmask, "vpmovsxdq", "vpmovsxdq", &BX_CPU_C::LOAD_MASK_Half_VectorD, &BX_CPU_C::VPMOVSXDQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXBW_VdqWdq, "vpmovzxbw", "vpmovzxbw", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXBW_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXBD_VdqWdq, "vpmovzxbd", "vpmovzxbd", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVZXBD_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
@@ -3182,12 +3182,12 @@ bx_define_opcode(BX_IA_V512_VPMOVZXWD_VdqWdq, "vpmovzxwd", "vpmovzxwd", &BX_CPU_
bx_define_opcode(BX_IA_V512_VPMOVZXWQ_VdqWdq, "vpmovzxwq", "vpmovzxwq", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVZXWQ_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXDQ_VdqWdq, "vpmovzxdq", "vpmovzxdq", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXDQ_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXBW_VdqWdq_Kmask, "vpmovzxbw", "vpmovzxbw", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXBW_MASK_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXBD_VdqWdq_Kmask, "vpmovzxbd", "vpmovzxbd", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVZXBD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXBQ_VdqWdq_Kmask, "vpmovzxbq", "vpmovzxbq", &BX_CPU_C::LOAD_Eighth_Vector, &BX_CPU_C::VPMOVZXBQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVOV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXWD_VdqWdq_Kmask, "vpmovzxwd", "vpmovzxwd", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXWD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXWQ_VdqWdq_Kmask, "vpmovzxwq", "vpmovzxwq", &BX_CPU_C::LOAD_Quarter_Vector, &BX_CPU_C::VPMOVZXWQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXDQ_VdqWdq_Kmask, "vpmovzxdq", "vpmovzxdq", &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VPMOVZXDQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXBW_VdqWdq_Kmask, "vpmovzxbw", "vpmovzxbw", &BX_CPU_C::LOAD_MASK_Half_VectorB, &BX_CPU_C::VPMOVZXBW_MASK_VdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXBD_VdqWdq_Kmask, "vpmovzxbd", "vpmovzxbd", &BX_CPU_C::LOAD_MASK_Quarter_VectorB, &BX_CPU_C::VPMOVZXBD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXBQ_VdqWdq_Kmask, "vpmovzxbq", "vpmovzxbq", &BX_CPU_C::LOAD_MASK_Eighth_VectorB, &BX_CPU_C::VPMOVZXBQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVOV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXWD_VdqWdq_Kmask, "vpmovzxwd", "vpmovzxwd", &BX_CPU_C::LOAD_MASK_Half_VectorW, &BX_CPU_C::VPMOVZXWD_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXWQ_VdqWdq_Kmask, "vpmovzxwq", "vpmovzxwq", &BX_CPU_C::LOAD_MASK_Quarter_VectorW, &BX_CPU_C::VPMOVZXWQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVQV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVZXDQ_VdqWdq_Kmask, "vpmovzxdq", "vpmovzxdq", &BX_CPU_C::LOAD_MASK_Half_VectorD, &BX_CPU_C::VPMOVZXDQ_MASK_VdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPSUBB_VdqHdqWdq, "vpsubb", "vpsubb", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSUBB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPSUBSB_VdqHdqWdq, "vpsubsb", "vpsubsb", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSUBSB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
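The table changes above only swap in the masked memory-load helper for each Kmask form; the arithmetic itself stays in the existing VPMOVSX/ZX *_MASK_* handlers. As a reference point for what those forms compute, here is a rough hand-written C++ model of a masked byte-to-dword sign extension (vpmovsxbd with a k register) — an illustration of the architectural semantics, not the emulator's code:

#include <cstdint>

// Model of VPMOVSXBD zmm{k}{z}, mem semantics: each selected byte is
// sign-extended to a dword; unselected destination elements are zeroed
// under zeroing-masking or left unchanged under merging-masking.
static void vpmovsxbd_model(int32_t *dst, const int8_t *src, unsigned nelem,
                            uint32_t opmask, bool zeroing)
{
  for (unsigned n = 0; n < nelem; n++) {
    if (opmask & (1u << n))
      dst[n] = (int32_t) src[n];   // sign extension
    else if (zeroing)
      dst[n] = 0;                  // merging keeps the previous dst[n]
  }
}

int main()
{
  int8_t  src[4] = {-1, 2, -3, 4};
  int32_t dst[4] = {7, 7, 7, 7};
  vpmovsxbd_model(dst, src, 4, 0x5 /* k = 0101b */, true /* {z} */);
  return (dst[0] == -1 && dst[1] == 0 && dst[2] == -3 && dst[3] == 0) ? 0 : 1;
}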


@@ -201,6 +201,57 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Half_Vector(bxInstruction_c *i)
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Half_VectorB(bxInstruction_c *i)
{
  Bit32u opmask = (i->opmask() != 0) ? BX_READ_32BIT_OPMASK(i->opmask()) : 0xffffffff;
  unsigned len = i->getVL();
  opmask &= CUT_OPMASK_TO(BYTE_ELEMENTS(len));
  if (opmask == 0) {
    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
    return;
  }
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_load8(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Half_VectorW(bxInstruction_c *i)
{
  Bit32u opmask = (i->opmask() != 0) ? BX_READ_16BIT_OPMASK(i->opmask()) : 0xffff;
  unsigned len = i->getVL();
  opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));
  if (opmask == 0) {
    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
    return;
  }
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_load16(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Half_VectorD(bxInstruction_c *i)
{
  Bit32u opmask = (i->opmask() != 0) ? BX_READ_8BIT_OPMASK(i->opmask()) : 0xff;
  unsigned len = i->getVL();
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));
  if (opmask == 0) {
    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
    return;
  }
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_load32(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Quarter_Vector(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
@@ -226,6 +277,40 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Quarter_Vector(bxInstruction_c *i)
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Quarter_VectorB(bxInstruction_c *i)
{
  Bit32u opmask = (i->opmask() != 0) ? BX_READ_16BIT_OPMASK(i->opmask()) : 0xffff;
  unsigned len = i->getVL();
  opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));
  if (opmask == 0) {
    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
    return;
  }
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_load8(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Quarter_VectorW(bxInstruction_c *i)
{
  Bit32u opmask = (i->opmask() != 0) ? BX_READ_8BIT_OPMASK(i->opmask()) : 0xff;
  unsigned len = i->getVL();
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));
  if (opmask == 0) {
    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
    return;
  }
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_load16(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Eighth_Vector(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
@@ -252,6 +337,23 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Eighth_Vector(bxInstruction_c *i)
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_Eighth_VectorB(bxInstruction_c *i)
{
  Bit32u opmask = (i->opmask() != 0) ? BX_READ_8BIT_OPMASK(i->opmask()) : 0xff;
  unsigned len = i->getVL();
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
  if (opmask == 0) {
    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
    return;
  }
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_load8(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask);
  BX_CPU_CALL_METHOD(i->execute2(), (i));
}
#endif
#if BX_SUPPORT_EVEX
@@ -445,9 +547,9 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_Half_VectorD(bxInstruction_
// load half vector of dwords, support broadcast and masked fault suppression
void BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_Half_VectorD(bxInstruction_c *i)
{
  Bit32u opmask = (i->opmask() != 0) ? BX_READ_8BIT_OPMASK(i->opmask()) : 0xff;
  unsigned len = i->getVL();
  Bit32u opmask = (i->opmask() != 0) ? BX_READ_16BIT_OPMASK(i->opmask()) : 0xffff;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len)-1);
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
  if (opmask == 0) {
    BX_CPU_CALL_METHOD(i->execute2(), (i)); // for now let execute method to deal with zero/merge masking semantics
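The hunk above carries the bugfix half of the commit: in LOAD_BROADCAST_MASK_Half_VectorD it adjusts which opmask width is read and how many mask bits are trimmed off for a half-width vector of dwords. Assuming the usual Bochs-style conventions — getVL() counts 128-bit lanes, DWORD_ELEMENTS(vl) = 4*vl, QWORD_ELEMENTS(vl) = 2*vl, and CUT_OPMASK_TO(n) keeps the low n bits — the element-count arithmetic works out as in this sketch (the *_x helpers are assumed stand-ins, not the real macros):

#include <cassert>
#include <cstdint>

// Assumed stand-ins for the Bochs helpers referenced above; the real macros
// live elsewhere in the tree and may differ in detail.
static uint32_t DWORD_ELEMENTS_x(unsigned vl) { return 4 * vl; }  // dwords in a full vector
static uint32_t QWORD_ELEMENTS_x(unsigned vl) { return 2 * vl; }  // qwords in a full vector
static uint32_t CUT_OPMASK_TO_x(uint32_t n)   { return (1u << n) - 1; }

int main()
{
  // A 512-bit operand is vl = 4 lanes of 128 bits, so a *half* vector of
  // dwords holds QWORD_ELEMENTS(4) = 8 elements and only 8 mask bits matter.
  unsigned vl = 4;
  uint32_t k = 0xFFFF;                                            // raw opmask register value
  assert((k & CUT_OPMASK_TO_x(QWORD_ELEMENTS_x(vl))) == 0xFF);    // 8 bits kept
  assert((k & CUT_OPMASK_TO_x(DWORD_ELEMENTS_x(vl))) == 0xFFFF);  // keeps twice as many bits
  return 0;
}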