diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index 796bb78c1..40c2e4058 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -2137,6 +2137,7 @@ public: // for now... BX_SMF BX_INSF_TYPE LOAD_Oct_Vector(bxInstruction_c *) BX_CPP_AttrRegparmN(1); #endif #if BX_SUPPORT_EVEX + BX_SMF BX_INSF_TYPE LOAD_MASK_VectorW(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE LOAD_BROADCAST_VectorD(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE LOAD_BROADCAST_MASK_VectorD(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE LOAD_BROADCAST_VectorQ(bxInstruction_c *) BX_CPP_AttrRegparmN(1); diff --git a/bochs/cpu/ia_opcodes.h b/bochs/cpu/ia_opcodes.h index 8a3f47191..17841d730 100644 --- a/bochs/cpu/ia_opcodes.h +++ b/bochs/cpu/ia_opcodes.h @@ -3152,13 +3152,13 @@ bx_define_opcode(BX_IA_V512_VPROLVQ_VdqHdqWdq, &BX_CPU_C::LOAD_BROADCAST_VectorQ bx_define_opcode(BX_IA_V512_VPRORVD_VdqHdqWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPRORVD_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPRORVQ_VdqHdqWdq, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPRORVQ_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) -bx_define_opcode(BX_IA_V512_VPSRLVW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSRLVW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPSRLVW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_MASK_VectorW, &BX_CPU_C::VPSRLVW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPSRLVD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VPSRLVD_MASK_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPSRLVQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, 
&BX_CPU_C::VPSRLVQ_MASK_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) -bx_define_opcode(BX_IA_V512_VPSRAVW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSRAVW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPSRAVW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_MASK_VectorW, &BX_CPU_C::VPSRAVW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPSRAVD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VPSRAVD_MASK_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPSRAVQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VPSRAVQ_MASK_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) -bx_define_opcode(BX_IA_V512_VPSLLVW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSLLVW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) +bx_define_opcode(BX_IA_V512_VPSLLVW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_MASK_VectorW, &BX_CPU_C::VPSLLVW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VPSLLVD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VPSLLVD_MASK_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPSLLVQ_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VPSLLVQ_MASK_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE) bx_define_opcode(BX_IA_V512_VPROLVD_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VPROLVD_MASK_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, 
BX_PREPARE_EVEX_NO_SAE) diff --git a/bochs/cpu/load.cc b/bochs/cpu/load.cc index b741d9623..c044893f9 100644 --- a/bochs/cpu/load.cc +++ b/bochs/cpu/load.cc @@ -228,6 +228,23 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_Oct_Vector(bxInstruction_c *i #include "simd_int.h" +// load vector of words under a 32-bit opmask (all-ones when i->opmask() is 0), supports masked fault suppression, no broadcast +BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_MASK_VectorW(bxInstruction_c *i) +{ + Bit32u opmask = (i->opmask() != 0) ? BX_READ_32BIT_OPMASK(i->opmask()) : 0xffffffff; + + if (opmask == 0) { + BX_CPU_CALL_METHOD(i->execute2(), (i)); // opmask is all zeros: skip the memory access and, for now, let the execute method deal with zero/merge masking semantics + return; + } + + bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); + avx_masked_load16(i, eaddr, &BX_READ_AVX_REG(BX_VECTOR_TMP_REGISTER), opmask); + + BX_CPU_CALL_METHOD(i->execute2(), (i)); +} + +// load vector of dwords, support broadcast, no fault suppression BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_VectorD(bxInstruction_c *i) { bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); @@ -249,11 +266,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_VectorD(bxInstructi BX_CPU_CALL_METHOD(i->execute2(), (i)); } +// load vector of dwords, support broadcast and masked fault suppression BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_VectorD(bxInstruction_c *i) { - bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); - unsigned len = i->getVL(); - Bit32u opmask = (i->opmask() != 0) ? 
BX_READ_16BIT_OPMASK(i->opmask()) : 0xffff; if (opmask == 0) { @@ -261,6 +276,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_VectorD(bxInst return; } + bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); + unsigned len = i->getVL(); + if (i->getEvexb()) { Bit32u val_32 = read_virtual_dword(i->seg(), eaddr); simd_pbroadcastd(&BX_AVX_REG(BX_VECTOR_TMP_REGISTER), val_32, len * 4); @@ -272,6 +290,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_VectorD(bxInst BX_CPU_CALL_METHOD(i->execute2(), (i)); } +// load vector of qwords, support broadcast, no fault suppression BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_VectorQ(bxInstruction_c *i) { bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); @@ -293,11 +312,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_VectorQ(bxInstructi BX_CPU_CALL_METHOD(i->execute2(), (i)); } +// load vector of qwords, support broadcast and masked fault suppression BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ(bxInstruction_c *i) { - bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); - unsigned len = i->getVL(); - Bit32u opmask = (i->opmask() != 0) ? 
BX_READ_8BIT_OPMASK(i->opmask()) : 0xff; if (opmask == 0) { @@ -305,6 +322,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ(bxInst return; } + bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); + unsigned len = i->getVL(); + if (i->getEvexb()) { Bit64u val_64 = read_virtual_qword(i->seg(), eaddr); simd_pbroadcastq(&BX_AVX_REG(BX_VECTOR_TMP_REGISTER), val_64, len * 2); @@ -316,6 +336,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ(bxInst BX_CPU_CALL_METHOD(i->execute2(), (i)); } +// load half vector of dwords, support broadcast, no fault suppression BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_Half_VectorD(bxInstruction_c *i) { bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); @@ -341,9 +362,9 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_Half_VectorD(bxInst BX_CPU_CALL_METHOD(i->execute2(), (i)); } +// load half vector of dwords, support broadcast and masked fault suppression BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_Half_VectorD(bxInstruction_c *i) { - bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); unsigned len = i->getVL(); Bit32u opmask = (i->opmask() != 0) ? BX_READ_16BIT_OPMASK(i->opmask()) : 0xffff; @@ -354,6 +375,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::LOAD_BROADCAST_MASK_Half_VectorD(b return; } + bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i)); + if (i->getEvexb()) { Bit32u val_32 = read_virtual_dword(i->seg(), eaddr); simd_pbroadcastd(&BX_AVX_REG(BX_VECTOR_TMP_REGISTER), val_32, len * 2);