final resolution for issue #2 : address VEXPAND* and VPSHUFBITQMB instructions

This commit is contained in:
Stanislav Shwartsman 2023-11-09 19:15:32 +02:00
parent 6655fa9a41
commit 8594972389
3 changed files with 76 additions and 7 deletions

View File

@ -30,6 +30,7 @@
#include "simd_int.h"
#include "simd_compare.h"
#include "scalar_arith.h"
// compare
@ -1616,6 +1617,39 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPEXPANDW_MASK_VdqWdqR(bxInstruction_c *i)
BX_NEXT_INSTR(i);
}
// Masked VEXPANDPS, memory-source form.
// Dword elements are read consecutively from memory and placed, in order, into
// the destination elements whose opmask bit is set. Elements whose opmask bit is
// clear keep whatever the result was seeded with: a cleared register under
// zero-masking, otherwise the previous destination contents.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  BxPackedAvxRegister result, op;

  // Seed the result with the values the unselected elements must end up holding.
  if (! i->isZeroMasking()) {
    result = BX_READ_AVX_REG(i->dst());
  }
  else {
    result.clear();
  }

  unsigned vl = i->getVL();

  // Drop opmask bits that lie beyond the dword element count of this vector length.
  Bit32u kmask = BX_READ_16BIT_OPMASK(i->opmask());
  kmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(vl));

  // With an empty opmask nothing is selected, so memory is not touched at all.
  if (kmask != 0) {
    // One source element is consumed per set opmask bit, so only the first
    // popcntw(kmask) dword elements are requested from memory.
    unsigned elements_to_read = popcntw(kmask);
    Bit32u load_mask = (1 << elements_to_read) - 1;
    avx_masked_load32(i, BX_CPU_RESOLVE_ADDR(i), &op, load_mask);

    // dst_idx walks the destination elements, src_idx walks the packed source;
    // the walk stops early once no selected elements remain in the mask.
    unsigned src_idx = 0;
    for (unsigned dst_idx = 0; dst_idx < vl*4 && kmask != 0; dst_idx++, kmask >>= 1) {
      if ((kmask & 0x1) == 0)
        continue;

      result.vmm32u(dst_idx) = op.vmm32u(src_idx);
      src_idx++;
    }
  }

  BX_WRITE_AVX_REGZ(i->dst(), result, vl);

  BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPS_MASK_VpsWpsR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
@ -1640,6 +1674,39 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPS_MASK_VpsWpsR(bxInstruction_c *i)
BX_NEXT_INSTR(i);
}
// Masked VEXPANDPD, memory-source form.
// Qword elements are read consecutively from memory and placed, in order, into
// the destination elements whose opmask bit is set. Elements whose opmask bit is
// clear keep whatever the result was seeded with: a cleared register under
// zero-masking, otherwise the previous destination contents.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  BxPackedAvxRegister result, op;

  // Seed the result with the values the unselected elements must end up holding.
  if (! i->isZeroMasking()) {
    result = BX_READ_AVX_REG(i->dst());
  }
  else {
    result.clear();
  }

  unsigned vl = i->getVL();

  // Drop opmask bits that lie beyond the qword element count of this vector length.
  Bit32u kmask = BX_READ_8BIT_OPMASK(i->opmask());
  kmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(vl));

  // With an empty opmask nothing is selected, so memory is not touched at all.
  if (kmask != 0) {
    // One source element is consumed per set opmask bit, so only the first
    // popcntb(kmask) qword elements are requested from memory.
    unsigned elements_to_read = popcntb(kmask);
    Bit32u load_mask = (1 << elements_to_read) - 1;
    avx_masked_load64(i, BX_CPU_RESOLVE_ADDR(i), &op, load_mask);

    // dst_idx walks the destination elements, src_idx walks the packed source;
    // the walk stops early once no selected elements remain in the mask.
    unsigned src_idx = 0;
    for (unsigned dst_idx = 0; dst_idx < vl*2 && kmask != 0; dst_idx++, kmask >>= 1) {
      if ((kmask & 0x1) == 0)
        continue;

      result.vmm64u(dst_idx) = op.vmm64u(src_idx);
      src_idx++;
    }
  }

  BX_WRITE_AVX_REGZ(i->dst(), result, vl);

  BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPD_MASK_VpdWpdR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
@ -1649,9 +1716,9 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXPANDPD_MASK_VpdWpdR(bxInstruction_c *i)
result = BX_READ_AVX_REG(i->dst());
Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());
unsigned len = i->getVL(), k = 0;
unsigned len = i->getVL();
for (unsigned n = 0; n < len*2; n++, opmask >>= 1) {
for (unsigned n = 0, k = 0; n < len*2; n++, opmask >>= 1) {
if (! opmask) break;
if (opmask & 0x1) {

View File

@ -3433,6 +3433,8 @@ public: // for now...
BX_SMF void VCOMPRESSPD_MASK_WpdVpd(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// Masked EXPAND handlers, register-source forms: the source vector is read from i->src().
BX_SMF void VEXPANDPS_MASK_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VEXPANDPD_MASK_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// Masked EXPAND handlers, memory-source forms: the source elements are fetched with a
// masked load from the resolved effective address, one element per set opmask bit.
BX_SMF void VEXPANDPS_MASK_VpsWpsM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VEXPANDPD_MASK_VpdWpdM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VPCOMPRESSB_MASK_WdqVdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VPCOMPRESSW_MASK_WdqVdq(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -3804,8 +3804,8 @@ bx_define_opcode(BX_IA_V512_VSHUFF64x2_VpdHpdWpdIb_Kmask, "vshuff64x2", "vshuff6
// vexpandps/vexpandpd without an opmask: both handler arguments name the plain
// VMOVUPS/VMOVAPS move handlers (presumably the memory-form and register-form slots).
bx_define_opcode(BX_IA_V512_VEXPANDPS_VpsWps, "vexpandps", "vexpandps", &BX_CPU_C::VMOVUPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VEXPANDPD_VpdWpd, "vexpandpd", "vexpandpd", &BX_CPU_C::VMOVUPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vpd, OP_mVpd64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
// NOTE(review): the two *_Kmask entries appear twice below - first with LOAD_Vector as the
// first handler, then with the dedicated VEXPANDPS/VEXPANDPD_MASK_*M handlers. This looks like
// the before/after lines of a diff; confirm that only the second pair is meant to remain.
bx_define_opcode(BX_IA_V512_VEXPANDPS_VpsWps_Kmask, "vexpandps", "vexpandps", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VEXPANDPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VEXPANDPD_VpdWpd_Kmask, "vexpandpd", "vexpandpd", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VEXPANDPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_mVpd64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VEXPANDPS_VpsWps_Kmask, "vexpandps", "vexpandps", &BX_CPU_C::VEXPANDPS_MASK_VpsWpsM, &BX_CPU_C::VEXPANDPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VEXPANDPD_VpdWpd_Kmask, "vexpandpd", "vexpandpd", &BX_CPU_C::VEXPANDPD_MASK_VpdWpdM, &BX_CPU_C::VEXPANDPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_mVpd64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
// vcompressps/vcompresspd without an opmask: likewise mapped onto plain move handlers.
bx_define_opcode(BX_IA_V512_VCOMPRESSPS_WpsVps, "vcompressps", "vcompressps", &BX_CPU_C::VMOVUPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_mVps32, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCOMPRESSPD_WpdVpd, "vcompresspd", "vcompresspd", &BX_CPU_C::VMOVUPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_mVpd64, OP_Vpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
@ -3819,8 +3819,8 @@ bx_define_opcode(BX_IA_V512_VPEXPANDW_VdqWdq_Kmask, "vpexpandw", "vpexpandw", &B
// vpexpandd/vpexpandq without an opmask: both handler arguments name the plain
// VMOVUPS/VMOVAPS move handlers (presumably the memory-form and register-form slots).
bx_define_opcode(BX_IA_V512_VPEXPANDD_VdqWdq, "vpexpandd", "vpexpandd", &BX_CPU_C::VMOVUPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXPANDQ_VdqWdq, "vpexpandq", "vpexpandq", &BX_CPU_C::VMOVUPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
// The masked integer expands reuse the VEXPANDPS/VEXPANDPD masked handlers (dword/qword elements).
// NOTE(review): the two *_Kmask entries appear twice below - first with LOAD_Vector as the
// first handler, then with the dedicated VEXPANDPS/VEXPANDPD_MASK_*M handlers. This looks like
// the before/after lines of a diff; confirm that only the second pair is meant to remain.
bx_define_opcode(BX_IA_V512_VPEXPANDD_VdqWdq_Kmask, "vpexpandd", "vpexpandd", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VEXPANDPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXPANDQ_VdqWdq_Kmask, "vpexpandq", "vpexpandq", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VEXPANDPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXPANDD_VdqWdq_Kmask, "vpexpandd", "vpexpandd", &BX_CPU_C::VEXPANDPS_MASK_VpsWpsM, &BX_CPU_C::VEXPANDPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPEXPANDQ_VdqWdq_Kmask, "vpexpandq", "vpexpandq", &BX_CPU_C::VEXPANDPD_MASK_VpdWpdM, &BX_CPU_C::VEXPANDPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
// vpcompressb/vpcompressw without an opmask: likewise mapped onto plain move handlers.
bx_define_opcode(BX_IA_V512_VPCOMPRESSB_WdqVdq, "vpcompressb", "vpcompressb", &BX_CPU_C::VMOVUPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512_VBMI2, OP_mVdq8, OP_Vdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPCOMPRESSW_WdqVdq, "vpcompressw", "vpcompressw", &BX_CPU_C::VMOVUPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512_VBMI2, OP_mVdq16, OP_Vdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
@ -3955,7 +3955,7 @@ bx_define_opcode(BX_IA_V512_VPDPBUSDS_VdqHdqWdq_Kmask, "vpdpbusds", "vpdpbusds",
bx_define_opcode(BX_IA_V512_VPDPWSSD_VdqHdqWdq_Kmask, "vpdpwssd", "vpdpwssd", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::HANDLE_AVX512_3OP_DWORD_EL_MASK<xmm_pdpwssd>, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPDPWSSDS_VdqHdqWdq_Kmask, "vpdpwssds", "vpdpwssds", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::HANDLE_AVX512_3OP_DWORD_EL_MASK<xmm_pdpwssds>, BX_ISA_AVX512_VNNI, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
// NOTE(review): vpshufbitqmb appears twice below - first with LOAD_Vector, then with
// LOAD_MASK_VectorB as the first handler. This looks like the before/after lines of a diff;
// confirm that only the LOAD_MASK_VectorB entry is meant to remain.
bx_define_opcode(BX_IA_V512_VPSHUFBITQMB_KGqHdqWdq_Kmask, "vpshufbitqmb", "vpshufbitqmb", &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPSHUFBITQMB_MASK_KGqHdqWdqR, BX_ISA_AVX512_BITALG, OP_KGq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPSHUFBITQMB_KGqHdqWdq_Kmask, "vpshufbitqmb", "vpshufbitqmb", &BX_CPU_C::LOAD_MASK_VectorB, &BX_CPU_C::VPSHUFBITQMB_MASK_KGqHdqWdqR, BX_ISA_AVX512_BITALG, OP_KGq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VP2INTERSECTD_KGqHdqWdq, "vp2intersectd", "vp2intersectd", &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VP2INTERSECTD_KGqHdqWdqR, BX_ISA_AVX512_VP2INTERSECT, OP_KGq2, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VP2INTERSECTQ_KGqHdqWdq, "vp2intersectq", "vp2intersectq", &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VP2INTERSECTQ_KGqHdqWdqR, BX_ISA_AVX512_VP2INTERSECT, OP_KGq2, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)