Implemented VPMOV?2? and VPMIN* AVX512 instructions
The only missing AVX512BW/AVX512DQ opcodes are now: "512.66.0F38.W1 10 VPSRLVW" "512.66.0F38.W1 11 VPSRAVW" "512.66.0F38.W1 12 VPSLLVW" "512.66.0F3A.W1 0F VPALIGNR" "NDS.66.0F3A.W0 42 VDBPSADBW" "NDS.512.66.0F3A.W0 50 VRANGEPS NDS.512.66.0F3A.W1 50 VRANGEPD" "NDS.512.66.0F3A.W0 51 VRANGESS NDS.512.66.0F3A.W1 51 VRANGESD" "NDS.512.66.0F3A.W0 56 VREDUCEPS NDS.512.66.0F3A.W1 56 VREDUCEPD" "NDS.512.66.0F3A.W0 57 VREDUCESS NDS.512.66.0F3A.W1 57 VREDUCESD"
This commit is contained in:
parent
ad7ef68876
commit
c4c8652a3b
@ -472,6 +472,7 @@ AVX512_2OP_WORD_EL(VPSUBW_MASK_VdqHdqWdqR, xmm_psubw)
|
||||
// Masked AVX-512BW two-operand word-element handlers.
// Each line expands (via the AVX512_2OP_WORD_EL macro) to a register-form
// VdqHdqWdq handler built on the named 128-bit SIMD helper; the _MASK suffix
// indicates the opmask-aware form (macro body not visible here — masking
// semantics provided by the macro).
AVX512_2OP_WORD_EL(VPSUBSW_MASK_VdqHdqWdqR, xmm_psubsw)
AVX512_2OP_WORD_EL(VPSUBUSW_MASK_VdqHdqWdqR, xmm_psubusw)
AVX512_2OP_WORD_EL(VPMINSW_MASK_VdqHdqWdqR, xmm_pminsw)
AVX512_2OP_WORD_EL(VPMINUW_MASK_VdqHdqWdqR, xmm_pminuw)
AVX512_2OP_WORD_EL(VPMAXSW_MASK_VdqHdqWdqR, xmm_pmaxsw)
AVX512_2OP_WORD_EL(VPMAXUW_MASK_VdqHdqWdqR, xmm_pmaxuw)
AVX512_2OP_WORD_EL(VPMADDUBSW_MASK_VdqHdqWdqR, xmm_pmaddubsw)
@ -506,6 +507,7 @@ AVX512_2OP_BYTE_EL(VPADDUSB_MASK_VdqHdqWdqR, xmm_paddusb)
|
||||
// Masked AVX-512BW two-operand byte-element handlers.
// Each line expands (via the AVX512_2OP_BYTE_EL macro) to a register-form
// VdqHdqWdq handler built on the named 128-bit SIMD helper; the _MASK suffix
// indicates the opmask-aware form (macro body not visible here).
AVX512_2OP_BYTE_EL(VPSUBB_MASK_VdqHdqWdqR, xmm_psubb)
AVX512_2OP_BYTE_EL(VPSUBSB_MASK_VdqHdqWdqR, xmm_psubsb)
AVX512_2OP_BYTE_EL(VPSUBUSB_MASK_VdqHdqWdqR, xmm_psubusb)
AVX512_2OP_BYTE_EL(VPMINSB_MASK_VdqHdqWdqR, xmm_pminsb)
AVX512_2OP_BYTE_EL(VPMINUB_MASK_VdqHdqWdqR, xmm_pminub)
AVX512_2OP_BYTE_EL(VPMAXUB_MASK_VdqHdqWdqR, xmm_pmaxub)
AVX512_2OP_BYTE_EL(VPMAXSB_MASK_VdqHdqWdqR, xmm_pmaxsb)
@ -1975,4 +1977,118 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCOMPRESSPD_MASK_WpdVpd(bxInstruct
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
// convert mask
|
||||
|
||||
// VPMOVM2B: expand an opmask register into a byte vector — destination byte n
// becomes all-ones when mask bit n is set, all-zeroes otherwise.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2B_VdqKEqR(bxInstruction_c *i)
{
  Bit64u mask = BX_READ_OPMASK(i->src());
  unsigned vl = i->getVL();

  // 16 byte elements per 128-bit lane => consume 16 mask bits per lane
  for (unsigned lane = 0; lane < vl; lane++, mask >>= 16)
    xmm_pmovm2b(&BX_READ_AVX_REG_LANE(i->dst(), lane), (Bit32u) mask);

  BX_CLEAR_AVX_REGZ(i->dst(), vl);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VPMOVM2W: expand an opmask register into a word vector — destination word n
// becomes all-ones when mask bit n is set, all-zeroes otherwise.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2W_VdqKEdR(bxInstruction_c *i)
{
  Bit32u mask = BX_READ_32BIT_OPMASK(i->src());
  unsigned vl = i->getVL();

  // 8 word elements per 128-bit lane => consume 8 mask bits per lane
  for (unsigned lane = 0; lane < vl; lane++, mask >>= 8)
    xmm_pmovm2w(&BX_READ_AVX_REG_LANE(i->dst(), lane), mask);

  BX_CLEAR_AVX_REGZ(i->dst(), vl);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VPMOVM2D: expand an opmask register into a dword vector — destination dword n
// becomes all-ones when mask bit n is set, all-zeroes otherwise.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2D_VdqKEwR(bxInstruction_c *i)
{
  Bit32u mask = (Bit32u) BX_READ_16BIT_OPMASK(i->src());
  unsigned vl = i->getVL();

  // 4 dword elements per 128-bit lane => consume 4 mask bits per lane
  for (unsigned lane = 0; lane < vl; lane++, mask >>= 4)
    xmm_pmovm2d(&BX_READ_AVX_REG_LANE(i->dst(), lane), mask);

  BX_CLEAR_AVX_REGZ(i->dst(), vl);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VPMOVM2Q: expand an opmask register into a qword vector — destination qword n
// becomes all-ones when mask bit n is set, all-zeroes otherwise.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2Q_VdqKEbR(bxInstruction_c *i)
{
  Bit32u mask = (Bit32u) BX_READ_8BIT_OPMASK(i->src());
  unsigned vl = i->getVL();

  // 2 qword elements per 128-bit lane => consume 2 mask bits per lane
  for (unsigned lane = 0; lane < vl; lane++, mask >>= 2)
    xmm_pmovm2q(&BX_READ_AVX_REG_LANE(i->dst(), lane), mask);

  BX_CLEAR_AVX_REGZ(i->dst(), vl);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VPMOVB2M: gather the sign bit of every byte element of the source vector
// into an opmask register (bit n = sign of byte n).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVB2M_KGqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned vl = i->getVL();

  Bit64u result = 0;
  // each 128-bit lane contributes 16 byte sign bits
  for (unsigned lane = 0; lane < vl; lane++)
    result |= ((Bit64u) xmm_pmovmskb(&src.vmm128(lane))) << (16*lane);

  BX_WRITE_OPMASK(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VPMOVW2M: gather the sign bit of every word element of the source vector
// into an opmask register (bit n = sign of word n).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVW2M_KGdWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned vl = i->getVL();

  Bit32u result = 0;
  // each 128-bit lane contributes 8 word sign bits
  for (unsigned lane = 0; lane < vl; lane++)
    result |= xmm_pmovmskw(&src.vmm128(lane)) << (8*lane);

  BX_WRITE_OPMASK(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VPMOVD2M: gather the sign bit of every dword element of the source vector
// into an opmask register (bit n = sign of dword n).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVD2M_KGwWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned vl = i->getVL();

  Bit32u result = 0;
  // each 128-bit lane contributes 4 dword sign bits
  for (unsigned lane = 0; lane < vl; lane++)
    result |= xmm_pmovmskd(&src.vmm128(lane)) << (4*lane);

  BX_WRITE_OPMASK(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VPMOVQ2M: gather the sign bit of every qword element of the source vector
// into an opmask register (bit n = sign of qword n).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQ2M_KGbWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned vl = i->getVL();

  Bit32u result = 0;
  // each 128-bit lane contributes 2 qword sign bits
  for (unsigned lane = 0; lane < vl; lane++)
    result |= xmm_pmovmskq(&src.vmm128(lane)) << (2*lane);

  BX_WRITE_OPMASK(i->dst(), result);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
#endif
|
||||
|
@ -3497,10 +3497,12 @@ public: // for now...
|
||||
BX_SMF BX_INSF_TYPE VPADDSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPADDUSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF BX_INSF_TYPE VPMINSB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPMINUB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPMAXUB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPMAXSB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPMINSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPMINUW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPMAXSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPMAXUW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
@ -3803,6 +3805,16 @@ public: // for now...
|
||||
|
||||
BX_SMF BX_INSF_TYPE VPBROADCASTMB2Q_VdqKEbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPBROADCASTMW2D_VdqKEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
// mask -> vector expansion (VPMOVM2*)
BX_SMF BX_INSF_TYPE VPMOVM2B_VdqKEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVM2W_VdqKEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVM2D_VdqKEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVM2Q_VdqKEbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

// vector sign bits -> mask extraction (VPMOV*2M)
BX_SMF BX_INSF_TYPE VPMOVB2M_KGqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVW2M_KGdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVD2M_KGwWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVQ2M_KGbWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
#endif
|
||||
|
||||
BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -604,6 +604,18 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3827[3] = {
|
||||
/* F2 */ { 0, BX_IA_ERROR }
|
||||
};
|
||||
|
||||
// EVEX 0F38.28 — dispatch by SSE prefix (66 / F3 / F2).
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3828[3] = {
  /* 66 */ { BxVexW1, BX_IA_V512_VPMULDQ_VdqHdqWdq },
  /* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVM2B_VdqKEq },  // VexW-aliased: W1 selects the following IA entry (VPMOVM2W)
  /* F2 */ { 0, BX_IA_ERROR }
};
|
||||
|
||||
// EVEX 0F38.29 — dispatch by SSE prefix (66 / F3 / F2).
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3829[3] = {
  /* 66 */ { BxVexW1, BX_IA_V512_VPCMPEQQ_KGbHdqWdq },
  /* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVB2M_KGqWdq },  // VexW-aliased: W1 selects the following IA entry (VPMOVW2M)
  /* F2 */ { 0, BX_IA_ERROR }
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f382a[3] = {
|
||||
/* 66 */ { BxVexW0, BX_IA_V512_VMOVNTDQA_VdqMdq },
|
||||
/* F3 */ { BxVexW1, BX_IA_V512_VPBROADCASTMB2Q_VdqKEb },
|
||||
@ -682,6 +694,24 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3835_Mask[3] = {
|
||||
/* F2 */ { 0, BX_IA_ERROR }
|
||||
};
|
||||
|
||||
// EVEX 0F38.38 — dispatch by SSE prefix (66 / F3 / F2).
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3838[3] = {
  /* 66 */ { 0, BX_IA_V512_VPMINSB_VdqHdqWdq },
  /* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVM2D_VdqKEw },  // VexW-aliased: W1 selects the following IA entry (VPMOVM2Q)
  /* F2 */ { 0, BX_IA_ERROR }
};
|
||||
|
||||
// EVEX 0F38.39 — dispatch by SSE prefix (66 / F3 / F2).
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3839[3] = {
  /* 66 */ { BxAliasVexW, BX_IA_V512_VPMINSD_VdqHdqWdq },  // VexW-aliased: W1 selects the following IA entry
  /* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVD2M_KGwWdq },    // VexW-aliased: W1 selects the following IA entry (VPMOVQ2M)
  /* F2 */ { 0, BX_IA_ERROR }
};
|
||||
|
||||
// EVEX 0F38.3A — dispatch by SSE prefix (66 / F3 / F2).
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f383a[3] = {
  /* 66 */ { 0, BX_IA_V512_VPMINUW_VdqHdqWdq },
  /* F3 */ { BxVexW1, BX_IA_V512_VPBROADCASTMW2D_VdqKEw },
  /* F2 */ { 0, BX_IA_ERROR }
};
|
||||
|
||||
/* ************************************************************************ */
|
||||
|
||||
/* ******** */
|
||||
@ -1360,9 +1390,9 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
||||
/* 26 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3826 },
|
||||
/* 27 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3827 },
|
||||
/* 27 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3827 },
|
||||
/* 28 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPMULDQ_VdqHdqWdq },
|
||||
/* 28 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3828 },
|
||||
/* 28 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPMULDQ_VdqHdqWdq_Kmask },
|
||||
/* 29 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPEQQ_KGbHdqWdq },
|
||||
/* 29 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3829 },
|
||||
/* 29 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPEQQ_KGbHdqWdq },
|
||||
/* 2A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f382a },
|
||||
/* 2A */ { 0, BX_IA_ERROR }, // #UD
|
||||
@ -1392,12 +1422,12 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
||||
/* 36 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMD_VdqHdqWdq_Kmask },
|
||||
/* 37 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq },
|
||||
/* 37 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq },
|
||||
/* 38 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 38 */ { 0, BX_IA_ERROR },
|
||||
/* 39 k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINSD_VdqHdqWdq },
|
||||
/* 38 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3838 },
|
||||
/* 38 */ { BxPrefixSSE66, BX_IA_V512_VPMINSB_VdqHdqWdq_Kmask },
|
||||
/* 39 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3839 },
|
||||
/* 39 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINSD_VdqHdqWdq_Kmask },
|
||||
/* 3A k0 */ { BxVexW1 | BxPrefixSSEF3, BX_IA_V512_VPBROADCASTMW2D_VdqKEw },
|
||||
/* 3A */ { 0, BX_IA_ERROR }, // #UD
|
||||
/* 3A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f383a },
|
||||
/* 3A */ { BxPrefixSSE66, BX_IA_V512_VPMINUW_VdqHdqWdq_Kmask },
|
||||
/* 3B k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINUD_VdqHdqWdq },
|
||||
/* 3B */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINUD_VdqHdqWdq_Kmask },
|
||||
/* 3C k0 */ { BxPrefixSSE66, BX_IA_V512_VPMAXSB_VdqHdqWdq },
|
||||
|
@ -2926,17 +2926,21 @@ bx_define_opcode(BX_IA_V512_VPADDW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_
|
||||
bx_define_opcode(BX_IA_V512_VPADDSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPADDSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VPADDUSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPADDUSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
||||
// AVX-512BW packed byte/word min/max — unmasked (k0) forms.
// All are register/memory VdqHdqWdq forms; SAE and broadcast are not allowed
// (byte/word element instructions have no embedded broadcast).
bx_define_opcode(BX_IA_V512_VPMINSB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINUB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXUB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXSB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINSW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINUW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXSW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXUW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)

// AVX-512BW packed byte/word min/max — opmask (_Kmask) forms, routed to the
// *_MASK_* handlers.
bx_define_opcode(BX_IA_V512_VPMINSB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINUB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXUB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXSB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINUW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXUW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
||||
@ -3537,6 +3541,16 @@ bx_define_opcode(BX_IA_V512_VPCONFLICTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VPLZCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPLZCNTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
bx_define_opcode(BX_IA_V512_VPLZCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPLZCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
|
||||
// Mask -> vector expansion (VPMOVM2*): register-only forms, so the memory
// loader is BxError. Byte/word forms require AVX512BW, dword/qword AVX512DQ.
bx_define_opcode(BX_IA_V512_VPMOVM2B_VdqKEq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2B_VdqKEqR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVM2W_VdqKEd, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2W_VdqKEdR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVM2D_VdqKEw, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2D_VdqKEwR, BX_ISA_AVX512_DQ, OP_Vdq, OP_KEw, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVM2Q_VdqKEb, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2Q_VdqKEbR, BX_ISA_AVX512_DQ, OP_Vdq, OP_KEb, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)

// Vector sign bits -> mask extraction (VPMOV*2M): register-only forms.
bx_define_opcode(BX_IA_V512_VPMOVB2M_KGqWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVB2M_KGqWdqR, BX_ISA_AVX512_BW, OP_KGq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVW2M_KGdWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVW2M_KGdWdqR, BX_ISA_AVX512_BW, OP_KGd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVD2M_KGwWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVD2M_KGwWdqR, BX_ISA_AVX512_DQ, OP_KGw, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVQ2M_KGbWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVQ2M_KGbWdqR, BX_ISA_AVX512_DQ, OP_KGb, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
// VexW alias
|
||||
|
||||
// VexW64 aliased
|
||||
|
@ -454,7 +454,7 @@ BX_CPP_INLINE void xmm_psignd(BxPackedXmmRegister *op1, const BxPackedXmmRegiste
|
||||
|
||||
BX_CPP_INLINE Bit32u xmm_pmovmskb(const BxPackedXmmRegister *op)
|
||||
{
|
||||
unsigned mask = 0;
|
||||
Bit32u mask = 0;
|
||||
|
||||
if(op->xmmsbyte(0x0) < 0) mask |= 0x0001;
|
||||
if(op->xmmsbyte(0x1) < 0) mask |= 0x0002;
|
||||
@ -476,9 +476,25 @@ BX_CPP_INLINE Bit32u xmm_pmovmskb(const BxPackedXmmRegister *op)
|
||||
return mask;
|
||||
}
|
||||
|
||||
// Collect the sign bits of the eight 16-bit elements into bits [7:0].
BX_CPP_INLINE Bit32u xmm_pmovmskw(const BxPackedXmmRegister *op)
{
  Bit32u mask = 0;

  for (unsigned n=0; n < 8; n++) {
    if (op->xmm16s(n) < 0) mask |= (1u << n);
  }

  return mask;
}
|
||||
|
||||
BX_CPP_INLINE Bit32u xmm_pmovmskd(const BxPackedXmmRegister *op)
|
||||
{
|
||||
unsigned mask = 0;
|
||||
Bit32u mask = 0;
|
||||
|
||||
if(op->xmm32s(0) < 0) mask |= 0x1;
|
||||
if(op->xmm32s(1) < 0) mask |= 0x2;
|
||||
@ -490,7 +506,7 @@ BX_CPP_INLINE Bit32u xmm_pmovmskd(const BxPackedXmmRegister *op)
|
||||
|
||||
BX_CPP_INLINE Bit32u xmm_pmovmskq(const BxPackedXmmRegister *op)
|
||||
{
|
||||
unsigned mask = 0;
|
||||
Bit32u mask = 0;
|
||||
|
||||
if(op->xmm32s(1) < 0) mask |= 0x1;
|
||||
if(op->xmm32s(3) < 0) mask |= 0x2;
|
||||
@ -498,6 +514,34 @@ BX_CPP_INLINE Bit32u xmm_pmovmskq(const BxPackedXmmRegister *op)
|
||||
return mask;
|
||||
}
|
||||
|
||||
// Expand the low 16 mask bits: byte n of dst becomes 0xFF when bit n is set,
// 0x00 otherwise.
BX_CPP_INLINE void xmm_pmovm2b(BxPackedXmmRegister *dst, Bit32u mask)
{
  for (unsigned n=0; n < 16; n++) {
    dst->xmmsbyte(n) = (mask & (1u << n)) ? -1 : 0;
  }
}
|
||||
|
||||
// Expand the low 8 mask bits: word n of dst becomes all-ones when bit n is
// set, zero otherwise.
BX_CPP_INLINE void xmm_pmovm2w(BxPackedXmmRegister *dst, Bit32u mask)
{
  for (unsigned n=0; n < 8; n++) {
    dst->xmm16s(n) = (mask & (1u << n)) ? -1 : 0;
  }
}
|
||||
|
||||
// Expand the low 4 mask bits: dword n of dst becomes all-ones when bit n is
// set, zero otherwise.
BX_CPP_INLINE void xmm_pmovm2d(BxPackedXmmRegister *dst, Bit32u mask)
{
  for (unsigned n=0; n < 4; n++) {
    dst->xmm32s(n) = (mask & (1u << n)) ? -1 : 0;
  }
}
|
||||
|
||||
// Expand the low 2 mask bits: qword n of dst becomes all-ones when bit n is
// set, zero otherwise.
BX_CPP_INLINE void xmm_pmovm2q(BxPackedXmmRegister *dst, Bit32u mask)
{
  for (unsigned n=0; n < 2; n++) {
    dst->xmm64s(n) = (mask & (1u << n)) ? -1 : 0;
  }
}
|
||||
|
||||
// blend
|
||||
|
||||
BX_CPP_INLINE void xmm_pblendb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, Bit32u mask)
|
||||
|
Loading…
Reference in New Issue
Block a user