Implemented VPMOV?2? and VPMIN* AVX512 instructions

The only missing AVX512BW/AVX512DQ opcodes are now:

"512.66.0F38.W1 10 VPSRLVW"
"512.66.0F38.W1 11 VPSRAVW"
"512.66.0F38.W1 12 VPSLLVW"

"512.66.0F3A.W1 0F VPALIGNR"
"NDS.66.0F3A.W0 42 VDBPSADBW"

"NDS.512.66.0F3A.W0 50 VRANGEPS
 NDS.512.66.0F3A.W1 50 VRANGEPD"
"NDS.512.66.0F3A.W0 51 VRANGESS
 NDS.512.66.0F3A.W1 51 VRANGESD"

"NDS.512.66.0F3A.W0 56 VREDUCEPS
 NDS.512.66.0F3A.W1 56 VREDUCEPD"
"NDS.512.66.0F3A.W0 57 VREDUCESS
 NDS.512.66.0F3A.W1 57 VREDUCESD"
This commit is contained in:
Stanislav Shwartsman 2014-07-22 20:36:55 +00:00
parent ad7ef68876
commit c4c8652a3b
5 changed files with 226 additions and 10 deletions

View File

@ -472,6 +472,7 @@ AVX512_2OP_WORD_EL(VPSUBW_MASK_VdqHdqWdqR, xmm_psubw)
AVX512_2OP_WORD_EL(VPSUBSW_MASK_VdqHdqWdqR, xmm_psubsw)
AVX512_2OP_WORD_EL(VPSUBUSW_MASK_VdqHdqWdqR, xmm_psubusw)
AVX512_2OP_WORD_EL(VPMINSW_MASK_VdqHdqWdqR, xmm_pminsw)
AVX512_2OP_WORD_EL(VPMINUW_MASK_VdqHdqWdqR, xmm_pminuw)
AVX512_2OP_WORD_EL(VPMAXSW_MASK_VdqHdqWdqR, xmm_pmaxsw)
AVX512_2OP_WORD_EL(VPMAXUW_MASK_VdqHdqWdqR, xmm_pmaxuw)
AVX512_2OP_WORD_EL(VPMADDUBSW_MASK_VdqHdqWdqR, xmm_pmaddubsw)
@ -506,6 +507,7 @@ AVX512_2OP_BYTE_EL(VPADDUSB_MASK_VdqHdqWdqR, xmm_paddusb)
AVX512_2OP_BYTE_EL(VPSUBB_MASK_VdqHdqWdqR, xmm_psubb)
AVX512_2OP_BYTE_EL(VPSUBSB_MASK_VdqHdqWdqR, xmm_psubsb)
AVX512_2OP_BYTE_EL(VPSUBUSB_MASK_VdqHdqWdqR, xmm_psubusb)
AVX512_2OP_BYTE_EL(VPMINSB_MASK_VdqHdqWdqR, xmm_pminsb)
AVX512_2OP_BYTE_EL(VPMINUB_MASK_VdqHdqWdqR, xmm_pminub)
AVX512_2OP_BYTE_EL(VPMAXUB_MASK_VdqHdqWdqR, xmm_pmaxub)
AVX512_2OP_BYTE_EL(VPMAXSB_MASK_VdqHdqWdqR, xmm_pmaxsb)
@ -1975,4 +1977,118 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCOMPRESSPD_MASK_WpdVpd(bxInstruct
BX_NEXT_INSTR(i);
}
// convert mask
// VPMOVM2B: expand the source opmask into byte elements of the destination.
// Each lane processed by xmm_pmovm2b consumes the low 16 bits of the mask,
// after which the mask is shifted down by 16 for the next lane.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2B_VdqKEqR(bxInstruction_c *i)
{
  // getVL() is used here as the number of lanes to fill
  const unsigned num_lanes = i->getVL();
  Bit64u remaining = BX_READ_OPMASK(i->src());

  for (unsigned lane = 0; lane < num_lanes; lane++, remaining >>= 16) {
    xmm_pmovm2b(&BX_READ_AVX_REG_LANE(i->dst(), lane), (Bit32u) remaining);
  }

  // NOTE(review): presumably zeroes the destination above the written lanes - confirm
  BX_CLEAR_AVX_REGZ(i->dst(), num_lanes);
  BX_NEXT_INSTR(i);
}
// VPMOVM2W: expand the 32-bit source opmask into word elements of the
// destination. Each lane handled by xmm_pmovm2w consumes 8 mask bits.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2W_VdqKEdR(bxInstruction_c *i)
{
  // getVL() is used here as the number of lanes to fill
  const unsigned num_lanes = i->getVL();
  Bit32u remaining = BX_READ_32BIT_OPMASK(i->src());

  for (unsigned lane = 0; lane < num_lanes; lane++, remaining >>= 8) {
    xmm_pmovm2w(&BX_READ_AVX_REG_LANE(i->dst(), lane), remaining);
  }

  // NOTE(review): presumably zeroes the destination above the written lanes - confirm
  BX_CLEAR_AVX_REGZ(i->dst(), num_lanes);
  BX_NEXT_INSTR(i);
}
// VPMOVM2D: expand the 16-bit source opmask into dword elements of the
// destination. Each lane handled by xmm_pmovm2d consumes 4 mask bits.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2D_VdqKEwR(bxInstruction_c *i)
{
  // getVL() is used here as the number of lanes to fill
  const unsigned num_lanes = i->getVL();
  Bit32u remaining = (Bit32u) BX_READ_16BIT_OPMASK(i->src());

  for (unsigned lane = 0; lane < num_lanes; lane++, remaining >>= 4) {
    xmm_pmovm2d(&BX_READ_AVX_REG_LANE(i->dst(), lane), remaining);
  }

  // NOTE(review): presumably zeroes the destination above the written lanes - confirm
  BX_CLEAR_AVX_REGZ(i->dst(), num_lanes);
  BX_NEXT_INSTR(i);
}
// VPMOVM2Q: expand the 8-bit source opmask into qword elements of the
// destination. Each lane handled by xmm_pmovm2q consumes 2 mask bits.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVM2Q_VdqKEbR(bxInstruction_c *i)
{
  // getVL() is used here as the number of lanes to fill
  const unsigned num_lanes = i->getVL();
  Bit32u remaining = (Bit32u) BX_READ_8BIT_OPMASK(i->src());

  for (unsigned lane = 0; lane < num_lanes; lane++, remaining >>= 2) {
    xmm_pmovm2q(&BX_READ_AVX_REG_LANE(i->dst(), lane), remaining);
  }

  // NOTE(review): presumably zeroes the destination above the written lanes - confirm
  BX_CLEAR_AVX_REGZ(i->dst(), num_lanes);
  BX_NEXT_INSTR(i);
}
// VPMOVB2M: gather the per-byte bits reported by xmm_pmovmskb for every lane
// of the source vector into one 64-bit opmask (16 bits per lane).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVB2M_KGqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  const unsigned num_lanes = i->getVL();

  Bit64u result = 0;
  for (unsigned lane = 0, shift = 0; lane < num_lanes; lane++, shift += 16) {
    // widen before shifting so bits of the upper lanes are not lost
    const Bit64u lane_bits = (Bit64u) xmm_pmovmskb(&src.vmm128(lane));
    result |= lane_bits << shift;
  }

  BX_WRITE_OPMASK(i->dst(), result);
  BX_NEXT_INSTR(i);
}
// VPMOVW2M: gather the per-word sign bits reported by xmm_pmovmskw for every
// lane of the source vector into the destination opmask (8 bits per lane).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVW2M_KGdWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  const unsigned num_lanes = i->getVL();

  Bit32u result = 0;
  for (unsigned lane = 0, shift = 0; lane < num_lanes; lane++, shift += 8) {
    result |= xmm_pmovmskw(&src.vmm128(lane)) << shift;
  }

  BX_WRITE_OPMASK(i->dst(), result);
  BX_NEXT_INSTR(i);
}
// VPMOVD2M: gather the per-dword bits reported by xmm_pmovmskd for every lane
// of the source vector into the destination opmask (4 bits per lane).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVD2M_KGwWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  const unsigned num_lanes = i->getVL();

  Bit32u result = 0;
  for (unsigned lane = 0, shift = 0; lane < num_lanes; lane++, shift += 4) {
    result |= xmm_pmovmskd(&src.vmm128(lane)) << shift;
  }

  BX_WRITE_OPMASK(i->dst(), result);
  BX_NEXT_INSTR(i);
}
// VPMOVQ2M: gather the per-qword bits reported by xmm_pmovmskq for every lane
// of the source vector into the destination opmask (2 bits per lane).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQ2M_KGbWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  const unsigned num_lanes = i->getVL();

  Bit32u result = 0;
  for (unsigned lane = 0, shift = 0; lane < num_lanes; lane++, shift += 2) {
    result |= xmm_pmovmskq(&src.vmm128(lane)) << shift;
  }

  BX_WRITE_OPMASK(i->dst(), result);
  BX_NEXT_INSTR(i);
}
#endif

View File

@ -3497,10 +3497,12 @@ public: // for now...
BX_SMF BX_INSF_TYPE VPADDSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPADDUSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMINSB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMINUB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMAXUB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMAXSB_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMINSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMINUW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMAXSW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMAXUW_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -3803,6 +3805,16 @@ public: // for now...
BX_SMF BX_INSF_TYPE VPBROADCASTMB2Q_VdqKEbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPBROADCASTMW2D_VdqKEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// opmask -> vector conversions (VPMOVM2B/W/D/Q)
BX_SMF BX_INSF_TYPE VPMOVM2B_VdqKEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVM2W_VdqKEdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVM2D_VdqKEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVM2Q_VdqKEbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// vector -> opmask conversions (VPMOVB/W/D/Q2M)
BX_SMF BX_INSF_TYPE VPMOVB2M_KGqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVW2M_KGdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVD2M_KGwWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPMOVQ2M_KGbWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
#endif
BX_SMF BX_INSF_TYPE LZCNT_GwEwR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -604,6 +604,18 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3827[3] = {
/* F2 */ { 0, BX_IA_ERROR }
};
// EVEX 0F38 opcode 28 entries, one slot per SSE prefix (66 / F3 / F2).
// NOTE(review): BxAliasVexW on the F3 slot presumably lets EVEX.W pick between
// VPMOVM2B and its W1 sibling VPMOVM2W - confirm against the decoder.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3828[3] = {
/* 66 */ { BxVexW1, BX_IA_V512_VPMULDQ_VdqHdqWdq },
/* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVM2B_VdqKEq },
/* F2 */ { 0, BX_IA_ERROR }
};
// EVEX 0F38 opcode 29 entries, one slot per SSE prefix (66 / F3 / F2).
// NOTE(review): BxAliasVexW on the F3 slot presumably lets EVEX.W pick between
// VPMOVB2M and its W1 sibling VPMOVW2M - confirm against the decoder.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3829[3] = {
/* 66 */ { BxVexW1, BX_IA_V512_VPCMPEQQ_KGbHdqWdq },
/* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVB2M_KGqWdq },
/* F2 */ { 0, BX_IA_ERROR }
};
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f382a[3] = {
/* 66 */ { BxVexW0, BX_IA_V512_VMOVNTDQA_VdqMdq },
/* F3 */ { BxVexW1, BX_IA_V512_VPBROADCASTMB2Q_VdqKEb },
@ -682,6 +694,24 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3835_Mask[3] = {
/* F2 */ { 0, BX_IA_ERROR }
};
// EVEX 0F38 opcode 38 entries, one slot per SSE prefix (66 / F3 / F2).
// NOTE(review): BxAliasVexW on the F3 slot presumably lets EVEX.W pick between
// VPMOVM2D and its W1 sibling VPMOVM2Q - confirm against the decoder.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3838[3] = {
/* 66 */ { 0, BX_IA_V512_VPMINSB_VdqHdqWdq },
/* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVM2D_VdqKEw },
/* F2 */ { 0, BX_IA_ERROR }
};
// EVEX 0F38 opcode 39 entries, one slot per SSE prefix (66 / F3 / F2).
// NOTE(review): BxAliasVexW presumably lets EVEX.W pick the W1 sibling of each
// entry (VPMINSD -> VPMINSQ, VPMOVD2M -> VPMOVQ2M) - confirm against the decoder.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f3839[3] = {
/* 66 */ { BxAliasVexW, BX_IA_V512_VPMINSD_VdqHdqWdq },
/* F3 */ { BxAliasVexW, BX_IA_V512_VPMOVD2M_KGwWdq },
/* F2 */ { 0, BX_IA_ERROR }
};
// EVEX 0F38 opcode 3A entries, one slot per SSE prefix (66 / F3 / F2).
// The F3 slot is VPBROADCASTMW2D, which the Intel SDM encodes as
// EVEX.F3.0F38.W0 3A, so it must be qualified with BxVexW0 (only the byte
// variant VPBROADCASTMB2Q at opcode 2A uses W1).
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f383a[3] = {
/* 66 */ { 0, BX_IA_V512_VPMINUW_VdqHdqWdq },
/* F3 */ { BxVexW0, BX_IA_V512_VPBROADCASTMW2D_VdqKEw },
/* F2 */ { 0, BX_IA_ERROR }
};
/* ************************************************************************ */
/* ******** */
@ -1360,9 +1390,9 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 26 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3826 },
/* 27 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3827 },
/* 27 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3827 },
/* 28 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPMULDQ_VdqHdqWdq },
/* 28 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3828 },
/* 28 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPMULDQ_VdqHdqWdq_Kmask },
/* 29 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPEQQ_KGbHdqWdq },
/* 29 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3829 },
/* 29 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPEQQ_KGbHdqWdq },
/* 2A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f382a },
/* 2A */ { 0, BX_IA_ERROR }, // #UD
@ -1392,12 +1422,12 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 36 */ { BxAliasVexW | BxPrefixSSE66 | BxVexL1, BX_IA_V512_VPERMD_VdqHdqWdq_Kmask },
/* 37 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq },
/* 37 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPCMPGTQ_KGbHdqWdq },
/* 38 k0 */ { 0, BX_IA_ERROR },
/* 38 */ { 0, BX_IA_ERROR },
/* 39 k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINSD_VdqHdqWdq },
/* 38 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3838 },
/* 38 */ { BxPrefixSSE66, BX_IA_V512_VPMINSB_VdqHdqWdq_Kmask },
/* 39 k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f3839 },
/* 39 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINSD_VdqHdqWdq_Kmask },
/* 3A k0 */ { BxVexW1 | BxPrefixSSEF3, BX_IA_V512_VPBROADCASTMW2D_VdqKEw },
/* 3A */ { 0, BX_IA_ERROR }, // #UD
/* 3A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f383a },
/* 3A */ { BxPrefixSSE66, BX_IA_V512_VPMINUW_VdqHdqWdq_Kmask },
/* 3B k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINUD_VdqHdqWdq },
/* 3B */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VPMINUD_VdqHdqWdq_Kmask },
/* 3C k0 */ { BxPrefixSSE66, BX_IA_V512_VPMAXSB_VdqHdqWdq },

View File

@ -2926,17 +2926,21 @@ bx_define_opcode(BX_IA_V512_VPADDW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_
bx_define_opcode(BX_IA_V512_VPADDSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPADDSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPADDUSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPADDUSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINSB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINUB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXUB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXSB_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSB_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINSW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINUW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXSW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXUW_VdqHdqWdq, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUW_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINSB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINUB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXUB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXSB_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSB_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMINUW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMINUW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXSW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXSW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMAXUW_VdqHdqWdq_Kmask, &BX_CPU_C::LOAD_Vector, &BX_CPU_C::VPMAXUW_MASK_VdqHdqWdqR, BX_ISA_AVX512_BW, OP_Vdq, OP_Hdq, OP_mVdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
@ -3537,6 +3541,16 @@ bx_define_opcode(BX_IA_V512_VPCONFLICTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_
bx_define_opcode(BX_IA_V512_VPLZCNTD_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPLZCNTD_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq32, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPLZCNTQ_VdqWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPLZCNTQ_MASK_VdqWdqR, BX_ISA_AVX512_CD, OP_Vdq, OP_mVdq64, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
// Opmask <-> vector conversions (VPMOVM2B/W/D/Q and VPMOVB/W/D/Q2M).
// Byte/word forms are tagged BX_ISA_AVX512_BW, dword/qword forms BX_ISA_AVX512_DQ.
// NOTE(review): the second argument is BxError for every entry - presumably the
// memory-operand handler, i.e. these opcodes are register-only; confirm against
// the bx_define_opcode definition.
bx_define_opcode(BX_IA_V512_VPMOVM2B_VdqKEq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2B_VdqKEqR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVM2W_VdqKEd, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2W_VdqKEdR, BX_ISA_AVX512_BW, OP_Vdq, OP_KEd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVM2D_VdqKEw, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2D_VdqKEwR, BX_ISA_AVX512_DQ, OP_Vdq, OP_KEw, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVM2Q_VdqKEb, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVM2Q_VdqKEbR, BX_ISA_AVX512_DQ, OP_Vdq, OP_KEb, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVB2M_KGqWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVB2M_KGqWdqR, BX_ISA_AVX512_BW, OP_KGq, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVW2M_KGdWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVW2M_KGdWdqR, BX_ISA_AVX512_BW, OP_KGd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVD2M_KGwWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVD2M_KGwWdqR, BX_ISA_AVX512_DQ, OP_KGw, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VPMOVQ2M_KGbWdq, &BX_CPU_C::BxError, &BX_CPU_C::VPMOVQ2M_KGbWdqR, BX_ISA_AVX512_DQ, OP_KGb, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
// VexW alias
// VexW64 aliased

View File

@ -454,7 +454,7 @@ BX_CPP_INLINE void xmm_psignd(BxPackedXmmRegister *op1, const BxPackedXmmRegiste
BX_CPP_INLINE Bit32u xmm_pmovmskb(const BxPackedXmmRegister *op)
{
unsigned mask = 0;
Bit32u mask = 0;
if(op->xmmsbyte(0x0) < 0) mask |= 0x0001;
if(op->xmmsbyte(0x1) < 0) mask |= 0x0002;
@ -476,9 +476,25 @@ BX_CPP_INLINE Bit32u xmm_pmovmskb(const BxPackedXmmRegister *op)
return mask;
}
// Build an 8-bit mask from the sign of each of the eight signed 16-bit
// elements of *op: bit n of the result is set when element n is negative.
BX_CPP_INLINE Bit32u xmm_pmovmskw(const BxPackedXmmRegister *op)
{
  Bit32u sign_bits = 0;

  for (unsigned word = 0; word < 8; word++) {
    if (op->xmm16s(word) < 0)
      sign_bits |= (Bit32u) 1 << word;
  }

  return sign_bits;
}
BX_CPP_INLINE Bit32u xmm_pmovmskd(const BxPackedXmmRegister *op)
{
unsigned mask = 0;
Bit32u mask = 0;
if(op->xmm32s(0) < 0) mask |= 0x1;
if(op->xmm32s(1) < 0) mask |= 0x2;
@ -490,7 +506,7 @@ BX_CPP_INLINE Bit32u xmm_pmovmskd(const BxPackedXmmRegister *op)
BX_CPP_INLINE Bit32u xmm_pmovmskq(const BxPackedXmmRegister *op)
{
unsigned mask = 0;
Bit32u mask = 0;
if(op->xmm32s(1) < 0) mask |= 0x1;
if(op->xmm32s(3) < 0) mask |= 0x2;
@ -498,6 +514,34 @@ BX_CPP_INLINE Bit32u xmm_pmovmskq(const BxPackedXmmRegister *op)
return mask;
}
// Expand the low 16 bits of 'mask' into the sixteen signed byte elements of
// *dst: element n becomes -1 (all ones) when bit n is set, otherwise 0.
BX_CPP_INLINE void xmm_pmovm2b(BxPackedXmmRegister *dst, Bit32u mask)
{
  for (unsigned idx = 0; idx < 16; idx++) {
    const bool bit_set = ((mask >> idx) & 0x1) != 0;
    dst->xmmsbyte(idx) = bit_set ? -1 : 0;
  }
}
// Expand the low 8 bits of 'mask' into the eight signed 16-bit elements of
// *dst: element n becomes -1 (all ones) when bit n is set, otherwise 0.
BX_CPP_INLINE void xmm_pmovm2w(BxPackedXmmRegister *dst, Bit32u mask)
{
  for (unsigned idx = 0; idx < 8; idx++) {
    const bool bit_set = ((mask >> idx) & 0x1) != 0;
    dst->xmm16s(idx) = bit_set ? -1 : 0;
  }
}
// Expand the low 4 bits of 'mask' into the four signed 32-bit elements of
// *dst: element n becomes -1 (all ones) when bit n is set, otherwise 0.
BX_CPP_INLINE void xmm_pmovm2d(BxPackedXmmRegister *dst, Bit32u mask)
{
  for (unsigned idx = 0; idx < 4; idx++) {
    const bool bit_set = ((mask >> idx) & 0x1) != 0;
    dst->xmm32s(idx) = bit_set ? -1 : 0;
  }
}
// Expand the low 2 bits of 'mask' into the two signed 64-bit elements of
// *dst: element n becomes -1 (all 64 bits set) when bit n is set, otherwise 0.
BX_CPP_INLINE void xmm_pmovm2q(BxPackedXmmRegister *dst, Bit32u mask)
{
  for (unsigned n=0; n < 2; n++, mask >>= 1) {
    // Negating the 32-bit unsigned (mask & 1) yields 0xFFFFFFFF, which would
    // zero-extend to 0x00000000FFFFFFFF in the 64-bit element. Select a signed
    // -1 instead so the whole qword is filled with ones.
    dst->xmm64s(n) = (mask & 0x1) ? -1 : 0;
  }
}
// blend
BX_CPP_INLINE void xmm_pblendb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, Bit32u mask)