implemented AVX-512 VINSERTF*/VINSERTI* opcodes

This commit is contained in:
Stanislav Shwartsman 2013-12-17 20:38:19 +00:00
parent 8914ff6e37
commit da0cec4300
5 changed files with 76 additions and 35 deletions

View File

@ -390,13 +390,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VBROADCASTF128_VdqMdq(bxInstructio
BxPackedAvxRegister dst;
BxPackedXmmRegister src;
unsigned len = i->getVL();
#if BX_SUPPORT_EVEX
if (len == BX_VL128) {
BX_ERROR(("%s: vector length must be >= 256 bit", i->getIaOpcodeNameShort()));
exception(BX_UD_EXCEPTION, 0);
}
#endif
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
read_virtual_xmmword(i->seg(), eaddr, (Bit8u*) &src);
@ -497,7 +490,7 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF128_VdqHdqWdqIbR(bxInstruc
{
BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
op1.ymm128(i->Ib() & 1) = BX_READ_XMM_REG(i->src2());
op1.ymm128(i->Ib() & 0x1) = BX_READ_XMM_REG(i->src2());
BX_WRITE_YMM_REGZ(i->dst(), op1);

View File

@ -519,6 +519,50 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHUFF64x2_MASK_VpdHpdWpdIbR(bxIns
BX_NEXT_INSTR(i);
}
// VINSERTF32x4 (register source form, optional opmask).
// Takes a copy of the first source vector, overwrites the 128-bit lane chosen
// by the low two bits of the immediate with the XMM value of the second
// source, and writes the result to the destination. A non-zero opmask
// register index routes the write through the dword-granular masked writer
// using a 16-bit mask; otherwise the whole register is written.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();

  // Only the 512-bit form is accepted by this handler.
  if (vl != BX_VL512) {
    BX_ERROR(("%s: vector length must be 512 bit", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  BxPackedAvxRegister result = BX_READ_AVX_REG(i->src1());

  // imm8[1:0] selects which of the four 128-bit lanes is replaced.
  const unsigned lane = i->Ib() & 0x3;
  result.vmm128(lane) = BX_READ_XMM_REG(i->src2());

  const unsigned mask_index = i->opmask();
  if (mask_index == 0) {
    BX_WRITE_AVX_REGZ(i->dst(), result, vl);
  }
  else {
    avx512_write_regd_masked(i, &result, vl, BX_READ_16BIT_OPMASK(mask_index));
  }

  BX_NEXT_INSTR(i);
}
// VINSERTF64x4 (register source form, optional opmask).
// Takes a copy of the first source vector, overwrites the 256-bit half chosen
// by bit 0 of the immediate with the YMM value of the second source, and
// writes the result to the destination. A non-zero opmask register index
// routes the write through the qword-granular masked writer using an 8-bit
// mask; otherwise the whole register is written.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF64x4_MASK_VpdHpdWpdIbR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();

  // Only the 512-bit form is accepted by this handler.
  if (vl != BX_VL512) {
    BX_ERROR(("%s: vector length must be 512 bit", i->getIaOpcodeNameShort()));
    exception(BX_UD_EXCEPTION, 0);
  }

  BxPackedAvxRegister result = BX_READ_AVX_REG(i->src1());

  // imm8[0] selects the lower or upper 256-bit half to replace.
  const unsigned half = i->Ib() & 0x1;
  result.vmm256(half) = BX_READ_YMM_REG(i->src2());

  const unsigned mask_index = i->opmask();
  if (mask_index == 0) {
    BX_WRITE_AVX_REGZ(i->dst(), result, vl);
  }
  else {
    avx512_write_regq_masked(i, &result, vl, BX_READ_8BIT_OPMASK(mask_index));
  }

  BX_NEXT_INSTR(i);
}
// broadcast
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTD_MASK_VdqWdR(bxInstruction_c *i)
@ -589,13 +633,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VBROADCASTF32x4_MASK_VpsMps(bxInst
BxPackedXmmRegister src;
unsigned len = i->getVL();
#if BX_SUPPORT_EVEX
if (len == BX_VL128) {
BX_ERROR(("%s: vector length must be >= 256 bit", i->getIaOpcodeNameShort()));
exception(BX_UD_EXCEPTION, 0);
}
#endif
Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask());
if (opmask != 0) {
bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));

View File

@ -3374,6 +3374,11 @@ public: // for now...
BX_SMF BX_INSF_TYPE VSHUFF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VSHUFF64x2_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
// AVX-512 VINSERTF*/VINSERTI* register-form handlers (with optional opmask).
// NOTE(review): the two VINSERTI* handlers below are declared here, but the
// opcode definitions added in the same change bind the VINSERTI* opcodes to
// the VINSERTF* handlers - confirm whether these declarations are needed.
BX_SMF BX_INSF_TYPE VINSERTF32x4_MASK_VpsHpsWpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTI32x4_MASK_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTF64x4_MASK_VpdHpdWpdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VINSERTI64x4_MASK_VdqHdqWdqIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPBROADCASTD_MASK_VdqWdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPBROADCASTQ_MASK_VdqWqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -956,10 +956,10 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 18 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTSS_VpsWss_Kmask },
/* 19 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTSD_VpdWsd },
/* 19 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTSD_VpdWsd_Kmask },
/* 1A k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF32x4_VpsWps },
/* 1A */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF32x4_VpsWps_Kmask },
/* 1B k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF64x4_VpdWpd },
/* 1B */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF64x4_VpdWpd_Kmask },
/* 1A k0 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF32x4_VpsWps },
/* 1A */ { BxVexW0 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF32x4_VpsWps_Kmask },
/* 1B k0 */ { BxVexW1 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF64x4_VpdWpd },
/* 1B */ { BxVexW1 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTF64x4_VpdWpd_Kmask },
/* 1C k0 */ { 0, BX_IA_ERROR },
/* 1C */ { 0, BX_IA_ERROR },
/* 1D k0 */ { 0, BX_IA_ERROR },
@ -1084,10 +1084,10 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 58 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VPBROADCASTD_VdqWd_Kmask },
/* 59 k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPBROADCASTQ_VdqWq },
/* 59 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VPBROADCASTQ_VdqWq_Kmask },
/* 5A k0 */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI32x4_VdqWdq },
/* 5A */ { BxVexW0 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI32x4_VdqWdq_Kmask },
/* 5B k0 */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI64x4_VdqWdq },
/* 5B */ { BxVexW1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI64x4_VdqWdq_Kmask },
/* 5A k0 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI32x4_VdqWdq },
/* 5A */ { BxVexW0 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI32x4_VdqWdq_Kmask },
/* 5B k0 */ { BxVexW1 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI64x4_VdqWdq },
/* 5B */ { BxVexW1 | BxVexL1 | BxPrefixSSE66, BX_IA_V512_VBROADCASTI64x4_VdqWdq_Kmask },
/* 5C k0 */ { 0, BX_IA_ERROR },
/* 5C */ { 0, BX_IA_ERROR },
/* 5D k0 */ { 0, BX_IA_ERROR },
@ -1466,12 +1466,12 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 16 */ { 0, BX_IA_ERROR },
/* 17 k0 */ { BxVexW0 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VEXTRACTPS_EdVpsIb },
/* 17 */ { 0, BX_IA_ERROR }, // #UD
/* 18 k0 */ { 0, BX_IA_ERROR },
/* 18 */ { 0, BX_IA_ERROR },
/* 18 k0 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTF32x4_VpsHpsWpsIb_Kmask },
/* 18 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTF32x4_VpsHpsWpsIb_Kmask },
/* 19 k0 */ { 0, BX_IA_ERROR },
/* 19 */ { 0, BX_IA_ERROR },
/* 1A k0 */ { 0, BX_IA_ERROR },
/* 1A */ { 0, BX_IA_ERROR },
// VINSERTF64x4 is defined with EVEX.W1 in the Intel SDM (EVEX.512.66.0F3A.W1 1A /r ib).
/* 1A k0 */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTF64x4_VpdHpdWpdIb_Kmask },
/* 1A */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTF64x4_VpdHpdWpdIb_Kmask },
/* 1B k0 */ { 0, BX_IA_ERROR },
/* 1B */ { 0, BX_IA_ERROR },
/* 1C k0 */ { 0, BX_IA_ERROR },
@ -1488,8 +1488,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 21 */ { 0, BX_IA_ERROR }, // #UD
/* 22 k0 */ { 0, BX_IA_ERROR },
/* 22 */ { 0, BX_IA_ERROR },
/* 23 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFF32x4_VpsHpsWpsIb_Kmask },
/* 23 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFF32x4_VpsHpsWpsIb_Kmask },
/* 23 k0 */ { BxAliasVexW | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFF32x4_VpsHpsWpsIb_Kmask },
/* 23 */ { BxAliasVexW | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFF32x4_VpsHpsWpsIb_Kmask },
/* 24 k0 */ { 0, BX_IA_ERROR },
/* 24 */ { 0, BX_IA_ERROR },
/* 25 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPTERNLOGD_VdqHdqWdqIb },
@ -1530,12 +1530,12 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 36 */ { 0, BX_IA_ERROR },
/* 37 k0 */ { 0, BX_IA_ERROR },
/* 37 */ { 0, BX_IA_ERROR },
/* 38 k0 */ { 0, BX_IA_ERROR },
/* 38 */ { 0, BX_IA_ERROR },
/* 38 k0 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTI32x4_VdqHdqWdqIb_Kmask },
/* 38 */ { BxVexW0 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTI32x4_VdqHdqWdqIb_Kmask },
/* 39 k0 */ { 0, BX_IA_ERROR },
/* 39 */ { 0, BX_IA_ERROR },
/* 3A k0 */ { 0, BX_IA_ERROR },
/* 3A */ { 0, BX_IA_ERROR },
// VINSERTI64x4 is defined with EVEX.W1 in the Intel SDM (EVEX.512.66.0F3A.W1 3A /r ib).
/* 3A k0 */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTI64x4_VdqHdqWdqIb_Kmask },
/* 3A */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VINSERTI64x4_VdqHdqWdqIb_Kmask },
/* 3B k0 */ { 0, BX_IA_ERROR },
/* 3B */ { 0, BX_IA_ERROR },
/* 3C k0 */ { 0, BX_IA_ERROR },
@ -1552,8 +1552,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 41 */ { 0, BX_IA_ERROR },
/* 42 k0 */ { 0, BX_IA_ERROR },
/* 42 */ { 0, BX_IA_ERROR },
/* 43 k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFI32x4_VdqHdqWdqIb_Kmask },
/* 43 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFI32x4_VdqHdqWdqIb_Kmask },
/* 43 k0 */ { BxAliasVexW | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFI32x4_VdqHdqWdqIb_Kmask },
/* 43 */ { BxAliasVexW | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VSHUFI32x4_VdqHdqWdqIb_Kmask },
/* 44 k0 */ { 0, BX_IA_ERROR },
/* 44 */ { 0, BX_IA_ERROR },
/* 45 k0 */ { 0, BX_IA_ERROR },

View File

@ -2682,6 +2682,12 @@ bx_define_opcode(BX_IA_V512_VMOVQ_VqWq, &BX_CPU_C::MOVSD_VsdWsdM, &BX_CPU_C::MOV
bx_define_opcode(BX_IA_V512_VINSERTPS_VpsWssIb, &BX_CPU_C::INSERTPS_VpsHpsWssIb, &BX_CPU_C::INSERTPS_VpsHpsWssIb, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wss, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VEXTRACTPS_EdVpsIb, &BX_CPU_C::EXTRACTPS_EdVpsIbM, &BX_CPU_C::EXTRACTPS_EdVpsIbR, BX_ISA_AVX512, OP_Ed, OP_Vps, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
// AVX-512 VINSERTF32x4/VINSERTF64x4 and VINSERTI32x4/VINSERTI64x4.
// The integer (VINSERTI*) opcodes are bound to the same handler functions as
// the floating-point (VINSERTF*) ones; only the opcode id and operand type
// tags differ between the paired entries.
bx_define_opcode(BX_IA_V512_VINSERTF32x4_VpsHpsWpsIb_Kmask, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_Wps, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VINSERTF64x4_VpdHpdWpdIb_Kmask, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VINSERTF64x4_MASK_VpdHpdWpdIbR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_Wpd, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VINSERTI32x4_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOADU_Wdq, &BX_CPU_C::VINSERTF32x4_MASK_VpsHpsWpsIbR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VINSERTI64x4_VdqHdqWdqIb_Kmask, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VINSERTF64x4_MASK_VpdHpdWpdIbR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_Ib, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
// VexW alias
bx_define_opcode(BX_IA_V512_VPADDD_VdqHdqWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VPADDD_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VPADDQ_VdqHdqWdq, &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::VPADDQ_VdqHdqWdqR, BX_ISA_AVX512, OP_Vdq, OP_Hdq, OP_Wdq, OP_NONE, BX_PREPARE_EVEX_NO_SAE)