Implemented VCVTPS2PH AVX-512 instruction

The only AVX-512 opcodes still missing are:

512.66.0F38.W0 2C VSCALEFPS
512.66.0F38.W1 2C VSCALEFPD
NDS.LIG.66.0F38.W0 2D VSCALESS
NDS.LIG.66.0F38.W1 2D VSCALESD

512.66.0F38.W0 4C VRCP14PS
512.66.0F38.W1 4C VRCP14PD
NDS.LIG.66.0F38.W0 4D VRCP14SS
NDS.LIG.66.0F38.W1 4D VRCP14SD
512.66.0F38.W0 4E VRSQRT14PS
512.66.0F38.W1 4E VRSQRT14PD
NDS.LIG.66.0F38.W0 4F VRSQRT14SS
NDS.LIG.66.0F38.W1 4F VRSQRT14SD

512.66.0F3A.W0 08 VRNDSCALEPS
512.66.0F3A.W1 09 VRNDSCALEPD
NDS.LIG.66.0F3A.W0 0A VRNDSCALESS
NDS.LIG.66.0F3A.W1 0B VRNDSCALESD
This commit is contained in:
Stanislav Shwartsman 2014-02-15 19:21:08 +00:00
parent 6c9d08f756
commit 7775483d5e
4 changed files with 74 additions and 2 deletions

View File

@ -567,4 +567,71 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR(bxInstructi
BX_NEXT_INSTR(i);
}
// VCVTPS2PH with opmask, register destination.
//
// Every 32-bit source element whose opmask bit is set is converted with
// float32_to_float16() and stored into the 16-bit destination element of the
// same index. Elements whose opmask bit is clear keep the value previously
// held by the destination register, unless the instruction asks for
// zero-masking, in which case they are set to 0.
//
// The conversion status starts from MXCSR, with two adjustments:
//   - flush-underflow-to-zero is always disabled (MXCSR.FUZ is ignored);
//   - when imm8 bit 2 is clear, imm8[1:0] replaces the rounding mode.
//
// Accumulated exception flags are passed to check_exceptionsSSE() before the
// destination register is written.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned elements = DWORD_ELEMENTS(vl);
  const Bit8u imm8 = i->Ib();

  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister merged = BX_READ_AVX_REG(i->dst()); // start from old destination contents

  float_status_t fp_status;
  mxcsr_to_softfloat_status_word(fp_status, MXCSR);
  fp_status.flush_underflow_to_zero = 0; // ignore MXCSR.FUZ

  // imm8 bit 2 clear: the rounding mode comes from imm8[1:0] instead of MXCSR
  if (!(imm8 & 0x4))
    fp_status.float_rounding_mode = imm8 & 0x3;

  const Bit32u kmask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned idx = 0; idx < elements; idx++) {
    if ((kmask >> idx) & 0x1) {
      merged.vmm16u(idx) = float32_to_float16(src.vmm32u(idx), fp_status);
    }
    else if (i->isZeroMasking()) {
      merged.vmm16u(idx) = 0;
    }
  }

  check_exceptionsSSE(get_exception_flags(fp_status));

  if (vl != BX_VL128) {
    BX_WRITE_AVX_REGZ(i->dst(), merged, vl >> 1); // write half vector
  }
  else {
    // 128-bit source: only the low qword of the result is written
    BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), merged.vmm64u(0));
  }

  BX_NEXT_INSTR(i);
}
// VCVTPS2PH with opmask, memory destination.
//
// Every 32-bit source element whose opmask bit is set is converted with
// float32_to_float16() into a temporary register; the temporary and the
// opmask (truncated to the element count of the current vector length) are
// then handed to avx_masked_store16() together with the resolved effective
// address.
//
// The conversion status starts from MXCSR, with two adjustments:
//   - flush-underflow-to-zero is always disabled (MXCSR.FUZ is ignored);
//   - when imm8 bit 2 is clear, imm8[1:0] replaces the rounding mode.
//
// Accumulated exception flags are passed to check_exceptionsSSE() before the
// effective address is resolved and before anything is stored.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbM(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned elements = DWORD_ELEMENTS(vl);
  const Bit8u imm8 = i->Ib();

  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  BxPackedAvxRegister packed; // only opmask-selected elements get assigned below

  float_status_t fp_status;
  mxcsr_to_softfloat_status_word(fp_status, MXCSR);
  fp_status.flush_underflow_to_zero = 0; // ignore MXCSR.FUZ

  // imm8 bit 2 clear: the rounding mode comes from imm8[1:0] instead of MXCSR
  if (!(imm8 & 0x4))
    fp_status.float_rounding_mode = imm8 & 0x3;

  // keep only the opmask bits that correspond to elements of this vector length
  Bit32u kmask = BX_READ_16BIT_OPMASK(i->opmask());
  kmask &= (1 << elements) - 1;

  for (unsigned idx = 0; idx < elements; idx++) {
    if ((kmask >> idx) & 0x1) {
      packed.vmm16u(idx) = float32_to_float16(src.vmm32u(idx), fp_status);
    }
  }

  check_exceptionsSSE(get_exception_flags(fp_status));

  bx_address eaddr = BX_CPU_CALL_METHODR(i->ResolveModrm, (i));
  avx_masked_store16(i, eaddr, &packed, kmask);

  BX_NEXT_INSTR(i);
}
#endif

View File

@ -3300,6 +3300,8 @@ public: // for now...
BX_SMF BX_INSF_TYPE VCVTDQ2PD_MASK_VpdWdqR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTPH2PS_MASK_VpsWpsR(bxInstruction_c *i) BX_CPP_AttrRegparmN(1);
// Opmask-controlled VCVTPS2PH (float32 -> float16) handlers:
// the ...R variant writes its result to a register, the ...M variant stores it to memory.
BX_SMF BX_INSF_TYPE VCVTPS2PH_MASK_WpsVpsIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTPS2PH_MASK_WpsVpsIbM(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -1728,8 +1728,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 1B */ { BxVexW1 | BxVexL1 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VEXTRACTF64x4_WpdVpdIb_Kmask },
/* 1C k0 */ { 0, BX_IA_ERROR },
/* 1C */ { 0, BX_IA_ERROR },
/* 1D k0 */ { 0, BX_IA_ERROR },
/* 1D */ { 0, BX_IA_ERROR },
/* 1D k0 */ { BxVexW0 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VCVTPS2PH_WpsVpsIb },
/* 1D */ { BxVexW0 | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VCVTPS2PH_WpsVpsIb_Kmask },
/* 1E k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPCMPUD_KGwHdqWdqIb },
/* 1E */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPCMPUD_KGwHdqWdqIb },
/* 1F k0 */ { BxAliasVexW | BxPrefixSSE66 | BxImmediate_Ib, BX_IA_V512_VPCMPD_KGwHdqWdqIb },

View File

@ -2604,6 +2604,9 @@ bx_define_opcode(BX_IA_V512_VCVTTPD2DQ_VdqWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_M
bx_define_opcode(BX_IA_V512_VCVTPH2PS_VpsWps, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPH2PS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTPH2PS_VpsWps_Kmask, &BX_CPU_C::LOAD_Half_Vector, &BX_CPU_C::VCVTPH2PS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVHV, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
// EVEX VCVTPS2PH: the unmasked opcode passes the same handler in both handler slots,
// while the _Kmask opcode passes separate ...M and ...R handlers
// (NOTE(review): slot order appears to be memory form first, register form second - confirm against bx_define_opcode).
bx_define_opcode(BX_IA_V512_VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, &BX_CPU_C::VCVTPS2PH_WpsVpsIb, BX_ISA_AVX512, OP_mVHV, OP_Vps, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTPS2PH_WpsVpsIb_Kmask, &BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbM, &BX_CPU_C::VCVTPS2PH_MASK_WpsVpsIbR, BX_ISA_AVX512, OP_mVHV, OP_Vps, OP_Ib, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_mVps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)