implemented two AVX512 unsigned CVT instructions
This commit is contained in:
parent
ef7e02c12c
commit
e200d04ad5
@ -380,7 +380,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEdR(bxInstruction_c
|
|||||||
check_exceptionsSSE(get_exception_flags(status));
|
check_exceptionsSSE(get_exception_flags(status));
|
||||||
|
|
||||||
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
||||||
|
|
||||||
BX_NEXT_INSTR(i);
|
BX_NEXT_INSTR(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -397,7 +396,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEqR(bxInstruction_c
|
|||||||
check_exceptionsSSE(get_exception_flags(status));
|
check_exceptionsSSE(get_exception_flags(status));
|
||||||
|
|
||||||
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
||||||
|
|
||||||
BX_NEXT_INSTR(i);
|
BX_NEXT_INSTR(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -423,6 +421,99 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SD_VsdEqR(bxInstruction_c
|
|||||||
check_exceptionsSSE(get_exception_flags(status));
|
check_exceptionsSSE(get_exception_flags(status));
|
||||||
|
|
||||||
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
||||||
|
BX_NEXT_INSTR(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// VCVTUDQ2PS, register-source form without opmask.
// Converts each unsigned 32-bit element of the source vector to float32 in
// place (4 elements per unit of getVL()), using a softfloat status word seeded
// from MXCSR, then reports the accumulated exception flags and writes the
// result to the destination with BX_WRITE_AVX_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_VpsWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister vec = BX_READ_AVX_REG(i->src());
  const unsigned vl = i->getVL();
  const unsigned dword_count = 4 * vl;

  float_status_t fp_status;
  mxcsr_to_softfloat_status_word(fp_status, MXCSR);

  // Element-wise conversion; flags accumulate in fp_status across elements.
  for (unsigned idx = 0; idx != dword_count; ++idx) {
    vec.vmm32u(idx) = uint32_to_float32(vec.vmm32u(idx), fp_status);
  }

  // Exceptions are checked once, after all elements were converted and
  // before the destination register is modified.
  check_exceptionsSSE(get_exception_flags(fp_status));

  BX_WRITE_AVX_REGZ(i->dst(), vec, vl);
  BX_NEXT_INSTR(i);
}
|
||||||
|
|
||||||
|
// VCVTUDQ2PS, register-source form with an opmask.
// Elements whose opmask bit is set are converted from unsigned 32-bit integer
// to float32; elements whose bit is clear are zeroed in the temporary. With
// zero-masking the temporary is written to the destination as is; otherwise
// it is blended into the existing destination one 128-bit lane at a time, so
// unselected elements keep their previous destination value.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister vec = BX_READ_AVX_REG(i->src());
  const unsigned vl = i->getVL();
  const unsigned dword_count = 4 * vl;

  const Bit32u kmask = BX_READ_16BIT_OPMASK(i->opmask());

  float_status_t fp_status;
  mxcsr_to_softfloat_status_word(fp_status, MXCSR);

  for (unsigned idx = 0; idx != dword_count; ++idx) {
    // Bit idx of the opmask selects element idx.
    if (((kmask >> idx) & 0x1) == 0) {
      vec.vmm32u(idx) = 0;
    }
    else {
      vec.vmm32u(idx) = uint32_to_float32(vec.vmm32u(idx), fp_status);
    }
  }

  // Only selected elements were converted, so only they contribute flags.
  check_exceptionsSSE(get_exception_flags(fp_status));

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), vec, vl);
  }
  else {
    // Merge-masking: each 128-bit lane holds four dwords, so lane `lane`
    // is governed by the opmask shifted right by 4*lane.
    for (unsigned lane = 0; lane < vl; ++lane) {
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), lane), &vec.vmm128(lane), kmask >> (4 * lane));
    }
    BX_CLEAR_AVX_REGZ(i->dst(), vl);
  }

  BX_NEXT_INSTR(i);
}
|
||||||
|
|
||||||
|
// VCVTUDQ2PD, register-source form without opmask.
// Widens unsigned 32-bit elements read from the source (as a YMM-sized
// register) into float64 elements of the result, 2 elements per unit of
// getVL(). The conversion helper takes no status word and no exception check
// is performed here (every 32-bit unsigned value is exactly representable as
// a double).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_VpdWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister src = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister dst_val;
  const unsigned vl = i->getVL();
  const unsigned qword_count = 2 * vl;

  // Source dword idx becomes destination qword idx.
  for (unsigned idx = 0; idx != qword_count; ++idx) {
    dst_val.vmm64u(idx) = uint32_to_float64(src.ymm32u(idx));
  }

  BX_WRITE_AVX_REGZ(i->dst(), dst_val, vl);
  BX_NEXT_INSTR(i);
}
|
||||||
|
|
||||||
|
// VCVTUDQ2PD, register-source form with an opmask.
// Source dwords whose opmask bit is set are widened to float64; result
// elements whose bit is clear are zeroed in the temporary. With zero-masking
// the temporary is written to the destination as is; otherwise it is blended
// into the existing destination one 128-bit lane at a time, so unselected
// elements keep their previous destination value.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister src = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister dst_val;
  const unsigned vl = i->getVL();
  const unsigned qword_count = 2 * vl;

  const Bit32u kmask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned idx = 0; idx != qword_count; ++idx) {
    // Bit idx of the opmask selects result element idx.
    if (((kmask >> idx) & 0x1) == 0) {
      dst_val.vmm64u(idx) = 0;
    }
    else {
      dst_val.vmm64u(idx) = uint32_to_float64(src.ymm32u(idx));
    }
  }

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), dst_val, vl);
  }
  else {
    // Merge-masking: each 128-bit lane holds two qwords, so lane `lane`
    // is governed by the opmask shifted right by 2*lane.
    for (unsigned lane = 0; lane < vl; ++lane) {
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), lane), &dst_val.vmm128(lane), kmask >> (2 * lane));
    }
    BX_CLEAR_AVX_REGZ(i->dst(), vl);
  }

  BX_NEXT_INSTR(i);
}
|
||||||
|
@ -3235,6 +3235,11 @@ public: // for now...
|
|||||||
BX_SMF BX_INSF_TYPE VCVTUSI2SD_VsdEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VCVTUSI2SD_VsdEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
BX_SMF BX_INSF_TYPE VCVTUSI2SS_VssEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VCVTUSI2SS_VssEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
|
||||||
|
BX_SMF BX_INSF_TYPE VCVTUDQ2PS_VpsWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
BX_SMF BX_INSF_TYPE VCVTUDQ2PS_MASK_VpsWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
|
||||||
BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
BX_SMF BX_INSF_TYPE VPANDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
BX_SMF BX_INSF_TYPE VPANDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||||
|
@ -285,6 +285,18 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f6f_Mask[3] = {
|
|||||||
/* F2 */ { 0, BX_IA_ERROR }
|
/* F2 */ { 0, BX_IA_ERROR }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Prefix-selected decode group for EVEX opcode 0F 7A: one entry per SSE
// prefix, in the order 66, F3, F2 (see the per-entry labels). This is the
// group referenced from the "7A k0" slot of BxOpcodeTableEVEX, i.e. the
// non-masked handlers. The 66 form is not defined and decodes to BX_IA_ERROR.
// NOTE(review): BxVexW0 presumably restricts the entry to EVEX.W=0 - confirm
// against the attribute definition.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7a[3] = {
|
||||||
|
/* 66 */ { 0, BX_IA_ERROR },
|
||||||
|
/* F3 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PD_VpdWdq },
|
||||||
|
/* F2 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PS_VpsWdq }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Prefix-selected decode group for EVEX opcode 0F 7A, masked variant: one
// entry per SSE prefix, in the order 66, F3, F2 (see the per-entry labels).
// This is the group referenced from the plain "7A" (non-k0) slot of
// BxOpcodeTableEVEX and selects the *_Kmask opcode definitions. The 66 form
// is not defined and decodes to BX_IA_ERROR.
// NOTE(review): BxVexW0 presumably restricts the entry to EVEX.W=0 - confirm
// against the attribute definition.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7a_Mask[3] = {
|
||||||
|
/* 66 */ { 0, BX_IA_ERROR },
|
||||||
|
/* F3 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask },
|
||||||
|
/* F2 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask }
|
||||||
|
};
|
||||||
|
|
||||||
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7b[3] = {
|
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7b[3] = {
|
||||||
/* 66 */ { 0, BX_IA_ERROR },
|
/* 66 */ { 0, BX_IA_ERROR },
|
||||||
/* F3 */ { BxAliasVexW64, BX_IA_V512_VCVTUSI2SS_VssEd },
|
/* F3 */ { BxAliasVexW64, BX_IA_V512_VCVTUSI2SS_VssEd },
|
||||||
@ -634,8 +646,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
|||||||
/* 78 */ { 0, BX_IA_ERROR },
|
/* 78 */ { 0, BX_IA_ERROR },
|
||||||
/* 79 k0 */ { 0, BX_IA_ERROR },
|
/* 79 k0 */ { 0, BX_IA_ERROR },
|
||||||
/* 79 */ { 0, BX_IA_ERROR },
|
/* 79 */ { 0, BX_IA_ERROR },
|
||||||
/* 7A k0 */ { 0, BX_IA_ERROR },
|
/* 7A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7a },
|
||||||
/* 7A */ { 0, BX_IA_ERROR },
|
/* 7A */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7a_Mask },
|
||||||
/* 7B k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7b },
|
/* 7B k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7b },
|
||||||
/* 7B */ { 0, BX_IA_ERROR }, // #UD
|
/* 7B */ { 0, BX_IA_ERROR }, // #UD
|
||||||
/* 7C k0 */ { 0, BX_IA_ERROR },
|
/* 7C k0 */ { 0, BX_IA_ERROR },
|
||||||
|
@ -2563,6 +2563,12 @@ bx_define_opcode(BX_IA_V512_VCOMISS_VssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::COMI
|
|||||||
bx_define_opcode(BX_IA_V512_VUCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::UCOMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
bx_define_opcode(BX_IA_V512_VUCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::UCOMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
bx_define_opcode(BX_IA_V512_VCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::COMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
bx_define_opcode(BX_IA_V512_VCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::COMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
|
|
||||||
|
bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||||
|
bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||||
|
|
||||||
|
bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||||
|
bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||||
|
|
||||||
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Wps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Wps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user