Implemented two AVX-512 unsigned integer conversion instructions (VCVTUDQ2PS and VCVTUDQ2PD)
This commit is contained in:
parent
ef7e02c12c
commit
e200d04ad5
@ -380,7 +380,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEdR(bxInstruction_c
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
@ -397,7 +396,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEqR(bxInstruction_c
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
@ -423,6 +421,99 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SD_VsdEqR(bxInstruction_c
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
// VCVTUDQ2PS, register-source handler without an opmask.
//
// Takes a copy of the source AVX register and converts 4 * getVL() unsigned
// 32-bit elements in place to float32 with uint32_to_float32(), using a
// softfloat status word seeded from MXCSR. The accumulated exception flags
// are handed to check_exceptionsSSE() before the destination is written
// with BX_WRITE_AVX_REGZ.
//
// NOTE(review): only MXCSR feeds the status word here; no EVEX embedded
// rounding override is applied, although the matching bx_define_opcode entry
// passes BX_PREPARE_EVEX rather than BX_PREPARE_EVEX_NO_SAE - confirm whether
// that is intended.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_VpsWdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned dword_count = 4 * vl;

  BxPackedAvxRegister vec = BX_READ_AVX_REG(i->src());

  float_status_t fp_status;
  mxcsr_to_softfloat_status_word(fp_status, MXCSR);

  // Convert every dword of the working copy in place.
  unsigned idx = 0;
  while (idx < dword_count) {
    vec.vmm32u(idx) = uint32_to_float32(vec.vmm32u(idx), fp_status);
    ++idx;
  }

  // Exception flags are checked before the destination register is touched.
  check_exceptionsSSE(get_exception_flags(fp_status));

  BX_WRITE_AVX_REGZ(i->dst(), vec, vl);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VCVTUDQ2PS, register-source handler with an opmask.
//
// For each of the 4 * getVL() dword elements: if the corresponding bit of the
// 16-bit opmask is set, the element is converted to float32 with
// uint32_to_float32(); otherwise the working copy's element is set to zero.
// After check_exceptionsSSE() has seen the accumulated flags, the result is
// stored in one of two ways:
//   - zero-masking:  the working copy (with zeroed masked-off elements) is
//                    written with BX_WRITE_AVX_REGZ;
//   - otherwise:     each 128-bit lane is blended into the destination with
//                    xmm_blendps() using that lane's four mask bits, then
//                    BX_CLEAR_AVX_REGZ is applied to the destination.
//
// NOTE(review): as in the unmasked form, only MXCSR feeds the status word;
// confirm whether EVEX embedded rounding should be honoured here.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned dword_count = 4 * vl;

  BxPackedAvxRegister vec = BX_READ_AVX_REG(i->src());
  Bit32u kmask = BX_READ_16BIT_OPMASK(i->opmask());

  float_status_t fp_status;
  mxcsr_to_softfloat_status_word(fp_status, MXCSR);

  for (unsigned idx = 0; idx < dword_count; idx++) {
    // Bit idx of the opmask governs element idx.
    if (((kmask >> idx) & 0x1) == 0) {
      vec.vmm32u(idx) = 0;
      continue;
    }
    vec.vmm32u(idx) = uint32_to_float32(vec.vmm32u(idx), fp_status);
  }

  check_exceptionsSSE(get_exception_flags(fp_status));

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), vec, vl);
  }
  else {
    // Four dwords per 128-bit lane, so lane q uses mask bits [4q+3 : 4q].
    for (unsigned q = 0; q < vl; q++) {
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), q), &vec.vmm128(q), kmask >> (4 * q));
    }
    BX_CLEAR_AVX_REGZ(i->dst(), vl);
  }

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VCVTUDQ2PD, register-source handler without an opmask.
//
// Reads the source through BX_READ_YMM_REG and widens 2 * getVL() unsigned
// 32-bit elements into 64-bit results with uint32_to_float64(). The
// conversion is called without a status argument and no exception check is
// performed in this handler. The result is written with BX_WRITE_AVX_REGZ.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_VpdWdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned qword_count = 2 * vl;

  BxPackedYmmRegister src_dwords = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister converted;

  // Source dword k becomes destination qword k.
  unsigned idx = 0;
  while (idx < qword_count) {
    converted.vmm64u(idx) = uint32_to_float64(src_dwords.ymm32u(idx));
    ++idx;
  }

  BX_WRITE_AVX_REGZ(i->dst(), converted, vl);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VCVTUDQ2PD, register-source handler with an opmask.
//
// For each of the 2 * getVL() result elements: if the corresponding bit of
// the 8-bit opmask is set, source dword k (read through BX_READ_YMM_REG) is
// widened with uint32_to_float64() into result qword k; otherwise result
// qword k is set to zero. The result is then stored in one of two ways:
//   - zero-masking:  the result is written with BX_WRITE_AVX_REGZ;
//   - otherwise:     each 128-bit lane is blended into the destination with
//                    xmm_blendpd() using that lane's two mask bits, then
//                    BX_CLEAR_AVX_REGZ is applied to the destination.
// No softfloat status or exception check is used in this handler.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
  const unsigned vl = i->getVL();
  const unsigned qword_count = 2 * vl;

  BxPackedYmmRegister src_dwords = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister converted;

  Bit32u kmask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned idx = 0; idx < qword_count; idx++) {
    // Bit idx of the opmask governs result element idx.
    if (((kmask >> idx) & 0x1) == 0) {
      converted.vmm64u(idx) = 0;
      continue;
    }
    converted.vmm64u(idx) = uint32_to_float64(src_dwords.ymm32u(idx));
  }

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), converted, vl);
  }
  else {
    // Two qwords per 128-bit lane, so lane q uses mask bits [2q+1 : 2q].
    for (unsigned q = 0; q < vl; q++) {
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), q), &converted.vmm128(q), kmask >> (2 * q));
    }
    BX_CLEAR_AVX_REGZ(i->dst(), vl);
  }

  BX_NEXT_INSTR(i);
}
|
||||
|
@ -3235,6 +3235,11 @@ public: // for now...
|
||||
BX_SMF BX_INSF_TYPE VCVTUSI2SD_VsdEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VCVTUSI2SS_VssEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
// Register-form handlers for VCVTUDQ2PS / VCVTUDQ2PD; the _MASK_ variants are
// the ones that read an opmask and honour zero-masking.
BX_SMF BX_INSF_TYPE VCVTUDQ2PS_VpsWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VCVTUDQ2PS_MASK_VpsWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VPANDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
@ -285,6 +285,18 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f6f_Mask[3] = {
|
||||
/* F2 */ { 0, BX_IA_ERROR }
|
||||
};
|
||||
|
||||
// Prefix sub-table for EVEX opcode 0F 7A, referenced from the "7A k0" slot of
// BxOpcodeTableEVEX. Entries are labelled by prefix (66 / F3 / F2): 66 is
// an error entry, F3 selects VCVTUDQ2PD and F2 selects VCVTUDQ2PS, both
// tagged BxVexW0.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7a[3] = {
|
||||
/* 66 */ { 0, BX_IA_ERROR },
|
||||
/* F3 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PD_VpdWdq },
|
||||
/* F2 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PS_VpsWdq }
|
||||
};
|
||||
|
||||
// Prefix sub-table for EVEX opcode 0F 7A, referenced from the "7A" slot of
// BxOpcodeTableEVEX (the slot that is not labelled k0). Same layout as
// BxOpcodeGroupEVEX_0f7a, but F3 / F2 select the _Kmask opcode variants.
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7a_Mask[3] = {
|
||||
/* 66 */ { 0, BX_IA_ERROR },
|
||||
/* F3 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask },
|
||||
/* F2 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask }
|
||||
};
|
||||
|
||||
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7b[3] = {
|
||||
/* 66 */ { 0, BX_IA_ERROR },
|
||||
/* F3 */ { BxAliasVexW64, BX_IA_V512_VCVTUSI2SS_VssEd },
|
||||
@ -634,8 +646,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
||||
/* 78 */ { 0, BX_IA_ERROR },
|
||||
/* 79 k0 */ { 0, BX_IA_ERROR },
|
||||
/* 79 */ { 0, BX_IA_ERROR },
|
||||
/* 7A k0 */ { 0, BX_IA_ERROR },
|
||||
/* 7A */ { 0, BX_IA_ERROR },
|
||||
/* 7A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7a },
|
||||
/* 7A */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7a_Mask },
|
||||
/* 7B k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7b },
|
||||
/* 7B */ { 0, BX_IA_ERROR }, // #UD
|
||||
/* 7C k0 */ { 0, BX_IA_ERROR },
|
||||
|
@ -2563,6 +2563,12 @@ bx_define_opcode(BX_IA_V512_VCOMISS_VssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::COMI
|
||||
bx_define_opcode(BX_IA_V512_VUCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::UCOMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::COMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
||||
// VCVTUDQ2PD / VCVTUDQ2PS opcode definitions (plain and _Kmask variants).
// All four name LOAD_BROADCAST_VectorD as the first handler and the
// corresponding ...R function as the second. The PD forms pass
// BX_PREPARE_EVEX_NO_SAE, the PS forms pass BX_PREPARE_EVEX.
// NOTE(review): the PD register handlers read the source via BX_READ_YMM_REG
// and consume only 2 * VL dwords, i.e. half of the vector width; confirm that
// LOAD_BROADCAST_VectorD does not fetch a full-width memory operand for the
// PD forms.
bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
|
||||
bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Wps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
Loading…
x
Reference in New Issue
Block a user