implemented two AVX512 unsigned CVT instructions

This commit is contained in:
Stanislav Shwartsman 2013-12-22 19:53:03 +00:00
parent ef7e02c12c
commit e200d04ad5
4 changed files with 118 additions and 4 deletions

View File

@ -380,7 +380,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEdR(bxInstruction_c
check_exceptionsSSE(get_exception_flags(status)); check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i); BX_NEXT_INSTR(i);
} }
@ -397,7 +396,6 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SS_VssEqR(bxInstruction_c
check_exceptionsSSE(get_exception_flags(status)); check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i); BX_NEXT_INSTR(i);
} }
@ -423,6 +421,99 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUSI2SD_VsdEqR(bxInstruction_c
check_exceptionsSSE(get_exception_flags(status)); check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_VpsWdqR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL(), num_elements = 4*len;
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0; n < num_elements; n++) {
op.vmm32u(n) = uint32_to_float32(op.vmm32u(n), status);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_AVX_REGZ(i->dst(), op, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
unsigned len = i->getVL(), num_elements = 4*len;
Bit32u opmask = BX_READ_16BIT_OPMASK(i->opmask());
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) {
if (opmask & mask) {
op.vmm32u(n) = uint32_to_float32(op.vmm32u(n), status);
}
else {
op.vmm32u(n) = 0;
}
}
check_exceptionsSSE(get_exception_flags(status));
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 4)
xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), op, len);
}
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_VpdWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister result;
unsigned len = i->getVL(), num_elements = 2*len;
for (unsigned n=0; n < num_elements; n++) {
result.vmm64u(n) = uint32_to_float64(op.ymm32u(n));
}
BX_WRITE_AVX_REGZ(i->dst(), result, len);
BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *i)
{
BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
BxPackedAvxRegister result;
unsigned len = i->getVL(), num_elements = 2*len;
Bit32u opmask = BX_READ_8BIT_OPMASK(i->opmask());
for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) {
if (opmask & mask) {
result.vmm64u(n) = uint32_to_float64(op.ymm32u(n));
}
else {
result.vmm64u(n) = 0;
}
}
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, opmask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &result.vmm128(n), opmask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), result, len);
}
BX_NEXT_INSTR(i); BX_NEXT_INSTR(i);
} }

View File

@ -3235,6 +3235,11 @@ public: // for now...
BX_SMF BX_INSF_TYPE VCVTUSI2SD_VsdEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VCVTUSI2SD_VsdEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTUSI2SS_VssEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VCVTUSI2SS_VssEqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTUDQ2PS_VpsWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTUDQ2PS_MASK_VpsWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VCVTUDQ2PD_MASK_VpdWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPADDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPSUBD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF BX_INSF_TYPE VPANDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1); BX_SMF BX_INSF_TYPE VPANDD_MASK_VdqHdqWdqR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -285,6 +285,18 @@ static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f6f_Mask[3] = {
/* F2 */ { 0, BX_IA_ERROR } /* F2 */ { 0, BX_IA_ERROR }
}; };
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7a[3] = {
/* 66 */ { 0, BX_IA_ERROR },
/* F3 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PD_VpdWdq },
/* F2 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PS_VpsWdq }
};
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7a_Mask[3] = {
/* 66 */ { 0, BX_IA_ERROR },
/* F3 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask },
/* F2 */ { BxVexW0, BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask }
};
static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7b[3] = { static const BxOpcodeInfo_t BxOpcodeGroupEVEX_0f7b[3] = {
/* 66 */ { 0, BX_IA_ERROR }, /* 66 */ { 0, BX_IA_ERROR },
/* F3 */ { BxAliasVexW64, BX_IA_V512_VCVTUSI2SS_VssEd }, /* F3 */ { BxAliasVexW64, BX_IA_V512_VCVTUSI2SS_VssEd },
@ -634,8 +646,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
/* 78 */ { 0, BX_IA_ERROR }, /* 78 */ { 0, BX_IA_ERROR },
/* 79 k0 */ { 0, BX_IA_ERROR }, /* 79 k0 */ { 0, BX_IA_ERROR },
/* 79 */ { 0, BX_IA_ERROR }, /* 79 */ { 0, BX_IA_ERROR },
/* 7A k0 */ { 0, BX_IA_ERROR }, /* 7A k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7a },
/* 7A */ { 0, BX_IA_ERROR }, /* 7A */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7a_Mask },
/* 7B k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7b }, /* 7B k0 */ { BxPrefixSSE, BX_IA_ERROR, BxOpcodeGroupEVEX_0f7b },
/* 7B */ { 0, BX_IA_ERROR }, // #UD /* 7B */ { 0, BX_IA_ERROR }, // #UD
/* 7C k0 */ { 0, BX_IA_ERROR }, /* 7C k0 */ { 0, BX_IA_ERROR },

View File

@ -2563,6 +2563,12 @@ bx_define_opcode(BX_IA_V512_VCOMISS_VssWss, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::COMI
bx_define_opcode(BX_IA_V512_VUCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::UCOMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VUCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::UCOMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::COMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VCOMISD_VsdWsd, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::COMISD_VsdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Wsd, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VCVTUDQ2PD_VpdWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PD_MASK_VpdWdqR, BX_ISA_AVX512, OP_Vpd, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE)
bx_define_opcode(BX_IA_V512_VCVTUDQ2PS_VpsWdq_Kmask, &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::VCVTUDQ2PS_MASK_VpsWdqR, BX_ISA_AVX512, OP_Vps, OP_Wdq, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps, &BX_CPU_C::VMOVAPS_VpsWpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VMOVAPS_VpsWps_Kmask, &BX_CPU_C::VMOVAPS_MASK_VpsWpsM, &BX_CPU_C::VMOVAPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Wps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Wps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST) bx_define_opcode(BX_IA_V512_VMOVAPS_WpsVps, &BX_CPU_C::VMOVAPS_WpsVpsM, &BX_CPU_C::VMOVAPS_VpsWpsR, BX_ISA_AVX512, OP_Wps, OP_Vps, OP_NONE, OP_NONE, BX_PREPARE_EVEX_NO_SAE | BX_PREPARE_EVEX_NO_BROADCAST)