handle getexp methods through templates

This commit is contained in:
Shwartsman 2023-12-25 08:06:44 +02:00
parent ef30a12afb
commit 7c9bab8182
5 changed files with 92 additions and 107 deletions

View File

@ -98,58 +98,6 @@ EVEX_OP_SCALAR_DOUBLE(VMINSD_MASK_VsdHpdWsdR, float64_min)
EVEX_OP_SCALAR_DOUBLE(VMAXSD_MASK_VsdHpdWsdR, float64_max)
EVEX_OP_SCALAR_DOUBLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
// Handler that runs xmm_sqrtps_mask over every 128-bit lane of the source
// register under the instruction's 16-bit opmask, then commits the result to
// the destination using either zero-masking or a per-lane xmm_blendps merge.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_MASK_VpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister result = BX_READ_AVX_REG(i->src());
  unsigned opmask = BX_READ_16BIT_OPMASK(i->opmask());
  unsigned lanes = i->getVL();

  // Start from the MXCSR-derived status, then apply the per-instruction override.
  float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
  softfloat_status_word_rc_override(status, i);

  // Each 128-bit lane consumes the next four bits of the opmask.
  unsigned lane_mask = opmask;
  for (unsigned lane = 0; lane < lanes; lane++) {
    xmm_sqrtps_mask(&result.vmm128(lane), status, lane_mask);
    lane_mask >>= 4;
  }

  // Exception flags are checked before the destination is touched.
  check_exceptionsSSE(get_exception_flags(status));

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), result, lanes);
  }
  else {
    // Merge path: blend the computed lanes into the existing destination lanes.
    for (unsigned lane = 0; lane < lanes; lane++) {
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), lane), &result.vmm128(lane), opmask);
      opmask >>= 4;
    }
    BX_CLEAR_AVX_REGZ(i->dst(), lanes);
  }

  BX_NEXT_INSTR(i);
}
// Handler that runs xmm_sqrtpd_mask over every 128-bit lane of the source
// register under the instruction's 8-bit opmask, then commits the result to
// the destination using either zero-masking or a per-lane xmm_blendpd merge.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPD_MASK_VpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister result = BX_READ_AVX_REG(i->src());
  unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask());
  unsigned lanes = i->getVL();

  // Start from the MXCSR-derived status, then apply the per-instruction override.
  float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
  softfloat_status_word_rc_override(status, i);

  // Each 128-bit lane consumes the next two bits of the opmask.
  unsigned lane_mask = opmask;
  for (unsigned lane = 0; lane < lanes; lane++) {
    xmm_sqrtpd_mask(&result.vmm128(lane), status, lane_mask);
    lane_mask >>= 2;
  }

  // Exception flags are checked before the destination is touched.
  check_exceptionsSSE(get_exception_flags(status));

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), result, lanes);
  }
  else {
    // Merge path: blend the computed lanes into the existing destination lanes.
    for (unsigned lane = 0; lane < lanes; lane++) {
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), lane), &result.vmm128(lane), opmask);
      opmask >>= 2;
    }
    BX_CLEAR_AVX_REGZ(i->dst(), lanes);
  }

  BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSS_MASK_VssHpsWssR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
@ -738,55 +686,17 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFPCLASSSD_MASK_KGbWsdIbR(bxInstruction_c
// getexp
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPPS_MASK_VpsWpsR(bxInstruction_c *i)
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPSS_VssHpsWssR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
Bit32u mask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
unsigned len = i->getVL();
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4)
xmm_getexpps_mask(&op.vmm128(n), status, tmp_mask);
op1.xmm32u(0) = float32_getexp(op2, status);
check_exceptionsSSE(get_exception_flags(status));
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, mask >>= 4)
xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), op, len);
}
BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPPD_MASK_VpdWpdR(bxInstruction_c *i)
{
BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
Bit32u mask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
unsigned len = i->getVL();
float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
softfloat_status_word_rc_override(status, i);
for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2)
xmm_getexppd_mask(&op.vmm128(n), status, tmp_mask);
check_exceptionsSSE(get_exception_flags(status));
if (! i->isZeroMasking()) {
for (unsigned n=0; n < len; n++, mask >>= 2)
xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask);
BX_CLEAR_AVX_REGZ(i->dst(), len);
}
else {
BX_WRITE_AVX_REGZ(i->dst(), op, len);
}
BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
BX_NEXT_INSTR(i);
}
@ -813,6 +723,20 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPSS_MASK_VssHpsWssR(bxInstruction_c
BX_NEXT_INSTR(i);
}
// Unmasked scalar handler: replaces the low qword of a copy of src1 with
// float64_getexp() of the low qword of src2 and writes that copy to dst
// through BX_WRITE_XMM_REG_CLEAR_HIGH.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPSD_VsdHpdWsdR(bxInstruction_c *i)
{
  // Start from the MXCSR-derived status, then apply the per-instruction override.
  float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
  softfloat_status_word_rc_override(status, i);

  float64 source = BX_READ_XMM_REG_LO_QWORD(i->src2());
  BxPackedXmmRegister result = BX_READ_XMM_REG(i->src1());

  result.xmm64u(0) = float64_getexp(source, status);

  // Exception flags are checked before the destination is written.
  check_exceptionsSSE(get_exception_flags(status));

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), result);

  BX_NEXT_INSTR(i);
}
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
{
BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());

View File

@ -3118,8 +3118,6 @@ public: // for now...
BX_SMF void VMINSD_MASK_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VMAXSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VMAXSD_MASK_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VSQRTPS_MASK_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VSQRTPD_MASK_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VSQRTSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VSQRTSD_MASK_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
@ -3128,8 +3126,8 @@ public: // for now...
BX_SMF void VFPCLASSSS_MASK_KGbWssIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VFPCLASSSD_MASK_KGbWsdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VGETEXPPS_MASK_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VGETEXPPD_MASK_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VGETEXPSS_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VGETEXPSD_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VGETEXPSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
BX_SMF void VGETEXPSD_MASK_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);

View File

@ -113,6 +113,60 @@ void BX_CPP_AttrRegparmN(1) BX_CPU_C::HANDLE_AVX_PFP_3OP(bxInstruction_c *i)
#include "simd_int.h"
// Generic opmask-controlled one-operand packed handler stepping the mask four
// bits per 128-bit lane. `func` is invoked once per lane with that lane's
// slice of the 16-bit opmask; the result is then committed with zero-masking
// or merged into the destination through xmm_blendps.
template <xmm_pfp_1op_mask func>
void BX_CPP_AttrRegparmN(1) BX_CPU_C::HANDLE_AVX512_MASK_PFP_1OP_SINGLE(bxInstruction_c *i)
{
  BxPackedAvxRegister result = BX_READ_AVX_REG(i->src());
  unsigned opmask = BX_READ_16BIT_OPMASK(i->opmask());
  unsigned lanes = i->getVL();

  // Start from the MXCSR-derived status, then apply the per-instruction override.
  float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
  softfloat_status_word_rc_override(status, i);

  // Each 128-bit lane consumes the next four bits of the opmask.
  unsigned lane_mask = opmask;
  for (unsigned lane = 0; lane < lanes; lane++) {
    func(&result.vmm128(lane), status, lane_mask);
    lane_mask >>= 4;
  }

  // Exception flags are checked before the destination is touched.
  check_exceptionsSSE(get_exception_flags(status));

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), result, lanes);
  }
  else {
    // Merge path: blend the computed lanes into the existing destination lanes.
    for (unsigned lane = 0; lane < lanes; lane++) {
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), lane), &result.vmm128(lane), opmask);
      opmask >>= 4;
    }
    BX_CLEAR_AVX_REGZ(i->dst(), lanes);
  }

  BX_NEXT_INSTR(i);
}
// Generic opmask-controlled one-operand packed handler stepping the mask two
// bits per 128-bit lane. `func` is invoked once per lane with that lane's
// slice of the 8-bit opmask; the result is then committed with zero-masking
// or merged into the destination through xmm_blendpd.
template <xmm_pfp_1op_mask func>
void BX_CPP_AttrRegparmN(1) BX_CPU_C::HANDLE_AVX512_MASK_PFP_1OP_DOUBLE(bxInstruction_c *i)
{
  BxPackedAvxRegister result = BX_READ_AVX_REG(i->src());
  unsigned opmask = BX_READ_8BIT_OPMASK(i->opmask());
  unsigned lanes = i->getVL();

  // Start from the MXCSR-derived status, then apply the per-instruction override.
  float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
  softfloat_status_word_rc_override(status, i);

  // Each 128-bit lane consumes the next two bits of the opmask.
  unsigned lane_mask = opmask;
  for (unsigned lane = 0; lane < lanes; lane++) {
    func(&result.vmm128(lane), status, lane_mask);
    lane_mask >>= 2;
  }

  // Exception flags are checked before the destination is touched.
  check_exceptionsSSE(get_exception_flags(status));

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), result, lanes);
  }
  else {
    // Merge path: blend the computed lanes into the existing destination lanes.
    for (unsigned lane = 0; lane < lanes; lane++) {
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), lane), &result.vmm128(lane), opmask);
      opmask >>= 2;
    }
    BX_CLEAR_AVX_REGZ(i->dst(), lanes);
  }

  BX_NEXT_INSTR(i);
}
template <xmm_pfp_2op_mask func>
void BX_CPP_AttrRegparmN(1) BX_CPU_C::HANDLE_AVX512_MASK_PFP_2OP_SINGLE(bxInstruction_c *i)
{

View File

@ -1087,13 +1087,17 @@ static const Bit64u BxOpcodeGroup_EVEX_0F3840[] = {
};
static const Bit64u BxOpcodeGroup_EVEX_0F3842[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_V512_VGETEXPPS_VpsWps_Kmask),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1, BX_IA_V512_VGETEXPPD_VpdWpd_Kmask)
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0 | ATTR_MASK_K0, BX_IA_V512_VGETEXPPS_VpsWps),
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_V512_VGETEXPPS_VpsWps_Kmask),
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1 | ATTR_MASK_K0, BX_IA_V512_VGETEXPPD_VpdWpd),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1, BX_IA_V512_VGETEXPPD_VpdWpd_Kmask)
};
static const Bit64u BxOpcodeGroup_EVEX_0F3843[] = {
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_V512_VGETEXPSS_VssHpsWss_Kmask),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1, BX_IA_V512_VGETEXPSD_VsdHpdWsd_Kmask)
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0 | ATTR_MASK_K0, BX_IA_V512_VGETEXPSS_VssHpsWss),
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W0, BX_IA_V512_VGETEXPSS_VssHpsWss_Kmask),
form_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1 | ATTR_MASK_K0, BX_IA_V512_VGETEXPSD_VsdHpdWsd),
last_opcode(ATTR_SSE_PREFIX_66 | ATTR_VEX_W1, BX_IA_V512_VGETEXPSD_VsdHpdWsd_Kmask)
};
static const Bit64u BxOpcodeGroup_EVEX_0F3844[] = {

View File

@ -2904,8 +2904,8 @@ bx_define_opcode(BX_IA_V512_VSQRTPD_VpdWpd, "vsqrtpd", "vsqrtpd", &BX_CPU_C::LOA
bx_define_opcode(BX_IA_V512_VSQRTSS_VssHpsWss, "vsqrtss", "vsqrtss", &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSQRTSS_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VSQRTSD_VsdHpdWsd, "vsqrtsd", "vsqrtsd", &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VSQRTSD_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VSQRTPS_VpsWps_Kmask, "vsqrtps", "vsqrtps", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSQRTPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSQRTPD_VpdWpd_Kmask, "vsqrtpd", "vsqrtpd", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSQRTPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSQRTPS_VpsWps_Kmask, "vsqrtps", "vsqrtps", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::HANDLE_AVX512_MASK_PFP_1OP_SINGLE<xmm_sqrtps_mask>, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSQRTPD_VpdWpd_Kmask, "vsqrtpd", "vsqrtpd", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::HANDLE_AVX512_MASK_PFP_1OP_DOUBLE<xmm_sqrtpd_mask>, BX_ISA_AVX512, OP_Vpd, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VSQRTSS_VssHpsWss_Kmask, "vsqrtss", "vsqrtss", &BX_CPU_C::LOAD_MASK_Wss, &BX_CPU_C::VSQRTSS_MASK_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VSQRTSD_VsdHpdWsd_Kmask, "vsqrtsd", "vsqrtsd", &BX_CPU_C::LOAD_MASK_Wsd, &BX_CPU_C::VSQRTSD_MASK_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
@ -3558,8 +3558,13 @@ bx_define_opcode(BX_IA_V512_VRANGEPD_VpdHpdWpdIb_Kmask, "vrangepd", "vrangepd",
bx_define_opcode(BX_IA_V512_VRANGESS_VssHpsWssIb_Kmask, "vrangess", "vrangess", &BX_CPU_C::LOAD_MASK_Wss, &BX_CPU_C::VRANGESS_MASK_VssHpsWssIbR, BX_ISA_AVX512_DQ, OP_Vss, OP_Hps, OP_mVss, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VRANGESD_VsdHpdWsdIb_Kmask, "vrangesd", "vrangesd", &BX_CPU_C::LOAD_MASK_Wsd, &BX_CPU_C::VRANGESD_MASK_VsdHpdWsdIbR, BX_ISA_AVX512_DQ, OP_Vsd, OP_Hpd, OP_mVsd, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VGETEXPPS_VpsWps_Kmask, "vgetexpps", "vgetexpps", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VGETEXPPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VGETEXPPD_VpdWpd_Kmask, "vgetexppd", "vgetexppd", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VGETEXPPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VGETEXPPS_VpsWps, "vgetexpps", "vgetexpps", &BX_CPU_C::LOAD_BROADCAST_VectorD, &BX_CPU_C::HANDLE_AVX_PFP_1OP<xmm_getexpps>, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VGETEXPPD_VpdWpd, "vgetexppd", "vgetexppd", &BX_CPU_C::LOAD_BROADCAST_VectorQ, &BX_CPU_C::HANDLE_AVX_PFP_1OP<xmm_getexppd>, BX_ISA_AVX512, OP_Vpd, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VGETEXPSS_VssHpsWss, "vgetexpss", "vgetexpss", &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VGETEXPSS_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VGETEXPSD_VsdHpdWsd, "vgetexpsd", "vgetexpsd", &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VGETEXPSD_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VGETEXPPS_VpsWps_Kmask, "vgetexpps", "vgetexpps", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::HANDLE_AVX512_MASK_PFP_1OP_SINGLE<xmm_getexpps_mask>, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VGETEXPPD_VpdWpd_Kmask, "vgetexppd", "vgetexppd", &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::HANDLE_AVX512_MASK_PFP_1OP_DOUBLE<xmm_getexppd_mask>, BX_ISA_AVX512, OP_Vpd, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
bx_define_opcode(BX_IA_V512_VGETEXPSS_VssHpsWss_Kmask, "vgetexpss", "vgetexpss", &BX_CPU_C::LOAD_MASK_Wss, &BX_CPU_C::VGETEXPSS_MASK_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
bx_define_opcode(BX_IA_V512_VGETEXPSD_VsdHpdWsd_Kmask, "vgetexpsd", "vgetexpsd", &BX_CPU_C::LOAD_MASK_Wsd, &BX_CPU_C::VGETEXPSD_MASK_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)