Implemented VRNDSCALE AVX-512 instructions.
The only AVX-512 opcodes still missing are: 512.66.0F38.W0 2C VSCALEFPS, 512.66.0F38.W1 2C VSCALEFPD, NDS.LIG.66.0F38.W0 2D VSCALEFSS, NDS.LIG.66.0F38.W1 2D VSCALEFSD.
This commit is contained in:
parent
f282fc4e75
commit
695d245116
@ -960,34 +960,160 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETMANTPD_MASK_VpdWpdIbR(bxInstru
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
// rndscale
|
||||
|
||||
// VRNDSCALEPS, opmask form, register source.
//
// Every dword element of the source selected by the opmask is passed through
// float32_round_to_int() together with the scale taken from imm8[7:4];
// elements not selected are zeroed in the temporary result.
//
// imm8 layout as used here:
//   imm8[7:4] - scale forwarded to float32_round_to_int()
//   imm8[3]   - when set, the inexact flag is added to the suppressed exceptions
//   imm8[2]   - when clear, imm8[1:0] replaces the rounding mode in the status word
//   imm8[1:0] - rounding mode used when imm8[2] is clear
//
// With zero-masking the temporary is written to the destination as is; with
// merge-masking each 128-bit lane is blended into the destination under the
// corresponding 4 opmask bits.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALEPS_MASK_VpsWpsIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  // no opmask register encoded: treat every element as selected
  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  unsigned len = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);
  softfloat_status_word_rc_override(status, i);

  Bit8u control = i->Ib();
  Bit8u scale = control >> 4;

  // override MXCSR rounding mode with control coming from imm8
  if ((control & 0x4) == 0)
    status.float_rounding_mode = control & 0x3;
  // ignore precision exception result
  if (control & 0x8)
    status.float_suppress_exception |= float_flag_inexact;

  for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
    if (opmask & mask)
      op.vmm32u(n) = float32_round_to_int(op.vmm32u(n), scale, status);
    else
      op.vmm32u(n) = 0;
  }

  // exceptions are checked once, after all elements were processed and
  // before the destination is modified
  check_exceptionsSSE(get_exception_flags(status));

  if (! i->isZeroMasking()) {
    // merge-masking: one 128-bit lane at a time, 4 opmask bits per lane
    for (unsigned n=0; n < len; n++, opmask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);
    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }
  else {
    BX_WRITE_AVX_REGZ(i->dst(), op, len);
  }

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VRNDSCALESS, opmask form, register source.
//
// The result starts as a copy of the first source XMM register. When no
// opmask register is encoded, or BX_SCALAR_ELEMENT_MASK() reports the scalar
// element as selected, the low dword is replaced by the low dword of the
// second source passed through float32_round_to_int() with the scale from
// imm8[7:4]. Otherwise the low dword is zeroed (zero-masking) or taken from
// the current destination (merge-masking).
//
// imm8 layout as used here:
//   imm8[7:4] - scale forwarded to float32_round_to_int()
//   imm8[3]   - when set, the inexact flag is added to the suppressed exceptions
//   imm8[2]   - when clear, imm8[1:0] replaces the rounding mode in the status word
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());

  if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());

    Bit8u control = i->Ib();
    Bit8u scale = control >> 4;

    float_status_t status;
    mxcsr_to_softfloat_status_word(status, MXCSR);
    softfloat_status_word_rc_override(status, i);

    // override MXCSR rounding mode with control coming from imm8
    if ((control & 0x4) == 0)
      status.float_rounding_mode = control & 0x3;
    // ignore precision exception result
    if (control & 0x8)
      status.float_suppress_exception |= float_flag_inexact;

    op1.xmm32u(0) = float32_round_to_int(op2, scale, status);

    check_exceptionsSSE(get_exception_flags(status));
  }
  else {
    // scalar element masked off: no computation and no exception check
    if (i->isZeroMasking())
      op1.xmm32u(0) = 0;
    else
      op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VRNDSCALEPD, opmask form, register source.
//
// Each qword element of the source that is selected by the opmask goes
// through float64_round_to_int() with the scale held in imm8[7:4]; the
// remaining elements are zeroed in the temporary result. The temporary is
// then either stored directly (zero-masking) or blended into the destination
// lane by lane, two opmask bits per 128-bit lane (merge-masking).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALEPD_MASK_VpdWpdIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister result = BX_READ_AVX_REG(i->src());

  // all ones unless an opmask register is encoded
  Bit32u writemask = (Bit32u) -1;
  if (i->opmask())
    writemask = BX_READ_8BIT_OPMASK(i->opmask());

  const unsigned vl = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);
  softfloat_status_word_rc_override(status, i);

  const Bit8u imm8 = i->Ib();
  const Bit8u scale = imm8 >> 4;

  // imm8[2] clear: the rounding mode comes from imm8[1:0]
  if (! (imm8 & 0x4))
    status.float_rounding_mode = imm8 & 0x3;

  // imm8[3] set: the precision (inexact) exception is suppressed
  if (imm8 & 0x8)
    status.float_suppress_exception |= float_flag_inexact;

  const unsigned elements = QWORD_ELEMENTS(vl);
  for (unsigned idx = 0; idx < elements; idx++) {
    const bool selected = ((writemask >> idx) & 0x1) != 0;
    result.vmm64u(idx) = selected ? float64_round_to_int(result.vmm64u(idx), scale, status) : 0;
  }

  check_exceptionsSSE(get_exception_flags(status));

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), result, vl);
  }
  else {
    for (unsigned lane = 0; lane < vl; lane++) {
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), lane), &result.vmm128(lane), writemask);
      writemask >>= 2;
    }
    BX_CLEAR_AVX_REGZ(i->dst(), vl);
  }

  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// VRNDSCALESD, opmask form, register source.
//
// The result starts as a copy of the first source XMM register. When no
// opmask register is encoded, or BX_SCALAR_ELEMENT_MASK() reports the scalar
// element as selected, the low qword is replaced by the low qword of the
// second source passed through float64_round_to_int() with the scale from
// imm8[7:4]. Otherwise the low qword is zeroed (zero-masking) or taken from
// the current destination (merge-masking).
//
// imm8 layout as used here:
//   imm8[7:4] - scale forwarded to float64_round_to_int()
//   imm8[3]   - when set, the inexact flag is added to the suppressed exceptions
//   imm8[2]   - when clear, imm8[1:0] replaces the rounding mode in the status word
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());

  if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());

    Bit8u control = i->Ib();
    Bit8u scale = control >> 4;

    float_status_t status;
    mxcsr_to_softfloat_status_word(status, MXCSR);
    softfloat_status_word_rc_override(status, i);

    // override MXCSR rounding mode with control coming from imm8
    if ((control & 0x4) == 0)
      status.float_rounding_mode = control & 0x3;
    // ignore precision exception result
    if (control & 0x8)
      status.float_suppress_exception |= float_flag_inexact;

    op1.xmm64u(0) = float64_round_to_int(op2, scale, status);

    check_exceptionsSSE(get_exception_flags(status));
  }
  else {
    // scalar element masked off: no computation and no exception check
    if (i->isZeroMasking())
      op1.xmm64u(0) = 0;
    else
      op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
  BX_NEXT_INSTR(i);
}
|
||||
|
||||
// scale
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_MASK_VpsWpsR(bxInstruction_c *i)
|
||||
{
|
||||
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
|
||||
@ -995,14 +1121,14 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_MASK_VpsWpsR(bxInstructi
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_MASK_VpdWpdR(bxInstruction_c *i)
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *i)
|
||||
{
|
||||
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *i)
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_MASK_VpdWpdR(bxInstruction_c *i)
|
||||
{
|
||||
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
|
||||
|
||||
|
@ -508,16 +508,20 @@ float64 float32_to_float64(float32 a, float_status_t &status)
|
||||
| Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
float32 float32_round_to_int(float32 a, float_status_t &status)
|
||||
float32 float32_round_to_int(float32 a, Bit8u scale, float_status_t &status)
|
||||
{
|
||||
Bit32u lastBitMask, roundBitsMask;
|
||||
int roundingMode = get_float_rounding_mode(status);
|
||||
|
||||
Bit16s aExp = extractFloat32Exp(a);
|
||||
scale &= 0xf;
|
||||
|
||||
if ((aExp == 0xFF) && extractFloat32Frac(a)) {
|
||||
return propagateFloat32NaN(a, status);
|
||||
}
|
||||
|
||||
aExp += scale; // scale the exponent
|
||||
|
||||
if (0x96 <= aExp) {
|
||||
if ((aExp == 0xFF) && extractFloat32Frac(a)) {
|
||||
return propagateFloat32NaN(a, status);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
@ -532,16 +536,17 @@ float32 float32_round_to_int(float32 a, float_status_t &status)
|
||||
switch (roundingMode) {
|
||||
case float_round_nearest_even:
|
||||
if ((aExp == 0x7E) && extractFloat32Frac(a)) {
|
||||
return packFloat32(aSign, 0x7F, 0);
|
||||
return packFloat32(aSign, 0x7F - scale, 0);
|
||||
}
|
||||
break;
|
||||
case float_round_down:
|
||||
return aSign ? float32_negative_one : 0;
|
||||
return aSign ? packFloat32(1, 0x7F - scale, 0) : float32_positive_zero;
|
||||
case float_round_up:
|
||||
return aSign ? float32_negative_zero : float32_positive_one;
|
||||
return aSign ? float32_negative_zero : packFloat32(0, 0x7F - scale, 0);
|
||||
}
|
||||
return packFloat32(aSign, 0, 0);
|
||||
}
|
||||
|
||||
lastBitMask = 1;
|
||||
lastBitMask <<= 0x96 - aExp;
|
||||
roundBitsMask = lastBitMask - 1;
|
||||
@ -1610,18 +1615,20 @@ float32 float64_to_float32(float64 a, float_status_t &status)
|
||||
| Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
float64 float64_round_to_int(float64 a, float_status_t &status)
|
||||
float64 float64_round_to_int(float64 a, Bit8u scale, float_status_t &status)
|
||||
{
|
||||
Bit16s aExp;
|
||||
Bit64u lastBitMask, roundBitsMask;
|
||||
int roundingMode = get_float_rounding_mode(status);
|
||||
float64 z;
|
||||
Bit16s aExp = extractFloat64Exp(a);
|
||||
scale &= 0xf;
|
||||
|
||||
if ((aExp == 0x7FF) && extractFloat64Frac(a)) {
|
||||
return propagateFloat64NaN(a, status);
|
||||
}
|
||||
|
||||
aExp += scale; // scale the exponent
|
||||
|
||||
aExp = extractFloat64Exp(a);
|
||||
if (0x433 <= aExp) {
|
||||
if ((aExp == 0x7FF) && extractFloat64Frac(a)) {
|
||||
return propagateFloat64NaN(a, status);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
@ -1636,20 +1643,21 @@ float64 float64_round_to_int(float64 a, float_status_t &status)
|
||||
switch (roundingMode) {
|
||||
case float_round_nearest_even:
|
||||
if ((aExp == 0x3FE) && extractFloat64Frac(a)) {
|
||||
return packFloat64(aSign, 0x3FF, 0);
|
||||
return packFloat64(aSign, 0x3FF - scale, 0);
|
||||
}
|
||||
break;
|
||||
case float_round_down:
|
||||
return aSign ? float64_negative_one : 0;
|
||||
return aSign ? packFloat64(1, 0x3FF - scale, 0) : float64_positive_zero;
|
||||
case float_round_up:
|
||||
return aSign ? float64_negative_zero : float64_positive_one;
|
||||
return aSign ? float64_negative_zero : packFloat64(0, 0x3FF - scale, 0);
|
||||
}
|
||||
return packFloat64(aSign, 0, 0);
|
||||
}
|
||||
|
||||
lastBitMask = 1;
|
||||
lastBitMask <<= 0x433 - aExp;
|
||||
roundBitsMask = lastBitMask - 1;
|
||||
z = a;
|
||||
float64 z = a;
|
||||
if (roundingMode == float_round_nearest_even) {
|
||||
z += lastBitMask>>1;
|
||||
if ((z & roundBitsMask) == 0) z &= ~lastBitMask;
|
||||
|
@ -256,6 +256,7 @@ float64 float32_to_float64(float32, float_status_t &status);
|
||||
| Software IEC/IEEE single-precision operations.
|
||||
*----------------------------------------------------------------------------*/
|
||||
float32 float32_round_to_int(float32, float_status_t &status);
|
||||
float32 float32_round_to_int(float32, Bit8u scale, float_status_t &status);
|
||||
float32 float32_add(float32, float32, float_status_t &status);
|
||||
float32 float32_sub(float32, float32, float_status_t &status);
|
||||
float32 float32_mul(float32, float32, float_status_t &status);
|
||||
@ -264,6 +265,11 @@ float32 float32_sqrt(float32, float_status_t &status);
|
||||
float32 float32_frc(float32, float_status_t &status);
|
||||
float32 float32_muladd(float32, float32, float32, int flags, float_status_t &status);
|
||||
|
||||
// Two-argument convenience overload: forwards to the three-argument
// float32_round_to_int() with a scale of zero.
BX_CPP_INLINE float32 float32_round_to_int(float32 a, float_status_t &status)
{
  const Bit8u no_scale = 0;
  return float32_round_to_int(a, no_scale, status);
}
|
||||
|
||||
BX_CPP_INLINE float32 float32_fmadd(float32 a, float32 b, float32 c, float_status_t &status)
|
||||
{
|
||||
return float32_muladd(a, b, c, 0, status);
|
||||
@ -315,6 +321,7 @@ float32 float64_to_float32(float64, float_status_t &status);
|
||||
| Software IEC/IEEE double-precision operations.
|
||||
*----------------------------------------------------------------------------*/
|
||||
float64 float64_round_to_int(float64, float_status_t &status);
|
||||
float64 float64_round_to_int(float64, Bit8u scale, float_status_t &status);
|
||||
float64 float64_add(float64, float64, float_status_t &status);
|
||||
float64 float64_sub(float64, float64, float_status_t &status);
|
||||
float64 float64_mul(float64, float64, float_status_t &status);
|
||||
@ -323,6 +330,11 @@ float64 float64_sqrt(float64, float_status_t &status);
|
||||
float64 float64_frc(float64, float_status_t &status);
|
||||
float64 float64_muladd(float64, float64, float64, int flags, float_status_t &status);
|
||||
|
||||
// Two-argument convenience overload: forwards to the three-argument
// float64_round_to_int() with a scale of zero.
BX_CPP_INLINE float64 float64_round_to_int(float64 a, float_status_t &status)
{
  const Bit8u no_scale = 0;
  return float64_round_to_int(a, no_scale, status);
}
|
||||
|
||||
BX_CPP_INLINE float64 float64_fmadd(float64 a, float64 b, float64 c, float_status_t &status)
|
||||
{
|
||||
return float64_muladd(a, b, c, 0, status);
|
||||
|
Loading…
Reference in New Issue
Block a user