Implemented VRNDSCALE AVX-512 instructions.

The only AVX-512 opcodes still missing are:

512.66.0F38.W0 2C VSCALEFPS
512.66.0F38.W1 2C VSCALEFPD
NDS.LIG.66.0F38.W0 2D VSCALEFSS
NDS.LIG.66.0F38.W1 2D VSCALEFSD
This commit is contained in:
Stanislav Shwartsman 2014-02-27 18:27:57 +00:00
parent f282fc4e75
commit 695d245116
3 changed files with 175 additions and 29 deletions

View File

@ -960,34 +960,160 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETMANTPD_MASK_VpdWpdIbR(bxInstru
BX_NEXT_INSTR(i);
}
// rndscale
// VRNDSCALEPS, opmask form, register source.
// Every selected 32-bit element of the source is passed through
// float32_round_to_int(); the imm8 byte carries the rounding control in its
// low nibble and the scale argument in its high nibble.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALEPS_MASK_VpsWpsIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  // when no opmask register is encoded, treat every element as selected
  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  unsigned len = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);
  softfloat_status_word_rc_override(status, i);

  // imm8[7:4] is handed to float32_round_to_int() as the scale argument
  Bit8u control = i->Ib(), scale = control >> 4;

  // override MXCSR rounding mode with control coming from imm8
  // (imm8[2] == 0 selects imm8[1:0] as the rounding mode)
  if ((control & 0x4) == 0)
    status.float_rounding_mode = control & 0x3;

  // ignore precision exception result (imm8[3] set)
  if (control & 0x8)
    status.float_suppress_exception |= float_flag_inexact;

  // round the selected elements, zero the unselected ones in the temporary
  for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
    if (opmask & mask)
      op.vmm32u(n) = float32_round_to_int(op.vmm32u(n), scale, status);
    else
      op.vmm32u(n) = 0;
  }

  check_exceptionsSSE(get_exception_flags(status));

  if (! i->isZeroMasking()) {
    // merge masking: blend one 128-bit lane at a time, four mask bits per lane
    // NOTE(review): presumably xmm_blendps keeps the destination element
    // wherever the mask bit is clear - confirm against its definition
    for (unsigned n=0; n < len; n++, opmask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }
  else {
    // zero masking: the temporary already holds zeros in unselected elements
    BX_WRITE_AVX_REGZ(i->dst(), op, len);
  }

  BX_NEXT_INSTR(i);
}
// VRNDSCALESS, opmask form, register source.
// The low dword of the result is the low dword of src2 passed through
// float32_round_to_int(); the remaining dwords of the 128-bit result are
// taken from src1. imm8 carries the rounding control (low nibble) and the
// scale argument (high nibble).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());

  // compute when no opmask register is encoded or the scalar element is selected
  // NOTE(review): BX_SCALAR_ELEMENT_MASK presumably tests bit 0 of the opmask
  // register - confirm against its definition
  if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());

    // imm8[7:4] is handed to float32_round_to_int() as the scale argument
    Bit8u control = i->Ib(), scale = control >> 4;

    float_status_t status;
    mxcsr_to_softfloat_status_word(status, MXCSR);
    softfloat_status_word_rc_override(status, i);

    // override MXCSR rounding mode with control coming from imm8
    if ((control & 0x4) == 0)
      status.float_rounding_mode = control & 0x3;

    // ignore precision exception result
    if (control & 0x8)
      status.float_suppress_exception |= float_flag_inexact;

    op1.xmm32u(0) = float32_round_to_int(op2, scale, status);

    check_exceptionsSSE(get_exception_flags(status));
  }
  else {
    // element masked off: zero it (zero masking) or keep the destination's value
    if (i->isZeroMasking())
      op1.xmm32u(0) = 0;
    else
      op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
  BX_NEXT_INSTR(i);
}
// VRNDSCALEPD, opmask form, register source.
// Every selected 64-bit element of the source is passed through
// float64_round_to_int(); the imm8 byte carries the rounding control in its
// low nibble and the scale argument in its high nibble.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALEPD_MASK_VpdWpdIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister result = BX_READ_AVX_REG(i->src());

  // start with "all elements selected" and narrow it if an opmask is encoded
  Bit32u writemask = (Bit32u) -1;
  if (i->opmask())
    writemask = BX_READ_8BIT_OPMASK(i->opmask());

  unsigned vl = i->getVL();

  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);
  softfloat_status_word_rc_override(status, i);

  Bit8u imm8 = i->Ib();
  Bit8u scale = imm8 >> 4;

  // imm8[2] clear: rounding mode comes from imm8[1:0] instead of MXCSR
  if (! (imm8 & 0x4))
    status.float_rounding_mode = imm8 & 0x3;

  // imm8[3] set: the inexact (precision) exception is suppressed
  if (imm8 & 0x8)
    status.float_suppress_exception |= float_flag_inexact;

  // unselected elements become zero in the temporary, selected ones are rounded
  for (unsigned n=0; n < QWORD_ELEMENTS(vl); n++) {
    if (! (writemask & (1u << n)))
      result.vmm64u(n) = 0;
    else
      result.vmm64u(n) = float64_round_to_int(result.vmm64u(n), scale, status);
  }

  check_exceptionsSSE(get_exception_flags(status));

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), result, vl);
  }
  else {
    // merge masking: two mask bits per 128-bit lane
    for (unsigned lane=0; lane < vl; lane++)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), lane), &result.vmm128(lane), writemask >> (2*lane));

    BX_CLEAR_AVX_REGZ(i->dst(), vl);
  }

  BX_NEXT_INSTR(i);
}
// VRNDSCALESD, opmask form, register source.
// The low qword of the result is the low qword of src2 passed through
// float64_round_to_int(); the upper qword of the 128-bit result is taken
// from src1. imm8 carries the rounding control (low nibble) and the scale
// argument (high nibble).
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());

  // compute when no opmask register is encoded or the scalar element is selected
  // NOTE(review): BX_SCALAR_ELEMENT_MASK presumably tests bit 0 of the opmask
  // register - confirm against its definition
  if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());

    // imm8[7:4] is handed to float64_round_to_int() as the scale argument
    Bit8u control = i->Ib(), scale = control >> 4;

    float_status_t status;
    mxcsr_to_softfloat_status_word(status, MXCSR);
    softfloat_status_word_rc_override(status, i);

    // override MXCSR rounding mode with control coming from imm8
    if ((control & 0x4) == 0)
      status.float_rounding_mode = control & 0x3;

    // ignore precision exception result
    if (control & 0x8)
      status.float_suppress_exception |= float_flag_inexact;

    op1.xmm64u(0) = float64_round_to_int(op2, scale, status);

    check_exceptionsSSE(get_exception_flags(status));
  }
  else {
    // element masked off: zero it (zero masking) or keep the destination's value
    if (i->isZeroMasking())
      op1.xmm64u(0) = 0;
    else
      op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
  BX_NEXT_INSTR(i);
}
// scale
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_MASK_VpsWpsR(bxInstruction_c *i)
{
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
@ -995,14 +1121,14 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_MASK_VpsWpsR(bxInstructi
BX_NEXT_INSTR(i);
}
// VSCALEFSS, opmask form, register source - placeholder only.
// Reports the opcode's short name through BX_PANIC as not implemented and
// then continues with the next instruction.
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *i)
{
  BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));

  BX_NEXT_INSTR(i);
}
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *i)
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_MASK_VpdWpdR(bxInstruction_c *i)
{
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));

View File

@ -508,16 +508,20 @@ float64 float32_to_float64(float32 a, float_status_t &status)
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float32 float32_round_to_int(float32 a, float_status_t &status)
float32 float32_round_to_int(float32 a, Bit8u scale, float_status_t &status)
{
Bit32u lastBitMask, roundBitsMask;
int roundingMode = get_float_rounding_mode(status);
Bit16s aExp = extractFloat32Exp(a);
scale &= 0xf;
if ((aExp == 0xFF) && extractFloat32Frac(a)) {
return propagateFloat32NaN(a, status);
}
aExp += scale; // scale the exponent
if (0x96 <= aExp) {
if ((aExp == 0xFF) && extractFloat32Frac(a)) {
return propagateFloat32NaN(a, status);
}
return a;
}
@ -532,16 +536,17 @@ float32 float32_round_to_int(float32 a, float_status_t &status)
switch (roundingMode) {
case float_round_nearest_even:
if ((aExp == 0x7E) && extractFloat32Frac(a)) {
return packFloat32(aSign, 0x7F, 0);
return packFloat32(aSign, 0x7F - scale, 0);
}
break;
case float_round_down:
return aSign ? float32_negative_one : 0;
return aSign ? packFloat32(1, 0x7F - scale, 0) : float32_positive_zero;
case float_round_up:
return aSign ? float32_negative_zero : float32_positive_one;
return aSign ? float32_negative_zero : packFloat32(0, 0x7F - scale, 0);
}
return packFloat32(aSign, 0, 0);
}
lastBitMask = 1;
lastBitMask <<= 0x96 - aExp;
roundBitsMask = lastBitMask - 1;
@ -1610,18 +1615,20 @@ float32 float64_to_float32(float64 a, float_status_t &status)
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 float64_round_to_int(float64 a, float_status_t &status)
float64 float64_round_to_int(float64 a, Bit8u scale, float_status_t &status)
{
Bit16s aExp;
Bit64u lastBitMask, roundBitsMask;
int roundingMode = get_float_rounding_mode(status);
float64 z;
Bit16s aExp = extractFloat64Exp(a);
scale &= 0xf;
if ((aExp == 0x7FF) && extractFloat64Frac(a)) {
return propagateFloat64NaN(a, status);
}
aExp += scale; // scale the exponent
aExp = extractFloat64Exp(a);
if (0x433 <= aExp) {
if ((aExp == 0x7FF) && extractFloat64Frac(a)) {
return propagateFloat64NaN(a, status);
}
return a;
}
@ -1636,20 +1643,21 @@ float64 float64_round_to_int(float64 a, float_status_t &status)
switch (roundingMode) {
case float_round_nearest_even:
if ((aExp == 0x3FE) && extractFloat64Frac(a)) {
return packFloat64(aSign, 0x3FF, 0);
return packFloat64(aSign, 0x3FF - scale, 0);
}
break;
case float_round_down:
return aSign ? float64_negative_one : 0;
return aSign ? packFloat64(1, 0x3FF - scale, 0) : float64_positive_zero;
case float_round_up:
return aSign ? float64_negative_zero : float64_positive_one;
return aSign ? float64_negative_zero : packFloat64(0, 0x3FF - scale, 0);
}
return packFloat64(aSign, 0, 0);
}
lastBitMask = 1;
lastBitMask <<= 0x433 - aExp;
roundBitsMask = lastBitMask - 1;
z = a;
float64 z = a;
if (roundingMode == float_round_nearest_even) {
z += lastBitMask>>1;
if ((z & roundBitsMask) == 0) z &= ~lastBitMask;

View File

@ -256,6 +256,7 @@ float64 float32_to_float64(float32, float_status_t &status);
| Software IEC/IEEE single-precision operations.
*----------------------------------------------------------------------------*/
float32 float32_round_to_int(float32, float_status_t &status);
float32 float32_round_to_int(float32, Bit8u scale, float_status_t &status);
float32 float32_add(float32, float32, float_status_t &status);
float32 float32_sub(float32, float32, float_status_t &status);
float32 float32_mul(float32, float32, float_status_t &status);
@ -264,6 +265,11 @@ float32 float32_sqrt(float32, float_status_t &status);
float32 float32_frc(float32, float_status_t &status);
float32 float32_muladd(float32, float32, float32, int flags, float_status_t &status);
// Two-argument convenience overload: forwards to the three-argument
// float32_round_to_int() with a scale of 0.
BX_CPP_INLINE float32 float32_round_to_int(float32 a, float_status_t &status)
{
  const Bit8u no_scale = 0;
  float32 rounded = float32_round_to_int(a, no_scale, status);
  return rounded;
}
BX_CPP_INLINE float32 float32_fmadd(float32 a, float32 b, float32 c, float_status_t &status)
{
return float32_muladd(a, b, c, 0, status);
@ -315,6 +321,7 @@ float32 float64_to_float32(float64, float_status_t &status);
| Software IEC/IEEE double-precision operations.
*----------------------------------------------------------------------------*/
float64 float64_round_to_int(float64, float_status_t &status);
float64 float64_round_to_int(float64, Bit8u scale, float_status_t &status);
float64 float64_add(float64, float64, float_status_t &status);
float64 float64_sub(float64, float64, float_status_t &status);
float64 float64_mul(float64, float64, float_status_t &status);
@ -323,6 +330,11 @@ float64 float64_sqrt(float64, float_status_t &status);
float64 float64_frc(float64, float_status_t &status);
float64 float64_muladd(float64, float64, float64, int flags, float_status_t &status);
// Two-argument convenience overload: forwards to the three-argument
// float64_round_to_int() with a scale of 0.
BX_CPP_INLINE float64 float64_round_to_int(float64 a, float_status_t &status)
{
  const Bit8u no_scale = 0;
  float64 rounded = float64_round_to_int(a, no_scale, status);
  return rounded;
}
BX_CPP_INLINE float64 float64_fmadd(float64 a, float64 b, float64 c, float_status_t &status)
{
return float64_muladd(a, b, c, 0, status);