Added shape of implementation for last missing VSCALEF* AVX-512 instructions.
The softfloat implementation is still missing (only corner cases are supported). Extend softfloat floatNN_class methods to distinguish between SNaN and QNaN.
This commit is contained in:
parent
aad090d03c
commit
02e19de346
@ -69,6 +69,7 @@ EVEX_OP_PACKED_SINGLE(VMULPS_MASK_VpsHpsWpsR, xmm_mulps_mask)
|
||||
EVEX_OP_PACKED_SINGLE(VDIVPS_MASK_VpsHpsWpsR, xmm_divps_mask)
|
||||
EVEX_OP_PACKED_SINGLE(VMAXPS_MASK_VpsHpsWpsR, xmm_maxps_mask)
|
||||
EVEX_OP_PACKED_SINGLE(VMINPS_MASK_VpsHpsWpsR, xmm_minps_mask)
|
||||
EVEX_OP_PACKED_SINGLE(VSCALEFPS_MASK_VpsHpsWpsR, xmm_scalefps_mask)
|
||||
|
||||
#define EVEX_OP_PACKED_DOUBLE(HANDLER, func) \
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
|
||||
@ -105,6 +106,7 @@ EVEX_OP_PACKED_DOUBLE(VMULPD_MASK_VpdHpdWpdR, xmm_mulpd_mask)
|
||||
EVEX_OP_PACKED_DOUBLE(VDIVPD_MASK_VpdHpdWpdR, xmm_divpd_mask)
|
||||
EVEX_OP_PACKED_DOUBLE(VMAXPD_MASK_VpdHpdWpdR, xmm_maxpd_mask)
|
||||
EVEX_OP_PACKED_DOUBLE(VMINPD_MASK_VpdHpdWpdR, xmm_minpd_mask)
|
||||
EVEX_OP_PACKED_DOUBLE(VSCALEFPD_MASK_VpdHpdWpdR, xmm_scalefpd_mask)
|
||||
|
||||
#define EVEX_OP_SCALAR_SINGLE(HANDLER, func) \
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
|
||||
@ -137,6 +139,7 @@ EVEX_OP_SCALAR_SINGLE(VMULSS_MASK_VssHpsWssR, float32_mul)
|
||||
EVEX_OP_SCALAR_SINGLE(VDIVSS_MASK_VssHpsWssR, float32_div)
|
||||
EVEX_OP_SCALAR_SINGLE(VMINSS_MASK_VssHpsWssR, float32_min)
|
||||
EVEX_OP_SCALAR_SINGLE(VMAXSS_MASK_VssHpsWssR, float32_max)
|
||||
EVEX_OP_SCALAR_SINGLE(VSCALEFSS_MASK_VssHpsWssR, float32_scalef)
|
||||
|
||||
#define EVEX_OP_SCALAR_DOUBLE(HANDLER, func) \
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
|
||||
@ -169,6 +172,7 @@ EVEX_OP_SCALAR_DOUBLE(VMULSD_MASK_VsdHpdWsdR, float64_mul)
|
||||
EVEX_OP_SCALAR_DOUBLE(VDIVSD_MASK_VsdHpdWsdR, float64_div)
|
||||
EVEX_OP_SCALAR_DOUBLE(VMINSD_MASK_VsdHpdWsdR, float64_min)
|
||||
EVEX_OP_SCALAR_DOUBLE(VMAXSD_MASK_VsdHpdWsdR, float64_max)
|
||||
// VSCALEFSD is the double-precision scalar form and calls float64_scalef, so it
// is instantiated with the DOUBLE macro like the other *SD handlers above.
EVEX_OP_SCALAR_DOUBLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_MASK_VpsWpsR(bxInstruction_c *i)
|
||||
{
|
||||
@ -416,14 +420,13 @@ float32 float32_fixupimm(float32 dst, float32 op1, Bit32u op2, unsigned imm8, fl
|
||||
ie_fault_mask = 0x80;
|
||||
break;
|
||||
|
||||
case float_NaN:
|
||||
if (float32_is_signaling_nan(tmp_op1)) {
|
||||
token = BX_FIXUPIMM_SNAN_TOKEN;
|
||||
ie_fault_mask = 0x10;
|
||||
}
|
||||
else {
|
||||
token = BX_FIXUPIMM_QNAN_TOKEN;
|
||||
}
|
||||
case float_SNaN:
|
||||
token = BX_FIXUPIMM_SNAN_TOKEN;
|
||||
ie_fault_mask = 0x10;
|
||||
break;
|
||||
|
||||
case float_QNaN:
|
||||
token = BX_FIXUPIMM_QNAN_TOKEN;
|
||||
break;
|
||||
|
||||
case float_denormal:
|
||||
@ -512,14 +515,13 @@ float64 float64_fixupimm(float64 dst, float64 op1, Bit32u op2, unsigned imm8, fl
|
||||
ie_fault_mask = 0x80;
|
||||
break;
|
||||
|
||||
case float_NaN:
|
||||
if (float64_is_signaling_nan(tmp_op1)) {
|
||||
token = BX_FIXUPIMM_SNAN_TOKEN;
|
||||
ie_fault_mask = 0x10;
|
||||
}
|
||||
else {
|
||||
token = BX_FIXUPIMM_QNAN_TOKEN;
|
||||
}
|
||||
case float_SNaN:
|
||||
token = BX_FIXUPIMM_SNAN_TOKEN;
|
||||
ie_fault_mask = 0x10;
|
||||
break;
|
||||
|
||||
case float_QNaN:
|
||||
token = BX_FIXUPIMM_QNAN_TOKEN;
|
||||
break;
|
||||
|
||||
case float_denormal:
|
||||
@ -1112,34 +1114,4 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESD_MASK_VsdHpdWsdIbR(bxIn
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
// scale
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_MASK_VpsWpsR(bxInstruction_c *i)
|
||||
{
|
||||
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *i)
|
||||
{
|
||||
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_MASK_VpdWpdR(bxInstruction_c *i)
|
||||
{
|
||||
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
|
||||
{
|
||||
BX_PANIC(("%s: AVX-512 instruction still not implemented", i->getIaOpcodeNameShort()));
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -8281,7 +8281,8 @@ float32 approximate_rcp14(float32 op, const float_status_t &status)
|
||||
case float_positive_inf:
|
||||
return packFloat32(sign, 0, 0);
|
||||
|
||||
case float_NaN:
|
||||
case float_SNaN:
|
||||
case float_QNaN:
|
||||
return convert_to_QNaN(op);
|
||||
|
||||
// the rcp14 handle denormals properly
|
||||
@ -8332,7 +8333,8 @@ float64 approximate_rcp14(float64 op, const float_status_t &status)
|
||||
case float_positive_inf:
|
||||
return packFloat64(sign, 0, 0);
|
||||
|
||||
case float_NaN:
|
||||
case float_SNaN:
|
||||
case float_QNaN:
|
||||
return convert_to_QNaN(op);
|
||||
|
||||
// the rcp14 handle denormals properly
|
||||
|
@ -8257,7 +8257,8 @@ float32 approximate_rsqrt14(float32 op, bx_bool daz)
|
||||
case float_negative_inf:
|
||||
return float32_default_nan;
|
||||
|
||||
case float_NaN:
|
||||
case float_SNaN:
|
||||
case float_QNaN:
|
||||
return convert_to_QNaN(op);
|
||||
|
||||
case float_denormal:
|
||||
@ -8307,7 +8308,8 @@ float64 approximate_rsqrt14(float64 op, bx_bool daz)
|
||||
case float_negative_inf:
|
||||
return float64_default_nan;
|
||||
|
||||
case float_NaN:
|
||||
case float_SNaN:
|
||||
case float_QNaN:
|
||||
return convert_to_QNaN(op);
|
||||
|
||||
case float_denormal:
|
||||
|
@ -3261,8 +3261,8 @@ public: // for now...
|
||||
BX_SMF BX_INSF_TYPE VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
BX_SMF BX_INSF_TYPE VSCALEFPS_MASK_VpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VSCALEFPD_MASK_VpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VSCALEFPS_MASK_VpsHpsWpsR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VSCALEFPD_MASK_VpdHpdWpdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VSCALEFSS_MASK_VssHpsWssR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
BX_SMF BX_INSF_TYPE VSCALEFSD_MASK_VsdHpdWsdR(bxInstruction_c *) BX_CPP_AttrRegparmN(1);
|
||||
|
||||
|
@ -1250,8 +1250,8 @@ static const BxOpcodeInfo_t BxOpcodeTableEVEX[256*3*2] = {
|
||||
/* 2A */ { 0, BX_IA_ERROR }, // #UD
|
||||
/* 2B k0 */ { 0, BX_IA_ERROR },
|
||||
/* 2B */ { 0, BX_IA_ERROR },
|
||||
/* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsWps_Kmask },
|
||||
/* 2C */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsWps_Kmask },
|
||||
/* 2C k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
|
||||
/* 2C */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask },
|
||||
/* 2D k0 */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
|
||||
/* 2D */ { BxAliasVexW | BxPrefixSSE66, BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask },
|
||||
/* 2E k0 */ { 0, BX_IA_ERROR },
|
||||
|
@ -536,7 +536,7 @@ void BX_CPU_C::print_state_FPU(void)
|
||||
"32", "RES", "64", "80"
|
||||
};
|
||||
static const char* fp_class[] = {
|
||||
"ZERO", "xNAN", "-INF", "+INF", "DENORMAL", "NORMAL"
|
||||
"ZERO", "SNAN", "QNAN", "-INF", "+INF", "DENORMAL", "NORMAL"
|
||||
};
|
||||
|
||||
Bit32u reg;
|
||||
@ -601,7 +601,7 @@ void BX_CPU_C::print_state_FPU(void)
|
||||
i==tos?"=>":" ", i, (i-tos)&7,
|
||||
"v0se"[tag],
|
||||
fp.exp & 0xffff, GET32H(fp.fraction), GET32L(fp.fraction),
|
||||
f, (f_class == float_NaN) ? (floatx80_is_signaling_nan(fp) ? "SNAN" : "QNAN") : fp_class[f_class]);
|
||||
f, fp_class[f_class]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -529,7 +529,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::FXAM(bxInstruction_c *i)
|
||||
setcc(FPU_SW_C3|FPU_SW_C1);
|
||||
break;
|
||||
|
||||
case float_NaN:
|
||||
case float_SNaN:
|
||||
case float_QNaN:
|
||||
// unsupported handled as NaNs
|
||||
if (floatx80_is_unsupported(reg)) {
|
||||
setcc(FPU_SW_C1);
|
||||
|
@ -708,6 +708,66 @@ float32 float32_getmant(float32 a, float_status_t &status, int sign_ctrl, int in
|
||||
return packFloat32(~sign_ctrl & aSign, aExp, aSig);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Return the result of a floating point scale of the single-precision floating
|
||||
| point value `a' by multiplying it by 2 power of the single-precision
|
||||
| floating point value 'b' converted to integral value. If the result cannot
|
||||
| be represented in single precision, then the proper overflow response (for
|
||||
| positive scaling operand), or the proper underflow response (for negative
|
||||
| scaling operand) is issued. The operation is performed according to the
|
||||
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
float32 float32_scalef(float32 a, float32 b, float_status_t &status)
{
    // Split both operands into sign / exponent / fraction fields.
    const int signA = extractFloat32Sign(a);
    const int signB = extractFloat32Sign(b);
    const Bit16s expA = extractFloat32Exp(a);
    const Bit16s expB = extractFloat32Exp(b);
    Bit32u fracA = extractFloat32Frac(a);
    Bit32u fracB = extractFloat32Frac(b);

    // With denormals-are-zeros enabled, clear the local fraction copy of any
    // operand whose exponent field is zero (the operands themselves are not
    // modified).
    if (get_denormals_are_zeros(status)) {
        if (expA == 0) fracA = 0;
        if (expB == 0) fracB = 0;
    }

    // b is a NaN: let the generic NaN propagation pick the result.
    if (expB == 0xFF && fracB != 0)
        return propagateFloat32NaN(a, b, status);

    // Past this point an all-ones exponent in b can only mean +/-infinity.
    const bool bIsInf = (expB == 0xFF);

    if (expA == 0xFF) {
        if (fracA != 0) {
            // a is a NaN; bit 22 of the fraction is the quiet bit.
            const bool aIsQuiet = (fracA & 0x00400000) != 0;
            if (aIsQuiet && bIsInf)
                return signB ? 0 : float32_positive_inf;

            return propagateFloat32NaN(a, b, status);
        }

        // a is an infinity: only scaling by -infinity is invalid.
        if (bIsInf && signB) {
            float_raise(status, float_flag_invalid);
            return float32_default_nan;
        }
        return a;
    }

    if (expA == 0 && fracA == 0) {
        // a is zero (or a denormal flushed by DAZ): only scaling by
        // +infinity is invalid.
        if (bIsInf && !signB) {
            float_raise(status, float_flag_invalid);
            return float32_default_nan;
        }
        return a;
    }

    // Finite non-zero a scaled by an infinity: signed zero for -infinity,
    // signed infinity for +infinity.
    if (bIsInf)
        return signB ? packFloat32(signA, 0, 0) : packFloat32(signA, 0xFF, 0);

    // Scaling of a finite value by a finite value is not implemented yet.
    return 0; // fixme
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of adding the absolute values of the single-precision
|
||||
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
|
||||
@ -1136,7 +1196,7 @@ float32 float32_sqrt(float32 a, float_status_t &status)
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Determine single-precision floating-point number class
|
||||
| Determine single-precision floating-point number class.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
float_class_t float32_class(float32 a)
|
||||
@ -1149,7 +1209,7 @@ float_class_t float32_class(float32 a)
|
||||
if (aSig == 0)
|
||||
return (aSign) ? float_negative_inf : float_positive_inf;
|
||||
|
||||
return float_NaN;
|
||||
return (aSig & 0x00400000) ? float_QNaN : float_SNaN;
|
||||
}
|
||||
|
||||
if(aExp == 0) {
|
||||
@ -1178,13 +1238,13 @@ int float32_compare(float32 a, float32 b, float_status_t &status)
|
||||
float_class_t aClass = float32_class(a);
|
||||
float_class_t bClass = float32_class(b);
|
||||
|
||||
if (aClass == float_NaN || bClass == float_NaN) {
|
||||
if (aClass == float_SNaN || aClass == float_QNaN || bClass == float_SNaN || bClass == float_QNaN)
|
||||
{
|
||||
float_raise(status, float_flag_invalid);
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal)
|
||||
{
|
||||
if (aClass == float_denormal || bClass == float_denormal) {
|
||||
float_raise(status, float_flag_denormal);
|
||||
}
|
||||
|
||||
@ -1218,17 +1278,16 @@ int float32_compare_quiet(float32 a, float32 b, float_status_t &status)
|
||||
float_class_t aClass = float32_class(a);
|
||||
float_class_t bClass = float32_class(b);
|
||||
|
||||
if (aClass == float_NaN || bClass == float_NaN)
|
||||
{
|
||||
if (float32_is_signaling_nan(a) || float32_is_signaling_nan(b))
|
||||
{
|
||||
float_raise(status, float_flag_invalid);
|
||||
}
|
||||
if (aClass == float_SNaN || bClass == float_SNaN) {
|
||||
float_raise(status, float_flag_invalid);
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal)
|
||||
{
|
||||
if (aClass == float_QNaN || bClass == float_QNaN) {
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal) {
|
||||
float_raise(status, float_flag_denormal);
|
||||
}
|
||||
|
||||
@ -1815,6 +1874,66 @@ float64 float64_getmant(float64 a, float_status_t &status, int sign_ctrl, int in
|
||||
return packFloat64(~sign_ctrl & aSign, aExp, aSig);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Return the result of a floating point scale of the double-precision floating
|
||||
| point value `a' by multiplying it by 2 power of the double-precision
|
||||
| floating point value 'b' converted to integral value. If the result cannot
|
||||
| be represented in double precision, then the proper overflow response (for
|
||||
| positive scaling operand), or the proper underflow response (for negative
|
||||
| scaling operand) is issued. The operation is performed according to the
|
||||
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
float64 float64_scalef(float64 a, float64 b, float_status_t &status)
{
    // Split both operands into sign / exponent / fraction fields.
    const int signA = extractFloat64Sign(a);
    const int signB = extractFloat64Sign(b);
    const Bit16s expA = extractFloat64Exp(a);
    const Bit16s expB = extractFloat64Exp(b);
    Bit64u fracA = extractFloat64Frac(a);
    Bit64u fracB = extractFloat64Frac(b);

    // With denormals-are-zeros enabled, clear the local fraction copy of any
    // operand whose exponent field is zero (the operands themselves are not
    // modified).
    if (get_denormals_are_zeros(status)) {
        if (expA == 0) fracA = 0;
        if (expB == 0) fracB = 0;
    }

    // b is a NaN: let the generic NaN propagation pick the result.
    if (expB == 0x7FF && fracB != 0)
        return propagateFloat64NaN(a, b, status);

    // Past this point an all-ones exponent in b can only mean +/-infinity.
    const bool bIsInf = (expB == 0x7FF);

    if (expA == 0x7FF) {
        if (fracA != 0) {
            // a is a NaN; bit 51 of the fraction is the quiet bit.
            const bool aIsQuiet = (fracA & BX_CONST64(0x0008000000000000)) != 0;
            if (aIsQuiet && bIsInf)
                return signB ? 0 : float64_positive_inf;

            return propagateFloat64NaN(a, b, status);
        }

        // a is an infinity: only scaling by -infinity is invalid.
        if (bIsInf && signB) {
            float_raise(status, float_flag_invalid);
            return float64_default_nan;
        }
        return a;
    }

    if (expA == 0 && fracA == 0) {
        // a is zero (or a denormal flushed by DAZ): only scaling by
        // +infinity is invalid.
        if (bIsInf && !signB) {
            float_raise(status, float_flag_invalid);
            return float64_default_nan;
        }
        return a;
    }

    // Finite non-zero a scaled by an infinity: signed zero for -infinity,
    // signed infinity for +infinity.
    if (bIsInf)
        return signB ? packFloat64(signA, 0, 0) : packFloat64(signA, 0x7FF, 0);

    // Scaling of a finite value by a finite value is not implemented yet.
    return 0; // fixme
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of adding the absolute values of the double-precision
|
||||
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
|
||||
@ -2258,7 +2377,7 @@ float_class_t float64_class(float64 a)
|
||||
if (aSig == 0)
|
||||
return (aSign) ? float_negative_inf : float_positive_inf;
|
||||
|
||||
return float_NaN;
|
||||
return (aSig & BX_CONST64(0x0008000000000000)) ? float_QNaN : float_SNaN;
|
||||
}
|
||||
|
||||
if(aExp == 0) {
|
||||
@ -2288,13 +2407,13 @@ int float64_compare(float64 a, float64 b, float_status_t &status)
|
||||
float_class_t aClass = float64_class(a);
|
||||
float_class_t bClass = float64_class(b);
|
||||
|
||||
if (aClass == float_NaN || bClass == float_NaN) {
|
||||
if (aClass == float_SNaN || aClass == float_QNaN || bClass == float_SNaN || bClass == float_QNaN)
|
||||
{
|
||||
float_raise(status, float_flag_invalid);
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal)
|
||||
{
|
||||
if (aClass == float_denormal || bClass == float_denormal) {
|
||||
float_raise(status, float_flag_denormal);
|
||||
}
|
||||
|
||||
@ -2328,17 +2447,16 @@ int float64_compare_quiet(float64 a, float64 b, float_status_t &status)
|
||||
float_class_t aClass = float64_class(a);
|
||||
float_class_t bClass = float64_class(b);
|
||||
|
||||
if (aClass == float_NaN || bClass == float_NaN)
|
||||
{
|
||||
if (float64_is_signaling_nan(a) || float64_is_signaling_nan(b))
|
||||
{
|
||||
float_raise(status, float_flag_invalid);
|
||||
}
|
||||
if (aClass == float_SNaN || bClass == float_SNaN) {
|
||||
float_raise(status, float_flag_invalid);
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal)
|
||||
{
|
||||
if (aClass == float_QNaN || bClass == float_QNaN) {
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal) {
|
||||
float_raise(status, float_flag_denormal);
|
||||
}
|
||||
|
||||
|
@ -54,7 +54,8 @@ typedef Bit64u float64;
|
||||
*----------------------------------------------------------------------------*/
|
||||
typedef enum {
|
||||
float_zero,
|
||||
float_NaN,
|
||||
float_SNaN,
|
||||
float_QNaN,
|
||||
float_negative_inf,
|
||||
float_positive_inf,
|
||||
float_denormal,
|
||||
@ -264,6 +265,7 @@ float32 float32_div(float32, float32, float_status_t &status);
|
||||
float32 float32_sqrt(float32, float_status_t &status);
|
||||
float32 float32_frc(float32, float_status_t &status);
|
||||
float32 float32_muladd(float32, float32, float32, int flags, float_status_t &status);
|
||||
float32 float32_scalef(float32, float32, float_status_t &status);
|
||||
|
||||
BX_CPP_INLINE float32 float32_round_to_int(float32 a, float_status_t &status)
|
||||
{
|
||||
@ -329,6 +331,7 @@ float64 float64_div(float64, float64, float_status_t &status);
|
||||
float64 float64_sqrt(float64, float_status_t &status);
|
||||
float64 float64_frc(float64, float_status_t &status);
|
||||
float64 float64_muladd(float64, float64, float64, int flags, float_status_t &status);
|
||||
float64 float64_scalef(float64, float64, float_status_t &status);
|
||||
|
||||
BX_CPP_INLINE float64 float64_round_to_int(float64 a, float_status_t &status)
|
||||
{
|
||||
|
@ -54,7 +54,7 @@ float_class_t float16_class(float16 a)
|
||||
if (aSig == 0)
|
||||
return (aSign) ? float_negative_inf : float_positive_inf;
|
||||
|
||||
return float_NaN;
|
||||
return (aSig & 0x200) ? float_QNaN : float_SNaN;
|
||||
}
|
||||
|
||||
if(aExp == 0) {
|
||||
|
@ -224,7 +224,7 @@ float_class_t floatx80_class(floatx80 a)
|
||||
|
||||
/* valid numbers have the MS bit set */
|
||||
if (!(aSig & BX_CONST64(0x8000000000000000)))
|
||||
return float_NaN; /* report unsupported as NaNs */
|
||||
return float_SNaN; /* report unsupported as SNaNs */
|
||||
|
||||
if(aExp == 0x7fff) {
|
||||
int aSign = extractFloatx80Sign(a);
|
||||
@ -232,7 +232,7 @@ float_class_t floatx80_class(floatx80 a)
|
||||
if (((Bit64u) (aSig<< 1)) == 0)
|
||||
return (aSign) ? float_negative_inf : float_positive_inf;
|
||||
|
||||
return float_NaN;
|
||||
return (aSig & BX_CONST64(0xC000000000000000)) ? float_QNaN : float_SNaN;
|
||||
}
|
||||
|
||||
return float_normalized;
|
||||
@ -251,14 +251,13 @@ int floatx80_compare(floatx80 a, floatx80 b, float_status_t &status)
|
||||
float_class_t aClass = floatx80_class(a);
|
||||
float_class_t bClass = floatx80_class(b);
|
||||
|
||||
if (aClass == float_NaN || bClass == float_NaN)
|
||||
if (aClass == float_SNaN || aClass == float_QNaN || bClass == float_SNaN || bClass == float_QNaN)
|
||||
{
|
||||
float_raise(status, float_flag_invalid);
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal)
|
||||
{
|
||||
if (aClass == float_denormal || bClass == float_denormal) {
|
||||
float_raise(status, float_flag_denormal);
|
||||
}
|
||||
|
||||
@ -310,19 +309,20 @@ int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status_t &status)
|
||||
float_class_t aClass = floatx80_class(a);
|
||||
float_class_t bClass = floatx80_class(b);
|
||||
|
||||
if (aClass == float_NaN || bClass == float_NaN)
|
||||
if (aClass == float_SNaN || bClass == float_SNaN)
|
||||
{
|
||||
if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
|
||||
float_raise(status, float_flag_invalid);
|
||||
|
||||
if (floatx80_is_signaling_nan(a) || floatx80_is_signaling_nan(b))
|
||||
float_raise(status, float_flag_invalid);
|
||||
|
||||
float_raise(status, float_flag_invalid);
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal)
|
||||
{
|
||||
if (aClass == float_QNaN || bClass == float_QNaN) {
|
||||
return float_relation_unordered;
|
||||
}
|
||||
|
||||
if (aClass == float_denormal || bClass == float_denormal) {
|
||||
float_raise(status, float_flag_denormal);
|
||||
}
|
||||
|
||||
|
@ -2952,8 +2952,8 @@ bx_define_opcode(BX_IA_V512_VGETMANTPD_VpdWpdIb_Kmask, &BX_CPU_C::LOAD_BROADCAST
|
||||
bx_define_opcode(BX_IA_V512_VGETMANTSS_VssHpsWssIb_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VGETMANTSS_MASK_VssHpsWssIbR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VGETMANTSD_VsdHpdWsdIb_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VGETMANTSD_MASK_VsdHpdWsdIbR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_Ib, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
||||
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSCALEFPS_MASK_VpsWpsR, BX_ISA_AVX512, OP_Vps, OP_mVps, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSCALEFPD_MASK_VpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_mVpd, OP_NONE, OP_NONE, BX_PREPARE_EVEX)
|
||||
bx_define_opcode(BX_IA_V512_VSCALEFPS_VpsHpsWps_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorD, &BX_CPU_C::VSCALEFPS_MASK_VpsHpsWpsR, BX_ISA_AVX512, OP_Vps, OP_Hps, OP_mVps, OP_NONE, BX_PREPARE_EVEX)
|
||||
bx_define_opcode(BX_IA_V512_VSCALEFPD_VpdHpdWpd_Kmask, &BX_CPU_C::LOAD_BROADCAST_MASK_VectorQ, &BX_CPU_C::VSCALEFPD_MASK_VpdHpdWpdR, BX_ISA_AVX512, OP_Vpd, OP_Hpd, OP_mVpd, OP_NONE, BX_PREPARE_EVEX)
|
||||
bx_define_opcode(BX_IA_V512_VSCALEFSS_VssHpsWss_Kmask, &BX_CPU_C::LOAD_Wss, &BX_CPU_C::VSCALEFSS_MASK_VssHpsWssR, BX_ISA_AVX512, OP_Vss, OP_Hps, OP_mVss, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
bx_define_opcode(BX_IA_V512_VSCALEFSD_VsdHpdWsd_Kmask, &BX_CPU_C::LOAD_Wsd, &BX_CPU_C::VSCALEFSD_MASK_VsdHpdWsdR, BX_ISA_AVX512, OP_Vsd, OP_Hpd, OP_mVsd, OP_NONE, BX_PREPARE_EVEX_NO_BROADCAST)
|
||||
|
||||
|
@ -694,4 +694,40 @@ BX_CPP_INLINE void xmm_getexppd_mask(BxPackedXmmRegister *op, float_status_t &st
|
||||
}
|
||||
}
|
||||
|
||||
// scalef
|
||||
|
||||
// Scale each of the four single-precision lanes of op1 by the matching lane of
// op2 via float32_scalef, storing the result back into op1.
BX_CPP_INLINE void xmm_scalefps(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, float_status_t &status)
{
  unsigned lane = 0;
  while (lane < 4) {
    op1->xmm32u(lane) = float32_scalef(op1->xmm32u(lane), op2->xmm32u(lane), status);
    ++lane;
  }
}
|
||||
|
||||
// Masked variant of xmm_scalefps: bit n of mask selects whether lane n of op1
// receives float32_scalef(op1[n], op2[n]) or is cleared to zero.
BX_CPP_INLINE void xmm_scalefps_mask(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, float_status_t &status, Bit32u mask)
{
  for (unsigned lane = 0; lane < 4; ++lane) {
    const bool laneEnabled = ((mask >> lane) & 0x1) != 0;
    if (!laneEnabled) {
      op1->xmm32u(lane) = 0;
      continue;
    }
    op1->xmm32u(lane) = float32_scalef(op1->xmm32u(lane), op2->xmm32u(lane), status);
  }
}
|
||||
|
||||
// Scale each of the two double-precision lanes of op1 by the matching lane of
// op2 via float64_scalef, storing the result back into op1.
BX_CPP_INLINE void xmm_scalefpd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, float_status_t &status)
{
  unsigned lane = 0;
  while (lane < 2) {
    op1->xmm64u(lane) = float64_scalef(op1->xmm64u(lane), op2->xmm64u(lane), status);
    ++lane;
  }
}
|
||||
|
||||
// Masked variant of xmm_scalefpd: bit n of mask selects whether lane n of op1
// receives float64_scalef(op1[n], op2[n]) or is cleared to zero.
BX_CPP_INLINE void xmm_scalefpd_mask(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, float_status_t &status, Bit32u mask)
{
  for (unsigned lane = 0; lane < 2; ++lane) {
    const bool laneEnabled = ((mask >> lane) & 0x1) != 0;
    if (!laneEnabled) {
      op1->xmm64u(lane) = 0;
      continue;
    }
    op1->xmm64u(lane) = float64_scalef(op1->xmm64u(lane), op2->xmm64u(lane), status);
  }
}
|
||||
|
||||
#endif
|
||||
|
@ -304,7 +304,8 @@ float32 approximate_rcp(float32 op)
|
||||
case float_positive_inf:
|
||||
return packFloat32(sign, 0, 0);
|
||||
|
||||
case float_NaN:
|
||||
case float_SNaN:
|
||||
case float_QNaN:
|
||||
return convert_to_QNaN(op);
|
||||
|
||||
case float_normalized:
|
||||
@ -651,7 +652,8 @@ float32 approximate_rsqrt(float32 op)
|
||||
case float_negative_inf:
|
||||
return float32_default_nan;
|
||||
|
||||
case float_NaN:
|
||||
case float_SNaN:
|
||||
case float_QNaN:
|
||||
return convert_to_QNaN(op);
|
||||
|
||||
case float_normalized:
|
||||
|
Loading…
Reference in New Issue
Block a user