correctly handle unmasked underflow in SSE

2011-10-04 06:40:19 +00:00 · 2011-10-04 06:40:19 +00:00 · fc2f8f2266
commit fc2f8f2266
parent 8691ee7747
3 changed files with 55 additions and 40 deletions
--- a/bochs/fpu/softfloat-round-pack.cc
+++ b/bochs/fpu/softfloat-round-pack.cc
@ -210,8 +210,7 @@ float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t
    roundBits = zSig & roundMask;
    if (0x1D <= (Bit16u) zExp) {
        if ((0x1D < zExp)
-             || ((zExp == 0x1D)
-                  && ((Bit16s) (zSig + roundIncrement) < 0)))
+             || ((zExp == 0x1D) && ((Bit16s) (zSig + roundIncrement) < 0)))
        {
            float_raise(status, float_flag_overflow);
            if (roundBits || float_exception_masked(status, float_flag_overflow)) {
@ -229,6 +228,7 @@ float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t
                    float_raise(status, float_flag_underflow | float_flag_inexact);
                    return packFloat16(zSign, 0, 0);
                }
+                // signal the #P according to roundBits calculated AFTER denormalization
                if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
                    float_raise(status, float_flag_underflow);
                }
@ -282,12 +282,12 @@ void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)

 float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
 {
-    Bit32s roundIncrement, roundBits, roundMask;
+    Bit32s roundIncrement, roundBits;
+    const Bit32s roundMask = 0x7F;

    int roundingMode = get_float_rounding_mode(status);
    int roundNearestEven = (roundingMode == float_round_nearest_even);
    roundIncrement = 0x40;
-    roundMask = 0x7F;

    if (! roundNearestEven) {
        if (roundingMode == float_round_to_zero) roundIncrement = 0;
@ -304,8 +304,7 @@ float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t
    roundBits = zSig & roundMask;
    if (0xFD <= (Bit16u) zExp) {
        if ((0xFD < zExp)
-             || ((zExp == 0xFD)
-                  && ((Bit32s) (zSig + roundIncrement) < 0)))
+             || ((zExp == 0xFD) && ((Bit32s) (zSig + roundIncrement) < 0)))
        {
            float_raise(status, float_flag_overflow);
            if (roundBits || float_exception_masked(status, float_flag_overflow)) {
@ -316,25 +315,34 @@ float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t
        }
        if (zExp < 0) {
            int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
-            zSig = shift32RightJamming(zSig, -zExp);
-            zExp = 0;
-            roundBits = zSig & roundMask;
            if (isTiny) {
-                if(get_flush_underflow_to_zero(status)) {
-                    float_raise(status, float_flag_underflow | float_flag_inexact);
-                    return packFloat32(zSign, 0, 0);
-                }
-                if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
+                if (!float_exception_masked(status, float_flag_underflow)) {
                    float_raise(status, float_flag_underflow);
+                    zExp += 192; // bias unmasked underflow
+                }
+            }
+            if (zExp < 0) {
+                zSig = shift32RightJamming(zSig, -zExp);
+                zExp = 0;
+                roundBits = zSig & roundMask;
+                if (isTiny) {
+                    // masked underflow
+                    if(get_flush_underflow_to_zero(status)) {
+                        float_raise(status, float_flag_underflow | float_flag_inexact);
+                        return packFloat32(zSign, 0, 0);
+                    }
+                    if (roundBits) float_raise(status, float_flag_underflow);
                }
            }
        }
    }
-    if (roundBits) float_raise(status, float_flag_inexact);
    Bit32u zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 7;
    zSigRound &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
-    if ((zSigRound << 7) > zSig) set_float_rounding_up(status);
    if (zSigRound == 0) zExp = 0;
+    if (roundBits) {
+        float_raise(status, float_flag_inexact);
+        if ((zSigRound << 7) > zSig) set_float_rounding_up(status);
+    }
    return packFloat32(zSign, zExp, zSigRound);
 }

@ -392,13 +400,14 @@ void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
 float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
 {
    Bit16s roundIncrement, roundBits;
+    const Bit16s roundMask = 0x3FF;
    int roundingMode = get_float_rounding_mode(status);
    int roundNearestEven = (roundingMode == float_round_nearest_even);
    roundIncrement = 0x200;
    if (! roundNearestEven) {
        if (roundingMode == float_round_to_zero) roundIncrement = 0;
        else {
-            roundIncrement = 0x3FF;
+            roundIncrement = roundMask;
            if (zSign) {
                if (roundingMode == float_round_up) roundIncrement = 0;
            }
@ -407,7 +416,7 @@ float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t
            }
        }
    }
-    roundBits = (Bit16s)(zSig & 0x3FF);
+    roundBits = (Bit16s)(zSig & roundMask);
    if (0x7FD <= (Bit16u) zExp) {
        if ((0x7FD < zExp)
             || ((zExp == 0x7FD)
@ -422,25 +431,34 @@ float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t
        }
        if (zExp < 0) {
            int isTiny = (zExp < -1) || (zSig + roundIncrement < BX_CONST64(0x8000000000000000));
-            zSig = shift64RightJamming(zSig, -zExp);
-            zExp = 0;
-            roundBits = (Bit16s)(zSig & 0x3FF);
            if (isTiny) {
-                if(get_flush_underflow_to_zero(status)) {
-                    float_raise(status, float_flag_underflow | float_flag_inexact);
-                    return packFloat64(zSign, 0, 0);
-                }
-                if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
+                if (!float_exception_masked(status, float_flag_underflow)) {
                    float_raise(status, float_flag_underflow);
+                    zExp += 1536; // bias unmasked underflow
+                }
+            }
+            if (zExp < 0) {
+                zSig = shift64RightJamming(zSig, -zExp);
+                zExp = 0;
+                roundBits = (Bit16s)(zSig & roundMask);
+                if (isTiny) {
+                    // masked underflow
+                    if(get_flush_underflow_to_zero(status)) {
+                        float_raise(status, float_flag_underflow | float_flag_inexact);
+                        return packFloat64(zSign, 0, 0);
+                    }
+                    if (roundBits) float_raise(status, float_flag_underflow);
                }
            }
        }
    }
-    if (roundBits) float_raise(status, float_flag_inexact);
    Bit64u zSigRound = (zSig + roundIncrement)>>10;
    zSigRound &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
-    if ((zSigRound << 10) > zSig) set_float_rounding_up(status);
    if (zSigRound == 0) zExp = 0;
+    if (roundBits) {
+        float_raise(status, float_flag_inexact);
+        if ((zSigRound << 10) > zSig) set_float_rounding_up(status);
+    }
    return packFloat64(zSign, zExp, zSigRound);
 }

@ -548,14 +566,16 @@ floatx80 SoftFloatRoundAndPackFloatx80(int roundingPrecision,
                if (roundBits || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
                    float_raise(status, float_flag_underflow);
            }
-            if (roundBits) float_raise(status, float_flag_inexact);
            zSig0 += roundIncrement;
            if ((Bit64s) zSig0 < 0) zExp = 1;
            roundIncrement = roundMask + 1;
            if (roundNearestEven && (roundBits<<1 == roundIncrement))
                roundMask |= roundIncrement;
            zSig0 &= ~roundMask;
-            if (zSig0 > zSigExact) set_float_rounding_up(status);
+            if (roundBits) {
+                float_raise(status, float_flag_inexact);
+                if (zSig0 > zSigExact) set_float_rounding_up(status);
+            }
            return packFloatx80(zSign, zExp, zSig0);
        }
    }
--- a/bochs/fpu/softfloat.cc
+++ b/bochs/fpu/softfloat.cc
@ -437,8 +437,7 @@ static float32 addFloat32Sigs(float32 a, float32 b, int zSign, float_status_t &s
            return packFloat32(zSign, 0, (aSig + bSig)>>6);
        }
        zSig = 0x40000000 + aSig + bSig;
-        zExp = aExp;
-        goto roundAndPack;
+        return roundAndPackFloat32(zSign, aExp, zSig, status);
    }
    aSig |= 0x20000000;
    zSig = (aSig + bSig)<<1;
@ -447,7 +446,6 @@ static float32 addFloat32Sigs(float32 a, float32 b, int zSign, float_status_t &s
        zSig = aSig + bSig;
        ++zExp;
    }
- roundAndPack:
    return roundAndPackFloat32(zSign, zExp, zSig, status);
 }

@ -1250,8 +1248,7 @@ static float64 addFloat64Sigs(float64 a, float64 b, int zSign, float_status_t &s
            return packFloat64(zSign, 0, (aSig + bSig)>>9);
        }
        zSig = BX_CONST64(0x4000000000000000) + aSig + bSig;
-        zExp = aExp;
-        goto roundAndPack;
+        return roundAndPackFloat64(zSign, aExp, zSig, status);
    }
    aSig |= BX_CONST64(0x2000000000000000);
    zSig = (aSig + bSig)<<1;
@ -1260,7 +1257,6 @@ static float64 addFloat64Sigs(float64 a, float64 b, int zSign, float_status_t &s
        zSig = aSig + bSig;
        ++zExp;
    }
- roundAndPack:
    return roundAndPackFloat64(zSign, zExp, zSig, status);
 }

@ -2067,7 +2063,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status_t &status)
 {
    int aSign;
    Bit64u lastBitMask, roundBitsMask;
-    Bit8u roundingMode;
+    int roundingMode = get_float_rounding_mode(status);
    floatx80 z;

    // handle unsupported extended double-precision floating encodings
@ -2092,7 +2088,7 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status_t &status)
        }
        float_raise(status, float_flag_inexact);
        aSign = extractFloatx80Sign(a);
-        switch (get_float_rounding_mode(status)) {
+        switch (roundingMode) {
         case float_round_nearest_even:
            if ((aExp == 0x3FFE) && (Bit64u) (aSig<<1)) {
                set_float_rounding_up(status);
@ -2122,7 +2118,6 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status_t &status)
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
-    roundingMode = get_float_rounding_mode(status);
    if (roundingMode == float_round_nearest_even) {
        z.fraction += lastBitMask>>1;
        if ((z.fraction & roundBitsMask) == 0) z.fraction &= ~lastBitMask;
--- a/bochs/fpu/softfloat.h
+++ b/bochs/fpu/softfloat.h
@ -190,7 +190,7 @@ BX_CPP_INLINE int get_float_nan_handling_mode(const float_status_t &status)
 #ifdef FLOATX80
 BX_CPP_INLINE void set_float_rounding_up(float_status_t &status)
 {
-    status.float_exception_flags |= (float_flag_inexact | RAISE_SW_C1);
+    status.float_exception_flags |= RAISE_SW_C1;
 }
 #endif