implemented biasing of unmasked x87 over/underflow result

2009-06-05 17:48:55 +00:00 · 2009-06-05 17:48:55 +00:00 · d5efb5c378
commit d5efb5c378
parent c6f01e9723
6 changed files with 31 additions and 16 deletions
--- a/bochs/cpu/sse_pfp.cc
+++ b/bochs/cpu/sse_pfp.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: sse_pfp.cc,v 1.56 2009-05-28 20:18:34 sshwarts Exp $
+// $Id: sse_pfp.cc,v 1.57 2009-06-05 17:48:55 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //   Copyright (c) 2003 Stanislav Shwartsman
@ -53,6 +53,7 @@ BX_CPP_INLINE void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxc
  // if underflow is masked and FUZ is 1, set it to 1, else to 0
  status.flush_underflow_to_zero =
       (mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 1 : 0;
+  status.float_exception_masks = mxcsr.get_exceptions_masks();
 }

 /* Comparison predicate for CMPSS/CMPPS instructions */
--- a/bochs/fpu/fpu_arith.cc
+++ b/bochs/fpu/fpu_arith.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: fpu_arith.cc,v 1.21 2009-05-28 19:25:33 sshwarts Exp $
+// $Id: fpu_arith.cc,v 1.22 2009-06-05 17:48:55 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //   Copyright (c) 2003 Stanislav Shwartsman
@ -56,6 +56,7 @@ float_status_t FPU_pre_exception_handling(Bit16u control_word)
  status.float_nan_handling_mode = float_first_operand_nan;
  status.float_rounding_mode = (control_word & FPU_CW_RC) >> 10;
  status.flush_underflow_to_zero = 0;
+  status.float_exception_masks = control_word & FPU_CW_Exceptions_Mask;

  return status;
 }
--- a/bochs/fpu/softfloat-round-pack.cc
+++ b/bochs/fpu/softfloat-round-pack.cc
@ -393,7 +393,7 @@ void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
 | Floating-Point Arithmetic.
 *----------------------------------------------------------------------------*/

-floatx80 roundAndPackFloatx80(int roundingPrecision,
+floatx80 SoftFloatRoundAndPackFloatx80(int roundingPrecision,
        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
 {
    Bit64u roundIncrement, roundMask, roundBits;
@ -540,6 +540,26 @@ floatx80 roundAndPackFloatx80(int roundingPrecision,
    return packFloatx80(zSign, zExp, zSig0);
 }

+floatx80 roundAndPackFloatx80(int roundingPrecision,
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
+{
+    floatx80 result = SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
+
+    // bias unmasked underflow: if the flag is set and not masked, round again with zExp + 0x6000
+    if (! (status.float_exception_masks & float_flag_underflow)) {
+       if (status.float_exception_flags & float_flag_underflow)
+           return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp + 0x6000, zSig0, zSig1, status);
+    }
+
+    // bias unmasked overflow: if the flag is set and not masked, round again with zExp - 0x6000
+    if (! (status.float_exception_masks & float_flag_overflow)) {
+       if (status.float_exception_flags & float_flag_overflow)
+           return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp - 0x6000, zSig0, zSig1, status);
+    }
+
+    return result;
+}
+
 /*----------------------------------------------------------------------------
 | Takes an abstract floating-point value having sign `zSign', exponent
 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
--- a/bochs/fpu/softfloat.cc
+++ b/bochs/fpu/softfloat.cc
@ -2603,9 +2603,7 @@ floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status_t &status)
    zSign = aSign ^ bSign;

    if (aExp == 0x7FFF) {
-        if ((Bit64u) (aSig<<1)
-             || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
-        {
+        if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (bExp == 0) {
--- a/bochs/fpu/softfloat.h
+++ b/bochs/fpu/softfloat.h
@ -111,6 +111,7 @@ struct float_status_t
 #endif
    int float_rounding_mode;
    int float_exception_flags;
+    int float_exception_masks;
    int float_nan_handling_mode;	/* flag register */
    int flush_underflow_to_zero;	/* flag register */
 };
--- a/bochs/fpu/todo
+++ b/bochs/fpu/todo
@ -1,13 +1,7 @@
 TODO:
 ----

-1. Check for denormal and pseudodenormal operands in ALL instructions
-   I hope all instructions return the same values as real CPU.
-
-2. Unmasked underflow/overflow should correct the result by magic number
-   for all operations, including float32 and float64.
-
-3. Elliminate floa128 use, Intel uses only 67-bit precision calculations
-   when float128 has at least 112-bit. Replacement of float128 with for
-   example 96-bit precision number could significantly speed up
-   calculations.
+Eliminate float128 use, Intel uses only 67-bit precision calculations
+when float128 has at least 112-bit. Replacement of float128 with for
+example 96-bit precision number could significantly speed up
+calculations.