From 2a0a3612986fbbbc210edeb3ca796e1831e1c363 Mon Sep 17 00:00:00 2001
From: Stanislav Shwartsman <sshwarts@users.sourceforge.net>
Date: Fri, 25 Jun 2004 18:51:28 +0000
Subject: [PATCH] Implement the precision-lost-up indication in the
 floating-point status word

---
 bochs/CHANGES                     |  2 --
 bochs/cpu/3dnow.cc                |  1 -
 bochs/cpu/sse_pfp.cc              |  1 -
 bochs/fpu/fpu_arith.cc            |  1 -
 bochs/fpu/softfloat-round-pack.cc | 22 ++++++++++++----------
 bochs/fpu/softfloat.h             | 20 ++++++++------------
 bochs/fpu/todo                    |  4 +---
 7 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/bochs/CHANGES b/bochs/CHANGES
index 8151098be..39a19264e 100644
--- a/bochs/CHANGES
+++ b/bochs/CHANGES
@@ -32,8 +32,6 @@ Changes to next release:
     ! Unmasked underflow/overflow should correct the result 
       by magic number for all operations, including float32 
       and float64.
-    ! set SW_C1 according to PRECISION_UP or PRECISION_DOWN 
-      conditions.
     ! improve performance of transcendential instructions.
 
 - Disassembler
diff --git a/bochs/cpu/3dnow.cc b/bochs/cpu/3dnow.cc
index 1a496ec74..7348842e1 100755
--- a/bochs/cpu/3dnow.cc
+++ b/bochs/cpu/3dnow.cc
@@ -31,7 +31,6 @@ static void prepare_softfloat_status_word
   status.float_nan_handling_mode = float_first_operand_nan;
   status.float_rounding_mode = rounding_mode;
   status.flush_underflow_to_zero = 0;
-  status.float_precision_lost_up = 0;
 }
 
 void BX_CPU_C::PFPNACC_PqQq(bxInstruction_c *i)
diff --git a/bochs/cpu/sse_pfp.cc b/bochs/cpu/sse_pfp.cc
index d5be30d6a..172dd92bc 100644
--- a/bochs/cpu/sse_pfp.cc
+++ b/bochs/cpu/sse_pfp.cc
@@ -46,7 +46,6 @@ static void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mx
   // if underflow is masked and FUZ is 1, set it to 1, else to 0
   status.flush_underflow_to_zero = 
        (mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 1 : 0;
-  status.float_precision_lost_up = 0;
 }
 
 // handle DAZ
diff --git a/bochs/fpu/fpu_arith.cc b/bochs/fpu/fpu_arith.cc
index 26cf76c02..6ab7d4af0 100755
--- a/bochs/fpu/fpu_arith.cc
+++ b/bochs/fpu/fpu_arith.cc
@@ -57,7 +57,6 @@ float_status_t FPU_pre_exception_handling(Bit16u control_word)
   status.float_nan_handling_mode = float_first_operand_nan;
   status.float_rounding_mode = (control_word & FPU_CW_RC) >> 10;
   status.flush_underflow_to_zero = 0;
-  status.float_precision_lost_up = 0;
 
   return status;
 }
diff --git a/bochs/fpu/softfloat-round-pack.cc b/bochs/fpu/softfloat-round-pack.cc
index aa750e136..4bfaef76a 100755
--- a/bochs/fpu/softfloat-round-pack.cc
+++ b/bochs/fpu/softfloat-round-pack.cc
@@ -385,6 +385,7 @@ floatx80 roundAndPackFloatx80(int roundingPrecision,
 {
     Bit64u roundIncrement, roundMask, roundBits;
     int increment;
+    Bit64u zSigExact; /* support rounding-up response */
 
     Bit8u roundingMode = get_float_rounding_mode(status);
     int roundNearestEven = (roundingMode == float_round_nearest_even);
@@ -421,22 +422,23 @@ floatx80 roundAndPackFloatx80(int roundingPrecision,
         if (zExp <= 0) {
             int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement);
             shift64RightJamming(zSig0, 1 - zExp, &zSig0);
+            zSigExact = zSig0;
             zExp = 0;
             roundBits = zSig0 & roundMask;
             if (isTiny && roundBits) float_raise(status, float_flag_underflow);
             if (roundBits) float_raise(status, float_flag_inexact);
-            if (roundIncrement) set_float_rounding_up(status);
             zSig0 += roundIncrement;
             if ((Bit64s) zSig0 < 0) zExp = 1;
             roundIncrement = roundMask + 1;
             if (roundNearestEven && (roundBits<<1 == roundIncrement))
                 roundMask |= roundIncrement;
             zSig0 &= ~roundMask;
+            if (zSig0 > zSigExact) set_float_rounding_up(status);
             return packFloatx80(zSign, zExp, zSig0);
         }
     }
     if (roundBits) float_raise(status, float_flag_inexact);
-    if (roundIncrement) set_float_rounding_up(status);
+    zSigExact = zSig0;
     zSig0 += roundIncrement;
     if (zSig0 < roundIncrement) {
         ++zExp;
@@ -446,6 +448,7 @@ floatx80 roundAndPackFloatx80(int roundingPrecision,
     if (roundNearestEven && (roundBits<<1 == roundIncrement))
         roundMask |= roundIncrement;
     zSig0 &= ~roundMask;
+    if (zSig0 > zSigExact) set_float_rounding_up(status);
     if (zSig0 == 0) zExp = 0;
     return packFloatx80(zSign, zExp, zSig0);
  precision80:
@@ -476,7 +479,7 @@ floatx80 roundAndPackFloatx80(int roundingPrecision,
             {
                 return packFloatx80(zSign, 0x7FFE, ~roundMask);
             }
-
+            set_float_rounding_up(status);
             return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
         }
         if (zExp <= 0) {
@@ -486,8 +489,7 @@ floatx80 roundAndPackFloatx80(int roundingPrecision,
             zExp = 0;
             if (isTiny && zSig1) float_raise(status, float_flag_underflow);
             if (zSig1) float_raise(status, float_flag_inexact);
-            if (roundNearestEven) 
-                increment = ((Bit64s) zSig1 < 0);
+            if (roundNearestEven) increment = ((Bit64s) zSig1 < 0);
             else {
                 if (zSign) {
                     increment = (roundingMode == float_round_down) && zSig1;
@@ -496,25 +498,25 @@ floatx80 roundAndPackFloatx80(int roundingPrecision,
                 }
             }
             if (increment) {
-                zSig0++;
+                zSigExact = zSig0++;
                 zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
+                if (zSig0 > zSigExact) set_float_rounding_up(status);
                 if ((Bit64s) zSig0 < 0) zExp = 1;
-                set_float_rounding_up(status);
             }
             return packFloatx80(zSign, zExp, zSig0);
         }
     }
     if (zSig1) float_raise(status, float_flag_inexact);
     if (increment) {
-        ++zSig0;
-        set_float_rounding_up(status);
+        zSigExact = zSig0++;
         if (zSig0 == 0) {
-            ++zExp;
+            zExp++;
             zSig0 = BX_CONST64(0x8000000000000000);
         }
         else {
             zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
         }
+        if (zSig0 > zSigExact) set_float_rounding_up(status);
     }
     else {
         if (zSig0 == 0) zExp = 0;
diff --git a/bochs/fpu/softfloat.h b/bochs/fpu/softfloat.h
index eded5c0e6..905f8c5e2 100755
--- a/bochs/fpu/softfloat.h
+++ b/bochs/fpu/softfloat.h
@@ -87,6 +87,10 @@ enum float_exception_flag_t {
     float_flag_inexact   = 0x20
 };
 
+#ifdef FLOATX80
+#define RAISE_SW_C1 0x0200
+#endif
+
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE floating-point ordering relations
 *----------------------------------------------------------------------------*/
@@ -104,7 +108,6 @@ struct float_status_t
 {
 #ifdef FLOATX80
     int float_rounding_precision;	/* floatx80 only */
-    int float_precision_lost_up;	/* flag register, floatx80 only */
 #endif
     int float_rounding_mode;
     int float_exception_flags;
@@ -159,7 +162,7 @@ BX_CPP_INLINE int get_float_nan_handling_mode(float_status_t &status)
 #ifdef FLOATX80
 BX_CPP_INLINE void set_float_rounding_up(float_status_t &status)
 {
-    status.float_precision_lost_up = 1;
+    status.float_exception_flags |= (float_flag_inexact | RAISE_SW_C1);
 }
 #endif
 
@@ -290,19 +293,12 @@ struct floatx80 {
 floatx80 int32_to_floatx80(Bit32s);
 floatx80 int64_to_floatx80(Bit64s);
 
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE single-precision conversion routines.
-*----------------------------------------------------------------------------*/
-floatx80 float32_to_floatx80(float32, float_status_t &status);
-
-/*----------------------------------------------------------------------------
-| Software IEC/IEEE double-precision conversion routines.
-*----------------------------------------------------------------------------*/
-floatx80 float64_to_floatx80(float64, float_status_t &status);
-
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE extended double-precision conversion routines.
 *----------------------------------------------------------------------------*/
+floatx80 float32_to_floatx80(float32, float_status_t &status);
+floatx80 float64_to_floatx80(float64, float_status_t &status);
+
 Bit32s floatx80_to_int32(floatx80, float_status_t &status);
 Bit32s floatx80_to_int32_round_to_zero(floatx80, float_status_t &status);
 Bit64s floatx80_to_int64(floatx80, float_status_t &status);
diff --git a/bochs/fpu/todo b/bochs/fpu/todo
index 6bc44f500..b1869fe57 100755
--- a/bochs/fpu/todo
+++ b/bochs/fpu/todo
@@ -7,9 +7,7 @@ TODO:
 2. Unmasked underflow/overflow should correct the result by magic number
    for all operations, including float32 and float64.
 
-3. Set SW_C1 according to PRECISION_UP or PRECISION_DOWN conditions.
-
-4. Elliminate floa128 use, Intel uses only 67-bit precision calculations
+3. Eliminate float128 use, Intel uses only 67-bit precision calculations
    when float128 has at least 112-bit. Replacement of float128 with for
    example 96-bit precision number could significantly speed up 
    calculations.