From 2a0a3612986fbbbc210edeb3ca796e1831e1c363 Mon Sep 17 00:00:00 2001 From: Stanislav Shwartsman Date: Fri, 25 Jun 2004 18:51:28 +0000 Subject: [PATCH] Implemented precision lost up indication in floating point status word --- bochs/CHANGES | 2 -- bochs/cpu/3dnow.cc | 1 - bochs/cpu/sse_pfp.cc | 1 - bochs/fpu/fpu_arith.cc | 1 - bochs/fpu/softfloat-round-pack.cc | 22 ++++++++++++---------- bochs/fpu/softfloat.h | 20 ++++++++------------ bochs/fpu/todo | 4 +--- 7 files changed, 21 insertions(+), 30 deletions(-) diff --git a/bochs/CHANGES b/bochs/CHANGES index 8151098be..39a19264e 100644 --- a/bochs/CHANGES +++ b/bochs/CHANGES @@ -32,8 +32,6 @@ Changes to next release: ! Unmasked underflow/overflow should correct the result by magic number for all operations, including float32 and float64. - ! set SW_C1 according to PRECISION_UP or PRECISION_DOWN - conditions. ! improve performance of transcendential instructions. - Disassembler diff --git a/bochs/cpu/3dnow.cc b/bochs/cpu/3dnow.cc index 1a496ec74..7348842e1 100755 --- a/bochs/cpu/3dnow.cc +++ b/bochs/cpu/3dnow.cc @@ -31,7 +31,6 @@ static void prepare_softfloat_status_word status.float_nan_handling_mode = float_first_operand_nan; status.float_rounding_mode = rounding_mode; status.flush_underflow_to_zero = 0; - status.float_precision_lost_up = 0; } void BX_CPU_C::PFPNACC_PqQq(bxInstruction_c *i) diff --git a/bochs/cpu/sse_pfp.cc b/bochs/cpu/sse_pfp.cc index d5be30d6a..172dd92bc 100644 --- a/bochs/cpu/sse_pfp.cc +++ b/bochs/cpu/sse_pfp.cc @@ -46,7 +46,6 @@ static void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mx // if underflow is masked and FUZ is 1, set it to 1, else to 0 status.flush_underflow_to_zero = (mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 
1 : 0; - status.float_precision_lost_up = 0; } // handle DAZ diff --git a/bochs/fpu/fpu_arith.cc b/bochs/fpu/fpu_arith.cc index 26cf76c02..6ab7d4af0 100755 --- a/bochs/fpu/fpu_arith.cc +++ b/bochs/fpu/fpu_arith.cc @@ -57,7 +57,6 @@ float_status_t FPU_pre_exception_handling(Bit16u control_word) status.float_nan_handling_mode = float_first_operand_nan; status.float_rounding_mode = (control_word & FPU_CW_RC) >> 10; status.flush_underflow_to_zero = 0; - status.float_precision_lost_up = 0; return status; } diff --git a/bochs/fpu/softfloat-round-pack.cc b/bochs/fpu/softfloat-round-pack.cc index aa750e136..4bfaef76a 100755 --- a/bochs/fpu/softfloat-round-pack.cc +++ b/bochs/fpu/softfloat-round-pack.cc @@ -385,6 +385,7 @@ floatx80 roundAndPackFloatx80(int roundingPrecision, { Bit64u roundIncrement, roundMask, roundBits; int increment; + Bit64u zSigExact; /* support rounding-up response */ Bit8u roundingMode = get_float_rounding_mode(status); int roundNearestEven = (roundingMode == float_round_nearest_even); @@ -421,22 +422,23 @@ floatx80 roundAndPackFloatx80(int roundingPrecision, if (zExp <= 0) { int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement); shift64RightJamming(zSig0, 1 - zExp, &zSig0); + zSigExact = zSig0; zExp = 0; roundBits = zSig0 & roundMask; if (isTiny && roundBits) float_raise(status, float_flag_underflow); if (roundBits) float_raise(status, float_flag_inexact); - if (roundIncrement) set_float_rounding_up(status); zSig0 += roundIncrement; if ((Bit64s) zSig0 < 0) zExp = 1; roundIncrement = roundMask + 1; if (roundNearestEven && (roundBits<<1 == roundIncrement)) roundMask |= roundIncrement; zSig0 &= ~roundMask; + if (zSig0 > zSigExact) set_float_rounding_up(status); return packFloatx80(zSign, zExp, zSig0); } } if (roundBits) float_raise(status, float_flag_inexact); - if (roundIncrement) set_float_rounding_up(status); + zSigExact = zSig0; zSig0 += roundIncrement; if (zSig0 < roundIncrement) { ++zExp; @@ -446,6 +448,7 @@ floatx80 
roundAndPackFloatx80(int roundingPrecision, if (roundNearestEven && (roundBits<<1 == roundIncrement)) roundMask |= roundIncrement; zSig0 &= ~roundMask; + if (zSig0 > zSigExact) set_float_rounding_up(status); if (zSig0 == 0) zExp = 0; return packFloatx80(zSign, zExp, zSig0); precision80: @@ -476,7 +479,7 @@ floatx80 roundAndPackFloatx80(int roundingPrecision, { return packFloatx80(zSign, 0x7FFE, ~roundMask); } - + set_float_rounding_up(status); return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); } if (zExp <= 0) { @@ -486,8 +489,7 @@ floatx80 roundAndPackFloatx80(int roundingPrecision, zExp = 0; if (isTiny && zSig1) float_raise(status, float_flag_underflow); if (zSig1) float_raise(status, float_flag_inexact); - if (roundNearestEven) - increment = ((Bit64s) zSig1 < 0); + if (roundNearestEven) increment = ((Bit64s) zSig1 < 0); else { if (zSign) { increment = (roundingMode == float_round_down) && zSig1; @@ -496,25 +498,25 @@ floatx80 roundAndPackFloatx80(int roundingPrecision, } } if (increment) { - zSig0++; + zSigExact = zSig0++; zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven); + if (zSig0 > zSigExact) set_float_rounding_up(status); if ((Bit64s) zSig0 < 0) zExp = 1; - set_float_rounding_up(status); } return packFloatx80(zSign, zExp, zSig0); } } if (zSig1) float_raise(status, float_flag_inexact); if (increment) { - ++zSig0; - set_float_rounding_up(status); + zSigExact = zSig0++; if (zSig0 == 0) { - ++zExp; + zExp++; zSig0 = BX_CONST64(0x8000000000000000); } else { zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven); } + if (zSig0 > zSigExact) set_float_rounding_up(status); } else { if (zSig0 == 0) zExp = 0; diff --git a/bochs/fpu/softfloat.h b/bochs/fpu/softfloat.h index eded5c0e6..905f8c5e2 100755 --- a/bochs/fpu/softfloat.h +++ b/bochs/fpu/softfloat.h @@ -87,6 +87,10 @@ enum float_exception_flag_t { float_flag_inexact = 0x20 }; +#ifdef FLOATX80 +#define RAISE_SW_C1 0x0200 +#endif + 
/*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point ordering relations *----------------------------------------------------------------------------*/ @@ -104,7 +108,6 @@ struct float_status_t { #ifdef FLOATX80 int float_rounding_precision; /* floatx80 only */ - int float_precision_lost_up; /* flag register, floatx80 only */ #endif int float_rounding_mode; int float_exception_flags; @@ -159,7 +162,7 @@ BX_CPP_INLINE int get_float_nan_handling_mode(float_status_t &status) #ifdef FLOATX80 BX_CPP_INLINE void set_float_rounding_up(float_status_t &status) { - status.float_precision_lost_up = 1; + status.float_exception_flags |= (float_flag_inexact | RAISE_SW_C1); } #endif @@ -290,19 +293,12 @@ struct floatx80 { floatx80 int32_to_floatx80(Bit32s); floatx80 int64_to_floatx80(Bit64s); -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision conversion routines. -*----------------------------------------------------------------------------*/ -floatx80 float32_to_floatx80(float32, float_status_t &status); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision conversion routines. -*----------------------------------------------------------------------------*/ -floatx80 float64_to_floatx80(float64, float_status_t &status); - /*---------------------------------------------------------------------------- | Software IEC/IEEE extended double-precision conversion routines. 
*----------------------------------------------------------------------------*/ +floatx80 float32_to_floatx80(float32, float_status_t &status); +floatx80 float64_to_floatx80(float64, float_status_t &status); + Bit32s floatx80_to_int32(floatx80, float_status_t &status); Bit32s floatx80_to_int32_round_to_zero(floatx80, float_status_t &status); Bit64s floatx80_to_int64(floatx80, float_status_t &status); diff --git a/bochs/fpu/todo b/bochs/fpu/todo index 6bc44f500..b1869fe57 100755 --- a/bochs/fpu/todo +++ b/bochs/fpu/todo @@ -7,9 +7,7 @@ TODO: 2. Unmasked underflow/overflow should correct the result by magic number for all operations, including float32 and float64. -3. Set SW_C1 according to PRECISION_UP or PRECISION_DOWN conditions. - -4. Elliminate floa128 use, Intel uses only 67-bit precision calculations +3. Eliminate float128 use, Intel uses only 67-bit precision calculations when float128 has at least 112-bit. Replacement of float128 with for example 96-bit precision number could significantly speed up calculations.