261 lines
15 KiB
C
Executable File
261 lines
15 KiB
C
Executable File
/*============================================================================
|
|
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
|
|
Package, Release 2b.
|
|
|
|
Written by John R. Hauser. This work was made possible in part by the
|
|
International Computer Science Institute, located at Suite 600, 1947 Center
|
|
Street, Berkeley, California 94704. Funding was partially provided by the
|
|
National Science Foundation under grant MIP-9311980. The original version
|
|
of this code was written as part of a project to build a fixed-point vector
|
|
processor in collaboration with the University of California at Berkeley,
|
|
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
|
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
|
|
arithmetic/SoftFloat.html'.
|
|
|
|
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
|
|
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
|
|
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
|
|
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
|
|
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
|
|
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
|
|
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
|
|
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
|
|
|
|
Derivative works are acceptable, even for commercial purposes, so long as
|
|
(1) the source code for the derivative work includes prominent notice that
|
|
the work is derivative, and (2) the source code includes prominent notice with
|
|
these four paragraphs for those parts of this code that are retained.
|
|
=============================================================================*/
|
|
|
|
/*============================================================================
|
|
* Adapted for Bochs (x86 achitecture simulator) by
|
|
* Stanislav Shwartsman [sshwarts at sourceforge net]
|
|
* ==========================================================================*/
|
|
|
|
#ifndef _SOFTFLOAT_ROUND_PACK_H_
|
|
#define _SOFTFLOAT_ROUND_PACK_H_
|
|
|
|
#include "softfloat.h"
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
|
|
| and 7, and returns the properly rounded 32-bit integer corresponding to the
|
|
| input. If `zSign' is 1, the input is negated before being converted to an
|
|
| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
|
|
| is simply rounded to an integer, with the inexact exception raised if the
|
|
| input cannot be represented exactly as an integer. However, if the fixed-
|
|
| point input is too large, the invalid exception is raised and the integer
|
|
| indefinite value is returned.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
|
|
| `absZ1', with binary point between bits 63 and 64 (between the input words),
|
|
| and returns the properly rounded 64-bit integer corresponding to the input.
|
|
| If `zSign' is 1, the input is negated before being converted to an integer.
|
|
| Ordinarily, the fixed-point input is simply rounded to an integer, with
|
|
| the inexact exception raised if the input cannot be represented exactly as
|
|
| an integer. However, if the fixed-point input is too large, the invalid
|
|
| exception is raised and the integer indefinite value is returned.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Normalizes the subnormal single-precision floating-point value represented
|
|
| by the denormalized significand `aSig'. The normalized exponent and
|
|
| significand are stored at the locations pointed to by `zExpPtr' and
|
|
| `zSigPtr', respectively.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
|
| and significand `zSig', and returns the proper single-precision floating-
|
|
| point value corresponding to the abstract input. Ordinarily, the abstract
|
|
| value is simply rounded and packed into the single-precision format, with
|
|
| the inexact exception raised if the abstract input cannot be represented
|
|
| exactly. However, if the abstract value is too large, the overflow and
|
|
| inexact exceptions are raised and an infinity or maximal finite value is
|
|
| returned. If the abstract value is too small, the input value is rounded to
|
|
| a subnormal number, and the underflow and inexact exceptions are raised if
|
|
| the abstract input cannot be represented exactly as a subnormal single-
|
|
| precision floating-point number.
|
|
| The input significand `zSig' has its binary point between bits 30
|
|
| and 29, which is 7 bits to the left of the usual location. This shifted
|
|
| significand must be normalized or smaller. If `zSig' is not normalized,
|
|
| `zExp' must be 0; in that case, the result returned is a subnormal number,
|
|
| and it must not require rounding. In the usual case that `zSig' is
|
|
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
|
|
| The handling of underflow and overflow follows the IEC/IEEE Standard for
|
|
| Binary Floating-Point Arithmetic.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
|
| and significand `zSig', and returns the proper single-precision floating-
|
|
| point value corresponding to the abstract input. This routine is just like
|
|
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
|
|
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
|
|
| floating-point exponent.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Normalizes the subnormal double-precision floating-point value represented
|
|
| by the denormalized significand `aSig'. The normalized exponent and
|
|
| significand are stored at the locations pointed to by `zExpPtr' and
|
|
| `zSigPtr', respectively.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
|
| and significand `zSig', and returns the proper double-precision floating-
|
|
| point value corresponding to the abstract input. Ordinarily, the abstract
|
|
| value is simply rounded and packed into the double-precision format, with
|
|
| the inexact exception raised if the abstract input cannot be represented
|
|
| exactly. However, if the abstract value is too large, the overflow and
|
|
| inexact exceptions are raised and an infinity or maximal finite value is
|
|
| returned. If the abstract value is too small, the input value is rounded
|
|
| to a subnormal number, and the underflow and inexact exceptions are raised
|
|
| if the abstract input cannot be represented exactly as a subnormal double-
|
|
| precision floating-point number.
|
|
| The input significand `zSig' has its binary point between bits 62
|
|
| and 61, which is 10 bits to the left of the usual location. This shifted
|
|
| significand must be normalized or smaller. If `zSig' is not normalized,
|
|
| `zExp' must be 0; in that case, the result returned is a subnormal number,
|
|
| and it must not require rounding. In the usual case that `zSig' is
|
|
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
|
|
| The handling of underflow and overflow follows the IEC/IEEE Standard for
|
|
| Binary Floating-Point Arithmetic.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
|
| and significand `zSig', and returns the proper double-precision floating-
|
|
| point value corresponding to the abstract input. This routine is just like
|
|
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
|
|
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
|
|
| floating-point exponent.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status);
|
|
|
|
#ifdef FLOATX80
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Normalizes the subnormal extended double-precision floating-point value
|
|
| represented by the denormalized significand `aSig'. The normalized exponent
|
|
| and significand are stored at the locations pointed to by `zExpPtr' and
|
|
| `zSigPtr', respectively.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
|
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
|
|
| and returns the proper extended double-precision floating-point value
|
|
| corresponding to the abstract input. Ordinarily, the abstract value is
|
|
| rounded and packed into the extended double-precision format, with the
|
|
| inexact exception raised if the abstract input cannot be represented
|
|
| exactly. However, if the abstract value is too large, the overflow and
|
|
| inexact exceptions are raised and an infinity or maximal finite value is
|
|
| returned. If the abstract value is too small, the input value is rounded to
|
|
| a subnormal number, and the underflow and inexact exceptions are raised if
|
|
| the abstract input cannot be represented exactly as a subnormal extended
|
|
| double-precision floating-point number.
|
|
| If `roundingPrecision' is 32 or 64, the result is rounded to the same
|
|
| number of bits as single or double precision, respectively. Otherwise, the
|
|
| result is rounded to the full precision of the extended double-precision
|
|
| format.
|
|
| The input significand must be normalized or smaller. If the input
|
|
| significand is not normalized, `zExp' must be 0; in that case, the result
|
|
| returned is a subnormal number, and it must not require rounding. The
|
|
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
|
|
| Floating-Point Arithmetic.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
floatx80 roundAndPackFloatx80(int roundingPrecision,
|
|
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes an abstract floating-point value having sign `zSign', exponent
|
|
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
|
|
| and returns the proper extended double-precision floating-point value
|
|
| corresponding to the abstract input. This routine is just like
|
|
| `roundAndPackFloatx80' except that the input significand does not have to be
|
|
| normalized.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
|
|
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status);
|
|
|
|
#endif // FLOATX80
|
|
|
|
#ifdef FLOAT128
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Normalizes the subnormal quadruple-precision floating-point value
|
|
| represented by the denormalized significand formed by the concatenation of
|
|
| `aSig0' and `aSig1'. The normalized exponent is stored at the location
|
|
| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
|
|
| significand are stored at the location pointed to by `zSig0Ptr', and the
|
|
| least significant 64 bits of the normalized significand are stored at the
|
|
| location pointed to by `zSig1Ptr'.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
void normalizeFloat128Subnormal(
|
|
Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
|
| and extended significand formed by the concatenation of `zSig0', `zSig1',
|
|
| and `zSig2', and returns the proper quadruple-precision floating-point value
|
|
| corresponding to the abstract input. Ordinarily, the abstract value is
|
|
| simply rounded and packed into the quadruple-precision format, with the
|
|
| inexact exception raised if the abstract input cannot be represented
|
|
| exactly. However, if the abstract value is too large, the overflow and
|
|
| inexact exceptions are raised and an infinity or maximal finite value is
|
|
| returned. If the abstract value is too small, the input value is rounded to
|
|
| a subnormal number, and the underflow and inexact exceptions are raised if
|
|
| the abstract input cannot be represented exactly as a subnormal quadruple-
|
|
| precision floating-point number.
|
|
| The input significand must be normalized or smaller. If the input
|
|
| significand is not normalized, `zExp' must be 0; in that case, the result
|
|
| returned is a subnormal number, and it must not require rounding. In the
|
|
| usual case that the input significand is normalized, `zExp' must be 1 less
|
|
| than the ``true'' floating-point exponent. The handling of underflow and
|
|
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
float128 roundAndPackFloat128(
|
|
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, float_status_t &status);
|
|
|
|
/*----------------------------------------------------------------------------
|
|
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
|
|
| and significand formed by the concatenation of `zSig0' and `zSig1', and
|
|
| returns the proper quadruple-precision floating-point value corresponding
|
|
| to the abstract input. This routine is just like `roundAndPackFloat128'
|
|
| except that the input significand has fewer bits and does not have to be
|
|
| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
|
|
| point exponent.
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
float128 normalizeRoundAndPackFloat128(
|
|
int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status);
|
|
|
|
#endif // FLOAT128
|
|
|
|
#endif
|