/*============================================================================ This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic Package, Release 2b. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center Street, Berkeley, California 94704. Funding was partially provided by the National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ arithmetic/SoftFloat.html'. THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. Derivative works are acceptable, even for commercial purposes, so long as (1) the source code for the derivative work includes prominent notice that the work is derivative, and (2) the source code includes prominent notice with these four paragraphs for those parts of this code that are retained. =============================================================================*/ /*============================================================================ * Adapted for Bochs (x86 achitecture simulator) by * Stanislav Shwartsman [sshwarts at sourceforge net] * ==========================================================================*/ #ifndef _SOFTFLOAT_ROUND_PACK_H_ #define _SOFTFLOAT_ROUND_PACK_H_ #include "softfloat.h" /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 | and 7, and returns the properly rounded 32-bit integer corresponding to the | input. If `zSign' is 1, the input is negated before being converted to an | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input | is simply rounded to an integer, with the inexact exception raised if the | input cannot be represented exactly as an integer. However, if the fixed- | point input is too large, the invalid exception is raised and the integer | indefinite value is returned. *----------------------------------------------------------------------------*/ Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status); /*---------------------------------------------------------------------------- | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and | `absZ1', with binary point between bits 63 and 64 (between the input words), | and returns the properly rounded 64-bit integer corresponding to the input. | If `zSign' is 1, the input is negated before being converted to an integer. | Ordinarily, the fixed-point input is simply rounded to an integer, with | the inexact exception raised if the input cannot be represented exactly as | an integer. However, if the fixed-point input is too large, the invalid | exception is raised and the integer indefinite value is returned. *----------------------------------------------------------------------------*/ Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status); #ifdef FLOAT16 /*---------------------------------------------------------------------------- | Normalizes the subnormal half-precision floating-point value represented | by the denormalized significand `aSig'. The normalized exponent and | significand are stored at the locations pointed to by `zExpPtr' and | `zSigPtr', respectively. *----------------------------------------------------------------------------*/ void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper half-precision floating- | point value corresponding to the abstract input. Ordinarily, the abstract | value is simply rounded and packed into the half-precision format, with | the inexact exception raised if the abstract input cannot be represented | exactly. However, if the abstract value is too large, the overflow and | inexact exceptions are raised and an infinity or maximal finite value is | returned. If the abstract value is too small, the input value is rounded to | a subnormal number, and the underflow and inexact exceptions are raised if | the abstract input cannot be represented exactly as a subnormal single- | precision floating-point number. | The input significand `zSig' has its binary point between bits 14 | and 13, which is 4 bits to the left of the usual location. This shifted | significand must be normalized or smaller. If `zSig' is not normalized, | `zExp' must be 0; in that case, the result returned is a subnormal number, | and it must not require rounding. In the usual case that `zSig' is | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. | The handling of underflow and overflow follows the IEC/IEEE Standard for | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, float_status_t &status); #endif /*---------------------------------------------------------------------------- | Normalizes the subnormal single-precision floating-point value represented | by the denormalized significand `aSig'. The normalized exponent and | significand are stored at the locations pointed to by `zExpPtr' and | `zSigPtr', respectively. *----------------------------------------------------------------------------*/ void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper single-precision floating- | point value corresponding to the abstract input. Ordinarily, the abstract | value is simply rounded and packed into the single-precision format, with | the inexact exception raised if the abstract input cannot be represented | exactly. However, if the abstract value is too large, the overflow and | inexact exceptions are raised and an infinity or maximal finite value is | returned. If the abstract value is too small, the input value is rounded to | a subnormal number, and the underflow and inexact exceptions are raised if | the abstract input cannot be represented exactly as a subnormal single- | precision floating-point number. | The input significand `zSig' has its binary point between bits 30 | and 29, which is 7 bits to the left of the usual location. This shifted | significand must be normalized or smaller. If `zSig' is not normalized, | `zExp' must be 0; in that case, the result returned is a subnormal number, | and it must not require rounding. In the usual case that `zSig' is | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. | The handling of underflow and overflow follows the IEC/IEEE Standard for | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper single-precision floating- | point value corresponding to the abstract input. This routine is just like | `roundAndPackFloat32' except that `zSig' does not have to be normalized. | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' | floating-point exponent. *----------------------------------------------------------------------------*/ float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status); /*---------------------------------------------------------------------------- | Normalizes the subnormal double-precision floating-point value represented | by the denormalized significand `aSig'. The normalized exponent and | significand are stored at the locations pointed to by `zExpPtr' and | `zSigPtr', respectively. *----------------------------------------------------------------------------*/ void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper double-precision floating- | point value corresponding to the abstract input. Ordinarily, the abstract | value is simply rounded and packed into the double-precision format, with | the inexact exception raised if the abstract input cannot be represented | exactly. However, if the abstract value is too large, the overflow and | inexact exceptions are raised and an infinity or maximal finite value is | returned. If the abstract value is too small, the input value is rounded | to a subnormal number, and the underflow and inexact exceptions are raised | if the abstract input cannot be represented exactly as a subnormal double- | precision floating-point number. | The input significand `zSig' has its binary point between bits 62 | and 61, which is 10 bits to the left of the usual location. This shifted | significand must be normalized or smaller. If `zSig' is not normalized, | `zExp' must be 0; in that case, the result returned is a subnormal number, | and it must not require rounding. In the usual case that `zSig' is | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. | The handling of underflow and overflow follows the IEC/IEEE Standard for | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand `zSig', and returns the proper double-precision floating- | point value corresponding to the abstract input. This routine is just like | `roundAndPackFloat64' except that `zSig' does not have to be normalized. | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' | floating-point exponent. *----------------------------------------------------------------------------*/ float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status); #ifdef FLOATX80 /*---------------------------------------------------------------------------- | Normalizes the subnormal extended double-precision floating-point value | represented by the denormalized significand `aSig'. The normalized exponent | and significand are stored at the locations pointed to by `zExpPtr' and | `zSigPtr', respectively. *----------------------------------------------------------------------------*/ void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and extended significand formed by the concatenation of `zSig0' and `zSig1', | and returns the proper extended double-precision floating-point value | corresponding to the abstract input. Ordinarily, the abstract value is | rounded and packed into the extended double-precision format, with the | inexact exception raised if the abstract input cannot be represented | exactly. However, if the abstract value is too large, the overflow and | inexact exceptions are raised and an infinity or maximal finite value is | returned. If the abstract value is too small, the input value is rounded to | a subnormal number, and the underflow and inexact exceptions are raised if | the abstract input cannot be represented exactly as a subnormal extended | double-precision floating-point number. | If `roundingPrecision' is 32 or 64, the result is rounded to the same | number of bits as single or double precision, respectively. Otherwise, the | result is rounded to the full precision of the extended double-precision | format. | The input significand must be normalized or smaller. If the input | significand is not normalized, `zExp' must be 0; in that case, the result | returned is a subnormal number, and it must not require rounding. The | handling of underflow and overflow follows the IEC/IEEE Standard for Binary | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ floatx80 roundAndPackFloatx80(int roundingPrecision, int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', | and returns the proper extended double-precision floating-point value | corresponding to the abstract input. This routine is just like | `roundAndPackFloatx80' except that the input significand does not have to be | normalized. *----------------------------------------------------------------------------*/ floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision, int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status); #endif // FLOATX80 #ifdef FLOAT128 /*---------------------------------------------------------------------------- | Normalizes the subnormal quadruple-precision floating-point value | represented by the denormalized significand formed by the concatenation of | `aSig0' and `aSig1'. The normalized exponent is stored at the location | pointed to by `zExpPtr'. The most significant 49 bits of the normalized | significand are stored at the location pointed to by `zSig0Ptr', and the | least significant 64 bits of the normalized significand are stored at the | location pointed to by `zSig1Ptr'. *----------------------------------------------------------------------------*/ void normalizeFloat128Subnormal( Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and extended significand formed by the concatenation of `zSig0', `zSig1', | and `zSig2', and returns the proper quadruple-precision floating-point value | corresponding to the abstract input. Ordinarily, the abstract value is | simply rounded and packed into the quadruple-precision format, with the | inexact exception raised if the abstract input cannot be represented | exactly. However, if the abstract value is too large, the overflow and | inexact exceptions are raised and an infinity or maximal finite value is | returned. If the abstract value is too small, the input value is rounded to | a subnormal number, and the underflow and inexact exceptions are raised if | the abstract input cannot be represented exactly as a subnormal quadruple- | precision floating-point number. | The input significand must be normalized or smaller. If the input | significand is not normalized, `zExp' must be 0; in that case, the result | returned is a subnormal number, and it must not require rounding. In the | usual case that the input significand is normalized, `zExp' must be 1 less | than the ``true'' floating-point exponent. The handling of underflow and | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ float128 roundAndPackFloat128( int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, float_status_t &status); /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent `zExp', | and significand formed by the concatenation of `zSig0' and `zSig1', and | returns the proper quadruple-precision floating-point value corresponding | to the abstract input. This routine is just like `roundAndPackFloat128' | except that the input significand has fewer bits and does not have to be | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- | point exponent. *----------------------------------------------------------------------------*/ float128 normalizeRoundAndPackFloat128( int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status); #endif // FLOAT128 #endif