softfloat: Fix single-to-half precision float conversions

Fix various bugs in the single-to-half-precision conversion code:
 * input NaNs not correctly converted in IEEE mode
   (fixed by defining and using a commonNaNToFloat16())
 * wrong values returned when converting NaN/Inf into non-IEEE
   half precision value
 * wrong values returned for conversion of values which are
   on the boundary between denormal and zero for the half
   precision format
 * zeroes not correctly identified
 * excessively large results in non-IEEE mode should
   generate InvalidOp, not Overflow

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
Peter Maydell 2011-02-10 11:28:58 +00:00 committed by Aurelien Jarno
parent bcd4d9afd4
commit 600e30d2b2
2 changed files with 38 additions and 11 deletions

View File

@ -119,6 +119,27 @@ float16 float16_maybe_silence_nan(float16 a_)
return a_; return a_;
} }
/*----------------------------------------------------------------------------
| Converts the canonical NaN `a' to the half-precision floating-point format.
| The default half-precision NaN is returned when default-NaN mode is enabled
| in the status, or when `a.high' shifted right by 54 bits yields zero.
| Otherwise the result is built from the sign of `a' in bit 15, the value
| 0x1F in bits 10 and up, and the shifted bits of `a.high' in the low bits.
*----------------------------------------------------------------------------*/
static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM)
{
    uint16_t frac = a.high >> 54;

    /* Either condition selects the default NaN.  NOTE(review): the zero-
     * fraction case presumably exists because 0x1F in the exponent field
     * with an empty fraction would encode an infinity, not a NaN. */
    if (STATUS(default_nan_mode) || !frac) {
        return float16_default_nan;
    }

    return make_float16((((uint16_t) a.sign) << 15) | (0x1F << 10) | frac);
}
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN. | The pattern for a default generated single-precision NaN.
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/

View File

@ -2796,24 +2796,30 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
aSign = extractFloat32Sign( a ); aSign = extractFloat32Sign( a );
if ( aExp == 0xFF ) { if ( aExp == 0xFF ) {
if (aSig) { if (aSig) {
/* Make sure correct exceptions are raised. */ /* Input is a NaN */
float32ToCommonNaN(a STATUS_VAR); float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
aSig |= 0x00400000; if (!ieee) {
return packFloat16(aSign, 0, 0);
} }
return packFloat16(aSign, 0x1f, aSig >> 13); return r;
} }
if (aExp == 0 && aSign == 0) { /* Infinity */
if (!ieee) {
float_raise(float_flag_invalid STATUS_VAR);
return packFloat16(aSign, 0x1f, 0x3ff);
}
return packFloat16(aSign, 0x1f, 0);
}
if (aExp == 0 && aSig == 0) {
return packFloat16(aSign, 0, 0); return packFloat16(aSign, 0, 0);
} }
/* Decimal point between bits 22 and 23. */ /* Decimal point between bits 22 and 23. */
aSig |= 0x00800000; aSig |= 0x00800000;
aExp -= 0x7f; aExp -= 0x7f;
if (aExp < -14) { if (aExp < -14) {
mask = 0x007fffff; mask = 0x00ffffff;
if (aExp < -24) { if (aExp >= -24) {
aExp = -25; mask >>= 25 + aExp;
} else {
mask >>= 24 + aExp;
} }
} else { } else {
mask = 0x00001fff; mask = 0x00001fff;
@ -2855,7 +2861,7 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
} }
} else { } else {
if (aExp > 16) { if (aExp > 16) {
float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
return packFloat16(aSign, 0x1f, 0x3ff); return packFloat16(aSign, 0x1f, 0x3ff);
} }
} }