softfloat: Add float16 <=> float64 conversion functions
Add the conversion functions float16_to_float64() and float64_to_float16(), which will be needed for the ARM A64 instruction set. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
c4a1c5e7e2
commit
14c9a07eb9
@ -3281,6 +3281,81 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
|
|||||||
return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR);
|
return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Convert the half-precision value `a' to double precision.
 *
 * When `ieee' is non-zero, a half-precision exponent field of 0x1f is
 * decoded as infinity (zero fraction) or NaN (non-zero fraction).  When
 * `ieee' is zero that exponent receives no special treatment and the value
 * is widened like any other normal number.
 *
 * No rounding helper is called here: the result is built directly with
 * packFloat64().
 */
float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM)
{
    flag aSign = extractFloat16Sign(a);
    int_fast16_t aExp = extractFloat16Exp(a);
    uint32_t aSig = extractFloat16Frac(a);

    if (ieee && aExp == 0x1f) {
        if (aSig == 0) {
            /* Infinity: all-ones double exponent, zero fraction. */
            return packFloat64(aSign, 0x7ff, 0);
        }
        /* NaN: translate via the format-independent NaN representation. */
        return commonNaNToFloat64(
            float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }

    if (aExp == 0) {
        if (aSig == 0) {
            /* Signed zero maps to signed zero. */
            return packFloat64(aSign, 0, 0);
        }

        /*
         * Subnormal input.
         * NOTE(review): the decrement presumably compensates for the
         * leading one that normalizeFloat16Subnormal() leaves in aSig and
         * that packFloat64() then carries into the exponent field --
         * confirm against those helpers.
         */
        normalizeFloat16Subnormal(aSig, &aExp, &aSig);
        aExp--;
    }

    /*
     * Rebias the exponent (0x3f0 == 1023 - 15) and move the 10-bit
     * half-precision fraction to the top of the 52-bit double fraction
     * (42 == 52 - 10).
     */
    return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
}
|
||||||
|
|
||||||
|
/*
 * Convert the double-precision value `a' to half precision.
 *
 * The input is first passed through float64_squash_input_denormal().
 *
 * When `ieee' is non-zero, NaN inputs are converted through the common NaN
 * representation and infinities become half-precision infinities.  When
 * `ieee' is zero, both NaN and infinity inputs raise float_flag_invalid;
 * a NaN then yields a signed zero and an infinity yields the encoding
 * with exponent 0x1f and fraction 0x3ff.
 *
 * All other values are rounded by roundAndPackFloat16().
 */
float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp;
    uint64_t aSig;
    uint32_t zSig;

    a = float64_squash_input_denormal(a STATUS_VAR);

    aSig = extractFloat64Frac(a);
    aExp = extractFloat64Exp(a);
    aSign = extractFloat64Sign(a);

    if (aExp == 0x7FF) {
        /* Non-zero fraction means NaN, zero fraction means infinity. */
        if (ieee) {
            if (aSig == 0) {
                return packFloat16(aSign, 0x1f, 0);
            }
            return commonNaNToFloat16(
                float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
        }

        /* Without `ieee' neither NaN nor infinity is produced. */
        float_raise(float_flag_invalid STATUS_VAR);
        if (aSig) {
            return packFloat16(aSign, 0, 0);
        }
        return packFloat16(aSign, 0x1f, 0x3ff);
    }

    /*
     * Keep the top 23 fraction bits; shifted-out bits are folded into the
     * least significant bit by the jamming shift.
     */
    shift64RightJamming(aSig, 29, &aSig);
    zSig = aSig;

    if (aExp == 0 && zSig == 0) {
        /* Exact zero. */
        return packFloat16(aSign, 0, 0);
    }

    /*
     * The binary point now sits between bits 22 and 23, so OR in the
     * implicit leading one at bit 23.  This is done even for a denormal
     * input; per the original author's note that is harmless, because the
     * largest double-precision denormal is still far smaller than the
     * smallest half-precision denormal, so roundAndPackFloat16() is
     * expected to discard the significand on its underflow path.
     */
    return roundAndPackFloat16(aSign, aExp - 0x3F1, zSig | 0x00800000,
                               ieee STATUS_VAR);
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Returns the result of converting the double-precision floating-point value
|
| Returns the result of converting the double-precision floating-point value
|
||||||
| `a' to the extended double-precision floating-point format. The conversion
|
| `a' to the extended double-precision floating-point format. The conversion
|
||||||
|
@ -298,6 +298,8 @@ INLINE float64 uint16_to_float64(uint16_t v STATUS_PARAM)
|
|||||||
*----------------------------------------------------------------------------*/
|
*----------------------------------------------------------------------------*/
|
||||||
float16 float32_to_float16( float32, flag STATUS_PARAM );
|
float16 float32_to_float16( float32, flag STATUS_PARAM );
|
||||||
float32 float16_to_float32( float16, flag STATUS_PARAM );
|
float32 float16_to_float32( float16, flag STATUS_PARAM );
|
||||||
|
/* Narrow a double-precision value to half precision.  With `ieee' zero,
 * NaN and infinity inputs raise float_flag_invalid rather than producing a
 * half-precision NaN or infinity. */
float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM);
|
||||||
|
/* Widen a half-precision value to double precision.  Exponent 0x1f is
 * decoded as infinity/NaN only when `ieee' is non-zero. */
float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM);
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Software half-precision operations.
|
| Software half-precision operations.
|
||||||
|
Loading…
Reference in New Issue
Block a user