softfloat: Add float16 <=> float64 conversion functions
Add the conversion functions float16_to_float64() and float64_to_float16(), which will be needed for the ARM A64 instruction set. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
c4a1c5e7e2
commit
14c9a07eb9
@ -3281,6 +3281,81 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
|
||||
return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns the result of converting the half-precision floating-point value
| `a' to the double-precision floating-point format.  When `ieee' is nonzero
| an exponent field of 0x1f denotes infinity or NaN; when `ieee' is zero that
| exponent is not special-cased and the value is widened like any other
| normalized number.  No rounding step is performed here.
*----------------------------------------------------------------------------*/
float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM)
{
    flag aSign = extractFloat16Sign(a);
    int_fast16_t aExp = extractFloat16Exp(a);
    uint32_t aSig = extractFloat16Frac(a);

    if (ieee && aExp == 0x1f) {
        /* Maximum exponent: zero fraction is infinity, anything else NaN. */
        if (aSig == 0) {
            return packFloat64(aSign, 0x7ff, 0);
        }
        return commonNaNToFloat64(
            float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
    }

    if (aExp == 0) {
        /* Signed zero maps straight to signed zero. */
        if (aSig == 0) {
            return packFloat64(aSign, 0, 0);
        }

        /* Half-precision subnormal: renormalize the significand.  The
         * extra decrement presumably compensates for the explicit leading
         * one that the helper leaves in aSig and that would carry into
         * the exponent field when packed -- confirm against
         * normalizeFloat16Subnormal() and packFloat64().
         */
        normalizeFloat16Subnormal(aSig, &aExp, &aSig);
        aExp -= 1;
    }

    /* 0x3f0 == 1023 - 15, the difference between the double and half
     * exponent biases; shifting by 42 moves the half fraction to the top
     * of the 52-bit double fraction field.
     */
    return packFloat64(aSign, aExp + 0x3f0, (uint64_t)aSig << 42);
}
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point value
| `a' to the half-precision floating-point format.  When `ieee' is nonzero,
| NaN and infinity inputs are converted to half-precision NaN and infinity.
| When `ieee' is zero, a NaN or infinity input raises the invalid flag and
| yields a signed zero (for NaN) or the value with exponent 0x1f and fraction
| 0x3ff (for infinity).  All other inputs are rounded and packed by
| roundAndPackFloat16().
*----------------------------------------------------------------------------*/
float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM)
{
    flag aSign;
    int_fast16_t aExp;
    uint64_t aSig;
    uint32_t zSig;

    /* Presumably flushes a denormal input to zero when the status asks for
     * it -- confirm against float64_squash_input_denormal().
     */
    a = float64_squash_input_denormal(a STATUS_VAR);

    aSign = extractFloat64Sign(a);
    aExp = extractFloat64Exp(a);
    aSig = extractFloat64Frac(a);

    if (aExp == 0x7FF) {
        if (ieee) {
            /* Nonzero fraction is a NaN, zero fraction is infinity. */
            if (aSig) {
                return commonNaNToFloat16(
                    float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
            }
            return packFloat16(aSign, 0x1f, 0);
        }

        /* Non-IEEE target format: NaN and infinity inputs are both
         * reported as invalid; NaN becomes zero and infinity becomes the
         * encoding with all exponent and fraction bits set.
         */
        float_raise(float_flag_invalid STATUS_VAR);
        if (aSig) {
            return packFloat16(aSign, 0, 0);
        }
        return packFloat16(aSign, 0x1f, 0x3ff);
    }

    /* Reduce the 52-bit fraction to 23 bits; the jamming shift keeps any
     * discarded nonzero bits visible as a sticky low bit.
     */
    shift64RightJamming(aSig, 29, &aSig);
    zSig = aSig;
    if (aExp == 0 && zSig == 0) {
        return packFloat16(aSign, 0, 0);
    }

    /* Decimal point between bits 22 and 23.  Note that the implicit 1 bit
     * is added even if the input is denormal; this is harmless because the
     * largest possible double-precision denormal is still smaller than the
     * smallest representable half-precision denormal, so the significand
     * ends up being ignored and the result comes back via the "always
     * return zero" codepath of roundAndPackFloat16().
     */
    return roundAndPackFloat16(aSign, aExp - 0x3F1, zSig | 0x00800000,
                               ieee STATUS_VAR);
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of converting the double-precision floating-point value
|
||||
| `a' to the extended double-precision floating-point format. The conversion
|
||||
|
@ -298,6 +298,8 @@ INLINE float64 uint16_to_float64(uint16_t v STATUS_PARAM)
|
||||
*----------------------------------------------------------------------------*/
|
||||
float16 float32_to_float16( float32, flag STATUS_PARAM );
|
||||
float32 float16_to_float32( float16, flag STATUS_PARAM );
|
||||
float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM);
|
||||
float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM);
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Software half-precision operations.
|
||||
|
Loading…
Reference in New Issue
Block a user