softfloat: Convert floatx80 to integer to FloatParts
Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
8ae5719cd4
commit
a1fc527bfb
336
fpu/softfloat.c
336
fpu/softfloat.c
@ -2829,6 +2829,28 @@ static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
|
||||
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
||||
}
|
||||
|
||||
static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
|
||||
int scale, float_status *s)
|
||||
{
|
||||
FloatParts128 p;
|
||||
|
||||
if (!floatx80_unpack_canonical(&p, a, s)) {
|
||||
parts_default_nan(&p, s);
|
||||
}
|
||||
return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
||||
}
|
||||
|
||||
static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
|
||||
int scale, float_status *s)
|
||||
{
|
||||
FloatParts128 p;
|
||||
|
||||
if (!floatx80_unpack_canonical(&p, a, s)) {
|
||||
parts_default_nan(&p, s);
|
||||
}
|
||||
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
||||
}
|
||||
|
||||
int8_t float16_to_int8(float16 a, float_status *s)
|
||||
{
|
||||
return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
|
||||
@ -2889,6 +2911,16 @@ int64_t float128_to_int64(float128 a, float_status *s)
|
||||
return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
|
||||
}
|
||||
|
||||
int32_t floatx80_to_int32(floatx80 a, float_status *s)
|
||||
{
|
||||
return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
|
||||
}
|
||||
|
||||
int64_t floatx80_to_int64(floatx80 a, float_status *s)
|
||||
{
|
||||
return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
|
||||
}
|
||||
|
||||
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
|
||||
{
|
||||
return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
|
||||
@ -2944,6 +2976,16 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
|
||||
return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
|
||||
}
|
||||
|
||||
int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
|
||||
{
|
||||
return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
|
||||
}
|
||||
|
||||
int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
|
||||
{
|
||||
return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
|
||||
}
|
||||
|
||||
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
|
||||
{
|
||||
return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
|
||||
@ -4160,127 +4202,6 @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
|
||||
return a;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
|
||||
| and 7, and returns the properly rounded 32-bit integer corresponding to the
|
||||
| input. If `zSign' is 1, the input is negated before being converted to an
|
||||
| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
|
||||
| is simply rounded to an integer, with the inexact exception raised if the
|
||||
| input cannot be represented exactly as an integer. However, if the fixed-
|
||||
| point input is too large, the invalid exception is raised and the largest
|
||||
| positive or negative integer is returned.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
|
||||
float_status *status)
|
||||
{
|
||||
int8_t roundingMode;
|
||||
bool roundNearestEven;
|
||||
int8_t roundIncrement, roundBits;
|
||||
int32_t z;
|
||||
|
||||
roundingMode = status->float_rounding_mode;
|
||||
roundNearestEven = ( roundingMode == float_round_nearest_even );
|
||||
switch (roundingMode) {
|
||||
case float_round_nearest_even:
|
||||
case float_round_ties_away:
|
||||
roundIncrement = 0x40;
|
||||
break;
|
||||
case float_round_to_zero:
|
||||
roundIncrement = 0;
|
||||
break;
|
||||
case float_round_up:
|
||||
roundIncrement = zSign ? 0 : 0x7f;
|
||||
break;
|
||||
case float_round_down:
|
||||
roundIncrement = zSign ? 0x7f : 0;
|
||||
break;
|
||||
case float_round_to_odd:
|
||||
roundIncrement = absZ & 0x80 ? 0 : 0x7f;
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
roundBits = absZ & 0x7F;
|
||||
absZ = ( absZ + roundIncrement )>>7;
|
||||
if (!(roundBits ^ 0x40) && roundNearestEven) {
|
||||
absZ &= ~1;
|
||||
}
|
||||
z = absZ;
|
||||
if ( zSign ) z = - z;
|
||||
if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
return zSign ? INT32_MIN : INT32_MAX;
|
||||
}
|
||||
if (roundBits) {
|
||||
float_raise(float_flag_inexact, status);
|
||||
}
|
||||
return z;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
|
||||
| `absZ1', with binary point between bits 63 and 64 (between the input words),
|
||||
| and returns the properly rounded 64-bit integer corresponding to the input.
|
||||
| If `zSign' is 1, the input is negated before being converted to an integer.
|
||||
| Ordinarily, the fixed-point input is simply rounded to an integer, with
|
||||
| the inexact exception raised if the input cannot be represented exactly as
|
||||
| an integer. However, if the fixed-point input is too large, the invalid
|
||||
| exception is raised and the largest positive or negative integer is
|
||||
| returned.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
|
||||
float_status *status)
|
||||
{
|
||||
int8_t roundingMode;
|
||||
bool roundNearestEven, increment;
|
||||
int64_t z;
|
||||
|
||||
roundingMode = status->float_rounding_mode;
|
||||
roundNearestEven = ( roundingMode == float_round_nearest_even );
|
||||
switch (roundingMode) {
|
||||
case float_round_nearest_even:
|
||||
case float_round_ties_away:
|
||||
increment = ((int64_t) absZ1 < 0);
|
||||
break;
|
||||
case float_round_to_zero:
|
||||
increment = 0;
|
||||
break;
|
||||
case float_round_up:
|
||||
increment = !zSign && absZ1;
|
||||
break;
|
||||
case float_round_down:
|
||||
increment = zSign && absZ1;
|
||||
break;
|
||||
case float_round_to_odd:
|
||||
increment = !(absZ0 & 1) && absZ1;
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
if ( increment ) {
|
||||
++absZ0;
|
||||
if ( absZ0 == 0 ) goto overflow;
|
||||
if (!(absZ1 << 1) && roundNearestEven) {
|
||||
absZ0 &= ~1;
|
||||
}
|
||||
}
|
||||
z = absZ0;
|
||||
if ( zSign ) z = - z;
|
||||
if ( z && ( ( z < 0 ) ^ zSign ) ) {
|
||||
overflow:
|
||||
float_raise(float_flag_invalid, status);
|
||||
return zSign ? INT64_MIN : INT64_MAX;
|
||||
}
|
||||
if (absZ1) {
|
||||
float_raise(float_flag_inexact, status);
|
||||
}
|
||||
return z;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Normalizes the subnormal single-precision floating-point value represented
|
||||
| by the denormalized significand `aSig'. The normalized exponent and
|
||||
@ -5486,179 +5407,6 @@ float64 float64_log2(float64 a, float_status *status)
|
||||
return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of converting the extended double-precision floating-
|
||||
| point value `a' to the 32-bit two's complement integer format. The
|
||||
| conversion is performed according to the IEC/IEEE Standard for Binary
|
||||
| Floating-Point Arithmetic---which means in particular that the conversion
|
||||
| is rounded according to the current rounding mode. If `a' is a NaN, the
|
||||
| largest positive integer is returned. Otherwise, if the conversion
|
||||
| overflows, the largest integer with the same sign as `a' is returned.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
int32_t floatx80_to_int32(floatx80 a, float_status *status)
|
||||
{
|
||||
bool aSign;
|
||||
int32_t aExp, shiftCount;
|
||||
uint64_t aSig;
|
||||
|
||||
if (floatx80_invalid_encoding(a)) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
return 1 << 31;
|
||||
}
|
||||
aSig = extractFloatx80Frac( a );
|
||||
aExp = extractFloatx80Exp( a );
|
||||
aSign = extractFloatx80Sign( a );
|
||||
if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
|
||||
shiftCount = 0x4037 - aExp;
|
||||
if ( shiftCount <= 0 ) shiftCount = 1;
|
||||
shift64RightJamming( aSig, shiftCount, &aSig );
|
||||
return roundAndPackInt32(aSign, aSig, status);
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of converting the extended double-precision floating-
|
||||
| point value `a' to the 32-bit two's complement integer format. The
|
||||
| conversion is performed according to the IEC/IEEE Standard for Binary
|
||||
| Floating-Point Arithmetic, except that the conversion is always rounded
|
||||
| toward zero. If `a' is a NaN, the largest positive integer is returned.
|
||||
| Otherwise, if the conversion overflows, the largest integer with the same
|
||||
| sign as `a' is returned.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
|
||||
{
|
||||
bool aSign;
|
||||
int32_t aExp, shiftCount;
|
||||
uint64_t aSig, savedASig;
|
||||
int32_t z;
|
||||
|
||||
if (floatx80_invalid_encoding(a)) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
return 1 << 31;
|
||||
}
|
||||
aSig = extractFloatx80Frac( a );
|
||||
aExp = extractFloatx80Exp( a );
|
||||
aSign = extractFloatx80Sign( a );
|
||||
if ( 0x401E < aExp ) {
|
||||
if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
|
||||
goto invalid;
|
||||
}
|
||||
else if ( aExp < 0x3FFF ) {
|
||||
if (aExp || aSig) {
|
||||
float_raise(float_flag_inexact, status);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
shiftCount = 0x403E - aExp;
|
||||
savedASig = aSig;
|
||||
aSig >>= shiftCount;
|
||||
z = aSig;
|
||||
if ( aSign ) z = - z;
|
||||
if ( ( z < 0 ) ^ aSign ) {
|
||||
invalid:
|
||||
float_raise(float_flag_invalid, status);
|
||||
return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
|
||||
}
|
||||
if ( ( aSig<<shiftCount ) != savedASig ) {
|
||||
float_raise(float_flag_inexact, status);
|
||||
}
|
||||
return z;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of converting the extended double-precision floating-
|
||||
| point value `a' to the 64-bit two's complement integer format. The
|
||||
| conversion is performed according to the IEC/IEEE Standard for Binary
|
||||
| Floating-Point Arithmetic---which means in particular that the conversion
|
||||
| is rounded according to the current rounding mode. If `a' is a NaN,
|
||||
| the largest positive integer is returned. Otherwise, if the conversion
|
||||
| overflows, the largest integer with the same sign as `a' is returned.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
int64_t floatx80_to_int64(floatx80 a, float_status *status)
|
||||
{
|
||||
bool aSign;
|
||||
int32_t aExp, shiftCount;
|
||||
uint64_t aSig, aSigExtra;
|
||||
|
||||
if (floatx80_invalid_encoding(a)) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
return 1ULL << 63;
|
||||
}
|
||||
aSig = extractFloatx80Frac( a );
|
||||
aExp = extractFloatx80Exp( a );
|
||||
aSign = extractFloatx80Sign( a );
|
||||
shiftCount = 0x403E - aExp;
|
||||
if ( shiftCount <= 0 ) {
|
||||
if ( shiftCount ) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
if (!aSign || floatx80_is_any_nan(a)) {
|
||||
return INT64_MAX;
|
||||
}
|
||||
return INT64_MIN;
|
||||
}
|
||||
aSigExtra = 0;
|
||||
}
|
||||
else {
|
||||
shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
|
||||
}
|
||||
return roundAndPackInt64(aSign, aSig, aSigExtra, status);
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the result of converting the extended double-precision floating-
|
||||
| point value `a' to the 64-bit two's complement integer format. The
|
||||
| conversion is performed according to the IEC/IEEE Standard for Binary
|
||||
| Floating-Point Arithmetic, except that the conversion is always rounded
|
||||
| toward zero. If `a' is a NaN, the largest positive integer is returned.
|
||||
| Otherwise, if the conversion overflows, the largest integer with the same
|
||||
| sign as `a' is returned.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
|
||||
{
|
||||
bool aSign;
|
||||
int32_t aExp, shiftCount;
|
||||
uint64_t aSig;
|
||||
int64_t z;
|
||||
|
||||
if (floatx80_invalid_encoding(a)) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
return 1ULL << 63;
|
||||
}
|
||||
aSig = extractFloatx80Frac( a );
|
||||
aExp = extractFloatx80Exp( a );
|
||||
aSign = extractFloatx80Sign( a );
|
||||
shiftCount = aExp - 0x403E;
|
||||
if ( 0 <= shiftCount ) {
|
||||
aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
|
||||
if ( ( a.high != 0xC03E ) || aSig ) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
|
||||
return INT64_MAX;
|
||||
}
|
||||
}
|
||||
return INT64_MIN;
|
||||
}
|
||||
else if ( aExp < 0x3FFF ) {
|
||||
if (aExp | aSig) {
|
||||
float_raise(float_flag_inexact, status);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
z = aSig>>( - shiftCount );
|
||||
if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
|
||||
float_raise(float_flag_inexact, status);
|
||||
}
|
||||
if ( aSign ) z = - z;
|
||||
return z;
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Rounds the extended double-precision floating-point value `a'
|
||||
| to the precision provided by floatx80_rounding_precision and returns the
|
||||
|
Loading…
x
Reference in New Issue
Block a user