softfloat: Move round_to_int_and_pack to softfloat-parts.c.inc
Rename to parts$N_float_to_sint. Reimplement float128_to_int{32,64}{_round_to_zero} with FloatParts128. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
afc34931eb
commit
463b3f0d7f
@ -751,3 +751,67 @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
|
|||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the result of converting the floating-point value `a' to
|
||||||
|
* the two's complement integer format. The conversion is performed
|
||||||
|
* according to the IEC/IEEE Standard for Binary Floating-Point
|
||||||
|
* Arithmetic---which means in particular that the conversion is
|
||||||
|
* rounded according to the current rounding mode. If `a' is a NaN,
|
||||||
|
* the largest positive integer is returned. Otherwise, if the
|
||||||
|
* conversion overflows, the largest integer with the same sign as `a'
|
||||||
|
* is returned.
|
||||||
|
*/
|
||||||
|
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
|
||||||
|
int scale, int64_t min, int64_t max,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
int flags = 0;
|
||||||
|
uint64_t r;
|
||||||
|
|
||||||
|
switch (p->cls) {
|
||||||
|
case float_class_snan:
|
||||||
|
case float_class_qnan:
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
r = max;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case float_class_inf:
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
r = p->sign ? min : max;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case float_class_zero:
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case float_class_normal:
|
||||||
|
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
||||||
|
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
|
||||||
|
flags = float_flag_inexact;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p->exp <= DECOMPOSED_BINARY_POINT) {
|
||||||
|
r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
|
||||||
|
} else {
|
||||||
|
r = UINT64_MAX;
|
||||||
|
}
|
||||||
|
if (p->sign) {
|
||||||
|
if (r <= -(uint64_t)min) {
|
||||||
|
r = -r;
|
||||||
|
} else {
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
r = min;
|
||||||
|
}
|
||||||
|
} else if (r > max) {
|
||||||
|
flags = float_flag_invalid;
|
||||||
|
r = max;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
|
||||||
|
float_raise(flags, s);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
365
fpu/softfloat.c
365
fpu/softfloat.c
@ -829,6 +829,16 @@ static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
|
|||||||
#define parts_round_to_int(A, R, C, S, F) \
|
#define parts_round_to_int(A, R, C, S, F) \
|
||||||
PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
|
PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
|
||||||
|
|
||||||
|
static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
|
||||||
|
int scale, int64_t min, int64_t max,
|
||||||
|
float_status *s);
|
||||||
|
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
|
||||||
|
int scale, int64_t min, int64_t max,
|
||||||
|
float_status *s);
|
||||||
|
|
||||||
|
#define parts_float_to_sint(P, R, Z, MN, MX, S) \
|
||||||
|
PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Helper functions for softfloat-parts.c.inc, per-size operations.
|
* Helper functions for softfloat-parts.c.inc, per-size operations.
|
||||||
*/
|
*/
|
||||||
@ -2352,69 +2362,8 @@ float128 float128_round_to_int(float128 a, float_status *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the result of converting the floating-point value `a' to
|
* Floating-point to signed integer conversions
|
||||||
* the two's complement integer format. The conversion is performed
|
*/
|
||||||
* according to the IEC/IEEE Standard for Binary Floating-Point
|
|
||||||
* Arithmetic---which means in particular that the conversion is
|
|
||||||
* rounded according to the current rounding mode. If `a' is a NaN,
|
|
||||||
* the largest positive integer is returned. Otherwise, if the
|
|
||||||
* conversion overflows, the largest integer with the same sign as `a'
|
|
||||||
* is returned.
|
|
||||||
*/
|
|
||||||
|
|
||||||
static int64_t round_to_int_and_pack(FloatParts64 p, FloatRoundMode rmode,
|
|
||||||
int scale, int64_t min, int64_t max,
|
|
||||||
float_status *s)
|
|
||||||
{
|
|
||||||
int flags = 0;
|
|
||||||
uint64_t r;
|
|
||||||
|
|
||||||
switch (p.cls) {
|
|
||||||
case float_class_snan:
|
|
||||||
case float_class_qnan:
|
|
||||||
flags = float_flag_invalid;
|
|
||||||
r = max;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case float_class_inf:
|
|
||||||
flags = float_flag_invalid;
|
|
||||||
r = p.sign ? min : max;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case float_class_zero:
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
case float_class_normal:
|
|
||||||
/* TODO: 62 = N - 2, frac_size for rounding */
|
|
||||||
if (parts_round_to_int_normal(&p, rmode, scale, 62)) {
|
|
||||||
flags = float_flag_inexact;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p.exp <= DECOMPOSED_BINARY_POINT) {
|
|
||||||
r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
|
|
||||||
} else {
|
|
||||||
r = UINT64_MAX;
|
|
||||||
}
|
|
||||||
if (p.sign) {
|
|
||||||
if (r <= -(uint64_t)min) {
|
|
||||||
r = -r;
|
|
||||||
} else {
|
|
||||||
flags = float_flag_invalid;
|
|
||||||
r = min;
|
|
||||||
}
|
|
||||||
} else if (r > max) {
|
|
||||||
flags = float_flag_invalid;
|
|
||||||
r = max;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
g_assert_not_reached();
|
|
||||||
}
|
|
||||||
|
|
||||||
float_raise(flags, s);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
||||||
float_status *s)
|
float_status *s)
|
||||||
@ -2422,7 +2371,7 @@ int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float16_unpack_canonical(&p, a, s);
|
float16_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2431,7 +2380,7 @@ int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float16_unpack_canonical(&p, a, s);
|
float16_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2440,7 +2389,7 @@ int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float16_unpack_canonical(&p, a, s);
|
float16_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2449,7 +2398,7 @@ int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float16_unpack_canonical(&p, a, s);
|
float16_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2458,7 +2407,7 @@ int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float32_unpack_canonical(&p, a, s);
|
float32_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2467,7 +2416,7 @@ int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float32_unpack_canonical(&p, a, s);
|
float32_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2476,7 +2425,7 @@ int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float32_unpack_canonical(&p, a, s);
|
float32_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2485,7 +2434,7 @@ int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float64_unpack_canonical(&p, a, s);
|
float64_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2494,7 +2443,7 @@ int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float64_unpack_canonical(&p, a, s);
|
float64_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
||||||
@ -2503,7 +2452,52 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
|
|||||||
FloatParts64 p;
|
FloatParts64 p;
|
||||||
|
|
||||||
float64_unpack_canonical(&p, a, s);
|
float64_unpack_canonical(&p, a, s);
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
FloatParts64 p;
|
||||||
|
|
||||||
|
bfloat16_unpack_canonical(&p, a, s);
|
||||||
|
return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
FloatParts64 p;
|
||||||
|
|
||||||
|
bfloat16_unpack_canonical(&p, a, s);
|
||||||
|
return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
FloatParts64 p;
|
||||||
|
|
||||||
|
bfloat16_unpack_canonical(&p, a, s);
|
||||||
|
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
|
||||||
|
int scale, float_status *s)
|
||||||
|
{
|
||||||
|
FloatParts128 p;
|
||||||
|
|
||||||
|
float128_unpack_canonical(&p, a, s);
|
||||||
|
return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
|
||||||
|
int scale, float_status *s)
|
||||||
|
{
|
||||||
|
FloatParts128 p;
|
||||||
|
|
||||||
|
float128_unpack_canonical(&p, a, s);
|
||||||
|
return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int8_t float16_to_int8(float16 a, float_status *s)
|
int8_t float16_to_int8(float16 a, float_status *s)
|
||||||
@ -2556,6 +2550,16 @@ int64_t float64_to_int64(float64 a, float_status *s)
|
|||||||
return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
|
return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32_t float128_to_int32(float128 a, float_status *s)
|
||||||
|
{
|
||||||
|
return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t float128_to_int64(float128 a, float_status *s)
|
||||||
|
{
|
||||||
|
return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
|
||||||
|
}
|
||||||
|
|
||||||
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
|
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
|
||||||
{
|
{
|
||||||
return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
|
return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
|
||||||
@ -2601,36 +2605,14 @@ int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
|
|||||||
return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
|
return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
|
||||||
* Returns the result of converting the floating-point value `a' to
|
|
||||||
* the two's complement integer format.
|
|
||||||
*/
|
|
||||||
|
|
||||||
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
|
|
||||||
float_status *s)
|
|
||||||
{
|
{
|
||||||
FloatParts64 p;
|
return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
|
||||||
|
|
||||||
bfloat16_unpack_canonical(&p, a, s);
|
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
|
int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
|
||||||
float_status *s)
|
|
||||||
{
|
{
|
||||||
FloatParts64 p;
|
return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
|
||||||
|
|
||||||
bfloat16_unpack_canonical(&p, a, s);
|
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
|
|
||||||
float_status *s)
|
|
||||||
{
|
|
||||||
FloatParts64 p;
|
|
||||||
|
|
||||||
bfloat16_unpack_canonical(&p, a, s);
|
|
||||||
return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
|
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
|
||||||
@ -6554,191 +6536,6 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *status)
|
|||||||
0, zExp, zSig0, zSig1, status);
|
0, zExp, zSig0, zSig1, status);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of converting the quadruple-precision floating-point
|
|
||||||
| value `a' to the 32-bit two's complement integer format. The conversion
|
|
||||||
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
||||||
| Arithmetic---which means in particular that the conversion is rounded
|
|
||||||
| according to the current rounding mode. If `a' is a NaN, the largest
|
|
||||||
| positive integer is returned. Otherwise, if the conversion overflows, the
|
|
||||||
| largest integer with the same sign as `a' is returned.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
int32_t float128_to_int32(float128 a, float_status *status)
|
|
||||||
{
|
|
||||||
bool aSign;
|
|
||||||
int32_t aExp, shiftCount;
|
|
||||||
uint64_t aSig0, aSig1;
|
|
||||||
|
|
||||||
aSig1 = extractFloat128Frac1( a );
|
|
||||||
aSig0 = extractFloat128Frac0( a );
|
|
||||||
aExp = extractFloat128Exp( a );
|
|
||||||
aSign = extractFloat128Sign( a );
|
|
||||||
if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
|
|
||||||
if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
|
|
||||||
aSig0 |= ( aSig1 != 0 );
|
|
||||||
shiftCount = 0x4028 - aExp;
|
|
||||||
if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
|
|
||||||
return roundAndPackInt32(aSign, aSig0, status);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of converting the quadruple-precision floating-point
|
|
||||||
| value `a' to the 32-bit two's complement integer format. The conversion
|
|
||||||
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
||||||
| Arithmetic, except that the conversion is always rounded toward zero. If
|
|
||||||
| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
|
|
||||||
| conversion overflows, the largest integer with the same sign as `a' is
|
|
||||||
| returned.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
|
|
||||||
{
|
|
||||||
bool aSign;
|
|
||||||
int32_t aExp, shiftCount;
|
|
||||||
uint64_t aSig0, aSig1, savedASig;
|
|
||||||
int32_t z;
|
|
||||||
|
|
||||||
aSig1 = extractFloat128Frac1( a );
|
|
||||||
aSig0 = extractFloat128Frac0( a );
|
|
||||||
aExp = extractFloat128Exp( a );
|
|
||||||
aSign = extractFloat128Sign( a );
|
|
||||||
aSig0 |= ( aSig1 != 0 );
|
|
||||||
if ( 0x401E < aExp ) {
|
|
||||||
if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
|
|
||||||
goto invalid;
|
|
||||||
}
|
|
||||||
else if ( aExp < 0x3FFF ) {
|
|
||||||
if (aExp || aSig0) {
|
|
||||||
float_raise(float_flag_inexact, status);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
aSig0 |= UINT64_C(0x0001000000000000);
|
|
||||||
shiftCount = 0x402F - aExp;
|
|
||||||
savedASig = aSig0;
|
|
||||||
aSig0 >>= shiftCount;
|
|
||||||
z = aSig0;
|
|
||||||
if ( aSign ) z = - z;
|
|
||||||
if ( ( z < 0 ) ^ aSign ) {
|
|
||||||
invalid:
|
|
||||||
float_raise(float_flag_invalid, status);
|
|
||||||
return aSign ? INT32_MIN : INT32_MAX;
|
|
||||||
}
|
|
||||||
if ( ( aSig0<<shiftCount ) != savedASig ) {
|
|
||||||
float_raise(float_flag_inexact, status);
|
|
||||||
}
|
|
||||||
return z;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of converting the quadruple-precision floating-point
|
|
||||||
| value `a' to the 64-bit two's complement integer format. The conversion
|
|
||||||
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
||||||
| Arithmetic---which means in particular that the conversion is rounded
|
|
||||||
| according to the current rounding mode. If `a' is a NaN, the largest
|
|
||||||
| positive integer is returned. Otherwise, if the conversion overflows, the
|
|
||||||
| largest integer with the same sign as `a' is returned.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
int64_t float128_to_int64(float128 a, float_status *status)
|
|
||||||
{
|
|
||||||
bool aSign;
|
|
||||||
int32_t aExp, shiftCount;
|
|
||||||
uint64_t aSig0, aSig1;
|
|
||||||
|
|
||||||
aSig1 = extractFloat128Frac1( a );
|
|
||||||
aSig0 = extractFloat128Frac0( a );
|
|
||||||
aExp = extractFloat128Exp( a );
|
|
||||||
aSign = extractFloat128Sign( a );
|
|
||||||
if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
|
|
||||||
shiftCount = 0x402F - aExp;
|
|
||||||
if ( shiftCount <= 0 ) {
|
|
||||||
if ( 0x403E < aExp ) {
|
|
||||||
float_raise(float_flag_invalid, status);
|
|
||||||
if ( ! aSign
|
|
||||||
|| ( ( aExp == 0x7FFF )
|
|
||||||
&& ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
return INT64_MAX;
|
|
||||||
}
|
|
||||||
return INT64_MIN;
|
|
||||||
}
|
|
||||||
shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
|
|
||||||
}
|
|
||||||
return roundAndPackInt64(aSign, aSig0, aSig1, status);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of converting the quadruple-precision floating-point
|
|
||||||
| value `a' to the 64-bit two's complement integer format. The conversion
|
|
||||||
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|
||||||
| Arithmetic, except that the conversion is always rounded toward zero.
|
|
||||||
| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
|
|
||||||
| the conversion overflows, the largest integer with the same sign as `a' is
|
|
||||||
| returned.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
|
|
||||||
{
|
|
||||||
bool aSign;
|
|
||||||
int32_t aExp, shiftCount;
|
|
||||||
uint64_t aSig0, aSig1;
|
|
||||||
int64_t z;
|
|
||||||
|
|
||||||
aSig1 = extractFloat128Frac1( a );
|
|
||||||
aSig0 = extractFloat128Frac0( a );
|
|
||||||
aExp = extractFloat128Exp( a );
|
|
||||||
aSign = extractFloat128Sign( a );
|
|
||||||
if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
|
|
||||||
shiftCount = aExp - 0x402F;
|
|
||||||
if ( 0 < shiftCount ) {
|
|
||||||
if ( 0x403E <= aExp ) {
|
|
||||||
aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
|
|
||||||
if ( ( a.high == UINT64_C(0xC03E000000000000) )
|
|
||||||
&& ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
|
|
||||||
if (aSig1) {
|
|
||||||
float_raise(float_flag_inexact, status);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
float_raise(float_flag_invalid, status);
|
|
||||||
if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
|
|
||||||
return INT64_MAX;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return INT64_MIN;
|
|
||||||
}
|
|
||||||
z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
|
|
||||||
if ( (uint64_t) ( aSig1<<shiftCount ) ) {
|
|
||||||
float_raise(float_flag_inexact, status);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if ( aExp < 0x3FFF ) {
|
|
||||||
if ( aExp | aSig0 | aSig1 ) {
|
|
||||||
float_raise(float_flag_inexact, status);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
z = aSig0>>( - shiftCount );
|
|
||||||
if ( aSig1
|
|
||||||
|| ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
|
|
||||||
float_raise(float_flag_inexact, status);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( aSign ) z = - z;
|
|
||||||
return z;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Returns the result of converting the quadruple-precision floating-point value
|
| Returns the result of converting the quadruple-precision floating-point value
|
||||||
| `a' to the 64-bit unsigned integer format. The conversion is
|
| `a' to the 64-bit unsigned integer format. The conversion is
|
||||||
|
Loading…
Reference in New Issue
Block a user