softfloat: Convert floatx80_add/sub to FloatParts
Since this is the first floatx80 operation to be converted, this commit also includes all of the floatx80 packing and unpacking routines. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
7ccae4ce7e
commit
c1b6299be1
339
fpu/softfloat.c
339
fpu/softfloat.c
@ -578,14 +578,14 @@ typedef struct {
|
|||||||
} FloatFmt;
|
} FloatFmt;
|
||||||
|
|
||||||
/* Expand fields based on the size of exponent and fraction */
|
/* Expand fields based on the size of exponent and fraction */
|
||||||
#define FLOAT_PARAMS_(E, F) \
|
#define FLOAT_PARAMS_(E) \
|
||||||
.exp_size = E, \
|
.exp_size = E, \
|
||||||
.exp_bias = ((1 << E) - 1) >> 1, \
|
.exp_bias = ((1 << E) - 1) >> 1, \
|
||||||
.exp_max = (1 << E) - 1, \
|
.exp_max = (1 << E) - 1
|
||||||
.frac_size = F
|
|
||||||
|
|
||||||
#define FLOAT_PARAMS(E, F) \
|
#define FLOAT_PARAMS(E, F) \
|
||||||
FLOAT_PARAMS_(E, F), \
|
FLOAT_PARAMS_(E), \
|
||||||
|
.frac_size = F, \
|
||||||
.frac_shift = (-F - 1) & 63, \
|
.frac_shift = (-F - 1) & 63, \
|
||||||
.round_mask = (1ull << ((-F - 1) & 63)) - 1
|
.round_mask = (1ull << ((-F - 1) & 63)) - 1
|
||||||
|
|
||||||
@ -614,6 +614,18 @@ static const FloatFmt float128_params = {
|
|||||||
FLOAT_PARAMS(15, 112)
|
FLOAT_PARAMS(15, 112)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * FloatFmt initializer fields for floatx80 at rounding precision R.
 *
 * The exponent fields come from FLOAT_PARAMS_(15), i.e. a 15-bit exponent.
 * frac_size is R, except that R == 64 yields 63.  frac_shift is always 0.
 * round_mask is all ones for R == 64; otherwise it is
 * (1ull << ((-R - 1) & 63)) - 1, e.g. the low 40 bits for R == 23 and
 * the low 11 bits for R == 52.
 *
 * NOTE(review): R is not parenthesized in the expansion, so pass only
 * plain integer constants, as the floatx80_params table does.
 */
#define FLOATX80_PARAMS(R) \
|
||||||
|
FLOAT_PARAMS_(15), \
|
||||||
|
.frac_size = R == 64 ? 63 : R, \
|
||||||
|
.frac_shift = 0, \
|
||||||
|
.round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
|
||||||
|
|
||||||
|
/*
 * floatx80 format parameters, one entry per rounding precision.
 * Indexed by the floatx80_precision_* values: _s uses R = 23, _d uses
 * R = 52 and _x uses R = 64 as the FLOATX80_PARAMS argument.
 */
static const FloatFmt floatx80_params[3] = {
|
||||||
|
[floatx80_precision_s] = { FLOATX80_PARAMS(23) },
|
||||||
|
[floatx80_precision_d] = { FLOATX80_PARAMS(52) },
|
||||||
|
[floatx80_precision_x] = { FLOATX80_PARAMS(64) },
|
||||||
|
};
|
||||||
|
|
||||||
/* Unpack a float to parts, but do not canonicalize. */
|
/* Unpack a float to parts, but do not canonicalize. */
|
||||||
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
|
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
|
||||||
{
|
{
|
||||||
@ -648,6 +660,16 @@ static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
|
|||||||
unpack_raw64(p, &float64_params, f);
|
unpack_raw64(p, &float64_params, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Split a floatx80 into raw fields without classifying or normalizing it.
 *
 * cls is set to float_class_unclassified.  sign and exp are taken from
 * f.high (assuming extract32(value, start, length): sign is bit 15, exp is
 * bits 0..14).  All 64 bits of f.low are copied into frac_hi unchanged.
 * Members of FloatParts128 not named in the compound literal are
 * zero-initialized.
 */
static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
|
||||||
|
{
|
||||||
|
*p = (FloatParts128) {
|
||||||
|
.cls = float_class_unclassified,
|
||||||
|
.sign = extract32(f.high, 15, 1),
|
||||||
|
.exp = extract32(f.high, 0, 15),
|
||||||
|
.frac_hi = f.low
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
static void float128_unpack_raw(FloatParts128 *p, float128 f)
|
static void float128_unpack_raw(FloatParts128 *p, float128 f)
|
||||||
{
|
{
|
||||||
const int f_size = float128_params.frac_size - 64;
|
const int f_size = float128_params.frac_size - 64;
|
||||||
@ -1536,6 +1558,92 @@ static float128 float128_round_pack_canonical(FloatParts128 *p,
|
|||||||
return float128_pack_raw(p);
|
return float128_pack_raw(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Unpack floatx80 value f into *p and classify it.
 *
 * Returns false if the encoding is invalid, as reported by
 * floatx80_invalid_encoding(f); in that case float_flag_invalid is raised
 * on s and *p is not written.  Otherwise returns true with *p filled in.
 *
 * Aborts via g_assert_not_reached() if s->floatx80_rounding_precision is
 * not one of the three floatx80_precision_* values.
 */
|
||||||
|
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
/* Ensure rounding precision is set before beginning. */
|
||||||
|
switch (s->floatx80_rounding_precision) {
|
||||||
|
case floatx80_precision_x:
|
||||||
|
case floatx80_precision_d:
|
||||||
|
case floatx80_precision_s:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reject invalid encodings before touching *p. */
if (unlikely(floatx80_invalid_encoding(f))) {
|
||||||
|
float_raise(float_flag_invalid, s);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
floatx80_unpack_raw(p, f);
|
||||||
|
|
||||||
|
/*
 * Any exponent other than the all-ones maximum is handed to
 * parts_canonicalize, always with the full-precision (_x) format;
 * the rounding precision only matters when packing.
 */
if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
|
||||||
|
parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
|
||||||
|
} else {
|
||||||
|
/* The explicit integer bit is ignored, after invalid checks. */
|
||||||
|
p->frac_hi &= MAKE_64BIT_MASK(0, 63);
|
||||||
|
/* Zero remaining fraction means infinity; otherwise a NaN. */
p->cls = (p->frac_hi == 0 ? float_class_inf
|
||||||
|
: parts_is_snan_frac(p->frac_hi, s)
|
||||||
|
? float_class_snan : float_class_qnan);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Convert classified parts *p back into a floatx80, using the format that
 * matches s->floatx80_rounding_precision.
 *
 * Normal values go through parts_uncanon_normal (presumably the rounding
 * step; it is defined elsewhere).  If that leaves the exponent at exp_max,
 * the value is packed as infinity.  Infinity, zero and NaN are packed
 * directly.  Any other class aborts via g_assert_not_reached().  The sign
 * of the result is always p->sign.  *p may be modified.
 */
static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
|
||||||
|
float_status *s)
|
||||||
|
{
|
||||||
|
/* Format parameters for the currently selected rounding precision. */
const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
|
||||||
|
uint64_t frac;
|
||||||
|
int exp;
|
||||||
|
|
||||||
|
switch (p->cls) {
|
||||||
|
case float_class_normal:
|
||||||
|
if (s->floatx80_rounding_precision == floatx80_precision_x) {
|
||||||
|
/* Full precision: operate directly on the 128-bit parts. */
parts_uncanon_normal(p, s, fmt);
|
||||||
|
frac = p->frac_hi;
|
||||||
|
exp = p->exp;
|
||||||
|
} else {
|
||||||
|
/*
 * Reduced precision: narrow to FloatParts64 first.
 * NOTE(review): frac_truncjam presumably truncates the fraction
 * while folding the dropped bits into a sticky bit; confirm
 * against its definition.
 */
FloatParts64 p64;
|
||||||
|
|
||||||
|
p64.sign = p->sign;
|
||||||
|
p64.exp = p->exp;
|
||||||
|
frac_truncjam(&p64, p);
|
||||||
|
parts_uncanon_normal(&p64, s, fmt);
|
||||||
|
frac = p64.frac;
|
||||||
|
exp = p64.exp;
|
||||||
|
}
|
||||||
|
if (exp != fmt->exp_max) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* rounded to inf -- fall through to set frac correctly */
|
||||||
|
|
||||||
|
case float_class_inf:
|
||||||
|
/* x86 and m68k differ in the setting of the integer bit. */
|
||||||
|
frac = floatx80_infinity_low;
|
||||||
|
exp = fmt->exp_max;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case float_class_zero:
|
||||||
|
frac = 0;
|
||||||
|
exp = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case float_class_snan:
|
||||||
|
case float_class_qnan:
|
||||||
|
/* NaNs have the integer bit set. */
|
||||||
|
frac = p->frac_hi | (1ull << 63);
|
||||||
|
exp = fmt->exp_max;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The sign is taken from p on every path, including the p64 one. */
return packFloatx80(p->sign, exp, frac);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Addition and subtraction
|
* Addition and subtraction
|
||||||
*/
|
*/
|
||||||
@ -1725,6 +1833,30 @@ float128 float128_sub(float128 a, float128 b, float_status *status)
|
|||||||
return float128_addsub(a, b, status, true);
|
return float128_addsub(a, b, status, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Common implementation of floatx80_add and floatx80_sub.
 *
 * Both operands are unpacked with floatx80_unpack_canonical.  If either
 * call reports an invalid encoding (float_flag_invalid has already been
 * raised there), the default NaN is returned.  Otherwise the operands are
 * combined by parts_addsub, with 'subtract' passed through unchanged, and
 * the result is packed by floatx80_round_pack_canonical.
 */
static floatx80 QEMU_FLATTEN
|
||||||
|
floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
|
||||||
|
{
|
||||||
|
FloatParts128 pa, pb, *pr;
|
||||||
|
|
||||||
|
/* Short-circuit: b is not unpacked or checked when a is invalid. */
if (!floatx80_unpack_canonical(&pa, a, status) ||
|
||||||
|
!floatx80_unpack_canonical(&pb, b, status)) {
|
||||||
|
return floatx80_default_nan(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
pr = parts_addsub(&pa, &pb, status, subtract);
|
||||||
|
return floatx80_round_pack_canonical(pr, status);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Returns the sum of the extended double-precision values a and b.
 * Thin wrapper: calls floatx80_addsub with subtract == false.
 */
floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
|
||||||
|
{
|
||||||
|
return floatx80_addsub(a, b, status, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Returns the difference a - b of the extended double-precision values.
 * Thin wrapper: calls floatx80_addsub with subtract == true.
 */
floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
|
||||||
|
{
|
||||||
|
return floatx80_addsub(a, b, status, true);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Multiplication
|
* Multiplication
|
||||||
*/
|
*/
|
||||||
@ -5731,205 +5863,6 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of adding the absolute values of the extended double-
|
|
||||||
| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
|
|
||||||
| negated before being returned. `zSign' is ignored if the result is a NaN.
|
|
||||||
| The addition is performed according to the IEC/IEEE Standard for Binary
|
|
||||||
| Floating-Point Arithmetic.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
|
|
||||||
float_status *status)
|
|
||||||
{
|
|
||||||
int32_t aExp, bExp, zExp;
|
|
||||||
uint64_t aSig, bSig, zSig0, zSig1;
|
|
||||||
int32_t expDiff;
|
|
||||||
|
|
||||||
aSig = extractFloatx80Frac( a );
|
|
||||||
aExp = extractFloatx80Exp( a );
|
|
||||||
bSig = extractFloatx80Frac( b );
|
|
||||||
bExp = extractFloatx80Exp( b );
|
|
||||||
expDiff = aExp - bExp;
|
|
||||||
if ( 0 < expDiff ) {
|
|
||||||
if ( aExp == 0x7FFF ) {
|
|
||||||
if ((uint64_t)(aSig << 1)) {
|
|
||||||
return propagateFloatx80NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
if ( bExp == 0 ) --expDiff;
|
|
||||||
shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
|
|
||||||
zExp = aExp;
|
|
||||||
}
|
|
||||||
else if ( expDiff < 0 ) {
|
|
||||||
if ( bExp == 0x7FFF ) {
|
|
||||||
if ((uint64_t)(bSig << 1)) {
|
|
||||||
return propagateFloatx80NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return packFloatx80(zSign,
|
|
||||||
floatx80_infinity_high,
|
|
||||||
floatx80_infinity_low);
|
|
||||||
}
|
|
||||||
if ( aExp == 0 ) ++expDiff;
|
|
||||||
shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
|
|
||||||
zExp = bExp;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if ( aExp == 0x7FFF ) {
|
|
||||||
if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
|
|
||||||
return propagateFloatx80NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
zSig1 = 0;
|
|
||||||
zSig0 = aSig + bSig;
|
|
||||||
if ( aExp == 0 ) {
|
|
||||||
if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
|
|
||||||
/* At least one of the values is a pseudo-denormal,
|
|
||||||
* and there is a carry out of the result. */
|
|
||||||
zExp = 1;
|
|
||||||
goto shiftRight1;
|
|
||||||
}
|
|
||||||
if (zSig0 == 0) {
|
|
||||||
return packFloatx80(zSign, 0, 0);
|
|
||||||
}
|
|
||||||
normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
|
|
||||||
goto roundAndPack;
|
|
||||||
}
|
|
||||||
zExp = aExp;
|
|
||||||
goto shiftRight1;
|
|
||||||
}
|
|
||||||
zSig0 = aSig + bSig;
|
|
||||||
if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
|
|
||||||
shiftRight1:
|
|
||||||
shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
|
|
||||||
zSig0 |= UINT64_C(0x8000000000000000);
|
|
||||||
++zExp;
|
|
||||||
roundAndPack:
|
|
||||||
return roundAndPackFloatx80(status->floatx80_rounding_precision,
|
|
||||||
zSign, zExp, zSig0, zSig1, status);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of subtracting the absolute values of the extended
|
|
||||||
| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
|
|
||||||
| difference is negated before being returned. `zSign' is ignored if the
|
|
||||||
| result is a NaN. The subtraction is performed according to the IEC/IEEE
|
|
||||||
| Standard for Binary Floating-Point Arithmetic.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
|
|
||||||
float_status *status)
|
|
||||||
{
|
|
||||||
int32_t aExp, bExp, zExp;
|
|
||||||
uint64_t aSig, bSig, zSig0, zSig1;
|
|
||||||
int32_t expDiff;
|
|
||||||
|
|
||||||
aSig = extractFloatx80Frac( a );
|
|
||||||
aExp = extractFloatx80Exp( a );
|
|
||||||
bSig = extractFloatx80Frac( b );
|
|
||||||
bExp = extractFloatx80Exp( b );
|
|
||||||
expDiff = aExp - bExp;
|
|
||||||
if ( 0 < expDiff ) goto aExpBigger;
|
|
||||||
if ( expDiff < 0 ) goto bExpBigger;
|
|
||||||
if ( aExp == 0x7FFF ) {
|
|
||||||
if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
|
|
||||||
return propagateFloatx80NaN(a, b, status);
|
|
||||||
}
|
|
||||||
float_raise(float_flag_invalid, status);
|
|
||||||
return floatx80_default_nan(status);
|
|
||||||
}
|
|
||||||
if ( aExp == 0 ) {
|
|
||||||
aExp = 1;
|
|
||||||
bExp = 1;
|
|
||||||
}
|
|
||||||
zSig1 = 0;
|
|
||||||
if ( bSig < aSig ) goto aBigger;
|
|
||||||
if ( aSig < bSig ) goto bBigger;
|
|
||||||
return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
|
|
||||||
bExpBigger:
|
|
||||||
if ( bExp == 0x7FFF ) {
|
|
||||||
if ((uint64_t)(bSig << 1)) {
|
|
||||||
return propagateFloatx80NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return packFloatx80(zSign ^ 1, floatx80_infinity_high,
|
|
||||||
floatx80_infinity_low);
|
|
||||||
}
|
|
||||||
if ( aExp == 0 ) ++expDiff;
|
|
||||||
shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
|
|
||||||
bBigger:
|
|
||||||
sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
|
|
||||||
zExp = bExp;
|
|
||||||
zSign ^= 1;
|
|
||||||
goto normalizeRoundAndPack;
|
|
||||||
aExpBigger:
|
|
||||||
if ( aExp == 0x7FFF ) {
|
|
||||||
if ((uint64_t)(aSig << 1)) {
|
|
||||||
return propagateFloatx80NaN(a, b, status);
|
|
||||||
}
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
if ( bExp == 0 ) --expDiff;
|
|
||||||
shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
|
|
||||||
aBigger:
|
|
||||||
sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
|
|
||||||
zExp = aExp;
|
|
||||||
normalizeRoundAndPack:
|
|
||||||
return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
|
|
||||||
zSign, zExp, zSig0, zSig1, status);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of adding the extended double-precision floating-point
|
|
||||||
| values `a' and `b'. The operation is performed according to the IEC/IEEE
|
|
||||||
| Standard for Binary Floating-Point Arithmetic.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
|
|
||||||
{
|
|
||||||
bool aSign, bSign;
|
|
||||||
|
|
||||||
if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
|
|
||||||
float_raise(float_flag_invalid, status);
|
|
||||||
return floatx80_default_nan(status);
|
|
||||||
}
|
|
||||||
aSign = extractFloatx80Sign( a );
|
|
||||||
bSign = extractFloatx80Sign( b );
|
|
||||||
if ( aSign == bSign ) {
|
|
||||||
return addFloatx80Sigs(a, b, aSign, status);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return subFloatx80Sigs(a, b, aSign, status);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
|
||||||
| Returns the result of subtracting the extended double-precision floating-
|
|
||||||
| point values `a' and `b'. The operation is performed according to the
|
|
||||||
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
|
||||||
*----------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
|
|
||||||
{
|
|
||||||
bool aSign, bSign;
|
|
||||||
|
|
||||||
if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
|
|
||||||
float_raise(float_flag_invalid, status);
|
|
||||||
return floatx80_default_nan(status);
|
|
||||||
}
|
|
||||||
aSign = extractFloatx80Sign( a );
|
|
||||||
bSign = extractFloatx80Sign( b );
|
|
||||||
if ( aSign == bSign ) {
|
|
||||||
return subFloatx80Sigs(a, b, aSign, status);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return addFloatx80Sigs(a, b, aSign, status);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
| Returns the result of multiplying the extended double-precision floating-
|
| Returns the result of multiplying the extended double-precision floating-
|
||||||
| point values `a' and `b'. The operation is performed according to the
|
| point values `a' and `b'. The operation is performed according to the
|
||||||
|
Loading…
Reference in New Issue
Block a user