softfloat: Move floatN_log2 to softfloat-parts.c.inc
Rename to parts$N_log2. Though this is partly a ruse, since I do not believe the code will succeed for float128 without work. Which is ok for now, because we do not need this for more than float32 and float64. Since berkeley-testfloat-3 doesn't support log2, compare float64_log2 vs the system log2. Fix the errors for inputs near 1.0: test: 3ff00000000000b0 +0x1.00000000000b0p+0 sf: 3d2fa00000000000 +0x1.fa00000000000p-45 libm: 3d2fbd422b1bd36f +0x1.fbd422b1bd36fp-45 Error in fraction: 32170028290927 ulp test: 3feec24f6770b100 +0x1.ec24f6770b100p-1 sf: bfad3740d13c9ec0 -0x1.d3740d13c9ec0p-5 libm: bfad3740d13c9e98 -0x1.d3740d13c9e98p-5 Error in fraction: 40 ulp Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
572c4d862f
commit
2fa3546c8f
@ -1331,3 +1331,128 @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return log2(A)
|
||||
*/
|
||||
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
|
||||
{
|
||||
uint64_t a0, a1, r, t, ign;
|
||||
FloatPartsN f;
|
||||
int i, n, a_exp, f_exp;
|
||||
|
||||
if (unlikely(a->cls != float_class_normal)) {
|
||||
switch (a->cls) {
|
||||
case float_class_snan:
|
||||
case float_class_qnan:
|
||||
parts_return_nan(a, s);
|
||||
return;
|
||||
case float_class_zero:
|
||||
/* log2(0) = -inf */
|
||||
a->cls = float_class_inf;
|
||||
a->sign = 1;
|
||||
return;
|
||||
case float_class_inf:
|
||||
if (unlikely(a->sign)) {
|
||||
goto d_nan;
|
||||
}
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
g_assert_not_reached();
|
||||
}
|
||||
if (unlikely(a->sign)) {
|
||||
goto d_nan;
|
||||
}
|
||||
|
||||
/* TODO: This algorithm looses bits too quickly for float128. */
|
||||
g_assert(N == 64);
|
||||
|
||||
a_exp = a->exp;
|
||||
f_exp = -1;
|
||||
|
||||
r = 0;
|
||||
t = DECOMPOSED_IMPLICIT_BIT;
|
||||
a0 = a->frac_hi;
|
||||
a1 = 0;
|
||||
|
||||
n = fmt->frac_size + 2;
|
||||
if (unlikely(a_exp == -1)) {
|
||||
/*
|
||||
* When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
|
||||
* When the value is very close to 1.0, there are lots of 1's in
|
||||
* the msb parts of the fraction. At the end, when we subtract
|
||||
* this value from -1.0, we can see a catastrophic loss of precision,
|
||||
* as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
|
||||
* bits of y in the final result. To minimize this, compute as many
|
||||
* digits as we can.
|
||||
* ??? This case needs another algorithm to avoid this.
|
||||
*/
|
||||
n = fmt->frac_size * 2 + 2;
|
||||
/* Don't compute a value overlapping the sticky bit */
|
||||
n = MIN(n, 62);
|
||||
}
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
if (a1) {
|
||||
mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
|
||||
} else if (a0 & 0xffffffffull) {
|
||||
mul64To128(a0, a0, &a0, &a1);
|
||||
} else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
|
||||
a0 >>= 32;
|
||||
a0 *= a0;
|
||||
} else {
|
||||
goto exact;
|
||||
}
|
||||
|
||||
if (a0 & DECOMPOSED_IMPLICIT_BIT) {
|
||||
if (unlikely(a_exp == 0 && r == 0)) {
|
||||
/*
|
||||
* When a_exp == 0, we're computing the log2 of a value
|
||||
* [1.0,2.0). When the value is very close to 1.0, there
|
||||
* are lots of 0's in the msb parts of the fraction.
|
||||
* We need to compute more digits to produce a correct
|
||||
* result -- restart at the top of the fraction.
|
||||
* ??? This is likely to lose precision quickly, as for
|
||||
* float128; we may need another method.
|
||||
*/
|
||||
f_exp -= i;
|
||||
t = r = DECOMPOSED_IMPLICIT_BIT;
|
||||
i = 0;
|
||||
} else {
|
||||
r |= t;
|
||||
}
|
||||
} else {
|
||||
add128(a0, a1, a0, a1, &a0, &a1);
|
||||
}
|
||||
t >>= 1;
|
||||
}
|
||||
|
||||
/* Set sticky for inexact. */
|
||||
r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
|
||||
|
||||
exact:
|
||||
parts_sint_to_float(a, a_exp, 0, s);
|
||||
if (r == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&f, 0, sizeof(f));
|
||||
f.cls = float_class_normal;
|
||||
f.frac_hi = r;
|
||||
f.exp = f_exp - frac_normalize(&f);
|
||||
|
||||
if (a_exp < 0) {
|
||||
parts_sub_normal(a, &f);
|
||||
} else if (a_exp > 0) {
|
||||
parts_add_normal(a, &f);
|
||||
} else {
|
||||
*a = f;
|
||||
}
|
||||
return;
|
||||
|
||||
d_nan:
|
||||
float_raise(float_flag_invalid, s);
|
||||
parts_default_nan(a, s);
|
||||
}
|
||||
|
126
fpu/softfloat.c
126
fpu/softfloat.c
@ -927,6 +927,12 @@ static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
|
||||
#define parts_scalbn(A, N, S) \
|
||||
PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
|
||||
|
||||
static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
|
||||
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
|
||||
|
||||
#define parts_log2(A, S, F) \
|
||||
PARTS_GENERIC_64_128(log2, A)(A, S, F)
|
||||
|
||||
/*
|
||||
* Helper functions for softfloat-parts.c.inc, per-size operations.
|
||||
*/
|
||||
@ -4060,6 +4066,27 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *s)
|
||||
return floatx80_round_pack_canonical(&p, s);
|
||||
}
|
||||
|
||||
/*
|
||||
* log2
|
||||
*/
|
||||
float32 float32_log2(float32 a, float_status *status)
|
||||
{
|
||||
FloatParts64 p;
|
||||
|
||||
float32_unpack_canonical(&p, a, status);
|
||||
parts_log2(&p, status, &float32_params);
|
||||
return float32_round_pack_canonical(&p, status);
|
||||
}
|
||||
|
||||
float64 float64_log2(float64 a, float_status *status)
|
||||
{
|
||||
FloatParts64 p;
|
||||
|
||||
float64_unpack_canonical(&p, a, status);
|
||||
parts_log2(&p, status, &float64_params);
|
||||
return float64_round_pack_canonical(&p, status);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The pattern for a default generated NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
@ -5246,56 +5273,6 @@ float32 float32_exp2(float32 a, float_status *status)
|
||||
return float32_round_pack_canonical(&rp, status);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the binary log of the single-precision floating-point value `a'.
|
||||
| The operation is performed according to the IEC/IEEE Standard for Binary
|
||||
| Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
float32 float32_log2(float32 a, float_status *status)
|
||||
{
|
||||
bool aSign, zSign;
|
||||
int aExp;
|
||||
uint32_t aSig, zSig, i;
|
||||
|
||||
a = float32_squash_input_denormal(a, status);
|
||||
aSig = extractFloat32Frac( a );
|
||||
aExp = extractFloat32Exp( a );
|
||||
aSign = extractFloat32Sign( a );
|
||||
|
||||
if ( aExp == 0 ) {
|
||||
if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
|
||||
normalizeFloat32Subnormal( aSig, &aExp, &aSig );
|
||||
}
|
||||
if ( aSign ) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
return float32_default_nan(status);
|
||||
}
|
||||
if ( aExp == 0xFF ) {
|
||||
if (aSig) {
|
||||
return propagateFloat32NaN(a, float32_zero, status);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
aExp -= 0x7F;
|
||||
aSig |= 0x00800000;
|
||||
zSign = aExp < 0;
|
||||
zSig = aExp << 23;
|
||||
|
||||
for (i = 1 << 22; i > 0; i >>= 1) {
|
||||
aSig = ( (uint64_t)aSig * aSig ) >> 23;
|
||||
if ( aSig & 0x01000000 ) {
|
||||
aSig >>= 1;
|
||||
zSig |= i;
|
||||
}
|
||||
}
|
||||
|
||||
if ( zSign )
|
||||
zSig = -zSig;
|
||||
|
||||
return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the remainder of the double-precision floating-point value `a'
|
||||
| with respect to the corresponding value `b'. The operation is performed
|
||||
@ -5384,55 +5361,6 @@ float64 float64_rem(float64 a, float64 b, float_status *status)
|
||||
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns the binary log of the double-precision floating-point value `a'.
|
||||
| The operation is performed according to the IEC/IEEE Standard for Binary
|
||||
| Floating-Point Arithmetic.
|
||||
*----------------------------------------------------------------------------*/
|
||||
float64 float64_log2(float64 a, float_status *status)
|
||||
{
|
||||
bool aSign, zSign;
|
||||
int aExp;
|
||||
uint64_t aSig, aSig0, aSig1, zSig, i;
|
||||
a = float64_squash_input_denormal(a, status);
|
||||
|
||||
aSig = extractFloat64Frac( a );
|
||||
aExp = extractFloat64Exp( a );
|
||||
aSign = extractFloat64Sign( a );
|
||||
|
||||
if ( aExp == 0 ) {
|
||||
if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
|
||||
normalizeFloat64Subnormal( aSig, &aExp, &aSig );
|
||||
}
|
||||
if ( aSign ) {
|
||||
float_raise(float_flag_invalid, status);
|
||||
return float64_default_nan(status);
|
||||
}
|
||||
if ( aExp == 0x7FF ) {
|
||||
if (aSig) {
|
||||
return propagateFloat64NaN(a, float64_zero, status);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
aExp -= 0x3FF;
|
||||
aSig |= UINT64_C(0x0010000000000000);
|
||||
zSign = aExp < 0;
|
||||
zSig = (uint64_t)aExp << 52;
|
||||
for (i = 1LL << 51; i > 0; i >>= 1) {
|
||||
mul64To128( aSig, aSig, &aSig0, &aSig1 );
|
||||
aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
|
||||
if ( aSig & UINT64_C(0x0020000000000000) ) {
|
||||
aSig >>= 1;
|
||||
zSig |= i;
|
||||
}
|
||||
}
|
||||
|
||||
if ( zSign )
|
||||
zSig = -zSig;
|
||||
return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Rounds the extended double-precision floating-point value `a'
|
||||
| to the precision provided by floatx80_rounding_precision and returns the
|
||||
|
118
tests/fp/fp-test-log2.c
Normal file
118
tests/fp/fp-test-log2.c
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* fp-test-log2.c - test QEMU's softfloat log2
|
||||
*
|
||||
* Copyright (C) 2020, Linaro, Ltd.
|
||||
*
|
||||
* License: GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
#ifndef HW_POISON_H
|
||||
#error Must define HW_POISON_H to work around TARGET_* poisoning
|
||||
#endif
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/cutils.h"
|
||||
#include <math.h>
|
||||
#include "fpu/softfloat.h"
|
||||
|
||||
typedef union {
|
||||
double d;
|
||||
float64 i;
|
||||
} ufloat64;
|
||||
|
||||
static int errors;
|
||||
|
||||
static void compare(ufloat64 test, ufloat64 real, ufloat64 soft, bool exact)
|
||||
{
|
||||
int msb;
|
||||
uint64_t ulp = UINT64_MAX;
|
||||
|
||||
if (real.i == soft.i) {
|
||||
return;
|
||||
}
|
||||
msb = 63 - __builtin_clzll(real.i ^ soft.i);
|
||||
|
||||
if (msb < 52) {
|
||||
if (real.i > soft.i) {
|
||||
ulp = real.i - soft.i;
|
||||
} else {
|
||||
ulp = soft.i - real.i;
|
||||
}
|
||||
}
|
||||
|
||||
/* glibc allows 3 ulp error in its libm-test-ulps; allow 4 here */
|
||||
if (!exact && ulp <= 4) {
|
||||
return;
|
||||
}
|
||||
|
||||
printf("test: %016" PRIx64 " %+.13a\n"
|
||||
" sf: %016" PRIx64 " %+.13a\n"
|
||||
"libm: %016" PRIx64 " %+.13a\n",
|
||||
test.i, test.d, soft.i, soft.d, real.i, real.d);
|
||||
|
||||
if (msb == 63) {
|
||||
printf("Error in sign!\n\n");
|
||||
} else if (msb >= 52) {
|
||||
printf("Error in exponent: %d\n\n",
|
||||
(int)(soft.i >> 52) - (int)(real.i >> 52));
|
||||
} else {
|
||||
printf("Error in fraction: %" PRIu64 " ulp\n\n", ulp);
|
||||
}
|
||||
|
||||
if (++errors == 20) {
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int ac, char **av)
|
||||
{
|
||||
ufloat64 test, real, soft;
|
||||
float_status qsf = {0};
|
||||
int i;
|
||||
|
||||
set_float_rounding_mode(float_round_nearest_even, &qsf);
|
||||
|
||||
test.d = 0.0;
|
||||
real.d = -__builtin_inf();
|
||||
soft.i = float64_log2(test.i, &qsf);
|
||||
compare(test, real, soft, true);
|
||||
|
||||
test.d = 1.0;
|
||||
real.d = 0.0;
|
||||
soft.i = float64_log2(test.i, &qsf);
|
||||
compare(test, real, soft, true);
|
||||
|
||||
test.d = 2.0;
|
||||
real.d = 1.0;
|
||||
soft.i = float64_log2(test.i, &qsf);
|
||||
compare(test, real, soft, true);
|
||||
|
||||
test.d = 4.0;
|
||||
real.d = 2.0;
|
||||
soft.i = float64_log2(test.i, &qsf);
|
||||
compare(test, real, soft, true);
|
||||
|
||||
test.d = 0x1p64;
|
||||
real.d = 64.0;
|
||||
soft.i = float64_log2(test.i, &qsf);
|
||||
compare(test, real, soft, true);
|
||||
|
||||
test.d = __builtin_inf();
|
||||
real.d = __builtin_inf();
|
||||
soft.i = float64_log2(test.i, &qsf);
|
||||
compare(test, real, soft, true);
|
||||
|
||||
for (i = 0; i < 10000; ++i) {
|
||||
test.d = drand48() + 1.0; /* [1.0, 2.0) */
|
||||
real.d = log2(test.d);
|
||||
soft.i = float64_log2(test.i, &qsf);
|
||||
compare(test, real, soft, false);
|
||||
|
||||
test.d = drand48() * 100; /* [0.0, 100) */
|
||||
real.d = log2(test.d);
|
||||
soft.i = float64_log2(test.i, &qsf);
|
||||
compare(test, real, soft, false);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -634,3 +634,14 @@ fpbench = executable(
|
||||
include_directories: [sfinc, include_directories(tfdir)],
|
||||
c_args: fpcflags,
|
||||
)
|
||||
|
||||
fptestlog2 = executable(
|
||||
'fp-test-log2',
|
||||
['fp-test-log2.c', '../../fpu/softfloat.c'],
|
||||
link_with: [libsoftfloat],
|
||||
dependencies: [qemuutil],
|
||||
include_directories: [sfinc],
|
||||
c_args: fpcflags,
|
||||
)
|
||||
test('fp-test-log2', fptestlog2,
|
||||
suite: ['softfloat', 'softfloat-ops'])
|
||||
|
Loading…
Reference in New Issue
Block a user