mirror of
https://git.musl-libc.org/git/musl
synced 2025-02-15 09:43:57 +03:00
math: new software sqrtf
same method as in sqrt, this was tested on all inputs against an sqrtf instruction. (the only difference found was that x86 sqrtf does not signal the x86 specific input-denormal exception on negative subnormal inputs while the software sqrtf does, this is fine as it was designed for ieee754 exceptions only.) there is known faster method: "Computing Floating-Point Square Roots via Bivariate Polynomial Evaluation" that computes sqrtf directly via pipelined polynomial evaluation which allows more parallelism, but the design does not generalize easily to higher precisions.
This commit is contained in:
parent
97e9b73d59
commit
b1756ec884
146
src/math/sqrtf.c
146
src/math/sqrtf.c
@ -1,83 +1,83 @@
|
||||
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
|
||||
/*
|
||||
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
||||
*/
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include "libm.h"
|
||||
#include "sqrt_data.h"
|
||||
|
||||
static const float tiny = 1.0e-30;
|
||||
#define FENV_SUPPORT 1
|
||||
|
||||
static inline uint32_t mul32(uint32_t a, uint32_t b)
|
||||
{
|
||||
return (uint64_t)a*b >> 32;
|
||||
}
|
||||
|
||||
/* see sqrt.c for more detailed comments. */
|
||||
|
||||
float sqrtf(float x)
|
||||
{
|
||||
float z;
|
||||
int32_t sign = (int)0x80000000;
|
||||
int32_t ix,s,q,m,t,i;
|
||||
uint32_t r;
|
||||
uint32_t ix, m, m1, m0, even, ey;
|
||||
|
||||
GET_FLOAT_WORD(ix, x);
|
||||
|
||||
/* take care of Inf and NaN */
|
||||
if ((ix&0x7f800000) == 0x7f800000)
|
||||
return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
|
||||
|
||||
/* take care of zero */
|
||||
if (ix <= 0) {
|
||||
if ((ix&~sign) == 0)
|
||||
return x; /* sqrt(+-0) = +-0 */
|
||||
if (ix < 0)
|
||||
return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
|
||||
}
|
||||
/* normalize x */
|
||||
m = ix>>23;
|
||||
if (m == 0) { /* subnormal x */
|
||||
for (i = 0; (ix&0x00800000) == 0; i++)
|
||||
ix<<=1;
|
||||
m -= i - 1;
|
||||
}
|
||||
m -= 127; /* unbias exponent */
|
||||
ix = (ix&0x007fffff)|0x00800000;
|
||||
if (m&1) /* odd m, double x to make it even */
|
||||
ix += ix;
|
||||
m >>= 1; /* m = [m/2] */
|
||||
|
||||
/* generate sqrt(x) bit by bit */
|
||||
ix += ix;
|
||||
q = s = 0; /* q = sqrt(x) */
|
||||
r = 0x01000000; /* r = moving bit from right to left */
|
||||
|
||||
while (r != 0) {
|
||||
t = s + r;
|
||||
if (t <= ix) {
|
||||
s = t+r;
|
||||
ix -= t;
|
||||
q += r;
|
||||
}
|
||||
ix += ix;
|
||||
r >>= 1;
|
||||
ix = asuint(x);
|
||||
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
||||
/* x < 0x1p-126 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return x;
|
||||
if (ix == 0x7f800000)
|
||||
return x;
|
||||
if (ix > 0x7f800000)
|
||||
return __math_invalidf(x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint(x * 0x1p23f);
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
|
||||
/* use floating add to find out rounding direction */
|
||||
if (ix != 0) {
|
||||
z = 1.0f - tiny; /* raise inexact flag */
|
||||
if (z >= 1.0f) {
|
||||
z = 1.0f + tiny;
|
||||
if (z > 1.0f)
|
||||
q += 2;
|
||||
else
|
||||
q += q & 1;
|
||||
}
|
||||
/* x = 4^e m; with int e and m in [1, 4). */
|
||||
even = ix & 0x00800000;
|
||||
m1 = (ix << 8) | 0x80000000;
|
||||
m0 = (ix << 7) & 0x7fffffff;
|
||||
m = even ? m0 : m1;
|
||||
|
||||
/* 2^e is the exponent part of the return value. */
|
||||
ey = ix >> 1;
|
||||
ey += 0x3f800000 >> 1;
|
||||
ey &= 0x7f800000;
|
||||
|
||||
/* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations. */
|
||||
static const uint32_t three = 0xc0000000;
|
||||
uint32_t r, s, d, u, i;
|
||||
i = (ix >> 17) % 128;
|
||||
r = (uint32_t)__rsqrt_tab[i] << 16;
|
||||
/* |r*sqrt(m) - 1| < 0x1p-8 */
|
||||
s = mul32(m, r);
|
||||
/* |s/sqrt(m) - 1| < 0x1p-8 */
|
||||
d = mul32(s, r);
|
||||
u = three - d;
|
||||
r = mul32(r, u) << 1;
|
||||
/* |r*sqrt(m) - 1| < 0x1.7bp-16 */
|
||||
s = mul32(s, u) << 1;
|
||||
/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
|
||||
d = mul32(s, r);
|
||||
u = three - d;
|
||||
s = mul32(s, u);
|
||||
/* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
|
||||
s = (s - 1)>>6;
|
||||
/* s < sqrt(m) < s + 0x1.08p-23 */
|
||||
|
||||
/* compute nearest rounded result. */
|
||||
uint32_t d0, d1, d2;
|
||||
float y, t;
|
||||
d0 = (m << 16) - s*s;
|
||||
d1 = s - d0;
|
||||
d2 = d1 + s + 1;
|
||||
s += d1 >> 31;
|
||||
s &= 0x007fffff;
|
||||
s |= ey;
|
||||
y = asfloat(s);
|
||||
if (FENV_SUPPORT) {
|
||||
/* handle rounding and inexact exception. */
|
||||
uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000;
|
||||
tiny |= (d1^d2) & 0x80000000;
|
||||
t = asfloat(tiny);
|
||||
y = eval_as_float(y + t);
|
||||
}
|
||||
ix = (q>>1) + 0x3f000000;
|
||||
SET_FLOAT_WORD(z, ix + ((uint32_t)m << 23));
|
||||
return z;
|
||||
return y;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user