Bochs/bochs/fpu/wm_sqrt.c
Bryce Denney a6ad4c3903 - Applied const64bit patch:
For compilers (such as Microsoft VC++) which don't allow "LL" after a
  constant to make it 64-bit, this patch declares all such constants as
  BX_CONST64(value).  Then in config.in, a switch called
  BX_64BIT_CONSTANTS_USE_LL controls whether the macro puts the
  LL's in or not.  Configure sets the macro, if you're on a platform
  that can run such things.
2001-04-10 02:06:10 +00:00

341 lines
9.0 KiB
C

/*---------------------------------------------------------------------------+
| wm_sqrt.c |
| |
| Fixed point arithmetic square root evaluation. |
| |
| Copyright (C) 1992,1993,1995,1997,1999 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@melbpc.org.au |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| returns the square root of n in n. |
| |
| Use Newton's method to compute the square root of a number, which must |
| be in the range [1.0 .. 4.0), to 64 bits accuracy. |
| Does not check the sign or tag of the argument. |
| Sets the exponent, but not the sign or tag of the result. |
| |
| The guess is kept in the 64 bit local variable `guess` (this note |
| previously named the register pair %esi:%edi). |
+---------------------------------------------------------------------------*/
#include "exception.h"
#include "fpu_emu.h"
/*
 * ERR_MARGIN bounds the trailing bits (of the 96 bit estimate) which may
 * be in error when the final Newton iteration is finished.  0x20
 * corresponds to the last 5 bits being in error, i.e. 91 bits of
 * precision.
 *
 * A check of the code below with more than 3 billion (3.0e9) random and
 * selected values showed that 0x10 was always a large enough value, so
 * 0x20 should be a conservative choice.
 *
 * wm_sqrt() compares the lowest 32 bits of its 96 bit estimate against
 * this margin: when those bits lie within ERR_MARGIN of 0, of 0x80000000
 * or of 2^32, the rounding direction is settled by squaring the estimate
 * instead of trusting the low bits directly.
 */
#define ERR_MARGIN 0x20
/*
 * wm_sqrt -- replace the significand of *n by its square root.
 *
 * Parameters:
 *   n          register operated on.  Its significand (read through
 *              significand(n) and n->sigh) is the argument and is
 *              overwritten with the result.  Its 16 bit exponent is read
 *              once to select the scaling and is set to 0 on every exit.
 *   dummy1,
 *   dummy2     not used by this function.
 *   control_w,
 *   sign       handed unchanged to FPU_round().
 *
 * Returns whatever FPU_round() returns.
 *
 * Outline:
 *   1. a linear first guess followed by three 32 bit Newton steps;
 *   2. one Newton step producing a 64 bit estimate;
 *   3. one more step producing a 96 bit estimate (guess : guess32);
 *   4. if the low 32 bits are too close to 0, 1/2 or 1 unit of the last
 *      place to be trusted, square the estimate to decide the rounding.
 *
 * Intermediate values which can exceed the s64 range, or which are
 * shifted left while possibly negative, are evaluated in u64 and
 * converted back, so that the wrap-around the algorithm relies on does
 * not depend on signed overflow.
 */
int wm_sqrt(FPU_REG *n, s32 dummy1, s32 dummy2, u16 control_w, u8 sign)
{
  u64 nn, guess, halfn, lowr, mid, upr, diff, uwork;
  s64 work;
  u32 ne, guess32, work32, diff32, mid32;
  int shifted;

  nn = significand(n);
  ne = 0;
  if ( exponent16(n) == EXP_BIAS )
    {
      /* Shift the argument right one position; the bit shifted out is
         kept in the top bit of ne. */
      if ( nn & 1 )
        ne = 0x80000000;
      nn >>= 1;
      guess = n->sigh >> 2;
      shifted = 1;
    }
  else
    {
      guess = n->sigh >> 1;
      shifted = 0;
    }

  /* First guess: a linear function of the high word of the argument
     (0xaaaaaaaa is roughly two thirds of 2^32). */
  guess += 0x40000000;
  guess *= 0xaaaaaaaa;
  guess <<= 1;
  guess32 = guess >> 32;
  /* Keep the top bit set; this also guarantees a non-zero divisor. */
  if ( !(guess32 & 0x80000000) )
    guess32 = 0x80000000;

  /* Three Newton steps on 32 bits: g := (nn / g + g) / 2. */
  halfn = nn >> 1;
  guess32 = halfn / guess32 + (guess32 >> 1);
  guess32 = halfn / guess32 + (guess32 >> 1);
  guess32 = halfn / guess32 + (guess32 >> 1);

  /*
   * Now that an estimate accurate to about 30 bits has been obtained,
   * we improve it to 60 bits or so.
   *
   * The strategy from now on is to compute new estimates from
   *      guess := guess + (n - guess^2) / (2 * guess)
   */

  /* The square of guess32 can be 2^63 or more, so the product and the
     subtraction are done in u64; only the (small) difference is then
     treated as a signed quantity. */
  work = (s64)(nn - (u64)guess32 * guess32);
  work = (s64)((u64)work << 28);        /* 29 - 1 */
  work /= guess32;                      /* signed: the residual may be negative */
  work = (s64)((u64)work << 3);         /* 29 + 3 = 32 */
  work = (s64)((u64)work + (((u64)guess32) << 32));

  if ( work == 0 )  /* This happens in one or two special cases */
    work = BX_CONST64(0xffffffffffffffff);

  guess = work;

  /* guess is now accurate to about 60 bits */

  /* A valid estimate has its top bit set and is therefore negative when
     viewed as s64.  A positive value means the addition above wrapped. */
  if ( work > 0 )
    {
#ifdef PARANOID
      /* The fixed answer returned below only holds for an all-ones
         significand; anything else is an internal error. */
      if ( (n->sigh != 0xffffffff) || (n->sigl != 0xffffffff) )
        {
          EXCEPTION(EX_INTERNAL|0x213);
        }
#endif
      /* We know the answer here.  Set the exponent as every other exit
         does, so that the result is always delivered with exponent 0. */
      setexponent16(n, 0);
      return FPU_round(n, 0x7fffffff, 0, control_w, sign);
    }

  /* Refine the guess to significantly more than 64 bits. */

  /* First, square the current guess. */

  guess32 = guess >> 32;
  work32 = guess;

  /* lower 32 times lower 32 */
  lowr = work32;
  lowr *= work32;

  /* lower 32 times upper 32 */
  mid = guess32;
  mid *= work32;

  /* upper 32 times upper 32 */
  upr = guess32;
  upr *= guess32;

  /* upper 32 bits of the middle product times 2 */
  upr += mid >> (32-1);

  /* lower 32 bits of the middle product times 2 */
  work32 = mid << 1;

  /* upper 32 bits of the lower product */
  mid32 = lowr >> 32;
  mid32 += work32;
  if ( mid32 < work32 )         /* carry out of the middle word */
    upr ++;

  /* We now have the first 96 bits (truncated) of the square of the guess */

  /* 96 bit subtraction (upr : mid32) - (nn : ne), with borrow. */
  diff = upr - nn;
  diff32 = mid32 - ne;
  if ( diff32 > mid32 )
    diff --;

  if ( ((s64)diff) < 0 )
    {
      /* The difference is negative, negate it (96 bit two's complement).
         The low word is negated as an unsigned value so that 0x80000000
         is handled without signed overflow. */
      diff32 = 0u - diff32;
      diff = ~diff;
      if ( diff32 == 0 )
        diff ++;
#ifdef PARANOID
      if ( (diff >> 32) != 0 )
        {
          EXCEPTION(EX_INTERNAL|0x207);
        }
#endif
      /* Divide the low 64 bits of the difference by guess32, producing a
         64 bit quotient in two 32 bit steps, then halve it. */
      diff <<= 32;
      diff |= diff32;
      work32 = diff / guess32;
      work = (s64)(((u64)work32) << 32);
      diff = diff % guess32;
      diff <<= 32;
      work32 = diff / guess32;

      work |= work32;
      work >>= 1;
      work32 = work >> 32;

      guess += work32;          /* The first 64 bits */
      guess32 = work;           /* The next 32 bits */
      /* The guess should now be good to about 90 bits */
    }
  else
    {
      /* The difference is positive. */
      diff <<= 32;
      diff |= diff32;
      work32 = diff / guess32;
      work = (s64)(((u64)work32) << 32);
      diff = diff % guess32;
      diff <<= 32;
      work32 = diff / guess32;

      work |= work32;
      work >>= 1;
      work32 = work >> 32;

      /* Subtract the 64 bit correction from (guess : 0), borrowing from
         guess when the low word is non-zero. */
      guess32 = work;           /* The last 32 bits (of 96) */
      guess32 = -guess32;
      if ( guess32 )
        guess --;
      guess -= work32;          /* The first 64 bits */
      /* The guess should now be good to about 90 bits */
    }

  setexponent16(n, 0);

  if ( guess32 >= (u32) -ERR_MARGIN )
    {
      /* Nearly exact, we round the 64 bit result upward. */
      guess ++;
    }
  else if ( (guess32 > ERR_MARGIN) &&
            ((guess32 < 0x80000000-ERR_MARGIN)
             || (guess32 > 0x80000000+ERR_MARGIN)) )
    {
      /* We have enough accuracy to decide rounding */
      significand(n) = guess;
      return FPU_round(n, guess32, 0, control_w, sign);
    }

  if ( (guess32 <= ERR_MARGIN) || (guess32 >= (u32) -ERR_MARGIN) )
    {
      /*
       * This is an easy case because x^1/2 is monotonic.
       * We need just find the square of our estimate, compare it
       * with the argument, and deduce whether our estimate is
       * above, below, or exact. We use the fact that the estimate
       * is known to be accurate to about 90 bits.
       */

      /* We compute the lower 64 bits of the 128 bit product */
      work32 = guess;
      lowr = work32;
      lowr *= work32;

      uwork = guess >> 32;
      work32 = guess;
      uwork *= work32;
      uwork <<= 33;   /* 33 = 32+1 (for two times the product) */

      lowr += uwork;  /* We now have the 64 bits */

      /* We need only look at bits 65..96 of the square of guess. */
      if ( shifted )
        work32 = lowr >> 31;
      else
        work32 = lowr >> 32;

#ifdef PARANOID
      if ( ((s32)work32 > 3*ERR_MARGIN) || ((s32)work32 < -3*ERR_MARGIN) )
        {
          EXCEPTION(EX_INTERNAL|0x214);
        }
#endif

      significand(n) = guess;
      if ( (s32)work32 > 0 )
        {
          /* guess is too large */
          significand(n) --;
          return FPU_round(n, 0xffffff00, 0, control_w, sign);
        }
      else if ( (s32)work32 < 0 )
        {
          /* guess is a little too small */
          return FPU_round(n, 0x000000ff, 0, control_w, sign);
        }
      else if ( (u32)lowr != 0 )
        {
          /* guess is too large */
          significand(n) --;
          return FPU_round(n, 0xffffff00, 0, control_w, sign);
        }

      /* Our guess is precise. */
      return FPU_round(n, 0, 0, control_w, sign);
    }

  /* Very similar to the case above, but the last bit is near 0.5.
     We handle this just like the case above but we shift everything
     by one bit. */

  uwork = guess;
  uwork <<= 1;
  uwork |= 1;      /* add the half bit */

  /* We compute the lower 64 bits of the 128 bit product */
  work32 = uwork;
  lowr = work32;
  lowr *= work32;

  work32 = uwork >> 32;
  uwork &= 0xffffffff;
  uwork *= work32;
  uwork <<= 33;   /* 33 = 32+1 (for two times the product) */

  lowr += uwork;  /* We now have the 64 bits. The lowest 32 bits of lowr
                     are not all zero (the lsb is 1). */

  /* We need only look at bits 65..96 of the square of guess. */
  if ( shifted )
    work32 = lowr >> 31;
  else
    work32 = lowr >> 32;

#ifdef PARANOID
  if ( ((s32)work32 > 4*3*ERR_MARGIN) || ((s32)work32 < -4*3*ERR_MARGIN) )
    {
      EXCEPTION(EX_INTERNAL|0x215);
    }
#endif

  significand(n) = guess;

  /*
    Note that the result of a square root cannot have precisely a half bit
    of a least significant place (it is left as an exercise for the reader
    to prove this! (hint: 65 bit*65 bit => n bits)).
  */
  if ( (s32)work32 < 0 )
    {
      /* guess plus half bit is a little too small */
      return FPU_round(n, 0x800000ff, 0, control_w, sign);
    }
  else /* Note that the lower 64 bits of the product are not all zero */
    {
      /* guess plus half bit is too large */
      return FPU_round(n, 0x7fffff00, 0, control_w, sign);
    }
}