Bochs/bochs/fpu/wm_sqrt.c

/*---------------------------------------------------------------------------+
 |  wm_sqrt.c                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993,1995,1997,1999                                    |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@melbpc.org.au              |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "fpu_emu.h"

/*
  The following value indicates the trailing bits (of 96 bits)
  which may be in error when the final Newton iteration is finished
  (0x20 corresponds to the last 5 bits in error, i.e. 91 bits precision).
  A check of the following code with more than 3 billion (3.0e9) random
  and selected values showed that 0x10 was always a large enough value,
  so 0x20 should be a conservative choice.
 */
#define ERR_MARGIN 0x20


int wm_sqrt(FPU_REG *n, s32 dummy1, s32 dummy2, u16 control_w, u8 sign)
{
  u64 nn, guess, halfn, lowr, mid, upr, diff, uwork;
  s64 work;
  u32 ne, guess32, work32, diff32, mid32;
  int shifted;

  nn = significand(n);
  ne = 0;
  if ( exponent16(n) == EXP_BIAS )
    {
      /* Shift the argument right one position. */
      if ( nn & 1 )
	ne = 0x80000000;
      nn >>= 1;
      guess = n->sigh >> 2;
      shifted = 1;
    }
  else
    {
      guess = n->sigh >> 1;
      shifted = 0;
    }

  guess += 0x40000000;
  guess *= 0xaaaaaaaa;
  guess <<= 1;
  guess32 = guess >> 32;
  if ( !(guess32 & 0x80000000) )
    guess32 = 0x80000000;
  halfn = nn >> 1;

  guess32 = halfn / guess32 + (guess32 >> 1);
  guess32 = halfn / guess32 + (guess32 >> 1);
  guess32 = halfn / guess32 + (guess32 >> 1);


/*
 * Now that an estimate accurate to about 30 bits has been obtained,
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 */

  work = guess32;
  work = nn - work * guess32;
  work <<= 28;       /* 29 - 1 */
  work /= guess32;
  work <<= 3;        /* 29 + 3 = 32 */
  work += ((u64)guess32) << 32;

  if ( work == 0 )  /* This happens in one or two special cases */
    work = BX_CONST64(0xffffffffffffffff);

  guess = work;

  /* guess is now accurate to about 60 bits */


  if ( work > 0 )
    {
#ifdef PARANOID
      if ( (n->sigh != 0xffffffff) && (n->sigl != 0xffffffff) )
	{
	  EXCEPTION(EX_INTERNAL|0x213);
	}
#endif
      /* We know the answer here. */
      return FPU_round(n, 0x7fffffff, 0, control_w, sign);
    }

  /* Refine the guess to significantly more than 64 bits. */

  /* First, square the current guess. */

  guess32 = guess >> 32;
  work32 = guess;

  /* lower 32 times lower 32 */
  lowr = work32;
  lowr *= work32;

  /* lower 32 times upper 32 */
  mid = guess32;
  mid *= work32;

  /* upper 32 times upper 32 */
  upr = guess32;
  upr *= guess32;

  /* upper 32 bits of the middle product times 2 */
  upr += mid >> (32-1);

  /* lower 32 bits of the middle product times 2 */
  work32 = mid << 1;

  /* upper 32 bits of the lower product */
  mid32 = lowr >> 32;
  mid32 += work32;
  if ( mid32 < work32 )
    upr ++;

  /* We now have the first 96 bits (truncated) of the square of the guess */

  diff = upr - nn;
  diff32 = mid32 - ne;
  if ( diff32 > mid32 )
    diff --;

  if ( ((s64)diff) < 0 )
    {
      /* The difference is negative, negate it. */
      diff32 = -((s32)diff32);
      diff = ~diff;
      if ( diff32 == 0 )
	diff ++;
#ifdef PARANOID
      if ( (diff >> 32) != 0 )
	{
	  EXCEPTION(EX_INTERNAL|0x207);
	}
#endif

      diff <<= 32;
      diff |= diff32;
      work32 = diff / guess32;
      work = work32;
      work <<= 32;

      diff = diff % guess32;
      diff <<= 32;
      work32 = diff / guess32;

      work |= work32;

      work >>= 1;
      work32 = work >> 32;


      guess += work32;       /* The first 64 bits */
      guess32 = work;        /* The next 32 bits */
      /* The guess should now be good to about 90 bits */
    }
  else
    {
      /* The difference is positive. */
      diff <<= 32;
      diff |= diff32;

      work32 = diff / guess32;
      work = work32;
      work <<= 32;

      diff = diff % guess32;
      diff <<= 32;
      work32 = diff / guess32;

      work |= work32;

      work >>= 1;
      work32 = work >> 32;

      guess32 = work;        /* The last 32 bits (of 96) */
      guess32 = -guess32;
      if ( guess32 )
	guess --;
      guess -= work32;       /* The first 64 bits */
      /* The guess should now be good to about 90 bits */
    }


  setexponent16(n, 0);

  if ( guess32 >= (u32) -ERR_MARGIN )
    {
      /* Nearly exact, we round the 64 bit result upward. */
      guess ++;
    }
  else if ( (guess32 > ERR_MARGIN) &&
	   ((guess32 < 0x80000000-ERR_MARGIN)
	    || (guess32 > 0x80000000+ERR_MARGIN)) )
    {
      /* We have enough accuracy to decide rounding */
      significand(n) = guess;
      return FPU_round(n, guess32, 0, control_w, sign);
    }

  if ( (guess32 <= ERR_MARGIN) || (guess32 >= (u32) -ERR_MARGIN) )
    {
      /*
       * This is an easy case because x^1/2 is monotonic.
       * We need just find the square of our estimate, compare it
       * with the argument, and deduce whether our estimate is
       * above, below, or exact. We use the fact that the estimate
       * is known to be accurate to about 90 bits.
       */


      /* We compute the lower 64 bits of the 128 bit product */
      work32 = guess;
      lowr = work32;
      lowr *= work32;

      uwork = guess >> 32;
      work32 = guess;
      uwork *= work32;
      uwork <<= 33;   /* 33 = 32+1 (for two times the product) */

      lowr += uwork;  /* We now have the 64 bits */

      /* We need only look at bits 65..96 of the square of guess. */
      if ( shifted )
	work32 = lowr >> 31;
      else
	work32 = lowr >> 32;

#ifdef PARANOID
      if ( ((s32)work32 > 3*ERR_MARGIN) || ((s32)work32 < -3*ERR_MARGIN) )
	{
	  EXCEPTION(EX_INTERNAL|0x214);
	}
#endif

      significand(n) = guess;
      if ( (s32)work32 > 0 )
	{
	  /* guess is too large */
	  significand(n) --;
	  return FPU_round(n, 0xffffff00, 0, control_w, sign);
	}
      else if ( (s32)work32 < 0 )
	{
	  /* guess is a little too small */
	  return FPU_round(n, 0x000000ff, 0, control_w, sign);
	}

      else if ( (u32)lowr != 0 )
       {

         /* guess is too large */
         significand(n) --;
         return FPU_round(n, 0xffffff00, 0, control_w, sign);
       }

      /* Our guess is precise. */
      return FPU_round(n, 0, 0, control_w, sign);
    }

  /* Very similar to the case above, but the last bit is near 0.5.
     We handle this just like the case above but we shift everything
     by one bit. */


  uwork = guess;
  uwork <<= 1;
  uwork |= 1;      /* add the half bit */

  /* We compute the lower 64 bits of the 128 bit product */
  work32 = uwork;
  lowr = work32;
  lowr *= work32;

  work32 = uwork >> 32;
  uwork &= 0xffffffff;
  uwork *= work32;
  uwork <<= 33;   /* 33 = 32+1 (for two times the product) */

  lowr += uwork;  /* We now have the 64 bits. The lowest 32 bits of lowr
                    are not all zero (the lsb is 1). */

  /* We need only look at bits 65..96 of the square of guess. */
  if ( shifted )
    work32 = lowr >> 31;
  else
    work32 = lowr >> 32;

#ifdef PARANOID
  if ( ((s32)work32 > 4*3*ERR_MARGIN) || ((s32)work32 < -4*3*ERR_MARGIN) )
    {
      EXCEPTION(EX_INTERNAL|0x215);
    }
#endif

  significand(n) = guess;
  if ( (s32)work32 < 0 )
    {
      /* guess plus half bit is a little too small */
      return FPU_round(n, 0x800000ff, 0, control_w, sign);
    }
  else /* Note that the lower 64 bits of the product are not all zero */
    {
      /* guess plus half bit is too large */
      return FPU_round(n, 0x7fffff00, 0, control_w, sign);
    }

  /*
    Note that the result of a square root cannot have precisely a half bit
    of a least significant place (it is left as an exercise for the reader
    to prove this! (hint: 65 bit*65 bit => n bits)).
  */

}