Bochs/bochs/fpu/wm_sqrt.c

/*---------------------------------------------------------------------------+
 |  wm_sqrt.c                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993,1995,1997,1999                                    |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@melbpc.org.au              |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in guess (64 bits) and guess32, not in %esi:%edi.      |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "fpu_emu.h"

/*
  The following value indicates the trailing bits (of 96 bits)
  which may be in error when the final Newton iteration is finished
  (0x20 corresponds to the last 5 bits in error, i.e. 91 bits precision).
  A check of the following code with more than 3 billion (3.0e9) random
  and selected values showed that 0x10 was always a large enough value,
  so 0x20 should be a conservative choice.
 */
#define ERR_MARGIN 0x20


/*
 * wm_sqrt: fixed point square root of the significand of *n.
 *
 *   n          Operand and result.  significand(n), n->sigh and
 *              exponent16(n) are read.  On every path except the early
 *              "we know the answer" return, significand(n) is overwritten
 *              with the 64 bit estimate and setexponent16(n, 0) is called.
 *   dummy1,
 *   dummy2     Not used by this function.
 *   control_w,
 *   sign       Passed unchanged to FPU_round().
 *
 * Returns whatever FPU_round() returns.
 *
 * The second argument given to FPU_round() is either the 32 bits of the
 * estimate which lie below the 64 bit significand, or a stand-in value
 * chosen to be on the correct side of zero / one half.
 * NOTE(review): presumably FPU_round() treats that argument as the
 * rounding extension of the significand - confirm against FPU_round().
 *
 * Arithmetic which can overflow a signed 64 bit value, or which shifts or
 * negates a possibly negative value, is carried out in unsigned types and
 * converted back.  This yields the same bit patterns on two's complement
 * targets without relying on undefined behaviour of signed arithmetic
 * (this assumes s64/s32 are signed and u64/u32 unsigned types of the
 * widths their names suggest).
 */
int wm_sqrt(FPU_REG *n, s32 dummy1, s32 dummy2, u16 control_w, u8 sign)
{
  u64 nn, guess, halfn, lowr, mid, upr, diff, uwork;
  s64 work;
  u32 ne, guess32, work32, diff32, mid32;
  int shifted;

  /* nn is the 64 bit argument; ne is a 32 bit extension word which
     receives the bit shifted out of nn (if any). */
  nn = significand(n);
  ne = 0;
  if ( exponent16(n) == EXP_BIAS )
    {
      /* Shift the argument right one position. */
      if ( nn & 1 )
	ne = 0x80000000;
      nn >>= 1;
      guess = n->sigh >> 2;
      shifted = 1;
    }
  else
    {
      guess = n->sigh >> 1;
      shifted = 0;
    }

  /* First estimate: add an offset, then multiply by 0xaaaaaaaa, double
     and keep the upper 32 bits (a scaling by roughly 4/3). */
  guess += 0x40000000;
  guess *= 0xaaaaaaaa;
  guess <<= 1;
  guess32 = guess >> 32;
  /* Force the most significant bit of the 32 bit estimate to be set. */
  if ( !(guess32 & 0x80000000) )
    guess32 = 0x80000000;
  halfn = nn >> 1;

  /* Three iterations of  guess := (n/2)/guess + guess/2  on the 32 bit
     estimate.  Each 64 bit quotient is truncated to 32 bits on assignment. */
  guess32 = halfn / guess32 + (guess32 >> 1);
  guess32 = halfn / guess32 + (guess32 >> 1);
  guess32 = halfn / guess32 + (guess32 >> 1);


/*
 * Now that an estimate accurate to about 30 bits has been obtained,
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 */

  /* n - guess^2.  The square of a 32 bit value whose top bit is set can
     exceed the signed 64 bit range, so it is formed in u64 (modulo 2^64)
     and only then reinterpreted as a signed difference. */
  work = (s64)(nn - (u64)guess32 * guess32);
  /* The difference may be negative; shift its bit pattern as unsigned
     because left shifting a negative signed value is undefined. */
  work = (s64)((u64)work << 28);       /* 29 - 1 */
  work /= guess32;                     /* signed division */
  work = (s64)((u64)work << 3);        /* 29 + 3 = 32 */
  work += ((u64)guess32) << 32;

  if ( work == 0 )  /* This happens in one or two special cases */
    work = BX_CONST64(0xffffffffffffffff);

  guess = work;

  /* guess is now accurate to about 60 bits */


  /* work is signed, so this is true only when the top bit of the 64 bit
     estimate is clear. */
  if ( work > 0 )
    {
#ifdef PARANOID
      if ( (n->sigh != 0xffffffff) && (n->sigl != 0xffffffff) )
	{
	  EXCEPTION(EX_INTERNAL|0x213);
	}
#endif
      /* We know the answer here. */
      /* Note that neither the significand nor the exponent of n is
         written on this path. */
      return FPU_round(n, 0x7fffffff, 0, control_w, sign);
    }

  /* From here on work < 0 (zero was replaced above and positive values
     returned), so the top bit of guess is set and its upper 32 bits,
     used as the divisor below, are non-zero. */

  /* Refine the guess to significantly more than 64 bits. */

  /* First, square the current guess. */

  guess32 = guess >> 32;
  work32 = guess;

  /* lower 32 times lower 32 */
  lowr = work32;
  lowr *= work32;

  /* lower 32 times upper 32 */
  mid = guess32;
  mid *= work32;

  /* upper 32 times upper 32 */
  upr = guess32;
  upr *= guess32;

  /* upper 32 bits of the middle product times 2 */
  upr += mid >> (32-1);

  /* lower 32 bits of the middle product times 2 */
  work32 = mid << 1;

  /* upper 32 bits of the lower product */
  mid32 = lowr >> 32;
  mid32 += work32;
  if ( mid32 < work32 )   /* the 32 bit addition wrapped: carry into upr */
    upr ++;

  /* We now have the first 96 bits (truncated) of the square of the guess */

  /* 96 bit subtraction  (upr:mid32) - (nn:ne)  with a manual borrow. */
  diff = upr - nn;
  diff32 = mid32 - ne;
  if ( diff32 > mid32 )   /* the 32 bit subtraction wrapped: borrow */
    diff --;

  if ( ((s64)diff) < 0 )
    {
      /* The difference is negative, negate it. */
      /* Two's complement negation of the 96 bit value diff:diff32.  The
         low word is negated as unsigned (same bits as a signed negation,
         but well defined for 0x80000000); the high part is complemented
         and receives the carry only when the low word is zero. */
      diff32 = -diff32;
      diff = ~diff;
      if ( diff32 == 0 )
	diff ++;
#ifdef PARANOID
      if ( (diff >> 32) != 0 )
	{
	  EXCEPTION(EX_INTERNAL|0x207);
	}
#endif

      /* Keep the low 64 bits of the 96 bit difference and divide them by
         the upper half of the guess, 32 quotient bits at a time. */
      diff <<= 32;
      diff |= diff32;
      work32 = diff / guess32;
      /* NOTE(review): work is signed; this relies on the first quotient
         word having its top bit clear (presumably guaranteed because the
         difference is small) - confirm. */
      work = work32;
      work <<= 32;

      diff = diff % guess32;
      diff <<= 32;
      work32 = diff / guess32;

      work |= work32;

      /* Halve the quotient: the correction is diff / (2 * guess). */
      work >>= 1;
      work32 = work >> 32;


      guess += work32;       /* The first 64 bits */
      guess32 = work;        /* The next 32 bits */
      /* The guess should now be good to about 90 bits */
    }
  else
    {
      /* The difference is positive. */
      /* Same two step division as in the negative case, but the
         correction is subtracted from the guess. */
      diff <<= 32;
      diff |= diff32;

      work32 = diff / guess32;
      /* NOTE(review): as above, relies on the top bit of this quotient
         word being clear - confirm. */
      work = work32;
      work <<= 32;

      diff = diff % guess32;
      diff <<= 32;
      work32 = diff / guess32;

      work |= work32;

      /* Halve the quotient: the correction is diff / (2 * guess). */
      work >>= 1;
      work32 = work >> 32;

      guess32 = work;        /* The last 32 bits (of 96) */
      guess32 = -guess32;
      if ( guess32 )         /* non-zero low word: borrow from the top 64 */
	guess --;
      guess -= work32;       /* The first 64 bits */
      /* The guess should now be good to about 90 bits */
    }


  setexponent16(n, 0);

  /* guess:guess32 is now a 96 bit estimate.  Decide how to round
     according to how close the extension word guess32 is to 0 (or to
     wrapping round to 0) and to one half (0x80000000). */
  if ( guess32 >= (u32) -ERR_MARGIN )
    {
      /* Nearly exact, we round the 64 bit result upward. */
      /* Execution then continues with the "easy case" below, whose
         condition is also satisfied by this guess32. */
      guess ++;
    }
  else if ( (guess32 > ERR_MARGIN) &&
	   ((guess32 < 0x80000000-ERR_MARGIN)
	    || (guess32 > 0x80000000+ERR_MARGIN)) )
    {
      /* We have enough accuracy to decide rounding */
      significand(n) = guess;
      return FPU_round(n, guess32, 0, control_w, sign);
    }

  if ( (guess32 <= ERR_MARGIN) || (guess32 >= (u32) -ERR_MARGIN) )
    {
      /*
       * This is an easy case because x^1/2 is monotonic.
       * We need just find the square of our estimate, compare it
       * with the argument, and deduce whether our estimate is
       * above, below, or exact. We use the fact that the estimate
       * is known to be accurate to about 90 bits.
       */


      /* We compute the lower 64 bits of the 128 bit product */
      work32 = guess;
      lowr = work32;
      lowr *= work32;

      uwork = guess >> 32;
      work32 = guess;
      uwork *= work32;
      uwork <<= 33;   /* 33 = 32+1 (for two times the product) */

      lowr += uwork;  /* We now have the 64 bits */

      /* We need only look at bits 65..96 of the square of guess. */
      /* One bit lower is used when the argument was shifted right on
         entry. */
      if ( shifted )
	work32 = lowr >> 31;
      else
	work32 = lowr >> 32;

#ifdef PARANOID
      if ( ((s32)work32 > 3*ERR_MARGIN) || ((s32)work32 < -3*ERR_MARGIN) )
	{
	  EXCEPTION(EX_INTERNAL|0x214);
	}
#endif

      /* The sign of work32, read as a signed value, tells whether the
         square of the guess lies above or below the argument. */
      significand(n) = guess;
      if ( (s32)work32 > 0 )
	{
	  /* guess is too large */
	  significand(n) --;
	  return FPU_round(n, 0xffffff00, 0, control_w, sign);
	}
      else if ( (s32)work32 < 0 )
	{
	  /* guess is a little too small */
	  return FPU_round(n, 0x000000ff, 0, control_w, sign);
	}

      else if ( (u32)lowr != 0 )
       {

         /* guess is too large */
         significand(n) --;
         return FPU_round(n, 0xffffff00, 0, control_w, sign);
       }

      /* Our guess is precise. */
      return FPU_round(n, 0, 0, control_w, sign);
    }

  /* Very similar to the case above, but the last bit is near 0.5.
     We handle this just like the case above but we shift everything
     by one bit. */


  uwork = guess;
  uwork <<= 1;
  uwork |= 1;      /* add the half bit */

  /* We compute the lower 64 bits of the 128 bit product */
  work32 = uwork;
  lowr = work32;
  lowr *= work32;

  work32 = uwork >> 32;
  uwork &= 0xffffffff;
  uwork *= work32;
  uwork <<= 33;   /* 33 = 32+1 (for two times the product) */

  lowr += uwork;  /* We now have the 64 bits. The lowest 32 bits of lowr
                    are not all zero (the lsb is 1). */

  /* We need only look at bits 65..96 of the square of guess. */
  if ( shifted )
    work32 = lowr >> 31;
  else
    work32 = lowr >> 32;

#ifdef PARANOID
  if ( ((s32)work32 > 4*3*ERR_MARGIN) || ((s32)work32 < -4*3*ERR_MARGIN) )
    {
      EXCEPTION(EX_INTERNAL|0x215);
    }
#endif

  /* The extension values passed below lie just above or just below one
     half (0x80000000). */
  significand(n) = guess;
  if ( (s32)work32 < 0 )
    {
      /* guess plus half bit is a little too small */
      return FPU_round(n, 0x800000ff, 0, control_w, sign);
    }
  else /* Note that the lower 64 bits of the product are not all zero */
    {
      /* guess plus half bit is too large */
      return FPU_round(n, 0x7fffff00, 0, control_w, sign);
    }

  /*
    Note that the result of a square root cannot have precisely a half bit
    of a least significant place (it is left as an exercise for the reader
    to prove this! (hint: 65 bit*65 bit => n bits)).
  */

}
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00			`/*---------------------------------------------------------------------------+`
			`\| wm_sqrt.c \|`
			`\| \|`
			`\| Fixed point arithmetic square root evaluation. \|`
			`\| \|`
			`\| Copyright (C) 1992,1993,1995,1997,1999 \|`
			`\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|`
			`\| Australia. E-mail billm@melbpc.org.au \|`
			`\| \|`
			`+---------------------------------------------------------------------------*/`

			`/*---------------------------------------------------------------------------+`
			`\| returns the square root of n in n. \|`
			`\| \|`
			`\| Use Newton's method to compute the square root of a number, which must \|`
			`\| be in the range [1.0 .. 4.0), to 64 bits accuracy. \|`
			`\| Does not check the sign or tag of the argument. \|`
			`\| Sets the exponent, but not the sign or tag of the result. \|`
			`\| \|`
			`\| The guess is kept in %esi:%edi \|`
			`+---------------------------------------------------------------------------*/`

			`#include "exception.h"`
			`#include "fpu_emu.h"`

			`/*`
			`The following value indicates the trailing bits (of 96 bits)`
			`which may be in error when the final Newton iteration is finished`
			`(0x20 corresponds to the last 5 bits in error, i.e. 91 bits precision).`
			`A check of the following code with more than 3 billion (3.0e9) random`
			`and selected values showed that 0x10 was always a large enough value,`
			`so 0x20 should be a conservative choice.`
			`*/`
			`#define ERR_MARGIN 0x20`


			`int wm_sqrt(FPU_REG *n, s32 dummy1, s32 dummy2, u16 control_w, u8 sign)`
			`{`
			`u64 nn, guess, halfn, lowr, mid, upr, diff, uwork;`
			`s64 work;`
			`u32 ne, guess32, work32, diff32, mid32;`
			`int shifted;`

			`nn = significand(n);`
			`ne = 0;`
			`if ( exponent16(n) == EXP_BIAS )`
			`{`
			`/* Shift the argument right one position. */`
			`if ( nn & 1 )`
			`ne = 0x80000000;`
			`nn >>= 1;`
			`guess = n->sigh >> 2;`
			`shifted = 1;`
			`}`
			`else`
			`{`
			`guess = n->sigh >> 1;`
			`shifted = 0;`
			`}`

			`guess += 0x40000000;`
			`guess *= 0xaaaaaaaa;`
			`guess <<= 1;`
			`guess32 = guess >> 32;`
			`if ( !(guess32 & 0x80000000) )`
			`guess32 = 0x80000000;`
			`halfn = nn >> 1;`

			`guess32 = halfn / guess32 + (guess32 >> 1);`
			`guess32 = halfn / guess32 + (guess32 >> 1);`
			`guess32 = halfn / guess32 + (guess32 >> 1);`


			`/*`
			`* Now that an estimate accurate to about 30 bits has been obtained,`
			`* we improve it to 60 bits or so.`
			`*`
			`* The strategy from now on is to compute new estimates from`
			`* guess := guess + (n - guess^2) / (2 * guess)`
			`*/`

			`work = guess32;`
			`work = nn - work * guess32;`
			`work <<= 28; /* 29 - 1 */`
			`work /= guess32;`
			`work <<= 3; /* 29 + 3 = 32 */`
			`work += ((u64)guess32) << 32;`

			`if ( work == 0 ) /* This happens in one or two special cases */`
- Applied const64bit patch: For compilers (such as Microsoft VC++) which don't allow "LL" after a constant to make it 64-bit, this patch declares all such constants as BX_CONST64(value). Then in config.in, a switch called BX_64BIT_CONSTANTS_USE_LL controls whether the macro puts the LL's in or not. Configure sets the macro, if you're on a platform that can run such things. 2001-04-10 06:06:10 +04:00			`work = BX_CONST64(0xffffffffffffffff);`
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00
			`guess = work;`

			`/* guess is now accurate to about 60 bits */`


			`if ( work > 0 )`
			`{`
			`#ifdef PARANOID`
			`if ( (n->sigh != 0xffffffff) && (n->sigl != 0xffffffff) )`
			`{`
			`EXCEPTION(EX_INTERNAL\|0x213);`
			`}`
			`#endif`
			`/* We know the answer here. */`
			`return FPU_round(n, 0x7fffffff, 0, control_w, sign);`
			`}`

			`/* Refine the guess to significantly more than 64 bits. */`

			`/* First, square the current guess. */`

			`guess32 = guess >> 32;`
			`work32 = guess;`

			`/* lower 32 times lower 32 */`
			`lowr = work32;`
			`lowr *= work32;`

			`/* lower 32 times upper 32 */`
			`mid = guess32;`
			`mid *= work32;`

			`/* upper 32 times upper 32 */`
			`upr = guess32;`
			`upr *= guess32;`

			`/* upper 32 bits of the middle product times 2 */`
			`upr += mid >> (32-1);`

			`/* lower 32 bits of the middle product times 2 */`
			`work32 = mid << 1;`

			`/* upper 32 bits of the lower product */`
			`mid32 = lowr >> 32;`
			`mid32 += work32;`
			`if ( mid32 < work32 )`
			`upr ++;`

			`/* We now have the first 96 bits (truncated) of the square of the guess */`

			`diff = upr - nn;`
			`diff32 = mid32 - ne;`
			`if ( diff32 > mid32 )`
			`diff --;`

			`if ( ((s64)diff) < 0 )`
			`{`
			`/* The difference is negative, negate it. */`
			`diff32 = -((s32)diff32);`
			`diff = ~diff;`
			`if ( diff32 == 0 )`
			`diff ++;`
			`#ifdef PARANOID`
			`if ( (diff >> 32) != 0 )`
			`{`
			`EXCEPTION(EX_INTERNAL\|0x207);`
			`}`
			`#endif`

			`diff <<= 32;`
			`diff \|= diff32;`
			`work32 = diff / guess32;`
			`work = work32;`
			`work <<= 32;`

			`diff = diff % guess32;`
			`diff <<= 32;`
			`work32 = diff / guess32;`

			`work \|= work32;`

			`work >>= 1;`
			`work32 = work >> 32;`


			`guess += work32; /* The first 64 bits */`
			`guess32 = work; /* The next 32 bits */`
			`/* The guess should now be good to about 90 bits */`
			`}`
			`else`
			`{`
			`/* The difference is positive. */`
			`diff <<= 32;`
			`diff \|= diff32;`

			`work32 = diff / guess32;`
			`work = work32;`
			`work <<= 32;`

			`diff = diff % guess32;`
			`diff <<= 32;`
			`work32 = diff / guess32;`

			`work \|= work32;`

			`work >>= 1;`
			`work32 = work >> 32;`

			`guess32 = work; /* The last 32 bits (of 96) */`
			`guess32 = -guess32;`
			`if ( guess32 )`
			`guess --;`
			`guess -= work32; /* The first 64 bits */`
			`/* The guess should now be good to about 90 bits */`
			`}`


			`setexponent16(n, 0);`

			`if ( guess32 >= (u32) -ERR_MARGIN )`
			`{`
			`/* Nearly exact, we round the 64 bit result upward. */`
			`guess ++;`
			`}`
			`else if ( (guess32 > ERR_MARGIN) &&`
			`((guess32 < 0x80000000-ERR_MARGIN)`
			`\|\| (guess32 > 0x80000000+ERR_MARGIN)) )`
			`{`
			`/* We have enough accuracy to decide rounding */`
			`significand(n) = guess;`
			`return FPU_round(n, guess32, 0, control_w, sign);`
			`}`

			`if ( (guess32 <= ERR_MARGIN) \|\| (guess32 >= (u32) -ERR_MARGIN) )`
			`{`
			`/*`
			`* This is an easy case because x^1/2 is monotonic.`
			`* We need just find the square of our estimate, compare it`
			`* with the argument, and deduce whether our estimate is`
			`* above, below, or exact. We use the fact that the estimate`
			`* is known to be accurate to about 90 bits.`
			`*/`


			`/* We compute the lower 64 bits of the 128 bit product */`
			`work32 = guess;`
			`lowr = work32;`
			`lowr *= work32;`

			`uwork = guess >> 32;`
			`work32 = guess;`
			`uwork *= work32;`
			`uwork <<= 33; /* 33 = 32+1 (for two times the product) */`

			`lowr += uwork; /* We now have the 64 bits */`

			`/* We need only look at bits 65..96 of the square of guess. */`
			`if ( shifted )`
			`work32 = lowr >> 31;`
			`else`
			`work32 = lowr >> 32;`

			`#ifdef PARANOID`
			`if ( ((s32)work32 > 3*ERR_MARGIN) \|\| ((s32)work32 < -3*ERR_MARGIN) )`
			`{`
			`EXCEPTION(EX_INTERNAL\|0x214);`
			`}`
			`#endif`

			`significand(n) = guess;`
			`if ( (s32)work32 > 0 )`
			`{`
			`/* guess is too large */`
			`significand(n) --;`
			`return FPU_round(n, 0xffffff00, 0, control_w, sign);`
			`}`
			`else if ( (s32)work32 < 0 )`
			`{`
			`/* guess is a little too small */`
			`return FPU_round(n, 0x000000ff, 0, control_w, sign);`
			`}`

			`else if ( (u32)lowr != 0 )`
			`{`

			`/* guess is too large */`
			`significand(n) --;`
			`return FPU_round(n, 0xffffff00, 0, control_w, sign);`
			`}`

			`/* Our guess is precise. */`
			`return FPU_round(n, 0, 0, control_w, sign);`
			`}`

			`/* Very similar to the case above, but the last bit is near 0.5.`
			`We handle this just like the case above but we shift everything`
			`by one bit. */`


			`uwork = guess;`
			`uwork <<= 1;`
			`uwork \|= 1; /* add the half bit */`

			`/* We compute the lower 64 bits of the 128 bit product */`
			`work32 = uwork;`
			`lowr = work32;`
			`lowr *= work32;`

			`work32 = uwork >> 32;`
			`uwork &= 0xffffffff;`
			`uwork *= work32;`
			`uwork <<= 33; /* 33 = 32+1 (for two times the product) */`

			`lowr += uwork; /* We now have the 64 bits. The lowest 32 bits of lowr`
			`are not all zero (the lsb is 1). */`

			`/* We need only look at bits 65..96 of the square of guess. */`
			`if ( shifted )`
			`work32 = lowr >> 31;`
			`else`
			`work32 = lowr >> 32;`

			`#ifdef PARANOID`
			`if ( ((s32)work32 > 4*3*ERR_MARGIN) \|\| ((s32)work32 < -4*3*ERR_MARGIN) )`
			`{`
			`EXCEPTION(EX_INTERNAL\|0x215);`
			`}`
			`#endif`

			`significand(n) = guess;`
			`if ( (s32)work32 < 0 )`
			`{`
			`/* guess plus half bit is a little too small */`
			`return FPU_round(n, 0x800000ff, 0, control_w, sign);`
			`}`
			`else /* Note that the lower 64 bits of the product are not all zero */`
			`{`
			`/* guess plus half bit is too large */`
			`return FPU_round(n, 0x7fffff00, 0, control_w, sign);`
			`}`

			`/*`
			`Note that the result of a square root cannot have precisely a half bit`
			`of a least significant place (it is left as an exercise for the reader`
			`to prove this! (hint: 65 bit*65 bit => n bits)).`
			`*/`

			`}`