Bochs/bochs/fpu/wm_sqrt.S

	.file	"wm_sqrt.S"
/*---------------------------------------------------------------------------+
 |  wm_sqrt.S                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993,1995,1997                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@suburbia.net               |
 |                                                                           |
 | Call from C as:                                                           |
 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "fpu_emu.h"


#ifndef NON_REENTRANT_FPU
/*	Local storage on the stack: */
#define FPU_accum_3	-4(%ebp)	/* ms word */
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
#define FPU_fsqrt_arg_1	-24(%ebp)
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */

#else
/*	Local storage in a static area: */
.data
	.align 4,0
FPU_accum_3:
	.long	0		/* ms word */
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0

/* The de-normalised argument:
                    sq_2                  sq_1              sq_0
          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
             ^ binary point here
 */
FPU_fsqrt_arg_2:
	.long	0		/* ms word */
FPU_fsqrt_arg_1:
	.long	0
FPU_fsqrt_arg_0:
	.long	0		/* ls word, at most the ms bit is set */
#endif /* NON_REENTRANT_FPU */


.text
ENTRY(wm_sqrt)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp
#endif /* NON_REENTRANT_FPU */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi

	movl	SIGH(%esi),%eax
	movl	SIGL(%esi),%ecx
	xorl	%edx,%edx

/* We use a rough linear estimate for the first guess.. */

	cmpw	EXP_BIAS,EXP(%esi)
	jnz	sqrt_arg_ge_2

	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
	rcrl	$1,%ecx
	rcrl	$1,%edx

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
   replaced by the answer. */

	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
	movl	%ecx,FPU_fsqrt_arg_1
	movl	%edx,FPU_fsqrt_arg_0

/* Make a linear first estimate */
	shrl	$1,%eax
	addl	$0x40000000,%eax
	movl	$0xaaaaaaaa,%ecx
	mull	%ecx
	shll	%edx			/* max result was 7fff... */
	testl	$0x80000000,%edx	/* but min was 3fff... */
	jnz	sqrt_prelim_no_adjust

	movl	$0x80000000,%edx	/* round up */

sqrt_prelim_no_adjust:
	movl	%edx,%esi	/* Our first guess */

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */

/*
 * From our initial estimate, three iterations are enough to get us
 * to 30 bits or so. This will then allow two iterations at better
 * precision to complete the process.
 */

/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
	shrl	%ecx		/* Doing this first will prevent a divide */
				/* overflow later. */

	movl	%ecx,%edx	/* msw of the arg / 2 */
	divl	%esi		/* current estimate */
	shrl	%esi		/* divide by 2 */
	addl	%eax,%esi	/* the new estimate */

	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

/*
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 */

/* First, find the square of the guess */
	movl	%esi,%eax
	mull	%esi
/* guess^2 now in %edx:%eax */

	movl	FPU_fsqrt_arg_1,%ecx
	subl	%ecx,%eax
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
	sbbl	%ecx,%edx
	jnc	sqrt_stage_2_positive

/* Subtraction gives a negative result,
   negate the result before division. */
	notl	%edx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%edx

	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi
	jmp	sqrt_stage_2_finish

sqrt_stage_2_positive:
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	notl	%ecx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%ecx

sqrt_stage_2_finish:
	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* Form the new estimate in %esi:%edi */
	movl	%eax,%edi
	addl	%ecx,%esi

	jnz	sqrt_stage_2_done	/* result should be [1..2) */

#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
	cmp	$0xffffffff,FPU_fsqrt_arg_1
	jnz	sqrt_stage_2_error
#endif /* PARANOID */

/* The best rounded result. */
	xorl	%eax,%eax
	decl	%eax
	movl	%eax,%edi
	movl	%eax,%esi
	movl	$0x7fffffff,%eax
	jmp	sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
	pushl	EX_INTERNAL|0x213
	call	EXCEPTION
#endif /* PARANOID */

sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/* Find the square of the guess. */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,FPU_accum_1

	movl	%esi,%eax
	mull	%esi
	movl	%edx,FPU_accum_3
	movl	%eax,FPU_accum_2

	movl	%edi,%eax
	mull	%esi
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/*	movl	%esi,%eax */
/*	mull	%edi */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
	subl	%eax,FPU_accum_1
	movl	FPU_fsqrt_arg_1,%eax
	sbbl	%eax,FPU_accum_2
	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
	sbbl	%eax,FPU_accum_3
	jnc	sqrt_stage_3_positive

/* Subtraction gives a negative result,
   negate the result before division */
	notl	FPU_accum_1
	notl	FPU_accum_2
	notl	FPU_accum_3
	addl	$1,FPU_accum_1
	adcl	$0,FPU_accum_2

#ifdef PARANOID
	adcl	$0,FPU_accum_3	/* This must be zero */
	jz	sqrt_stage_3_no_error

sqrt_stage_3_error:
	pushl	EX_INTERNAL|0x207
	call	EXCEPTION

sqrt_stage_3_no_error:
#endif /* PARANOID */

	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	addl	%ecx,%edi
	adcl	$0,%esi

	jmp	sqrt_stage_3_finished

sqrt_stage_3_positive:
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	notl	%eax		/* Negate the correction term */
	notl	%ecx
	addl	$1,%eax
	adcl	$0,%ecx		/* carry here ==> correction == 0 */
	adcl	$0xffffffff,%esi

	addl	%ecx,%edi
	adcl	$0,%esi

sqrt_stage_3_finished:

/*
 * The result in %esi:%edi:%eax should be good to about 90 bits here,
 * and the rounding information here does not have sufficient accuracy
 * in a few rare cases.
 */
	cmpl	$0xffffffe0,%eax
	ja	sqrt_near_exact_x

	cmpl	$0x00000020,%eax
	jb	sqrt_near_exact

	cmpl	$0x7fffffe0,%eax
	jb	sqrt_round_result

	cmpl	$0x80000020,%eax
	jb	sqrt_get_more_precision

sqrt_round_result:
/* Set up for rounding operations */
	movl	%eax,%edx
	movl	%esi,%eax
	movl	%edi,%ebx
	movl	PARAM1,%edi
	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
	jmp	fpu_reg_round


sqrt_near_exact_x:
/* First, the estimate must be rounded up. */
	addl	$1,%edi
	adcl	$0,%esi

sqrt_near_exact:
/*
 * This is an easy case because x^1/2 is monotonic.
 * We need just find the square of our estimate, compare it
 * with the argument, and deduce whether our estimate is
 * above, below, or exact. We use the fact that the estimate
 * is known to be accurate to about 90 bits.
 */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx
	addl	%eax,%ebx

#ifdef PARANOID
	cmp	$0xffffffb0,%ebx
	jb	sqrt_near_exact_ok

	cmp	$0x00000050,%ebx
	ja	sqrt_near_exact_ok

	pushl	EX_INTERNAL|0x214
	call	EXCEPTION

sqrt_near_exact_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_near_exact_small

	jnz	sqrt_near_exact_large

	or	%ebx,%edx
	jnz	sqrt_near_exact_large

/* Our estimate is exactly the right answer */
	xorl	%eax,%eax
	jmp	sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
	movl	$0x000000ff,%eax
	jmp	sqrt_round_result
	
sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
	subl	$1,%edi
	sbbl	$0,%esi
	movl	$0xffffff00,%eax
	jmp	sqrt_round_result


sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
   with an extra bit of precision in the estimate. */
	stc			/* The extra bit. */
	rcll	$1,%edi		/* Shift the estimate left one bit */
	rcll	$1,%esi

	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx
	addl	%eax,%ebx

/* Put our estimate back to its original value */
	stc			/* The ms bit. */
	rcrl	$1,%esi		/* Shift the estimate left one bit */
	rcrl	$1,%edi

#ifdef PARANOID
	cmp	$0xffffff60,%ebx
	jb	sqrt_more_prec_ok

	cmp	$0x000000a0,%ebx
	ja	sqrt_more_prec_ok

	pushl	EX_INTERNAL|0x215
	call	EXCEPTION

sqrt_more_prec_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_more_prec_small

	jnz	sqrt_more_prec_large

	or	%ebx,%ecx
	jnz	sqrt_more_prec_large

/* Our estimate is exactly the right answer */
	movl	$0x80000000,%eax
	jmp	sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
	movl	$0x800000ff,%eax
	jmp	sqrt_round_result
	
sqrt_more_prec_large:
/* Our estimate is too large */
	movl	$0x7fffff00,%eax
	jmp	sqrt_round_result
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00			`.file "wm_sqrt.S"`
			`/*---------------------------------------------------------------------------+`
			`\| wm_sqrt.S \|`
			`\| \|`
			`\| Fixed point arithmetic square root evaluation. \|`
			`\| \|`
			`\| Copyright (C) 1992,1993,1995,1997 \|`
			`\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|`
			`\| Australia. E-mail billm@suburbia.net \|`
			`\| \|`
			`\| Call from C as: \|`
			`\| int wm_sqrt(FPU_REG *n, unsigned int control_word) \|`
			`\| \|`
			`+---------------------------------------------------------------------------*/`

			`/*---------------------------------------------------------------------------+`
			`\| wm_sqrt(FPU_REG *n, unsigned int control_word) \|`
			`\| returns the square root of n in n. \|`
			`\| \|`
			`\| Use Newton's method to compute the square root of a number, which must \|`
			`\| be in the range [1.0 .. 4.0), to 64 bits accuracy. \|`
			`\| Does not check the sign or tag of the argument. \|`
			`\| Sets the exponent, but not the sign or tag of the result. \|`
			`\| \|`
			`\| The guess is kept in %esi:%edi \|`
			`+---------------------------------------------------------------------------*/`

			`#include "exception.h"`
			`#include "fpu_emu.h"`


			`#ifndef NON_REENTRANT_FPU`
			`/* Local storage on the stack: */`
			`#define FPU_accum_3 -4(%ebp) /* ms word */`
			`#define FPU_accum_2 -8(%ebp)`
			`#define FPU_accum_1 -12(%ebp)`
			`#define FPU_accum_0 -16(%ebp)`

			`/*`
			`* The de-normalised argument:`
			`* sq_2 sq_1 sq_0`
			`* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0`
			`* ^ binary point here`
			`*/`
			`#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */`
			`#define FPU_fsqrt_arg_1 -24(%ebp)`
			`#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */`

			`#else`
			`/* Local storage in a static area: */`
			`.data`
			`.align 4,0`
			`FPU_accum_3:`
			`.long 0 /* ms word */`
			`FPU_accum_2:`
			`.long 0`
			`FPU_accum_1:`
			`.long 0`
			`FPU_accum_0:`
			`.long 0`

			`/* The de-normalised argument:`
			`sq_2 sq_1 sq_0`
			`b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0`
			`^ binary point here`
			`*/`
			`FPU_fsqrt_arg_2:`
			`.long 0 /* ms word */`
			`FPU_fsqrt_arg_1:`
			`.long 0`
			`FPU_fsqrt_arg_0:`
			`.long 0 /* ls word, at most the ms bit is set */`
These changes are from "portable1" patch. - put /comments symbols/ around any chars after #endif. Other compilers do not get it. - fix cases in which a pointer is cast to a 32-bit int, then back to a pointer. This breaks on a machine with 64-bit pointers. Examples: FPU_sub arg 2 and FPU_div arg 2. The int->ptr->int conversions are now done more safely by macros REGNO2PTR and PTR2INT. - use GCC_ATTRIBUTE macro instead of __attribute__. For compilers that do not support __attribute__, the macro can be defined to be nothing. - in fpu_entry.c, arg1 of FPU_load_int32 is (s32), but the calls to it cast their data to (u32). - if compiler does NOT inline functions in poly.h, the "extern inline" setting caused duplicate symbols to be created. Changed them to "static inline" so that the mul_32_32 from different .c files do not conflict. - implemented setcc so that it doesn't use curly brackets inside parens - comment out sigcontext structure definition, which conflicts with non-linux or non-intel operating systems. It's not used by bochs anyway. 2001-04-10 05:43:09 +04:00			`#endif /* NON_REENTRANT_FPU */`
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00

			`.text`
			`ENTRY(wm_sqrt)`
			`pushl %ebp`
			`movl %esp,%ebp`
			`#ifndef NON_REENTRANT_FPU`
			`subl $28,%esp`
These changes are from "portable1" patch. - put /comments symbols/ around any chars after #endif. Other compilers do not get it. - fix cases in which a pointer is cast to a 32-bit int, then back to a pointer. This breaks on a machine with 64-bit pointers. Examples: FPU_sub arg 2 and FPU_div arg 2. The int->ptr->int conversions are now done more safely by macros REGNO2PTR and PTR2INT. - use GCC_ATTRIBUTE macro instead of __attribute__. For compilers that do not support __attribute__, the macro can be defined to be nothing. - in fpu_entry.c, arg1 of FPU_load_int32 is (s32), but the calls to it cast their data to (u32). - if compiler does NOT inline functions in poly.h, the "extern inline" setting caused duplicate symbols to be created. Changed them to "static inline" so that the mul_32_32 from different .c files do not conflict. - implemented setcc so that it doesn't use curly brackets inside parens - comment out sigcontext structure definition, which conflicts with non-linux or non-intel operating systems. It's not used by bochs anyway. 2001-04-10 05:43:09 +04:00			`#endif /* NON_REENTRANT_FPU */`
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00			`pushl %esi`
			`pushl %edi`
			`pushl %ebx`

			`movl PARAM1,%esi`

			`movl SIGH(%esi),%eax`
			`movl SIGL(%esi),%ecx`
			`xorl %edx,%edx`

			`/* We use a rough linear estimate for the first guess.. */`

			`cmpw EXP_BIAS,EXP(%esi)`
			`jnz sqrt_arg_ge_2`

			`shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */`
			`rcrl $1,%ecx`
			`rcrl $1,%edx`

			`sqrt_arg_ge_2:`
			`/* From here on, n is never accessed directly again until it is`
			`replaced by the answer. */`

			`movl %eax,FPU_fsqrt_arg_2 /* ms word of n */`
			`movl %ecx,FPU_fsqrt_arg_1`
			`movl %edx,FPU_fsqrt_arg_0`

			`/* Make a linear first estimate */`
			`shrl $1,%eax`
			`addl $0x40000000,%eax`
			`movl $0xaaaaaaaa,%ecx`
			`mull %ecx`
			`shll %edx /* max result was 7fff... */`
			`testl $0x80000000,%edx /* but min was 3fff... */`
			`jnz sqrt_prelim_no_adjust`

			`movl $0x80000000,%edx /* round up */`

			`sqrt_prelim_no_adjust:`
			`movl %edx,%esi /* Our first guess */`

			`/* We have now computed (approx) (2 + x) / 3, which forms the basis`
			`for a few iterations of Newton's method */`

			`movl FPU_fsqrt_arg_2,%ecx /* ms word */`

			`/*`
			`* From our initial estimate, three iterations are enough to get us`
			`* to 30 bits or so. This will then allow two iterations at better`
			`* precision to complete the process.`
			`*/`

			`/* Compute (g + n/g)/2 at each iteration (g is the guess). */`
			`shrl %ecx /* Doing this first will prevent a divide */`
			`/* overflow later. */`

			`movl %ecx,%edx /* msw of the arg / 2 */`
			`divl %esi /* current estimate */`
			`shrl %esi /* divide by 2 */`
			`addl %eax,%esi /* the new estimate */`

			`movl %ecx,%edx`
			`divl %esi`
			`shrl %esi`
			`addl %eax,%esi`

			`movl %ecx,%edx`
			`divl %esi`
			`shrl %esi`
			`addl %eax,%esi`

			`/*`
			`* Now that an estimate accurate to about 30 bits has been obtained (in %esi),`
			`* we improve it to 60 bits or so.`
			`*`
			`* The strategy from now on is to compute new estimates from`
			`* guess := guess + (n - guess^2) / (2 * guess)`
			`*/`

			`/* First, find the square of the guess */`
			`movl %esi,%eax`
			`mull %esi`
			`/* guess^2 now in %edx:%eax */`

			`movl FPU_fsqrt_arg_1,%ecx`
			`subl %ecx,%eax`
			`movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */`
			`sbbl %ecx,%edx`
			`jnc sqrt_stage_2_positive`

			`/* Subtraction gives a negative result,`
			`negate the result before division. */`
			`notl %edx`
			`notl %eax`
			`addl $1,%eax`
			`adcl $0,%edx`

			`divl %esi`
			`movl %eax,%ecx`

			`movl %edx,%eax`
			`divl %esi`
			`jmp sqrt_stage_2_finish`

			`sqrt_stage_2_positive:`
			`divl %esi`
			`movl %eax,%ecx`

			`movl %edx,%eax`
			`divl %esi`

			`notl %ecx`
			`notl %eax`
			`addl $1,%eax`
			`adcl $0,%ecx`

			`sqrt_stage_2_finish:`
			`sarl $1,%ecx /* divide by 2 */`
			`rcrl $1,%eax`

			`/* Form the new estimate in %esi:%edi */`
			`movl %eax,%edi`
			`addl %ecx,%esi`

			`jnz sqrt_stage_2_done /* result should be [1..2) */`

			`#ifdef PARANOID`
			`/* It should be possible to get here only if the arg is ffff....ffff */`
			`cmp $0xffffffff,FPU_fsqrt_arg_1`
			`jnz sqrt_stage_2_error`
These changes are from "portable1" patch. - put /comments symbols/ around any chars after #endif. Other compilers do not get it. - fix cases in which a pointer is cast to a 32-bit int, then back to a pointer. This breaks on a machine with 64-bit pointers. Examples: FPU_sub arg 2 and FPU_div arg 2. The int->ptr->int conversions are now done more safely by macros REGNO2PTR and PTR2INT. - use GCC_ATTRIBUTE macro instead of __attribute__. For compilers that do not support __attribute__, the macro can be defined to be nothing. - in fpu_entry.c, arg1 of FPU_load_int32 is (s32), but the calls to it cast their data to (u32). - if compiler does NOT inline functions in poly.h, the "extern inline" setting caused duplicate symbols to be created. Changed them to "static inline" so that the mul_32_32 from different .c files do not conflict. - implemented setcc so that it doesn't use curly brackets inside parens - comment out sigcontext structure definition, which conflicts with non-linux or non-intel operating systems. It's not used by bochs anyway. 2001-04-10 05:43:09 +04:00			`#endif /* PARANOID */`
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00
			`/* The best rounded result. */`
			`xorl %eax,%eax`
			`decl %eax`
			`movl %eax,%edi`
			`movl %eax,%esi`
			`movl $0x7fffffff,%eax`
			`jmp sqrt_round_result`

			`#ifdef PARANOID`
			`sqrt_stage_2_error:`
			`pushl EX_INTERNAL\|0x213`
			`call EXCEPTION`
These changes are from "portable1" patch. - put /comments symbols/ around any chars after #endif. Other compilers do not get it. - fix cases in which a pointer is cast to a 32-bit int, then back to a pointer. This breaks on a machine with 64-bit pointers. Examples: FPU_sub arg 2 and FPU_div arg 2. The int->ptr->int conversions are now done more safely by macros REGNO2PTR and PTR2INT. - use GCC_ATTRIBUTE macro instead of __attribute__. For compilers that do not support __attribute__, the macro can be defined to be nothing. - in fpu_entry.c, arg1 of FPU_load_int32 is (s32), but the calls to it cast their data to (u32). - if compiler does NOT inline functions in poly.h, the "extern inline" setting caused duplicate symbols to be created. Changed them to "static inline" so that the mul_32_32 from different .c files do not conflict. - implemented setcc so that it doesn't use curly brackets inside parens - comment out sigcontext structure definition, which conflicts with non-linux or non-intel operating systems. It's not used by bochs anyway. 2001-04-10 05:43:09 +04:00			`#endif /* PARANOID */`
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00
			`sqrt_stage_2_done:`

			`/* Now the square root has been computed to better than 60 bits. */`

			`/* Find the square of the guess. */`
			`movl %edi,%eax /* ls word of guess */`
			`mull %edi`
			`movl %edx,FPU_accum_1`

			`movl %esi,%eax`
			`mull %esi`
			`movl %edx,FPU_accum_3`
			`movl %eax,FPU_accum_2`

			`movl %edi,%eax`
			`mull %esi`
			`addl %eax,FPU_accum_1`
			`adcl %edx,FPU_accum_2`
			`adcl $0,FPU_accum_3`

			`/* movl %esi,%eax */`
			`/* mull %edi */`
			`addl %eax,FPU_accum_1`
			`adcl %edx,FPU_accum_2`
			`adcl $0,FPU_accum_3`

			`/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */`

			`movl FPU_fsqrt_arg_0,%eax /* get normalized n */`
			`subl %eax,FPU_accum_1`
			`movl FPU_fsqrt_arg_1,%eax`
			`sbbl %eax,FPU_accum_2`
			`movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */`
			`sbbl %eax,FPU_accum_3`
			`jnc sqrt_stage_3_positive`

			`/* Subtraction gives a negative result,`
			`negate the result before division */`
			`notl FPU_accum_1`
			`notl FPU_accum_2`
			`notl FPU_accum_3`
			`addl $1,FPU_accum_1`
			`adcl $0,FPU_accum_2`

			`#ifdef PARANOID`
			`adcl $0,FPU_accum_3 /* This must be zero */`
			`jz sqrt_stage_3_no_error`

			`sqrt_stage_3_error:`
			`pushl EX_INTERNAL\|0x207`
			`call EXCEPTION`

			`sqrt_stage_3_no_error:`
These changes are from "portable1" patch. - put /comments symbols/ around any chars after #endif. Other compilers do not get it. - fix cases in which a pointer is cast to a 32-bit int, then back to a pointer. This breaks on a machine with 64-bit pointers. Examples: FPU_sub arg 2 and FPU_div arg 2. The int->ptr->int conversions are now done more safely by macros REGNO2PTR and PTR2INT. - use GCC_ATTRIBUTE macro instead of __attribute__. For compilers that do not support __attribute__, the macro can be defined to be nothing. - in fpu_entry.c, arg1 of FPU_load_int32 is (s32), but the calls to it cast their data to (u32). - if compiler does NOT inline functions in poly.h, the "extern inline" setting caused duplicate symbols to be created. Changed them to "static inline" so that the mul_32_32 from different .c files do not conflict. - implemented setcc so that it doesn't use curly brackets inside parens - comment out sigcontext structure definition, which conflicts with non-linux or non-intel operating systems. It's not used by bochs anyway. 2001-04-10 05:43:09 +04:00			`#endif /* PARANOID */`
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00
			`movl FPU_accum_2,%edx`
			`movl FPU_accum_1,%eax`
			`divl %esi`
			`movl %eax,%ecx`

			`movl %edx,%eax`
			`divl %esi`

			`sarl $1,%ecx /* divide by 2 */`
			`rcrl $1,%eax`

			`/* prepare to round the result */`

			`addl %ecx,%edi`
			`adcl $0,%esi`

			`jmp sqrt_stage_3_finished`

			`sqrt_stage_3_positive:`
			`movl FPU_accum_2,%edx`
			`movl FPU_accum_1,%eax`
			`divl %esi`
			`movl %eax,%ecx`

			`movl %edx,%eax`
			`divl %esi`

			`sarl $1,%ecx /* divide by 2 */`
			`rcrl $1,%eax`

			`/* prepare to round the result */`

			`notl %eax /* Negate the correction term */`
			`notl %ecx`
			`addl $1,%eax`
			`adcl $0,%ecx /* carry here ==> correction == 0 */`
			`adcl $0xffffffff,%esi`

			`addl %ecx,%edi`
			`adcl $0,%esi`

			`sqrt_stage_3_finished:`

			`/*`
			`* The result in %esi:%edi:%eax should be good to about 90 bits here,`
			`* and the rounding information here does not have sufficient accuracy`
			`* in a few rare cases.`
			`*/`
			`cmpl $0xffffffe0,%eax`
			`ja sqrt_near_exact_x`

			`cmpl $0x00000020,%eax`
			`jb sqrt_near_exact`

			`cmpl $0x7fffffe0,%eax`
			`jb sqrt_round_result`

			`cmpl $0x80000020,%eax`
			`jb sqrt_get_more_precision`

			`sqrt_round_result:`
			`/* Set up for rounding operations */`
			`movl %eax,%edx`
			`movl %esi,%eax`
			`movl %edi,%ebx`
			`movl PARAM1,%edi`
			`movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */`
			`jmp fpu_reg_round`


			`sqrt_near_exact_x:`
			`/* First, the estimate must be rounded up. */`
			`addl $1,%edi`
			`adcl $0,%esi`

			`sqrt_near_exact:`
			`/*`
			`* This is an easy case because x^1/2 is monotonic.`
			`* We need just find the square of our estimate, compare it`
			`* with the argument, and deduce whether our estimate is`
			`* above, below, or exact. We use the fact that the estimate`
			`* is known to be accurate to about 90 bits.`
			`*/`
			`movl %edi,%eax /* ls word of guess */`
			`mull %edi`
			`movl %edx,%ebx /* 2nd ls word of square */`
			`movl %eax,%ecx /* ls word of square */`

			`movl %edi,%eax`
			`mull %esi`
			`addl %eax,%ebx`
			`addl %eax,%ebx`

			`#ifdef PARANOID`
			`cmp $0xffffffb0,%ebx`
			`jb sqrt_near_exact_ok`

			`cmp $0x00000050,%ebx`
			`ja sqrt_near_exact_ok`

			`pushl EX_INTERNAL\|0x214`
			`call EXCEPTION`

			`sqrt_near_exact_ok:`
These changes are from "portable1" patch. - put /comments symbols/ around any chars after #endif. Other compilers do not get it. - fix cases in which a pointer is cast to a 32-bit int, then back to a pointer. This breaks on a machine with 64-bit pointers. Examples: FPU_sub arg 2 and FPU_div arg 2. The int->ptr->int conversions are now done more safely by macros REGNO2PTR and PTR2INT. - use GCC_ATTRIBUTE macro instead of __attribute__. For compilers that do not support __attribute__, the macro can be defined to be nothing. - in fpu_entry.c, arg1 of FPU_load_int32 is (s32), but the calls to it cast their data to (u32). - if compiler does NOT inline functions in poly.h, the "extern inline" setting caused duplicate symbols to be created. Changed them to "static inline" so that the mul_32_32 from different .c files do not conflict. - implemented setcc so that it doesn't use curly brackets inside parens - comment out sigcontext structure definition, which conflicts with non-linux or non-intel operating systems. It's not used by bochs anyway. 2001-04-10 05:43:09 +04:00			`#endif /* PARANOID */`
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00
			`or %ebx,%ebx`
			`js sqrt_near_exact_small`

			`jnz sqrt_near_exact_large`

			`or %ebx,%edx`
			`jnz sqrt_near_exact_large`

			`/* Our estimate is exactly the right answer */`
			`xorl %eax,%eax`
			`jmp sqrt_round_result`

			`sqrt_near_exact_small:`
			`/* Our estimate is too small */`
			`movl $0x000000ff,%eax`
			`jmp sqrt_round_result`

			`sqrt_near_exact_large:`
			`/* Our estimate is too large, we need to decrement it */`
			`subl $1,%edi`
			`sbbl $0,%esi`
			`movl $0xffffff00,%eax`
			`jmp sqrt_round_result`


			`sqrt_get_more_precision:`
			`/* This case is almost the same as the above, except we start`
			`with an extra bit of precision in the estimate. */`
			`stc /* The extra bit. */`
			`rcll $1,%edi /* Shift the estimate left one bit */`
			`rcll $1,%esi`

			`movl %edi,%eax /* ls word of guess */`
			`mull %edi`
			`movl %edx,%ebx /* 2nd ls word of square */`
			`movl %eax,%ecx /* ls word of square */`

			`movl %edi,%eax`
			`mull %esi`
			`addl %eax,%ebx`
			`addl %eax,%ebx`

			`/* Put our estimate back to its original value */`
			`stc /* The ms bit. */`
			`rcrl $1,%esi /* Shift the estimate left one bit */`
			`rcrl $1,%edi`

			`#ifdef PARANOID`
			`cmp $0xffffff60,%ebx`
			`jb sqrt_more_prec_ok`

			`cmp $0x000000a0,%ebx`
			`ja sqrt_more_prec_ok`

			`pushl EX_INTERNAL\|0x215`
			`call EXCEPTION`

			`sqrt_more_prec_ok:`
These changes are from "portable1" patch. - put /comments symbols/ around any chars after #endif. Other compilers do not get it. - fix cases in which a pointer is cast to a 32-bit int, then back to a pointer. This breaks on a machine with 64-bit pointers. Examples: FPU_sub arg 2 and FPU_div arg 2. The int->ptr->int conversions are now done more safely by macros REGNO2PTR and PTR2INT. - use GCC_ATTRIBUTE macro instead of __attribute__. For compilers that do not support __attribute__, the macro can be defined to be nothing. - in fpu_entry.c, arg1 of FPU_load_int32 is (s32), but the calls to it cast their data to (u32). - if compiler does NOT inline functions in poly.h, the "extern inline" setting caused duplicate symbols to be created. Changed them to "static inline" so that the mul_32_32 from different .c files do not conflict. - implemented setcc so that it doesn't use curly brackets inside parens - comment out sigcontext structure definition, which conflicts with non-linux or non-intel operating systems. It's not used by bochs anyway. 2001-04-10 05:43:09 +04:00			`#endif /* PARANOID */`
- entered original Bochs snapshot bochs-2000_0325a.tar.gz from ftp.bochs.com 2001-04-10 05:04:59 +04:00
			`or %ebx,%ebx`
			`js sqrt_more_prec_small`

			`jnz sqrt_more_prec_large`

			`or %ebx,%ecx`
			`jnz sqrt_more_prec_large`

			`/* Our estimate is exactly the right answer */`
			`movl $0x80000000,%eax`
			`jmp sqrt_round_result`

			`sqrt_more_prec_small:`
			`/* Our estimate is too small */`
			`movl $0x800000ff,%eax`
			`jmp sqrt_round_result`

			`sqrt_more_prec_large:`
			`/* Our estimate is too large */`
			`movl $0x7fffff00,%eax`
			`jmp sqrt_round_result`