Remove .S files from FPU directory, they not contains a lot of bugfixes and another chnages made in .C versions
Remove empty include files
This commit is contained in:
parent
548dd3a13c
commit
87c91a9d72
@ -1,365 +0,0 @@
|
||||
.file "div_Xsig.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| div_Xsig.S |
|
||||
| |
|
||||
| Division subroutine for 96 bit quantities |
|
||||
| |
|
||||
| Copyright (C) 1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
|
||||
| put the 96 bit result at the location d. |
|
||||
| |
|
||||
| The result may not be accurate to 96 bits. It is intended for use where |
|
||||
| a result better than 64 bits is required. The result should usually be |
|
||||
| good to at least 94 bits. |
|
||||
| The returned result is actually divided by one half. This is done to |
|
||||
| prevent overflow. |
|
||||
| |
|
||||
| .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd |
|
||||
| |
|
||||
| void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
#define XsigLL(x) (x)
|
||||
#define XsigL(x) 4(x)
|
||||
#define XsigH(x) 8(x)
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/*
|
||||
Local storage on the stack:
|
||||
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
|
||||
*/
|
||||
#define FPU_accum_3 -4(%ebp)
|
||||
#define FPU_accum_2 -8(%ebp)
|
||||
#define FPU_accum_1 -12(%ebp)
|
||||
#define FPU_accum_0 -16(%ebp)
|
||||
#define FPU_result_3 -20(%ebp)
|
||||
#define FPU_result_2 -24(%ebp)
|
||||
#define FPU_result_1 -28(%ebp)
|
||||
|
||||
#else
|
||||
.data
|
||||
/*
|
||||
Local storage in a static area:
|
||||
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
|
||||
*/
|
||||
.align 4,0
|
||||
FPU_accum_3:
|
||||
.long 0
|
||||
FPU_accum_2:
|
||||
.long 0
|
||||
FPU_accum_1:
|
||||
.long 0
|
||||
FPU_accum_0:
|
||||
.long 0
|
||||
FPU_result_3:
|
||||
.long 0
|
||||
FPU_result_2:
|
||||
.long 0
|
||||
FPU_result_1:
|
||||
.long 0
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(div_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
subl $28,%esp
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi /* pointer to num */
|
||||
movl PARAM2,%ebx /* pointer to denom */
|
||||
|
||||
#ifdef PARANOID
|
||||
testl $0x80000000, XsigH(%ebx) /* Divisor */
|
||||
je L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Divide: Return arg1/arg2 to arg3. |
|
||||
| |
|
||||
| The maximum returned value is (ignoring exponents) |
|
||||
| .ffffffff ffffffff |
|
||||
| ------------------ = 1.ffffffff fffffffe |
|
||||
| .80000000 00000000 |
|
||||
| and the minimum is |
|
||||
| .80000000 00000000 |
|
||||
| ------------------ = .80000000 00000001 (rounded) |
|
||||
| .ffffffff ffffffff |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/* Save extended dividend in local register */
|
||||
|
||||
/* Divide by 2 to prevent overflow */
|
||||
clc
|
||||
movl XsigH(%esi),%eax
|
||||
rcrl %eax
|
||||
movl %eax,FPU_accum_3
|
||||
movl XsigL(%esi),%eax
|
||||
rcrl %eax
|
||||
movl %eax,FPU_accum_2
|
||||
movl XsigLL(%esi),%eax
|
||||
rcrl %eax
|
||||
movl %eax,FPU_accum_1
|
||||
movl $0,%eax
|
||||
rcrl %eax
|
||||
movl %eax,FPU_accum_0
|
||||
|
||||
movl FPU_accum_2,%eax /* Get the current num */
|
||||
movl FPU_accum_3,%edx
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Initialization done.
|
||||
Do the first 32 bits. */
|
||||
|
||||
/* We will divide by a number which is too large */
|
||||
movl XsigH(%ebx),%ecx
|
||||
addl $1,%ecx
|
||||
jnc LFirst_div_not_1
|
||||
|
||||
/* here we need to divide by 100000000h,
|
||||
i.e., no division at all.. */
|
||||
mov %edx,%eax
|
||||
jmp LFirst_div_done
|
||||
|
||||
LFirst_div_not_1:
|
||||
divl %ecx /* Divide the numerator by the augmented
|
||||
denom ms dw */
|
||||
|
||||
LFirst_div_done:
|
||||
movl %eax,FPU_result_3 /* Put the result in the answer */
|
||||
|
||||
mull XsigH(%ebx) /* mul by the ms dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_2 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_3
|
||||
|
||||
movl FPU_result_3,%eax /* Get the result back */
|
||||
mull XsigL(%ebx) /* now mul the ls dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
sbbl $0,FPU_accum_3
|
||||
je LDo_2nd_32_bits /* Must check for non-zero result here */
|
||||
|
||||
#ifdef PARANOID
|
||||
jb L_bugged_1
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* need to subtract another once of the denom */
|
||||
incl FPU_result_3 /* Correct the answer */
|
||||
|
||||
movl XsigL(%ebx),%eax
|
||||
movl XsigH(%ebx),%edx
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
sbbl $0,FPU_accum_3
|
||||
jne L_bugged_1 /* Must check for non-zero result here */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Half of the main problem is done, there is just a reduced numerator
|
||||
to handle now.
|
||||
Work with the second 32 bits, FPU_accum_0 not used from now on */
|
||||
LDo_2nd_32_bits:
|
||||
movl FPU_accum_2,%edx /* get the reduced num */
|
||||
movl FPU_accum_1,%eax
|
||||
|
||||
/* need to check for possible subsequent overflow */
|
||||
cmpl XsigH(%ebx),%edx
|
||||
jb LDo_2nd_div
|
||||
ja LPrevent_2nd_overflow
|
||||
|
||||
cmpl XsigL(%ebx),%eax
|
||||
jb LDo_2nd_div
|
||||
|
||||
LPrevent_2nd_overflow:
|
||||
/* The numerator is greater or equal, would cause overflow */
|
||||
/* prevent overflow */
|
||||
subl XsigL(%ebx),%eax
|
||||
sbbl XsigH(%ebx),%edx
|
||||
movl %edx,FPU_accum_2
|
||||
movl %eax,FPU_accum_1
|
||||
|
||||
incl FPU_result_3 /* Reflect the subtraction in the answer */
|
||||
|
||||
#ifdef PARANOID
|
||||
je L_bugged_2 /* Can't bump the result to 1.0 */
|
||||
#endif /* PARANOID */
|
||||
|
||||
LDo_2nd_div:
|
||||
cmpl $0,%ecx /* augmented denom msw */
|
||||
jnz LSecond_div_not_1
|
||||
|
||||
/* %ecx == 0, we are dividing by 1.0 */
|
||||
mov %edx,%eax
|
||||
jmp LSecond_div_done
|
||||
|
||||
LSecond_div_not_1:
|
||||
divl %ecx /* Divide the numerator by the denom ms dw */
|
||||
|
||||
LSecond_div_done:
|
||||
movl %eax,FPU_result_2 /* Put the result in the answer */
|
||||
|
||||
mull XsigH(%ebx) /* mul by the ms dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
movl FPU_result_2,%eax /* Get the result back */
|
||||
mull XsigL(%ebx) /* now mul the ls dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
jz LDo_3rd_32_bits
|
||||
|
||||
#ifdef PARANOID
|
||||
cmpl $1,FPU_accum_2
|
||||
jne L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* need to subtract another once of the denom */
|
||||
movl XsigL(%ebx),%eax
|
||||
movl XsigH(%ebx),%edx
|
||||
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_1
|
||||
sbbl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
jne L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
addl $1,FPU_result_2 /* Correct the answer */
|
||||
adcl $0,FPU_result_3
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2 /* Must check for non-zero result here */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* The division is essentially finished here, we just need to perform
|
||||
tidying operations.
|
||||
Deal with the 3rd 32 bits */
|
||||
LDo_3rd_32_bits:
|
||||
/* We use an approximation for the third 32 bits.
|
||||
To take account of the 3rd 32 bits of the divisor
|
||||
(call them del), we subtract del * (a/b) */
|
||||
|
||||
movl FPU_result_3,%eax /* a/b */
|
||||
mull XsigLL(%ebx) /* del */
|
||||
|
||||
subl %edx,FPU_accum_1
|
||||
|
||||
/* A borrow indicates that the result is negative */
|
||||
jnb LTest_over
|
||||
|
||||
movl XsigH(%ebx),%edx
|
||||
addl %edx,FPU_accum_1
|
||||
|
||||
subl $1,FPU_result_2 /* Adjust the answer */
|
||||
sbbl $0,FPU_result_3
|
||||
|
||||
/* The above addition might not have been enough, check again. */
|
||||
movl FPU_accum_1,%edx /* get the reduced num */
|
||||
cmpl XsigH(%ebx),%edx /* denom */
|
||||
jb LDo_3rd_div
|
||||
|
||||
movl XsigH(%ebx),%edx
|
||||
addl %edx,FPU_accum_1
|
||||
|
||||
subl $1,FPU_result_2 /* Adjust the answer */
|
||||
sbbl $0,FPU_result_3
|
||||
jmp LDo_3rd_div
|
||||
|
||||
LTest_over:
|
||||
movl FPU_accum_1,%edx /* get the reduced num */
|
||||
|
||||
/* need to check for possible subsequent overflow */
|
||||
cmpl XsigH(%ebx),%edx /* denom */
|
||||
jb LDo_3rd_div
|
||||
|
||||
/* prevent overflow */
|
||||
subl XsigH(%ebx),%edx
|
||||
movl %edx,FPU_accum_1
|
||||
|
||||
addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
|
||||
adcl $0,FPU_result_3
|
||||
|
||||
LDo_3rd_div:
|
||||
movl FPU_accum_0,%eax
|
||||
movl FPU_accum_1,%edx
|
||||
divl XsigH(%ebx)
|
||||
|
||||
movl %eax,FPU_result_1 /* Rough estimate of third word */
|
||||
|
||||
movl PARAM3,%esi /* pointer to answer */
|
||||
|
||||
movl FPU_result_1,%eax
|
||||
movl %eax,XsigLL(%esi)
|
||||
movl FPU_result_2,%eax
|
||||
movl %eax,XsigL(%esi)
|
||||
movl FPU_result_3,%eax
|
||||
movl %eax,XsigH(%esi)
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
/* The logic is wrong if we got here */
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x240
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_bugged_1:
|
||||
pushl EX_INTERNAL|0x241
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_bugged_2:
|
||||
pushl EX_INTERNAL|0x242
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
#endif /* PARANOID */
|
@ -1,47 +0,0 @@
|
||||
.file "div_small.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| div_small.S |
|
||||
| |
|
||||
| Divide a 64 bit integer by a 32 bit integer & return remainder. |
|
||||
| |
|
||||
| Copyright (C) 1992,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| unsigned long FPU_div_small(unsigned long long *x, unsigned long y) |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
.text
|
||||
ENTRY(FPU_div_small)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi /* pointer to num */
|
||||
movl PARAM2,%ecx /* The denominator */
|
||||
|
||||
movl 4(%esi),%eax /* Get the current num msw */
|
||||
xorl %edx,%edx
|
||||
divl %ecx
|
||||
|
||||
movl %eax,4(%esi)
|
||||
|
||||
movl (%esi),%eax /* Get the num lsw */
|
||||
divl %ecx
|
||||
|
||||
movl %eax,(%esi)
|
||||
|
||||
movl %edx,%eax /* Return the remainder in eax */
|
||||
|
||||
popl %esi
|
||||
|
||||
leave
|
||||
ret
|
||||
|
@ -1,176 +0,0 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| mul_Xsig.S |
|
||||
| |
|
||||
| Multiply a 12 byte fixed point number by another fixed point number. |
|
||||
| |
|
||||
| Copyright (C) 1992,1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| void mul32_Xsig(Xsig *x, unsigned b) |
|
||||
| |
|
||||
| void mul64_Xsig(Xsig *x, unsigned long long *b) |
|
||||
| |
|
||||
| void mul_Xsig_Xsig(Xsig *x, unsigned *b) |
|
||||
| |
|
||||
| The result is neither rounded nor normalized, and the ls bit or so may |
|
||||
| be wrong. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
.file "mul_Xsig.S"
|
||||
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
.text
|
||||
ENTRY(mul32_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $16,%esp
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
movl PARAM2,%ecx
|
||||
|
||||
xor %eax,%eax
|
||||
movl %eax,-4(%ebp)
|
||||
movl %eax,-8(%ebp)
|
||||
|
||||
movl (%esi),%eax /* lsl of Xsig */
|
||||
mull %ecx /* msl of b */
|
||||
movl %edx,-12(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull %ecx /* msl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull %ecx /* msl of b */
|
||||
addl %eax,-8(%ebp)
|
||||
adcl %edx,-4(%ebp)
|
||||
|
||||
movl -12(%ebp),%eax
|
||||
movl %eax,(%esi)
|
||||
movl -8(%ebp),%eax
|
||||
movl %eax,4(%esi)
|
||||
movl -4(%ebp),%eax
|
||||
movl %eax,8(%esi)
|
||||
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
ENTRY(mul64_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $16,%esp
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
movl PARAM2,%ecx
|
||||
|
||||
xor %eax,%eax
|
||||
movl %eax,-4(%ebp)
|
||||
movl %eax,-8(%ebp)
|
||||
|
||||
movl (%esi),%eax /* lsl of Xsig */
|
||||
mull 4(%ecx) /* msl of b */
|
||||
movl %edx,-12(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull (%ecx) /* lsl of b */
|
||||
addl %edx,-12(%ebp)
|
||||
adcl $0,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull 4(%ecx) /* msl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull (%ecx) /* lsl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull 4(%ecx) /* msl of b */
|
||||
addl %eax,-8(%ebp)
|
||||
adcl %edx,-4(%ebp)
|
||||
|
||||
movl -12(%ebp),%eax
|
||||
movl %eax,(%esi)
|
||||
movl -8(%ebp),%eax
|
||||
movl %eax,4(%esi)
|
||||
movl -4(%ebp),%eax
|
||||
movl %eax,8(%esi)
|
||||
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
|
||||
ENTRY(mul_Xsig_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $16,%esp
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
movl PARAM2,%ecx
|
||||
|
||||
xor %eax,%eax
|
||||
movl %eax,-4(%ebp)
|
||||
movl %eax,-8(%ebp)
|
||||
|
||||
movl (%esi),%eax /* lsl of Xsig */
|
||||
mull 8(%ecx) /* msl of b */
|
||||
movl %edx,-12(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull 4(%ecx) /* midl of b */
|
||||
addl %edx,-12(%ebp)
|
||||
adcl $0,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull (%ecx) /* lsl of b */
|
||||
addl %edx,-12(%ebp)
|
||||
adcl $0,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 4(%esi),%eax /* midl of Xsig */
|
||||
mull 8(%ecx) /* msl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull 4(%ecx) /* midl of b */
|
||||
addl %eax,-12(%ebp)
|
||||
adcl %edx,-8(%ebp)
|
||||
adcl $0,-4(%ebp)
|
||||
|
||||
movl 8(%esi),%eax /* msl of Xsig */
|
||||
mull 8(%ecx) /* msl of b */
|
||||
addl %eax,-8(%ebp)
|
||||
adcl %edx,-4(%ebp)
|
||||
|
||||
movl -12(%ebp),%edx
|
||||
movl %edx,(%esi)
|
||||
movl -8(%ebp),%edx
|
||||
movl %edx,4(%esi)
|
||||
movl -4(%ebp),%edx
|
||||
movl %edx,8(%esi)
|
||||
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
@ -1,142 +0,0 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| polynomial_Xsig.S |
|
||||
| |
|
||||
| Fixed point arithmetic polynomial evaluation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1995,1999 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@melbpc.org.au |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| void polynomial_Xsig(Xsig *accum, unsigned long long *x, |
|
||||
| unsigned long long terms[], int n) |
|
||||
| |
|
||||
| Computes: |
|
||||
| terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x)*x)*x)*x) ... )*x |
|
||||
| and adds the result to the 12 byte Xsig. |
|
||||
| The terms[] are each 8 bytes, but all computation is performed to 12 byte |
|
||||
| precision. |
|
||||
| |
|
||||
| This function must be used carefully: most overflow of intermediate |
|
||||
| results is controlled, but overflow of the result is not. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
.file "polynomial_Xsig.S"
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
#define TERM_SIZE $8
|
||||
#define SUM_MS -20(%ebp) /* sum ms long */
|
||||
#define SUM_MIDDLE -24(%ebp) /* sum middle long */
|
||||
#define SUM_LS -28(%ebp) /* sum ls long */
|
||||
#define ACCUM_MS -4(%ebp) /* accum ms long */
|
||||
#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
|
||||
#define ACCUM_LS -12(%ebp) /* accum ls long */
|
||||
#define OVERFLOWED -16(%ebp) /* addition overflow flag */
|
||||
|
||||
.text
|
||||
ENTRY(polynomial_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $32,%esp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM2,%esi /* x */
|
||||
movl PARAM3,%edi /* terms */
|
||||
|
||||
movl TERM_SIZE,%eax
|
||||
mull PARAM4 /* n */
|
||||
addl %eax,%edi
|
||||
|
||||
movl 4(%edi),%edx /* terms[n] */
|
||||
movl %edx,SUM_MS
|
||||
movl (%edi),%edx /* terms[n] */
|
||||
movl %edx,SUM_MIDDLE
|
||||
xor %eax,%eax
|
||||
movl %eax,SUM_LS
|
||||
movb %al,OVERFLOWED
|
||||
|
||||
subl TERM_SIZE,%edi
|
||||
decl PARAM4
|
||||
js L_accum_done
|
||||
|
||||
L_accum_loop:
|
||||
xor %eax,%eax
|
||||
movl %eax,ACCUM_MS
|
||||
movl %eax,ACCUM_MIDDLE
|
||||
|
||||
movl SUM_MIDDLE,%eax
|
||||
mull (%esi) /* x ls long */
|
||||
movl %edx,ACCUM_LS
|
||||
|
||||
movl SUM_LS,%eax
|
||||
mull 4(%esi) /* x ms long */
|
||||
addl %edx,ACCUM_LS
|
||||
adcl $0,ACCUM_MIDDLE
|
||||
adcl $0,ACCUM_MS
|
||||
|
||||
movl SUM_MIDDLE,%eax
|
||||
mull 4(%esi) /* x ms long */
|
||||
addl %eax,ACCUM_LS
|
||||
adcl %edx,ACCUM_MIDDLE
|
||||
adcl $0,ACCUM_MS
|
||||
|
||||
movl SUM_MS,%eax
|
||||
mull (%esi) /* x ls long */
|
||||
addl %eax,ACCUM_LS
|
||||
adcl %edx,ACCUM_MIDDLE
|
||||
adcl $0,ACCUM_MS
|
||||
|
||||
movl SUM_MS,%eax
|
||||
mull 4(%esi) /* x ms long */
|
||||
addl %eax,ACCUM_MIDDLE
|
||||
adcl %edx,ACCUM_MS
|
||||
|
||||
testb $0xff,OVERFLOWED
|
||||
jz L_no_overflow
|
||||
|
||||
movl (%esi),%eax
|
||||
addl %eax,ACCUM_MIDDLE
|
||||
movl 4(%esi),%eax
|
||||
adcl %eax,ACCUM_MS /* This could overflow too */
|
||||
|
||||
L_no_overflow:
|
||||
|
||||
/*
|
||||
* Now put the sum of next term and the accumulator
|
||||
* into the sum register
|
||||
*/
|
||||
movl ACCUM_LS,%eax
|
||||
// addl (%edi),%eax /* term ls long */
|
||||
movl %eax,SUM_LS
|
||||
movl ACCUM_MIDDLE,%eax
|
||||
// adcl (%edi),%eax /* term ls long */
|
||||
addl (%edi),%eax /* term ls long */
|
||||
movl %eax,SUM_MIDDLE
|
||||
movl ACCUM_MS,%eax
|
||||
adcl 4(%edi),%eax /* term ms long */
|
||||
movl %eax,SUM_MS
|
||||
sbbb %al,%al
|
||||
movb %al,OVERFLOWED /* Used in the next iteration */
|
||||
|
||||
subl TERM_SIZE,%edi
|
||||
decl PARAM4
|
||||
jns L_accum_loop
|
||||
|
||||
L_accum_done:
|
||||
movl PARAM1,%edi /* accum */
|
||||
movl SUM_LS,%eax
|
||||
addl %eax,(%edi)
|
||||
movl SUM_MIDDLE,%eax
|
||||
adcl %eax,4(%edi)
|
||||
movl SUM_MS,%eax
|
||||
adcl %eax,8(%edi)
|
||||
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
@ -1,71 +0,0 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_norm.S |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1995,1997,1999 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@melbpc.org.au |
|
||||
| |
|
||||
| Normalize the value in a FPU_REG. |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| int FPU_normalize_nuo(FPU_REG *n, int bias) |
|
||||
| |
|
||||
| Return value is the tag of the answer. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
/* Normalise without reporting underflow or overflow */
|
||||
ENTRY(FPU_normalize_nuo)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%ebx
|
||||
|
||||
movl SIGH(%ebx),%edx
|
||||
movl SIGL(%ebx),%eax
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_exit_nuo_valid /* Already normalized */
|
||||
jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
orl %eax,%eax
|
||||
jz L_exit_nuo_zero /* The contents are zero */
|
||||
|
||||
movl %eax,%edx
|
||||
xorl %eax,%eax
|
||||
subw $32,EXP(%ebx) /* This can cause an underflow */
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_nuo_shift_1:
|
||||
bsrl %edx,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
shld %cl,%eax,%edx
|
||||
shl %cl,%eax
|
||||
subw %cx,EXP(%ebx) /* This can cause an underflow */
|
||||
|
||||
movl %edx,SIGH(%ebx)
|
||||
movl %eax,SIGL(%ebx)
|
||||
|
||||
L_exit_nuo_valid:
|
||||
movl PARAM2,%eax
|
||||
addw %ax,EXP(%ebx)
|
||||
movl TAG_Valid,%eax
|
||||
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
|
||||
L_exit_nuo_zero:
|
||||
movw EXP_UNDER,EXP(%ebx)
|
||||
movl PARAM2,%eax
|
||||
addw %ax,EXP(%ebx)
|
||||
movl TAG_Zero,%eax
|
||||
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
@ -1,710 +0,0 @@
|
||||
.file "reg_round.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_round.S |
|
||||
| |
|
||||
| Rounding/truncation/etc for FPU basic arithmetic functions. |
|
||||
| |
|
||||
| Copyright (C) 1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| This code has four possible entry points. |
|
||||
| The following must be entered by a jmp instruction: |
|
||||
| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
|
||||
| |
|
||||
| The FPU_round entry point is intended to be used by C code. |
|
||||
| From C, call as: |
|
||||
| int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
|
||||
| |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
| |
|
||||
| For correct "up" and "down" rounding, the argument must have the correct |
|
||||
| sign. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Four entry points. |
|
||||
| |
|
||||
| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
|
||||
| %eax:%ebx 64 bit significand |
|
||||
| %edx 32 bit extension of the significand |
|
||||
| %edi pointer to an FPU_REG for the result to be stored |
|
||||
| stack calling function must have set up a C stack frame and |
|
||||
| pushed %esi, %edi, and %ebx |
|
||||
| |
|
||||
| Needed just for the fpu_reg_round_sqrt entry point: |
|
||||
| %cx A control word in the same format as the FPU control word. |
|
||||
| Otherwise, PARAM4 must give such a value. |
|
||||
| |
|
||||
| |
|
||||
| The significand and its extension are assumed to be exact in the |
|
||||
| following sense: |
|
||||
| If the significand by itself is the exact result then the significand |
|
||||
| extension (%edx) must contain 0, otherwise the significand extension |
|
||||
| must be non-zero. |
|
||||
| If the significand extension is non-zero then the significand is |
|
||||
| smaller than the magnitude of the correct exact result by an amount |
|
||||
| greater than zero and less than one ls bit of the significand. |
|
||||
| The significand extension is only required to have three possible |
|
||||
| non-zero values: |
|
||||
| less than 0x80000000 <=> the significand is less than 1/2 an ls |
|
||||
| bit smaller than the magnitude of the |
|
||||
| true exact result. |
|
||||
| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
|
||||
| smaller than the magnitude of the true |
|
||||
| exact result. |
|
||||
| greater than 0x80000000 <=> the significand is more than 1/2 an ls |
|
||||
| bit smaller than the magnitude of the |
|
||||
| true exact result. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| The code in this module has become quite complex, but it should handle |
|
||||
| all of the FPU flags which are set at this stage of the basic arithmetic |
|
||||
| computations. |
|
||||
| There are a few rare cases where the results are not set identically to |
|
||||
| a real FPU. These require a bit more thought because at this stage the |
|
||||
| results of the code here appear to be more consistent... |
|
||||
| This may be changed in a future version. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
#include "fpu_emu.h"
|
||||
#include "exception.h"
|
||||
#include "control_w.h"
|
||||
|
||||
/* Flags for FPU_bits_lost */
|
||||
#define LOST_DOWN $1
|
||||
#define LOST_UP $2
|
||||
|
||||
/* Flags for FPU_denormal */
|
||||
#define DENORMAL $1
|
||||
#define UNMASKED_UNDERFLOW $2
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/* Make the code re-entrant by putting
|
||||
local storage on the stack: */
|
||||
#define FPU_bits_lost (%esp)
|
||||
#define FPU_denormal 1(%esp)
|
||||
|
||||
#else
|
||||
/* Not re-entrant, so we can gain speed by putting
|
||||
local storage in a static area: */
|
||||
.data
|
||||
.align 4,0
|
||||
FPU_bits_lost:
|
||||
.byte 0
|
||||
FPU_denormal:
|
||||
.byte 0
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
|
||||
.text
|
||||
.globl fpu_reg_round
|
||||
.globl fpu_reg_round_sqrt
|
||||
.globl fpu_Arith_exit
|
||||
|
||||
/* Entry point when called from C */
|
||||
ENTRY(FPU_round)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%edi
|
||||
movl SIGH(%edi),%eax
|
||||
movl SIGL(%edi),%ebx
|
||||
movl PARAM2,%edx
|
||||
|
||||
fpu_reg_round: /* Normal entry point */
|
||||
movl PARAM4,%ecx
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
pushl %ebx /* adjust the stack pointer */
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
#ifdef PARANOID
|
||||
/* Cannot use this here yet */
|
||||
/* orl %eax,%eax */
|
||||
/* jns L_entry_bugged */
|
||||
#endif /* PARANOID */
|
||||
|
||||
cmpw EXP_UNDER,EXP(%edi)
|
||||
jle L_Make_denorm /* The number is a de-normal */
|
||||
|
||||
movb $0,FPU_denormal /* 0 -> not a de-normal */
|
||||
|
||||
Denorm_done:
|
||||
movb $0,FPU_bits_lost /* No bits yet lost in rounding */
|
||||
|
||||
movl %ecx,%esi
|
||||
andl CW_PC,%ecx
|
||||
cmpl PR_64_BITS,%ecx
|
||||
je LRound_To_64
|
||||
|
||||
cmpl PR_53_BITS,%ecx
|
||||
je LRound_To_53
|
||||
|
||||
cmpl PR_24_BITS,%ecx
|
||||
je LRound_To_24
|
||||
|
||||
#ifdef PECULIAR_486
|
||||
/* With the precision control bits set to 01 "(reserved)", a real 80486
|
||||
behaves as if the precision control bits were set to 11 "64 bits" */
|
||||
cmpl PR_RESERVED_BITS,%ecx
|
||||
je LRound_To_64
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_denorm_486
|
||||
#endif /* PARANOID */
|
||||
#else
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_denorm /* There is no bug, just a bad control word */
|
||||
#endif /* PARANOID */
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
|
||||
/* Round etc to 24 bit precision */
|
||||
LRound_To_24:
|
||||
movl %esi,%ecx
|
||||
andl CW_RC,%ecx
|
||||
cmpl RC_RND,%ecx
|
||||
je LRound_nearest_24
|
||||
|
||||
cmpl RC_CHOP,%ecx
|
||||
je LCheck_truncate_24
|
||||
|
||||
cmpl RC_UP,%ecx /* Towards +infinity */
|
||||
je LUp_24
|
||||
|
||||
cmpl RC_DOWN,%ecx /* Towards -infinity */
|
||||
je LDown_24
|
||||
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_round24
|
||||
#endif /* PARANOID */
|
||||
|
||||
LUp_24:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
jne LCheck_truncate_24 /* If negative then up==truncate */
|
||||
|
||||
jmp LCheck_24_round_up
|
||||
|
||||
LDown_24:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
je LCheck_truncate_24 /* If positive then down==truncate */
|
||||
|
||||
LCheck_24_round_up:
|
||||
movl %eax,%ecx
|
||||
andl $0x000000ff,%ecx
|
||||
orl %ebx,%ecx
|
||||
orl %edx,%ecx
|
||||
jnz LDo_24_round_up
|
||||
jmp L_Re_normalise
|
||||
|
||||
LRound_nearest_24:
|
||||
/* Do rounding of the 24th bit if needed (nearest or even) */
|
||||
movl %eax,%ecx
|
||||
andl $0x000000ff,%ecx
|
||||
cmpl $0x00000080,%ecx
|
||||
jc LCheck_truncate_24 /* less than half, no increment needed */
|
||||
|
||||
jne LGreater_Half_24 /* greater than half, increment needed */
|
||||
|
||||
/* Possibly half, we need to check the ls bits */
|
||||
orl %ebx,%ebx
|
||||
jnz LGreater_Half_24 /* greater than half, increment needed */
|
||||
|
||||
orl %edx,%edx
|
||||
jnz LGreater_Half_24 /* greater than half, increment needed */
|
||||
|
||||
/* Exactly half, increment only if 24th bit is 1 (round to even) */
|
||||
testl $0x00000100,%eax
|
||||
jz LDo_truncate_24
|
||||
|
||||
LGreater_Half_24: /* Rounding: increment at the 24th bit */
|
||||
LDo_24_round_up:
|
||||
andl $0xffffff00,%eax /* Truncate to 24 bits */
|
||||
xorl %ebx,%ebx
|
||||
movb LOST_UP,FPU_bits_lost
|
||||
addl $0x00000100,%eax
|
||||
jmp LCheck_Round_Overflow
|
||||
|
||||
LCheck_truncate_24:
|
||||
movl %eax,%ecx
|
||||
andl $0x000000ff,%ecx
|
||||
orl %ebx,%ecx
|
||||
orl %edx,%ecx
|
||||
jz L_Re_normalise /* No truncation needed */
|
||||
|
||||
LDo_truncate_24:
|
||||
andl $0xffffff00,%eax /* Truncate to 24 bits */
|
||||
xorl %ebx,%ebx
|
||||
movb LOST_DOWN,FPU_bits_lost
|
||||
jmp L_Re_normalise
|
||||
|
||||
|
||||
/* Round etc to 53 bit precision */
|
||||
LRound_To_53:
|
||||
movl %esi,%ecx
|
||||
andl CW_RC,%ecx
|
||||
cmpl RC_RND,%ecx
|
||||
je LRound_nearest_53
|
||||
|
||||
cmpl RC_CHOP,%ecx
|
||||
je LCheck_truncate_53
|
||||
|
||||
cmpl RC_UP,%ecx /* Towards +infinity */
|
||||
je LUp_53
|
||||
|
||||
cmpl RC_DOWN,%ecx /* Towards -infinity */
|
||||
je LDown_53
|
||||
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_round53
|
||||
#endif /* PARANOID */
|
||||
|
||||
LUp_53:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
jne LCheck_truncate_53 /* If negative then up==truncate */
|
||||
|
||||
jmp LCheck_53_round_up
|
||||
|
||||
LDown_53:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
je LCheck_truncate_53 /* If positive then down==truncate */
|
||||
|
||||
LCheck_53_round_up:
|
||||
movl %ebx,%ecx
|
||||
andl $0x000007ff,%ecx
|
||||
orl %edx,%ecx
|
||||
jnz LDo_53_round_up
|
||||
jmp L_Re_normalise
|
||||
|
||||
LRound_nearest_53:
|
||||
/* Do rounding of the 53rd bit if needed (nearest or even) */
|
||||
movl %ebx,%ecx
|
||||
andl $0x000007ff,%ecx
|
||||
cmpl $0x00000400,%ecx
|
||||
jc LCheck_truncate_53 /* less than half, no increment needed */
|
||||
|
||||
jnz LGreater_Half_53 /* greater than half, increment needed */
|
||||
|
||||
/* Possibly half, we need to check the ls bits */
|
||||
orl %edx,%edx
|
||||
jnz LGreater_Half_53 /* greater than half, increment needed */
|
||||
|
||||
/* Exactly half, increment only if 53rd bit is 1 (round to even) */
|
||||
testl $0x00000800,%ebx
|
||||
jz LTruncate_53
|
||||
|
||||
LGreater_Half_53: /* Rounding: increment at the 53rd bit */
|
||||
LDo_53_round_up:
|
||||
movb LOST_UP,FPU_bits_lost
|
||||
andl $0xfffff800,%ebx /* Truncate to 53 bits */
|
||||
addl $0x00000800,%ebx
|
||||
adcl $0,%eax
|
||||
jmp LCheck_Round_Overflow
|
||||
|
||||
LCheck_truncate_53:
|
||||
movl %ebx,%ecx
|
||||
andl $0x000007ff,%ecx
|
||||
orl %edx,%ecx
|
||||
jz L_Re_normalise
|
||||
|
||||
LTruncate_53:
|
||||
movb LOST_DOWN,FPU_bits_lost
|
||||
andl $0xfffff800,%ebx /* Truncate to 53 bits */
|
||||
jmp L_Re_normalise
|
||||
|
||||
|
||||
/* Round etc to 64 bit precision */
|
||||
LRound_To_64:
|
||||
movl %esi,%ecx
|
||||
andl CW_RC,%ecx
|
||||
cmpl RC_RND,%ecx
|
||||
je LRound_nearest_64
|
||||
|
||||
cmpl RC_CHOP,%ecx
|
||||
je LCheck_truncate_64
|
||||
|
||||
cmpl RC_UP,%ecx /* Towards +infinity */
|
||||
je LUp_64
|
||||
|
||||
cmpl RC_DOWN,%ecx /* Towards -infinity */
|
||||
je LDown_64
|
||||
|
||||
#ifdef PARANOID
|
||||
jmp L_bugged_round64
|
||||
#endif /* PARANOID */
|
||||
|
||||
LUp_64:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
jne LCheck_truncate_64 /* If negative then up==truncate */
|
||||
|
||||
orl %edx,%edx
|
||||
jnz LDo_64_round_up
|
||||
jmp L_Re_normalise
|
||||
|
||||
LDown_64:
|
||||
cmpb SIGN_POS,PARAM5
|
||||
je LCheck_truncate_64 /* If positive then down==truncate */
|
||||
|
||||
orl %edx,%edx
|
||||
jnz LDo_64_round_up
|
||||
jmp L_Re_normalise
|
||||
|
||||
LRound_nearest_64:
|
||||
cmpl $0x80000000,%edx
|
||||
jc LCheck_truncate_64
|
||||
|
||||
jne LDo_64_round_up
|
||||
|
||||
/* Now test for round-to-even */
|
||||
testb $1,%bl
|
||||
jz LCheck_truncate_64
|
||||
|
||||
LDo_64_round_up:
|
||||
movb LOST_UP,FPU_bits_lost
|
||||
addl $1,%ebx
|
||||
adcl $0,%eax
|
||||
|
||||
LCheck_Round_Overflow:
|
||||
jnc L_Re_normalise
|
||||
|
||||
/* Overflow, adjust the result (significand to 1.0) */
|
||||
rcrl $1,%eax
|
||||
rcrl $1,%ebx
|
||||
incw EXP(%edi)
|
||||
jmp L_Re_normalise
|
||||
|
||||
LCheck_truncate_64:
|
||||
orl %edx,%edx
|
||||
jz L_Re_normalise
|
||||
|
||||
LTruncate_64:
|
||||
movb LOST_DOWN,FPU_bits_lost
|
||||
|
||||
L_Re_normalise:
|
||||
testb $0xff,FPU_denormal
|
||||
jnz Normalise_result
|
||||
|
||||
L_Normalised:
|
||||
movl TAG_Valid,%edx
|
||||
|
||||
L_deNormalised:
|
||||
cmpb LOST_UP,FPU_bits_lost
|
||||
je L_precision_lost_up
|
||||
|
||||
cmpb LOST_DOWN,FPU_bits_lost
|
||||
je L_precision_lost_down
|
||||
|
||||
L_no_precision_loss:
|
||||
/* store the result */
|
||||
|
||||
L_Store_significand:
|
||||
movl %eax,SIGH(%edi)
|
||||
movl %ebx,SIGL(%edi)
|
||||
|
||||
cmpw EXP_OVER,EXP(%edi)
|
||||
jge L_overflow
|
||||
|
||||
movl %edx,%eax
|
||||
|
||||
/* Convert the exponent to 80x87 form. */
|
||||
addw EXTENDED_Ebias,EXP(%edi)
|
||||
andw $0x7fff,EXP(%edi)
|
||||
|
||||
fpu_reg_round_signed_special_exit:
|
||||
|
||||
cmpb SIGN_POS,PARAM5
|
||||
je fpu_reg_round_special_exit
|
||||
|
||||
orw $0x8000,EXP(%edi) /* Negative sign for the result. */
|
||||
|
||||
fpu_reg_round_special_exit:
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
popl %ebx /* adjust the stack pointer */
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
fpu_Arith_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
/*
|
||||
* Set the FPU status flags to represent precision loss due to
|
||||
* round-up.
|
||||
*/
|
||||
L_precision_lost_up:
|
||||
push %edx
|
||||
push %eax
|
||||
call SYMBOL_NAME(set_precision_flag_up)
|
||||
popl %eax
|
||||
popl %edx
|
||||
jmp L_no_precision_loss
|
||||
|
||||
/*
|
||||
* Set the FPU status flags to represent precision loss due to
|
||||
* truncation.
|
||||
*/
|
||||
L_precision_lost_down:
|
||||
push %edx
|
||||
push %eax
|
||||
call SYMBOL_NAME(set_precision_flag_down)
|
||||
popl %eax
|
||||
popl %edx
|
||||
jmp L_no_precision_loss
|
||||
|
||||
|
||||
/*
|
||||
* The number is a denormal (which might get rounded up to a normal)
|
||||
* Shift the number right the required number of bits, which will
|
||||
* have to be undone later...
|
||||
*/
|
||||
L_Make_denorm:
|
||||
/* The action to be taken depends upon whether the underflow
|
||||
exception is masked */
|
||||
testb CW_Underflow,%cl /* Underflow mask. */
|
||||
jz Unmasked_underflow /* Do not make a denormal. */
|
||||
|
||||
movb DENORMAL,FPU_denormal
|
||||
|
||||
pushl %ecx /* Save */
|
||||
movw EXP_UNDER+1,%cx
|
||||
subw EXP(%edi),%cx
|
||||
|
||||
cmpw $64,%cx /* shrd only works for 0..31 bits */
|
||||
jnc Denorm_shift_more_than_63
|
||||
|
||||
cmpw $32,%cx /* shrd only works for 0..31 bits */
|
||||
jnc Denorm_shift_more_than_32
|
||||
|
||||
/*
|
||||
* We got here without jumps by assuming that the most common requirement
|
||||
* is for a small de-normalising shift.
|
||||
* Shift by [1..31] bits
|
||||
*/
|
||||
addw %cx,EXP(%edi)
|
||||
orl %edx,%edx /* extension */
|
||||
setne %ch /* Save whether %edx is non-zero */
|
||||
xorl %edx,%edx
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
orb %ch,%dl
|
||||
popl %ecx
|
||||
jmp Denorm_done
|
||||
|
||||
/* Shift by [32..63] bits */
|
||||
Denorm_shift_more_than_32:
|
||||
addw %cx,EXP(%edi)
|
||||
subb $32,%cl
|
||||
orl %edx,%edx
|
||||
setne %ch
|
||||
orb %ch,%bl
|
||||
xorl %edx,%edx
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
orl %edx,%edx /* test these 32 bits */
|
||||
setne %cl
|
||||
orb %ch,%bl
|
||||
orb %cl,%bl
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
popl %ecx
|
||||
jmp Denorm_done
|
||||
|
||||
/* Shift by [64..) bits */
|
||||
Denorm_shift_more_than_63:
|
||||
cmpw $64,%cx
|
||||
jne Denorm_shift_more_than_64
|
||||
|
||||
/* Exactly 64 bit shift */
|
||||
addw %cx,EXP(%edi)
|
||||
xorl %ecx,%ecx
|
||||
orl %edx,%edx
|
||||
setne %cl
|
||||
orl %ebx,%ebx
|
||||
setne %ch
|
||||
orb %ch,%cl
|
||||
orb %cl,%al
|
||||
movl %eax,%edx
|
||||
xorl %eax,%eax
|
||||
xorl %ebx,%ebx
|
||||
popl %ecx
|
||||
jmp Denorm_done
|
||||
|
||||
Denorm_shift_more_than_64:
|
||||
movw EXP_UNDER+1,EXP(%edi)
|
||||
/* This is easy, %eax must be non-zero, so.. */
|
||||
movl $1,%edx
|
||||
xorl %eax,%eax
|
||||
xorl %ebx,%ebx
|
||||
popl %ecx
|
||||
jmp Denorm_done
|
||||
|
||||
|
||||
Unmasked_underflow:
|
||||
movb UNMASKED_UNDERFLOW,FPU_denormal
|
||||
jmp Denorm_done
|
||||
|
||||
|
||||
/* Undo the de-normalisation. */
|
||||
Normalise_result:
|
||||
cmpb UNMASKED_UNDERFLOW,FPU_denormal
|
||||
je Signal_underflow
|
||||
|
||||
/* The number must be a denormal if we got here. */
|
||||
#ifdef PARANOID
|
||||
/* But check it... just in case. */
|
||||
cmpw EXP_UNDER+1,EXP(%edi)
|
||||
jne L_norm_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
#ifdef PECULIAR_486
|
||||
/*
|
||||
* This implements a special feature of 80486 behaviour.
|
||||
* Underflow will be signalled even if the number is
|
||||
* not a denormal after rounding.
|
||||
* This difference occurs only for masked underflow, and not
|
||||
* in the unmasked case.
|
||||
* Actual 80486 behaviour differs from this in some circumstances.
|
||||
*/
|
||||
orl %eax,%eax /* ms bits */
|
||||
js LPseudoDenormal /* Will be masked underflow */
|
||||
#else
|
||||
orl %eax,%eax /* ms bits */
|
||||
js L_Normalised /* No longer a denormal */
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
jnz LDenormal_adj_exponent
|
||||
|
||||
orl %ebx,%ebx
|
||||
jz L_underflow_to_zero /* The contents are zero */
|
||||
|
||||
LDenormal_adj_exponent:
|
||||
decw EXP(%edi)
|
||||
|
||||
LPseudoDenormal:
|
||||
testb $0xff,FPU_bits_lost /* bits lost == underflow */
|
||||
movl TAG_Special,%edx
|
||||
jz L_deNormalised
|
||||
|
||||
/* There must be a masked underflow */
|
||||
push %eax
|
||||
pushl EX_Underflow
|
||||
call EXCEPTION
|
||||
popl %eax
|
||||
popl %eax
|
||||
movl TAG_Special,%edx
|
||||
jmp L_deNormalised
|
||||
|
||||
|
||||
/*
|
||||
* The operations resulted in a number too small to represent.
|
||||
* Masked response.
|
||||
*/
|
||||
L_underflow_to_zero:
|
||||
push %eax
|
||||
call SYMBOL_NAME(set_precision_flag_down)
|
||||
popl %eax
|
||||
|
||||
push %eax
|
||||
pushl EX_Underflow
|
||||
call EXCEPTION
|
||||
popl %eax
|
||||
popl %eax
|
||||
|
||||
/* Reduce the exponent to EXP_UNDER */
|
||||
movw EXP_UNDER,EXP(%edi)
|
||||
movl TAG_Zero,%edx
|
||||
jmp L_Store_significand
|
||||
|
||||
|
||||
/* The operations resulted in a number too large to represent. */
|
||||
L_overflow:
|
||||
pushw PARAM5
|
||||
addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
|
||||
push %edi
|
||||
call SYMBOL_NAME(arith_round_overflow)
|
||||
pop %edi
|
||||
jmp fpu_reg_round_signed_special_exit
|
||||
|
||||
|
||||
Signal_underflow:
|
||||
/* The number may have been changed to a non-denormal */
|
||||
/* by the rounding operations. */
|
||||
cmpw EXP_UNDER,EXP(%edi)
|
||||
jle Do_unmasked_underflow
|
||||
|
||||
jmp L_Normalised
|
||||
|
||||
Do_unmasked_underflow:
|
||||
/* Increase the exponent by the magic number */
|
||||
addw $(3*(1<<13)),EXP(%edi)
|
||||
push %eax
|
||||
pushl EX_Underflow
|
||||
call EXCEPTION
|
||||
popl %eax
|
||||
popl %eax
|
||||
jmp L_Normalised
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
#ifdef PECULIAR_486
|
||||
L_bugged_denorm_486:
|
||||
pushl EX_INTERNAL|0x236
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
#else
|
||||
L_bugged_denorm:
|
||||
pushl EX_INTERNAL|0x230
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
#endif /* PECULIAR_486 */
|
||||
|
||||
L_bugged_round24:
|
||||
pushl EX_INTERNAL|0x231
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
|
||||
L_bugged_round53:
|
||||
pushl EX_INTERNAL|0x232
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
|
||||
L_bugged_round64:
|
||||
pushl EX_INTERNAL|0x233
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
|
||||
L_norm_bugged:
|
||||
pushl EX_INTERNAL|0x234
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
jmp L_exception_exit
|
||||
|
||||
L_entry_bugged:
|
||||
pushl EX_INTERNAL|0x235
|
||||
call EXCEPTION
|
||||
popl %ebx
|
||||
L_exception_exit:
|
||||
mov $-1,%eax
|
||||
jmp fpu_reg_round_special_exit
|
||||
#endif /* PARANOID */
|
@ -1,167 +0,0 @@
|
||||
.file "reg_u_add.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_u_add.S |
|
||||
| |
|
||||
| Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the |
|
||||
| result in a destination FPU_REG. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
|
||||
| int control_w) |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
| Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ).
|
||||
| Takes two valid reg f.p. numbers (TAG_Valid), which are
|
||||
| treated as unsigned numbers,
|
||||
| and returns their sum as a TAG_Valid or TAG_Special f.p. number.
|
||||
| The returned number is normalized.
|
||||
| Basic checks are performed if PARANOID is defined.
|
||||
*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
|
||||
.text
|
||||
ENTRY(FPU_u_add)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi /* source 1 */
|
||||
movl PARAM2,%edi /* source 2 */
|
||||
|
||||
movl PARAM6,%ecx
|
||||
movl %ecx,%edx
|
||||
subl PARAM7,%ecx /* exp1 - exp2 */
|
||||
jge L_arg1_larger
|
||||
|
||||
/* num1 is smaller */
|
||||
movl SIGL(%esi),%ebx
|
||||
movl SIGH(%esi),%eax
|
||||
|
||||
movl %edi,%esi
|
||||
movl PARAM7,%edx
|
||||
negw %cx
|
||||
jmp L_accum_loaded
|
||||
|
||||
L_arg1_larger:
|
||||
/* num1 has larger or equal exponent */
|
||||
movl SIGL(%edi),%ebx
|
||||
movl SIGH(%edi),%eax
|
||||
|
||||
L_accum_loaded:
|
||||
movl PARAM3,%edi /* destination */
|
||||
movw %dx,EXP(%edi) /* Copy exponent to destination */
|
||||
|
||||
xorl %edx,%edx /* clear the extension */
|
||||
|
||||
#ifdef PARANOID
|
||||
testl $0x80000000,%eax
|
||||
je L_bugged
|
||||
|
||||
testl $0x80000000,SIGH(%esi)
|
||||
je L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* The number to be shifted is in %eax:%ebx:%edx */
|
||||
cmpw $32,%cx /* shrd only works for 0..31 bits */
|
||||
jnc L_more_than_31
|
||||
|
||||
/* less than 32 bits */
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_more_than_31:
|
||||
cmpw $64,%cx
|
||||
jnc L_more_than_63
|
||||
|
||||
subb $32,%cl
|
||||
jz L_exactly_32
|
||||
|
||||
shrd %cl,%eax,%edx
|
||||
shr %cl,%eax
|
||||
orl %ebx,%ebx
|
||||
jz L_more_31_no_low /* none of the lowest bits is set */
|
||||
|
||||
orl $1,%edx /* record the fact in the extension */
|
||||
|
||||
L_more_31_no_low:
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_exactly_32:
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_more_than_63:
|
||||
cmpw $65,%cx
|
||||
jnc L_more_than_64
|
||||
|
||||
movl %eax,%edx
|
||||
orl %ebx,%ebx
|
||||
jz L_more_63_no_low
|
||||
|
||||
orl $1,%edx
|
||||
jmp L_more_63_no_low
|
||||
|
||||
L_more_than_64:
|
||||
movl $1,%edx /* The shifted nr always at least one '1' */
|
||||
|
||||
L_more_63_no_low:
|
||||
xorl %ebx,%ebx
|
||||
xorl %eax,%eax
|
||||
|
||||
L_shift_done:
|
||||
/* Now do the addition */
|
||||
addl SIGL(%esi),%ebx
|
||||
adcl SIGH(%esi),%eax
|
||||
jnc L_round_the_result
|
||||
|
||||
/* Overflow, adjust the result */
|
||||
rcrl $1,%eax
|
||||
rcrl $1,%ebx
|
||||
rcrl $1,%edx
|
||||
jnc L_no_bit_lost
|
||||
|
||||
orl $1,%edx
|
||||
|
||||
L_no_bit_lost:
|
||||
incw EXP(%edi)
|
||||
|
||||
L_round_the_result:
|
||||
jmp fpu_reg_round /* Round the result */
|
||||
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
/* If we ever get here then we have problems! */
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x201
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
movl $-1,%eax
|
||||
jmp L_exit
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
#endif /* PARANOID */
|
@ -1,473 +0,0 @@
|
||||
.file "reg_u_div.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_u_div.S |
|
||||
| |
|
||||
| Divide one FPU_REG by another and put the result in a destination FPU_REG.|
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Call from C as: |
|
||||
| int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, |
|
||||
| unsigned int control_word, char *sign) |
|
||||
| |
|
||||
| Does not compute the destination exponent, but does adjust it. |
|
||||
| |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
|
||||
|
||||
/* #define dSIGL(x) (x) */
|
||||
/* #define dSIGH(x) 4(x) */
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/*
|
||||
Local storage on the stack:
|
||||
Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
|
||||
Overflow flag: ovfl_flag
|
||||
*/
|
||||
#define FPU_accum_3 -4(%ebp)
|
||||
#define FPU_accum_2 -8(%ebp)
|
||||
#define FPU_accum_1 -12(%ebp)
|
||||
#define FPU_accum_0 -16(%ebp)
|
||||
#define FPU_result_1 -20(%ebp)
|
||||
#define FPU_result_2 -24(%ebp)
|
||||
#define FPU_ovfl_flag -28(%ebp)
|
||||
|
||||
#else
|
||||
.data
|
||||
/*
|
||||
Local storage in a static area:
|
||||
Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
|
||||
Overflow flag: ovfl_flag
|
||||
*/
|
||||
.align 4,0
|
||||
FPU_accum_3:
|
||||
.long 0
|
||||
FPU_accum_2:
|
||||
.long 0
|
||||
FPU_accum_1:
|
||||
.long 0
|
||||
FPU_accum_0:
|
||||
.long 0
|
||||
FPU_result_1:
|
||||
.long 0
|
||||
FPU_result_2:
|
||||
.long 0
|
||||
FPU_ovfl_flag:
|
||||
.byte 0
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
#define REGA PARAM1
|
||||
#define REGB PARAM2
|
||||
#define DEST PARAM3
|
||||
|
||||
.text
|
||||
ENTRY(FPU_u_div)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
subl $28,%esp
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl REGA,%esi
|
||||
movl REGB,%ebx
|
||||
movl DEST,%edi
|
||||
|
||||
movw EXP(%esi),%dx
|
||||
movw EXP(%ebx),%ax
|
||||
.byte 0x0f,0xbf,0xc0 /* movsx %ax,%eax */
|
||||
.byte 0x0f,0xbf,0xd2 /* movsx %dx,%edx */
|
||||
subl %eax,%edx
|
||||
addl EXP_BIAS,%edx
|
||||
|
||||
/* A denormal and a large number can cause an exponent underflow */
|
||||
cmpl EXP_WAY_UNDER,%edx
|
||||
jg xExp_not_underflow
|
||||
|
||||
/* Set to a really low value allow correct handling */
|
||||
movl EXP_WAY_UNDER,%edx
|
||||
|
||||
xExp_not_underflow:
|
||||
|
||||
movw %dx,EXP(%edi)
|
||||
|
||||
#ifdef PARANOID
|
||||
/* testl $0x80000000, SIGH(%esi) // Dividend */
|
||||
/* je L_bugged */
|
||||
testl $0x80000000, SIGH(%ebx) /* Divisor */
|
||||
je L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* Check if the divisor can be treated as having just 32 bits */
|
||||
cmpl $0,SIGL(%ebx)
|
||||
jnz L_Full_Division /* Can't do a quick divide */
|
||||
|
||||
/* We should be able to zip through the division here */
|
||||
movl SIGH(%ebx),%ecx /* The divisor */
|
||||
movl SIGH(%esi),%edx /* Dividend */
|
||||
movl SIGL(%esi),%eax /* Dividend */
|
||||
|
||||
cmpl %ecx,%edx
|
||||
setaeb FPU_ovfl_flag /* Keep a record */
|
||||
jb L_no_adjust
|
||||
|
||||
subl %ecx,%edx /* Prevent the overflow */
|
||||
|
||||
L_no_adjust:
|
||||
/* Divide the 64 bit number by the 32 bit denominator */
|
||||
divl %ecx
|
||||
movl %eax,FPU_result_2
|
||||
|
||||
/* Work on the remainder of the first division */
|
||||
xorl %eax,%eax
|
||||
divl %ecx
|
||||
movl %eax,FPU_result_1
|
||||
|
||||
/* Work on the remainder of the 64 bit division */
|
||||
xorl %eax,%eax
|
||||
divl %ecx
|
||||
|
||||
testb $255,FPU_ovfl_flag /* was the num > denom ? */
|
||||
je L_no_overflow
|
||||
|
||||
/* Do the shifting here */
|
||||
/* increase the exponent */
|
||||
incw EXP(%edi)
|
||||
|
||||
/* shift the mantissa right one bit */
|
||||
stc /* To set the ms bit */
|
||||
rcrl FPU_result_2
|
||||
rcrl FPU_result_1
|
||||
rcrl %eax
|
||||
|
||||
L_no_overflow:
|
||||
jmp LRound_precision /* Do the rounding as required */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Divide: Return arg1/arg2 to arg3. |
|
||||
| |
|
||||
| This routine does not use the exponents of arg1 and arg2, but does |
|
||||
| adjust the exponent of arg3. |
|
||||
| |
|
||||
| The maximum returned value is (ignoring exponents) |
|
||||
| .ffffffff ffffffff |
|
||||
| ------------------ = 1.ffffffff fffffffe |
|
||||
| .80000000 00000000 |
|
||||
| and the minimum is |
|
||||
| .80000000 00000000 |
|
||||
| ------------------ = .80000000 00000001 (rounded) |
|
||||
| .ffffffff ffffffff |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
|
||||
L_Full_Division:
|
||||
/* Save extended dividend in local register */
|
||||
movl SIGL(%esi),%eax
|
||||
movl %eax,FPU_accum_2
|
||||
movl SIGH(%esi),%eax
|
||||
movl %eax,FPU_accum_3
|
||||
xorl %eax,%eax
|
||||
movl %eax,FPU_accum_1 /* zero the extension */
|
||||
movl %eax,FPU_accum_0 /* zero the extension */
|
||||
|
||||
movl SIGL(%esi),%eax /* Get the current num */
|
||||
movl SIGH(%esi),%edx
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Initialization done.
|
||||
Do the first 32 bits. */
|
||||
|
||||
movb $0,FPU_ovfl_flag
|
||||
cmpl SIGH(%ebx),%edx /* Test for imminent overflow */
|
||||
jb LLess_than_1
|
||||
ja LGreater_than_1
|
||||
|
||||
cmpl SIGL(%ebx),%eax
|
||||
jb LLess_than_1
|
||||
|
||||
LGreater_than_1:
|
||||
/* The dividend is greater or equal, would cause overflow */
|
||||
setaeb FPU_ovfl_flag /* Keep a record */
|
||||
|
||||
subl SIGL(%ebx),%eax
|
||||
sbbl SIGH(%ebx),%edx /* Prevent the overflow */
|
||||
movl %eax,FPU_accum_2
|
||||
movl %edx,FPU_accum_3
|
||||
|
||||
LLess_than_1:
|
||||
/* At this point, we have a dividend < divisor, with a record of
|
||||
adjustment in FPU_ovfl_flag */
|
||||
|
||||
/* We will divide by a number which is too large */
|
||||
movl SIGH(%ebx),%ecx
|
||||
addl $1,%ecx
|
||||
jnc LFirst_div_not_1
|
||||
|
||||
/* here we need to divide by 100000000h,
|
||||
i.e., no division at all.. */
|
||||
mov %edx,%eax
|
||||
jmp LFirst_div_done
|
||||
|
||||
LFirst_div_not_1:
|
||||
divl %ecx /* Divide the numerator by the augmented
|
||||
denom ms dw */
|
||||
|
||||
LFirst_div_done:
|
||||
movl %eax,FPU_result_2 /* Put the result in the answer */
|
||||
|
||||
mull SIGH(%ebx) /* mul by the ms dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_2 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_3
|
||||
|
||||
movl FPU_result_2,%eax /* Get the result back */
|
||||
mull SIGL(%ebx) /* now mul the ls dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
sbbl $0,FPU_accum_3
|
||||
je LDo_2nd_32_bits /* Must check for non-zero result here */
|
||||
|
||||
#ifdef PARANOID
|
||||
jb L_bugged_1
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* need to subtract another once of the denom */
|
||||
incl FPU_result_2 /* Correct the answer */
|
||||
|
||||
movl SIGL(%ebx),%eax
|
||||
movl SIGH(%ebx),%edx
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
sbbl $0,FPU_accum_3
|
||||
jne L_bugged_1 /* Must check for non-zero result here */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Half of the main problem is done, there is just a reduced numerator
|
||||
to handle now.
|
||||
Work with the second 32 bits, FPU_accum_0 not used from now on */
|
||||
LDo_2nd_32_bits:
|
||||
movl FPU_accum_2,%edx /* get the reduced num */
|
||||
movl FPU_accum_1,%eax
|
||||
|
||||
/* need to check for possible subsequent overflow */
|
||||
cmpl SIGH(%ebx),%edx
|
||||
jb LDo_2nd_div
|
||||
ja LPrevent_2nd_overflow
|
||||
|
||||
cmpl SIGL(%ebx),%eax
|
||||
jb LDo_2nd_div
|
||||
|
||||
LPrevent_2nd_overflow:
|
||||
/* The numerator is greater or equal, would cause overflow */
|
||||
/* prevent overflow */
|
||||
subl SIGL(%ebx),%eax
|
||||
sbbl SIGH(%ebx),%edx
|
||||
movl %edx,FPU_accum_2
|
||||
movl %eax,FPU_accum_1
|
||||
|
||||
incl FPU_result_2 /* Reflect the subtraction in the answer */
|
||||
|
||||
#ifdef PARANOID
|
||||
je L_bugged_2 /* Can't bump the result to 1.0 */
|
||||
#endif /* PARANOID */
|
||||
|
||||
LDo_2nd_div:
|
||||
cmpl $0,%ecx /* augmented denom msw */
|
||||
jnz LSecond_div_not_1
|
||||
|
||||
/* %ecx == 0, we are dividing by 1.0 */
|
||||
mov %edx,%eax
|
||||
jmp LSecond_div_done
|
||||
|
||||
LSecond_div_not_1:
|
||||
divl %ecx /* Divide the numerator by the denom ms dw */
|
||||
|
||||
LSecond_div_done:
|
||||
movl %eax,FPU_result_1 /* Put the result in the answer */
|
||||
|
||||
mull SIGH(%ebx) /* mul by the ms dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
movl FPU_result_1,%eax /* Get the result back */
|
||||
mull SIGL(%ebx) /* now mul the ls dw of the denom */
|
||||
|
||||
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
|
||||
sbbl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
jz LDo_3rd_32_bits
|
||||
|
||||
#ifdef PARANOID
|
||||
cmpl $1,FPU_accum_2
|
||||
jne L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* need to subtract another once of the denom */
|
||||
movl SIGL(%ebx),%eax
|
||||
movl SIGH(%ebx),%edx
|
||||
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
|
||||
sbbl %edx,FPU_accum_1
|
||||
sbbl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2
|
||||
jne L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
addl $1,FPU_result_1 /* Correct the answer */
|
||||
adcl $0,FPU_result_2
|
||||
|
||||
#ifdef PARANOID
|
||||
jc L_bugged_2 /* Must check for non-zero result here */
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* The division is essentially finished here, we just need to perform
|
||||
tidying operations.
|
||||
Deal with the 3rd 32 bits */
|
||||
LDo_3rd_32_bits:
|
||||
movl FPU_accum_1,%edx /* get the reduced num */
|
||||
movl FPU_accum_0,%eax
|
||||
|
||||
/* need to check for possible subsequent overflow */
|
||||
cmpl SIGH(%ebx),%edx /* denom */
|
||||
jb LRound_prep
|
||||
ja LPrevent_3rd_overflow
|
||||
|
||||
cmpl SIGL(%ebx),%eax /* denom */
|
||||
jb LRound_prep
|
||||
|
||||
LPrevent_3rd_overflow:
|
||||
/* prevent overflow */
|
||||
subl SIGL(%ebx),%eax
|
||||
sbbl SIGH(%ebx),%edx
|
||||
movl %edx,FPU_accum_1
|
||||
movl %eax,FPU_accum_0
|
||||
|
||||
addl $1,FPU_result_1 /* Reflect the subtraction in the answer */
|
||||
adcl $0,FPU_result_2
|
||||
jne LRound_prep
|
||||
jnc LRound_prep
|
||||
|
||||
/* This is a tricky spot, there is an overflow of the answer */
|
||||
movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */
|
||||
|
||||
LRound_prep:
|
||||
/*
|
||||
* Prepare for rounding.
|
||||
* To test for rounding, we just need to compare 2*accum with the
|
||||
* denom.
|
||||
*/
|
||||
movl FPU_accum_0,%ecx
|
||||
movl FPU_accum_1,%edx
|
||||
movl %ecx,%eax
|
||||
orl %edx,%eax
|
||||
jz LRound_ovfl /* The accumulator contains zero. */
|
||||
|
||||
/* Multiply by 2 */
|
||||
clc
|
||||
rcll $1,%ecx
|
||||
rcll $1,%edx
|
||||
jc LRound_large /* No need to compare, denom smaller */
|
||||
|
||||
subl SIGL(%ebx),%ecx
|
||||
sbbl SIGH(%ebx),%edx
|
||||
jnc LRound_not_small
|
||||
|
||||
movl $0x70000000,%eax /* Denom was larger */
|
||||
jmp LRound_ovfl
|
||||
|
||||
LRound_not_small:
|
||||
jnz LRound_large
|
||||
|
||||
movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */
|
||||
jmp LRound_ovfl
|
||||
|
||||
LRound_large:
|
||||
movl $0xff000000,%eax /* Denom was smaller */
|
||||
|
||||
LRound_ovfl:
|
||||
/* We are now ready to deal with rounding, but first we must get
|
||||
the bits properly aligned */
|
||||
testb $255,FPU_ovfl_flag /* was the num > denom ? */
|
||||
je LRound_precision
|
||||
|
||||
incw EXP(%edi)
|
||||
|
||||
/* shift the mantissa right one bit */
|
||||
stc /* Will set the ms bit */
|
||||
rcrl FPU_result_2
|
||||
rcrl FPU_result_1
|
||||
rcrl %eax
|
||||
|
||||
/* Round the result as required */
|
||||
LRound_precision:
|
||||
decw EXP(%edi) /* binary point between 1st & 2nd bits */
|
||||
|
||||
movl %eax,%edx
|
||||
movl FPU_result_1,%ebx
|
||||
movl FPU_result_2,%eax
|
||||
jmp fpu_reg_round
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
/* The logic is wrong if we got here */
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x202
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_bugged_1:
|
||||
pushl EX_INTERNAL|0x203
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_bugged_2:
|
||||
pushl EX_INTERNAL|0x204
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_exit:
|
||||
movl $-1,%eax
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
|
||||
leave
|
||||
ret
|
||||
#endif /* PARANOID */
|
@ -1,148 +0,0 @@
|
||||
.file "reg_u_mul.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_u_mul.S |
|
||||
| |
|
||||
| Core multiplication routine |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| Basic multiplication routine. |
|
||||
| Does not check the resulting exponent for overflow/underflow |
|
||||
| |
|
||||
| FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); |
|
||||
| |
|
||||
| Internal working is at approx 128 bits. |
|
||||
| Result is rounded to nearest 53 or 64 bits, using "nearest or even". |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/* Local storage on the stack: */
|
||||
#define FPU_accum_0 -4(%ebp) /* ms word */
|
||||
#define FPU_accum_1 -8(%ebp)
|
||||
|
||||
#else
|
||||
/* Local storage in a static area: */
|
||||
.data
|
||||
.align 4,0
|
||||
FPU_accum_0:
|
||||
.long 0
|
||||
FPU_accum_1:
|
||||
.long 0
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(FPU_u_mul)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
subl $8,%esp
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi
|
||||
movl PARAM2,%edi
|
||||
|
||||
#ifdef PARANOID
|
||||
testl $0x80000000,SIGH(%esi)
|
||||
jz L_bugged
|
||||
testl $0x80000000,SIGH(%edi)
|
||||
jz L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
xorl %ecx,%ecx
|
||||
xorl %ebx,%ebx
|
||||
|
||||
movl SIGL(%esi),%eax
|
||||
mull SIGL(%edi)
|
||||
movl %eax,FPU_accum_0
|
||||
movl %edx,FPU_accum_1
|
||||
|
||||
movl SIGL(%esi),%eax
|
||||
mull SIGH(%edi)
|
||||
addl %eax,FPU_accum_1
|
||||
adcl %edx,%ebx
|
||||
/* adcl $0,%ecx // overflow here is not possible */
|
||||
|
||||
movl SIGH(%esi),%eax
|
||||
mull SIGL(%edi)
|
||||
addl %eax,FPU_accum_1
|
||||
adcl %edx,%ebx
|
||||
adcl $0,%ecx
|
||||
|
||||
movl SIGH(%esi),%eax
|
||||
mull SIGH(%edi)
|
||||
addl %eax,%ebx
|
||||
adcl %edx,%ecx
|
||||
|
||||
/* Get the sum of the exponents. */
|
||||
movl PARAM6,%eax
|
||||
subl EXP_BIAS-1,%eax
|
||||
|
||||
/* Two denormals can cause an exponent underflow */
|
||||
cmpl EXP_WAY_UNDER,%eax
|
||||
jg Exp_not_underflow
|
||||
|
||||
/* Set to a really low value allow correct handling */
|
||||
movl EXP_WAY_UNDER,%eax
|
||||
|
||||
Exp_not_underflow:
|
||||
|
||||
/* Have now finished with the sources */
|
||||
movl PARAM3,%edi /* Point to the destination */
|
||||
movw %ax,EXP(%edi)
|
||||
|
||||
/* Now make sure that the result is normalized */
|
||||
testl $0x80000000,%ecx
|
||||
jnz LResult_Normalised
|
||||
|
||||
/* Normalize by shifting left one bit */
|
||||
shll $1,FPU_accum_0
|
||||
rcll $1,FPU_accum_1
|
||||
rcll $1,%ebx
|
||||
rcll $1,%ecx
|
||||
decw EXP(%edi)
|
||||
|
||||
LResult_Normalised:
|
||||
movl FPU_accum_0,%eax
|
||||
movl FPU_accum_1,%edx
|
||||
orl %eax,%eax
|
||||
jz L_extent_zero
|
||||
|
||||
orl $1,%edx
|
||||
|
||||
L_extent_zero:
|
||||
movl %ecx,%eax
|
||||
jmp fpu_reg_round
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x205
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_exit
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
#endif /* PARANOID */
|
||||
|
@ -1,272 +0,0 @@
|
||||
.file "reg_u_sub.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| reg_u_sub.S |
|
||||
| |
|
||||
| Core floating point subtraction routine. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
|
||||
| E-mail billm@suburbia.net |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
|
||||
| int control_w) |
|
||||
| Return value is the tag of the answer, or-ed with FPU_Exception if |
|
||||
| one was raised, or -1 on internal error. |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
| Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ).
|
||||
| Takes two valid reg f.p. numbers (TAG_Valid), which are
|
||||
| treated as unsigned numbers,
|
||||
| and returns their difference as a TAG_Valid or TAG_Zero f.p.
|
||||
| number.
|
||||
| The first number (arg1) must be the larger.
|
||||
| The returned number is normalized.
|
||||
| Basic checks are performed if PARANOID is defined.
|
||||
*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
#include "control_w.h"
|
||||
|
||||
.text
|
||||
ENTRY(FPU_u_sub)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi /* source 1 */
|
||||
movl PARAM2,%edi /* source 2 */
|
||||
|
||||
movl PARAM6,%ecx
|
||||
subl PARAM7,%ecx /* exp1 - exp2 */
|
||||
|
||||
#ifdef PARANOID
|
||||
/* source 2 is always smaller than source 1 */
|
||||
js L_bugged_1
|
||||
|
||||
testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */
|
||||
je L_bugged_2
|
||||
|
||||
testl $0x80000000,SIGH(%esi)
|
||||
je L_bugged_2
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*--------------------------------------+
|
||||
| Form a register holding the |
|
||||
| smaller number |
|
||||
+--------------------------------------*/
|
||||
movl SIGH(%edi),%eax /* register ms word */
|
||||
movl SIGL(%edi),%ebx /* register ls word */
|
||||
|
||||
movl PARAM3,%edi /* destination */
|
||||
movl PARAM6,%edx
|
||||
movw %dx,EXP(%edi) /* Copy exponent to destination */
|
||||
|
||||
xorl %edx,%edx /* register extension */
|
||||
|
||||
/*--------------------------------------+
|
||||
| Shift the temporary register |
|
||||
| right the required number of |
|
||||
| places. |
|
||||
+--------------------------------------*/
|
||||
|
||||
cmpw $32,%cx /* shrd only works for 0..31 bits */
|
||||
jnc L_more_than_31
|
||||
|
||||
/* less than 32 bits */
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_more_than_31:
|
||||
cmpw $64,%cx
|
||||
jnc L_more_than_63
|
||||
|
||||
subb $32,%cl
|
||||
jz L_exactly_32
|
||||
|
||||
shrd %cl,%eax,%edx
|
||||
shr %cl,%eax
|
||||
orl %ebx,%ebx
|
||||
jz L_more_31_no_low /* none of the lowest bits is set */
|
||||
|
||||
orl $1,%edx /* record the fact in the extension */
|
||||
|
||||
L_more_31_no_low:
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_exactly_32:
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
jmp L_shift_done
|
||||
|
||||
L_more_than_63:
|
||||
cmpw $65,%cx
|
||||
jnc L_more_than_64
|
||||
|
||||
/* Shift right by 64 bits */
|
||||
movl %eax,%edx
|
||||
orl %ebx,%ebx
|
||||
jz L_more_63_no_low
|
||||
|
||||
orl $1,%edx
|
||||
jmp L_more_63_no_low
|
||||
|
||||
L_more_than_64:
|
||||
jne L_more_than_65
|
||||
|
||||
/* Shift right by 65 bits */
|
||||
/* Carry is clear if we get here */
|
||||
movl %eax,%edx
|
||||
rcrl %edx
|
||||
jnc L_shift_65_nc
|
||||
|
||||
orl $1,%edx
|
||||
jmp L_more_63_no_low
|
||||
|
||||
L_shift_65_nc:
|
||||
orl %ebx,%ebx
|
||||
jz L_more_63_no_low
|
||||
|
||||
orl $1,%edx
|
||||
jmp L_more_63_no_low
|
||||
|
||||
L_more_than_65:
|
||||
movl $1,%edx /* The shifted nr always at least one '1' */
|
||||
|
||||
L_more_63_no_low:
|
||||
xorl %ebx,%ebx
|
||||
xorl %eax,%eax
|
||||
|
||||
L_shift_done:
|
||||
L_subtr:
|
||||
/*------------------------------+
|
||||
| Do the subtraction |
|
||||
+------------------------------*/
|
||||
xorl %ecx,%ecx
|
||||
subl %edx,%ecx
|
||||
movl %ecx,%edx
|
||||
movl SIGL(%esi),%ecx
|
||||
sbbl %ebx,%ecx
|
||||
movl %ecx,%ebx
|
||||
movl SIGH(%esi),%ecx
|
||||
sbbl %eax,%ecx
|
||||
movl %ecx,%eax
|
||||
|
||||
#ifdef PARANOID
|
||||
/* We can never get a borrow */
|
||||
jc L_bugged
|
||||
#endif /* PARANOID */
|
||||
|
||||
/*--------------------------------------+
|
||||
| Normalize the result |
|
||||
+--------------------------------------*/
|
||||
testl $0x80000000,%eax
|
||||
jnz L_round /* no shifting needed */
|
||||
|
||||
orl %eax,%eax
|
||||
jnz L_shift_1 /* shift left 1 - 31 bits */
|
||||
|
||||
orl %ebx,%ebx
|
||||
jnz L_shift_32 /* shift left 32 - 63 bits */
|
||||
|
||||
/*
|
||||
* A rare case, the only one which is non-zero if we got here
|
||||
* is: 1000000 .... 0000
|
||||
* -0111111 .... 1111 1
|
||||
* --------------------
|
||||
* 0000000 .... 0000 1
|
||||
*/
|
||||
|
||||
cmpl $0x80000000,%edx
|
||||
jnz L_must_be_zero
|
||||
|
||||
/* Shift left 64 bits */
|
||||
subw $64,EXP(%edi)
|
||||
xchg %edx,%eax
|
||||
jmp fpu_reg_round
|
||||
|
||||
L_must_be_zero:
|
||||
#ifdef PARANOID
|
||||
orl %edx,%edx
|
||||
jnz L_bugged_3
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* The result is zero */
|
||||
movw $0,EXP(%edi) /* exponent */
|
||||
movl $0,SIGL(%edi)
|
||||
movl $0,SIGH(%edi)
|
||||
movl TAG_Zero,%eax
|
||||
jmp L_exit
|
||||
|
||||
L_shift_32:
|
||||
movl %ebx,%eax
|
||||
movl %edx,%ebx
|
||||
movl $0,%edx
|
||||
subw $32,EXP(%edi) /* Can get underflow here */
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_shift_1:
|
||||
bsrl %eax,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
shld %cl,%ebx,%eax
|
||||
shld %cl,%edx,%ebx
|
||||
shl %cl,%edx
|
||||
subw %cx,EXP(%edi) /* Can get underflow here */
|
||||
|
||||
L_round:
|
||||
jmp fpu_reg_round /* Round the result */
|
||||
|
||||
|
||||
#ifdef PARANOID
|
||||
L_bugged_1:
|
||||
pushl EX_INTERNAL|0x206
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_bugged_2:
|
||||
pushl EX_INTERNAL|0x209
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_bugged_3:
|
||||
pushl EX_INTERNAL|0x210
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_bugged_4:
|
||||
pushl EX_INTERNAL|0x211
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_bugged:
|
||||
pushl EX_INTERNAL|0x212
|
||||
call EXCEPTION
|
||||
pop %ebx
|
||||
jmp L_error_exit
|
||||
|
||||
L_error_exit:
|
||||
movl $-1,%eax
|
||||
|
||||
#endif /* PARANOID */
|
||||
|
||||
L_exit:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
@ -1,141 +0,0 @@
|
||||
/*---------------------------------------------------------------------------+
|
||||
| round_Xsig.S |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Normalize and round a 12 byte quantity. |
|
||||
| Call from C as: |
|
||||
| int round_Xsig(Xsig *n) |
|
||||
| |
|
||||
| Normalize a 12 byte quantity. |
|
||||
| Call from C as: |
|
||||
| int norm_Xsig(Xsig *n) |
|
||||
| |
|
||||
| Each function returns the size of the shift (nr of bits). |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
.file "round_Xsig.S"
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(round_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %ebx /* Reserve some space */
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
|
||||
movl 8(%esi),%edx
|
||||
movl 4(%esi),%ebx
|
||||
movl (%esi),%eax
|
||||
|
||||
movl $0,-4(%ebp)
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_round /* Already normalized */
|
||||
jnz L_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
movl $-32,-4(%ebp)
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_shift_1:
|
||||
bsrl %edx,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
subl %ecx,-4(%ebp)
|
||||
shld %cl,%ebx,%edx
|
||||
shld %cl,%eax,%ebx
|
||||
shl %cl,%eax
|
||||
|
||||
L_round:
|
||||
testl $0x80000000,%eax
|
||||
jz L_exit
|
||||
|
||||
addl $1,%ebx
|
||||
adcl $0,%edx
|
||||
jnz L_exit
|
||||
|
||||
movl $0x80000000,%edx
|
||||
incl -4(%ebp)
|
||||
|
||||
L_exit:
|
||||
movl %edx,8(%esi)
|
||||
movl %ebx,4(%esi)
|
||||
movl %eax,(%esi)
|
||||
|
||||
movl -4(%ebp),%eax
|
||||
|
||||
popl %esi
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
ENTRY(norm_Xsig)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %ebx /* Reserve some space */
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
|
||||
movl PARAM1,%esi
|
||||
|
||||
movl 8(%esi),%edx
|
||||
movl 4(%esi),%ebx
|
||||
movl (%esi),%eax
|
||||
|
||||
movl $0,-4(%ebp)
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_n_exit /* Already normalized */
|
||||
jnz L_n_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
movl $-32,-4(%ebp)
|
||||
|
||||
orl %edx,%edx /* ms bits */
|
||||
js L_n_exit /* Normalized now */
|
||||
jnz L_n_shift_1 /* Shift left 1 - 31 bits */
|
||||
|
||||
movl %ebx,%edx
|
||||
movl %eax,%ebx
|
||||
xorl %eax,%eax
|
||||
addl $-32,-4(%ebp)
|
||||
jmp L_n_exit /* Might not be normalized,
|
||||
but shift no more. */
|
||||
|
||||
/* We need to shift left by 1 - 31 bits */
|
||||
L_n_shift_1:
|
||||
bsrl %edx,%ecx /* get the required shift in %ecx */
|
||||
subl $31,%ecx
|
||||
negl %ecx
|
||||
subl %ecx,-4(%ebp)
|
||||
shld %cl,%ebx,%edx
|
||||
shld %cl,%eax,%ebx
|
||||
shl %cl,%eax
|
||||
|
||||
L_n_exit:
|
||||
movl %edx,8(%esi)
|
||||
movl %ebx,4(%esi)
|
||||
movl %eax,(%esi)
|
||||
|
||||
movl -4(%ebp),%eax
|
||||
|
||||
popl %esi
|
||||
popl %ebx
|
||||
leave
|
||||
ret
|
||||
|
@ -1,87 +0,0 @@
|
||||
.file "shr_Xsig.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| shr_Xsig.S |
|
||||
| |
|
||||
| 12 byte right shift function |
|
||||
| |
|
||||
| Copyright (C) 1992,1994,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| void shr_Xsig(Xsig *arg, unsigned nr) |
|
||||
| |
|
||||
| Extended shift right function. |
|
||||
| Fastest for small shifts. |
|
||||
| Shifts the 12 byte quantity pointed to by the first arg (arg) |
|
||||
| right by the number of bits specified by the second arg (nr). |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
.text
|
||||
ENTRY(shr_Xsig)
|
||||
push %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
movl PARAM2,%ecx
|
||||
movl PARAM1,%esi
|
||||
cmpl $32,%ecx /* shrd only works for 0..31 bits */
|
||||
jnc L_more_than_31
|
||||
|
||||
/* less than 32 bits */
|
||||
pushl %ebx
|
||||
movl (%esi),%eax /* lsl */
|
||||
movl 4(%esi),%ebx /* midl */
|
||||
movl 8(%esi),%edx /* msl */
|
||||
shrd %cl,%ebx,%eax
|
||||
shrd %cl,%edx,%ebx
|
||||
shr %cl,%edx
|
||||
movl %eax,(%esi)
|
||||
movl %ebx,4(%esi)
|
||||
movl %edx,8(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_31:
|
||||
cmpl $64,%ecx
|
||||
jnc L_more_than_63
|
||||
|
||||
subb $32,%cl
|
||||
movl 4(%esi),%eax /* midl */
|
||||
movl 8(%esi),%edx /* msl */
|
||||
shrd %cl,%edx,%eax
|
||||
shr %cl,%edx
|
||||
movl %eax,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
movl $0,8(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_63:
|
||||
cmpl $96,%ecx
|
||||
jnc L_more_than_95
|
||||
|
||||
subb $64,%cl
|
||||
movl 8(%esi),%eax /* msl */
|
||||
shr %cl,%eax
|
||||
xorl %edx,%edx
|
||||
movl %eax,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
movl %edx,8(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_95:
|
||||
xorl %eax,%eax
|
||||
movl %eax,(%esi)
|
||||
movl %eax,4(%esi)
|
||||
movl %eax,8(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
@ -1,6 +0,0 @@
|
||||
#ifndef _I386_MATH_EMU_H
|
||||
#define _I386_MATH_EMU_H
|
||||
|
||||
// Don't really need anything in here.
|
||||
|
||||
#endif
|
@ -1,7 +0,0 @@
|
||||
#ifndef _I386_TYPES_H
|
||||
#define _I386_TYPES_H
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#endif
|
||||
|
||||
#endif /* _I386_TYPES_H */
|
@ -1,6 +0,0 @@
|
||||
#ifndef _I386_UACCESS_H
|
||||
#define _I386_UACCESS_H
|
||||
|
||||
|
||||
|
||||
#endif
|
@ -1,204 +0,0 @@
|
||||
.file "wm_shrx.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| wm_shrx.S |
|
||||
| |
|
||||
| 64 bit right shift functions |
|
||||
| |
|
||||
| Copyright (C) 1992,1995 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| unsigned FPU_shrx(void *arg1, unsigned arg2) |
|
||||
| and |
|
||||
| unsigned FPU_shrxs(void *arg1, unsigned arg2) |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "fpu_emu.h"
|
||||
|
||||
.text
|
||||
/*---------------------------------------------------------------------------+
|
||||
| unsigned FPU_shrx(void *arg1, unsigned arg2) |
|
||||
| |
|
||||
| Extended shift right function. |
|
||||
| Fastest for small shifts. |
|
||||
| Shifts the 64 bit quantity pointed to by the first arg (arg1) |
|
||||
| right by the number of bits specified by the second arg (arg2). |
|
||||
| Forms a 96 bit quantity from the 64 bit arg and eax: |
|
||||
| [ 64 bit arg ][ eax ] |
|
||||
| shift right ---------> |
|
||||
| The eax register is initialized to 0 before the shifting. |
|
||||
| Results returned in the 64 bit arg and eax. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
ENTRY(FPU_shrx)
|
||||
push %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
movl PARAM2,%ecx
|
||||
movl PARAM1,%esi
|
||||
cmpl $32,%ecx /* shrd only works for 0..31 bits */
|
||||
jnc L_more_than_31
|
||||
|
||||
/* less than 32 bits */
|
||||
pushl %ebx
|
||||
movl (%esi),%ebx /* lsl */
|
||||
movl 4(%esi),%edx /* msl */
|
||||
xorl %eax,%eax /* extension */
|
||||
shrd %cl,%ebx,%eax
|
||||
shrd %cl,%edx,%ebx
|
||||
shr %cl,%edx
|
||||
movl %ebx,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_31:
|
||||
cmpl $64,%ecx
|
||||
jnc L_more_than_63
|
||||
|
||||
subb $32,%cl
|
||||
movl (%esi),%eax /* lsl */
|
||||
movl 4(%esi),%edx /* msl */
|
||||
shrd %cl,%edx,%eax
|
||||
shr %cl,%edx
|
||||
movl %edx,(%esi)
|
||||
movl $0,4(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_63:
|
||||
cmpl $96,%ecx
|
||||
jnc L_more_than_95
|
||||
|
||||
subb $64,%cl
|
||||
movl 4(%esi),%eax /* msl */
|
||||
shr %cl,%eax
|
||||
xorl %edx,%edx
|
||||
movl %edx,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
L_more_than_95:
|
||||
xorl %eax,%eax
|
||||
movl %eax,(%esi)
|
||||
movl %eax,4(%esi)
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| unsigned FPU_shrxs(void *arg1, unsigned arg2) |
|
||||
| |
|
||||
| Extended shift right function (optimized for small floating point |
|
||||
| integers). |
|
||||
| Shifts the 64 bit quantity pointed to by the first arg (arg1) |
|
||||
| right by the number of bits specified by the second arg (arg2). |
|
||||
| Forms a 96 bit quantity from the 64 bit arg and eax: |
|
||||
| [ 64 bit arg ][ eax ] |
|
||||
| shift right ---------> |
|
||||
| The eax register is initialized to 0 before the shifting. |
|
||||
| The lower 8 bits of eax are lost and replaced by a flag which is |
|
||||
| set (to 0x01) if any bit, apart from the first one, is set in the |
|
||||
| part which has been shifted out of the arg. |
|
||||
| Results returned in the 64 bit arg and eax. |
|
||||
+---------------------------------------------------------------------------*/
|
||||
ENTRY(FPU_shrxs)
|
||||
push %ebp
|
||||
movl %esp,%ebp
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl PARAM2,%ecx
|
||||
movl PARAM1,%esi
|
||||
cmpl $64,%ecx /* shrd only works for 0..31 bits */
|
||||
jnc Ls_more_than_63
|
||||
|
||||
cmpl $32,%ecx /* shrd only works for 0..31 bits */
|
||||
jc Ls_less_than_32
|
||||
|
||||
/* We got here without jumps by assuming that the most common requirement
|
||||
is for small integers */
|
||||
/* Shift by [32..63] bits */
|
||||
subb $32,%cl
|
||||
movl (%esi),%eax /* lsl */
|
||||
movl 4(%esi),%edx /* msl */
|
||||
xorl %ebx,%ebx
|
||||
shrd %cl,%eax,%ebx
|
||||
shrd %cl,%edx,%eax
|
||||
shr %cl,%edx
|
||||
orl %ebx,%ebx /* test these 32 bits */
|
||||
setne %bl
|
||||
test $0x7fffffff,%eax /* and 31 bits here */
|
||||
setne %bh
|
||||
orw %bx,%bx /* Any of the 63 bit set ? */
|
||||
setne %al
|
||||
movl %edx,(%esi)
|
||||
movl $0,4(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
/* Shift by [0..31] bits */
|
||||
Ls_less_than_32:
|
||||
movl (%esi),%ebx /* lsl */
|
||||
movl 4(%esi),%edx /* msl */
|
||||
xorl %eax,%eax /* extension */
|
||||
shrd %cl,%ebx,%eax
|
||||
shrd %cl,%edx,%ebx
|
||||
shr %cl,%edx
|
||||
test $0x7fffffff,%eax /* only need to look at eax here */
|
||||
setne %al
|
||||
movl %ebx,(%esi)
|
||||
movl %edx,4(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
/* Shift by [64..95] bits */
|
||||
Ls_more_than_63:
|
||||
cmpl $96,%ecx
|
||||
jnc Ls_more_than_95
|
||||
|
||||
subb $64,%cl
|
||||
movl (%esi),%ebx /* lsl */
|
||||
movl 4(%esi),%eax /* msl */
|
||||
xorl %edx,%edx /* extension */
|
||||
shrd %cl,%ebx,%edx
|
||||
shrd %cl,%eax,%ebx
|
||||
shr %cl,%eax
|
||||
orl %ebx,%edx
|
||||
setne %bl
|
||||
test $0x7fffffff,%eax /* only need to look at eax here */
|
||||
setne %bh
|
||||
orw %bx,%bx
|
||||
setne %al
|
||||
xorl %edx,%edx
|
||||
movl %edx,(%esi) /* set to zero */
|
||||
movl %edx,4(%esi) /* set to zero */
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
||||
|
||||
Ls_more_than_95:
|
||||
/* Shift by [96..inf) bits */
|
||||
xorl %eax,%eax
|
||||
movl (%esi),%ebx
|
||||
orl 4(%esi),%ebx
|
||||
setne %al
|
||||
xorl %ebx,%ebx
|
||||
movl %ebx,(%esi)
|
||||
movl %ebx,4(%esi)
|
||||
popl %ebx
|
||||
popl %esi
|
||||
leave
|
||||
ret
|
@ -1,470 +0,0 @@
|
||||
.file "wm_sqrt.S"
|
||||
/*---------------------------------------------------------------------------+
|
||||
| wm_sqrt.S |
|
||||
| |
|
||||
| Fixed point arithmetic square root evaluation. |
|
||||
| |
|
||||
| Copyright (C) 1992,1993,1995,1997 |
|
||||
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
||||
| Australia. E-mail billm@suburbia.net |
|
||||
| |
|
||||
| Call from C as: |
|
||||
| int wm_sqrt(FPU_REG *n, unsigned int control_word) |
|
||||
| |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------+
|
||||
| wm_sqrt(FPU_REG *n, unsigned int control_word) |
|
||||
| returns the square root of n in n. |
|
||||
| |
|
||||
| Use Newton's method to compute the square root of a number, which must |
|
||||
| be in the range [1.0 .. 4.0), to 64 bits accuracy. |
|
||||
| Does not check the sign or tag of the argument. |
|
||||
| Sets the exponent, but not the sign or tag of the result. |
|
||||
| |
|
||||
| The guess is kept in %esi:%edi |
|
||||
+---------------------------------------------------------------------------*/
|
||||
|
||||
#include "exception.h"
|
||||
#include "fpu_emu.h"
|
||||
|
||||
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
/* Local storage on the stack: */
|
||||
#define FPU_accum_3 -4(%ebp) /* ms word */
|
||||
#define FPU_accum_2 -8(%ebp)
|
||||
#define FPU_accum_1 -12(%ebp)
|
||||
#define FPU_accum_0 -16(%ebp)
|
||||
|
||||
/*
|
||||
* The de-normalised argument:
|
||||
* sq_2 sq_1 sq_0
|
||||
* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
|
||||
* ^ binary point here
|
||||
*/
|
||||
#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
|
||||
#define FPU_fsqrt_arg_1 -24(%ebp)
|
||||
#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
|
||||
|
||||
#else
|
||||
/* Local storage in a static area: */
|
||||
.data
|
||||
.align 4,0
|
||||
FPU_accum_3:
|
||||
.long 0 /* ms word */
|
||||
FPU_accum_2:
|
||||
.long 0
|
||||
FPU_accum_1:
|
||||
.long 0
|
||||
FPU_accum_0:
|
||||
.long 0
|
||||
|
||||
/* The de-normalised argument:
|
||||
sq_2 sq_1 sq_0
|
||||
b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
|
||||
^ binary point here
|
||||
*/
|
||||
FPU_fsqrt_arg_2:
|
||||
.long 0 /* ms word */
|
||||
FPU_fsqrt_arg_1:
|
||||
.long 0
|
||||
FPU_fsqrt_arg_0:
|
||||
.long 0 /* ls word, at most the ms bit is set */
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
|
||||
|
||||
.text
|
||||
ENTRY(wm_sqrt)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
#ifndef NON_REENTRANT_FPU
|
||||
subl $28,%esp
|
||||
#endif /* NON_REENTRANT_FPU */
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
|
||||
movl PARAM1,%esi
|
||||
|
||||
movl SIGH(%esi),%eax
|
||||
movl SIGL(%esi),%ecx
|
||||
xorl %edx,%edx
|
||||
|
||||
/* We use a rough linear estimate for the first guess.. */
|
||||
|
||||
cmpw EXP_BIAS,EXP(%esi)
|
||||
jnz sqrt_arg_ge_2
|
||||
|
||||
shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
|
||||
rcrl $1,%ecx
|
||||
rcrl $1,%edx
|
||||
|
||||
sqrt_arg_ge_2:
|
||||
/* From here on, n is never accessed directly again until it is
|
||||
replaced by the answer. */
|
||||
|
||||
movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
|
||||
movl %ecx,FPU_fsqrt_arg_1
|
||||
movl %edx,FPU_fsqrt_arg_0
|
||||
|
||||
/* Make a linear first estimate */
|
||||
shrl $1,%eax
|
||||
addl $0x40000000,%eax
|
||||
movl $0xaaaaaaaa,%ecx
|
||||
mull %ecx
|
||||
shll %edx /* max result was 7fff... */
|
||||
testl $0x80000000,%edx /* but min was 3fff... */
|
||||
jnz sqrt_prelim_no_adjust
|
||||
|
||||
movl $0x80000000,%edx /* round up */
|
||||
|
||||
sqrt_prelim_no_adjust:
|
||||
movl %edx,%esi /* Our first guess */
|
||||
|
||||
/* We have now computed (approx) (2 + x) / 3, which forms the basis
|
||||
for a few iterations of Newton's method */
|
||||
|
||||
movl FPU_fsqrt_arg_2,%ecx /* ms word */
|
||||
|
||||
/*
|
||||
* From our initial estimate, three iterations are enough to get us
|
||||
* to 30 bits or so. This will then allow two iterations at better
|
||||
* precision to complete the process.
|
||||
*/
|
||||
|
||||
/* Compute (g + n/g)/2 at each iteration (g is the guess). */
|
||||
shrl %ecx /* Doing this first will prevent a divide */
|
||||
/* overflow later. */
|
||||
|
||||
movl %ecx,%edx /* msw of the arg / 2 */
|
||||
divl %esi /* current estimate */
|
||||
shrl %esi /* divide by 2 */
|
||||
addl %eax,%esi /* the new estimate */
|
||||
|
||||
movl %ecx,%edx
|
||||
divl %esi
|
||||
shrl %esi
|
||||
addl %eax,%esi
|
||||
|
||||
movl %ecx,%edx
|
||||
divl %esi
|
||||
shrl %esi
|
||||
addl %eax,%esi
|
||||
|
||||
/*
|
||||
* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
|
||||
* we improve it to 60 bits or so.
|
||||
*
|
||||
* The strategy from now on is to compute new estimates from
|
||||
* guess := guess + (n - guess^2) / (2 * guess)
|
||||
*/
|
||||
|
||||
/* First, find the square of the guess */
|
||||
movl %esi,%eax
|
||||
mull %esi
|
||||
/* guess^2 now in %edx:%eax */
|
||||
|
||||
movl FPU_fsqrt_arg_1,%ecx
|
||||
subl %ecx,%eax
|
||||
movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
|
||||
sbbl %ecx,%edx
|
||||
jnc sqrt_stage_2_positive
|
||||
|
||||
/* Subtraction gives a negative result,
|
||||
negate the result before division. */
|
||||
notl %edx
|
||||
notl %eax
|
||||
addl $1,%eax
|
||||
adcl $0,%edx
|
||||
|
||||
divl %esi
|
||||
movl %eax,%ecx
|
||||
|
||||
movl %edx,%eax
|
||||
divl %esi
|
||||
jmp sqrt_stage_2_finish
|
||||
|
||||
sqrt_stage_2_positive:
|
||||
divl %esi
|
||||
movl %eax,%ecx
|
||||
|
||||
movl %edx,%eax
|
||||
divl %esi
|
||||
|
||||
notl %ecx
|
||||
notl %eax
|
||||
addl $1,%eax
|
||||
adcl $0,%ecx
|
||||
|
||||
sqrt_stage_2_finish:
|
||||
sarl $1,%ecx /* divide by 2 */
|
||||
rcrl $1,%eax
|
||||
|
||||
/* Form the new estimate in %esi:%edi */
|
||||
movl %eax,%edi
|
||||
addl %ecx,%esi
|
||||
|
||||
jnz sqrt_stage_2_done /* result should be [1..2) */
|
||||
|
||||
#ifdef PARANOID
|
||||
/* It should be possible to get here only if the arg is ffff....ffff */
|
||||
cmp $0xffffffff,FPU_fsqrt_arg_1
|
||||
jnz sqrt_stage_2_error
|
||||
#endif /* PARANOID */
|
||||
|
||||
/* The best rounded result. */
|
||||
xorl %eax,%eax
|
||||
decl %eax
|
||||
movl %eax,%edi
|
||||
movl %eax,%esi
|
||||
movl $0x7fffffff,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
#ifdef PARANOID
|
||||
sqrt_stage_2_error:
|
||||
pushl EX_INTERNAL|0x213
|
||||
call EXCEPTION
|
||||
#endif /* PARANOID */
|
||||
|
||||
sqrt_stage_2_done:
|
||||
|
||||
/* Now the square root has been computed to better than 60 bits. */
|
||||
|
||||
/* Find the square of the guess. */
|
||||
movl %edi,%eax /* ls word of guess */
|
||||
mull %edi
|
||||
movl %edx,FPU_accum_1
|
||||
|
||||
movl %esi,%eax
|
||||
mull %esi
|
||||
movl %edx,FPU_accum_3
|
||||
movl %eax,FPU_accum_2
|
||||
|
||||
movl %edi,%eax
|
||||
mull %esi
|
||||
addl %eax,FPU_accum_1
|
||||
adcl %edx,FPU_accum_2
|
||||
adcl $0,FPU_accum_3
|
||||
|
||||
/* movl %esi,%eax */
|
||||
/* mull %edi */
|
||||
addl %eax,FPU_accum_1
|
||||
adcl %edx,FPU_accum_2
|
||||
adcl $0,FPU_accum_3
|
||||
|
||||
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
|
||||
|
||||
movl FPU_fsqrt_arg_0,%eax /* get normalized n */
|
||||
subl %eax,FPU_accum_1
|
||||
movl FPU_fsqrt_arg_1,%eax
|
||||
sbbl %eax,FPU_accum_2
|
||||
movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
|
||||
sbbl %eax,FPU_accum_3
|
||||
jnc sqrt_stage_3_positive
|
||||
|
||||
/* Subtraction gives a negative result,
|
||||
negate the result before division */
|
||||
notl FPU_accum_1
|
||||
notl FPU_accum_2
|
||||
notl FPU_accum_3
|
||||
addl $1,FPU_accum_1
|
||||
adcl $0,FPU_accum_2
|
||||
|
||||
#ifdef PARANOID
|
||||
adcl $0,FPU_accum_3 /* This must be zero */
|
||||
jz sqrt_stage_3_no_error
|
||||
|
||||
sqrt_stage_3_error:
|
||||
pushl EX_INTERNAL|0x207
|
||||
call EXCEPTION
|
||||
|
||||
sqrt_stage_3_no_error:
|
||||
#endif /* PARANOID */
|
||||
|
||||
movl FPU_accum_2,%edx
|
||||
movl FPU_accum_1,%eax
|
||||
divl %esi
|
||||
movl %eax,%ecx
|
||||
|
||||
movl %edx,%eax
|
||||
divl %esi
|
||||
|
||||
sarl $1,%ecx /* divide by 2 */
|
||||
rcrl $1,%eax
|
||||
|
||||
/* prepare to round the result */
|
||||
|
||||
addl %ecx,%edi
|
||||
adcl $0,%esi
|
||||
|
||||
jmp sqrt_stage_3_finished
|
||||
|
||||
sqrt_stage_3_positive:
|
||||
movl FPU_accum_2,%edx
|
||||
movl FPU_accum_1,%eax
|
||||
divl %esi
|
||||
movl %eax,%ecx
|
||||
|
||||
movl %edx,%eax
|
||||
divl %esi
|
||||
|
||||
sarl $1,%ecx /* divide by 2 */
|
||||
rcrl $1,%eax
|
||||
|
||||
/* prepare to round the result */
|
||||
|
||||
notl %eax /* Negate the correction term */
|
||||
notl %ecx
|
||||
addl $1,%eax
|
||||
adcl $0,%ecx /* carry here ==> correction == 0 */
|
||||
adcl $0xffffffff,%esi
|
||||
|
||||
addl %ecx,%edi
|
||||
adcl $0,%esi
|
||||
|
||||
sqrt_stage_3_finished:
|
||||
|
||||
/*
|
||||
* The result in %esi:%edi:%eax should be good to about 90 bits here,
|
||||
* and the rounding information here does not have sufficient accuracy
|
||||
* in a few rare cases.
|
||||
*/
|
||||
cmpl $0xffffffe0,%eax
|
||||
ja sqrt_near_exact_x
|
||||
|
||||
cmpl $0x00000020,%eax
|
||||
jb sqrt_near_exact
|
||||
|
||||
cmpl $0x7fffffe0,%eax
|
||||
jb sqrt_round_result
|
||||
|
||||
cmpl $0x80000020,%eax
|
||||
jb sqrt_get_more_precision
|
||||
|
||||
sqrt_round_result:
|
||||
/* Set up for rounding operations */
|
||||
movl %eax,%edx
|
||||
movl %esi,%eax
|
||||
movl %edi,%ebx
|
||||
movl PARAM1,%edi
|
||||
movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
|
||||
jmp fpu_reg_round
|
||||
|
||||
|
||||
sqrt_near_exact_x:
|
||||
/* First, the estimate must be rounded up. */
|
||||
addl $1,%edi
|
||||
adcl $0,%esi
|
||||
|
||||
sqrt_near_exact:
|
||||
/*
|
||||
* This is an easy case because x^1/2 is monotonic.
|
||||
* We need just find the square of our estimate, compare it
|
||||
* with the argument, and deduce whether our estimate is
|
||||
* above, below, or exact. We use the fact that the estimate
|
||||
* is known to be accurate to about 90 bits.
|
||||
*/
|
||||
movl %edi,%eax /* ls word of guess */
|
||||
mull %edi
|
||||
movl %edx,%ebx /* 2nd ls word of square */
|
||||
movl %eax,%ecx /* ls word of square */
|
||||
|
||||
movl %edi,%eax
|
||||
mull %esi
|
||||
addl %eax,%ebx
|
||||
addl %eax,%ebx
|
||||
|
||||
#ifdef PARANOID
|
||||
cmp $0xffffffb0,%ebx
|
||||
jb sqrt_near_exact_ok
|
||||
|
||||
cmp $0x00000050,%ebx
|
||||
ja sqrt_near_exact_ok
|
||||
|
||||
pushl EX_INTERNAL|0x214
|
||||
call EXCEPTION
|
||||
|
||||
sqrt_near_exact_ok:
|
||||
#endif /* PARANOID */
|
||||
|
||||
or %ebx,%ebx
|
||||
js sqrt_near_exact_small
|
||||
|
||||
jnz sqrt_near_exact_large
|
||||
|
||||
or %ebx,%edx
|
||||
jnz sqrt_near_exact_large
|
||||
|
||||
/* Our estimate is exactly the right answer */
|
||||
xorl %eax,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
sqrt_near_exact_small:
|
||||
/* Our estimate is too small */
|
||||
movl $0x000000ff,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
sqrt_near_exact_large:
|
||||
/* Our estimate is too large, we need to decrement it */
|
||||
subl $1,%edi
|
||||
sbbl $0,%esi
|
||||
movl $0xffffff00,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
|
||||
sqrt_get_more_precision:
|
||||
/* This case is almost the same as the above, except we start
|
||||
with an extra bit of precision in the estimate. */
|
||||
stc /* The extra bit. */
|
||||
rcll $1,%edi /* Shift the estimate left one bit */
|
||||
rcll $1,%esi
|
||||
|
||||
movl %edi,%eax /* ls word of guess */
|
||||
mull %edi
|
||||
movl %edx,%ebx /* 2nd ls word of square */
|
||||
movl %eax,%ecx /* ls word of square */
|
||||
|
||||
movl %edi,%eax
|
||||
mull %esi
|
||||
addl %eax,%ebx
|
||||
addl %eax,%ebx
|
||||
|
||||
/* Put our estimate back to its original value */
|
||||
stc /* The ms bit. */
|
||||
rcrl $1,%esi /* Shift the estimate left one bit */
|
||||
rcrl $1,%edi
|
||||
|
||||
#ifdef PARANOID
|
||||
cmp $0xffffff60,%ebx
|
||||
jb sqrt_more_prec_ok
|
||||
|
||||
cmp $0x000000a0,%ebx
|
||||
ja sqrt_more_prec_ok
|
||||
|
||||
pushl EX_INTERNAL|0x215
|
||||
call EXCEPTION
|
||||
|
||||
sqrt_more_prec_ok:
|
||||
#endif /* PARANOID */
|
||||
|
||||
or %ebx,%ebx
|
||||
js sqrt_more_prec_small
|
||||
|
||||
jnz sqrt_more_prec_large
|
||||
|
||||
or %ebx,%ecx
|
||||
jnz sqrt_more_prec_large
|
||||
|
||||
/* Our estimate is exactly the right answer */
|
||||
movl $0x80000000,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
sqrt_more_prec_small:
|
||||
/* Our estimate is too small */
|
||||
movl $0x800000ff,%eax
|
||||
jmp sqrt_round_result
|
||||
|
||||
sqrt_more_prec_large:
|
||||
/* Our estimate is too large */
|
||||
movl $0x7fffff00,%eax
|
||||
jmp sqrt_round_result
|
Loading…
Reference in New Issue
Block a user