Remove .S files from FPU directory, they not contains a lot of bugfixes and another chnages made in .C versions

Remove empty include files
This commit is contained in:
Stanislav Shwartsman 2003-04-22 16:16:33 +00:00
parent 548dd3a13c
commit 87c91a9d72
18 changed files with 0 additions and 3492 deletions

View File

@ -1,365 +0,0 @@
.file "div_Xsig.S"
/*---------------------------------------------------------------------------+
| div_Xsig.S |
| |
| Division subroutine for 96 bit quantities |
| |
| Copyright (C) 1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
| put the 96 bit result at the location d. |
| |
| The result may not be accurate to 96 bits. It is intended for use where |
| a result better than 64 bits is required. The result should usually be |
| good to at least 94 bits. |
| The returned result is actually divided by one half. This is done to |
| prevent overflow. |
| |
| .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd |
| |
| void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) |
| |
+---------------------------------------------------------------------------*/
#include "exception.h"
#include "fpu_emu.h"
#define XsigLL(x) (x)
#define XsigL(x) 4(x)
#define XsigH(x) 8(x)
#ifndef NON_REENTRANT_FPU
/*
Local storage on the stack:
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
*/
#define FPU_accum_3 -4(%ebp)
#define FPU_accum_2 -8(%ebp)
#define FPU_accum_1 -12(%ebp)
#define FPU_accum_0 -16(%ebp)
#define FPU_result_3 -20(%ebp)
#define FPU_result_2 -24(%ebp)
#define FPU_result_1 -28(%ebp)
#else
.data
/*
Local storage in a static area:
Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
*/
.align 4,0
FPU_accum_3:
.long 0
FPU_accum_2:
.long 0
FPU_accum_1:
.long 0
FPU_accum_0:
.long 0
FPU_result_3:
.long 0
FPU_result_2:
.long 0
FPU_result_1:
.long 0
#endif /* NON_REENTRANT_FPU */
.text
ENTRY(div_Xsig)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
subl $28,%esp
#endif /* NON_REENTRANT_FPU */
pushl %esi
pushl %edi
pushl %ebx
movl PARAM1,%esi /* pointer to num */
movl PARAM2,%ebx /* pointer to denom */
#ifdef PARANOID
testl $0x80000000, XsigH(%ebx) /* Divisor */
je L_bugged
#endif /* PARANOID */
/*---------------------------------------------------------------------------+
| Divide: Return arg1/arg2 to arg3. |
| |
| The maximum returned value is (ignoring exponents) |
| .ffffffff ffffffff |
| ------------------ = 1.ffffffff fffffffe |
| .80000000 00000000 |
| and the minimum is |
| .80000000 00000000 |
| ------------------ = .80000000 00000001 (rounded) |
| .ffffffff ffffffff |
| |
+---------------------------------------------------------------------------*/
/* Save extended dividend in local register */
/* Divide by 2 to prevent overflow */
clc
movl XsigH(%esi),%eax
rcrl %eax
movl %eax,FPU_accum_3
movl XsigL(%esi),%eax
rcrl %eax
movl %eax,FPU_accum_2
movl XsigLL(%esi),%eax
rcrl %eax
movl %eax,FPU_accum_1
movl $0,%eax
rcrl %eax
movl %eax,FPU_accum_0
movl FPU_accum_2,%eax /* Get the current num */
movl FPU_accum_3,%edx
/*----------------------------------------------------------------------*/
/* Initialization done.
Do the first 32 bits. */
/* We will divide by a number which is too large */
movl XsigH(%ebx),%ecx
addl $1,%ecx
jnc LFirst_div_not_1
/* here we need to divide by 100000000h,
i.e., no division at all.. */
mov %edx,%eax
jmp LFirst_div_done
LFirst_div_not_1:
divl %ecx /* Divide the numerator by the augmented
denom ms dw */
LFirst_div_done:
movl %eax,FPU_result_3 /* Put the result in the answer */
mull XsigH(%ebx) /* mul by the ms dw of the denom */
subl %eax,FPU_accum_2 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_3
movl FPU_result_3,%eax /* Get the result back */
mull XsigL(%ebx) /* now mul the ls dw of the denom */
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
sbbl $0,FPU_accum_3
je LDo_2nd_32_bits /* Must check for non-zero result here */
#ifdef PARANOID
jb L_bugged_1
#endif /* PARANOID */
/* need to subtract another once of the denom */
incl FPU_result_3 /* Correct the answer */
movl XsigL(%ebx),%eax
movl XsigH(%ebx),%edx
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
#ifdef PARANOID
sbbl $0,FPU_accum_3
jne L_bugged_1 /* Must check for non-zero result here */
#endif /* PARANOID */
/*----------------------------------------------------------------------*/
/* Half of the main problem is done, there is just a reduced numerator
to handle now.
Work with the second 32 bits, FPU_accum_0 not used from now on */
LDo_2nd_32_bits:
movl FPU_accum_2,%edx /* get the reduced num */
movl FPU_accum_1,%eax
/* need to check for possible subsequent overflow */
cmpl XsigH(%ebx),%edx
jb LDo_2nd_div
ja LPrevent_2nd_overflow
cmpl XsigL(%ebx),%eax
jb LDo_2nd_div
LPrevent_2nd_overflow:
/* The numerator is greater or equal, would cause overflow */
/* prevent overflow */
subl XsigL(%ebx),%eax
sbbl XsigH(%ebx),%edx
movl %edx,FPU_accum_2
movl %eax,FPU_accum_1
incl FPU_result_3 /* Reflect the subtraction in the answer */
#ifdef PARANOID
je L_bugged_2 /* Can't bump the result to 1.0 */
#endif /* PARANOID */
LDo_2nd_div:
cmpl $0,%ecx /* augmented denom msw */
jnz LSecond_div_not_1
/* %ecx == 0, we are dividing by 1.0 */
mov %edx,%eax
jmp LSecond_div_done
LSecond_div_not_1:
divl %ecx /* Divide the numerator by the denom ms dw */
LSecond_div_done:
movl %eax,FPU_result_2 /* Put the result in the answer */
mull XsigH(%ebx) /* mul by the ms dw of the denom */
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
#ifdef PARANOID
jc L_bugged_2
#endif /* PARANOID */
movl FPU_result_2,%eax /* Get the result back */
mull XsigL(%ebx) /* now mul the ls dw of the denom */
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
sbbl $0,FPU_accum_2
#ifdef PARANOID
jc L_bugged_2
#endif /* PARANOID */
jz LDo_3rd_32_bits
#ifdef PARANOID
cmpl $1,FPU_accum_2
jne L_bugged_2
#endif /* PARANOID */
/* need to subtract another once of the denom */
movl XsigL(%ebx),%eax
movl XsigH(%ebx),%edx
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_1
sbbl $0,FPU_accum_2
#ifdef PARANOID
jc L_bugged_2
jne L_bugged_2
#endif /* PARANOID */
addl $1,FPU_result_2 /* Correct the answer */
adcl $0,FPU_result_3
#ifdef PARANOID
jc L_bugged_2 /* Must check for non-zero result here */
#endif /* PARANOID */
/*----------------------------------------------------------------------*/
/* The division is essentially finished here, we just need to perform
tidying operations.
Deal with the 3rd 32 bits */
LDo_3rd_32_bits:
/* We use an approximation for the third 32 bits.
To take account of the 3rd 32 bits of the divisor
(call them del), we subtract del * (a/b) */
movl FPU_result_3,%eax /* a/b */
mull XsigLL(%ebx) /* del */
subl %edx,FPU_accum_1
/* A borrow indicates that the result is negative */
jnb LTest_over
movl XsigH(%ebx),%edx
addl %edx,FPU_accum_1
subl $1,FPU_result_2 /* Adjust the answer */
sbbl $0,FPU_result_3
/* The above addition might not have been enough, check again. */
movl FPU_accum_1,%edx /* get the reduced num */
cmpl XsigH(%ebx),%edx /* denom */
jb LDo_3rd_div
movl XsigH(%ebx),%edx
addl %edx,FPU_accum_1
subl $1,FPU_result_2 /* Adjust the answer */
sbbl $0,FPU_result_3
jmp LDo_3rd_div
LTest_over:
movl FPU_accum_1,%edx /* get the reduced num */
/* need to check for possible subsequent overflow */
cmpl XsigH(%ebx),%edx /* denom */
jb LDo_3rd_div
/* prevent overflow */
subl XsigH(%ebx),%edx
movl %edx,FPU_accum_1
addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
adcl $0,FPU_result_3
LDo_3rd_div:
movl FPU_accum_0,%eax
movl FPU_accum_1,%edx
divl XsigH(%ebx)
movl %eax,FPU_result_1 /* Rough estimate of third word */
movl PARAM3,%esi /* pointer to answer */
movl FPU_result_1,%eax
movl %eax,XsigLL(%esi)
movl FPU_result_2,%eax
movl %eax,XsigL(%esi)
movl FPU_result_3,%eax
movl %eax,XsigH(%esi)
L_exit:
popl %ebx
popl %edi
popl %esi
leave
ret
#ifdef PARANOID
/* The logic is wrong if we got here */
L_bugged:
pushl EX_INTERNAL|0x240
call EXCEPTION
pop %ebx
jmp L_exit
L_bugged_1:
pushl EX_INTERNAL|0x241
call EXCEPTION
pop %ebx
jmp L_exit
L_bugged_2:
pushl EX_INTERNAL|0x242
call EXCEPTION
pop %ebx
jmp L_exit
#endif /* PARANOID */

View File

@ -1,47 +0,0 @@
.file "div_small.S"
/*---------------------------------------------------------------------------+
| div_small.S |
| |
| Divide a 64 bit integer by a 32 bit integer & return remainder. |
| |
| Copyright (C) 1992,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| unsigned long FPU_div_small(unsigned long long *x, unsigned long y) |
+---------------------------------------------------------------------------*/
#include "fpu_emu.h"
.text
ENTRY(FPU_div_small)
pushl %ebp
movl %esp,%ebp
pushl %esi
movl PARAM1,%esi /* pointer to num */
movl PARAM2,%ecx /* The denominator */
movl 4(%esi),%eax /* Get the current num msw */
xorl %edx,%edx
divl %ecx
movl %eax,4(%esi)
movl (%esi),%eax /* Get the num lsw */
divl %ecx
movl %eax,(%esi)
movl %edx,%eax /* Return the remainder in eax */
popl %esi
leave
ret

View File

@ -1,176 +0,0 @@
/*---------------------------------------------------------------------------+
| mul_Xsig.S |
| |
| Multiply a 12 byte fixed point number by another fixed point number. |
| |
| Copyright (C) 1992,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| void mul32_Xsig(Xsig *x, unsigned b) |
| |
| void mul64_Xsig(Xsig *x, unsigned long long *b) |
| |
| void mul_Xsig_Xsig(Xsig *x, unsigned *b) |
| |
| The result is neither rounded nor normalized, and the ls bit or so may |
| be wrong. |
| |
+---------------------------------------------------------------------------*/
.file "mul_Xsig.S"
#include "fpu_emu.h"
.text
ENTRY(mul32_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
pushl %esi
movl PARAM1,%esi
movl PARAM2,%ecx
xor %eax,%eax
movl %eax,-4(%ebp)
movl %eax,-8(%ebp)
movl (%esi),%eax /* lsl of Xsig */
mull %ecx /* msl of b */
movl %edx,-12(%ebp)
movl 4(%esi),%eax /* midl of Xsig */
mull %ecx /* msl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)
movl 8(%esi),%eax /* msl of Xsig */
mull %ecx /* msl of b */
addl %eax,-8(%ebp)
adcl %edx,-4(%ebp)
movl -12(%ebp),%eax
movl %eax,(%esi)
movl -8(%ebp),%eax
movl %eax,4(%esi)
movl -4(%ebp),%eax
movl %eax,8(%esi)
popl %esi
leave
ret
ENTRY(mul64_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
pushl %esi
movl PARAM1,%esi
movl PARAM2,%ecx
xor %eax,%eax
movl %eax,-4(%ebp)
movl %eax,-8(%ebp)
movl (%esi),%eax /* lsl of Xsig */
mull 4(%ecx) /* msl of b */
movl %edx,-12(%ebp)
movl 4(%esi),%eax /* midl of Xsig */
mull (%ecx) /* lsl of b */
addl %edx,-12(%ebp)
adcl $0,-8(%ebp)
adcl $0,-4(%ebp)
movl 4(%esi),%eax /* midl of Xsig */
mull 4(%ecx) /* msl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)
movl 8(%esi),%eax /* msl of Xsig */
mull (%ecx) /* lsl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)
movl 8(%esi),%eax /* msl of Xsig */
mull 4(%ecx) /* msl of b */
addl %eax,-8(%ebp)
adcl %edx,-4(%ebp)
movl -12(%ebp),%eax
movl %eax,(%esi)
movl -8(%ebp),%eax
movl %eax,4(%esi)
movl -4(%ebp),%eax
movl %eax,8(%esi)
popl %esi
leave
ret
ENTRY(mul_Xsig_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
pushl %esi
movl PARAM1,%esi
movl PARAM2,%ecx
xor %eax,%eax
movl %eax,-4(%ebp)
movl %eax,-8(%ebp)
movl (%esi),%eax /* lsl of Xsig */
mull 8(%ecx) /* msl of b */
movl %edx,-12(%ebp)
movl 4(%esi),%eax /* midl of Xsig */
mull 4(%ecx) /* midl of b */
addl %edx,-12(%ebp)
adcl $0,-8(%ebp)
adcl $0,-4(%ebp)
movl 8(%esi),%eax /* msl of Xsig */
mull (%ecx) /* lsl of b */
addl %edx,-12(%ebp)
adcl $0,-8(%ebp)
adcl $0,-4(%ebp)
movl 4(%esi),%eax /* midl of Xsig */
mull 8(%ecx) /* msl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)
movl 8(%esi),%eax /* msl of Xsig */
mull 4(%ecx) /* midl of b */
addl %eax,-12(%ebp)
adcl %edx,-8(%ebp)
adcl $0,-4(%ebp)
movl 8(%esi),%eax /* msl of Xsig */
mull 8(%ecx) /* msl of b */
addl %eax,-8(%ebp)
adcl %edx,-4(%ebp)
movl -12(%ebp),%edx
movl %edx,(%esi)
movl -8(%ebp),%edx
movl %edx,4(%esi)
movl -4(%ebp),%edx
movl %edx,8(%esi)
popl %esi
leave
ret

View File

@ -1,142 +0,0 @@
/*---------------------------------------------------------------------------+
| polynomial_Xsig.S |
| |
| Fixed point arithmetic polynomial evaluation. |
| |
| Copyright (C) 1992,1993,1994,1995,1999 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@melbpc.org.au |
| |
| Call from C as: |
| void polynomial_Xsig(Xsig *accum, unsigned long long *x, |
| unsigned long long terms[], int n) |
| |
| Computes: |
| terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x)*x)*x)*x) ... )*x |
| and adds the result to the 12 byte Xsig. |
| The terms[] are each 8 bytes, but all computation is performed to 12 byte |
| precision. |
| |
| This function must be used carefully: most overflow of intermediate |
| results is controlled, but overflow of the result is not. |
| |
+---------------------------------------------------------------------------*/
.file "polynomial_Xsig.S"
#include "fpu_emu.h"
#define TERM_SIZE $8
#define SUM_MS -20(%ebp) /* sum ms long */
#define SUM_MIDDLE -24(%ebp) /* sum middle long */
#define SUM_LS -28(%ebp) /* sum ls long */
#define ACCUM_MS -4(%ebp) /* accum ms long */
#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
#define ACCUM_LS -12(%ebp) /* accum ls long */
#define OVERFLOWED -16(%ebp) /* addition overflow flag */
.text
ENTRY(polynomial_Xsig)
pushl %ebp
movl %esp,%ebp
subl $32,%esp
pushl %esi
pushl %edi
pushl %ebx
movl PARAM2,%esi /* x */
movl PARAM3,%edi /* terms */
movl TERM_SIZE,%eax
mull PARAM4 /* n */
addl %eax,%edi
movl 4(%edi),%edx /* terms[n] */
movl %edx,SUM_MS
movl (%edi),%edx /* terms[n] */
movl %edx,SUM_MIDDLE
xor %eax,%eax
movl %eax,SUM_LS
movb %al,OVERFLOWED
subl TERM_SIZE,%edi
decl PARAM4
js L_accum_done
L_accum_loop:
xor %eax,%eax
movl %eax,ACCUM_MS
movl %eax,ACCUM_MIDDLE
movl SUM_MIDDLE,%eax
mull (%esi) /* x ls long */
movl %edx,ACCUM_LS
movl SUM_LS,%eax
mull 4(%esi) /* x ms long */
addl %edx,ACCUM_LS
adcl $0,ACCUM_MIDDLE
adcl $0,ACCUM_MS
movl SUM_MIDDLE,%eax
mull 4(%esi) /* x ms long */
addl %eax,ACCUM_LS
adcl %edx,ACCUM_MIDDLE
adcl $0,ACCUM_MS
movl SUM_MS,%eax
mull (%esi) /* x ls long */
addl %eax,ACCUM_LS
adcl %edx,ACCUM_MIDDLE
adcl $0,ACCUM_MS
movl SUM_MS,%eax
mull 4(%esi) /* x ms long */
addl %eax,ACCUM_MIDDLE
adcl %edx,ACCUM_MS
testb $0xff,OVERFLOWED
jz L_no_overflow
movl (%esi),%eax
addl %eax,ACCUM_MIDDLE
movl 4(%esi),%eax
adcl %eax,ACCUM_MS /* This could overflow too */
L_no_overflow:
/*
* Now put the sum of next term and the accumulator
* into the sum register
*/
movl ACCUM_LS,%eax
// addl (%edi),%eax /* term ls long */
movl %eax,SUM_LS
movl ACCUM_MIDDLE,%eax
// adcl (%edi),%eax /* term ls long */
addl (%edi),%eax /* term ls long */
movl %eax,SUM_MIDDLE
movl ACCUM_MS,%eax
adcl 4(%edi),%eax /* term ms long */
movl %eax,SUM_MS
sbbb %al,%al
movb %al,OVERFLOWED /* Used in the next iteration */
subl TERM_SIZE,%edi
decl PARAM4
jns L_accum_loop
L_accum_done:
movl PARAM1,%edi /* accum */
movl SUM_LS,%eax
addl %eax,(%edi)
movl SUM_MIDDLE,%eax
adcl %eax,4(%edi)
movl SUM_MS,%eax
adcl %eax,8(%edi)
popl %ebx
popl %edi
popl %esi
leave
ret

View File

@ -1,71 +0,0 @@
/*---------------------------------------------------------------------------+
| reg_norm.S |
| |
| Copyright (C) 1992,1993,1994,1995,1997,1999 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@melbpc.org.au |
| |
| Normalize the value in a FPU_REG. |
| |
| Call from C as: |
| int FPU_normalize_nuo(FPU_REG *n, int bias) |
| |
| Return value is the tag of the answer. |
| |
+---------------------------------------------------------------------------*/
#include "fpu_emu.h"
/* Normalise without reporting underflow or overflow */
ENTRY(FPU_normalize_nuo)
pushl %ebp
movl %esp,%ebp
pushl %ebx
movl PARAM1,%ebx
movl SIGH(%ebx),%edx
movl SIGL(%ebx),%eax
orl %edx,%edx /* ms bits */
js L_exit_nuo_valid /* Already normalized */
jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */
orl %eax,%eax
jz L_exit_nuo_zero /* The contents are zero */
movl %eax,%edx
xorl %eax,%eax
subw $32,EXP(%ebx) /* This can cause an underflow */
/* We need to shift left by 1 - 31 bits */
L_nuo_shift_1:
bsrl %edx,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx
shld %cl,%eax,%edx
shl %cl,%eax
subw %cx,EXP(%ebx) /* This can cause an underflow */
movl %edx,SIGH(%ebx)
movl %eax,SIGL(%ebx)
L_exit_nuo_valid:
movl PARAM2,%eax
addw %ax,EXP(%ebx)
movl TAG_Valid,%eax
popl %ebx
leave
ret
L_exit_nuo_zero:
movw EXP_UNDER,EXP(%ebx)
movl PARAM2,%eax
addw %ax,EXP(%ebx)
movl TAG_Zero,%eax
popl %ebx
leave
ret

View File

@ -1,710 +0,0 @@
.file "reg_round.S"
/*---------------------------------------------------------------------------+
| reg_round.S |
| |
| Rounding/truncation/etc for FPU basic arithmetic functions. |
| |
| Copyright (C) 1993,1995,1997 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@suburbia.net |
| |
| This code has four possible entry points. |
| The following must be entered by a jmp instruction: |
| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
| |
| The FPU_round entry point is intended to be used by C code. |
| From C, call as: |
| int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
| |
| Return value is the tag of the answer, or-ed with FPU_Exception if |
| one was raised, or -1 on internal error. |
| |
| For correct "up" and "down" rounding, the argument must have the correct |
| sign. |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| Four entry points. |
| |
| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
| %eax:%ebx 64 bit significand |
| %edx 32 bit extension of the significand |
| %edi pointer to an FPU_REG for the result to be stored |
| stack calling function must have set up a C stack frame and |
| pushed %esi, %edi, and %ebx |
| |
| Needed just for the fpu_reg_round_sqrt entry point: |
| %cx A control word in the same format as the FPU control word. |
| Otherwise, PARAM4 must give such a value. |
| |
| |
| The significand and its extension are assumed to be exact in the |
| following sense: |
| If the significand by itself is the exact result then the significand |
| extension (%edx) must contain 0, otherwise the significand extension |
| must be non-zero. |
| If the significand extension is non-zero then the significand is |
| smaller than the magnitude of the correct exact result by an amount |
| greater than zero and less than one ls bit of the significand. |
| The significand extension is only required to have three possible |
| non-zero values: |
| less than 0x80000000 <=> the significand is less than 1/2 an ls |
| bit smaller than the magnitude of the |
| true exact result. |
| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
| smaller than the magnitude of the true |
| exact result. |
| greater than 0x80000000 <=> the significand is more than 1/2 an ls |
| bit smaller than the magnitude of the |
| true exact result. |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| The code in this module has become quite complex, but it should handle |
| all of the FPU flags which are set at this stage of the basic arithmetic |
| computations. |
| There are a few rare cases where the results are not set identically to |
| a real FPU. These require a bit more thought because at this stage the |
| results of the code here appear to be more consistent... |
| This may be changed in a future version. |
+---------------------------------------------------------------------------*/
#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"
/* Flags for FPU_bits_lost */
#define LOST_DOWN $1
#define LOST_UP $2
/* Flags for FPU_denormal */
#define DENORMAL $1
#define UNMASKED_UNDERFLOW $2
#ifndef NON_REENTRANT_FPU
/* Make the code re-entrant by putting
local storage on the stack: */
#define FPU_bits_lost (%esp)
#define FPU_denormal 1(%esp)
#else
/* Not re-entrant, so we can gain speed by putting
local storage in a static area: */
.data
.align 4,0
FPU_bits_lost:
.byte 0
FPU_denormal:
.byte 0
#endif /* NON_REENTRANT_FPU */
.text
.globl fpu_reg_round
.globl fpu_reg_round_sqrt
.globl fpu_Arith_exit
/* Entry point when called from C */
ENTRY(FPU_round)
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %ebx
movl PARAM1,%edi
movl SIGH(%edi),%eax
movl SIGL(%edi),%ebx
movl PARAM2,%edx
fpu_reg_round: /* Normal entry point */
movl PARAM4,%ecx
#ifndef NON_REENTRANT_FPU
pushl %ebx /* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */
#ifdef PARANOID
/* Cannot use this here yet */
/* orl %eax,%eax */
/* jns L_entry_bugged */
#endif /* PARANOID */
cmpw EXP_UNDER,EXP(%edi)
jle L_Make_denorm /* The number is a de-normal */
movb $0,FPU_denormal /* 0 -> not a de-normal */
Denorm_done:
movb $0,FPU_bits_lost /* No bits yet lost in rounding */
movl %ecx,%esi
andl CW_PC,%ecx
cmpl PR_64_BITS,%ecx
je LRound_To_64
cmpl PR_53_BITS,%ecx
je LRound_To_53
cmpl PR_24_BITS,%ecx
je LRound_To_24
#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
behaves as if the precision control bits were set to 11 "64 bits" */
cmpl PR_RESERVED_BITS,%ecx
je LRound_To_64
#ifdef PARANOID
jmp L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
jmp L_bugged_denorm /* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */
/* Round etc to 24 bit precision */
LRound_To_24:
movl %esi,%ecx
andl CW_RC,%ecx
cmpl RC_RND,%ecx
je LRound_nearest_24
cmpl RC_CHOP,%ecx
je LCheck_truncate_24
cmpl RC_UP,%ecx /* Towards +infinity */
je LUp_24
cmpl RC_DOWN,%ecx /* Towards -infinity */
je LDown_24
#ifdef PARANOID
jmp L_bugged_round24
#endif /* PARANOID */
LUp_24:
cmpb SIGN_POS,PARAM5
jne LCheck_truncate_24 /* If negative then up==truncate */
jmp LCheck_24_round_up
LDown_24:
cmpb SIGN_POS,PARAM5
je LCheck_truncate_24 /* If positive then down==truncate */
LCheck_24_round_up:
movl %eax,%ecx
andl $0x000000ff,%ecx
orl %ebx,%ecx
orl %edx,%ecx
jnz LDo_24_round_up
jmp L_Re_normalise
LRound_nearest_24:
/* Do rounding of the 24th bit if needed (nearest or even) */
movl %eax,%ecx
andl $0x000000ff,%ecx
cmpl $0x00000080,%ecx
jc LCheck_truncate_24 /* less than half, no increment needed */
jne LGreater_Half_24 /* greater than half, increment needed */
/* Possibly half, we need to check the ls bits */
orl %ebx,%ebx
jnz LGreater_Half_24 /* greater than half, increment needed */
orl %edx,%edx
jnz LGreater_Half_24 /* greater than half, increment needed */
/* Exactly half, increment only if 24th bit is 1 (round to even) */
testl $0x00000100,%eax
jz LDo_truncate_24
LGreater_Half_24: /* Rounding: increment at the 24th bit */
LDo_24_round_up:
andl $0xffffff00,%eax /* Truncate to 24 bits */
xorl %ebx,%ebx
movb LOST_UP,FPU_bits_lost
addl $0x00000100,%eax
jmp LCheck_Round_Overflow
LCheck_truncate_24:
movl %eax,%ecx
andl $0x000000ff,%ecx
orl %ebx,%ecx
orl %edx,%ecx
jz L_Re_normalise /* No truncation needed */
LDo_truncate_24:
andl $0xffffff00,%eax /* Truncate to 24 bits */
xorl %ebx,%ebx
movb LOST_DOWN,FPU_bits_lost
jmp L_Re_normalise
/* Round etc to 53 bit precision */
LRound_To_53:
movl %esi,%ecx
andl CW_RC,%ecx
cmpl RC_RND,%ecx
je LRound_nearest_53
cmpl RC_CHOP,%ecx
je LCheck_truncate_53
cmpl RC_UP,%ecx /* Towards +infinity */
je LUp_53
cmpl RC_DOWN,%ecx /* Towards -infinity */
je LDown_53
#ifdef PARANOID
jmp L_bugged_round53
#endif /* PARANOID */
LUp_53:
cmpb SIGN_POS,PARAM5
jne LCheck_truncate_53 /* If negative then up==truncate */
jmp LCheck_53_round_up
LDown_53:
cmpb SIGN_POS,PARAM5
je LCheck_truncate_53 /* If positive then down==truncate */
LCheck_53_round_up:
movl %ebx,%ecx
andl $0x000007ff,%ecx
orl %edx,%ecx
jnz LDo_53_round_up
jmp L_Re_normalise
LRound_nearest_53:
/* Do rounding of the 53rd bit if needed (nearest or even) */
movl %ebx,%ecx
andl $0x000007ff,%ecx
cmpl $0x00000400,%ecx
jc LCheck_truncate_53 /* less than half, no increment needed */
jnz LGreater_Half_53 /* greater than half, increment needed */
/* Possibly half, we need to check the ls bits */
orl %edx,%edx
jnz LGreater_Half_53 /* greater than half, increment needed */
/* Exactly half, increment only if 53rd bit is 1 (round to even) */
testl $0x00000800,%ebx
jz LTruncate_53
LGreater_Half_53: /* Rounding: increment at the 53rd bit */
LDo_53_round_up:
movb LOST_UP,FPU_bits_lost
andl $0xfffff800,%ebx /* Truncate to 53 bits */
addl $0x00000800,%ebx
adcl $0,%eax
jmp LCheck_Round_Overflow
LCheck_truncate_53:
movl %ebx,%ecx
andl $0x000007ff,%ecx
orl %edx,%ecx
jz L_Re_normalise
LTruncate_53:
movb LOST_DOWN,FPU_bits_lost
andl $0xfffff800,%ebx /* Truncate to 53 bits */
jmp L_Re_normalise
/* Round etc to 64 bit precision */
LRound_To_64:
movl %esi,%ecx
andl CW_RC,%ecx
cmpl RC_RND,%ecx
je LRound_nearest_64
cmpl RC_CHOP,%ecx
je LCheck_truncate_64
cmpl RC_UP,%ecx /* Towards +infinity */
je LUp_64
cmpl RC_DOWN,%ecx /* Towards -infinity */
je LDown_64
#ifdef PARANOID
jmp L_bugged_round64
#endif /* PARANOID */
LUp_64:
cmpb SIGN_POS,PARAM5
jne LCheck_truncate_64 /* If negative then up==truncate */
orl %edx,%edx
jnz LDo_64_round_up
jmp L_Re_normalise
LDown_64:
cmpb SIGN_POS,PARAM5
je LCheck_truncate_64 /* If positive then down==truncate */
orl %edx,%edx
jnz LDo_64_round_up
jmp L_Re_normalise
LRound_nearest_64:
cmpl $0x80000000,%edx
jc LCheck_truncate_64
jne LDo_64_round_up
/* Now test for round-to-even */
testb $1,%bl
jz LCheck_truncate_64
LDo_64_round_up:
movb LOST_UP,FPU_bits_lost
addl $1,%ebx
adcl $0,%eax
LCheck_Round_Overflow:
jnc L_Re_normalise
/* Overflow, adjust the result (significand to 1.0) */
rcrl $1,%eax
rcrl $1,%ebx
incw EXP(%edi)
jmp L_Re_normalise
LCheck_truncate_64:
orl %edx,%edx
jz L_Re_normalise
LTruncate_64:
movb LOST_DOWN,FPU_bits_lost
L_Re_normalise:
testb $0xff,FPU_denormal
jnz Normalise_result
L_Normalised:
movl TAG_Valid,%edx
L_deNormalised:
cmpb LOST_UP,FPU_bits_lost
je L_precision_lost_up
cmpb LOST_DOWN,FPU_bits_lost
je L_precision_lost_down
L_no_precision_loss:
/* store the result */
L_Store_significand:
movl %eax,SIGH(%edi)
movl %ebx,SIGL(%edi)
cmpw EXP_OVER,EXP(%edi)
jge L_overflow
movl %edx,%eax
/* Convert the exponent to 80x87 form. */
addw EXTENDED_Ebias,EXP(%edi)
andw $0x7fff,EXP(%edi)
fpu_reg_round_signed_special_exit:
cmpb SIGN_POS,PARAM5
je fpu_reg_round_special_exit
orw $0x8000,EXP(%edi) /* Negative sign for the result. */
fpu_reg_round_special_exit:
#ifndef NON_REENTRANT_FPU
popl %ebx /* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */
fpu_Arith_exit:
popl %ebx
popl %edi
popl %esi
leave
ret
/*
* Set the FPU status flags to represent precision loss due to
* round-up.
*/
L_precision_lost_up:
push %edx
push %eax
call SYMBOL_NAME(set_precision_flag_up)
popl %eax
popl %edx
jmp L_no_precision_loss
/*
* Set the FPU status flags to represent precision loss due to
* truncation.
*/
L_precision_lost_down:
push %edx
push %eax
call SYMBOL_NAME(set_precision_flag_down)
popl %eax
popl %edx
jmp L_no_precision_loss
/*
* The number is a denormal (which might get rounded up to a normal)
* Shift the number right the required number of bits, which will
* have to be undone later...
*/
L_Make_denorm:
/* The action to be taken depends upon whether the underflow
exception is masked */
testb CW_Underflow,%cl /* Underflow mask. */
jz Unmasked_underflow /* Do not make a denormal. */
movb DENORMAL,FPU_denormal
pushl %ecx /* Save */
movw EXP_UNDER+1,%cx
subw EXP(%edi),%cx
cmpw $64,%cx /* shrd only works for 0..31 bits */
jnc Denorm_shift_more_than_63
cmpw $32,%cx /* shrd only works for 0..31 bits */
jnc Denorm_shift_more_than_32
/*
* We got here without jumps by assuming that the most common requirement
* is for a small de-normalising shift.
* Shift by [1..31] bits
*/
addw %cx,EXP(%edi)
orl %edx,%edx /* extension */
setne %ch /* Save whether %edx is non-zero */
xorl %edx,%edx
shrd %cl,%ebx,%edx
shrd %cl,%eax,%ebx
shr %cl,%eax
orb %ch,%dl
popl %ecx
jmp Denorm_done
/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
addw %cx,EXP(%edi)
subb $32,%cl
orl %edx,%edx
setne %ch
orb %ch,%bl
xorl %edx,%edx
shrd %cl,%ebx,%edx
shrd %cl,%eax,%ebx
shr %cl,%eax
orl %edx,%edx /* test these 32 bits */
setne %cl
orb %ch,%bl
orb %cl,%bl
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
popl %ecx
jmp Denorm_done
/* Shift by [64..) bits */
Denorm_shift_more_than_63:
cmpw $64,%cx
jne Denorm_shift_more_than_64
/* Exactly 64 bit shift */
addw %cx,EXP(%edi)
xorl %ecx,%ecx
orl %edx,%edx
setne %cl
orl %ebx,%ebx
setne %ch
orb %ch,%cl
orb %cl,%al
movl %eax,%edx
xorl %eax,%eax
xorl %ebx,%ebx
popl %ecx
jmp Denorm_done
Denorm_shift_more_than_64:
movw EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
movl $1,%edx
xorl %eax,%eax
xorl %ebx,%ebx
popl %ecx
jmp Denorm_done
Unmasked_underflow:
movb UNMASKED_UNDERFLOW,FPU_denormal
jmp Denorm_done
/* Undo the de-normalisation. */
Normalise_result:
cmpb UNMASKED_UNDERFLOW,FPU_denormal
je Signal_underflow
/* The number must be a denormal if we got here. */
#ifdef PARANOID
/* But check it... just in case. */
cmpw EXP_UNDER+1,EXP(%edi)
jne L_norm_bugged
#endif /* PARANOID */
#ifdef PECULIAR_486
/*
* This implements a special feature of 80486 behaviour.
* Underflow will be signalled even if the number is
* not a denormal after rounding.
* This difference occurs only for masked underflow, and not
* in the unmasked case.
* Actual 80486 behaviour differs from this in some circumstances.
*/
orl %eax,%eax /* ms bits */
js LPseudoDenormal /* Will be masked underflow */
#else
orl %eax,%eax /* ms bits */
js L_Normalised /* No longer a denormal */
#endif /* PECULIAR_486 */
jnz LDenormal_adj_exponent
orl %ebx,%ebx
jz L_underflow_to_zero /* The contents are zero */
LDenormal_adj_exponent:
decw EXP(%edi)
LPseudoDenormal:
testb $0xff,FPU_bits_lost /* bits lost == underflow */
movl TAG_Special,%edx
jz L_deNormalised
/* There must be a masked underflow */
push %eax
pushl EX_Underflow
call EXCEPTION
popl %eax
popl %eax
movl TAG_Special,%edx
jmp L_deNormalised
/*
* The operations resulted in a number too small to represent.
* Masked response.
*/
L_underflow_to_zero:
push %eax
call SYMBOL_NAME(set_precision_flag_down)
popl %eax
push %eax
pushl EX_Underflow
call EXCEPTION
popl %eax
popl %eax
/* Reduce the exponent to EXP_UNDER */
movw EXP_UNDER,EXP(%edi)
movl TAG_Zero,%edx
jmp L_Store_significand
/* The operations resulted in a number too large to represent. */
L_overflow:
pushw PARAM5
addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
push %edi
call SYMBOL_NAME(arith_round_overflow)
pop %edi
jmp fpu_reg_round_signed_special_exit
Signal_underflow:
/* The number may have been changed to a non-denormal */
/* by the rounding operations. */
cmpw EXP_UNDER,EXP(%edi)
jle Do_unmasked_underflow
jmp L_Normalised
Do_unmasked_underflow:
/* Increase the exponent by the magic number */
addw $(3*(1<<13)),EXP(%edi)
push %eax
pushl EX_Underflow
call EXCEPTION
popl %eax
popl %eax
jmp L_Normalised
#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
pushl EX_INTERNAL|0x236
call EXCEPTION
popl %ebx
jmp L_exception_exit
#else
L_bugged_denorm:
pushl EX_INTERNAL|0x230
call EXCEPTION
popl %ebx
jmp L_exception_exit
#endif /* PECULIAR_486 */
L_bugged_round24:
pushl EX_INTERNAL|0x231
call EXCEPTION
popl %ebx
jmp L_exception_exit
L_bugged_round53:
pushl EX_INTERNAL|0x232
call EXCEPTION
popl %ebx
jmp L_exception_exit
L_bugged_round64:
pushl EX_INTERNAL|0x233
call EXCEPTION
popl %ebx
jmp L_exception_exit
L_norm_bugged:
pushl EX_INTERNAL|0x234
call EXCEPTION
popl %ebx
jmp L_exception_exit
L_entry_bugged:
pushl EX_INTERNAL|0x235
call EXCEPTION
popl %ebx
L_exception_exit:
mov $-1,%eax
jmp fpu_reg_round_special_exit
#endif /* PARANOID */

View File

@ -1,167 +0,0 @@
.file "reg_u_add.S"
/*---------------------------------------------------------------------------+
| reg_u_add.S |
| |
| Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the |
| result in a destination FPU_REG. |
| |
| Copyright (C) 1992,1993,1995,1997 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
| E-mail billm@suburbia.net |
| |
| Call from C as: |
| int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
| int control_w) |
| Return value is the tag of the answer, or-ed with FPU_Exception if |
| one was raised, or -1 on internal error. |
| |
+---------------------------------------------------------------------------*/
/*
| Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ).
| Takes two valid reg f.p. numbers (TAG_Valid), which are
| treated as unsigned numbers,
| and returns their sum as a TAG_Valid or TAG_Special f.p. number.
| The returned number is normalized.
| Basic checks are performed if PARANOID is defined.
*/
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
.text
ENTRY(FPU_u_add)
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %ebx
movl PARAM1,%esi /* source 1 */
movl PARAM2,%edi /* source 2 */
movl PARAM6,%ecx
movl %ecx,%edx
subl PARAM7,%ecx /* exp1 - exp2 */
jge L_arg1_larger
/* num1 is smaller */
movl SIGL(%esi),%ebx
movl SIGH(%esi),%eax
movl %edi,%esi
movl PARAM7,%edx
negw %cx
jmp L_accum_loaded
L_arg1_larger:
/* num1 has larger or equal exponent */
movl SIGL(%edi),%ebx
movl SIGH(%edi),%eax
L_accum_loaded:
movl PARAM3,%edi /* destination */
movw %dx,EXP(%edi) /* Copy exponent to destination */
xorl %edx,%edx /* clear the extension */
#ifdef PARANOID
testl $0x80000000,%eax
je L_bugged
testl $0x80000000,SIGH(%esi)
je L_bugged
#endif /* PARANOID */
/* The number to be shifted is in %eax:%ebx:%edx */
cmpw $32,%cx /* shrd only works for 0..31 bits */
jnc L_more_than_31
/* less than 32 bits */
shrd %cl,%ebx,%edx
shrd %cl,%eax,%ebx
shr %cl,%eax
jmp L_shift_done
L_more_than_31:
cmpw $64,%cx
jnc L_more_than_63
subb $32,%cl
jz L_exactly_32
shrd %cl,%eax,%edx
shr %cl,%eax
orl %ebx,%ebx
jz L_more_31_no_low /* none of the lowest bits is set */
orl $1,%edx /* record the fact in the extension */
L_more_31_no_low:
movl %eax,%ebx
xorl %eax,%eax
jmp L_shift_done
L_exactly_32:
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
jmp L_shift_done
L_more_than_63:
cmpw $65,%cx
jnc L_more_than_64
movl %eax,%edx
orl %ebx,%ebx
jz L_more_63_no_low
orl $1,%edx
jmp L_more_63_no_low
L_more_than_64:
movl $1,%edx /* The shifted nr always at least one '1' */
L_more_63_no_low:
xorl %ebx,%ebx
xorl %eax,%eax
L_shift_done:
/* Now do the addition */
addl SIGL(%esi),%ebx
adcl SIGH(%esi),%eax
jnc L_round_the_result
/* Overflow, adjust the result */
rcrl $1,%eax
rcrl $1,%ebx
rcrl $1,%edx
jnc L_no_bit_lost
orl $1,%edx
L_no_bit_lost:
incw EXP(%edi)
L_round_the_result:
jmp fpu_reg_round /* Round the result */
#ifdef PARANOID
/* If we ever get here then we have problems! */
L_bugged:
pushl EX_INTERNAL|0x201
call EXCEPTION
pop %ebx
movl $-1,%eax
jmp L_exit
L_exit:
popl %ebx
popl %edi
popl %esi
leave
ret
#endif /* PARANOID */

View File

@ -1,473 +0,0 @@
.file "reg_u_div.S"
/*---------------------------------------------------------------------------+
| reg_u_div.S |
| |
| Divide one FPU_REG by another and put the result in a destination FPU_REG.|
| |
| Copyright (C) 1992,1993,1995,1997 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
| E-mail billm@suburbia.net |
| |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| Call from C as: |
| int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, |
| unsigned int control_word, char *sign) |
| |
| Does not compute the destination exponent, but does adjust it. |
| |
| Return value is the tag of the answer, or-ed with FPU_Exception if |
| one was raised, or -1 on internal error. |
+---------------------------------------------------------------------------*/
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
/* #define dSIGL(x) (x) */
/* #define dSIGH(x) 4(x) */
#ifndef NON_REENTRANT_FPU
/*
Local storage on the stack:
Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
Overflow flag: ovfl_flag
*/
#define FPU_accum_3 -4(%ebp)
#define FPU_accum_2 -8(%ebp)
#define FPU_accum_1 -12(%ebp)
#define FPU_accum_0 -16(%ebp)
#define FPU_result_1 -20(%ebp)
#define FPU_result_2 -24(%ebp)
#define FPU_ovfl_flag -28(%ebp)
#else
.data
/*
Local storage in a static area:
Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
Overflow flag: ovfl_flag
*/
.align 4,0
FPU_accum_3:
.long 0
FPU_accum_2:
.long 0
FPU_accum_1:
.long 0
FPU_accum_0:
.long 0
FPU_result_1:
.long 0
FPU_result_2:
.long 0
FPU_ovfl_flag:
.byte 0
#endif /* NON_REENTRANT_FPU */
#define REGA PARAM1
#define REGB PARAM2
#define DEST PARAM3
.text
ENTRY(FPU_u_div)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
subl $28,%esp
#endif /* NON_REENTRANT_FPU */
pushl %esi
pushl %edi
pushl %ebx
movl REGA,%esi
movl REGB,%ebx
movl DEST,%edi
movw EXP(%esi),%dx
movw EXP(%ebx),%ax
.byte 0x0f,0xbf,0xc0 /* movsx %ax,%eax */
.byte 0x0f,0xbf,0xd2 /* movsx %dx,%edx */
subl %eax,%edx
addl EXP_BIAS,%edx
/* A denormal and a large number can cause an exponent underflow */
cmpl EXP_WAY_UNDER,%edx
jg xExp_not_underflow
/* Set to a really low value allow correct handling */
movl EXP_WAY_UNDER,%edx
xExp_not_underflow:
movw %dx,EXP(%edi)
#ifdef PARANOID
/* testl $0x80000000, SIGH(%esi) // Dividend */
/* je L_bugged */
testl $0x80000000, SIGH(%ebx) /* Divisor */
je L_bugged
#endif /* PARANOID */
/* Check if the divisor can be treated as having just 32 bits */
cmpl $0,SIGL(%ebx)
jnz L_Full_Division /* Can't do a quick divide */
/* We should be able to zip through the division here */
movl SIGH(%ebx),%ecx /* The divisor */
movl SIGH(%esi),%edx /* Dividend */
movl SIGL(%esi),%eax /* Dividend */
cmpl %ecx,%edx
setaeb FPU_ovfl_flag /* Keep a record */
jb L_no_adjust
subl %ecx,%edx /* Prevent the overflow */
L_no_adjust:
/* Divide the 64 bit number by the 32 bit denominator */
divl %ecx
movl %eax,FPU_result_2
/* Work on the remainder of the first division */
xorl %eax,%eax
divl %ecx
movl %eax,FPU_result_1
/* Work on the remainder of the 64 bit division */
xorl %eax,%eax
divl %ecx
testb $255,FPU_ovfl_flag /* was the num > denom ? */
je L_no_overflow
/* Do the shifting here */
/* increase the exponent */
incw EXP(%edi)
/* shift the mantissa right one bit */
stc /* To set the ms bit */
rcrl FPU_result_2
rcrl FPU_result_1
rcrl %eax
L_no_overflow:
jmp LRound_precision /* Do the rounding as required */
/*---------------------------------------------------------------------------+
| Divide: Return arg1/arg2 to arg3. |
| |
| This routine does not use the exponents of arg1 and arg2, but does |
| adjust the exponent of arg3. |
| |
| The maximum returned value is (ignoring exponents) |
| .ffffffff ffffffff |
| ------------------ = 1.ffffffff fffffffe |
| .80000000 00000000 |
| and the minimum is |
| .80000000 00000000 |
| ------------------ = .80000000 00000001 (rounded) |
| .ffffffff ffffffff |
| |
+---------------------------------------------------------------------------*/
L_Full_Division:
/* Save extended dividend in local register */
movl SIGL(%esi),%eax
movl %eax,FPU_accum_2
movl SIGH(%esi),%eax
movl %eax,FPU_accum_3
xorl %eax,%eax
movl %eax,FPU_accum_1 /* zero the extension */
movl %eax,FPU_accum_0 /* zero the extension */
movl SIGL(%esi),%eax /* Get the current num */
movl SIGH(%esi),%edx
/*----------------------------------------------------------------------*/
/* Initialization done.
Do the first 32 bits. */
movb $0,FPU_ovfl_flag
cmpl SIGH(%ebx),%edx /* Test for imminent overflow */
jb LLess_than_1
ja LGreater_than_1
cmpl SIGL(%ebx),%eax
jb LLess_than_1
LGreater_than_1:
/* The dividend is greater or equal, would cause overflow */
setaeb FPU_ovfl_flag /* Keep a record */
subl SIGL(%ebx),%eax
sbbl SIGH(%ebx),%edx /* Prevent the overflow */
movl %eax,FPU_accum_2
movl %edx,FPU_accum_3
LLess_than_1:
/* At this point, we have a dividend < divisor, with a record of
adjustment in FPU_ovfl_flag */
/* We will divide by a number which is too large */
movl SIGH(%ebx),%ecx
addl $1,%ecx
jnc LFirst_div_not_1
/* here we need to divide by 100000000h,
i.e., no division at all.. */
mov %edx,%eax
jmp LFirst_div_done
LFirst_div_not_1:
divl %ecx /* Divide the numerator by the augmented
denom ms dw */
LFirst_div_done:
movl %eax,FPU_result_2 /* Put the result in the answer */
mull SIGH(%ebx) /* mul by the ms dw of the denom */
subl %eax,FPU_accum_2 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_3
movl FPU_result_2,%eax /* Get the result back */
mull SIGL(%ebx) /* now mul the ls dw of the denom */
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
sbbl $0,FPU_accum_3
je LDo_2nd_32_bits /* Must check for non-zero result here */
#ifdef PARANOID
jb L_bugged_1
#endif /* PARANOID */
/* need to subtract another once of the denom */
incl FPU_result_2 /* Correct the answer */
movl SIGL(%ebx),%eax
movl SIGH(%ebx),%edx
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
#ifdef PARANOID
sbbl $0,FPU_accum_3
jne L_bugged_1 /* Must check for non-zero result here */
#endif /* PARANOID */
/*----------------------------------------------------------------------*/
/* Half of the main problem is done, there is just a reduced numerator
to handle now.
Work with the second 32 bits, FPU_accum_0 not used from now on */
LDo_2nd_32_bits:
movl FPU_accum_2,%edx /* get the reduced num */
movl FPU_accum_1,%eax
/* need to check for possible subsequent overflow */
cmpl SIGH(%ebx),%edx
jb LDo_2nd_div
ja LPrevent_2nd_overflow
cmpl SIGL(%ebx),%eax
jb LDo_2nd_div
LPrevent_2nd_overflow:
/* The numerator is greater or equal, would cause overflow */
/* prevent overflow */
subl SIGL(%ebx),%eax
sbbl SIGH(%ebx),%edx
movl %edx,FPU_accum_2
movl %eax,FPU_accum_1
incl FPU_result_2 /* Reflect the subtraction in the answer */
#ifdef PARANOID
je L_bugged_2 /* Can't bump the result to 1.0 */
#endif /* PARANOID */
LDo_2nd_div:
cmpl $0,%ecx /* augmented denom msw */
jnz LSecond_div_not_1
/* %ecx == 0, we are dividing by 1.0 */
mov %edx,%eax
jmp LSecond_div_done
LSecond_div_not_1:
divl %ecx /* Divide the numerator by the denom ms dw */
LSecond_div_done:
movl %eax,FPU_result_1 /* Put the result in the answer */
mull SIGH(%ebx) /* mul by the ms dw of the denom */
subl %eax,FPU_accum_1 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_2
#ifdef PARANOID
jc L_bugged_2
#endif /* PARANOID */
movl FPU_result_1,%eax /* Get the result back */
mull SIGL(%ebx) /* now mul the ls dw of the denom */
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
sbbl $0,FPU_accum_2
#ifdef PARANOID
jc L_bugged_2
#endif /* PARANOID */
jz LDo_3rd_32_bits
#ifdef PARANOID
cmpl $1,FPU_accum_2
jne L_bugged_2
#endif /* PARANOID */
/* need to subtract another once of the denom */
movl SIGL(%ebx),%eax
movl SIGH(%ebx),%edx
subl %eax,FPU_accum_0 /* Subtract from the num local reg */
sbbl %edx,FPU_accum_1
sbbl $0,FPU_accum_2
#ifdef PARANOID
jc L_bugged_2
jne L_bugged_2
#endif /* PARANOID */
addl $1,FPU_result_1 /* Correct the answer */
adcl $0,FPU_result_2
#ifdef PARANOID
jc L_bugged_2 /* Must check for non-zero result here */
#endif /* PARANOID */
/*----------------------------------------------------------------------*/
/* The division is essentially finished here, we just need to perform
tidying operations.
Deal with the 3rd 32 bits */
LDo_3rd_32_bits:
movl FPU_accum_1,%edx /* get the reduced num */
movl FPU_accum_0,%eax
/* need to check for possible subsequent overflow */
cmpl SIGH(%ebx),%edx /* denom */
jb LRound_prep
ja LPrevent_3rd_overflow
cmpl SIGL(%ebx),%eax /* denom */
jb LRound_prep
LPrevent_3rd_overflow:
/* prevent overflow */
subl SIGL(%ebx),%eax
sbbl SIGH(%ebx),%edx
movl %edx,FPU_accum_1
movl %eax,FPU_accum_0
addl $1,FPU_result_1 /* Reflect the subtraction in the answer */
adcl $0,FPU_result_2
jne LRound_prep
jnc LRound_prep
/* This is a tricky spot, there is an overflow of the answer */
movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */
LRound_prep:
/*
* Prepare for rounding.
* To test for rounding, we just need to compare 2*accum with the
* denom.
*/
movl FPU_accum_0,%ecx
movl FPU_accum_1,%edx
movl %ecx,%eax
orl %edx,%eax
jz LRound_ovfl /* The accumulator contains zero. */
/* Multiply by 2 */
clc
rcll $1,%ecx
rcll $1,%edx
jc LRound_large /* No need to compare, denom smaller */
subl SIGL(%ebx),%ecx
sbbl SIGH(%ebx),%edx
jnc LRound_not_small
movl $0x70000000,%eax /* Denom was larger */
jmp LRound_ovfl
LRound_not_small:
jnz LRound_large
movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */
jmp LRound_ovfl
LRound_large:
movl $0xff000000,%eax /* Denom was smaller */
LRound_ovfl:
/* We are now ready to deal with rounding, but first we must get
the bits properly aligned */
testb $255,FPU_ovfl_flag /* was the num > denom ? */
je LRound_precision
incw EXP(%edi)
/* shift the mantissa right one bit */
stc /* Will set the ms bit */
rcrl FPU_result_2
rcrl FPU_result_1
rcrl %eax
/* Round the result as required */
LRound_precision:
decw EXP(%edi) /* binary point between 1st & 2nd bits */
movl %eax,%edx
movl FPU_result_1,%ebx
movl FPU_result_2,%eax
jmp fpu_reg_round
#ifdef PARANOID
/* The logic is wrong if we got here */
L_bugged:
pushl EX_INTERNAL|0x202
call EXCEPTION
pop %ebx
jmp L_exit
L_bugged_1:
pushl EX_INTERNAL|0x203
call EXCEPTION
pop %ebx
jmp L_exit
L_bugged_2:
pushl EX_INTERNAL|0x204
call EXCEPTION
pop %ebx
jmp L_exit
L_exit:
movl $-1,%eax
popl %ebx
popl %edi
popl %esi
leave
ret
#endif /* PARANOID */

View File

@ -1,148 +0,0 @@
.file "reg_u_mul.S"
/*---------------------------------------------------------------------------+
| reg_u_mul.S |
| |
| Core multiplication routine |
| |
| Copyright (C) 1992,1993,1995,1997 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
| E-mail billm@suburbia.net |
| |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| Basic multiplication routine. |
| Does not check the resulting exponent for overflow/underflow |
| |
| FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); |
| |
| Internal working is at approx 128 bits. |
| Result is rounded to nearest 53 or 64 bits, using "nearest or even". |
+---------------------------------------------------------------------------*/
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
#ifndef NON_REENTRANT_FPU
/* Local storage on the stack: */
#define FPU_accum_0 -4(%ebp) /* ms word */
#define FPU_accum_1 -8(%ebp)
#else
/* Local storage in a static area: */
.data
.align 4,0
FPU_accum_0:
.long 0
FPU_accum_1:
.long 0
#endif /* NON_REENTRANT_FPU */
.text
ENTRY(FPU_u_mul)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
subl $8,%esp
#endif /* NON_REENTRANT_FPU */
pushl %esi
pushl %edi
pushl %ebx
movl PARAM1,%esi
movl PARAM2,%edi
#ifdef PARANOID
testl $0x80000000,SIGH(%esi)
jz L_bugged
testl $0x80000000,SIGH(%edi)
jz L_bugged
#endif /* PARANOID */
xorl %ecx,%ecx
xorl %ebx,%ebx
movl SIGL(%esi),%eax
mull SIGL(%edi)
movl %eax,FPU_accum_0
movl %edx,FPU_accum_1
movl SIGL(%esi),%eax
mull SIGH(%edi)
addl %eax,FPU_accum_1
adcl %edx,%ebx
/* adcl $0,%ecx // overflow here is not possible */
movl SIGH(%esi),%eax
mull SIGL(%edi)
addl %eax,FPU_accum_1
adcl %edx,%ebx
adcl $0,%ecx
movl SIGH(%esi),%eax
mull SIGH(%edi)
addl %eax,%ebx
adcl %edx,%ecx
/* Get the sum of the exponents. */
movl PARAM6,%eax
subl EXP_BIAS-1,%eax
/* Two denormals can cause an exponent underflow */
cmpl EXP_WAY_UNDER,%eax
jg Exp_not_underflow
/* Set to a really low value allow correct handling */
movl EXP_WAY_UNDER,%eax
Exp_not_underflow:
/* Have now finished with the sources */
movl PARAM3,%edi /* Point to the destination */
movw %ax,EXP(%edi)
/* Now make sure that the result is normalized */
testl $0x80000000,%ecx
jnz LResult_Normalised
/* Normalize by shifting left one bit */
shll $1,FPU_accum_0
rcll $1,FPU_accum_1
rcll $1,%ebx
rcll $1,%ecx
decw EXP(%edi)
LResult_Normalised:
movl FPU_accum_0,%eax
movl FPU_accum_1,%edx
orl %eax,%eax
jz L_extent_zero
orl $1,%edx
L_extent_zero:
movl %ecx,%eax
jmp fpu_reg_round
#ifdef PARANOID
L_bugged:
pushl EX_INTERNAL|0x205
call EXCEPTION
pop %ebx
jmp L_exit
L_exit:
popl %ebx
popl %edi
popl %esi
leave
ret
#endif /* PARANOID */

View File

@ -1,272 +0,0 @@
.file "reg_u_sub.S"
/*---------------------------------------------------------------------------+
| reg_u_sub.S |
| |
| Core floating point subtraction routine. |
| |
| Copyright (C) 1992,1993,1995,1997 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
| E-mail billm@suburbia.net |
| |
| Call from C as: |
| int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, |
| int control_w) |
| Return value is the tag of the answer, or-ed with FPU_Exception if |
| one was raised, or -1 on internal error. |
| |
+---------------------------------------------------------------------------*/
/*
| Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ).
| Takes two valid reg f.p. numbers (TAG_Valid), which are
| treated as unsigned numbers,
| and returns their difference as a TAG_Valid or TAG_Zero f.p.
| number.
| The first number (arg1) must be the larger.
| The returned number is normalized.
| Basic checks are performed if PARANOID is defined.
*/
#include "exception.h"
#include "fpu_emu.h"
#include "control_w.h"
.text
ENTRY(FPU_u_sub)
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %ebx
movl PARAM1,%esi /* source 1 */
movl PARAM2,%edi /* source 2 */
movl PARAM6,%ecx
subl PARAM7,%ecx /* exp1 - exp2 */
#ifdef PARANOID
/* source 2 is always smaller than source 1 */
js L_bugged_1
testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */
je L_bugged_2
testl $0x80000000,SIGH(%esi)
je L_bugged_2
#endif /* PARANOID */
/*--------------------------------------+
| Form a register holding the |
| smaller number |
+--------------------------------------*/
movl SIGH(%edi),%eax /* register ms word */
movl SIGL(%edi),%ebx /* register ls word */
movl PARAM3,%edi /* destination */
movl PARAM6,%edx
movw %dx,EXP(%edi) /* Copy exponent to destination */
xorl %edx,%edx /* register extension */
/*--------------------------------------+
| Shift the temporary register |
| right the required number of |
| places. |
+--------------------------------------*/
cmpw $32,%cx /* shrd only works for 0..31 bits */
jnc L_more_than_31
/* less than 32 bits */
shrd %cl,%ebx,%edx
shrd %cl,%eax,%ebx
shr %cl,%eax
jmp L_shift_done
L_more_than_31:
cmpw $64,%cx
jnc L_more_than_63
subb $32,%cl
jz L_exactly_32
shrd %cl,%eax,%edx
shr %cl,%eax
orl %ebx,%ebx
jz L_more_31_no_low /* none of the lowest bits is set */
orl $1,%edx /* record the fact in the extension */
L_more_31_no_low:
movl %eax,%ebx
xorl %eax,%eax
jmp L_shift_done
L_exactly_32:
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
jmp L_shift_done
L_more_than_63:
cmpw $65,%cx
jnc L_more_than_64
/* Shift right by 64 bits */
movl %eax,%edx
orl %ebx,%ebx
jz L_more_63_no_low
orl $1,%edx
jmp L_more_63_no_low
L_more_than_64:
jne L_more_than_65
/* Shift right by 65 bits */
/* Carry is clear if we get here */
movl %eax,%edx
rcrl %edx
jnc L_shift_65_nc
orl $1,%edx
jmp L_more_63_no_low
L_shift_65_nc:
orl %ebx,%ebx
jz L_more_63_no_low
orl $1,%edx
jmp L_more_63_no_low
L_more_than_65:
movl $1,%edx /* The shifted nr always at least one '1' */
L_more_63_no_low:
xorl %ebx,%ebx
xorl %eax,%eax
L_shift_done:
L_subtr:
/*------------------------------+
| Do the subtraction |
+------------------------------*/
xorl %ecx,%ecx
subl %edx,%ecx
movl %ecx,%edx
movl SIGL(%esi),%ecx
sbbl %ebx,%ecx
movl %ecx,%ebx
movl SIGH(%esi),%ecx
sbbl %eax,%ecx
movl %ecx,%eax
#ifdef PARANOID
/* We can never get a borrow */
jc L_bugged
#endif /* PARANOID */
/*--------------------------------------+
| Normalize the result |
+--------------------------------------*/
testl $0x80000000,%eax
jnz L_round /* no shifting needed */
orl %eax,%eax
jnz L_shift_1 /* shift left 1 - 31 bits */
orl %ebx,%ebx
jnz L_shift_32 /* shift left 32 - 63 bits */
/*
* A rare case, the only one which is non-zero if we got here
* is: 1000000 .... 0000
* -0111111 .... 1111 1
* --------------------
* 0000000 .... 0000 1
*/
cmpl $0x80000000,%edx
jnz L_must_be_zero
/* Shift left 64 bits */
subw $64,EXP(%edi)
xchg %edx,%eax
jmp fpu_reg_round
L_must_be_zero:
#ifdef PARANOID
orl %edx,%edx
jnz L_bugged_3
#endif /* PARANOID */
/* The result is zero */
movw $0,EXP(%edi) /* exponent */
movl $0,SIGL(%edi)
movl $0,SIGH(%edi)
movl TAG_Zero,%eax
jmp L_exit
L_shift_32:
movl %ebx,%eax
movl %edx,%ebx
movl $0,%edx
subw $32,EXP(%edi) /* Can get underflow here */
/* We need to shift left by 1 - 31 bits */
L_shift_1:
bsrl %eax,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx
shld %cl,%ebx,%eax
shld %cl,%edx,%ebx
shl %cl,%edx
subw %cx,EXP(%edi) /* Can get underflow here */
L_round:
jmp fpu_reg_round /* Round the result */
#ifdef PARANOID
L_bugged_1:
pushl EX_INTERNAL|0x206
call EXCEPTION
pop %ebx
jmp L_error_exit
L_bugged_2:
pushl EX_INTERNAL|0x209
call EXCEPTION
pop %ebx
jmp L_error_exit
L_bugged_3:
pushl EX_INTERNAL|0x210
call EXCEPTION
pop %ebx
jmp L_error_exit
L_bugged_4:
pushl EX_INTERNAL|0x211
call EXCEPTION
pop %ebx
jmp L_error_exit
L_bugged:
pushl EX_INTERNAL|0x212
call EXCEPTION
pop %ebx
jmp L_error_exit
L_error_exit:
movl $-1,%eax
#endif /* PARANOID */
L_exit:
popl %ebx
popl %edi
popl %esi
leave
ret

View File

@ -1,141 +0,0 @@
/*---------------------------------------------------------------------------+
| round_Xsig.S |
| |
| Copyright (C) 1992,1993,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Normalize and round a 12 byte quantity. |
| Call from C as: |
| int round_Xsig(Xsig *n) |
| |
| Normalize a 12 byte quantity. |
| Call from C as: |
| int norm_Xsig(Xsig *n) |
| |
| Each function returns the size of the shift (nr of bits). |
| |
+---------------------------------------------------------------------------*/
.file "round_Xsig.S"
#include "fpu_emu.h"
.text
ENTRY(round_Xsig)
pushl %ebp
movl %esp,%ebp
pushl %ebx /* Reserve some space */
pushl %ebx
pushl %esi
movl PARAM1,%esi
movl 8(%esi),%edx
movl 4(%esi),%ebx
movl (%esi),%eax
movl $0,-4(%ebp)
orl %edx,%edx /* ms bits */
js L_round /* Already normalized */
jnz L_shift_1 /* Shift left 1 - 31 bits */
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
movl $-32,-4(%ebp)
/* We need to shift left by 1 - 31 bits */
L_shift_1:
bsrl %edx,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx
subl %ecx,-4(%ebp)
shld %cl,%ebx,%edx
shld %cl,%eax,%ebx
shl %cl,%eax
L_round:
testl $0x80000000,%eax
jz L_exit
addl $1,%ebx
adcl $0,%edx
jnz L_exit
movl $0x80000000,%edx
incl -4(%ebp)
L_exit:
movl %edx,8(%esi)
movl %ebx,4(%esi)
movl %eax,(%esi)
movl -4(%ebp),%eax
popl %esi
popl %ebx
leave
ret
ENTRY(norm_Xsig)
pushl %ebp
movl %esp,%ebp
pushl %ebx /* Reserve some space */
pushl %ebx
pushl %esi
movl PARAM1,%esi
movl 8(%esi),%edx
movl 4(%esi),%ebx
movl (%esi),%eax
movl $0,-4(%ebp)
orl %edx,%edx /* ms bits */
js L_n_exit /* Already normalized */
jnz L_n_shift_1 /* Shift left 1 - 31 bits */
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
movl $-32,-4(%ebp)
orl %edx,%edx /* ms bits */
js L_n_exit /* Normalized now */
jnz L_n_shift_1 /* Shift left 1 - 31 bits */
movl %ebx,%edx
movl %eax,%ebx
xorl %eax,%eax
addl $-32,-4(%ebp)
jmp L_n_exit /* Might not be normalized,
but shift no more. */
/* We need to shift left by 1 - 31 bits */
L_n_shift_1:
bsrl %edx,%ecx /* get the required shift in %ecx */
subl $31,%ecx
negl %ecx
subl %ecx,-4(%ebp)
shld %cl,%ebx,%edx
shld %cl,%eax,%ebx
shl %cl,%eax
L_n_exit:
movl %edx,8(%esi)
movl %ebx,4(%esi)
movl %eax,(%esi)
movl -4(%ebp),%eax
popl %esi
popl %ebx
leave
ret

View File

@ -1,87 +0,0 @@
.file "shr_Xsig.S"
/*---------------------------------------------------------------------------+
| shr_Xsig.S |
| |
| 12 byte right shift function |
| |
| Copyright (C) 1992,1994,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| void shr_Xsig(Xsig *arg, unsigned nr) |
| |
| Extended shift right function. |
| Fastest for small shifts. |
| Shifts the 12 byte quantity pointed to by the first arg (arg) |
| right by the number of bits specified by the second arg (nr). |
| |
+---------------------------------------------------------------------------*/
#include "fpu_emu.h"
.text
ENTRY(shr_Xsig)
push %ebp
movl %esp,%ebp
pushl %esi
movl PARAM2,%ecx
movl PARAM1,%esi
cmpl $32,%ecx /* shrd only works for 0..31 bits */
jnc L_more_than_31
/* less than 32 bits */
pushl %ebx
movl (%esi),%eax /* lsl */
movl 4(%esi),%ebx /* midl */
movl 8(%esi),%edx /* msl */
shrd %cl,%ebx,%eax
shrd %cl,%edx,%ebx
shr %cl,%edx
movl %eax,(%esi)
movl %ebx,4(%esi)
movl %edx,8(%esi)
popl %ebx
popl %esi
leave
ret
L_more_than_31:
cmpl $64,%ecx
jnc L_more_than_63
subb $32,%cl
movl 4(%esi),%eax /* midl */
movl 8(%esi),%edx /* msl */
shrd %cl,%edx,%eax
shr %cl,%edx
movl %eax,(%esi)
movl %edx,4(%esi)
movl $0,8(%esi)
popl %esi
leave
ret
L_more_than_63:
cmpl $96,%ecx
jnc L_more_than_95
subb $64,%cl
movl 8(%esi),%eax /* msl */
shr %cl,%eax
xorl %edx,%edx
movl %eax,(%esi)
movl %edx,4(%esi)
movl %edx,8(%esi)
popl %esi
leave
ret
L_more_than_95:
xorl %eax,%eax
movl %eax,(%esi)
movl %eax,4(%esi)
movl %eax,8(%esi)
popl %esi
leave
ret

View File

@ -1,6 +0,0 @@
#ifndef _I386_MATH_EMU_H
#define _I386_MATH_EMU_H
// Don't really need anything in here.
#endif

View File

@ -1,7 +0,0 @@
#ifndef _I386_TYPES_H
#define _I386_TYPES_H
#ifndef __ASSEMBLY__
#endif
#endif /* _I386_TYPES_H */

View File

@ -1,6 +0,0 @@
#ifndef _I386_UACCESS_H
#define _I386_UACCESS_H
#endif

View File

@ -1,204 +0,0 @@
.file "wm_shrx.S"
/*---------------------------------------------------------------------------+
| wm_shrx.S |
| |
| 64 bit right shift functions |
| |
| Copyright (C) 1992,1995 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@jacobi.maths.monash.edu.au |
| |
| Call from C as: |
| unsigned FPU_shrx(void *arg1, unsigned arg2) |
| and |
| unsigned FPU_shrxs(void *arg1, unsigned arg2) |
| |
+---------------------------------------------------------------------------*/
#include "fpu_emu.h"
.text
/*---------------------------------------------------------------------------+
| unsigned FPU_shrx(void *arg1, unsigned arg2) |
| |
| Extended shift right function. |
| Fastest for small shifts. |
| Shifts the 64 bit quantity pointed to by the first arg (arg1) |
| right by the number of bits specified by the second arg (arg2). |
| Forms a 96 bit quantity from the 64 bit arg and eax: |
| [ 64 bit arg ][ eax ] |
| shift right ---------> |
| The eax register is initialized to 0 before the shifting. |
| Results returned in the 64 bit arg and eax. |
+---------------------------------------------------------------------------*/
ENTRY(FPU_shrx)
push %ebp
movl %esp,%ebp
pushl %esi
movl PARAM2,%ecx
movl PARAM1,%esi
cmpl $32,%ecx /* shrd only works for 0..31 bits */
jnc L_more_than_31
/* less than 32 bits */
pushl %ebx
movl (%esi),%ebx /* lsl */
movl 4(%esi),%edx /* msl */
xorl %eax,%eax /* extension */
shrd %cl,%ebx,%eax
shrd %cl,%edx,%ebx
shr %cl,%edx
movl %ebx,(%esi)
movl %edx,4(%esi)
popl %ebx
popl %esi
leave
ret
L_more_than_31:
cmpl $64,%ecx
jnc L_more_than_63
subb $32,%cl
movl (%esi),%eax /* lsl */
movl 4(%esi),%edx /* msl */
shrd %cl,%edx,%eax
shr %cl,%edx
movl %edx,(%esi)
movl $0,4(%esi)
popl %esi
leave
ret
L_more_than_63:
cmpl $96,%ecx
jnc L_more_than_95
subb $64,%cl
movl 4(%esi),%eax /* msl */
shr %cl,%eax
xorl %edx,%edx
movl %edx,(%esi)
movl %edx,4(%esi)
popl %esi
leave
ret
L_more_than_95:
xorl %eax,%eax
movl %eax,(%esi)
movl %eax,4(%esi)
popl %esi
leave
ret
/*---------------------------------------------------------------------------+
| unsigned FPU_shrxs(void *arg1, unsigned arg2) |
| |
| Extended shift right function (optimized for small floating point |
| integers). |
| Shifts the 64 bit quantity pointed to by the first arg (arg1) |
| right by the number of bits specified by the second arg (arg2). |
| Forms a 96 bit quantity from the 64 bit arg and eax: |
| [ 64 bit arg ][ eax ] |
| shift right ---------> |
| The eax register is initialized to 0 before the shifting. |
| The lower 8 bits of eax are lost and replaced by a flag which is |
| set (to 0x01) if any bit, apart from the first one, is set in the |
| part which has been shifted out of the arg. |
| Results returned in the 64 bit arg and eax. |
+---------------------------------------------------------------------------*/
ENTRY(FPU_shrxs)
push %ebp
movl %esp,%ebp
pushl %esi
pushl %ebx
movl PARAM2,%ecx
movl PARAM1,%esi
cmpl $64,%ecx /* shrd only works for 0..31 bits */
jnc Ls_more_than_63
cmpl $32,%ecx /* shrd only works for 0..31 bits */
jc Ls_less_than_32
/* We got here without jumps by assuming that the most common requirement
is for small integers */
/* Shift by [32..63] bits */
subb $32,%cl
movl (%esi),%eax /* lsl */
movl 4(%esi),%edx /* msl */
xorl %ebx,%ebx
shrd %cl,%eax,%ebx
shrd %cl,%edx,%eax
shr %cl,%edx
orl %ebx,%ebx /* test these 32 bits */
setne %bl
test $0x7fffffff,%eax /* and 31 bits here */
setne %bh
orw %bx,%bx /* Any of the 63 bit set ? */
setne %al
movl %edx,(%esi)
movl $0,4(%esi)
popl %ebx
popl %esi
leave
ret
/* Shift by [0..31] bits */
Ls_less_than_32:
movl (%esi),%ebx /* lsl */
movl 4(%esi),%edx /* msl */
xorl %eax,%eax /* extension */
shrd %cl,%ebx,%eax
shrd %cl,%edx,%ebx
shr %cl,%edx
test $0x7fffffff,%eax /* only need to look at eax here */
setne %al
movl %ebx,(%esi)
movl %edx,4(%esi)
popl %ebx
popl %esi
leave
ret
/* Shift by [64..95] bits */
Ls_more_than_63:
cmpl $96,%ecx
jnc Ls_more_than_95
subb $64,%cl
movl (%esi),%ebx /* lsl */
movl 4(%esi),%eax /* msl */
xorl %edx,%edx /* extension */
shrd %cl,%ebx,%edx
shrd %cl,%eax,%ebx
shr %cl,%eax
orl %ebx,%edx
setne %bl
test $0x7fffffff,%eax /* only need to look at eax here */
setne %bh
orw %bx,%bx
setne %al
xorl %edx,%edx
movl %edx,(%esi) /* set to zero */
movl %edx,4(%esi) /* set to zero */
popl %ebx
popl %esi
leave
ret
Ls_more_than_95:
/* Shift by [96..inf) bits */
xorl %eax,%eax
movl (%esi),%ebx
orl 4(%esi),%ebx
setne %al
xorl %ebx,%ebx
movl %ebx,(%esi)
movl %ebx,4(%esi)
popl %ebx
popl %esi
leave
ret

View File

@ -1,470 +0,0 @@
.file "wm_sqrt.S"
/*---------------------------------------------------------------------------+
| wm_sqrt.S |
| |
| Fixed point arithmetic square root evaluation. |
| |
| Copyright (C) 1992,1993,1995,1997 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@suburbia.net |
| |
| Call from C as: |
| int wm_sqrt(FPU_REG *n, unsigned int control_word) |
| |
+---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------+
| wm_sqrt(FPU_REG *n, unsigned int control_word) |
| returns the square root of n in n. |
| |
| Use Newton's method to compute the square root of a number, which must |
| be in the range [1.0 .. 4.0), to 64 bits accuracy. |
| Does not check the sign or tag of the argument. |
| Sets the exponent, but not the sign or tag of the result. |
| |
| The guess is kept in %esi:%edi |
+---------------------------------------------------------------------------*/
#include "exception.h"
#include "fpu_emu.h"
#ifndef NON_REENTRANT_FPU
/* Local storage on the stack: */
#define FPU_accum_3 -4(%ebp) /* ms word */
#define FPU_accum_2 -8(%ebp)
#define FPU_accum_1 -12(%ebp)
#define FPU_accum_0 -16(%ebp)
/*
* The de-normalised argument:
* sq_2 sq_1 sq_0
* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
* ^ binary point here
*/
#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
#define FPU_fsqrt_arg_1 -24(%ebp)
#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
#else
/* Local storage in a static area: */
.data
.align 4,0
FPU_accum_3:
.long 0 /* ms word */
FPU_accum_2:
.long 0
FPU_accum_1:
.long 0
FPU_accum_0:
.long 0
/* The de-normalised argument:
sq_2 sq_1 sq_0
b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
^ binary point here
*/
FPU_fsqrt_arg_2:
.long 0 /* ms word */
FPU_fsqrt_arg_1:
.long 0
FPU_fsqrt_arg_0:
.long 0 /* ls word, at most the ms bit is set */
#endif /* NON_REENTRANT_FPU */
.text
ENTRY(wm_sqrt)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
subl $28,%esp
#endif /* NON_REENTRANT_FPU */
pushl %esi
pushl %edi
pushl %ebx
movl PARAM1,%esi
movl SIGH(%esi),%eax
movl SIGL(%esi),%ecx
xorl %edx,%edx
/* We use a rough linear estimate for the first guess.. */
cmpw EXP_BIAS,EXP(%esi)
jnz sqrt_arg_ge_2
shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
rcrl $1,%ecx
rcrl $1,%edx
sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
replaced by the answer. */
movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
movl %ecx,FPU_fsqrt_arg_1
movl %edx,FPU_fsqrt_arg_0
/* Make a linear first estimate */
shrl $1,%eax
addl $0x40000000,%eax
movl $0xaaaaaaaa,%ecx
mull %ecx
shll %edx /* max result was 7fff... */
testl $0x80000000,%edx /* but min was 3fff... */
jnz sqrt_prelim_no_adjust
movl $0x80000000,%edx /* round up */
sqrt_prelim_no_adjust:
movl %edx,%esi /* Our first guess */
/* We have now computed (approx) (2 + x) / 3, which forms the basis
for a few iterations of Newton's method */
movl FPU_fsqrt_arg_2,%ecx /* ms word */
/*
* From our initial estimate, three iterations are enough to get us
* to 30 bits or so. This will then allow two iterations at better
* precision to complete the process.
*/
/* Compute (g + n/g)/2 at each iteration (g is the guess). */
shrl %ecx /* Doing this first will prevent a divide */
/* overflow later. */
movl %ecx,%edx /* msw of the arg / 2 */
divl %esi /* current estimate */
shrl %esi /* divide by 2 */
addl %eax,%esi /* the new estimate */
movl %ecx,%edx
divl %esi
shrl %esi
addl %eax,%esi
movl %ecx,%edx
divl %esi
shrl %esi
addl %eax,%esi
/*
* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
* we improve it to 60 bits or so.
*
* The strategy from now on is to compute new estimates from
* guess := guess + (n - guess^2) / (2 * guess)
*/
/* First, find the square of the guess */
movl %esi,%eax
mull %esi
/* guess^2 now in %edx:%eax */
movl FPU_fsqrt_arg_1,%ecx
subl %ecx,%eax
movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
sbbl %ecx,%edx
jnc sqrt_stage_2_positive
/* Subtraction gives a negative result,
negate the result before division. */
notl %edx
notl %eax
addl $1,%eax
adcl $0,%edx
divl %esi
movl %eax,%ecx
movl %edx,%eax
divl %esi
jmp sqrt_stage_2_finish
sqrt_stage_2_positive:
divl %esi
movl %eax,%ecx
movl %edx,%eax
divl %esi
notl %ecx
notl %eax
addl $1,%eax
adcl $0,%ecx
sqrt_stage_2_finish:
sarl $1,%ecx /* divide by 2 */
rcrl $1,%eax
/* Form the new estimate in %esi:%edi */
movl %eax,%edi
addl %ecx,%esi
jnz sqrt_stage_2_done /* result should be [1..2) */
#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
cmp $0xffffffff,FPU_fsqrt_arg_1
jnz sqrt_stage_2_error
#endif /* PARANOID */
/* The best rounded result. */
xorl %eax,%eax
decl %eax
movl %eax,%edi
movl %eax,%esi
movl $0x7fffffff,%eax
jmp sqrt_round_result
#ifdef PARANOID
sqrt_stage_2_error:
pushl EX_INTERNAL|0x213
call EXCEPTION
#endif /* PARANOID */
sqrt_stage_2_done:
/* Now the square root has been computed to better than 60 bits. */
/* Find the square of the guess. */
movl %edi,%eax /* ls word of guess */
mull %edi
movl %edx,FPU_accum_1
movl %esi,%eax
mull %esi
movl %edx,FPU_accum_3
movl %eax,FPU_accum_2
movl %edi,%eax
mull %esi
addl %eax,FPU_accum_1
adcl %edx,FPU_accum_2
adcl $0,FPU_accum_3
/* movl %esi,%eax */
/* mull %edi */
addl %eax,FPU_accum_1
adcl %edx,FPU_accum_2
adcl $0,FPU_accum_3
/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
movl FPU_fsqrt_arg_0,%eax /* get normalized n */
subl %eax,FPU_accum_1
movl FPU_fsqrt_arg_1,%eax
sbbl %eax,FPU_accum_2
movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
sbbl %eax,FPU_accum_3
jnc sqrt_stage_3_positive
/* Subtraction gives a negative result,
negate the result before division */
notl FPU_accum_1
notl FPU_accum_2
notl FPU_accum_3
addl $1,FPU_accum_1
adcl $0,FPU_accum_2
#ifdef PARANOID
adcl $0,FPU_accum_3 /* This must be zero */
jz sqrt_stage_3_no_error
sqrt_stage_3_error:
pushl EX_INTERNAL|0x207
call EXCEPTION
sqrt_stage_3_no_error:
#endif /* PARANOID */
movl FPU_accum_2,%edx
movl FPU_accum_1,%eax
divl %esi
movl %eax,%ecx
movl %edx,%eax
divl %esi
sarl $1,%ecx /* divide by 2 */
rcrl $1,%eax
/* prepare to round the result */
addl %ecx,%edi
adcl $0,%esi
jmp sqrt_stage_3_finished
sqrt_stage_3_positive:
movl FPU_accum_2,%edx
movl FPU_accum_1,%eax
divl %esi
movl %eax,%ecx
movl %edx,%eax
divl %esi
sarl $1,%ecx /* divide by 2 */
rcrl $1,%eax
/* prepare to round the result */
notl %eax /* Negate the correction term */
notl %ecx
addl $1,%eax
adcl $0,%ecx /* carry here ==> correction == 0 */
adcl $0xffffffff,%esi
addl %ecx,%edi
adcl $0,%esi
sqrt_stage_3_finished:
/*
* The result in %esi:%edi:%eax should be good to about 90 bits here,
* and the rounding information here does not have sufficient accuracy
* in a few rare cases.
*/
cmpl $0xffffffe0,%eax
ja sqrt_near_exact_x
cmpl $0x00000020,%eax
jb sqrt_near_exact
cmpl $0x7fffffe0,%eax
jb sqrt_round_result
cmpl $0x80000020,%eax
jb sqrt_get_more_precision
sqrt_round_result:
/* Set up for rounding operations */
movl %eax,%edx
movl %esi,%eax
movl %edi,%ebx
movl PARAM1,%edi
movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
jmp fpu_reg_round
sqrt_near_exact_x:
/* First, the estimate must be rounded up. */
addl $1,%edi
adcl $0,%esi
sqrt_near_exact:
/*
* This is an easy case because x^1/2 is monotonic.
* We need just find the square of our estimate, compare it
* with the argument, and deduce whether our estimate is
* above, below, or exact. We use the fact that the estimate
* is known to be accurate to about 90 bits.
*/
movl %edi,%eax /* ls word of guess */
mull %edi
movl %edx,%ebx /* 2nd ls word of square */
movl %eax,%ecx /* ls word of square */
movl %edi,%eax
mull %esi
addl %eax,%ebx
addl %eax,%ebx
#ifdef PARANOID
cmp $0xffffffb0,%ebx
jb sqrt_near_exact_ok
cmp $0x00000050,%ebx
ja sqrt_near_exact_ok
pushl EX_INTERNAL|0x214
call EXCEPTION
sqrt_near_exact_ok:
#endif /* PARANOID */
or %ebx,%ebx
js sqrt_near_exact_small
jnz sqrt_near_exact_large
or %ebx,%edx
jnz sqrt_near_exact_large
/* Our estimate is exactly the right answer */
xorl %eax,%eax
jmp sqrt_round_result
sqrt_near_exact_small:
/* Our estimate is too small */
movl $0x000000ff,%eax
jmp sqrt_round_result
sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
subl $1,%edi
sbbl $0,%esi
movl $0xffffff00,%eax
jmp sqrt_round_result
sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
with an extra bit of precision in the estimate. */
stc /* The extra bit. */
rcll $1,%edi /* Shift the estimate left one bit */
rcll $1,%esi
movl %edi,%eax /* ls word of guess */
mull %edi
movl %edx,%ebx /* 2nd ls word of square */
movl %eax,%ecx /* ls word of square */
movl %edi,%eax
mull %esi
addl %eax,%ebx
addl %eax,%ebx
/* Put our estimate back to its original value */
stc /* The ms bit. */
rcrl $1,%esi /* Shift the estimate left one bit */
rcrl $1,%edi
#ifdef PARANOID
cmp $0xffffff60,%ebx
jb sqrt_more_prec_ok
cmp $0x000000a0,%ebx
ja sqrt_more_prec_ok
pushl EX_INTERNAL|0x215
call EXCEPTION
sqrt_more_prec_ok:
#endif /* PARANOID */
or %ebx,%ebx
js sqrt_more_prec_small
jnz sqrt_more_prec_large
or %ebx,%ecx
jnz sqrt_more_prec_large
/* Our estimate is exactly the right answer */
movl $0x80000000,%eax
jmp sqrt_round_result
sqrt_more_prec_small:
/* Our estimate is too small */
movl $0x800000ff,%eax
jmp sqrt_round_result
sqrt_more_prec_large:
/* Our estimate is too large */
movl $0x7fffff00,%eax
jmp sqrt_round_result