235 lines
6.5 KiB
ArmAsm
235 lines
6.5 KiB
ArmAsm
; Copyright (c) 1985 Regents of the University of California.
|
|
; All rights reserved.
|
|
;
|
|
; Redistribution and use in source and binary forms, with or without
|
|
; modification, are permitted provided that the following conditions
|
|
; are met:
|
|
; 1. Redistributions of source code must retain the above copyright
|
|
; notice, this list of conditions and the following disclaimer.
|
|
; 2. Redistributions in binary form must reproduce the above copyright
|
|
; notice, this list of conditions and the following disclaimer in the
|
|
; documentation and/or other materials provided with the distribution.
|
|
; 3. All advertising materials mentioning features or use of this software
|
|
; must display the following acknowledgement:
|
|
; This product includes software developed by the University of
|
|
; California, Berkeley and its contributors.
|
|
; 4. Neither the name of the University nor the names of its contributors
|
|
; may be used to endorse or promote products derived from this software
|
|
; without specific prior written permission.
|
|
;
|
|
; THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
; ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
; OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
; OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
; SUCH DAMAGE.
|
|
;
|
|
;_sccsid:
|
|
;.asciz "from: @(#)sqrt.s 5.4 (Berkeley) 10/9/90"
|
|
_rcsid:
|
|
.asciz "$Id: sqrt.S,v 1.3 1993/08/14 13:43:52 mycroft Exp $"
|
|
|
|
; double sqrt(x)
|
|
; double x;
|
|
; IEEE double precision sqrt
|
|
; code in NSC assembly by K.C. Ng
|
|
; 12/13/85
|
|
;
|
|
; Method:
|
|
; Use Kahan's trick to get 8 bits initial approximation
|
|
; by integer shift and add/subtract. Then three Newton
|
|
; iterations to get down error to within one ulp. Finally
|
|
; twiddle the last bit to make to correctly rounded
|
|
; according to the rounding mode.
|
|
;
|
|
.vers 2
|
|
.text
|
|
.align 2
|
|
.globl _sqrt
|
|
_sqrt:
|
|
enter [r3,r4,r5,r6,r7],44
|
|
movl f4,tos
|
|
movl f6,tos
|
|
movd 2146435072,r2 ; r2 = 0x7ff00000
|
|
movl 8(fp),f0 ; f2 = x
|
|
movd 12(fp),r3 ; r3 = high part of x
|
|
movd r3,r4 ; make a copy of high part of x in r4
|
|
andd r2,r3 ; r3 become the bias exponent of x
|
|
cmpd r2,r3 ; if r3 = 0x7ff00000 then x is INF or NAN
|
|
bne L22
|
|
; to see if x is INF
|
|
movd 8(fp),r0 ; r0 = low part of x
|
|
movd r4,r1 ; r1 is high part of x again
|
|
andd 0xfff00000,r1 ; mask off the sign and exponent of x
|
|
ord r0,r1 ; or with low part, if 0 then x is INF
|
|
cmpqd 0,r1 ;
|
|
bne L1 ; not 0; therefore x is NaN; return x.
|
|
cmpqd 0,r4 ; now x is Inf, is it +inf?
|
|
blt L1 ; +INF, return x
|
|
; -INF, return NaN by doing 0/0
|
|
nan: movl 0f0.0,f0 ;
|
|
divl f0,f0
|
|
br L1
|
|
L22: ; now x is finite
|
|
cmpl 0f0.0,f0 ; x = 0 ?
|
|
beq L1 ; return x if x is +0 or -0
|
|
cmpqd 0,r4 ; Is x < 0 ?
|
|
bgt nan ; if x < 0 return NaN
|
|
movqd 0,r5 ; r5 == scalx initialize to zero
|
|
cmpqd 0,r3 ; is x is subnormal ? (r3 is the exponent)
|
|
bne L21 ; if x is normal, goto L21
|
|
movl L30,f2 ; f2 = 2**54
|
|
mull f2,f0 ; scale up x by 2**54
|
|
subd 0x1b00000,r5 ; off set the scale factor by -27 in exponent
|
|
L21:
|
|
; now x is normal
|
|
; notations:
|
|
; r1 == copy of fsr
|
|
; r2 == mask of e inexact enable flag
|
|
; r3 == mask of i inexact flag
|
|
; r4 == mask of r rounding mode
|
|
; r5 == x's scale factor (already defined)
|
|
|
|
movd 0x20,r2
|
|
movd 0x40,r3
|
|
movd 0x180,r4
|
|
sfsr r0 ; store fsr to r0
|
|
movd r0,r1 ; make a copy of fsr to r1
|
|
bicd [5,6,7,8],r0 ; clear e,i, and set r to round to nearest
|
|
lfsr r0
|
|
|
|
; begin to compute sqrt(x)
|
|
movl f0,-8(fp)
|
|
movd -4(fp),r0 ; r0 the high part of modified x
|
|
lshd -1,r0 ; r0 >> 1
|
|
addd 0x1ff80000,r0 ; add correction to r0 ...got 5 bits approx.
|
|
movd r0,r6
|
|
lshd -13,r6 ; r6 = r0>>-15
|
|
andd 0x7c,r6 ; obtain 4*leading 5 bits of r0
|
|
addrd L29,r7 ; r7 = address of L29 = table[0]
|
|
addd r6,r7 ; r6 = address of L29[r6] = table[r6]
|
|
subd 0(r7),r0 ; r0 = r0 - table[r6]
|
|
movd r0,-4(fp)
|
|
movl -8(fp),f2 ; now f2 = y approximate sqrt(f0) to 8 bits
|
|
|
|
movl 0f0.5,f6 ; f6 = 0.5
|
|
movl f0,f4
|
|
divl f2,f4 ; t = x/y
|
|
addl f4,f2 ; y = y + x/y
|
|
mull f6,f2 ; y = 0.5(y+x/y) got 17 bits approx.
|
|
movl f0,f4
|
|
divl f2,f4 ; t = x/y
|
|
addl f4,f2 ; y = y + x/y
|
|
mull f6,f2 ; y = 0.5(y+x/y) got 35 bits approx.
|
|
movl f0,f4
|
|
divl f2,f4 ; t = x/y
|
|
subl f2,f4 ; t = x/y - y
|
|
mull f6,f4 ; t = 0.5(x/y-y)
|
|
addl f4,f2 ; y = y + 0.5(x/y -y)
|
|
; now y approx. sqrt(x) to within 1 ulp
|
|
|
|
; twiddle last bit to force y correctly rounded
|
|
movd r1,r0 ; restore the old fsr
|
|
bicd [6,7,8],r0 ; clear inexact bit but retain inexact enable
|
|
ord 0x80,r0 ; set rounding mode to round to zero
|
|
lfsr r0
|
|
|
|
movl f0,f4
|
|
divl f2,f4 ; f4 = x/y
|
|
sfsr r0
|
|
andd r3,r0 ; get the inexact flag
|
|
cmpqd 0,r0
|
|
bne L18
|
|
; if x/y exact, then ...
|
|
cmpl f2,f4 ; if y == x/y
|
|
beq L2
|
|
movl f4,-8(fp)
|
|
subd 1,-8(fp)
|
|
subcd 0,-4(fp)
|
|
movl -8(fp),f4 ; f4 = f4 - ulp
|
|
L18:
|
|
bicd [6],r1
|
|
ord r3,r1 ; set inexact flag in r1
|
|
|
|
andd r1,r4 ; r4 = the old rounding mode
|
|
cmpqd 0,r4 ; round to nearest?
|
|
bne L17
|
|
movl f4,-8(fp)
|
|
addd 1,-8(fp)
|
|
addcd 0,-4(fp)
|
|
movl -8(fp),f4 ; f4 = f4 + ulp
|
|
br L16
|
|
L17:
|
|
cmpd 0x100,r4 ; round to positive inf ?
|
|
bne L16
|
|
movl f4,-8(fp)
|
|
addd 1,-8(fp)
|
|
addcd 0,-4(fp)
|
|
movl -8(fp),f4 ; f4 = f4 + ulp
|
|
|
|
movl f2,-8(fp)
|
|
addd 1,-8(fp)
|
|
addcd 0,-4(fp)
|
|
movl -8(fp),f2 ; f2 = f2 + ulp
|
|
L16:
|
|
addl f4,f2 ; y = y + t
|
|
subd 0x100000,r5 ; scalx = scalx - 1
|
|
L2:
|
|
movl f2,-8(fp)
|
|
addd r5,-4(fp)
|
|
movl -8(fp),f0
|
|
lfsr r1
|
|
L1:
|
|
movl tos,f6
|
|
movl tos,f4
|
|
exit [r3,r4,r5,r6,r7]
|
|
ret 0
|
|
.data
|
|
L28: .byte 64,40,35,41,115,113,114,116,46,99
|
|
.byte 9,49,46,49,32,40,117,99,98,46
|
|
.byte 101,108,101,102,117,110,116,41,32,57
|
|
.byte 47,49,57,47,56,53,0
|
|
L29: .blkb 4
|
|
.double 1204
|
|
.double 3062
|
|
.double 5746
|
|
.double 9193
|
|
.double 13348
|
|
.double 18162
|
|
.double 23592
|
|
.double 29598
|
|
.double 36145
|
|
.double 43202
|
|
.double 50740
|
|
.double 58733
|
|
.double 67158
|
|
.double 75992
|
|
.double 85215
|
|
.double 83599
|
|
.double 71378
|
|
.double 60428
|
|
.double 50647
|
|
.double 41945
|
|
.double 34246
|
|
.double 27478
|
|
.double 21581
|
|
.double 16499
|
|
.double 12183
|
|
.double 8588
|
|
.double 5674
|
|
.double 3403
|
|
.double 1742
|
|
.double 661
|
|
.double 130
|
|
L30: .blkb 4
|
|
.double 1129316352 ;L30: .double 0,0x43500000
|
|
L31: .blkb 4
|
|
.double 0x1ff00000
|
|
L32: .blkb 4
|
|
.double 0x5ff00000
|