NetBSD/lib/libm/national/sqrt.S

235 lines
6.5 KiB
ArmAsm

; Copyright (c) 1985 Regents of the University of California.
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; 1. Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
; 3. All advertising materials mentioning features or use of this software
; must display the following acknowledgement:
; This product includes software developed by the University of
; California, Berkeley and its contributors.
; 4. Neither the name of the University nor the names of its contributors
; may be used to endorse or promote products derived from this software
; without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
; ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
; OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
; LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
; OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
; SUCH DAMAGE.
;
; Source-identification strings.  The SCCS id is kept only as a comment;
; the RCS id is assembled as a NUL-terminated string at label _rcsid.
;_sccsid:
;.asciz "from: @(#)sqrt.s 5.4 (Berkeley) 10/9/90"
_rcsid:
.asciz "$Id: sqrt.S,v 1.3 1993/08/14 13:43:52 mycroft Exp $"
; double sqrt(x)
; double x;
; IEEE double precision sqrt
; code in NSC assembly by K.C. Ng
; 12/13/85
;
; Method:
; Use Kahan's trick to get an 8-bit initial approximation
; by integer shift and add/subtract. Then three Newton
; iterations to get the error down to within one ulp. Finally
; twiddle the last bit to make the result correctly rounded
; according to the rounding mode.
;
.vers 2
.text
.align 2
.globl _sqrt
;
; _sqrt -- double sqrt(double x)
;
; Entry:   x is read from 8(fp) (low word) and 12(fp) (high word).
; Exit:    result in f0.
; Saved:   r3-r7 by enter/exit; f4 and f6 are pushed on entry and popped
;          at L1.
; Scratch: r0, r1, r2, f2, and the 8-byte temporary at -8(fp)/-4(fp).
;
; Special cases:
;   NaN        -> x returned unchanged
;   +INF       -> +INF
;   -INF       -> NaN (computed as 0/0)
;   +0 or -0   -> x returned unchanged
;   x < 0      -> NaN (computed as 0/0)
;   subnormal  -> x is scaled up by 2**54 first and the exponent of the
;                 result is lowered by 27 at the end
;
; The fsr is saved in r1, modified while computing, and reloaded from r1
; (with the inexact flag set when the result is inexact) before returning
; from the main path.
;
_sqrt:
	enter	[r3,r4,r5,r6,r7],44
	movl	f4,tos			; save f4
	movl	f6,tos			; save f6
	movd	2146435072,r2		; r2 = 0x7ff00000
	movl	8(fp),f0		; f0 = x
	movd	12(fp),r3		; r3 = high part of x
	movd	r3,r4			; make a copy of high part of x in r4
	andd	r2,r3			; r3 becomes the biased exponent of x
	cmpd	r2,r3			; if r3 = 0x7ff00000 then x is INF or NaN
	bne	L22
	; x is INF or NaN; it is INF only if every mantissa bit is zero
	movd	8(fp),r0		; r0 = low part of x
	movd	r4,r1			; r1 is high part of x again
	andd	0x000fffff,r1		; mask off the sign and exponent of x
	ord	r0,r1			; or with low part, if 0 then x is INF
	cmpqd	0,r1
	bne	L1			; not 0; therefore x is NaN; return x.
	cmpqd	0,r4			; now x is INF, is it +INF?
	blt	L1			; +INF, return x
	; -INF, fall through and return NaN by doing 0/0
nan:
	movl	0f0.0,f0
	divl	f0,f0
	br	L1
L22:					; now x is finite
	cmpl	0f0.0,f0		; x = 0 ?
	beq	L1			; return x if x is +0 or -0
	cmpqd	0,r4			; is x < 0 ?
	bgt	nan			; if x < 0 return NaN
	movqd	0,r5			; r5 == scalx, initialized to zero
	cmpqd	0,r3			; is x subnormal? (r3 is the exponent)
	bne	L21			; if x is normal, goto L21
	movl	L30,f2			; f2 = 2**54
	mull	f2,f0			; scale up x by 2**54
	subd	0x1b00000,r5		; offset the scale factor by -27 in exponent
L21:
	; now x is normal
	; notations:
	;	r1 == copy of fsr
	;	r2 == mask of e inexact enable flag
	;	r3 == mask of i inexact flag
	;	r4 == mask of r rounding mode
	;	r5 == x's scale factor (already defined)
	movd	0x20,r2
	movd	0x40,r3
	movd	0x180,r4
	sfsr	r0			; store fsr to r0
	movd	r0,r1			; make a copy of fsr in r1
	bicd	[5,6,7,8],r0		; clear e, i, and set r to round to nearest
	lfsr	r0
	; begin to compute sqrt(x): build the first guess with integer
	; arithmetic on the high word of x
	movl	f0,-8(fp)
	movd	-4(fp),r0		; r0 = the high part of modified x
	lshd	-1,r0			; r0 >> 1
	addd	0x1ff80000,r0		; add correction to r0 ...got 5 bits approx.
	movd	r0,r6
	lshd	-13,r6			; r6 = r0 >> 13
	andd	0x7c,r6			; r6 = 4 * (bits 15..19 of r0), a byte offset
	addrd	L29,r7			; r7 = address of L29 = table[0]
	addd	r6,r7			; r7 = address of the selected table entry
	subd	0(r7),r0		; r0 = r0 - table entry
	movd	r0,-4(fp)		; low word at -8(fp) is still that of x
	movl	-8(fp),f2		; now f2 = y approximate sqrt(f0) to 8 bits
	movl	0f0.5,f6		; f6 = 0.5
	; first Newton iteration
	movl	f0,f4
	divl	f2,f4			; t = x/y
	addl	f4,f2			; y = y + x/y
	mull	f6,f2			; y = 0.5(y+x/y) got 17 bits approx.
	; second Newton iteration
	movl	f0,f4
	divl	f2,f4			; t = x/y
	addl	f4,f2			; y = y + x/y
	mull	f6,f2			; y = 0.5(y+x/y) got 35 bits approx.
	; third Newton iteration, written as a correction to y
	movl	f0,f4
	divl	f2,f4			; t = x/y
	subl	f2,f4			; t = x/y - y
	mull	f6,f4			; t = 0.5(x/y-y)
	addl	f4,f2			; y = y + 0.5(x/y -y)
	; now y approx. sqrt(x) to within 1 ulp
	; twiddle last bit to force y correctly rounded
	movd	r1,r0			; start from the old fsr
	bicd	[6,7,8],r0		; clear inexact bit but retain inexact enable
	ord	0x80,r0			; set rounding mode to round to zero
	lfsr	r0
	movl	f0,f4
	divl	f2,f4			; f4 = x/y (chopped)
	sfsr	r0
	andd	r3,r0			; get the inexact flag
	cmpqd	0,r0
	bne	L18			; x/y was inexact
	; x/y is exact ...
	cmpl	f2,f4			; if y == x/y then y is the exact root
	beq	L2
	; exact but y != x/y: step f4 down one ulp using integer
	; arithmetic on its two words
	movl	f4,-8(fp)
	subd	1,-8(fp)
	subcd	0,-4(fp)
	movl	-8(fp),f4		; f4 = f4 - ulp
L18:
	bicd	[6],r1
	ord	r3,r1			; set inexact flag in r1
	andd	r1,r4			; r4 = the old rounding mode
	cmpqd	0,r4			; round to nearest?
	bne	L17
	movl	f4,-8(fp)
	addd	1,-8(fp)
	addcd	0,-4(fp)
	movl	-8(fp),f4		; f4 = f4 + ulp
	br	L16
L17:
	cmpd	0x100,r4		; round to positive inf?
	bne	L16
	movl	f4,-8(fp)
	addd	1,-8(fp)
	addcd	0,-4(fp)
	movl	-8(fp),f4		; f4 = f4 + ulp
	movl	f2,-8(fp)
	addd	1,-8(fp)
	addcd	0,-4(fp)
	movl	-8(fp),f2		; f2 = f2 + ulp
L16:
	addl	f4,f2			; y = y + t
	subd	0x100000,r5		; scalx = scalx - 1 (halves the sum y + t)
L2:
	; apply the accumulated exponent adjustment to the high word of y
	movl	f2,-8(fp)
	addd	r5,-4(fp)
	movl	-8(fp),f0
	lfsr	r1			; reload fsr from r1
L1:
	movl	tos,f6			; restore f6
	movl	tos,f4			; restore f4
	exit	[r3,r4,r5,r6,r7]
	ret	0
.data
; L28: NUL-terminated ASCII identification string spelled out as bytes:
; "@(#)sqrt.c<TAB>1.1 (ucb.elefunt) 9/19/85".
; It is not referenced by the code visible in this file.
L28: .byte 64,40,35,41,115,113,114,116,46,99
.byte 9,49,46,49,32,40,117,99,98,46
.byte 101,108,101,102,117,110,116,41,32,57
.byte 47,49,57,47,56,53,0
; L29: correction table used by _sqrt for its initial approximation.
; _sqrt forms a byte offset that is a multiple of 4 in the range 0..124,
; reads the 4-byte integer at L29+offset, and subtracts it from the high
; word of its first guess.  That gives 32 entries: entry 0 is the 4 bytes
; reserved by .blkb (presumably a zero correction -- NOTE(review): confirm
; that the assembler zero-fills .blkb in .data), and entries 1..31 are the
; .double (4-byte) values below.
L29: .blkb 4
.double 1204
.double 3062
.double 5746
.double 9193
.double 13348
.double 18162
.double 23592
.double 29598
.double 36145
.double 43202
.double 50740
.double 58733
.double 67158
.double 75992
.double 85215
.double 83599
.double 71378
.double 60428
.double 50647
.double 41945
.double 34246
.double 27478
.double 21581
.double 16499
.double 12183
.double 8588
.double 5674
.double 3403
.double 1742
.double 661
.double 130
; Each constant below is laid out as 8 bytes: 4 bytes reserved by .blkb
; for the low word, followed by a .double holding the high word.
; L30 is loaded by _sqrt with movl as the floating-point value 2**54
; (high word 1129316352 = 0x43500000); this needs the reserved low word
; to read as zero -- NOTE(review): confirm .blkb is zero-filled here.
L30: .blkb 4
.double 1129316352 ;L30: .double 0,0x43500000
; L31 and L32 (high words 0x1ff00000 and 0x5ff00000) are not referenced
; by the code visible in this file.
L31: .blkb 4
.double 0x1ff00000
L32: .blkb 4
.double 0x5ff00000