/* libgcc routines for the Texas Instruments TMS320C[34]x
   Copyright (C) 1997,98, 1999 Free Software Foundation, Inc.

   Contributed by Michael Hayes (m.hayes@elec.canterbury.ac.nz)
   and Herman Ten Brugge (Haj.Ten.Brugge@net.HCC.nl).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */

; These routines are called using the standard TI register argument
; passing model.
; The following registers do not have to be saved:
; r0, r1, r2, r3, ar0, ar1, ar2, ir0, ir1, bk, rs, rc, re, (r9, r10, r11)
;
; Perform floating point divqf3
;
; This routine performs a reciprocal of the divisor using the method
; described in the C30/C40 user manuals. It then multiplies that
; result by the dividend.
;
; Let r be the reciprocal of the divisor v and let the ith estimate
; of r be denoted by r[i]. An iterative approach can be used to
; improve the estimate of r, given an initial estimate r[0], where
;
; r[i + 1] = r[i] * (2.0 - v * r[i])
;
; The normalized error e[i] at the ith iteration is
;
; e[i] = (r - r[i]) / r = (1 / v - r[i]) * v = (1 - v * r[i])
;
; Note that
;
; e[i + 1] = (1 - v * r[i + 1]) = 1 - 2 * v * r[i] + v^2 * (r[i])^2
;          = (1 - v * r[i])^2 = (e[i])^2

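; Reader's note (not in the original source): the recurrence above is
; Newton-Raphson refinement of the reciprocal; each step squares the error,
; roughly doubling the number of accurate bits. A rough C sketch only,
; with the hypothetical rcp0() standing in for the RCPF instruction (C4x)
; or the exponent trick used in the C3x path below, which therefore needs
; more refinement steps:
;
;   float divqf3_sketch(float x, float v)
;   {
;       float r = rcp0(v);          /* initial estimate r[0] */
;       r = r * (2.0f - v * r);     /* r[1] */
;       r = r * (2.0f - v * r);     /* r[2] */
;       return x * r;               /* quotient = dividend * reciprocal */
;   }
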
; r2 dividend, r3 divisor, r0 quotient
; clobbers r1, ar1
#ifdef L_divsf3
        .text
        .global ___divqf3
___divqf3:

#ifdef _TMS320C4x
        .if .REGPARM == 0
        lda sp,ar0
        ldf *-ar0(2), r3
        .endif

        pop ar1 ; Pop return address

; r0 = estimate of r, r1 = tmp, r2 = dividend, r3 = divisor
        rcpf r3, r0 ; Compute initial estimate r[0]

        mpyf3 r0, r3, r1 ; r1 = r[0] * v
        subrf 2.0, r1 ; r1 = 2.0 - r[0] * v
        mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
; End of 1st iteration (16 bits accuracy)

        mpyf3 r0, r3, r1 ; r1 = r[1] * v
        subrf 2.0, r1 ; r1 = 2.0 - r[1] * v

        bud ar1 ; Delayed branch
        mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
; End of 2nd iteration (32 bits accuracy)
        .if .REGPARM == 0
        mpyf *-ar0(1), r0 ; Multiply by the dividend
        .else
        mpyf r2, r0 ; Multiply by the dividend
        .endif
        rnd r0
; Branch occurs here
#else
        .if .REGPARM == 0
        ldiu sp,ar0
        ldf *-ar0(2), r3
        .endif

        pop ar1 ; Pop return address

; Initial estimate r[0] = 1.0 * 2^(-e - 1)
; where v = m * 2^e

; r0 = estimate of r, r1 = tmp, r2 = dividend, r3 = divisor

; Calculate initial estimate r[0]
        pushf r3
        pop r0
        not r0 ; r0 = -e
; complement exponent = -e - 1
; complement sign (side effect)
; complement mantissa (almost 3 bit accurate)
        push r0
        popf r0 ; r0 = 1.0 * 2^(-e - 1) + inverted mantissa
        ldf -1.0, r1 ; undo complement sign bit
        xor r1, r0

        mpyf3 r0, r3, r1 ; r1 = r[0] * v
        subrf 2.0, r1 ; r1 = 2.0 - r[0] * v
        mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
; End of 1st iteration

        mpyf3 r0, r3, r1 ; r1 = r[1] * v
        subrf 2.0, r1 ; r1 = 2.0 - r[1] * v
        mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
; End of 2nd iteration

        mpyf3 r0, r3, r1 ; r1 = r[2] * v
        subrf 2.0, r1 ; r1 = 2.0 - r[2] * v
        mpyf r1, r0 ; r0 = r[2] * (2.0 - r[2] * v) = r[3]
; End of 3rd iteration

        rnd r0 ; Minimize error in x[3]'s LSBs

; Use modified last iteration
; r[4] = (r[3] * (1.0 - (v * r[3]))) + r[3]
        mpyf3 r0, r3, r1 ; r1 = r[3] * v
        subrf 1.0, r1 ; r1 = 1.0 - r[3] * v
        mpyf r0, r1 ; r1 = r[3] * (1.0 - r[3] * v)
        addf r1, r0 ; r0 = r[3] * (1.0 - r[3] * v) + r[3] = r[4]

        rnd r0 ; Minimize error in x[4]'s LSBs

        bud ar1 ; Delayed branch

        .if .REGPARM == 0
        ldfu *-ar0(1), r2 ; Dividend in mem has only 24 bits significance
        .else
        rnd r2 ; Minimize error in reg dividend's LSBs
               ; since this may have 32 bit significance
        .endif

        mpyf r2, r0 ; Multiply by the dividend
        rnd r0 ; Round result to 32 bits

; Branch occurs here
#endif

#endif
;
; Integer signed division
;
; ar2 dividend, r2 divisor, r0 quotient
; clobbers r1, r3, ar0, ar1, ir0, ir1, rc, rs, re
#ifdef L_divsi3
        .text
        .global ___divqi3
        .ref udivqi3n
___divqi3:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldi *-ar0(1), ar2
        ldi *-ar0(2), r2
        .endif

        xor3 ar2, r2, r3 ; Get the sign
        absi ar2, r0
        bvd divq32
        ldi r0, ar2
        absi r2, r2
        cmpi ar2, r2 ; Divisor > dividend?

        pop ir1
        bhid zero ; If so, return 0

;
; Normalize operands. Use difference in exponents as shift count
; for divisor, and as repeat count for "subc"
;
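; Background note (not part of the original source): SUBC is the C3x/C4x
; conditional-subtract instruction, so the loop below is a conventional
; restoring (shift-and-subtract) division; the repeat count comes from the
; difference of the exponents produced by FLOAT. A hypothetical C sketch
; of a single SUBC step, assuming the top bit of the running dividend is
; clear (the full 32-bit case is handled separately):
;
;   unsigned subc_step(unsigned acc, unsigned d)
;   {
;       if (acc >= d)
;           return ((acc - d) << 1) | 1;   /* subtract fits: shift in a 1 */
;       return acc << 1;                   /* otherwise just shift */
;   }
;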
        float ar2, r1 ; Normalize dividend
        pushf r1 ; Get as integer
        pop ar0
        lsh -24, ar0 ; Get exponent

        float r2, r1 ; Normalize divisor
        pushf r1 ; Get as integer
        pop ir0
        lsh -24, ir0 ; Get exponent

        subi ir0, ar0 ; Get difference of exponents
        lsh ar0, r2 ; Align divisor with dividend

;
; Do count + 1 subtracts and shifts
;
        rpts ar0
        subc r2, ar2

;
; Mask off the lower count+1 bits of ar2
;
        subri 31, ar0 ; Shift count is (32 - (ar0 + 1))
        lsh ar0, ar2 ; Shift left
        negi ar0, ar0
        lsh3 ar0, ar2, r0 ; Shift right and put result in r0

;
; Check sign and negate result if necessary
;
        bud ir1 ; Delayed return
        negi r0, r1 ; Negate result
        ash -31, r3 ; Check sign
        ldinz r1, r0 ; If set, use negative result
; Branch occurs here

zero: bud ir1 ; Delayed branch
        ldi 0, r0
        nop
        nop
; Branch occurs here
;
; special case where ar2 = abs(ar2) = 0x80000000. We handle this by
; calling unsigned divide and negating the result if necessary.
;
divq32:
        push r3 ; Save sign
        call udivqi3n
        pop r3
        pop ir1
        bd ir1
        negi r0, r1 ; Negate result
        ash -31, r3 ; Check sign
        ldinz r1, r0 ; If set, use negative result
; Branch occurs here
#endif
;
; Integer unsigned division
;
; ar2 dividend, r2 divisor, r0 quotient,
; clobbers r1, r3, ar0, ar1, ir0, ir1, rc, rs, re
#ifdef L_udivsi3
        .text
        .global ___udivqi3
        .global udivqi3n
___udivqi3:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldi *-ar0(1), ar2
        ldi *-ar0(2), r2
        .endif

udivqi3n:
        pop ir1

        cmpi ar2, r2 ; If divisor > dividend
        bhi qzero ; return zero
        ldi r2, ar1 ; Store divisor in ar1

        tstb ar2, ar2 ; Check top bit, jump if set to special handler
        bld div_32 ; Delayed branch

;
; Get divisor exponent
;
        float ar1, r1 ; Normalize the divisor
        pushf r1 ; Get into int register
        pop rc
; branch occurs here

        bzd qzero ; if (float) divisor zero, return zero

        float ar2, r1 ; Normalize the dividend
        pushf r1 ; Get into int register
        pop ar0
        lsh -24, ar0 ; Get both the exponents
        lsh -24, rc

        subi rc, ar0 ; Get the difference between the exponents
        lsh ar0, ar1 ; Normalize the divisor with the dividend

;
; Do count + 1 subtracts and shifts
;
        rpts ar0
        subc ar1, ar2

;
; Mask off the lower count+1 bits
;
        subri 31, ar0 ; Shift count (31 - (ar0+1))
        bud ir1 ; Delayed return
        lsh3 ar0, ar2, r0
        negi ar0, ar0
        lsh ar0, r0
; Branch occurs here

;
; Handle a full 32-bit dividend
;
div_32: tstb ar1, ar1
        bld qone ; if divisor high bit is one, the result is one
        lsh -24, rc
        subri 31, rc
        lsh rc, ar1 ; Line up the divisor

;
; Now divisor and dividend are aligned. Do the first SUBC by hand, saving
; the first quotient digit. Then, shift the divisor right rather than
; shifting the dividend left. This leaves a zero in the top bit of
; the dividend.
;
        ldi 1, ar0 ; Initialize MSB of quotient
        lsh rc, ar0 ; create a mask for MSBs
        subi 1, ar0 ; mask is (2 << count) - 1

        subi3 ar1, ar2, r1
        ldihs r1, ar2
        ldihs 1, r1
        ldilo 0, r1
        lsh rc, r1

        lsh -1, ar1
        subi 1, rc
;
; do the rest of the shifts and subtracts
;
        rpts rc
        subc ar1, ar2

        bud ir1
        and ar0, ar2
        or3 r1, ar2, r0
        nop

qone:
        bud ir1
        ldi 1, r0
        nop
        nop

qzero:
        bud ir1
        ldi 0, r0
        nop
        nop
#endif

#ifdef L_umodsi3
        .text
        .global ___umodqi3
        .global umodqi3n
___umodqi3:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldi *-ar0(1), ar2
        ldi *-ar0(2), r2
        .endif

umodqi3n:
        pop ir1 ; return address
        cmpi ar2, r2 ; divisor > dividend ?
        bhi uzero ; if so, return dividend
        ldi r2, ar1 ; load divisor
;
; If top bit of dividend is set, handle specially.
;
        tstb ar2, ar2 ; check top bit
        bld umod_32 ; get divisor exponent, then jump.
;
; Get divisor exponent by converting to float.
;
        float ar1, r1 ; normalize divisor
        pushf r1 ; push as float
        pop rc ; pop as int to get exponent
        bzd uzero ; if (float)divisor was zero, return
;
; 31 or less bits in dividend. Get dividend exponent.
;
        float ar2, r1 ; normalize dividend
        pushf r1 ; push as float
        pop ar0 ; pop as int to get exponent
;
; Use difference in exponents as shift count to line up MSBs.
;
        lsh -24, rc ; divisor exponent
        lsh -24, ar0 ; dividend exponent
        subi rc, ar0 ; difference
        lsh ar0, ar1 ; shift divisor up
;
; Do COUNT+1 subtract & shifts.
;
        rpts ar0
        subc ar1, ar2
;
; Remainder is in upper 31-COUNT bits.
;
        bud ir1 ; delayed branch to return
        addi 1, ar0 ; shift count is COUNT+1
        negi ar0, ar0 ; negate for right shift
        lsh3 ar0, ar2, r0 ; shift to get result
; Return occurs here

;
; The following code handles cases of a full 32-bit dividend. Before
; SUBC can be used, the top bit must be cleared (otherwise SUBC can
; possibly shift a significant 1 out the top of the dividend). This
; is accomplished by first doing a normal subtraction, then proceeding
; with SUBCs.
;
umod_32:
;
; If the top bit of the divisor is set too, the remainder is simply
; the difference between the dividend and divisor. Otherwise, shift
; the divisor up to line up the MSBs.
;
        tstb ar1, ar1 ; check divisor
        bld uone ; if negative, remainder is diff

        lsh -24, rc ; divisor exponent
        subri 31, rc ; shift count = 31 - exp
        negi rc, ar0 ; used later as shift count
        lsh rc, ar1 ; shift up to line up MSBs
;
; Now MSBs are aligned. Do first SUBC by hand using a plain subtraction.
; Then, shift divisor right rather than shifting dividend left. This leaves
; a 0 in the top bit of the dividend.
;
        subi3 ar1, ar2, r1 ; subtract
        ldihs r1, ar2 ; if positive, replace dividend
        subi 1, rc ; first iteration is done
        lsh -1, ar1 ; shift divisor down
;
; Do EXP subtract & shifts.
;
        rpts rc
        subc ar1, ar2
;
; Quotient is in EXP+1 LSBs; shift remainder (in MSBs) down.
;
        bud ir1
        lsh3 ar0, ar2, r0 ; COUNT contains -(EXP+1)
        nop
        nop
;
; Return (dividend - divisor).
;
uone: bud ir1
        subi3 r2, ar2, r0
        nop
        nop
;
; Return dividend.
;
uzero: bud ir1
        ldi ar2, r0 ; set status from result
        nop
        nop
#endif

#ifdef L_modsi3
        .text
        .global ___modqi3
        .ref umodqi3n
___modqi3:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldi *-ar0(1), ar2
        ldi *-ar0(2), r2
        .endif

;
; Determine sign of result. Get absolute value of operands.
;
        ldi ar2, ar0 ; sign of result same as dividend
        absi ar2, r0 ; make dividend positive
        bvd mod_32 ; if still negative, escape
        absi r2, r1 ; make divisor positive
        ldi r1, ar1 ; save in ar1
        cmpi r0, ar1 ; divisor > dividend ?

        pop ir1 ; return address
        bhid return ; if so, return dividend
;
; Normalize operands. Use difference in exponents as shift count
; for divisor, and as repeat count for SUBC.
;
        float r1, r1 ; normalize divisor
        pushf r1 ; push as float
        pop rc ; pop as int
        bzd return ; if (float)divisor was zero, return

        float r0, r1 ; normalize dividend
        pushf r1 ; push as float
        pop r1 ; pop as int

        lsh -24, rc ; get divisor exponent
        lsh -24, r1 ; get dividend exponent
        subi rc, r1 ; get difference in exponents
        lsh r1, ar1 ; align divisor with dividend
;
; Do COUNT+1 subtract & shifts.
;
        rpts r1
        subc ar1, r0
;
; Remainder is in upper bits of R0
;
        addi 1, r1 ; shift count is -(r1+1)
        negi r1, r1
        lsh r1, r0 ; shift right
;
; Check sign and negate result if necessary.
;
return:
        bud ir1 ; delayed branch to return
        negi r0, r1 ; negate result
        cmpi 0, ar0 ; check sign
        ldin r1, r0 ; if set, use negative result
; Return occurs here
;
; The following code handles cases of a full 32-bit dividend. This occurs
; when R0 = abs(R0) = 080000000h. Handle this by calling the unsigned mod
; function, then negating the result if necessary.
;
mod_32:
        push ar0 ; remember sign
        call umodqi3n ; do divide

        brd return ; return
        pop ar0 ; restore sign
        pop ir1 ; return address
        nop
#endif

#ifdef L_unsfltconst
        .section .const
        .global ___unsfltconst
___unsfltconst: .float 4294967296.0
#endif

#ifdef L_unsfltcompare
        .section .const
        .global ___unsfltcompare
___unsfltcompare: .float 2147483648.0
#endif

; Integer 32-bit signed multiplication
;
; The TMS320C3x MPYI instruction takes two 24-bit signed integers
; and produces a 48-bit signed result which is truncated to 32-bits.
;
; A 32-bit by 32-bit multiplication thus requires a number of steps.
;
; Consider the product of two 32-bit signed integers,
;
; z = x * y
;
; where x = (b << 16) + a, y = (d << 16) + c
;
; This can be expressed as
;
; z = ((b << 16) + a) * ((d << 16) + c)
;
;   = ((b * d) << 32) + ((b * c + a * d) << 16) + a * c
;
; Let z = (f << 16) + e where f < (1 << 16).
;
; Since we are only interested in a 32-bit result, we can ignore the
; (b * d) << 32 term, and thus
;
; f = b * c + a * d, e = a * c
;
; We can simplify things if we have some a priori knowledge of the
; operands, for example, if -32768 <= y <= 32767, then y = c and d = 0 and thus
;
; f = b * c, e = a * c
;
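; Illustration only (not part of the original source): the same
; decomposition in C, keeping just the low 32 bits of the product
; (unsigned arithmetic yields the same low 32 bits as signed):
;
;   unsigned mul32(unsigned x, unsigned y)       /* assumes 32-bit unsigned */
;   {
;       unsigned a = x & 0xffff, b = x >> 16;    /* x = (b << 16) + a */
;       unsigned c = y & 0xffff, d = y >> 16;    /* y = (d << 16) + c */
;       return a * c + ((b * c + a * d) << 16);  /* (b * d) << 32 drops out */
;   }
;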
; ar2 multiplier, r2 multiplicand, r0 product
; clobbers r1, r2, r3
#ifdef L_mulsi3
        .text
        .global ___mulqi3
___mulqi3:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldi *-ar0(1), ar2
        ldi *-ar0(2), r2
        .endif

        pop ir1 ; return address
        ldi ar2, r0 ;
        and 0ffffh, r0 ; a
        lsh -16, ar2 ; b
        ldi r2, r3 ;
        and 0ffffh, r3 ; c
        mpyi r3, ar2 ; c * b
        lsh -16, r2 ; d
        mpyi r0, r2 ; a * d
        addi ar2, r2 ; c * b + a * d
        bd ir1 ; delayed branch to return
        lsh 16, r2 ; (c * b + a * d) << 16
        mpyi r3, r0 ; a * c
        addi r2, r0 ; a * c + (c * b + a * d) << 16
; branch occurs here

#endif

;
; Integer 64 by 64 multiply
; long1 and long2 on stack
; result in r0,r1
;
#ifdef L_muldi3
        .text
        .global ___mulhi3
#ifdef _TMS320C4x
___mulhi3:
        pop ar0
        ldi sp,ar2
        ldi *-ar2(1),r2
        ldi *-ar2(3),r3
        mpyi3 r2,r3,r0
        mpyuhi3 r2,r3,r1
        mpyi *-ar2(2),r2
        bd ar0
        mpyi *-ar2(0),r3
        addi r2,r1
        addi r3,r1
#else
___mulhi3:
        ldi sp,ar2
        ldi -16,rs
        ldi *-ar2(2),ar0
        ldi *-ar2(4),ar1
        ldi ar0,r2
        and 0ffffh,r2
        ldi ar1,r3
        and 0ffffh,r3
        lsh rs,ar0
        lsh rs,ar1

        mpyi r2,r3,r0
        mpyi ar0,ar1,r1
        mpyi r2,ar1,rc
        lsh rs,rc,re
        addi re,r1
        lsh 16,rc
        addi rc,r0
        addc 0,r1
        mpyi r3,ar0,rc
        lsh rs,rc,re
        addi re,r1
        lsh 16,rc
        addi rc,r0
        addc 0,r1

        ldi *-ar2(1),ar0
        ldi ar0,r2
        and 0ffffh,r2
        lsh rs,ar0
        mpyi r2,r3,rc
        addi rc,r1
        mpyi r2,ar1,rc
        mpyi r3,ar0,re
        addi re,rc
        lsh 16,rc
        addi rc,r1

        ldi *-ar2(2),ar0
        ldi *-ar2(3),ar1
        ldi ar0,r2
        and 0ffffh,r2
        ldi ar1,r3
        and 0ffffh,r3
        lsh rs,ar0
        lsh rs,ar1
        mpyi r2,r3,rc
        addi rc,r1
        mpyi r2,ar1,rc
        mpyi r3,ar0,re
        pop ar0
        bd ar0
        addi re,rc
        lsh 16,rc
        addi rc,r1
#endif
#endif

;
; Integer 32 by 32 multiply highpart unsigned
; src1 in ar2
; src2 in r2
; result in r0
;
#ifdef L_umuldi3_high
        .text
        .global ___umulhi3_high
___umulhi3_high:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldi *-ar0(1), ar2
        ldi *-ar0(2), r2
        .endif

        ldi -16,rs
        ldi r2,r3
        and 0ffffh,r2
        ldi ar2,ar1
        and 0ffffh,ar2
        lsh rs,r3
        lsh rs,ar1

        mpyi ar2,r2,r1
        mpyi ar1,r3,r0
        mpyi ar2,r3,rc
        lsh rs,rc,re
        addi re,r0
        lsh 16,rc
        addi rc,r1
        addc 0,r0
        mpyi r2,ar1,rc
        lsh rs,rc,re
        addi re,r0
        pop ar0
        bd ar0
        lsh 16,rc
        addi rc,r1
        addc 0,r0
#endif

;
; Integer 32 by 32 multiply highpart signed
; src1 in ar2
; src2 in r2
; result in r0
;
#ifdef L_smuldi3_high
        .text
        .global ___smulhi3_high
___smulhi3_high:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldi *-ar0(1), ar2
        ldi *-ar0(2), r2
        .endif

        ldi -16,rs
        ldi 0,rc
        subi3 ar2,rc,r0
        ldi r2,r3
        ldilt r0,rc
        subi3 r2,rc,r0
        ldi ar2,ar1
        tstb ar1,ar1
        ldilt r0,rc
        and 0ffffh,r2
        and 0ffffh,ar2
        lsh rs,r3
        lsh rs,ar1

        mpyi ar2,r2,r1
        mpyi ar1,r3,r0
        addi rc,r0
        mpyi ar2,r3,rc
        lsh rs,rc,re
        addi re,r0
        lsh 16,rc
        addi rc,r1
        addc 0,r0
        mpyi r2,ar1,rc
        lsh rs,rc,re
        addi re,r0
        pop ar0
        bd ar0
        lsh 16,rc
        addi rc,r1
        addc 0,r0
#endif

;
; Integer 64 by 64 unsigned divide
; long1 and long2 on stack
; divide in r0,r1
; modulo in r2,r3
; routine takes a maximum of 64*8+23=535 cycles = 21.4 us @ 50 MHz
;
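; Orientation note (not part of the original source): the RPTB/ROLC loops
; below produce one quotient bit per iteration, i.e. a bitwise restoring
; division over the 64-bit dividend. A rough C sketch of the idea, shown
; hypothetically for the 64/64 case handled by udiv1 and assuming a
; non-zero divisor:
;
;   unsigned long long udiv64(unsigned long long n, unsigned long long d,
;                             unsigned long long *rem)
;   {
;       unsigned long long q = 0, r = 0;
;       for (int i = 63; i >= 0; i--) {
;           r = (r << 1) | ((n >> i) & 1);   /* shift in next dividend bit */
;           if (r >= d) {                    /* conditional subtract */
;               r -= d;
;               q |= 1ULL << i;              /* record the quotient bit */
;           }
;       }
;       *rem = r;
;       return q;
;   }
;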
#ifdef L_udivdi3
        .text
        .global ___udivhi3
        .global ___udivide
        .global ___umodulo
        .ref udivqi3n
        .ref umodqi3n
___udivhi3:
        ldi sp,ar2
        ldi *-ar2(4),ar0
        ldi *-ar2(3),ar1
        ldi *-ar2(2),r0
        ldi *-ar2(1),r1

___udivide:
        or r1,ar1,r2
        bne udiv0
        ldi ar0,r2
        ldi r0,ar2
        call udivqi3n
        ldiu 0,r1
        rets

___umodulo:
        or r1,ar1,r2
        bne udiv0
        ldi ar0,r2
        ldi r0,ar2
        call umodqi3n
        ldi r0,r2
        ldiu 0,r3
        rets

udiv0:
        tstb ar1,ar1
        bne udiv1
        tstb ar0,ar0
        bn udiv1

        ldiu 63,rc
#ifdef _TMS320C4x
        rptbd udivend0
        ldiu 0,r2
        addi r0,r0
        rolc r1
#else
        ldiu 0,r2
        addi r0,r0
        rolc r1
        rptb udivend0
#endif

        rolc r2
        subi3 ar0,r2,r3
        ldinc r3,r2
        rolc r0
udivend0:
        rolc r1

        not r0
        not r1
        ldiu 0,r3
        rets
udiv1:
        push r4
        push r5
        ldiu 63,rc
        ldiu 0,r2
#ifdef _TMS320C4x
        rptbd udivend1
        ldiu 0,r3
        addi r0,r0
        rolc r1
#else
        ldiu 0,r3
        addi r0,r0
        rolc r1
        rptb udivend1
#endif

        rolc r2
        rolc r3
        subi3 ar0,r2,r4
        subb3 ar1,r3,r5
        ldinc r4,r2
        ldinc r5,r3
        rolc r0
udivend1:
        rolc r1

        not r0
        not r1
        pop r5
        pop r4
        rets
#endif

;
; Integer 64 by 64 unsigned modulo
; long1 and long2 on stack
; result in r0,r1
;
#ifdef L_umoddi3
        .text
        .global ___umodhi3
        .ref ___umodulo
___umodhi3:
        ldi sp,ar2
        ldi *-ar2(4),ar0
        ldi *-ar2(3),ar1
        ldi *-ar2(2),r0
        ldi *-ar2(1),r1
        call ___umodulo
        pop ar0
        bd ar0
        ldi r2,r0
        ldi r3,r1
        nop
#endif

;
; Integer 64 by 64 signed divide
; long1 and long2 on stack
; result in r0,r1
;
#ifdef L_divdi3
        .text
        .global ___divhi3
        .ref ___udivide
___divhi3:
        ldi 0,ir0
        ldi sp,ar2
        ldi *-ar2(4),r0
        ldi *-ar2(3),r1
        bge div1
        not ir0
        negi r0
        negb r1
div1:
        ldi r0,ar0
        ldi r1,ar1
        ldi *-ar2(2),r0
        ldi *-ar2(1),r1
        bge div2
        not ir0
        negi r0
        negb r1
div2:
        call ___udivide
        tstb ir0,ir0
        bge div3
        negi r0
        negb r1
div3:
        rets
#endif

;
; Integer 64 by 64 signed modulo
; long1 and long2 on stack
; result in r0,r1
;
#ifdef L_moddi3
        .text
        .global ___modhi3
        .ref ___umodulo
___modhi3:
        ldi 0,ir0
        ldi sp,ar2
        ldi *-ar2(4),r0
        ldi *-ar2(3),r1
        bge mod1
        not ir0
        negi r0
        negb r1
mod1:
        ldi r0,ar0
        ldi r1,ar1
        ldi *-ar2(2),r0
        ldi *-ar2(1),r1
        bge mod2
        not ir0
        negi r0
        negb r1
mod2:
        call ___umodulo
        ldi r2,r0
        ldi r3,r1
        tstb ir0,ir0
        bge mod3
        negi r0
        negb r1
mod3:
        rets
#endif

;
; double to signed long long conversion
; input in r2
; result in r0,r1
;
#ifdef L_fix_truncsfdi2
        .text
        .global ___fix_truncqfhi2
        .ref ufix_truncqfhi2n
___fix_truncqfhi2:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldf *-ar0(1), r2
        .endif

        cmpf 0.0,r2
        bge ufix_truncqfhi2n
        negf r2
        call ufix_truncqfhi2n
        negi r0
        negb r1
        rets
#endif

;
; double to unsigned long long conversion
; input in r2
; result in r0,r1
;
#ifdef L_ufix_truncsfdi2
        .text
        .global ___ufix_truncqfhi2
        .global ufix_truncqfhi2n
___ufix_truncqfhi2:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldf *-ar0(1), r2
        .endif

ufix_truncqfhi2n:
        cmpf 0.0,r2
        ble ufix1
        pushf r2
        pop r3
        ash -24,r3
        subi 31,r3
        cmpi 32,r3
        bgt ufix1
        cmpi -32,r3
        ble ufix1
        ldi 1,r0
        ash 31,r0
        or3 r0,r2,r0
        ldi r0,r1
        lsh3 r3,r0,r0
        subi 32,r3
        cmpi -32,r3
        ldile 0,r1
        lsh3 r3,r1,r1
        rets
ufix1:
        ldi 0,r0
        ldi 0,r1
        rets
#endif

;
; signed long long to double conversion
; input on stack
; result in r0
;
#ifdef L_floatdisf2
        .text
        .global ___floathiqf2
        .ref ufloathiqf2n
___floathiqf2:
        ldi sp,ar2
        ldi *-ar2(2),r0
        ldi *-ar2(1),r1
        bge ufloathiqf2n
        negi r0
        negb r1
        call ufloathiqf2n
        negf r0
        rets
#endif

;
; unsigned long long to double conversion
; input on stack
; result in r0
;
#ifdef L_ufloatdisf2
        .text
        .global ___ufloathiqf2
        .global ufloathiqf2n
        .ref ___unsfltconst
___ufloathiqf2:
        ldi sp,ar2
        ldi *-ar2(2),r0
        ldi *-ar2(1),r1
ufloathiqf2n:
        .if .BIGMODEL
#ifdef _TMS320C4x
        ldpk @___unsfltconst
#else
        ldp @___unsfltconst
#endif
        .endif
        ldf @___unsfltconst,r2
        float r0
        bge uflt1
        addf r2,r0
uflt1:
        float r1
        bge uflt2
        addf r2,r1
uflt2:
#ifdef _TMS320C4x
        pop r3
        bd r3
        mpyf r2,r1
        addf r1,r0
        nop
#else
        ldf r1,r3
        and 0ffh,r3
        norm r3,r3
        mpyf r2,r3
        pop ar2
        bd ar2
        addf r3,r0
        mpyf r2,r1
        addf r1,r0
#endif
#endif

;
; long double to signed long long conversion
; input in r2
; result in r0,r1
;
#ifdef L_fix_truncdfdi2
        .text
        .global ___fix_trunchfhi2
        .ref ufix_trunchfhi2n
___fix_trunchfhi2:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldf *-ar0(2), r2
        ldi *-ar0(1), r2
        .endif

        cmpf 0.0,r2
        bge ufix_trunchfhi2n
        negf r2
        call ufix_trunchfhi2n
        negi r0
        negb r1
        rets
#endif

;
; long double to unsigned long long conversion
; input in r2
; result in r0,r1
;
#ifdef L_ufix_truncdfdi2
        .text
        .global ___ufix_trunchfhi2
        .global ufix_trunchfhi2n
___ufix_trunchfhi2:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldf *-ar0(2), r2
        ldi *-ar0(1), r2
        .endif

ufix_trunchfhi2n:
        cmpf 0.0,r2
        ble ufixh1
        pushf r2
        pop r3
        ash -24,r3
        subi 31,r3
        cmpi 32,r3
        bgt ufixh1
        cmpi -32,r3
        ble ufixh1
        ldi 1,r0
        ash 31,r0
        or3 r0,r2,r0
        ldi r0,r1
        lsh3 r3,r0,r0
        subi 32,r3
        cmpi -32,r3
        ldile 0,r1
        lsh3 r3,r1,r1
        rets
ufixh1:
        ldi 0,r0
        ldi 0,r1
        rets
#endif

;
; signed long long to long double conversion
; input on stack
; result in r0
;
#ifdef L_floatdidf2
        .text
        .global ___floathihf2
        .ref ufloathihf2n
___floathihf2:
        ldi sp,ar2
        ldi *-ar2(2),r0
        ldi *-ar2(1),r1
        bge ufloathihf2n
        negi r0
        negb r1
        call ufloathihf2n
        negf r0
        rets
#endif

;
; unsigned long long to long double conversion
; input on stack
; result in r0
;
#ifdef L_ufloatdidf2
        .text
        .global ___ufloathihf2
        .global ufloathihf2n
        .ref ___unsfltconst
___ufloathihf2:
        ldi sp,ar2
        ldi *-ar2(2),r0
        ldi *-ar2(1),r1
ufloathihf2n:
        .if .BIGMODEL
#ifdef _TMS320C4x
        ldpk @___unsfltconst
#else
        ldp @___unsfltconst
#endif
        .endif
        ldf @___unsfltconst,r2
        float r0
        bge uflth1
        addf r2,r0
uflth1:
        float r1
        bge uflth2
        addf r2,r1
uflth2:
#ifdef _TMS320C4x
        pop r3
        bd r3
        mpyf r2,r1
        addf r1,r0
        nop
#else
        ldf r1,r3
        and 0ffh,r3
        norm r3,r3
        mpyf r2,r3
        pop ar2
        bd ar2
        addf r3,r0
        mpyf r2,r1
        addf r1,r0
#endif
#endif

;
; calculate ffs
; input in ar2
; result in r0
;
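; Background note (not part of the original source): the code isolates the
; lowest set bit with (x & -x), converts it to float, and reads the result's
; exponent field as the bit index. A hypothetical C sketch of the same idea,
; using frexp() on an ordinary double instead of the C3x/C4x float format:
;
;   #include <math.h>
;
;   int ffs_sketch(unsigned x)
;   {
;       unsigned lsb = x & (0u - x);   /* isolate the lowest set bit */
;       if (lsb == 0)
;           return 0;                  /* no bit set */
;       int e;
;       frexp((double)lsb, &e);        /* exponent read, like the ash -24 */
;       return e;                      /* lsb == 2^(e-1), so ffs(x) == e */
;   }
;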
#ifdef L_ffs
        .global ___ffs
        .ref ___unsfltconst
        .text
___ffs:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldi *-ar0(1), ar2
        .endif

        negi ar2,r0
        and ar2,r0
        float r0,r0
        ldfu 0.0,r1
        .if .BIGMODEL
#ifdef _TMS320C4x
        ldpk @___unsfltconst
#else
        ldp @___unsfltconst
#endif
        .endif
        ldflt @___unsfltconst,r1
        addf r1,r0
        pushf r0
        pop r0
        pop ar0
        bd ar0
        ash -24,r0
        ldilt -1,r0
        addi 1,r0
#endif

;
; calculate long double * long double
; input in r2, r3
; output in r0
;
#ifdef L_muldf3
        .global ___mulhf3
        .text
___mulhf3:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldf *-ar0(2), r2
        ldi *-ar0(1), r2
        ldf *-ar0(4), r3
        ldi *-ar0(3), r3
        .endif

        pop ar2 ; return address
        ldf r2,r0 ; copy lsb0
        ldf r3,r1 ; copy lsb1
        and 0ffh,r0 ; mask lsb0
        and 0ffh,r1 ; mask lsb1
        norm r0,r0 ; correct lsb0
        norm r1,r1 ; correct lsb1
        mpyf r2,r1 ; arg0*lsb1
        mpyf r3,r0 ; arg1*lsb0
        bd ar2 ; return (delayed)
        addf r0,r1 ; arg0*lsb1 + arg1*lsb0
        mpyf r2,r3,r0 ; msb0*msb1
        addf r1,r0 ; msb0*msb1 + arg0*lsb1 + arg1*lsb0
#endif

;
; calculate long double / long double
; r2 dividend, r3 divisor, r0 quotient
;
#ifdef L_divdf3
        .global ___divhf3
        .text
___divhf3:
        .if .REGPARM == 0
#ifdef _TMS320C4x
        lda sp,ar0
#else
        ldiu sp,ar0
#endif
        ldf *-ar0(2), r2
        ldi *-ar0(1), r2
        ldf *-ar0(4), r3
        ldi *-ar0(3), r3
        .endif

#ifdef _TMS320C4x
        pop ar1
        rcpf r3, r0
        mpyf3 r0, r3, r1
        subrf 2.0, r1
        mpyf r1, r0
        mpyf3 r0, r3, r1
        bud ar1
        subrf 2.0, r1
        mpyf r1, r0
        mpyf r2, r0
#else
        pop ar1
        pushf r3
        pop r0
        not r0
        push r0
        popf r0
        ldf -1.0, r1
        xor r1, r0

        mpyf3 r0, r3, r1 ; r1 = r[0] * v
        subrf 2.0, r1 ; r1 = 2.0 - r[0] * v
        mpyf r1, r0 ; r0 = r[0] * (2.0 - r[0] * v) = r[1]
; End of 1st iteration

        mpyf3 r0, r3, r1 ; r1 = r[1] * v
        subrf 2.0, r1 ; r1 = 2.0 - r[1] * v
        mpyf r1, r0 ; r0 = r[1] * (2.0 - r[1] * v) = r[2]
; End of 2nd iteration

        mpyf3 r0, r3, r1 ; r1 = r[2] * v
        subrf 2.0, r1 ; r1 = 2.0 - r[2] * v
        mpyf r1, r0 ; r0 = r[2] * (2.0 - r[2] * v) = r[3]
; End of 3rd iteration

        or 080h, r0
        rnd r0

; mpyf3 r0, r3, r1 ; r1 = r[3] * v
        push r4
        pushf r4
        mpyf r0, r3, r1

        ldf r0, r4
        and 0ffh, r4
        norm r4, r4
        mpyf r3, r4
        addf r4, r1

        ldf r3, r4
        and 0ffh, r4
        norm r4, r4
        mpyf r0, r4
        addf r4, r1

        subrf 2.0, r1 ; r1 = 2.0 - r[3] * v

        mpyf r1, r0, r3 ; r3 = r[3] * (2.0 - r[3] * v) = r[5]

        ldf r1, r4
        and 0ffh, r4
        norm r4, r4
        mpyf r0, r4
        addf r4, r3

        ldf r0, r4
        and 0ffh, r4
        norm r4, r4
        mpyf r1, r4
        addf r4, r3

        mpyf r2, r3, r0 ; Multiply by the dividend

        ldf r2, r4
        and 0ffh, r4
        norm r4, r4
        mpyf r3, r4
        addf r4, r0

        ldf r3, r4
        and 0ffh, r4
        norm r4, r4
        mpyf r2, r4
        bd ar1
        addf r4, r0

        popf r4
        pop r4
#endif
#endif