NetBSD/sys/arch/m68k/fpe/fpu_mul.c

/*	$NetBSD: fpu_mul.c,v 1.4 2003/08/07 16:28:11 agc Exp $ */

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)fpu_mul.c	8.1 (Berkeley) 6/11/93
 */

/*
 * Perform an FPU multiply (return x * y).
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu_mul.c,v 1.4 2003/08/07 16:28:11 agc Exp $");

#include <sys/types.h>

#include <machine/reg.h>

#include "fpu_arith.h"
#include "fpu_emulate.h"

/*
 * The multiplication algorithm for normal numbers is as follows:
 *
 * The fraction of the product is built in the usual stepwise fashion.
 * Each step consists of shifting the accumulator right one bit
 * (maintaining any guard bits) and, if the next bit in y is set,
 * adding the multiplicand (x) to the accumulator.  Then, in any case,
 * we advance one bit leftward in y.  Algorithmically:
 *
 *	A = 0;
 *	for (bit = 0; bit < FP_NMANT; bit++) {
 *		sticky |= A & 1, A >>= 1;
 *		if (Y & (1 << bit))
 *			A += X;
 *	}
 *
 * (X and Y here represent the mantissas of x and y respectively.)
 * The resultant accumulator (A) is the product's mantissa.  It may
 * be as large as 11.11111... in binary and hence may need to be
 * shifted right, but at most one bit.
 *
 * Since we do not have efficient multiword arithmetic, we code the
 * accumulator as four separate words, just like any other mantissa.
 * We use local `register' variables in the hope that this is faster
 * than memory.  We keep x->fp_mant in locals for the same reason.
 *
 * In the algorithm above, the bits in y are inspected one at a time.
 * We will pick them up 32 at a time and then deal with those 32, one
 * at a time.  Note, however, that we know several things about y:
 *
 *    - the guard and round bits at the bottom are sure to be zero;
 *
 *    - often many low bits are zero (y is often from a single or double
 *	precision source);
 *
 *    - bit FP_NMANT-1 is set, and FP_1*2 fits in a word.
 *
 * We can also test for 32-zero-bits swiftly.  In this case, the center
 * part of the loop---setting sticky, shifting A, and not adding---will
 * run 32 times without adding X to A.  We can do a 32-bit shift faster
 * by simply moving words.  Since zeros are common, we optimize this case.
 * Furthermore, since A is initially zero, we can omit the shift as well
 * until we reach a nonzero word.
 */
struct fpn *
fpu_mul(fe)
	register struct fpemu *fe;
{
	register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
	register u_int a2, a1, a0, x2, x1, x0, bit, m;
	register int sticky;
	FPU_DECL_CARRY

	/*
	 * Put the `heavier' operand on the right (see fpu_emu.h).
	 * Then we will have one of the following cases, taken in the
	 * following order:
	 *
	 *  - y = NaN.  Implied: if only one is a signalling NaN, y is.
	 *	The result is y.
	 *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN
	 *    case was taken care of earlier).
	 *	If x = 0, the result is NaN.  Otherwise the result
	 *	is y, with its sign reversed if x is negative.
	 *  - x = 0.  Implied: y is 0 or number.
	 *	The result is 0 (with XORed sign as usual).
	 *  - other.  Implied: both x and y are numbers.
	 *	The result is x * y (XOR sign, multiply bits, add exponents).
	 */
	ORDER(x, y);
	if (ISNAN(y)) {
		y->fp_sign ^= x->fp_sign;
		return (y);
	}
	if (ISINF(y)) {
		if (ISZERO(x))
			return (fpu_newnan(fe));
		y->fp_sign ^= x->fp_sign;
		return (y);
	}
	if (ISZERO(x)) {
		x->fp_sign ^= y->fp_sign;
		return (x);
	}

	/*
	 * Setup.  In the code below, the mask `m' will hold the current
	 * mantissa byte from y.  The variable `bit' denotes the bit
	 * within m.  We also define some macros to deal with everything.
	 */
	x2 = x->fp_mant[2];
	x1 = x->fp_mant[1];
	x0 = x->fp_mant[0];
	sticky = a2 = a1 = a0 = 0;

#define	ADD	/* A += X */ \
	FPU_ADDS(a2, a2, x2); \
	FPU_ADDCS(a1, a1, x1); \
	FPU_ADDC(a0, a0, x0)

#define	SHR1	/* A >>= 1, with sticky */ \
	sticky |= a2 & 1, \
	a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1

#define	SHR32	/* A >>= 32, with sticky */ \
	sticky |= a2, a2 = a1, a1 = a0, a0 = 0

#define	STEP	/* each 1-bit step of the multiplication */ \
	SHR1; if (bit & m) { ADD; }; bit <<= 1

	/*
	 * We are ready to begin.  The multiply loop runs once for each
	 * of the four 32-bit words.  Some words, however, are special.
	 * As noted above, the low order bits of Y are often zero.  Even
	 * if not, the first loop can certainly skip the guard bits.
	 * The last word of y has its highest 1-bit in position FP_NMANT-1,
	 * so we stop the loop when we move past that bit.
	 */
	if ((m = y->fp_mant[2]) == 0) {
		/* SHR32; */			/* unneeded since A==0 */
	} else {
		bit = 1 << FP_NG;
		do {
			STEP;
		} while (bit != 0);
	}
	if ((m = y->fp_mant[1]) == 0) {
		SHR32;
	} else {
		bit = 1;
		do {
			STEP;
		} while (bit != 0);
	}
	m = y->fp_mant[0];		/* definitely != 0 */
	bit = 1;
	do {
		STEP;
	} while (bit <= m);

	/*
	 * Done with mantissa calculation.  Get exponent and handle
	 * 11.111...1 case, then put result in place.  We reuse x since
	 * it already has the right class (FP_NUM).
	 */
	m = x->fp_exp + y->fp_exp;
	if (a0 >= FP_2) {
		SHR1;
		m++;
	}
	x->fp_sign ^= y->fp_sign;
	x->fp_exp = m;
	x->fp_sticky = sticky;
	x->fp_mant[2] = a2;
	x->fp_mant[1] = a1;
	x->fp_mant[0] = a0;
	return (x);
}
Move UCB-licensed code from 4-clause to 3-clause licence. Patches provided by Joel Baker in PR 22364, verified by myself. 2003-08-07 20:26:28 +04:00			`/* $NetBSD: fpu_mul.c,v 1.4 2003/08/07 16:28:11 agc Exp $ */`
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00
			`/*`
			`* Copyright (c) 1992, 1993`
			`* The Regents of the University of California. All rights reserved.`
			`*`
			`* This software was developed by the Computer Systems Engineering group`
			`* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and`
			`* contributed to Berkeley.`
			`*`
			`* All advertising materials mentioning features or use of this software`
			`* must display the following acknowledgement:`
			`* This product includes software developed by the University of`
			`* California, Lawrence Berkeley Laboratory.`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions`
			`* are met:`
			`* 1. Redistributions of source code must retain the above copyright`
			`* notice, this list of conditions and the following disclaimer.`
			`* 2. Redistributions in binary form must reproduce the above copyright`
			`* notice, this list of conditions and the following disclaimer in the`
			`* documentation and/or other materials provided with the distribution.`
Move UCB-licensed code from 4-clause to 3-clause licence. Patches provided by Joel Baker in PR 22364, verified by myself. 2003-08-07 20:26:28 +04:00			`* 3. Neither the name of the University nor the names of its contributors`
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00			`* may be used to endorse or promote products derived from this software`
			`* without specific prior written permission.`
			`*`
			* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
			`* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE`
			`* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL`
			`* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS`
			`* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)`
			`* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT`
			`* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY`
			`* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF`
			`* SUCH DAMAGE.`
			`*`
			`* @(#)fpu_mul.c 8.1 (Berkeley) 6/11/93`
			`*/`

			`/*`
			`* Perform an FPU multiply (return x * y).`
			`*/`

__KERNEL_RCSID() 2003-07-15 06:43:09 +04:00			`#include <sys/cdefs.h>`
Move UCB-licensed code from 4-clause to 3-clause licence. Patches provided by Joel Baker in PR 22364, verified by myself. 2003-08-07 20:26:28 +04:00			`__KERNEL_RCSID(0, "$NetBSD: fpu_mul.c,v 1.4 2003/08/07 16:28:11 agc Exp $");`
__KERNEL_RCSID() 2003-07-15 06:43:09 +04:00
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00			`#include <sys/types.h>`

			`#include <machine/reg.h>`

			`#include "fpu_arith.h"`
			`#include "fpu_emulate.h"`

			`/*`
			`* The multiplication algorithm for normal numbers is as follows:`
			`*`
			`* The fraction of the product is built in the usual stepwise fashion.`
			`* Each step consists of shifting the accumulator right one bit`
			`* (maintaining any guard bits) and, if the next bit in y is set,`
			`* adding the multiplicand (x) to the accumulator. Then, in any case,`
			`* we advance one bit leftward in y. Algorithmically:`
			`*`
			`* A = 0;`
			`* for (bit = 0; bit < FP_NMANT; bit++) {`
			`* sticky \|= A & 1, A >>= 1;`
			`* if (Y & (1 << bit))`
			`* A += X;`
			`* }`
			`*`
			`* (X and Y here represent the mantissas of x and y respectively.)`
			`* The resultant accumulator (A) is the product's mantissa. It may`
			`* be as large as 11.11111... in binary and hence may need to be`
			`* shifted right, but at most one bit.`
			`*`
			`* Since we do not have efficient multiword arithmetic, we code the`
			`* accumulator as four separate words, just like any other mantissa.`
			* We use local `register' variables in the hope that this is faster
			`* than memory. We keep x->fp_mant in locals for the same reason.`
			`*`
			`* In the algorithm above, the bits in y are inspected one at a time.`
			`* We will pick them up 32 at a time and then deal with those 32, one`
			`* at a time. Note, however, that we know several things about y:`
			`*`
			`* - the guard and round bits at the bottom are sure to be zero;`
			`*`
			`* - often many low bits are zero (y is often from a single or double`
			`* precision source);`
			`*`
			`* - bit FP_NMANT-1 is set, and FP_1*2 fits in a word.`
			`*`
			`* We can also test for 32-zero-bits swiftly. In this case, the center`
			`* part of the loop---setting sticky, shifting A, and not adding---will`
			`* run 32 times without adding X to A. We can do a 32-bit shift faster`
			`* by simply moving words. Since zeros are common, we optimize this case.`
			`* Furthermore, since A is initially zero, we can omit the shift as well`
			`* until we reach a nonzero word.`
			`*/`
			`struct fpn *`
			`fpu_mul(fe)`
			`register struct fpemu *fe;`
			`{`
			`register struct fpn x = &fe->fe_f1, y = &fe->fe_f2;`
PR 7220 from Ken Nakata: I've fixed most (not all) m68k FPE bugs that give bogus calculation results, esp. fsqrt instruction. Also, the internal FP representation has been reduced from 115-bit mantissa to 67-bit mantissa which reduced the required mantissa operation roughly by one fourth. I've done an extensive (though not exhaustive - it's impossible!) test on the internal routines by feeding them randomly generated FP numbers, and found that the new code is more precise than MC68040 FPU (it seems to have a rounding bug). [ Only change was to keep fpu_calcea.c's name instead of renaming to fpu_ea.c in Ken's patch. --akb ] 1999-05-31 00:17:48 +04:00			`register u_int a2, a1, a0, x2, x1, x0, bit, m;`
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00			`register int sticky;`
			`FPU_DECL_CARRY`

			`/*`
			* Put the `heavier' operand on the right (see fpu_emu.h).
			`* Then we will have one of the following cases, taken in the`
			`* following order:`
			`*`
			`* - y = NaN. Implied: if only one is a signalling NaN, y is.`
			`* The result is y.`
			`* - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN`
			`* case was taken care of earlier).`
			`* If x = 0, the result is NaN. Otherwise the result`
			`* is y, with its sign reversed if x is negative.`
			`* - x = 0. Implied: y is 0 or number.`
			`* The result is 0 (with XORed sign as usual).`
			`* - other. Implied: both x and y are numbers.`
			`* The result is x * y (XOR sign, multiply bits, add exponents).`
			`*/`
			`ORDER(x, y);`
			`if (ISNAN(y)) {`
			`y->fp_sign ^= x->fp_sign;`
			`return (y);`
			`}`
			`if (ISINF(y)) {`
			`if (ISZERO(x))`
			`return (fpu_newnan(fe));`
			`y->fp_sign ^= x->fp_sign;`
			`return (y);`
			`}`
			`if (ISZERO(x)) {`
			`x->fp_sign ^= y->fp_sign;`
			`return (x);`
			`}`

			`/*`
			* Setup. In the code below, the mask `m' will hold the current
			* mantissa byte from y. The variable `bit' denotes the bit
			`* within m. We also define some macros to deal with everything.`
			`*/`
			`x2 = x->fp_mant[2];`
			`x1 = x->fp_mant[1];`
			`x0 = x->fp_mant[0];`
PR 7220 from Ken Nakata: I've fixed most (not all) m68k FPE bugs that give bogus calculation results, esp. fsqrt instruction. Also, the internal FP representation has been reduced from 115-bit mantissa to 67-bit mantissa which reduced the required mantissa operation roughly by one fourth. I've done an extensive (though not exhaustive - it's impossible!) test on the internal routines by feeding them randomly generated FP numbers, and found that the new code is more precise than MC68040 FPU (it seems to have a rounding bug). [ Only change was to keep fpu_calcea.c's name instead of renaming to fpu_ea.c in Ken's patch. --akb ] 1999-05-31 00:17:48 +04:00			`sticky = a2 = a1 = a0 = 0;`
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00
			`#define ADD /* A += X */ \`
PR 7220 from Ken Nakata: I've fixed most (not all) m68k FPE bugs that give bogus calculation results, esp. fsqrt instruction. Also, the internal FP representation has been reduced from 115-bit mantissa to 67-bit mantissa which reduced the required mantissa operation roughly by one fourth. I've done an extensive (though not exhaustive - it's impossible!) test on the internal routines by feeding them randomly generated FP numbers, and found that the new code is more precise than MC68040 FPU (it seems to have a rounding bug). [ Only change was to keep fpu_calcea.c's name instead of renaming to fpu_ea.c in Ken's patch. --akb ] 1999-05-31 00:17:48 +04:00			`FPU_ADDS(a2, a2, x2); \`
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00			`FPU_ADDCS(a1, a1, x1); \`
			`FPU_ADDC(a0, a0, x0)`

			`#define SHR1 /* A >>= 1, with sticky */ \`
PR 7220 from Ken Nakata: I've fixed most (not all) m68k FPE bugs that give bogus calculation results, esp. fsqrt instruction. Also, the internal FP representation has been reduced from 115-bit mantissa to 67-bit mantissa which reduced the required mantissa operation roughly by one fourth. I've done an extensive (though not exhaustive - it's impossible!) test on the internal routines by feeding them randomly generated FP numbers, and found that the new code is more precise than MC68040 FPU (it seems to have a rounding bug). [ Only change was to keep fpu_calcea.c's name instead of renaming to fpu_ea.c in Ken's patch. --akb ] 1999-05-31 00:17:48 +04:00			`sticky \|= a2 & 1, \`
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00			`a2 = (a2 >> 1) \| (a1 << 31), a1 = (a1 >> 1) \| (a0 << 31), a0 >>= 1`

			`#define SHR32 /* A >>= 32, with sticky */ \`
PR 7220 from Ken Nakata: I've fixed most (not all) m68k FPE bugs that give bogus calculation results, esp. fsqrt instruction. Also, the internal FP representation has been reduced from 115-bit mantissa to 67-bit mantissa which reduced the required mantissa operation roughly by one fourth. I've done an extensive (though not exhaustive - it's impossible!) test on the internal routines by feeding them randomly generated FP numbers, and found that the new code is more precise than MC68040 FPU (it seems to have a rounding bug). [ Only change was to keep fpu_calcea.c's name instead of renaming to fpu_ea.c in Ken's patch. --akb ] 1999-05-31 00:17:48 +04:00			`sticky \|= a2, a2 = a1, a1 = a0, a0 = 0`
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00
			`#define STEP /* each 1-bit step of the multiplication */ \`
			`SHR1; if (bit & m) { ADD; }; bit <<= 1`

			`/*`
			`* We are ready to begin. The multiply loop runs once for each`
			`* of the four 32-bit words. Some words, however, are special.`
			`* As noted above, the low order bits of Y are often zero. Even`
			`* if not, the first loop can certainly skip the guard bits.`
			`* The last word of y has its highest 1-bit in position FP_NMANT-1,`
			`* so we stop the loop when we move past that bit.`
			`*/`
PR 7220 from Ken Nakata: I've fixed most (not all) m68k FPE bugs that give bogus calculation results, esp. fsqrt instruction. Also, the internal FP representation has been reduced from 115-bit mantissa to 67-bit mantissa which reduced the required mantissa operation roughly by one fourth. I've done an extensive (though not exhaustive - it's impossible!) test on the internal routines by feeding them randomly generated FP numbers, and found that the new code is more precise than MC68040 FPU (it seems to have a rounding bug). [ Only change was to keep fpu_calcea.c's name instead of renaming to fpu_ea.c in Ken's patch. --akb ] 1999-05-31 00:17:48 +04:00			`if ((m = y->fp_mant[2]) == 0) {`
Still incomplete, but much more complete FPE from Ken Nakata <kenn@remus.rutgers.edu>. This emulator does not yet emulate the following functions: FSINH, FETOXM1, FTANH, FATAN, FASIN, FATANH, FSIN, FTAN, FETOX, FTWOTOX, FTENTOX, FCOSH, FACOS, FCOS, FSINCOS It is sufficient, however, to allow programs like df, w, and newfs, to run to completion with correct results. Portions of this code were based on the sparc fpe and on initial work by gwr. 1995-11-03 07:46:55 +03:00			`/* SHR32; / / unneeded since A==0 */`
			`} else {`
			`bit = 1 << FP_NG;`
			`do {`
			`STEP;`
			`} while (bit != 0);`
			`}`
			`if ((m = y->fp_mant[1]) == 0) {`
			`SHR32;`
			`} else {`
			`bit = 1;`
			`do {`
			`STEP;`
			`} while (bit != 0);`
			`}`
			`m = y->fp_mant[0]; /* definitely != 0 */`
			`bit = 1;`
			`do {`
			`STEP;`
			`} while (bit <= m);`

			`/*`
			`* Done with mantissa calculation. Get exponent and handle`
			`* 11.111...1 case, then put result in place. We reuse x since`
			`* it already has the right class (FP_NUM).`
			`*/`
			`m = x->fp_exp + y->fp_exp;`
			`if (a0 >= FP_2) {`
			`SHR1;`
			`m++;`
			`}`
			`x->fp_sign ^= y->fp_sign;`
			`x->fp_exp = m;`
			`x->fp_sticky = sticky;`
			`x->fp_mant[2] = a2;`
			`x->fp_mant[1] = a1;`
			`x->fp_mant[0] = a0;`
			`return (x);`
			`}`