diff --git a/sys/arch/sparc/fpu/fpu.c b/sys/arch/sparc/fpu/fpu.c
index 5f798c49f69f..ecef6fa38836 100644
--- a/sys/arch/sparc/fpu/fpu.c
+++ b/sys/arch/sparc/fpu/fpu.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu.c,v 1.6 1997/07/29 10:09:51 fair Exp $ */
+/*	$NetBSD: fpu.c,v 1.7 2000/06/18 06:54:17 mrg Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -98,7 +98,11 @@ static u_char fpu_codes[] = {
 void
 fpu_cleanup(p, fs)
 	register struct proc *p;
+#ifndef SUN4U
 	register struct fpstate *fs;
+#else /* SUN4U */
+	register struct fpstate64 *fs;
+#endif /* SUN4U */
 {
 	register int i, fsr = fs->fs_fsr, error;
 	union instr instr;
@@ -107,7 +111,7 @@ fpu_cleanup(p, fs)
 	switch ((fsr >> FSR_FTT_SHIFT) & FSR_FTT_MASK) {
 
 	case FSR_TT_NONE:
-		panic("fpu_cleanup 1");	/* ??? */
+		panic("fpu_cleanup: No fault");	/* ??? */
 		break;
 
 	case FSR_TT_IEEE:
@@ -118,9 +122,18 @@ fpu_cleanup(p, fs)
 		break;		/* XXX should return, but queue remains */
 
 	case FSR_TT_UNFIN:
+#ifdef SUN4U
+		if (fs->fs_qsize == 0) {
+			printf("fpu_cleanup: unfinished fpop\n");
+			/* The book sez reexecute or emulate. */
+			return;
+		}
+		break;
+
+#endif /* SUN4U */
 	case FSR_TT_UNIMP:
 		if (fs->fs_qsize == 0)
-			panic("fpu_cleanup 2");
+			panic("fpu_cleanup: unimplemented fpop");
 		break;
 
 	case FSR_TT_SEQ:
@@ -159,6 +172,12 @@ fpu_cleanup(p, fs)
 			break;
 
 		case NOTFPU:
+#ifdef SUN4U
+#ifdef DEBUG
+			printf("fpu_cleanup: not an FPU error -- sending SIGILL\n");
+			Debugger();
+#endif
+#endif /* SUN4U */
 			trapsignal(p, SIGILL, 0);	/* ??? code?  */
 			break;
 
@@ -183,7 +202,11 @@ out:
 fpu_emulate(p, tf, fs)
 	struct proc *p;
 	register struct trapframe *tf;
+#ifndef SUN4U
 	register struct fpstate *fs;
+#else /* SUN4U */
+	register struct fpstate64 *fs;
+#endif /* SUN4U */
 {
 
 	do {
@@ -234,8 +257,13 @@ fpu_execute(fe, instr)
 	union instr instr;
 {
 	register struct fpn *fp;
+#ifndef SUN4U
 	register int opf, rs1, rs2, rd, type, mask, fsr, cx;
 	register struct fpstate *fs;
+#else /* SUN4U */
+	register int opf, rs1, rs2, rd, type, mask, fsr, cx, i, cond;
+	register struct fpstate64 *fs;
+#endif /* SUN4U */
 	u_int space[4];
 
 	/*
@@ -256,6 +284,119 @@ fpu_execute(fe, instr)
 	fs = fe->fe_fpstate;
 	fe->fe_fsr = fs->fs_fsr & ~FSR_CX;
 	fe->fe_cx = 0;
+#ifdef SUN4U
+	/*
+	 * Check to see if we're dealing with a fancy cmove and handle
+	 * it first.
+	 */
+	if (instr.i_op3.i_op3 == IOP3_FPop2 && (opf&0xff0) != (FCMP&0xff0)) {
+		switch (opf >>= 2) {
+		case FMVFC0 >> 2:
+			cond = (fs->fs_fsr>>FSR_FCC_SHIFT)&FSR_FCC_MASK;
+			if (instr.i_fmovcc.i_cond != cond) return(0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;
+		case FMVFC1 >> 2:
+			cond = (fs->fs_fsr>>FSR_FCC1_SHIFT)&FSR_FCC_MASK;
+			if (instr.i_fmovcc.i_cond != cond) return(0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;
+		case FMVFC2 >> 2:
+			cond = (fs->fs_fsr>>FSR_FCC2_SHIFT)&FSR_FCC_MASK;
+			if (instr.i_fmovcc.i_cond != cond) return(0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;
+		case FMVFC3 >> 2:
+			cond = (fs->fs_fsr>>FSR_FCC3_SHIFT)&FSR_FCC_MASK;
+			if (instr.i_fmovcc.i_cond != cond) return(0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;
+		case FMVIC >> 2:
+			/* Presume we're curproc */
+			cond = (curproc->p_md.md_tf->tf_tstate>>TSTATE_CCR_SHIFT)&PSR_ICC;
+			if (instr.i_fmovcc.i_cond != cond) return(0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;			
+		case FMVXC >> 2:
+			/* Presume we're curproc */
+			cond = (curproc->p_md.md_tf->tf_tstate>>(TSTATE_CCR_SHIFT+XCC_SHIFT))&PSR_ICC;
+			if (instr.i_fmovcc.i_cond != cond) return(0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;			
+		case FMVRZ >> 2:
+			/* Presume we're curproc */
+			rs1 = instr.i_fmovr.i_rs1;
+			if (rs1 != 0 && (int64_t)curproc->p_md.md_tf->tf_global[rs1] != 0)
+				return (0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;			
+		case FMVRLEZ >> 2:
+			/* Presume we're curproc */
+			rs1 = instr.i_fmovr.i_rs1;
+			if (rs1 != 0 && (int64_t)curproc->p_md.md_tf->tf_global[rs1] > 0)
+				return (0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;			
+		case FMVRLZ >> 2:
+			/* Presume we're curproc */
+			rs1 = instr.i_fmovr.i_rs1;
+			if (rs1 == 0 || (int64_t)curproc->p_md.md_tf->tf_global[rs1] >= 0)
+				return (0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;			
+		case FMVRNZ >> 2:
+			/* Presume we're curproc */
+			rs1 = instr.i_fmovr.i_rs1;
+			if (rs1 == 0 || (int64_t)curproc->p_md.md_tf->tf_global[rs1] == 0)
+				return (0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;			
+		case FMVRGZ >> 2:
+			/* Presume we're curproc */
+			rs1 = instr.i_fmovr.i_rs1;
+			if (rs1 == 0 || (int64_t)curproc->p_md.md_tf->tf_global[rs1] <= 0)
+				return (0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;			
+		case FMVRGEZ >> 2:
+			/* Presume we're curproc */
+			rs1 = instr.i_fmovr.i_rs1;
+			if (rs1 != 0 && (int64_t)curproc->p_md.md_tf->tf_global[rs1] < 0)
+				return (0); /* success */
+			rs1 = fs->fs_regs[rs2];
+			goto mov;			
+		case FCMP >> 2:
+			fpu_explode(fe, &fe->fe_f1, type, rs1);
+			fpu_explode(fe, &fe->fe_f2, type, rs2);
+			fpu_compare(fe, 0);
+			goto cmpdone;
+			
+		case FCMPE >> 2:
+			fpu_explode(fe, &fe->fe_f1, type, rs1);
+			fpu_explode(fe, &fe->fe_f2, type, rs2);
+			fpu_compare(fe, 1);
+		cmpdone:
+			/*
+			 * The only possible exception here is NV; catch it
+			 * early and get out, as there is no result register.
+			 */
+			cx = fe->fe_cx;
+			fsr = fe->fe_fsr | (cx << FSR_CX_SHIFT);
+			if (cx != 0) {
+				if (fsr & (FSR_NV << FSR_TEM_SHIFT)) {
+					fs->fs_fsr = (fsr & ~FSR_FTT) |
+						(FSR_TT_IEEE << FSR_FTT_SHIFT);
+					return (FPE);
+				}
+				fsr |= FSR_NV << FSR_AX_SHIFT;
+			}
+			fs->fs_fsr = fsr;
+			return (0);
+		default:
+			return (NOTFPU);
+		}
+	}
+#endif /* SUN4U */
 	switch (opf >>= 2) {
 
 	default:
@@ -272,7 +413,14 @@ fpu_execute(fe, instr)
 	case FABS >> 2:
 		rs1 = fs->fs_regs[rs2] & ~(1 << 31);
 	mov:
+#ifndef SUN4U
 		fs->fs_regs[rd] = rs1;
+#else /* SUN4U */
+		i = 1<<type;
+		fs->fs_regs[rd++] = rs1;
+		while (--i) 
+			fs->fs_regs[rd++] = fs->fs_regs[++rs2];
+#endif /* SUN4U */
 		fs->fs_fsr = fe->fe_fsr;
 		return (0);	/* success */
 
@@ -305,6 +453,7 @@ fpu_execute(fe, instr)
 		fp = fpu_div(fe);
 		break;
 
+#ifndef SUN4U
 	case FCMP >> 2:
 		fpu_explode(fe, &fe->fe_f1, type, rs1);
 		fpu_explode(fe, &fe->fe_f2, type, rs2);
@@ -333,6 +482,7 @@ fpu_execute(fe, instr)
 		fs->fs_fsr = fsr;
 		return (0);
 
+#endif /* not SUN4U */
 	case FSMULD >> 2:
 	case FDMULX >> 2:
 		if (type == FTYPE_EXT)
@@ -343,9 +493,27 @@ fpu_execute(fe, instr)
 		fp = fpu_mul(fe);
 		break;
 
+#ifdef SUN4U
+	case FXTOS >> 2:
+	case FXTOD >> 2:
+	case FXTOQ >> 2:
+		type = FTYPE_LNG;
+		fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
+		type = opf & 3;	/* sneaky; depends on instruction encoding */
+		break;
+
+	case FTOX >> 2:
+		fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
+		type = FTYPE_LNG;	/* result is a 64-bit integer */
+		break;		/* must not fall into the FTOS/FTOD/FTOI arm */
+#endif /* SUN4U */
 	case FTOS >> 2:
 	case FTOD >> 2:
+#ifndef SUN4U
 	case FTOX >> 2:
+#else /* SUN4U */
+	case FTOQ >> 2:
+#endif /* SUN4U */
 	case FTOI >> 2:
 		fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 		type = opf & 3;	/* sneaky; depends on instruction encoding */
@@ -374,7 +542,11 @@ fpu_execute(fe, instr)
 	}
 	fs->fs_fsr = fsr;
 	fs->fs_regs[rd] = space[0];
+#ifndef SUN4U
 	if (type >= FTYPE_DBL) {
+#else /* SUN4U */
+	if (type >= FTYPE_DBL || type == FTYPE_LNG) {
+#endif /* SUN4U */
 		fs->fs_regs[rd + 1] = space[1];
 		if (type > FTYPE_DBL) {
 			fs->fs_regs[rd + 2] = space[2];
diff --git a/sys/arch/sparc/fpu/fpu_emu.h b/sys/arch/sparc/fpu/fpu_emu.h
index 5b2b317c3838..00154e49197b 100644
--- a/sys/arch/sparc/fpu/fpu_emu.h
+++ b/sys/arch/sparc/fpu/fpu_emu.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu_emu.h,v 1.2 1994/11/20 20:52:39 deraadt Exp $ */
+/*	$NetBSD: fpu_emu.h,v 1.3 2000/06/18 06:54:17 mrg Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -90,6 +90,7 @@ struct fpn {
 #define	FP_NMANT	115		/* total bits in mantissa (incl g,r) */
 #define	FP_NG		2		/* number of low-order guard bits */
 #define	FP_LG		((FP_NMANT - 1) & 31)	/* log2(1.0) for fp_mant[0] */
+#define	FP_LG2		((FP_NMANT - 1) & 63)	/* log2(1.0) for fp_mant[0] and fp_mant[1] */
 #define	FP_QUIETBIT	(1 << (FP_LG - 1))	/* Quiet bit in NaNs (0.5) */
 #define	FP_1		(1 << FP_LG)		/* 1.0 in fp_mant[0] */
 #define	FP_2		(1 << (FP_LG + 1))	/* 2.0 in fp_mant[0] */
@@ -138,7 +139,11 @@ struct fpn {
  * Emulator state.
  */
 struct fpemu {
+#ifndef SUN4U
 	struct	fpstate *fe_fpstate;	/* registers, etc */
+#else /* SUN4U */
+	struct	fpstate64 *fe_fpstate;	/* registers, etc */
+#endif /* SUN4U */
 	int	fe_fsr;			/* fsr copy (modified during op) */
 	int	fe_cx;			/* exceptions */
 	struct	fpn fe_f1;		/* operand 1 */
diff --git a/sys/arch/sparc/fpu/fpu_explode.c b/sys/arch/sparc/fpu/fpu_explode.c
index ceaa896c02df..5a1400b33f6a 100644
--- a/sys/arch/sparc/fpu/fpu_explode.c
+++ b/sys/arch/sparc/fpu/fpu_explode.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu_explode.c,v 1.3 1996/03/14 19:41:54 christos Exp $ */
+/*	$NetBSD: fpu_explode.c,v 1.4 2000/06/18 06:54:17 mrg Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -105,6 +105,33 @@ fpu_itof(fp, i)
 	return (FPC_NUM);
 }
 
+#ifdef SUN4U
+/*
+ * 64-bit int -> fpn.
+ */
+int
+fpu_xitof(fp, i)
+	register struct fpn *fp;
+	register u_int64_t i;
+{
+
+	if (i == 0)
+		return (FPC_ZERO);
+	/*
+	 * Bit FP_LG2 of the fp_mant[0]:fp_mant[1] pair is 1.0, so set the
+	 * exponent there and let normalization fix it up (this relies on
+	 * fpu_norm()'s handling of `supernormals'; see fpu_subr.c).
+	 */
+	fp->fp_exp = FP_LG2;
+	i = (int64_t)i < 0 ? -i : i;	/* sign-and-magnitude */
+	fp->fp_mant[0] = (u_int)(i >> 32);	/* high word; no 64-bit pun */
+	fp->fp_mant[1] = (u_int)i;		/* low word */
+	fp->fp_mant[2] = fp->fp_mant[3] = 0;
+	fpu_norm(fp);
+	return (FPC_NUM);
+}
+#endif /* SUN4U */
+
 #define	mask(nbits) ((1 << (nbits)) - 1)
 
 /*
@@ -220,12 +247,22 @@ fpu_explode(fe, fp, type, reg)
 	int type, reg;
 {
 	register u_int s, *space;
+#ifdef SUN4U
+	u_int64_t l, *xspace;
 
+	xspace = (u_int64_t *)&fe->fe_fpstate->fs_regs[reg & ~1];
+	l = xspace[0];
+#endif /* SUN4U */
 	space = &fe->fe_fpstate->fs_regs[reg];
 	s = space[0];
 	fp->fp_sign = s >> 31;
 	fp->fp_sticky = 0;
 	switch (type) {
+#ifdef SUN4U
+	case FTYPE_LNG:
+		s = fpu_xitof(fp, l);
+		break;
+#endif /* SUN4U */
 
 	case FTYPE_INT:
 		s = fpu_itof(fp, s);
diff --git a/sys/arch/sparc/fpu/fpu_extern.h b/sys/arch/sparc/fpu/fpu_extern.h
index 460fbb169d4f..7a174f228bdf 100644
--- a/sys/arch/sparc/fpu/fpu_extern.h
+++ b/sys/arch/sparc/fpu/fpu_extern.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu_extern.h,v 1.2 1998/09/05 15:28:08 christos Exp $	*/
+/*	$NetBSD: fpu_extern.h,v 1.3 2000/06/18 06:54:17 mrg Exp $	*/
 
 /*-
  * Copyright (c) 1995 The NetBSD Foundation, Inc.
@@ -37,15 +37,25 @@
  */
 
 struct proc;
+#ifndef SUN4U
 struct fpstate;
 struct trapframe;
+#else /* SUN4U */
+struct fpstate64;
+struct trapframe64;
+#endif /* SUN4U */
 union instr;
 struct fpemu;
 struct fpn;
 
 /* fpu.c */
+#ifndef SUN4U
 void fpu_cleanup __P((struct proc *, struct fpstate *));
 int fpu_emulate __P((struct proc *, struct trapframe *, struct fpstate *));
+#else /* SUN4U */
+void fpu_cleanup __P((struct proc *, struct fpstate64 *));
+int fpu_emulate __P((struct proc *, struct trapframe64 *, struct fpstate64 *));
+#endif /* SUN4U */
 int fpu_execute __P((struct fpemu *, union instr));
 
 /* fpu_add.c */
@@ -59,6 +69,9 @@ struct fpn *fpu_div __P((struct fpemu *));
 
 /* fpu_explode.c */
 int fpu_itof __P((struct fpn *, u_int));
+#ifdef SUN4U
+int fpu_xitof __P((struct fpn *, u_int64_t));
+#endif /* SUN4U */
 int fpu_stof __P((struct fpn *, u_int));
 int fpu_dtof __P((struct fpn *, u_int, u_int ));
 int fpu_xtof __P((struct fpn *, u_int, u_int , u_int , u_int ));
@@ -66,6 +79,9 @@ void fpu_explode __P((struct fpemu *, struct fpn *, int, int ));
 
 /* fpu_implode.c */
 u_int fpu_ftoi __P((struct fpemu *, struct fpn *));
+#ifdef SUN4U
+u_int fpu_ftoxi __P((struct fpemu *, struct fpn *, u_int *));
+#endif /* SUN4U */
 u_int fpu_ftos __P((struct fpemu *, struct fpn *));
 u_int fpu_ftod __P((struct fpemu *, struct fpn *, u_int *));
 u_int fpu_ftox __P((struct fpemu *, struct fpn *, u_int *));
diff --git a/sys/arch/sparc/fpu/fpu_implode.c b/sys/arch/sparc/fpu/fpu_implode.c
index 5e4b4fd1576c..b01e8ad423bc 100644
--- a/sys/arch/sparc/fpu/fpu_implode.c
+++ b/sys/arch/sparc/fpu/fpu_implode.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu_implode.c,v 1.4 2000/04/12 04:06:41 chs Exp $ */
+/*	$NetBSD: fpu_implode.c,v 1.5 2000/06/18 06:54:17 mrg Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -233,6 +233,59 @@ fpu_ftoi(fe, fp)
 	return (0x7fffffff + sign);
 }
 
+#ifdef SUN4U
+/*
+ * fpn -> 64-bit int: returns the high word, stores the low word in res[1].
+ * N.B.: this conversion always rounds towards zero (this is a peculiarity
+ * of the SPARC instruction set).
+ */
+u_int
+fpu_ftoxi(fe, fp, res)
+	struct fpemu *fe;
+	register struct fpn *fp;
+	u_int *res;
+{
+	register u_int64_t i;
+	register int sign, exp;
+
+	sign = fp->fp_sign;
+	switch (fp->fp_class) {
+
+	case FPC_ZERO:
+		res[1] = 0;
+		return (0);
+
+	case FPC_NUM:
+		/*
+		 * If exp >= 64 the value is >= 2^64: overflow.  Otherwise
+		 * shift the value right into the last two mantissa words,
+		 * shifting guard and round bits out into the sticky bit;
+		 * ``round'' towards zero by just flagging inexact.  If the
+		 * magnitude is > 2^63, or positive and == 2^63, overflow.
+		 */
+		if ((exp = fp->fp_exp) >= 64)
+			break;
+		/* NB: the following includes exp < 0 cases */
+		if (fpu_shr(fp, FP_NMANT - 1 - exp) != 0)
+			fe->fe_cx |= FSR_NX;
+		i = ((u_int64_t)fp->fp_mant[2] << 32) | fp->fp_mant[3];
+		if (i >= ((u_int64_t)0x8000000000000000LL + sign))
+			break;
+		i = sign ? -i : i;	/* back to two's complement */
+		res[1] = (u_int)i;
+		return ((u_int)(i >> 32));
+
+	default:		/* Inf, qNaN, sNaN */
+		break;
+	}
+	/* overflow: replace any inexact exception with invalid */
+	fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV;
+	i = (u_int64_t)0x7fffffffffffffffLL + sign;	/* INT64_MAX or _MIN */
+	res[1] = (u_int)i;
+	return ((u_int)(i >> 32));
+}
+#endif /* SUN4U */
+
 /*
  * fpn -> single (32 bit single returned as return value).
  * We assume <= 29 bits in a single-precision fraction (1.f part).
@@ -452,6 +505,12 @@ fpu_implode(fe, fp, type, space)
 
 	switch (type) {
 
+#ifdef SUN4U
+	case FTYPE_LNG:
+		space[0] = fpu_ftoxi(fe, fp, space);
+		break;
+#endif /* SUN4U */
+
 	case FTYPE_INT:
 		space[0] = fpu_ftoi(fe, fp);
 		break;