Improve FPU state save/clear like x86, idea from OpenBSD.

nakayama 2008-03-14 15:39:18 +00:00
parent 74a205f37a
commit 5374009247
5 changed files with 119 additions and 108 deletions
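
The substance of the change is the new fpusave_cpu()/fpusave_lwp() pair added to vm_machdep.c (the last file below), which replaces the open-coded "if (l == fplwp) { savefpstate(...); fplwp = NULL; }" sequences that each caller used to carry. The following is a condensed sketch of the uniprocessor path, paraphrased from the diff rather than quoted verbatim; the MULTIPROCESSOR version additionally sends an IPI to whichever other CPU still owns the lwp's state and spins until that CPU has saved or dropped it.

	/*
	 * Sketch only, paraphrased from the vm_machdep.c hunk below
	 * (uniprocessor path; the MP IPI handling is omitted here).
	 */
	static inline void
	fpusave_cpu(bool save)
	{
		struct lwp *l = fplwp;	/* lwp owning this CPU's FPU, if any */

		if (l == NULL)
			return;
		if (save)
			savefpstate(l->l_md.md_fpstate);	/* flush live registers */
		else
			clearfpstate();				/* just discard them */
		fplwp = NULL;
	}

	void
	fpusave_lwp(struct lwp *l, bool save)
	{
		/* Make sure lwp l no longer owns any FPU state. */
		if (l == fplwp)
			fpusave_cpu(save);
	}

Callers that are about to read the state (fixalign in emul.c, cpu_getmcontext, the coredump paths, the FP trap handlers) pass save=true so the live registers are written back to l->l_md.md_fpstate first; callers that are about to overwrite or free the state (setregs, cpu_setmcontext, cpu_lwp_free) pass save=false and simply drop the live context.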

emul.c

@@ -1,4 +1,4 @@
/* $NetBSD: emul.c,v 1.19 2007/03/04 06:00:50 christos Exp $ */
/* $NetBSD: emul.c,v 1.20 2008/03/14 15:39:18 nakayama Exp $ */
/*-
* Copyright (c) 1997, 2001 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: emul.c,v 1.19 2007/03/04 06:00:50 christos Exp $");
__KERNEL_RCSID(0, "$NetBSD: emul.c,v 1.20 2008/03/14 15:39:18 nakayama Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -305,10 +305,7 @@ fixalign(struct lwp *l, struct trapframe64 *tf)
if (op.bits.st) {
if (op.bits.fl) {
if (l == fplwp) {
savefpstate(l->l_md.md_fpstate);
fplwp = NULL;
}
fpusave_lwp(l, true);
error = readfpreg(l, code.i_op3.i_rd, &data.i[0]);
if (error)

machdep.c

@@ -1,4 +1,4 @@
/* $NetBSD: machdep.c,v 1.216 2008/02/22 10:55:00 martin Exp $ */
/* $NetBSD: machdep.c,v 1.217 2008/03/14 15:39:18 nakayama Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
@@ -78,7 +78,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.216 2008/02/22 10:55:00 martin Exp $");
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.217 2008/03/14 15:39:18 nakayama Exp $");
#include "opt_ddb.h"
#include "opt_multiprocessor.h"
@@ -289,10 +289,7 @@ setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
* we must get rid of it, and the only way to do that is
* to save it. In any case, get rid of our FPU state.
*/
if (l == fplwp) {
savefpstate(fs);
fplwp = NULL;
}
fpusave_lwp(l, false);
free((void *)fs, M_SUBPROC);
l->l_md.md_fpstate = NULL;
}
@@ -520,8 +517,7 @@ sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
newsp = (struct rwindow *)((u_long)fp - CCFSZ);
error = (copyout(&ksi->ksi_info, &fp->sf_si, sizeof(ksi->ksi_info)) != 0 ||
copyout(&uc, &fp->sf_uc, ucsz) != 0 ||
copyout(&tf->tf_out[6], &newsp->rw_in[6],
sizeof(tf->tf_out[6])) != 0);
suword(&newsp->rw_in[6], (uintptr_t)tf->tf_out[6]) != 0);
mutex_enter(&p->p_smutex);
if (error) {
@@ -595,11 +591,6 @@ cpu_reboot(register int howto, char *user_boot_string)
}
(void) splhigh(); /* ??? */
#if defined(MULTIPROCESSOR)
/* Stop all secondary cpus */
mp_halt_cpus();
#endif
/* If rebooting and a dump is requested, do it. */
if (howto & RB_DUMP)
dumpsys();
@@ -608,6 +599,11 @@ haltsys:
/* Run any shutdown hooks. */
doshutdownhooks();
#ifdef MULTIPROCESSOR
/* Stop all secondary cpus */
mp_halt_cpus();
#endif
/* If powerdown was requested, do it. */
if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
/* Let the OBP do the work. */
@@ -619,12 +615,20 @@ haltsys:
}
if (howto & RB_HALT) {
#ifdef MULTIPROCESSOR
printf("cpu%d: halted\n\n", cpu_number());
#else
printf("halted\n\n");
#endif
OF_exit();
panic("PROM exit failed");
}
#ifdef MULTIPROCESSOR
printf("cpu%d: rebooting\n\n", cpu_number());
#else
printf("rebooting\n\n");
#endif
if (user_boot_string && *user_boot_string) {
i = strlen(user_boot_string);
if (i > sizeof(str))
@@ -1826,7 +1830,7 @@ cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
/* Save FP register context, if any. */
if (l->l_md.md_fpstate != NULL) {
struct fpstate64 fs, *fsp;
struct fpstate64 *fsp;
__fpregset_t *fpr = &mcp->__fpregs;
/*
@@ -1835,12 +1839,8 @@ cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
* with it later when it becomes necessary.
* Otherwise, get it from the process's save area.
*/
if (l == fplwp) {
fsp = &fs;
savefpstate(fsp);
} else {
fsp = l->l_md.md_fpstate;
}
fpusave_lwp(l, true);
fsp = l->l_md.md_fpstate;
memcpy(&fpr->__fpu_fr, fsp->fs_regs, sizeof (fpr->__fpu_fr));
mcp->__fpregs.__fpu_q = NULL; /* `Need more info.' */
mcp->__fpregs.__fpu_fsr = fsp->fs_fsr;
@@ -1925,11 +1925,13 @@ cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
* by lazy FPU context switching); allocate it if necessary.
*/
if ((fsp = l->l_md.md_fpstate) == NULL) {
KERNEL_LOCK(1, l);
fsp = malloc(sizeof (*fsp), M_SUBPROC, M_WAITOK);
l->l_md.md_fpstate = fsp;
} else if (l == fplwp) {
KERNEL_UNLOCK_ONE(l);
} else {
/* Drop the live context on the floor. */
savefpstate(fsp);
fpusave_lwp(l, false);
}
/* Note: sizeof fpr->__fpu_fr <= sizeof fsp->fs_regs. */
memcpy(fsp->fs_regs, &fpr->__fpu_fr, sizeof (fpr->__fpu_fr));
@@ -1970,7 +1972,7 @@ cpu_need_resched(struct cpu_info *ci, int flags)
#if defined(MULTIPROCESSOR)
/* Just interrupt the target CPU, so it can notice its AST */
if ((flags & RESCHED_IMMED) || ci->ci_index != cpu_number())
sparc64_send_ipi(ci->ci_cpuid, sparc64_ipi_nop, 0);
sparc64_send_ipi(ci->ci_cpuid, sparc64_ipi_nop, 0, 0);
#endif
}

netbsd32_machdep.c

@@ -1,4 +1,4 @@
/* $NetBSD: netbsd32_machdep.c,v 1.75 2008/02/25 10:00:45 nakayama Exp $ */
/* $NetBSD: netbsd32_machdep.c,v 1.76 2008/03/14 15:39:18 nakayama Exp $ */
/*
* Copyright (c) 1998, 2001 Matthew R. Green
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.75 2008/02/25 10:00:45 nakayama Exp $");
__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.76 2008/03/14 15:39:18 nakayama Exp $");
#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
@@ -136,10 +136,7 @@ netbsd32_setregs(struct lwp *l, struct exec_package *pack, u_long stack)
* we must get rid of it, and the only way to do that is
* to save it. In any case, get rid of our FPU state.
*/
if (l == fplwp) {
savefpstate(fs);
fplwp = NULL;
}
fpusave_lwp(l, false);
free((void *)fs, M_SUBPROC);
l->l_md.md_fpstate = NULL;
}
@@ -711,10 +708,7 @@ cpu_coredump32(struct lwp *l, void *iocookie, struct core32 *chdr)
}
if (l->l_md.md_fpstate) {
if (l == fplwp) {
savefpstate(l->l_md.md_fpstate);
fplwp = NULL;
}
fpusave_lwp(l, true);
/* Copy individual fields */
for (i=0; i<32; i++)
md_core.md_fpstate.fs_regs[i] =
@@ -792,7 +786,7 @@ netbsd32_cpu_getmcontext(l, mcp, flags)
/* Save FP register context, if any. */
if (l->l_md.md_fpstate != NULL) {
struct fpstate fs, *fsp;
struct fpstate *fsp;
netbsd32_fpregset_t *fpr = &mcp->__fpregs;
/*
@@ -801,12 +795,8 @@ netbsd32_cpu_getmcontext(l, mcp, flags)
* with it later when it becomes necessary.
* Otherwise, get it from the process's save area.
*/
if (p == fplwp) {
fsp = &fs;
savefpstate(fsp);
} else {
fsp = l->l_md.md_fpstate;
}
fpusave_lwp(l, true);
fsp = l->l_md.md_fpstate;
memcpy(&fpr->__fpu_fr, fsp->fs_regs, sizeof (fpr->__fpu_fr));
mcp->__fpregs.__fpu_q = NULL; /* `Need more info.' */
mcp->__fpregs.__fpu_fsr = fs.fs_fsr;
@@ -898,11 +888,13 @@ netbsd32_cpu_setmcontext(l, mcp, flags)
* XXX immediately or just fault it in later?
*/
if ((fsp = l->l_md.md_fpstate) == NULL) {
KERNEL_LOCK(1, l);
fsp = malloc(sizeof (*fsp), M_SUBPROC, M_WAITOK);
l->l_md.md_fpstate = fsp;
} else if (p == fplwp) {
KERNEL_UNLOCK_ONE(l);
} else {
/* Drop the live context on the floor. */
savefpstate(fsp);
fpusave_lwp(l, false);
reload = 1;
}
/* Note: sizeof fpr->__fpu_fr <= sizeof fsp->fs_regs. */
@@ -1214,11 +1206,13 @@ cpu_setmcontext32(struct lwp *l, const mcontext32_t *mcp, unsigned int flags)
* by lazy FPU context switching); allocate it if necessary.
*/
if ((fsp = l->l_md.md_fpstate) == NULL) {
KERNEL_LOCK(1, l);
fsp = malloc(sizeof (*fsp), M_SUBPROC, M_WAITOK);
l->l_md.md_fpstate = fsp;
} else if (l == fplwp) {
KERNEL_UNLOCK_ONE(l);
} else {
/* Drop the live context on the floor. */
savefpstate(fsp);
fpusave_lwp(l, false);
}
/* Note: sizeof fpr->__fpu_fr <= sizeof fsp->fs_regs. */
memcpy(fsp->fs_regs, &fpr->__fpu_fr, sizeof (fpr->__fpu_fr));
@@ -1289,7 +1283,7 @@ cpu_getmcontext32(struct lwp *l, mcontext32_t *mcp, unsigned int *flags)
/* Save FP register context, if any. */
if (l->l_md.md_fpstate != NULL) {
#ifdef notyet
struct fpstate64 fs, *fsp;
struct fpstate64 *fsp;
__fpregset_t *fpr = &mcp->__fpregs;
/*
@@ -1298,12 +1292,8 @@ cpu_getmcontext32(struct lwp *l, mcontext32_t *mcp, unsigned int *flags)
* with it later when it becomes necessary.
* Otherwise, get it from the process's save area.
*/
if (l == fplwp) {
fsp = &fs;
savefpstate(fsp);
} else {
fsp = l->l_md.md_fpstate;
}
fpusave_lwp(l, true);
fsp = l->l_md.md_fpstate;
memcpy(&fpr->__fpu_fr, fsp->fs_regs, sizeof (fpr->__fpu_fr));
mcp->__fpregs.__fpu_q = NULL; /* `Need more info.' */
mcp->__fpregs.__fpu_fsr = fs.fs_fsr;

trap.c

@@ -1,4 +1,4 @@
/* $NetBSD: trap.c,v 1.145 2007/10/24 14:50:40 ad Exp $ */
/* $NetBSD: trap.c,v 1.146 2008/03/14 15:39:18 nakayama Exp $ */
/*
* Copyright (c) 1996-2002 Eduardo Horvath. All rights reserved.
@@ -50,11 +50,12 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.145 2007/10/24 14:50:40 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.146 2008/03/14 15:39:18 nakayama Exp $");
#define NEW_FPSTATE
#include "opt_ddb.h"
#include "opt_multiprocessor.h"
#include "opt_compat_svr4.h"
#include "opt_compat_netbsd32.h"
@@ -434,6 +435,14 @@ trap(struct trapframe64 *tf, unsigned int type, vaddr_t pc, long tstate)
ksiginfo_t ksi;
int error;
int code, sig;
#ifdef MULTIPROCESSOR
int s;
#define disintr() s = intr_disable()
#define rstintr() intr_restore(s)
#else
#define disintr() /* nothing */
#define rstintr() /* nothing */
#endif
/* This steps the PC over the trap. */
#define ADVANCE (n = tf->tf_npc, tf->tf_pc = n, tf->tf_npc = n + 4)
@@ -529,14 +538,17 @@ extern void db_printf(const char * , ...);
newfplwp = curlwp;
/* force other cpus to give up this fpstate */
if (newfplwp->l_md.md_fpstate)
save_and_clear_fpstate(newfplwp);
fpusave_lwp(newfplwp, true);
}
if (fplwp != newfplwp) {
disintr();
if (fplwp != NULL) {
/* someone else had it, maybe? */
KASSERT(fplwp->l_md.md_fpstate != NULL);
savefpstate(fplwp->l_md.md_fpstate);
fplwp = NULL;
}
rstintr();
/* If we have an allocated fpstate, load it */
if (newfplwp->l_md.md_fpstate != NULL) {
fplwp = newfplwp;
@@ -668,11 +680,13 @@ badtrap:
struct fpstate64 *fs = l->l_md.md_fpstate;
if (fs == NULL) {
KERNEL_LOCK(1, l);
/* NOTE: fpstate must be 64-bit aligned */
fs = malloc((sizeof *fs), M_SUBPROC, M_WAITOK);
*fs = initfpstate;
fs->fs_qsize = 0;
l->l_md.md_fpstate = fs;
KERNEL_UNLOCK_ONE(l);
}
/*
* We may have more FPEs stored up and/or ops queued.
@@ -688,11 +702,15 @@ badtrap:
}
if (fplwp != l) { /* we do not have it */
/* but maybe another CPU has it? */
save_and_clear_fpstate(l);
if (fplwp != NULL) /* someone else had it */
fpusave_lwp(l, true);
disintr();
if (fplwp != NULL) { /* someone else had it */
KASSERT(fplwp->l_md.md_fpstate != NULL);
savefpstate(fplwp->l_md.md_fpstate);
}
loadfpstate(fs);
fplwp = l; /* now we do have it */
rstintr();
}
tf->tf_tstate |= (PSTATE_PEF << TSTATE_PSTATE_SHIFT);
break;
@@ -756,8 +774,11 @@ badtrap:
*/
if (l != fplwp)
panic("fpe without being the FP user");
disintr();
KASSERT(l->l_md.md_fpstate != NULL);
savefpstate(l->l_md.md_fpstate);
fplwp = NULL;
rstintr();
/* tf->tf_psr &= ~PSR_EF; */ /* share_fpu will do this */
if (l->l_md.md_fpstate->fs_qsize == 0) {
error = copyin((void *)pc,

vm_machdep.c

@@ -1,4 +1,4 @@
/* $NetBSD: vm_machdep.c,v 1.78 2008/02/22 10:55:00 martin Exp $ */
/* $NetBSD: vm_machdep.c,v 1.79 2008/03/14 15:39:18 nakayama Exp $ */
/*
* Copyright (c) 1996-2002 Eduardo Horvath. All rights reserved.
@@ -50,8 +50,9 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.78 2008/02/22 10:55:00 martin Exp $");
__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.79 2008/03/14 15:39:18 nakayama Exp $");
#include "opt_multiprocessor.h"
#include "opt_coredump.h"
#include <sys/param.h>
@@ -231,7 +232,7 @@ cpu_lwp_fork(l1, l2, stack, stacksize, func, arg)
#endif
memcpy(npcb, opcb, sizeof(struct pcb));
if (l1->l_md.md_fpstate) {
save_and_clear_fpstate(l1);
fpusave_lwp(l1, true);
l2->l_md.md_fpstate = malloc(sizeof(struct fpstate64),
M_SUBPROC, M_WAITOK);
memcpy(l2->l_md.md_fpstate, l1->l_md.md_fpstate,
@@ -295,68 +296,68 @@ cpu_lwp_fork(l1, l2, stack, stacksize, func, arg)
#endif
}
static inline void
fpusave_cpu(bool save)
{
struct lwp *l = fplwp;
if (l == NULL)
return;
if (save)
savefpstate(l->l_md.md_fpstate);
else
clearfpstate();
fplwp = NULL;
}
void
save_and_clear_fpstate(struct lwp *l)
fpusave_lwp(struct lwp *l, bool save)
{
#ifdef MULTIPROCESSOR
struct cpu_info *ci;
#endif
volatile struct cpu_info *ci;
if (l == fplwp) {
savefpstate(l->l_md.md_fpstate);
fplwp = NULL;
int s = intr_disable();
fpusave_cpu(save);
intr_restore(s);
return;
}
#ifdef MULTIPROCESSOR
for (ci = cpus; ci != NULL; ci = ci->ci_next) {
if (ci == curcpu())
int spincount;
if (ci == curcpu() || !CPUSET_HAS(cpus_active, ci->ci_index))
continue;
if (ci->ci_fplwp != l)
continue;
sparc64_send_ipi(ci->ci_cpuid, sparc64_ipi_save_fpstate, 0);
sparc64_send_ipi(ci->ci_cpuid, save ?
sparc64_ipi_save_fpstate :
sparc64_ipi_drop_fpstate, 0, 0);
spincount = 0;
while (ci->ci_fplwp == l) {
membar_sync();
spincount++;
if (spincount > 10000000)
panic("fpusave_lwp ipi didn't");
}
break;
}
#else
if (l == fplwp)
fpusave_cpu(save);
#endif
}
void
cpu_lwp_free(l, proc)
struct lwp *l;
int proc;
cpu_lwp_free(struct lwp *l, int proc)
{
#ifdef MULTIPROCESSOR
struct cpu_info *ci;
int found;
found = 0;
#endif
if (l->l_md.md_fpstate != NULL) {
if (l == fplwp) {
clearfpstate();
fplwp = NULL;
#ifdef MULTIPROCESSOR
found = 1;
#endif
}
#ifdef MULTIPROCESSOR
if (found)
return;
#endif
}
#ifdef MULTIPROCESSOR
/* check if anyone else has this lwp as fplwp */
for (ci = cpus; ci != NULL; ci = ci->ci_next) {
if (ci == curcpu())
continue;
if (l == ci->ci_fplwp) {
/* drop the fplwp from the other fpu */
sparc64_send_ipi(ci->ci_cpuid,
sparc64_ipi_drop_fpstate, 0);
break;
}
}
#endif
if (l->l_md.md_fpstate != NULL)
fpusave_lwp(l, false);
}
void
@@ -436,7 +437,7 @@ cpu_coredump(struct lwp *l, void *iocookie, struct core *chdr)
md_core.md_tf.tf_in[7] = l->l_md.md_tf->tf_in[7];
#endif
if (l->l_md.md_fpstate) {
save_and_clear_fpstate(l);
fpusave_lwp(l, true);
md_core.md_fpstate = *l->l_md.md_fpstate;
} else
memset(&md_core.md_fpstate, 0,