Add support for saving/restoring SSE/SSE2 state using FXSAVE/FXRSTOR.

Reviewed by Frank.
This commit is contained in:
thorpej 2001-08-02 21:04:43 +00:00
parent a2ac0e50f5
commit 99a7f640fe
7 changed files with 299 additions and 49 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: locore.s,v 1.245 2001/07/31 22:52:44 jdolecek Exp $ */
/* $NetBSD: locore.s,v 1.246 2001/08/02 21:04:43 thorpej Exp $ */
/*-
* Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@ -2141,7 +2141,7 @@ IDTVEC(trap07)
pushl $T_DNA
INTRENTRY
pushl _C_LABEL(curproc)
call _C_LABEL(npxdna)
call *_C_LABEL(npxdna_func)
addl $4,%esp
testl %eax,%eax
jz calltrap

View File

@ -1,4 +1,4 @@
/* $NetBSD: machdep.c,v 1.449 2001/08/01 19:50:48 thorpej Exp $ */
/* $NetBSD: machdep.c,v 1.450 2001/08/02 21:04:43 thorpej Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc.
@ -200,6 +200,8 @@ int i386_fpu_present;
int i386_fpu_exception;
int i386_fpu_fdivbug;
int cpu_use_fxsave;
#define CPUID2MODEL(cpuid) (((cpuid) >> 4) & 15)
vaddr_t msgbuf_vaddr;
@ -1515,6 +1517,17 @@ identifycpu(struct cpu_info *ci)
cpu_tsc_freq = (rdtsc() - last_tsc) * 10;
}
#endif
#if defined(I686_CPU)
/*
* If we have FXSAVE/FXRSTOR, use them.
*/
if (cpu_feature & CPUID_FXSR) {
cpu_use_fxsave = 1;
lcr4(rcr4() | CR4_OSFXSR);
} else
cpu_use_fxsave = 0;
#endif /* I686_CPU */
}
/*
@ -2132,7 +2145,10 @@ setregs(p, pack, stack)
p->p_md.md_flags &= ~MDP_USEDFPU;
pcb->pcb_flags = 0;
pcb->pcb_savefpu.sv_env.en_cw = __NetBSD_NPXCW__;
if (cpu_use_fxsave)
pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __NetBSD_NPXCW__;
else
pcb->pcb_savefpu.sv_87.sv_env.en_cw = __NetBSD_NPXCW__;
tf = p->p_md.md_regs;
tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL);
@ -2332,6 +2348,13 @@ init386(first_avail)
if (PAGE_SIZE != NBPG)
panic("init386: PAGE_SIZE != NBPG");
/*
* Saving SSE registers won't work if the save area isn't
* 16-byte aligned.
*/
if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf)
panic("init386: pcb_savefpu not 16-byte aligned");
/*
* Start with 2 color bins -- this is just a guess to get us
* started. We'll recolor when we determine the largest cache

View File

@ -1,4 +1,4 @@
/* $NetBSD: process_machdep.c,v 1.34 2001/06/18 03:33:59 sommerfeld Exp $ */
/* $NetBSD: process_machdep.c,v 1.35 2001/08/02 21:04:44 thorpej Exp $ */
/*-
* Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@ -80,25 +80,88 @@
#include <machine/vm86.h>
#endif
static __inline struct trapframe *process_frame __P((struct proc *));
static __inline struct save87 *process_fpframe __P((struct proc *));
static __inline struct trapframe *
process_frame(p)
struct proc *p;
process_frame(struct proc *p)
{
return (p->p_md.md_regs);
}
static __inline struct save87 *
process_fpframe(p)
struct proc *p;
static __inline union savefpu *
process_fpframe(struct proc *p)
{
return (&p->p_addr->u_pcb.pcb_savefpu);
}
void
process_xmm_to_s87(const struct savexmm *sxmm, struct save87 *s87)
{
int i;
/* FPU control/status */
s87->sv_env.en_cw = sxmm->sv_env.en_cw;
s87->sv_env.en_sw = sxmm->sv_env.en_sw;
/* tag word handled below */
s87->sv_env.en_fip = sxmm->sv_env.en_fip;
s87->sv_env.en_fcs = sxmm->sv_env.en_fcs;
s87->sv_env.en_opcode = sxmm->sv_env.en_opcode;
s87->sv_env.en_foo = sxmm->sv_env.en_foo;
s87->sv_env.en_fos = sxmm->sv_env.en_fos;
/* Tag word and registers. */
for (i = 0; i < 8; i++) {
if (sxmm->sv_env.en_tw & (1U << i))
s87->sv_env.en_tw &= ~(3U << (i * 2));
else
s87->sv_env.en_tw |= (3U << (i * 2));
if (sxmm->sv_ex_tw & (1U << i))
s87->sv_ex_tw &= ~(3U << (i * 2));
else
s87->sv_ex_tw |= (3U << (i * 2));
memcpy(&s87->sv_ac[i].fp_bytes, &sxmm->sv_ac[i].fp_bytes,
sizeof(s87->sv_ac[i].fp_bytes));
}
s87->sv_ex_sw = sxmm->sv_ex_sw;
}
/*
 * process_s87_to_xmm:
 *
 *	Fold a legacy-format (FNSAVE layout) FPU context into an
 *	extended-format (FXSAVE layout) context.
 *
 *	s87	source context, read only
 *	sxmm	destination context; only the members assigned below are
 *		modified, everything else in *sxmm is left as it was
 */
void
process_s87_to_xmm(const struct save87 *s87, struct savexmm *sxmm)
{
	int regno;

	/* Control and status words. */
	sxmm->sv_env.en_cw = s87->sv_env.en_cw;
	sxmm->sv_env.en_sw = s87->sv_env.en_sw;

	/* Last instruction: pointer, selector and opcode. */
	sxmm->sv_env.en_fip = s87->sv_env.en_fip;
	sxmm->sv_env.en_fcs = s87->sv_env.en_fcs;
	sxmm->sv_env.en_opcode = s87->sv_env.en_opcode;

	/* Last operand: pointer and selector. */
	sxmm->sv_env.en_foo = s87->sv_env.en_foo;
	sxmm->sv_env.en_fos = s87->sv_env.en_fos;

	/*
	 * Collapse each two-bit tag into a single bit (tag 3 -> 0,
	 * anything else -> 1) for both the current and the saved
	 * exception tag word, and carry the register image across.
	 */
	for (regno = 0; regno < 8; regno++) {
		const int shift = regno * 2;
		const unsigned int bit = 1U << regno;

		if (((s87->sv_env.en_tw >> shift) & 3) != 3)
			sxmm->sv_env.en_tw |= bit;
		else
			sxmm->sv_env.en_tw &= ~bit;

		if (((s87->sv_ex_tw >> shift) & 3) != 3)
			sxmm->sv_ex_tw |= bit;
		else
			sxmm->sv_ex_tw &= ~bit;

		memcpy(&sxmm->sv_ac[regno].fp_bytes,
		    &s87->sv_ac[regno].fp_bytes,
		    sizeof(sxmm->sv_ac[regno].fp_bytes));
	}

	/* Status word remembered from the last exception. */
	sxmm->sv_ex_sw = s87->sv_ex_sw;
}
int
process_read_regs(p, regs)
struct proc *p;
@ -142,7 +205,7 @@ process_read_fpregs(p, regs)
struct proc *p;
struct fpreg *regs;
{
struct save87 *frame = process_fpframe(p);
union savefpu *frame = process_fpframe(p);
if (p->p_md.md_flags & MDP_USEDFPU) {
#if NNPX > 0
@ -159,15 +222,31 @@ process_read_fpregs(p, regs)
* The initial control word was already set by setregs(), so
* save it temporarily.
*/
cw = frame->sv_env.en_cw;
memset(frame, 0, sizeof(*regs));
frame->sv_env.en_cw = cw;
frame->sv_env.en_sw = 0x0000;
frame->sv_env.en_tw = 0xffff;
if (cpu_use_fxsave) {
cw = frame->sv_xmm.sv_env.en_cw;
/* XXX Don't zero XMM regs? */
memset(&frame->sv_xmm, 0, sizeof(frame->sv_xmm));
frame->sv_xmm.sv_env.en_cw = cw;
frame->sv_xmm.sv_env.en_sw = 0x0000;
frame->sv_xmm.sv_env.en_tw = 0x00;
} else {
cw = frame->sv_87.sv_env.en_cw;
memset(&frame->sv_87, 0, sizeof(frame->sv_87));
frame->sv_87.sv_env.en_cw = cw;
frame->sv_87.sv_env.en_sw = 0x0000;
frame->sv_87.sv_env.en_tw = 0xffff;
}
p->p_md.md_flags |= MDP_USEDFPU;
}
memcpy(regs, frame, sizeof(*regs));
if (cpu_use_fxsave) {
struct save87 s87;
/* XXX Yuck */
process_xmm_to_s87(&frame->sv_xmm, &s87);
memcpy(regs, &s87, sizeof(*regs));
} else
memcpy(regs, &frame->sv_87, sizeof(*regs));
return (0);
}
@ -228,7 +307,7 @@ process_write_fpregs(p, regs)
struct proc *p;
struct fpreg *regs;
{
struct save87 *frame = process_fpframe(p);
union savefpu *frame = process_fpframe(p);
if (p->p_md.md_flags & MDP_USEDFPU) {
#if NNPX > 0
@ -241,7 +320,14 @@ process_write_fpregs(p, regs)
p->p_md.md_flags |= MDP_USEDFPU;
}
memcpy(frame, regs, sizeof(*regs));
if (cpu_use_fxsave) {
struct save87 s87;
/* XXX Yuck. */
memcpy(&s87, regs, sizeof(*regs));
process_s87_to_xmm(&s87, &frame->sv_xmm);
} else
memcpy(&frame->sv_87, regs, sizeof(*regs));
return (0);
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.73 2001/08/01 19:50:49 thorpej Exp $ */
/* $NetBSD: cpu.h,v 1.74 2001/08/02 21:04:44 thorpej Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
@ -181,6 +181,8 @@ extern int cpuid_level;
extern const struct cpu_nocpuid_nameclass i386_nocpuid_cpus[];
extern const struct cpu_cpuid_nameclass i386_cpuid_cpus[];
extern int cpu_use_fxsave;
/* machdep.c */
void delay __P((int));
void dumpconf __P((void));

View File

@ -1,4 +1,4 @@
/* $NetBSD: npx.h,v 1.14 1999/01/26 14:25:02 christos Exp $ */
/* $NetBSD: npx.h,v 1.15 2001/08/02 21:04:45 thorpej Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
@ -47,7 +47,7 @@
#define _I386_NPX_H_
/* Environment information of floating point unit */
struct env87 {
struct env87 {
long en_cw; /* control word (16bits) */
long en_sw; /* status word (16bits) */
long en_tw; /* tag word (16bits) */
@ -59,7 +59,7 @@ struct env87 {
};
/* Contents of each floating point accumulator */
struct fpacc87 {
struct fpacc87 {
#ifdef dontdef /* too unportable */
u_long fp_mantlo; /* mantissa low (31:0) */
u_long fp_manthi; /* mantissa high (63:32) */
@ -71,7 +71,7 @@ struct fpacc87 {
};
/* Floating point context */
struct save87 {
struct save87 {
struct env87 sv_env; /* floating point control/status */
struct fpacc87 sv_ac[8]; /* accumulator contents, 0-7 */
#ifndef dontdef
@ -81,8 +81,52 @@ struct save87 {
#endif
};
/*
 * Environment of FPU/MMX/SSE/SSE2.
 *
 * This is the first 32 bytes of the memory image used by FXSAVE/FXRSTOR,
 * so member order and sizes must match the hardware layout exactly.  The
 * numbers in the left margin are byte offsets.  Note that the abridged
 * tag word lives at byte 4 and is followed by a reserved byte at byte 5.
 */
struct envxmm {
/*0*/	uint16_t en_cw;		/* FPU Control Word */
	uint16_t en_sw;		/* FPU Status Word */
	uint8_t  en_tw;		/* FPU Tag Word (abridged) */
	uint8_t  en_rsvd0;
	uint16_t en_opcode;	/* FPU Opcode */
	uint32_t en_fip;	/* FPU Instruction Pointer */
	uint16_t en_fcs;	/* FPU IP selector */
	uint16_t en_rsvd1;
/*16*/	uint32_t en_foo;	/* FPU Data pointer */
	uint16_t en_fos;	/* FPU Data pointer selector */
	uint16_t en_rsvd2;
	uint32_t en_mxcsr;	/* MXCSR Register State */
	uint32_t en_rsvd3;
};
/*
 * FPU registers in the extended save format.  Each slot is 16 bytes:
 * 10 bytes of register data followed by 6 reserved bytes.
 */
struct fpaccxmm {
uint8_t fp_bytes[10]; /* register image */
uint8_t fp_rsvd[6]; /* reserved */
};
/* SSE/SSE2 registers.  One 16-byte slot per XMM register. */
struct xmmreg {
uint8_t sse_bytes[16]; /* raw register contents */
};
/*
 * FPU/MMX/SSE/SSE2 context.
 *
 * The members up to and including sv_rsvd add up to 512 bytes
 * (32 + 8*16 + 8*16 + 16*14) and form the hardware portion of the save
 * area; the members after it are maintained by software only.
 */
struct savexmm {
struct envxmm sv_env; /* control/status context */
struct fpaccxmm sv_ac[8]; /* ST/MM regs */
struct xmmreg sv_xmmregs[8]; /* XMM regs */
uint8_t sv_rsvd[16 * 14]; /* pads the hardware portion out to 512 bytes */
/* 512-bytes --- end of hardware portion of save area */
uint32_t sv_ex_sw; /* saved SW from last exception */
uint32_t sv_ex_tw; /* saved TW from last exception */
};
/*
 * FPU save area as stored in the pcb.  Only one member is meaningful at
 * a time; code selects sv_xmm when cpu_use_fxsave is set and sv_87
 * otherwise.
 */
union savefpu {
struct save87 sv_87; /* legacy layout */
struct savexmm sv_xmm; /* extended (FXSAVE) layout */
};
/* Cyrix EMC memory - mapped coprocessor context switch information */
struct emcsts {
struct emcsts {
long em_msw; /* memory mapped status register when swtched */
long em_tar; /* memory mapped temp A register when swtched */
long em_dl; /* memory mapped D low register when swtched */
@ -129,9 +173,12 @@ struct emcsts {
#ifdef _KERNEL
void probeintr __P((void));
void probetrap __P((void));
int npx586bug1 __P((int, int));
void probeintr __P((void));
void probetrap __P((void));
int npx586bug1 __P((int, int));
void process_xmm_to_s87(const struct savexmm *, struct save87 *);
void process_s87_to_xmm(const struct save87 *, struct savexmm *);
#endif

View File

@ -1,4 +1,4 @@
/* $NetBSD: pcb.h,v 1.28 2001/06/17 21:01:38 sommerfeld Exp $ */
/* $NetBSD: pcb.h,v 1.29 2001/08/02 21:04:45 thorpej Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
@ -97,7 +97,8 @@ struct pcb {
#define pcb_ebp pcb_tss.tss_ebp
#define pcb_ldt_sel pcb_tss.tss_ldt
int pcb_cr0; /* saved image of CR0 */
struct save87 pcb_savefpu; /* floating point state for 287/387 */
int pcb_pad0; /* align pcb_savefpu to 16 bytes */
union savefpu pcb_savefpu; /* floating point state for FPU */
struct emcsts pcb_saveemc; /* Cyrix EMC state */
/*
* Software pcb (extension)

View File

@ -1,4 +1,4 @@
/* $NetBSD: npx.c,v 1.76 2001/05/17 16:35:06 lukem Exp $ */
/* $NetBSD: npx.c,v 1.77 2001/08/02 21:04:45 thorpej Exp $ */
#if 0
#define IPRINTF(x) printf x
@ -43,6 +43,8 @@
* @(#)npx.c 7.2 (Berkeley) 5/12/91
*/
#include "opt_cputype.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
@ -102,9 +104,40 @@
#define clts() __asm("clts")
#define stts() lcr0(rcr0() | CR0_TS)
int npxdna(struct proc *);
void npxexit(void);
static void npxsave1(void);
#ifdef I686_CPU
/*
 * Thin wrappers around the FXSAVE and FXRSTOR instructions.  `addr' points
 * at the memory image: fxsave() passes it as the asm output operand,
 * fxrstor() as the asm input operand.  Only available on I686_CPU kernels.
 */
#define fxsave(addr) __asm("fxsave %0" : "=m" (*addr))
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*addr))
#endif /* I686_CPU */
/*
 * fpu_save:
 *
 *	Store the current FPU state into *addr, using the extended format
 *	(sv_xmm member) when cpu_use_fxsave is set on an I686_CPU kernel and
 *	the legacy format (sv_87 member) otherwise.  In the extended case an
 *	explicit fninit() follows the save so that both paths leave the FPU
 *	initialized afterwards.
 */
static __inline void
fpu_save(union savefpu *addr)
{
#ifdef I686_CPU
if (cpu_use_fxsave) {
fxsave(&addr->sv_xmm);
/* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */
fwait(); /* XXX needed? */
fninit();
} else
#endif /* I686_CPU */
/* Body of the `else' above when I686_CPU is defined; unconditional otherwise. */
fnsave(&addr->sv_87);
}
/*
 * npxdna_notset:
 *
 *	Initial value of the npxdna_func vector.  Panics unconditionally;
 *	reaching it means the vector was used before a real handler was
 *	installed.  The argument is ignored.
 */
static int
npxdna_notset(struct proc *p)
{
panic("npxdna vector not initialized");
}
/*
 * Device-not-available handler vector.  Starts out pointing at the
 * panicking placeholder; npxattach() replaces it with npxdna_xmm when
 * cpu_use_fxsave is set (I686_CPU kernels only) or npxdna_s87 otherwise.
 */
int (*npxdna_func)(struct proc *) = npxdna_notset;
int npxdna_s87(struct proc *); /* handler using the legacy save format */
#ifdef I686_CPU
int npxdna_xmm(struct proc *); /* handler using the extended save format */
#endif /* I686_CPU */
void npxexit(void);
struct proc *npxproc;
@ -236,6 +269,13 @@ npxattach(struct npx_softc *sc)
}
lcr0(rcr0() | (CR0_TS));
i386_fpu_present = 1;
#ifdef I686_CPU
if (cpu_use_fxsave)
npxdna_func = npxdna_xmm;
else
#endif /* I686_CPU */
npxdna_func = npxdna_s87;
}
/*
@ -257,7 +297,7 @@ int
npxintr(void *arg)
{
register struct proc *p = npxproc;
register struct save87 *addr;
union savefpu *addr;
struct intrframe *frame = arg;
struct npx_softc *sc;
int code;
@ -303,12 +343,15 @@ npxintr(void *arg)
* Save state. This does an implied fninit. It had better not halt
* the cpu or we'll hang.
*/
fnsave(addr);
fpu_save(addr);
fwait();
/*
* Restore control word (was clobbered by fnsave).
* Restore control word (was clobbered by fpu_save).
*/
fldcw(&addr->sv_env.en_cw);
if (cpu_use_fxsave)
fldcw(&addr->sv_xmm.sv_env.en_cw);
else
fldcw(&addr->sv_87.sv_env.en_cw);
fwait();
/*
* Remember the exception status word and tag word. The current
@ -318,8 +361,13 @@ npxintr(void *arg)
* preserved the control word and will copy the status and tag
* words, so the complete exception state can be recovered.
*/
addr->sv_ex_sw = addr->sv_env.en_sw;
addr->sv_ex_tw = addr->sv_env.en_tw;
if (cpu_use_fxsave) {
addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
} else {
addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw;
addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw;
}
/*
* Pass exception to process.
@ -364,7 +412,7 @@ npxintr(void *arg)
}
/*
* Wrapper for the fnsave instruction. We set the TS bit in the saved CR0 for
* Wrapper for the fpu_save operation. We set the TS bit in the saved CR0 for
* this process, so that it will get a DNA exception on the FPU instruction and
* force a reload. This routine is always called with npx_nointr set, so that
* any pending exception will be thrown away. (It will be caught again if/when
@ -374,12 +422,12 @@ npxintr(void *arg)
* interrupt masked, it would be necessary to forcibly unmask the NPX interrupt
* so that it could succeed.
*/
static inline void
static __inline void
npxsave1(void)
{
struct proc *p = npxproc;
fnsave(&p->p_addr->u_pcb.pcb_savefpu);
fpu_save(&p->p_addr->u_pcb.pcb_savefpu);
p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
fwait();
}
@ -391,8 +439,48 @@ npxsave1(void)
* Otherwise, we save the previous state, if necessary, and restore our last
* saved state.
*/
#ifdef I686_CPU
/*
 * Device-not-available handler for the extended (FXSAVE/FXRSTOR) save
 * format.  Gives the FPU to process `p': saves the previous owner's state
 * if there is one, then either loads just the control word (first FPU use
 * by `p') or restores p's full saved context.  Always returns 1.
 */
int
npxdna(struct proc *p)
npxdna_xmm(struct proc *p)
{
#ifdef DIAGNOSTIC
if (cpl != 0 || npx_nointr != 0)
panic("npxdna: masked");
#endif
/* Clear TS both in the saved CR0 image and in the live CR0. */
p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
clts();
/*
* Initialize the FPU state to clear any exceptions. If someone else
* was using the FPU, save their state (which does an implicit
* initialization).
*/
npx_nointr = 1;
if (npxproc != 0 && npxproc != p) {
IPRINTF(("Save"));
npxsave1();
} else {
IPRINTF(("Init"));
fninit();
fwait();
}
npx_nointr = 0;
/* `p' now owns the FPU. */
npxproc = p;
if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
/*
 * First FPU use by this process: only the control word is loaded.
 * NOTE(review): MXCSR and the XMM registers are not loaded on this
 * path -- confirm they are initialized elsewhere.
 */
fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm.sv_env.en_cw);
p->p_md.md_flags |= MDP_USEDFPU;
} else
fxrstor(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm);
return (1);
}
#endif /* I686_CPU */
int
npxdna_s87(struct proc *p)
{
if (npx_type == NPX_NONE) {
@ -426,7 +514,10 @@ npxdna(struct proc *p)
npxproc = p;
if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw);
if (cpu_use_fxsave)
fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm.sv_env.en_cw);
else
fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_87.sv_env.en_cw);
p->p_md.md_flags |= MDP_USEDFPU;
} else {
/*
@ -442,7 +533,7 @@ npxdna(struct proc *p)
* fnclex if it is the first FPU instruction after a context
* switch.
*/
frstor(&p->p_addr->u_pcb.pcb_savefpu);
frstor(&p->p_addr->u_pcb.pcb_savefpu.sv_87);
}
return (1);