diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index beb7812f3538..8d11c92fb92b 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $NetBSD: locore.s,v 1.245 2001/07/31 22:52:44 jdolecek Exp $ */ +/* $NetBSD: locore.s,v 1.246 2001/08/02 21:04:43 thorpej Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. @@ -2141,7 +2141,7 @@ IDTVEC(trap07) pushl $T_DNA INTRENTRY pushl _C_LABEL(curproc) - call _C_LABEL(npxdna) + call *_C_LABEL(npxdna_func) addl $4,%esp testl %eax,%eax jz calltrap diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index e85a02f96ee0..882af1001df5 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: machdep.c,v 1.449 2001/08/01 19:50:48 thorpej Exp $ */ +/* $NetBSD: machdep.c,v 1.450 2001/08/02 21:04:43 thorpej Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc. @@ -200,6 +200,8 @@ int i386_fpu_present; int i386_fpu_exception; int i386_fpu_fdivbug; +int cpu_use_fxsave; + #define CPUID2MODEL(cpuid) (((cpuid) >> 4) & 15) vaddr_t msgbuf_vaddr; @@ -1515,6 +1517,17 @@ identifycpu(struct cpu_info *ci) cpu_tsc_freq = (rdtsc() - last_tsc) * 10; } #endif + +#if defined(I686_CPU) + /* + * If we have FXSAVE/FXRSTOR, use them. 
+ */ + if (cpu_feature & CPUID_FXSR) { + cpu_use_fxsave = 1; + lcr4(rcr4() | CR4_OSFXSR); + } else + cpu_use_fxsave = 0; +#endif /* I686_CPU */ } /* @@ -2132,7 +2145,10 @@ setregs(p, pack, stack) p->p_md.md_flags &= ~MDP_USEDFPU; pcb->pcb_flags = 0; - pcb->pcb_savefpu.sv_env.en_cw = __NetBSD_NPXCW__; + if (cpu_use_fxsave) + pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __NetBSD_NPXCW__; + else + pcb->pcb_savefpu.sv_87.sv_env.en_cw = __NetBSD_NPXCW__; tf = p->p_md.md_regs; tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL); @@ -2332,6 +2348,13 @@ init386(first_avail) if (PAGE_SIZE != NBPG) panic("init386: PAGE_SIZE != NBPG"); + /* + * Saving SSE registers won't work if the save area isn't + * 16-byte aligned. + */ + if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf) + panic("init386: pcb_savefpu not 16-byte aligned"); + /* * Start with 2 color bins -- this is just a guess to get us * started. We'll recolor when we determine the largest cache diff --git a/sys/arch/i386/i386/process_machdep.c b/sys/arch/i386/i386/process_machdep.c index 992df8d8bb51..e7c28df9094f 100644 --- a/sys/arch/i386/i386/process_machdep.c +++ b/sys/arch/i386/i386/process_machdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: process_machdep.c,v 1.34 2001/06/18 03:33:59 sommerfeld Exp $ */ +/* $NetBSD: process_machdep.c,v 1.35 2001/08/02 21:04:44 thorpej Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 
@@ -80,25 +80,99 @@ #include #endif -static __inline struct trapframe *process_frame __P((struct proc *)); -static __inline struct save87 *process_fpframe __P((struct proc *)); - static __inline struct trapframe * -process_frame(p) - struct proc *p; +process_frame(struct proc *p) { return (p->p_md.md_regs); } -static __inline struct save87 * -process_fpframe(p) - struct proc *p; +static __inline union savefpu * +process_fpframe(struct proc *p) { return (&p->p_addr->u_pcb.pcb_savefpu); } +/* + * Convert an FXSAVE-format area to the FNSAVE layout. Each bit of the + * abridged tag byte expands to a 2-bit tag: set -> 00, clear -> 11. + * Both tag words are built up from zero so no stale bits survive. + */ +void +process_xmm_to_s87(const struct savexmm *sxmm, struct save87 *s87) +{ + int i; + + /* FPU control/status */ + s87->sv_env.en_cw = sxmm->sv_env.en_cw; + s87->sv_env.en_sw = sxmm->sv_env.en_sw; + /* Tag words are rebuilt below; start from zero. */ + s87->sv_env.en_tw = 0; + s87->sv_ex_tw = 0; + s87->sv_env.en_fip = sxmm->sv_env.en_fip; + s87->sv_env.en_fcs = sxmm->sv_env.en_fcs; + s87->sv_env.en_opcode = sxmm->sv_env.en_opcode; + s87->sv_env.en_foo = sxmm->sv_env.en_foo; + s87->sv_env.en_fos = sxmm->sv_env.en_fos; + + /* Tag word and registers. */ + for (i = 0; i < 8; i++) { + if (sxmm->sv_env.en_tw & (1U << i)) + s87->sv_env.en_tw &= ~(3U << (i * 2)); + else + s87->sv_env.en_tw |= (3U << (i * 2)); + + if (sxmm->sv_ex_tw & (1U << i)) + s87->sv_ex_tw &= ~(3U << (i * 2)); + else + s87->sv_ex_tw |= (3U << (i * 2)); + + memcpy(&s87->sv_ac[i].fp_bytes, &sxmm->sv_ac[i].fp_bytes, + sizeof(s87->sv_ac[i].fp_bytes)); + } + + s87->sv_ex_sw = sxmm->sv_ex_sw; +} + +/* + * Convert an FNSAVE-format area to the FXSAVE layout. A 2-bit tag of 11 + * clears the matching abridged tag bit; any other value sets it. + */ +void +process_s87_to_xmm(const struct save87 *s87, struct savexmm *sxmm) +{ + int i; + + /* FPU control/status */ + sxmm->sv_env.en_cw = s87->sv_env.en_cw; + sxmm->sv_env.en_sw = s87->sv_env.en_sw; + /* tag word handled below */ + sxmm->sv_env.en_fip = s87->sv_env.en_fip; + sxmm->sv_env.en_fcs = s87->sv_env.en_fcs; + sxmm->sv_env.en_opcode = s87->sv_env.en_opcode; + sxmm->sv_env.en_foo = s87->sv_env.en_foo; + sxmm->sv_env.en_fos = s87->sv_env.en_fos; + + /* Tag word and registers. 
*/ + for (i = 0; i < 8; i++) { + if (((s87->sv_env.en_tw >> (i * 2)) & 3) == 3) + sxmm->sv_env.en_tw &= ~(1U << i); + else + sxmm->sv_env.en_tw |= (1U << i); + + if (((s87->sv_ex_tw >> (i * 2)) & 3) == 3) + sxmm->sv_ex_tw &= ~(1U << i); + else + sxmm->sv_ex_tw |= (1U << i); + + memcpy(&sxmm->sv_ac[i].fp_bytes, &s87->sv_ac[i].fp_bytes, + sizeof(sxmm->sv_ac[i].fp_bytes)); + } + + sxmm->sv_ex_sw = s87->sv_ex_sw; +} + int process_read_regs(p, regs) struct proc *p; @@ -142,7 +216,7 @@ process_read_fpregs(p, regs) struct proc *p; struct fpreg *regs; { - struct save87 *frame = process_fpframe(p); + union savefpu *frame = process_fpframe(p); if (p->p_md.md_flags & MDP_USEDFPU) { #if NNPX > 0 @@ -159,15 +233,33 @@ process_read_fpregs(p, regs) * The initial control word was already set by setregs(), so * save it temporarily. */ - cw = frame->sv_env.en_cw; - memset(frame, 0, sizeof(*regs)); - frame->sv_env.en_cw = cw; - frame->sv_env.en_sw = 0x0000; - frame->sv_env.en_tw = 0xffff; + if (cpu_use_fxsave) { + cw = frame->sv_xmm.sv_env.en_cw; + /* XXX Don't zero XMM regs? 
*/ + memset(&frame->sv_xmm, 0, sizeof(frame->sv_xmm)); + frame->sv_xmm.sv_env.en_cw = cw; + frame->sv_xmm.sv_env.en_sw = 0x0000; + frame->sv_xmm.sv_env.en_tw = 0x00; + /* MXCSR reset value: all SIMD FP exceptions masked. */ + frame->sv_xmm.sv_env.en_mxcsr = 0x1f80; + } else { + cw = frame->sv_87.sv_env.en_cw; + memset(&frame->sv_87, 0, sizeof(frame->sv_87)); + frame->sv_87.sv_env.en_cw = cw; + frame->sv_87.sv_env.en_sw = 0x0000; + frame->sv_87.sv_env.en_tw = 0xffff; + } p->p_md.md_flags |= MDP_USEDFPU; } - memcpy(regs, frame, sizeof(*regs)); + if (cpu_use_fxsave) { + struct save87 s87; + + /* XXX Yuck */ + process_xmm_to_s87(&frame->sv_xmm, &s87); + memcpy(regs, &s87, sizeof(*regs)); + } else + memcpy(regs, &frame->sv_87, sizeof(*regs)); return (0); } @@ -228,7 +320,7 @@ process_write_fpregs(p, regs) struct proc *p; struct fpreg *regs; { - struct save87 *frame = process_fpframe(p); + union savefpu *frame = process_fpframe(p); if (p->p_md.md_flags & MDP_USEDFPU) { #if NNPX > 0 @@ -241,7 +333,14 @@ process_write_fpregs(p, regs) p->p_md.md_flags |= MDP_USEDFPU; } - memcpy(frame, regs, sizeof(*regs)); + if (cpu_use_fxsave) { + struct save87 s87; + + /* XXX Yuck. */ + memcpy(&s87, regs, sizeof(*regs)); + process_s87_to_xmm(&s87, &frame->sv_xmm); + } else + memcpy(&frame->sv_87, regs, sizeof(*regs)); return (0); } diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h index cdd1046ff131..bd051b015adf 100644 --- a/sys/arch/i386/include/cpu.h +++ b/sys/arch/i386/include/cpu.h @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.73 2001/08/01 19:50:49 thorpej Exp $ */ +/* $NetBSD: cpu.h,v 1.74 2001/08/02 21:04:44 thorpej Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. 
@@ -181,6 +181,8 @@ extern int cpuid_level; extern const struct cpu_nocpuid_nameclass i386_nocpuid_cpus[]; extern const struct cpu_cpuid_nameclass i386_cpuid_cpus[]; +extern int cpu_use_fxsave; + /* machdep.c */ void delay __P((int)); void dumpconf __P((void)); diff --git a/sys/arch/i386/include/npx.h b/sys/arch/i386/include/npx.h index ea39ac5a1123..a4c4f4a854f5 100644 --- a/sys/arch/i386/include/npx.h +++ b/sys/arch/i386/include/npx.h @@ -1,4 +1,4 @@ -/* $NetBSD: npx.h,v 1.14 1999/01/26 14:25:02 christos Exp $ */ +/* $NetBSD: npx.h,v 1.15 2001/08/02 21:04:45 thorpej Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. @@ -47,7 +47,7 @@ #define _I386_NPX_H_ /* Environment information of floating point unit */ -struct env87 { +struct env87 { long en_cw; /* control word (16bits) */ long en_sw; /* status word (16bits) */ long en_tw; /* tag word (16bits) */ @@ -59,7 +59,7 @@ struct env87 { }; /* Contents of each floating point accumulator */ -struct fpacc87 { +struct fpacc87 { #ifdef dontdef /* too unportable */ u_long fp_mantlo; /* mantissa low (31:0) */ u_long fp_manthi; /* mantissa high (63:32) */ @@ -71,7 +71,7 @@ struct fpacc87 { }; /* Floating point context */ -struct save87 { +struct save87 { struct env87 sv_env; /* floating point control/status */ struct fpacc87 sv_ac[8]; /* accumulator contents, 0-7 */ #ifndef dontdef @@ -81,8 +81,52 @@ struct save87 { #endif }; +/* Environment of FPU/MMX/SSE/SSE2. 
*/ +struct envxmm { +/*0*/ uint16_t en_cw; /* FPU Control Word */ + uint16_t en_sw; /* FPU Status Word */ + uint8_t en_rsvd0; + uint8_t en_tw; /* FPU Tag Word (abridged) */ + uint16_t en_opcode; /* FPU Opcode */ + uint32_t en_fip; /* FPU Instruction Pointer */ + uint16_t en_fcs; /* FPU IP selector */ + uint16_t en_rsvd1; +/*16*/ uint32_t en_foo; /* FPU Data pointer */ + uint16_t en_fos; /* FPU Data pointer selector */ + uint16_t en_rsvd2; + uint32_t en_mxcsr; /* MXCSR Register State */ + uint32_t en_rsvd3; +}; + +/* FPU registers in the extended save format. */ +struct fpaccxmm { + uint8_t fp_bytes[10]; + uint8_t fp_rsvd[6]; +}; + +/* SSE/SSE2 registers. */ +struct xmmreg { + uint8_t sse_bytes[16]; +}; + +/* FPU/MMX/SSE/SSE2 context */ +struct savexmm { + struct envxmm sv_env; /* control/status context */ + struct fpaccxmm sv_ac[8]; /* ST/MM regs */ + struct xmmreg sv_xmmregs[8]; /* XMM regs */ + uint8_t sv_rsvd[16 * 14]; + /* 512-bytes --- end of hardware portion of save area */ + uint32_t sv_ex_sw; /* saved SW from last exception */ + uint32_t sv_ex_tw; /* saved TW from last exception */ +}; + +union savefpu { + struct save87 sv_87; + struct savexmm sv_xmm; +}; + /* Cyrix EMC memory - mapped coprocessor context switch information */ -struct emcsts { +struct emcsts { long em_msw; /* memory mapped status register when swtched */ long em_tar; /* memory mapped temp A register when swtched */ long em_dl; /* memory mapped D low register when swtched */ @@ -129,9 +173,12 @@ struct emcsts { #ifdef _KERNEL -void probeintr __P((void)); -void probetrap __P((void)); -int npx586bug1 __P((int, int)); +void probeintr __P((void)); +void probetrap __P((void)); +int npx586bug1 __P((int, int)); + +void process_xmm_to_s87(const struct savexmm *, struct save87 *); +void process_s87_to_xmm(const struct save87 *, struct savexmm *); #endif diff --git a/sys/arch/i386/include/pcb.h b/sys/arch/i386/include/pcb.h index dfbc0d569b7e..eaa06d715c53 100644 --- a/sys/arch/i386/include/pcb.h +++ 
b/sys/arch/i386/include/pcb.h @@ -1,4 +1,4 @@ -/* $NetBSD: pcb.h,v 1.28 2001/06/17 21:01:38 sommerfeld Exp $ */ +/* $NetBSD: pcb.h,v 1.29 2001/08/02 21:04:45 thorpej Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -97,7 +97,8 @@ struct pcb { #define pcb_ebp pcb_tss.tss_ebp #define pcb_ldt_sel pcb_tss.tss_ldt int pcb_cr0; /* saved image of CR0 */ - struct save87 pcb_savefpu; /* floating point state for 287/387 */ + int pcb_pad0; /* align pcb_savefpu to 16 bytes */ + union savefpu pcb_savefpu; /* floating point state for FPU */ struct emcsts pcb_saveemc; /* Cyrix EMC state */ /* * Software pcb (extension) diff --git a/sys/arch/i386/isa/npx.c b/sys/arch/i386/isa/npx.c index dfcdd09491a7..a1ece636feec 100644 --- a/sys/arch/i386/isa/npx.c +++ b/sys/arch/i386/isa/npx.c @@ -1,4 +1,4 @@ -/* $NetBSD: npx.c,v 1.76 2001/05/17 16:35:06 lukem Exp $ */ +/* $NetBSD: npx.c,v 1.77 2001/08/02 21:04:45 thorpej Exp $ */ #if 0 #define IPRINTF(x) printf x @@ -43,6 +43,8 @@ * @(#)npx.c 7.2 (Berkeley) 5/12/91 */ +#include "opt_cputype.h" + #include #include #include @@ -102,9 +104,40 @@ #define clts() __asm("clts") #define stts() lcr0(rcr0() | CR0_TS) -int npxdna(struct proc *); -void npxexit(void); -static void npxsave1(void); +#ifdef I686_CPU +#define fxsave(addr) __asm("fxsave %0" : "=m" (*addr)) +#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*addr)) +#endif /* I686_CPU */ + +static __inline void +fpu_save(union savefpu *addr) +{ + +#ifdef I686_CPU + if (cpu_use_fxsave) { + fxsave(&addr->sv_xmm); + /* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */ + fwait(); /* XXX needed? 
*/ + fninit(); + } else +#endif /* I686_CPU */ + fnsave(&addr->sv_87); +} + +static int +npxdna_notset(struct proc *p) +{ + + panic("npxdna vector not initialized"); +} + +int (*npxdna_func)(struct proc *) = npxdna_notset; + +int npxdna_s87(struct proc *); +#ifdef I686_CPU +int npxdna_xmm(struct proc *); +#endif /* I686_CPU */ +void npxexit(void); struct proc *npxproc; @@ -236,6 +269,13 @@ npxattach(struct npx_softc *sc) } lcr0(rcr0() | (CR0_TS)); i386_fpu_present = 1; + +#ifdef I686_CPU + if (cpu_use_fxsave) + npxdna_func = npxdna_xmm; + else +#endif /* I686_CPU */ + npxdna_func = npxdna_s87; } /* @@ -257,7 +297,7 @@ int npxintr(void *arg) { register struct proc *p = npxproc; - register struct save87 *addr; + union savefpu *addr; struct intrframe *frame = arg; struct npx_softc *sc; int code; @@ -303,12 +343,15 @@ npxintr(void *arg) * Save state. This does an implied fninit. It had better not halt * the cpu or we'll hang. */ - fnsave(addr); + fpu_save(addr); fwait(); /* - * Restore control word (was clobbered by fnsave). + * Restore control word (was clobbered by fpu_save). */ - fldcw(&addr->sv_env.en_cw); + if (cpu_use_fxsave) + fldcw(&addr->sv_xmm.sv_env.en_cw); + else + fldcw(&addr->sv_87.sv_env.en_cw); fwait(); /* * Remember the exception status word and tag word. The current @@ -318,8 +361,13 @@ npxintr(void *arg) * preserved the control word and will copy the status and tag * words, so the complete exception state can be recovered. */ - addr->sv_ex_sw = addr->sv_env.en_sw; - addr->sv_ex_tw = addr->sv_env.en_tw; + if (cpu_use_fxsave) { + addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw; + addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw; + } else { + addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw; + addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw; + } /* * Pass exception to process. @@ -364,7 +412,7 @@ npxintr(void *arg) } /* - * Wrapper for the fnsave instruction. We set the TS bit in the saved CR0 for + * Wrapper for the fpu_save operation. 
We set the TS bit in the saved CR0 for * this process, so that it will get a DNA exception on the FPU instruction and * force a reload. This routine is always called with npx_nointr set, so that * any pending exception will be thrown away. (It will be caught again if/when @@ -374,12 +422,12 @@ npxintr(void *arg) * interrupt masked, it would be necessary to forcibly unmask the NPX interrupt * so that it could succeed. */ -static inline void +static __inline void npxsave1(void) { struct proc *p = npxproc; - fnsave(&p->p_addr->u_pcb.pcb_savefpu); + fpu_save(&p->p_addr->u_pcb.pcb_savefpu); p->p_addr->u_pcb.pcb_cr0 |= CR0_TS; fwait(); } @@ -391,8 +439,48 @@ npxsave1(void) * Otherwise, we save the previous state, if necessary, and restore our last * saved state. */ +#ifdef I686_CPU int -npxdna(struct proc *p) +npxdna_xmm(struct proc *p) +{ + +#ifdef DIAGNOSTIC + if (cpl != 0 || npx_nointr != 0) + panic("npxdna: masked"); +#endif + + p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS; + clts(); + + /* + * Initialize the FPU state to clear any exceptions. If someone else + * was using the FPU, save their state (which does an implicit + * initialization). 
+ */ + npx_nointr = 1; + if (npxproc != 0 && npxproc != p) { + IPRINTF(("Save")); + npxsave1(); + } else { + IPRINTF(("Init")); + fninit(); + fwait(); + } + npx_nointr = 0; + npxproc = p; + + if ((p->p_md.md_flags & MDP_USEDFPU) == 0) { + fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm.sv_env.en_cw); + p->p_md.md_flags |= MDP_USEDFPU; + } else + fxrstor(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm); + + return (1); +} +#endif /* I686_CPU */ + +int +npxdna_s87(struct proc *p) { if (npx_type == NPX_NONE) { @@ -426,7 +514,10 @@ npxdna(struct proc *p) npxproc = p; if ((p->p_md.md_flags & MDP_USEDFPU) == 0) { - fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw); + if (cpu_use_fxsave) + fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm.sv_env.en_cw); + else + fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_87.sv_env.en_cw); p->p_md.md_flags |= MDP_USEDFPU; } else { /* @@ -442,7 +533,7 @@ npxdna(struct proc *p) * fnclex if it is the first FPU instruction after a context * switch. */ - frstor(&p->p_addr->u_pcb.pcb_savefpu); + frstor(&p->p_addr->u_pcb.pcb_savefpu.sv_87); } return (1);