From fbf420517ead53928f661f0eb563896c1dbf7d48 Mon Sep 17 00:00:00 2001 From: maxv Date: Thu, 14 Jun 2018 14:36:46 +0000 Subject: [PATCH] Add some code to support eager fpu switch, INTEL-SA-00145. We restore the FPU state of the lwp right away during context switches. This guarantees that when the CPU executes in userland, the FPU doesn't contain secrets. Maybe we also need to clear the FPU in setregs(), not sure about this one. Can be enabled/disabled via: machdep.fpu_eager = {0/1} Not yet turned on automatically on affected CPUs (Intel Family 6). More generally it would be good to turn it on automatically when XSAVEOPT is supported, because in this case there is probably a non-negligible performance gain; but we need to fix PR/52966. --- sys/arch/amd64/amd64/locore.S | 16 +++- sys/arch/x86/include/cpu.h | 3 +- sys/arch/x86/include/fpu.h | 4 +- sys/arch/x86/x86/fpu.c | 167 ++++++++++++++++++++++----------- sys/arch/x86/x86/x86_machdep.c | 13 ++- 5 files changed, 140 insertions(+), 63 deletions(-) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 2ce00e15bdd8..d62b4892617b 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.165 2018/04/23 22:53:04 joerg Exp $ */ +/* $NetBSD: locore.S,v 1.166 2018/06/14 14:36:46 maxv Exp $ */ /* * Copyright-o-rama! @@ -1097,7 +1097,7 @@ ENTRY(cpu_switchto) .Lskip_svs: popq %rdx - /* Restore rdi/rsi for speculation_barrier */ + /* RDI/RSI got clobbered. */ movq %r13,%rdi movq %r12,%rsi #endif @@ -1106,6 +1106,18 @@ ENTRY(cpu_switchto) pushq %rdx callq _C_LABEL(speculation_barrier) popq %rdx + + /* RDI/RSI got clobbered. */ + movq %r13,%rdi + movq %r12,%rsi + + pushq %rdx + movb _C_LABEL(x86_fpu_eager),%dl + testb %dl,%dl + jz .Lno_eagerfpu + callq _C_LABEL(fpu_eagerswitch) +.Lno_eagerfpu: + popq %rdx #endif /* Switch to newlwp's stack. 
*/ diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h index 0177fd2503c6..c7dcc2f45cf8 100644 --- a/sys/arch/x86/include/cpu.h +++ b/sys/arch/x86/include/cpu.h @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.91 2018/04/04 12:59:49 maxv Exp $ */ +/* $NetBSD: cpu.h,v 1.92 2018/06/14 14:36:46 maxv Exp $ */ /* * Copyright (c) 1990 The Regents of the University of California. @@ -417,6 +417,7 @@ extern int x86_fpu_save; #define FPU_SAVE_XSAVEOPT 3 extern unsigned int x86_fpu_save_size; extern uint64_t x86_xsave_features; +extern bool x86_fpu_eager; extern void (*x86_cpu_idle)(void); #define cpu_idle() (*x86_cpu_idle)() diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h index bbe59989bc50..45b1400ff3d5 100644 --- a/sys/arch/x86/include/fpu.h +++ b/sys/arch/x86/include/fpu.h @@ -1,4 +1,4 @@ -/* $NetBSD: fpu.h,v 1.8 2018/05/23 07:45:35 maxv Exp $ */ +/* $NetBSD: fpu.h,v 1.9 2018/06/14 14:36:46 maxv Exp $ */ #ifndef _X86_FPU_H_ #define _X86_FPU_H_ @@ -16,6 +16,8 @@ void fpuinit_mxcsr_mask(void); void fpusave_lwp(struct lwp *, bool); void fpusave_cpu(bool); +void fpu_eagerswitch(struct lwp *, struct lwp *); + void fpu_set_default_cw(struct lwp *, unsigned int); void fputrap(struct trapframe *); diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c index b81339b447b6..143e780f3778 100644 --- a/sys/arch/x86/x86/fpu.c +++ b/sys/arch/x86/x86/fpu.c @@ -1,4 +1,4 @@ -/* $NetBSD: fpu.c,v 1.32 2018/05/23 10:21:43 maxv Exp $ */ +/* $NetBSD: fpu.c,v 1.33 2018/06/14 14:36:46 maxv Exp $ */ /* * Copyright (c) 2008 The NetBSD Foundation, Inc. 
All @@ -96,7 +96,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.32 2018/05/23 10:21:43 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.33 2018/06/14 14:36:46 maxv Exp $"); #include "opt_multiprocessor.h" @@ -127,6 +127,8 @@ __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.32 2018/05/23 10:21:43 maxv Exp $"); #define stts() HYPERVISOR_fpu_taskswitch(1) #endif +bool x86_fpu_eager __read_mostly = false; + static uint32_t x86_fpu_mxcsr_mask __read_mostly = 0; static inline union savefpu * @@ -263,6 +265,109 @@ fpuinit_mxcsr_mask(void) #endif } +static void +fpu_clear_amd(void) +{ + /* + * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor + * when FSW.ES=0, leaking other threads' execution history. + * + * Clear them manually by loading a zero (fldummy). We do this + * unconditionally, regardless of FSW.ES. + * + * Before that, clear the ES bit in the x87 status word if it is + * currently set, in order to avoid causing a fault in the + * upcoming load. + * + * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2], + * which indicates that FIP/FDP/FOP are restored (same behavior + * as Intel). We're not using it though.
+ */ + if (fngetsw() & 0x80) + fnclex(); + fldummy(); +} + +static void +fpu_save(struct lwp *l) +{ + struct pcb *pcb = lwp_getpcb(l); + + switch (x86_fpu_save) { + case FPU_SAVE_FSAVE: + fnsave(&pcb->pcb_savefpu); + break; + case FPU_SAVE_FXSAVE: + fxsave(&pcb->pcb_savefpu); + break; + case FPU_SAVE_XSAVE: + xsave(&pcb->pcb_savefpu, x86_xsave_features); + break; + case FPU_SAVE_XSAVEOPT: + xsaveopt(&pcb->pcb_savefpu, x86_xsave_features); + break; + } +} + +static void +fpu_restore(struct lwp *l) +{ + struct pcb *pcb = lwp_getpcb(l); + + switch (x86_fpu_save) { + case FPU_SAVE_FSAVE: + frstor(&pcb->pcb_savefpu); + break; + case FPU_SAVE_FXSAVE: + if (cpu_vendor == CPUVENDOR_AMD) + fpu_clear_amd(); + fxrstor(&pcb->pcb_savefpu); + break; + case FPU_SAVE_XSAVE: + case FPU_SAVE_XSAVEOPT: + if (cpu_vendor == CPUVENDOR_AMD) + fpu_clear_amd(); + xrstor(&pcb->pcb_savefpu, x86_xsave_features); + break; + } +} + +static void +fpu_reset(void) +{ + clts(); + fninit(); + stts(); +} + +static void +fpu_eagerrestore(struct lwp *l) +{ + struct pcb *pcb = lwp_getpcb(l); + struct cpu_info *ci = curcpu(); + + clts(); + ci->ci_fpcurlwp = l; + pcb->pcb_fpcpu = ci; + fpu_restore(l); +} + +void +fpu_eagerswitch(struct lwp *oldlwp, struct lwp *newlwp) +{ + int s; + + s = splhigh(); + fpusave_cpu(true); + if (newlwp->l_flag & LW_SYSTEM) + fpu_reset(); + else + fpu_eagerrestore(newlwp); + splx(s); +} + +/* -------------------------------------------------------------------------- */ + /* * This is a synchronous trap on either an x87 instruction (due to an * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc @@ -339,29 +444,6 @@ fputrap(struct trapframe *frame) (*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi); } -static void -fpu_clear_amd(void) -{ - /* - * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor - * when FSW.ES=0, leaking other threads' execution history. - * - * Clear them manually by loading a zero (fldummy). 
We do this - * unconditionally, regardless of FSW.ES. - * - * Before that, clear the ES bit in the x87 status word if it is - * currently set, in order to avoid causing a fault in the - * upcoming load. - * - * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2], - * which indicates that FIP/FDP/FOP are restored (same behavior - * as Intel). We're not using it though. - */ - if (fngetsw() & 0x80) - fnclex(); - fldummy(); -} - /* * Implement device not available (DNA) exception * @@ -429,22 +511,7 @@ fpudna(struct trapframe *frame) ci->ci_fpcurlwp = l; pcb->pcb_fpcpu = ci; - switch (x86_fpu_save) { - case FPU_SAVE_FSAVE: - frstor(&pcb->pcb_savefpu); - break; - case FPU_SAVE_FXSAVE: - if (cpu_vendor == CPUVENDOR_AMD) - fpu_clear_amd(); - fxrstor(&pcb->pcb_savefpu); - break; - case FPU_SAVE_XSAVE: - case FPU_SAVE_XSAVEOPT: - if (cpu_vendor == CPUVENDOR_AMD) - fpu_clear_amd(); - xrstor(&pcb->pcb_savefpu, x86_xsave_features); - break; - } + fpu_restore(l); KASSERT(ci == curcpu()); splx(s); @@ -471,21 +538,7 @@ fpusave_cpu(bool save) if (save) { clts(); - - switch (x86_fpu_save) { - case FPU_SAVE_FSAVE: - fnsave(&pcb->pcb_savefpu); - break; - case FPU_SAVE_FXSAVE: - fxsave(&pcb->pcb_savefpu); - break; - case FPU_SAVE_XSAVE: - xsave(&pcb->pcb_savefpu, x86_xsave_features); - break; - case FPU_SAVE_XSAVEOPT: - xsaveopt(&pcb->pcb_savefpu, x86_xsave_features); - break; - } + fpu_save(l); } stts(); @@ -630,7 +683,7 @@ fpu_save_area_fork(struct pcb *pcb2, const struct pcb *pcb1) memcpy(pcb2 + 1, pcb1 + 1, extra); } -/* ------------------------------------------------------------------------- */ +/* -------------------------------------------------------------------------- */ static void process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87) diff --git a/sys/arch/x86/x86/x86_machdep.c b/sys/arch/x86/x86/x86_machdep.c index d81811706420..68488464c035 100644 --- a/sys/arch/x86/x86/x86_machdep.c +++ b/sys/arch/x86/x86/x86_machdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: 
x86_machdep.c,v 1.115 2018/05/22 09:25:58 maxv Exp $ */ +/* $NetBSD: x86_machdep.c,v 1.116 2018/06/14 14:36:46 maxv Exp $ */ /*- * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi, @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.115 2018/05/22 09:25:58 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.116 2018/06/14 14:36:46 maxv Exp $"); #include "opt_modular.h" #include "opt_physmem.h" @@ -1276,6 +1276,15 @@ SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup") sysctl_speculation_init(clog); #endif +#ifndef XEN + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_READWRITE, + CTLTYPE_BOOL, "fpu_eager", + SYSCTL_DESCR("Whether the kernel uses Eager FPU Switch"), + NULL, 0, &x86_fpu_eager, 0, + CTL_MACHDEP, CTL_CREATE, CTL_EOL); +#endif + /* None of these can ever change once the system has booted */ const_sysctl(clog, "fpu_present", CTLTYPE_INT, i386_fpu_present, CPU_FPU_PRESENT);