From 560337f76b0fda150b6ef9df214d2c8ea69b2232 Mon Sep 17 00:00:00 2001 From: maxv Date: Sat, 12 Oct 2019 06:31:03 +0000 Subject: [PATCH] Rewrite the FPU code on x86. This greatly simplifies the logic and removes the dependency on IPL_HIGH. NVMM is updated accordingly. Posted on port-amd64 a week ago. Bump the kernel version to 9.99.16. --- sys/arch/amd64/amd64/amd64_trap.S | 3 +- sys/arch/amd64/amd64/genassym.cf | 5 +- sys/arch/amd64/amd64/locore.S | 27 +-- sys/arch/amd64/amd64/machdep.c | 15 +- sys/arch/amd64/amd64/spl.S | 3 +- sys/arch/amd64/include/frameasm.h | 16 +- sys/arch/amd64/include/pcb.h | 3 +- sys/arch/amd64/include/proc.h | 3 +- sys/arch/i386/i386/genassym.cf | 7 +- sys/arch/i386/i386/i386_trap.S | 15 +- sys/arch/i386/i386/locore.S | 29 +--- sys/arch/i386/i386/machdep.c | 16 +- sys/arch/i386/i386/spl.S | 5 +- sys/arch/i386/include/frameasm.h | 18 +- sys/arch/i386/include/pcb.h | 3 +- sys/arch/i386/include/proc.h | 4 +- sys/arch/x86/acpi/acpi_wakeup.c | 8 +- sys/arch/x86/include/cpu.h | 3 +- sys/arch/x86/include/fpu.h | 8 +- sys/arch/x86/x86/cpu.c | 17 +- sys/arch/x86/x86/fpu.c | 279 ++++++++++-------------------- sys/arch/x86/x86/ipi.c | 6 +- sys/arch/x86/x86/vm_machdep.c | 21 +-- sys/arch/xen/x86/cpu.c | 17 +- sys/arch/xen/x86/xen_ipi.c | 8 +- sys/dev/nvmm/x86/nvmm_x86_svm.c | 22 +-- sys/dev/nvmm/x86/nvmm_x86_vmx.c | 22 +-- sys/sys/param.h | 4 +- 28 files changed, 225 insertions(+), 362 deletions(-) diff --git a/sys/arch/amd64/amd64/amd64_trap.S b/sys/arch/amd64/amd64/amd64_trap.S index 3f510e98ab2f..a51471373b1d 100644 --- a/sys/arch/amd64/amd64/amd64_trap.S +++ b/sys/arch/amd64/amd64/amd64_trap.S @@ -1,4 +1,4 @@ -/* $NetBSD: amd64_trap.S,v 1.48 2019/05/18 13:32:12 maxv Exp $ */ +/* $NetBSD: amd64_trap.S,v 1.49 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1998, 2007, 2008, 2017 The NetBSD Foundation, Inc. @@ -672,6 +672,7 @@ calltrap: jmp .Lalltraps_checkast /* re-check ASTs */ 3: CHECK_DEFERRED_SWITCH jnz 9f + HANDLE_DEFERRED_FPU 6: #ifdef DIAGNOSTIC diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf index 168cae5cb8b5..8c3e42c07d36 100644 --- a/sys/arch/amd64/amd64/genassym.cf +++ b/sys/arch/amd64/amd64/genassym.cf @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.76 2019/05/29 16:54:41 maxv Exp $ +# $NetBSD: genassym.cf,v 1.77 2019/10/12 06:31:03 maxv Exp $ # # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -164,6 +164,7 @@ define PAGE_SIZE PAGE_SIZE define MDL_IRET MDL_IRET define MDL_COMPAT32 MDL_COMPAT32 +define MDL_FPU_IN_CPU MDL_FPU_IN_CPU define P_FLAG offsetof(struct proc, p_flag) define P_RASLIST offsetof(struct proc, p_raslist) @@ -188,6 +189,7 @@ define PCB_FLAGS offsetof(struct pcb, pcb_flags) define PCB_COMPAT32 PCB_COMPAT32 define PCB_FS offsetof(struct pcb, pcb_fs) define PCB_GS offsetof(struct pcb, pcb_gs) +define PCB_SAVEFPU offsetof(struct pcb, pcb_savefpu) define TF_RDI offsetof(struct trapframe, tf_rdi) define TF_RSI offsetof(struct trapframe, tf_rsi) @@ -244,7 +246,6 @@ define CPU_INFO_NSYSCALL offsetof(struct cpu_info, ci_data.cpu_nsyscall) define CPU_INFO_NTRAP offsetof(struct cpu_info, ci_data.cpu_ntrap) define CPU_INFO_NINTR offsetof(struct cpu_info, ci_data.cpu_nintr) define CPU_INFO_CURPRIORITY offsetof(struct cpu_info, ci_schedstate.spc_curpriority) -define CPU_INFO_FPCURLWP offsetof(struct cpu_info, ci_fpcurlwp) define CPU_INFO_GDT offsetof(struct cpu_info, ci_gdt) define CPU_INFO_ILEVEL offsetof(struct cpu_info, ci_ilevel) diff --git a/sys/arch/amd64/amd64/locore.S b/sys/arch/amd64/amd64/locore.S index 1f7db04090b4..ae0f4ed5f867 100644 --- a/sys/arch/amd64/amd64/locore.S +++ b/sys/arch/amd64/amd64/locore.S @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.188 2019/10/04 11:47:07 maxv Exp $ */ +/* $NetBSD: locore.S,v 1.189 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright-o-rama! @@ -1170,29 +1170,10 @@ ENTRY(cpu_switchto) movq %rax,TF_RIP(%rbx) .Lno_RAS: - /* - * Restore cr0 including FPU state (may have CR0_TS set). Note that - * IPL_SCHED prevents from FPU interrupt altering the LWP's saved cr0. - */ #ifndef XENPV + /* Raise the IPL to IPL_HIGH. Dropping the priority is deferred until + * mi_switch(), when cpu_switchto() returns. XXX Still needed? */ movl $IPL_HIGH,CPUVAR(ILEVEL) - movl PCB_CR0(%r14),%ecx /* has CR0_TS clear */ - movq %cr0,%rdx - - /* - * If our floating point registers are on a different CPU, - * set CR0_TS so we'll trap rather than reuse bogus state. - */ - cmpq CPUVAR(FPCURLWP),%r12 - je .Lskip_TS - orq $CR0_TS,%rcx -.Lskip_TS: - - /* Reloading CR0 is very expensive - avoid if possible. */ - cmpq %rdx,%rcx - je .Lskip_CR0 - movq %rcx,%cr0 -.Lskip_CR0: /* The 32bit LWPs are handled differently. */ testl $PCB_COMPAT32,PCB_FLAGS(%r14) @@ -1305,6 +1286,8 @@ ENTRY(handle_syscall) jne .Lspl_error #endif + HANDLE_DEFERRED_FPU + /* * Decide if we need to take a slow path. That's the case when we * want to reload %cs and %ss on a 64bit LWP (MDL_IRET set), or when diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c index f095a10e257c..f16e0c8640e4 100644 --- a/sys/arch/amd64/amd64/machdep.c +++ b/sys/arch/amd64/amd64/machdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: machdep.c,v 1.336 2019/08/21 20:30:36 skrll Exp $ */ +/* $NetBSD: machdep.c,v 1.337 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011 @@ -110,7 +110,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.336 2019/08/21 20:30:36 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.337 2019/10/12 06:31:03 maxv Exp $"); #include "opt_modular.h" #include "opt_user_ldt.h" @@ -442,18 +442,9 @@ x86_64_tls_switch(struct lwp *l) uint64_t zero = 0; /* - * Raise the IPL to IPL_HIGH. - * FPU IPIs can alter the LWP's saved cr0. Dropping the priority - * is deferred until mi_switch(), when cpu_switchto() returns. + * Raise the IPL to IPL_HIGH. XXX Still needed? */ (void)splhigh(); - /* - * If our floating point registers are on a different CPU, - * set CR0_TS so we'll trap rather than reuse bogus state. - */ - if (l != ci->ci_fpcurlwp) { - HYPERVISOR_fpu_taskswitch(1); - } /* Update segment registers */ if (pcb->pcb_flags & PCB_COMPAT32) { diff --git a/sys/arch/amd64/amd64/spl.S b/sys/arch/amd64/amd64/spl.S index eb4715af052c..fc6bfec97504 100644 --- a/sys/arch/amd64/amd64/spl.S +++ b/sys/arch/amd64/amd64/spl.S @@ -1,4 +1,4 @@ -/* $NetBSD: spl.S,v 1.40 2019/02/14 08:18:25 cherry Exp $ */ +/* $NetBSD: spl.S,v 1.41 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 2003 Wasabi Systems, Inc. @@ -397,6 +397,7 @@ LABEL(doreti_checkast) 3: CHECK_DEFERRED_SWITCH jnz 9f + HANDLE_DEFERRED_FPU 6: INTRFASTEXIT 9: diff --git a/sys/arch/amd64/include/frameasm.h b/sys/arch/amd64/include/frameasm.h index 92aa3dcc924a..e65e64077a2f 100644 --- a/sys/arch/amd64/include/frameasm.h +++ b/sys/arch/amd64/include/frameasm.h @@ -1,4 +1,4 @@ -/* $NetBSD: frameasm.h,v 1.44 2019/05/18 13:32:12 maxv Exp $ */ +/* $NetBSD: frameasm.h,v 1.45 2019/10/12 06:31:03 maxv Exp $ */ #ifndef _AMD64_MACHINE_FRAMEASM_H #define _AMD64_MACHINE_FRAMEASM_H @@ -246,4 +246,18 @@ #define CHECK_ASTPENDING(reg) cmpl $0, L_MD_ASTPENDING(reg) #define CLEAR_ASTPENDING(reg) movl $0, L_MD_ASTPENDING(reg) +/* + * If the FPU state is not in the CPU, restore it. Executed with interrupts + * disabled. + * + * %r14 is curlwp, must not be modified + * %rbx must not be modified + */ +#define HANDLE_DEFERRED_FPU \ + testl $MDL_FPU_IN_CPU,L_MD_FLAGS(%r14) ; \ + jnz 1f ; \ + call _C_LABEL(fpu_handle_deferred) ; \ + orl $MDL_FPU_IN_CPU,L_MD_FLAGS(%r14) ; \ +1: + #endif /* _AMD64_MACHINE_FRAMEASM_H */ diff --git a/sys/arch/amd64/include/pcb.h b/sys/arch/amd64/include/pcb.h index 3a3d748709ac..ba6ebb2f49a0 100644 --- a/sys/arch/amd64/include/pcb.h +++ b/sys/arch/amd64/include/pcb.h @@ -1,4 +1,4 @@ -/* $NetBSD: pcb.h,v 1.29 2018/07/26 09:29:08 maxv Exp $ */ +/* $NetBSD: pcb.h,v 1.30 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -96,7 +96,6 @@ struct pcb { uint32_t pcb_unused[8]; /* unused */ - struct cpu_info *pcb_fpcpu; /* cpu holding our fp state. */ union savefpu pcb_savefpu __aligned(64); /* floating point state */ /* **** DO NOT ADD ANYTHING HERE **** */ }; diff --git a/sys/arch/amd64/include/proc.h b/sys/arch/amd64/include/proc.h index 06a5688aaecf..033e876c09bd 100644 --- a/sys/arch/amd64/include/proc.h +++ b/sys/arch/amd64/include/proc.h @@ -1,4 +1,4 @@ -/* $NetBSD: proc.h,v 1.22 2017/02/25 13:34:21 kamil Exp $ */ +/* $NetBSD: proc.h,v 1.23 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1991 Regents of the University of California. @@ -55,6 +55,7 @@ struct mdlwp { #define MDL_COMPAT32 0x0008 /* i386, always return via iret */ #define MDL_IRET 0x0010 /* force return via iret, not sysret */ +#define MDL_FPU_IN_CPU 0x0020 /* the FPU state is in the CPU */ struct mdproc { int md_flags; diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index 479c5319fc2f..01bc0594c473 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.113 2019/03/09 08:42:25 maxv Exp $ +# $NetBSD: genassym.cf,v 1.114 2019/10/12 06:31:03 maxv Exp $ # # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -165,6 +165,7 @@ define L_PCB offsetof(struct lwp, l_addr) define L_FLAG offsetof(struct lwp, l_flag) define L_PROC offsetof(struct lwp, l_proc) define L_MD_REGS offsetof(struct lwp, l_md.md_regs) +define L_MD_FLAGS offsetof(struct lwp, l_md.md_flags) define L_CTXSWTCH offsetof(struct lwp, l_ctxswtch) define L_MD_ASTPENDING offsetof(struct lwp, l_md.md_astpending) define L_CPU offsetof(struct lwp, l_cpu) @@ -176,6 +177,8 @@ define P_FLAG offsetof(struct proc, p_flag) define P_RASLIST offsetof(struct proc, p_raslist) define P_MD_SYSCALL offsetof(struct proc, p_md.md_syscall) +define MDL_FPU_IN_CPU MDL_FPU_IN_CPU + define LW_SYSTEM LW_SYSTEM define GUFS_SEL GUFS_SEL @@ -200,6 +203,7 @@ define PCB_ONFAULT offsetof(struct pcb, pcb_onfault) define PCB_FSD offsetof(struct pcb, pcb_fsd) define PCB_GSD offsetof(struct pcb, pcb_gsd) define PCB_IOMAP offsetof(struct pcb, pcb_iomap) +define PCB_SAVEFPU offsetof(struct pcb, pcb_savefpu) define TF_CS offsetof(struct trapframe, tf_cs) define TF_EIP offsetof(struct trapframe, tf_eip) @@ -251,7 +255,6 @@ define CPU_INFO_WANT_PMAPLOAD offsetof(struct cpu_info, ci_want_pmapload) define CPU_INFO_TLBSTATE offsetof(struct cpu_info, ci_tlbstate) define TLBSTATE_VALID TLBSTATE_VALID define CPU_INFO_CURLWP offsetof(struct cpu_info, ci_curlwp) -define CPU_INFO_FPCURLWP offsetof(struct cpu_info, ci_fpcurlwp) define CPU_INFO_CURLDT offsetof(struct cpu_info, ci_curldt) define CPU_INFO_IDLELWP offsetof(struct cpu_info, ci_data.cpu_idlelwp) define CPU_INFO_PMAP offsetof(struct cpu_info, ci_pmap) diff --git a/sys/arch/i386/i386/i386_trap.S b/sys/arch/i386/i386/i386_trap.S index 592cef92cae8..bec98ad1571b 100644 --- a/sys/arch/i386/i386/i386_trap.S +++ b/sys/arch/i386/i386/i386_trap.S @@ -1,4 +1,4 @@ -/* $NetBSD: i386_trap.S,v 1.19 2019/10/04 15:28:00 maxv Exp $ */ +/* $NetBSD: i386_trap.S,v 1.20 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright 2002 (c) Wasabi Systems, Inc. @@ -66,7 +66,7 @@ #if 0 #include -__KERNEL_RCSID(0, "$NetBSD: i386_trap.S,v 1.19 2019/10/04 15:28:00 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: i386_trap.S,v 1.20 2019/10/12 06:31:03 maxv Exp $"); #endif /* @@ -436,10 +436,10 @@ calltrap: #ifdef XEN STIC(%eax) - jz 6f + jz 22f call _C_LABEL(stipending) testl %eax,%eax - jz 6f + jz 22f /* process pending interrupts */ CLI(%eax) movl CPUVAR(ILEVEL),%ebx @@ -448,15 +448,18 @@ calltrap: movl %ebx,%eax /* get cpl */ movl CPUVAR(XUNMASK)(,%eax,4),%eax andl CPUVAR(XPENDING),%eax /* any non-masked bits left? */ - jz 7f + jz 11f bsrl %eax,%eax btrl %eax,CPUVAR(XPENDING) movl CPUVAR(XSOURCES)(,%eax,4),%eax jmp *IS_RESUME(%eax) -7: movl %ebx,CPUVAR(ILEVEL) /* restore cpl */ +11: movl %ebx,CPUVAR(ILEVEL) /* restore cpl */ jmp .Lalltraps_checkusr +22: #endif + HANDLE_DEFERRED_FPU + 6: #ifdef DIAGNOSTIC cmpl CPUVAR(ILEVEL),%ebx diff --git a/sys/arch/i386/i386/locore.S b/sys/arch/i386/i386/locore.S index c6df3d46fbb5..5cd1cc3cae57 100644 --- a/sys/arch/i386/i386/locore.S +++ b/sys/arch/i386/i386/locore.S @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.171 2019/10/04 15:28:00 maxv Exp $ */ +/* $NetBSD: locore.S,v 1.172 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright-o-rama! @@ -128,7 +128,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.171 2019/10/04 15:28:00 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.172 2019/10/12 06:31:03 maxv Exp $"); #include "opt_copy_symtab.h" #include "opt_ddb.h" @@ -1187,33 +1187,14 @@ skip_save: movl %eax,TF_EIP(%ecx) no_RAS: - /* - * Restore cr0 (including FPU state). Raise the IPL to IPL_HIGH. - * FPU IPIs can alter the LWP's saved cr0. Dropping the priority - * is deferred until mi_switch(), when cpu_switchto() returns. - */ #ifdef XENPV pushl %edi call _C_LABEL(i386_tls_switch) addl $4,%esp #else + /* Raise the IPL to IPL_HIGH. Dropping the priority is deferred until + * mi_switch(), when cpu_switchto() returns. XXX Still needed? */ movl $IPL_HIGH,CPUVAR(ILEVEL) - movl PCB_CR0(%ebx),%ecx /* has CR0_TS clear */ - movl %cr0,%edx - - /* - * If our floating point registers are on a different CPU, - * set CR0_TS so we'll trap rather than reuse bogus state. - */ - cmpl CPUVAR(FPCURLWP),%edi - je skip_TS - orl $CR0_TS,%ecx -skip_TS: - - /* Reloading CR0 is very expensive - avoid if possible. */ - cmpl %edx,%ecx - je switch_return - movl %ecx,%cr0 #endif switch_return: @@ -1322,6 +1303,8 @@ IDTVEC(syscall) jne 3f #endif + HANDLE_DEFERRED_FPU + INTRFASTEXIT #ifdef DIAGNOSTIC diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index eb3459d65ac4..d1c86e1b0454 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: machdep.c,v 1.820 2019/05/19 08:46:15 maxv Exp $ */ +/* $NetBSD: machdep.c,v 1.821 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009, 2017 @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.820 2019/05/19 08:46:15 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.821 2019/10/12 06:31:03 maxv Exp $"); #include "opt_beep.h" #include "opt_compat_freebsd.h" @@ -509,20 +509,10 @@ i386_tls_switch(lwp_t *l) struct pcb *pcb = lwp_getpcb(l); /* - * Raise the IPL to IPL_HIGH. - * FPU IPIs can alter the LWP's saved cr0. Dropping the priority - * is deferred until mi_switch(), when cpu_switchto() returns. + * Raise the IPL to IPL_HIGH. XXX Still needed? */ (void)splhigh(); - /* - * If our floating point registers are on a different CPU, - * set CR0_TS so we'll trap rather than reuse bogus state. - */ - if (l != ci->ci_fpcurlwp) { - HYPERVISOR_fpu_taskswitch(1); - } - /* Update TLS segment pointers */ update_descriptor(&ci->ci_gdt[GUFS_SEL], (union descriptor *)&pcb->pcb_fsd); diff --git a/sys/arch/i386/i386/spl.S b/sys/arch/i386/i386/spl.S index a38766d9cca9..a1380627f04f 100644 --- a/sys/arch/i386/i386/spl.S +++ b/sys/arch/i386/i386/spl.S @@ -1,4 +1,4 @@ -/* $NetBSD: spl.S,v 1.48 2019/10/04 15:28:00 maxv Exp $ */ +/* $NetBSD: spl.S,v 1.49 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1998, 2007, 2008 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.48 2019/10/04 15:28:00 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.49 2019/10/12 06:31:03 maxv Exp $"); #include "opt_ddb.h" #include "opt_spldebug.h" @@ -326,6 +326,7 @@ END(doreti_checkast) 3: CHECK_DEFERRED_SWITCH jnz 9f + HANDLE_DEFERRED_FPU 6: #ifdef XEN STIC(%eax) diff --git a/sys/arch/i386/include/frameasm.h b/sys/arch/i386/include/frameasm.h index 0e90db676065..e787afa433f4 100644 --- a/sys/arch/i386/include/frameasm.h +++ b/sys/arch/i386/include/frameasm.h @@ -1,4 +1,4 @@ -/* $NetBSD: frameasm.h,v 1.28 2019/02/14 08:18:25 cherry Exp $ */ +/* $NetBSD: frameasm.h,v 1.29 2019/10/12 06:31:03 maxv Exp $ */ #ifndef _I386_FRAMEASM_H_ #define _I386_FRAMEASM_H_ @@ -98,6 +98,22 @@ cmpl $0, L_MD_ASTPENDING(reg) #define CLEAR_ASTPENDING(reg) movl $0, L_MD_ASTPENDING(reg) +/* + * If the FPU state is not in the CPU, restore it. Executed with interrupts + * disabled. + * + * %ebx must not be modified + */ +#define HANDLE_DEFERRED_FPU \ + movl CPUVAR(CURLWP),%eax ; \ + testl $MDL_FPU_IN_CPU,L_MD_FLAGS(%eax) ; \ + jnz 1f ; \ + pushl %eax ; \ + call _C_LABEL(fpu_handle_deferred) ; \ + popl %eax ; \ + orl $MDL_FPU_IN_CPU,L_MD_FLAGS(%eax) ; \ +1: + /* * IDEPTH_INCR: * increase ci_idepth and switch to the interrupt stack if necessary. diff --git a/sys/arch/i386/include/pcb.h b/sys/arch/i386/include/pcb.h index 3756108374de..5deb5ac99088 100644 --- a/sys/arch/i386/include/pcb.h +++ b/sys/arch/i386/include/pcb.h @@ -1,4 +1,4 @@ -/* $NetBSD: pcb.h,v 1.58 2018/07/26 09:29:08 maxv Exp $ */ +/* $NetBSD: pcb.h,v 1.59 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1998, 2009 The NetBSD Foundation, Inc. @@ -99,7 +99,6 @@ struct pcb { int not_used[15]; /* floating point state */ - struct cpu_info *pcb_fpcpu; /* cpu holding our fp state. */ union savefpu pcb_savefpu __aligned(64); /* **** DO NOT ADD ANYTHING HERE **** */ diff --git a/sys/arch/i386/include/proc.h b/sys/arch/i386/include/proc.h index dd3cad8238ee..855fe9b5f82a 100644 --- a/sys/arch/i386/include/proc.h +++ b/sys/arch/i386/include/proc.h @@ -1,4 +1,4 @@ -/* $NetBSD: proc.h,v 1.45 2017/02/25 13:34:21 kamil Exp $ */ +/* $NetBSD: proc.h,v 1.46 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1991 Regents of the University of California. @@ -43,6 +43,8 @@ struct pmap; struct vm_page; +#define MDL_FPU_IN_CPU 0x0020 /* the FPU state is in the CPU */ + struct mdlwp { struct trapframe *md_regs; /* registers on current frame */ int md_flags; /* machine-dependent flags */ diff --git a/sys/arch/x86/acpi/acpi_wakeup.c b/sys/arch/x86/acpi/acpi_wakeup.c index c0e54ad89637..9f1689cad3f7 100644 --- a/sys/arch/x86/acpi/acpi_wakeup.c +++ b/sys/arch/x86/acpi/acpi_wakeup.c @@ -1,4 +1,4 @@ -/* $NetBSD: acpi_wakeup.c,v 1.50 2019/06/17 16:34:02 jmcneill Exp $ */ +/* $NetBSD: acpi_wakeup.c,v 1.51 2019/10/12 06:31:03 maxv Exp $ */ /*- * Copyright (c) 2002, 2011 The NetBSD Foundation, Inc. @@ -59,7 +59,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.50 2019/06/17 16:34:02 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: acpi_wakeup.c,v 1.51 2019/10/12 06:31:03 maxv Exp $"); #include #include @@ -249,7 +249,7 @@ acpi_cpu_sleep(struct cpu_info *ci) KASSERT(ci == curcpu()); s = splhigh(); - fpusave_cpu(true); + fpu_save(); x86_disable_intr(); /* @@ -313,7 +313,7 @@ acpi_md_sleep(int state) AcpiSetFirmwareWakingVector(acpi_wakeup_paddr, 0); s = splhigh(); - fpusave_cpu(true); + fpu_save(); x86_disable_intr(); #ifdef MULTIPROCESSOR diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h index 5c9ef28aff19..059dd4a0b728 100644 --- a/sys/arch/x86/include/cpu.h +++ b/sys/arch/x86/include/cpu.h @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.109 2019/10/03 05:06:29 maxv Exp $ */ +/* $NetBSD: cpu.h,v 1.110 2019/10/12 06:31:03 maxv Exp $ */ /* * Copyright (c) 1990 The Regents of the University of California. @@ -119,7 +119,6 @@ struct cpu_info { */ struct cpu_info *ci_next; /* next cpu */ struct lwp *ci_curlwp; /* current owner of the processor */ - struct lwp *ci_fpcurlwp; /* current owner of the FPU */ cpuid_t ci_cpuid; /* our CPU ID */ uint32_t ci_acpiid; /* our ACPI/MADT ID */ uint32_t ci_initapicid; /* our initial APIC ID */ diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h index 65605129badd..ba2f55b6d0f3 100644 --- a/sys/arch/x86/include/fpu.h +++ b/sys/arch/x86/include/fpu.h @@ -1,4 +1,4 @@ -/* $NetBSD: fpu.h,v 1.18 2019/10/04 11:47:08 maxv Exp $ */ +/* $NetBSD: fpu.h,v 1.19 2019/10/12 06:31:03 maxv Exp $ */ #ifndef _X86_FPU_H_ #define _X86_FPU_H_ @@ -17,8 +17,7 @@ void fpuinit_mxcsr_mask(void); void fpu_area_save(void *, uint64_t); void fpu_area_restore(void *, uint64_t); -void fpusave_lwp(struct lwp *, bool); -void fpusave_cpu(bool); +void fpu_save(void); void fpu_set_default_cw(struct lwp *, unsigned int); @@ -28,7 +27,8 @@ void fpudna(struct trapframe *); void fpu_clear(struct lwp *, unsigned int); void fpu_sigreset(struct lwp *); -void fpu_save_area_fork(struct pcb *, const struct pcb *); +void fpu_lwp_fork(struct lwp *, struct lwp *); +void fpu_lwp_abandon(struct lwp *l); void process_write_fpregs_xmm(struct lwp *, const struct fxsave *); void process_write_fpregs_s87(struct lwp *, const struct save87 *); diff --git a/sys/arch/x86/x86/cpu.c b/sys/arch/x86/x86/cpu.c index 9ad26e633e56..463f1194c7cf 100644 --- a/sys/arch/x86/x86/cpu.c +++ b/sys/arch/x86/x86/cpu.c @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.172 2019/08/30 07:53:47 mrg Exp $ */ +/* $NetBSD: cpu.c,v 1.173 2019/10/12 06:31:04 maxv Exp $ */ /* * Copyright (c) 2000-2012 NetBSD Foundation, Inc. @@ -62,7 +62,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.172 2019/08/30 07:53:47 mrg Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.173 2019/10/12 06:31:04 maxv Exp $"); #include "opt_ddb.h" #include "opt_mpbios.h" /* for MPDEBUG */ @@ -986,15 +986,14 @@ cpu_debug_dump(void) ""; db_printf("addr %sdev id flags ipis curlwp " - "fpcurlwp\n", sixtyfour64space); + "\n", sixtyfour64space); for (CPU_INFO_FOREACH(cii, ci)) { - db_printf("%p %s %ld %x %x %10p %10p\n", + db_printf("%p %s %ld %x %x %10p\n", ci, ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev), (long)ci->ci_cpuid, ci->ci_flags, ci->ci_ipis, - ci->ci_curlwp, - ci->ci_fpcurlwp); + ci->ci_curlwp); } } #endif @@ -1159,11 +1158,7 @@ cpu_init_msrs(struct cpu_info *ci, bool full) void cpu_offline_md(void) { - int s; - - s = splhigh(); - fpusave_cpu(true); - splx(s); + return; } /* XXX joerg restructure and restart CPUs individually */ diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c index 75dac19d50c2..1fe7bf738949 100644 --- a/sys/arch/x86/x86/fpu.c +++ b/sys/arch/x86/x86/fpu.c @@ -1,11 +1,11 @@ -/* $NetBSD: fpu.c,v 1.57 2019/10/04 11:47:08 maxv Exp $ */ +/* $NetBSD: fpu.c,v 1.58 2019/10/12 06:31:04 maxv Exp $ */ /* - * Copyright (c) 2008 The NetBSD Foundation, Inc. All + * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. All * rights reserved. * * This code is derived from software developed for The NetBSD Foundation - * by Andrew Doran. + * by Andrew Doran and Maxime Villard. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -96,7 +96,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.57 2019/10/04 11:47:08 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.58 2019/10/12 06:31:04 maxv Exp $"); #include "opt_multiprocessor.h" @@ -126,14 +126,44 @@ __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.57 2019/10/04 11:47:08 maxv Exp $"); #define stts() HYPERVISOR_fpu_taskswitch(1) #endif +void fpu_handle_deferred(void); +void fpu_switch(struct lwp *, struct lwp *); + uint32_t x86_fpu_mxcsr_mask __read_mostly = 0; static inline union savefpu * -lwp_fpuarea(struct lwp *l) +fpu_lwp_area(struct lwp *l) { struct pcb *pcb = lwp_getpcb(l); + union savefpu *area = &pcb->pcb_savefpu; - return &pcb->pcb_savefpu; + KASSERT((l->l_flag & LW_SYSTEM) == 0); + if (l == curlwp) { + fpu_save(); + } + KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU)); + + return area; +} + +/* + * Bring curlwp's FPU state in memory. It will get installed back in the CPU + * when returning to userland. + */ +void +fpu_save(void) +{ + struct lwp *l = curlwp; + struct pcb *pcb = lwp_getpcb(l); + union savefpu *area = &pcb->pcb_savefpu; + + kpreempt_disable(); + if (l->l_md.md_flags & MDL_FPU_IN_CPU) { + KASSERT((l->l_flag & LW_SYSTEM) == 0); + fpu_area_save(area, x86_xsave_features); + l->l_md.md_flags &= ~MDL_FPU_IN_CPU; + } + kpreempt_enable(); } void @@ -213,8 +243,6 @@ fpu_errata_amd(void) void fpu_area_save(void *area, uint64_t xsave_features) { - clts(); - switch (x86_fpu_save) { case FPU_SAVE_FSAVE: fnsave(area); @@ -229,6 +257,8 @@ fpu_area_save(void *area, uint64_t xsave_features) xsaveopt(area, xsave_features); break; } + + stts(); } void @@ -254,45 +284,56 @@ fpu_area_restore(void *area, uint64_t xsave_features) } } -static void -fpu_lwp_install(struct lwp *l) +void +fpu_handle_deferred(void) { - struct pcb *pcb = lwp_getpcb(l); - struct cpu_info *ci = curcpu(); - - KASSERT(ci->ci_fpcurlwp == NULL); - KASSERT(pcb->pcb_fpcpu == NULL); - ci->ci_fpcurlwp = l; - pcb->pcb_fpcpu = ci; + struct pcb *pcb = lwp_getpcb(curlwp); fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features); } -void fpu_switch(struct lwp *, struct lwp *); - void fpu_switch(struct lwp *oldlwp, struct lwp *newlwp) { - int s; + struct pcb *pcb; - s = splhigh(); -#ifdef DIAGNOSTIC - if (oldlwp != NULL) { - struct pcb *pcb = lwp_getpcb(oldlwp); - struct cpu_info *ci = curcpu(); - if (pcb->pcb_fpcpu == NULL) { - KASSERT(ci->ci_fpcurlwp != oldlwp); - } else if (pcb->pcb_fpcpu == ci) { - KASSERT(ci->ci_fpcurlwp == oldlwp); - } else { - panic("%s: oldlwp's state installed elsewhere", - __func__); - } + if ((oldlwp != NULL) && (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU)) { + KASSERT(!(oldlwp->l_flag & LW_SYSTEM)); + pcb = lwp_getpcb(oldlwp); + fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features); + oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU; } -#endif - fpusave_cpu(true); - if (!(newlwp->l_flag & LW_SYSTEM)) - fpu_lwp_install(newlwp); - splx(s); + KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU)); +} + +void +fpu_lwp_fork(struct lwp *l1, struct lwp *l2) +{ + struct pcb *pcb2 = lwp_getpcb(l2); + union savefpu *fpu_save; + + /* Kernel threads have no FPU. */ + if (__predict_false(l2->l_flag & LW_SYSTEM)) { + return; + } + /* For init(8). */ + if (__predict_false(l1->l_flag & LW_SYSTEM)) { + memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size); + return; + } + + fpu_save = fpu_lwp_area(l1); + memcpy(&pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size); + l2->l_md.md_flags &= ~MDL_FPU_IN_CPU; +} + +void +fpu_lwp_abandon(struct lwp *l) +{ + KASSERT(l == curlwp); + kpreempt_disable(); + l->l_md.md_flags &= ~MDL_FPU_IN_CPU; + stts(); + kpreempt_enable(); } /* -------------------------------------------------------------------------- */ @@ -399,11 +440,7 @@ fputrap(struct trapframe *frame) panic("fpu trap from kernel, trapframe %p\n", frame); } - /* - * At this point, fpcurlwp should be curlwp. If it wasn't, the TS bit - * should be set, and we should have gotten a DNA exception. - */ - KASSERT(curcpu()->ci_fpcurlwp == curlwp); + KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU); if (frame->tf_trapno == T_XMM) { uint32_t mxcsr; @@ -440,104 +477,16 @@ fputrap(struct trapframe *frame) (*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi); } -/* - * Implement device not available (DNA) exception. Called with interrupts still - * disabled. - */ void fpudna(struct trapframe *frame) { - struct cpu_info *ci = curcpu(); - int s; - - if (!USERMODE(frame->tf_cs)) { - panic("fpudna from kernel, ip %p, trapframe %p\n", - (void *)X86_TF_RIP(frame), frame); - } - - /* Install the LWP's FPU state. */ - s = splhigh(); - fpu_lwp_install(ci->ci_curlwp); - splx(s); + panic("fpudna from %s, ip %p, trapframe %p", + USERMODE(frame->tf_cs) ? "userland" : "kernel", + (void *)X86_TF_RIP(frame), frame); } /* -------------------------------------------------------------------------- */ -/* - * Save current CPU's FPU state. Must be called at IPL_HIGH. - */ -void -fpusave_cpu(bool save) -{ - struct cpu_info *ci; - struct pcb *pcb; - struct lwp *l; - - KASSERT(curcpu()->ci_ilevel == IPL_HIGH); - - ci = curcpu(); - l = ci->ci_fpcurlwp; - if (l == NULL) { - return; - } - pcb = lwp_getpcb(l); - - if (save) { - fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features); - } - - stts(); - pcb->pcb_fpcpu = NULL; - ci->ci_fpcurlwp = NULL; -} - -/* - * Save l's FPU state, which may be on this processor or another processor. - * It may take some time, so we avoid disabling preemption where possible. - * Caller must know that the target LWP is stopped, otherwise this routine - * may race against it. - */ -void -fpusave_lwp(struct lwp *l, bool save) -{ - struct pcb *pcb = lwp_getpcb(l); - struct cpu_info *oci; - int s, spins, ticks; - - spins = 0; - ticks = hardclock_ticks; - for (;;) { - s = splhigh(); - oci = pcb->pcb_fpcpu; - if (oci == NULL) { - splx(s); - break; - } - if (oci == curcpu()) { - KASSERT(oci->ci_fpcurlwp == l); - fpusave_cpu(save); - splx(s); - break; - } - splx(s); -#ifdef XENPV - if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) { - panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.", - cpu_name(oci)); - } -#else - x86_send_ipi(oci, X86_IPI_SYNCH_FPU); -#endif - while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) { - x86_pause(); - spins++; - } - if (spins > 100000000) { - panic("fpusave_lwp: did not"); - } - } -} - static inline void fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate) { @@ -552,7 +501,7 @@ fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate) void fpu_set_default_cw(struct lwp *l, unsigned int x87_cw) { - union savefpu *fpu_save = lwp_fpuarea(l); + union savefpu *fpu_save = fpu_lwp_area(l); struct pcb *pcb = lwp_getpcb(l); if (i386_use_fxsave) { @@ -571,18 +520,9 @@ fpu_clear(struct lwp *l, unsigned int x87_cw) { union savefpu *fpu_save; struct pcb *pcb; - int s; KASSERT(l == curlwp); - KASSERT((l->l_flag & LW_SYSTEM) == 0); - fpu_save = lwp_fpuarea(l); - pcb = lwp_getpcb(l); - - s = splhigh(); - - KASSERT(pcb->pcb_fpcpu == NULL || pcb->pcb_fpcpu == curcpu()); - fpusave_cpu(false); - KASSERT(pcb->pcb_fpcpu == NULL); + fpu_save = fpu_lwp_area(l); switch (x86_fpu_save) { case FPU_SAVE_FSAVE: @@ -608,16 +548,14 @@ fpu_clear(struct lwp *l, unsigned int x87_cw) break; } + pcb = lwp_getpcb(l); pcb->pcb_fpu_dflt_cw = x87_cw; - - fpu_lwp_install(l); - splx(s); } void fpu_sigreset(struct lwp *l) { - union savefpu *fpu_save = lwp_fpuarea(l); + union savefpu *fpu_save = fpu_lwp_area(l); struct pcb *pcb = lwp_getpcb(l); /* @@ -635,17 +573,6 @@ fpu_sigreset(struct lwp *l) } } -void -fpu_save_area_fork(struct pcb *pcb2, const struct pcb *pcb1) -{ - const uint8_t *src = (const uint8_t *)&pcb1->pcb_savefpu; - uint8_t *dst = (uint8_t *)&pcb2->pcb_savefpu; - - memcpy(dst, src, x86_fpu_save_size); - - KASSERT(pcb2->pcb_fpcpu == NULL); -} - /* -------------------------------------------------------------------------- */ static void @@ -769,10 +696,7 @@ process_s87_to_xmm(const struct save87 *s87, struct fxsave *sxmm) void process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs) { - union savefpu *fpu_save; - - fpusave_lwp(l, true); - fpu_save = lwp_fpuarea(l); + union savefpu *fpu_save = fpu_lwp_area(l); if (i386_use_fxsave) { memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm)); @@ -792,17 +716,12 @@ process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs) void process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs) { - union savefpu *fpu_save; + union savefpu *fpu_save = fpu_lwp_area(l); if (i386_use_fxsave) { - /* Save so we don't lose the xmm registers */ - fpusave_lwp(l, true); - fpu_save = lwp_fpuarea(l); process_s87_to_xmm(fpregs, &fpu_save->sv_xmm); fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE); } else { - fpusave_lwp(l, false); - fpu_save = lwp_fpuarea(l); memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87)); } } @@ -810,10 +729,7 @@ process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs) void process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs) { - union savefpu *fpu_save; - - fpusave_lwp(l, true); - fpu_save = lwp_fpuarea(l); + union savefpu *fpu_save = fpu_lwp_area(l); if (i386_use_fxsave) { memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm)); @@ -826,10 +742,7 @@ process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs) void process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs) { - union savefpu *fpu_save; - - fpusave_lwp(l, true); - fpu_save = lwp_fpuarea(l); + union savefpu *fpu_save = fpu_lwp_area(l); if (i386_use_fxsave) { memset(fpregs, 0, sizeof(*fpregs)); @@ -842,10 +755,7 @@ process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs) int process_read_xstate(struct lwp *l, struct xstate *xstate) { - union savefpu *fpu_save; - - fpusave_lwp(l, true); - fpu_save = lwp_fpuarea(l); + union savefpu *fpu_save = fpu_lwp_area(l); if (x86_fpu_save == FPU_SAVE_FSAVE) { /* Convert from legacy FSAVE format. */ @@ -924,10 +834,7 @@ process_verify_xstate(const struct xstate *xstate) int process_write_xstate(struct lwp *l, const struct xstate *xstate) { - union savefpu *fpu_save; - - fpusave_lwp(l, true); - fpu_save = lwp_fpuarea(l); + union savefpu *fpu_save = fpu_lwp_area(l); /* Convert data into legacy FSAVE format. */ if (x86_fpu_save == FPU_SAVE_FSAVE) { diff --git a/sys/arch/x86/x86/ipi.c b/sys/arch/x86/x86/ipi.c index 766e85b185b4..fea2be1608f7 100644 --- a/sys/arch/x86/x86/ipi.c +++ b/sys/arch/x86/x86/ipi.c @@ -1,4 +1,4 @@ -/* $NetBSD: ipi.c,v 1.27 2017/02/08 10:08:26 maxv Exp $ */ +/* $NetBSD: ipi.c,v 1.28 2019/10/12 06:31:04 maxv Exp $ */ /*- * Copyright (c) 2000, 2008, 2009 The NetBSD Foundation, Inc. @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ipi.c,v 1.27 2017/02/08 10:08:26 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ipi.c,v 1.28 2019/10/12 06:31:04 maxv Exp $"); #include "opt_mtrr.h" @@ -176,7 +176,7 @@ static void x86_ipi_synch_fpu(struct cpu_info *ci) { - fpusave_cpu(true); + panic("%s: impossible", __func__); } #ifdef MTRR diff --git a/sys/arch/x86/x86/vm_machdep.c b/sys/arch/x86/x86/vm_machdep.c index d90254f2b465..4ddd79c9aac2 100644 --- a/sys/arch/x86/x86/vm_machdep.c +++ b/sys/arch/x86/x86/vm_machdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: vm_machdep.c,v 1.37 2019/02/11 14:59:33 cherry Exp $ */ +/* $NetBSD: vm_machdep.c,v 1.38 2019/10/12 06:31:04 maxv Exp $ */ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. @@ -80,7 +80,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.37 2019/02/11 14:59:33 cherry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.38 2019/10/12 06:31:04 maxv Exp $"); #include "opt_mtrr.h" @@ -139,12 +139,6 @@ cpu_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize, pcb1 = lwp_getpcb(l1); pcb2 = lwp_getpcb(l2); - /* - * If parent LWP was using FPU, then we have to save the FPU h/w - * state to PCB so that we can copy it. - */ - fpusave_lwp(l1, true); - /* * Sync the PCB before we copy it. */ @@ -158,11 +152,8 @@ cpu_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize, /* Copy the PCB from parent, except the FPU state. */ memcpy(pcb2, pcb1, offsetof(struct pcb, pcb_savefpu)); - /* FPU state not installed. */ - pcb2->pcb_fpcpu = NULL; - - /* Copy FPU state. */ - fpu_save_area_fork(pcb2, pcb1); + /* Fork the FPU state. */ + fpu_lwp_fork(l1, l2); /* Never inherit CPU Debug Registers */ pcb2->pcb_dbregs = NULL; @@ -260,8 +251,8 @@ void cpu_lwp_free(struct lwp *l, int proc) { - /* If we were using the FPU, forget about it. */ - fpusave_lwp(l, false); + /* Abandon the FPU state. */ + fpu_lwp_abandon(l); /* Abandon the dbregs state. */ x86_dbregs_abandon(l); diff --git a/sys/arch/xen/x86/cpu.c b/sys/arch/xen/x86/cpu.c index fcf8ae2a1b84..dc49a82ae824 100644 --- a/sys/arch/xen/x86/cpu.c +++ b/sys/arch/xen/x86/cpu.c @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.129 2019/03/09 08:42:25 maxv Exp $ */ +/* $NetBSD: cpu.c,v 1.130 2019/10/12 06:31:04 maxv Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. @@ -65,7 +65,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.129 2019/03/09 08:42:25 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.130 2019/10/12 06:31:04 maxv Exp $"); #include "opt_ddb.h" #include "opt_multiprocessor.h" @@ -759,15 +759,14 @@ cpu_debug_dump(void) struct cpu_info *ci; CPU_INFO_ITERATOR cii; - db_printf("addr dev id flags ipis curlwp fpcurlwp\n"); + db_printf("addr dev id flags ipis curlwp\n"); for (CPU_INFO_FOREACH(cii, ci)) { - db_printf("%p %s %ld %x %x %10p %10p\n", + db_printf("%p %s %ld %x %x %10p\n", ci, ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev), (long)ci->ci_cpuid, ci->ci_flags, ci->ci_ipis, - ci->ci_curlwp, - ci->ci_fpcurlwp); + ci->ci_curlwp); } } #endif /* DDB */ @@ -1066,11 +1065,7 @@ cpu_init_msrs(struct cpu_info *ci, bool full) void cpu_offline_md(void) { - int s; - - s = splhigh(); - fpusave_cpu(true); - splx(s); + return; } void diff --git a/sys/arch/xen/x86/xen_ipi.c b/sys/arch/xen/x86/xen_ipi.c index 3eeb2113e4a4..538332b8dc11 100644 --- a/sys/arch/xen/x86/xen_ipi.c +++ b/sys/arch/xen/x86/xen_ipi.c @@ -1,4 +1,4 @@ -/* $NetBSD: xen_ipi.c,v 1.32 2019/02/02 12:32:55 cherry Exp $ */ +/* $NetBSD: xen_ipi.c,v 1.33 2019/10/12 06:31:04 maxv Exp $ */ /*- * Copyright (c) 2011 The NetBSD Foundation, Inc. @@ -33,10 +33,10 @@ /* * Based on: x86/ipi.c - * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.32 2019/02/02 12:32:55 cherry Exp $"); + * __KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.33 2019/10/12 06:31:04 maxv Exp $"); */ -__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.32 2019/02/02 12:32:55 cherry Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xen_ipi.c,v 1.33 2019/10/12 06:31:04 maxv Exp $"); #include "opt_ddb.h" @@ -237,7 +237,7 @@ xen_ipi_synch_fpu(struct cpu_info *ci, struct intrframe *intrf) KASSERT(ci != NULL); KASSERT(intrf != NULL); - fpusave_cpu(true); + panic("%s: impossible", __func__); } #ifdef DDB diff --git a/sys/dev/nvmm/x86/nvmm_x86_svm.c b/sys/dev/nvmm/x86/nvmm_x86_svm.c index 0c8b9ef84704..efd3cd66255c 100644 --- a/sys/dev/nvmm/x86/nvmm_x86_svm.c +++ b/sys/dev/nvmm/x86/nvmm_x86_svm.c @@ -1,4 +1,4 @@ -/* $NetBSD: nvmm_x86_svm.c,v 1.49 2019/10/04 12:17:05 maxv Exp $ */ +/* $NetBSD: nvmm_x86_svm.c,v 1.50 2019/10/12 06:31:04 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.49 2019/10/04 12:17:05 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_svm.c,v 1.50 2019/10/12 06:31:04 maxv Exp $"); #include #include @@ -541,8 +541,6 @@ struct svm_cpudata { uint64_t sfmask; uint64_t fsbase; uint64_t kernelgsbase; - bool ts_set; - struct xsave_header hfpu __aligned(64); /* Intr state */ bool int_window_exit; @@ -1137,6 +1135,9 @@ svm_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, } cpudata->gxcr0 = val; + if (svm_xcr0_mask != 0) { + wrxcr(0, cpudata->gxcr0); + } svm_inkernel_advance(cpudata->vmcb); return; @@ -1159,9 +1160,7 @@ svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) { struct svm_cpudata *cpudata = vcpu->cpudata; - cpudata->ts_set = (rcr0() & CR0_TS) != 0; - - fpu_area_save(&cpudata->hfpu, svm_xcr0_mask); + fpu_save(); fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask); if (svm_xcr0_mask != 0) { @@ -1181,11 +1180,6 @@ svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) } fpu_area_save(&cpudata->gfpu, svm_xcr0_mask); - fpu_area_restore(&cpudata->hfpu, svm_xcr0_mask); - - if (cpudata->ts_set) { - stts(); - } } static void @@ -1327,6 +1321,7 @@ svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, svm_vcpu_guest_dbregs_enter(vcpu); svm_vcpu_guest_misc_enter(vcpu); + svm_vcpu_guest_fpu_enter(vcpu); while (1) { if (cpudata->gtlb_want_flush) { @@ -1342,9 +1337,7 @@ svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, s = splhigh(); machgen = svm_htlb_flush(machdata, cpudata); - svm_vcpu_guest_fpu_enter(vcpu); svm_vmrun(cpudata->vmcb_pa, cpudata->gprs); - svm_vcpu_guest_fpu_leave(vcpu); svm_htlb_flush_ack(cpudata, machgen); splx(s); @@ -1437,6 +1430,7 @@ svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, cpudata->gtsc = rdtsc() + vmcb->ctrl.tsc_offset; + svm_vcpu_guest_fpu_leave(vcpu); svm_vcpu_guest_misc_leave(vcpu); svm_vcpu_guest_dbregs_leave(vcpu); diff --git a/sys/dev/nvmm/x86/nvmm_x86_vmx.c b/sys/dev/nvmm/x86/nvmm_x86_vmx.c index fc7a64a7fb42..d7a4a1eb9e1f 100644 --- a/sys/dev/nvmm/x86/nvmm_x86_vmx.c +++ b/sys/dev/nvmm/x86/nvmm_x86_vmx.c @@ -1,4 +1,4 @@ -/* $NetBSD: nvmm_x86_vmx.c,v 1.38 2019/10/04 12:17:05 maxv Exp $ */ +/* $NetBSD: nvmm_x86_vmx.c,v 1.39 2019/10/12 06:31:04 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.38 2019/10/04 12:17:05 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nvmm_x86_vmx.c,v 1.39 2019/10/12 06:31:04 maxv Exp $"); #include #include @@ -733,8 +733,6 @@ struct vmx_cpudata { uint64_t cstar; uint64_t sfmask; uint64_t kernelgsbase; - bool ts_set; - struct xsave_header hfpu __aligned(64); /* Intr state */ bool int_window_exit; @@ -1659,6 +1657,9 @@ vmx_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, } cpudata->gxcr0 = val; + if (vmx_xcr0_mask != 0) { + wrxcr(0, cpudata->gxcr0); + } vmx_inkernel_advance(); return; @@ -1703,9 +1704,7 @@ vmx_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) { struct vmx_cpudata *cpudata = vcpu->cpudata; - cpudata->ts_set = (rcr0() & CR0_TS) != 0; - - fpu_area_save(&cpudata->hfpu, vmx_xcr0_mask); + fpu_save(); fpu_area_restore(&cpudata->gfpu, vmx_xcr0_mask); if (vmx_xcr0_mask != 0) { @@ -1725,11 +1724,6 @@ vmx_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) } fpu_area_save(&cpudata->gfpu, vmx_xcr0_mask); - fpu_area_restore(&cpudata->hfpu, vmx_xcr0_mask); - - if (cpudata->ts_set) { - stts(); - } } static void @@ -1911,6 +1905,7 @@ vmx_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, vmx_vcpu_guest_dbregs_enter(vcpu); vmx_vcpu_guest_misc_enter(vcpu); + vmx_vcpu_guest_fpu_enter(vcpu); while (1) { if (cpudata->gtlb_want_flush) { @@ -1927,7 +1922,6 @@ vmx_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, s = splhigh(); machgen = vmx_htlb_flush(machdata, cpudata); - vmx_vcpu_guest_fpu_enter(vcpu); lcr2(cpudata->gcr2); if (launched) { ret = vmx_vmresume(cpudata->gprs); @@ -1935,7 +1929,6 @@ vmx_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, ret = vmx_vmlaunch(cpudata->gprs); } cpudata->gcr2 = rcr2(); - vmx_vcpu_guest_fpu_leave(vcpu); vmx_htlb_flush_ack(cpudata, machgen); splx(s); @@ -2039,6 +2032,7 @@ vmx_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, cpudata->gtsc = vmx_vmread(VMCS_TSC_OFFSET) + rdtsc(); + vmx_vcpu_guest_fpu_leave(vcpu); vmx_vcpu_guest_misc_leave(vcpu); vmx_vcpu_guest_dbregs_leave(vcpu); diff --git a/sys/sys/param.h b/sys/sys/param.h index bd237d22d675..a38582c637c5 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,4 +1,4 @@ -/* $NetBSD: param.h,v 1.616 2019/09/30 21:18:00 kamil Exp $ */ +/* $NetBSD: param.h,v 1.617 2019/10/12 06:31:04 maxv Exp $ */ /*- * Copyright (c) 1982, 1986, 1989, 1993 @@ -67,7 +67,7 @@ * 2.99.9 (299000900) */ -#define __NetBSD_Version__ 999001500 /* NetBSD 9.99.15 */ +#define __NetBSD_Version__ 999001600 /* NetBSD 9.99.16 */ #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \ (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)