From d749a2d0b42b6cef94a2717c25d0dbb6d9d79f2a Mon Sep 17 00:00:00 2001 From: yamt Date: Fri, 20 Feb 2004 17:35:01 +0000 Subject: [PATCH] defer pmap switching until it's really needed to avoid frequent loading of cr3 register, which involves tlb flush. with some fixes/improvements from Stephan Uphoff and Bang Jun-Young. --- sys/arch/i386/i386/cpu.c | 10 +- sys/arch/i386/i386/genassym.cf | 5 +- sys/arch/i386/i386/locore.S | 71 +++++-- sys/arch/i386/i386/mach_sigcode.S | 10 +- sys/arch/i386/i386/pmap.c | 311 ++++++++++++++++++++++++++---- sys/arch/i386/i386/spl.S | 20 +- sys/arch/i386/i386/svr4_sigcode.S | 10 +- sys/arch/i386/i386/trap.c | 18 +- sys/arch/i386/i386/vector.S | 34 ++-- sys/arch/i386/include/cpu.h | 10 +- sys/arch/i386/include/frameasm.h | 11 +- sys/arch/i386/include/pcb.h | 3 +- sys/arch/i386/include/pmap.h | 4 +- 13 files changed, 424 insertions(+), 93 deletions(-) diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c index 5b7cf8e27531..21ba73bab86d 100644 --- a/sys/arch/i386/i386/cpu.c +++ b/sys/arch/i386/i386/cpu.c @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.17 2004/02/13 11:36:13 wiz Exp $ */ +/* $NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.17 2004/02/13 11:36:13 wiz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp $"); #include "opt_ddb.h" #include "opt_multiprocessor.h" @@ -314,10 +314,12 @@ cpu_attach(parent, self, aux) kstack + USPACE - 16 - sizeof (struct trapframe); pcb->pcb_tss.tss_esp = kstack + USPACE - 16 - sizeof (struct trapframe); - pcb->pcb_pmap = pmap_kernel(); pcb->pcb_cr0 = rcr0(); - pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa; + pcb->pcb_cr3 = pmap_kernel()->pm_pdirpa; #endif + pmap_reference(pmap_kernel()); + ci->ci_pmap = pmap_kernel(); + ci->ci_tlbstate = TLBSTATE_STALE; /* further PCB init done later. 
*/ diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index aea58d4667e9..d2ee0a907858 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.39 2003/11/04 10:33:15 dsl Exp $ +# $NetBSD: genassym.cf,v 1.40 2004/02/20 17:35:01 yamt Exp $ # # Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -258,6 +258,9 @@ endif define CPU_INFO_SELF offsetof(struct cpu_info, ci_self) define CPU_INFO_RESCHED offsetof(struct cpu_info, ci_want_resched) +define CPU_INFO_WANT_PMAPLOAD offsetof(struct cpu_info, ci_want_pmapload) +define CPU_INFO_TLBSTATE offsetof(struct cpu_info, ci_tlbstate) +define TLBSTATE_VALID TLBSTATE_VALID define CPU_INFO_CURLWP offsetof(struct cpu_info, ci_curlwp) define CPU_INFO_CURPCB offsetof(struct cpu_info, ci_curpcb) define CPU_INFO_IDLE_PCB offsetof(struct cpu_info, ci_idle_pcb) diff --git a/sys/arch/i386/i386/locore.S b/sys/arch/i386/i386/locore.S index 9820830500da..da08c1f67bf3 100644 --- a/sys/arch/i386/i386/locore.S +++ b/sys/arch/i386/i386/locore.S @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.23 2004/02/16 17:11:27 wiz Exp $ */ +/* $NetBSD: locore.S,v 1.24 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 
@@ -125,7 +125,7 @@ #define GET_CURPCB(reg) movl CPUVAR(CURPCB),reg #define SET_CURPCB(reg) movl reg,CPUVAR(CURPCB) - + #define CLEAR_RESCHED(reg) movl reg,CPUVAR(RESCHED) /* XXX temporary kluge; these should not be here */ @@ -701,6 +701,7 @@ NENTRY(proc_trampoline) pushl %ebx call *%esi addl $4,%esp + DO_DEFERRED_SWITCH(%eax) INTRFASTEXIT /* NOTREACHED */ @@ -778,7 +779,7 @@ ENTRY(kcopy) pushl %edi GET_CURPCB(%eax) # load curpcb into eax and set on-fault pushl PCB_ONFAULT(%eax) - movl $_C_LABEL(copy_fault), PCB_ONFAULT(%eax) + movl $_C_LABEL(kcopy_fault), PCB_ONFAULT(%eax) movl 16(%esp),%esi movl 20(%esp),%edi @@ -871,6 +872,7 @@ _C_LABEL(copyin_func): */ /* LINTSTUB: Func: int copyout(const void *kaddr, void *uaddr, size_t len) */ ENTRY(copyout) + DO_DEFERRED_SWITCH(%eax) jmp *_C_LABEL(copyout_func) #if defined(I386_CPU) @@ -1012,6 +1014,7 @@ ENTRY(i486_copyout) */ /* LINTSTUB: Func: int copyin(const void *uaddr, void *kaddr, size_t len) */ ENTRY(copyin) + DO_DEFERRED_SWITCH(%eax) jmp *_C_LABEL(copyin_func) #if defined(I386_CPU) || defined(I486_CPU) || defined(I586_CPU) || \ @@ -1062,6 +1065,19 @@ ENTRY(i386_copyin) NENTRY(copy_efault) movl $EFAULT,%eax +/* + * kcopy_fault is used by kcopy and copy_fault is used by copyin/out. + * + * they're distinguished for lazy pmap switching. see trap(). 
+ */ +/* LINTSTUB: Ignore */ +NENTRY(kcopy_fault) + GET_CURPCB(%edx) + popl PCB_ONFAULT(%edx) + popl %edi + popl %esi + ret + /* LINTSTUB: Ignore */ NENTRY(copy_fault) GET_CURPCB(%edx) @@ -1083,6 +1099,8 @@ ENTRY(copyoutstr) pushl %esi pushl %edi + DO_DEFERRED_SWITCH(%eax) + movl 12(%esp),%esi # esi = from movl 16(%esp),%edi # edi = to movl 20(%esp),%edx # edx = maxlen @@ -1200,6 +1218,9 @@ ENTRY(copyoutstr) ENTRY(copyinstr) pushl %esi pushl %edi + + DO_DEFERRED_SWITCH(%eax) + GET_CURPCB(%ecx) movl $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx) @@ -1311,6 +1332,7 @@ ENTRY(copystr) */ /* LINTSTUB: Func: long fuword(const void *base) */ ENTRY(fuword) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx ja _C_LABEL(fusuaddrfault) @@ -1327,6 +1349,7 @@ ENTRY(fuword) */ /* LINTSTUB: Func: int fusword(const void *base) */ ENTRY(fusword) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja _C_LABEL(fusuaddrfault) @@ -1344,6 +1367,8 @@ ENTRY(fusword) */ /* LINTSTUB: Func: int fuswintr(const void *base) */ ENTRY(fuswintr) + cmpl $TLBSTATE_VALID, CPUVAR(TLBSTATE) + jnz _C_LABEL(fusuaddrfault) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja _C_LABEL(fusuaddrfault) @@ -1361,6 +1386,7 @@ ENTRY(fuswintr) */ /* LINTSTUB: Func: int fubyte(const void *base) */ ENTRY(fubyte) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja _C_LABEL(fusuaddrfault) @@ -1405,6 +1431,7 @@ NENTRY(fusuaddrfault) */ /* LINTSTUB: Func: int suword(void *base, long c) */ ENTRY(suword) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx ja _C_LABEL(fusuaddrfault) @@ -1452,6 +1479,7 @@ ENTRY(suword) */ /* LINTSTUB: Func: int susword(void *base, short c) */ ENTRY(susword) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja _C_LABEL(fusuaddrfault) @@ -1500,6 +1528,8 @@ ENTRY(susword) */ /* LINTSTUB: Func: int suswintr(void *base, short c) */ ENTRY(suswintr) + cmpl 
$TLBSTATE_VALID, CPUVAR(TLBSTATE) + jnz _C_LABEL(fusuaddrfault) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja _C_LABEL(fusuaddrfault) @@ -1537,6 +1567,7 @@ ENTRY(suswintr) */ /* LINTSTUB: Func: int subyte(void *base, int c) */ ENTRY(subyte) + DO_DEFERRED_SWITCH(%eax) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja _C_LABEL(fusuaddrfault) @@ -1722,7 +1753,7 @@ ENTRY(cpu_switch) */ pushl %esi - call _C_LABEL(pmap_deactivate) # pmap_deactivate(oldproc) + call _C_LABEL(pmap_deactivate2) # pmap_deactivate2(oldproc) addl $4,%esp movl L_ADDR(%esi),%esi @@ -1749,11 +1780,6 @@ ENTRY(cpu_switch) movl PCB_ESP(%edi),%esp movl PCB_EBP(%edi),%ebp - - /* Switch address space. */ - movl PCB_CR3(%edi),%ecx - movl %ecx,%cr3 - /* Switch TSS. Reset "task busy" flag before loading. */ #ifdef MULTIPROCESSOR movl CPUVAR(GDT),%eax @@ -1872,7 +1898,7 @@ switch_resume: */ pushl %esi - call _C_LABEL(pmap_deactivate) # pmap_deactivate(oldproc) + call _C_LABEL(pmap_deactivate2) # pmap_deactivate2(oldproc) addl $4,%esp movl L_ADDR(%esi),%esi @@ -2066,10 +2092,6 @@ ENTRY(cpu_exit) movl _C_LABEL(gdt),%eax #endif - /* Switch address space. */ - movl PCB_CR3(%esi),%ecx - movl %ecx,%cr3 - /* Switch TSS. */ andl $~0x0200,4-SEL_KPL(%eax,%edx,1) ltr %dx @@ -2134,6 +2156,12 @@ syscall1: INTRENTRY #ifdef DIAGNOSTIC + cmpl $0, CPUVAR(WANT_PMAPLOAD) + jz 1f + pushl $6f + call _C_LABEL(printf) + addl $4, %esp +1: movl CPUVAR(ILEVEL),%ebx testl %ebx,%ebx jz 1f @@ -2151,7 +2179,8 @@ syscall1: pushl %esp call *P_MD_SYSCALL(%edx) # get pointer to syscall() function addl $4,%esp -2: /* Check for ASTs on exit to user mode. +syscall_checkast: + /* Check for ASTs on exit to user mode.
*/ cli CHECK_ASTPENDING(%eax) je 1f @@ -2162,11 +2191,13 @@ syscall1: pushl %esp call _C_LABEL(trap) addl $4,%esp - jmp 2b + jmp syscall_checkast /* re-check ASTs */ +1: CHECK_DEFERRED_SWITCH(%eax) + jnz 9f #ifndef DIAGNOSTIC -1: INTRFASTEXIT + INTRFASTEXIT #else /* DIAGNOSTIC */ -1: cmpl $IPL_NONE,CPUVAR(ILEVEL) + cmpl $IPL_NONE,CPUVAR(ILEVEL) jne 3f INTRFASTEXIT 3: sti @@ -2180,7 +2211,11 @@ syscall1: jmp 2b 4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL EXIT\n" 5: .asciz "WARNING: SPL NOT ZERO ON SYSCALL ENTRY\n" +6: .asciz "WARNING: WANT PMAPLOAD ON SYSCALL ENTRY\n" #endif /* DIAGNOSTIC */ +9: sti + call _C_LABEL(pmap_load) + jmp syscall_checkast /* re-check ASTs */ #if NNPX > 0 /* diff --git a/sys/arch/i386/i386/mach_sigcode.S b/sys/arch/i386/i386/mach_sigcode.S index bea82da6ebc0..7c6763418e9c 100644 --- a/sys/arch/i386/i386/mach_sigcode.S +++ b/sys/arch/i386/i386/mach_sigcode.S @@ -1,4 +1,4 @@ -/* $NetBSD: mach_sigcode.S,v 1.5 2003/08/20 21:48:37 fvdl Exp $ */ +/* $NetBSD: mach_sigcode.S,v 1.6 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. 
@@ -125,4 +125,10 @@ IDTVEC(mach_trap) call _C_LABEL(trap) addl $4,%esp jmp 2b -1: INTRFASTEXIT +1: CHECK_DEFERRED_SWITCH(%eax) + jnz 9f + INTRFASTEXIT +9: sti + call _C_LABEL(pmap_load) + cli + jmp 2b diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index b96f0e6280b6..302b1d34ad05 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.170 2004/02/13 11:36:14 wiz Exp $ */ +/* $NetBSD: pmap.c,v 1.171 2004/02/20 17:35:01 yamt Exp $ */ /* * @@ -60,7 +60,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.170 2004/02/13 11:36:14 wiz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.171 2004/02/20 17:35:01 yamt Exp $"); #include "opt_cputype.h" #include "opt_user_ldt.h" @@ -501,6 +501,8 @@ static void pmap_tmpunmap_pa(void); static void pmap_tmpunmap_pvepte(struct pv_entry *); static void pmap_unmap_ptes(struct pmap *); +static boolean_t pmap_reactivate(struct pmap *); + /* * p m a p i n l i n e h e l p e r f u n c t i o n s */ @@ -514,8 +516,9 @@ __inline static boolean_t pmap_is_curpmap(pmap) struct pmap *pmap; { + return((pmap == pmap_kernel()) || - (pmap->pm_pdirpa == (paddr_t) rcr3())); + (pmap == curcpu()->ci_pmap)); } /* @@ -663,24 +666,33 @@ pmap_map_ptes(pmap) struct pmap *pmap; { pd_entry_t opde; + struct pmap *ourpmap; + struct cpu_info *ci; /* the kernel's pmap is always accessible */ if (pmap == pmap_kernel()) { return(PTE_BASE); } + ci = curcpu(); + if (ci->ci_want_pmapload && + vm_map_pmap(&ci->ci_curlwp->l_proc->p_vmspace->vm_map) == pmap) + pmap_load(); + /* if curpmap then we are always mapped */ if (pmap_is_curpmap(pmap)) { simple_lock(&pmap->pm_obj.vmobjlock); return(PTE_BASE); } + ourpmap = ci->ci_pmap; + /* need to lock both curpmap and pmap: use ordered locking */ - if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) { + if ((unsigned) pmap < (unsigned) ourpmap) { simple_lock(&pmap->pm_obj.vmobjlock); - simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + 
simple_lock(&ourpmap->pm_obj.vmobjlock); } else { - simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_lock(&ourpmap->pm_obj.vmobjlock); simple_lock(&pmap->pm_obj.vmobjlock); } @@ -690,7 +702,7 @@ pmap_map_ptes(pmap) if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { *APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); if (pmap_valid_entry(opde)) - pmap_apte_flush(curpcb->pcb_pmap); + pmap_apte_flush(ourpmap); } return(APTE_BASE); } @@ -703,19 +715,22 @@ __inline static void pmap_unmap_ptes(pmap) struct pmap *pmap; { + if (pmap == pmap_kernel()) { return; } if (pmap_is_curpmap(pmap)) { simple_unlock(&pmap->pm_obj.vmobjlock); } else { + struct pmap *ourpmap = curcpu()->ci_pmap; + #if defined(MULTIPROCESSOR) *APDP_PDE = 0; - pmap_apte_flush(curpcb->pcb_pmap); + pmap_apte_flush(ourpmap); #endif COUNT(apdp_pde_unmap); simple_unlock(&pmap->pm_obj.vmobjlock); - simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + simple_unlock(&ourpmap->pm_obj.vmobjlock); } } @@ -952,8 +967,6 @@ pmap_bootstrap(kva_start) * operation of the system. */ - curpcb->pcb_pmap = kpm; /* proc0's pcb */ - /* * Begin to enable global TLB entries if they are supported. * The G bit has no effect until the CR4_PGE bit is set in CR4, @@ -1764,6 +1777,10 @@ pmap_destroy(pmap) struct pmap *pmap; { int refs; +#ifdef DIAGNOSTIC + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; +#endif /* DIAGNOSTIC */ /* * drop reference count @@ -1776,6 +1793,12 @@ pmap_destroy(pmap) return; } +#ifdef DIAGNOSTIC + for (CPU_INFO_FOREACH(cii, ci)) + if (ci->ci_pmap == pmap) + panic("destroying pmap being used"); +#endif /* DIAGNOSTIC */ + /* * reference count is zero, free pmap resources and then free pmap. 
*/ @@ -1904,31 +1927,26 @@ pmap_ldt_cleanup(l) #endif /* USER_LDT */ /* - * pmap_activate: activate a process' pmap (fill in %cr3 and LDT info) + * pmap_activate: activate a process' pmap * * => called from cpu_switch() - * => if proc is the curlwp, then load it into the MMU + * => if lwp is the curlwp, then set ci_want_pmapload so that + * actual MMU context switch will be done by pmap_load() later */ void pmap_activate(l) struct lwp *l; { + struct cpu_info *ci = curcpu(); struct pcb *pcb = &l->l_addr->u_pcb; - struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; + struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); - pcb->pcb_pmap = pmap; pcb->pcb_ldt_sel = pmap->pm_ldt_sel; pcb->pcb_cr3 = pmap->pm_pdirpa; - if (l == curlwp) { - lcr3(pcb->pcb_cr3); - lldt(pcb->pcb_ldt_sel); - - /* - * mark the pmap in use by this processor. - */ - x86_atomic_setbits_l(&pmap->pm_cpus, (1U << cpu_number())); - + if (l == ci->ci_curlwp) { + KASSERT(ci->ci_want_pmapload == 0); + KASSERT(ci->ci_tlbstate != TLBSTATE_VALID); #ifdef KSTACK_CHECK_DR0 /* * setup breakpoint on the top of stack @@ -1938,9 +1956,130 @@ pmap_activate(l) else dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1); #endif + + /* + * no need to switch to kernel vmspace because + * it's a subset of any vmspace. + */ + + if (pmap == pmap_kernel()) { + ci->ci_want_pmapload = 0; + return; + } + + ci->ci_want_pmapload = 1; } } +/* + * pmap_reactivate: try to regain reference to the pmap. + */ + +static boolean_t +pmap_reactivate(struct pmap *pmap) +{ + struct cpu_info *ci = curcpu(); + u_int32_t cpumask = 1U << ci->ci_cpuid; + int s; + boolean_t result; + u_int32_t oldcpus; + + KASSERT(pmap->pm_pdirpa == rcr3()); + + /* + * if we still have a lazy reference to this pmap, + * we can assume that there was no tlb shootdown + * for this pmap in the meantime. + */ + + s = splipi(); /* protect from tlb shootdown ipis. 
*/ + oldcpus = pmap->pm_cpus; + x86_atomic_setbits_l(&pmap->pm_cpus, cpumask); + if (oldcpus & cpumask) { + KASSERT(ci->ci_tlbstate == TLBSTATE_LAZY); + /* got it */ + result = TRUE; + } else { + KASSERT(ci->ci_tlbstate == TLBSTATE_STALE); + result = FALSE; + } + ci->ci_tlbstate = TLBSTATE_VALID; + splx(s); + + return result; +} + +/* + * pmap_load: actually switch pmap. (fill in %cr3 and LDT info) + */ + +void +pmap_load() +{ + struct cpu_info *ci = curcpu(); + u_int32_t cpumask = 1U << ci->ci_cpuid; + struct pmap *pmap; + struct pmap *oldpmap; + struct lwp *l; + int s; + + KASSERT(ci->ci_want_pmapload); + + l = ci->ci_curlwp; + KASSERT(l != NULL); + pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); + KASSERT(pmap != pmap_kernel()); + oldpmap = ci->ci_pmap; + + KASSERT(pmap->pm_ldt_sel == l->l_addr->u_pcb.pcb_ldt_sel); + lldt(pmap->pm_ldt_sel); + + if (pmap == oldpmap) { + if (!pmap_reactivate(pmap)) { + + /* + * pmap has been changed while it was deactivated. + * our tlb may be stale. + */ + + tlbflush(); + } + + ci->ci_want_pmapload = 0; + return; + } + + /* + * actually switch pmap. + */ + + x86_atomic_clearbits_l(&oldpmap->pm_cpus, cpumask); + + KASSERT(oldpmap->pm_pdirpa == rcr3()); + KASSERT((pmap->pm_cpus & cpumask) == 0); + + KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE); + pmap_reference(pmap); + KERNEL_UNLOCK(); + + /* + * mark the pmap in use by this processor. + */ + + s = splipi(); + x86_atomic_setbits_l(&pmap->pm_cpus, cpumask); + ci->ci_pmap = pmap; + ci->ci_tlbstate = TLBSTATE_VALID; + splx(s); + lcr3(pmap->pm_pdirpa); + + ci->ci_want_pmapload = 0; + + KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE); + pmap_destroy(oldpmap); + KERNEL_UNLOCK(); +} + /* * pmap_deactivate: deactivate a process' pmap */ @@ -1949,12 +2088,49 @@ void pmap_deactivate(l) struct lwp *l; { - struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; - /* - * mark the pmap no longer in use by this processor.
- */ - x86_atomic_clearbits_l(&pmap->pm_cpus, (1U << cpu_number())); + if (l == curlwp) + pmap_deactivate2(l); +} + +/* + * pmap_deactivate2: context switch version of pmap_deactivate. + * always treat l as curlwp. + */ + +void +pmap_deactivate2(l) + struct lwp *l; +{ + struct pmap *pmap; + struct cpu_info *ci = curcpu(); + + if (ci->ci_want_pmapload) { + KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) + != pmap_kernel()); + KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map) + != ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID); + + /* + * userspace has not been touched. + * nothing to do here. + */ + + ci->ci_want_pmapload = 0; + return; + } + + pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); + + if (pmap == pmap_kernel()) { + return; + } + + KASSERT(pmap->pm_pdirpa == rcr3()); + KASSERT(ci->ci_pmap == pmap); + + KASSERT(ci->ci_tlbstate == TLBSTATE_VALID); + ci->ci_tlbstate = TLBSTATE_LAZY; } /* @@ -2380,6 +2556,8 @@ pmap_do_remove(pmap, sva, eva, flags) struct vm_page *ptp; int32_t cpumask = 0; TAILQ_HEAD(, vm_page) empty_ptps; + struct cpu_info *ci; + struct pmap *curpmap; /* * we lock in the pmap => pv_head direction @@ -2388,8 +2566,12 @@ pmap_do_remove(pmap, sva, eva, flags) TAILQ_INIT(&empty_ptps); PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes(pmap); /* locks pmap */ + ci = curcpu(); + curpmap = ci->ci_pmap; + /* * removing one page? take shortcut function. */ @@ -2438,7 +2620,7 @@ pmap_do_remove(pmap, sva, eva, flags) * here if we're using APTE space. */ #endif - pmap_tlb_shootdown(curpcb->pcb_pmap, + pmap_tlb_shootdown(curpmap, ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); #if defined(MULTIPROCESSOR) @@ -2446,8 +2628,7 @@ pmap_do_remove(pmap, sva, eva, flags) * Always shoot down the pmap's self-mapping * of the PTP. * XXXthorpej Redundant shootdown can happen - * here if pmap == curpcb->pcb_pmap (not APTE - * space). + * here if pmap == curpmap (not APTE space). 
*/ pmap_tlb_shootdown(pmap, ((vaddr_t)PTE_BASE) + ptp->offset, opte, @@ -2537,14 +2718,14 @@ pmap_do_remove(pmap, sva, eva, flags) * if we're using APTE space. */ #endif - pmap_tlb_shootdown(curpcb->pcb_pmap, + pmap_tlb_shootdown(curpmap, ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); #if defined(MULTIPROCESSOR) /* * Always shoot down the pmap's self-mapping * of the PTP. * XXXthorpej Redundant shootdown can happen here - * if pmap == curpcb->pcb_pmap (not APTE space). + * if pmap == curpmap (not APTE space). */ pmap_tlb_shootdown(pmap, ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask); @@ -2585,6 +2766,8 @@ pmap_page_remove(pg) int32_t cpumask = 0; TAILQ_HEAD(, vm_page) empty_ptps; struct vm_page *ptp; + struct cpu_info *ci; + struct pmap *curpmap; #ifdef DIAGNOSTIC int bank, off; @@ -2604,6 +2787,9 @@ pmap_page_remove(pg) /* set pv_head => pmap locking */ PMAP_HEAD_TO_MAP_LOCK(); + ci = curcpu(); + curpmap = ci->ci_pmap; + /* XXX: needed if we hold head->map lock? */ simple_lock(&pvh->pvh_lock); @@ -2657,7 +2843,7 @@ pmap_page_remove(pg) opte = x86_atomic_testset_ul( &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)], 0); - pmap_tlb_shootdown(curpcb->pcb_pmap, + pmap_tlb_shootdown(curpmap, ((vaddr_t)ptes) + pve->pv_ptp->offset, opte, &cpumask); #if defined(MULTIPROCESSOR) @@ -3427,8 +3613,9 @@ pmap_dump(pmap, sva, eva) void pmap_tlb_shootnow(int32_t cpumask) { + struct cpu_info *self; #ifdef MULTIPROCESSOR - struct cpu_info *ci, *self; + struct cpu_info *ci; CPU_INFO_ITERATOR cii; int s; #ifdef DIAGNOSTIC @@ -3439,13 +3626,13 @@ pmap_tlb_shootnow(int32_t cpumask) if (cpumask == 0) return; -#ifdef MULTIPROCESSOR self = curcpu(); +#ifdef MULTIPROCESSOR s = splipi(); self->ci_tlb_ipi_mask = cpumask; #endif - pmap_do_tlb_shootdown(0); /* do *our* work. */ + pmap_do_tlb_shootdown(self); /* do *our* work. 
*/ #ifdef MULTIPROCESSOR splx(s); @@ -3584,6 +3771,40 @@ pmap_tlb_shootdown(pmap, va, pte, cpumaskp) splx(s); } +/* + * pmap_do_tlb_shootdown_checktlbstate: check and update ci_tlbstate. + * + * => called at splipi. + * => return TRUE if we need to maintain user tlbs. + */ +static __inline boolean_t +pmap_do_tlb_shootdown_checktlbstate(struct cpu_info *ci) +{ + + KASSERT(ci == curcpu()); + + if (ci->ci_tlbstate == TLBSTATE_LAZY) { + KASSERT(ci->ci_pmap != pmap_kernel()); + /* + * mostly KASSERT(ci->ci_pmap->pm_cpus & (1U << ci->ci_cpuid)); + */ + + /* + * we no longer want tlb shootdown ipis for this pmap. + * mark the pmap no longer in use by this processor. + */ + + x86_atomic_clearbits_l(&ci->ci_pmap->pm_cpus, + 1U << ci->ci_cpuid); + ci->ci_tlbstate = TLBSTATE_STALE; + } + + if (ci->ci_tlbstate == TLBSTATE_STALE) + return FALSE; + + return TRUE; +} + /* * pmap_do_tlb_shootdown: * @@ -3592,7 +3813,7 @@ pmap_tlb_shootdown(pmap, va, pte, cpumaskp) void pmap_do_tlb_shootdown(struct cpu_info *self) { - u_long cpu_id = cpu_number(); + u_long cpu_id = self->ci_cpuid; struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id]; struct pmap_tlb_shootdown_job *pj; int s; @@ -3600,6 +3821,7 @@ pmap_do_tlb_shootdown(struct cpu_info *self) struct cpu_info *ci; CPU_INFO_ITERATOR cii; #endif + KASSERT(self == curcpu()); s = splipi(); @@ -3607,6 +3829,7 @@ pmap_do_tlb_shootdown(struct cpu_info *self) if (pq->pq_flushg) { COUNT(flushg); + pmap_do_tlb_shootdown_checktlbstate(self); tlbflushg(); pq->pq_flushg = 0; pq->pq_flushu = 0; @@ -3618,14 +3841,20 @@ pmap_do_tlb_shootdown(struct cpu_info *self) */ if (pq->pq_flushu) { COUNT(flushu); + pmap_do_tlb_shootdown_checktlbstate(self); tlbflush(); } while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) { TAILQ_REMOVE(&pq->pq_head, pj, pj_list); - if ((!pq->pq_flushu && pmap_is_curpmap(pj->pj_pmap)) || - (pj->pj_pte & pmap_pg_g)) + if ((pj->pj_pte & pmap_pg_g) || + pj->pj_pmap == pmap_kernel()) { pmap_update_pg(pj->pj_va); + } else if 
(!pq->pq_flushu && + pj->pj_pmap == self->ci_pmap) { + if (pmap_do_tlb_shootdown_checktlbstate(self)) + pmap_update_pg(pj->pj_va); + } pmap_tlb_shootdown_job_put(pq, pj); } diff --git a/sys/arch/i386/i386/spl.S b/sys/arch/i386/i386/spl.S index a1280074a712..0e8783c70f1b 100644 --- a/sys/arch/i386/i386/spl.S +++ b/sys/arch/i386/i386/spl.S @@ -1,4 +1,4 @@ -/* $NetBSD: spl.S,v 1.7 2003/08/20 21:48:41 fvdl Exp $ */ +/* $NetBSD: spl.S,v 1.8 2004/02/20 17:35:01 yamt Exp $ */ /* * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -150,13 +150,17 @@ IDTVEC(doreti) jmp *IS_RESUME(%eax) 2: /* Check for ASTs on exit to user mode. */ movl %ebx,CPUVAR(ILEVEL) -5: CHECK_ASTPENDING(%eax) - je 3f +5: testb $SEL_RPL,TF_CS(%esp) + jnz doreti_checkast #ifdef VM86 - jnz 4f testl $PSL_VM,TF_EFLAGS(%esp) + jz 6f +#else + jmp 6f #endif +doreti_checkast: + CHECK_ASTPENDING(%eax) jz 3f 4: CLEAR_ASTPENDING(%eax) sti @@ -168,4 +172,12 @@ IDTVEC(doreti) cli jmp 5b 3: + CHECK_DEFERRED_SWITCH(%eax) + jnz 9f +6: INTRFASTEXIT +9: + sti + call _C_LABEL(pmap_load) + cli + jmp doreti_checkast /* recheck ASTs */ diff --git a/sys/arch/i386/i386/svr4_sigcode.S b/sys/arch/i386/i386/svr4_sigcode.S index 459167d6ba44..314a9205aa61 100644 --- a/sys/arch/i386/i386/svr4_sigcode.S +++ b/sys/arch/i386/i386/svr4_sigcode.S @@ -1,4 +1,4 @@ -/* $NetBSD: svr4_sigcode.S,v 1.6 2003/08/20 21:48:42 fvdl Exp $ */ +/* $NetBSD: svr4_sigcode.S,v 1.7 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. 
@@ -119,4 +119,10 @@ IDTVEC(svr4_fasttrap) call _C_LABEL(trap) addl $4,%esp jmp 2b -1: INTRFASTEXIT +1: CHECK_DEFERRED_SWITCH(%eax) + jnz 9f + INTRFASTEXIT +9: sti + call _C_LABEL(pmap_load) + cli + jmp 2b diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c index ae026efb4b71..3a7d44f69c35 100644 --- a/sys/arch/i386/i386/trap.c +++ b/sys/arch/i386/i386/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.195 2004/02/19 17:02:44 drochner Exp $ */ +/* $NetBSD: trap.c,v 1.196 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.195 2004/02/19 17:02:44 drochner Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.196 2004/02/20 17:35:01 yamt Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -216,7 +216,7 @@ trap(frame) struct proc *p = l ? l->l_proc : 0; int type = frame->tf_trapno; struct pcb *pcb; - extern char fusubail[], + extern char fusubail[], kcopy_fault[], resume_iret[], resume_pop_ds[], resume_pop_es[], resume_pop_fs[], resume_pop_gs[], IDTVEC(osyscall)[]; @@ -616,6 +616,18 @@ copyfault: if (type == T_PAGEFLT) { KERNEL_UNLOCK(); + + /* + * we need to switch pmap now if we're in + * the middle of copyin/out. + * + * but we don't need to do so for kcopy as + * it never touches userspace. + */ + + if (onfault != kcopy_fault && + curcpu()->ci_want_pmapload) + pmap_load(); return; } l->l_flag &= ~L_SA_PAGEFAULT; diff --git a/sys/arch/i386/i386/vector.S b/sys/arch/i386/i386/vector.S index ec54b80f9038..590fd6107553 100644 --- a/sys/arch/i386/i386/vector.S +++ b/sys/arch/i386/i386/vector.S @@ -1,4 +1,4 @@ -/* $NetBSD: vector.S,v 1.11 2003/12/12 20:17:53 nathanw Exp $ */ +/* $NetBSD: vector.S,v 1.12 2004/02/20 17:35:01 yamt Exp $ */ /* * Copyright 2002 (c) Wasabi Systems, Inc. @@ -860,27 +860,32 @@ calltrap: pushl %esp call _C_LABEL(trap) addl $4,%esp -2: /* Check for ASTs on exit to user mode.
*/ + testb $SEL_RPL,TF_CS(%esp) + jnz alltraps_checkast +#ifdef VM86 + testl $PSL_VM,TF_EFLAGS(%esp) + jz 6f +#else + jmp 6f +#endif +alltraps_checkast: + /* Check for ASTs on exit to user mode. */ cli CHECK_ASTPENDING(%eax) - je 1f - testb $SEL_RPL,TF_CS(%esp) -#ifdef VM86 - jnz 5f - testl $PSL_VM,TF_EFLAGS(%esp) -#endif - jz 1f + jz 3f 5: CLEAR_ASTPENDING(%eax) sti movl $T_ASTFLT,TF_TRAPNO(%esp) pushl %esp call _C_LABEL(trap) addl $4,%esp - jmp 2b + jmp alltraps_checkast /* re-check ASTs */ +3: CHECK_DEFERRED_SWITCH(%eax) + jnz 9f #ifndef DIAGNOSTIC -1: INTRFASTEXIT +6: INTRFASTEXIT #else -1: cmpl CPUVAR(ILEVEL),%ebx +6: cmpl CPUVAR(ILEVEL),%ebx jne 3f INTRFASTEXIT 3: sti @@ -891,9 +896,12 @@ calltrap: int $3 #endif /* DDB */ movl %ebx,CPUVAR(ILEVEL) - jmp 2b + jmp alltraps_checkast /* re-check ASTs */ 4: .asciz "WARNING: SPL NOT LOWERED ON TRAP EXIT\n" #endif /* DIAGNOSTIC */ +9: sti + call _C_LABEL(pmap_load) + jmp alltraps_checkast /* re-check ASTs */ #ifdef IPKDB /* LINTSTUB: Ignore */ diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h index 9de88d878ef9..597362bb86d5 100644 --- a/sys/arch/i386/include/cpu.h +++ b/sys/arch/i386/include/cpu.h @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.112 2004/01/04 11:44:52 jdolecek Exp $ */ +/* $NetBSD: cpu.h,v 1.113 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1990 The Regents of the University of California. @@ -61,6 +61,7 @@ #include /* offsetof */ struct intrsource; +struct pmap; /* * a bunch of this belongs in cpuvar.h; move it later.. @@ -92,6 +93,13 @@ struct cpu_info { volatile u_int32_t ci_tlb_ipi_mask; + struct pmap *ci_pmap; /* current pmap */ + int ci_want_pmapload; /* pmap_load() is needed */ + int ci_tlbstate; /* one of TLBSTATE_ states. 
see below */ +#define TLBSTATE_VALID 0 /* all user tlbs are valid */ +#define TLBSTATE_LAZY 1 /* tlbs are valid but won't be kept uptodate */ +#define TLBSTATE_STALE 2 /* we might have stale user tlbs */ + struct pcb *ci_curpcb; /* VA of current HW PCB */ struct pcb *ci_idle_pcb; /* VA of current PCB */ int ci_idle_tss_sel; /* TSS selector of idle PCB */ diff --git a/sys/arch/i386/include/frameasm.h b/sys/arch/i386/include/frameasm.h index 39cda4b5186f..6659037bbc5c 100644 --- a/sys/arch/i386/include/frameasm.h +++ b/sys/arch/i386/include/frameasm.h @@ -1,4 +1,4 @@ -/* $NetBSD: frameasm.h,v 1.3 2003/10/04 05:57:51 junyoung Exp $ */ +/* $NetBSD: frameasm.h,v 1.4 2004/02/20 17:35:01 yamt Exp $ */ #ifndef _I386_FRAMEASM_H_ #define _I386_FRAMEASM_H_ @@ -80,6 +80,15 @@ addl $(TF_PUSHSIZE+8),%esp ; \ iret +#define DO_DEFERRED_SWITCH(reg) \ + cmpl $0, CPUVAR(WANT_PMAPLOAD) ; \ + jz 1f ; \ + call _C_LABEL(pmap_load) ; \ + 1: + +#define CHECK_DEFERRED_SWITCH(reg) \ + cmpl $0, CPUVAR(WANT_PMAPLOAD) + #define CHECK_ASTPENDING(reg) movl CPUVAR(CURLWP),reg ; \ cmpl $0, reg ; \ je 1f ; \ diff --git a/sys/arch/i386/include/pcb.h b/sys/arch/i386/include/pcb.h index 7a463c2b5906..aeedea86a00f 100644 --- a/sys/arch/i386/include/pcb.h +++ b/sys/arch/i386/include/pcb.h @@ -1,4 +1,4 @@ -/* $NetBSD: pcb.h,v 1.36 2003/11/09 05:29:59 tsutsui Exp $ */ +/* $NetBSD: pcb.h,v 1.37 2004/02/20 17:35:01 yamt Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -108,7 +108,6 @@ struct pcb { int vm86_eflags; /* virtual eflags for vm86 mode */ int vm86_flagmask; /* flag mask for vm86 mode */ void *vm86_userp; /* XXX performance hack */ - struct pmap *pcb_pmap; /* back pointer to our pmap */ struct cpu_info *pcb_fpcpu; /* cpu holding our fp state. 
*/ u_long pcb_iomap[NIOPORTS/32]; /* I/O bitmap */ }; diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index 5573919f2628..7f2173bc18d3 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.78 2003/10/27 13:44:20 junyoung Exp $ */ +/* $NetBSD: pmap.h,v 1.79 2004/02/20 17:35:01 yamt Exp $ */ /* * @@ -342,12 +342,14 @@ void pmap_activate(struct lwp *); void pmap_bootstrap(vaddr_t); boolean_t pmap_clear_attrs(struct vm_page *, int); void pmap_deactivate(struct lwp *); +void pmap_deactivate2(struct lwp *); void pmap_page_remove (struct vm_page *); void pmap_remove(struct pmap *, vaddr_t, vaddr_t); boolean_t pmap_test_attrs(struct vm_page *, int); void pmap_write_protect(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); int pmap_exec_fixup(struct vm_map *, struct trapframe *, struct pcb *); +void pmap_load(void); vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */