From c08e54b26b3fa5d99b894931f8625f6314a78fde Mon Sep 17 00:00:00 2001 From: cl Date: Tue, 16 Sep 2003 13:46:24 +0000 Subject: [PATCH] fix SA/pthread pagefault failure: - prevent BLOCKED upcalls on double page faults and during upcalls - make libpthread handle blocked threads which hold locks - prevent UNBLOCKED upcalls from overtaking their BLOCKED upcall this adds a new syscall sa_unblockyield see also http://mail-index.netbsd.org/tech-kern/2003/09/15/0020.html --- sys/kern/kern_exit.c | 7 +- sys/kern/kern_sa.c | 262 +++++++++++++++++++++++++++++++++++++-- sys/kern/syscalls.master | 7 +- sys/sys/lwp.h | 4 +- sys/sys/savar.h | 6 +- 5 files changed, 267 insertions(+), 19 deletions(-) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 3eb1c8dcace7..8a388b3b6489 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_exit.c,v 1.123 2003/09/13 15:32:40 christos Exp $ */ +/* $NetBSD: kern_exit.c,v 1.124 2003/09/16 13:46:24 cl Exp $ */ /*- * Copyright (c) 1998, 1999 The NetBSD Foundation, Inc. @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.123 2003/09/13 15:32:40 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.124 2003/09/16 13:46:24 cl Exp $"); #include "opt_ktrace.h" #include "opt_perfctrs.h" @@ -499,6 +499,9 @@ exit_lwps(struct lwp *l) if(l2->l_flag & L_SA_WANTS_VP) wakeup(l2); + if (l2->l_wchan == &l2->l_upcallstack) + wakeup(&l2->l_upcallstack); + if ((l2->l_stat == LSSLEEP && (l2->l_flag & L_SINTR)) || l2->l_stat == LSSUSPENDED || l2->l_stat == LSSTOP) { SCHED_LOCK(s); diff --git a/sys/kern/kern_sa.c b/sys/kern/kern_sa.c index 7b3c8608a98b..656dd8ca21b9 100644 --- a/sys/kern/kern_sa.c +++ b/sys/kern/kern_sa.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_sa.c,v 1.23 2003/09/11 01:32:10 cl Exp $ */ +/* $NetBSD: kern_sa.c,v 1.24 2003/09/16 13:46:24 cl Exp $ */ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_sa.c,v 1.23 2003/09/11 01:32:10 cl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_sa.c,v 1.24 2003/09/16 13:46:24 cl Exp $"); #include #include @@ -57,6 +57,8 @@ static void sa_vp_donate(struct lwp *); static int sa_newcachelwp(struct lwp *); static struct lwp *sa_vp_repossess(struct lwp *l); +static int sa_pagefault(struct lwp *, ucontext_t *); + void sa_upcall_getstate(struct sadata_upcall *, struct lwp *, struct lwp *); MALLOC_DEFINE(M_SA, "sa", "Scheduler activations"); @@ -156,6 +158,10 @@ sys_sa_register(struct lwp *l, void *v, register_t *retval) sa->sa_stacks = malloc(sizeof(stack_t) * SA_NUMSTACKS, M_SA, M_WAITOK); sa->sa_nstacks = 0; + sa->sa_vp_faultaddr = NULL; + sa->sa_vp_ofaultaddr = NULL; + sa->sa_vp_stacks_low = NULL; + sa->sa_vp_stacks_high = NULL; LIST_INIT(&sa->sa_lwpcache); SIMPLEQ_INIT(&sa->sa_upcalls); p->p_sa = sa; @@ -183,7 +189,8 @@ sys_sa_stacks(struct lwp *l, void *v, register_t *retval) syscallarg(stack_t *) stacks; } */ *uap = v; struct sadata *sa = l->l_proc->p_sa; - int error, count; + struct lwp *l2; + int count, error, f, i; /* We have to be using scheduler activations */ if (sa == NULL) @@ -194,14 +201,35 @@ sys_sa_stacks(struct lwp *l, void *v, register_t *retval) return (EINVAL); count = min(count, SA_NUMSTACKS - sa->sa_nstacks); + SA_LWP_STATE_LOCK(l, f); error = copyin(SCARG(uap, stacks), sa->sa_stacks + sa->sa_nstacks, sizeof(stack_t) * count); + SA_LWP_STATE_UNLOCK(l, f); if (error) return (error); + for (i = sa->sa_nstacks; i < sa->sa_nstacks + count; i++) { + LIST_FOREACH(l2, &l->l_proc->p_lwps, l_sibling) { + if ((l2->l_upcallstack == sa->sa_stacks[i].ss_sp)) { + l2->l_upcallstack = NULL; + wakeup(&l2->l_upcallstack); + } + } + } + if ((sa->sa_nstacks == 0) && (sa->sa_vp_wait_count != 0)) l->l_flag |= L_SA_UPCALL; + if (sa->sa_vp_stacks_low == 0) { + sa->sa_vp_stacks_low = (uintptr_t)sa->sa_stacks[0].ss_sp; + sa->sa_vp_stacks_high = (uintptr_t)sa->sa_stacks[count - 1].ss_sp + + sa->sa_stacks[count - 1].ss_size; + DPRINTFN(11,("sys_sa_stacks(%d.%d): low 0x%llx high 0x%llx\n", + l->l_proc->p_pid, l->l_lid, + (unsigned long long)sa->sa_vp_stacks_low, + (unsigned long long)sa->sa_vp_stacks_high)); + } + sa->sa_nstacks += count; DPRINTFN(9, ("sa_stacks(%d.%d) nstacks + %d = %2d\n", l->l_proc->p_pid, l->l_lid, count, sa->sa_nstacks)); @@ -278,8 +306,11 @@ sys_sa_yield(struct lwp *l, void *v, register_t *retval) { struct proc *p = l->l_proc; - if (p->p_sa == NULL || !(p->p_flag & P_SA)) + if (p->p_sa == NULL || !(p->p_flag & P_SA)) { + DPRINTFN(1,("sys_sa_yield(%d.%d) proc %p not SA (p_sa %p, flag %s)\n", + p->p_pid, l->l_lid, p, p->p_sa, p->p_flag & P_SA ? "T" : "F")); return (EINVAL); + } sa_yield(l); @@ -397,6 +428,127 @@ sa_preempt(struct lwp *l) } +int +sys_sa_unblockyield(struct lwp *l, void *v, register_t *retval) +{ + struct sys_sa_unblockyield_args /* { + syscallarg(int) sa_id; + syscallarg(void *) up_preempted; + syscallarg(stack_t *) up_stack; + } */ *uap = v; + struct sadata *sa = l->l_proc->p_sa; + struct proc *p = l->l_proc; + struct lwp *l2; + int error, f, s; + void *preempted; + + if (sa == NULL) + return (EINVAL); + + if (sa->sa_nstacks == SA_NUMSTACKS) + return (EINVAL); + + SA_LWP_STATE_LOCK(l, f); + error = copyin(SCARG(uap, up_stack), sa->sa_stacks + sa->sa_nstacks, + sizeof(stack_t)); + if (error) { + SA_LWP_STATE_UNLOCK(l, f); + return (error); + } + + if (SCARG(uap, up_preempted) != NULL) { + error = copyin(SCARG(uap, up_preempted), &preempted, + sizeof(void *)); + if (error) { + SA_LWP_STATE_UNLOCK(l, f); + return (error); + } + } else + preempted = (void *)-1; + SA_LWP_STATE_UNLOCK(l, f); + + SCHED_LOCK(s); + LIST_FOREACH(l2, &p->p_lwps, l_sibling) { + if (l2->l_lid == SCARG(uap, sa_id)) { + KDASSERT(l2->l_upcallstack == + sa->sa_stacks[sa->sa_nstacks].ss_sp); + break; + } + } + KDASSERT(l2 != NULL); + + /* + * upcall not interrupted: (*up_preempted == NULL) + * - lwp ready: (wchan == upcallstacks) + * ==> recycle stack, put lwp on vp, + * unsleep lwp, make runnable, recycle upcall lwp (=l) + * - lwp not ready: + * ==> recycle stack, put lwp on vp, recycle upcall lwp (=l) + * + * upcall interrupted: (*up_preempted != NULL || up_preempted == NULL) + * ==> recycle upcall lwp + */ + + if (preempted != NULL) { + DPRINTFN(11,("sys_sa_unblockyield(%d.%d) recycle %d " + "(was %sready) upcall stack %p\n", + p->p_pid, l->l_lid, l2->l_lid, + (l2->l_wchan == &l2->l_upcallstack) ? "" : + "not ", sa->sa_stacks[sa->sa_nstacks].ss_sp)); + + l2->l_upcallstack = (void *)-1; + if (l2->l_wchan == &l2->l_upcallstack) { + unsleep(l2); + if (l2->l_stat == LSSLEEP) { + l2->l_slptime = 0; + l2->l_stat = LSRUN; + l2->l_proc->p_nrlwps++; + if (l2->l_flag & L_INMEM) + setrunqueue(l2); + else + sched_wakeup((caddr_t)&proc0); + } + } + } else { + DPRINTFN(11,("sys_sa_unblockyield(%d.%d) resuming %d " + "(is %sready) upcall stack %p\n", + p->p_pid, l->l_lid, l2->l_lid, + (l2->l_wchan == &l2->l_upcallstack) ? "" : + "not ", sa->sa_stacks[sa->sa_nstacks].ss_sp)); + + sa->sa_vp = l2; + sa->sa_nstacks += 1; + l2->l_flag &= ~L_SA_BLOCKING; + l2->l_upcallstack = NULL; + + if (l2->l_wchan == &l2->l_upcallstack) { + unsleep(l2); + if (l2->l_stat == LSSLEEP) { + l2->l_slptime = 0; + l2->l_stat = LSRUN; + l2->l_proc->p_nrlwps++; + if (l2->l_flag & L_INMEM) + setrunqueue(l2); + else + sched_wakeup((caddr_t)&proc0); + } + } + + p->p_nrlwps--; + sa_putcachelwp(p, l); + mi_switch(l, NULL); + /* mostly NOTREACHED */ + SCHED_ASSERT_UNLOCKED(); + splx(s); + KDASSERT(p->p_flag & P_WEXIT); + lwp_exit(l); + } + + SCHED_UNLOCK(s); + return (0); +} + + /* * Set up the user-level stack and trapframe to do an upcall. * @@ -410,7 +562,7 @@ sa_upcall(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted, struct sadata_upcall *sau; struct sadata *sa = l->l_proc->p_sa; stack_t st; - int f; + int error, f; /* XXX prevent recursive upcalls if we sleep formemory */ SA_LWP_STATE_LOCK(l, f); @@ -428,15 +580,23 @@ sa_upcall(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted, DPRINTFN(9,("sa_upcall(%d.%d) nstacks-- = %2d\n", l->l_proc->p_pid, l->l_lid, sa->sa_nstacks)); - return sa_upcall0(l, type, event, interrupted, argsize, arg, sau, &st); + error = sa_upcall0(l, type, event, interrupted, argsize, arg, sau, &st); + if (error) { + sa->sa_stacks[sa->sa_nstacks++] = st; + sadata_upcall_free(sau); + return (error); + } + + SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next); + l->l_flag |= L_SA_UPCALL; + + return (0); } int sa_upcall0(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted, size_t argsize, void *arg, struct sadata_upcall *sau, stack_t *st) { - struct proc *p = l->l_proc; - struct sadata *sa = p->p_sa; KDASSERT((event == NULL) || (event != interrupted)); @@ -453,9 +613,6 @@ sa_upcall0(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted, } else sa_upcall_getstate(sau, event, interrupted); - SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next); - l->l_flag |= L_SA_UPCALL; - return (0); } @@ -496,6 +653,38 @@ sa_upcall_getstate(struct sadata_upcall *sau, struct lwp *event, } +static int +sa_pagefault(struct lwp *l, ucontext_t *l_ctx) +{ + struct proc *p; + struct sadata *sa; + vaddr_t usp; + + p = l->l_proc; + sa = p->p_sa; + + KDASSERT(sa->sa_vp == l); + + if (sa->sa_vp_faultaddr == sa->sa_vp_ofaultaddr) { + DPRINTFN(10,("sa_check_upcall(%d.%d) double page fault\n", + p->p_pid, l->l_lid)); + return 1; + } + + usp = (uintptr_t)_UC_MACHINE_SP(l_ctx); + + if ((usp >= sa->sa_vp_stacks_low) && + (usp < sa->sa_vp_stacks_high)) { + DPRINTFN(10,("sa_check_upcall(%d.%d) upcall page fault\n", + p->p_pid, l->l_lid)); + return 1; + } + + sa->sa_vp_ofaultaddr = sa->sa_vp_faultaddr; + return 0; +} + + /* * Called by tsleep(). Block current LWP and switch to another. * @@ -592,7 +781,21 @@ sa_switch(struct lwp *l, int type) goto sa_upcall_failed; } + if (l->l_flag & L_SA_PAGEFAULT && sa_pagefault(l, + &sau->sau_state.captured.e_ctx) != 0) { + sadata_upcall_free(sau); + sa->sa_stacks[sa->sa_nstacks++] = st; + sa_putcachelwp(p, l2); + PRELE(l2); /* Remove the artificial hold-count */ + mi_switch(l, NULL); + return; + } + + SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next); + l2->l_flag |= L_SA_UPCALL; + l->l_flag |= L_SA_BLOCKING; + l->l_upcallstack = st.ss_sp; l2->l_priority = l2->l_usrpri; sa->sa_vp = l2; setrunnable(l2); @@ -835,7 +1038,7 @@ sa_upcall_userret(struct lwp *l) stack_t st; void *stack, *ap; ucontext_t u, *up; - int i, nsas, nint, nevents, type, f, sig; + int f, i, nsas, nint, nevents, sig, s, type; p = l->l_proc; sa = p->p_sa; @@ -848,6 +1051,33 @@ sa_upcall_userret(struct lwp *l) DPRINTFN(7,("sa_upcall_userret(%d.%d %x) \n", p->p_pid, l->l_lid, l->l_flag)); + while (l->l_upcallstack != NULL) { + if (l->l_upcallstack == (void *)-1) { + SCHED_LOCK(s); + l->l_flag &= ~(L_SA_UPCALL|L_SA_BLOCKING); + l->l_upcallstack = NULL; + p->p_nrlwps--; + sa_putcachelwp(p, l); + SA_LWP_STATE_UNLOCK(l, f); + KERNEL_PROC_UNLOCK(l); + mi_switch(l, NULL); + /* mostly NOTREACHED */ + SCHED_ASSERT_UNLOCKED(); + splx(s); + KERNEL_PROC_LOCK(l); + KDASSERT(p->p_flag & P_WEXIT); + lwp_exit(l); + } + if ((l->l_flag & L_SA_BLOCKING) == 0) { + l->l_upcallstack = NULL; + break; + } + tsleep((caddr_t) &l->l_upcallstack, PWAIT, + "saunblock", 0); + if (p->p_flag & P_WEXIT) + lwp_exit(l); + } + if (l->l_flag & L_SA_BLOCKING) { /* Invoke an "unblocked" upcall */ struct lwp *l2; @@ -895,6 +1125,8 @@ sa_upcall_userret(struct lwp *l) sigexit(l, SIGABRT); /* NOTREACHED */ } + SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next); + l->l_flag |= L_SA_UPCALL; l->l_flag &= ~L_SA_BLOCKING; /* We migth have sneaked past signal handling and userret */ @@ -913,6 +1145,8 @@ sa_upcall_userret(struct lwp *l) SA_LWP_STATE_LOCK(l, f); } + KDASSERT(sa->sa_vp == l); + if (SIMPLEQ_EMPTY(&sa->sa_upcalls)) { l->l_flag &= ~L_SA_UPCALL; @@ -1135,6 +1369,8 @@ sa_vp_repossess(struct lwp *l) SCHED_ASSERT_UNLOCKED(); + DPRINTFN(1,("sa_vp_repossess(%d.%d): want vp\n", + p->p_pid, l->l_lid)); while(sa->sa_vp != l) { tsleep((caddr_t) l, PWAIT, "saprocessor", 0); @@ -1145,6 +1381,8 @@ sa_vp_repossess(struct lwp *l) return 0; } } + DPRINTFN(1,("sa_vp_repossess(%d.%d): on vp\n", + p->p_pid, l->l_lid)); l2 = sa->sa_old_lwp; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index ec93a8cf605a..2f9c282e37bc 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.129 2003/09/10 16:43:35 christos Exp $ + $NetBSD: syscalls.master,v 1.130 2003/09/16 13:46:25 cl Exp $ ; @(#)syscalls.master 8.2 (Berkeley) 1/13/94 @@ -674,10 +674,11 @@ 333 STD { int sys_sa_setconcurrency(int concurrency); } 334 STD { int sys_sa_yield(void); } 335 STD { int sys_sa_preempt(int sa_id); } +336 STD { int sys_sa_unblockyield(int sa_id, \ + void *up_preempted, stack_t *up_stack); } ; -; Syscalls 336-339 are reserved for other scheduler activation syscalls. +; Syscalls 337-339 are reserved for other scheduler activation syscalls. ; -336 UNIMPL 337 UNIMPL 338 UNIMPL 339 UNIMPL diff --git a/sys/sys/lwp.h b/sys/sys/lwp.h index d1ae1cfe1e21..a55521499fdf 100644 --- a/sys/sys/lwp.h +++ b/sys/sys/lwp.h @@ -1,4 +1,4 @@ -/* $NetBSD: lwp.h,v 1.10 2003/09/13 08:32:18 jdolecek Exp $ */ +/* $NetBSD: lwp.h,v 1.11 2003/09/16 13:46:26 cl Exp $ */ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. @@ -74,6 +74,7 @@ struct lwp { int l_holdcnt; /* If non-zero, don't swap. */ void *l_ctxlink; /* uc_link {get,set}context */ int l_dupfd; /* Sideways return value from cloning devices XXX */ + void *l_upcallstack; /* Upcall stack used during blocking upcall */ #define l_endzero l_priority @@ -119,6 +120,7 @@ extern struct lwp lwp0; /* LWP for proc0 */ #define L_DETACHED 0x800000 /* Won't be waited for. */ #define L_SA_WANTS_VP 0x1000000 /* SA LWP wants a virtual processor */ #define L_CANCELLED 0x2000000 /* tsleep should not sleep */ +#define L_SA_PAGEFAULT 0x4000000 /* SA LWP in pagefault handler */ /* * Status values. diff --git a/sys/sys/savar.h b/sys/sys/savar.h index 98429e744486..8db1b9b16140 100644 --- a/sys/sys/savar.h +++ b/sys/sys/savar.h @@ -1,4 +1,4 @@ -/* $NetBSD: savar.h,v 1.5 2003/07/17 18:16:59 fvdl Exp $ */ +/* $NetBSD: savar.h,v 1.6 2003/09/16 13:46:26 cl Exp $ */ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. @@ -80,6 +80,10 @@ struct sadata { struct lwp *sa_woken; /* list of woken lwps */ struct lwp *sa_idle; /* lwp in sawait */ + vaddr_t sa_vp_stacks_low; /* SA upcall stack lowest address */ + vaddr_t sa_vp_stacks_high; /* SA upcall stack highest address */ + vaddr_t sa_vp_faultaddr; /* page fault address */ + vaddr_t sa_vp_ofaultaddr; /* old page fault address */ int sa_concurrency; /* desired concurrency */ LIST_HEAD(, lwp) sa_lwpcache; /* list of avaliable lwps */ int sa_ncached; /* list length */