From ea775f75983cb66e54336542f38cdba42b7b636d Mon Sep 17 00:00:00 2001 From: rmind Date: Thu, 27 Sep 2012 20:43:15 +0000 Subject: [PATCH] exit_lwps, lwp_wait: fix a race condition by re-trying if p_lock was dropped in a case of process exit. Necessary to re-flag all LWPs for exit, as their state might have changed or new LWPs spawned. Should fix PR/46168 and PR/46402. --- sys/kern/kern_exit.c | 39 +++++++++++++-------------------------- sys/kern/kern_lwp.c | 31 ++++++++++++++----------------- sys/kern/sys_lwp.c | 15 +++++---------- sys/sys/lwp.h | 6 ++---- 4 files changed, 34 insertions(+), 57 deletions(-) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index b9e73f0396fd..b8f1a6508546 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_exit.c,v 1.241 2012/08/05 14:53:25 riastradh Exp $ */ +/* $NetBSD: kern_exit.c,v 1.242 2012/09/27 20:43:15 rmind Exp $ */ /*- * Copyright (c) 1998, 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.241 2012/08/05 14:53:25 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.242 2012/09/27 20:43:15 rmind Exp $"); #include "opt_ktrace.h" #include "opt_perfctrs.h" @@ -599,17 +599,14 @@ exit1(struct lwp *l, int rv) void exit_lwps(struct lwp *l) { - struct proc *p; - struct lwp *l2; - int error; - lwpid_t waited; + proc_t *p = l->l_proc; + lwp_t *l2; int nlocks; KERNEL_UNLOCK_ALL(l, &nlocks); - - p = l->l_proc; - KASSERT(mutex_owned(p->p_lock)); retry: + KASSERT(mutex_owned(p->p_lock)); + /* * Interrupt LWPs in interruptable sleep, unsuspend suspended * LWPs and then wait for everyone else to finish. @@ -623,30 +620,20 @@ retry: l2->l_stat == LSSUSPENDED || l2->l_stat == LSSTOP) { /* setrunnable() will release the lock. */ setrunnable(l2); - DPRINTF(("exit_lwps: Made %d.%d runnable\n", - p->p_pid, l2->l_lid)); continue; } lwp_unlock(l2); } + + /* + * Wait for every LWP to exit. Note: LWPs can get suspended/slept + * behind us or there may even be new LWPs created. Therefore, a + * full retry is required on error. + */ while (p->p_nlwps > 1) { - DPRINTF(("exit_lwps: waiting for %d LWPs (%d zombies)\n", - p->p_nlwps, p->p_nzlwps)); - error = lwp_wait1(l, 0, &waited, LWPWAIT_EXITCONTROL); - if (p->p_nlwps == 1) - break; - if (error == EDEADLK) { - /* - * LWPs can get suspended/slept behind us. - * (eg. sa_setwoken) - * kick them again and retry. - */ + if (lwp_wait(l, 0, NULL, true)) { goto retry; } - if (error) - panic("exit_lwps: lwp_wait1 failed with error %d", - error); - DPRINTF(("exit_lwps: Got LWP %d from lwp_wait1()\n", waited)); } KERNEL_LOCK(nlocks, l); diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index 611505b99361..48693632d37d 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lwp.c,v 1.172 2012/08/30 02:26:02 matt Exp $ */ +/* $NetBSD: kern_lwp.c,v 1.173 2012/09/27 20:43:15 rmind Exp $ */ /*- * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -211,7 +211,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.172 2012/08/30 02:26:02 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.173 2012/09/27 20:43:15 rmind Exp $"); #include "opt_ddb.h" #include "opt_lockdebug.h" @@ -533,22 +533,21 @@ lwp_unstop(struct lwp *l) * Must be called with p->p_lock held. */ int -lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags) +lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting) { - struct proc *p = l->l_proc; - struct lwp *l2; - int nfound, error; - lwpid_t curlid; - bool exiting; + const lwpid_t curlid = l->l_lid; + proc_t *p = l->l_proc; + lwp_t *l2; + int error; KASSERT(mutex_owned(p->p_lock)); p->p_nlwpwait++; l->l_waitingfor = lid; - curlid = l->l_lid; - exiting = ((flags & LWPWAIT_EXITCONTROL) != 0); for (;;) { + int nfound; + /* * Avoid a race between exit1() and sigexit(): if the * process is dumping core, then we need to bail out: call @@ -558,10 +557,7 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags) if ((p->p_sflag & PS_WCORE) != 0) { mutex_exit(p->p_lock); lwp_userret(l); -#ifdef DIAGNOSTIC - panic("lwp_wait1"); -#endif - /* NOTREACHED */ + KASSERT(false); } /* @@ -651,13 +647,14 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags) } /* - * The kernel is careful to ensure that it can not deadlock - * when exiting - just keep waiting. + * Note: since the lock will be dropped, need to restart on + * wakeup to run all LWPs again, e.g. there may be new LWPs. */ if (exiting) { KASSERT(p->p_nlwps > 1); cv_wait(&p->p_lwpcv, p->p_lock); - continue; + error = EAGAIN; + break; } /* diff --git a/sys/kern/sys_lwp.c b/sys/kern/sys_lwp.c index 8ac81aa73762..fb41072f48a7 100644 --- a/sys/kern/sys_lwp.c +++ b/sys/kern/sys_lwp.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_lwp.c,v 1.54 2012/05/21 14:15:19 martin Exp $ */ +/* $NetBSD: sys_lwp.c,v 1.55 2012/09/27 20:43:15 rmind Exp $ */ /*- * Copyright (c) 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.54 2012/05/21 14:15:19 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.55 2012/09/27 20:43:15 rmind Exp $"); #include #include @@ -354,19 +354,14 @@ sys__lwp_wait(struct lwp *l, const struct sys__lwp_wait_args *uap, lwpid_t dep; mutex_enter(p->p_lock); - error = lwp_wait1(l, SCARG(uap, wait_for), &dep, 0); + error = lwp_wait(l, SCARG(uap, wait_for), &dep, false); mutex_exit(p->p_lock); - if (error) - return error; - - if (SCARG(uap, departed)) { + if (!error && SCARG(uap, departed)) { error = copyout(&dep, SCARG(uap, departed), sizeof(dep)); - if (error) - return error; } - return 0; + return error; } int diff --git a/sys/sys/lwp.h b/sys/sys/lwp.h index c1615e8dbf92..77e68501be15 100644 --- a/sys/sys/lwp.h +++ b/sys/sys/lwp.h @@ -1,4 +1,4 @@ -/* $NetBSD: lwp.h,v 1.163 2012/07/22 22:40:18 rmind Exp $ */ +/* $NetBSD: lwp.h,v 1.164 2012/09/27 20:43:15 rmind Exp $ */ /*- * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010 @@ -315,9 +315,7 @@ void lwp_drainrefs(lwp_t *); bool lwp_alive(lwp_t *); lwp_t *lwp_find_first(proc_t *); -/* Flags for _lwp_wait1 */ -#define LWPWAIT_EXITCONTROL 0x00000001 -int lwp_wait1(lwp_t *, lwpid_t, lwpid_t *, int); +int lwp_wait(lwp_t *, lwpid_t, lwpid_t *, bool); void lwp_continue(lwp_t *); void lwp_unsleep(lwp_t *, bool); void lwp_unstop(lwp_t *);