Improvements to lwp_wait1(), for PR kern/35932:

- Better detect simple cycles of threads calling _lwp_wait and return
  EDEADLK.  Does not handle deeper cycles like t1 -> t2 -> t3 -> t1.
- If there are multiple threads in _lwp_wait, then make sure that
  targeted waits take precedence over waits for any LWP to exit.
- When checking for deadlock, also count the number of zombies currently
  in the process as potentially reapable.  Whenever a zombie is murdered,
  kick all waiters to make them check again for deadlock.
- Add more comments.

Also, while here:

- LOCK_ASSERT -> KASSERT in some places.
- lwp_free: change boolean arguments to type 'bool'.
- proc_free: let lwp_free spin waiting for the last LWP to exit; there's
  no reason to do it here.
This commit is contained in:
parent 6ae05af027
commit fed1793605
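The heart of the change is the pair of new fields, l_waiter and l_waitingfor, which let a targeted wait notice that its target is already waiting on the caller. Below is a minimal userland model of that check; the toy_lwp type and function names are invented for illustration and are not the kernel code:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's lwpid_t / l_waitingfor fields. */
typedef unsigned int lwpid_t;

struct toy_lwp {
    lwpid_t lid;        /* models l->l_lid */
    lwpid_t waitingfor; /* models l->l_waitingfor; 0 = not waiting */
};

/*
 * Models the new check in lwp_wait1(): a targeted wait deadlocks when
 * the target is already waiting on us.  Because the caller records its
 * own wait before scanning, this also traps an LWP waiting on itself.
 */
static int
wait_would_deadlock(const struct toy_lwp *self, const struct toy_lwp *target)
{
    return target->lid == self->waitingfor &&
        target->waitingfor == self->lid;
}

int
main(void)
{
    struct toy_lwp t1 = { 1, 0 }, t2 = { 2, 0 }, t3 = { 3, 0 };

    t1.waitingfor = 2;  /* t1 calls _lwp_wait(t2) */
    t2.waitingfor = 1;  /* t2 calls _lwp_wait(t1): a simple cycle */
    printf("t2 -> t1: %s\n",
        wait_would_deadlock(&t2, &t1) ? "EDEADLK" : "ok");

    /* A deeper cycle t1 -> t2 -> t3 -> t1 is deliberately not caught. */
    t2.waitingfor = 3;
    t3.waitingfor = 1;
    printf("t3 -> t1: %s\n",
        wait_would_deadlock(&t3, &t1) ? "EDEADLK" : "ok");
    return 0;
}

As the second case shows, a three-way cycle goes undetected, which is why the commit message (and the new comment in lwp_wait1() below) note that the process can still be killed.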
kern_exit.c

@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_exit.c,v 1.171 2007/03/11 23:40:58 ad Exp $	*/
+/*	$NetBSD: kern_exit.c,v 1.172 2007/03/21 18:26:00 ad Exp $	*/
 
 /*-
  * Copyright (c) 1998, 1999, 2006, 2007 The NetBSD Foundation, Inc.
@@ -74,7 +74,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.171 2007/03/11 23:40:58 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.172 2007/03/21 18:26:00 ad Exp $");
 
 #include "opt_ktrace.h"
 #include "opt_perfctrs.h"
@@ -200,7 +200,7 @@ exit1(struct lwp *l, int rv)
 
 	p = l->l_proc;
 
-	LOCK_ASSERT(mutex_owned(&p->p_smutex));
+	KASSERT(mutex_owned(&p->p_smutex));
 
 	if (__predict_false(p == initproc))
 		panic("init died (signal %d, exit %d)",
@@ -922,26 +922,8 @@ proc_free(struct proc *p, struct rusage *caller_ru)
 	vp = p->p_textvp;
 	cred = p->p_cred;
 	ru = p->p_ru;
 
 	l = LIST_FIRST(&p->p_lwps);
-
-#ifdef MULTIPROCESSOR
-	/*
-	 * If the last remaining LWP is still on the CPU (unlikely), then
-	 * spin until it has switched away.  We need to release all locks
-	 * to avoid deadlock against interrupt handlers on the target CPU.
-	 */
-	if (l->l_cpu->ci_curlwp == l) {
-		int count;
-		mutex_exit(&proclist_lock);
-		KERNEL_UNLOCK_ALL(l, &count);
-		while (l->l_cpu->ci_curlwp == l)
-			SPINLOCK_BACKOFF_HOOK;
-		KERNEL_LOCK(count, l);
-		mutex_enter(&proclist_lock);
-	}
-#endif
 
 	mutex_destroy(&p->p_rasmutex);
 	mutex_destroy(&p->p_mutex);
 	mutex_destroy(&p->p_stmutex);
@@ -984,7 +966,7 @@ proc_free(struct proc *p, struct rusage *caller_ru)
 	/*
 	 * Free the last LWP's resources.
 	 */
-	lwp_free(l, 0, 1);
+	lwp_free(l, false, true);
 
 	/*
 	 * Collect child u-areas.
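The spin loop removed from proc_free() above is not lost: it moves into lwp_free(), which proc_free() now calls with last=true (see the kern_lwp.c hunk further down). A rough userland analogue of the idiom, release everything and then poll with a backoff hint until the scheduler has moved the thread off its CPU, is sketched here; cpu_curlwp and the function names are stand-ins, not kernel interfaces:

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

/* Illustrative stand-in for l->l_cpu->ci_curlwp; not a kernel API. */
static _Atomic(void *) cpu_curlwp;

/*
 * The idiom from proc_free()/lwp_free(): with every lock already
 * released (the kernel does KERNEL_UNLOCK_ALL first), poll until the
 * scheduler has switched the LWP off its CPU, with a backoff hint in
 * the loop (SPINLOCK_BACKOFF_HOOK in the kernel, sched_yield() here).
 */
static void
wait_until_off_cpu(void *lwp)
{
    while (atomic_load_explicit(&cpu_curlwp, memory_order_acquire) == lwp)
        sched_yield();
}

static void *
fake_switch_away(void *arg)
{
    (void)arg;
    /* Pretend the LWP runs briefly, then is switched out. */
    atomic_store_explicit(&cpu_curlwp, NULL, memory_order_release);
    return NULL;
}

int
main(void)
{
    int dummy;
    pthread_t pt;

    atomic_store(&cpu_curlwp, &dummy);
    pthread_create(&pt, NULL, fake_switch_away, NULL);
    wait_until_off_cpu(&dummy);
    pthread_join(pt, NULL);
    printf("LWP has switched away; safe to free its resources\n");
    return 0;
}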
kern_lwp.c

@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_lwp.c,v 1.62 2007/03/12 18:18:33 ad Exp $	*/
+/*	$NetBSD: kern_lwp.c,v 1.63 2007/03/21 18:26:00 ad Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
@@ -204,7 +204,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.62 2007/03/12 18:18:33 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.63 2007/03/21 18:26:00 ad Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
@@ -262,8 +262,8 @@ lwp_suspend(struct lwp *curl, struct lwp *t)
 {
 	int error;
 
-	LOCK_ASSERT(mutex_owned(&t->l_proc->p_smutex));
-	LOCK_ASSERT(lwp_locked(t, NULL));
+	KASSERT(mutex_owned(&t->l_proc->p_smutex));
+	KASSERT(lwp_locked(t, NULL));
 
 	KASSERT(curl != t || curl->l_stat == LSONPROC);
 
@@ -338,8 +338,8 @@ void
 lwp_continue(struct lwp *l)
 {
 
-	LOCK_ASSERT(mutex_owned(&l->l_proc->p_smutex));
-	LOCK_ASSERT(lwp_locked(l, NULL));
+	KASSERT(mutex_owned(&l->l_proc->p_smutex));
+	KASSERT(lwp_locked(l, NULL));
 
 	DPRINTF(("lwp_continue of %d.%d (%s), state %d, wchan %p\n",
 	    l->l_proc->p_pid, l->l_lid, l->l_proc->p_comm, l->l_stat,
@@ -374,30 +374,18 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
 	struct proc *p = l->l_proc;
 	struct lwp *l2;
 	int nfound, error;
+	lwpid_t curlid;
+	bool exiting;
 
 	DPRINTF(("lwp_wait1: %d.%d waiting for %d.\n",
 	    p->p_pid, l->l_lid, lid));
 
-	LOCK_ASSERT(mutex_owned(&p->p_smutex));
-
-	/*
-	 * We try to check for deadlock:
-	 *
-	 * 1) If all other LWPs are waiting for exits or suspended.
-	 * 2) If we are trying to wait on ourself.
-	 *
-	 * XXX we'd like to check for a cycle of waiting LWPs (specific LID
-	 * waits, not any-LWP waits) and detect that sort of deadlock, but
-	 * we don't have a good place to store the lwp that is being waited
-	 * for.  wchan is already filled with &p->p_nlwps, and putting the
-	 * lwp address in there for deadlock tracing would require exiting
-	 * LWPs to call wakeup on both their own address and &p->p_nlwps, to
-	 * get threads sleeping on any LWP exiting.
-	 */
-	if (lid == l->l_lid)
-		return EDEADLK;
+	KASSERT(mutex_owned(&p->p_smutex));
 
 	p->p_nlwpwait++;
+	l->l_waitingfor = lid;
+	curlid = l->l_lid;
+	exiting = ((flags & LWPWAIT_EXITCONTROL) != 0);
 
 	for (;;) {
 		/*
@@ -421,7 +409,7 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
 		 */
 		while ((l2 = p->p_zomblwp) != NULL) {
 			p->p_zomblwp = NULL;
-			lwp_free(l2, 0, 0);	/* releases proc mutex */
+			lwp_free(l2, false, false);/* releases proc mutex */
 			mutex_enter(&p->p_smutex);
 		}
 
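A pattern worth noting in this hunk: lwp_free() returns with the proc mutex released, so the drain loop must re-take p->p_smutex and re-read p->p_zomblwp on every iteration, since another LWP may have parked a new zombie in the meantime. A hedged pthread sketch of the same shape, with all names hypothetical:

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical userland skeleton of the drain loop; not kernel code. */
struct zproc {
    pthread_mutex_t smutex;  /* models p->p_smutex */
    void           *zomblwp; /* models the one-slot p->p_zomblwp */
};

/*
 * Models lwp_free(): it does its heavy work unlocked, so it consumes
 * (releases) the caller's mutex, just as the comment in the hunk says.
 */
static void
zlwp_free(struct zproc *p, void *zombie)
{
    pthread_mutex_unlock(&p->smutex);
    free(zombie);
}

/*
 * Models the reap loop: take one zombie, clear the slot, free it with
 * the lock dropped, then re-acquire and re-check the slot.
 */
static void
drain_zombies(struct zproc *p)
{
    void *z;

    pthread_mutex_lock(&p->smutex);
    while ((z = p->zomblwp) != NULL) {
        p->zomblwp = NULL;
        zlwp_free(p, z);    /* releases p->smutex */
        pthread_mutex_lock(&p->smutex);
    }
    pthread_mutex_unlock(&p->smutex);
}

int
main(void)
{
    struct zproc p = { PTHREAD_MUTEX_INITIALIZER, malloc(16) };

    drain_zombies(&p);
    return 0;
}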
@@ -431,11 +419,44 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
 		 * but don't drain them here.
 		 */
 		nfound = 0;
+		error = 0;
 		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
-			if (l2 == l || (lid != 0 && l2->l_lid != lid))
+			/*
+			 * If a specific wait and the target is waiting on
+			 * us, then avoid deadlock.  This also traps LWPs
+			 * that try to wait on themselves.
+			 *
+			 * Note that this does not handle more complicated
+			 * cycles, like: t1 -> t2 -> t3 -> t1.  The process
+			 * can still be killed so it is not a major problem.
+			 */
+			if (l2->l_lid == lid && l2->l_waitingfor == curlid) {
+				error = EDEADLK;
+				break;
+			}
+			if (l2 == l)
 				continue;
 			if ((l2->l_prflag & LPR_DETACHED) != 0) {
-				nfound += ((flags & LWPWAIT_EXITCONTROL) != 0);
+				nfound += exiting;
 				continue;
 			}
+			if (lid != 0) {
+				if (l2->l_lid != lid)
+					continue;
+				/*
+				 * Mark this LWP as the first waiter, if there
+				 * is no other.
+				 */
+				if (l2->l_waiter == 0)
+					l2->l_waiter = curlid;
+			} else if (l2->l_waiter != 0) {
+				/*
+				 * It already has a waiter - so don't
+				 * collect it.  If the waiter doesn't
+				 * grab it we'll get another chance
+				 * later.
+				 */
+				nfound++;
+				continue;
+			}
 			nfound++;
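This is where targeted waits gain precedence: a waiter for a specific LID stamps itself into l_waiter, and any-LWP waiters leave claimed zombies alone. A compact, compilable model of the claim rule, using toy types rather than the kernel structures:

#include <assert.h>

/* Hypothetical model of the claim protocol, not the kernel code. */
struct toy_lwp {
    unsigned lid;    /* models l_lid */
    unsigned waiter; /* models l_waiter; 0 = unclaimed */
    int zombie;      /* models l_stat == LSZOMB */
};

/*
 * May the scanning LWP (curlid) collect l2?  A targeted wait (lid != 0)
 * claims the target by recording itself as first waiter; an any-LWP
 * wait (lid == 0) must skip a zombie that a targeted waiter has already
 * claimed.  That is how targeted waits take precedence.
 */
static int
may_collect(struct toy_lwp *l2, unsigned lid, unsigned curlid)
{
    if (lid != 0) {
        if (l2->lid != lid)
            return 0;
        if (l2->waiter == 0)        /* mark first targeted waiter */
            l2->waiter = curlid;
        return l2->zombie;
    }
    if (l2->waiter != 0)            /* claimed: try again later */
        return 0;
    return l2->zombie;
}

int
main(void)
{
    struct toy_lwp z = { 7, 0, 1 }; /* zombie LWP 7, unclaimed */

    assert(may_collect(&z, 7, 9) == 1); /* targeted wait claims it */
    assert(z.waiter == 9);
    assert(may_collect(&z, 0, 5) == 0); /* any-LWP wait backs off */
    return 0;
}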
@@ -444,33 +465,83 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
 			if (l2->l_stat != LSZOMB)
 				continue;
 
+			/*
+			 * We're no longer waiting.  Reset the "first waiter"
+			 * pointer on the target, in case it was us.
+			 */
+			l->l_waitingfor = 0;
+			l2->l_waiter = 0;
+			p->p_nlwpwait--;
 			if (departed)
 				*departed = l2->l_lid;
-			lwp_free(l2, 0, 0);
 
+			/* lwp_free() releases the proc lock. */
+			lwp_free(l2, false, false);
 			mutex_enter(&p->p_smutex);
-			p->p_nlwpwait--;
 			return 0;
 		}
 
+		if (error != 0)
+			break;
 		if (nfound == 0) {
 			error = ESRCH;
 			break;
 		}
-		if ((flags & LWPWAIT_EXITCONTROL) != 0) {
+
+		/*
+		 * The kernel is careful to ensure that it can not deadlock
+		 * when exiting - just keep waiting.
+		 */
+		if (exiting) {
 			KASSERT(p->p_nlwps > 1);
 			cv_wait(&p->p_lwpcv, &p->p_smutex);
 			continue;
 		}
 
+		/*
+		 * If all other LWPs are waiting for exits or suspends
+		 * and the supply of zombies and potential zombies is
+		 * exhausted, then we are about to deadlock.
+		 *
+		 * If the process is exiting (and this LWP is not the one
+		 * that is coordinating the exit) then bail out now.
+		 */
 		if ((p->p_sflag & PS_WEXIT) != 0 ||
-		    p->p_nrlwps <= p->p_nlwpwait + p->p_ndlwps) {
+		    p->p_nrlwps + p->p_nzlwps - p->p_ndlwps <= p->p_nlwpwait) {
 			error = EDEADLK;
 			break;
 		}
 
+		/*
+		 * Sit around and wait for something to happen.  We'll be
+		 * awoken if any of the conditions examined change: if an
+		 * LWP exits, is collected, or is detached.
+		 */
 		if ((error = cv_wait_sig(&p->p_lwpcv, &p->p_smutex)) != 0)
 			break;
 	}
 
+	/*
+	 * We didn't find any LWPs to collect, we may have received a
+	 * signal, or some other condition has caused us to bail out.
+	 *
+	 * If waiting on a specific LWP, clear the waiters marker: some
+	 * other LWP may want it.  Then, kick all the remaining waiters
+	 * so that they can re-check for zombies and for deadlock.
+	 */
+	if (lid != 0) {
+		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
+			if (l2->l_lid == lid) {
+				if (l2->l_waiter == curlid)
+					l2->l_waiter = 0;
+				break;
+			}
+		}
+	}
 	p->p_nlwpwait--;
+	l->l_waitingfor = 0;
+	cv_broadcast(&p->p_lwpcv);
 
 	return error;
 }
 
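The rewritten deadlock test now counts zombies (p_nzlwps) as reapable supply and subtracts detached LWPs (p_ndlwps), which ordinary waits cannot collect. A small worked example of the arithmetic follows; the function is a model of the expression in the hunk above, not kernel code, and the field semantics in the comments are my reading of it:

#include <stdbool.h>
#include <stdio.h>

/*
 * Model of the new test in lwp_wait1() (parameter names mirror the
 * struct proc fields): we are about to deadlock when the LWPs still
 * running or already reapable, minus those that will exit detached,
 * are all accounted for by sleeping waiters.
 */
static bool
about_to_deadlock(int nrlwps, int nzlwps, int ndlwps, int nlwpwait)
{
    return nrlwps + nzlwps - ndlwps <= nlwpwait;
}

int
main(void)
{
    /*
     * Example: 2 LWPs, no zombies, none detached.  One waiter is fine,
     * but a second caller would mean nobody is left to ever exit.
     */
    printf("one waiter:  %d\n", about_to_deadlock(2, 0, 0, 1)); /* 0 */
    printf("two waiters: %d\n", about_to_deadlock(2, 0, 0, 2)); /* 1 */
    /* A pending zombie keeps us from declaring deadlock. */
    printf("with zombie: %d\n", about_to_deadlock(2, 1, 0, 2)); /* 0 */
    return 0;
}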
@@ -496,7 +567,7 @@ newlwp(struct lwp *l1, struct proc *p2, vaddr_t uaddr, bool inmem,
 		mutex_enter(&p2->p_smutex);
 		if ((isfree = p2->p_zomblwp) != NULL) {
 			p2->p_zomblwp = NULL;
-			lwp_free(isfree, 1, 0);	/* releases proc mutex */
+			lwp_free(isfree, true, false);/* releases proc mutex */
 		} else
 			mutex_exit(&p2->p_smutex);
 	}
@@ -654,7 +725,7 @@ lwp_exit(struct lwp *l)
 	if ((l->l_prflag & LPR_DETACHED) != 0) {
 		while ((l2 = p->p_zomblwp) != NULL) {
 			p->p_zomblwp = NULL;
-			lwp_free(l2, 0, 0);	/* releases proc mutex */
+			lwp_free(l2, false, false);/* releases proc mutex */
 			mutex_enter(&p->p_smutex);
 		}
 		p->p_zomblwp = l;
@@ -723,7 +794,7 @@ lwp_exit2(struct lwp *l)
  * XXXLWP limits.
  */
 void
-lwp_free(struct lwp *l, int recycle, int last)
+lwp_free(struct lwp *l, bool recycle, bool last)
 {
 	struct proc *p = l->l_proc;
 	ksiginfoq_t kq;
@@ -743,24 +814,30 @@ lwp_free(struct lwp *l, int recycle, int last)
 		p->p_nzlwps--;
 		if ((l->l_prflag & LPR_DETACHED) != 0)
 			p->p_ndlwps--;
+
+		/*
+		 * Have any LWPs sleeping in lwp_wait() recheck for
+		 * deadlock.
+		 */
+		cv_broadcast(&p->p_lwpcv);
 		mutex_exit(&p->p_smutex);
 	}
 
 #ifdef MULTIPROCESSOR
-	/*
-	 * In the unlikely event that the LWP is still on the CPU,
-	 * then spin until it has switched away.  We need to release
-	 * all locks to avoid deadlock against interrupt handlers on
-	 * the target CPU.
-	 */
-	if (l->l_cpu->ci_curlwp == l) {
-		int count;
-		KERNEL_UNLOCK_ALL(curlwp, &count);
-		while (l->l_cpu->ci_curlwp == l)
-			SPINLOCK_BACKOFF_HOOK;
-		KERNEL_LOCK(count, curlwp);
-	}
-#endif
+	/*
+	 * In the unlikely event that the LWP is still on the CPU,
+	 * then spin until it has switched away.  We need to release
+	 * all locks to avoid deadlock against interrupt handlers on
+	 * the target CPU.
+	 */
+	if (l->l_cpu->ci_curlwp == l) {
+		int count;
+		KERNEL_UNLOCK_ALL(curlwp, &count);
+		while (l->l_cpu->ci_curlwp == l)
+			SPINLOCK_BACKOFF_HOOK;
+		KERNEL_LOCK(count, curlwp);
+	}
+#endif
 
 	/*
 	 * Destroy the LWP's remaining signal information.
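The new cv_broadcast() above is the other half of the deadlock logic: whenever a zombie is freed, every sleeper in lwp_wait1() wakes and re-evaluates its conditions. That is the classic condition-variable discipline: change state under the mutex, broadcast, and have waiters recheck their predicate in a loop. A hedged pthread sketch of the protocol (all names are stand-ins; it compiles as a translation unit):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

/* Hypothetical userland skeleton of the wait/recheck protocol. */
static pthread_mutex_t smutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t lwpcv = PTHREAD_COND_INITIALIZER;
static int nzombies;    /* models the supply of reapable LWPs */

/* Models lwp_free(): update the counts, then kick every sleeper. */
static void
zombie_freed(void)
{
    pthread_mutex_lock(&smutex);
    nzombies--;
    /* Have any waiters recheck for zombies and for deadlock. */
    pthread_cond_broadcast(&lwpcv);
    pthread_mutex_unlock(&smutex);
}

/* Models the sleep in lwp_wait1(): always re-evaluate after waking. */
static int
wait_for_zombie(bool (*deadlocked)(void))
{
    int error = 0;

    pthread_mutex_lock(&smutex);
    while (nzombies == 0) {
        if (deadlocked()) {     /* condition re-examined on each pass */
            error = EDEADLK;
            break;
        }
        pthread_cond_wait(&lwpcv, &smutex);
    }
    pthread_mutex_unlock(&smutex);
    return error;
}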
@@ -812,7 +889,7 @@ proc_representative_lwp(struct proc *p, int *nrlwps, int locking)
 	int cnt;
 
 	if (locking) {
-		LOCK_ASSERT(mutex_owned(&p->p_smutex));
+		KASSERT(mutex_owned(&p->p_smutex));
 	}
 
 	/* Trivial case: only one LWP */
@@ -913,7 +990,7 @@ lwp_find(struct proc *p, int id)
 {
 	struct lwp *l;
 
-	LOCK_ASSERT(mutex_owned(&p->p_smutex));
+	KASSERT(mutex_owned(&p->p_smutex));
 
 	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
 		if (l->l_lid == id)
@@ -1015,7 +1092,7 @@ void
 lwp_setlock(struct lwp *l, kmutex_t *new)
 {
 
-	LOCK_ASSERT(mutex_owned(l->l_mutex));
+	KASSERT(mutex_owned(l->l_mutex));
 
 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
 	mb_write();
@@ -1034,7 +1111,7 @@ lwp_unlock_to(struct lwp *l, kmutex_t *new)
 {
 	kmutex_t *old;
 
-	LOCK_ASSERT(mutex_owned(l->l_mutex));
+	KASSERT(mutex_owned(l->l_mutex));
 
 	old = l->l_mutex;
 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
@@ -1057,7 +1134,7 @@ lwp_relock(struct lwp *l, kmutex_t *new)
 	kmutex_t *old;
 #endif
 
-	LOCK_ASSERT(mutex_owned(l->l_mutex));
+	KASSERT(mutex_owned(l->l_mutex));
 
 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
 	old = l->l_mutex;
@@ -1168,7 +1245,7 @@ lwp_userret(struct lwp *l)
 void
 lwp_need_userret(struct lwp *l)
 {
-	LOCK_ASSERT(lwp_locked(l, NULL));
+	KASSERT(lwp_locked(l, NULL));
 
 	/*
 	 * Since the tests in lwp_userret() are done unlocked, make sure
@@ -1187,7 +1264,7 @@ void
 lwp_addref(struct lwp *l)
 {
 
-	LOCK_ASSERT(mutex_owned(&l->l_proc->p_smutex));
+	KASSERT(mutex_owned(&l->l_proc->p_smutex));
 	KASSERT(l->l_stat != LSZOMB);
 	KASSERT(l->l_refcnt != 0);
 
@@ -1217,7 +1294,7 @@ lwp_drainrefs(struct lwp *l)
 {
 	struct proc *p = l->l_proc;
 
-	LOCK_ASSERT(mutex_owned(&p->p_smutex));
+	KASSERT(mutex_owned(&p->p_smutex));
 	KASSERT(l->l_refcnt != 0);
 
 	l->l_refcnt--;
sys_lwp.c

@@ -1,4 +1,4 @@
-/*	$NetBSD: sys_lwp.c,v 1.16 2007/03/20 23:25:17 ad Exp $	*/
+/*	$NetBSD: sys_lwp.c,v 1.17 2007/03/21 18:26:00 ad Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.16 2007/03/20 23:25:17 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.17 2007/03/21 18:26:00 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -386,17 +386,22 @@ sys__lwp_detach(struct lwp *l, void *v, register_t *retval)
 			p->p_ndlwps++;
 			t->l_prflag |= LPR_DETACHED;
 			if (t->l_stat == LSZOMB) {
-				cv_broadcast(&p->p_lwpcv);
-				lwp_free(t, 0, 0);	/* releases proc mutex */
+				/* Releases proc mutex. */
+				lwp_free(t, false, false);
 				return 0;
 			}
 			error = 0;
+
+			/*
+			 * Have any LWPs sleeping in lwp_wait() recheck
+			 * for deadlock.
+			 */
+			cv_broadcast(&p->p_lwpcv);
 		} else
 			error = EINVAL;
 	} else
 		error = ESRCH;
 
-	cv_broadcast(&p->p_lwpcv);
 	mutex_exit(&p->p_smutex);
 
 	return error;
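The restructured sys__lwp_detach() distinguishes the same two cases POSIX threads do: detaching an already-terminated LWP reaps it on the spot (the LSZOMB branch), while detaching a live one only marks it and kicks the waiters. For comparison, the userland analogue; the sleep() is a deliberately crude way to let the first thread finish, for illustration only:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static void *
worker(void *arg)
{
    (void)arg;
    return NULL;
}

int
main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, worker, NULL);
    sleep(1);           /* let it terminate: the "zombie" case */
    pthread_detach(t);  /* resources released immediately */
    printf("detached a finished thread\n");

    pthread_create(&t, NULL, worker, NULL);
    pthread_detach(t);  /* live thread: reaped when it exits */
    printf("detached a running thread\n");
    return 0;
}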
lwp.h

@@ -1,4 +1,4 @@
-/*	$NetBSD: lwp.h,v 1.56 2007/03/02 15:57:06 skd Exp $	*/
+/*	$NetBSD: lwp.h,v 1.57 2007/03/21 18:25:59 ad Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
@@ -105,6 +105,8 @@ struct lwp {
 	void		*l_ctxlink;	/* p: uc_link {get,set}context */
 	struct proc	*l_proc;	/* p: parent process */
 	LIST_ENTRY(lwp)	l_sibling;	/* p: entry on proc's list of LWPs */
+	lwpid_t		l_waiter;	/* p: first LWP waiting on us */
+	lwpid_t		l_waitingfor;	/* p: specific LWP we are waiting on */
 	int		l_prflag;	/* p: process level flags */
 	u_int		l_refcnt;	/* p: reference count on this LWP */
 	lwpid_t		l_lid;		/* (: LWP identifier; local to proc */
@@ -274,7 +276,7 @@ void	lwp_update_creds(struct lwp *);
 struct lwp *lwp_find(struct proc *, int);
 void	lwp_userret(struct lwp *);
 void	lwp_need_userret(struct lwp *);
-void	lwp_free(struct lwp *, int, int);
+void	lwp_free(struct lwp *, bool, bool);
 void	lwp_sys_init(void);
 
 int	lwp_specific_key_create(specificdata_key_t *, specificdata_dtor_t);