Overhaul the way LWP IDs are allocated.  Instead of each LWP having its
own LWP ID space, LWP IDs now come from the same number space as PIDs.  The
lead LWP of a process gets the PID as its LID.  If a multi-LWP process's
lead LWP exits, the PID persists for the process.

In addition to providing system-wide unique thread IDs, this also lets us
eliminate the per-process LWP radix tree, and some associated locks.

Remove the separate "global thread ID" map added previously; it is no longer
needed to provide this functionality.

Nudged in this direction by ad@ and chs@.
This commit is contained in:
thorpej 2020-04-24 03:22:06 +00:00
parent 127c7b9f04
commit 156895706e
10 changed files with 589 additions and 491 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: linux_exec.c,v 1.121 2020/02/15 17:13:55 ad Exp $ */
/* $NetBSD: linux_exec.c,v 1.122 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 1994, 1995, 1998, 2000, 2007, 2008, 2020
@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.121 2020/02/15 17:13:55 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.122 2020/04/24 03:22:06 thorpej Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -130,8 +130,6 @@ linux_e_proc_exec(struct proc *p, struct exec_package *epp)
}
KASSERT(p->p_nlwps == 1);
l = LIST_FIRST(&p->p_lwps);
lwp_renumber(l, p->p_pid);
}
void
@ -152,7 +150,6 @@ linux_e_proc_fork(struct proc *p2, struct lwp *l1, int flags)
KASSERT(p2->p_nlwps == 1);
l2 = LIST_FIRST(&p2->p_lwps);
lwp_renumber(l2, p2->p_pid);
led1 = l1->l_emuldata;
led2 = l2->l_emuldata;
led2->led_child_tidptr = led1->led_child_tidptr;

View File

@ -1,4 +1,4 @@
/* $NetBSD: linux_sched.c,v 1.74 2020/04/19 20:31:59 thorpej Exp $ */
/* $NetBSD: linux_sched.c,v 1.75 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 1999, 2019 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.74 2020/04/19 20:31:59 thorpej Exp $");
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.75 2020/04/24 03:22:06 thorpej Exp $");
#include <sys/param.h>
#include <sys/mount.h>
@ -195,7 +195,7 @@ linux_clone_nptl(struct lwp *l, const struct linux_sys_clone_args *uap, register
return ENOMEM;
}
error = lwp_create(l, p, uaddr, LWP_DETACHED | LWP_PIDLID,
error = lwp_create(l, p, uaddr, LWP_DETACHED,
SCARG(uap, stack), 0, child_return, NULL, &l2, l->l_class,
&l->l_sigmask, &l->l_sigstk);
if (__predict_false(error)) {

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_exec.c,v 1.498 2020/04/21 21:42:47 ad Exp $ */
/* $NetBSD: kern_exec.c,v 1.499 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.498 2020/04/21 21:42:47 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.499 2020/04/24 03:22:06 thorpej Exp $");
#include "opt_exec.h"
#include "opt_execfmt.h"
@ -1148,10 +1148,6 @@ emulexec(struct lwp *l, struct exec_package *epp)
&& p->p_emul != epp->ep_esch->es_emul)
(*p->p_emul->e_proc_exit)(p);
/* This is now LWP 1. Re-number the LWP if needed. */
if (l->l_lid != 1)
lwp_renumber(l, 1);
/*
* Call exec hook. Emulation code may NOT store reference to anything
* from &pack.
@ -2495,10 +2491,18 @@ do_posix_spawn(struct lwp *l1, pid_t *pid_res, bool *child_ok, const char *path,
* Allocate new proc. Borrow proc0 vmspace for it, we will
* replace it with its own before returning to userland
* in the child.
*/
p2 = proc_alloc();
if (p2 == NULL) {
/* We were unable to allocate a process ID. */
error = EAGAIN;
goto error_exit;
}
/*
* This is a point of no return, we will have to go through
* the child proc to properly clean it up past this point.
*/
p2 = proc_alloc();
pid = p2->p_pid;
/*
@ -2533,7 +2537,6 @@ do_posix_spawn(struct lwp *l1, pid_t *pid_res, bool *child_ok, const char *path,
mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
rw_init(&p2->p_reflock);
rw_init(&p2->p_treelock);
cv_init(&p2->p_waitcv, "wait");
cv_init(&p2->p_lwpcv, "lwpwait");

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_exit.c,v 1.288 2020/04/19 20:31:59 thorpej Exp $ */
/* $NetBSD: kern_exit.c,v 1.289 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 1998, 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.288 2020/04/19 20:31:59 thorpej Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.289 2020/04/24 03:22:06 thorpej Exp $");
#include "opt_ktrace.h"
#include "opt_dtrace.h"
@ -202,7 +202,6 @@ exit1(struct lwp *l, int exitcode, int signo)
ksiginfo_t ksi;
ksiginfoq_t kq;
int wakeinit;
struct lwp *l2 __diagused;
p = l->l_proc;
@ -560,14 +559,8 @@ exit1(struct lwp *l, int exitcode, int signo)
pcu_discard_all(l);
mutex_enter(p->p_lock);
/* Don't bother with p_treelock as no other LWPs remain. */
l2 = radix_tree_remove_node(&p->p_lwptree, (uint64_t)(l->l_lid - 1));
KASSERT(l2 == l);
KASSERT(radix_tree_empty_tree_p(&p->p_lwptree));
radix_tree_fini_tree(&p->p_lwptree);
/* Free the linux lwp id */
if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid)
proc_free_pid(l->l_lid);
/* Free the LWP ID */
proc_free_lwpid(p, l->l_lid);
lwp_drainrefs(l);
lwp_lock(l);
l->l_prflag &= ~LPR_DETACHED;
@ -1269,7 +1262,6 @@ proc_free(struct proc *p, struct wrusage *wru)
cv_destroy(&p->p_waitcv);
cv_destroy(&p->p_lwpcv);
rw_destroy(&p->p_reflock);
rw_destroy(&p->p_treelock);
proc_free_mem(p);
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_fork.c,v 1.222 2020/04/14 22:42:18 kamil Exp $ */
/* $NetBSD: kern_fork.c,v 1.223 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 1999, 2001, 2004, 2006, 2007, 2008, 2019
@ -68,7 +68,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.222 2020/04/14 22:42:18 kamil Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.223 2020/04/24 03:22:06 thorpej Exp $");
#include "opt_ktrace.h"
#include "opt_dtrace.h"
@ -305,14 +305,18 @@ fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
return ENOMEM;
}
/* Allocate new proc. */
p2 = proc_alloc();
if (p2 == NULL) {
/* We were unable to allocate a process ID. */
return EAGAIN;
}
/*
* We are now committed to the fork. From here on, we may
* block on resources, but resource allocation may NOT fail.
*/
/* Allocate new proc. */
p2 = proc_alloc();
/*
* Make a proc table entry for the new process.
* Start by zeroing the section of proc that is zero-initialized,
@ -327,7 +331,6 @@ fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
LIST_INIT(&p2->p_lwps);
LIST_INIT(&p2->p_sigwaiters);
radix_tree_init_tree(&p2->p_lwptree);
/*
* Duplicate sub-structures as needed.
@ -354,7 +357,6 @@ fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
rw_init(&p2->p_reflock);
rw_init(&p2->p_treelock);
cv_init(&p2->p_waitcv, "wait");
cv_init(&p2->p_lwpcv, "lwpwait");

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $ */
/* $NetBSD: kern_lwp.c,v 1.235 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020
@ -83,6 +83,16 @@
* The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running.
* Importantly, it indicates that its state is tied to a CPU.
*
* LSLARVAL:
*
* Born, but not fully mature: the LWP is in the process
* of being constructed. This state exists so that the
* LWP can occupy a slot in the PID table, but without
* having to worry about being touched; lookups of the
* LWP will fail while in this state. The LWP will become
* visible in the PID table once its state transitions
* to LSIDL.
*
* LSZOMB:
*
* Dead or dying: the LWP has released most of its resources
@ -120,6 +130,8 @@
*
* LWPs may transition states in the following ways:
*
* LARVAL ----> IDL
*
* RUN -------> ONPROC ONPROC -----> RUN
* > SLEEP
* > STOPPED
@ -211,7 +223,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.235 2020/04/24 03:22:06 thorpej Exp $");
#include "opt_ddb.h"
#include "opt_lockdebug.h"
@ -245,7 +257,6 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $");
#include <sys/psref.h>
#include <sys/msan.h>
#include <sys/kcov.h>
#include <sys/thmap.h>
#include <sys/cprng.h>
#include <uvm/uvm_extern.h>
@ -258,59 +269,27 @@ struct lwplist alllwp __cacheline_aligned;
* Lookups by global thread ID operate outside of the normal LWP
* locking protocol.
*
* We are using a thmap, which internally can perform lookups lock-free.
* However, we still need to serialize lookups against LWP exit. We
* achieve this as follows:
*
* => Assignment of TID is performed lazily by the LWP itself, when it
* is first requested. Insertion into the thmap is done completely
* lock-free (other than the internal locking performed by thmap itself).
* Once the TID is published in the map, the l___tid field in the LWP
* is protected by p_lock.
*
* => When we look up an LWP in the thmap, we take lwp_threadid_lock as
* => When we look up an LWP in the table, we take lwp_threadid_lock as
* a READER. While still holding the lock, we add a reference to
* the LWP (using atomics). After adding the reference, we drop the
* lwp_threadid_lock. We now take p_lock and check the state of the
* LWP. If the LWP is draining its references or if the l___tid field
* has been invalidated, we drop the reference we took and return NULL.
* Otherwise, the lookup has succeeded and the LWP is returned with a
* reference count that the caller is responsible for dropping.
*
* => When a LWP is exiting it releases its TID. While holding the
* p_lock, the entry is deleted from the thmap and the l___tid field
* invalidated. Once the field is invalidated, p_lock is released.
* It is done in this sequence because the l___tid field is used as
* the lookup key storage in the thmap in order to conserve memory.
* Even if a lookup races with this process and succeeds only to have
* the TID invalidated, it's OK because it also results in a reference
* that will be drained later.
*
* => Deleting a node also requires GC of now-unused thmap nodes. The
* serialization point between stage_gc and gc is performed by simply
* taking the lwp_threadid_lock as a WRITER and immediately releasing
* it. By doing this, we know that any busy readers will have drained.
* LWP. If the LWP is draining its references, we drop the reference
* we took and return NULL. Otherwise, the lookup has succeeded and
* the LWP is returned with a reference count that the caller is
* responsible for dropping.
*
* => When a LWP is exiting, it also drains off any references being
* held by others. However, the reference in the lookup path is taken
* outside the normal locking protocol. There needs to be additional
* serialization so that EITHER lwp_drainrefs() sees the incremented
* reference count so that it knows to wait, OR lwp_getref_tid() sees
* reference count so that it knows to wait, OR lwp_getref_lwpid() sees
* that the LWP is waiting to drain and thus drops the reference
* immediately. This is achieved by taking lwp_threadid_lock as a
* WRITER when setting LPR_DRAINING. Note the locking order:
*
* p_lock -> lwp_threadid_lock
*
* Note that this scheme could easily use pserialize(9) in place of the
* lwp_threadid_lock rwlock lock. However, this would require placing a
* pserialize_perform() call in the LWP exit path, which is arguably more
* expensive than briefly taking a global lock that should be relatively
* uncontended. This issue can be revisited if the rwlock proves to be
* a performance problem.
*/
static krwlock_t lwp_threadid_lock __cacheline_aligned;
static thmap_t * lwp_threadid_map __read_mostly;
static void lwp_dtor(void *, void *);
@ -330,7 +309,7 @@ struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = {
.l_md = LWP0_MD_INITIALIZER,
#endif
.l_proc = &proc0,
.l_lid = 1,
.l_lid = 0, /* we own proc0's slot in the pid table */
.l_flag = LW_SYSTEM,
.l_stat = LSONPROC,
.l_ts = &turnstile0,
@ -407,7 +386,6 @@ lwp0_init(void)
struct lwp *l = &lwp0;
KASSERT((void *)uvm_lwp_getuarea(l) != NULL);
KASSERT(l->l_lid == proc0.p_nlwpid);
LIST_INSERT_HEAD(&alllwp, l, l_list);
@ -646,8 +624,7 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
* it's not detached.
*/
if (lid != 0) {
l2 = radix_tree_lookup_node(&p->p_lwptree,
(uint64_t)(lid - 1));
l2 = proc_find_lwp(p, lid);
if (l2 == NULL) {
error = ESRCH;
break;
@ -767,8 +744,7 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
* so that they can re-check for zombies and for deadlock.
*/
if (lid != 0) {
l2 = radix_tree_lookup_node(&p->p_lwptree,
(uint64_t)(lid - 1));
l2 = proc_find_lwp(p, lid);
KASSERT(l2 == NULL || l2->l_lid == lid);
if (l2 != NULL && l2->l_waiter == curlid)
@ -781,43 +757,6 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
return error;
}
/*
* Find an unused LID for a new LWP.
*
* => Must be called with p->p_lock held, and p->p_nlwpid must be
*    greater than zero (both asserted).
* => The search starts at p->p_nlwpid and returns the first LID at or
*    after it for which the gang lookup finds no entry in p->p_lwptree.
* => The tree is keyed by (LID - 1), hence the "lid - 1" start key.
* => Only computes a candidate; nothing is inserted into the tree here.
*/
static lwpid_t
lwp_find_free_lid(struct proc *p)
{
struct lwp *gang[32];
lwpid_t lid;
unsigned n;
KASSERT(mutex_owned(p->p_lock));
KASSERT(p->p_nlwpid > 0);
/*
* Scoot forward through the tree in blocks of LIDs doing gang
* lookup with dense=true, meaning the lookup will terminate the
* instant a hole is encountered. Most of the time the first entry
* (p->p_nlwpid) is free and the lookup fails fast.
*/
for (lid = p->p_nlwpid;;) {
n = radix_tree_gang_lookup_node(&p->p_lwptree, lid - 1,
(void **)gang, __arraycount(gang), true);
if (n == 0) {
/* Start point was empty. */
break;
}
/* A dense run must begin exactly at the LID we asked for. */
KASSERT(gang[0]->l_lid == lid);
/* Next candidate is the LID just past the end of this run. */
lid = gang[n - 1]->l_lid + 1;
if (n < __arraycount(gang)) {
/* Scan encountered a hole. */
break;
}
/* The batch was full: the run may continue, so look again. */
}
return (lwpid_t)lid;
}
/*
* Create a new LWP within process 'p2', using LWP 'l1' as a template.
* The new LWP is created in state LSIDL and must be set running,
@ -831,7 +770,6 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
{
struct lwp *l2;
turnstile_t *ts;
lwpid_t lid;
KASSERT(l1 == curlwp || l1->l_proc == &proc0);
@ -876,15 +814,33 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
mutex_exit(p2->p_lock);
l2 = pool_cache_get(lwp_cache, PR_WAITOK);
memset(l2, 0, sizeof(*l2));
l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK);
ts = l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK);
SLIST_INIT(&l2->l_pi_lenders);
}
l2->l_stat = LSIDL;
l2->l_stat = LSLARVAL;
l2->l_proc = p2;
l2->l_refcnt = 0;
l2->l_class = sclass;
/*
* Allocate a process ID for this LWP. We need to do this now
* while we can still unwind if it fails. Because we're marked
* as LARVAL, no lookups by the ID will succeed.
*
* N.B. this will always succeed for the first LWP in a process,
* because proc_alloc_lwpid() will usurp the slot. Also note
* that l2->l_proc MUST be valid so that lookups of the proc
* will succeed, even if the LWP itself is not visible.
*/
if (__predict_false(proc_alloc_lwpid(p2, l2) == -1)) {
if (ts != &turnstile0)
pool_cache_put(turnstile_cache, ts);
l2->l_ts = NULL;
pool_cache_put(lwp_cache, l2);
return EAGAIN;
}
/*
* If vfork(), we want the LWP to run fast and on the same CPU
* as its parent, so that it can reuse the VM context and cache
@ -959,55 +915,13 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
uvm_lwp_setuarea(l2, uaddr);
uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2);
if ((flags & LWP_PIDLID) != 0) {
/* Linux threads: use a PID. */
lid = proc_alloc_pid(p2);
l2->l_pflag |= LP_PIDLID;
} else if (p2->p_nlwps == 0) {
/*
* First LWP in process. Copy the parent's LID to avoid
* causing problems for fork() + threads. Don't give
* subsequent threads the distinction of using LID 1.
*/
lid = l1->l_lid;
p2->p_nlwpid = 2;
} else {
/* Scan the radix tree for a free LID. */
lid = 0;
}
mutex_enter(p2->p_lock);
/*
* Allocate LID if needed, and insert into the radix tree. The
* first LWP in most processes has a LID of 1. It turns out that if
* you insert an item with a key of zero to a radixtree, it's stored
* directly in the root (p_lwptree) and no extra memory is
* allocated. We therefore always subtract 1 from the LID, which
* means no memory is allocated for the tree unless the program is
* using threads. NB: the allocation and insert must take place
* under the same hold of p_lock.
* This renders l2 visible in the pid table once p2->p_lock is
* released.
*/
mutex_enter(p2->p_lock);
for (;;) {
int error;
l2->l_lid = (lid == 0 ? lwp_find_free_lid(p2) : lid);
rw_enter(&p2->p_treelock, RW_WRITER);
error = radix_tree_insert_node(&p2->p_lwptree,
(uint64_t)(l2->l_lid - 1), l2);
rw_exit(&p2->p_treelock);
if (__predict_true(error == 0)) {
if (lid == 0)
p2->p_nlwpid = l2->l_lid + 1;
break;
}
KASSERT(error == ENOMEM);
mutex_exit(p2->p_lock);
radix_tree_await_memory();
mutex_enter(p2->p_lock);
}
l2->l_stat = LSIDL;
if ((flags & LWP_DETACHED) != 0) {
l2->l_prflag = LPR_DETACHED;
@ -1189,8 +1103,8 @@ lwp_exit(struct lwp *l)
/*
* Perform any required thread cleanup. Do this early so
* anyone wanting to look us up by our global thread ID
* will fail to find us.
* anyone wanting to look us up with lwp_getref_lwpid() will
* fail to find us before we become a zombie.
*
* N.B. this will unlock p->p_lock on our behalf.
*/
@ -1238,9 +1152,6 @@ lwp_exit(struct lwp *l)
}
LIST_REMOVE(l, l_list);
if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) {
proc_free_pid(l->l_lid);
}
mutex_exit(proc_lock);
/*
@ -1328,7 +1239,6 @@ lwp_free(struct lwp *l, bool recycle, bool last)
{
struct proc *p = l->l_proc;
struct rusage *ru;
struct lwp *l2 __diagused;
ksiginfoq_t kq;
KASSERT(l != curlwp);
@ -1364,14 +1274,8 @@ lwp_free(struct lwp *l, bool recycle, bool last)
if ((l->l_prflag & LPR_DETACHED) != 0)
p->p_ndlwps--;
/* Make note of the LID being free, and remove from tree. */
if (l->l_lid < p->p_nlwpid)
p->p_nlwpid = l->l_lid;
rw_enter(&p->p_treelock, RW_WRITER);
l2 = radix_tree_remove_node(&p->p_lwptree,
(uint64_t)(l->l_lid - 1));
KASSERT(l2 == l);
rw_exit(&p->p_treelock);
/* Free the LWP ID. */
proc_free_lwpid(p, l->l_lid);
/*
* Have any LWPs sleeping in lwp_wait() recheck for
@ -1550,7 +1454,7 @@ lwp_find(struct proc *p, lwpid_t id)
KASSERT(mutex_owned(p->p_lock));
l = radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(id - 1));
l = proc_find_lwp(p, id);
KASSERT(l == NULL || l->l_lid == id);
/*
@ -1761,13 +1665,15 @@ lwp_need_userret(struct lwp *l)
/*
* Add one reference to an LWP. Interlocked against lwp_drainrefs()
* either by holding the proc's lock or by holding lwp_threadid_lock.
* If callers don't hold the proc's lock, then they must check for a
* larva after acquiring the reference. References can't be added to
* zombies because references have already been drained off before the
* state changes to LSZOMB.
*
* This routine takes no lock itself and does not verify that the
* caller holds one; only the not-a-zombie condition is asserted.
*/
static void
lwp_addref2(struct lwp *l)
{
/* Catch attempts to reference an LWP that has already drained. */
KASSERT(l->l_stat != LSZOMB);
/* Atomic increment of the reference count. */
atomic_inc_uint(&l->l_refcnt);
}
@ -1778,7 +1684,6 @@ lwp_addref2(struct lwp *l)
void
lwp_addref(struct lwp *l)
{
KASSERT(mutex_owned(l->l_proc->p_lock));
lwp_addref2(l);
}
@ -1828,11 +1733,11 @@ lwp_drainrefs(struct lwp *l)
KASSERT(mutex_owned(p->p_lock));
/*
* Lookups in the lwp_threadid_map hold lwp_threadid_lock
* as a reader, increase l_refcnt, release it, and then
* acquire p_lock to check for LPR_DRAINING. By taking
* lwp_threadid_lock as a writer here we ensure that either
* we see the increase in l_refcnt or that they see LPR_DRAINING.
* Lookups by thread ID hold lwp_threadid_lock as a reader,
* increase l_refcnt, release it, and then acquire p_lock to
* check for LPR_DRAINING. By taking lwp_threadid_lock as a
* writer here we ensure that either we see the increase in
* l_refcnt or that they see LPR_DRAINING.
*/
rw_enter(&lwp_threadid_lock, RW_WRITER);
l->l_prflag |= LPR_DRAINING;
@ -2125,131 +2030,10 @@ lwp_setprivate(struct lwp *l, void *ptr)
return error;
}
/*
* Renumber the first and only LWP in a process on exec() or fork().
* Don't bother with p_treelock here as this is the only live LWP in
* the proc right now.
*
* => l:   the LWP to renumber; its process must have exactly one LWP
*         (asserted).
* => lid: the LID the LWP should end up with.
* => Does nothing if the LWP already has the requested LID.
* => Takes and releases p->p_lock internally, and may wait for memory
*    with the lock dropped, so the caller must not hold p->p_lock.
* => On return l->l_lid == lid and p->p_nlwpid == lid + 1 (if a
*    renumber actually took place).
*/
void
lwp_renumber(lwp_t *l, lwpid_t lid)
{
lwp_t *l2 __diagused;
proc_t *p = l->l_proc;
int error;
KASSERT(p->p_nlwps == 1);
while (l->l_lid != lid) {
mutex_enter(p->p_lock);
/*
* Insert under the new key first (tree keys are LID - 1); the old
* entry is only removed once the insert is known to have succeeded.
*/
error = radix_tree_insert_node(&p->p_lwptree, lid - 1, l);
if (error == 0) {
l2 = radix_tree_remove_node(&p->p_lwptree,
(uint64_t)(l->l_lid - 1));
KASSERT(l2 == l);
p->p_nlwpid = lid + 1;
l->l_lid = lid;
}
mutex_exit(p->p_lock);
if (error == 0)
break;
/* The only failure handled here is ENOMEM: wait, then retry. */
KASSERT(error == ENOMEM);
radix_tree_await_memory();
}
}
#define LWP_TID_MASK 0x3fffffff /* placeholder */
/*
* Set up the state used for lookups by global thread ID: initialize
* lwp_threadid_lock and create lwp_threadid_map.
*
* NOTE(review): the map is created with THMAP_NOCOPY; the comment in
* lwp_threadid_free() says the l___tid field serves as the key storage,
* which presumably is why keys are not copied -- confirm against thmap(9).
*/
static void
lwp_threadid_init(void)
{
rw_init(&lwp_threadid_lock);
lwp_threadid_map = thmap_create(0, NULL, THMAP_NOCOPY);
}
/*
* Assign a global thread ID to the calling LWP and publish it in
* lwp_threadid_map.
*
* => l must be curlwp and must not already have a thread ID (both
*    asserted).
* => Candidate IDs are random values masked with LWP_TID_MASK; zero is
*    never used, since l___tid == 0 means "no thread ID assigned".
* => Loops until a candidate is successfully claimed in the map.
*/
static void
lwp_threadid_alloc(struct lwp * const l)
{
KASSERT(l == curlwp);
KASSERT(l->l___tid == 0);
for (;;) {
/* The candidate is written straight into the LWP; it is the map key. */
l->l___tid = cprng_fast32() & LWP_TID_MASK;
if (l->l___tid != 0 &&
/*
* There is no need to take the lwp_threadid_lock
* while inserting into the map: internally, the
* map is already concurrency-safe, and the lock
* is only needed to serialize removal with respect
* to lookup.
*
* NOTE(review): a return value other than l is treated
* as "ID already taken" -- presumably thmap_put() returns
* the existing value on a key collision; confirm against
* thmap(9).
*/
thmap_put(lwp_threadid_map,
&l->l___tid, sizeof(l->l___tid), l) == l) {
/* claimed! */
return;
}
/* Zero or already in use: yield if needed, then try another value. */
preempt_point();
}
}
/*
* Wait for in-flight thread ID lookups to finish before the map is
* garbage-collected. The lock is taken as a writer and released again
* immediately; nothing is done while it is held.
*/
static inline void
lwp_threadid_gc_serialize(void)
{
/*
* By acquiring the lock as a writer, we will know that
* all of the existing readers have drained away and thus
* the GC is safe.
*/
rw_enter(&lwp_threadid_lock, RW_WRITER);
rw_exit(&lwp_threadid_lock);
}
/*
* Release the calling LWP's global thread ID.
*
* => l must be curlwp and must currently have a thread ID (both
*    asserted).
* => Must be called with l->l_proc->p_lock held (asserted); the lock
*    is RELEASED by this function before it returns.
* => Order of operations: delete the map entry, zero l___tid, drop
*    p_lock, then stage and run garbage collection on the map.
*/
static void
lwp_threadid_free(struct lwp * const l)
{
KASSERT(l == curlwp);
KASSERT(l->l___tid != 0);
/*
* Ensure that anyone who finds this entry in the lock-free lookup
* path sees that the key has been deleted by serializing with the
* examination of l___tid.
*
* N.B. l___tid field must be zapped *after* deleting from the map
* because that field is being used as the key storage by thmap.
*/
KASSERT(mutex_owned(l->l_proc->p_lock));
struct lwp * const ldiag __diagused = thmap_del(lwp_threadid_map,
&l->l___tid, sizeof(l->l___tid));
l->l___tid = 0;
mutex_exit(l->l_proc->p_lock);
/* The entry removed from the map must have been this LWP. */
KASSERT(l == ldiag);
/*
* Garbage-collect map nodes; the serialize step between staging and
* collecting waits out any lookups still holding lwp_threadid_lock.
*/
void * const gc_ref = thmap_stage_gc(lwp_threadid_map);
lwp_threadid_gc_serialize();
thmap_gc(lwp_threadid_map, gc_ref);
}
/*
* Return the current LWP's global thread ID. Only the current LWP
* should ever use this value, unless it is guaranteed that the LWP
* is paused (and then it should be accessed directly, rather than
* by this accessor).
*
* The ID is assigned lazily: if curlwp does not yet have one
* (l___tid == 0), it is allocated on this call.
*/
lwpid_t
lwp_gettid(void)
{
struct lwp * const l = curlwp;
/* First request from this LWP: allocate and publish an ID now. */
if (l->l___tid == 0)
lwp_threadid_alloc(l);
return l->l___tid;
}
/*
@ -2259,12 +2043,17 @@ lwp_gettid(void)
* with lwp_delref().
*/
struct lwp *
lwp_getref_tid(lwpid_t tid)
lwp_getref_lwpid(lwpid_t tid)
{
struct lwp *l, *rv;
struct lwp *l;
/*
* We rely on lwp_thread_cleanup() to hide LWP IDs from us
* to ensure that we cannot add a reference to an exiting
* LWP.
*/
rw_enter(&lwp_threadid_lock, RW_READER);
l = thmap_get(lwp_threadid_map, &tid, sizeof(&tid));
l = proc_seek_lwpid(tid);
if (__predict_false(l == NULL)) {
rw_exit(&lwp_threadid_lock);
return NULL;
@ -2281,17 +2070,16 @@ lwp_getref_tid(lwpid_t tid)
/*
* Now verify that our reference is valid.
*/
mutex_enter(l->l_proc->p_lock);
if (__predict_false((l->l_prflag & LPR_DRAINING) != 0 ||
l->l___tid == 0)) {
struct proc *p = l->l_proc;
mutex_enter(p->p_lock);
if (__predict_false(l->l_stat == LSLARVAL ||
(l->l_prflag & LPR_DRAINING) != 0)) {
lwp_delref2(l);
rv = NULL;
} else {
rv = l;
l = NULL;
}
mutex_exit(l->l_proc->p_lock);
mutex_exit(p->p_lock);
return rv;
return l;
}
/*
@ -2303,23 +2091,18 @@ void
lwp_thread_cleanup(struct lwp *l)
{
KASSERT(l == curlwp);
const lwpid_t tid = l->l___tid;
const lwpid_t tid = l->l_lid;
KASSERT(mutex_owned(l->l_proc->p_lock));
if (__predict_false(tid != 0)) {
/*
* Drop our thread ID. This will also unlock
* our proc.
*/
lwp_threadid_free(l);
} else {
/*
* No thread cleanup was required; just unlock
* the proc.
* Hide this LWP from seekers (namely lwp_getref_lwpid())
* to prevent them from attempting to acquire a reference
* on a zombie.
*/
proc_hide_lwpid(tid);
mutex_exit(l->l_proc->p_lock);
}
}
#if defined(DDB)

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_proc.c,v 1.246 2020/04/21 21:42:47 ad Exp $ */
/* $NetBSD: kern_proc.c,v 1.247 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.246 2020/04/21 21:42:47 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.247 2020/04/24 03:22:06 thorpej Exp $");
#ifdef _KERNEL_OPT
#include "opt_kstack.h"
@ -117,37 +117,66 @@ __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.246 2020/04/21 21:42:47 ad Exp $");
struct proclist allproc __cacheline_aligned;
struct proclist zombproc __cacheline_aligned;
kmutex_t * proc_lock __cacheline_aligned;
static kmutex_t proc_lock_s __cacheline_aligned;
kmutex_t * proc_lock __read_mostly;
/*
* pid to proc lookup is done by indexing the pid_table array.
* pid to lwp/proc lookup is done by indexing the pid_table array.
* Since pid numbers are only allocated when an empty slot
* has been found, there is no need to search any lists ever.
* (an orphaned pgrp will lock the slot, a session will lock
* the pgrp with the same number.)
* If the table is too small it is reallocated with twice the
* previous size and the entries 'unzipped' into the two halves.
* A linked list of free entries is passed through the pt_proc
* field of 'free' items - set odd to be an invalid ptr.
* A linked list of free entries is passed through the pt_lwp
* field of 'free' items - set odd to be an invalid ptr. Two
* additional bits are also used to indicate if the slot is
* currently occupied by a proc or lwp, and if the PID is
* hidden from certain kinds of lookups. We thus require a
* minimum alignment for proc and lwp structures (LWPs are
* at least 32-byte aligned).
*/
struct pid_table {
struct proc *pt_proc;
uintptr_t pt_slot;
struct pgrp *pt_pgrp;
pid_t pt_pid;
};
#if 1 /* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define p2u(p) ((uint)p)
#endif
#define P_VALID(p) (!(p2u(p) & 1))
#define P_NEXT(p) (p2u(p) >> 1)
#define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
#define PT_F_FREE __BIT(0)
#define PT_F_LWP 0 /* pseudo-flag */
#define PT_F_PROC __BIT(1)
#define PT_F_HIDDEN __BIT(2)
#define PT_F_TYPEBITS (PT_F_FREE|PT_F_PROC)
#define PT_F_ALLBITS (PT_F_FREE|PT_F_PROC|PT_F_HIDDEN)
#define PT_VALID(s) (((s) & PT_F_FREE) == 0)
#define PT_RESERVED(s) ((s) == 0)
#define PT_HIDDEN(s) ((s) & PT_F_HIDDEN)
#define PT_NEXT(s) ((u_int)(s) >> 1)
#define PT_SET_FREE(pid) (((pid) << 1) | PT_F_FREE)
#define PT_SET_HIDDEN(s) ((s) | PT_F_HIDDEN)
#define PT_SET_LWP(l) ((uintptr_t)(l))
#define PT_SET_PROC(p) (((uintptr_t)(p)) | PT_F_PROC)
#define PT_SET_RESERVED 0
#define PT_GET_LWP(s) ((struct lwp *)((s) & ~PT_F_ALLBITS))
#define PT_GET_PROC(s) ((struct proc *)((s) & ~PT_F_ALLBITS))
#define PT_GET_TYPE(s) ((s) & PT_F_TYPEBITS)
#define PT_IS_LWP(s) (PT_GET_TYPE(s) == PT_F_LWP && (s) != 0)
#define PT_IS_PROC(s) (PT_GET_TYPE(s) == PT_F_PROC)
#define MIN_PROC_ALIGNMENT (PT_F_ALLBITS + 1)
/*
* Table of process IDs (PIDs).
*
* Locking order:
* proc_lock -> pid_table_lock
* or
* proc::p_lock -> pid_table_lock
*/
static krwlock_t pid_table_lock __cacheline_aligned;
static struct pid_table *pid_table __read_mostly;
#define INITIAL_PID_TABLE_SIZE (1 << 5)
@ -188,7 +217,6 @@ struct proc proc0 = {
.p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
.p_nlwps = 1,
.p_nrlwps = 1,
.p_nlwpid = 1, /* must match lwp0.l_lid */
.p_pgrp = &pgrp0,
.p_comm = "system",
/*
@ -338,6 +366,8 @@ proc_ctor(void *arg __unused, void *obj, int flags __unused)
return 0;
}
static pid_t proc_alloc_pid_slot(struct proc *, uintptr_t);
/*
* Initialize global process hashing structures.
*/
@ -351,7 +381,11 @@ procinit(void)
for (pd = proclists; pd->pd_list != NULL; pd++)
LIST_INIT(pd->pd_list);
proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
mutex_init(&proc_lock_s, MUTEX_DEFAULT, IPL_NONE);
proc_lock = &proc_lock_s;
rw_init(&pid_table_lock);
pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
* sizeof(struct pid_table), KM_SLEEP);
pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
@ -360,7 +394,7 @@ procinit(void)
/* Set free list running through table...
Preset 'use count' above PID_MAX so we allocate pid 1 next. */
for (i = 0; i <= pid_tbl_mask; i++) {
pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1);
pid_table[i].pt_pgrp = 0;
pid_table[i].pt_pid = 0;
}
@ -368,15 +402,25 @@ procinit(void)
next_free_pt = 1;
/* Need to fix last entry. */
last_free_pt = pid_tbl_mask;
pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY);
/* point at which we grow table - to avoid reusing pids too often */
pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY
/* Reserve PID 1 for init(8). */ /* XXX slightly gross */
rw_enter(&pid_table_lock, RW_WRITER);
if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1)
panic("failed to reserve PID 1 for init(8)");
rw_exit(&pid_table_lock);
proc_specificdata_domain = specificdata_domain_create();
KASSERT(proc_specificdata_domain != NULL);
proc_cache = pool_cache_init(sizeof(struct proc), coherency_unit, 0, 0,
size_t proc_alignment = coherency_unit;
if (proc_alignment < MIN_PROC_ALIGNMENT)
proc_alignment = MIN_PROC_ALIGNMENT;
proc_cache = pool_cache_init(sizeof(struct proc), proc_alignment, 0, 0,
"procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL);
proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
@ -440,7 +484,6 @@ proc0_init(void)
struct pgrp *pg;
struct rlimit *rlim;
rlim_t lim;
int error __diagused;
int i;
p = &proc0;
@ -451,20 +494,16 @@ proc0_init(void)
p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
rw_init(&p->p_reflock);
rw_init(&p->p_treelock);
cv_init(&p->p_waitcv, "wait");
cv_init(&p->p_lwpcv, "lwpwait");
LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);
radix_tree_init_tree(&p->p_lwptree);
error = radix_tree_insert_node(&p->p_lwptree,
(uint64_t)(lwp0.l_lid - 1), &lwp0);
KASSERT(error == 0);
pid_table[0].pt_proc = p;
KASSERT(lwp0.l_lid == 0);
pid_table[lwp0.l_lid].pt_slot = PT_SET_LWP(&lwp0);
LIST_INSERT_HEAD(&allproc, p, p_list);
pid_table[0].pt_pgrp = pg;
pid_table[lwp0.l_lid].pt_pgrp = pg;
LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);
#ifdef __HAVE_SYSCALL_INTERN
@ -548,28 +587,41 @@ proc_sesshold(struct session *ss)
ss->s_count++;
}
void
proc_sessrele(struct session *ss)
static void
proc_sessrele_pid_table_write_locked(struct session *ss)
{
struct pgrp *pg;
KASSERT(mutex_owned(proc_lock));
KASSERT(rw_write_held(&pid_table_lock));
KASSERT(ss->s_count > 0);
/*
* We keep the pgrp with the same id as the session in order to
* stop a process being given the same pid. Since the pgrp holds
* a reference to the session, it must be a 'zombie' pgrp by now.
*/
if (--ss->s_count == 0) {
struct pgrp *pg;
pg = pg_remove(ss->s_sid);
} else {
pg = NULL;
ss = NULL;
}
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
if (pg)
kmem_free(pg, sizeof(struct pgrp));
if (ss)
kmem_free(ss, sizeof(struct session));
} else {
mutex_exit(proc_lock);
}
}
/*
* Release a reference on a session.
*
* Takes pid_table_lock as a writer and hands the work to
* proc_sessrele_pid_table_write_locked(), which asserts that both
* proc_lock and the pid_table_lock write hold are in place. This
* function does not release either lock itself.
*
* NOTE(review): the helper appears to drop both pid_table_lock and
* proc_lock before returning -- confirm against its full body.
*/
void
proc_sessrele(struct session *ss)
{
rw_enter(&pid_table_lock, RW_WRITER);
proc_sessrele_pid_table_write_locked(ss);
}
/*
@ -623,38 +675,147 @@ p_inferior(struct proc *p, struct proc *q)
}
/*
* proc_find: locate a process by the ID.
* proc_find_lwp: locate an lwp in said proc by the ID.
*
* => Must be called with proc_lock held.
* => Must be called with p::p_lock held.
* => LARVAL lwps are not returned because they are only partially
* constructed while occupying the slot.
* => Callers need to be careful about lwp::l_stat of the returned
* lwp.
*/
proc_t *
proc_find_raw(pid_t pid)
struct lwp *
proc_find_lwp(proc_t *p, pid_t pid)
{
struct pid_table *pt;
/*
 * NOTE(review): this declaration duplicates the 'p' parameter; it
 * looks like a leftover from the proc_find_raw() signature shown just
 * above this function -- confirm against the repository.
 */
proc_t *p;
struct lwp *l = NULL;
uintptr_t slot;
KASSERT(mutex_owned(p->p_lock));
rw_enter(&pid_table_lock, RW_READER);
/* The table is indexed by the ID masked with pid_tbl_mask. */
pt = &pid_table[pid & pid_tbl_mask];
slot = pt->pt_slot;
/* Only a slot that holds an LWP and records exactly this ID matches. */
if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
l = PT_GET_LWP(slot);
/* Reject LWPs that belong to another proc, and LARVAL LWPs. */
if (__predict_false(l->l_proc != p || l->l_stat == LSLARVAL)) {
l = NULL;
}
}
rw_exit(&pid_table_lock);
/* NULL when nothing matched. */
return l;
}
/*
 * proc_seek_lwpid: look up an LWP using nothing but its ID.
 *
 * => Specialized interface for callers that do not hold a lock on the
 *    LWP's owner process.
 * => pid_table_lock is held (as reader) only for the duration of the
 *    lookup, so callers MUST provide their own synchronization to
 *    keep the returned LWP valid.
 * => No l_stat filtering is done here: LARVAL LWPs can be returned,
 *    and callers must check for them.
 * => An LWP whose slot has been marked hidden is never returned.
 * => Returns NULL when the slot does not hold a visible LWP with
 *    this ID.
 */
struct lwp *
proc_seek_lwpid(pid_t pid)
{
	struct pid_table *ent;
	struct lwp *found;
	uintptr_t slotval;

	rw_enter(&pid_table_lock, RW_READER);

	ent = &pid_table[pid & pid_tbl_mask];
	slotval = ent->pt_slot;

	if (__predict_true(PT_IS_LWP(slotval) && ent->pt_pid == pid &&
	    !PT_HIDDEN(slotval))) {
		found = PT_GET_LWP(slotval);
	} else {
		found = NULL;
	}

	rw_exit(&pid_table_lock);

	return found;
}
/*
 * proc_hide_lwpid: mark the pid_table slot of an LWP ID as hidden.
 *
 * => Takes pid_table_lock as a writer for the update.
 * => The slot must currently hold an LWP and must have been allocated
 *    for exactly this ID (both are asserted).
 * => proc_seek_lwpid() skips slots that carry the hidden mark.
 */
void
proc_hide_lwpid(pid_t pid)
{
	struct pid_table *ent;

	rw_enter(&pid_table_lock, RW_WRITER);

	ent = &pid_table[pid & pid_tbl_mask];
	KASSERT(PT_IS_LWP(ent->pt_slot));
	KASSERT(ent->pt_pid == pid);
	ent->pt_slot = PT_SET_HIDDEN(ent->pt_slot);

	rw_exit(&pid_table_lock);
}
/*
* proc_find_raw_pid_table_locked: locate a process by the ID.
*
* => Must be called with proc_lock held and the pid_table_lock
* at least held for reading.
*/
static proc_t *
proc_find_raw_pid_table_locked(pid_t pid)
{
struct pid_table *pt;
proc_t *p = NULL;
uintptr_t slot;
KASSERT(mutex_owned(proc_lock));
/* The table is indexed by the ID masked with pid_tbl_mask. */
pt = &pid_table[pid & pid_tbl_mask];
/*
 * NOTE(review): the next three lines (pt_proc / P_VALID, with an
 * unclosed brace) look like a leftover from an earlier revision that
 * overlaps the pt_slot based lookup below -- confirm against the
 * repository.
 */
p = pt->pt_proc;
if (__predict_false(!P_VALID(p) || pt->pt_pid != pid)) {
return NULL;
slot = pt->pt_slot;
if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
/*
* When looking up processes, require a direct match
* on the PID assigned to the proc, not just one of
* its LWPs.
*
* N.B. We require lwp::l_proc of LARVAL LWPs to be
* valid here.
*/
p = PT_GET_LWP(slot)->l_proc;
if (__predict_false(p->p_pid != pid))
p = NULL;
} else if (PT_IS_PROC(slot) && pt->pt_pid == pid) {
/* The slot refers to the proc itself rather than to an LWP. */
p = PT_GET_PROC(slot);
}
/* NULL unless one of the two branches above matched. */
return p;
}
/*
 * proc_find_raw: wrapper that takes pid_table_lock as a reader around
 * proc_find_raw_pid_table_locked().  proc_lock must already be owned
 * (asserted).
 *
 * NOTE(review): the 'proc_find(pid_t pid)' line below looks like a
 * leftover signature from an earlier revision -- confirm against the
 * repository.
 */
proc_t *
proc_find(pid_t pid)
proc_find_raw(pid_t pid)
{
KASSERT(mutex_owned(proc_lock));
rw_enter(&pid_table_lock, RW_READER);
proc_t *p = proc_find_raw_pid_table_locked(pid);
rw_exit(&pid_table_lock);
/* Whatever the locked lookup produced, NULL included. */
return p;
}
static proc_t *
proc_find_pid_table_locked(pid_t pid)
{
proc_t *p;
p = proc_find_raw(pid);
KASSERT(mutex_owned(proc_lock));
p = proc_find_raw_pid_table_locked(pid);
if (__predict_false(p == NULL)) {
return NULL;
}
/*
* Only allow live processes to be found by PID.
* XXX: p_stat might change, since unlocked.
* XXX: p_stat might change, since proc unlocked.
*/
if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
return p;
@ -662,13 +823,24 @@ proc_find(pid_t pid)
return NULL;
}
/*
 * proc_find: locate a process by ID.
 *
 * => Must be called with proc_lock held (asserted).
 * => Takes pid_table_lock as a reader around the lookup done by
 *    proc_find_pid_table_locked() and returns that function's result.
 */
proc_t *
proc_find(pid_t pid)
{
	proc_t *found;

	KASSERT(mutex_owned(proc_lock));

	rw_enter(&pid_table_lock, RW_READER);
	found = proc_find_pid_table_locked(pid);
	rw_exit(&pid_table_lock);

	return found;
}
/*
* pgrp_find: locate a process group by the ID.
* pgrp_find_pid_table_locked: locate a process group by the ID.
*
* => Must be called with proc_lock held.
* => Must be called with proc_lock held and the pid_table_lock
* held at least for reading.
*/
struct pgrp *
pgrp_find(pid_t pgid)
static struct pgrp *
pgrp_find_pid_table_locked(pid_t pgid)
{
struct pgrp *pg;
@ -686,28 +858,43 @@ pgrp_find(pid_t pgid)
return pg;
}
/*
 * pgrp_find: locate a process group by ID.
 *
 * => Must be called with proc_lock held (asserted).
 * => Takes pid_table_lock as a reader around the lookup done by
 *    pgrp_find_pid_table_locked() and returns that function's result.
 */
struct pgrp *
pgrp_find(pid_t pgid)
{
	struct pgrp *found;

	KASSERT(mutex_owned(proc_lock));

	rw_enter(&pid_table_lock, RW_READER);
	found = pgrp_find_pid_table_locked(pgid);
	rw_exit(&pid_table_lock);

	return found;
}
static void
expand_pid_table(void)
{
size_t pt_size, tsz;
struct pid_table *n_pt, *new_pt;
struct proc *proc;
uintptr_t slot;
struct pgrp *pgrp;
pid_t pid, rpid;
u_int i;
uint new_pt_mask;
KASSERT(rw_write_held(&pid_table_lock));
/* Unlock the pid_table briefly to allocate memory. */
rw_exit(&pid_table_lock);
pt_size = pid_tbl_mask + 1;
tsz = pt_size * 2 * sizeof(struct pid_table);
new_pt = kmem_alloc(tsz, KM_SLEEP);
new_pt_mask = pt_size * 2 - 1;
mutex_enter(proc_lock);
rw_enter(&pid_table_lock, RW_WRITER);
if (pt_size != pid_tbl_mask + 1) {
/* Another process beat us to it... */
mutex_exit(proc_lock);
rw_exit(&pid_table_lock);
kmem_free(new_pt, tsz);
return;
goto out;
}
/*
@ -724,13 +911,13 @@ expand_pid_table(void)
i = pt_size - 1;
n_pt = new_pt + i;
for (; ; i--, n_pt--) {
proc = pid_table[i].pt_proc;
slot = pid_table[i].pt_slot;
pgrp = pid_table[i].pt_pgrp;
if (!P_VALID(proc)) {
if (!PT_VALID(slot)) {
/* Up 'use count' so that link is valid */
pid = (P_NEXT(proc) + pt_size) & ~pt_size;
pid = (PT_NEXT(slot) + pt_size) & ~pt_size;
rpid = 0;
proc = P_FREE(pid);
slot = PT_SET_FREE(pid);
if (pgrp)
pid = pgrp->pg_id;
} else {
@ -739,14 +926,14 @@ expand_pid_table(void)
}
/* Save entry in appropriate half of table */
n_pt[pid & pt_size].pt_proc = proc;
n_pt[pid & pt_size].pt_slot = slot;
n_pt[pid & pt_size].pt_pgrp = pgrp;
n_pt[pid & pt_size].pt_pid = rpid;
/* Put other piece on start of free list */
pid = (pid ^ pt_size) & ~pid_tbl_mask;
n_pt[pid & pt_size].pt_proc =
P_FREE((pid & ~pt_size) | next_free_pt);
n_pt[pid & pt_size].pt_slot =
PT_SET_FREE((pid & ~pt_size) | next_free_pt);
n_pt[pid & pt_size].pt_pgrp = 0;
n_pt[pid & pt_size].pt_pid = 0;
@ -771,8 +958,11 @@ expand_pid_table(void)
} else
pid_alloc_lim <<= 1; /* doubles number of free slots... */
mutex_exit(proc_lock);
rw_exit(&pid_table_lock);
kmem_free(n_pt, tsz);
out: /* Return with the pid_table_lock held again. */
rw_enter(&pid_table_lock, RW_WRITER);
}
struct proc *
@ -784,38 +974,63 @@ proc_alloc(void)
p->p_stat = SIDL; /* protect against others */
proc_initspecific(p);
kdtrace_proc_ctor(NULL, p);
p->p_pid = -1;
proc_alloc_pid(p);
/*
* Allocate a placeholder in the pid_table. When we create the
* first LWP for this process, it will take ownership of the
* slot.
*/
if (__predict_false(proc_alloc_pid(p) == -1)) {
/* Allocating the PID failed; unwind. */
proc_finispecific(p);
proc_free_mem(p);
p = NULL;
}
return p;
}
/*
* proc_alloc_pid: allocate PID and record the given proc 'p' so that
* proc_alloc_pid_slot: allocate PID and record the occupant so that
* proc_find_raw() can find it by the PID.
*/
pid_t
proc_alloc_pid(struct proc *p)
static pid_t __noinline
proc_alloc_pid_slot(struct proc *p, uintptr_t slot)
{
struct pid_table *pt;
pid_t pid;
int nxt;
KASSERT(rw_write_held(&pid_table_lock));
for (;;expand_pid_table()) {
if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) {
/* ensure pids cycle through 2000+ values */
continue;
mutex_enter(proc_lock);
}
/*
* The first user process *must* be given PID 1.
* It has already been reserved for us. This
* will be coming in from the proc_alloc() call
* above, and the entry will be usurped later when
* the first user LWP is created.
* XXX this is slightly gross.
*/
if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) &&
p != &proc0)) {
KASSERT(PT_IS_PROC(slot));
pt = &pid_table[1];
pt->pt_slot = slot;
return 1;
}
pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp))
panic("proc_alloc: slot busy");
#endif
nxt = P_NEXT(pt->pt_proc);
nxt = PT_NEXT(pt->pt_slot);
if (nxt & pid_tbl_mask)
break;
/* Table full - expand (NB last entry not used....) */
mutex_exit(proc_lock);
}
/* pid is 'saved use count' + 'size' + entry */
@ -825,19 +1040,92 @@ proc_alloc_pid(struct proc *p)
next_free_pt = nxt & pid_tbl_mask;
/* Grab table slot */
pt->pt_proc = p;
pt->pt_slot = slot;
KASSERT(pt->pt_pid == 0);
pt->pt_pid = pid;
if (p->p_pid == -1) {
p->p_pid = pid;
}
pid_alloc_cnt++;
mutex_exit(proc_lock);
return pid;
}
/*
 * proc_alloc_pid: allocate a PID for 'p' and record the proc itself
 * as the occupant of the pid_table slot.
 *
 * => Takes pid_table_lock as a writer for the allocation.
 * => 'p' must not have any of the PT_F_ALLBITS bits set in its
 *    address (asserted).
 * => p->p_pid is only written when the allocation did not return -1.
 * => Returns the value produced by proc_alloc_pid_slot().
 */
pid_t
proc_alloc_pid(struct proc *p)
{
	pid_t newpid;

	KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0);

	rw_enter(&pid_table_lock, RW_WRITER);

	newpid = proc_alloc_pid_slot(p, PT_SET_PROC(p));
	if (newpid != -1) {
		p->p_pid = newpid;
	}

	rw_exit(&pid_table_lock);

	return newpid;
}
/*
 * proc_alloc_lwpid: assign an LWP ID to 'l', an LWP of process 'p'.
 *
 * => Takes pid_table_lock as a writer for the whole operation.
 * => 'l' must not have any of the PT_F_ALLBITS bits set in its
 *    address (asserted).
 * => If the slot for p->p_pid currently refers to the proc itself,
 *    the LWP takes that slot over and its LID becomes the PID.
 * => Otherwise a fresh slot is requested from proc_alloc_pid_slot();
 *    l->l_lid is only written when that call did not return -1.
 * => Returns the ID that was assigned, or the -1 handed back by
 *    proc_alloc_pid_slot().
 */
pid_t
proc_alloc_lwpid(struct proc *p, struct lwp *l)
{
	struct pid_table *ent;
	pid_t lid;

	KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0);

	rw_enter(&pid_table_lock, RW_WRITER);

	lid = p->p_pid;
	ent = &pid_table[lid & pid_tbl_mask];
	KASSERT(ent->pt_pid == lid);

	if (!PT_IS_PROC(ent->pt_slot)) {
		/* The PID's slot is not available: allocate a new one. */
		lid = proc_alloc_pid_slot(p, PT_SET_LWP(l));
		if (lid != -1) {
			l->l_lid = lid;
		}
	} else {
		/*
		 * The slot still refers to the proc, so this LWP usurps
		 * the PID as its LID.  This happens at least once per
		 * process (for its first LWP), and can happen again if
		 * the first LWP exits before the process creates
		 * another one.
		 */
		KASSERT(PT_GET_PROC(ent->pt_slot) == p);
		l->l_lid = lid;
		ent->pt_slot = PT_SET_LWP(l);
	}

	rw_exit(&pid_table_lock);

	return lid;
}
/*
 * proc_free_pid_internal: release the pid_table slot used by 'pid'.
 *
 * => Takes pid_table_lock as a writer for the whole operation.
 * => 'type' is only consulted by the assertion that checks what the
 *    slot currently holds.
 * => The slot is linked onto the tail of the free list only when no
 *    pgrp is attached to it.
 */
static void __noinline
proc_free_pid_internal(pid_t pid, uintptr_t type __diagused)
{
	struct pid_table *ent;

	rw_enter(&pid_table_lock, RW_WRITER);

	ent = &pid_table[pid & pid_tbl_mask];
	KASSERT(PT_GET_TYPE(ent->pt_slot) == type);
	KASSERT(ent->pt_pid == pid);

	/* Mark the slot free, keeping the pid's use count in it. */
	ent->pt_slot = PT_SET_FREE(pid & ~pid_tbl_mask);
	ent->pt_pid = 0;

	if (ent->pt_pgrp == NULL) {
		/* Chain this entry behind the previously freed one. */
		const pid_t idx = pid & pid_tbl_mask;
		struct pid_table *tail = &pid_table[last_free_pt];

		tail->pt_slot = PT_SET_FREE(PT_NEXT(tail->pt_slot) | idx);
		tail->pt_pid = 0;
		last_free_pt = idx;
		pid_alloc_cnt--;
	}

	rw_exit(&pid_table_lock);
}
/*
* Free a process id - called from proc_free (in kern_exit.c)
*
@ -846,26 +1134,39 @@ proc_alloc_pid(struct proc *p)
void
proc_free_pid(pid_t pid)
{

	/* The caller must own proc_lock. */
	KASSERT(mutex_owned(proc_lock));

	/* The slot is expected to be of the PT_F_PROC type. */
	proc_free_pid_internal(pid, PT_F_PROC);
}
/*
* Free a process id used by an LWP. If this was the process's
* first LWP, we convert the slot to point to the process; the
* entry will get cleaned up later when the process finishes exiting.
*
* If not, then it's the same as proc_free_pid().
*/
void
proc_free_lwpid(struct proc *p, pid_t pid)
{
KASSERT(mutex_owned(p->p_lock));
/*
 * NOTE(review): the body below appears to interleave two revisions of
 * the freeing logic (pt_proc / P_FREE statements next to pt_slot /
 * PT_SET_PROC ones, with unbalanced braces) -- confirm against the
 * repository before relying on it.
 */
/* The LWP ID is the process's own PID. */
if (__predict_true(p->p_pid == pid)) {
struct pid_table *pt;
KASSERT(mutex_owned(proc_lock));
rw_enter(&pid_table_lock, RW_WRITER);
pt = &pid_table[pid & pid_tbl_mask];
/* save pid use count in slot */
pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
KASSERT(pt->pt_pid == pid);
pt->pt_pid = 0;
KASSERT(PT_IS_LWP(pt->pt_slot));
KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p);
if (pt->pt_pgrp == NULL) {
/* link last freed entry onto ours */
pid &= pid_tbl_mask;
pt = &pid_table[last_free_pt];
pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
pt->pt_pid = 0;
last_free_pt = pid;
pid_alloc_cnt--;
/* Point the slot back at the proc. */
pt->pt_slot = PT_SET_PROC(p);
rw_exit(&pid_table_lock);
return;
}
/* Any other LWP ID is released through the common helper. */
proc_free_pid_internal(pid, PT_F_LWP);
}
void
@ -899,13 +1200,16 @@ proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
/* Allocate data areas we might need before doing any validity checks */
mutex_enter(proc_lock); /* Because pid_table might change */
rw_enter(&pid_table_lock, RW_READER);/* Because pid_table might change */
if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
mutex_exit(proc_lock);
rw_exit(&pid_table_lock);
new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
mutex_enter(proc_lock);
} else
} else {
rw_exit(&pid_table_lock);
new_pgrp = NULL;
}
mutex_enter(proc_lock);
rw_enter(&pid_table_lock, RW_WRITER);
rval = EPERM; /* most common error (to save typing) */
/* Check pgrp exists or can be created */
@ -916,7 +1220,7 @@ proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
/* Can only set another process under restricted circumstances. */
if (pid != curp->p_pid) {
/* Must exist and be one of our children... */
p = proc_find(pid);
p = proc_find_pid_table_locked(pid);
if (p == NULL || !p_inferior(p, curp)) {
rval = ESRCH;
goto done;
@ -935,7 +1239,7 @@ proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
} else {
/* ... setsid() cannot re-enter a pgrp */
if (mksess && (curp->p_pgid == curp->p_pid ||
pgrp_find(curp->p_pid)))
pgrp_find_pid_table_locked(curp->p_pid)))
goto done;
p = curp;
}
@ -1029,6 +1333,7 @@ proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
mutex_spin_exit(&tty_lock);
done:
rw_exit(&pid_table_lock);
if (pg_id != NO_PGID) {
/* Releases proc_lock. */
pg_delete(pg_id);
@ -1085,6 +1390,7 @@ pg_remove(pid_t pg_id)
struct pid_table *pt;
KASSERT(mutex_owned(proc_lock));
KASSERT(rw_write_held(&pid_table_lock));
pt = &pid_table[pg_id & pid_tbl_mask];
pgrp = pt->pt_pgrp;
@ -1095,12 +1401,12 @@ pg_remove(pid_t pg_id)
pt->pt_pgrp = NULL;
if (!P_VALID(pt->pt_proc)) {
if (!PT_VALID(pt->pt_slot)) {
/* Orphaned pgrp, put slot onto free list. */
KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == 0);
KASSERT((PT_NEXT(pt->pt_slot) & pid_tbl_mask) == 0);
pg_id &= pid_tbl_mask;
pt = &pid_table[last_free_pt];
pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pg_id);
KASSERT(pt->pt_pid == 0);
last_free_pt = pg_id;
pid_alloc_cnt--;
@ -1121,8 +1427,10 @@ pg_delete(pid_t pg_id)
KASSERT(mutex_owned(proc_lock));
rw_enter(&pid_table_lock, RW_WRITER);
pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
return;
}
@ -1139,14 +1447,15 @@ pg_delete(pid_t pg_id)
mutex_spin_exit(&tty_lock);
/*
* The leading process group in a session is freed by proc_sessrele(),
* if last reference. Note: proc_sessrele() releases proc_lock.
* The leading process group in a session is freed by
* proc_sessrele_pid_table_write_locked(), if last
* reference. It will also release the locks.
*/
pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
proc_sessrele(ss);
proc_sessrele_pid_table_write_locked(ss);
if (pg != NULL) {
/* Free it, if was not done by proc_sessrele(). */
/* Free it, if was not done above. */
kmem_free(pg, sizeof(struct pgrp));
}
}
@ -1241,23 +1550,31 @@ pidtbl_dump(void)
struct pid_table *pt;
struct proc *p;
struct pgrp *pgrp;
uintptr_t slot;
int id;
db_printf("pid table %p size %x, next %x, last %x\n",
pid_table, pid_tbl_mask+1,
next_free_pt, last_free_pt);
for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
p = pt->pt_proc;
if (!P_VALID(p) && !pt->pt_pgrp)
slot = pt->pt_slot;
if (!PT_VALID(slot) && !pt->pt_pgrp)
continue;
if (PT_IS_LWP(slot)) {
p = PT_GET_LWP(slot)->l_proc;
} else if (PT_IS_PROC(slot)) {
p = PT_GET_PROC(slot);
} else {
p = NULL;
}
db_printf(" id %x: ", id);
if (P_VALID(p))
if (p != NULL)
db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
else
db_printf("next %x use %x\n",
P_NEXT(p) & pid_tbl_mask,
P_NEXT(p) & ~pid_tbl_mask);
PT_NEXT(slot) & pid_tbl_mask,
PT_NEXT(slot) & ~pid_tbl_mask);
if ((pgrp = pt->pt_pgrp)) {
db_printf("\tsession %p, sid %d, count %d, login %s\n",
pgrp->pg_session, pgrp->pg_session->s_sid,
@ -2245,7 +2562,6 @@ fill_proc(const struct proc *psrc, struct proc *p, bool allowaddr)
p->p_nrlwps = psrc->p_nrlwps;
p->p_nlwpwait = psrc->p_nlwpwait;
p->p_ndlwps = psrc->p_ndlwps;
p->p_nlwpid = psrc->p_nlwpid;
p->p_nstopchild = psrc->p_nstopchild;
p->p_waited = psrc->p_waited;
COND_SET_VALUE(p->p_zomblwp, psrc->p_zomblwp, allowaddr);

View File

@ -1,4 +1,4 @@
/* $NetBSD: sys_lwp.c,v 1.78 2020/04/22 21:22:21 thorpej Exp $ */
/* $NetBSD: sys_lwp.c,v 1.79 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.78 2020/04/22 21:22:21 thorpej Exp $");
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.79 2020/04/24 03:22:06 thorpej Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -415,8 +415,7 @@ sys__lwp_detach(struct lwp *l, const struct sys__lwp_detach_args *uap,
* We can't use lwp_find() here because the target might
* be a zombie.
*/
t = radix_tree_lookup_node(&p->p_lwptree,
(uint64_t)(target - 1));
t = proc_find_lwp(p, target);
KASSERT(t == NULL || t->l_lid == target);
}
@ -458,7 +457,6 @@ sys__lwp_detach(struct lwp *l, const struct sys__lwp_detach_args *uap,
int
lwp_unpark(const lwpid_t *tp, const u_int ntargets)
{
uint64_t id;
u_int target;
int error;
proc_t *p;
@ -467,21 +465,40 @@ lwp_unpark(const lwpid_t *tp, const u_int ntargets)
p = curproc;
error = 0;
rw_enter(&p->p_treelock, RW_READER);
mutex_enter(p->p_lock);
for (target = 0; target < ntargets; target++) {
/*
* We don't bother excluding zombies or idle LWPs here, as
* We don't bother excluding idle LWPs here, as
* setting LW_UNPARKED on them won't do any harm.
*/
id = (uint64_t)(tp[target] - 1);
t = radix_tree_lookup_node(&p->p_lwptree, id);
if (t == NULL) {
t = proc_find_lwp(p, tp[target]);
if (__predict_false(t == NULL)) {
error = ESRCH;
continue;
}
/*
* The locking order is p::p_lock -> l::l_mutex,
* but it may not be safe to release p::p_lock
* while l::l_mutex is held because l::l_mutex is
* a scheduler lock and we don't want to get tied
* in knots while unwinding priority inheritance.
* So, get a reference count on the LWP and then
* unlock p::p_lock before acquiring l::l_mutex.
*/
if (__predict_false(t->l_stat == LSZOMB)) {
continue;
}
lwp_addref(t);
mutex_exit(p->p_lock);
/*
* Note the LWP cannot become a zombie while we
* hold a reference.
*/
lwp_lock(t);
if (t->l_syncobj == &lwp_park_syncobj) {
if (__predict_true(t->l_syncobj == &lwp_park_syncobj)) {
/*
* As expected it's parked, so wake it up.
* lwp_unsleep() will release the LWP lock.
@ -499,8 +516,10 @@ lwp_unpark(const lwpid_t *tp, const u_int ntargets)
t->l_flag |= LW_UNPARKED;
lwp_unlock(t);
}
mutex_enter(p->p_lock);
lwp_delref2(t);
}
rw_exit(&p->p_treelock);
mutex_exit(p->p_lock);
return error;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: lwp.h,v 1.206 2020/04/10 17:16:21 ad Exp $ */
/* $NetBSD: lwp.h,v 1.207 2020/04/24 03:22:06 thorpej Exp $ */
/*
* Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019, 2020
@ -136,22 +136,8 @@ struct lwp {
bool l_vforkwaiting; /* a: vfork() waiting */
/* User-space synchronization. */
uintptr_t l___reserved; /* reserved for future use */
/*
* The global thread ID has special locking and access
* considerations. Because many LWPs may never need one,
* global thread IDs are allocated lazily in lwp_gettid().
* l___tid is not meant to be accessed directly unless
* the accessor has specific knowledge that doing so
* is safe. l___tid is only assigned by the LWP itself.
* Once assigned, it is stable until the LWP exits.
* An LWP assigns its own thread ID unlocked before it
* reaches visibility to the rest of the system, and
* can access its own thread ID unlocked. But once
* published, it must hold the proc's lock to change
* the value.
*/
lwpid_t l___tid; /* p: global thread id */
uintptr_t l___rsvd0; /* reserved for future use */
uint32_t l___rsvd1; /* reserved for future use */
#if PCU_UNIT_COUNT > 0
struct cpu_info * volatile l_pcu_cpu[PCU_UNIT_COUNT];
@ -287,7 +273,7 @@ extern int maxlwp __read_mostly; /* max number of lwps */
#define LP_KTRACTIVE 0x00000001 /* Executing ktrace operation */
#define LP_KTRCSW 0x00000002 /* ktrace context switch marker */
#define LP_KTRCSWUSER 0x00000004 /* ktrace context switch marker */
#define LP_PIDLID 0x00000008 /* free LID from PID space on exit */
/* 0x00000008 was LP_PIDLID */
#define LP_OWEUPC 0x00000010 /* Owe user profiling tick */
#define LP_MPSAFE 0x00000020 /* Starts life without kernel_lock */
#define LP_INTR 0x00000040 /* Soft interrupt handler */
@ -325,6 +311,7 @@ extern int maxlwp __read_mostly; /* max number of lwps */
*
* These values are set in stone and must not be reused with future changes.
*/
#define LSLARVAL 0 /* in pid table, but partially constructed */
#define LSIDL 1 /* Process being created by fork. */
#define LSRUN 2 /* Currently runnable. */
#define LSSLEEP 3 /* Sleeping on an address. */
@ -362,13 +349,13 @@ kmutex_t *lwp_setlock(lwp_t *, kmutex_t *);
void lwp_unlock_to(lwp_t *, kmutex_t *);
int lwp_trylock(lwp_t *);
void lwp_addref(lwp_t *);
lwp_t * lwp_getref_lwpid(lwpid_t);
void lwp_delref(lwp_t *);
void lwp_delref2(lwp_t *);
bool lwp_drainrefs(lwp_t *);
bool lwp_alive(lwp_t *);
lwp_t *lwp_find_first(proc_t *);
void lwp_renumber(lwp_t *, lwpid_t);
int lwp_wait(lwp_t *, lwpid_t, lwpid_t *, bool);
void lwp_continue(lwp_t *);
void lwp_unsleep(lwp_t *, bool);
@ -389,8 +376,6 @@ int lwp_setprivate(lwp_t *, void *);
int do_lwp_create(lwp_t *, void *, u_long, lwp_t **, const sigset_t *,
const stack_t *);
lwpid_t lwp_gettid(void);
lwp_t * lwp_getref_tid(lwpid_t);
void lwp_thread_cleanup(lwp_t *);
void lwpinit_specificdata(void);
@ -606,7 +591,7 @@ curlwp_bindx(int bound)
#define LWP_SUSPENDED 0x00000080
/* Kernel-internal flags for LWP creation. */
#define LWP_PIDLID 0x40000000
/* 0x40000000 was LWP_PIDLID */
#define LWP_VFORK 0x80000000
#endif /* !_SYS_LWP_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: proc.h,v 1.362 2020/04/06 08:20:05 kamil Exp $ */
/* $NetBSD: proc.h,v 1.363 2020/04/24 03:22:06 thorpej Exp $ */
/*-
* Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@ -223,8 +223,6 @@ struct emul {
* l: proc_lock
* t: p_stmutex
* p: p_lock
* r: p_treelock (only for use by LWPs in the same proc)
* p,r: p_lock + p_treelock to modify, either to inspect
* (: updated atomically
* :: unlocked, stable
*/
@ -265,7 +263,6 @@ struct proc {
LIST_ENTRY(proc) p_sibling; /* l: List of sibling processes. */
LIST_HEAD(, proc) p_children; /* l: List of children. */
LIST_HEAD(, lwp) p_lwps; /* p: List of LWPs. */
struct radix_tree p_lwptree; /* p,r: Tree of LWPs. */
struct ras *p_raslist; /* a: List of RAS entries */
/* The following fields are all zeroed upon creation in fork. */
@ -276,7 +273,6 @@ struct proc {
int p_nrlwps; /* p: Number running/sleeping LWPs */
int p_nlwpwait; /* p: Number of LWPs in lwp_wait1() */
int p_ndlwps; /* p: Number of detached LWPs */
int p_nlwpid; /* p: Next LWP ID */
u_int p_nstopchild; /* l: Count of stopped/dead children */
u_int p_waited; /* l: parent has waited on child */
struct lwp *p_zomblwp; /* p: detached LWP to be reaped */
@ -350,7 +346,6 @@ struct proc {
__aligned(COHERENCY_UNIT);
kmutex_t p_stmutex; /* :: mutex on profiling state */
krwlock_t p_reflock; /* :: lock for debugger, procfs */
krwlock_t p_treelock; /* :: lock on p_lwptree */
};
#define p_rlimit p_limit->pl_rlimit
@ -502,8 +497,12 @@ extern struct pool ptimer_pool; /* Memory pool for ptimers */
int proc_find_locked(struct lwp *, struct proc **, pid_t);
proc_t * proc_find_raw(pid_t);
proc_t * proc_find(pid_t); /* Find process by ID */
struct lwp * proc_find_lwp(proc_t *, pid_t); /* Find LWP in proc by ID */
struct pgrp * pgrp_find(pid_t); /* Find process group by ID */
struct lwp * proc_seek_lwpid(pid_t); /* Find LWP by ID only */
void proc_hide_lwpid(pid_t); /* Hide LWP ID from seekers */
void procinit(void);
void procinit_sysctl(void);
int proc_enterpgrp(struct proc *, pid_t, pid_t, bool);
@ -526,6 +525,8 @@ struct proc *proc_alloc(void);
void proc0_init(void);
pid_t proc_alloc_pid(struct proc *);
void proc_free_pid(pid_t);
pid_t proc_alloc_lwpid(struct proc *, struct lwp *);
void proc_free_lwpid(struct proc *, pid_t);
void proc_free_mem(struct proc *);
void exit_lwps(struct lwp *l);
int fork1(struct lwp *, int, int, void *, size_t,