- Replace pid_table_lock with a lockless lookup covered by pserialize, with
  the "writer" side being pid_table expansion.  The basic idea is that when
  doing an LWP lookup there is usually already a lock held (p->p_lock), or a
  spin mutex that needs to be taken (l->l_mutex), and either can be used to
  get the found LWP stable and confidently determine that all is correct
  (a sketch of the reader/writer pattern follows this message).

- For user processes LSLARVAL implies the same thing as LSIDL ("not visible
  by ID"), and lookup by ID in proc0 doesn't really happen.  In-tree the new
  state should be understood by top(1), the tty subsystem and so on, and
  would attract the attention of 3rd party kernel grovellers in time, so
  remove it and just rely on LSIDL.
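
As context for the first item above, here is a minimal sketch of the
pserialize reader/writer pattern the new pid_table lookup relies on.  It is
not the committed code (proc_find_lwp() and expand_pid_table() in the diff
below are the real thing), and the names "entry", "table", "table_mask" and
"table_psz" are hypothetical:

	#include <sys/atomic.h>
	#include <sys/kmem.h>
	#include <sys/pserialize.h>

	struct entry { uintptr_t slot; };	/* hypothetical slot type */

	static struct entry *table;		/* published table pointer */
	static u_int table_mask;		/* size - 1, power of two */
	static pserialize_t table_psz;		/* from pserialize_create() */

	/* Reader: lock-free, runs inside a pserialize read section. */
	struct entry *
	lookup(u_int key)
	{
		struct entry *e;
		int s;

		s = pserialize_read_enter();
		/* Load the pointer once; orders dependent loads on alpha. */
		e = atomic_load_consume(&table) + (key & table_mask);
		/*
		 * Validate and stabilize the entry before leaving the read
		 * section; in the committed code that means taking p->p_lock
		 * or l->l_mutex and rejecting LWPs still in LSIDL.
		 */
		pserialize_read_exit(s);
		return e;
	}

	/* Writer (table expansion): publish new memory, reclaim the old. */
	void
	expand(size_t oldsz, size_t newsz)
	{
		struct entry *newtab = kmem_alloc(newsz, KM_SLEEP);
		struct entry *oldtab = table;

		/* ... rehash old entries into newtab under the writer lock ... */
		atomic_store_release(&table, newtab);
		pserialize_perform(table_psz);	/* wait for readers to drain */
		kmem_free(oldtab, oldsz);
	}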
ad 2020-05-23 20:45:10 +00:00
parent a5899d2545
commit 20180cb18f
8 changed files with 284 additions and 218 deletions

View File: kern_lwp.c

@ -1,4 +1,4 @@
/* $NetBSD: kern_lwp.c,v 1.237 2020/04/29 01:52:26 thorpej Exp $ */
/* $NetBSD: kern_lwp.c,v 1.238 2020/05/23 20:45:10 ad Exp $ */
/*-
* Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020
@ -65,9 +65,15 @@
*
* LSIDL
*
* Idle: the LWP has been created but has not yet executed,
* or it has ceased executing a unit of work and is waiting
* to be started again.
* Idle: the LWP has been created but has not yet executed, or
* it has ceased executing a unit of work and is waiting to be
* started again. This state exists so that the LWP can occupy
* a slot in the process & PID table, but without having to
* worry about being touched; lookups of the LWP by ID will
* fail while in this state. The LWP will become visible for
* lookup once its state transitions further. Some special
* kernel threads also (ab)use this state to indicate that they
* are idle (soft interrupts and idle LWPs).
*
* LSSUSPENDED:
*
@ -83,16 +89,6 @@
* The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running.
* Importantly, it indicates that its state is tied to a CPU.
*
* LSLARVAL:
*
* Born, but not fully mature: the LWP is in the process
* of being constructed. This state exists so that the
* LWP can occupy a slot in the PID table, but without
* having to worry about being touched; lookups of the
* LWP will fail while in this state. The LWP will become
* visible in the PID table once its state transitions
* to LSIDL.
*
* LSZOMB:
*
* Dead or dying: the LWP has released most of its resources
@ -130,8 +126,6 @@
*
* LWPs may transition states in the following ways:
*
* LARVAL ----> IDL
*
* RUN -------> ONPROC ONPROC -----> RUN
* > SLEEP
* > STOPPED
@ -223,7 +217,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.237 2020/04/29 01:52:26 thorpej Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.238 2020/05/23 20:45:10 ad Exp $");
#include "opt_ddb.h"
#include "opt_lockdebug.h"
@ -266,6 +260,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.237 2020/04/29 01:52:26 thorpej Exp $
static pool_cache_t lwp_cache __read_mostly;
struct lwplist alllwp __cacheline_aligned;
static int lwp_ctor(void *, void *, int);
static void lwp_dtor(void *, void *);
/* DTrace proc provider probes */
@ -347,7 +342,7 @@ lwpinit(void)
LIST_INIT(&alllwp);
lwpinit_specificdata();
lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0,
"lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL);
"lwppl", NULL, IPL_NONE, lwp_ctor, lwp_dtor, NULL);
maxlwp = cpu_maxlwp();
sysctl_kern_lwp_setup();
@ -376,6 +371,27 @@ lwp0_init(void)
SYSCALL_TIME_LWP_INIT(l);
}
/*
* Initialize the non-zeroed portion of an lwp_t.
*/
static int
lwp_ctor(void *arg, void *obj, int flags)
{
lwp_t *l = obj;
l->l_stat = LSIDL;
l->l_cpu = curcpu();
l->l_mutex = l->l_cpu->ci_schedstate.spc_lwplock;
l->l_ts = pool_get(&turnstile_pool, flags);
if (l->l_ts == NULL) {
return ENOMEM;
} else {
turnstile_ctor(l->l_ts);
return 0;
}
}
static void
lwp_dtor(void *arg, void *obj)
{
@ -389,13 +405,22 @@ lwp_dtor(void *arg, void *obj)
* Kernel preemption is disabled around mutex_oncpu() and rw_oncpu()
* callers, therefore cross-call to all CPUs will do the job. Also,
* the value of l->l_cpu must be still valid at this point.
*
* XXX should use epoch based reclamation.
*/
KASSERT(l->l_cpu != NULL);
xc_barrier(0);
/*
* We can't return turnstile0 to the pool (it didn't come from it),
* so if it comes up just drop it quietly and move on.
*/
if (l->l_ts != &turnstile0)
pool_put(&turnstile_pool, l->l_ts);
}
/*
* Set an suspended.
* Set an LWP suspended.
*
* Must be called with p_lock held, and the LWP locked. Will unlock the
* LWP before return.
@ -593,7 +618,7 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
error = 0;
/*
* If given a specific LID, go via the tree and make sure
* If given a specific LID, go via pid_table and make sure
* it's not detached.
*/
if (lid != 0) {
@ -742,7 +767,6 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
const stack_t *sigstk)
{
struct lwp *l2;
turnstile_t *ts;
KASSERT(l1 == curlwp || l1->l_proc == &proc0);
@ -778,20 +802,29 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
p2->p_zomblwp = NULL;
lwp_free(l2, true, false);
/* p2 now unlocked by lwp_free() */
ts = l2->l_ts;
KASSERT(l2->l_ts != NULL);
KASSERT(l2->l_inheritedprio == -1);
KASSERT(SLIST_EMPTY(&l2->l_pi_lenders));
memset(l2, 0, sizeof(*l2));
l2->l_ts = ts;
memset(&l2->l_startzero, 0, sizeof(*l2) -
offsetof(lwp_t, l_startzero));
} else {
mutex_exit(p2->p_lock);
l2 = pool_cache_get(lwp_cache, PR_WAITOK);
memset(l2, 0, sizeof(*l2));
ts = l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK);
memset(&l2->l_startzero, 0, sizeof(*l2) -
offsetof(lwp_t, l_startzero));
SLIST_INIT(&l2->l_pi_lenders);
}
l2->l_stat = LSLARVAL;
/*
* Because of lockless lookup via pid_table, the LWP can be locked
* and inspected briefly even after it's freed, so a few fields are
* kept stable.
*/
KASSERT(l2->l_stat == LSIDL);
KASSERT(l2->l_cpu != NULL);
KASSERT(l2->l_ts != NULL);
KASSERT(l2->l_mutex == l2->l_cpu->ci_schedstate.spc_lwplock);
l2->l_proc = p2;
l2->l_refcnt = 0;
l2->l_class = sclass;
@ -799,7 +832,7 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
/*
* Allocate a process ID for this LWP. We need to do this now
* while we can still unwind if it fails. Because we're marked
* as LARVAL, no lookups by the ID will succeed.
* as LSIDL, no lookups by the ID will succeed.
*
* N.B. this will always succeed for the first LWP in a process,
* because proc_alloc_lwpid() will usurp the slot. Also note
@ -807,9 +840,6 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
* will succeed, even if the LWP itself is not visible.
*/
if (__predict_false(proc_alloc_lwpid(p2, l2) == -1)) {
if (ts != &turnstile0)
pool_cache_put(turnstile_cache, ts);
l2->l_ts = NULL;
pool_cache_put(lwp_cache, l2);
return EAGAIN;
}
@ -857,11 +887,6 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
l2->l_flag |= LW_SYSTEM;
}
kpreempt_disable();
l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock;
l2->l_cpu = l1->l_cpu;
kpreempt_enable();
kdtrace_thread_ctor(NULL, l2);
lwp_initspecific(l2);
sched_lwp_fork(l1, l2);
@ -889,13 +914,6 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, int flags,
uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2);
mutex_enter(p2->p_lock);
/*
* This renders l2 visible in the pid table once p2->p_lock is
* released.
*/
l2->l_stat = LSIDL;
if ((flags & LWP_DETACHED) != 0) {
l2->l_prflag = LPR_DETACHED;
p2->p_ndlwps++;
@ -1226,6 +1244,31 @@ lwp_free(struct lwp *l, bool recycle, bool last)
if (p != &proc0 && p->p_nlwps != 1)
(void)chglwpcnt(kauth_cred_getuid(p->p_cred), -1);
/*
* In the unlikely event that the LWP is still on the CPU,
* then spin until it has switched away.
*/
membar_consumer();
while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) {
SPINLOCK_BACKOFF_HOOK;
}
/*
* Now that the LWP's known off the CPU, reset its state back to
* LSIDL, which defeats anything that might have gotten a hold on
* the LWP via pid_table before the ID was freed. It's important
* to do this with both the LWP locked and p_lock held.
*
* Also reset the CPU and lock pointer back to curcpu(), since the
* LWP will in all likelihood be cached with the current CPU in
* lwp_cache when we free it and later allocated from there again
* (avoid incidental lock contention).
*/
lwp_lock(l);
l->l_stat = LSIDL;
l->l_cpu = curcpu();
lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_lwplock);
/*
* If this was not the last LWP in the process, then adjust counters
* and unlock. This is done differently for the last LWP in exit1().
@ -1247,24 +1290,17 @@ lwp_free(struct lwp *l, bool recycle, bool last)
if ((l->l_prflag & LPR_DETACHED) != 0)
p->p_ndlwps--;
/* Free the LWP ID. */
proc_free_lwpid(p, l->l_lid);
/*
* Have any LWPs sleeping in lwp_wait() recheck for
* deadlock.
*/
cv_broadcast(&p->p_lwpcv);
mutex_exit(p->p_lock);
}
/*
* In the unlikely event that the LWP is still on the CPU,
* then spin until it has switched away.
*/
membar_consumer();
while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) {
SPINLOCK_BACKOFF_HOOK;
/* Free the LWP ID. */
mutex_enter(proc_lock);
proc_free_lwpid(p, l->l_lid);
mutex_exit(proc_lock);
}
/*
@ -1288,18 +1324,9 @@ lwp_free(struct lwp *l, bool recycle, bool last)
}
/*
* Free the LWP's turnstile and the LWP structure itself unless the
* caller wants to recycle them. Also, free the scheduler specific
* data.
*
* We can't return turnstile0 to the pool (it didn't come from it),
* so if it comes up just drop it quietly and move on.
*
* We don't recycle the VM resources at this time.
* Free remaining data structures and the LWP itself unless the
* caller wants to recycle.
*/
if (!recycle && l->l_ts != &turnstile0)
pool_cache_put(turnstile_cache, l->l_ts);
if (l->l_name != NULL)
kmem_free(l->l_name, MAXCOMLEN);

View File: kern_proc.c

@ -1,4 +1,4 @@
/* $NetBSD: kern_proc.c,v 1.251 2020/04/29 01:52:26 thorpej Exp $ */
/* $NetBSD: kern_proc.c,v 1.252 2020/05/23 20:45:10 ad Exp $ */
/*-
* Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.251 2020/04/29 01:52:26 thorpej Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.252 2020/05/23 20:45:10 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_kstack.h"
@ -107,6 +107,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.251 2020/04/29 01:52:26 thorpej Exp
#include <sys/cpu.h>
#include <sys/compat_stub.h>
#include <sys/futex.h>
#include <sys/pserialize.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>
@ -120,6 +121,7 @@ struct proclist zombproc __cacheline_aligned;
static kmutex_t proc_lock_s __cacheline_aligned;
kmutex_t * proc_lock __read_mostly;
static pserialize_t proc_psz;
/*
* pid to lwp/proc lookup is done by indexing the pid_table array.
@ -168,13 +170,7 @@ struct pid_table {
/*
* Table of process IDs (PIDs).
*
* Locking order:
* proc_lock -> pid_table_lock
* or
* proc::p_lock -> pid_table_lock
*/
static krwlock_t pid_table_lock __cacheline_aligned;
static struct pid_table *pid_table __read_mostly;
#define INITIAL_PID_TABLE_SIZE (1 << 5)
@ -382,7 +378,7 @@ procinit(void)
mutex_init(&proc_lock_s, MUTEX_DEFAULT, IPL_NONE);
proc_lock = &proc_lock_s;
rw_init(&pid_table_lock);
proc_psz = pserialize_create();
pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
* sizeof(struct pid_table), KM_SLEEP);
@ -406,10 +402,10 @@ procinit(void)
#undef LINK_EMPTY
/* Reserve PID 1 for init(8). */ /* XXX slightly gross */
rw_enter(&pid_table_lock, RW_WRITER);
mutex_enter(proc_lock);
if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1)
panic("failed to reserve PID 1 for init(8)");
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
proc_specificdata_domain = specificdata_domain_create();
KASSERT(proc_specificdata_domain != NULL);
@ -585,13 +581,12 @@ proc_sesshold(struct session *ss)
ss->s_count++;
}
static void
proc_sessrele_pid_table_write_locked(struct session *ss)
void
proc_sessrele(struct session *ss)
{
struct pgrp *pg;
KASSERT(mutex_owned(proc_lock));
KASSERT(rw_write_held(&pid_table_lock));
KASSERT(ss->s_count > 0);
/*
@ -606,7 +601,6 @@ proc_sessrele_pid_table_write_locked(struct session *ss)
ss = NULL;
}
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
if (pg)
@ -615,13 +609,6 @@ proc_sessrele_pid_table_write_locked(struct session *ss)
kmem_free(ss, sizeof(struct session));
}
void
proc_sessrele(struct session *ss)
{
rw_enter(&pid_table_lock, RW_WRITER);
proc_sessrele_pid_table_write_locked(ss);
}
/*
* Check that the specified process group is in the session of the
* specified process.
@ -676,7 +663,7 @@ p_inferior(struct proc *p, struct proc *q)
* proc_find_lwp: locate an lwp in said proc by the ID.
*
* => Must be called with p::p_lock held.
* => LARVAL lwps are not returned because they are only partially
* => LSIDL lwps are not returned because they are only partially
* constructed while occupying the slot.
* => Callers need to be careful about lwp::l_stat of the returned
* lwp.
@ -687,20 +674,104 @@ proc_find_lwp(proc_t *p, pid_t pid)
struct pid_table *pt;
struct lwp *l = NULL;
uintptr_t slot;
int s;
KASSERT(mutex_owned(p->p_lock));
rw_enter(&pid_table_lock, RW_READER);
/*
* Look in the pid_table. This is done unlocked inside a pserialize
* read section covering pid_table's memory allocation only, so take
* care to read the slot atomically and only once. This issues a
* memory barrier for dependent loads on alpha.
*/
s = pserialize_read_enter();
pt = &pid_table[pid & pid_tbl_mask];
slot = pt->pt_slot;
if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
l = PT_GET_LWP(slot);
if (__predict_false(l->l_proc != p || l->l_stat == LSLARVAL)) {
l = NULL;
}
slot = atomic_load_consume(&pt->pt_slot);
if (__predict_false(!PT_IS_LWP(slot))) {
pserialize_read_exit(s);
return NULL;
}
rw_exit(&pid_table_lock);
/*
* Check to see if the LWP is from the correct process. We won't
* see entries in pid_table from a prior process that also used "p",
* by virtue of the fact that allocating "p" means all prior updates
* to dependent data structures are visible to this thread.
*/
l = PT_GET_LWP(slot);
if (__predict_false(atomic_load_relaxed(&l->l_proc) != p)) {
pserialize_read_exit(s);
return NULL;
}
/*
* We now know that p->p_lock holds this LWP stable.
*
* If the status is not LSIDL, it means the LWP is intended to be
* findable by LID and l_lid cannot change behind us.
*
* No need to acquire the LWP's lock to check for LSIDL, as
* p->p_lock must be held to transition in and out of LSIDL.
* Any other observed state is of no particular interest.
*/
pserialize_read_exit(s);
return l->l_stat != LSIDL && l->l_lid == pid ? l : NULL;
}
/*
* proc_find_lwp_unlocked: locate an lwp in said proc by the ID.
*
* => Called in a pserialize read section with no locks held.
* => LSIDL lwps are not returned because they are only partially
* constructed while occupying the slot.
* => Callers need to be careful about lwp::l_stat of the returned
* lwp.
* => If an LWP is found, it's returned locked.
*/
struct lwp *
proc_find_lwp_unlocked(proc_t *p, pid_t pid)
{
struct pid_table *pt;
struct lwp *l = NULL;
uintptr_t slot;
KASSERT(pserialize_in_read_section());
/*
* Look in the pid_table. This is done unlocked inside a pserialize
* read section covering pid_table's memory allocation only, so take
* care to read the slot atomically and only once. This issues a
* memory barrier for dependent loads on alpha.
*/
pt = &pid_table[pid & pid_tbl_mask];
slot = atomic_load_consume(&pt->pt_slot);
if (__predict_false(!PT_IS_LWP(slot))) {
return NULL;
}
/*
* Lock the LWP we found to get it stable. If it's embryonic or
* reaped (LSIDL) then none of the other fields can safely be
* checked.
*/
l = PT_GET_LWP(slot);
lwp_lock(l);
if (__predict_false(l->l_stat == LSIDL)) {
lwp_unlock(l);
return NULL;
}
/*
* l_proc and l_lid are now known stable because the LWP is not
* LSIDL, so check those fields too to make sure we found the
* right thing.
*/
if (__predict_false(l->l_proc != p || l->l_lid != pid)) {
lwp_unlock(l);
return NULL;
}
/* Everything checks out, return it locked. */
return l;
}
@ -723,29 +794,18 @@ proc_find_lwp_acquire_proc(pid_t pid, struct proc **pp)
KASSERT(pp != NULL);
mutex_enter(proc_lock);
rw_enter(&pid_table_lock, RW_READER);
pt = &pid_table[pid & pid_tbl_mask];
slot = pt->pt_slot;
if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
/*
* Locking order is p::p_lock -> pid_table_lock, but
* we're already holding pid_table_lock; we need to
* release it before acquiring p::p_lock. This is
* safe because p will be stable by virtue of holding
* proc_lock.
*/
l = PT_GET_LWP(slot);
p = l->l_proc;
rw_exit(&pid_table_lock);
mutex_enter(p->p_lock);
if (__predict_false(l->l_stat == LSLARVAL)) {
if (__predict_false(l->l_stat == LSIDL)) {
mutex_exit(p->p_lock);
l = NULL;
p = NULL;
}
} else {
rw_exit(&pid_table_lock);
}
mutex_exit(proc_lock);
@ -757,8 +817,7 @@ proc_find_lwp_acquire_proc(pid_t pid, struct proc **pp)
/*
* proc_find_raw_pid_table_locked: locate a process by the ID.
*
* => Must be called with proc_lock held and the pid_table_lock
* at least held for reading.
* => Must be called with proc_lock held.
*/
static proc_t *
proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid)
@ -767,7 +826,7 @@ proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid)
proc_t *p = NULL;
uintptr_t slot;
KASSERT(mutex_owned(proc_lock));
/* No - used by DDB. KASSERT(mutex_owned(proc_lock)); */
pt = &pid_table[pid & pid_tbl_mask];
slot = pt->pt_slot;
@ -777,7 +836,7 @@ proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid)
* on the PID assigned to the proc, not just one of
* its LWPs.
*
* N.B. We require lwp::l_proc of LARVAL LWPs to be
* N.B. We require lwp::l_proc of LSIDL LWPs to be
* valid here.
*/
p = PT_GET_LWP(slot)->l_proc;
@ -792,15 +851,12 @@ proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid)
proc_t *
proc_find_raw(pid_t pid)
{
KASSERT(mutex_owned(proc_lock));
rw_enter(&pid_table_lock, RW_READER);
proc_t *p = proc_find_raw_pid_table_locked(pid, false);
rw_exit(&pid_table_lock);
return p;
return proc_find_raw_pid_table_locked(pid, false);
}
static proc_t *
proc_find_pid_table_locked(pid_t pid, bool any_lwpid)
proc_find_internal(pid_t pid, bool any_lwpid)
{
proc_t *p;
@ -821,16 +877,6 @@ proc_find_pid_table_locked(pid_t pid, bool any_lwpid)
return NULL;
}
static proc_t *
proc_find_internal(pid_t pid, bool any_lwpid)
{
KASSERT(mutex_owned(proc_lock));
rw_enter(&pid_table_lock, RW_READER);
proc_t *p = proc_find_pid_table_locked(pid, any_lwpid);
rw_exit(&pid_table_lock);
return p;
}
proc_t *
proc_find(pid_t pid)
{
@ -844,13 +890,12 @@ proc_find_lwpid(pid_t pid)
}
/*
* pgrp_find_pid_table_locked: locate a process group by the ID.
* pgrp_find: locate a process group by the ID.
*
* => Must be called with proc_lock held and the pid_table_lock
* held at least for reading.
* => Must be called with proc_lock held.
*/
static struct pgrp *
pgrp_find_pid_table_locked(pid_t pgid)
struct pgrp *
pgrp_find(pid_t pgid)
{
struct pgrp *pg;
@ -868,16 +913,6 @@ pgrp_find_pid_table_locked(pid_t pgid)
return pg;
}
struct pgrp *
pgrp_find(pid_t pgid)
{
KASSERT(mutex_owned(proc_lock));
rw_enter(&pid_table_lock, RW_READER);
struct pgrp *pg = pgrp_find_pid_table_locked(pgid);
rw_exit(&pid_table_lock);
return pg;
}
static void
expand_pid_table(void)
{
@ -889,11 +924,11 @@ expand_pid_table(void)
u_int i;
uint new_pt_mask;
KASSERT(rw_write_held(&pid_table_lock));
KASSERT(mutex_owned(proc_lock));
/* Unlock the pid_table briefly to allocate memory. */
pt_size = pid_tbl_mask + 1;
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
tsz = pt_size * 2 * sizeof(struct pid_table);
new_pt = kmem_alloc(tsz, KM_SLEEP);
@ -902,10 +937,10 @@ expand_pid_table(void)
/* XXX For now. The practical limit is much lower anyway. */
KASSERT(new_pt_mask <= FUTEX_TID_MASK);
rw_enter(&pid_table_lock, RW_WRITER);
mutex_enter(proc_lock);
if (pt_size != pid_tbl_mask + 1) {
/* Another process beat us to it... */
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
kmem_free(new_pt, tsz);
goto out;
}
@ -971,11 +1006,17 @@ expand_pid_table(void)
} else
pid_alloc_lim <<= 1; /* doubles number of free slots... */
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
/*
* Make sure that unlocked access to the old pid_table is complete
* and then free it.
*/
pserialize_perform(proc_psz);
kmem_free(n_pt, tsz);
out: /* Return with the pid_table_lock held again. */
rw_enter(&pid_table_lock, RW_WRITER);
out: /* Return with proc_lock held again. */
mutex_enter(proc_lock);
}
struct proc *
@ -1013,7 +1054,7 @@ proc_alloc_pid_slot(struct proc *p, uintptr_t slot)
pid_t pid;
int nxt;
KASSERT(rw_write_held(&pid_table_lock));
KASSERT(mutex_owned(proc_lock));
for (;;expand_pid_table()) {
if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) {
@ -1071,12 +1112,13 @@ proc_alloc_pid(struct proc *p)
pid_t pid;
KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0);
KASSERT(p->p_stat == SIDL);
rw_enter(&pid_table_lock, RW_WRITER);
mutex_enter(proc_lock);
pid = proc_alloc_pid_slot(p, PT_SET_PROC(p));
if (pid != -1)
p->p_pid = pid;
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
return pid;
}
@ -1088,6 +1130,17 @@ proc_alloc_lwpid(struct proc *p, struct lwp *l)
pid_t pid;
KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0);
KASSERT(l->l_proc == p);
KASSERT(l->l_stat == LSIDL);
/*
* For unlocked lookup in proc_find_lwp(), make sure l->l_proc
* is globally visible before the LWP becomes visible via the
* pid_table.
*/
#ifndef __HAVE_ATOMIC_AS_MEMBAR
membar_producer();
#endif
/*
* If the slot for p->p_pid currently points to the proc,
@ -1096,7 +1149,7 @@ proc_alloc_lwpid(struct proc *p, struct lwp *l)
* happen again if the first LWP for a process exits and
* before the process creates another.
*/
rw_enter(&pid_table_lock, RW_WRITER);
mutex_enter(proc_lock);
pid = p->p_pid;
pt = &pid_table[pid & pid_tbl_mask];
KASSERT(pt->pt_pid == pid);
@ -1110,7 +1163,7 @@ proc_alloc_lwpid(struct proc *p, struct lwp *l)
if (pid != -1)
l->l_lid = pid;
}
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
return pid;
}
@ -1120,7 +1173,6 @@ proc_free_pid_internal(pid_t pid, uintptr_t type __diagused)
{
struct pid_table *pt;
rw_enter(&pid_table_lock, RW_WRITER);
pt = &pid_table[pid & pid_tbl_mask];
KASSERT(PT_GET_TYPE(pt->pt_slot) == type);
@ -1139,7 +1191,6 @@ proc_free_pid_internal(pid_t pid, uintptr_t type __diagused)
last_free_pt = pid;
pid_alloc_cnt--;
}
rw_exit(&pid_table_lock);
}
/*
@ -1165,12 +1216,11 @@ void
proc_free_lwpid(struct proc *p, pid_t pid)
{
KASSERT(mutex_owned(p->p_lock));
KASSERT(mutex_owned(proc_lock));
if (__predict_true(p->p_pid == pid)) {
struct pid_table *pt;
rw_enter(&pid_table_lock, RW_WRITER);
pt = &pid_table[pid & pid_tbl_mask];
KASSERT(pt->pt_pid == pid);
@ -1178,8 +1228,6 @@ proc_free_lwpid(struct proc *p, pid_t pid)
KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p);
pt->pt_slot = PT_SET_PROC(p);
rw_exit(&pid_table_lock);
return;
}
proc_free_pid_internal(pid, PT_F_LWP);
@ -1213,19 +1261,11 @@ proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
int rval;
pid_t pg_id = NO_PGID;
sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
/* Allocate data areas we might need before doing any validity checks */
rw_enter(&pid_table_lock, RW_READER);/* Because pid_table might change */
if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
rw_exit(&pid_table_lock);
new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
} else {
rw_exit(&pid_table_lock);
new_pgrp = NULL;
}
sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
mutex_enter(proc_lock);
rw_enter(&pid_table_lock, RW_WRITER);
rval = EPERM; /* most common error (to save typing) */
/* Check pgrp exists or can be created */
@ -1236,7 +1276,7 @@ proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
/* Can only set another process under restricted circumstances. */
if (pid != curp->p_pid) {
/* Must exist and be one of our children... */
p = proc_find_pid_table_locked(pid, false);
p = proc_find_internal(pid, false);
if (p == NULL || !p_inferior(p, curp)) {
rval = ESRCH;
goto done;
@ -1255,7 +1295,7 @@ proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
} else {
/* ... setsid() cannot re-enter a pgrp */
if (mksess && (curp->p_pgid == curp->p_pid ||
pgrp_find_pid_table_locked(curp->p_pid)))
pgrp_find(curp->p_pid)))
goto done;
p = curp;
}
@ -1349,7 +1389,6 @@ proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
mutex_spin_exit(&tty_lock);
done:
rw_exit(&pid_table_lock);
if (pg_id != NO_PGID) {
/* Releases proc_lock. */
pg_delete(pg_id);
@ -1406,7 +1445,6 @@ pg_remove(pid_t pg_id)
struct pid_table *pt;
KASSERT(mutex_owned(proc_lock));
KASSERT(rw_write_held(&pid_table_lock));
pt = &pid_table[pg_id & pid_tbl_mask];
pgrp = pt->pt_pgrp;
@ -1443,10 +1481,8 @@ pg_delete(pid_t pg_id)
KASSERT(mutex_owned(proc_lock));
rw_enter(&pid_table_lock, RW_WRITER);
pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
rw_exit(&pid_table_lock);
mutex_exit(proc_lock);
return;
}
@ -1463,12 +1499,11 @@ pg_delete(pid_t pg_id)
mutex_spin_exit(&tty_lock);
/*
* The leading process group in a session is freed by
* proc_sessrele_pid_table_write_locked(), if last
* reference. It will also release the locks.
* The leading process group in a session is freed by proc_sessrele(),
* if last reference. It will also release the locks.
*/
pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
proc_sessrele_pid_table_write_locked(ss);
proc_sessrele(ss);
if (pg != NULL) {
/* Free it, if was not done above. */

View File: kern_turnstile.c

@ -1,4 +1,4 @@
/* $NetBSD: kern_turnstile.c,v 1.39 2020/04/19 20:35:29 ad Exp $ */
/* $NetBSD: kern_turnstile.c,v 1.40 2020/05/23 20:45:10 ad Exp $ */
/*-
* Copyright (c) 2002, 2006, 2007, 2009, 2019, 2020
@ -61,7 +61,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.39 2020/04/19 20:35:29 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.40 2020/05/23 20:45:10 ad Exp $");
#include <sys/param.h>
#include <sys/lockdebug.h>
@ -80,7 +80,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.39 2020/04/19 20:35:29 ad Exp $
#define TS_HASH(obj) (((uintptr_t)(obj) >> 6) & TS_HASH_MASK)
static tschain_t turnstile_chains[TS_HASH_SIZE] __cacheline_aligned;
pool_cache_t turnstile_cache __read_mostly;
struct pool turnstile_pool;
extern turnstile_t turnstile0;
static union {
@ -88,8 +88,6 @@ static union {
uint8_t pad[COHERENCY_UNIT];
} turnstile_locks[TS_HASH_SIZE] __cacheline_aligned;
static int turnstile_ctor(void *, void *, int);
/*
* turnstile_init:
*
@ -105,11 +103,10 @@ turnstile_init(void)
mutex_init(&turnstile_locks[i].lock, MUTEX_DEFAULT, IPL_SCHED);
}
turnstile_cache = pool_cache_init(sizeof(turnstile_t), coherency_unit,
0, 0, "tstile", NULL, IPL_NONE, turnstile_ctor, NULL, NULL);
KASSERT(turnstile_cache != NULL);
pool_init(&turnstile_pool, sizeof(turnstile_t), coherency_unit,
0, 0, "tstile", NULL, IPL_NONE);
(void)turnstile_ctor(NULL, &turnstile0, 0);
turnstile_ctor(&turnstile0);
}
/*
@ -117,15 +114,13 @@ turnstile_init(void)
*
* Constructor for turnstiles.
*/
static int
turnstile_ctor(void *arg, void *obj, int flags)
void
turnstile_ctor(turnstile_t *ts)
{
turnstile_t *ts = obj;
memset(ts, 0, sizeof(*ts));
sleepq_init(&ts->ts_sleepq[TS_READER_Q]);
sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]);
return (0);
}
/*

View File: sys_lwp.c

@ -1,4 +1,4 @@
/* $NetBSD: sys_lwp.c,v 1.80 2020/05/05 22:12:06 ad Exp $ */
/* $NetBSD: sys_lwp.c,v 1.81 2020/05/23 20:45:10 ad Exp $ */
/*-
* Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.80 2020/05/05 22:12:06 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.81 2020/05/23 20:45:10 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -49,6 +49,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.80 2020/05/05 22:12:06 ad Exp $");
#include <sys/sleepq.h>
#include <sys/lwpctl.h>
#include <sys/cpu.h>
#include <sys/pserialize.h>
#include <uvm/uvm_extern.h>
@ -458,22 +459,23 @@ int
lwp_unpark(const lwpid_t *tp, const u_int ntargets)
{
u_int target;
int error;
int error, s;
proc_t *p;
lwp_t *t;
p = curproc;
error = 0;
mutex_enter(p->p_lock);
s = pserialize_read_enter();
for (target = 0; target < ntargets; target++) {
t = proc_find_lwp(p, tp[target]);
t = proc_find_lwp_unlocked(p, tp[target]);
if (__predict_false(t == NULL)) {
error = ESRCH;
continue;
}
lwp_lock(t);
KASSERT(lwp_locked(t, NULL));
if (__predict_true(t->l_syncobj == &lwp_park_syncobj)) {
/*
* As expected it's parked, so wake it up.
@ -496,7 +498,7 @@ lwp_unpark(const lwpid_t *tp, const u_int ntargets)
lwp_unlock(t);
}
}
mutex_exit(p->p_lock);
pserialize_read_exit(s);
return error;
}

View File: lwproc.c

@ -1,4 +1,4 @@
/* $NetBSD: lwproc.c,v 1.48 2020/04/25 15:42:15 bouyer Exp $ */
/* $NetBSD: lwproc.c,v 1.49 2020/05/23 20:45:11 ad Exp $ */
/*
* Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
@ -28,7 +28,7 @@
#define RUMP__CURLWP_PRIVATE
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lwproc.c,v 1.48 2020/04/25 15:42:15 bouyer Exp $");
__KERNEL_RCSID(0, "$NetBSD: lwproc.c,v 1.49 2020/05/23 20:45:11 ad Exp $");
#include <sys/param.h>
#include <sys/atomic.h>
@ -310,8 +310,6 @@ lwproc_freelwp(struct lwp *l)
KASSERT(l->l_flag & LW_WEXIT);
KASSERT(l->l_refcnt == 0);
/* ok, zero references, continue with nuke */
proc_free_lwpid(p, l->l_lid);
LIST_REMOVE(l, l_sibling);
KASSERT(p->p_nlwps >= 1);
if (--p->p_nlwps == 0) {
@ -322,9 +320,11 @@ lwproc_freelwp(struct lwp *l)
}
cv_broadcast(&p->p_lwpcv); /* nobody sleeps on this in a rump kernel? */
kauth_cred_free(l->l_cred);
l->l_stat = LSIDL;
mutex_exit(p->p_lock);
mutex_enter(proc_lock);
proc_free_lwpid(p, l->l_lid);
LIST_REMOVE(l, l_list);
mutex_exit(proc_lock);
@ -361,6 +361,8 @@ lwproc_makelwp(struct proc *p, struct lwp *l, bool doswitch, bool procmake)
l->l_refcnt = 1;
l->l_proc = p;
l->l_stat = LSIDL;
l->l_mutex = &unruntime_lock;
proc_alloc_lwpid(p, l);
LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
@ -369,7 +371,6 @@ lwproc_makelwp(struct proc *p, struct lwp *l, bool doswitch, bool procmake)
l->l_cpu = &rump_bootcpu;
l->l_target_cpu = &rump_bootcpu; /* Initial target CPU always same */
l->l_stat = LSRUN;
l->l_mutex = &unruntime_lock;
TAILQ_INIT(&l->l_ld_locks);
mutex_exit(p->p_lock);
@ -520,6 +521,7 @@ rump_lwproc_switch(struct lwp *newlwp)
l->l_stat = LSRUN;
if (l->l_flag & LW_WEXIT) {
l->l_stat = LSIDL;
lwproc_freelwp(l);
}
}

View File: lwp.h

@ -1,4 +1,4 @@
/* $NetBSD: lwp.h,v 1.209 2020/04/29 01:52:26 thorpej Exp $ */
/* $NetBSD: lwp.h,v 1.210 2020/05/23 20:45:11 ad Exp $ */
/*
* Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019, 2020
@ -83,14 +83,20 @@ struct lockdebug;
struct sysent;
struct lwp {
/* Must not be zeroed on free. */
struct cpu_info *volatile l_cpu;/* s: CPU we're on if LSONPROC */
kmutex_t * volatile l_mutex; /* l: ptr to mutex on sched state */
struct turnstile *l_ts; /* l: current turnstile */
int l_stat; /* l: overall LWP status */
int l__reserved; /* : padding - reuse as needed */
/* Scheduling and overall state. */
#define l_startzero l_runq
TAILQ_ENTRY(lwp) l_runq; /* s: run queue */
union {
void * info; /* s: scheduler-specific structure */
u_int timeslice; /* l: time-quantum for SCHED_M2 */
} l_sched;
struct cpu_info *volatile l_cpu;/* s: CPU we're on if LSONPROC */
kmutex_t * volatile l_mutex; /* l: ptr to mutex on sched state */
void *l_addr; /* l: PCB address; use lwp_getpcb() */
struct mdlwp l_md; /* l: machine-dependent fields. */
struct bintime l_rtime; /* l: real time */
@ -102,8 +108,7 @@ struct lwp {
u_int l_slpticks; /* l: Saved start time of sleep */
u_int l_slpticksum; /* l: Sum of ticks spent sleeping */
int l_biglocks; /* l: biglock count before sleep */
short l_stat; /* l: overall LWP status */
short l_class; /* l: scheduling class */
int l_class; /* l: scheduling class */
int l_kpriority; /* !: has kernel priority boost */
pri_t l_kpribase; /* !: kernel priority base level */
pri_t l_priority; /* l: scheduler priority */
@ -124,7 +129,6 @@ struct lwp {
kcpuset_t *l_affinity; /* l: CPU set for affinity */
/* Synchronisation. */
struct turnstile *l_ts; /* l: current turnstile */
struct syncobj *l_syncobj; /* l: sync object operations set */
LIST_ENTRY(lwp) l_sleepchain; /* l: sleep queue */
wchan_t l_wchan; /* l: sleep address */
@ -311,7 +315,6 @@ extern int maxlwp __read_mostly; /* max number of lwps */
*
* These values are set in stone and must not be reused with future changes.
*/
#define LSLARVAL 0 /* in pid table, but partially constructed */
#define LSIDL 1 /* Process being created by fork. */
#define LSRUN 2 /* Currently runnable. */
#define LSSLEEP 3 /* Sleeping on an address. */

View File: proc.h

@ -1,4 +1,4 @@
/* $NetBSD: proc.h,v 1.365 2020/05/07 20:02:34 kamil Exp $ */
/* $NetBSD: proc.h,v 1.366 2020/05/23 20:45:11 ad Exp $ */
/*-
* Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@ -500,6 +500,7 @@ proc_t * proc_find_raw(pid_t);
proc_t * proc_find(pid_t); /* Find process by ID */
proc_t * proc_find_lwpid(pid_t); /* Find process by LWP ID */
struct lwp * proc_find_lwp(proc_t *, pid_t); /* Find LWP in proc by ID */
struct lwp * proc_find_lwp_unlocked(proc_t *, pid_t);
/* Find LWP, acquire proc */
struct lwp * proc_find_lwp_acquire_proc(pid_t, proc_t **);
struct pgrp * pgrp_find(pid_t); /* Find process group by ID */

View File: sleepq.h

@ -1,4 +1,4 @@
/* $NetBSD: sleepq.h,v 1.30 2020/05/08 03:26:51 thorpej Exp $ */
/* $NetBSD: sleepq.h,v 1.31 2020/05/23 20:45:11 ad Exp $ */
/*-
* Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
@ -179,6 +179,7 @@ typedef struct tschain tschain_t;
void turnstile_init(void);
turnstile_t *turnstile_lookup(wchan_t);
void turnstile_ctor(turnstile_t *);
void turnstile_exit(wchan_t);
void turnstile_block(turnstile_t *, int, wchan_t, syncobj_t *);
void turnstile_wakeup(turnstile_t *, int, int, lwp_t *);
@ -187,7 +188,7 @@ void turnstile_print(volatile void *, void (*)(const char *, ...)
void turnstile_unsleep(lwp_t *, bool);
void turnstile_changepri(lwp_t *, pri_t);
extern pool_cache_t turnstile_cache;
extern struct pool turnstile_pool;
extern turnstile_t turnstile0;
#endif /* _KERNEL */