/* $NetBSD: kern_turnstile.c,v 1.40 2020/05/23 20:45:10 ad Exp $ */

/*-
 * Copyright (c) 2002, 2006, 2007, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Turnstiles are described in detail in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 *
 * Turnstiles are kept in a hash table.  There are likely to be many more
 * synchronisation objects than there are threads.  Since a thread can block
 * on only one lock at a time, we only need one turnstile per thread, and
 * so they are allocated at thread creation time.
 *
 * When a thread decides it needs to block on a lock, it looks up the
 * active turnstile for that lock.  If no active turnstile exists, then
 * the process lends its turnstile to the lock.  If there is already an
 * active turnstile for the lock, the thread places its turnstile on a
 * list of free turnstiles, and references the active one instead.
 *
 * The act of looking up the turnstile acquires an interlock on the sleep
 * queue.  If a thread decides it doesn't need to block after all, then this
 * interlock must be released by explicitly aborting the turnstile
 * operation.
 *
 * When a thread is awakened, it needs to get its turnstile back.  If there
 * are still other threads waiting in the active turnstile, the thread
 * grabs a free turnstile off the free list.  Otherwise, it can take back
 * the active turnstile from the lock (thus deactivating the turnstile).
 *
 * Turnstiles are where we do priority inheritance.
 */

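/*
 * A rough sketch of how a lock primitive is expected to drive this
 * interface (the real call sites are in the likes of kern_mutex.c and
 * kern_rwlock.c; "lock_is_still_held" is a stand-in for the caller's
 * own test, not a real function):
 *
 *	ts = turnstile_lookup(lock);	// hashes lock, takes chain lock
 *	if (lock_is_still_held(lock))
 *		turnstile_block(ts, TS_WRITER_Q, lock, sobj);
 *	else
 *		turnstile_exit(lock);	// abort: drop the interlock
 */
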
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.40 2020/05/23 20:45:10 ad Exp $");

#include <sys/param.h>
#include <sys/lockdebug.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sleepq.h>
#include <sys/systm.h>

/*
 * Shift of 6 aligns to typical cache line size of 64 bytes;  there's no
 * point having two turnstile locks to back two lock objects that share one
 * cache line.
 */
#define	TS_HASH_SIZE	128
#define	TS_HASH_MASK	(TS_HASH_SIZE - 1)
#define	TS_HASH(obj)	(((uintptr_t)(obj) >> 6) & TS_HASH_MASK)

static tschain_t	turnstile_chains[TS_HASH_SIZE] __cacheline_aligned;
struct pool		turnstile_pool;
extern turnstile_t	turnstile0;

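/*
 * Each bucket lock is padded out to COHERENCY_UNIT so that no two
 * locks share a cache line (avoids false sharing between buckets).
 */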
static union {
	kmutex_t	lock;
	uint8_t		pad[COHERENCY_UNIT];
} turnstile_locks[TS_HASH_SIZE] __cacheline_aligned;

/*
 * turnstile_init:
 *
 *	Initialize the turnstile mechanism.
 */
void
turnstile_init(void)
{
	int i;

	for (i = 0; i < TS_HASH_SIZE; i++) {
		LIST_INIT(&turnstile_chains[i]);
		mutex_init(&turnstile_locks[i].lock, MUTEX_DEFAULT, IPL_SCHED);
	}

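	/*
	 * Allocate turnstiles aligned to the coherency unit, so that
	 * each one starts on its own cache line.
	 */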
	pool_init(&turnstile_pool, sizeof(turnstile_t), coherency_unit,
	    0, 0, "tstile", NULL, IPL_NONE);

	turnstile_ctor(&turnstile0);
}

/*
 * turnstile_ctor:
 *
 *	Constructor for turnstiles.
 */
void
turnstile_ctor(turnstile_t *ts)
{

	memset(ts, 0, sizeof(*ts));
	sleepq_init(&ts->ts_sleepq[TS_READER_Q]);
	sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]);
}

/*
 * turnstile_remove:
 *
 *	Remove an LWP from a turnstile sleep queue and wake it.
 */
static inline void
turnstile_remove(turnstile_t *ts, lwp_t *l, int q)
{
	turnstile_t *nts;

	KASSERT(l->l_ts == ts);

	/*
	 * This process is no longer using the active turnstile.
	 * Find an inactive one on the free list to give to it.
	 */
	if ((nts = ts->ts_free) != NULL) {
		KASSERT(TS_ALL_WAITERS(ts) > 1);
		l->l_ts = nts;
		ts->ts_free = nts->ts_free;
		nts->ts_free = NULL;
	} else {
		/*
		 * If the free list is empty, this is the last
		 * waiter.
		 */
		KASSERT(TS_ALL_WAITERS(ts) == 1);
		LIST_REMOVE(ts, ts_chain);
	}

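	/* Take the LWP off the sleep queue; sleepq_remove() also wakes it. */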
	ts->ts_waiters[q]--;
	sleepq_remove(&ts->ts_sleepq[q], l);
}

/*
 * turnstile_lookup:
 *
 *	Look up the turnstile for the specified lock.  This acquires and
 *	holds the turnstile chain lock (sleep queue interlock).
 */
turnstile_t *
turnstile_lookup(wchan_t obj)
{
	turnstile_t *ts;
	tschain_t *tc;
	u_int hash;

	hash = TS_HASH(obj);
	tc = &turnstile_chains[hash];
	mutex_spin_enter(&turnstile_locks[hash].lock);

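	/*
	 * The chain lock remains held on return whether or not a
	 * turnstile is found; a caller that then decides not to block
	 * must release it with turnstile_exit().
	 */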
	LIST_FOREACH(ts, tc, ts_chain)
		if (ts->ts_obj == obj)
			return (ts);

	/*
	 * No turnstile yet for this lock.  No problem, turnstile_block()
	 * handles this by fetching the turnstile from the blocking thread.
	 */
	return (NULL);
}

/*
 * turnstile_exit:
 *
 *	Abort a turnstile operation.
 */
void
turnstile_exit(wchan_t obj)
{

	mutex_spin_exit(&turnstile_locks[TS_HASH(obj)].lock);
}

/*
 * turnstile_lendpri:
 *
 *	Lend our priority to lwps on the blocking chain.
 *
 *	If the current owner of the lock (l->l_wchan, set by sleepq_enqueue)
 *	has a priority lower than ours (lwp_eprio(l)), lend our priority to
 *	it to avoid priority inversions.
 */
static void
turnstile_lendpri(lwp_t *cur)
{
	lwp_t * l = cur;
	pri_t prio;

	/*
	 * NOTE: if you get a panic in this code block, it is likely that
	 * a lock has been destroyed or corrupted while still in use.  Try
	 * compiling a kernel with LOCKDEBUG to pinpoint the problem.
	 */
	LOCKDEBUG_BARRIER(l->l_mutex, 1);
	KASSERT(l == curlwp);
	prio = lwp_eprio(l);
	for (;;) {
		lwp_t *owner;
		turnstile_t *ts;
		bool dolock;

		if (l->l_wchan == NULL)
			break;

		/*
		 * Ask the syncobj for the owner of the lock.
		 */
		owner = (*l->l_syncobj->sobj_owner)(l->l_wchan);
		if (owner == NULL)
			break;

		/*
		 * The owner may have changed as we have dropped the tc lock.
		 */
		if (cur == owner) {
			/*
			 * We own the lock: stop here, sleepq_block()
			 * should wake up immediately.
			 */
			break;
		}
		/*
		 * Acquire owner->l_mutex if we don't have it yet.
		 * Because we already have another LWP lock (l->l_mutex) held,
		 * we need to play a try lock dance to avoid deadlock.
		 */
		dolock = l->l_mutex != owner->l_mutex;
		if (l == owner || (dolock && !lwp_trylock(owner))) {
			/*
			 * The owner was changed behind us or trylock failed.
			 * Restart from curlwp.
			 *
			 * Note that there may be a livelock here:
			 * the owner may try grabbing cur's lock (which is
			 * the tc lock) while we're trying to grab the
			 * owner's lock.
			 */
			lwp_unlock(l);
			l = cur;
			lwp_lock(l);
			prio = lwp_eprio(l);
			continue;
		}
		/*
		 * If the owner's priority is already higher than ours,
		 * there's nothing to do anymore.
		 */
		if (prio <= lwp_eprio(owner)) {
			if (dolock)
				lwp_unlock(owner);
			break;
		}
		/*
		 * Lend our priority to the 'owner' LWP.
		 *
		 * Update lenders info for turnstile_unlendpri.
		 */
		ts = l->l_ts;
		KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL);
		if (ts->ts_inheritor == NULL) {
			ts->ts_inheritor = owner;
			ts->ts_eprio = prio;
			SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain);
			lwp_lendpri(owner, prio);
		} else if (prio > ts->ts_eprio) {
			ts->ts_eprio = prio;
			lwp_lendpri(owner, prio);
		}
		if (dolock)
			lwp_unlock(l);
		LOCKDEBUG_BARRIER(owner->l_mutex, 1);
		l = owner;
	}
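	/*
	 * We fell off the end of the blocking chain.  If we finished
	 * holding some other LWP's lock, trade it back for our own.
	 */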
	LOCKDEBUG_BARRIER(l->l_mutex, 1);
	if (cur->l_mutex != l->l_mutex) {
		lwp_unlock(l);
		lwp_lock(cur);
	}
	LOCKDEBUG_BARRIER(cur->l_mutex, 1);
}

/*
 * turnstile_unlendpri: undo turnstile_lendpri
 */
static void
turnstile_unlendpri(turnstile_t *ts)
{
	lwp_t * const l = curlwp;
	turnstile_t *iter;
	turnstile_t *next;
	turnstile_t *prev = NULL;
	pri_t prio;
	bool dolock;

	KASSERT(ts->ts_inheritor != NULL);
	ts->ts_inheritor = NULL;
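	/*
	 * Take curlwp's lock only if it is currently the per-CPU
	 * spc_lwplock, i.e. not a lock the caller already interlocks
	 * us with.
	 */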
	dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock;
	if (dolock) {
		lwp_lock(l);
	}

	/*
	 * The following loop does two things:
	 *
	 * - removes ts from this LWP's list of lenders.
	 *
	 * - finds the highest priority among the remaining lenders.
	 */
	prio = -1;
	KASSERT(!SLIST_EMPTY(&l->l_pi_lenders));
	for (iter = SLIST_FIRST(&l->l_pi_lenders);
	    iter != NULL; iter = next) {
		KASSERT(lwp_eprio(l) >= ts->ts_eprio);
		next = SLIST_NEXT(iter, ts_pichain);
		if (iter == ts) {
			if (prev == NULL) {
				SLIST_REMOVE_HEAD(&l->l_pi_lenders,
				    ts_pichain);
			} else {
				SLIST_REMOVE_AFTER(prev, ts_pichain);
			}
		} else if (prio < iter->ts_eprio) {
			prio = iter->ts_eprio;
		}
		prev = iter;
	}

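	/*
	 * prio is now the highest priority still lent to us, or -1
	 * (no inherited priority) if no lenders remain.
	 */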
	lwp_lendpri(l, prio);

	if (dolock) {
		lwp_unlock(l);
	}
}

/*
 * turnstile_block:
 *
 *	Enter an object into the turnstile chain and prepare the current
 *	LWP for sleep.
 */
void
turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj)
{
	lwp_t * const l = curlwp; /* cached curlwp */
	turnstile_t *ots;
	tschain_t *tc;
	kmutex_t *lock;
	sleepq_t *sq;
	pri_t obase;
	u_int hash;

	hash = TS_HASH(obj);
	tc = &turnstile_chains[hash];
	lock = &turnstile_locks[hash].lock;

	KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
	KASSERT(mutex_owned(lock));
	KASSERT(l != NULL && l->l_ts != NULL);

	if (ts == NULL) {
		/*
		 * We are the first thread to wait for this object;
		 * lend our turnstile to it.
		 */
		ts = l->l_ts;
		KASSERT(TS_ALL_WAITERS(ts) == 0);
		KASSERT(LIST_EMPTY(&ts->ts_sleepq[TS_READER_Q]) &&
			LIST_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
		ts->ts_obj = obj;
		ts->ts_inheritor = NULL;
		LIST_INSERT_HEAD(tc, ts, ts_chain);
	} else {
		/*
		 * Object already has a turnstile.  Put our turnstile
		 * onto the free list, and reference the existing
		 * turnstile instead.
		 */
		ots = l->l_ts;
		KASSERT(ots->ts_free == NULL);
		ots->ts_free = ts->ts_free;
		ts->ts_free = ots;
		l->l_ts = ts;

		KASSERT(ts->ts_obj == obj);
		KASSERT(TS_ALL_WAITERS(ts) != 0);
		KASSERT(!LIST_EMPTY(&ts->ts_sleepq[TS_READER_Q]) ||
			!LIST_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
	}

	sq = &ts->ts_sleepq[q];
	ts->ts_waiters[q]++;
	sleepq_enter(sq, l, lock);
	LOCKDEBUG_BARRIER(lock, 1);
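	/*
	 * Temporarily boost our base priority into the kernel range
	 * while blocked; the old base is restored after sleepq_block().
	 */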
	l->l_kpriority = true;
	obase = l->l_kpribase;
	if (obase < PRI_KTHREAD)
		l->l_kpribase = PRI_KTHREAD;
	sleepq_enqueue(sq, obj, "tstile", sobj, false);

	/*
	 * Disable preemption across this entire block, as we may drop
	 * scheduler locks (allowing preemption), and would prefer not
	 * to be interrupted while in a state of flux.
	 */
	KPREEMPT_DISABLE(l);
	KASSERT(lock == l->l_mutex);
	turnstile_lendpri(l);
	sleepq_block(0, false);
	l->l_kpribase = obase;
	KPREEMPT_ENABLE(l);
}

/*
 * turnstile_wakeup:
 *
 *	Wake up the specified number of threads that are blocked
 *	in a turnstile.
 */
void
turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl)
{
	sleepq_t *sq;
	kmutex_t *lock;
	u_int hash;
	lwp_t *l;

	hash = TS_HASH(ts->ts_obj);
	lock = &turnstile_locks[hash].lock;
	sq = &ts->ts_sleepq[q];

	KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
	KASSERT(count > 0 && count <= TS_WAITERS(ts, q));
	KASSERT(mutex_owned(lock));
	KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL);

	/*
	 * Restore inherited priority if necessary.
	 */
	if (ts->ts_inheritor != NULL) {
		turnstile_unlendpri(ts);
	}

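	/*
	 * Wake either the nominated LWP, or the first 'count' waiters
	 * in queue order.
	 */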
	if (nl != NULL) {
#if defined(DEBUG) || defined(LOCKDEBUG)
		LIST_FOREACH(l, sq, l_sleepchain) {
			if (l == nl)
				break;
		}
		if (l == NULL)
			panic("turnstile_wakeup: nl not on sleepq");
#endif
		turnstile_remove(ts, nl, q);
	} else {
		while (count-- > 0) {
			l = LIST_FIRST(sq);
			KASSERT(l != NULL);
			turnstile_remove(ts, l, q);
		}
	}
	mutex_spin_exit(lock);
}

/*
 * turnstile_unsleep:
 *
 *	Remove an LWP from the turnstile.  This is called when the LWP has
 *	not been awoken normally but instead interrupted: for example, if it
 *	has received a signal.  It's not a valid action for turnstiles,
 *	since LWPs blocking on a turnstile are not interruptible.
 */
void
turnstile_unsleep(lwp_t *l, bool cleanup)
{

	lwp_unlock(l);
	panic("turnstile_unsleep");
}

/*
 * turnstile_changepri:
 *
 *	Adjust the priority of an LWP residing on a turnstile.
 */
void
turnstile_changepri(lwp_t *l, pri_t pri)
{

	/* XXX priority inheritance */
	sleepq_changepri(l, pri);
}

#if defined(LOCKDEBUG)
/*
 * turnstile_print:
 *
 *	Given the address of a lock object, print the contents of a
 *	turnstile.
 */
void
turnstile_print(volatile void *obj, void (*pr)(const char *, ...))
{
	turnstile_t *ts;
	tschain_t *tc;
	sleepq_t *rsq, *wsq;
	u_int hash;
	lwp_t *l;

	hash = TS_HASH(obj);
	tc = &turnstile_chains[hash];

	LIST_FOREACH(ts, tc, ts_chain)
		if (ts->ts_obj == obj)
			break;

	if (ts == NULL) {
		(*pr)("Turnstile: no active turnstile for this lock.\n");
		return;
	}

	rsq = &ts->ts_sleepq[TS_READER_Q];
	wsq = &ts->ts_sleepq[TS_WRITER_Q];

	(*pr)("Turnstile:\n");
	(*pr)("=> %d waiting readers:", TS_WAITERS(ts, TS_READER_Q));
	LIST_FOREACH(l, rsq, l_sleepchain) {
		(*pr)(" %p", l);
	}
	(*pr)("\n");

	(*pr)("=> %d waiting writers:", TS_WAITERS(ts, TS_WRITER_Q));
	LIST_FOREACH(l, wsq, l_sleepchain) {
		(*pr)(" %p", l);
	}
	(*pr)("\n");
}
#endif	/* LOCKDEBUG */