NetBSD/sys/kern/kern_sleepq.c
ad 513227e941 - Fix sleepq_block() to return EINTR if the LWP is cancelled. Pointed out
by yamt@.

- Introduce SOBJ_SLEEPQ_LIFO, and use it for LWPs sleeping via _lwp_park.
  libpthread enqueues most waiters in LIFO order to try to wake LWPs that
  ran recently, since their working set is more likely to be in cache.
  Matching the order of insertion reduces the time spent searching queues
  in the kernel.  (A sketch of a LIFO wait object follows this list.)

- Do not boost the priority of LWPs sleeping in _lwp_park, just let them
  sleep at their user priority level. LWPs waiting for some I/O event in
  the kernel still wait with kernel priority and get woken more quickly.
  This needs more evaluation and is to be revisited, but the effect on a
  variety of benchmarks is positive.

- When waking LWPs, do not send an IPI to remote CPUs or arrange for the
  current LWP to be preempted unless (a) the thread being awoken has kernel
  priority and has higher priority than the currently running thread or (b)
  the remote CPU is idle.
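
  As a rough illustration (not part of this change's diff), a wait object
  that asks for LIFO queueing carries SOBJ_SLEEPQ_LIFO in its syncobj
  flags.  The member names below are assumed from the syncobj_t layout of
  this period and the callbacks are the ones defined in kern_sleepq.c, so
  treat this as a sketch rather than the exact _lwp_park definition:

      /* Sketch only: a wait object that selects LIFO queueing. */
      static syncobj_t lwp_park_syncobj_sketch = {
              .sobj_flag      = SOBJ_SLEEPQ_LIFO,     /* insert waiters at the head */
              .sobj_unsleep   = sleepq_unsleep,       /* dequeue and make runnable */
              .sobj_changepri = sleepq_changepri,     /* adjust user priority */
              .sobj_lendpri   = sleepq_lendpri,       /* priority inheritance hook */
              .sobj_owner     = syncobj_noowner,      /* no single owning LWP */
      };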
2007-09-06 23:58:56 +00:00


/* $NetBSD: kern_sleepq.c,v 1.14 2007/09/06 23:59:01 ad Exp $ */
/*-
* Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Sleep queue implementation, used by turnstiles and general sleep/wakeup
 * interfaces.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.14 2007/09/06 23:59:01 ad Exp $");
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sleepq.h>
#include <sys/ktrace.h>
#include <uvm/uvm_extern.h>
int sleepq_sigtoerror(lwp_t *, int);
/* General purpose sleep table, used by ltsleep() and condition variables. */
sleeptab_t sleeptab;
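
/*
 * A wait channel address hashes to one of SLEEPTAB_HASH_SIZE bucket
 * queues, each protected by its own spin mutex; sleeptab_lookup()
 * selects and locks the bucket for a given wchan before an LWP is
 * enqueued on it.
 */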

/*
 * sleeptab_init:
 *
 *        Initialize a sleep table.
 */
void
sleeptab_init(sleeptab_t *st)
{
        sleepq_t *sq;
        int i;

        for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
                sq = &st->st_queues[i].st_queue;
                mutex_init(&st->st_queues[i].st_mutex, MUTEX_SPIN, IPL_SCHED);
                sleepq_init(sq, &st->st_queues[i].st_mutex);
        }
}

/*
 * sleepq_init:
 *
 *        Prepare a sleep queue for use.
 */
void
sleepq_init(sleepq_t *sq, kmutex_t *mtx)
{
        sq->sq_waiters = 0;
        sq->sq_mutex = mtx;
        TAILQ_INIT(&sq->sq_queue);
}

/*
 * sleepq_remove:
 *
 *        Remove an LWP from a sleep queue and wake it up.  Return non-zero
 *        if the LWP is swapped out; if so the caller needs to awaken the
 *        swapper to bring the LWP into memory.
 */
int
sleepq_remove(sleepq_t *sq, lwp_t *l)
{
        struct schedstate_percpu *spc;
        struct cpu_info *ci;
        pri_t pri;

        KASSERT(lwp_locked(l, sq->sq_mutex));
        KASSERT(sq->sq_waiters > 0);

        sq->sq_waiters--;
        TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);

#ifdef DIAGNOSTIC
        if (sq->sq_waiters == 0)
                KASSERT(TAILQ_FIRST(&sq->sq_queue) == NULL);
        else
                KASSERT(TAILQ_FIRST(&sq->sq_queue) != NULL);
#endif

        l->l_syncobj = &sched_syncobj;
        l->l_wchan = NULL;
        l->l_sleepq = NULL;
        l->l_flag &= ~LW_SINTR;

        ci = l->l_cpu;
        spc = &ci->ci_schedstate;

        /*
         * If not sleeping, the LWP must have been suspended.  Let whoever
         * holds it stopped set it running again.
         */
        if (l->l_stat != LSSLEEP) {
                KASSERT(l->l_stat == LSSTOP || l->l_stat == LSSUSPENDED);
                lwp_setlock(l, &spc->spc_lwplock);
                return 0;
        }

        /*
         * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
         * about to call mi_switch(), in which case it will yield.
         */
        if ((l->l_flag & LW_RUNNING) != 0) {
                l->l_stat = LSONPROC;
                l->l_slptime = 0;
                lwp_setlock(l, &spc->spc_lwplock);
                return 0;
        }

        /*
         * Set it running.  We'll try to get the last CPU that ran
         * this LWP to pick it up again.
         */
        spc_lock(ci);
        lwp_setlock(l, spc->spc_mutex);
        sched_setrunnable(l);
        l->l_stat = LSRUN;
        l->l_slptime = 0;
        if ((l->l_flag & LW_INMEM) != 0) {
                sched_enqueue(l, false);
                pri = lwp_eprio(l);

                /* XXX This test is not good enough! */
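                /*
                 * Kick the target CPU only if the LWP being woken holds
                 * kernel priority and outranks the LWP currently running
                 * there, or if that CPU is idle; otherwise no IPI or
                 * preemption is requested (see the commit message).
                 */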
                if ((pri < spc->spc_curpriority && pri < PUSER) ||
#ifdef MULTIPROCESSOR
                    ci->ci_curlwp == ci->ci_data.cpu_idlelwp) {
#else
                    curlwp == ci->ci_data.cpu_idlelwp) {
#endif
                        cpu_need_resched(ci, RESCHED_IMMED);
                }
                spc_unlock(ci);
                return 0;
        }

        spc_unlock(ci);
        return 1;
}

/*
 * sleepq_insert:
 *
 *        Insert an LWP into the sleep queue, optionally sorting by priority.
 */
inline void
sleepq_insert(sleepq_t *sq, lwp_t *l, syncobj_t *sobj)
{
        lwp_t *l2;
        const int pri = lwp_eprio(l);

        if ((sobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
                TAILQ_FOREACH(l2, &sq->sq_queue, l_sleepchain) {
                        if (lwp_eprio(l2) > pri) {
                                TAILQ_INSERT_BEFORE(l2, l, l_sleepchain);
                                return;
                        }
                }
        }
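
        /*
         * Either the queue is not priority sorted, or the new LWP sorts
         * after every current waiter: insert at the head for LIFO
         * objects, otherwise append at the tail (FIFO).
         */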
        if ((sobj->sobj_flag & SOBJ_SLEEPQ_LIFO) != 0)
                TAILQ_INSERT_HEAD(&sq->sq_queue, l, l_sleepchain);
        else
                TAILQ_INSERT_TAIL(&sq->sq_queue, l, l_sleepchain);
}

/*
 * sleepq_enqueue:
 *
 *        Enter an LWP into the sleep queue and prepare for sleep.  The
 *        sleep queue must already be locked, and any interlock (such as
 *        the kernel lock) must have been released (see sleeptab_lookup(),
 *        sleepq_enter()).
 */
void
sleepq_enqueue(sleepq_t *sq, pri_t pri, wchan_t wchan, const char *wmesg,
    syncobj_t *sobj)
{
        lwp_t *l = curlwp;

        KASSERT(mutex_owned(sq->sq_mutex));
        KASSERT(l->l_stat == LSONPROC);
        KASSERT(l->l_wchan == NULL && l->l_sleepq == NULL);

        l->l_syncobj = sobj;
        l->l_wchan = wchan;
        l->l_sleepq = sq;
        l->l_wmesg = wmesg;
        l->l_slptime = 0;
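
        /*
         * The caller chooses the priority to sleep at.  Per the commit
         * message, LWPs parking via _lwp_park now sleep at their user
         * priority rather than at a boosted kernel priority.
         */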
        l->l_priority = pri;
        l->l_stat = LSSLEEP;
        l->l_sleeperr = 0;

        sq->sq_waiters++;
        sleepq_insert(sq, l, sobj);
}

/*
 * sleepq_block:
 *
 *        After any intermediate step such as releasing an interlock,
 *        switch.  sleepq_block() may return early under exceptional
 *        conditions, for example if the LWP's containing process is
 *        exiting.
 */
int
sleepq_block(int timo, bool catch)
{
        int error = 0, sig;
        struct proc *p;
        lwp_t *l = curlwp;
        bool early = false;

        ktrcsw(1, 0);

        /*
         * If sleeping interruptibly, check for pending signals, exits or
         * core dump events.
         */
        if (catch) {
                l->l_flag |= LW_SINTR;
                if ((l->l_flag & (LW_CANCELLED|LW_WEXIT|LW_WCORE)) != 0) {
                        l->l_flag &= ~LW_CANCELLED;
                        error = EINTR;
                        early = true;
                } else if ((l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))
                        early = true;
        }

        if (early) {
                /* lwp_unsleep() will release the lock */
                lwp_unsleep(l);
        } else {
                if (timo)
                        callout_schedule(&l->l_timeout_ch, timo);
                mi_switch(l);

                /* The LWP and sleep queue are now unlocked. */
                if (timo) {
                        /*
                         * Even if the callout appears to have fired, we
                         * need to stop it in order to synchronise with
                         * other CPUs.
                         */
                        if (callout_stop(&l->l_timeout_ch))
                                error = EWOULDBLOCK;
                }
        }
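
        /*
         * We may have been awoken for a reason other than the wakeup we
         * were waiting for: re-check for cancellation, exit and core dump
         * requests, and for signals that arrived while asleep, converting
         * them into an error code for the caller.
         */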
        if (catch && error == 0) {
                p = l->l_proc;
                if ((l->l_flag & (LW_CANCELLED | LW_WEXIT | LW_WCORE)) != 0)
                        error = EINTR;
                else if ((l->l_flag & LW_PENDSIG) != 0) {
                        KERNEL_LOCK(1, l);      /* XXXSMP pool_put() */
                        mutex_enter(&p->p_smutex);
                        if ((sig = issignal(l)) != 0)
                                error = sleepq_sigtoerror(l, sig);
                        mutex_exit(&p->p_smutex);
                        KERNEL_UNLOCK_LAST(l);
                }
        }

        ktrcsw(0, 0);

        KERNEL_LOCK(l->l_biglocks, l);
        return error;
}

/*
 * sleepq_wake:
 *
 *        Wake zero or more LWPs blocked on a single wait channel.
 */
lwp_t *
sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected)
{
        lwp_t *l, *next;
        int swapin = 0;

        KASSERT(mutex_owned(sq->sq_mutex));
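
        /*
         * Scan the queue, removing and waking each LWP blocked on
         * 'wchan', and stop once 'expected' LWPs have been woken.
         */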
        for (l = TAILQ_FIRST(&sq->sq_queue); l != NULL; l = next) {
                KASSERT(l->l_sleepq == sq);
                next = TAILQ_NEXT(l, l_sleepchain);
                if (l->l_wchan != wchan)
                        continue;
                swapin |= sleepq_remove(sq, l);
                if (--expected == 0)
                        break;
        }

        sleepq_unlock(sq);

        /*
         * If there are newly awakened threads that need to be swapped in,
         * then kick the swapper into action.
         */
        if (swapin)
                uvm_kick_scheduler();

        return l;
}

/*
 * sleepq_unsleep:
 *
 *        Remove an LWP from its sleep queue and set it runnable again.
 *        sleepq_unsleep() is called with the LWP's mutex held, and will
 *        always release it.
 */
void
sleepq_unsleep(lwp_t *l)
{
        sleepq_t *sq = l->l_sleepq;
        int swapin;

        KASSERT(lwp_locked(l, NULL));
        KASSERT(l->l_wchan != NULL);
        KASSERT(l->l_mutex == sq->sq_mutex);

        swapin = sleepq_remove(sq, l);
        sleepq_unlock(sq);

        if (swapin)
                uvm_kick_scheduler();
}

/*
 * sleepq_timeout:
 *
 *        Entered via the callout(9) subsystem to time out an LWP that is
 *        on a sleep queue.
 */
void
sleepq_timeout(void *arg)
{
        lwp_t *l = arg;

        /*
         * Lock the LWP.  Assuming it's still on the sleep queue, its
         * current mutex will also be the sleep queue mutex.
         */
        lwp_lock(l);

        if (l->l_wchan == NULL) {
                /* Somebody beat us to it. */
                lwp_unlock(l);
                return;
        }
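
        /*
         * The sleep has timed out: wake the LWP.  sleepq_block() detects
         * the expired callout via callout_stop() and reports EWOULDBLOCK
         * to the caller.
         */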
        lwp_unsleep(l);
}

/*
 * sleepq_sigtoerror:
 *
 *        Given a signal number, interpret and return an error code.
 */
int
sleepq_sigtoerror(lwp_t *l, int sig)
{
        struct proc *p = l->l_proc;
        int error;

        KASSERT(mutex_owned(&p->p_smutex));

        /*
         * If this sleep was canceled, don't let the syscall restart.
         */
        if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
                error = EINTR;
        else
                error = ERESTART;

        return error;
}

/*
 * sleepq_abort:
 *
 *        After a panic or during autoconfiguration, lower the interrupt
 *        priority level to give pending interrupts a chance to run, and
 *        then return.  Called if sleepq_dontsleep() returns non-zero, and
 *        always returns zero.
 */
int
sleepq_abort(kmutex_t *mtx, int unlock)
{
        extern int safepri;
        int s;

        s = splhigh();
        splx(safepri);
        splx(s);

        if (mtx != NULL && unlock != 0)
                mutex_exit(mtx);

        return 0;
}

/*
 * sleepq_changepri:
 *
 *        Adjust the priority of an LWP residing on a sleepq.  This method
 *        will only alter the user priority; the effective priority is
 *        assumed to have been fixed at the time of insertion into the
 *        queue.
 */
void
sleepq_changepri(lwp_t *l, pri_t pri)
{
        KASSERT(lwp_locked(l, l->l_sleepq->sq_mutex));

        l->l_usrpri = pri;
}
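
/*
 * sleepq_lendpri:
 *
 *        Lend a priority to an LWP residing on a sleepq, typically for
 *        priority inheritance.  If the change alters the LWP's effective
 *        priority and the queue is priority sorted, re-insert the LWP at
 *        its new position.
 */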
void
sleepq_lendpri(lwp_t *l, pri_t pri)
{
        sleepq_t *sq = l->l_sleepq;
        pri_t opri;

        KASSERT(lwp_locked(l, sq->sq_mutex));

        opri = lwp_eprio(l);
        l->l_inheritedprio = pri;

        if (lwp_eprio(l) != opri &&
            (l->l_syncobj->sobj_flag & SOBJ_SLEEPQ_SORTED) != 0) {
                TAILQ_REMOVE(&sq->sq_queue, l, l_sleepchain);
                sleepq_insert(sq, l, l->l_syncobj);
        }
}