NetBSD/sys/kern/sys_lwp.c
ad 513227e941 - Fix sleepq_block() to return EINTR if the LWP is cancelled. Pointed out
by yamt@.

- Introduce SOBJ_SLEEPQ_LIFO, and use for LWPs sleeping via _lwp_park.
  libpthread enqueues most waiters in LIFO order to try and wake LWPs that
  ran recently, since their working set is more likely to be in cache.
  Matching the order of insertion reduces the time spent searching queues
  in the kernel.

- Do not boost the priority of LWPs sleeping in _lwp_park, just let them
  sleep at their user priority level. LWPs waiting for some I/O event in
  the kernel still wait with kernel priority and get woken more quickly.
  This needs more evaluation and is to be revisited, but the effect on a
  variety of benchmarks is positive.

- When waking LWPs, do not send an IPI to remote CPUs or arrange for the
  current LWP to be preempted unless (a) the thread being awoken has kernel
  priority and has higher priority than the currently running thread or (b)
  the remote CPU is idle.
2007-09-06 23:58:56 +00:00
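
For orientation, the following is a minimal, purely illustrative sketch of how a user-level library in the style of libpthread might drive these interfaces. The userland prototypes are assumed to mirror the syscall argument structures in the file below; the condition flag, function names and wait object are hypothetical, and real libpthread keeps a proper waiter queue and uses atomic operations rather than a bare volatile flag.

/*
 * Illustrative sketch only: park the calling thread until 'cond' becomes
 * nonzero, with a cooperating waker.  Not the actual libpthread code.
 */
#include <sys/types.h>
#include <lwp.h>
#include <errno.h>
#include <stddef.h>

static volatile int cond;		/* hypothetical user-level condition */

static void
wait_for_cond(const void *obj)
{

	while (!cond) {
		/*
		 * _lwp_park() may return 0 (woken by an unpark), or -1 with
		 * errno set to EALREADY (an unpark raced ahead of us), EINTR
		 * (interrupted or cancelled) or ETIMEDOUT; in each case we
		 * simply recheck the condition.
		 */
		(void)_lwp_park(NULL, 0, obj, NULL);
	}
}

static void
wake_waiter(lwpid_t waiter, const void *obj)
{

	cond = 1;
	/*
	 * Passing the same hint lets the kernel go straight to the park
	 * sleep queue instead of taking the slow path over the LWP list.
	 */
	(void)_lwp_unpark(waiter, obj);
}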

/* $NetBSD: sys_lwp.c,v 1.26 2007/09/06 23:59:01 ad Exp $ */
/*-
* Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Nathan J. Williams, and Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Lightweight process (LWP) system calls. See kern_lwp.c for a description
* of LWPs.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.26 2007/09/06 23:59:01 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/types.h>
#include <sys/syscallargs.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/sleepq.h>
#include <uvm/uvm_extern.h>
#define LWP_UNPARK_MAX 1024
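/*
* Sync object used by LWPs sleeping in _lwp_park().  SOBJ_SLEEPQ_LIFO
* matches libpthread's LIFO enqueue order, keeping recently-parked LWPs
* near the head of the queue and shortening kernel queue searches (see
* the commit note above).
*/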
syncobj_t lwp_park_sobj = {
SOBJ_SLEEPQ_LIFO,
sleepq_unsleep,
sleepq_changepri,
sleepq_lendpri,
syncobj_noowner,
};
sleeptab_t lwp_park_tab;
void
lwp_sys_init(void)
{
sleeptab_init(&lwp_park_tab);
}
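/*
* Create a new LWP in the current process from the supplied ucontext.
* Unless LWP_SUSPENDED was requested the new LWP is set running (or
* stopped, if the process is stopping), and its ID is copied out to the
* caller through 'new_lwp'.
*/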
/* ARGSUSED */
int
sys__lwp_create(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_create_args /* {
syscallarg(const ucontext_t *) ucp;
syscallarg(u_long) flags;
syscallarg(lwpid_t *) new_lwp;
} */ *uap = v;
struct proc *p = l->l_proc;
struct lwp *l2;
vaddr_t uaddr;
bool inmem;
ucontext_t *newuc;
int error, lid;
newuc = pool_get(&lwp_uc_pool, PR_WAITOK);
error = copyin(SCARG(uap, ucp), newuc, p->p_emul->e_ucsize);
if (error) {
pool_put(&lwp_uc_pool, newuc);
return error;
}
/* XXX check against resource limits */
inmem = uvm_uarea_alloc(&uaddr);
if (__predict_false(uaddr == 0)) {
pool_put(&lwp_uc_pool, newuc);
return ENOMEM;
}
error = newlwp(l, p, uaddr, inmem,
SCARG(uap, flags) & LWP_DETACHED,
NULL, 0, p->p_emul->e_startlwp, newuc, &l2);
if (error) {
uvm_uarea_free(uaddr);
pool_put(&lwp_uc_pool, newuc);
return error;
}
lid = l2->l_lid;
error = copyout(&lid, SCARG(uap, new_lwp), sizeof(lid));
if (error) {
lwp_exit(l2);
pool_put(&lwp_uc_pool, newuc);
return error;
}
/*
* Set the new LWP running, unless the caller has requested that
* it be created in suspended state. If the process is stopping,
* then the LWP is created stopped.
*/
mutex_enter(&p->p_smutex);
lwp_lock(l2);
if ((SCARG(uap, flags) & LWP_SUSPENDED) == 0 &&
(l->l_flag & (LW_WREBOOT | LW_WSUSPEND | LW_WEXIT)) == 0) {
if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0)
l2->l_stat = LSSTOP;
else {
KASSERT(lwp_locked(l2, l2->l_cpu->ci_schedstate.spc_mutex));
p->p_nrlwps++;
l2->l_stat = LSRUN;
sched_enqueue(l2, false);
}
} else
l2->l_stat = LSSUSPENDED;
lwp_unlock(l2);
mutex_exit(&p->p_smutex);
return 0;
}
int
sys__lwp_exit(struct lwp *l, void *v, register_t *retval)
{
lwp_exit(l);
return 0;
}
int
sys__lwp_self(struct lwp *l, void *v, register_t *retval)
{
*retval = l->l_lid;
return 0;
}
int
sys__lwp_getprivate(struct lwp *l, void *v, register_t *retval)
{
*retval = (uintptr_t)l->l_private;
return 0;
}
int
sys__lwp_setprivate(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_setprivate_args /* {
syscallarg(void *) ptr;
} */ *uap = v;
l->l_private = SCARG(uap, ptr);
return 0;
}
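/*
* Suspend the target LWP and wait until the suspension has taken effect,
* the target has exited, or the process is exiting.  Suspending ourselves
* when we are the only running LWP, or while exiting, fails with EDEADLK.
*/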
int
sys__lwp_suspend(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_suspend_args /* {
syscallarg(lwpid_t) target;
} */ *uap = v;
struct proc *p = l->l_proc;
struct lwp *t;
int error;
mutex_enter(&p->p_smutex);
if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
mutex_exit(&p->p_smutex);
return ESRCH;
}
/*
* Check for deadlock, which is only possible when we're suspending
* ourselves. XXX There is a short race here, as p_nrlwps is only
* incremented when an LWP suspends itself on the kernel/user
* boundary. It's still possible to kill -9 the process so we
* don't bother checking further.
*/
lwp_lock(t);
if ((t == l && p->p_nrlwps == 1) ||
(l->l_flag & (LW_WCORE | LW_WEXIT)) != 0) {
lwp_unlock(t);
mutex_exit(&p->p_smutex);
return EDEADLK;
}
/*
* Suspend the LWP. XXX If it's on a different CPU, we should wait
* for it to be preempted, where it will put itself to sleep.
*
* Suspension of the current LWP will happen on return to userspace.
*/
error = lwp_suspend(l, t);
if (error) {
mutex_exit(&p->p_smutex);
return error;
}
/*
* Wait for:
* o process exiting
* o target LWP suspended
* o target LWP not suspended and LW_WSUSPEND clear
* o target LWP exited
*/
for (;;) {
error = cv_wait_sig(&p->p_lwpcv, &p->p_smutex);
if (error) {
error = ERESTART;
break;
}
if (lwp_find(p, SCARG(uap, target)) == NULL) {
error = ESRCH;
break;
}
if ((l->l_flag | t->l_flag) & (LW_WCORE | LW_WEXIT)) {
error = ERESTART;
break;
}
if (t->l_stat == LSSUSPENDED ||
(t->l_flag & LW_WSUSPEND) == 0)
break;
}
mutex_exit(&p->p_smutex);
return error;
}
int
sys__lwp_continue(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_continue_args /* {
syscallarg(lwpid_t) target;
} */ *uap = v;
int error;
struct proc *p = l->l_proc;
struct lwp *t;
error = 0;
mutex_enter(&p->p_smutex);
if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
mutex_exit(&p->p_smutex);
return ESRCH;
}
lwp_lock(t);
lwp_continue(t);
mutex_exit(&p->p_smutex);
return error;
}
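/*
* Wake an LWP that is blocked in an interruptible sleep in the kernel.
* LW_CANCELLED and LW_UNPARKED are set first so that a target which has
* not yet blocked will bail out of the sleep as well.  Returns ENODEV if
* the target is not sleeping and EBUSY if its sleep is uninterruptible.
*/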
int
sys__lwp_wakeup(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_wakeup_args /* {
syscallarg(lwpid_t) target;
} */ *uap = v;
struct lwp *t;
struct proc *p;
int error;
p = l->l_proc;
mutex_enter(&p->p_smutex);
if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
mutex_exit(&p->p_smutex);
return ESRCH;
}
lwp_lock(t);
t->l_flag |= (LW_CANCELLED | LW_UNPARKED);
if (t->l_stat != LSSLEEP) {
lwp_unlock(t);
error = ENODEV;
} else if ((t->l_flag & LW_SINTR) == 0) {
lwp_unlock(t);
error = EBUSY;
} else {
/* Wake it up. lwp_unsleep() will release the LWP lock. */
lwp_unsleep(t);
error = 0;
}
mutex_exit(&p->p_smutex);
return error;
}
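/*
* Wait for an LWP within the current process to exit.  The ID of the
* departed LWP is copied out through 'departed' if requested.
*/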
int
sys__lwp_wait(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_wait_args /* {
syscallarg(lwpid_t) wait_for;
syscallarg(lwpid_t *) departed;
} */ *uap = v;
struct proc *p = l->l_proc;
int error;
lwpid_t dep;
mutex_enter(&p->p_smutex);
error = lwp_wait1(l, SCARG(uap, wait_for), &dep, 0);
mutex_exit(&p->p_smutex);
if (error)
return error;
if (SCARG(uap, departed)) {
error = copyout(&dep, SCARG(uap, departed), sizeof(dep));
if (error)
return error;
}
return 0;
}
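/*
* Direct a signal at a specific LWP within the current process.  A signal
* number of zero only verifies that the target exists.
*/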
/* ARGSUSED */
int
sys__lwp_kill(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_kill_args /* {
syscallarg(lwpid_t) target;
syscallarg(int) signo;
} */ *uap = v;
struct proc *p = l->l_proc;
struct lwp *t;
ksiginfo_t ksi;
int signo = SCARG(uap, signo);
int error = 0;
if ((u_int)signo >= NSIG)
return EINVAL;
KSI_INIT(&ksi);
ksi.ksi_signo = signo;
ksi.ksi_code = SI_USER;
ksi.ksi_pid = p->p_pid;
ksi.ksi_uid = kauth_cred_geteuid(l->l_cred);
ksi.ksi_lid = SCARG(uap, target);
mutex_enter(&proclist_mutex);
mutex_enter(&p->p_smutex);
if ((t = lwp_find(p, ksi.ksi_lid)) == NULL)
error = ESRCH;
else if (signo != 0)
kpsignal2(p, &ksi);
mutex_exit(&p->p_smutex);
mutex_exit(&proclist_mutex);
return error;
}
int
sys__lwp_detach(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_detach_args /* {
syscallarg(lwpid_t) target;
} */ *uap = v;
struct proc *p;
struct lwp *t;
lwpid_t target;
int error;
target = SCARG(uap, target);
p = l->l_proc;
mutex_enter(&p->p_smutex);
if (l->l_lid == target)
t = l;
else {
/*
* We can't use lwp_find() here because the target might
* be a zombie.
*/
LIST_FOREACH(t, &p->p_lwps, l_sibling)
if (t->l_lid == target)
break;
}
/*
* If the LWP is already detached, there's nothing to do.
* If it's a zombie, we need to clean up after it. LSZOMB
* is visible with the proc mutex held.
*
* After we have detached or released the LWP, kick any
* other LWPs that may be sitting in _lwp_wait(), waiting
* for the target LWP to exit.
*/
if (t != NULL && t->l_stat != LSIDL) {
if ((t->l_prflag & LPR_DETACHED) == 0) {
p->p_ndlwps++;
t->l_prflag |= LPR_DETACHED;
if (t->l_stat == LSZOMB) {
/* Releases proc mutex. */
lwp_free(t, false, false);
return 0;
}
error = 0;
/*
* Have any LWPs sleeping in lwp_wait() recheck
* for deadlock.
*/
cv_broadcast(&p->p_lwpcv);
} else
error = EINVAL;
} else
error = ESRCH;
mutex_exit(&p->p_smutex);
return error;
}
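/*
* Compute the sleep queue wait channel for a park operation: the owning
* process XORed with the user-supplied hint, so that parks on the same
* user object within a process hash to the same queue.
*/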
static inline wchan_t
lwp_park_wchan(struct proc *p, const void *hint)
{
return (wchan_t)((uintptr_t)p ^ (uintptr_t)hint);
}
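/*
* Unpark the named LWP.  The fast path searches the park sleep queue
* directly; if the target has not parked yet, fall back to setting
* LW_UNPARKED so that its next call to _lwp_park() returns immediately.
*/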
int
lwp_unpark(lwpid_t target, const void *hint)
{
sleepq_t *sq;
wchan_t wchan;
int swapin;
proc_t *p;
lwp_t *t;
/*
* Easy case: search for the LWP on the sleep queue. If
* it's parked, remove it from the queue and set running.
*/
p = curproc;
wchan = lwp_park_wchan(p, hint);
sq = sleeptab_lookup(&lwp_park_tab, wchan);
TAILQ_FOREACH(t, &sq->sq_queue, l_sleepchain)
if (t->l_proc == p && t->l_lid == target)
break;
if (__predict_true(t != NULL)) {
swapin = sleepq_remove(sq, t);
sleepq_unlock(sq);
if (swapin)
uvm_kick_scheduler();
return 0;
}
/*
* The LWP hasn't parked yet. Take the hit and mark the
* operation as pending.
*/
sleepq_unlock(sq);
mutex_enter(&p->p_smutex);
if ((t = lwp_find(p, target)) == NULL) {
mutex_exit(&p->p_smutex);
return ESRCH;
}
/*
* It may not have parked yet, we may have raced, or it
* is parked on a different user sync object.
*/
lwp_lock(t);
if (t->l_syncobj == &lwp_park_sobj) {
/* Releases the LWP lock. */
lwp_unsleep(t);
} else {
/*
* Set the operation pending. The next call to _lwp_park
* will return early.
*/
t->l_flag |= LW_UNPARKED;
lwp_unlock(t);
}
mutex_exit(&p->p_smutex);
return 0;
}
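/*
* Park the calling LWP on the wait channel derived from 'hint'.  The
* timespec, if given, is an absolute timeout and is converted to a
* relative one here.  Returns EALREADY if a wakeup or unpark was already
* pending, ETIMEDOUT on timeout, and EINTR if interrupted or cancelled.
*/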
int
lwp_park(struct timespec *ts, const void *hint)
{
struct timespec tsx;
sleepq_t *sq;
wchan_t wchan;
int timo, error;
lwp_t *l;
/* Fix up the given timeout value. */
if (ts != NULL) {
getnanotime(&tsx);
timespecsub(ts, &tsx, &tsx);
if (tsx.tv_sec < 0 || (tsx.tv_sec == 0 && tsx.tv_nsec <= 0))
return ETIMEDOUT;
if ((error = itimespecfix(&tsx)) != 0)
return error;
timo = tstohz(&tsx);
KASSERT(timo != 0);
} else
timo = 0;
/* Find and lock the sleep queue. */
l = curlwp;
wchan = lwp_park_wchan(l->l_proc, hint);
sq = sleeptab_lookup(&lwp_park_tab, wchan);
/*
* Before going the full route and blocking, check to see if an
* unpark op is pending.
*/
lwp_lock(l);
if ((l->l_flag & (LW_CANCELLED | LW_UNPARKED)) != 0) {
l->l_flag &= ~(LW_CANCELLED | LW_UNPARKED);
lwp_unlock(l);
sleepq_unlock(sq);
return EALREADY;
}
lwp_unlock_to(l, sq->sq_mutex);
l->l_biglocks = 0;
sleepq_enqueue(sq, l->l_usrpri, wchan, "parked", &lwp_park_sobj);
error = sleepq_block(timo, true);
switch (error) {
case EWOULDBLOCK:
error = ETIMEDOUT;
break;
case ERESTART:
error = EINTR;
break;
default:
/* nothing */
break;
}
return error;
}
/*
* 'park' an LWP waiting on a user-level synchronisation object. The LWP
* will remain parked until another LWP in the same process calls in and
* requests that it be unparked.
*/
int
sys__lwp_park(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_park_args /* {
syscallarg(const struct timespec *) ts;
syscallarg(lwpid_t) unpark;
syscallarg(const void *) hint;
syscallarg(const void *) unparkhint;
} */ *uap = v;
struct timespec ts, *tsp;
int error;
if (SCARG(uap, ts) == NULL)
tsp = NULL;
else {
error = copyin(SCARG(uap, ts), &ts, sizeof(ts));
if (error != 0)
return error;
tsp = &ts;
}
if (SCARG(uap, unpark) != 0) {
error = lwp_unpark(SCARG(uap, unpark), SCARG(uap, unparkhint));
if (error != 0)
return error;
}
return lwp_park(tsp, SCARG(uap, hint));
}
int
sys__lwp_unpark(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_unpark_args /* {
syscallarg(lwpid_t) target;
syscallarg(const void *) hint;
} */ *uap = v;
return lwp_unpark(SCARG(uap, target), SCARG(uap, hint));
}
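/*
* Unpark a batch of LWPs.  A NULL target array is a query and just
* reports the per-call limit (LWP_UNPARK_MAX).  Otherwise the target IDs
* are copied in (small batches live on the stack) and each LWP is either
* removed from the park sleep queue or, if it has not parked yet, marked
* LW_UNPARKED so that its next _lwp_park() returns immediately.
*/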
int
sys__lwp_unpark_all(struct lwp *l, void *v, register_t *retval)
{
struct sys__lwp_unpark_all_args /* {
syscallarg(const lwpid_t *) targets;
syscallarg(size_t) ntargets;
syscallarg(const void *) hint;
} */ *uap = v;
struct proc *p;
struct lwp *t;
sleepq_t *sq;
wchan_t wchan;
lwpid_t targets[32], *tp, *tpp, *tmax, target;
int swapin, error;
u_int ntargets;
size_t sz;
p = l->l_proc;
ntargets = SCARG(uap, ntargets);
if (SCARG(uap, targets) == NULL) {
/*
* Let the caller know how much we are willing to do, and
* let it unpark the LWPs in blocks.
*/
*retval = LWP_UNPARK_MAX;
return 0;
}
if (ntargets > LWP_UNPARK_MAX || ntargets == 0)
return EINVAL;
/*
* Copy in the target array. If it's a small number of LWPs, then
* place the numbers on the stack.
*/
sz = sizeof(target) * ntargets;
if (sz <= sizeof(targets))
tp = targets;
else {
KERNEL_LOCK(1, l); /* XXXSMP */
tp = kmem_alloc(sz, KM_SLEEP);
KERNEL_UNLOCK_ONE(l); /* XXXSMP */
if (tp == NULL)
return ENOMEM;
}
error = copyin(SCARG(uap, targets), tp, sz);
if (error != 0) {
if (tp != targets) {
KERNEL_LOCK(1, l); /* XXXSMP */
kmem_free(tp, sz);
KERNEL_UNLOCK_ONE(l); /* XXXSMP */
}
return error;
}
swapin = 0;
wchan = lwp_park_wchan(p, SCARG(uap, hint));
sq = sleeptab_lookup(&lwp_park_tab, wchan);
for (tmax = tp + ntargets, tpp = tp; tpp < tmax; tpp++) {
target = *tpp;
/*
* Easy case: search for the LWP on the sleep queue. If
* it's parked, remove it from the queue and set running.
*/
TAILQ_FOREACH(t, &sq->sq_queue, l_sleepchain)
if (t->l_proc == p && t->l_lid == target)
break;
if (t != NULL) {
swapin |= sleepq_remove(sq, t);
continue;
}
/*
* The LWP hasn't parked yet. Take the hit and
* mark the operation as pending.
*/
sleepq_unlock(sq);
mutex_enter(&p->p_smutex);
if ((t = lwp_find(p, target)) == NULL) {
mutex_exit(&p->p_smutex);
sleepq_lock(sq);
continue;
}
lwp_lock(t);
/*
* It may not have parked yet, we may have raced, or
* it is parked on a different user sync object.
*/
if (t->l_syncobj == &lwp_park_sobj) {
/* Releases the LWP lock. */
lwp_unsleep(t);
} else {
/*
* Set the operation pending. The next call to
* _lwp_park will return early.
*/
t->l_flag |= LW_UNPARKED;
lwp_unlock(t);
}
mutex_exit(&p->p_smutex);
sleepq_lock(sq);
}
sleepq_unlock(sq);
if (tp != targets) {
KERNEL_LOCK(1, l); /* XXXSMP */
kmem_free(tp, sz);
KERNEL_UNLOCK_ONE(l); /* XXXSMP */
}
if (swapin)
uvm_kick_scheduler();
return 0;
}