NetBSD/sys/kern/kern_sa.c
yamt 01de53d5f4 don't assume the order of upcall stacks in the array from userland.
(it's reversed in the case of topdown vm.)

kern/23266 from Kouichirou Hiratsuka and tested by him.
2003-10-25 12:08:45 +00:00


/* $NetBSD: kern_sa.c,v 1.29 2003/10/25 12:08:45 yamt Exp $ */
/*-
* Copyright (c) 2001 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Nathan J. Williams.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_sa.c,v 1.29 2003/10/25 12:08:45 yamt Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/types.h>
#include <sys/ucontext.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/syscallargs.h>
#include <uvm/uvm_extern.h>
static void sa_vp_donate(struct lwp *);
static int sa_newcachelwp(struct lwp *);
static struct lwp *sa_vp_repossess(struct lwp *l);
static int sa_pagefault(struct lwp *, ucontext_t *);
void sa_upcall_getstate(struct sadata_upcall *, struct lwp *, struct lwp *);
MALLOC_DEFINE(M_SA, "sa", "Scheduler activations");
#define SA_DEBUG
#ifdef SA_DEBUG
#define DPRINTF(x) do { if (sadebug) printf x; } while (0)
#define DPRINTFN(n,x) do { if (sadebug & (1<<(n-1))) printf x; } while (0)
int sadebug = 0;
#else
#define DPRINTF(x)
#define DPRINTFN(n,x)
#endif
#define SA_LWP_STATE_LOCK(l, f) do { \
(f) = (l)->l_flag; \
(l)->l_flag &= ~L_SA; \
} while (/*CONSTCOND*/ 0)
#define SA_LWP_STATE_UNLOCK(l, f) do { \
(l)->l_flag |= (f) & L_SA; \
} while (/*CONSTCOND*/ 0)
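/*
* SA_LWP_STATE_LOCK/UNLOCK save and restore the LWP's L_SA flag.
* With L_SA temporarily cleared, sleeps taken on behalf of the SA
* machinery itself (e.g. copyin/copyout or pool allocations) go
* through the ordinary tsleep() path instead of sa_switch(), so
* they cannot generate further upcalls.
*/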
/*
* sadata_upcall_alloc:
*
* Allocate an sadata_upcall structure.
*/
struct sadata_upcall *
sadata_upcall_alloc(int waitok)
{
/* XXX zero the memory? */
return (pool_get(&saupcall_pool, waitok ? PR_WAITOK : PR_NOWAIT));
}
/*
* sadata_upcall_free:
*
* Free an sadata_upcall structure, and any associated
* argument data.
*/
void
sadata_upcall_free(struct sadata_upcall *sau)
{
extern struct pool siginfo_pool; /* XXX Ew. */
/*
* XXX We have to know what the origin of sau_arg is
* XXX in order to do the right thing, here. Sucks
* XXX to be a non-garbage-collecting kernel.
*/
if (sau->sau_arg) {
switch (sau->sau_type) {
case SA_UPCALL_SIGNAL:
case SA_UPCALL_SIGEV:
pool_put(&siginfo_pool, sau->sau_arg);
break;
default:
panic("sadata_free: unknown type of sau_arg: %d",
sau->sau_type);
}
}
pool_put(&saupcall_pool, sau);
}
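/*
* sys_sa_register, sys_sa_stacks and sys_sa_enable form the setup
* path for a scheduler activations process. The expected userland
* sequence is roughly (an illustrative sketch, not a verbatim
* libpthread excerpt):
*
*	sa_register(upcall_handler, NULL, 0);	register upcall entry point
*	sa_stacks(n, stacks);			hand the kernel n upcall stacks
*	sa_enable();				start taking upcalls on the VP
*/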
int
sys_sa_register(struct lwp *l, void *v, register_t *retval)
{
struct sys_sa_register_args /* {
syscallarg(sa_upcall_t) new;
syscallarg(sa_upcall_t *) old;
syscallarg(int) flags;
} */ *uap = v;
struct proc *p = l->l_proc;
struct sadata *sa;
sa_upcall_t prev;
int error;
if (p->p_sa == NULL) {
/* Allocate scheduler activations data structure */
sa = pool_get(&sadata_pool, PR_WAITOK);
/* Initialize. */
memset(sa, 0, sizeof(*sa));
simple_lock_init(&sa->sa_lock);
sa->sa_flag = SCARG(uap, flags) & SA_FLAG_ALL;
sa->sa_vp = NULL;
sa->sa_old_lwp = NULL;
sa->sa_vp_wait_count = 0;
sa->sa_idle = NULL;
sa->sa_woken = NULL;
sa->sa_concurrency = 1;
sa->sa_stacks = malloc(sizeof(stack_t) * SA_NUMSTACKS,
M_SA, M_WAITOK);
sa->sa_nstacks = 0;
sa->sa_vp_faultaddr = 0;
sa->sa_vp_ofaultaddr = 0;
sa->sa_vp_stacks_low = 0;
sa->sa_vp_stacks_high = 0;
LIST_INIT(&sa->sa_lwpcache);
SIMPLEQ_INIT(&sa->sa_upcalls);
p->p_sa = sa;
sa_newcachelwp(l);
}
prev = p->p_sa->sa_upcall;
p->p_sa->sa_upcall = SCARG(uap, new);
if (SCARG(uap, old)) {
error = copyout(&prev, SCARG(uap, old),
sizeof(prev));
if (error)
return (error);
}
return (0);
}
int
sys_sa_stacks(struct lwp *l, void *v, register_t *retval)
{
struct sys_sa_stacks_args /* {
syscallarg(int) num;
syscallarg(stack_t *) stacks;
} */ *uap = v;
struct sadata *sa = l->l_proc->p_sa;
struct lwp *l2;
int count, error, f, i;
/* We have to be using scheduler activations */
if (sa == NULL)
return (EINVAL);
count = SCARG(uap, num);
if (count < 0)
return (EINVAL);
count = min(count, SA_NUMSTACKS - sa->sa_nstacks);
SA_LWP_STATE_LOCK(l, f);
error = copyin(SCARG(uap, stacks), sa->sa_stacks + sa->sa_nstacks,
sizeof(stack_t) * count);
SA_LWP_STATE_UNLOCK(l, f);
if (error)
return (error);
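/*
* An LWP whose upcall stack was lent out may be sleeping in
* sa_upcall_userret() on &l_upcallstack until userland returns
* that stack; wake any such LWP whose stack just came back.
*/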
for (i = sa->sa_nstacks; i < sa->sa_nstacks + count; i++) {
LIST_FOREACH(l2, &l->l_proc->p_lwps, l_sibling) {
if ((l2->l_upcallstack == sa->sa_stacks[i].ss_sp)) {
l2->l_upcallstack = NULL;
wakeup(&l2->l_upcallstack);
}
}
}
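/*
* If we had run out of upcall stacks while LWPs were waiting for
* the VP, force a pass through sa_upcall_userret() (and thus
* sa_vp_donate()) now that stacks are available again.
*/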
if ((sa->sa_nstacks == 0) && (sa->sa_vp_wait_count != 0))
l->l_flag |= L_SA_UPCALL;
/*
* Save the addresses of the first and last stack on the initial
* load; the pagefault code uses the saved addresses to detect
* threads running on an upcall stack.
* XXX assumes all stacks are adjoining
* XXX assumes the initial load includes all stacks ever used
*/
if (sa->sa_vp_stacks_low == 0) {
vaddr_t low = VM_MAXUSER_ADDRESS;
vaddr_t high = 0;
for (i = 0; i < count; i++) {
stack_t *stackp = &sa->sa_stacks[sa->sa_nstacks + i];
low = min(low, (vaddr_t)stackp->ss_sp);
high = max(high,
(vaddr_t)stackp->ss_sp + stackp->ss_size);
}
sa->sa_vp_stacks_low = low;
sa->sa_vp_stacks_high = high;
DPRINTFN(11,("sys_sa_stacks(%d.%d): low 0x%llx high 0x%llx\n",
l->l_proc->p_pid, l->l_lid,
(unsigned long long)sa->sa_vp_stacks_low,
(unsigned long long)sa->sa_vp_stacks_high));
}
sa->sa_nstacks += count;
DPRINTFN(9, ("sa_stacks(%d.%d) nstacks + %d = %2d\n",
l->l_proc->p_pid, l->l_lid, count, sa->sa_nstacks));
*retval = count;
return (0);
}
int
sys_sa_enable(struct lwp *l, void *v, register_t *retval)
{
struct proc *p = l->l_proc;
struct sadata *sa = p->p_sa;
int error;
DPRINTF(("sys_sa_enable(%d.%d)\n", l->l_proc->p_pid,
l->l_lid));
/* We have to be using scheduler activations */
if (sa == NULL)
return (EINVAL);
if (p->p_flag & P_SA) /* Already running! */
return (EBUSY);
error = sa_upcall(l, SA_UPCALL_NEWPROC, l, NULL, 0, NULL);
if (error)
return (error);
p->p_flag |= P_SA;
l->l_flag |= L_SA; /* We are now an activation LWP */
/* Assign this LWP to the virtual processor */
sa->sa_vp = l;
/* This will not return to the place in user space it came from. */
return (0);
}
int
sys_sa_setconcurrency(struct lwp *l, void *v, register_t *retval)
{
struct sys_sa_setconcurrency_args /* {
syscallarg(int) concurrency;
} */ *uap = v;
struct sadata *sa = l->l_proc->p_sa;
DPRINTF(("sys_sa_concurrency(%d.%d)\n", l->l_proc->p_pid,
l->l_lid));
/* We have to be using scheduler activations */
if (sa == NULL)
return (EINVAL);
if (SCARG(uap, concurrency) < 1)
return (EINVAL);
*retval = sa->sa_concurrency;
/*
* Concurrency greater than the number of physical CPUs does
* not make sense.
* XXX Should we ever support hot-plug CPUs, this will need
* adjustment.
*/
sa->sa_concurrency = min(SCARG(uap, concurrency), 1 /* XXX ncpus */);
return (0);
}
int
sys_sa_yield(struct lwp *l, void *v, register_t *retval)
{
struct proc *p = l->l_proc;
if (p->p_sa == NULL || !(p->p_flag & P_SA)) {
DPRINTFN(1,("sys_sa_yield(%d.%d) proc %p not SA (p_sa %p, flag %s)\n",
p->p_pid, l->l_lid, p, p->p_sa, p->p_flag & P_SA ? "T" : "F"));
return (EINVAL);
}
sa_yield(l);
return (0);
}
void
sa_yield(struct lwp *l)
{
#if 0
struct lwp *l2;
#endif
struct proc *p = l->l_proc;
struct sadata *sa = p->p_sa;
int s, ret;
/*
* If we're the last running LWP, stick around to receive
* signals.
*/
#if 0
if (p->p_nrlwps == 1) {
#endif
DPRINTFN(1,("sa_yield(%d.%d) going dormant\n",
p->p_pid, l->l_lid));
/*
* A signal will probably wake us up. Worst case, the upcall
* happens and just causes the process to yield again.
*/
SCHED_ASSERT_UNLOCKED();
sa_vp_donate(l);
SCHED_ASSERT_UNLOCKED();
s = splsched(); /* Protect from timer expirations */
KDASSERT(sa->sa_vp == l);
/*
* If we were told to make an upcall or exit before
* the splsched(), make sure we process it instead of
* going to sleep. It might make more sense for this to
* be handled inside of tsleep....
*/
ret = 0;
while ((ret == 0) && (p->p_userret == NULL)) {
sa->sa_idle = l;
l->l_flag &= ~L_SA;
SCHED_ASSERT_UNLOCKED();
ret = tsleep((caddr_t) l, PUSER | PCATCH, "sawait", 0);
SCHED_ASSERT_UNLOCKED();
l->l_flag |= L_SA;
sa->sa_idle = NULL;
splx(s);
sa_vp_donate(l);
KDASSERT(sa->sa_vp == l);
s = splsched(); /* Protect from timer expirations */
}
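/*
* Something woke us: a signal, a p_userret request or an LWP
* that wants the VP back. Make sure sa_upcall_userret() runs
* on the way back to userland.
*/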
l->l_flag |= L_SA_UPCALL;
splx(s);
DPRINTFN(1,("sa_yield(%d.%d) returned\n",
p->p_pid, l->l_lid));
#if 0
} else {
DPRINTFN(1,("sa_yield(%d.%d) stepping aside\n", p->p_pid, l->l_lid));
SCHED_LOCK(s);
l2 = sa->sa_woken;
sa->sa_woken = NULL;
sa->sa_vp = NULL;
p->p_nrlwps--;
sa_putcachelwp(p, l);
KDASSERT((l2 == NULL) || (l2->l_proc == l->l_proc));
KDASSERT((l2 == NULL) || (l2->l_stat == LSRUN));
mi_switch(l, l2);
/*
* This isn't quite a NOTREACHED; we may get here if
* the process exits before this LWP is reused. In
* that case, we want to call lwp_exit(), which will
* be done by the userret() hooks.
*/
SCHED_ASSERT_UNLOCKED();
splx(s);
KDASSERT(p->p_flag & P_WEXIT);
/* mostly NOTREACHED */
}
#endif
}
int
sys_sa_preempt(struct lwp *l, void *v, register_t *retval)
{
/* XXX Implement me. */
return (ENOSYS);
}
/* XXX Hm, naming collision. */
void
sa_preempt(struct lwp *l)
{
struct proc *p = l->l_proc;
struct sadata *sa = p->p_sa;
if (sa->sa_flag & SA_FLAG_PREEMPT)
sa_upcall(l, SA_UPCALL_PREEMPTED, l, NULL, 0, NULL);
}
/*
* Help the userspace library resolve locks and critical sections:
* - if the calling (upcall) LWP was not preempted, recycle it and
* its stack and idle the VP until the sa_id LWP unblocks
* - if the calling LWP was preempted, recycle the to-be-unblocked
* LWP and return control to the userspace library so it can switch
* to the blocked thread
* This is used when a thread pagefaults while inside a critical
* section of the userspace library and the critical-section
* resolving code cannot continue until the blocked thread is
* unblocked. If the userspace library switches to the blocked
* thread in the second case, it will either continue (because the
* pagefault has been handled) or pagefault again. The second
* pagefault is detected by the double-pagefault code and the VP
* idles until the pagefault has been handled.
*/
int
sys_sa_unblockyield(struct lwp *l, void *v, register_t *retval)
{
struct sys_sa_unblockyield_args /* {
syscallarg(int) sa_id;
syscallarg(void *) up_preempted;
syscallarg(stack_t *) up_stack;
} */ *uap = v;
struct sadata *sa = l->l_proc->p_sa;
struct proc *p = l->l_proc;
struct lwp *l2;
int error, f, s;
void *preempted;
if (sa == NULL)
return (EINVAL);
if (sa->sa_nstacks == SA_NUMSTACKS)
return (EINVAL);
SA_LWP_STATE_LOCK(l, f);
error = copyin(SCARG(uap, up_stack), sa->sa_stacks + sa->sa_nstacks,
sizeof(stack_t));
if (error) {
SA_LWP_STATE_UNLOCK(l, f);
return (error);
}
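/*
* Fetch the preemption flag from userland; a NULL up_preempted
* pointer is treated the same as "the upcall was preempted"
* (see the case analysis below).
*/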
if (SCARG(uap, up_preempted) != NULL) {
error = copyin(SCARG(uap, up_preempted), &preempted,
sizeof(void *));
if (error) {
SA_LWP_STATE_UNLOCK(l, f);
return (error);
}
} else
preempted = (void *)-1;
SA_LWP_STATE_UNLOCK(l, f);
SCHED_LOCK(s);
LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
if (l2->l_lid == SCARG(uap, sa_id)) {
break;
}
}
if (l2 == NULL) {
SCHED_UNLOCK(s);
return (ESRCH);
}
if (l2->l_upcallstack != sa->sa_stacks[sa->sa_nstacks].ss_sp) {
SCHED_UNLOCK(s);
return (EINVAL);
}
/*
* upcall not interrupted: (*up_preempted == NULL)
* - lwp ready: (wchan == upcallstacks)
* ==> recycle stack, put lwp on vp,
* unsleep lwp, make runnable, recycle upcall lwp (=l)
* - lwp not ready:
* ==> recycle stack, put lwp on vp, recycle upcall lwp (=l)
*
* upcall interrupted: (*up_preempted != NULL || up_preempted == NULL)
* ==> recycle upcall lwp
*/
if (preempted != NULL) {
DPRINTFN(11,("sys_sa_unblockyield(%d.%d) recycle %d "
"(was %sready) upcall stack %p\n",
p->p_pid, l->l_lid, l2->l_lid,
(l2->l_wchan == &l2->l_upcallstack) ? "" :
"not ", sa->sa_stacks[sa->sa_nstacks].ss_sp));
l2->l_upcallstack = (void *)-1;
if (l2->l_wchan == &l2->l_upcallstack) {
unsleep(l2);
if (l2->l_stat == LSSLEEP) {
l2->l_slptime = 0;
l2->l_stat = LSRUN;
l2->l_proc->p_nrlwps++;
if (l2->l_flag & L_INMEM)
setrunqueue(l2);
else
sched_wakeup((caddr_t)&proc0);
}
}
} else {
DPRINTFN(11,("sys_sa_unblockyield(%d.%d) resuming %d "
"(is %sready) upcall stack %p\n",
p->p_pid, l->l_lid, l2->l_lid,
(l2->l_wchan == &l2->l_upcallstack) ? "" :
"not ", sa->sa_stacks[sa->sa_nstacks].ss_sp));
sa->sa_vp = l2;
sa->sa_nstacks += 1;
l2->l_flag &= ~L_SA_BLOCKING;
l2->l_upcallstack = NULL;
if (l2->l_wchan == &l2->l_upcallstack) {
unsleep(l2);
if (l2->l_stat == LSSLEEP) {
l2->l_slptime = 0;
l2->l_stat = LSRUN;
l2->l_proc->p_nrlwps++;
if (l2->l_flag & L_INMEM)
setrunqueue(l2);
else
sched_wakeup((caddr_t)&proc0);
}
}
p->p_nrlwps--;
sa_putcachelwp(p, l);
mi_switch(l, NULL);
/* mostly NOTREACHED */
SCHED_ASSERT_UNLOCKED();
splx(s);
KDASSERT(p->p_flag & P_WEXIT);
lwp_exit(l);
}
SCHED_UNLOCK(s);
return (0);
}
/*
* Set up the user-level stack and trapframe to do an upcall.
*
* NOTE: This routine WILL FREE "arg" in the case of failure! Callers
* should not touch the "arg" pointer once calling sa_upcall().
*/
int
sa_upcall(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted,
size_t argsize, void *arg)
{
struct sadata_upcall *sau;
struct sadata *sa = l->l_proc->p_sa;
stack_t st;
int error, f;
/* XXX prevent recursive upcalls if we sleep for memory */
SA_LWP_STATE_LOCK(l, f);
sau = sadata_upcall_alloc(1);
SA_LWP_STATE_UNLOCK(l, f);
if (sa->sa_nstacks == 0) {
/* assign to assure that it gets freed */
sau->sau_type = type & ~SA_UPCALL_DEFER;
sau->sau_arg = arg;
sadata_upcall_free(sau);
return (ENOMEM);
}
st = sa->sa_stacks[--sa->sa_nstacks];
DPRINTFN(9,("sa_upcall(%d.%d) nstacks-- = %2d\n",
l->l_proc->p_pid, l->l_lid, sa->sa_nstacks));
error = sa_upcall0(l, type, event, interrupted, argsize, arg, sau, &st);
if (error) {
sa->sa_stacks[sa->sa_nstacks++] = st;
sadata_upcall_free(sau);
return (error);
}
SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next);
l->l_flag |= L_SA_UPCALL;
return (0);
}
int
sa_upcall0(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted,
size_t argsize, void *arg, struct sadata_upcall *sau, stack_t *st)
{
KDASSERT((event == NULL) || (event != interrupted));
sau->sau_flags = 0;
sau->sau_type = type & ~SA_UPCALL_DEFER;
sau->sau_argsize = argsize;
sau->sau_arg = arg;
sau->sau_stack = *st;
if (type & SA_UPCALL_DEFER) {
sau->sau_state.deferred.e_lwp = event;
sau->sau_state.deferred.i_lwp = interrupted;
sau->sau_flags = SAU_FLAG_DEFERRED;
} else
sa_upcall_getstate(sau, event, interrupted);
return (0);
}
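/*
* sa_upcall_getstate captures the ucontext of the event and/or
* interrupted LWP and records where on that LWP's user stack the
* context will later be copied out by sa_upcall_userret(): just
* below the user SP, aligned down via _UC_UCONTEXT_ALIGN where
* that is defined.
*/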
void
sa_upcall_getstate(struct sadata_upcall *sau, struct lwp *event,
struct lwp *interrupted)
{
if (event) {
getucontext(event, &sau->sau_state.captured.e_ctx);
sau->sau_state.captured.e_sa.sa_context = (ucontext_t *)
(intptr_t)((_UC_MACHINE_SP(&sau->sau_state.captured.e_ctx) -
sizeof(ucontext_t))
#ifdef _UC_UCONTEXT_ALIGN
& _UC_UCONTEXT_ALIGN
#endif
);
sau->sau_state.captured.e_sa.sa_id = event->l_lid;
sau->sau_state.captured.e_sa.sa_cpu = 0; /* XXX extract from l_cpu */
} else
sau->sau_state.captured.e_sa.sa_context = NULL;
if (interrupted) {
getucontext(interrupted, &sau->sau_state.captured.i_ctx);
sau->sau_state.captured.i_sa.sa_context = (ucontext_t *)
(intptr_t)((_UC_MACHINE_SP(&sau->sau_state.captured.i_ctx) -
sizeof(ucontext_t))
#ifdef _UC_UCONTEXT_ALIGN
& _UC_UCONTEXT_ALIGN
#endif
);
sau->sau_state.captured.i_sa.sa_id = interrupted->l_lid;
sau->sau_state.captured.i_sa.sa_cpu = 0; /* XXX extract from l_cpu */
} else
sau->sau_state.captured.i_sa.sa_context = NULL;
}
/*
* Detect double pagefaults and pagefaults on upcalls.
* - double pagefaults are detected by comparing the previous faultaddr
* against the current faultaddr
* - pagefaults on upcalls are detected by checking if the userspace
* thread is running on an upcall stack
*/
static int
sa_pagefault(struct lwp *l, ucontext_t *l_ctx)
{
struct proc *p;
struct sadata *sa;
vaddr_t usp;
p = l->l_proc;
sa = p->p_sa;
KDASSERT(sa->sa_vp == l);
if (sa->sa_vp_faultaddr == sa->sa_vp_ofaultaddr) {
DPRINTFN(10,("sa_check_upcall(%d.%d) double page fault\n",
p->p_pid, l->l_lid));
return 1;
}
usp = (uintptr_t)_UC_MACHINE_SP(l_ctx);
if ((usp >= sa->sa_vp_stacks_low) &&
(usp < sa->sa_vp_stacks_high)) {
DPRINTFN(10,("sa_check_upcall(%d.%d) upcall page fault\n",
p->p_pid, l->l_lid));
return 1;
}
sa->sa_vp_ofaultaddr = sa->sa_vp_faultaddr;
return 0;
}
/*
* Called by tsleep(). Block current LWP and switch to another.
*
* WE ARE NOT ALLOWED TO SLEEP HERE! WE ARE CALLED FROM WITHIN
* TSLEEP() ITSELF! We are called with sched_lock held, and must
* hold it right through the mi_switch() call.
*/
void
sa_switch(struct lwp *l, int type)
{
struct proc *p = l->l_proc;
struct sadata *sa = p->p_sa;
struct sadata_upcall *sau;
struct lwp *l2;
stack_t st;
int error;
DPRINTFN(4,("sa_switch(%d.%d type %d VP %d)\n", p->p_pid, l->l_lid,
type, sa->sa_vp ? sa->sa_vp->l_lid : 0));
SCHED_ASSERT_LOCKED();
if (p->p_flag & P_WEXIT) {
mi_switch(l, NULL);
return;
}
if (sa->sa_vp == l) {
/*
* Case 1: we're blocking for the first time; generate
* a SA_BLOCKED upcall and allocate resources for the
* UNBLOCKED upcall.
*/
/*
* The process of allocating a new LWP could cause
* sleeps. We're called from inside sleep, so that
* would be Bad. Therefore, we must use a cached new
* LWP. The first thing that this new LWP must do is
* allocate another LWP for the cache. */
l2 = sa_getcachelwp(p);
if (l2 == NULL) {
/* XXXSMP */
/* No upcall for you! */
/* XXX The consequences of this are more subtle and
* XXX the recovery from this situation deserves
* XXX more thought.
*/
/* XXXUPSXXX Should only happen with concurrency > 1 */
#ifdef DIAGNOSTIC
printf("sa_switch(%d.%d): no cached LWP for upcall.\n",
p->p_pid, l->l_lid);
#endif
mi_switch(l, NULL);
return;
}
/*
* XXX We need to allocate the sadata_upcall structure here,
* XXX since we can't sleep while waiting for memory inside
* XXX sa_upcall(). It would be nice if we could safely
* XXX allocate the sadata_upcall structure on the stack, here.
*/
if (sa->sa_nstacks == 0) {
#ifdef DIAGNOSTIC
printf("sa_switch(%d.%d flag %x): Not enough stacks.\n",
p->p_pid, l->l_lid, l->l_flag);
#endif
goto sa_upcall_failed;
}
sau = sadata_upcall_alloc(0);
if (sau == NULL) {
#ifdef DIAGNOSTIC
printf("sa_switch(%d.%d): "
"couldn't allocate upcall data.\n",
p->p_pid, l->l_lid);
#endif
goto sa_upcall_failed;
}
st = sa->sa_stacks[--sa->sa_nstacks];
DPRINTFN(9,("sa_switch(%d.%d) nstacks-- = %2d\n",
l->l_proc->p_pid, l->l_lid, sa->sa_nstacks));
cpu_setfunc(l2, sa_switchcall, l2);
error = sa_upcall0(l2, SA_UPCALL_BLOCKED, l, NULL, 0, NULL,
sau, &st);
if (error) {
#ifdef DIAGNOSTIC
printf("sa_switch(%d.%d): Error %d from sa_upcall()\n",
p->p_pid, l->l_lid, error);
#endif
goto sa_upcall_failed;
}
/*
* Perform the double/upcall pagefault check.
* We do this only here since we need l's ucontext to
* get l's userspace stack. sa_upcall0 above has saved
* it for us.
* The L_SA_PAGEFAULT flag is set in the MD
* pagefault code to indicate a pagefault. The MD
* pagefault code also saves the faultaddr for us.
*/
if ((l->l_flag & L_SA_PAGEFAULT) && sa_pagefault(l,
&sau->sau_state.captured.e_ctx) != 0) {
sadata_upcall_free(sau);
sa->sa_stacks[sa->sa_nstacks++] = st;
sa_putcachelwp(p, l2);
PRELE(l2); /* Remove the artificial hold-count */
mi_switch(l, NULL);
return;
}
SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next);
l2->l_flag |= L_SA_UPCALL;
l->l_flag |= L_SA_BLOCKING;
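/*
* Remember which upcall stack was lent out for the BLOCKED upcall;
* sys_sa_stacks() and sys_sa_unblockyield() later match returned
* stacks against this, so no particular return order is assumed
* (cf. the rev. 1.29 commit message above).
*/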
l->l_upcallstack = st.ss_sp;
l2->l_priority = l2->l_usrpri;
sa->sa_vp = l2;
setrunnable(l2);
PRELE(l2); /* Remove the artificial hold-count */
KDASSERT(l2 != l);
} else if (sa->sa_vp != NULL) {
/*
* Case 2: We've been woken up while another LWP was
* on the VP, but we're going back to sleep without
* having returned to userland and delivering the
* SA_UNBLOCKED upcall (select and poll cause this
* kind of behavior a lot). We just switch back to the
* LWP that had been running and let it have another
* go. If the LWP on the VP was idling, don't make it
* run again, though.
*/
if (sa->sa_idle)
l2 = NULL;
else {
l2 = sa->sa_vp; /* XXXUPSXXX Unfair advantage for l2 ? */
if ((l2->l_stat != LSRUN) || ((l2->l_flag & L_INMEM) == 0))
l2 = NULL;
}
} else {
#if 0
/*
* Case 3: The VP is empty. As in case 2, we were
* woken up and called tsleep again, but additionally,
* the running LWP called sa_yield() between our wakeup() and
* when we got to run again (in fact, it probably was
* responsible for switching to us, via sa_woken).
* The right thing is to pull a LWP off the cache and have
* it jump straight back into sa_yield.
*/
l2 = sa_getcachelwp(p);
if (l2 == NULL) {
#ifdef DIAGNOSTIC
printf("sa_switch(%d.%d): no cached LWP for reidling.\n",
p->p_pid, l->l_lid);
#endif
mi_switch(l, NULL);
return;
}
#else
mi_switch(l, NULL);
return;
#endif
sa_upcall_failed:
#if 0
cpu_setfunc(l2, sa_yieldcall, l2);
l2->l_priority = l2->l_usrpri;
setrunnable(l2);
PRELE(l2); /* Remove the artificial hold-count */
#else
/* sa_putcachelwp does not block because we have a hold count on l2 */
sa_putcachelwp(p, l2);
PRELE(l2); /* Remove the artificial hold-count */
mi_switch(l, NULL);
return;
#endif
}
DPRINTFN(4,("sa_switch(%d.%d) switching to LWP %d.\n",
p->p_pid, l->l_lid, l2 ? l2->l_lid : 0));
mi_switch(l, l2);
DPRINTFN(4,("sa_switch(%d.%d flag %x) returned.\n", p->p_pid, l->l_lid, l->l_flag));
KDASSERT(l->l_wchan == 0);
SCHED_ASSERT_UNLOCKED();
if (sa->sa_woken == l)
sa->sa_woken = NULL;
/*
* The process is trying to exit. In this case, the last thing
* we want to do is put something back on the cache list.
* It's also not useful to make the upcall at all, so just punt.
*/
if (p->p_flag & P_WEXIT)
return;
/*
* Okay, now we've been woken up. This means that it's time
* for a SA_UNBLOCKED upcall when we get back to userlevel
*/
l->l_flag |= L_SA_UPCALL;
}
void
sa_switchcall(void *arg)
{
struct lwp *l;
struct proc *p;
struct sadata *sa;
int f;
l = arg;
p = l->l_proc;
sa = p->p_sa;
sa->sa_vp = l;
DPRINTFN(6,("sa_switchcall(%d.%d)\n", p->p_pid, l->l_lid));
if (LIST_EMPTY(&sa->sa_lwpcache)) {
/* Allocate the next cache LWP */
DPRINTFN(6,("sa_switchcall(%d.%d) allocating LWP\n",
p->p_pid, l->l_lid));
SA_LWP_STATE_LOCK(l, f);
sa_newcachelwp(l);
SA_LWP_STATE_UNLOCK(l, f);
}
upcallret(l);
}
#if 0
void
sa_yieldcall(void *arg)
{
struct lwp *l;
struct proc *p;
struct sadata *sa;
l = arg;
p = l->l_proc;
sa = p->p_sa;
sa->sa_vp = l;
DPRINTFN(6,("sa_yieldcall(%d.%d)\n", p->p_pid, l->l_lid));
if (LIST_EMPTY(&sa->sa_lwpcache)) {
/* Allocate the next cache LWP */
DPRINTFN(6,("sa_yieldcall(%d.%d) allocating LWP\n",
p->p_pid, l->l_lid));
sa_newcachelwp(l);
}
sa_yield(l);
upcallret(l);
}
#endif
static int
sa_newcachelwp(struct lwp *l)
{
struct proc *p;
struct lwp *l2;
vaddr_t uaddr;
boolean_t inmem;
int s;
p = l->l_proc;
inmem = uvm_uarea_alloc(&uaddr);
if (__predict_false(uaddr == 0)) {
return (ENOMEM);
} else {
newlwp(l, p, uaddr, inmem, 0, NULL, 0, child_return, 0, &l2);
/* We don't want this LWP on the process's main LWP list, but
* newlwp helpfully puts it there. Unclear if newlwp should
* be tweaked.
*/
SCHED_LOCK(s);
sa_putcachelwp(p, l2);
SCHED_UNLOCK(s);
}
return (0);
}
/*
* Take a normal process LWP and place it in the SA cache.
* LWP must not be running!
*/
void
sa_putcachelwp(struct proc *p, struct lwp *l)
{
struct sadata *sa;
SCHED_ASSERT_LOCKED();
sa = p->p_sa;
LIST_REMOVE(l, l_sibling);
p->p_nlwps--;
l->l_stat = LSSUSPENDED;
l->l_flag |= (L_DETACHED | L_SA);
PHOLD(l);
/* XXX lock sadata */
DPRINTFN(5,("sa_putcachelwp(%d.%d) Adding LWP %d to cache\n",
p->p_pid, curlwp->l_lid, l->l_lid));
LIST_INSERT_HEAD(&sa->sa_lwpcache, l, l_sibling);
sa->sa_ncached++;
/* XXX unlock */
}
/*
* Fetch a LWP from the cache.
*/
struct lwp *
sa_getcachelwp(struct proc *p)
{
struct sadata *sa;
struct lwp *l;
SCHED_ASSERT_LOCKED();
l = NULL;
sa = p->p_sa;
/* XXX lock sadata */
if (sa->sa_ncached > 0) {
sa->sa_ncached--;
l = LIST_FIRST(&sa->sa_lwpcache);
LIST_REMOVE(l, l_sibling);
LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
p->p_nlwps++;
DPRINTFN(5,("sa_getcachelwp(%d.%d) Got LWP %d from cache.\n",
p->p_pid, curlwp->l_lid, l->l_lid));
}
/* XXX unlock */
return l;
}
void
sa_upcall_userret(struct lwp *l)
{
struct proc *p;
struct sadata *sa;
struct sa_t **sapp, *sap;
struct sadata_upcall *sau;
struct sa_t self_sa;
struct sa_t *sas[3];
stack_t st;
void *stack, *ap;
ucontext_t u, *up;
int f, i, nsas, nint, nevents, sig, s, type;
p = l->l_proc;
sa = p->p_sa;
SCHED_ASSERT_UNLOCKED();
KERNEL_PROC_LOCK(l);
SA_LWP_STATE_LOCK(l, f);
DPRINTFN(7,("sa_upcall_userret(%d.%d %x) \n", p->p_pid, l->l_lid,
l->l_flag));
while (l->l_upcallstack != NULL) {
if (l->l_upcallstack == (void *)-1) {
SCHED_LOCK(s);
l->l_flag &= ~(L_SA_UPCALL|L_SA_BLOCKING);
l->l_upcallstack = NULL;
p->p_nrlwps--;
sa_putcachelwp(p, l);
SA_LWP_STATE_UNLOCK(l, f);
KERNEL_PROC_UNLOCK(l);
mi_switch(l, NULL);
/* mostly NOTREACHED */
SCHED_ASSERT_UNLOCKED();
splx(s);
KERNEL_PROC_LOCK(l);
KDASSERT(p->p_flag & P_WEXIT);
lwp_exit(l);
}
if ((l->l_flag & L_SA_BLOCKING) == 0) {
l->l_upcallstack = NULL;
break;
}
tsleep((caddr_t) &l->l_upcallstack, PWAIT,
"saunblock", 0);
if (p->p_flag & P_WEXIT)
lwp_exit(l);
}
if (l->l_flag & L_SA_BLOCKING) {
/* Invoke an "unblocked" upcall */
struct lwp *l2;
DPRINTFN(8,("sa_upcall_userret(%d.%d) unblocking\n",
p->p_pid, l->l_lid));
sau = sadata_upcall_alloc(1);
sau->sau_arg = NULL;
if (p->p_flag & P_WEXIT) {
sadata_upcall_free(sau);
lwp_exit(l);
}
SCHED_ASSERT_UNLOCKED();
l2 = sa_vp_repossess(l);
KDASSERT(sa->sa_nstacks > 0);
st = sa->sa_stacks[--sa->sa_nstacks];
SCHED_ASSERT_UNLOCKED();
if (l2 == NULL) {
sadata_upcall_free(sau);
/* No need to put st back */
lwp_exit(l);
}
DPRINTFN(9,("sa_upcall_userret(%d.%d) nstacks-- = %2d\n",
l->l_proc->p_pid, l->l_lid, sa->sa_nstacks));
if (sa_upcall0(l, SA_UPCALL_UNBLOCKED | SA_UPCALL_DEFER, l, l2, 0, NULL, sau,
&st) != 0) {
/*
* We were supposed to deliver an UNBLOCKED
* upcall, but don't have resources to do so.
*/
#ifdef DIAGNOSTIC
printf("sa_upcall_userret: out of upcall resources"
" for %d.%d\n", p->p_pid, l->l_lid);
#endif
sigexit(l, SIGABRT);
/* NOTREACHED */
}
SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next);
l->l_flag |= L_SA_UPCALL;
l->l_flag &= ~L_SA_BLOCKING;
/* We might have sneaked past signal handling and userret */
SA_LWP_STATE_UNLOCK(l, f);
KERNEL_PROC_UNLOCK(l);
/* take pending signals */
while ((sig = CURSIG(l)) != 0)
postsig(sig);
/* Invoke per-process kernel-exit handling, if any */
if (p->p_userret)
(p->p_userret)(l, p->p_userret_arg);
KERNEL_PROC_LOCK(l);
SA_LWP_STATE_LOCK(l, f);
}
KDASSERT(sa->sa_vp == l);
if (SIMPLEQ_EMPTY(&sa->sa_upcalls)) {
l->l_flag &= ~L_SA_UPCALL;
sa_vp_donate(l);
SA_LWP_STATE_UNLOCK(l, f);
KERNEL_PROC_UNLOCK(l);
return;
}
sau = SIMPLEQ_FIRST(&sa->sa_upcalls);
SIMPLEQ_REMOVE_HEAD(&sa->sa_upcalls, sau_next);
if (sau->sau_flags & SAU_FLAG_DEFERRED) {
sa_upcall_getstate(sau,
sau->sau_state.deferred.e_lwp,
sau->sau_state.deferred.i_lwp);
}
stack = (void *)
(((uintptr_t)sau->sau_stack.ss_sp + sau->sau_stack.ss_size)
& ~ALIGNBYTES);
self_sa.sa_id = l->l_lid;
self_sa.sa_cpu = 0; /* XXX l->l_cpu; */
sas[0] = &self_sa;
nsas = 1;
nevents = 0;
nint = 0;
if (sau->sau_state.captured.e_sa.sa_context != NULL) {
if (copyout(&sau->sau_state.captured.e_ctx,
sau->sau_state.captured.e_sa.sa_context,
sizeof(ucontext_t)) != 0) {
#ifdef DIAGNOSTIC
printf("sa_upcall_userret(%d.%d): couldn't copyout"
" context of event LWP %d\n",
p->p_pid, l->l_lid, sau->sau_state.captured.e_sa.sa_id);
#endif
sigexit(l, SIGILL);
/* NOTREACHED */
}
sas[nsas] = &sau->sau_state.captured.e_sa;
nsas++;
nevents = 1;
}
if (sau->sau_state.captured.i_sa.sa_context != NULL) {
KDASSERT(sau->sau_state.captured.i_sa.sa_context !=
sau->sau_state.captured.e_sa.sa_context);
if (copyout(&sau->sau_state.captured.i_ctx,
sau->sau_state.captured.i_sa.sa_context,
sizeof(ucontext_t)) != 0) {
#ifdef DIAGNOSTIC
printf("sa_upcall_userret(%d.%d): couldn't copyout"
" context of interrupted LWP %d\n",
p->p_pid, l->l_lid, sau->sau_state.captured.i_sa.sa_id);
#endif
sigexit(l, SIGILL);
/* NOTREACHED */
}
sas[nsas] = &sau->sau_state.captured.i_sa;
nsas++;
nint = 1;
}
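/*
* Build the upcall frame, growing down from the aligned stack top
* computed above:
*
*	ucontext_t for the upcall itself	(up)
*	nsas x struct sa_t			(sap)
*	nsas x struct sa_t *			(sapp)
*	argument data, if any			(ap)
*
* "stack" ends up at the lowest used address and becomes the stack
* pointer passed to cpu_upcall().
*/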
/* Copy out the activation's ucontext */
u.uc_stack = sau->sau_stack;
u.uc_flags = _UC_STACK;
up = stack;
up--;
if (copyout(&u, up, sizeof(ucontext_t)) != 0) {
sadata_upcall_free(sau);
#ifdef DIAGNOSTIC
printf("sa_upcall_userret: couldn't copyout activation"
" ucontext for %d.%d\n", l->l_proc->p_pid, l->l_lid);
#endif
sigexit(l, SIGILL);
/* NOTREACHED */
}
sas[0]->sa_context = up;
/* Next, copy out the sa_t's and pointers to them. */
sap = (struct sa_t *) up;
sapp = (struct sa_t **) (sap - nsas);
for (i = nsas - 1; i >= 0; i--) {
sap--;
sapp--;
if ((copyout(sas[i], sap, sizeof(struct sa_t)) != 0) ||
(copyout(&sap, sapp, sizeof(struct sa_t *)) != 0)) {
/* Copying onto the stack didn't work. Die. */
sadata_upcall_free(sau);
#ifdef DIAGNOSTIC
printf("sa_upcall_userret: couldn't copyout sa_t "
"%d for %d.%d\n", i, p->p_pid, l->l_lid);
#endif
sigexit(l, SIGILL);
/* NOTREACHED */
}
}
/* Copy out the arg, if any */
/* xxx assume alignment works out; everything so far has been
* a structure, so...
*/
if (sau->sau_arg) {
ap = (char *)sapp - sau->sau_argsize;
stack = ap;
if (copyout(sau->sau_arg, ap, sau->sau_argsize) != 0) {
/* Copying onto the stack didn't work. Die. */
sadata_upcall_free(sau);
#ifdef DIAGNOSTIC
printf("sa_upcall_userret(%d.%d): couldn't copyout"
" sadata_upcall arg %p size %ld to %p \n",
p->p_pid, l->l_lid,
sau->sau_arg, (long) sau->sau_argsize, ap);
#endif
sigexit(l, SIGILL);
/* NOTREACHED */
}
} else {
ap = 0;
stack = sapp;
}
type = sau->sau_type;
sadata_upcall_free(sau);
DPRINTFN(7,("sa_upcall_userret(%d.%d): type %d\n",p->p_pid,
l->l_lid, type));
cpu_upcall(l, type, nevents, nint, sapp, ap, stack, sa->sa_upcall);
if (SIMPLEQ_EMPTY(&sa->sa_upcalls)) {
l->l_flag &= ~L_SA_UPCALL;
sa_vp_donate(l);
/* May not be reached */
}
/* May not be reached */
SA_LWP_STATE_UNLOCK(l, f);
KERNEL_PROC_UNLOCK(l);
}
#if 0
static struct lwp *
sa_vp_repossess(struct lwp *l)
{
struct lwp *l2;
struct proc *p = l->l_proc;
struct sadata *sa = p->p_sa;
int s;
/*
* Put ourselves on the virtual processor and note that the
* previous occupant of that position was interrupted.
*/
l2 = sa->sa_vp;
sa->sa_vp = l;
if (sa->sa_idle == l2)
sa->sa_idle = NULL;
KDASSERT(l2 != l);
if (l2) {
SCHED_LOCK(s);
switch (l2->l_stat) {
case LSRUN:
remrunqueue(l2);
p->p_nrlwps--;
break;
case LSSLEEP:
unsleep(l2);
l2->l_flag &= ~L_SINTR;
break;
#ifdef DIAGNOSTIC
default:
panic("SA VP %d.%d is in state %d, not running"
" or sleeping\n", p->p_pid, l2->l_lid,
l2->l_stat);
#endif
}
sa_putcachelwp(p, l2);
/*
* XXX SMP race! Need to be sure that l2's state is
* captured before the upcall before we make it possible
* for another processor to grab it.
*/
SCHED_UNLOCK(s);
}
return l2;
}
#endif
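/*
* Wait until this LWP gets the virtual processor back: mark
* ourselves as wanting the VP, poke whoever currently has it
* (the idle LWP in sa_yield(), or the running LWP via L_SA_UPCALL
* and signotify()), then sleep until sa_vp_donate() hands it over.
* Returns the LWP that last held the VP (sa_old_lwp), or NULL if
* the process is exiting.
*/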
static struct lwp *
sa_vp_repossess(struct lwp *l)
{
struct lwp *l2;
struct proc *p = l->l_proc;
struct sadata *sa = p->p_sa;
int s;
SCHED_ASSERT_UNLOCKED();
l->l_flag |= L_SA_WANTS_VP;
sa->sa_vp_wait_count++;
if (sa->sa_idle != NULL) {
/* XXXUPSXXX Simple but slow */
wakeup(sa->sa_idle);
} else {
SCHED_LOCK(s);
sa->sa_vp->l_flag |= L_SA_UPCALL;
/* kick the process */
signotify(p);
SCHED_UNLOCK(s);
}
SCHED_ASSERT_UNLOCKED();
DPRINTFN(1,("sa_vp_repossess(%d.%d): want vp\n",
p->p_pid, l->l_lid));
while (sa->sa_vp != l) {
tsleep((caddr_t) l, PWAIT, "saprocessor", 0);
/* XXXUPSXXX NEED TO STOP THE LWP HERE ON REQUEST ??? */
if (p->p_flag & P_WEXIT) {
l->l_flag &= ~L_SA_WANTS_VP;
sa->sa_vp_wait_count--;
return 0;
}
}
DPRINTFN(1,("sa_vp_repossess(%d.%d): on vp\n",
p->p_pid, l->l_lid));
l2 = sa->sa_old_lwp;
return l2;
}
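/*
* Give the virtual processor away. Called by the LWP currently on
* the VP when it has nothing left to do; if some LWP is waiting in
* sa_vp_repossess() and an upcall stack is available, park the
* caller in the LWP cache, hand the VP to the waiter and switch to
* it. The caller only runs here again if the process is exiting,
* in which case it calls lwp_exit().
*/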
static void
sa_vp_donate(struct lwp *l)
{
struct proc *p = l->l_proc;
struct sadata *sa = p->p_sa;
struct lwp *l2;
int s;
SCHED_ASSERT_UNLOCKED();
/* Nobody wants the vp */
if (sa->sa_vp_wait_count == 0)
return;
/* No stack for an unblock call */
if (sa->sa_nstacks == 0)
return;
LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
if (l2->l_flag & L_SA_WANTS_VP) {
SCHED_LOCK(s);
p->p_nrlwps--;
sa_putcachelwp(p, l);
sa->sa_vp = l2;
sa->sa_vp_wait_count--;
l2->l_flag &= ~L_SA_WANTS_VP;
sa->sa_old_lwp = l;
sched_wakeup((caddr_t) l2);
KERNEL_PROC_UNLOCK(l);
if ((l2->l_stat == LSRUN) && ((l2->l_flag & L_INMEM) != 0))
mi_switch(l, l2);
else
mi_switch(l, NULL);
/*
* This isn't quite a NOTREACHED; we may get here if
* the process exits before this LWP is reused. In
* that case, we want to call lwp_exit(), which will
* be done by the userret() hooks.
*/
SCHED_ASSERT_UNLOCKED();
splx(s);
KERNEL_PROC_LOCK(l);
KDASSERT(p->p_flag & P_WEXIT);
/* mostly NOTREACHED */
lwp_exit(l);
}
}
#ifdef DIAGNOSTIC
printf("sa_vp_donate couldn't find someone to donate the CPU to \n");
#endif
}
#ifdef DEBUG
int debug_print_sa(struct proc *);
int debug_print_lwp(struct lwp *);
int debug_print_proc(int);
int
debug_print_proc(int pid)
{
struct proc *p;
p = pfind(pid);
if (p == NULL)
printf("No process %d\n", pid);
else
debug_print_sa(p);
return 0;
}
int
debug_print_sa(struct proc *p)
{
struct lwp *l;
struct sadata *sa;
printf("Process %d (%s), state %d, address %p, flags %x\n",
p->p_pid, p->p_comm, p->p_stat, p, p->p_flag);
printf("LWPs: %d (%d running, %d zombies)\n",
p->p_nlwps, p->p_nrlwps, p->p_nzlwps);
LIST_FOREACH(l, &p->p_lwps, l_sibling)
debug_print_lwp(l);
sa = p->p_sa;
if (sa) {
if (sa->sa_vp)
printf("SA VP: %d\n", sa->sa_vp->l_lid);
if (sa->sa_idle)
printf("SA idle: %d\n", sa->sa_idle->l_lid);
printf("SAs: %d cached LWPs\n", sa->sa_ncached);
printf("%d upcall stacks\n", sa->sa_nstacks);
LIST_FOREACH(l, &sa->sa_lwpcache, l_sibling)
debug_print_lwp(l);
}
return 0;
}
int
debug_print_lwp(struct lwp *l)
{
struct proc *p;
p = l->l_proc;
printf("LWP %d address %p ", l->l_lid, l);
printf("state %d flags %x ", l->l_stat, l->l_flag);
if (l->l_wchan)
printf("wait %p %s", l->l_wchan, l->l_wmesg);
printf("\n");
return 0;
}
#endif