/* $NetBSD: kern_sa.c,v 1.25 2003/09/16 15:28:45 cl Exp $ */ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Nathan J. Williams. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include __KERNEL_RCSID(0, "$NetBSD: kern_sa.c,v 1.25 2003/09/16 15:28:45 cl Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include static void sa_vp_donate(struct lwp *); static int sa_newcachelwp(struct lwp *); static struct lwp *sa_vp_repossess(struct lwp *l); static int sa_pagefault(struct lwp *, ucontext_t *); void sa_upcall_getstate(struct sadata_upcall *, struct lwp *, struct lwp *); MALLOC_DEFINE(M_SA, "sa", "Scheduler activations"); #define SA_DEBUG #ifdef SA_DEBUG #define DPRINTF(x) do { if (sadebug) printf x; } while (0) #define DPRINTFN(n,x) do { if (sadebug & (1<<(n-1))) printf x; } while (0) int sadebug = 0; #else #define DPRINTF(x) #define DPRINTFN(n,x) #endif #define SA_LWP_STATE_LOCK(l, f) do { \ (f) = (l)->l_flag; \ (l)->l_flag &= ~L_SA; \ } while (/*CONSTCOND*/ 0) #define SA_LWP_STATE_UNLOCK(l, f) do { \ (l)->l_flag |= (f) & L_SA; \ } while (/*CONSTCOND*/ 0) /* * sadata_upcall_alloc: * * Allocate an sadata_upcall structure. */ struct sadata_upcall * sadata_upcall_alloc(int waitok) { /* XXX zero the memory? */ return (pool_get(&saupcall_pool, waitok ? PR_WAITOK : PR_NOWAIT)); } /* * sadata_upcall_free: * * Free an sadata_upcall structure, and any associated * argument data. */ void sadata_upcall_free(struct sadata_upcall *sau) { extern struct pool siginfo_pool; /* XXX Ew. */ /* * XXX We have to know what the origin of sau_arg is * XXX in order to do the right thing, here. Sucks * XXX to be a non-garbage-collecting kernel. 
*/ if (sau->sau_arg) { switch (sau->sau_type) { case SA_UPCALL_SIGNAL: case SA_UPCALL_SIGEV: pool_put(&siginfo_pool, sau->sau_arg); break; default: panic("sadata_free: unknown type of sau_arg: %d", sau->sau_type); } } pool_put(&saupcall_pool, sau); } int sys_sa_register(struct lwp *l, void *v, register_t *retval) { struct sys_sa_register_args /* { syscallarg(sa_upcall_t) new; syscallarg(sa_upcall_t *) old; syscallarg(int) flags; } */ *uap = v; struct proc *p = l->l_proc; struct sadata *sa; sa_upcall_t prev; int error; if (p->p_sa == NULL) { /* Allocate scheduler activations data structure */ sa = pool_get(&sadata_pool, PR_WAITOK); /* Initialize. */ memset(sa, 0, sizeof(*sa)); simple_lock_init(&sa->sa_lock); sa->sa_flag = SCARG(uap, flags) & SA_FLAG_ALL; sa->sa_vp = NULL; sa->sa_old_lwp = NULL; sa->sa_vp_wait_count = 0; sa->sa_idle = NULL; sa->sa_woken = NULL; sa->sa_concurrency = 1; sa->sa_stacks = malloc(sizeof(stack_t) * SA_NUMSTACKS, M_SA, M_WAITOK); sa->sa_nstacks = 0; sa->sa_vp_faultaddr = NULL; sa->sa_vp_ofaultaddr = NULL; sa->sa_vp_stacks_low = NULL; sa->sa_vp_stacks_high = NULL; LIST_INIT(&sa->sa_lwpcache); SIMPLEQ_INIT(&sa->sa_upcalls); p->p_sa = sa; sa_newcachelwp(l); } prev = p->p_sa->sa_upcall; p->p_sa->sa_upcall = SCARG(uap, new); if (SCARG(uap, old)) { error = copyout(&prev, SCARG(uap, old), sizeof(prev)); if (error) return (error); } return (0); } int sys_sa_stacks(struct lwp *l, void *v, register_t *retval) { struct sys_sa_stacks_args /* { syscallarg(int) num; syscallarg(stack_t *) stacks; } */ *uap = v; struct sadata *sa = l->l_proc->p_sa; struct lwp *l2; int count, error, f, i; /* We have to be using scheduler activations */ if (sa == NULL) return (EINVAL); count = SCARG(uap, num); if (count < 0) return (EINVAL); count = min(count, SA_NUMSTACKS - sa->sa_nstacks); SA_LWP_STATE_LOCK(l, f); error = copyin(SCARG(uap, stacks), sa->sa_stacks + sa->sa_nstacks, sizeof(stack_t) * count); SA_LWP_STATE_UNLOCK(l, f); if (error) return (error); for (i = 
sa->sa_nstacks; i < sa->sa_nstacks + count; i++) { LIST_FOREACH(l2, &l->l_proc->p_lwps, l_sibling) { if ((l2->l_upcallstack == sa->sa_stacks[i].ss_sp)) { l2->l_upcallstack = NULL; wakeup(&l2->l_upcallstack); } } } if ((sa->sa_nstacks == 0) && (sa->sa_vp_wait_count != 0)) l->l_flag |= L_SA_UPCALL; /* * Save addresses of the first and last stack on initial load * the pagefault code uses the saved address to detect threads * running on an upcall stack. * XXX assumes all stacks are adjoining * XXX assumes initial load includes all stacks ever used */ if (sa->sa_vp_stacks_low == 0) { sa->sa_vp_stacks_low = (uintptr_t)sa->sa_stacks[0].ss_sp; sa->sa_vp_stacks_high = (uintptr_t)sa->sa_stacks[count - 1].ss_sp + sa->sa_stacks[count - 1].ss_size; DPRINTFN(11,("sys_sa_stacks(%d.%d): low 0x%llx high 0x%llx\n", l->l_proc->p_pid, l->l_lid, (unsigned long long)sa->sa_vp_stacks_low, (unsigned long long)sa->sa_vp_stacks_high)); } sa->sa_nstacks += count; DPRINTFN(9, ("sa_stacks(%d.%d) nstacks + %d = %2d\n", l->l_proc->p_pid, l->l_lid, count, sa->sa_nstacks)); *retval = count; return (0); } int sys_sa_enable(struct lwp *l, void *v, register_t *retval) { struct proc *p = l->l_proc; struct sadata *sa = p->p_sa; int error; DPRINTF(("sys_sa_enable(%d.%d)\n", l->l_proc->p_pid, l->l_lid)); /* We have to be using scheduler activations */ if (sa == NULL) return (EINVAL); if (p->p_flag & P_SA) /* Already running! */ return (EBUSY); error = sa_upcall(l, SA_UPCALL_NEWPROC, l, NULL, 0, NULL); if (error) return (error); p->p_flag |= P_SA; l->l_flag |= L_SA; /* We are now an activation LWP */ /* Assign this LWP to the virtual processor */ sa->sa_vp = l; /* This will not return to the place in user space it came from. 
*/ return (0); } int sys_sa_setconcurrency(struct lwp *l, void *v, register_t *retval) { struct sys_sa_setconcurrency_args /* { syscallarg(int) concurrency; } */ *uap = v; struct sadata *sa = l->l_proc->p_sa; DPRINTF(("sys_sa_concurrency(%d.%d)\n", l->l_proc->p_pid, l->l_lid)); /* We have to be using scheduler activations */ if (sa == NULL) return (EINVAL); if (SCARG(uap, concurrency) < 1) return (EINVAL); *retval = sa->sa_concurrency; /* * Concurrency greater than the number of physical CPUs does * not make sense. * XXX Should we ever support hot-plug CPUs, this will need * adjustment. */ sa->sa_concurrency = min(SCARG(uap, concurrency), 1 /* XXX ncpus */); return (0); } int sys_sa_yield(struct lwp *l, void *v, register_t *retval) { struct proc *p = l->l_proc; if (p->p_sa == NULL || !(p->p_flag & P_SA)) { DPRINTFN(1,("sys_sa_yield(%d.%d) proc %p not SA (p_sa %p, flag %s)\n", p->p_pid, l->l_lid, p, p->p_sa, p->p_flag & P_SA ? "T" : "F")); return (EINVAL); } sa_yield(l); return (0); } void sa_yield(struct lwp *l) { #if 0 struct lwp *l2; #endif struct proc *p = l->l_proc; struct sadata *sa = p->p_sa; int s, ret; /* * If we're the last running LWP, stick around to recieve * signals. */ #if 0 if (p->p_nrlwps == 1) { #endif DPRINTFN(1,("sa_yield(%d.%d) going dormant\n", p->p_pid, l->l_lid)); /* * A signal will probably wake us up. Worst case, the upcall * happens and just causes the process to yield again. */ SCHED_ASSERT_UNLOCKED(); sa_vp_donate(l); SCHED_ASSERT_UNLOCKED(); s = splsched(); /* Protect from timer expirations */ KDASSERT(sa->sa_vp == l); /* * If we were told to make an upcall or exit before * the splsched(), make sure we process it instead of * going to sleep. It might make more sense for this to * be handled inside of tsleep.... 
*/ ret = 0; while ((ret == 0) && (p->p_userret == NULL)) { sa->sa_idle = l; l->l_flag &= ~L_SA; SCHED_ASSERT_UNLOCKED(); ret = tsleep((caddr_t) l, PUSER | PCATCH, "sawait", 0); SCHED_ASSERT_UNLOCKED(); l->l_flag |= L_SA; sa->sa_idle = NULL; splx(s); sa_vp_donate(l); KDASSERT(sa->sa_vp == l); s = splsched(); /* Protect from timer expirations */ } l->l_flag |= L_SA_UPCALL; splx(s); DPRINTFN(1,("sa_yield(%d.%d) returned\n", p->p_pid, l->l_lid)); #if 0 } else { DPRINTFN(1,("sa_yield(%d.%d) stepping aside\n", p->p_pid, l->l_lid)); SCHED_LOCK(s); l2 = sa->sa_woken; sa->sa_woken = NULL; sa->sa_vp = NULL; p->p_nrlwps--; sa_putcachelwp(p, l); KDASSERT((l2 == NULL) || (l2->l_proc == l->l_proc)); KDASSERT((l2 == NULL) || (l2->l_stat == LSRUN)); mi_switch(l, l2); /* * This isn't quite a NOTREACHED; we may get here if * the process exits before this LWP is reused. In * that case, we want to call lwp_exit(), which will * be done by the userret() hooks. */ SCHED_ASSERT_UNLOCKED(); splx(s); KDASSERT(p->p_flag & P_WEXIT); /* mostly NOTREACHED */ } #endif } int sys_sa_preempt(struct lwp *l, void *v, register_t *retval) { /* XXX Implement me. */ return (ENOSYS); } /* XXX Hm, naming collision. */ void sa_preempt(struct lwp *l) { struct proc *p = l->l_proc; struct sadata *sa = p->p_sa; if (sa->sa_flag & SA_FLAG_PREEMPT) sa_upcall(l, SA_UPCALL_PREEMPTED, l, NULL, 0, NULL); } /* * Help userspace library resolve locks and critical sections * - recycles the calling LWP and its stack if it was not preempted * and idle the VP until the sa_id LWP unblocks * - recycles the to be unblocked LWP if the calling LWP was preempted * and returns control to the userspace library so it can switch to * the blocked thread * This is used if a thread blocks because of a pagefault and is in a * critical section in the userspace library and the critical section * resolving code cannot continue until the blocked thread is unblocked. 
* If the userspace library switches to the blocked thread in the second
 * case, it will either continue (because the pagefault has been handled)
 * or it will pagefault again.  The second pagefault will be detected by
 * the double pagefault code and the VP will idle until the pagefault
 * has been handled.
 *
 * Returns EINVAL if the process has no SA data or the stack array is
 * already full, the error from copyin() if either user pointer cannot
 * be read, ESRCH if no LWP of the process has the id "sa_id", and 0
 * otherwise.
 */
int
sys_sa_unblockyield(struct lwp *l, void *v, register_t *retval)
{
	struct sys_sa_unblockyield_args /* {
		syscallarg(int) sa_id;
		syscallarg(void *) up_preempted;
		syscallarg(stack_t *) up_stack;
	} */ *uap = v;
	struct sadata *sa = l->l_proc->p_sa;
	struct proc *p = l->l_proc;
	struct lwp *l2;
	int error, f, s;
	void *preempted;

	if (sa == NULL)
		return (EINVAL);

	if (sa->sa_nstacks == SA_NUMSTACKS)
		return (EINVAL);

	/*
	 * The stack is copied into the first free slot but sa_nstacks
	 * is only advanced further down, in the "not preempted" case.
	 */
	SA_LWP_STATE_LOCK(l, f);
	error = copyin(SCARG(uap, up_stack), sa->sa_stacks + sa->sa_nstacks,
	    sizeof(stack_t));
	if (error) {
		SA_LWP_STATE_UNLOCK(l, f);
		return (error);
	}

	if (SCARG(uap, up_preempted) != NULL) {
		error = copyin(SCARG(uap, up_preempted), &preempted,
		    sizeof(void *));
		if (error) {
			SA_LWP_STATE_UNLOCK(l, f);
			return (error);
		}
	} else
		preempted = (void *)-1;
	SA_LWP_STATE_UNLOCK(l, f);

	SCHED_LOCK(s);
	LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
		if (l2->l_lid == SCARG(uap, sa_id)) {
			KDASSERT(l2->l_upcallstack ==
			    sa->sa_stacks[sa->sa_nstacks].ss_sp);
			break;
		}
	}

	/*
	 * sa_id comes straight from userland.  If it names no LWP of
	 * this process the loop above leaves l2 == NULL; fail the call
	 * instead of dereferencing it below.
	 */
	if (l2 == NULL) {
		SCHED_UNLOCK(s);
		return (ESRCH);
	}

	/*
	 * upcall not interrupted: (*up_preempted == NULL)
	 * - lwp ready: (wchan == upcallstacks)
	 * ==> recycle stack, put lwp on vp,
	 *     unsleep lwp, make runnable, recycle upcall lwp (=l)
	 * - lwp not ready:
	 * ==> recycle stack, put lwp on vp, recycle upcall lwp (=l)
	 *
	 * upcall interrupted: (*up_preempted != NULL || up_preempted == NULL)
	 * ==> recycle upcall lwp
	 */

	if (preempted != NULL) {
		DPRINTFN(11,("sys_sa_unblockyield(%d.%d) recycle %d "
		    "(was %sready) upcall stack %p\n",
		    p->p_pid, l->l_lid, l2->l_lid,
		    (l2->l_wchan == &l2->l_upcallstack) ? "" : "not ",
		    sa->sa_stacks[sa->sa_nstacks].ss_sp));

		/* (void *)-1 tells sa_upcall_userret() to recycle l2. */
		l2->l_upcallstack = (void *)-1;
		if (l2->l_wchan == &l2->l_upcallstack) {
			unsleep(l2);
			if (l2->l_stat == LSSLEEP) {
				l2->l_slptime = 0;
				l2->l_stat = LSRUN;
				l2->l_proc->p_nrlwps++;
				if (l2->l_flag & L_INMEM)
					setrunqueue(l2);
				else
					sched_wakeup((caddr_t)&proc0);
			}
		}
	} else {
		DPRINTFN(11,("sys_sa_unblockyield(%d.%d) resuming %d "
		    "(is %sready) upcall stack %p\n",
		    p->p_pid, l->l_lid, l2->l_lid,
		    (l2->l_wchan == &l2->l_upcallstack) ? "" : "not ",
		    sa->sa_stacks[sa->sa_nstacks].ss_sp));

		sa->sa_vp = l2;
		sa->sa_nstacks += 1;
		l2->l_flag &= ~L_SA_BLOCKING;
		l2->l_upcallstack = NULL;
		if (l2->l_wchan == &l2->l_upcallstack) {
			unsleep(l2);
			if (l2->l_stat == LSSLEEP) {
				l2->l_slptime = 0;
				l2->l_stat = LSRUN;
				l2->l_proc->p_nrlwps++;
				if (l2->l_flag & L_INMEM)
					setrunqueue(l2);
				else
					sched_wakeup((caddr_t)&proc0);
			}
		}
		p->p_nrlwps--;
		sa_putcachelwp(p, l);
		mi_switch(l, NULL);
		/* mostly NOTREACHED */
		SCHED_ASSERT_UNLOCKED();
		splx(s);
		KDASSERT(p->p_flag & P_WEXIT);
		lwp_exit(l);
	}

	SCHED_UNLOCK(s);
	return (0);
}

/*
 * Set up the user-level stack and trapframe to do an upcall.
 *
 * NOTE: This routine WILL FREE "arg" in the case of failure!  Callers
 * should not touch the "arg" pointer once calling sa_upcall().
*/ int sa_upcall(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted, size_t argsize, void *arg) { struct sadata_upcall *sau; struct sadata *sa = l->l_proc->p_sa; stack_t st; int error, f; /* XXX prevent recursive upcalls if we sleep formemory */ SA_LWP_STATE_LOCK(l, f); sau = sadata_upcall_alloc(1); SA_LWP_STATE_UNLOCK(l, f); if (sa->sa_nstacks == 0) { /* assign to assure that it gets freed */ sau->sau_type = type & ~SA_UPCALL_DEFER; sau->sau_arg = arg; sadata_upcall_free(sau); return (ENOMEM); } st = sa->sa_stacks[--sa->sa_nstacks]; DPRINTFN(9,("sa_upcall(%d.%d) nstacks-- = %2d\n", l->l_proc->p_pid, l->l_lid, sa->sa_nstacks)); error = sa_upcall0(l, type, event, interrupted, argsize, arg, sau, &st); if (error) { sa->sa_stacks[sa->sa_nstacks++] = st; sadata_upcall_free(sau); return (error); } SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next); l->l_flag |= L_SA_UPCALL; return (0); } int sa_upcall0(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted, size_t argsize, void *arg, struct sadata_upcall *sau, stack_t *st) { KDASSERT((event == NULL) || (event != interrupted)); sau->sau_flags = 0; sau->sau_type = type & ~SA_UPCALL_DEFER; sau->sau_argsize = argsize; sau->sau_arg = arg; sau->sau_stack = *st; if (type & SA_UPCALL_DEFER) { sau->sau_state.deferred.e_lwp = event; sau->sau_state.deferred.i_lwp = interrupted; sau->sau_flags = SAU_FLAG_DEFERRED; } else sa_upcall_getstate(sau, event, interrupted); return (0); } void sa_upcall_getstate(struct sadata_upcall *sau, struct lwp *event, struct lwp *interrupted) { if (event) { getucontext(event, &sau->sau_state.captured.e_ctx); sau->sau_state.captured.e_sa.sa_context = (ucontext_t *) (intptr_t)((_UC_MACHINE_SP(&sau->sau_state.captured.e_ctx) - sizeof(ucontext_t)) #ifdef _UC_UCONTEXT_ALIGN & _UC_UCONTEXT_ALIGN #endif ); sau->sau_state.captured.e_sa.sa_id = event->l_lid; sau->sau_state.captured.e_sa.sa_cpu = 0; /* XXX extract from l_cpu */ } else sau->sau_state.captured.e_sa.sa_context = NULL; 
if (interrupted) { getucontext(interrupted, &sau->sau_state.captured.i_ctx); sau->sau_state.captured.i_sa.sa_context = (ucontext_t *) (intptr_t)((_UC_MACHINE_SP(&sau->sau_state.captured.i_ctx) - sizeof(ucontext_t)) #ifdef _UC_UCONTEXT_ALIGN & _UC_UCONTEXT_ALIGN #endif ); sau->sau_state.captured.i_sa.sa_id = interrupted->l_lid; sau->sau_state.captured.i_sa.sa_cpu = 0; /* XXX extract from l_cpu */ } else sau->sau_state.captured.i_sa.sa_context = NULL; } /* * Detect double pagefaults and pagefaults on upcalls. * - double pagefaults are detected by comparing the previous faultaddr * against the current faultaddr * - pagefaults on upcalls are detected by checking if the userspace * thread is running on an upcall stack */ static int sa_pagefault(struct lwp *l, ucontext_t *l_ctx) { struct proc *p; struct sadata *sa; vaddr_t usp; p = l->l_proc; sa = p->p_sa; KDASSERT(sa->sa_vp == l); if (sa->sa_vp_faultaddr == sa->sa_vp_ofaultaddr) { DPRINTFN(10,("sa_check_upcall(%d.%d) double page fault\n", p->p_pid, l->l_lid)); return 1; } usp = (uintptr_t)_UC_MACHINE_SP(l_ctx); if ((usp >= sa->sa_vp_stacks_low) && (usp < sa->sa_vp_stacks_high)) { DPRINTFN(10,("sa_check_upcall(%d.%d) upcall page fault\n", p->p_pid, l->l_lid)); return 1; } sa->sa_vp_ofaultaddr = sa->sa_vp_faultaddr; return 0; } /* * Called by tsleep(). Block current LWP and switch to another. * * WE ARE NOT ALLOWED TO SLEEP HERE! WE ARE CALLED FROM WITHIN * TSLEEP() ITSELF! We are called with sched_lock held, and must * hold it right through the mi_switch() call. */ void sa_switch(struct lwp *l, int type) { struct proc *p = l->l_proc; struct sadata *sa = p->p_sa; struct sadata_upcall *sau; struct lwp *l2; stack_t st; int error; DPRINTFN(4,("sa_switch(%d.%d type %d VP %d)\n", p->p_pid, l->l_lid, type, sa->sa_vp ? 
sa->sa_vp->l_lid : 0)); SCHED_ASSERT_LOCKED(); if (p->p_flag & P_WEXIT) { mi_switch(l,0); return; } if (sa->sa_vp == l) { /* * Case 1: we're blocking for the first time; generate * a SA_BLOCKED upcall and allocate resources for the * UNBLOCKED upcall. */ /* * The process of allocating a new LWP could cause * sleeps. We're called from inside sleep, so that * would be Bad. Therefore, we must use a cached new * LWP. The first thing that this new LWP must do is * allocate another LWP for the cache. */ l2 = sa_getcachelwp(p); if (l2 == NULL) { /* XXXSMP */ /* No upcall for you! */ /* XXX The consequences of this are more subtle and * XXX the recovery from this situation deserves * XXX more thought. */ /* XXXUPSXXX Should only happen with concurrency > 1 */ #ifdef DIAGNOSTIC printf("sa_switch(%d.%d): no cached LWP for upcall.\n", p->p_pid, l->l_lid); #endif mi_switch(l, NULL); return; } /* * XXX We need to allocate the sadata_upcall structure here, * XXX since we can't sleep while waiting for memory inside * XXX sa_upcall(). It would be nice if we could safely * XXX allocate the sadata_upcall structure on the stack, here. */ if (sa->sa_nstacks == 0) { #ifdef DIAGNOSTIC printf("sa_switch(%d.%d flag %x): Not enough stacks.\n", p->p_pid, l->l_lid, l->l_flag); #endif goto sa_upcall_failed; } sau = sadata_upcall_alloc(0); if (sau == NULL) { #ifdef DIAGNOSTIC printf("sa_switch(%d.%d): " "couldn't allocate upcall data.\n", p->p_pid, l->l_lid); #endif goto sa_upcall_failed; } st = sa->sa_stacks[--sa->sa_nstacks]; DPRINTFN(9,("sa_switch(%d.%d) nstacks-- = %2d\n", l->l_proc->p_pid, l->l_lid, sa->sa_nstacks)); cpu_setfunc(l2, sa_switchcall, l2); error = sa_upcall0(l2, SA_UPCALL_BLOCKED, l, NULL, 0, NULL, sau, &st); if (error) { #ifdef DIAGNOSTIC printf("sa_switch(%d.%d): Error %d from sa_upcall()\n", p->p_pid, l->l_lid, error); #endif goto sa_upcall_failed; } /* * Perform the double/upcall pagefault check. 
* We do this only here since we need l's ucontext to * get l's userspace stack. sa_upcall0 above has saved * it for us. * The L_SA_PAGEFAULT flag is set in the MD * pagefault code to indicate a pagefault. The MD * pagefault code also saves the faultaddr for us. */ if ((l->l_flag & L_SA_PAGEFAULT) && sa_pagefault(l, &sau->sau_state.captured.e_ctx) != 0) { sadata_upcall_free(sau); sa->sa_stacks[sa->sa_nstacks++] = st; sa_putcachelwp(p, l2); PRELE(l2); /* Remove the artificial hold-count */ mi_switch(l, NULL); return; } SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next); l2->l_flag |= L_SA_UPCALL; l->l_flag |= L_SA_BLOCKING; l->l_upcallstack = st.ss_sp; l2->l_priority = l2->l_usrpri; sa->sa_vp = l2; setrunnable(l2); PRELE(l2); /* Remove the artificial hold-count */ KDASSERT(l2 != l); } else if (sa->sa_vp != NULL) { /* * Case 2: We've been woken up while another LWP was * on the VP, but we're going back to sleep without * having returned to userland and delivering the * SA_UNBLOCKED upcall (select and poll cause this * kind of behavior a lot). We just switch back to the * LWP that had been running and let it have another * go. If the LWP on the VP was idling, don't make it * run again, though. */ if (sa->sa_idle) l2 = NULL; else { l2 = sa->sa_vp; /* XXXUPSXXX Unfair advantage for l2 ? */ if((l2->l_stat != LSRUN) || ((l2->l_flag & L_INMEM) == 0)) l2 = NULL; } } else { #if 0 /* * Case 3: The VP is empty. As in case 2, we were * woken up and called tsleep again, but additionally, * the running LWP called sa_yield() between our wakeup() and * when we got to run again (in fact, it probably was * responsible for switching to us, via sa_woken). * The right thing is to pull a LWP off the cache and have * it jump straight back into sa_yield. 
*/ l2 = sa_getcachelwp(p); if (l2 == NULL) { #ifdef DIAGNOSTIC printf("sa_switch(%d.%d): no cached LWP for reidling.\n", p->p_pid, l->l_lid); #endif mi_switch(l, NULL); return; } #else mi_switch(l, NULL); return; #endif sa_upcall_failed: #if 0 cpu_setfunc(l2, sa_yieldcall, l2); l2->l_priority = l2->l_usrpri; setrunnable(l2); PRELE(l2); /* Remove the artificial hold-count */ #else /* sa_putcachelwp does not block because we have a hold count on l2 */ sa_putcachelwp(p, l2); PRELE(l2); /* Remove the artificial hold-count */ mi_switch(l, NULL); return; #endif } DPRINTFN(4,("sa_switch(%d.%d) switching to LWP %d.\n", p->p_pid, l->l_lid, l2 ? l2->l_lid : 0)); mi_switch(l, l2); DPRINTFN(4,("sa_switch(%d.%d flag %x) returned.\n", p->p_pid, l->l_lid, l->l_flag)); KDASSERT(l->l_wchan == 0); SCHED_ASSERT_UNLOCKED(); if (sa->sa_woken == l) sa->sa_woken = NULL; /* * The process is trying to exit. In this case, the last thing * we want to do is put something back on the cache list. * It's also not useful to make the upcall at all, so just punt. */ if (p->p_flag & P_WEXIT) return; /* * Okay, now we've been woken up. 
This means that it's time * for a SA_UNBLOCKED upcall when we get back to userlevel */ l->l_flag |= L_SA_UPCALL; } void sa_switchcall(void *arg) { struct lwp *l; struct proc *p; struct sadata *sa; int f; l = arg; p = l->l_proc; sa = p->p_sa; sa->sa_vp = l; DPRINTFN(6,("sa_switchcall(%d.%d)\n", p->p_pid, l->l_lid)); if (LIST_EMPTY(&sa->sa_lwpcache)) { /* Allocate the next cache LWP */ DPRINTFN(6,("sa_switchcall(%d.%d) allocating LWP\n", p->p_pid, l->l_lid)); SA_LWP_STATE_LOCK(l, f); sa_newcachelwp(l); SA_LWP_STATE_UNLOCK(l, f); } upcallret(l); } #if 0 void sa_yieldcall(void *arg) { struct lwp *l; struct proc *p; struct sadata *sa; l = arg; p = l->l_proc; sa = p->p_sa; sa->sa_vp = l; DPRINTFN(6,("sa_yieldcall(%d.%d)\n", p->p_pid, l->l_lid)); if (LIST_EMPTY(&sa->sa_lwpcache)) { /* Allocate the next cache LWP */ DPRINTFN(6,("sa_yieldcall(%d.%d) allocating LWP\n", p->p_pid, l->l_lid)); sa_newcachelwp(l); } sa_yield(l); upcallret(l); } #endif static int sa_newcachelwp(struct lwp *l) { struct proc *p; struct lwp *l2; vaddr_t uaddr; boolean_t inmem; int s; p = l->l_proc; inmem = uvm_uarea_alloc(&uaddr); if (__predict_false(uaddr == 0)) { return (ENOMEM); } else { newlwp(l, p, uaddr, inmem, 0, NULL, 0, child_return, 0, &l2); /* We don't want this LWP on the process's main LWP list, but * newlwp helpfully puts it there. Unclear if newlwp should * be tweaked. */ SCHED_LOCK(s); sa_putcachelwp(p, l2); SCHED_UNLOCK(s); } return (0); } /* * Take a normal process LWP and place it in the SA cache. * LWP must not be running! */ void sa_putcachelwp(struct proc *p, struct lwp *l) { struct sadata *sa; SCHED_ASSERT_LOCKED(); sa = p->p_sa; LIST_REMOVE(l, l_sibling); p->p_nlwps--; l->l_stat = LSSUSPENDED; l->l_flag |= (L_DETACHED | L_SA); PHOLD(l); /* XXX lock sadata */ DPRINTFN(5,("sa_putcachelwp(%d.%d) Adding LWP %d to cache\n", p->p_pid, curlwp->l_lid, l->l_lid)); LIST_INSERT_HEAD(&sa->sa_lwpcache, l, l_sibling); sa->sa_ncached++; /* XXX unlock */ } /* * Fetch a LWP from the cache. 
*/ struct lwp * sa_getcachelwp(struct proc *p) { struct sadata *sa; struct lwp *l; SCHED_ASSERT_LOCKED(); l = NULL; sa = p->p_sa; /* XXX lock sadata */ if (sa->sa_ncached > 0) { sa->sa_ncached--; l = LIST_FIRST(&sa->sa_lwpcache); LIST_REMOVE(l, l_sibling); LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling); p->p_nlwps++; DPRINTFN(5,("sa_getcachelwp(%d.%d) Got LWP %d from cache.\n", p->p_pid, curlwp->l_lid, l->l_lid)); } /* XXX unlock */ return l; } void sa_upcall_userret(struct lwp *l) { struct proc *p; struct sadata *sa; struct sa_t **sapp, *sap; struct sadata_upcall *sau; struct sa_t self_sa; struct sa_t *sas[3]; stack_t st; void *stack, *ap; ucontext_t u, *up; int f, i, nsas, nint, nevents, sig, s, type; p = l->l_proc; sa = p->p_sa; SCHED_ASSERT_UNLOCKED(); KERNEL_PROC_LOCK(l); SA_LWP_STATE_LOCK(l, f); DPRINTFN(7,("sa_upcall_userret(%d.%d %x) \n", p->p_pid, l->l_lid, l->l_flag)); while (l->l_upcallstack != NULL) { if (l->l_upcallstack == (void *)-1) { SCHED_LOCK(s); l->l_flag &= ~(L_SA_UPCALL|L_SA_BLOCKING); l->l_upcallstack = NULL; p->p_nrlwps--; sa_putcachelwp(p, l); SA_LWP_STATE_UNLOCK(l, f); KERNEL_PROC_UNLOCK(l); mi_switch(l, NULL); /* mostly NOTREACHED */ SCHED_ASSERT_UNLOCKED(); splx(s); KERNEL_PROC_LOCK(l); KDASSERT(p->p_flag & P_WEXIT); lwp_exit(l); } if ((l->l_flag & L_SA_BLOCKING) == 0) { l->l_upcallstack = NULL; break; } tsleep((caddr_t) &l->l_upcallstack, PWAIT, "saunblock", 0); if (p->p_flag & P_WEXIT) lwp_exit(l); } if (l->l_flag & L_SA_BLOCKING) { /* Invoke an "unblocked" upcall */ struct lwp *l2; DPRINTFN(8,("sa_upcall_userret(%d.%d) unblocking\n", p->p_pid, l->l_lid)); sau = sadata_upcall_alloc(1); sau->sau_arg = NULL; if (p->p_flag & P_WEXIT) { sadata_upcall_free(sau); lwp_exit(l); } SCHED_ASSERT_UNLOCKED(); l2 = sa_vp_repossess(l); KDASSERT(sa->sa_nstacks > 0); st = sa->sa_stacks[--sa->sa_nstacks]; SCHED_ASSERT_UNLOCKED(); if (l2 == NULL) { sadata_upcall_free(sau); /* No need to put st back */ lwp_exit(l); } DPRINTFN(9,("sa_upcall_userret(%d.%d) 
nstacks-- = %2d\n", l->l_proc->p_pid, l->l_lid, sa->sa_nstacks)); if (sa_upcall0(l, SA_UPCALL_UNBLOCKED | SA_UPCALL_DEFER, l, l2, 0, NULL, sau, &st) != 0) { /* * We were supposed to deliver an UNBLOCKED * upcall, but don't have resources to do so. */ #ifdef DIAGNOSTIC printf("sa_upcall_userret: out of upcall resources" " for %d.%d\n", p->p_pid, l->l_lid); #endif sigexit(l, SIGABRT); /* NOTREACHED */ } SIMPLEQ_INSERT_TAIL(&sa->sa_upcalls, sau, sau_next); l->l_flag |= L_SA_UPCALL; l->l_flag &= ~L_SA_BLOCKING; /* We migth have sneaked past signal handling and userret */ SA_LWP_STATE_UNLOCK(l, f); KERNEL_PROC_UNLOCK(l); /* take pending signals */ while ((sig = CURSIG(l)) != 0) postsig(sig); /* Invoke per-process kernel-exit handling, if any */ if (p->p_userret) (p->p_userret)(l, p->p_userret_arg); KERNEL_PROC_LOCK(l); SA_LWP_STATE_LOCK(l, f); } KDASSERT(sa->sa_vp == l); if (SIMPLEQ_EMPTY(&sa->sa_upcalls)) { l->l_flag &= ~L_SA_UPCALL; sa_vp_donate(l); SA_LWP_STATE_UNLOCK(l, f); KERNEL_PROC_UNLOCK(l); return; } sau = SIMPLEQ_FIRST(&sa->sa_upcalls); SIMPLEQ_REMOVE_HEAD(&sa->sa_upcalls, sau_next); if (sau->sau_flags & SAU_FLAG_DEFERRED) { sa_upcall_getstate(sau, sau->sau_state.deferred.e_lwp, sau->sau_state.deferred.i_lwp); } stack = (void *) (((uintptr_t)sau->sau_stack.ss_sp + sau->sau_stack.ss_size) & ~ALIGNBYTES); self_sa.sa_id = l->l_lid; self_sa.sa_cpu = 0; /* XXX l->l_cpu; */ sas[0] = &self_sa; nsas = 1; nevents = 0; nint = 0; if (sau->sau_state.captured.e_sa.sa_context != NULL) { if (copyout(&sau->sau_state.captured.e_ctx, sau->sau_state.captured.e_sa.sa_context, sizeof(ucontext_t)) != 0) { #ifdef DIAGNOSTIC printf("sa_upcall_userret(%d.%d): couldn't copyout" " context of event LWP %d\n", p->p_pid, l->l_lid, sau->sau_state.captured.e_sa.sa_id); #endif sigexit(l, SIGILL); /* NOTREACHED */ } sas[nsas] = &sau->sau_state.captured.e_sa; nsas++; nevents = 1; } if (sau->sau_state.captured.i_sa.sa_context != NULL) { KDASSERT(sau->sau_state.captured.i_sa.sa_context != 
sau->sau_state.captured.e_sa.sa_context); if (copyout(&sau->sau_state.captured.i_ctx, sau->sau_state.captured.i_sa.sa_context, sizeof(ucontext_t)) != 0) { #ifdef DIAGNOSTIC printf("sa_upcall_userret(%d.%d): couldn't copyout" " context of interrupted LWP %d\n", p->p_pid, l->l_lid, sau->sau_state.captured.i_sa.sa_id); #endif sigexit(l, SIGILL); /* NOTREACHED */ } sas[nsas] = &sau->sau_state.captured.i_sa; nsas++; nint = 1; } /* Copy out the activation's ucontext */ u.uc_stack = sau->sau_stack; u.uc_flags = _UC_STACK; up = stack; up--; if (copyout(&u, up, sizeof(ucontext_t)) != 0) { sadata_upcall_free(sau); #ifdef DIAGNOSTIC printf("sa_upcall_userret: couldn't copyout activation" " ucontext for %d.%d\n", l->l_proc->p_pid, l->l_lid); #endif sigexit(l, SIGILL); /* NOTREACHED */ } sas[0]->sa_context = up; /* Next, copy out the sa_t's and pointers to them. */ sap = (struct sa_t *) up; sapp = (struct sa_t **) (sap - nsas); for (i = nsas - 1; i >= 0; i--) { sap--; sapp--; if ((copyout(sas[i], sap, sizeof(struct sa_t)) != 0) || (copyout(&sap, sapp, sizeof(struct sa_t *)) != 0)) { /* Copying onto the stack didn't work. Die. */ sadata_upcall_free(sau); #ifdef DIAGNOSTIC printf("sa_upcall_userret: couldn't copyout sa_t " "%d for %d.%d\n", i, p->p_pid, l->l_lid); #endif sigexit(l, SIGILL); /* NOTREACHED */ } } /* Copy out the arg, if any */ /* xxx assume alignment works out; everything so far has been * a structure, so... */ if (sau->sau_arg) { ap = (char *)sapp - sau->sau_argsize; stack = ap; if (copyout(sau->sau_arg, ap, sau->sau_argsize) != 0) { /* Copying onto the stack didn't work. Die. 
*/ sadata_upcall_free(sau); #ifdef DIAGNOSTIC printf("sa_upcall_userret(%d.%d): couldn't copyout" " sadata_upcall arg %p size %ld to %p \n", p->p_pid, l->l_lid, sau->sau_arg, (long) sau->sau_argsize, ap); #endif sigexit(l, SIGILL); /* NOTREACHED */ } } else { ap = 0; stack = sapp; } type = sau->sau_type; sadata_upcall_free(sau); DPRINTFN(7,("sa_upcall_userret(%d.%d): type %d\n",p->p_pid, l->l_lid, type)); cpu_upcall(l, type, nevents, nint, sapp, ap, stack, sa->sa_upcall); if (SIMPLEQ_EMPTY(&sa->sa_upcalls)) { l->l_flag &= ~L_SA_UPCALL; sa_vp_donate(l); /* May not be reached */ } /* May not be reached */ SA_LWP_STATE_UNLOCK(l, f); KERNEL_PROC_UNLOCK(l); } #if 0 static struct lwp * sa_vp_repossess(struct lwp *l) { struct lwp *l2; struct proc *p = l->l_proc; struct sadata *sa = p->p_sa; int s; /* * Put ourselves on the virtual processor and note that the * previous occupant of that position was interrupted. */ l2 = sa->sa_vp; sa->sa_vp = l; if (sa->sa_idle == l2) sa->sa_idle = NULL; KDASSERT(l2 != l); if (l2) { SCHED_LOCK(s); switch (l2->l_stat) { case LSRUN: remrunqueue(l2); p->p_nrlwps--; break; case LSSLEEP: unsleep(l2); l2->l_flag &= ~L_SINTR; break; #ifdef DIAGNOSTIC default: panic("SA VP %d.%d is in state %d, not running" " or sleeping\n", p->p_pid, l2->l_lid, l2->l_stat); #endif } sa_putcachelwp(p, l2); /* * XXX SMP race! Need to be sure that l2's state is * captured before the upcall before we make it possible * for another processor to grab it. 
*/ SCHED_UNLOCK(s); } return l2; } #endif static struct lwp * sa_vp_repossess(struct lwp *l) { struct lwp *l2; struct proc *p = l->l_proc; struct sadata *sa = p->p_sa; int s; SCHED_ASSERT_UNLOCKED(); l->l_flag |= L_SA_WANTS_VP; sa->sa_vp_wait_count++; if(sa->sa_idle != NULL) { /* XXXUPSXXX Simple but slow */ wakeup(sa->sa_idle); } else { SCHED_LOCK(s); sa->sa_vp->l_flag |= L_SA_UPCALL; /* kick the process */ signotify(p); SCHED_UNLOCK(s); } SCHED_ASSERT_UNLOCKED(); DPRINTFN(1,("sa_vp_repossess(%d.%d): want vp\n", p->p_pid, l->l_lid)); while(sa->sa_vp != l) { tsleep((caddr_t) l, PWAIT, "saprocessor", 0); /* XXXUPSXXX NEED TO STOP THE LWP HERE ON REQUEST ??? */ if (p->p_flag & P_WEXIT) { l->l_flag &= ~L_SA_WANTS_VP; sa->sa_vp_wait_count--; return 0; } } DPRINTFN(1,("sa_vp_repossess(%d.%d): on vp\n", p->p_pid, l->l_lid)); l2 = sa->sa_old_lwp; return l2; } static void sa_vp_donate(struct lwp *l) { struct proc *p = l->l_proc; struct sadata *sa = p->p_sa; struct lwp *l2; int s; SCHED_ASSERT_UNLOCKED(); /* Nobody wants the vp */ if (sa->sa_vp_wait_count == 0) return; /* No stack for an unblock call */ if (sa->sa_nstacks == 0) return; LIST_FOREACH(l2, &p->p_lwps, l_sibling) { if(l2->l_flag & L_SA_WANTS_VP) { SCHED_LOCK(s); sa_putcachelwp(p, l); sa->sa_vp = l2; sa->sa_vp_wait_count--; l2->l_flag &= ~L_SA_WANTS_VP; sa->sa_old_lwp = l; sched_wakeup((caddr_t) l2); KERNEL_PROC_UNLOCK(l); if((l2->l_stat == LSRUN) && ((l2->l_flag & L_INMEM) != 0)) mi_switch(l,l2); else mi_switch(l,NULL); /* * This isn't quite a NOTREACHED; we may get here if * the process exits before this LWP is reused. In * that case, we want to call lwp_exit(), which will * be done by the userret() hooks. 
*/ SCHED_ASSERT_UNLOCKED(); splx(s); KERNEL_PROC_LOCK(l); KDASSERT(p->p_flag & P_WEXIT); /* mostly NOTREACHED */ lwp_exit(l); } } #ifdef DIAGNOSTIC printf("sa_vp_donate couldn't find someone to donate the CPU to \n"); #endif } #ifdef DEBUG int debug_print_sa(struct proc *); int debug_print_lwp(struct lwp *); int debug_print_proc(int); int debug_print_proc(int pid) { struct proc *p; p = pfind(pid); if (p == NULL) printf("No process %d\n", pid); else debug_print_sa(p); return 0; } int debug_print_sa(struct proc *p) { struct lwp *l; struct sadata *sa; printf("Process %d (%s), state %d, address %p, flags %x\n", p->p_pid, p->p_comm, p->p_stat, p, p->p_flag); printf("LWPs: %d (%d running, %d zombies)\n", p->p_nlwps, p->p_nrlwps, p->p_nzlwps); LIST_FOREACH(l, &p->p_lwps, l_sibling) debug_print_lwp(l); sa = p->p_sa; if (sa) { if (sa->sa_vp) printf("SA VP: %d\n", sa->sa_vp->l_lid); if (sa->sa_idle) printf("SA idle: %d\n", sa->sa_idle->l_lid); printf("SAs: %d cached LWPs\n", sa->sa_ncached); printf("%d upcall stacks\n", sa->sa_nstacks); LIST_FOREACH(l, &sa->sa_lwpcache, l_sibling) debug_print_lwp(l); } return 0; } int debug_print_lwp(struct lwp *l) { struct proc *p; p = l->l_proc; printf("LWP %d address %p ", l->l_lid, l); printf("state %d flags %x ", l->l_stat, l->l_flag); if (l->l_wchan) printf("wait %p %s", l->l_wchan, l->l_wmesg); printf("\n"); return 0; } #endif