2a9368a7e7
a better chance at properly converting the priority. Noted by & okay yamt@.
643 lines
15 KiB
C
643 lines
15 KiB
C
/* $NetBSD: linux_sched.c,v 1.50 2008/02/29 14:29:06 elad Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 1999 The NetBSD Foundation, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
|
|
* NASA Ames Research Center; by Matthias Scheler.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the NetBSD
|
|
* Foundation, Inc. and its contributors.
|
|
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* Linux compatibility module. Try to deal with scheduler related syscalls.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.50 2008/02/29 14:29:06 elad Exp $");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/syscallargs.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/kauth.h>
|
|
#include <sys/ptrace.h>
|
|
|
|
#include <sys/cpu.h>
|
|
|
|
#include <compat/linux/common/linux_types.h>
|
|
#include <compat/linux/common/linux_signal.h>
|
|
#include <compat/linux/common/linux_machdep.h> /* For LINUX_NPTL */
|
|
#include <compat/linux/common/linux_emuldata.h>
|
|
#include <compat/linux/common/linux_ipc.h>
|
|
#include <compat/linux/common/linux_sem.h>
|
|
|
|
#include <compat/linux/linux_syscallargs.h>
|
|
|
|
#include <compat/linux/common/linux_sched.h>
|
|
|
|
int
|
|
linux_sys_clone(struct lwp *l, const struct linux_sys_clone_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(int) flags;
|
|
syscallarg(void *) stack;
|
|
#ifdef LINUX_NPTL
|
|
syscallarg(void *) parent_tidptr;
|
|
syscallarg(void *) child_tidptr;
|
|
#endif
|
|
} */
|
|
int flags, sig;
|
|
int error;
|
|
#ifdef LINUX_NPTL
|
|
struct linux_emuldata *led;
|
|
#endif
|
|
|
|
/*
|
|
* We don't support the Linux CLONE_PID or CLONE_PTRACE flags.
|
|
*/
|
|
if (SCARG(uap, flags) & (LINUX_CLONE_PID|LINUX_CLONE_PTRACE))
|
|
return (EINVAL);
|
|
|
|
/*
|
|
* Thread group implies shared signals. Shared signals
|
|
* imply shared VM. This matches what Linux kernel does.
|
|
*/
|
|
if (SCARG(uap, flags) & LINUX_CLONE_THREAD
|
|
&& (SCARG(uap, flags) & LINUX_CLONE_SIGHAND) == 0)
|
|
return (EINVAL);
|
|
if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND
|
|
&& (SCARG(uap, flags) & LINUX_CLONE_VM) == 0)
|
|
return (EINVAL);
|
|
|
|
flags = 0;
|
|
|
|
if (SCARG(uap, flags) & LINUX_CLONE_VM)
|
|
flags |= FORK_SHAREVM;
|
|
if (SCARG(uap, flags) & LINUX_CLONE_FS)
|
|
flags |= FORK_SHARECWD;
|
|
if (SCARG(uap, flags) & LINUX_CLONE_FILES)
|
|
flags |= FORK_SHAREFILES;
|
|
if (SCARG(uap, flags) & LINUX_CLONE_SIGHAND)
|
|
flags |= FORK_SHARESIGS;
|
|
if (SCARG(uap, flags) & LINUX_CLONE_VFORK)
|
|
flags |= FORK_PPWAIT;
|
|
|
|
sig = SCARG(uap, flags) & LINUX_CLONE_CSIGNAL;
|
|
if (sig < 0 || sig >= LINUX__NSIG)
|
|
return (EINVAL);
|
|
sig = linux_to_native_signo[sig];
|
|
|
|
#ifdef LINUX_NPTL
|
|
led = (struct linux_emuldata *)l->l_proc->p_emuldata;
|
|
|
|
led->parent_tidptr = SCARG(uap, parent_tidptr);
|
|
led->child_tidptr = SCARG(uap, child_tidptr);
|
|
led->clone_flags = SCARG(uap, flags);
|
|
#endif /* LINUX_NPTL */
|
|
|
|
/*
|
|
* Note that Linux does not provide a portable way of specifying
|
|
* the stack area; the caller must know if the stack grows up
|
|
* or down. So, we pass a stack size of 0, so that the code
|
|
* that makes this adjustment is a noop.
|
|
*/
|
|
if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0,
|
|
NULL, NULL, retval, NULL)) != 0)
|
|
return error;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* linux realtime priority
|
|
*
|
|
* - SCHED_RR and SCHED_FIFO tasks have priorities [1,99].
|
|
*
|
|
* - SCHED_OTHER tasks don't have realtime priorities.
|
|
* in particular, sched_param::sched_priority is always 0.
|
|
*/
|
|
|
|
#define LINUX_SCHED_RTPRIO_MIN 1
|
|
#define LINUX_SCHED_RTPRIO_MAX 99
|
|
|
|
static int
|
|
sched_linux2native(int linux_policy, struct linux_sched_param *linux_params,
|
|
int *native_policy, struct sched_param *native_params)
|
|
{
|
|
|
|
switch (linux_policy) {
|
|
case LINUX_SCHED_OTHER:
|
|
if (native_policy != NULL) {
|
|
*native_policy = SCHED_OTHER;
|
|
}
|
|
break;
|
|
|
|
case LINUX_SCHED_FIFO:
|
|
if (native_policy != NULL) {
|
|
*native_policy = SCHED_FIFO;
|
|
}
|
|
break;
|
|
|
|
case LINUX_SCHED_RR:
|
|
if (native_policy != NULL) {
|
|
*native_policy = SCHED_RR;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
return EINVAL;
|
|
}
|
|
|
|
if (linux_params != NULL) {
|
|
int prio = linux_params->sched_priority;
|
|
|
|
KASSERT(native_params != NULL);
|
|
|
|
if (linux_policy == LINUX_SCHED_OTHER) {
|
|
if (prio != 0) {
|
|
return EINVAL;
|
|
}
|
|
native_params->sched_priority = PRI_NONE; /* XXX */
|
|
} else {
|
|
if (prio < LINUX_SCHED_RTPRIO_MIN ||
|
|
prio > LINUX_SCHED_RTPRIO_MAX) {
|
|
return EINVAL;
|
|
}
|
|
native_params->sched_priority =
|
|
(prio - LINUX_SCHED_RTPRIO_MIN)
|
|
* (SCHED_PRI_MAX - SCHED_PRI_MIN)
|
|
/ (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
|
|
+ SCHED_PRI_MIN;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
sched_native2linux(int native_policy, struct sched_param *native_params,
|
|
int *linux_policy, struct linux_sched_param *linux_params)
|
|
{
|
|
|
|
switch (native_policy) {
|
|
case SCHED_OTHER:
|
|
if (linux_policy != NULL) {
|
|
*linux_policy = LINUX_SCHED_OTHER;
|
|
}
|
|
break;
|
|
|
|
case SCHED_FIFO:
|
|
if (linux_policy != NULL) {
|
|
*linux_policy = LINUX_SCHED_FIFO;
|
|
}
|
|
break;
|
|
|
|
case SCHED_RR:
|
|
if (linux_policy != NULL) {
|
|
*linux_policy = LINUX_SCHED_RR;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
panic("%s: unknown policy %d\n", __func__, native_policy);
|
|
}
|
|
|
|
if (native_params != NULL) {
|
|
int prio = native_params->sched_priority;
|
|
|
|
KASSERT(prio >= SCHED_PRI_MIN);
|
|
KASSERT(prio <= SCHED_PRI_MAX);
|
|
KASSERT(linux_params != NULL);
|
|
|
|
if (native_policy == SCHED_OTHER) {
|
|
linux_params->sched_priority = 0;
|
|
} else {
|
|
linux_params->sched_priority =
|
|
(prio - SCHED_PRI_MIN)
|
|
* (LINUX_SCHED_RTPRIO_MAX - LINUX_SCHED_RTPRIO_MIN)
|
|
/ (SCHED_PRI_MAX - SCHED_PRI_MIN)
|
|
+ LINUX_SCHED_RTPRIO_MIN;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
linux_sys_sched_setparam(struct lwp *l, const struct linux_sys_sched_setparam_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(linux_pid_t) pid;
|
|
syscallarg(const struct linux_sched_param *) sp;
|
|
} */
|
|
int error, policy;
|
|
struct linux_sched_param lp;
|
|
struct sched_param sp;
|
|
|
|
if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
|
|
if (error)
|
|
goto out;
|
|
|
|
/* We need the current policy in Linux terms. */
|
|
error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
|
|
if (error)
|
|
goto out;
|
|
error = sched_native2linux(policy, NULL, &policy, NULL);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = sched_linux2native(policy, &lp, &policy, &sp);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
|
|
if (error)
|
|
goto out;
|
|
|
|
out:
|
|
return error;
|
|
}
|
|
|
|
int
|
|
linux_sys_sched_getparam(struct lwp *l, const struct linux_sys_sched_getparam_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(linux_pid_t) pid;
|
|
syscallarg(struct linux_sched_param *) sp;
|
|
} */
|
|
struct linux_sched_param lp;
|
|
struct sched_param sp;
|
|
int error, policy;
|
|
|
|
if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
error = do_sched_getparam(SCARG(uap, pid), 0, &policy, &sp);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = sched_native2linux(policy, &sp, NULL, &lp);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = copyout(&lp, SCARG(uap, sp), sizeof(lp));
|
|
if (error)
|
|
goto out;
|
|
|
|
out:
|
|
return error;
|
|
}
|
|
|
|
int
|
|
linux_sys_sched_setscheduler(struct lwp *l, const struct linux_sys_sched_setscheduler_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(linux_pid_t) pid;
|
|
syscallarg(int) policy;
|
|
syscallarg(cont struct linux_sched_scheduler *) sp;
|
|
} */
|
|
int error, policy;
|
|
struct linux_sched_param lp;
|
|
struct sched_param sp;
|
|
|
|
if (SCARG(uap, pid) < 0 || SCARG(uap, sp) == NULL) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
error = copyin(SCARG(uap, sp), &lp, sizeof(lp));
|
|
if (error)
|
|
goto out;
|
|
|
|
error = sched_linux2native(SCARG(uap, policy), &lp, &policy, &sp);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = do_sched_setparam(SCARG(uap, pid), 0, policy, &sp);
|
|
if (error)
|
|
goto out;
|
|
|
|
out:
|
|
return error;
|
|
}
|
|
|
|
int
|
|
linux_sys_sched_getscheduler(struct lwp *l, const struct linux_sys_sched_getscheduler_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(linux_pid_t) pid;
|
|
} */
|
|
int error, policy;
|
|
|
|
*retval = -1;
|
|
|
|
error = do_sched_getparam(SCARG(uap, pid), 0, &policy, NULL);
|
|
if (error)
|
|
goto out;
|
|
|
|
error = sched_native2linux(policy, NULL, &policy, NULL);
|
|
if (error)
|
|
goto out;
|
|
|
|
*retval = policy;
|
|
|
|
out:
|
|
return error;
|
|
}
|
|
|
|
int
|
|
linux_sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
|
|
{
|
|
|
|
yield();
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
linux_sys_sched_get_priority_max(struct lwp *l, const struct linux_sys_sched_get_priority_max_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(int) policy;
|
|
} */
|
|
|
|
/*
|
|
* We can't emulate anything put the default scheduling policy.
|
|
*/
|
|
if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
|
|
*retval = -1;
|
|
return EINVAL;
|
|
}
|
|
|
|
*retval = 0;
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
linux_sys_sched_get_priority_min(struct lwp *l, const struct linux_sys_sched_get_priority_min_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(int) policy;
|
|
} */
|
|
|
|
/*
|
|
* We can't emulate anything put the default scheduling policy.
|
|
*/
|
|
if (SCARG(uap, policy) != LINUX_SCHED_OTHER) {
|
|
*retval = -1;
|
|
return EINVAL;
|
|
}
|
|
|
|
*retval = 0;
|
|
return 0;
|
|
}
|
|
|
|
#ifndef __m68k__
|
|
/* Present on everything but m68k */
|
|
int
|
|
linux_sys_exit_group(struct lwp *l, const struct linux_sys_exit_group_args *uap, register_t *retval)
|
|
{
|
|
#ifdef LINUX_NPTL
|
|
/* {
|
|
syscallarg(int) error_code;
|
|
} */
|
|
struct proc *p = l->l_proc;
|
|
struct linux_emuldata *led = p->p_emuldata;
|
|
struct linux_emuldata *e;
|
|
|
|
if (led->s->flags & LINUX_LES_USE_NPTL) {
|
|
|
|
#ifdef DEBUG_LINUX
|
|
printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
|
|
led->s->refs);
|
|
#endif
|
|
|
|
/*
|
|
* The calling thread is supposed to kill all threads
|
|
* in the same thread group (i.e. all threads created
|
|
* via clone(2) with CLONE_THREAD flag set).
|
|
*
|
|
* If there is only one thread, things are quite simple
|
|
*/
|
|
if (led->s->refs == 1)
|
|
return sys_exit(l, (const void *)uap, retval);
|
|
|
|
#ifdef DEBUG_LINUX
|
|
printf("%s:%d\n", __func__, __LINE__);
|
|
#endif
|
|
|
|
led->s->flags |= LINUX_LES_INEXITGROUP;
|
|
led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
|
|
|
|
/*
|
|
* Kill all threads in the group. The emulation exit hook takes
|
|
* care of hiding the zombies and reporting the exit code
|
|
* properly.
|
|
*/
|
|
mutex_enter(&proclist_mutex);
|
|
LIST_FOREACH(e, &led->s->threads, threads) {
|
|
if (e->proc == p)
|
|
continue;
|
|
|
|
#ifdef DEBUG_LINUX
|
|
printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
|
|
#endif
|
|
psignal(e->proc, SIGKILL);
|
|
}
|
|
|
|
/* Now, kill ourselves */
|
|
psignal(p, SIGKILL);
|
|
mutex_exit(&proclist_mutex);
|
|
|
|
return 0;
|
|
|
|
}
|
|
#endif /* LINUX_NPTL */
|
|
|
|
return sys_exit(l, (const void *)uap, retval);
|
|
}
|
|
#endif /* !__m68k__ */
|
|
|
|
#ifdef LINUX_NPTL
|
|
int
|
|
linux_sys_set_tid_address(struct lwp *l, const struct linux_sys_set_tid_address_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(int *) tidptr;
|
|
} */
|
|
struct linux_emuldata *led;
|
|
|
|
led = (struct linux_emuldata *)l->l_proc->p_emuldata;
|
|
led->clear_tid = SCARG(uap, tid);
|
|
|
|
led->s->flags |= LINUX_LES_USE_NPTL;
|
|
|
|
*retval = l->l_proc->p_pid;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* ARGUSED1 */
|
|
int
|
|
linux_sys_gettid(struct lwp *l, const void *v, register_t *retval)
|
|
{
|
|
/* The Linux kernel does it exactly that way */
|
|
*retval = l->l_proc->p_pid;
|
|
return 0;
|
|
}
|
|
|
|
#ifdef LINUX_NPTL
|
|
/* ARGUSED1 */
|
|
int
|
|
linux_sys_getpid(struct lwp *l, const void *v, register_t *retval)
|
|
{
|
|
struct linux_emuldata *led = l->l_proc->p_emuldata;
|
|
|
|
if (led->s->flags & LINUX_LES_USE_NPTL) {
|
|
/* The Linux kernel does it exactly that way */
|
|
*retval = led->s->group_pid;
|
|
} else {
|
|
*retval = l->l_proc->p_pid;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* ARGUSED1 */
|
|
int
|
|
linux_sys_getppid(struct lwp *l, const void *v, register_t *retval)
|
|
{
|
|
struct proc *p = l->l_proc;
|
|
struct linux_emuldata *led = p->p_emuldata;
|
|
struct proc *glp;
|
|
struct proc *pp;
|
|
|
|
if (led->s->flags & LINUX_LES_USE_NPTL) {
|
|
|
|
/* Find the thread group leader's parent */
|
|
if ((glp = pfind(led->s->group_pid)) == NULL) {
|
|
/* Maybe panic... */
|
|
printf("linux_sys_getppid: missing group leader PID"
|
|
" %d\n", led->s->group_pid);
|
|
return -1;
|
|
}
|
|
pp = glp->p_pptr;
|
|
|
|
/* If this is a Linux process too, return thread group PID */
|
|
if (pp->p_emul == p->p_emul) {
|
|
struct linux_emuldata *pled;
|
|
|
|
pled = pp->p_emuldata;
|
|
*retval = pled->s->group_pid;
|
|
} else {
|
|
*retval = pp->p_pid;
|
|
}
|
|
|
|
} else {
|
|
*retval = p->p_pptr->p_pid;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
#endif /* LINUX_NPTL */
|
|
|
|
int
|
|
linux_sys_sched_getaffinity(struct lwp *l, const struct linux_sys_sched_getaffinity_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(pid_t) pid;
|
|
syscallarg(unsigned int) len;
|
|
syscallarg(unsigned long *) mask;
|
|
} */
|
|
int error;
|
|
int ret;
|
|
char *data;
|
|
int *retp;
|
|
|
|
if (SCARG(uap, mask) == NULL)
|
|
return EINVAL;
|
|
|
|
if (SCARG(uap, len) < sizeof(int))
|
|
return EINVAL;
|
|
|
|
if (pfind(SCARG(uap, pid)) == NULL)
|
|
return ESRCH;
|
|
|
|
/*
|
|
* return the actual number of CPU, tag all of them as available
|
|
* The result is a mask, the first CPU being in the least significant
|
|
* bit.
|
|
*/
|
|
ret = (1 << ncpu) - 1;
|
|
data = malloc(SCARG(uap, len), M_TEMP, M_WAITOK|M_ZERO);
|
|
retp = (int *)&data[SCARG(uap, len) - sizeof(ret)];
|
|
*retp = ret;
|
|
|
|
if ((error = copyout(data, SCARG(uap, mask), SCARG(uap, len))) != 0)
|
|
return error;
|
|
|
|
free(data, M_TEMP);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
int
|
|
linux_sys_sched_setaffinity(struct lwp *l, const struct linux_sys_sched_setaffinity_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(pid_t) pid;
|
|
syscallarg(unsigned int) len;
|
|
syscallarg(unsigned long *) mask;
|
|
} */
|
|
|
|
if (pfind(SCARG(uap, pid)) == NULL)
|
|
return ESRCH;
|
|
|
|
/* Let's ignore it */
|
|
#ifdef DEBUG_LINUX
|
|
printf("linux_sys_sched_setaffinity\n");
|
|
#endif
|
|
return 0;
|
|
};
|
|
#endif /* LINUX_NPTL */
|