diff --git a/distrib/sets/lists/base/mi b/distrib/sets/lists/base/mi index 5261e504e65d..d88567808fa5 100644 --- a/distrib/sets/lists/base/mi +++ b/distrib/sets/lists/base/mi @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.737 2008/01/09 11:25:59 simonb Exp $ +# $NetBSD: mi,v 1.738 2008/01/15 03:37:12 rmind Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. # @@ -1270,6 +1270,7 @@ ./usr/sbin/setencstat base-sysutil-bin ./usr/sbin/setkey base-obsolete obsolete ./usr/sbin/setobjstat base-sysutil-bin +./usr/sbin/schedctl base-sysutil-bin ./usr/sbin/sliplogin base-slip-bin ./usr/sbin/slstats base-slip-bin ./usr/sbin/smrsh base-obsolete obsolete diff --git a/distrib/sets/lists/comp/mi b/distrib/sets/lists/comp/mi index e305e90d03c0..3407db8fb1d2 100644 --- a/distrib/sets/lists/comp/mi +++ b/distrib/sets/lists/comp/mi @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.1108 2008/01/03 20:47:58 jdc Exp $ +# $NetBSD: mi,v 1.1109 2008/01/15 03:37:13 rmind Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. 
# @@ -1893,6 +1893,7 @@ ./usr/include/sys/proc.h comp-c-include ./usr/include/sys/properties.h comp-obsolete obsolete ./usr/include/sys/protosw.h comp-c-include +./usr/include/sys/pset.h comp-c-include ./usr/include/sys/ptrace.h comp-c-include ./usr/include/sys/queue.h comp-c-include ./usr/include/sys/radioio.h comp-c-include diff --git a/lib/libc/gen/sysconf.c b/lib/libc/gen/sysconf.c index 26607f6f337e..4f8e1459a269 100644 --- a/lib/libc/gen/sysconf.c +++ b/lib/libc/gen/sysconf.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysconf.c,v 1.25 2007/10/15 14:12:56 ad Exp $ */ +/* $NetBSD: sysconf.c,v 1.26 2008/01/15 03:37:14 rmind Exp $ */ /*- * Copyright (c) 1993 @@ -37,7 +37,7 @@ #if 0 static char sccsid[] = "@(#)sysconf.c 8.2 (Berkeley) 3/20/94"; #else -__RCSID("$NetBSD: sysconf.c,v 1.25 2007/10/15 14:12:56 ad Exp $"); +__RCSID("$NetBSD: sysconf.c,v 1.26 2008/01/15 03:37:14 rmind Exp $"); #endif #endif /* LIBC_SCCS and not lint */ @@ -321,6 +321,11 @@ sysconf(int name) NULL, NULL, NULL, SYSCTL_VERSION)) return -1; break; + case _SC_PRIORITY_SCHEDULING: + if (sysctlgetmibinfo("kern.posix_sched", &mib[0], &mib_len, + NULL, NULL, NULL, SYSCTL_VERSION)) + return -1; + goto yesno; case _SC_ATEXIT_MAX: mib[0] = CTL_USER; mib[1] = USER_ATEXIT_MAX; @@ -350,6 +355,23 @@ yesno: if (sysctl(mib, mib_len, &value, &len, NULL, 0) == -1) mib[1] = HW_NCPUONLINE; break; +/* Native */ + case _SC_SCHED_RT_TS: + if (sysctlgetmibinfo("kern.sched.rt_ts", &mib[0], &mib_len, + NULL, NULL, NULL, SYSCTL_VERSION)) + return -1; + break; + case _SC_SCHED_PRI_MIN: + if (sysctlgetmibinfo("kern.sched.pri_min", &mib[0], &mib_len, + NULL, NULL, NULL, SYSCTL_VERSION)) + return -1; + break; + case _SC_SCHED_PRI_MAX: + if (sysctlgetmibinfo("kern.sched.pri_max", &mib[0], &mib_len, + NULL, NULL, NULL, SYSCTL_VERSION)) + return -1; + break; + default: errno = EINVAL; return (-1); diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index 3e536813dbad..a476525fe3b0 100644 --- a/lib/libc/sys/Makefile.inc +++ 
b/lib/libc/sys/Makefile.inc @@ -1,4 +1,4 @@ -# $NetBSD: Makefile.inc,v 1.182 2007/12/14 21:52:17 yamt Exp $ +# $NetBSD: Makefile.inc,v 1.183 2008/01/15 03:37:14 rmind Exp $ # @(#)Makefile.inc 8.3 (Berkeley) 10/24/94 # sys sources @@ -113,7 +113,9 @@ WEAKASM= accept.S aio_suspend.S close.S connect.S execve.S \ kill.S mq_receive.S mq_send.S mq_timedreceive.S mq_timedsend.S \ msgrcv.S msgsnd.S __msync13.S \ nanosleep.S open.S poll.S pollts.S pselect.S read.S readlink.S readv.S \ - sched_yield.S select.S __sigprocmask14.S __sigsuspend14.S sysarch.S \ + _sched_setparam.S _sched_getparam.S _sched_setaffinity.S \ + _sched_getaffinity.S sched_yield.S \ + select.S __sigprocmask14.S __sigsuspend14.S sysarch.S \ wait4.S write.S writev.S NOERR= getegid.S geteuid.S getgid.S getpid.S getppid.S getuid.S \ diff --git a/lib/libc/sys/makelintstub b/lib/libc/sys/makelintstub index f688c45383a3..398d844e50b6 100755 --- a/lib/libc/sys/makelintstub +++ b/lib/libc/sys/makelintstub @@ -1,5 +1,5 @@ #!/bin/sh - -# $NetBSD: makelintstub,v 1.20 2007/11/13 09:07:33 he Exp $ +# $NetBSD: makelintstub,v 1.21 2008/01/15 03:37:14 rmind Exp $ # # Copyright (c) 1996, 1997 Christopher G. Demetriou # All rights reserved. 
@@ -68,6 +68,7 @@ header() #include #include #include + #include #include #include #include diff --git a/lib/libpthread/Makefile b/lib/libpthread/Makefile index e6e181d10213..a9c9f6e1379f 100644 --- a/lib/libpthread/Makefile +++ b/lib/libpthread/Makefile @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.48 2007/12/14 21:51:21 yamt Exp $ +# $NetBSD: Makefile,v 1.49 2008/01/15 03:37:14 rmind Exp $ # WARNS= 4 @@ -61,7 +61,6 @@ SRCS+= pthread_specific.c SRCS+= pthread_spin.c SRCS+= pthread_tsd.c SRCS+= res_state.c -SRCS+= sched.c SRCS+= sem.c # Architecture-dependent files SRCS+= _context_u.S diff --git a/lib/libpthread/pthread.h b/lib/libpthread/pthread.h index 188367a35024..285fc3a5f5e8 100644 --- a/lib/libpthread/pthread.h +++ b/lib/libpthread/pthread.h @@ -1,4 +1,4 @@ -/* $NetBSD: pthread.h,v 1.24 2007/12/24 14:46:28 ad Exp $ */ +/* $NetBSD: pthread.h,v 1.25 2008/01/15 03:37:14 rmind Exp $ */ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. @@ -43,6 +43,7 @@ #include /* For timespec */ #include +#include #include @@ -186,6 +187,9 @@ int pthread_barrierattr_destroy(pthread_barrierattr_t *); int pthread_getschedparam(pthread_t, int * __restrict, struct sched_param * __restrict); int pthread_setschedparam(pthread_t, int, const struct sched_param *); +int pthread_getaffinity_np(pthread_t, size_t, cpuset_t *); +int pthread_setaffinity_np(pthread_t, size_t, cpuset_t *); +int pthread_setschedprio(pthread_t, int); int *pthread__errno(void); diff --git a/lib/libpthread/pthread_misc.c b/lib/libpthread/pthread_misc.c index 47b01eaeb074..42e77eae4a61 100644 --- a/lib/libpthread/pthread_misc.c +++ b/lib/libpthread/pthread_misc.c @@ -1,7 +1,7 @@ -/* $NetBSD: pthread_misc.c,v 1.3 2007/11/13 17:20:09 ad Exp $ */ +/* $NetBSD: pthread_misc.c,v 1.4 2008/01/15 03:37:14 rmind Exp $ */ /*- - * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc. + * Copyright (c) 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -37,14 +37,19 @@ */ #include -__RCSID("$NetBSD: pthread_misc.c,v 1.3 2007/11/13 17:20:09 ad Exp $"); +__RCSID("$NetBSD: pthread_misc.c,v 1.4 2008/01/15 03:37:14 rmind Exp $"); + +#include +#include +#include #include +#include #include #include -#include #include +#include #include "pthread.h" #include "pthread_int.h" @@ -60,27 +65,65 @@ __strong_alias(__libc_thr_sigsetmask,pthread_sigmask) __strong_alias(__sigprocmask14,pthread_sigmask) __strong_alias(__libc_thr_yield,pthread__sched_yield) -/*ARGSUSED*/ int pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param) { - if (param == NULL || policy == NULL) - return EINVAL; - param->sched_priority = 0; - *policy = SCHED_RR; - return 0; + int error; + + if (pthread__find(thread) != 0) + return ESRCH; + + error = _sched_getparam(getpid(), thread->pt_lid, param); + if (error == 0) + *policy = param->sched_class; + return error; } -/*ARGSUSED*/ int pthread_setschedparam(pthread_t thread, int policy, const struct sched_param *param) { - if (param == NULL || policy < SCHED_FIFO || policy > SCHED_RR) - return EINVAL; - if (param->sched_priority > 0 || policy != SCHED_RR) - return ENOTSUP; - return 0; + struct sched_param sp; + + if (pthread__find(thread) != 0) + return ESRCH; + + memcpy(&sp, param, sizeof(struct sched_param)); + sp.sched_class = policy; + return _sched_setparam(getpid(), thread->pt_lid, &sp); +} + +int +pthread_getaffinity_np(pthread_t thread, size_t size, cpuset_t *cpuset) +{ + + if (pthread__find(thread) != 0) + return ESRCH; + + return _sched_getaffinity(getpid(), thread->pt_lid, size, cpuset); +} + +int +pthread_setaffinity_np(pthread_t thread, size_t size, cpuset_t *cpuset) +{ + + if (pthread__find(thread) != 0) + return ESRCH; + + return _sched_setaffinity(getpid(), thread->pt_lid, size, cpuset); +} + +int +pthread_setschedprio(pthread_t thread, int prio) +{ + struct sched_param sp; + + if 
(pthread__find(thread) != 0) + return ESRCH; + + sp.sched_class = SCHED_NONE; + sp.sched_priority = prio; + return _sched_setparam(getpid(), thread->pt_lid, &sp); } int diff --git a/lib/librt/Makefile b/lib/librt/Makefile index 42bb224b452d..89af56a966fd 100644 --- a/lib/librt/Makefile +++ b/lib/librt/Makefile @@ -1,10 +1,11 @@ -# $NetBSD: Makefile,v 1.4 2007/06/17 12:47:41 rmind Exp $ +# $NetBSD: Makefile,v 1.5 2008/01/15 03:37:15 rmind Exp $ # WARNS= 2 LIB= rt SRCS= sem.c +SRCS+= sched.c MAN+= aio_cancel.3 aio_error.3 aio_fsync.3 aio_read.3 aio_return.3 \ aio_suspend.3 aio_write.3 lio_listio.3 \ diff --git a/lib/libpthread/sched.c b/lib/librt/sched.c similarity index 57% rename from lib/libpthread/sched.c rename to lib/librt/sched.c index bd56399c8701..263bebf3a3a4 100644 --- a/lib/libpthread/sched.c +++ b/lib/librt/sched.c @@ -1,12 +1,9 @@ -/* $NetBSD: sched.c,v 1.4 2005/10/09 11:17:28 kleink Exp $ */ +/* $NetBSD: sched.c,v 1.1 2008/01/15 03:37:15 rmind Exp $ */ -/*- - * Copyright (c) 2001 The NetBSD Foundation, Inc. +/* + * Copyright (c) 2008, Mindaugas Rasiukevicius * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Nathan J. Williams. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -15,15 +12,8 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. 
Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS @@ -37,73 +27,91 @@ */ #include -__RCSID("$NetBSD: sched.c,v 1.4 2005/10/09 11:17:28 kleink Exp $"); +__RCSID("$NetBSD: sched.c,v 1.1 2008/01/15 03:37:15 rmind Exp $"); +#include +#include #include #include +#include +#include +#include -int pthread__sched_binder; +/* + * Scheduling parameters. + */ -/* ARGSUSED */ int sched_setparam(pid_t pid, const struct sched_param *param) { - errno = ENOSYS; - return -1; + return _sched_setparam(pid, 0, param); } -/* ARGSUSED */ int sched_getparam(pid_t pid, struct sched_param *param) { - errno = ENOSYS; - return -1; + return _sched_getparam(pid, 0, param); } -/* ARGSUSED */ int -sched_setscheduler(pid_t pid, int policy, - const struct sched_param *param) +sched_setscheduler(pid_t pid, int policy, const struct sched_param *param) { + struct sched_param sp; - errno = ENOSYS; - return -1; + memcpy(&sp, param, sizeof(struct sched_param)); + sp.sched_class = policy; + return _sched_setparam(pid, 0, &sp); } -/* ARGSUSED */ int sched_getscheduler(pid_t pid) { + struct sched_param sp; + int error; - errno = ENOSYS; - return -1; + error = _sched_getparam(pid, 0, &sp); + if (error) + return error; + + return sp.sched_class; } -/* ARGSUSED */ +/* + * Scheduling priorities. 
+ */ + int sched_get_priority_max(int policy) { - errno = ENOSYS; - return -1; + return sysconf(_SC_SCHED_PRI_MAX); } -/* ARGSUSED */ int sched_get_priority_min(int policy) { - errno = ENOSYS; - return -1; + return sysconf(_SC_SCHED_PRI_MIN); } -/* ARGSUSED */ int sched_rr_get_interval(pid_t pid, struct timespec *interval) { - errno = ENOSYS; - return -1; + interval->tv_sec = 0; + interval->tv_nsec = sysconf(_SC_SCHED_RT_TS) * 1000; + return 0; +} + +/* + * Processor-sets. + */ + +int +pset_bind(psetid_t psid, idtype_t idtype, id_t id, psetid_t *opsid) +{ + + return _pset_bind(idtype, id, 0, psid, opsid); } diff --git a/lib/librt/sys/Makefile.inc b/lib/librt/sys/Makefile.inc index 64a3fd6adb05..938e333e7380 100644 --- a/lib/librt/sys/Makefile.inc +++ b/lib/librt/sys/Makefile.inc @@ -1,11 +1,12 @@ -# $NetBSD: Makefile.inc,v 1.4 2007/10/09 18:18:33 rmind Exp $ +# $NetBSD: Makefile.inc,v 1.5 2008/01/15 03:37:15 rmind Exp $ .PATH: ${.CURDIR}/sys ASM= aio_cancel.S aio_error.S aio_fsync.S aio_read.S aio_return.S \ aio_write.S lio_listio.S \ mq_close.S mq_getattr.S mq_notify.S mq_open.S \ - mq_setattr.S mq_unlink.S + mq_setattr.S mq_unlink.S \ + pset_create.S pset_destroy.S pset_assign.S _pset_bind.S SRCS+= ${ASM} CLEANFILES+= ${ASM} diff --git a/sys/conf/files b/sys/conf/files index fe0820be3146..22efa387ff2f 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $NetBSD: files,v 1.884 2008/01/14 12:40:04 yamt Exp $ +# $NetBSD: files,v 1.885 2008/01/15 03:37:10 rmind Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -1421,6 +1421,7 @@ file kern/sys_lwp.c file kern/sys_pipe.c !pipe_socketpair file kern/sys_pmc.c file kern/sys_process.c ptrace | ktrace +file kern/sys_pset.c file kern/sys_sig.c file kern/sys_sched.c file kern/sys_socket.c diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index f438c13394b8..b904956e28d8 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $NetBSD: init_main.c,v 1.338 2008/01/14 12:40:03 yamt 
Exp $ */ +/* $NetBSD: init_main.c,v 1.339 2008/01/15 03:37:10 rmind Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.338 2008/01/14 12:40:03 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.339 2008/01/15 03:37:10 rmind Exp $"); #include "opt_ipsec.h" #include "opt_ntp.h" @@ -339,6 +339,9 @@ main(void) turnstile_init(); sleeptab_init(&sleeptab); + /* Initialize processor-sets */ + psets_init(); + /* MI initialization of the boot cpu */ error = mi_cpu_attach(curcpu()); KASSERT(error == 0); diff --git a/sys/kern/kern_cpu.c b/sys/kern/kern_cpu.c index 4b81f139e726..0d1ebaa762c0 100644 --- a/sys/kern/kern_cpu.c +++ b/sys/kern/kern_cpu.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_cpu.c,v 1.17 2008/01/14 12:40:03 yamt Exp $ */ +/* $NetBSD: kern_cpu.c,v 1.18 2008/01/15 03:37:10 rmind Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -64,7 +64,7 @@ #include -__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.17 2008/01/14 12:40:03 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.18 2008/01/15 03:37:10 rmind Exp $"); #include #include @@ -284,19 +284,8 @@ cpu_xc_offline(struct cpu_info *ci) lwp_unlock(l); } - /* - * Runqueues are locked with the global lock if pointers match, - * thus hold only one. Otherwise, double-lock the runqueues. 
- */ - if (spc->spc_mutex == mspc->spc_mutex) { - spc_lock(ci); - } else if (ci < mci) { - spc_lock(ci); - spc_lock(mci); - } else { - spc_lock(mci); - spc_lock(ci); - } + /* Double-lock the run-queues */ + spc_dlock(ci, mci); /* Handle LSRUN and LSIDL cases */ LIST_FOREACH(l, &alllwp, l_list) { @@ -312,13 +301,7 @@ cpu_xc_offline(struct cpu_info *ci) lwp_setlock(l, mspc->spc_mutex); } } - if (spc->spc_mutex == mspc->spc_mutex) { - spc_unlock(ci); - } else { - spc_unlock(ci); - spc_unlock(mci); - } - + spc_dunlock(ci, mci); mutex_exit(&proclist_lock); } diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index a2c3ab6e93b2..c4ee3153f5a3 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lwp.c,v 1.90 2008/01/12 18:06:40 ad Exp $ */ +/* $NetBSD: kern_lwp.c,v 1.91 2008/01/15 03:37:11 rmind Exp $ */ /*- * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc. @@ -205,7 +205,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.90 2008/01/12 18:06:40 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.91 2008/01/15 03:37:11 rmind Exp $"); #include "opt_ddb.h" #include "opt_multiprocessor.h" @@ -225,6 +225,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.90 2008/01/12 18:06:40 ad Exp $"); #include #include #include +#include #include #include #include @@ -584,15 +585,8 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, bool inmem, int flags, l2->l_pflag = LP_MPSAFE; if (p2->p_flag & PK_SYSTEM) { - /* - * Mark it as a system process and not a candidate for - * swapping. 
- */ + /* Mark it as a system LWP and not a candidate for swapping */ l2->l_flag |= LW_SYSTEM; - } else { - /* Look for a CPU to start */ - l2->l_cpu = sched_takecpu(l2); - l2->l_mutex = l2->l_cpu->ci_schedstate.spc_mutex; } lwp_initspecific(l2); @@ -636,6 +630,18 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_t uaddr, bool inmem, int flags, LIST_INSERT_HEAD(&alllwp, l2, l_list); mutex_exit(&proclist_lock); + if ((p2->p_flag & PK_SYSTEM) == 0) { + /* Locking is needed, since LWP is in the list of all LWPs */ + lwp_lock(l2); + /* Inherit a processor-set */ + l2->l_psid = l1->l_psid; + /* Inherit an affinity */ + memcpy(&l2->l_affinity, &l1->l_affinity, sizeof(cpuset_t)); + /* Look for a CPU to start */ + l2->l_cpu = sched_takecpu(l2); + lwp_unlock_to(l2, l2->l_cpu->ci_schedstate.spc_mutex); + } + SYSCALL_TIME_LWP_INIT(l2); if (p2->p_emul->e_lwp_fork) @@ -1044,6 +1050,74 @@ proc_representative_lwp(struct proc *p, int *nrlwps, int locking) return NULL; } +/* + * Migrate the LWP to the another CPU. Unlocks the LWP. + */ +void +lwp_migrate(lwp_t *l, struct cpu_info *ci) +{ + struct schedstate_percpu *spc; + KASSERT(lwp_locked(l, NULL)); + + if (l->l_cpu == ci) { + lwp_unlock(l); + return; + } + + spc = &ci->ci_schedstate; + switch (l->l_stat) { + case LSRUN: + if (l->l_flag & LW_INMEM) { + l->l_target_cpu = ci; + break; + } + case LSIDL: + l->l_cpu = ci; + lwp_unlock_to(l, spc->spc_mutex); + KASSERT(!mutex_owned(spc->spc_mutex)); + return; + case LSSLEEP: + l->l_cpu = ci; + break; + case LSSTOP: + case LSSUSPENDED: + if (l->l_wchan != NULL) { + l->l_cpu = ci; + break; + } + case LSONPROC: + l->l_target_cpu = ci; + break; + } + lwp_unlock(l); +} + +/* + * Find the LWP in the process. + * On success - returns LWP locked. 
+ */ +struct lwp * +lwp_find2(pid_t pid, lwpid_t lid) +{ + proc_t *p; + lwp_t *l; + + /* Find the process */ + p = p_find(pid, PFIND_UNLOCK_FAIL); + if (p == NULL) + return NULL; + mutex_enter(&p->p_smutex); + mutex_exit(&proclist_lock); + + /* Find the thread */ + l = lwp_find(p, lid); + if (l != NULL) + lwp_lock(l); + mutex_exit(&p->p_smutex); + + return l; +} + /* * Look up a live LWP within the speicifed process, and return it locked. * diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 1fdc4963b7f2..343576cbc632 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_synch.c,v 1.215 2008/01/04 21:18:10 ad Exp $ */ +/* $NetBSD: kern_synch.c,v 1.216 2008/01/15 03:37:11 rmind Exp $ */ /*- * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.215 2008/01/04 21:18:10 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.216 2008/01/15 03:37:11 rmind Exp $"); #include "opt_kstack.h" #include "opt_lockdebug.h" @@ -354,10 +354,10 @@ updatertime(lwp_t *l, const struct bintime *now) int mi_switch(lwp_t *l) { + struct cpu_info *ci, *tci = NULL; struct schedstate_percpu *spc; struct lwp *newl; int retval, oldspl; - struct cpu_info *ci; struct bintime bt; bool returning; @@ -421,16 +421,41 @@ mi_switch(lwp_t *l) /* * If on the CPU and we have gotten this far, then we must yield. 
*/ - mutex_spin_enter(spc->spc_mutex); KASSERT(l->l_stat != LSRUN); - if (l->l_stat == LSONPROC && l != newl) { + if (l->l_stat == LSONPROC && (l->l_target_cpu || l != newl)) { KASSERT(lwp_locked(l, &spc->spc_lwplock)); + + tci = l->l_target_cpu; + if (__predict_false(tci != NULL)) { + /* Double-lock the runqueues */ + spc_dlock(ci, tci); + } else { + /* Lock the runqueue */ + spc_lock(ci); + } + if ((l->l_flag & LW_IDLE) == 0) { l->l_stat = LSRUN; - lwp_setlock(l, spc->spc_mutex); + if (__predict_false(tci != NULL)) { + /* + * Set the new CPU, lock and unset the + * l_target_cpu - thread will be enqueued + * to the runqueue of target CPU. + */ + l->l_cpu = tci; + lwp_setlock(l, tci->ci_schedstate.spc_mutex); + l->l_target_cpu = NULL; + } else { + lwp_setlock(l, spc->spc_mutex); + } sched_enqueue(l, true); - } else + } else { + KASSERT(tci == NULL); l->l_stat = LSIDL; + } + } else { + /* Lock the runqueue */ + spc_lock(ci); } /* @@ -482,7 +507,13 @@ mi_switch(lwp_t *l) struct lwp *prevlwp; /* Release all locks, but leave the current LWP locked */ - if (l->l_mutex == spc->spc_mutex) { + if (l->l_mutex == l->l_cpu->ci_schedstate.spc_mutex) { + /* + * In case of migration, drop the local runqueue + * lock, thread is on other runqueue now. + */ + if (__predict_false(tci != NULL)) + spc_unlock(ci); /* * Drop spc_lwplock, if the current LWP has been moved * to the run queue (it is now locked by spc_mutex). @@ -494,6 +525,7 @@ mi_switch(lwp_t *l) * run queues. */ mutex_spin_exit(spc->spc_mutex); + KASSERT(tci == NULL); } /* @@ -565,7 +597,8 @@ mi_switch(lwp_t *l) retval = 1; } else { /* Nothing to do - just unlock and return. 
*/ - mutex_spin_exit(spc->spc_mutex); + KASSERT(tci == NULL); + spc_unlock(ci); lwp_unlock(l); retval = 0; } diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index fcd213c20f95..dc090a839343 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -1,4 +1,4 @@ -/* $NetBSD: sched_4bsd.c,v 1.11 2007/12/21 12:05:39 ad Exp $ */ +/* $NetBSD: sched_4bsd.c,v 1.12 2008/01/15 03:37:11 rmind Exp $ */ /*- * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc. @@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sched_4bsd.c,v 1.11 2007/12/21 12:05:39 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sched_4bsd.c,v 1.12 2008/01/15 03:37:11 rmind Exp $"); #include "opt_ddb.h" #include "opt_lockdebug.h" @@ -136,7 +136,7 @@ extern unsigned int sched_pstats_ticks; /* defined in kern_synch.c */ kmutex_t runqueue_lock; /* Number of hardclock ticks per sched_tick() */ -int rrticks; +static int rrticks; const int schedppq = 1; @@ -642,6 +642,12 @@ void sched_enqueue(struct lwp *l, bool ctxswitch) { + if (__predict_false(l->l_target_cpu != NULL)) { + /* Global mutex is used - just change the CPU */ + l->l_cpu = l->l_target_cpu; + l->l_target_cpu = NULL; + } + if ((l->l_flag & LW_BOUND) != 0) runqueue_enqueue(l->l_cpu->ci_schedstate.spc_sched_info, l); else @@ -740,8 +746,20 @@ sched_lwp_collect(struct lwp *t) } /* - * sysctl setup. XXX This should be split with kern_synch.c. + * Sysctl nodes and initialization. 
*/ + +static int +sysctl_sched_rtts(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + int rttsms = hztoms(rrticks); + + node = *rnode; + node.sysctl_data = &rttsms; + return sysctl_lookup(SYSCTLFN_CALL(&node)); +} + SYSCTL_SETUP(sysctl_sched_setup, "sysctl kern.sched subtree setup") { const struct sysctlnode *node = NULL; @@ -765,6 +783,12 @@ SYSCTL_SETUP(sysctl_sched_setup, "sysctl kern.sched subtree setup") CTLTYPE_STRING, "name", NULL, NULL, 0, __UNCONST("4.4BSD"), 0, CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &node, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_INT, "rtts", + SYSCTL_DESCR("Round-robin time quantum (in miliseconds)"), + sysctl_sched_rtts, 0, NULL, 0, + CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, &node, NULL, CTLFLAG_READWRITE, CTLTYPE_INT, "timesoftints", diff --git a/sys/kern/sched_m2.c b/sys/kern/sched_m2.c index 25c18347853f..fbe9b14e1ace 100644 --- a/sys/kern/sched_m2.c +++ b/sys/kern/sched_m2.c @@ -1,7 +1,7 @@ -/* $NetBSD: sched_m2.c,v 1.14 2007/12/21 12:05:39 ad Exp $ */ +/* $NetBSD: sched_m2.c,v 1.15 2008/01/15 03:37:11 rmind Exp $ */ /* - * Copyright (c) 2007, Mindaugas Rasiukevicius + * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sched_m2.c,v 1.14 2007/12/21 12:05:39 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sched_m2.c,v 1.15 2008/01/15 03:37:11 rmind Exp $"); #include @@ -47,6 +47,7 @@ __KERNEL_RCSID(0, "$NetBSD: sched_m2.c,v 1.14 2007/12/21 12:05:39 ad Exp $"); #include #include #include +#include #include #include #include @@ -87,6 +88,7 @@ static pri_t high_pri[PRI_COUNT]; /* Map for priority increase */ * Migration and balancing. 
*/ #ifdef MULTIPROCESSOR + static u_int cacheht_time; /* Cache hotness time */ static u_int min_catch; /* Minimal LWP count for catching */ @@ -420,6 +422,7 @@ sched_dequeue(struct lwp *l) ci_rq = l->l_cpu->ci_schedstate.spc_sched_info; KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex)); + KASSERT(eprio <= ci_rq->r_highest_pri); KASSERT(ci_rq->r_bitmap[eprio >> BITMAP_SHIFT] != 0); KASSERT(ci_rq->r_count > 0); @@ -554,19 +557,20 @@ sched_pstats_hook(struct lwp *l) /* Check if LWP can migrate to the chosen CPU */ static inline bool -sched_migratable(const struct lwp *l, const struct cpu_info *ci) +sched_migratable(const struct lwp *l, struct cpu_info *ci) { + const struct schedstate_percpu *spc = &ci->ci_schedstate; - if (ci->ci_schedstate.spc_flags & SPCF_OFFLINE) + /* CPU is offline */ + if (__predict_false(spc->spc_flags & SPCF_OFFLINE)) return false; - if ((l->l_flag & LW_BOUND) == 0) - return true; -#if 0 - return cpu_in_pset(ci, l->l_psid); -#else - return false; -#endif + /* Affinity bind */ + if (__predict_false(l->l_flag & LW_AFFINITY)) + return CPU_ISSET(cpu_index(ci), &l->l_affinity); + + /* Processor-set */ + return (spc->spc_psid == l->l_psid); } /* @@ -576,17 +580,23 @@ sched_migratable(const struct lwp *l, const struct cpu_info *ci) struct cpu_info * sched_takecpu(struct lwp *l) { - struct cpu_info *ci, *tci = NULL; + struct cpu_info *ci, *tci; struct schedstate_percpu *spc; runqueue_t *ci_rq; sched_info_lwp_t *sil; CPU_INFO_ITERATOR cii; pri_t eprio, lpri; + KASSERT(lwp_locked(l, NULL)); + ci = l->l_cpu; spc = &ci->ci_schedstate; ci_rq = spc->spc_sched_info; + /* If thread is strictly bound, do not estimate other CPUs */ + if (l->l_flag & LW_BOUND) + return ci; + /* CPU of this thread is idling - run there */ if (ci_rq->r_count == 0) return ci; @@ -609,6 +619,7 @@ sched_takecpu(struct lwp *l) * Look for the CPU with the lowest priority thread. In case of * equal the priority - check the lower count of the threads. 
*/ + tci = l->l_cpu; lpri = PRI_COUNT; for (CPU_INFO_FOREACH(cii, ci)) { runqueue_t *ici_rq; @@ -620,18 +631,16 @@ sched_takecpu(struct lwp *l) if (pri > lpri) continue; - if (pri == lpri && tci && ci_rq->r_count < ici_rq->r_count) + if (pri == lpri && ci_rq->r_count < ici_rq->r_count) continue; - if (sched_migratable(l, ci) == false) + if (!sched_migratable(l, ci)) continue; lpri = pri; tci = ci; ci_rq = ici_rq; } - - KASSERT(tci != NULL); return tci; } @@ -691,7 +700,7 @@ sched_catchlwp(void) /* Look for threads, whose are allowed to migrate */ sil = l->l_sched_info; if ((l->l_flag & LW_SYSTEM) || CACHE_HOT(sil) || - sched_migratable(l, curci) == false) { + !sched_migratable(l, curci)) { l = TAILQ_NEXT(l, l_runq); continue; } @@ -866,7 +875,7 @@ sched_tick(struct cpu_info *ci) * If there are higher priority threads or threads in the same queue, * mark that thread should yield, otherwise, continue running. */ - if (lwp_eprio(l) <= ci_rq->r_highest_pri) { + if (lwp_eprio(l) <= ci_rq->r_highest_pri || l->l_target_cpu) { spc->spc_flags |= SPCF_SHOULDYIELD; cpu_need_resched(ci, 0); } else @@ -877,6 +886,17 @@ sched_tick(struct cpu_info *ci) * Sysctl nodes and initialization. 
*/ +static int +sysctl_sched_rtts(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + int rttsms = hztoms(rt_ts); + + node = *rnode; + node.sysctl_data = &rttsms; + return sysctl_lookup(SYSCTLFN_CALL(&node)); +} + static int sysctl_sched_mints(SYSCTLFN_ARGS) { @@ -967,6 +987,12 @@ SYSCTL_SETUP(sysctl_sched_setup, "sysctl kern.sched subtree setup") CTLTYPE_STRING, "name", NULL, NULL, 0, __UNCONST("M2"), 0, CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &node, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_INT, "rtts", + SYSCTL_DESCR("Round-robin time quantum (in miliseconds)"), + sysctl_sched_rtts, 0, NULL, 0, + CTL_CREATE, CTL_EOL); sysctl_createv(clog, 0, &node, NULL, CTLFLAG_PERMANENT | CTLFLAG_READWRITE, CTLTYPE_INT, "maxts", diff --git a/sys/kern/sys_pset.c b/sys/kern/sys_pset.c new file mode 100644 index 000000000000..c057402e9ee9 --- /dev/null +++ b/sys/kern/sys_pset.c @@ -0,0 +1,503 @@ +/* $NetBSD: sys_pset.c,v 1.1 2008/01/15 03:41:49 rmind Exp $ */ + +/* + * Copyright (c) 2008, Mindaugas Rasiukevicius + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Implementation of the Processor Sets. + * + * Locking + * The array of the processor-set structures and its members are protected + * by the global psets_lock. Note that in scheduler, the very l_psid value + * might be used without lock held. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: sys_pset.c,v 1.1 2008/01/15 03:41:49 rmind Exp $"); + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static pset_info_t ** psets; +static kmutex_t psets_lock; +static u_int psets_max; +static u_int psets_count; + +static int psets_realloc(int); +static int psid_validate(psetid_t, bool); +static int kern_pset_create(psetid_t *); +static int kern_pset_destroy(psetid_t); + +/* + * Initialization of the processor-sets. + */ +void +psets_init(void) +{ + + psets_max = max(MAXCPUS, 32); + psets = kmem_zalloc(psets_max * sizeof(void *), KM_SLEEP); + mutex_init(&psets_lock, MUTEX_DEFAULT, IPL_NONE); + psets_count = 0; +} + +/* + * Reallocate the array of the processor-set structures. 
+ */ +static int +psets_realloc(int new_psets_max) +{ + pset_info_t **new_psets, **old_psets; + const u_int newsize = new_psets_max * sizeof(void *); + u_int i, oldsize; + + if (new_psets_max < 1) + return EINVAL; + + new_psets = kmem_zalloc(newsize, KM_SLEEP); + mutex_enter(&psets_lock); + old_psets = psets; + oldsize = psets_max * sizeof(void *); + + /* Check if we can lower the size of the array */ + if (new_psets_max < psets_max) { + for (i = new_psets_max; i < psets_max; i++) { + if (psets[i] == NULL) + continue; + mutex_exit(&psets_lock); + kmem_free(new_psets, newsize); + return EBUSY; + } + } + + /* Copy all pointers to the new array */ + memcpy(new_psets, psets, newsize); + psets_max = new_psets_max; + psets = new_psets; + mutex_exit(&psets_lock); + + kmem_free(old_psets, oldsize); + return 0; +} + +/* + * Validate processor-set ID. + */ +static int +psid_validate(psetid_t psid, bool chkps) +{ + + KASSERT(mutex_owned(&psets_lock)); + + if (chkps && (psid == PS_NONE || psid == PS_QUERY || psid == PS_MYID)) + return 0; + if (psid <= 0 || psid > psets_max) + return EINVAL; + if (psets[psid - 1] == NULL) + return EINVAL; + if (psets[psid - 1]->ps_flags & PSET_BUSY) + return EBUSY; + + return 0; +} + +/* + * Create a processor-set. + */ +static int +kern_pset_create(psetid_t *psid) +{ + pset_info_t *pi; + u_int i; + + if (psets_count == psets_max) + return ENOMEM; + + pi = kmem_zalloc(sizeof(pset_info_t), KM_SLEEP); + + mutex_enter(&psets_lock); + if (psets_count == psets_max) { + mutex_exit(&psets_lock); + kmem_free(pi, sizeof(pset_info_t)); + return ENOMEM; + } + + /* Find a free entry in the array */ + for (i = 0; i < psets_max; i++) + if (psets[i] == NULL) + break; + KASSERT(i != psets_max); + + psets[i] = pi; + psets_count++; + mutex_exit(&psets_lock); + + *psid = i + 1; + return 0; +} + +/* + * Destroy a processor-set. 
+ */ +static int +kern_pset_destroy(psetid_t psid) +{ + struct cpu_info *ci; + pset_info_t *pi; + struct lwp *l; + CPU_INFO_ITERATOR cii; + int error; + + mutex_enter(&psets_lock); + if (psid == PS_MYID) { + /* Use caller's processor-set ID */ + psid = curlwp->l_psid; + } + error = psid_validate(psid, false); + if (error) { + mutex_exit(&psets_lock); + return error; + } + + /* Release the processor-set from all CPUs */ + for (CPU_INFO_FOREACH(cii, ci)) { + struct schedstate_percpu *spc; + + spc = &ci->ci_schedstate; + if (spc->spc_psid != psid) + continue; + spc->spc_psid = PS_NONE; + } + /* Mark that processor-set is going to be destroyed */ + pi = psets[psid - 1]; + pi->ps_flags |= PSET_BUSY; + mutex_exit(&psets_lock); + + /* Unmark the processor-set ID from each thread */ + mutex_enter(&proclist_lock); + LIST_FOREACH(l, &alllwp, l_list) { + /* Safe to check and set without lock held */ + if (l->l_psid != psid) + continue; + l->l_psid = PS_NONE; + } + mutex_exit(&proclist_lock); + + /* Destroy the processor-set */ + mutex_enter(&psets_lock); + psets[psid - 1] = NULL; + psets_count--; + mutex_exit(&psets_lock); + + kmem_free(pi, sizeof(pset_info_t)); + return 0; +} + +/* + * General system calls for the processor-sets. 
+ */ + +int +sys_pset_create(struct lwp *l, const struct sys_pset_create_args *uap, + register_t *retval) +{ + /* { + syscallarg(psetid_t) *psid; + } */ + psetid_t psid; + int error; + + /* Available only for super-user */ + if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) + return EPERM; + + error = kern_pset_create(&psid); + if (error) + return error; + + error = copyout(&psid, SCARG(uap, psid), sizeof(psetid_t)); + if (error) + (void)kern_pset_destroy(psid); + + return error; +} + +int +sys_pset_destroy(struct lwp *l, const struct sys_pset_destroy_args *uap, + register_t *retval) +{ + /* { + syscallarg(psetid_t) psid; + } */ + + /* Available only for super-user */ + if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) + return EPERM; + + return kern_pset_destroy(SCARG(uap, psid)); +} + +int +sys_pset_assign(struct lwp *l, const struct sys_pset_assign_args *uap, + register_t *retval) +{ + /* { + syscallarg(psetid_t) psid; + syscallarg(cpuid_t) cpuid; + syscallarg(psetid_t) *opsid; + } */ + struct cpu_info *ci; + struct schedstate_percpu *spc; + psetid_t psid = SCARG(uap, psid), opsid = 0; + CPU_INFO_ITERATOR cii; + int error = 0; + + /* Available only for super-user, except the case of PS_QUERY */ + if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL) && + psid != PS_QUERY) + return EPERM; + + /* Find the target CPU */ + for (CPU_INFO_FOREACH(cii, ci)) + if (cpu_index(ci) == SCARG(uap, cpuid)) + break; + if (ci == NULL) + return EINVAL; + spc = &ci->ci_schedstate; + + mutex_enter(&psets_lock); + error = psid_validate(psid, true); + if (error) { + mutex_exit(&psets_lock); + return error; + } + opsid = spc->spc_psid; + switch (psid) { + case PS_QUERY: + break; + case PS_MYID: + psid = curlwp->l_psid; + default: + spc->spc_psid = psid; + } + mutex_exit(&psets_lock); + + if (SCARG(uap, opsid) != NULL) + error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t)); + + return error; +} + +int +sys__pset_bind(struct 
lwp *l, const struct sys__pset_bind_args *uap, + register_t *retval) +{ + /* { + syscallarg(idtype_t) idtype; + syscallarg(id_t) first_id; + syscallarg(id_t) second_id; + syscallarg(psetid_t) psid; + syscallarg(psetid_t) *opsid; + } */ + struct cpu_info *ci; + struct proc *p; + struct lwp *t; + id_t id1, id2; + pid_t pid = 0; + lwpid_t lid = 0; + psetid_t psid, opsid; + int error = 0, lcnt; + + psid = SCARG(uap, psid); + + /* Available only for super-user, except the case of PS_QUERY */ + if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL) && + psid != PS_QUERY) + return EPERM; + + mutex_enter(&psets_lock); + error = psid_validate(psid, true); + if (error) { + mutex_exit(&psets_lock); + return error; + } + if (psid == PS_MYID) + psid = curlwp->l_psid; + if (psid != PS_QUERY && psid != PS_NONE) + psets[psid - 1]->ps_flags |= PSET_BUSY; + mutex_exit(&psets_lock); + + /* + * Get PID and LID from the ID. + */ + p = l->l_proc; + id1 = SCARG(uap, first_id); + id2 = SCARG(uap, second_id); + + switch (SCARG(uap, idtype)) { + case P_PID: + /* + * Process: + * First ID - PID; + * Second ID - ignored; + */ + pid = (id1 == P_MYID) ? p->p_pid : id1; + lid = 0; + break; + case P_LWPID: + /* + * Thread (LWP): + * First ID - LID; + * Second ID - PID; + */ + if (id1 == P_MYID) { + pid = p->p_pid; + lid = l->l_lid; + break; + } + lid = id1; + pid = (id2 == P_MYID) ? p->p_pid : id2; + break; + default: + error = EINVAL; goto error; + } + + /* Find the process */ + p = p_find(pid, PFIND_UNLOCK_FAIL); + if (p == NULL) { + error = ESRCH; + goto error; + } + mutex_enter(&p->p_smutex); + mutex_exit(&proclist_lock); + + /* Disallow modification of the system processes */ + if (p->p_flag & PK_SYSTEM) { + mutex_exit(&p->p_smutex); + error = EPERM; + goto error; + } + + /* Find the LWP(s) */ + lcnt = 0; + ci = NULL; + LIST_FOREACH(t, &p->p_lwps, l_sibling) { + if (lid && lid != t->l_lid) + continue; + /* + * Bind the thread to the processor-set, + * take some CPU and migrate. 
+ */ + lwp_lock(t); + opsid = t->l_psid; + if (psid != PS_QUERY) { t->l_psid = psid; + ci = sched_takecpu(t); + /* Unlocks LWP */ + lwp_migrate(t, ci); } else lwp_unlock(t); + lcnt++; + } + mutex_exit(&p->p_smutex); + if (lcnt == 0) { + error = ESRCH; + goto error; + } + *retval = lcnt; + if (SCARG(uap, opsid)) + error = copyout(&opsid, SCARG(uap, opsid), sizeof(psetid_t)); +error: + if (psid != PS_QUERY && psid != PS_NONE) { + mutex_enter(&psets_lock); + psets[psid - 1]->ps_flags &= ~PSET_BUSY; + mutex_exit(&psets_lock); + } + return error; +} + +/* + * Sysctl nodes and initialization. + */ + +static int +sysctl_psets_max(SYSCTLFN_ARGS) +{ + struct sysctlnode node; + int error, newsize; + + node = *rnode; + node.sysctl_data = &newsize; + + newsize = psets_max; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + return error; + + if (newsize <= 0) + return EINVAL; + + sysctl_unlock(); + error = psets_realloc(newsize); + sysctl_relock(); + return error; +} + +SYSCTL_SETUP(sysctl_pset_setup, "sysctl kern.pset subtree setup") +{ + const struct sysctlnode *node = NULL; + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "kern", NULL, + NULL, 0, NULL, 0, + CTL_KERN, CTL_EOL); + sysctl_createv(clog, 0, NULL, &node, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "pset", + SYSCTL_DESCR("Processor-set options"), + NULL, 0, NULL, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); + + if (node == NULL) + return; + + sysctl_createv(clog, 0, &node, NULL, + CTLFLAG_PERMANENT | CTLFLAG_READWRITE, + CTLTYPE_INT, "psets_max", + SYSCTL_DESCR("Maximal count of the processor-sets"), + sysctl_psets_max, 0, &psets_max, 0, + CTL_CREATE, CTL_EOL); +} diff --git a/sys/kern/sys_sched.c b/sys/kern/sys_sched.c index 7e0aa43abf03..861677959a48 100644 --- a/sys/kern/sys_sched.c +++ b/sys/kern/sys_sched.c @@ -1,9 +1,9 @@ -/* $NetBSD: sys_sched.c,v 1.4 2007/12/20 23:03:11 dsl Exp $ */ +/* $NetBSD: sys_sched.c,v 1.5 2008/01/15 03:37:11 rmind Exp $ */ -/*- - * Copyright (c) 2007 The NetBSD Foundation, Inc. 
+/* + * Copyright (c) 2008, Mindaugas Rasiukevicius * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -12,15 +12,8 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS @@ -33,14 +26,324 @@ * POSSIBILITY OF SUCH DAMAGE. */ +/* + * TODO: + * - Handle pthread_setschedprio() as defined by POSIX; + * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX; + */ + #include -__KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.4 2007/12/20 23:03:11 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.5 2008/01/15 03:37:11 rmind Exp $"); #include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Set scheduling parameters. 
+ */ +int +sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, + register_t *retval) +{ + /* { + syscallarg(pid_t) pid; + syscallarg(lwpid_t) lid; + syscallarg(const struct sched_param *) params; + } */ + struct sched_param *sp; + struct proc *p; + struct lwp *t; + pid_t pid; + lwpid_t lid; + u_int lcnt; + pri_t pri; + int error; + + /* Available only for super-user */ + if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) + return EACCES; + + /* Get the parameters from the user-space */ + sp = kmem_zalloc(sizeof(struct sched_param), KM_SLEEP); + error = copyin(SCARG(uap, params), sp, sizeof(struct sched_param)); + if (error) + goto error; + + /* + * Validate scheduling class and priority. + * Convert the user priority to the in-kernel value. + */ + pri = sp->sched_priority; + if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) { + error = EINVAL; + goto error; + } + switch (sp->sched_class) { + case SCHED_OTHER: + if (pri == PRI_NONE) + pri = PRI_USER; + else + pri += PRI_USER; + break; + case SCHED_RR: + case SCHED_FIFO: + if (pri == PRI_NONE) + pri = PRI_USER_RT; + else + pri += PRI_USER_RT; + break; + case SCHED_NONE: + break; + default: + error = EINVAL; + goto error; + } + + /* Find the process */ + pid = SCARG(uap, pid); + p = p_find(pid, PFIND_UNLOCK_FAIL); + if (p == NULL) { + error = ESRCH; + goto error; + } + mutex_enter(&p->p_smutex); + mutex_exit(&proclist_lock); + + /* Disallow modification of system processes */ + if (p->p_flag & PK_SYSTEM) { + mutex_exit(&p->p_smutex); + error = EACCES; + goto error; + } + + /* Find the LWP(s) */ + lcnt = 0; + lid = SCARG(uap, lid); + LIST_FOREACH(t, &p->p_lwps, l_sibling) { + bool chpri; + + if (lid && lid != t->l_lid) + continue; + + /* Set the scheduling class */ + lwp_lock(t); + if (sp->sched_class != SCHED_NONE) { + /* + * Priority must be changed to get into the correct + * priority range of the new scheduling class. 
+ */ + chpri = (t->l_class != sp->sched_class); + t->l_class = sp->sched_class; + } else + chpri = false; + + /* Change the priority */ + if (sp->sched_priority != PRI_NONE || chpri) + lwp_changepri(t, pri); + + lwp_unlock(t); + lcnt++; + } + mutex_exit(&p->p_smutex); + if (lcnt != 0) + *retval = lcnt; + else + error = ESRCH; +error: + kmem_free(sp, sizeof(struct sched_param)); + return error; +} + +/* + * Get scheduling parameters. + */ +int +sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, + register_t *retval) +{ + /* { + syscallarg(pid_t) pid; + syscallarg(lwpid_t) lid; + syscallarg(struct sched_param *) params; + } */ + struct sched_param *sp; + struct lwp *t; + int error; + + sp = kmem_zalloc(sizeof(struct sched_param), KM_SLEEP); + + /* Locks the LWP */ + t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid)); + if (t == NULL) { + kmem_free(sp, sizeof(struct sched_param)); + return ESRCH; + } + sp->sched_priority = t->l_priority; + sp->sched_class = t->l_class; + lwp_unlock(t); + + switch (sp->sched_class) { + case SCHED_OTHER: + sp->sched_priority -= PRI_USER; + break; + case SCHED_RR: + case SCHED_FIFO: + sp->sched_priority -= PRI_USER_RT; + break; + } + error = copyout(sp, SCARG(uap, params), sizeof(struct sched_param)); + kmem_free(sp, sizeof(struct sched_param)); + return error; +} + +/* + * Set affinity. 
+ */ +int +sys__sched_setaffinity(struct lwp *l, + const struct sys__sched_setaffinity_args *uap, register_t *retval) +{ + /* { + syscallarg(pid_t) pid; + syscallarg(lwpid_t) lid; + syscallarg(size_t) size; + syscallarg(void *) cpuset; + } */ + cpuset_t *cpuset; + struct cpu_info *ci = NULL; + struct proc *p; + struct lwp *t; + CPU_INFO_ITERATOR cii; + lwpid_t lid; + u_int lcnt; + int error; + + /* Available only for super-user */ + if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) + return EACCES; + + if (SCARG(uap, size) <= 0) + return EINVAL; + + /* Allocate the CPU set, and get it from userspace */ + cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP); + error = copyin(SCARG(uap, cpuset), cpuset, + min(SCARG(uap, size), sizeof(cpuset_t))); + if (error) + goto error; + + /* Look for a CPU in the set */ + for (CPU_INFO_FOREACH(cii, ci)) + if (CPU_ISSET(cpu_index(ci), cpuset)) + break; + if (ci == NULL) { + /* Empty set */ + kmem_free(cpuset, sizeof(cpuset_t)); + cpuset = NULL; + } + + /* Find the process */ + p = p_find(SCARG(uap, pid), PFIND_UNLOCK_FAIL); + if (p == NULL) { + error = ESRCH; + goto error; + } + mutex_enter(&p->p_smutex); + mutex_exit(&proclist_lock); + + /* Disallow modification of system processes */ + if (p->p_flag & PK_SYSTEM) { + mutex_exit(&p->p_smutex); + error = EACCES; + goto error; + } + + /* Find the LWP(s) */ + lcnt = 0; + lid = SCARG(uap, lid); + LIST_FOREACH(t, &p->p_lwps, l_sibling) { + if (lid && lid != t->l_lid) + continue; + lwp_lock(t); + if (cpuset) { + /* Set the affinity flag and new CPU set */ + t->l_flag |= LW_AFFINITY; + memcpy(&t->l_affinity, cpuset, sizeof(cpuset_t)); + /* Migrate to another CPU, unlocks LWP */ + lwp_migrate(t, ci); + } else { + /* Unset the affinity flag */ + t->l_flag &= ~LW_AFFINITY; + lwp_unlock(t); + } + lcnt++; + } + mutex_exit(&p->p_smutex); + if (lcnt == 0) + error = ESRCH; + else + *retval = lcnt; +error: + if (cpuset != NULL) + kmem_free(cpuset, sizeof(cpuset_t)); + return 
error; +} + +/* + * Get affinity. + */ +int +sys__sched_getaffinity(struct lwp *l, + const struct sys__sched_getaffinity_args *uap, register_t *retval) +{ + /* { + syscallarg(pid_t) pid; + syscallarg(lwpid_t) lid; + syscallarg(size_t) size; + syscallarg(void *) cpuset; + } */ + struct lwp *t; + void *cpuset; + int error; + + if (SCARG(uap, size) <= 0) + return EINVAL; + + cpuset = kmem_zalloc(sizeof(cpuset_t), KM_SLEEP); + + /* Locks the LWP */ + t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid)); + if (t == NULL) { + kmem_free(cpuset, sizeof(cpuset_t)); + return ESRCH; + } + if (t->l_flag & LW_AFFINITY) + memcpy(cpuset, &t->l_affinity, sizeof(cpuset_t)); + lwp_unlock(t); + + error = copyout(cpuset, SCARG(uap, cpuset), + min(SCARG(uap, size), sizeof(cpuset_t))); + + kmem_free(cpuset, sizeof(cpuset_t)); + return error; +} + +/* + * Yield. + */ int sys_sched_yield(struct lwp *l, const void *v, register_t *retval) { @@ -48,3 +351,47 @@ sys_sched_yield(struct lwp *l, const void *v, register_t *retval) yield(); return 0; } + +/* + * Sysctl nodes and initialization. 
+ */ +SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup") +{ + const struct sysctlnode *node = NULL; + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "kern", NULL, + NULL, 0, NULL, 0, + CTL_KERN, CTL_EOL); + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, + CTLTYPE_INT, "posix_sched", + SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " + "Process Scheduling option to which the " + "system attempts to conform"), + NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, NULL, &node, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "sched", + SYSCTL_DESCR("Scheduler options"), + NULL, 0, NULL, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); + + if (node == NULL) + return; + + sysctl_createv(clog, 0, &node, NULL, + CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, + CTLTYPE_INT, "pri_min", + SYSCTL_DESCR("Minimal POSIX real-time priority"), + NULL, SCHED_PRI_MIN, NULL, 0, + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &node, NULL, + CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, + CTLTYPE_INT, "pri_max", + SYSCTL_DESCR("Maximal POSIX real-time priority"), + NULL, SCHED_PRI_MAX, NULL, 0, + CTL_CREATE, CTL_EOL); +} diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 642e1ec36990..d8c90588cd8a 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.183 2008/01/07 16:15:36 ad Exp $ + $NetBSD: syscalls.master,v 1.184 2008/01/15 03:37:11 rmind Exp $ ; @(#)syscalls.master 8.2 (Berkeley) 1/13/94 @@ -696,17 +696,20 @@ const struct kevent *changelist, size_t nchanges, \ struct kevent *eventlist, size_t nevents, \ const struct timespec *timeout); } -; -; Syscalls 346-353 are reserved for the IEEE Std1003.1b scheduling syscalls -; -346 UNIMPL sys_sched_setparam -347 UNIMPL sys_sched_getparam -348 UNIMPL sys_sched_setscheduler -349 UNIMPL sys_sched_getscheduler + +; Scheduling system calls. 
+346 STD MPSAFE { int sys__sched_setparam(pid_t pid, lwpid_t lid, \ + const struct sched_param *params); } +347 STD MPSAFE { int sys__sched_getparam(pid_t pid, lwpid_t lid, \ + struct sched_param *params); } +348 STD MPSAFE { int sys__sched_setaffinity(pid_t pid, lwpid_t lid, \ + size_t size, void *cpuset); } +349 STD MPSAFE { int sys__sched_getaffinity(pid_t pid, lwpid_t lid, \ + size_t size, void *cpuset); } 350 STD MPSAFE { int sys_sched_yield(void); } -351 UNIMPL sys_sched_get_priority_max -352 UNIMPL sys_sched_get_priority_min -353 UNIMPL sys_sched_rr_get_interval +351 UNIMPL +352 UNIMPL +353 UNIMPL 354 STD MPSAFE { int sys_fsync_range(int fd, int flags, off_t start, \ off_t length); } @@ -824,3 +827,11 @@ size_t data_len); } 411 STD MPSAFE { void *sys_mremap(void *old_address, size_t old_size, \ void *new_address, size_t new_size, int flags); } + +; Processor-sets system calls +412 STD MPSAFE { int sys_pset_create(psetid_t *psid); } +413 STD MPSAFE { int sys_pset_destroy(psetid_t psid); } +414 STD MPSAFE { int sys_pset_assign(psetid_t psid, cpuid_t cpuid, \ + psetid_t *opsid); } +415 STD MPSAFE { int sys__pset_bind(idtype_t idtype, id_t first_id, \ + id_t second_id, psetid_t psid, psetid_t *opsid); } diff --git a/sys/sys/Makefile b/sys/sys/Makefile index 03bba00ce4e3..8e8b76ba2aba 100644 --- a/sys/sys/Makefile +++ b/sys/sys/Makefile @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.106 2007/12/31 15:32:14 ad Exp $ +# $NetBSD: Makefile,v 1.107 2008/01/15 03:37:12 rmind Exp $ .include @@ -23,7 +23,7 @@ INCS= acct.h agpio.h aio.h ansi.h ataio.h atomic.h audioio.h \ md5.h midiio.h mman.h mount.h mqueue.h msg.h msgbuf.h mtio.h mutex.h \ namei.h null.h \ param.h pipe.h pmc.h poll.h pool.h power.h proc.h \ - protosw.h ptrace.h queue.h \ + protosw.h pset.h ptrace.h queue.h \ ras.h reboot.h radioio.h resource.h resourcevar.h rmd160.h rnd.h rwlock.h \ scanio.h sched.h scsiio.h select.h selinfo.h sem.h sha1.h sha2.h \ shm.h siginfo.h signal.h signalvar.h sigtypes.h simplelock.h 
\ diff --git a/sys/sys/lwp.h b/sys/sys/lwp.h index 0ca4e8da07f3..ea9bceff97f9 100644 --- a/sys/sys/lwp.h +++ b/sys/sys/lwp.h @@ -1,4 +1,4 @@ -/* $NetBSD: lwp.h,v 1.76 2008/01/07 16:12:56 ad Exp $ */ +/* $NetBSD: lwp.h,v 1.77 2008/01/15 03:37:12 rmind Exp $ */ /*- * Copyright (c) 2001, 2006, 2007 The NetBSD Foundation, Inc. @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -98,9 +99,12 @@ struct lwp { int l_cpticks; /* t: Ticks of CPU time */ fixpt_t l_pctcpu; /* t: %cpu during l_swtime */ fixpt_t l_estcpu; /* l: cpu time for SCHED_4BSD */ + psetid_t l_psid; /* l: assigned processor-set ID */ + struct cpu_info *l_target_cpu; /* l: target CPU to migrate */ kmutex_t l_swaplock; /* l: lock to prevent swapping */ struct lwpctl *l_lwpctl; /* p: lwpctl block kernel address */ struct lcpage *l_lcpage; /* p: lwpctl containing page */ + cpuset_t l_affinity; /* l: CPU set for affinity */ /* Synchronisation */ struct turnstile *l_ts; /* l: current turnstile */ @@ -191,6 +195,7 @@ extern lwp_t lwp0; /* LWP for proc0 */ #define LW_WSUSPEND 0x00020000 /* Suspend before return to user */ #define LW_WCORE 0x00080000 /* Stop for core dump on return to user */ #define LW_WEXIT 0x00100000 /* Exit before return to user */ +#define LW_AFFINITY 0x00200000 /* Affinity is assigned to the thread */ #define LW_PENDSIG 0x01000000 /* Pending signal for us */ #define LW_CANCELLED 0x02000000 /* tsleep should not sleep */ #define LW_WUSERRET 0x04000000 /* Call proc::p_userret on return to user */ @@ -270,7 +275,9 @@ lwp_t *proc_representative_lwp(struct proc *, int *, int); int lwp_suspend(lwp_t *, lwp_t *); int lwp_create1(lwp_t *, const void *, size_t, u_long, lwpid_t *); void lwp_update_creds(lwp_t *); -lwp_t *lwp_find(struct proc *, int); +void lwp_migrate(lwp_t *, struct cpu_info *); +lwp_t *lwp_find2(pid_t, lwpid_t); +lwp_t *lwp_find(proc_t *, int); void lwp_userret(lwp_t *); void lwp_need_userret(lwp_t *); void lwp_free(lwp_t *, bool, bool); @@ -382,6 
+389,39 @@ spc_unlock(struct cpu_info *ci) mutex_spin_exit(ci->ci_schedstate.spc_mutex); } +static inline void +spc_dlock(struct cpu_info *ci1, struct cpu_info *ci2) +{ + struct schedstate_percpu *spc1 = &ci1->ci_schedstate; + struct schedstate_percpu *spc2 = &ci2->ci_schedstate; + + KASSERT(ci1 != ci2); + if (spc1->spc_mutex == spc2->spc_mutex) { + mutex_spin_enter(spc1->spc_mutex); + } else if (ci1 < ci2) { + mutex_spin_enter(spc1->spc_mutex); + mutex_spin_enter(spc2->spc_mutex); + } else { + mutex_spin_enter(spc2->spc_mutex); + mutex_spin_enter(spc1->spc_mutex); + } +} + +static inline void +spc_dunlock(struct cpu_info *ci1, struct cpu_info *ci2) +{ + struct schedstate_percpu *spc1 = &ci1->ci_schedstate; + struct schedstate_percpu *spc2 = &ci2->ci_schedstate; + + KASSERT(ci1 != ci2); + if (spc1->spc_mutex == spc2->spc_mutex) { + mutex_spin_exit(spc1->spc_mutex); + } else { + mutex_spin_exit(spc1->spc_mutex); + mutex_spin_exit(spc2->spc_mutex); + } +} + #endif /* _KERNEL */ /* Flags for _lwp_create(), as per Solaris. */ diff --git a/sys/sys/param.h b/sys/sys/param.h index dcdca56fd747..920f039e495d 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,4 +1,4 @@ -/* $NetBSD: param.h,v 1.299 2008/01/09 16:16:27 ad Exp $ */ +/* $NetBSD: param.h,v 1.300 2008/01/15 03:37:12 rmind Exp $ */ /*- * Copyright (c) 1982, 1986, 1989, 1993 @@ -244,6 +244,10 @@ #define NPRI_USER 64 #define MAXPRI_USER (PRI_USER + NPRI_USER - 1) +/* Priority range used by POSIX real-time features */ +#define SCHED_PRI_MIN 0 +#define SCHED_PRI_MAX 63 + /* * Kernel thread priorities. */ diff --git a/sys/sys/pset.h b/sys/sys/pset.h new file mode 100644 index 000000000000..7f07b1471879 --- /dev/null +++ b/sys/sys/pset.h @@ -0,0 +1,99 @@ +/* $NetBSD: pset.h,v 1.1 2008/01/15 03:41:50 rmind Exp $ */ + +/* + * Copyright (c) 2008, Mindaugas Rasiukevicius + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _SYS_PSET_H_ +#define _SYS_PSET_H_ + +#include +#include + +/* Types of processor-sets */ +#define PS_NONE 0 +#define PS_MYID -1 +#define PS_QUERY -2 + +/* ID types for processor-set calls */ +#define P_MYID -1 +#define P_PID 1 +#define P_LWPID 2 + +/* For compatibility only */ +typedef cpuid_t processorid_t; + +__BEGIN_DECLS +int pset_assign(psetid_t, cpuid_t, psetid_t *); +int pset_bind(psetid_t, idtype_t, id_t, psetid_t *); +int pset_create(psetid_t *); +int pset_destroy(psetid_t); +__END_DECLS + +/* Size of the CPU set bitmap */ +#define CPUSET_SHIFT 5 +#define CPUSET_MASK 31 +#if MAXCPUS > 32 +#define CPUSET_SIZE (MAXCPUS >> CPUSET_SHIFT) +#else +#define CPUSET_SIZE 1 +#endif + +/* Bitmap of the CPUs */ +typedef struct { + uint32_t bits[CPUSET_SIZE]; +} cpuset_t; + +#define CPU_ZERO(c) \ + (memset(c, 0, sizeof(cpuset_t))) + +#define CPU_ISSET(i, c) \ + ((1 << (i & CPUSET_MASK)) & (c)->bits[i >> CPUSET_SHIFT]) + +#define CPU_SET(i, c) \ + ((c)->bits[i >> CPUSET_SHIFT] |= 1 << (i & CPUSET_MASK)) + +#define CPU_CLR(i, c) \ + ((c)->bits[i >> CPUSET_SHIFT] &= ~(1 << (i & CPUSET_MASK))) + +#ifdef _NETBSD_SOURCE +int _pset_bind(idtype_t, id_t, id_t, psetid_t, psetid_t *); +#endif /* _NETBSD_SOURCE */ + +#ifdef _KERNEL + +/* Processor-set structure */ +typedef struct { + int ps_flags; +} pset_info_t; + +/* Flags */ +#define PSET_BUSY 0x01 + +void psets_init(void); + +#endif /* _KERNEL */ + +#endif /* _SYS_PSET_H_ */ diff --git a/sys/sys/sched.h b/sys/sys/sched.h index 26041b9fd3b5..9fff1196df53 100644 --- a/sys/sys/sched.h +++ b/sys/sys/sched.h @@ -1,4 +1,4 @@ -/* $NetBSD: sched.h,v 1.44 2007/12/22 01:14:53 yamt Exp $ */ +/* $NetBSD: sched.h,v 1.45 2008/01/15 03:37:12 rmind Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2007 The NetBSD Foundation, Inc. 
@@ -77,6 +77,7 @@ #define _SYS_SCHED_H_ #include +#include #if defined(_KERNEL_OPT) #include "opt_multiprocessor.h" @@ -84,20 +85,27 @@ #endif struct sched_param { + int sched_class; int sched_priority; + int _reserved1; + int _reserved2; }; /* * Scheduling policies required by IEEE Std 1003.1-2001 */ -#define SCHED_OTHER 0 /* Behavior can be FIFO or RR, or not */ +#define SCHED_NONE -1 +#define SCHED_OTHER 0 #define SCHED_FIFO 1 #define SCHED_RR 2 -/* Other nonstandard policies: */ - #if defined(_NETBSD_SOURCE) +int _sched_getaffinity(pid_t, lwpid_t, size_t, void *); +int _sched_setaffinity(pid_t, lwpid_t, size_t, void *); +int _sched_getparam(pid_t, lwpid_t, struct sched_param *); +int _sched_setparam(pid_t, lwpid_t, const struct sched_param *); + /* * CPU states. * XXX Not really scheduler state, but no other good place to put @@ -128,6 +136,7 @@ struct schedstate_percpu { kmutex_t *spc_mutex; /* (: lock on below, runnable LWPs */ kmutex_t spc_lwplock; /* (: general purpose lock for LWPs */ pri_t spc_curpriority;/* m: usrpri of curlwp */ + psetid_t spc_psid; /* (: processor-set ID */ time_t spc_lastmod; /* c: time of last cpu state change */ /* For the most part, this set of data is CPU-private. 
*/ @@ -163,7 +172,7 @@ struct schedstate_percpu { #define CLONE_VFORK 0x00004000 /* parent blocks until child exits */ -#endif /* !_POSIX_SOURCE && !_XOPEN_SOURCE && !_ANSI_SOURCE */ +#endif /* _NETBSD_SOURCE */ #ifdef _KERNEL diff --git a/sys/sys/types.h b/sys/sys/types.h index 401ca72be514..bf3efae7db8c 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -1,4 +1,4 @@ -/* $NetBSD: types.h,v 1.77 2007/09/07 18:56:13 rmind Exp $ */ +/* $NetBSD: types.h,v 1.78 2008/01/15 03:37:12 rmind Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993, 1994 @@ -164,6 +164,7 @@ typedef __gid_t gid_t; /* group id */ #define gid_t __gid_t #endif +typedef int idtype_t; /* type of the id */ typedef uint32_t id_t; /* group id, process id or user id */ typedef uint64_t ino_t; /* inode number */ typedef long key_t; /* IPC key (for Sys V IPC) */ @@ -198,6 +199,8 @@ typedef int32_t dtime_t; /* on-disk time_t */ typedef int mqd_t; +typedef int psetid_t; + #if defined(_KERNEL) || defined(_STANDALONE) /* * Boolean type definitions for the kernel environment. 
User-space diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index cee41fdca0cb..da9691ba6c15 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -1,4 +1,4 @@ -/* $NetBSD: unistd.h,v 1.39 2007/10/15 14:12:55 ad Exp $ */ +/* $NetBSD: unistd.h,v 1.40 2008/01/15 03:37:12 rmind Exp $ */ /* * Copyright (c) 1989, 1993 @@ -87,6 +87,8 @@ #define _POSIX_MESSAGE_PASSING 200112L /* monotonic clock */ #define _POSIX_MONOTONIC_CLOCK 200112L + /* priority scheduling */ +#define _POSIX_PRIORITY_SCHEDULING 200112L /* threads */ #define _POSIX_THREADS 200112L /* semaphores */ @@ -206,11 +208,16 @@ #define _SC_MESSAGE_PASSING 53 #define _SC_MQ_OPEN_MAX 54 #define _SC_MQ_PRIO_MAX 55 +#define _SC_PRIORITY_SCHEDULING 56 #ifdef _NETBSD_SOURCE /* Commonly provided sysconf() extensions */ #define _SC_NPROCESSORS_CONF 1001 #define _SC_NPROCESSORS_ONLN 1002 +/* Native variables */ +#define _SC_SCHED_RT_TS 2001 +#define _SC_SCHED_PRI_MIN 2002 +#define _SC_SCHED_PRI_MAX 2003 #endif /* _NETBSD_SOURCE */ /* configurable system strings */ diff --git a/usr.bin/getconf/getconf.c b/usr.bin/getconf/getconf.c index e6895aa344fb..565e716a228a 100644 --- a/usr.bin/getconf/getconf.c +++ b/usr.bin/getconf/getconf.c @@ -1,4 +1,4 @@ -/* $NetBSD: getconf.c,v 1.30 2007/12/15 19:44:50 perry Exp $ */ +/* $NetBSD: getconf.c,v 1.31 2008/01/15 03:37:12 rmind Exp $ */ /*- * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc. 
@@ -38,7 +38,7 @@ #include #ifndef lint -__RCSID("$NetBSD: getconf.c,v 1.30 2007/12/15 19:44:50 perry Exp $"); +__RCSID("$NetBSD: getconf.c,v 1.31 2008/01/15 03:37:12 rmind Exp $"); #endif /* not lint */ #include @@ -152,6 +152,7 @@ static const struct conf_variable conf_table[] = { "_POSIX_MEMORY_PROTECTION", SYSCONF, _SC_MEMORY_PROTECTION }, { "_POSIX_MESSAGE_PASSING", SYSCONF, _POSIX_MESSAGE_PASSING }, { "_POSIX_MONOTONIC_CLOCK", SYSCONF, _SC_MONOTONIC_CLOCK }, + { "_POSIX_PRIORITY_SCHEDULING", SYSCONF, _SC_PRIORITY_SCHEDULING }, { "_POSIX_SEMAPHORES", SYSCONF, _SC_SEMAPHORES }, { "_POSIX_SYNCHRONIZED_IO", SYSCONF, _SC_SYNCHRONIZED_IO }, { "_POSIX_TIMERS", SYSCONF, _SC_TIMERS }, diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile index c261ff880a88..a70acecbdd52 100644 --- a/usr.sbin/Makefile +++ b/usr.sbin/Makefile @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.229 2007/08/04 11:03:03 ad Exp $ +# $NetBSD: Makefile,v 1.230 2008/01/15 03:37:15 rmind Exp $ # from: @(#)Makefile 5.20 (Berkeley) 6/12/93 .include @@ -19,7 +19,7 @@ SUBDIR= ac accton acpitools altq amd apm apmd arp bad144 bind bootp \ rarpd rbootd rdate \ repquota rmt rpc.bootparamd rpc.lockd rpc.pcnfsd \ rpc.statd rpcbind rwhod sa screenblank sdpd services_mkdb sesd \ - sliplogin sntp \ + schedctl sliplogin sntp \ spray srtconfig sti sunlabel sup syslogd tadpolectl tcpdchk \ tcpdmatch tcpdump tcpdrop timed tpctl traceroute trpt unlink \ usbdevs user videomode vipw veriexecgen vnconfig wiconfig wlanctl wpa \ diff --git a/usr.sbin/schedctl/Makefile b/usr.sbin/schedctl/Makefile new file mode 100644 index 000000000000..a57e907ee838 --- /dev/null +++ b/usr.sbin/schedctl/Makefile @@ -0,0 +1,10 @@ +# $NetBSD: Makefile,v 1.1 2008/01/15 03:37:15 rmind Exp $ + +PROG= schedctl +MAN= + +LDADD+= -lrt -lkvm + +WARNS= 4 + +.include diff --git a/usr.sbin/schedctl/schedctl.c b/usr.sbin/schedctl/schedctl.c new file mode 100644 index 000000000000..ec27ac207cc9 --- /dev/null +++ b/usr.sbin/schedctl/schedctl.c @@ -0,0 +1,298 @@ 
+/* $NetBSD: schedctl.c,v 1.1 2008/01/15 03:37:15 rmind Exp $ */ + +/* + * Copyright (c) 2008, Mindaugas Rasiukevicius + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * schedctl(8) - a program to control scheduling of processes and threads. 
+ */ + +#include + +#ifndef lint +__RCSID("$NetBSD: schedctl.c,v 1.1 2008/01/15 03:37:15 rmind Exp $"); +#endif + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static const char *class_str[] = { + "SCHED_OTHER", + "SCHED_FIFO", + "SCHED_RR" +}; + +static void sched_set(pid_t, lwpid_t, struct sched_param *, cpuset_t *); +static void thread_info(pid_t, lwpid_t); +static cpuset_t *makecpuset(char *); +static char *showcpuset(cpuset_t *); +static void usage(void); + +int +main(int argc, char **argv) +{ + kvm_t *kd; + struct kinfo_lwp *lwp_list, *lwp; + struct sched_param *sp; + cpuset_t *cpuset; + int i, count, ch; + pid_t pid; + lwpid_t lid; + bool set; + + pid = lid = 0; + cpuset = NULL; + set = false; + + sp = malloc(sizeof(struct sched_param)); + if (sp == NULL) + err(EXIT_FAILURE, "malloc"); + + memset(sp, 0, sizeof(struct sched_param)); + sp->sched_class = SCHED_NONE; + sp->sched_priority = PRI_NONE; + + while ((ch = getopt(argc, argv, "A:C:P:p:t:")) != -1) { + switch (ch) { + case 'p': + /* PID */ + pid = atoi(optarg); + break; + case 't': + /* Thread (LWP) ID */ + lid = atoi(optarg); + break; + case 'A': + /* Affinity */ + cpuset = makecpuset(optarg); + if (cpuset == NULL) { + fprintf(stderr, "%s: invalid CPU value\n", + getprogname()); + exit(EXIT_FAILURE); + } + break; + case 'C': + /* Scheduling class */ + sp->sched_class = atoi(optarg); + if (sp->sched_class < SCHED_OTHER || + sp->sched_class > SCHED_RR) { + fprintf(stderr, + "%s: invalid scheduling class\n", + getprogname()); + exit(EXIT_FAILURE); + } + set = true; + break; + case 'P': + /* Priority */ + sp->sched_priority = atoi(optarg); + if (sp->sched_priority < sysconf(_SC_SCHED_PRI_MIN) || + sp->sched_priority > sysconf(_SC_SCHED_PRI_MAX)) { + fprintf(stderr, "%s: invalid priority\n", + getprogname()); + exit(EXIT_FAILURE); + } + set = true; + break; + default: + usage(); + } + } + + /* At least PID must be specified */ + 
if (pid == 0) + usage(); + + /* Set the scheduling information for thread/process */ + sched_set(pid, lid, set ? sp : NULL, cpuset); + + /* Show information about each thread */ + kd = kvm_open(NULL, NULL, NULL, KVM_NO_FILES, "kvm_open"); + if (kd == NULL) + err(EXIT_FAILURE, "kvm_open"); + lwp_list = kvm_getlwps(kd, pid, 0, sizeof(struct kinfo_lwp), &count); + if (lwp_list == NULL) + err(EXIT_FAILURE, "kvm_getlwps"); + for (lwp = lwp_list, i = 0; i < count; lwp++, i++) { + if (lid && lid != lwp->l_lid) + continue; + thread_info(pid, lwp->l_lid); + } + kvm_close(kd); + + free(sp); + free(cpuset); + return 0; +} + +static void +sched_set(pid_t pid, lwpid_t lid, struct sched_param *sp, cpuset_t *cpuset) +{ + int error; + + if (sp) { + /* Set the scheduling parameters for the thread */ + error = _sched_setparam(pid, lid, sp); + if (error < 0) + err(EXIT_FAILURE, "_sched_setparam"); + } + if (cpuset) { + /* Set the CPU-set for affinity */ + error = _sched_setaffinity(pid, lid, + sizeof(cpuset_t), cpuset); + if (error < 0) + err(EXIT_FAILURE, "_sched_setaffinity"); + } +} + +static void +thread_info(pid_t pid, lwpid_t lid) +{ + struct sched_param sp; + cpuset_t *cpuset; + char *cpus; + int error; + + cpuset = malloc(sizeof(cpuset_t)); + if (cpuset == NULL) + err(EXIT_FAILURE, "malloc"); + + error = _sched_getparam(pid, lid, &sp); + if (error < 0) + err(EXIT_FAILURE, "_sched_getparam"); + + error = _sched_getaffinity(pid, lid, sizeof(cpuset_t), cpuset); + if (error < 0) + err(EXIT_FAILURE, "_sched_getaffinity"); + + printf(" LID: %d\n", lid); + printf(" Priority: %d\n", sp.sched_priority); + printf(" Class: %s\n", class_str[sp.sched_class]); + + cpus = showcpuset(cpuset); + printf(" Affinity (CPUs): %s\n", cpus); + free(cpus); + + free(cpuset); +} + +static cpuset_t * +makecpuset(char *str) +{ + cpuset_t *cpuset; + char *cpustr, *s; + + if (str == NULL) + return NULL; + + cpuset = malloc(sizeof(cpuset_t)); + if (cpuset == NULL) + err(EXIT_FAILURE, "malloc"); + 
 memset(cpuset, 0, sizeof(cpuset_t));
+
+ cpustr = strdup(str);
+ if (cpustr == NULL)
+ err(EXIT_FAILURE, "strdup");
+ s = cpustr;
+
+ while (s != NULL) {
+ char *p;
+ int i;
+
+ /* Get the CPU number and validate the range */
+ p = strsep(&s, ",");
+ if (p == NULL) {
+ free(cpuset);
+ cpuset = NULL;
+ break;
+ }
+ i = atoi(p);
+ if (i == -1) {
+ memset(cpuset, 0, sizeof(cpuset_t));
+ break;
+ }
+ if ((unsigned int)i >= MAXCPUS) {
+ free(cpuset);
+ cpuset = NULL;
+ break;
+ }
+
+ /* Set the bit */
+ CPU_SET(i, cpuset);
+ }
+
+ free(cpustr);
+ return cpuset;
+}
+
+static char *
+showcpuset(cpuset_t *cpuset)
+{
+ char *buf;
+ size_t size;
+ int i;
+
+ size = 3 * MAXCPUS; /* XXX */
+ buf = malloc(size + 1);
+ if (buf == NULL)
+ err(EXIT_FAILURE, "malloc");
+ memset(buf, '\0', size + 1);
+
+ for (i = 0; i < MAXCPUS; i++)
+ if (CPU_ISSET(i, cpuset))
+ snprintf(buf + strlen(buf), size - strlen(buf), "%d,", i);
+
+ i = strlen(buf);
+ if (i != 0) {
+ buf[i - 1] = '\0';
+ } else {
+ strncpy(buf, "", size);
+ }
+
+ return buf;
+}
+
+static void
+usage(void)
+{
+ const char *progname = getprogname();
+
+ fprintf(stderr, "usage: %s -p pid [ -t lid ] [ -A processor ]\n"
+ "\t [ -C class ] [ -P priority ]\n", progname);
+ exit(EXIT_FAILURE);
+}