- Add some more failsafes to the CPU topology stuff, and build a 3rd
  circular list of peer CPUs in other packages, so we might scroll through
  them in the scheduler when looking to distribute or steal jobs (see the
  first sketch below).

- Fold the run queue data structure into spc_schedstate.  Makes kern_runq.c
  a far more pleasant place to work (see the second sketch below).

- Remove the code in sched_nextlwp() that tries to steal jobs from other
  CPUs.  It's not needed, because we do the very same thing in the idle LWP
  anyway.  Outside the VM system this was one of the main causes of L3
  cache misses I saw during builds.  On my machine, this change yields a
  60%-70% drop in time on the "hackbench" benchmark (there's clearly a bit
  more going on here, but basically being less aggressive helps).
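
The first item above builds per-CPU circular sibling lists (core, package, and
now peer).  As a minimal standalone illustration (not the kernel code itself,
and using made-up stand-in types and names), this is the append-and-walk
pattern that cpu_topology_link() and cpu_topology_print() use:

#include <stdio.h>

/*
 * Toy stand-in for struct cpu_info: one circular, singly linked sibling
 * list per relation, plus a count of members.  Illustrative only.
 */
enum { REL_CORE, REL_PACKAGE, REL_PEER, NREL };

struct fake_cpu {
	const char *name;
	struct fake_cpu *sib[NREL];
	unsigned nsib[NREL];
};

/* Start each CPU as a list of one: it points at itself. */
static void
fake_cpu_init(struct fake_cpu *ci, const char *name)
{
	int rel;

	ci->name = name;
	for (rel = 0; rel < NREL; rel++) {
		ci->sib[rel] = ci;
		ci->nsib[rel] = 1;
	}
}

/*
 * Append ci to the circular list ci2 is on: walk to the last member,
 * bumping every member's count, then splice ci in before ci2.
 */
static void
fake_cpu_link(struct fake_cpu *ci, struct fake_cpu *ci2, int rel)
{
	struct fake_cpu *ci3;

	for (ci3 = ci2;; ci3 = ci3->sib[rel]) {
		ci3->nsib[rel]++;
		if (ci3->sib[rel] == ci2)
			break;
	}
	ci->sib[rel] = ci2;
	ci3->sib[rel] = ci;
	ci->nsib[rel] = ci3->nsib[rel];
}

int
main(void)
{
	struct fake_cpu cpu0, cpu1, *p;

	fake_cpu_init(&cpu0, "cpu0");
	fake_cpu_init(&cpu1, "cpu1");

	/* cpu1 sits in another package at the same core/SMT position,
	 * so treat it as a peer of cpu0. */
	fake_cpu_link(&cpu1, &cpu0, REL_PEER);

	/* Walk cpu0's peer list the way cpu_topology_print() does. */
	printf("%s has %u peer sibling(s):", cpu0.name, cpu0.nsib[REL_PEER]);
	for (p = cpu0.sib[REL_PEER];; p = p->sib[REL_PEER]) {
		printf(" %s", p->name);
		if (p->sib[REL_PEER] == cpu0.sib[REL_PEER])
			break;
	}
	printf("\n");
	return 0;
}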
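
For the second item, the separately allocated runqueue_t goes away and the run
queue lives directly in struct schedstate_percpu as one TAILQ per priority plus
a bitmap of non-empty queues.  The sketch below is plain userland C with
illustrative names (assuming NetBSD's 224 priority levels; it is not the kernel
code) and shows the bitmap trick kern_runq.c uses to find the highest runnable
priority with ffs():

#include <stdio.h>
#include <stdint.h>
#include <strings.h>	/* ffs() */

#define PRI_COUNT	224		/* priority levels 0..223 */
#define BITMAP_BITS	32
#define BITMAP_SHIFT	5
#define BITMAP_MSB	0x80000000U
#define BITMAP_MASK	(BITMAP_BITS - 1)
#define NWORDS		(PRI_COUNT >> BITMAP_SHIFT)

/* One bit per priority level; the most significant bit of word 0 is
 * priority 0, matching the MSB-first encoding used in kern_runq.c. */
static uint32_t bitmap[NWORDS];

/* Called when a priority's queue goes from empty to non-empty. */
static void
mark_pri(int pri)
{
	bitmap[pri >> BITMAP_SHIFT] |= BITMAP_MSB >> (pri & BITMAP_MASK);
}

/* Called when a priority's queue becomes empty again. */
static void
unmark_pri(int pri)
{
	bitmap[pri >> BITMAP_SHIFT] &= ~(BITMAP_MSB >> (pri & BITMAP_MASK));
}

/* Highest priority with a non-empty queue, or -1 if none. */
static int
max_pri(void)
{
	int i, q;

	for (i = NWORDS - 1; i >= 0; i--) {
		if (bitmap[i] != 0) {
			q = ffs(bitmap[i]);
			return (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
		}
	}
	return -1;
}

int
main(void)
{
	mark_pri(43);
	mark_pri(120);
	printf("max priority %d\n", max_pri());	/* prints 120 */
	unmark_pri(120);
	printf("max priority %d\n", max_pri());	/* prints 43 */
	return 0;
}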
ad 2019-12-03 22:28:41 +00:00
parent ab935ef629
commit dece39714a
4 changed files with 229 additions and 211 deletions

sys/kern/kern_cpu.c

@ -1,4 +1,4 @@
/* $NetBSD: kern_cpu.c,v 1.79 2019/12/02 23:22:43 ad Exp $ */
/* $NetBSD: kern_cpu.c,v 1.80 2019/12/03 22:28:41 ad Exp $ */
/*-
* Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019 The NetBSD Foundation, Inc.
@ -56,7 +56,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.79 2019/12/02 23:22:43 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.80 2019/12/03 22:28:41 ad Exp $");
#include "opt_cpu_ucode.h"
@ -595,35 +595,118 @@ cpu_softintr_p(void)
void
cpu_topology_set(struct cpu_info *ci, int package_id, int core_id, int smt_id)
{
enum cpu_rel rel;
cpu_topology_present = true;
ci->ci_package_id = package_id;
ci->ci_core_id = core_id;
ci->ci_smt_id = smt_id;
ci->ci_package_cpus = ci;
ci->ci_npackage_cpus = 1;
ci->ci_core_cpus = ci;
ci->ci_ncore_cpus = 1;
for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
ci->ci_sibling[rel] = ci;
ci->ci_nsibling[rel] = 1;
}
}
/*
* Link a CPU into the given circular list.
*/
static void
cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
{
struct cpu_info *ci3;
/* Walk to the end of the existing circular list and append. */
for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
ci3->ci_nsibling[rel]++;
if (ci3->ci_sibling[rel] == ci2) {
break;
}
}
ci->ci_sibling[rel] = ci2;
ci3->ci_sibling[rel] = ci;
ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
}
/*
* Find peer CPUs in other packages.
*/
static void
cpu_topology_peers(void)
{
CPU_INFO_ITERATOR cii, cii2;
struct cpu_info *ci, *ci2;
for (CPU_INFO_FOREACH(cii, ci)) {
if (ci->ci_nsibling[CPUREL_PEER] > 1) {
/* Already linked. */
continue;
}
for (CPU_INFO_FOREACH(cii2, ci2)) {
if (ci != ci2 &&
ci->ci_package_id != ci2->ci_package_id &&
ci->ci_core_id == ci2->ci_core_id &&
ci->ci_smt_id == ci2->ci_smt_id) {
cpu_topology_link(ci, ci2, CPUREL_PEER);
break;
}
}
}
}
/*
* Print out the topology lists.
*/
static void
cpu_topology_print(void)
{
#ifdef DEBUG
CPU_INFO_ITERATOR cii;
struct cpu_info *ci, *ci2;
const char *names[] = { "core", "package", "peer" };
enum cpu_rel rel;
int i;
for (CPU_INFO_FOREACH(cii, ci)) {
for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
printf("%s has %dx %s siblings: ", cpu_name(ci),
ci->ci_nsibling[rel], names[rel]);
ci2 = ci->ci_sibling[rel];
i = 0;
do {
printf(" %s", cpu_name(ci2));
ci2 = ci2->ci_sibling[rel];
} while (++i < 64 && ci2 != ci->ci_sibling[rel]);
if (i == 64) {
printf(" GAVE UP");
}
printf("\n");
}
}
#endif /* DEBUG */
}
/*
* Fake up topology info if we have none, or if what we got was bogus.
* Don't override ci_package_id, etc, if cpu_topology_present is set.
* MD code also uses these.
*/
static void
cpu_topology_fake(void)
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
enum cpu_rel rel;
for (CPU_INFO_FOREACH(cii, ci)) {
ci->ci_package_id = cpu_index(ci);
ci->ci_core_id = 0;
ci->ci_smt_id = 0;
ci->ci_ncore_cpus = 1;
ci->ci_core_cpus = ci;
ci->ci_package_cpus = ci;
ci->ci_npackage_cpus = 1;
for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
ci->ci_sibling[rel] = ci;
ci->ci_nsibling[rel] = 1;
}
if (!cpu_topology_present) {
ci->ci_package_id = cpu_index(ci);
}
}
cpu_topology_print();
}
/*
@ -634,20 +717,16 @@ void
cpu_topology_init(void)
{
CPU_INFO_ITERATOR cii, cii2;
struct cpu_info *ci, *ci2, *ci3;
struct cpu_info *ci, *ci2;
int ncore, npackage, npeer;
bool symmetric;
if (!cpu_topology_present) {
cpu_topology_fake();
return;
}
for (CPU_INFO_FOREACH(cii, ci)) {
ci->ci_ncore_cpus = 1;
ci->ci_core_cpus = ci;
ci->ci_package_cpus = ci;
ci->ci_npackage_cpus = 1;
}
/* Find siblings in same core and package. */
for (CPU_INFO_FOREACH(cii, ci)) {
for (CPU_INFO_FOREACH(cii2, ci2)) {
/* Avoid bad things happening. */
@ -664,39 +743,42 @@ cpu_topology_init(void)
ci2->ci_package_id != ci->ci_package_id) {
continue;
}
/*
* Find CPUs in the same core. Walk to the end of
* the existing circular list and append.
*/
if (ci->ci_ncore_cpus == 1 &&
/* Find CPUs in the same core. */
if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
ci->ci_core_id == ci2->ci_core_id) {
for (ci3 = ci2;; ci3 = ci3->ci_core_cpus) {
ci3->ci_ncore_cpus++;
if (ci3->ci_core_cpus == ci2) {
break;
}
}
ci->ci_core_cpus = ci2;
ci3->ci_core_cpus = ci;
ci->ci_ncore_cpus = ci3->ci_ncore_cpus;
cpu_topology_link(ci, ci2, CPUREL_CORE);
}
/* Same, but for package. */
if (ci->ci_npackage_cpus == 1) {
for (ci3 = ci2;; ci3 = ci3->ci_package_cpus) {
ci3->ci_npackage_cpus++;
if (ci3->ci_package_cpus == ci2) {
break;
}
}
ci->ci_package_cpus = ci2;
ci3->ci_package_cpus = ci;
ci->ci_npackage_cpus = ci3->ci_npackage_cpus;
/* Find CPUs in the same package. */
if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
}
if (ci->ci_ncore_cpus > 1 && ci->ci_npackage_cpus > 1) {
if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
break;
}
}
}
/* Find peers in other packages. */
cpu_topology_peers();
/* Determine whether the topology is bogus/symmetric. */
npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE];
ncore = curcpu()->ci_nsibling[CPUREL_CORE];
npeer = curcpu()->ci_nsibling[CPUREL_PEER];
symmetric = true;
for (CPU_INFO_FOREACH(cii, ci)) {
if (npackage != ci->ci_nsibling[CPUREL_PACKAGE] ||
ncore != ci->ci_nsibling[CPUREL_CORE] ||
npeer != ci->ci_nsibling[CPUREL_PEER]) {
symmetric = false;
}
}
cpu_topology_print();
if (symmetric == false) {
printf("cpu_topology_init: not symmetric, faking it\n");
cpu_topology_fake();
}
}
#ifdef CPU_UCODE

sys/kern/kern_runq.c

@ -1,4 +1,4 @@
/* $NetBSD: kern_runq.c,v 1.52 2019/12/01 15:34:46 ad Exp $ */
/* $NetBSD: kern_runq.c,v 1.53 2019/12/03 22:28:41 ad Exp $ */
/*-
* Copyright (c) 2019 The NetBSD Foundation, Inc.
@ -56,7 +56,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.52 2019/12/01 15:34:46 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.53 2019/12/03 22:28:41 ad Exp $");
#include "opt_dtrace.h"
@ -78,15 +78,6 @@ __KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.52 2019/12/01 15:34:46 ad Exp $");
#include <sys/evcnt.h>
#include <sys/atomic.h>
/*
* Priority related definitions.
*/
#define PRI_TS_COUNT (NPRI_USER)
#define PRI_RT_COUNT (PRI_COUNT - PRI_TS_COUNT)
#define PRI_HTS_RANGE (PRI_TS_COUNT / 10)
#define PRI_HIGHEST_TS (MAXPRI_USER)
/*
* Bits per map.
*/
@ -95,34 +86,9 @@ __KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.52 2019/12/01 15:34:46 ad Exp $");
#define BITMAP_MSB (0x80000000U)
#define BITMAP_MASK (BITMAP_BITS - 1)
/*
* Structures, runqueue.
*/
const int schedppq = 1;
typedef struct {
TAILQ_HEAD(, lwp) q_head;
} queue_t;
typedef struct {
/* Bitmap */
uint32_t r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
/* Counters */
u_int r_count; /* Count of the threads */
u_int r_avgcount; /* Average count of threads (* 256) */
u_int r_mcount; /* Count of migratable threads */
/* Runqueues */
queue_t r_rt_queue[PRI_RT_COUNT];
queue_t r_ts_queue[PRI_TS_COUNT];
/* Event counters */
struct evcnt r_ev_pull;
struct evcnt r_ev_push;
struct evcnt r_ev_stay;
struct evcnt r_ev_localize;
} runqueue_t;
static void * sched_getrq(runqueue_t *, const pri_t);
static void *sched_getrq(struct schedstate_percpu *, const pri_t);
#ifdef MULTIPROCESSOR
static lwp_t * sched_catchlwp(struct cpu_info *);
static void sched_balance(void *);
@ -182,45 +148,43 @@ runq_init(void)
void
sched_cpuattach(struct cpu_info *ci)
{
runqueue_t *ci_rq;
void *rq_ptr;
u_int i, size;
struct schedstate_percpu *spc;
size_t size;
void *p;
u_int i;
if (ci->ci_schedstate.spc_lwplock == NULL) {
ci->ci_schedstate.spc_lwplock =
mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
spc = &ci->ci_schedstate;
if (spc->spc_lwplock == NULL) {
spc->spc_lwplock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
}
if (ci == lwp0.l_cpu) {
/* Initialize the scheduler structure of the primary LWP */
lwp0.l_mutex = ci->ci_schedstate.spc_lwplock;
lwp0.l_mutex = spc->spc_lwplock;
}
if (ci->ci_schedstate.spc_mutex != NULL) {
if (spc->spc_mutex != NULL) {
/* Already initialized. */
return;
}
/* Allocate the run queue */
size = roundup2(sizeof(runqueue_t), coherency_unit) + coherency_unit;
rq_ptr = kmem_zalloc(size, KM_SLEEP);
ci_rq = (void *)(roundup2((uintptr_t)(rq_ptr), coherency_unit));
size = roundup2(sizeof(spc->spc_queue[0]) * PRI_COUNT, coherency_unit) +
coherency_unit;
p = kmem_alloc(size, KM_SLEEP);
spc->spc_queue = (void *)roundup2((uintptr_t)p, coherency_unit);
/* Initialize run queues */
ci->ci_schedstate.spc_mutex =
mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
for (i = 0; i < PRI_RT_COUNT; i++)
TAILQ_INIT(&ci_rq->r_rt_queue[i].q_head);
for (i = 0; i < PRI_TS_COUNT; i++)
TAILQ_INIT(&ci_rq->r_ts_queue[i].q_head);
spc->spc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
for (i = 0; i < PRI_COUNT; i++)
TAILQ_INIT(&spc->spc_queue[i]);
ci->ci_schedstate.spc_sched_info = ci_rq;
evcnt_attach_dynamic(&ci_rq->r_ev_pull, EVCNT_TYPE_MISC, NULL,
evcnt_attach_dynamic(&spc->spc_ev_pull, EVCNT_TYPE_MISC, NULL,
cpu_name(ci), "runqueue pull");
evcnt_attach_dynamic(&ci_rq->r_ev_push, EVCNT_TYPE_MISC, NULL,
evcnt_attach_dynamic(&spc->spc_ev_push, EVCNT_TYPE_MISC, NULL,
cpu_name(ci), "runqueue push");
evcnt_attach_dynamic(&ci_rq->r_ev_stay, EVCNT_TYPE_MISC, NULL,
evcnt_attach_dynamic(&spc->spc_ev_stay, EVCNT_TYPE_MISC, NULL,
cpu_name(ci), "runqueue stay");
evcnt_attach_dynamic(&ci_rq->r_ev_localize, EVCNT_TYPE_MISC, NULL,
evcnt_attach_dynamic(&spc->spc_ev_localize, EVCNT_TYPE_MISC, NULL,
cpu_name(ci), "runqueue localize");
}
@ -229,13 +193,11 @@ sched_cpuattach(struct cpu_info *ci)
*/
static inline void *
sched_getrq(runqueue_t *ci_rq, const pri_t prio)
sched_getrq(struct schedstate_percpu *spc, const pri_t prio)
{
KASSERT(prio < PRI_COUNT);
return (prio <= PRI_HIGHEST_TS) ?
&ci_rq->r_ts_queue[prio].q_head :
&ci_rq->r_rt_queue[prio - PRI_HIGHEST_TS - 1].q_head;
return &spc->spc_queue[prio];
}
/*
@ -245,7 +207,6 @@ sched_getrq(runqueue_t *ci_rq, const pri_t prio)
void
sched_enqueue(struct lwp *l)
{
runqueue_t *ci_rq;
struct schedstate_percpu *spc;
TAILQ_HEAD(, lwp) *q_head;
const pri_t eprio = lwp_eprio(l);
@ -253,11 +214,10 @@ sched_enqueue(struct lwp *l)
ci = l->l_cpu;
spc = &ci->ci_schedstate;
ci_rq = spc->spc_sched_info;
KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
/* Enqueue the thread */
q_head = sched_getrq(ci_rq, eprio);
q_head = sched_getrq(spc, eprio);
if (TAILQ_EMPTY(q_head)) {
u_int i;
uint32_t q;
@ -265,8 +225,8 @@ sched_enqueue(struct lwp *l)
/* Mark bit */
i = eprio >> BITMAP_SHIFT;
q = BITMAP_MSB >> (eprio & BITMAP_MASK);
KASSERT((ci_rq->r_bitmap[i] & q) == 0);
ci_rq->r_bitmap[i] |= q;
KASSERT((spc->spc_bitmap[i] & q) == 0);
spc->spc_bitmap[i] |= q;
}
/* Preempted SCHED_RR and SCHED_FIFO LWPs go to the queue head. */
if (l->l_class != SCHED_OTHER && (l->l_pflag & LP_PREEMPTING) != 0) {
@ -274,9 +234,9 @@ sched_enqueue(struct lwp *l)
} else {
TAILQ_INSERT_TAIL(q_head, l, l_runq);
}
ci_rq->r_count++;
spc->spc_count++;
if ((l->l_pflag & LP_BOUND) == 0)
ci_rq->r_mcount++;
spc->spc_mcount++;
/*
* Update the value of highest priority in the runqueue,
@ -295,27 +255,25 @@ sched_enqueue(struct lwp *l)
void
sched_dequeue(struct lwp *l)
{
runqueue_t *ci_rq;
TAILQ_HEAD(, lwp) *q_head;
struct schedstate_percpu *spc;
const pri_t eprio = lwp_eprio(l);
spc = & l->l_cpu->ci_schedstate;
ci_rq = spc->spc_sched_info;
KASSERT(lwp_locked(l, spc->spc_mutex));
spc = &l->l_cpu->ci_schedstate;
KASSERT(lwp_locked(l, spc->spc_mutex));
KASSERT(eprio <= spc->spc_maxpriority);
KASSERT(ci_rq->r_bitmap[eprio >> BITMAP_SHIFT] != 0);
KASSERT(ci_rq->r_count > 0);
KASSERT(spc->spc_bitmap[eprio >> BITMAP_SHIFT] != 0);
KASSERT(spc->spc_count > 0);
if (spc->spc_migrating == l)
spc->spc_migrating = NULL;
ci_rq->r_count--;
spc->spc_count--;
if ((l->l_pflag & LP_BOUND) == 0)
ci_rq->r_mcount--;
spc->spc_mcount--;
q_head = sched_getrq(ci_rq, eprio);
q_head = sched_getrq(spc, eprio);
TAILQ_REMOVE(q_head, l, l_runq);
if (TAILQ_EMPTY(q_head)) {
u_int i;
@ -324,8 +282,8 @@ sched_dequeue(struct lwp *l)
/* Unmark bit */
i = eprio >> BITMAP_SHIFT;
q = BITMAP_MSB >> (eprio & BITMAP_MASK);
KASSERT((ci_rq->r_bitmap[i] & q) != 0);
ci_rq->r_bitmap[i] &= ~q;
KASSERT((spc->spc_bitmap[i] & q) != 0);
spc->spc_bitmap[i] &= ~q;
/*
* Update the value of highest priority in the runqueue, in a
@ -335,8 +293,8 @@ sched_dequeue(struct lwp *l)
return;
do {
if (ci_rq->r_bitmap[i] != 0) {
q = ffs(ci_rq->r_bitmap[i]);
if (spc->spc_bitmap[i] != 0) {
q = ffs(spc->spc_bitmap[i]);
spc->spc_maxpriority =
(i << BITMAP_SHIFT) + (BITMAP_BITS - q);
return;
@ -502,8 +460,7 @@ struct cpu_info *
sched_takecpu(struct lwp *l)
{
struct cpu_info *ci, *tci, *pivot, *next;
struct schedstate_percpu *spc;
runqueue_t *ci_rq, *ici_rq;
struct schedstate_percpu *spc, *ici_spc;
pri_t eprio, lpri, pri;
KASSERT(lwp_locked(l, NULL));
@ -514,14 +471,13 @@ sched_takecpu(struct lwp *l)
return ci;
spc = &ci->ci_schedstate;
ci_rq = spc->spc_sched_info;
eprio = lwp_eprio(l);
/* Make sure that thread is in appropriate processor-set */
if (__predict_true(spc->spc_psid == l->l_psid)) {
/* If CPU of this thread is idling - run there */
if (ci_rq->r_count == 0) {
ci_rq->r_ev_stay.ev_count++;
if (spc->spc_count == 0) {
spc->spc_ev_stay.ev_count++;
return ci;
}
/*
@ -532,12 +488,12 @@ sched_takecpu(struct lwp *l)
* chance of reusing the VM context from the parent.
*/
if (l->l_stat == LSIDL) {
ci_rq->r_ev_stay.ev_count++;
spc->spc_ev_stay.ev_count++;
return ci;
}
/* Stay if thread is cache-hot */
if (lwp_cache_hot(l) && eprio >= spc->spc_curpriority) {
ci_rq->r_ev_stay.ev_count++;
spc->spc_ev_stay.ev_count++;
return ci;
}
}
@ -546,8 +502,8 @@ sched_takecpu(struct lwp *l)
ci = curcpu();
spc = &ci->ci_schedstate;
if (eprio > spc->spc_curpriority && sched_migratable(l, ci)) {
ci_rq = spc->spc_sched_info;
ci_rq->r_ev_localize.ev_count++;
/* XXXAD foreign CPU not locked */
spc->spc_ev_localize.ev_count++;
return ci;
}
@ -564,13 +520,12 @@ sched_takecpu(struct lwp *l)
/* Reached the end, start from the beginning. */
next = cpu_lookup(0);
}
spc = &ci->ci_schedstate;
ici_rq = spc->spc_sched_info;
pri = MAX(spc->spc_curpriority, spc->spc_maxpriority);
ici_spc = &ci->ci_schedstate;
pri = MAX(ici_spc->spc_curpriority, ici_spc->spc_maxpriority);
if (pri > lpri)
continue;
if (pri == lpri && ci_rq->r_count < ici_rq->r_count)
if (pri == lpri && spc->spc_count < ici_spc->spc_count)
continue;
if (!sched_migratable(l, ci))
@ -578,11 +533,11 @@ sched_takecpu(struct lwp *l)
lpri = pri;
tci = ci;
ci_rq = ici_rq;
spc = ici_spc;
} while (ci = next, ci != pivot);
ci_rq = tci->ci_schedstate.spc_sched_info;
ci_rq->r_ev_push.ev_count++;
/* XXXAD remote CPU, unlocked */
tci->ci_schedstate.spc_ev_push.ev_count++;
return tci;
}
@ -596,21 +551,19 @@ sched_catchlwp(struct cpu_info *ci)
struct cpu_info *curci = curcpu();
struct schedstate_percpu *spc, *curspc;
TAILQ_HEAD(, lwp) *q_head;
runqueue_t *ci_rq;
struct lwp *l;
curspc = &curci->ci_schedstate;
spc = &ci->ci_schedstate;
KASSERT(curspc->spc_psid == spc->spc_psid);
ci_rq = spc->spc_sched_info;
if (ci_rq->r_mcount < min_catch) {
if (spc->spc_mcount < min_catch) {
spc_unlock(ci);
return NULL;
}
/* Take the highest priority thread */
q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
q_head = sched_getrq(spc, spc->spc_maxpriority);
l = TAILQ_FIRST(q_head);
for (;;) {
@ -643,7 +596,7 @@ sched_catchlwp(struct cpu_info *ci)
SPINLOCK_BACKOFF(count);
}
l->l_cpu = curci;
ci_rq->r_ev_pull.ev_count++;
spc->spc_ev_pull.ev_count++;
lwp_unlock_to(l, curspc->spc_mutex);
sched_enqueue(l);
return l;
@ -660,7 +613,7 @@ static void
sched_balance(void *nocallout)
{
struct cpu_info *ci, *hci;
runqueue_t *ci_rq;
struct schedstate_percpu *spc;
CPU_INFO_ITERATOR cii;
u_int highest;
u_int weight;
@ -673,7 +626,7 @@ sched_balance(void *nocallout)
/* Make lockless countings */
for (CPU_INFO_FOREACH(cii, ci)) {
ci_rq = ci->ci_schedstate.spc_sched_info;
spc = &ci->ci_schedstate;
/*
* Average count of the threads
@ -681,14 +634,14 @@ sched_balance(void *nocallout)
* The average is computed as a fixpoint number with
* 8 fractional bits.
*/
ci_rq->r_avgcount = (
weight * ci_rq->r_avgcount + (100 - weight) * 256 * ci_rq->r_mcount
spc->spc_avgcount = (
weight * spc->spc_avgcount + (100 - weight) * 256 * spc->spc_mcount
) / 100;
/* Look for CPU with the highest average */
if (ci_rq->r_avgcount > highest) {
if (spc->spc_avgcount > highest) {
hci = ci;
highest = ci_rq->r_avgcount;
highest = spc->spc_avgcount;
}
}
@ -707,7 +660,6 @@ sched_idle(void)
{
struct cpu_info *ci = curcpu(), *tci = NULL;
struct schedstate_percpu *spc, *tspc;
runqueue_t *ci_rq, *tci_rq;
bool dlock = false;
/* Check if there is a migrating LWP */
@ -782,21 +734,19 @@ sched_idle(void)
spc_unlock(ci);
no_migration:
ci_rq = spc->spc_sched_info;
if ((spc->spc_flags & SPCF_OFFLINE) != 0 || ci_rq->r_count != 0) {
if ((spc->spc_flags & SPCF_OFFLINE) != 0 || spc->spc_count != 0) {
return;
}
/* Reset the counter, and call the balancer */
ci_rq->r_avgcount = 0;
spc->spc_avgcount = 0;
sched_balance(ci);
tci = worker_ci;
tspc = &tci->ci_schedstate;
if (ci == tci || spc->spc_psid != tspc->spc_psid)
return;
/* Don't hit the locks unless there's something to do. */
tci_rq = tci->ci_schedstate.spc_sched_info;
if (tci_rq->r_mcount >= min_catch) {
if (tspc->spc_mcount >= min_catch) {
spc_dlock(ci, tci);
(void)sched_catchlwp(tci);
spc_unlock(ci);
@ -888,7 +838,6 @@ sched_nextlwp(void)
struct cpu_info *ci = curcpu();
struct schedstate_percpu *spc;
TAILQ_HEAD(, lwp) *q_head;
runqueue_t *ci_rq;
struct lwp *l;
/* Update the last run time on switch */
@ -899,36 +848,14 @@ sched_nextlwp(void)
spc = &ci->ci_schedstate;
if (__predict_false(spc->spc_migrating != NULL))
return NULL;
ci_rq = spc->spc_sched_info;
#ifdef MULTIPROCESSOR
/* If runqueue is empty, try to catch some thread from other CPU */
if (__predict_false(ci_rq->r_count == 0)) {
struct schedstate_percpu *cspc;
struct cpu_info *cci;
/* Offline CPUs should not perform this, however */
if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
return NULL;
/* Reset the counter, and call the balancer */
ci_rq->r_avgcount = 0;
sched_balance(ci);
cci = worker_ci;
cspc = &cci->ci_schedstate;
if (ci == cci || spc->spc_psid != cspc->spc_psid ||
!mutex_tryenter(cci->ci_schedstate.spc_mutex))
return NULL;
return sched_catchlwp(cci);
}
#else
if (__predict_false(ci_rq->r_count == 0))
/* Return to idle LWP if there is no runnable job */
if (__predict_false(spc->spc_count == 0))
return NULL;
#endif
/* Take the highest priority thread */
KASSERT(ci_rq->r_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
KASSERT(spc->spc_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
q_head = sched_getrq(spc, spc->spc_maxpriority);
l = TAILQ_FIRST(q_head);
KASSERT(l != NULL);
@ -947,13 +874,11 @@ sched_curcpu_runnable_p(void)
{
const struct cpu_info *ci;
const struct schedstate_percpu *spc;
const runqueue_t *ci_rq;
bool rv;
kpreempt_disable();
ci = curcpu();
spc = &ci->ci_schedstate;
ci_rq = spc->spc_sched_info;
#ifndef __HAVE_FAST_SOFTINTS
if (ci->ci_data.cpu_softints) {
@ -962,7 +887,7 @@ sched_curcpu_runnable_p(void)
}
#endif
rv = (ci_rq->r_count != 0) ? true : false;
rv = (spc->spc_count != 0) ? true : false;
kpreempt_enable();
return rv;
@ -1033,7 +958,6 @@ SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
void
sched_print_runqueue(void (*pr)(const char *, ...))
{
runqueue_t *ci_rq;
struct cpu_info *ci, *tci;
struct schedstate_percpu *spc;
struct lwp *l;
@ -1044,7 +968,6 @@ sched_print_runqueue(void (*pr)(const char *, ...))
int i;
spc = &ci->ci_schedstate;
ci_rq = spc->spc_sched_info;
(*pr)("Run-queue (CPU = %u):\n", ci->ci_index);
(*pr)(" pid.lid = %d.%d, r_count = %u, r_avgcount = %u, "
@ -1054,12 +977,12 @@ sched_print_runqueue(void (*pr)(const char *, ...))
#else
curlwp->l_proc->p_pid, curlwp->l_lid,
#endif
ci_rq->r_count, ci_rq->r_avgcount, spc->spc_maxpriority,
spc->spc_count, spc->spc_avgcount, spc->spc_maxpriority,
spc->spc_migrating);
i = (PRI_COUNT >> BITMAP_SHIFT) - 1;
do {
uint32_t q;
q = ci_rq->r_bitmap[i];
q = spc->spc_bitmap[i];
(*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q);
} while (i--);
}

sys/sys/cpu_data.h

@ -1,4 +1,4 @@
/* $NetBSD: cpu_data.h,v 1.42 2019/12/03 05:07:49 riastradh Exp $ */
/* $NetBSD: cpu_data.h,v 1.43 2019/12/03 22:28:41 ad Exp $ */
/*-
* Copyright (c) 2004, 2006, 2007, 2008, 2019 The NetBSD Foundation, Inc.
@ -59,6 +59,13 @@ struct lwp;
struct lockdebug;
enum cpu_rel {
CPUREL_CORE, /* CPUs in the same core */
CPUREL_PACKAGE, /* CPUs in the same package */
CPUREL_PEER, /* peer CPUs in other packages */
CPUREL_COUNT
};
struct cpu_data {
/*
* The first section is likely to be touched by other CPUs -
@ -76,10 +83,8 @@ struct cpu_data {
cpuid_t cpu_package_id;
cpuid_t cpu_core_id;
cpuid_t cpu_smt_id;
u_int cpu_npackage_cpus;
u_int cpu_ncore_cpus;
struct cpu_info *cpu_package_cpus; /* sibling CPUs in package */
struct cpu_info *cpu_core_cpus; /* sibling CPUs in core */
u_int cpu_nsibling[CPUREL_COUNT];
struct cpu_info *cpu_sibling[CPUREL_COUNT];
/*
* This section is mostly CPU-private.
@ -133,10 +138,8 @@ struct cpu_data {
#define ci_package_id ci_data.cpu_package_id
#define ci_core_id ci_data.cpu_core_id
#define ci_smt_id ci_data.cpu_smt_id
#define ci_npackage_cpus ci_data.cpu_npackage_cpus
#define ci_ncore_cpus ci_data.cpu_ncore_cpus
#define ci_package_cpus ci_data.cpu_package_cpus
#define ci_core_cpus ci_data.cpu_core_cpus
#define ci_nsibling ci_data.cpu_nsibling
#define ci_sibling ci_data.cpu_sibling
void mi_cpu_init(void);
int mi_cpu_attach(struct cpu_info *);

sys/sys/sched.h

@ -1,4 +1,4 @@
/* $NetBSD: sched.h,v 1.78 2019/11/30 17:46:27 ad Exp $ */
/* $NetBSD: sched.h,v 1.79 2019/12/03 22:28:41 ad Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2007, 2008, 2019
@ -144,6 +144,7 @@ __END_DECLS
#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/evcnt.h>
/*
* Per-CPU scheduler state. Field markings and the corresponding locks:
@ -157,17 +158,26 @@ struct schedstate_percpu {
kmutex_t *spc_mutex; /* (: lock on below, runnable LWPs */
kmutex_t *spc_lwplock; /* (: general purpose lock for LWPs */
struct lwp *spc_migrating; /* (: migrating LWP */
volatile pri_t spc_curpriority;/* m: usrpri of curlwp */
pri_t spc_maxpriority;/* m: highest priority queued */
psetid_t spc_psid; /* c: processor-set ID */
time_t spc_lastmod; /* c: time of last cpu state change */
void *spc_sched_info;/* (: scheduler-specific structure */
volatile int spc_flags; /* s: flags; see below */
u_int spc_schedticks; /* s: ticks for schedclock() */
uint64_t spc_cp_time[CPUSTATES];/* s: CPU state statistics */
int spc_ticks; /* s: ticks until sched_tick() */
int spc_pscnt; /* s: prof/stat counter */
int spc_psdiv; /* s: prof/stat divisor */
/* Run queue */
volatile pri_t spc_curpriority;/* s: usrpri of curlwp */
pri_t spc_maxpriority;/* m: highest priority queued */
u_int spc_count; /* m: count of the threads */
u_int spc_avgcount; /* m: average count of threads (* 256) */
u_int spc_mcount; /* m: count of migratable threads */
uint32_t spc_bitmap[8]; /* m: bitmap of active queues */
TAILQ_HEAD(,lwp) *spc_queue; /* m: queue for each priority */
struct evcnt spc_ev_pull; /* m: event counters */
struct evcnt spc_ev_push;
struct evcnt spc_ev_stay;
struct evcnt spc_ev_localize;
};
/* spc_flags */