Improve per-CPU support for the workqueue(9):
- Make the structures CPU-cache friendly, as suggested and explained by
  Andrew Doran. A CACHE_LINE_SIZE definition is introduced.
- Use the current CPU if NULL is passed to workqueue_enqueue().
- Implement an MI CPU index, which can be used as an array index, and
  remove the linked-list usage for work queues.
- The roundup2() function avoids division, but works only with powers of 2.

Reviewed by: <ad>, <yamt>, <tech-kern>
This commit is contained in:
parent f6bcdcfe0c
commit c8c024369c
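To illustrate the roundup2() point in the commit message (an editor's sketch, not part of the commit): the macro rounds up with a bit mask instead of a divide and a multiply, which is cheaper but only correct when the alignment is a power of 2.

	#include <assert.h>
	#include <stdio.h>

	/* The macros as they appear in sys/sys/param.h after this commit. */
	#define roundup(x, y)	((((x)+((y)-1))/(y))*(y))
	#define roundup2(x, m)	(((x) + m - 1) & ~(m - 1))
	#define powerof2(x)	((((x)-1)&(x))==0)

	int
	main(void)
	{
		/* Power-of-2 alignment: the mask form agrees with roundup(). */
		assert(roundup(100, 64) == 128 && roundup2(100, 64) == 128);

		/* Non-power-of-2 alignment: roundup2() silently goes wrong. */
		printf("roundup(50, 48)  = %d\n", roundup(50, 48));	/* 96 */
		printf("roundup2(50, 48) = %d\n", roundup2(50, 48));	/* 64, not 96 */

		assert(powerof2(64) && !powerof2(48));
		return 0;
	}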
sys/dev/ieee1394/fw_port.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: fw_port.h,v 1.23 2007/07/09 21:00:41 ad Exp $	*/
+/*	$NetBSD: fw_port.h,v 1.24 2007/08/05 01:19:17 rmind Exp $	*/
 /*
  * Copyright (c) 2004 KIYOHARA Takashi
  * All rights reserved.
@@ -1091,7 +1091,6 @@ typedef struct scsipi_inquiry_data sbp_scsi_inquiry_data;
 #define splfwsbp()	splbio()
 #define splsoftvm()	splbio()
 
-#define roundup2(x, y)	roundup((x), (y))
 #ifndef rounddown
 #define rounddown(x, y)	((x) / (y) * (y))
 #endif
sys/kern/kern_cpu.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_cpu.c,v 1.4 2007/08/04 11:57:54 ad Exp $	*/
+/*	$NetBSD: kern_cpu.c,v 1.5 2007/08/05 01:19:17 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.4 2007/08/04 11:57:54 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.5 2007/08/05 01:19:17 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -95,6 +95,8 @@ mi_cpu_attach(struct cpu_info *ci)
 	struct schedstate_percpu *spc = &ci->ci_schedstate;
 	int error;
 
+	ci->ci_index = ncpu;
+
 	mutex_init(&spc->spc_lwplock, MUTEX_SPIN, IPL_SCHED);
 	sched_cpuattach(ci);
 
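A note on the hunk above (editor's illustration): mi_cpu_attach() captures the running CPU count as the new CPU's MI index, and ncpu is incremented as each CPU finishes attaching (an assumption, since that code is not part of this diff), so the indices come out dense: 0, 1, 2, and so on. A userland model of that assignment:

	#include <assert.h>

	struct cpu_model {
		int ci_index;
	};

	static int ncpu_model;

	static void
	mi_cpu_attach_model(struct cpu_model *ci)
	{
		ci->ci_index = ncpu_model++;	/* cf. ci->ci_index = ncpu; */
	}

	int
	main(void)
	{
		struct cpu_model cpus[4];

		for (int i = 0; i < 4; i++)
			mi_cpu_attach_model(&cpus[i]);
		/* Dense and array-friendly, unlike an MD hardware CPU id. */
		assert(cpus[0].ci_index == 0 && cpus[3].ci_index == 3);
		return 0;
	}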
sys/kern/subr_workqueue.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_workqueue.c,v 1.17 2007/07/20 12:43:26 yamt Exp $	*/
+/*	$NetBSD: subr_workqueue.c,v 1.18 2007/08/05 01:19:17 rmind Exp $	*/
 
 /*-
  * Copyright (c)2002, 2005 YAMAMOTO Takashi,
@@ -27,9 +27,10 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_workqueue.c,v 1.17 2007/07/20 12:43:26 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_workqueue.c,v 1.18 2007/08/05 01:19:17 rmind Exp $");
 
 #include <sys/param.h>
+#include <sys/cpu.h>
 #include <sys/systm.h>
 #include <sys/kthread.h>
 #include <sys/kmem.h>
@@ -50,31 +51,40 @@ struct workqueue_queue {
 	kcondvar_t q_cv;
 	struct workqhead q_queue;
 	struct lwp *q_worker;
-	struct cpu_info *q_ci;
-	SLIST_ENTRY(workqueue_queue) q_list;
 };
 
 struct workqueue {
-	SLIST_HEAD(, workqueue_queue) wq_queue;
 	void (*wq_func)(struct work *, void *);
 	void *wq_arg;
 	const char *wq_name;
 	pri_t wq_prio;
+	int wq_flags;
+	void *wq_ptr;
 	ipl_cookie_t wq_ipl;
 };
 
+#ifdef MULTIPROCESSOR
+#define	CPU_ALIGN_SIZE		CACHE_LINE_SIZE
+#else
+#define	CPU_ALIGN_SIZE		(ALIGNBYTES + 1)
+#endif
+
+#define	WQ_SIZE		(roundup2(sizeof(struct workqueue), CPU_ALIGN_SIZE))
+#define	WQ_QUEUE_SIZE	(roundup2(sizeof(struct workqueue_queue), CPU_ALIGN_SIZE))
+
 #define	POISON	0xaabbccdd
 
 static struct workqueue_queue *
 workqueue_queue_lookup(struct workqueue *wq, struct cpu_info *ci)
 {
-	struct workqueue_queue *q;
+	u_int idx = 0;
 
-	SLIST_FOREACH(q, &wq->wq_queue, q_list)
-		if (q->q_ci == ci)
-			return q;
+	if (wq->wq_flags & WQ_PERCPU) {
+		idx = ci ? cpu_index(ci) : cpu_index(curcpu());
+	}
 
-	return SLIST_FIRST(&wq->wq_queue);
+	return (void *)((intptr_t)(wq) + WQ_SIZE + (idx * WQ_QUEUE_SIZE));
 }
 
 static void
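The effect of the hunk above: per-CPU queues no longer live on a linked list keyed by struct cpu_info *. They are carved out of one contiguous allocation, each padded to CPU_ALIGN_SIZE so two CPUs never share a cache line, and workqueue_queue_lookup() becomes straight pointer arithmetic. A userland sketch of the same layout (struct fields and sizes are stand-ins, not the kernel's):

	#include <assert.h>
	#include <stdint.h>
	#include <stdlib.h>

	#define CACHE_LINE_SIZE	64	/* matches the MI fallback added below */
	#define ALIGN_UP(v, a)	(((v) + (a) - 1) & ~(uintptr_t)((a) - 1))

	struct wq_hdr	{ int flags; void *ptr; };	/* stands in for struct workqueue */
	struct wq_queue	{ long stub[3]; };		/* stands in for struct workqueue_queue */

	#define WQ_SIZE		ALIGN_UP(sizeof(struct wq_hdr), CACHE_LINE_SIZE)
	#define WQ_QUEUE_SIZE	ALIGN_UP(sizeof(struct wq_queue), CACHE_LINE_SIZE)

	/* The same arithmetic as the new workqueue_queue_lookup(). */
	static struct wq_queue *
	queue_for_index(struct wq_hdr *wq, unsigned idx)
	{
		return (void *)((uintptr_t)wq + WQ_SIZE + idx * WQ_QUEUE_SIZE);
	}

	int
	main(void)
	{
		unsigned ncpu = 4;
		void *ptr = malloc(WQ_SIZE + ncpu * WQ_QUEUE_SIZE + CACHE_LINE_SIZE);
		struct wq_hdr *wq;

		assert(ptr != NULL);
		/* Align the header up, as workqueue_create() does with roundup2(). */
		wq = (void *)ALIGN_UP((uintptr_t)ptr, CACHE_LINE_SIZE);

		/* Each per-CPU queue starts on its own cache line. */
		for (unsigned i = 0; i < ncpu; i++)
			assert(((uintptr_t)queue_for_index(wq, i) % CACHE_LINE_SIZE) == 0);

		free(ptr);
		return 0;
	}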
@@ -100,7 +110,6 @@ workqueue_run(struct workqueue *wq)
 
 	/* find the workqueue of this kthread */
 	q = workqueue_queue_lookup(wq, curlwp->l_cpu);
-	KASSERT(q != NULL);
 
 	for (;;) {
 		struct workqhead tmp;
@@ -150,33 +159,26 @@ workqueue_init(struct workqueue *wq, const char *name,
 	wq->wq_name = name;
 	wq->wq_func = callback_func;
 	wq->wq_arg = callback_arg;
-	SLIST_INIT(&wq->wq_queue);
 }
 
 static int
-workqueue_initqueue(struct workqueue *wq, int ipl,
-    int flags, struct cpu_info *ci)
+workqueue_initqueue(struct workqueue *wq, struct workqueue_queue *q,
+    int ipl, struct cpu_info *ci)
 {
-	struct workqueue_queue *q;
 	int error, ktf;
-	cpuid_t cpuid;
-
-#ifdef MULTIPROCESSOR
-	cpuid = ci->ci_cpuid;
-#else
-	cpuid = 0;
-#endif
-
-	q = kmem_alloc(sizeof(struct workqueue_queue), KM_SLEEP);
-	SLIST_INSERT_HEAD(&wq->wq_queue, q, q_list);
-	q->q_ci = ci;
 
 	mutex_init(&q->q_mutex, MUTEX_DRIVER, ipl);
 	cv_init(&q->q_cv, wq->wq_name);
 	q->q_worker = NULL;
 	SIMPLEQ_INIT(&q->q_queue);
-	ktf = ((flags & WQ_MPSAFE) != 0 ? KTHREAD_MPSAFE : 0);
-	error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker,
-	    wq, &q->q_worker, "%s/%d", wq->wq_name, (int)cpuid);
+	ktf = ((wq->wq_flags & WQ_MPSAFE) != 0 ? KTHREAD_MPSAFE : 0);
+	if (ci) {
+		error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker,
+		    wq, &q->q_worker, "%s/%lu", wq->wq_name, cpu_index(ci));
+	} else {
+		error = kthread_create(wq->wq_prio, ktf, ci, workqueue_worker,
+		    wq, &q->q_worker, "%s", wq->wq_name);
+	}
 
 	return error;
 }
@@ -223,7 +225,6 @@ workqueue_finiqueue(struct workqueue *wq, struct workqueue_queue *q)
 	mutex_exit(&q->q_mutex);
 	mutex_destroy(&q->q_mutex);
 	cv_destroy(&q->q_cv);
-	kmem_free(q, sizeof(struct workqueue_queue));
 }
 
 /* --- */
@@ -234,42 +235,61 @@ workqueue_create(struct workqueue **wqp, const char *name,
     pri_t prio, int ipl, int flags)
 {
 	struct workqueue *wq;
-	int error = 0;
+	struct workqueue_queue *q;
+	void *ptr;
+	int i, error = 0;
+	size_t size;
 
 	KASSERT(sizeof(work_impl_t) <= sizeof(struct work));
 
-	wq = kmem_alloc(sizeof(*wq), KM_SLEEP);
+	i = (flags & WQ_PERCPU) ? ncpu : 1;
+	if (ncpu == 1) {
+		flags &= ~WQ_PERCPU;
+	}
+
+	size = WQ_SIZE + (i * WQ_QUEUE_SIZE) + CPU_ALIGN_SIZE;
+	ptr = kmem_alloc(size, KM_SLEEP);
+
+	wq = (void *)roundup2((intptr_t)ptr, CPU_ALIGN_SIZE);
+	wq->wq_ptr = ptr;
+	wq->wq_flags = flags;
+	q = (void *)((intptr_t)(wq) + WQ_SIZE);
 
 	workqueue_init(wq, name, callback_func, callback_arg, prio, ipl);
+	i = 0;
 
-#ifdef MULTIPROCESSOR
 	if (flags & WQ_PERCPU) {
+#ifdef MULTIPROCESSOR
 		struct cpu_info *ci;
 		CPU_INFO_ITERATOR cii;
 
 		/* create the work-queue for each CPU */
 		for (CPU_INFO_FOREACH(cii, ci)) {
-			error = workqueue_initqueue(wq, ipl, flags, ci);
-			if (error)
+			error = workqueue_initqueue(wq, q, ipl, ci);
+			if (error) {
 				break;
+			}
+			q = (void *)((intptr_t)(q) + WQ_QUEUE_SIZE);
+			i++;
 		}
-		if (error)
-			workqueue_destroy(wq);
+#endif
 	} else {
-		error = workqueue_initqueue(wq, ipl, flags, curcpu());
-		if (error) {
-			kmem_free(wq, sizeof(*wq));
-			return error;
-		}
+		/* initialize a work-queue */
+		error = workqueue_initqueue(wq, q, ipl, NULL);
 	}
-#else
-	error = workqueue_initqueue(wq, ipl, flags, curcpu());
 
 	if (error) {
-		kmem_free(wq, sizeof(*wq));
+		/*
+		 * workqueue_finiqueue() should be
+		 * called for the failing one too.
+		 */
+		do {
+			workqueue_finiqueue(wq, q);
+			q = (void *)((intptr_t)(q) - WQ_QUEUE_SIZE);
+		} while(i--);
+		kmem_free(ptr, size);
 		return error;
 	}
-#endif
 
 	*wqp = wq;
 	return 0;
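One detail of workqueue_create() worth spelling out (editor's note): kmem_alloc() does not promise cache-line alignment, so the code allocates CPU_ALIGN_SIZE extra bytes, rounds the pointer up with roundup2(), and remembers the original base in wq_ptr so workqueue_destroy() can hand the untouched pointer and size back to kmem_free(). A minimal userland rendering of that pattern (names are illustrative):

	#include <assert.h>
	#include <stdint.h>
	#include <stdlib.h>

	struct blob {
		void *base;		/* original allocation, cf. wq->wq_ptr */
		/* ... payload would follow ... */
	};

	/* align must be a power of 2, size at least sizeof(struct blob). */
	static struct blob *
	blob_create(size_t size, size_t align)
	{
		void *base = malloc(size + align);	/* cf. kmem_alloc() */
		struct blob *b;

		if (base == NULL)
			return NULL;
		b = (void *)(((uintptr_t)base + align - 1) &
		    ~(uintptr_t)(align - 1));
		b->base = base;			/* remember what to free */
		return b;
	}

	static void
	blob_destroy(struct blob *b)
	{
		free(b->base);		/* cf. kmem_free(wq->wq_ptr, size) */
	}

	int
	main(void)
	{
		struct blob *b = blob_create(128, 64);

		assert(b != NULL && ((uintptr_t)b % 64) == 0);
		blob_destroy(b);
		return 0;
	}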
@@ -279,12 +299,25 @@ void
 workqueue_destroy(struct workqueue *wq)
 {
 	struct workqueue_queue *q;
+	u_int i = 1;
 
-	while ((q = SLIST_FIRST(&wq->wq_queue)) != NULL) {
-		SLIST_REMOVE_HEAD(&wq->wq_queue, q_list);
-		workqueue_finiqueue(wq, q);
+	if (wq->wq_flags & WQ_PERCPU) {
+#ifdef MULTIPROCESSOR
+		struct cpu_info *ci;
+		CPU_INFO_ITERATOR cii;
+
+		for (CPU_INFO_FOREACH(cii, ci)) {
+			q = workqueue_queue_lookup(wq, ci);
+			workqueue_finiqueue(wq, q);
+		}
+		i = ncpu;
+#endif
+	} else {
+		q = workqueue_queue_lookup(wq, NULL);
+		workqueue_finiqueue(wq, q);
 	}
-	kmem_free(wq, sizeof(*wq));
+
+	kmem_free(wq->wq_ptr, WQ_SIZE + (i * WQ_QUEUE_SIZE) + CPU_ALIGN_SIZE);
 }
 
 void
@@ -293,8 +326,8 @@ workqueue_enqueue(struct workqueue *wq, struct work *wk0, struct cpu_info *ci)
 	struct workqueue_queue *q;
 	work_impl_t *wk = (void *)wk0;
 
+	KASSERT(wq->wq_flags & WQ_PERCPU || ci == NULL);
 	q = workqueue_queue_lookup(wq, ci);
-	KASSERT(q != NULL);
 
 	mutex_enter(&q->q_mutex);
 	SIMPLEQ_INSERT_TAIL(&q->q_queue, wk, wk_entry);
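With the enqueue hunk in place, the per-CPU behaviour is driver-visible: passing NULL as the cpu_info pointer queues the work on the current CPU's queue, and the new KASSERT documents that a non-NULL CPU may only be given to a WQ_PERCPU workqueue. A hypothetical caller (names and the priority/IPL choices are illustrative, not from this commit):

	#include <sys/param.h>
	#include <sys/workqueue.h>

	static struct workqueue *example_wq;	/* hypothetical */

	static void
	example_work_cb(struct work *wk, void *arg)
	{
		/* Runs in the worker thread of the CPU the work landed on. */
	}

	static int
	example_attach(void)
	{
		/* One worker thread and one queue per CPU. */
		return workqueue_create(&example_wq, "example",
		    example_work_cb, NULL, PRI_NONE, IPL_NONE,
		    WQ_MPSAFE | WQ_PERCPU);
	}

	static void
	example_kick(struct work *wk)
	{
		/* NULL cpu_info: enqueue on the current CPU's queue. */
		workqueue_enqueue(example_wq, wk, NULL);
	}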
sys/sys/cpu.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.9 2007/08/04 11:03:02 ad Exp $	*/
+/*	$NetBSD: cpu.h,v 1.10 2007/08/05 01:19:17 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2007 YAMAMOTO Takashi,
@@ -65,4 +65,10 @@ int cpu_setonline(struct cpu_info *, bool);
 
 extern kmutex_t cpu_lock;
 
+static inline cpuid_t
+cpu_index(struct cpu_info *ci)
+{
+	return ci->ci_index;
+}
+
 #endif /* !_SYS_CPU_H_ */
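The cpu_index() accessor added above returns the dense MI index rather than the machine-dependent ci_cpuid (which the old workqueue code used for thread names, and which need not be small or contiguous). That is what makes flat per-CPU arrays practical; a hypothetical kernel consumer (all names here are illustrative):

	#include <sys/param.h>
	#include <sys/cpu.h>
	#include <sys/kmem.h>

	static uint64_t *example_counters;	/* hypothetical, one slot per CPU */

	void
	example_counters_init(void)
	{
		/* All CPUs have attached, with dense indices 0..ncpu-1. */
		example_counters = kmem_zalloc(ncpu * sizeof(uint64_t), KM_SLEEP);
	}

	void
	example_counter_bump(void)
	{
		/* cpu_index() is a valid subscript; no list walk needed. */
		example_counters[cpu_index(curcpu())]++;
	}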
sys/sys/cpu_data.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu_data.h,v 1.9 2007/07/09 21:11:32 ad Exp $	*/
+/*	$NetBSD: cpu_data.h,v 1.10 2007/08/05 01:19:17 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2004, 2006, 2007 The NetBSD Foundation, Inc.
@@ -60,6 +60,7 @@ struct lwp;
 struct cpu_data {
 	struct schedstate_percpu cpu_schedstate; /* scheduler state */
 	struct lwp *cpu_idlelwp;	/* idle lwp */
+	cpuid_t cpu_index;		/* CPU index */
 
 	u_int cpu_biglock_count;
 	struct lwp *cpu_biglock_wanted;
@@ -80,6 +81,7 @@ struct cpu_data {
 
 /* compat definitions */
 #define	ci_schedstate	ci_data.cpu_schedstate
+#define	ci_index	ci_data.cpu_index
 #define	ci_biglock_count	ci_data.cpu_biglock_count
 #define	ci_biglock_wanted	ci_data.cpu_biglock_wanted
 #define	ci_spin_locks	ci_data.cpu_spin_locks
sys/sys/param.h
@@ -1,4 +1,4 @@
-/*	$NetBSD: param.h,v 1.269 2007/07/31 21:18:20 pooka Exp $	*/
+/*	$NetBSD: param.h,v 1.270 2007/08/05 01:19:17 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -157,6 +157,13 @@
 #define	dbtob(x)	((x) << DEV_BSHIFT)
 #define	btodb(x)	((x) >> DEV_BSHIFT)
 
+/*
+ * CPU cache values
+ */
+#ifndef CACHE_LINE_SIZE
+#define	CACHE_LINE_SIZE	64
+#endif
+
 /*
  * Stack macros.  On most architectures, the stack grows down,
  * towards lower addresses; it is the rare architecture where
@@ -260,7 +267,8 @@
 #define	howmany(x, y)	(((x)+((y)-1))/(y))
 #endif
 #define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))
 #define	rounddown(x,y)	(((x)/(y))*(y))
+#define	roundup2(x, m)	(((x) + m - 1) & ~(m - 1))
 #define	powerof2(x)	((((x)-1)&(x))==0)
 
 /* Macros for min/max. */
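Since roundup2() is only valid for power-of-2 alignments (as the commit message notes) and CACHE_LINE_SIZE may be supplied by a machine-dependent header ahead of the #ifndef fallback, a port providing its own value must keep it a power of 2. A compile-time guard in the pre-C11 style could catch a bad override (an editor's sketch, not in the commit):

	#include <sys/param.h>

	/*
	 * Fails to compile (negative array size) if CACHE_LINE_SIZE is not
	 * a power of 2, which WQ_SIZE/WQ_QUEUE_SIZE depend on via roundup2().
	 */
	typedef char cache_line_size_check[powerof2(CACHE_LINE_SIZE) ? 1 : -1];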