/* $NetBSD: tprof.c,v 1.23 2023/04/11 10:07:12 msaitoh Exp $ */

/*-
 * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
|
|
|
|
#include <sys/cdefs.h>
|
|
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.23 2023/04/11 10:07:12 msaitoh Exp $");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/callout.h>
|
|
#include <sys/conf.h>
|
|
#include <sys/cpu.h>
|
|
#include <sys/kmem.h>
|
|
#include <sys/module.h>
|
|
#include <sys/percpu.h>
|
|
#include <sys/poll.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/select.h>
|
|
#include <sys/workqueue.h>
|
|
#include <sys/xcall.h>
|
|
|
|
#include <dev/tprof/tprof.h>
|
|
#include <dev/tprof/tprof_ioctl.h>
|
|
|
|
#include "ioconf.h"
|
|
|
|
/*
 * Profiling rate constant.  Profiled counters are programmed with
 * (estimated counter frequency / TPROF_HZ) events per sample, and the
 * per-CPU sample buffers hold TPROF_HZ samples.  A build may predefine it.
 */
#if !defined(TPROF_HZ)
#define	TPROF_HZ	10000
#endif	/* !defined(TPROF_HZ) */
|
|
|
|
/*
 * locking order:
 *	tprof_reader_lock -> tprof_lock
 *	tprof_startstop_lock -> tprof_lock
 */

/*
 * protected by:
 *	L: tprof_lock
 *	R: tprof_reader_lock
 *	S: tprof_startstop_lock
 *	s: writer should hold tprof_startstop_lock and tprof_lock
 *	   reader should hold tprof_startstop_lock or tprof_lock
 */
|
|
|
|
typedef struct tprof_buf {
|
|
u_int b_used;
|
|
u_int b_size;
|
|
u_int b_overflow;
|
|
u_int b_unused;
|
|
STAILQ_ENTRY(tprof_buf) b_list;
|
|
tprof_sample_t b_data[];
|
|
} tprof_buf_t;
|
|
#define TPROF_BUF_BYTESIZE(sz) \
|
|
(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
|
|
#define TPROF_MAX_SAMPLES_PER_BUF TPROF_HZ
|
|
|
|
typedef struct {
|
|
tprof_buf_t *c_buf;
|
|
uint32_t c_cpuid;
|
|
struct work c_work;
|
|
callout_t c_callout;
|
|
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;
|
|
|
|
typedef struct tprof_backend {
|
|
/*
|
|
* tprof_backend_softc_t must be passed as an argument to the interrupt
|
|
* handler, but since this is difficult to implement in armv7/v8. Then,
|
|
* tprof_backend is exposed. Additionally, softc must be placed at the
|
|
* beginning of struct tprof_backend.
|
|
*/
|
|
tprof_backend_softc_t tb_softc;
|
|
|
|
const char *tb_name;
|
|
const tprof_backend_ops_t *tb_ops;
|
|
LIST_ENTRY(tprof_backend) tb_list;
|
|
} tprof_backend_t;
|
|
|
|
static kmutex_t tprof_lock;
|
|
static u_int tprof_nworker; /* L: # of running worker LWPs */
|
|
static lwp_t *tprof_owner;
|
|
static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */
|
|
static u_int tprof_nbuf_on_list; /* L: # of buffers on tprof_list */
|
|
static struct workqueue *tprof_wq;
|
|
static struct percpu *tprof_cpus __read_mostly; /* tprof_cpu_t * */
|
|
static u_int tprof_samples_per_buf;
|
|
static u_int tprof_max_buf;
|
|
|
|
tprof_backend_t *tprof_backend; /* S: */
|
|
static LIST_HEAD(, tprof_backend) tprof_backends =
|
|
LIST_HEAD_INITIALIZER(tprof_backend); /* S: */
|
|
|
|
static kmutex_t tprof_reader_lock;
|
|
static kcondvar_t tprof_reader_cv; /* L: */
|
|
static off_t tprof_reader_offset; /* R: */
|
|
|
|
static kmutex_t tprof_startstop_lock;
|
|
static kcondvar_t tprof_cv; /* L: */
|
|
static struct selinfo tprof_selp; /* L: */
|
|
|
|
static struct tprof_stat tprof_stat; /* L: */
|
|
|
|
static tprof_cpu_t *
|
|
tprof_cpu_direct(struct cpu_info *ci)
|
|
{
|
|
tprof_cpu_t **cp;
|
|
|
|
cp = percpu_getptr_remote(tprof_cpus, ci);
|
|
return *cp;
|
|
}
|
|
|
|
/*
 * tprof_cpu: return the tprof_cpu_t belonging to ci.
 */
static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{
	tprof_cpu_t *result;

	/*
	 * While xcalls are blocked (kpreempt_disable does that), the
	 * percpu object cannot be swapped and destroyed under us.  Writing
	 * to it is not safe, since its contents may already have moved to
	 * a new buffer, but reading the stored pointer is.
	 */
	kpreempt_disable();
	result = tprof_cpu_direct(ci);
	kpreempt_enable();

	return result;
}
|
|
|
|
/*
 * tprof_curcpu: return the tprof_cpu_t of the CPU we are running on.
 */
static tprof_cpu_t *
tprof_curcpu(void)
{
	struct cpu_info * const self = curcpu();

	return tprof_cpu(self);
}
|
|
|
|
static tprof_buf_t *
|
|
tprof_buf_alloc(void)
|
|
{
|
|
tprof_buf_t *new;
|
|
u_int size = tprof_samples_per_buf;
|
|
|
|
new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
|
|
new->b_used = 0;
|
|
new->b_size = size;
|
|
new->b_overflow = 0;
|
|
return new;
|
|
}
|
|
|
|
static void
|
|
tprof_buf_free(tprof_buf_t *buf)
|
|
{
|
|
|
|
kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
|
|
}
|
|
|
|
static tprof_buf_t *
|
|
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
|
|
{
|
|
tprof_buf_t *old;
|
|
|
|
old = c->c_buf;
|
|
c->c_buf = new;
|
|
return old;
|
|
}
|
|
|
|
static tprof_buf_t *
|
|
tprof_buf_refresh(void)
|
|
{
|
|
tprof_cpu_t * const c = tprof_curcpu();
|
|
tprof_buf_t *new;
|
|
|
|
new = tprof_buf_alloc();
|
|
return tprof_buf_switch(c, new);
|
|
}
|
|
|
|
static void
|
|
tprof_worker(struct work *wk, void *dummy)
|
|
{
|
|
tprof_cpu_t * const c = tprof_curcpu();
|
|
tprof_buf_t *buf;
|
|
tprof_backend_t *tb;
|
|
bool shouldstop;
|
|
|
|
KASSERT(wk == &c->c_work);
|
|
KASSERT(dummy == NULL);
|
|
|
|
/*
|
|
* Get a per cpu buffer.
|
|
*/
|
|
buf = tprof_buf_refresh();
|
|
|
|
/*
|
|
* and put it on the global list for read(2).
|
|
*/
|
|
mutex_enter(&tprof_lock);
|
|
tb = tprof_backend;
|
|
shouldstop = (tb == NULL || tb->tb_softc.sc_ctr_running_mask == 0);
|
|
if (shouldstop) {
|
|
KASSERT(tprof_nworker > 0);
|
|
tprof_nworker--;
|
|
cv_broadcast(&tprof_cv);
|
|
cv_broadcast(&tprof_reader_cv);
|
|
}
|
|
if (buf->b_used == 0) {
|
|
tprof_stat.ts_emptybuf++;
|
|
} else if (tprof_nbuf_on_list < tprof_max_buf) {
|
|
tprof_stat.ts_sample += buf->b_used;
|
|
tprof_stat.ts_overflow += buf->b_overflow;
|
|
tprof_stat.ts_buf++;
|
|
STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
|
|
tprof_nbuf_on_list++;
|
|
buf = NULL;
|
|
selnotify(&tprof_selp, 0, NOTE_SUBMIT);
|
|
cv_broadcast(&tprof_reader_cv);
|
|
} else {
|
|
tprof_stat.ts_dropbuf_sample += buf->b_used;
|
|
tprof_stat.ts_dropbuf++;
|
|
}
|
|
mutex_exit(&tprof_lock);
|
|
if (buf)
|
|
tprof_buf_free(buf);
|
|
|
|
if (!shouldstop)
|
|
callout_schedule(&c->c_callout, hz / 8);
|
|
}
|
|
|
|
static void
|
|
tprof_kick(void *vp)
|
|
{
|
|
struct cpu_info * const ci = vp;
|
|
tprof_cpu_t * const c = tprof_cpu(ci);
|
|
|
|
workqueue_enqueue(tprof_wq, &c->c_work, ci);
|
|
}
|
|
|
|
static void
|
|
tprof_stop1(void)
|
|
{
|
|
CPU_INFO_ITERATOR cii;
|
|
struct cpu_info *ci;
|
|
|
|
KASSERT(mutex_owned(&tprof_startstop_lock));
|
|
KASSERT(tprof_nworker == 0);
|
|
|
|
for (CPU_INFO_FOREACH(cii, ci)) {
|
|
tprof_cpu_t * const c = tprof_cpu(ci);
|
|
tprof_buf_t *old;
|
|
|
|
old = tprof_buf_switch(c, NULL);
|
|
if (old != NULL)
|
|
tprof_buf_free(old);
|
|
|
|
callout_destroy(&c->c_callout);
|
|
}
|
|
workqueue_destroy(tprof_wq);
|
|
}
|
|
|
|
static void
|
|
tprof_getinfo(struct tprof_info *info)
|
|
{
|
|
tprof_backend_t *tb;
|
|
|
|
KASSERT(mutex_owned(&tprof_startstop_lock));
|
|
|
|
memset(info, 0, sizeof(*info));
|
|
info->ti_version = TPROF_VERSION;
|
|
if ((tb = tprof_backend) != NULL)
|
|
info->ti_ident = tb->tb_ops->tbo_ident();
|
|
}
|
|
|
|
static int
|
|
tprof_getncounters(u_int *ncounters)
|
|
{
|
|
tprof_backend_t *tb;
|
|
|
|
tb = tprof_backend;
|
|
if (tb == NULL)
|
|
return ENOENT;
|
|
|
|
*ncounters = tb->tb_ops->tbo_ncounters();
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
tprof_start_cpu(void *arg1, void *arg2)
|
|
{
|
|
tprof_backend_t *tb = arg1;
|
|
tprof_countermask_t runmask = (uintptr_t)arg2;
|
|
|
|
tb->tb_ops->tbo_start(runmask);
|
|
}
|
|
|
|
static void
|
|
tprof_stop_cpu(void *arg1, void *arg2)
|
|
{
|
|
tprof_backend_t *tb = arg1;
|
|
tprof_countermask_t stopmask = (uintptr_t)arg2;
|
|
|
|
tb->tb_ops->tbo_stop(stopmask);
|
|
}
|
|
|
|
static int
|
|
tprof_start(tprof_countermask_t runmask)
|
|
{
|
|
CPU_INFO_ITERATOR cii;
|
|
struct cpu_info *ci;
|
|
tprof_backend_t *tb;
|
|
uint64_t xc;
|
|
int error;
|
|
bool firstrun;
|
|
|
|
KASSERT(mutex_owned(&tprof_startstop_lock));
|
|
|
|
tb = tprof_backend;
|
|
if (tb == NULL) {
|
|
error = ENOENT;
|
|
goto done;
|
|
}
|
|
|
|
runmask &= ~tb->tb_softc.sc_ctr_running_mask;
|
|
runmask &= tb->tb_softc.sc_ctr_configured_mask;
|
|
if (runmask == 0) {
|
|
/*
|
|
* Targets are already running.
|
|
* Unconfigured counters are ignored.
|
|
*/
|
|
error = 0;
|
|
goto done;
|
|
}
|
|
|
|
firstrun = (tb->tb_softc.sc_ctr_running_mask == 0);
|
|
if (firstrun) {
|
|
if (tb->tb_ops->tbo_establish != NULL) {
|
|
error = tb->tb_ops->tbo_establish(&tb->tb_softc);
|
|
if (error != 0)
|
|
goto done;
|
|
}
|
|
|
|
tprof_samples_per_buf = TPROF_MAX_SAMPLES_PER_BUF;
|
|
tprof_max_buf = ncpu * 3;
|
|
error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker,
|
|
NULL, PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
|
|
if (error != 0) {
|
|
if (tb->tb_ops->tbo_disestablish != NULL)
|
|
tb->tb_ops->tbo_disestablish(&tb->tb_softc);
|
|
goto done;
|
|
}
|
|
|
|
for (CPU_INFO_FOREACH(cii, ci)) {
|
|
tprof_cpu_t * const c = tprof_cpu(ci);
|
|
tprof_buf_t *new;
|
|
tprof_buf_t *old;
|
|
|
|
new = tprof_buf_alloc();
|
|
old = tprof_buf_switch(c, new);
|
|
if (old != NULL) {
|
|
tprof_buf_free(old);
|
|
}
|
|
callout_init(&c->c_callout, CALLOUT_MPSAFE);
|
|
callout_setfunc(&c->c_callout, tprof_kick, ci);
|
|
}
|
|
}
|
|
|
|
runmask &= tb->tb_softc.sc_ctr_configured_mask;
|
|
xc = xc_broadcast(0, tprof_start_cpu, tb, (void *)(uintptr_t)runmask);
|
|
xc_wait(xc);
|
|
mutex_enter(&tprof_lock);
|
|
tb->tb_softc.sc_ctr_running_mask |= runmask;
|
|
mutex_exit(&tprof_lock);
|
|
|
|
if (firstrun) {
|
|
for (CPU_INFO_FOREACH(cii, ci)) {
|
|
tprof_cpu_t * const c = tprof_cpu(ci);
|
|
|
|
mutex_enter(&tprof_lock);
|
|
tprof_nworker++;
|
|
mutex_exit(&tprof_lock);
|
|
workqueue_enqueue(tprof_wq, &c->c_work, ci);
|
|
}
|
|
}
|
|
error = 0;
|
|
|
|
done:
|
|
return error;
|
|
}
|
|
|
|
static void
|
|
tprof_stop(tprof_countermask_t stopmask)
|
|
{
|
|
tprof_backend_t *tb;
|
|
uint64_t xc;
|
|
|
|
tb = tprof_backend;
|
|
if (tb == NULL)
|
|
return;
|
|
|
|
KASSERT(mutex_owned(&tprof_startstop_lock));
|
|
stopmask &= tb->tb_softc.sc_ctr_running_mask;
|
|
if (stopmask == 0) {
|
|
/* Targets are not running */
|
|
goto done;
|
|
}
|
|
|
|
xc = xc_broadcast(0, tprof_stop_cpu, tb, (void *)(uintptr_t)stopmask);
|
|
xc_wait(xc);
|
|
mutex_enter(&tprof_lock);
|
|
tb->tb_softc.sc_ctr_running_mask &= ~stopmask;
|
|
mutex_exit(&tprof_lock);
|
|
|
|
/* All counters have stopped? */
|
|
if (tb->tb_softc.sc_ctr_running_mask == 0) {
|
|
mutex_enter(&tprof_lock);
|
|
cv_broadcast(&tprof_reader_cv);
|
|
while (tprof_nworker > 0)
|
|
cv_wait(&tprof_cv, &tprof_lock);
|
|
|
|
mutex_exit(&tprof_lock);
|
|
|
|
tprof_stop1();
|
|
if (tb->tb_ops->tbo_disestablish != NULL)
|
|
tb->tb_ops->tbo_disestablish(&tb->tb_softc);
|
|
}
|
|
done:
|
|
;
|
|
}
|
|
|
|
static void
|
|
tprof_init_percpu_counters_offset(void *vp, void *vp2, struct cpu_info *ci)
|
|
{
|
|
uint64_t *counters_offset = vp;
|
|
u_int counter = (uintptr_t)vp2;
|
|
|
|
tprof_backend_t *tb = tprof_backend;
|
|
tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;
|
|
counters_offset[counter] = param->p_value;
|
|
}
|
|
|
|
static void
|
|
tprof_configure_event_cpu(void *arg1, void *arg2)
|
|
{
|
|
tprof_backend_t *tb = arg1;
|
|
u_int counter = (uintptr_t)arg2;
|
|
tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;
|
|
|
|
tb->tb_ops->tbo_configure_event(counter, param);
|
|
}
|
|
|
|
static int
|
|
tprof_configure_event(const tprof_param_t *param)
|
|
{
|
|
tprof_backend_t *tb;
|
|
tprof_backend_softc_t *sc;
|
|
tprof_param_t *sc_param;
|
|
uint64_t xc;
|
|
int c, error;
|
|
|
|
if ((param->p_flags & (TPROF_PARAM_USER | TPROF_PARAM_KERN)) == 0) {
|
|
error = EINVAL;
|
|
goto done;
|
|
}
|
|
|
|
tb = tprof_backend;
|
|
if (tb == NULL) {
|
|
error = ENOENT;
|
|
goto done;
|
|
}
|
|
sc = &tb->tb_softc;
|
|
|
|
c = param->p_counter;
|
|
if (c >= tb->tb_softc.sc_ncounters) {
|
|
error = EINVAL;
|
|
goto done;
|
|
}
|
|
|
|
if (tb->tb_ops->tbo_valid_event != NULL) {
|
|
error = tb->tb_ops->tbo_valid_event(param->p_counter, param);
|
|
if (error != 0)
|
|
goto done;
|
|
}
|
|
|
|
/* if already running, stop the counter */
|
|
if (ISSET(c, tb->tb_softc.sc_ctr_running_mask))
|
|
tprof_stop(__BIT(c));
|
|
|
|
sc->sc_count[c].ctr_bitwidth =
|
|
tb->tb_ops->tbo_counter_bitwidth(param->p_counter);
|
|
|
|
sc_param = &sc->sc_count[c].ctr_param;
|
|
memcpy(sc_param, param, sizeof(*sc_param)); /* save copy of param */
|
|
|
|
if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
|
|
uint64_t freq, inum, dnum;
|
|
|
|
freq = tb->tb_ops->tbo_counter_estimate_freq(c);
|
|
sc->sc_count[c].ctr_counter_val = freq / TPROF_HZ;
|
|
if (sc->sc_count[c].ctr_counter_val == 0) {
|
|
printf("%s: counter#%d frequency (%"PRIu64") is"
|
|
" very low relative to TPROF_HZ (%u)\n", __func__,
|
|
c, freq, TPROF_HZ);
|
|
sc->sc_count[c].ctr_counter_val =
|
|
4000000000ULL / TPROF_HZ;
|
|
}
|
|
|
|
switch (param->p_flags & TPROF_PARAM_VALUE2_MASK) {
|
|
case TPROF_PARAM_VALUE2_SCALE:
|
|
if (sc_param->p_value2 == 0)
|
|
break;
|
|
/*
|
|
* p_value2 is 64-bit fixed-point
|
|
* upper 32 bits are the integer part
|
|
* lower 32 bits are the decimal part
|
|
*/
|
|
inum = sc_param->p_value2 >> 32;
|
|
dnum = sc_param->p_value2 & __BITS(31, 0);
|
|
sc->sc_count[c].ctr_counter_val =
|
|
sc->sc_count[c].ctr_counter_val * inum +
|
|
(sc->sc_count[c].ctr_counter_val * dnum >> 32);
|
|
if (sc->sc_count[c].ctr_counter_val == 0)
|
|
sc->sc_count[c].ctr_counter_val = 1;
|
|
break;
|
|
case TPROF_PARAM_VALUE2_TRIGGERCOUNT:
|
|
if (sc_param->p_value2 == 0)
|
|
sc_param->p_value2 = 1;
|
|
if (sc_param->p_value2 >
|
|
__BITS(sc->sc_count[c].ctr_bitwidth - 1, 0)) {
|
|
sc_param->p_value2 =
|
|
__BITS(sc->sc_count[c].ctr_bitwidth - 1, 0);
|
|
}
|
|
sc->sc_count[c].ctr_counter_val = sc_param->p_value2;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
sc->sc_count[c].ctr_counter_reset_val =
|
|
-sc->sc_count[c].ctr_counter_val;
|
|
sc->sc_count[c].ctr_counter_reset_val &=
|
|
__BITS(sc->sc_count[c].ctr_bitwidth - 1, 0);
|
|
} else {
|
|
sc->sc_count[c].ctr_counter_val = 0;
|
|
sc->sc_count[c].ctr_counter_reset_val = 0;
|
|
}
|
|
|
|
/* At this point, p_value is used as an initial value */
|
|
percpu_foreach(tb->tb_softc.sc_ctr_offset_percpu,
|
|
tprof_init_percpu_counters_offset, (void *)(uintptr_t)c);
|
|
/* On the backend side, p_value is used as the reset value */
|
|
sc_param->p_value = tb->tb_softc.sc_count[c].ctr_counter_reset_val;
|
|
|
|
xc = xc_broadcast(0, tprof_configure_event_cpu,
|
|
tb, (void *)(uintptr_t)c);
|
|
xc_wait(xc);
|
|
|
|
mutex_enter(&tprof_lock);
|
|
/* update counters bitmasks */
|
|
SET(tb->tb_softc.sc_ctr_configured_mask, __BIT(c));
|
|
CLR(tb->tb_softc.sc_ctr_prof_mask, __BIT(c));
|
|
CLR(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
|
|
/* profiled counter requires overflow handling */
|
|
if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
|
|
SET(tb->tb_softc.sc_ctr_prof_mask, __BIT(c));
|
|
SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
|
|
}
|
|
/* counters with less than 64bits also require overflow handling */
|
|
if (sc->sc_count[c].ctr_bitwidth != 64)
|
|
SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
|
|
mutex_exit(&tprof_lock);
|
|
|
|
error = 0;
|
|
|
|
done:
|
|
return error;
|
|
}
|
|
|
|
static void
|
|
tprof_getcounts_cpu(void *arg1, void *arg2)
|
|
{
|
|
tprof_backend_t *tb = arg1;
|
|
tprof_backend_softc_t *sc = &tb->tb_softc;
|
|
uint64_t *counters = arg2;
|
|
uint64_t *counters_offset;
|
|
unsigned int c;
|
|
|
|
tprof_countermask_t configmask = sc->sc_ctr_configured_mask;
|
|
counters_offset = percpu_getref(sc->sc_ctr_offset_percpu);
|
|
for (c = 0; c < sc->sc_ncounters; c++) {
|
|
if (ISSET(configmask, __BIT(c))) {
|
|
uint64_t ctr = tb->tb_ops->tbo_counter_read(c);
|
|
counters[c] = counters_offset[c] +
|
|
((ctr - sc->sc_count[c].ctr_counter_reset_val) &
|
|
__BITS(sc->sc_count[c].ctr_bitwidth - 1, 0));
|
|
} else
|
|
counters[c] = 0;
|
|
}
|
|
percpu_putref(sc->sc_ctr_offset_percpu);
|
|
}
|
|
|
|
static int
|
|
tprof_getcounts(tprof_counts_t *counts)
|
|
{
|
|
struct cpu_info *ci;
|
|
tprof_backend_t *tb;
|
|
uint64_t xc;
|
|
|
|
tb = tprof_backend;
|
|
if (tb == NULL)
|
|
return ENOENT;
|
|
|
|
if (counts->c_cpu >= ncpu)
|
|
return ESRCH;
|
|
ci = cpu_lookup(counts->c_cpu);
|
|
if (ci == NULL)
|
|
return ESRCH;
|
|
|
|
xc = xc_unicast(0, tprof_getcounts_cpu, tb, counts->c_count, ci);
|
|
xc_wait(xc);
|
|
|
|
counts->c_ncounters = tb->tb_softc.sc_ncounters;
|
|
counts->c_runningmask = tb->tb_softc.sc_ctr_running_mask;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* tprof_clear: drain unread samples.
|
|
*/
|
|
|
|
static void
|
|
tprof_clear(void)
|
|
{
|
|
tprof_buf_t *buf;
|
|
|
|
mutex_enter(&tprof_reader_lock);
|
|
mutex_enter(&tprof_lock);
|
|
while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
|
|
if (buf != NULL) {
|
|
STAILQ_REMOVE_HEAD(&tprof_list, b_list);
|
|
KASSERT(tprof_nbuf_on_list > 0);
|
|
tprof_nbuf_on_list--;
|
|
mutex_exit(&tprof_lock);
|
|
tprof_buf_free(buf);
|
|
mutex_enter(&tprof_lock);
|
|
}
|
|
}
|
|
KASSERT(tprof_nbuf_on_list == 0);
|
|
mutex_exit(&tprof_lock);
|
|
tprof_reader_offset = 0;
|
|
mutex_exit(&tprof_reader_lock);
|
|
|
|
memset(&tprof_stat, 0, sizeof(tprof_stat));
|
|
}
|
|
|
|
static tprof_backend_t *
|
|
tprof_backend_lookup(const char *name)
|
|
{
|
|
tprof_backend_t *tb;
|
|
|
|
KASSERT(mutex_owned(&tprof_startstop_lock));
|
|
|
|
LIST_FOREACH(tb, &tprof_backends, tb_list) {
|
|
if (!strcmp(tb->tb_name, name)) {
|
|
return tb;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* -------------------- backend interfaces */
|
|
|
|
/*
|
|
* tprof_sample: record a sample on the per-cpu buffer.
|
|
*
|
|
* be careful; can be called in NMI context.
|
|
* we are bluntly assuming the followings are safe.
|
|
* curcpu()
|
|
* curlwp->l_lid
|
|
* curlwp->l_proc->p_pid
|
|
*/
|
|
|
|
void
|
|
tprof_sample(void *unused, const tprof_frame_info_t *tfi)
|
|
{
|
|
tprof_cpu_t * const c = tprof_cpu_direct(curcpu());
|
|
tprof_buf_t * const buf = c->c_buf;
|
|
tprof_sample_t *sp;
|
|
const uintptr_t pc = tfi->tfi_pc;
|
|
const lwp_t * const l = curlwp;
|
|
u_int idx;
|
|
|
|
idx = buf->b_used;
|
|
if (__predict_false(idx >= buf->b_size)) {
|
|
buf->b_overflow++;
|
|
return;
|
|
}
|
|
sp = &buf->b_data[idx];
|
|
sp->s_pid = l->l_proc->p_pid;
|
|
sp->s_lwpid = l->l_lid;
|
|
sp->s_cpuid = c->c_cpuid;
|
|
sp->s_flags = ((tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0) |
|
|
__SHIFTIN(tfi->tfi_counter, TPROF_SAMPLE_COUNTER_MASK);
|
|
sp->s_pc = pc;
|
|
buf->b_used = idx + 1;
|
|
}
|
|
|
|
/*
|
|
* tprof_backend_register:
|
|
*/
|
|
|
|
int
|
|
tprof_backend_register(const char *name, const tprof_backend_ops_t *ops,
|
|
int vers)
|
|
{
|
|
tprof_backend_t *tb;
|
|
|
|
if (vers != TPROF_BACKEND_VERSION)
|
|
return EINVAL;
|
|
|
|
mutex_enter(&tprof_startstop_lock);
|
|
tb = tprof_backend_lookup(name);
|
|
if (tb != NULL) {
|
|
mutex_exit(&tprof_startstop_lock);
|
|
return EEXIST;
|
|
}
|
|
#if 1 /* XXX for now */
|
|
if (!LIST_EMPTY(&tprof_backends)) {
|
|
mutex_exit(&tprof_startstop_lock);
|
|
return ENOTSUP;
|
|
}
|
|
#endif
|
|
tb = kmem_zalloc(sizeof(*tb), KM_SLEEP);
|
|
tb->tb_name = name;
|
|
tb->tb_ops = ops;
|
|
LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
|
|
#if 1 /* XXX for now */
|
|
if (tprof_backend == NULL) {
|
|
tprof_backend = tb;
|
|
}
|
|
#endif
|
|
mutex_exit(&tprof_startstop_lock);
|
|
|
|
/* Init backend softc */
|
|
tb->tb_softc.sc_ncounters = tb->tb_ops->tbo_ncounters();
|
|
tb->tb_softc.sc_ctr_offset_percpu_size =
|
|
sizeof(uint64_t) * tb->tb_softc.sc_ncounters;
|
|
tb->tb_softc.sc_ctr_offset_percpu =
|
|
percpu_alloc(tb->tb_softc.sc_ctr_offset_percpu_size);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* tprof_backend_unregister:
|
|
*/
|
|
|
|
int
|
|
tprof_backend_unregister(const char *name)
|
|
{
|
|
tprof_backend_t *tb;
|
|
|
|
mutex_enter(&tprof_startstop_lock);
|
|
tb = tprof_backend_lookup(name);
|
|
#if defined(DIAGNOSTIC)
|
|
if (tb == NULL) {
|
|
mutex_exit(&tprof_startstop_lock);
|
|
panic("%s: not found '%s'", __func__, name);
|
|
}
|
|
#endif /* defined(DIAGNOSTIC) */
|
|
if (tb->tb_softc.sc_ctr_running_mask != 0) {
|
|
mutex_exit(&tprof_startstop_lock);
|
|
return EBUSY;
|
|
}
|
|
#if 1 /* XXX for now */
|
|
if (tprof_backend == tb)
|
|
tprof_backend = NULL;
|
|
#endif
|
|
LIST_REMOVE(tb, tb_list);
|
|
mutex_exit(&tprof_startstop_lock);
|
|
|
|
/* fini backend softc */
|
|
percpu_free(tb->tb_softc.sc_ctr_offset_percpu,
|
|
tb->tb_softc.sc_ctr_offset_percpu_size);
|
|
|
|
/* Free backend */
|
|
kmem_free(tb, sizeof(*tb));
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* -------------------- cdevsw interfaces */
|
|
|
|
static int
|
|
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
|
|
{
|
|
|
|
if (minor(dev) != 0)
|
|
return EXDEV;
|
|
|
|
mutex_enter(&tprof_lock);
|
|
if (tprof_owner != NULL) {
|
|
mutex_exit(&tprof_lock);
|
|
return EBUSY;
|
|
}
|
|
tprof_owner = curlwp;
|
|
mutex_exit(&tprof_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
|
|
{
|
|
|
|
KASSERT(minor(dev) == 0);
|
|
|
|
mutex_enter(&tprof_startstop_lock);
|
|
mutex_enter(&tprof_lock);
|
|
tprof_owner = NULL;
|
|
mutex_exit(&tprof_lock);
|
|
tprof_stop(TPROF_COUNTERMASK_ALL);
|
|
tprof_clear();
|
|
|
|
tprof_backend_t *tb = tprof_backend;
|
|
if (tb != NULL) {
|
|
KASSERT(tb->tb_softc.sc_ctr_running_mask == 0);
|
|
tb->tb_softc.sc_ctr_configured_mask = 0;
|
|
tb->tb_softc.sc_ctr_prof_mask = 0;
|
|
tb->tb_softc.sc_ctr_ovf_mask = 0;
|
|
}
|
|
|
|
mutex_exit(&tprof_startstop_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
tprof_poll(dev_t dev, int events, struct lwp *l)
|
|
{
|
|
int revents;
|
|
|
|
revents = events & (POLLIN | POLLRDNORM);
|
|
if (revents == 0)
|
|
return 0;
|
|
|
|
mutex_enter(&tprof_lock);
|
|
if (STAILQ_EMPTY(&tprof_list)) {
|
|
revents = 0;
|
|
selrecord(l, &tprof_selp);
|
|
}
|
|
mutex_exit(&tprof_lock);
|
|
|
|
return revents;
|
|
}
|
|
|
|
static void
|
|
filt_tprof_read_detach(struct knote *kn)
|
|
{
|
|
mutex_enter(&tprof_lock);
|
|
selremove_knote(&tprof_selp, kn);
|
|
mutex_exit(&tprof_lock);
|
|
}
|
|
|
|
static int
|
|
filt_tprof_read_event(struct knote *kn, long hint)
|
|
{
|
|
int rv = 0;
|
|
|
|
if ((hint & NOTE_SUBMIT) == 0)
|
|
mutex_enter(&tprof_lock);
|
|
|
|
if (!STAILQ_EMPTY(&tprof_list)) {
|
|
tprof_buf_t *buf;
|
|
int64_t n = 0;
|
|
|
|
STAILQ_FOREACH(buf, &tprof_list, b_list) {
|
|
n += buf->b_used;
|
|
}
|
|
kn->kn_data = n * sizeof(tprof_sample_t);
|
|
|
|
rv = 1;
|
|
}
|
|
|
|
if ((hint & NOTE_SUBMIT) == 0)
|
|
mutex_exit(&tprof_lock);
|
|
|
|
return rv;
|
|
}
|
|
|
|
static const struct filterops tprof_read_filtops = {
|
|
.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
|
|
.f_attach = NULL,
|
|
.f_detach = filt_tprof_read_detach,
|
|
.f_event = filt_tprof_read_event,
|
|
};
|
|
|
|
static int
|
|
tprof_kqfilter(dev_t dev, struct knote *kn)
|
|
{
|
|
switch (kn->kn_filter) {
|
|
case EVFILT_READ:
|
|
kn->kn_fop = &tprof_read_filtops;
|
|
mutex_enter(&tprof_lock);
|
|
selrecord_knote(&tprof_selp, kn);
|
|
mutex_exit(&tprof_lock);
|
|
break;
|
|
default:
|
|
return EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
tprof_read(dev_t dev, struct uio *uio, int flags)
|
|
{
|
|
tprof_buf_t *buf;
|
|
size_t bytes;
|
|
size_t resid;
|
|
size_t done = 0;
|
|
int error = 0;
|
|
|
|
KASSERT(minor(dev) == 0);
|
|
mutex_enter(&tprof_reader_lock);
|
|
while (uio->uio_resid > 0 && error == 0) {
|
|
/*
|
|
* Take the first buffer from the list.
|
|
*/
|
|
mutex_enter(&tprof_lock);
|
|
buf = STAILQ_FIRST(&tprof_list);
|
|
if (buf == NULL) {
|
|
if (tprof_nworker == 0 || done != 0) {
|
|
mutex_exit(&tprof_lock);
|
|
error = 0;
|
|
break;
|
|
}
|
|
mutex_exit(&tprof_reader_lock);
|
|
error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
|
|
mutex_exit(&tprof_lock);
|
|
mutex_enter(&tprof_reader_lock);
|
|
continue;
|
|
}
|
|
STAILQ_REMOVE_HEAD(&tprof_list, b_list);
|
|
KASSERT(tprof_nbuf_on_list > 0);
|
|
tprof_nbuf_on_list--;
|
|
mutex_exit(&tprof_lock);
|
|
|
|
/*
|
|
* Copy it out.
|
|
*/
|
|
bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
|
|
tprof_reader_offset, uio->uio_resid);
|
|
resid = uio->uio_resid;
|
|
error = uiomove((char *)buf->b_data + tprof_reader_offset,
|
|
bytes, uio);
|
|
done = resid - uio->uio_resid;
|
|
tprof_reader_offset += done;
|
|
|
|
/*
|
|
* If we didn't consume the whole buffer,
|
|
* put it back to the list.
|
|
*/
|
|
if (tprof_reader_offset <
|
|
buf->b_used * sizeof(tprof_sample_t)) {
|
|
mutex_enter(&tprof_lock);
|
|
STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
|
|
tprof_nbuf_on_list++;
|
|
cv_broadcast(&tprof_reader_cv);
|
|
mutex_exit(&tprof_lock);
|
|
} else {
|
|
tprof_buf_free(buf);
|
|
tprof_reader_offset = 0;
|
|
}
|
|
}
|
|
mutex_exit(&tprof_reader_lock);
|
|
|
|
return error;
|
|
}
|
|
|
|
static int
|
|
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
|
|
{
|
|
const tprof_param_t *param;
|
|
tprof_counts_t *counts;
|
|
int error = 0;
|
|
|
|
KASSERT(minor(dev) == 0);
|
|
|
|
switch (cmd) {
|
|
case TPROF_IOC_GETINFO:
|
|
mutex_enter(&tprof_startstop_lock);
|
|
tprof_getinfo(data);
|
|
mutex_exit(&tprof_startstop_lock);
|
|
break;
|
|
case TPROF_IOC_GETNCOUNTERS:
|
|
mutex_enter(&tprof_lock);
|
|
error = tprof_getncounters((u_int *)data);
|
|
mutex_exit(&tprof_lock);
|
|
break;
|
|
case TPROF_IOC_START:
|
|
mutex_enter(&tprof_startstop_lock);
|
|
error = tprof_start(*(tprof_countermask_t *)data);
|
|
mutex_exit(&tprof_startstop_lock);
|
|
break;
|
|
case TPROF_IOC_STOP:
|
|
mutex_enter(&tprof_startstop_lock);
|
|
tprof_stop(*(tprof_countermask_t *)data);
|
|
mutex_exit(&tprof_startstop_lock);
|
|
break;
|
|
case TPROF_IOC_GETSTAT:
|
|
mutex_enter(&tprof_lock);
|
|
memcpy(data, &tprof_stat, sizeof(tprof_stat));
|
|
mutex_exit(&tprof_lock);
|
|
break;
|
|
case TPROF_IOC_CONFIGURE_EVENT:
|
|
param = data;
|
|
mutex_enter(&tprof_startstop_lock);
|
|
error = tprof_configure_event(param);
|
|
mutex_exit(&tprof_startstop_lock);
|
|
break;
|
|
case TPROF_IOC_GETCOUNTS:
|
|
counts = data;
|
|
mutex_enter(&tprof_startstop_lock);
|
|
error = tprof_getcounts(counts);
|
|
mutex_exit(&tprof_startstop_lock);
|
|
break;
|
|
default:
|
|
error = EINVAL;
|
|
break;
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
const struct cdevsw tprof_cdevsw = {
|
|
.d_open = tprof_open,
|
|
.d_close = tprof_close,
|
|
.d_read = tprof_read,
|
|
.d_write = nowrite,
|
|
.d_ioctl = tprof_ioctl,
|
|
.d_stop = nostop,
|
|
.d_tty = notty,
|
|
.d_poll = tprof_poll,
|
|
.d_mmap = nommap,
|
|
.d_kqfilter = tprof_kqfilter,
|
|
.d_discard = nodiscard,
|
|
.d_flag = D_OTHER | D_MPSAFE
|
|
};
|
|
|
|
/*
 * tprofattach: attach entry point.  It has nothing to do; the driver is
 * set up from tprof_modcmd().
 */
void
tprofattach(int nunits)
{
	/* Nothing */
}
|
|
|
|
/* Declare the "tprof" module of class DRIVER; handled by tprof_modcmd(). */
MODULE(MODULE_CLASS_DRIVER, tprof, NULL);
|
|
|
|
static void
|
|
tprof_cpu_init(void *vcp, void *vcookie, struct cpu_info *ci)
|
|
{
|
|
tprof_cpu_t **cp = vcp, *c;
|
|
|
|
c = kmem_zalloc(sizeof(*c), KM_SLEEP);
|
|
c->c_buf = NULL;
|
|
c->c_cpuid = cpu_index(ci);
|
|
*cp = c;
|
|
}
|
|
|
|
static void
|
|
tprof_cpu_fini(void *vcp, void *vcookie, struct cpu_info *ci)
|
|
{
|
|
tprof_cpu_t **cp = vcp, *c;
|
|
|
|
c = *cp;
|
|
KASSERT(c->c_cpuid == cpu_index(ci));
|
|
KASSERT(c->c_buf == NULL);
|
|
kmem_free(c, sizeof(*c));
|
|
*cp = NULL;
|
|
}
|
|
|
|
static void
|
|
tprof_driver_init(void)
|
|
{
|
|
|
|
mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
|
|
mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
|
|
mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
|
|
selinit(&tprof_selp);
|
|
cv_init(&tprof_cv, "tprof");
|
|
cv_init(&tprof_reader_cv, "tprof_rd");
|
|
STAILQ_INIT(&tprof_list);
|
|
tprof_cpus = percpu_create(sizeof(tprof_cpu_t *),
|
|
tprof_cpu_init, tprof_cpu_fini, NULL);
|
|
}
|
|
|
|
static void
|
|
tprof_driver_fini(void)
|
|
{
|
|
|
|
percpu_free(tprof_cpus, sizeof(tprof_cpu_t *));
|
|
mutex_destroy(&tprof_lock);
|
|
mutex_destroy(&tprof_reader_lock);
|
|
mutex_destroy(&tprof_startstop_lock);
|
|
seldestroy(&tprof_selp);
|
|
cv_destroy(&tprof_cv);
|
|
cv_destroy(&tprof_reader_cv);
|
|
}
|
|
|
|
static int
|
|
tprof_modcmd(modcmd_t cmd, void *arg)
|
|
{
|
|
|
|
switch (cmd) {
|
|
case MODULE_CMD_INIT:
|
|
tprof_driver_init();
|
|
#if defined(_MODULE)
|
|
{
|
|
devmajor_t bmajor = NODEVMAJOR;
|
|
devmajor_t cmajor = NODEVMAJOR;
|
|
int error;
|
|
|
|
error = devsw_attach("tprof", NULL, &bmajor,
|
|
&tprof_cdevsw, &cmajor);
|
|
if (error) {
|
|
tprof_driver_fini();
|
|
return error;
|
|
}
|
|
}
|
|
#endif /* defined(_MODULE) */
|
|
return 0;
|
|
|
|
case MODULE_CMD_FINI:
|
|
#if defined(_MODULE)
|
|
devsw_detach(NULL, &tprof_cdevsw);
|
|
#endif /* defined(_MODULE) */
|
|
tprof_driver_fini();
|
|
return 0;
|
|
|
|
default:
|
|
return ENOTTY;
|
|
}
|
|
}
|