From 48a1e4c2c17f5ceecfbed8e1630d5b16e96c48b5 Mon Sep 17 00:00:00 2001 From: yamt Date: Tue, 1 Jan 2008 21:28:37 +0000 Subject: [PATCH] a simple performance monitor based profiler, inspired by Linux oprofile. --- sys/arch/amd64/amd64/trap.c | 21 +- sys/arch/i386/i386/trap.c | 21 +- sys/arch/x86/include/tprof.h | 35 +++ sys/arch/x86/x86/tprof_pmi.c | 228 +++++++++++++++ sys/conf/files | 7 +- sys/conf/majors | 3 +- sys/dev/tprof/files.tprof | 4 + sys/dev/tprof/tprof.c | 529 +++++++++++++++++++++++++++++++++++ sys/dev/tprof/tprof.h | 39 +++ sys/dev/tprof/tprof_ioctl.h | 55 ++++ 10 files changed, 934 insertions(+), 8 deletions(-) create mode 100644 sys/arch/x86/include/tprof.h create mode 100644 sys/arch/x86/x86/tprof_pmi.c create mode 100644 sys/dev/tprof/files.tprof create mode 100644 sys/dev/tprof/tprof.c create mode 100644 sys/dev/tprof/tprof.h create mode 100644 sys/dev/tprof/tprof_ioctl.h diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c index cbd84aeebe66..a93f04def038 100644 --- a/sys/arch/amd64/amd64/trap.c +++ b/sys/arch/amd64/amd64/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.41 2008/01/01 13:40:21 yamt Exp $ */ +/* $NetBSD: trap.c,v 1.42 2008/01/01 21:28:40 yamt Exp $ */ /*- * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 
@@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.41 2008/01/01 13:40:21 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.42 2008/01/01 21:28:40 yamt Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -83,6 +83,12 @@ __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.41 2008/01/01 13:40:21 yamt Exp $"); #include "opt_multiprocessor.h" #include "opt_compat_netbsd.h" #include "opt_compat_ibcs2.h" +#include "opt_xen.h" +#if !defined(XEN) +#include "tprof.h" +#else /* !defined(XEN) */ +#define NTPROF 0 +#endif /* !defined(XEN) */ #include #include @@ -100,6 +106,10 @@ __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.41 2008/01/01 13:40:21 yamt Exp $"); #include +#if NTPROF > 0 +#include +#endif /* NTPROF > 0 */ + #include #include #include @@ -576,8 +586,12 @@ faultcommon: } break; -#if NISA > 0 case T_NMI: +#if NTPROF > 0 + if (tprof_pmi_nmi(frame)) + return; +#endif /* NTPROF > 0 */ +#if NISA > 0 #if defined(KGDB) || defined(DDB) /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); @@ -598,6 +612,7 @@ faultcommon: else return; #endif /* NISA > 0 */ + ; /* avoid a label at end of compound statement */ } if ((type & T_USER) == 0) diff --git a/sys/arch/i386/i386/trap.c b/sys/arch/i386/i386/trap.c index 25eb4f0c3b22..f45070300c22 100644 --- a/sys/arch/i386/i386/trap.c +++ b/sys/arch/i386/i386/trap.c @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.229 2008/01/01 13:40:20 yamt Exp $ */ +/* $NetBSD: trap.c,v 1.230 2008/01/01 21:28:40 yamt Exp $ */ /*- * Copyright (c) 1998, 2000, 2005 The NetBSD Foundation, Inc. 
@@ -75,7 +75,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.229 2008/01/01 13:40:20 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.230 2008/01/01 21:28:40 yamt Exp $"); #include "opt_ddb.h" #include "opt_kgdb.h" @@ -85,6 +85,12 @@ __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.229 2008/01/01 13:40:20 yamt Exp $"); #include "opt_vm86.h" #include "opt_kvm86.h" #include "opt_kstack_dr0.h" +#include "opt_xen.h" +#if !defined(XEN) +#include "tprof.h" +#else /* !defined(XEN) */ +#define NTPROF 0 +#endif /* !defined(XEN) */ #include #include @@ -101,6 +107,10 @@ __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.229 2008/01/01 13:40:20 yamt Exp $"); #include #include +#if NTPROF > 0 +#include +#endif /* NTPROF > 0 */ + #include #include #include @@ -757,8 +767,12 @@ copyfault: } break; -#if !defined(XEN) && (NISA > 0 || NMCA > 0) case T_NMI: +#if NTPROF > 0 + if (tprof_pmi_nmi(frame)) + return; +#endif /* NTPROF > 0 */ +#if !defined(XEN) && (NISA > 0 || NMCA > 0) #if defined(KGDB) || defined(DDB) /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); @@ -787,6 +801,7 @@ copyfault: return; #endif /* NMCA > 0 */ #endif /* !defined(XEN) && (NISA > 0 || NMCA > 0) */ + ; /* avoid a label at end of compound statement */ } if ((type & T_USER) == 0) diff --git a/sys/arch/x86/include/tprof.h b/sys/arch/x86/include/tprof.h new file mode 100644 index 000000000000..5ffeaa143c85 --- /dev/null +++ b/sys/arch/x86/include/tprof.h @@ -0,0 +1,35 @@ +/* $NetBSD: tprof.h,v 1.1 2008/01/01 21:28:39 yamt Exp $ */ + +/*- + * Copyright (c)2008 YAMAMOTO Takashi, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _X86_TPROF_H_ +#define _X86_TPROF_H_ + +struct trapframe; +int tprof_pmi_nmi(const struct trapframe *); + +#endif /* _X86_TPROF_H_ */ diff --git a/sys/arch/x86/x86/tprof_pmi.c b/sys/arch/x86/x86/tprof_pmi.c new file mode 100644 index 000000000000..aa203fbd379b --- /dev/null +++ b/sys/arch/x86/x86/tprof_pmi.c @@ -0,0 +1,228 @@ +/* $NetBSD: tprof_pmi.c,v 1.1 2008/01/01 21:28:39 yamt Exp $ */ + +/*- + * Copyright (c)2008 YAMAMOTO Takashi, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: tprof_pmi.c,v 1.1 2008/01/01 21:28:39 yamt Exp $"); + +#include +#include +#include + +#include +#include + +#include + +#include +#include /* PC_REGS */ +#include /* cpu_vendor */ +#include /* CPUVENDOR_* */ +#include +#include + +#define ESCR_T1_USR __BIT(0) +#define ESCR_T1_OS __BIT(1) +#define ESCR_T0_USR __BIT(2) +#define ESCR_T0_OS __BIT(3) +#define ESCR_TAG_ENABLE __BIT(4) +#define ESCR_TAG_VALUE __BITS(5, 8) +#define ESCR_EVENT_MASK __BITS(9, 24) +#define ESCR_EVENT_SELECT __BITS(25, 30) + +#define CCCR_ENABLE __BIT(12) +#define CCCR_ESCR_SELECT __BITS(13, 15) +#define CCCR_MUST_BE_SET __BITS(16, 17) +#define CCCR_COMPARE __BIT(18) +#define CCCR_COMPLEMENT __BIT(19) +#define CCCR_THRESHOLD __BITS(20, 23) +#define CCCR_EDGE __BIT(24) +#define CCCR_FORCE_OVF __BIT(25) +#define CCCR_OVF_PMI_T0 __BIT(26) +#define CCCR_OVF_PMI_T1 __BIT(27) +#define CCCR_CASCADE __BIT(30) +#define CCCR_OVF __BIT(31) + +struct msrs { + u_int msr_cccr; + u_int msr_escr; + u_int msr_counter; +}; + +/* + * parameters (see 253669.pdf Table A-6) + * + * XXX should not hardcode + */ + +static const struct msrs msrs[] = { + { + .msr_cccr = 0x360, /* MSR_BPU_CCCR0 */ + 
.msr_escr = 0x3a2, /* MSR_FSB_ESCR0 */ + .msr_counter = 0x300, /* MSR_BPU_COUNTER0 */ + }, + { + .msr_cccr = 0x362, /* MSR_BPU_CCCR2 */ + .msr_escr = 0x3a3, /* MSR_FSB_ESCR1 */ + .msr_counter = 0x302, /* MSR_BPU_COUNTER2 */ + }, +}; +static const u_int cccr_escr_select = 0x6; /* MSR_FSB_ESCR? */ +static const u_int escr_event_select = 0x13; /* global_power_events */ +static const u_int escr_event_mask = 0x1; /* running */ + +static uint64_t counter_val = 5000000; +static uint64_t counter_reset_val; +static uint32_t tprof_pmi_lapic_saved[MAXCPUS]; + +static void +tprof_pmi_start_cpu(void *arg1, void *arg2) +{ + struct cpu_info * const ci = curcpu(); + const struct msrs *msr; + uint64_t cccr; + uint64_t escr; + + if (ci->ci_smtid >= 2) { + printf("%s: ignoring %s smtid=%u", + __func__, ci->ci_dev->dv_xname, ci->ci_smtid); + return; + } + msr = &msrs[ci->ci_smtid]; + escr = __SHIFTIN(escr_event_mask, ESCR_EVENT_MASK) | + __SHIFTIN(escr_event_select, ESCR_EVENT_SELECT); + cccr = CCCR_ENABLE | __SHIFTIN(cccr_escr_select, __BITS(13, 15)) | + CCCR_MUST_BE_SET; + if (ci->ci_smtid == 0) { + escr |= ESCR_T0_OS; + cccr |= CCCR_OVF_PMI_T0; + } else { + escr |= ESCR_T1_OS; + cccr |= CCCR_OVF_PMI_T1; + } + + wrmsr(msr->msr_counter, counter_reset_val); + wrmsr(msr->msr_escr, escr); + wrmsr(msr->msr_cccr, cccr); + tprof_pmi_lapic_saved[cpu_index(ci)] = i82489_readreg(LAPIC_PCINT); + i82489_writereg(LAPIC_PCINT, LAPIC_DLMODE_NMI); +} + +static void +tprof_pmi_stop_cpu(void *arg1, void *arg2) +{ + struct cpu_info * const ci = curcpu(); + const struct msrs *msr; + + if (ci->ci_smtid >= 2) { + printf("%s: ignoring %s smtid=%u", + __func__, ci->ci_dev->dv_xname, ci->ci_smtid); + return; + } + msr = &msrs[ci->ci_smtid]; + + wrmsr(msr->msr_escr, 0); + wrmsr(msr->msr_cccr, 0); + i82489_writereg(LAPIC_PCINT, tprof_pmi_lapic_saved[cpu_index(ci)]); +} + +uint64_t +tprof_backend_estimate_freq(void) +{ + uint64_t cpufreq = curcpu()->ci_tsc_freq; + uint64_t freq = 10000; + + counter_val = 
cpufreq / freq; + if (counter_val == 0) { + counter_val = UINT64_C(4000000000) / freq; + return freq; + } + return freq; +} + +int +tprof_backend_start(void) +{ + struct cpu_info * const ci = curcpu(); + uint64_t xc; + + if (!(cpu_vendor == CPUVENDOR_INTEL && + CPUID2FAMILY(ci->ci_signature) == 15)) { + return ENOTSUP; + } + + counter_reset_val = - counter_val + 1; + xc = xc_broadcast(0, tprof_pmi_start_cpu, NULL, NULL); + xc_wait(xc); + + return 0; +} + +void +tprof_backend_stop(void) +{ + uint64_t xc; + + xc = xc_broadcast(0, tprof_pmi_stop_cpu, NULL, NULL); + xc_wait(xc); +} + +int +tprof_pmi_nmi(const struct trapframe *tf) +{ + struct cpu_info * const ci = curcpu(); + const struct msrs *msr; + uint32_t pcint; + uint64_t cccr; + + if (ci->ci_smtid >= 2) { + /* not ours */ + return 0; + } + msr = &msrs[ci->ci_smtid]; + + /* check if it's for us */ + cccr = rdmsr(msr->msr_cccr); + if ((cccr & CCCR_OVF) == 0) { + /* not ours */ + return 0; + } + + /* record a sample */ + tprof_sample(tf); + + /* reset counter */ + wrmsr(msr->msr_counter, counter_reset_val); + wrmsr(msr->msr_cccr, cccr & ~CCCR_OVF); + + /* unmask PMI */ + pcint = i82489_readreg(LAPIC_PCINT); + KASSERT((pcint & LAPIC_LVT_MASKED) != 0); + i82489_writereg(LAPIC_PCINT, pcint & ~LAPIC_LVT_MASKED); + + return 1; +} diff --git a/sys/conf/files b/sys/conf/files index 52d766e26b22..0d5e679dbc2c 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,4 +1,4 @@ -# $NetBSD: files,v 1.881 2007/12/31 15:32:09 ad Exp $ +# $NetBSD: files,v 1.882 2008/01/01 21:28:38 yamt Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 @@ -1575,3 +1575,8 @@ include "dev/pad/files.pad" # Intel 386 Real Mode emulator # include "lib/libx86emu/files.x86emu" + +# +# "tprof" profiler. 
+# +include "dev/tprof/files.tprof" diff --git a/sys/conf/majors b/sys/conf/majors index 22a5335e9914..ce0f4a59e229 100644 --- a/sys/conf/majors +++ b/sys/conf/majors @@ -1,4 +1,4 @@ -# $NetBSD: majors,v 1.39 2007/12/24 15:12:05 ad Exp $ +# $NetBSD: majors,v 1.40 2008/01/01 21:28:39 yamt Exp $ # # Device majors for Machine-Independent drivers. # @@ -35,3 +35,4 @@ device-major twa char 187 twa device-major cpuctl char 188 device-major pad char 189 pad device-major zfs char 190 zfs +device-major tprof char 191 tprof diff --git a/sys/dev/tprof/files.tprof b/sys/dev/tprof/files.tprof new file mode 100644 index 000000000000..a45dca52e0b2 --- /dev/null +++ b/sys/dev/tprof/files.tprof @@ -0,0 +1,4 @@ +# $NetBSD: files.tprof,v 1.1 2008/01/01 21:28:37 yamt Exp $ + +defpseudo tprof +file dev/tprof/tprof.c tprof diff --git a/sys/dev/tprof/tprof.c b/sys/dev/tprof/tprof.c new file mode 100644 index 000000000000..998e713cb387 --- /dev/null +++ b/sys/dev/tprof/tprof.c @@ -0,0 +1,529 @@ +/* $NetBSD: tprof.c,v 1.1 2008/01/01 21:28:37 yamt Exp $ */ + +/*- + * Copyright (c)2008 YAMAMOTO Takashi, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.1 2008/01/01 21:28:37 yamt Exp $"); + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include /* PC_REGS */ + +typedef struct { + uintptr_t s_pc; /* program counter */ +} tprof_sample_t; + +typedef struct tprof_buf { + u_int b_used; + u_int b_size; + u_int b_overflow; + u_int b_unused; + STAILQ_ENTRY(tprof_buf) b_list; + tprof_sample_t b_data[]; +} tprof_buf_t; +#define TPROF_BUF_BYTESIZE(sz) \ + (sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t)) +#define TPROF_MAX_SAMPLES_PER_BUF 10000 + +#define TPROF_MAX_BUF 100 + +typedef struct { + tprof_buf_t *c_buf; + struct work c_work; + callout_t c_callout; +} __aligned(CACHE_LINE_SIZE) tprof_cpu_t; + +static kmutex_t tprof_lock; +static bool tprof_running; +static u_int tprof_nworker; +static lwp_t *tprof_owner; +static STAILQ_HEAD(, tprof_buf) tprof_list; +static u_int tprof_nbuf_on_list; +static struct workqueue *tprof_wq; +static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE); +static u_int tprof_samples_per_buf; + +static kmutex_t tprof_reader_lock; +static kcondvar_t tprof_reader_cv; +static off_t tprof_reader_offset; + +static kmutex_t tprof_startstop_lock; +static kcondvar_t tprof_cv; + +static struct tprof_stat tprof_stat; + +static tprof_cpu_t * +tprof_cpu(struct cpu_info *ci) +{ + + return &tprof_cpus[cpu_index(ci)]; +} + +static tprof_cpu_t * 
+tprof_curcpu(void) +{ + + return tprof_cpu(curcpu()); +} + +static tprof_buf_t * +tprof_buf_alloc(void) +{ + tprof_buf_t *new; + u_int size = tprof_samples_per_buf; + + new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP); + new->b_used = 0; + new->b_size = size; + new->b_overflow = 0; + return new; +} + +static void +tprof_buf_free(tprof_buf_t *buf) +{ + + kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size)); +} + +static tprof_buf_t * +tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new) +{ + tprof_buf_t *old; + + old = c->c_buf; + c->c_buf = new; + return old; +} + +static tprof_buf_t * +tprof_buf_refresh(void) +{ + tprof_cpu_t * const c = tprof_curcpu(); + tprof_buf_t *new; + + new = tprof_buf_alloc(); + return tprof_buf_switch(c, new); +} + +static void +tprof_worker(struct work *wk, void *dummy) +{ + tprof_cpu_t * const c = tprof_curcpu(); + tprof_buf_t *buf; + bool shouldstop; + + KASSERT(wk == &c->c_work); + KASSERT(dummy == NULL); + + /* + * get a per cpu buffer. + */ + buf = tprof_buf_refresh(); + + /* + * and put it on the global list for read(2). 
+ */ + mutex_enter(&tprof_lock); + shouldstop = !tprof_running; + if (shouldstop) { + KASSERT(tprof_nworker > 0); + tprof_nworker--; + cv_broadcast(&tprof_cv); + cv_broadcast(&tprof_reader_cv); + } + if (buf->b_used == 0) { + tprof_stat.ts_emptybuf++; + } else if (tprof_nbuf_on_list < TPROF_MAX_BUF) { + tprof_stat.ts_sample += buf->b_used; + tprof_stat.ts_overflow += buf->b_overflow; + tprof_stat.ts_buf++; + STAILQ_INSERT_TAIL(&tprof_list, buf, b_list); + tprof_nbuf_on_list++; + buf = NULL; + cv_broadcast(&tprof_reader_cv); + } else { + tprof_stat.ts_dropbuf_sample += buf->b_used; + tprof_stat.ts_dropbuf++; + } + mutex_exit(&tprof_lock); + if (buf) { + tprof_buf_free(buf); + } + if (!shouldstop) { + callout_schedule(&c->c_callout, hz); + } +} + +static void +tprof_kick(void *vp) +{ + struct cpu_info * const ci = vp; + tprof_cpu_t * const c = tprof_cpu(ci); + + workqueue_enqueue(tprof_wq, &c->c_work, ci); +} + +static void +tprof_stop1(void) +{ + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + + KASSERT(mutex_owned(&tprof_startstop_lock)); + + for (CPU_INFO_FOREACH(cii, ci)) { + tprof_cpu_t * const c = tprof_cpu(ci); + tprof_buf_t *old; + + old = tprof_buf_switch(c, NULL); + if (old != NULL) { + tprof_buf_free(old); + } + callout_destroy(&c->c_callout); + } + workqueue_destroy(tprof_wq); +} + +static int +tprof_start(const struct tprof_param *param) +{ + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + int error; + uint64_t freq; + + KASSERT(mutex_owned(&tprof_startstop_lock)); + if (tprof_running) { + error = EBUSY; + goto done; + } + + freq = tprof_backend_estimate_freq(); + tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF); + + error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL, + PRI_NONE, PRI_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU); + if (error != 0) { + goto done; + } + + for (CPU_INFO_FOREACH(cii, ci)) { + tprof_cpu_t * const c = tprof_cpu(ci); + tprof_buf_t *new; + tprof_buf_t *old; + + new = tprof_buf_alloc(); + old = 
tprof_buf_switch(c, new); + if (old != NULL) { + tprof_buf_free(old); + } + callout_init(&c->c_callout, CALLOUT_MPSAFE); + callout_setfunc(&c->c_callout, tprof_kick, ci); + } + + error = tprof_backend_start(); + if (error != 0) { + tprof_stop1(); + goto done; + } + + mutex_enter(&tprof_lock); + tprof_running = true; + mutex_exit(&tprof_lock); + for (CPU_INFO_FOREACH(cii, ci)) { + tprof_cpu_t * const c = tprof_cpu(ci); + + mutex_enter(&tprof_lock); + tprof_nworker++; + mutex_exit(&tprof_lock); + workqueue_enqueue(tprof_wq, &c->c_work, ci); + } +done: + return error; +} + +static void +tprof_stop(void) +{ + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + + KASSERT(mutex_owned(&tprof_startstop_lock)); + if (!tprof_running) { + goto done; + } + + tprof_backend_stop(); + + mutex_enter(&tprof_lock); + tprof_running = false; + cv_broadcast(&tprof_reader_cv); + mutex_exit(&tprof_lock); + + for (CPU_INFO_FOREACH(cii, ci)) { + mutex_enter(&tprof_lock); + while (tprof_nworker > 0) { + cv_wait(&tprof_cv, &tprof_lock); + } + mutex_exit(&tprof_lock); + } + + tprof_stop1(); +done: + ; +} + +static void +tprof_clear(void) +{ + tprof_buf_t *buf; + + mutex_enter(&tprof_reader_lock); + mutex_enter(&tprof_lock); + while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) { + if (buf != NULL) { + STAILQ_REMOVE_HEAD(&tprof_list, b_list); + KASSERT(tprof_nbuf_on_list > 0); + tprof_nbuf_on_list--; + mutex_exit(&tprof_lock); + tprof_buf_free(buf); + mutex_enter(&tprof_lock); + } + } + KASSERT(tprof_nbuf_on_list == 0); + mutex_exit(&tprof_lock); + tprof_reader_offset = 0; + mutex_exit(&tprof_reader_lock); + + memset(&tprof_stat, 0, sizeof(tprof_stat)); +} + +/* -------------------- backend interfaces */ + +/* + * tprof_sample: record a sample on the per-cpu buffer. + * + * be careful; can be called in NMI context. + * we are assuming that curcpu() is safe. 
+ */ + +void +tprof_sample(const struct trapframe *tf) +{ + tprof_cpu_t * const c = tprof_curcpu(); + tprof_buf_t * const buf = c->c_buf; + const uintptr_t pc = PC_REGS(tf); + u_int idx; + + idx = buf->b_used; + if (__predict_false(idx >= buf->b_size)) { + buf->b_overflow++; + return; + } + buf->b_data[idx].s_pc = pc; + buf->b_used = idx + 1; +} + +/* -------------------- cdevsw interfaces */ + +void tprofattach(int); + +static int +tprof_open(dev_t dev, int flags, int type, struct lwp *l) +{ + + if (minor(dev) != 0) { + return EXDEV; + } + mutex_enter(&tprof_lock); + if (tprof_owner != NULL) { + mutex_exit(&tprof_lock); + return EBUSY; + } + tprof_owner = curlwp; + mutex_exit(&tprof_lock); + + return 0; +} + +static int +tprof_close(dev_t dev, int flags, int type, struct lwp *l) +{ + + KASSERT(minor(dev) == 0); + + mutex_enter(&tprof_startstop_lock); + mutex_enter(&tprof_lock); + tprof_owner = NULL; + mutex_exit(&tprof_lock); + tprof_stop(); + tprof_clear(); + mutex_exit(&tprof_startstop_lock); + + return 0; +} + +static int +tprof_read(dev_t dev, struct uio *uio, int flags) +{ + tprof_buf_t *buf; + size_t bytes; + size_t resid; + size_t done; + int error = 0; + + KASSERT(minor(dev) == 0); + mutex_enter(&tprof_reader_lock); + while (uio->uio_resid > 0 && error == 0) { + /* + * take the first buffer from the list. + */ + mutex_enter(&tprof_lock); + buf = STAILQ_FIRST(&tprof_list); + if (buf == NULL) { + if (tprof_nworker == 0) { + mutex_exit(&tprof_lock); + error = 0; + break; + } + mutex_exit(&tprof_reader_lock); + error = cv_wait_sig(&tprof_reader_cv, &tprof_lock); + mutex_exit(&tprof_lock); + mutex_enter(&tprof_reader_lock); + continue; + } + STAILQ_REMOVE_HEAD(&tprof_list, b_list); + KASSERT(tprof_nbuf_on_list > 0); + tprof_nbuf_on_list--; + mutex_exit(&tprof_lock); + + /* + * copy it out. 
+ */ + bytes = MIN(buf->b_used * sizeof(tprof_sample_t) - + tprof_reader_offset, uio->uio_resid); + resid = uio->uio_resid; + error = uiomove((char *)buf->b_data + tprof_reader_offset, + bytes, uio); + done = resid - uio->uio_resid; + tprof_reader_offset += done; + + /* + * if we didn't consume the whole buffer, + * put it back to the list. + */ + if (tprof_reader_offset < + buf->b_used * sizeof(tprof_sample_t)) { + mutex_enter(&tprof_lock); + STAILQ_INSERT_HEAD(&tprof_list, buf, b_list); + tprof_nbuf_on_list++; + cv_broadcast(&tprof_reader_cv); + mutex_exit(&tprof_lock); + } else { + tprof_buf_free(buf); + tprof_reader_offset = 0; + } + } + mutex_exit(&tprof_reader_lock); + + return error; +} + +static int +tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l) +{ + const struct tprof_param *param; + int error = 0; + + KASSERT(minor(dev) == 0); + + switch (cmd) { + case TPROF_IOC_GETVERSION: + *(int *)data = TPROF_VERSION; + break; + case TPROF_IOC_START: + param = data; + mutex_enter(&tprof_startstop_lock); + error = tprof_start(param); + mutex_exit(&tprof_startstop_lock); + break; + case TPROF_IOC_STOP: + mutex_enter(&tprof_startstop_lock); + tprof_stop(); + mutex_exit(&tprof_startstop_lock); + break; + case TPROF_IOC_GETSTAT: + mutex_enter(&tprof_lock); + memcpy(data, &tprof_stat, sizeof(tprof_stat)); + mutex_exit(&tprof_lock); + break; + default: + error = EINVAL; + break; + } + + return error; +} + +const struct cdevsw tprof_cdevsw = { + .d_open = tprof_open, + .d_close = tprof_close, + .d_read = tprof_read, + .d_write = nowrite, + .d_ioctl = tprof_ioctl, + .d_stop = nostop, + .d_tty = notty, + .d_poll = nopoll, + .d_mmap = nommap, + .d_kqfilter = nokqfilter, + .d_flag = D_OTHER | D_MPSAFE, +}; + +void +tprofattach(int nunits) +{ + + mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&tprof_cv, "tprof"); + 
cv_init(&tprof_reader_cv, "tprofread"); + STAILQ_INIT(&tprof_list); +} diff --git a/sys/dev/tprof/tprof.h b/sys/dev/tprof/tprof.h new file mode 100644 index 000000000000..0af36957a203 --- /dev/null +++ b/sys/dev/tprof/tprof.h @@ -0,0 +1,39 @@ +/* $NetBSD: tprof.h,v 1.1 2008/01/01 21:28:38 yamt Exp $ */ + +/*- + * Copyright (c)2008 YAMAMOTO Takashi, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _DEV_TPROF_TPROF_H_ +#define _DEV_TPROF_TPROF_H_ + +uint64_t tprof_backend_estimate_freq(void); +int tprof_backend_start(void); +void tprof_backend_stop(void); + +struct trapframe; +void tprof_sample(const struct trapframe *); + +#endif /* _DEV_TPROF_TPROF_H_ */ diff --git a/sys/dev/tprof/tprof_ioctl.h b/sys/dev/tprof/tprof_ioctl.h new file mode 100644 index 000000000000..2cb6d2c82e5e --- /dev/null +++ b/sys/dev/tprof/tprof_ioctl.h @@ -0,0 +1,55 @@ +/* $NetBSD: tprof_ioctl.h,v 1.1 2008/01/01 21:28:38 yamt Exp $ */ + +/*- + * Copyright (c)2008 YAMAMOTO Takashi, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _DEV_TPROF_TPROF_IOCTL_H_ +#define _DEV_TPROF_TPROF_IOCTL_H_ + +#include + +#define TPROF_VERSION 1 + +#define TPROF_IOC_GETVERSION _IOR('T', 1, int) + +struct tprof_param { + int dummy; +}; +#define TPROF_IOC_START _IOW('T', 2, struct tprof_param) + +#define TPROF_IOC_STOP _IO('T', 3) + +struct tprof_stat { + uint64_t ts_sample; /* samples successfully recorded */ + uint64_t ts_overflow; /* samples dropped due to overflow */ + uint64_t ts_buf; /* buffers successfully queued for read(2) */ + uint64_t ts_emptybuf; /* empty buffers dropped */ + uint64_t ts_dropbuf; /* buffers dropped due to the global limit */ + uint64_t ts_dropbuf_sample; /* samples dropped with ts_dropbuf */ +}; +#define TPROF_IOC_GETSTAT _IOR('T', 4, struct tprof_stat) + +#endif /* _DEV_TPROF_TPROF_IOCTL_H_ */