Revamp tprof.

Rewrite the Intel backend to use the generic PMC interface, which is
available on all Intel CPUs. Synchronize the AMD backend with the new
interface.

The kernel identifies the PMC interface, and gives its id to userland.
Userland then queries the events itself (via cpuid etc). These events
depend on the PMC interface.

The tprof utility is rewritten to allow the user to choose which event
to count (this was not possible until now: the event was hardcoded in
the backend). The command line format is based on usr.bin/pmc, e.g.:

	tprof -e llc-misses:k -o output sleep 20

The man page is updated too, but the arguments will likely change soon
anyway so it doesn't matter a lot.

The tprof utility has three tables:

	Intel Architectural Version 1
	Intel Skylake/Kabylake
	AMD Family 10h

A CPU can support a combination of tables. For example Kabylake has
Intel-Architectural-Version-1 and its own Intel-Kabylake table.

For now the Intel Skylake/Kabylake table contains only one event, just
to demonstrate that the combination of tables works. Tested on an
Intel Core i5 Kabylake.

The code for AMD Family 10h is taken from the code I had written for
usr.bin/pmc. I haven't tested it yet, but it's the same as pmc(1), so
I guess it works as-is.

The whole thing is written in such a way that (I think) it is not
complicated to add more CPU models, and more architectures (other than
x86).
This commit is contained in:
maxv 2018-07-13 07:56:29 +00:00
parent 96ef5eaedd
commit a087cb3c40
15 changed files with 827 additions and 457 deletions

View File

@ -1,6 +1,35 @@
/* $NetBSD: tprof_amdpmi.c,v 1.7 2017/05/23 08:54:39 nonaka Exp $ */
/* $NetBSD: tprof_amdpmi.c,v 1.8 2018/07/13 07:56:29 maxv Exp $ */
/*-
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c)2008,2009 YAMAMOTO Takashi,
* All rights reserved.
*
@ -27,7 +56,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof_amdpmi.c,v 1.7 2017/05/23 08:54:39 nonaka Exp $");
__KERNEL_RCSID(0, "$NetBSD: tprof_amdpmi.c,v 1.8 2018/07/13 07:56:29 maxv Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -78,66 +107,53 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_amdpmi.c,v 1.7 2017/05/23 08:54:39 nonaka Exp
* http://developer.amd.com/wordpress/media/2012/10/Basic_Performance_Measurements.pdf
*/
/* Event flags - abbreviations as found in the documents */
#define CPU_clocks__EVENT 0x76
#define CPU_clocks__UNIT 0x00
#define DC_refills_L2__EVENT 0x42
#define DC_refills_L2__UNIT 0x1E
#define DC_refills_sys__EVENT 0x43
#define DC_refills_sys__UNIT 0x1E
/*
* Hardcode your counter here. There is no detection, so make sure it is
* supported by your CPU family.
*/
static uint32_t event = CPU_clocks__EVENT;
static uint32_t unit = CPU_clocks__UNIT;
static int ctrno = 0;
static uint64_t counter_val = 5000000;
static uint64_t counter_reset_val;
static uint32_t tprof_amdpmi_lapic_saved[MAXCPUS];
static nmi_handler_t *tprof_amdpmi_nmi_handle;
static tprof_backend_cookie_t *tprof_cookie;
static uint32_t amd_lapic_saved[MAXCPUS];
static nmi_handler_t *amd_nmi_handle;
static tprof_param_t amd_param;
static void
tprof_amdpmi_start_cpu(void *arg1, void *arg2)
tprof_amd_start_cpu(void *arg1, void *arg2)
{
struct cpu_info * const ci = curcpu();
uint64_t pesr;
uint64_t event_lo;
uint64_t event_hi;
event_hi = event >> 8;
event_lo = event & 0xff;
pesr = PESR_USR | PESR_OS | PESR_INT |
event_hi = amd_param.p_event >> 8;
event_lo = amd_param.p_event & 0xff;
pesr =
((amd_param.p_flags & TPROF_PARAM_USER) ? PESR_USR : 0) |
((amd_param.p_flags & TPROF_PARAM_KERN) ? PESR_OS : 0) |
PESR_INT |
__SHIFTIN(event_lo, PESR_EVENT_MASK_LO) |
__SHIFTIN(event_hi, PESR_EVENT_MASK_HI) |
__SHIFTIN(0, PESR_COUNTER_MASK) |
__SHIFTIN(unit, PESR_UNIT_MASK);
__SHIFTIN(amd_param.p_unit, PESR_UNIT_MASK);
wrmsr(PERFCTR(ctrno), counter_reset_val);
wrmsr(PERFEVTSEL(ctrno), pesr);
tprof_amdpmi_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT);
amd_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT);
lapic_writereg(LAPIC_PCINT, LAPIC_DLMODE_NMI);
wrmsr(PERFEVTSEL(ctrno), pesr | PESR_EN);
}
static void
tprof_amdpmi_stop_cpu(void *arg1, void *arg2)
tprof_amd_stop_cpu(void *arg1, void *arg2)
{
struct cpu_info * const ci = curcpu();
wrmsr(PERFEVTSEL(ctrno), 0);
lapic_writereg(LAPIC_PCINT, tprof_amdpmi_lapic_saved[cpu_index(ci)]);
lapic_writereg(LAPIC_PCINT, amd_lapic_saved[cpu_index(ci)]);
}
static int
tprof_amdpmi_nmi(const struct trapframe *tf, void *dummy)
tprof_amd_nmi(const struct trapframe *tf, void *dummy)
{
tprof_frame_info_t tfi;
uint64_t ctr;
@ -154,11 +170,11 @@ tprof_amdpmi_nmi(const struct trapframe *tf, void *dummy)
/* record a sample */
#if defined(__x86_64__)
tfi.tfi_pc = tf->tf_rip;
#else /* defined(__x86_64__) */
#else
tfi.tfi_pc = tf->tf_eip;
#endif /* defined(__x86_64__) */
#endif
tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS;
tprof_sample(tprof_cookie, &tfi);
tprof_sample(NULL, &tfi);
/* reset counter */
wrmsr(PERFCTR(ctrno), counter_reset_val);
@ -167,7 +183,7 @@ tprof_amdpmi_nmi(const struct trapframe *tf, void *dummy)
}
static uint64_t
tprof_amdpmi_estimate_freq(void)
tprof_amd_estimate_freq(void)
{
uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq;
uint64_t freq = 10000;
@ -175,52 +191,66 @@ tprof_amdpmi_estimate_freq(void)
counter_val = cpufreq / freq;
if (counter_val == 0) {
counter_val = UINT64_C(4000000000) / freq;
return freq;
}
return freq;
}
/*
 * Report which PMC interface this backend can drive on the current CPU.
 *
 * Returns TPROF_IDENT_AMD_GENERIC for AMD family 0x10 processors and
 * TPROF_IDENT_NONE for everything else.
 */
static uint32_t
tprof_amd_ident(void)
{
	struct cpu_info *ci = curcpu();

	/* Only AMD family 10h is recognized. */
	if (cpu_vendor == CPUVENDOR_AMD &&
	    CPUID_TO_FAMILY(ci->ci_signature) == 0x10) {
		return TPROF_IDENT_AMD_GENERIC;
	}

	return TPROF_IDENT_NONE;
}
static int
tprof_amdpmi_start(tprof_backend_cookie_t *cookie)
tprof_amd_start(const tprof_param_t *param)
{
uint64_t xc;
if (cpu_vendor != CPUVENDOR_AMD) {
if (tprof_amd_ident() == TPROF_IDENT_NONE) {
return ENOTSUP;
}
KASSERT(tprof_amdpmi_nmi_handle == NULL);
tprof_amdpmi_nmi_handle = nmi_establish(tprof_amdpmi_nmi, NULL);
KASSERT(amd_nmi_handle == NULL);
amd_nmi_handle = nmi_establish(tprof_amd_nmi, NULL);
counter_reset_val = - counter_val + 1;
xc = xc_broadcast(0, tprof_amdpmi_start_cpu, NULL, NULL);
xc_wait(xc);
memcpy(&amd_param, param, sizeof(*param));
KASSERT(tprof_cookie == NULL);
tprof_cookie = cookie;
xc = xc_broadcast(0, tprof_amd_start_cpu, NULL, NULL);
xc_wait(xc);
return 0;
}
static void
tprof_amdpmi_stop(tprof_backend_cookie_t *cookie)
tprof_amd_stop(const tprof_param_t *param)
{
uint64_t xc;
xc = xc_broadcast(0, tprof_amdpmi_stop_cpu, NULL, NULL);
xc = xc_broadcast(0, tprof_amd_stop_cpu, NULL, NULL);
xc_wait(xc);
KASSERT(tprof_amdpmi_nmi_handle != NULL);
KASSERT(tprof_cookie == cookie);
nmi_disestablish(tprof_amdpmi_nmi_handle);
tprof_amdpmi_nmi_handle = NULL;
tprof_cookie = NULL;
KASSERT(amd_nmi_handle != NULL);
nmi_disestablish(amd_nmi_handle);
amd_nmi_handle = NULL;
}
static const tprof_backend_ops_t tprof_amdpmi_ops = {
.tbo_estimate_freq = tprof_amdpmi_estimate_freq,
.tbo_start = tprof_amdpmi_start,
.tbo_stop = tprof_amdpmi_stop,
static const tprof_backend_ops_t tprof_amd_ops = {
.tbo_estimate_freq = tprof_amd_estimate_freq,
.tbo_ident = tprof_amd_ident,
.tbo_start = tprof_amd_start,
.tbo_stop = tprof_amd_stop,
};
MODULE(MODULE_CLASS_DRIVER, tprof_amdpmi, "tprof");
@ -231,7 +261,7 @@ tprof_amdpmi_modcmd(modcmd_t cmd, void *arg)
switch (cmd) {
case MODULE_CMD_INIT:
return tprof_backend_register("tprof_amd", &tprof_amdpmi_ops,
return tprof_backend_register("tprof_amd", &tprof_amd_ops,
TPROF_BACKEND_VERSION);
case MODULE_CMD_FINI:

View File

@ -1,6 +1,35 @@
/* $NetBSD: tprof_pmi.c,v 1.14 2017/05/23 08:54:39 nonaka Exp $ */
/* $NetBSD: tprof_pmi.c,v 1.15 2018/07/13 07:56:29 maxv Exp $ */
/*-
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c)2008,2009 YAMAMOTO Takashi,
* All rights reserved.
*
@ -27,7 +56,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof_pmi.c,v 1.14 2017/05/23 08:54:39 nonaka Exp $");
__KERNEL_RCSID(0, "$NetBSD: tprof_pmi.c,v 1.15 2018/07/13 07:56:29 maxv Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -46,157 +75,92 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_pmi.c,v 1.14 2017/05/23 08:54:39 nonaka Exp $"
#include <x86/nmi.h>
#include <machine/cpufunc.h>
#include <machine/cputypes.h> /* CPUVENDER_* */
#include <machine/cputypes.h> /* CPUVENDOR_* */
#include <machine/cpuvar.h> /* cpu_vendor */
#include <machine/i82489reg.h>
#include <machine/i82489var.h>
#define ESCR_T1_USR __BIT(0)
#define ESCR_T1_OS __BIT(1)
#define ESCR_T0_USR __BIT(2)
#define ESCR_T0_OS __BIT(3)
#define ESCR_TAG_ENABLE __BIT(4)
#define ESCR_TAG_VALUE __BITS(5, 8)
#define ESCR_EVENT_MASK __BITS(9, 24)
#define ESCR_EVENT_SELECT __BITS(25, 30)
#define PERFEVTSEL_EVENT_SELECT __BITS(0, 7)
#define PERFEVTSEL_UNIT_MASK __BITS(8, 15)
#define PERFEVTSEL_USR __BIT(16)
#define PERFEVTSEL_OS __BIT(17)
#define PERFEVTSEL_E __BIT(18)
#define PERFEVTSEL_PC __BIT(19)
#define PERFEVTSEL_INT __BIT(20)
#define PERFEVTSEL_EN __BIT(22)
#define PERFEVTSEL_INV __BIT(23)
#define PERFEVTSEL_COUNTER_MASK __BITS(24, 31)
#define CCCR_ENABLE __BIT(12)
#define CCCR_ESCR_SELECT __BITS(13, 15)
#define CCCR_MUST_BE_SET __BITS(16, 17)
#define CCCR_COMPARE __BIT(18)
#define CCCR_COMPLEMENT __BIT(19)
#define CCCR_THRESHOLD __BITS(20, 23)
#define CCCR_EDGE __BIT(24)
#define CCCR_FORCE_OVF __BIT(25)
#define CCCR_OVF_PMI_T0 __BIT(26)
#define CCCR_OVF_PMI_T1 __BIT(27)
#define CCCR_CASCADE __BIT(30)
#define CCCR_OVF __BIT(31)
struct msrs {
u_int msr_cccr;
u_int msr_escr;
u_int msr_counter;
};
/*
* parameters (see 253669.pdf Table A-6)
*
* XXX should not hardcode
*/
static const struct msrs msrs[] = {
{
.msr_cccr = 0x360, /* MSR_BPU_CCCR0 */
.msr_escr = 0x3a2, /* MSR_FSB_ESCR0 */
.msr_counter = 0x300, /* MSR_BPU_COUNTER0 */
},
{
.msr_cccr = 0x362, /* MSR_BPU_CCCR2 */
.msr_escr = 0x3a3, /* MSR_FSB_ESCR1 */
.msr_counter = 0x302, /* MSR_BPU_COUNTER2 */
},
};
static const u_int cccr_escr_select = 0x6; /* MSR_FSB_ESCR? */
static const u_int escr_event_select = 0x13; /* global_power_events */
static const u_int escr_event_mask = 0x1; /* running */
#define CPUID_0A_VERSION __BITS(0, 7)
#define CPUID_0A_NCOUNTERS __BITS(8, 15)
#define CPUID_0A_BITWIDTH __BITS(16, 23)
static uint64_t counter_bitwidth;
static uint64_t counter_val = 5000000;
static uint64_t counter_reset_val;
static uint32_t tprof_pmi_lapic_saved[MAXCPUS];
static nmi_handler_t *tprof_pmi_nmi_handle;
static tprof_backend_cookie_t *tprof_cookie;
static uint32_t intel_lapic_saved[MAXCPUS];
static nmi_handler_t *intel_nmi_handle;
static tprof_param_t intel_param;
static void
tprof_pmi_start_cpu(void *arg1, void *arg2)
tprof_intel_start_cpu(void *arg1, void *arg2)
{
struct cpu_info * const ci = curcpu();
const struct msrs *msr;
uint64_t cccr;
uint64_t escr;
uint64_t evtval;
if (ci->ci_smt_id >= 2) {
printf("%s: ignoring %s smt id=%u",
__func__, device_xname(ci->ci_dev),
(u_int)ci->ci_smt_id);
return;
}
msr = &msrs[ci->ci_smt_id];
escr = __SHIFTIN(escr_event_mask, ESCR_EVENT_MASK) |
__SHIFTIN(escr_event_select, ESCR_EVENT_SELECT);
cccr = CCCR_ENABLE | __SHIFTIN(cccr_escr_select, CCCR_ESCR_SELECT) |
CCCR_MUST_BE_SET;
if (ci->ci_smt_id == 0) {
escr |= ESCR_T0_OS | ESCR_T0_USR;
cccr |= CCCR_OVF_PMI_T0;
} else {
escr |= ESCR_T1_OS | ESCR_T0_USR;
cccr |= CCCR_OVF_PMI_T1;
}
evtval =
__SHIFTIN(intel_param.p_event, PERFEVTSEL_EVENT_SELECT) |
__SHIFTIN(intel_param.p_unit, PERFEVTSEL_UNIT_MASK) |
((intel_param.p_flags & TPROF_PARAM_USER) ? PERFEVTSEL_USR : 0) |
((intel_param.p_flags & TPROF_PARAM_KERN) ? PERFEVTSEL_OS : 0) |
PERFEVTSEL_INT |
PERFEVTSEL_EN;
wrmsr(msr->msr_counter, counter_reset_val);
wrmsr(msr->msr_escr, escr);
wrmsr(msr->msr_cccr, cccr);
tprof_pmi_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT);
wrmsr(MSR_PERFCTR0, counter_reset_val);
wrmsr(MSR_EVNTSEL0, evtval);
intel_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT);
lapic_writereg(LAPIC_PCINT, LAPIC_DLMODE_NMI);
}
static void
tprof_pmi_stop_cpu(void *arg1, void *arg2)
tprof_intel_stop_cpu(void *arg1, void *arg2)
{
struct cpu_info * const ci = curcpu();
const struct msrs *msr;
if (ci->ci_smt_id >= 2) {
printf("%s: ignoring %s smt id=%u",
__func__, device_xname(ci->ci_dev),
(u_int)ci->ci_smt_id);
return;
}
msr = &msrs[ci->ci_smt_id];
wrmsr(MSR_EVNTSEL0, 0);
wrmsr(MSR_PERFCTR0, 0);
wrmsr(msr->msr_escr, 0);
wrmsr(msr->msr_cccr, 0);
lapic_writereg(LAPIC_PCINT, tprof_pmi_lapic_saved[cpu_index(ci)]);
lapic_writereg(LAPIC_PCINT, intel_lapic_saved[cpu_index(ci)]);
}
static int
tprof_pmi_nmi(const struct trapframe *tf, void *dummy)
tprof_intel_nmi(const struct trapframe *tf, void *dummy)
{
struct cpu_info * const ci = curcpu();
const struct msrs *msr;
uint32_t pcint;
uint64_t cccr;
uint64_t ctr;
tprof_frame_info_t tfi;
KASSERT(dummy == NULL);
if (ci->ci_smt_id >= 2) {
/* not ours */
return 0;
}
msr = &msrs[ci->ci_smt_id];
/* check if it's for us */
cccr = rdmsr(msr->msr_cccr);
if ((cccr & CCCR_OVF) == 0) {
/* not ours */
ctr = rdmsr(MSR_PERFCTR0);
/* If the highest bit is non zero, then it's not for us. */
if ((ctr & __BIT(counter_bitwidth-1)) != 0) {
return 0;
}
/* record a sample */
#if defined(__x86_64__)
tfi.tfi_pc = tf->tf_rip;
#else /* defined(__x86_64__) */
#else
tfi.tfi_pc = tf->tf_eip;
#endif /* defined(__x86_64__) */
#endif
tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS;
tprof_sample(tprof_cookie, &tfi);
tprof_sample(NULL, &tfi);
/* reset counter */
wrmsr(msr->msr_counter, counter_reset_val);
wrmsr(msr->msr_cccr, cccr & ~CCCR_OVF);
wrmsr(MSR_PERFCTR0, counter_reset_val);
/* unmask PMI */
pcint = lapic_readreg(LAPIC_PCINT);
@ -207,7 +171,7 @@ tprof_pmi_nmi(const struct trapframe *tf, void *dummy)
}
static uint64_t
tprof_pmi_estimate_freq(void)
tprof_intel_estimate_freq(void)
{
uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq;
uint64_t freq = 10000;
@ -215,54 +179,74 @@ tprof_pmi_estimate_freq(void)
counter_val = cpufreq / freq;
if (counter_val == 0) {
counter_val = UINT64_C(4000000000) / freq;
return freq;
}
return freq;
}
static int
tprof_pmi_start(tprof_backend_cookie_t *cookie)
/*
 * Report which PMC interface this backend can drive on the current CPU.
 *
 * Returns TPROF_IDENT_INTEL_GENERIC when CPUID leaf 0x0A advertises a
 * non-zero PMC version and at least one counter, TPROF_IDENT_NONE
 * otherwise. On success the counter bit width from the same leaf is
 * stored in counter_bitwidth.
 */
static uint32_t
tprof_intel_ident(void)
{
	uint32_t regs[4];

	/* Leaf 0x0A must exist before it can be queried. */
	if (cpu_vendor != CPUVENDOR_INTEL || cpuid_level < 0x0A) {
		return TPROF_IDENT_NONE;
	}

	x86_cpuid(0x0A, regs);
	if ((regs[0] & CPUID_0A_VERSION) == 0 ||
	    (regs[0] & CPUID_0A_NCOUNTERS) == 0) {
		return TPROF_IDENT_NONE;
	}

	counter_bitwidth = __SHIFTOUT(regs[0], CPUID_0A_BITWIDTH);

	return TPROF_IDENT_INTEL_GENERIC;
}
static int
tprof_intel_start(const tprof_param_t *param)
{
struct cpu_info * const ci = curcpu();
uint64_t xc;
if (!(cpu_vendor == CPUVENDOR_INTEL &&
CPUID_TO_BASEFAMILY(ci->ci_signature) == 15)) {
if (tprof_intel_ident() == TPROF_IDENT_NONE) {
return ENOTSUP;
}
KASSERT(tprof_pmi_nmi_handle == NULL);
tprof_pmi_nmi_handle = nmi_establish(tprof_pmi_nmi, NULL);
KASSERT(intel_nmi_handle == NULL);
intel_nmi_handle = nmi_establish(tprof_intel_nmi, NULL);
counter_reset_val = - counter_val + 1;
xc = xc_broadcast(0, tprof_pmi_start_cpu, NULL, NULL);
xc_wait(xc);
memcpy(&intel_param, param, sizeof(*param));
KASSERT(tprof_cookie == NULL);
tprof_cookie = cookie;
xc = xc_broadcast(0, tprof_intel_start_cpu, NULL, NULL);
xc_wait(xc);
return 0;
}
static void
tprof_pmi_stop(tprof_backend_cookie_t *cookie)
tprof_intel_stop(const tprof_param_t *param)
{
uint64_t xc;
xc = xc_broadcast(0, tprof_pmi_stop_cpu, NULL, NULL);
xc = xc_broadcast(0, tprof_intel_stop_cpu, NULL, NULL);
xc_wait(xc);
KASSERT(tprof_pmi_nmi_handle != NULL);
KASSERT(tprof_cookie == cookie);
nmi_disestablish(tprof_pmi_nmi_handle);
tprof_pmi_nmi_handle = NULL;
tprof_cookie = NULL;
KASSERT(intel_nmi_handle != NULL);
nmi_disestablish(intel_nmi_handle);
intel_nmi_handle = NULL;
}
static const tprof_backend_ops_t tprof_pmi_ops = {
.tbo_estimate_freq = tprof_pmi_estimate_freq,
.tbo_start = tprof_pmi_start,
.tbo_stop = tprof_pmi_stop,
static const tprof_backend_ops_t tprof_intel_ops = {
.tbo_estimate_freq = tprof_intel_estimate_freq,
.tbo_ident = tprof_intel_ident,
.tbo_start = tprof_intel_start,
.tbo_stop = tprof_intel_stop,
};
MODULE(MODULE_CLASS_DRIVER, tprof_pmi, "tprof");
@ -273,7 +257,7 @@ tprof_pmi_modcmd(modcmd_t cmd, void *arg)
switch (cmd) {
case MODULE_CMD_INIT:
return tprof_backend_register("tprof_pmi", &tprof_pmi_ops,
return tprof_backend_register("tprof_pmi", &tprof_intel_ops,
TPROF_BACKEND_VERSION);
case MODULE_CMD_FINI:

View File

@ -1,4 +1,4 @@
/* $NetBSD: tprof.c,v 1.13 2015/08/20 14:40:18 christos Exp $ */
/* $NetBSD: tprof.c,v 1.14 2018/07/13 07:56:29 maxv Exp $ */
/*-
* Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.13 2015/08/20 14:40:18 christos Exp $");
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.14 2018/07/13 07:56:29 maxv Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -247,8 +247,22 @@ tprof_stop1(void)
workqueue_destroy(tprof_wq);
}
/*
 * Fill *info with the kernel-userland ABI version and, when a backend
 * is present, the identifier that backend reports via tbo_ident().
 * Without a backend ti_ident stays zero from the memset.
 *
 * The caller must hold tprof_startstop_lock.
 */
static void
tprof_getinfo(struct tprof_info *info)
{
	tprof_backend_t *backend;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	memset(info, 0, sizeof(*info));
	info->ti_version = TPROF_VERSION;

	backend = tprof_backend;
	if (backend == NULL) {
		return;
	}
	info->ti_ident = backend->tb_ops->tbo_ident();
}
static int
tprof_start(const struct tprof_param *param)
tprof_start(const tprof_param_t *param)
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
@ -296,7 +310,7 @@ tprof_start(const struct tprof_param *param)
callout_setfunc(&c->c_callout, tprof_kick, ci);
}
error = tb->tb_ops->tbo_start(NULL);
error = tb->tb_ops->tbo_start(param);
if (error != 0) {
KASSERT(tb->tb_usecount > 0);
tb->tb_usecount--;
@ -404,7 +418,7 @@ tprof_backend_lookup(const char *name)
*/
void
tprof_sample(tprof_backend_cookie_t *cookie, const tprof_frame_info_t *tfi)
tprof_sample(void *unused, const tprof_frame_info_t *tfi)
{
tprof_cpu_t * const c = tprof_curcpu();
tprof_buf_t * const buf = c->c_buf;
@ -608,14 +622,16 @@ tprof_read(dev_t dev, struct uio *uio, int flags)
static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
const struct tprof_param *param;
const tprof_param_t *param;
int error = 0;
KASSERT(minor(dev) == 0);
switch (cmd) {
case TPROF_IOC_GETVERSION:
*(int *)data = TPROF_VERSION;
case TPROF_IOC_GETINFO:
mutex_enter(&tprof_startstop_lock);
tprof_getinfo(data);
mutex_exit(&tprof_startstop_lock);
break;
case TPROF_IOC_START:
param = data;

View File

@ -1,4 +1,4 @@
/* $NetBSD: tprof.h,v 1.5 2011/02/05 14:04:40 yamt Exp $ */
/* $NetBSD: tprof.h,v 1.6 2018/07/13 07:56:29 maxv Exp $ */
/*-
* Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
@ -37,12 +37,11 @@
#include <dev/tprof/tprof_types.h>
typedef struct tprof_backend_cookie tprof_backend_cookie_t;
typedef struct tprof_backend_ops {
uint64_t (*tbo_estimate_freq)(void); /* samples per second */
int (*tbo_start)(tprof_backend_cookie_t *);
void (*tbo_stop)(tprof_backend_cookie_t *);
uint32_t (*tbo_ident)(void);
int (*tbo_start)(const tprof_param_t *);
void (*tbo_stop)(const tprof_param_t *);
} tprof_backend_ops_t;
#define TPROF_BACKEND_VERSION 3
@ -54,6 +53,6 @@ typedef struct {
bool tfi_inkernel; /* if tfi_pc is in the kernel address space */
} tprof_frame_info_t;
void tprof_sample(tprof_backend_cookie_t *, const tprof_frame_info_t *);
void tprof_sample(void *, const tprof_frame_info_t *);
#endif /* _DEV_TPROF_TPROF_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: tprof_ioctl.h,v 1.3 2011/04/14 16:23:59 yamt Exp $ */
/* $NetBSD: tprof_ioctl.h,v 1.4 2018/07/13 07:56:29 maxv Exp $ */
/*-
* Copyright (c)2008,2010 YAMAMOTO Takashi,
@ -37,14 +37,15 @@
#include <dev/tprof/tprof_types.h>
#define TPROF_VERSION 3 /* kernel-userland ABI version */
#define TPROF_VERSION 4 /* kernel-userland ABI version */
#define TPROF_IOC_GETVERSION _IOR('T', 1, int)
struct tprof_param {
int dummy;
struct tprof_info {
uint32_t ti_version;
uint32_t ti_ident;
};
#define TPROF_IOC_START _IOW('T', 2, struct tprof_param)
#define TPROF_IOC_GETINFO _IOR('T', 1, struct tprof_info)
#define TPROF_IOC_START _IOW('T', 2, tprof_param_t)
#define TPROF_IOC_STOP _IO('T', 3)

View File

@ -1,4 +1,4 @@
/* $NetBSD: tprof_types.h,v 1.2 2011/04/14 16:23:59 yamt Exp $ */
/* $NetBSD: tprof_types.h,v 1.3 2018/07/13 07:56:29 maxv Exp $ */
/*-
* Copyright (c)2010,2011 YAMAMOTO Takashi,
@ -30,14 +30,14 @@
#define _DEV_TPROF_TPROF_TYPES_H_
/*
* definitions used by both of kernel and userland
* definitions used by both kernel and userland
*/
#if defined(_KERNEL)
#include <sys/types.h>
#else /* defined(_KERNEL) */
#else
#include <stdint.h>
#endif /* defined(_KERNEL) */
#endif
typedef struct {
uint32_t s_pid; /* process id */
@ -47,10 +47,22 @@ typedef struct {
uintptr_t s_pc; /* program counter */
} tprof_sample_t;
/*
* s_flags
*/
/*
 * Profiling parameters handed from userland to the kernel with
 * TPROF_IOC_START and forwarded to the backend's tbo_start().
 */
typedef struct tprof_param {
uint64_t p_event; /* event class */
uint64_t p_unit; /* unit within the event class */
uint64_t p_flags; /* TPROF_PARAM_* bits */
} tprof_param_t;
/* s_flags */
#define TPROF_SAMPLE_INKERNEL 1 /* s_pc is in kernel address space */
/* p_flags */
#define TPROF_PARAM_KERN 0x01
#define TPROF_PARAM_USER 0x02
/* ti_ident */
#define TPROF_IDENT_NONE 0x00
#define TPROF_IDENT_INTEL_GENERIC 0x01
#define TPROF_IDENT_AMD_GENERIC 0x02
#endif /* _DEV_TPROF_TPROF_TYPES_H_ */

View File

@ -1,7 +1,16 @@
# $NetBSD: Makefile,v 1.3 2011/11/26 05:02:44 yamt Exp $
# $NetBSD: Makefile,v 1.4 2018/07/13 07:56:29 maxv Exp $
.PATH: ${.CURDIR}/arch
PROG= tprof
MAN= tprof.8
SRCS= tprof.c
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "x86_64"
SRCS+= tprof_x86.c
.else
SRCS+= tprof_noarch.c
.endif
CPPFLAGS+= -I${NETBSDSRCDIR}/sys/

View File

@ -1,112 +0,0 @@
$NetBSD: README,v 1.9 2008/01/30 14:16:42 ad Exp $
NOTE:
- tprof driver currently only supports pentium4 (netburst) processors.
- it samples program counters on every PMIs.
- it's currently hardcoded to use global_power_events events.
for details, see x86/x86/tprof_pmi.c and intel's processor manuals.
usage:
0. set SIZEOF_PTR environment variable, which is used by tpfmt.sh and tpann.sh.
if not set, SIZEOF_PTR=4 is assumed.
1. add a line to your kernel config.
pseudo-device tprof
2. create a device special file.
# mknod /dev/tprof c 191 0
3. run the tprof command.
# tprof -o /tmp/foo sleep 1
tprof statistics:
sample 57
overflow 0
buf 3
emptybuf 3
dropbuf 0
dropbuf_sample 0
4. format the result.
the first line in the following example means that 11 samples have been
taken at 0xc0396c36, whose symbolic name is lapic_gettick+0x6.
# sh ./tpfmt.sh < /tmp/foo
11 c0396c36 lapic_gettick+0x6
5 c039b98a x86_pause+0x2
4 c010cf9d __cpu_simple_lock+0xd
2 c010cfcd __cpu_simple_lock_try+0xd
2 c039b571 bus_space_read_4+0x11
1 c01005c8 sse2_zero_page+0x18
1 c0100624 sse2_copy_page+0x34
1 c010ceeb mutex_spin_enter+0x2b
1 c010cef5 mutex_spin_enter+0x35
1 c010cf32 mutex_spin_exit+0x32
1 c0119ed0 in_localaddr+0x30
1 c012d0fd tcp_output+0x1fbd
1 c02980c2 amap_copy+0x42
1 c02a0100 uvm_map_lookup_entry_bytree+0x20
1 c02a27fe uvm_tree_RB_REMOVE+0xee
1 c02a8914 uvm_pagelookup+0x4
1 c02a9d5c uvm_pagefree+0xfc
1 c02a9e36 uvm_pagefree+0x1d6
1 c02dd9d1 _kernel_unlock+0xa1
1 c02e0285 mutex_vector_enter+0x15
1 c02eb83a sleepq_wake+0x5a
1 c0303467 pool_cache_get_paddr+0x97
1 c030368b pool_cache_put_slow+0x6b
1 c0321ed3 pffasttimo+0x33
1 c034547a VOP_LOCK+0xa
1 c0346235 VOP_ACCESS+0x45
1 c034a749 genfs_unlock+0x29
1 c038f251 cpu_idle+0x31
1 c03938da pmap_write_protect+0xaa
1 c0394305 pmap_do_remove+0x2e5
1 c03944b3 pmap_do_remove+0x493
1 c0396cdf lapic_delay+0x5f
1 c0396d19 lapic_delay+0x99
1 c0396d1d lapic_delay+0x9d
1 c0397429 lapic_clockintr+0x19
1 c039b984 x86_mwait+0xc
1 c042f66a _atomic_swap_32+0xa
5. tpann.sh is another formatter. it outputs "objdump -d" with numbers of
samples for each addresses.
# tprof -o /tmp/bar sleep 100
# sh ./tpann.sh < /tmp/bar
:
snip
:
c01005e0 <sse2_zero_page>:
4 c01005e0: 55 push %ebp
11 c01005e1: 89 e5 mov %esp,%ebp
1 c01005e3: 8b 54 24 08 mov 0x8(%esp),%edx
3 c01005e7: b9 00 10 00 00 mov $0x1000,%ecx
1 c01005ec: 31 c0 xor %eax,%eax
1 c01005ee: 89 f6 mov %esi,%esi
7936 c01005f0: 0f c3 42 00 movnti %eax,0x0(%edx)
6371 c01005f4: 0f c3 42 04 movnti %eax,0x4(%edx)
1220 c01005f8: 0f c3 42 08 movnti %eax,0x8(%edx)
741 c01005fc: 0f c3 42 0c movnti %eax,0xc(%edx)
1178 c0100600: 0f c3 42 10 movnti %eax,0x10(%edx)
1334 c0100604: 0f c3 42 14 movnti %eax,0x14(%edx)
976 c0100608: 0f c3 42 18 movnti %eax,0x18(%edx)
1299 c010060c: 0f c3 42 1c movnti %eax,0x1c(%edx)
954 c0100610: 83 e9 20 sub $0x20,%ecx
45 c0100613: 8d 52 20 lea 0x20(%edx),%edx
238 c0100616: 75 d8 jne c01005f0 <sse2_zero_page+0x10>
71 c0100618: 0f ae f8 sfence
297 c010061b: 5d pop %ebp
19 c010061c: c3 ret
0 c010061d: 8d 76 00 lea 0x0(%esi),%esi
:
snip
:

View File

@ -0,0 +1,63 @@
/* $NetBSD: tprof_noarch.c,v 1.1 2018/07/13 07:56:29 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <err.h>
#include <machine/specialreg.h>
#include <dev/tprof/tprof_ioctl.h>
#include "../tprof.h"
int tprof_event_init(uint32_t);
void tprof_event_list(void);
void tprof_event_lookup(const char *, struct tprof_param *);
/*
 * Stub for architectures without event tables: always terminates the
 * program through errx(3) with EXIT_FAILURE, so it never returns a value.
 * The ident argument is ignored.
 */
int
tprof_event_init(uint32_t ident)
{
errx(EXIT_FAILURE, "architecture not supported");
}
/*
 * Stub for architectures without event tables: always terminates the
 * program through errx(3) with EXIT_FAILURE.
 */
void
tprof_event_list(void)
{
errx(EXIT_FAILURE, "architecture not supported");
}
/*
 * Stub for architectures without event tables: always terminates the
 * program through errx(3) with EXIT_FAILURE. Neither name nor param is
 * touched.
 */
void
tprof_event_lookup(const char *name, struct tprof_param *param)
{
errx(EXIT_FAILURE, "architecture not supported");
}

View File

@ -0,0 +1,358 @@
/* $NetBSD: tprof_x86.c,v 1.1 2018/07/13 07:56:29 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <err.h>
#include <machine/specialreg.h>
#include <dev/tprof/tprof_ioctl.h>
#include "../tprof.h"
int tprof_event_init(uint32_t);
void tprof_event_list(void);
void tprof_event_lookup(const char *, struct tprof_param *);
/*
 * One named event. The tables below initialize these positionally, so
 * the field order must not change.
 */
struct name_to_event {
const char *name; /* event name */
uint64_t event; /* event select value */
uint64_t unit; /* unit mask value */
bool enabled; /* cleared by the init code for unsupported events */
};
/*
 * A named group of events.
 */
struct event_table {
const char *tablename; /* human-readable table name */
struct name_to_event *names; /* array of events */
size_t nevents; /* number of entries in names[] */
struct event_table *next; /* presumably chains the tables a CPU supports -- TODO confirm */
};
static struct event_table *cpuevents = NULL;
/*
 * Execute the CPUID instruction.
 *
 * On entry *eax and *ecx supply the EAX and ECX input registers; the
 * incoming values of *ebx and *edx are not passed to the instruction.
 * On return all four pointers receive the resulting EAX, EBX, ECX and
 * EDX values.
 */
static void x86_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
asm volatile("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx));
}
/* -------------------------------------------------------------------------- */
/*
* Intel Architectural Version 1.
*/
/*
 * Entry order matters: init_intel_arch1() uses the array index of each
 * entry when it checks the CPUID leaf 0x0A EBX flags, so entries must not
 * be reordered or inserted in the middle.
 */
static struct name_to_event intel_arch1_names[] = {
/* Event Name - Event Select - UMask - Enabled */
{ "unhalted-core-cycles", 0x3C, 0x00, true },
{ "instruction-retired", 0xC0, 0x00, true },
{ "unhalted-reference-cycles", 0x3C, 0x01, true },
{ "llc-reference", 0x2E, 0x4F, true },
{ "llc-misses", 0x2E, 0x41, true },
{ "branch-instruction-retired", 0xC4, 0x00, true },
{ "branch-misses-retired", 0xC5, 0x00, true },
};
/*
 * Descriptor for the architectural events. It starts out unlinked;
 * init_intel_generic() may chain a model-specific table behind it.
 */
static struct event_table intel_arch1 = {
	.next = NULL,
	.tablename = "Intel Architectural Version 1",
	.nevents = sizeof(intel_arch1_names) / sizeof(intel_arch1_names[0]),
	.names = intel_arch1_names,
};
/*
 * Prepare the Intel Architectural Version 1 table for the running CPU.
 *
 * CPUID leaf 0x0A describes the architectural events:
 *   - EAX[31:24] is the number of valid flag bits in EBX;
 *   - EBX bit N, when set, means architectural event N is NOT available.
 * Event N is the N-th entry of intel_arch1_names, so entry i is disabled
 * when its flag lies beyond the advertised vector length or when the flag
 * itself is set.
 *
 * Returns a pointer to the (statically allocated) table; never NULL.
 */
static struct event_table *
init_intel_arch1(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int veclen;
	struct event_table *table;
	size_t i;

	eax = 0x0A;
	ebx = 0;
	ecx = 0;
	edx = 0;
	x86_cpuid(&eax, &ebx, &ecx, &edx);

	/* Length of the EBX availability bit vector. */
	veclen = (eax >> 24) & 0xFF;

	table = &intel_arch1;
	for (i = 0; i < table->nevents; i++) {
		/*
		 * Disable the unsupported events. The flag for event i is
		 * bit i, i.e. (1 << i); the table is far shorter than 32
		 * entries so the shift cannot overflow.
		 */
		if (i >= veclen || (ebx & (1U << i)) != 0)
			table->names[i].enabled = false;
	}

	return table;
}
/*
* Intel Skylake/Kabylake. TODO: there are many more events available.
*/
/* Model-specific events, offered in addition to the architectural ones. */
static struct name_to_event intel_skylake_kabylake_names[] = {
/* Event Name - Event Select - UMask - Enabled */
{ "itlb-misses-causes-a-walk", 0x85, 0x01, true },
};
/* Descriptor for the Skylake/Kabylake model-specific events. */
static struct event_table intel_skylake_kabylake = {
	.next = NULL,
	.tablename = "Intel Skylake/Kabylake",
	.nevents = sizeof(intel_skylake_kabylake_names) /
	    sizeof(intel_skylake_kabylake_names[0]),
	.names = intel_skylake_kabylake_names,
};
/*
 * Return the Skylake/Kabylake table. No run-time filtering is done: every
 * entry keeps the 'enabled' value it was statically given.
 */
static struct event_table *
init_intel_skylake_kabylake(void)
{
	struct event_table *table = &intel_skylake_kabylake;

	return table;
}
/*
 * Build the table chain for an Intel CPU.
 *
 * The chain always starts with the Architectural Version 1 table. When the
 * model number reported by CPUID leaf 0x01 is one of the known
 * Skylake/Kabylake models, the matching model-specific table is linked
 * behind it.
 *
 * Returns the head of the chain.
 */
static struct event_table *
init_intel_generic(void)
{
	unsigned int sig, rb, rc, rd;
	unsigned int model;
	struct event_table *head;

	/*
	 * The kernel made sure the Architectural Version 1 PMCs were
	 * present.
	 */
	head = init_intel_arch1();

	/*
	 * The additional (non-architectural) events depend on the CPU
	 * model, so fetch the processor signature.
	 */
	sig = 0x01;
	rb = 0;
	rc = 0;
	rd = 0;
	x86_cpuid(&sig, &rb, &rc, &rd);

	model = CPUID_TO_MODEL(sig);
	if (model == 0x4E || model == 0x5E ||	/* Skylake */
	    model == 0x8E || model == 0x9E) {	/* Kabylake */
		head->next = init_intel_skylake_kabylake();
	}

	return head;
}
/* -------------------------------------------------------------------------- */
/*
* AMD Family 10h
*/
/*
 * Columns follow struct name_to_event: event name, event code, unit value,
 * enabled flag. The F10H_* constants are not defined in this file --
 * presumably they come from <machine/specialreg.h>; TODO confirm.
 */
static struct name_to_event amd_f10h_names[] = {
{ "seg-load-all", F10H_SEGMENT_REG_LOADS, 0x7f, true },
{ "seg-load-es", F10H_SEGMENT_REG_LOADS, 0x01, true },
{ "seg-load-cs", F10H_SEGMENT_REG_LOADS, 0x02, true },
{ "seg-load-ss", F10H_SEGMENT_REG_LOADS, 0x04, true },
{ "seg-load-ds", F10H_SEGMENT_REG_LOADS, 0x08, true },
{ "seg-load-fs", F10H_SEGMENT_REG_LOADS, 0x10, true },
{ "seg-load-gs", F10H_SEGMENT_REG_LOADS, 0x20, true },
{ "seg-load-hs", F10H_SEGMENT_REG_LOADS, 0x40, true },
{ "l1cache-access", F10H_DATA_CACHE_ACCESS, 0, true },
{ "l1cache-miss", F10H_DATA_CACHE_MISS, 0, true },
{ "l1cache-refill", F10H_DATA_CACHE_REFILL_FROM_L2, 0x1f, true },
{ "l1cache-refill-invalid", F10H_DATA_CACHE_REFILL_FROM_L2, 0x01, true },
{ "l1cache-refill-shared", F10H_DATA_CACHE_REFILL_FROM_L2, 0x02, true },
{ "l1cache-refill-exclusive", F10H_DATA_CACHE_REFILL_FROM_L2, 0x04, true },
{ "l1cache-refill-owner", F10H_DATA_CACHE_REFILL_FROM_L2, 0x08, true },
{ "l1cache-refill-modified", F10H_DATA_CACHE_REFILL_FROM_L2, 0x10, true },
{ "l1cache-load", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x1f, true },
{ "l1cache-load-invalid", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x01, true },
{ "l1cache-load-shared", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x02, true },
{ "l1cache-load-exclusive", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x04, true },
{ "l1cache-load-owner", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x08, true },
{ "l1cache-load-modified", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x10, true },
{ "l1cache-writeback", F10H_CACHE_LINES_EVICTED, 0x1f, true },
{ "l1cache-writeback-invalid", F10H_CACHE_LINES_EVICTED, 0x01, true },
{ "l1cache-writeback-shared", F10H_CACHE_LINES_EVICTED, 0x02, true },
{ "l1cache-writeback-exclusive",F10H_CACHE_LINES_EVICTED, 0x04, true },
{ "l1cache-writeback-owner", F10H_CACHE_LINES_EVICTED, 0x08, true },
{ "l1cache-writeback-modified", F10H_CACHE_LINES_EVICTED, 0x10, true },
{ "l1DTLB-hit-all", F10H_L1_DTLB_HIT, 0x07, true },
{ "l1DTLB-hit-4Kpage", F10H_L1_DTLB_HIT, 0x01, true },
{ "l1DTLB-hit-2Mpage", F10H_L1_DTLB_HIT, 0x02, true },
{ "l1DTLB-hit-1Gpage", F10H_L1_DTLB_HIT, 0x04, true },
{ "l1DTLB-miss-all", F10H_L1_DTLB_MISS, 0x07, true },
{ "l1DTLB-miss-4Kpage", F10H_L1_DTLB_MISS, 0x01, true },
{ "l1DTLB-miss-2Mpage", F10H_L1_DTLB_MISS, 0x02, true },
{ "l1DTLB-miss-1Gpage", F10H_L1_DTLB_MISS, 0x04, true },
{ "l2DTLB-miss-all", F10H_L2_DTLB_MISS, 0x03, true },
{ "l2DTLB-miss-4Kpage", F10H_L2_DTLB_MISS, 0x01, true },
{ "l2DTLB-miss-2Mpage", F10H_L2_DTLB_MISS, 0x02, true },
/* l2DTLB-miss-1Gpage: reserved on some revisions, so disabled */
{ "l1ITLB-miss", F10H_L1_ITLB_MISS, 0, true },
{ "l2ITLB-miss-all", F10H_L2_ITLB_MISS, 0x03, true },
{ "l2ITLB-miss-4Kpage", F10H_L2_ITLB_MISS, 0x01, true },
{ "l2ITLB-miss-2Mpage", F10H_L2_ITLB_MISS, 0x02, true },
{ "mem-misalign-ref", F10H_MISALIGNED_ACCESS, 0, true },
{ "ins-fetch", F10H_INSTRUCTION_CACHE_FETCH, 0, true },
{ "ins-fetch-miss", F10H_INSTRUCTION_CACHE_MISS, 0, true },
{ "ins-refill-l2", F10H_INSTRUCTION_CACHE_REFILL_FROM_L2, 0, true },
{ "ins-refill-sys", F10H_INSTRUCTION_CACHE_REFILL_FROM_SYS, 0, true },
{ "ins-fetch-stall", F10H_INSTRUCTION_FETCH_STALL, 0, true },
{ "ins-retired", F10H_RETIRED_INSTRUCTIONS, 0, true },
{ "ins-empty", F10H_DECODER_EMPTY, 0, true },
{ "ops-retired", F10H_RETIRED_UOPS, 0, true },
{ "branch-retired", F10H_RETIRED_BRANCH, 0, true },
{ "branch-miss-retired", F10H_RETIRED_MISPREDICTED_BRANCH,0, true },
{ "branch-taken-retired", F10H_RETIRED_TAKEN_BRANCH, 0, true },
{ "branch-taken-miss-retired", F10H_RETIRED_TAKEN_BRANCH_MISPREDICTED, 0, true },
{ "branch-far-retired", F10H_RETIRED_FAR_CONTROL_TRANSFER, 0, true },
{ "branch-resync-retired", F10H_RETIRED_BRANCH_RESYNC, 0, true },
{ "branch-near-retired", F10H_RETIRED_NEAR_RETURNS, 0, true },
{ "branch-near-miss-retired", F10H_RETIRED_NEAR_RETURNS_MISPREDICTED, 0, true },
{ "branch-indirect-miss-retired", F10H_RETIRED_INDIRECT_BRANCH_MISPREDICTED, 0, true },
{ "int-hw", F10H_INTERRUPTS_TAKEN, 0, true },
{ "int-cycles-masked", F10H_INTERRUPTS_MASKED_CYCLES, 0, true },
{ "int-cycles-masked-pending",
F10H_INTERRUPTS_MASKED_CYCLES_INTERRUPT_PENDING, 0, true },
{ "fpu-exceptions", F10H_FPU_EXCEPTIONS, 0, true },
{ "break-match0", F10H_DR0_BREAKPOINT_MATCHES, 0, true },
{ "break-match1", F10H_DR1_BREAKPOINT_MATCHES, 0, true },
{ "break-match2", F10H_DR2_BREAKPOINT_MATCHES, 0, true },
{ "break-match3", F10H_DR3_BREAKPOINT_MATCHES, 0, true },
};
/* Descriptor for the AMD Family 10h events; it is the only AMD table. */
static struct event_table amd_f10h = {
	.next = NULL,
	.tablename = "AMD Family 10h",
	.nevents = sizeof(amd_f10h_names) / sizeof(amd_f10h_names[0]),
	.names = amd_f10h_names,
};
/*
 * Return the AMD Family 10h table. Nothing is probed at run time: every
 * entry keeps the 'enabled' value it was statically given.
 */
static struct event_table *
init_amd_f10h(void)
{
	struct event_table *table = &amd_f10h;

	return table;
}
/*
 * Pick the event table for an AMD CPU from the family number reported by
 * CPUID leaf 0x01.
 *
 * Returns the table for a known family, or NULL when the family has no
 * table in this file.
 */
static struct event_table *
init_amd_generic(void)
{
	unsigned int sig, rb, rc, rd;

	sig = 0x01;
	rb = 0;
	rc = 0;
	rd = 0;
	x86_cpuid(&sig, &rb, &rc, &rd);

	if (CPUID_TO_FAMILY(sig) == 0x10)
		return init_amd_f10h();

	return NULL;
}
/* -------------------------------------------------------------------------- */
/*
 * Select the event tables matching the PMC interface 'ident' and store the
 * head of the chain in 'cpuevents'.
 *
 * Returns 0 when a table chain is available afterwards, -1 otherwise
 * (TPROF_IDENT_NONE, or no table was selected for this CPU). An identifier
 * other than the ones handled below leaves 'cpuevents' untouched.
 */
int
tprof_event_init(uint32_t ident)
{
	if (ident == TPROF_IDENT_NONE)
		return -1;

	if (ident == TPROF_IDENT_INTEL_GENERIC)
		cpuevents = init_intel_generic();
	else if (ident == TPROF_IDENT_AMD_GENERIC)
		cpuevents = init_amd_generic();

	if (cpuevents != NULL)
		return 0;
	return -1;
}
static void
recursive_event_list(struct event_table *table)
{
size_t i;
printf("%s:\n", table->tablename);
for (i = 0; i < table->nevents; i++) {
if (!table->names[i].enabled)
continue;
printf("\t%s\n", table->names[i].name);
}
printf("\n");
if (table->next != NULL) {
recursive_event_list(table->next);
}
}
/*
 * Print every enabled event of the running CPU, grouped by table.
 * tprof_event_init() must have succeeded beforehand.
 */
void
tprof_event_list(void)
{
	struct event_table *head = cpuevents;

	recursive_event_list(head);
}
static void
recursive_event_lookup(struct event_table *table, const char *name,
struct tprof_param *param)
{
size_t i;
for (i = 0; i < table->nevents; i++) {
if (!table->names[i].enabled)
continue;
if (!strcmp(table->names[i].name, name)) {
param->p_event = table->names[i].event;
param->p_unit = table->names[i].unit;
return;
}
}
if (table->next != NULL) {
recursive_event_lookup(table->next, name, param);
} else {
errx(EXIT_FAILURE, "event '%s' unknown", name);
}
}
/*
 * Translate the event 'name' into the p_event/p_unit fields of 'param'
 * using the tables of the running CPU. Does not return when the name is
 * unknown (the lookup helper exits). tprof_event_init() must have succeeded
 * beforehand.
 */
void
tprof_event_lookup(const char *name, struct tprof_param *param)
{
	struct event_table *head = cpuevents;

	recursive_event_lookup(head, name, param);
}

View File

@ -1,44 +0,0 @@
#! /bin/sh
# $NetBSD: tpann.sh,v 1.2 2008/01/14 12:49:54 yamt Exp $
# /*-
# * Copyright (c)2008 YAMAMOTO Takashi,
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# * 1. Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * 2. Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in the
# * documentation and/or other materials provided with the distribution.
# *
# * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# * SUCH DAMAGE.
# */
# usage: tprof -c sleep 1 | sh tpann.sh
# Object file that gets disassembled and annotated.
OBJ=/netbsd
# Number of bytes hexdump consumes per value; defaults to 4 when SIZEOF_PTR
# is unset or empty.
if [ ! "${SIZEOF_PTR}" ]; then
SIZEOF_PTR=4
fi
# The subshell emits two streams back to back:
#   1. stdin dumped as one hex value per line, counted with uniq -c and
#      prefixed with "SAMPLE: " (fields: tag, count, value);
#   2. the disassembly of ${OBJ}.
# awk remembers the count of every SAMPLE line keyed by "<value>:", then
# prefixes each disassembly line that starts with "<hex>:" with its count
# and prints every other line unchanged.
(hexdump -v -e "/${SIZEOF_PTR} \"%x\n\""|sort|uniq -c|sed -e 's/^/SAMPLE: /'
objdump -d --disassemble-zeroes ${OBJ}) | \
awk '
/^SAMPLE:/ { samples[$3 ":"] = $2; next }
/^[0-9a-f]*:/ { printf("%8d %s\n", samples[$1], $0); next }
// { print }
'

View File

@ -1,40 +0,0 @@
#! /bin/sh
# $NetBSD: tpfmt.sh,v 1.3 2009/10/13 00:49:38 yamt Exp $
# /*-
# * Copyright (c)2008 YAMAMOTO Takashi,
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# * 1. Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * 2. Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in the
# * documentation and/or other materials provided with the distribution.
# *
# * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# * SUCH DAMAGE.
# */
# usage: tprof -c sleep 1 | sh tpfmt.sh
# Number of bytes hexdump consumes per value; defaults to 4 when SIZEOF_PTR
# is unset or empty.
if [ ! "${SIZEOF_PTR}" ]; then
	SIZEOF_PTR=4
fi

# Dump stdin as one hex value per line, count identical values and sort the
# result by descending count. Each resulting line is "<count> <value>".
hexdump -v -e "/${SIZEOF_PTR} \"%x\n\"" | sort | uniq -c | sort -nr |
while read -r count addr; do
	# Pass the fields as arguments, never as the printf format string,
	# so that a stray '%' or '\' in the input cannot be interpreted.
	printf '%s\t%s\t' "$count" "$addr"
	# Print the symbol name found in the first lines of the disassembly
	# starting at that address.
	objdump -d --start-address="0x$addr" /netbsd | head -7 |
	    sed -ne '/.*<\(.*\)>:/{s//\1/;p;}'
done

View File

@ -1,4 +1,4 @@
.\" $NetBSD: tprof.8,v 1.3 2011/12/09 15:26:48 yamt Exp $
.\" $NetBSD: tprof.8,v 1.4 2018/07/13 07:56:29 maxv Exp $
.\"
.\" Copyright (c)2011 YAMAMOTO Takashi,
.\" All rights reserved.
@ -24,21 +24,19 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" ------------------------------------------------------------
.Dd November 26, 2011
.Dd July 13, 2018
.Dt TPROF 8
.Os
.\" ------------------------------------------------------------
.Sh NAME
.Nm tprof
.Nd record tprof profiling samples
.\" ------------------------------------------------------------
.Sh SYNOPSIS
.Nm
.Op Fl l
.Op Fl e Ar name:option
.Op Fl c
.Op Fl o Ar file
.Ar command ...
.\" ------------------------------------------------------------
.Sh DESCRIPTION
The
.Nm
@ -58,6 +56,18 @@ The
.Nm
utility accepts the following options.
.Bl -tag -width hogehoge
.It Fl l
Display a list of performance counter events available on the system.
.It Fl e Ar name:option
.Ar name
specifies the name of the event to count; it must be taken from the list of
available events.
.Ar option
specifies the source of the event; it must be a combination of
.Ar u
(userland) and
.Ar k
(kernel).
.It Fl o Ar file
Write the collected samples to the file named
.Ar file .
@ -67,14 +77,12 @@ The default is
Write the collected samples to the standard output.
Note that the output is a binary stream.
.El
.\" ------------------------------------------------------------
.Sh EXAMPLES
The following command profiles the system for 1 second and shows
the top-10 kernel functions which likely consumed CPU cycles.
the top-10 kernel functions which likely caused LLC misses.
.Bd -literal
tprof -c sleep 1 2>/dev/null | tpfmt -skCLP | head -10
tprof -e llc-misses:k -c sleep 1 2>/dev/null | tpfmt -skCLP | head -10
.Ed
.\" ------------------------------------------------------------
.Sh DIAGNOSTICS
The
.Nm
@ -97,23 +105,17 @@ exceeds the limit.
The number of samples dropped because the buffers containing the samples
were dropped.
.El
.\" ------------------------------------------------------------
.\".Sh HISTORY
.\"The
.\".Nm
.\"utility first appeared in
.\".Nx XXX .
.\" ------------------------------------------------------------
.Sh SEE ALSO
.Xr tpfmt 1 ,
.Xr tprof 4
.\" ------------------------------------------------------------
.Sh AUTHORS
The
.Nm
utility is written by
.An YAMAMOTO Takashi .
.\" ------------------------------------------------------------
It was revamped by
.An Maxime Villard
in 2018.
.Sh CAVEATS
The contents and representation of recorded samples are undocumented and
will likely be changed for future releases of

View File

@ -1,6 +1,35 @@
/* $NetBSD: tprof.c,v 1.5 2012/01/10 23:39:33 joerg Exp $ */
/* $NetBSD: tprof.c,v 1.6 2018/07/13 07:56:29 maxv Exp $ */
/*-
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c)2008 YAMAMOTO Takashi,
* All rights reserved.
*
@ -28,7 +57,7 @@
#include <sys/cdefs.h>
#ifndef lint
__RCSID("$NetBSD: tprof.c,v 1.5 2012/01/10 23:39:33 joerg Exp $");
__RCSID("$NetBSD: tprof.c,v 1.6 2018/07/13 07:56:29 maxv Exp $");
#endif /* not lint */
#include <sys/ioctl.h>
@ -47,6 +76,7 @@ __RCSID("$NetBSD: tprof.c,v 1.5 2012/01/10 23:39:33 joerg Exp $");
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "tprof.h"
#define _PATH_TPROF "/dev/tprof"
@ -59,6 +89,10 @@ usage(void)
fprintf(stderr, "%s [options] command ...\n", getprogname());
fprintf(stderr, "\n");
fprintf(stderr, "-e name:{u}{k}\t"
"the event to count.\n");
fprintf(stderr, "-l\t\t"
"list the events.\n");
fprintf(stderr, "-o filename\t"
"output to the file. [default: -o tprof.out]\n");
fprintf(stderr, "-c\t\t"
@ -102,6 +136,7 @@ int
main(int argc, char *argv[])
{
struct tprof_param param;
struct tprof_info info;
struct tprof_stat ts;
const char *outfile = "tprof.out";
bool cflag = false;
@ -110,16 +145,49 @@ main(int argc, char *argv[])
int error;
int ret;
int ch;
int version;
char *tokens[2];
while ((ch = getopt(argc, argv, "co:")) != -1) {
memset(&param, 0, sizeof(param));
devfd = open(_PATH_TPROF, O_RDWR);
if (devfd == -1) {
err(EXIT_FAILURE, "%s", _PATH_TPROF);
}
ret = ioctl(devfd, TPROF_IOC_GETINFO, &info);
if (ret == -1) {
err(EXIT_FAILURE, "TPROF_IOC_GETINFO");
}
if (info.ti_version != TPROF_VERSION) {
errx(EXIT_FAILURE, "version mismatch: version=%d, expected=%d",
info.ti_version, TPROF_VERSION);
}
if (tprof_event_init(info.ti_ident) == -1) {
err(EXIT_FAILURE, "cpu not supported");
}
while ((ch = getopt(argc, argv, "clo:e:")) != -1) {
switch (ch) {
case 'c':
cflag = true;
break;
case 'l':
tprof_event_list();
return 0;
case 'o':
outfile = optarg;
break;
case 'e':
tokens[0] = strtok(optarg, ":");
tokens[1] = strtok(NULL, ":");
if (tokens[1] == NULL)
usage();
tprof_event_lookup(tokens[0], &param);
if (strchr(tokens[1], 'u'))
param.p_flags |= TPROF_PARAM_USER;
if (strchr(tokens[1], 'k'))
param.p_flags |= TPROF_PARAM_KERN;
break;
default:
usage();
}
@ -130,6 +198,10 @@ main(int argc, char *argv[])
usage();
}
if (param.p_flags == 0) {
usage();
}
if (cflag) {
outfd = STDOUT_FILENO;
} else {
@ -139,21 +211,6 @@ main(int argc, char *argv[])
}
}
devfd = open(_PATH_TPROF, O_RDWR);
if (devfd == -1) {
err(EXIT_FAILURE, "%s", _PATH_TPROF);
}
ret = ioctl(devfd, TPROF_IOC_GETVERSION, &version);
if (ret == -1) {
err(EXIT_FAILURE, "TPROF_IOC_GETVERSION");
}
if (version != TPROF_VERSION) {
errx(EXIT_FAILURE, "version mismatch: version=%d, expected=%d",
version, TPROF_VERSION);
}
memset(&param, 0, sizeof(param));
ret = ioctl(devfd, TPROF_IOC_START, &param);
if (ret == -1) {
err(EXIT_FAILURE, "TPROF_IOC_START");

35
usr.sbin/tprof/tprof.h Normal file
View File

@ -0,0 +1,35 @@
/* $NetBSD: tprof.h,v 1.1 2018/07/13 07:56:29 maxv Exp $ */
/*
* Copyright (c) 2018 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Maxime Villard.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TPROF_TPROF_H
#define TPROF_TPROF_H

/* For uint32_t, so that this header can be included on its own. */
#include <stdint.h>

/*
 * Only used through a pointer here, so an incomplete declaration is enough.
 * The full definition presumably comes from the tprof kernel headers
 * (e.g. <dev/tprof/tprof_ioctl.h>) -- TODO confirm.
 */
struct tprof_param;

/*
 * Select the event tables for the PMC interface identifier given by the
 * kernel. Returns 0 on success, -1 when no table is available.
 */
int tprof_event_init(uint32_t);

/* Print the events available on this CPU to the standard output. */
void tprof_event_list(void);

/*
 * Fill in the event fields of the parameter block for the named event.
 * Exits the program when the name is unknown.
 */
void tprof_event_lookup(const char *, struct tprof_param *);

#endif /* TPROF_TPROF_H */