Improved the performance of kernel profiling on MULTIPROCESSOR, and made it possible to get profiling data for each CPU.

In the current implementation, a lock is acquired at the entrance of the mcount
internal function, so the higher the number of cores, the more lock contention
occurs, making profiling performance in a MULTIPROCESSOR environment unusably
slow. Profiling buffers have been changed to be reserved for each CPU,
improving profiling performance in MP by several to several dozen times.

- Eliminated cpu_simple_lock in mcount internal function, using per-CPU buffers.
- Add ci_gmon member to struct cpu_info of each MP arch.
- Add kern.profiling.percpu node in sysctl tree.
- Add new -c <cpuid> option to kgmon(8) to specify the cpuid, like OpenBSD.
  For compatibility, if the -c option is not specified, the entire system can be
  operated as before, and the -p option will get the total profiling data for
  all CPUs.
This commit is contained in:
ryo 2021-08-14 17:51:18 +00:00
parent 1979ff4ae2
commit 567a3a02e7
18 changed files with 608 additions and 101 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: mcount.c,v 1.15 2021/08/14 17:38:44 ryo Exp $ */
/* $NetBSD: mcount.c,v 1.16 2021/08/14 17:51:18 ryo Exp $ */
/*
* Copyright (c) 2003, 2004 Wasabi Systems, Inc.
@ -76,13 +76,14 @@
#if 0
static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#else
__RCSID("$NetBSD: mcount.c,v 1.15 2021/08/14 17:38:44 ryo Exp $");
__RCSID("$NetBSD: mcount.c,v 1.16 2021/08/14 17:51:18 ryo Exp $");
#endif
#endif
#include <sys/param.h>
#include <sys/gmon.h>
#include <sys/lock.h>
#include <sys/proc.h>
#ifndef _KERNEL
#include "reentrant.h"
@ -94,10 +95,6 @@ extern struct gmonparam _gmondummy;
struct gmonparam *_m_gmon_alloc(void);
#endif
#if defined(_KERNEL) && !defined(_RUMPKERNEL) && defined(MULTIPROCESSOR)
__cpu_simple_lock_t __mcount_lock;
#endif
#ifndef __LINT__
_MCOUNT_DECL(u_long, u_long)
#ifdef _KERNEL
@ -168,8 +165,11 @@ _MCOUNT_DECL(u_long frompc, u_long selfpc)
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
MCOUNT_ENTER;
#ifdef MULTIPROCESSOR
__cpu_simple_lock(&__mcount_lock);
__insn_barrier();
p = curcpu()->ci_gmon;
if (p == NULL || p->state != GMON_PROF_ON) {
MCOUNT_EXIT;
return;
}
#endif
#endif
p->state = GMON_PROF_BUSY;
@ -264,10 +264,6 @@ _MCOUNT_DECL(u_long frompc, u_long selfpc)
done:
p->state = GMON_PROF_ON;
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
#ifdef MULTIPROCESSOR
__insn_barrier();
__cpu_simple_unlock(&__mcount_lock);
#endif
MCOUNT_EXIT;
#endif
return;
@ -275,10 +271,6 @@ done:
overflow:
p->state = GMON_PROF_ERROR;
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
#ifdef MULTIPROCESSOR
__insn_barrier();
__cpu_simple_unlock(&__mcount_lock);
#endif
MCOUNT_EXIT;
#endif
return;
@ -293,4 +285,106 @@ overflow:
MCOUNT
#endif
#if defined(_KERNEL) && !defined(_RUMPKERNEL) && defined(MULTIPROCESSOR)
void _gmonparam_merge(struct gmonparam *, struct gmonparam *);
/*
 * Merge the per-CPU profiling data of 'q' into the accumulator 'p'.
 *
 * Both gmonparam structures are assumed to describe the same text range
 * with identically sized kcount/froms/tos arrays (they are all cloned
 * from _gmonparam at kmstartup time -- TODO confirm for callers outside
 * this file).  The kcount histogram is summed element-wise; each
 * from->to arc recorded in q is re-inserted into p's hash/chain
 * structure, adding its count, using the same arc-insertion algorithm
 * as mcount itself.
 *
 * On tos-table overflow in p, merging stops silently: any remaining
 * arcs in q are dropped (mirrors mcount's "halt further profiling"
 * behavior).
 */
void
_gmonparam_merge(struct gmonparam *p, struct gmonparam *q)
{
u_long fromindex;
u_short *frompcindex, qtoindex, toindex;
u_long selfpc;
u_long endfrom;
long count;
struct tostruct *top;
int i;
/* sum the statistical PC-sample histogram */
count = q->kcountsize / sizeof(*q->kcount);
for (i = 0; i < count; i++)
p->kcount[i] += q->kcount[i];
/* walk every 'from' hash slot of q */
endfrom = (q->fromssize / sizeof(*q->froms));
for (fromindex = 0; fromindex < endfrom; fromindex++) {
if (q->froms[fromindex] == 0)
continue;
/* follow q's arc chain for this slot; index 0 terminates */
for (qtoindex = q->froms[fromindex]; qtoindex != 0;
qtoindex = q->tos[qtoindex].link) {
selfpc = q->tos[qtoindex].selfpc;
count = q->tos[qtoindex].count;
/* cribbed from mcount */
frompcindex = &p->froms[fromindex];
toindex = *frompcindex;
if (toindex == 0) {
/*
 * first time traversing this arc
 */
toindex = ++p->tos[0].link;	/* tos[0].link is the high-water allocator */
if (toindex >= p->tolimit)
/* halt further profiling */
goto overflow;
*frompcindex = (u_short)toindex;
top = &p->tos[(size_t)toindex];
top->selfpc = selfpc;
top->count = count;
top->link = 0;
goto done;
}
top = &p->tos[(size_t)toindex];
if (top->selfpc == selfpc) {
/*
 * arc at front of chain; usual case.
 */
top->count+= count;
goto done;
}
/*
 * have to go looking down chain for it.
 * top points to what we are looking at,
 * we know it is not at the head of the chain.
 */
for (; /* goto done */; ) {
if (top->link == 0) {
/*
 * top is end of the chain and
 * none of the chain had
 * top->selfpc == selfpc. so
 * we allocate a new tostruct
 * and link it to the head of
 * the chain.
 */
toindex = ++p->tos[0].link;
if (toindex >= p->tolimit)
goto overflow;
top = &p->tos[(size_t)toindex];
top->selfpc = selfpc;
top->count = count;
top->link = *frompcindex;
*frompcindex = (u_short)toindex;
goto done;
}
/*
 * otherwise, check the next arc on the chain.
 */
top = &p->tos[top->link];
if (top->selfpc == selfpc) {
/*
 * there it is.
 * add to its count.
 */
top->count += count;
goto done;
}
}
done: ;
}
}
overflow: ;
}
#endif
#endif /* (!_KERNEL || GPROF) && !_STANDALONE */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.37 2021/08/08 19:28:08 skrll Exp $ */
/* $NetBSD: cpu.h,v 1.38 2021/08/14 17:51:18 ryo Exp $ */
/*-
* Copyright (c) 2014, 2020 The NetBSD Foundation, Inc.
@ -37,6 +37,7 @@
#ifdef __aarch64__
#ifdef _KERNEL_OPT
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#endif
@ -133,6 +134,9 @@ struct cpu_info {
struct aarch64_cache_info *ci_cacheinfo;
struct aarch64_cpufuncs ci_cpufuncs;
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
} __aligned(COHERENCY_UNIT);
#ifdef _KERNEL

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.103 2021/07/22 01:39:18 thorpej Exp $ */
/* $NetBSD: cpu.h,v 1.104 2021/08/14 17:51:18 ryo Exp $ */
/*-
* Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
@ -72,6 +72,7 @@
#define _ALPHA_CPU_H_
#if defined(_KERNEL_OPT)
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"
#endif
@ -140,6 +141,9 @@ struct cpu_info {
uint64_t ci_pcc_freq; /* cpu cycles/second */
struct trapframe *ci_db_regs; /* registers for debuggers */
u_int ci_nintrhand; /* # of interrupt handlers */
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* [MI] per-cpu GPROF */
#endif
};
/* Ensure some cpu_info fields are within the signed 16-bit displacement. */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.118 2021/08/08 19:28:08 skrll Exp $ */
/* $NetBSD: cpu.h,v 1.119 2021/08/14 17:51:18 ryo Exp $ */
/*
* Copyright (c) 1994-1996 Mark Brinicombe.
@ -92,6 +92,7 @@ void cpu_proc_fork(struct proc *, struct proc *);
*/
#if !defined(_MODULE) && defined(_KERNEL_OPT)
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#include "opt_cpuoptions.h"
#include "opt_lockdebug.h"
@ -223,6 +224,10 @@ struct cpu_info {
struct arm_cache_info *
ci_cacheinfo;
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
};
extern struct cpu_info cpu_info_store[];

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.10 2020/04/16 09:28:52 skrll Exp $ */
/* $NetBSD: cpu.h,v 1.11 2021/08/14 17:51:19 ryo Exp $ */
/* $OpenBSD: cpu.h,v 1.55 2008/07/23 17:39:35 kettenis Exp $ */
@ -55,6 +55,7 @@
#ifdef _KERNEL_OPT
#include "opt_cputype.h"
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#endif
@ -300,7 +301,9 @@ struct cpu_info {
struct cpu_softc *ci_softc;
#endif
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
#endif /* !_KMEMUSER */
} __aligned(64);

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.132 2021/03/29 01:47:45 simonb Exp $ */
/* $NetBSD: cpu.h,v 1.133 2021/08/14 17:51:19 ryo Exp $ */
/*-
* Copyright (c) 1992, 1993
@ -49,6 +49,7 @@
#if defined(_KERNEL_OPT)
#include "opt_cputype.h"
#include "opt_gprof.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#endif
@ -159,6 +160,9 @@ struct cpu_info {
kcpuset_t *ci_watchcpus;
kcpuset_t *ci_ddbcpus;
#endif
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
};
#endif /* _KERNEL || _KMEMUSER */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.4 2019/12/01 15:34:45 ad Exp $ */
/* $NetBSD: cpu.h,v 1.5 2021/08/14 17:51:19 ryo Exp $ */
/*-
* Copyright (c) 2014 The NetBSD Foundation, Inc.
@ -64,6 +64,9 @@ struct cpu_info {
int ci_cpl;
u_int ci_softints;
volatile u_int ci_intr_depth;
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
};
register struct lwp *or1k_curlwp __asm("r10");

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.118 2021/03/07 14:42:53 rin Exp $ */
/* $NetBSD: cpu.h,v 1.119 2021/08/14 17:51:19 ryo Exp $ */
/*
* Copyright (C) 1999 Wolfgang Solfrank.
@ -45,6 +45,7 @@ struct cache_info {
#if defined(_KERNEL) || defined(_KMEMUSER)
#if defined(_KERNEL_OPT)
#include "opt_gprof.h"
#include "opt_modular.h"
#include "opt_multiprocessor.h"
#include "opt_ppcarch.h"
@ -159,6 +160,9 @@ struct cpu_info {
struct evcnt ci_ev_tlbmiss_soft; /* tlb miss (no trap) */
struct evcnt ci_ev_dtlbmiss_hard; /* data tlb miss (trap) */
struct evcnt ci_ev_itlbmiss_hard; /* instruction tlb miss (trap) */
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
#endif /* _KERNEL */
};
#endif /* _KERNEL || _KMEMUSER */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.7 2019/12/01 15:34:45 ad Exp $ */
/* $NetBSD: cpu.h,v 1.8 2021/08/14 17:51:19 ryo Exp $ */
/*-
* Copyright (c) 2014 The NetBSD Foundation, Inc.
@ -78,6 +78,9 @@ struct cpu_info {
struct evcnt ci_ev_fpu_saves;
struct evcnt ci_ev_fpu_loads;
struct evcnt ci_ev_fpu_reenables;
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
};
#endif /* _KERNEL || _KMEMUSER */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.109 2021/01/24 07:36:54 mrg Exp $ */
/* $NetBSD: cpu.h,v 1.110 2021/08/14 17:51:19 ryo Exp $ */
/*
* Copyright (c) 1992, 1993
@ -120,6 +120,7 @@ struct cacheinfo {
#if defined(_KERNEL) || defined(_KMEMUSER)
#if defined(_KERNEL_OPT)
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"
#include "opt_sparc_arch.h"
@ -395,6 +396,10 @@ struct cpu_info {
struct evcnt ci_sintrcnt[16];
struct cpu_data ci_data; /* MI per-cpu data */
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
};
#endif /* _KERNEL || _KMEMUSER */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.132 2021/04/05 22:36:27 nakayama Exp $ */
/* $NetBSD: cpu.h,v 1.133 2021/08/14 17:51:19 ryo Exp $ */
/*
* Copyright (c) 1992, 1993
@ -70,6 +70,7 @@ struct cacheinfo {
*/
#if defined(_KERNEL_OPT)
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"
#endif
@ -220,6 +221,10 @@ struct cpu_info {
volatile void *ci_ddb_regs; /* DDB regs */
void (*ci_idlespin)(void);
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
};
#endif /* _KERNEL || _KMEMUSER */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.104 2019/12/01 15:34:46 ad Exp $ */
/* $NetBSD: cpu.h,v 1.105 2021/08/14 17:51:19 ryo Exp $ */
/*
* Copyright (c) 1994 Ludd, University of Lule}, Sweden
@ -29,6 +29,7 @@
#define _VAX_CPU_H_
#if defined(_KERNEL_OPT)
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"
#endif
@ -142,6 +143,9 @@ struct cpu_info {
SIMPLEQ_ENTRY(cpu_info) ci_next; /* next cpu_info */
#endif
uintptr_t ci_cas_addr; /* current address doing CAS in a RAS */
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
};
#define CI_MASTERCPU 1 /* Set if master CPU */
#define CI_RUNNING 2 /* Set when a slave CPU is running */

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.130 2021/02/19 02:15:24 christos Exp $ */
/* $NetBSD: cpu.h,v 1.131 2021/08/14 17:51:20 ryo Exp $ */
/*
* Copyright (c) 1990 The Regents of the University of California.
@ -321,6 +321,10 @@ struct cpu_info {
struct evcnt ci_xen_systime_backwards_hardclock_evcnt;
struct evcnt ci_xen_missed_hardclock_evcnt;
#endif /* XEN */
#if defined(GPROF) && defined(MULTIPROCESSOR)
struct gmonparam *ci_gmon; /* MI per-cpu GPROF */
#endif
};
#if defined(XEN) && !defined(XENPV)

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_clock.c,v 1.144 2021/01/16 02:20:00 riastradh Exp $ */
/* $NetBSD: kern_clock.c,v 1.145 2021/08/14 17:51:20 ryo Exp $ */
/*-
* Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@ -69,11 +69,12 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.144 2021/01/16 02:20:00 riastradh Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.145 2021/08/14 17:51:20 ryo Exp $");
#ifdef _KERNEL_OPT
#include "opt_dtrace.h"
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#endif
#include <sys/param.h>
@ -456,8 +457,14 @@ statclock(struct clockframe *frame)
/*
* Kernel statistics are just like addupc_intr, only easier.
*/
#ifdef MULTIPROCESSOR
g = curcpu()->ci_gmon;
if (g != NULL &&
profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
#else
g = &_gmonparam;
if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
#endif
i = CLKF_PC(frame) - g->lowpc;
if (i < g->textsize) {
i /= HISTFRACTION * sizeof(*g->kcount);

View File

@ -1,4 +1,4 @@
/* $NetBSD: subr_prof.c,v 1.49 2019/04/06 03:06:28 thorpej Exp $ */
/* $NetBSD: subr_prof.c,v 1.50 2021/08/14 17:51:20 ryo Exp $ */
/*-
* Copyright (c) 1982, 1986, 1993
@ -32,10 +32,11 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_prof.c,v 1.49 2019/04/06 03:06:28 thorpej Exp $");
__KERNEL_RCSID(0, "$NetBSD: subr_prof.c,v 1.50 2021/08/14 17:51:20 ryo Exp $");
#ifdef _KERNEL_OPT
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#endif
#include <sys/param.h>
@ -51,9 +52,15 @@ __KERNEL_RCSID(0, "$NetBSD: subr_prof.c,v 1.49 2019/04/06 03:06:28 thorpej Exp $
#ifdef GPROF
#include <sys/malloc.h>
#include <sys/gmon.h>
#include <sys/xcall.h>
MALLOC_DEFINE(M_GPROF, "gprof", "kernel profiling buffer");
static int sysctl_kern_profiling(SYSCTLFN_ARGS);
#ifdef MULTIPROCESSOR
void _gmonparam_merge(struct gmonparam *, struct gmonparam *);
#endif
/*
* Froms is actually a bunch of unsigned shorts indexing tos
*/
@ -70,6 +77,7 @@ kmstartup(void)
{
char *cp;
struct gmonparam *p = &_gmonparam;
unsigned long size;
/*
* Round lowpc and highpc to multiples of the density we're using
* so the rest of the scaling (here and in gprof) stays in ints.
@ -90,8 +98,101 @@ kmstartup(void)
else if (p->tolimit > MAXARCS)
p->tolimit = MAXARCS;
p->tossize = p->tolimit * sizeof(struct tostruct);
cp = malloc(p->kcountsize + p->fromssize + p->tossize,
M_GPROF, M_NOWAIT | M_ZERO);
size = p->kcountsize + p->fromssize + p->tossize;
#ifdef MULTIPROCESSOR
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
for (CPU_INFO_FOREACH(cii, ci)) {
p = malloc(sizeof(struct gmonparam) + size, M_GPROF,
M_NOWAIT | M_ZERO);
if (p == NULL) {
printf("No memory for profiling on %s\n",
cpu_name(ci));
/* cannot profile on this cpu */
continue;
}
memcpy(p, &_gmonparam, sizeof(_gmonparam));
ci->ci_gmon = p;
/*
* To allow profiling to be controlled only by the global
* _gmonparam.state, set the default value for each CPU to
* GMON_PROF_ON. If _gmonparam.state is not ON, mcount will
* not be executed.
* This is For compatibility of the kgmon(8) kmem interface.
*/
p->state = GMON_PROF_ON;
cp = (char *)(p + 1);
p->tos = (struct tostruct *)cp;
p->kcount = (u_short *)(cp + p->tossize);
p->froms = (u_short *)(cp + p->tossize + p->kcountsize);
}
sysctl_createv(NULL, 0, NULL, NULL,
0, CTLTYPE_NODE, "percpu",
SYSCTL_DESCR("per cpu profiling information"),
NULL, 0, NULL, 0,
CTL_KERN, KERN_PROF, GPROF_PERCPU, CTL_EOL);
for (CPU_INFO_FOREACH(cii, ci)) {
if (ci->ci_gmon == NULL)
continue;
sysctl_createv(NULL, 0, NULL, NULL,
0, CTLTYPE_NODE, cpu_name(ci),
NULL,
NULL, 0, NULL, 0,
CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci), CTL_EOL);
sysctl_createv(NULL, 0, NULL, NULL,
CTLFLAG_READWRITE, CTLTYPE_INT, "state",
SYSCTL_DESCR("Profiling state"),
sysctl_kern_profiling, 0, (void *)ci, 0,
CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
GPROF_STATE, CTL_EOL);
sysctl_createv(NULL, 0, NULL, NULL,
CTLFLAG_READWRITE, CTLTYPE_STRUCT, "count",
SYSCTL_DESCR("Array of statistical program counters"),
sysctl_kern_profiling, 0, (void *)ci, 0,
CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
GPROF_COUNT, CTL_EOL);
sysctl_createv(NULL, 0, NULL, NULL,
CTLFLAG_READWRITE, CTLTYPE_STRUCT, "froms",
SYSCTL_DESCR("Array indexed by program counter of "
"call-from points"),
sysctl_kern_profiling, 0, (void *)ci, 0,
CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
GPROF_FROMS, CTL_EOL);
sysctl_createv(NULL, 0, NULL, NULL,
CTLFLAG_READWRITE, CTLTYPE_STRUCT, "tos",
SYSCTL_DESCR("Array of structures describing "
"destination of calls and their counts"),
sysctl_kern_profiling, 0, (void *)ci, 0,
CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
GPROF_TOS, CTL_EOL);
sysctl_createv(NULL, 0, NULL, NULL,
CTLFLAG_READWRITE, CTLTYPE_STRUCT, "gmonparam",
SYSCTL_DESCR("Structure giving the sizes of the above "
"arrays"),
sysctl_kern_profiling, 0, (void *)ci, 0,
CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
GPROF_GMONPARAM, CTL_EOL);
}
/*
* For minimal compatibility of the kgmon(8) kmem interface,
* the _gmonparam and cpu0:ci_gmon share buffers.
*/
p = curcpu()->ci_gmon;
if (p != NULL) {
_gmonparam.tos = p->tos;
_gmonparam.kcount = p->kcount;
_gmonparam.froms = p->froms;
}
#else /* MULTIPROCESSOR */
cp = malloc(size, M_GPROF, M_NOWAIT | M_ZERO);
if (cp == 0) {
printf("No memory for profiling.\n");
return;
@ -101,8 +202,21 @@ kmstartup(void)
p->kcount = (u_short *)cp;
cp += p->kcountsize;
p->froms = (u_short *)cp;
#endif /* MULTIPROCESSOR */
}
#ifdef MULTIPROCESSOR
/*
 * Cross-call handler: set the gmon profiling state of the CPU this
 * handler runs on.  arg1 carries the new state encoded as a pointer
 * (see UINT64TOPTR at the xc_unicast/xc_broadcast call sites); arg2 is
 * unused.  CPUs whose profiling buffer could not be allocated
 * (ci_gmon == NULL) are skipped.
 */
static void
prof_set_state_xc(void *arg1, void *arg2 __unused)
{
	struct gmonparam *g = curcpu()->ci_gmon;
	const int newstate = PTRTOUINT64(arg1);

	if (g == NULL)
		return;
	g->state = newstate;
}
#endif /* MULTIPROCESSOR */
/*
* Return kernel profiling information.
*/
@ -113,15 +227,72 @@ kmstartup(void)
static int
sysctl_kern_profiling(SYSCTLFN_ARGS)
{
struct gmonparam *gp = &_gmonparam;
struct sysctlnode node = *rnode;
struct gmonparam *gp;
int error;
struct sysctlnode node;
#ifdef MULTIPROCESSOR
CPU_INFO_ITERATOR cii;
struct cpu_info *ci, *target_ci;
uint64_t where;
int state;
bool prof_on, do_merge;
node = *rnode;
target_ci = (struct cpu_info *)rnode->sysctl_data;
do_merge = (oldp != NULL) && (target_ci == NULL) &&
((node.sysctl_num == GPROF_COUNT) ||
(node.sysctl_num == GPROF_FROMS) ||
(node.sysctl_num == GPROF_TOS));
if (do_merge) {
/* kern.profiling.{count,froms,tos} */
unsigned long size;
char *cp;
/* allocate temporary gmonparam, and merge results of all CPU */
size = _gmonparam.kcountsize + _gmonparam.fromssize +
_gmonparam.tossize;
gp = malloc(sizeof(struct gmonparam) + size, M_GPROF,
M_NOWAIT | M_ZERO);
if (gp == NULL)
return ENOMEM;
memcpy(gp, &_gmonparam, sizeof(_gmonparam));
cp = (char *)(gp + 1);
gp->tos = (struct tostruct *)cp;
gp->kcount = (u_short *)(cp + gp->tossize);
gp->froms = (u_short *)(cp + gp->tossize + gp->kcountsize);
for (CPU_INFO_FOREACH(cii, ci)) {
if (ci->ci_gmon == NULL)
continue;
_gmonparam_merge(gp, ci->ci_gmon);
}
} else if (target_ci != NULL) {
/* kern.profiling.percpu.* */
gp = target_ci->ci_gmon;
} else {
/* kern.profiling.{state,gmonparam} */
gp = &_gmonparam;
}
#else /* MULTIPROCESSOR */
gp = &_gmonparam;
#endif
switch (node.sysctl_num) {
case GPROF_STATE:
#ifdef MULTIPROCESSOR
/*
* if _gmonparam.state is OFF, the state of each CPU is
* considered to be OFF, even if it is actually ON.
*/
if (_gmonparam.state == GMON_PROF_OFF ||
gp->state == GMON_PROF_OFF)
state = GMON_PROF_OFF;
else
state = GMON_PROF_ON;
node.sysctl_data = &state;
#else
node.sysctl_data = &gp->state;
#endif
break;
case GPROF_COUNT:
node.sysctl_data = gp->kcount;
@ -145,8 +316,97 @@ sysctl_kern_profiling(SYSCTLFN_ARGS)
error = sysctl_lookup(SYSCTLFN_CALL(&node));
if (error || newp == NULL)
return (error);
goto done;
#ifdef MULTIPROCESSOR
switch (node.sysctl_num) {
case GPROF_STATE:
if (target_ci != NULL) {
where = xc_unicast(0, prof_set_state_xc,
UINT64TOPTR(state), NULL, target_ci);
xc_wait(where);
/* if even one CPU being profiled, enable perfclock. */
prof_on = false;
for (CPU_INFO_FOREACH(cii, ci)) {
if (ci->ci_gmon == NULL)
continue;
if (ci->ci_gmon->state != GMON_PROF_OFF) {
prof_on = true;
break;
}
}
mutex_spin_enter(&proc0.p_stmutex);
if (prof_on)
startprofclock(&proc0);
else
stopprofclock(&proc0);
mutex_spin_exit(&proc0.p_stmutex);
if (prof_on) {
_gmonparam.state = GMON_PROF_ON;
} else {
_gmonparam.state = GMON_PROF_OFF;
/*
* when _gmonparam.state and all CPU gmon state
* are OFF, all CPU states should be ON so that
* the entire CPUs profiling can be controlled
* by _gmonparam.state only.
*/
for (CPU_INFO_FOREACH(cii, ci)) {
if (ci->ci_gmon == NULL)
continue;
ci->ci_gmon->state = GMON_PROF_ON;
}
}
} else {
_gmonparam.state = state;
where = xc_broadcast(0, prof_set_state_xc,
UINT64TOPTR(state), NULL);
xc_wait(where);
mutex_spin_enter(&proc0.p_stmutex);
if (state == GMON_PROF_OFF)
stopprofclock(&proc0);
else
startprofclock(&proc0);
mutex_spin_exit(&proc0.p_stmutex);
}
break;
case GPROF_COUNT:
/*
* if 'kern.profiling.{count,froms,tos}' is written, the same
* data will be written to 'kern.profiling.percpu.cpuN.xxx'
*/
if (target_ci == NULL) {
for (CPU_INFO_FOREACH(cii, ci)) {
if (ci->ci_gmon == NULL)
continue;
memmove(ci->ci_gmon->kcount, gp->kcount,
newlen);
}
}
break;
case GPROF_FROMS:
if (target_ci == NULL) {
for (CPU_INFO_FOREACH(cii, ci)) {
if (ci->ci_gmon == NULL)
continue;
memmove(ci->ci_gmon->froms, gp->froms, newlen);
}
}
break;
case GPROF_TOS:
if (target_ci == NULL) {
for (CPU_INFO_FOREACH(cii, ci)) {
if (ci->ci_gmon == NULL)
continue;
memmove(ci->ci_gmon->tos, gp->tos, newlen);
}
}
break;
}
#else
if (node.sysctl_num == GPROF_STATE) {
mutex_spin_enter(&proc0.p_stmutex);
if (gp->state == GMON_PROF_OFF)
@ -155,8 +415,14 @@ sysctl_kern_profiling(SYSCTLFN_ARGS)
startprofclock(&proc0);
mutex_spin_exit(&proc0.p_stmutex);
}
#endif
return (0);
done:
#ifdef MULTIPROCESSOR
if (do_merge)
free(gp, M_GPROF);
#endif
return error;
}
SYSCTL_SETUP(sysctl_kern_gprof_setup, "sysctl kern.profiling subtree setup")

View File

@ -1,4 +1,4 @@
/* $NetBSD: gmon.h,v 1.10 2012/03/09 15:38:03 christos Exp $ */
/* $NetBSD: gmon.h,v 1.11 2021/08/14 17:51:20 ryo Exp $ */
/*-
* Copyright (c) 1982, 1986, 1992, 1993
@ -150,4 +150,5 @@ extern struct gmonparam _gmonparam;
#define GPROF_FROMS 2 /* struct: from location hash bucket */
#define GPROF_TOS 3 /* struct: destination/count structure */
#define GPROF_GMONPARAM 4 /* struct: profiling parameters (see above) */
#define GPROF_PERCPU 5 /* per cpu node */
#endif /* !_SYS_GMON_H_ */

View File

@ -1,4 +1,4 @@
.\" $NetBSD: kgmon.8,v 1.19 2011/04/25 22:46:35 wiz Exp $
.\" $NetBSD: kgmon.8,v 1.20 2021/08/14 17:51:20 ryo Exp $
.\"
.\" Copyright (c) 1983, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
@ -29,7 +29,7 @@
.\"
.\" from: @(#)kgmon.8 8.1 (Berkeley) 6/6/93
.\"
.Dd June 6, 1993
.Dd August 10, 2021
.Dt KGMON 8
.Os
.Sh NAME
@ -38,6 +38,7 @@
.Sh SYNOPSIS
.Nm
.Op Fl bdhpr
.Op Fl c Ar cpuid
.Op Fl M Ar core
.Op Fl N Ar system
.Sh DESCRIPTION
@ -55,6 +56,8 @@ flag is specified,
.Nm
extracts profile data from the operating system and produces a
.Pa gmon.out
or
.Pa gmon-<id>.out
file suitable for later analysis by
.Xr gprof 1 .
.Pp
@ -62,6 +65,18 @@ The options are as follows:
.Bl -tag -width Ds
.It Fl b
Resume the collection of profile data.
.It Fl c Ar cpuid
Operate on the CPU specified by
.Pa cpuid .
If this option is specified with
.Fl p ,
the output file name will be
.Pa gmon-<id>.out
instead of
.Pa gmon.out .
If the cpuid argument is
.Dq all ,
the operation is performed for each CPU.
.It Fl d
Enable debug output.
.It Fl h
@ -79,6 +94,8 @@ instead of the default
.It Fl p
Dump the contents of the profile buffers into a
.Pa gmon.out
or
.Pa gmon-<id>.out
file.
.It Fl r
Reset all the profile buffers.
@ -86,6 +103,8 @@ If the
.Fl p
flag is also specified, the
.Pa gmon.out
or
.Pa gmon-<id>.out
file is generated before the buffers are reset.
.El
.Pp

View File

@ -1,4 +1,4 @@
/* $NetBSD: kgmon.c,v 1.26 2019/07/11 03:49:52 msaitoh Exp $ */
/* $NetBSD: kgmon.c,v 1.27 2021/08/14 17:51:20 ryo Exp $ */
/*
* Copyright (c) 1983, 1992, 1993
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1983, 1992, 1993\
#if 0
static char sccsid[] = "from: @(#)kgmon.c 8.1 (Berkeley) 6/6/93";
#else
__RCSID("$NetBSD: kgmon.c,v 1.26 2019/07/11 03:49:52 msaitoh Exp $");
__RCSID("$NetBSD: kgmon.c,v 1.27 2021/08/14 17:51:20 ryo Exp $");
#endif
#endif /* not lint */
@ -72,28 +72,35 @@ struct kvmvars {
struct gmonparam gpm;
};
static int bflag, hflag, kflag, rflag, pflag;
static int ncpu_mib[2] = { CTL_HW, HW_NCPU };
static int bflag, cflag_all, hflag, kflag, rflag, pflag;
static int debug = 0;
static void setprof(struct kvmvars *kvp, int state);
static void dumpstate(struct kvmvars *kvp);
static void reset(struct kvmvars *kvp);
static void setprof(struct kvmvars *kvp, int state, int cpuid);
static void dumpstate(struct kvmvars *kvp, int cpuid);
static void reset(struct kvmvars *kvp, int cpuid);
static int openfiles(char *, char *, struct kvmvars *);
static int getprof(struct kvmvars *);
static int getprof(struct kvmvars *, int);
static void kern_readonly(int);
static int getprofhz(struct kvmvars *);
int
main(int argc, char **argv)
{
int ch, mode, disp, accessmode;
int ch, mode, disp, accessmode, ncpu, cpuid = -1;
struct kvmvars kvmvars;
size_t size;
char *sys, *kmemf;
char on_cpu[sizeof(" on cpuXXXXXXXXX")];
size = sizeof(ncpu);
if (sysctl(ncpu_mib, 2, &ncpu, &size, NULL, 0) < 0)
ncpu = 1;
setprogname(argv[0]);
(void)seteuid(getuid());
kmemf = NULL;
sys = NULL;
while ((ch = getopt(argc, argv, "M:N:bdhpr")) != -1) {
while ((ch = getopt(argc, argv, "M:N:bc:dhpr")) != -1) {
switch((char)ch) {
case 'M':
@ -109,6 +116,17 @@ main(int argc, char **argv)
bflag = 1;
break;
case 'c':
if (strcmp(optarg, "all") == 0) {
cflag_all = 1;
cpuid = 0;
} else {
cpuid = strtol(optarg, NULL, 10);
if (cpuid >= ncpu)
errx(1, "illegal CPU id %s", optarg);
}
break;
case 'h':
hflag = 1;
break;
@ -127,7 +145,7 @@ main(int argc, char **argv)
default:
(void)fprintf(stderr,
"usage: %s [-bdhrp] [-M core] [-N system]\n",
"usage: %s [-bdhrp] [-c cpuid] [-M core] [-N system]\n",
getprogname());
exit(1);
}
@ -146,21 +164,31 @@ main(int argc, char **argv)
}
#endif
accessmode = openfiles(sys, kmemf, &kvmvars);
mode = getprof(&kvmvars);
if (hflag)
disp = GMON_PROF_OFF;
else if (bflag)
disp = GMON_PROF_ON;
else
disp = mode;
if (pflag)
dumpstate(&kvmvars);
if (rflag)
reset(&kvmvars);
if (accessmode == O_RDWR)
setprof(&kvmvars, disp);
(void)fprintf(stdout, "%s: kernel profiling is %s.\n",
getprogname(), disp == GMON_PROF_OFF ? "off" : "running");
do {
if (cpuid == -1)
on_cpu[0] = '\0';
else
snprintf(on_cpu, sizeof(on_cpu), " on cpu%d", cpuid);
mode = getprof(&kvmvars, cpuid);
if (hflag)
disp = GMON_PROF_OFF;
else if (bflag)
disp = GMON_PROF_ON;
else
disp = mode;
if (pflag)
dumpstate(&kvmvars, cpuid);
if (rflag)
reset(&kvmvars, cpuid);
if (accessmode == O_RDWR)
setprof(&kvmvars, disp, cpuid);
(void)fprintf(stdout, "%s: kernel profiling is %s%s.\n",
getprogname(), disp == GMON_PROF_OFF ? "off" : "running",
on_cpu);
} while (cflag_all && ++cpuid < ncpu);
return (0);
}
@ -233,9 +261,9 @@ kern_readonly(int mode)
* Get the state of kernel profiling.
*/
static int
getprof(struct kvmvars *kvp)
getprof(struct kvmvars *kvp, int cpuid)
{
int mib[3];
int mib[5], miblen, mibparam;
size_t size;
if (kflag) {
@ -244,9 +272,18 @@ getprof(struct kvmvars *kvp)
} else {
mib[0] = CTL_KERN;
mib[1] = KERN_PROF;
mib[2] = GPROF_GMONPARAM;
if (cpuid < 0) {
mibparam = 2;
miblen = 3;
} else {
mib[2] = GPROF_PERCPU;
mib[3] = cpuid;
mibparam = 4;
miblen = 5;
}
mib[mibparam] = GPROF_GMONPARAM;
size = sizeof kvp->gpm;
if (sysctl(mib, 3, &kvp->gpm, &size, NULL, 0) < 0)
if (sysctl(mib, miblen, &kvp->gpm, &size, NULL, 0) < 0)
size = 0;
}
if (size != sizeof kvp->gpm)
@ -259,23 +296,32 @@ getprof(struct kvmvars *kvp)
* Enable or disable kernel profiling according to the state variable.
*/
static void
setprof(struct kvmvars *kvp, int state)
setprof(struct kvmvars *kvp, int state, int cpuid)
{
struct gmonparam *p = (struct gmonparam *)nl[N_GMONPARAM].n_value;
int mib[3], oldstate;
int mib[5], miblen, mibparam, oldstate;
size_t sz;
sz = sizeof(state);
if (!kflag) {
mib[0] = CTL_KERN;
mib[1] = KERN_PROF;
mib[2] = GPROF_STATE;
if (sysctl(mib, 3, &oldstate, &sz, NULL, 0) < 0)
if (cpuid < 0) {
mibparam = 2;
miblen = 3;
} else {
mib[2] = GPROF_PERCPU;
mib[3] = cpuid;
mibparam = 4;
miblen = 5;
}
mib[mibparam] = GPROF_STATE;
if (sysctl(mib, miblen, &oldstate, &sz, NULL, 0) < 0)
goto bad;
if (oldstate == state)
return;
(void)seteuid(0);
if (sysctl(mib, 3, NULL, NULL, &state, sz) >= 0) {
if (sysctl(mib, miblen, NULL, NULL, &state, sz) >= 0) {
(void)seteuid(getuid());
return;
}
@ -292,23 +338,41 @@ bad:
* Build the gmon.out file.
*/
static void
dumpstate(struct kvmvars *kvp)
dumpstate(struct kvmvars *kvp, int cpuid)
{
FILE *fp;
struct rawarc rawarc;
struct tostruct *tos;
u_long frompc;
u_short *froms, *tickbuf;
int mib[3];
int mib[5], miblen, mibparam;
size_t i;
struct gmonhdr h;
int fromindex, endfrom, toindex;
size_t kcountsize;
char gmon_out[sizeof("gmon-XXXXXXXXXXX.out")];
setprof(kvp, GMON_PROF_OFF);
fp = fopen("gmon.out", "w");
mib[0] = CTL_KERN;
mib[1] = KERN_PROF;
if (cpuid < 0) {
mibparam = 2;
miblen = 3;
} else {
mib[2] = GPROF_PERCPU;
mib[3] = cpuid;
mibparam = 4;
miblen = 5;
}
setprof(kvp, GMON_PROF_OFF, cpuid);
if (cpuid < 0)
strlcpy(gmon_out, "gmon.out", sizeof(gmon_out));
else
snprintf(gmon_out, sizeof(gmon_out), "gmon-%d.out", cpuid);
fp = fopen(gmon_out, "w");
if (fp == NULL) {
warn("cannot open `gmon.out'");
warn("cannot open `%s'", gmon_out);
return;
}
@ -329,8 +393,6 @@ dumpstate(struct kvmvars *kvp)
/*
* Write out the tick buffer.
*/
mib[0] = CTL_KERN;
mib[1] = KERN_PROF;
if ((tickbuf = malloc(kcountsize)) == NULL)
err(EXIT_FAILURE, "Cannot allocate %zu kcount space",
kcountsize);
@ -338,9 +400,9 @@ dumpstate(struct kvmvars *kvp)
i = kvm_read(kvp->kd, (u_long)kvp->gpm.kcount, tickbuf,
kcountsize);
} else {
mib[2] = GPROF_COUNT;
mib[mibparam] = GPROF_COUNT;
i = kcountsize;
if (sysctl(mib, 3, tickbuf, &i, NULL, 0) < 0)
if (sysctl(mib, miblen, tickbuf, &i, NULL, 0) < 0)
i = 0;
}
if (i != kcountsize)
@ -361,9 +423,9 @@ dumpstate(struct kvmvars *kvp)
i = kvm_read(kvp->kd, (u_long)kvp->gpm.froms, froms,
(size_t)kvp->gpm.fromssize);
} else {
mib[2] = GPROF_FROMS;
mib[mibparam] = GPROF_FROMS;
i = kvp->gpm.fromssize;
if (sysctl(mib, 3, froms, &i, NULL, 0) < 0)
if (sysctl(mib, miblen, froms, &i, NULL, 0) < 0)
i = 0;
}
if (i != kvp->gpm.fromssize)
@ -377,9 +439,9 @@ dumpstate(struct kvmvars *kvp)
i = kvm_read(kvp->kd, (u_long)kvp->gpm.tos, (void *)tos,
(size_t)kvp->gpm.tossize);
} else {
mib[2] = GPROF_TOS;
mib[mibparam] = GPROF_TOS;
i = kvp->gpm.tossize;
if (sysctl(mib, 3, tos, &i, NULL, 0) < 0)
if (sysctl(mib, miblen, tos, &i, NULL, 0) < 0)
i = 0;
}
if (i != kvp->gpm.tossize)
@ -445,13 +507,25 @@ getprofhz(struct kvmvars *kvp)
* Reset the kernel profiling date structures.
*/
static void
reset(struct kvmvars *kvp)
reset(struct kvmvars *kvp, int cpuid)
{
char *zbuf;
size_t biggest;
int mib[3];
int mib[5], miblen, mibparam;
setprof(kvp, GMON_PROF_OFF);
mib[0] = CTL_KERN;
mib[1] = KERN_PROF;
if (cpuid < 0) {
mibparam = 2;
miblen = 3;
} else {
mib[2] = GPROF_PERCPU;
mib[3] = cpuid;
mibparam = 4;
miblen = 5;
}
setprof(kvp, GMON_PROF_OFF, cpuid);
biggest = (size_t)kvp->gpm.kcountsize;
if ((size_t)kvp->gpm.fromssize > biggest)
@ -477,16 +551,14 @@ reset(struct kvmvars *kvp)
return;
}
(void)seteuid(0);
mib[0] = CTL_KERN;
mib[1] = KERN_PROF;
mib[2] = GPROF_COUNT;
if (sysctl(mib, 3, NULL, NULL, zbuf, (size_t)kvp->gpm.kcountsize) < 0)
mib[mibparam] = GPROF_COUNT;
if (sysctl(mib, miblen, NULL, NULL, zbuf, (size_t)kvp->gpm.kcountsize) < 0)
err(EXIT_FAILURE, "tickbuf zero");
mib[2] = GPROF_FROMS;
if (sysctl(mib, 3, NULL, NULL, zbuf, (size_t)kvp->gpm.fromssize) < 0)
mib[mibparam] = GPROF_FROMS;
if (sysctl(mib, miblen, NULL, NULL, zbuf, (size_t)kvp->gpm.fromssize) < 0)
err(EXIT_FAILURE, "froms zero");
mib[2] = GPROF_TOS;
if (sysctl(mib, 3, NULL, NULL, zbuf, (size_t)kvp->gpm.tossize) < 0)
mib[mibparam] = GPROF_TOS;
if (sysctl(mib, miblen, NULL, NULL, zbuf, (size_t)kvp->gpm.tossize) < 0)
err(EXIT_FAILURE, "tos zero");
(void)seteuid(getuid());
free(zbuf);