cb32a134a5
in PR kern/52639, as well as some general cleaning-up... (As proposed on tech-kern@ with additional changes and enhancements.) Details of changes: * All history arguments are now stored as uintmax_t values[1], both in the kernel and in the structures used for exporting the history data to userland via sysctl(9). This avoids problems on some architectures where passing a 64-bit (or larger) value to printf(3) can cause it to process the value as multiple arguments. (This can be particularly problematic when printf()'s format string is not a literal, since in that case the compiler cannot know how large each argument should be.) * Update the data structures used for exporting kernel history data to include a version number as well as the length of history arguments. * All [2] existing users of kernhist(9) have had their format strings updated. Each format specifier now includes an explicit length modifier 'j' to refer to numeric values of the size of uintmax_t. * All [2] existing users of kernhist(9) have had their format strings updated to replace uses of "%p" with "%#jx", and the pointer arguments are now cast to (uintptr_t) before being subsequently cast to (uintmax_t). This is needed to avoid compiler warnings about casting "pointer to integer of a different size." * All [2] existing users of kernhist(9) have had instances of "%s" or "%c" format strings replaced with numeric formats; several instances of mis-match between format string and argument list have been fixed. * vmstat(1) has been modified to handle the new size of arguments in the history data as exported by sysctl(9). * vmstat(1) now provides a warning message if the history requested with the -u option does not exist (previously, this condition was silently ignored, with only a single blank line being printed). * vmstat(1) now checks the version and argument length included in the data exported via sysctl(9) and exits if they do not match the values with which vmstat was built. * The kernhist(9) man-page has been updated to note the additional requirements imposed on the format strings, along with several other minor changes and enhancements. [1] It would have been possible to use an explicit length (for example, uint64_t) for the history arguments. But that would require another "rototill" of all the users in the future when we add support for an architecture that supports a larger size. Also, the printf(3) format specifiers for explicitly-sized values, such as "%"PRIu64, are much more verbose (and less aesthetically appealing, IMHO) than simply using "%ju". [2] I've tried very hard to find "all [the] existing users of kernhist(9)" but it is possible that I've missed some of them. I would be glad to update any stragglers that anyone identifies.
312 lines
9.2 KiB
C
312 lines
9.2 KiB
C
/* $NetBSD: kern_xxx.c,v 1.74 2017/10/28 00:37:11 pgoyette Exp $ */
|
|
|
|
/*
|
|
* Copyright (c) 1982, 1986, 1989, 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* @(#)kern_xxx.c 8.3 (Berkeley) 2/14/95
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__KERNEL_RCSID(0, "$NetBSD: kern_xxx.c,v 1.74 2017/10/28 00:37:11 pgoyette Exp $");
|
|
|
|
#ifdef _KERNEL_OPT
|
|
#include "opt_syscall_debug.h"
|
|
#include "opt_kernhist.h"
|
|
#endif
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/reboot.h>
|
|
#include <sys/syscall.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/syscall.h>
|
|
#include <sys/syscallargs.h>
|
|
#include <sys/kauth.h>
|
|
#include <sys/kernhist.h>
|
|
|
|
/* ARGSUSED */
|
|
int
|
|
sys_reboot(struct lwp *l, const struct sys_reboot_args *uap, register_t *retval)
|
|
{
|
|
/* {
|
|
syscallarg(int) opt;
|
|
syscallarg(char *) bootstr;
|
|
} */
|
|
int error;
|
|
char *bootstr, bs[128];
|
|
|
|
if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_REBOOT,
|
|
0, NULL, NULL, NULL)) != 0)
|
|
return (error);
|
|
|
|
/*
|
|
* Only use the boot string if RB_STRING is set.
|
|
*/
|
|
if ((SCARG(uap, opt) & RB_STRING) &&
|
|
(error = copyinstr(SCARG(uap, bootstr), bs, sizeof(bs), 0)) == 0)
|
|
bootstr = bs;
|
|
else
|
|
bootstr = NULL;
|
|
/*
|
|
* Not all ports use the bootstr currently.
|
|
*/
|
|
KERNEL_LOCK(1, NULL);
|
|
cpu_reboot(SCARG(uap, opt), bootstr);
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Pull in the indirect syscall functions here.
|
|
* They are only actually used if the ports syscall entry code
|
|
* doesn't special-case SYS_SYSCALL and SYS___SYSCALL
|
|
*
|
|
* In some cases the generated code for the two functions is identical,
|
|
* but there isn't a MI way of determining that - so we don't try.
|
|
*/
|
|
|
|
#define SYS_SYSCALL sys_syscall
|
|
#include "sys_syscall.c"
|
|
#undef SYS_SYSCALL
|
|
|
|
#define SYS_SYSCALL sys___syscall
|
|
#include "sys_syscall.c"
|
|
#undef SYS_SYSCALL
|
|
|
|
#ifdef SYSCALL_DEBUG
|
|
#define SCDEBUG_CALLS 0x0001 /* show calls */
|
|
#define SCDEBUG_RETURNS 0x0002 /* show returns */
|
|
#define SCDEBUG_ALL 0x0004 /* even syscalls that are not implemented */
|
|
#define SCDEBUG_SHOWARGS 0x0008 /* show arguments to calls */
|
|
#define SCDEBUG_KERNHIST 0x0010 /* use kernhist instead of printf */
|
|
|
|
#ifndef SCDEBUG_DEFAULT
|
|
#define SCDEBUG_DEFAULT (SCDEBUG_CALLS|SCDEBUG_RETURNS|SCDEBUG_SHOWARGS)
|
|
#endif
|
|
|
|
int scdebug = SCDEBUG_DEFAULT;
|
|
|
|
#ifdef KERNHIST
|
|
KERNHIST_DEFINE(scdebughist);
|
|
#define SCDEBUG_KERNHIST_FUNC(a) KERNHIST_FUNC(a)
|
|
#define SCDEBUG_KERNHIST_CALLED(a) KERNHIST_CALLED(a)
|
|
#define SCDEBUG_KERNHIST_LOG(a,b,c,d,e,f) KERNHIST_LOG(a,b,c,d,e,f)
|
|
#else
|
|
#define SCDEBUG_KERNHIST_FUNC(a) /* nothing */
|
|
#define SCDEBUG_KERNHIST_CALLED(a) /* nothing */
|
|
#define SCDEBUG_KERNHIST_LOG(a,b,c,d,e,f) /* nothing */
|
|
/* The non-kernhist support version can elide all this code easily. */
|
|
#undef SCDEBUG_KERNHIST
|
|
#define SCDEBUG_KERNHIST 0
|
|
#endif
|
|
|
|
#ifdef __HAVE_MINIMAL_EMUL
|
|
#define CODE_NOT_OK(code, em) ((int)(code) < 0)
|
|
#else
|
|
#define CODE_NOT_OK(code, em) (((int)(code) < 0) || \
|
|
((int)(code) >= (em)->e_nsysent))
|
|
#endif
|
|
|
|
void
|
|
scdebug_call(register_t code, const register_t args[])
|
|
{
|
|
SCDEBUG_KERNHIST_FUNC("scdebug_call");
|
|
struct lwp *l = curlwp;
|
|
struct proc *p = l->l_proc;
|
|
const struct sysent *sy;
|
|
const struct emul *em;
|
|
int i;
|
|
|
|
if ((scdebug & SCDEBUG_CALLS) == 0)
|
|
return;
|
|
|
|
if (scdebug & SCDEBUG_KERNHIST)
|
|
SCDEBUG_KERNHIST_CALLED(scdebughist);
|
|
|
|
em = p->p_emul;
|
|
sy = &em->e_sysent[code];
|
|
|
|
if ((scdebug & SCDEBUG_ALL) == 0 &&
|
|
(CODE_NOT_OK(code, em) || sy->sy_call == sys_nosys)) {
|
|
if (scdebug & SCDEBUG_KERNHIST)
|
|
SCDEBUG_KERNHIST_LOG(scdebughist, "", 0, 0, 0, 0);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* The kernhist version of scdebug needs to restrict the usage
|
|
* compared to the normal version. histories must avoid these
|
|
* sorts of usage:
|
|
*
|
|
* - the format string *must* be literal, as it is used
|
|
* at display time in the kernel or userland
|
|
* - strings in the format will cause vmstat -u to crash
|
|
* so avoid using %s formats
|
|
*
|
|
* to avoid these, we have a fairly long block to print args
|
|
* as the format needs to change for each, and we can't just
|
|
* call printf() on each argument until we're done.
|
|
*/
|
|
if (scdebug & SCDEBUG_KERNHIST) {
|
|
if (CODE_NOT_OK(code, em)) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"pid %jd:%jd: OUT OF RANGE (%jd)",
|
|
p->p_pid, l->l_lid, code, 0);
|
|
} else {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"pid %jd:%jd: num %jd call %#jx",
|
|
p->p_pid, l->l_lid, code, (uintptr_t)sy->sy_call);
|
|
if ((scdebug & SCDEBUG_SHOWARGS) == 0)
|
|
return;
|
|
|
|
if (sy->sy_narg > 7) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"args[4-7]: (%jx, %jx, %jx, %jx, ...)",
|
|
(long)args[4], (long)args[5],
|
|
(long)args[6], (long)args[7]);
|
|
} else if (sy->sy_narg > 6) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"args[4-6]: (%jx, %jx, %jx)",
|
|
(long)args[4], (long)args[5],
|
|
(long)args[6], 0);
|
|
} else if (sy->sy_narg > 5) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"args[4-5]: (%jx, %jx)",
|
|
(long)args[4], (long)args[5], 0, 0);
|
|
} else if (sy->sy_narg == 5) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"args[4]: (%jx)",
|
|
(long)args[4], 0, 0, 0);
|
|
}
|
|
|
|
if (sy->sy_narg > 3) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"args[0-3]: (%jx, %jx, %jx, %jx, ...)",
|
|
(long)args[0], (long)args[1],
|
|
(long)args[2], (long)args[3]);
|
|
} else if (sy->sy_narg > 2) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"args[0-2]: (%jx, %jx, %jx)",
|
|
(long)args[0], (long)args[1],
|
|
(long)args[2], 0);
|
|
} else if (sy->sy_narg > 1) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"args[0-1]: (%jx, %jx)",
|
|
(long)args[0], (long)args[1], 0, 0);
|
|
} else if (sy->sy_narg == 1) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"args[0]: (%jx)",
|
|
(long)args[0], 0, 0, 0);
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
printf("proc %d (%s): %s num ", p->p_pid, p->p_comm, em->e_name);
|
|
if (CODE_NOT_OK(code, em))
|
|
printf("OUT OF RANGE (%ld)", (long)code);
|
|
else {
|
|
printf("%ld call: %s", (long)code, em->e_syscallnames[code]);
|
|
if (scdebug & SCDEBUG_SHOWARGS) {
|
|
printf("(");
|
|
for (i = 0; i < sy->sy_argsize/sizeof(register_t); i++)
|
|
printf("%s0x%lx", i == 0 ? "" : ", ",
|
|
(long)args[i]);
|
|
printf(")");
|
|
}
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
void
|
|
scdebug_ret(register_t code, int error, const register_t retval[])
|
|
{
|
|
SCDEBUG_KERNHIST_FUNC("scdebug_ret");
|
|
struct lwp *l = curlwp;
|
|
struct proc *p = l->l_proc;
|
|
const struct sysent *sy;
|
|
const struct emul *em;
|
|
|
|
if ((scdebug & SCDEBUG_RETURNS) == 0)
|
|
return;
|
|
|
|
if (scdebug & SCDEBUG_KERNHIST)
|
|
SCDEBUG_KERNHIST_CALLED(scdebughist);
|
|
|
|
em = p->p_emul;
|
|
sy = &em->e_sysent[code];
|
|
if ((scdebug & SCDEBUG_ALL) == 0 &&
|
|
(CODE_NOT_OK(code, em) || sy->sy_call == sys_nosys)) {
|
|
if (scdebug & SCDEBUG_KERNHIST)
|
|
SCDEBUG_KERNHIST_LOG(scdebughist, "", 0, 0, 0, 0);
|
|
return;
|
|
}
|
|
|
|
if (scdebug & SCDEBUG_KERNHIST) {
|
|
if (CODE_NOT_OK(code, em)) {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"pid %jd:%jd: OUT OF RANGE (%jd)",
|
|
p->p_pid, l->l_lid, code, 0);
|
|
} else {
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"pid %jd:%jd: num %jd",
|
|
p->p_pid, l->l_lid, code, 0);
|
|
SCDEBUG_KERNHIST_LOG(scdebughist,
|
|
"ret: err = %jd, rv = 0x%jx,0x%jx",
|
|
error, (long)retval[0], (long)retval[1], 0);
|
|
}
|
|
return;
|
|
}
|
|
|
|
printf("proc %d (%s): %s num ", p->p_pid, p->p_comm, em->e_name);
|
|
if (CODE_NOT_OK(code, em))
|
|
printf("OUT OF RANGE (%ld)", (long)code);
|
|
else
|
|
printf("%ld ret %s: err = %d, rv = 0x%lx,0x%lx", (long)code,
|
|
em->e_syscallnames[code], error,
|
|
(long)retval[0], (long)retval[1]);
|
|
printf("\n");
|
|
}
|
|
#endif /* SYSCALL_DEBUG */
|
|
|
|
#ifndef SCDEBUG_KERNHIST_SIZE
|
|
#define SCDEBUG_KERNHIST_SIZE 500
|
|
#endif
|
|
|
|
void
|
|
scdebug_init(void)
|
|
{
|
|
#if defined(SYSCALL_DEBUG) && defined(KERNHIST)
|
|
/* Setup scdebughist kernel history */
|
|
KERNHIST_INIT(scdebughist, SCDEBUG_KERNHIST_SIZE);
|
|
#endif
|
|
}
|