- Add lwp_pctr(), which returns an LWP's preemption/context-switch counter.

- Fix a preemption bug in CURCPU_IDLE_P() that can lead to a bogus
  assertion failure on DEBUG kernels.
- Fix MP/preemption races with timecounter detachment.
ad 2009-05-23 17:08:04 +00:00
parent a742b7d3d8
commit 2fc2b08001
3 changed files with 122 additions and 20 deletions

sys/kern/kern_lock.c

@@ -1,7 +1,7 @@
-/*	$NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $	*/
+/*	$NetBSD: kern_lock.c,v 1.148 2009/05/23 17:08:04 ad Exp $	*/
 
 /*-
- * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.148 2009/05/23 17:08:04 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -42,6 +42,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $");
 #include <sys/cpu.h>
 #include <sys/syslog.h>
 #include <sys/atomic.h>
+#include <sys/lwp.h>
 
 #include <machine/stdarg.h>
 #include <machine/lock.h>
@@ -59,6 +60,8 @@ void
 assert_sleepable(void)
 {
 	const char *reason;
+	uint64_t pctr;
+	bool idle;
 
 	if (panicstr != NULL) {
 		return;
@@ -66,14 +69,23 @@ assert_sleepable(void)
 
 	LOCKDEBUG_BARRIER(kernel_lock, 1);
 
+	/*
+	 * Avoid disabling/re-enabling preemption here since this
+	 * routine may be called in delicate situations.
+	 */
+	do {
+		pctr = lwp_pctr();
+		idle = CURCPU_IDLE_P();
+	} while (pctr != lwp_pctr());
+
 	reason = NULL;
-	if (CURCPU_IDLE_P() && !cold) {
+	if (idle && !cold) {
 		reason = "idle";
 	}
 	if (cpu_intr_p()) {
 		reason = "interrupt";
 	}
-	if ((curlwp->l_pflag & LP_INTR) != 0) {
+	if (cpu_softintr_p()) {
 		reason = "softint";
 	}
 
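The assert_sleepable() change above hinges on a small idiom. Since neither preemption nor interrupts are disabled, the LWP could be preempted and migrated to another CPU between evaluating CURCPU_IDLE_P() and using the result, which is what produced the bogus assertion failures on DEBUG kernels. Sampling lwp_pctr() before and after closes that window: if the counter is unchanged, no context switch occurred during the sample. A minimal sketch of the idiom in isolation (the wrapper function here is illustrative, not part of the commit):

#include <sys/param.h>
#include <sys/lwp.h>
#include <sys/cpu.h>

/*
 * Take a preemption-safe sample of a per-CPU predicate without
 * blocking preemption.  If the LWP is context switched (and possibly
 * migrated) mid-sample, lwp_pctr() changes and we retry, so the
 * result always describes a single CPU.
 */
static bool
curcpu_idle_sample(void)
{
	uint64_t pctr;
	bool idle;

	do {
		pctr = lwp_pctr();	/* counter before the sample */
		idle = CURCPU_IDLE_P();	/* the per-CPU state of interest */
	} while (pctr != lwp_pctr());	/* changed => preempted; retry */

	return idle;
}

The loop is cheap in the common case: one extra read of a per-LWP counter, with no atomics or barriers on the fast path.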

sys/kern/kern_tc.c

@@ -1,9 +1,12 @@
-/*	$NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $ */
+/*	$NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $ */
 
 /*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
  * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
@@ -37,7 +40,7 @@
 
 #include <sys/cdefs.h>
 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
-__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $");
 
 #include "opt_ntp.h"
@@ -54,6 +57,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $");
 #include <sys/kauth.h>
 #include <sys/mutex.h>
 #include <sys/atomic.h>
+#include <sys/xcall.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
@@ -126,6 +130,7 @@ static int timestepwarnings;
 
 kmutex_t timecounter_lock;
 static u_int timecounter_mods;
+static volatile int timecounter_removals = 1;
 static u_int timecounter_bad;
 
 #ifdef __FreeBSD__
@@ -309,15 +314,49 @@ void
 binuptime(struct bintime *bt)
 {
 	struct timehands *th;
-	u_int gen;
+	lwp_t *l;
+	u_int lgen, gen;
 
 	TC_COUNT(nbinuptime);
+
+	/*
+	 * Provide exclusion against tc_detach().
+	 *
+	 * We record the number of timecounter removals before accessing
+	 * timecounter state.  Note that the LWP can be using multiple
+	 * "generations" at once, due to interrupts (interrupted while in
+	 * this function).  Hardware interrupts will borrow the interrupted
+	 * LWP's l_tcgen value for this purpose, and can themselves be
+	 * interrupted by higher priority interrupts.  In this case we need
+	 * to ensure that the oldest generation in use is recorded.
+	 *
+	 * splsched() is too expensive to use, so we take care to structure
+	 * this code in such a way that it is not required.  Likewise, we
+	 * do not disable preemption.
+	 *
+	 * Memory barriers are also too expensive to use for such a
+	 * performance critical function.  The good news is that we do not
+	 * need memory barriers for this type of exclusion, as the thread
+	 * updating timecounter_removals will issue a broadcast cross call
+	 * before inspecting our l_tcgen value (this elides memory ordering
+	 * issues).
+	 */
+	l = curlwp;
+	lgen = l->l_tcgen;
+	if (__predict_true(lgen == 0)) {
+		l->l_tcgen = timecounter_removals;
+	}
+	__insn_barrier();
+
 	do {
 		th = timehands;
 		gen = th->th_generation;
 		*bt = th->th_offset;
 		bintime_addx(bt, th->th_scale * tc_delta(th));
 	} while (gen == 0 || gen != th->th_generation);
+
+	__insn_barrier();
+	l->l_tcgen = lgen;
 }
 
 void
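The comment above is dense, so a stripped-down user-space model of the l_tcgen discipline may help. Everything below is a hypothetical model with invented names, not kernel API:

#include <stdint.h>

/* Models `timecounter_removals': bumped once per detach. */
static volatile int removals = 1;

/* Models l_tcgen; 0 means "not using timecounter state". */
static __thread int tcgen;

/* Models __insn_barrier(): a compiler barrier, not a memory barrier. */
#define insn_barrier()	__asm volatile("" ::: "memory")

static uint64_t
model_binuptime(void)
{
	int lgen;
	uint64_t t;

	lgen = tcgen;
	if (lgen == 0) {
		/* Outermost entry: publish the generation we start in. */
		tcgen = removals;
	}
	insn_barrier();

	t = 0;		/* ...the lockless timehands read goes here... */

	insn_barrier();
	tcgen = lgen;	/* restore; the oldest value stays published */
	return t;
}

Restoring the saved value on the way out is what makes nesting work: an interrupt arriving mid-read (modelled here by a recursive call) finds a non-zero generation already published, leaves it alone, and puts it back when it returns, so the oldest generation in use remains visible to the detach code.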
@@ -543,8 +582,11 @@ tc_detach(struct timecounter *target)
 {
 	struct timecounter *tc;
 	struct timecounter **tcp = NULL;
-	int rc = 0;
+	int removals;
+	uint64_t where;
+	lwp_t *l;
 
+	/* First, find the timecounter. */
 	mutex_spin_enter(&timecounter_lock);
 	for (tcp = &timecounters, tc = timecounters;
 	     tc != NULL;
@@ -553,17 +595,62 @@ tc_detach(struct timecounter *target)
 			break;
 	}
 	if (tc == NULL) {
-		rc = ESRCH;
-	} else {
-		*tcp = tc->tc_next;
-		if (timecounter == target) {
-			tc_pick();
-			tc_windup();
-		}
-		timecounter_mods++;
+		mutex_spin_exit(&timecounter_lock);
+		return ESRCH;
 	}
+
+	/* And now, remove it. */
+	*tcp = tc->tc_next;
+	if (timecounter == target) {
+		tc_pick();
+		tc_windup();
+	}
+	timecounter_mods++;
+	removals = timecounter_removals++;
 	mutex_spin_exit(&timecounter_lock);
-	return rc;
+
+	/*
+	 * We now have to determine if any threads in the system are still
+	 * making use of this timecounter.
+	 *
+	 * We issue a broadcast cross call to elide memory ordering issues,
+	 * then scan all LWPs in the system looking at each one's timecounter
+	 * generation number.  We need to see a value of zero (not actively
+	 * using a timecounter) or a value greater than our removal value.
+	 *
+	 * We may race with threads that read `timecounter_removals' and
+	 * then get preempted before updating `l_tcgen'.  This is not a
+	 * problem, since it means that these threads have not yet started
+	 * accessing timecounter state.  All we need is one clean snapshot
+	 * of the system where every thread appears not to be using old
+	 * timecounter state.
+	 */
+	for (;;) {
+		where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
+		xc_wait(where);
+
+		mutex_enter(proc_lock);
+		LIST_FOREACH(l, &alllwp, l_list) {
+			if (l->l_tcgen == 0 || l->l_tcgen > removals) {
+				/*
+				 * Not using timecounter or old timecounter
+				 * state at time of our xcall or later.
+				 */
+				continue;
+			}
+			break;
+		}
+		mutex_exit(proc_lock);
+
+		/*
+		 * If the timecounter is still in use, wait at least 10ms
+		 * before retrying.
+		 */
+		if (l == NULL) {
+			return 0;
+		}
+		(void)kpause("tcdetach", false, mstohz(10), NULL);
+	}
 }
 
 /* Report the frequency of the current timecounter. */
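The detach side can be modelled the same way. After bumping the removal generation and forcing every CPU through a no-op cross call, which orders each reader's l_tcgen store ahead of the scan, any published generation that is non-zero and not greater than the snapshot may still refer to the old state. A hypothetical user-space rendering of the scan (thread table and names invented for illustration):

#include <stdbool.h>

#define MODEL_NTHREADS	64

/* Models the alllwp walk: one published generation per thread. */
extern volatile int model_tcgen[MODEL_NTHREADS];

/*
 * True once no thread can still be using state that predates removal
 * generation `removals': every slot must be 0 (not reading) or greater
 * than `removals' (started after the removal).
 */
static bool
model_quiesced(int removals)
{
	int i, gen;

	for (i = 0; i < MODEL_NTHREADS; i++) {
		gen = model_tcgen[i];
		if (gen != 0 && gen <= removals)
			return false;	/* straggler; caller must retry */
	}
	return true;
}

tc_detach() wraps this scan in its for (;;) loop, re-issuing the cross call on every round and sleeping at least 10ms via kpause() between rounds until one clean snapshot is seen.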

sys/sys/lwp.h

@@ -1,4 +1,4 @@
-/*	$NetBSD: lwp.h,v 1.117 2009/02/04 21:17:39 ad Exp $	*/
+/*	$NetBSD: lwp.h,v 1.118 2009/05/23 17:08:05 ad Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -173,6 +173,8 @@ struct lwp {
 	uintptr_t	l_pfailaddr;	/* !: for kernel preemption */
 	uintptr_t	l_pfaillock;	/* !: for kernel preemption */
 	_TAILQ_HEAD(,struct lockdebug,volatile) l_ld_locks;/* !: locks held by LWP */
+	int		l_tcgen;	/* !: for timecounter removal */
+	int		l_unused2;	/* !: for future use */
 
 	/* These are only used by 'options SYSCALL_TIMES' */
 	uint32_t	l_syscall_time;	/* !: time epoch for current syscall */
@@ -306,6 +308,7 @@ void	lwp_need_userret(lwp_t *);
 void	lwp_free(lwp_t *, bool, bool);
 void	lwp_sys_init(void);
 u_int	lwp_unsleep(lwp_t *, bool);
+uint64_t lwp_pctr(void);
 
 int	lwp_specific_key_create(specificdata_key_t *, specificdata_dtor_t);
 void	lwp_specific_key_delete(specificdata_key_t);
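Only the lwp_pctr() prototype is part of this three-file diff; the function body lands outside it. A minimal sketch of a plausible implementation, assuming the per-LWP context switch count maintained by mi_switch() is the counter being exposed (the field name below is an assumption, not something this diff confirms):

#include <sys/param.h>
#include <sys/lwp.h>

/*
 * Sketch: return the calling LWP's preemption/context switch counter.
 * Every context switch, voluntary or involuntary, must tick this for
 * the retry idiom in assert_sleepable() and friends to detect
 * preemption or migration.  l_ncsw is an assumed field name.
 */
uint64_t
lwp_pctr(void)
{

	return curlwp->l_ncsw;
}

Any monotonic per-LWP counter with that property would do; callers only care whether the value changed between two reads.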