heartbeat(9): Ignore stale tc if primary CPU heartbeat is suspended.

The timecounter ticks only on the primary CPU, so of course the
timecounter will go stale if that CPU is suspended.

(It is, perhaps, a mistake that it only ticks on the primary CPU,
even if the primary CPU is offlined or in a polled-input console
loop, but that's a separate issue.)
This commit is contained in:
riastradh 2023-09-02 17:44:23 +00:00
parent aa93680353
commit 95d8ae3ce4
1 changed file with 41 additions and 4 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_heartbeat.c,v 1.6 2023/09/02 17:43:37 riastradh Exp $ */
/* $NetBSD: kern_heartbeat.c,v 1.7 2023/09/02 17:44:23 riastradh Exp $ */
/*-
* Copyright (c) 2023 The NetBSD Foundation, Inc.
@ -78,7 +78,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.6 2023/09/02 17:43:37 riastradh Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.7 2023/09/02 17:44:23 riastradh Exp $");
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
@ -198,6 +198,41 @@ heartbeat_resume(void)
splx(s);
}
/*
 * heartbeat_timecounter_suspended()
 *
 *	Report whether the heartbeat logic should skip its check for
 *	timecounter progress.  Returns true if the primary CPU has
 *	SPCF_HEARTBEATSUSPENDED set in its scheduler state flags, in
 *	which case the timecounter may legitimately not be advancing;
 *	returns false if timecounter progress should still be checked.
 *
 *	Panics if no CPU in the system is the primary CPU.
 */
static bool
heartbeat_timecounter_suspended(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct cpu_info *primary = NULL;

	/*
	 * Only the primary CPU ticks the timecounter, so it is the
	 * only CPU whose suspension state matters here.
	 *
	 * XXX Would be nice if we could find the primary CPU without
	 * iterating over all CPUs.
	 */
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (CPU_IS_PRIMARY(ci)) {
			primary = ci;
			break;
		}
	}

	/*
	 * There had better be a primary CPU in the system -- without
	 * one the timecounter would be busted anyway, so this should
	 * never trigger.
	 */
	if (primary == NULL)
		panic("no primary CPU");

	return (primary->ci_schedstate.spc_flags &
	    SPCF_HEARTBEATSUSPENDED) != 0;
}
/*
* heartbeat_reset_xc(a, b)
*
@ -598,7 +633,8 @@ heartbeat(void)
/*
* Timecounter hasn't advanced by more than a second.
* Make sure the timecounter isn't stuck according to
* our heartbeats.
* our heartbeats -- unless timecounter heartbeats are
* suspended too.
*
* Our own heartbeat count can't roll back, and
* time_uptime should be updated before it wraps
@ -608,7 +644,8 @@ heartbeat(void)
stamp =
atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_stamp);
d = count - stamp;
if (__predict_false(d > period_ticks)) {
if (__predict_false(d > period_ticks) &&
!heartbeat_timecounter_suspended()) {
panic("%s: time has not advanced in %u heartbeats",
cpu_name(curcpu()), d);
}