ucas(9): Membar audit.
- Omit needless membar_enter before ipi_trigger_broadcast. This was presumably intended to imply a happens-before relation for the following two CPUs: /* CPU doing ucas */ ucas_critical_enter() ucas_critical_pausing_cpus = ncpu - 1 (A) ipi_trigger_broadcast() /* other CPU walking by whistling innocently */ IPI handler ucas_critical_cpu_gate() load ucas_critical_pausing_cpus (B) That is, this was presumably meant to ensure (A) happens-before (B). This relation is already guaranteed by ipi(9), so there is no need for any explicit memory barrier. - Issue a store-release in ucas_critical_cpu_gate so we have the following happens-before relation which was otherwise not guaranteed except if __HAVE_ATOMIC_AS_MEMBAR: /* other CPU walking by whistling innocently */ ...other logic touching the target ucas word... (A) IPI handler ucas_critical_cpu_gate() ... atomic_dec_uint(&ucas_critical_pausing_cpus) happens-before /* CPU doing ucas */ ucas_critical_enter() -> ucas_critical_wait(); ...touching the word with ufetch/ustore... (B) We need to ensure the logic (A) on another CPU touching the target ucas word happens-before we actually do the ucas at (B). (a) This requires the other CPU to do a store-release on ucas_critical_pausing_cpus in ucas_critical_cpu_gate, and (b) this requires the ucas CPU to do a load-acquire on ucas_critical_pausing_cpus in ucas_critical_wait. Without _both_ sides -- store-release and then load-acquire -- there is no such happens-before guarantee; another CPU may have a buffered store, for instance, that clobbers the ucas. For now, do the store-release with membar_exit conditional on __HAVE_ATOMIC_AS_MEMBAR and then atomic_dec_uint -- later with the C11 API we can dispense with the #ifdef and just use atomic_fetch_add_explicit(..., memory_order_release). The load-acquire we can do with atomic_load_acquire. - Issue a load-acquire in ucas_critical_cpu_gate so we have the following happens-before relation which was otherwise not guaranteed: /* CPU doing ucas */ ...ufetch/ustore... (A) ucas_critical_exit() ucas_critical_pausing_cpus = -1; /* other CPU walking by whistling innocently */ IPI handler ucas_critical_cpu_gate() ... while (ucas_critical_pausing_cpus != -1) spin; ...other logic touching the target ucas word... (B) We need to ensure the logic (A) to do the ucas happens-before logic that might use it on another CPU at (B). (a) This requires that the ucas CPU do a store-release on ucas_critical_pausing_cpus in ucas_critical_exit, and (b) this requires that the other CPU do a load-acquire on ucas_critical_pausing_cpus in ucas_critical_cpu_gate. Without _both_ sides -- store-release and then load-acquire -- there is no such happens-before guarantee; the other CPU might witness a cached stale value of the target location but a new value of some other location in the wrong order. - Use atomic_load/store_* to avoid the appearance of races, e.g. for sanitizers. - Document which barriers pair up with which barriers and what they're doing.
This commit is contained in:
parent
4cc1a1abfd
commit
ab67b1ab7c
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: subr_copy.c,v 1.14 2020/05/23 23:42:43 ad Exp $ */
|
||||
/* $NetBSD: subr_copy.c,v 1.15 2022/02/11 17:53:28 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1997, 1998, 1999, 2002, 2007, 2008, 2019
|
||||
|
@ -80,7 +80,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: subr_copy.c,v 1.14 2020/05/23 23:42:43 ad Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: subr_copy.c,v 1.15 2022/02/11 17:53:28 riastradh Exp $");
|
||||
|
||||
#define __UFETCHSTORE_PRIVATE
|
||||
#define __UCAS_PRIVATE
|
||||
|
@ -400,9 +400,32 @@ ucas_critical_cpu_gate(void *arg __unused)
|
|||
{
|
||||
int count = SPINLOCK_BACKOFF_MIN;
|
||||
|
||||
KASSERT(ucas_critical_pausing_cpus > 0);
|
||||
KASSERT(atomic_load_relaxed(&ucas_critical_pausing_cpus) > 0);
|
||||
|
||||
/*
|
||||
* Notify ucas_critical_wait that we have stopped. Using
|
||||
* store-release ensures all our memory operations up to the
|
||||
* IPI happen before the ucas -- no buffered stores on our end
|
||||
* can clobber it later on, for instance.
|
||||
*
|
||||
* Matches atomic_load_acquire in ucas_critical_wait -- turns
|
||||
* the following atomic_dec_uint into a store-release.
|
||||
*/
|
||||
#ifndef __HAVE_ATOMIC_AS_MEMBAR
|
||||
membar_exit();
|
||||
#endif
|
||||
atomic_dec_uint(&ucas_critical_pausing_cpus);
|
||||
while (ucas_critical_pausing_cpus != (u_int)-1) {
|
||||
|
||||
/*
|
||||
* Wait for ucas_critical_exit to reopen the gate and let us
|
||||
* proceed. Using a load-acquire ensures the ucas happens
|
||||
* before any of our memory operations when we return from the
|
||||
* IPI and proceed -- we won't observe any stale cached value
|
||||
* that the ucas overwrote, for instance.
|
||||
*
|
||||
* Matches atomic_store_release in ucas_critical_exit.
|
||||
*/
|
||||
while (atomic_load_acquire(&ucas_critical_pausing_cpus) != (u_int)-1) {
|
||||
SPINLOCK_BACKOFF(count);
|
||||
}
|
||||
}
|
||||
|
@ -410,6 +433,7 @@ ucas_critical_cpu_gate(void *arg __unused)
|
|||
static int
|
||||
ucas_critical_init(void)
|
||||
{
|
||||
|
||||
ucas_critical_ipi = ipi_register(ucas_critical_cpu_gate, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
@ -419,7 +443,16 @@ ucas_critical_wait(void)
|
|||
{
|
||||
int count = SPINLOCK_BACKOFF_MIN;
|
||||
|
||||
while (ucas_critical_pausing_cpus > 0) {
|
||||
/*
|
||||
* Wait for all CPUs to stop at the gate. Using a load-acquire
|
||||
* ensures all memory operations before they stop at the gate
|
||||
* happen before the ucas -- no buffered stores in other CPUs
|
||||
* can clobber it later on, for instance.
|
||||
*
|
||||
* Matches membar_exit/atomic_dec_uint (store-release) in
|
||||
* ucas_critical_cpu_gate.
|
||||
*/
|
||||
while (atomic_load_acquire(&ucas_critical_pausing_cpus) > 0) {
|
||||
SPINLOCK_BACKOFF(count);
|
||||
}
|
||||
}
|
||||
|
@ -444,8 +477,6 @@ ucas_critical_enter(lwp_t * const l)
|
|||
mutex_enter(&cpu_lock);
|
||||
ucas_critical_splcookie = splhigh();
|
||||
ucas_critical_pausing_cpus = ncpu - 1;
|
||||
membar_enter();
|
||||
|
||||
ipi_trigger_broadcast(ucas_critical_ipi, true);
|
||||
ucas_critical_wait();
|
||||
return;
|
||||
|
@ -461,8 +492,17 @@ ucas_critical_exit(lwp_t * const l)
|
|||
|
||||
#if !defined(__HAVE_UCAS_MP) && defined(MULTIPROCESSOR)
|
||||
if (ncpu > 1) {
|
||||
membar_exit();
|
||||
ucas_critical_pausing_cpus = (u_int)-1;
|
||||
/*
|
||||
* Open the gate and notify all CPUs in
|
||||
* ucas_critical_cpu_gate that they can now proceed.
|
||||
* Using a store-release ensures the ucas happens
|
||||
* before any memory operations they issue after the
|
||||
* IPI -- they won't observe any stale cache of the
|
||||
* target word, for instance.
|
||||
*
|
||||
* Matches atomic_load_acquire in ucas_critical_cpu_gate.
|
||||
*/
|
||||
atomic_store_release(&ucas_critical_pausing_cpus, (u_int)-1);
|
||||
splx(ucas_critical_splcookie);
|
||||
mutex_exit(&cpu_lock);
|
||||
return;
|
||||
|
|
Loading…
Reference in New Issue