kern: Fix synchronization of clearing LP_RUNNING and lwp_free.

1. membar_sync is not necessary here -- only a store-release is
   required.

2. membar_consumer _before_ loading l->l_pflag is not enough; a
   load-acquire is required.

Actually it's not really clear to me why any barriers are needed, since
the store-release and load-acquire should be implied by releasing and
acquiring the lwp lock (and maybe we could spin with the lock instead
of reading l->l_pflag unlocked).  But maybe there's something subtle
about access to l->l_mutex that's not obvious here.
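
For illustration, a minimal standalone C11 sketch of the handoff pattern this
change adopts (not NetBSD code: the names lwp_sketch, switch_away, and reap
are invented for the example, and C11 stdatomic stands in for the kernel's
atomic_store_release/atomic_load_acquire):

#include <stdatomic.h>
#include <stdlib.h>

#define LP_RUNNING	0x01

struct lwp_sketch {
	int stack_cookie;	/* stands in for the rest of the LWP */
	atomic_uint pflag;	/* LP_RUNNING lives here */
};

/* Switching CPU: last touch of a zombie before another CPU reaps it. */
static void
switch_away(struct lwp_sketch *prev)
{
	unsigned f;

	prev->stack_cookie = 0;	/* plain stores to the dying LWP */

	/*
	 * The release store orders the plain stores above before the
	 * flag clear: a reaper that observes LP_RUNNING clear also
	 * observes them.  No full barrier (membar_sync) is needed.
	 */
	f = atomic_load_explicit(&prev->pflag, memory_order_relaxed);
	atomic_store_explicit(&prev->pflag, f & ~LP_RUNNING,
	    memory_order_release);

	/* prev must not be touched from here on. */
}

/* Reaping CPU: spin until the switch has completed, then free safely. */
static void
reap(struct lwp_sketch *l)
{
	/*
	 * The acquire load orders the spin before the free; a consumer
	 * barrier issued once, before the load, would not.
	 */
	while (atomic_load_explicit(&l->pflag, memory_order_acquire) &
	    LP_RUNNING)
		continue;	/* cf. SPINLOCK_BACKOFF_HOOK */

	free(l);
}

On a TSO machine such as x86 the release store compiles to an ordinary store,
so this is also cheaper than the membar_sync it replaces.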
riastradh 2022-03-10 12:21:25 +00:00
parent 7e3fbda44d
commit b2097dd8cd
2 changed files with 26 additions and 15 deletions

--- a/sys/kern/kern_lwp.c
+++ b/sys/kern/kern_lwp.c
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_lwp.c,v 1.246 2021/12/22 16:57:28 thorpej Exp $ */
+/* $NetBSD: kern_lwp.c,v 1.247 2022/03/10 12:21:25 riastradh Exp $ */
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020
@@ -217,7 +217,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.246 2021/12/22 16:57:28 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.247 2022/03/10 12:21:25 riastradh Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
@@ -1017,16 +1017,19 @@ lwp_startup(struct lwp *prev, struct lwp *new_lwp)
 	KASSERT(curcpu()->ci_mtx_count == -2);
 
 	/*
-	 * Immediately mark the previous LWP as no longer running and unlock
-	 * (to keep lock wait times short as possible).  If a zombie, don't
-	 * touch after clearing LP_RUNNING as it could be reaped by another
-	 * CPU.  Issue a memory barrier to ensure this.
+	 * Immediately mark the previous LWP as no longer running and
+	 * unlock (to keep lock wait times short as possible).  If a
+	 * zombie, don't touch after clearing LP_RUNNING as it could be
+	 * reaped by another CPU.  Use atomic_store_release to ensure
+	 * this -- matches atomic_load_acquire in lwp_free.
 	 */
 	lock = prev->l_mutex;
 	if (__predict_false(prev->l_stat == LSZOMB)) {
-		membar_sync();
+		atomic_store_release(&prev->l_pflag,
+		    prev->l_pflag & ~LP_RUNNING);
+	} else {
+		prev->l_pflag &= ~LP_RUNNING;
 	}
-	prev->l_pflag &= ~LP_RUNNING;
 	mutex_spin_exit(lock);
 
 	/* Correct spin mutex count after mi_switch(). */
@@ -1246,9 +1249,12 @@ lwp_free(struct lwp *l, bool recycle, bool last)
 		/*
 		 * In the unlikely event that the LWP is still on the CPU,
 		 * then spin until it has switched away.
+		 *
+		 * atomic_load_acquire matches atomic_store_release in
+		 * lwp_startup and mi_switch.
 		 */
-		membar_consumer();
-		while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) {
+		while (__predict_false((atomic_load_acquire(&l->l_pflag) & LP_RUNNING)
+		    != 0)) {
 			SPINLOCK_BACKOFF_HOOK;
 		}
 

--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_synch.c,v 1.349 2020/05/23 23:42:43 ad Exp $ */
+/* $NetBSD: kern_synch.c,v 1.350 2022/03/10 12:21:25 riastradh Exp $ */
 
 /*-
  * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020
@@ -69,7 +69,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.349 2020/05/23 23:42:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.350 2022/03/10 12:21:25 riastradh Exp $");
 
 #include "opt_kstack.h"
 #include "opt_dtrace.h"
@@ -783,18 +783,23 @@ mi_switch(lwp_t *l)
 
 		/*
 		 * Immediately mark the previous LWP as no longer running
-		 * and unlock (to keep lock wait times short as possible). 
+		 * and unlock (to keep lock wait times short as possible).
 		 * We'll still be at IPL_SCHED afterwards.  If a zombie,
 		 * don't touch after clearing LP_RUNNING as it could be
-		 * reaped by another CPU.  Issue a memory barrier to ensure
-		 * this.
+		 * reaped by another CPU.
+		 *
+		 * atomic_store_release matches atomic_load_acquire in
+		 * lwp_free.
 		 */
 		KASSERT((prevlwp->l_pflag & LP_RUNNING) != 0);
 		lock = prevlwp->l_mutex;
 		if (__predict_false(prevlwp->l_stat == LSZOMB)) {
-			membar_sync();
+			atomic_store_release(&prevlwp->l_pflag,
+			    prevlwp->l_pflag & ~LP_RUNNING);
+		} else {
+			prevlwp->l_pflag &= ~LP_RUNNING;
 		}
-		prevlwp->l_pflag &= ~LP_RUNNING;
 		mutex_spin_exit(lock);
 
 		/*