In pool_cache_put_slow(), pool_get() can block (it does mutex_enter()),

so we need to retry if curlwp took a context switch during the call.
Otherwise, CPU-local invariants can get screwed up:

    panic: kernel diagnostic assertion "cur->pcg_avail == cur->pcg_size" failed

This is (was) very easy to reproduce by just running:

  while : ; do RUMP_NCPU=32 ./a.out ; done

where a.out only calls rump_init().  But, any situation there's contention
and a pool doesn't have emptygroups would do.
This commit is contained in:
pooka 2013-03-11 21:37:54 +00:00
parent 47c0e0c312
commit 83a2a556bf

@ -1,4 +1,4 @@
/* $NetBSD: subr_pool.c,v 1.199 2013/02/09 00:31:21 christos Exp $ */
/* $NetBSD: subr_pool.c,v 1.200 2013/03/11 21:37:54 pooka Exp $ */
/*-
* Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010
@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.199 2013/02/09 00:31:21 christos Exp $");
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.200 2013/03/11 21:37:54 pooka Exp $");
#include "opt_ddb.h"
#include "opt_lockdebug.h"
@ -2261,6 +2261,7 @@ pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap)
static bool __noinline
pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object)
{
struct lwp *l = curlwp;
pcg_t *pcg, *cur;
uint64_t ncsw;
pool_cache_t pc;
@ -2271,6 +2272,7 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object)
pc = cc->cc_cache;
pcg = NULL;
cc->cc_misses++;
ncsw = l->l_ncsw;
/*
* If there are no empty groups in the cache then allocate one
@ -2280,6 +2282,16 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object)
if (__predict_true(!pool_cache_disable)) {
pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT);
}
/*
* If pool_get() blocked, then our view of
* the per-CPU data is invalid: retry.
*/
if (__predict_false(l->l_ncsw != ncsw)) {
if (pcg != NULL) {
pool_put(pc->pc_pcgpool, pcg);
}
return true;
}
if (__predict_true(pcg != NULL)) {
pcg->pcg_avail = 0;
pcg->pcg_size = pc->pc_pcgsize;
@ -2288,7 +2300,6 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object)
/* Lock the cache. */
if (__predict_false(!mutex_tryenter(&pc->pc_lock))) {
ncsw = curlwp->l_ncsw;
mutex_enter(&pc->pc_lock);
pc->pc_contended++;
@ -2296,7 +2307,7 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object)
* If we context switched while locking, then our view of
* the per-CPU data is invalid: retry.
*/
if (__predict_false(curlwp->l_ncsw != ncsw)) {
if (__predict_false(l->l_ncsw != ncsw)) {
mutex_exit(&pc->pc_lock);
if (pcg != NULL) {
pool_put(pc->pc_pcgpool, pcg);