From 83a2a556bf4395806373d2843de15aa28e00bcfa Mon Sep 17 00:00:00 2001 From: pooka Date: Mon, 11 Mar 2013 21:37:54 +0000 Subject: [PATCH] In pool_cache_put_slow(), pool_get() can block (it does mutex_enter()), so we need to retry if curlwp took a context switch during the call. Otherwise, CPU-local invariants can get screwed up: panic: kernel diagnostic assertion "cur->pcg_avail == cur->pcg_size" failed This is (was) very easy to reproduce by just running: while : ; do RUMP_NCPU=32 ./a.out ; done where a.out only calls rump_init(). But any situation where there's contention and a pool doesn't have empty groups would do. --- sys/kern/subr_pool.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c index 31eb87d46fff..ed63d979e044 100644 --- a/sys/kern/subr_pool.c +++ b/sys/kern/subr_pool.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_pool.c,v 1.199 2013/02/09 00:31:21 christos Exp $ */ +/* $NetBSD: subr_pool.c,v 1.200 2013/03/11 21:37:54 pooka Exp $ */ /*- * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010 @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.199 2013/02/09 00:31:21 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.200 2013/03/11 21:37:54 pooka Exp $"); #include "opt_ddb.h" #include "opt_lockdebug.h" @@ -2261,6 +2261,7 @@ pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap) static bool __noinline pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) { + struct lwp *l = curlwp; pcg_t *pcg, *cur; uint64_t ncsw; pool_cache_t pc; @@ -2271,6 +2272,7 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) pc = cc->cc_cache; pcg = NULL; cc->cc_misses++; + ncsw = l->l_ncsw; /* * If there are no empty groups in the cache then allocate one @@ -2280,6 +2282,16 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) if (__predict_true(!pool_cache_disable)) { pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT); } + /* + * If 
pool_get() blocked, then our view of + * the per-CPU data is invalid: retry. + */ + if (__predict_false(l->l_ncsw != ncsw)) { + if (pcg != NULL) { + pool_put(pc->pc_pcgpool, pcg); + } + return true; + } if (__predict_true(pcg != NULL)) { pcg->pcg_avail = 0; pcg->pcg_size = pc->pc_pcgsize; @@ -2288,7 +2300,6 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) /* Lock the cache. */ if (__predict_false(!mutex_tryenter(&pc->pc_lock))) { - ncsw = curlwp->l_ncsw; mutex_enter(&pc->pc_lock); pc->pc_contended++; @@ -2296,7 +2307,7 @@ pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object) * If we context switched while locking, then our view of * the per-CPU data is invalid: retry. */ - if (__predict_false(curlwp->l_ncsw != ncsw)) { + if (__predict_false(l->l_ncsw != ncsw)) { mutex_exit(&pc->pc_lock); if (pcg != NULL) { pool_put(pc->pc_pcgpool, pcg);