Restructure the name cache code to eliminate most lock contention

resulting from forward lookups. Discussed on tech-kern@.
This commit is contained in:
ad 2008-04-11 15:25:24 +00:00
parent 1c3c41f771
commit 1e11b07bfa
4 changed files with 429 additions and 139 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: kern_cpu.c,v 1.22 2008/03/22 18:04:42 ad Exp $ */ /* $NetBSD: kern_cpu.c,v 1.23 2008/04/11 15:25:24 ad Exp $ */
/*- /*-
* Copyright (c) 2007, 2008 The NetBSD Foundation, Inc. * Copyright (c) 2007, 2008 The NetBSD Foundation, Inc.
@ -64,7 +64,7 @@
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.22 2008/03/22 18:04:42 ad Exp $"); __KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.23 2008/04/11 15:25:24 ad Exp $");
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
@ -82,6 +82,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.22 2008/03/22 18:04:42 ad Exp $");
#include <sys/pool.h> #include <sys/pool.h>
#include <sys/kmem.h> #include <sys/kmem.h>
#include <sys/select.h> #include <sys/select.h>
#include <sys/namei.h>
#include <uvm/uvm_extern.h> #include <uvm/uvm_extern.h>
@ -135,6 +136,7 @@ mi_cpu_attach(struct cpu_info *ci)
xc_init_cpu(ci); xc_init_cpu(ci);
pool_cache_cpu_init(ci); pool_cache_cpu_init(ci);
selsysinit(ci); selsysinit(ci);
cache_cpu_init(ci);
TAILQ_INIT(&ci->ci_data.cpu_biodone); TAILQ_INIT(&ci->ci_data.cpu_biodone);
ncpu++; ncpu++;
ncpuonline++; ncpuonline++;

View File

@ -1,4 +1,37 @@
/* $NetBSD: vfs_cache.c,v 1.72 2007/11/11 23:22:25 matt Exp $ */ /* $NetBSD: vfs_cache.c,v 1.73 2008/04/11 15:25:24 ad Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* /*
* Copyright (c) 1989, 1993 * Copyright (c) 1989, 1993
@ -32,7 +65,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.72 2007/11/11 23:22:25 matt Exp $"); __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.73 2008/04/11 15:25:24 ad Exp $");
#include "opt_ddb.h" #include "opt_ddb.h"
#include "opt_revcache.h" #include "opt_revcache.h"
@ -47,6 +80,11 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.72 2007/11/11 23:22:25 matt Exp $");
#include <sys/malloc.h> #include <sys/malloc.h>
#include <sys/pool.h> #include <sys/pool.h>
#include <sys/mutex.h> #include <sys/mutex.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/kernel.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>
#define NAMECACHE_ENTER_REVERSE #define NAMECACHE_ENTER_REVERSE
/* /*
@ -75,7 +113,6 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.72 2007/11/11 23:22:25 matt Exp $");
*/ */
LIST_HEAD(nchashhead, namecache) *nchashtbl; LIST_HEAD(nchashhead, namecache) *nchashtbl;
u_long nchash; /* size of hash table - 1 */ u_long nchash; /* size of hash table - 1 */
long numcache; /* number of cache entries allocated */
#define NCHASH(cnp, dvp) \ #define NCHASH(cnp, dvp) \
(((cnp)->cn_hash ^ ((uintptr_t)(dvp) >> 3)) & nchash) (((cnp)->cn_hash ^ ((uintptr_t)(dvp) >> 3)) & nchash)
@ -83,37 +120,78 @@ LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
u_long ncvhash; /* size of hash table - 1 */ u_long ncvhash; /* size of hash table - 1 */
#define NCVHASH(vp) (((uintptr_t)(vp) >> 3) & ncvhash) #define NCVHASH(vp) (((uintptr_t)(vp) >> 3) & ncvhash)
long numcache; /* number of cache entries allocated */
static u_int cache_gcpend; /* number of entries pending GC */
static void *cache_gcqueue; /* garbage collection queue */
TAILQ_HEAD(, namecache) nclruhead = /* LRU chain */ TAILQ_HEAD(, namecache) nclruhead = /* LRU chain */
TAILQ_HEAD_INITIALIZER(nclruhead); TAILQ_HEAD_INITIALIZER(nclruhead);
#define COUNT(x) nchstats.x++
struct nchstats nchstats; /* cache effectiveness statistics */ struct nchstats nchstats; /* cache effectiveness statistics */
static pool_cache_t namecache_cache; static pool_cache_t namecache_cache;
MALLOC_DEFINE(M_CACHE, "namecache", "Dynamically allocated cache entries"); MALLOC_DEFINE(M_CACHE, "namecache", "Dynamically allocated cache entries");
int cache_lowat = 95;
int cache_hiwat = 98;
int cache_hottime = 5; /* number of seconds */
int doingcache = 1; /* 1 => enable the cache */ int doingcache = 1; /* 1 => enable the cache */
/* A single lock to protect cache insertion, removal and lookup */ static struct evcnt cache_ev_scan;
static kmutex_t namecache_lock; static struct evcnt cache_ev_gc;
static struct evcnt cache_ev_over;
static struct evcnt cache_ev_under;
static struct evcnt cache_ev_forced;
static void cache_remove(struct namecache *); /* A single lock to serialize modifications. */
static void cache_free(struct namecache *); static kmutex_t *namecache_lock;
static void cache_invalidate(struct namecache *);
static inline struct namecache *cache_lookup_entry( static inline struct namecache *cache_lookup_entry(
const struct vnode *, const struct componentname *); const struct vnode *, const struct componentname *);
static void cache_thread(void *);
static void cache_invalidate(struct namecache *);
static void cache_disassociate(struct namecache *);
static void cache_reclaim(void);
static int cache_ctor(void *, void *, int);
static void cache_dtor(void *, void *);
/*
* Invalidate a cache entry and enqueue it for garbage collection.
*/
static void static void
cache_remove(struct namecache *ncp) cache_invalidate(struct namecache *ncp)
{
void *head;
KASSERT(mutex_owned(&ncp->nc_lock));
if (ncp->nc_dvp != NULL) {
ncp->nc_vp = NULL;
ncp->nc_dvp = NULL;
do {
head = cache_gcqueue;
ncp->nc_gcqueue = head;
} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
atomic_inc_uint(&cache_gcpend);
}
}
/*
* Disassociate a namecache entry from any vnodes it is attached to,
* and remove from the global LRU list.
*/
static void
cache_disassociate(struct namecache *ncp)
{ {
KASSERT(mutex_owned(&namecache_lock)); KASSERT(mutex_owned(namecache_lock));
KASSERT(ncp->nc_dvp == NULL);
ncp->nc_dvp = NULL; if (ncp->nc_lru.tqe_prev != NULL) {
ncp->nc_vp = NULL; TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
ncp->nc_lru.tqe_prev = NULL;
TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
if (ncp->nc_hash.le_prev != NULL) {
LIST_REMOVE(ncp, nc_hash);
ncp->nc_hash.le_prev = NULL;
} }
if (ncp->nc_vhash.le_prev != NULL) { if (ncp->nc_vhash.le_prev != NULL) {
LIST_REMOVE(ncp, nc_vhash); LIST_REMOVE(ncp, nc_vhash);
@ -129,32 +207,62 @@ cache_remove(struct namecache *ncp)
} }
} }
/*
* Lock all CPUs to prevent any cache lookup activity. Conceptually,
* this locks out all "readers".
*/
static void static void
cache_free(struct namecache *ncp) cache_lock_cpus(void)
{ {
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
pool_cache_put(namecache_cache, ncp); for (CPU_INFO_FOREACH(cii, ci)) {
numcache--; mutex_enter(ci->ci_data.cpu_cachelock);
}
} }
static inline struct namecache * /*
* Release all CPU locks.
*/
static void
cache_unlock_cpus(void)
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
for (CPU_INFO_FOREACH(cii, ci)) {
mutex_exit(ci->ci_data.cpu_cachelock);
}
}
/*
* Find a single cache entry and return it locked. 'namecache_lock' or
* at least one of the per-CPU locks must be held.
*/
static struct namecache *
cache_lookup_entry(const struct vnode *dvp, const struct componentname *cnp) cache_lookup_entry(const struct vnode *dvp, const struct componentname *cnp)
{ {
struct nchashhead *ncpp; struct nchashhead *ncpp;
struct namecache *ncp; struct namecache *ncp;
KASSERT(mutex_owned(&namecache_lock));
ncpp = &nchashtbl[NCHASH(cnp, dvp)]; ncpp = &nchashtbl[NCHASH(cnp, dvp)];
LIST_FOREACH(ncp, ncpp, nc_hash) { LIST_FOREACH(ncp, ncpp, nc_hash) {
if (ncp->nc_dvp == dvp && if (ncp->nc_dvp != dvp ||
ncp->nc_nlen == cnp->cn_namelen && ncp->nc_nlen != cnp->cn_namelen ||
!memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen)) memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
break; continue;
mutex_enter(&ncp->nc_lock);
if (ncp->nc_dvp == dvp) {
ncp->nc_hittime = hardclock_ticks;
return ncp;
}
/* Raced: entry has been nullified. */
mutex_exit(&ncp->nc_lock);
} }
return ncp; return NULL;
} }
/* /*
@ -178,6 +286,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{ {
struct namecache *ncp; struct namecache *ncp;
struct vnode *vp; struct vnode *vp;
kmutex_t *cpulock;
int error; int error;
if (!doingcache) { if (!doingcache) {
@ -188,18 +297,19 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
if (cnp->cn_namelen > NCHNAMLEN) { if (cnp->cn_namelen > NCHNAMLEN) {
/* Unlocked, but only for stats. */ /* Unlocked, but only for stats. */
nchstats.ncs_long++; COUNT(ncs_long);
cnp->cn_flags &= ~MAKEENTRY; cnp->cn_flags &= ~MAKEENTRY;
goto fail; goto fail;
} }
mutex_enter(&namecache_lock); cpulock = curcpu()->ci_data.cpu_cachelock;
mutex_enter(cpulock);
ncp = cache_lookup_entry(dvp, cnp); ncp = cache_lookup_entry(dvp, cnp);
if (ncp == NULL) { if (ncp == NULL) {
nchstats.ncs_miss++; COUNT(ncs_miss);
goto fail_wlock; goto fail_wlock;
} }
if ((cnp->cn_flags & MAKEENTRY) == 0) { if ((cnp->cn_flags & MAKEENTRY) == 0) {
nchstats.ncs_badhits++; COUNT(ncs_badhits);
goto remove; goto remove;
} else if (ncp->nc_vp == NULL) { } else if (ncp->nc_vp == NULL) {
/* /*
@ -208,41 +318,25 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
cnp->cn_flags |= ncp->nc_flags; cnp->cn_flags |= ncp->nc_flags;
if (cnp->cn_nameiop != CREATE || if (cnp->cn_nameiop != CREATE ||
(cnp->cn_flags & ISLASTCN) == 0) { (cnp->cn_flags & ISLASTCN) == 0) {
nchstats.ncs_neghits++; COUNT(ncs_neghits);
/* mutex_exit(&ncp->nc_lock);
* Move this slot to end of LRU chain, mutex_exit(cpulock);
* if not already there.
*/
if (TAILQ_NEXT(ncp, nc_lru) != 0) {
TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
}
mutex_exit(&namecache_lock);
return (ENOENT); return (ENOENT);
} else { } else {
nchstats.ncs_badhits++; COUNT(ncs_badhits);
goto remove; goto remove;
} }
} }
vp = ncp->nc_vp; vp = ncp->nc_vp;
mutex_enter(&vp->v_interlock);
/* mutex_exit(&ncp->nc_lock);
* Move this slot to end of LRU chain, if not already there. mutex_exit(cpulock);
*/ error = vget(vp, LK_NOWAIT | LK_INTERLOCK);
if (TAILQ_NEXT(ncp, nc_lru) != 0) {
TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
}
error = vget(vp, LK_NOWAIT);
/* Release the name cache mutex while we get reference to the vnode */
mutex_exit(&namecache_lock);
#ifdef DEBUG #ifdef DEBUG
/* /*
* since we released namecache_lock, * since we released nb->nb_lock,
* we can't use this pointer any more. * we can't use this pointer any more.
*/ */
ncp = NULL; ncp = NULL;
@ -253,7 +347,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
/* /*
* this vnode is being cleaned out. * this vnode is being cleaned out.
*/ */
nchstats.ncs_falsehits++; /* XXX badhits? */ COUNT(ncs_falsehits); /* XXX badhits? */
goto fail; goto fail;
} }
@ -272,13 +366,13 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
*/ */
if (error) { if (error) {
/* Unlocked, but only for stats. */ /* Unlocked, but only for stats. */
nchstats.ncs_badhits++; COUNT(ncs_badhits);
*vpp = NULL; *vpp = NULL;
return (-1); return (-1);
} }
/* Unlocked, but only for stats. */ /* Unlocked, but only for stats. */
nchstats.ncs_goodhits++; COUNT(ncs_goodhits);
*vpp = vp; *vpp = vp;
return (0); return (0);
@ -288,11 +382,10 @@ remove:
* the cache entry is invalid, or otherwise don't * the cache entry is invalid, or otherwise don't
* want cache entry to exist. * want cache entry to exist.
*/ */
cache_remove(ncp); cache_invalidate(ncp);
cache_free(ncp); mutex_exit(&ncp->nc_lock);
fail_wlock: fail_wlock:
mutex_exit(&namecache_lock); mutex_exit(cpulock);
fail: fail:
*vpp = NULL; *vpp = NULL;
return (-1); return (-1);
@ -304,6 +397,7 @@ cache_lookup_raw(struct vnode *dvp, struct vnode **vpp,
{ {
struct namecache *ncp; struct namecache *ncp;
struct vnode *vp; struct vnode *vp;
kmutex_t *cpulock;
int error; int error;
if (!doingcache) { if (!doingcache) {
@ -314,47 +408,39 @@ cache_lookup_raw(struct vnode *dvp, struct vnode **vpp,
if (cnp->cn_namelen > NCHNAMLEN) { if (cnp->cn_namelen > NCHNAMLEN) {
/* Unlocked, but only for stats. */ /* Unlocked, but only for stats. */
nchstats.ncs_long++; COUNT(ncs_long);
cnp->cn_flags &= ~MAKEENTRY; cnp->cn_flags &= ~MAKEENTRY;
goto fail; goto fail;
} }
mutex_enter(&namecache_lock); cpulock = curcpu()->ci_data.cpu_cachelock;
mutex_enter(cpulock);
ncp = cache_lookup_entry(dvp, cnp); ncp = cache_lookup_entry(dvp, cnp);
if (ncp == NULL) { if (ncp == NULL) {
nchstats.ncs_miss++; COUNT(ncs_miss);
goto fail_wlock; goto fail_wlock;
} }
/*
* Move this slot to end of LRU chain,
* if not already there.
*/
if (TAILQ_NEXT(ncp, nc_lru) != 0) {
TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
}
vp = ncp->nc_vp; vp = ncp->nc_vp;
if (vp == NULL) { if (vp == NULL) {
/* /*
* Restore the ISWHITEOUT flag saved earlier. * Restore the ISWHITEOUT flag saved earlier.
*/ */
cnp->cn_flags |= ncp->nc_flags; cnp->cn_flags |= ncp->nc_flags;
nchstats.ncs_neghits++; COUNT(ncs_neghits);
mutex_exit(&namecache_lock); mutex_exit(&ncp->nc_lock);
mutex_exit(cpulock);
return (ENOENT); return (ENOENT);
} }
mutex_enter(&vp->v_interlock);
error = vget(vp, LK_NOWAIT); mutex_exit(&ncp->nc_lock);
mutex_exit(cpulock);
/* Release the name cache mutex while we get reference to the vnode */ error = vget(vp, LK_NOWAIT | LK_INTERLOCK);
mutex_exit(&namecache_lock);
if (error) { if (error) {
KASSERT(error == EBUSY); KASSERT(error == EBUSY);
/* /*
* this vnode is being cleaned out. * this vnode is being cleaned out.
*/ */
nchstats.ncs_falsehits++; /* XXX badhits? */ COUNT(ncs_falsehits); /* XXX badhits? */
goto fail; goto fail;
} }
@ -363,7 +449,7 @@ cache_lookup_raw(struct vnode *dvp, struct vnode **vpp,
return 0; return 0;
fail_wlock: fail_wlock:
mutex_exit(&namecache_lock); mutex_exit(cpulock);
fail: fail:
*vpp = NULL; *vpp = NULL;
return -1; return -1;
@ -394,8 +480,9 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
nvcpp = &ncvhashtbl[NCVHASH(vp)]; nvcpp = &ncvhashtbl[NCVHASH(vp)];
mutex_enter(&namecache_lock); mutex_enter(namecache_lock);
LIST_FOREACH(ncp, nvcpp, nc_vhash) { LIST_FOREACH(ncp, nvcpp, nc_vhash) {
mutex_enter(&ncp->nc_lock);
if (ncp->nc_vp == vp && if (ncp->nc_vp == vp &&
(dvp = ncp->nc_dvp) != NULL && (dvp = ncp->nc_dvp) != NULL &&
dvp != vp) { /* avoid pesky . entries.. */ dvp != vp) { /* avoid pesky . entries.. */
@ -410,14 +497,15 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
ncp->nc_name[1] == '.') ncp->nc_name[1] == '.')
panic("cache_revlookup: found entry for .."); panic("cache_revlookup: found entry for ..");
#endif #endif
nchstats.ncs_revhits++; COUNT(ncs_revhits);
if (bufp) { if (bufp) {
bp = *bpp; bp = *bpp;
bp -= ncp->nc_nlen; bp -= ncp->nc_nlen;
if (bp <= bufp) { if (bp <= bufp) {
*dvpp = NULL; *dvpp = NULL;
mutex_exit(&namecache_lock); mutex_exit(&ncp->nc_lock);
mutex_exit(namecache_lock);
return (ERANGE); return (ERANGE);
} }
memcpy(bp, ncp->nc_name, ncp->nc_nlen); memcpy(bp, ncp->nc_name, ncp->nc_nlen);
@ -426,12 +514,14 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
/* XXX MP: how do we know dvp won't evaporate? */ /* XXX MP: how do we know dvp won't evaporate? */
*dvpp = dvp; *dvpp = dvp;
mutex_exit(&namecache_lock); mutex_exit(&ncp->nc_lock);
mutex_exit(namecache_lock);
return (0); return (0);
} }
mutex_exit(&ncp->nc_lock);
} }
nchstats.ncs_revmiss++; COUNT(ncs_revmiss);
mutex_exit(&namecache_lock); mutex_exit(namecache_lock);
out: out:
*dvpp = NULL; *dvpp = NULL;
return (-1); return (-1);
@ -454,37 +544,34 @@ cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
#endif #endif
if (!doingcache) if (!doingcache)
return; return;
/*
* Free the cache slot at head of lru chain.
*/
mutex_enter(&namecache_lock);
if (numcache < numvnodes) { if (numcache > desiredvnodes) {
numcache++; mutex_enter(namecache_lock);
mutex_exit(&namecache_lock); cache_ev_forced.ev_count++;
ncp = pool_cache_get(namecache_cache, PR_WAITOK); cache_reclaim();
memset(ncp, 0, sizeof(*ncp)); mutex_exit(namecache_lock);
mutex_enter(&namecache_lock);
} else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) {
cache_remove(ncp);
} else {
mutex_exit(&namecache_lock);
return;
} }
ncp = pool_cache_get(namecache_cache, PR_WAITOK);
mutex_enter(namecache_lock);
numcache++;
/* /*
* Concurrent lookups in the same directory may race for a * Concurrent lookups in the same directory may race for a
* cache entry. if there's a duplicated entry, free it. * cache entry. if there's a duplicated entry, free it.
*/ */
oncp = cache_lookup_entry(dvp, cnp); oncp = cache_lookup_entry(dvp, cnp);
if (oncp) { if (oncp) {
cache_remove(oncp); cache_invalidate(oncp);
cache_free(oncp); mutex_exit(&oncp->nc_lock);
} }
KASSERT(cache_lookup_entry(dvp, cnp) == NULL);
/* Grab the vnode we just found. */ /* Grab the vnode we just found. */
mutex_enter(&ncp->nc_lock);
ncp->nc_vp = vp; ncp->nc_vp = vp;
ncp->nc_flags = 0;
ncp->nc_hittime = 0;
ncp->nc_gcqueue = NULL;
if (vp == NULL) { if (vp == NULL) {
/* /*
* For negative hits, save the ISWHITEOUT flag so we can * For negative hits, save the ISWHITEOUT flag so we can
@ -497,10 +584,21 @@ cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist); LIST_INSERT_HEAD(&dvp->v_dnclist, ncp, nc_dvlist);
if (vp) if (vp)
LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist); LIST_INSERT_HEAD(&vp->v_nclist, ncp, nc_vlist);
else {
ncp->nc_vlist.le_prev = NULL;
ncp->nc_vlist.le_next = NULL;
}
ncp->nc_nlen = cnp->cn_namelen; ncp->nc_nlen = cnp->cn_namelen;
memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
memcpy(ncp->nc_name, cnp->cn_nameptr, (unsigned)ncp->nc_nlen);
ncpp = &nchashtbl[NCHASH(cnp, dvp)]; ncpp = &nchashtbl[NCHASH(cnp, dvp)];
/*
* Flush updates before making visible in table. No need for a
* memory barrier on the other side: to see modifications the
* list must be followed, meaning a dependent pointer load.
*/
membar_producer();
LIST_INSERT_HEAD(ncpp, ncp, nc_hash); LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
ncp->nc_vhash.le_prev = NULL; ncp->nc_vhash.le_prev = NULL;
@ -521,7 +619,8 @@ cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
nvcpp = &ncvhashtbl[NCVHASH(vp)]; nvcpp = &ncvhashtbl[NCVHASH(vp)];
LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash); LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
} }
mutex_exit(&namecache_lock); mutex_exit(&ncp->nc_lock);
mutex_exit(namecache_lock);
} }
/* /*
@ -530,12 +629,15 @@ cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
void void
nchinit(void) nchinit(void)
{ {
int error;
namecache_cache = pool_cache_init(sizeof(struct namecache), 0, 0, 0, namecache_cache = pool_cache_init(sizeof(struct namecache),
"ncachepl", NULL, IPL_NONE, NULL, NULL, NULL); coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
cache_dtor, NULL);
KASSERT(namecache_cache != NULL); KASSERT(namecache_cache != NULL);
mutex_init(&namecache_lock, MUTEX_DEFAULT, IPL_NONE); namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
nchashtbl = nchashtbl =
hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &nchash); hashinit(desiredvnodes, HASH_LIST, M_CACHE, M_WAITOK, &nchash);
ncvhashtbl = ncvhashtbl =
@ -544,6 +646,52 @@ nchinit(void)
#else #else
hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash); hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &ncvhash);
#endif #endif
error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread,
NULL, NULL, "cachegc");
if (error != 0)
panic("nchinit %d", error);
evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL,
"namecache", "entries scanned");
evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL,
"namecache", "entries collected");
evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL,
"namecache", "over scan target");
evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL,
"namecache", "under scan target");
evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL,
"namecache", "forced reclaims");
}
static int
cache_ctor(void *arg, void *obj, int flag)
{
struct namecache *ncp;
ncp = obj;
mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);
return 0;
}
static void
cache_dtor(void *arg, void *obj)
{
struct namecache *ncp;
ncp = obj;
mutex_destroy(&ncp->nc_lock);
}
/*
* Called once for each CPU in the system as attached.
*/
void
cache_cpu_init(struct cpu_info *ci)
{
ci->ci_data.cpu_cachelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
} }
/* /*
@ -564,7 +712,8 @@ nchreinit(void)
#else #else
hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &mask2); hashinit(desiredvnodes/8, HASH_LIST, M_CACHE, M_WAITOK, &mask2);
#endif #endif
mutex_enter(&namecache_lock); mutex_enter(namecache_lock);
cache_lock_cpus();
oldhash1 = nchashtbl; oldhash1 = nchashtbl;
oldmask1 = nchash; oldmask1 = nchash;
nchashtbl = hash1; nchashtbl = hash1;
@ -585,7 +734,8 @@ nchreinit(void)
ncp->nc_vhash.le_prev = NULL; ncp->nc_vhash.le_prev = NULL;
} }
} }
mutex_exit(&namecache_lock); cache_unlock_cpus();
mutex_exit(namecache_lock);
hashdone(oldhash1, M_CACHE); hashdone(oldhash1, M_CACHE);
hashdone(oldhash2, M_CACHE); hashdone(oldhash2, M_CACHE);
} }
@ -599,31 +749,36 @@ cache_purge1(struct vnode *vp, const struct componentname *cnp, int flags)
{ {
struct namecache *ncp, *ncnext; struct namecache *ncp, *ncnext;
mutex_enter(&namecache_lock); mutex_enter(namecache_lock);
if (flags & PURGE_PARENTS) { if (flags & PURGE_PARENTS) {
for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL; for (ncp = LIST_FIRST(&vp->v_nclist); ncp != NULL;
ncp = ncnext) { ncp = ncnext) {
ncnext = LIST_NEXT(ncp, nc_vlist); ncnext = LIST_NEXT(ncp, nc_vlist);
cache_remove(ncp); mutex_enter(&ncp->nc_lock);
cache_free(ncp); cache_invalidate(ncp);
mutex_exit(&ncp->nc_lock);
cache_disassociate(ncp);
} }
} }
if (flags & PURGE_CHILDREN) { if (flags & PURGE_CHILDREN) {
for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL; for (ncp = LIST_FIRST(&vp->v_dnclist); ncp != NULL;
ncp = ncnext) { ncp = ncnext) {
ncnext = LIST_NEXT(ncp, nc_dvlist); ncnext = LIST_NEXT(ncp, nc_dvlist);
cache_remove(ncp); mutex_enter(&ncp->nc_lock);
cache_free(ncp); cache_invalidate(ncp);
mutex_exit(&ncp->nc_lock);
cache_disassociate(ncp);
} }
} }
if (cnp != NULL) { if (cnp != NULL) {
ncp = cache_lookup_entry(vp, cnp); ncp = cache_lookup_entry(vp, cnp);
if (ncp) { if (ncp) {
cache_remove(ncp); cache_invalidate(ncp);
cache_free(ncp); cache_disassociate(ncp);
mutex_exit(&ncp->nc_lock);
} }
} }
mutex_exit(&namecache_lock); mutex_exit(namecache_lock);
} }
/* /*
@ -635,17 +790,136 @@ cache_purgevfs(struct mount *mp)
{ {
struct namecache *ncp, *nxtcp; struct namecache *ncp, *nxtcp;
mutex_enter(&namecache_lock); mutex_enter(namecache_lock);
for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) { for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
nxtcp = TAILQ_NEXT(ncp, nc_lru); nxtcp = TAILQ_NEXT(ncp, nc_lru);
if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) { mutex_enter(&ncp->nc_lock);
if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
/* Free the resources we had. */
cache_invalidate(ncp);
cache_disassociate(ncp);
}
mutex_exit(&ncp->nc_lock);
}
cache_reclaim();
mutex_exit(namecache_lock);
}
/*
* Scan global list invalidating entries until we meet a preset target.
* Prefer to invalidate entries that have not scored a hit within
* cache_hottime seconds. We sort the LRU list only for this routine's
* benefit.
*/
static void
cache_prune(int incache, int target)
{
struct namecache *ncp, *nxtcp, *sentinel;
int items, recent, tryharder;
KASSERT(mutex_owned(namecache_lock));
items = 0;
tryharder = 0;
recent = hardclock_ticks - hz * cache_hottime;
sentinel = NULL;
for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
if (incache <= target)
break;
items++;
nxtcp = TAILQ_NEXT(ncp, nc_lru);
if (ncp->nc_dvp == NULL)
continue;
if (ncp == sentinel) {
/*
* If we looped back on ourself, then ignore
* recent entries and purge whatever we find.
*/
tryharder = 1;
}
if (!tryharder && ncp->nc_hittime > recent) {
if (sentinel == NULL)
sentinel = ncp;
TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
continue; continue;
} }
/* Free the resources we had. */ mutex_enter(&ncp->nc_lock);
cache_remove(ncp); if (ncp->nc_dvp != NULL) {
cache_free(ncp); cache_invalidate(ncp);
cache_disassociate(ncp);
incache--;
}
mutex_exit(&ncp->nc_lock);
}
cache_ev_scan.ev_count += items;
}
/*
* Collect dead cache entries from all CPUs and garbage collect.
*/
static void
cache_reclaim(void)
{
struct namecache *ncp, *next;
int items;
KASSERT(mutex_owned(namecache_lock));
/*
* If the number of extant entries not awaiting garbage collection
* exceeds the high water mark, then reclaim stale entries until we
* reach our low water mark.
*/
items = numcache - cache_gcpend;
if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
cache_prune(items, (int)((uint64_t)desiredvnodes *
cache_lowat / 100));
cache_ev_over.ev_count++;
} else
cache_ev_under.ev_count++;
/*
* Stop forward lookup activity on all CPUs and garbage collect dead
* entries.
*/
cache_lock_cpus();
ncp = cache_gcqueue;
cache_gcqueue = NULL;
items = cache_gcpend;
cache_gcpend = 0;
while (ncp != NULL) {
next = ncp->nc_gcqueue;
cache_disassociate(ncp);
KASSERT(ncp->nc_dvp == NULL);
if (ncp->nc_hash.le_prev != NULL) {
LIST_REMOVE(ncp, nc_hash);
ncp->nc_hash.le_prev = NULL;
}
pool_cache_put(namecache_cache, ncp);
ncp = next;
}
cache_unlock_cpus();
numcache -= items;
cache_ev_gc.ev_count += items;
}
/*
* Cache maintenance thread, awakening once per second to:
*
* => keep number of entries below the high water mark
* => sort pseudo-LRU list
* => garbage collect dead entries
*/
static void
cache_thread(void *arg)
{
mutex_enter(namecache_lock);
for (;;) {
cache_reclaim();
kpause("cachegc", false, hz, namecache_lock);
} }
mutex_exit(&namecache_lock);
} }
#ifdef DDB #ifdef DDB
@ -656,7 +930,7 @@ namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
struct namecache *ncp; struct namecache *ncp;
TAILQ_FOREACH(ncp, &nclruhead, nc_lru) { TAILQ_FOREACH(ncp, &nclruhead, nc_lru) {
if (ncp->nc_vp == vp) { if (ncp->nc_vp == vp && ncp->nc_dvp != NULL) {
(*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name); (*pr)("name %.*s\n", ncp->nc_nlen, ncp->nc_name);
dvp = ncp->nc_dvp; dvp = ncp->nc_dvp;
} }

View File

@ -1,4 +1,4 @@
/* $NetBSD: cpu_data.h,v 1.19 2008/03/22 18:04:42 ad Exp $ */ /* $NetBSD: cpu_data.h,v 1.20 2008/04/11 15:25:24 ad Exp $ */
/*- /*-
* Copyright (c) 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. * Copyright (c) 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@ -92,6 +92,7 @@ struct cpu_data {
TAILQ_HEAD(,buf) cpu_biodone; /* finished block xfers */ TAILQ_HEAD(,buf) cpu_biodone; /* finished block xfers */
percpu_cpu_t cpu_percpu; /* per-cpu data */ percpu_cpu_t cpu_percpu; /* per-cpu data */
struct selcpu *cpu_selcpu; /* per-CPU select() info */ struct selcpu *cpu_selcpu; /* per-CPU select() info */
void *cpu_cachelock; /* per-cpu vfs_cache lock */
}; };
/* compat definitions */ /* compat definitions */

View File

@ -1,4 +1,4 @@
/* $NetBSD: namei.src,v 1.6 2007/12/08 19:29:52 pooka Exp $ */ /* $NetBSD: namei.src,v 1.7 2008/04/11 15:25:24 ad Exp $ */
/* /*
* Copyright (c) 1985, 1989, 1991, 1993 * Copyright (c) 1985, 1989, 1991, 1993
@ -35,6 +35,9 @@
#define _SYS_NAMEI_H_ #define _SYS_NAMEI_H_
#include <sys/queue.h> #include <sys/queue.h>
#include <sys/mutex.h>
#include <sys/kauth.h>
#ifdef _KERNEL #ifdef _KERNEL
/* /*
* Encapsulation of namei parameters. * Encapsulation of namei parameters.
@ -132,11 +135,10 @@ NAMEIFL DOWHITEOUT 0x0040000 /* do whiteouts */
NAMEIFL REQUIREDIR 0x0080000 /* must be a directory */ NAMEIFL REQUIREDIR 0x0080000 /* must be a directory */
NAMEIFL CREATEDIR 0x0200000 /* trailing slashes are ok */ NAMEIFL CREATEDIR 0x0200000 /* trailing slashes are ok */
NAMEIFL PARAMASK 0x02fff00 /* mask of parameter descriptors */ NAMEIFL PARAMASK 0x02fff00 /* mask of parameter descriptors */
/* /*
* Initialization of an nameidata structure. * Initialization of an nameidata structure.
*/ */
#include <sys/kauth.h>
#define NDINIT(ndp, op, flags, segflg, namep) { \ #define NDINIT(ndp, op, flags, segflg, namep) { \
(ndp)->ni_cnd.cn_nameiop = op; \ (ndp)->ni_cnd.cn_nameiop = op; \
(ndp)->ni_cnd.cn_flags = flags; \ (ndp)->ni_cnd.cn_flags = flags; \
@ -155,17 +157,26 @@ NAMEIFL PARAMASK 0x02fff00 /* mask of parameter descriptors */
#define NCHNAMLEN 31 /* maximum name segment length we bother with */ #define NCHNAMLEN 31 /* maximum name segment length we bother with */
/*
* Namecache entry. This structure is arranged so that frequently
* accessed and mostly read-only data is toward the front, with
* infrequently accessed data and the lock towards the rear. The
* lock is then more likely to be in a separate cache line.
*/
struct namecache { struct namecache {
LIST_ENTRY(namecache) nc_hash; /* hash chain */ LIST_ENTRY(namecache) nc_hash; /* hash chain */
TAILQ_ENTRY(namecache) nc_lru; /* LRU chain */
LIST_ENTRY(namecache) nc_vhash; /* directory hash chain */ LIST_ENTRY(namecache) nc_vhash; /* directory hash chain */
LIST_ENTRY(namecache) nc_dvlist;
struct vnode *nc_dvp; /* vnode of parent of name */ struct vnode *nc_dvp; /* vnode of parent of name */
LIST_ENTRY(namecache) nc_vlist;
struct vnode *nc_vp; /* vnode the name refers to */ struct vnode *nc_vp; /* vnode the name refers to */
int nc_flags; /* copy of componentname's ISWHITEOUT */ int nc_flags; /* copy of componentname's ISWHITEOUT */
char nc_nlen; /* length of name */ char nc_nlen; /* length of name */
char nc_name[NCHNAMLEN]; /* segment name */ char nc_name[NCHNAMLEN]; /* segment name */
void *nc_gcqueue; /* queue for garbage collection */
TAILQ_ENTRY(namecache) nc_lru; /* psuedo-lru chain */
LIST_ENTRY(namecache) nc_dvlist;
LIST_ENTRY(namecache) nc_vlist;
kmutex_t nc_lock; /* lock on this entry */
int nc_hittime; /* last time scored a hit */
}; };
#ifdef _KERNEL #ifdef _KERNEL
@ -173,6 +184,7 @@ struct namecache {
#include <sys/pool.h> #include <sys/pool.h>
struct mount; struct mount;
struct cpu_info;
extern pool_cache_t pnbuf_cache; /* pathname buffer cache */ extern pool_cache_t pnbuf_cache; /* pathname buffer cache */
@ -194,6 +206,7 @@ int cache_revlookup(struct vnode *, struct vnode **, char **, char *);
void cache_enter(struct vnode *, struct vnode *, struct componentname *); void cache_enter(struct vnode *, struct vnode *, struct componentname *);
void nchinit(void); void nchinit(void);
void nchreinit(void); void nchreinit(void);
void cache_cpu_init(struct cpu_info *);
void cache_purgevfs(struct mount *); void cache_purgevfs(struct mount *);
void namecache_print(struct vnode *, void (*)(const char *, ...)); void namecache_print(struct vnode *, void (*)(const char *, ...));