namecache changes:
- Delete the per-entry lock, and borrow the associated vnode's v_interlock
  instead. We need to acquire it during lookup anyway. We can revisit this
  in the future but for now it's a stepping stone, and works within the
  quite limited context of what we have (BSD namecache/lookup design).

- Implement an idea that Mateusz Guzik (mjg@FreeBSD.org) gave me. In
  cache_reclaim(), we don't need to lock out all of the CPUs to garbage
  collect entries. All we need to do is observe their locks unheld at least
  once: then we know they are not in the critical section, and no longer
  have visibility of the entries about to be garbage collected.

- The above makes it safe for sysctl to take only namecache_lock to get
  stats, and we can remove all the crap dealing with per-CPU locks.

- For lockstat, make namecache_lock a static now we have __cacheline_aligned.

- Avoid false sharing - don't write back to nc_hittime unless it has changed.
  Put a comment in place explaining this. Pretty sure this was there in
  2008/2009 but someone removed it (understandably, the code looks weird).

- Use a mutex to protect the garbage collection queue instead of atomics,
  and adjust the low water mark up so that cache_reclaim() isn't doing so
  much work at once.
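The first bullet boils down to a small amount of code in the diff below. The
following is a condensed sketch pulled together from the cache_enter() and
cache_reclaim() hunks, not a complete function: a positive entry borrows the
vnode's v_interlock as its nc_lock and takes a reference on the mutex object,
a negative entry has no vnode and so gets a private mutex object, and the
reference is dropped once the entry has been garbage collected.

	/* cache_enter(): choose the lock for the new entry (sketch). */
	if (vp == NULL) {
		/* Negative entry: no vnode to borrow from, use a private lock. */
		ncp->nc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	} else {
		/* Positive entry: share v_interlock and take a reference on it. */
		ncp->nc_lock = vp->v_interlock;
		mutex_obj_hold(ncp->nc_lock);
	}

	/* cache_reclaim(): drop the reference once no CPU can see the entry. */
	mutex_obj_free(ncp->nc_lock);
	ncp->nc_lock = NULL;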
parent 036b61e0aa
commit 566436656a
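The reclamation trick in the second bullet is worth spelling out. Below is a
minimal userland model of the idea, not the committed kernel code: pthread
mutexes stand in for the per-CPU cpu_lock, a trylock stands in for the
kernel's cheaper mutex_owner() check after membar_sync(), and every name in
it (struct entry, reader_lock, NREADERS, model_*) is made up for the
illustration.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

#define NREADERS 8			/* stand-ins for the per-CPU locks */

struct entry {
	_Atomic(struct entry *) next;
	int key;
};

static pthread_mutex_t reader_lock[NREADERS];
static _Atomic(struct entry *) list_head;	/* stand-in for a hash chain */

void
model_init(void)
{
	for (int i = 0; i < NREADERS; i++)
		pthread_mutex_init(&reader_lock[i], NULL);
}

/* Reader: hold your own lock for the whole traversal (cache_lookup_entry). */
struct entry *
model_lookup(int self, int key)
{
	struct entry *e;

	pthread_mutex_lock(&reader_lock[self]);
	for (e = atomic_load(&list_head); e != NULL; e = atomic_load(&e->next))
		if (e->key == key)
			break;
	pthread_mutex_unlock(&reader_lock[self]);
	return e;	/* the kernel takes the entry's lock before unlocking */
}

/*
 * Reclaimer: unlink, then observe every reader lock unowned at least once
 * before freeing.  "dead" is assumed to be at the list head to keep the
 * unlink trivial; its own next pointer is left intact, so a reader that
 * already stepped onto it still walks back into the live list.
 */
void
model_reclaim(struct entry *dead)
{
	atomic_store(&list_head, atomic_load(&dead->next));

	for (int i = 0; i < NREADERS; i++) {
		/*
		 * A successful trylock means the lock was unheld: that
		 * reader is not in its critical section.  Otherwise wait
		 * the holder out by acquiring and releasing the lock once.
		 */
		if (pthread_mutex_trylock(&reader_lock[i]) != 0)
			pthread_mutex_lock(&reader_lock[i]);
		pthread_mutex_unlock(&reader_lock[i]);
	}

	/* Every traversal that could have seen "dead" has finished. */
	free(dead);
}

The kernel version avoids even the trylock: after membar_sync() it merely
reads each cpu_lock's owner, and only enters/exits the lock when it is seen
held, which is what the new loop in cache_reclaim() below does.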
vfs_cache.c
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $ */
+/* $NetBSD: vfs_cache.c,v 1.124 2019/12/01 13:39:53 ad Exp $ */
 /*-
-* Copyright (c) 2008 The NetBSD Foundation, Inc.
+* Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -58,7 +58,7 @@
 */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.124 2019/12/01 13:39:53 ad Exp $");
 #define __NAMECACHE_PRIVATE
 #ifdef _KERNEL_OPT
@@ -130,7 +130,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $")
 * - Invalidate: active--->queued
 *
 * Done by cache_invalidate. If not already invalidated, nullify
-* ncp->nc_dvp and ncp->nc_vp, and add to cache_gcqueue. Called,
+* ncp->nc_dvp and ncp->nc_vp, and add to namecache_gc_queue. Called,
 * among various other places, in cache_lookup(dvp, name, namelen,
 * nameiop, cnflags, &iswht, &vp) when MAKEENTRY is missing from
 * cnflags.
@@ -145,16 +145,17 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $")
 * Locking.
 *
 * L namecache_lock Global lock for namecache table and queues.
+* G namecache_gc_lock Global lock for garbage collection.
 * C struct nchcpu::cpu_lock Per-CPU lock to reduce read contention.
-* N struct namecache::nc_lock Per-entry lock.
-* V struct vnode::v_interlock Vnode interlock.
+* N struct namecache::nc_lock Per-entry lock, matching nc_vp->v_interlock.
+* If nc_vp==NULL, lock is private / not shared.
 *
-* Lock order: L -> C -> N -> V
+* Lock order: L -> C -> N
 *
 * Examples:
 * . L->C: cache_reclaim
-* . C->N->V: cache_lookup
-* . L->N->V: cache_purge1, cache_revlookup
+* . C->N: cache_lookup
+* . L->N: cache_purge1, cache_revlookup
 *
 * All use serialized by namecache_lock:
 *
@@ -167,8 +168,9 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $")
 * - Insertion serialized by namecache_lock,
 * - read protected by per-CPU lock,
 * - insert/read ordering guaranteed by memory barriers, and
-* - deletion allowed only under namecache_lock and *all* per-CPU locks
-* in CPU_INFO_FOREACH order:
+* - deletion allowed only under namecache_lock, with namecache_gc_lock
+* taken to chop out the garbage collection list, and *all* per-CPU locks
+* observed as "unowned" at least once:
 *
 * nchashtbl / struct namecache::nc_hash
 *
@@ -180,11 +182,13 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.123 2019/09/15 17:37:25 maya Exp $")
 *
 * struct namecache::nc_dvp
 * struct namecache::nc_vp
-* struct namecache::nc_gcqueue (*)
-* struct namecache::nc_hittime (**)
+* struct namecache::nc_hittime (*)
 *
-* (*) Once on the queue, only cache_thread uses this nc_gcqueue, unlocked.
-* (**) cache_prune reads nc_hittime unlocked, since approximate is OK.
+* All use serialized by struct namecache_gc_lock:
+*
+* struct namecache::nc_gclist
+*
+* (*) cache_prune reads nc_hittime unlocked, since approximate is OK.
 *
 * Unlocked because stable after initialization:
 *
@@ -257,7 +261,7 @@ typedef u_long nchash_t;
 * Structures associated with name cacheing.
 */
-static kmutex_t *namecache_lock __read_mostly;
+static kmutex_t namecache_lock __cacheline_aligned;
 static pool_cache_t namecache_cache __read_mostly;
 static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned;
@@ -276,8 +280,9 @@ static u_long ncvhash __read_mostly;
 static long numcache __cacheline_aligned;
 /* Garbage collection queue and number of entries pending in it. */
-static void *cache_gcqueue;
-static u_int cache_gcpend;
+static kmutex_t namecache_gc_lock __cacheline_aligned;
+static SLIST_HEAD(namecache_gc_queue, namecache) namecache_gc_queue;
+static u_int namecache_gc_pend;
 /* Cache effectiveness statistics. This holds total from per-cpu stats */
 struct nchstats nchstats __cacheline_aligned;
@@ -287,8 +292,6 @@ struct nchstats nchstats __cacheline_aligned;
 * values and add current per-cpu increments to the subsystem total
 * last collected by cache_reclaim().
 */
-#define CACHE_STATS_CURRENT /* nothing */
 #define COUNT(cpup, f) ((cpup)->cpu_stats.f++)
 #define UPDATE(cpup, f) do { \
@@ -298,15 +301,10 @@ struct nchstats nchstats __cacheline_aligned;
 Xcpup->cpu_stats_last.f = Xcnt; \
 } while (/* CONSTCOND */ 0)
-#define ADD(stats, cpup, f) do { \
-struct nchcpu *Xcpup = (cpup); \
-stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \
-} while (/* CONSTCOND */ 0)
 /* Do unlocked stats the same way. Use a different name to allow mind changes */
 #define COUNT_UNL(cpup, f) COUNT((cpup), f)
-static const int cache_lowat = 95;
+static const int cache_lowat = 97;
 static const int cache_hiwat = 98;
 static const int cache_hottime = 5; /* number of seconds */
 static int doingcache = 1; /* 1 => enable the cache */
@@ -369,15 +367,12 @@ cache_hash(const char *name, size_t namelen)
 /*
 * Invalidate a cache entry and enqueue it for garbage collection.
-* The caller needs to hold namecache_lock or a per-cpu lock to hold
-* off cache_reclaim().
 */
 static void
 cache_invalidate(struct namecache *ncp)
 {
-void *head;
-KASSERT(mutex_owned(&ncp->nc_lock));
+KASSERT(mutex_owned(ncp->nc_lock));
 if (ncp->nc_dvp != NULL) {
 SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp,
@@ -385,11 +380,10 @@ cache_invalidate(struct namecache *ncp)
 ncp->nc_vp = NULL;
 ncp->nc_dvp = NULL;
-do {
-head = cache_gcqueue;
-ncp->nc_gcqueue = head;
-} while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head);
-atomic_inc_uint(&cache_gcpend);
+mutex_enter(&namecache_gc_lock);
+SLIST_INSERT_HEAD(&namecache_gc_queue, ncp, nc_gclist);
+namecache_gc_pend++;
+mutex_exit(&namecache_gc_lock);
 }
 }
@@ -401,7 +395,7 @@ static void
 cache_disassociate(struct namecache *ncp)
 {
-KASSERT(mutex_owned(namecache_lock));
+KASSERT(mutex_owned(&namecache_lock));
 KASSERT(ncp->nc_dvp == NULL);
 if (ncp->nc_lru.tqe_prev != NULL) {
@@ -424,7 +418,8 @@ cache_disassociate(struct namecache *ncp)
 /*
 * Lock all CPUs to prevent any cache lookup activity. Conceptually,
-* this locks out all "readers".
+* this locks out all "readers". This is a very heavyweight operation
+* that we only use for nchreinit().
 */
 static void
 cache_lock_cpus(void)
@@ -433,6 +428,9 @@ cache_lock_cpus(void)
 struct cpu_info *ci;
 struct nchcpu *cpup;
+/* Not necessary but don't want more than one LWP trying this. */
+KASSERT(mutex_owned(&namecache_lock));
 /*
 * Lock out all CPUs first, then harvest per-cpu stats. This
 * is probably not quite as cache-efficient as doing the lock
@@ -485,6 +483,7 @@ cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
 struct nchashhead *ncpp;
 struct namecache *ncp;
 nchash_t hash;
+int ticks;
 KASSERT(dvp != NULL);
 hash = cache_hash(name, namelen);
@@ -496,15 +495,22 @@
 ncp->nc_nlen != namelen ||
 memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen))
 continue;
-mutex_enter(&ncp->nc_lock);
+mutex_enter(ncp->nc_lock);
 if (__predict_true(ncp->nc_dvp == dvp)) {
-ncp->nc_hittime = hardclock_ticks;
+ticks = hardclock_ticks;
+if (ncp->nc_hittime != ticks) {
+/*
+* Avoid false sharing on MP: do not store
+* to *ncp unless the value changed.
+*/
+ncp->nc_hittime = ticks;
+}
 SDT_PROBE(vfs, namecache, lookup, hit, dvp,
 name, namelen, 0, 0);
 return ncp;
 }
 /* Raced: entry has been nullified. */
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 }
 SDT_PROBE(vfs, namecache, lookup, miss, dvp,
@@ -573,7 +579,6 @@ cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
 int error;
 bool hit;
 /* Establish default result values */
 if (iswht_ret != NULL) {
 *iswht_ret = 0;
@@ -610,12 +615,13 @@
 * want cache entry to exist.
 */
 cache_invalidate(ncp);
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 mutex_exit(&cpup->cpu_lock);
 /* found nothing */
 return false;
 }
-if (ncp->nc_vp == NULL) {
+vp = ncp->nc_vp;
+if (__predict_false(vp == NULL)) {
 if (iswht_ret != NULL) {
 /*
 * Restore the ISWHITEOUT flag saved earlier.
@@ -642,14 +648,11 @@
 /* found nothing */
 hit = false;
 }
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 mutex_exit(&cpup->cpu_lock);
 return hit;
 }
-vp = ncp->nc_vp;
-mutex_enter(vp->v_interlock);
-mutex_exit(&ncp->nc_lock);
+KASSERT(vp->v_interlock == ncp->nc_lock);
 mutex_exit(&cpup->cpu_lock);
 /*
@@ -724,13 +727,12 @@ cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen,
 *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
 }
 COUNT(cpup, ncs_neghits);
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 mutex_exit(&cpup->cpu_lock);
 /* found negative entry; vn is already null from above */
 return true;
 }
-mutex_enter(vp->v_interlock);
-mutex_exit(&ncp->nc_lock);
+KASSERT(vp->v_interlock == ncp->nc_lock);
 mutex_exit(&cpup->cpu_lock);
 /*
@@ -775,6 +777,7 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
 struct nchcpu *cpup;
 char *bp;
 int error, nlen;
+bool locked, again;
 if (!doingcache)
 goto out;
@@ -787,10 +790,12 @@
 * is the only place these counters are incremented so no one
 * will be racing with us to increment them.
 */
+again = false;
+retry:
 cpup = curcpu()->ci_data.cpu_nch;
-mutex_enter(namecache_lock);
+mutex_enter(&namecache_lock);
 LIST_FOREACH(ncp, nvcpp, nc_vhash) {
-mutex_enter(&ncp->nc_lock);
+mutex_enter(ncp->nc_lock);
 if (ncp->nc_vp == vp &&
 (dvp = ncp->nc_dvp) != NULL &&
 dvp != vp) { /* avoid pesky . entries.. */
@@ -813,8 +818,8 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
 bp -= nlen;
 if (bp <= bufp) {
 *dvpp = NULL;
-mutex_exit(&ncp->nc_lock);
-mutex_exit(namecache_lock);
+mutex_exit(ncp->nc_lock);
+mutex_exit(&namecache_lock);
 SDT_PROBE(vfs, namecache, revlookup,
 fail, vp, ERANGE, 0, 0, 0);
 return (ERANGE);
|
@ -823,9 +828,18 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
|
|||
*bpp = bp;
|
||||
}
|
||||
|
||||
mutex_enter(dvp->v_interlock);
|
||||
mutex_exit(&ncp->nc_lock);
|
||||
mutex_exit(namecache_lock);
|
||||
|
||||
KASSERT(ncp->nc_lock != dvp->v_interlock);
|
||||
locked = mutex_tryenter(dvp->v_interlock);
|
||||
mutex_exit(ncp->nc_lock);
|
||||
mutex_exit(&namecache_lock);
|
||||
if (!locked) {
|
||||
if (again) {
|
||||
kpause("nchrace", false, 1, NULL);
|
||||
}
|
||||
again = true;
|
||||
goto retry;
|
||||
}
|
||||
error = vcache_tryvget(dvp);
|
||||
if (error) {
|
||||
KASSERT(error == EBUSY);
|
||||
|
@ -841,10 +855,10 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
|
|||
0, 0, 0);
|
||||
return (0);
|
||||
}
|
||||
mutex_exit(&ncp->nc_lock);
|
||||
mutex_exit(ncp->nc_lock);
|
||||
}
|
||||
COUNT(cpup, ncs_revmiss);
|
||||
mutex_exit(namecache_lock);
|
||||
mutex_exit(&namecache_lock);
|
||||
out:
|
||||
*dvpp = NULL;
|
||||
return (-1);
|
||||
|
@@ -873,10 +887,10 @@ cache_enter(struct vnode *dvp, struct vnode *vp,
 SDT_PROBE(vfs, namecache, enter, done, vp, name, namelen, 0, 0);
 if (numcache > desiredvnodes) {
-mutex_enter(namecache_lock);
+mutex_enter(&namecache_lock);
 cache_ev_forced.ev_count++;
 cache_reclaim();
-mutex_exit(namecache_lock);
+mutex_exit(&namecache_lock);
 }
 if (namelen > NCHNAMLEN) {
@@ -885,7 +899,7 @@ cache_enter(struct vnode *dvp, struct vnode *vp,
 } else
 ncp = pool_cache_get(namecache_cache, PR_WAITOK);
-mutex_enter(namecache_lock);
+mutex_enter(&namecache_lock);
 numcache++;
 /*
@@ -895,24 +909,27 @@
 oncp = cache_lookup_entry(dvp, name, namelen);
 if (oncp) {
 cache_invalidate(oncp);
-mutex_exit(&oncp->nc_lock);
+mutex_exit(oncp->nc_lock);
 }
 /* Grab the vnode we just found. */
-mutex_enter(&ncp->nc_lock);
 ncp->nc_vp = vp;
 ncp->nc_flags = 0;
 ncp->nc_hittime = 0;
-ncp->nc_gcqueue = NULL;
 if (vp == NULL) {
 /*
 * For negative hits, save the ISWHITEOUT flag so we can
 * restore it later when the cache entry is used again.
 */
 ncp->nc_flags = cnflags & ISWHITEOUT;
+ncp->nc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
+} else {
+ncp->nc_lock = vp->v_interlock;
+mutex_obj_hold(ncp->nc_lock);
 }
 /* Fill in cache info. */
+mutex_enter(ncp->nc_lock);
 ncp->nc_dvp = dvp;
 LIST_INSERT_HEAD(&VNODE_TO_VIMPL(dvp)->vi_dnclist, ncp, nc_dvlist);
 if (vp)
@@ -959,8 +976,8 @@
 nvcpp = &ncvhashtbl[NCVHASH(vp)];
 LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
 }
-mutex_exit(&ncp->nc_lock);
-mutex_exit(namecache_lock);
+mutex_exit(ncp->nc_lock);
+mutex_exit(&namecache_lock);
 }
 /*
@@ -977,8 +994,8 @@ nchinit(void)
 cache_dtor, NULL);
 KASSERT(namecache_cache != NULL);
-namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
+mutex_init(&namecache_lock, MUTEX_DEFAULT, IPL_NONE);
+mutex_init(&namecache_gc_lock, MUTEX_DEFAULT, IPL_NONE);
 nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash);
 ncvhashtbl =
 #ifdef NAMECACHE_ENTER_REVERSE
@@ -1009,10 +1026,6 @@ nchinit(void)
 static int
 cache_ctor(void *arg, void *obj, int flag)
 {
-struct namecache *ncp;
-ncp = obj;
-mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE);
 return 0;
 }
@@ -1020,10 +1033,7 @@ cache_ctor(void *arg, void *obj, int flag)
 static void
 cache_dtor(void *arg, void *obj)
 {
-struct namecache *ncp;
-ncp = obj;
-mutex_destroy(&ncp->nc_lock);
 }
 /*
@@ -1060,7 +1070,7 @@ nchreinit(void)
 #else
 hashinit(desiredvnodes/8, HASH_LIST, true, &mask2);
 #endif
-mutex_enter(namecache_lock);
+mutex_enter(&namecache_lock);
 cache_lock_cpus();
 oldhash1 = nchashtbl;
 oldmask1 = nchash;
@@ -1083,7 +1093,7 @@ nchreinit(void)
 }
 }
 cache_unlock_cpus();
-mutex_exit(namecache_lock);
+mutex_exit(&namecache_lock);
 hashdone(oldhash1, HASH_LIST, oldmask1);
 hashdone(oldhash2, HASH_LIST, oldmask2);
 }
@@ -1097,16 +1107,16 @@ cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
 {
 struct namecache *ncp, *ncnext;
-mutex_enter(namecache_lock);
+mutex_enter(&namecache_lock);
 if (flags & PURGE_PARENTS) {
 SDT_PROBE(vfs, namecache, purge, parents, vp, 0, 0, 0, 0);
 for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_nclist);
 ncp != NULL; ncp = ncnext) {
 ncnext = LIST_NEXT(ncp, nc_vlist);
-mutex_enter(&ncp->nc_lock);
+mutex_enter(ncp->nc_lock);
 cache_invalidate(ncp);
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 cache_disassociate(ncp);
 }
 }
@@ -1115,9 +1125,9 @@
 for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_dnclist);
 ncp != NULL; ncp = ncnext) {
 ncnext = LIST_NEXT(ncp, nc_dvlist);
-mutex_enter(&ncp->nc_lock);
+mutex_enter(ncp->nc_lock);
 cache_invalidate(ncp);
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 cache_disassociate(ncp);
 }
 }
@@ -1126,11 +1136,11 @@
 ncp = cache_lookup_entry(vp, name, namelen);
 if (ncp) {
 cache_invalidate(ncp);
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 cache_disassociate(ncp);
 }
 }
-mutex_exit(namecache_lock);
+mutex_exit(&namecache_lock);
 }
 /*
@@ -1143,19 +1153,19 @@ cache_purgevfs(struct mount *mp)
 struct namecache *ncp, *nxtcp;
 SDT_PROBE(vfs, namecache, purge, vfs, mp, 0, 0, 0, 0);
-mutex_enter(namecache_lock);
+mutex_enter(&namecache_lock);
 for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
 nxtcp = TAILQ_NEXT(ncp, nc_lru);
-mutex_enter(&ncp->nc_lock);
+mutex_enter(ncp->nc_lock);
 if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) {
 /* Free the resources we had. */
 cache_invalidate(ncp);
 cache_disassociate(ncp);
 }
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 }
 cache_reclaim();
-mutex_exit(namecache_lock);
+mutex_exit(&namecache_lock);
 }
 /*
@@ -1170,7 +1180,7 @@ cache_prune(int incache, int target)
 struct namecache *ncp, *nxtcp, *sentinel;
 int items, recent, tryharder;
-KASSERT(mutex_owned(namecache_lock));
+KASSERT(mutex_owned(&namecache_lock));
 SDT_PROBE(vfs, namecache, prune, done, incache, target, 0, 0, 0);
 items = 0;
@@ -1198,13 +1208,13 @@
 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
 continue;
 }
-mutex_enter(&ncp->nc_lock);
+mutex_enter(ncp->nc_lock);
 if (ncp->nc_dvp != NULL) {
 cache_invalidate(ncp);
 cache_disassociate(ncp);
 incache--;
 }
-mutex_exit(&ncp->nc_lock);
+mutex_exit(ncp->nc_lock);
 }
 cache_ev_scan.ev_count += items;
 }
@@ -1215,17 +1225,21 @@
 static void
 cache_reclaim(void)
 {
+CPU_INFO_ITERATOR cii;
+struct cpu_info *ci;
+struct nchcpu *cpup;
+struct namecache_gc_queue queue;
 struct namecache *ncp, *next;
 int items;
-KASSERT(mutex_owned(namecache_lock));
+KASSERT(mutex_owned(&namecache_lock));
 /*
 * If the number of extant entries not awaiting garbage collection
 * exceeds the high water mark, then reclaim stale entries until we
 * reach our low water mark.
 */
-items = numcache - cache_gcpend;
+items = numcache - namecache_gc_pend;
 if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) {
 cache_prune(items, (int)((uint64_t)desiredvnodes *
 cache_lowat / 100));
@@ -1233,31 +1247,73 @@
 } else
 cache_ev_under.ev_count++;
+/* Chop the existing garbage collection list out. */
+mutex_enter(&namecache_gc_lock);
+queue = namecache_gc_queue;
+items = namecache_gc_pend;
+SLIST_INIT(&namecache_gc_queue);
+namecache_gc_pend = 0;
+mutex_exit(&namecache_gc_lock);
 /*
-* Stop forward lookup activity on all CPUs and garbage collect dead
-* entries.
+* Now disassociate all entries. We haven't locked out the reader
+* side (cache_lookup_entry()) but the "next" pointers in the hash
+* list will remain sufficiently consitent regardless of which
+* version of the list the reader sees (see defs of LIST_FOREACH,
+* LIST_NEXT);
 */
-cache_lock_cpus();
-ncp = cache_gcqueue;
-cache_gcqueue = NULL;
-items = cache_gcpend;
-cache_gcpend = 0;
-while (ncp != NULL) {
-next = ncp->nc_gcqueue;
+SLIST_FOREACH(ncp, &queue, nc_gclist) {
 cache_disassociate(ncp);
 KASSERT(ncp->nc_dvp == NULL);
 if (ncp->nc_hash.le_prev != NULL) {
 LIST_REMOVE(ncp, nc_hash);
 ncp->nc_hash.le_prev = NULL;
 }
 }
+/*
+* With that done, make sure our updates are visible on the bus, and
+* make a pass to observe the status of all of the CPU locks. If we
+* see a lock is unheld, we know the garbage collected entries can
+* no longer visible to that CPU. If we see a lock IS held, we need
+* to acquire and release it once to make sure that CPU is out of
+* cache_lookup_entry(). Take the opportunity to refresh stats.
+*/
+membar_sync(); /* stores above vs. reads below */
+for (CPU_INFO_FOREACH(cii, ci)) {
+cpup = ci->ci_data.cpu_nch;
+if (__predict_false(mutex_owner(&cpup->cpu_lock) != NULL)) {
+mutex_enter(&cpup->cpu_lock);
+/* nothing */
+mutex_exit(&cpup->cpu_lock);
+}
+UPDATE(cpup, ncs_goodhits);
+UPDATE(cpup, ncs_neghits);
+UPDATE(cpup, ncs_badhits);
+UPDATE(cpup, ncs_falsehits);
+UPDATE(cpup, ncs_miss);
+UPDATE(cpup, ncs_long);
+UPDATE(cpup, ncs_pass2);
+UPDATE(cpup, ncs_2passes);
+UPDATE(cpup, ncs_revhits);
+UPDATE(cpup, ncs_revmiss);
+}
+membar_sync(); /* reads above vs. stores below */
+/*
+* Nobody else can see the cache entries any more. Make a final
+* pass over the list and toss the contents.
+*/
+SLIST_FOREACH_SAFE(ncp, &queue, nc_gclist, next) {
+mutex_obj_free(ncp->nc_lock);
+ncp->nc_lock = NULL;
 if (ncp->nc_nlen > NCHNAMLEN) {
-cache_dtor(NULL, ncp);
 kmem_free(ncp, sizeof(*ncp) + ncp->nc_nlen);
 } else
 pool_cache_put(namecache_cache, ncp);
-ncp = next;
 }
-cache_unlock_cpus();
 numcache -= items;
 cache_ev_gc.ev_count += items;
 }
@@ -1273,10 +1329,10 @@ static void
 cache_thread(void *arg)
 {
-mutex_enter(namecache_lock);
+mutex_enter(&namecache_lock);
 for (;;) {
 cache_reclaim();
-kpause("cachegc", false, hz, namecache_lock);
+kpause("cachegc", false, hz, &namecache_lock);
 }
 }
@@ -1330,53 +1386,40 @@ namecache_count_2passes(void)
 static int
 cache_stat_sysctl(SYSCTLFN_ARGS)
 {
-struct nchstats stats;
-struct nchcpu *my_cpup;
-#ifdef CACHE_STATS_CURRENT
 CPU_INFO_ITERATOR cii;
 struct cpu_info *ci;
-#endif /* CACHE_STATS_CURRENT */
 if (oldp == NULL) {
-*oldlenp = sizeof(stats);
+*oldlenp = sizeof(nchstats);
 return 0;
 }
-if (*oldlenp < sizeof(stats)) {
+if (*oldlenp < sizeof(nchstats)) {
 *oldlenp = 0;
 return 0;
 }
-/*
-* Take this CPU's per-cpu lock to hold off cache_reclaim()
-* from doing a stats update while doing minimal damage to
-* concurrent operations.
-*/
 sysctl_unlock();
-my_cpup = curcpu()->ci_data.cpu_nch;
-mutex_enter(&my_cpup->cpu_lock);
-stats = nchstats;
-#ifdef CACHE_STATS_CURRENT
+mutex_enter(&namecache_lock);
 for (CPU_INFO_FOREACH(cii, ci)) {
 struct nchcpu *cpup = ci->ci_data.cpu_nch;
-ADD(stats, cpup, ncs_goodhits);
-ADD(stats, cpup, ncs_neghits);
-ADD(stats, cpup, ncs_badhits);
-ADD(stats, cpup, ncs_falsehits);
-ADD(stats, cpup, ncs_miss);
-ADD(stats, cpup, ncs_long);
-ADD(stats, cpup, ncs_pass2);
-ADD(stats, cpup, ncs_2passes);
-ADD(stats, cpup, ncs_revhits);
-ADD(stats, cpup, ncs_revmiss);
+UPDATE(cpup, ncs_goodhits);
+UPDATE(cpup, ncs_neghits);
+UPDATE(cpup, ncs_badhits);
+UPDATE(cpup, ncs_falsehits);
+UPDATE(cpup, ncs_miss);
+UPDATE(cpup, ncs_long);
+UPDATE(cpup, ncs_pass2);
+UPDATE(cpup, ncs_2passes);
+UPDATE(cpup, ncs_revhits);
+UPDATE(cpup, ncs_revmiss);
 }
-#endif /* CACHE_STATS_CURRENT */
-mutex_exit(&my_cpup->cpu_lock);
+mutex_exit(&namecache_lock);
 sysctl_relock();
-*oldlenp = sizeof(stats);
-return sysctl_copyout(l, &stats, oldp, sizeof(stats));
+*oldlenp = sizeof(nchstats);
+return sysctl_copyout(l, &nchstats, oldp, sizeof(nchstats));
 }
 static void
rump_namei.h
@@ -1,4 +1,4 @@
-/* $NetBSD: rump_namei.h,v 1.33 2019/09/13 13:59:31 christos Exp $ */
+/* $NetBSD: rump_namei.h,v 1.34 2019/12/01 13:39:53 ad Exp $ */
 /*
namei.h
@@ -1,4 +1,4 @@
-/* $NetBSD: namei.h,v 1.99 2019/09/13 13:59:31 christos Exp $ */
+/* $NetBSD: namei.h,v 1.100 2019/12/01 13:39:53 ad Exp $ */
 /*
@@ -211,6 +211,7 @@ struct nameidata {
 *
 * - stable after initialization
 * L namecache_lock
+* G namecache_gc_lock
 * C struct nchcpu::cpu_lock
 * L/C insert needs L, read needs L or any C,
 * must hold L and all C after (or during) delete before free
@@ -222,10 +223,10 @@ struct namecache {
 TAILQ_ENTRY(namecache) nc_lru; /* L pseudo-lru chain */
 LIST_ENTRY(namecache) nc_dvlist;/* L dvp's list of cache entries */
 LIST_ENTRY(namecache) nc_vlist; /* L vp's list of cache entries */
+SLIST_ENTRY(namecache) nc_gclist;/*G queue for garbage collection */
 struct vnode *nc_dvp; /* N vnode of parent of name */
 struct vnode *nc_vp; /* N vnode the name refers to */
-void *nc_gcqueue; /* N queue for garbage collection */
-kmutex_t nc_lock; /* lock on this entry */
+kmutex_t *nc_lock; /* - lock on this entry */
 int nc_hittime; /* N last time scored a hit */
 int nc_flags; /* - copy of componentname ISWHITEOUT */
 u_short nc_nlen; /* - length of name */
namei.src
@@ -1,4 +1,4 @@
-/* $NetBSD: namei.src,v 1.43 2019/09/13 13:58:53 christos Exp $ */
+/* $NetBSD: namei.src,v 1.44 2019/12/01 13:39:53 ad Exp $ */
 /*
 * Copyright (c) 1985, 1989, 1991, 1993
@@ -203,6 +203,7 @@ NAMEIFL PARAMASK 0x02ee300 /* mask of parameter descriptors */
 *
 * - stable after initialization
 * L namecache_lock
+* G namecache_gc_lock
 * C struct nchcpu::cpu_lock
 * L/C insert needs L, read needs L or any C,
 * must hold L and all C after (or during) delete before free
@@ -214,10 +215,10 @@ struct namecache {
 TAILQ_ENTRY(namecache) nc_lru; /* L pseudo-lru chain */
 LIST_ENTRY(namecache) nc_dvlist;/* L dvp's list of cache entries */
 LIST_ENTRY(namecache) nc_vlist; /* L vp's list of cache entries */
+SLIST_ENTRY(namecache) nc_gclist;/*G queue for garbage collection */
 struct vnode *nc_dvp; /* N vnode of parent of name */
 struct vnode *nc_vp; /* N vnode the name refers to */
-void *nc_gcqueue; /* N queue for garbage collection */
-kmutex_t nc_lock; /* lock on this entry */
+kmutex_t *nc_lock; /* - lock on this entry */
 int nc_hittime; /* N last time scored a hit */
 int nc_flags; /* - copy of componentname ISWHITEOUT */
 u_short nc_nlen; /* - length of name */