- Shrink namecache entries to 64 bytes on 32-bit platforms and use 32-bit
key values there for speed (remains 128 bytes & 64-bits on _LP64).
- Comments.
ad 2023-09-09 18:27:59 +00:00
parent 7e018a29ea
commit 6e2810dfd3
4 changed files with 93 additions and 60 deletions


@ -1,7 +1,7 @@
/* $NetBSD: vfs_cache.c,v 1.154 2023/04/29 10:07:22 riastradh Exp $ */
/* $NetBSD: vfs_cache.c,v 1.155 2023/09/09 18:27:59 ad Exp $ */
/*-
* Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
* Copyright (c) 2008, 2019, 2020, 2023 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -79,10 +79,9 @@
* we are not sensible, and use a per-directory data structure to index
* names, but the cache otherwise functions the same.
*
* The index is a red-black tree. There are no special concurrency
* requirements placed on it, because it's per-directory and protected
* by the namecache's per-directory locks. It should therefore not be
* difficult to experiment with other types of index.
* The index is a red-black tree. It should not be difficult to
* experiment with other types of index; note, however, that a tree
* can trivially be made to support lockless lookup.
*
* Each cached name is stored in a struct namecache, along with a
* pointer to the associated vnode (nc_vp). Names longer than a
@ -91,6 +90,19 @@
* in struct namecache. If it is a "negative" entry, (i.e. for a name
* that is known NOT to exist) the vnode pointer will be NULL.
*
* In practice this implementation is no slower than the hash table
* that preceded it, and in some cases it significantly outperforms
* the hash table. Some reasons why this might be:
*
* - natural partitioning provided by the file system structure, which
* the prior implementation discarded (global hash table).
* - worst case tree traversal of O(log n), whereas the hash table
* could have many collisions.
* - minimized cache misses & total L2/L3 CPU cache footprint; struct
* namecache and vnode_impl_t are laid out to keep cache footprint
* minimal in the lookup path; no hash table buckets to cache.
* - minimized number of conditionals & string comparisons.
*
* For a directory with 3 cached names for 3 distinct vnodes, the
* various vnodes and namecache structs would be connected like this
* (the root is at the bottom of the diagram):
@ -167,12 +179,10 @@
* used during reverse lookup)
*
* 3) cache_lru_lock (LRU list direction, used during reclaim)
*
* 4) vp->v_interlock (what the cache entry points to)
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.154 2023/04/29 10:07:22 riastradh Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.155 2023/09/09 18:27:59 ad Exp $");
#define __NAMECACHE_PRIVATE
#ifdef _KERNEL_OPT
@ -202,6 +212,16 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.154 2023/04/29 10:07:22 riastradh Ex
#include <miscfs/genfs/genfs.h>
/*
* Assert that data structure layout hasn't changed unintentionally.
*/
#ifdef _LP64
CTASSERT(sizeof(struct namecache) == 128);
#else
CTASSERT(sizeof(struct namecache) == 64);
#endif
CTASSERT(NC_NLEN_MASK >= MAXPATHLEN);
static void cache_activate(struct namecache *);
static void cache_update_stats(void *);
static int cache_compare_nodes(void *, const void *, const void *);
@ -262,7 +282,7 @@ static kmutex_t cache_stat_lock __cacheline_aligned;
*/
int cache_lru_maxdeact __read_mostly = 2; /* max # to deactivate */
int cache_lru_maxscan __read_mostly = 64; /* max # to scan/reclaim */
int cache_maxlen __read_mostly = USHRT_MAX; /* max name length to cache */
int cache_maxlen __read_mostly = NC_NLEN_MASK; /* max name length to cache */
int cache_stat_interval __read_mostly = 300; /* in seconds */
/*
@ -328,8 +348,8 @@ cache_compare_nodes(void *context, const void *n1, const void *n2)
if (nc1->nc_key > nc2->nc_key) {
return 1;
}
KASSERT(nc1->nc_nlen == nc2->nc_nlen);
return memcmp(nc1->nc_name, nc2->nc_name, nc1->nc_nlen);
KASSERT(NC_NLEN(nc1) == NC_NLEN(nc2));
return memcmp(nc1->nc_name, nc2->nc_name, NC_NLEN(nc1));
}
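The per-directory index is driven through NetBSD's rbtree(3), which takes an ops structure pairing the node comparator above with a key comparator. Neither the key comparator nor the ops table appears in this hunk, so the sketch below is only an illustration of how that wiring typically looks; cache_compare_key_sketch and cache_rbtree_ops_sketch are hypothetical names.

/*
 * Illustrative only; needs <sys/rbtree.h>.  The real key comparator and
 * ops table are not part of this diff and may differ in detail.
 */
static int
cache_compare_key_sketch(void *context, const void *n, const void *k)
{
        const struct namecache *ncp = n;
        const struct namecache *key = k;        /* a template entry */

        if (ncp->nc_key < key->nc_key)
                return -1;
        if (ncp->nc_key > key->nc_key)
                return 1;
        /* Equal keys imply equal lengths, so one memcmp decides. */
        return memcmp(ncp->nc_name, key->nc_name, NC_NLEN(key));
}

static const rb_tree_ops_t cache_rbtree_ops_sketch = {
        .rbto_compare_nodes = cache_compare_nodes,
        .rbto_compare_key = cache_compare_key_sketch,
        .rbto_node_offset = offsetof(struct namecache, nc_tree),
        .rbto_context = NULL,
};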
/*
@ -337,15 +357,15 @@ cache_compare_nodes(void *context, const void *n1, const void *n2)
* the key value to try and improve uniqueness, and so that length doesn't
* need to be compared separately for string comparisons.
*/
static inline uint64_t
static uintptr_t
cache_key(const char *name, size_t nlen)
{
uint64_t key;
uintptr_t key;
KASSERT(nlen <= USHRT_MAX);
KASSERT((nlen & ~NC_NLEN_MASK) == 0);
key = hash32_buf(name, nlen, HASH32_STR_INIT);
return (key << 32) | nlen;
return (key << NC_NLEN_BITS) | (uintptr_t)nlen;
}
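Packing the length into the low NC_NLEN_BITS of the key means two names of different length can never have equal keys, so memcmp() only runs on same-length candidates and no separate length comparison is needed. A minimal userland sketch of the idea, with hypothetical names (pack_key, same_name) and the same 11-bit split assumed:

#include <stdint.h>
#include <string.h>

#define KEY_NLEN_BITS   11
#define KEY_NLEN_MASK   ((1u << KEY_NLEN_BITS) - 1)

static uintptr_t
pack_key(uint32_t hash, size_t nlen)
{
        /* On a 32-bit uintptr_t the top KEY_NLEN_BITS of the hash are lost. */
        return ((uintptr_t)hash << KEY_NLEN_BITS) | (nlen & KEY_NLEN_MASK);
}

static int
same_name(uintptr_t key1, const char *name1, uintptr_t key2, const char *name2)
{
        if (key1 != key2)
                return 0;       /* hash or length differs: cannot match */
        /* Equal keys imply equal lengths, so one memcmp settles it. */
        return memcmp(name1, name2, key1 & KEY_NLEN_MASK) == 0;
}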
/*
@ -358,13 +378,13 @@ cache_remove(struct namecache *ncp, const bool dir2node)
{
struct vnode *vp, *dvp = ncp->nc_dvp;
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
size_t namelen = NC_NLEN(ncp);
KASSERT(rw_write_held(&dvi->vi_nc_lock));
KASSERT(cache_key(ncp->nc_name, ncp->nc_nlen) == ncp->nc_key);
KASSERT(cache_key(ncp->nc_name, namelen) == ncp->nc_key);
KASSERT(rb_tree_find_node(&dvi->vi_nc_tree, ncp) == ncp);
SDT_PROBE(vfs, namecache, invalidate, done, ncp,
0, 0, 0, 0);
SDT_PROBE(vfs, namecache, invalidate, done, ncp, 0, 0, 0, 0);
/*
* Remove from the vnode's list. This excludes cache_revlookup(),
@ -391,8 +411,8 @@ cache_remove(struct namecache *ncp, const bool dir2node)
mutex_exit(&cache_lru_lock);
/* Finally, free it. */
if (ncp->nc_nlen > NCHNAMLEN) {
size_t sz = offsetof(struct namecache, nc_name[ncp->nc_nlen]);
if (namelen > NCHNAMLEN) {
size_t sz = offsetof(struct namecache, nc_name[namelen]);
kmem_free(ncp, sz);
} else {
pool_cache_put(cache_pool, ncp);
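The free above mirrors the allocation: short names come from the pool cache, while longer ones are sized with offsetof() so only the bytes actually needed for nc_name are allocated. The allocation itself lives in cache_enter() and is not shown in this diff; the sketch below is what it is assumed to look like.

/* Assumed shape of the matching allocation in cache_enter(). */
struct namecache *ncp;
size_t sz;

if (namelen > NCHNAMLEN) {
        sz = offsetof(struct namecache, nc_name[namelen]);
        ncp = kmem_alloc(sz, KM_SLEEP);
} else {
        ncp = pool_cache_get(cache_pool, PR_WAITOK);
}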
@ -404,13 +424,15 @@ cache_remove(struct namecache *ncp, const bool dir2node)
*/
static struct namecache * __noinline
cache_lookup_entry(struct vnode *dvp, const char *name, size_t namelen,
uint64_t key)
uintptr_t key)
{
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
struct rb_node *node = dvi->vi_nc_tree.rbt_root;
struct namecache *ncp;
int lrulist, diff;
enum cache_lru_id lrulist;
int diff;
KASSERT(namelen <= MAXPATHLEN);
KASSERT(rw_lock_held(&dvi->vi_nc_lock));
/*
@ -430,7 +452,7 @@ cache_lookup_entry(struct vnode *dvp, const char *name, size_t namelen,
KASSERT((void *)&ncp->nc_tree == (void *)ncp);
KASSERT(ncp->nc_dvp == dvp);
if (ncp->nc_key == key) {
KASSERT(ncp->nc_nlen == namelen);
KASSERT(NC_NLEN(ncp) == namelen);
diff = memcmp(ncp->nc_name, name, namelen);
if (__predict_true(diff == 0)) {
break;
@ -511,7 +533,7 @@ cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
struct namecache *ncp;
struct vnode *vp;
uint64_t key;
uintptr_t key;
int error;
bool hit;
krw_t op;
@ -564,7 +586,7 @@ cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
COUNT(ncs_badhits);
return false;
}
if (ncp->nc_vp == NULL) {
if ((vp = ncp->nc_vp) == NULL) {
if (iswht_ret != NULL) {
/*
* Restore the ISWHITEOUT flag saved earlier.
@ -592,7 +614,6 @@ cache_lookup(struct vnode *dvp, const char *name, size_t namelen,
rw_exit(&dvi->vi_nc_lock);
return hit;
}
vp = ncp->nc_vp;
error = vcache_tryvget(vp);
rw_exit(&dvi->vi_nc_lock);
if (error) {
@ -638,7 +659,8 @@ cache_lookup_linked(struct vnode *dvp, const char *name, size_t namelen,
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
struct namecache *ncp;
krwlock_t *oldlock, *newlock;
uint64_t key;
struct vnode *vp;
uintptr_t key;
int error;
KASSERT(namelen != cache_mp_nlen || name == cache_mp_name);
@ -727,7 +749,7 @@ cache_lookup_linked(struct vnode *dvp, const char *name, size_t namelen,
name, namelen, 0, 0);
return false;
}
if (ncp->nc_vp == NULL) {
if ((vp = ncp->nc_vp) == NULL) {
/* found negative entry; vn is already null from above */
KASSERT(namelen != cache_mp_nlen);
KASSERT(name != cache_mp_name);
@ -749,7 +771,7 @@ cache_lookup_linked(struct vnode *dvp, const char *name, size_t namelen,
if (newlock) {
*plock = newlock;
}
*vn_ret = ncp->nc_vp;
*vn_ret = vp;
return true;
}
@ -772,8 +794,9 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp,
{
vnode_impl_t *vi = VNODE_TO_VIMPL(vp);
struct namecache *ncp;
enum cache_lru_id lrulist;
struct vnode *dvp;
int error, nlen, lrulist;
int error, nlen;
char *bp;
KASSERT(vp != NULL);
@ -812,12 +835,12 @@ cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp,
TAILQ_FOREACH(ncp, &vi->vi_nc_list, nc_list) {
KASSERT(ncp->nc_vp == vp);
KASSERT(ncp->nc_dvp != NULL);
nlen = ncp->nc_nlen;
nlen = NC_NLEN(ncp);
/*
* Ignore mountpoint entries.
*/
if (ncp->nc_nlen == cache_mp_nlen) {
if (nlen == cache_mp_nlen) {
continue;
}
@ -929,7 +952,6 @@ cache_enter(struct vnode *dvp, struct vnode *vp,
ncp->nc_vp = vp;
ncp->nc_dvp = dvp;
ncp->nc_key = cache_key(name, namelen);
ncp->nc_nlen = namelen;
ncp->nc_whiteout = ((cnflags & ISWHITEOUT) != 0);
memcpy(ncp->nc_name, name, namelen);
@ -941,7 +963,7 @@ cache_enter(struct vnode *dvp, struct vnode *vp,
oncp = rb_tree_insert_node(&dvi->vi_nc_tree, ncp);
if (oncp != ncp) {
KASSERT(oncp->nc_key == ncp->nc_key);
KASSERT(oncp->nc_nlen == ncp->nc_nlen);
KASSERT(NC_NLEN(oncp) == NC_NLEN(ncp));
KASSERT(memcmp(oncp->nc_name, name, namelen) == 0);
cache_remove(oncp, true);
oncp = rb_tree_insert_node(&dvi->vi_nc_tree, ncp);
@ -1107,12 +1129,11 @@ nchinit(void)
void
cache_cpu_init(struct cpu_info *ci)
{
void *p;
size_t sz;
sz = roundup2(sizeof(struct nchcpu), coherency_unit) + coherency_unit;
p = kmem_zalloc(sz, KM_SLEEP);
ci->ci_data.cpu_nch = (void *)roundup2((uintptr_t)p, coherency_unit);
sz = roundup2(sizeof(struct nchcpu), coherency_unit);
ci->ci_data.cpu_nch = kmem_zalloc(sz, KM_SLEEP);
KASSERT(((uintptr_t)ci->ci_data.cpu_nch & (coherency_unit - 1)) == 0);
}
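The per-CPU statistics block is now allocated at exactly roundup2(sizeof(struct nchcpu), coherency_unit) bytes, relying on the allocator handing back storage aligned to the coherency unit for such sizes instead of over-allocating and aligning the pointer by hand; the KASSERT checks that assumption holds. A worked example of the rounding (the 64-byte coherency_unit and 48-byte struct size are assumptions for illustration):

/*
 * roundup2(x, m) rounds x up to the next multiple of the power-of-two m,
 * e.g. roundup2(48, 64) == ((48 + 63) & ~63) == 64, so each CPU's counters
 * occupy a whole cache line and never false-share with another CPU's.
 */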
/*
@ -1232,7 +1253,7 @@ cache_purge_name(struct vnode *dvp, const char *name, size_t namelen)
{
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
struct namecache *ncp;
uint64_t key;
uintptr_t key;
SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0);
@ -1524,7 +1545,7 @@ namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
for (id = 0; id < LRU_COUNT; id++) {
TAILQ_FOREACH(ncp, &cache_lru.list[id], nc_lru) {
if (ncp->nc_vp == vp) {
(*pr)("name %.*s\n", ncp->nc_nlen,
(*pr)("name %.*s\n", NC_NLEN(ncp),
ncp->nc_name);
dvp = ncp->nc_dvp;
}
@ -1537,7 +1558,7 @@ namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
for (id = 0; id < LRU_COUNT; id++) {
TAILQ_FOREACH(ncp, &cache_lru.list[id], nc_lru) {
if (ncp->nc_vp == dvp) {
(*pr)("parent %.*s\n", ncp->nc_nlen,
(*pr)("parent %.*s\n", NC_NLEN(ncp),
ncp->nc_name);
}
}


@ -1,4 +1,4 @@
/* $NetBSD: namei.src,v 1.60 2021/06/29 22:39:21 dholland Exp $ */
/* $NetBSD: namei.src,v 1.61 2023/09/09 18:27:59 ad Exp $ */
/*
* Copyright (c) 1985, 1989, 1991, 1993
@ -199,13 +199,25 @@ NAMEIFL PARAMASK 0x02ff800 /* mask of parameter descriptors */
*/
#define NCHNAMLEN sizeof(((struct namecache *)NULL)->nc_name)
/*
* The uintptr_t-sized key value computed for each name consists of name
* length and a hash value. On 32-bit platforms the top NC_NLEN_BITS of
* the 32-bit hash value are lopped off.
*/
#define NC_NLEN_BITS 11
#define NC_NLEN_MASK ((1 << NC_NLEN_BITS) - 1)
#define NC_NLEN(ncp) ((ncp)->nc_key & NC_NLEN_MASK)
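With NC_NLEN_BITS set to 11 the length field spans 0..2047, which covers MAXPATHLEN (1024 on NetBSD, stated here as an assumption); the CTASSERT(NC_NLEN_MASK >= MAXPATHLEN) added to vfs_cache.c is what actually enforces the relationship.

/* NC_NLEN_MASK = (1 << 11) - 1 = 2047 >= MAXPATHLEN, so any cacheable
   name length fits in the low bits of nc_key. */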
/*
* Namecache entry.
*
* This structure describes the elements in the cache of recent names looked
* up by namei. It's carefully sized to take up 128 bytes on _LP64, to make
* good use of space and the CPU caches. Items used during RB tree lookup
* (nc_tree, nc_key) are clustered at the start of the structure.
* up by namei. It's carefully sized to take up 128 bytes on _LP64 and 64
* bytes on 32-bit machines, to make good use of space and the CPU caches.
*
* Items used during RB tree lookup (nc_tree, nc_key) are clustered at the
* start of the structure to minimise cache misses during lookup.
*
* Field markings and their corresponding locks:
*
@ -216,17 +228,20 @@ NAMEIFL PARAMASK 0x02ff800 /* mask of parameter descriptors */
*/
struct namecache {
struct rb_node nc_tree; /* d red-black tree, must be first */
uint64_t nc_key; /* - hashed key value */
uintptr_t nc_key; /* - hashed key value */
TAILQ_ENTRY(namecache) nc_list; /* v nc_vp's list of cache entries */
TAILQ_ENTRY(namecache) nc_lru; /* l pseudo-lru chain */
struct vnode *nc_dvp; /* - vnode of parent of name */
struct vnode *nc_vp; /* - vnode the name refers to */
int nc_lrulist; /* l which LRU list it's on */
u_short nc_nlen; /* - length of the name */
char nc_whiteout; /* - true if a whiteout */
char nc_name[41]; /* - segment name */
};
u_char nc_lrulist; /* l LRU list the entry is on */
u_char nc_whiteout; /* - whiteout indicator */
#ifdef _LP64
char nc_name[46]; /* - segment name */
#else
char nc_name[22]; /* - segment name */
#endif
};
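A rough byte budget shows how the two targets come out, assuming struct rb_node is three pointer-sized words and each TAILQ_ENTRY is two (per <sys/rbtree.h> and <sys/queue.h>); all members up to nc_name are pointer-aligned, so no padding is introduced:

/*
 * Approximate layout, in bytes (illustrative; exact sizes depend on the
 * definitions of struct rb_node and TAILQ_ENTRY):
 *
 *                    _LP64    32-bit
 *   nc_tree             24        12
 *   nc_key               8         4
 *   nc_list             16         8
 *   nc_lru              16         8
 *   nc_dvp               8         4
 *   nc_vp                8         4
 *   nc_lrulist           1         1
 *   nc_whiteout          1         1
 *   nc_name             46        22
 *                      ----      ----
 *                      128        64
 */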
#endif /* __NAMECACHE_PRIVATE */
#ifdef _KERNEL
#include <sys/pool.h>


@ -1,4 +1,4 @@
/* $NetBSD: main.h,v 1.7 2022/08/21 07:46:52 mlelstv Exp $ */
/* $NetBSD: main.h,v 1.8 2023/09/09 18:27:59 ad Exp $ */
/*
* Copyright (c) 2003 The NetBSD Foundation, Inc.
@ -36,9 +36,6 @@ extern u_long kernel_map_addr;
extern void *uvm_vnodeops, *uvm_deviceops, *aobj_pager, *ubc_pager;
extern rlim_t maxssiz;
LIST_HEAD(nchashhead, namecache);
extern struct nchashhead *nchashtbl;
struct cache_entry {
LIST_ENTRY(cache_entry) ce_next;
struct vnode *ce_vp, *ce_pvp;


@ -1,7 +1,7 @@
/* $NetBSD: pmap.c,v 1.57 2022/08/21 07:46:52 mlelstv Exp $ */
/* $NetBSD: pmap.c,v 1.58 2023/09/09 18:27:59 ad Exp $ */
/*
* Copyright (c) 2002, 2003, 2020 The NetBSD Foundation, Inc.
* Copyright (c) 2002, 2003, 2020, 2023 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -31,7 +31,7 @@
#include <sys/cdefs.h>
#ifndef lint
__RCSID("$NetBSD: pmap.c,v 1.57 2022/08/21 07:46:52 mlelstv Exp $");
__RCSID("$NetBSD: pmap.c,v 1.58 2023/09/09 18:27:59 ad Exp $");
#endif
#include <string.h>
@ -874,7 +874,7 @@ search_cache(kvm_t *kd, struct vnode *vp, char **name, char *buf, size_t blen)
(vi.vi_vnode.v_vflag & VV_ROOT) != 0)
break;
/* Otherwise pull first NCHNAMLEN chars of name. */
nlen = MIN((size_t)nc.nc_nlen, NCHNAMLEN);
nlen = MIN(NC_NLEN(&nc), NCHNAMLEN);
/* too small */
if ((size_t)(o - buf) < nlen + (o != e ? 1 : 0))
break;