Replace the global vm_page hash with a per vm_object rbtree.

Proposed on tech-kern@.
ad 2008-06-04 15:06:04 +00:00
parent 61464a76be
commit 7a34cb95f0
5 changed files with 74 additions and 178 deletions
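
The change is visible in the uvm_object.h and uvm_page.c hunks below: every vm_page embeds a struct rb_node, every uvm_object owns a struct rb_tree keyed by page offset, and lookups walk that per-object tree under the object's lock instead of hashing <object, offset> into a single global table. What follows is a minimal kernel-style sketch of the same pattern using the <sys/rb.h> interface the diff relies on; struct foo, foo_container and the foo_* functions are hypothetical names, not part of the commit.

#include <sys/types.h>
#include <sys/rb.h>

/* Hypothetical record keyed by a file offset, standing in for vm_page. */
struct foo {
	struct rb_node	f_node;		/* kept as the first member */
	off_t		f_offset;
};

static signed int
foo_compare_nodes(const struct rb_node *n1, const struct rb_node *n2)
{
	const struct foo *f1 = (const void *)n1;
	const struct foo *f2 = (const void *)n2;

	/* Same sign convention as the comparators added by this commit. */
	if (f1->f_offset < f2->f_offset)
		return 1;
	if (f1->f_offset > f2->f_offset)
		return -1;
	return 0;
}

static signed int
foo_compare_key(const struct rb_node *n, const void *key)
{
	const struct foo *f = (const void *)n;
	const off_t k = *(const off_t *)key;

	if (f->f_offset < k)
		return 1;
	if (f->f_offset > k)
		return -1;
	return 0;
}

static const struct rb_tree_ops foo_tree_ops = {
	.rb_compare_nodes = foo_compare_nodes,
	.rb_compare_key = foo_compare_key,
};

/* One tree per containing object, replacing one global hash table. */
struct foo_container {
	struct rb_tree	fc_tree;
};

static void
foo_container_init(struct foo_container *fc)
{
	rb_tree_init(&fc->fc_tree, &foo_tree_ops);
}

static void
foo_insert(struct foo_container *fc, struct foo *f)
{
	rb_tree_insert_node(&fc->fc_tree, &f->f_node);
}

static struct foo *
foo_lookup(struct foo_container *fc, off_t off)
{
	/* O(log n) in the entries of this container only. */
	return (struct foo *)rb_tree_find_node(&fc->fc_tree, &off);
}

Because each tree hangs off its object, the lookup needs no lock of its own: the object lock that callers already hold (vmobjlock in the diff) serializes it, which is what lets the commit delete the uvm_hashlocks array outright.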

sys/uvm/uvm.h

@@ -1,4 +1,4 @@
/* $NetBSD: uvm.h,v 1.54 2008/06/04 12:45:28 ad Exp $ */
/* $NetBSD: uvm.h,v 1.55 2008/06/04 15:06:04 ad Exp $ */
/*
*
@@ -105,11 +105,6 @@ struct uvm {
/* aiodone daemon */
struct workqueue *aiodone_queue;
/* page hash */
struct pglist *page_hash; /* page hash table (vp/off->page) */
int page_nhash; /* number of buckets */
int page_hashmask; /* hash mask */
/* aio_done is locked by uvm.pagedaemon_lock and splbio! */
TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */

sys/uvm/uvm_init.c

@@ -1,4 +1,4 @@
/* $NetBSD: uvm_init.c,v 1.32 2008/01/28 12:22:47 yamt Exp $ */
/* $NetBSD: uvm_init.c,v 1.33 2008/06/04 15:06:04 ad Exp $ */
/*
*
@@ -39,7 +39,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.32 2008/01/28 12:22:47 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.33 2008/06/04 15:06:04 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -158,7 +158,6 @@ uvm_init(void)
* of kernel objects.
*/
uvm_page_rehash();
uao_create(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
UAO_FLAG_KERNSWAP);

sys/uvm/uvm_object.h

@@ -1,4 +1,4 @@
/* $NetBSD: uvm_object.h,v 1.25 2008/06/02 16:25:34 ad Exp $ */
/* $NetBSD: uvm_object.h,v 1.26 2008/06/04 15:06:04 ad Exp $ */
/*
*
@@ -41,6 +41,8 @@
* uvm_object.h
*/
#include <sys/rb.h>
/*
* uvm_object: all that is left of mach objects.
*/
@@ -51,6 +53,7 @@ struct uvm_object {
struct pglist memq; /* pages in this object */
int uo_npages; /* # of pages in memq */
unsigned uo_refs; /* reference count */
struct rb_tree rb_tree; /* tree of pages */
};
/*
@@ -102,6 +105,8 @@ extern const struct uvm_pagerops aobj_pager;
#define UVM_OBJ_IS_AOBJ(uobj) \
((uobj)->pgops == &aobj_pager)
extern const struct rb_tree_ops uvm_page_tree_ops;
#define UVM_OBJ_INIT(uobj, ops, refs) \
do { \
mutex_init(&(uobj)->vmobjlock, MUTEX_DEFAULT, IPL_NONE);\
@@ -109,12 +114,24 @@ extern const struct uvm_pagerops aobj_pager;
TAILQ_INIT(&(uobj)->memq); \
(uobj)->uo_npages = 0; \
(uobj)->uo_refs = (refs); \
rb_tree_init(&(uobj)->rb_tree, &uvm_page_tree_ops); \
} while (/* CONSTCOND */ 0)
#ifdef DIAGNOSTIC
#define UVM_OBJ_DESTROY(uobj) \
do { \
voff_t _xo = 0; \
void *_xn; \
mutex_destroy(&(uobj)->vmobjlock); \
_xn = rb_tree_find_node_geq(&(uobj)->rb_tree, &_xo); \
KASSERT(_xn == NULL); \
} while (/* CONSTCOND */ 0)
#else
#define UVM_OBJ_DESTROY(uobj) \
do { \
mutex_destroy(&(uobj)->vmobjlock); \
} while (/* CONSTCOND */ 0)
#endif /* DIAGNOSTIC */
#endif /* _KERNEL */
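
The DIAGNOSTIC flavour of UVM_OBJ_DESTROY above checks that the object's page tree is empty by looking up the first node at or after offset 0; if any page were still linked into the object, rb_tree_find_node_geq() would return it and the KASSERT would fire. A hedged sketch of an object going through its life with these macros, assuming a hypothetical pager ops table foo_pager and softc:

#include <sys/param.h>
#include <uvm/uvm_object.h>

/* Hypothetical pager; only its address matters for this sketch. */
static const struct uvm_pagerops foo_pager;

struct foo_softc {
	struct uvm_object	sc_uobj;
};

static void
foo_attach(struct foo_softc *sc)
{
	/* Sets up the object lock, page queue, refcount and an empty rb_tree. */
	UVM_OBJ_INIT(&sc->sc_uobj, &foo_pager, 1);
}

static void
foo_detach(struct foo_softc *sc)
{
	/*
	 * Every page must already have been removed from the object;
	 * on DIAGNOSTIC kernels the macro asserts the rb_tree is empty.
	 */
	UVM_OBJ_DESTROY(&sc->sc_uobj);
}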

sys/uvm/uvm_page.c

@@ -1,4 +1,4 @@
/* $NetBSD: uvm_page.c,v 1.133 2008/06/04 12:45:28 ad Exp $ */
/* $NetBSD: uvm_page.c,v 1.134 2008/06/04 15:06:04 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -71,7 +71,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.133 2008/06/04 12:45:28 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.134 2008/06/04 15:06:04 ad Exp $");
#include "opt_uvmhist.h"
#include "opt_readahead.h"
@@ -124,14 +124,6 @@ bool vm_page_zero_enable = false;
static vaddr_t virtual_space_start;
static vaddr_t virtual_space_end;
/*
* we use a hash table with only one bucket during bootup. we will
* later rehash (resize) the hash table once the allocator is ready.
* we static allocate the one bootstrap bucket below...
*/
static struct pglist uvm_bootbucket;
/*
* we allocate an initial number of page colors in uvm_page_init(),
* and remember them. We may re-color pages as cache sizes are
@@ -148,20 +140,6 @@ MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");
vaddr_t uvm_zerocheckkva;
#endif /* DEBUG */
/*
* locks on the hash table. allocated in 32 byte chunks to try
* and reduce cache traffic between CPUs.
*/
#define UVM_HASHLOCK_CNT 32
#define uvm_hashlock(hash) \
(&uvm_hashlocks[(hash) & (UVM_HASHLOCK_CNT - 1)].lock)
static union {
kmutex_t lock;
uint8_t pad[32];
} uvm_hashlocks[UVM_HASHLOCK_CNT] __aligned(32);
/*
* local prototypes
*/
@@ -170,12 +148,50 @@ static void uvm_pageinsert(struct vm_page *);
static void uvm_pageinsert_after(struct vm_page *, struct vm_page *);
static void uvm_pageremove(struct vm_page *);
/*
* per-object tree of pages
*/
static signed int
uvm_page_compare_nodes(const struct rb_node *n1, const struct rb_node *n2)
{
const struct vm_page *pg1 = (const void *)n1;
const struct vm_page *pg2 = (const void *)n2;
const voff_t a = pg1->offset;
const voff_t b = pg2->offset;
if (a < b)
return 1;
if (a > b)
return -1;
return 0;
}
static signed int
uvm_page_compare_key(const struct rb_node *n, const void *key)
{
const struct vm_page *pg = (const void *)n;
const voff_t a = pg->offset;
const voff_t b = *(const voff_t *)key;
if (a < b)
return 1;
if (a > b)
return -1;
return 0;
}
const struct rb_tree_ops uvm_page_tree_ops = {
.rb_compare_nodes = uvm_page_compare_nodes,
.rb_compare_key = uvm_page_compare_key,
};
/*
* inline functions
*/
/*
* uvm_pageinsert: insert a page in the object and the hash table
* uvm_pageinsert: insert a page in the object.
* uvm_pageinsert_after: insert a page into the specified place in listq
*
* => caller must lock object
@@ -187,22 +203,14 @@ static void uvm_pageremove(struct vm_page *);
inline static void
uvm_pageinsert_after(struct vm_page *pg, struct vm_page *where)
{
struct pglist *buck;
struct uvm_object *uobj = pg->uobject;
kmutex_t *lock;
u_int hash;
KASSERT(mutex_owned(&uobj->vmobjlock));
KASSERT((pg->flags & PG_TABLED) == 0);
KASSERT(where == NULL || (where->flags & PG_TABLED));
KASSERT(where == NULL || (where->uobject == uobj));
hash = uvm_pagehash(uobj, pg->offset);
buck = &uvm.page_hash[hash];
lock = uvm_hashlock(hash);
mutex_spin_enter(lock);
TAILQ_INSERT_TAIL(buck, pg, hashq);
mutex_spin_exit(lock);
rb_tree_insert_node(&uobj->rb_tree, &pg->rb_node);
if (UVM_OBJ_IS_VNODE(uobj)) {
if (uobj->uo_npages == 0) {
@@ -235,7 +243,7 @@ uvm_pageinsert(struct vm_page *pg)
}
/*
* uvm_page_remove: remove page from object and hash
* uvm_page_remove: remove page from object.
*
* => caller must lock object
* => caller must lock page queues
@@ -244,20 +252,12 @@ uvm_pageinsert(struct vm_page *pg)
static inline void
uvm_pageremove(struct vm_page *pg)
{
struct pglist *buck;
struct uvm_object *uobj = pg->uobject;
kmutex_t *lock;
u_int hash;
KASSERT(mutex_owned(&uobj->vmobjlock));
KASSERT(pg->flags & PG_TABLED);
hash = uvm_pagehash(uobj, pg->offset);
buck = &uvm.page_hash[hash];
lock = uvm_hashlock(hash);
mutex_spin_enter(lock);
TAILQ_REMOVE(buck, pg, hashq);
mutex_spin_exit(lock);
rb_tree_remove_node(&uobj->rb_tree, &pg->rb_node);
if (UVM_OBJ_IS_VNODE(uobj)) {
if (uobj->uo_npages == 1) {
@@ -323,27 +323,6 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE);
mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);
/*
* init the <obj,offset> => <page> hash table. for now
* we just have one bucket (the bootstrap bucket). later on we
* will allocate new buckets as we dynamically resize the hash table.
*/
uvm.page_nhash = 1; /* 1 bucket */
uvm.page_hashmask = 0; /* mask for hash function */
uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */
TAILQ_INIT(uvm.page_hash); /* init hash table */
/*
* init hashtable locks. these must be spinlocks, as they are
* called from sites in the pmap modules where we cannot block.
* if taking multiple locks, the order is: low numbered first,
* high numbered second.
*/
for (i = 0; i < UVM_HASHLOCK_CNT; i++)
mutex_init(&uvm_hashlocks[i].lock, MUTEX_SPIN, IPL_VM);
/*
* allocate vm_page structures.
*/
@@ -844,96 +823,10 @@ uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
vm_nphysseg++;
if (!preload) {
uvm_page_rehash();
uvmpdpol_reinit();
}
}
/*
* uvm_page_rehash: reallocate hash table based on number of free pages.
*/
void
uvm_page_rehash(void)
{
int freepages, lcv, bucketcount, oldcount, i;
struct pglist *newbuckets, *oldbuckets;
struct vm_page *pg;
size_t newsize, oldsize;
/*
* compute number of pages that can go in the free pool
*/
freepages = 0;
for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
freepages +=
(vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
/*
* compute number of buckets needed for this number of pages
*/
bucketcount = 1;
while (bucketcount < freepages)
bucketcount = bucketcount * 2;
/*
* compute the size of the current table and new table.
*/
oldbuckets = uvm.page_hash;
oldcount = uvm.page_nhash;
oldsize = round_page(sizeof(struct pglist) * oldcount);
newsize = round_page(sizeof(struct pglist) * bucketcount);
/*
* allocate the new buckets
*/
newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize,
0, UVM_KMF_WIRED);
if (newbuckets == NULL) {
printf("uvm_page_physrehash: WARNING: could not grow page "
"hash table\n");
return;
}
for (lcv = 0 ; lcv < bucketcount ; lcv++)
TAILQ_INIT(&newbuckets[lcv]);
/*
* now replace the old buckets with the new ones and rehash everything
*/
for (i = 0; i < UVM_HASHLOCK_CNT; i++)
mutex_spin_enter(&uvm_hashlocks[i].lock);
uvm.page_hash = newbuckets;
uvm.page_nhash = bucketcount;
uvm.page_hashmask = bucketcount - 1; /* power of 2 */
/* ... and rehash */
for (lcv = 0 ; lcv < oldcount ; lcv++) {
while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
TAILQ_INSERT_TAIL(
&uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
pg, hashq);
}
}
for (i = 0; i < UVM_HASHLOCK_CNT; i++)
mutex_spin_exit(&uvm_hashlocks[i].lock);
/*
* free old bucket array if is not the boot-time table
*/
if (oldbuckets != &uvm_bootbucket)
uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize,
UVM_KMF_WIRED);
}
/*
* uvm_page_recolor: Recolor the pages if the new bucket count is
* larger than the old one.
@@ -1749,22 +1642,11 @@ struct vm_page *
uvm_pagelookup(struct uvm_object *obj, voff_t off)
{
struct vm_page *pg;
struct pglist *buck;
kmutex_t *lock;
u_int hash;
KASSERT(mutex_owned(&obj->vmobjlock));
hash = uvm_pagehash(obj, off);
buck = &uvm.page_hash[hash];
lock = uvm_hashlock(hash);
mutex_spin_enter(lock);
TAILQ_FOREACH(pg, buck, hashq) {
if (pg->uobject == obj && pg->offset == off) {
break;
}
}
mutex_spin_exit(lock);
pg = (struct vm_page *)rb_tree_find_node(&obj->rb_tree, &off);
KASSERT(pg == NULL || obj->uo_npages != 0);
KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
(pg->flags & PG_BUSY) != 0);
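
For callers, the lookup remains a single call made with the object locked; the pointer that comes back is only meaningful while vmobjlock is held. A hedged sketch of a typical caller, where foo_find_page is a hypothetical helper:

#include <uvm/uvm.h>

/* Hypothetical helper: return the resident page at `off', if any. */
static struct vm_page *
foo_find_page(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	mutex_enter(&uobj->vmobjlock);
	pg = uvm_pagelookup(uobj, off);
	/*
	 * A real caller would act on the page (e.g. mark it PG_BUSY)
	 * before dropping vmobjlock; the page can otherwise be freed
	 * or replaced as soon as the lock is released.
	 */
	mutex_exit(&uobj->vmobjlock);
	return pg;
}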

sys/uvm/uvm_page.h

@@ -1,4 +1,4 @@
/* $NetBSD: uvm_page.h,v 1.54 2008/06/04 12:45:28 ad Exp $ */
/* $NetBSD: uvm_page.h,v 1.55 2008/06/04 15:06:04 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -84,8 +84,9 @@
* page, indexed by page number. Each structure
* is an element of several lists:
*
* A hash table bucket used to quickly
* perform object/offset lookups
* A red-black tree rooted with the containing
* object is used to quickly perform object+
* offset lookups
*
* A list of all pages for a given object,
* so they can be quickly deactivated at
@@ -117,8 +118,10 @@
#include <uvm/uvm_extern.h>
#include <uvm/uvm_pglist.h>
#include <sys/rb.h>
struct vm_page {
TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
struct rb_node rb_node; /* tree of pages in obj (O) */
union {
TAILQ_ENTRY(vm_page) queue;
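
In the new layout rb_node takes hashq's place as the first member of struct vm_page, which is what makes the direct casts in the comparators above legal: a pointer to a structure, suitably converted, points to its first member and back. A tiny illustration of that layout assumption with a hypothetical struct bar:

#include <sys/rb.h>

struct bar {
	struct rb_node	b_node;		/* must remain the first member */
	int		b_value;
};

static const struct bar *
bar_from_node(const struct rb_node *n)
{
	/* Valid only because b_node sits at offset zero within struct bar. */
	return (const void *)n;
}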