Merge from yamt-pagecache: use radixtree for page lookup.

rbtree page lookup was introduced during the NetBSD 5.0 development cycle to
bypass lock contention problems with the (then) global page hash, and was a
temporary solution to allow us to make progress.  radixtree is the intended
replacement.

Ok yamt@.
ad 2019-12-14 17:28:58 +00:00
parent 5b4211c2aa
commit 6857513180
5 changed files with 84 additions and 115 deletions
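The essence of the change, shown here as a standalone sketch rather than the kernel code: each object's pages now hang off a radix tree keyed by page index (byte offset >> PAGE_SHIFT) instead of an rbtree keyed by byte offset. PAGE_SHIFT and the toy program below are illustrative assumptions, not the machine-dependent values.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* illustrative only; the real value is machine-dependent */

/*
 * old: pg = rb_tree_find_node(&uobj->rb_tree, &off);              (key: byte offset)
 * new: pg = radix_tree_lookup_node(&uobj->uo_pages, off >> PAGE_SHIFT);
 */
static uint64_t
page_index(uint64_t off)
{

	return off >> PAGE_SHIFT;
}

int
main(void)
{
	uint64_t off = 0x12345000;

	printf("byte offset 0x%jx -> page index %ju\n",
	    (uintmax_t)off, (uintmax_t)page_index(off));
	return 0;
}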


@@ -1,4 +1,4 @@
/* $NetBSD: vm.c,v 1.174 2019/12/13 20:10:21 ad Exp $ */
/* $NetBSD: vm.c,v 1.175 2019/12/14 17:28:58 ad Exp $ */
/*
* Copyright (c) 2007-2011 Antti Kantee. All Rights Reserved.
@@ -41,7 +41,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.174 2019/12/13 20:10:21 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.175 2019/12/14 17:28:58 ad Exp $");
#include <sys/param.h>
#include <sys/atomic.h>
@@ -52,6 +52,7 @@ __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.174 2019/12/13 20:10:21 ad Exp $");
#include <sys/mman.h>
#include <sys/null.h>
#include <sys/vnode.h>
#include <sys/radixtree.h>
#include <machine/pmap.h>
@@ -125,34 +126,6 @@ static unsigned long dddlim; /* 90% of memory limit used */
static struct pglist vmpage_lruqueue;
static unsigned vmpage_onqueue;
static int
pg_compare_key(void *ctx, const void *n, const void *key)
{
voff_t a = ((const struct vm_page *)n)->offset;
voff_t b = *(const voff_t *)key;
if (a < b)
return -1;
else if (a > b)
return 1;
else
return 0;
}
static int
pg_compare_nodes(void *ctx, const void *n1, const void *n2)
{
return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset);
}
const rb_tree_ops_t uvm_page_tree_ops = {
.rbto_compare_nodes = pg_compare_nodes,
.rbto_compare_key = pg_compare_key,
.rbto_node_offset = offsetof(struct vm_page, rb_node),
.rbto_context = NULL
};
/*
* vm pages
*/
@@ -204,7 +177,11 @@ uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
}
TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
(void)rb_tree_insert_node(&uobj->rb_tree, pg);
if (radix_tree_insert_node(&uobj->uo_pages, off >> PAGE_SHIFT,
pg) != 0) {
pool_cache_put(&pagecache, pg);
return NULL;
}
/*
* Don't put anons on the LRU page queue. We can't flush them
@@ -232,6 +209,7 @@ void
uvm_pagefree(struct vm_page *pg)
{
struct uvm_object *uobj = pg->uobject;
struct vm_page *pg2 __unused;
KASSERT(mutex_owned(uobj->vmobjlock));
@@ -241,7 +219,8 @@ uvm_pagefree(struct vm_page *pg)
TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
uobj->uo_npages--;
rb_tree_remove_node(&uobj->rb_tree, pg);
pg2 = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT);
KASSERT(pg == pg2);
if (!UVM_OBJ_IS_AOBJ(uobj)) {
mutex_enter(&vmpage_lruqueue_lock);
@@ -396,6 +375,8 @@ uvm_init(void)
pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
"page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);
radix_tree_init();
/* create vmspace used by local clients */
rump_vmspace_local = kmem_zalloc(sizeof(*rump_vmspace_local), KM_SLEEP);
uvmspace_init(rump_vmspace_local, &rump_pmap_local, 0, 0, false);
@@ -618,7 +599,7 @@ uvm_pagelookup(struct uvm_object *uobj, voff_t off)
struct vm_page *pg;
bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;
pg = rb_tree_find_node(&uobj->rb_tree, &off);
pg = radix_tree_lookup_node(&uobj->uo_pages, off >> PAGE_SHIFT);
if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
mutex_enter(&vmpage_lruqueue_lock);
TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
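Unlike rb_tree_insert_node(), radix_tree_insert_node() can fail, because inserting may have to allocate interior nodes; the hunk above therefore frees the page back to the pool cache and returns NULL when the insert fails. A self-contained toy sketch of that pattern follows; toy_insert() and the fixed-size table are made-up stand-ins, not the radixtree API.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_SLOTS 4

struct toy_page { unsigned long idx; };

/* Fixed-size table standing in for the per-object radix tree. */
static struct toy_page *toy_tree[TOY_SLOTS];

/* Stand-in for radix_tree_insert_node(): may fail, like an ENOMEM. */
static int
toy_insert(unsigned long idx, struct toy_page *pg)
{

	if (idx >= TOY_SLOTS || toy_tree[idx] != NULL)
		return ENOMEM;
	toy_tree[idx] = pg;
	return 0;
}

/* Mirror of the new failure path: undo the allocation and report NULL. */
static struct toy_page *
toy_pagealloc(unsigned long idx)
{
	struct toy_page *pg;
	int error;

	if ((pg = malloc(sizeof(*pg))) == NULL)
		return NULL;
	pg->idx = idx;
	error = toy_insert(idx, pg);
	if (error != 0) {
		free(pg);	/* like pool_cache_put() in the hunk above */
		return NULL;
	}
	return pg;
}

int
main(void)
{

	printf("insert at 2: %s\n", toy_pagealloc(2) ? "ok" : "failed");
	printf("insert at 9: %s\n", toy_pagealloc(9) ? "ok" : "failed");
	return 0;
}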


@@ -1,4 +1,4 @@
/* $NetBSD: uvm_km.c,v 1.151 2019/12/13 20:10:22 ad Exp $ */
/* $NetBSD: uvm_km.c,v 1.152 2019/12/14 17:28:58 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -152,7 +152,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.151 2019/12/13 20:10:22 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_km.c,v 1.152 2019/12/14 17:28:58 ad Exp $");
#include "opt_uvmhist.h"
@@ -545,9 +545,7 @@ uvm_km_pgremove_intrsafe(struct vm_map *map, vaddr_t start, vaddr_t end)
void
uvm_km_check_empty(struct vm_map *map, vaddr_t start, vaddr_t end)
{
struct vm_page *pg;
vaddr_t va;
paddr_t pa;
UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
KDASSERT(VM_MAP_IS_KERNEL(map));
@@ -556,17 +554,32 @@ uvm_km_check_empty(struct vm_map *map, vaddr_t start, vaddr_t end)
KDASSERT(end <= vm_map_max(map));
for (va = start; va < end; va += PAGE_SIZE) {
paddr_t pa;
if (pmap_extract(pmap_kernel(), va, &pa)) {
panic("uvm_km_check_empty: va %p has pa 0x%llx",
(void *)va, (long long)pa);
}
mutex_enter(uvm_kernel_object->vmobjlock);
pg = uvm_pagelookup(uvm_kernel_object,
va - vm_map_min(kernel_map));
mutex_exit(uvm_kernel_object->vmobjlock);
if (pg) {
panic("uvm_km_check_empty: "
"has page hashed at %p", (const void *)va);
/*
* kernel_object should not have pages for the corresponding
* region. check it.
*
* why trylock? because:
* - caller might not want to block.
* - we can recurse when allocating radix_node for
* kernel_object.
*/
if (mutex_tryenter(uvm_kernel_object->vmobjlock)) {
struct vm_page *pg;
pg = uvm_pagelookup(uvm_kernel_object,
va - vm_map_min(kernel_map));
mutex_exit(uvm_kernel_object->vmobjlock);
if (pg) {
panic("uvm_km_check_empty: "
"has page hashed at %p",
(const void *)va);
}
}
}
}
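The rewritten uvm_km_check_empty() above only looks at kernel_object under mutex_tryenter(): the caller may not be able to block, and allocating radix tree nodes for kernel_object can recurse back through this path, so the check is now best effort. Below is a userland sketch of the same trylock-or-skip pattern, using pthreads purely for illustration.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;
static bool obj_has_page;	/* stand-in for a uvm_pagelookup() hit */

/* Best-effort check: never blocks, silently skipped if the lock is busy. */
static void
check_empty(void)
{

	if (pthread_mutex_trylock(&obj_lock) != 0)
		return;		/* lock unavailable: skip rather than block or recurse */
	if (obj_has_page)
		fprintf(stderr, "check_empty: object unexpectedly has a page\n");
	pthread_mutex_unlock(&obj_lock);
}

int
main(void)
{

	check_empty();
	return 0;
}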


@@ -1,7 +1,7 @@
/* $NetBSD: uvm_object.c,v 1.16 2019/12/13 20:10:22 ad Exp $ */
/* $NetBSD: uvm_object.c,v 1.17 2019/12/14 17:28:58 ad Exp $ */
/*
* Copyright (c) 2006, 2010 The NetBSD Foundation, Inc.
* Copyright (c) 2006, 2010, 2019 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.16 2019/12/13 20:10:22 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.17 2019/12/14 17:28:58 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
@@ -46,7 +46,6 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.16 2019/12/13 20:10:22 ad Exp $");
#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/rbtree.h>
#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>
@@ -77,7 +76,7 @@ uvm_obj_init(struct uvm_object *uo, const struct uvm_pagerops *ops,
LIST_INIT(&uo->uo_ubc);
uo->uo_npages = 0;
uo->uo_refs = refs;
rb_tree_init(&uo->rb_tree, &uvm_page_tree_ops);
radix_tree_init_tree(&uo->uo_pages);
}
/*
@@ -87,7 +86,7 @@ void
uvm_obj_destroy(struct uvm_object *uo, bool dlock)
{
KASSERT(rb_tree_iterate(&uo->rb_tree, NULL, RB_DIR_LEFT) == NULL);
KASSERT(radix_tree_empty_tree_p(&uo->uo_pages));
/* Purge any UBC entries associated with this object. */
ubc_purge(uo);
@@ -96,6 +95,7 @@ uvm_obj_destroy(struct uvm_object *uo, bool dlock)
if (dlock) {
mutex_obj_free(uo->vmobjlock);
}
radix_tree_fini_tree(&uo->uo_pages);
}
/*


@@ -1,4 +1,4 @@
/* $NetBSD: uvm_object.h,v 1.33 2012/09/14 22:20:50 rmind Exp $ */
/* $NetBSD: uvm_object.h,v 1.34 2019/12/14 17:28:58 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -31,7 +31,7 @@
#define _UVM_UVM_OBJECT_H_
#include <sys/queue.h>
#include <sys/rbtree.h>
#include <sys/radixtree.h>
#include <uvm/uvm_pglist.h>
/*
@@ -54,12 +54,12 @@
*/
struct uvm_object {
kmutex_t * vmobjlock; /* lock on memq */
kmutex_t * vmobjlock; /* lock on object */
const struct uvm_pagerops *pgops; /* pager ops */
struct pglist memq; /* pages in this object */
int uo_npages; /* # of pages in memq */
int uo_npages; /* # of pages in uo_pages */
unsigned uo_refs; /* reference count */
struct rb_tree rb_tree; /* tree of pages */
struct radix_tree uo_pages; /* tree of pages */
LIST_HEAD(,ubc_map) uo_ubc; /* ubc mappings */
};
@@ -112,8 +112,6 @@ extern const struct uvm_pagerops aobj_pager;
#define UVM_OBJ_IS_AOBJ(uobj) \
((uobj)->pgops == &aobj_pager)
extern const rb_tree_ops_t uvm_page_tree_ops;
#endif /* _KERNEL */
#endif /* _UVM_UVM_OBJECT_H_ */
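The uvm_object changes above replace the per-object rbtree with a radix tree embedded in the object, which needs an explicit init and fini, and uvm_obj_destroy() now asserts emptiness with radix_tree_empty_tree_p() instead of iterating the rbtree. A toy sketch of that lifecycle follows; the toy_* names are made-up stand-ins, not the real API.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/* toy_tree_* stand in for radix_tree_init_tree()/_empty_tree_p()/_fini_tree(). */
struct toy_tree { size_t nnodes; };

static void toy_tree_init(struct toy_tree *t) { t->nnodes = 0; }
static bool toy_tree_empty_p(const struct toy_tree *t) { return t->nnodes == 0; }
static void toy_tree_fini(struct toy_tree *t) { assert(t->nnodes == 0); }

struct toy_object {
	struct toy_tree pages;	/* plays the role of uo_pages */
	int npages;
};

static void
obj_init(struct toy_object *o)
{

	toy_tree_init(&o->pages);	/* replaces rb_tree_init() */
	o->npages = 0;
}

static void
obj_destroy(struct toy_object *o)
{

	/* Destroy asserts emptiness, then releases any interior nodes. */
	assert(toy_tree_empty_p(&o->pages));
	toy_tree_fini(&o->pages);
}

int
main(void)
{
	struct toy_object o;

	obj_init(&o);
	obj_destroy(&o);
	return 0;
}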


@@ -1,4 +1,4 @@
/* $NetBSD: uvm_page.c,v 1.201 2019/12/13 20:10:22 ad Exp $ */
/* $NetBSD: uvm_page.c,v 1.202 2019/12/14 17:28:58 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.201 2019/12/13 20:10:22 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.202 2019/12/14 17:28:58 ad Exp $");
#include "opt_ddb.h"
#include "opt_uvm.h"
@@ -79,6 +79,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.201 2019/12/13 20:10:22 ad Exp $");
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/radixtree.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/extent.h>
@@ -150,49 +151,9 @@ struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t);
* local prototypes
*/
static void uvm_pageinsert(struct uvm_object *, struct vm_page *);
static int uvm_pageinsert(struct uvm_object *, struct vm_page *);
static void uvm_pageremove(struct uvm_object *, struct vm_page *);
/*
* per-object tree of pages
*/
static signed int
uvm_page_compare_nodes(void *ctx, const void *n1, const void *n2)
{
const struct vm_page *pg1 = n1;
const struct vm_page *pg2 = n2;
const voff_t a = pg1->offset;
const voff_t b = pg2->offset;
if (a < b)
return -1;
if (a > b)
return 1;
return 0;
}
static signed int
uvm_page_compare_key(void *ctx, const void *n, const void *key)
{
const struct vm_page *pg = n;
const voff_t a = pg->offset;
const voff_t b = *(const voff_t *)key;
if (a < b)
return -1;
if (a > b)
return 1;
return 0;
}
const rb_tree_ops_t uvm_page_tree_ops = {
.rbto_compare_nodes = uvm_page_compare_nodes,
.rbto_compare_key = uvm_page_compare_key,
.rbto_node_offset = offsetof(struct vm_page, rb_node),
.rbto_context = NULL
};
/*
* inline functions
*/
@@ -239,24 +200,33 @@ uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg,
uobj->uo_npages++;
}
static inline void
static inline int
uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)
{
struct vm_page *ret __diagused;
const uint64_t idx = pg->offset >> PAGE_SHIFT;
int error;
KASSERT(uobj == pg->uobject);
ret = rb_tree_insert_node(&uobj->rb_tree, pg);
KASSERT(ret == pg);
error = radix_tree_insert_node(&uobj->uo_pages, idx, pg);
if (error != 0) {
return error;
}
return 0;
}
static inline void
static inline int
uvm_pageinsert(struct uvm_object *uobj, struct vm_page *pg)
{
int error;
KDASSERT(uobj != NULL);
uvm_pageinsert_tree(uobj, pg);
KDASSERT(uobj == pg->uobject);
error = uvm_pageinsert_tree(uobj, pg);
if (error != 0) {
KASSERT(error == ENOMEM);
return error;
}
uvm_pageinsert_list(uobj, pg, NULL);
return error;
}
/*
@@ -298,9 +268,10 @@ uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg)
static inline void
uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
{
struct vm_page *opg __unused;
KASSERT(uobj == pg->uobject);
rb_tree_remove_node(&uobj->rb_tree, pg);
opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT);
KASSERT(pg == opg);
}
static inline void
@@ -308,8 +279,9 @@ uvm_pageremove(struct uvm_object *uobj, struct vm_page *pg)
{
KDASSERT(uobj != NULL);
uvm_pageremove_tree(uobj, pg);
KASSERT(uobj == pg->uobject);
uvm_pageremove_list(uobj, pg);
uvm_pageremove_tree(uobj, pg);
}
static void
@@ -925,7 +897,7 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
int flags, int strat, int free_list)
{
int try1, try2, zeroit = 0, color;
int lcv;
int lcv, error;
struct uvm_cpu *ucpu;
struct vm_page *pg;
lwp_t *l;
@@ -1074,14 +1046,19 @@ uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
pg->uanon = anon;
KASSERT(uvm_page_locked_p(pg));
pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
mutex_spin_exit(&uvm_fpageqlock);
if (anon) {
anon->an_page = pg;
pg->flags |= PG_ANON;
atomic_inc_uint(&uvmexp.anonpages);
} else if (obj) {
uvm_pageinsert(obj, pg);
error = uvm_pageinsert(obj, pg);
if (error != 0) {
pg->uobject = NULL;
uvm_pagefree(pg);
return NULL;
}
}
mutex_spin_exit(&uvm_fpageqlock);
#if defined(UVM_PAGE_TRKOWN)
pg->owner_tag = NULL;
@@ -1567,7 +1544,7 @@ uvm_pagelookup(struct uvm_object *obj, voff_t off)
KASSERT(mutex_owned(obj->vmobjlock));
pg = rb_tree_find_node(&obj->rb_tree, &off);
pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT);
KASSERT(pg == NULL || obj->uo_npages != 0);
KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||