Redo the page allocator to perform better, especially on multi-core and
multi-socket systems.  Proposed on tech-kern.

While here:

- add rudimentary NUMA support - needs more work.
- remove now unused "listq" from vm_page.
parent a4a6d53262
commit 9b1e2fa25c
@@ -1,4 +1,4 @@
-/*	$NetBSD: autoconf.c,v 1.28 2017/10/22 00:59:28 maya Exp $	*/
+/*	$NetBSD: autoconf.c,v 1.29 2019/12/27 12:51:56 ad Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -46,7 +46,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.28 2017/10/22 00:59:28 maya Exp $");
+__KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.29 2019/12/27 12:51:56 ad Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_intrdebug.h"
@@ -60,9 +60,14 @@ __KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.28 2017/10/22 00:59:28 maya Exp $");
 #include <machine/pte.h>
 #include <machine/cpufunc.h>
 
+#include "acpica.h"
 #include "ioapic.h"
 #include "lapic.h"
 
+#if NACPICA > 0
+#include <dev/acpi/acpi_srat.h>
+#endif
+
 #if NIOAPIC > 0
 #include <machine/i82093var.h>
 #endif
@@ -112,6 +117,11 @@ cpu_configure(void)
 	cpu_init_idle_lwps();
 #endif
 
+#if NACPICA > 0
+	/* Load NUMA memory regions into UVM. */
+	acpisrat_load_uvm();
+#endif
+
 	spl0();
 	lcr8(0);
 }
@@ -1,4 +1,4 @@
-/*	$NetBSD: autoconf.c,v 1.105 2017/10/22 00:59:28 maya Exp $	*/
+/*	$NetBSD: autoconf.c,v 1.106 2019/12/27 12:51:56 ad Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -46,7 +46,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.105 2017/10/22 00:59:28 maya Exp $");
+__KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.106 2019/12/27 12:51:56 ad Exp $");
 
 #include "opt_intrdebug.h"
 #include "opt_multiprocessor.h"
@@ -65,9 +65,14 @@ __KERNEL_RCSID(0, "$NetBSD: autoconf.c,v 1.105 2017/10/22 00:59:28 maya Exp $");
 #include <machine/cpufunc.h>
 #include <x86/fpu.h>
 
+#include "acpica.h"
 #include "ioapic.h"
 #include "lapic.h"
 
+#if NACPICA > 0
+#include <dev/acpi/acpi_srat.h>
+#endif
+
 #if NIOAPIC > 0
 #include <machine/i82093var.h>
 #endif
@@ -132,6 +137,11 @@ cpu_configure(void)
 	cpu_init_idle_lwps();
 #endif
 
+#if NACPICA > 0
+	/* Load NUMA memory regions into UVM. */
+	acpisrat_load_uvm();
+#endif
+
 	spl0();
 #if NLAPIC > 0
 	lapic_write_tpri(0);
@@ -1,7 +1,8 @@
-/*	$NetBSD: db_command.c,v 1.165 2019/12/15 20:29:08 joerg Exp $	*/
+/*	$NetBSD: db_command.c,v 1.166 2019/12/27 12:51:56 ad Exp $	*/
 
 /*
- * Copyright (c) 1996, 1997, 1998, 1999, 2002, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 1996, 1997, 1998, 1999, 2002, 2009, 2019
+ *     The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -60,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: db_command.c,v 1.165 2019/12/15 20:29:08 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: db_command.c,v 1.166 2019/12/27 12:51:56 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_aio.h"
@@ -193,6 +194,7 @@ static void	db_help_print_cmd(db_expr_t, bool, db_expr_t, const char *);
 static void	db_lock_print_cmd(db_expr_t, bool, db_expr_t, const char *);
 static void	db_show_all_locks(db_expr_t, bool, db_expr_t, const char *);
 static void	db_show_lockstats(db_expr_t, bool, db_expr_t, const char *);
+static void	db_show_all_freelists(db_expr_t, bool, db_expr_t, const char *);
 static void	db_mount_print_cmd(db_expr_t, bool, db_expr_t, const char *);
 static void	db_show_all_mount(db_expr_t, bool, db_expr_t, const char *);
 static void	db_mbuf_print_cmd(db_expr_t, bool, db_expr_t, const char *);
@@ -234,6 +236,8 @@ static const struct db_command db_show_cmds[] = {
 	    0 ,"Show all held locks", "[/t]", NULL) },
 	{ DDB_ADD_CMD("mount",	db_show_all_mount,	0,
 	    "Print all mount structures.", "[/f]", NULL) },
+	{ DDB_ADD_CMD("freelists",	db_show_all_freelists,
+	    0 ,"Show all freelists", NULL, NULL) },
 #ifdef AIO
 	/*added from all sub cmds*/
 	{ DDB_ADD_CMD("aio_jobs",	db_show_aio_jobs,	0,
@@ -1284,6 +1288,16 @@ db_show_all_locks(db_expr_t addr, bool have_addr,
 #endif
 }
 
+static void
+db_show_all_freelists(db_expr_t addr, bool have_addr,
+    db_expr_t count, const char *modif)
+{
+
+#ifdef _KERNEL	/* XXX CRASH(8) */
+	uvm_page_print_freelists(db_printf);
+#endif
+}
+
 static void
 db_show_lockstats(db_expr_t addr, bool have_addr,
     db_expr_t count, const char *modif)
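The hunks above wire a new "freelists" entry into ddb's "show" command
table, backed by the uvm_page_print_freelists() hook declared in uvm_ddb.h
further down.  On a DDB kernel it would be invoked like any other show
subcommand; the prompt below is illustrative only, and the output format
comes from the (suppressed) uvm_page.c change:

	db{0}> show freelists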
@@ -1,4 +1,4 @@
-/*	$NetBSD: acpi_srat.c,v 1.7 2019/12/22 22:18:04 ad Exp $	*/
+/*	$NetBSD: acpi_srat.c,v 1.8 2019/12/27 12:51:57 ad Exp $	*/
 
 /*
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.7 2019/12/22 22:18:04 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.8 2019/12/27 12:51:57 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kmem.h>
@@ -39,6 +39,8 @@ __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.7 2019/12/22 22:18:04 ad Exp $");
 #include <dev/acpi/acpivar.h>
 #include <dev/acpi/acpi_srat.h>
 
+#include <uvm/uvm_extern.h>
+
 static ACPI_TABLE_SRAT *srat;
 
 static uint32_t nnodes; /* Number of NUMA nodes */
@@ -472,6 +474,28 @@ acpisrat_dump(void)
 	}
 }
 
+void
+acpisrat_load_uvm(void)
+{
+	uint32_t i, j, nn, nm;
+	struct acpisrat_mem m;
+
+	nn = acpisrat_nodes();
+	aprint_debug("SRAT: %u NUMA nodes\n", nn);
+	for (i = 0; i < nn; i++) {
+		nm = acpisrat_node_memoryranges(i);
+		for (j = 0; j < nm; j++) {
+			acpisrat_mem(i, j, &m);
+			aprint_debug("SRAT: node %u memory range %u (0x%"
+			    PRIx64" - 0x%"PRIx64" flags %u)\n",
+			    m.nodeid, j, m.baseaddress,
+			    m.baseaddress + m.length, m.flags);
+			uvm_page_numa_load(trunc_page(m.baseaddress),
+			    trunc_page(m.length), m.nodeid);
+		}
+	}
+}
+
 /*
  * Get number of NUMA nodes.
  */
@@ -1,4 +1,4 @@
-/*	$NetBSD: acpi_srat.h,v 1.4 2017/12/28 08:49:28 maxv Exp $	*/
+/*	$NetBSD: acpi_srat.h,v 1.5 2019/12/27 12:51:57 ad Exp $	*/
 
 /*
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -68,6 +68,7 @@ int acpisrat_init(void);
 int acpisrat_refresh(void);
 int acpisrat_exit(void);
 void acpisrat_dump(void);
+void acpisrat_load_uvm(void);
 uint32_t acpisrat_nodes(void);
 uint32_t acpisrat_node_cpus(acpisrat_nodeid_t);
 uint32_t acpisrat_node_memoryranges(acpisrat_nodeid_t);
@@ -1,4 +1,4 @@
-/*	$NetBSD: init_main.c,v 1.512 2019/12/22 15:00:42 ad Exp $	*/
+/*	$NetBSD: init_main.c,v 1.513 2019/12/27 12:51:57 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009, 2019 The NetBSD Foundation, Inc.
@@ -97,7 +97,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.512 2019/12/22 15:00:42 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.513 2019/12/27 12:51:57 ad Exp $");
 
 #include "opt_ddb.h"
 #include "opt_inet.h"
@@ -814,6 +814,10 @@ configure2(void)
 	for (CPU_INFO_FOREACH(cii, ci)) {
 		uvm_cpu_attach(ci);
 	}
+
+	/* Decide how to partition free memory. */
+	uvm_page_rebucket();
+
 	mp_online = true;
 #if defined(MULTIPROCESSOR)
 	cpu_boot_secondary_processors();
@@ -1,4 +1,4 @@
-#	$NetBSD: files.uvm,v 1.31 2019/12/15 21:11:35 ad Exp $
+#	$NetBSD: files.uvm,v 1.32 2019/12/27 12:51:57 ad Exp $
 
 #
 # UVM options
@@ -42,6 +42,7 @@ file	uvm/uvm_pager.c			uvm
 file	uvm/uvm_pdaemon.c		uvm
 file	uvm/uvm_pdpolicy_clock.c	!pdpolicy_clockpro
 file	uvm/uvm_pdpolicy_clockpro.c	pdpolicy_clockpro
+file	uvm/uvm_pgflcache.c		uvm
 file	uvm/uvm_pglist.c		uvm
 file	uvm/uvm_physseg.c		uvm
 file	uvm/uvm_readahead.c		uvm
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm.h,v 1.70 2019/12/13 20:10:22 ad Exp $	*/
+/*	$NetBSD: uvm.h,v 1.71 2019/12/27 12:51:57 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -71,21 +71,19 @@
 #include <machine/vmparam.h>
 
 struct workqueue;
+struct pgflcache;
 
 /*
  * per-cpu data
  */
 
 struct uvm_cpu {
-	struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */
-	int page_free_nextcolor;	/* next color to allocate from */
-	int page_idlezero_next;		/* which color to zero next */
-	bool page_idle_zero;		/* TRUE if we should try to zero
-					   pages in the idle loop */
-	int pages[PGFL_NQUEUES];	/* total of pages in page_free */
-	u_int emap_gen;			/* emap generation number */
-
-	krndsource_t rs;		/* entropy source */
+	struct pgflcache *pgflcache[VM_NFREELIST];/* cpu-local cached pages */
+	void *pgflcachemem;		/* pointer to allocated mem */
+	size_t pgflcachememsz;		/* size of allocated memory */
+	u_int pgflcolor;		/* next color to allocate */
+	u_int pgflbucket;		/* where to send our pages */
+	krndsource_t rs;		/* entropy source */
 };
 
 /*
@@ -98,7 +96,9 @@ struct uvm {
 
 	/* vm_page queues */
 	struct pgfreelist page_free[VM_NFREELIST]; /* unallocated pages */
-	bool page_init_done;		/* TRUE if uvm_page_init() finished */
+	u_int bucketcount;
+	bool page_init_done;		/* true if uvm_page_init() finished */
+	bool numa_alloc;		/* use NUMA page allocation strategy */
 
 	/* page daemon trigger */
 	int pagedaemon;			/* daemon sleeps on this */
@@ -123,7 +123,6 @@ extern struct uvm_object *uvm_kernel_object;
  * locks (made globals for lockstat).
  */
 
-extern kmutex_t uvm_fpageqlock;		/* lock for free page q */
 extern kmutex_t uvm_kentry_lock;
 
 #endif /* _KERNEL */
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_ddb.h,v 1.15 2011/05/17 04:18:07 mrg Exp $	*/
+/*	$NetBSD: uvm_ddb.h,v 1.16 2019/12/27 12:51:57 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -40,6 +40,7 @@ void	uvm_object_printit(struct uvm_object *, bool,
 void	uvm_page_printit(struct vm_page *, bool,
 	    void (*)(const char *, ...));
 void	uvm_page_printall(void (*)(const char *, ...));
+void	uvm_page_print_freelists(void (*)(const char *, ...));
 void	uvmexp_print(void (*)(const char *, ...));
 #endif /* DDB || DEBUGPRINT */
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_extern.h,v 1.215 2019/12/21 12:58:26 ad Exp $	*/
+/*	$NetBSD: uvm_extern.h,v 1.216 2019/12/27 12:51:57 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -210,6 +210,7 @@ b\32UNMAP\0\
 #define UVM_PGA_STRAT_NORMAL	0	/* priority (low id to high) walk */
 #define UVM_PGA_STRAT_ONLY	1	/* only specified free list */
 #define UVM_PGA_STRAT_FALLBACK	2	/* ONLY falls back on NORMAL */
+#define UVM_PGA_STRAT_NUMA	3	/* strongly prefer ideal bucket */
 
 /*
  * flags for uvm_pagealloc_strat()
@@ -736,6 +737,7 @@ void uvm_obj_unwirepages(struct uvm_object *, off_t, off_t);
 
 /* uvm_page.c */
 int			uvm_free(void);
+void			uvm_page_numa_load(paddr_t, paddr_t, u_int);
 struct vm_page		*uvm_pagealloc_strat(struct uvm_object *,
 			    voff_t, struct vm_anon *, int, int, int);
 #define	uvm_pagealloc(obj, off, anon, flags) \
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_glue.c,v 1.172 2019/12/21 13:00:25 ad Exp $	*/
+/*	$NetBSD: uvm_glue.c,v 1.173 2019/12/27 12:51:57 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.172 2019/12/21 13:00:25 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.173 2019/12/27 12:51:57 ad Exp $");
 
 #include "opt_kgdb.h"
 #include "opt_kstack.h"
@@ -86,6 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.172 2019/12/21 13:00:25 ad Exp $");
 #include <sys/asan.h>
 
 #include <uvm/uvm.h>
+#include <uvm/uvm_pgflcache.h>
 
 /*
  * uvm_kernacc: test if kernel can access a memory region.
@@ -500,9 +501,17 @@ uvm_scheduler(void)
 	lwp_changepri(l, PRI_VM);
 	lwp_unlock(l);
 
+	/* Start the freelist cache. */
+	uvm_pgflcache_start();
+
 	for (;;) {
+		/* Update legacy stats for post-mortem debugging. */
+		uvm_update_uvmexp();
+
+		/* See if the pagedaemon needs to generate some free pages. */
+		uvm_kick_pdaemon();
+
+		/* Calculate process statistics. */
 		sched_pstats();
 		(void)kpause("uvm", false, hz, NULL);
 	}
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_init.c,v 1.51 2019/12/13 20:10:22 ad Exp $	*/
+/*	$NetBSD: uvm_init.c,v 1.52 2019/12/27 12:51:57 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.51 2019/12/13 20:10:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_init.c,v 1.52 2019/12/27 12:51:57 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -64,7 +64,6 @@ const int * const uvmexp_pagemask = &uvmexp.pagemask;
 const int * const uvmexp_pageshift = &uvmexp.pageshift;
 #endif
 
-kmutex_t uvm_fpageqlock __cacheline_aligned;
 kmutex_t uvm_kentry_lock __cacheline_aligned;
 
 /*
sys/uvm/uvm_page.c: diff suppressed because it is too large (1019 lines changed).
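Since the uvm_page.c diff itself is suppressed, the following is only a
plausible sketch of how the new pieces fit together on the allocation fast
path, assembled from the uvm_pgflcache.h API and the per-bucket locks added
below; alloc_fastpath_sketch is an invented name, not the real
uvm_pagealloc_strat() code:

/*
 * Hypothetical fast path (must run at IPL_VM, per the pgflcache
 * comments): try the lockless per-CPU cache first; on a miss, take
 * this CPU's bucket lock, batch-refill the cache, and retry.
 */
static struct vm_page *
alloc_fastpath_sketch(struct uvm_cpu *ucpu, int fl, int c)
{
	struct vm_page *pg;
	int b = ucpu->pgflbucket;	/* this CPU's home bucket */

	pg = uvm_pgflcache_alloc(ucpu, fl, c);
	if (__predict_true(pg != NULL))
		return pg;

	mutex_spin_enter(&uvm_freelist_locks[b].lock);
	uvm_pgflcache_fill(ucpu, fl, b, c);
	mutex_spin_exit(&uvm_freelist_locks[b].lock);
	return uvm_pgflcache_alloc(ucpu, fl, c);
}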
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_page.h,v 1.88 2019/12/21 14:41:44 ad Exp $	*/
+/*	$NetBSD: uvm_page.h,v 1.89 2019/12/27 12:51:57 ad Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -119,7 +119,6 @@
  *
  * o free
  *   => pageq.list is entry on global free page queue
- *   => listq.list is entry on per-CPU free page queue
  *   => uanon is unused (or (void *)0xdeadbeef for DEBUG)
  *   => uobject is unused (or (void *)0xdeadbeef for DEBUG)
  *   => PG_FREE is set in flags
@@ -129,13 +128,11 @@
  *   => uobject is owner
  * o owned by a vm_anon
  *   => pageq is unused (XXX correct?)
- *   => listq is unused (XXX correct?)
  *   => uanon is owner
  *   => uobject is NULL
  *   => PG_ANON is set in flags
 * o allocated by uvm_pglistalloc
  *   => pageq.queue is entry on resulting pglist, owned by caller
- *   => listq is unused (XXX correct?)
  *   => uanon is unused
  *   => uobject is unused
  *
@@ -153,11 +150,6 @@ struct vm_page {
 					 * or uvm_pglistalloc output */
 		LIST_ENTRY(vm_page) list;	/* f: global free page queue */
 	} pageq;
-
-	union {
-		LIST_ENTRY(vm_page) list;	/* f: CPU free page queue */
-	} listq;
-
 	struct vm_anon		*uanon;		/* o,i: anon */
 	struct uvm_object	*uobject;	/* o,i: object */
 	voff_t			offset;		/* o: offset into object */
@@ -302,6 +294,7 @@ void uvm_page_own(struct vm_page *, const char *);
 bool uvm_page_physget(paddr_t *);
 #endif
 void uvm_page_recolor(int);
+void uvm_page_rebucket(void);
 void uvm_pageidlezero(void);
 
 void uvm_pageactivate(struct vm_page *);
@@ -318,6 +311,8 @@ void uvm_pagewire(struct vm_page *);
 void uvm_pagezero(struct vm_page *);
 bool uvm_pageismanaged(paddr_t);
 bool uvm_page_locked_p(struct vm_page *);
+void uvm_pgfl_lock(void);
+void uvm_pgfl_unlock(void);
 
 int uvm_page_lookup_freelist(struct vm_page *);
 
@@ -348,8 +343,12 @@ int uvm_direct_process(struct vm_page **, u_int, voff_t, vsize_t,
 #define	VM_PGCOLOR(pg) \
 	(atop(VM_PAGE_TO_PHYS((pg))) & uvmexp.colormask)
 #define	PHYS_TO_VM_PAGE(pa)	uvm_phys_to_vm_page(pa)
 
+/*
+ * VM_PAGE_IS_FREE() can't tell if the page is on global free list, or a
+ * per-CPU cache.  If you need to be certain, pause caching.
+ */
 #define	VM_PAGE_IS_FREE(entry)  ((entry)->flags & PG_FREE)
-#define	VM_FREE_PAGE_TO_CPU(pg)	((struct uvm_cpu *)((uintptr_t)pg->offset))
 
 /*
  * Use the lower 10 bits of pg->phys_addr to cache some some locators for
sys/uvm/uvm_pgflcache.c (new file, 471 lines):

/*	$NetBSD: uvm_pgflcache.c,v 1.1 2019/12/27 12:51:57 ad Exp $	*/

/*-
 * Copyright (c) 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pgflcache.c: page freelist cache.
 *
 * This implements a tiny per-CPU cache of pages that sits between the main
 * page allocator and the freelists.  By allocating and freeing pages in
 * batch, it reduces freelist contention by an order of magnitude.
 *
 * The cache can be paused & resumed at runtime so that UVM_HOTPLUG,
 * uvm_pglistalloc() and uvm_page_redim() can have a consistent view of the
 * world.  On system with one CPU per physical package (e.g. a uniprocessor)
 * the cache is not enabled.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pgflcache.c,v 1.1 2019/12/27 12:51:57 ad Exp $");

#include "opt_uvm.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/xcall.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pglist.h>
#include <uvm/uvm_pgflcache.h>

/* There is no point doing any of this on a uniprocessor. */
#ifdef MULTIPROCESSOR

/*
 * MAXPGS - maximum pages per color, per bucket.
 * FILLPGS - number of pages to allocate at once, per color, per bucket.
 *
 * Why the chosen values:
 *
 * (1) In 2019, an average Intel system has 4kB pages and 8x L2 cache
 * colors.  We make the assumption that most of the time allocation activity
 * will be centered around one UVM freelist, so most of the time there will
 * be no more than 224kB worth of cached pages per-CPU.  That's tiny, but
 * enough to hugely reduce contention on the freelist locks, and give us a
 * small pool of pages which if we're very lucky may have some L1/L2 cache
 * locality, and do so without subtracting too much from the L2/L3 cache
 * benefits of having per-package free lists in the page allocator.
 *
 * (2) With the chosen values on _LP64, the data structure for each color
 * takes up a single cache line (64 bytes) giving this very low overhead
 * even in the "miss" case.
 *
 * (3) We don't want to cause too much pressure by hiding away memory that
 * could otherwise be put to good use.
 */
#define	MAXPGS		7
#define	FILLPGS		6

/* Variable size, according to # colors. */
struct pgflcache {
	struct pccolor {
		intptr_t	count;
		struct vm_page	*pages[MAXPGS];
	} color[1];
};

static kmutex_t		uvm_pgflcache_lock;
static kcondvar_t	uvm_pgflcache_cv;
static int		uvm_pgflcache_sem;
static bool		uvm_pgflcache_draining;

/*
 * uvm_pgflcache_fill: fill specified freelist/color from global list
 *
 * => must be called at IPL_VM
 * => must be called with given bucket lock held
 * => must only fill from the correct bucket for this CPU
 */

void
uvm_pgflcache_fill(struct uvm_cpu *ucpu, int fl, int b, int c)
{
	struct pgflbucket *pgb;
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct pgflist *head;
	struct vm_page *pg;
	int count;

	KASSERT(mutex_owned(&uvm_freelist_locks[b].lock));
	KASSERT(ucpu->pgflbucket == b);

	/* If caching is off, then bail out. */
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return;
	}

	/* Fill only to the limit. */
	pcc = &pc->color[c];
	pgb = uvm.page_free[fl].pgfl_buckets[b];
	head = &pgb->pgb_colors[c];
	if (pcc->count >= FILLPGS) {
		return;
	}

	/* Pull pages from the bucket until it's empty, or we are full. */
	count = pcc->count;
	pg = LIST_FIRST(head);
	while (__predict_true(pg != NULL && count < FILLPGS)) {
		KASSERT(pg->flags & PG_FREE);
		KASSERT(uvm_page_get_bucket(pg) == b);
		pcc->pages[count++] = pg;
		pg = LIST_NEXT(pg, pageq.list);
	}

	/* Violate LIST abstraction to remove all pages at once. */
	head->lh_first = pg;
	if (__predict_true(pg != NULL)) {
		pg->pageq.list.le_prev = &head->lh_first;
	}
	pgb->pgb_nfree -= (count - pcc->count);
	pcc->count = count;
}

/*
 * uvm_pgflcache_spill: spill specified freelist/color to global list
 *
 * => must be called at IPL_VM
 * => mark __noinline so we don't pull it into uvm_pgflcache_free()
 */

static void __noinline
uvm_pgflcache_spill(struct uvm_cpu *ucpu, int fl, int c)
{
	struct pgflbucket *pgb;
	struct pgfreelist *pgfl;
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct pgflist *head;
	kmutex_t *lock;
	int b, adj;

	pc = ucpu->pgflcache[fl];
	pcc = &pc->color[c];
	pgfl = &uvm.page_free[fl];
	b = ucpu->pgflbucket;
	pgb = pgfl->pgfl_buckets[b];
	head = &pgb->pgb_colors[c];
	lock = &uvm_freelist_locks[b].lock;

	mutex_spin_enter(lock);
	for (adj = pcc->count; pcc->count != 0;) {
		pcc->count--;
		KASSERT(pcc->pages[pcc->count] != NULL);
		KASSERT(pcc->pages[pcc->count]->flags & PG_FREE);
		LIST_INSERT_HEAD(head, pcc->pages[pcc->count], pageq.list);
	}
	pgb->pgb_nfree += adj;
	mutex_spin_exit(lock);
}

/*
 * uvm_pgflcache_alloc: try to allocate a cached page.
 *
 * => must be called at IPL_VM
 * => allocate only from the given freelist and given page color
 */

struct vm_page *
uvm_pgflcache_alloc(struct uvm_cpu *ucpu, int fl, int c)
{
	struct pgflcache *pc;
	struct pccolor *pcc;
	struct vm_page *pg;

	/* If caching is off, then bail out. */
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return NULL;
	}

	/* Very simple: if we have a page then return it. */
	pcc = &pc->color[c];
	if (__predict_false(pcc->count == 0)) {
		return NULL;
	}
	pg = pcc->pages[--(pcc->count)];
	KASSERT(pg != NULL);
	KASSERT(pg->flags & PG_FREE);
	KASSERT(uvm_page_get_freelist(pg) == fl);
	KASSERT(uvm_page_get_bucket(pg) == ucpu->pgflbucket);
	pg->flags &= PG_ZERO;
	return pg;
}

/*
 * uvm_pgflcache_free: cache a page, if possible.
 *
 * => must be called at IPL_VM
 * => must only send pages for the correct bucket for this CPU
 */

bool
uvm_pgflcache_free(struct uvm_cpu *ucpu, struct vm_page *pg)
{
	struct pgflcache *pc;
	struct pccolor *pcc;
	int fl, c;

	KASSERT(uvm_page_get_bucket(pg) == ucpu->pgflbucket);

	/* If caching is off, then bail out. */
	fl = uvm_page_get_freelist(pg);
	if (__predict_false((pc = ucpu->pgflcache[fl]) == NULL)) {
		return false;
	}

	/* If the array is full spill it first, then add page to array. */
	c = VM_PGCOLOR(pg);
	pcc = &pc->color[c];
	KASSERT((pg->flags & PG_FREE) == 0);
	if (__predict_false(pcc->count == MAXPGS)) {
		uvm_pgflcache_spill(ucpu, fl, c);
	}
	pg->flags = (pg->flags & PG_ZERO) | PG_FREE;
	pcc->pages[pcc->count] = pg;
	pcc->count++;
	return true;
}

/*
 * uvm_pgflcache_init: allocate and initialize per-CPU data structures for
 * the free page cache.  Don't set anything in motion - that's taken care
 * of by uvm_pgflcache_resume().
 */

static void
uvm_pgflcache_init_cpu(struct cpu_info *ci)
{
	struct uvm_cpu *ucpu;
	size_t sz;

	ucpu = ci->ci_data.cpu_uvm;
	KASSERT(ucpu->pgflcachemem == NULL);
	KASSERT(ucpu->pgflcache[0] == NULL);

	sz = offsetof(struct pgflcache, color[uvmexp.ncolors]);
	ucpu->pgflcachememsz =
	    (roundup2(sz * VM_NFREELIST, coherency_unit) + coherency_unit - 1);
	ucpu->pgflcachemem = kmem_zalloc(ucpu->pgflcachememsz, KM_SLEEP);
}

/*
 * uvm_pgflcache_fini_cpu: dump all cached pages back to global free list
 * and shut down caching on the CPU.  Called on each CPU in the system via
 * xcall.
 */

static void
uvm_pgflcache_fini_cpu(void *arg1 __unused, void *arg2 __unused)
{
	struct uvm_cpu *ucpu;
	int fl, color, s;

	ucpu = curcpu()->ci_data.cpu_uvm;
	for (fl = 0; fl < VM_NFREELIST; fl++) {
		s = splvm();
		for (color = 0; color < uvmexp.ncolors; color++) {
			uvm_pgflcache_spill(ucpu, fl, color);
		}
		ucpu->pgflcache[fl] = NULL;
		splx(s);
	}
}

/*
 * uvm_pgflcache_pause: pause operation of the caches
 */

void
uvm_pgflcache_pause(void)
{
	uint64_t where;

	/* First one in starts draining.  Everyone else waits. */
	mutex_enter(&uvm_pgflcache_lock);
	if (uvm_pgflcache_sem++ == 0) {
		uvm_pgflcache_draining = true;
		mutex_exit(&uvm_pgflcache_lock);
		where = xc_broadcast(0, uvm_pgflcache_fini_cpu, NULL, NULL);
		xc_wait(where);
		mutex_enter(&uvm_pgflcache_lock);
		uvm_pgflcache_draining = false;
		cv_broadcast(&uvm_pgflcache_cv);
	} else {
		while (uvm_pgflcache_draining) {
			cv_wait(&uvm_pgflcache_cv, &uvm_pgflcache_lock);
		}
	}
	mutex_exit(&uvm_pgflcache_lock);
}

/*
 * uvm_pgflcache_resume: resume operation of the caches
 */

void
uvm_pgflcache_resume(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct uvm_cpu *ucpu;
	uintptr_t addr;
	size_t sz;
	int fl;

	/* Last guy out takes care of business. */
	mutex_enter(&uvm_pgflcache_lock);
	KASSERT(!uvm_pgflcache_draining);
	KASSERT(uvm_pgflcache_sem > 0);
	if (uvm_pgflcache_sem-- > 1) {
		mutex_exit(&uvm_pgflcache_lock);
		return;
	}

	/*
	 * Make sure dependant data structure updates are remotely visible.
	 * Essentially this functions as a global memory barrier.
	 */
	xc_barrier(XC_HIGHPRI);

	/*
	 * Then set all of the pointers in place on each CPU.  As soon as
	 * each pointer is set, caching is operational in that dimension.
	 */
	sz = offsetof(struct pgflcache, color[uvmexp.ncolors]);
	for (CPU_INFO_FOREACH(cii, ci)) {
		ucpu = ci->ci_data.cpu_uvm;
		addr = roundup2((uintptr_t)ucpu->pgflcachemem, coherency_unit);
		for (fl = 0; fl < VM_NFREELIST; fl++) {
			ucpu->pgflcache[fl] = (struct pgflcache *)addr;
			addr += sz;
		}
	}
	mutex_exit(&uvm_pgflcache_lock);
}

/*
 * uvm_pgflcache_start: start operation of the cache.
 *
 * => called once only, when init(8) is about to be started
 */

void
uvm_pgflcache_start(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(uvm_pgflcache_sem > 0);

	/*
	 * There's not much point doing this if every CPU has its own
	 * bucket (and that includes the uniprocessor case).
	 */
	if (ncpu == uvm.bucketcount) {
		return;
	}

	/* Create each CPU's buckets. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		uvm_pgflcache_init_cpu(ci);
	}

	/* Kick it into action. */
	uvm_pgflcache_resume();
}

/*
 * uvm_pgflcache_init: set up data structures for the free page cache.
 */

void
uvm_pgflcache_init(void)
{

	uvm_pgflcache_sem = 1;
	mutex_init(&uvm_pgflcache_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&uvm_pgflcache_cv, "flcache");
}

#else	/* MULTIPROCESSOR */

struct vm_page *
uvm_pgflcache_alloc(struct uvm_cpu *ucpu, int fl, int c)
{

	return NULL;
}

bool
uvm_pgflcache_free(struct uvm_cpu *ucpu, struct vm_page *pg)
{

	return false;
}

void
uvm_pgflcache_fill(struct uvm_cpu *ucpu, int fl, int b, int c)
{

}

void
uvm_pgflcache_pause(void)
{

}

void
uvm_pgflcache_resume(void)
{

}

void
uvm_pgflcache_start(void)
{

}

void
uvm_pgflcache_init(void)
{

}

#endif	/* MULTIPROCESSOR */
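As a quick sanity check of the sizing comment in the new file, under its
stated 2019 assumptions (4kB pages, 8 L2 colors, activity concentrated on
one freelist) the numbers work out; the figures below are derived from the
constants above, not taken from elsewhere in the commit:

	8 colors x MAXPGS (7) pages x 4kB/page = 224kB cached per CPU, worst case
	sizeof(struct pccolor) on _LP64 = 8 (count) + 7 x 8 (pages) = 64 bytes

That is exactly one cache line per color, which is presumably why MAXPGS is
7 rather than a round 8: an eighth slot would push each pccolor past a
single 64-byte line.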
sys/uvm/uvm_pgflcache.h (new file, 43 lines):

/*	$NetBSD: uvm_pgflcache.h,v 1.1 2019/12/27 12:51:57 ad Exp $	*/

/*-
 * Copyright (c) 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#if !defined(_UVM_PGFLCACHE_H_)
#define	_UVM_PGFLCACHE_H_

struct vm_page *uvm_pgflcache_alloc(struct uvm_cpu *, int, int);
void	uvm_pgflcache_fill(struct uvm_cpu *, int, int, int);
bool	uvm_pgflcache_free(struct uvm_cpu *, struct vm_page *);
void	uvm_pgflcache_init(void);
void	uvm_pgflcache_pause(void);
void	uvm_pgflcache_resume(void);
void	uvm_pgflcache_start(void);

#endif /* !_UVM_PGFLCACHE_H_ */
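A minimal sketch of how a caller is expected to use the pause/resume pair,
modeled on the uvm_pglistalloc() change below; the function name
example_freelist_scan is invented for illustration:

/*
 * Sketch only: pause spills every per-CPU cache back to the global
 * buckets (via xcall) and blocks re-caching, so a scan of
 * uvm.page_free[] sees every free page; resume re-enables caching.
 */
static void
example_freelist_scan(void)
{
	uvm_pgflcache_pause();
	/* ... walk the global freelists with a consistent view ... */
	uvm_pgflcache_resume();
}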
@@ -1,12 +1,12 @@
-/*	$NetBSD: uvm_pglist.c,v 1.77 2019/12/21 14:50:34 ad Exp $	*/
+/*	$NetBSD: uvm_pglist.c,v 1.78 2019/12/27 12:51:57 ad Exp $	*/
 
 /*-
- * Copyright (c) 1997 The NetBSD Foundation, Inc.
+ * Copyright (c) 1997, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
- * NASA Ames Research Center.
+ * NASA Ames Research Center, and by Andrew Doran.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -35,13 +35,14 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.77 2019/12/21 14:50:34 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.78 2019/12/27 12:51:57 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 
 #include <uvm/uvm.h>
 #include <uvm/uvm_pdpolicy.h>
+#include <uvm/uvm_pgflcache.h>
 
 #ifdef VM_PAGE_ALLOC_MEMORY_STATS
 #define	STAT_INCR(v)	(v)++
@@ -79,34 +80,25 @@ u_long uvm_pglistalloc_npages;
 static void
 uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
 {
-	int free_list __unused, color __unused, pgflidx;
+	struct pgfreelist *pgfl;
+	struct pgflbucket *pgb;
 
-	KASSERT(mutex_owned(&uvm_fpageqlock));
+	pgfl = &uvm.page_free[uvm_page_get_freelist(pg)];
+	pgb = pgfl->pgfl_buckets[uvm_page_get_bucket(pg)];
 
-#if PGFL_NQUEUES != 2
-#error uvm_pglistalloc needs to be updated
-#endif
-
-	free_list = uvm_page_get_freelist(pg);
-	color = VM_PGCOLOR(pg);
-	pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
 #ifdef UVMDEBUG
 	struct vm_page *tp;
-	LIST_FOREACH(tp,
-	    &uvm.page_free[free_list].pgfl_buckets[color].pgfl_queues[pgflidx],
-	    pageq.list) {
+	LIST_FOREACH(tp, &pgb->pgb_colors[VM_PGCOLOR(pg)], pageq.list) {
 		if (tp == pg)
 			break;
 	}
 	if (tp == NULL)
 		panic("uvm_pglistalloc: page not on freelist");
 #endif
-	LIST_REMOVE(pg, pageq.list);	/* global */
-	LIST_REMOVE(pg, listq.list);	/* cpu */
-	uvmexp.free--;
+	LIST_REMOVE(pg, pageq.list);
+	pgb->pgb_nfree--;
 	if (pg->flags & PG_ZERO)
 		CPU_COUNT(CPU_COUNT_ZEROPAGES, -1);
-	VM_FREE_PAGE_TO_CPU(pg)->pages[pgflidx]--;
 	pg->flags = PG_CLEAN;
 	pg->uobject = NULL;
 	pg->uanon = NULL;
@@ -129,8 +121,6 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
 	printf("pgalloc: contig %d pgs from psi %zd\n", num, ps - vm_physmem);
 #endif
 
-	KASSERT(mutex_owned(&uvm_fpageqlock));
-
 	low = atop(low);
 	high = atop(high);
 	alignment = atop(alignment);
@@ -316,7 +306,7 @@ uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
 	/*
 	 * Block all memory allocation and lock the free list.
 	 */
-	mutex_spin_enter(&uvm_fpageqlock);
+	uvm_pgfl_lock();
 
 	/* Are there even any free pages? */
 	if (uvm_free() <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
@@ -352,7 +342,7 @@ out:
 	 * the pagedaemon.
 	 */
 
-	mutex_spin_exit(&uvm_fpageqlock);
+	uvm_pgfl_unlock();
 	uvm_kick_pdaemon();
 	return (error);
 }
@@ -368,7 +358,6 @@ uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
 	printf("pgalloc: simple %d pgs from psi %zd\n", num, psi);
 #endif
 
-	KASSERT(mutex_owned(&uvm_fpageqlock));
 	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_start(psi));
 	KASSERT(uvm_physseg_get_start(psi) <= uvm_physseg_get_avail_end(psi));
 	KASSERT(uvm_physseg_get_avail_start(psi) <= uvm_physseg_get_end(psi));
@@ -461,7 +450,7 @@ again:
 	/*
 	 * Block all memory allocation and lock the free list.
 	 */
-	mutex_spin_enter(&uvm_fpageqlock);
+	uvm_pgfl_lock();
 	count++;
 
 	/* Are there even any free pages? */
@@ -493,7 +482,7 @@ out:
 	 * the pagedaemon.
 	 */
 
-	mutex_spin_exit(&uvm_fpageqlock);
+	uvm_pgfl_unlock();
 	uvm_kick_pdaemon();
 
 	if (error) {
@@ -539,6 +528,12 @@ uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
 
 	TAILQ_INIT(rlist);
 
+	/*
+	 * Turn off the caching of free pages - we need everything to be on
+	 * the global freelists.
+	 */
+	uvm_pgflcache_pause();
+
 	if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) ||
 	    (boundary != 0))
 		res = uvm_pglistalloc_contig(num, low, high, alignment,
@@ -546,6 +541,8 @@ uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
 	else
 		res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);
 
+	uvm_pgflcache_resume();
+
 	return (res);
 }
 
@@ -558,45 +555,34 @@ uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
 void
 uvm_pglistfree(struct pglist *list)
 {
-	struct uvm_cpu *ucpu;
+	struct pgfreelist *pgfl;
+	struct pgflbucket *pgb;
 	struct vm_page *pg;
-	int index, color, queue;
-	bool iszero;
+	int c, b;
 
 	/*
 	 * Lock the free list and free each page.
 	 */
 
-	mutex_spin_enter(&uvm_fpageqlock);
-	ucpu = curcpu()->ci_data.cpu_uvm;
+	uvm_pgfl_lock();
 	while ((pg = TAILQ_FIRST(list)) != NULL) {
 		KASSERT(!uvmpdpol_pageisqueued_p(pg));
 		TAILQ_REMOVE(list, pg, pageq.queue);
-		iszero = (pg->flags & PG_ZERO);
 		pg->flags = (pg->flags & PG_ZERO) | PG_FREE;
 #ifdef DEBUG
 		pg->uobject = (void *)0xdeadbeef;
 		pg->uanon = (void *)0xdeadbeef;
 #endif /* DEBUG */
 #ifdef DEBUG
-		if (iszero)
+		if (pg->flags & PG_ZERO)
 			uvm_pagezerocheck(pg);
 #endif /* DEBUG */
-		index = uvm_page_get_freelist(pg);
-		color = VM_PGCOLOR(pg);
-		queue = iszero ? PGFL_ZEROS : PGFL_UNKNOWN;
-		pg->offset = (uintptr_t)ucpu;
-		LIST_INSERT_HEAD(&uvm.page_free[index].pgfl_buckets[color].
-		    pgfl_queues[queue], pg, pageq.list);
-		LIST_INSERT_HEAD(&ucpu->page_free[index].pgfl_buckets[color].
-		    pgfl_queues[queue], pg, listq.list);
-		uvmexp.free++;
-		if (iszero)
+		c = VM_PGCOLOR(pg);
+		b = uvm_page_get_bucket(pg);
+		pgfl = &uvm.page_free[uvm_page_get_freelist(pg)];
+		pgb = pgfl->pgfl_buckets[b];
+		if (pg->flags & PG_ZERO)
 			CPU_COUNT(CPU_COUNT_ZEROPAGES, 1);
-		ucpu->pages[queue]++;
+		pgb->pgb_nfree++;
+		LIST_INSERT_HEAD(&pgb->pgb_colors[c], pg, pageq.list);
 		STAT_DECR(uvm_pglistalloc_npages);
 	}
-	if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN])
-		ucpu->page_idle_zero = vm_page_zero_enable;
-	mutex_spin_exit(&uvm_fpageqlock);
+	uvm_pgfl_unlock();
 }
@@ -1,11 +1,11 @@
-/*	$NetBSD: uvm_pglist.h,v 1.8 2010/11/06 15:48:00 uebayasi Exp $	*/
+/*	$NetBSD: uvm_pglist.h,v 1.9 2019/12/27 12:51:57 ad Exp $	*/
 
 /*-
- * Copyright (c) 2000, 2001, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2000, 2001, 2008, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Jason R. Thorpe.
+ * by Jason R. Thorpe, and by Andrew Doran.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -41,19 +41,51 @@ TAILQ_HEAD(pglist, vm_page);
 LIST_HEAD(pgflist, vm_page);
 
 /*
- * A page free list consists of free pages of unknown contents and free
- * pages of all zeros.
+ * The global uvm.page_free list (uvm_page.c, uvm_pglist.c).  Free pages are
+ * stored according to freelist, bucket, and cache colour.
+ *
+ * pglist = &uvm.page_free[freelist].pgfl_buckets[bucket].pgb_color[color];
+ *
+ * Freelists provide a priority ordering of pages for allocation, based upon
+ * how valuable they are for special uses (e.g. device driver DMA).
+ *
+ * Pages are then grouped in buckets according to some common factor, for
+ * example L2/L3 cache locality.  Each bucket has its own lock, and the
+ * locks are shared among freelists for the same numbered buckets.
+ *
+ * Inside each bucket, pages are further distributed by cache color.
+ *
+ * We want these data structures to occupy as few cache lines as possible,
+ * as they will be highly contended.
  */
-#define	PGFL_UNKNOWN	0
-#define	PGFL_ZEROS	1
-#define	PGFL_NQUEUES	2
-
 struct pgflbucket {
-	struct pgflist pgfl_queues[PGFL_NQUEUES];
+	uintptr_t	pgb_nfree;	/* total # free pages, all colors */
+	struct pgflist	pgb_colors[1];	/* variable size array */
 };
 
+/*
+ * At the root, the freelists.  MD code decides the number and structure of
+ * these.  They are always arranged in descending order of allocation
+ * priority.
+ *
+ * 8 buckets should be enough to cover most all current x86 systems (2019),
+ * given the way package/core/smt IDs are structured on x86.  For systems
+ * that report high package counts despite having a single physical CPU
+ * package (e.g. Ampere eMAG) a little bit of sharing isn't going to hurt
+ * in the least.
+ */
+#define	PGFL_MAX_BUCKETS	8
 struct pgfreelist {
-	struct pgflbucket *pgfl_buckets;
+	struct pgflbucket *pgfl_buckets[PGFL_MAX_BUCKETS];
 };
 
+/*
+ * Lock for each bucket.
+ */
+union uvm_freelist_lock {
+	kmutex_t	lock;
+	uint8_t		padding[COHERENCY_UNIT];
+};
+extern union uvm_freelist_lock	uvm_freelist_locks[PGFL_MAX_BUCKETS];
+
 #endif /* _UVM_UVM_PGLIST_H_ */
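To make the indexing comment above concrete, here is a small sketch of how
a free-page list head is located under the new layout; the helper name is
invented, and note the struct member is spelled pgb_colors even though the
comment writes pgb_color:

/* Locate the list for (freelist fl, bucket b, color c).  The bucket
 * lock is indexed by bucket only and shared across freelists, so the
 * caller holds uvm_freelist_locks[b].lock around any list surgery. */
static struct pgflist *
example_pgfl_head(int fl, int b, int c)
{
	struct pgflbucket *pgb = uvm.page_free[fl].pgfl_buckets[b];

	KASSERT(mutex_owned(&uvm_freelist_locks[b].lock));
	return &pgb->pgb_colors[c];
}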