NetBSD/sys/uvm/uvm_physseg.c
ad 5978ddc663 Break the global uvm_pageqlock into a per-page identity lock and a private
lock for use of the pagedaemon policy code.  Discussed on tech-kern.

PR kern/54209: NetBSD 8 large memory performance extremely low
PR kern/54210: NetBSD-8 processes presumably not exiting
PR kern/54727: writing a large file causes unreasonable system behaviour
2019-12-13 20:10:21 +00:00

1398 lines
34 KiB
C

/* $NetBSD: uvm_physseg.c,v 1.11 2019/12/13 20:10:22 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* Copyright (c) 1991, 1993, The Regents of the University of California.
*
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_page.h 7.3 (Berkeley) 4/21/91
* from: Id: uvm_page.h,v 1.1.2.6 1998/02/04 02:31:42 chuck Exp
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Consolidated API from uvm_page.c and others.
* Consolidated and designed by Cherry G. Mathew <cherry@zyx.in>
* rbtree(3) backing implementation by:
* Santhosh N. Raju <santhosh.raju@gmail.com>
*/
#ifdef _KERNEL_OPT
#include "opt_uvm.h"
#endif
#include <sys/param.h>
#include <sys/types.h>
#include <sys/extent.h>
#include <sys/kmem.h>
#include <uvm/uvm.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_param.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_physseg.h>
/*
* uvm_physseg: describes one segment of physical memory
*/
struct uvm_physseg {
struct rb_node rb_node; /* tree information */
paddr_t start; /* PF# of first page in segment */
paddr_t end; /* (PF# of last page in segment) + 1 */
paddr_t avail_start; /* PF# of first free page in segment */
paddr_t avail_end; /* (PF# of last free page in segment) +1 */
struct vm_page *pgs; /* vm_page structures (from start) */
struct extent *ext; /* extent(9) structure to manage pgs[] */
int free_list; /* which free list they belong on */
u_int start_hint; /* start looking for free pages here */
/* protected by uvm_fpageqlock */
#ifdef __HAVE_PMAP_PHYSSEG
struct pmap_physseg pmseg; /* pmap specific (MD) data */
#endif
};
/*
* These functions are reserved for uvm(9) internal use and are not
* exported in the header file uvm_physseg.h
*
* Thus they are redefined here.
*/
void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *);
void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t);
/* returns a pgs array */
struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t);
#if defined(UVM_HOTPLUG) /* rbtree impementation */
#define HANDLE_TO_PHYSSEG_NODE(h) ((struct uvm_physseg *)(h))
#define PHYSSEG_NODE_TO_HANDLE(u) ((uvm_physseg_t)(u))
struct uvm_physseg_graph {
struct rb_tree rb_tree; /* Tree for entries */
int nentries; /* Number of entries */
};
static struct uvm_physseg_graph uvm_physseg_graph;
/*
* Note on kmem(9) allocator usage:
* We take the conservative approach that plug/unplug are allowed to
* fail in high memory stress situations.
*
* We want to avoid re-entrant situations in which one plug/unplug
* operation is waiting on a previous one to complete, since this
* makes the design more complicated than necessary.
*
* We may review this and change its behaviour, once the use cases
* become more obvious.
*/
/*
* Special alloc()/free() functions for boot time support:
* We assume that alloc() at boot time is only for new 'vm_physseg's
* This allows us to use a static array for memory allocation at boot
* time. Thus we avoid using kmem(9) which is not ready at this point
* in boot.
*
* After kmem(9) is ready, we use it. We currently discard any free()s
* to this static array, since the size is small enough to be a
* trivial waste on all architectures we run on.
*/
static size_t nseg = 0;
static struct uvm_physseg uvm_physseg[VM_PHYSSEG_MAX];
static void *
uvm_physseg_alloc(size_t sz)
{
/*
* During boot time, we only support allocating vm_physseg
* entries from the static array.
* We need to assert for this.
*/
if (__predict_false(uvm.page_init_done == false)) {
if (sz % sizeof(struct uvm_physseg))
panic("%s: tried to alloc size other than multiple"
" of struct uvm_physseg at boot\n", __func__);
size_t n = sz / sizeof(struct uvm_physseg);
nseg += n;
KASSERT(nseg > 0 && nseg <= VM_PHYSSEG_MAX);
return &uvm_physseg[nseg - n];
}
return kmem_zalloc(sz, KM_NOSLEEP);
}
static void
uvm_physseg_free(void *p, size_t sz)
{
/*
* This is a bit tricky. We do allow simulation of free()
* during boot (for eg: when MD code is "steal"ing memory,
* and the segment has been exhausted (and thus needs to be
* free() - ed.
* free() also complicates things because we leak the
* free(). Therefore calling code can't assume that free()-ed
* memory is available for alloc() again, at boot time.
*
* Thus we can't explicitly disallow free()s during
* boot time. However, the same restriction for alloc()
* applies to free(). We only allow uvm_physseg related free()s
* via this function during boot time.
*/
if (__predict_false(uvm.page_init_done == false)) {
if (sz % sizeof(struct uvm_physseg))
panic("%s: tried to free size other than struct uvm_physseg"
" at boot\n", __func__);
}
/*
* Could have been in a single if(){} block - split for
* clarity
*/
if ((struct uvm_physseg *)p >= uvm_physseg &&
(struct uvm_physseg *)p < (uvm_physseg + VM_PHYSSEG_MAX)) {
if (sz % sizeof(struct uvm_physseg))
panic("%s: tried to free() other than struct uvm_physseg"
" from static array\n", __func__);
if ((sz / sizeof(struct uvm_physseg)) >= VM_PHYSSEG_MAX)
panic("%s: tried to free() the entire static array!", __func__);
return; /* Nothing to free */
}
kmem_free(p, sz);
}
/* XXX: Multi page size */
bool
uvm_physseg_plug(paddr_t pfn, size_t pages, uvm_physseg_t *psp)
{
int preload;
size_t slabpages;
struct uvm_physseg *ps, *current_ps = NULL;
struct vm_page *slab = NULL, *pgs = NULL;
#ifdef DEBUG
paddr_t off;
uvm_physseg_t upm;
upm = uvm_physseg_find(pfn, &off);
ps = HANDLE_TO_PHYSSEG_NODE(upm);
if (ps != NULL) /* XXX; do we allow "update" plugs ? */
return false;
#endif
/*
* do we have room?
*/
ps = uvm_physseg_alloc(sizeof (struct uvm_physseg));
if (ps == NULL) {
printf("uvm_page_physload: unable to load physical memory "
"segment\n");
printf("\t%d segments allocated, ignoring 0x%"PRIxPADDR" -> 0x%"PRIxPADDR"\n",
VM_PHYSSEG_MAX, pfn, pfn + pages + 1);
printf("\tincrease VM_PHYSSEG_MAX\n");
return false;
}
/* span init */
ps->start = pfn;
ps->end = pfn + pages;
/*
* XXX: Ugly hack because uvmexp.npages accounts for only
* those pages in the segment included below as well - this
* should be legacy and removed.
*/
ps->avail_start = ps->start;
ps->avail_end = ps->end;
/*
* check to see if this is a "preload" (i.e. uvm_page_init hasn't been
* called yet, so kmem is not available).
*/
preload = 1; /* We are going to assume it is a preload */
RB_TREE_FOREACH(current_ps, &(uvm_physseg_graph.rb_tree)) {
/* If there are non NULL pages then we are not in a preload */
if (current_ps->pgs != NULL) {
preload = 0;
/* Try to scavenge from earlier unplug()s. */
pgs = uvm_physseg_seg_alloc_from_slab(current_ps, pages);
if (pgs != NULL) {
break;
}
}
}
/*
* if VM is already running, attempt to kmem_alloc vm_page structures
*/
if (!preload) {
if (pgs == NULL) { /* Brand new */
/* Iteratively try alloc down from uvmexp.npages */
for (slabpages = (size_t) uvmexp.npages; slabpages >= pages; slabpages--) {
slab = kmem_zalloc(sizeof *pgs * (long unsigned int)slabpages, KM_NOSLEEP);
if (slab != NULL)
break;
}
if (slab == NULL) {
uvm_physseg_free(ps, sizeof(struct uvm_physseg));
return false;
}
uvm_physseg_seg_chomp_slab(ps, slab, (size_t) slabpages);
/* We allocate enough for this plug */
pgs = uvm_physseg_seg_alloc_from_slab(ps, pages);
if (pgs == NULL) {
printf("unable to uvm_physseg_seg_alloc_from_slab() from backend\n");
return false;
}
} else {
/* Reuse scavenged extent */
ps->ext = current_ps->ext;
}
physmem += pages;
uvmpdpol_reinit();
} else { /* Boot time - see uvm_page.c:uvm_page_init() */
pgs = NULL;
ps->pgs = pgs;
}
/*
* now insert us in the proper place in uvm_physseg_graph.rb_tree
*/
current_ps = rb_tree_insert_node(&(uvm_physseg_graph.rb_tree), ps);
if (current_ps != ps) {
panic("uvm_page_physload: Duplicate address range detected!");
}
uvm_physseg_graph.nentries++;
/*
* uvm_pagefree() requires the PHYS_TO_VM_PAGE(pgs[i]) on the
* newly allocated pgs[] to return the correct value. This is
* a bit of a chicken and egg problem, since it needs
* uvm_physseg_find() to succeed. For this, the node needs to
* be inserted *before* uvm_physseg_init_seg() happens.
*
* During boot, this happens anyway, since
* uvm_physseg_init_seg() is called later on and separately
* from uvm_page.c:uvm_page_init().
* In the case of hotplug we need to ensure this.
*/
if (__predict_true(!preload))
uvm_physseg_init_seg(ps, pgs);
if (psp != NULL)
*psp = ps;
return true;
}
static int
uvm_physseg_compare_nodes(void *ctx, const void *nnode1, const void *nnode2)
{
const struct uvm_physseg *enode1 = nnode1;
const struct uvm_physseg *enode2 = nnode2;
KASSERT(enode1->start < enode2->start || enode1->start >= enode2->end);
KASSERT(enode2->start < enode1->start || enode2->start >= enode1->end);
if (enode1->start < enode2->start)
return -1;
if (enode1->start >= enode2->end)
return 1;
return 0;
}
static int
uvm_physseg_compare_key(void *ctx, const void *nnode, const void *pkey)
{
const struct uvm_physseg *enode = nnode;
const paddr_t pa = *(const paddr_t *) pkey;
if(enode->start <= pa && pa < enode->end)
return 0;
if (enode->start < pa)
return -1;
if (enode->end > pa)
return 1;
return 0;
}
static const rb_tree_ops_t uvm_physseg_tree_ops = {
.rbto_compare_nodes = uvm_physseg_compare_nodes,
.rbto_compare_key = uvm_physseg_compare_key,
.rbto_node_offset = offsetof(struct uvm_physseg, rb_node),
.rbto_context = NULL
};
/*
* uvm_physseg_init: init the physmem
*
* => physmem unit should not be in use at this point
*/
void
uvm_physseg_init(void)
{
rb_tree_init(&(uvm_physseg_graph.rb_tree), &uvm_physseg_tree_ops);
uvm_physseg_graph.nentries = 0;
}
uvm_physseg_t
uvm_physseg_get_next(uvm_physseg_t upm)
{
/* next of invalid is invalid, not fatal */
if (uvm_physseg_valid_p(upm) == false)
return UVM_PHYSSEG_TYPE_INVALID;
return (uvm_physseg_t) rb_tree_iterate(&(uvm_physseg_graph.rb_tree), upm,
RB_DIR_RIGHT);
}
uvm_physseg_t
uvm_physseg_get_prev(uvm_physseg_t upm)
{
/* prev of invalid is invalid, not fatal */
if (uvm_physseg_valid_p(upm) == false)
return UVM_PHYSSEG_TYPE_INVALID;
return (uvm_physseg_t) rb_tree_iterate(&(uvm_physseg_graph.rb_tree), upm,
RB_DIR_LEFT);
}
uvm_physseg_t
uvm_physseg_get_last(void)
{
return (uvm_physseg_t) RB_TREE_MAX(&(uvm_physseg_graph.rb_tree));
}
uvm_physseg_t
uvm_physseg_get_first(void)
{
return (uvm_physseg_t) RB_TREE_MIN(&(uvm_physseg_graph.rb_tree));
}
paddr_t
uvm_physseg_get_highest_frame(void)
{
struct uvm_physseg *ps =
(uvm_physseg_t) RB_TREE_MAX(&(uvm_physseg_graph.rb_tree));
return ps->end - 1;
}
/*
* uvm_page_physunload: unload physical memory and return it to
* caller.
*/
bool
uvm_page_physunload(uvm_physseg_t upm, int freelist, paddr_t *paddrp)
{
struct uvm_physseg *seg;
if (__predict_true(uvm.page_init_done == true))
panic("%s: unload attempted after uvm_page_init()\n", __func__);
seg = HANDLE_TO_PHYSSEG_NODE(upm);
if (seg->free_list != freelist) {
return false;
}
/*
* During cold boot, what we're about to unplug hasn't been
* put on the uvm freelist, nor has uvmexp.npages been
* updated. (This happens in uvm_page.c:uvm_page_init())
*
* For hotplug, we assume here that the pages being unloaded
* here are completely out of sight of uvm (ie; not on any uvm
* lists), and that uvmexp.npages has been suitably
* decremented before we're called.
*
* XXX: will avail_end == start if avail_start < avail_end?
*/
/* try from front */
if (seg->avail_start == seg->start &&
seg->avail_start < seg->avail_end) {
*paddrp = ctob(seg->avail_start);
return uvm_physseg_unplug(seg->avail_start, 1);
}
/* try from rear */
if (seg->avail_end == seg->end &&
seg->avail_start < seg->avail_end) {
*paddrp = ctob(seg->avail_end - 1);
return uvm_physseg_unplug(seg->avail_end - 1, 1);
}
return false;
}
bool
uvm_page_physunload_force(uvm_physseg_t upm, int freelist, paddr_t *paddrp)
{
struct uvm_physseg *seg;
seg = HANDLE_TO_PHYSSEG_NODE(upm);
if (__predict_true(uvm.page_init_done == true))
panic("%s: unload attempted after uvm_page_init()\n", __func__);
/* any room in this bank? */
if (seg->avail_start >= seg->avail_end) {
return false; /* nope */
}
*paddrp = ctob(seg->avail_start);
/* Always unplug from front */
return uvm_physseg_unplug(seg->avail_start, 1);
}
/*
* vm_physseg_find: find vm_physseg structure that belongs to a PA
*/
uvm_physseg_t
uvm_physseg_find(paddr_t pframe, psize_t *offp)
{
struct uvm_physseg * ps = NULL;
ps = rb_tree_find_node(&(uvm_physseg_graph.rb_tree), &pframe);
if(ps != NULL && offp != NULL)
*offp = pframe - ps->start;
return ps;
}
#else /* UVM_HOTPLUG */
/*
* physical memory config is stored in vm_physmem.
*/
#define VM_PHYSMEM_PTR(i) (&vm_physmem[i])
#if VM_PHYSSEG_MAX == 1
#define VM_PHYSMEM_PTR_SWAP(i, j) /* impossible */
#else
#define VM_PHYSMEM_PTR_SWAP(i, j) \
do { vm_physmem[(i)] = vm_physmem[(j)]; } while (0)
#endif
#define HANDLE_TO_PHYSSEG_NODE(h) (VM_PHYSMEM_PTR((int)h))
#define PHYSSEG_NODE_TO_HANDLE(u) ((int)((vsize_t) (u - vm_physmem) / sizeof(struct uvm_physseg)))
static struct uvm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */
static int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */
#define vm_nphysmem vm_nphysseg
void
uvm_physseg_init(void)
{
/* XXX: Provisioning for rb_tree related init(s) */
return;
}
int
uvm_physseg_get_next(uvm_physseg_t lcv)
{
/* next of invalid is invalid, not fatal */
if (uvm_physseg_valid_p(lcv) == false)
return UVM_PHYSSEG_TYPE_INVALID;
return (lcv + 1);
}
int
uvm_physseg_get_prev(uvm_physseg_t lcv)
{
/* prev of invalid is invalid, not fatal */
if (uvm_physseg_valid_p(lcv) == false)
return UVM_PHYSSEG_TYPE_INVALID;
return (lcv - 1);
}
int
uvm_physseg_get_last(void)
{
return (vm_nphysseg - 1);
}
int
uvm_physseg_get_first(void)
{
return 0;
}
paddr_t
uvm_physseg_get_highest_frame(void)
{
int lcv;
paddr_t last = 0;
struct uvm_physseg *ps;
for (lcv = 0; lcv < vm_nphysseg; lcv++) {
ps = VM_PHYSMEM_PTR(lcv);
if (last < ps->end)
last = ps->end;
}
return last;
}
static struct vm_page *
uvm_post_preload_check(void)
{
int preload, lcv;
/*
* check to see if this is a "preload" (i.e. uvm_page_init hasn't been
* called yet, so kmem is not available).
*/
for (lcv = 0 ; lcv < vm_nphysmem ; lcv++) {
if (VM_PHYSMEM_PTR(lcv)->pgs)
break;
}
preload = (lcv == vm_nphysmem);
/*
* if VM is already running, attempt to kmem_alloc vm_page structures
*/
if (!preload) {
panic("Tried to add RAM after uvm_page_init");
}
return NULL;
}
/*
* uvm_page_physunload: unload physical memory and return it to
* caller.
*/
bool
uvm_page_physunload(uvm_physseg_t psi, int freelist, paddr_t *paddrp)
{
int x;
struct uvm_physseg *seg;
uvm_post_preload_check();
seg = VM_PHYSMEM_PTR(psi);
if (seg->free_list != freelist) {
return false;
}
/* try from front */
if (seg->avail_start == seg->start &&
seg->avail_start < seg->avail_end) {
*paddrp = ctob(seg->avail_start);
seg->avail_start++;
seg->start++;
/* nothing left? nuke it */
if (seg->avail_start == seg->end) {
if (vm_nphysmem == 1)
panic("uvm_page_physget: out of memory!");
vm_nphysmem--;
for (x = psi ; x < vm_nphysmem ; x++)
/* structure copy */
VM_PHYSMEM_PTR_SWAP(x, x + 1);
}
return (true);
}
/* try from rear */
if (seg->avail_end == seg->end &&
seg->avail_start < seg->avail_end) {
*paddrp = ctob(seg->avail_end - 1);
seg->avail_end--;
seg->end--;
/* nothing left? nuke it */
if (seg->avail_end == seg->start) {
if (vm_nphysmem == 1)
panic("uvm_page_physget: out of memory!");
vm_nphysmem--;
for (x = psi ; x < vm_nphysmem ; x++)
/* structure copy */
VM_PHYSMEM_PTR_SWAP(x, x + 1);
}
return (true);
}
return false;
}
bool
uvm_page_physunload_force(uvm_physseg_t psi, int freelist, paddr_t *paddrp)
{
int x;
struct uvm_physseg *seg;
uvm_post_preload_check();
seg = VM_PHYSMEM_PTR(psi);
/* any room in this bank? */
if (seg->avail_start >= seg->avail_end) {
return false; /* nope */
}
*paddrp = ctob(seg->avail_start);
seg->avail_start++;
/* truncate! */
seg->start = seg->avail_start;
/* nothing left? nuke it */
if (seg->avail_start == seg->end) {
if (vm_nphysmem == 1)
panic("uvm_page_physget: out of memory!");
vm_nphysmem--;
for (x = psi ; x < vm_nphysmem ; x++)
/* structure copy */
VM_PHYSMEM_PTR_SWAP(x, x + 1);
}
return (true);
}
bool
uvm_physseg_plug(paddr_t pfn, size_t pages, uvm_physseg_t *psp)
{
int lcv;
struct vm_page *pgs;
struct uvm_physseg *ps;
#ifdef DEBUG
paddr_t off;
uvm_physseg_t upm;
upm = uvm_physseg_find(pfn, &off);
if (uvm_physseg_valid_p(upm)) /* XXX; do we allow "update" plugs ? */
return false;
#endif
paddr_t start = pfn;
paddr_t end = pfn + pages;
paddr_t avail_start = start;
paddr_t avail_end = end;
if (uvmexp.pagesize == 0)
panic("uvm_page_physload: page size not set!");
/*
* do we have room?
*/
if (vm_nphysmem == VM_PHYSSEG_MAX) {
printf("uvm_page_physload: unable to load physical memory "
"segment\n");
printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
VM_PHYSSEG_MAX, (long long)start, (long long)end);
printf("\tincrease VM_PHYSSEG_MAX\n");
if (psp != NULL)
*psp = UVM_PHYSSEG_TYPE_INVALID_OVERFLOW;
return false;
}
/*
* check to see if this is a "preload" (i.e. uvm_page_init hasn't been
* called yet, so kmem is not available).
*/
pgs = uvm_post_preload_check();
/*
* now insert us in the proper place in vm_physmem[]
*/
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
/* random: put it at the end (easy!) */
ps = VM_PHYSMEM_PTR(vm_nphysmem);
lcv = vm_nphysmem;
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
{
int x;
/* sort by address for binary search */
for (lcv = 0 ; lcv < vm_nphysmem ; lcv++)
if (start < VM_PHYSMEM_PTR(lcv)->start)
break;
ps = VM_PHYSMEM_PTR(lcv);
/* move back other entries, if necessary ... */
for (x = vm_nphysmem ; x > lcv ; x--)
/* structure copy */
VM_PHYSMEM_PTR_SWAP(x, x - 1);
}
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
{
int x;
/* sort by largest segment first */
for (lcv = 0 ; lcv < vm_nphysmem ; lcv++)
if ((end - start) >
(VM_PHYSMEM_PTR(lcv)->end - VM_PHYSMEM_PTR(lcv)->start))
break;
ps = VM_PHYSMEM_PTR(lcv);
/* move back other entries, if necessary ... */
for (x = vm_nphysmem ; x > lcv ; x--)
/* structure copy */
VM_PHYSMEM_PTR_SWAP(x, x - 1);
}
#else
panic("uvm_page_physload: unknown physseg strategy selected!");
#endif
ps->start = start;
ps->end = end;
ps->avail_start = avail_start;
ps->avail_end = avail_end;
ps->pgs = pgs;
vm_nphysmem++;
if (psp != NULL)
*psp = lcv;
return true;
}
/*
* when VM_PHYSSEG_MAX is 1, we can simplify these functions
*/
#if VM_PHYSSEG_MAX == 1
static inline int vm_physseg_find_contig(struct uvm_physseg *, int, paddr_t, psize_t *);
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
static inline int vm_physseg_find_bsearch(struct uvm_physseg *, int, paddr_t, psize_t *);
#else
static inline int vm_physseg_find_linear(struct uvm_physseg *, int, paddr_t, psize_t *);
#endif
/*
* vm_physseg_find: find vm_physseg structure that belongs to a PA
*/
int
uvm_physseg_find(paddr_t pframe, psize_t *offp)
{
#if VM_PHYSSEG_MAX == 1
return vm_physseg_find_contig(vm_physmem, vm_nphysseg, pframe, offp);
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
return vm_physseg_find_bsearch(vm_physmem, vm_nphysseg, pframe, offp);
#else
return vm_physseg_find_linear(vm_physmem, vm_nphysseg, pframe, offp);
#endif
}
#if VM_PHYSSEG_MAX == 1
static inline int
vm_physseg_find_contig(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp)
{
/* 'contig' case */
if (pframe >= segs[0].start && pframe < segs[0].end) {
if (offp)
*offp = pframe - segs[0].start;
return(0);
}
return(-1);
}
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
static inline int
vm_physseg_find_bsearch(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp)
{
/* binary search for it */
int start, len, guess;
/*
* if try is too large (thus target is less than try) we reduce
* the length to trunc(len/2) [i.e. everything smaller than "try"]
*
* if the try is too small (thus target is greater than try) then
* we set the new start to be (try + 1). this means we need to
* reduce the length to (round(len/2) - 1).
*
* note "adjust" below which takes advantage of the fact that
* (round(len/2) - 1) == trunc((len - 1) / 2)
* for any value of len we may have
*/
for (start = 0, len = nsegs ; len != 0 ; len = len / 2) {
guess = start + (len / 2); /* try in the middle */
/* start past our try? */
if (pframe >= segs[guess].start) {
/* was try correct? */
if (pframe < segs[guess].end) {
if (offp)
*offp = pframe - segs[guess].start;
return guess; /* got it */
}
start = guess + 1; /* next time, start here */
len--; /* "adjust" */
} else {
/*
* pframe before try, just reduce length of
* region, done in "for" loop
*/
}
}
return(-1);
}
#else
static inline int
vm_physseg_find_linear(struct uvm_physseg *segs, int nsegs, paddr_t pframe, psize_t *offp)
{
/* linear search for it */
int lcv;
for (lcv = 0; lcv < nsegs; lcv++) {
if (pframe >= segs[lcv].start &&
pframe < segs[lcv].end) {
if (offp)
*offp = pframe - segs[lcv].start;
return(lcv); /* got it */
}
}
return(-1);
}
#endif
#endif /* UVM_HOTPLUG */
bool
uvm_physseg_valid_p(uvm_physseg_t upm)
{
struct uvm_physseg *ps;
if (upm == UVM_PHYSSEG_TYPE_INVALID ||
upm == UVM_PHYSSEG_TYPE_INVALID_EMPTY ||
upm == UVM_PHYSSEG_TYPE_INVALID_OVERFLOW)
return false;
/*
* This is the delicate init dance -
* needs to go with the dance.
*/
if (uvm.page_init_done != true)
return true;
ps = HANDLE_TO_PHYSSEG_NODE(upm);
/* Extra checks needed only post uvm_page_init() */
if (ps->pgs == NULL)
return false;
/* XXX: etc. */
return true;
}
/*
* Boot protocol dictates that these must be able to return partially
* initialised segments.
*/
paddr_t
uvm_physseg_get_start(uvm_physseg_t upm)
{
if (uvm_physseg_valid_p(upm) == false)
return (paddr_t) -1;
return HANDLE_TO_PHYSSEG_NODE(upm)->start;
}
paddr_t
uvm_physseg_get_end(uvm_physseg_t upm)
{
if (uvm_physseg_valid_p(upm) == false)
return (paddr_t) -1;
return HANDLE_TO_PHYSSEG_NODE(upm)->end;
}
paddr_t
uvm_physseg_get_avail_start(uvm_physseg_t upm)
{
if (uvm_physseg_valid_p(upm) == false)
return (paddr_t) -1;
return HANDLE_TO_PHYSSEG_NODE(upm)->avail_start;
}
#if defined(UVM_PHYSSEG_LEGACY)
void
uvm_physseg_set_avail_start(uvm_physseg_t upm, paddr_t avail_start)
{
struct uvm_physseg *ps = HANDLE_TO_PHYSSEG_NODE(upm);
#if defined(DIAGNOSTIC)
paddr_t avail_end;
avail_end = uvm_physseg_get_avail_end(upm);
KASSERT(uvm_physseg_valid_p(upm));
KASSERT(avail_start < avail_end && avail_start >= ps->start);
#endif
ps->avail_start = avail_start;
}
void uvm_physseg_set_avail_end(uvm_physseg_t upm, paddr_t avail_end)
{
struct uvm_physseg *ps = HANDLE_TO_PHYSSEG_NODE(upm);
#if defined(DIAGNOSTIC)
paddr_t avail_start;
avail_start = uvm_physseg_get_avail_start(upm);
KASSERT(uvm_physseg_valid_p(upm));
KASSERT(avail_end > avail_start && avail_end <= ps->end);
#endif
ps->avail_end = avail_end;
}
#endif /* UVM_PHYSSEG_LEGACY */
paddr_t
uvm_physseg_get_avail_end(uvm_physseg_t upm)
{
if (uvm_physseg_valid_p(upm) == false)
return (paddr_t) -1;
return HANDLE_TO_PHYSSEG_NODE(upm)->avail_end;
}
struct vm_page *
uvm_physseg_get_pg(uvm_physseg_t upm, paddr_t idx)
{
KASSERT(uvm_physseg_valid_p(upm));
return &HANDLE_TO_PHYSSEG_NODE(upm)->pgs[idx];
}
#ifdef __HAVE_PMAP_PHYSSEG
struct pmap_physseg *
uvm_physseg_get_pmseg(uvm_physseg_t upm)
{
KASSERT(uvm_physseg_valid_p(upm));
return &(HANDLE_TO_PHYSSEG_NODE(upm)->pmseg);
}
#endif
int
uvm_physseg_get_free_list(uvm_physseg_t upm)
{
KASSERT(uvm_physseg_valid_p(upm));
return HANDLE_TO_PHYSSEG_NODE(upm)->free_list;
}
u_int
uvm_physseg_get_start_hint(uvm_physseg_t upm)
{
KASSERT(uvm_physseg_valid_p(upm));
return HANDLE_TO_PHYSSEG_NODE(upm)->start_hint;
}
bool
uvm_physseg_set_start_hint(uvm_physseg_t upm, u_int start_hint)
{
if (uvm_physseg_valid_p(upm) == false)
return false;
HANDLE_TO_PHYSSEG_NODE(upm)->start_hint = start_hint;
return true;
}
void
uvm_physseg_init_seg(uvm_physseg_t upm, struct vm_page *pgs)
{
psize_t i;
psize_t n;
paddr_t paddr;
struct uvm_physseg *seg;
struct vm_page *pg;
KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID && pgs != NULL);
seg = HANDLE_TO_PHYSSEG_NODE(upm);
KASSERT(seg != NULL);
KASSERT(seg->pgs == NULL);
n = seg->end - seg->start;
seg->pgs = pgs;
/* init and free vm_pages (we've already zeroed them) */
paddr = ctob(seg->start);
for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
seg->pgs[i].phys_addr = paddr;
#ifdef __HAVE_VM_PAGE_MD
VM_MDPAGE_INIT(&seg->pgs[i]);
#endif
if (atop(paddr) >= seg->avail_start &&
atop(paddr) < seg->avail_end) {
uvmexp.npages++;
/* add page to free pool */
pg = &seg->pgs[i];
/* Disable LOCKDEBUG: too many and too early. */
mutex_init(&pg->interlock, MUTEX_NODEBUG, IPL_NONE);
uvm_pagefree(pg);
}
}
}
void
uvm_physseg_seg_chomp_slab(uvm_physseg_t upm, struct vm_page *pgs, size_t n)
{
struct uvm_physseg *seg = HANDLE_TO_PHYSSEG_NODE(upm);
/* max number of pre-boot unplug()s allowed */
#define UVM_PHYSSEG_BOOT_UNPLUG_MAX VM_PHYSSEG_MAX
static char btslab_ex_storage[EXTENT_FIXED_STORAGE_SIZE(UVM_PHYSSEG_BOOT_UNPLUG_MAX)];
if (__predict_false(uvm.page_init_done == false)) {
seg->ext = extent_create("Boot time slab", (u_long) pgs, (u_long) (pgs + n),
(void *)btslab_ex_storage, sizeof(btslab_ex_storage), 0);
} else {
seg->ext = extent_create("Hotplug slab", (u_long) pgs, (u_long) (pgs + n), NULL, 0, 0);
}
KASSERT(seg->ext != NULL);
}
struct vm_page *
uvm_physseg_seg_alloc_from_slab(uvm_physseg_t upm, size_t pages)
{
int err;
struct uvm_physseg *seg;
struct vm_page *pgs = NULL;
KASSERT(pages > 0);
seg = HANDLE_TO_PHYSSEG_NODE(upm);
if (__predict_false(seg->ext == NULL)) {
/*
* This is a situation unique to boot time.
* It shouldn't happen at any point other than from
* the first uvm_page.c:uvm_page_init() call
* Since we're in a loop, we can get away with the
* below.
*/
KASSERT(uvm.page_init_done != true);
uvm_physseg_t upmp = uvm_physseg_get_prev(upm);
KASSERT(upmp != UVM_PHYSSEG_TYPE_INVALID);
seg->ext = HANDLE_TO_PHYSSEG_NODE(upmp)->ext;
KASSERT(seg->ext != NULL);
}
/* We allocate enough for this segment */
err = extent_alloc(seg->ext, sizeof(*pgs) * pages, 1, 0, EX_BOUNDZERO, (u_long *)&pgs);
if (err != 0) {
#ifdef DEBUG
printf("%s: extent_alloc failed with error: %d \n",
__func__, err);
#endif
}
return pgs;
}
/*
* uvm_page_physload: load physical memory into VM system
*
* => all args are PFs
* => all pages in start/end get vm_page structures
* => areas marked by avail_start/avail_end get added to the free page pool
* => we are limited to VM_PHYSSEG_MAX physical memory segments
*/
uvm_physseg_t
uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
paddr_t avail_end, int free_list)
{
struct uvm_physseg *ps;
uvm_physseg_t upm;
if (__predict_true(uvm.page_init_done == true))
panic("%s: unload attempted after uvm_page_init()\n", __func__);
if (uvmexp.pagesize == 0)
panic("uvm_page_physload: page size not set!");
if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
panic("uvm_page_physload: bad free list %d", free_list);
if (start >= end)
panic("uvm_page_physload: start >= end");
if (uvm_physseg_plug(start, end - start, &upm) == false) {
panic("uvm_physseg_plug() failed at boot.");
/* NOTREACHED */
return UVM_PHYSSEG_TYPE_INVALID; /* XXX: correct type */
}
ps = HANDLE_TO_PHYSSEG_NODE(upm);
/* Legacy */
ps->avail_start = avail_start;
ps->avail_end = avail_end;
ps->free_list = free_list; /* XXX: */
return upm;
}
bool
uvm_physseg_unplug(paddr_t pfn, size_t pages)
{
uvm_physseg_t upm;
paddr_t off = 0, start __diagused, end;
struct uvm_physseg *seg;
upm = uvm_physseg_find(pfn, &off);
if (!uvm_physseg_valid_p(upm)) {
printf("%s: Tried to unplug from unknown offset\n", __func__);
return false;
}
seg = HANDLE_TO_PHYSSEG_NODE(upm);
start = uvm_physseg_get_start(upm);
end = uvm_physseg_get_end(upm);
if (end < (pfn + pages)) {
printf("%s: Tried to unplug oversized span \n", __func__);
return false;
}
KASSERT(pfn == start + off); /* sanity */
if (__predict_true(uvm.page_init_done == true)) {
/* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */
if (extent_free(seg->ext, (u_long)(seg->pgs + off), sizeof(struct vm_page) * pages, EX_MALLOCOK | EX_NOWAIT) != 0)
return false;
}
if (off == 0 && (pfn + pages) == end) {
#if defined(UVM_HOTPLUG) /* rbtree implementation */
int segcount = 0;
struct uvm_physseg *current_ps;
/* Complete segment */
if (uvm_physseg_graph.nentries == 1)
panic("%s: out of memory!", __func__);
if (__predict_true(uvm.page_init_done == true)) {
RB_TREE_FOREACH(current_ps, &(uvm_physseg_graph.rb_tree)) {
if (seg->ext == current_ps->ext)
segcount++;
}
KASSERT(segcount > 0);
if (segcount == 1) {
extent_destroy(seg->ext);
}
/*
* We assume that the unplug will succeed from
* this point onwards
*/
uvmexp.npages -= (int) pages;
}
rb_tree_remove_node(&(uvm_physseg_graph.rb_tree), upm);
memset(seg, 0, sizeof(struct uvm_physseg));
uvm_physseg_free(seg, sizeof(struct uvm_physseg));
uvm_physseg_graph.nentries--;
#else /* UVM_HOTPLUG */
int x;
if (vm_nphysmem == 1)
panic("uvm_page_physget: out of memory!");
vm_nphysmem--;
for (x = upm ; x < vm_nphysmem ; x++)
/* structure copy */
VM_PHYSMEM_PTR_SWAP(x, x + 1);
#endif /* UVM_HOTPLUG */
/* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */
return true;
}
if (off > 0 &&
(pfn + pages) < end) {
#if defined(UVM_HOTPLUG) /* rbtree implementation */
/* middle chunk - need a new segment */
struct uvm_physseg *ps, *current_ps;
ps = uvm_physseg_alloc(sizeof (struct uvm_physseg));
if (ps == NULL) {
printf("%s: Unable to allocated new fragment vm_physseg \n",
__func__);
return false;
}
/* Remove middle chunk */
if (__predict_true(uvm.page_init_done == true)) {
KASSERT(seg->ext != NULL);
ps->ext = seg->ext;
/* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */
/*
* We assume that the unplug will succeed from
* this point onwards
*/
uvmexp.npages -= (int) pages;
}
ps->start = pfn + pages;
ps->avail_start = ps->start; /* XXX: Legacy */
ps->end = seg->end;
ps->avail_end = ps->end; /* XXX: Legacy */
seg->end = pfn;
seg->avail_end = seg->end; /* XXX: Legacy */
/*
* The new pgs array points to the beginning of the
* tail fragment.
*/
if (__predict_true(uvm.page_init_done == true))
ps->pgs = seg->pgs + off + pages;
current_ps = rb_tree_insert_node(&(uvm_physseg_graph.rb_tree), ps);
if (current_ps != ps) {
panic("uvm_page_physload: Duplicate address range detected!");
}
uvm_physseg_graph.nentries++;
#else /* UVM_HOTPLUG */
panic("%s: can't unplug() from the middle of a segment without"
" UVM_HOTPLUG\n", __func__);
/* NOTREACHED */
#endif /* UVM_HOTPLUG */
return true;
}
if (off == 0 && (pfn + pages) < end) {
/* Remove front chunk */
if (__predict_true(uvm.page_init_done == true)) {
/* XXX: KASSERT() that seg->pgs[] are not on any uvm lists */
/*
* We assume that the unplug will succeed from
* this point onwards
*/
uvmexp.npages -= (int) pages;
}
/* Truncate */
seg->start = pfn + pages;
seg->avail_start = seg->start; /* XXX: Legacy */
/*
* Move the pgs array start to the beginning of the
* tail end.
*/
if (__predict_true(uvm.page_init_done == true))
seg->pgs += pages;
return true;
}
if (off > 0 && (pfn + pages) == end) {
/* back chunk */
/* Truncate! */
seg->end = pfn;
seg->avail_end = seg->end; /* XXX: Legacy */
uvmexp.npages -= (int) pages;
return true;
}
printf("%s: Tried to unplug unknown range \n", __func__);
return false;
}