/*	$NetBSD: uvm_amap.c,v 1.128 2023/06/19 08:23:35 msaitoh Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.128 2023/06/19 08:23:35 msaitoh Exp $");

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * cache for allocation of vm_amap structures.  note that in order to
 * avoid an endless loop, the amap cache's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */
static struct pool_cache uvm_amap_cache;
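/*
 * Global list of all amaps, protected by amap_list_lock.  It is
 * traversed by amap_swap_off() when swap space is being disabled.
 */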
static kmutex_t amap_list_lock __cacheline_aligned;
static LIST_HEAD(, vm_amap) amap_list;

/*
 * local functions
 */

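/*
 * amap_roundup_slots: round a slot count up so that the per-slot arrays
 * fill out the size that kmem_alloc() would round the request up to anyway.
 */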
static int
amap_roundup_slots(int slots)
{

	return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
}

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the allocation of the array fails
 * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 * ppref:       -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows us to allow one int to contain the ref count for the whole
 * chunk.    note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (need a way to tell
 * if we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 0)
		return;
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
#endif /* UVM_AMAP_PPREF */

/*
 * amap_alloc1: allocate an amap, but do not initialise the overlay.
 *
 * => Note: lock is not set.
 */
static struct vm_amap *
amap_alloc1(int slots, int padslots, int flags)
{
	const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
	const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
	struct vm_amap *amap;
	krwlock_t *newlock, *oldlock;
	int totalslots;

	amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
	if (amap == NULL) {
		return NULL;
	}
	KASSERT(amap->am_lock != NULL);
	KASSERT(amap->am_nused == 0);

	/* Try to privatize the lock if currently shared. */
	if (rw_obj_refcnt(amap->am_lock) > 1) {
		newlock = rw_obj_tryalloc();
		if (newlock != NULL) {
			oldlock = amap->am_lock;
			mutex_enter(&amap_list_lock);
			amap->am_lock = newlock;
			mutex_exit(&amap_list_lock);
			rw_obj_free(oldlock);
		}
	}

	totalslots = amap_roundup_slots(slots + padslots);
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;

	/*
	 * Note: since allocations are likely big, we expect to reduce the
	 * memory fragmentation by allocating them in separate blocks.
	 */
	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
	if (amap->am_bckptr == NULL)
		goto fail2;

	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
	    kmflags);
	if (amap->am_anon == NULL)
		goto fail3;

	return amap;

fail3:
	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
fail2:
	kmem_free(amap->am_slots, totalslots * sizeof(int));
fail1:
	pool_cache_put(&uvm_amap_cache, amap);

	/*
	 * XXX hack to tell the pagedaemon how many pages we need,
	 * since we can need more than it would normally free.
	 */
	if (nowait) {
		extern u_int uvm_extrapages;
		atomic_add_int(&uvm_extrapages,
		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
		    totalslots) >> PAGE_SHIFT);
	}
	return NULL;
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
	}

	UVMHIST_LOG(maphist,"<- done, amap = %#jx, sz=%jd", (uintptr_t)amap,
	    sz, 0, 0);
	return(amap);
}

/*
 * amap_ctor: pool_cache constructor for new amaps
 *
 * => carefully synchronize with amap_swap_off()
 */
static int
amap_ctor(void *arg, void *obj, int flags)
{
	struct vm_amap *amap = obj;

	if ((flags & PR_NOWAIT) != 0) {
		amap->am_lock = rw_obj_tryalloc();
		if (amap->am_lock == NULL) {
			return ENOMEM;
		}
	} else {
		amap->am_lock = rw_obj_alloc();
	}
	amap->am_nused = 0;
	amap->am_flags = 0;

	mutex_enter(&amap_list_lock);
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
	mutex_exit(&amap_list_lock);
	return 0;
}

/*
 * amap_dtor: pool_cache destructor for amaps
 *
 * => carefully synchronize with amap_swap_off()
 */
static void
amap_dtor(void *arg, void *obj)
{
	struct vm_amap *amap = obj;

	KASSERT(amap->am_nused == 0);

	mutex_enter(&amap_list_lock);
	LIST_REMOVE(amap, am_list);
	mutex_exit(&amap_list_lock);
	rw_obj_free(amap->am_lock);
}

/*
 * uvm_amap_init: initialize the amap system.
 */
void
uvm_amap_init(void)
{

	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);

	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0,
	    PR_LARGECACHE, "amappl", NULL, IPL_NONE, amap_ctor, amap_dtor,
	    NULL);
}

/*
 * amap_free: free an amap
 *
 * => the amap must be unlocked
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	int slots;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0);
	KASSERT(amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
	slots = amap->am_maxslot;
	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
#endif
	pool_cache_put(&uvm_amap_cache, amap);
	UVMHIST_LOG(maphist,"<- done, freed amap = %#jx", (uintptr_t)amap,
	    0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
	int slotadj, slotarea, slotendoff;
	int oldnslots;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	int i, *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	const km_flag_t kmflags =
	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "  (entry=%#jx, addsize=%#jx, flags=%#jx)",
	    (uintptr_t)entry, addsize, flags, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	amap_lock(amap, RW_WRITER);
	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	if (flags & AMAP_EXTEND_FORWARDS) {
		slotneed = slotoff + slotmapped + slotadd;
		slotadj = 0;
		slotarea = 0;
	} else {
		slotneed = slotadd + slotmapped;
		slotadj = slotadd - slotoff;
		slotarea = amap->am_maxslot - slotmapped;
	}

	/*
	 * Because this amap only has 1 ref, we know that there is
	 * only one vm_map_entry pointing to it, and the one entry is
	 * using slots between slotoff and slotoff + slotmapped.  If
	 * we have been using ppref then we know that only slots in
	 * the one map entry's range can have anons, since ppref
	 * allowed us to free any anons outside that range as other map
	 * entries which used this amap were removed. But without ppref,
	 * we couldn't know which slots were still needed by other map
	 * entries, so we couldn't free any anons as we removed map
	 * entries, and so any slot from 0 to am_nslot can have an
	 * anon.  But now that we know there is only one map entry
	 * left and we know its range, we can free up any anons
	 * outside that range.  This is necessary because the rest of
	 * this function assumes that there are no anons in the amap
	 * outside of the one map entry's range.
	 */

	slotendoff = slotoff + slotmapped;
	if (amap->am_ppref == PPREF_NONE) {
		amap_wiperange(amap, 0, slotoff);
		amap_wiperange(amap, slotendoff, amap->am_nslot - slotendoff);
	}
	for (i = 0; i < slotoff; i++) {
		KASSERT(amap->am_anon[i] == NULL);
	}
	for (i = slotendoff; i < amap->am_nslot - slotendoff; i++) {
		KASSERT(amap->am_anon[i] == NULL);
	}

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (flags & AMAP_EXTEND_FORWARDS) {
		if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff + slotmapped,
				    slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1f), amap = %#jx, sltneed=%jd",
			    (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	} else {
		if (slotadj <= 0) {
			slotoff -= slotadd;
			entry->aref.ar_pageoff = slotoff;
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff, slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1b), amap = %#jx, sltneed=%jd",
			    (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
		if (flags & AMAP_EXTEND_FORWARDS) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				if ((slotoff + slotmapped) < amap->am_nslot)
					amap_pp_adjref(amap,
					    slotoff + slotmapped,
					    (amap->am_nslot -
					    (slotoff + slotmapped)), 1);
				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
				    slotneed - amap->am_nslot);
			}
#endif
			amap->am_nslot = slotneed;
			amap_unlock(amap);

			/*
			 * no need to zero am_anon since that was done at
			 * alloc time and we never shrink an allocation.
			 */

			UVMHIST_LOG(maphist,"<- done (case 2f), amap = %#jx, "
			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		} else {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				/*
				 * Slide up the ref counts on the pages that
				 * are actually in use.
				 */
				memmove(amap->am_ppref + slotarea,
				    amap->am_ppref + slotoff,
				    slotmapped * sizeof(int));
				/*
				 * Mark the (adjusted) gap at the front as
				 * referenced/not referenced.
				 */
				pp_setreflen(amap->am_ppref,
				    0, 0, slotarea - slotadd);
				pp_setreflen(amap->am_ppref,
				    slotarea - slotadd, 1, slotadd);
			}
#endif

			/*
			 * Slide the anon pointers up and clear out
			 * the space we just made.
			 */
			memmove(amap->am_anon + slotarea,
			    amap->am_anon + slotoff,
			    slotmapped * sizeof(struct vm_anon*));
			memset(amap->am_anon + slotoff, 0,
			    (slotarea - slotoff) * sizeof(struct vm_anon *));

			/*
			 * Slide the backpointers up, but don't bother
			 * wiping out the old slots.
			 */
			memmove(amap->am_bckptr + slotarea,
			    amap->am_bckptr + slotoff,
			    slotmapped * sizeof(int));

			/*
			 * Adjust all the useful active slot numbers.
			 */
			for (i = 0; i < amap->am_nused; i++)
				amap->am_slots[i] += (slotarea - slotoff);

			/*
			 * We just filled all the empty space in the
			 * front of the amap by activating a few new
			 * slots.
			 */
			amap->am_nslot = amap->am_maxslot;
			entry->aref.ar_pageoff = slotarea - slotadd;
			amap_unlock(amap);

			UVMHIST_LOG(maphist,"<- done (case 2b), amap = %#jx, "
			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * Case 3: we need to allocate a new amap and copy all the amap
	 * data over from old amap to the new one.  Drop the lock before
	 * performing allocation.
	 *
	 * Note: since allocations are likely big, we expect to reduce the
	 * memory fragmentation by allocating them in separate blocks.
	 */

	amap_unlock(amap);

	if (slotneed >= UVM_AMAP_LARGE) {
		return E2BIG;
	}

	slotalloc = amap_roundup_slots(slotneed);
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		/* Will be handled later if fails. */
		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
	}
#endif
	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
	if (newsl == NULL || newbck == NULL || newover == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			kmem_free(newppref, slotalloc * sizeof(*newppref));
		}
#endif
		if (newsl != NULL) {
			kmem_free(newsl, slotalloc * sizeof(*newsl));
		}
		if (newbck != NULL) {
			kmem_free(newbck, slotalloc * sizeof(*newbck));
		}
		if (newover != NULL) {
			kmem_free(newover, slotalloc * sizeof(*newover));
		}
		return ENOMEM;
	}
	amap_lock(amap, RW_WRITER);
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * Copy everything over to new allocated areas.
	 */

	slotadded = slotalloc - amap->am_nslot;
	if (!(flags & AMAP_EXTEND_FORWARDS))
		slotarea = slotalloc - slotmapped;

	/* do am_slots */
	oldsl = amap->am_slots;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	else
		for (i = 0; i < amap->am_nused; i++)
			newsl[i] = oldsl[i] + slotarea - slotoff;
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	if (flags & AMAP_EXTEND_FORWARDS) {
		memcpy(newover, oldover,
		    sizeof(struct vm_anon *) * amap->am_nslot);
		memset(newover + amap->am_nslot, 0,
		    sizeof(struct vm_anon *) * slotadded);
	} else {
		memcpy(newover + slotarea, oldover + slotoff,
		    sizeof(struct vm_anon *) * slotmapped);
		memset(newover, 0,
		    sizeof(struct vm_anon *) * slotarea);
	}
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	else
		memcpy(newbck + slotarea, oldbck + slotoff,
		    sizeof(int) * slotmapped);
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		if (flags & AMAP_EXTEND_FORWARDS) {
			memcpy(newppref, oldppref,
			    sizeof(int) * amap->am_nslot);
			memset(newppref + amap->am_nslot, 0,
			    sizeof(int) * slotadded);
		} else {
			memcpy(newppref + slotarea, oldppref + slotoff,
			    sizeof(int) * slotmapped);
		}
		amap->am_ppref = newppref;
		if ((flags & AMAP_EXTEND_FORWARDS) &&
		    (slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		if (flags & AMAP_EXTEND_FORWARDS)
			pp_setreflen(newppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		else {
			pp_setreflen(newppref, 0, 0,
			    slotalloc - slotneed);
			pp_setreflen(newppref, slotalloc - slotneed, 1,
			    slotneed - slotmapped);
		}
	} else {
		if (amap->am_ppref)
			amap->am_ppref = PPREF_NONE;
	}
#endif

	/* update master values */
	if (flags & AMAP_EXTEND_FORWARDS)
		amap->am_nslot = slotneed;
	else {
		entry->aref.ar_pageoff = slotarea - slotadd;
		amap->am_nslot = slotalloc;
	}
	oldnslots = amap->am_maxslot;
	amap->am_maxslot = slotalloc;
	amap_unlock(amap);

	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
	kmem_free(oldover, oldnslots * sizeof(*oldover));
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = %#jx, slotneed=%jd",
	    (uintptr_t)amap, slotneed, 0, 0);
	return 0;
}

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	u_int slots, lcv, slot, stop;
	struct vm_anon *anon;

	KASSERT(rw_write_held(amap->am_lock));

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/*
		 * Cheaper to traverse am_anon.
		 */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			anon = amap->am_anon[lcv];
			if (anon == NULL) {
				continue;
			}
			if (anon->an_page) {
				pmap_page_protect(anon->an_page, prot);
			}
		}
		return;
	}

	/*
	 * Cheaper to traverse am_slots.
	 */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop) {
			continue;
		}
		anon = amap->am_anon[slot];
		if (anon->an_page) {
			pmap_page_protect(anon->an_page, prot);
		}
	}
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => Called from amap_unref(), when reference count drops to zero.
 * => amap must be locked.
 */

void
amap_wipeout(struct vm_amap *amap)
{
	u_int lcv;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist,"(amap=%#jx)", (uintptr_t)amap, 0,0,0);

	KASSERT(rw_write_held(amap->am_lock));
	KASSERT(amap->am_ref == 0);

	if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
		/*
		 * Note: amap_swap_off() will call us again.
		 */
		amap_unlock(amap);
		return;
	}

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		struct vm_anon *anon;
		u_int slot;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		KASSERT(anon != NULL);
		KASSERT(anon->an_ref != 0);

		KASSERT(anon->an_lock == amap->am_lock);
		UVMHIST_LOG(maphist,"  processing anon %#jx, ref=%jd",
		    (uintptr_t)anon, anon->an_ref, 0, 0);

		/*
		 * Drop the reference.
		 */

		if (__predict_true(--anon->an_ref == 0)) {
			uvm_anfree(anon);
		}
		if (__predict_false((lcv & 31) == 31)) {
			preempt_point();
		}
	}

	/*
	 * Finally, destroy the amap.
	 */

	amap->am_nused = 0;
	amap_unlock(amap);
	amap_free(amap);
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *     used to limit chunking (e.g. if you have a large space that you
 *     know you are going to need to allocate amaps for, there is no point
 *     in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
    vaddr_t startva, vaddr_t endva)
{
	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
	struct vm_amap *amap, *srcamap;
	u_int slots, lcv;
	krwlock_t *oldlock;
	vsize_t len;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "  (map=%#jx, entry=%#jx, flags=%#jx)",
	    (uintptr_t)map, (uintptr_t)entry, flags, -2);

	KASSERT(map != kernel_map);	/* we use nointr pool */

	srcamap = entry->aref.ar_amap;
	len = entry->end - entry->start;

	/*
	 * Is there an amap to copy?  If not, create one.
	 */

	if (srcamap == NULL) {
		const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;

		/*
		 * Check to see if we have a large amap that we can
		 * chunk.  We align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
			vsize_t chunksize;

			/* Convert slots to bytes. */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist,
			    "  chunk amap ==> clip %#jx->%#jx to %#jx->%#jx",
			    entry->start, entry->end, startva, endva);
			UVM_MAP_CLIP_START(map, entry, startva);

			/* Watch out for endva wrap-around! */
			if (endva >= startva) {
				UVM_MAP_CLIP_END(map, entry, endva);
			}
		}

		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
			return;
		}

		UVMHIST_LOG(maphist, "<- done [creating new amap %#jx->%#jx]",
		    entry->start, entry->end, 0, 0);

		/*
		 * Allocate an initialised amap and install it.
		 * Note: we must update the length after clipping.
		 */
		len = entry->end - entry->start;
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(len, 0, waitf);
		if (entry->aref.ar_amap != NULL) {
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		}
		return;
	}

	/*
	 * First check and see if we are the only map entry referencing
	 * the amap we currently have.  If so, then just take it over instead
	 * of copying it.  Note that we are reading am_ref without lock held
	 * as the value can only be one if we have the only reference
	 * to the amap (via our locked map).  If the value is greater than
	 * one, then allocate amap and re-check the value.
	 */

	if (srcamap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	UVMHIST_LOG(maphist,"  amap=%#jx, ref=%jd, must copy it",
	    (uintptr_t)srcamap, srcamap->am_ref, 0, 0);

	/*
	 * Allocate a new amap (note: not initialised, etc).
	 */

	AMAP_B2SLOT(slots, len);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}

	/*
	 * Make the new amap share the source amap's lock, and then lock
	 * both.  We must do this before we set am_nused != 0, otherwise
	 * amap_swap_off() can become interested in the amap.
	 */

	oldlock = amap->am_lock;
	mutex_enter(&amap_list_lock);
	amap->am_lock = srcamap->am_lock;
	mutex_exit(&amap_list_lock);
	rw_obj_hold(amap->am_lock);
	rw_obj_free(oldlock);

	amap_lock(srcamap, RW_WRITER);

	/*
	 * Re-check the reference count with the lock held.  If it has
	 * dropped to one - we can take over the existing map.
	 */

	if (srcamap->am_ref == 1) {
		/* Just take over the existing amap. */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap_unlock(srcamap);
		/* Destroy the new (unused) amap. */
		amap->am_ref--;
		amap_free(amap);
		return;
	}

	/*
	 * Copy the slots.  Zero the padded part.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
		KASSERT(amap->am_anon[lcv]->an_ref > 0);
		KASSERT(amap->am_nused < amap->am_maxslot);
		amap->am_anon[lcv]->an_ref++;
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * Drop our reference to the old amap (srcamap) and unlock.
	 * Since the reference count on srcamap is greater than one,
	 * (we checked above), it cannot drop to zero while it is locked.
	 */

	srcamap->am_ref--;
	KASSERT(srcamap->am_ref > 0);

	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) {
		srcamap->am_flags &= ~AMAP_SHARED;
	}
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    len >> PAGE_SHIFT, -1);
	}
#endif

	amap_unlock(srcamap);

	/*
	 * Install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *	read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *	parent and child vm_map's locked(!).    we have to do this since
 *	we are in the middle of a fork(2) and we can't let the parent
 *	map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;
	u_int lcv, slot;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv" for
	 * loop because some other process could reorder the anon's in the
	 * am_anon[] array on us while the lock is dropped.
	 */

ReStart:
	amap_lock(amap, RW_WRITER);
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		KASSERT(anon->an_lock == amap->am_lock);

		/*
		 * If anon has only one reference - we must have already
		 * copied it.  This can happen if we needed to sleep waiting
		 * for memory in a previous run through this loop.  The new
		 * page might even have been paged out, since it is not wired.
		 */

		if (anon->an_ref == 1) {
			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
			continue;
		}

		/*
		 * The old page must be resident since the parent is wired.
		 */

		pg = anon->an_page;
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count > 0);

		/*
		 * If the page is loaned then it must already be mapped
		 * read-only and we don't need to copy it.
		 */

		if (pg->loan_count != 0) {
			continue;
		}
		KASSERT(pg->uanon == anon);
		KASSERT(pg->uobject == NULL);

		/*
		 * If the page is busy, then we have to unlock, wait for
		 * it and then restart.
		 */

		if (pg->flags & PG_BUSY) {
			uvm_pagewait(pg, amap->am_lock, "cownow");
			goto ReStart;
		}

		/*
		 * Perform a copy-on-write.
		 * First - get a new anon and a page.
		 */

		nanon = uvm_analloc();
		if (nanon) {
			nanon->an_lock = amap->am_lock;
			npg = uvm_pagealloc(NULL, 0, nanon, 0);
		} else {
			npg = NULL;
		}
		if (nanon == NULL || npg == NULL) {
			amap_unlock(amap);
			if (nanon) {
				nanon->an_lock = NULL;
				nanon->an_ref--;
				KASSERT(nanon->an_ref == 0);
				uvm_anfree(nanon);
			}
			uvm_wait("cownowpage");
			goto ReStart;
		}

		/*
		 * Copy the data and replace anon with the new one.
		 * Also, setup its lock (share it with the amap's lock).
		 */

		uvm_pagecopy(pg, npg);
		anon->an_ref--;
		KASSERT(anon->an_ref > 0);
		amap->am_anon[slot] = nanon;

		/*
		 * Drop PG_BUSY on new page.  Since its owner was write
		 * locked all this time - it cannot be PG_RELEASED or
		 * waited on.
		 */
		uvm_pagelock(npg);
		uvm_pageactivate(npg);
		uvm_pageunlock(npg);
		npg->flags &= ~(PG_BUSY|PG_FAKE);
		UVM_PAGE_OWN(npg, NULL);
	}
	amap_unlock(amap);
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	struct vm_amap *amap = origref->ar_amap;
	u_int leftslots;

	KASSERT(splitref->ar_amap == origref->ar_amap);
	AMAP_B2SLOT(leftslots, offset);
	KASSERT(leftslots != 0);

	amap_lock(amap, RW_WRITER);
	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);

#ifdef UVM_AMAP_PPREF
	/* Establish ppref before we add a duplicate reference to the amap. */
	if (amap->am_ppref == NULL) {
		amap_pp_establish(amap, origref->ar_pageoff);
	}
#endif
	/* Note: not a share reference. */
	amap->am_ref++;
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
	amap_unlock(amap);
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible.
 *
 * => amap should be locked by caller.
 */
void
amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
{
	const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);

	KASSERT(rw_write_held(amap->am_lock));

	amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
	if (amap->am_ppref == NULL) {
		/* Failure - just do not use ppref. */
		amap->am_ppref = PPREF_NONE;
		return;
	}
	pp_setreflen(amap->am_ppref, 0, 0, offset);
	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
	    amap->am_nslot - offset);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling.
 * => map and amap must be locked.
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	KASSERT(rw_write_held(amap->am_lock));

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * Advance to the correct place in the array, fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv == 0) {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we are starting from the beginning of
		 * the ppref array; that is, there is no previous chunk.
		 */
		prevref = -1;
		prevlen = 0;
	} else {
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	}

	/*
	 * Now adjust reference counts in range.  Merge the first
	 * changed entry with the last unchanged entry if possible.
	 */
	KASSERT(lcv == curslot);
	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		KASSERT(ref >= 0);
		KASSERT(ref <= amap->am_ref);
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0) {
			amap_wiperange(amap, lcv, len);
		}
	}
}

/*
 * amap_wiperange: wipe out a range of an amap.
 * Note: different from amap_wipeout because the amap is kept intact.
 *
 * => Both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	u_int lcv, stop, slotend;
	bool byanon;

	KASSERT(rw_write_held(amap->am_lock));

	/*
	 * We can either traverse the amap by am_anon or by am_slots.
	 * Determine which way is less expensive.
	 */

	if (slots < amap->am_nused) {
		byanon = true;
		lcv = slotoff;
		stop = slotoff + slots;
		slotend = 0;
	} else {
		byanon = false;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		struct vm_anon *anon;
		u_int curslot, ptr, last;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];
		KASSERT(anon->an_lock == amap->am_lock);

		/*
		 * Remove anon from the amap.
		 */

		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		last = amap->am_nused - 1;
		if (ptr != last) {
			amap->am_slots[ptr] = amap->am_slots[last];
			amap->am_bckptr[amap->am_slots[ptr]] = ptr;
		}
		amap->am_nused--;

		/*
		 * Drop its reference count.
		 */

		KASSERT(anon->an_lock == amap->am_lock);
		if (--anon->an_ref == 0) {
			uvm_anfree(anon);
		}
	}
}

#endif

#if defined(VMSWAP)

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons.
 *    eg. amaps being wiped out, released anons.
 * => return true if failed.
 */

bool
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	bool rv = false;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

	mutex_enter(&amap_list_lock);
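	/*
	 * Walk the global amap list.  Marker entries are inserted around
	 * each amap so that we can find our place again after the list
	 * lock has been dropped and neighbouring amaps may have gone away.
	 */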
	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		/* amap_list_lock prevents the lock pointer from changing. */
		if (!amap_lock_try(am, RW_WRITER)) {
			(void)kpause("amapswpo", false, 1, &amap_list_lock);
			am_next = LIST_NEXT(&marker_prev, am_list);
			if (am_next == &marker_next) {
				am_next = LIST_NEXT(am_next, am_list);
			} else {
				KASSERT(LIST_NEXT(am_next, am_list) ==
				    &marker_next);
			}
			LIST_REMOVE(&marker_prev, am_list);
			LIST_REMOVE(&marker_next, am_list);
			continue;
		}

		mutex_exit(&amap_list_lock);

		/* If am_nused == 0, the amap could be free - careful. */
		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];
			KASSERT(anon->an_lock == am->am_lock);

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(am, anon);
			amap_lock(am, RW_WRITER);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

		if (am) {
			amap_unlock(am);
		}

		mutex_enter(&amap_list_lock);
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}
	mutex_exit(&amap_list_lock);

	return rv;
}

#endif /* defined(VMSWAP) */

/*
 * amap_lookup: look up a page in an amap.
 *
 * => amap should be locked by caller.
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
	struct vm_amap *amap = aref->ar_amap;
	struct vm_anon *an;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_lock_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	an = amap->am_anon[slot];

	UVMHIST_LOG(maphist,
	    "<- done (amap=%#jx, offset=%#jx, result=%#jx)",
	    (uintptr_t)amap, offset, (uintptr_t)an, 0);

	KASSERT(slot < amap->am_nslot);
	KASSERT(an == NULL || an->an_ref != 0);
	KASSERT(an == NULL || an->an_lock == amap->am_lock);
	return an;
}

/*
 * amap_lookups: look up a range of pages in an amap.
 *
 * => amap should be locked by caller.
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
    int npages)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_lock_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	UVMHIST_LOG(maphist, "  slot=%u, npages=%d, nslot=%d",
	    slot, npages, amap->am_nslot, 0);

	KASSERT((slot + (npages - 1)) < amap->am_nslot);
	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));

#if defined(DIAGNOSTIC)
	for (int i = 0; i < npages; i++) {
		struct vm_anon * const an = anons[i];
		if (an == NULL) {
			continue;
		}
		KASSERT(an->an_ref != 0);
		KASSERT(an->an_lock == amap->am_lock);
	}
#endif
	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
}

/*
 * amap_add: add (or replace) a page to an amap.
 *
 * => amap should be locked by caller.
 * => anon must have the lock associated with this amap.
 */
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    bool replace)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_write_held(amap->am_lock));
	KASSERT(anon->an_lock == amap->am_lock);

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	KASSERT(slot < amap->am_nslot);

	if (replace) {
		struct vm_anon *oanon = amap->am_anon[slot];

		KASSERT(oanon != NULL);
		if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
			pmap_page_protect(oanon->an_page, VM_PROT_NONE);
			/*
			 * XXX: suppose page is supposed to be wired somewhere?
			 */
		}
	} else {
		KASSERT(amap->am_anon[slot] == NULL);
		KASSERT(amap->am_nused < amap->am_maxslot);
		amap->am_bckptr[slot] = amap->am_nused;
		amap->am_slots[amap->am_nused] = slot;
		amap->am_nused++;
	}
	amap->am_anon[slot] = anon;
	UVMHIST_LOG(maphist,
	    "<- done (amap=%#jx, offset=%#x, anon=%#jx, rep=%d)",
	    (uintptr_t)amap, offset, (uintptr_t)anon, replace);
}

/*
 * amap_unadd: remove a page from an amap.
 *
 * => amap should be locked by caller.
 */
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
	struct vm_amap *amap = aref->ar_amap;
	u_int slot, ptr, last;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
	KASSERT(rw_write_held(amap->am_lock));

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;
	KASSERT(slot < amap->am_nslot);
	KASSERT(amap->am_anon[slot] != NULL);
	KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);

	amap->am_anon[slot] = NULL;
	ptr = amap->am_bckptr[slot];

	last = amap->am_nused - 1;
	if (ptr != last) {
		/* Move the last entry to keep the slots contiguous. */
		amap->am_slots[ptr] = amap->am_slots[last];
		amap->am_bckptr[amap->am_slots[ptr]] = ptr;
	}
	amap->am_nused--;
	UVMHIST_LOG(maphist, "<- done (amap=%#jx, slot=%#jx)",
	    (uintptr_t)amap, slot,0, 0);
}

/*
 * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
 */
static void
amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
    int refv, bool all)
{

#ifdef UVM_AMAP_PPREF
	KASSERT(rw_write_held(amap->am_lock));

	/*
	 * We must establish the ppref array before changing am_ref
	 * so that the ppref values match the current amap refcount.
	 */

	if (amap->am_ppref == NULL) {
		amap_pp_establish(amap, offset);
	}
#endif

	amap->am_ref += refv;

#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(amap, offset, len, refv);
	}
#endif
	amap_unlock(amap);
}

/*
 * amap_ref: gain a reference to an amap.
 *
 * => amap must not be locked (we will lock).
 * => "offset" and "len" are in units of pages.
 * => Called at fork time to gain the child's reference.
 */
void
amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
{
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	amap_lock(amap, RW_WRITER);
	if (flags & AMAP_SHARED) {
		amap->am_flags |= AMAP_SHARED;
	}
	amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);

	UVMHIST_LOG(maphist,"<- done!  amap=%#jx", (uintptr_t)amap, 0, 0, 0);
}

/*
 * amap_unref: remove a reference to an amap.
 *
 * => All pmap-level references to this amap must be already removed.
 * => Called from uvm_unmap_detach(); entry is already removed from the map.
 * => We will lock amap, so it must be unlocked.
 */
void
amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
{
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	amap_lock(amap, RW_WRITER);

	UVMHIST_LOG(maphist,"  amap=%#jx  refs=%d, nused=%d",
	    (uintptr_t)amap, amap->am_ref, amap->am_nused, 0);
	KASSERT(amap->am_ref > 0);

	if (amap->am_ref == 1) {

		/*
		 * If the last reference - wipeout and destroy the amap.
		 */
		amap->am_ref--;
		amap_wipeout(amap);
		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
		return;
	}

	/*
	 * Otherwise, drop the reference count(s) on anons.
	 */

	if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
		amap->am_flags &= ~AMAP_SHARED;
	}
	amap_adjref_anons(amap, offset, len, -1, all);

	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
}