/* $NetBSD: pmap.c,v 1.78 1999/07/28 06:54:41 thorpej Exp $ */
/*
*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Charles D. Cranor and
* Washington University.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* pmap.c: i386 pmap module rewrite
* Chuck Cranor <chuck@ccrc.wustl.edu>
* 11-Aug-97
*
* history of this pmap module: in addition to my own input, i used
* the following references for this rewrite of the i386 pmap:
*
* [1] the NetBSD i386 pmap. this pmap appears to be based on the
* BSD hp300 pmap done by Mike Hibler at University of Utah.
* it was then ported to the i386 by William Jolitz of UUNET
* Technologies, Inc. Then Charles M. Hannum of the NetBSD
* project fixed some bugs and provided some speed ups.
*
* [2] the FreeBSD i386 pmap. this pmap seems to be the
* Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
* and David Greenman.
*
* [3] the Mach pmap. this pmap, from CMU, seems to have migrated
* between several processors. the VAX version was done by
* Avadis Tevanian, Jr., and Michael Wayne Young. the i386
* version was done by Lance Berc, Mike Kupfer, Bob Baron,
* David Golub, and Richard Draves. the alpha version was
* done by Alessandro Forin (CMU/Mach) and Chris Demetriou
* (NetBSD/alpha).
*/
#include "opt_cputype.h"
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/user.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <uvm/uvm.h>
#include <machine/cpu.h>
#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>
/*
* general info:
*
* - for an explanation of how the i386 MMU hardware works see
* the comments in <machine/pte.h>.
*
* - for an explanation of the general memory structure used by
* this pmap (including the recursive mapping), see the comments
* in <machine/pmap.h>.
*
* this file contains the code for the "pmap module." the module's
* job is to manage the hardware's virtual to physical address mappings.
* note that there are two levels of mapping in the VM system:
*
* [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
* to map ranges of virtual address space to objects/files. for
* example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
* to the file /bin/ls starting at offset zero." note that
* the upper layer mapping is not concerned with how individual
* vm_pages are mapped.
*
* [2] the lower layer of the VM system (the pmap) maintains the mappings
* from virtual addresses to physical pages. it is concerned with which vm_page is
* mapped where. for example, when you run /bin/ls and start
* at page 0x1000 the fault routine may lookup the correct page
* of the /bin/ls file and then ask the pmap layer to establish
* a mapping for it.
*
* note that information in the lower layer of the VM system can be
* thrown away since it can easily be reconstructed from the info
* in the upper layer.
*
* data structures we use include:
*
* - struct pmap: describes the address space of one thread
* - struct pv_entry: describes one <PMAP,VA> mapping of a PA
* - struct pv_head: there is one pv_head per managed page of
* physical memory. the pv_head points to a list of pv_entry
* structures which describe all the <PMAP,VA> pairs that this
* page is mapped in. this is critical for page based operations
* such as pmap_page_protect() [change protection on _all_ mappings
* of a page]
* - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
* if we run out of pv_entry's we allocate a new pv_page and free
* its pv_entrys.
* - pmap_remove_record: a list of virtual addresses whose mappings
* have been changed. used for TLB flushing.
*/
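/*
 * example (a sketch): the pv_head => pv_entry chain is what lets a
 * page-based operation find every mapping of a physical page.  a
 * walker looks roughly like this ("pvh" is a placeholder for the
 * page's struct pv_head pointer):
 *
 *	struct pv_entry *pve;
 *
 *	simple_lock(&pvh->pvh_lock);
 *	for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) {
 *		... operate on the <pve->pv_pmap, pve->pv_va> mapping ...
 *	}
 *	simple_unlock(&pvh->pvh_lock);
 */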
/*
* memory allocation
*
* - there are three data structures that we must dynamically allocate:
*
* [A] new process' page directory page (PDP)
* - plan 1: done at pmap_pinit() we use
* uvm_km_alloc(kernel_map, NBPG) [fka kmem_alloc] to do this
* allocation.
*
* if we are low in free physical memory then we sleep in
* uvm_km_alloc -- in this case this is ok since we are creating
* a new pmap and should not be holding any locks.
*
* if the kernel is totally out of virtual space
* (i.e. uvm_km_alloc returns NULL), then we panic.
*
* XXX: the fork code currently has no way to return an "out of
* memory, try again" error code since uvm_fork [fka vm_fork]
* is a void function.
*
* [B] new page tables pages (PTP)
* - plan 1: call uvm_pagealloc()
* => success: zero page, add to pm_pdir
* => failure: we are out of free vm_pages
* - plan 2: using a linked LIST of active pmaps we attempt
* to "steal" a PTP from another process. we lock
* the target pmap with simple_lock_try so that if it is
* busy we do not block.
* => success: remove old mappings, zero, add to pm_pdir
* => failure: highly unlikely
* - plan 3: panic
*
* note: for kernel PTPs, we start with NKPTP of them. as we map
* kernel memory (at uvm_map time) we check to see if we've grown
* the kernel pmap. if so, we call the optional function
* pmap_growkernel() to grow the kernel PTPs in advance.
*
* [C] pv_entry structures
* - plan 1: try to allocate one off the free list
* => success: done!
* => failure: no more free pv_entrys on the list
* - plan 2: try to allocate a new pv_page to add a chunk of
* pv_entrys to the free list
* [a] obtain a free, unmapped, VA in kmem_map. either
* we have one saved from a previous call, or we allocate
* one now using a "vm_map_lock_try" in uvm_map
* => success: we have an unmapped VA, continue to [b]
* => failure: unable to lock kmem_map or out of VA in it.
* move on to plan 3.
* [b] allocate a page in kmem_object for the VA
* => success: map it in, free the pv_entry's, DONE!
* => failure: kmem_object locked, no free vm_pages, etc.
* save VA for later call to [a], go to plan 3.
* - plan 3: using the pv_entry/pv_head lists find a pv_entry
* structure that is part of a non-kernel lockable pmap
* and "steal" that pv_entry by removing the mapping
* and reusing that pv_entry.
* => success: done
* => failure: highly unlikely: unable to lock and steal
* pv_entry
* - plan 4: we panic.
*/
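/*
 * example (a sketch of the pv_entry cascade above, as implemented
 * later in this file by pmap_alloc_pv and pmap_alloc_pvpage):
 *
 *	pv = take one off the free list			[plan 1]
 *	if (pv == NULL)
 *		pv = map a new pv_page, carve it up	[plan 2]
 *	if (pv == NULL && mode == ALLOCPV_NEED)
 *		pv = steal a pv_entry from some pmap	[plan 3]
 *	if (pv == NULL)
 *		panic					[plan 4]
 */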
/*
* locking
*
* we have the following locks that we must contend with:
*
* "normal" locks:
*
* - pmap_main_lock
* this lock is used to prevent deadlock and/or provide mutex
* access to the pmap system. most operations lock the pmap
* structure first, then they lock the pv_lists (if needed).
* however, some operations such as pmap_page_protect lock
* the pv_lists and then lock pmaps. in order to prevent a
* cycle, we require a mutex lock when locking the pv_lists
* first. thus, the "pmap => pv_list" lockers must gain a
* read-lock on pmap_main_lock before locking the pmap. and
* the "pv_list => pmap" lockers must gain a write-lock on
* pmap_main_lock before locking. since only one thread
* can write-lock a lock at a time, this provides mutex.
*
* "simple" locks:
*
* - pmap lock (per pmap, part of uvm_object)
* this lock protects the fields in the pmap structure including
* the non-kernel PDEs in the PDP, and the PTEs. it also locks
* in the alternate PTE space (since that is determined by the
* entry in the PDP).
*
* - pvh_lock (per pv_head)
* this lock protects the pv_entry list which is chained off the
* pv_head structure for a specific managed PA. it is locked
* when traversing the list (e.g. adding/removing mappings,
* syncing R/M bits, etc.)
*
* - pvalloc_lock
* this lock protects the data structures which are used to manage
* the free list of pv_entry structures.
*
* - pmaps_lock
* this lock protects the list of active pmaps (headed by "pmaps").
* we lock it when adding or removing pmaps from this list.
*
* - pmap_copy_page_lock
* locks the tmp kernel PTE mappings we used to copy data
*
* - pmap_zero_page_lock
* locks the tmp kernel PTE mapping we use to zero a page
*
* - pmap_tmpptp_lock
* locks the tmp kernel PTE mapping we use to look at a PTP
* in another process
*
* XXX: would be nice to have per-CPU VAs for the above 4
*/
/*
* locking data structures
*/
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
struct lock pmap_main_lock;
simple_lock_data_t pvalloc_lock;
simple_lock_data_t pmaps_lock;
simple_lock_data_t pmap_copy_page_lock;
simple_lock_data_t pmap_zero_page_lock;
simple_lock_data_t pmap_tmpptp_lock;
#define PMAP_MAP_TO_HEAD_LOCK() \
lockmgr(&pmap_main_lock, LK_SHARED, (void *) 0)
#define PMAP_MAP_TO_HEAD_UNLOCK() \
lockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0)
#define PMAP_HEAD_TO_MAP_LOCK() \
lockmgr(&pmap_main_lock, LK_EXCLUSIVE, (void *) 0)
#define PMAP_HEAD_TO_MAP_UNLOCK() \
lockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0)
#else
#define PMAP_MAP_TO_HEAD_LOCK() /* null */
#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */
#define PMAP_HEAD_TO_MAP_LOCK() /* null */
#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */
#endif
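/*
 * example (a sketch of the two lock orderings in use; "pvh" is a
 * placeholder).  a "pmap => pv_list" operation such as pmap_remove
 * does:
 *
 *	PMAP_MAP_TO_HEAD_LOCK();		read-lock pmap_main_lock
 *	ptes = pmap_map_ptes(pmap);		then lock the pmap
 *	... lock/unlock individual pvh_locks as needed ...
 *	pmap_unmap_ptes(pmap);
 *	PMAP_MAP_TO_HEAD_UNLOCK();
 *
 * while a "pv_list => pmap" operation such as pmap_page_protect does:
 *
 *	PMAP_HEAD_TO_MAP_LOCK();		write-lock pmap_main_lock
 *	simple_lock(&pvh->pvh_lock);		then lock the pv_head
 *	... lock the pmaps found on the pv list ...
 *	simple_unlock(&pvh->pvh_lock);
 *	PMAP_HEAD_TO_MAP_UNLOCK();
 */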
/*
* global data structures
*/
struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
/*
* nkpde is the number of kernel PTPs allocated for the kernel at
* boot time (NKPTP is a compile time override). this number can
* grow dynamically as needed (but once allocated, we never free
* kernel PTPs).
*/
int nkpde = NKPTP;
#ifdef NKPDE
#error "obsolete NKPDE: use NKPTP"
#endif
/*
* pmap_pg_g: if our processor supports PG_G in the PTE then we
* set pmap_pg_g to PG_G (otherwise it is zero).
*/
int pmap_pg_g = 0;
/*
* i386 physical memory comes in a big contig chunk with a small
* hole toward the front of it... the following 4 paddr_t's
* (shared with machdep.c) describe the physical address space
* of this machine.
*/
paddr_t avail_start; /* PA of first available physical page */
paddr_t avail_end; /* PA of last available physical page */
paddr_t hole_start; /* PA of start of "hole" */
paddr_t hole_end; /* PA of end of "hole" */
/*
* other data structures
*/
static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */
static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */
/*
* the following two vaddr_t's are used during system startup
* to keep track of how much of the kernel's VM space we have used.
* once the system is started, the management of the remaining kernel
* VM space is turned over to the kernel_map vm_map.
*/
static vaddr_t virtual_avail; /* VA of first free KVA */
static vaddr_t virtual_end; /* VA of last free KVA */
/*
* pv_page management structures: locked by pvalloc_lock
*/
TAILQ_HEAD(pv_pagelist, pv_page);
static struct pv_pagelist pv_freepages; /* list of pv_pages with free entrys */
static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */
static int pv_nfpvents; /* # of free pv entries */
static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */
static vaddr_t pv_cachedva; /* cached VA for later use */
#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */
#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2))
/* high water mark */
/*
* linked list of all non-kernel pmaps
*/
static struct pmap_head pmaps;
static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */
/*
* pool that pmap structures are allocated from
*/
struct pool pmap_pmap_pool;
/*
* special VAs and the PTEs that map them
*/
static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte;
static caddr_t csrcp, cdstp, zerop, ptpp;
caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */
extern vaddr_t msgbuf_vaddr;
extern paddr_t msgbuf_paddr;
extern vaddr_t idt_vaddr; /* we allocate IDT early */
extern paddr_t idt_paddr;
#if defined(I586_CPU)
/* stuff to fix the pentium f00f bug */
extern vaddr_t pentium_idt_vaddr;
#endif
/*
* local prototypes
*/
static struct pv_entry *pmap_add_pvpage __P((struct pv_page *, boolean_t));
static struct vm_page *pmap_alloc_ptp __P((struct pmap *, int, boolean_t));
static struct pv_entry *pmap_alloc_pv __P((struct pmap *, int)); /* see codes below */
#define ALLOCPV_NEED 0 /* need PV now */
#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */
#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */
static struct pv_entry *pmap_alloc_pvpage __P((struct pmap *, int));
static void pmap_enter_pv __P((struct pv_head *,
struct pv_entry *, struct pmap *,
vaddr_t, struct vm_page *));
static void pmap_free_pv __P((struct pmap *, struct pv_entry *));
static void pmap_free_pvs __P((struct pmap *, struct pv_entry *));
static void pmap_free_pv_doit __P((struct pv_entry *));
static void pmap_free_pvpage __P((void));
static struct vm_page *pmap_get_ptp __P((struct pmap *, int, boolean_t));
static boolean_t pmap_is_curpmap __P((struct pmap *));
static void pmap_kremove1 __P((vaddr_t));
static pt_entry_t *pmap_map_ptes __P((struct pmap *));
static struct pv_entry *pmap_remove_pv __P((struct pv_head *, struct pmap *,
vaddr_t));
static boolean_t pmap_remove_pte __P((struct pmap *, struct vm_page *,
pt_entry_t *, vaddr_t));
static void pmap_remove_ptes __P((struct pmap *,
struct pmap_remove_record *,
struct vm_page *, vaddr_t,
vaddr_t, vaddr_t));
static struct vm_page *pmap_steal_ptp __P((struct uvm_object *,
vaddr_t));
static vaddr_t pmap_tmpmap_pa __P((paddr_t));
static pt_entry_t *pmap_tmpmap_pvepte __P((struct pv_entry *));
static void pmap_tmpunmap_pa __P((void));
static void pmap_tmpunmap_pvepte __P((struct pv_entry *));
static boolean_t pmap_transfer_ptes __P((struct pmap *,
struct pmap_transfer_location *,
struct pmap *,
struct pmap_transfer_location *,
int, boolean_t));
static boolean_t pmap_try_steal_pv __P((struct pv_head *,
struct pv_entry *,
struct pv_entry *));
static void pmap_unmap_ptes __P((struct pmap *));
void pmap_kenter_pa0 __P((vaddr_t, paddr_t, vm_prot_t,
boolean_t));
void pmap_pinit __P((pmap_t));
void pmap_release __P((pmap_t));
/*
* p m a p i n l i n e h e l p e r f u n c t i o n s
*/
/*
* pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
* of course the kernel is always loaded
*/
__inline static boolean_t
pmap_is_curpmap(pmap)
struct pmap *pmap;
{
return((pmap == pmap_kernel()) ||
(pmap->pm_pdirpa == (paddr_t) rcr3()));
}
/*
* pmap_tmpmap_pa: map a page in for tmp usage
*
* => returns with pmap_tmpptp_lock held
*/
__inline static vaddr_t
pmap_tmpmap_pa(pa)
paddr_t pa;
{
simple_lock(&pmap_tmpptp_lock);
#if defined(DIAGNOSTIC)
if (*ptp_pte)
panic("pmap_tmpmap_pa: ptp_pte in use?");
#endif
*ptp_pte = PG_V | PG_RW | pa; /* always a new mapping */
return((vaddr_t)ptpp);
}
/*
* pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
*
* => we release pmap_tmpptp_lock
*/
__inline static void
pmap_tmpunmap_pa()
{
#if defined(DIAGNOSTIC)
if (!pmap_valid_entry(*ptp_pte))
panic("pmap_tmpunmap_pa: our pte invalid?");
#endif
*ptp_pte = 0; /* zap! */
pmap_update_pg((vaddr_t)ptpp);
simple_unlock(&pmap_tmpptp_lock);
}
/*
* pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry
*
* => do NOT use this on kernel mappings [why? because pv_ptp may be NULL]
* => we may grab pmap_tmpptp_lock and return with it held
*/
__inline static pt_entry_t *
pmap_tmpmap_pvepte(pve)
struct pv_entry *pve;
{
#ifdef DIAGNOSTIC
if (pve->pv_pmap == pmap_kernel())
panic("pmap_tmpmap_pvepte: attempt to map kernel");
#endif
/* is it current pmap? use direct mapping... */
if (pmap_is_curpmap(pve->pv_pmap))
return(vtopte(pve->pv_va));
return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp)))
+ ptei((unsigned)pve->pv_va));
}
/*
* pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte
*
* => we will release pmap_tmpptp_lock if we hold it
*/
__inline static void
pmap_tmpunmap_pvepte(pve)
struct pv_entry *pve;
{
/* was it current pmap? if so, return */
if (pmap_is_curpmap(pve->pv_pmap))
return;
pmap_tmpunmap_pa();
}
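/*
 * example (a sketch): pmap_tmpmap_pa and pmap_tmpunmap_pa are always
 * used as a bracketing pair, with pmap_tmpptp_lock held in between
 * (this is the pattern pmap_steal_ptp uses below):
 *
 *	pt_entry_t *ptes;
 *
 *	ptes = (pt_entry_t *) pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp));
 *	... examine or modify the PTP through "ptes" ...
 *	pmap_tmpunmap_pa();		zaps the PTE, drops the lock
 */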
/*
* pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
*
* => we lock enough pmaps to keep things locked in
* => must be undone with pmap_unmap_ptes before returning
*/
__inline static pt_entry_t *
pmap_map_ptes(pmap)
struct pmap *pmap;
{
pd_entry_t opde;
/* if curpmap then we are always mapped */
if (pmap_is_curpmap(pmap)) {
simple_lock(&pmap->pm_obj.vmobjlock);
return(PTE_BASE);
}
/* need to lock both curpmap and pmap: use ordered locking */
if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) {
simple_lock(&pmap->pm_obj.vmobjlock);
simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
} else {
simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
simple_lock(&pmap->pm_obj.vmobjlock);
}
/* need to load a new alternate pt space into curpmap? */
opde = *APDP_PDE;
if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
*APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V);
if (pmap_valid_entry(opde))
pmap_update();
}
return(APTE_BASE);
}
/*
* pmap_unmap_ptes: unlock the PTE mapping of "pmap"
*/
__inline static void
pmap_unmap_ptes(pmap)
struct pmap *pmap;
{
if (pmap_is_curpmap(pmap)) {
simple_unlock(&pmap->pm_obj.vmobjlock);
} else {
simple_unlock(&pmap->pm_obj.vmobjlock);
simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
}
}
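/*
 * example (a sketch): callers bracket all PTE access with the pair
 * above, indexing the returned space by page number (pmap_extract,
 * below, does exactly this):
 *
 *	pt_entry_t *ptes, pte;
 *
 *	ptes = pmap_map_ptes(pmap);	locks pmap; PTE_BASE or APTE_BASE
 *	pte = ptes[i386_btop(va)];
 *	pmap_unmap_ptes(pmap);		unlocks
 */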
/*
* p m a p k e n t e r f u n c t i o n s
*
* functions to quickly enter/remove pages from the kernel address
* space. pmap_kremove/pmap_kenter_pgs are exported to MI kernel.
* we make use of the recursive PTE mappings.
*/
/*
* pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
*
* => no need to lock anything, assume va is already allocated
* => should be faster than normal pmap enter function
*/
void
pmap_kenter_pa(va, pa, prot)
vaddr_t va;
paddr_t pa;
vm_prot_t prot;
{
pmap_kenter_pa0(va, pa, prot, FALSE);
}
void
pmap_kenter_pa0(va, pa, prot, islocked)
vaddr_t va;
paddr_t pa;
vm_prot_t prot;
boolean_t islocked;
{
struct pmap *pm = pmap_kernel();
pt_entry_t *pte, opte;
int s;
if (islocked == FALSE) {
s = splimp();
simple_lock(&pm->pm_obj.vmobjlock);
}
pm->pm_stats.resident_count++;
pm->pm_stats.wired_count++;
if (islocked == FALSE) {
simple_unlock(&pm->pm_obj.vmobjlock);
splx(s);
}
pte = vtopte(va);
opte = *pte;
*pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) |
PG_V | pmap_pg_g; /* zap! */
if (pmap_valid_entry(opte))
pmap_update_pg(va);
}
/*
* pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
*
* => no need to lock anything
* => caller must dispose of any vm_page mapped in the va range
* => note: not an inline function
* => we assume the va is page aligned and the len is a multiple of NBPG
* => we assume kernel only unmaps valid addresses and thus don't bother
* checking the valid bit before doing TLB flushing
*/
void
pmap_kremove(va, len)
vaddr_t va;
vsize_t len;
{
struct pmap *pm = pmap_kernel();
pt_entry_t *pte;
int s;
len = len / NBPG;
s = splimp();
simple_lock(&pm->pm_obj.vmobjlock);
for ( /* null */ ; len ; len--, va += NBPG) {
pte = vtopte(va);
#ifdef DIAGNOSTIC
if (*pte & PG_PVLIST)
panic("pmap_kremove: PG_PVLIST mapping for 0x%lx\n",
va);
#endif
pm->pm_stats.resident_count--;
pm->pm_stats.wired_count--;
*pte = 0; /* zap! */
#if defined(I386_CPU)
if (cpu_class != CPUCLASS_386)
#endif
pmap_update_pg(va);
}
simple_unlock(&pm->pm_obj.vmobjlock);
splx(s);
#if defined(I386_CPU)
if (cpu_class == CPUCLASS_386)
pmap_update();
#endif
}
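/*
 * example (a sketch): a typical unmanaged kernel mapping made and torn
 * down with the functions above ("kva" and "pa" are placeholders; kva
 * must already be allocated and page aligned):
 *
 *	pmap_kenter_pa(kva, pa, VM_PROT_READ | VM_PROT_WRITE);
 *	... use the mapping; no pv_entry tracking is done ...
 *	pmap_kremove(kva, NBPG);
 */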
/*
* pmap_kremove1: inline one page version of above
*/
__inline static void
pmap_kremove1(va)
vaddr_t va;
{
struct pmap *pm = pmap_kernel();
pt_entry_t *pte;
int s;
s = splimp();
simple_lock(&pm->pm_obj.vmobjlock);
pm->pm_stats.resident_count--;
pm->pm_stats.wired_count--;
pte = vtopte(va);
*pte = 0; /* zap! */
pmap_update_pg(va);
simple_unlock(&pm->pm_obj.vmobjlock);
splx(s);
}
/*
* pmap_kenter_pgs: enter in a number of vm_pages
*/
void
pmap_kenter_pgs(va, pgs, npgs)
vaddr_t va;
struct vm_page **pgs;
int npgs;
{
struct pmap *pm = pmap_kernel();
pt_entry_t *pte, opte;
int s, lcv;
vaddr_t tva;
#if defined(I386_CPU)
boolean_t need_update = FALSE;
#endif
s = splimp();
simple_lock(&pm->pm_obj.vmobjlock);
pm->pm_stats.resident_count += npgs;
pm->pm_stats.wired_count += npgs;
simple_unlock(&pm->pm_obj.vmobjlock);
splx(s);
for (lcv = 0 ; lcv < npgs ; lcv++) {
tva = va + lcv * NBPG;
pte = vtopte(tva);
opte = *pte;
*pte = VM_PAGE_TO_PHYS(pgs[lcv]) | PG_RW | PG_V | pmap_pg_g;
#if defined(I386_CPU)
if (cpu_class == CPUCLASS_386) {
if (pmap_valid_entry(opte))
need_update = TRUE;
continue;
}
#endif
if (pmap_valid_entry(opte))
pmap_update_pg(tva);
}
#if defined(I386_CPU)
if (need_update && cpu_class == CPUCLASS_386)
pmap_update();
#endif
}
/*
* p m a p i n i t f u n c t i o n s
*
* pmap_bootstrap and pmap_init are called during system startup
* to init the pmap module. pmap_bootstrap() does a low level
* init just to get things rolling. pmap_init() finishes the job.
*/
/*
* pmap_bootstrap: get the system in a state where it can run with VM
* properly enabled (called before main()). the VM system is
* fully init'd later...
*
* => on i386, locore.s has already enabled the MMU by allocating
* a PDP for the kernel, and nkpde PTP's for the kernel.
* => kva_start is the first free virtual address in kernel space
* => we make use of the global vars from machdep.c:
* avail_start, avail_end, hole_start, hole_end
*/
void
pmap_bootstrap(kva_start)
vaddr_t kva_start;
{
struct pmap *kpm;
vaddr_t kva;
pt_entry_t *pte;
int first16q;
/*
* set the page size (default value is 4K which is ok)
*/
uvm_setpagesize();
/*
* a quick sanity check
*/
if (PAGE_SIZE != NBPG)
panic("pmap_bootstrap: PAGE_SIZE != NBPG");
/*
* use the very last page of physical memory for the message buffer
*/
avail_end -= i386_round_page(MSGBUFSIZE);
msgbuf_paddr = avail_end;
/*
* set up our local static global vars that keep track of the
* usage of KVM before kernel_map is set up
*/
virtual_avail = kva_start; /* first free KVA */
virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */
/*
* set up protection_codes: we need to be able to convert from
* a MI protection code (some combo of VM_PROT...) to something
* we can jam into a i386 PTE.
*/
protection_codes[VM_PROT_NONE] = 0; /* --- */
protection_codes[VM_PROT_EXECUTE] = PG_RO; /* --x */
protection_codes[VM_PROT_READ] = PG_RO; /* -r- */
protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO; /* -rx */
protection_codes[VM_PROT_WRITE] = PG_RW; /* w-- */
protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW;/* w-x */
protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW; /* wr- */
protection_codes[VM_PROT_ALL] = PG_RW; /* wrx */
/*
* now we init the kernel's pmap
*
* the kernel pmap's pm_obj is not used for much. however, in
* user pmaps the pm_obj contains the list of active PTPs.
* the pm_obj currently does not have a pager. it might be possible
* to add a pager that would allow a process to read-only mmap its
* own page tables (fast user level vtophys?). this may or may not
* be useful.
*/
kpm = pmap_kernel();
simple_lock_init(&kpm->pm_obj.vmobjlock);
kpm->pm_obj.pgops = NULL;
TAILQ_INIT(&kpm->pm_obj.memq);
kpm->pm_obj.uo_npages = 0;
kpm->pm_obj.uo_refs = 1;
memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */
kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE);
kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3;
kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
i386_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
/*
* the above is just a rough estimate and not critical to the proper
* operation of the system.
*/
curpcb->pcb_pmap = kpm; /* proc0's pcb */
/*
* enable global TLB entries if they are supported
*/
if (cpu_feature & CPUID_PGE) {
lcr4(rcr4() | CR4_PGE); /* enable hardware (via %cr4) */
pmap_pg_g = PG_G; /* enable software */
/* add PG_G attribute to already mapped kernel pages */
for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ;
kva += NBPG)
if (pmap_valid_entry(PTE_BASE[i386_btop(kva)]))
PTE_BASE[i386_btop(kva)] |= PG_G;
}
/*
* now we allocate the "special" VAs which are used for tmp mappings
* by the pmap (and other modules). we allocate the VAs by advancing
* virtual_avail (note that there are no pages mapped at these VAs).
* we find the PTE that maps the allocated VA via the linear PTE
* mapping.
*/
pte = PTE_BASE + i386_btop(virtual_avail);
csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */
virtual_avail += NBPG; pte++; /* advance */
cdstp = (caddr_t) virtual_avail; cdst_pte = pte;
virtual_avail += NBPG; pte++;
zerop = (caddr_t) virtual_avail; zero_pte = pte;
virtual_avail += NBPG; pte++;
ptpp = (caddr_t) virtual_avail; ptp_pte = pte;
virtual_avail += NBPG; pte++;
/* XXX: vmmap used by mem.c... should be uvm_map_reserve */
vmmap = (char *)virtual_avail; /* don't need pte */
virtual_avail += NBPG; pte++;
msgbuf_vaddr = virtual_avail; /* don't need pte */
virtual_avail += round_page(MSGBUFSIZE); pte++;
idt_vaddr = virtual_avail; /* don't need pte */
virtual_avail += NBPG; pte++;
avail_end -= NBPG;
idt_paddr = avail_end;
#if defined(I586_CPU)
/* pentium f00f bug stuff */
pentium_idt_vaddr = virtual_avail; /* don't need pte */
virtual_avail += NBPG; pte++;
#endif
/*
* now we reserve some VM for mapping pages when doing a crash dump
*/
virtual_avail = reserve_dumppages(virtual_avail);
/*
* init the static-global locks and global lists.
*/
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
lockinit(&pmap_main_lock, PVM, "pmaplk", 0, 0);
simple_lock_init(&pvalloc_lock);
simple_lock_init(&pmaps_lock);
simple_lock_init(&pmap_copy_page_lock);
simple_lock_init(&pmap_zero_page_lock);
simple_lock_init(&pmap_tmpptp_lock);
#endif
LIST_INIT(&pmaps);
TAILQ_INIT(&pv_freepages);
TAILQ_INIT(&pv_unusedpgs);
/*
* initialize the pmap pool.
*/
pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
0, pool_page_alloc_nointr, pool_page_free_nointr, M_VMPMAP);
/*
* we must call uvm_page_physload() after we are done playing with
* virtual_avail but before we call pmap_steal_memory. [i.e. here]
* this call tells the VM system how much physical memory it
* controls. If we have 16M of RAM or less, just put it all on
* the default free list. Otherwise, put the first 16M of RAM
* on a lower priority free list (so that all of the ISA DMA'able
* memory won't be eaten up first-off).
*/
if (avail_end <= (16 * 1024 * 1024))
first16q = VM_FREELIST_DEFAULT;
else
first16q = VM_FREELIST_FIRST16;
if (avail_start < hole_start) /* any free memory before the hole? */
uvm_page_physload(atop(avail_start), atop(hole_start),
atop(avail_start), atop(hole_start),
first16q);
if (first16q == VM_FREELIST_FIRST16) {
uvm_page_physload(atop(hole_end), atop(16 * 1024 * 1024),
atop(hole_end), atop(16 * 1024 * 1024),
first16q);
uvm_page_physload(atop(16 * 1024 * 1024), atop(avail_end),
atop(16 * 1024 * 1024), atop(avail_end),
VM_FREELIST_DEFAULT);
} else {
uvm_page_physload(atop(hole_end), atop(avail_end),
atop(hole_end), atop(avail_end), first16q);
}
/*
* ensure the TLB is sync'd with reality by flushing it...
*/
pmap_update();
}
/*
* pmap_init: called from uvm_init, our job is to get the pmap
* system ready to manage mappings... this mainly means initing
* the pv_entry stuff.
*/
void
pmap_init()
{
int npages, lcv;
vaddr_t addr;
vsize_t s;
/*
* compute the number of pages we have and then allocate RAM
* for each page's pv_head and saved attributes.
*/
npages = 0;
for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
npages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
s = (vsize_t) (sizeof(struct pv_head) * npages +
sizeof(char) * npages);
s = round_page(s); /* round up */
addr = (vaddr_t) uvm_km_zalloc(kernel_map, s);
if (addr == NULL)
panic("pmap_init: unable to allocate pv_heads");
/*
* init all pv_head's and attrs in one memset
*/
/* allocate pv_head stuff first */
for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
vm_physmem[lcv].pmseg.pvhead = (struct pv_head *) addr;
addr = (vaddr_t)(vm_physmem[lcv].pmseg.pvhead +
(vm_physmem[lcv].end - vm_physmem[lcv].start));
}
/* now allocate attrs */
for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
vm_physmem[lcv].pmseg.attrs = (char *) addr;
addr = (vaddr_t)(vm_physmem[lcv].pmseg.attrs +
(vm_physmem[lcv].end - vm_physmem[lcv].start));
}
/*
* now we need to free enough pv_entry structures to allow us to get
* the kmem_map/kmem_object allocated and inited (done after this
* function is finished). to do this we allocate one bootstrap page out
* of kernel_map and use it to provide an initial pool of pv_entry
* structures. we never free this page.
*/
pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, NBPG);
if (pv_initpage == NULL)
panic("pmap_init: pv_initpage");
pv_cachedva = NULL; /* a VA we have allocated but not used yet */
pv_nfpvents = 0;
(void) pmap_add_pvpage(pv_initpage, FALSE);
/*
* done: pmap module is up (and ready for business)
*/
pmap_initialized = TRUE;
}
/*
* p v _ e n t r y f u n c t i o n s
*/
/*
* pv_entry allocation functions:
* the main pv_entry allocation functions are:
* pmap_alloc_pv: allocate a pv_entry structure
* pmap_free_pv: free one pv_entry
* pmap_free_pvs: free a list of pv_entrys
*
* the rest are helper functions
*/
/*
* pmap_alloc_pv: inline function to allocate a pv_entry structure
* => we lock pvalloc_lock
* => if we fail, we call out to pmap_alloc_pvpage
* => 3 modes:
* ALLOCPV_NEED = we really need a pv_entry, even if we have to steal it
* ALLOCPV_TRY = we want a pv_entry, but not enough to steal
* ALLOCPV_NONEED = we are trying to grow our free list, don't really need
* one now
*
* "try" is for optional functions like pmap_copy().
*/
__inline static struct pv_entry *
pmap_alloc_pv(pmap, mode)
struct pmap *pmap;
int mode;
{
struct pv_page *pvpage;
struct pv_entry *pv;
simple_lock(&pvalloc_lock);
if (pv_freepages.tqh_first != NULL) {
pvpage = pv_freepages.tqh_first;
pvpage->pvinfo.pvpi_nfree--;
if (pvpage->pvinfo.pvpi_nfree == 0) {
/* nothing left in this one? */
TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list);
}
pv = pvpage->pvinfo.pvpi_pvfree;
#ifdef DIAGNOSTIC
if (pv == NULL)
panic("pmap_alloc_pv: pvpi_nfree off");
#endif
pvpage->pvinfo.pvpi_pvfree = pv->pv_next;
pv_nfpvents--; /* took one from pool */
} else {
pv = NULL; /* need more of them */
}
/*
* if below low water mark or we didn't get a pv_entry we try and
* create more pv_entrys ...
*/
if (pv_nfpvents < PVE_LOWAT || pv == NULL) {
if (pv == NULL)
pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ?
mode : ALLOCPV_NEED);
else
(void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED);
}
simple_unlock(&pvalloc_lock);
return(pv);
}
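/*
 * example (a sketch): how a caller that is entering a managed mapping
 * typically uses the allocator ("pvh", "va" and "ptp" are placeholders):
 *
 *	pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);	never NULL: with
 *							ALLOCPV_NEED we
 *							steal or panic
 *	pmap_enter_pv(pvh, pve, pmap, va, ptp);
 *
 * optional operations pass ALLOCPV_TRY instead and simply give up if
 * they get NULL back.
 */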
/*
* pmap_alloc_pvpage: maybe allocate a new pvpage
*
* if need_entry is false: try and allocate a new pv_page
* if need_entry is true: try and allocate a new pv_page and return a
* new pv_entry from it. if we are unable to allocate a pv_page
* we make a last ditch effort to steal a pv_entry from some other
* mapping. if that fails, we panic...
*
* => we assume that the caller holds pvalloc_lock
*/
static struct pv_entry *
pmap_alloc_pvpage(pmap, mode)
struct pmap *pmap;
int mode;
{
struct vm_page *pg;
struct pv_page *pvpage;
int lcv, idx, npg, s;
struct pv_entry *pv, *cpv, *prevpv;
/*
* if we need_entry and we've got unused pv_pages, allocate from there
*/
if (mode != ALLOCPV_NONEED && pv_unusedpgs.tqh_first != NULL) {
/* move it to pv_freepages list */
pvpage = pv_unusedpgs.tqh_first;
TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list);
TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list);
/* allocate a pv_entry */
pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */
pv = pvpage->pvinfo.pvpi_pvfree;
#ifdef DIAGNOSTIC
if (pv == NULL)
panic("pmap_alloc_pvpage: pvpi_nfree off");
#endif
pvpage->pvinfo.pvpi_pvfree = pv->pv_next;
pv_nfpvents--; /* took one from pool */
return(pv);
}
/*
* see if we've got a cached unmapped VA that we can map a page in.
* if not, try to allocate one.
*/
s = splimp(); /* must protect kmem_map/kmem_object with splimp! */
if (pv_cachedva == NULL) {
pv_cachedva = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object,
NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC);
if (pv_cachedva == NULL) {
splx(s);
goto steal_one;
}
}
/*
* we have a VA, now let's try and allocate a page in the object
* note: we are still holding splimp to protect kmem_object
*/
if (!simple_lock_try(&uvmexp.kmem_object->vmobjlock)) {
splx(s);
goto steal_one;
}
pg = uvm_pagealloc(uvmexp.kmem_object, pv_cachedva -
vm_map_min(kernel_map),
NULL, UVM_PGA_USERESERVE);
if (pg)
pg->flags &= ~PG_BUSY; /* never busy */
simple_unlock(&uvmexp.kmem_object->vmobjlock);
splx(s);
/* splimp now dropped */
if (pg == NULL)
goto steal_one;
/*
* add a mapping for our new pv_page and free its entrys (save one!)
*
* NOTE: If we are allocating a PV page for the kernel pmap, the
* pmap is already locked! (...but entering the mapping is safe...)
*/
pmap_kenter_pa0(pv_cachedva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL,
(pmap == pmap_kernel()) ? TRUE : FALSE);
pvpage = (struct pv_page *) pv_cachedva;
pv_cachedva = NULL;
return(pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED));
steal_one:
/*
* if we don't really need a pv_entry right now, we can just return.
*/
if (mode != ALLOCPV_NEED)
return(NULL);
/*
* last ditch effort! we couldn't allocate a free page to make
* more pv_entrys so we try and steal one from someone else.
*/
pv = NULL;
for (lcv = 0 ; pv == NULL && lcv < vm_nphysseg ; lcv++) {
npg = vm_physmem[lcv].end - vm_physmem[lcv].start;
for (idx = 0 ; idx < npg ; idx++) {
struct pv_head *pvhead = vm_physmem[lcv].pmseg.pvhead;
if (pvhead->pvh_list == NULL)
continue; /* spot check */
if (!simple_lock_try(&pvhead->pvh_lock))
continue;
cpv = prevpv = pvhead->pvh_list;
while (cpv) {
if (pmap_try_steal_pv(pvhead, cpv, prevpv))
break;
prevpv = cpv;
cpv = cpv->pv_next;
}
simple_unlock(&pvhead->pvh_lock);
/* got one? break out of the loop! */
if (cpv) {
pv = cpv;
break;
}
}
}
/*
* done! if we didn't get a pv then we panic :(
*/
if (pv == NULL)
panic("pmap_alloc_pvpage");
return(pv);
}
/*
* pmap_try_steal_pv: try and steal a pv_entry from a pmap
*
* => return true if we did it!
*/
static boolean_t
pmap_try_steal_pv(pvh, cpv, prevpv)
struct pv_head *pvh;
struct pv_entry *cpv, *prevpv;
{
pt_entry_t *ptep; /* pointer to a PTE */
/*
* we never steal kernel mappings or mappings from pmaps we can't lock
*/
if (cpv->pv_pmap == pmap_kernel() ||
!simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock))
return(FALSE);
/*
* yes, we can try and steal it. first we need to remove the
* mapping from the pmap.
*/
ptep = pmap_tmpmap_pvepte(cpv);
if (*ptep & PG_W) {
ptep = NULL; /* wired page, avoid stealing this one */
} else {
*ptep = 0; /* zap! */
if (pmap_is_curpmap(cpv->pv_pmap))
pmap_update_pg(cpv->pv_va);
pmap_tmpunmap_pvepte(cpv);
}
if (ptep == NULL) {
simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock);
return(FALSE); /* wired page, abort! */
}
cpv->pv_pmap->pm_stats.resident_count--;
if (cpv->pv_ptp && cpv->pv_ptp->wire_count)
/* drop PTP's wired count */
cpv->pv_ptp->wire_count--;
/*
* XXX: if wire_count goes to one the PTP could be freed, however,
* we'd have to lock the page queues (etc.) to do that and it could
* cause deadlock headaches. besides, the pmap we just stole from
* may want the mapping back anyway, so leave the PTP around.
*/
/*
* now we need to remove the entry from the pvlist
*/
if (cpv == pvh->pvh_list)
pvh->pvh_list = cpv->pv_next;
else
prevpv->pv_next = cpv->pv_next;
return(TRUE);
}
/*
* pmap_add_pvpage: add a pv_page's pv_entrys to the free list
*
* => caller must hold pvalloc_lock
* => if need_entry is true, we allocate and return one pv_entry
*/
static struct pv_entry *
pmap_add_pvpage(pvp, need_entry)
struct pv_page *pvp;
boolean_t need_entry;
{
int tofree, lcv;
/* do we need to return one? */
tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE;
pvp->pvinfo.pvpi_pvfree = NULL;
pvp->pvinfo.pvpi_nfree = tofree;
for (lcv = 0 ; lcv < tofree ; lcv++) {
pvp->pvents[lcv].pv_next = pvp->pvinfo.pvpi_pvfree;
pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv];
}
if (need_entry)
TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list);
else
TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
pv_nfpvents += tofree;
return((need_entry) ? &pvp->pvents[lcv] : NULL);
}
/*
* pmap_free_pv_doit: actually free a pv_entry
*
* => do not call this directly! instead use either
* 1. pmap_free_pv ==> free a single pv_entry
* 2. pmap_free_pvs => free a list of pv_entrys
* => we must be holding pvalloc_lock
*/
__inline static void
pmap_free_pv_doit(pv)
struct pv_entry *pv;
{
struct pv_page *pvp;
pvp = (struct pv_page *) i386_trunc_page(pv);
pv_nfpvents++;
pvp->pvinfo.pvpi_nfree++;
/* nfree == 1 => fully allocated page just became partly allocated */
if (pvp->pvinfo.pvpi_nfree == 1) {
TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list);
}
/* free it */
pv->pv_next = pvp->pvinfo.pvpi_pvfree;
pvp->pvinfo.pvpi_pvfree = pv;
/*
* are all pv_page's pv_entry's free? move it to unused queue.
*/
if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) {
TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list);
TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
}
}
/*
* pmap_free_pv: free a single pv_entry
*
* => we gain the pvalloc_lock
*/
__inline static void
pmap_free_pv(pmap, pv)
struct pmap *pmap;
struct pv_entry *pv;
{
simple_lock(&pvalloc_lock);
pmap_free_pv_doit(pv);
/*
* Can't free the PV page if the PV entries were associated with
* the kernel pmap; the pmap is already locked.
*/
if (pv_nfpvents > PVE_HIWAT && pv_unusedpgs.tqh_first != NULL &&
pmap != pmap_kernel())
pmap_free_pvpage();
simple_unlock(&pvalloc_lock);
}
/*
* pmap_free_pvs: free a list of pv_entrys
*
* => we gain the pvalloc_lock
*/
__inline static void
pmap_free_pvs(pmap, pvs)
struct pmap *pmap;
struct pv_entry *pvs;
{
struct pv_entry *nextpv;
simple_lock(&pvalloc_lock);
for ( /* null */ ; pvs != NULL ; pvs = nextpv) {
nextpv = pvs->pv_next;
pmap_free_pv_doit(pvs);
}
/*
* Can't free the PV page if the PV entries were associated with
* the kernel pmap; the pmap is already locked.
*/
if (pv_nfpvents > PVE_HIWAT && pv_unusedpgs.tqh_first != NULL &&
pmap != pmap_kernel())
pmap_free_pvpage();
simple_unlock(&pvalloc_lock);
}
/*
* pmap_free_pvpage: try and free an unused pv_page structure
*
* => assume caller is holding the pvalloc_lock and that
* there is a page on the pv_unusedpgs list
* => if we can't get a lock on the kmem_map we try again later
* => note: analysis of MI kmem_map usage [i.e. malloc/free] shows
* that if we can lock the kmem_map then we are not already
* holding kmem_object's lock.
*/
static void
pmap_free_pvpage()
{
int s;
struct vm_map *map;
vm_map_entry_t dead_entries;
struct pv_page *pvp;
s = splimp(); /* protect kmem_map */
pvp = pv_unusedpgs.tqh_first;
/*
* note: watch out for pv_initpage which is allocated out of
* kernel_map rather than kmem_map.
*/
if (pvp == pv_initpage)
map = kernel_map;
else
map = kmem_map;
if (vm_map_lock_try(map)) {
/* remove pvp from pv_unusedpgs */
TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
/* unmap the page */
dead_entries = NULL;
(void)uvm_unmap_remove(map, (vaddr_t) pvp,
((vaddr_t) pvp) + NBPG, &dead_entries);
vm_map_unlock(map);
if (dead_entries != NULL)
uvm_unmap_detach(dead_entries, 0);
pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */
}
if (pvp == pv_initpage)
/* no more initpage, we've freed it */
pv_initpage = NULL;
splx(s);
}
/*
* main pv_entry manipulation functions:
* pmap_enter_pv: enter a mapping onto a pv_head list
* pmap_remove_pv: remove a mapping from a pv_head list
*
* NOTE: pmap_enter_pv expects to lock the pvh itself
* pmap_remove_pv expects the caller to lock the pvh before calling
*/
/*
* pmap_enter_pv: enter a mapping onto a pv_head list
*
* => caller should hold the proper lock on pmap_main_lock
* => caller should have pmap locked
* => we will gain the lock on the pv_head and allocate the new pv_entry
* => caller should adjust ptp's wire_count before calling
*/
__inline static void
pmap_enter_pv(pvh, pve, pmap, va, ptp)
struct pv_head *pvh;
struct pv_entry *pve; /* preallocated pve for us to use */
struct pmap *pmap;
vaddr_t va;
struct vm_page *ptp; /* PTP in pmap that maps this VA */
{
pve->pv_pmap = pmap;
pve->pv_va = va;
pve->pv_ptp = ptp; /* NULL for kernel pmap */
simple_lock(&pvh->pvh_lock); /* lock pv_head */
pve->pv_next = pvh->pvh_list; /* add to ... */
pvh->pvh_list = pve; /* ... locked list */
simple_unlock(&pvh->pvh_lock); /* unlock, done! */
}
/*
* pmap_remove_pv: try to remove a mapping from a pv_list
*
* => caller should hold proper lock on pmap_main_lock
* => pmap should be locked
* => caller should hold lock on pv_head [so that attrs can be adjusted]
* => caller should adjust ptp's wire_count and free PTP if needed
* => we return the removed pve
*/
__inline static struct pv_entry *
pmap_remove_pv(pvh, pmap, va)
struct pv_head *pvh;
struct pmap *pmap;
vaddr_t va;
{
struct pv_entry *pve, **prevptr;
prevptr = &pvh->pvh_list; /* previous pv_entry pointer */
pve = *prevptr;
while (pve) {
if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */
*prevptr = pve->pv_next; /* remove it! */
break;
}
prevptr = &pve->pv_next; /* previous pointer */
pve = pve->pv_next; /* advance */
}
return(pve); /* return removed pve */
}
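/*
 * example (a sketch of the asymmetry noted above): pmap_enter_pv takes
 * pvh_lock itself, but a remover must already hold it so the page's
 * attrs can be synced under the same lock (this is the pattern
 * pmap_remove_pte uses below; "pvh", "attrs" and "off" stand in for
 * the page's vm_physmem entries):
 *
 *	simple_lock(&pvh->pvh_lock);
 *	attrs[off] |= (opte & (PG_U|PG_M));	sync R/M bits
 *	pve = pmap_remove_pv(pvh, pmap, va);
 *	simple_unlock(&pvh->pvh_lock);
 *	if (pve)
 *		pmap_free_pv(pmap, pve);
 */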
/*
* p t p f u n c t i o n s
*/
/*
* pmap_alloc_ptp: allocate a PTP for a PMAP
*
* => pmap should already be locked by caller
* => we use the ptp's wire_count to count the number of active mappings
* in the PTP (we start it at one to prevent any chance this PTP
* will ever leak onto the active/inactive queues)
* => we should not be holding any pv_head locks (in case we are forced
* to call pmap_steal_ptp())
* => we may need to lock pv_head's if we have to steal a PTP
* => just_try: true if we want a PTP, but not enough to steal one
* from another pmap (e.g. during optional functions like pmap_copy)
*/
__inline static struct vm_page *
pmap_alloc_ptp(pmap, pde_index, just_try)
struct pmap *pmap;
int pde_index;
boolean_t just_try;
{
struct vm_page *ptp;
ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
UVM_PGA_USERESERVE);
if (ptp == NULL) {
if (just_try)
return(NULL);
ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index));
}
/* got one! */
ptp->flags &= ~PG_BUSY; /* never busy */
ptp->wire_count = 1; /* no mappings yet */
pmap_zero_page(VM_PAGE_TO_PHYS(ptp));
pmap->pm_pdir[pde_index] =
(pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V);
pmap->pm_stats.resident_count++; /* count PTP as resident */
pmap->pm_ptphint = ptp;
return(ptp);
}
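/*
 * example (a sketch of the wire_count convention): a PTP starts at a
 * wire_count of 1 (set above); every valid PTE entered into it bumps
 * the count and every PTE removed drops it, so a count of 1 means the
 * PTP holds no real mappings and may be freed (see pmap_remove below):
 *
 *	ptp->wire_count++;		new PTE entered in this PTP
 *	...
 *	ptp->wire_count--;		PTE removed
 *	if (ptp->wire_count <= 1)
 *		... clear the PDE, flush, uvm_pagefree(ptp) ...
 */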
/*
* pmap_steal_ptp: steal a PTP from any pmap that we can access
*
* => obj is locked by caller.
* => we can throw away mappings at this level (except in the kernel's pmap)
* => stolen PTP is placed in <obj,offset> pmap
* => we lock pv_head's
* => hopefully, this function will be seldom used [much better to have
* enough free pages around for us to allocate off the free page list]
*/
static struct vm_page *
pmap_steal_ptp(obj, offset)
struct uvm_object *obj;
vaddr_t offset;
{
struct vm_page *ptp = NULL;
struct pmap *firstpmap;
int idx, lcv;
pt_entry_t *ptes;
simple_lock(&pmaps_lock);
if (pmaps_hand == NULL)
pmaps_hand = pmaps.lh_first;
firstpmap = pmaps_hand;
if (firstpmap == NULL)
panic("pmap_steal_ptp: no pmaps to steal from!");
do { /* while we haven't looped back around to firstpmap */
if (simple_lock_try(&pmaps_hand->pm_obj.vmobjlock)) {
ptp = pmaps_hand->pm_obj.memq.tqh_first;
for (/*null*/; ptp != NULL; ptp = ptp->listq.tqe_next) {
/*
* might have found a PTP we can steal
* (unless it has wired pages).
*/
idx = ptp_o2i(ptp->offset);
if (VM_PAGE_TO_PHYS(ptp) !=
(pmaps_hand->pm_pdir[idx] & PG_FRAME))
panic("pmap_steal_ptp: PTP mismatch!");
ptes = (pt_entry_t *)
pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp));
for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++)
if ((ptes[lcv] & (PG_V|PG_W)) ==
(PG_V|PG_W))
break;
if (lcv == PTES_PER_PTP)
pmap_remove_ptes(pmaps_hand, NULL, ptp,
(vaddr_t)ptes,
ptp_i2v(idx),
ptp_i2v(idx+1));
pmap_tmpunmap_pa();
if (lcv != PTES_PER_PTP)
/* wired, try next PTP */
continue;
/*
* got it!!!
*/
pmaps_hand->pm_pdir[idx] = 0; /* zap! */
pmaps_hand->pm_stats.resident_count--;
if (pmap_is_curpmap(pmaps_hand))
pmap_update();
else if (pmap_valid_entry(*APDP_PDE) &&
(*APDP_PDE & PG_FRAME) ==
pmaps_hand->pm_pdirpa) {
pmap_update_pg(((vaddr_t)APTE_BASE) +
ptp->offset);
}
/* put it in our pmap! */
uvm_pagerealloc(ptp, obj, offset);
break; /* break out of "for" loop */
}
simple_unlock(&pmaps_hand->pm_obj.vmobjlock);
}
/* advance the pmaps_hand */
pmaps_hand = pmaps_hand->pm_list.le_next;
if (pmaps_hand == NULL)
pmaps_hand = pmaps.lh_first;
} while (ptp == NULL && pmaps_hand != firstpmap);
simple_unlock(&pmaps_lock);
if (ptp == NULL)
panic("pmap_steal_ptp: failed to steal a PTP!");
return(ptp);
}
/*
* pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
*
* => pmap should NOT be pmap_kernel()
* => pmap should be locked
*/
static struct vm_page *
pmap_get_ptp(pmap, pde_index, just_try)
struct pmap *pmap;
int pde_index;
boolean_t just_try;
{
struct vm_page *ptp;
if (pmap_valid_entry(pmap->pm_pdir[pde_index])) {
/* valid... check hint (saves us a PA->PG lookup) */
if (pmap->pm_ptphint &&
(pmap->pm_pdir[pde_index] & PG_FRAME) ==
VM_PAGE_TO_PHYS(pmap->pm_ptphint))
return(pmap->pm_ptphint);
ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index));
if (ptp == NULL)
panic("pmap_get_ptp: unmanaged user PTP");
pmap->pm_ptphint = ptp;
return(ptp);
}
/* allocate a new PTP (updates ptphint) */
return(pmap_alloc_ptp(pmap, pde_index, just_try));
}
/*
* p m a p l i f e c y c l e f u n c t i o n s
*/
/*
* pmap_create: create a pmap
*
* => note: the old pmap interface took a "size" arg which allowed for
* the creation of "software only" pmaps (not in bsd).
*/
struct pmap *
pmap_create()
{
struct pmap *pmap;
pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
pmap_pinit(pmap);
return(pmap);
}
/*
* pmap_pinit: given a zero'd pmap structure, init it.
*/
void
pmap_pinit(pmap)
struct pmap *pmap;
{
/* init uvm_object */
simple_lock_init(&pmap->pm_obj.vmobjlock);
pmap->pm_obj.pgops = NULL; /* currently not a mappable object */
TAILQ_INIT(&pmap->pm_obj.memq);
pmap->pm_obj.uo_npages = 0;
pmap->pm_obj.uo_refs = 1;
pmap->pm_stats.wired_count = 0;
pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */
pmap->pm_ptphint = NULL;
pmap->pm_flags = 0;
/* allocate PDP */
pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG);
if (pmap->pm_pdir == NULL)
panic("pmap_pinit: kernel_map out of virtual space!");
(void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir,
(paddr_t *)&pmap->pm_pdirpa);
/* init PDP */
/* zero init area */
memset(pmap->pm_pdir, 0, PDSLOT_PTE * sizeof(pd_entry_t));
/* put in recursive PDE to map the PTEs */
pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW;
/* init the LDT */
pmap->pm_ldt = NULL;
pmap->pm_ldt_len = 0;
pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
/*
* we need to lock pmaps_lock to prevent nkpde from changing on
* us. note that there is no need to splimp to protect us from
* malloc since malloc allocates out of a submap and we should have
* already allocated kernel PTPs to cover the range...
*/
simple_lock(&pmaps_lock);
/* put in kernel VM PDEs */
memcpy(&pmap->pm_pdir[PDSLOT_KERN], &PDP_BASE[PDSLOT_KERN],
nkpde * sizeof(pd_entry_t));
/* zero the rest */
memset(&pmap->pm_pdir[PDSLOT_KERN + nkpde], 0,
NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
if (pmaps_hand == NULL)
pmaps_hand = pmap;
simple_unlock(&pmaps_lock);
}
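/*
 * example: the recursive PDSLOT_PTE entry installed above is what makes
 * the linear PTE mapping work.  once this pmap is loaded in %cr3, the
 * PTE mapping any va is simply
 *
 *	pte = PTE_BASE + i386_btop(va);		i.e. vtopte(va)
 *
 * and the page directory itself appears at PDP_BASE (see the comments
 * in <machine/pmap.h>).
 */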
/*
* pmap_destroy: drop reference count on pmap. free pmap if
* reference count goes to zero.
*/
void
pmap_destroy(pmap)
struct pmap *pmap;
{
int refs;
/*
* drop reference count
*/
simple_lock(&pmap->pm_obj.vmobjlock);
refs = --pmap->pm_obj.uo_refs;
simple_unlock(&pmap->pm_obj.vmobjlock);
if (refs > 0) {
return;
}
/*
* reference count is zero, free pmap resources and then free pmap.
*/
pmap_release(pmap);
pool_put(&pmap_pmap_pool, pmap);
}
/*
* pmap_release: release all resources held by a pmap
*
* => if pmap is still referenced it should be locked
* => XXX: we currently don't expect any busy PTPs because we don't
* allow anything to map them (except for the kernel's private
* recursive mapping) or make them busy.
*/
void
pmap_release(pmap)
struct pmap *pmap;
{
struct vm_page *pg;
/*
* remove it from global list of pmaps
*/
simple_lock(&pmaps_lock);
if (pmap == pmaps_hand)
pmaps_hand = pmaps_hand->pm_list.le_next;
LIST_REMOVE(pmap, pm_list);
simple_unlock(&pmaps_lock);
/*
* free any remaining PTPs
*/
while (pmap->pm_obj.memq.tqh_first != NULL) {
pg = pmap->pm_obj.memq.tqh_first;
if (pg->flags & PG_BUSY)
panic("pmap_release: busy page table page");
/* pmap_page_protect? currently no need for it. */
pg->wire_count = 0;
uvm_pagefree(pg);
}
/* XXX: need to flush it out of other processor's APTE space? */
uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG);
#ifdef USER_LDT
if (pmap->pm_flags & PMF_USER_LDT) {
/*
* no need to switch the LDT; this address space is gone,
* nothing is using it.
*/
ldt_free(pmap);
uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
pmap->pm_ldt_len * sizeof(union descriptor));
}
#endif
}
/*
* Add a reference to the specified pmap.
*/
void
pmap_reference(pmap)
struct pmap *pmap;
{
simple_lock(&pmap->pm_obj.vmobjlock);
pmap->pm_obj.uo_refs++;
simple_unlock(&pmap->pm_obj.vmobjlock);
}
#if defined(PMAP_FORK)
/*
* pmap_fork: perform any necessary data structure manipulation when
* a VM space is forked.
*/
void
pmap_fork(pmap1, pmap2)
struct pmap *pmap1, *pmap2;
{
simple_lock(&pmap1->pm_obj.vmobjlock);
simple_lock(&pmap2->pm_obj.vmobjlock);
#ifdef USER_LDT
/* Copy the LDT, if necessary. */
if (pmap1->pm_flags & PMF_USER_LDT) {
union descriptor *new_ldt;
size_t len;
len = pmap1->pm_ldt_len * sizeof(union descriptor);
new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len);
memcpy(new_ldt, pmap1->pm_ldt, len);
pmap2->pm_ldt = new_ldt;
pmap2->pm_ldt_len = pmap1->pm_ldt_len;
pmap2->pm_flags |= PMF_USER_LDT;
ldt_alloc(pmap2, new_ldt, len);
}
#endif /* USER_LDT */
simple_unlock(&pmap2->pm_obj.vmobjlock);
simple_unlock(&pmap1->pm_obj.vmobjlock);
}
#endif /* PMAP_FORK */
#ifdef USER_LDT
/*
* pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
* restore the default.
*/
void
pmap_ldt_cleanup(p)
struct proc *p;
{
struct pcb *pcb = &p->p_addr->u_pcb;
pmap_t pmap = p->p_vmspace->vm_map.pmap;
union descriptor *old_ldt = NULL;
size_t len = 0;
simple_lock(&pmap->pm_obj.vmobjlock);
if (pmap->pm_flags & PMF_USER_LDT) {
ldt_free(pmap);
pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
if (pcb == curpcb)
lldt(pcb->pcb_ldt_sel);
old_ldt = pmap->pm_ldt;
len = pmap->pm_ldt_len * sizeof(union descriptor);
pmap->pm_ldt = NULL;
pmap->pm_ldt_len = 0;
pmap->pm_flags &= ~PMF_USER_LDT;
}
simple_unlock(&pmap->pm_obj.vmobjlock);
if (old_ldt != NULL)
uvm_km_free(kernel_map, (vaddr_t)old_ldt, len);
}
#endif /* USER_LDT */
/*
* pmap_activate: activate a process' pmap (fill in %cr3 info)
*
* => called from cpu_fork()
* => if proc is the curproc, then load it into the MMU
*/
void
pmap_activate(p)
struct proc *p;
{
struct pcb *pcb = &p->p_addr->u_pcb;
struct pmap *pmap = p->p_vmspace->vm_map.pmap;
pcb->pcb_pmap = pmap;
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
pcb->pcb_cr3 = pmap->pm_pdirpa;
if (p == curproc)
lcr3(pcb->pcb_cr3);
if (pcb == curpcb)
lldt(pcb->pcb_ldt_sel);
}
/*
* pmap_deactivate: deactivate a process' pmap
*
* => XXX: what should this do, if anything?
*/
void
pmap_deactivate(p)
struct proc *p;
{
}
/*
* end of lifecycle functions
*/
/*
* some misc. functions
*/
/*
* pmap_extract: extract a PA for the given VA
*/
boolean_t
pmap_extract(pmap, va, pap)
struct pmap *pmap;
vaddr_t va;
paddr_t *pap;
{
paddr_t retval;
pt_entry_t *ptes;
if (pmap->pm_pdir[pdei(va)]) {
ptes = pmap_map_ptes(pmap);
retval = (paddr_t)(ptes[i386_btop(va)] & PG_FRAME);
pmap_unmap_ptes(pmap);
if (pap != NULL)
*pap = retval | (va & ~PG_FRAME);
return (TRUE);
}
return (FALSE);
}
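/*
 * example (a sketch; "va" is a placeholder):
 *
 *	paddr_t pa;
 *
 *	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
 *		... va is not mapped in the kernel pmap ...
 */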
/*
* pmap_virtual_space: used during bootup [pmap_steal_memory] to
* determine the bounds of the kernel virtual address space.
*/
void
pmap_virtual_space(startp, endp)
vaddr_t *startp;
vaddr_t *endp;
{
*startp = virtual_avail;
*endp = virtual_end;
}
/*
* pmap_map: map a range of PAs into kvm
*
* => used during crash dump
* => XXX: pmap_map() should be phased out?
*/
vaddr_t
pmap_map(va, spa, epa, prot)
vaddr_t va;
paddr_t spa, epa;
vm_prot_t prot;
{
while (spa < epa) {
pmap_enter(pmap_kernel(), va, spa, prot, FALSE, 0);
va += NBPG;
spa += NBPG;
}
return va;
}
/*
* pmap_zero_page: zero a page
*/
void
pmap_zero_page(pa)
paddr_t pa;
{
simple_lock(&pmap_zero_page_lock);
#ifdef DIAGNOSTIC
if (*zero_pte)
panic("pmap_zero_page: lock botch");
#endif
*zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */
memset(zerop, 0, NBPG); /* zero */
*zero_pte = 0; /* zap! */
pmap_update_pg((vaddr_t)zerop); /* flush TLB */
simple_unlock(&pmap_zero_page_lock);
}
/*
* pmap_copy_page: copy a page
*/
void
pmap_copy_page(srcpa, dstpa)
paddr_t srcpa, dstpa;
{
simple_lock(&pmap_copy_page_lock);
#ifdef DIAGNOSTIC
if (*csrc_pte || *cdst_pte)
panic("pmap_copy_page: lock botch");
#endif
*csrc_pte = (srcpa & PG_FRAME) | PG_V | PG_RW;
*cdst_pte = (dstpa & PG_FRAME) | PG_V | PG_RW;
memcpy(cdstp, csrcp, NBPG);
*csrc_pte = *cdst_pte = 0; /* zap! */
pmap_update_2pg((vaddr_t)csrcp, (vaddr_t)cdstp);
simple_unlock(&pmap_copy_page_lock);
}
/*
* p m a p r e m o v e f u n c t i o n s
*
* functions that remove mappings
*/
/*
* pmap_remove_ptes: remove PTEs from a PTP
*
* => must have proper locking on pmap_main_lock
* => caller must hold pmap's lock
* => PTP must be mapped into KVA
* => PTP should be null if pmap == pmap_kernel()
*/
static void
pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva)
struct pmap *pmap;
struct pmap_remove_record *pmap_rr;
struct vm_page *ptp;
vaddr_t ptpva;
vaddr_t startva, endva;
{
struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
struct pv_entry *pve;
pt_entry_t *pte = (pt_entry_t *) ptpva;
pt_entry_t opte;
int bank, off;
/*
* note that ptpva points to the PTE that maps startva. this may
* or may not be the first PTE in the PTP.
*
* we loop through the PTP while there are still PTEs to look at
* and the wire_count is greater than 1 (because we use the wire_count
* to keep track of the number of real PTEs in the PTP).
*/
for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
; pte++, startva += NBPG) {
if (!pmap_valid_entry(*pte))
continue; /* VA not mapped */
opte = *pte; /* save the old PTE */
*pte = 0; /* zap! */
if (opte & PG_W)
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
if (pmap_rr) { /* worried about tlb flushing? */
if (opte & PG_G) {
/* PG_G requires this */
pmap_update_pg(startva);
} else {
if (pmap_rr->prr_npages < PMAP_RR_MAX) {
pmap_rr->prr_vas[pmap_rr->prr_npages++]
= startva;
} else {
if (pmap_rr->prr_npages == PMAP_RR_MAX)
/* signal an overflow */
pmap_rr->prr_npages++;
}
}
}
if (ptp)
ptp->wire_count--; /* dropping a PTE */
/*
* if we are not on a pv_head list we are done.
*/
if ((opte & PG_PVLIST) == 0) {
#ifdef DIAGNOSTIC
if (vm_physseg_find(i386_btop(opte & PG_FRAME), &off)
!= -1)
panic("pmap_remove_ptes: managed page without "
"PG_PVLIST for 0x%lx", startva);
#endif
continue;
}
bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off);
if (bank == -1)
panic("pmap_remove_ptes: unmanaged page marked "
"PG_PVLIST");
/* sync R/M bits */
simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap,
startva);
simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
if (pve) {
pve->pv_next = pv_tofree;
pv_tofree = pve;
}
/* end of "for" loop: time for next pte */
}
if (pv_tofree)
pmap_free_pvs(pmap, pv_tofree);
}
/*
* pmap_remove_pte: remove a single PTE from a PTP
*
* => must have proper locking on pmap_master_lock
* => caller must hold pmap's lock
* => PTP must be mapped into KVA
* => PTP should be null if pmap == pmap_kernel()
* => returns true if we removed a mapping
*/
static boolean_t
pmap_remove_pte(pmap, ptp, pte, va)
struct pmap *pmap;
struct vm_page *ptp;
pt_entry_t *pte;
vaddr_t va;
{
pt_entry_t opte;
int bank, off;
struct pv_entry *pve;
if (!pmap_valid_entry(*pte))
return(FALSE); /* VA not mapped */
opte = *pte; /* save the old PTE */
*pte = 0; /* zap! */
if (opte & PG_W)
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
if (ptp)
ptp->wire_count--; /* dropping a PTE */
if (pmap_is_curpmap(pmap))
pmap_update_pg(va); /* flush TLB */
/*
* if we are not on a pv_head list we are done.
*/
if ((opte & PG_PVLIST) == 0) {
#ifdef DIAGNOSTIC
if (vm_physseg_find(i386_btop(opte & PG_FRAME), &off) != -1)
panic("pmap_remove_ptes: managed page without "
"PG_PVLIST for 0x%lx", va);
#endif
return(TRUE);
}
bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off);
if (bank == -1)
panic("pmap_remove_pte: unmanaged page marked PG_PVLIST");
/* sync R/M bits */
simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va);
simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
if (pve)
pmap_free_pv(pmap, pve);
return(TRUE);
}
/*
* pmap_remove: top level mapping removal function
*
* => caller should not be holding any pmap locks
*/
void
pmap_remove(pmap, sva, eva)
struct pmap *pmap;
vaddr_t sva, eva;
{
pt_entry_t *ptes;
boolean_t result;
paddr_t ptppa;
vaddr_t blkendva;
struct vm_page *ptp;
struct pmap_remove_record pmap_rr, *prr;
/*
* we lock in the pmap => pv_head direction
*/
PMAP_MAP_TO_HEAD_LOCK();
ptes = pmap_map_ptes(pmap); /* locks pmap */
/*
* removing one page? take shortcut function.
*/
if (sva + NBPG == eva) {
if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) {
/* PA of the PTP */
ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME;
/* get PTP if non-kernel mapping */
if (pmap == pmap_kernel()) {
/* we never free kernel PTPs */
ptp = NULL;
} else {
if (pmap->pm_ptphint &&
VM_PAGE_TO_PHYS(pmap->pm_ptphint) ==
ptppa) {
ptp = pmap->pm_ptphint;
} else {
ptp = PHYS_TO_VM_PAGE(ptppa);
if (ptp == NULL)
panic("pmap_remove: unmanaged "
"PTP detected");
}
}
/* do it! */
result = pmap_remove_pte(pmap, ptp,
&ptes[i386_btop(sva)], sva);
/*
* if mapping removed and the PTP is no longer
* being used, free it!
*/
if (result && ptp && ptp->wire_count <= 1) {
pmap->pm_pdir[pdei(sva)] = 0; /* zap! */
#if defined(I386_CPU)
/* already dumped whole TLB on i386 */
if (cpu_class != CPUCLASS_386)
#endif
{
pmap_update_pg(((vaddr_t) ptes) +
ptp->offset);
}
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp)
pmap->pm_ptphint =
pmap->pm_obj.memq.tqh_first;
ptp->wire_count = 0;
uvm_pagefree(ptp);
}
}
pmap_unmap_ptes(pmap); /* unlock pmap */
PMAP_MAP_TO_HEAD_UNLOCK();
return;
}
/*
* removing a range of pages: we unmap in PTP sized blocks (4MB)
*
* if we are the currently loaded pmap, we use prr to keep track
* of the VAs we unload so that we can flush them out of the tlb.
*/
if (pmap_is_curpmap(pmap)) {
prr = &pmap_rr;
prr->prr_npages = 0;
} else {
prr = NULL;
}
for (/* null */ ; sva < eva ; sva = blkendva) {
/* determine range of block */
blkendva = i386_round_pdr(sva+1);
if (blkendva > eva)
blkendva = eva;
/*
* XXXCDC: our PTE mappings should never be removed
* with pmap_remove! if we allow this (and why would
* we?) then we end up freeing the pmap's page
* directory page (PDP) before we are finished using
		 * it when we hit it in the recursive mapping. this
* is BAD.
*
* long term solution is to move the PTEs out of user
		 * address space and into kernel address space (up
* with APTE). then we can set VM_MAXUSER_ADDRESS to
* be VM_MAX_ADDRESS.
*/
if (pdei(sva) == PDSLOT_PTE)
/* XXXCDC: ugly hack to avoid freeing PDP here */
continue;
if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
/* valid block? */
continue;
/* PA of the PTP */
ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME);
/* get PTP if non-kernel mapping */
if (pmap == pmap_kernel()) {
ptp = NULL; /* we never free kernel PTPs */
} else {
if (pmap->pm_ptphint &&
VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) {
ptp = pmap->pm_ptphint;
} else {
ptp = PHYS_TO_VM_PAGE(ptppa);
if (ptp == NULL)
panic("pmap_remove: unmanaged PTP "
"detected");
}
}
pmap_remove_ptes(pmap, prr, ptp,
(vaddr_t)&ptes[i386_btop(sva)], sva, blkendva);
/* if PTP is no longer being used, free it! */
if (ptp && ptp->wire_count <= 1) {
pmap->pm_pdir[pdei(sva)] = 0; /* zap! */
pmap_update_pg( ((vaddr_t) ptes) + ptp->offset);
#if defined(I386_CPU)
/* cancel possible pending pmap update on i386 */
if (cpu_class == CPUCLASS_386 && prr)
prr->prr_npages = 0;
#endif
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp) /* update hint? */
pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first;
ptp->wire_count = 0;
uvm_pagefree(ptp);
}
}
/*
* if we kept a removal record and removed some pages update the TLB
*/
if (prr && prr->prr_npages) {
#if defined(I386_CPU)
if (cpu_class == CPUCLASS_386) {
pmap_update();
} else
#endif
{ /* not I386 */
if (prr->prr_npages > PMAP_RR_MAX) {
pmap_update();
} else {
while (prr->prr_npages) {
pmap_update_pg(
prr->prr_vas[--prr->prr_npages]);
}
}
} /* not I386 */
}
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
}
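
/*
 * example (hypothetical usage sketch): removing a single page takes
 * the shortcut path above, while a larger range is unmapped in
 * PTP-sized blocks:
 *
 *	pmap_remove(pmap, va, va + NBPG);	single page
 *	pmap_remove(pmap, start, end);		range
 */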
/*
* pmap_page_remove: remove a managed vm_page from all pmaps that map it
*
* => we set pv_head => pmap locking
* => R/M bits are sync'd back to attrs
*/
void
pmap_page_remove(pg)
struct vm_page *pg;
{
int bank, off;
struct pv_head *pvh;
struct pv_entry *pve;
pt_entry_t *ptes, opte;
#if defined(I386_CPU)
boolean_t needs_update = FALSE;
#endif
/* XXX: vm_page should either contain pv_head or have a pointer to it */
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
if (bank == -1) {
printf("pmap_page_remove: unmanaged page?\n");
return;
}
pvh = &vm_physmem[bank].pmseg.pvhead[off];
if (pvh->pvh_list == NULL) {
return;
}
/* set pv_head => pmap locking */
PMAP_HEAD_TO_MAP_LOCK();
/* XXX: needed if we hold head->map lock? */
simple_lock(&pvh->pvh_lock);
for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) {
ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
#ifdef DIAGNOSTIC
if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] &
PG_FRAME)
!= VM_PAGE_TO_PHYS(pve->pv_ptp)) {
printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
pg, pve->pv_va, pve->pv_ptp);
printf("pmap_page_remove: PTP's phys addr: "
"actual=%x, recorded=%lx\n",
(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] &
PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp));
panic("pmap_page_remove: mapped managed page has "
"invalid pv_ptp field");
}
#endif
opte = ptes[i386_btop(pve->pv_va)];
ptes[i386_btop(pve->pv_va)] = 0; /* zap! */
if (opte & PG_W)
pve->pv_pmap->pm_stats.wired_count--;
pve->pv_pmap->pm_stats.resident_count--;
if (pmap_is_curpmap(pve->pv_pmap)) {
#if defined(I386_CPU)
if (cpu_class == CPUCLASS_386)
needs_update = TRUE;
else
#endif
pmap_update_pg(pve->pv_va);
}
/* sync R/M bits */
vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
/* update the PTP reference count. free if last reference. */
if (pve->pv_ptp) {
pve->pv_ptp->wire_count--;
if (pve->pv_ptp->wire_count <= 1) {
/* zap! */
pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] = 0;
pmap_update_pg(((vaddr_t)ptes) +
pve->pv_ptp->offset);
#if defined(I386_CPU)
needs_update = FALSE;
#endif
pve->pv_pmap->pm_stats.resident_count--;
/* update hint? */
if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
pve->pv_pmap->pm_ptphint =
pve->pv_pmap->pm_obj.memq.tqh_first;
pve->pv_ptp->wire_count = 0;
uvm_pagefree(pve->pv_ptp);
}
}
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
}
pmap_free_pvs(NULL, pvh->pvh_list);
pvh->pvh_list = NULL;
simple_unlock(&pvh->pvh_lock);
PMAP_HEAD_TO_MAP_UNLOCK();
#if defined(I386_CPU)
if (needs_update)
pmap_update();
#endif
}
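
/*
 * example (hypothetical usage sketch): when the VM system wants a
 * managed page back it can strip every mapping of it in one call:
 *
 *	pmap_page_remove(pg);
 *
 * afterwards the page's pv list is empty and its R/M bits have been
 * folded into the cached attributes.
 */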
/*
* p m a p a t t r i b u t e f u n c t i o n s
* functions that test/change managed page's attributes
* since a page can be mapped multiple times we must check each PTE that
* maps it by going down the pv lists.
*/
/*
* pmap_test_attrs: test a page's attributes
*
* => we set pv_head => pmap locking
*/
boolean_t
pmap_test_attrs(pg, testbits)
struct vm_page *pg;
int testbits;
{
int bank, off;
char *myattrs;
struct pv_head *pvh;
struct pv_entry *pve;
pt_entry_t *ptes, pte;
/* XXX: vm_page should either contain pv_head or have a pointer to it */
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
if (bank == -1) {
printf("pmap_test_attrs: unmanaged page?\n");
return(FALSE);
}
/*
* before locking: see if attributes are already set and if so,
* return!
*/
myattrs = &vm_physmem[bank].pmseg.attrs[off];
if (*myattrs & testbits)
return(TRUE);
/* test to see if there is a list before bothering to lock */
pvh = &vm_physmem[bank].pmseg.pvhead[off];
if (pvh->pvh_list == NULL) {
return(FALSE);
}
/* nope, gonna have to do it the hard way */
PMAP_HEAD_TO_MAP_LOCK();
/* XXX: needed if we hold head->map lock? */
simple_lock(&pvh->pvh_lock);
for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0;
pve = pve->pv_next) {
ptes = pmap_map_ptes(pve->pv_pmap);
pte = ptes[i386_btop(pve->pv_va)];
pmap_unmap_ptes(pve->pv_pmap);
*myattrs |= pte;
}
/*
* note that we will exit the for loop with a non-null pve if
* we have found the bits we are testing for.
*/
simple_unlock(&pvh->pvh_lock);
PMAP_HEAD_TO_MAP_UNLOCK();
return((*myattrs & testbits) != 0);
}
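
/*
 * example (hypothetical usage sketch): testing whether a managed page
 * has been modified or referenced:
 *
 *	modified = pmap_test_attrs(pg, PG_M);
 *	referenced = pmap_test_attrs(pg, PG_U);
 */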
/*
* pmap_change_attrs: change a page's attributes
*
* => we set pv_head => pmap locking
* => we return TRUE if we cleared one of the bits we were asked to
*/
boolean_t
pmap_change_attrs(pg, setbits, clearbits)
struct vm_page *pg;
int setbits, clearbits;
{
u_int32_t result;
int bank, off;
struct pv_head *pvh;
struct pv_entry *pve;
pt_entry_t *ptes, npte;
char *myattrs;
#if defined(I386_CPU)
boolean_t needs_update = FALSE;
#endif
/* XXX: vm_page should either contain pv_head or have a pointer to it */
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
if (bank == -1) {
printf("pmap_change_attrs: unmanaged page?\n");
return(FALSE);
}
PMAP_HEAD_TO_MAP_LOCK();
pvh = &vm_physmem[bank].pmseg.pvhead[off];
/* XXX: needed if we hold head->map lock? */
simple_lock(&pvh->pvh_lock);
myattrs = &vm_physmem[bank].pmseg.attrs[off];
result = *myattrs & clearbits;
*myattrs = (*myattrs | setbits) & ~clearbits;
for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) {
#ifdef DIAGNOSTIC
if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) {
printf("pmap_change_attrs: found pager VA on pv_list");
}
if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]))
panic("pmap_change_attrs: mapping without PTP "
"detected");
#endif
ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
npte = ptes[i386_btop(pve->pv_va)];
result |= (npte & clearbits);
npte = (npte | setbits) & ~clearbits;
if (ptes[i386_btop(pve->pv_va)] != npte) {
ptes[i386_btop(pve->pv_va)] = npte; /* zap! */
if (pmap_is_curpmap(pve->pv_pmap)) {
#if defined(I386_CPU)
if (cpu_class == CPUCLASS_386)
needs_update = TRUE;
else
#endif
pmap_update_pg(pve->pv_va);
}
}
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
}
simple_unlock(&pvh->pvh_lock);
PMAP_HEAD_TO_MAP_UNLOCK();
#if defined(I386_CPU)
if (needs_update)
pmap_update();
#endif
return(result != 0);
}
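
/*
 * example (hypothetical usage sketch): clearing a managed page's
 * modify bit (the return value says whether it was set in any
 * mapping or in the cached attributes):
 *
 *	was_modified = pmap_change_attrs(pg, 0, PG_M);
 */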
/*
* p m a p p r o t e c t i o n f u n c t i o n s
*/
/*
* pmap_page_protect: change the protection of all recorded mappings
* of a managed page
*
* => NOTE: this is an inline function in pmap.h
*/
/* see pmap.h */
/*
 * pmap_protect: set the protection of the pages in a pmap
*
* => NOTE: this is an inline function in pmap.h
*/
/* see pmap.h */
/*
* pmap_write_protect: write-protect pages in a pmap
*/
void
pmap_write_protect(pmap, sva, eva, prot)
struct pmap *pmap;
vaddr_t sva, eva;
vm_prot_t prot;
{
pt_entry_t *ptes, *spte, *epte, npte;
struct pmap_remove_record pmap_rr, *prr;
vaddr_t blockend, va;
u_int32_t md_prot;
ptes = pmap_map_ptes(pmap); /* locks pmap */
/* need to worry about TLB? [TLB stores protection bits] */
if (pmap_is_curpmap(pmap)) {
prr = &pmap_rr;
prr->prr_npages = 0;
} else {
prr = NULL;
}
/* should be ok, but just in case ... */
sva &= PG_FRAME;
eva &= PG_FRAME;
for (/* null */ ; sva < eva ; sva = blockend) {
blockend = (sva & PD_MASK) + NBPD;
if (blockend > eva)
blockend = eva;
/*
* XXXCDC: our PTE mappings should never be write-protected!
*
* long term solution is to move the PTEs out of user
		 * address space and into kernel address space (up
* with APTE). then we can set VM_MAXUSER_ADDRESS to
* be VM_MAX_ADDRESS.
*/
/* XXXCDC: ugly hack to avoid freeing PDP here */
if (pdei(sva) == PDSLOT_PTE)
continue;
/* empty block? */
if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
continue;
md_prot = protection_codes[prot];
if (sva < VM_MAXUSER_ADDRESS)
md_prot |= PG_u;
else if (sva < VM_MAX_ADDRESS)
/* XXX: write-prot our PTES? never! */
md_prot |= (PG_u | PG_RW);
spte = &ptes[i386_btop(sva)];
epte = &ptes[i386_btop(blockend)];
for (/*null */; spte < epte ; spte++) {
if (!pmap_valid_entry(*spte)) /* no mapping? */
continue;
npte = (*spte & ~PG_PROT) | md_prot;
if (npte != *spte) {
*spte = npte; /* zap! */
if (prr) { /* worried about tlb flushing? */
va = i386_ptob(spte - ptes);
if (npte & PG_G) {
/* PG_G requires this */
pmap_update_pg(va);
} else {
if (prr->prr_npages <
PMAP_RR_MAX) {
prr->prr_vas[
prr->prr_npages++] =
va;
} else {
if (prr->prr_npages ==
PMAP_RR_MAX)
/* signal an overflow */
prr->prr_npages++;
}
}
} /* if (prr) */
} /* npte != *spte */
} /* for loop */
}
/*
* if we kept a removal record and removed some pages update the TLB
*/
if (prr && prr->prr_npages) {
#if defined(I386_CPU)
if (cpu_class == CPUCLASS_386) {
pmap_update();
} else
#endif
{ /* not I386 */
if (prr->prr_npages > PMAP_RR_MAX) {
pmap_update();
} else {
while (prr->prr_npages) {
pmap_update_pg(prr->prr_vas[
--prr->prr_npages]);
}
}
} /* not I386 */
}
pmap_unmap_ptes(pmap); /* unlocks pmap */
}
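
/*
 * example (hypothetical usage sketch): revoking write access to a
 * range of an address space:
 *
 *	pmap_write_protect(pmap, sva, eva, VM_PROT_READ);
 *
 * protection_codes[] translates the vm_prot_t into i386 PG_* bits.
 */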
/*
* end of protection functions
*/
/*
* pmap_unwire: clear the wired bit in the PTE
*
* => mapping should already be in map
*/
void
pmap_unwire(pmap, va)
struct pmap *pmap;
vaddr_t va;
{
pt_entry_t *ptes;
if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) {
ptes = pmap_map_ptes(pmap); /* locks pmap */
if (!pmap_valid_entry(ptes[i386_btop(va)]))
panic("pmap_unwire: invalid (unmapped) va");
if ((ptes[i386_btop(va)] & PG_W) != 0) {
ptes[i386_btop(va)] &= ~PG_W;
pmap->pm_stats.wired_count--;
}
#ifdef DIAGNOSTIC
else {
printf("pmap_unwire: wiring for pmap %p va 0x%lx "
"didn't change!\n", pmap, va);
}
#endif
pmap_unmap_ptes(pmap); /* unlocks map */
}
#ifdef DIAGNOSTIC
else {
panic("pmap_unwire: invalid PDE");
}
#endif
}
/*
* pmap_collect: free resources held by a pmap
*
* => optional function.
* => called when a process is swapped out to free memory.
*/
void
pmap_collect(pmap)
struct pmap *pmap;
{
/*
* free all of the pt pages by removing the physical mappings
* for its entire address space.
*/
pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
}
/*
* pmap_transfer: transfer (move or copy) mapping from one pmap
* to another.
*
* => this function is optional, it doesn't have to do anything
* => we assume that the mapping in the src pmap is valid (i.e. that
* it doesn't run off the end of the map's virtual space).
 * => we assume saddr and daddr are page aligned and that len is a
 *	multiple of the page size
*/
void
pmap_transfer(dstpmap, srcpmap, daddr, len, saddr, move)
struct pmap *dstpmap, *srcpmap;
vaddr_t daddr, saddr;
vsize_t len;
boolean_t move;
{
/* base address of PTEs, dst could be NULL */
pt_entry_t *srcptes, *dstptes;
struct pmap_transfer_location srcl, dstl;
int dstvalid; /* # of PTEs left in dst's current PTP */
struct pmap *mapped_pmap; /* the pmap we passed to pmap_map_ptes */
vsize_t blklen;
int blkpgs, toxfer;
boolean_t ok;
#ifdef DIAGNOSTIC
/*
* sanity check: let's make sure our len doesn't overflow our dst
* space.
*/
if (daddr < VM_MAXUSER_ADDRESS) {
if (VM_MAXUSER_ADDRESS - daddr < len) {
printf("pmap_transfer: no room in user pmap "
"(addr=0x%lx, len=0x%lx)\n", daddr, len);
return;
}
} else if (daddr < VM_MIN_KERNEL_ADDRESS ||
daddr >= VM_MAX_KERNEL_ADDRESS) {
printf("pmap_transfer: invalid transfer address 0x%lx\n",
		    daddr);
		return;
} else {
if (VM_MAX_KERNEL_ADDRESS - daddr < len) {
printf("pmap_transfer: no room in kernel pmap "
"(addr=0x%lx, len=0x%lx)\n", daddr, len);
return;
}
}
#endif
/*
* ideally we would like to have either src or dst pmap's be the
* current pmap so that we can map the other one in APTE space
* (if needed... one of the maps could be the kernel's pmap).
*
* however, if we can't get this, then we have to use the tmpmap
* (alternately we could punt).
*/
if (!pmap_is_curpmap(dstpmap) && !pmap_is_curpmap(srcpmap)) {
dstptes = NULL; /* dstptes NOT mapped */
srcptes = pmap_map_ptes(srcpmap); /* let's map the source */
mapped_pmap = srcpmap;
} else {
if (!pmap_is_curpmap(srcpmap)) {
srcptes = pmap_map_ptes(srcpmap); /* possible APTE */
dstptes = PTE_BASE;
mapped_pmap = srcpmap;
} else {
dstptes = pmap_map_ptes(dstpmap); /* possible APTE */
srcptes = PTE_BASE;
mapped_pmap = dstpmap;
}
}
/*
* at this point we know that the srcptes are mapped. the dstptes
* are mapped if (dstptes != NULL). if (dstptes == NULL) then we
* will have to map the dst PTPs page at a time using the tmpmap.
* [XXX: is it worth the effort, or should we just punt?]
*/
srcl.addr = saddr;
srcl.pte = &srcptes[i386_btop(srcl.addr)];
srcl.ptp = NULL;
dstl.addr = daddr;
if (dstptes)
dstl.pte = &dstptes[i386_btop(dstl.addr)];
else
dstl.pte = NULL; /* we map page at a time */
dstl.ptp = NULL;
dstvalid = 0; /* force us to load a new dst PTP to start */
while (len) {
/*
* compute the size of this block.
*/
/* length in bytes */
blklen = i386_round_pdr(srcl.addr+1) - srcl.addr;
if (blklen > len)
blklen = len;
blkpgs = i386_btop(blklen);
/*
* if the block is not valid in the src pmap,
* then we can skip it!
*/
if (!pmap_valid_entry(srcpmap->pm_pdir[pdei(srcl.addr)])) {
len = len - blklen;
srcl.pte = srcl.pte + blkpgs;
srcl.addr += blklen;
dstl.addr += blklen;
if (blkpgs > dstvalid) {
dstvalid = 0;
dstl.ptp = NULL;
} else {
dstvalid = dstvalid - blkpgs;
}
if (dstptes == NULL && (len == 0 || dstvalid == 0)) {
if (dstl.pte) {
pmap_tmpunmap_pa();
dstl.pte = NULL;
}
} else {
dstl.pte += blkpgs;
}
continue;
}
/*
* we have a valid source block of "blkpgs" PTEs to transfer.
* if we don't have any dst PTEs ready, then get some.
*/
if (dstvalid == 0) {
if (!pmap_valid_entry(dstpmap->
pm_pdir[pdei(dstl.addr)])) {
if (dstl.addr >= VM_MIN_KERNEL_ADDRESS)
panic("pmap_transfer: missing kernel "
"PTP at 0x%lx", dstl.addr);
dstl.ptp = pmap_get_ptp(dstpmap,
pdei(dstl.addr), TRUE);
if (dstl.ptp == NULL) /* out of RAM? punt. */
break;
} else {
dstl.ptp = NULL;
}
dstvalid = i386_btop(i386_round_pdr(dstl.addr+1) -
dstl.addr);
if (dstptes == NULL) {
dstl.pte = (pt_entry_t *)
pmap_tmpmap_pa(dstpmap->
pm_pdir[pdei(dstl.addr)]
& PG_FRAME);
dstl.pte = dstl.pte + (PTES_PER_PTP - dstvalid);
}
}
/*
* we have a valid source block of "blkpgs" PTEs to transfer.
* we have a valid dst block of "dstvalid" PTEs ready.
* thus we can transfer min(blkpgs, dstvalid) PTEs now.
*/
srcl.ptp = NULL; /* don't know source PTP yet */
if (dstvalid < blkpgs)
toxfer = dstvalid;
else
toxfer = blkpgs;
if (toxfer > 0) {
ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl,
toxfer, move);
if (!ok) /* memory shortage? punt. */
break;
dstvalid -= toxfer;
blkpgs -= toxfer;
len -= i386_ptob(toxfer);
if (blkpgs == 0) /* out of src PTEs? restart */
continue;
}
/*
* we have a valid source block of "blkpgs" PTEs left
* to transfer. we have just used up our "dstvalid"
* PTEs, and thus must obtain more dst PTEs to finish
* off the src block. since we are now going to
* obtain a brand new dst PTP, we know we can finish
* the src block in one more transfer.
*/
#ifdef DIAGNOSTIC
if (dstvalid)
panic("pmap_transfer: dstvalid non-zero after drain");
if ((dstl.addr & (NBPD-1)) != 0)
panic("pmap_transfer: dstaddr not on PD boundary "
"(0x%lx)\n", dstl.addr);
#endif
if (dstptes == NULL && dstl.pte != NULL) {
/* dispose of old PT mapping */
pmap_tmpunmap_pa();
dstl.pte = NULL;
}
/*
* get new dst PTP
*/
if (!pmap_valid_entry(dstpmap->pm_pdir[pdei(dstl.addr)])) {
if (dstl.addr >= VM_MIN_KERNEL_ADDRESS)
panic("pmap_transfer: missing kernel PTP at "
"0x%lx", dstl.addr);
dstl.ptp = pmap_get_ptp(dstpmap, pdei(dstl.addr), TRUE);
if (dstl.ptp == NULL) /* out of free RAM? punt. */
break;
} else {
dstl.ptp = NULL;
}
dstvalid = PTES_PER_PTP; /* new PTP */
/*
* if the dstptes are un-mapped, then we need to tmpmap in the
* dstl.ptp.
*/
if (dstptes == NULL) {
dstl.pte = (pt_entry_t *)
pmap_tmpmap_pa(dstpmap->pm_pdir[pdei(dstl.addr)]
& PG_FRAME);
}
/*
* we have a valid source block of "blkpgs" PTEs left
* to transfer. we just got a brand new dst PTP to
* receive these PTEs.
*/
#ifdef DIAGNOSTIC
if (dstvalid < blkpgs)
panic("pmap_transfer: too many blkpgs?");
#endif
toxfer = blkpgs;
ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, toxfer,
move);
if (!ok) /* memory shortage? punt. */
break;
dstvalid -= toxfer;
blkpgs -= toxfer;
len -= i386_ptob(toxfer);
/*
* done src pte block
*/
}
if (dstptes == NULL && dstl.pte != NULL)
pmap_tmpunmap_pa(); /* dst PTP still mapped? */
pmap_unmap_ptes(mapped_pmap);
}
/*
* pmap_transfer_ptes: transfer PTEs from one pmap to another
*
* => we assume that the needed PTPs are mapped and that we will
* not cross a block boundary.
 * => we return TRUE if we transferred all PTEs, FALSE if we were
* unable to allocate a pv_entry
*/
static boolean_t
pmap_transfer_ptes(srcpmap, srcl, dstpmap, dstl, toxfer, move)
struct pmap *srcpmap, *dstpmap;
struct pmap_transfer_location *srcl, *dstl;
int toxfer;
boolean_t move;
{
pt_entry_t dstproto, opte;
int bank, off;
struct pv_head *pvh;
struct pv_entry *pve, *lpve;
/*
* generate "prototype" dst PTE
*/
if (dstl->addr < VM_MAX_ADDRESS)
dstproto = PG_u; /* "user" page */
else
dstproto = pmap_pg_g; /* kernel page */
/*
* ensure we have dst PTP for user addresses.
*/
if (dstl->ptp == NULL && dstl->addr < VM_MAXUSER_ADDRESS)
dstl->ptp = PHYS_TO_VM_PAGE(dstpmap->pm_pdir[pdei(dstl->addr)] &
PG_FRAME);
/*
* main loop over range
*/
for (/*null*/; toxfer > 0 ; toxfer--,
srcl->addr += NBPG, dstl->addr += NBPG,
srcl->pte++, dstl->pte++) {
		if (!pmap_valid_entry(*srcl->pte)) /* skip invalid entries */
continue;
if (pmap_valid_entry(*dstl->pte))
panic("pmap_transfer_ptes: attempt to overwrite "
"active entry");
/*
* let's not worry about non-pvlist mappings (typically device
* pager mappings).
*/
opte = *srcl->pte;
if ((opte & PG_PVLIST) == 0)
continue;
/*
* if we are moving the mapping, then we can just adjust the
* current pv_entry. if we are copying the mapping, then we
* need to allocate a new pv_entry to account for it.
*/
if (move == FALSE) {
pve = pmap_alloc_pv(dstpmap, ALLOCPV_TRY);
if (pve == NULL)
return(FALSE); /* punt! */
} else {
pve = NULL; /* XXX: quiet gcc warning */
}
/*
* find the pv_head for this mapping. since our mapping is
* on the pvlist (PG_PVLIST), there must be a pv_head.
*/
bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
if (bank == -1)
panic("pmap_transfer_ptes: PG_PVLIST PTE and "
"no pv_head!");
pvh = &vm_physmem[bank].pmseg.pvhead[off];
/*
* now lock down the pvhead and find the current entry (there
* must be one).
*/
simple_lock(&pvh->pvh_lock);
for (lpve = pvh->pvh_list ; lpve ; lpve = lpve->pv_next)
if (lpve->pv_pmap == srcpmap &&
lpve->pv_va == srcl->addr)
break;
if (lpve == NULL)
panic("pmap_transfer_ptes: PG_PVLIST PTE, but "
"entry not found");
/*
		 * update src ptp. if the ptp is null in the pv_entry, then
		 * we are not counting valid entries for this ptp (this is only
* true for kernel PTPs).
*/
if (srcl->ptp == NULL)
srcl->ptp = lpve->pv_ptp;
#ifdef DIAGNOSTIC
if (srcl->ptp &&
(srcpmap->pm_pdir[pdei(srcl->addr)] & PG_FRAME) !=
VM_PAGE_TO_PHYS(srcl->ptp))
panic("pmap_transfer_ptes: pm_pdir - pv_ptp mismatch!");
#endif
/*
* for move, update the pve we just found (lpve) to
* point to its new mapping. for copy, init the new
* pve and put it in the list.
*/
if (move == TRUE) {
pve = lpve;
}
pve->pv_pmap = dstpmap;
pve->pv_va = dstl->addr;
pve->pv_ptp = dstl->ptp;
if (move == FALSE) { /* link in copy */
pve->pv_next = lpve->pv_next;
lpve->pv_next = pve;
}
/*
* sync the R/M bits while we are here.
*/
vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
/*
* now actually update the ptes and unlock the pvlist.
*/
if (move) {
*srcl->pte = 0; /* zap! */
if (pmap_is_curpmap(srcpmap))
pmap_update_pg(srcl->addr);
if (srcl->ptp)
/* don't bother trying to free PTP */
srcl->ptp->wire_count--;
srcpmap->pm_stats.resident_count--;
if (opte & PG_W)
srcpmap->pm_stats.wired_count--;
}
*dstl->pte = (opte & ~(PG_u|PG_U|PG_M|PG_G|PG_W)) | dstproto;
dstpmap->pm_stats.resident_count++;
if (dstl->ptp)
dstl->ptp->wire_count++;
simple_unlock(&pvh->pvh_lock);
}
return(TRUE);
}
/*
* pmap_copy: copy mappings from one pmap to another
*
* => optional function
* void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
*/
/*
* defined as macro call to pmap_transfer in pmap.h
*/
/*
* pmap_move: move mappings from one pmap to another
*
* => optional function
* void pmap_move(dst_pmap, src_pmap, dst_addr, len, src_addr)
*/
/*
* defined as macro call to pmap_transfer in pmap.h
*/
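
/*
 * sketch (assumption, see pmap.h for the real definitions): given the
 * argument orders above, the macros presumably look something like:
 *
 *	#define pmap_copy(DP, SP, D, L, S) \
 *		pmap_transfer((DP), (SP), (D), (L), (S), FALSE)
 *	#define pmap_move(DP, SP, D, L, S) \
 *		pmap_transfer((DP), (SP), (D), (L), (S), TRUE)
 */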
/*
* pmap_enter: enter a mapping into a pmap
*
* => must be done "now" ... no lazy-evaluation
* => we set pmap => pv_head locking
*/
void
pmap_enter(pmap, va, pa, prot, wired, access_type)
struct pmap *pmap;
vaddr_t va;
paddr_t pa;
vm_prot_t prot;
boolean_t wired;
vm_prot_t access_type;
{
pt_entry_t *ptes, opte, npte;
struct vm_page *ptp;
struct pv_head *pvh;
struct pv_entry *pve;
int bank, off;
#ifdef DIAGNOSTIC
/* sanity check: totally out of range? */
if (va >= VM_MAX_KERNEL_ADDRESS)
panic("pmap_enter: too big");
if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
panic("pmap_enter: trying to map over PDP/APDP!");
/* sanity check: kernel PTPs should already have been pre-allocated */
if (va >= VM_MIN_KERNEL_ADDRESS &&
!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
panic("pmap_enter: missing kernel PTP!");
#endif
/* get lock */
PMAP_MAP_TO_HEAD_LOCK();
/*
* map in ptes and get a pointer to our PTP (unless we are the kernel)
*/
ptes = pmap_map_ptes(pmap); /* locks pmap */
if (pmap == pmap_kernel())
ptp = NULL;
else
ptp = pmap_get_ptp(pmap, pdei(va), FALSE);
opte = ptes[i386_btop(va)]; /* old PTE */
/*
* is there currently a valid mapping at our VA?
*/
if (pmap_valid_entry(opte)) {
/*
* first, update pm_stats. resident count will not
* change since we are replacing/changing a valid
* mapping. wired count might change...
*/
if (wired && (opte & PG_W) == 0)
pmap->pm_stats.wired_count++;
else if (!wired && (opte & PG_W) != 0)
pmap->pm_stats.wired_count--;
/*
* is the currently mapped PA the same as the one we
* want to map?
*/
if ((opte & PG_FRAME) == pa) {
/* if this is on the PVLIST, sync R/M bit */
if (opte & PG_PVLIST) {
bank = vm_physseg_find(atop(pa), &off);
if (bank == -1)
panic("pmap_enter: PG_PVLIST mapping "
"with unmanaged page");
pvh = &vm_physmem[bank].pmseg.pvhead[off];
simple_lock(&pvh->pvh_lock);
vm_physmem[bank].pmseg.attrs[off] |= opte;
simple_unlock(&pvh->pvh_lock);
} else {
pvh = NULL; /* ensure !PG_PVLIST */
}
goto enter_now;
}
/*
* changing PAs: we must remove the old one first
*/
/*
* if current mapping is on a pvlist,
* remove it (sync R/M bits)
*/
if (opte & PG_PVLIST) {
bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
if (bank == -1)
panic("pmap_enter: PG_PVLIST mapping "
"with unmanaged page");
pvh = &vm_physmem[bank].pmseg.pvhead[off];
simple_lock(&pvh->pvh_lock);
pve = pmap_remove_pv(pvh, pmap, va);
vm_physmem[bank].pmseg.attrs[off] |= opte;
simple_unlock(&pvh->pvh_lock);
} else {
pve = NULL;
}
} else { /* opte not valid */
pve = NULL;
pmap->pm_stats.resident_count++;
if (wired)
pmap->pm_stats.wired_count++;
if (ptp)
			ptp->wire_count++; /* count # of valid entries */
}
/*
* at this point pm_stats has been updated. pve is either NULL
* or points to a now-free pv_entry structure (the latter case is
* if we called pmap_remove_pv above).
*
* if this entry is to be on a pvlist, enter it now.
*/
bank = vm_physseg_find(atop(pa), &off);
if (pmap_initialized && bank != -1) {
pvh = &vm_physmem[bank].pmseg.pvhead[off];
if (pve == NULL)
pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
/* lock pvh when adding */
pmap_enter_pv(pvh, pve, pmap, va, ptp);
} else {
/* new mapping is not PG_PVLIST. free pve if we've got one */
pvh = NULL; /* ensure !PG_PVLIST */
if (pve)
pmap_free_pv(pmap, pve);
}
enter_now:
/*
* at this point pvh is !NULL if we want the PG_PVLIST bit set
*/
npte = pa | protection_codes[prot] | PG_V;
if (pvh)
npte |= PG_PVLIST;
if (wired)
npte |= PG_W;
if (va < VM_MAXUSER_ADDRESS)
npte |= PG_u;
else if (va < VM_MAX_ADDRESS)
npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
if (pmap == pmap_kernel())
npte |= pmap_pg_g;
ptes[i386_btop(va)] = npte; /* zap! */
if ((opte & ~(PG_M|PG_U)) != npte && pmap_is_curpmap(pmap))
pmap_update_pg(va);
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
}
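
/*
 * example (hypothetical usage sketch): a fault handler entering a
 * wired, writable mapping for a managed page ("pg" is illustrative):
 *
 *	pmap_enter(pmap, va, VM_PAGE_TO_PHYS(pg),
 *	    VM_PROT_READ|VM_PROT_WRITE, TRUE, VM_PROT_WRITE);
 *
 * the access_type argument is only a hint and may be zero, as in
 * pmap_map() above.
 */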
/*
* pmap_growkernel: increase usage of KVM space
*
* => we allocate new PTPs for the kernel and install them in all
* the pmaps on the system.
*/
vaddr_t
pmap_growkernel(maxkvaddr)
vaddr_t maxkvaddr;
{
struct pmap *kpm = pmap_kernel(), *pm;
int needed_kpde; /* needed number of kernel PTPs */
int s;
paddr_t ptaddr;
needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1))
/ NBPD;
if (needed_kpde <= nkpde)
goto out; /* we are OK */
/*
* whoops! we need to add kernel PTPs
*/
s = splhigh(); /* to be safe */
simple_lock(&kpm->pm_obj.vmobjlock);
for (/*null*/ ; nkpde < needed_kpde ; nkpde++) {
if (pmap_initialized == FALSE) {
/*
* we're growing the kernel pmap early (from
* uvm_pageboot_alloc()). this case must be
* handled a little differently.
*/
if (uvm_page_physget(&ptaddr) == FALSE)
panic("pmap_growkernel: out of memory");
kpm->pm_pdir[PDSLOT_KERN + nkpde] =
ptaddr | PG_RW | PG_V;
/* count PTP as resident */
kpm->pm_stats.resident_count++;
continue;
}
pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE);
/* PG_u not for kernel */
kpm->pm_pdir[PDSLOT_KERN + nkpde] &= ~PG_u;
/* distribute new kernel PTP to all active pmaps */
simple_lock(&pmaps_lock);
for (pm = pmaps.lh_first; pm != NULL;
pm = pm->pm_list.le_next) {
pm->pm_pdir[PDSLOT_KERN + nkpde] =
kpm->pm_pdir[PDSLOT_KERN + nkpde];
}
simple_unlock(&pmaps_lock);
}
simple_unlock(&kpm->pm_obj.vmobjlock);
splx(s);
out:
return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD));
}
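
/*
 * example (hypothetical usage sketch): the VM system calls this when
 * it is about to use kernel VA beyond the currently mapped range and
 * takes the return value as the new high-water mark ("needed_kva"
 * and "new_top" are made-up names):
 *
 *	new_top = pmap_growkernel(needed_kva);
 */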
#ifdef DEBUG
void pmap_dump __P((struct pmap *, vaddr_t, vaddr_t));
/*
* pmap_dump: dump all the mappings from a pmap
*
* => caller should not be holding any pmap locks
*/
void
pmap_dump(pmap, sva, eva)
struct pmap *pmap;
vaddr_t sva, eva;
{
pt_entry_t *ptes, *pte;
vaddr_t blkendva;
/*
* if end is out of range truncate.
	 * if (end <= start) update to max.
*/
if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
eva = VM_MAXUSER_ADDRESS;
/*
* we lock in the pmap => pv_head direction
*/
PMAP_MAP_TO_HEAD_LOCK();
ptes = pmap_map_ptes(pmap); /* locks pmap */
/*
* dumping a range of pages: we dump in PTP sized blocks (4MB)
*/
for (/* null */ ; sva < eva ; sva = blkendva) {
/* determine range of block */
blkendva = i386_round_pdr(sva+1);
if (blkendva > eva)
blkendva = eva;
/* valid block? */
if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
continue;
pte = &ptes[i386_btop(sva)];
for (/* null */; sva < blkendva ; sva += NBPG, pte++) {
if (!pmap_valid_entry(*pte))
continue;
printf("va %#lx -> pa %#x (pte=%#x)\n",
			    sva, *pte & PG_FRAME, *pte);
}
}
pmap_unmap_ptes(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
}
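
/*
 * example (hypothetical usage sketch, e.g. from the kernel debugger):
 * dumping all user mappings of a process' pmap:
 *
 *	pmap_dump(p->p_vmspace->vm_map.pmap, 0, 0);
 *
 * (passing eva <= sva widens the range to VM_MAXUSER_ADDRESS, as
 * noted above.)
 */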
#endif