diff --git a/sys/arch/pc532/include/pmap.new.h b/sys/arch/pc532/include/pmap.new.h new file mode 100644 index 000000000000..35f3b6263154 --- /dev/null +++ b/sys/arch/pc532/include/pmap.new.h @@ -0,0 +1,511 @@ +/* $NetBSD: pmap.new.h,v 1.1.1.1 1998/04/24 20:10:19 matthias Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgment: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * pmap.h: see pmap.c for the history of this pmap module. + */ + +#ifndef _I386_PMAP_H_ +#define _I386_PMAP_H_ + +#include +#include +#include + +/* + * see pte.h for a description of i386 MMU terminology and hardware + * interface. + * + * a pmap describes a processes' 4GB virtual address space. this + * virtual address space can be broken up into 1024 4MB regions which + * are described by PDEs in the PDP. the PDEs are defined as follows: + * + * (ranges are inclusive -> exclusive, just like vm_map_entry start/end) + * (the following assumes that KERNBASE is 0xf0000000) + * + * PDE#s VA range usage + * 0->959 0x0 -> 0xefc00000 user address space, note that the + * max user address is 0xefbfe000 + * the final two pages in the last 4MB + * used to be reserved for the UAREA + * but now are no longer used + * 959 0xefc00000-> recursive mapping of PDP (used for + * 0xf0000000 linear mapping of PTPs) + * 960->1023 0xf0000000-> kernel address space (constant + * 0xffc00000 across all pmap's/processes) + * 1023 0xffc00000-> "alternate" recursive PDP mapping + * (for other pmaps) + * + * + * note: a recursive PDP mapping provides a way to map all the PTEs for + * a 4GB address space into a linear chunk of virtual memory. in other + * words, the PTE for page 0 is the first int mapped into the 4MB recursive + * area. the PTE for page 1 is the second int. the very last int in the + * 4MB range is the PTE that maps VA 0xffffe000 (the last page in a 4GB + * address). 
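+ *
+ * a quick worked example (assuming NBPG == 4096 and the layout above):
+ *
+ *	PTE for VA 0x00000000 lives at PTE_BASE + 0       == 0xefc00000
+ *	PTE for VA 0x00001000 lives at PTE_BASE + 1       == 0xefc00004
+ *	PTE for VA 0xf0000000 lives at PTE_BASE + 0xf0000 == 0xeffc0000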
+ * + * all pmap's PD's must have the same values in slots 960->1023 so that + * the kernel is always mapped in every process. these values are loaded + * into the PD at pmap creation time. + * + * at any one time only one pmap can be active on a processor. this is + * the pmap whose PDP is pointed to by processor register %cr3. this pmap + * will have all its PTEs mapped into memory at the recursive mapping + * point (slot #959 as show above). when the pmap code wants to find the + * PTE for a virtual address, all it has to do is the following: + * + * address of PTE = (959 * 4MB) + (VA / NBPG) * sizeof(pt_entry_t) + * = 0xefc00000 + (VA /4096) * 4 + * + * what happens if the pmap layer is asked to perform an operation + * on a pmap that is not the one which is currently active? in that + * case we take the PA of the PDP of non-active pmap and put it in + * slot 1023 of the active pmap. this causes the non-active pmap's + * PTEs to get mapped in the final 4MB of the 4GB address space + * (e.g. starting at 0xffc00000). + * + * the following figure shows the effects of the recursive PDP mapping: + * + * PDP (%cr3) + * +----+ + * | 0| -> PTP#0 that maps VA 0x0 -> 0x400000 + * | | + * | | + * | 959| -> points back to PDP (%cr3) mapping VA 0xefc00000 -> 0xf0000000 + * | 960| -> first kernel PTP (maps 0xf0000000 -> 0xf0400000) + * | | + * |1023| -> points to alternate pmap's PDP (maps 0xffc00000 -> end) + * +----+ + * + * note that the PDE#959 VA (0xefc00000) is defined as "PTE_BASE" + * note that the PDE#1023 VA (0xffc00000) is defined as "APTE_BASE" + * + * starting at VA 0xefc00000 the current active PDP (%cr3) acts as a + * PTP: + * + * PTP#959 == PDP(%cr3) => maps VA 0xefc00000 -> 0xf0000000 + * +----+ + * | 0| -> maps the contents of PTP#0 at VA 0xefc00000->0xefc01000 + * | | + * | | + * | 959| -> maps contents of PTP#959 (the PDP) at VA 0xeffdf000 + * | 960| -> maps contents of first kernel PTP + * | | + * |1023| + * +----+ + * + * note that mapping of the PDP at PTP#959's VA (0xeffdf000) is + * defined as "PDP_BASE".... within that mapping there are two + * defines: + * "PDP_PDE" (0xeffdff7c) is the VA of the PDE in the PDP + * which points back to itself. + * "APDP_PDE" (0xeffdfffc) is the VA of the PDE in the PDP which + * establishes the recursive mapping of the alternate pmap. + * to set the alternate PDP, one just has to put the correct + * PA info in *APDP_PDE. + * + * note that in the APTE_BASE space, the APDP appears at VA + * "APDP_BASE" (0xfffff000). + */ + +/* + * the following defines identify the slots used as described above. 
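+ * (e.g. with KERNBASE == 0xf0000000 and NBPD == 4MB, as assumed above,
+ * KERNBASE/NBPD == 960, so PDSLOT_PTE == 959, PDSLOT_KERN == 960, and
+ * PDSLOT_APTE == 1023.)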
+ */ + +#define PDSLOT_PTE ((KERNBASE/NBPD)-1) /* 959: for recursive PDP map */ +#define PDSLOT_KERN (KERNBASE/NBPD) /* 960: start of kernel space */ +#define PDSLOT_APTE ((unsigned)1023) /* 1023: alternative recursive slot */ + +/* + * the following defines give the virtual addresses of various MMU + * data structures: + * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings + * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD + * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP + */ + +#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) +#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) +#define PDP_BASE ((pd_entry_t *) (((char *)PTE_BASE) + (PDSLOT_PTE * NBPG)) ) +#define APDP_BASE ((pd_entry_t *) (((char *)APTE_BASE) + (PDSLOT_APTE * NBPG)) ) +#define PDP_PDE (PDP_BASE + PDSLOT_PTE) +#define APDP_PDE (PDP_BASE + PDSLOT_APTE) + +/* + * XXXCDC: tmp xlate from old names: + * PTDPTDI -> PDSLOT_PTE + * KPTDI -> PDSLOT_KERN + * APTDPTDI -> PDSLOT_APTE + */ + +/* + * the follow define determines how many PTPs should be set up for the + * kernel by locore.s at boot time. this should be large enough to + * get the VM system running. once the VM system is running, the + * pmap module can add more PTPs to the kernel area on demand. + */ + +#ifndef NKPTP +#define NKPTP 4 /* 16MB to start */ +#endif +#define NKPTP_MIN 4 /* smallest value we allow */ +#define NKPTP_MAX (1024 - (KERNBASE/NBPD) - 1) + /* largest value (-1 for APTP space) */ + +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + * kvtopte: same as above (takes a KVA, but doesn't matter with this pmap) + * ptetov: given a pointer to a PTE, return the VA that it maps + * vtophys: translate a VA to the PA mapped to it + * + * plus alternative versions of the above + */ + +#define vtopte(VA) (PTE_BASE + i386_btop(VA)) +#define kvtopte(VA) vtopte(VA) +#define ptetov(PT) (i386_ptob(PT - PTE_BASE)) +#define vtophys(VA) ((*vtopte(VA) & PG_FRAME) | ((unsigned)(VA) & ~PG_FRAME)) +#define avtopte(VA) (APTE_BASE + i386_btop(VA)) +#define ptetoav(PT) (i386_ptob(PT - APTE_BASE)) +#define avtophys(VA) ((*avtopte(VA) & PG_FRAME) | ((unsigned)(VA) & ~PG_FRAME)) + +/* + * pdei/ptei: generate index into PDP/PTP from a VA + */ +#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) +#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) + +/* + * PTP macros: + * a PTP's index is the PD index of the PDE that points to it + * a PTP's offset is the byte-offset in the PTE space that this PTP is at + * a PTP's VA is the first VA mapped by that PTP + * + * note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries) + * NBPD == number of bytes a PTP can map (4MB) + */ + +#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ +#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ +#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ +#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ + +/* + * PG_AVAIL usage: we make use of the ignored bits of the PTE + */ + +#define PG_W PG_AVAIL1 /* "wired" mapping */ +#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */ +/* PG_AVAIL3 not used */ + +#ifdef _KERNEL +/* + * pmap data structures: see pmap.c for details of locking. 
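+ * (in rough outline: each user pmap is a uvm_object whose pages are its
+ * PTPs, and every managed physical page has a pv_head whose pv_entry
+ * list records each <pmap,va> pair that currently maps it.)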
+ */ + +struct pmap; +typedef struct pmap *pmap_t; + +/* + * we maintain a list of all non-kernel pmaps + */ + +LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ + +/* + * the pmap structure + * + * note that the pm_obj contains the simple_lock, the reference count, + * page list, and number of PTPs within the pmap. + */ + +struct pmap { + struct uvm_object pm_obj; /* object (lck by object lock) */ + LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */ + pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */ + u_int32_t pm_pdirpa; /* PA of PD (read-only after create) */ + struct vm_page *pm_ptphint; /* pointer to a random PTP in our pmap */ + struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */ +}; + +/* + * for each managed physical page we maintain a list of 's + * which it is mapped at. the list is headed by a pv_head structure. + * there is one pv_head per managed phys page (allocated at boot time). + * the pv_head structure points to a list of pv_entry structures (each + * describes one mapping). + */ + +struct pv_entry; + +struct pv_head { + simple_lock_data_t pvh_lock; /* locks every pv on this list */ + struct pv_entry *pvh_list; /* head of list (locked by pvh_lock) */ +}; + +struct pv_entry { /* all fields locked by their pvh_lock */ + struct pv_entry *pv_next; /* next entry */ + struct pmap *pv_pmap; /* the pmap */ + vm_offset_t pv_va; /* the virtual address */ + struct vm_page *pv_ptp; /* the vm_page of the PTP */ +}; + +/* + * pv_entrys are dynamically allocated in chunks from a single page. + * we keep track of how many pv_entrys are in use for each page and + * we can free pv_entry pages if needed. there is one lock for the + * entire allocation system. + */ + +struct pv_page_info { + TAILQ_ENTRY(pv_page) pvpi_list; + struct pv_entry *pvpi_pvfree; + int pvpi_nfree; +}; + +/* + * number of pv_entry's in a pv_page + * (note: won't work on systems where NPBG isn't a constant) + */ + +#define PVE_PER_PVPAGE ( (NBPG - sizeof(struct pv_page_info)) / \ + sizeof(struct pv_entry) ) + +/* + * a pv_page: where pv_entrys are allocated from + */ + +struct pv_page { + struct pv_page_info pvinfo; + struct pv_entry pvents[PVE_PER_PVPAGE]; +}; + +/* + * pmap_remove_record: a record of VAs that have been unmapped, used to + * flush TLB. if we have more than PMAP_RR_MAX then we stop recording. + */ + +#define PMAP_RR_MAX 16 /* max of 16 pages (64K) */ + +struct pmap_remove_record { + int prr_npages; + vm_offset_t prr_vas[PMAP_RR_MAX]; +}; + +/* + * pmap_transfer_location: used to pass the current location in the + * pmap between pmap_transfer and pmap_transfer_ptes [e.g. during + * a pmap_copy]. + */ + +struct pmap_transfer_location { + vm_offset_t addr; /* the address (page-aligned) */ + pt_entry_t *pte; /* the PTE that maps address */ + struct vm_page *ptp; /* the PTP that the PTE lives in */ +}; + +/* + * global kernel variables + */ + +/* PTDpaddr: is the physical address of the kernel's PDP */ +extern u_long PTDpaddr; + +extern struct pmap kernel_pmap_store; /* kernel pmap */ +extern int nkpde; /* current # of PDEs for kernel */ +extern int pmap_pg_g; /* do we support PG_G? 
*/ + +/* + * macros + */ + +#define pmap_kernel() (&kernel_pmap_store) +#define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) +#define pmap_update() tlbflush() + +#define pmap_clear_modify(pg) pmap_change_attrs(pg, 0, PG_M) +#define pmap_clear_reference(pg) pmap_change_attrs(pg, 0, PG_U) +#define pmap_copy(DP,SP,D,L,S) pmap_transfer(DP,SP,D,L,S, FALSE) +#define pmap_is_modified(pg) pmap_test_attrs(pg, PG_M) +#define pmap_is_referenced(pg) pmap_test_attrs(pg, PG_U) +#define pmap_move(DP,SP,D,L,S) pmap_transfer(DP,SP,D,L,S, TRUE) +#define pmap_phys_address(ppn) i386_ptob(ppn) +#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ + + +/* + * prototypes + */ + +void pmap_activate __P((struct proc *)); +void pmap_bootstrap __P((vm_offset_t)); +boolean_t pmap_change_attrs __P((struct vm_page *, int, int)); +void pmap_deactivate __P((struct proc *)); +static void pmap_kenter_pa __P((vm_offset_t, vm_offset_t, vm_prot_t)); +static void pmap_page_protect __P((struct vm_page *, vm_prot_t)); +void pmap_page_remove __P((struct vm_page *)); +static void pmap_protect __P((struct pmap *, vm_offset_t, + vm_offset_t, vm_prot_t)); +void pmap_remove __P((struct pmap *, vm_offset_t, vm_offset_t)); +boolean_t pmap_test_attrs __P((struct vm_page *, int)); +void pmap_transfer __P((struct pmap *, struct pmap *, vm_offset_t, + vm_size_t, vm_offset_t, boolean_t)); +static void pmap_update_pg __P((vm_offset_t)); +static void pmap_update_2pg __P((vm_offset_t,vm_offset_t)); +void pmap_write_protect __P((struct pmap *, vm_offset_t, + vm_offset_t, vm_prot_t)); + +vm_offset_t reserve_dumppages __P((vm_offset_t)); /* XXX: not a pmap fn */ + +#define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */ + +/* + * inline functions + */ + +/* + * pmap_update_pg: flush one page from the TLB (or flush the whole thing + * if hardware doesn't support one-page flushing) + */ + +__inline static void pmap_update_pg(va) + +vm_offset_t va; + +{ +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); + else +#endif + invlpg((u_int) va); +} + +/* + * pmap_update_2pg: flush two pages from the TLB + */ + +__inline static void pmap_update_2pg(va, vb) + +vm_offset_t va, vb; + +{ +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); + else +#endif + { + invlpg((u_int) va); + invlpg((u_int) vb); + } +} + +/* + * pmap_page_protect: change the protection of all recorded mappings + * of a managed page + * + * => this function is a frontend for pmap_page_remove/pmap_change_attrs + * => we only have to worry about making the page more protected. + * unprotecting a page is done on-demand at fault time. + */ + +__inline static void pmap_page_protect(pg, prot) + +struct vm_page *pg; +vm_prot_t prot; + +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { + (void) pmap_change_attrs(pg, PG_RO, PG_RW); + } else { + pmap_page_remove(pg); + } + } +} + +/* + * pmap_protect: change the protection of pages in a pmap + * + * => this function is a frontend for pmap_remove/pmap_write_protect + * => we only have to worry about making the page more protected. + * unprotecting a page is done on-demand at fault time. 
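+ * => e.g. (a rough usage sketch): for some pmap p and page-aligned va,
+ *	pmap_protect(p, va, va + NBPG, VM_PROT_READ) write protects one
+ *	page, while a prot of VM_PROT_NONE falls through to pmap_remove
+ *	and drops the mappings entirely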
+ */ + +__inline static void pmap_protect(pmap, sva, eva, prot) + +struct pmap *pmap; +vm_offset_t sva, eva; +vm_prot_t prot; + +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) { + pmap_write_protect(pmap, sva, eva, prot); + } else { + pmap_remove(pmap, sva, eva); + } + } +} + +/* + * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking + * + * => no need to lock anything, assume va is already allocated + * => should be faster than normal pmap enter function + */ + +__inline static void pmap_kenter_pa(va, pa, prot) + +vm_offset_t va, pa; +vm_prot_t prot; + +{ + pt_entry_t *pte, opte; + + pte = vtopte(va); + opte = *pte; + *pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | + PG_V | pmap_pg_g; /* zap! */ + if (pmap_valid_entry(opte)) + pmap_update_pg(va); +} + +vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int)); + +#endif /* _KERNEL */ +#endif /* _I386_PMAP_H_ */ diff --git a/sys/arch/pc532/pc532/pmap.new.c b/sys/arch/pc532/pc532/pmap.new.c new file mode 100644 index 000000000000..644cd607d847 --- /dev/null +++ b/sys/arch/pc532/pc532/pmap.new.c @@ -0,0 +1,3561 @@ +/* $NetBSD: pmap.new.c,v 1.1.1.1 1998/04/24 20:10:19 matthias Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * pmap.c: i386 pmap module rewrite + * Chuck Cranor + * 11-Aug-97 + * + * history of this pmap module: in addition to my own input, i used + * the following references for this rewrite of the i386 pmap: + * + * [1] the NetBSD i386 pmap. this pmap appears to be based on the + * BSD hp300 pmap done by Mike Hibler at University of Utah. + * it was then ported to the i386 by William Jolitz of UUNET + * Technologies, Inc. Then Charles M. Hannum of the NetBSD + * project fixed some bugs and provided some speed ups. + * + * [2] the FreeBSD i386 pmap. 
this pmap seems to be the + * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson + * and David Greenman. + * + * [3] the Mach pmap. this pmap, from CMU, seems to have migrated + * between several processors. the VAX version was done by + * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 + * version was done by Lance Berc, Mike Kupfer, Bob Baron, + * David Golub, and Richard Draves. the alpha version was + * done by Alessandro Forin (CMU/Mach) and Chris Demetriou + * (NetBSD/alpha). + */ + +#include "opt_cputype.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#include +#include + +/* + * general info: + * + * - for an explanation of how the i386 MMU hardware works see + * the comments in . + * + * - for an explanation of the general memory structure used by + * this pmap (including the recursive mapping), see the comments + * in . + * + * this file contains the code for the "pmap module." the module's + * job is to manage the hardware's virtual to physical address mappings. + * note that there are two levels of mapping in the VM system: + * + * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's + * to map ranges of virtual address space to objects/files. for + * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only + * to the file /bin/ls starting at offset zero." note that + * the upper layer mapping is not concerned with how individual + * vm_pages are mapped. + * + * [2] the lower layer of the VM system (the pmap) maintains the mappings + * from virtual addresses. it is concerned with which vm_page is + * mapped where. for example, when you run /bin/ls and start + * at page 0x1000 the fault routine may lookup the correct page + * of the /bin/ls file and then ask the pmap layer to establish + * a mapping for it. + * + * note that information in the lower layer of the VM system can be + * thrown away since it can easily be reconstructed from the info + * in the upper layer. + * + * data structures we use include: + * + * - struct pmap: describes the address space of one thread + * - struct pv_entry: describes one mapping of a PA + * - struct pv_head: there is one pv_head per managed page of + * physical memory. the pv_head points to a list of pv_entry + * structures which describe all the pairs that this + * page is mapped in. this is critical for page based operations + * such as pmap_page_protect() [change protection on _all_ mappings + * of a page] + * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's. + * if we run out of pv_entry's we allocate a new pv_page and free + * its pv_entrys. + * - pmap_remove_record: a list of virtual addresses whose mappings + * have been changed. used for TLB flushing. + */ + +/* + * memory allocation + * + * - there are three data structures that we must dynamically allocate: + * + * [A] new process' page directory page (PDP) + * - plan 1: done at pmap_pinit() we use + * uvm_km_alloc(kernel_map, NBPG) [fka kmem_alloc] to do this + * allocation. + * + * if we are low in free physical memory then we sleep in + * uvm_km_alloc -- in this case this is ok since we are creating + * a new pmap and should not be holding any locks. + * + * if the kernel is totally out of virtual space + * (i.e. uvm_km_alloc returns NULL), then we panic. + * + * XXX: the fork code currently has no way to return an "out of + * memory, try again" error code since uvm_fork [fka vm_fork] + * is a void function. 
+ * + * [B] new page tables pages (PTP) + * - plan 1: call uvm_pagealloc() + * => success: zero page, add to pm_pdir + * => failure: we are out of free vm_pages + * - plan 2: using a linked LIST of active pmaps we attempt + * to "steal" a PTP from another process. we lock + * the target pmap with simple_lock_try so that if it is + * busy we do not block. + * => success: remove old mappings, zero, add to pm_pdir + * => failure: highly unlikely + * - plan 3: panic + * + * note: for kernel PTPs, we start with NKPTP of them. as we map + * kernel memory (at uvm_map time) we check to see if we've grown + * the kernel pmap. if so, we call the optional function + * pmap_growkernel() to grow the kernel PTPs in advance. + * + * [C] pv_entry structures + * - plan 1: try to allocate one off the free list + * => success: done! + * => failure: no more free pv_entrys on the list + * - plan 2: try to allocate a new pv_page to add a chunk of + * pv_entrys to the free list + * [a] obtain a free, unmapped, VA in kmem_map. either + * we have one saved from a previous call, or we allocate + * one now using a "vm_map_lock_try" in uvm_map + * => success: we have an unmapped VA, continue to [b] + * => failure: unable to lock kmem_map or out of VA in it. + * move on to plan 3. + * [b] allocate a page in kmem_object for the VA + * => success: map it in, free the pv_entry's, DONE! + * => failure: kmem_object locked, no free vm_pages, etc. + * save VA for later call to [a], go to plan 3. + * - plan 3: using the pv_entry/pv_head lists find a pv_entry + * structure that is part of a non-kernel lockable pmap + * and "steal" that pv_entry by removing the mapping + * and reusing that pv_entry. + * => success: done + * => failure: highly unlikely: unable to lock and steal + * pv_entry + * - plan 4: we panic. + */ + +/* + * locking + * + * we have the following locks that we must contend with: + * + * "normal" locks: + * + * - pmap_main_lock + * this lock is used to prevent deadlock and/or provide mutex + * access to the pmap system. most operations lock the pmap + * structure first, then they lock the pv_lists (if needed). + * however, some operations such as pmap_page_protect lock + * the pv_lists and then lock pmaps. in order to prevent a + * cycle, we require a mutex lock when locking the pv_lists + * first. thus, the "pmap = >pv_list" lockers must gain a + * read-lock on pmap_main_lock before locking the pmap. and + * the "pv_list => pmap" lockers must gain a write-lock on + * pmap_main_lock before locking. since only one thread + * can write-lock a lock at a time, this provides mutex. + * + * "simple" locks: + * + * - pmap lock (per pmap, part of uvm_object) + * this lock protects the fields in the pmap structure including + * the non-kernel PDEs in the PDP, and the PTEs. it also locks + * in the alternate PTE space (since that is determined by the + * entry in the PDP). + * + * - pvh_lock (per pv_head) + * this lock protects the pv_entry list which is chained off the + * pv_head structure for a specific managed PA. it is locked + * when traversing the list (e.g. adding/removing mappings, + * syncing R/M bits, etc.) + * + * - pvalloc_lock + * this lock protects the data structures which are used to manage + * the free list of pv_entry structures. + * + * - pmaps_lock + * this lock protects the list of active pmaps (headed by "pmaps"). + * we lock it when adding or removing pmaps from this list. 
+ * + * - pmap_copy_page_lock + * locks the tmp kernel PTE mappings we used to copy data + * + * - pmap_zero_page_lock + * locks the tmp kernel PTE mapping we use to zero a page + * + * - pmap_tmpptp_lock + * locks the tmp kernel PTE mapping we use to look at a PTP + * in another process + * + * XXX: would be nice to have per-CPU VAs for the above 4 + */ + +/* + * locking data structures + */ + +#if NCPU > 1 +struct lock pmap_main_lock; +simple_lock_data_t pvalloc_lock; +simple_lock_data_t pmaps_lock; +simple_lock_data_t pmap_copy_page_lock; +simple_lock_data_t pmap_zero_page_lock; +simple_lock_date_t pmap_tmpptp_lock; + +#define PMAP_MAP_TO_HEAD_LOCK() \ + lockmgr(&pmap_main_lock, LK_SHARED, (void *) 0, curproc) +#define PMAP_MAP_TO_HEAD_UNLOCK() \ + lockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0, curproc) + +#define PMAP_HEAD_TO_MAP_LOCK() \ + lockmgr(&pmap_main_lock, LK_EXCLUSIVE, (void *)0, curproc) +#define PMAP_HEAD_TO_MAP_UNLOCK() \ + lockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0, curproc) + +#else + +#define PMAP_MAP_TO_HEAD_LOCK() /* null */ +#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ + +#define PMAP_HEAD_TO_MAP_LOCK() /* null */ +#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ + +#endif + +/* + * global data structures + */ + +struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ + +/* + * nkpde is the number of kernel PTPs allocated for the kernel at + * boot time (NKPTP is a compile time override). this number can + * grow dynamically as needed (but once allocated, we never free + * kernel PTPs). + */ + +int nkpde = NKPTP; +#ifdef NKPDE +#error "obsolete NKPDE: use NKPTP" +#endif + +/* + * pmap_pg_g: if our processor supports PG_G in the PTE then we + * set pmap_pg_g to PG_G (otherwise it is zero). + */ + +int pmap_pg_g = 0; + +/* + * i386 physical memory comes in a big contig chunk with a small + * hole toward the front of it... the following 4 vm_offset_t's + * (shared with machdep.c) describe the physical address space + * of this machine. + */ +vm_offset_t avail_start;/* PA of first available physical page */ +vm_offset_t avail_end; /* PA of last available physical page */ +vm_offset_t hole_start; /* PA of start of "hole" */ +vm_offset_t hole_end; /* PA of end of "hole" */ + +/* + * other data structures + */ + +static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ +static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ + +/* + * the following two vm_offset_t's are used during system startup + * to keep track of how much of the kernel's VM space we have used. + * once the system is started, the management of the remaining kernel + * VM space is turned over to the kernel_map vm_map. 
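+ * (during bootstrap a chunk of KVA is handed out simply by bumping the
+ * pointer, roughly: va = virtual_avail; virtual_avail += NBPG; -- see
+ * the "special" VA allocations in pmap_bootstrap() below.)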
+ */ + +static vm_offset_t virtual_avail; /* VA of first free KVA */ +static vm_offset_t virtual_end; /* VA of last free KVA */ + + +/* + * pv_page management structures: locked by pvalloc_lock + */ + +TAILQ_HEAD(pv_pagelist, pv_page); +static struct pv_pagelist pv_freepages; /* list of pv_pages with free entrys */ +static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */ +static int pv_nfpvents; /* # of free pv entries */ +static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */ +static vm_offset_t pv_cachedva; /* cached VA for later use */ + +#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */ +#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2)) + /* high water mark */ + +/* + * linked list of all non-kernel pmaps + */ + +static struct pmap_head pmaps; +static struct pmap *pmaps_hand = NULL; /* used by pmap_steal_ptp */ + +/* + * special VAs and the PTEs that map them + */ + +static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; +static caddr_t csrcp, cdstp, zerop, ptpp; +caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */ + +extern vm_offset_t msgbuf_vaddr, msgbuf_paddr; +extern vm_offset_t idt_vaddr, idt_paddr; /* we allocate IDE early */ + +#if defined(I586_CPU) +/* stuff to fix the pentium f00f bug */ +extern vm_offset_t pentium_idt_vaddr; +#endif + + +/* + * local prototypes + */ + +static struct pv_entry *pmap_add_pvpage __P((struct pv_page *, boolean_t)); +static struct vm_page *pmap_alloc_ptp __P((struct pmap *, int, boolean_t)); +static struct pv_entry *pmap_alloc_pv __P((int)); /* see codes below */ +#define ALLOCPV_NEED 0 /* need PV now */ +#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ +#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ +static struct pv_entry *pmap_alloc_pvpage __P((int)); +static void pmap_enter_pv __P((struct pv_head *, + struct pv_entry *, struct pmap *, + vm_offset_t, struct vm_page *)); +static void pmap_free_pv __P((struct pv_entry *)); +static void pmap_free_pvs __P((struct pv_entry *)); +static void pmap_free_pv_doit __P((struct pv_entry *)); +static void pmap_free_pvpage __P((void)); +static struct vm_page *pmap_get_ptp __P((struct pmap *, int, boolean_t)); +static boolean_t pmap_is_curpmap __P((struct pmap *)); +static void pmap_kremove1 __P((vm_offset_t)); +static pt_entry_t *pmap_map_ptes __P((struct pmap *)); +static struct pv_entry *pmap_remove_pv __P((struct pv_head *, struct pmap *, + vm_offset_t)); +static boolean_t pmap_remove_pte __P((struct pmap *, struct vm_page *, + pt_entry_t *, vm_offset_t)); +static void pmap_remove_ptes __P((struct pmap *, + struct pmap_remove_record *, + struct vm_page *, vm_offset_t, + vm_offset_t, vm_offset_t)); +static struct vm_page *pmap_steal_ptp __P((struct uvm_object *, + vm_offset_t)); +static vm_offset_t pmap_tmpmap_pa __P((vm_offset_t)); +static pt_entry_t *pmap_tmpmap_pvepte __P((struct pv_entry *)); +static void pmap_tmpunmap_pa __P((void)); +static void pmap_tmpunmap_pvepte __P((struct pv_entry *)); +static boolean_t pmap_transfer_ptes __P((struct pmap *, + struct pmap_transfer_location *, + struct pmap *, + struct pmap_transfer_location *, + int, boolean_t)); +static boolean_t pmap_try_steal_pv __P((struct pv_head *, + struct pv_entry *, + struct pv_entry *)); +static void pmap_unmap_ptes __P((struct pmap *)); + +/* + * p m a p i n l i n e h e l p e r f u n c t i o n s + */ + +/* + * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? 
+ * of course the kernel is always loaded + */ + +__inline static boolean_t pmap_is_curpmap(pmap) + +struct pmap *pmap; + +{ + return((pmap == pmap_kernel()) || (pmap->pm_pdirpa == (vm_offset_t) rcr3())); +} + +/* + * pmap_tmpmap_pa: map a page in for tmp usage + * + * => returns with pmap_tmpptp_lock held + */ + +__inline static vm_offset_t pmap_tmpmap_pa(pa) + +vm_offset_t pa; + +{ + simple_lock(&pmap_tmpptp_lock); +#if defined(DIAGNOSTIC) + if (*ptp_pte) + panic("pmap_tmpmap_pa: ptp_pte in use?"); +#endif + *ptp_pte = PG_V | PG_RW | pa; /* always a new mapping */ + return((vm_offset_t)ptpp); +} + +/* + * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) + * + * => we release pmap_tmpptp_lock + */ + +__inline static void pmap_tmpunmap_pa() + +{ +#if defined(DIAGNOSTIC) + if (!pmap_valid_entry(*ptp_pte)) + panic("pmap_tmpunmap_pa: our pte invalid?"); +#endif + *ptp_pte = 0; /* zap! */ + pmap_update_pg((vm_offset_t)ptpp); + simple_unlock(&pmap_tmpptp_lock); + return; +} + +/* + * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry + * + * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL] + * => we may grab pmap_tmpptp_lock and return with it held + */ + +__inline static pt_entry_t *pmap_tmpmap_pvepte(pve) + +struct pv_entry *pve; + +{ +#ifdef DIAGNOSTIC + if (pve->pv_pmap == pmap_kernel()) + panic("pmap_tmpmap_pvepte: attempt to map kernel"); +#endif + + /* is it current pmap? use direct mapping... */ + if (pmap_is_curpmap(pve->pv_pmap)) + return(vtopte(pve->pv_va)); + + return( ((pt_entry_t *) pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp))) + + ptei((unsigned)pve->pv_va) ); +} + +/* + * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte + * + * => we will release pmap_tmpptp_lock if we hold it + */ + +__inline static void pmap_tmpunmap_pvepte(pve) + +struct pv_entry *pve; + +{ + /* was it current pmap? if so, return */ + if (pmap_is_curpmap(pve->pv_pmap)) + return; + + pmap_tmpunmap_pa(); + return; +} + +/* + * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in + * + * => we lock enough pmaps to keep things locked in + * => must be undone with pmap_unmap_ptes before returning + */ + +__inline static pt_entry_t *pmap_map_ptes(pmap) + +struct pmap *pmap; + +{ + pd_entry_t opde; + + /* if curpmap then we are always mapped */ + if (pmap_is_curpmap(pmap)) { + simple_lock(&pmap->pm_obj.vmobjlock); + return(PTE_BASE); + } + + /* need to lock both curpmap and pmap: use ordered locking */ + if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) { + simple_lock(&pmap->pm_obj.vmobjlock); /* 1: pmap */ + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); /* 2: curpmap */ + } else { + simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock); /* 1: curpmap */ + simple_lock(&pmap->pm_obj.vmobjlock); /* 2: pmap */ + } + + /* need to load a new alternate pt space into curpmap? */ + opde = *APDP_PDE; + if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { + *APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); /* zap! 
*/ + if (pmap_valid_entry(opde)) + pmap_update(); + } + return(APTE_BASE); +} + +/* + * pmap_unmap_ptes: unlock the PTE mapping of "pmap" + */ + +__inline static void pmap_unmap_ptes(pmap) + +struct pmap *pmap; + +{ + if (pmap_is_curpmap(pmap)) { + simple_unlock(&pmap->pm_obj.vmobjlock); + return; + } else { + simple_unlock(&pmap->pm_obj.vmobjlock); + simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock); + return; + } +} + +/* + * p m a p k e n t e r f u n c t i o n s + * + * functions to quickly enter/remove pages from the kernel address + * space. pmap_kremove/pmap_kenter_pgs are exported to MI kernel. + * we make use of the recursive PTE mappings. + */ + +/* + * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking + * + * => no need to lock anything, assume va is already allocated + * => should be faster than normal pmap enter function + */ + +/* + * resides in pmap.h + */ + +/* + * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking + * + * => no need to lock anything + * => caller must dispose of any vm_page mapped in the va range + * => note: not an inline function + * => we assume the va is page aligned and the len is a multiple of NBPG + * => we assume kernel only unmaps valid addresses and thus don't bother + * checking the valid bit before doing TLB flushing + */ + +void pmap_kremove(va, len) + +vm_offset_t va; +vm_size_t len; + +{ + pt_entry_t *pte; + len = len / NBPG; + + for ( /* null */ ; len ; len--, va += NBPG) { + + pte = vtopte(va); + + /* + * XXXCDC: we can get PVLIST if the mapping was created by uvm_fault + * as part of a pageable kernel mapping. in that case we need to + * update the pvlists, so we punt the problem to the more powerful + * (and complex) pmap_remove() function. this is kind of ugly... + * need to rethink this a bit. + */ + if (*pte & PG_PVLIST) { + pmap_remove(pmap_kernel(), va, va + (len*NBPG)); /* punt ... */ + return; + } + + *pte = 0; /* zap! */ +#if defined(I386_CPU) + if (cpu_class != CPUCLASS_386) +#endif + pmap_update_pg(va); + + } + +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + pmap_update(); +#endif + +} + +/* + * pmap_kremove1: inline one page version of above + */ + +__inline static void pmap_kremove1(va) + +vm_offset_t va; + +{ + pt_entry_t *pte; + pte = vtopte(va); + *pte = 0; /* zap! */ + pmap_update_pg(va); +} + +/* + * pmap_kenter_pgs: enter in a number of vm_pages + */ + +void pmap_kenter_pgs(va, pgs, npgs) + +vm_offset_t va; +struct vm_page **pgs; +int npgs; + +{ + pt_entry_t *pte, opte; + int lcv; + vm_offset_t tva; +#if defined(I386_CPU) + boolean_t need_update = FALSE; +#endif + + for (lcv = 0 ; lcv < npgs ; lcv++) { + tva = va + lcv * NBPG; + pte = vtopte(tva); + opte = *pte; + *pte = VM_PAGE_TO_PHYS(pgs[lcv]) | PG_RW | PG_V | pmap_pg_g; /* zap! */ +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + if (pmap_valid_entry(opte)) + need_update = TRUE; + continue; + } +#endif + if (pmap_valid_entry(opte)) + pmap_update_pg(tva); + } + +#if defined(I386_CPU) + if (need_update && cpu_class == CPUCLASS_386) + pmap_update(); +#endif + +} + +/* + * p m a p i n i t f u n c t i o n s + * + * pmap_bootstrap and pmap_init are called during system startup + * to init the pmap module. pmap_bootstrap() does a low level + * init just to get things rolling. pmap_init() finishes the job. + */ + +/* + * pmap_bootstrap: get the system in a state where it can run with VM + * properly enabled (called before main()). the VM system is + * fully init'd later... 
+ * + * => on i386, locore.s has already enabled the MMU by allocating + * a PDP for the kernel, and nkpde PTP's for the kernel. + * => kva_start is the first free virtual address in kernel space + * => we make use of the global vars from machdep.c: + * avail_start, avail_end, hole_start, hole_end + */ + +void pmap_bootstrap(kva_start) + +vm_offset_t kva_start; + +{ + struct pmap *kpm; + vm_offset_t kva; + pt_entry_t *pte; + + /* + * set the page size (default value is 4K which is ok) + */ + + uvm_setpagesize(); + + /* + * a quick sanity check + */ + + if (PAGE_SIZE != NBPG) + panic("pmap_bootstrap: PAGE_SIZE != NBPG"); + + /* + * use the very last page of physical memory for the message buffer + */ + + avail_end -= i386_round_page(MSGBUFSIZE); + msgbuf_paddr = avail_end; + + /* + * set up our local static global vars that keep track of the + * usage of KVM before kernel_map is set up + */ + + virtual_avail = kva_start; /* first free KVA */ + virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */ + + /* + * set up protection_codes: we need to be able to convert from + * a MI protection code (some combo of VM_PROT...) to something + * we can jam into a i386 PTE. + */ + + protection_codes[VM_PROT_NONE] = 0; /* --- */ + protection_codes[VM_PROT_EXECUTE] = PG_RO; /* --x */ + protection_codes[VM_PROT_READ] = PG_RO; /* -r- */ + protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO; /* -rx */ + protection_codes[VM_PROT_WRITE] = PG_RW; /* w-- */ + protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW; /* w-x */ + protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW; /* wr- */ + protection_codes[VM_PROT_ALL] = PG_RW; /* wrx */ + + /* + * now we init the kernel's pmap + * + * the kernel pmap's pm_obj is not used for much. however, in + * user pmaps the pm_obj contains the list of active PTPs. + * the pm_obj currently does not have a pager. it might be possible + * to add a pager that would allow a process to read-only mmap its + * own page tables (fast user level vtophys?). this may or may not + * be useful. + */ + + kpm = pmap_kernel(); + simple_lock_init(&kpm->pm_obj.vmobjlock); + kpm->pm_obj.pgops = NULL; + TAILQ_INIT(&kpm->pm_obj.memq); + kpm->pm_obj.uo_npages = 0; + kpm->pm_obj.uo_refs = 1; + bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */ + kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); + kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3; + kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = + i386_btop(kva_start - VM_MIN_KERNEL_ADDRESS); + /* + * the above is just a rough estimate and not critical to the proper + * operation of the system. + */ + curpcb->pcb_pmap = kpm; /* proc0's pcb */ + + /* + * enable global TLB entries if they are supported + */ + + if (cpu_feature & CPUID_PGE) { + + lcr4(rcr4() | CR4_PGE); /* enable hardware (via %cr4) */ + pmap_pg_g = PG_G; /* enable software */ + + /* add PG_G attribute to already mapped kernel pages */ + for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ; kva += NBPG) + if (pmap_valid_entry(PTE_BASE[i386_btop(kva)])) + PTE_BASE[i386_btop(kva)] |= PG_G; + } + + /* + * now we allocate the "special" VAs which are used for tmp mappings + * by the pmap (and other modules). we allocate the VAs by advancing + * virtual_avail (note that there are no pages mapped at these VAs). + * we find the PTE that maps the allocated VA via the linear PTE mapping. 
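+ * (csrcp/cdstp are the source/destination windows used when copying a
+ * page, zerop is the window used when zeroing a page, and ptpp is the
+ * window pmap_tmpmap_pa() uses to peek at a PTP in another pmap; each
+ * is guarded by the corresponding tmp-mapping lock described above.)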
+ */ + + pte = PTE_BASE + i386_btop(virtual_avail); + + csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ + virtual_avail += NBPG; pte++; /* advance */ + + cdstp = (caddr_t) virtual_avail; cdst_pte = pte; + virtual_avail += NBPG; pte++; + + zerop = (caddr_t) virtual_avail; zero_pte = pte; + virtual_avail += NBPG; pte++; + + ptpp = (caddr_t) virtual_avail; ptp_pte = pte; + virtual_avail += NBPG; pte++; + + /* XXX: vmmap used by mem.c... should be uvm_map_reserve */ + vmmap = (char *)virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; + + msgbuf_vaddr = virtual_avail; /* don't need pte */ + virtual_avail += round_page(MSGBUFSIZE); pte++; + + idt_vaddr = virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; + avail_end -= NBPG; + idt_paddr = avail_end; + +#if defined(I586_CPU) + /* pentium f00f bug stuff */ + pentium_idt_vaddr = virtual_avail; /* don't need pte */ + virtual_avail += NBPG; pte++; +#endif + + + /* + * now we reserve some VM for mapping pages when doing a crash dump + */ + + virtual_avail = reserve_dumppages(virtual_avail); + + /* + * init the static-global locks and global lists. + */ + +#if NCPU > 1 + lock_init(&pmap_main_lock, FALSE); + simple_lock_init(&pvalloc_lock); + simple_lock_init(&pmaps_lock); + simple_lock_init(&pmap_copy_page_lock); + simple_lock_init(&pmap_zero_page_lock); + simple_lock_init(&pmap_tmpptp_lock); +#endif + LIST_INIT(&pmaps); + TAILQ_INIT(&pv_freepages); + TAILQ_INIT(&pv_unusedpgs); + + /* + * we must call uvm_page_physload() after we are done playing with + * virtual_avail but before we call pmap_steal_memory. [i.e. here] + * this call tells the VM system how much physical memory it + * controls. + */ + + if (avail_start < hole_start) /* any free memory before the hole? */ + uvm_page_physload(atop(avail_start), atop(hole_start), + atop(avail_start), atop(hole_start)); + + uvm_page_physload(atop(hole_end), atop(avail_end), + atop(hole_end), atop(avail_end)); + + /* + * ensure the TLB is sync'd with reality by flushing it... + */ + + pmap_update(); + + /* + * done! + */ +} + +/* + * pmap_init: called from uvm_init, our job is to get the pmap + * system ready to manage mappings... this mainly means initing + * the pv_entry stuff. + */ + +void pmap_init() + +{ + int npages, lcv; + vm_offset_t addr; + vm_size_t s; + + /* + * compute the number of pages we have and then allocate RAM + * for each pages' pv_head and saved attributes. + */ + + npages = 0; + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + npages += (vm_physmem[lcv].end - vm_physmem[lcv].start); + s = (vm_size_t) (sizeof(struct pv_head) * npages + + sizeof(char) * npages); + s = round_page(s); /* round up */ + addr = (vm_offset_t) uvm_km_zalloc(kernel_map, s); + if (addr == NULL) + panic("pmap_init: unable to allocate pv_heads"); + + /* + * init all pv_head's and attrs in one bzero + */ + + /* allocate pv_head stuff first */ + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { + vm_physmem[lcv].pmseg.pvhead = (struct pv_head *) addr; + addr = (vm_offset_t)(vm_physmem[lcv].pmseg.pvhead + + (vm_physmem[lcv].end - vm_physmem[lcv].start)); + } + + /* now allocate attrs */ + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { + vm_physmem[lcv].pmseg.attrs = (char *) addr; + addr = (vm_offset_t)(vm_physmem[lcv].pmseg.attrs + + (vm_physmem[lcv].end - vm_physmem[lcv].start)); + } + + /* + * now we need to free enough pv_entry structures to allow us to get + * the kmem_map/kmem_object allocated and inited (done after this + * function is finished). 
to do this we allocate one bootstrap page out + * of kernel_map and use it to provide an initial pool of pv_entry + * structures. we never free this page. + */ + + pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, NBPG); + if (pv_initpage == NULL) + panic("pmap_init: pv_initpage"); + pv_cachedva = NULL; /* a VA we have allocated but not used yet */ + pv_nfpvents = 0; + (void) pmap_add_pvpage(pv_initpage, FALSE); + + /* + * done: pmap module is up (and ready for business) + */ + + pmap_initialized = TRUE; +} + +/* + * p v _ e n t r y f u n c t i o n s + */ + +/* + * pv_entry allocation functions: + * the main pv_entry allocation functions are: + * pmap_alloc_pv: allocate a pv_entry structure + * pmap_free_pv: free one pv_entry + * pmap_free_pvs: free a list of pv_entrys + * + * the rest are helper functions + */ + +/* + * pmap_alloc_pv: inline function to allocate a pv_entry structure + * => we lock pvalloc_lock + * => if we fail, we call out to pmap_alloc_pvpage + * => 3 modes: + * ALLOCPV_NEED = we really need a pv_entry, even if we have to steal it + * ALLOCPV_TRY = we want a pv_entry, but not enough to steal + * ALLOCPV_NONEED = we are trying to grow our free list, don't really need + * one now + * + * "try" is for optional functions like pmap_copy(). + */ + +__inline static struct pv_entry *pmap_alloc_pv(mode) + +int mode; + +{ + struct pv_page *pvpage; + struct pv_entry *pv; + + simple_lock(&pvalloc_lock); + + if (pv_freepages.tqh_first != NULL) { + + pvpage = pv_freepages.tqh_first; + pvpage->pvinfo.pvpi_nfree--; + if (pvpage->pvinfo.pvpi_nfree == 0) { /* nothing left in this one? */ + TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list); + } + pv = pvpage->pvinfo.pvpi_pvfree; +#ifdef DIAGNOSTIC + if (pv == NULL) + panic("pmap_alloc_pv: pvpi_nfree off"); +#endif + pvpage->pvinfo.pvpi_pvfree = pv->pv_next; + + pv_nfpvents--; /* took one from pool */ + + } else { + + pv = NULL; /* need more of them */ + + } + + /* + * if below low water mark or we didn't get a pv_entry we try and + * create more pv_entrys ... + */ + + if (pv_nfpvents < PVE_LOWAT || pv == NULL) { + + if (pv == NULL) + pv = pmap_alloc_pvpage((mode == ALLOCPV_TRY) ? mode : ALLOCPV_NEED); + else + (void) pmap_alloc_pvpage(ALLOCPV_NONEED); + + } + + simple_unlock(&pvalloc_lock); + return(pv); +} + +/* + * pmap_alloc_pvpage: maybe allocate a new pvpage + * + * if need_entry is false: try and allocate a new pv_page + * if need_entry is true: try and allocate a new pv_page and return a + * new pv_entry from it. if we are unable to allocate a pv_page + * we make a last ditch effort to steal a pv_page from some other + * mapping. if that fails, we panic... 
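+ * ("need_entry" here means mode != ALLOCPV_NONEED: pmap_alloc_pv() passes
+ * ALLOCPV_NEED or ALLOCPV_TRY when it found no free pv_entry, and
+ * ALLOCPV_NONEED when it is just topping up the pool below PVE_LOWAT.)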
+ * + * => we assume that the caller holds pvalloc_lock + */ + +static struct pv_entry *pmap_alloc_pvpage(mode) + +int mode; + +{ + struct vm_page *pg; + struct pv_page *pvpage; + int lcv, idx, npg, s; + struct pv_entry *pv, *cpv, *prevpv; + + /* + * if we need_entry and we've got unused pv_pages, allocate from there + */ + + if (mode != ALLOCPV_NONEED && pv_unusedpgs.tqh_first != NULL) { + + /* move it to pv_freepages list */ + pvpage = pv_unusedpgs.tqh_first; + TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list); + TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list); + + /* allocate a pv_entry */ + pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */ + pv = pvpage->pvinfo.pvpi_pvfree; +#ifdef DIAGNOSTIC + if (pv == NULL) + panic("pmap_alloc_pvpage: pvpi_nfree off"); +#endif + pvpage->pvinfo.pvpi_pvfree = pv->pv_next; + + pv_nfpvents--; /* took one from pool */ + return(pv); + } + + /* + * see if we've got a cached unmapped VA that we can map a page in. + * if not, try to allocate one. + */ + + s = splimp(); /* must protect kmem_map/kmem_object with splimp! */ + if (pv_cachedva == NULL) { + + pv_cachedva = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, NBPG, + UVM_KMF_TRYLOCK|UVM_KMF_VALLOC); + + if (pv_cachedva == NULL) { + splx(s); + goto steal_one; + } + + } + + /* + * we have a VA, now let's try and allocate a page in the object + * note: we are still holding splimp to protect kmem_object + */ + + if (!simple_lock_try(&uvmexp.kmem_object->vmobjlock)) { + splx(s); + goto steal_one; + } + + pg = uvm_pagealloc(uvmexp.kmem_object, pv_cachedva - vm_map_min(kernel_map), + NULL); + if (pg) + pg->flags &= ~PG_BUSY; /* never busy */ + + simple_unlock(&uvm.kmem_object->vmobjlock); + splx(s); + /* splimp now dropped */ + + if (pg == NULL) + goto steal_one; + + /* + * add a mapping for our new pv_page and free its entrys (save one!) + */ + + pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL); + pvpage = (struct pv_page *) pv_cachedva; + pv_cachedva = NULL; + + /* + * done! + */ + + return(pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED)); + +steal_one: + /* + * if we don't really need a pv_entry right now, we can just return. + */ + + if (mode != ALLOCPV_NEED) + return(NULL); + + /* + * last ditch effort! we couldn't allocate a free page to make + * more pv_entrys so we try and steal one from someone else. + */ + + pv = NULL; + for (lcv = 0 ; pv == NULL && lcv < vm_nphysseg ; lcv++) { + npg = vm_physmem[lcv].end - vm_physmem[lcv].start; + for (idx = 0 ; idx < npg ; idx++) { + if (vm_physmem[lcv].pmseg.pvhead->pvh_list == NULL) + continue; /* spot check */ + if (!simple_lock_try(&vm_physmem[lcv].pmseg.pvhead->pvh_lock)) + continue; + cpv = prevpv = vm_physmem[lcv].pmseg.pvhead->pvh_list; + while (cpv) { + if (pmap_try_steal_pv(vm_physmem[lcv].pmseg.pvhead, cpv, prevpv)) + break; + prevpv = cpv; + cpv = cpv->pv_next; + } + simple_unlock(&vm_physmem[lcv].pmseg.pvhead->pvh_lock); + /* got one? break out of the loop! */ + if (cpv) { + pv = cpv; + break; + } + } + } + + /* + * done! if we didn't get a pv then we panic :( + */ + + if (pv == NULL) + panic("pmap_alloc_pvpage"); + + return(pv); +} + +/* + * pmap_try_steal_pv: try and steal a pv_entry from a pmap + * + * => return true if we did it! 
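+ * => we never steal kernel mappings, wired mappings, or mappings in a
+ *	pmap we cannot simple_lock_try
+ * => caller holds pvalloc_lock and the pv_head's lock (cpv and prevpv
+ *	are on that pv_head's list)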
+ */ + +static boolean_t pmap_try_steal_pv(pvh, cpv, prevpv) + +struct pv_head *pvh; +struct pv_entry *cpv, *prevpv; + +{ + pt_entry_t *ptep; /* pointer to a PTE */ + + /* + * we never steal kernel mappings or mappings from pmaps we can't lock + */ + + if (cpv->pv_pmap == pmap_kernel() || + !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock)) + return(FALSE); + + /* + * yes, we can try and steal it. first we need to remove the + * mapping from the pmap. + */ + + ptep = pmap_tmpmap_pvepte(cpv); + if (*ptep & PG_W) { + ptep = NULL; /* wired page, avoid stealing this one */ + } else { + *ptep = 0; /* zap! */ + if (pmap_is_curpmap(cpv->pv_pmap)) + pmap_update_pg(cpv->pv_va); + pmap_tmpunmap_pvepte(cpv); + } + + if (ptep == NULL) { + simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock); + return(FALSE); /* wired page, abort! */ + } + + cpv->pv_pmap->pm_stats.resident_count--; + if (cpv->pv_ptp && cpv->pv_ptp->wire_count) /* drop PTP's wired count */ + cpv->pv_ptp->wire_count--; + + /* + * XXX: if wire_count goes to one the PTP could be freed, however, + * we'd have to lock the page queues (etc.) to do that and it could + * cause deadlock headaches. besides, the pmap we just stole from + * may want the mapping back anyway, so leave the PTP around. + */ + + /* + * now we need to remove the entry from the pvlist + */ + if (cpv == pvh->pvh_list) + pvh->pvh_list = cpv->pv_next; + else + prevpv->pv_next = cpv->pv_next; + + /* + * we did it, return. + */ + + return(TRUE); +} + +/* + * pmap_add_pvpage: add a pv_page's pv_entrys to the free list + * + * => caller must hold pvalloc_lock + * => if need_entry is true, we allocate and return one pv_entry + */ + +static struct pv_entry *pmap_add_pvpage(pvp, need_entry) + +struct pv_page *pvp; +boolean_t need_entry; + +{ + int tofree, lcv; + + /* do we need to return one? */ + tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE; + + pvp->pvinfo.pvpi_pvfree = NULL; + pvp->pvinfo.pvpi_nfree = tofree; + for (lcv = 0 ; lcv < tofree ; lcv++) { + pvp->pvents[lcv].pv_next = pvp->pvinfo.pvpi_pvfree; + pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv]; + } + if (need_entry) + TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list); + else + TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list); + pv_nfpvents += tofree; + + return((need_entry) ? &pvp->pvents[lcv] : NULL); +} + +/* + * pmap_free_pv_doit: actually free a pv_entry + * + * => do not call this directly! instead use either + * 1. pmap_free_pv ==> free a single pv_entry + * 2. pmap_free_pvs => free a list of pv_entrys + * => we must be holding pvalloc_lock + */ + +__inline static void pmap_free_pv_doit(pv) + +struct pv_entry *pv; + +{ + struct pv_page *pvp; + + pvp = (struct pv_page *) i386_trunc_page(pv); + pv_nfpvents++; + pvp->pvinfo.pvpi_nfree++; + + /* nfree == 1 => fully allocated page just became partly allocated */ + if (pvp->pvinfo.pvpi_nfree == 1) { + TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list); + } + + /* free it */ + pv->pv_next = pvp->pvinfo.pvpi_pvfree; + pvp->pvinfo.pvpi_pvfree = pv; + + /* + * are all pv_page's pv_entry's free? move it to unused queue. 
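+	 * (pages on pv_unusedpgs may later be given back to the system by
+	 * pmap_free_pvpage() once pv_nfpvents rises above PVE_HIWAT.)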
+ */ + if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) { + TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list); + TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list); + } +} + +/* + * pmap_free_pv: free a single pv_entry + * + * => we gain the pvalloc_lock + */ + +__inline static void pmap_free_pv(pv) + +struct pv_entry *pv; + +{ + simple_lock(&pvalloc_lock); + pmap_free_pv_doit(pv); + + if (pv_nfpvents > PVE_HIWAT && pv_unusedpgs.tqh_first != NULL) + pmap_free_pvpage(); + + simple_unlock(&pvalloc_lock); +} + +/* + * pmap_free_pvs: free a list of pv_entrys + * + * => we gain the pvalloc_lock + */ + +__inline static void pmap_free_pvs(pvs) + +struct pv_entry *pvs; + +{ + struct pv_entry *nextpv; + + simple_lock(&pvalloc_lock); + + for ( /* null */ ; pvs != NULL ; pvs = nextpv) { + nextpv = pvs->pv_next; + pmap_free_pv_doit(pvs); + } + + if (pv_nfpvents > PVE_HIWAT && pv_unusedpgs.tqh_first != NULL) + pmap_free_pvpage(); + + simple_unlock(&pvalloc_lock); +} + + +/* + * pmap_free_pvpage: try and free an unused pv_page structure + * + * => assume caller is holding the pvalloc_lock and that + * there is a page on the pv_unusedpgs list + * => if we can't get a lock on the kmem_map we try again later + * => note: analysis of MI kmem_map usage [i.e. malloc/free] shows + * that if we can lock the kmem_map then we are not already + * holding kmem_object's lock. + */ + +static void pmap_free_pvpage() + +{ + int s; + struct vm_map *map; + vm_map_entry_t dead_entries; + struct pv_page *pvp; + + s = splimp(); /* protect kmem_map */ + + pvp = pv_unusedpgs.tqh_first; + + /* + * note: watch out for pv_initpage which is allocated out of + * kernel_map rather than kmem_map. + */ + if (pvp == pv_initpage) + map = kernel_map; + else + map = kmem_map; + + if (vm_map_lock_try(map)) { + + /* remove pvp from pv_unusedpgs */ + TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list); + + /* unmap the page */ + dead_entries = NULL; + (void)uvm_unmap_remove(map, (vm_offset_t) pvp, + ((vm_offset_t) pvp) + NBPG, TRUE, &dead_entries); + vm_map_unlock(map); + + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); + + pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */ + + } + + if (pvp == pv_initpage) + pv_initpage = NULL; /* no more initpage, we've freed it */ + + splx(s); + +} + +/* + * main pv_entry manipulation functions: + * pmap_enter_pv: enter a mapping onto a pv_head list + * pmap_remove_pv: remove a mappiing from a pv_head list + * + * NOTE: pmap_enter_pv expects to lock the pvh itself + * pmap_remove_pv expects te caller to lock the pvh before calling + */ + +/* + * pmap_enter_pv: enter a mapping onto a pv_head lst + * + * => caller should hold the proper lock on pmap_main_lock + * => caller should have pmap locked + * => we will gain the lock on the pv_head and allocate the new pv_entry + * => caller should adjust ptp's wire_count before calling + */ + +__inline static void pmap_enter_pv(pvh, pve, pmap, va, ptp) + +struct pv_head *pvh; +struct pv_entry *pve; /* preallocated pve for us to use */ +struct pmap *pmap; +vm_offset_t va; +struct vm_page *ptp; /* PTP in pmap that maps this VA */ + +{ + pve->pv_pmap = pmap; + pve->pv_va = va; + pve->pv_ptp = ptp; /* NULL for kernel pmap */ + simple_lock(&pvh->pvh_lock); /* lock pv_head */ + pve->pv_next = pvh->pvh_list; /* add to ... */ + pvh->pvh_list = pve; /* ... locked list */ + simple_unlock(&pvh->pvh_list); /* unlock, done! 
*/ +} + +/* + * pmap_remove_pv: try to remove a mapping from a pv_list + * + * => caller should hold proper lock on pmap_main_lock + * => pmap should be locked + * => caller should hold lock on pv_head [so that attrs can be adjusted] + * => caller should adjust ptp's wire_count and free PTP if needed + * => we return the removed pve + */ + +__inline static struct pv_entry *pmap_remove_pv(pvh, pmap, va) + +struct pv_head *pvh; +struct pmap *pmap; +vm_offset_t va; + +{ + struct pv_entry *pve, **prevptr; + + prevptr = &pvh->pvh_list; /* previous pv_entry pointer */ + pve = *prevptr; + while (pve) { + if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */ + *prevptr = pve->pv_next; /* remove it! */ + break; + } + prevptr = &pve->pv_next; /* previous pointer */ + pve = pve->pv_next; /* advance */ + } + return(pve); /* return removed pve */ +} + +/* + * p t p f u n c t i o n s + */ + +/* + * pmap_alloc_ptp: allocate a PTP for a PMAP + * + * => pmap should already be locked by caller + * => we use the ptp's wire_count to count the number of active mappings + * in the PTP (we start it at one to prevent any chance this PTP + * will ever leak onto the active/inactive queues) + * => we should not be holding any pv_head locks (in case we are forced + * to call pmap_steal_ptp()) + * => we may need to lock pv_head's if we have to steal a PTP + * => just_try: true if we want a PTP, but not enough to steal one + * from another pmap (e.g. during optional functions like pmap_copy) + */ + +__inline static struct vm_page *pmap_alloc_ptp(pmap, pde_index, just_try) + +struct pmap *pmap; +int pde_index; +boolean_t just_try; + +{ + struct vm_page *ptp; + + ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL); + + if (ptp == NULL) { + if (just_try) + return(NULL); + ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index)); /* evil thief! */ + } + + /* got one! */ + ptp->flags &= ~PG_BUSY; /* never busy */ + ptp->wire_count = 1; /* no mappings yet */ + pmap_zero_page(VM_PAGE_TO_PHYS(ptp)); + pmap->pm_pdir[pde_index] = + (pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); + pmap->pm_stats.resident_count++; /* count PTP as resident */ + pmap->pm_ptphint = ptp; + return(ptp); +} + +/* + * pmap_steal_ptp: steal a PTP from any pmap that we can access + * + * => obj is locked by caller. + * => we can throw away mappings at this level (except in the kernel's pmap) + * => stolen PTP is placed in pmap + * => we lock pv_head's + * => hopefully, this function will be seldom used [much better to have + * enough free pages around for us to allocate off the free page list] + */ + +static struct vm_page *pmap_steal_ptp(obj, offset) + +struct uvm_object *obj; +vm_offset_t offset; + +{ + struct vm_page *ptp; + struct pmap *firstpmap; + int idx, lcv; + pt_entry_t *ptes; + + simple_lock(&pmaps_lock); + + if (pmaps_hand == NULL) + pmaps_hand = pmaps.lh_first; + + firstpmap = pmaps_hand; + if (firstpmap == NULL) + panic("pmap_steal_ptp: no pmaps to steal from!"); + + do { /* while we haven't looped back around to firstpmap */ + + if (simple_lock_try(pmaps_hand->pm_obj.vmobjlock)) { + + ptp = pmaps_hand->pm_obj.memq.tqh_first; + + for (/*null*/; ptp != NULL ; ptp = ptp->listq.tqe_next) { + + /* + * might have found a PTP we can steal (unless it has wired pages). 
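+ * first sanity check that this vm_page really backs the PDE slot it
+ * claims to, then scan its PTEs: any valid+wired entry means we must
+ * leave this PTP alone and move on to the next one.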
+ */ + idx = ptp_o2i(ptp->offset); + if (VM_PAGE_TO_PHYS(ptp) != (pmaps_hand->pm_pdir[idx] & PG_FRAME)) + panic("pmap_steal_ptp: PTP mismatch!"); + + ptes = (pt_entry_t *) pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp)); + for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++) + if ((ptes[lcv] & (PG_V|PG_W)) == (PG_V|PG_W)) + break; + if (lcv == PTES_PER_PTP) + pmap_remove_ptes(pmaps_hand, NULL, ptp, (vm_offset_t)ptes, + ptp_i2v(idx), ptp_i2v(idx+1)); + pmap_tmpunmap_pa(); + + if (lcv != PTES_PER_PTP) + continue; /* wired, try next PTP */ + + /* + * got it!!! + */ + + pmaps_hand->pm_pdir[idx] = 0; /* zap! */ + pmaps_hand->pm_stats.resident_count--; + if (pmap_is_curpmap(pmaps_hand)) + pmap_update(); + else if (pmap_valid_entry(*APDP_PDE) && + (*APDP_PDE & PG_FRAME) == pmaps_hand->pm_pdirpa) { + pmap_update_pg( ((vm_offset_t) APTE_BASE) + ptp->offset); + } + uvm_pagerealloc(ptp, obj, offset); /* put it in our pmap! */ + break; /* break out of "for" loop */ + } + + simple_unlock(pmaps_hand->pm_obj.vmobjlock); + } + + /* advance the pmaps_hand */ + pmaps_hand = pmaps_hand->pm_list.le_next; + if (pmaps_hand == NULL) + pmaps_hand = pmaps.lh_first; + + } while (ptp == NULL && pmaps_hand != firstpmap); + + simple_unlock(&pmaps_lock); + + if (ptp == NULL) + panic("pmap_steal_ptp: failed to steal a PTP!"); + + return(ptp); +} + +/* + * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) + * + * => pmap should NOT be pmap_kernel() + * => pmap should be locked + */ + +static struct vm_page *pmap_get_ptp(pmap, pde_index, just_try) + +struct pmap *pmap; +int pde_index; +boolean_t just_try; + +{ + struct vm_page *ptp; + + if (pmap_valid_entry(pmap->pm_pdir[pde_index])) { + + /* valid... check hint (saves us a PA->PG lookup) */ + if (pmap->pm_ptphint && + (pmap->pm_pdir[pde_index] & PG_FRAME) == + VM_PAGE_TO_PHYS(pmap->pm_ptphint)) + return(pmap->pm_ptphint); + + ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index)); + if (ptp == NULL) + panic("pmap_get_ptp: unmanaged user PTP"); + pmap->pm_ptphint = ptp; + return(ptp); + } + + /* allocate a new PTP */ + return(pmap_alloc_ptp(pmap, pde_index, just_try));/* alloc updates ptphint */ +} + +/* + * p m a p l i f e c y c l e f u n c t i o n s + */ + +/* + * pmap_create: create a pmap + * + * => note: old pmap interface took a "size" args which allowed for + * the creation of "software only" pmaps (not in bsd). + */ + +struct pmap *pmap_create() + +{ + struct pmap *pmap; + + MALLOC(pmap, struct pmap *, sizeof(*pmap), M_VMPMAP, M_WAITOK); + pmap_pinit(pmap); + return(pmap); +} + +/* + * pmap_pinit: given a zero'd pmap structure, init it. 
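+ * (sets up the embedded uvm_object, allocates the PDP out of
+ * kernel_map, and fills in the recursive PDE plus the kernel PDEs)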
+ */ + +void pmap_pinit(pmap) + +struct pmap *pmap; + +{ + /* init uvm_object */ + simple_lock_init(&pmap->pm_obj.vmobjlock); + pmap->pm_obj.pgops = NULL; /* currently not a mappable object */ + TAILQ_INIT(&pmap->pm_obj.memq); + pmap->pm_obj.uo_npages = 0; + pmap->pm_obj.uo_refs = 1; + pmap->pm_stats.wired_count = 0; + pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */ + pmap->pm_ptphint = NULL; + + /* allocate PDP */ + pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG); + if (pmap->pm_pdir == NULL) + panic("pmap_pinit: kernel_map out of virtual space!"); + pmap->pm_pdirpa = (u_int32_t) + pmap_extract(pmap_kernel(), (vm_offset_t) pmap->pm_pdir); + + /* init PDP */ + /* zero init area */ + bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + /* put in recursive PDE to map the PTEs */ + pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW; + + /* + * we need to lock pmaps_lock to prevent nkpde from changing on + * us. note that there is no need to splimp to protect us from + * malloc since malloc allocates out of a submap and we should have + * already allocated kernel PTPs to cover the range... + */ + simple_lock(&pmaps_lock); + /* put in kernel VM PDEs */ + bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN], + nkpde * sizeof(pd_entry_t)); + /* zero the rest */ + bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde], + NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); + LIST_INSERT_HEAD(&pmaps, pmap, pm_list); + if (pmaps_hand == NULL) + pmaps_hand = pmap; + simple_unlock(&pmaps_lock); + + /* + * done! + */ +} + +/* + * pmap_destroy: drop reference count on pmap. free pmap if + * reference count goes to zero. + */ + +void pmap_destroy(pmap) + +struct pmap *pmap; + +{ + /* + * drop reference count + */ + simple_lock(&pmap->pm_obj.vmobjlock); + pmap->pm_obj.uo_refs--; + if (pmap->pm_obj.uo_refs > 0) { + simple_unlock(&pmap->pm_obj.vmobjlock); + return; + } + + /* + * reference count is zero, free pmap resources and then free pmap. + */ + pmap_release(pmap); + free((caddr_t)pmap, M_VMPMAP); + + /* + * done: pmap is gone! + */ +} + +/* + * pmap_release: release all resources held by a pmap + * + * => if pmap is still referenced it should be locked + * => XXX: we currently don't expect any busy PTPs because we don't + * allow anything to map them (except for the kernel's private + * recursive mapping) or make them busy. + */ + +void pmap_release(pmap) + +struct pmap *pmap; + +{ + struct vm_page *pg; + + /* + * remove it from global list of pmaps + */ + simple_lock(&pmaps_lock); + if (pmap == pmaps_hand) + pmaps_hand = pmaps_hand->pm_list.le_next; + LIST_REMOVE(pmap, pm_list); + simple_unlock(&pmaps_lock); + + /* + * free any remaining PTPs + */ + while (pmap->pm_obj.memq.tqh_first != NULL) { + + pg = pmap->pm_obj.memq.tqh_first; + if (pg->flags & PG_BUSY) + panic("pmap_release: busy page table page"); + /* pmap_page_protect? currently no need for it. */ + uvm_pagefree(pg); + } + + /* XXX: need to flush it out of other processor's APTE space? */ + + uvm_km_free(kernel_map, (vm_offset_t)pmap->pm_pdir, NBPG); +} + +/* + * Add a reference to the specified pmap. 
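+ * (bumps pm_obj.uo_refs under the object lock; the matching drop is
+ * done in pmap_destroy)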
+ */ + +void pmap_reference(pmap) + +struct pmap *pmap; + +{ + simple_lock(&pmap->pm_obj.vmobjlock); + pmap->pm_obj.uo_refs++; + simple_unlock(&pmap->pm_obj.vmobjlock); +} + +/* + * pmap_activate: activate a process' pmap (fill in %cr3 info) + * + * => called from cpu_fork() + * => if proc is the curproc, then load it into the MMU + */ + + +void pmap_activate(p) + +struct proc *p; + + +{ + struct pcb *pcb = &p->p_addr->u_pcb; + struct pmap *pmap = p->p_vmspace->vm_map.pmap; + + pcb->pcb_cr3 = pmap->pm_pdirpa; + pcb->pcb_pmap = pmap; + if (p == curproc) { + lcr3(pcb->pcb_cr3); + } +} + +/* + * pmap_deactivate: deactivate a process' pmap + * + * => XXX: what should this do, if anything? + */ + +void pmap_deactivate(p) + +struct proc *p; + +{ +} + +/* + * end of lifecycle functions + */ + +/* + * some misc. functions + */ + +/* + * pmap_extract: extract a PA for the given VA + * + * => returns 0 if no mapping (thus preventing it from returning + * physical page 0!) + */ + +vm_offset_t pmap_extract(pmap, va) + +struct pmap *pmap; +vm_offset_t va; + +{ + vm_offset_t retval; + pt_entry_t *ptes; + + if (pmap->pm_pdir[pdei(va)]) { + ptes = pmap_map_ptes(pmap); + retval = (vm_offset_t) (ptes[i386_btop(va)] & PG_FRAME); + pmap_unmap_ptes(pmap); + return(retval | (va & ~PG_FRAME)); + } + return(0); +} + + +/* + * pmap_virtual_space: used during bootup [pmap_steal_memory] to + * determine the bounds of the kernel virtual addess space. + */ + +void pmap_virtual_space(startp, endp) + +vm_offset_t *startp; +vm_offset_t *endp; + +{ + *startp = virtual_avail; + *endp = virtual_end; +} + +/* + * pmap_map: map a range of PAs into kvm + * + * => used during crash dump + * => XXX: pmap_map() should be phased out? + */ + +vm_offset_t pmap_map(va, spa, epa, prot) + +vm_offset_t va, spa, epa; +int prot; + +{ + while (spa < epa) { + pmap_enter(pmap_kernel(), va, spa, prot, FALSE); + va += NBPG; + spa += NBPG; + } + return va; +} + +/* + * pmap_zero_page: zero a page + */ + +void pmap_zero_page(pa) + +vm_offset_t pa; + +{ + simple_lock(&pmap_zero_page_lock); +#ifdef DIAGNOSTIC + if (*zero_pte) + panic("pmap_zero_page: lock botch"); +#endif + + *zero_pte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ + bzero(zerop, NBPG); /* zero */ + *zero_pte = 0; /* zap! */ + pmap_update_pg((vm_offset_t)zerop); /* flush TLB */ + simple_unlock(&pmap_zero_page_lock); +} + +/* + * pmap_copy_page: copy a page + */ + +void pmap_copy_page(srcpa, dstpa) + +vm_offset_t srcpa, dstpa; + +{ + simple_lock(&pmap_copy_page_lock); +#ifdef DIAGNOSTIC + if (*csrc_pte || *cdst_pte) + panic("pmap_copy_page: lock botch"); +#endif + + *csrc_pte = (srcpa & PG_FRAME) | PG_V | PG_RW; + *cdst_pte = (dstpa & PG_FRAME) | PG_V | PG_RW; + bcopy(csrcp, cdstp, NBPG); + *csrc_pte = *cdst_pte = 0; /* zap! 
*/ + pmap_update_2pg((vm_offset_t)csrcp, (vm_offset_t)cdstp); + simple_unlock(&pmap_copy_page_lock); +} + +/* + * p m a p r e m o v e f u n c t i o n s + * + * functions that remove mappings + */ + +/* + * pmap_remove_ptes: remove PTEs from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + */ + +static void pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva) + +struct pmap *pmap; +struct pmap_remove_record *pmap_rr; +struct vm_page *ptp; +vm_offset_t ptpva; +vm_offset_t startva, endva; + +{ + struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ + struct pv_entry *pve; + pt_entry_t *pte = (pt_entry_t *) ptpva; + pt_entry_t opte; + int bank, off; + + /* + * note that ptpva points to the PTE that maps startva. this may + * or may not be the first PTE in the PTP. + * + * we loop through the PTP while there are still PTEs to look at + * and the wire_count is greater than 1 (because we use the wire_count + * to keep track of the number of real PTEs in the PTP). + */ + + for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) + ; pte++, startva += NBPG) { + + if (!pmap_valid_entry(*pte)) + continue; /* VA not mapped */ + + opte = *pte; /* save the old PTE */ + *pte = 0; /* zap! */ + if (opte & PG_W) + pmap->pm_stats.wired_count--; + pmap->pm_stats.resident_count--; + + if (pmap_rr) { /* worried about tlb flushing? */ + + if (opte & PG_G) { + pmap_update_pg(startva); /* PG_G requires this */ + } else { + if (pmap_rr->prr_npages < PMAP_RR_MAX) { + pmap_rr->prr_vas[pmap_rr->prr_npages++] = startva; + } else { + if (pmap_rr->prr_npages == PMAP_RR_MAX) + pmap_rr->prr_npages++; /* signal an overflow */ + } + } + + } + + if (ptp) + ptp->wire_count--; /* dropping a PTE */ + + /* + * if we are not on a pv_head list we are done. + */ + if ((opte & PG_PVLIST) == 0) + continue; + + bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off); + if (bank == -1) + panic("pmap_remove_ptes: unmanaged page marked PG_PVLIST"); + + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + /* sync R/M bits */ + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, startva); + + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + if (pve) { + pve->pv_next = pv_tofree; + pv_tofree = pve; + } + + /* end of "for" loop: time for next pte */ + } + + /* + * done! + */ + if (pv_tofree) + pmap_free_pvs(pv_tofree); + +} + + +/* + * pmap_remove_pte: remove a single PTE from a PTP + * + * => must have proper locking on pmap_master_lock + * => caller must hold pmap's lock + * => PTP must be mapped into KVA + * => PTP should be null if pmap == pmap_kernel() + * => returns true if we removed a mapping + */ + +static boolean_t pmap_remove_pte(pmap, ptp, pte, va) + +struct pmap *pmap; +struct vm_page *ptp; +pt_entry_t *pte; +vm_offset_t va; + +{ + pt_entry_t opte; + int bank, off; + struct pv_entry *pve; + + if (!pmap_valid_entry(*pte)) + return(FALSE); /* VA not mapped */ + + opte = *pte; /* save the old PTE */ + *pte = 0; /* zap! */ + + if (opte & PG_W) + pmap->pm_stats.wired_count--; + pmap->pm_stats.resident_count--; + + if (ptp) + ptp->wire_count--; /* dropping a PTE */ + + if (pmap_is_curpmap(pmap)) + pmap_update_pg(va); /* flush TLB */ + + + /* + * if we are not on a pv_head list we are done. 
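+ * (PG_PVLIST clear means this mapping was never entered on a pv list,
+ * e.g. an unmanaged mapping, so there are no R/M bits or pv_entry to
+ * clean up)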
+ */ + if ((opte & PG_PVLIST) == 0) + return(TRUE); + + bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off); + if (bank == -1) + panic("pmap_remove_pte: unmanaged page marked PG_PVLIST"); + + + simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + /* sync R/M bits */ + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va); + + simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock); + + if (pve) + pmap_free_pv(pve); + + /* + * done! + */ + return(TRUE); +} + +/* + * pmap_remove: top level mapping removal function + * + * => caller should not be holding any pmap locks + */ + +void pmap_remove(pmap, sva, eva) + +struct pmap *pmap; +vm_offset_t sva, eva; + +{ + pt_entry_t *ptes; + boolean_t result; + vm_offset_t ptppa, blkendva; + struct vm_page *ptp; + struct pmap_remove_record pmap_rr, *prr; + + /* + * we lock in the pmap => pv_head direction + */ + + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes(pmap); /* locks pmap */ + + /* + * removing one page? take shortcut function. + */ + + if (sva + NBPG == eva) { + + if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) { + + ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME; /* PA of the PTP */ + + /* get PTP if non-kernel mapping */ + + if (pmap == pmap_kernel()) { + ptp = NULL; /* we never free kernel PTPs */ + + } else { + + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) { + + ptp = pmap->pm_ptphint; + + } else { + + ptp = PHYS_TO_VM_PAGE(ptppa); + if (ptp == NULL) + panic("pmap_remove: unmanaged PTP detected"); + + } + + } + + /* do it! */ + result = pmap_remove_pte(pmap, ptp, &ptes[i386_btop(sva)], sva); + + /* if mapping removed and the PTP is no longer being used, free it! */ + if (result && ptp && ptp->wire_count <= 1) { + pmap->pm_pdir[pdei(sva)] = 0; /* zap! */ +#if defined(I386_CPU) + if (cpu_class != CPUCLASS_386) /* already dumped whole TLB on i386 */ +#endif + { + pmap_update_pg( ((vm_offset_t) ptes) + ptp->offset ); + } + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) /* update hint? */ + pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first; + uvm_pagefree(ptp); + } + + } + + /* + * done + */ + pmap_unmap_ptes(pmap); /* unlock pmap */ + PMAP_MAP_TO_HEAD_UNLOCK(); + return; + } + + /* + * removing a range of pages: we unmap in PTP sized blocks (4MB) + * + * if we are the currently loaded pmap, we use prr to keep track + * of the VAs we unload so that we can flush them out of the tlb. + */ + + if (pmap_is_curpmap(pmap)) { + prr = &pmap_rr; + prr->prr_npages = 0; + } else { + prr = NULL; + } + + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; + + /* + * XXXCDC: our PTE mappings should never be removed with pmap_remove! + * if we allow this (and why would we?) then we end up freeing the + * pmap's page directory page (PDP) before we are finished using it + * when we hit in in the recursive mapping. this is BAD. + * + * long term solution is to move the PTEs out of user address space. + * and into kernel address space (up with APTE). then we can set + * VM_MAXUSER_ADDRESS to be VM_MAX_ADDRESS. + */ + + if (pdei(sva) == PDSLOT_PTE) + continue; /* XXXCDC: ugly hack to avoid freeing PDP here */ + + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) /* valid block? 
*/ + continue; + + ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME); /* PA of the PTP */ + + /* get PTP if non-kernel mapping */ + + if (pmap == pmap_kernel()) { + ptp = NULL; /* we never free kernel PTPs */ + + } else { + if (pmap->pm_ptphint && + VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) { + + ptp = pmap->pm_ptphint; + + } else { + + ptp = PHYS_TO_VM_PAGE(ptppa); + if (ptp == NULL) + panic("pmap_remove: unmanaged PTP detected"); + + } + } + + pmap_remove_ptes(pmap, prr, ptp, (vm_offset_t)&ptes[i386_btop(sva)], + sva, blkendva); + + /* if PTP is no longer being used, free it! */ + if (ptp && ptp->wire_count <= 1) { + pmap->pm_pdir[pdei(sva)] = 0; /* zap! */ + pmap_update_pg( ((vm_offset_t) ptes) + ptp->offset); +#if defined(I386_CPU) + /* cancel possible pending pmap update on i386 */ + if (cpu_class == CPUCLASS_386 && prr) + prr->prr_npages = 0; +#endif + pmap->pm_stats.resident_count--; + if (pmap->pm_ptphint == ptp) /* update hint? */ + pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first; + uvm_pagefree(ptp); + } + } + + /* + * if we kept a removal record and removed some pages update the TLB + */ + + if (prr && prr->prr_npages) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + pmap_update(); + } else +#endif + { /* not I386 */ + if (prr->prr_npages > PMAP_RR_MAX) { + pmap_update(); + } else { + while (prr->prr_npages) { + pmap_update_pg(prr->prr_vas[--prr->prr_npages]); + } + } + } /* not I386 */ + } + + /* + * done! + */ + + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); + return; +} + +/* + * pmap_page_remove: remove a managed vm_page from all pmaps that map it + * + * => we set pv_head => pmap locking + * => R/M bits are sync'd back to attrs + */ + +void pmap_page_remove(pg) + +struct vm_page *pg; + +{ + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, opte; +#if defined(I386_CPU) + boolean_t needs_update = FALSE; +#endif + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_page_remove: unmanaged page?\n"); + return; + } + + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return; + } + + /* set pv_head => pmap locking */ + PMAP_HEAD_TO_MAP_LOCK(); + + simple_lock(&pvh->pvh_lock); /* XXX: needed if we hold head->map lock? */ + + for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) { + ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + +#ifdef DIAGNOSTIC + if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & PG_FRAME) + != VM_PAGE_TO_PHYS(pve->pv_ptp)) { + printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", pg, pve->pv_va, + pve->pv_ptp); + printf("pmap_page_remove: PTP's phys addr: actual=%x, recorded=%lx\n", + (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & PG_FRAME), + VM_PAGE_TO_PHYS(pve->pv_ptp)); + panic("pmap_page_remove: mapped managed page has invalid pv_ptp field"); + } +#endif + + opte = ptes[i386_btop(pve->pv_va)]; + ptes[i386_btop(pve->pv_va)] = 0; /* zap! */ + + if (opte & PG_W) + pve->pv_pmap->pm_stats.wired_count--; + pve->pv_pmap->pm_stats.resident_count--; + + if (pmap_is_curpmap(pve->pv_pmap)) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + needs_update = TRUE; + else +#endif + pmap_update_pg(pve->pv_va); + } + + /* sync R/M bits */ + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + + /* update the PTP reference count. free if last reference. 
*/ + if (pve->pv_ptp) { + pve->pv_ptp->wire_count--; + if (pve->pv_ptp->wire_count <= 1) { + pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] = 0; /* zap! */ + pmap_update_pg( ((vm_offset_t) ptes) + pve->pv_ptp->offset); +#if defined(I386_CPU) + needs_update = FALSE; +#endif + pve->pv_pmap->pm_stats.resident_count--; + if (pve->pv_pmap->pm_ptphint == pve->pv_ptp) /* update hint? */ + pve->pv_pmap->pm_ptphint = pve->pv_pmap->pm_obj.memq.tqh_first; + uvm_pagefree(pve->pv_ptp); + } + } + + pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + } + + pmap_free_pvs(pvh->pvh_list); + pvh->pvh_list = NULL; + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + +#if defined(I386_CPU) + if (needs_update) + pmap_update(); +#endif + + return; +} + +/* + * p m a p a t t r i b u t e f u n c t i o n s + * functions that test/change managed page's attributes + * since a page can be mapped multiple times we must check each PTE that + * maps it by going down the pv lists. + */ + +/* + * pmap_test_attrs: test a page's attributes + * + * => we set pv_head => pmap locking + */ + +boolean_t pmap_test_attrs(pg, testbits) + +struct vm_page *pg; +int testbits; + +{ + int bank, off; + char *myattrs; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, pte; + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_test_attrs: unmanaged page?\n"); + return(FALSE); + } + + /* before locking: see if attributes are already set and if so, return! */ + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + if (*myattrs & testbits) + return(TRUE); + + /* test to see if there is a list before bothering to lock */ + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pvh->pvh_list == NULL) { + return(FALSE); + } + + /* nope, gonna have to do it the hard way */ + PMAP_HEAD_TO_MAP_LOCK(); + simple_lock(&pvh->pvh_lock); /* XXX: needed if we hold head->map lock? */ + + for (pve = pvh->pvh_list ; pve && (*myattrs & testbits) == 0 ; + pve = pve->pv_next) { + + ptes = pmap_map_ptes(pve->pv_pmap); + pte = ptes[i386_btop(pve->pv_va)]; + pmap_unmap_ptes(pve->pv_pmap); + *myattrs |= pte; + + } + + /* + * note that we will exit the for loop with a non-null pve if + * we have found the bits we are testing for. + */ + + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + return((*myattrs & testbits) != 0); +} + +/* + * pmap_change_attrs: change a page's attributes + * + * => we set pv_head => pmap locking + * => we return TRUE if we cleared one of the bits we were asked to + */ + +boolean_t pmap_change_attrs(pg, setbits, clearbits) + +struct vm_page *pg; +int setbits, clearbits; + +{ + u_int32_t result; + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve; + pt_entry_t *ptes, npte; + char *myattrs; +#if defined(I386_CPU) + boolean_t needs_update = FALSE; +#endif + + /* XXX: vm_page should either contain pv_head or have a pointer to it */ + bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); + if (bank == -1) { + printf("pmap_change_attrs: unmanaged page?\n"); + return(FALSE); + } + + PMAP_HEAD_TO_MAP_LOCK(); + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); /* XXX: needed if we hold head->map lock? 
*/ + + myattrs = &vm_physmem[bank].pmseg.attrs[off]; + result = *myattrs & clearbits; + *myattrs = (*myattrs | setbits) & ~clearbits; + + for (pve = pvh->pvh_list ; pve ; pve = pve->pv_next) { + +#ifdef DIAGNOSTIC + if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) { + printf("pmap_change_attrs: found pager VA on pv_list"); + } + if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])) + panic("pmap_change_attrs: mapping without PTP detected"); +#endif + + ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + npte = ptes[i386_btop(pve->pv_va)]; + result |= (npte & clearbits); + npte = (npte | setbits) & ~clearbits; + if (ptes[i386_btop(pve->pv_va)] != npte) { + ptes[i386_btop(pve->pv_va)] = npte; /* zap! */ + + if (pmap_is_curpmap(pve->pv_pmap)) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) + needs_update = TRUE; + else +#endif + pmap_update_pg(pve->pv_va); + } + } + pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + + } + + simple_unlock(&pvh->pvh_lock); + PMAP_HEAD_TO_MAP_UNLOCK(); + +#if defined(I386_CPU) + if (needs_update) + pmap_update(); +#endif + + return(result != 0); +} + +/* + * p m a p p r o t e c t i o n f u n c t i o n s + */ + +/* + * pmap_page_protect: change the protection of all recorded mappings + * of a managed page + * + * => NOTE: this is an inline function in pmap.h + */ + +/* see pmap.h */ + +/* + * pmap_protect: set the protection in of the pages in a pmap + * + * => NOTE: this is an inline function in pmap.h + */ + +/* see pmap.h */ + +/* + * pmap_write_protect: write-protect pages in a pmap + */ + +void pmap_write_protect(pmap, sva, eva, prot) + +struct pmap *pmap; +vm_offset_t sva, eva; +vm_prot_t prot; + +{ + pt_entry_t *ptes, *spte, *epte, npte; + struct pmap_remove_record pmap_rr, *prr; + vm_offset_t blockend, va; + u_int32_t md_prot; + + ptes = pmap_map_ptes(pmap); /* locks pmap */ + + /* need to worry about TLB? [TLB stores protection bits] */ + if (pmap_is_curpmap(pmap)) { + prr = &pmap_rr; + prr->prr_npages = 0; + } else { + prr = NULL; + } + + /* should be ok, but just in case ... */ + sva &= PG_FRAME; + eva &= PG_FRAME; + + for (/* null */ ; sva < eva ; sva = blockend) { + + blockend = (sva & PD_MASK) + NBPD; + if (blockend > eva) + blockend = eva; + + /* + * XXXCDC: our PTE mappings should never be write-protected! + * + * long term solution is to move the PTEs out of user address space. + * and into kernel address space (up with APTE). then we can set + * VM_MAXUSER_ADDRESS to be VM_MAX_ADDRESS. + */ + + if (pdei(sva) == PDSLOT_PTE) + continue; /* XXXCDC: ugly hack to avoid freeing PDP here */ + + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) /* empty block? */ + continue; + + md_prot = protection_codes[prot]; + if (sva < VM_MAXUSER_ADDRESS) + md_prot |= PG_u; + else if (sva < VM_MAX_ADDRESS) + md_prot |= (PG_u | PG_RW); /* XXX: write-prot our PTES? never! */ + + spte = &ptes[i386_btop(sva)]; + epte = &ptes[i386_btop(blockend)]; + + for (/*null */; spte < epte ; spte++) { + + if (!pmap_valid_entry(*spte)) /* no mapping? */ + continue; + + npte = (*spte & ~PG_PROT) | md_prot; + + if (npte != *spte) { + *spte = npte; /* zap! */ + + if (prr) { /* worried about tlb flushing? 
*/ + + va = i386_ptob(spte - ptes); + if (npte & PG_G) { + pmap_update_pg(va);/* PG_G requires this */ + } else { + if (prr->prr_npages < PMAP_RR_MAX) { + prr->prr_vas[prr->prr_npages++] = va; + } else { + if (prr->prr_npages == PMAP_RR_MAX) + prr->prr_npages++; /* signal an overflow */ + } + } + + } /* if (prr) */ + + } /* npte != *spte */ + + } /* for loop */ + + } + + /* + * if we kept a removal record and removed some pages update the TLB + */ + + if (prr && prr->prr_npages) { +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + pmap_update(); + } else +#endif + { /* not I386 */ + if (prr->prr_npages > PMAP_RR_MAX) { + pmap_update(); + } else { + while (prr->prr_npages) { + pmap_update_pg(prr->prr_vas[--prr->prr_npages]); + } + } + } /* not I386 */ + } + + /* + * done! + */ + + pmap_unmap_ptes(pmap); /* unlocks pmap */ + return; +} + +/* + * end of protection functions + */ + +/* + * pmap_change_wiring: changing the wiring bit in the PTE + * + * => mapping should already be in map + * => XXX: so, this is really pmap_remove_wiring (pmap_enter sets wiring) + */ + +void pmap_change_wiring(pmap, va, wired) + +struct pmap *pmap; +vm_offset_t va; +boolean_t wired; + + +{ + pt_entry_t *ptes; + + if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) { + + ptes = pmap_map_ptes(pmap); /* locks pmap */ + + if (!pmap_valid_entry(ptes[i386_btop(va)])) + panic("pmap_change_wiring: invalid (unmapped) va"); + if (!wired && (ptes[i386_btop(va)] & PG_W) != 0) { + ptes[i386_btop(va)] &= ~PG_W; + pmap->pm_stats.wired_count--; + } else if (wired && (ptes[i386_btop(va)] & PG_W) == 0) { + ptes[i386_btop(va)] |= PG_W; + pmap->pm_stats.wired_count++; + } + + pmap_unmap_ptes(pmap); /* unlocks map */ + + } + +} + +/* + * pmap_collect: free resources held by a pmap + * + * => optional function. + * => called when a process is swapped out to free memory. + */ + +void pmap_collect(pmap) + +struct pmap *pmap; + +{ + + /* + * free all of the pt pages by removing the physical mappings + * for its entire address space. + */ + pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS); +} + +/* + * pmap_pageable: set pageable status of a range of pages. + * could use this to ensure PTP is in place (but no need). + * + * => optional function. + */ + +void pmap_pageable(pmap, sva, eva, pageable) + +struct pmap *pmap; +vm_offset_t sva, eva; +boolean_t pageable; + +{ + /* + * nothing useful for us to do here. + */ +} + +/* + * pmap_transfer: transfer (move or copy) mapping from one pmap + * to another. + * + * => this function is optional, it doesn't have to do anything + * => we assume that the mapping in the src pmap is valid (i.e. that + * it doesn't run off the end of the map's virtual space). + * => we assume saddr, daddr, and len are page aligned/lengthed + */ + +void pmap_transfer(dstpmap, srcpmap, daddr, len, saddr, move) + +struct pmap *dstpmap, *srcpmap; +vm_offset_t daddr, saddr; +vm_size_t len; +boolean_t move; + +{ +#if 0 + return; +#else + pt_entry_t *srcptes, *dstptes; /* base address of PTEs, dst could be NULL */ + struct pmap_transfer_location srcl, dstl; + int dstvalid; /* # of PTEs left in dst's current PTP */ + struct pmap *mapped_pmap; /* the pmap we passed to pmap_map_ptes */ + vm_size_t blklen; + int blkpgs, toxfer; + boolean_t ok; + +#ifdef DIAGNOSTIC + /* + * sanity check: let's make sure our len doesn't overflow our dst + * space. 
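+ * (user destinations must stay below VM_MAXUSER_ADDRESS, kernel
+ * destinations must stay below VM_MAX_KERNEL_ADDRESS)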
+ */ + if (daddr < VM_MAXUSER_ADDRESS) { + if (VM_MAXUSER_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in user pmap (addr=0x%lx, len=0x%lx)\n", + daddr, len); + return; + } + } else if (daddr < VM_MIN_KERNEL_ADDRESS || + daddr >= VM_MAX_KERNEL_ADDRESS) { + printf("pmap_transfer: invalid transfer address 0x%lx\n", daddr); + } else { + if (VM_MAX_KERNEL_ADDRESS - daddr < len) { + printf("pmap_transfer: no room in kernel pmap (addr=0x%lx, len=0x%lx)\n", + daddr, len); + return; + } + } +#endif + + /* + * ideally we would like to have either src or dst pmap's be the + * current pmap so that we can map the other one in APTE space + * (if needed... one of the maps could be the kernel's pmap). + * + * however, if we can't get this, then we have to use the tmpmap + * (alternately we could punt). + */ + + if (!pmap_is_curpmap(dstpmap) && !pmap_is_curpmap(srcpmap)) { + + dstptes = NULL; /* dstptes NOT mapped */ + srcptes = pmap_map_ptes(srcpmap); /* let's map the source */ + mapped_pmap = srcpmap; + + } else { + + if (!pmap_is_curpmap(srcpmap)) { + srcptes = pmap_map_ptes(srcpmap); /* possible APTE */ + dstptes = PTE_BASE; + mapped_pmap = srcpmap; + } else { + dstptes = pmap_map_ptes(dstpmap); /* possible APTE */ + srcptes = PTE_BASE; + mapped_pmap = dstpmap; + } + + } + + /* + * at this point we know that the srcptes are mapped. the dstptes + * are mapped if (dstptes != NULL). if (dstptes == NULL) then we + * will have to map the dst PTPs page at a time using the tmpmap. + * [XXX: is it worth the effort, or should we just punt?] + */ + + srcl.addr = saddr; + srcl.pte = &srcptes[i386_btop(srcl.addr)]; + srcl.ptp = NULL; + dstl.addr = daddr; + if (dstptes) + dstl.pte = &dstptes[i386_btop(dstl.addr)]; + else + dstl.pte = NULL; /* we map page at a time */ + dstl.ptp = NULL; + dstvalid = 0; /* force us to load a new dst PTP to start */ + + while (len) { + + /* + * compute the size of this block. + */ + + blklen = i386_round_pdr(srcl.addr+1) - srcl.addr; /* length in bytes */ + if (blklen > len) + blklen = len; + blkpgs = i386_btop(blklen); + + /* + * if the block is not valid in the src pmap, then we can skip it! + */ + + if (!pmap_valid_entry(srcpmap->pm_pdir[pdei(srcl.addr)])) { + + len = len - blklen; + srcl.pte = srcl.pte + blkpgs; + srcl.addr += blklen; + dstl.addr += blklen; + if (blkpgs > dstvalid) { + dstvalid = 0; + dstl.ptp = NULL; + } else { + dstvalid = dstvalid - blkpgs; + } + if (dstptes == NULL && (len == 0 || dstvalid == 0)) { + if (dstl.pte) { + pmap_tmpunmap_pa(); + dstl.pte = NULL; + } + } else { + dstl.pte += blkpgs; + } + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * if we don't have any dst PTEs ready, then get some. + */ + + if (dstvalid == 0) { + if (!pmap_valid_entry(dstpmap->pm_pdir[pdei(dstl.addr)])) { + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel PTP at 0x%lx", dstl.addr); + dstl.ptp = pmap_get_ptp(dstpmap, pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of free RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + dstvalid = i386_btop(i386_round_pdr(dstl.addr+1) - dstl.addr); + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap->pm_pdir[pdei(dstl.addr)] & PG_FRAME); + dstl.pte = dstl.pte + (PTES_PER_PTP - dstvalid); + } + } + + /* + * we have a valid source block of "blkpgs" PTEs to transfer. + * we have a valid dst block of "dstvalid" PTEs ready. + * thus we can transfer min(blkpgs, dstvalid) PTEs now. 
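+ * (if dstvalid runs out before blkpgs does, we drop through below to
+ * grab a fresh dst PTP and finish the source block off)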
+ */ + + srcl.ptp = NULL; /* don't know source PTP yet */ + if (dstvalid < blkpgs) + toxfer = dstvalid; + else + toxfer = blkpgs; + + if (toxfer > 0) { + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, toxfer, move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + if (blkpgs == 0) /* ran out of src PTEs? restart loop */ + continue; + } + + /* + * we have a valid source block of "blkpgs" PTEs left to transfer. + * we have just used up our "dstvalid" PTEs, and thus must obtain + * more dst PTEs to finish off the src block. since we are now + * going to obtain a brand new dst PTP, we know we can finish the + * src block in one more transfer. + */ + +#ifdef DIAGNOSTIC + if (dstvalid) + panic("pmap_transfer: dstvalid non-zero after drain"); + if ((dstl.addr & (NBPD-1)) != 0) + panic("pmap_transfer: dstaddr not on PD boundary (0x%lx)\n", dstl.addr); +#endif + + if (dstptes == NULL && dstl.pte != NULL) { /* dispose of old PT mapping */ + pmap_tmpunmap_pa(); + dstl.pte = NULL; + } + + /* + * get new dst PTP + */ + if (!pmap_valid_entry(dstpmap->pm_pdir[pdei(dstl.addr)])) { + if (dstl.addr >= VM_MIN_KERNEL_ADDRESS) + panic("pmap_transfer: missing kernel PTP at 0x%lx", dstl.addr); + dstl.ptp = pmap_get_ptp(dstpmap, pdei(dstl.addr), TRUE); + if (dstl.ptp == NULL) /* out of free RAM? punt. */ + break; + } else { + dstl.ptp = NULL; + } + + dstvalid = PTES_PER_PTP; /* new PTP */ + + /* + * if the dstptes are un-mapped, then we need to tmpmap in the + * dstl.ptp. + */ + + if (dstptes == NULL) { + dstl.pte = (pt_entry_t *) + pmap_tmpmap_pa(dstpmap->pm_pdir[pdei(dstl.addr)] & PG_FRAME); + } + + /* + * we have a valid source block of "blkpgs" PTEs left to transfer. + * we just got a brand new dst PTP to receive these PTEs. + */ + +#ifdef DIAGNOSTIC + if (dstvalid < blkpgs) + panic("pmap_transfer: too many blkpgs?"); +#endif + toxfer = blkpgs; + + ok = pmap_transfer_ptes(srcpmap, &srcl, dstpmap, &dstl, toxfer, move); + + if (!ok) /* memory shortage? punt. */ + break; + + dstvalid -= toxfer; + blkpgs -= toxfer; + len -= i386_ptob(toxfer); + + /* + * done src pte block + */ + } + /* + * done! + */ + + if (dstptes == NULL && dstl.pte != NULL) + pmap_tmpunmap_pa(); /* dst PTP still mapped? */ + pmap_unmap_ptes(mapped_pmap); +#endif +} + +/* + * pmap_transfer_ptes: transfer PTEs from one pmap to another + * + * => we assume that the needed PTPs are mapped and that we will + * not cross a block boundary. + * => we return TRUE if we transfered all PTEs, FALSE if we were + * unable to allocate a pv_entry + */ + +static boolean_t pmap_transfer_ptes(srcpmap, srcl, dstpmap, dstl, toxfer, move) + +struct pmap *srcpmap, *dstpmap; +struct pmap_transfer_location *srcl, *dstl; +int toxfer; +boolean_t move; + +{ + pt_entry_t dstproto, opte; + int bank, off; + struct pv_head *pvh; + struct pv_entry *pve, *lpve; + + /* + * generate "prototype" dst PTE + */ + + if (dstl->addr < VM_MAX_ADDRESS) + dstproto = PG_u; /* "user" page */ + else + dstproto = pmap_pg_g; /* kernel page */ + + /* + * ensure we have dst PTP for user addresses. 
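+ * (kernel PTPs are never freed, so we don't bother tracking a ptp
+ * pointer for kernel addresses)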
+ */ + + if (dstl->ptp == NULL && dstl->addr < VM_MAXUSER_ADDRESS) + dstl->ptp = PHYS_TO_VM_PAGE(dstpmap->pm_pdir[pdei(dstl->addr)] & PG_FRAME); + + /* + * main loop over range + */ + + for (/*null*/; toxfer > 0 ; toxfer--, + srcl->addr += NBPG, dstl->addr += NBPG, + srcl->pte++, dstl->pte++) { + + if (!pmap_valid_entry(*srcl->pte)) /* skip invalid entrys */ + continue; + + if (pmap_valid_entry(*dstl->pte)) + panic("pmap_transfer_ptes: attempt to overwrite active entry"); + + /* + * let's not worry about non-pvlist mappings (typically device + * pager mappings). + */ + + opte = *srcl->pte; + + if ((opte & PG_PVLIST) == 0) + continue; + + /* + * if we are moving the mapping, then we can just adjust the + * current pv_entry. if we are copying the mapping, then we + * need to allocate a new pv_entry to account for it. + */ + + if (move == FALSE) { + pve = pmap_alloc_pv(ALLOCPV_TRY); + if (pve == NULL) + return(FALSE); /* punt! */ + } else { + pve = NULL; /* XXX: unecessary, but stops gcc warning */ + } + + /* + * find the pv_head for this mapping. since our mapping is + * on the pvlist (PG_PVLIST), there must be a pv_head. + */ + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); + if (bank == -1) + panic("pmap_transfer_ptes: PG_PVLIST PTE and no pv_head!"); + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + + /* + * now lock down the pvhead and find the current entry (there + * must be one). + */ + + simple_lock(&pvh->pvh_lock); + for (lpve = pvh->pvh_list ; lpve ; lpve = lpve->pv_next) + if (lpve->pv_pmap == srcpmap && lpve->pv_va == srcl->addr) + break; + if (lpve == NULL) + panic("pmap_transfer_ptes: PG_PVLIST PTE, but entry not found"); + + /* + * update src ptp. if the ptp is null in the pventry, then + * we are not counting valid entrys for this ptp (this is only + * true for kernel PTPs). + */ + + if (srcl->ptp == NULL) + srcl->ptp = lpve->pv_ptp; +#ifdef DIAGNOSTIC + if (srcl->ptp && (srcpmap->pm_pdir[pdei(srcl->addr)] & PG_FRAME) != + VM_PAGE_TO_PHYS(srcl->ptp)) + panic("pmap_transfer_ptes: pm_pdir - pv_ptp mismatch!"); +#endif + + /* + * for move, update the pve we just found (lpve) to point to its + * new mapping. for copy, init the new pve and put it in the list. + */ + + if (move == TRUE) { + pve = lpve; + } + pve->pv_pmap = dstpmap; + pve->pv_va = dstl->addr; + pve->pv_ptp = dstl->ptp; + if (move == FALSE) { /* link in copy */ + pve->pv_next = lpve->pv_next; + lpve->pv_next = pve; + } + + /* + * sync the R/M bits while we are here. + */ + + vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); /* sync! */ + + /* + * now actually update the ptes and unlock the pvlist. + */ + + if (move) { + *srcl->pte = 0; /* zap! 
*/ + if (pmap_is_curpmap(srcpmap)) + pmap_update_pg(srcl->addr); + if (srcl->ptp) + srcl->ptp->wire_count--; /* don't bother trying to free PTP */ + srcpmap->pm_stats.resident_count--; + if (opte & PG_W) + srcpmap->pm_stats.wired_count--; + } + *dstl->pte = (opte & ~(PG_u|PG_U|PG_M|PG_G|PG_W)) | dstproto;/* install */ + dstpmap->pm_stats.resident_count++; + if (dstl->ptp) + dstl->ptp->wire_count++; + simple_unlock(&pvh->pvh_lock); + + /* + * done + */ + } + return(TRUE); +} + +/* + * pmap_copy: copy mappings from one pmap to another + * + * => optional function + * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) + */ + +/* + * defined as macro call to pmap_transfer in pmap.h + */ + +/* + * pmap_move: move mappings from one pmap to another + * + * => optional function + * void pmap_move(dst_pmap, src_pmap, dst_addr, len, src_addr) + */ + +/* + * defined as macro call to pmap_transfer in pmap.h + */ + +/* + * pmap_enter: enter a mapping into a pmap + * + * => must be done "now" ... no lazy-evaluation + * => we set pmap => pv_head locking + */ + +void pmap_enter(pmap, va, pa, prot, wired) + +struct pmap *pmap; +vm_offset_t va, pa; +vm_prot_t prot; +boolean_t wired; + +{ + pt_entry_t *ptes, opte, npte; + struct vm_page *ptp; + struct pv_head *pvh; + struct pv_entry *pve; + int bank, off; + +#ifdef DIAGNOSTIC + /* sanity check: totally out of range? */ + if (va >= VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: too big"); + + if (va == (vm_offset_t) PDP_BASE || va == (vm_offset_t) APDP_BASE) + panic("pmap_enter: trying to map over PDP/APDP!"); + + /* sanity check: kernel PTPs should already have been pre-allocated */ + if (va >= VM_MIN_KERNEL_ADDRESS && + !pmap_valid_entry(pmap->pm_pdir[pdei(va)])) + panic("pmap_enter: missing kernel PTP!"); +#endif + + /* get lock */ + PMAP_MAP_TO_HEAD_LOCK(); + + /* + * map in ptes and get a pointer to our PTP (unless we are the kernel) + */ + + ptes = pmap_map_ptes(pmap); /* locks pmap */ + + if (pmap == pmap_kernel()) + ptp = NULL; + else + ptp = pmap_get_ptp(pmap, pdei(va), FALSE); + + opte = ptes[i386_btop(va)]; /* old PTE */ + + + /* + * is there currently a valid mapping at our VA? + */ + + if (pmap_valid_entry(opte)) { + + /* + * first, update pm_stats. resident count will not change since + * we are replacing/changing a valid mapping. wired count might + * change... + */ + + if (wired && (opte & PG_W) == 0) + pmap->pm_stats.wired_count++; + else if (!wired && (opte & PG_W) != 0) + pmap->pm_stats.wired_count--; + + /* + * is the currently mapped PA the same as the one we want to map? 
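+ * (if so, no pv list work is needed: we just sync the R/M bits into
+ * attrs and jump to enter_now to refresh protection/wiring)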
+ */ + + if ((opte & PG_FRAME) == pa) { + + /* if this is on the PVLIST, sync R/M bit */ + if (opte & PG_PVLIST) { + bank = vm_physseg_find(atop(pa), &off); + if (bank == -1) + panic("pmap_enter: PG_PVLIST mapping with unmanaged page"); + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + } else { + pvh = NULL; /* ensure !PG_PVLIST */ + } + goto enter_now; + } + + /* + * changing PAs: we must remove the old one first + */ + + /* if current mapping is on a pvlist, remove it (sync R/M bits) */ + if (opte & PG_PVLIST) { + + bank = vm_physseg_find(atop(opte & PG_FRAME), &off); + if (bank == -1) + panic("pmap_enter: PG_PVLIST mapping with unmanaged page"); + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + simple_lock(&pvh->pvh_lock); + pve = pmap_remove_pv(pvh, pmap, va); /* does not lock pvh */ + vm_physmem[bank].pmseg.attrs[off] |= opte; + simple_unlock(&pvh->pvh_lock); + + } else { + + pve = NULL; + + } + + } else { /* opte not valid */ + + pve = NULL; + pmap->pm_stats.resident_count++; + if (wired) + pmap->pm_stats.wired_count++; + if (ptp) + ptp->wire_count++; /* count # of valid entrys */ + + } + + /* + * at this point pm_stats has been updated. pve is either NULL + * or points to a now-free pv_entry structure (the latter case is + * if we called pmap_remove_pv above). + * + * if this entry is to be on a pvlist, enter it now. + */ + + bank = vm_physseg_find(atop(pa), &off); + if (pmap_initialized && bank != -1) { + + pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pve == NULL) + pve = pmap_alloc_pv(ALLOCPV_NEED); + pmap_enter_pv(pvh, pve, pmap, va, ptp); /* lock pvh when adding */ + + } else { + + /* new mapping is not PG_PVLIST. free pve if we've got one */ + pvh = NULL; /* ensure !PG_PVLIST */ + if (pve) + pmap_free_pv(pve); + + } + +enter_now: + /* + * at this point pvh is !NULL if we want the PG_PVLIST bit set + */ + + npte = pa | protection_codes[prot] | PG_V; + if (pvh) + npte |= PG_PVLIST; + if (wired) + npte |= PG_W; + if (va < VM_MAXUSER_ADDRESS) + npte |= PG_u; + else if (va < VM_MAX_ADDRESS) + npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ + if (pmap == pmap_kernel()) + npte |= pmap_pg_g; + + ptes[i386_btop(va)] = npte; /* zap! */ + + if ((opte & ~(PG_M|PG_U)) != npte && pmap_is_curpmap(pmap)) + pmap_update_pg(va); + + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); +} + +/* + * pmap_growkernel: increase usage of KVM space + * + * => we allocate new PTPs for the kernel and install them in all + * the pmaps on the system. + */ + +void pmap_growkernel(maxkvaddr) + +vm_offset_t maxkvaddr; + +{ + struct pmap *kpm = pmap_kernel(), *pm; + vm_offset_t needed_kpde; /* needed number of kernel PTPs */ + int s; + + needed_kpde = (maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1)) / NBPD; + if (needed_kpde <= nkpde) + return; /* we are OK */ + + /* + * whoops! 
we need to add kernel PTPs + */ + + s = splhigh(); /* to be safe */ + simple_lock(&kpm->pm_obj.vmobjlock); + + for (/*null*/ ; nkpde < needed_kpde ; nkpde++) { + + pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE); + kpm->pm_pdir[PDSLOT_KERN + nkpde] &= ~PG_u; /* PG_u not for kernel */ + + /* distribute new kernel PTP to all active pmaps */ + simple_lock(&pmaps_lock); + for (pm = pmaps.lh_first ; pm != NULL ; pm = pm->pm_list.le_next) { + pm->pm_pdir[PDSLOT_KERN + nkpde] = kpm->pm_pdir[PDSLOT_KERN + nkpde]; + } + simple_unlock(&pmaps_lock); + } + + simple_unlock(&kpm->pm_obj.vmobjlock); + splx(s); +} + +#ifdef DEBUG +void pmap_dump __P((struct pmap *, vm_offset_t, vm_offset_t)); + +/* + * pmap_dump: dump all the mappings from a pmap + * + * => caller should not be holding any pmap locks + */ + +void pmap_dump(pmap, sva, eva) + +struct pmap *pmap; +vm_offset_t sva, eva; + +{ + pt_entry_t *ptes, *pte; + vm_offset_t blkendva; + + /* + * if end is out of range truncate. + * if (end == start) update to max. + */ + + if (eva > VM_MAXUSER_ADDRESS || eva <= sva) + eva = VM_MAXUSER_ADDRESS; + + /* + * we lock in the pmap => pv_head direction + */ + + PMAP_MAP_TO_HEAD_LOCK(); + ptes = pmap_map_ptes(pmap); /* locks pmap */ + + /* + * dumping a range of pages: we dump in PTP sized blocks (4MB) + */ + + for (/* null */ ; sva < eva ; sva = blkendva) { + + /* determine range of block */ + blkendva = i386_round_pdr(sva+1); + if (blkendva > eva) + blkendva = eva; + + if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) /* valid block? */ + continue; + + pte = &ptes[i386_btop(sva)]; + for (/* null */; sva < blkendva ; sva += NBPG, pte++) { + if (!pmap_valid_entry(*pte)) continue; + printf("va %#lx -> pa %#x (pte=%#x)\n", sva, *pte, *pte & PG_FRAME); + } + + } + + /* + * done! + */ + + pmap_unmap_ptes(pmap); + PMAP_MAP_TO_HEAD_UNLOCK(); + return; +} +#endif