/*	$NetBSD: pmap.c,v 1.29 2007/05/17 14:51:36 yamt Exp $	*/
/*	NetBSD: pmap.c,v 1.179 2004/10/10 09:55:24 yamt Exp	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * pmap.c: i386 pmap module rewrite
 *	Chuck Cranor <chuck@ccrc.wustl.edu>
 *	11-Aug-97
 *
 * history of this pmap module: in addition to my own input, i used
 *    the following references for this rewrite of the i386 pmap:
 *
 * [1] the NetBSD i386 pmap.   this pmap appears to be based on the
 *     BSD hp300 pmap done by Mike Hibler at University of Utah.
 *     it was then ported to the i386 by William Jolitz of UUNET
 *     Technologies, Inc.   Then Charles M. Hannum of the NetBSD
 *     project fixed some bugs and provided some speed ups.
 *
 * [2] the FreeBSD i386 pmap.   this pmap seems to be the
 *     Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
 *     and David Greenman.
 *
 * [3] the Mach pmap.   this pmap, from CMU, seems to have migrated
 *     between several processors.   the VAX version was done by
 *     Avadis Tevanian, Jr., and Michael Wayne Young.   the i386
 *     version was done by Lance Berc, Mike Kupfer, Bob Baron,
 *     David Golub, and Richard Draves.   the alpha version was
 *     done by Alessandro Forin (CMU/Mach) and Chris Demetriou
 *     (NetBSD/alpha).
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.29 2007/05/17 14:51:36 yamt Exp $");

#include "opt_cputype.h"
#include "opt_user_ldt.h"
#include "opt_largepages.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_kstack_dr0.h"
#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/user.h>
#include <sys/kernel.h>

#include <uvm/uvm.h>

#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/specialreg.h>
#include <machine/gdt.h>

#include <dev/isa/isareg.h>
#include <machine/isa_machdep.h>

#include <machine/xen.h>
#include <machine/hypervisor.h>
#include <machine/xenpmap.h>

void xpmap_find_pte(paddr_t);

/* #define XENDEBUG */

#ifdef XENDEBUG
#define	XENPRINTF(x) printf x
#define	XENPRINTK(x) printf x
#else
#define	XENPRINTF(x)
#define	XENPRINTK(x)
#endif
#define	PRINTF(x) printf x
#define	PRINTK(x) printf x


/*
 * general info:
 *
 *  - for an explanation of how the i386 MMU hardware works see
 *    the comments in <machine/pte.h>.
 *
 *  - for an explanation of the general memory structure used by
 *    this pmap (including the recursive mapping), see the comments
 *    in <machine/pmap.h>.
 *
 * this file contains the code for the "pmap module."   the module's
 * job is to manage the hardware's virtual to physical address mappings.
 * note that there are two levels of mapping in the VM system:
 *
 *  [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
 *      to map ranges of virtual address space to objects/files.   for
 *      example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
 *      to the file /bin/ls starting at offset zero."   note that
 *      the upper layer mapping is not concerned with how individual
 *      vm_pages are mapped.
 *
 *  [2] the lower layer of the VM system (the pmap) maintains the mappings
 *      from virtual addresses.   it is concerned with which vm_page is
 *      mapped where.   for example, when you run /bin/ls and start
 *      at page 0x1000 the fault routine may lookup the correct page
 *      of the /bin/ls file and then ask the pmap layer to establish
 *      a mapping for it.
 *
 * note that information in the lower layer of the VM system can be
 * thrown away since it can easily be reconstructed from the info
 * in the upper layer.
 *
 * data structures we use include:
 *
 *  - struct pmap: describes the address space of one thread
 *  - struct pv_entry: describes one <PMAP,VA> mapping of a PA
 *  - struct pv_head: there is one pv_head per managed page of
 *      physical memory.   the pv_head points to a list of pv_entry
 *      structures which describe all the <PMAP,VA> pairs that this
 *      page is mapped in.   this is critical for page based operations
 *      such as pmap_page_protect() [change protection on _all_ mappings
 *      of a page]
 *  - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
 *      if we run out of pv_entry's we allocate a new pv_page and free
 *      its pv_entrys.
 */

/*
 * memory allocation
 *
 *  - there are three data structures that we must dynamically allocate:
 *
 * [A] new process' page directory page (PDP)
 *	- plan 1: done at pmap_create() we use
 *	  uvm_km_alloc(kernel_map, PAGE_SIZE)  [fka kmem_alloc] to do this
 *	  allocation.
 *
 *	if we are low in free physical memory then we sleep in
 *	uvm_km_alloc -- in this case this is ok since we are creating
 *	a new pmap and should not be holding any locks.
 *
 *	if the kernel is totally out of virtual space
 *	(i.e. uvm_km_alloc returns NULL), then we panic.
 *
 *	XXX: the fork code currently has no way to return an "out of
 *	memory, try again" error code since uvm_fork [fka vm_fork]
 *	is a void function.
 *
 * [B] new page table pages (PTP)
 *	- call uvm_pagealloc()
 *		=> success: zero page, add to pm_pdir
 *		=> failure: we are out of free vm_pages, let pmap_enter()
 *		   tell UVM about it.
 *
 * note: for kernel PTPs, we start with NKPTP of them.   as we map
 * kernel memory (at uvm_map time) we check to see if we've grown
 * the kernel pmap.   if so, we call the optional function
 * pmap_growkernel() to grow the kernel PTPs in advance.
 *
 * [C] pv_entry structures
 *	- plan 1: try to allocate one off the free list
 *		=> success: done!
 *		=> failure: no more free pv_entrys on the list
 *	- plan 2: try to allocate a new pv_page to add a chunk of
 *	pv_entrys to the free list
 *		[a] obtain a free, unmapped, VA in kmem_map.  either
 *		we have one saved from a previous call, or we allocate
 *		one now using a "vm_map_lock_try" in uvm_map
 *		=> success: we have an unmapped VA, continue to [b]
 *		=> failure: unable to lock kmem_map or out of VA in it.
 *			give up (see below).
 *		[b] allocate a page in kmem_object for the VA
 *		=> success: map it in, free the pv_entry's, DONE!
 *		=> failure: kmem_object locked, no free vm_pages, etc.
 *			save VA for later call to [a], give up (see below).
 *	If we fail, we simply let pmap_enter() tell UVM about it.
 */

/*
 * locking
 *
 * we have the following locks that we must contend with:
 *
 * "normal" locks:
 *
 *  - pmap_main_lock
 *    this lock is used to prevent deadlock and/or provide mutex
 *    access to the pmap system.   most operations lock the pmap
 *    structure first, then they lock the pv_lists (if needed).
 *    however, some operations such as pmap_page_protect lock
 *    the pv_lists and then lock pmaps.   in order to prevent a
 *    cycle, we require a mutex lock when locking the pv_lists
 *    first.   thus, the "pmap => pv_list" lockers must gain a
 *    read-lock on pmap_main_lock before locking the pmap.   and
 *    the "pv_list => pmap" lockers must gain a write-lock on
 *    pmap_main_lock before locking.   since only one thread
 *    can write-lock a lock at a time, this provides mutex.
 *
 * "simple" locks:
 *
 *  - pmap lock (per pmap, part of uvm_object)
 *    this lock protects the fields in the pmap structure including
 *    the non-kernel PDEs in the PDP, and the PTEs.  it also locks
 *    in the alternate PTE space (since that is determined by the
 *    entry in the PDP).
 *
 *  - pvh_lock (per pv_head)
 *    this lock protects the pv_entry list which is chained off the
 *    pv_head structure for a specific managed PA.   it is locked
 *    when traversing the list (e.g. adding/removing mappings,
 *    syncing R/M bits, etc.)
 *
 *  - pvalloc_lock
 *    this lock protects the data structures which are used to manage
 *    the free list of pv_entry structures.
 *
 *  - pmaps_lock
 *    this lock protects the list of active pmaps (headed by "pmaps").
 *    we lock it when adding or removing pmaps from this list.
 *
 */

/*
 * locking data structures
 */

static struct simplelock pvalloc_lock;
static struct simplelock pmaps_lock;

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
static struct lock pmap_main_lock;

#define PMAP_MAP_TO_HEAD_LOCK() \
	(void) spinlockmgr(&pmap_main_lock, LK_SHARED, NULL)
#define PMAP_MAP_TO_HEAD_UNLOCK() \
	(void) spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL)

#define PMAP_HEAD_TO_MAP_LOCK() \
	(void) spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, NULL)
#define PMAP_HEAD_TO_MAP_UNLOCK() \
	spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0)

#else

#define PMAP_MAP_TO_HEAD_LOCK()		/* null */
#define PMAP_MAP_TO_HEAD_UNLOCK()	/* null */

#define PMAP_HEAD_TO_MAP_LOCK()		/* null */
#define PMAP_HEAD_TO_MAP_UNLOCK()	/* null */

#endif

#define COUNT(x)	/* nothing */

/*
 * TLB Shootdown:
 *
 * When a mapping is changed in a pmap, the TLB entry corresponding to
 * the virtual address must be invalidated on all processors.  In order
 * to accomplish this on systems with multiple processors, messages are
 * sent from the processor which performs the mapping change to all
 * processors on which the pmap is active.  For other processors, the
 * ASN generation number for that processor is invalidated, so that
 * the next time the pmap is activated on that processor, a new ASN
 * will be allocated (which implicitly invalidates all TLB entries).
 *
 * Shootdown job queue entries are allocated using a simple special-
 * purpose allocator for speed.
 */
struct pmap_tlb_shootdown_job {
	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
	vaddr_t pj_va;			/* virtual address */
	pmap_t pj_pmap;			/* the pmap which maps the address */
	pt_entry_t pj_pte;		/* the PTE bits */
	struct pmap_tlb_shootdown_job *pj_nextfree;
};

#define PMAP_TLB_SHOOTDOWN_JOB_ALIGN 32
union pmap_tlb_shootdown_job_al {
	struct pmap_tlb_shootdown_job pja_job;
	char pja_align[PMAP_TLB_SHOOTDOWN_JOB_ALIGN];
};

struct pmap_tlb_shootdown_q {
	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
	int pq_pte;			/* aggregate PTE bits */
	int pq_count;			/* number of pending requests */
	__cpu_simple_lock_t pq_slock;	/* spin lock on queue */
	int pq_flushg;			/* pending flush global */
	int pq_flushu;			/* pending flush user */
} pmap_tlb_shootdown_q[X86_MAXPROCS];

#define PMAP_TLB_MAXJOBS	16

void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get
	(struct pmap_tlb_shootdown_q *);
void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
	struct pmap_tlb_shootdown_job *);

__cpu_simple_lock_t pmap_tlb_shootdown_job_lock;
union pmap_tlb_shootdown_job_al *pj_page, *pj_free;

/*
|
|
* global data structures
|
|
*/
|
|
|
|
struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
|
|
|
|
/*
|
|
* nkpde is the number of kernel PTPs allocated for the kernel at
|
|
* boot time (NKPTP is a compile time override). this number can
|
|
* grow dynamically as needed (but once allocated, we never free
|
|
* kernel PTPs).
|
|
*/
|
|
|
|
int nkpde = NKPTP;
|
|
#ifdef NKPDE
|
|
#error "obsolete NKPDE: use NKPTP"
|
|
#endif
|
|
|
|
/*
|
|
* pmap_pg_g: if our processor supports PG_G in the PTE then we
|
|
* set pmap_pg_g to PG_G (otherwise it is zero).
|
|
*/
|
|
|
|
int pmap_pg_g = 0;
|
|
|
|
#ifdef LARGEPAGES
|
|
/*
|
|
* pmap_largepages: if our processor supports PG_PS and we are
|
|
* using it, this is set to true.
|
|
*/
|
|
|
|
int pmap_largepages;
|
|
#endif
|
|
|
|
/*
|
|
* i386 physical memory comes in a big contig chunk with a small
|
|
* hole toward the front of it... the following two paddr_t's
|
|
* (shared with machdep.c) describe the physical address space
|
|
* of this machine.
|
|
*/
|
|
paddr_t avail_start; /* PA of first available physical page */
|
|
paddr_t avail_end; /* PA of last available physical page */
|
|
|
|
paddr_t pmap_pa_start; /* PA of first physical page for this domain */
|
|
paddr_t pmap_pa_end; /* PA of last physical page for this domain */
|
|
|
|
/* MA of last physical page of the machine */
|
|
paddr_t pmap_mem_end = HYPERVISOR_VIRT_START; /* updated for domain-0 */
|
|
|
|
/*
|
|
* other data structures
|
|
*/
|
|
|
|
static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */
|
|
static bool pmap_initialized = false; /* pmap_init done yet? */
|
|
|
|
/*
|
|
* the following two vaddr_t's are used during system startup
|
|
* to keep track of how much of the kernel's VM space we have used.
|
|
* once the system is started, the management of the remaining kernel
|
|
* VM space is turned over to the kernel_map vm_map.
|
|
*/
|
|
|
|
static vaddr_t virtual_avail; /* VA of first free KVA */
|
|
static vaddr_t virtual_end; /* VA of last free KVA */
|
|
|
|
|
|
/*
|
|
* pv_page management structures: locked by pvalloc_lock
|
|
*/
|
|
|
|
TAILQ_HEAD(pv_pagelist, pv_page);
|
|
static struct pv_pagelist pv_freepages; /* list of pv_pages with free entrys */
|
|
static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */
|
|
static int pv_nfpvents; /* # of free pv entries */
|
|
|
|
#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */
|
|
#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2))
|
|
/* high water mark */
|
|
|
|
static inline int
|
|
pv_compare(struct pv_entry *a, struct pv_entry *b)
|
|
{
|
|
if (a->pv_pmap < b->pv_pmap)
|
|
return (-1);
|
|
else if (a->pv_pmap > b->pv_pmap)
|
|
return (1);
|
|
else if (a->pv_va < b->pv_va)
|
|
return (-1);
|
|
else if (a->pv_va > b->pv_va)
|
|
return (1);
|
|
else
|
|
return (0);
|
|
}
|
|
|
|
SPLAY_PROTOTYPE(pvtree, pv_entry, pv_node, pv_compare);
|
|
SPLAY_GENERATE(pvtree, pv_entry, pv_node, pv_compare);
|
|
|
|
/*
|
|
* linked list of all non-kernel pmaps
|
|
*/
|
|
|
|
static struct pmap_head pmaps;
|
|
|
|
/*
|
|
* pool that pmap structures are allocated from
|
|
*/
|
|
|
|
struct pool pmap_pmap_pool;
|
|
|
|
/*
|
|
* MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a
|
|
* X86_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing
|
|
* due to false sharing.
|
|
*/
|
|
|
|
#ifdef MULTIPROCESSOR
|
|
#define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
|
|
#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE)
|
|
#else
|
|
#define PTESLEW(pte, id) (pte)
|
|
#define VASLEW(va,id) (va)
|
|
#endif
|
|
|
|
/*
|
|
* special VAs and the PTEs that map them
|
|
*/
|
|
static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte;
|
|
static void *csrcp, *cdstp, *zerop, *ptpp;
|
|
|
|
/*
|
|
* pool and cache that PDPs are allocated from
|
|
*/
|
|
|
|
struct pool pmap_pdp_pool;
|
|
struct pool_cache pmap_pdp_cache;
|
|
u_int pmap_pdp_cache_generation;
|
|
|
|
int pmap_pdp_ctor(void *, void *, int);
|
|
void pmap_pdp_dtor(void *, void *);
|
|
|
|
void *vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */
|
|
|
|
extern vaddr_t idt_vaddr; /* we allocate IDT early */
|
|
extern paddr_t idt_paddr;
|
|
|
|
#if defined(I586_CPU)
|
|
/* stuff to fix the pentium f00f bug */
|
|
extern vaddr_t pentium_idt_vaddr;
|
|
#endif
|
|
|
|
|
|
/*
|
|
* local prototypes
|
|
*/
|
|
|
|
static struct pv_entry *pmap_add_pvpage(struct pv_page *, bool);
|
|
static struct vm_page *pmap_alloc_ptp(struct pmap *, int);
|
|
static struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */
|
|
#define ALLOCPV_NEED 0 /* need PV now */
|
|
#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */
|
|
#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */
|
|
static struct pv_entry *pmap_alloc_pvpage(struct pmap *, int);
|
|
static void pmap_enter_pv(struct pv_head *,
|
|
struct pv_entry *, struct pmap *,
|
|
vaddr_t, struct vm_page *);
|
|
static void pmap_free_pv(struct pmap *, struct pv_entry *);
|
|
static void pmap_free_pvs(struct pmap *, struct pv_entry *);
|
|
static void pmap_free_pv_doit(struct pv_entry *);
|
|
static void pmap_free_pvpage(void);
|
|
static struct vm_page *pmap_get_ptp(struct pmap *, int);
|
|
static bool pmap_is_curpmap(struct pmap *);
|
|
static bool pmap_is_active(struct pmap *, int);
|
|
static pt_entry_t *pmap_map_ptes(struct pmap *);
|
|
static struct pv_entry *pmap_remove_pv(struct pv_head *, struct pmap *,
|
|
vaddr_t);
|
|
static void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
|
|
static bool pmap_remove_pte(struct pmap *, struct vm_page *,
|
|
pt_entry_t *, vaddr_t, int32_t *, int);
|
|
static void pmap_remove_ptes(struct pmap *, struct vm_page *,
|
|
vaddr_t, vaddr_t, vaddr_t, int32_t *,
|
|
int);
|
|
#define PMAP_REMOVE_ALL 0 /* remove all mappings */
|
|
#define PMAP_REMOVE_SKIPWIRED 1 /* skip wired mappings */
|
|
|
|
static void pmap_unmap_ptes(struct pmap *);
|
|
|
|
static bool pmap_reactivate(struct pmap *);
|
|
|
|
#ifdef DEBUG
|
|
u_int curapdp;
|
|
#endif
|
|
|
|
/*
|
|
* p m a p i n l i n e h e l p e r f u n c t i o n s
|
|
*/
|
|
|
|
/*
|
|
* pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
|
|
* of course the kernel is always loaded
|
|
*/
|
|
|
|
inline static bool
|
|
pmap_is_curpmap(pmap)
|
|
struct pmap *pmap;
|
|
{
|
|
|
|
return((pmap == pmap_kernel()) ||
|
|
(pmap == curcpu()->ci_pmap));
|
|
}
|
|
|
|
/*
|
|
* pmap_is_active: is this pmap loaded into the specified processor's %cr3?
|
|
*/
|
|
|
|
inline static bool
|
|
pmap_is_active(pmap, cpu_id)
|
|
struct pmap *pmap;
|
|
int cpu_id;
|
|
{
|
|
|
|
return (pmap == pmap_kernel() ||
|
|
(pmap->pm_cpus & (1U << cpu_id)) != 0);
|
|
}
|
|
|
|
|
|
inline static void
|
|
pmap_apte_flush(struct pmap *pmap)
|
|
{
|
|
#if defined(MULTIPROCESSOR)
|
|
struct pmap_tlb_shootdown_q *pq;
|
|
struct cpu_info *ci, *self = curcpu();
|
|
CPU_INFO_ITERATOR cii;
|
|
int s;
|
|
#endif
|
|
|
|
tlbflush(); /* flush TLB on current processor */
|
|
#if defined(MULTIPROCESSOR)
|
|
/*
|
|
* Flush the APTE mapping from all other CPUs that
|
|
* are using the pmap we are using (whose APTE space
|
|
* is the one we've just modified).
|
|
*
|
|
* XXXthorpej -- find a way to defer the IPI.
|
|
*/
|
|
for (CPU_INFO_FOREACH(cii, ci)) {
|
|
if (ci == self)
|
|
continue;
|
|
if (pmap_is_active(pmap, ci->ci_cpuid)) {
|
|
pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
|
|
s = splipi();
|
|
__cpu_simple_lock(&pq->pq_slock);
|
|
pq->pq_flushu++;
|
|
__cpu_simple_unlock(&pq->pq_slock);
|
|
splx(s);
|
|
x86_send_ipi(ci, X86_IPI_TLB);
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
|
|
*
|
|
* => we lock enough pmaps to keep things locked in
|
|
* => must be undone with pmap_unmap_ptes before returning
|
|
*/
|
|
|
|
inline static pt_entry_t *
|
|
pmap_map_ptes(pmap)
|
|
struct pmap *pmap;
|
|
{
|
|
pd_entry_t opde;
|
|
pd_entry_t *mapdp;
|
|
struct pmap *ourpmap;
|
|
struct cpu_info *ci;
|
|
|
|
/* the kernel's pmap is always accessible */
|
|
if (pmap == pmap_kernel()) {
|
|
return(PTE_BASE);
|
|
}
|
|
|
|
ci = curcpu();
|
|
if (ci->ci_want_pmapload &&
|
|
vm_map_pmap(&ci->ci_curlwp->l_proc->p_vmspace->vm_map) == pmap)
|
|
pmap_load();
|
|
|
|
/* if curpmap then we are always mapped */
|
|
if (pmap_is_curpmap(pmap)) {
|
|
simple_lock(&pmap->pm_obj.vmobjlock);
|
|
return(PTE_BASE);
|
|
}
|
|
|
|
ourpmap = ci->ci_pmap;
|
|
|
|
/* need to lock both curpmap and pmap: use ordered locking */
|
|
if ((unsigned) pmap < (unsigned) ourpmap) {
|
|
simple_lock(&pmap->pm_obj.vmobjlock);
|
|
simple_lock(&ourpmap->pm_obj.vmobjlock);
|
|
} else {
|
|
simple_lock(&ourpmap->pm_obj.vmobjlock);
|
|
simple_lock(&pmap->pm_obj.vmobjlock);
|
|
}
|
|
|
|
/* need to load a new alternate pt space into curpmap? */
|
|
COUNT(apdp_pde_map);
|
|
opde = PDE_GET(APDP_PDE);
|
|
if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
|
|
XENPRINTF(("APDP_PDE %p %p/%p set %p/%p\n",
|
|
pmap,
|
|
(void *)vtophys((vaddr_t)APDP_PDE),
|
|
(void *)xpmap_ptom(vtophys((vaddr_t)APDP_PDE)),
|
|
(void *)pmap->pm_pdirpa,
|
|
(void *)xpmap_ptom(pmap->pm_pdirpa)));
|
|
mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE);
|
|
PDE_SET(APDP_PDE, mapdp, pmap->pm_pdirpa /* | PG_RW */ | PG_V);
|
|
#ifdef DEBUG
|
|
curapdp = pmap->pm_pdirpa;
|
|
#endif
|
|
if (pmap_valid_entry(opde))
|
|
pmap_apte_flush(ourpmap);
|
|
XENPRINTF(("APDP_PDE set done\n"));
|
|
}
|
|
return(APTE_BASE);
|
|
}
|
|
|
|
/*
|
|
* pmap_unmap_ptes: unlock the PTE mapping of "pmap"
|
|
*/
|
|
|
|
inline static void
|
|
pmap_unmap_ptes(pmap)
|
|
struct pmap *pmap;
|
|
{
|
|
#if defined(MULTIPROCESSOR)
|
|
pd_entry_t *mapdp;
|
|
#endif
|
|
|
|
if (pmap == pmap_kernel()) {
|
|
return;
|
|
}
|
|
if (pmap_is_curpmap(pmap)) {
|
|
simple_unlock(&pmap->pm_obj.vmobjlock);
|
|
} else {
|
|
struct pmap *ourpmap = curcpu()->ci_pmap;
|
|
|
|
#if defined(MULTIPROCESSOR)
|
|
mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE);
|
|
PDE_CLEAR(APDP_PDE, mapdp);
|
|
pmap_apte_flush(ourpmap);
|
|
#endif
|
|
#ifdef DEBUG
|
|
curapdp = 0;
|
|
#endif
|
|
XENPRINTF(("APDP_PDE clear %p/%p set %p/%p\n",
|
|
(void *)vtophys((vaddr_t)APDP_PDE),
|
|
(void *)xpmap_ptom(vtophys((vaddr_t)APDP_PDE)),
|
|
(void *)pmap->pm_pdirpa,
|
|
(void *)xpmap_ptom(pmap->pm_pdirpa)));
|
|
COUNT(apdp_pde_unmap);
|
|
simple_unlock(&pmap->pm_obj.vmobjlock);
|
|
simple_unlock(&ourpmap->pm_obj.vmobjlock);
|
|
}
|
|
}
|
|
|
|
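/*
 * pmap_exec_account: track changes to the executable status of the
 * current process's mappings; if the PG_X bit of its highest executable
 * mapping is being cleared, fall back to the default code segment and
 * let the trap path grow the code segment again later if needed.
 */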
inline static void
|
|
pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte)
|
|
{
|
|
if (curproc == NULL || curproc->p_vmspace == NULL ||
|
|
pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
|
|
return;
|
|
|
|
if ((opte ^ npte) & PG_X)
|
|
pmap_update_pg(va);
|
|
|
|
/*
|
|
* Executability was removed on the last executable change.
|
|
* Reset the code segment to something conservative and
|
|
* let the trap handler deal with setting the right limit.
|
|
* We can't do that because of locking constraints on the vm map.
|
|
*/
|
|
|
|
if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
|
|
struct trapframe *tf = curlwp->l_md.md_regs;
|
|
struct pcb *pcb = &curlwp->l_addr->u_pcb;
|
|
|
|
pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
|
|
pm->pm_hiexec = I386_MAX_EXE_ADDR;
|
|
}
|
|
}
|
|
|
|
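/*
 * PTE accessors: under Xen, page table entries hold machine frame
 * addresses.  pte_mtop() translates a valid PTE's frame back to its
 * pseudo-physical form, pte_get_ma() reads a PTE without translation,
 * and pte_get() reads a PTE and translates it when it is valid.
 */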
inline static pt_entry_t
|
|
pte_mtop(pt_entry_t pte)
|
|
{
|
|
pt_entry_t ppte;
|
|
|
|
KDASSERT(pmap_valid_entry(pte));
|
|
ppte = xpmap_mtop(pte);
|
|
if ((ppte & PG_FRAME) == XPMAP_OFFSET) {
|
|
XENPRINTF(("pte_mtop: null page %08x -> %08x\n",
|
|
ppte, pte));
|
|
ppte = pte;
|
|
}
|
|
|
|
return ppte;
|
|
}
|
|
|
|
inline static pt_entry_t
|
|
pte_get_ma(pt_entry_t *pte)
|
|
{
|
|
|
|
return *pte;
|
|
}
|
|
|
|
inline static pt_entry_t
|
|
pte_get(pt_entry_t *pte)
|
|
{
|
|
|
|
if (pmap_valid_entry(*pte))
|
|
return pte_mtop(*pte);
|
|
return *pte;
|
|
}
|
|
|
|
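/*
 * pte_atomic_update_ma: replace *pte with npte through the Xen update
 * queue and return the previous (machine) PTE.  Entries whose frame
 * lies beyond pmap_mem_end are queued as "unchecked" updates.
 */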
inline static pt_entry_t
|
|
pte_atomic_update_ma(pt_entry_t *pte, pt_entry_t *mapte, pt_entry_t npte)
|
|
{
|
|
pt_entry_t opte;
|
|
int s = splvm();
|
|
|
|
XENPRINTK(("pte_atomic_update_ma pte %p mapte %p npte %08x\n",
|
|
pte, mapte, npte));
|
|
opte = PTE_GET_MA(pte);
|
|
if (opte > pmap_mem_end) {
|
|
/* must remove opte unchecked */
|
|
if (npte > pmap_mem_end)
|
|
/* must set npte unchecked */
|
|
xpq_queue_unchecked_pte_update(mapte, npte);
|
|
else {
|
|
/* must set npte checked */
|
|
xpq_queue_unchecked_pte_update(mapte, 0);
|
|
xpq_queue_pte_update(mapte, npte);
|
|
}
|
|
} else {
|
|
/* must remove opte checked */
|
|
if (npte > pmap_mem_end) {
|
|
/* must set npte unchecked */
|
|
xpq_queue_pte_update(mapte, 0);
|
|
xpq_queue_unchecked_pte_update(mapte, npte);
|
|
} else
|
|
/* must set npte checked */
|
|
xpq_queue_pte_update(mapte, npte);
|
|
}
|
|
xpq_flush_queue();
|
|
splx(s);
|
|
|
|
return opte;
|
|
}
|
|
|
|
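/*
 * pte_atomic_update_ma_domid: as pte_atomic_update_ma, but the target
 * page may belong to a foreign domain; foreign updates go through
 * xpq_update_foreign() and may fail, so an error is returned and the
 * previous PTE is passed back via *opte.
 */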
static inline int
|
|
pte_atomic_update_ma_domid(pt_entry_t *pte, pt_entry_t npte, pt_entry_t *opte,
|
|
int domid)
|
|
{
|
|
pt_entry_t *maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
|
|
int error;
|
|
|
|
if (domid == DOMID_SELF) {
|
|
*opte = pte_atomic_update_ma(pte, maptp, npte);
|
|
error = 0;
|
|
} else {
|
|
/* XXX */
|
|
*opte = PTE_GET_MA(pte);
|
|
error = xpq_update_foreign(maptp, npte, domid);
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
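/*
 * pte_atomic_update: as pte_atomic_update_ma, but return the previous
 * PTE with its frame translated back to pseudo-physical form.
 */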
inline static pt_entry_t
|
|
pte_atomic_update(pt_entry_t *pte, pt_entry_t *mapte, pt_entry_t npte)
|
|
{
|
|
pt_entry_t opte;
|
|
|
|
opte = pte_atomic_update_ma(pte, mapte, npte);
|
|
|
|
return pte_mtop(opte);
|
|
}
|
|
|
|
/*
|
|
* Fixup the code segment to cover all potential executable mappings.
|
|
* returns 0 if no changes to the code segment were made.
|
|
*/
|
|
|
|
int
|
|
pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
|
|
{
|
|
struct vm_map_entry *ent;
|
|
struct pmap *pm = vm_map_pmap(map);
|
|
vaddr_t va = 0;
|
|
|
|
vm_map_lock_read(map);
|
|
for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
|
|
|
|
/*
|
|
* This entry has greater va than the entries before.
|
|
* We need to make it point to the last page, not past it.
|
|
*/
|
|
|
|
if (ent->protection & VM_PROT_EXECUTE)
|
|
va = trunc_page(ent->end) - PAGE_SIZE;
|
|
}
|
|
vm_map_unlock_read(map);
|
|
if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL))
|
|
return (0);
|
|
|
|
pm->pm_hiexec = va;
|
|
if (pm->pm_hiexec > I386_MAX_EXE_ADDR) {
|
|
pcb->pcb_cs = tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
|
|
} else {
|
|
pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
|
|
return (0);
|
|
}
|
|
return (1);
|
|
}
|
|
|
|
/*
|
|
* p m a p k e n t e r f u n c t i o n s
|
|
*
|
|
* functions to quickly enter/remove pages from the kernel address
|
|
* space. pmap_kremove is exported to MI kernel. we make use of
|
|
* the recursive PTE mappings.
|
|
*/
|
|
|
|
/*
|
|
* pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
|
|
*
|
|
* => no need to lock anything, assume va is already allocated
|
|
* => should be faster than normal pmap enter function
|
|
*/
|
|
|
|
void
|
|
pmap_kenter_pa(va, pa, prot)
|
|
vaddr_t va;
|
|
paddr_t pa;
|
|
vm_prot_t prot;
|
|
{
|
|
pt_entry_t *pte, opte, npte;
|
|
pt_entry_t *maptp;
|
|
|
|
if (va < VM_MIN_KERNEL_ADDRESS)
|
|
pte = vtopte(va);
|
|
else
|
|
pte = kvtopte(va);
|
|
|
|
npte = ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
|
|
PG_V | pmap_pg_g;
|
|
|
|
if (pa >= pmap_pa_start && pa < pmap_pa_end) {
|
|
npte |= xpmap_ptom(pa);
|
|
} else {
|
|
XENPRINTF(("pmap_kenter: va %08lx outside pa range %08lx\n",
|
|
va, pa));
|
|
npte |= pa;
|
|
}
|
|
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
|
|
opte = pte_atomic_update_ma(pte, maptp, npte); /* zap! */
|
|
XENPRINTK(("pmap_kenter_pa(%p,%p) %p, was %08x now %08x\n", (void *)va,
|
|
(void *)pa, pte, opte, npte));
|
|
#ifdef LARGEPAGES
|
|
/* XXX For now... */
|
|
if (opte & PG_PS)
|
|
panic("pmap_kenter_pa: PG_PS");
|
|
#endif
|
|
if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
|
|
#if defined(MULTIPROCESSOR)
|
|
int32_t cpumask = 0;
|
|
|
|
pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
|
|
pmap_tlb_shootnow(cpumask);
|
|
#else
|
|
/* Don't bother deferring in the single CPU case. */
|
|
pmap_update_pg(va);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/*
|
|
* pmap_kenter_ma: enter a kernel mapping without R/M (pv_entry) tracking
|
|
*
|
|
* => no need to lock anything, assume va is already allocated
|
|
* => should be faster than normal pmap enter function
|
|
*/
|
|
|
|
void pmap_kenter_ma __P((vaddr_t, paddr_t, vm_prot_t));
|
|
|
|
void
|
|
pmap_kenter_ma(va, ma, prot)
|
|
vaddr_t va;
|
|
paddr_t ma;
|
|
vm_prot_t prot;
|
|
{
|
|
pt_entry_t *pte, opte, npte;
|
|
pt_entry_t *maptp;
|
|
|
|
KASSERT (va >= VM_MIN_KERNEL_ADDRESS);
|
|
pte = kvtopte(va);
|
|
|
|
npte = ma | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) |
|
|
PG_V | pmap_pg_g;
|
|
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
|
|
opte = pte_atomic_update_ma(pte, maptp, npte); /* zap! */
|
|
XENPRINTK(("pmap_kenter_ma(%p,%p) %p, was %08x\n", (void *)va,
|
|
(void *)ma, pte, opte));
|
|
#ifdef LARGEPAGES
|
|
/* XXX For now... */
|
|
if (opte & PG_PS)
|
|
panic("pmap_kenter_ma: PG_PS");
|
|
#endif
|
|
if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
|
|
#if defined(MULTIPROCESSOR)
|
|
int32_t cpumask = 0;
|
|
|
|
pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
|
|
pmap_tlb_shootnow(cpumask);
|
|
#else
|
|
/* Don't bother deferring in the single CPU case. */
|
|
pmap_update_pg(va);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/*
|
|
* pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
|
|
*
|
|
* => no need to lock anything
|
|
* => caller must dispose of any vm_page mapped in the va range
|
|
* => note: not an inline function
|
|
* => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
|
|
* => we assume kernel only unmaps valid addresses and thus don't bother
|
|
* checking the valid bit before doing TLB flushing
|
|
*/
|
|
|
|
void
|
|
pmap_kremove(va, len)
|
|
vaddr_t va;
|
|
vsize_t len;
|
|
{
|
|
pt_entry_t *pte, opte;
|
|
pt_entry_t *maptp;
|
|
int32_t cpumask = 0;
|
|
|
|
XENPRINTK(("pmap_kremove va %p, len %08lx\n", (void *)va, len));
|
|
len >>= PAGE_SHIFT;
|
|
for ( /* null */ ; len ; len--, va += PAGE_SIZE) {
|
|
if (va < VM_MIN_KERNEL_ADDRESS)
|
|
pte = vtopte(va);
|
|
else
|
|
pte = kvtopte(va);
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
|
|
opte = pte_atomic_update_ma(pte, maptp, 0); /* zap! */
|
|
XENPRINTK(("pmap_kremove pte %p, was %08x\n", pte, opte));
|
|
#ifdef LARGEPAGES
|
|
/* XXX For now... */
|
|
if (opte & PG_PS)
|
|
panic("pmap_kremove: PG_PS");
|
|
#endif
|
|
#ifdef DIAGNOSTIC
|
|
if (opte & PG_PVLIST)
|
|
panic("pmap_kremove: PG_PVLIST mapping for 0x%lx",
|
|
va);
|
|
#endif
|
|
if ((opte & (PG_V | PG_U)) == (PG_V | PG_U))
|
|
pmap_tlb_shootdown(pmap_kernel(), va, opte, &cpumask);
|
|
}
|
|
pmap_tlb_shootnow(cpumask);
|
|
}
|
|
|
|
/*
|
|
* p m a p i n i t f u n c t i o n s
|
|
*
|
|
* pmap_bootstrap and pmap_init are called during system startup
|
|
* to init the pmap module. pmap_bootstrap() does a low level
|
|
* init just to get things rolling. pmap_init() finishes the job.
|
|
*/
|
|
|
|
/*
|
|
* pmap_bootstrap: get the system in a state where it can run with VM
|
|
* properly enabled (called before main()). the VM system is
|
|
* fully init'd later...
|
|
*
|
|
* => on i386, locore.s has already enabled the MMU by allocating
|
|
* a PDP for the kernel, and nkpde PTP's for the kernel.
|
|
* => kva_start is the first free virtual address in kernel space
|
|
*/
|
|
|
|
void
|
|
pmap_bootstrap(kva_start)
|
|
vaddr_t kva_start;
|
|
{
|
|
struct pmap *kpm;
|
|
vaddr_t kva;
|
|
pt_entry_t *pte;
|
|
pt_entry_t *maptp;
|
|
int i;
|
|
|
|
/*
|
|
* set up our local static global vars that keep track of the
|
|
* usage of KVM before kernel_map is set up
|
|
*/
|
|
|
|
virtual_avail = kva_start; /* first free KVA */
|
|
virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */
|
|
|
|
/*
|
|
* find out where physical memory ends on the real hardware.
|
|
*/
|
|
|
|
if (xen_start_info.flags & SIF_PRIVILEGED)
|
|
pmap_mem_end = find_pmap_mem_end(kva_start);
|
|
|
|
/*
|
|
* set up protection_codes: we need to be able to convert from
|
|
* a MI protection code (some combo of VM_PROT...) to something
|
|
* we can jam into a i386 PTE.
|
|
*/
|
|
|
|
protection_codes[VM_PROT_NONE] = 0; /* --- */
|
|
protection_codes[VM_PROT_EXECUTE] = PG_X; /* --x */
|
|
protection_codes[VM_PROT_READ] = PG_RO; /* -r- */
|
|
protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO|PG_X;/* -rx */
|
|
protection_codes[VM_PROT_WRITE] = PG_RW; /* w-- */
|
|
protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW|PG_X;/* w-x */
|
|
protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW; /* wr- */
|
|
protection_codes[VM_PROT_ALL] = PG_RW|PG_X; /* wrx */
|
|
|
|
/*
|
|
* now we init the kernel's pmap
|
|
*
|
|
* the kernel pmap's pm_obj is not used for much. however, in
|
|
* user pmaps the pm_obj contains the list of active PTPs.
|
|
* the pm_obj currently does not have a pager. it might be possible
|
|
* to add a pager that would allow a process to read-only mmap its
|
|
* own page tables (fast user level vtophys?). this may or may not
|
|
* be useful.
|
|
*/
|
|
|
|
kpm = pmap_kernel();
|
|
simple_lock_init(&kpm->pm_obj.vmobjlock);
|
|
kpm->pm_obj.pgops = NULL;
|
|
TAILQ_INIT(&kpm->pm_obj.memq);
|
|
kpm->pm_obj.uo_npages = 0;
|
|
kpm->pm_obj.uo_refs = 1;
|
|
memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */
|
|
kpm->pm_pdir = (pd_entry_t *)(lwp0.l_addr->u_pcb.pcb_cr3 + KERNBASE);
|
|
XENPRINTF(("pm_pdirpa %p PDPpaddr %p\n",
|
|
(void *)lwp0.l_addr->u_pcb.pcb_cr3, (void *)PDPpaddr));
|
|
kpm->pm_pdirpa = (u_int32_t) lwp0.l_addr->u_pcb.pcb_cr3;
|
|
kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
|
|
x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
|
|
|
|
/*
|
|
* the above is just a rough estimate and not critical to the proper
|
|
* operation of the system.
|
|
*/
|
|
|
|
/*
|
|
* Begin to enable global TLB entries if they are supported.
|
|
* The G bit has no effect until the CR4_PGE bit is set in CR4,
|
|
* which happens in cpu_init(), which is run on each cpu
|
|
* (and happens later)
|
|
*/
|
|
|
|
if (cpu_feature & CPUID_PGE) {
|
|
pmap_pg_g = PG_G; /* enable software */
|
|
|
|
/* add PG_G attribute to already mapped kernel pages */
|
|
for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ;
|
|
kva += PAGE_SIZE)
|
|
if (pmap_valid_entry(PTE_BASE[x86_btop(kva)])) {
|
|
#if !defined(XEN)
|
|
PTE_BASE[x86_btop(kva)] |= PG_G;
|
|
#else
|
|
maptp = (pt_entry_t *)vtomach(
|
|
(vaddr_t)&PTE_BASE[x86_btop(kva)]);
|
|
PTE_SETBITS(&PTE_BASE[x86_btop(kva)], maptp,
|
|
PG_G);
|
|
}
|
|
PTE_UPDATES_FLUSH();
|
|
#endif
|
|
}
|
|
|
|
#ifdef LARGEPAGES
|
|
/*
|
|
* enable large pages if they are supported.
|
|
*/
|
|
|
|
if (cpu_feature & CPUID_PSE) {
|
|
paddr_t pa;
|
|
vaddr_t kva_end;
|
|
pd_entry_t *pde;
|
|
pd_entry_t *mapdp;
|
|
extern char _etext;
|
|
|
|
lcr4(rcr4() | CR4_PSE); /* enable hardware (via %cr4) */
|
|
pmap_largepages = 1; /* enable software */
|
|
|
|
/*
|
|
* the TLB must be flushed after enabling large pages
|
|
* on Pentium CPUs, according to section 3.6.2.2 of
|
|
* "Intel Architecture Software Developer's Manual,
|
|
* Volume 3: System Programming".
|
|
*/
|
|
tlbflush();
|
|
|
|
/*
|
|
* now, remap the kernel text using large pages. we
|
|
* assume that the linker has properly aligned the
|
|
* .data segment to a 4MB boundary.
|
|
*/
|
|
kva_end = roundup((vaddr_t)&_etext, NBPD);
|
|
for (pa = 0, kva = KERNBASE; kva < kva_end;
|
|
kva += NBPD, pa += NBPD) {
|
|
pde = &kpm->pm_pdir[pdei(kva)];
|
|
mapdp = (pt_entry_t *)vtomach((vaddr_t)pde);
|
|
PDE_SET(pde, mapdp, pa | pmap_pg_g | PG_PS |
|
|
PG_KR | PG_V); /* zap! */
|
|
tlbflush();
|
|
}
|
|
}
|
|
#endif /* LARGEPAGES */
|
|
|
|
/*
|
|
* now we allocate the "special" VAs which are used for tmp mappings
|
|
* by the pmap (and other modules). we allocate the VAs by advancing
|
|
* virtual_avail (note that there are no pages mapped at these VAs).
|
|
* we find the PTE that maps the allocated VA via the linear PTE
|
|
* mapping.
|
|
*/
|
|
|
|
pte = PTE_BASE + x86_btop(virtual_avail);
|
|
|
|
#ifdef MULTIPROCESSOR
|
|
/*
|
|
* Waste some VA space to avoid false sharing of cache lines
|
|
* for page table pages: Give each possible CPU a cache line
|
|
* of PTE's (8) to play with, though we only need 4. We could
|
|
* recycle some of this waste by putting the idle stacks here
|
|
* as well; we could waste less space if we knew the largest
|
|
* CPU ID beforehand.
|
|
*/
|
|
csrcp = (char *) virtual_avail; csrc_pte = pte;
|
|
|
|
cdstp = (char *) virtual_avail+PAGE_SIZE; cdst_pte = pte+1;
|
|
|
|
zerop = (char *) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2;
|
|
|
|
ptpp = (char *) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3;
|
|
|
|
virtual_avail += PAGE_SIZE * X86_MAXPROCS * NPTECL;
|
|
pte += X86_MAXPROCS * NPTECL;
|
|
#else
|
|
csrcp = (void *) virtual_avail; csrc_pte = pte; /* allocate */
|
|
virtual_avail += PAGE_SIZE; pte++; /* advance */
|
|
|
|
cdstp = (void *) virtual_avail; cdst_pte = pte;
|
|
virtual_avail += PAGE_SIZE; pte++;
|
|
|
|
zerop = (void *) virtual_avail; zero_pte = pte;
|
|
virtual_avail += PAGE_SIZE; pte++;
|
|
|
|
ptpp = (void *) virtual_avail; ptp_pte = pte;
|
|
virtual_avail += PAGE_SIZE; pte++;
|
|
#endif
|
|
|
|
XENPRINTK(("pmap_bootstrap csrcp %p cdstp %p zerop %p ptpp %p\n",
|
|
csrc_pte, cdst_pte, zero_pte, ptp_pte));
|
|
/*
|
|
* Nothing after this point actually needs pte;
|
|
*/
|
|
pte = (void *)0xdeadbeef;
|
|
|
|
/* XXX: vmmap used by mem.c... should be uvm_map_reserve */
|
|
vmmap = (char *)virtual_avail; /* don't need pte */
|
|
virtual_avail += PAGE_SIZE;
|
|
|
|
idt_vaddr = virtual_avail; /* don't need pte */
|
|
virtual_avail += PAGE_SIZE;
|
|
idt_paddr = avail_start; /* steal a page */
|
|
avail_start += PAGE_SIZE;
|
|
|
|
#if defined(I586_CPU)
|
|
/* pentium f00f bug stuff */
|
|
pentium_idt_vaddr = virtual_avail; /* don't need pte */
|
|
virtual_avail += PAGE_SIZE;
|
|
#endif
|
|
|
|
/*
|
|
* now we reserve some VM for mapping pages when doing a crash dump
|
|
*/
|
|
|
|
virtual_avail = reserve_dumppages(virtual_avail);
|
|
|
|
/*
|
|
* init the static-global locks and global lists.
|
|
*/
|
|
|
|
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
|
|
spinlockinit(&pmap_main_lock, "pmaplk", 0);
|
|
#endif
|
|
simple_lock_init(&pvalloc_lock);
|
|
simple_lock_init(&pmaps_lock);
|
|
LIST_INIT(&pmaps);
|
|
TAILQ_INIT(&pv_freepages);
|
|
TAILQ_INIT(&pv_unusedpgs);
|
|
|
|
/*
|
|
* initialize the pmap pool.
|
|
*/
|
|
|
|
pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
|
|
&pool_allocator_nointr, IPL_NONE);
|
|
|
|
/*
|
|
* Initialize the TLB shootdown queues.
|
|
*/
|
|
|
|
__cpu_simple_lock_init(&pmap_tlb_shootdown_job_lock);
|
|
|
|
for (i = 0; i < X86_MAXPROCS; i++) {
|
|
TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
|
|
__cpu_simple_lock_init(&pmap_tlb_shootdown_q[i].pq_slock);
|
|
}
|
|
|
|
/*
|
|
* initialize the PDE pool and cache.
|
|
*/
|
|
pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, 0, "pdppl",
|
|
&pool_allocator_nointr, IPL_NONE);
|
|
pool_cache_init(&pmap_pdp_cache, &pmap_pdp_pool,
|
|
pmap_pdp_ctor, pmap_pdp_dtor, NULL);
|
|
|
|
/*
|
|
* ensure the TLB is sync'd with reality by flushing it...
|
|
*/
|
|
|
|
tlbflush();
|
|
}
|
|
|
|
/*
|
|
* pmap_init: called from uvm_init, our job is to get the pmap
|
|
* system ready to manage mappings... this mainly means initing
|
|
* the pv_entry stuff.
|
|
*/
|
|
|
|
void
|
|
pmap_init()
|
|
{
|
|
int i;
|
|
|
|
pv_nfpvents = 0;
|
|
|
|
pj_page = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
|
|
if (pj_page == NULL)
|
|
panic("pmap_init: pj_page");
|
|
|
|
for (i = 0;
|
|
i < (PAGE_SIZE / sizeof (union pmap_tlb_shootdown_job_al) - 1);
|
|
i++)
|
|
pj_page[i].pja_job.pj_nextfree = &pj_page[i + 1].pja_job;
|
|
pj_page[i].pja_job.pj_nextfree = NULL;
|
|
pj_free = &pj_page[0];
|
|
|
|
/*
|
|
* done: pmap module is up (and ready for business)
|
|
*/
|
|
|
|
pmap_initialized = true;
|
|
}
|
|
|
|
/*
|
|
* p v _ e n t r y f u n c t i o n s
|
|
*/
|
|
|
|
/*
|
|
* pv_entry allocation functions:
|
|
* the main pv_entry allocation functions are:
|
|
* pmap_alloc_pv: allocate a pv_entry structure
|
|
* pmap_free_pv: free one pv_entry
|
|
* pmap_free_pvs: free a list of pv_entrys
|
|
*
|
|
* the rest are helper functions
|
|
*/
|
|
|
|
/*
|
|
* pmap_alloc_pv: inline function to allocate a pv_entry structure
|
|
* => we lock pvalloc_lock
|
|
* => if we fail, we call out to pmap_alloc_pvpage
|
|
* => 3 modes:
|
|
* ALLOCPV_NEED = we really need a pv_entry, even if we have to steal it
|
|
* ALLOCPV_TRY = we want a pv_entry, but not enough to steal
|
|
* ALLOCPV_NONEED = we are trying to grow our free list, don't really need
|
|
* one now
|
|
*
|
|
* "try" is for optional functions like pmap_copy().
|
|
*/
|
|
|
|
inline static struct pv_entry *
|
|
pmap_alloc_pv(pmap, mode)
|
|
struct pmap *pmap;
|
|
int mode;
|
|
{
|
|
struct pv_page *pvpage;
|
|
struct pv_entry *pv;
|
|
|
|
simple_lock(&pvalloc_lock);
|
|
|
|
pvpage = TAILQ_FIRST(&pv_freepages);
|
|
if (pvpage != NULL) {
|
|
pvpage->pvinfo.pvpi_nfree--;
|
|
if (pvpage->pvinfo.pvpi_nfree == 0) {
|
|
/* nothing left in this one? */
|
|
TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list);
|
|
}
|
|
pv = pvpage->pvinfo.pvpi_pvfree;
|
|
KASSERT(pv);
|
|
pvpage->pvinfo.pvpi_pvfree = SPLAY_RIGHT(pv, pv_node);
|
|
pv_nfpvents--; /* took one from pool */
|
|
} else {
|
|
pv = NULL; /* need more of them */
|
|
}
|
|
|
|
/*
|
|
* if below low water mark or we didn't get a pv_entry we try and
|
|
* create more pv_entrys ...
|
|
*/
|
|
|
|
if (pv_nfpvents < PVE_LOWAT || pv == NULL) {
|
|
if (pv == NULL)
|
|
pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ?
|
|
mode : ALLOCPV_NEED);
|
|
else
|
|
(void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED);
|
|
}
|
|
simple_unlock(&pvalloc_lock);
|
|
return(pv);
|
|
}
|
|
|
|
/*
|
|
* pmap_alloc_pvpage: maybe allocate a new pvpage
|
|
*
|
|
* if need_entry is false: try and allocate a new pv_page
|
|
* if need_entry is true: try and allocate a new pv_page and return a
|
|
* new pv_entry from it. if we are unable to allocate a pv_page
|
|
* we make a last ditch effort to steal a pv_page from some other
|
|
* mapping. if that fails, we panic...
|
|
*
|
|
* => we assume that the caller holds pvalloc_lock
|
|
*/
|
|
|
|
static struct pv_entry *
|
|
pmap_alloc_pvpage(pmap, mode)
|
|
struct pmap *pmap;
|
|
int mode;
|
|
{
|
|
struct pv_page *pvpage;
|
|
struct pv_entry *pv;
|
|
int s;
|
|
|
|
/*
|
|
* if we need_entry and we've got unused pv_pages, allocate from there
|
|
*/
|
|
|
|
pvpage = TAILQ_FIRST(&pv_unusedpgs);
|
|
if (mode != ALLOCPV_NONEED && pvpage != NULL) {
|
|
|
|
/* move it to pv_freepages list */
|
|
TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list);
|
|
TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list);
|
|
|
|
/* allocate a pv_entry */
|
|
pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */
|
|
pv = pvpage->pvinfo.pvpi_pvfree;
|
|
KASSERT(pv);
|
|
pvpage->pvinfo.pvpi_pvfree = SPLAY_RIGHT(pv, pv_node);
|
|
pv_nfpvents--; /* took one from pool */
|
|
return(pv);
|
|
}
|
|
|
|
/*
|
|
* NOTE: If we are allocating a PV page for the kernel pmap, the
|
|
* pmap is already locked! (...but entering the mapping is safe...)
|
|
*/
|
|
|
|
s = splvm(); /* must protect kmem_map with splvm! */
|
|
pvpage = (struct pv_page *)uvm_km_alloc(kmem_map, PAGE_SIZE, 0,
|
|
UVM_KMF_TRYLOCK|UVM_KMF_NOWAIT|UVM_KMF_WIRED);
|
|
splx(s);
|
|
if (pvpage == NULL)
|
|
return NULL;
|
|
|
|
return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED));
|
|
}
|
|
|
|
/*
|
|
* pmap_add_pvpage: add a pv_page's pv_entrys to the free list
|
|
*
|
|
* => caller must hold pvalloc_lock
|
|
* => if need_entry is true, we allocate and return one pv_entry
|
|
*/
|
|
|
|
static struct pv_entry *
|
|
pmap_add_pvpage(pvp, need_entry)
|
|
struct pv_page *pvp;
|
|
bool need_entry;
|
|
{
|
|
int tofree, lcv;
|
|
|
|
/* do we need to return one? */
|
|
tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE;
|
|
|
|
pvp->pvinfo.pvpi_pvfree = NULL;
|
|
pvp->pvinfo.pvpi_nfree = tofree;
|
|
for (lcv = 0 ; lcv < tofree ; lcv++) {
|
|
SPLAY_RIGHT(&pvp->pvents[lcv], pv_node) =
|
|
pvp->pvinfo.pvpi_pvfree;
|
|
pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv];
|
|
}
|
|
if (need_entry)
|
|
TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list);
|
|
else
|
|
TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
|
|
pv_nfpvents += tofree;
|
|
return((need_entry) ? &pvp->pvents[lcv] : NULL);
|
|
}
|
|
|
|
/*
|
|
* pmap_free_pv_doit: actually free a pv_entry
|
|
*
|
|
* => do not call this directly! instead use either
|
|
* 1. pmap_free_pv ==> free a single pv_entry
|
|
* 2. pmap_free_pvs => free a list of pv_entrys
|
|
* => we must be holding pvalloc_lock
|
|
*/
|
|
|
|
inline static void
|
|
pmap_free_pv_doit(pv)
|
|
struct pv_entry *pv;
|
|
{
|
|
struct pv_page *pvp;
|
|
|
|
pvp = (struct pv_page *) x86_trunc_page(pv);
|
|
pv_nfpvents++;
|
|
pvp->pvinfo.pvpi_nfree++;
|
|
|
|
/* nfree == 1 => fully allocated page just became partly allocated */
|
|
if (pvp->pvinfo.pvpi_nfree == 1) {
|
|
TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list);
|
|
}
|
|
|
|
/* free it */
|
|
SPLAY_RIGHT(pv, pv_node) = pvp->pvinfo.pvpi_pvfree;
|
|
pvp->pvinfo.pvpi_pvfree = pv;
|
|
|
|
/*
|
|
* are all pv_page's pv_entry's free? move it to unused queue.
|
|
*/
|
|
|
|
if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) {
|
|
TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list);
|
|
TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* pmap_free_pv: free a single pv_entry
|
|
*
|
|
* => we gain the pvalloc_lock
|
|
*/
|
|
|
|
inline static void
|
|
pmap_free_pv(pmap, pv)
|
|
struct pmap *pmap;
|
|
struct pv_entry *pv;
|
|
{
|
|
simple_lock(&pvalloc_lock);
|
|
pmap_free_pv_doit(pv);
|
|
|
|
/*
|
|
* Can't free the PV page if the PV entries were associated with
|
|
* the kernel pmap; the pmap is already locked.
|
|
*/
|
|
if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
|
|
pmap != pmap_kernel())
|
|
pmap_free_pvpage();
|
|
|
|
simple_unlock(&pvalloc_lock);
|
|
}
|
|
|
|
/*
|
|
* pmap_free_pvs: free a list of pv_entrys
|
|
*
|
|
* => we gain the pvalloc_lock
|
|
*/
|
|
|
|
inline static void
|
|
pmap_free_pvs(pmap, pvs)
|
|
struct pmap *pmap;
|
|
struct pv_entry *pvs;
|
|
{
|
|
struct pv_entry *nextpv;
|
|
|
|
simple_lock(&pvalloc_lock);
|
|
|
|
for ( /* null */ ; pvs != NULL ; pvs = nextpv) {
|
|
nextpv = SPLAY_RIGHT(pvs, pv_node);
|
|
pmap_free_pv_doit(pvs);
|
|
}
|
|
|
|
/*
|
|
* Can't free the PV page if the PV entries were associated with
|
|
* the kernel pmap; the pmap is already locked.
|
|
*/
|
|
if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
|
|
pmap != pmap_kernel())
|
|
pmap_free_pvpage();
|
|
|
|
simple_unlock(&pvalloc_lock);
|
|
}
|
|
|
|
|
|
/*
|
|
* pmap_free_pvpage: try and free an unused pv_page structure
|
|
*
|
|
* => assume caller is holding the pvalloc_lock and that
|
|
* there is a page on the pv_unusedpgs list
|
|
*/
|
|
|
|
static void
|
|
pmap_free_pvpage()
|
|
{
|
|
int s;
|
|
struct pv_page *pvp;
|
|
|
|
pvp = TAILQ_FIRST(&pv_unusedpgs);
|
|
/* remove pvp from pv_unusedpgs */
|
|
TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
|
|
|
|
s = splvm();
|
|
uvm_km_free(kmem_map, (vaddr_t)pvp, PAGE_SIZE, UVM_KMF_WIRED);
|
|
splx(s);
|
|
|
|
pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */
|
|
}
|
|
|
|
/*
|
|
* pmap_lock_pvhs: Lock pvh1 and optional pvh2
|
|
* Observe locking order when locking both pvhs
|
|
*/
|
|
|
|
inline static void
|
|
pmap_lock_pvhs(struct pv_head *pvh1, struct pv_head *pvh2)
|
|
{
|
|
|
|
if (pvh2 == NULL) {
|
|
simple_lock(&pvh1->pvh_lock);
|
|
return;
|
|
}
|
|
|
|
if (pvh1 < pvh2) {
|
|
simple_lock(&pvh1->pvh_lock);
|
|
simple_lock(&pvh2->pvh_lock);
|
|
} else {
|
|
simple_lock(&pvh2->pvh_lock);
|
|
simple_lock(&pvh1->pvh_lock);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* main pv_entry manipulation functions:
|
|
* pmap_enter_pv: enter a mapping onto a pv_head list
|
|
* pmap_remove_pv: remove a mapping from a pv_head list
|
|
*
|
|
* NOTE: Both pmap_enter_pv and pmap_remove_pv expect the caller to lock
|
|
* the pvh before calling
|
|
*/
|
|
|
|
/*
|
|
* pmap_enter_pv: enter a mapping onto a pv_head list
|
|
*
|
|
* => caller should hold the proper lock on pmap_main_lock
|
|
* => caller should have pmap locked
|
|
* => caller should have the pv_head locked
|
|
* => caller should adjust ptp's wire_count before calling
|
|
*/
|
|
|
|
inline static void
|
|
pmap_enter_pv(pvh, pve, pmap, va, ptp)
|
|
struct pv_head *pvh;
|
|
struct pv_entry *pve; /* preallocated pve for us to use */
|
|
struct pmap *pmap;
|
|
vaddr_t va;
|
|
struct vm_page *ptp; /* PTP in pmap that maps this VA */
|
|
{
|
|
pve->pv_pmap = pmap;
|
|
pve->pv_va = va;
|
|
pve->pv_ptp = ptp; /* NULL for kernel pmap */
|
|
SPLAY_INSERT(pvtree, &pvh->pvh_root, pve); /* add to locked list */
|
|
}
|
|
|
|
/*
|
|
* pmap_remove_pv: try to remove a mapping from a pv_list
|
|
*
|
|
* => caller should hold proper lock on pmap_main_lock
|
|
* => pmap should be locked
|
|
* => caller should hold lock on pv_head [so that attrs can be adjusted]
|
|
* => caller should adjust ptp's wire_count and free PTP if needed
|
|
* => we return the removed pve
|
|
*/
|
|
|
|
inline static struct pv_entry *
|
|
pmap_remove_pv(pvh, pmap, va)
|
|
struct pv_head *pvh;
|
|
struct pmap *pmap;
|
|
vaddr_t va;
|
|
{
|
|
struct pv_entry tmp, *pve;
|
|
|
|
tmp.pv_pmap = pmap;
|
|
tmp.pv_va = va;
|
|
pve = SPLAY_FIND(pvtree, &pvh->pvh_root, &tmp);
|
|
if (pve == NULL)
|
|
return (NULL);
|
|
SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve);
|
|
return(pve); /* return removed pve */
|
|
}
|
|
|
|
/*
|
|
* p t p f u n c t i o n s
|
|
*/
|
|
|
|
/*
|
|
* pmap_alloc_ptp: allocate a PTP for a PMAP
|
|
*
|
|
* => pmap should already be locked by caller
|
|
* => we use the ptp's wire_count to count the number of active mappings
|
|
* in the PTP (we start it at one to prevent any chance this PTP
|
|
* will ever leak onto the active/inactive queues)
|
|
*/
|
|
|
|
inline static struct vm_page *
|
|
pmap_alloc_ptp(pmap, pde_index)
|
|
struct pmap *pmap;
|
|
int pde_index;
|
|
{
|
|
struct vm_page *ptp;
|
|
pd_entry_t *mapdp;
|
|
|
|
ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
|
|
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
|
|
if (ptp == NULL)
|
|
return(NULL);
|
|
|
|
/* got one! */
|
|
ptp->flags &= ~PG_BUSY; /* never busy */
|
|
ptp->wire_count = 1; /* no mappings yet */
|
|
mapdp = (pt_entry_t *)vtomach((vaddr_t)&pmap->pm_pdir[pde_index]);
|
|
PDE_SET(&pmap->pm_pdir[pde_index], mapdp,
|
|
(pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V));
|
|
pmap->pm_stats.resident_count++; /* count PTP as resident */
|
|
pmap->pm_ptphint = ptp;
|
|
return(ptp);
|
|
}
|
|
|
|
/*
|
|
* pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
|
|
*
|
|
* => pmap should NOT be pmap_kernel()
|
|
* => pmap should be locked
|
|
*/
|
|
|
|
static struct vm_page *
|
|
pmap_get_ptp(pmap, pde_index)
|
|
struct pmap *pmap;
|
|
int pde_index;
|
|
{
|
|
struct vm_page *ptp;
|
|
|
|
if (pmap_valid_entry(pmap->pm_pdir[pde_index])) {
|
|
|
|
/* valid... check hint (saves us a PA->PG lookup) */
|
|
if (pmap->pm_ptphint &&
|
|
(PDE_GET(&pmap->pm_pdir[pde_index]) & PG_FRAME) ==
|
|
VM_PAGE_TO_PHYS(pmap->pm_ptphint))
|
|
return(pmap->pm_ptphint);
|
|
|
|
ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index));
|
|
#ifdef DIAGNOSTIC
|
|
if (ptp == NULL)
|
|
panic("pmap_get_ptp: unmanaged user PTP");
|
|
#endif
|
|
pmap->pm_ptphint = ptp;
|
|
return(ptp);
|
|
}
|
|
|
|
/* allocate a new PTP (updates ptphint) */
|
|
return(pmap_alloc_ptp(pmap, pde_index));
|
|
}
|
|
|
|
/*
|
|
* p m a p l i f e c y c l e f u n c t i o n s
|
|
*/
|
|
|
|
/*
|
|
* pmap_pdp_ctor: constructor for the PDP cache.
|
|
*/
|
|
|
|
int
|
|
pmap_pdp_ctor(void *arg, void *object, int flags)
|
|
{
|
|
pd_entry_t *pdir = object;
|
|
paddr_t pdirpa;
|
|
int s;
|
|
|
|
/*
|
|
* NOTE: The `pmap_lock' is held when the PDP is allocated.
|
|
* WE MUST NOT BLOCK!
|
|
*/
|
|
|
|
/* fetch the physical address of the page directory. */
|
|
(void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa);
|
|
|
|
XENPRINTF(("pmap_pdp_ctor %p %p\n", pdir, (void *)pdirpa));
|
|
|
|
/* zero init area */
|
|
memset(pdir, 0, PDSLOT_PTE * sizeof(pd_entry_t));
|
|
|
|
/* put in recursive PDE to map the PTEs */
|
|
pdir[PDSLOT_PTE] = xpmap_ptom(pdirpa | PG_V /* | PG_KW */);
|
|
|
|
/* put in kernel VM PDEs */
|
|
memcpy(&pdir[PDSLOT_KERN], &PDP_BASE[PDSLOT_KERN],
|
|
nkpde * sizeof(pd_entry_t));
|
|
|
|
/* zero the rest */
|
|
memset(&pdir[PDSLOT_KERN + nkpde], 0,
|
|
PAGE_SIZE - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
|
|
|
|
pmap_kenter_pa((vaddr_t)pdir, pdirpa, VM_PROT_READ);
|
|
pmap_update(pmap_kernel());
|
|
|
|
/* pin page type */
|
|
s = splvm();
|
|
xpq_queue_pin_table(xpmap_ptom(pdirpa), XPQ_PIN_L2_TABLE);
|
|
xpq_flush_queue();
|
|
splx(s);
|
|
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
pmap_pdp_dtor(void *arg, void *object)
|
|
{
|
|
pd_entry_t *pdir = object;
|
|
paddr_t pdirpa;
|
|
int s;
|
|
|
|
/* fetch the physical address of the page directory. */
|
|
pdirpa = PDE_GET(&pdir[PDSLOT_PTE]) & PG_FRAME;
|
|
|
|
XENPRINTF(("pmap_pdp_dtor %p %p\n", pdir, (void *)pdirpa));
|
|
|
|
/* unpin page type */
|
|
s = splvm();
|
|
xpq_queue_unpin_table(xpmap_ptom(pdirpa));
|
|
xpq_flush_queue();
|
|
splx(s);
|
|
pmap_kenter_pa((vaddr_t)pdir, pdirpa, VM_PROT_READ | VM_PROT_WRITE);
|
|
pmap_update(pmap_kernel());
|
|
}
|
|
|
|
/*
|
|
* pmap_create: create a pmap
|
|
*
|
|
* => note: old pmap interface took a "size" args which allowed for
|
|
* the creation of "software only" pmaps (not in bsd).
|
|
*/
|
|
|
|
struct pmap *
|
|
pmap_create()
|
|
{
|
|
struct pmap *pmap;
|
|
u_int gen;
|
|
|
|
XENPRINTF(("pmap_create\n"));
|
|
pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
|
|
|
|
/* init uvm_object */
|
|
simple_lock_init(&pmap->pm_obj.vmobjlock);
|
|
pmap->pm_obj.pgops = NULL; /* currently not a mappable object */
|
|
TAILQ_INIT(&pmap->pm_obj.memq);
|
|
pmap->pm_obj.uo_npages = 0;
|
|
pmap->pm_obj.uo_refs = 1;
|
|
pmap->pm_stats.wired_count = 0;
|
|
pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */
|
|
pmap->pm_ptphint = NULL;
|
|
pmap->pm_hiexec = 0;
|
|
pmap->pm_flags = 0;
|
|
pmap->pm_cpus = 0;
|
|
|
|
/* init the LDT */
|
|
pmap->pm_ldt = NULL;
|
|
pmap->pm_ldt_len = 0;
|
|
pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
|
|
|
|
/* allocate PDP */
|
|
|
|
/*
|
|
* we need to lock pmaps_lock to prevent nkpde from changing on
|
|
* us. note that there is no need to splvm to protect us from
|
|
* malloc since malloc allocates out of a submap and we should
|
|
* have already allocated kernel PTPs to cover the range...
|
|
*
|
|
* NOTE: WE MUST NOT BLOCK WHILE HOLDING THE `pmap_lock', nor
|
|
* must we call pmap_growkernel() while holding it!
|
|
*/
|
|
|
|
try_again:
|
|
gen = pmap_pdp_cache_generation;
|
|
pmap->pm_pdir = pool_cache_get(&pmap_pdp_cache, PR_WAITOK);
|
|
|
|
simple_lock(&pmaps_lock);
|
|
|
|
if (gen != pmap_pdp_cache_generation) {
|
|
simple_unlock(&pmaps_lock);
|
|
pool_cache_destruct_object(&pmap_pdp_cache, pmap->pm_pdir);
|
|
goto try_again;
|
|
}
|
|
|
|
pmap->pm_pdirpa = PDE_GET(&pmap->pm_pdir[PDSLOT_PTE]) & PG_FRAME;
|
|
XENPRINTF(("pmap_create %p set pm_pdirpa %p/%p slotval %p\n", pmap,
|
|
(void *)pmap->pm_pdirpa,
|
|
(void *)xpmap_ptom(pmap->pm_pdirpa),
|
|
(void *)pmap->pm_pdir[PDSLOT_PTE]));
|
|
|
|
LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
|
|
|
|
simple_unlock(&pmaps_lock);
|
|
|
|
return (pmap);
|
|
}
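
/*
 * Illustrative sketch only (not part of this file's logic): the
 * reference-count lifecycle implemented by pmap_create, pmap_reference
 * and pmap_destroy as a hypothetical MI caller would see it.  "pm" is a
 * made-up local variable, not an interface of this file.
 *
 *	struct pmap *pm;
 *
 *	pm = pmap_create();		(uo_refs == 1; PDP allocated, pinned)
 *	pmap_reference(pm);		(uo_refs == 2)
 *	pmap_destroy(pm);		(uo_refs == 1; nothing freed yet)
 *	pmap_destroy(pm);		(last reference: PDP unpinned, freed)
 */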
|
|
|
|
/*
|
|
* pmap_destroy: drop reference count on pmap. free pmap if
|
|
* reference count goes to zero.
|
|
*/
|
|
|
|
void
|
|
pmap_destroy(pmap)
|
|
struct pmap *pmap;
|
|
{
|
|
int refs;
|
|
#ifdef DIAGNOSTIC
|
|
struct cpu_info *ci;
|
|
CPU_INFO_ITERATOR cii;
|
|
#endif /* DIAGNOSTIC */
|
|
|
|
/*
|
|
* drop reference count
|
|
*/
|
|
|
|
simple_lock(&pmap->pm_obj.vmobjlock);
|
|
refs = --pmap->pm_obj.uo_refs;
|
|
simple_unlock(&pmap->pm_obj.vmobjlock);
|
|
if (refs > 0) {
|
|
return;
|
|
}
|
|
|
|
#ifdef DIAGNOSTIC
|
|
for (CPU_INFO_FOREACH(cii, ci))
|
|
if (ci->ci_pmap == pmap)
|
|
panic("destroying pmap being used");
|
|
#endif /* DIAGNOSTIC */
|
|
|
|
/*
|
|
* reference count is zero, free pmap resources and then free pmap.
|
|
*/
|
|
|
|
XENPRINTF(("pmap_destroy %p pm_pdirpa %p/%p\n", pmap,
|
|
(void *)pmap->pm_pdirpa,
|
|
(void *)xpmap_ptom(pmap->pm_pdirpa)));
|
|
|
|
/*
|
|
* remove it from global list of pmaps
|
|
*/
|
|
|
|
simple_lock(&pmaps_lock);
|
|
LIST_REMOVE(pmap, pm_list);
|
|
simple_unlock(&pmaps_lock);
|
|
|
|
/*
|
|
* destroyed pmap shouldn't have remaining PTPs
|
|
*/
|
|
|
|
KASSERT(pmap->pm_obj.uo_npages == 0);
|
|
KASSERT(TAILQ_EMPTY(&pmap->pm_obj.memq));
|
|
|
|
/*
|
|
* MULTIPROCESSOR -- no need to flush out of other processors'
|
|
* APTE space because we do that in pmap_unmap_ptes().
|
|
*/
|
|
pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir);
|
|
|
|
#ifdef USER_LDT
|
|
if (pmap->pm_flags & PMF_USER_LDT) {
|
|
/*
|
|
* no need to switch the LDT; this address space is gone,
|
|
* nothing is using it.
|
|
*
|
|
* No need to lock the pmap for ldt_free (or anything else),
|
|
* we're the last one to use it.
|
|
*/
|
|
ldt_free(pmap->pm_ldt_sel);
|
|
uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
|
|
pmap->pm_ldt_len * sizeof(union descriptor), UVM_KMF_WIRED);
|
|
}
|
|
#endif
|
|
|
|
pool_put(&pmap_pmap_pool, pmap);
|
|
}
|
|
|
|
/*
|
|
* Add a reference to the specified pmap.
|
|
*/
|
|
|
|
void
|
|
pmap_reference(pmap)
|
|
struct pmap *pmap;
|
|
{
|
|
simple_lock(&pmap->pm_obj.vmobjlock);
|
|
pmap->pm_obj.uo_refs++;
|
|
simple_unlock(&pmap->pm_obj.vmobjlock);
|
|
}
|
|
|
|
#if defined(PMAP_FORK)
|
|
/*
|
|
* pmap_fork: perform any necessary data structure manipulation when
|
|
* a VM space is forked.
|
|
*/
|
|
|
|
void
|
|
pmap_fork(pmap1, pmap2)
|
|
struct pmap *pmap1, *pmap2;
|
|
{
|
|
#ifdef USER_LDT
|
|
union descriptor *new_ldt;
|
|
size_t len;
|
|
int sel;
|
|
|
|
retry:
|
|
if (pmap1->pm_flags & PMF_USER_LDT) {
|
|
len = pmap1->pm_ldt_len * sizeof(union descriptor);
|
|
new_ldt = (union descriptor *)uvm_km_alloc(kernel_map,
|
|
len, 0, UVM_KMF_WIRED);
|
|
sel = ldt_alloc(new_ldt, len);
|
|
} else {
|
|
len = -1;
|
|
new_ldt = NULL;
|
|
sel = -1;
|
|
}
|
|
|
|
simple_lock(&pmap1->pm_obj.vmobjlock);
|
|
simple_lock(&pmap2->pm_obj.vmobjlock);
|
|
|
|
/* Copy the LDT, if necessary. */
|
|
if (pmap1->pm_flags & PMF_USER_LDT) {
|
|
if (len != pmap1->pm_ldt_len * sizeof(union descriptor)) {
|
|
simple_unlock(&pmap2->pm_obj.vmobjlock);
|
|
simple_unlock(&pmap1->pm_obj.vmobjlock);
|
|
if (len != -1) {
|
|
ldt_free(sel);
|
|
uvm_km_free(kernel_map, (vaddr_t)new_ldt,
|
|
len, UVM_KMF_WIRED);
|
|
}
|
|
goto retry;
|
|
}
|
|
|
|
memcpy(new_ldt, pmap1->pm_ldt, len);
|
|
pmap2->pm_ldt = new_ldt;
|
|
pmap2->pm_ldt_len = pmap1->pm_ldt_len;
|
|
pmap2->pm_flags |= PMF_USER_LDT;
|
|
pmap2->pm_ldt_sel = sel;
|
|
len = -1;
|
|
}
|
|
|
|
simple_unlock(&pmap2->pm_obj.vmobjlock);
|
|
simple_unlock(&pmap1->pm_obj.vmobjlock);
|
|
|
|
if (len != -1) {
|
|
ldt_free(sel);
|
|
uvm_km_free(kernel_map, (vaddr_t)new_ldt, len,
|
|
UVM_KMF_WIRED);
|
|
}
|
|
#endif /* USER_LDT */
|
|
}
|
|
#endif /* PMAP_FORK */
|
|
|
|
#ifdef USER_LDT
|
|
/*
|
|
* pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
|
|
* restore the default.
|
|
*/
|
|
|
|
void
|
|
pmap_ldt_cleanup(l)
|
|
struct lwp *l;
|
|
{
|
|
struct pcb *pcb = &l->l_addr->u_pcb;
|
|
pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
|
|
union descriptor *old_ldt = NULL;
|
|
size_t len = 0;
|
|
int sel = -1;
|
|
|
|
simple_lock(&pmap->pm_obj.vmobjlock);
|
|
|
|
if (pmap->pm_flags & PMF_USER_LDT) {
|
|
sel = pmap->pm_ldt_sel;
|
|
pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
|
|
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
|
|
if (l == curlwp)
|
|
lldt(pcb->pcb_ldt_sel);
|
|
old_ldt = pmap->pm_ldt;
|
|
len = pmap->pm_ldt_len * sizeof(union descriptor);
|
|
pmap->pm_ldt = NULL;
|
|
pmap->pm_ldt_len = 0;
|
|
pmap->pm_flags &= ~PMF_USER_LDT;
|
|
}
|
|
|
|
simple_unlock(&pmap->pm_obj.vmobjlock);
|
|
|
|
if (old_ldt != NULL)
|
|
uvm_km_free(kernel_map, (vaddr_t)old_ldt, len, UVM_KMF_WIRED);
|
|
if (sel != -1)
|
|
ldt_free(sel);
|
|
}
|
|
#endif /* USER_LDT */
|
|
|
|
/*
|
|
* pmap_activate: activate a process' pmap
|
|
*
|
|
* => called from cpu_switch()
|
|
* => if lwp is the curlwp, then set ci_want_pmapload so that
|
|
* actual MMU context switch will be done by pmap_load() later
|
|
*/
|
|
|
|
void
|
|
pmap_activate(l)
|
|
struct lwp *l;
|
|
{
|
|
struct cpu_info *ci = curcpu();
|
|
struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
|
|
|
|
if (l == ci->ci_curlwp) {
|
|
struct pcb *pcb;
|
|
|
|
KASSERT(ci->ci_want_pmapload == 0);
|
|
KASSERT(ci->ci_tlbstate != TLBSTATE_VALID);
|
|
#ifdef KSTACK_CHECK_DR0
|
|
/*
|
|
* setup breakpoint on the top of stack
|
|
*/
|
|
if (l == &lwp0)
|
|
dr0(0, 0, 0, 0);
|
|
else
|
|
dr0(KSTACK_LOWEST_ADDR(l), 1, 3, 1);
|
|
#endif
|
|
|
|
/*
|
|
* no need to switch to kernel vmspace because
|
|
* it's a subset of any vmspace.
|
|
*/
|
|
|
|
if (pmap == pmap_kernel()) {
|
|
ci->ci_want_pmapload = 0;
|
|
return;
|
|
}
|
|
|
|
pcb = &l->l_addr->u_pcb;
|
|
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
|
|
|
|
ci->ci_want_pmapload = 1;
|
|
}
|
|
}
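
/*
 * Illustrative sketch only: how the deferred ("lazy") pmap switch above
 * is expected to be driven.  cpu_switch() calls pmap_activate() for the
 * incoming lwp, which merely records ci_want_pmapload; the expensive
 * %cr3/LDT load is then done by pmap_load() just before returning to
 * user mode.  The return path shown here is schematic, not the literal
 * MD code.
 *
 *	pmap_activate(l);			(from cpu_switch())
 *	...
 *	if (curcpu()->ci_want_pmapload)		(before leaving the kernel)
 *		pmap_load();
 */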
|
|
|
|
/*
|
|
* pmap_reactivate: try to regain reference to the pmap.
|
|
*/
|
|
|
|
static bool
|
|
pmap_reactivate(struct pmap *pmap)
|
|
{
|
|
struct cpu_info *ci = curcpu();
|
|
u_int32_t cpumask = 1U << ci->ci_cpuid;
|
|
int s;
|
|
bool result;
|
|
u_int32_t oldcpus;
|
|
|
|
/*
|
|
* if we still have a lazy reference to this pmap,
|
|
* we can assume that there was no tlb shootdown
|
|
* for this pmap in the meantime.
|
|
*/
|
|
|
|
#if defined(MULTIPROCESSOR)
|
|
s = splipi(); /* protect from tlb shootdown ipis. */
|
|
#else /* defined(MULTIPROCESSOR) */
|
|
s = splvm();
|
|
#endif /* defined(MULTIPROCESSOR) */
|
|
oldcpus = pmap->pm_cpus;
|
|
x86_atomic_setbits_l(&pmap->pm_cpus, cpumask);
|
|
if (oldcpus & cpumask) {
|
|
KASSERT(ci->ci_tlbstate == TLBSTATE_LAZY);
|
|
/* got it */
|
|
result = true;
|
|
} else {
|
|
KASSERT(ci->ci_tlbstate == TLBSTATE_STALE);
|
|
result = false;
|
|
}
|
|
ci->ci_tlbstate = TLBSTATE_VALID;
|
|
splx(s);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* pmap_load: actually switch pmap. (fill in %cr3 and LDT info)
|
|
*/
|
|
|
|
void
|
|
pmap_load()
|
|
{
|
|
struct cpu_info *ci = curcpu();
|
|
u_int32_t cpumask = 1U << ci->ci_cpuid;
|
|
struct pmap *pmap;
|
|
struct pmap *oldpmap;
|
|
struct lwp *l;
|
|
struct pcb *pcb;
|
|
pd_entry_t *mapdp;
|
|
int s;
|
|
|
|
KASSERT(ci->ci_want_pmapload);
|
|
|
|
/* should be able to take ipis. */
|
|
KASSERT(ci->ci_ilevel < IPL_IPI);
|
|
KASSERT(read_psl() == 0);
|
|
|
|
l = ci->ci_curlwp;
|
|
KASSERT(l != NULL);
|
|
pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
|
|
KASSERT(pmap != pmap_kernel());
|
|
oldpmap = ci->ci_pmap;
|
|
|
|
pcb = &l->l_addr->u_pcb;
|
|
/* loaded by pmap_activate */
|
|
KASSERT(pcb->pcb_ldt_sel == pmap->pm_ldt_sel);
|
|
|
|
if (pmap == oldpmap) {
|
|
if (!pmap_reactivate(pmap)) {
|
|
|
|
/*
|
|
			 * the pmap has been changed while it was
			 * deactivated.  our tlb may be stale.
|
|
*/
|
|
|
|
tlbflush();
|
|
}
|
|
|
|
ci->ci_want_pmapload = 0;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* actually switch pmap.
|
|
*/
|
|
|
|
x86_atomic_clearbits_l(&oldpmap->pm_cpus, cpumask);
|
|
|
|
KASSERT((pmap->pm_cpus & cpumask) == 0);
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
pmap_reference(pmap);
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
|
|
/*
|
|
* mark the pmap in use by this processor.
|
|
*/
|
|
|
|
#if defined(MULTIPROCESSOR)
|
|
s = splipi();
|
|
#else /* defined(MULTIPROCESSOR) */
|
|
s = splvm();
|
|
#endif /* defined(MULTIPROCESSOR) */
|
|
x86_atomic_setbits_l(&pmap->pm_cpus, cpumask);
|
|
ci->ci_pmap = pmap;
|
|
ci->ci_tlbstate = TLBSTATE_VALID;
|
|
splx(s);
|
|
|
|
/*
|
|
* clear apdp slot before loading %cr3 since Xen only allows
|
|
* linear pagetable mappings in the current pagetable.
|
|
*/
|
|
KDASSERT(curapdp == 0);
|
|
mapdp = (pt_entry_t *)vtomach((vaddr_t)APDP_PDE);
|
|
PDE_CLEAR(APDP_PDE, mapdp);
|
|
|
|
/*
|
|
* update tss and load corresponding registers.
|
|
*/
|
|
|
|
lldt(pcb->pcb_ldt_sel);
|
|
pcb->pcb_cr3 = pmap->pm_pdirpa;
|
|
lcr3(pcb->pcb_cr3);
|
|
|
|
ci->ci_want_pmapload = 0;
|
|
|
|
KERNEL_LOCK(1, NULL);
|
|
pmap_destroy(oldpmap);
|
|
KERNEL_UNLOCK_ONE(NULL);
|
|
}
|
|
|
|
/*
|
|
* pmap_deactivate: deactivate a process' pmap
|
|
*/
|
|
|
|
void
|
|
pmap_deactivate(l)
|
|
struct lwp *l;
|
|
{
|
|
struct pmap *pmap;
|
|
struct cpu_info *ci = curcpu();
|
|
|
|
if (l != curlwp) {
|
|
return;
|
|
}
|
|
|
|
if (ci->ci_want_pmapload) {
|
|
KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
|
|
!= pmap_kernel());
|
|
KASSERT(vm_map_pmap(&l->l_proc->p_vmspace->vm_map)
|
|
!= ci->ci_pmap || ci->ci_tlbstate != TLBSTATE_VALID);
|
|
|
|
/*
|
|
* userspace has not been touched.
|
|
* nothing to do here.
|
|
*/
|
|
|
|
ci->ci_want_pmapload = 0;
|
|
return;
|
|
}
|
|
|
|
pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
|
|
|
|
if (pmap == pmap_kernel()) {
|
|
return;
|
|
}
|
|
|
|
KASSERT(ci->ci_pmap == pmap);
|
|
|
|
KASSERT(ci->ci_tlbstate == TLBSTATE_VALID);
|
|
ci->ci_tlbstate = TLBSTATE_LAZY;
|
|
XENPRINTF(("pmap_deactivate %p ebp %p esp %p\n",
|
|
l, (void *)l->l_addr->u_pcb.pcb_ebp,
|
|
(void *)l->l_addr->u_pcb.pcb_esp));
|
|
}
|
|
|
|
/*
|
|
* end of lifecycle functions
|
|
*/
|
|
|
|
/*
|
|
* some misc. functions
|
|
*/
|
|
|
|
/*
|
|
* pmap_extract: extract a PA for the given VA
|
|
*/
|
|
|
|
bool
|
|
pmap_extract(pmap, va, pap)
|
|
struct pmap *pmap;
|
|
vaddr_t va;
|
|
paddr_t *pap;
|
|
{
|
|
pt_entry_t *ptes, pte;
|
|
pd_entry_t pde;
|
|
|
|
if (__predict_true((pde = PDE_GET(&pmap->pm_pdir[pdei(va)])) != 0)) {
|
|
#ifdef LARGEPAGES
|
|
if (pde & PG_PS) {
|
|
if (pap != NULL)
|
|
*pap = (pde & PG_LGFRAME) | (va & ~PG_LGFRAME);
|
|
return (true);
|
|
}
|
|
#endif
|
|
|
|
ptes = pmap_map_ptes(pmap);
|
|
pte = PTE_GET(&ptes[x86_btop(va)]);
|
|
pmap_unmap_ptes(pmap);
|
|
|
|
if (__predict_true((pte & PG_V) != 0)) {
|
|
if (pap != NULL)
|
|
*pap = (pte & PG_FRAME) | (va & ~PG_FRAME);
|
|
return (true);
|
|
}
|
|
}
|
|
return (false);
|
|
}
|
|
|
|
/*
|
|
* pmap_extract_ma: like pmap_extract, but returns machine address
|
|
*/
|
|
|
|
bool
|
|
pmap_extract_ma(pmap, va, pap)
|
|
struct pmap *pmap;
|
|
vaddr_t va;
|
|
paddr_t *pap;
|
|
{
|
|
pt_entry_t *ptes, pte;
|
|
pd_entry_t pde;
|
|
|
|
if (__predict_true((pde = PDE_GET(&pmap->pm_pdir[pdei(va)])) != 0)) {
|
|
#ifdef LARGEPAGES
|
|
if (pde & PG_PS) {
|
|
if (pap != NULL)
|
|
*pap = (pde & PG_LGFRAME) | (va & ~PG_LGFRAME);
|
|
return (true);
|
|
}
|
|
#endif
|
|
|
|
ptes = pmap_map_ptes(pmap);
|
|
pte = PTE_GET_MA(&ptes[x86_btop(va)]);
|
|
pmap_unmap_ptes(pmap);
|
|
|
|
if (__predict_true((pte & PG_V) != 0)) {
|
|
if (pap != NULL)
|
|
*pap = (pte & PG_FRAME) | (va & ~PG_FRAME);
|
|
return (true);
|
|
}
|
|
}
|
|
return (false);
|
|
}
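
/*
 * Illustrative sketch only: the difference between the two lookups above
 * under Xen.  pmap_extract() returns a pseudo-physical address (what the
 * rest of the kernel calls a "pa"), while pmap_extract_ma() returns the
 * underlying machine address; xpmap_ptom()/xpmap_mtop() convert between
 * the two.  "va" stands for a hypothetical mapped kernel virtual address.
 *
 *	paddr_t pa, ma;
 *
 *	if (pmap_extract(pmap_kernel(), va, &pa) &&
 *	    pmap_extract_ma(pmap_kernel(), va, &ma))
 *		XENPRINTF(("va %p: pa %p ma %p\n",
 *		    (void *)va, (void *)pa, (void *)ma));
 */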
|
|
|
|
|
|
/*
|
|
* vtophys: virtual address to physical address. For use by
|
|
* machine-dependent code only.
|
|
*/
|
|
|
|
paddr_t
|
|
vtophys(va)
|
|
vaddr_t va;
|
|
{
|
|
paddr_t pa;
|
|
|
|
if (pmap_extract(pmap_kernel(), va, &pa) == true)
|
|
return (pa);
|
|
return (0);
|
|
}
|
|
|
|
|
|
/*
|
|
* pmap_virtual_space: used during bootup [pmap_steal_memory] to
|
|
 * determine the bounds of the kernel virtual address space.
|
|
*/
|
|
|
|
void
|
|
pmap_virtual_space(startp, endp)
|
|
vaddr_t *startp;
|
|
vaddr_t *endp;
|
|
{
|
|
*startp = virtual_avail;
|
|
*endp = virtual_end;
|
|
}
|
|
|
|
/*
|
|
* pmap_map: map a range of PAs into kvm
|
|
*
|
|
* => used during crash dump
|
|
* => XXX: pmap_map() should be phased out?
|
|
*/
|
|
|
|
vaddr_t
|
|
pmap_map(va, spa, epa, prot)
|
|
vaddr_t va;
|
|
paddr_t spa, epa;
|
|
vm_prot_t prot;
|
|
{
|
|
while (spa < epa) {
|
|
pmap_enter(pmap_kernel(), va, spa, prot, 0);
|
|
va += PAGE_SIZE;
|
|
spa += PAGE_SIZE;
|
|
}
|
|
pmap_update(pmap_kernel());
|
|
return va;
|
|
}
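
/*
 * Illustrative sketch only: the sort of use pmap_map() is kept around
 * for.  A crash dump routine might window each physical page into a
 * scratch KVA region before writing it out; "scratchva" and
 * "dump_write" are made-up names, not interfaces of this file.
 *
 *	(void) pmap_map(scratchva, pa, pa + PAGE_SIZE, VM_PROT_READ);
 *	dump_write((void *)scratchva, PAGE_SIZE);
 */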
|
|
|
|
/*
|
|
* pmap_zero_page: zero a page
|
|
*/
|
|
|
|
void
|
|
pmap_zero_page(pa)
|
|
paddr_t pa;
|
|
{
|
|
#ifdef MULTIPROCESSOR
|
|
int id = cpu_number();
|
|
#endif
|
|
pt_entry_t *zpte = PTESLEW(zero_pte, id);
|
|
pt_entry_t *maptp;
|
|
void *zerova = VASLEW(zerop, id);
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (PTE_GET(zpte))
|
|
panic("pmap_zero_page: lock botch");
|
|
#endif
|
|
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)zpte);
|
|
PTE_SET(zpte, maptp,
|
|
(pa & PG_FRAME) | PG_V | PG_RW | PG_M | PG_U);/* map in */
|
|
pmap_update_pg((vaddr_t)zerova); /* flush TLB */
|
|
|
|
memset(zerova, 0, PAGE_SIZE); /* zero */
|
|
PTE_CLEAR(zpte, maptp); /* zap! */
|
|
}
|
|
|
|
/*
|
|
 * pmap_pageidlezero: the same, for the idle loop page zero'er.
 * Returns true if the page was zero'd, false if we aborted for
 * some reason.
|
|
*/
|
|
|
|
bool
|
|
pmap_pageidlezero(pa)
|
|
paddr_t pa;
|
|
{
|
|
#ifdef MULTIPROCESSOR
|
|
int id = cpu_number();
|
|
#endif
|
|
pt_entry_t *zpte = PTESLEW(zero_pte, id);
|
|
pt_entry_t *maptp;
|
|
void *zerova = VASLEW(zerop, id);
|
|
bool rv = true;
|
|
int *ptr;
|
|
int *ep;
|
|
#if defined(I686_CPU)
|
|
const u_int32_t cpu_features = curcpu()->ci_feature_flags;
|
|
#endif /* defined(I686_CPU) */
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (PTE_GET(zpte))
|
|
panic("pmap_pageidlezero: lock botch");
|
|
#endif
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)zpte);
|
|
PTE_SET(zpte, maptp,
|
|
(pa & PG_FRAME) | PG_V | PG_RW | PG_M | PG_U); /* map in */
|
|
pmap_update_pg((vaddr_t)zerova); /* flush TLB */
|
|
for (ptr = (int *) zerova, ep = ptr + PAGE_SIZE / sizeof(int);
|
|
ptr < ep; ptr++) {
|
|
if (sched_curcpu_runnable_p()) {
|
|
|
|
/*
|
|
* A process has become ready. Abort now,
|
|
* so we don't keep it waiting while we
|
|
* do slow memory access to finish this
|
|
* page.
|
|
*/
|
|
|
|
rv = false;
|
|
break;
|
|
}
|
|
#if defined(I686_CPU)
|
|
if (cpu_features & CPUID_SSE2)
|
|
__asm volatile ("movnti %1, %0" :
|
|
"=m"(*ptr) : "r" (0));
|
|
else
|
|
#endif /* defined(I686_CPU) */
|
|
*ptr = 0;
|
|
}
|
|
|
|
#if defined(I686_CPU)
|
|
if (cpu_features & CPUID_SSE2)
|
|
__asm volatile ("sfence" ::: "memory");
|
|
#endif /* defined(I686_CPU) */
|
|
|
|
PTE_CLEAR(zpte, maptp); /* zap! */
|
|
return (rv);
|
|
}
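
/*
 * Illustrative sketch only: how the idle-loop page zero'er is expected
 * to use the function above.  The MI caller hands over one free page at
 * a time and stops as soon as pmap_pageidlezero() reports that something
 * became runnable; "pg" is a hypothetical struct vm_page taken off the
 * free list.
 *
 *	if (!pmap_pageidlezero(VM_PAGE_TO_PHYS(pg))) {
 *		... stop: the page was only partially zeroed ...
 *	}
 */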
|
|
|
|
/*
|
|
* pmap_copy_page: copy a page
|
|
*/
|
|
|
|
void
|
|
pmap_copy_page(srcpa, dstpa)
|
|
paddr_t srcpa, dstpa;
|
|
{
|
|
#ifdef MULTIPROCESSOR
|
|
int id = cpu_number();
|
|
#endif
|
|
pt_entry_t *spte = PTESLEW(csrc_pte,id), *maspte;
|
|
pt_entry_t *dpte = PTESLEW(cdst_pte,id), *madpte;
|
|
void *csrcva = VASLEW(csrcp, id);
|
|
void *cdstva = VASLEW(cdstp, id);
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (PTE_GET(spte) || PTE_GET(dpte))
|
|
panic("pmap_copy_page: lock botch");
|
|
#endif
|
|
|
|
maspte = (pt_entry_t *)vtomach((vaddr_t)spte);
|
|
madpte = (pt_entry_t *)vtomach((vaddr_t)dpte);
|
|
PTE_SET(spte, maspte, (srcpa & PG_FRAME) | PG_V | PG_RW | PG_U);
|
|
PTE_SET(dpte, madpte, (dstpa & PG_FRAME) | PG_V | PG_RW | PG_M | PG_U);
|
|
pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
|
|
memcpy(cdstva, csrcva, PAGE_SIZE);
|
|
PTE_CLEAR(spte, maspte); /* zap! */
|
|
PTE_CLEAR(dpte, madpte); /* zap! */
|
|
}
|
|
|
|
/*
|
|
* p m a p r e m o v e f u n c t i o n s
|
|
*
|
|
* functions that remove mappings
|
|
*/
|
|
|
|
/*
|
|
* pmap_remove_ptes: remove PTEs from a PTP
|
|
*
|
|
* => must have proper locking on pmap_master_lock
|
|
* => caller must hold pmap's lock
|
|
* => PTP must be mapped into KVA
|
|
* => PTP should be null if pmap == pmap_kernel()
|
|
*/
|
|
|
|
static void
|
|
pmap_remove_ptes(pmap, ptp, ptpva, startva, endva, cpumaskp, flags)
|
|
struct pmap *pmap;
|
|
struct vm_page *ptp;
|
|
vaddr_t ptpva;
|
|
vaddr_t startva, endva;
|
|
int32_t *cpumaskp;
|
|
int flags;
|
|
{
|
|
struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
|
|
struct pv_entry *pve;
|
|
pt_entry_t *pte = (pt_entry_t *) ptpva;
|
|
pt_entry_t opte;
|
|
pt_entry_t *maptp;
|
|
|
|
/*
|
|
* note that ptpva points to the PTE that maps startva. this may
|
|
* or may not be the first PTE in the PTP.
|
|
*
|
|
* we loop through the PTP while there are still PTEs to look at
|
|
* and the wire_count is greater than 1 (because we use the wire_count
|
|
* to keep track of the number of real PTEs in the PTP).
|
|
*/
|
|
|
|
for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
|
|
; pte++, startva += PAGE_SIZE) {
|
|
struct vm_page *pg;
|
|
struct vm_page_md *mdpg;
|
|
|
|
if (!pmap_valid_entry(*pte))
|
|
continue; /* VA not mapped */
|
|
if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) {
|
|
continue;
|
|
}
|
|
|
|
/* atomically save the old PTE and zap! it */
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
|
|
opte = pte_atomic_update(pte, maptp, 0);
|
|
pmap_exec_account(pmap, startva, opte, 0);
|
|
|
|
if (opte & PG_W)
|
|
pmap->pm_stats.wired_count--;
|
|
pmap->pm_stats.resident_count--;
|
|
|
|
if (opte & PG_U)
|
|
pmap_tlb_shootdown(pmap, startva, opte, cpumaskp);
|
|
|
|
if (ptp) {
|
|
ptp->wire_count--; /* dropping a PTE */
|
|
/* Make sure that the PDE is flushed */
|
|
if ((ptp->wire_count <= 1) && !(opte & PG_U))
|
|
pmap_tlb_shootdown(pmap, startva, opte,
|
|
cpumaskp);
|
|
}
|
|
|
|
/*
|
|
* if we are not on a pv_head list we are done.
|
|
*/
|
|
|
|
if ((opte & PG_PVLIST) == 0) {
|
|
#if defined(DIAGNOSTIC) && !defined(DOM0OPS)
|
|
if (PHYS_TO_VM_PAGE(opte & PG_FRAME) != NULL)
|
|
panic("pmap_remove_ptes: managed page without "
|
|
"PG_PVLIST for 0x%lx", startva);
|
|
#endif
|
|
continue;
|
|
}
|
|
|
|
pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
|
|
#ifdef DIAGNOSTIC
|
|
if (pg == NULL)
|
|
panic("pmap_remove_ptes: unmanaged page marked "
|
|
"PG_PVLIST, va = 0x%lx, pa = 0x%lx",
|
|
startva, (u_long)(opte & PG_FRAME));
|
|
#endif
|
|
mdpg = &pg->mdpage;
|
|
|
|
/* sync R/M bits */
|
|
simple_lock(&mdpg->mp_pvhead.pvh_lock);
|
|
mdpg->mp_attrs |= (opte & (PG_U|PG_M));
|
|
pve = pmap_remove_pv(&mdpg->mp_pvhead, pmap, startva);
|
|
simple_unlock(&mdpg->mp_pvhead.pvh_lock);
|
|
|
|
if (pve) {
|
|
SPLAY_RIGHT(pve, pv_node) = pv_tofree;
|
|
pv_tofree = pve;
|
|
}
|
|
|
|
/* end of "for" loop: time for next pte */
|
|
}
|
|
if (pv_tofree)
|
|
pmap_free_pvs(pmap, pv_tofree);
|
|
}
|
|
|
|
|
|
/*
|
|
* pmap_remove_pte: remove a single PTE from a PTP
|
|
*
|
|
* => must have proper locking on pmap_master_lock
|
|
* => caller must hold pmap's lock
|
|
* => PTP must be mapped into KVA
|
|
* => PTP should be null if pmap == pmap_kernel()
|
|
* => returns true if we removed a mapping
|
|
*/
|
|
|
|
static bool
|
|
pmap_remove_pte(pmap, ptp, pte, va, cpumaskp, flags)
|
|
struct pmap *pmap;
|
|
struct vm_page *ptp;
|
|
pt_entry_t *pte;
|
|
vaddr_t va;
|
|
int32_t *cpumaskp;
|
|
int flags;
|
|
{
|
|
pt_entry_t opte;
|
|
pt_entry_t *maptp;
|
|
struct pv_entry *pve;
|
|
struct vm_page *pg;
|
|
struct vm_page_md *mdpg;
|
|
|
|
if (!pmap_valid_entry(*pte))
|
|
return(false); /* VA not mapped */
|
|
if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) {
|
|
return(false);
|
|
}
|
|
|
|
/* atomically save the old PTE and zap! it */
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)pte);
|
|
opte = pte_atomic_update(pte, maptp, 0);
|
|
|
|
XENPRINTK(("pmap_remove_pte %p, was %08x\n", pte, opte));
|
|
pmap_exec_account(pmap, va, opte, 0);
|
|
|
|
if (opte & PG_W)
|
|
pmap->pm_stats.wired_count--;
|
|
pmap->pm_stats.resident_count--;
|
|
|
|
if (opte & PG_U)
|
|
pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
|
|
|
|
if (ptp) {
|
|
ptp->wire_count--; /* dropping a PTE */
|
|
/* Make sure that the PDE is flushed */
|
|
if ((ptp->wire_count <= 1) && !(opte & PG_U))
|
|
pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
|
|
|
|
}
|
|
/*
|
|
* if we are not on a pv_head list we are done.
|
|
*/
|
|
|
|
if ((opte & PG_PVLIST) == 0) {
|
|
#if defined(DIAGNOSTIC) && !defined(DOM0OPS)
|
|
if (PHYS_TO_VM_PAGE(opte & PG_FRAME) != NULL)
|
|
panic("pmap_remove_pte: managed page without "
|
|
"PG_PVLIST for 0x%lx", va);
|
|
#endif
|
|
return(true);
|
|
}
|
|
|
|
pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
|
|
#ifdef DIAGNOSTIC
|
|
if (pg == NULL)
|
|
panic("pmap_remove_pte: unmanaged page marked "
|
|
"PG_PVLIST, va = 0x%lx, pa = 0x%lx", va,
|
|
(u_long)(opte & PG_FRAME));
|
|
#endif
|
|
mdpg = &pg->mdpage;
|
|
|
|
/* sync R/M bits */
|
|
simple_lock(&mdpg->mp_pvhead.pvh_lock);
|
|
mdpg->mp_attrs |= (opte & (PG_U|PG_M));
|
|
pve = pmap_remove_pv(&mdpg->mp_pvhead, pmap, va);
|
|
simple_unlock(&mdpg->mp_pvhead.pvh_lock);
|
|
|
|
if (pve)
|
|
pmap_free_pv(pmap, pve);
|
|
return(true);
|
|
}
|
|
|
|
/*
|
|
* pmap_remove: top level mapping removal function
|
|
*
|
|
* => caller should not be holding any pmap locks
|
|
*/
|
|
|
|
void
|
|
pmap_remove(pmap, sva, eva)
|
|
struct pmap *pmap;
|
|
vaddr_t sva, eva;
|
|
{
|
|
pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL);
|
|
}
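
/*
 * Illustrative sketch only: the usual calling pattern for the function
 * above.  As elsewhere in the pmap API, the caller is expected to follow
 * the removal with pmap_update() so that deferred TLB invalidations are
 * pushed out; "pm", "sva" and "eva" are hypothetical caller variables.
 *
 *	pmap_remove(pm, sva, eva);
 *	pmap_update(pm);
 */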
|
|
|
|
/*
|
|
* pmap_do_remove: mapping removal guts
|
|
*
|
|
* => caller should not be holding any pmap locks
|
|
*/
|
|
|
|
static void
|
|
pmap_do_remove(pmap, sva, eva, flags)
|
|
struct pmap *pmap;
|
|
vaddr_t sva, eva;
|
|
int flags;
|
|
{
|
|
pt_entry_t *ptes, opte;
|
|
pt_entry_t *maptp;
|
|
bool result;
|
|
paddr_t ptppa;
|
|
vaddr_t blkendva;
|
|
struct vm_page *ptp;
|
|
int32_t cpumask = 0;
|
|
TAILQ_HEAD(, vm_page) empty_ptps;
|
|
struct cpu_info *ci;
|
|
struct pmap *curpmap;
|
|
|
|
/*
|
|
* we lock in the pmap => pv_head direction
|
|
*/
|
|
|
|
TAILQ_INIT(&empty_ptps);
|
|
|
|
PMAP_MAP_TO_HEAD_LOCK();
|
|
|
|
ptes = pmap_map_ptes(pmap); /* locks pmap */
|
|
|
|
ci = curcpu();
|
|
curpmap = ci->ci_pmap;
|
|
|
|
/*
|
|
* removing one page? take shortcut function.
|
|
*/
|
|
|
|
if (sva + PAGE_SIZE == eva) {
|
|
if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) {
|
|
|
|
/* PA of the PTP */
|
|
ptppa = PDE_GET(&pmap->pm_pdir[pdei(sva)]) & PG_FRAME;
|
|
|
|
/* get PTP if non-kernel mapping */
|
|
if (pmap == pmap_kernel()) {
|
|
/* we never free kernel PTPs */
|
|
ptp = NULL;
|
|
} else {
|
|
if (pmap->pm_ptphint &&
|
|
VM_PAGE_TO_PHYS(pmap->pm_ptphint) ==
|
|
ptppa) {
|
|
ptp = pmap->pm_ptphint;
|
|
} else {
|
|
ptp = PHYS_TO_VM_PAGE(ptppa);
|
|
#ifdef DIAGNOSTIC
|
|
if (ptp == NULL)
|
|
panic("pmap_remove: unmanaged "
|
|
"PTP detected");
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/* do it! */
|
|
result = pmap_remove_pte(pmap, ptp,
|
|
&ptes[x86_btop(sva)], sva, &cpumask, flags);
|
|
|
|
/*
|
|
* if mapping removed and the PTP is no longer
|
|
* being used, free it!
|
|
*/
|
|
|
|
if (result && ptp && ptp->wire_count <= 1) {
|
|
/* zap! */
|
|
maptp = (pt_entry_t *)vtomach(
|
|
(vaddr_t)&pmap->pm_pdir[pdei(sva)]);
|
|
PTE_ATOMIC_CLEAR(&pmap->pm_pdir[pdei(sva)],
|
|
maptp, opte);
|
|
#if defined(MULTIPROCESSOR)
|
|
/*
|
|
* XXXthorpej Redundant shootdown can happen
|
|
* here if we're using APTE space.
|
|
*/
|
|
#endif
|
|
pmap_tlb_shootdown(curpmap,
|
|
((vaddr_t)ptes) + ptp->offset, opte,
|
|
&cpumask);
|
|
#if defined(MULTIPROCESSOR)
|
|
/*
|
|
* Always shoot down the pmap's self-mapping
|
|
* of the PTP.
|
|
* XXXthorpej Redundant shootdown can happen
|
|
* here if pmap == curpmap (not APTE space).
|
|
*/
|
|
pmap_tlb_shootdown(pmap,
|
|
((vaddr_t)PTE_BASE) + ptp->offset, opte,
|
|
&cpumask);
|
|
#endif
|
|
pmap->pm_stats.resident_count--;
|
|
if (pmap->pm_ptphint == ptp)
|
|
pmap->pm_ptphint =
|
|
TAILQ_FIRST(&pmap->pm_obj.memq);
|
|
ptp->wire_count = 0;
|
|
ptp->flags |= PG_ZERO;
|
|
uvm_pagerealloc(ptp, NULL, 0);
|
|
TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
|
|
}
|
|
}
|
|
pmap_tlb_shootnow(cpumask);
|
|
pmap_unmap_ptes(pmap); /* unlock pmap */
|
|
PMAP_MAP_TO_HEAD_UNLOCK();
|
|
/* Now we can free unused ptps */
|
|
TAILQ_FOREACH(ptp, &empty_ptps, listq)
|
|
uvm_pagefree(ptp);
|
|
return;
|
|
}
|
|
|
|
cpumask = 0;
|
|
|
|
for (/* null */ ; sva < eva ; sva = blkendva) {
|
|
|
|
/* determine range of block */
|
|
blkendva = x86_round_pdr(sva+1);
|
|
if (blkendva > eva)
|
|
blkendva = eva;
|
|
|
|
/*
|
|
* XXXCDC: our PTE mappings should never be removed
|
|
* with pmap_remove! if we allow this (and why would
|
|
* we?) then we end up freeing the pmap's page
|
|
* directory page (PDP) before we are finished using
|
|
		 * it when we hit it in the recursive mapping.  this
|
|
* is BAD.
|
|
*
|
|
* long term solution is to move the PTEs out of user
|
|
		 * address space and into kernel address space (up
|
|
* with APTE). then we can set VM_MAXUSER_ADDRESS to
|
|
* be VM_MAX_ADDRESS.
|
|
*/
|
|
|
|
if (pdei(sva) == PDSLOT_PTE)
|
|
/* XXXCDC: ugly hack to avoid freeing PDP here */
|
|
continue;
|
|
|
|
if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
|
|
/* valid block? */
|
|
continue;
|
|
|
|
/* PA of the PTP */
|
|
ptppa = (PDE_GET(&pmap->pm_pdir[pdei(sva)]) & PG_FRAME);
|
|
|
|
/* get PTP if non-kernel mapping */
|
|
if (pmap == pmap_kernel()) {
|
|
/* we never free kernel PTPs */
|
|
ptp = NULL;
|
|
} else {
|
|
if (pmap->pm_ptphint &&
|
|
VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) {
|
|
ptp = pmap->pm_ptphint;
|
|
} else {
|
|
ptp = PHYS_TO_VM_PAGE(ptppa);
|
|
#ifdef DIAGNOSTIC
|
|
if (ptp == NULL)
|
|
panic("pmap_remove: unmanaged PTP "
|
|
"detected");
|
|
#endif
|
|
}
|
|
}
|
|
pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[x86_btop(sva)],
|
|
sva, blkendva, &cpumask, flags);
|
|
|
|
/* if PTP is no longer being used, free it! */
|
|
if (ptp && ptp->wire_count <= 1) {
|
|
/* zap! */
|
|
maptp = (pt_entry_t *)vtomach(
|
|
(vaddr_t)&pmap->pm_pdir[pdei(sva)]);
|
|
PTE_ATOMIC_CLEAR(&pmap->pm_pdir[pdei(sva)],
|
|
maptp, opte);
|
|
#if defined(MULTIPROCESSOR)
|
|
/*
|
|
* XXXthorpej Redundant shootdown can happen here
|
|
* if we're using APTE space.
|
|
*/
|
|
#endif
|
|
pmap_tlb_shootdown(curpmap,
|
|
((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
|
|
#if defined(MULTIPROCESSOR)
|
|
/*
|
|
* Always shoot down the pmap's self-mapping
|
|
* of the PTP.
|
|
* XXXthorpej Redundant shootdown can happen here
|
|
* if pmap == curpmap (not APTE space).
|
|
*/
|
|
pmap_tlb_shootdown(pmap,
|
|
((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
|
|
#endif
|
|
pmap->pm_stats.resident_count--;
|
|
if (pmap->pm_ptphint == ptp) /* update hint? */
|
|
pmap->pm_ptphint = pmap->pm_obj.memq.tqh_first;
|
|
ptp->wire_count = 0;
|
|
ptp->flags |= PG_ZERO;
|
|
/* Postpone free to shootdown */
|
|
uvm_pagerealloc(ptp, NULL, 0);
|
|
TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
|
|
}
|
|
}
|
|
|
|
pmap_tlb_shootnow(cpumask);
|
|
pmap_unmap_ptes(pmap);
|
|
PMAP_MAP_TO_HEAD_UNLOCK();
|
|
/* Now we can free unused ptps */
|
|
TAILQ_FOREACH(ptp, &empty_ptps, listq)
|
|
uvm_pagefree(ptp);
|
|
}
|
|
|
|
/*
|
|
* pmap_page_remove: remove a managed vm_page from all pmaps that map it
|
|
*
|
|
* => we set pv_head => pmap locking
|
|
* => R/M bits are sync'd back to attrs
|
|
*/
|
|
|
|
void
|
|
pmap_page_remove(pg)
|
|
struct vm_page *pg;
|
|
{
|
|
struct pv_head *pvh;
|
|
struct pv_entry *pve, *npve, *killlist = NULL;
|
|
pt_entry_t *ptes, opte;
|
|
pt_entry_t *maptp;
|
|
int32_t cpumask = 0;
|
|
TAILQ_HEAD(, vm_page) empty_ptps;
|
|
struct vm_page *ptp;
|
|
struct cpu_info *ci;
|
|
struct pmap *curpmap;
|
|
|
|
#ifdef DIAGNOSTIC
|
|
int bank, off;
|
|
|
|
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
|
|
if (bank == -1)
|
|
panic("pmap_page_remove: unmanaged page?");
|
|
#endif
|
|
|
|
pvh = &pg->mdpage.mp_pvhead;
|
|
if (SPLAY_ROOT(&pvh->pvh_root) == NULL) {
|
|
return;
|
|
}
|
|
|
|
TAILQ_INIT(&empty_ptps);
|
|
|
|
/* set pv_head => pmap locking */
|
|
PMAP_HEAD_TO_MAP_LOCK();
|
|
|
|
ci = curcpu();
|
|
curpmap = ci->ci_pmap;
|
|
|
|
/* XXX: needed if we hold head->map lock? */
|
|
simple_lock(&pvh->pvh_lock);
|
|
|
|
for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root); pve != NULL; pve = npve) {
|
|
npve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve);
|
|
ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (pve->pv_ptp &&
|
|
(PDE_GET(&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]) &
|
|
PG_FRAME) != VM_PAGE_TO_PHYS(pve->pv_ptp)) {
|
|
printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
|
|
pg, pve->pv_va, pve->pv_ptp);
|
|
printf("pmap_page_remove: PTP's phys addr: "
|
|
"actual=%lx, recorded=%lx\n",
|
|
(PDE_GET(&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])
|
|
& PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp));
|
|
panic("pmap_page_remove: mapped managed page has "
|
|
"invalid pv_ptp field");
|
|
}
|
|
#endif
|
|
|
|
/* atomically save the old PTE and zap! it */
|
|
maptp = (pt_entry_t *)vtomach(
|
|
(vaddr_t)&ptes[x86_btop(pve->pv_va)]);
|
|
opte = pte_atomic_update(&ptes[x86_btop(pve->pv_va)],
|
|
maptp, 0);
|
|
|
|
if (opte & PG_W)
|
|
pve->pv_pmap->pm_stats.wired_count--;
|
|
pve->pv_pmap->pm_stats.resident_count--;
|
|
|
|
/* Shootdown only if referenced */
|
|
if (opte & PG_U)
|
|
pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
|
|
&cpumask);
|
|
|
|
/* sync R/M bits */
|
|
pg->mdpage.mp_attrs |= (opte & (PG_U|PG_M));
|
|
|
|
/* update the PTP reference count. free if last reference. */
|
|
if (pve->pv_ptp) {
|
|
pve->pv_ptp->wire_count--;
|
|
if (pve->pv_ptp->wire_count <= 1) {
|
|
/*
|
|
				 * Do we have to shoot down the page just to
				 * get the pte out of the TLB?
|
|
*/
|
|
if(!(opte & PG_U))
|
|
pmap_tlb_shootdown(pve->pv_pmap,
|
|
pve->pv_va, opte, &cpumask);
|
|
|
|
/* zap! */
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)
|
|
&pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]);
|
|
PTE_ATOMIC_CLEAR(&pve->pv_pmap->pm_pdir
|
|
[pdei(pve->pv_va)], maptp, opte);
|
|
pmap_tlb_shootdown(curpmap,
|
|
((vaddr_t)ptes) + pve->pv_ptp->offset,
|
|
opte, &cpumask);
|
|
#if defined(MULTIPROCESSOR)
|
|
/*
|
|
* Always shoot down the other pmap's
|
|
* self-mapping of the PTP.
|
|
*/
|
|
pmap_tlb_shootdown(pve->pv_pmap,
|
|
((vaddr_t)PTE_BASE) + pve->pv_ptp->offset,
|
|
opte, &cpumask);
|
|
#endif
|
|
pve->pv_pmap->pm_stats.resident_count--;
|
|
/* update hint? */
|
|
if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
|
|
pve->pv_pmap->pm_ptphint =
|
|
pve->pv_pmap->pm_obj.memq.tqh_first;
|
|
pve->pv_ptp->wire_count = 0;
|
|
pve->pv_ptp->flags |= PG_ZERO;
|
|
/* Free only after the shootdown */
|
|
uvm_pagerealloc(pve->pv_ptp, NULL, 0);
|
|
TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
|
|
listq);
|
|
}
|
|
}
|
|
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
|
|
SPLAY_REMOVE(pvtree, &pvh->pvh_root, pve); /* remove it */
|
|
SPLAY_RIGHT(pve, pv_node) = killlist; /* mark it for death */
|
|
killlist = pve;
|
|
}
|
|
pmap_free_pvs(NULL, killlist);
|
|
simple_unlock(&pvh->pvh_lock);
|
|
PMAP_HEAD_TO_MAP_UNLOCK();
|
|
pmap_tlb_shootnow(cpumask);
|
|
|
|
/* Now we can free unused ptps */
|
|
TAILQ_FOREACH(ptp, &empty_ptps, listq)
|
|
uvm_pagefree(ptp);
|
|
}
|
|
|
|
/*
|
|
* p m a p a t t r i b u t e f u n c t i o n s
|
|
* functions that test/change managed page's attributes
|
|
* since a page can be mapped multiple times we must check each PTE that
|
|
* maps it by going down the pv lists.
|
|
*/
|
|
|
|
/*
|
|
* pmap_test_attrs: test a page's attributes
|
|
*
|
|
* => we set pv_head => pmap locking
|
|
*/
|
|
|
|
bool
|
|
pmap_test_attrs(pg, testbits)
|
|
struct vm_page *pg;
|
|
int testbits;
|
|
{
|
|
struct vm_page_md *mdpg;
|
|
int *myattrs;
|
|
struct pv_head *pvh;
|
|
struct pv_entry *pve;
|
|
volatile pt_entry_t *ptes;
|
|
pt_entry_t pte;
|
|
|
|
#if DIAGNOSTIC
|
|
int bank, off;
|
|
|
|
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
|
|
if (bank == -1)
|
|
panic("pmap_test_attrs: unmanaged page?");
|
|
#endif
|
|
mdpg = &pg->mdpage;
|
|
|
|
/*
|
|
* before locking: see if attributes are already set and if so,
|
|
* return!
|
|
*/
|
|
|
|
myattrs = &mdpg->mp_attrs;
|
|
if (*myattrs & testbits)
|
|
return(true);
|
|
|
|
/* test to see if there is a list before bothering to lock */
|
|
pvh = &mdpg->mp_pvhead;
|
|
if (SPLAY_ROOT(&pvh->pvh_root) == NULL) {
|
|
return(false);
|
|
}
|
|
|
|
/* nope, gonna have to do it the hard way */
|
|
PMAP_HEAD_TO_MAP_LOCK();
|
|
/* XXX: needed if we hold head->map lock? */
|
|
simple_lock(&pvh->pvh_lock);
|
|
|
|
for (pve = SPLAY_MIN(pvtree, &pvh->pvh_root);
|
|
pve != NULL && (*myattrs & testbits) == 0;
|
|
pve = SPLAY_NEXT(pvtree, &pvh->pvh_root, pve)) {
|
|
ptes = pmap_map_ptes(pve->pv_pmap);
|
|
pte = PTE_GET(&ptes[x86_btop(pve->pv_va)]); /* XXX flags only? */
|
|
pmap_unmap_ptes(pve->pv_pmap);
|
|
*myattrs |= pte;
|
|
}
|
|
|
|
/*
|
|
* note that we will exit the for loop with a non-null pve if
|
|
* we have found the bits we are testing for.
|
|
*/
|
|
|
|
simple_unlock(&pvh->pvh_lock);
|
|
PMAP_HEAD_TO_MAP_UNLOCK();
|
|
return((*myattrs & testbits) != 0);
|
|
}
|
|
|
|
/*
|
|
* pmap_clear_attrs: clear the specified attribute for a page.
|
|
*
|
|
* => we set pv_head => pmap locking
|
|
* => we return true if we cleared one of the bits we were asked to
|
|
*/
|
|
|
|
bool
|
|
pmap_clear_attrs(pg, clearbits)
|
|
struct vm_page *pg;
|
|
int clearbits;
|
|
{
|
|
struct vm_page_md *mdpg;
|
|
u_int32_t result;
|
|
struct pv_head *pvh;
|
|
struct pv_entry *pve;
|
|
pt_entry_t *ptes, opte;
|
|
pt_entry_t *maptp;
|
|
int *myattrs;
|
|
int32_t cpumask = 0;
|
|
|
|
#ifdef DIAGNOSTIC
|
|
int bank, off;
|
|
|
|
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
|
|
if (bank == -1)
|
|
panic("pmap_change_attrs: unmanaged page?");
|
|
#endif
|
|
mdpg = &pg->mdpage;
|
|
|
|
PMAP_HEAD_TO_MAP_LOCK();
|
|
pvh = &mdpg->mp_pvhead;
|
|
/* XXX: needed if we hold head->map lock? */
|
|
simple_lock(&pvh->pvh_lock);
|
|
|
|
myattrs = &mdpg->mp_attrs;
|
|
result = *myattrs & clearbits;
|
|
*myattrs &= ~clearbits;
|
|
|
|
SPLAY_FOREACH(pve, pvtree, &pvh->pvh_root) {
|
|
#ifdef DIAGNOSTIC
|
|
if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]))
|
|
panic("pmap_change_attrs: mapping without PTP "
|
|
"detected");
|
|
#endif
|
|
|
|
ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
|
|
opte = PTE_GET(&ptes[x86_btop(pve->pv_va)]);
|
|
if (opte & clearbits) {
|
|
/* We need to do something */
|
|
if (clearbits == PG_RW) {
|
|
result |= PG_RW;
|
|
|
|
/*
|
|
* On write protect we might not need to flush
|
|
* the TLB
|
|
*/
|
|
|
|
/* First zap the RW bit! */
|
|
maptp = (pt_entry_t *)vtomach(
|
|
(vaddr_t)&ptes[x86_btop(pve->pv_va)]);
|
|
PTE_ATOMIC_CLEARBITS(
|
|
&ptes[x86_btop(pve->pv_va)],
|
|
maptp, PG_RW);
|
|
opte = PTE_GET(&ptes[x86_btop(pve->pv_va)]);
|
|
|
|
/*
|
|
				 * Then check whether it was cached as RW in the TLB
|
|
*/
|
|
if (!(opte & PG_M))
|
|
goto no_tlb_shootdown;
|
|
}
|
|
|
|
/*
|
|
			 * Since we need a shootdown we might as well
|
|
* always clear PG_U AND PG_M.
|
|
*/
|
|
|
|
/* zap! */
|
|
maptp = (pt_entry_t *)vtomach(
|
|
(vaddr_t)&ptes[x86_btop(pve->pv_va)]);
|
|
PTE_ATOMIC_SET(&ptes[x86_btop(pve->pv_va)], maptp,
|
|
(opte & ~(PG_U | PG_M)), opte);
|
|
|
|
result |= (opte & clearbits);
|
|
*myattrs |= (opte & ~(clearbits));
|
|
|
|
pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
|
|
&cpumask);
|
|
}
|
|
no_tlb_shootdown:
|
|
pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
|
|
}
|
|
|
|
simple_unlock(&pvh->pvh_lock);
|
|
PMAP_HEAD_TO_MAP_UNLOCK();
|
|
|
|
pmap_tlb_shootnow(cpumask);
|
|
return(result != 0);
|
|
}
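
/*
 * Illustrative sketch only: the MI reference/modify interface is
 * expected to be provided as thin pmap.h wrappers around the two
 * functions above, along the lines of the following (the real macros
 * live in pmap.h, not here).
 *
 *	#define pmap_is_referenced(pg)		pmap_test_attrs(pg, PG_U)
 *	#define pmap_is_modified(pg)		pmap_test_attrs(pg, PG_M)
 *	#define pmap_clear_reference(pg)	pmap_clear_attrs(pg, PG_U)
 *	#define pmap_clear_modify(pg)		pmap_clear_attrs(pg, PG_M)
 */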
|
|
|
|
|
|
/*
|
|
* p m a p p r o t e c t i o n f u n c t i o n s
|
|
*/
|
|
|
|
/*
|
|
* pmap_page_protect: change the protection of all recorded mappings
|
|
* of a managed page
|
|
*
|
|
* => NOTE: this is an inline function in pmap.h
|
|
*/
|
|
|
|
/* see pmap.h */
|
|
|
|
/*
|
|
 * pmap_protect: set the protection of the pages in a pmap
|
|
*
|
|
* => NOTE: this is an inline function in pmap.h
|
|
*/
|
|
|
|
/* see pmap.h */
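
/*
 * Illustrative sketch only: roughly how the two pmap.h inlines referred
 * to above are expected to dispatch onto the functions in this file
 * (see pmap.h for the authoritative definitions).
 *
 *	pmap_page_protect(pg, prot):
 *		write still allowed	-> nothing to do
 *		read-only		-> pmap_clear_attrs(pg, PG_RW)
 *		no access		-> pmap_page_remove(pg)
 *
 *	pmap_protect(pmap, sva, eva, prot):
 *		write removed		-> pmap_write_protect(pmap, sva, eva, prot)
 *		no access		-> pmap_remove(pmap, sva, eva)
 */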
|
|
|
|
/*
|
|
* pmap_write_protect: write-protect pages in a pmap
|
|
*/
|
|
|
|
void
|
|
pmap_write_protect(pmap, sva, eva, prot)
|
|
struct pmap *pmap;
|
|
vaddr_t sva, eva;
|
|
vm_prot_t prot;
|
|
{
|
|
pt_entry_t *ptes, *epte;
|
|
pt_entry_t *maptp;
|
|
#ifndef XEN
|
|
volatile
|
|
#endif
|
|
pt_entry_t *spte;
|
|
vaddr_t blockend;
|
|
int32_t cpumask = 0;
|
|
|
|
ptes = pmap_map_ptes(pmap); /* locks pmap */
|
|
|
|
/* should be ok, but just in case ... */
|
|
sva &= PG_FRAME;
|
|
eva &= PG_FRAME;
|
|
|
|
for (/* null */ ; sva < eva ; sva = blockend) {
|
|
|
|
blockend = (sva & PD_MASK) + NBPD;
|
|
if (blockend > eva)
|
|
blockend = eva;
|
|
|
|
/*
|
|
* XXXCDC: our PTE mappings should never be write-protected!
|
|
*
|
|
* long term solution is to move the PTEs out of user
|
|
		 * address space and into kernel address space (up
|
|
* with APTE). then we can set VM_MAXUSER_ADDRESS to
|
|
* be VM_MAX_ADDRESS.
|
|
*/
|
|
|
|
/* XXXCDC: ugly hack to avoid freeing PDP here */
|
|
if (pdei(sva) == PDSLOT_PTE)
|
|
continue;
|
|
|
|
/* empty block? */
|
|
if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
|
|
continue;
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (sva >= VM_MAXUSER_ADDRESS &&
|
|
sva < VM_MAX_ADDRESS)
|
|
panic("pmap_write_protect: PTE space");
|
|
#endif
|
|
|
|
spte = &ptes[x86_btop(sva)];
|
|
epte = &ptes[x86_btop(blockend)];
|
|
|
|
for (/*null */; spte < epte ; spte++) {
|
|
if ((PTE_GET(spte) & (PG_RW|PG_V)) == (PG_RW|PG_V)) {
|
|
maptp = (pt_entry_t *)vtomach((vaddr_t)spte);
|
|
PTE_ATOMIC_CLEARBITS(spte, maptp, PG_RW);
|
|
if (PTE_GET(spte) & PG_M)
|
|
pmap_tlb_shootdown(pmap,
|
|
x86_ptob(spte - ptes),
|
|
PTE_GET(spte), &cpumask);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* if we kept a removal record and removed some pages update the TLB
|
|
*/
|
|
|
|
pmap_tlb_shootnow(cpumask);
|
|
pmap_unmap_ptes(pmap); /* unlocks pmap */
|
|
}
|
|
|
|
/*
|
|
* end of protection functions
|
|
*/
|
|
|
|
/*
|
|
* pmap_unwire: clear the wired bit in the PTE
|
|
*
|
|
* => mapping should already be in map
|
|
*/
|
|
|
|
void
|
|
pmap_unwire(pmap, va)
|
|
struct pmap *pmap;
|
|
vaddr_t va;
|
|
{
|
|
pt_entry_t *ptes;
|
|
pt_entry_t *maptp;
|
|
|
|
if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) {
|
|
ptes = pmap_map_ptes(pmap); /* locks pmap */
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (!pmap_valid_entry(ptes[x86_btop(va)]))
|
|
panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
|
|
#endif
|
|
if ((ptes[x86_btop(va)] & PG_W) != 0) {
|
|
maptp = (pt_entry_t *)vtomach(
|
|
(vaddr_t)&ptes[x86_btop(va)]);
|
|
PTE_ATOMIC_CLEARBITS(&ptes[x86_btop(va)], maptp, PG_W);
|
|
pmap->pm_stats.wired_count--;
|
|
}
|
|
#ifdef DIAGNOSTIC
|
|
else {
|
|
printf("pmap_unwire: wiring for pmap %p va 0x%lx "
|
|
"didn't change!\n", pmap, va);
|
|
}
|
|
#endif
|
|
pmap_unmap_ptes(pmap); /* unlocks map */
|
|
}
|
|
#ifdef DIAGNOSTIC
|
|
else {
|
|
panic("pmap_unwire: invalid PDE");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* pmap_collect: free resources held by a pmap
|
|
*
|
|
* => optional function.
|
|
* => called when a process is swapped out to free memory.
|
|
*/
|
|
|
|
void
|
|
pmap_collect(pmap)
|
|
struct pmap *pmap;
|
|
{
|
|
/*
|
|
* free all of the pt pages by removing the physical mappings
|
|
* for its entire address space.
|
|
*/
|
|
|
|
pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS,
|
|
PMAP_REMOVE_SKIPWIRED);
|
|
}
|
|
|
|
/*
|
|
* pmap_copy: copy mappings from one pmap to another
|
|
*
|
|
* => optional function
|
|
* void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
|
|
*/
|
|
|
|
/*
|
|
* defined as macro in pmap.h
|
|
*/
|
|
|
|
int
|
|
pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa,
|
|
vm_prot_t prot, int flags, int domid)
|
|
{
|
|
pt_entry_t *ptes, opte, npte;
|
|
struct vm_page *ptp, *pg;
|
|
struct vm_page_md *mdpg;
|
|
struct pv_head *old_pvh, *new_pvh;
|
|
struct pv_entry *pve = NULL; /* XXX gcc */
|
|
int error;
|
|
bool wired = (flags & PMAP_WIRED) != 0;
|
|
int resid_delta = 0;
|
|
int wired_delta = 0;
|
|
|
|
XENPRINTK(("%s(%p, %p, %p, %08x, %08x)\n",
|
|
__func__, pmap, (void *)va, (void *)pa, prot, flags));
|
|
|
|
KASSERT(domid == DOMID_SELF || pa == 0);
|
|
KASSERT(pmap_initialized);
|
|
|
|
#ifdef DIAGNOSTIC
|
|
/* sanity check: totally out of range? */
|
|
if (va >= VM_MAX_KERNEL_ADDRESS)
|
|
panic("%s: too big", __func__);
|
|
|
|
if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
|
|
panic("%s: trying to map over PDP/APDP!", __func__);
|
|
|
|
/* sanity check: kernel PTPs should already have been pre-allocated */
|
|
if (va >= VM_MIN_KERNEL_ADDRESS &&
|
|
!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
|
|
panic("%s: missing kernel PTP!", __func__);
|
|
#endif
|
|
|
|
npte = protection_codes[prot] | PG_V | ma;
|
|
if (wired)
|
|
npte |= PG_W;
|
|
if (va < VM_MAXUSER_ADDRESS)
|
|
npte |= PG_u;
|
|
else if (va < VM_MAX_ADDRESS)
|
|
npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
|
|
if (pmap == pmap_kernel())
|
|
npte |= pmap_pg_g;
|
|
if (flags & VM_PROT_ALL) {
|
|
npte |= PG_U;
|
|
if (flags & VM_PROT_WRITE)
|
|
npte |= PG_M;
|
|
}
|
|
|
|
/* get lock */
|
|
PMAP_MAP_TO_HEAD_LOCK();
|
|
|
|
ptes = pmap_map_ptes(pmap); /* locks pmap */
|
|
if (pmap == pmap_kernel()) {
|
|
ptp = NULL;
|
|
} else {
|
|
ptp = pmap_get_ptp(pmap, pdei(va));
|
|
if (ptp == NULL) {
|
|
if (flags & PMAP_CANFAIL) {
|
|
error = ENOMEM;
|
|
goto out;
|
|
}
|
|
panic("%s: get ptp failed", __func__);
|
|
}
|
|
}
|
|
|
|
/*
|
|
	 * Get a first view of the old PTE.
	 * On SMP the PTE might gain PG_U and PG_M flags
	 * before we zap it later.
|
|
*/
|
|
opte = pte_get_ma(&ptes[x86_btop(va)]); /* old PTE */
|
|
XENPRINTK(("npte %p opte %p ptes %p idx %03x\n",
|
|
(void *)npte, (void *)opte, ptes, x86_btop(va)));
|
|
|
|
/*
|
|
* is there currently a valid mapping at our VA and does it
|
|
* map to the same MA as the one we want to map ?
|
|
*/
|
|
|
|
if (pmap_valid_entry(opte) && ((opte & PG_FRAME) == ma)) {
|
|
|
|
/*
|
|
* first, calculate pm_stats updates. resident count will not
|
|
* change since we are replacing/changing a valid mapping.
|
|
* wired count might change...
|
|
*/
|
|
wired_delta = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0);
|
|
|
|
npte |= (opte & PG_PVLIST);
|
|
|
|
XENPRINTK(("pmap update opte == pa"));
|
|
/* zap! */
|
|
error = pte_atomic_update_ma_domid(&ptes[x86_btop(va)], npte,
|
|
&opte, domid);
|
|
if (error) {
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
		 * the old mapping might still be cached in the TLB as
		 * writable; if it is on the PVLIST, sync the R/M bits
|
|
*/
|
|
if (opte & PG_PVLIST) {
|
|
KASSERT(domid == DOMID_SELF);
|
|
pg = PHYS_TO_VM_PAGE(pa);
|
|
#ifdef DIAGNOSTIC
|
|
if (pg == NULL)
|
|
panic("pmap_enter: same pa PG_PVLIST "
|
|
"mapping with unmanaged page "
|
|
"pa = 0x%lx (0x%lx)", pa,
|
|
atop(pa));
|
|
#endif
|
|
mdpg = &pg->mdpage;
|
|
old_pvh = &mdpg->mp_pvhead;
|
|
simple_lock(&old_pvh->pvh_lock);
|
|
mdpg->mp_attrs |= opte;
|
|
simple_unlock(&old_pvh->pvh_lock);
|
|
}
|
|
goto shootdown_now;
|
|
}
|
|
|
|
if (domid == DOMID_SELF) {
|
|
pg = PHYS_TO_VM_PAGE(pa);
|
|
} else {
|
|
pg = NULL;
|
|
}
|
|
XENPRINTK(("pg %p from %p, init %d\n", pg, (void *)pa,
|
|
pmap_initialized));
|
|
if (pg != NULL) {
|
|
/* This is a managed page */
|
|
npte |= PG_PVLIST;
|
|
mdpg = &pg->mdpage;
|
|
new_pvh = &mdpg->mp_pvhead;
|
|
if ((opte & (PG_PVLIST | PG_V)) != (PG_PVLIST | PG_V)) {
|
|
/* We can not steal a pve - allocate one */
|
|
pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
|
|
if (pve == NULL) {
|
|
if (!(flags & PMAP_CANFAIL))
|
|
panic("pmap_enter: "
|
|
"no pv entries available");
|
|
error = ENOMEM;
|
|
goto out;
|
|
}
|
|
}
|
|
} else {
|
|
new_pvh = NULL;
|
|
}
|
|
|
|
/*
|
|
* is there currently a valid mapping at our VA?
|
|
*/
|
|
|
|
if (pmap_valid_entry(opte)) {
|
|
|
|
/*
|
|
* changing MAs: we must remove the old one first
|
|
*/
|
|
|
|
/*
|
|
* first, calculate pm_stats updates. resident count will not
|
|
* change since we are replacing/changing a valid mapping.
|
|
* wired count might change...
|
|
*/
|
|
wired_delta = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0);
|
|
|
|
if (opte & PG_PVLIST) {
|
|
paddr_t opa = xpmap_mtop(opte & PG_FRAME);
|
|
pg = PHYS_TO_VM_PAGE(opa);
|
|
#ifdef DIAGNOSTIC
|
|
if (pg == NULL)
|
|
panic("%s: PG_PVLIST mapping with "
|
|
"unmanaged page pa = 0x%lx (0x%lx)",
|
|
__func__, pa, atop(pa));
|
|
#endif
|
|
mdpg = &pg->mdpage;
|
|
old_pvh = &mdpg->mp_pvhead;
|
|
|
|
/* new_pvh is NULL if page will not be managed */
|
|
pmap_lock_pvhs(old_pvh, new_pvh);
|
|
|
|
XENPRINTK(("pmap change pa"));
|
|
/* zap! */
|
|
error = pte_atomic_update_ma_domid(&ptes[x86_btop(va)],
|
|
npte, &opte, domid);
|
|
if (error) {
|
|
goto out;
|
|
}
|
|
|
|
pve = pmap_remove_pv(old_pvh, pmap, va);
|
|
KASSERT(pve != 0);
|
|
mdpg->mp_attrs |= opte;
|
|
|
|
if (new_pvh) {
|
|
pmap_enter_pv(new_pvh, pve, pmap, va, ptp);
|
|
simple_unlock(&new_pvh->pvh_lock);
|
|
} else
|
|
pmap_free_pv(pmap, pve);
|
|
simple_unlock(&old_pvh->pvh_lock);
|
|
|
|
goto shootdown_test;
|
|
}
|
|
} else { /* opte not valid */
|
|
resid_delta = 1;
|
|
if (wired)
|
|
wired_delta = 1;
|
|
}
|
|
|
|
if (new_pvh) {
|
|
simple_lock(&new_pvh->pvh_lock);
|
|
pmap_enter_pv(new_pvh, pve, pmap, va, ptp);
|
|
simple_unlock(&new_pvh->pvh_lock);
|
|
}
|
|
|
|
XENPRINTK(("pmap initial setup\n"));
|
|
/* zap! */
|
|
error = pte_atomic_update_ma_domid(&ptes[x86_btop(va)], npte,
|
|
&opte, domid);
|
|
if (error) {
|
|
goto out;
|
|
}
|
|
|
|
shootdown_test:
|
|
/* Update page attributes if needed */
|
|
if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
|
|
#if defined(MULTIPROCESSOR)
|
|
int32_t cpumask = 0;
|
|
#endif
|
|
shootdown_now:
|
|
#if defined(MULTIPROCESSOR)
|
|
pmap_tlb_shootdown(pmap, va, opte, &cpumask);
|
|
pmap_tlb_shootnow(cpumask);
|
|
#else
|
|
/* Don't bother deferring in the single CPU case. */
|
|
if (pmap_is_curpmap(pmap))
|
|
pmap_update_pg(va);
|
|
#endif
|
|
}
|
|
|
|
error = 0;
|
|
|
|
out:
|
|
if (error == 0) {
|
|
if (wired_delta) {
|
|
KASSERT(wired_delta == 1 || wired_delta == -1);
|
|
pmap->pm_stats.wired_count += wired_delta;
|
|
}
|
|
if (resid_delta) {
|
|
KASSERT(resid_delta == 1);
|
|
pmap->pm_stats.resident_count += resid_delta;
|
|
if (ptp) {
|
|
ptp->wire_count += resid_delta;
|
|
}
|
|
}
|
|
}
|
|
pmap_unmap_ptes(pmap);
|
|
PMAP_MAP_TO_HEAD_UNLOCK();
|
|
|
|
XENPRINTK(("%s: %d\n", __func__, error));
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* pmap_enter: enter a mapping into a pmap
|
|
*
|
|
* => must be done "now" ... no lazy-evaluation
|
|
* => we set pmap => pv_head locking
|
|
*/
|
|
|
|
int
|
|
pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
|
|
{
|
|
paddr_t ma;
|
|
|
|
if (__predict_false(pa < pmap_pa_start || pmap_pa_end <= pa)) {
|
|
ma = pa; /* XXX hack */
|
|
} else {
|
|
ma = xpmap_ptom(pa);
|
|
}
|
|
|
|
return pmap_enter_ma(pmap, va, ma, pa, prot, flags, DOMID_SELF);
|
|
}
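
/*
 * Illustrative sketch only: a typical fault-handler style use of
 * pmap_enter() as implemented above.  The flags argument carries the
 * access type that caused the fault plus PMAP_CANFAIL/PMAP_WIRED, and
 * the caller finishes with pmap_update(); "pm", "va" and "pg" are
 * hypothetical caller variables.
 *
 *	error = pmap_enter(pm, va, VM_PAGE_TO_PHYS(pg),
 *	    VM_PROT_READ | VM_PROT_WRITE, PMAP_CANFAIL | VM_PROT_WRITE);
 *	if (error)
 *		... back off and retry the fault later ...
 *	pmap_update(pm);
 */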
|
|
|
|
/*
|
|
* pmap_growkernel: increase usage of KVM space
|
|
*
|
|
* => we allocate new PTPs for the kernel and install them in all
|
|
* the pmaps on the system.
|
|
*/
|
|
|
|
vaddr_t
|
|
pmap_growkernel(maxkvaddr)
|
|
vaddr_t maxkvaddr;
|
|
{
|
|
struct pmap *kpm = pmap_kernel(), *pm;
|
|
pd_entry_t *mapdp;
|
|
pt_entry_t *maptp;
|
|
int needed_kpde; /* needed number of kernel PTPs */
|
|
int s;
|
|
paddr_t ptaddr;
|
|
|
|
needed_kpde = (u_int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1))
|
|
/ NBPD;
|
|
XENPRINTF(("pmap_growkernel %p: %d -> %d\n", (void *)maxkvaddr,
|
|
nkpde, needed_kpde));
|
|
if (needed_kpde <= nkpde)
|
|
goto out; /* we are OK */
|
|
|
|
/*
|
|
* whoops! we need to add kernel PTPs
|
|
*/
|
|
|
|
s = splhigh(); /* to be safe */
|
|
simple_lock(&kpm->pm_obj.vmobjlock);
|
|
|
|
for (/*null*/ ; nkpde < needed_kpde ; nkpde++) {
|
|
|
|
mapdp = (pt_entry_t *)vtomach((vaddr_t)&kpm->pm_pdir[PDSLOT_KERN + nkpde]);
|
|
if (uvm.page_init_done == false) {
|
|
|
|
/*
|
|
* we're growing the kernel pmap early (from
|
|
* uvm_pageboot_alloc()). this case must be
|
|
* handled a little differently.
|
|
*/
|
|
|
|
if (uvm_page_physget(&ptaddr) == false)
|
|
panic("pmap_growkernel: out of memory");
|
|
pmap_zero_page(ptaddr);
|
|
|
|
XENPRINTF(("xxxx maybe not PG_RW\n"));
|
|
PDE_SET(&kpm->pm_pdir[PDSLOT_KERN + nkpde], mapdp, ptaddr | PG_RW | PG_V);
|
|
|
|
/* count PTP as resident */
|
|
kpm->pm_stats.resident_count++;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* THIS *MUST* BE CODED SO AS TO WORK IN THE
|
|
* pmap_initialized == false CASE! WE MAY BE
|
|
* INVOKED WHILE pmap_init() IS RUNNING!
|
|
*/
|
|
|
|
if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde) == NULL) {
|
|
panic("pmap_growkernel: alloc ptp failed");
|
|
}
|
|
|
|
/* PG_u not for kernel */
|
|
PDE_CLEARBITS(&kpm->pm_pdir[PDSLOT_KERN + nkpde], mapdp, PG_u);
|
|
|
|
/* distribute new kernel PTP to all active pmaps */
|
|
simple_lock(&pmaps_lock);
|
|
for (pm = pmaps.lh_first; pm != NULL;
|
|
pm = pm->pm_list.le_next) {
|
|
XENPRINTF(("update\n"));
|
|
maptp = (pt_entry_t *)vtomach(
|
|
(vaddr_t)&pm->pm_pdir[PDSLOT_KERN + nkpde]);
|
|
PDE_COPY(&pm->pm_pdir[PDSLOT_KERN + nkpde], maptp,
|
|
&kpm->pm_pdir[PDSLOT_KERN + nkpde]);
|
|
}
|
|
|
|
/* Invalidate the PDP cache. */
|
|
pool_cache_invalidate(&pmap_pdp_cache);
|
|
pmap_pdp_cache_generation++;
|
|
|
|
simple_unlock(&pmaps_lock);
|
|
}
|
|
|
|
simple_unlock(&kpm->pm_obj.vmobjlock);
|
|
splx(s);
|
|
|
|
out:
|
|
XENPRINTF(("pmap_growkernel return %d %p\n", nkpde,
|
|
(void *)(VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD))));
|
|
return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD));
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
void pmap_dump(struct pmap *, vaddr_t, vaddr_t);
|
|
|
|
/*
|
|
* pmap_dump: dump all the mappings from a pmap
|
|
*
|
|
* => caller should not be holding any pmap locks
|
|
*/
|
|
|
|
void
|
|
pmap_dump(pmap, sva, eva)
|
|
struct pmap *pmap;
|
|
vaddr_t sva, eva;
|
|
{
|
|
pt_entry_t *ptes, *pte;
|
|
vaddr_t blkendva;
|
|
|
|
/*
|
|
* if end is out of range truncate.
|
|
	 * if (end <= start) update to max.
|
|
*/
|
|
|
|
if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
|
|
eva = VM_MAXUSER_ADDRESS;
|
|
|
|
/*
|
|
* we lock in the pmap => pv_head direction
|
|
*/
|
|
|
|
PMAP_MAP_TO_HEAD_LOCK();
|
|
ptes = pmap_map_ptes(pmap); /* locks pmap */
|
|
|
|
/*
|
|
* dumping a range of pages: we dump in PTP sized blocks (4MB)
|
|
*/
|
|
|
|
for (/* null */ ; sva < eva ; sva = blkendva) {
|
|
|
|
/* determine range of block */
|
|
blkendva = x86_round_pdr(sva+1);
|
|
if (blkendva > eva)
|
|
blkendva = eva;
|
|
|
|
/* valid block? */
|
|
if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
|
|
continue;
|
|
|
|
pte = &ptes[x86_btop(sva)];
|
|
for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) {
|
|
if (!pmap_valid_entry(*pte))
|
|
continue;
|
|
XENPRINTF(("va %#lx -> pa %#lx (pte=%#lx)\n",
|
|
sva, PTE_GET(pte), PTE_GET(pte) & PG_FRAME));
|
|
}
|
|
}
|
|
pmap_unmap_ptes(pmap);
|
|
PMAP_MAP_TO_HEAD_UNLOCK();
|
|
}
|
|
#endif
|
|
|
|
/******************** TLB shootdown code ********************/
|
|
|
|
|
|
void
|
|
pmap_tlb_shootnow(int32_t cpumask)
|
|
{
|
|
struct cpu_info *self;
|
|
#ifdef MULTIPROCESSOR
|
|
struct cpu_info *ci;
|
|
CPU_INFO_ITERATOR cii;
|
|
int s;
|
|
#ifdef DIAGNOSTIC
|
|
int count = 0;
|
|
#endif
|
|
#endif
|
|
|
|
if (cpumask == 0)
|
|
return;
|
|
|
|
self = curcpu();
|
|
#ifdef MULTIPROCESSOR
|
|
s = splipi();
|
|
self->ci_tlb_ipi_mask = cpumask;
|
|
#endif
|
|
|
|
pmap_do_tlb_shootdown(self); /* do *our* work. */
|
|
|
|
#ifdef MULTIPROCESSOR
|
|
splx(s);
|
|
|
|
/*
|
|
* Send the TLB IPI to other CPUs pending shootdowns.
|
|
*/
|
|
for (CPU_INFO_FOREACH(cii, ci)) {
|
|
if (ci == self)
|
|
continue;
|
|
if (cpumask & (1U << ci->ci_cpuid))
|
|
if (x86_send_ipi(ci, X86_IPI_TLB) != 0)
|
|
x86_atomic_clearbits_l(&self->ci_tlb_ipi_mask,
|
|
(1U << ci->ci_cpuid));
|
|
}
|
|
|
|
while (self->ci_tlb_ipi_mask != 0) {
|
|
#ifdef DIAGNOSTIC
|
|
if (count++ > 10000000)
|
|
panic("TLB IPI rendezvous failed (mask %x)",
|
|
self->ci_tlb_ipi_mask);
|
|
#endif
|
|
x86_pause();
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* pmap_tlb_shootdown:
|
|
*
|
|
* Cause the TLB entry for pmap/va to be shot down.
|
|
*/
|
|
void
|
|
pmap_tlb_shootdown(pmap, va, pte, cpumaskp)
|
|
pmap_t pmap;
|
|
vaddr_t va;
|
|
pt_entry_t pte;
|
|
int32_t *cpumaskp;
|
|
{
|
|
struct cpu_info *ci, *self;
|
|
struct pmap_tlb_shootdown_q *pq;
|
|
struct pmap_tlb_shootdown_job *pj;
|
|
CPU_INFO_ITERATOR cii;
|
|
int s;
|
|
|
|
#ifdef LARGEPAGES
|
|
if (pte & PG_PS)
|
|
va &= PG_LGFRAME;
|
|
#endif
|
|
|
|
if (pmap_initialized == false || cpus_attached == 0) {
|
|
pmap_update_pg(va);
|
|
return;
|
|
}
|
|
|
|
self = curcpu();
|
|
|
|
#if defined(MULTIPROCESSOR)
|
|
s = splipi();
|
|
#else /* defined(MULTIPROCESSOR) */
|
|
s = splvm();
|
|
#endif /* defined(MULTIPROCESSOR) */
|
|
#if 0
|
|
printf("dshootdown %lx\n", va);
|
|
#endif
|
|
|
|
for (CPU_INFO_FOREACH(cii, ci)) {
|
|
/* Note: we queue shootdown events for ourselves here! */
|
|
if (pmap_is_active(pmap, ci->ci_cpuid) == 0)
|
|
continue;
|
|
if (ci != self && !(ci->ci_flags & CPUF_RUNNING))
|
|
continue;
|
|
pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
|
|
__cpu_simple_lock(&pq->pq_slock);
|
|
|
|
/*
|
|
* If there's a global flush already queued, or a
|
|
* non-global flush, and this pte doesn't have the G
|
|
* bit set, don't bother.
|
|
*/
|
|
if (pq->pq_flushg > 0 ||
|
|
(pq->pq_flushu > 0 && (pte & pmap_pg_g) == 0)) {
|
|
__cpu_simple_unlock(&pq->pq_slock);
|
|
continue;
|
|
}
|
|
|
|
#ifdef I386_CPU
|
|
/*
|
|
* i386 CPUs can't invalidate a single VA, only
|
|
* flush the entire TLB, so don't bother allocating
|
|
* jobs for them -- just queue a `flushu'.
|
|
*
|
|
* XXX note that this can be executed for non-i386
|
|
		 * when called early (before identifycpu() has set
|
|
* cpu_class)
|
|
*/
|
|
if (cpu_class == CPUCLASS_386) {
|
|
pq->pq_flushu++;
|
|
*cpumaskp |= 1U << ci->ci_cpuid;
|
|
__cpu_simple_unlock(&pq->pq_slock);
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
pj = pmap_tlb_shootdown_job_get(pq);
|
|
pq->pq_pte |= pte;
|
|
if (pj == NULL) {
|
|
/*
|
|
* Couldn't allocate a job entry.
|
|
* Kill it now for this CPU, unless the failure
|
|
* was due to too many pending flushes; otherwise,
|
|
			 * tell other cpus to kill everything.
|
|
*/
|
|
if (ci == self && pq->pq_count < PMAP_TLB_MAXJOBS) {
|
|
pmap_update_pg(va);
|
|
__cpu_simple_unlock(&pq->pq_slock);
|
|
continue;
|
|
} else {
|
|
if (pq->pq_pte & pmap_pg_g)
|
|
pq->pq_flushg++;
|
|
else
|
|
pq->pq_flushu++;
|
|
/*
|
|
* Since we've nailed the whole thing,
|
|
* drain the job entries pending for that
|
|
* processor.
|
|
*/
|
|
pmap_tlb_shootdown_q_drain(pq);
|
|
*cpumaskp |= 1U << ci->ci_cpuid;
|
|
}
|
|
} else {
|
|
pj->pj_pmap = pmap;
|
|
pj->pj_va = va;
|
|
pj->pj_pte = pte;
|
|
TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
|
|
*cpumaskp |= 1U << ci->ci_cpuid;
|
|
}
|
|
__cpu_simple_unlock(&pq->pq_slock);
|
|
}
|
|
splx(s);
|
|
}
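
/*
 * Illustrative sketch only: the queue-then-flush pattern used by the
 * callers in this file.  Shootdowns for one operation are accumulated
 * into a local cpumask and sent in a single batch afterwards.
 *
 *	int32_t cpumask = 0;
 *
 *	pmap_tlb_shootdown(pmap, va, opte, &cpumask);
 *	... more removals/updates, accumulating into cpumask ...
 *	pmap_tlb_shootnow(cpumask);
 */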
|
|
|
|
/*
|
|
* pmap_do_tlb_shootdown_checktlbstate: check and update ci_tlbstate.
|
|
*
|
|
* => called at splipi if MULTIPROCESSOR.
|
|
* => called at splvm if !MULTIPROCESSOR.
|
|
* => return true if we need to maintain user tlbs.
|
|
*/
|
|
static inline bool
|
|
pmap_do_tlb_shootdown_checktlbstate(struct cpu_info *ci)
|
|
{
|
|
|
|
KASSERT(ci == curcpu());
|
|
|
|
if (ci->ci_tlbstate == TLBSTATE_LAZY) {
|
|
KASSERT(ci->ci_pmap != pmap_kernel());
|
|
/*
|
|
* mostly KASSERT(ci->ci_pmap->pm_cpus & (1U << ci->ci_cpuid));
|
|
*/
|
|
|
|
/*
|
|
* we no longer want tlb shootdown ipis for this pmap.
|
|
* mark the pmap no longer in use by this processor.
|
|
*/
|
|
|
|
x86_atomic_clearbits_l(&ci->ci_pmap->pm_cpus,
|
|
1U << ci->ci_cpuid);
|
|
ci->ci_tlbstate = TLBSTATE_STALE;
|
|
}
|
|
|
|
if (ci->ci_tlbstate == TLBSTATE_STALE)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* pmap_do_tlb_shootdown:
|
|
*
|
|
* Process pending TLB shootdown operations for this processor.
|
|
*/
|
|
void
|
|
pmap_do_tlb_shootdown(struct cpu_info *self)
|
|
{
|
|
u_long cpu_id = self->ci_cpuid;
|
|
struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
|
|
struct pmap_tlb_shootdown_job *pj;
|
|
int s;
|
|
#ifdef MULTIPROCESSOR
|
|
struct cpu_info *ci;
|
|
CPU_INFO_ITERATOR cii;
|
|
#endif /* MULTIPROCESSOR */
|
|
|
|
KASSERT(self == curcpu());
|
|
|
|
#ifdef MULTIPROCESSOR
|
|
s = splipi();
|
|
#else /* MULTIPROCESSOR */
|
|
s = splvm();
|
|
#endif /* MULTIPROCESSOR */
|
|
|
|
__cpu_simple_lock(&pq->pq_slock);
|
|
|
|
if (pq->pq_flushg) {
|
|
COUNT(flushg);
|
|
pmap_do_tlb_shootdown_checktlbstate(self);
|
|
tlbflushg();
|
|
pq->pq_flushg = 0;
|
|
pq->pq_flushu = 0;
|
|
pmap_tlb_shootdown_q_drain(pq);
|
|
} else {
|
|
/*
|
|
* TLB flushes for PTEs with PG_G set may be in the queue
|
|
* after a flushu, they need to be dealt with.
|
|
*/
|
|
if (pq->pq_flushu) {
|
|
COUNT(flushu);
|
|
pmap_do_tlb_shootdown_checktlbstate(self);
|
|
tlbflush();
|
|
}
|
|
while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
|
|
TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
|
|
|
|
if ((pj->pj_pte & pmap_pg_g) ||
|
|
pj->pj_pmap == pmap_kernel()) {
|
|
pmap_update_pg(pj->pj_va);
|
|
} else if (!pq->pq_flushu &&
|
|
pj->pj_pmap == self->ci_pmap) {
|
|
if (pmap_do_tlb_shootdown_checktlbstate(self))
|
|
pmap_update_pg(pj->pj_va);
|
|
}
|
|
|
|
pmap_tlb_shootdown_job_put(pq, pj);
|
|
}
|
|
|
|
pq->pq_flushu = pq->pq_pte = 0;
|
|
}
|
|
|
|
#ifdef MULTIPROCESSOR
|
|
for (CPU_INFO_FOREACH(cii, ci))
|
|
x86_atomic_clearbits_l(&ci->ci_tlb_ipi_mask,
|
|
(1U << cpu_id));
|
|
#endif /* MULTIPROCESSOR */
|
|
__cpu_simple_unlock(&pq->pq_slock);
|
|
|
|
splx(s);
|
|
}
|
|
|
|
|
|
/*
|
|
* pmap_tlb_shootdown_q_drain:
|
|
*
|
|
* Drain a processor's TLB shootdown queue. We do not perform
|
|
* the shootdown operations. This is merely a convenience
|
|
* function.
|
|
*
|
|
* Note: We expect the queue to be locked.
|
|
*/
|
|
void
|
|
pmap_tlb_shootdown_q_drain(pq)
|
|
struct pmap_tlb_shootdown_q *pq;
|
|
{
|
|
struct pmap_tlb_shootdown_job *pj;
|
|
|
|
while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
|
|
TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
|
|
pmap_tlb_shootdown_job_put(pq, pj);
|
|
}
|
|
pq->pq_pte = 0;
|
|
}
|
|
|
|
/*
|
|
* pmap_tlb_shootdown_job_get:
|
|
*
|
|
* Get a TLB shootdown job queue entry. This places a limit on
|
|
* the number of outstanding jobs a processor may have.
|
|
*
|
|
* Note: We expect the queue to be locked.
|
|
*/
|
|
struct pmap_tlb_shootdown_job *
|
|
pmap_tlb_shootdown_job_get(pq)
|
|
struct pmap_tlb_shootdown_q *pq;
|
|
{
|
|
struct pmap_tlb_shootdown_job *pj;
|
|
|
|
if (pq->pq_count >= PMAP_TLB_MAXJOBS)
|
|
return (NULL);
|
|
|
|
__cpu_simple_lock(&pmap_tlb_shootdown_job_lock);
|
|
if (pj_free == NULL) {
|
|
__cpu_simple_unlock(&pmap_tlb_shootdown_job_lock);
|
|
return NULL;
|
|
}
|
|
pj = &pj_free->pja_job;
|
|
pj_free =
|
|
(union pmap_tlb_shootdown_job_al *)pj_free->pja_job.pj_nextfree;
|
|
__cpu_simple_unlock(&pmap_tlb_shootdown_job_lock);
|
|
|
|
pq->pq_count++;
|
|
return (pj);
|
|
}
|
|
|
|
/*
|
|
* pmap_tlb_shootdown_job_put:
|
|
*
|
|
* Put a TLB shootdown job queue entry onto the free list.
|
|
*
|
|
* Note: We expect the queue to be locked.
|
|
*/
|
|
void
|
|
pmap_tlb_shootdown_job_put(pq, pj)
|
|
struct pmap_tlb_shootdown_q *pq;
|
|
struct pmap_tlb_shootdown_job *pj;
|
|
{
|
|
|
|
#ifdef DIAGNOSTIC
|
|
if (pq->pq_count == 0)
|
|
panic("pmap_tlb_shootdown_job_put: queue length inconsistency");
|
|
#endif
|
|
__cpu_simple_lock(&pmap_tlb_shootdown_job_lock);
|
|
pj->pj_nextfree = &pj_free->pja_job;
|
|
pj_free = (union pmap_tlb_shootdown_job_al *)pj;
|
|
__cpu_simple_unlock(&pmap_tlb_shootdown_job_lock);
|
|
|
|
pq->pq_count--;
|
|
}
|