
/* $NetBSD: pmap.h,v 1.115 2012/02/19 10:39:06 cherry Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 2001 Wasabi Systems, Inc.
* All rights reserved.
*
* Written by Frank van der Linden for Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed for the NetBSD Project by
* Wasabi Systems, Inc.
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
* or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _I386_PMAP_H_
#define _I386_PMAP_H_
#if defined(_KERNEL_OPT)
#include "opt_user_ldt.h"
#include "opt_xen.h"
#endif
#include <sys/atomic.h>
#include <i386/pte.h>
#include <machine/segments.h>
#if defined(_KERNEL)
#include <machine/cpufunc.h>
#endif
#include <uvm/uvm_object.h>
#ifdef XEN
#include <xen/xenfunc.h>
#include <xen/xenpmap.h>
#endif /* XEN */
/*
* see pte.h for a description of i386 MMU terminology and hardware
* interface.
*
* a pmap describes a process's 4GB virtual address space. when PAE
* is not in use, this virtual address space can be broken up into 1024 4MB
* regions which are described by PDEs in the PDP. the PDEs are defined as
* follows:
*
* (ranges are inclusive -> exclusive, just like vm_map_entry start/end)
* (the following assumes that KERNBASE is 0xc0000000)
*
* PDE#s      VA range               usage
* 0->766     0x0 -> 0xbfc00000      user address space
* 767        0xbfc00000 ->          recursive mapping of PDP (used for
*            0xc0000000             linear mapping of PTPs)
* 768->1023  0xc0000000 ->          kernel address space (constant
*            0xffc00000             across all pmaps/processes)
*            <end>
*
*
* note: a recursive PDP mapping provides a way to map all the PTEs for
* a 4GB address space into a linear chunk of virtual memory. in other
* words, the PTE for page 0 is the first int mapped into the 4MB recursive
* area. the PTE for page 1 is the second int. the very last int in the
* 4MB range is the PTE that maps VA 0xfffff000 (the last page in a 4GB
* address space).
*
* all pmaps' PDs must have the same values in slots 768->1023 so that
* the kernel is always mapped in every process. these values are loaded
* into the PD at pmap creation time.
*
* at any one time only one pmap can be active on a processor. this is
* the pmap whose PDP is pointed to by processor register %cr3. this pmap
* will have all its PTEs mapped into memory at the recursive mapping
* point (slot #767 as shown above). when the pmap code wants to find the
* PTE for a virtual address, all it has to do is the following:
*
* address of PTE = (767 * 4MB) + (VA / PAGE_SIZE) * sizeof(pt_entry_t)
*                = 0xbfc00000 + (VA / 4096) * 4
* (a worked example follows this comment block)
*
* what happens if the pmap layer is asked to perform an operation
* on a pmap that is not the one which is currently active? in that
* case we temporarily load this pmap, perform the operation, and mark
* the currently active one as pending lazy reload.
*
* the following figure shows the effects of the recursive PDP mapping:
*
* PDP (%cr3)
* +----+
* | 0| -> PTP#0 that maps VA 0x0 -> 0x400000
* | |
* | |
* | 767| -> points back to PDP (%cr3) mapping VA 0xbfc00000 -> 0xc0000000
* | 768| -> first kernel PTP (maps 0xc0000000 -> 0xc0400000)
* | |
* +----+
*
* note that the PDE#767 VA (0xbfc00000) is defined as "PTE_BASE"
*
* starting at VA 0xbfc00000 the current active PDP (%cr3) acts as a
* PTP:
*
* PTP#767 == PDP(%cr3) => maps VA 0xbfc00000 -> 0xc0000000
* +----+
* | 0| -> maps the contents of PTP#0 at VA 0xbfc00000->0xbfc01000
* | |
* | |
* | 767| -> maps contents of PTP#767 (the PDP) at VA 0xbfeff000
* | 768| -> maps contents of first kernel PTP
* | |
* |1023|
* +----+
*
* note that mapping of the PDP at PTP#767's VA (0xbfeff000) is
* defined as "PDP_BASE".... within that mapping there is one
* define:
* "PDP_PDE" (0xbfeffbfc) is the VA of the PDE in the PDP
* which points back to itself.
*
* - PAE support -
* ---------------
*
* PAE adds another layer of indirection during address translation, breaking
* up the translation process into 3 different levels:
* - the L3 page directory, containing 4 64-bit entries (index determined by
* bits [31:30] of the virtual address). This breaks up the address space
* into 4 1GB regions.
* - the PD (L2), containing 512 64-bit entries, breaking each L3 region
* into 512 2MB regions.
* - the PT (L1), also containing 512 64-bit entries (at L1, the size of
* the pages is still 4K).
*
* The kernel virtual space is mapped by the last entry in the L3 page,
* the first 3 entries mapping the user VA space.
*
* Because the L3 has only 4 entries of 1GB each, we can't use recursive
* mappings at this level for PDP_PDE (this would eat up 2 of the 4GB
* virtual space). There are also restrictions imposed by Xen on the
* last entry of the L3 PD (the reference count of this page cannot
* exceed 1), which makes it hard to use one L3 page per pmap to
* switch between pmaps using %cr3.
*
* As such, each CPU gets its own L3 page that is always loaded into its %cr3
* (ci_pae_l3_pd in the associated cpu_info struct). We claim that the VM has
* only a 2-level PTP (similar to the non-PAE case). L2 PD is now 4 contiguous
* pages long (corresponding to the 4 entries of the L3), and the different
* index/slots (like PDP_PDE) are adapted accordingly.
*
* Kernel space remains in L3[3], L3[0-2] maps the user VA space. Switching
* between pmaps consists of modifying the first 3 entries of the CPU's L3 page.
*
* PTE_BASE will need 4 entries in the L2 PD pages to map the L2 pages
* recursively.
*
* In addition, for Xen, we can't recursively map L3[3] (Xen wants the ref
* count on this page to be exactly one), so we use a shadow PD page for
* the last L2 PD. The shadow page could be static too, but to make pm_pdir[]
* contiguous we'll allocate/copy one page per pmap.
*/
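
/*
* Worked example of the PTE lookup formula above (non-PAE; an
* illustrative sketch, not compiled as part of this header -- the pmap
* implementation has its own helpers for this). PTE_BASE, defined
* below, is the 4MB linear PTE window at PDE slot 767:
*
*	pt_entry_t *
*	example_vtopte(vaddr_t va)
*	{
*		return PTE_BASE + (va >> PAGE_SHIFT);
*	}
*
* e.g. for va = 0xc0000000 the page number is 0xc0000, so its PTE lives
* at 0xbfc00000 + 0xc0000 * 4 = 0xbff00000.
*/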
/*
* Mask to get rid of the sign-extended part of addresses.
*/
#define VA_SIGN_MASK 0
#define VA_SIGN_NEG(va) ((va) | VA_SIGN_MASK)
/*
* XXXfvdl this one's not right.
*/
#define VA_SIGN_POS(va) ((va) & ~VA_SIGN_MASK)
/*
* the following defines identify the slots used as described above.
*/
#ifdef PAE
#define L2_SLOT_PTE (KERNBASE/NBPD_L2-4) /* 1532: for recursive PDP map */
#define L2_SLOT_KERN (KERNBASE/NBPD_L2) /* 1536: start of kernel space */
#else /* PAE */
#define L2_SLOT_PTE (KERNBASE/NBPD_L2-1) /* 767: for recursive PDP map */
#define L2_SLOT_KERN (KERNBASE/NBPD_L2) /* 768: start of kernel space */
#endif /* PAE */
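
/*
* Sanity arithmetic for the slot numbers above (KERNBASE 0xc0000000):
*
*	!PAE: NBPD_L2 = 4MB, KERNBASE / NBPD_L2 = 0xc0000000 / 0x400000 = 768,
*	      so L2_SLOT_PTE = 768 - 1 = 767 (one recursive slot).
*	PAE:  NBPD_L2 = 2MB, KERNBASE / NBPD_L2 = 0xc0000000 / 0x200000 = 1536,
*	      so L2_SLOT_PTE = 1536 - 4 = 1532 (4 recursive slots, one per
*	      L2 page, since the L2 spans PDP_SIZE = 4 pages).
*/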
#define L2_SLOT_KERNBASE L2_SLOT_KERN
#define PDIR_SLOT_KERN L2_SLOT_KERN
#define PDIR_SLOT_PTE L2_SLOT_PTE
/*
* the following defines give the virtual addresses of various MMU
* data structures:
* PTE_BASE: the base VA of the linear PTE mappings
* PDP_BASE: the base VA of the recursive mapping of the PDP
* PDP_PDE: the VA of the PDE that points back to the PDP
*/
#define PTE_BASE ((pt_entry_t *) (PDIR_SLOT_PTE * NBPD_L2))
#define L1_BASE PTE_BASE
#define L2_BASE ((pd_entry_t *)((char *)L1_BASE + L2_SLOT_PTE * NBPD_L1))
#define PDP_PDE (L2_BASE + PDIR_SLOT_PTE)
#define PDP_BASE L2_BASE
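
/*
* For the non-PAE layout these expand to the addresses quoted in the
* comment at the top of this file (illustrative arithmetic):
*
*	PTE_BASE = 767 * NBPD_L2 = 767 * 4MB = 0xbfc00000
*	L2_BASE  = PTE_BASE + 767 * NBPD_L1 = 0xbfc00000 + 767 * 4KB
*	         = 0xbfeff000 (== PDP_BASE)
*	PDP_PDE  = L2_BASE + 767, i.e. 0xbfeff000 + 767 * sizeof(pd_entry_t)
*	         = 0xbfeffbfc
*/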
/* largest value (-1 for APTP space) */
#define NKL2_MAX_ENTRIES (NTOPLEVEL_PDES - (KERNBASE/NBPD_L2) - 1)
#define NKL1_MAX_ENTRIES (unsigned long)(NKL2_MAX_ENTRIES * NPDPG)
#define NKL2_KIMG_ENTRIES 0 /* XXX unused */
#define NKL2_START_ENTRIES 0 /* XXX computed at runtime */
#define NKL1_START_ENTRIES 0 /* XXX unused */
#ifndef XEN
#define NTOPLEVEL_PDES (PAGE_SIZE * PDP_SIZE / (sizeof (pd_entry_t)))
#else /* !XEN */
#ifdef PAE
#define NTOPLEVEL_PDES 1964 /* 1964-2047 reserved by Xen */
#else /* PAE */
#define NTOPLEVEL_PDES 1008 /* 1008-1023 reserved by Xen */
#endif /* PAE */
#endif /* !XEN */
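
/*
* NTOPLEVEL_PDES worked out (illustrative): natively it is simply the
* number of pd_entry_t's held by the PDP_SIZE-page PD,
*
*	!PAE: 4096 * 1 / 4 = 1024	PAE: 4096 * 4 / 8 = 2048
*
* while under Xen the hypervisor reserves the top slots for itself,
* hence the smaller hardcoded values (1008 of 1024, 1964 of 2048).
*/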
#define NPDPG (PAGE_SIZE / sizeof (pd_entry_t))
#define PTP_MASK_INITIALIZER { L1_FRAME, L2_FRAME }
#define PTP_SHIFT_INITIALIZER { L1_SHIFT, L2_SHIFT }
#define NKPTP_INITIALIZER { NKL1_START_ENTRIES, NKL2_START_ENTRIES }
#define NKPTPMAX_INITIALIZER { NKL1_MAX_ENTRIES, NKL2_MAX_ENTRIES }
#define NBPD_INITIALIZER { NBPD_L1, NBPD_L2 }
#define PDES_INITIALIZER { L2_BASE }
#define PTP_LEVELS 2
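
/*
* Sketch of how a VA splits with this 2-level layout (non-PAE shown;
* illustrative only -- the indexing helpers the pmap actually uses come
* from the MI x86 headers):
*
*	va = 0xc0001000:
*	L2 (PDE) index = va >> L2_SHIFT = 0xc0001000 >> 22  = 768
*	L1 (PTE) index = (va >> L1_SHIFT) & (NPDPG - 1)     = 1
*	page offset    = va & (PAGE_SIZE - 1)               = 0
*/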
/*
* PG_AVAIL usage: we make use of the ignored bits of the PTE
*/
#define PG_W PG_AVAIL1 /* "wired" mapping */
#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */
#define PG_X PG_AVAIL3 /* executable mapping */
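
/*
* Example of how these software bits ride along in a PTE (a hedged
* sketch; PG_V/PG_RW are the hardware bits from pte.h, and pmap_pa2pte()
* is defined below):
*
*	pt_entry_t npte;
*
*	npte = pmap_pa2pte(pa) | PG_V | PG_RW | PG_W | PG_PVLIST;
*
* the MMU ignores PG_AVAIL1-3, so the pmap can encode "wired" and
* "on pv-list" state directly in the entry.
*/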
/*
* Number of PTEs per cache line, assuming a 32-byte cache line:
* 32 / 4 = 8 with 4-byte PTEs, 32 / 8 = 4 with PAE's 8-byte PTEs.
* Used to avoid false sharing of cache lines.
*/
#ifdef PAE
#define NPTECL 4
#else
#define NPTECL 8
#endif
#include <x86/pmap.h>
#ifndef XEN
#define pmap_pa2pte(a) (a)
#define pmap_pte2pa(a) ((a) & PG_FRAME)
#define pmap_pte_set(p, n) do { *(p) = (n); } while (0)
#define pmap_pte_flush() /* nothing */
#ifdef PAE
#define pmap_pte_cas(p, o, n) atomic_cas_64((p), (o), (n))
#define pmap_pte_testset(p, n) \
	atomic_swap_64((volatile uint64_t *)p, n)
#define pmap_pte_setbits(p, b) \
	atomic_or_64((volatile uint64_t *)p, b)
#define pmap_pte_clearbits(p, b) \
	atomic_and_64((volatile uint64_t *)p, ~(b))
#else /* PAE */
#define pmap_pte_cas(p, o, n) atomic_cas_32((p), (o), (n))
#define pmap_pte_testset(p, n) \
	atomic_swap_ulong((volatile unsigned long *)p, n)
#define pmap_pte_setbits(p, b) \
	atomic_or_ulong((volatile unsigned long *)p, b)
#define pmap_pte_clearbits(p, b) \
	atomic_and_ulong((volatile unsigned long *)p, ~(b))
#endif /* PAE */
#else /* XEN */
extern kmutex_t pte_lock;
static __inline pt_entry_t
pmap_pa2pte(paddr_t pa)
{
	return (pt_entry_t)xpmap_ptom_masked(pa);
}

static __inline paddr_t
pmap_pte2pa(pt_entry_t pte)
{
	return xpmap_mtop_masked(pte & PG_FRAME);
}

static __inline void
pmap_pte_set(pt_entry_t *pte, pt_entry_t npte)
{
	int s = splvm();
	xpq_queue_pte_update(xpmap_ptetomach(pte), npte);
	splx(s);
}

static __inline pt_entry_t
pmap_pte_cas(volatile pt_entry_t *ptep, pt_entry_t o, pt_entry_t n)
{
	pt_entry_t opte;

	mutex_enter(&pte_lock);
	opte = *ptep;
	if (opte == o) {
		xpq_queue_pte_update(xpmap_ptetomach(__UNVOLATILE(ptep)), n);
		xpq_flush_queue();
	}
	mutex_exit(&pte_lock);
	return opte;
}

static __inline pt_entry_t
pmap_pte_testset(volatile pt_entry_t *pte, pt_entry_t npte)
{
	pt_entry_t opte;

	mutex_enter(&pte_lock);
	opte = *pte;
	xpq_queue_pte_update(xpmap_ptetomach(__UNVOLATILE(pte)), npte);
	xpq_flush_queue();
	mutex_exit(&pte_lock);
	return opte;
}

static __inline void
pmap_pte_setbits(volatile pt_entry_t *pte, pt_entry_t bits)
{
	mutex_enter(&pte_lock);
	xpq_queue_pte_update(xpmap_ptetomach(__UNVOLATILE(pte)), (*pte) | bits);
	xpq_flush_queue();
	mutex_exit(&pte_lock);
}

static __inline void
pmap_pte_clearbits(volatile pt_entry_t *pte, pt_entry_t bits)
{
	mutex_enter(&pte_lock);
	xpq_queue_pte_update(xpmap_ptetomach(__UNVOLATILE(pte)),
	    (*pte) & ~bits);
	xpq_flush_queue();
	mutex_exit(&pte_lock);
}

static __inline void
pmap_pte_flush(void)
{
	int s = splvm();
	xpq_flush_queue();
	splx(s);
}
#endif /* XEN */
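
/*
* Typical use of the PTE update primitives above, identical for the
* native and Xen variants (a hedged sketch; the real callers live in
* the pmap implementation):
*
*	volatile pt_entry_t *pte = ...;
*	pt_entry_t opte, npte;
*
*	do {
*		opte = *pte;
*		npte = opte & ~PG_RW;	(e.g. revoke write access)
*	} while (pmap_pte_cas(pte, opte, npte) != opte);
*	pmap_pte_flush();
*
* natively pmap_pte_flush() expands to nothing; under Xen it pushes any
* still-queued updates (e.g. from pmap_pte_set()) to the hypervisor.
*/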
struct trapframe;
int pmap_exec_fixup(struct vm_map *, struct trapframe *, struct pcb *);
void pmap_ldt_cleanup(struct lwp *);
#include <x86/pmap_pv.h>
#define __HAVE_VM_PAGE_MD
#define VM_MDPAGE_INIT(pg) \
	memset(&(pg)->mdpage, 0, sizeof((pg)->mdpage)); \
	PMAP_PAGE_INIT(&(pg)->mdpage.mp_pp)
struct vm_page_md {
	struct pmap_page mp_pp;
};
#endif /* _I386_PMAP_H_ */