Welcome PAE inside i386 -current.

This patch is inspired by work previously done by Jeremy Morse, ported by me
to -current and merged with the work previously done for port-xen, together
with additional fixes and improvements.

The PAE option is disabled by default in GENERIC (but will be enabled in ALL
in the next few days).

In short, PAE switches the CPU to a mode where physical addresses become
36 bits wide (64 GiB). The virtual address space remains at 32 bits (4 GiB).
To cope with the increased size, physical addresses are manipulated as
64-bit variables by both the kernel and the MMU.
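
As a minimal sketch of what this means for the types involved (the
typedefs here are simplified, not the kernel's exact declarations):

    #include <stdint.h>

    #ifdef PAE
    typedef uint64_t pd_entry_t;    /* PDEs/PTEs are 8 bytes wide */
    typedef uint64_t paddr_t;       /* physical addresses may exceed 4 GiB */
    #else
    typedef uint32_t pd_entry_t;    /* PDEs/PTEs are 4 bytes wide */
    typedef uint32_t paddr_t;
    #endif
    typedef uint32_t vaddr_t;       /* virtual addresses stay 32 bits */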

When supported by the CPU, PAE also allows use of the NX/XD bit, which
enforces no-execute rights on a per-physical-page basis.
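
The bit only exists because PTEs grew to 64 bits: NX/XD is bit 63 of a
PAE PTE, i.e., something like the following (assuming the usual NetBSD
name for it):

    #define PG_NX   0x8000000000000000ULL   /* no-execute, PAE only */

A page is made non-executable by OR'ing this bit into its PTE.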

Notes:

- reworked locore.S

- introduce cpu_load_pmap(), used to switch the pmap of the current CPU. As
pmap mappings are handled differently with PAE vs. !PAE and Xen vs. native,
the details are hidden within this function. This makes it easier to call
from assembly, as some features, like BIOS calls, switch to pmap_kernel()
before mapping trampoline code in low memory.
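
For reference, the native (non-Xen) version boils down to this
(condensed from the x86/x86/cpu.c change below):

    void
    cpu_load_pmap(struct pmap *pmap)
    {
    #ifdef PAE
        int i, s;
        struct cpu_info *ci;

        s = splvm();            /* just to be safe */
        ci = curcpu();
        pd_entry_t *l3_pd = ci->ci_pae_l3_pdir;
        /* repoint this CPU's pinned L3 at the new pmap's 4 L2 pages */
        for (i = 0; i < PDP_SIZE; i++)
            l3_pd[i] = pmap->pm_pdirpa[i] | PG_V;
        splx(s);
        tlbflush();             /* %cr3 itself does not change */
    #else
        lcr3(pmap_pdirpa(pmap, 0));     /* !PAE: just reload %cr3 */
    #endif
    }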

- some changes in bioscall() and kvm86_call(), to reflect the above.

- the L3 is "pinned" per-CPU, and is only manipulated by a
reduced set of functions within pmap. To track the L3, I added two
members to struct cpu_info, namely ci_pae_l3_pdirpa (PA of the L3) and
ci_pae_l3_pdir (VA of the L3). The rest of the code considers that it runs
"just like" a normal i386, except that the L2 is 4 pages long (PTP_LEVELS is
still 2).
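
As a sketch of the resulting layout (macro names here are illustrative,
not the kernel's), a 32-bit VA decomposes under PAE as 2/9/9/12 bits,
and treating the 4 contiguous L2 pages as one 2048-entry directory
gives back a flat, i386-style L2 index:

    #define VA_L3(va)       ((va) >> 30)            /* L3 slot, 0..3 */
    #define VA_L2(va)       (((va) >> 21) & 511)    /* slot in one L2 page */
    #define VA_L1(va)       (((va) >> 12) & 511)    /* L1 slot */
    /* flat index into the 4-page L2, as the 2-level code sees it: */
    #define VA_L2_FLAT(va)  ((va) >> 21)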

- similar to the ci_pae_l3_pdir{,pa} members, amd64's xen_current_user_pgd
becomes an element of struct cpu_info, as ci_xen_current_user_pgd (slowly
paving the way for the MP world).

- the bootinfo_source struct declaration is modified to cope with the paddr_t
size change under PAE (it is not correct to assume that bs_addrs entries are
paddr_t when compiled with PAE - they should remain 32 bits). bs_addrs is now
a void * array (in the bootloader's code under i386/stand/, bs_addrs
is a physaddr_t, which is an unsigned long).
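
The resulting declaration (from the machdep.c change below); the boot
loader always fills this structure with 32-bit pointers, so a paddr_t
that grows to 64 bits would shift every entry after bs_addrs[0]:

    struct bootinfo_source {
        uint32_t bs_naddrs;
        void *bs_addrs[1];      /* Actually longer. */
    };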

- fixes in multiboot code (same reason as bootinfo): paddr_t size
change. I used Elf32_* types, used RELOC() where necessary, and moved the
memcpy() calls out of the if/else if cascade (I do not expect the symbol
and string tables to overlap in ELF).

- 64-bit atomic functions for pmap.
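
An 8-byte PTE cannot be updated with a single 32-bit store without the
MMU possibly observing a torn entry, hence the 64-bit primitives. A
sketch of what a bit-set amounts to, built on atomic_cas_64() from
atomic(3), illustrative rather than the kernel's exact code:

    static inline void
    pte_setbits_64(volatile uint64_t *pte, uint64_t bits)
    {
        uint64_t o, n;

        do {
            o = *pte;
            n = o | bits;
        } while (atomic_cas_64(pte, o, n) != o);
    }

pmap_pte_setbits() and friends map to such 64-bit primitives with PAE,
as shown in the pmap.h change below.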

- all pmap_pdirpa accesses are now done through the pmap_pdirpa macro. It
hides the PAE L3/L2 layout, as well as the pm_pdirpa change in
struct pmap (it is now a PDP_SIZE array, with or without PAE).
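
With PAE, the macro splits a PDE index into which of the 4 L2 pages it
lives in and the slot within that page; the arithmetic behind the
l2tol3()/l2tol2() helpers it relies on amounts to (with 512 8-byte
entries per page):

    #define l2tol3(idx)     ((idx) >> 9)    /* which L2 page, 0..3 */
    #define l2tol2(idx)     ((idx) & 511)   /* slot within that page */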

- manipulation of the recursive mappings (PDIR_SLOT_{,A}PTEs) is done via
loops over PDP_SIZE.
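
For instance, reading back the PDs' physical addresses through the
recursive mapping becomes (taken from the pmap_create() change below):

    for (i = 0; i < PDP_SIZE; i++)
        pmap->pm_pdirpa[i] =
            pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]);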

See also http://mail-index.netbsd.org/port-i386/2010/07/17/msg002062.html

No objections were raised on port-i386@ and port-xen@ for about a week.

XXX kvm(3) will be fixed in another patch to properly handle both PAE and !PAE
kernel dumps (the VA => PA macros are slightly different, and proper 64-bit
PA support is needed in kvm_i386).

XXX Mixing PAE and !PAE modules may lead to unwanted/unexpected results. This
cannot be solved easily, and needs lots of thinking before being declared
safe (paddr_t/bus_addr_t size handling, PD/PT macro abstractions).
Commit: d94742232d
Author: jym
Date:   2010-07-24 00:45:54 +00:00
Parent: 84847a78fe

15 changed files with 438 additions and 231 deletions

Index: sys/arch/i386/conf/GENERIC

@ -1,4 +1,4 @@
# $NetBSD: GENERIC,v 1.988 2010/07/23 00:43:20 jakllsch Exp $
# $NetBSD: GENERIC,v 1.989 2010/07/24 00:45:54 jym Exp $
#
# GENERIC machine description file
#
@ -22,7 +22,7 @@ include "arch/i386/conf/std.i386"
options INCLUDE_CONFIG_FILE # embed config file in kernel binary
#ident "GENERIC-$Revision: 1.988 $"
#ident "GENERIC-$Revision: 1.989 $"
maxusers 64 # estimated number of users
@ -35,6 +35,7 @@ no options COREDUMP
# CPU-related options.
options VM86 # virtual 8086 emulation
options USER_LDT # user-settable LDT; used by WINE
#options PAE # PAE mode (36 bits physical addressing)
# Enhanced SpeedStep Technology in the Pentium M
options ENHANCED_SPEEDSTEP

Index: sys/arch/i386/i386/bioscall.S

@ -1,4 +1,4 @@
/* $NetBSD: bioscall.S,v 1.8 2008/04/28 20:23:24 martin Exp $ */
/* $NetBSD: bioscall.S,v 1.9 2010/07/24 00:45:54 jym Exp $ */
/*-
* Copyright (c) 1997 The NetBSD Foundation, Inc.
@ -30,7 +30,7 @@
*/
#include <machine/asm.h>
__KERNEL_RCSID(0, "$NetBSD: bioscall.S,v 1.8 2008/04/28 20:23:24 martin Exp $");
__KERNEL_RCSID(0, "$NetBSD: bioscall.S,v 1.9 2010/07/24 00:45:54 jym Exp $");
#include <machine/bioscall.h>
@ -39,8 +39,6 @@ __KERNEL_RCSID(0, "$NetBSD: bioscall.S,v 1.8 2008/04/28 20:23:24 martin Exp $");
/* LINTSTUB: include <sys/types.h> */
/* LINTSTUB: include <machine/bioscall.h> */
.globl _C_LABEL(PDPpaddr) /* from locore.S */
.section ".rodata"
_C_LABEL(biostramp_image):
.globl _C_LABEL(biostramp_image)
@ -69,11 +67,11 @@ NENTRY(bioscall)
pushl %ebp
movl %esp,%ebp /* set up frame ptr */
movl %cr3,%eax /* save PDP base register */
/* install lwp0 pmap */
movl _C_LABEL(kernel_pmap_ptr),%eax
pushl %eax
movl _C_LABEL(PDPpaddr),%eax /* install proc0 PDP */
movl %eax,%cr3
call _C_LABEL(cpu_load_pmap)
addl $4,%esp
movl $(BIOSTRAMP_BASE),%eax /* address of trampoline area */
pushl 12(%ebp)
@ -81,8 +79,11 @@ NENTRY(bioscall)
call *%eax /* machdep.c initializes it */
addl $8,%esp /* clear args from stack */
popl %eax
movl %eax,%cr3 /* restore PTDB register */
/* restore pmap - saved value is in curcpu()->ci_pmap */
movl %fs:(CPU_INFO_PMAP),%eax
pushl %eax
call _C_LABEL(cpu_load_pmap)
addl $4,%esp
leave
ret

Index: sys/arch/i386/i386/kvm86call.S

@ -1,4 +1,4 @@
/* $NetBSD: kvm86call.S,v 1.9 2008/01/04 15:55:31 yamt Exp $ */
/* $NetBSD: kvm86call.S,v 1.10 2010/07/24 00:45:54 jym Exp $ */
/*-
* Copyright (c) 1998 Jonathan Lemon
@ -34,7 +34,7 @@
#include "assym.h"
__KERNEL_RCSID(0, "$NetBSD: kvm86call.S,v 1.9 2008/01/04 15:55:31 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: kvm86call.S,v 1.10 2010/07/24 00:45:54 jym Exp $");
.data
.align 4
@ -79,10 +79,7 @@ ENTRY(kvm86_call)
andl $~0x0200,4(%eax,%edi,1) /* reset "task busy" */
ltr %di
movl %cr3,%eax
pushl %eax /* save address space */
movl PDPpaddr,%ecx
movl %ecx,%ebx
movl _C_LABEL(PDPpaddr),%ebx
addl $KERNBASE,%ebx /* va of Idle PDP */
movl 0(%ebx),%eax
pushl %eax /* old pde */
@ -93,7 +90,12 @@ ENTRY(kvm86_call)
movl vm86newptd,%eax /* mapping for vm86 page table */
movl %eax,0(%ebx) /* ... install as PDP entry 0 */
movl %ecx,%cr3 /* new page tables */
/* install Idle pmap (lwp0 pmap) */
movl _C_LABEL(kernel_pmap_ptr),%eax
pushl %eax
call _C_LABEL(cpu_load_pmap)
addl $4,%esp
movl vm86frame,%esp /* switch to new stack */
movl $1,kvm86_incall /* set flag for trap() */
@ -129,8 +131,12 @@ ENTRY(kvm86_ret)
popl %ebx /* saved va of Idle PDP */
popl %eax
movl %eax,0(%ebx) /* restore old pde */
popl %eax
movl %eax,%cr3 /* install old page table */
/* restore pmap - saved value is in curcpu()->ci_pmap */
movl %fs:(CPU_INFO_PMAP),%eax
pushl %eax
call _C_LABEL(cpu_load_pmap)
addl $4,%esp
movl $0,kvm86_incall /* reset trapflag */

Index: sys/arch/i386/i386/locore.S

@ -1,4 +1,4 @@
/* $NetBSD: locore.S,v 1.92 2010/07/15 18:55:27 jym Exp $ */
/* $NetBSD: locore.S,v 1.93 2010/07/24 00:45:54 jym Exp $ */
/*
* Copyright-o-rama!
@ -129,7 +129,7 @@
*/
#include <machine/asm.h>
__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.92 2010/07/15 18:55:27 jym Exp $");
__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.93 2010/07/24 00:45:54 jym Exp $");
#include "opt_compat_oldboot.h"
#include "opt_ddb.h"
@ -482,29 +482,43 @@ try586: /* Use the `cpuid' instruction. */
movl $_RELOC(tmpstk),%esp # bootstrap stack end location
/*
* Virtual address space of kernel:
* Virtual address space of kernel, without PAE. The page dir is 1 page long.
*
* text | data | bss | [syms] | [blobs] | page dir | proc0 kstack | L1 ptp
* 0 1 2 3
*
* Virtual address space of kernel, with PAE. We need 4 pages for the page dir
* and 1 page for the L3.
* text | data | bss | [syms] | [blobs] | L3 | page dir | proc0 kstack | L1 ptp
* 0 1 5 6 7
*/
#ifndef PAE
#define PROC0_PDIR_OFF 0
#define PROC0_STK_OFF (PROC0_PDIR_OFF + PAGE_SIZE)
#else
#define PROC0_L3_OFF 0
#define PROC0_PDIR_OFF 1 * PAGE_SIZE
#endif
#define PROC0_STK_OFF (PROC0_PDIR_OFF + PDP_SIZE * PAGE_SIZE)
#define PROC0_PTP1_OFF (PROC0_STK_OFF + UPAGES * PAGE_SIZE)
/*
* fillkpt
* fillkpt - Fill in a kernel page table
* eax = pte (page frame | control | status)
* ebx = page table address
* ecx = number of pages to map
*
* For PAE, each entry is 8 bytes long: we must set the 4 upper bytes to 0.
* This is done by the first instruction of fillkpt. In the non-PAE case, this
* instruction just clears the page table entry.
*/
#define fillkpt \
1: movl %eax,(%ebx) ; /* store phys addr */ \
addl $4,%ebx ; /* next pte/pde */ \
addl $PAGE_SIZE,%eax ; /* next phys page */ \
loop 1b ; \
1: movl $0,(PDE_SIZE-4)(%ebx) ; /* clear bits */ \
movl %eax,(%ebx) ; /* store phys addr */ \
addl $PDE_SIZE,%ebx ; /* next pte/pde */ \
addl $PAGE_SIZE,%eax ; /* next phys page */ \
loop 1b ;
/* Find end of kernel image. */
movl $RELOC(end),%edi
@ -538,9 +552,14 @@ try586: /* Use the `cpuid' instruction. */
incl %eax /* one more ptp for VAs stolen by bootstrap */
1: movl %eax,RELOC(nkptp)+1*4
/* tablesize = (1 + UPAGES + nkptp) << PGSHIFT; */
addl $(1+UPAGES),%eax
/* tablesize = (PDP_SIZE + UPAGES + nkptp) << PGSHIFT; */
addl $(PDP_SIZE+UPAGES),%eax
#ifdef PAE
incl %eax /* one more page for the L3 PD */
shll $PGSHIFT+1,%eax /* PTP tables are twice larger with PAE */
#else
shll $PGSHIFT,%eax
#endif
movl %eax,RELOC(tablesize)
/* ensure that nkptp covers bootstrap tables */
@ -578,7 +597,10 @@ try586: /* Use the `cpuid' instruction. */
*/
movl $_RELOC(KERNTEXTOFF),%eax
movl %eax,%ecx
shrl $(PGSHIFT-2),%ecx /* ((n >> PGSHIFT) << 2) for # pdes */
shrl $(PGSHIFT-2),%ecx /* ((n >> PGSHIFT) << 2) for # pdes */
#ifdef PAE
shll $1,%ecx /* pdes are twice larger with PAE */
#endif
addl %ecx,%ebx
/* Map the kernel text read-only. */
@ -605,36 +627,51 @@ try586: /* Use the `cpuid' instruction. */
* Construct a page table directory.
*/
/* Set up top level entries for identity mapping */
leal (PROC0_PDIR_OFF)(%esi),%ebx
leal (PROC0_PDIR_OFF)(%esi),%ebx
leal (PROC0_PTP1_OFF)(%esi),%eax
orl $(PG_V|PG_KW), %eax
movl RELOC(nkptp)+1*4,%ecx
fillkpt
/* Set up top level entries for actual kernel mapping */
leal (PROC0_PDIR_OFF + L2_SLOT_KERNBASE*4)(%esi),%ebx
leal (PROC0_PDIR_OFF + L2_SLOT_KERNBASE*PDE_SIZE)(%esi),%ebx
leal (PROC0_PTP1_OFF)(%esi),%eax
orl $(PG_V|PG_KW), %eax
movl RELOC(nkptp)+1*4,%ecx
fillkpt
/* Install a PDE recursively mapping page directory as a page table! */
leal (PROC0_PDIR_OFF + PDIR_SLOT_PTE*4)(%esi),%ebx
leal (PROC0_PDIR_OFF)(%esi),%eax
leal (PROC0_PDIR_OFF + PDIR_SLOT_PTE*PDE_SIZE)(%esi),%ebx
leal (PROC0_PDIR_OFF)(%esi),%eax
orl $(PG_V|PG_KW),%eax
movl %eax,(%ebx)
movl $PDP_SIZE,%ecx
fillkpt
#ifdef PAE
/* Fill in proc0 L3 page with entries pointing to the page dirs */
leal (PROC0_L3_OFF)(%esi),%ebx
leal (PROC0_PDIR_OFF)(%esi),%eax
orl $(PG_V),%eax
movl $PDP_SIZE,%ecx
fillkpt
/* Enable PAE mode */
movl %cr4,%eax
orl $CR4_PAE,%eax
movl %eax,%cr4
#endif
/* Save phys. addr of PDP, for libkvm. */
movl %esi,RELOC(PDPpaddr)
leal (PROC0_PDIR_OFF)(%esi),%eax
movl %eax,RELOC(PDPpaddr)
/*
* Startup checklist:
* 1. Load %cr3 with pointer to PDIR.
*/
/*
* Startup checklist:
* 1. Load %cr3 with pointer to PDIR (or L3 PD page for PAE).
*/
movl %esi,%eax # phys address of ptd in proc 0
movl %eax,%cr3 # load ptd addr into mmu
/*
* 2. Enable paging and the rest of it.
*/
@ -653,10 +690,11 @@ begin:
* memory, remove it.
*/
movl _C_LABEL(nkptp)+1*4,%ecx
leal (PROC0_PDIR_OFF)(%esi),%ebx # old, phys address of PDIR
addl $(KERNBASE), %ebx # new, virtual address of PDIR
1: movl $0,(%ebx)
addl $4,%ebx
leal (PROC0_PDIR_OFF)(%esi),%ebx # old, phys address of PDIR
addl $(KERNBASE), %ebx # new, virtual address of PDIR
1: movl $0,(PDE_SIZE-4)(%ebx) # Upper bits (for PAE)
movl $0,(%ebx)
addl $PDE_SIZE,%ebx
loop 1b
/* Relocate atdevbase. */
@ -688,9 +726,13 @@ begin:
movl _C_LABEL(tablesize),%eax
addl %esi,%eax # skip past stack and page tables
#ifdef PAE
pushl $0 # init386() expects a 64 bits paddr_t with PAE
#endif
pushl %eax
call _C_LABEL(init386) # wire 386 chip for unix operation
addl $4+NGDT*8,%esp # pop temporary gdt
addl $PDE_SIZE,%esp # pop paddr_t
addl $NGDT*8,%esp # pop temporary gdt
#ifdef SAFARI_FIFO_HACK
movb $5,%al
@ -765,7 +807,7 @@ start:
#endif
pushl %esi
call _C_LABEL(init386) # wire 386 chip for unix operation
addl $PDE_SIZE,%esp
addl $PDE_SIZE,%esp # pop paddr_t
call _C_LABEL(main)
#if defined(XEN) && !defined(XEN_COMPAT_030001)

Index: sys/arch/i386/i386/machdep.c

@ -1,4 +1,4 @@
/* $NetBSD: machdep.c,v 1.690 2010/07/15 23:20:34 jym Exp $ */
/* $NetBSD: machdep.c,v 1.691 2010/07/24 00:45:54 jym Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.690 2010/07/15 23:20:34 jym Exp $");
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.691 2010/07/24 00:45:54 jym Exp $");
#include "opt_beep.h"
#include "opt_compat_ibcs2.h"
@ -320,7 +320,7 @@ int biosmem_implicit;
* boot loader. Only be used by native_loader(). */
struct bootinfo_source {
uint32_t bs_naddrs;
paddr_t bs_addrs[1]; /* Actually longer. */
void *bs_addrs[1]; /* Actually longer. */
};
/* Only called by locore.h; no need to be in a header file. */
@ -384,10 +384,10 @@ native_loader(int bl_boothowto, int bl_bootdev,
for (i = 0; i < bl_bootinfo->bs_naddrs; i++) {
struct btinfo_common *bc;
bc = (struct btinfo_common *)(bl_bootinfo->bs_addrs[i]);
bc = bl_bootinfo->bs_addrs[i];
if ((paddr_t)(data + bc->len) >
(paddr_t)(&bidest->bi_data[0] + BOOTINFO_MAXSIZE))
if ((data + bc->len) >
(&bidest->bi_data[0] + BOOTINFO_MAXSIZE))
break;
memcpy(data, bc, bc->len);
@ -1312,6 +1312,14 @@ init386(paddr_t first_avail)
(void *)atdevbase));
#endif
#if defined(PAE) && !defined(XEN)
/*
* Save VA and PA of L3 PD of boot processor (for Xen, this is done
* in xen_pmap_bootstrap())
*/
cpu_info_primary.ci_pae_l3_pdirpa = rcr3();
cpu_info_primary.ci_pae_l3_pdir = (pd_entry_t *)(rcr3() + KERNBASE);
#endif /* PAE && !XEN */
#ifdef XBOX
/*
@ -1457,6 +1465,9 @@ init386(paddr_t first_avail)
VM_PROT_ALL, 0); /* protection */
pmap_update(pmap_kernel());
memcpy((void *)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size);
/* Needed early, for bioscall() and kvm86_call() */
cpu_info_primary.ci_pmap = pmap_kernel();
#endif
#endif /* !XEN */

Index: sys/arch/i386/i386/mptramp.S

@ -1,4 +1,4 @@
/* $NetBSD: mptramp.S,v 1.20 2010/02/09 23:09:47 jym Exp $ */
/* $NetBSD: mptramp.S,v 1.21 2010/07/24 00:45:55 jym Exp $ */
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
@ -76,7 +76,7 @@
*/
#include <machine/asm.h>
__KERNEL_RCSID(0, "$NetBSD: mptramp.S,v 1.20 2010/02/09 23:09:47 jym Exp $");
__KERNEL_RCSID(0, "$NetBSD: mptramp.S,v 1.21 2010/07/24 00:45:55 jym Exp $");
#include "opt_mpbios.h" /* for MPDEBUG */
@ -160,6 +160,12 @@ _TRMP_LABEL(mp_startup)
movl %eax,%cr4
1:
#ifdef PAE /* Enable PAE */
movl %cr4,%eax
or $CR4_PAE,%eax
movl %eax,%cr4
#endif
movl RELOC(mp_pdirpa),%ecx
HALTT(0x5,%ecx)

Index: sys/arch/i386/i386/multiboot.c

@ -1,4 +1,4 @@
/* $NetBSD: multiboot.c,v 1.19 2009/02/22 18:05:42 ahoka Exp $ */
/* $NetBSD: multiboot.c,v 1.20 2010/07/24 00:45:55 jym Exp $ */
/*-
* Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: multiboot.c,v 1.19 2009/02/22 18:05:42 ahoka Exp $");
__KERNEL_RCSID(0, "$NetBSD: multiboot.c,v 1.20 2010/07/24 00:45:55 jym Exp $");
#include "opt_multiboot.h"
@ -276,12 +276,11 @@ copy_syms(struct multiboot_info *mi)
{
#define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE))
int i;
Elf32_Shdr *symtabp, *strtabp;
struct multiboot_symbols *ms;
size_t symsize, strsize;
paddr_t symaddr, straddr;
paddr_t symstart, strstart;
Elf32_Shdr *symtabp, *strtabp;
Elf32_Word symsize, strsize;
Elf32_Addr symaddr, straddr;
Elf32_Addr symstart, strstart;
/*
* Check if the Multiboot information header has symbols or not.
@ -336,38 +335,32 @@ copy_syms(struct multiboot_info *mi)
* that if the tables start before the kernel's end address,
* they will not grow over this address.
*/
if ((paddr_t)symtabp < (paddr_t)&end - KERNBASE &&
(paddr_t)strtabp < (paddr_t)&end - KERNBASE) {
symstart = (paddr_t)((vaddr_t)&end - KERNBASE);
if ((void *)symtabp < RELOC(void *, &end) &&
(void *)strtabp < RELOC(void *, &end)) {
symstart = RELOC(Elf32_Addr, &end);
strstart = symstart + symsize;
memcpy((void *)symstart, (void *)symaddr, symsize);
memcpy((void *)strstart, (void *)straddr, strsize);
} else if ((paddr_t)symtabp > (paddr_t)&end - KERNBASE &&
(paddr_t)strtabp < (paddr_t)&end - KERNBASE) {
symstart = (paddr_t)((vaddr_t)&end - KERNBASE);
} else if ((void *)symtabp > RELOC(void *, &end) &&
(void *)strtabp < RELOC(void *, &end)) {
symstart = RELOC(Elf32_Addr, &end);
strstart = symstart + symsize;
memcpy((void *)symstart, (void *)symaddr, symsize);
memcpy((void *)strstart, (void *)straddr, strsize);
} else if ((paddr_t)symtabp < (paddr_t)&end - KERNBASE &&
(paddr_t)strtabp > (paddr_t)&end - KERNBASE) {
strstart = (paddr_t)((vaddr_t)&end - KERNBASE);
} else if ((void *)symtabp < RELOC(void *, &end) &&
(void *)strtabp > RELOC(void *, &end)) {
strstart = RELOC(Elf32_Addr, &end);
symstart = strstart + strsize;
memcpy((void *)strstart, (void *)straddr, strsize);
memcpy((void *)symstart, (void *)symaddr, symsize);
} else {
/* symtabp and strtabp are both over end */
if ((paddr_t)symtabp < (paddr_t)strtabp) {
symstart = (paddr_t)((vaddr_t)&end - KERNBASE);
if (symtabp < strtabp) {
symstart = RELOC(Elf32_Addr, &end);
strstart = symstart + symsize;
memcpy((void *)symstart, (void *)symaddr, symsize);
memcpy((void *)strstart, (void *)straddr, strsize);
} else {
strstart = (paddr_t)((vaddr_t)&end - KERNBASE);
strstart = RELOC(Elf32_Addr, &end);
symstart = strstart + strsize;
memcpy((void *)strstart, (void *)straddr, strsize);
memcpy((void *)symstart, (void *)symaddr, symsize);
}
}
memcpy((void *)strstart, (void *)straddr, strsize);
memcpy((void *)symstart, (void *)symaddr, symsize);
*RELOC(int *, &esym) =
(int)(symstart + symsize + strsize + KERNBASE);

Index: sys/arch/i386/include/pmap.h

@ -1,4 +1,4 @@
/* $NetBSD: pmap.h,v 1.106 2010/07/15 18:58:40 jym Exp $ */
/* $NetBSD: pmap.h,v 1.107 2010/07/24 00:45:55 jym Exp $ */
/*
*
@ -181,25 +181,45 @@
* note that in the APTE_BASE space, the APDP appears at VA
* "APDP_BASE" (0xfffff000).
*
* When PAE is in use, the L3 page directory breaks up the address space in
* 4 1GB * regions, each of them broken in 512 2MB regions by the L2 PD
* (the size of the pages at the L1 level is still 4K).
* - PAE support -
* ---------------
*
* PAE adds another layer of indirection during address translation, breaking
* up the translation process in 3 different levels:
* - L3 page directory, containing 4 * 64-bits addresses (index determined by
* bits [31:30] from the virtual address). This breaks up the address space
* in 4 1GB regions.
* - the PD (L2), containing 512 64-bits addresses, breaking each L3 region
* in 512 * 2MB regions.
* - the PT (L1), also containing 512 64-bits addresses (at L1, the size of
* the pages is still 4K).
*
* The kernel virtual space is mapped by the last entry in the L3 page,
* the first 3 entries mapping the user VA space.
*
* Because the L3 has only 4 entries of 1GB each, we can't use recursive
* mappings at this level for PDP_PDE and APDP_PDE (this would eat 2 of the
* 4GB virtual space). There's also restrictions imposed by Xen on the
* last entry of the L3 PD, which makes it hard to use one L3 page per pmap
* switch %cr3 to switch pmaps. So we use one static L3 page which is
* always loaded in %cr3, and we use it as 2 virtual PD pointers: one for
* kernel space (L3[3], always loaded), and one for user space (in fact the
* first 3 entries of the L3 PD), and we claim the VM has only a 2-level
* PTP (with the L2 index extended by 2 bytes).
* PTE_BASE and APTE_BASE will need 4 entries in the L2 page table.
* In addition, we can't recursively map L3[3] (Xen wants the ref count on
* this page to be exactly once), so we use a shadow PD page for the last
* L2 PD. The shadow page could be static too, but to make pm_pdir[]
* contigous we'll allocate/copy one page per pmap.
* mappings at this level for PDP_PDE and APDP_PDE (this would eat up 2 of
* the 4GB virtual space). There are also restrictions imposed by Xen on the
* last entry of the L3 PD (reference count to this page cannot be bigger
* than 1), which makes it hard to use one L3 page per pmap to switch
* between pmaps using %cr3.
*
* As such, each CPU gets its own L3 page that is always loaded into its %cr3
* (ci_pae_l3_pd in the associated cpu_info struct). We claim that the VM has
* only a 2-level PTP (similar to the non-PAE case). L2 PD is now 4 contiguous
* pages long (corresponding to the 4 entries of the L3), and the different
* index/slots (like PDP_PDE) are adapted accordingly.
*
* Kernel space remains in L3[3], L3[0-2] maps the user VA space. Switching
* between pmaps consists in modifying the first 3 entries of the CPU's L3 page.
*
* PTE_BASE and APTE_BASE will need 4 entries in the L2 PD pages to map the
* L2 pages recursively.
*
* In addition, for Xen, we can't recursively map L3[3] (Xen wants the ref
* count on this page to be exactly one), so we use a shadow PD page for
* the last L2 PD. The shadow page could be static too, but to make pm_pdir[]
* contiguous we'll allocate/copy one page per pmap.
*/
/* XXX MP should we allocate one APDP_PDE per processor?? */
@ -219,12 +239,16 @@
#ifdef PAE
#define L2_SLOT_PTE (KERNBASE/NBPD_L2-4) /* 1532: for recursive PDP map */
#define L2_SLOT_KERN (KERNBASE/NBPD_L2) /* 1536: start of kernel space */
#define L2_SLOT_APTE 1960 /* 1964-2047 reserved by Xen */
#ifndef XEN
#define L2_SLOT_APTE 2044 /* 2044: alternative recursive slot */
#else
#define L2_SLOT_APTE 1960 /* 1964-2047 reserved by Xen */
#endif
#else /* PAE */
#define L2_SLOT_PTE (KERNBASE/NBPD_L2-1) /* 767: for recursive PDP map */
#define L2_SLOT_KERN (KERNBASE/NBPD_L2) /* 768: start of kernel space */
#ifndef XEN
#define L2_SLOT_APTE 1023 /* 1023: alternative recursive slot */
#define L2_SLOT_APTE 1023 /* 1023: alternative recursive slot */
#else
#define L2_SLOT_APTE 1007 /* 1008-1023 reserved by Xen */
#endif
@ -254,17 +278,17 @@
#define AL2_BASE ((pd_entry_t *)((char *)AL1_BASE + L2_SLOT_PTE * NBPD_L1))
#define PDP_PDE (L2_BASE + PDIR_SLOT_PTE)
#ifdef PAE
#if defined(PAE) && defined(XEN)
/*
* when PAE is in use we can't write APDP_PDE though the recursive mapping,
* because it points to the shadow PD. Use the kernel PD instead, which is
* static
* when PAE is in use under Xen, we can't write APDP_PDE through the recursive
* mapping, because it points to the shadow PD. Use the kernel PD instead,
* which is static
*/
#define APDP_PDE (&pmap_kl2pd[l2tol2(PDIR_SLOT_APTE)])
#define APDP_PDE_SHADOW (L2_BASE + PDIR_SLOT_APTE)
#else /* PAE */
#else /* PAE && XEN */
#define APDP_PDE (L2_BASE + PDIR_SLOT_APTE)
#endif /* PAE */
#endif /* PAE && XEN */
#define PDP_BASE L2_BASE
#define APDP_BASE AL2_BASE
@ -316,6 +340,17 @@
#define pmap_pa2pte(a) (a)
#define pmap_pte2pa(a) ((a) & PG_FRAME)
#define pmap_pte_set(p, n) do { *(p) = (n); } while (0)
#define pmap_pte_flush() /* nothing */
#ifdef PAE
#define pmap_pte_cas(p, o, n) atomic_cas_64((p), (o), (n))
#define pmap_pte_testset(p, n) \
atomic_swap_64((volatile uint64_t *)p, n)
#define pmap_pte_setbits(p, b) \
atomic_or_64((volatile uint64_t *)p, b)
#define pmap_pte_clearbits(p, b) \
atomic_and_64((volatile uint64_t *)p, ~(b))
#else /* PAE */
#define pmap_pte_cas(p, o, n) atomic_cas_32((p), (o), (n))
#define pmap_pte_testset(p, n) \
atomic_swap_ulong((volatile unsigned long *)p, n)
@ -323,8 +358,9 @@
atomic_or_ulong((volatile unsigned long *)p, b)
#define pmap_pte_clearbits(p, b) \
atomic_and_ulong((volatile unsigned long *)p, ~(b))
#define pmap_pte_flush() /* nothing */
#else
#endif /* PAE */
#else /* XEN */
static __inline pt_entry_t
pmap_pa2pte(paddr_t pa)
{
@ -400,11 +436,7 @@ pmap_pte_flush(void)
#endif
#ifdef PAE
/* addresses of static pages used for PAE pmap: */
/* the L3 page */
pd_entry_t *pmap_l3pd;
paddr_t pmap_l3paddr;
/* the kernel's L2 page */
/* Address of the static kernel's L2 page */
pd_entry_t *pmap_kl2pd;
paddr_t pmap_kl2paddr;
#endif

Index: sys/arch/x86/include/cpu.h

@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.22 2010/05/09 20:32:41 rmind Exp $ */
/* $NetBSD: cpu.h,v 1.23 2010/07/24 00:45:56 jym Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
@ -50,6 +50,7 @@
* Definitions unique to x86 cpu support.
*/
#include <machine/frame.h>
#include <machine/pte.h>
#include <machine/segments.h>
#include <machine/tss.h>
#include <machine/intrdefs.h>
@ -162,6 +163,17 @@ struct cpu_info {
struct i386tss ci_doubleflt_tss;
struct i386tss ci_ddbipi_tss;
#endif
#ifdef PAE
uint32_t ci_pae_l3_pdirpa; /* PA of L3 PD */
pd_entry_t * ci_pae_l3_pdir; /* VA pointer to L3 PD */
#endif
#if defined(XEN) && defined(__x86_64__)
/* Currently active user PGD (can't use rcr3() with Xen) */
paddr_t ci_xen_current_user_pgd;
#endif
char *ci_doubleflt_stack;
char *ci_ddbipi_stack;
@ -276,6 +288,7 @@ lwp_t *x86_curlwp(void);
void cpu_boot_secondary_processors(void);
void cpu_init_idle_lwps(void);
void cpu_init_msrs(struct cpu_info *, bool);
void cpu_load_pmap(struct pmap *);
extern uint32_t cpus_attached;
#ifndef XEN

Index: sys/arch/x86/include/pmap.h

@ -1,4 +1,4 @@
/* $NetBSD: pmap.h,v 1.32 2010/07/15 19:02:26 jym Exp $ */
/* $NetBSD: pmap.h,v 1.33 2010/07/24 00:45:56 jym Exp $ */
/*
*
@ -144,11 +144,7 @@ struct pmap {
#define pm_lock pm_obj[0].vmobjlock
LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */
pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */
#ifdef PAE
paddr_t pm_pdirpa[PDP_SIZE];
#else
paddr_t pm_pdirpa; /* PA of PD (read-only after create) */
#endif
paddr_t pm_pdirpa[PDP_SIZE]; /* PA of PDs (read-only after create) */
struct vm_page *pm_ptphint[PTP_LEVELS-1];
/* pointer to a PTP in our pmap */
struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */
@ -166,13 +162,13 @@ struct pmap {
of pmap */
};
/* macro to access pm_pdirpa */
/* macro to access pm_pdirpa slots */
#ifdef PAE
#define pmap_pdirpa(pmap, index) \
((pmap)->pm_pdirpa[l2tol3(index)] + l2tol2(index) * sizeof(pd_entry_t))
#else
#define pmap_pdirpa(pmap, index) \
((pmap)->pm_pdirpa + (index) * sizeof(pd_entry_t))
((pmap)->pm_pdirpa[0] + (index) * sizeof(pd_entry_t))
#endif
/*
@ -187,6 +183,8 @@ struct pmap {
* PDPpaddr is the physical address of the kernel's PDP.
* - i386 non-PAE and amd64: PDPpaddr corresponds directly to the %cr3
* value associated to the kernel process, proc0.
* - i386 PAE: it still represents the PA of the kernel's PDP (L2). Due to
* the L3 PD, it cannot be considered as the equivalent of a %cr3 any more.
* - Xen: it corresponds to the PFN of the kernel's PDP.
*/
extern u_long PDPpaddr;

Index: sys/arch/x86/x86/cpu.c

@ -1,4 +1,4 @@
/* $NetBSD: cpu.c,v 1.72 2010/07/08 11:22:24 rmind Exp $ */
/* $NetBSD: cpu.c,v 1.73 2010/07/24 00:45:56 jym Exp $ */
/*-
* Copyright (c) 2000, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.72 2010/07/08 11:22:24 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.73 2010/07/24 00:45:56 jym Exp $");
#include "opt_ddb.h"
#include "opt_mpbios.h" /* for MPDEBUG */
@ -717,9 +717,18 @@ cpu_hatch(void *v)
KASSERT((ci->ci_flags & CPUF_RUNNING) == 0);
lcr3(pmap_kernel()->pm_pdirpa);
#ifdef PAE
pd_entry_t * l3_pd = ci->ci_pae_l3_pdir;
for (i = 0 ; i < PDP_SIZE; i++) {
l3_pd[i] = pmap_kernel()->pm_pdirpa[i] | PG_V;
}
lcr3(ci->ci_pae_l3_pdirpa);
#else
lcr3(pmap_pdirpa(pmap_kernel(), 0));
#endif
pcb = lwp_getpcb(curlwp);
pcb->pcb_cr3 = pmap_kernel()->pm_pdirpa;
pcb->pcb_cr3 = rcr3();
pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp);
lcr0(pcb->pcb_cr0);
@ -812,6 +821,8 @@ cpu_copy_trampoline(void)
static void
tss_init(struct i386tss *tss, void *stack, void *func)
{
KASSERT(curcpu()->ci_pmap == pmap_kernel());
memset(tss, 0, sizeof *tss);
tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16);
tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
@ -819,7 +830,8 @@ tss_init(struct i386tss *tss, void *stack, void *func)
tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL);
tss->tss_gs = tss->__tss_es = tss->__tss_ds =
tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL);
tss->tss_cr3 = pmap_kernel()->pm_pdirpa;
/* %cr3 contains the value associated to pmap_kernel */
tss->tss_cr3 = rcr3();
tss->tss_esp = (int)((char *)stack + USPACE - 16);
tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
tss->__tss_eflags = PSL_MBO | PSL_NT; /* XXX not needed? */
@ -1094,3 +1106,26 @@ x86_cpu_idle_halt(void)
x86_enable_intr();
}
}
/*
* Loads pmap for the current CPU.
*/
void
cpu_load_pmap(struct pmap *pmap)
{
#ifdef PAE
int i, s;
struct cpu_info *ci;
s = splvm(); /* just to be safe */
ci = curcpu();
pd_entry_t *l3_pd = ci->ci_pae_l3_pdir;
for (i = 0 ; i < PDP_SIZE; i++) {
l3_pd[i] = pmap->pm_pdirpa[i] | PG_V;
}
splx(s);
tlbflush();
#else /* PAE */
lcr3(pmap_pdirpa(pmap, 0));
#endif /* PAE */
}

Index: sys/arch/x86/x86/pmap.c

@ -1,4 +1,4 @@
/* $NetBSD: pmap.c,v 1.112 2010/07/15 21:14:31 jym Exp $ */
/* $NetBSD: pmap.c,v 1.113 2010/07/24 00:45:56 jym Exp $ */
/*
* Copyright (c) 2007 Manuel Bouyer.
@ -149,7 +149,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.112 2010/07/15 21:14:31 jym Exp $");
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.113 2010/07/24 00:45:56 jym Exp $");
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
@ -422,8 +422,6 @@ paddr_t avail_end; /* PA of last available physical page */
#ifdef __x86_64__
/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */
static paddr_t xen_dummy_user_pgd;
/* Currently active user PGD (can't use rcr3()) */
static paddr_t xen_current_user_pgd = 0;
#endif /* __x86_64__ */
paddr_t pmap_pa_start; /* PA of first physical page for this domain */
paddr_t pmap_pa_end; /* PA of last physical page for this domain */
@ -1283,7 +1281,6 @@ pmap_bootstrap(vaddr_t kva_start)
{
struct pmap *kpm;
pt_entry_t *pte;
struct pcb *pcb;
int i;
vaddr_t kva;
#ifndef XEN
@ -1334,14 +1331,11 @@ pmap_bootstrap(vaddr_t kva_start)
kpm->pm_ptphint[i] = NULL;
}
memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */
pcb = lwp_getpcb(&lwp0);
kpm->pm_pdir = (pd_entry_t *)(pcb->pcb_cr3 + KERNBASE);
#ifdef PAE
kpm->pm_pdir = (pd_entry_t *)(PDPpaddr + KERNBASE);
for (i = 0; i < PDP_SIZE; i++)
kpm->pm_pdirpa[i] = (paddr_t)pcb->pcb_cr3 + PAGE_SIZE * i;
#else
kpm->pm_pdirpa = (paddr_t)pcb->pcb_cr3;
#endif
kpm->pm_pdirpa[i] = PDPpaddr + PAGE_SIZE * i;
kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
x86_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
@ -1612,7 +1606,7 @@ pmap_prealloc_lowmem_ptps(void)
paddr_t newp;
paddr_t pdes_pa;
pdes_pa = pmap_kernel()->pm_pdirpa;
pdes_pa = pmap_pdirpa(pmap_kernel(), 0);
level = PTP_LEVELS;
for (;;) {
newp = avail_start;
@ -1715,6 +1709,40 @@ pmap_cpu_init_late(struct cpu_info *ci)
evcnt_attach_dynamic(&ci->ci_tlb_evcnt, EVCNT_TYPE_MISC,
NULL, device_xname(ci->ci_dev), "TLB IPI");
#ifdef PAE
int ret;
struct pglist pg;
struct vm_page *vmap;
/* The BP has already its own L3 page allocated in locore.S. */
if (ci == &cpu_info_primary)
return;
/*
* Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, the PA
* must reside below the 4GB boundary.
*/
ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0);
vmap = TAILQ_FIRST(&pg);
if (ret != 0 || vmap == NULL)
panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n",
__func__, cpu_index(ci), ret);
ci->ci_pae_l3_pdirpa = vmap->phys_addr;
ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
if (ci->ci_pae_l3_pdir == NULL)
panic("%s: failed to allocate L3 PD for CPU %d\n",
__func__, cpu_index(ci));
pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
VM_PROT_READ | VM_PROT_WRITE, 0);
pmap_update(pmap_kernel());
#endif
}
/*
@ -1931,7 +1959,7 @@ pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
* If ptp is a L3 currently mapped in kernel space,
* clear it before freeing
*/
if (pmap->pm_pdirpa == xen_current_user_pgd
if (pmap_pdirpa(pmap, 0) == curcpu()->ci_xen_current_user_pgd
&& level == PTP_LEVELS - 1)
pmap_pte_set(&pmap_kernel()->pm_pdir[index], 0);
#endif /* XEN && __x86_64__ */
@ -2274,13 +2302,9 @@ pmap_create(void)
goto try_again;
}
#ifdef PAE
for (i = 0; i < PDP_SIZE; i++)
pmap->pm_pdirpa[i] =
pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE + i]);
#else
pmap->pm_pdirpa = pmap_pte2pa(pmap->pm_pdir[PDIR_SLOT_PTE]);
#endif
LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
@ -2602,11 +2626,11 @@ pmap_reactivate(struct pmap *pmap)
KASSERT(kpreempt_disabled());
#if defined(XEN) && defined(__x86_64__)
KASSERT(pmap->pm_pdirpa == xen_current_user_pgd);
KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd);
#elif defined(PAE)
KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(pmap_l3pd[0]));
KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
#elif !defined(XEN)
KASSERT(pmap->pm_pdirpa == pmap_pte2pa(rcr3()));
KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3()));
#endif
/*
@ -2708,12 +2732,12 @@ pmap_load(void)
atomic_and_32(&oldpmap->pm_kernel_cpus, ~cpumask);
#if defined(XEN) && defined(__x86_64__)
KASSERT(oldpmap->pm_pdirpa == xen_current_user_pgd ||
KASSERT(pmap_pdirpa(oldpmap, 0) == ci->ci_xen_current_user_pgd ||
oldpmap == pmap_kernel());
#elif defined(PAE)
KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(pmap_l3pd[0]));
KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
#elif !defined(XEN)
KASSERT(oldpmap->pm_pdirpa == pmap_pte2pa(rcr3()));
KASSERT(pmap_pdirpa(oldpmap, 0) == pmap_pte2pa(rcr3()));
#endif
KASSERT((pmap->pm_cpus & cpumask) == 0);
KASSERT((pmap->pm_kernel_cpus & cpumask) == 0);
@ -2735,36 +2759,13 @@ pmap_load(void)
* from other CPUs, we're good to load the page tables.
*/
#ifdef PAE
pcb->pcb_cr3 = pmap_l3paddr;
pcb->pcb_cr3 = ci->ci_pae_l3_pdirpa;
#else
pcb->pcb_cr3 = pmap->pm_pdirpa;
pcb->pcb_cr3 = pmap_pdirpa(pmap, 0);
#endif
#if defined(XEN) && defined(__x86_64__)
/* kernel pmap always in cr3 and should never go in user cr3 */
if (pmap_pdirpa(pmap, 0) != pmap_pdirpa(pmap_kernel(), 0)) {
/*
* Map user space address in kernel space and load
* user cr3
*/
int i, s;
pd_entry_t *old_pgd, *new_pgd;
paddr_t addr;
s = splvm();
new_pgd = pmap->pm_pdir;
old_pgd = pmap_kernel()->pm_pdir;
addr = xpmap_ptom(pmap_pdirpa(pmap_kernel(), 0));
for (i = 0; i < PDIR_SLOT_PTE;
i++, addr += sizeof(pd_entry_t)) {
if ((new_pgd[i] & PG_V) || (old_pgd[i] & PG_V))
xpq_queue_pte_update(addr, new_pgd[i]);
}
tlbflush();
xen_set_user_pgd(pmap_pdirpa(pmap, 0));
xen_current_user_pgd = pmap_pdirpa(pmap, 0);
splx(s);
}
#else /* XEN && x86_64 */
#if defined(XEN)
#ifdef i386
#ifdef XEN
/*
* clear APDP slot, in case it points to a page table that has
* been freed
@ -2773,34 +2774,19 @@ pmap_load(void)
pmap_unmap_apdp();
}
/* lldt() does pmap_pte_flush() */
#else /* XEN */
#if defined(i386)
#endif /* XEN */
#ifndef XEN
ci->ci_tss.tss_ldt = pmap->pm_ldt_sel;
ci->ci_tss.tss_cr3 = pcb->pcb_cr3;
#endif
#endif /* XEN */
#endif /* !XEN */
#endif /* i386 */
lldt(pmap->pm_ldt_sel);
#ifdef PAE
{
paddr_t l3_pd = xpmap_ptom_masked(pmap_l3paddr);
int i;
int s = splvm();
/* don't update the kernel L3 slot */
for (i = 0 ; i < PDP_SIZE - 1; i++, l3_pd += sizeof(pd_entry_t)) {
xpq_queue_pte_update(l3_pd,
xpmap_ptom(pmap->pm_pdirpa[i]) | PG_V);
}
tlbflush();
splx(s);
}
#else /* PAE */
{
u_int gen = uvm_emap_gen_return();
lcr3(pcb->pcb_cr3);
cpu_load_pmap(pmap);
uvm_emap_update(gen);
}
#endif /* PAE */
#endif /* XEN && x86_64 */
ci->ci_want_pmapload = 0;
@ -2867,11 +2853,11 @@ pmap_deactivate(struct lwp *l)
}
#if defined(XEN) && defined(__x86_64__)
KASSERT(pmap->pm_pdirpa == xen_current_user_pgd);
KASSERT(pmap_pdirpa(pmap, 0) == ci->ci_xen_current_user_pgd);
#elif defined(PAE)
KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(pmap_l3pd[0]));
KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(ci->ci_pae_l3_pdir[0]));
#elif !defined(XEN)
KASSERT(pmap->pm_pdirpa == pmap_pte2pa(rcr3()));
KASSERT(pmap_pdirpa(pmap, 0) == pmap_pte2pa(rcr3()));
#endif
KASSERT(ci->ci_pmap == pmap);
@ -4761,6 +4747,21 @@ pmap_init_tmp_pgtbl(paddr_t pg)
tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1];
memcpy(tmp_pml, kernel_pml, PAGE_SIZE);
#ifdef PAE
/*
* Use the last 4 entries of the L2 page as L3 PD entries. These
* last entries are unlikely to be used for temporary mappings.
* 508: maps 0->1GB (userland)
* 509: unused
* 510: unused
* 511: maps 3->4GB (kernel)
*/
tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PG_V;
tmp_pml[509] = 0;
tmp_pml[510] = 0;
tmp_pml[511] = pmap_pdirpa(pmap_kernel(),PDIR_SLOT_KERN) | PG_V;
#endif
for (level = PTP_LEVELS - 1; level > 0; --level) {
tmp_pml = (void *)x86_tmp_pml_vaddr[level];
@ -4771,5 +4772,10 @@ pmap_init_tmp_pgtbl(paddr_t pg)
tmp_pml = (void *)x86_tmp_pml_vaddr[0];
tmp_pml[pl_i(pg, 1)] = (pg & PG_FRAME) | PG_RW | PG_V;
#ifdef PAE
/* Return the PA of the L3 page (entry 508 of the L2 page) */
return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t);
#endif
return x86_tmp_pml_paddr[PTP_LEVELS - 1];
}

Index: sys/arch/xen/x86/cpu.c

@ -1,4 +1,4 @@
/* $NetBSD: cpu.c,v 1.46 2010/07/06 20:50:35 cegger Exp $ */
/* $NetBSD: cpu.c,v 1.47 2010/07/24 00:45:56 jym Exp $ */
/* NetBSD: cpu.c,v 1.18 2004/02/20 17:35:01 yamt Exp */
/*-
@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.46 2010/07/06 20:50:35 cegger Exp $");
__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.47 2010/07/24 00:45:56 jym Exp $");
#include "opt_ddb.h"
#include "opt_multiprocessor.h"
@ -582,6 +582,11 @@ cpu_init(struct cpu_info *ci)
lcr4(rcr4() | CR4_OSXMMEXCPT);
}
#ifdef __x86_64__
/* No user PGD mapped for this CPU yet */
ci->ci_xen_current_user_pgd = 0;
#endif
atomic_or_32(&cpus_running, ci->ci_cpumask);
atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
}
@ -1111,3 +1116,59 @@ x86_cpu_idle_xen(void)
x86_enable_intr();
}
}
/*
* Loads pmap for the current CPU.
*/
void
cpu_load_pmap(struct pmap *pmap)
{
#ifdef i386
#ifdef PAE
int i, s;
struct cpu_info *ci;
s = splvm(); /* just to be safe */
ci = curcpu();
paddr_t l3_pd = xpmap_ptom_masked(ci->ci_pae_l3_pdirpa);
/* don't update the kernel L3 slot */
for (i = 0 ; i < PDP_SIZE - 1; i++) {
xpq_queue_pte_update(l3_pd + i * sizeof(pd_entry_t),
xpmap_ptom(pmap->pm_pdirpa[i]) | PG_V);
}
splx(s);
tlbflush();
#else /* PAE */
lcr3(pmap_pdirpa(pmap, 0));
#endif /* PAE */
#endif /* i386 */
#ifdef __x86_64__
int i, s;
pd_entry_t *old_pgd, *new_pgd;
paddr_t addr;
struct cpu_info *ci;
/* kernel pmap always in cr3 and should never go in user cr3 */
if (pmap_pdirpa(pmap, 0) != pmap_pdirpa(pmap_kernel(), 0)) {
ci = curcpu();
/*
* Map user space address in kernel space and load
* user cr3
*/
s = splvm();
new_pgd = pmap->pm_pdir;
old_pgd = pmap_kernel()->pm_pdir;
addr = xpmap_ptom(pmap_pdirpa(pmap_kernel(), 0));
for (i = 0; i < PDIR_SLOT_PTE;
i++, addr += sizeof(pd_entry_t)) {
if ((new_pgd[i] & PG_V) || (old_pgd[i] & PG_V))
xpq_queue_pte_update(addr, new_pgd[i]);
}
tlbflush();
xen_set_user_pgd(pmap_pdirpa(pmap, 0));
ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0);
splx(s);
}
#endif /* __x86_64__ */
}

Index: sys/arch/xen/x86/x86_xpmap.c

@ -1,4 +1,4 @@
/* $NetBSD: x86_xpmap.c,v 1.20 2010/07/15 23:20:34 jym Exp $ */
/* $NetBSD: x86_xpmap.c,v 1.21 2010/07/24 00:45:56 jym Exp $ */
/*
* Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
@ -69,7 +69,7 @@
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.20 2010/07/15 23:20:34 jym Exp $");
__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.21 2010/07/24 00:45:56 jym Exp $");
#include "opt_xen.h"
#include "opt_ddb.h"
@ -814,22 +814,26 @@ xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
#else
xpq_queue_pin_table(xpmap_ptom_masked(new_pgd - KERNBASE));
#endif
#ifdef __i386__
/* Save phys. addr of PDP, for libkvm. */
PDPpaddr = (long)pde - KERNBASE;
#ifdef PAE
/* also save the address of the L3 page */
pmap_l3pd = pdtpe;
pmap_l3paddr = (new_pgd - KERNBASE);
#endif /* PAE */
#endif /* i386 */
PDPpaddr = (u_long)pde - KERNBASE; /* PDP is the L2 with PAE */
#else
PDPpaddr = (u_long)new_pgd - KERNBASE;
#endif
/* Switch to new tables */
__PRINTK(("switch to PGD\n"));
xpq_queue_pt_switch(xpmap_ptom_masked(new_pgd - KERNBASE));
__PRINTK(("bt_pgd[PDIR_SLOT_PTE] now entry %#" PRIxPADDR "\n",
bt_pgd[PDIR_SLOT_PTE]));
#ifdef PAE
if (final) {
/* save the address of the L3 page */
cpu_info_primary.ci_pae_l3_pdir = pdtpe;
cpu_info_primary.ci_pae_l3_pdirpa = (new_pgd - KERNBASE);
/* now enter kernel's PTE mappings */
addr = (u_long)pde - KERNBASE + PAGE_SIZE * 3;
xpq_queue_pte_update(
@ -839,8 +843,6 @@ xen_bootstrap_tables (vaddr_t old_pgd, vaddr_t new_pgd,
}
#endif
/* Now we can safely reclaim space taken by old tables */
__PRINTK(("unpin old PGD\n"));

Index: sys/arch/xen/x86/xenfunc.c

@ -1,4 +1,4 @@
/* $NetBSD: xenfunc.c,v 1.10 2010/02/12 01:55:46 jym Exp $ */
/* $NetBSD: xenfunc.c,v 1.11 2010/07/24 00:45:56 jym Exp $ */
/*
*
@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xenfunc.c,v 1.10 2010/02/12 01:55:46 jym Exp $");
__KERNEL_RCSID(0, "$NetBSD: xenfunc.c,v 1.11 2010/07/24 00:45:56 jym Exp $");
#include <sys/param.h>
@ -58,10 +58,10 @@ invlpg(vaddr_t addr)
splx(s);
}
#ifndef __x86_64__
void
lldt(u_short sel)
{
#ifndef __x86_64__
struct cpu_info *ci;
ci = curcpu();
@ -75,8 +75,8 @@ lldt(u_short sel)
xen_set_ldt(ci->ci_gdt[IDXSELN(sel)].ld.ld_base,
ci->ci_gdt[IDXSELN(sel)].ld.ld_entries);
ci->ci_curldt = sel;
}
#endif
}
void
ltr(u_short sel)