PR port-i386/40143 Viewing an mpeg transport stream with mplayer causes crash

Fix numerous problems:

1. LDT updates are not atomic.

2. Number of processes running with private LDTs and/or I/O bitmaps
   is not capped. A system with a high maxproc limit can be panicked.

3. LDTR can be leaked over context switch.

4. GDT slot allocations can race, giving the same LDT slot to two procs.

5. Incomplete interrupt/trap frames can be stacked.

6. In some rare cases, segment faults are not handled correctly.
This commit is contained in:
ad 2009-03-21 14:41:29 +00:00
parent 2600da8765
commit d16d704d62
14 changed files with 251 additions and 298 deletions

View File

@ -1,11 +1,11 @@
/* $NetBSD: gdt.c,v 1.20 2009/03/14 15:36:00 dsl Exp $ */
/* $NetBSD: gdt.c,v 1.21 2009/03/21 14:41:29 ad Exp $ */
/*-
* Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
* Copyright (c) 1996, 1997, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by John T. Kohl and Charles M. Hannum.
* by John T. Kohl, by Charles M. Hannum, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.20 2009/03/14 15:36:00 dsl Exp $");
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.21 2009/03/21 14:41:29 ad Exp $");
#include "opt_multiprocessor.h"
#include "opt_xen.h"
@ -47,6 +47,7 @@ __KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.20 2009/03/14 15:36:00 dsl Exp $");
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/user.h>
#include <sys/cpu.h>
#include <uvm/uvm.h>
@ -63,38 +64,11 @@ int gdt_dynavail;
int gdt_next; /* next available slot for sweeping */
int gdt_free; /* next free slot; terminated with GNULL_SEL */
kmutex_t gdt_lock_store;
static inline void gdt_lock(void);
static inline void gdt_unlock(void);
void gdt_init(void);
void gdt_grow(void);
int gdt_get_slot(void);
void gdt_put_slot(int);
/*
* Lock and unlock the GDT, to avoid races in case gdt_{ge,pu}t_slot() sleep
* waiting for memory.
*
* Note that the locking done here is not sufficient for multiprocessor
* systems. A freshly allocated slot will still be of type SDT_SYSNULL for
* some time after the GDT is unlocked, so gdt_compact() could attempt to
* reclaim it.
*/
static inline void
gdt_lock(void)
{
mutex_enter(&gdt_lock_store);
}
static inline void
gdt_unlock(void)
{
mutex_exit(&gdt_lock_store);
}
void
set_mem_gdt(struct mem_segment_descriptor *sd, void *base, size_t limit,
int type, int dpl, int gran, int def32, int is64)
@ -149,8 +123,6 @@ gdt_init(void)
vaddr_t va;
struct cpu_info *ci = &cpu_info_primary;
mutex_init(&gdt_lock_store, MUTEX_DEFAULT, IPL_NONE);
gdt_size = MINGDTSIZ;
gdt_dyncount = 0;
gdt_next = 0;
@ -279,7 +251,7 @@ gdt_get_slot(void)
gdt = (struct sys_segment_descriptor *)&gdtstore[DYNSEL_START];
gdt_lock();
KASSERT(mutex_owned(&cpu_lock));
if (gdt_free != GNULL_SEL) {
slot = gdt_free;
@ -300,7 +272,6 @@ gdt_get_slot(void)
}
gdt_dyncount++;
gdt_unlock();
return (slot);
}
@ -312,16 +283,14 @@ gdt_put_slot(int slot)
{
struct sys_segment_descriptor *gdt;
KASSERT(mutex_owned(&cpu_lock));
gdt = (struct sys_segment_descriptor *)&gdtstore[DYNSEL_START];
gdt_lock();
gdt_dyncount--;
gdt[slot].sd_type = SDT_SYSNULL;
gdt[slot].sd_xx3 = gdt_free;
gdt_free = slot;
gdt_unlock();
}
int
@ -333,12 +302,14 @@ tss_alloc(struct x86_64_tss *tss)
gdt = (struct sys_segment_descriptor *)&gdtstore[DYNSEL_START];
mutex_enter(&cpu_lock);
slot = gdt_get_slot();
#if 0
printf("tss_alloc: slot %d addr %p\n", slot, &gdt[slot]);
#endif
set_sys_gdt(&gdt[slot], tss, sizeof (struct x86_64_tss)-1,
SDT_SYS386TSS, SEL_KPL, 0);
mutex_exit(&cpu_lock);
#if 0
printf("lolimit %lx lobase %lx type %lx dpl %lx p %lx hilimit %lx\n"
"xx1 %lx gran %lx hibase %lx xx2 %lx zero %lx xx3 %lx pad %lx\n",
@ -366,7 +337,9 @@ void
tss_free(int sel)
{
#ifndef XEN
mutex_enter(&cpu_lock);
gdt_put_slot(IDXDYNSEL(sel));
mutex_exit(&cpu_lock);
#else
KASSERT(sel == GSEL(GNULL_SEL, SEL_KPL));
#endif
@ -378,6 +351,8 @@ ldt_alloc(struct pmap *pmap, char *ldt, size_t len)
int slot;
struct sys_segment_descriptor *gdt;
KASSERT(mutex_owned(&cpu_lock));
gdt = (struct sys_segment_descriptor *)&gdtstore[DYNSEL_START];
slot = gdt_get_slot();
@ -390,6 +365,8 @@ ldt_free(struct pmap *pmap)
{
int slot;
KASSERT(mutex_owned(&cpu_lock));
slot = IDXDYNSEL(pmap->pm_ldt_sel);
gdt_put_slot(slot);

View File

@ -1,4 +1,4 @@
/* $NetBSD: machdep.c,v 1.128 2009/02/26 13:56:46 jmcneill Exp $ */
/* $NetBSD: machdep.c,v 1.129 2009/03/21 14:41:29 ad Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008
@ -112,7 +112,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.128 2009/02/26 13:56:46 jmcneill Exp $");
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.129 2009/03/21 14:41:29 ad Exp $");
/* #define XENDEBUG_LOW */
@ -1685,7 +1685,7 @@ check_mcontext(struct lwp *l, const mcontext_t *mcp, struct trapframe *tf)
if (((gr[_REG_RFLAGS] ^ tf->tf_rflags) & PSL_USERSTATIC) != 0)
return EINVAL;
if (__predict_false((pmap->pm_flags & PMF_USER_LDT) != 0)) {
if (__predict_false(pmap->pm_ldt != NULL)) {
error = valid_user_selector(l, gr[_REG_ES], NULL, 0);
if (error != 0)
return error;
@ -1777,7 +1777,7 @@ memseg_baseaddr(struct lwp *l, uint64_t seg, char *ldtp, int llen,
if (ldtp != NULL) {
dt = ldtp;
len = llen;
} else if (pmap->pm_flags & PMF_USER_LDT) {
} else if (pmap->pm_ldt != NULL) {
len = pmap->pm_ldt_len; /* XXX broken */
dt = (char *)pmap->pm_ldt;
} else {

View File

@ -1,11 +1,11 @@
/* $NetBSD: gdt.c,v 1.46 2009/03/16 09:37:35 cegger Exp $ */
/* $NetBSD: gdt.c,v 1.47 2009/03/21 14:41:29 ad Exp $ */
/*-
* Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
* Copyright (c) 1996, 1997, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by John T. Kohl and Charles M. Hannum.
* by John T. Kohl, by Charles M. Hannum, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.46 2009/03/16 09:37:35 cegger Exp $");
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.47 2009/03/21 14:41:29 ad Exp $");
#include "opt_multiprocessor.h"
#include "opt_xen.h"
@ -40,6 +40,7 @@ __KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.46 2009/03/16 09:37:35 cegger Exp $");
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/user.h>
#include <sys/cpu.h>
#include <uvm/uvm.h>
@ -57,40 +58,14 @@ int gdt_next[2]; /* next available slot for sweeping */
int gdt_free[2]; /* next free slot; terminated with GNULL_SEL */
#endif
static int ldt_count; /* number of LDTs */
static int ldt_max = 1000;/* max number of LDTs */
static kmutex_t gdt_lock_store;
static inline void gdt_lock(void);
static inline void gdt_unlock(void);
void gdt_init(void);
void gdt_grow(int);
int gdt_get_slot1(int);
void gdt_put_slot1(int, int);
/*
* Lock and unlock the GDT, to avoid races in case gdt_{ge,pu}t_slot() sleep
* waiting for memory.
*
* Note that the locking done here is not sufficient for multiprocessor
* systems. A freshly allocated slot will still be of type SDT_SYSNULL for
* some time after the GDT is unlocked, so gdt_compact() could attempt to
* reclaim it.
*/
static inline void
gdt_lock(void)
{
mutex_enter(&gdt_lock_store);
}
static inline void
gdt_unlock(void)
{
mutex_exit(&gdt_lock_store);
}
static void
update_descriptor(union descriptor *table, union descriptor *entry)
{
@ -141,8 +116,6 @@ gdt_init(void)
vaddr_t va;
struct cpu_info *ci = &cpu_info_primary;
mutex_init(&gdt_lock_store, MUTEX_DEFAULT, IPL_NONE);
max_len = MAXGDTSIZ * sizeof(gdt[0]);
min_len = MINGDTSIZ * sizeof(gdt[0]);
@ -325,6 +298,9 @@ gdt_grow(int which)
int
gdt_get_slot(void)
{
KASSERT(mutex_owned(&cpu_lock));
return gdt_get_slot1(0);
}
@ -334,7 +310,7 @@ gdt_get_slot1(int which)
int slot;
size_t offset;
gdt_lock();
KASSERT(mutex_owned(&cpu_lock));
if (gdt_free[which] != GNULL_SEL) {
slot = gdt_free[which];
@ -352,7 +328,6 @@ gdt_get_slot1(int which)
}
gdt_count[which]++;
gdt_unlock();
return (slot);
}
@ -362,6 +337,9 @@ gdt_get_slot1(int which)
void
gdt_put_slot(int slot)
{
KASSERT(mutex_owned(&cpu_lock));
gdt_put_slot1(slot, 0);
}
@ -372,7 +350,8 @@ gdt_put_slot1(int slot, int which)
d.raw[0] = 0;
d.raw[1] = 0;
gdt_lock();
KASSERT(mutex_owned(&cpu_lock));
gdt_count[which]--;
d.gd.gd_type = SDT_SYSNULL;
@ -380,8 +359,6 @@ gdt_put_slot1(int slot, int which)
update_descriptor(&gdt[slot], &d);
gdt_free[which] = slot;
gdt_unlock();
}
#ifndef XEN
@ -390,9 +367,12 @@ tss_alloc(const struct i386tss *tss)
{
int slot;
mutex_enter(&cpu_lock);
slot = gdt_get_slot();
setgdt(slot, tss, sizeof(struct i386tss) + IOMAPSIZE - 1,
SDT_SYS386TSS, SEL_KPL, 0, 0);
mutex_exit(&cpu_lock);
return GSEL(slot, SEL_KPL);
}
@ -400,17 +380,24 @@ void
tss_free(int sel)
{
mutex_enter(&cpu_lock);
gdt_put_slot(IDXSEL(sel));
mutex_exit(&cpu_lock);
}
#endif
/*
* Caller must have pmap locked for both of these functions.
*/
int
ldt_alloc(union descriptor *ldtp, size_t len)
{
int slot;
KASSERT(mutex_owned(&cpu_lock));
if (ldt_count >= ldt_max) {
return -1;
}
ldt_count++;
#ifndef XEN
slot = gdt_get_slot();
setgdt(slot, ldtp, len - 1, SDT_SYSLDT, SEL_KPL, 0, 0);
@ -420,6 +407,7 @@ ldt_alloc(union descriptor *ldtp, size_t len)
cpu_info_primary.ci_gdt[slot].ld.ld_entries =
len / sizeof(union descriptor);
#endif
return GSEL(slot, SEL_KPL);
}
@ -428,10 +416,14 @@ ldt_free(int sel)
{
int slot;
KASSERT(mutex_owned(&cpu_lock));
KASSERT(ldt_count > 0);
slot = IDXSEL(sel);
#ifndef XEN
gdt_put_slot(slot);
#else
gdt_put_slot1(slot, 1);
#endif
ldt_count--;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: kvm86.c,v 1.16 2009/03/14 15:36:07 dsl Exp $ */
/* $NetBSD: kvm86.c,v 1.17 2009/03/21 14:41:29 ad Exp $ */
/*
* Copyright (c) 2002
@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kvm86.c,v 1.16 2009/03/14 15:36:07 dsl Exp $");
__KERNEL_RCSID(0, "$NetBSD: kvm86.c,v 1.17 2009/03/21 14:41:29 ad Exp $");
#include "opt_multiprocessor.h"
@ -113,11 +113,13 @@ kvm86_init()
vmd->iomap[i] = 0;
tss->tss_iobase = ((char *)vmd->iomap - (char *)tss) << 16;
/* setup TSS descriptor (including our iomap) */
mutex_enter(&cpu_lock);
slot = gdt_get_slot();
kvm86_tss_sel = GSEL(slot, SEL_KPL);
/* setup TSS descriptor (including our iomap) */
setgdt(slot, tss, sizeof(*tss) + sizeof(vmd->iomap) - 1,
SDT_SYS386TSS, SEL_KPL, 0, 0);
mutex_exit(&cpu_lock);
/* prepare VM for BIOS calls */
kvm86_mapbios(vmd);

View File

@ -1,4 +1,4 @@
/* $NetBSD: locore.S,v 1.84 2009/03/08 16:03:31 ad Exp $ */
/* $NetBSD: locore.S,v 1.85 2009/03/21 14:41:29 ad Exp $ */
/*
* Copyright-o-rama!
@ -134,7 +134,7 @@
*/
#include <machine/asm.h>
__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.84 2009/03/08 16:03:31 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.85 2009/03/21 14:41:29 ad Exp $");
#include "opt_compat_oldboot.h"
#include "opt_ddb.h"
@ -1079,8 +1079,10 @@ END(savectx)
* Old call gate entry for syscall
*/
IDTVEC(osyscall)
cli # must be first instruction
pushfl # set eflags in trap frame
popl 8(%esp)
orl $PSL_I,(%esp) # re-enable ints on return to user
pushl $7 # size of instruction for restart
jmp syscall1
IDTVEC_END(osyscall)
@ -1095,6 +1097,7 @@ IDTVEC(syscall)
syscall1:
pushl $T_ASTFLT # trap # for doing ASTs
INTRENTRY
STI(%eax)
#ifdef DIAGNOSTIC
movl CPUVAR(ILEVEL),%ebx
testl %ebx,%ebx
@ -1181,6 +1184,7 @@ IDTVEC(svr4_fasttrap)
pushl $2 # size of instruction for restart
pushl $T_ASTFLT # trap # for doing ASTs
INTRENTRY
STI(%eax)
pushl $RW_READER
pushl $_C_LABEL(svr4_fasttrap_lock)
call _C_LABEL(rw_enter)

View File

@ -1,12 +1,14 @@
/* $NetBSD: machdep.c,v 1.665 2009/03/16 09:37:35 cegger Exp $ */
/* $NetBSD: machdep.c,v 1.666 2009/03/21 14:41:29 ad Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008 The NetBSD Foundation, Inc.
* Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009
* The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace
* Simulation Facility, NASA Ames Research Center and by Julio M. Merino Vidal.
* Simulation Facility NASA Ames Research Center, by Julio M. Merino Vidal,
* and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -65,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.665 2009/03/16 09:37:35 cegger Exp $");
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.666 2009/03/21 14:41:29 ad Exp $");
#include "opt_beep.h"
#include "opt_compat_ibcs2.h"
@ -535,7 +537,7 @@ i386_proc0_tss_ldt_init(void)
l = &lwp0;
pcb = &l->l_addr->u_pcb;
pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
pcb->pcb_cr0 = rcr0() & ~CR0_TS;
pcb->pcb_esp0 = USER_TO_UAREA(l->l_addr) + KSTACK_SIZE - 16;
pcb->pcb_iopl = SEL_KPL;
@ -544,7 +546,7 @@ i386_proc0_tss_ldt_init(void)
memcpy(pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd));
#ifndef XEN
lldt(pcb->pcb_ldt_sel);
lldt(pmap_kernel()->pm_ldt_sel);
#else
HYPERVISOR_fpu_taskswitch();
XENPRINTF(("lwp tss sp %p ss %04x/%04x\n",
@ -1516,18 +1518,17 @@ init386(paddr_t first_avail)
/* exceptions */
for (x = 0; x < 32; x++) {
idt_vec_reserve(x);
setgate(&idt[x], IDTVEC(exceptions)[x], 0,
(x == 7 || x == 16) ? SDT_SYS386IGT : SDT_SYS386TGT,
setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386IGT,
(x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
}
/* new-style interrupt gate for syscalls */
idt_vec_reserve(128);
setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386TGT, SEL_UPL,
setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL,
GSEL(GCODE_SEL, SEL_KPL));
idt_vec_reserve(0xd2);
setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386TGT,
setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386IGT,
SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1);

View File

@ -1,4 +1,4 @@
/* $NetBSD: vector.S,v 1.44 2009/03/19 02:59:00 mrg Exp $ */
/* $NetBSD: vector.S,v 1.45 2009/03/21 14:41:29 ad Exp $ */
/*
* Copyright 2002 (c) Wasabi Systems, Inc.
@ -65,7 +65,7 @@
*/
#include <machine/asm.h>
__KERNEL_RCSID(0, "$NetBSD: vector.S,v 1.44 2009/03/19 02:59:00 mrg Exp $");
__KERNEL_RCSID(0, "$NetBSD: vector.S,v 1.45 2009/03/21 14:41:29 ad Exp $");
#include "opt_ddb.h"
#include "opt_multiprocessor.h"
@ -886,6 +886,7 @@ IDTVEC(trap0d)
IDTVEC(trap0e)
pushl $T_PAGEFLT
INTRENTRY
STI(%eax)
testb $PGEX_U,TF_ERR(%esp)
jnz calltrap
movl %cr2,%eax
@ -996,6 +997,7 @@ IDTVEC(trap0f)
pushl $0 # dummy error code
pushl $T_ASTFLT
INTRENTRY
STI(%eax)
#ifdef DIAGNOSTIC
movl CPUVAR(ILEVEL),%ebx
#endif
@ -1084,6 +1086,7 @@ IDTVEC(tss_trap08)
/* LINTSTUB: Ignore */
NENTRY(alltraps)
INTRENTRY
STI(%eax)
calltrap:
#ifdef DIAGNOSTIC
movl CPUVAR(ILEVEL),%ebx

View File

@ -1,11 +1,11 @@
/* $NetBSD: pcb.h,v 1.46 2008/10/26 06:57:30 mrg Exp $ */
/* $NetBSD: pcb.h,v 1.47 2009/03/21 14:41:30 ad Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
* Copyright (c) 1998, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Charles M. Hannum.
* by Charles M. Hannum, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -85,7 +85,7 @@ struct pcb {
int pcb_esp0; /* ring0 esp */
int pcb_esp; /* kernel esp */
int pcb_ebp; /* kernel ebp */
int pcb_ldt_sel;
int pcb_unused; /* unused */
int pcb_cr0; /* saved image of CR0 */
int pcb_cr2; /* page fault address (CR2) */
int pcb_cr3; /* page directory pointer */

View File

@ -1,4 +1,4 @@
/* $NetBSD: pmap.h,v 1.21 2008/12/09 20:45:46 pooka Exp $ */
/* $NetBSD: pmap.h,v 1.22 2009/03/21 14:41:30 ad Exp $ */
/*
*
@ -159,16 +159,13 @@ struct pmap {
int pm_flags; /* see below */
union descriptor *pm_ldt; /* user-set LDT */
int pm_ldt_len; /* number of LDT entries */
size_t pm_ldt_len; /* size of LDT in bytes */
int pm_ldt_sel; /* LDT selector */
uint32_t pm_cpus; /* mask of CPUs using pmap */
uint32_t pm_kernel_cpus; /* mask of CPUs using kernel part
of pmap */
};
/* pm_flags */
#define PMF_USER_LDT 0x01 /* pmap has user-set LDT */
/* macro to access pm_pdirpa */
#ifdef PAE
#define pmap_pdirpa(pmap, index) \
@ -220,6 +217,7 @@ void pmap_write_protect(struct pmap *, vaddr_t, vaddr_t, vm_prot_t);
void pmap_load(void);
paddr_t pmap_init_tmp_pgtbl(paddr_t);
void pmap_remove_all(struct pmap *);
void pmap_ldt_sync(struct pmap *);
vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */

View File

@ -1,4 +1,4 @@
/* $NetBSD: sysarch.h,v 1.7 2008/04/28 20:23:40 martin Exp $ */
/* $NetBSD: sysarch.h,v 1.8 2009/03/21 14:41:30 ad Exp $ */
/*-
* Copyright (c) 2007 The NetBSD Foundation, Inc.
@ -174,7 +174,6 @@ struct mtrr;
int x86_iopl(struct lwp *, void *, register_t *);
int x86_get_mtrr(struct lwp *, void *, register_t *);
int x86_set_mtrr(struct lwp *, void *, register_t *);
int x86_get_ldt_len(struct lwp *l);
int x86_get_ldt(struct lwp *l, void *, register_t *);
int x86_get_ldt1(struct lwp *l, struct x86_get_ldt_args *, union descriptor *);
int x86_set_ldt(struct lwp *l, void *, register_t *);

View File

@ -1,4 +1,4 @@
/* $NetBSD: pmap.c,v 1.79 2009/03/14 15:36:15 dsl Exp $ */
/* $NetBSD: pmap.c,v 1.80 2009/03/21 14:41:30 ad Exp $ */
/*
* Copyright (c) 2007 Manuel Bouyer.
@ -154,7 +154,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.79 2009/03/14 15:36:15 dsl Exp $");
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.80 2009/03/21 14:41:30 ad Exp $");
#include "opt_user_ldt.h"
#include "opt_lockdebug.h"
@ -173,6 +173,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.79 2009/03/14 15:36:15 dsl Exp $");
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/xcall.h>
#include <uvm/uvm.h>
@ -2274,7 +2275,7 @@ pmap_destroy(struct pmap *pmap)
pool_cache_put(&pmap_pdp_cache, pmap->pm_pdir);
#ifdef USER_LDT
if (pmap->pm_flags & PMF_USER_LDT) {
if (pmap->pm_ldt != NULL) {
/*
* no need to switch the LDT; this address space is gone,
* nothing is using it.
@ -2282,9 +2283,11 @@ pmap_destroy(struct pmap *pmap)
* No need to lock the pmap for ldt_free (or anything else),
* we're the last one to use it.
*/
mutex_enter(&cpu_lock);
ldt_free(pmap->pm_ldt_sel);
mutex_exit(&cpu_lock);
uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
pmap->pm_ldt_len * sizeof(union descriptor), UVM_KMF_WIRED);
pmap->pm_ldt_len, UVM_KMF_WIRED);
}
#endif
@ -2322,60 +2325,94 @@ pmap_fork(struct pmap *pmap1, struct pmap *pmap2)
size_t len;
int sel;
if (__predict_true(pmap1->pm_ldt == NULL)) {
return;
}
retry:
if (pmap1->pm_flags & PMF_USER_LDT) {
len = pmap1->pm_ldt_len * sizeof(union descriptor);
new_ldt = (union descriptor *)uvm_km_alloc(kernel_map,
len, 0, UVM_KMF_WIRED);
if (pmap1->pm_ldt != NULL) {
len = pmap1->pm_ldt_len;
new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len, 0,
UVM_KMF_WIRED);
mutex_enter(&cpu_lock);
sel = ldt_alloc(new_ldt, len);
if (sel == -1) {
mutex_exit(&cpu_lock);
uvm_km_free(kernel_map, (vaddr_t)new_ldt, len,
UVM_KMF_WIRED);
printf("WARNING: pmap_fork: unable to allocate LDT\n");
return;
}
} else {
len = -1;
new_ldt = NULL;
sel = -1;
}
if ((uintptr_t) pmap1 < (uintptr_t) pmap2) {
mutex_enter(&pmap1->pm_lock);
mutex_enter(&pmap2->pm_lock);
} else {
mutex_enter(&pmap2->pm_lock);
mutex_enter(&pmap1->pm_lock);
mutex_enter(&cpu_lock);
}
/* Copy the LDT, if necessary. */
if (pmap1->pm_flags & PMF_USER_LDT) {
if (pmap1->pm_ldt != NULL) {
if (len != pmap1->pm_ldt_len * sizeof(union descriptor)) {
mutex_exit(&pmap2->pm_lock);
mutex_exit(&pmap1->pm_lock);
if (len != -1) {
ldt_free(sel);
uvm_km_free(kernel_map, (vaddr_t)new_ldt,
len, UVM_KMF_WIRED);
}
mutex_exit(&cpu_lock);
goto retry;
}
memcpy(new_ldt, pmap1->pm_ldt, len);
pmap2->pm_ldt = new_ldt;
pmap2->pm_ldt_len = pmap1->pm_ldt_len;
pmap2->pm_flags |= PMF_USER_LDT;
pmap2->pm_ldt_sel = sel;
len = -1;
}
mutex_exit(&pmap2->pm_lock);
mutex_exit(&pmap1->pm_lock);
if (len != -1) {
ldt_free(sel);
uvm_km_free(kernel_map, (vaddr_t)new_ldt, len,
UVM_KMF_WIRED);
}
mutex_exit(&cpu_lock);
#endif /* USER_LDT */
}
#endif /* PMAP_FORK */
#ifdef USER_LDT
/*
* pmap_ldt_xcall: cross call used by pmap_ldt_sync. if the named pmap
* is active, reload LDTR.
*/
static void
pmap_ldt_xcall(void *arg1, void *arg2)
{
struct pmap *pm;
kpreempt_disable();
pm = arg1;
if (curcpu()->ci_pmap == pm) {
lldt(pm->pm_ldt_sel);
}
kpreempt_enable();
}
/*
* pmap_ldt_sync: LDT selector for the named pmap is changing. swap
* in the new selector on all CPUs.
*/
void
pmap_ldt_sync(struct pmap *pm)
{
uint64_t where;
KASSERT(mutex_owned(&cpu_lock));
where = xc_broadcast(0, pmap_ldt_xcall, pm, NULL);
xc_wait(where);
}
/*
* pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
* restore the default.
@ -2384,35 +2421,28 @@ pmap_fork(struct pmap *pmap1, struct pmap *pmap2)
void
pmap_ldt_cleanup(struct lwp *l)
{
struct pcb *pcb = &l->l_addr->u_pcb;
pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
union descriptor *old_ldt = NULL;
union descriptor *dp = NULL;
size_t len = 0;
int sel = -1;
mutex_enter(&pmap->pm_lock);
kpreempt_disable();
if (pmap->pm_flags & PMF_USER_LDT) {
sel = pmap->pm_ldt_sel;
pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
if (l == curlwp)
lldt(pcb->pcb_ldt_sel);
old_ldt = pmap->pm_ldt;
len = pmap->pm_ldt_len * sizeof(union descriptor);
pmap->pm_ldt = NULL;
pmap->pm_ldt_len = 0;
pmap->pm_flags &= ~PMF_USER_LDT;
if (__predict_true(pmap->pm_ldt == NULL)) {
return;
}
kpreempt_enable();
mutex_exit(&pmap->pm_lock);
if (sel != -1)
mutex_enter(&cpu_lock);
if (pmap->pm_ldt != NULL) {
sel = pmap->pm_ldt_sel;
dp = pmap->pm_ldt;
len = pmap->pm_ldt_len;
pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
pmap->pm_ldt = NULL;
pmap->pm_ldt_len = 0;
pmap_ldt_sync(pmap);
ldt_free(sel);
if (old_ldt != NULL)
uvm_km_free(kernel_map, (vaddr_t)old_ldt, len, UVM_KMF_WIRED);
uvm_km_free(kernel_map, (vaddr_t)dp, len, UVM_KMF_WIRED);
}
mutex_exit(&cpu_lock);
}
#endif /* USER_LDT */
@ -2460,8 +2490,6 @@ pmap_activate(struct lwp *l)
}
pcb = &l->l_addr->u_pcb;
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
ci->ci_want_pmapload = 1;
#if defined(__x86_64__)
@ -2564,10 +2592,7 @@ pmap_load(void)
pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
KASSERT(pmap != pmap_kernel());
oldpmap = ci->ci_pmap;
pcb = &l->l_addr->u_pcb;
/* loaded by pmap_activate */
KASSERT(pcb->pcb_ldt_sel == pmap->pm_ldt_sel);
if (pmap == oldpmap) {
if (!pmap_reactivate(pmap)) {
@ -2674,11 +2699,11 @@ pmap_load(void)
/* lldt() does pmap_pte_flush() */
#else /* XEN */
#if defined(i386)
ci->ci_tss.tss_ldt = pcb->pcb_ldt_sel;
ci->ci_tss.tss_ldt = pmap->pm_ldt_sel;
ci->ci_tss.tss_cr3 = pcb->pcb_cr3;
#endif
#endif /* XEN */
lldt(pcb->pcb_ldt_sel);
lldt(pmap->pm_ldt_sel);
#ifdef PAE
{
paddr_t l3_pd = xpmap_ptom_masked(pmap_l3paddr);

View File

@ -1,7 +1,7 @@
/* $NetBSD: sys_machdep.c,v 1.16 2008/11/19 18:36:01 ad Exp $ */
/* $NetBSD: sys_machdep.c,v 1.17 2009/03/21 14:41:30 ad Exp $ */
/*-
* Copyright (c) 1998, 2007 The NetBSD Foundation, Inc.
* Copyright (c) 1998, 2007, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_machdep.c,v 1.16 2008/11/19 18:36:01 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: sys_machdep.c,v 1.17 2009/03/21 14:41:30 ad Exp $");
#include "opt_mtrr.h"
#include "opt_perfctrs.h"
@ -52,13 +52,12 @@ __KERNEL_RCSID(0, "$NetBSD: sys_machdep.c,v 1.16 2008/11/19 18:36:01 ad Exp $");
#include <sys/malloc.h>
#include <sys/kmem.h>
#include <sys/kauth.h>
#include <sys/cpu.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <uvm/uvm_extern.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/gdt.h>
#include <machine/psl.h>
@ -110,28 +109,6 @@ x86_print_ldt(int i, const struct segment_descriptor *d)
}
#endif
int
x86_get_ldt_len(struct lwp *l)
{
#ifndef USER_LDT
return -1;
#else
pmap_t pmap = l->l_proc->p_vmspace->vm_map.pmap;
int nldt;
mutex_enter(&pmap->pm_lock);
if (pmap->pm_flags & PMF_USER_LDT) {
nldt = pmap->pm_ldt_len;
} else {
nldt = NLDT;
}
mutex_exit(&pmap->pm_lock);
return nldt;
#endif
}
int
x86_get_ldt(struct lwp *l, void *args, register_t *retval)
{
@ -188,10 +165,10 @@ x86_get_ldt1(struct lwp *l, struct x86_get_ldt_args *ua, union descriptor *cp)
ua->start + ua->num > 8192)
return (EINVAL);
mutex_enter(&pmap->pm_lock);
mutex_enter(&cpu_lock);
if (pmap->pm_flags & PMF_USER_LDT) {
nldt = pmap->pm_ldt_len;
if (pmap->pm_ldt != NULL) {
nldt = pmap->pm_ldt_len / sizeof(*lp);
lp = pmap->pm_ldt;
} else {
nldt = NLDT;
@ -199,7 +176,7 @@ x86_get_ldt1(struct lwp *l, struct x86_get_ldt_args *ua, union descriptor *cp)
}
if (ua->start > nldt) {
mutex_exit(&pmap->pm_lock);
mutex_exit(&cpu_lock);
return (EINVAL);
}
@ -215,7 +192,7 @@ x86_get_ldt1(struct lwp *l, struct x86_get_ldt_args *ua, union descriptor *cp)
#endif
memcpy(cp, lp, num * sizeof(union descriptor));
mutex_exit(&pmap->pm_lock);
mutex_exit(&cpu_lock);
return 0;
#endif
@ -258,12 +235,11 @@ x86_set_ldt1(struct lwp *l, struct x86_set_ldt_args *ua,
#ifndef USER_LDT
return EINVAL;
#else
int error, i, n, sel, free_sel;
int error, i, n, old_sel, new_sel;
struct proc *p = l->l_proc;
struct pcb *pcb = &l->l_addr->u_pcb;
pmap_t pmap = p->p_vmspace->vm_map.pmap;
size_t old_len, new_len, ldt_len, free_len;
union descriptor *old_ldt, *new_ldt, *free_ldt;
size_t old_len, new_len;
union descriptor *old_ldt, *new_ldt;
error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_LDT_SET,
NULL, NULL, NULL, NULL);
@ -336,89 +312,72 @@ x86_set_ldt1(struct lwp *l, struct x86_set_ldt_args *ua,
}
}
/* allocate user ldt */
free_sel = -1;
new_ldt = NULL;
new_len = 0;
free_ldt = NULL;
free_len = 0;
mutex_enter(&pmap->pm_lock);
kpreempt_disable();
if (pmap->pm_ldt == 0 || (ua->start + ua->num) > pmap->pm_ldt_len) {
if (pmap->pm_flags & PMF_USER_LDT)
ldt_len = pmap->pm_ldt_len;
else
ldt_len = 512;
while ((ua->start + ua->num) > ldt_len)
ldt_len *= 2;
new_len = ldt_len * sizeof(union descriptor);
/*
* Install selected changes. We perform a copy, write, swap dance
* here to ensure that all updates happen atomically.
*/
mutex_exit(&pmap->pm_lock);
/* Allocate a new LDT. */
for (;;) {
new_len = (ua->start + ua->num) * sizeof(union descriptor);
new_len = max(new_len, pmap->pm_ldt_len);
new_len = max(new_len, NLDT * sizeof(union descriptor));
new_len = round_page(new_len);
new_ldt = (union descriptor *)uvm_km_alloc(kernel_map,
new_len, 0, UVM_KMF_WIRED);
memset(new_ldt, 0, new_len);
sel = ldt_alloc(new_ldt, new_len);
mutex_enter(&pmap->pm_lock);
if (pmap->pm_ldt != NULL && ldt_len <= pmap->pm_ldt_len) {
/*
* Another thread (re)allocated the LDT to
* sufficient size while we were blocked in
* uvm_km_alloc. Oh well. The new entries
* will quite probably not be right, but
* hey.. not our problem if user applications
* have race conditions like that.
*/
goto copy;
new_len, 0, UVM_KMF_WIRED | UVM_KMF_ZERO);
mutex_enter(&cpu_lock);
if (pmap->pm_ldt_len <= new_len) {
break;
}
old_ldt = pmap->pm_ldt;
free_ldt = old_ldt;
free_len = pmap->pm_ldt_len * sizeof(union descriptor);
if (old_ldt != NULL) {
old_len = pmap->pm_ldt_len * sizeof(union descriptor);
} else {
old_len = NLDT * sizeof(union descriptor);
old_ldt = ldt;
}
memcpy(new_ldt, old_ldt, old_len);
memset((char *)new_ldt + old_len, 0, new_len - old_len);
pmap->pm_ldt = new_ldt;
pmap->pm_ldt_len = ldt_len;
if (pmap->pm_flags & PMF_USER_LDT)
free_sel = pmap->pm_ldt_sel;
else {
pmap->pm_flags |= PMF_USER_LDT;
free_sel = -1;
}
pmap->pm_ldt_sel = sel;
pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
if (pcb == curpcb)
lldt(pcb->pcb_ldt_sel);
new_ldt = NULL;
}
copy:
/* Now actually replace the descriptors. */
for (i = 0, n = ua->start; i < ua->num; i++, n++)
pmap->pm_ldt[n] = descv[i];
kpreempt_enable();
mutex_exit(&pmap->pm_lock);
if (new_ldt != NULL)
mutex_exit(&cpu_lock);
uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len,
UVM_KMF_WIRED);
if (free_sel != -1)
ldt_free(free_sel);
if (free_ldt != NULL)
uvm_km_free(kernel_map, (vaddr_t)free_ldt, free_len,
UVM_KMF_WIRED);
}
return (error);
/* Copy existing entries, if any. */
if (pmap->pm_ldt != NULL) {
old_ldt = pmap->pm_ldt;
old_len = pmap->pm_ldt_len;
old_sel = pmap->pm_ldt_sel;
memcpy(new_ldt, old_ldt, old_len);
} else {
old_ldt = NULL;
old_len = 0;
old_sel = -1;
memcpy(new_ldt, ldt, NLDT * sizeof(union descriptor));
}
/* Apply requested changes. */
for (i = 0, n = ua->start; i < ua->num; i++, n++) {
new_ldt[n] = descv[i];
}
/* Allocate LDT selector. */
new_sel = ldt_alloc(new_ldt, new_len);
if (new_sel == -1) {
mutex_exit(&cpu_lock);
uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len,
UVM_KMF_WIRED);
return ENOMEM;
}
/* All changes are now globally visible. Swap in the new LDT. */
pmap->pm_ldt = new_ldt;
pmap->pm_ldt_len = new_len;
pmap->pm_ldt_sel = new_sel;
/* Switch existing users onto new LDT. */
pmap_ldt_sync(pmap);
/* Free existing LDT (if any). */
if (old_ldt != NULL) {
ldt_free(old_sel);
uvm_km_free(kernel_map, (vaddr_t)old_ldt, old_len,
UVM_KMF_WIRED);
}
mutex_exit(&cpu_lock);
return error;
#endif
}

View File

@ -1,11 +1,11 @@
/* $NetBSD: linux_machdep.c,v 1.142 2009/01/11 02:45:48 christos Exp $ */
/* $NetBSD: linux_machdep.c,v 1.143 2009/03/21 14:41:30 ad Exp $ */
/*-
* Copyright (c) 1995, 2000, 2008 The NetBSD Foundation, Inc.
* Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Frank van der Linden.
* by Frank van der Linden, and by Andrew Doran.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.142 2009/01/11 02:45:48 christos Exp $");
__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.143 2009/03/21 14:41:30 ad Exp $");
#if defined(_KERNEL_OPT)
#include "opt_vm86.h"
@ -62,6 +62,7 @@ __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.142 2009/01/11 02:45:48 christos
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <miscfs/specfs/specdev.h>
@ -553,8 +554,8 @@ linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
{
struct x86_get_ldt_args gl;
int error;
int num_ldt;
union descriptor *ldt_buf;
size_t sz;
/*
* I've checked the linux code - this function is asymetric with
@ -564,19 +565,11 @@ linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
DPRINTF(("linux_read_ldt!"));
num_ldt = x86_get_ldt_len(l);
if (num_ldt <= 0)
return EINVAL;
sz = 8192 * sizeof(*ldt_buf);
ldt_buf = kmem_zalloc(sz, KM_SLEEP);
gl.start = 0;
gl.desc = NULL;
gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
if (gl.num > num_ldt)
gl.num = num_ldt;
ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
error = x86_get_ldt1(l, &gl, ldt_buf);
/* NB gl.num might have changed */
if (error == 0) {
@ -584,7 +577,7 @@ linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
error = copyout(ldt_buf, SCARG(uap, ptr),
gl.num * sizeof *ldt_buf);
}
free(ldt_buf, M_TEMP);
kmem_free(ldt_buf, sz);
return error;
}

View File

@ -1,7 +1,7 @@
/* $NetBSD: init_main.c,v 1.383 2009/03/05 06:37:03 yamt Exp $ */
/* $NetBSD: init_main.c,v 1.384 2009/03/21 14:41:30 ad Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
* Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -97,7 +97,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.383 2009/03/05 06:37:03 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.384 2009/03/21 14:41:30 ad Exp $");
#include "opt_ddb.h"
#include "opt_ipsec.h"
@ -295,6 +295,7 @@ main(void)
kernel_lock_init();
once_init();
mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);
uvm_init();
@ -371,7 +372,6 @@ main(void)
time_init();
/* Initialize the run queues, turnstiles and sleep queues. */
mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);
sched_rqinit();
turnstile_init();
sleeptab_init(&sleeptab);