Fix a subtle ring0 escalation vulnerability in amd64, and implement
a mitigation against similar bugs.

Operations on segment registers can generate a page fault if there is
an issue when touching the in-memory gdt. Theoretically, it is never
supposed to happen, since the gdt is mapped correctly. However, in the
kernel we allow the gdt to be resized, and to do that, we allocate the
maximum amount of va needed by it, but only kenter a few pages until we
need more. Moreover, to avoid reloading the gdt each time we grow it, the
'size' field of gdtr is set to the maximum value. All of this means that
if a mov or iretq is executed with a segment register whose index hits
a page that has not been kentered, a page fault is raised.
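
To make the hazard concrete, here is a small userspace model of the
pre-fix layout (names and constants are illustrative assumptions, not
the kernel's actual values): the gdtr limit covers the whole reserved
range, but only part of it has physical backing, so a descriptor load
whose offset lands in the unbacked window faults.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096
#define MINGDTSIZ PAGE_SIZE		/* physically backed at boot */
#define MAXGDTSIZ (16 * PAGE_SIZE)	/* va reserved; gdtr size covers it */
#define DESC_SIZE 8			/* bytes per segment descriptor */

/* Would loading this selector index touch an unbacked gdt page? */
static bool
selector_faults(unsigned idx, size_t mapped)
{
	size_t off = (size_t)idx * DESC_SIZE;

	/* Within the gdtr limit, yet beyond the kentered pages. */
	return off < MAXGDTSIZ && off >= mapped;
}

int
main(void)
{
	/* Pre-fix: only MINGDTSIZ is backed, so index 512 faults. */
	printf("%s\n", selector_faults(512, MINGDTSIZ) ? "fault" : "ok");
	/* Post-fix: the whole MAXGDTSIZ is backed, no fault. */
	printf("%s\n", selector_faults(512, MAXGDTSIZ) ? "fault" : "ok");
	return 0;
}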

Such a page fault, if received in kernel mode, does not trigger a swapgs
on amd64; in other words, the kernel would be re-entered with the userland
tls.
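
A minimal model of why the missing swapgs matters (types and handler
logic are assumptions for illustration, not NetBSD's actual trap entry
code): the kernel reaches its per-CPU data through the gs base, so if
a fault taken in kernel mode skips swapgs while the userland gs base
is live, "per-CPU" accesses dereference userland-controlled memory.

#include <stdio.h>

struct cpu_info { const char *owner; };

static struct cpu_info kernel_percpu = { "kernel" };
static struct cpu_info userland_tls = { "userland" };

/* Stand-in for the gs base register. */
static struct cpu_info *gsbase = &userland_tls;

static void
trap_entry(int from_userland)
{
	/* swapgs is executed only when arriving from userland. */
	if (from_userland)
		gsbase = &kernel_percpu;
	printf("per-CPU data owned by: %s\n", gsbase->owner);
}

int
main(void)
{
	/* Fault raised while already in kernel mode: no swapgs, so */
	/* the kernel trusts a structure under userland control.    */
	trap_entry(0);
	return 0;
}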

And there just happens to be a place in compat_linux32 where the index of
%cs is controlled by userland, making it easy to trigger the page fault
and get kernel privileges.
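
The fix in linux32_restore_sigcontext (visible in the diff below)
replaces the permissive USERMODE() test with a whitelist of the one
legitimate 32-bit user code selector. A sketch of the idea, using a
hypothetical selector value rather than NetBSD's real GUCODE32
encoding:

#include <stdbool.h>
#include <stdio.h>

#define UCODE32_SEL 0x23	/* hypothetical 32-bit user code selector */

/*
 * Rather than accepting any %cs that merely requests ring 3, accept
 * only the known-good selector, so the descriptor index can no longer
 * point into an unbacked gdt page.
 */
static bool
valid_user_csel32(unsigned short sel)
{
	return sel == UCODE32_SEL;
}

int
main(void)
{
	printf("0x0023 -> %s\n", valid_user_csel32(0x0023) ? "ok" : "EINVAL");
	printf("0xfff3 -> %s\n", valid_user_csel32(0xfff3) ? "ok" : "EINVAL");
	return 0;
}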

The mitigation consists of abandoning the gdt_grow mechanism and
allocating/kentering the maximum gdt size right away, in such a way
that no page fault can ever be triggered by a segment register
operation.
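
Condensed from the diff below, the shape of the new gdt_init path
(error handling and locking omitted): the full MAXGDTSIZ range is both
allocated and kentered up front, so the gdtr limit never covers an
unbacked page.

	gdt_size = MAXGDTSIZ;		/* no more gdt_grow */
	gdtstore = (char *)uvm_km_alloc(kernel_map, gdt_size, 0,
	    UVM_KMF_VAONLY);
	for (va = (vaddr_t)gdtstore; va < (vaddr_t)gdtstore + gdt_size;
	    va += PAGE_SIZE) {
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_READ | VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());
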
maxv 2017-09-02 12:57:03 +00:00
parent c939a3b265
commit 5eea203ec8
3 changed files with 42 additions and 144 deletions

gdt.c (amd64)

@@ -1,6 +1,6 @@
/* $NetBSD: gdt.c,v 1.40 2017/07/02 11:21:13 maxv Exp $ */
/* $NetBSD: gdt.c,v 1.41 2017/09/02 12:57:03 maxv Exp $ */
/*-
/*
* Copyright (c) 1996, 1997, 2009 The NetBSD Foundation, Inc.
* All rights reserved.
*
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.40 2017/07/02 11:21:13 maxv Exp $");
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.41 2017/09/02 12:57:03 maxv Exp $");
#include "opt_multiprocessor.h"
#include "opt_xen.h"
@@ -66,7 +66,7 @@ typedef struct {
size_t nslots;
} gdt_bitmap_t;
size_t gdt_size; /* size of GDT in bytes */
size_t gdt_size; /* size of GDT in bytes */
static gdt_bitmap_t gdt_bitmap; /* bitmap of busy slots */
#if defined(USER_LDT) || !defined(XEN)
@@ -130,21 +130,16 @@ gdt_init(void)
struct cpu_info *ci = &cpu_info_primary;
/* Initialize the global values */
gdt_size = MINGDTSIZ;
gdt_size = MAXGDTSIZ;
memset(&gdt_bitmap.busy, 0, sizeof(gdt_bitmap.busy));
gdt_bitmap.nslots = NSLOTS(gdt_size);
old_gdt = gdtstore;
/* Allocate MAXGDTSIZ bytes of virtual memory. */
gdtstore = (char *)uvm_km_alloc(kernel_map, MAXGDTSIZ, 0,
/* Allocate gdt_size bytes of memory. */
gdtstore = (char *)uvm_km_alloc(kernel_map, gdt_size, 0,
UVM_KMF_VAONLY);
/*
* Allocate only MINGDTSIZ bytes of physical memory. We will grow this
* area in gdt_grow at run-time if needed.
*/
for (va = (vaddr_t)gdtstore; va < (vaddr_t)gdtstore + MINGDTSIZ;
for (va = (vaddr_t)gdtstore; va < (vaddr_t)gdtstore + gdt_size;
va += PAGE_SIZE) {
pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
if (pg == NULL) {
@@ -173,15 +168,12 @@ gdt_init(void)
void
gdt_alloc_cpu(struct cpu_info *ci)
{
int max_len = MAXGDTSIZ;
int min_len = MINGDTSIZ;
struct vm_page *pg;
vaddr_t va;
ci->ci_gdt = (union descriptor *)uvm_km_alloc(kernel_map, max_len,
ci->ci_gdt = (union descriptor *)uvm_km_alloc(kernel_map, gdt_size,
0, UVM_KMF_VAONLY);
for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + min_len;
for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + gdt_size;
va += PAGE_SIZE) {
while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO))
== NULL) {
@@ -192,7 +184,6 @@ gdt_alloc_cpu(struct cpu_info *ci)
}
pmap_update(pmap_kernel());
memset(ci->ci_gdt, 0, min_len);
memcpy(ci->ci_gdt, gdtstore, gdt_size);
}
@@ -207,51 +198,11 @@ gdt_init_cpu(struct cpu_info *ci)
KASSERT(curcpu() == ci);
#ifndef XEN
setregion(&region, ci->ci_gdt, (uint16_t)(MAXGDTSIZ - 1));
#else
setregion(&region, ci->ci_gdt, (uint16_t)(gdt_size - 1));
#endif
lgdt(&region);
}
#if !defined(XEN) || defined(USER_LDT)
/*
* Grow the GDT. The GDT is present on each CPU, so we need to iterate over all
* of them. We already have the virtual memory, we only need to grow the
* physical memory.
*/
static void
gdt_grow(void)
{
size_t old_size;
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
struct vm_page *pg;
vaddr_t va;
old_size = gdt_size;
gdt_size *= 2;
if (gdt_size > MAXGDTSIZ)
gdt_size = MAXGDTSIZ;
gdt_bitmap.nslots = NSLOTS(gdt_size);
for (CPU_INFO_FOREACH(cii, ci)) {
for (va = (vaddr_t)(ci->ci_gdt) + old_size;
va < (vaddr_t)(ci->ci_gdt) + gdt_size;
va += PAGE_SIZE) {
while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) ==
NULL) {
uvm_wait("gdt_grow");
}
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
VM_PROT_READ | VM_PROT_WRITE, 0);
}
}
pmap_update(pmap_kernel());
}
static int
gdt_get_slot(void)
{
@@ -259,17 +210,14 @@ gdt_get_slot(void)
KASSERT(mutex_owned(&cpu_lock));
while (1) {
for (i = 0; i < gdt_bitmap.nslots; i++) {
if (!gdt_bitmap.busy[i]) {
gdt_bitmap.busy[i] = true;
return (int)i;
}
for (i = 0; i < gdt_bitmap.nslots; i++) {
if (!gdt_bitmap.busy[i]) {
gdt_bitmap.busy[i] = true;
return (int)i;
}
if (gdt_size >= MAXGDTSIZ)
panic("gdt_get_slot: out of memory");
gdt_grow();
}
panic("gdt_get_slot: out of memory");
/* NOTREACHED */
return 0;
}
@@ -357,12 +305,12 @@ lgdt(struct region_descriptor *desc)
*/
va = desc->rd_base + desc->rd_limit + 1;
memset((void *)va, 0, roundup(va, PAGE_SIZE) - va);
/*
* The lgdt instruction uses virtual addresses, do some translation for
* Xen. Mark pages R/O too, otherwise Xen will refuse to use them.
*/
for (i = 0; i < roundup(desc->rd_limit, PAGE_SIZE) >> PAGE_SHIFT; i++) {
/*
* The lgdt instruction uses virtual addresses,
* do some translation for Xen.
* Mark pages R/O too, else Xen will refuse to use them.
*/
frames[i] = ((paddr_t) xpmap_ptetomach(
(pt_entry_t *)(desc->rd_base + (i << PAGE_SHIFT)))) >>
PAGE_SHIFT;

gdt.c (i386)

@@ -1,4 +1,4 @@
/* $NetBSD: gdt.c,v 1.65 2017/07/06 20:23:57 bouyer Exp $ */
/* $NetBSD: gdt.c,v 1.66 2017/09/02 12:57:03 maxv Exp $ */
/*
* Copyright (c) 1996, 1997, 2009 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.65 2017/07/06 20:23:57 bouyer Exp $");
__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.66 2017/09/02 12:57:03 maxv Exp $");
#include "opt_multiprocessor.h"
#include "opt_xen.h"
@@ -54,7 +54,7 @@ typedef struct {
size_t nslots;
} gdt_bitmap_t;
size_t gdt_size; /* size of GDT in bytes */
size_t gdt_size; /* size of GDT in bytes */
static gdt_bitmap_t gdt_bitmap; /* bitmap of busy slots */
#ifndef XEN
@@ -63,7 +63,6 @@ static int ldt_max = 1000;/* max number of LDTs */
static void setgdt(int, const void *, size_t, int, int, int, int);
static int gdt_get_slot(void);
static void gdt_put_slot(int);
static void gdt_grow(void);
#endif
void gdt_init(void);
@@ -120,21 +119,16 @@ gdt_init(void)
struct cpu_info *ci = &cpu_info_primary;
/* Initialize the global values */
gdt_size = MINGDTSIZ;
gdt_size = MAXGDTSIZ;
memset(&gdt_bitmap.busy, 0, sizeof(gdt_bitmap.busy));
gdt_bitmap.nslots = NSLOTS(gdt_size);
old_gdt = gdtstore;
/* Allocate MAXGDTSIZ bytes of virtual memory. */
gdtstore = (union descriptor *)uvm_km_alloc(kernel_map, MAXGDTSIZ, 0,
/* Allocate gdt_size bytes of memory. */
gdtstore = (union descriptor *)uvm_km_alloc(kernel_map, gdt_size, 0,
UVM_KMF_VAONLY);
/*
* Allocate only MINGDTSIZ bytes of physical memory. We will grow this
* area in gdt_grow at run-time if needed.
*/
for (va = (vaddr_t)gdtstore; va < (vaddr_t)gdtstore + MINGDTSIZ;
for (va = (vaddr_t)gdtstore; va < (vaddr_t)gdtstore + gdt_size;
va += PAGE_SIZE) {
pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
if (pg == NULL) {
@@ -161,15 +155,12 @@ gdt_init(void)
void
gdt_alloc_cpu(struct cpu_info *ci)
{
int max_len = MAXGDTSIZ;
int min_len = MINGDTSIZ;
struct vm_page *pg;
vaddr_t va;
ci->ci_gdt = (union descriptor *)uvm_km_alloc(kernel_map, max_len,
ci->ci_gdt = (union descriptor *)uvm_km_alloc(kernel_map, gdt_size,
0, UVM_KMF_VAONLY);
for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + min_len;
for (va = (vaddr_t)ci->ci_gdt; va < (vaddr_t)ci->ci_gdt + gdt_size;
va += PAGE_SIZE) {
while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO))
== NULL) {
@@ -180,7 +171,6 @@ gdt_alloc_cpu(struct cpu_info *ci)
}
pmap_update(pmap_kernel());
memset(ci->ci_gdt, 0, min_len);
memcpy(ci->ci_gdt, gdtstore, gdt_size);
setsegment(&ci->ci_gdt[GCPU_SEL].sd, ci,
@@ -196,10 +186,8 @@ gdt_init_cpu(struct cpu_info *ci)
{
#ifndef XEN
struct region_descriptor region;
size_t max_len;
max_len = MAXGDTSIZ;
setregion(&region, ci->ci_gdt, max_len - 1);
setregion(&region, ci->ci_gdt, gdt_size - 1);
lgdt(&region);
#else
size_t len = roundup(gdt_size, PAGE_SIZE);
@@ -234,42 +222,6 @@ gdt_init_cpu(struct cpu_info *ci)
}
#ifndef XEN
/*
* Grow the GDT. The GDT is present on each CPU, so we need to iterate over all
* of them. We already have the virtual memory, we only need to grow the
* physical memory.
*/
static void
gdt_grow(void)
{
size_t old_size;
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
struct vm_page *pg;
vaddr_t va;
old_size = gdt_size;
gdt_size *= 2;
if (gdt_size > MAXGDTSIZ)
gdt_size = MAXGDTSIZ;
gdt_bitmap.nslots = NSLOTS(gdt_size);
for (CPU_INFO_FOREACH(cii, ci)) {
for (va = (vaddr_t)(ci->ci_gdt) + old_size;
va < (vaddr_t)(ci->ci_gdt) + gdt_size;
va += PAGE_SIZE) {
while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) ==
NULL) {
uvm_wait("gdt_grow");
}
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
VM_PROT_READ | VM_PROT_WRITE, 0);
}
}
pmap_update(pmap_kernel());
}
static int
gdt_get_slot(void)
{
@@ -277,17 +229,14 @@ gdt_get_slot(void)
KASSERT(mutex_owned(&cpu_lock));
while (1) {
for (i = 0; i < gdt_bitmap.nslots; i++) {
if (!gdt_bitmap.busy[i]) {
gdt_bitmap.busy[i] = true;
return (int)i;
}
for (i = 0; i < gdt_bitmap.nslots; i++) {
if (!gdt_bitmap.busy[i]) {
gdt_bitmap.busy[i] = true;
return (int)i;
}
if (gdt_size >= MAXGDTSIZ)
panic("gdt_get_slot: out of memory");
gdt_grow();
}
panic("gdt_get_slot: out of memory");
/* NOTREACHED */
return 0;
}

linux32_machdep.c (compat_linux32)

@@ -1,4 +1,4 @@
/* $NetBSD: linux32_machdep.c,v 1.38 2017/02/05 08:52:11 maxv Exp $ */
/* $NetBSD: linux32_machdep.c,v 1.39 2017/09/02 12:57:03 maxv Exp $ */
/*-
* Copyright (c) 2006 Emmanuel Dreyfus, all rights reserved.
@@ -31,7 +31,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.38 2017/02/05 08:52:11 maxv Exp $");
__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.39 2017/09/02 12:57:03 maxv Exp $");
#include <sys/param.h>
#include <sys/proc.h>
@@ -417,8 +417,9 @@ linux32_restore_sigcontext(struct lwp *l, struct linux32_sigcontext *scp,
/*
* Check for security violations.
*/
if (((scp->sc_eflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0 ||
!USERMODE(scp->sc_cs, scp->sc_eflags))
if (((scp->sc_eflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0)
return EINVAL;
if (!VALID_USER_CSEL32(scp->sc_cs))
return EINVAL;
if (scp->sc_fs != 0 && !VALID_USER_DSEL32(scp->sc_fs) &&