Rename MDP_IRET to MDL_IRET since it is an lwp flag, not a proc one.

Add an MDL_COMPAT32 flag to the lwp's md_flags, set it for 32bit lwps
  and use it to force 'return to user' with iret (as is done when
  MDL_IRET is set).
Split the iret/sysret code paths much later.
Remove all the replicated code for 32bit system calls - which was only
  needed so that iret was always used.
frameasm.h for XEN contains '#define swapgs'; while XEN probably never
  needs swapgs, this is likely to be confusing.
Add a SWAPGS which is a nop on XEN and swapgs otherwise.
(I've not yet checked all the swapgs in files that include frameasm.h)
Simple x86 programs still work.
Hijack 6.99.9 kernel bump (needed for compat32 modules)
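For reference, the per-lwp flag values this change relies on, as they end up
in sys/arch/amd64/include/proc.h (a summary of the proc.h hunk further down,
not new code):

    /* l->l_md.md_flags -- per-lwp, hence 'MDL', not the per-proc 'MDP' */
    #define MDL_USEDFPU     0x0001  /* has used the FPU */
    #define MDL_COMPAT32    0x0008  /* i386, always return via iret */
    #define MDL_IRET        0x0010  /* force return via iret, not sysret */
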
dsl 2012-07-15 15:17:56 +00:00
parent 1fdd2bae8c
commit c578e8d211
10 changed files with 96 additions and 146 deletions


@ -1,4 +1,4 @@
# $NetBSD: genassym.cf,v 1.51 2012/06/11 15:18:05 chs Exp $
# $NetBSD: genassym.cf,v 1.52 2012/07/15 15:17:56 dsl Exp $
#
# Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@ -162,7 +162,8 @@ define L_MD_ASTPENDING offsetof(struct lwp, l_md.md_astpending)
define PAGE_SIZE PAGE_SIZE
define MDP_IRET MDP_IRET
define MDL_IRET MDL_IRET
define MDL_COMPAT32 MDL_COMPAT32
define P_FLAG offsetof(struct proc, p_flag)
define P_RASLIST offsetof(struct proc, p_raslist)

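The genassym.cf entries above become plain preprocessor constants in the
generated assym.h, which is how locore.S can test the new flags directly; a
sketch of the generated output (values assumed from the proc.h hunk below):

    /* assym.h (generated by genassym) -- illustrative sketch only */
    #define MDL_IRET        0x0010
    #define MDL_COMPAT32    0x0008
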

@ -1,4 +1,4 @@
/* $NetBSD: locore.S,v 1.69 2012/06/16 17:30:19 chs Exp $ */
/* $NetBSD: locore.S,v 1.70 2012/07/15 15:17:56 dsl Exp $ */
/*
* Copyright-o-rama!
@ -986,9 +986,9 @@ ENTRY(cpu_switchto)
xorq %rax, %rax
movw %ax, %fs
CLI(cx)
swapgs
SWAPGS
movw %ax, %gs
swapgs
SWAPGS
STI(cx)
movq CPUVAR(GDT),%rcx
@ -1018,9 +1018,9 @@ ENTRY(cpu_switchto)
movq L_MD_REGS(%r12), %rbx
movw TF_FS(%rbx), %fs
CLI(ax)
swapgs
SWAPGS
movw TF_GS(%rbx), %gs
swapgs
SWAPGS
STI(ax)
#else
@ -1063,40 +1063,50 @@ IDTVEC(syscall32)
/*
* syscall()
*
* syscall insn entry. This currently isn't much faster, but
* it can be made faster in the future.
* syscall insn entry.
* This currently isn't much faster, but it can be made faster in the future.
* (Actually we've already saved a few 100 clocks by not loading the trap gate)
*/
IDTVEC(syscall)
#ifndef XEN
/*
* The user %rip is in %rcx and the user %flags in %r11.
* The kernel %cs and %ss are loaded, but nothing else is.
* The 'swapgs' gives us access to cpu-specific memory where
* we can save a user register and then read the lwps
* kernel stack pointer,
* This code doesn't seem to set %ds, this may not matter since it
* is ignored in 64bit mode, OTOH the syscall instruction sets %ss
* and that is ignored as well.
*/
swapgs
movq %r15,CPUVAR(SCRATCH)
movq CPUVAR(CURLWP),%r15
movq L_PCB(%r15),%r15
movq PCB_RSP0(%r15),%r15
xchgq %r15,%rsp
movq PCB_RSP0(%r15),%r15 /* LWP's kernel stack pointer */
/*
* XXX don't need this whole frame, split of the
* syscall frame and trapframe is needed.
* First, leave some room for the trapno, error,
* ss:rsp, etc, so that all GP registers can be
* saved. Then, fill in the rest.
*/
pushq $(LSEL(LUDATA_SEL, SEL_UPL)) /* Known to be user ss */
pushq %r15 /* User space rsp */
/* Make stack look like an 'int nn' frame */
#define SP(x) (x)-(TF_SS+8)(%r15)
movq $(LSEL(LUDATA_SEL, SEL_UPL)), SP(TF_SS) /* user %ss */
movq %rsp, SP(TF_RSP) /* User space rsp */
movq %r11, SP(TF_RFLAGS) /* old rflags from syscall insn */
movq $(LSEL(LUCODE_SEL, SEL_UPL)), SP(TF_CS)
movq %rcx, SP(TF_RIP) /* syscall saves rip in rcx */
leaq SP(0),%rsp /* %rsp now valid after frame */
movq CPUVAR(SCRATCH),%r15
subq $TF_REGSIZE+(TF_RSP-TF_TRAPNO),%rsp
#undef SP
movq $2,TF_ERR(%rsp) /* syscall instruction size */
movq $T_ASTFLT, TF_TRAPNO(%rsp)
movw %es,TF_ES(%rsp)
sti
INTR_SAVE_GPRS
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw $(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
movq %r11, TF_RFLAGS(%rsp) /* old rflags from syscall insn */
movq $(LSEL(LUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
movq %rcx,TF_RIP(%rsp) /* syscall saves rip in rcx */
movq $2,TF_ERR(%rsp)
movq $T_ASTFLT, TF_TRAPNO(%rsp)
#else
/* Xen already switched to kernel stack */
pushq %rsi
@ -1113,44 +1123,59 @@ IDTVEC(syscall)
movw $(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
#endif
do_syscall:
movq CPUVAR(CURLWP),%r14
incq CPUVAR(NSYSCALL) # count it atomically
movq %rsp,L_MD_REGS(%r14) # save pointer to frame
movq L_PROC(%r14),%r15
andl $~MDP_IRET,L_MD_FLAGS(%r14)
andl $~MDL_IRET,L_MD_FLAGS(%r14) /* Allow sysret return */
movq %rsp,%rdi /* Pass frame as arg0 */
call *P_MD_SYSCALL(%r15)
.Lsyscall_checkast:
/* Check for ASTs on exit to user mode. */
/*
* Disable interrupts to avoid new ASTs (etc) being added and
* to ensure we don't take an interrupt with some of the user
* registers loaded.
*/
CLI(si)
/* Check for ASTs on exit to user mode. */
movl L_MD_ASTPENDING(%r14), %eax
orl CPUVAR(WANT_PMAPLOAD), %eax
jnz 9f
testl $MDP_IRET, L_MD_FLAGS(%r14)
jne iret_return;
#ifdef DIAGNOSTIC
cmpl $IPL_NONE,CPUVAR(ILEVEL)
jne 3f
#endif
movw TF_ES(%rsp),%es
#ifndef XEN
swapgs
#endif
testl $(MDL_IRET | MDL_COMPAT32), L_MD_FLAGS(%r14)
INTR_RESTORE_GPRS
movw $(LSEL(LUDATA_SEL, SEL_UPL)), %r11w
movw %r11w,%ds
addq $TF_REGSIZE+16,%rsp /* + T_xxx and error code */
movw TF_ES(%rsp),%es
SWAPGS
jnz 2f
#ifndef XEN
popq %rcx /* return rip */
addq $8,%rsp /* discard cs */
popq %r11 /* flags as set by sysret insn */
movq %ss:(%rsp),%rsp
movq TF_RIP(%rsp), %rcx /* %rip for sysret */
movq TF_RFLAGS(%rsp), %r11 /* %flags for sysret */
movw TF_DS(%rsp), %ds
movq TF_RSP(%rsp), %rsp
sysretq
#else
movw TF_DS(%rsp), %ds
addq $TF_RIP, %rsp
pushq $256 /* VGCF_IN_SYSCALL */
jmp HYPERVISOR_iret
#endif
/*
* If the syscall might have modified some registers, or we are a 32bit
* process we must return to user with an 'iret' instruction.
* If the iret faults in kernel (assumed due to illegal register values)
* then a SIGSEGV will be signalled.
*/
2:
movw TF_DS(%rsp), %ds
addq $TF_RIP, %rsp
iretq
/* Report SPL error */
#ifdef DIAGNOSTIC
3: movabsq $4f, %rdi
movl TF_RAX(%rsp),%esi
@ -1164,6 +1189,8 @@ IDTVEC(syscall)
jmp .Lsyscall_checkast
4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
#endif
/* AST pending or pmap load needed */
9:
cmpl $0, CPUVAR(WANT_PMAPLOAD)
jz 10f
@ -1182,26 +1209,9 @@ IDTVEC(syscall)
* void lwp_trampoline(void);
*
* This is a trampoline function pushed run by newly created LWPs
* in order to do additional setup in their context. 32-bit
* binaries begin life here.
* in order to do additional setup in their context.
*/
NENTRY(lwp_trampoline)
movq %rbp,%rsi
movq %rbp,%r14 /* for .Losyscall_checkast */
movq %rax,%rdi
xorq %rbp,%rbp
call _C_LABEL(lwp_startup)
movq %r13,%rdi
call *%r12
jmp .Losyscall_checkast
/* NOTREACHED */
/*
* void child_trampoline(void);
*
* As per lwp_trampoline(), but 64-bit binaries start here.
*/
NENTRY(child_trampoline)
movq %rbp,%rsi
movq %rbp,%r14 /* for .Lsyscall_checkast */
movq %rax,%rdi
@ -1211,8 +1221,6 @@ NENTRY(child_trampoline)
call *%r12
jmp .Lsyscall_checkast
.globl _C_LABEL(osyscall_return)
/*
* oosyscall()
*
@ -1249,50 +1257,7 @@ osyscall1:
pushq $T_ASTFLT # trap # for doing ASTs
INTRENTRY
STI(si)
movq CPUVAR(CURLWP),%r14
movq %rsp,L_MD_REGS(%r14) # save pointer to frame
movq L_PROC(%r14),%rdx
movq %rsp,%rdi
call *P_MD_SYSCALL(%rdx)
_C_LABEL(osyscall_return):
.Losyscall_checkast:
/* Check for ASTs on exit to user mode. */
CLI(si)
movl L_MD_ASTPENDING(%r14), %eax
orl CPUVAR(WANT_PMAPLOAD), %eax
jnz 9f
iret_return:
#ifdef DIAGNOSTIC
cmpl $IPL_NONE,CPUVAR(ILEVEL)
jne 3f
#endif
INTRFASTEXIT
#ifdef DIAGNOSTIC
3: movabsq $4f, %rdi
movl TF_RAX(%rsp),%esi
movl TF_RDI(%rsp),%edx
movl %ebx,%ecx
movl CPUVAR(ILEVEL),%r8d
xorq %rax,%rax
call _C_LABEL(printf)
movl $IPL_NONE,%edi
call _C_LABEL(spllower)
jmp .Losyscall_checkast
4: .asciz "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
#endif
9:
cmpl $0, CPUVAR(WANT_PMAPLOAD)
jz 10f
STI(si)
call _C_LABEL(do_pmap_load)
jmp .Losyscall_checkast /* re-check ASTs */
10:
CLEAR_ASTPENDING(%r14)
STI(si)
/* Pushed T_ASTFLT into tf_trapno on entry. */
movq %rsp,%rdi
call _C_LABEL(trap)
jmp .Losyscall_checkast /* re-check ASTs */
jmp do_syscall
/*
* bool sse2_idlezero_page(void *pg)

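In rough C terms, the common syscall exit above now chooses between sysret
and iret as sketched below. This is an editorial model only (the helper name
and the 'int md_flags' parameter are hypothetical); the authoritative logic
is the assembly in this file:

    /* Sketch of the test made, with interrupts disabled, on syscall exit. */
    static int
    return_via_iret(int md_flags)           /* md_flags is l->l_md.md_flags */
    {
            /* iretq if the syscall modified the frame or the lwp is 32bit; */
            /* otherwise %rcx/%r11/%rsp are reloaded from the trapframe and */
            /* sysretq is used.                                              */
            return (md_flags & (MDL_IRET | MDL_COMPAT32)) != 0;
    }
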

@ -1,4 +1,4 @@
/* $NetBSD: machdep.c,v 1.188 2012/07/08 20:14:11 dsl Exp $ */
/* $NetBSD: machdep.c,v 1.189 2012/07/15 15:17:56 dsl Exp $ */
/*-
* Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@ -111,7 +111,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.188 2012/07/08 20:14:11 dsl Exp $");
__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.189 2012/07/15 15:17:56 dsl Exp $");
/* #define XENDEBUG_LOW */
@ -2054,7 +2054,7 @@ cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
tf->tf_err = err;
tf->tf_trapno = trapno;
l->l_md.md_flags |= MDP_IRET;
l->l_md.md_flags |= MDL_IRET;
}
if (pcb->pcb_fpcpu != NULL)


@ -1,4 +1,4 @@
/* $NetBSD: netbsd32_machdep.c,v 1.78 2012/07/08 20:14:11 dsl Exp $ */
/* $NetBSD: netbsd32_machdep.c,v 1.79 2012/07/15 15:17:56 dsl Exp $ */
/*
* Copyright (c) 2001 Wasabi Systems, Inc.
@ -36,7 +36,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.78 2012/07/08 20:14:11 dsl Exp $");
__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.79 2012/07/15 15:17:56 dsl Exp $");
#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
@ -83,8 +83,6 @@ __KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.78 2012/07/08 20:14:11 dsl Ex
const char machine32[] = "i386";
const char machine_arch32[] = "i386";
extern void (osyscall_return)(void);
#ifdef MTRR
static int x86_64_get_mtrr32(struct lwp *, void *, register_t *);
static int x86_64_set_mtrr32(struct lwp *, void *, register_t *);
@ -127,7 +125,6 @@ netbsd32_setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
struct pcb *pcb;
struct trapframe *tf;
struct proc *p = l->l_proc;
void **retaddr;
pcb = lwp_getpcb(l);
@ -143,6 +140,7 @@ netbsd32_setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
netbsd32_adjust_limits(p);
l->l_md.md_flags &= ~MDL_USEDFPU;
l->l_md.md_flags |= MDL_COMPAT32; /* Force iret not sysret */
pcb->pcb_flags = PCB_COMPAT32;
pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
@ -167,10 +165,6 @@ netbsd32_setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
tf->tf_rflags = PSL_USERSET;
tf->tf_rsp = stack;
tf->tf_ss = LSEL(LUDATA32_SEL, SEL_UPL);
/* XXX frob return address to return via old iret method, not sysret */
retaddr = (void **)tf - 1;
*retaddr = (void *)osyscall_return;
}
#ifdef COMPAT_16

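The netbsd32_setregs() change in isolation: the removed lines apparently
overwrote the saved return address just below the trapframe so that the
syscall handler 'returned' to osyscall_return (and so exited via iret); the
new code only tags the lwp and lets the common syscall exit test the flag.
A hedged fragment, using the names from the diff:

    /* old hack (removed):
     *      retaddr = (void **)tf - 1;
     *      *retaddr = (void *)osyscall_return;
     * new: mark the lwp; the syscall exit path checks MDL_COMPAT32 itself.
     */
    l->l_md.md_flags |= MDL_COMPAT32;       /* force iret, not sysret */
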

@ -1,4 +1,4 @@
/* $NetBSD: trap.c,v 1.72 2012/06/30 23:33:10 rmind Exp $ */
/* $NetBSD: trap.c,v 1.73 2012/07/15 15:17:56 dsl Exp $ */
/*-
* Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@ -68,7 +68,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.72 2012/06/30 23:33:10 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.73 2012/07/15 15:17:56 dsl Exp $");
#include "opt_ddb.h"
#include "opt_kgdb.h"
@ -343,7 +343,7 @@ kernelfault:
switch (*(uint16_t *)frame->tf_rip) {
case 0xcf48: /* iretq */
/*
* The 'iretq' instruction faulted, wo we have the
* The 'iretq' instruction faulted, so we have the
* 'user' registers saved after the kernel
* %rip:%cs:%fl:%rsp:%ss of the iret, and below that
* the user %rip:%cs:%fl:%rsp:%ss the 'iret' was


@ -1,4 +1,4 @@
/* $NetBSD: frameasm.h,v 1.19 2012/05/17 19:38:53 dsl Exp $ */
/* $NetBSD: frameasm.h,v 1.20 2012/07/15 15:17:56 dsl Exp $ */
#ifndef _AMD64_MACHINE_FRAMEASM_H
#define _AMD64_MACHINE_FRAMEASM_H
@ -35,6 +35,8 @@
#define STI(temp_reg) sti
#endif /* XEN */
#define SWAPGS NOT_XEN(swapgs)
/*
* These are used on interrupt or trap entry or exit.
*/
@ -79,7 +81,7 @@
testb $SEL_UPL,TF_CS(%rsp) ; \
je kernel_trap ; \
usertrap ; \
swapgs ; \
SWAPGS ; \
movw %gs,TF_GS(%rsp) ; \
movw %fs,TF_FS(%rsp) ; \
movw %es,TF_ES(%rsp) ; \
@ -93,11 +95,11 @@ usertrap ; \
INTR_RESTORE_GPRS ; \
testq $SEL_UPL,TF_CS(%rsp) /* Interrupted %cs */ ; \
je 99f ; \
/* XEN: Disabling events before going to user mode sounds like a BAD idea */ \
/* Disable interrupts until the 'iret', user registers loaded. */ \
NOT_XEN(cli;) \
movw TF_ES(%rsp),%es ; \
movw TF_DS(%rsp),%ds ; \
swapgs ; \
SWAPGS ; \
99: addq $TF_REGSIZE+16,%rsp /* + T_xxx and error code */ ; \
iretq

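Assuming NOT_XEN() is defined as usual earlier in this header (it passes its
argument through on bare metal and drops it under Xen; that definition is not
part of this hunk), the new SWAPGS macro behaves as sketched:

    #ifdef XEN
    #define NOT_XEN(x)               /* assumed: argument dropped under Xen */
    #else
    #define NOT_XEN(x)      x        /* assumed: argument emitted natively  */
    #endif
    #define SWAPGS  NOT_XEN(swapgs)  /* real 'swapgs' natively, nothing on XEN */
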

@ -1,4 +1,4 @@
/* $NetBSD: proc.h,v 1.14 2012/07/08 20:14:11 dsl Exp $ */
/* $NetBSD: proc.h,v 1.15 2012/07/15 15:17:56 dsl Exp $ */
/*
* Copyright (c) 1991 Regents of the University of California.
@ -54,6 +54,8 @@ struct mdlwp {
};
#define MDL_USEDFPU 0x0001 /* has used the FPU */
#define MDL_COMPAT32 0x0008 /* i386, always return via iret */
#define MDL_IRET 0x0010 /* force return via iret, not sysret */
struct mdproc {
int md_flags;
@ -62,10 +64,7 @@ struct mdproc {
};
/* md_flags */
#define MDP_COMPAT 0x0002 /* x86 compatibility process */
#define MDP_SYSCALL 0x0004 /* entered kernel via syscall ins */
#define MDP_USEDMTRR 0x0008 /* has set volatile MTRRs */
#define MDP_IRET 0x0010 /* return via iret, not sysret */
#define UAREA_PCB_OFFSET (USPACE - ALIGN(sizeof(struct pcb)))
#define KSTACK_LOWEST_ADDR(l) \


@ -1,4 +1,4 @@
/* $NetBSD: cpu.h,v 1.51 2012/06/16 17:30:18 chs Exp $ */
/* $NetBSD: cpu.h,v 1.52 2012/07/15 15:17:56 dsl Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
@ -407,7 +407,6 @@ void lgdt_finish(void);
struct pcb;
void savectx(struct pcb *);
void lwp_trampoline(void);
void child_trampoline(void);
#ifdef XEN
void startrtclock(void);
void xen_delay(unsigned int);


@ -1,4 +1,4 @@
/* $NetBSD: vm_machdep.c,v 1.15 2012/02/19 21:06:35 rmind Exp $ */
/* $NetBSD: vm_machdep.c,v 1.16 2012/07/15 15:17:56 dsl Exp $ */
/*-
* Copyright (c) 1982, 1986 The Regents of the University of California.
@ -80,7 +80,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.15 2012/02/19 21:06:35 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.16 2012/07/15 15:17:56 dsl Exp $");
#include "opt_mtrr.h"
@ -224,10 +224,7 @@ cpu_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize,
#ifdef __x86_64__
sf->sf_r12 = (uint64_t)func;
sf->sf_r13 = (uint64_t)arg;
if (func == child_return && !(l2->l_proc->p_flag & PK_32))
sf->sf_rip = (uint64_t)child_trampoline;
else
sf->sf_rip = (uint64_t)lwp_trampoline;
sf->sf_rip = (uint64_t)lwp_trampoline;
pcb2->pcb_rsp = (uint64_t)sf;
pcb2->pcb_rbp = (uint64_t)l2;
#else


@ -1,4 +1,4 @@
/* $NetBSD: linux32_machdep.c,v 1.30 2012/07/08 20:14:12 dsl Exp $ */
/* $NetBSD: linux32_machdep.c,v 1.31 2012/07/15 15:17:56 dsl Exp $ */
/*-
* Copyright (c) 2006 Emmanuel Dreyfus, all rights reserved.
@ -31,7 +31,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.30 2012/07/08 20:14:12 dsl Exp $");
__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.31 2012/07/15 15:17:56 dsl Exp $");
#include <sys/param.h>
#include <sys/proc.h>
@ -72,8 +72,6 @@ extern char linux32_sigcode[];
extern char linux32_rt_sigcode[];
extern char linux32_esigcode[];
extern void (osyscall_return)(void);
static void linux32_save_ucontext(struct lwp *, struct trapframe *,
const sigset_t *, struct sigaltstack *, struct linux32_ucontext *);
static void linux32_save_sigcontext(struct lwp *, struct trapframe *,
@ -274,7 +272,6 @@ linux32_setregs(struct lwp *l, struct exec_package *pack, u_long stack)
struct pcb *pcb = lwp_getpcb(l);
struct trapframe *tf;
struct proc *p = l->l_proc;
void **retaddr;
/* If we were using the FPU, forget about it. */
if (pcb->pcb_fpcpu != NULL)
@ -287,6 +284,7 @@ linux32_setregs(struct lwp *l, struct exec_package *pack, u_long stack)
netbsd32_adjust_limits(p);
l->l_md.md_flags &= ~MDL_USEDFPU;
l->l_md.md_flags |= MDL_COMPAT32; /* Forces iret not sysret */
pcb->pcb_flags = PCB_COMPAT32;
pcb->pcb_savefpu.fp_fxsave.fx_fcw = __Linux_NPXCW__;
pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
@ -319,11 +317,6 @@ linux32_setregs(struct lwp *l, struct exec_package *pack, u_long stack)
tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL);
cpu_fsgs_zero(l);
cpu_fsgs_reload(l, GSEL(GUDATA32_SEL, SEL_UPL), GSEL(GUDATA32_SEL, SEL_UPL));
/* XXX frob return address to return via old iret method, not sysret */
retaddr = (void **)tf - 1;
*retaddr = (void *)osyscall_return;
return;
}
static void