diff --git a/headers/private/kernel/arch/x86/arch_cpu.h b/headers/private/kernel/arch/x86/arch_cpu.h index 836b27f7b1..dc0bafab4f 100644 --- a/headers/private/kernel/arch/x86/arch_cpu.h +++ b/headers/private/kernel/arch/x86/arch_cpu.h @@ -36,6 +36,8 @@ #define IA32_MSR_MTRR_PHYSICAL_BASE_0 0x200 #define IA32_MSR_MTRR_PHYSICAL_MASK_0 0x201 +#define IA32_MSR_EFER 0xc0000080 + // x86_64 MSRs. #define IA32_MSR_STAR 0xc0000081 #define IA32_MSR_LSTAR 0xc0000082 diff --git a/src/system/kernel/arch/x86/x86_signals.cpp b/src/system/kernel/arch/x86/32/signals.cpp similarity index 100% rename from src/system/kernel/arch/x86/x86_signals.cpp rename to src/system/kernel/arch/x86/32/signals.cpp diff --git a/src/system/kernel/arch/x86/x86_signals_asm.S b/src/system/kernel/arch/x86/32/signals_asm.S similarity index 100% rename from src/system/kernel/arch/x86/x86_signals_asm.S rename to src/system/kernel/arch/x86/32/signals_asm.S diff --git a/src/system/kernel/arch/x86/x86_syscalls.cpp b/src/system/kernel/arch/x86/32/syscalls.cpp similarity index 98% rename from src/system/kernel/arch/x86/x86_syscalls.cpp rename to src/system/kernel/arch/x86/32/syscalls.cpp index bd14bb0faf..3b3ffedba1 100644 --- a/src/system/kernel/arch/x86/x86_syscalls.cpp +++ b/src/system/kernel/arch/x86/32/syscalls.cpp @@ -76,7 +76,7 @@ init_amd_syscall_registers(void* dummy, int cpuNum) void -x86_initialize_commpage_syscall(void) +x86_initialize_syscall(void) { void* syscallCode = (void *)&_user_syscall_int; void* syscallCodeEnd = &_user_syscall_int_end; diff --git a/src/system/kernel/arch/x86/syscall.S b/src/system/kernel/arch/x86/32/syscalls_asm.S similarity index 100% rename from src/system/kernel/arch/x86/syscall.S rename to src/system/kernel/arch/x86/32/syscalls_asm.S diff --git a/src/system/kernel/arch/x86/64/arch.S b/src/system/kernel/arch/x86/64/arch.S index 7ec20cb39a..cbaeec86cd 100644 --- a/src/system/kernel/arch/x86/64/arch.S +++ b/src/system/kernel/arch/x86/64/arch.S @@ -13,6 +13,7 @@ #include 
#include "asm_offsets.h" +#include "syscall_numbers.h" .text @@ -117,6 +118,17 @@ FUNCTION(x86_swap_pgdir): FUNCTION_END(x86_swap_pgdir) +/* thread exit stub - copied to the userspace stack in arch_thread_enter_userspace() */ +.align 8 +FUNCTION(x86_userspace_thread_exit): + movq %rax, %rdi + movq $SYSCALL_EXIT_THREAD, %rax + syscall +.align 8 +FUNCTION_END(x86_userspace_thread_exit) +SYMBOL(x86_end_userspace_thread_exit): + + null_idt_descr: .word 0 .quad 0 diff --git a/src/system/kernel/arch/x86/64/interrupts.S b/src/system/kernel/arch/x86/64/interrupts.S index 7987ed27a8..1169556828 100644 --- a/src/system/kernel/arch/x86/64/interrupts.S +++ b/src/system/kernel/arch/x86/64/interrupts.S @@ -10,6 +10,8 @@ #include #include "asm_offsets.h" +#include "syscall_numbers.h" +#include "syscall_table.h" // Push the remainder of the interrupt frame onto the stack. @@ -127,11 +129,15 @@ FUNCTION(int_bottom): // Frame pointer is the iframe. movq %rsp, %rbp + // TODO: Kernel entry work. + // Call the interrupt handler. movq %rsp, %rdi movq IFRAME_vector(%rsp), %rax call *gInterruptHandlerTable(, %rax, 8) + // TODO: Kernel exit work. + // Restore the saved registers. RESTORE_IFRAME() @@ -145,6 +151,73 @@ FUNCTION(int_bottom): FUNCTION_END(int_bottom) +// SYSCALL entry point. +FUNCTION(x86_64_syscall_entry): + // Upon entry, RSP still points at the user stack. Load the kernel GS + // segment base address, which points at the current thread's arch_thread + // structure. This contains our kernel stack pointer and a temporary + // scratch space to store the user stack pointer in before we can push it + // to the stack. + swapgs + movq %rsp, %gs:ARCH_THREAD_user_rsp + movq %gs:ARCH_THREAD_syscall_rsp, %rsp + + // Set up an iframe on the stack (R11 = saved RFLAGS, RCX = saved RIP). 
+ push $USER_DATA_SEG // ss + push %gs:ARCH_THREAD_user_rsp // rsp + push %r11 // flags + push $USER_CODE_SEG // cs + push %rcx // ip + push $0 // error_code + push $0 // vector + PUSH_IFRAME_BOTTOM(IFRAME_TYPE_SYSCALL) + + // Frame pointer is the iframe. + movq %rsp, %rbp + + // TODO: Kernel entry work. + + // Check whether the syscall number is valid. + cmp $SYSCALL_COUNT, %rax + jae .Lbad_syscall_number + + // Get the system call table entry. Note I'm hardcoding the shift because + // sizeof(syscall_info) is 16 and scale factors of 16 aren't supported, + // so can't just do leaq kSyscallInfos(, %rax, SYSCALL_INFO_sizeof). + shl $4, %rax + leaq kSyscallInfos(, %rax, 1), %r12 + + // Move 4th argument to the correct register, it is put in R10 as RCX is + // used by SYSCALL. + movq %r10, %rcx + + // TODO: > 6 arguments. + + // No longer need interrupts disabled. + sti + + // Call the function and save its return value. + call *SYSCALL_INFO_function(%r12) + movq %rax, IFRAME_ax(%rsp) + + cli +.Lbad_syscall_number: + // TODO: Kernel exit work, check for canonical return address, syscall restart. + + // Restore the iframe and RCX/R11 for SYSRET. + RESTORE_IFRAME() + addq $16, %rsp + pop %rcx + addq $8, %rsp + pop %r11 + pop %rsp + + // Restore previous GS base and return. + swapgs + sysretq +FUNCTION_END(x86_64_syscall_entry) + + /*! \fn void x86_return_to_userland(iframe* frame) \brief Returns to the userland environment given by \a frame. @@ -158,5 +231,14 @@ FUNCTION_END(int_bottom) \param frame The iframe defining the userland environment. */ FUNCTION(x86_return_to_userland): - ud2a + movq %rdi, %rbp + movq %rbp, %rsp + + // TODO: Kernel exit work. + + // Restore the frame and return. 
+ RESTORE_IFRAME() + addq $16, %rsp + swapgs + iretq FUNCTION_END(x86_return_to_userland) diff --git a/src/system/kernel/arch/x86/64/signals.cpp b/src/system/kernel/arch/x86/64/signals.cpp new file mode 100644 index 0000000000..0b07851096 --- /dev/null +++ b/src/system/kernel/arch/x86/64/signals.cpp @@ -0,0 +1,26 @@ +/* + * Copyright 2012, Alex Smith, alex@alex-smith.me.uk. + * Distributed under the terms of the MIT License. + */ + + +#include "x86_signals.h" + +#include + +#include + +#include +#include +#include +#include + +#include "syscall_numbers.h" + + +void +x86_initialize_commpage_signal_handler() +{ + // TODO x86_64 +} + diff --git a/src/system/kernel/arch/x86/64/stubs.cpp b/src/system/kernel/arch/x86/64/stubs.cpp index 292572cc25..9ff4e715bc 100644 --- a/src/system/kernel/arch/x86/64/stubs.cpp +++ b/src/system/kernel/arch/x86/64/stubs.cpp @@ -37,20 +37,6 @@ #include -status_t -arch_commpage_init(void) -{ - return B_OK; -} - - -status_t -arch_commpage_init_post_cpus(void) -{ - return B_OK; -} - - // The software breakpoint instruction (int3). const uint8 kX86SoftwareBreakpoint[1] = { 0xcc }; diff --git a/src/system/kernel/arch/x86/64/syscalls.cpp b/src/system/kernel/arch/x86/64/syscalls.cpp new file mode 100644 index 0000000000..20bf44ef4e --- /dev/null +++ b/src/system/kernel/arch/x86/64/syscalls.cpp @@ -0,0 +1,58 @@ +/* + * Copyright 2012, Alex Smith, alex@alex-smith.me.uk. + * Distributed under the terms of the MIT License. + */ + + +#include "x86_syscalls.h" + +#include + +#include +#include + + +// SYSCALL handler (in interrupts.S). +extern "C" void x86_64_syscall_entry(void); + + +static void +init_syscall_registers(void* dummy, int cpuNum) +{ + // Enable SYSCALL (EFER.SCE = 1). + x86_write_msr(IA32_MSR_EFER, x86_read_msr(IA32_MSR_EFER) | (1 << 0)); + + // Flags to clear upon entry. Want interrupts disabled and the direction + // flag cleared. + x86_write_msr(IA32_MSR_FMASK, X86_EFLAGS_INTERRUPT | X86_EFLAGS_DIRECTION); + + // Entry point address. 
+ x86_write_msr(IA32_MSR_LSTAR, (addr_t)x86_64_syscall_entry); + + // Segments that will be set upon entry and return. This is very strange + // and requires a specific ordering of segments in the GDT. Upon entry: + // - CS is set to IA32_STAR[47:32] + // - SS is set to IA32_STAR[47:32] + 8 + // Upon return: + // - CS is set to IA32_STAR[63:48] + 16 + // - SS is set to IA32_STAR[63:48] + 8 + // From this we get: + // - Entry CS = KERNEL_CODE_SEG + // - Entry SS = KERNEL_CODE_SEG + 8 = KERNEL_DATA_SEG + // - Return CS = KERNEL_DATA_SEG + 16 = USER_CODE_SEG + // - Return SS = KERNEL_DATA_SEG + 8 = USER_DATA_SEG + x86_write_msr(IA32_MSR_STAR, ((uint64)(KERNEL_DATA_SEG | 3) << 48) + | ((uint64)KERNEL_CODE_SEG << 32)); +} + + +// #pragma mark - + + +void +x86_initialize_syscall(void) +{ + // SYSCALL/SYSRET are always available on x86_64 so we just use them, no + // need to use the commpage. Tell all CPUs to initialize the SYSCALL MSRs. + call_all_cpus_sync(&init_syscall_registers, NULL); +} diff --git a/src/system/kernel/arch/x86/64/thread.cpp b/src/system/kernel/arch/x86/64/thread.cpp index b4e9d8ac41..bd41ebaa3a 100644 --- a/src/system/kernel/arch/x86/64/thread.cpp +++ b/src/system/kernel/arch/x86/64/thread.cpp @@ -94,6 +94,9 @@ arch_thread_init_kthread_stack(Thread* thread, void* _stack, void* _stackTop, TRACE("arch_thread_init_kthread_stack: stack top %p, function %p, data: " "%p\n", _stackTop, function, data); + // Save the stack top for system call entry. 
+ thread->arch_info.syscall_rsp = stackTop; + // x86_64 uses registers for argument passing, first argument in RDI, // however we don't save RDI on every context switch (there is no need // for us to: it is not callee-save, and only contains the first argument @@ -148,8 +151,46 @@ status_t arch_thread_enter_userspace(Thread* thread, addr_t entry, void* args1, void* args2) { - panic("arch_thread_enter_userspace: TODO\n"); - return B_ERROR; + addr_t stackTop = thread->user_stack_base + thread->user_stack_size; + + TRACE("arch_thread_enter_userspace: entry %#lx, args %p %p, " + "stackTop %#lx\n", entry, args1, args2, stackTop); + + // Copy the little stub that calls exit_thread() when the thread entry + // function returns. + // TODO: This will become a problem later if we want to support execute + // disable, the stack shouldn't really be executable. + size_t codeSize = (addr_t)x86_end_userspace_thread_exit + - (addr_t)x86_userspace_thread_exit; + stackTop -= codeSize; + if (user_memcpy((void*)stackTop, (const void*)&x86_userspace_thread_exit, + codeSize) != B_OK) + return B_BAD_ADDRESS; + + // Copy the address of the stub to the top of the stack to act as the + // return address. + addr_t codeAddr = stackTop; + stackTop -= sizeof(codeAddr); + if (user_memcpy((void*)stackTop, (const void*)&codeAddr, sizeof(codeAddr)) + != B_OK) + return B_BAD_ADDRESS; + + // Prepare the user iframe. + iframe frame = {}; + frame.type = IFRAME_TYPE_SYSCALL; + frame.si = (uint64)args2; + frame.di = (uint64)args1; + frame.ip = entry; + frame.cs = USER_CODE_SEG; + frame.flags = X86_EFLAGS_RESERVED1 | X86_EFLAGS_INTERRUPT + | (3 << X86_EFLAGS_IO_PRIVILEG_LEVEL_SHIFT); + frame.sp = stackTop; + frame.ss = USER_DATA_SEG; + + // Return to userland. Never returns. 
+ x86_initial_return_to_userland(thread, &frame); + + return B_OK; } diff --git a/src/system/kernel/arch/x86/Jamfile b/src/system/kernel/arch/x86/Jamfile index 6fa9cea98d..579dda7d65 100644 --- a/src/system/kernel/arch/x86/Jamfile +++ b/src/system/kernel/arch/x86/Jamfile @@ -25,6 +25,8 @@ if $(TARGET_ARCH) = x86_64 { int.cpp interrupts.S stubs.cpp + signals.cpp + syscalls.cpp thread.cpp # paging @@ -47,17 +49,16 @@ if $(TARGET_ARCH) = x86_64 { cpuid.S int.cpp interrupts.S + signals.cpp + signals_asm.S + syscalls.cpp + syscalls_asm.S thread.cpp vm86.cpp - arch_commpage.cpp arch_user_debugger.cpp ioapic.cpp irq_routing_table.cpp - syscall.S - x86_signals.cpp - x86_signals_asm.S - x86_syscalls.cpp # paging x86_physical_page_mapper_large_memory.cpp @@ -76,6 +77,7 @@ if $(TARGET_ARCH) = x86_64 { local archGenericSources = arch_cpu.cpp + arch_commpage.cpp arch_debug.cpp arch_debug_console.cpp arch_elf.cpp @@ -114,8 +116,7 @@ KernelMergeObject kernel_arch_x86.o : CreateAsmStructOffsetsHeader asm_offsets.h : asm_offsets.cpp ; # We need to specify the dependency on the generated syscalls file explicitly. 
-Includes [ FGristFiles arch_interrupts.S arch_x86.S x86_signals.cpp - x86_signals_asm.S ] +Includes [ FGristFiles interrupts.S arch.S signals.cpp signals_asm.S ] : syscall_numbers.h ; -Includes [ FGristFiles arch_interrupts.S ] +Includes [ FGristFiles interrupts.S ] : syscall_table.h ; diff --git a/src/system/kernel/arch/x86/arch_commpage.cpp b/src/system/kernel/arch/x86/arch_commpage.cpp index 8b9cc0bdb6..ccb142f3c2 100644 --- a/src/system/kernel/arch/x86/arch_commpage.cpp +++ b/src/system/kernel/arch/x86/arch_commpage.cpp @@ -22,7 +22,7 @@ status_t arch_commpage_init_post_cpus(void) { // select the optimum syscall mechanism and patch the commpage - x86_initialize_commpage_syscall(); + x86_initialize_syscall(); // initialize the signal handler code in the commpage x86_initialize_commpage_signal_handler(); diff --git a/src/system/kernel/arch/x86/x86_syscalls.h b/src/system/kernel/arch/x86/x86_syscalls.h index 788e01a253..85ae6d7878 100644 --- a/src/system/kernel/arch/x86/x86_syscalls.h +++ b/src/system/kernel/arch/x86/x86_syscalls.h @@ -9,21 +9,34 @@ #include -extern void (*gX86SetSyscallStack)(addr_t stackTop); +void x86_initialize_syscall(); -void x86_initialize_commpage_syscall(); +#ifdef __x86_64__ static inline void x86_set_syscall_stack(addr_t stackTop) { - // TODO: x86_64 -#ifndef __x86_64__ - if (gX86SetSyscallStack != NULL) - gX86SetSyscallStack(stackTop); -#endif + // Nothing to do here, the thread's stack pointer is always accessible + // via the GS segment. } +#else + + +extern void (*gX86SetSyscallStack)(addr_t stackTop); + + +static inline void +x86_set_syscall_stack(addr_t stackTop) +{ + if (gX86SetSyscallStack != NULL) + gX86SetSyscallStack(stackTop); +} + + +#endif // __x86_64__ + #endif // _KERNEL_ARCH_X86_SYSCALLS_H