diff --git a/apps/test-syscall-sysret.c b/apps/test-syscall-sysret.c new file mode 100644 index 00000000..26daa114 --- /dev/null +++ b/apps/test-syscall-sysret.c @@ -0,0 +1,14 @@ +#include +#include +#include + /* Test program: issues SYS_WRITE of a 14-byte greeting to STDOUT_FILENO three times using the raw syscall instruction, and returns the last result. NOTE(review): the three #include directives carry no header names in this copy of the patch, presumably lost in extraction; restore them before applying. */ +int main(int argc, char * argv[]){ + long ret = 0; +#ifdef __x86_64__ /* on other architectures the asm is skipped and main returns 0 */ + __asm__ __volatile__("syscall" : "=a"(ret) : "a"(SYS_WRITE), "D"(STDOUT_FILENO), "S"("Hello, world.\n"), "d"((long)14) : "rcx", "r11", "memory"); + __asm__ __volatile__("syscall" : "=a"(ret) : "a"(SYS_WRITE), "D"(STDOUT_FILENO), "S"("Hello, world.\n"), "d"((long)14) : "rcx", "r11", "memory"); + __asm__ __volatile__("syscall" : "=a"(ret) : "a"(SYS_WRITE), "D"(STDOUT_FILENO), "S"("Hello, world.\n"), "d"((long)14) : "rcx", "r11", "memory"); +#endif + + return ret; /* rax value left by the last syscall */ +} diff --git a/base/usr/include/kernel/process.h b/base/usr/include/kernel/process.h index dc8df064..b37e724b 100644 --- a/base/usr/include/kernel/process.h +++ b/base/usr/include/kernel/process.h @@ -207,7 +207,9 @@ struct ProcessorLocal { int cpu_model; int cpu_family; char cpu_model_name[48]; - const char * cpu_manufacturer; + const char * cpu_manufacturer; /* 0x68 */ + uintptr_t syscall_stack; /* 0x70: Should match TSS.RSP[0] */ + uintptr_t user_sysret_stack; /* 0x78: Used only at start of SYSCALL entry to store user RSP before pushing it */ #endif #ifdef __aarch64__ diff --git a/base/usr/include/syscall.h b/base/usr/include/syscall.h index f13d9340..3e25b91d 100644 --- a/base/usr/include/syscall.h +++ b/base/usr/include/syscall.h @@ -14,42 +14,51 @@ _Begin_C_Header #define DECL_SYSCALL5(fn,p1,p2,p3,p4,p5) long syscall_##fn(p1,p2,p3,p4,p5) #ifdef __x86_64__ + /* Entry instruction used by the DEFN_SYSCALLn wrappers: int $0x7F when __SYSCALL_INT7F is defined, otherwise the syscall instruction, whose clobber list additionally names rcx and r11. */ +#ifdef __SYSCALL_INT7F +# define __SYSCALL_ENTRY_INST "int $0x7F" +# define __SYSCALL_CLOBBERS "memory" +#else +# define __SYSCALL_ENTRY_INST "syscall" +# define __SYSCALL_CLOBBERS "rcx", "r11", "memory" +#endif + #define DEFN_SYSCALL0(fn, num) \ long syscall_##fn() { \ - long a = num; __asm__ __volatile__("int $0x7F" : "=a" (a) : "a" ((long)a)); \ + long a = num;
__asm__ __volatile__(__SYSCALL_ENTRY_INST : "=a" (a) : "a" ((long)a) : __SYSCALL_CLOBBERS); /* rax: syscall number in, result out */ \ return a; \ } #define DEFN_SYSCALL1(fn, num, P1) \ long syscall_##fn(P1 p1) { \ - long __res = num; __asm__ __volatile__("int $0x7F" \ + long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \ : "=a" (__res) \ - : "a" (__res), "D" ((long)(p1))); \ + : "a" (__res), "D" ((long)(p1)) : __SYSCALL_CLOBBERS ); /* p1 in rdi */ \ return __res; \ } #define DEFN_SYSCALL2(fn, num, P1, P2) \ long syscall_##fn(P1 p1, P2 p2) { \ - long __res = num; __asm__ __volatile__("int $0x7F" \ + long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \ : "=a" (__res) \ - : "a" (__res), "D" ((long)(p1)), "S"((long)(p2))); \ + : "a" (__res), "D" ((long)(p1)), "S"((long)(p2)) : __SYSCALL_CLOBBERS ); /* p1 in rdi, p2 in rsi */ \ return __res; \ } #define DEFN_SYSCALL3(fn, num, P1, P2, P3) \ long syscall_##fn(P1 p1, P2 p2, P3 p3) { \ - long __res = num; __asm__ __volatile__("int $0x7F" \ + long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \ : "=a" (__res) \ - : "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3))); \ + : "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)) : __SYSCALL_CLOBBERS ); /* p1 in rdi, p2 in rsi, p3 in rdx */ \ return __res; \ } #define DEFN_SYSCALL4(fn, num, P1, P2, P3, P4) \ long syscall_##fn(P1 p1, P2 p2, P3 p3, P4 p4) { \ register long p4_ __asm__("r10") = (long)p4; \ - long __res = num; __asm__ __volatile__("int $0x7F" \ + long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \ : "=a" (__res) \ - : "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)), "r"((long)(p4_))); \ + : "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)), "r"((long)(p4_)) : __SYSCALL_CLOBBERS ); /* first three as in DEFN_SYSCALL3, p4 through the r10 register variable */ \ return __res; \ } @@ -57,9 +66,9 @@ _Begin_C_Header long syscall_##fn(P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) { \ register long p4_ __asm__("r10") = (long)p4; \ register long p5_ __asm__("r8") = (long)p5; \ - long __res = num; __asm__ __volatile__("int $0x7F" \ + long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST
\ : "=a" (__res) \ - : "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)), "r"((long)(p4_)), "r"((long)(p5_))); \ + : "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)), "r"((long)(p4_)), "r"((long)(p5_)) : __SYSCALL_CLOBBERS ); /* p4 and p5 travel in the r10 and r8 register variables */ \ return __res; \ } #elif defined(__aarch64__) diff --git a/kernel/arch/x86_64/gdt.c b/kernel/arch/x86_64/gdt.c index 766a65a4..70baf0af 100644 --- a/kernel/arch/x86_64/gdt.c +++ b/kernel/arch/x86_64/gdt.c @@ -45,7 +45,7 @@ typedef struct { } __attribute__((packed)) gdt_pointer_t; typedef struct { - gdt_entry_t entries[6]; + gdt_entry_t entries[7]; /* one extra slot: the TSS descriptor moves from index 5 to index 6 */ gdt_entry_high_t tss_extra; gdt_pointer_t pointer; tss_entry_t tss; @@ -58,6 +58,7 @@ FullGDT gdt[32] __attribute__((used)) = {{ {0xFFFF, 0x0000, 0x00, 0x92, (1 << 5) | (1 << 7) | 0x0F, 0x00}, {0xFFFF, 0x0000, 0x00, 0xFA, (1 << 5) | (1 << 7) | 0x0F, 0x00}, {0xFFFF, 0x0000, 0x00, 0xF2, (1 << 5) | (1 << 7) | 0x0F, 0x00}, + {0xFFFF, 0x0000, 0x00, 0xFA, (1 << 5) | (1 << 7) | 0x0F, 0x00}, /* new entry 5 (selector 0x28): same descriptor bytes as the 0xFA entry two rows up; user.c now loads 0x28 | 0x03 into CS */ {0x0067, 0x0000, 0x00, 0xE9, 0x00, 0x00}, }, {0x00000000, 0x00000000}, @@ -75,10 +76,10 @@ void gdt_install(void) { gdt[i].pointer.base = (uintptr_t)&gdt[i].entries; uintptr_t addr = (uintptr_t)&gdt[i].tss; - gdt[i].entries[5].limit_low = sizeof(gdt[i].tss); - gdt[i].entries[5].base_low = (addr & 0xFFFF); - gdt[i].entries[5].base_middle = (addr >> 16) & 0xFF; - gdt[i].entries[5].base_high = (addr >> 24) & 0xFF; + gdt[i].entries[6].limit_low = sizeof(gdt[i].tss); /* TSS descriptor is now entry 6. NOTE(review): the static initializer uses 0x0067 while this stores sizeof(tss); confirm whether size minus one was intended */ + gdt[i].entries[6].base_low = (addr & 0xFFFF); /* address bits 0..15 */ + gdt[i].entries[6].base_middle = (addr >> 16) & 0xFF; /* address bits 16..23 */ + gdt[i].entries[6].base_high = (addr >> 24) & 0xFF; /* address bits 24..31; bits 32..63 go in tss_extra */ gdt[i].tss_extra.base_highest = (addr >> 32) & 0xFFFFFFFF; } @@ -92,7 +93,7 @@ void gdt_install(void) { "mov %%ax, %%ds\n" "mov %%ax, %%es\n" "mov %%ax, %%ss\n" - "mov $0x2b, %%ax\n" + "mov $0x33, %%ax\n" /* TSS offset */ "ltr %%ax\n" : : "r"(&gdt[0].pointer) ); @@ -104,6 +105,7 @@ void gdt_copy_to_trampoline(int ap, char * trampoline) { void arch_set_kernel_stack(uintptr_t stack)
{ gdt[this_core->cpu_id].tss.rsp[0] = stack; + this_core->syscall_stack = stack; /* keep the per-CPU copy that syscall_entry loads from %gs:0x70 in step with TSS.RSP[0] */ } void arch_set_tls_base(uintptr_t tlsbase) { diff --git a/kernel/arch/x86_64/idt.c b/kernel/arch/x86_64/idt.c index f97e0a8a..e7ece25c 100644 --- a/kernel/arch/x86_64/idt.c +++ b/kernel/arch/x86_64/idt.c @@ -693,3 +693,14 @@ struct regs * isr_handler(struct regs * r) { return out; } + /* C half of the SYSCALL path: called from the syscall_entry stub with the saved register frame, stamps time_switch, dispatches through _syscall_entrypoint, then checks signals and updates process times, and returns the frame the stub should restore. */ +struct regs * syscall_centry(struct regs * r) { + this_core->current_process->time_switch = arch_perf_timer(); + + struct regs * out = _syscall_entrypoint(r); + + process_check_signals(out); + update_process_times_on_exit(); + + return out; +} diff --git a/kernel/arch/x86_64/irq.S b/kernel/arch/x86_64/irq.S index 3e6457fd..5b888584 100644 --- a/kernel/arch/x86_64/irq.S +++ b/kernel/arch/x86_64/irq.S @@ -263,3 +263,68 @@ arch_enter_tasklet: popq %rdi popq %rsi jmpq *%rsi + + /* syscall_entry: target of the SYSCALL instruction (installed in LSTAR by load_processor_info). Builds a struct regs frame on the kernel stack, calls syscall_centry, unwinds the frame it returns and leaves through sysretq. */ +.extern syscall_centry +.global syscall_entry +.type syscall_entry, @function +syscall_entry: + swapgs /* SYSCALL only happens from userspace, so we must always swap gs */ + mov %rsp, %gs:0x78 /* Store user RSP temporarily */ + mov %gs:0x70, %rsp /* Restore kernel stack for this thread */ + + /* Normal `struct regs` layout, same as what we'd get on an interrupt */ + pushq $0x23 /* SS */ + pushq %gs:0x78 /* RSP */ + push %r11 /* RFLAGS - SYSCALL stores in r11 */ + pushq $0x2b /* CS */ + push %rcx /* RIP - SYSCALL stores in rcx */ + + pushq $0 /* Dummy error code */ + pushq $0 /* Dummy interrupt number */ + + push %rax + push %rbx + pushq $0 /* rcx is not valid, set to zero */ + push %rdx + push %rsi + push %rdi + push %rbp + push %r8 + push %r9 + push %r10 + pushq $0 /* r11 is not valid, set to zero */ + push %r12 + push %r13 + push %r14 + push %r15 + + mov %rsp, %rdi /* argument: pointer to the frame just pushed */ + call syscall_centry + mov %rax, %rsp /* unwind from whichever frame syscall_centry returned */ + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + add $8, %rsp /* skip the r11 placeholder */ + pop %r10 + pop %r9 + pop %r8 + pop %rbp + pop %rdi + pop %rsi + pop %rdx + add $8, %rsp /* skip the rcx placeholder */ + pop %rbx + pop %rax + + add $16, %rsp /* skip dummy interrupt number and error code */ + + pop %rcx /* saved RIP */ + add $8, %rsp /* skip saved CS */ + pop %r11 /* saved RFLAGS */ +
pop %rsp /* saved user RSP; the stack pointer is the user one from here on */ + /* NOTE(review): the two instructions below still run in kernel mode but on the user stack; confirm no interrupt can be delivered in this window. */ + swapgs + sysretq diff --git a/kernel/arch/x86_64/smp.c b/kernel/arch/x86_64/smp.c index cb7829ad..d6df3c40 100644 --- a/kernel/arch/x86_64/smp.c +++ b/kernel/arch/x86_64/smp.c @@ -66,7 +66,7 @@ static void __ap_bootstrap(void) { "mov $0x10, %%ax\n" "mov %%ax, %%ds\n" "mov %%ax, %%ss\n" - "mov $0x2b, %%ax\n" + "mov $0x33, %%ax\n" /* TSS offset in gdt */ "ltr %%ax\n" ".extern _ap_stack_base\n" "mov _ap_stack_base,%%rsp\n" @@ -155,6 +155,16 @@ void load_processor_info(void) { cpuid(0x80000004, brand[8], brand[9], brand[10], brand[11]); memcpy(processor_local_data[this_core->cpu_id].cpu_model_name, brand, 48); } + /* Program the SYSCALL/SYSRET MSRs on this CPU so that the syscall instruction lands in syscall_entry. */ + extern void syscall_entry(void); /* assembly stub defined in irq.S */ + uint32_t efer_hi, efer_lo; + asm volatile ("rdmsr" : "=d"(efer_hi), "=a"(efer_lo) : "c"(0xc0000080)); /* Read current EFER */ + asm volatile ("wrmsr" : : "c"(0xc0000080), "d"(efer_hi), "a"(efer_lo | 1)); /* Enable SYSCALL/SYSRET in EFER */ + asm volatile ("wrmsr" : : "c"(0xC0000081), "d"(0x1b0008), "a"(0)); /* Set segment bases in STAR */ /* NOTE(review): presumably 0x0008 is the kernel selector base and 0x001b the SYSRET base that yields the CS 0x2b and SS 0x23 pushed by syscall_entry; confirm against the AMD64 manual */ + asm volatile ("wrmsr" : : "c"(0xC0000082), /* Set SYSCALL entry point in LSTAR */ + "d"((uintptr_t)&syscall_entry >> 32), + "a"((uintptr_t)&syscall_entry & 0xFFFFffff)); + asm volatile ("wrmsr" : : "c"(0xC0000084), "d"(0), "a"(0x700)); /* SFMASK: Direction flag, interrupt flag, trap flag are all cleared */ } /** diff --git a/kernel/arch/x86_64/user.c b/kernel/arch/x86_64/user.c index 4fe95ea5..d8ba1e88 100644 --- a/kernel/arch/x86_64/user.c +++ b/kernel/arch/x86_64/user.c @@ -30,7 +30,7 @@ */ void arch_enter_user(uintptr_t entrypoint, int argc, char * argv[], char * envp[], uintptr_t stack) { struct regs ret; - ret.cs = 0x18 | 0x03; + ret.cs = 0x28 | 0x03; /* 0x2b: user code selector now taken from GDT entry 5, the same CS value syscall_entry pushes */ ret.ss = 0x20 | 0x03; ret.rip = entrypoint; ret.rflags = (1 << 21) | (1 << 9); @@ -116,7 +116,7 @@ int arch_return_from_signal_handler(struct regs *r) { */ void arch_enter_signal_handler(uintptr_t entrypoint, int signum, struct regs *r) { struct regs ret; - ret.cs = 0x18 | 0x03; + ret.cs = 0x28 | 0x03; /* 0x2b: user code selector now taken from GDT entry 5, the same CS value syscall_entry pushes */ ret.ss = 0x20 |
0x03; ret.rip = entrypoint; ret.rflags = (1 << 21) | (1 << 9);