kernel: support syscall/sysret

This commit is contained in:
K. Lange 2023-11-05 14:32:47 +09:00
parent cf04ba62e8
commit d88712e956
8 changed files with 134 additions and 21 deletions

View File

@ -0,0 +1,14 @@
#include <unistd.h>
#include <stdio.h>
#include <syscall_nums.h>
/**
 * Exercise the raw SYSCALL instruction on x86-64.
 *
 * Invokes SYS_WRITE three times, each time asking for the same 14-byte
 * greeting to be written to STDOUT_FILENO, and returns whatever the final
 * call left in RAX. On other architectures no call is made and 0 is returned.
 */
int main(int argc, char * argv[]){
	long result = 0;
#ifdef __x86_64__
	for (int attempt = 0; attempt < 3; ++attempt) {
		__asm__ __volatile__(
			"syscall"
			: "=a"(result)
			: "a"(SYS_WRITE), "D"(STDOUT_FILENO), "S"("Hello, world.\n"), "d"((long)14)
			: "rcx", "r11", "memory" /* rcx and r11 are overwritten by SYSCALL */
		);
	}
#endif
	return result;
}

View File

@ -207,7 +207,9 @@ struct ProcessorLocal {
int cpu_model;
int cpu_family;
char cpu_model_name[48];
const char * cpu_manufacturer;
const char * cpu_manufacturer; /* 0x68 */
uintptr_t syscall_stack; /* 0x70: Should match TSS.RSP[0] */
uintptr_t user_sysret_stack; /* 0x78: Used only at start of SYSCALL entry to store user RSP before pushing it */
#endif
#ifdef __aarch64__

View File

@ -14,42 +14,51 @@ _Begin_C_Header
#define DECL_SYSCALL5(fn,p1,p2,p3,p4,p5) long syscall_##fn(p1,p2,p3,p4,p5)
#ifdef __x86_64__
#ifdef __SYSCALL_INT7F
# define __SYSCALL_ENTRY_INST "int $0x7F"
# define __SYSCALL_CLOBBERS "memory"
#else
# define __SYSCALL_ENTRY_INST "syscall"
# define __SYSCALL_CLOBBERS "rcx", "r11", "memory"
#endif
#define DEFN_SYSCALL0(fn, num) \
long syscall_##fn() { \
long a = num; __asm__ __volatile__("int $0x7F" : "=a" (a) : "a" ((long)a)); \
long a = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST : "=a" (a) : "a" ((long)a) : __SYSCALL_CLOBBERS); \
return a; \
}
#define DEFN_SYSCALL1(fn, num, P1) \
long syscall_##fn(P1 p1) { \
long __res = num; __asm__ __volatile__("int $0x7F" \
long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \
: "=a" (__res) \
: "a" (__res), "D" ((long)(p1))); \
: "a" (__res), "D" ((long)(p1)) : __SYSCALL_CLOBBERS ); \
return __res; \
}
#define DEFN_SYSCALL2(fn, num, P1, P2) \
long syscall_##fn(P1 p1, P2 p2) { \
long __res = num; __asm__ __volatile__("int $0x7F" \
long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \
: "=a" (__res) \
: "a" (__res), "D" ((long)(p1)), "S"((long)(p2))); \
: "a" (__res), "D" ((long)(p1)), "S"((long)(p2)) : __SYSCALL_CLOBBERS ); \
return __res; \
}
#define DEFN_SYSCALL3(fn, num, P1, P2, P3) \
long syscall_##fn(P1 p1, P2 p2, P3 p3) { \
long __res = num; __asm__ __volatile__("int $0x7F" \
long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \
: "=a" (__res) \
: "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3))); \
: "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)) : __SYSCALL_CLOBBERS ); \
return __res; \
}
#define DEFN_SYSCALL4(fn, num, P1, P2, P3, P4) \
long syscall_##fn(P1 p1, P2 p2, P3 p3, P4 p4) { \
register long p4_ __asm__("r10") = (long)p4; \
long __res = num; __asm__ __volatile__("int $0x7F" \
long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \
: "=a" (__res) \
: "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)), "r"((long)(p4_))); \
: "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)), "r"((long)(p4_)) : __SYSCALL_CLOBBERS ); \
return __res; \
}
@ -57,9 +66,9 @@ _Begin_C_Header
long syscall_##fn(P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) { \
register long p4_ __asm__("r10") = (long)p4; \
register long p5_ __asm__("r8") = (long)p5; \
long __res = num; __asm__ __volatile__("int $0x7F" \
long __res = num; __asm__ __volatile__(__SYSCALL_ENTRY_INST \
: "=a" (__res) \
: "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)), "r"((long)(p4_)), "r"((long)(p5_))); \
: "a" (__res), "D" ((long)(p1)), "S"((long)(p2)), "d"((long)(p3)), "r"((long)(p4_)), "r"((long)(p5_)) : __SYSCALL_CLOBBERS ); \
return __res; \
}
#elif defined(__aarch64__)

View File

@ -45,7 +45,7 @@ typedef struct {
} __attribute__((packed)) gdt_pointer_t;
typedef struct {
gdt_entry_t entries[6];
gdt_entry_t entries[7];
gdt_entry_high_t tss_extra;
gdt_pointer_t pointer;
tss_entry_t tss;
@ -58,6 +58,7 @@ FullGDT gdt[32] __attribute__((used)) = {{
{0xFFFF, 0x0000, 0x00, 0x92, (1 << 5) | (1 << 7) | 0x0F, 0x00},
{0xFFFF, 0x0000, 0x00, 0xFA, (1 << 5) | (1 << 7) | 0x0F, 0x00},
{0xFFFF, 0x0000, 0x00, 0xF2, (1 << 5) | (1 << 7) | 0x0F, 0x00},
{0xFFFF, 0x0000, 0x00, 0xFA, (1 << 5) | (1 << 7) | 0x0F, 0x00},
{0x0067, 0x0000, 0x00, 0xE9, 0x00, 0x00},
},
{0x00000000, 0x00000000},
@ -75,10 +76,10 @@ void gdt_install(void) {
gdt[i].pointer.base = (uintptr_t)&gdt[i].entries;
uintptr_t addr = (uintptr_t)&gdt[i].tss;
gdt[i].entries[5].limit_low = sizeof(gdt[i].tss);
gdt[i].entries[5].base_low = (addr & 0xFFFF);
gdt[i].entries[5].base_middle = (addr >> 16) & 0xFF;
gdt[i].entries[5].base_high = (addr >> 24) & 0xFF;
gdt[i].entries[6].limit_low = sizeof(gdt[i].tss);
gdt[i].entries[6].base_low = (addr & 0xFFFF);
gdt[i].entries[6].base_middle = (addr >> 16) & 0xFF;
gdt[i].entries[6].base_high = (addr >> 24) & 0xFF;
gdt[i].tss_extra.base_highest = (addr >> 32) & 0xFFFFFFFF;
}
@ -92,7 +93,7 @@ void gdt_install(void) {
"mov %%ax, %%ds\n"
"mov %%ax, %%es\n"
"mov %%ax, %%ss\n"
"mov $0x2b, %%ax\n"
"mov $0x33, %%ax\n" /* TSS offset */
"ltr %%ax\n"
: : "r"(&gdt[0].pointer)
);
@ -104,6 +105,7 @@ void gdt_copy_to_trampoline(int ap, char * trampoline) {
/**
 * Record the kernel stack address for the current core.
 *
 * The same value is stored in two places: the syscall_stack field of the
 * per-core data, and RSP[0] of the TSS belonging to this core's GDT.
 *
 * @param stack Address to store in both locations.
 */
void arch_set_kernel_stack(uintptr_t stack) {
	this_core->syscall_stack = stack;
	gdt[this_core->cpu_id].tss.rsp[0] = stack;
}
void arch_set_tls_base(uintptr_t tlsbase) {

View File

@ -693,3 +693,14 @@ struct regs * isr_handler(struct regs * r) {
return out;
}
/**
 * C-level handler for the SYSCALL entry path.
 *
 * Stamps the current process's time_switch with the performance timer,
 * hands the register frame to _syscall_entrypoint, then runs the signal
 * check and the exit-time accounting before returning.
 *
 * @param r Saved register frame for the calling thread.
 * @returns The frame produced by _syscall_entrypoint.
 */
struct regs * syscall_centry(struct regs * r) {
	this_core->current_process->time_switch = arch_perf_timer();

	struct regs * frame = _syscall_entrypoint(r);

	process_check_signals(frame);
	update_process_times_on_exit();

	return frame;
}

View File

@ -263,3 +263,68 @@ arch_enter_tasklet:
popq %rdi
popq %rsi
jmpq *%rsi
/*
 * syscall_entry - entry stub for the SYSCALL instruction.
 *
 * Saves the caller's RSP at %gs:0x78, switches to the stack pointer held at
 * %gs:0x70, and pushes a register frame laid out like the one an interrupt
 * produces. A pointer to that frame is passed to syscall_centry; registers
 * are then restored from the frame pointer it returns and control goes back
 * to the caller with SYSRETQ.
 *
 * The caller's rcx and r11 are not preserved: their frame slots are filled
 * with zero and the registers themselves are reloaded with RIP and RFLAGS
 * for SYSRETQ.
 *
 * NOTE(review): the 0x70 / 0x78 offsets are hard-coded and presumably must
 * match the syscall_stack / user_sysret_stack fields of the per-core data
 * that GS points to - confirm whenever that structure changes.
 */
.extern syscall_centry
.global syscall_entry
.type syscall_entry, @function
syscall_entry:
swapgs /* SYSCALL only happens from userspace, so we must always swap gs */
mov %rsp, %gs:0x78 /* Store user RSP temporarily */
mov %gs:0x70, %rsp /* Restore kernel stack for this thread */
/* Normal `struct regs` layout, same as what we'd get on an interrupt */
pushq $0x23 /* SS */
pushq %gs:0x78 /* RSP */
push %r11 /* RFLAGS - SYSCALL stores in r11 */
pushq $0x2b /* CS */
push %rcx /* RIP - SYSCALL stores in rcx */
pushq $0 /* Dummy error code */
pushq $0 /* Dummy interrupt number */
push %rax
push %rbx
pushq $0 /* rcx is not valid, set to zero */
push %rdx
push %rsi
push %rdi
push %rbp
push %r8
push %r9
push %r10
pushq $0 /* r11 is not valid, set to zero */
push %r12
push %r13
push %r14
push %r15
mov %rsp, %rdi /* First argument: pointer to the frame just built */
call syscall_centry
mov %rax, %rsp /* Continue from the frame pointer syscall_centry returned */
/* Unwind the frame in the reverse of the push order above */
pop %r15
pop %r14
pop %r13
pop %r12
add $8, %rsp /* Skip the r11 slot */
pop %r10
pop %r9
pop %r8
pop %rbp
pop %rdi
pop %rsi
pop %rdx
add $8, %rsp /* Skip the rcx slot */
pop %rbx
pop %rax
add $16, %rsp /* Skip dummy interrupt number and error code */
pop %rcx /* Saved RIP goes to rcx */
add $8, %rsp /* Skip the CS slot */
pop %r11 /* Saved RFLAGS goes to r11 */
/*
 * NOTE(review): after the next instruction RSP holds the saved user value
 * while GS is still the kernel one; this presumably relies on interrupts
 * being masked at this point - confirm. The SS slot of the frame is never
 * read. Also verify that the saved RIP cannot be a non-canonical address
 * when SYSRETQ executes.
 */
pop %rsp /* Saved user RSP */
swapgs
sysretq

View File

@ -66,7 +66,7 @@ static void __ap_bootstrap(void) {
"mov $0x10, %%ax\n"
"mov %%ax, %%ds\n"
"mov %%ax, %%ss\n"
"mov $0x2b, %%ax\n"
"mov $0x33, %%ax\n" /* TSS offset in gdt */
"ltr %%ax\n"
".extern _ap_stack_base\n"
"mov _ap_stack_base,%%rsp\n"
@ -155,6 +155,16 @@ void load_processor_info(void) {
cpuid(0x80000004, brand[8], brand[9], brand[10], brand[11]);
memcpy(processor_local_data[this_core->cpu_id].cpu_model_name, brand, 48);
}
extern void syscall_entry(void);
uint32_t efer_hi, efer_lo;
asm volatile ("rdmsr" : "=d"(efer_hi), "=a"(efer_lo) : "c"(0xc0000080)); /* Read current EFER */
asm volatile ("wrmsr" : : "c"(0xc0000080), "d"(efer_hi), "a"(efer_lo | 1)); /* Enable SYSCALL/SYSRET in EFER */
asm volatile ("wrmsr" : : "c"(0xC0000081), "d"(0x1b0008), "a"(0)); /* Set segment bases in STAR */
asm volatile ("wrmsr" : : "c"(0xC0000082), /* Set SYSCALL entry point in LSTAR */
"d"((uintptr_t)&syscall_entry >> 32),
"a"((uintptr_t)&syscall_entry & 0xFFFFffff));
asm volatile ("wrmsr" : : "c"(0xC0000084), "d"(0), "a"(0x700)); /* SFMASK: Direction flag, interrupt flag, trap flag are all cleared */
}
/**

View File

@ -30,7 +30,7 @@
*/
void arch_enter_user(uintptr_t entrypoint, int argc, char * argv[], char * envp[], uintptr_t stack) {
struct regs ret;
ret.cs = 0x18 | 0x03;
ret.cs = 0x28 | 0x03;
ret.ss = 0x20 | 0x03;
ret.rip = entrypoint;
ret.rflags = (1 << 21) | (1 << 9);
@ -116,7 +116,7 @@ int arch_return_from_signal_handler(struct regs *r) {
*/
void arch_enter_signal_handler(uintptr_t entrypoint, int signum, struct regs *r) {
struct regs ret;
ret.cs = 0x18 | 0x03;
ret.cs = 0x28 | 0x03;
ret.ss = 0x20 | 0x03;
ret.rip = entrypoint;
ret.rflags = (1 << 21) | (1 << 9);