diff --git a/apps/strace.c b/apps/strace.c index d55b1331..7ef89f51 100644 --- a/apps/strace.c +++ b/apps/strace.c @@ -194,7 +194,7 @@ char syscall_mask[] = { }; #define M(e) [e] = #e -const char * errno_names[256] = { +const char * errno_names[] = { M(EPERM), M(ENOENT), M(ESRCH), @@ -310,7 +310,7 @@ const char * errno_names[256] = { M(ENOTRECOVERABLE), M(EOWNERDEAD), M(ESTRPIPE), - 0 + M(ERESTARTSYS), }; @@ -529,8 +529,12 @@ static void buffer_arg(pid_t pid, uintptr_t buffer, ssize_t count) { } static void print_error(int err) { - if (err > 255) return; - fprintf(logfile, " %s (%s)", errno_names[err], strerror(err)); + const char * name = (err >= 0 && (size_t)err < (sizeof(errno_names) / sizeof(*errno_names))) ? errno_names[err] : NULL; + if (name) { + fprintf(logfile, " %s (%s)", name, strerror(err)); + } else { + fprintf(logfile, " %d (%s)", err, strerror(err)); + } } static void maybe_errno(struct regs * r) { diff --git a/base/usr/include/errno.h b/base/usr/include/errno.h index 4fbc98db..9af2ce67 100644 --- a/base/usr/include/errno.h +++ b/base/usr/include/errno.h @@ -133,6 +133,8 @@ _Begin_C_Header #define ESTRPIPE 143 /* Streams pipe error */ #define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ERESTARTSYS 512 + #ifndef _KERNEL_ extern int errno; #define __sets_errno(...) long ret = __VA_ARGS__; if (ret < 0) { errno = -ret; ret = -1; } return ret diff --git a/base/usr/include/kernel/process.h b/base/usr/include/kernel/process.h index 449d9965..c8591ace 100644 --- a/base/usr/include/kernel/process.h +++ b/base/usr/include/kernel/process.h @@ -149,6 +149,9 @@ typedef struct process { pid_t tracer; spin_lock_t wait_lock; list_t * tracees; + + /* Syscall restarting */ + long interrupted_system_call; } process_t; typedef struct { diff --git a/base/usr/include/kernel/signal.h b/base/usr/include/kernel/signal.h index 539b557c..90ffff39 100644 --- a/base/usr/include/kernel/signal.h +++ b/base/usr/include/kernel/signal.h @@ -14,12 +14,11 @@ typedef struct { int signum; uintptr_t handler; - struct regs registers_before; } signal_t; extern void fix_signal_stacks(void); extern int send_signal(pid_t process, int signal, int force_root); extern int group_send_signal(pid_t group, int signal, int force_root); -extern void handle_signal(process_t * proc, signal_t * sig, struct regs *r); +extern void return_from_signal_handler(struct regs*); extern void process_check_signals(struct regs*); diff --git a/base/usr/include/kernel/syscall.h b/base/usr/include/kernel/syscall.h index 4817dbab..6e7f73d1 100644 --- a/base/usr/include/kernel/syscall.h +++ b/base/usr/include/kernel/syscall.h @@ -30,3 +30,5 @@ extern long arch_stack_pointer(struct regs * r); extern long arch_user_ip(struct regs * r); extern void arch_syscall_return(struct regs * r, long retval); + +extern void syscall_handler(struct regs * r); diff --git a/kernel/arch/aarch64/arch.c b/kernel/arch/aarch64/arch.c index 3033eb3a..cd7e6a2d 100644 --- a/kernel/arch/aarch64/arch.c +++ b/kernel/arch/aarch64/arch.c @@ -78,6 +78,7 @@ void arch_return_from_signal_handler(struct regs *r) { POP(sp, uint64_t, this_core->current_process->thread.fp_regs[63-i]); } + POP(sp, long, this_core->current_process->interrupted_system_call); POP(sp, struct regs, *r); POP(sp, uint64_t, elr); asm volatile ("msr ELR_EL1, %0" : "=r"(elr)); @@ -105,6 +106,8 @@ void arch_enter_signal_handler(uintptr_t entrypoint, int signum, struct regs *r) PUSH(sp, uint64_t, elr); PUSH(sp, struct regs, *r); + PUSH(sp, long, this_core->current_process->interrupted_system_call); + this_core->current_process->interrupted_system_call = 0; for (int i = 0; i < 64; ++i) { PUSH(sp, uint64_t, this_core->current_process->thread.fp_regs[i]); diff --git a/kernel/arch/aarch64/main.c b/kernel/arch/aarch64/main.c index 799a7010..33d03504 100644 --- a/kernel/arch/aarch64/main.c +++ b/kernel/arch/aarch64/main.c @@ -305,7 +305,7 @@ void aarch64_sync_enter(struct regs * r) { /* Magic signal return */ if (elr == 0x8DEADBEEF && far == 0x8DEADBEEF) { - arch_return_from_signal_handler(r); + return_from_signal_handler(r); goto _resume_user; } diff --git a/kernel/arch/x86_64/idt.c b/kernel/arch/x86_64/idt.c index 0f33305a..bd8d0f8f 100644 --- a/kernel/arch/x86_64/idt.c +++ b/kernel/arch/x86_64/idt.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -510,7 +511,7 @@ static void _page_fault(struct regs * r) { /* 8DEADBEEFh is the magic ret-from-sig address. */ if (faulting_address == 0x8DEADBEEF) { - arch_return_from_signal_handler(r); + return_from_signal_handler(r); return; } @@ -549,7 +550,6 @@ static void _page_fault(struct regs * r) { */ static struct regs * _syscall_entrypoint(struct regs * r) { /* syscall_handler will modify r to set return value. */ - extern void syscall_handler(struct regs *); syscall_handler(r); /* diff --git a/kernel/arch/x86_64/user.c b/kernel/arch/x86_64/user.c index e7fd156a..f901a8ee 100644 --- a/kernel/arch/x86_64/user.c +++ b/kernel/arch/x86_64/user.c @@ -8,9 +8,11 @@ * Copyright (C) 2021 K. Lange */ #include +#include #include #include #include +#include #include #include @@ -72,6 +74,8 @@ void arch_return_from_signal_handler(struct regs *r) { POP(r->rsp, uint64_t, this_core->current_process->thread.fp_regs[63-i]); } + POP(r->rsp, long, this_core->current_process->interrupted_system_call); + struct regs out; POP(r->rsp, struct regs, out); @@ -111,6 +115,9 @@ void arch_enter_signal_handler(uintptr_t entrypoint, int signum, struct regs *r) PUSH(ret.rsp, struct regs, *r); + PUSH(ret.rsp, long, this_core->current_process->interrupted_system_call); + this_core->current_process->interrupted_system_call = 0; + for (int i = 0; i < 64; ++i) { PUSH(ret.rsp, uint64_t, this_core->current_process->thread.fp_regs[i]); } diff --git a/kernel/misc/ringbuffer.c b/kernel/misc/ringbuffer.c index 62b523ff..0466cdd2 100644 --- a/kernel/misc/ringbuffer.c +++ b/kernel/misc/ringbuffer.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include @@ -96,8 +97,12 @@ size_t ring_buffer_read(ring_buffer_t * ring_buffer, size_t size, uint8_t * buff } wakeup_queue(ring_buffer->wait_queue_writers); if (collected == 0) { - if (sleep_on_unlocking(ring_buffer->wait_queue_readers, &ring_buffer->lock) && ring_buffer->internal_stop) { - ring_buffer->internal_stop = 0; + if (sleep_on_unlocking(ring_buffer->wait_queue_readers, &ring_buffer->lock)) { + if (ring_buffer->internal_stop) { + ring_buffer->internal_stop = 0; + } else { + if (!collected) return -ERESTARTSYS; + } break; } } else { @@ -126,8 +131,12 @@ size_t ring_buffer_write(ring_buffer_t * ring_buffer, size_t size, uint8_t * buf spin_unlock(ring_buffer->lock); break; } - if (sleep_on_unlocking(ring_buffer->wait_queue_writers, &ring_buffer->lock) && ring_buffer->internal_stop) { - ring_buffer->internal_stop = 0; + if (sleep_on_unlocking(ring_buffer->wait_queue_writers, &ring_buffer->lock)) { + if (ring_buffer->internal_stop) { + ring_buffer->internal_stop = 0; + } else { + if (!written) return -ERESTARTSYS; + } break; } } else { diff --git a/kernel/sys/signal.c b/kernel/sys/signal.c index f6d267ec..2ae53486 100644 --- a/kernel/sys/signal.c +++ b/kernel/sys/signal.c @@ -28,51 +28,65 @@ #include #include #include +#include static spin_lock_t sig_lock; -static spin_lock_t sig_lock_b; -char isdeadly[] = { +#define SIG_DISP_Ign 0 +#define SIG_DISP_Term 1 +#define SIG_DISP_Core 2 +#define SIG_DISP_Stop 3 +#define SIG_DISP_Cont 4 + +static char sig_defaults[] = { 0, /* 0? */ - [SIGHUP ] = 1, - [SIGINT ] = 1, - [SIGQUIT ] = 2, - [SIGILL ] = 2, - [SIGTRAP ] = 2, - [SIGABRT ] = 2, - [SIGEMT ] = 2, - [SIGFPE ] = 2, - [SIGKILL ] = 1, - [SIGBUS ] = 2, - [SIGSEGV ] = 2, - [SIGSYS ] = 2, - [SIGPIPE ] = 1, - [SIGALRM ] = 1, - [SIGTERM ] = 1, - [SIGUSR1 ] = 1, - [SIGUSR2 ] = 1, - [SIGCHLD ] = 0, - [SIGPWR ] = 0, - [SIGWINCH ] = 0, - [SIGURG ] = 0, - [SIGPOLL ] = 0, - [SIGSTOP ] = 3, - [SIGTSTP ] = 3, - [SIGCONT ] = 4, - [SIGTTIN ] = 3, - [SIGTTOUT ] = 3, - [SIGVTALRM ] = 1, - [SIGPROF ] = 1, - [SIGXCPU ] = 2, - [SIGXFSZ ] = 2, - [SIGWAITING ] = 0, - [SIGDIAF ] = 1, - [SIGHATE ] = 0, - [SIGWINEVENT] = 0, - [SIGCAT ] = 0, + [SIGHUP ] = SIG_DISP_Term, + [SIGINT ] = SIG_DISP_Term, + [SIGQUIT ] = SIG_DISP_Core, + [SIGILL ] = SIG_DISP_Core, + [SIGTRAP ] = SIG_DISP_Core, + [SIGABRT ] = SIG_DISP_Core, + [SIGEMT ] = SIG_DISP_Core, + [SIGFPE ] = SIG_DISP_Core, + [SIGKILL ] = SIG_DISP_Term, + [SIGBUS ] = SIG_DISP_Core, + [SIGSEGV ] = SIG_DISP_Core, + [SIGSYS ] = SIG_DISP_Core, + [SIGPIPE ] = SIG_DISP_Term, + [SIGALRM ] = SIG_DISP_Term, + [SIGTERM ] = SIG_DISP_Term, + [SIGUSR1 ] = SIG_DISP_Term, + [SIGUSR2 ] = SIG_DISP_Term, + [SIGCHLD ] = SIG_DISP_Ign, + [SIGPWR ] = SIG_DISP_Ign, + [SIGWINCH ] = SIG_DISP_Ign, + [SIGURG ] = SIG_DISP_Ign, + [SIGPOLL ] = SIG_DISP_Ign, + [SIGSTOP ] = SIG_DISP_Stop, + [SIGTSTP ] = SIG_DISP_Stop, + [SIGCONT ] = SIG_DISP_Cont, + [SIGTTIN ] = SIG_DISP_Stop, + [SIGTTOUT ] = SIG_DISP_Stop, + [SIGVTALRM ] = SIG_DISP_Term, + [SIGPROF ] = SIG_DISP_Term, + [SIGXCPU ] = SIG_DISP_Core, + [SIGXFSZ ] = SIG_DISP_Core, + [SIGWAITING ] = SIG_DISP_Ign, + [SIGDIAF ] = SIG_DISP_Term, + [SIGHATE ] = SIG_DISP_Ign, + [SIGWINEVENT] = SIG_DISP_Ign, + [SIGCAT ] = SIG_DISP_Ign, }; -void handle_signal(process_t * proc, signal_t * sig, struct regs *r) { +static void maybe_restart_system_call(struct regs * r) { + if (this_core->current_process->interrupted_system_call && arch_syscall_number(r) == -ERESTARTSYS) { + arch_syscall_return(r, this_core->current_process->interrupted_system_call); + this_core->current_process->interrupted_system_call = 0; + syscall_handler(r); + } +} + +int handle_signal(process_t * proc, signal_t * sig, struct regs *r) { uintptr_t handler = sig->handler; uintptr_t signum = sig->signum; free(sig); @@ -83,20 +97,19 @@ void handle_signal(process_t * proc, signal_t * sig, struct regs *r) { } if (proc->flags & PROC_FLAG_FINISHED) { - return; + return 1; } if (signum == 0 || signum >= NUMSIGNALS) { - /* Ignore */ - return; + goto _ignore_signal; } if (!handler) { - char dowhat = isdeadly[signum]; - if (dowhat == 1 || dowhat == 2) { + char dowhat = sig_defaults[signum]; + if (dowhat == SIG_DISP_Term || dowhat == SIG_DISP_Core) { task_exit(((128 + signum) << 8) | signum); __builtin_unreachable(); - } else if (dowhat == 3) { + } else if (dowhat == SIG_DISP_Stop) { __sync_or_and_fetch(&this_core->current_process->flags, PROC_FLAG_SUSPENDED); this_core->current_process->status = 0x7F; @@ -106,21 +119,30 @@ void handle_signal(process_t * proc, signal_t * sig, struct regs *r) { wakeup_queue(parent->wait_queue); } - switch_task(0); - } else if (dowhat == 4) { - switch_task(1); - return; + do { + switch_task(0); + } while (!this_core->current_process->signal_queue->length); + + return 0; /* Return and handle another */ + } else if (dowhat == SIG_DISP_Cont) { + /* Continue doesn't actually do anything different at this stage. */ + goto _ignore_signal; } - /* XXX dowhat == 2: should dump core */ - /* XXX dowhat == 3: stop */ - return; + goto _ignore_signal; } - if (handler == 1) /* Ignore */ { - return; - } + /* If the handler value is 1 we treat it as IGN. */ + if (handler == 1) goto _ignore_signal; arch_enter_signal_handler(handler, signum, r); + return 1; /* Should not be reachable */ + +_ignore_signal: + /* we still need to check if we need to restart something */ + + maybe_restart_system_call(r); + + return !this_core->current_process->signal_queue->length; } int send_signal(pid_t process, int signal, int force_root) { @@ -152,12 +174,15 @@ int send_signal(pid_t process, int signal, int force_root) { return -EINVAL; } - if (!receiver->signals[signal] && !isdeadly[signal]) { - /* If we're blocking a signal and it's not going to kill us, don't deliver it */ + if (!receiver->signals[signal] && !sig_defaults[signal]) { + /* If there is no handler for a signal and its default disposition is IGNORE, + * we don't even bother sending it, to avoid having to interrupt + restart system calls. */ return 0; } - if (isdeadly[signal] == 4) { + if (sig_defaults[signal] == SIG_DISP_Cont) { + /* XXX: I'm not sure this check is necessary? And the SUSPEND flag flip probably + * should be on the receiving end. */ if (!(receiver->flags & PROC_FLAG_SUSPENDED)) { return -EINVAL; } else { @@ -170,12 +195,19 @@ int send_signal(pid_t process, int signal, int force_root) { signal_t * sig = malloc(sizeof(signal_t)); sig->handler = (uintptr_t)receiver->signals[signal]; sig->signum = signal; - memset(&sig->registers_before, 0x00, sizeof(struct regs)); - list_insert(receiver->signal_queue, sig); + spin_lock(sig_lock); + list_insert(receiver->signal_queue, sig); + spin_unlock(sig_lock); + + /* Informs any blocking events that the process has been interrupted + * by a signal, which should trigger those blocking events to complete + * and potentially return -EINTR or -ERESTARTSYS */ process_awaken_signal(receiver); - if (!process_is_ready(receiver) || receiver == this_core->current_process) { + /* Schedule processes awoken by signals to be run. Unless they're us, we'll + * jump to the signal handler as part of returning from this call. */ + if (receiver != this_core->current_process && !process_is_ready(receiver)) { make_process_ready(receiver); } @@ -211,17 +243,27 @@ int group_send_signal(pid_t group, int signal, int force_root) { } void process_check_signals(struct regs * r) { + spin_lock(sig_lock); if (this_core->current_process && !(this_core->current_process->flags & PROC_FLAG_FINISHED) && this_core->current_process->signal_queue && this_core->current_process->signal_queue->length > 0) { + while (1) { + node_t * node = list_dequeue(this_core->current_process->signal_queue); + spin_unlock(sig_lock); - node_t * node = list_dequeue(this_core->current_process->signal_queue); - signal_t * sig = node->value; + signal_t * sig = node->value; + free(node); - free(node); + if (handle_signal((process_t*)this_core->current_process,sig,r)) return; - handle_signal((process_t*)this_core->current_process,sig,r); + spin_lock(sig_lock); + } } + spin_unlock(sig_lock); } +void return_from_signal_handler(struct regs *r) { + arch_return_from_signal_handler(r); + maybe_restart_system_call(r); +} diff --git a/kernel/sys/syscall.c b/kernel/sys/syscall.c index e16a20d5..8fbe1425 100644 --- a/kernel/sys/syscall.c +++ b/kernel/sys/syscall.c @@ -1185,9 +1185,15 @@ void syscall_handler(struct regs * r) { ptrace_signal(SIGTRAP, PTRACE_EVENT_SYSCALL_ENTER); } - arch_syscall_return(r, func( + long result = func( arch_syscall_arg0(r), arch_syscall_arg1(r), arch_syscall_arg2(r), - arch_syscall_arg3(r), arch_syscall_arg4(r))); + arch_syscall_arg3(r), arch_syscall_arg4(r)); + + if (result == -ERESTARTSYS) { + this_core->current_process->interrupted_system_call = arch_syscall_number(r); + } + + arch_syscall_return(r, result); if (this_core->current_process->flags & PROC_FLAG_TRACE_SYSCALLS) { ptrace_signal(SIGTRAP, PTRACE_EVENT_SYSCALL_EXIT); diff --git a/kernel/vfs/pipe.c b/kernel/vfs/pipe.c index 8700186c..607f1f18 100644 --- a/kernel/vfs/pipe.c +++ b/kernel/vfs/pipe.c @@ -130,7 +130,10 @@ ssize_t read_pipe(fs_node_t *node, off_t offset, size_t size, uint8_t *buffer) { wakeup_queue(pipe->wait_queue_writers); /* Deschedule and switch */ if (collected == 0) { - sleep_on_unlocking(pipe->wait_queue_readers, &pipe->lock_read); + if (sleep_on_unlocking(pipe->wait_queue_readers, &pipe->lock_read)) { + if (!collected) return -EINTR; + break; + } } else { spin_unlock(pipe->lock_read); } @@ -162,7 +165,10 @@ ssize_t write_pipe(fs_node_t *node, off_t offset, size_t size, uint8_t *buffer) wakeup_queue(pipe->wait_queue_readers); pipe_alert_waiters(pipe); if (written < size) { - sleep_on_unlocking(pipe->wait_queue_writers, &pipe->lock_read); + if (sleep_on_unlocking(pipe->wait_queue_writers, &pipe->lock_read)) { + if (!written) return -EINTR; + break; + } } else { spin_unlock(pipe->lock_read); } diff --git a/libc/string/strerror.c b/libc/string/strerror.c index 3c094abd..7eea3505 100644 --- a/libc/string/strerror.c +++ b/libc/string/strerror.c @@ -1,7 +1,8 @@ #include #include +#include -static char * _error_strings[256] = { +static char * _error_strings[] = { [EPERM] = "Operation not permitted", [ENOENT] = "No such file or directory", [ESRCH] = "No such process", @@ -117,10 +118,15 @@ static char * _error_strings[256] = { [ENOTRECOVERABLE] = "State not recoverable", [EOWNERDEAD] = "Previous owner died", [ESTRPIPE] = "Streams pipe error", + [ERESTARTSYS] = "Restartable system call was interrupted", }; +static char _error_string[100]; char * strerror(int errnum) { - if (errnum > 255) return "???"; - if (!_error_strings[errnum]) return "???"; - return _error_strings[errnum]; + char * str = (errnum >= 0 && (size_t)errnum < sizeof(_error_strings) / sizeof(*_error_strings)) ? _error_strings[errnum] : NULL; + if (!str) { + snprintf(_error_string, 100, "%d", errnum); + return _error_string; + } + return str; }