1460 lines
45 KiB
C
1460 lines
45 KiB
C
/**
|
|
* @file kernel/sys/process.c
|
|
* @brief Task switch and thread scheduling.
|
|
*
|
|
* Implements the primary scheduling primitives for the kernel.
|
|
*
|
|
* Generally, what the kernel refers to as a "process" is an individual thread.
|
|
* The POSIX concept of a "process" is represented in Misaka as a collection of
|
|
* threads and their shared paging, signal, and file descriptor tables.
|
|
*
|
|
* Kernel threads are also "processes", referred to as "tasklets".
|
|
*
|
|
* Misaka allows nested kernel preemption, and task switching involves saving
|
|
* kernel state in a manner similar to setjmp/longjmp, as well as saving the
|
|
* outer context in the case of a nested task switch.
|
|
*
|
|
* @copyright
|
|
* This file is part of ToaruOS and is released under the terms
|
|
* of the NCSA / University of Illinois License - see LICENSE.md
|
|
* Copyright (C) 2011-2021 K. Lange
|
|
* Copyright (C) 2012 Markus Schober
|
|
* Copyright (C) 2015 Dale Weiler
|
|
*/
|
|
#include <errno.h>
|
|
#include <kernel/assert.h>
|
|
#include <kernel/process.h>
|
|
#include <kernel/printf.h>
|
|
#include <kernel/string.h>
|
|
#include <kernel/vfs.h>
|
|
#include <kernel/spinlock.h>
|
|
#include <kernel/tree.h>
|
|
#include <kernel/list.h>
|
|
#include <kernel/mmu.h>
|
|
#include <kernel/shm.h>
|
|
#include <kernel/signal.h>
|
|
#include <kernel/time.h>
|
|
#include <kernel/misc.h>
|
|
#include <kernel/syscall.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/signal_defs.h>
|
|
|
|
/* FIXME: This only needs the size of the regs struct... */
|
|
#if defined(__x86_64__)
|
|
#include <kernel/arch/x86_64/regs.h>
|
|
#elif defined(__aarch64__)
|
|
#include <kernel/arch/aarch64/regs.h>
|
|
#else
|
|
#error "no regs"
|
|
#endif
|
|
|
|
tree_t * process_tree; /* Stores the parent-child process relationships; the root of this graph is 'init'. */
|
|
list_t * process_list; /* Stores all existing processes. Mostly used for sanity checking or for places where iterating over all processes is useful. */
|
|
list_t * process_queue; /* Scheduler ready queue. This the round-robin source. The head is the next process to run. */
|
|
list_t * sleep_queue; /* Ordered list of processes waiting to be awoken by timeouts. The head is the earliest thread to awaken. */
|
|
list_t * reap_queue; /* Processes that could not be cleaned up and need to be deleted. */
|
|
|
|
struct ProcessorLocal processor_local_data[32] = {0};
|
|
int processor_count = 1;
|
|
|
|
/* The following locks protect access to the process tree, scheduler queue,
|
|
* sleeping, and the very special wait queue... */
|
|
static spin_lock_t tree_lock = { 0 };
|
|
static spin_lock_t process_queue_lock = { 0 };
|
|
static spin_lock_t wait_lock_tmp = { 0 };
|
|
static spin_lock_t sleep_lock = { 0 };
|
|
static spin_lock_t reap_lock = { 0 };
|
|
|
|
void update_process_times(int includeSystem) {
|
|
uint64_t pTime = arch_perf_timer();
|
|
if (this_core->current_process->time_in && this_core->current_process->time_in < pTime) {
|
|
this_core->current_process->time_total += pTime - this_core->current_process->time_in;
|
|
}
|
|
this_core->current_process->time_in = 0;
|
|
|
|
if (includeSystem) {
|
|
if (this_core->current_process->time_switch && this_core->current_process->time_switch < pTime) {
|
|
this_core->current_process->time_sys += pTime - this_core->current_process->time_switch;
|
|
}
|
|
this_core->current_process->time_switch = 0;
|
|
}
|
|
}
|
|
|
|
/**
 * @brief Halt with a traceback unless the calling core holds @p lck.
 *
 * A lock is treated as held by this core when its owner field equals
 * cpu_id + 1. The argument is parenthesized so that any lock expression
 * (not just a plain identifier) expands correctly.
 *
 * NOTE(review): this still expands to a bare `if` block, so it must not be
 * used as the sole body of an if/else; kept that way so existing call sites
 * (with or without a trailing semicolon) continue to compile.
 */
#define must_have_lock(lck) \
	if ((lck).owner != this_core->cpu_id+1) { \
		arch_fatal_prepare(); \
		printf("Failed lock check.\n"); \
		arch_dump_traceback(); \
		arch_fatal(); \
	}
|
|
|
|
/**
|
|
* @brief Restore the context of the next available process's kernel thread.
|
|
*
|
|
* Loads the next ready process from the scheduler queue and resumes it.
|
|
*
|
|
* If no processes are available, the local idle task will be run from the beginning
|
|
* of its function entry.
|
|
*
|
|
* If the next process in the queue has been marked as finished, it will be discarded
|
|
* until a non-finished process is found.
|
|
*
|
|
* If the next process is new, it will be marked as started, and its entry point
|
|
* jumped to.
|
|
*
|
|
* For all other cases, the process's stored kernel thread state will be restored
|
|
* and execution will continue in @ref switch_task with a return value of 1.
|
|
*
|
|
* Note that switch_next does not return and should be called only when the current
|
|
* process has been properly added to a scheduling queue, or marked as awaiting cleanup,
|
|
* otherwise its return state if resumed is undefined and generally whatever the state
|
|
* was when that process last entered switch_task.
|
|
*
|
|
* @returns never.
|
|
*/
|
|
void switch_next(void) {
|
|
this_core->previous_process = this_core->current_process;
|
|
update_process_times(1);
|
|
|
|
/* Get the next available process, discarded anything in the queue
|
|
* marked as finished. */
|
|
do {
|
|
this_core->current_process = next_ready_process();
|
|
} while (this_core->current_process->flags & PROC_FLAG_FINISHED);
|
|
|
|
this_core->current_process->time_in = arch_perf_timer();
|
|
this_core->current_process->time_switch = this_core->current_process->time_in;
|
|
|
|
/* Restore paging and task switch context. */
|
|
mmu_set_directory(this_core->current_process->thread.page_directory->directory);
|
|
arch_set_kernel_stack(this_core->current_process->image.stack);
|
|
|
|
if ((this_core->current_process->flags & PROC_FLAG_FINISHED) || (!this_core->current_process->signal_queue)) {
|
|
arch_fatal_prepare();
|
|
printf("Should not have this process...\n");
|
|
if (this_core->current_process->flags & PROC_FLAG_FINISHED) printf("It is marked finished.\n");
|
|
if (!this_core->current_process->signal_queue) printf("It doesn't have a signal queue.\n");
|
|
arch_dump_traceback();
|
|
arch_fatal();
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
/* Mark the process as running and started. */
|
|
__sync_or_and_fetch(&this_core->current_process->flags, PROC_FLAG_STARTED);
|
|
|
|
asm volatile ("" ::: "memory");
|
|
|
|
/* Jump to next */
|
|
arch_restore_context(&this_core->current_process->thread);
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
extern void * _ret_from_preempt_source;
|
|
|
|
/**
|
|
* @brief Yield the processor to the next available task.
|
|
*
|
|
* Yields the current process, allowing the next to run. Can be called both as
|
|
* part of general preemption or from blocking tasks; in the latter case,
|
|
* the process should be added to a scheduler queue to be awoken later when the
|
|
* blocking operation is completed and @p reschedule should be set to 0.
|
|
*
|
|
* @param reschedule Non-zero if this process should be added to the ready queue.
|
|
*/
|
|
void switch_task(uint8_t reschedule) {
|
|
|
|
/* switch_task() called but the scheduler isn't enabled? Resume... this is probably a bug. */
|
|
if (!this_core->current_process) return;
|
|
|
|
if (this_core->current_process == this_core->kernel_idle_task && __builtin_return_address(0) != &_ret_from_preempt_source) {
|
|
arch_fatal_prepare();
|
|
printf("Context switch from kernel_idle_task triggered from somewhere other than pre-emption source. Halting.\n");
|
|
printf("This generally means that a driver responding to interrupts has attempted to yield in its interrupt context.\n");
|
|
printf("Ensure that all device drivers which respond to interrupts do so with non-blocking data structures.\n");
|
|
printf(" Return address of switch_task: %p\n", __builtin_return_address(0));
|
|
arch_dump_traceback();
|
|
arch_fatal();
|
|
}
|
|
|
|
/* If a process got to switch_task but was not marked as running, it must be exiting and we don't
|
|
* want to waste time saving context for it. Also, kidle is always resumed from the top of its
|
|
* loop function, so we don't save any context for it either. */
|
|
if (!(this_core->current_process->flags & PROC_FLAG_RUNNING) || (this_core->current_process == this_core->kernel_idle_task)) {
|
|
switch_next();
|
|
return;
|
|
}
|
|
|
|
arch_save_floating((process_t*)this_core->current_process);
|
|
|
|
/* 'setjmp' - save the execution context. When this call returns '1' we are back
|
|
* from a task switch and have been awoken if we were sleeping. */
|
|
if (arch_save_context(&this_core->current_process->thread) == 1) {
|
|
arch_restore_floating((process_t*)this_core->current_process);
|
|
return;
|
|
}
|
|
|
|
/* If this is a normal yield, we reschedule.
|
|
* XXX: Is this going to work okay with SMP? I think this whole thing
|
|
* needs to be wrapped in a lock, but also what if we put the
|
|
* thread into a schedule queue previously but a different core
|
|
* picks it up before we saved the thread context or the FPU state... */
|
|
if (reschedule) {
|
|
make_process_ready((process_t*)this_core->current_process);
|
|
}
|
|
|
|
/* @ref switch_next() does not return. */
|
|
switch_next();
|
|
}
|
|
|
|
/**
|
|
* @brief Initialize scheduler data structures.
|
|
*
|
|
* Called by early system startup to allocate trees and lists
|
|
* the scheduler uses to track processes.
|
|
*/
|
|
void initialize_process_tree(void) {
|
|
process_tree = tree_create();
|
|
process_list = list_create("global process list",NULL);
|
|
process_queue = list_create("global scheduler queue",NULL);
|
|
sleep_queue = list_create("global timed sleep queue",NULL);
|
|
reap_queue = list_create("processes awaiting later cleanup",NULL);
|
|
|
|
/* TODO: PID bitset? */
|
|
}
|
|
|
|
/**
|
|
* @brief Determines if a process is alive and valid.
|
|
*
|
|
* Scans @ref process_list to see if @p process is a valid
|
|
* process object or not.
|
|
*
|
|
* XXX This is horribly inefficient, and its very existence
|
|
* is likely indicative of bugs wherever it needed to
|
|
* be called...
|
|
*
|
|
* @param process Process object to check.
|
|
* @returns 1 if the process is valid, 0 if it is not.
|
|
*/
|
|
int is_valid_process(process_t * process) {
	int found = 0;

	/* Linear scan of the global list, stopping at the first match. */
	foreach(candidate, process_list) {
		if (candidate->value == process) {
			found = 1;
			break;
		}
	}

	return found;
}
|
|
|
|
/**
|
|
* @brief Allocate a new file descriptor.
|
|
*
|
|
* Adds a new entry to the file descriptor table for @p proc
|
|
* pointing to the file @p node. The file descriptor's offset
|
|
* and file modes must be set by the caller afterwards.
|
|
*
|
|
* @param proc Process whose file descriptor should be modified.
|
|
* @param node VFS object to add a reference to.
|
|
* @returns the new file descriptor index
|
|
*/
|
|
unsigned long process_append_fd(process_t * proc, fs_node_t * node) {
	unsigned long fd = 0;

	spin_lock(proc->fds->lock);

	/* Fill gaps: pick the lowest index whose entry is empty. */
	while (fd < proc->fds->length && proc->fds->entries[fd]) {
		++fd;
	}

	int appending = (fd == proc->fds->length);

	/* No gaps, expand: double the parallel arrays when they are full. */
	if (appending && proc->fds->length == proc->fds->capacity) {
		/* NOTE(review): realloc results are not checked here; there is no
		 * error channel in the return value to report a failure through. */
		proc->fds->capacity *= 2;
		proc->fds->entries = realloc(proc->fds->entries, sizeof(fs_node_t *) * proc->fds->capacity);
		proc->fds->modes   = realloc(proc->fds->modes,   sizeof(int) * proc->fds->capacity);
		proc->fds->offsets = realloc(proc->fds->offsets, sizeof(uint64_t) * proc->fds->capacity);
	}

	proc->fds->entries[fd] = node;
	/* modes, offsets must be set by caller */
	proc->fds->modes[fd] = 0;
	proc->fds->offsets[fd] = 0;

	/* Publish the new slot only after it has been filled in. */
	if (appending) {
		proc->fds->length++;
	}

	spin_unlock(proc->fds->lock);

	/* The index was chosen while the lock was held. Re-reading the table
	 * length after unlocking could return a different descriptor if
	 * another thread sharing this table appended in the meantime. */
	return fd;
}
|
|
|
|
/**
|
|
* @brief Allocate a process identifier.
|
|
*
|
|
* Obtains the next available process identifier.
|
|
*
|
|
* FIXME This used to use a bitset in Toaru32 so it could
|
|
* handle overflow of the pid counter. We need to
|
|
* bring that back.
|
|
*/
|
|
pid_t get_next_pid(void) {
	/* Monotonic counter; the first identifier handed out is 2. */
	static pid_t pid_counter = 2;

	/* Atomically take the current value and advance the counter by one. */
	pid_t assigned = __sync_fetch_and_add(&pid_counter, 1);
	return assigned;
}
|
|
|
|
/**
|
|
* @brief The idle task.
|
|
*
|
|
* Sits in a loop forever. Scheduled whenever there is nothing
|
|
* else to do. Actually always enters from the top of the function
|
|
* whenever scheduled, as we don't bother to save its state.
|
|
*/
|
|
static void _kidle(void) {
	/* Never returns: pause, then pause again. */
	for (;;) {
		arch_pause();
	}
}
|
|
|
|
/**
 * @brief Alternate idle loop, used as the idle entry point when
 *        @ref spawn_kidle is called with a zero @c bsp argument.
 *
 * Pauses like @ref _kidle, but on everything except aarch64 it also
 * goes straight back to the scheduler via @ref switch_next after each pause.
 */
static void _kburn(void) {
	for (;;) {
		arch_pause();
#ifndef __aarch64__
		switch_next();
#endif
	}
}
|
|
|
|
/**
|
|
* @brief Release a process's paging data.
|
|
*
|
|
* If this is a thread in a POSIX process with other
|
|
* living threads, the directory is not actually released
|
|
* but the reference count for it is decremented.
|
|
*
|
|
* XXX There's probably no reason for this to take an argument;
|
|
* we only ever free directories in two places: on exec, or
|
|
* when a thread exits, and that's always the current thread.
|
|
*/
|
|
void process_release_directory(page_directory_t * dir) {
	spin_lock(dir->lock);

	dir->refcount--;

	/* Someone else still references this directory: just drop the lock. */
	if (dir->refcount >= 1) {
		spin_unlock(dir->lock);
		return;
	}

	/* Last reference gone. The lock lives inside the object being freed,
	 * so it is not released first. */
	mmu_free(dir->directory);
	free(dir);
}
|
|
|
|
process_t * spawn_kidle(int bsp) {
|
|
process_t * idle = calloc(1,sizeof(process_t));
|
|
idle->id = -1;
|
|
idle->name = strdup("[kidle]");
|
|
idle->flags = PROC_FLAG_IS_TASKLET | PROC_FLAG_STARTED | PROC_FLAG_RUNNING;
|
|
idle->image.stack = (uintptr_t)valloc(KERNEL_STACK_SIZE)+ KERNEL_STACK_SIZE;
|
|
mmu_frame_allocate(
|
|
mmu_get_page(idle->image.stack - KERNEL_STACK_SIZE, 0),
|
|
MMU_FLAG_KERNEL);
|
|
|
|
/* TODO arch_initialize_context(uintptr_t) ? */
|
|
idle->thread.context.ip = bsp ? (uintptr_t)&_kidle : (uintptr_t)&_kburn;
|
|
idle->thread.context.sp = idle->image.stack;
|
|
idle->thread.context.bp = idle->image.stack;
|
|
|
|
/* FIXME Why does the idle thread have wait queues and shm mappings?
|
|
* Can we make sure these are never referenced and not allocate them? */
|
|
idle->wait_queue = list_create("process wait queue (kidle)",idle);
|
|
idle->shm_mappings = list_create("process shm mappings (kidle)",idle);
|
|
idle->signal_queue = list_create("process signal queue (kidle)",idle);
|
|
gettimeofday(&idle->start, NULL);
|
|
idle->thread.page_directory = malloc(sizeof(page_directory_t));
|
|
idle->thread.page_directory->refcount = 1;
|
|
idle->thread.page_directory->directory = mmu_clone(this_core->current_pml);
|
|
spin_init(idle->thread.page_directory->lock);
|
|
return idle;
|
|
}
|
|
|
|
process_t * spawn_init(void) {
|
|
process_t * init = calloc(1,sizeof(process_t));
|
|
tree_set_root(process_tree, (void*)init);
|
|
|
|
init->tree_entry = process_tree->root;
|
|
init->id = 1;
|
|
init->group = 0;
|
|
init->job = 1;
|
|
init->session = 1;
|
|
init->name = strdup("init");
|
|
init->cmdline = NULL;
|
|
init->user = USER_ROOT_UID;
|
|
init->real_user = USER_ROOT_UID;
|
|
init->user_group = USER_ROOT_UID;
|
|
init->real_user_group = USER_ROOT_UID;
|
|
init->mask = 022;
|
|
init->status = 0;
|
|
|
|
init->fds = malloc(sizeof(fd_table_t));
|
|
init->fds->refs = 1;
|
|
init->fds->length = 0;
|
|
init->fds->capacity = 4;
|
|
init->fds->entries = malloc(init->fds->capacity * sizeof(fs_node_t *));
|
|
init->fds->modes = malloc(init->fds->capacity * sizeof(int));
|
|
init->fds->offsets = malloc(init->fds->capacity * sizeof(uint64_t));
|
|
spin_init(init->fds->lock);
|
|
|
|
init->wd_node = clone_fs(fs_root);
|
|
init->wd_name = strdup("/");
|
|
|
|
init->image.entry = 0;
|
|
init->image.heap = 0;
|
|
init->image.stack = (uintptr_t)valloc(KERNEL_STACK_SIZE) + KERNEL_STACK_SIZE;
|
|
mmu_frame_allocate(
|
|
mmu_get_page(init->image.stack - KERNEL_STACK_SIZE, 0),
|
|
MMU_FLAG_KERNEL);
|
|
init->image.shm_heap = USER_SHM_LOW;
|
|
|
|
init->flags = PROC_FLAG_STARTED | PROC_FLAG_RUNNING;
|
|
init->wait_queue = list_create("process wait queue (init)", init);
|
|
init->shm_mappings = list_create("process shm mapping (init)", init);
|
|
init->signal_queue = list_create("process signal queue (init)", init);
|
|
|
|
init->sched_node.prev = NULL;
|
|
init->sched_node.next = NULL;
|
|
init->sched_node.value = init;
|
|
|
|
init->sleep_node.prev = NULL;
|
|
init->sleep_node.next = NULL;
|
|
init->sleep_node.value = init;
|
|
|
|
init->timed_sleep_node = NULL;
|
|
|
|
init->thread.page_directory = malloc(sizeof(page_directory_t));
|
|
init->thread.page_directory->refcount = 1;
|
|
init->thread.page_directory->directory = this_core->current_pml;
|
|
spin_init(init->thread.page_directory->lock);
|
|
init->description = strdup("[init]");
|
|
list_insert(process_list, (void*)init);
|
|
|
|
return init;
|
|
}
|
|
|
|
process_t * spawn_process(volatile process_t * parent, int flags) {
|
|
process_t * proc = calloc(1,sizeof(process_t));
|
|
|
|
proc->id = get_next_pid();
|
|
proc->group = proc->id;
|
|
proc->name = strdup(parent->name);
|
|
proc->description = NULL;
|
|
proc->cmdline = parent->cmdline; /* FIXME dup it? */
|
|
|
|
proc->user = parent->user;
|
|
proc->real_user = parent->real_user;
|
|
proc->user_group = parent->user_group;
|
|
proc->real_user_group = parent->real_user_group;
|
|
proc->mask = parent->mask;
|
|
proc->job = parent->job;
|
|
proc->session = parent->session;
|
|
|
|
if (parent->supplementary_group_count) {
|
|
proc->supplementary_group_count = parent->supplementary_group_count;
|
|
proc->supplementary_group_list = malloc(sizeof(gid_t) * proc->supplementary_group_count);
|
|
for (int i = 0; i < proc->supplementary_group_count; ++i) {
|
|
proc->supplementary_group_list[i] = parent->supplementary_group_list[i];
|
|
}
|
|
}
|
|
|
|
proc->thread.context.sp = 0;
|
|
proc->thread.context.bp = 0;
|
|
proc->thread.context.ip = 0;
|
|
memcpy((void*)proc->thread.fp_regs, (void*)parent->thread.fp_regs, 512);
|
|
|
|
/* Entry is only stored for reference. */
|
|
proc->image.entry = parent->image.entry;
|
|
proc->image.heap = parent->image.heap;
|
|
proc->image.stack = (uintptr_t)valloc(KERNEL_STACK_SIZE) + KERNEL_STACK_SIZE;
|
|
mmu_frame_allocate(
|
|
mmu_get_page(proc->image.stack - KERNEL_STACK_SIZE, 0),
|
|
MMU_FLAG_KERNEL);
|
|
proc->image.shm_heap = USER_SHM_LOW;
|
|
|
|
if (flags & PROC_REUSE_FDS) {
|
|
spin_lock(parent->fds->lock);
|
|
proc->fds = parent->fds;
|
|
proc->fds->refs++;
|
|
spin_unlock(parent->fds->lock);
|
|
} else {
|
|
proc->fds = malloc(sizeof(fd_table_t));
|
|
spin_init(proc->fds->lock);
|
|
proc->fds->refs = 1;
|
|
spin_lock(parent->fds->lock);
|
|
proc->fds->length = parent->fds->length;
|
|
proc->fds->capacity = parent->fds->capacity;
|
|
proc->fds->entries = malloc(proc->fds->capacity * sizeof(fs_node_t *));
|
|
proc->fds->modes = malloc(proc->fds->capacity * sizeof(int));
|
|
proc->fds->offsets = malloc(proc->fds->capacity * sizeof(uint64_t));
|
|
for (uint32_t i = 0; i < parent->fds->length; ++i) {
|
|
proc->fds->entries[i] = clone_fs(parent->fds->entries[i]);
|
|
proc->fds->modes[i] = parent->fds->modes[i];
|
|
proc->fds->offsets[i] = parent->fds->offsets[i];
|
|
}
|
|
spin_unlock(parent->fds->lock);
|
|
}
|
|
|
|
proc->wd_node = clone_fs(parent->wd_node);
|
|
proc->wd_name = strdup(parent->wd_name);
|
|
|
|
proc->wait_queue = list_create("process wait queue",proc);
|
|
proc->shm_mappings = list_create("process shm mappings",proc);
|
|
proc->signal_queue = list_create("process signal queue",proc);
|
|
|
|
proc->sched_node.value = proc;
|
|
proc->sleep_node.value = proc;
|
|
|
|
gettimeofday(&proc->start, NULL);
|
|
tree_node_t * entry = tree_node_create(proc);
|
|
proc->tree_entry = entry;
|
|
|
|
spin_lock(tree_lock);
|
|
tree_node_insert_child_node(process_tree, parent->tree_entry, entry);
|
|
list_insert(process_list, (void*)proc);
|
|
spin_unlock(tree_lock);
|
|
return proc;
|
|
}
|
|
|
|
extern void tree_remove_reparent_root(tree_t * tree, tree_node_t * node);
|
|
|
|
/**
 * @brief Free the memory still held by a process object.
 *
 * Releases the tracee list (if any), the kernel stack, the process's
 * reference on its page directory, its name, and finally the process
 * structure itself. @p proc must not be used afterwards.
 *
 * @param proc Process to free.
 */
void process_reap(process_t * proc) {
	list_t * tracees = proc->tracees;

	if (tracees) {
		/* Drain the list, freeing each popped node, then the list itself. */
		while (tracees->length) {
			free(list_pop(tracees));
		}
		free(tracees);
	}

	/* Unmark the stack bottom's fault detector */
	mmu_frame_allocate(
		mmu_get_page(proc->image.stack - KERNEL_STACK_SIZE, 0),
		MMU_FLAG_KERNEL | MMU_FLAG_WRITABLE);

	/* image.stack is the top of the stack; the allocation starts
	 * KERNEL_STACK_SIZE below it. */
	free((void *)(proc->image.stack - KERNEL_STACK_SIZE));
	process_release_directory(proc->thread.page_directory);

	free(proc->name);
	free(proc);
}
|
|
|
|
/**
 * @brief Check whether any core still references a process.
 *
 * @param proc Process to look for.
 * @returns 1 if @p proc is the current or previous process of any of the
 *          first @ref processor_count cores, 0 otherwise.
 */
static int process_is_owned(process_t * proc) {
	int core = 0;

	while (core < processor_count) {
		if (processor_local_data[core].previous_process == proc) return 1;
		if (processor_local_data[core].current_process == proc) return 1;
		++core;
	}

	return 0;
}
|
|
|
|
/**
 * @brief Defer freeing of a process that some core may still be using.
 *
 * First opportunistically reaps entries from the front of @ref reap_queue
 * for as long as the head is no longer referenced by any core, then
 * queues @p proc for a later pass.
 *
 * @param proc Process to free at some later time.
 */
void process_reap_later(process_t * proc) {
	spin_lock(reap_lock);

	/* See if we can delete anything; stop at the first entry still in use. */
	while (reap_queue->head) {
		process_t * pending = reap_queue->head->value;
		if (process_is_owned(pending)) {
			break;
		}
		/* Free the list node, then the process it carried. */
		free(list_dequeue(reap_queue));
		process_reap(pending);
	}

	/* And delete this thing later */
	list_insert(reap_queue, proc);

	spin_unlock(reap_lock);
}
|
|
|
|
/**
|
|
* @brief Remove a process from the valid process list.
|
|
*
|
|
* Deletes a process from both the valid list and the process tree.
|
|
* If the process has any children, they become orphaned and are
|
|
* moved under 'init', which is awoken if it was blocked on 'waitpid'.
|
|
*
|
|
* Finally, the process is freed.
|
|
*/
|
|
void process_delete(process_t * proc) {
	assert(proc != this_core->current_process);

	tree_node_t * entry = proc->tree_entry;

	if (entry == NULL) {
		printf("Tried to delete process with no tree entry?\n");
		return;
	}

	if (entry == process_tree->root) {
		printf("Tried to delete process init...\n");
		return;
	}

	/* Detach from the tree (children are reparented to the root) and
	 * from the global process list. */
	spin_lock(tree_lock);
	int had_children = entry->children->length;
	tree_remove_reparent_root(process_tree, entry);
	list_delete(process_list, list_find(process_list, proc));
	spin_unlock(tree_lock);

	if (had_children) {
		/* Wake up init */
		process_t * init = process_tree->root->value;
		wakeup_queue(init->wait_queue);
	}

	// FIXME bitset_clear(&pid_set, proc->id);
	proc->tree_entry = NULL;

	shm_release_all(proc);
	free(proc->shm_mappings);

	if (proc->supplementary_group_list) {
		proc->supplementary_group_count = 0;
		free(proc->supplementary_group_list);
	}

	/* Is someone using this process? Check every core except this one. */
	int used_elsewhere = 0;
	for (int core = 0; core < processor_count; ++core) {
		if (core == this_core->cpu_id) continue;
		if (processor_local_data[core].previous_process == proc ||
		    processor_local_data[core].current_process == proc) {
			used_elsewhere = 1;
			break;
		}
	}

	if (used_elsewhere) {
		/* Another core still points at it: free it later. */
		process_reap_later(proc);
	} else {
		process_reap(proc);
	}
}
|
|
|
|
/**
|
|
* @brief Place an available process in the ready queue.
|
|
*
|
|
* Marks a process as available for general scheduling.
|
|
* If the process was currently in a sleep queue, it is
|
|
* marked as having been interrupted and removed from its
|
|
* owning queue before being moved.
|
|
*
|
|
* The process must not otherwise have been in a scheduling
|
|
* queue before it is placed in the ready queue.
|
|
*/
|
|
void make_process_ready(volatile process_t * proc) {
|
|
int sleep_lock_is_mine = sleep_lock.owner == (this_core->cpu_id + 1);
|
|
if (!sleep_lock_is_mine) spin_lock(sleep_lock);
|
|
if (proc->sleep_node.owner != NULL) {
|
|
if (proc->sleep_node.owner == sleep_queue) {
|
|
/* The sleep queue is slightly special... */
|
|
if (proc->timed_sleep_node) {
|
|
list_delete(sleep_queue, proc->timed_sleep_node);
|
|
proc->sleep_node.owner = NULL;
|
|
free(proc->timed_sleep_node->value);
|
|
}
|
|
} else {
|
|
/* This was blocked on a semaphore we can interrupt. */
|
|
__sync_or_and_fetch(&proc->flags, PROC_FLAG_SLEEP_INT);
|
|
list_delete((list_t*)proc->sleep_node.owner, (node_t*)&proc->sleep_node);
|
|
}
|
|
}
|
|
if (!sleep_lock_is_mine) spin_unlock(sleep_lock);
|
|
|
|
spin_lock(process_queue_lock);
|
|
if (proc->sched_node.owner) {
|
|
/* There's only one ready queue, so this means the process was already ready, which
|
|
* is indicative of a bug somewhere as we shouldn't be added processes to the ready
|
|
* queue multiple times. */
|
|
spin_unlock(process_queue_lock);
|
|
return;
|
|
}
|
|
|
|
list_append(process_queue, (node_t*)&proc->sched_node);
|
|
spin_unlock(process_queue_lock);
|
|
|
|
arch_wakeup_others();
|
|
}
|
|
|
|
/**
|
|
* @brief Pop the next available process from the queue.
|
|
*
|
|
* Gets the next available process from the round-robin scheduling
|
|
* queue. If there is no process to run, the idle task is returned.
|
|
*
|
|
* TODO This needs more locking for SMP...
|
|
*/
|
|
volatile process_t * next_ready_process(void) {
|
|
spin_lock(process_queue_lock);
|
|
|
|
if (!process_queue->head) {
|
|
if (process_queue->length) {
|
|
arch_fatal_prepare();
|
|
printf("Queue has a length but head is NULL\n");
|
|
arch_dump_traceback();
|
|
arch_fatal();
|
|
}
|
|
spin_unlock(process_queue_lock);
|
|
return this_core->kernel_idle_task;
|
|
}
|
|
|
|
node_t * np = list_dequeue(process_queue);
|
|
|
|
if ((uintptr_t)np < 0xFFFFff0000000000UL || (uintptr_t)np > 0xFFFFfff000000000UL) {
|
|
arch_fatal_prepare();
|
|
printf("Suspicious pointer in queue: %#zx\n", (uintptr_t)np);
|
|
arch_dump_traceback();
|
|
arch_fatal();
|
|
}
|
|
volatile process_t * next = np->value;
|
|
|
|
if ((next->flags & PROC_FLAG_RUNNING) && (next->owner != this_core->cpu_id)) {
|
|
/* We pulled a process too soon, switch to idle for a bit so the
|
|
* core that marked this process as ready can finish switching away from it. */
|
|
list_append(process_queue, (node_t*)&next->sched_node);
|
|
spin_unlock(process_queue_lock);
|
|
return this_core->kernel_idle_task;
|
|
}
|
|
|
|
spin_unlock(process_queue_lock);
|
|
|
|
if (!(next->flags & PROC_FLAG_FINISHED)) {
|
|
__sync_or_and_fetch(&next->flags, PROC_FLAG_RUNNING);
|
|
}
|
|
|
|
next->owner = this_core->cpu_id;
|
|
|
|
return next;
|
|
}
|
|
|
|
/**
|
|
* @brief Signal a semaphore.
|
|
*
|
|
* Okay, so toaru32 used these general-purpose lists of processes
|
|
* as a sort of semaphore system, so often when you see 'queue' it
|
|
* can be read as 'semaphore' and be equally valid (outside of the
|
|
* 'ready queue', I guess). This will awaken all processes currently
|
|
* in the semaphore @p queue, unless they were marked as finished in
|
|
* which case they will be discarded.
|
|
*
|
|
* Note that these "semaphore queues" are binary semaphores - simple
|
|
* locks, but with smarter logic than the "spin_lock" primitive also
|
|
* used throughout the kernel, as that just blindly switches tasks
|
|
* until its atomic swap succeeds.
|
|
*
|
|
* @param queue The semaphore to signal
|
|
* @returns the number of processes successfully awoken
|
|
*/
|
|
int wakeup_queue(list_t * queue) {
	int count = 0;

	spin_lock(wait_lock_tmp);
	while (queue->length > 0) {
		node_t * waiter_node = list_pop(queue);

		/* The wait lock is dropped while the waiter is made ready. */
		spin_unlock(wait_lock_tmp);

		process_t * waiter = waiter_node->value;
		if (!(waiter->flags & PROC_FLAG_FINISHED)) {
			make_process_ready(waiter);
		}

		spin_lock(wait_lock_tmp);

		/* Every popped entry is counted, finished or not. */
		count++;
	}
	spin_unlock(wait_lock_tmp);

	return count;
}
|
|
|
|
/**
|
|
* @brief Signal a semaphore, exceptionally.
|
|
*
|
|
* Wake up everything in the semaphore @p queue but mark every
|
|
* waiter as having been interrupted, rather than gracefully awoken.
|
|
* Generally that means the event they were waiting for did not
|
|
* happen and may never happen.
|
|
*
|
|
* Otherwise, same semantics as @ref wakeup_queue.
|
|
*/
|
|
int wakeup_queue_interrupted(list_t * queue) {
	int count = 0;

	spin_lock(wait_lock_tmp);
	while (queue->length > 0) {
		node_t * waiter_node = list_pop(queue);

		/* The wait lock is dropped while the waiter is made ready. */
		spin_unlock(wait_lock_tmp);

		process_t * waiter = waiter_node->value;
		if (!(waiter->flags & PROC_FLAG_FINISHED)) {
			/* Flag the wait as interrupted before rescheduling. */
			__sync_or_and_fetch(&waiter->flags, PROC_FLAG_SLEEP_INT);
			make_process_ready(waiter);
		}

		spin_lock(wait_lock_tmp);

		/* Every popped entry is counted, finished or not. */
		count++;
	}
	spin_unlock(wait_lock_tmp);

	return count;
}
|
|
|
|
/**
 * @brief Signal a semaphore for a single waiter.
 *
 * Pops at most one entry from @p queue and makes it ready unless it has
 * been marked as finished.
 *
 * @param queue The semaphore to signal
 * @returns 1 if an entry was popped from the queue, 0 if it was empty.
 */
int wakeup_queue_one(list_t * queue) {
	int count = 0;

	spin_lock(wait_lock_tmp);
	if (queue->length > 0) {
		node_t * waiter_node = list_pop(queue);

		/* The wait lock is dropped while the waiter is made ready. */
		spin_unlock(wait_lock_tmp);

		process_t * waiter = waiter_node->value;
		if (!(waiter->flags & PROC_FLAG_FINISHED)) {
			make_process_ready(waiter);
		}

		spin_lock(wait_lock_tmp);
		count++;
	}
	spin_unlock(wait_lock_tmp);

	return count;
}
|
|
|
|
/**
|
|
* @brief Wait for a binary semaphore.
|
|
*
|
|
* Wait for an event with everyone else in @p queue.
|
|
*
|
|
* @returns 1 if the wait was interrupted (eg. the event did not occur); 0 otherwise.
|
|
*/
|
|
int sleep_on(list_t * queue) {
	if (!this_core->current_process->sleep_node.owner) {
		/* Start with a clean "interrupted" flag, then join the queue. */
		__sync_and_and_fetch(&this_core->current_process->flags, ~(PROC_FLAG_SLEEP_INT));

		spin_lock(wait_lock_tmp);
		list_append(queue, (node_t*)&this_core->current_process->sleep_node);
		spin_unlock(wait_lock_tmp);

		/* Yield without rescheduling; we come back once awoken. */
		switch_task(0);

		return !!(this_core->current_process->flags & PROC_FLAG_SLEEP_INT);
	}

	/* Our sleep node is already linked into some queue: just yield and
	 * report an uninterrupted wait. */
	switch_task(0);
	return 0;
}
|
|
|
|
/**
 * @brief Wait on a semaphore queue, releasing a caller-held lock first.
 *
 * Appends the current process to @p queue, then unlocks @p release, and
 * only then yields. Unlike @ref sleep_on, this does not check whether the
 * process's sleep node is already linked somewhere.
 *
 * @param queue   The queue to wait in.
 * @param release Spin lock to release once the process has been queued.
 * @returns 1 if the wait was interrupted; 0 otherwise.
 */
int sleep_on_unlocking(list_t * queue, spin_lock_t * release) {
	/* Start with a clean "interrupted" flag. */
	__sync_and_and_fetch(&this_core->current_process->flags, ~(PROC_FLAG_SLEEP_INT));

	spin_lock(wait_lock_tmp);
	list_append(queue, (node_t*)&this_core->current_process->sleep_node);
	spin_unlock(wait_lock_tmp);

	/* We are queued; the caller's lock can go now. */
	spin_unlock(*release);

	switch_task(0);

	int interrupted = !!(this_core->current_process->flags & PROC_FLAG_SLEEP_INT);
	return interrupted;
}
|
|
|
|
/**
|
|
* @brief Indicates whether a process is ready to be run but not currently running.
|
|
*/
|
|
int process_is_ready(process_t * proc) {
	/* Not linked into the ready queue: not ready. */
	if (proc->sched_node.owner == NULL) {
		return 0;
	}

	/* Queued, so it is ready only if it is not also marked as running. */
	return !(proc->flags & PROC_FLAG_RUNNING);
}
|
|
|
|
int process_alert_node_locked(process_t * process, void * value);
|
|
|
|
/**
|
|
* @brief Wake up processes that were sleeping on timers.
|
|
*
|
|
* Reschedule all processes whose timed waits have expired as of
|
|
* the time indicated by @p seconds and @p subseconds. If the sleep
|
|
* was part of an fswait system call timing out, the call is marked
|
|
* as timed out before the process is rescheduled.
|
|
*/
|
|
void wakeup_sleepers(unsigned long seconds, unsigned long subseconds) {
|
|
spin_lock(sleep_lock);
|
|
if (sleep_queue->length) {
|
|
sleeper_t * proc = ((sleeper_t *)sleep_queue->head->value);
|
|
while (proc && (proc->end_tick < seconds || (proc->end_tick == seconds && proc->end_subtick <= subseconds))) {
|
|
|
|
if (proc->is_fswait) {
|
|
proc->is_fswait = -1;
|
|
process_alert_node_locked(proc->process,proc);
|
|
} else {
|
|
process_t * process = proc->process;
|
|
process->sleep_node.owner = NULL;
|
|
process->timed_sleep_node = NULL;
|
|
if (!process_is_ready(process)) {
|
|
make_process_ready(process);
|
|
}
|
|
}
|
|
free(proc);
|
|
free(list_dequeue(sleep_queue));
|
|
if (sleep_queue->length) {
|
|
proc = ((sleeper_t *)sleep_queue->head->value);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
spin_unlock(sleep_lock);
|
|
}
|
|
|
|
/**
|
|
* @brief Wait until a given time.
|
|
*
|
|
* Suspends the current process until the given time. The process may
|
|
* still be resumed by a signal or other mechanism, in which case the
|
|
* sleep will not be resumed by the kernel.
|
|
*/
|
|
void sleep_until(process_t * process, unsigned long seconds, unsigned long subseconds) {
|
|
spin_lock(sleep_lock);
|
|
if (this_core->current_process->sleep_node.owner) {
|
|
spin_unlock(sleep_lock);
|
|
/* Can't sleep, sleeping already */
|
|
return;
|
|
}
|
|
process->sleep_node.owner = sleep_queue;
|
|
|
|
node_t * before = NULL;
|
|
foreach(node, sleep_queue) {
|
|
sleeper_t * candidate = ((sleeper_t *)node->value);
|
|
if (!candidate) {
|
|
printf("null candidate?\n");
|
|
continue;
|
|
}
|
|
if (candidate->end_tick > seconds || (candidate->end_tick == seconds && candidate->end_subtick > subseconds)) {
|
|
break;
|
|
}
|
|
before = node;
|
|
}
|
|
sleeper_t * proc = malloc(sizeof(sleeper_t));
|
|
proc->process = process;
|
|
proc->end_tick = seconds;
|
|
proc->end_subtick = subseconds;
|
|
proc->is_fswait = 0;
|
|
process->timed_sleep_node = list_insert_after(sleep_queue, before, proc);
|
|
spin_unlock(sleep_lock);
|
|
}
|
|
|
|
uint8_t process_compare(void * proc_v, void * pid_v) {
|
|
pid_t pid = (*(pid_t *)pid_v);
|
|
process_t * proc = (process_t *)proc_v;
|
|
|
|
return (uint8_t)(proc->id == pid);
|
|
}
|
|
|
|
process_t * process_from_pid(pid_t pid) {
|
|
if (pid < 0) return NULL;
|
|
|
|
spin_lock(tree_lock);
|
|
tree_node_t * entry = tree_find(process_tree,&pid,process_compare);
|
|
spin_unlock(tree_lock);
|
|
if (entry) {
|
|
return (process_t *)entry->value;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/**
 * @brief Duplicate file descriptor @p src onto @p dest.
 *
 * If @p dest is -1 a new descriptor slot is appended and used as the
 * destination. Otherwise whatever is open at @p dest is closed first.
 * The destination receives the same node, mode and offset as the source,
 * and the node is opened once more on behalf of the new descriptor. If
 * both slots already refer to the same node nothing is changed.
 *
 * @param proc Process whose descriptor table is modified.
 * @param src  Source descriptor; must be in range and currently open.
 * @param dest Destination descriptor, or -1 to allocate a new one.
 * @returns The destination descriptor on success, -1 if either descriptor
 *          is out of range or @p src is not open.
 */
long process_move_fd(process_t * proc, long src, long dest) {
	/* The size_t casts make negative values fail the range checks. */
	if ((size_t)src >= proc->fds->length) {
		return -1;
	}
	if (dest != -1 && (size_t)dest >= proc->fds->length) {
		return -1;
	}

	/*
	 * A closed source slot cannot be duplicated. Reject it before touching
	 * dest so that an invalid request does not close dest as a side effect.
	 */
	if (!proc->fds->entries[src]) {
		return -1;
	}

	if (dest == -1) {
		dest = process_append_fd(proc, NULL);
	}

	if (proc->fds->entries[dest] != proc->fds->entries[src]) {
		close_fs(proc->fds->entries[dest]);
		proc->fds->entries[dest] = proc->fds->entries[src];
		proc->fds->modes[dest] = proc->fds->modes[src];
		proc->fds->offsets[dest] = proc->fds->offsets[src];
		open_fs(proc->fds->entries[dest], 0);
	}
	return dest;
}
|
|
|
|
void tasking_start(void) {
|
|
this_core->current_process = spawn_init();
|
|
this_core->kernel_idle_task = spawn_kidle(1);
|
|
}
|
|
|
|
static int wait_candidate(volatile process_t * parent, int pid, int options, volatile process_t * proc) {
|
|
if (!proc) return 0;
|
|
|
|
if (options & WNOKERN) {
|
|
/* Skip kernel processes */
|
|
if (proc->flags & PROC_FLAG_IS_TASKLET) return 0;
|
|
}
|
|
|
|
if (pid < -1) {
|
|
if (proc->job == -pid || proc->id == -pid) return 1;
|
|
} else if (pid == 0) {
|
|
/* Matches our group ID */
|
|
if (proc->job == parent->id) return 1;
|
|
} else if (pid > 0) {
|
|
/* Specific pid */
|
|
if (proc->id == pid) return 1;
|
|
} else {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int waitpid(int pid, int * status, int options) {
|
|
volatile process_t * volatile proc = (process_t*)this_core->current_process;
|
|
#if 0
|
|
if (proc->group) {
|
|
proc = process_from_pid(proc->group);
|
|
}
|
|
#endif
|
|
|
|
do {
|
|
volatile process_t * candidate = NULL;
|
|
int has_children = 0;
|
|
int is_parent = 0;
|
|
|
|
spin_lock(proc->wait_lock);
|
|
|
|
/* First, find out if there is anyone to reap */
|
|
foreach(node, proc->tree_entry->children) {
|
|
if (!node->value) {
|
|
continue;
|
|
}
|
|
volatile process_t * volatile child = ((tree_node_t *)node->value)->value;
|
|
|
|
if (wait_candidate(proc, pid, options, child)) {
|
|
has_children = 1;
|
|
is_parent = 1;
|
|
if (child->flags & PROC_FLAG_FINISHED) {
|
|
candidate = child;
|
|
break;
|
|
}
|
|
if ((options & WSTOPPED) && child->flags & PROC_FLAG_SUSPENDED) {
|
|
candidate = child;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!candidate && proc->tracees) {
|
|
foreach(node, proc->tracees) {
|
|
process_t * child = node->value;
|
|
if (wait_candidate(proc,pid,options,child)) {
|
|
has_children = 1;
|
|
if (child->flags & (PROC_FLAG_SUSPENDED | PROC_FLAG_FINISHED)) {
|
|
candidate = child;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!has_children) {
|
|
/* No valid children matching this description */
|
|
spin_unlock(proc->wait_lock);
|
|
return -ECHILD;
|
|
}
|
|
|
|
if (candidate) {
|
|
spin_unlock(proc->wait_lock);
|
|
if (status) {
|
|
*status = candidate->status;
|
|
}
|
|
int pid = candidate->id;
|
|
if (is_parent && (candidate->flags & PROC_FLAG_FINISHED)) {
|
|
while (*((volatile int *)&candidate->flags) & PROC_FLAG_RUNNING);
|
|
proc->time_children += candidate->time_children + candidate->time_total;
|
|
proc->time_sys_children += candidate->time_sys_children + candidate->time_sys;
|
|
process_delete((process_t*)candidate);
|
|
}
|
|
return pid;
|
|
} else {
|
|
if (options & WNOHANG) {
|
|
spin_unlock(proc->wait_lock);
|
|
return 0;
|
|
}
|
|
/* Wait */
|
|
if (sleep_on_unlocking(proc->wait_queue, &proc->wait_lock) != 0) {
|
|
return -EINTR;
|
|
}
|
|
}
|
|
} while (1);
|
|
}
|
|
|
|
/**
 * @brief Arm a timeout for a process that is waiting on file nodes.
 *
 * Converts @p timeout into an absolute deadline with relative_time(),
 * allocates a sleeper marked as an fswait timeout, adds that sleeper to
 * the process's node_waits list and inserts it into the sleep queue at the
 * position that keeps the queue ordered by deadline. The queue node is
 * remembered in process->timeout_node.
 *
 * This function takes no locks itself; the caller visible in this file,
 * process_wait_nodes(), calls it with sleep_lock and the process's
 * sched_lock held and with node_waits already created.
 *
 * @param process Process to arm the timeout for.
 * @param timeout Relative timeout; it is multiplied by 1000 to form the
 *                subsecond argument of relative_time(). Expected to be
 *                positive.
 * @returns Always 0.
 */
int process_timeout_sleep(process_t * process, int timeout) {
	unsigned long s, ss;
	/*
	 * Widen before multiplying: in int arithmetic timeout * 1000 overflows
	 * for timeouts above INT_MAX / 1000, producing a bogus deadline.
	 */
	relative_time(0, (unsigned long)timeout * 1000, &s, &ss);

	/* Find the last queued sleeper that wakes no later than the new one. */
	node_t * before = NULL;
	foreach(node, sleep_queue) {
		sleeper_t * candidate = ((sleeper_t *)node->value);
		if (candidate->end_tick > s || (candidate->end_tick == s && candidate->end_subtick > ss)) {
			break;
		}
		before = node;
	}

	sleeper_t * proc = malloc(sizeof(sleeper_t));
	proc->process = process;
	proc->end_tick = s;
	proc->end_subtick = ss;
	proc->is_fswait = 1;

	/* The sleeper doubles as the "node" that wakes the fswait on timeout. */
	list_insert(process->node_waits, proc);
	process->timeout_node = list_insert_after(sleep_queue, before, proc);

	return 0;
}
|
|
|
|
/**
 * @brief Wait until one of a set of file nodes is ready, or a timeout passes.
 *
 * First polls every node with selectcheck_fs(). If none is ready and
 * @p timeout is non-zero, the process registers itself with every node via
 * selectwait_fs(), optionally arms a timeout, and switches away.
 *
 * @param process Process that will wait.
 * @param nodes   NULL-terminated array of nodes to wait on.
 * @param timeout 0 to only poll, a positive value to wait with a timeout
 *                (passed to process_timeout_sleep()), or a negative value
 *                to wait without a timeout.
 * @returns The index of the first node for which selectcheck_fs() returned 0;
 *          -1 if selectcheck_fs() returned a negative value;
 *          -2 if nothing was ready and @p timeout was 0; otherwise the
 *          value of process->awoken_index after the process runs again.
 */
int process_wait_nodes(process_t * process,fs_node_t * nodes[], int timeout) {
	/* Polling pass: return straight away if something is already ready. */
	int index = 0;
	for (fs_node_t ** n = nodes; *n; ++n, ++index) {
		int result = selectcheck_fs(*n);
		if (result < 0) {
			return -1;
		}
		if (result == 0) {
			return index;
		}
	}

	if (timeout == 0) {
		return -2;
	}

	spin_lock(sleep_lock);
	spin_lock(process->sched_lock);

	/* Registration pass: ask every node to alert us. */
	process->node_waits = list_create("process fswaiters",process);
	for (fs_node_t ** n = nodes; *n; ++n) {
		if (selectwait_fs(*n, process) < 0) {
			printf("bad selectwait?\n");
		}
	}

	if (timeout > 0) {
		process_timeout_sleep(process, timeout);
	} else {
		process->timeout_node = NULL;
	}

	process->awoken_index = -1;
	spin_unlock(process->sched_lock);
	spin_unlock(sleep_lock);

	/* Wait. */
	switch_task(0);

	return process->awoken_index;
}
|
|
|
|
/**
 * @brief Finish a node wait and make the process runnable again.
 *
 * Records @p index as the wake-up result, frees the process's node_waits
 * list, cancels a still-pending timeout and makes the process ready.
 *
 * A pending timeout sleeper is removed from the sleep queue and freed
 * here unless its is_fswait field is -1, the value wakeup_sleepers() sets
 * before alerting; in that case wakeup_sleepers() frees it.
 *
 * Locking: sleep_lock must be held (asserted). The function releases
 * process->sched_lock without acquiring it, so the caller must hold it on
 * entry and must not unlock it again.
 *
 * @param process Process to wake.
 * @param index   Value stored in process->awoken_index.
 * @returns Always 0.
 */
int process_awaken_from_fswait(process_t * process, int index) {
	must_have_lock(sleep_lock);

	process->awoken_index = index;

	list_t * waits = process->node_waits;
	list_free(waits);
	free(waits);
	process->node_waits = NULL;

	node_t * timer = process->timeout_node;
	if (timer && timer->owner == sleep_queue) {
		sleeper_t * sleeper = timer->value;
		if (sleeper->is_fswait != -1) {
			/* Timeout has not fired yet: cancel it. */
			list_delete(sleep_queue, timer);
			free(sleeper);
			free(timer);
		}
	}
	process->timeout_node = NULL;

	make_process_ready(process);
	spin_unlock(process->sched_lock);
	return 0;
}
|
|
|
|
/**
 * @brief Interrupt a process's node wait, if it is in one.
 *
 * Takes sleep_lock and the process's sched_lock. If the process has a
 * node_waits list it is woken through process_awaken_from_fswait() with
 * an index of -1; otherwise nothing happens.
 *
 * @param process Process to wake.
 */
void process_awaken_signal(process_t * process) {
	spin_lock(sleep_lock);
	spin_lock(process->sched_lock);

	if (!process->node_waits) {
		spin_unlock(process->sched_lock);
	} else {
		/* process_awaken_from_fswait() releases sched_lock itself. */
		process_awaken_from_fswait(process, -1);
	}

	spin_unlock(sleep_lock);
}
|
|
|
|
/**
 * @brief Alert a waiting process about a node; sleep_lock must be held.
 *
 * Looks @p value up in the process's node_waits list and, if present,
 * wakes the process with the position of the match as its result.
 *
 * @param process Process to alert; validated with is_valid_process().
 * @param value   The node (or timeout sleeper) that became ready.
 * @returns 0 if the process is invalid (a diagnostic is logged) or is not
 *          currently waiting on nodes; -1 if it is waiting but not on
 *          @p value; otherwise the result of process_awaken_from_fswait().
 */
int process_alert_node_locked(process_t * process, void * value) {
	must_have_lock(sleep_lock);

	if (!is_valid_process(process)) {
		dprintf("core %d (pid=%d %s) attempted to alert invalid process %#zx\n",
			this_core->cpu_id, this_core->current_process->id, this_core->current_process->name,
			(uintptr_t)process);
		return 0;
	}

	spin_lock(process->sched_lock);

	int result;
	if (!process->node_waits) {
		/* Possibly already returned. Wait for another call. */
		result = 0;
	} else {
		result = -1;
		int position = 0;
		foreach(node, process->node_waits) {
			if (node->value == value) {
				/* Releases sched_lock on our behalf. */
				return process_awaken_from_fswait(process, position);
			}
			position++;
		}
	}

	spin_unlock(process->sched_lock);
	return result;
}
|
|
|
|
/**
 * @brief Alert a waiting process about a node.
 *
 * Acquires sleep_lock around a call to process_alert_node_locked().
 *
 * @param process Process to alert.
 * @param value   The node that became ready.
 * @returns Whatever process_alert_node_locked() returns.
 */
int process_alert_node(process_t * process, void * value) {
	int status;

	spin_lock(sleep_lock);
	status = process_alert_node_locked(process, value);
	spin_unlock(sleep_lock);

	return status;
}
|
|
|
|
/**
 * @brief Get the parent of a process from the process tree.
 *
 * Reads the parent link of the process's tree entry under tree_lock.
 *
 * @param process Process whose parent is wanted.
 * @returns The parent process, or NULL if the tree entry has no parent.
 */
process_t * process_get_parent(process_t * process) {
	spin_lock(tree_lock);

	tree_node_t * parent_entry = process->tree_entry->parent;
	process_t * result = parent_entry ? parent_entry->value : NULL;

	spin_unlock(tree_lock);
	return result;
}
|
|
|
|
void task_exit(int retval) {
|
|
this_core->current_process->status = retval;
|
|
|
|
/* free whatever we can */
|
|
list_free(this_core->current_process->wait_queue);
|
|
free(this_core->current_process->wait_queue);
|
|
list_free(this_core->current_process->signal_queue);
|
|
free(this_core->current_process->signal_queue);
|
|
free(this_core->current_process->wd_name);
|
|
if (this_core->current_process->node_waits) {
|
|
list_free(this_core->current_process->node_waits);
|
|
free(this_core->current_process->node_waits);
|
|
this_core->current_process->node_waits = NULL;
|
|
}
|
|
|
|
if (this_core->current_process->fds) {
|
|
spin_lock(this_core->current_process->fds->lock);
|
|
this_core->current_process->fds->refs--;
|
|
if (this_core->current_process->fds->refs == 0) {
|
|
for (uint32_t i = 0; i < this_core->current_process->fds->length; ++i) {
|
|
if (this_core->current_process->fds->entries[i]) {
|
|
close_fs(this_core->current_process->fds->entries[i]);
|
|
this_core->current_process->fds->entries[i] = NULL;
|
|
}
|
|
}
|
|
free(this_core->current_process->fds->entries);
|
|
free(this_core->current_process->fds->offsets);
|
|
free(this_core->current_process->fds->modes);
|
|
free(this_core->current_process->fds);
|
|
this_core->current_process->fds = NULL;
|
|
} else {
|
|
spin_unlock(this_core->current_process->fds->lock);
|
|
}
|
|
}
|
|
|
|
if (this_core->current_process->tracees) {
|
|
spin_lock(this_core->current_process->wait_lock);
|
|
while (this_core->current_process->tracees->length) {
|
|
node_t * n = list_pop(this_core->current_process->tracees);
|
|
process_t * tracee = n->value;
|
|
free(n);
|
|
if (is_valid_process(tracee)) {
|
|
tracee->tracer = 0;
|
|
__sync_and_and_fetch(&tracee->flags, ~(PROC_FLAG_TRACE_SIGNALS | PROC_FLAG_TRACE_SYSCALLS));
|
|
if (tracee->flags & PROC_FLAG_SUSPENDED) {
|
|
tracee->status = 0;
|
|
__sync_and_and_fetch(&tracee->flags, ~(PROC_FLAG_SUSPENDED));
|
|
make_process_ready(tracee);
|
|
}
|
|
}
|
|
}
|
|
spin_unlock(this_core->current_process->wait_lock);
|
|
}
|
|
|
|
update_process_times(1);
|
|
|
|
process_t * parent = process_get_parent((process_t *)this_core->current_process);
|
|
__sync_or_and_fetch(&this_core->current_process->flags, PROC_FLAG_FINISHED);
|
|
|
|
if (this_core->current_process->tracer) {
|
|
process_t * tracer = process_from_pid(this_core->current_process->tracer);
|
|
if (tracer && tracer != parent) {
|
|
spin_lock(tracer->wait_lock);
|
|
wakeup_queue(tracer->wait_queue);
|
|
spin_unlock(tracer->wait_lock);
|
|
}
|
|
}
|
|
|
|
if (parent && !(parent->flags & PROC_FLAG_FINISHED)) {
|
|
spin_lock(parent->wait_lock);
|
|
send_signal(parent->group, SIGCHLD, 1);
|
|
wakeup_queue(parent->wait_queue);
|
|
spin_unlock(parent->wait_lock);
|
|
}
|
|
|
|
switch_next();
|
|
}
|
|
|
|
/**
 * @brief Push a value onto a downward-growing stack.
 *
 * Decrements @p stack by sizeof(@p type) and stores @p item at the new
 * address. @p stack must be a modifiable integer lvalue holding the stack
 * address; it is evaluated more than once, so it must not have side
 * effects. The body is wrapped in do { } while (0) so that the macro
 * behaves as a single statement (e.g. under an unbraced if/else).
 */
#define PUSH(stack, type, item) do { \
	(stack) -= sizeof(type); \
	*((volatile type *)(stack)) = (item); \
} while (0)
|
|
|
|
pid_t fork(void) {
|
|
uintptr_t sp, bp;
|
|
process_t * parent = (process_t*)this_core->current_process;
|
|
union PML * directory = mmu_clone(parent->thread.page_directory->directory);
|
|
process_t * new_proc = spawn_process(parent, 0);
|
|
new_proc->thread.page_directory = malloc(sizeof(page_directory_t));
|
|
new_proc->thread.page_directory->refcount = 1;
|
|
new_proc->thread.page_directory->directory = directory;
|
|
spin_init(new_proc->thread.page_directory->lock);
|
|
|
|
struct regs r;
|
|
memcpy(&r, parent->syscall_registers, sizeof(struct regs));
|
|
sp = new_proc->image.stack;
|
|
bp = sp;
|
|
|
|
arch_syscall_return(&r, 0);
|
|
PUSH(sp, struct regs, r);
|
|
|
|
new_proc->syscall_registers = (void*)sp;
|
|
new_proc->thread.context.sp = sp;
|
|
new_proc->thread.context.bp = bp;
|
|
new_proc->thread.context.tls_base = parent->thread.context.tls_base;
|
|
new_proc->thread.context.ip = (uintptr_t)&arch_resume_user;
|
|
arch_save_context(&parent->thread);
|
|
memcpy(new_proc->thread.context.saved, parent->thread.context.saved, sizeof(parent->thread.context.saved));
|
|
|
|
#if 0
|
|
printf("fork(): resuming with register context\n");
|
|
extern void aarch64_regs(struct regs *);
|
|
aarch64_regs(&r);
|
|
printf("fork(): and arch context:\n");
|
|
extern void aarch64_context(process_t * proc);
|
|
aarch64_context(new_proc);
|
|
#endif
|
|
|
|
if (parent->flags & PROC_FLAG_IS_TASKLET) new_proc->flags |= PROC_FLAG_IS_TASKLET;
|
|
make_process_ready(new_proc);
|
|
return new_proc->id;
|
|
}
|
|
|
|
pid_t clone(uintptr_t new_stack, uintptr_t thread_func, uintptr_t arg) {
|
|
uintptr_t sp, bp;
|
|
process_t * parent = (process_t *)this_core->current_process;
|
|
process_t * new_proc = spawn_process(this_core->current_process, 1);
|
|
new_proc->thread.page_directory = this_core->current_process->thread.page_directory;
|
|
spin_lock(new_proc->thread.page_directory->lock);
|
|
new_proc->thread.page_directory->refcount++;
|
|
spin_unlock(new_proc->thread.page_directory->lock);
|
|
|
|
struct regs r;
|
|
memcpy(&r, parent->syscall_registers, sizeof(struct regs));
|
|
sp = new_proc->image.stack;
|
|
bp = sp;
|
|
|
|
/* Set the gid */
|
|
if (this_core->current_process->group) {
|
|
new_proc->group = this_core->current_process->group;
|
|
} else {
|
|
/* We are the session leader */
|
|
new_proc->group = this_core->current_process->id;
|
|
}
|
|
|
|
/* different calling convention */
|
|
#if defined(__x86_64__)
|
|
r.rdi = arg;
|
|
PUSH(new_stack, uintptr_t, (uintptr_t)0xFFFFB00F);
|
|
#elif defined(__aarch64__)
|
|
r.x0 = arg;
|
|
r.x30 = 0xFFFFB00F;
|
|
#endif
|
|
PUSH(sp, struct regs, r);
|
|
new_proc->syscall_registers = (void*)sp;
|
|
#if defined(__x86_64__)
|
|
new_proc->syscall_registers->rsp = new_stack;
|
|
new_proc->syscall_registers->rbp = new_stack;
|
|
new_proc->syscall_registers->rip = thread_func;
|
|
#elif defined(__aarch64__)
|
|
new_proc->syscall_registers->user_sp = new_stack;
|
|
new_proc->syscall_registers->x29 = new_stack;
|
|
new_proc->thread.context.saved[10] = thread_func;
|
|
#endif
|
|
new_proc->thread.context.sp = sp;
|
|
new_proc->thread.context.bp = bp;
|
|
new_proc->thread.context.tls_base = this_core->current_process->thread.context.tls_base;
|
|
new_proc->thread.context.ip = (uintptr_t)&arch_resume_user;
|
|
if (parent->flags & PROC_FLAG_IS_TASKLET) new_proc->flags |= PROC_FLAG_IS_TASKLET;
|
|
make_process_ready(new_proc);
|
|
return new_proc->id;
|
|
}
|
|
|
|
process_t * spawn_worker_thread(void (*entrypoint)(void * argp), const char * name, void * argp) {
|
|
process_t * proc = calloc(1,sizeof(process_t));
|
|
|
|
proc->flags = PROC_FLAG_IS_TASKLET | PROC_FLAG_STARTED;
|
|
|
|
proc->id = get_next_pid();
|
|
proc->group = proc->id;
|
|
proc->name = strdup(name);
|
|
proc->description = NULL;
|
|
proc->cmdline = NULL;
|
|
|
|
/* Are these necessary for tasklets? Should probably all be zero. */
|
|
proc->user = 0;
|
|
proc->real_user = 0;
|
|
proc->user_group = 0;
|
|
proc->real_user_group = 0;
|
|
proc->mask = 0;
|
|
proc->job = proc->id;
|
|
proc->session = proc->id;
|
|
|
|
proc->thread.page_directory = malloc(sizeof(page_directory_t));
|
|
proc->thread.page_directory->refcount = 1;
|
|
proc->thread.page_directory->directory = mmu_clone(mmu_get_kernel_directory());
|
|
spin_init(proc->thread.page_directory->lock);
|
|
|
|
proc->image.stack = (uintptr_t)valloc(KERNEL_STACK_SIZE) + KERNEL_STACK_SIZE;
|
|
PUSH(proc->image.stack, uintptr_t, (uintptr_t)entrypoint);
|
|
PUSH(proc->image.stack, void*, argp);
|
|
|
|
proc->thread.context.sp = proc->image.stack;
|
|
proc->thread.context.bp = proc->image.stack;
|
|
proc->thread.context.ip = (uintptr_t)&arch_enter_tasklet;
|
|
|
|
|
|
proc->wait_queue = list_create("worker thread wait queue",proc);
|
|
proc->shm_mappings = list_create("worker thread shm mappings",proc);
|
|
proc->signal_queue = list_create("worker thread signal queue",proc);
|
|
|
|
proc->sched_node.value = proc;
|
|
proc->sleep_node.value = proc;
|
|
|
|
gettimeofday(&proc->start, NULL);
|
|
tree_node_t * entry = tree_node_create(proc);
|
|
proc->tree_entry = entry;
|
|
|
|
spin_lock(tree_lock);
|
|
tree_node_insert_child_node(process_tree, this_core->current_process->tree_entry, entry);
|
|
list_insert(process_list, (void*)proc);
|
|
spin_unlock(tree_lock);
|
|
|
|
make_process_ready(proc);
|
|
|
|
return proc;
|
|
}
|
|
|
|
/**
 * @brief Refresh the recent CPU usage history of one process.
 *
 * Shifts the four-entry usage history by one slot and stores a new sample
 * in usage[0]: the time accumulated since the previous update, scaled by
 * 1000 and divided by the length of the interval
 * (@p clock_ticks * @p perf_scale). time_prev is then advanced to the
 * current total.
 *
 * If the interval is zero the new sample is recorded as 0 rather than
 * dividing by zero.
 *
 * @param clock_ticks Length of the sampling interval.
 * @param perf_scale  Factor applied to @p clock_ticks to match the unit of
 *                    the process time counters.
 * @param proc        Process to update.
 */
static void update_one_process(uint64_t clock_ticks, uint64_t perf_scale, process_t * proc) {
	uint64_t interval = clock_ticks * perf_scale;

	/* Age the history: [0] -> [1] -> [2] -> [3]. */
	for (int i = 3; i > 0; --i) {
		proc->usage[i] = proc->usage[i - 1];
	}

	proc->usage[0] = interval
		? (1000 * (proc->time_total - proc->time_prev)) / interval
		: 0;
	proc->time_prev = proc->time_total;
}
|
|
|
|
void update_process_usage(uint64_t clock_ticks, uint64_t perf_scale) {
|
|
spin_lock(tree_lock);
|
|
foreach(lnode, process_list) {
|
|
process_t * proc = lnode->value;
|
|
update_one_process(clock_ticks, perf_scale, proc);
|
|
}
|
|
spin_unlock(tree_lock);
|
|
/* Now use idle tasks to calculator processor activity? */
|
|
for (int i = 0; i < processor_count; ++i) {
|
|
process_t * proc = processor_local_data[i].kernel_idle_task;
|
|
update_one_process(clock_ticks, perf_scale, proc);
|
|
}
|
|
}
|