diff --git a/linker/linker.c b/linker/linker.c index b2a47d10..259bd2bf 100644 --- a/linker/linker.c +++ b/linker/linker.c @@ -1,20 +1,40 @@ -/* This file is part of ToaruOS and is released under the terms +/* vim: ts=4 sw=4 noexpandtab + * This file is part of ToaruOS and is released under the terms * of the NCSA / University of Illinois License - see LICENSE.md - * Copyright (C) 2016-2017 Kevin Lange + * Copyright (C) 2016-2018 Kevin Lange + * + * ELF Dynamic Linker/Loader + * + * Loads ELF executables and links them at runtime to their + * shared library dependencies. + * + * As of writing, this is a simplistic and not-fully-compliant + * implementation of ELF dynamic linking. It suffers from a number + * of issues, including not actually sharing libraries (there + * isn't a sufficient mechanism in the kernel at the moment for + * doing that - we need something with copy-on-write, preferably + * an mmap-file mechanism), as well as not handling symbol + * resolution correctly. + * + * However, it's sufficient for our purposes, and works well enough + * to load Python C modules. */ #include #include #include #include #include +#include #include #include #include + #include -extern char** environ; - - +/* + * When the LD_DEBUG environment variable is set, TRACE_LD messages + * will be printed to stderr + */ #define TRACE_APP_NAME "ld.so" #define TRACE_LD(...) do { if (__trace_ld) { TRACE(__VA_ARGS__); } } while (0) @@ -22,17 +42,22 @@ static int __trace_ld = 0; #include +/* + * This libraries are included in source form to avoid having + * to build separate objects for them and complicate linking, + * since ld is specially built as a static object. + */ #include "../lib/list.c" #include "../lib/hashmap.c" typedef int (*entry_point_t)(int, char *[], char**); -extern char end[]; - +/* Global linking state */ static hashmap_t * dumb_symbol_table; static hashmap_t * glob_dat; static hashmap_t * objects_map; +/* Used for dlerror */ static char * last_error = NULL; typedef struct elf_object { @@ -69,69 +94,91 @@ typedef struct elf_object { static elf_t * _main_obj = NULL; +/* Locate library for LD_LIBRARY PATH */ static char * find_lib(const char * file) { + /* If it was an absolute path, there's no need to find it. */ if (strchr(file, '/')) return strdup(file); + /* Collect the environment variable. */ char * path = getenv("LD_LIBRARY_PATH"); if (!path) { + /* Not set - this is the default state. Should probably read from config file? */ path = "/lib:/usr/lib"; } + + /* Duplicate so we can tokenize without editing */ char * xpath = strdup(path); char * p, * last; for ((p = strtok_r(xpath, ":", &last)); p; p = strtok_r(NULL, ":", &last)) { + /* Go through each LD_LIBRARY_PATH entry */ int r; struct stat stat_buf; + + /* Append the requested file to that path */ char * exe = malloc(strlen(p) + strlen(file) + 2); - memcpy(exe, p, strlen(p) + 1); - //strcpy(exe, p); + *exe = '\0'; + strcat(exe, p); strcat(exe, "/"); strcat(exe, file); + /* See if it exists */ r = stat(exe, &stat_buf); if (r != 0) { + /* Nope. */ free(exe); continue; } + + /* It exists, so this is what we want. */ return exe; } free(xpath); - + /* No match found. */ return NULL; } +/* Open an object file */ static elf_t * open_object(const char * path) { + /* If no path (eg. dlopen(NULL)), return the main object (the executable). */ if (!path) { _main_obj->loaded = 1; return _main_obj; } + /* If we've already opened a file with this name, return it - don't load things twice. */ if (hashmap_has(objects_map, (void*)path)) { elf_t * object = hashmap_get(objects_map, (void*)path); object->loaded = 1; return object; } + /* Locate the library */ char * file = find_lib(path); if (!file) { last_error = "Could not find library."; return NULL; } + /* Open the library. */ FILE * f = fopen(file, "r"); + /* Free the expanded path, we don't need it anymore. */ free(file); + /* Failed to open? Unlikely, but could mean permissions problems. */ if (!f) { last_error = "Could not open library."; return NULL; } + /* Initialize a fresh object object. */ elf_t * object = calloc(1, sizeof(elf_t)); hashmap_set(objects_map, (void*)path, object); + /* Really unlikely... */ if (!object) { last_error = "Could not allocate space."; return NULL; @@ -139,14 +186,17 @@ static elf_t * open_object(const char * path) { object->file = f; + /* Read the header */ size_t r = fread(&object->header, sizeof(Elf32_Header), 1, object->file); + /* Header failed to read? */ if (!r) { last_error = "Failed to read object header."; free(object); return NULL; } + /* Is this actually an ELF object? */ if (object->header.e_ident[0] != ELFMAG0 || object->header.e_ident[1] != ELFMAG1 || object->header.e_ident[2] != ELFMAG2 || @@ -157,96 +207,108 @@ static elf_t * open_object(const char * path) { return NULL; } + /* Prepare a list for tracking dependencies. */ object->dependencies = list_create(); return object; } +/* Calculate the size of an object file by examining its phdrs */ static size_t object_calculate_size(elf_t * object) { uintptr_t base_addr = 0xFFFFFFFF; uintptr_t end_addr = 0x0; + size_t headers = 0; + while (headers < object->header.e_phnum) { + Elf32_Phdr phdr; - { - size_t headers = 0; - while (headers < object->header.e_phnum) { - Elf32_Phdr phdr; + /* Read the phdr */ + fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET); + fread(&phdr, object->header.e_phentsize, 1, object->file); - fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET); - fread(&phdr, object->header.e_phentsize, 1, object->file); - - switch (phdr.p_type) { - case PT_LOAD: - { - if (phdr.p_vaddr < base_addr) { - base_addr = phdr.p_vaddr; - } - if (phdr.p_memsz + phdr.p_vaddr > end_addr) { - end_addr = phdr.p_memsz + phdr.p_vaddr; - } + switch (phdr.p_type) { + case PT_LOAD: + { + /* If this loads lower than our current base... */ + if (phdr.p_vaddr < base_addr) { + base_addr = phdr.p_vaddr; } - break; - default: - break; - } - headers++; + /* Or higher than our current end address... */ + if (phdr.p_memsz + phdr.p_vaddr > end_addr) { + end_addr = phdr.p_memsz + phdr.p_vaddr; + } + } + break; + /* TODO: Do we care about other PHDR types here? */ + default: + break; } + + headers++; } + /* If base_addr is still -1, then no valid phdrs were found, and the object has no loaded size. */ if (base_addr == 0xFFFFFFFF) return 0; return end_addr - base_addr; } +/* Load an object into memory */ static uintptr_t object_load(elf_t * object, uintptr_t base) { uintptr_t end_addr = 0x0; object->base = base; - /* Load object */ - { - size_t headers = 0; - while (headers < object->header.e_phnum) { - Elf32_Phdr phdr; + size_t headers = 0; + while (headers < object->header.e_phnum) { + Elf32_Phdr phdr; - fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET); - fread(&phdr, object->header.e_phentsize, 1, object->file); + /* Read the phdr */ + fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET); + fread(&phdr, object->header.e_phentsize, 1, object->file); - switch (phdr.p_type) { - case PT_LOAD: - { - char * args[] = {(char *)(base + phdr.p_vaddr), (char *)phdr.p_memsz}; - syscall_system_function(10, args); - fseek(object->file, phdr.p_offset, SEEK_SET); - fread((void *)(base + phdr.p_vaddr), phdr.p_filesz, 1, object->file); - size_t r = phdr.p_filesz; - while (r < phdr.p_memsz) { - *(char *)(phdr.p_vaddr + base + r) = 0; - r++; - } + switch (phdr.p_type) { + case PT_LOAD: + { + /* Request memory to load this PHDR into */ + char * args[] = {(char *)(base + phdr.p_vaddr), (char *)phdr.p_memsz}; + syscall_system_function(10, args); - if (end_addr < phdr.p_vaddr + base + phdr.p_memsz) { - end_addr = phdr.p_vaddr + base + phdr.p_memsz; - } + /* Copy the code into memory */ + fseek(object->file, phdr.p_offset, SEEK_SET); + fread((void *)(base + phdr.p_vaddr), phdr.p_filesz, 1, object->file); + + /* Zero the remaining area */ + size_t r = phdr.p_filesz; + while (r < phdr.p_memsz) { + *(char *)(phdr.p_vaddr + base + r) = 0; + r++; } - break; - case PT_DYNAMIC: - { - object->dynamic = (Elf32_Dyn *)(base + phdr.p_vaddr); - } - break; - default: - break; - } - headers++; + /* If this expands our end address, be sure to update it */ + if (end_addr < phdr.p_vaddr + base + phdr.p_memsz) { + end_addr = phdr.p_vaddr + base + phdr.p_memsz; + } + } + break; + case PT_DYNAMIC: + { + /* Keep a reference to the dynamic section, which is actually loaded by a PT_LOAD normally. */ + object->dynamic = (Elf32_Dyn *)(base + phdr.p_vaddr); + } + break; + default: + break; } + + headers++; } return end_addr; } +/* Perform cleanup after loading */ static int object_postload(elf_t * object) { /* Load section string table */ @@ -259,10 +321,11 @@ static int object_postload(elf_t * object) { fread(object->string_table, shdr.sh_size, 1, object->file); } + /* If there is a dynamic table, parse it. */ if (object->dynamic) { Elf32_Dyn * table; - /* Locate string table */ + /* Locate string tables */ table = object->dynamic; while (table->d_tag) { switch (table->d_tag) { @@ -286,6 +349,12 @@ static int object_postload(elf_t * object) { table++; } + /* + * Read through dependencies + * We have to do this separately from the above to make sure + * we have the dynamic string tables loaded first, as they + * are needed for the dependency names. + */ table = object->dynamic; while (table->d_tag) { switch (table->d_tag) { @@ -297,17 +366,23 @@ static int object_postload(elf_t * object) { } } + /* Locate constructors */ for (uintptr_t x = 0; x < object->header.e_shentsize * object->header.e_shnum; x += object->header.e_shentsize) { Elf32_Shdr shdr; + /* Read section header */ fseek(object->file, object->header.e_shoff + x, SEEK_SET); fread(&shdr, object->header.e_shentsize, 1, object->file); + /* ctors */ if (!strcmp((char *)((uintptr_t)object->string_table + shdr.sh_name), ".ctors")) { + /* Store load address and size */ object->ctors = (void *)(shdr.sh_addr + object->base); object->ctors_size = shdr.sh_size / sizeof(uintptr_t); } + /* init_array */ if (!strcmp((char *)((uintptr_t)object->string_table + shdr.sh_name), ".init_array")) { + /* Store load address and size */ object->init_array = (void *)(shdr.sh_addr + object->base); object->init_array_size = shdr.sh_size / sizeof(uintptr_t); } @@ -316,6 +391,7 @@ static int object_postload(elf_t * object) { return 0; } +/* Whether symbol addresses is needed for a relocation type */ static int need_symbol_for_type(unsigned char type) { switch(type) { case 1: @@ -329,32 +405,36 @@ static int need_symbol_for_type(unsigned char type) { } } - +/* Apply ELF relocations */ static int object_relocate(elf_t * object) { + + /* If there is a dynamic symbol table, load symbols */ if (object->dyn_symbol_table) { Elf32_Sym * table = object->dyn_symbol_table; size_t i = 0; while (i < object->dyn_symbol_table_size) { char * symname = (char *)((uintptr_t)object->dyn_string_table + table->st_name); + + /* If we haven't added this symbol to our symbol table, do so now. */ if (!hashmap_has(dumb_symbol_table, symname)) { if (table->st_shndx) { hashmap_set(dumb_symbol_table, symname, (void*)(table->st_value + object->base)); } - } else { - if (table->st_shndx) { - //table->st_value = (uintptr_t)hashmap_get(dumb_symbol_table, symname); - } } + table++; i++; } } + /* Find relocation table */ for (uintptr_t x = 0; x < object->header.e_shentsize * object->header.e_shnum; x += object->header.e_shentsize) { Elf32_Shdr shdr; + /* Load section header */ fseek(object->file, object->header.e_shoff + x, SEEK_SET); fread(&shdr, object->header.e_shentsize, 1, object->file); + /* Relocation table found */ if (shdr.sh_type == 9) { Elf32_Rel * table = (Elf32_Rel *)(shdr.sh_addr + object->base); while ((uintptr_t)table - ((uintptr_t)shdr.sh_addr + object->base) < shdr.sh_size) { @@ -362,6 +442,7 @@ static int object_relocate(elf_t * object) { unsigned char type = ELF32_R_TYPE(table->r_info); Elf32_Sym * sym = &object->dyn_symbol_table[symbol]; + /* If we need symbol for this, get it. */ char * symname = NULL; uintptr_t x = sym->st_value + object->base; if (need_symbol_for_type(type) || (type == 5)) { @@ -369,6 +450,7 @@ static int object_relocate(elf_t * object) { if (symname && hashmap_has(dumb_symbol_table, symname)) { x = (uintptr_t)hashmap_get(dumb_symbol_table, symname); } else { + /* This isn't fatal, but do log a message if debugging is enabled. */ TRACE_LD("Symbol not found: %s", symname); x = 0x0; } @@ -412,12 +494,15 @@ static int object_relocate(elf_t * object) { return 0; } +/* Copy relocations are special and need to be located before other relocations. */ static void object_find_copy_relocations(elf_t * object) { + for (uintptr_t x = 0; x < object->header.e_shentsize * object->header.e_shnum; x += object->header.e_shentsize) { Elf32_Shdr shdr; fseek(object->file, object->header.e_shoff + x, SEEK_SET); fread(&shdr, object->header.e_shentsize, 1, object->file); + /* Relocation table found */ if (shdr.sh_type == 9) { Elf32_Rel * table = (Elf32_Rel *)(shdr.sh_addr + object->base); while ((uintptr_t)table - ((uintptr_t)shdr.sh_addr + object->base) < shdr.sh_size) { @@ -432,10 +517,11 @@ static void object_find_copy_relocations(elf_t * object) { } } } - } +/* Find a symbol in a specific object. */ static void * object_find_symbol(elf_t * object, const char * symbol_name) { + if (!object->dyn_symbol_table) { last_error = "lib does not have a symbol table"; return NULL; @@ -455,6 +541,7 @@ static void * object_find_symbol(elf_t * object, const char * symbol_name) { return NULL; } +/* Fully load an object. */ static void * do_actual_load(const char * filename, elf_t * lib, int flags) { (void)flags; @@ -465,21 +552,31 @@ static void * do_actual_load(const char * filename, elf_t * lib, int flags) { size_t lib_size = object_calculate_size(lib); + /* Needs to be at least a page. */ if (lib_size < 4096) { lib_size = 4096; } + /* + * Allocate space to load the library + * This is where we should really be loading things into COW + * but we don't have the functionality available. + */ uintptr_t load_addr = (uintptr_t)malloc(lib_size); object_load(lib, load_addr); + /* Perform cleanup steps */ object_postload(lib); + /* Ensure dependencies are available */ node_t * item; while ((item = list_pop(lib->dependencies))) { elf_t * lib = open_object(item->value); if (!lib) { + /* Missing dependencies are fatal to this process, but + * not to the entire application. */ free((void *)load_addr); last_error = "Failed to load a dependency."; return NULL; @@ -492,11 +589,14 @@ static void * do_actual_load(const char * filename, elf_t * lib, int flags) { } + /* Perform relocations */ TRACE_LD("Relocating %s", filename); object_relocate(lib); + /* We're done with the file. */ fclose(lib->file); + /* If there were constructors, call them */ if (lib->ctors) { for (size_t i = 0; i < lib->ctors_size; i++) { TRACE_LD(" 0x%x()", lib->ctors[i]); @@ -504,6 +604,7 @@ static void * do_actual_load(const char * filename, elf_t * lib, int flags) { } } + /* If there was an init_array, call everything in it */ if (lib->init_array) { for (size_t i = 0; i < lib->init_array_size; i++) { TRACE_LD(" 0x%x()", lib->init_array[i]); @@ -511,14 +612,16 @@ static void * do_actual_load(const char * filename, elf_t * lib, int flags) { } } + /* If the library has an init function, call that last. */ if (lib->init) { lib->init(); } + /* And return an object for the loaded library */ return (void *)lib; - } +/* exposed dlopen() method */ static void * dlopen_ld(const char * filename, int flags) { TRACE_LD("dlopen(%s,0x%x)", filename, flags); @@ -537,24 +640,27 @@ static void * dlopen_ld(const char * filename, int flags) { return ret; } +/* exposed dlclose() method - XXX not fully implemented */ static int dlclose_ld(elf_t * lib) { /* TODO close dependencies? Make sure nothing references this. */ free((void *)lib->base); return 0; } +/* exposed dlerror() method */ static char * dlerror_ld(void) { - /* TODO actually do this */ char * this_error = last_error; last_error = NULL; return this_error; } +/* Specially used by libc */ static void * _argv_value = NULL; static char * argv_value(void) { return _argv_value; } +/* Exported methods (dlfcn) */ typedef struct { char * name; void * symbol; @@ -573,7 +679,6 @@ int main(int argc, char * argv[]) { char * file = argv[1]; size_t arg_offset = 1; - if (!strcmp(argv[1], "-e")) { arg_offset = 3; file = argv[2]; @@ -581,35 +686,39 @@ int main(int argc, char * argv[]) { _argv_value = argv+arg_offset; + /* Enable tracing if requested */ char * trace_ld_env = getenv("LD_DEBUG"); if ((trace_ld_env && (!strcmp(trace_ld_env,"1") || !strcmp(trace_ld_env,"yes")))) { __trace_ld = 1; } + /* Initialize hashmaps for symbols, GLOB_DATs, and objects */ dumb_symbol_table = hashmap_create(10); glob_dat = hashmap_create(10); objects_map = hashmap_create(10); + /* Setup symbols for built-in exports */ ld_exports_t * ex = ld_builtin_exports; while (ex->name) { hashmap_set(dumb_symbol_table, ex->name, ex->symbol); ex++; } + /* Open the requested main object */ elf_t * main_obj = open_object(file); _main_obj = main_obj; if (!main_obj) { - //fprintf(stderr, "%s: error: failed to open object '%s'.\n", argv[0], file); + fprintf(stderr, "%s: error: failed to open object '%s'.\n", argv[0], file); return 1; } + /* Load the main object */ uintptr_t end_addr = object_load(main_obj, 0x0); - object_postload(main_obj); - object_find_copy_relocations(main_obj); + /* Load library dependencies */ hashmap_t * libs = hashmap_create(10); while (end_addr & 0xFFF) { @@ -627,10 +736,12 @@ int main(int argc, char * argv[]) { } char * lib_name = item->value; + /* Reject libg.so */ if (!strcmp(lib_name, "libg.so")) goto nope; + elf_t * lib = open_object(lib_name); if (!lib) { - //fprintf(stderr, "Failed to load dependency '%s'.\n", lib_name); + fprintf(stderr, "Failed to load dependency '%s'.\n", lib_name); return 1; } hashmap_set(libs, lib_name, lib); @@ -643,7 +754,7 @@ int main(int argc, char * argv[]) { fclose(lib->file); - /* Execute constructors */ + /* Store constructors for later execution */ if (lib->ctors || lib->init_array) { list_insert(ctor_libs, lib); } @@ -655,6 +766,7 @@ nope: free(item); } + /* Relocate the main object */ TRACE_LD("Relocating main object"); object_relocate(main_obj); TRACE_LD("Placing heap at end"); @@ -662,6 +774,7 @@ nope: end_addr++; } + /* Call constructors for loaded dependencies */ char * ld_no_ctors = getenv("LD_DISABLE_CTORS"); if (ld_no_ctors && (!strcmp(ld_no_ctors,"1") || !strcmp(ld_no_ctors,"yes"))) { TRACE_LD("skipping ctors because LD_DISABLE_CTORS was set"); @@ -690,6 +803,7 @@ nope: lib->init(); } + /* If main object had constructors, call them. */ if (main_obj->init_array) { for (size_t i = 0; i < main_obj->init_array_size; i++) { TRACE_LD(" 0x%x()", main_obj->init_array[i]); @@ -701,12 +815,14 @@ nope: main_obj->init(); } + /* Move heap start (kind of like a weird sbrk) */ { char * args[] = {(char*)end_addr}; syscall_system_function(9, args); } - TRACE_LD("Jumping to entry point"); + /* Jump to the entry for the main object */ + TRACE_LD("Jumping to entry point"); entry_point_t entry = (entry_point_t)main_obj->header.e_entry; entry(argc-arg_offset,argv+arg_offset,environ);