cleanup linker

This commit is contained in:
K. Lange 2018-08-14 15:40:32 +09:00
parent 2d231a97af
commit c4ad63ad74

View File

@ -1,20 +1,40 @@
/* This file is part of ToaruOS and is released under the terms
/* vim: ts=4 sw=4 noexpandtab
* This file is part of ToaruOS and is released under the terms
* of the NCSA / University of Illinois License - see LICENSE.md
* Copyright (C) 2016-2017 Kevin Lange
* Copyright (C) 2016-2018 Kevin Lange
*
* ELF Dynamic Linker/Loader
*
* Loads ELF executables and links them at runtime to their
* shared library dependencies.
*
* As of writing, this is a simplistic and not-fully-compliant
* implementation of ELF dynamic linking. It suffers from a number
* of issues, including not actually sharing libraries (there
* isn't a sufficient mechanism in the kernel at the moment for
* doing that - we need something with copy-on-write, preferably
* an mmap-file mechanism), as well as not handling symbol
* resolution correctly.
*
* However, it's sufficient for our purposes, and works well enough
* to load Python C modules.
*/
#include <stdlib.h>
#include <stdint.h>
#include <alloca.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <syscall.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <kernel/elf.h>
extern char** environ;
/*
* When the LD_DEBUG environment variable is set, TRACE_LD messages
* will be printed to stderr
*/
#define TRACE_APP_NAME "ld.so"
#define TRACE_LD(...) do { if (__trace_ld) { TRACE(__VA_ARGS__); } } while (0)
@ -22,17 +42,22 @@ static int __trace_ld = 0;
#include <toaru/trace.h>
/*
* This libraries are included in source form to avoid having
* to build separate objects for them and complicate linking,
* since ld is specially built as a static object.
*/
#include "../lib/list.c"
#include "../lib/hashmap.c"
typedef int (*entry_point_t)(int, char *[], char**);
extern char end[];
/* Global linking state */
static hashmap_t * dumb_symbol_table;
static hashmap_t * glob_dat;
static hashmap_t * objects_map;
/* Used for dlerror */
static char * last_error = NULL;
typedef struct elf_object {
@ -69,69 +94,91 @@ typedef struct elf_object {
static elf_t * _main_obj = NULL;
/* Locate library for LD_LIBRARY PATH */
static char * find_lib(const char * file) {
/* If it was an absolute path, there's no need to find it. */
if (strchr(file, '/')) return strdup(file);
/* Collect the environment variable. */
char * path = getenv("LD_LIBRARY_PATH");
if (!path) {
/* Not set - this is the default state. Should probably read from config file? */
path = "/lib:/usr/lib";
}
/* Duplicate so we can tokenize without editing */
char * xpath = strdup(path);
char * p, * last;
for ((p = strtok_r(xpath, ":", &last)); p; p = strtok_r(NULL, ":", &last)) {
/* Go through each LD_LIBRARY_PATH entry */
int r;
struct stat stat_buf;
/* Append the requested file to that path */
char * exe = malloc(strlen(p) + strlen(file) + 2);
memcpy(exe, p, strlen(p) + 1);
//strcpy(exe, p);
*exe = '\0';
strcat(exe, p);
strcat(exe, "/");
strcat(exe, file);
/* See if it exists */
r = stat(exe, &stat_buf);
if (r != 0) {
/* Nope. */
free(exe);
continue;
}
/* It exists, so this is what we want. */
return exe;
}
free(xpath);
/* No match found. */
return NULL;
}
/* Open an object file */
static elf_t * open_object(const char * path) {
/* If no path (eg. dlopen(NULL)), return the main object (the executable). */
if (!path) {
_main_obj->loaded = 1;
return _main_obj;
}
/* If we've already opened a file with this name, return it - don't load things twice. */
if (hashmap_has(objects_map, (void*)path)) {
elf_t * object = hashmap_get(objects_map, (void*)path);
object->loaded = 1;
return object;
}
/* Locate the library */
char * file = find_lib(path);
if (!file) {
last_error = "Could not find library.";
return NULL;
}
/* Open the library. */
FILE * f = fopen(file, "r");
/* Free the expanded path, we don't need it anymore. */
free(file);
/* Failed to open? Unlikely, but could mean permissions problems. */
if (!f) {
last_error = "Could not open library.";
return NULL;
}
/* Initialize a fresh object object. */
elf_t * object = calloc(1, sizeof(elf_t));
hashmap_set(objects_map, (void*)path, object);
/* Really unlikely... */
if (!object) {
last_error = "Could not allocate space.";
return NULL;
@ -139,14 +186,17 @@ static elf_t * open_object(const char * path) {
object->file = f;
/* Read the header */
size_t r = fread(&object->header, sizeof(Elf32_Header), 1, object->file);
/* Header failed to read? */
if (!r) {
last_error = "Failed to read object header.";
free(object);
return NULL;
}
/* Is this actually an ELF object? */
if (object->header.e_ident[0] != ELFMAG0 ||
object->header.e_ident[1] != ELFMAG1 ||
object->header.e_ident[2] != ELFMAG2 ||
@ -157,96 +207,108 @@ static elf_t * open_object(const char * path) {
return NULL;
}
/* Prepare a list for tracking dependencies. */
object->dependencies = list_create();
return object;
}
/* Calculate the size of an object file by examining its phdrs */
static size_t object_calculate_size(elf_t * object) {
uintptr_t base_addr = 0xFFFFFFFF;
uintptr_t end_addr = 0x0;
size_t headers = 0;
while (headers < object->header.e_phnum) {
Elf32_Phdr phdr;
{
size_t headers = 0;
while (headers < object->header.e_phnum) {
Elf32_Phdr phdr;
/* Read the phdr */
fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET);
fread(&phdr, object->header.e_phentsize, 1, object->file);
fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET);
fread(&phdr, object->header.e_phentsize, 1, object->file);
switch (phdr.p_type) {
case PT_LOAD:
{
if (phdr.p_vaddr < base_addr) {
base_addr = phdr.p_vaddr;
}
if (phdr.p_memsz + phdr.p_vaddr > end_addr) {
end_addr = phdr.p_memsz + phdr.p_vaddr;
}
switch (phdr.p_type) {
case PT_LOAD:
{
/* If this loads lower than our current base... */
if (phdr.p_vaddr < base_addr) {
base_addr = phdr.p_vaddr;
}
break;
default:
break;
}
headers++;
/* Or higher than our current end address... */
if (phdr.p_memsz + phdr.p_vaddr > end_addr) {
end_addr = phdr.p_memsz + phdr.p_vaddr;
}
}
break;
/* TODO: Do we care about other PHDR types here? */
default:
break;
}
headers++;
}
/* If base_addr is still -1, then no valid phdrs were found, and the object has no loaded size. */
if (base_addr == 0xFFFFFFFF) return 0;
return end_addr - base_addr;
}
/* Load an object into memory */
static uintptr_t object_load(elf_t * object, uintptr_t base) {
uintptr_t end_addr = 0x0;
object->base = base;
/* Load object */
{
size_t headers = 0;
while (headers < object->header.e_phnum) {
Elf32_Phdr phdr;
size_t headers = 0;
while (headers < object->header.e_phnum) {
Elf32_Phdr phdr;
fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET);
fread(&phdr, object->header.e_phentsize, 1, object->file);
/* Read the phdr */
fseek(object->file, object->header.e_phoff + object->header.e_phentsize * headers, SEEK_SET);
fread(&phdr, object->header.e_phentsize, 1, object->file);
switch (phdr.p_type) {
case PT_LOAD:
{
char * args[] = {(char *)(base + phdr.p_vaddr), (char *)phdr.p_memsz};
syscall_system_function(10, args);
fseek(object->file, phdr.p_offset, SEEK_SET);
fread((void *)(base + phdr.p_vaddr), phdr.p_filesz, 1, object->file);
size_t r = phdr.p_filesz;
while (r < phdr.p_memsz) {
*(char *)(phdr.p_vaddr + base + r) = 0;
r++;
}
switch (phdr.p_type) {
case PT_LOAD:
{
/* Request memory to load this PHDR into */
char * args[] = {(char *)(base + phdr.p_vaddr), (char *)phdr.p_memsz};
syscall_system_function(10, args);
if (end_addr < phdr.p_vaddr + base + phdr.p_memsz) {
end_addr = phdr.p_vaddr + base + phdr.p_memsz;
}
/* Copy the code into memory */
fseek(object->file, phdr.p_offset, SEEK_SET);
fread((void *)(base + phdr.p_vaddr), phdr.p_filesz, 1, object->file);
/* Zero the remaining area */
size_t r = phdr.p_filesz;
while (r < phdr.p_memsz) {
*(char *)(phdr.p_vaddr + base + r) = 0;
r++;
}
break;
case PT_DYNAMIC:
{
object->dynamic = (Elf32_Dyn *)(base + phdr.p_vaddr);
}
break;
default:
break;
}
headers++;
/* If this expands our end address, be sure to update it */
if (end_addr < phdr.p_vaddr + base + phdr.p_memsz) {
end_addr = phdr.p_vaddr + base + phdr.p_memsz;
}
}
break;
case PT_DYNAMIC:
{
/* Keep a reference to the dynamic section, which is actually loaded by a PT_LOAD normally. */
object->dynamic = (Elf32_Dyn *)(base + phdr.p_vaddr);
}
break;
default:
break;
}
headers++;
}
return end_addr;
}
/* Perform cleanup after loading */
static int object_postload(elf_t * object) {
/* Load section string table */
@ -259,10 +321,11 @@ static int object_postload(elf_t * object) {
fread(object->string_table, shdr.sh_size, 1, object->file);
}
/* If there is a dynamic table, parse it. */
if (object->dynamic) {
Elf32_Dyn * table;
/* Locate string table */
/* Locate string tables */
table = object->dynamic;
while (table->d_tag) {
switch (table->d_tag) {
@ -286,6 +349,12 @@ static int object_postload(elf_t * object) {
table++;
}
/*
* Read through dependencies
* We have to do this separately from the above to make sure
* we have the dynamic string tables loaded first, as they
* are needed for the dependency names.
*/
table = object->dynamic;
while (table->d_tag) {
switch (table->d_tag) {
@ -297,17 +366,23 @@ static int object_postload(elf_t * object) {
}
}
/* Locate constructors */
for (uintptr_t x = 0; x < object->header.e_shentsize * object->header.e_shnum; x += object->header.e_shentsize) {
Elf32_Shdr shdr;
/* Read section header */
fseek(object->file, object->header.e_shoff + x, SEEK_SET);
fread(&shdr, object->header.e_shentsize, 1, object->file);
/* ctors */
if (!strcmp((char *)((uintptr_t)object->string_table + shdr.sh_name), ".ctors")) {
/* Store load address and size */
object->ctors = (void *)(shdr.sh_addr + object->base);
object->ctors_size = shdr.sh_size / sizeof(uintptr_t);
}
/* init_array */
if (!strcmp((char *)((uintptr_t)object->string_table + shdr.sh_name), ".init_array")) {
/* Store load address and size */
object->init_array = (void *)(shdr.sh_addr + object->base);
object->init_array_size = shdr.sh_size / sizeof(uintptr_t);
}
@ -316,6 +391,7 @@ static int object_postload(elf_t * object) {
return 0;
}
/* Whether symbol addresses is needed for a relocation type */
static int need_symbol_for_type(unsigned char type) {
switch(type) {
case 1:
@ -329,32 +405,36 @@ static int need_symbol_for_type(unsigned char type) {
}
}
/* Apply ELF relocations */
static int object_relocate(elf_t * object) {
/* If there is a dynamic symbol table, load symbols */
if (object->dyn_symbol_table) {
Elf32_Sym * table = object->dyn_symbol_table;
size_t i = 0;
while (i < object->dyn_symbol_table_size) {
char * symname = (char *)((uintptr_t)object->dyn_string_table + table->st_name);
/* If we haven't added this symbol to our symbol table, do so now. */
if (!hashmap_has(dumb_symbol_table, symname)) {
if (table->st_shndx) {
hashmap_set(dumb_symbol_table, symname, (void*)(table->st_value + object->base));
}
} else {
if (table->st_shndx) {
//table->st_value = (uintptr_t)hashmap_get(dumb_symbol_table, symname);
}
}
table++;
i++;
}
}
/* Find relocation table */
for (uintptr_t x = 0; x < object->header.e_shentsize * object->header.e_shnum; x += object->header.e_shentsize) {
Elf32_Shdr shdr;
/* Load section header */
fseek(object->file, object->header.e_shoff + x, SEEK_SET);
fread(&shdr, object->header.e_shentsize, 1, object->file);
/* Relocation table found */
if (shdr.sh_type == 9) {
Elf32_Rel * table = (Elf32_Rel *)(shdr.sh_addr + object->base);
while ((uintptr_t)table - ((uintptr_t)shdr.sh_addr + object->base) < shdr.sh_size) {
@ -362,6 +442,7 @@ static int object_relocate(elf_t * object) {
unsigned char type = ELF32_R_TYPE(table->r_info);
Elf32_Sym * sym = &object->dyn_symbol_table[symbol];
/* If we need symbol for this, get it. */
char * symname = NULL;
uintptr_t x = sym->st_value + object->base;
if (need_symbol_for_type(type) || (type == 5)) {
@ -369,6 +450,7 @@ static int object_relocate(elf_t * object) {
if (symname && hashmap_has(dumb_symbol_table, symname)) {
x = (uintptr_t)hashmap_get(dumb_symbol_table, symname);
} else {
/* This isn't fatal, but do log a message if debugging is enabled. */
TRACE_LD("Symbol not found: %s", symname);
x = 0x0;
}
@ -412,12 +494,15 @@ static int object_relocate(elf_t * object) {
return 0;
}
/* Copy relocations are special and need to be located before other relocations. */
static void object_find_copy_relocations(elf_t * object) {
for (uintptr_t x = 0; x < object->header.e_shentsize * object->header.e_shnum; x += object->header.e_shentsize) {
Elf32_Shdr shdr;
fseek(object->file, object->header.e_shoff + x, SEEK_SET);
fread(&shdr, object->header.e_shentsize, 1, object->file);
/* Relocation table found */
if (shdr.sh_type == 9) {
Elf32_Rel * table = (Elf32_Rel *)(shdr.sh_addr + object->base);
while ((uintptr_t)table - ((uintptr_t)shdr.sh_addr + object->base) < shdr.sh_size) {
@ -432,10 +517,11 @@ static void object_find_copy_relocations(elf_t * object) {
}
}
}
}
/* Find a symbol in a specific object. */
static void * object_find_symbol(elf_t * object, const char * symbol_name) {
if (!object->dyn_symbol_table) {
last_error = "lib does not have a symbol table";
return NULL;
@ -455,6 +541,7 @@ static void * object_find_symbol(elf_t * object, const char * symbol_name) {
return NULL;
}
/* Fully load an object. */
static void * do_actual_load(const char * filename, elf_t * lib, int flags) {
(void)flags;
@ -465,21 +552,31 @@ static void * do_actual_load(const char * filename, elf_t * lib, int flags) {
size_t lib_size = object_calculate_size(lib);
/* Needs to be at least a page. */
if (lib_size < 4096) {
lib_size = 4096;
}
/*
* Allocate space to load the library
* This is where we should really be loading things into COW
* but we don't have the functionality available.
*/
uintptr_t load_addr = (uintptr_t)malloc(lib_size);
object_load(lib, load_addr);
/* Perform cleanup steps */
object_postload(lib);
/* Ensure dependencies are available */
node_t * item;
while ((item = list_pop(lib->dependencies))) {
elf_t * lib = open_object(item->value);
if (!lib) {
/* Missing dependencies are fatal to this process, but
* not to the entire application. */
free((void *)load_addr);
last_error = "Failed to load a dependency.";
return NULL;
@ -492,11 +589,14 @@ static void * do_actual_load(const char * filename, elf_t * lib, int flags) {
}
/* Perform relocations */
TRACE_LD("Relocating %s", filename);
object_relocate(lib);
/* We're done with the file. */
fclose(lib->file);
/* If there were constructors, call them */
if (lib->ctors) {
for (size_t i = 0; i < lib->ctors_size; i++) {
TRACE_LD(" 0x%x()", lib->ctors[i]);
@ -504,6 +604,7 @@ static void * do_actual_load(const char * filename, elf_t * lib, int flags) {
}
}
/* If there was an init_array, call everything in it */
if (lib->init_array) {
for (size_t i = 0; i < lib->init_array_size; i++) {
TRACE_LD(" 0x%x()", lib->init_array[i]);
@ -511,14 +612,16 @@ static void * do_actual_load(const char * filename, elf_t * lib, int flags) {
}
}
/* If the library has an init function, call that last. */
if (lib->init) {
lib->init();
}
/* And return an object for the loaded library */
return (void *)lib;
}
/* exposed dlopen() method */
static void * dlopen_ld(const char * filename, int flags) {
TRACE_LD("dlopen(%s,0x%x)", filename, flags);
@ -537,24 +640,27 @@ static void * dlopen_ld(const char * filename, int flags) {
return ret;
}
/* exposed dlclose() method - XXX not fully implemented */
static int dlclose_ld(elf_t * lib) {
/* TODO close dependencies? Make sure nothing references this. */
free((void *)lib->base);
return 0;
}
/* exposed dlerror() method */
static char * dlerror_ld(void) {
/* TODO actually do this */
char * this_error = last_error;
last_error = NULL;
return this_error;
}
/* Specially used by libc */
static void * _argv_value = NULL;
static char * argv_value(void) {
return _argv_value;
}
/* Exported methods (dlfcn) */
typedef struct {
char * name;
void * symbol;
@ -573,7 +679,6 @@ int main(int argc, char * argv[]) {
char * file = argv[1];
size_t arg_offset = 1;
if (!strcmp(argv[1], "-e")) {
arg_offset = 3;
file = argv[2];
@ -581,35 +686,39 @@ int main(int argc, char * argv[]) {
_argv_value = argv+arg_offset;
/* Enable tracing if requested */
char * trace_ld_env = getenv("LD_DEBUG");
if ((trace_ld_env && (!strcmp(trace_ld_env,"1") || !strcmp(trace_ld_env,"yes")))) {
__trace_ld = 1;
}
/* Initialize hashmaps for symbols, GLOB_DATs, and objects */
dumb_symbol_table = hashmap_create(10);
glob_dat = hashmap_create(10);
objects_map = hashmap_create(10);
/* Setup symbols for built-in exports */
ld_exports_t * ex = ld_builtin_exports;
while (ex->name) {
hashmap_set(dumb_symbol_table, ex->name, ex->symbol);
ex++;
}
/* Open the requested main object */
elf_t * main_obj = open_object(file);
_main_obj = main_obj;
if (!main_obj) {
//fprintf(stderr, "%s: error: failed to open object '%s'.\n", argv[0], file);
fprintf(stderr, "%s: error: failed to open object '%s'.\n", argv[0], file);
return 1;
}
/* Load the main object */
uintptr_t end_addr = object_load(main_obj, 0x0);
object_postload(main_obj);
object_find_copy_relocations(main_obj);
/* Load library dependencies */
hashmap_t * libs = hashmap_create(10);
while (end_addr & 0xFFF) {
@ -627,10 +736,12 @@ int main(int argc, char * argv[]) {
}
char * lib_name = item->value;
/* Reject libg.so */
if (!strcmp(lib_name, "libg.so")) goto nope;
elf_t * lib = open_object(lib_name);
if (!lib) {
//fprintf(stderr, "Failed to load dependency '%s'.\n", lib_name);
fprintf(stderr, "Failed to load dependency '%s'.\n", lib_name);
return 1;
}
hashmap_set(libs, lib_name, lib);
@ -643,7 +754,7 @@ int main(int argc, char * argv[]) {
fclose(lib->file);
/* Execute constructors */
/* Store constructors for later execution */
if (lib->ctors || lib->init_array) {
list_insert(ctor_libs, lib);
}
@ -655,6 +766,7 @@ nope:
free(item);
}
/* Relocate the main object */
TRACE_LD("Relocating main object");
object_relocate(main_obj);
TRACE_LD("Placing heap at end");
@ -662,6 +774,7 @@ nope:
end_addr++;
}
/* Call constructors for loaded dependencies */
char * ld_no_ctors = getenv("LD_DISABLE_CTORS");
if (ld_no_ctors && (!strcmp(ld_no_ctors,"1") || !strcmp(ld_no_ctors,"yes"))) {
TRACE_LD("skipping ctors because LD_DISABLE_CTORS was set");
@ -690,6 +803,7 @@ nope:
lib->init();
}
/* If main object had constructors, call them. */
if (main_obj->init_array) {
for (size_t i = 0; i < main_obj->init_array_size; i++) {
TRACE_LD(" 0x%x()", main_obj->init_array[i]);
@ -701,12 +815,14 @@ nope:
main_obj->init();
}
/* Move heap start (kind of like a weird sbrk) */
{
char * args[] = {(char*)end_addr};
syscall_system_function(9, args);
}
TRACE_LD("Jumping to entry point");
/* Jump to the entry for the main object */
TRACE_LD("Jumping to entry point");
entry_point_t entry = (entry_point_t)main_obj->header.e_entry;
entry(argc-arg_offset,argv+arg_offset,environ);