2012-08-27 05:09:26 +04:00
|
|
|
#define _GNU_SOURCE
|
overhaul i386 syscall mechanism not to depend on external asm source
this is the first part of a series of patches intended to make
__syscall fully self-contained in the object file produced using
syscall.h, which will make it possible for crt1 code to perform
syscalls.
the (confusingly named) i386 __vsyscall mechanism, which this commit
removes, was introduced before the presence of a valid thread pointer
was mandatory; back then the thread pointer was set up lazily only if
threads were used. the intent was to be able to perform syscalls using
the kernel's fast entry point in the VDSO, which can use the sysenter
(Intel) or syscall (AMD) instruction instead of int $128, but without
inlining an access to the __syscall global at the point of each
syscall, which would incur a significant size cost from PIC setup
everywhere. the mechanism also shuffled registers/calling convention
around to avoid spills of call-saved registers, and to avoid
allocating ebx or ebp via asm constraints, since there are plenty of
broken-but-supported compiler versions which are incapable of
allocating ebx with -fPIC or ebp with -fno-omit-frame-pointer.
the new mechanism preserves the properties of avoiding spills and
avoiding allocation of ebx/ebp in constraints, but does it inline,
using some fairly simple register shuffling, and uses a field of the
thread structure rather than global data for the vdso-provided syscall
code address.
for now, the external __syscall function is refactored not to use the
old __vsyscall so it can be kept, but the intent is to remove it too.
2019-04-11 00:10:36 +03:00
|
|
|
#define SYSCALL_NO_TLS 1
|
2011-06-19 03:48:42 +04:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
2014-06-18 11:05:42 +04:00
|
|
|
#include <stdarg.h>
|
2014-06-19 10:01:06 +04:00
|
|
|
#include <stddef.h>
|
2011-06-19 03:48:42 +04:00
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <elf.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <errno.h>
|
2012-11-01 05:27:48 +04:00
|
|
|
#include <link.h>
|
2011-06-27 01:39:17 +04:00
|
|
|
#include <setjmp.h>
|
2011-06-27 03:23:28 +04:00
|
|
|
#include <pthread.h>
|
2011-08-16 08:24:36 +04:00
|
|
|
#include <ctype.h>
|
2011-06-27 03:23:28 +04:00
|
|
|
#include <dlfcn.h>
|
install dynamic tls synchronously at dlopen, streamline access
previously, dynamic loading of new libraries with thread-local storage
allocated the storage needed for all existing threads at load-time,
precluding late failure that can't be handled, but left installation
in existing threads to take place lazily on first access. this imposed
an additional memory access and branch on every dynamic tls access,
and imposed a requirement, which was not actually met, that the
dynamic tlsdesc asm functions preserve all call-clobbered registers
before calling C code to install new dynamic tls on first access.
the x86[_64] versions of this code wrongly omitted saving and
restoring of fpu/vector registers, assuming the compiler would not
generate anything using them in the called C code. the arm and aarch64
versions saved known existing registers, but failed to be future-proof
against expansion of the register file.
now that we track live threads in a list, it's possible to install the
new dynamic tls for each thread at dlopen time. for the most part,
synchronization is not needed, because if a thread has not
synchronized with completion of the dlopen, there is no way it can
meaningfully request access to a slot past the end of the old dtv,
which remains valid for accessing slots which already existed.
however, it is necessary to ensure that, if a thread sees its new dtv
pointer, it sees correct pointers in each of the slots that existed
prior to the dlopen. my understanding is that, on most real-world
coherency architectures including all the ones we presently support, a
built-in consume order guarantees this; however, don't rely on that.
instead, the SYS_membarrier syscall is used to ensure that all threads
see the stores to the slots of their new dtv prior to the installation
of the new dtv. if it is not supported, the same is implemented in
userspace via signals, using the same mechanism as __synccall.
the __tls_get_addr function, variants, and dynamic tlsdesc asm
functions are all updated to remove the fallback paths for claiming
new dynamic tls, and are now all branch-free.
2019-02-18 07:22:27 +03:00
|
|
|
#include <semaphore.h>
|
2019-02-22 10:56:10 +03:00
|
|
|
#include <sys/membarrier.h>
|
2012-10-05 00:35:46 +04:00
|
|
|
#include "pthread_impl.h"
|
|
|
|
#include "libc.h"
|
2015-04-13 09:56:26 +03:00
|
|
|
#include "dynlink.h"
|
2018-09-06 23:32:49 +03:00
|
|
|
#include "malloc_impl.h"
|
2011-06-19 03:48:42 +04:00
|
|
|
|
2015-04-19 01:00:22 +03:00
|
|
|
/* Forward declaration of the file-local variadic error reporter. Being
 * static, it has to be defined later in this translation unit.
 * NOTE(review): the first argument is presumably a printf-style format
 * string — confirm against the definition. */
static void error(const char *, ...);
|
2012-03-23 08:28:20 +04:00
|
|
|
|
2012-10-06 09:22:51 +04:00
|
|
|
/* Larger of two powers of two. -x is the mask of all bits at and above x,
 * so ANDing the two masks keeps only the bits at and above the larger
 * value; negating again turns that mask back into the value. */
#define MAXP2(a,b) (-(-(a)&-(b)))
/* Round x up to the next multiple of y. Only meaningful when y is a power
 * of two, since -(y) is used as the alignment mask. */
#define ALIGN(x,y) (((x)+(y)-1) & -(y))

/* Recover a pointer to the enclosing object of type t from a pointer p to
 * its member m. */
#define container_of(p,t,m) ((t*)((char *)(p)-offsetof(t,m)))
/* Element count of the array a. a must be a real array, not a pointer. */
#define countof(a) ((sizeof (a))/(sizeof (a)[0]))
|
2019-02-25 10:09:36 +03:00
|
|
|
|
2012-04-25 08:05:42 +04:00
|
|
|
/* Debugger-facing record. The file keeps a single static instance and
 * publishes its address through the global _dl_debug_addr.
 * NOTE(review): the member order looks like struct r_debug from <link.h>;
 * confirm before touching the layout. */
struct debug {
	int ver;
	void *head;
	void (*bp)(void);
	int state;
	void *base;
};
|
|
|
|
|
2014-06-19 10:01:06 +04:00
|
|
|
/* Singly linked list node holding a pair of size_t arguments. Each dso
 * carries the head of such a list in its td_index member.
 * NOTE(review): presumably the argument pair handed to dynamic TLS
 * descriptor resolution — usage is outside this view. */
struct td_index {
	size_t args[2];
	struct td_index *next;
};
|
|
|
|
|
2012-04-25 08:05:42 +04:00
|
|
|
struct dso {
|
2015-09-22 06:54:42 +03:00
|
|
|
#if DL_FDPIC
|
|
|
|
struct fdpic_loadmap *loadmap;
|
|
|
|
#else
|
2012-04-25 08:05:42 +04:00
|
|
|
unsigned char *base;
|
2015-09-22 06:54:42 +03:00
|
|
|
#endif
|
2012-04-25 08:05:42 +04:00
|
|
|
char *name;
|
|
|
|
size_t *dynv;
|
2011-06-19 03:48:42 +04:00
|
|
|
struct dso *next, *prev;
|
2012-04-25 08:05:42 +04:00
|
|
|
|
2012-11-01 05:27:48 +04:00
|
|
|
Phdr *phdr;
|
|
|
|
int phnum;
|
2014-03-25 22:59:50 +04:00
|
|
|
size_t phentsize;
|
2011-06-19 03:48:42 +04:00
|
|
|
Sym *syms;
|
2016-11-11 20:30:24 +03:00
|
|
|
Elf_Symndx *hashtab;
|
2012-08-26 01:13:28 +04:00
|
|
|
uint32_t *ghashtab;
|
2013-08-09 00:10:35 +04:00
|
|
|
int16_t *versym;
|
2011-06-19 03:48:42 +04:00
|
|
|
char *strings;
|
2017-03-13 15:52:41 +03:00
|
|
|
struct dso *syms_next, *lazy_next;
|
|
|
|
size_t *lazy, lazy_cnt;
|
2011-06-19 03:48:42 +04:00
|
|
|
unsigned char *map;
|
|
|
|
size_t map_len;
|
|
|
|
dev_t dev;
|
|
|
|
ino_t ino;
|
2012-02-08 05:29:29 +04:00
|
|
|
char relocated;
|
|
|
|
char constructed;
|
2013-08-23 21:56:30 +04:00
|
|
|
char kernel_mapped;
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
char mark;
|
|
|
|
char bfs_built;
|
2019-03-01 22:37:52 +03:00
|
|
|
char runtime_loaded;
|
2013-08-23 19:15:40 +04:00
|
|
|
struct dso **deps, *needed_by;
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
size_t ndeps_direct;
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
size_t next_dep;
|
|
|
|
int ctor_visitor;
|
2013-08-23 21:56:30 +04:00
|
|
|
char *rpath_orig, *rpath;
|
2015-11-12 23:50:26 +03:00
|
|
|
struct tls_module tls;
|
|
|
|
size_t tls_id;
|
2014-03-25 16:13:27 +04:00
|
|
|
size_t relro_start, relro_end;
|
combine arch ABI's DTP_OFFSET into DTV pointers
as explained in commit 6ba5517a460c6c438f64d69464fdfc3269a4c91a, some
archs use an offset (typicaly -0x8000) with their DTPOFF relocations,
which __tls_get_addr needs to invert. on affected archs, which lack
direct support for large immediates, this can cost multiple extra
instructions in the hot path. instead, incorporate the DTP_OFFSET into
the DTV entries. this means they are no longer valid pointers, so
store them as an array of uintptr_t rather than void *; this also
makes it easier to access slot 0 as a valid slot count.
commit e75b16cf93ebbc1ce758d3ea6b2923e8b2457c68 left behind cruft in
two places, __reset_tls and __tls_get_new, from back when it was
possible to have uninitialized gap slots indicated by a null pointer
in the DTV. since the concept of null pointer is no longer meaningful
with an offset applied, remove this cruft.
presently there are no archs with both TLSDESC and nonzero DTP_OFFSET,
but the dynamic TLSDESC relocation code is also updated to apply an
inverted offset to its offset field, so that the offset DTV would not
impose a runtime cost in TLSDESC resolver functions.
2018-10-12 07:30:34 +03:00
|
|
|
uintptr_t *new_dtv;
|
2012-10-05 19:51:50 +04:00
|
|
|
unsigned char *new_tls;
|
2014-06-19 10:01:06 +04:00
|
|
|
struct td_index *td_index;
|
2012-10-05 21:09:09 +04:00
|
|
|
struct dso *fini_next;
|
2012-05-28 00:01:44 +04:00
|
|
|
char *shortname;
|
2015-09-22 06:54:42 +03:00
|
|
|
#if DL_FDPIC
|
|
|
|
unsigned char *base;
|
|
|
|
#else
|
|
|
|
struct fdpic_loadmap *loadmap;
|
|
|
|
#endif
|
|
|
|
struct funcdesc {
|
|
|
|
void *addr;
|
|
|
|
size_t *got;
|
|
|
|
} *funcdescs;
|
|
|
|
size_t *got;
|
2011-06-27 01:39:17 +04:00
|
|
|
char buf[];
|
2011-06-19 03:48:42 +04:00
|
|
|
};
|
|
|
|
|
2012-10-05 06:48:33 +04:00
|
|
|
struct symdef {
|
|
|
|
Sym *sym;
|
|
|
|
struct dso *dso;
|
|
|
|
};
|
|
|
|
|
2020-01-01 05:59:07 +03:00
|
|
|
/* Pointer to a function taking two size_t pointers and returning nothing.
 * NOTE(review): by its name, the type of the dynamic linker's stage 3
 * entry point — confirm where it is used. */
typedef void (*stage3_func)(size_t *, size_t *);
|
2020-01-01 05:51:07 +03:00
|
|
|
|
2015-03-06 21:27:08 +03:00
|
|
|
static struct builtin_tls {
|
|
|
|
char c;
|
|
|
|
struct pthread pt;
|
|
|
|
void *space[16];
|
|
|
|
} builtin_tls[1];
|
|
|
|
#define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
|
|
|
|
|
reprocess all libc/ldso symbolic relocations in dynamic linking stage 3
commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 introduced early
relocations and subsequent reprocessing as part of the dynamic linker
bootstrap overhaul, to allow use of arbitrary libc functions before
the main application and libraries are loaded, but only reprocessed
GOT/PLT relocation types.
commit c093e2e8201524db0d638920e76bcb6b1d925f3a added reprocessing of
non-GOT/PLT relocations to fix an actual regression that was observed
on powerpc, but only for RELA format tables with out-of-line addends.
REL table (inline addends at the relocation address) reprocessing is
trickier because the first relocation pass clobbers the addends.
this patch extends symbolic relocation reprocessing for libc/ldso to
support all relocation types, whether REL or RELA format tables are
used. it is believed not to alter behavior on any existing archs for
the current dynamic linker and libc code. the motivations for this
change are consistency and future-proofing. it ensures that behavior
does not differ depending on whether REL or RELA tables are used,
which could lead to undetected arch-specific bugs. it also ensures
that, if in the future code depending on additional relocation types
is added to libc.so, either at the source level or as part of the
compiler runtime that gets pulled in (for example, soft-float with TLS
for fenv), the new code will work properly.
the implementation concept is simple: stage 2 of the dynamic linker
counts the number of symbolic relocations in the libc/ldso REL table
and allocates a VLA to save their addends into; stage 3 then uses the
saved addends in place of the inline ones which were clobbered. for
stack safety, a hard limit (currently 4k) is imposed on the number of
such addends; this should be a couple orders of magnitude larger than
the actual need. this number is not a runtime variable that could
break fail-safety; it is constant for a given libc.so build.
2015-05-26 06:33:59 +03:00
|
|
|
/* Hard cap on the number of REL-format addends saved for reprocessing.
 * Per the commit notes: stage 2 counts the symbolic relocations in the
 * libc/ldso REL table and saves their inline addends, because the first
 * relocation pass clobbers them; stage 3 then uses the saved copies. The
 * cap bounds the stack space used for that array. */
#define ADDEND_LIMIT 4096
/* NOTE(review): saved_addends presumably points at that saved array, and
 * apply_addends_to at the location whose relocations should use it —
 * confirm against the relocation code. */
static size_t *saved_addends, *apply_addends_to;
|
|
|
|
|
2015-04-13 09:56:26 +03:00
|
|
|
static struct dso ldso;
|
2017-03-13 15:52:41 +03:00
|
|
|
static struct dso *head, *tail, *fini_head, *syms_tail, *lazy_head;
|
2013-08-23 19:15:40 +04:00
|
|
|
static char *env_path, *sys_path;
|
2012-11-01 05:27:48 +04:00
|
|
|
static unsigned long long gencnt;
|
2011-06-27 01:39:17 +04:00
|
|
|
static int runtime;
|
2012-05-28 00:01:44 +04:00
|
|
|
static int ldd_mode;
|
2012-08-19 00:00:23 +04:00
|
|
|
static int ldso_fail;
|
2013-01-24 07:07:45 +04:00
|
|
|
static int noload;
|
2019-03-02 06:47:29 +03:00
|
|
|
static int shutting_down;
|
2013-07-24 10:38:05 +04:00
|
|
|
static jmp_buf *rtld_fail;
|
2011-06-27 03:23:28 +04:00
|
|
|
static pthread_rwlock_t lock;
|
2012-04-25 08:05:42 +04:00
|
|
|
static struct debug debug;
|
2015-11-12 23:50:26 +03:00
|
|
|
static struct tls_module *tls_tail;
|
2015-03-06 21:27:08 +03:00
|
|
|
static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
|
2014-06-19 10:01:06 +04:00
|
|
|
static size_t static_tls_cnt;
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
/* Per the commit notes: init_fini_lock is a normal, non-recursive mutex
 * held only while iterating the constructor queue and released while each
 * constructor runs. ctor_cond is the condition variable a thread waits on
 * when another thread is already running a shared dependency's
 * constructor. */
static pthread_mutex_t init_fini_lock;
static pthread_cond_t ctor_cond;
/* Small statically allocated dso pointer arrays.
 * NOTE(review): presumably used in place of heap allocations for the
 * common small cases — confirm at the points of use. */
static struct dso *builtin_deps[2];
static struct dso *const no_deps[1];
static struct dso *builtin_ctor_queue[4];
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
static struct dso **main_ctor_queue;
|
2015-09-22 06:54:42 +03:00
|
|
|
static struct fdpic_loadmap *app_loadmap;
|
|
|
|
static struct fdpic_dummy_loadmap app_dummy_loadmap;
|
2012-04-25 08:05:42 +04:00
|
|
|
|
|
|
|
struct debug *_dl_debug_addr = &debug;
|
2011-06-19 03:48:42 +04:00
|
|
|
|
2018-09-05 19:43:34 +03:00
|
|
|
extern hidden int __malloc_replaced;
|
2018-04-20 05:19:29 +03:00
|
|
|
|
2018-09-05 19:43:34 +03:00
|
|
|
hidden void (*const __init_array_start)(void)=0, (*const __fini_array_start)(void)=0;
|
remove undef weak refs to init/fini array symbols in libc.so
commit ad1cd43a86645ba2d4f7c8747240452a349d6bc1 eliminated
preprocessor-level omission of references to the init/fini array
symbols from object files going into libc.so. the references are weak,
and the intent was that the linker would resolve them to zero in
libc.so, but instead it leaves undefined references that could be
satisfied at runtime. normally these references would be harmless,
since the code using them does not even get executed, but some older
binutils versions produce a linking error: when linking a program
against libc.so, ld first tries to use the hidden init/fini array
symbols produced by the linker script to satisfy the references in
libc.so, then produces an error because the definitions are hidden.
ideally ld would have already provided definitions of these symbols
when linking libc.so, but the linker script for -shared omits them.
to avoid this situation, the dynamic linker now provides its own dummy
definitions of the init/fini array symbols for libc.so. since they are
hidden, everything binds at ld time and no references remain in the
dynamic symbol table. with modern binutils and --gc-sections, both
the dummy empty array objects and the code referencing them get
dropped at link time, anyway.
the _init and _fini symbols are also switched back to using weak
definitions rather than weak references since the latter behave
somewhat problematically in general, and the weak definition approach
was known to work well.
2015-11-20 04:28:08 +03:00
|
|
|
|
2018-09-05 19:43:34 +03:00
|
|
|
/* Declarations of the init/fini array end symbols, with the same hidden
 * visibility and type as the start symbols defined in this file. */
extern hidden void (*const __init_array_end)(void), (*const __fini_array_end)(void);
|
remove undef weak refs to init/fini array symbols in libc.so
commit ad1cd43a86645ba2d4f7c8747240452a349d6bc1 eliminated
preprocessor-level omission of references to the init/fini array
symbols from object files going into libc.so. the references are weak,
and the intent was that the linker would resolve them to zero in
libc.so, but instead it leaves undefined references that could be
satisfied at runtime. normally these references would be harmless,
since the code using them does not even get executed, but some older
binutils versions produce a linking error: when linking a program
against libc.so, ld first tries to use the hidden init/fini array
symbols produced by the linker script to satisfy the references in
libc.so, then produces an error because the definitions are hidden.
ideally ld would have already provided definitions of these symbols
when linking libc.so, but the linker script for -shared omits them.
to avoid this situation, the dynamic linker now provides its own dummy
definitions of the init/fini array symbols for libc.so. since they are
hidden, everything binds at ld time and no references remain in the
dynamic symbol table. with modern binutils and --gc-sections, both
the dummy empty array objects and the code referencing them get
dropped at link time, anyway.
the _init and _fini symbols are also switched back to using weak
definitions rather than weak references since the latter behave
somewhat problematically in general, and the weak definition approach
was known to work well.
2015-11-20 04:28:08 +03:00
|
|
|
|
|
|
|
/* NOTE(review): assuming weak_alias(old, new) defines `new` as a weak
 * alias of `old`, each end symbol coincides with its start symbol, so the
 * dummy init/fini arrays defined in this file are empty unless a stronger
 * definition overrides the alias. Confirm against the weak_alias macro. */
weak_alias(__init_array_start, __init_array_end);
weak_alias(__fini_array_start, __fini_array_end);
|
|
|
|
|
2015-04-13 09:56:26 +03:00
|
|
|
/* File-local string comparison with strcmp semantics: returns zero when
 * the strings are equal, otherwise the difference of the first differing
 * bytes taken as unsigned char. */
static int dl_strcmp(const char *l, const char *r)
{
	/* Advance while the bytes match and the end of l is not reached. */
	for (; *l==*r && *l; l++, r++);
	return *(const unsigned char *)l - *(const unsigned char *)r;
}
/* Every later use of strcmp in this file resolves to the local version. */
#define strcmp(l,r) dl_strcmp(l,r)
|
2011-06-19 03:48:42 +04:00
|
|
|
|
2015-09-17 20:18:09 +03:00
|
|
|
/* Compute load address for a virtual address in a given dso. */
|
2015-09-22 23:20:39 +03:00
|
|
|
#if DL_FDPIC
|
2015-09-22 06:54:42 +03:00
|
|
|
static void *laddr(const struct dso *p, size_t v)
|
|
|
|
{
|
|
|
|
size_t j=0;
|
|
|
|
if (!p->loadmap) return p->base + v;
|
|
|
|
for (j=0; v-p->loadmap->segs[j].p_vaddr >= p->loadmap->segs[j].p_memsz; j++);
|
|
|
|
return (void *)(v - p->loadmap->segs[j].p_vaddr + p->loadmap->segs[j].addr);
|
|
|
|
}
|
2018-04-17 22:55:18 +03:00
|
|
|
static void *laddr_pg(const struct dso *p, size_t v)
|
|
|
|
{
|
|
|
|
size_t j=0;
|
|
|
|
size_t pgsz = PAGE_SIZE;
|
|
|
|
if (!p->loadmap) return p->base + v;
|
|
|
|
for (j=0; ; j++) {
|
|
|
|
size_t a = p->loadmap->segs[j].p_vaddr;
|
|
|
|
size_t b = a + p->loadmap->segs[j].p_memsz;
|
|
|
|
a &= -pgsz;
|
|
|
|
b += pgsz-1;
|
|
|
|
b &= -pgsz;
|
|
|
|
if (v-a<b-a) break;
|
|
|
|
}
|
|
|
|
return (void *)(v - p->loadmap->segs[j].p_vaddr + p->loadmap->segs[j].addr);
|
|
|
|
}
|
2020-01-01 08:15:04 +03:00
|
|
|
/* Return p converted to a function pointer. The value is routed through
 * an empty asm statement (input and output tied to the same register), so
 * the compiler cannot see that the result is derived from p.
 * NOTE(review): presumably this stops the optimiser reasoning about the
 * temporary function descriptor built by fpaddr — confirm the intent. */
static void (*fdbarrier(void *p))()
{
	void (*fd)();
	__asm__("" : "=r"(fd) : "0"(p));
	return fd;
}
|
|
|
|
/* FDPIC: build a temporary function descriptor { load address of v, GOT
 * of p } as a compound literal and return its address as a function
 * pointer via fdbarrier. The descriptor's lifetime is that of the
 * enclosing block of the macro use. */
#define fpaddr(p, v) fdbarrier((&(struct funcdesc){ \
	laddr(p, v), (p)->got }))
|
2015-09-22 06:54:42 +03:00
|
|
|
#else
|
2015-09-17 20:18:09 +03:00
|
|
|
/* Non-FDPIC: an object is loaded contiguously, so a virtual address maps
 * to base + v. The expansion is fully parenthesised so that it behaves as
 * a single operand wherever the macro is used. */
#define laddr(p, v) ((void *)((p)->base + (v)))
/* No page-granular variant is needed when the mapping is contiguous. */
#define laddr_pg(p, v) laddr(p, v)
/* A function pointer is just the load address of the function. */
#define fpaddr(p, v) ((void (*)())laddr(p, v))
|
|
|
|
#endif
|
2015-09-17 20:18:09 +03:00
|
|
|
|
2011-06-19 03:48:42 +04:00
|
|
|
/* Decode a zero-terminated vector of (tag, value) pairs into a table
 * indexed by tag.
 *
 * v:   input pairs; a pair whose tag is 0 ends the vector.
 * a:   output table of cnt entries, cleared first. For each tag in
 *      1..cnt-1, a[tag] receives the value. a[0] is a bitmask with bit
 *      `tag` set for every tag seen that fits in an unsigned long.
 * cnt: number of entries in a; tags >= cnt are ignored.
 */
static void decode_vec(size_t *v, size_t *a, size_t cnt)
{
	size_t i;
	/* With no slots at all, cnt-1 below would wrap and accept every tag. */
	if (!cnt) return;
	for (i=0; i<cnt; i++) a[i] = 0;
	/* v[0]-1<cnt-1 accepts 1 <= tag < cnt in one unsigned comparison. */
	for (; v[0]; v+=2) if (v[0]-1<cnt-1) {
		/* Shifting by the width of the type or more is undefined, so
		 * larger tags are stored but get no presence bit. */
		if (v[0] < CHAR_BIT*sizeof(unsigned long))
			a[0] |= 1UL<<v[0];
		a[v[0]] = v[1];
	}
}
|
|
|
|
|
2012-08-26 01:13:28 +04:00
|
|
|
/* Search a zero-terminated vector of (tag, value) pairs for key.
 * On a match, store the paired value in *r and return 1. Return 0 with *r
 * untouched if the terminating zero tag is reached first. The match test
 * runs before the terminator test, so key == 0 matches the terminator. */
static int search_vec(size_t *v, size_t *r, size_t key)
{
	for (; v[0]!=key; v+=2)
		if (!v[0]) return 0;
	*r = v[1];
	return 1;
}
|
|
|
|
|
|
|
|
/* Hash a NUL-terminated name; the result always fits in 28 bits.
 * Each byte is shifted in 4 bits at a time. The bits that reach positions
 * 28..31 are folded back into bits 4..7. They are not cleared explicitly:
 * the next shift moves them out of the range that the fold and the final
 * mask look at. */
static uint32_t sysv_hash(const char *s0)
{
	const unsigned char *s = (const void *)s0;
	uint_fast32_t h = 0;
	while (*s) {
		h = 16*h + *s++;
		h ^= h>>24 & 0xf0;
	}
	return h & 0xfffffff;
}
|
|
|
|
|
2012-08-26 01:13:28 +04:00
|
|
|
/* Hash a NUL-terminated name: start at 5381 and compute h = h*33 + byte
 * for each byte, with the result truncated to 32 bits on return. */
static uint32_t gnu_hash(const char *s0)
{
	const unsigned char *s = (const void *)s0;
	uint_fast32_t h = 5381;
	for (; *s; s++)
		h += h*32 + *s;
	return h;
}
|
|
|
|
|
|
|
|
static Sym *sysv_lookup(const char *s, uint32_t h, struct dso *dso)
|
2011-06-19 03:48:42 +04:00
|
|
|
{
|
|
|
|
size_t i;
|
2012-08-05 10:38:35 +04:00
|
|
|
Sym *syms = dso->syms;
|
2016-11-11 20:30:24 +03:00
|
|
|
Elf_Symndx *hashtab = dso->hashtab;
|
2012-08-05 10:38:35 +04:00
|
|
|
char *strings = dso->strings;
|
2011-06-19 03:48:42 +04:00
|
|
|
for (i=hashtab[2+h%hashtab[0]]; i; i=hashtab[2+hashtab[0]+i]) {
|
2013-08-09 00:10:35 +04:00
|
|
|
if ((!dso->versym || dso->versym[i] >= 0)
|
|
|
|
&& (!strcmp(s, strings+syms[i].st_name)))
|
2011-06-19 03:48:42 +04:00
|
|
|
return syms+i;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-06-28 02:48:33 +03:00
|
|
|
static Sym *gnu_lookup(uint32_t h1, uint32_t *hashtab, struct dso *dso, const char *s)
|
2012-08-26 01:13:28 +04:00
|
|
|
{
|
|
|
|
uint32_t nbuckets = hashtab[0];
|
|
|
|
uint32_t *buckets = hashtab + 4 + hashtab[2]*(sizeof(size_t)/4);
|
2013-08-09 00:10:35 +04:00
|
|
|
uint32_t i = buckets[h1 % nbuckets];
|
2012-08-26 01:13:28 +04:00
|
|
|
|
2013-08-09 00:10:35 +04:00
|
|
|
if (!i) return 0;
|
2012-08-26 01:13:28 +04:00
|
|
|
|
2015-06-28 02:48:32 +03:00
|
|
|
uint32_t *hashval = buckets + nbuckets + (i - hashtab[1]);
|
2012-08-26 01:13:28 +04:00
|
|
|
|
2013-08-09 00:10:35 +04:00
|
|
|
for (h1 |= 1; ; i++) {
|
2015-06-28 02:48:32 +03:00
|
|
|
uint32_t h2 = *hashval++;
|
|
|
|
if ((h1 == (h2|1)) && (!dso->versym || dso->versym[i] >= 0)
|
|
|
|
&& !strcmp(s, dso->strings + dso->syms[i].st_name))
|
|
|
|
return dso->syms+i;
|
2012-08-26 01:13:28 +04:00
|
|
|
if (h2 & 1) break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-06-28 02:48:33 +03:00
|
|
|
/* gnu_lookup preceded by a two-bit filter test, so that most misses
 * return without touching the buckets.
 * fofs selects the filter word (masked with hashtab[2]-1) and fmask is
 * the first bit to test; both are supplied by the caller. The second bit
 * is derived here from h1 shifted right by hashtab[3].
 * NOTE(review): the masking assumes hashtab[2] is a power of two —
 * confirm against the table format. */
static Sym *gnu_lookup_filtered(uint32_t h1, uint32_t *hashtab, struct dso *dso, const char *s, uint32_t fofs, size_t fmask)
{
	/* The filter words are size_t-sized and start after the header. */
	const size_t *bloomwords = (const void *)(hashtab+4);
	size_t f = bloomwords[fofs & (hashtab[2]-1)];
	if (!(f & fmask)) return 0;

	f >>= (h1 >> hashtab[3]) % (8 * sizeof f);
	if (!(f & 1)) return 0;

	return gnu_lookup(h1, hashtab, dso, s);
}
|
|
|
|
|
2012-10-05 06:48:33 +04:00
|
|
|
/* Bit sets, indexed by ELF symbol type and binding respectively, of the
 * symbols that find_sym2 accepts as definitions. */
#define OK_TYPES ((1<<STT_NOTYPE) | (1<<STT_OBJECT) | (1<<STT_FUNC) | (1<<STT_COMMON) | (1<<STT_TLS))
#define OK_BINDS ((1<<STB_GLOBAL) | (1<<STB_WEAK) | (1<<STB_GNU_UNIQUE))

/* Arch hook: nonzero means the undefined symbol s must not be used to
 * satisfy a lookup. It was introduced for mips, where undefined symbols
 * that stand for PLT thunks carry STO_MIPS_PLT in st_other and all
 * other undefined symbols have to be rejected. Archs that do not
 * define it reject nothing. */
#if !defined(ARCH_SYM_REJECT_UND)
#define ARCH_SYM_REJECT_UND(s) 0
#endif
|
|
|
|
|
2019-08-11 02:14:40 +03:00
|
|
|
#if defined(__GNUC__)
|
|
|
|
__attribute__((always_inline))
|
|
|
|
#endif
|
|
|
|
static inline struct symdef find_sym2(struct dso *dso, const char *s, int need_def, int use_deps)
|
2011-06-19 03:48:42 +04:00
|
|
|
{
|
2017-03-15 23:50:19 +03:00
|
|
|
uint32_t h = 0, gh = gnu_hash(s), gho = gh / (8*sizeof(size_t)), *ght;
|
|
|
|
size_t ghm = 1ul << gh % (8*sizeof(size_t));
|
2012-10-05 06:48:33 +04:00
|
|
|
struct symdef def = {0};
|
2019-08-11 02:14:40 +03:00
|
|
|
struct dso **deps = use_deps ? dso->deps : 0;
|
|
|
|
for (; dso; dso=use_deps ? *deps++ : dso->syms_next) {
|
2011-06-27 03:23:28 +04:00
|
|
|
Sym *sym;
|
2015-06-28 02:48:33 +03:00
|
|
|
if ((ght = dso->ghashtab)) {
|
|
|
|
sym = gnu_lookup_filtered(gh, ght, dso, s, gho, ghm);
|
2012-08-26 01:13:28 +04:00
|
|
|
} else {
|
|
|
|
if (!h) h = sysv_hash(s);
|
|
|
|
sym = sysv_lookup(s, h, dso);
|
|
|
|
}
|
2012-10-06 09:36:11 +04:00
|
|
|
if (!sym) continue;
|
|
|
|
if (!sym->st_shndx)
|
fix regression in mips dynamic linker
this issue caused the address of functions in shared libraries to
resolve to their PLT thunks in the main program rather than their
correct addresses. it was observed causing crashes, though the
mechanism of the crash was not thoroughly investigated. since the
issue is very subtle, it calls for some explanation:
on all well-behaved archs, GOT entries that belong to the PLT use a
special relocation type, typically called JMP_SLOT, so that the
dynamic linker can avoid having the jump destinations for the PLT
resolve to PLT thunks themselves (they also provide a definition for
the symbol, which must be used whenever the address of the function is
taken so that all DSOs see the same address).
however, the traditional mips PIC ABI lacked such a JMP_SLOT
relocation type, presumably because, due to the way PIC works, the
address of the PLT thunk was never needed and could always be ignored.
prior to commit adf94c19666e687a728bbf398f9a88ea4ea19996, the mips
version of reloc.h contained a hack that caused all symbol lookups to
be treated like JMP_SLOT, inhibiting undefined symbols from ever being
used to resolve symbolic relocations. this hack goes all the way back
to commit babf820180368f00742ec65b2050a82380d7c542, when the mips
dynamic linker was first made usable.
during the recent refactoring to eliminate arch-specific relocation
processing (commit adf94c19666e687a728bbf398f9a88ea4ea19996), this
hack was overlooked and no equivalent functionality was provided in
the new code.
fixing the problem is not as simple as adding back an equivalent hack,
since there is now also a "non-PIC ABI" that can be used for the main
executable, which actually does use a PLT. the closest thing to
official documentation I could find for this ABI is nonpic.txt,
attached to Message-ID: 20080701202236.GA1534@caradoc.them.org, which
can be found in the gcc mailing list archives and elsewhere. per this
document, undefined symbols corresponding to PLT thunks have the
STO_MIPS_PLT bit set in the symbol's st_other field. thus, I have
added an arch-specific rule for mips, applied at the find_sym level
rather than the relocation level, to reject undefined symbols with the
STO_MIPS_PLT bit clear.
the previous hack of treating all mips relocations as JMP_SLOT-like,
rather than rejecting the unwanted symbols in find_sym, probably also
caused dlsym to wrongly return PLT thunks in place of the correct
address of a function under at least some conditions. this should now
be fixed, at least for global-scope symbol lookups.
2014-06-30 09:18:14 +04:00
|
|
|
if (need_def || (sym->st_info&0xf) == STT_TLS
|
|
|
|
|| ARCH_SYM_REJECT_UND(sym))
|
2012-10-06 09:36:11 +04:00
|
|
|
continue;
|
|
|
|
if (!sym->st_value)
|
|
|
|
if ((sym->st_info&0xf) != STT_TLS)
|
|
|
|
continue;
|
|
|
|
if (!(1<<(sym->st_info&0xf) & OK_TYPES)) continue;
|
|
|
|
if (!(1<<(sym->st_info>>4) & OK_BINDS)) continue;
|
|
|
|
def.sym = sym;
|
|
|
|
def.dso = dso;
|
2016-12-03 23:52:43 +03:00
|
|
|
break;
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
2011-07-24 10:19:47 +04:00
|
|
|
return def;
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
|
|
|
|
2019-08-11 02:14:40 +03:00
|
|
|
static struct symdef find_sym(struct dso *dso, const char *s, int need_def)
|
|
|
|
{
|
|
|
|
return find_sym2(dso, s, need_def, 0);
|
|
|
|
}
|
|
|
|
|
2012-08-05 10:49:02 +04:00
|
|
|
static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stride)
|
2011-06-19 03:48:42 +04:00
|
|
|
{
|
2012-08-05 10:49:02 +04:00
|
|
|
unsigned char *base = dso->base;
|
|
|
|
Sym *syms = dso->syms;
|
|
|
|
char *strings = dso->strings;
|
2011-06-19 03:48:42 +04:00
|
|
|
Sym *sym;
|
|
|
|
const char *name;
|
|
|
|
void *ctx;
|
2015-04-13 09:56:26 +03:00
|
|
|
int type;
|
2011-06-19 03:48:42 +04:00
|
|
|
int sym_index;
|
2012-10-05 06:48:33 +04:00
|
|
|
struct symdef def;
|
2014-06-18 10:44:02 +04:00
|
|
|
size_t *reloc_addr;
|
|
|
|
size_t sym_val;
|
|
|
|
size_t tls_val;
|
|
|
|
size_t addend;
|
reprocess all libc/ldso symbolic relocations in dynamic linking stage 3
commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 introduced early
relocations and subsequent reprocessing as part of the dynamic linker
bootstrap overhaul, to allow use of arbitrary libc functions before
the main application and libraries are loaded, but only reprocessed
GOT/PLT relocation types.
commit c093e2e8201524db0d638920e76bcb6b1d925f3a added reprocessing of
non-GOT/PLT relocations to fix an actual regression that was observed
on powerpc, but only for RELA format tables with out-of-line addends.
REL table (inline addends at the relocation address) reprocessing is
trickier because the first relocation pass clobbers the addends.
this patch extends symbolic relocation reprocessing for libc/ldso to
support all relocation types, whether REL or RELA format tables are
used. it is believed not to alter behavior on any existing archs for
the current dynamic linker and libc code. the motivations for this
change are consistency and future-proofing. it ensures that behavior
does not differ depending on whether REL or RELA tables are used,
which could lead to undetected arch-specific bugs. it also ensures
that, if in the future code depending on additional relocation types
is added to libc.so, either at the source level or as part of the
compiler runtime that gets pulled in (for example, soft-float with TLS
for fenv), the new code will work properly.
the implementation concept is simple: stage 2 of the dynamic linker
counts the number of symbolic relocations in the libc/ldso REL table
and allocates a VLA to save their addends into; stage 3 then uses the
saved addends in place of the inline ones which were clobbered. for
stack safety, a hard limit (currently 4k) is imposed on the number of
such addends; this should be a couple orders of magnitude larger than
the actual need. this number is not a runtime variable that could
break fail-safety; it is constant for a given libc.so build.
2015-05-26 06:33:59 +03:00
|
|
|
int skip_relative = 0, reuse_addends = 0, save_slot = 0;
|
|
|
|
|
|
|
|
if (dso == &ldso) {
|
|
|
|
/* Only ldso's REL table needs addend saving/reuse. */
|
|
|
|
if (rel == apply_addends_to)
|
|
|
|
reuse_addends = 1;
|
|
|
|
skip_relative = 1;
|
|
|
|
}
|
2011-06-19 03:48:42 +04:00
|
|
|
|
|
|
|
for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
|
2015-09-22 06:54:42 +03:00
|
|
|
if (skip_relative && IS_RELATIVE(rel[1], dso->syms)) continue;
|
2015-04-13 09:56:26 +03:00
|
|
|
type = R_TYPE(rel[1]);
|
2015-06-04 18:45:17 +03:00
|
|
|
if (type == REL_NONE) continue;
|
2015-09-17 20:50:43 +03:00
|
|
|
reloc_addr = laddr(dso, rel[0]);
|
2017-03-13 07:30:26 +03:00
|
|
|
|
|
|
|
if (stride > 2) {
|
|
|
|
addend = rel[2];
|
|
|
|
} else if (type==REL_GOT || type==REL_PLT|| type==REL_COPY) {
|
|
|
|
addend = 0;
|
|
|
|
} else if (reuse_addends) {
|
|
|
|
/* Save original addend in stage 2 where the dso
|
|
|
|
* chain consists of just ldso; otherwise read back
|
|
|
|
* saved addend since the inline one was clobbered. */
|
|
|
|
if (head==&ldso)
|
|
|
|
saved_addends[save_slot] = *reloc_addr;
|
|
|
|
addend = saved_addends[save_slot++];
|
|
|
|
} else {
|
|
|
|
addend = *reloc_addr;
|
|
|
|
}
|
|
|
|
|
|
|
|
sym_index = R_SYM(rel[1]);
|
2011-06-19 03:48:42 +04:00
|
|
|
if (sym_index) {
|
|
|
|
sym = syms + sym_index;
|
|
|
|
name = strings + sym->st_name;
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
ctx = type==REL_COPY ? head->syms_next : head;
|
2019-08-13 01:19:38 +03:00
|
|
|
def = (sym->st_info>>4) == STB_LOCAL
|
2015-09-22 06:54:42 +03:00
|
|
|
? (struct symdef){ .dso = dso, .sym = sym }
|
|
|
|
: find_sym(ctx, name, type==REL_PLT);
|
2014-01-21 09:36:35 +04:00
|
|
|
if (!def.sym && (sym->st_shndx != SHN_UNDEF
|
|
|
|
|| sym->st_info>>4 != STB_WEAK)) {
|
2017-03-13 15:52:41 +03:00
|
|
|
if (dso->lazy && (type==REL_PLT || type==REL_GOT)) {
|
|
|
|
dso->lazy[3*dso->lazy_cnt+0] = rel[0];
|
|
|
|
dso->lazy[3*dso->lazy_cnt+1] = rel[1];
|
|
|
|
dso->lazy[3*dso->lazy_cnt+2] = addend;
|
|
|
|
dso->lazy_cnt++;
|
|
|
|
continue;
|
|
|
|
}
|
2014-06-30 05:52:54 +04:00
|
|
|
error("Error relocating %s: %s: symbol not found",
|
2012-08-05 10:49:02 +04:00
|
|
|
dso->name, name);
|
2015-04-19 01:00:22 +03:00
|
|
|
if (runtime) longjmp(*rtld_fail, 1);
|
2012-08-19 00:00:23 +04:00
|
|
|
continue;
|
2011-06-27 01:39:17 +04:00
|
|
|
}
|
2012-08-05 22:03:17 +04:00
|
|
|
} else {
|
2012-10-05 06:48:33 +04:00
|
|
|
sym = 0;
|
|
|
|
def.sym = 0;
|
2014-06-18 10:44:02 +04:00
|
|
|
def.dso = dso;
|
|
|
|
}
|
|
|
|
|
2015-09-17 20:50:43 +03:00
|
|
|
sym_val = def.sym ? (size_t)laddr(def.dso, def.sym->st_value) : 0;
|
2014-06-18 10:44:02 +04:00
|
|
|
tls_val = def.sym ? def.sym->st_value : 0;
|
|
|
|
|
2018-07-16 19:32:57 +03:00
|
|
|
if ((type == REL_TPOFF || type == REL_TPOFF_NEG)
|
2019-08-11 18:57:38 +03:00
|
|
|
&& def.dso->tls_id > static_tls_cnt) {
|
2018-07-16 19:32:57 +03:00
|
|
|
error("Error relocating %s: %s: initial-exec TLS "
|
|
|
|
"resolves to dynamic definition in %s",
|
|
|
|
dso->name, name, def.dso->name);
|
|
|
|
longjmp(*rtld_fail, 1);
|
|
|
|
}
|
|
|
|
|
2014-06-18 10:44:02 +04:00
|
|
|
switch(type) {
|
2015-04-13 09:56:26 +03:00
|
|
|
case REL_NONE:
|
|
|
|
break;
|
2014-06-18 10:44:02 +04:00
|
|
|
case REL_OFFSET:
|
|
|
|
addend -= (size_t)reloc_addr;
|
|
|
|
case REL_SYMBOLIC:
|
|
|
|
case REL_GOT:
|
|
|
|
case REL_PLT:
|
|
|
|
*reloc_addr = sym_val + addend;
|
|
|
|
break;
|
add support for powerpc/powerpc64 unaligned relocations
R_PPC_UADDR32 (R_PPC64_UADDR64) has the same meaning as R_PPC_ADDR32
(R_PPC64_ADDR64), except that its address need not be aligned. For
powerpc64, BFD ld(1) will automatically convert between ADDR<->UADDR
relocations when the address is/isn't at its native alignment. This
will happen if, for example, there is a pointer in a packed struct.
gold and lld do not currently generate R_PPC64_UADDR64, but pass
through misaligned R_PPC64_ADDR64 relocations from object files,
possibly relaxing them to misaligned R_PPC64_RELATIVE. In both cases
(relaxed or not) this violates the PSABI, which defines the relevant
field type as "a 64-bit field occupying 8 bytes, the alignment of
which is 8 bytes unless otherwise specified."
All three linkers violate the PSABI on 32-bit powerpc, where the only
difference is that the field is 32 bits wide, aligned to 4 bytes.
Currently musl fails to load executables linked by BFD ld containing
R_PPC64_UADDR64, with the error "unsupported relocation type 43".
This change provides compatibility with BFD ld on powerpc64, and any
static linker on either architecture that starts following the PSABI
more closely.
2019-06-30 15:39:20 +03:00
|
|
|
case REL_USYMBOLIC:
|
|
|
|
memcpy(reloc_addr, &(size_t){sym_val + addend}, sizeof(size_t));
|
|
|
|
break;
|
2014-06-18 10:44:02 +04:00
|
|
|
case REL_RELATIVE:
|
|
|
|
*reloc_addr = (size_t)base + addend;
|
|
|
|
break;
|
|
|
|
case REL_SYM_OR_REL:
|
|
|
|
if (sym) *reloc_addr = sym_val + addend;
|
|
|
|
else *reloc_addr = (size_t)base + addend;
|
|
|
|
break;
|
|
|
|
case REL_COPY:
|
|
|
|
memcpy(reloc_addr, (void *)sym_val, sym->st_size);
|
|
|
|
break;
|
|
|
|
case REL_OFFSET32:
|
|
|
|
*(uint32_t *)reloc_addr = sym_val + addend
|
|
|
|
- (size_t)reloc_addr;
|
|
|
|
break;
|
2015-09-22 06:54:42 +03:00
|
|
|
case REL_FUNCDESC:
|
|
|
|
*reloc_addr = def.sym ? (size_t)(def.dso->funcdescs
|
|
|
|
+ (def.sym - def.dso->syms)) : 0;
|
|
|
|
break;
|
|
|
|
case REL_FUNCDESC_VAL:
|
|
|
|
if ((sym->st_info&0xf) == STT_SECTION) *reloc_addr += sym_val;
|
|
|
|
else *reloc_addr = sym_val;
|
|
|
|
reloc_addr[1] = def.sym ? (size_t)def.dso->got : 0;
|
|
|
|
break;
|
2014-06-18 10:44:02 +04:00
|
|
|
case REL_DTPMOD:
|
|
|
|
*reloc_addr = def.dso->tls_id;
|
|
|
|
break;
|
|
|
|
case REL_DTPOFF:
|
fix local-dynamic model TLS on mips and powerpc
the TLS ABI spec for mips, powerpc, and some other (presently
unsupported) RISC archs has the return value of __tls_get_addr offset
by +0x8000 and the result of DTPOFF relocations offset by -0x8000. I
had previously assumed this part of the ABI was actually just an
implementation detail, since the adjustments cancel out. however, when
the local dynamic model is used for accessing TLS that's known to be
in the same DSO, either of the following may happen:
1. the -0x8000 offset may already be applied to the argument structure
passed to __tls_get_addr at ld time, without any opportunity for
runtime relocations.
2. __tls_get_addr may be used with a zero offset argument to obtain a
base address for the module's TLS, to which the caller then applies
immediate offsets for individual objects accessed using the local
dynamic model. since the immediate offsets have the -0x8000 adjustment
applied to them, the base address they use needs to include the
+0x8000 offset.
it would be possible, but more complex, to store the pointers in the
dtv[] array with the +0x8000 offset pre-applied, to avoid the runtime
cost of adding 0x8000 on each call to __tls_get_addr. this change
could be made later if measurements show that it would help.
2015-06-26 01:22:00 +03:00
|
|
|
*reloc_addr = tls_val + addend - DTP_OFFSET;
|
2014-06-18 10:44:02 +04:00
|
|
|
break;
|
|
|
|
#ifdef TLS_ABOVE_TP
|
|
|
|
case REL_TPOFF:
|
2015-11-12 23:50:26 +03:00
|
|
|
*reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend;
|
2014-06-18 10:44:02 +04:00
|
|
|
break;
|
|
|
|
#else
|
|
|
|
case REL_TPOFF:
|
2015-11-12 23:50:26 +03:00
|
|
|
*reloc_addr = tls_val - def.dso->tls.offset + addend;
|
2014-06-18 10:44:02 +04:00
|
|
|
break;
|
|
|
|
case REL_TPOFF_NEG:
|
2015-11-12 23:50:26 +03:00
|
|
|
*reloc_addr = def.dso->tls.offset - tls_val + addend;
|
2014-06-18 10:44:02 +04:00
|
|
|
break;
|
|
|
|
#endif
|
2014-06-19 10:01:06 +04:00
|
|
|
case REL_TLSDESC:
|
|
|
|
if (stride<3) addend = reloc_addr[1];
|
2019-08-11 18:57:38 +03:00
|
|
|
if (def.dso->tls_id > static_tls_cnt) {
|
2014-06-19 10:01:06 +04:00
|
|
|
struct td_index *new = malloc(sizeof *new);
|
2015-04-19 01:00:22 +03:00
|
|
|
if (!new) {
|
|
|
|
error(
|
2014-06-19 10:01:06 +04:00
|
|
|
"Error relocating %s: cannot allocate TLSDESC for %s",
|
|
|
|
dso->name, sym ? name : "(local)" );
|
2015-04-21 20:22:48 +03:00
|
|
|
longjmp(*rtld_fail, 1);
|
2015-04-19 01:00:22 +03:00
|
|
|
}
|
2014-06-19 10:01:06 +04:00
|
|
|
new->next = dso->td_index;
|
|
|
|
dso->td_index = new;
|
|
|
|
new->args[0] = def.dso->tls_id;
|
combine arch ABI's DTP_OFFSET into DTV pointers
as explained in commit 6ba5517a460c6c438f64d69464fdfc3269a4c91a, some
archs use an offset (typicaly -0x8000) with their DTPOFF relocations,
which __tls_get_addr needs to invert. on affected archs, which lack
direct support for large immediates, this can cost multiple extra
instructions in the hot path. instead, incorporate the DTP_OFFSET into
the DTV entries. this means they are no longer valid pointers, so
store them as an array of uintptr_t rather than void *; this also
makes it easier to access slot 0 as a valid slot count.
commit e75b16cf93ebbc1ce758d3ea6b2923e8b2457c68 left behind cruft in
two places, __reset_tls and __tls_get_new, from back when it was
possible to have uninitialized gap slots indicated by a null pointer
in the DTV. since the concept of null pointer is no longer meaningful
with an offset applied, remove this cruft.
presently there are no archs with both TLSDESC and nonzero DTP_OFFSET,
but the dynamic TLSDESC relocation code is also updated to apply an
inverted offset to its offset field, so that the offset DTV would not
impose a runtime cost in TLSDESC resolver functions.
2018-10-12 07:30:34 +03:00
|
|
|
new->args[1] = tls_val + addend - DTP_OFFSET;
|
2014-06-19 10:01:06 +04:00
|
|
|
reloc_addr[0] = (size_t)__tlsdesc_dynamic;
|
|
|
|
reloc_addr[1] = (size_t)new;
|
|
|
|
} else {
|
|
|
|
reloc_addr[0] = (size_t)__tlsdesc_static;
|
|
|
|
#ifdef TLS_ABOVE_TP
|
2015-11-12 23:50:26 +03:00
|
|
|
reloc_addr[1] = tls_val + def.dso->tls.offset
|
2014-06-19 10:01:06 +04:00
|
|
|
+ TPOFF_K + addend;
|
|
|
|
#else
|
2015-11-12 23:50:26 +03:00
|
|
|
reloc_addr[1] = tls_val - def.dso->tls.offset
|
2014-06-19 10:01:06 +04:00
|
|
|
+ addend;
|
|
|
|
#endif
|
|
|
|
}
|
2018-10-02 01:37:02 +03:00
|
|
|
#ifdef TLSDESC_BACKWARDS
|
|
|
|
/* Some archs (32-bit ARM at least) invert the order of
|
|
|
|
* the descriptor members. Fix them up here. */
|
|
|
|
size_t tmp = reloc_addr[0];
|
|
|
|
reloc_addr[0] = reloc_addr[1];
|
|
|
|
reloc_addr[1] = tmp;
|
|
|
|
#endif
|
2014-06-19 10:01:06 +04:00
|
|
|
break;
|
2015-04-13 09:56:26 +03:00
|
|
|
default:
|
|
|
|
error("Error relocating %s: unsupported relocation type %d",
|
|
|
|
dso->name, type);
|
2015-04-19 01:00:22 +03:00
|
|
|
if (runtime) longjmp(*rtld_fail, 1);
|
2015-04-13 09:56:26 +03:00
|
|
|
continue;
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-13 15:52:41 +03:00
|
|
|
static void redo_lazy_relocs()
|
|
|
|
{
|
|
|
|
struct dso *p = lazy_head, *next;
|
|
|
|
lazy_head = 0;
|
|
|
|
for (; p; p=next) {
|
|
|
|
next = p->lazy_next;
|
|
|
|
size_t size = p->lazy_cnt*3*sizeof(size_t);
|
|
|
|
p->lazy_cnt = 0;
|
|
|
|
do_relocs(p, p->lazy, size, 3);
|
|
|
|
if (p->lazy_cnt) {
|
|
|
|
p->lazy_next = lazy_head;
|
|
|
|
lazy_head = p;
|
|
|
|
} else {
|
|
|
|
free(p->lazy);
|
|
|
|
p->lazy = 0;
|
|
|
|
p->lazy_next = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-29 03:40:14 +04:00
|
|
|
/* A huge hack: to make up for the wastefulness of shared libraries
|
|
|
|
* needing at least a page of dirty memory even if they have no global
|
|
|
|
* data, we reclaim the gaps at the beginning and end of writable maps
|
2018-04-16 20:54:36 +03:00
|
|
|
* and "donate" them to the heap. */
|
2011-06-29 03:40:14 +04:00
|
|
|
|
2014-03-25 16:13:27 +04:00
|
|
|
static void reclaim(struct dso *dso, size_t start, size_t end)
|
2011-06-29 03:40:14 +04:00
|
|
|
{
|
2014-03-25 16:13:27 +04:00
|
|
|
if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
|
|
|
|
if (end >= dso->relro_start && end < dso->relro_end) end = dso->relro_start;
|
2018-04-16 20:54:36 +03:00
|
|
|
if (start >= end) return;
|
2018-04-17 22:55:18 +03:00
|
|
|
char *base = laddr_pg(dso, start);
|
|
|
|
__malloc_donate(base, base+(end-start));
|
2011-06-29 03:40:14 +04:00
|
|
|
}
|
|
|
|
|
2014-03-25 22:59:50 +04:00
|
|
|
static void reclaim_gaps(struct dso *dso)
|
2011-06-29 03:40:14 +04:00
|
|
|
{
|
2014-03-26 00:21:50 +04:00
|
|
|
Phdr *ph = dso->phdr;
|
|
|
|
size_t phcnt = dso->phnum;
|
2014-03-25 22:59:50 +04:00
|
|
|
|
2014-03-26 00:21:50 +04:00
|
|
|
for (; phcnt--; ph=(void *)((char *)ph+dso->phentsize)) {
|
2011-06-29 03:40:14 +04:00
|
|
|
if (ph->p_type!=PT_LOAD) continue;
|
|
|
|
if ((ph->p_flags&(PF_R|PF_W))!=(PF_R|PF_W)) continue;
|
2014-03-25 16:13:27 +04:00
|
|
|
reclaim(dso, ph->p_vaddr & -PAGE_SIZE, ph->p_vaddr);
|
|
|
|
reclaim(dso, ph->p_vaddr+ph->p_memsz,
|
2011-06-29 03:40:14 +04:00
|
|
|
ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-23 07:03:42 +03:00
|
|
|
static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off)
|
|
|
|
{
|
2015-11-12 01:34:17 +03:00
|
|
|
static int no_map_fixed;
|
|
|
|
char *q;
|
|
|
|
if (!no_map_fixed) {
|
|
|
|
q = mmap(p, n, prot, flags|MAP_FIXED, fd, off);
|
|
|
|
if (!DL_NOMMU_SUPPORT || q != MAP_FAILED || errno != EINVAL)
|
|
|
|
return q;
|
|
|
|
no_map_fixed = 1;
|
|
|
|
}
|
2015-06-23 07:03:42 +03:00
|
|
|
/* Fallbacks for MAP_FIXED failure on NOMMU kernels. */
|
|
|
|
if (flags & MAP_ANONYMOUS) {
|
|
|
|
memset(p, 0, n);
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
ssize_t r;
|
|
|
|
if (lseek(fd, off, SEEK_SET) < 0) return MAP_FAILED;
|
|
|
|
for (q=p; n; q+=r, off+=r, n-=r) {
|
|
|
|
r = read(fd, q, n);
|
|
|
|
if (r < 0 && errno != EINTR) return MAP_FAILED;
|
|
|
|
if (!r) {
|
|
|
|
memset(q, 0, n);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2015-09-22 22:12:48 +03:00
|
|
|
static void unmap_library(struct dso *dso)
|
|
|
|
{
|
|
|
|
if (dso->loadmap) {
|
|
|
|
size_t i;
|
|
|
|
for (i=0; i<dso->loadmap->nsegs; i++) {
|
|
|
|
if (!dso->loadmap->segs[i].p_memsz)
|
|
|
|
continue;
|
|
|
|
munmap((void *)dso->loadmap->segs[i].addr,
|
|
|
|
dso->loadmap->segs[i].p_memsz);
|
|
|
|
}
|
|
|
|
free(dso->loadmap);
|
|
|
|
} else if (dso->map && dso->map_len) {
|
|
|
|
munmap(dso->map, dso->map_len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-05 04:04:13 +04:00
|
|
|
static void *map_library(int fd, struct dso *dso)
|
2011-06-19 03:48:42 +04:00
|
|
|
{
|
2011-06-25 20:26:08 +04:00
|
|
|
Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)];
|
2013-08-02 17:56:49 +04:00
|
|
|
void *allocated_buf=0;
|
2011-06-19 03:48:42 +04:00
|
|
|
size_t phsize;
|
|
|
|
size_t addr_min=SIZE_MAX, addr_max=0, map_len;
|
|
|
|
size_t this_min, this_max;
|
2015-09-22 22:12:48 +03:00
|
|
|
size_t nsegs = 0;
|
2011-06-19 03:48:42 +04:00
|
|
|
off_t off_start;
|
|
|
|
Ehdr *eh;
|
2013-07-10 22:38:20 +04:00
|
|
|
Phdr *ph, *ph0;
|
2011-06-19 03:48:42 +04:00
|
|
|
unsigned prot;
|
2013-08-02 17:56:49 +04:00
|
|
|
unsigned char *map=MAP_FAILED, *base;
|
2013-08-02 17:25:12 +04:00
|
|
|
size_t dyn=0;
|
2012-10-05 04:04:13 +04:00
|
|
|
size_t tls_image=0;
|
2011-06-19 03:48:42 +04:00
|
|
|
size_t i;
|
|
|
|
|
|
|
|
ssize_t l = read(fd, buf, sizeof buf);
|
2011-06-25 20:26:08 +04:00
|
|
|
eh = buf;
|
2013-08-02 17:56:49 +04:00
|
|
|
if (l<0) return 0;
|
|
|
|
if (l<sizeof *eh || (eh->e_type != ET_DYN && eh->e_type != ET_EXEC))
|
|
|
|
goto noexec;
|
2011-06-19 03:48:42 +04:00
|
|
|
phsize = eh->e_phentsize * eh->e_phnum;
|
2013-08-02 17:56:49 +04:00
|
|
|
if (phsize > sizeof buf - sizeof *eh) {
|
|
|
|
allocated_buf = malloc(phsize);
|
|
|
|
if (!allocated_buf) return 0;
|
|
|
|
l = pread(fd, allocated_buf, phsize, eh->e_phoff);
|
|
|
|
if (l < 0) goto error;
|
|
|
|
if (l != phsize) goto noexec;
|
|
|
|
ph = ph0 = allocated_buf;
|
|
|
|
} else if (eh->e_phoff + phsize > l) {
|
2011-06-25 20:26:08 +04:00
|
|
|
l = pread(fd, buf+1, phsize, eh->e_phoff);
|
2013-08-02 17:56:49 +04:00
|
|
|
if (l < 0) goto error;
|
|
|
|
if (l != phsize) goto noexec;
|
2013-07-10 22:38:20 +04:00
|
|
|
ph = ph0 = (void *)(buf + 1);
|
|
|
|
} else {
|
|
|
|
ph = ph0 = (void *)((char *)buf + eh->e_phoff);
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
|
|
|
for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
|
2014-03-26 00:21:50 +04:00
|
|
|
if (ph->p_type == PT_DYNAMIC) {
|
2011-06-19 03:48:42 +04:00
|
|
|
dyn = ph->p_vaddr;
|
2014-03-26 00:21:50 +04:00
|
|
|
} else if (ph->p_type == PT_TLS) {
|
2012-10-05 04:04:13 +04:00
|
|
|
tls_image = ph->p_vaddr;
|
2015-11-12 23:50:26 +03:00
|
|
|
dso->tls.align = ph->p_align;
|
|
|
|
dso->tls.len = ph->p_filesz;
|
|
|
|
dso->tls.size = ph->p_memsz;
|
2014-03-25 16:13:27 +04:00
|
|
|
} else if (ph->p_type == PT_GNU_RELRO) {
|
|
|
|
dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
|
|
|
|
dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
|
support setting of default thread stack size via PT_GNU_STACK header
this facilitates building software that assumes a large default stack
size without any patching to call pthread_setattr_default_np or
pthread_attr_setstacksize at each thread creation site, using just
LDFLAGS.
normally the PT_GNU_STACK header is used only to reflect whether
executable stack is desired, but with GNU ld at least, passing
-Wl,-z,stack-size=N will set a size on the program header. with this
patch, that size will be incorporated into the default stack size
(subject to increase-only rule and DEFAULT_STACK_MAX limit).
both static and dynamic linking honor the program header. for dynamic
linking, all libraries loaded at program start, including preloaded
ones, are considered. dlopened libraries are not considered, for
several reasons. extra logic would be needed to defer processing until
the load of the new library is committed, synchronization would be
needed since other threads may be running concurrently, and the
effectiveness would be limited since the larger size would not apply to
threads that already existed at the time of dlopen. programs that will
dlopen code expecting a large stack need to declare the requirement
themselves, or pthread_setattr_default_np can be used.
2018-09-19 06:54:18 +03:00
|
|
|
} else if (ph->p_type == PT_GNU_STACK) {
|
|
|
|
if (!runtime && ph->p_memsz > __default_stacksize) {
|
|
|
|
__default_stacksize =
|
|
|
|
ph->p_memsz < DEFAULT_STACK_MAX ?
|
|
|
|
ph->p_memsz : DEFAULT_STACK_MAX;
|
|
|
|
}
|
2012-10-05 04:04:13 +04:00
|
|
|
}
|
2011-06-19 03:48:42 +04:00
|
|
|
if (ph->p_type != PT_LOAD) continue;
|
2015-09-22 22:12:48 +03:00
|
|
|
nsegs++;
|
2011-06-19 03:48:42 +04:00
|
|
|
if (ph->p_vaddr < addr_min) {
|
|
|
|
addr_min = ph->p_vaddr;
|
|
|
|
off_start = ph->p_offset;
|
|
|
|
prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
|
|
|
|
((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
|
|
|
|
((ph->p_flags&PF_X) ? PROT_EXEC : 0));
|
|
|
|
}
|
|
|
|
if (ph->p_vaddr+ph->p_memsz > addr_max) {
|
|
|
|
addr_max = ph->p_vaddr+ph->p_memsz;
|
|
|
|
}
|
|
|
|
}
|
2013-08-02 17:56:49 +04:00
|
|
|
if (!dyn) goto noexec;
|
2015-09-22 22:12:48 +03:00
|
|
|
if (DL_FDPIC && !(eh->e_flags & FDPIC_CONSTDISP_FLAG)) {
|
|
|
|
dso->loadmap = calloc(1, sizeof *dso->loadmap
|
|
|
|
+ nsegs * sizeof *dso->loadmap->segs);
|
|
|
|
if (!dso->loadmap) goto error;
|
|
|
|
dso->loadmap->nsegs = nsegs;
|
|
|
|
for (ph=ph0, i=0; i<nsegs; ph=(void *)((char *)ph+eh->e_phentsize)) {
|
|
|
|
if (ph->p_type != PT_LOAD) continue;
|
|
|
|
prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
|
|
|
|
((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
|
|
|
|
((ph->p_flags&PF_X) ? PROT_EXEC : 0));
|
|
|
|
map = mmap(0, ph->p_memsz + (ph->p_vaddr & PAGE_SIZE-1),
|
2015-11-16 05:28:41 +03:00
|
|
|
prot, MAP_PRIVATE,
|
2015-09-22 22:12:48 +03:00
|
|
|
fd, ph->p_offset & -PAGE_SIZE);
|
|
|
|
if (map == MAP_FAILED) {
|
|
|
|
unmap_library(dso);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
dso->loadmap->segs[i].addr = (size_t)map +
|
|
|
|
(ph->p_vaddr & PAGE_SIZE-1);
|
|
|
|
dso->loadmap->segs[i].p_vaddr = ph->p_vaddr;
|
|
|
|
dso->loadmap->segs[i].p_memsz = ph->p_memsz;
|
|
|
|
i++;
|
2015-10-29 04:45:31 +03:00
|
|
|
if (prot & PROT_WRITE) {
|
|
|
|
size_t brk = (ph->p_vaddr & PAGE_SIZE-1)
|
|
|
|
+ ph->p_filesz;
|
|
|
|
size_t pgbrk = brk + PAGE_SIZE-1 & -PAGE_SIZE;
|
|
|
|
size_t pgend = brk + ph->p_memsz - ph->p_filesz
|
|
|
|
+ PAGE_SIZE-1 & -PAGE_SIZE;
|
|
|
|
if (pgend > pgbrk && mmap_fixed(map+pgbrk,
|
|
|
|
pgend-pgbrk, prot,
|
|
|
|
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,
|
|
|
|
-1, off_start) == MAP_FAILED)
|
|
|
|
goto error;
|
|
|
|
memset(map + brk, 0, pgbrk-brk);
|
|
|
|
}
|
2015-09-22 22:12:48 +03:00
|
|
|
}
|
|
|
|
map = (void *)dso->loadmap->segs[0].addr;
|
|
|
|
map_len = 0;
|
|
|
|
goto done_mapping;
|
|
|
|
}
|
2011-06-19 03:48:42 +04:00
|
|
|
addr_max += PAGE_SIZE-1;
|
|
|
|
addr_max &= -PAGE_SIZE;
|
|
|
|
addr_min &= -PAGE_SIZE;
|
|
|
|
off_start &= -PAGE_SIZE;
|
|
|
|
map_len = addr_max - addr_min + off_start;
|
|
|
|
/* The first time, we map too much, possibly even more than
|
|
|
|
* the length of the file. This is okay because we will not
|
|
|
|
* use the invalid part; we just need to reserve the right
|
|
|
|
* amount of virtual address space to map over later. */
|
2015-11-12 01:34:17 +03:00
|
|
|
map = DL_NOMMU_SUPPORT
|
|
|
|
? mmap((void *)addr_min, map_len, PROT_READ|PROT_WRITE|PROT_EXEC,
|
|
|
|
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
|
|
|
|
: mmap((void *)addr_min, map_len, prot,
|
|
|
|
MAP_PRIVATE, fd, off_start);
|
2013-08-02 17:56:49 +04:00
|
|
|
if (map==MAP_FAILED) goto error;
|
2015-09-22 22:12:48 +03:00
|
|
|
dso->map = map;
|
|
|
|
dso->map_len = map_len;
|
2013-07-31 22:42:08 +04:00
|
|
|
/* If the loaded file is not relocatable and the requested address is
|
|
|
|
* not available, then the load operation must fail. */
|
|
|
|
if (eh->e_type != ET_DYN && addr_min && map!=(void *)addr_min) {
|
|
|
|
errno = EBUSY;
|
|
|
|
goto error;
|
|
|
|
}
|
2011-06-19 03:48:42 +04:00
|
|
|
base = map - addr_min;
|
2013-07-10 22:38:20 +04:00
|
|
|
dso->phdr = 0;
|
|
|
|
dso->phnum = 0;
|
|
|
|
for (ph=ph0, i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
|
2011-06-19 03:48:42 +04:00
|
|
|
if (ph->p_type != PT_LOAD) continue;
|
2013-07-10 22:38:20 +04:00
|
|
|
/* Check if the programs headers are in this load segment, and
|
|
|
|
* if so, record the address for use by dl_iterate_phdr. */
|
|
|
|
if (!dso->phdr && eh->e_phoff >= ph->p_offset
|
|
|
|
&& eh->e_phoff+phsize <= ph->p_offset+ph->p_filesz) {
|
|
|
|
dso->phdr = (void *)(base + ph->p_vaddr
|
|
|
|
+ (eh->e_phoff-ph->p_offset));
|
|
|
|
dso->phnum = eh->e_phnum;
|
2014-03-25 22:59:50 +04:00
|
|
|
dso->phentsize = eh->e_phentsize;
|
2013-07-10 22:38:20 +04:00
|
|
|
}
|
2011-06-19 03:48:42 +04:00
|
|
|
this_min = ph->p_vaddr & -PAGE_SIZE;
|
|
|
|
this_max = ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE;
|
|
|
|
off_start = ph->p_offset & -PAGE_SIZE;
|
|
|
|
prot = (((ph->p_flags&PF_R) ? PROT_READ : 0) |
|
|
|
|
((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
|
|
|
|
((ph->p_flags&PF_X) ? PROT_EXEC : 0));
|
2018-06-26 19:15:13 +03:00
|
|
|
/* Reuse the existing mapping for the lowest-address LOAD */
|
|
|
|
if ((ph->p_vaddr & -PAGE_SIZE) != addr_min || DL_NOMMU_SUPPORT)
|
|
|
|
if (mmap_fixed(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start) == MAP_FAILED)
|
|
|
|
goto error;
|
|
|
|
if (ph->p_memsz > ph->p_filesz && (ph->p_flags&PF_W)) {
|
2011-06-19 03:48:42 +04:00
|
|
|
size_t brk = (size_t)base+ph->p_vaddr+ph->p_filesz;
|
|
|
|
size_t pgbrk = brk+PAGE_SIZE-1 & -PAGE_SIZE;
|
|
|
|
memset((void *)brk, 0, pgbrk-brk & PAGE_SIZE-1);
|
2015-06-23 07:03:42 +03:00
|
|
|
if (pgbrk-(size_t)base < this_max && mmap_fixed((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
|
2012-06-06 19:21:28 +04:00
|
|
|
goto error;
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
|
|
|
}
|
2011-06-29 08:29:08 +04:00
|
|
|
for (i=0; ((size_t *)(base+dyn))[i]; i+=2)
|
|
|
|
if (((size_t *)(base+dyn))[i]==DT_TEXTREL) {
|
2015-06-17 20:21:46 +03:00
|
|
|
if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC)
|
|
|
|
&& errno != ENOSYS)
|
2012-06-06 19:21:28 +04:00
|
|
|
goto error;
|
2011-06-29 08:29:08 +04:00
|
|
|
break;
|
|
|
|
}
|
2015-09-22 22:12:48 +03:00
|
|
|
done_mapping:
|
2012-10-05 04:04:13 +04:00
|
|
|
dso->base = base;
|
2015-09-22 22:12:48 +03:00
|
|
|
dso->dynv = laddr(dso, dyn);
|
2015-11-12 23:50:26 +03:00
|
|
|
if (dso->tls.size) dso->tls.image = laddr(dso, tls_image);
|
2013-08-02 17:59:02 +04:00
|
|
|
free(allocated_buf);
|
2011-06-19 03:48:42 +04:00
|
|
|
return map;
|
2013-08-02 17:56:49 +04:00
|
|
|
noexec:
|
|
|
|
errno = ENOEXEC;
|
2012-06-06 19:21:28 +04:00
|
|
|
error:
|
2015-09-22 22:12:48 +03:00
|
|
|
if (map!=MAP_FAILED) unmap_library(dso);
|
2013-08-02 17:56:49 +04:00
|
|
|
free(allocated_buf);
|
2012-06-06 19:21:28 +04:00
|
|
|
return 0;
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
|
|
|
|
2013-04-20 19:51:58 +04:00
|
|
|
/* Try to open name inside each directory of the search list s, whose
 * entries are separated by ':' or newline characters. Each candidate
 * pathname is formatted into buf (capacity buf_size); candidates that
 * do not fit are skipped. Returns the open file descriptor on success,
 * -1 once the list is exhausted, or -2 if open fails with an error
 * other than ENOENT, ENOTDIR, EACCES or ENAMETOOLONG. */
static int path_open(const char *name, const char *s, char *buf, size_t buf_size)
{
	for (;;) {
		size_t dirlen, need;
		int fd;

		/* Skip separators, then measure the next list entry. */
		s += strspn(s, ":\n");
		dirlen = strcspn(s, ":\n");

		/* Unsigned wraparound makes this true for an empty entry
		 * (end of list) as well as for an entry too long to be
		 * passed as an int precision below. */
		if (dirlen-1 >= INT_MAX) return -1;

		need = snprintf(buf, buf_size, "%.*s/%s", (int)dirlen, s, name);
		if (need < buf_size) {
			fd = open(buf, O_RDONLY|O_CLOEXEC);
			if (fd >= 0) return fd;
			if (errno != ENOENT && errno != ENOTDIR
			 && errno != EACCES && errno != ENAMETOOLONG) {
				/* Any negative value but -1 will inhibit
				 * further path search. */
				return -2;
			}
		}
		s += dirlen;
	}
}
|
|
|
|
|
2013-08-23 21:56:30 +04:00
|
|
|
static int fixup_rpath(struct dso *p, char *buf, size_t buf_size)
|
|
|
|
{
|
|
|
|
size_t n, l;
|
|
|
|
const char *s, *t, *origin;
|
|
|
|
char *d;
|
2015-04-03 23:35:43 +03:00
|
|
|
if (p->rpath || !p->rpath_orig) return 0;
|
2013-08-23 21:56:30 +04:00
|
|
|
if (!strchr(p->rpath_orig, '$')) {
|
|
|
|
p->rpath = p->rpath_orig;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
n = 0;
|
|
|
|
s = p->rpath_orig;
|
2013-08-23 23:51:59 +04:00
|
|
|
while ((t=strchr(s, '$'))) {
|
|
|
|
if (strncmp(t, "$ORIGIN", 7) && strncmp(t, "${ORIGIN}", 9))
|
2015-04-03 23:35:43 +03:00
|
|
|
return 0;
|
2013-08-23 21:56:30 +04:00
|
|
|
s = t+1;
|
|
|
|
n++;
|
|
|
|
}
|
2015-04-03 23:35:43 +03:00
|
|
|
if (n > SSIZE_MAX/PATH_MAX) return 0;
|
2013-08-23 21:56:30 +04:00
|
|
|
|
|
|
|
if (p->kernel_mapped) {
|
|
|
|
/* $ORIGIN searches cannot be performed for the main program
|
|
|
|
* when it is suid/sgid/AT_SECURE. This is because the
|
|
|
|
* pathname is under the control of the caller of execve.
|
|
|
|
* For libraries, however, $ORIGIN can be processed safely
|
|
|
|
* since the library's pathname came from a trusted source
|
|
|
|
* (either system paths or a call to dlopen). */
|
|
|
|
if (libc.secure)
|
2015-04-03 23:35:43 +03:00
|
|
|
return 0;
|
2013-08-23 23:51:59 +04:00
|
|
|
l = readlink("/proc/self/exe", buf, buf_size);
|
2015-04-03 23:35:43 +03:00
|
|
|
if (l == -1) switch (errno) {
|
|
|
|
case ENOENT:
|
|
|
|
case ENOTDIR:
|
|
|
|
case EACCES:
|
|
|
|
break;
|
|
|
|
default:
|
2013-08-23 21:56:30 +04:00
|
|
|
return -1;
|
2015-04-03 23:35:43 +03:00
|
|
|
}
|
|
|
|
if (l >= buf_size)
|
|
|
|
return 0;
|
2013-08-23 23:51:59 +04:00
|
|
|
buf[l] = 0;
|
2013-08-23 21:56:30 +04:00
|
|
|
origin = buf;
|
|
|
|
} else {
|
|
|
|
origin = p->name;
|
|
|
|
}
|
|
|
|
t = strrchr(origin, '/');
|
2018-02-07 22:27:08 +03:00
|
|
|
if (t) {
|
|
|
|
l = t-origin;
|
|
|
|
} else {
|
|
|
|
/* Normally p->name will always be an absolute or relative
|
|
|
|
* pathname containing at least one '/' character, but in the
|
|
|
|
* case where ldso was invoked as a command to execute a
|
|
|
|
* program in the working directory, app.name may not. Fix. */
|
|
|
|
origin = ".";
|
|
|
|
l = 1;
|
|
|
|
}
|
2018-02-07 22:31:42 +03:00
|
|
|
/* Disallow non-absolute origins for suid/sgid/AT_SECURE. */
|
|
|
|
if (libc.secure && *origin != '/')
|
|
|
|
return 0;
|
2013-08-23 21:56:30 +04:00
|
|
|
p->rpath = malloc(strlen(p->rpath_orig) + n*l + 1);
|
|
|
|
if (!p->rpath) return -1;
|
|
|
|
|
|
|
|
d = p->rpath;
|
|
|
|
s = p->rpath_orig;
|
2013-08-23 23:51:59 +04:00
|
|
|
while ((t=strchr(s, '$'))) {
|
2013-08-23 21:56:30 +04:00
|
|
|
memcpy(d, s, t-s);
|
|
|
|
d += t-s;
|
|
|
|
memcpy(d, origin, l);
|
|
|
|
d += l;
|
2013-08-23 23:51:59 +04:00
|
|
|
/* It was determined previously that the '$' is followed
|
|
|
|
* either by "ORIGIN" or "{ORIGIN}". */
|
2013-08-23 21:56:30 +04:00
|
|
|
s = t + 7 + 2*(t[1]=='{');
|
|
|
|
}
|
|
|
|
strcpy(d, s);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-01-23 09:57:38 +04:00
|
|
|
static void decode_dyn(struct dso *p)
|
|
|
|
{
|
2015-04-14 05:38:18 +03:00
|
|
|
size_t dyn[DYN_CNT];
|
2012-01-23 09:57:38 +04:00
|
|
|
decode_vec(p->dynv, dyn, DYN_CNT);
|
2015-09-17 20:18:09 +03:00
|
|
|
p->syms = laddr(p, dyn[DT_SYMTAB]);
|
|
|
|
p->strings = laddr(p, dyn[DT_STRTAB]);
|
2012-08-26 01:13:28 +04:00
|
|
|
if (dyn[0]&(1<<DT_HASH))
|
2015-09-17 20:18:09 +03:00
|
|
|
p->hashtab = laddr(p, dyn[DT_HASH]);
|
2013-08-23 19:15:40 +04:00
|
|
|
if (dyn[0]&(1<<DT_RPATH))
|
2015-09-17 22:21:55 +03:00
|
|
|
p->rpath_orig = p->strings + dyn[DT_RPATH];
|
2014-11-24 00:17:57 +03:00
|
|
|
if (dyn[0]&(1<<DT_RUNPATH))
|
2015-09-17 22:21:55 +03:00
|
|
|
p->rpath_orig = p->strings + dyn[DT_RUNPATH];
|
2015-09-22 06:54:42 +03:00
|
|
|
if (dyn[0]&(1<<DT_PLTGOT))
|
|
|
|
p->got = laddr(p, dyn[DT_PLTGOT]);
|
2012-08-26 01:13:28 +04:00
|
|
|
if (search_vec(p->dynv, dyn, DT_GNU_HASH))
|
2015-09-17 20:18:09 +03:00
|
|
|
p->ghashtab = laddr(p, *dyn);
|
2013-08-09 00:10:35 +04:00
|
|
|
if (search_vec(p->dynv, dyn, DT_VERSYM))
|
2015-09-17 20:18:09 +03:00
|
|
|
p->versym = laddr(p, *dyn);
|
2012-01-23 09:57:38 +04:00
|
|
|
}
|
|
|
|
|
2015-09-22 00:47:50 +03:00
|
|
|
static size_t count_syms(struct dso *p)
|
|
|
|
{
|
|
|
|
if (p->hashtab) return p->hashtab[1];
|
|
|
|
|
|
|
|
size_t nsym, i;
|
|
|
|
uint32_t *buckets = p->ghashtab + 4 + (p->ghashtab[2]*sizeof(size_t)/4);
|
|
|
|
uint32_t *hashval;
|
|
|
|
for (i = nsym = 0; i < p->ghashtab[0]; i++) {
|
|
|
|
if (buckets[i] > nsym)
|
|
|
|
nsym = buckets[i];
|
|
|
|
}
|
|
|
|
if (nsym) {
|
|
|
|
hashval = buckets + p->ghashtab[0] + (nsym - p->ghashtab[1]);
|
|
|
|
do nsym++;
|
|
|
|
while (!(*hashval++ & 1));
|
|
|
|
}
|
|
|
|
return nsym;
|
|
|
|
}
|
|
|
|
|
2015-09-22 06:54:42 +03:00
|
|
|
static void *dl_mmap(size_t n)
|
|
|
|
{
|
|
|
|
void *p;
|
|
|
|
int prot = PROT_READ|PROT_WRITE, flags = MAP_ANONYMOUS|MAP_PRIVATE;
|
|
|
|
#ifdef SYS_mmap2
|
|
|
|
p = (void *)__syscall(SYS_mmap2, 0, n, prot, flags, -1, 0);
|
|
|
|
#else
|
|
|
|
p = (void *)__syscall(SYS_mmap, 0, n, prot, flags, -1, 0);
|
|
|
|
#endif
|
2019-02-09 17:56:17 +03:00
|
|
|
return (unsigned long)p > -4096UL ? 0 : p;
|
2015-09-22 06:54:42 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void makefuncdescs(struct dso *p)
|
|
|
|
{
|
|
|
|
static int self_done;
|
|
|
|
size_t nsym = count_syms(p);
|
|
|
|
size_t i, size = nsym * sizeof(*p->funcdescs);
|
|
|
|
|
|
|
|
if (!self_done) {
|
|
|
|
p->funcdescs = dl_mmap(size);
|
|
|
|
self_done = 1;
|
|
|
|
} else {
|
|
|
|
p->funcdescs = malloc(size);
|
|
|
|
}
|
|
|
|
if (!p->funcdescs) {
|
|
|
|
if (!runtime) a_crash();
|
|
|
|
error("Error allocating function descriptors for %s", p->name);
|
|
|
|
longjmp(*rtld_fail, 1);
|
|
|
|
}
|
|
|
|
for (i=0; i<nsym; i++) {
|
|
|
|
if ((p->syms[i].st_info&0xf)==STT_FUNC && p->syms[i].st_shndx) {
|
|
|
|
p->funcdescs[i].addr = laddr(p, p->syms[i].st_value);
|
|
|
|
p->funcdescs[i].got = p->got;
|
|
|
|
} else {
|
|
|
|
p->funcdescs[i].addr = 0;
|
|
|
|
p->funcdescs[i].got = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-23 19:15:40 +04:00
|
|
|
/* Load the shared object called name on behalf of needed_by and return
 * its struct dso, or 0 on failure. An already-loaded object is
 * returned instead of being mapped again. A name containing '/' is
 * opened directly; any other name is matched against the shortnames of
 * loaded objects and then searched for in env_path, in the rpath of
 * each object on the needed_by chain, and finally in sys_path. */
static struct dso *load_library(const char *name, struct dso *needed_by)
{
	char buf[2*NAME_MAX+2];
	const char *pathname;
	unsigned char *map;
	struct dso *p, temp_dso = {0};
	int fd;
	struct stat st;
	size_t alloc_size;
	int n_th = 0;
	int is_self = 0;

	if (!*name) {
		errno = EINVAL;
		return 0;
	}

	/* Catch and block attempts to reload the implementation itself */
	if (name[0]=='l' && name[1]=='i' && name[2]=='b') {
		/* Each reserved entry includes its trailing '.', so the
		 * comparison below matches "lib<entry>." as a prefix. */
		static const char reserved[] =
			"c.pthread.rt.m.dl.util.xnet.";
		const char *rp, *next;
		for (rp=reserved; *rp; rp=next) {
			next = strchr(rp, '.') + 1;
			if (strncmp(name+3, rp, next-rp) == 0)
				break;
		}
		/* rp stops on the matching entry, or on the terminating
		 * null byte when nothing matched. */
		if (*rp) {
			if (ldd_mode) {
				/* Track which names have been resolved
				 * and only report each one once. */
				static unsigned reported;
				/* The bit index is the entry's offset
				 * within the reserved string. */
				unsigned mask = 1U<<(rp-reserved);
				if (!(reported & mask)) {
					reported |= mask;
					dprintf(1, "\t%s => %s (%p)\n",
						name, ldso.name,
						ldso.base);
				}
			}
			is_self = 1;
		}
	}
	if (!strcmp(name, ldso.name)) is_self = 1;
	if (is_self) {
		/* Link ldso at the end of the dso list the first time it
		 * is requested, then always hand back ldso itself. */
		if (!ldso.prev) {
			tail->next = &ldso;
			ldso.prev = tail;
			tail = &ldso;
		}
		return &ldso;
	}
	if (strchr(name, '/')) {
		pathname = name;
		fd = open(name, O_RDONLY|O_CLOEXEC);
	} else {
		/* Search for the name to see if it's already loaded */
		for (p=head->next; p; p=p->next) {
			if (p->shortname && !strcmp(p->shortname, name)) {
				return p;
			}
		}
		if (strlen(name) > NAME_MAX) return 0;
		fd = -1;
		if (env_path) fd = path_open(name, env_path, buf, sizeof buf);
		/* Walk up the chain of requesting objects while the result
		 * is still -1; any other value ends the walk. */
		for (p=needed_by; fd == -1 && p; p=p->needed_by) {
			if (fixup_rpath(p, buf, sizeof buf) < 0)
				fd = -2; /* Inhibit further search. */
			if (p->rpath)
				fd = path_open(name, p->rpath, buf, sizeof buf);
		}
		if (fd == -1) {
			/* sys_path is set up on first use and kept for
			 * later calls. */
			if (!sys_path) {
				char *prefix = 0;
				size_t prefix_len;
				if (ldso.name[0]=='/') {
					char *s, *t, *z;
					/* t tracks the last '/' seen and z
					 * the one before it, so the prefix
					 * is ldso.name up to, but excluding,
					 * its second-to-last '/'. */
					for (s=t=z=ldso.name; *s; s++)
						if (*s=='/') z=t, t=s;
					prefix_len = z-ldso.name;
					if (prefix_len < PATH_MAX)
						prefix = ldso.name;
				}
				if (!prefix) {
					prefix = "";
					prefix_len = 0;
				}
				char etc_ldso_path[prefix_len + 1
					+ sizeof "/etc/ld-musl-" LDSO_ARCH ".path"];
				snprintf(etc_ldso_path, sizeof etc_ldso_path,
					"%.*s/etc/ld-musl-" LDSO_ARCH ".path",
					(int)prefix_len, prefix);
				FILE *f = fopen(etc_ldso_path, "rbe");
				if (f) {
					/* A delimiter of 0 makes getdelim read
					 * up to the first null byte or end
					 * of file. */
					if (getdelim(&sys_path, (size_t[1]){0}, 0, f) <= 0) {
						free(sys_path);
						sys_path = "";
					}
					fclose(f);
				} else if (errno != ENOENT) {
					/* Only a missing file falls through to
					 * the default list below; any other
					 * failure yields an empty path. */
					sys_path = "";
				}
			}
			if (!sys_path) sys_path = "/lib:/usr/local/lib:/usr/lib";
			fd = path_open(name, sys_path, buf, sizeof buf);
		}
		pathname = buf;
	}
	if (fd < 0) return 0;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return 0;
	}
	/* Detect an object that is already loaded under another name by
	 * comparing device and inode numbers. */
	for (p=head->next; p; p=p->next) {
		if (p->dev == st.st_dev && p->ino == st.st_ino) {
			/* If this library was previously loaded with a
			 * pathname but a search found the same inode,
			 * setup its shortname so it can be found by name. */
			if (!p->shortname && pathname != name)
				p->shortname = strrchr(p->name, '/')+1;
			close(fd);
			return p;
		}
	}
	map = noload ? 0 : map_library(fd, &temp_dso);
	close(fd);
	if (!map) return 0;

	/* Avoid the danger of getting two versions of libc mapped into the
	 * same process when an absolute pathname was used. The symbols
	 * checked are chosen to catch both musl and glibc, and to avoid
	 * false positives from interposition-hack libraries. */
	decode_dyn(&temp_dso);
	if (find_sym(&temp_dso, "__libc_start_main", 1).sym &&
	    find_sym(&temp_dso, "stdin", 1).sym) {
		unmap_library(&temp_dso);
		return load_library("libc.so", needed_by);
	}
	/* Past this point, if we haven't reached runtime yet, ldso has
	 * committed either to use the mapped library or to abort execution.
	 * Unmapping is not possible, so we can safely reclaim gaps. */
	if (!runtime) reclaim_gaps(&temp_dso);

	/* Allocate storage for the new DSO. When there is TLS, this
	 * storage must include a reservation for all pre-existing
	 * threads to obtain copies of both the new TLS, and an
	 * extended DTV capable of storing an additional slot for
	 * the newly-loaded DSO. */
	alloc_size = sizeof *p + strlen(pathname) + 1;
	if (runtime && temp_dso.tls.image) {
		size_t per_th = temp_dso.tls.size + temp_dso.tls.align
			+ sizeof(void *) * (tls_cnt+3);
		n_th = libc.threads_minus_1 + 1;
		/* Request SIZE_MAX rather than letting the multiplication
		 * below overflow. */
		if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
		else alloc_size += n_th * per_th;
	}
	p = calloc(1, alloc_size);
	if (!p) {
		unmap_library(&temp_dso);
		return 0;
	}
	memcpy(p, &temp_dso, sizeof temp_dso);
	p->dev = st.st_dev;
	p->ino = st.st_ino;
	p->needed_by = needed_by;
	/* The pathname is stored in the dso's own trailing buffer. */
	p->name = p->buf;
	p->runtime_loaded = runtime;
	strcpy(p->name, pathname);
	/* Add a shortname only if name arg was not an explicit pathname. */
	if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
	if (p->tls.image) {
		p->tls_id = ++tls_cnt;
		tls_align = MAXP2(tls_align, p->tls.align);
#ifdef TLS_ABOVE_TP
		p->tls.offset = tls_offset + ( (p->tls.align-1) &
			(-tls_offset + (uintptr_t)p->tls.image) );
		tls_offset = p->tls.offset + p->tls.size;
#else
		tls_offset += p->tls.size + p->tls.align - 1;
		tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
			& (p->tls.align-1);
		p->tls.offset = tls_offset;
#endif
		/* new_dtv points into the same allocation, after the name,
		 * at an address rounded to a multiple of sizeof(size_t);
		 * new_tls follows n_th*(tls_cnt+1) entries later. */
		p->new_dtv = (void *)(-sizeof(size_t) &
			(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
		p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
		/* Append this module to the list of TLS modules. */
		if (tls_tail) tls_tail->next = &p->tls;
		else libc.tls_head = &p->tls;
		tls_tail = &p->tls;
	}

	/* Append the new object to the end of the dso list. */
	tail->next = p;
	p->prev = tail;
	tail = p;

	if (DL_FDPIC) makefuncdescs(p);

	if (ldd_mode) dprintf(1, "\t%s => %s (%p)\n", name, pathname, p->base);

	return p;
}
|
|
|
|
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
static void load_direct_deps(struct dso *p)
|
|
|
|
{
|
2019-03-01 23:09:16 +03:00
|
|
|
size_t i, cnt=0;
|
2019-03-03 20:12:59 +03:00
|
|
|
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (p->deps) return;
|
2019-03-01 23:09:16 +03:00
|
|
|
/* For head, all preloads are direct pseudo-dependencies.
|
|
|
|
* Count and include them now to avoid realloc later. */
|
|
|
|
if (p==head) for (struct dso *q=p->next; q; q=q->next)
|
|
|
|
cnt++;
|
|
|
|
for (i=0; p->dynv[i]; i+=2)
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (p->dynv[i] == DT_NEEDED) cnt++;
|
2019-03-03 20:12:59 +03:00
|
|
|
/* Use builtin buffer for apps with no external deps, to
|
|
|
|
* preserve property of no runtime failure paths. */
|
|
|
|
p->deps = (p==head && cnt<2) ? builtin_deps :
|
|
|
|
calloc(cnt+1, sizeof *p->deps);
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (!p->deps) {
|
|
|
|
error("Error loading dependencies for %s", p->name);
|
|
|
|
if (runtime) longjmp(*rtld_fail, 1);
|
|
|
|
}
|
2019-03-01 23:09:16 +03:00
|
|
|
cnt=0;
|
|
|
|
if (p==head) for (struct dso *q=p->next; q; q=q->next)
|
|
|
|
p->deps[cnt++] = q;
|
|
|
|
for (i=0; p->dynv[i]; i+=2) {
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (p->dynv[i] != DT_NEEDED) continue;
|
|
|
|
struct dso *dep = load_library(p->strings + p->dynv[i+1], p);
|
|
|
|
if (!dep) {
|
|
|
|
error("Error loading shared library %s: %m (needed by %s)",
|
|
|
|
p->strings + p->dynv[i+1], p->name);
|
|
|
|
if (runtime) longjmp(*rtld_fail, 1);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
p->deps[cnt++] = dep;
|
|
|
|
}
|
2019-03-01 23:09:16 +03:00
|
|
|
p->deps[cnt] = 0;
|
fix and overhaul dlsym dependency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
p->ndeps_direct = cnt;
|
|
|
|
}
|
|
|
|
|
2011-06-19 03:48:42 +04:00
|
|
|
static void load_deps(struct dso *p)
|
|
|
|
{
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (p->deps) return;
|
|
|
|
for (; p; p=p->next)
|
|
|
|
load_direct_deps(p);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void extend_bfs_deps(struct dso *p)
|
|
|
|
{
|
|
|
|
size_t i, j, cnt, ndeps_all;
|
|
|
|
struct dso **tmp;
|
|
|
|
|
2019-03-01 22:37:52 +03:00
|
|
|
/* Can't use realloc if the original p->deps was allocated at
|
2019-03-03 20:12:59 +03:00
|
|
|
* program entry and malloc has been replaced, or if it's
|
|
|
|
* the builtin non-allocated trivial main program deps array. */
|
|
|
|
int no_realloc = (__malloc_replaced && !p->runtime_loaded)
|
|
|
|
|| p->deps == builtin_deps;
|
2019-03-01 22:37:52 +03:00
|
|
|
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (p->bfs_built) return;
|
|
|
|
ndeps_all = p->ndeps_direct;
|
|
|
|
|
|
|
|
/* Mark existing (direct) deps so they won't be duplicated. */
|
|
|
|
for (i=0; p->deps[i]; i++)
|
|
|
|
p->deps[i]->mark = 1;
|
|
|
|
|
|
|
|
/* For each dependency already in the list, copy its list of direct
|
|
|
|
* dependencies to the list, excluding any items already in the
|
|
|
|
* list. Note that the list this loop iterates over will grow during
|
|
|
|
* the loop, but since duplicates are excluded, growth is bounded. */
|
|
|
|
for (i=0; p->deps[i]; i++) {
|
|
|
|
struct dso *dep = p->deps[i];
|
|
|
|
for (j=cnt=0; j<dep->ndeps_direct; j++)
|
|
|
|
if (!dep->deps[j]->mark) cnt++;
|
2019-03-01 22:37:52 +03:00
|
|
|
tmp = no_realloc ?
|
|
|
|
malloc(sizeof(*tmp) * (ndeps_all+cnt+1)) :
|
|
|
|
realloc(p->deps, sizeof(*tmp) * (ndeps_all+cnt+1));
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (!tmp) {
|
|
|
|
error("Error recording dependencies for %s", p->name);
|
|
|
|
if (runtime) longjmp(*rtld_fail, 1);
|
|
|
|
continue;
|
|
|
|
}
|
2019-03-01 22:37:52 +03:00
|
|
|
if (no_realloc) {
|
|
|
|
memcpy(tmp, p->deps, sizeof(*tmp) * (ndeps_all+1));
|
|
|
|
no_realloc = 0;
|
|
|
|
}
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
p->deps = tmp;
|
|
|
|
for (j=0; j<dep->ndeps_direct; j++) {
|
|
|
|
if (dep->deps[j]->mark) continue;
|
|
|
|
dep->deps[j]->mark = 1;
|
|
|
|
p->deps[ndeps_all++] = dep->deps[j];
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
p->deps[ndeps_all] = 0;
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
fix and overhaul dlsym depedency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
p->bfs_built = 1;
|
|
|
|
for (p=head; p; p=p->next)
|
|
|
|
p->mark = 0;
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
|
|
|
|
2011-08-16 08:24:36 +04:00
|
|
|
/* Load each library named in the list s.
 *
 * Names in s are separated by any run of whitespace characters and/or
 * ':' characters. Each name is passed to load_library() with a null
 * second argument; the result is ignored, so a name that fails to load
 * does not stop processing of the rest of the list.
 *
 * s is modified in place while a name is being loaded (a NUL is written
 * over the separator following the name) and restored afterwards, so it
 * must point to writable storage. */
static void load_preload(char *s)
{
	char *end;
	char saved;

	for (end = s; *end; s = end) {
		/* Skip separators preceding the next name. The cast keeps
		 * the isspace argument in the range ISO C requires; a plain
		 * char with the high bit set may be negative. */
		while (*s && (isspace((unsigned char)*s) || *s == ':'))
			s++;

		/* Scan to the end of the name. */
		for (end = s; *end && !isspace((unsigned char)*end) && *end != ':'; end++);

		/* Only separators were left (or the list was empty to begin
		 * with): there is no name, so do not call load_library with
		 * an empty string. */
		if (end == s) break;

		/* Temporarily terminate the name in place. */
		saved = *end;
		*end = 0;
		load_library(s, 0);
		*end = saved;
	}
}
|
|
|
|
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
static void add_syms(struct dso *p)
|
2011-06-27 03:23:28 +04:00
|
|
|
{
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
if (!p->syms_next && syms_tail != p) {
|
|
|
|
syms_tail->syms_next = p;
|
|
|
|
syms_tail = p;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void revert_syms(struct dso *old_tail)
|
|
|
|
{
|
|
|
|
struct dso *p, *next;
|
|
|
|
/* Chop off the tail of the list of dsos that participate in
|
|
|
|
* the global symbol table, reverting them to RTLD_LOCAL. */
|
|
|
|
for (p=old_tail; p; p=next) {
|
|
|
|
next = p->syms_next;
|
|
|
|
p->syms_next = 0;
|
|
|
|
}
|
|
|
|
syms_tail = old_tail;
|
2011-06-27 03:23:28 +04:00
|
|
|
}
|
|
|
|
|
2015-04-13 09:56:26 +03:00
|
|
|
static void do_mips_relocs(struct dso *p, size_t *got)
|
|
|
|
{
|
|
|
|
size_t i, j, rel[2];
|
|
|
|
unsigned char *base = p->base;
|
|
|
|
i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO);
|
reprocess all libc/ldso symbolic relocations in dynamic linking stage 3
commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 introduced early
relocations and subsequent reprocessing as part of the dynamic linker
bootstrap overhaul, to allow use of arbitrary libc functions before
the main application and libraries are loaded, but only reprocessed
GOT/PLT relocation types.
commit c093e2e8201524db0d638920e76bcb6b1d925f3a added reprocessing of
non-GOT/PLT relocations to fix an actual regression that was observed
on powerpc, but only for RELA format tables with out-of-line addends.
REL table (inline addends at the relocation address) reprocessing is
trickier because the first relocation pass clobbers the addends.
this patch extends symbolic relocation reprocessing for libc/ldso to
support all relocation types, whether REL or RELA format tables are
used. it is believed not to alter behavior on any existing archs for
the current dynamic linker and libc code. the motivations for this
change are consistency and future-proofing. it ensures that behavior
does not differ depending on whether REL or RELA tables are used,
which could lead to undetected arch-specific bugs. it also ensures
that, if in the future code depending on additional relocation types
is added to libc.so, either at the source level or as part of the
compiler runtime that gets pulled in (for example, soft-float with TLS
for fenv), the new code will work properly.
the implementation concept is simple: stage 2 of the dynamic linker
counts the number of symbolic relocations in the libc/ldso REL table
and allocates a VLA to save their addends into; stage 3 then uses the
saved addends in place of the inline ones which were clobbered. for
stack safety, a hard limit (currently 4k) is imposed on the number of
such addends; this should be a couple orders of magnitude larger than
the actual need. this number is not a runtime variable that could
break fail-safety; it is constant for a given libc.so build.
2015-05-26 06:33:59 +03:00
|
|
|
if (p==&ldso) {
|
2015-04-13 09:56:26 +03:00
|
|
|
got += i;
|
|
|
|
} else {
|
|
|
|
while (i--) *got++ += (size_t)base;
|
|
|
|
}
|
|
|
|
j=0; search_vec(p->dynv, &j, DT_MIPS_GOTSYM);
|
|
|
|
i=0; search_vec(p->dynv, &i, DT_MIPS_SYMTABNO);
|
|
|
|
Sym *sym = p->syms + j;
|
|
|
|
rel[0] = (unsigned char *)got - base;
|
|
|
|
for (i-=j; i; i--, sym++, rel[0]+=sizeof(size_t)) {
|
2016-03-06 20:25:52 +03:00
|
|
|
rel[1] = R_INFO(sym-p->syms, R_MIPS_JUMP_SLOT);
|
2015-04-13 09:56:26 +03:00
|
|
|
do_relocs(p, rel, sizeof rel, 2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-19 03:48:42 +04:00
|
|
|
static void reloc_all(struct dso *p)
|
|
|
|
{
|
2015-04-14 05:38:18 +03:00
|
|
|
size_t dyn[DYN_CNT];
|
2011-06-19 03:48:42 +04:00
|
|
|
for (; p; p=p->next) {
|
|
|
|
if (p->relocated) continue;
|
|
|
|
decode_vec(p->dynv, dyn, DYN_CNT);
|
2015-04-13 09:56:26 +03:00
|
|
|
if (NEED_MIPS_GOT_RELOCS)
|
2015-09-17 22:45:45 +03:00
|
|
|
do_mips_relocs(p, laddr(p, dyn[DT_PLTGOT]));
|
|
|
|
do_relocs(p, laddr(p, dyn[DT_JMPREL]), dyn[DT_PLTRELSZ],
|
2012-08-05 10:49:02 +04:00
|
|
|
2+(dyn[DT_PLTREL]==DT_RELA));
|
2015-09-17 22:45:45 +03:00
|
|
|
do_relocs(p, laddr(p, dyn[DT_REL]), dyn[DT_RELSZ], 2);
|
|
|
|
do_relocs(p, laddr(p, dyn[DT_RELA]), dyn[DT_RELASZ], 3);
|
2014-03-25 16:13:27 +04:00
|
|
|
|
2015-04-13 09:56:26 +03:00
|
|
|
if (head != &ldso && p->relro_start != p->relro_end &&
|
2015-09-17 22:45:45 +03:00
|
|
|
mprotect(laddr(p, p->relro_start), p->relro_end-p->relro_start, PROT_READ)
|
2015-06-17 20:21:46 +03:00
|
|
|
&& errno != ENOSYS) {
|
2014-06-30 05:52:54 +04:00
|
|
|
error("Error relocating %s: RELRO protection failed: %m",
|
2014-03-25 16:13:27 +04:00
|
|
|
p->name);
|
2015-04-19 01:00:22 +03:00
|
|
|
if (runtime) longjmp(*rtld_fail, 1);
|
2014-03-25 16:13:27 +04:00
|
|
|
}
|
|
|
|
|
2011-06-25 08:18:19 +04:00
|
|
|
p->relocated = 1;
|
2011-06-19 03:48:42 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-25 22:59:50 +04:00
|
|
|
static void kernel_mapped_dso(struct dso *p)
|
2012-08-27 05:09:26 +04:00
|
|
|
{
|
2014-03-25 22:59:50 +04:00
|
|
|
size_t min_addr = -1, max_addr = 0, cnt;
|
|
|
|
Phdr *ph = p->phdr;
|
|
|
|
for (cnt = p->phnum; cnt--; ph = (void *)((char *)ph + p->phentsize)) {
|
|
|
|
if (ph->p_type == PT_DYNAMIC) {
|
2015-09-17 20:18:09 +03:00
|
|
|
p->dynv = laddr(p, ph->p_vaddr);
|
2014-03-25 22:59:50 +04:00
|
|
|
} else if (ph->p_type == PT_GNU_RELRO) {
|
2014-03-25 16:13:27 +04:00
|
|
|
p->relro_start = ph->p_vaddr & -PAGE_SIZE;
|
|
|
|
p->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
|
support setting of default thread stack size via PT_GNU_STACK header
this facilitates building software that assumes a large default stack
size without any patching to call pthread_setattr_default_np or
pthread_attr_setstacksize at each thread creation site, using just
LDFLAGS.
normally the PT_GNU_STACK header is used only to reflect whether
executable stack is desired, but with GNU ld at least, passing
-Wl,-z,stack-size=N will set a size on the program header. with this
patch, that size will be incorporated into the default stack size
(subject to increase-only rule and DEFAULT_STACK_MAX limit).
both static and dynamic linking honor the program header. for dynamic
linking, all libraries loaded at program start, including preloaded
ones, are considered. dlopened libraries are not considered, for
several reasons. extra logic would be needed to defer processing until
the load of the new library is commited, synchronization woud be
needed since other threads may be running concurrently, and the
effectiveness woud be limited since the larger size would not apply to
threads that already existed at the time of dlopen. programs that will
dlopen code expecting a large stack need to declare the requirement
themselves, or pthread_setattr_default_np can be used.
2018-09-19 06:54:18 +03:00
|
|
|
} else if (ph->p_type == PT_GNU_STACK) {
|
|
|
|
if (!runtime && ph->p_memsz > __default_stacksize) {
|
|
|
|
__default_stacksize =
|
|
|
|
ph->p_memsz < DEFAULT_STACK_MAX ?
|
|
|
|
ph->p_memsz : DEFAULT_STACK_MAX;
|
|
|
|
}
|
2014-03-25 16:13:27 +04:00
|
|
|
}
|
2012-08-27 05:09:26 +04:00
|
|
|
if (ph->p_type != PT_LOAD) continue;
|
|
|
|
if (ph->p_vaddr < min_addr)
|
|
|
|
min_addr = ph->p_vaddr;
|
|
|
|
if (ph->p_vaddr+ph->p_memsz > max_addr)
|
|
|
|
max_addr = ph->p_vaddr+ph->p_memsz;
|
|
|
|
}
|
|
|
|
min_addr &= -PAGE_SIZE;
|
|
|
|
max_addr = (max_addr + PAGE_SIZE-1) & -PAGE_SIZE;
|
|
|
|
p->map = p->base + min_addr;
|
|
|
|
p->map_len = max_addr - min_addr;
|
2014-03-25 22:59:50 +04:00
|
|
|
p->kernel_mapped = 1;
|
2012-08-27 05:09:26 +04:00
|
|
|
}
|
|
|
|
|
2015-11-12 06:08:23 +03:00
|
|
|
void __libc_exit_fini()
|
2012-10-05 21:09:09 +04:00
|
|
|
{
|
|
|
|
struct dso *p;
|
2015-04-14 05:38:18 +03:00
|
|
|
size_t dyn[DYN_CNT];
|
2019-03-02 06:47:29 +03:00
|
|
|
int self = __pthread_self()->tid;
|
|
|
|
|
|
|
|
/* Take both locks before setting shutting_down, so that
|
|
|
|
* either lock is sufficient to read its value. The lock
|
|
|
|
* order matches that in dlopen to avoid deadlock. */
|
|
|
|
pthread_rwlock_wrlock(&lock);
|
|
|
|
pthread_mutex_lock(&init_fini_lock);
|
|
|
|
shutting_down = 1;
|
|
|
|
pthread_rwlock_unlock(&lock);
|
2012-10-05 21:09:09 +04:00
|
|
|
for (p=fini_head; p; p=p->fini_next) {
|
2019-03-02 06:47:29 +03:00
|
|
|
while (p->ctor_visitor && p->ctor_visitor!=self)
|
|
|
|
pthread_cond_wait(&ctor_cond, &init_fini_lock);
|
2012-10-05 21:09:09 +04:00
|
|
|
if (!p->constructed) continue;
|
|
|
|
decode_vec(p->dynv, dyn, DYN_CNT);
|
2013-07-21 02:26:17 +04:00
|
|
|
if (dyn[0] & (1<<DT_FINI_ARRAY)) {
|
|
|
|
size_t n = dyn[DT_FINI_ARRAYSZ]/sizeof(size_t);
|
2015-09-17 20:18:09 +03:00
|
|
|
size_t *fn = (size_t *)laddr(p, dyn[DT_FINI_ARRAY])+n;
|
2013-07-21 10:35:46 +04:00
|
|
|
while (n--) ((void (*)(void))*--fn)();
|
2013-07-21 02:26:17 +04:00
|
|
|
}
|
2013-07-22 22:08:33 +04:00
|
|
|
#ifndef NO_LEGACY_INITFINI
|
2013-07-31 08:04:10 +04:00
|
|
|
if ((dyn[0] & (1<<DT_FINI)) && dyn[DT_FINI])
|
2015-09-22 06:54:42 +03:00
|
|
|
fpaddr(p, dyn[DT_FINI])();
|
2013-07-22 22:08:33 +04:00
|
|
|
#endif
|
2012-10-05 21:09:09 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
static struct dso **queue_ctors(struct dso *dso)
|
2012-02-06 23:39:09 +04:00
|
|
|
{
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
size_t cnt, qpos, spos, i;
|
|
|
|
struct dso *p, **queue, **stack;
|
|
|
|
|
|
|
|
if (ldd_mode) return 0;
|
|
|
|
|
|
|
|
/* Bound on queue size is the total number of indirect deps.
|
|
|
|
* If a bfs deps list was built, we can use it. Otherwise,
|
|
|
|
* bound by the total number of DSOs, which is always safe and
|
|
|
|
* is reasonable we use it (for main app at startup). */
|
|
|
|
if (dso->bfs_built) {
|
|
|
|
for (cnt=0; dso->deps[cnt]; cnt++)
|
|
|
|
dso->deps[cnt]->mark = 0;
|
|
|
|
cnt++; /* self, not included in deps */
|
|
|
|
} else {
|
|
|
|
for (cnt=0, p=head; p; cnt++, p=p->next)
|
|
|
|
p->mark = 0;
|
|
|
|
}
|
|
|
|
cnt++; /* termination slot */
|
2019-03-03 21:24:23 +03:00
|
|
|
if (dso==head && cnt <= countof(builtin_ctor_queue))
|
|
|
|
queue = builtin_ctor_queue;
|
|
|
|
else
|
|
|
|
queue = calloc(cnt, sizeof *queue);
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
|
|
|
|
if (!queue) {
|
|
|
|
error("Error allocating constructor queue: %m\n");
|
|
|
|
if (runtime) longjmp(*rtld_fail, 1);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Opposite ends of the allocated buffer serve as an output queue
|
|
|
|
* and a working stack. Setup initial stack with just the argument
|
|
|
|
* dso and initial queue empty... */
|
2019-03-03 21:24:23 +03:00
|
|
|
stack = queue;
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
qpos = 0;
|
|
|
|
spos = cnt;
|
|
|
|
stack[--spos] = dso;
|
|
|
|
dso->next_dep = 0;
|
|
|
|
dso->mark = 1;
|
|
|
|
|
|
|
|
/* Then perform pseudo-DFS sort, but ignoring circular deps. */
|
|
|
|
while (spos<cnt) {
|
|
|
|
p = stack[spos++];
|
|
|
|
while (p->next_dep < p->ndeps_direct) {
|
|
|
|
if (p->deps[p->next_dep]->mark) {
|
|
|
|
p->next_dep++;
|
|
|
|
} else {
|
|
|
|
stack[--spos] = p;
|
|
|
|
p = p->deps[p->next_dep];
|
|
|
|
p->next_dep = 0;
|
|
|
|
p->mark = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
queue[qpos++] = p;
|
|
|
|
}
|
|
|
|
queue[qpos] = 0;
|
|
|
|
for (i=0; i<qpos; i++) queue[i]->mark = 0;
|
|
|
|
|
|
|
|
return queue;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void do_init_fini(struct dso **queue)
|
|
|
|
{
|
|
|
|
struct dso *p;
|
|
|
|
size_t dyn[DYN_CNT], i;
|
|
|
|
int self = __pthread_self()->tid;
|
|
|
|
|
|
|
|
pthread_mutex_lock(&init_fini_lock);
|
|
|
|
for (i=0; (p=queue[i]); i++) {
|
2019-03-02 06:47:29 +03:00
|
|
|
while ((p->ctor_visitor && p->ctor_visitor!=self) || shutting_down)
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
pthread_cond_wait(&ctor_cond, &init_fini_lock);
|
|
|
|
if (p->ctor_visitor || p->constructed)
|
|
|
|
continue;
|
|
|
|
p->ctor_visitor = self;
|
|
|
|
|
2012-02-06 23:39:09 +04:00
|
|
|
decode_vec(p->dynv, dyn, DYN_CNT);
|
2013-07-21 02:26:17 +04:00
|
|
|
if (dyn[0] & ((1<<DT_FINI) | (1<<DT_FINI_ARRAY))) {
|
2012-10-05 21:09:09 +04:00
|
|
|
p->fini_next = fini_head;
|
|
|
|
fini_head = p;
|
|
|
|
}
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
|
|
|
|
pthread_mutex_unlock(&init_fini_lock);
|
|
|
|
|
2013-07-22 22:08:33 +04:00
|
|
|
#ifndef NO_LEGACY_INITFINI
|
2013-07-31 08:04:10 +04:00
|
|
|
if ((dyn[0] & (1<<DT_INIT)) && dyn[DT_INIT])
|
2015-09-22 06:54:42 +03:00
|
|
|
fpaddr(p, dyn[DT_INIT])();
|
2013-07-22 22:08:33 +04:00
|
|
|
#endif
|
2013-07-21 02:26:17 +04:00
|
|
|
if (dyn[0] & (1<<DT_INIT_ARRAY)) {
|
|
|
|
size_t n = dyn[DT_INIT_ARRAYSZ]/sizeof(size_t);
|
2015-09-17 20:18:09 +03:00
|
|
|
size_t *fn = laddr(p, dyn[DT_INIT_ARRAY]);
|
2013-07-21 02:26:17 +04:00
|
|
|
while (n--) ((void (*)(void))*fn++)();
|
|
|
|
}
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
|
|
|
|
pthread_mutex_lock(&init_fini_lock);
|
|
|
|
p->ctor_visitor = 0;
|
|
|
|
p->constructed = 1;
|
|
|
|
pthread_cond_broadcast(&ctor_cond);
|
2012-02-06 23:39:09 +04:00
|
|
|
}
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
pthread_mutex_unlock(&init_fini_lock);
|
2012-02-06 23:39:09 +04:00
|
|
|
}
|
|
|
|
|
2015-09-22 23:24:28 +03:00
|
|
|
void __libc_start_init(void)
|
|
|
|
{
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
do_init_fini(main_ctor_queue);
|
2019-03-03 21:24:23 +03:00
|
|
|
if (!__malloc_replaced && main_ctor_queue != builtin_ctor_queue)
|
|
|
|
free(main_ctor_queue);
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
main_ctor_queue = 0;
|
2015-09-22 23:24:28 +03:00
|
|
|
}
|
|
|
|
|
2015-04-18 06:23:05 +03:00
|
|
|
/* Intentionally empty function, also exported under the name
 * _dl_debug_state via weak_alias below.
 * NOTE(review): presumably this is the hook a debugger sets a breakpoint
 * on to observe changes to the list of loaded objects -- confirm against
 * the code that stores its address. */
static void dl_debug_state(void)
|
2012-04-25 08:05:42 +04:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2015-04-18 06:23:05 +03:00
|
|
|
weak_alias(dl_debug_state, _dl_debug_state);
|
|
|
|
|
2015-11-12 23:50:26 +03:00
|
|
|
/* No-op definition of __init_tls: auxv is accepted and ignored.
 * NOTE(review): presumably this stands in for a libc-side implementation
 * because TLS setup is handled elsewhere in this file -- confirm. */
void __init_tls(size_t *auxv)
|
2013-08-04 00:27:30 +04:00
|
|
|
{
|
2012-10-05 04:04:13 +04:00
|
|
|
}
|
|
|
|
|
2012-10-06 09:22:51 +04:00
|
|
|
static void update_tls_size()
|
|
|
|
{
|
2015-11-12 23:50:26 +03:00
|
|
|
libc.tls_cnt = tls_cnt;
|
|
|
|
libc.tls_align = tls_align;
|
2012-10-16 02:51:53 +04:00
|
|
|
libc.tls_size = ALIGN(
|
|
|
|
(1+tls_cnt) * sizeof(void *) +
|
|
|
|
tls_offset +
|
|
|
|
sizeof(struct pthread) +
|
|
|
|
tls_align * 2,
|
|
|
|
tls_align);
|
2012-10-06 09:22:51 +04:00
|
|
|
}
|
|
|
|
|
install dynamic tls synchronously at dlopen, streamline access
previously, dynamic loading of new libraries with thread-local storage
allocated the storage needed for all existing threads at load-time,
precluding late failure that can't be handled, but left installation
in existing threads to take place lazily on first access. this imposed
an additional memory access and branch on every dynamic tls access,
and imposed a requirement, which was not actually met, that the
dynamic tlsdesc asm functions preserve all call-clobbered registers
before calling C code to install new dynamic tls on first access.
the x86[_64] versions of this code wrongly omitted saving and
restoring of fpu/vector registers, assuming the compiler would not
generate anything using them in the called C code. the arm and aarch64
versions saved known existing registers, but failed to be future-proof
against expansion of the register file.
now that we track live threads in a list, it's possible to install the
new dynamic tls for each thread at dlopen time. for the most part,
synchronization is not needed, because if a thread has not
synchronized with completion of the dlopen, there is no way it can
meaningfully request access to a slot past the end of the old dtv,
which remains valid for accessing slots which already existed.
however, it is necessary to ensure that, if a thread sees its new dtv
pointer, it sees correct pointers in each of the slots that existed
prior to the dlopen. my understanding is that, on most real-world
coherency architectures including all the ones we presently support, a
built-in consume order guarantees this; however, don't rely on that.
instead, the SYS_membarrier syscall is used to ensure that all threads
see the stores to the slots of their new dtv prior to the installation
of the new dtv. if it is not supported, the same is implemented in
userspace via signals, using the same mechanism as __synccall.
the __tls_get_addr function, variants, and dynamic tlsdesc asm
functions are all updated to remove the fallback paths for claiming
new dynamic tls, and are now all branch-free.
2019-02-18 07:22:27 +03:00
|
|
|
static void install_new_tls(void)
|
|
|
|
{
|
|
|
|
sigset_t set;
|
|
|
|
pthread_t self = __pthread_self(), td;
|
2019-02-25 10:09:36 +03:00
|
|
|
struct dso *dtv_provider = container_of(tls_tail, struct dso, tls);
|
|
|
|
uintptr_t (*newdtv)[tls_cnt+1] = (void *)dtv_provider->new_dtv;
|
install dynamic tls synchronously at dlopen, streamline access
previously, dynamic loading of new libraries with thread-local storage
allocated the storage needed for all existing threads at load-time,
precluding late failure that can't be handled, but left installation
in existing threads to take place lazily on first access. this imposed
an additional memory access and branch on every dynamic tls access,
and imposed a requirement, which was not actually met, that the
dynamic tlsdesc asm functions preserve all call-clobbered registers
before calling C code to install new dynamic tls on first access.
the x86[_64] versions of this code wrongly omitted saving and
restoring of fpu/vector registers, assuming the compiler would not
generate anything using them in the called C code. the arm and aarch64
versions saved known existing registers, but failed to be future-proof
against expansion of the register file.
now that we track live threads in a list, it's possible to install the
new dynamic tls for each thread at dlopen time. for the most part,
synchronization is not needed, because if a thread has not
synchronized with completion of the dlopen, there is no way it can
meaningfully request access to a slot past the end of the old dtv,
which remains valid for accessing slots which already existed.
however, it is necessary to ensure that, if a thread sees its new dtv
pointer, it sees correct pointers in each of the slots that existed
prior to the dlopen. my understanding is that, on most real-world
coherency architectures including all the ones we presently support, a
built-in consume order guarantees this; however, don't rely on that.
instead, the SYS_membarrier syscall is used to ensure that all threads
see the stores to the slots of their new dtv prior to the installation
of the new dtv. if it is not supported, the same is implemented in
userspace via signals, using the same mechanism as __synccall.
the __tls_get_addr function, variants, and dynamic tlsdesc asm
functions are all updated to remove the fallback paths for claiming
new dynamic tls, and are now all branch-free.
2019-02-18 07:22:27 +03:00
|
|
|
struct dso *p;
|
|
|
|
size_t i, j;
|
|
|
|
size_t old_cnt = self->dtv[0];
|
|
|
|
|
|
|
|
__block_app_sigs(&set);
|
|
|
|
__tl_lock();
|
|
|
|
/* Copy existing dtv contents from all existing threads. */
|
|
|
|
for (i=0, td=self; !i || td!=self; i++, td=td->next) {
|
|
|
|
memcpy(newdtv+i, td->dtv,
|
|
|
|
(old_cnt+1)*sizeof(uintptr_t));
|
|
|
|
newdtv[i][0] = tls_cnt;
|
|
|
|
}
|
|
|
|
/* Install new dtls into the enlarged, uninstalled dtv copies. */
|
|
|
|
for (p=head; ; p=p->next) {
|
2019-02-27 20:02:49 +03:00
|
|
|
if (p->tls_id <= old_cnt) continue;
|
install dynamic tls synchronously at dlopen, streamline access
previously, dynamic loading of new libraries with thread-local storage
allocated the storage needed for all existing threads at load-time,
precluding late failure that can't be handled, but left installation
in existing threads to take place lazily on first access. this imposed
an additional memory access and branch on every dynamic tls access,
and imposed a requirement, which was not actually met, that the
dynamic tlsdesc asm functions preserve all call-clobbered registers
before calling C code to install new dynamic tls on first access.
the x86[_64] versions of this code wrongly omitted saving and
restoring of fpu/vector registers, assuming the compiler would not
generate anything using them in the called C code. the arm and aarch64
versions saved known existing registers, but failed to be future-proof
against expansion of the register file.
now that we track live threads in a list, it's possible to install the
new dynamic tls for each thread at dlopen time. for the most part,
synchronization is not needed, because if a thread has not
synchronized with completion of the dlopen, there is no way it can
meaningfully request access to a slot past the end of the old dtv,
which remains valid for accessing slots which already existed.
however, it is necessary to ensure that, if a thread sees its new dtv
pointer, it sees correct pointers in each of the slots that existed
prior to the dlopen. my understanding is that, on most real-world
coherency architectures including all the ones we presently support, a
built-in consume order guarantees this; however, don't rely on that.
instead, the SYS_membarrier syscall is used to ensure that all threads
see the stores to the slots of their new dtv prior to the installation
of the new dtv. if it is not supported, the same is implemented in
userspace via signals, using the same mechanism as __synccall.
the __tls_get_addr function, variants, and dynamic tlsdesc asm
functions are all updated to remove the fallback paths for claiming
new dynamic tls, and are now all branch-free.
2019-02-18 07:22:27 +03:00
|
|
|
unsigned char *mem = p->new_tls;
|
|
|
|
for (j=0; j<i; j++) {
|
|
|
|
unsigned char *new = mem;
|
|
|
|
new += ((uintptr_t)p->tls.image - (uintptr_t)mem)
|
|
|
|
& (p->tls.align-1);
|
|
|
|
memcpy(new, p->tls.image, p->tls.len);
|
|
|
|
newdtv[j][p->tls_id] =
|
|
|
|
(uintptr_t)new + DTP_OFFSET;
|
|
|
|
mem += p->tls.size + p->tls.align;
|
|
|
|
}
|
|
|
|
if (p->tls_id == tls_cnt) break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Broadcast barrier to ensure contents of new dtv is visible
|
2019-02-22 10:56:10 +03:00
|
|
|
* if the new dtv pointer is. The __membarrier function has a
|
|
|
|
* fallback emulation using signals for kernels that lack the
|
|
|
|
* feature at the syscall level. */
|
|
|
|
|
|
|
|
__membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
|
install dynamic tls synchronously at dlopen, streamline access
previously, dynamic loading of new libraries with thread-local storage
allocated the storage needed for all existing threads at load-time,
precluding late failure that can't be handled, but left installation
in existing threads to take place lazily on first access. this imposed
an additional memory access and branch on every dynamic tls access,
and imposed a requirement, which was not actually met, that the
dynamic tlsdesc asm functions preserve all call-clobbered registers
before calling C code to install new dynamic tls on first access.
the x86[_64] versions of this code wrongly omitted saving and
restoring of fpu/vector registers, assuming the compiler would not
generate anything using them in the called C code. the arm and aarch64
versions saved known existing registers, but failed to be future-proof
against expansion of the register file.
now that we track live threads in a list, it's possible to install the
new dynamic tls for each thread at dlopen time. for the most part,
synchronization is not needed, because if a thread has not
synchronized with completion of the dlopen, there is no way it can
meaningfully request access to a slot past the end of the old dtv,
which remains valid for accessing slots which already existed.
however, it is necessary to ensure that, if a thread sees its new dtv
pointer, it sees correct pointers in each of the slots that existed
prior to the dlopen. my understanding is that, on most real-world
coherency architectures including all the ones we presently support, a
built-in consume order guarantees this; however, don't rely on that.
instead, the SYS_membarrier syscall is used to ensure that all threads
see the stores to the slots of their new dtv prior to the installation
of the new dtv. if it is not supported, the same is implemented in
userspace via signals, using the same mechanism as __synccall.
the __tls_get_addr function, variants, and dynamic tlsdesc asm
functions are all updated to remove the fallback paths for claiming
new dynamic tls, and are now all branch-free.
2019-02-18 07:22:27 +03:00
|
|
|
|
|
|
|
/* Install new dtv for each thread. */
|
|
|
|
for (j=0, td=self; !j || td!=self; j++, td=td->next) {
|
|
|
|
td->dtv = td->dtv_copy = newdtv[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
__tl_unlock();
|
|
|
|
__restore_sigs(&set);
|
|
|
|
}
|
|
|
|
|
2015-04-13 09:56:26 +03:00
|
|
|
/* Stage 1 of the dynamic linker is defined in dlstart.c. It calls the
|
|
|
|
* following stage 2 and stage 3 functions via primitive symbolic lookup
|
|
|
|
* since it does not have access to their addresses to begin with. */
|
|
|
|
|
|
|
|
/* Stage 2 of the dynamic linker is called after relative relocations
|
|
|
|
* have been processed. It can make function calls to static functions
|
|
|
|
* and access string literals and static data, but cannot use extern
|
|
|
|
* symbols. Its job is to perform symbolic relocations on the dynamic
|
|
|
|
* linker itself, but some of the relocations performed may need to be
|
|
|
|
* replaced later due to copy relocations in the main program. */
|
|
|
|
|
2018-09-05 19:43:34 +03:00
|
|
|
/* __dls2: stage 2 of the dynamic linker bootstrap.
 *   base - used directly as ldso.base on non-FDPIC targets.
 *   sp   - initial stack pointer; sp[0] is read as the argument count,
 *          and on FDPIC sp[-2] and sp[-1] are read as well.
 * Fills in the ldso dso record, saves room for REL addends, relocates
 * ldso, and finishes by calling __dls2b(sp, auxv). */
hidden void __dls2(unsigned char *base, size_t *sp)
|
2011-06-19 03:48:42 +04:00
|
|
|
{
|
2020-01-01 05:59:07 +03:00
|
|
|
size_t *auxv;
|
|
|
|
/* Locate the aux vector: start just past the argv terminator
 * (sp + 1 + argc + 1), skip words up to the next zero word, then step
 * over that zero word as well. */
for (auxv=sp+1+*sp+1; *auxv; auxv++);
|
|
|
|
auxv++;
|
2015-09-22 06:54:42 +03:00
|
|
|
/* FDPIC: the two words stored just below sp are taken as load map
 * pointers (see the assignments to app_loadmap and ldso.loadmap). */
if (DL_FDPIC) {
|
|
|
|
void *p1 = (void *)sp[-2];
|
|
|
|
void *p2 = (void *)sp[-1];
|
|
|
|
/* No first map: derive a base from the aux vector instead, using
 * AT_BASE if nonzero, else AT_PHDR rounded down to a multiple of 4096. */
if (!p1) {
|
2020-01-01 05:59:07 +03:00
|
|
|
|
size_t aux[AUX_CNT];
|
2015-09-22 06:54:42 +03:00
|
|
|
decode_vec(auxv, aux, AUX_CNT);
|
|
|
|
if (aux[AT_BASE]) ldso.base = (void *)aux[AT_BASE];
|
|
|
|
else ldso.base = (void *)(aux[AT_PHDR] & -4096);
|
|
|
|
}
|
|
|
|
/* With two maps, p1 is kept for the app and p2 is used for ldso;
 * with only one, it is used for ldso and the app map is left null. */
app_loadmap = p2 ? p1 : 0;
|
|
|
|
ldso.loadmap = p2 ? p2 : p1;
|
|
|
|
ldso.base = laddr(&ldso, 0);
|
|
|
|
} else {
|
|
|
|
ldso.base = base;
|
|
|
|
}
|
|
|
|
/* Fill in the dso record for ldso from the ELF header found at its
 * base address. */
Ehdr *ehdr = (void *)ldso.base;
|
2015-04-13 09:56:26 +03:00
|
|
|
ldso.name = ldso.shortname = "libc.so";
|
|
|
|
ldso.phnum = ehdr->e_phnum;
|
2015-09-22 06:54:42 +03:00
|
|
|
ldso.phdr = laddr(&ldso, ehdr->e_phoff);
|
2015-04-13 09:56:26 +03:00
|
|
|
ldso.phentsize = ehdr->e_phentsize;
|
|
|
|
kernel_mapped_dso(&ldso);
|
|
|
|
decode_dyn(&ldso);
|
|
|
|
|
2015-09-22 06:54:42 +03:00
|
|
|
if (DL_FDPIC) makefuncdescs(&ldso);
|
|
|
|
|
reprocess all libc/ldso symbolic relocations in dynamic linking stage 3
commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 introduced early
relocations and subsequent reprocessing as part of the dynamic linker
bootstrap overhaul, to allow use of arbitrary libc functions before
the main application and libraries are loaded, but only reprocessed
GOT/PLT relocation types.
commit c093e2e8201524db0d638920e76bcb6b1d925f3a added reprocessing of
non-GOT/PLT relocations to fix an actual regression that was observed
on powerpc, but only for RELA format tables with out-of-line addends.
REL table (inline addends at the relocation address) reprocessing is
trickier because the first relocation pass clobbers the addends.
this patch extends symbolic relocation reprocessing for libc/ldso to
support all relocation types, whether REL or RELA format tables are
used. it is believed not to alter behavior on any existing archs for
the current dynamic linker and libc code. the motivations for this
change are consistency and future-proofing. it ensures that behavior
does not differ depending on whether REL or RELA tables are used,
which could lead to undetected arch-specific bugs. it also ensures
that, if in the future code depending on additional relocation types
is added to libc.so, either at the source level or as part of the
compiler runtime that gets pulled in (for example, soft-float with TLS
for fenv), the new code will work properly.
the implementation concept is simple: stage 2 of the dynamic linker
counts the number of symbolic relocations in the libc/ldso REL table
and allocates a VLA to save their addends into; stage 3 then uses the
saved addends in place of the inline ones which were clobbered. for
stack safety, a hard limit (currently 4k) is imposed on the number of
such addends; this should be a couple orders of magnitude larger than
the actual need. this number is not a runtime variable that could
break fail-safety; it is constant for a given libc.so build.
2015-05-26 06:33:59 +03:00
|
|
|
/* Prepare storage to save clobbered REL addends so they
|
|
|
|
* can be reused in stage 3. There should be very few. If
|
|
|
|
* something goes wrong and there are a huge number, abort
|
|
|
|
* instead of risking stack overflow. */
|
|
|
|
size_t dyn[DYN_CNT];
|
|
|
|
decode_vec(ldso.dynv, dyn, DYN_CNT);
|
2015-09-17 22:45:45 +03:00
|
|
|
size_t *rel = laddr(&ldso, dyn[DT_REL]);
|
reprocess all libc/ldso symbolic relocations in dynamic linking stage 3
commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 introduced early
relocations and subsequent reprocessing as part of the dynamic linker
bootstrap overhaul, to allow use of arbitrary libc functions before
the main application and libraries are loaded, but only reprocessed
GOT/PLT relocation types.
commit c093e2e8201524db0d638920e76bcb6b1d925f3a added reprocessing of
non-GOT/PLT relocations to fix an actual regression that was observed
on powerpc, but only for RELA format tables with out-of-line addends.
REL table (inline addends at the relocation address) reprocessing is
trickier because the first relocation pass clobbers the addends.
this patch extends symbolic relocation reprocessing for libc/ldso to
support all relocation types, whether REL or RELA format tables are
used. it is believed not to alter behavior on any existing archs for
the current dynamic linker and libc code. the motivations for this
change are consistency and future-proofing. it ensures that behavior
does not differ depending on whether REL or RELA tables are used,
which could lead to undetected arch-specific bugs. it also ensures
that, if in the future code depending on additional relocation types
is added to libc.so, either at the source level or as part of the
compiler runtime that gets pulled in (for example, soft-float with TLS
for fenv), the new code will work properly.
the implementation concept is simple: stage 2 of the dynamic linker
counts the number of symbolic relocations in the libc/ldso REL table
and allocates a VLA to save their addends into; stage 3 then uses the
saved addends in place of the inline ones which were clobbered. for
stack safety, a hard limit (currently 4k) is imposed on the number of
such addends; this should be a couple orders of magnitude larger than
the actual need. this number is not a runtime variable that could
break fail-safety; it is constant for a given libc.so build.
2015-05-26 06:33:59 +03:00
|
|
|
size_t rel_size = dyn[DT_RELSZ];
|
|
|
|
size_t symbolic_rel_cnt = 0;
|
|
|
|
apply_addends_to = rel;
|
|
|
|
/* Walk the REL table two size_t words per entry (rel[1] is the word
 * passed to IS_RELATIVE) and count the entries that are not relative
 * relocations. */
for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t))
|
2015-09-22 06:54:42 +03:00
|
|
|
if (!IS_RELATIVE(rel[1], ldso.syms)) symbolic_rel_cnt++;
|
reprocess all libc/ldso symbolic relocations in dynamic linking stage 3
commit f3ddd173806fd5c60b3f034528ca24542aecc5b9 introduced early
relocations and subsequent reprocessing as part of the dynamic linker
bootstrap overhaul, to allow use of arbitrary libc functions before
the main application and libraries are loaded, but only reprocessed
GOT/PLT relocation types.
commit c093e2e8201524db0d638920e76bcb6b1d925f3a added reprocessing of
non-GOT/PLT relocations to fix an actual regression that was observed
on powerpc, but only for RELA format tables with out-of-line addends.
REL table (inline addends at the relocation address) reprocessing is
trickier because the first relocation pass clobbers the addends.
this patch extends symbolic relocation reprocessing for libc/ldso to
support all relocation types, whether REL or RELA format tables are
used. it is believed not to alter behavior on any existing archs for
the current dynamic linker and libc code. the motivations for this
change are consistency and future-proofing. it ensures that behavior
does not differ depending on whether REL or RELA tables are used,
which could lead to undetected arch-specific bugs. it also ensures
that, if in the future code depending on additional relocation types
is added to libc.so, either at the source level or as part of the
compiler runtime that gets pulled in (for example, soft-float with TLS
for fenv), the new code will work properly.
the implementation concept is simple: stage 2 of the dynamic linker
counts the number of symbolic relocations in the libc/ldso REL table
and allocates a VLA to save their addends into; stage 3 then uses the
saved addends in place of the inline ones which were clobbered. for
stack safety, a hard limit (currently 4k) is imposed on the number of
such addends; this should be a couple orders of magnitude larger than
the actual need. this number is not a runtime variable that could
break fail-safety; it is constant for a given libc.so build.
2015-05-26 06:33:59 +03:00
|
|
|
if (symbolic_rel_cnt >= ADDEND_LIMIT) a_crash();
|
|
|
|
/* The VLA lives in this stack frame and saved_addends points into it,
 * so the saved addends are only usable while __dls2 has not returned.
 * NOTE(review): presumably stage 3 is always reached through the call
 * chain at the end of this function -- confirm. */
size_t addends[symbolic_rel_cnt+1];
|
|
|
|
saved_addends = addends;
|
|
|
|
|
2015-04-13 09:56:26 +03:00
|
|
|
head = &ldso;
|
|
|
|
reloc_all(&ldso);
|
|
|
|
|
|
|
|
/* Clear the flag again after reloc_all. NOTE(review): presumably so
 * that the later stage-3 pass reprocesses ldso relocations -- confirm. */
ldso.relocated = 0;
|
2015-05-26 02:15:17 +03:00
|
|
|
|
2018-10-16 20:36:51 +03:00
|
|
|
/* Call dynamic linker stage-2b, __dls2b, looking it up
|
2015-05-26 02:15:17 +03:00
|
|
|
* symbolically as a barrier against moving the address
|
|
|
|
* load across the above relocation processing. */
|
2018-10-16 20:36:51 +03:00
|
|
|
struct symdef dls2b_def = find_sym(&ldso, "__dls2b", 0);
|
2020-01-01 05:59:07 +03:00
|
|
|
/* FDPIC calls through the function descriptor indexed by the symbol;
 * otherwise the target is laddr(&ldso, st_value). */
if (DL_FDPIC) ((stage3_func)&ldso.funcdescs[dls2b_def.sym-ldso.syms])(sp, auxv);
|
|
|
|
else ((stage3_func)laddr(&ldso, dls2b_def.sym->st_value))(sp, auxv);
|
2018-10-16 20:36:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Stage 2b sets up a valid thread pointer, which requires relocations
|
|
|
|
* completed in stage 2, and on which stage 3 is permitted to depend.
|
|
|
|
* This is done as a separate stage, with symbolic lookup as a barrier,
|
|
|
|
* so that loads of the thread pointer and &errno can be pure/const and
|
|
|
|
* thereby hoistable. */
|
|
|
|
|
2020-01-01 05:59:07 +03:00
|
|
|
/* __dls2b: stage 2b of the dynamic linker bootstrap.
 *   sp   - forwarded unchanged to __dls3.
 *   auxv - aux vector pointer; searched for AT_HWCAP, stored in
 *          libc.auxv, and forwarded to __dls3.
 * Crashes via a_crash() if the thread pointer cannot be initialized. */
void __dls2b(size_t *sp, size_t *auxv)
|
2018-10-16 20:36:51 +03:00
|
|
|
{
|
|
|
|
/* Setup early thread pointer in builtin_tls for ldso/libc itself to
|
|
|
|
* use during dynamic linking. If possible it will also serve as the
|
|
|
|
* thread pointer at runtime. */
|
2020-01-16 00:15:49 +03:00
|
|
|
/* NOTE(review): presumably stores the AT_HWCAP entry into __hwcap
 * when it is present in auxv -- confirm against search_vec. */
search_vec(auxv, &__hwcap, AT_HWCAP);
|
2020-01-01 05:59:07 +03:00
|
|
|
libc.auxv = auxv;
|
2018-10-16 20:36:51 +03:00
|
|
|
/* Describe the builtin TLS area to libc before copying into it. */
libc.tls_size = sizeof builtin_tls;
|
|
|
|
libc.tls_align = tls_align;
|
|
|
|
/* A negative result from __init_tp is treated as fatal. */
if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
|
|
|
|
a_crash();
|
|
|
|
}
|
|
|
|
|
2015-05-26 02:15:17 +03:00
|
|
|
/* Look __dls3 up by name and call it with the same arguments. */
struct symdef dls3_def = find_sym(&ldso, "__dls3", 0);
|
2020-01-01 05:59:07 +03:00
|
|
|
/* FDPIC calls through the function descriptor indexed by the symbol;
 * otherwise the target is laddr(&ldso, st_value). */
if (DL_FDPIC) ((stage3_func)&ldso.funcdescs[dls3_def.sym-ldso.syms])(sp, auxv);
|
|
|
|
else ((stage3_func)laddr(&ldso, dls3_def.sym->st_value))(sp, auxv);
|
2015-04-13 09:56:26 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Stage 3 of the dynamic linker is called with the dynamic linker/libc
|
|
|
|
* fully functional. Its job is to load (if not already loaded) and
|
|
|
|
* process dependencies and relocations for the main application and
|
|
|
|
* transfer control to its entry point. */
|
|
|
|
|
2020-01-01 05:59:07 +03:00
|
|
|
void __dls3(size_t *sp, size_t *auxv)
|
2015-04-13 09:56:26 +03:00
|
|
|
{
|
|
|
|
static struct dso app, vdso;
|
2020-01-01 05:59:07 +03:00
|
|
|
size_t aux[AUX_CNT];
|
2011-06-19 03:48:42 +04:00
|
|
|
size_t i;
|
2011-08-16 08:24:36 +04:00
|
|
|
char *env_preload=0;
|
2017-07-04 23:58:28 +03:00
|
|
|
char *replace_argv0=0;
|
2012-08-26 01:31:59 +04:00
|
|
|
size_t vdso_base;
|
2015-04-13 09:56:26 +03:00
|
|
|
int argc = *sp;
|
|
|
|
char **argv = (void *)(sp+1);
|
|
|
|
char **argv_orig = argv;
|
add support for init/fini array in main program, and greatly simplify
modern (4.7.x and later) gcc uses init/fini arrays, rather than the
legacy _init/_fini function pasting and crtbegin/crtend ctors/dtors
system, on most or all archs. some archs had already switched a long
time ago. without following this change, global ctors/dtors will cease
to work under musl when building with new gcc versions.
the most surprising part of this patch is that it actually reduces the
size of the init code, for both static and shared libc. this is
achieved by (1) unifying the handling of the main program and shared
libraries in the dynamic linker, and (2) eliminating the
glibc-inspired rube goldberg machine for passing around init and fini
function pointers. to clarify, some background:
the function signature for __libc_start_main was based on glibc, as
part of the original goal of being able to run some glibc-linked
binaries. it worked by having the crt1 code, which is linked into
every application, static or dynamic, obtain and pass pointers to the
init and fini functions, which __libc_start_main is then responsible
for using and recording for later use, as necessary. however, in
neither the static-linked nor dynamic-linked case do we actually need
crt1.o's help. with dynamic linking, all the pointers are available in
the _DYNAMIC block. with static linking, it's safe to simply access
the _init/_fini and __init_array_start, etc. symbols directly.
obviously changing the __libc_start_main function signature in an
incompatible way would break both old musl-linked programs and
glibc-linked programs, so let's not do that. instead, the function can
just ignore the information it doesn't need. new archs need not even
provide the useless args in their versions of crt1.o. existing archs
should continue to provide it as long as there is an interest in
having newly-linked applications be able to run on old versions of
musl; at some point in the future, this support can be removed.
2013-07-21 11:00:54 +04:00
|
|
|
char **envp = argv+argc+1;
|
2015-04-14 01:40:52 +03:00
|
|
|
|
2015-06-07 23:55:23 +03:00
|
|
|
/* Find aux vector just past environ[] and use it to initialize
|
|
|
|
* global data that may be needed before we can make syscalls. */
|
|
|
|
__environ = envp;
|
|
|
|
decode_vec(auxv, aux, AUX_CNT);
|
overhaul i386 syscall mechanism not to depend on external asm source
this is the first part of a series of patches intended to make
__syscall fully self-contained in the object file produced using
syscall.h, which will make it possible for crt1 code to perform
syscalls.
the (confusingly named) i386 __vsyscall mechanism, which this commit
removes, was introduced before the presence of a valid thread pointer
was mandatory; back then the thread pointer was setup lazily only if
threads were used. the intent was to be able to perform syscalls using
the kernel's fast entry point in the VDSO, which can use the sysenter
(Intel) or syscall (AMD) instruction instead of int $128, but without
inlining an access to the __syscall global at the point of each
syscall, which would incur a significant size cost from PIC setup
everywhere. the mechanism also shuffled registers/calling convention
around to avoid spills of call-saved registers, and to avoid
allocating ebx or ebp via asm constraints, since there are plenty of
broken-but-supported compiler versions which are incapable of
allocating ebx with -fPIC or ebp with -fno-omit-frame-pointer.
the new mechanism preserves the properties of avoiding spills and
avoiding allocation of ebx/ebp in constraints, but does it inline,
using some fairly simple register shuffling, and uses a field of the
thread structure rather than global data for the vdso-provided syscall
code address.
for now, the external __syscall function is refactored not to use the
old __vsyscall so it can be kept, but the intent is to remove it too.
2019-04-11 00:10:36 +03:00
|
|
|
search_vec(auxv, &__sysinfo, AT_SYSINFO);
|
|
|
|
__pthread_self()->sysinfo = __sysinfo;
|
2015-06-07 23:55:23 +03:00
|
|
|
libc.page_size = aux[AT_PAGESZ];
|
|
|
|
libc.secure = ((aux[0]&0x7800)!=0x7800 || aux[AT_UID]!=aux[AT_EUID]
|
|
|
|
|| aux[AT_GID]!=aux[AT_EGID] || aux[AT_SECURE]);
|
|
|
|
|
2011-06-25 09:56:34 +04:00
|
|
|
/* Only trust user/env if kernel says we're not suid/sgid */
|
2015-06-07 23:55:23 +03:00
|
|
|
if (!libc.secure) {
|
|
|
|
env_path = getenv("LD_LIBRARY_PATH");
|
|
|
|
env_preload = getenv("LD_PRELOAD");
|
2011-06-25 09:56:34 +04:00
|
|
|
}
|
|
|
|
|
2015-04-13 09:56:26 +03:00
|
|
|
/* If the main program was already loaded by the kernel,
|
|
|
|
* AT_PHDR will point to some location other than the dynamic
|
|
|
|
* linker's program headers. */
|
|
|
|
if (aux[AT_PHDR] != (size_t)ldso.phdr) {
|
2012-07-13 09:31:02 +04:00
|
|
|
size_t interp_off = 0;
|
2012-10-05 04:04:13 +04:00
|
|
|
size_t tls_image = 0;
|
2012-05-28 00:01:44 +04:00
|
|
|
/* Find load address of the main program, via AT_PHDR vs PT_PHDR. */
|
2015-04-13 09:56:26 +03:00
|
|
|
Phdr *phdr = app.phdr = (void *)aux[AT_PHDR];
|
|
|
|
app.phnum = aux[AT_PHNUM];
|
|
|
|
app.phentsize = aux[AT_PHENT];
|
2012-05-28 00:01:44 +04:00
|
|
|
for (i=aux[AT_PHNUM]; i; i--, phdr=(void *)((char *)phdr + aux[AT_PHENT])) {
|
|
|
|
if (phdr->p_type == PT_PHDR)
|
2015-04-13 09:56:26 +03:00
|
|
|
app.base = (void *)(aux[AT_PHDR] - phdr->p_vaddr);
|
2012-07-13 09:31:02 +04:00
|
|
|
else if (phdr->p_type == PT_INTERP)
|
|
|
|
interp_off = (size_t)phdr->p_vaddr;
|
2012-10-05 04:04:13 +04:00
|
|
|
else if (phdr->p_type == PT_TLS) {
|
|
|
|
tls_image = phdr->p_vaddr;
|
2015-11-12 23:50:26 +03:00
|
|
|
app.tls.len = phdr->p_filesz;
|
|
|
|
app.tls.size = phdr->p_memsz;
|
|
|
|
app.tls.align = phdr->p_align;
|
2012-10-05 04:04:13 +04:00
|
|
|
}
|
2012-05-28 00:01:44 +04:00
|
|
|
}
|
2015-09-22 06:54:42 +03:00
|
|
|
if (DL_FDPIC) app.loadmap = app_loadmap;
|
2015-11-12 23:50:26 +03:00
|
|
|
if (app.tls.size) app.tls.image = laddr(&app, tls_image);
|
2015-09-17 20:18:09 +03:00
|
|
|
if (interp_off) ldso.name = laddr(&app, interp_off);
|
2013-08-23 22:14:47 +04:00
|
|
|
if ((aux[0] & (1UL<<AT_EXECFN))
|
|
|
|
&& strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
|
2015-04-13 09:56:26 +03:00
|
|
|
app.name = (char *)aux[AT_EXECFN];
|
2013-08-23 22:14:47 +04:00
|
|
|
else
|
2015-04-13 09:56:26 +03:00
|
|
|
app.name = argv[0];
|
|
|
|
kernel_mapped_dso(&app);
|
2012-05-28 00:01:44 +04:00
|
|
|
} else {
|
|
|
|
int fd;
|
|
|
|
char *ldname = argv[0];
|
2012-11-02 07:46:39 +04:00
|
|
|
size_t l = strlen(ldname);
|
2012-05-28 00:01:44 +04:00
|
|
|
if (l >= 3 && !strcmp(ldname+l-3, "ldd")) ldd_mode = 1;
|
2015-04-13 09:56:26 +03:00
|
|
|
argv++;
|
2014-04-16 20:45:36 +04:00
|
|
|
while (argv[0] && argv[0][0]=='-' && argv[0][1]=='-') {
|
|
|
|
char *opt = argv[0]+2;
|
|
|
|
*argv++ = (void *)-1;
|
|
|
|
if (!*opt) {
|
|
|
|
break;
|
|
|
|
} else if (!memcmp(opt, "list", 5)) {
|
|
|
|
ldd_mode = 1;
|
|
|
|
} else if (!memcmp(opt, "library-path", 12)) {
|
|
|
|
if (opt[12]=='=') env_path = opt+13;
|
|
|
|
else if (opt[12]) *argv = 0;
|
|
|
|
else if (*argv) env_path = *argv++;
|
|
|
|
} else if (!memcmp(opt, "preload", 7)) {
|
|
|
|
if (opt[7]=='=') env_preload = opt+8;
|
|
|
|
else if (opt[7]) *argv = 0;
|
|
|
|
else if (*argv) env_preload = *argv++;
|
2017-07-04 23:58:28 +03:00
|
|
|
} else if (!memcmp(opt, "argv0", 5)) {
|
|
|
|
if (opt[5]=='=') replace_argv0 = opt+6;
|
|
|
|
else if (opt[5]) *argv = 0;
|
|
|
|
else if (*argv) replace_argv0 = *argv++;
|
2014-04-16 20:45:36 +04:00
|
|
|
} else {
|
|
|
|
argv[0] = 0;
|
|
|
|
}
|
|
|
|
}
|
2015-04-13 09:56:26 +03:00
|
|
|
argv[-1] = (void *)(argc - (argv-argv_orig));
|
2012-05-28 00:01:44 +04:00
|
|
|
if (!argv[0]) {
|
2016-01-22 07:04:16 +03:00
|
|
|
dprintf(2, "musl libc (" LDSO_ARCH ")\n"
|
2013-12-02 02:27:25 +04:00
|
|
|
"Version %s\n"
|
|
|
|
"Dynamic Program Loader\n"
|
2014-04-16 20:45:36 +04:00
|
|
|
"Usage: %s [options] [--] pathname%s\n",
|
2018-09-06 23:17:56 +03:00
|
|
|
__libc_version, ldname,
|
2012-05-28 00:01:44 +04:00
|
|
|
ldd_mode ? "" : " [args]");
|
|
|
|
_exit(1);
|
|
|
|
}
|
|
|
|
fd = open(argv[0], O_RDONLY);
|
|
|
|
if (fd < 0) {
|
|
|
|
dprintf(2, "%s: cannot load %s: %s\n", ldname, argv[0], strerror(errno));
|
|
|
|
_exit(1);
|
|
|
|
}
|
2015-04-13 09:56:26 +03:00
|
|
|
Ehdr *ehdr = (void *)map_library(fd, &app);
|
2012-05-28 00:01:44 +04:00
|
|
|
if (!ehdr) {
|
|
|
|
dprintf(2, "%s: %s: Not a valid dynamic program\n", ldname, argv[0]);
|
|
|
|
_exit(1);
|
|
|
|
}
|
|
|
|
close(fd);
|
2015-04-13 09:56:26 +03:00
|
|
|
ldso.name = ldname;
|
|
|
|
app.name = argv[0];
|
2015-09-17 20:18:09 +03:00
|
|
|
aux[AT_ENTRY] = (size_t)laddr(&app, ehdr->e_entry);
|
2013-07-26 22:41:12 +04:00
|
|
|
/* Find the name that would have been used for the dynamic
|
|
|
|
* linker had ldd not taken its place. */
|
|
|
|
if (ldd_mode) {
|
2015-04-13 09:56:26 +03:00
|
|
|
for (i=0; i<app.phnum; i++) {
|
|
|
|
if (app.phdr[i].p_type == PT_INTERP)
|
2015-09-22 22:21:57 +03:00
|
|
|
ldso.name = laddr(&app, app.phdr[i].p_vaddr);
|
2013-07-26 22:41:12 +04:00
|
|
|
}
|
2015-04-13 09:56:26 +03:00
|
|
|
dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
|
2013-07-26 22:41:12 +04:00
|
|
|
}
|
2012-01-23 11:02:59 +04:00
|
|
|
}
|
2015-11-12 23:50:26 +03:00
|
|
|
if (app.tls.size) {
|
2016-01-30 22:34:45 +03:00
|
|
|
libc.tls_head = tls_tail = &app.tls;
|
2015-04-13 09:56:26 +03:00
|
|
|
app.tls_id = tls_cnt = 1;
|
2012-10-16 02:51:53 +04:00
|
|
|
#ifdef TLS_ABOVE_TP
|
2018-06-02 02:52:01 +03:00
|
|
|
app.tls.offset = GAP_ABOVE_TP;
|
2019-05-13 21:47:11 +03:00
|
|
|
app.tls.offset += (-GAP_ABOVE_TP + (uintptr_t)app.tls.image)
|
|
|
|
& (app.tls.align-1);
|
2019-05-16 20:15:33 +03:00
|
|
|
tls_offset = app.tls.offset + app.tls.size;
|
2012-10-16 02:51:53 +04:00
|
|
|
#else
|
2015-11-12 23:50:26 +03:00
|
|
|
tls_offset = app.tls.offset = app.tls.size
|
|
|
|
+ ( -((uintptr_t)app.tls.image + app.tls.size)
|
|
|
|
& (app.tls.align-1) );
|
2012-10-16 02:51:53 +04:00
|
|
|
#endif
|
2015-11-12 23:50:26 +03:00
|
|
|
tls_align = MAXP2(tls_align, app.tls.align);
|
2012-10-05 04:04:13 +04:00
|
|
|
}
|
2015-04-13 09:56:26 +03:00
|
|
|
decode_dyn(&app);
|
2015-09-22 06:54:42 +03:00
|
|
|
if (DL_FDPIC) {
|
|
|
|
makefuncdescs(&app);
|
|
|
|
if (!app.loadmap) {
|
|
|
|
app.loadmap = (void *)&app_dummy_loadmap;
|
|
|
|
app.loadmap->nsegs = 1;
|
2015-09-23 02:41:41 +03:00
|
|
|
app.loadmap->segs[0].addr = (size_t)app.map;
|
|
|
|
app.loadmap->segs[0].p_vaddr = (size_t)app.map
|
|
|
|
- (size_t)app.base;
|
|
|
|
app.loadmap->segs[0].p_memsz = app.map_len;
|
2015-09-22 06:54:42 +03:00
|
|
|
}
|
|
|
|
argv[-3] = (void *)app.loadmap;
|
|
|
|
}
|
2012-01-23 09:57:38 +04:00
|
|
|
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
/* Initial dso chain consists only of the app. */
|
|
|
|
head = tail = syms_tail = &app;
|
|
|
|
|
|
|
|
/* Donate unused parts of app and library mapping to malloc */
|
|
|
|
reclaim_gaps(&app);
|
|
|
|
reclaim_gaps(&ldso);
|
|
|
|
|
|
|
|
/* Load preload/needed libraries, add symbols to global namespace. */
|
2019-03-03 20:42:34 +03:00
|
|
|
ldso.deps = (struct dso **)no_deps;
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
if (env_preload) load_preload(env_preload);
|
|
|
|
load_deps(&app);
|
|
|
|
for (struct dso *p=head; p; p=p->next)
|
|
|
|
add_syms(p);
|
|
|
|
|
|
|
|
/* Attach to vdso, if provided by the kernel, last so that it does
|
|
|
|
* not become part of the global namespace. */
|
2016-07-26 06:52:58 +03:00
|
|
|
if (search_vec(auxv, &vdso_base, AT_SYSINFO_EHDR) && vdso_base) {
|
2015-04-13 09:56:26 +03:00
|
|
|
Ehdr *ehdr = (void *)vdso_base;
|
|
|
|
Phdr *phdr = vdso.phdr = (void *)(vdso_base + ehdr->e_phoff);
|
|
|
|
vdso.phnum = ehdr->e_phnum;
|
|
|
|
vdso.phentsize = ehdr->e_phentsize;
|
2011-07-24 08:54:55 +04:00
|
|
|
for (i=ehdr->e_phnum; i; i--, phdr=(void *)((char *)phdr + ehdr->e_phentsize)) {
|
|
|
|
if (phdr->p_type == PT_DYNAMIC)
|
2015-04-13 09:56:26 +03:00
|
|
|
vdso.dynv = (void *)(vdso_base + phdr->p_offset);
|
2011-07-24 08:54:55 +04:00
|
|
|
if (phdr->p_type == PT_LOAD)
|
2015-04-13 09:56:26 +03:00
|
|
|
vdso.base = (void *)(vdso_base - phdr->p_vaddr + phdr->p_offset);
|
2011-07-24 08:54:55 +04:00
|
|
|
}
|
2015-04-13 09:56:26 +03:00
|
|
|
vdso.name = "";
|
|
|
|
vdso.shortname = "linux-gate.so.1";
|
|
|
|
vdso.relocated = 1;
|
2019-03-03 20:42:34 +03:00
|
|
|
vdso.deps = (struct dso **)no_deps;
|
2015-04-13 09:56:26 +03:00
|
|
|
decode_dyn(&vdso);
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
vdso.prev = tail;
|
|
|
|
tail->next = &vdso;
|
|
|
|
tail = &vdso;
|
2011-07-24 08:54:55 +04:00
|
|
|
}
|
|
|
|
|
2016-01-30 23:14:05 +03:00
|
|
|
for (i=0; app.dynv[i]; i+=2) {
|
|
|
|
if (!DT_DEBUG_INDIRECT && app.dynv[i]==DT_DEBUG)
|
2015-04-13 09:56:26 +03:00
|
|
|
app.dynv[i+1] = (size_t)&debug;
|
2016-01-30 23:14:05 +03:00
|
|
|
if (DT_DEBUG_INDIRECT && app.dynv[i]==DT_DEBUG_INDIRECT) {
|
|
|
|
size_t *ptr = (size_t *) app.dynv[i+1];
|
|
|
|
*ptr = (size_t)&debug;
|
|
|
|
}
|
|
|
|
}
|
2014-03-25 16:13:27 +04:00
|
|
|
|
2019-03-03 17:57:19 +03:00
|
|
|
/* This must be done before final relocations, since it calls
|
|
|
|
* malloc, which may be provided by the application. Calling any
|
|
|
|
* application code prior to the jump to its entry point is not
|
|
|
|
* valid in our model and does not work with FDPIC, where there
|
|
|
|
* are additional relocation-like fixups that only the entry point
|
|
|
|
* code can see to perform. */
|
|
|
|
main_ctor_queue = queue_ctors(&app);
|
|
|
|
|
2019-08-11 18:48:06 +03:00
|
|
|
/* Initial TLS must also be allocated before final relocations
|
|
|
|
* might result in calloc being a call to application code. */
|
2012-10-06 09:22:51 +04:00
|
|
|
update_tls_size();
|
2019-08-14 04:53:30 +03:00
|
|
|
void *initial_tls = builtin_tls;
|
2015-04-14 01:40:52 +03:00
|
|
|
if (libc.tls_size > sizeof builtin_tls || tls_align > MIN_TLS_ALIGN) {
|
2019-08-14 04:53:30 +03:00
|
|
|
initial_tls = calloc(libc.tls_size, 1);
|
always initialize thread pointer at program start
this is the first step in an overhaul aimed at greatly simplifying and
optimizing everything dealing with thread-local state.
previously, the thread pointer was initialized lazily on first access,
or at program startup if stack protector was in use, or at certain
random places where inconsistent state could be reached if it were not
initialized early. while believed to be fully correct, the logic was
fragile and non-obvious.
in the first phase of the thread pointer overhaul, support is retained
(and in some cases improved) for systems/situations where loading the
thread pointer fails, e.g. old kernels.
some notes on specific changes:
- the confusing use of libc.main_thread as an indicator that the
thread pointer is initialized is eliminated in favor of an explicit
has_thread_pointer predicate.
- sigaction no longer needs to ensure that the thread pointer is
initialized before installing a signal handler (this was needed to
prevent a situation where the signal handler caused the thread
pointer to be initialized and the subsequent sigreturn cleared it
again) but it still needs to ensure that implementation-internal
thread-related signals are not blocked.
- pthread tsd initialization for the main thread is deferred in a new
manner to minimize bloat in the static-linked __init_tp code.
- pthread_setcancelstate no longer needs special handling for the
situation before the thread pointer is initialized. it simply fails
on systems that cannot support a thread pointer, which are
non-conforming anyway.
- pthread_cleanup_push/pop now check for missing thread pointer and
nop themselves out in this case, so stdio no longer needs to avoid
the cancellable path when the thread pointer is not available.
a number of cases remain where certain interfaces may crash if the
system does not support a thread pointer. at this point, these should
be limited to pthread interfaces, and the number of such cases should
be fewer than before.
2014-03-25 00:57:11 +04:00
|
|
|
if (!initial_tls) {
|
2012-10-05 06:48:33 +04:00
|
|
|
dprintf(2, "%s: Error getting %zu bytes thread-local storage: %m\n",
|
2012-10-14 07:25:20 +04:00
|
|
|
argv[0], libc.tls_size);
|
2012-10-05 06:48:33 +04:00
|
|
|
_exit(127);
|
|
|
|
}
|
2019-08-14 04:53:30 +03:00
|
|
|
}
|
|
|
|
static_tls_cnt = tls_cnt;
|
|
|
|
|
|
|
|
/* The main program must be relocated LAST since it may contain
|
|
|
|
* copy relocations which depend on libraries' relocations. */
|
|
|
|
reloc_all(app.next);
|
|
|
|
reloc_all(&app);
|
|
|
|
|
|
|
|
/* Actual copying to new TLS needs to happen after relocations,
|
|
|
|
* since the TLS images might have contained relocated addresses. */
|
|
|
|
if (initial_tls != builtin_tls) {
|
2015-04-14 01:40:52 +03:00
|
|
|
if (__init_tp(__copy_tls(initial_tls)) < 0) {
|
2015-04-14 02:24:51 +03:00
|
|
|
a_crash();
|
2015-04-14 01:40:52 +03:00
|
|
|
}
|
always initialize thread pointer at program start
this is the first step in an overhaul aimed at greatly simplifying and
optimizing everything dealing with thread-local state.
previously, the thread pointer was initialized lazily on first access,
or at program startup if stack protector was in use, or at certain
random places where inconsistent state could be reached if it were not
initialized early. while believed to be fully correct, the logic was
fragile and non-obvious.
in the first phase of the thread pointer overhaul, support is retained
(and in some cases improved) for systems/situations where loading the
thread pointer fails, e.g. old kernels.
some notes on specific changes:
- the confusing use of libc.main_thread as an indicator that the
thread pointer is initialized is eliminated in favor of an explicit
has_thread_pointer predicate.
- sigaction no longer needs to ensure that the thread pointer is
initialized before installing a signal handler (this was needed to
prevent a situation where the signal handler caused the thread
pointer to be initialized and the subsequent sigreturn cleared it
again) but it still needs to ensure that implementation-internal
thread-related signals are not blocked.
- pthread tsd initialization for the main thread is deferred in a new
manner to minimize bloat in the static-linked __init_tp code.
- pthread_setcancelstate no longer needs special handling for the
situation before the thread pointer is initialized. it simply fails
on systems that cannot support a thread pointer, which are
non-conforming anyway.
- pthread_cleanup_push/pop now check for missing thread pointer and
nop themselves out in this case, so stdio no longer needs to avoid
the cancellable path when the thread pointer is not available.
a number of cases remain where certain interfaces may crash if the
system does not support a thread pointer. at this point, these should
be limited to pthread interfaces, and the number of such cases should
be fewer than before.
2014-03-25 00:57:11 +04:00
|
|
|
} else {
|
2015-04-14 01:40:52 +03:00
|
|
|
size_t tmp_tls_size = libc.tls_size;
|
|
|
|
pthread_t self = __pthread_self();
|
|
|
|
/* Temporarily set the tls size to the full size of
|
|
|
|
* builtin_tls so that __copy_tls will use the same layout
|
|
|
|
* as it did before. Then check, just to be safe. */
|
|
|
|
libc.tls_size = sizeof builtin_tls;
|
|
|
|
if (__copy_tls((void*)builtin_tls) != self) a_crash();
|
|
|
|
libc.tls_size = tmp_tls_size;
|
2012-10-05 06:48:33 +04:00
|
|
|
}
|
2019-08-11 18:48:06 +03:00
|
|
|
|
2012-08-19 00:00:23 +04:00
|
|
|
if (ldso_fail) _exit(127);
|
2012-05-28 00:01:44 +04:00
|
|
|
if (ldd_mode) _exit(0);
|
|
|
|
|
2018-04-20 05:19:29 +03:00
|
|
|
/* Determine if malloc was interposed by a replacement implementation
|
|
|
|
* so that calloc and the memalign family can harden against the
|
|
|
|
* possibility of incomplete replacement. */
|
|
|
|
if (find_sym(head, "malloc", 1).dso != &ldso)
|
|
|
|
__malloc_replaced = 1;
|
|
|
|
|
2012-01-23 09:57:38 +04:00
|
|
|
/* Switch to runtime mode: any further failures in the dynamic
|
|
|
|
* linker are a reportable failure rather than a fatal startup
|
2015-04-13 09:56:26 +03:00
|
|
|
* error. */
|
2011-07-24 08:26:12 +04:00
|
|
|
runtime = 1;
|
2012-02-06 23:39:09 +04:00
|
|
|
|
2012-04-25 08:05:42 +04:00
|
|
|
debug.ver = 1;
|
2015-04-18 06:23:05 +03:00
|
|
|
debug.bp = dl_debug_state;
|
2012-04-25 08:05:42 +04:00
|
|
|
debug.head = head;
|
2015-04-13 09:56:26 +03:00
|
|
|
debug.base = ldso.base;
|
2012-04-25 08:05:42 +04:00
|
|
|
debug.state = 0;
|
|
|
|
_dl_debug_state();
|
|
|
|
|
2017-07-04 23:58:28 +03:00
|
|
|
if (replace_argv0) argv[0] = replace_argv0;
|
|
|
|
|
add support for init/fini array in main program, and greatly simplify
modern (4.7.x and later) gcc uses init/fini arrays, rather than the
legacy _init/_fini function pasting and crtbegin/crtend ctors/dtors
system, on most or all archs. some archs had already switched a long
time ago. without following this change, global ctors/dtors will cease
to work under musl when building with new gcc versions.
the most surprising part of this patch is that it actually reduces the
size of the init code, for both static and shared libc. this is
achieved by (1) unifying the handling of the main program and shared
libraries in the dynamic linker, and (2) eliminating the
glibc-inspired rube goldberg machine for passing around init and fini
function pointers. to clarify, some background:
the function signature for __libc_start_main was based on glibc, as
part of the original goal of being able to run some glibc-linked
binaries. it worked by having the crt1 code, which is linked into
every application, static or dynamic, obtain and pass pointers to the
init and fini functions, which __libc_start_main is then responsible
for using and recording for later use, as necessary. however, in
neither the static-linked nor dynamic-linked case do we actually need
crt1.o's help. with dynamic linking, all the pointers are available in
the _DYNAMIC block. with static linking, it's safe to simply access
the _init/_fini and __init_array_start, etc. symbols directly.
obviously changing the __libc_start_main function signature in an
incompatible way would break both old musl-linked programs and
glibc-linked programs, so let's not do that. instead, the function can
just ignore the information it doesn't need. new archs need not even
provide the useless args in their versions of crt1.o. existing archs
should continue to provide it as long as there is an interest in
having newly-linked applications be able to run on old versions of
musl; at some point in the future, this support can be removed.
2013-07-21 11:00:54 +04:00
|
|
|
errno = 0;
|
|
|
|
|
2015-04-13 09:56:26 +03:00
|
|
|
CRTJMP((void *)aux[AT_ENTRY], argv-1);
|
|
|
|
for(;;);
|
2012-12-01 02:56:23 +04:00
|
|
|
}
|
|
|
|
|
2017-03-13 15:52:41 +03:00
|
|
|
static void prepare_lazy(struct dso *p)
|
|
|
|
{
|
|
|
|
size_t dyn[DYN_CNT], n, flags1=0;
|
|
|
|
decode_vec(p->dynv, dyn, DYN_CNT);
|
|
|
|
search_vec(p->dynv, &flags1, DT_FLAGS_1);
|
|
|
|
if (dyn[DT_BIND_NOW] || (dyn[DT_FLAGS] & DF_BIND_NOW) || (flags1 & DF_1_NOW))
|
|
|
|
return;
|
|
|
|
n = dyn[DT_RELSZ]/2 + dyn[DT_RELASZ]/3 + dyn[DT_PLTRELSZ]/2 + 1;
|
|
|
|
if (NEED_MIPS_GOT_RELOCS) {
|
|
|
|
size_t j=0; search_vec(p->dynv, &j, DT_MIPS_GOTSYM);
|
|
|
|
size_t i=0; search_vec(p->dynv, &i, DT_MIPS_SYMTABNO);
|
|
|
|
n += i-j;
|
|
|
|
}
|
|
|
|
p->lazy = calloc(n, 3*sizeof(size_t));
|
|
|
|
if (!p->lazy) {
|
|
|
|
error("Error preparing lazy relocation for %s: %m", p->name);
|
|
|
|
longjmp(*rtld_fail, 1);
|
|
|
|
}
|
|
|
|
p->lazy_next = lazy_head;
|
|
|
|
lazy_head = p;
|
|
|
|
}
|
|
|
|
|
2011-06-27 03:23:28 +04:00
|
|
|
void *dlopen(const char *file, int mode)
|
|
|
|
{
|
2017-03-13 15:52:41 +03:00
|
|
|
struct dso *volatile p, *orig_tail, *orig_syms_tail, *orig_lazy_head, *next;
|
2015-11-12 23:50:26 +03:00
|
|
|
struct tls_module *orig_tls_tail;
|
2012-10-06 09:22:51 +04:00
|
|
|
size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
|
2011-06-27 03:23:28 +04:00
|
|
|
size_t i;
|
2012-02-08 05:31:27 +04:00
|
|
|
int cs;
|
2013-07-24 10:38:05 +04:00
|
|
|
jmp_buf jb;
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
struct dso **volatile ctor_queue = 0;
|
2011-06-27 03:23:28 +04:00
|
|
|
|
|
|
|
if (!file) return head;
|
|
|
|
|
2012-02-08 05:31:27 +04:00
|
|
|
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
|
2011-06-27 03:23:28 +04:00
|
|
|
pthread_rwlock_wrlock(&lock);
|
2012-10-05 19:51:50 +04:00
|
|
|
__inhibit_ptc();
|
2011-06-27 03:23:28 +04:00
|
|
|
|
2012-10-05 19:51:50 +04:00
|
|
|
p = 0;
|
2019-03-02 06:47:29 +03:00
|
|
|
if (shutting_down) {
|
|
|
|
error("Cannot dlopen while program is exiting.");
|
|
|
|
goto end;
|
|
|
|
}
|
2015-11-12 23:50:26 +03:00
|
|
|
orig_tls_tail = tls_tail;
|
2012-10-05 19:51:50 +04:00
|
|
|
orig_tls_cnt = tls_cnt;
|
2012-10-06 09:22:51 +04:00
|
|
|
orig_tls_offset = tls_offset;
|
|
|
|
orig_tls_align = tls_align;
|
2017-03-13 15:52:41 +03:00
|
|
|
orig_lazy_head = lazy_head;
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
orig_syms_tail = syms_tail;
|
2012-10-05 09:15:25 +04:00
|
|
|
orig_tail = tail;
|
2013-01-24 07:07:45 +04:00
|
|
|
noload = mode & RTLD_NOLOAD;
|
2012-10-05 09:15:25 +04:00
|
|
|
|
2013-07-24 10:38:05 +04:00
|
|
|
rtld_fail = &jb;
|
|
|
|
if (setjmp(*rtld_fail)) {
|
2011-06-27 03:23:28 +04:00
|
|
|
/* Clean up anything new that was (partially) loaded */
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
revert_syms(orig_syms_tail);
|
2011-06-27 03:23:28 +04:00
|
|
|
for (p=orig_tail->next; p; p=next) {
|
|
|
|
next = p->next;
|
2014-06-19 10:01:06 +04:00
|
|
|
while (p->td_index) {
|
|
|
|
void *tmp = p->td_index->next;
|
|
|
|
free(p->td_index);
|
|
|
|
p->td_index = tmp;
|
|
|
|
}
|
2015-09-22 22:12:48 +03:00
|
|
|
free(p->funcdescs);
|
2015-04-04 07:15:19 +03:00
|
|
|
if (p->rpath != p->rpath_orig)
|
|
|
|
free(p->rpath);
|
fix and overhaul dlsym dependency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
free(p->deps);
|
2015-09-22 22:12:48 +03:00
|
|
|
unmap_library(p);
|
2011-06-27 03:23:28 +04:00
|
|
|
free(p);
|
|
|
|
}
|
2019-03-10 20:16:59 +03:00
|
|
|
free(ctor_queue);
|
|
|
|
ctor_queue = 0;
|
2015-11-12 23:50:26 +03:00
|
|
|
if (!orig_tls_tail) libc.tls_head = 0;
|
|
|
|
tls_tail = orig_tls_tail;
|
2017-01-05 06:54:06 +03:00
|
|
|
if (tls_tail) tls_tail->next = 0;
|
2012-10-05 19:51:50 +04:00
|
|
|
tls_cnt = orig_tls_cnt;
|
2012-10-06 09:22:51 +04:00
|
|
|
tls_offset = orig_tls_offset;
|
|
|
|
tls_align = orig_tls_align;
|
2017-03-13 15:52:41 +03:00
|
|
|
lazy_head = orig_lazy_head;
|
2011-06-27 03:23:28 +04:00
|
|
|
tail = orig_tail;
|
|
|
|
tail->next = 0;
|
2012-02-08 05:31:27 +04:00
|
|
|
p = 0;
|
2012-04-23 20:03:31 +04:00
|
|
|
goto end;
|
2013-08-24 07:13:25 +04:00
|
|
|
} else p = load_library(file, head);
|
2012-03-23 08:28:20 +04:00
|
|
|
|
|
|
|
if (!p) {
|
2015-04-19 01:00:22 +03:00
|
|
|
error(noload ?
|
2013-01-24 07:07:45 +04:00
|
|
|
"Library %s is not already loaded" :
|
|
|
|
"Error loading shared library %s: %m",
|
|
|
|
file);
|
2012-02-08 05:31:27 +04:00
|
|
|
goto end;
|
2011-06-27 03:23:28 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* First load handling */
|
fix and overhaul dlsym dependency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
load_deps(p);
|
|
|
|
extend_bfs_deps(p);
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
pthread_mutex_lock(&init_fini_lock);
|
|
|
|
if (!p->constructed) ctor_queue = queue_ctors(p);
|
|
|
|
pthread_mutex_unlock(&init_fini_lock);
|
fix and overhaul dlsym dependency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (!p->relocated && (mode & RTLD_LAZY)) {
|
|
|
|
prepare_lazy(p);
|
|
|
|
for (i=0; p->deps[i]; i++)
|
|
|
|
if (!p->deps[i]->relocated)
|
|
|
|
prepare_lazy(p->deps[i]);
|
2017-07-04 18:34:39 +03:00
|
|
|
}
|
fix and overhaul dlsym dependency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (!p->relocated || (mode & RTLD_GLOBAL)) {
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
/* Make new symbols global, at least temporarily, so we can do
|
|
|
|
* relocations. If not RTLD_GLOBAL, this is reverted below. */
|
|
|
|
add_syms(p);
|
2017-07-04 17:58:13 +03:00
|
|
|
for (i=0; p->deps[i]; i++)
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
add_syms(p->deps[i]);
|
2017-07-04 18:34:39 +03:00
|
|
|
}
|
fix and overhaul dlsym dependency order, always record direct deps
dlsym with an explicit handle is specified to use "dependency order",
a breadth-first search rooted at the argument. this has always been
implemented by iterating a flattened dependency list built at dlopen
time. however, the logic for building this list was completely wrong
except in trivial cases; it simply used the list of libraries loaded
since a given library, and their direct dependencies, as that
library's dependencies, which could result in misordering, wrongful
omission of deep dependencies from the search, and wrongful inclusion
of unrelated libraries in the search.
further, libraries did not have any recorded list of resolved
dependencies until they were explicitly dlopened, meaning that
DT_NEEDED entries had to be resolved again whenever a library
participated as a dependency of more than one dlopened library.
with this overhaul, the resolved direct dependency list of each
library is always recorded when it is first loaded, and can be
extended to a full flattened breadth-first search list if dlopen is
called on the library. the extension is performed using the direct
dependency list as a queue and appending copies of the direct
dependency list of each dependency in the queue, excluding duplicates,
until the end of the queue is reached. the direct deps remain
available for future use as the initial subarray of the full deps
array.
first-load logic in dlopen is updated to match these changes, and
clarified.
2019-02-27 02:05:19 +03:00
|
|
|
if (!p->relocated) {
|
2011-06-27 03:23:28 +04:00
|
|
|
reloc_all(p);
|
|
|
|
}
|
|
|
|
|
rework ldso handling of global symbol table for consistency
when loading libraries with dlopen, the caller can request that the
library's symbols become part of the global symbol table, or that they
only be used for resolving relocations in the loaded library and its
dependencies. in the latter case, a subsequent dlopen of the same
library can upgrade it to global status.
previously, if a library was upgraded from local to global mode, its
symbols entered the symbol lookup search order at the point where the
library was originally loaded. this means that a new call to dlopen
could change the value of a symbol that already had a visible
definition, an inconsistency which applications could observe.
POSIX is unclear whether this should happen or whether it's permitted
to happen, but the resolution of Austin Group issue #982 made it
formally unspecified.
with this patch, a library whose mode is upgraded from local to global
enters the symbol lookup order at the point where it was made global,
so that symbol resolution before and after the upgrade are consistent.
in order to implement this change, the per-dso global flag is replaced
with a separate set of linked-list pointers for participation in the
global symbol table. this permits the order of dso objects for symbol
resolution to differ from the order used for iteration of all loaded
libraries. it also improves performance of find_sym, by avoiding a
branch per iteration and skipping, and especially in the case where
many non-global libraries have been loaded, by allowing the loop to
skip over them entirely. logic for temporarily adding non-global
libraries to the symbol table for relocation purposes is also mildly
simplified.
2017-03-13 04:03:05 +03:00
|
|
|
/* If RTLD_GLOBAL was not specified, undo any new additions
|
|
|
|
* to the global symbol table. This is a nop if the library was
|
|
|
|
* previously loaded and already global. */
|
|
|
|
if (!(mode & RTLD_GLOBAL))
|
|
|
|
revert_syms(orig_syms_tail);
|
2011-06-27 03:23:28 +04:00
|
|
|
|
2017-03-13 15:52:41 +03:00
|
|
|
/* Processing of deferred lazy relocations must not happen until
|
|
|
|
* the new libraries are committed; otherwise we could end up with
|
|
|
|
* relocations resolved to symbol definitions that get removed. */
|
|
|
|
redo_lazy_relocs();
|
|
|
|
|
2012-10-06 09:22:51 +04:00
|
|
|
update_tls_size();
|
install dynamic tls synchronously at dlopen, streamline access
previously, dynamic loading of new libraries with thread-local storage
allocated the storage needed for all existing threads at load-time,
precluding late failure that can't be handled, but left installation
in existing threads to take place lazily on first access. this imposed
an additional memory access and branch on every dynamic tls access,
and imposed a requirement, which was not actually met, that the
dynamic tlsdesc asm functions preserve all call-clobbered registers
before calling C code to install new dynamic tls on first access.
the x86[_64] versions of this code wrongly omitted saving and
restoring of fpu/vector registers, assuming the compiler would not
generate anything using them in the called C code. the arm and aarch64
versions saved known existing registers, but failed to be future-proof
against expansion of the register file.
now that we track live threads in a list, it's possible to install the
new dynamic tls for each thread at dlopen time. for the most part,
synchronization is not needed, because if a thread has not
synchronized with completion of the dlopen, there is no way it can
meaningfully request access to a slot past the end of the old dtv,
which remains valid for accessing slots which already existed.
however, it is necessary to ensure that, if a thread sees its new dtv
pointer, it sees correct pointers in each of the slots that existed
prior to the dlopen. my understanding is that, on most real-world
coherency architectures including all the ones we presently support, a
built-in consume order guarantees this; however, don't rely on that.
instead, the SYS_membarrier syscall is used to ensure that all threads
see the stores to the slots of their new dtv prior to the installation
of the new dtv. if it is not supported, the same is implemented in
userspace via signals, using the same mechanism as __synccall.
the __tls_get_addr function, variants, and dynamic tlsdesc asm
functions are all updated to remove the fallback paths for claiming
new dynamic tls, and are now all branch-free.
2019-02-18 07:22:27 +03:00
|
|
|
if (tls_cnt != orig_tls_cnt)
|
|
|
|
install_new_tls();
|
2012-04-25 08:05:42 +04:00
|
|
|
_dl_debug_state();
|
2012-10-05 21:09:09 +04:00
|
|
|
orig_tail = tail;
|
2011-06-27 06:09:32 +04:00
|
|
|
end:
|
2012-10-05 19:51:50 +04:00
|
|
|
__release_ptc();
|
2012-11-01 05:27:48 +04:00
|
|
|
if (p) gencnt++;
|
2011-06-27 03:23:28 +04:00
|
|
|
pthread_rwlock_unlock(&lock);
|
overhaul shared library ctor execution for dependency order, concurrency
previously, shared library constructors at program start and dlopen
time were executed in reverse load order. some libraries, however,
rely on a depth-first dependency order, which most other dynamic
linker implementations provide. this is a much more reasonable, less
arbitrary order, and it turns out to have much better properties with
regard to how slow-running ctors affect multi-threaded programs, and
how recursive dlopen behaves.
this commit builds on previous work tracking direct dependencies of
each dso (commit 403555690775f7c8806372644f543518e6664e3b), and
performs a topological sort on the dependency graph at load time while
the main ldso lock is held and before success is committed, producing
a queue of constructors needed by the newly-loaded dso (or main
application). in the case of circular dependencies, the dependency
chain is simply broken at points where it becomes circular.
when the ctor queue is run, the init_fini_lock is held only for
iteration purposes; it's released during execution of each ctor, so
that arbitrarily-long-running application code no longer runs with a
lock held in the caller. this prevents a dlopen with slow ctors in one
thread from arbitrarily delaying other threads that call dlopen.
fully-independent ctors can run concurrently; when multiple threads
call dlopen with a shared dependency, one will end up executing the
ctor while the other waits on a condvar for it to finish.
another corner case improved by these changes is recursive dlopen
(call from a ctor). previously, recursive calls to dlopen could cause
a ctor for a library to be executed before the ctor for its
dependency, even when there was no relation between the calling
library and the library it was loading, simply due to the naive
reverse-load-order traversal. now, we can guarantee that recursive
dlopen in non-circular-dependency usage preserves the desired ctor
execution order properties, and that even in circular usage, at worst
the libraries whose ctors call dlopen will fail to have completed
construction when ctors that depend on them run.
init_fini_lock is changed to a normal, non-recursive mutex, since it
is no longer held while calling back into application code.
2019-03-02 05:06:23 +03:00
|
|
|
if (ctor_queue) {
|
|
|
|
do_init_fini(ctor_queue);
|
|
|
|
free(ctor_queue);
|
|
|
|
}
|
2012-02-08 05:31:27 +04:00
|
|
|
pthread_setcancelstate(cs, 0);
|
2011-06-27 03:23:28 +04:00
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2018-09-05 19:43:34 +03:00
|
|
|
hidden int __dl_invalid_handle(void *h)
|
2013-01-10 23:05:40 +04:00
|
|
|
{
|
|
|
|
struct dso *p;
|
|
|
|
for (p=head; p; p=p->next) if (h==p) return 0;
|
2015-04-19 01:00:22 +03:00
|
|
|
error("Invalid library handle %p", (void *)h);
|
2013-01-10 23:05:40 +04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2015-09-23 02:41:41 +03:00
|
|
|
static void *addr2dso(size_t a)
|
|
|
|
{
|
|
|
|
struct dso *p;
|
2015-10-16 05:51:56 +03:00
|
|
|
size_t i;
|
|
|
|
if (DL_FDPIC) for (p=head; p; p=p->next) {
|
|
|
|
i = count_syms(p);
|
|
|
|
if (a-(size_t)p->funcdescs < i*sizeof(*p->funcdescs))
|
|
|
|
return p;
|
|
|
|
}
|
2015-09-23 02:41:41 +03:00
|
|
|
for (p=head; p; p=p->next) {
|
|
|
|
if (DL_FDPIC && p->loadmap) {
|
|
|
|
for (i=0; i<p->loadmap->nsegs; i++) {
|
|
|
|
if (a-p->loadmap->segs[i].p_vaddr
|
|
|
|
< p->loadmap->segs[i].p_memsz)
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
} else {
|
2018-06-28 19:20:58 +03:00
|
|
|
Phdr *ph = p->phdr;
|
|
|
|
size_t phcnt = p->phnum;
|
|
|
|
size_t entsz = p->phentsize;
|
|
|
|
size_t base = (size_t)p->base;
|
|
|
|
for (; phcnt--; ph=(void *)((char *)ph+entsz)) {
|
|
|
|
if (ph->p_type != PT_LOAD) continue;
|
|
|
|
if (a-base-ph->p_vaddr < ph->p_memsz)
|
|
|
|
return p;
|
|
|
|
}
|
2015-09-23 02:41:41 +03:00
|
|
|
if (a-(size_t)p->map < p->map_len)
|
2018-06-28 19:20:58 +03:00
|
|
|
return 0;
|
2015-09-23 02:41:41 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-08-16 08:42:13 +04:00
|
|
|
static void *do_dlsym(struct dso *p, const char *s, void *ra)
|
2011-06-27 03:23:28 +04:00
|
|
|
{
|
2019-08-11 02:14:40 +03:00
|
|
|
int use_deps = 0;
|
|
|
|
if (p == head || p == RTLD_DEFAULT) {
|
|
|
|
p = head;
|
|
|
|
} else if (p == RTLD_NEXT) {
|
|
|
|
p = addr2dso((size_t)ra);
|
|
|
|
if (!p) p=head;
|
|
|
|
p = p->next;
|
|
|
|
} else if (__dl_invalid_handle(p)) {
|
2013-01-24 05:21:36 +04:00
|
|
|
return 0;
|
2019-08-11 02:14:40 +03:00
|
|
|
} else
|
|
|
|
use_deps = 1;
|
|
|
|
struct symdef def = find_sym2(p, s, 0, use_deps);
|
|
|
|
if (!def.sym) {
|
|
|
|
error("Symbol not found: %s", s);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if ((def.sym->st_info&0xf) == STT_TLS)
|
|
|
|
return __tls_get_addr((tls_mod_off_t []){def.dso->tls_id, def.sym->st_value-DTP_OFFSET});
|
|
|
|
if (DL_FDPIC && (def.sym->st_info&0xf) == STT_FUNC)
|
|
|
|
return def.dso->funcdescs + (def.sym - def.dso->syms);
|
|
|
|
return laddr(def.dso, def.sym->st_value);
|
2011-06-27 03:23:28 +04:00
|
|
|
}
|
|
|
|
|
2018-06-27 22:29:12 +03:00
|
|
|
/* Resolve an address to the library, and if possible the symbol, that
 * contains it.
 *
 * Returns 0 if no loaded library contains addr_arg. Otherwise fills in
 * info->dli_fname and info->dli_fbase and returns 1; dli_sname and
 * dli_saddr describe the best matching symbol, or are both 0 when no
 * acceptable symbol was found. */
int dladdr(const void *addr_arg, Dl_info *info)
{
	size_t addr = (size_t)addr_arg;
	struct dso *p;
	Sym *symtab, *bestsym = 0;
	char *strtab;
	uint32_t nsym, i;
	size_t best = 0;
	size_t besterr = -1;

	/* The lock is only held while locating the library. */
	pthread_rwlock_rdlock(&lock);
	p = addr2dso(addr);
	pthread_rwlock_unlock(&lock);

	if (!p)
		return 0;

	symtab = p->syms;
	strtab = p->strings;
	nsym = count_syms(p);

	/* FDPIC: the address may point into the function descriptor
	 * table, in which case the table index is also the symbol index. */
	if (DL_FDPIC) {
		size_t idx = (addr-(size_t)p->funcdescs)
			/ sizeof(*p->funcdescs);
		if (idx < nsym && (symtab[idx].st_info&0xf) == STT_FUNC) {
			best = (size_t)(p->funcdescs + idx);
			bestsym = symtab + idx;
			besterr = 0;
		}
	}

	/* Otherwise pick the symbol with the highest address that does
	 * not exceed addr, stopping early on an exact hit. */
	if (!best) {
		for (i = 0; i < nsym; i++) {
			Sym *cand = symtab + i;

			if (!cand->st_value)
				continue;
			if (!(1<<(cand->st_info&0xf) & OK_TYPES))
				continue;
			if (!(1<<(cand->st_info>>4) & OK_BINDS))
				continue;

			size_t symaddr = (size_t)laddr(p, cand->st_value);
			if (symaddr > addr || symaddr <= best)
				continue;

			best = symaddr;
			bestsym = cand;
			besterr = addr - symaddr;
			if (symaddr == addr)
				break;
		}
	}

	/* Discard the candidate if addr lies past the end of the symbol.
	 * For st_size == 0 the unsigned st_size-1 wraps to the maximum
	 * value, so such a symbol is never discarded here. */
	if (best && besterr > bestsym->st_size-1) {
		best = 0;
		bestsym = 0;
	}

	info->dli_fname = p->name;
	info->dli_fbase = p->map;

	if (!best) {
		info->dli_sname = 0;
		info->dli_saddr = 0;
		return 1;
	}

	/* Report FDPIC functions by the address of their descriptor. */
	if (DL_FDPIC && (bestsym->st_info&0xf) == STT_FUNC)
		best = (size_t)(p->funcdescs + (bestsym - p->syms));

	info->dli_sname = strtab + bestsym->st_name;
	info->dli_saddr = (void *)best;

	return 1;
}
|
|
|
|
|
2018-09-05 19:43:34 +03:00
|
|
|
hidden void *__dlsym(void *restrict p, const char *restrict s, void *restrict ra)
|
2011-06-27 03:23:28 +04:00
|
|
|
{
|
|
|
|
void *res;
|
|
|
|
pthread_rwlock_rdlock(&lock);
|
2011-08-16 08:42:13 +04:00
|
|
|
res = do_dlsym(p, s, ra);
|
2011-06-27 03:23:28 +04:00
|
|
|
pthread_rwlock_unlock(&lock);
|
|
|
|
return res;
|
|
|
|
}
|
2012-11-01 05:27:48 +04:00
|
|
|
|
2019-08-09 22:26:23 +03:00
|
|
|
/* dlsym variant that applies time64 symbol redirection.
 *
 * When _REDIR_TIME64 is enabled, the requested name s is rewritten to
 * the name its time64 replacement would have:
 *
 *   - names ending in "time":  "__" name "64"
 *   - all other names:         "__" name "_time64"
 *
 * A trailing "_r" is removed before applying the rule and appended
 * again afterwards. The rewritten name is only used if libc itself
 * (ldso) defines it; otherwise, or if the name is too short or too
 * long for the buffer, the lookup proceeds with the name as given.
 *
 * Parameters and return value are those of __dlsym. */
hidden void *__dlsym_redir_time64(void *restrict p, const char *restrict s, void *restrict ra)
{
#if _REDIR_TIME64
	const char *suffix, *suffix2 = "";
	char redir[36];

	/* Map the symbol name to a time64 version of itself according to the
	 * pattern used for naming the redirected time64 symbols. */
	size_t l = strnlen(s, sizeof redir);
	if (l<4 || l==sizeof redir) goto no_redir;
	if (s[l-2]=='_' && s[l-1]=='r') {
		l -= 2;
		suffix2 = s+l;
	}
	if (l<4) goto no_redir;
	/* Compare exactly four characters: when "_r" was split off above,
	 * s+l-4 still points at "time_r", which a full-string comparison
	 * against "time" would never match. */
	if (!strncmp(s+l-4, "time", 4)) suffix = "64";
	else suffix = "_time64";

	/* Use the presence of the remapped symbol name in libc to determine
	 * whether it's one that requires time64 redirection; replace if so. */
	snprintf(redir, sizeof redir, "__%.*s%s%s", (int)l, s, suffix, suffix2);
	if (find_sym(&ldso, redir, 1).sym) s = redir;
no_redir:
#endif
	return __dlsym(p, s, ra);
}
|
|
|
|
|
2012-11-01 05:27:48 +04:00
|
|
|
int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void *data), void *data)
|
|
|
|
{
|
|
|
|
struct dso *current;
|
|
|
|
struct dl_phdr_info info;
|
|
|
|
int ret = 0;
|
|
|
|
for(current = head; current;) {
|
|
|
|
info.dlpi_addr = (uintptr_t)current->base;
|
|
|
|
info.dlpi_name = current->name;
|
|
|
|
info.dlpi_phdr = current->phdr;
|
|
|
|
info.dlpi_phnum = current->phnum;
|
|
|
|
info.dlpi_adds = gencnt;
|
|
|
|
info.dlpi_subs = 0;
|
|
|
|
info.dlpi_tls_modid = current->tls_id;
|
2015-11-12 23:50:26 +03:00
|
|
|
info.dlpi_tls_data = current->tls.image;
|
2012-11-01 05:27:48 +04:00
|
|
|
|
|
|
|
ret = (callback)(&info, sizeof (info), data);
|
|
|
|
|
|
|
|
if (ret != 0) break;
|
|
|
|
|
|
|
|
pthread_rwlock_rdlock(&lock);
|
|
|
|
current = current->next;
|
|
|
|
pthread_rwlock_unlock(&lock);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
2011-06-27 03:23:28 +04:00
|
|
|
|
2015-04-19 01:00:22 +03:00
|
|
|
static void error(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
va_start(ap, fmt);
|
|
|
|
if (!runtime) {
|
|
|
|
vdprintf(2, fmt, ap);
|
|
|
|
dprintf(2, "\n");
|
|
|
|
ldso_fail = 1;
|
|
|
|
va_end(ap);
|
|
|
|
return;
|
|
|
|
}
|
2016-01-26 01:56:00 +03:00
|
|
|
__dl_vseterr(fmt, ap);
|
2015-04-19 01:00:22 +03:00
|
|
|
va_end(ap);
|
|
|
|
}
|