unify static and dynamic linked implementations of thread-local storage

this both allows removal of some of the main remaining uses of the
SHARED macro and clears one obstacle to static-linked dlopen support,
which may be added at some point in the future.

specialized single-TLS-module versions of __copy_tls and __reset_tls
are removed and replaced with code adapted from their dynamic-linked
versions, capable of operating on a whole chain of TLS modules, and
use of the dynamic linker's DSO chain (which contains large struct dso
objects) by these functions is replaced with a new chain of struct
tls_module objects containing only the information needed for
implementing TLS. this may also yield some performance benefit
initializing TLS for a new thread when a large number of modules
without TLS have been loaded, since there is no need to walk
structures for modules without TLS.
This commit is contained in:
Rich Felker 2015-11-12 15:50:26 -05:00
parent ad1cd43a86
commit d56460c939
4 changed files with 111 additions and 132 deletions

71
src/env/__init_tls.c vendored
View File

@ -8,9 +8,6 @@
#include "atomic.h"
#include "syscall.h"
#ifndef SHARED
static
#endif
int __init_tp(void *p)
{
pthread_t td = p;
@ -24,8 +21,6 @@ int __init_tp(void *p)
return 0;
}
#ifndef SHARED
static struct builtin_tls {
char c;
struct pthread pt;
@ -33,33 +28,40 @@ static struct builtin_tls {
} builtin_tls[1];
#define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
struct tls_image {
void *image;
size_t len, size, align;
} __static_tls;
#define T __static_tls
static struct tls_module main_tls;
void *__copy_tls(unsigned char *mem)
{
pthread_t td;
if (!T.image) return mem;
void **dtv = (void *)mem;
dtv[0] = (void *)1;
struct tls_module *p;
size_t i;
void **dtv;
#ifdef TLS_ABOVE_TP
mem += sizeof(void *) * 2;
mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1);
dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1);
mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1);
td = (pthread_t)mem;
mem += sizeof(struct pthread);
for (i=1, p=libc.tls_head; p; i++, p=p->next) {
dtv[i] = mem + p->offset;
memcpy(dtv[i], p->image, p->len);
}
#else
dtv = (void **)mem;
mem += libc.tls_size - sizeof(struct pthread);
mem -= (uintptr_t)mem & (T.align-1);
mem -= (uintptr_t)mem & (libc.tls_align-1);
td = (pthread_t)mem;
mem -= T.size;
for (i=1, p=libc.tls_head; p; i++, p=p->next) {
dtv[i] = mem - p->offset;
memcpy(dtv[i], p->image, p->len);
}
#endif
dtv[0] = (void *)libc.tls_cnt;
td->dtv = td->dtv_copy = dtv;
dtv[1] = mem;
memcpy(mem, T.image, T.len);
return td;
}
@ -69,7 +71,7 @@ typedef Elf32_Phdr Phdr;
typedef Elf64_Phdr Phdr;
#endif
void __init_tls(size_t *aux)
static void static_init_tls(size_t *aux)
{
unsigned char *p;
size_t n;
@ -86,16 +88,24 @@ void __init_tls(size_t *aux)
}
if (tls_phdr) {
T.image = (void *)(base + tls_phdr->p_vaddr);
T.len = tls_phdr->p_filesz;
T.size = tls_phdr->p_memsz;
T.align = tls_phdr->p_align;
main_tls.image = (void *)(base + tls_phdr->p_vaddr);
main_tls.len = tls_phdr->p_filesz;
main_tls.size = tls_phdr->p_memsz;
main_tls.align = tls_phdr->p_align;
libc.tls_cnt = 1;
libc.tls_head = &main_tls;
}
T.size += (-T.size - (uintptr_t)T.image) & (T.align-1);
if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN;
main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
& (main_tls.align-1);
if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
#ifndef TLS_ABOVE_TP
main_tls.offset = main_tls.size;
#endif
libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread)
libc.tls_align = main_tls.align;
libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
+ main_tls.size + main_tls.align
+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
if (libc.tls_size > sizeof builtin_tls) {
@ -117,6 +127,5 @@ void __init_tls(size_t *aux)
if (__init_tp(__copy_tls(mem)) < 0)
a_crash();
}
#else
void __init_tls(size_t *auxv) { }
#endif
weak_alias(static_init_tls, __init_tls);

23
src/env/__reset_tls.c vendored
View File

@ -1,21 +1,16 @@
#ifndef SHARED
#include <string.h>
#include "pthread_impl.h"
extern struct tls_image {
void *image;
size_t len, size, align;
} __static_tls;
#define T __static_tls
#include "libc.h"
void __reset_tls()
{
if (!T.size) return;
pthread_t self = __pthread_self();
memcpy(self->dtv[1], T.image, T.len);
memset((char *)self->dtv[1]+T.len, 0, T.size-T.len);
struct tls_module *p;
size_t i, n = (size_t)self->dtv[0];
if (n) for (p=libc.tls_head, i=1; i<=n; i++, p=p->next) {
if (!self->dtv[i]) continue;
memcpy(self->dtv[i], p->image, p->len);
memset((char *)self->dtv[i]+p->len, 0,
p->size - p->len);
}
}
#endif

View File

@ -11,13 +11,20 @@ struct __locale_struct {
const struct __locale_map *volatile cat[6];
};
/* Per-module TLS description, holding only what is needed to set up and
 * reset a thread's copy of that module's thread-local storage. Modules
 * are linked into a singly linked chain starting at libc.tls_head. */
struct tls_module {
/* Next module in the chain; walked via p=p->next by __copy_tls and
 * __reset_tls. */
struct tls_module *next;
/* Initialization image: source of the memcpy into each thread's block. */
void *image;
/* len:    bytes copied from image (taken from the PT_TLS p_filesz).
 * size:   full size of the module's TLS (taken from p_memsz); the bytes
 *         past len are zero-filled by __reset_tls.
 * align:  alignment of the module's TLS (taken from p_align).
 * offset: used by __copy_tls to locate this module's block: added to the
 *         address just past struct pthread when TLS_ABOVE_TP is defined,
 *         subtracted from the thread descriptor address otherwise. */
size_t len, size, align, offset;
};
struct __libc {
int can_do_threads;
int threaded;
int secure;
volatile int threads_minus_1;
size_t *auxv;
size_t tls_size;
struct tls_module *tls_head;
size_t tls_size, tls_align, tls_cnt;
size_t page_size;
struct __locale_struct global_locale;
};

View File

@ -70,8 +70,8 @@ struct dso {
char kernel_mapped;
struct dso **deps, *needed_by;
char *rpath_orig, *rpath;
void *tls_image;
size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
struct tls_module tls;
size_t tls_id;
size_t relro_start, relro_end;
void **new_dtv;
unsigned char *new_tls;
@ -99,6 +99,7 @@ struct symdef {
int __init_tp(void *);
void __init_libc(char **, char *);
void *__copy_tls(unsigned char *);
const char *__libc_get_version(void);
@ -123,6 +124,7 @@ static int noload;
static jmp_buf *rtld_fail;
static pthread_rwlock_t lock;
static struct debug debug;
static struct tls_module *tls_tail;
static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
static size_t static_tls_cnt;
static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
@ -397,14 +399,14 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
break;
#ifdef TLS_ABOVE_TP
case REL_TPOFF:
*reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend;
*reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend;
break;
#else
case REL_TPOFF:
*reloc_addr = tls_val - def.dso->tls_offset + addend;
*reloc_addr = tls_val - def.dso->tls.offset + addend;
break;
case REL_TPOFF_NEG:
*reloc_addr = def.dso->tls_offset - tls_val + addend;
*reloc_addr = def.dso->tls.offset - tls_val + addend;
break;
#endif
case REL_TLSDESC:
@ -426,10 +428,10 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
} else {
reloc_addr[0] = (size_t)__tlsdesc_static;
#ifdef TLS_ABOVE_TP
reloc_addr[1] = tls_val + def.dso->tls_offset
reloc_addr[1] = tls_val + def.dso->tls.offset
+ TPOFF_K + addend;
#else
reloc_addr[1] = tls_val - def.dso->tls_offset
reloc_addr[1] = tls_val - def.dso->tls.offset
+ addend;
#endif
}
@ -567,9 +569,9 @@ static void *map_library(int fd, struct dso *dso)
dyn = ph->p_vaddr;
} else if (ph->p_type == PT_TLS) {
tls_image = ph->p_vaddr;
dso->tls_align = ph->p_align;
dso->tls_len = ph->p_filesz;
dso->tls_size = ph->p_memsz;
dso->tls.align = ph->p_align;
dso->tls.len = ph->p_filesz;
dso->tls.size = ph->p_memsz;
} else if (ph->p_type == PT_GNU_RELRO) {
dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
@ -694,7 +696,7 @@ static void *map_library(int fd, struct dso *dso)
done_mapping:
dso->base = base;
dso->dynv = laddr(dso, dyn);
if (dso->tls_size) dso->tls_image = laddr(dso, tls_image);
if (dso->tls.size) dso->tls.image = laddr(dso, tls_image);
if (!runtime) reclaim_gaps(dso);
free(allocated_buf);
return map;
@ -1011,8 +1013,8 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
* extended DTV capable of storing an additional slot for
* the newly-loaded DSO. */
alloc_size = sizeof *p + strlen(pathname) + 1;
if (runtime && temp_dso.tls_image) {
size_t per_th = temp_dso.tls_size + temp_dso.tls_align
if (runtime && temp_dso.tls.image) {
size_t per_th = temp_dso.tls.size + temp_dso.tls.align
+ sizeof(void *) * (tls_cnt+3);
n_th = libc.threads_minus_1 + 1;
if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
@ -1033,22 +1035,25 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
strcpy(p->name, pathname);
/* Add a shortname only if name arg was not an explicit pathname. */
if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
if (p->tls_image) {
if (p->tls.image) {
p->tls_id = ++tls_cnt;
tls_align = MAXP2(tls_align, p->tls_align);
tls_align = MAXP2(tls_align, p->tls.align);
#ifdef TLS_ABOVE_TP
p->tls_offset = tls_offset + ( (tls_align-1) &
-(tls_offset + (uintptr_t)p->tls_image) );
tls_offset += p->tls_size;
p->tls.offset = tls_offset + ( (tls_align-1) &
-(tls_offset + (uintptr_t)p->tls.image) );
tls_offset += p->tls.size;
#else
tls_offset += p->tls_size + p->tls_align - 1;
tls_offset -= (tls_offset + (uintptr_t)p->tls_image)
& (p->tls_align-1);
p->tls_offset = tls_offset;
tls_offset += p->tls.size + p->tls.align - 1;
tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
& (p->tls.align-1);
p->tls.offset = tls_offset;
#endif
p->new_dtv = (void *)(-sizeof(size_t) &
(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
if (tls_tail) tls_tail->next = &p->tls;
else libc.tls_head = &p->tls;
tls_tail = &p->tls;
}
tail->next = p;
@ -1238,53 +1243,8 @@ static void dl_debug_state(void)
weak_alias(dl_debug_state, _dl_debug_state);
void __reset_tls()
void __init_tls(size_t *auxv)
{
pthread_t self = __pthread_self();
struct dso *p;
for (p=head; p; p=p->next) {
if (!p->tls_id || !self->dtv[p->tls_id]) continue;
memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len);
memset((char *)self->dtv[p->tls_id]+p->tls_len, 0,
p->tls_size - p->tls_len);
if (p->tls_id == (size_t)self->dtv[0]) break;
}
}
void *__copy_tls(unsigned char *mem)
{
pthread_t td;
struct dso *p;
void **dtv;
#ifdef TLS_ABOVE_TP
dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1);
mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1);
td = (pthread_t)mem;
mem += sizeof(struct pthread);
for (p=head; p; p=p->next) {
if (!p->tls_id) continue;
dtv[p->tls_id] = mem + p->tls_offset;
memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
}
#else
dtv = (void **)mem;
mem += libc.tls_size - sizeof(struct pthread);
mem -= (uintptr_t)mem & (tls_align-1);
td = (pthread_t)mem;
for (p=head; p; p=p->next) {
if (!p->tls_id) continue;
dtv[p->tls_id] = mem - p->tls_offset;
memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
}
#endif
dtv[0] = (void *)tls_cnt;
td->dtv = td->dtv_copy = dtv;
return td;
}
__attribute__((__visibility__("hidden")))
@ -1321,12 +1281,12 @@ void *__tls_get_new(size_t *v)
unsigned char *mem;
for (p=head; ; p=p->next) {
if (!p->tls_id || self->dtv[p->tls_id]) continue;
mem = p->new_tls + (p->tls_size + p->tls_align)
mem = p->new_tls + (p->tls.size + p->tls.align)
* a_fetch_add(&p->new_tls_idx,1);
mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
& (p->tls_align-1);
mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
& (p->tls.align-1);
self->dtv[p->tls_id] = mem;
memcpy(mem, p->tls_image, p->tls_len);
memcpy(mem, p->tls.image, p->tls.len);
if (p->tls_id == v[0]) break;
}
__restore_sigs(&set);
@ -1335,6 +1295,8 @@ void *__tls_get_new(size_t *v)
static void update_tls_size()
{
libc.tls_cnt = tls_cnt;
libc.tls_align = tls_align;
libc.tls_size = ALIGN(
(1+tls_cnt) * sizeof(void *) +
tls_offset +
@ -1445,6 +1407,7 @@ _Noreturn void __dls3(size_t *sp)
* use during dynamic linking. If possible it will also serve as the
* thread pointer at runtime. */
libc.tls_size = sizeof builtin_tls;
libc.tls_align = tls_align;
if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
a_crash();
}
@ -1472,13 +1435,13 @@ _Noreturn void __dls3(size_t *sp)
interp_off = (size_t)phdr->p_vaddr;
else if (phdr->p_type == PT_TLS) {
tls_image = phdr->p_vaddr;
app.tls_len = phdr->p_filesz;
app.tls_size = phdr->p_memsz;
app.tls_align = phdr->p_align;
app.tls.len = phdr->p_filesz;
app.tls.size = phdr->p_memsz;
app.tls.align = phdr->p_align;
}
}
if (DL_FDPIC) app.loadmap = app_loadmap;
if (app.tls_size) app.tls_image = laddr(&app, tls_image);
if (app.tls.size) app.tls.image = laddr(&app, tls_image);
if (interp_off) ldso.name = laddr(&app, interp_off);
if ((aux[0] & (1UL<<AT_EXECFN))
&& strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
@ -1547,19 +1510,20 @@ _Noreturn void __dls3(size_t *sp)
dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
}
}
if (app.tls_size) {
if (app.tls.size) {
libc.tls_head = &app.tls;
app.tls_id = tls_cnt = 1;
#ifdef TLS_ABOVE_TP
app.tls_offset = 0;
tls_offset = app.tls_size
+ ( -((uintptr_t)app.tls_image + app.tls_size)
& (app.tls_align-1) );
app.tls.offset = 0;
tls_offset = app.tls.size
+ ( -((uintptr_t)app.tls.image + app.tls.size)
& (app.tls.align-1) );
#else
tls_offset = app.tls_offset = app.tls_size
+ ( -((uintptr_t)app.tls_image + app.tls_size)
& (app.tls_align-1) );
tls_offset = app.tls.offset = app.tls.size
+ ( -((uintptr_t)app.tls.image + app.tls.size)
& (app.tls.align-1) );
#endif
tls_align = MAXP2(tls_align, app.tls_align);
tls_align = MAXP2(tls_align, app.tls.align);
}
app.global = 1;
decode_dyn(&app);
@ -1668,6 +1632,7 @@ _Noreturn void __dls3(size_t *sp)
void *dlopen(const char *file, int mode)
{
struct dso *volatile p, *orig_tail, *next;
struct tls_module *orig_tls_tail;
size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
size_t i;
int cs;
@ -1680,6 +1645,7 @@ void *dlopen(const char *file, int mode)
__inhibit_ptc();
p = 0;
orig_tls_tail = tls_tail;
orig_tls_cnt = tls_cnt;
orig_tls_offset = tls_offset;
orig_tls_align = tls_align;
@ -1706,6 +1672,8 @@ void *dlopen(const char *file, int mode)
unmap_library(p);
free(p);
}
if (!orig_tls_tail) libc.tls_head = 0;
tls_tail = orig_tls_tail;
tls_cnt = orig_tls_cnt;
tls_offset = orig_tls_offset;
tls_align = orig_tls_align;
@ -1922,7 +1890,7 @@ int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void
info.dlpi_adds = gencnt;
info.dlpi_subs = 0;
info.dlpi_tls_modid = current->tls_id;
info.dlpi_tls_data = current->tls_image;
info.dlpi_tls_data = current->tls.image;
ret = (callback)(&info, sizeof (info), data);