From 69e8dca2e581b69499e9719543cb8e74e2d6063c Mon Sep 17 00:00:00 2001
From: skrll <skrll@NetBSD.org>
Date: Fri, 18 Jan 2019 11:59:03 +0000
Subject: [PATCH] Adapt
 https://svnweb.freebsd.org/base?view=revision&revision=342113 to NetBSD.

This brings us TLS descriptor resolution for dynamically loaded libraries
and makes tests/libexec/ld.elf_so/t_thread_local_dtor pass.

With suggestions from joerg@
---
 libexec/ld.elf_so/arch/aarch64/mdreloc.c    | 109 ++++++-------
 libexec/ld.elf_so/arch/aarch64/rtld_start.S | 164 ++++++++++++++++----
 2 files changed, 181 insertions(+), 92 deletions(-)

diff --git a/libexec/ld.elf_so/arch/aarch64/mdreloc.c b/libexec/ld.elf_so/arch/aarch64/mdreloc.c
index 58f4dc834845..1118cf71c3a6 100644
--- a/libexec/ld.elf_so/arch/aarch64/mdreloc.c
+++ b/libexec/ld.elf_so/arch/aarch64/mdreloc.c
@@ -1,4 +1,4 @@
-/* $NetBSD: mdreloc.c,v 1.12 2018/11/23 11:26:05 skrll Exp $ */
+/* $NetBSD: mdreloc.c,v 1.13 2019/01/18 11:59:03 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: mdreloc.c,v 1.12 2018/11/23 11:26:05 skrll Exp $");
+__RCSID("$NetBSD: mdreloc.c,v 1.13 2019/01/18 11:59:03 skrll Exp $");
 #endif /* not lint */
 
 #include <sys/types.h>
@@ -70,17 +70,16 @@ __RCSID("$NetBSD: mdreloc.c,v 1.12 2018/11/23 11:26:05 skrll Exp $");
 #include "rtld.h"
 
 struct tls_data {
-	int64_t index;
-	Obj_Entry *obj;
-	const Elf_Rela *rela;
+	size_t		td_tlsindex;
+	Elf_Addr	td_tlsoffs;
 };
 
 void _rtld_bind_start(void);
 void _rtld_relocate_nonplt_self(Elf_Dyn *, Elf_Addr);
 Elf_Addr _rtld_bind(const Obj_Entry *, Elf_Word);
-void *_rtld_tlsdesc(void *);
+void *_rtld_tlsdesc_static(void *);
+void *_rtld_tlsdesc_undef(void *);
 void *_rtld_tlsdesc_dynamic(void *);
-int64_t _rtld_tlsdesc_handle(struct tls_data *, u_int);
 
 /*
  * AARCH64 PLT looks like this;
@@ -117,67 +116,63 @@ _rtld_setup_pltgot(const Obj_Entry *obj)
 }
 
 static struct tls_data *
-_rtld_tlsdesc_alloc(Obj_Entry *obj, const Elf_Rela *rela)
+_rtld_tlsdesc_alloc(size_t tlsindex, Elf_Addr offs)
 {
 	struct tls_data *tlsdesc;
 
 	tlsdesc = xmalloc(sizeof(*tlsdesc));
-	tlsdesc->index = -1;
-	tlsdesc->obj = obj;
-	tlsdesc->rela = rela;
+	tlsdesc->td_tlsindex = tlsindex;
+	tlsdesc->td_tlsoffs = offs;
 
 	return tlsdesc;
 }
 
-static int64_t
-_rtld_tlsdesc_handle_locked(struct tls_data *tlsdesc, u_int flags)
+static void
+_rtld_tlsdesc_fill(const Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr *where, u_int flags)
 {
-	const Elf_Rela *rela;
 	const Elf_Sym *def;
 	const Obj_Entry *defobj;
-	Obj_Entry *obj;
+	Elf_Addr offs = 0;
+	unsigned long symnum = ELF_R_SYM(rela->r_info);
 
-	rela = tlsdesc->rela;
-	obj = tlsdesc->obj;
+	if (symnum != 0) {
+		def = _rtld_find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
+		    flags);
+		if (def == NULL)
+			_rtld_die();
+		if (def == &_rtld_sym_zero) {
+			/* Weak undefined thread variable */
+			where[0] = (Elf_Addr)_rtld_tlsdesc_undef;
+			where[1] = rela->r_addend;
 
-	def = _rtld_find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj, flags);
-	if (def == NULL)
-		_rtld_die();
+			rdbg(("TLSDESC %s (weak) in %s --> %p",
+			    obj->strtab + obj->symtab[symnum].st_name,
+			    obj->path, (void *)where[1]));
 
-	tlsdesc->index = defobj->tlsoffset + def->st_value + rela->r_addend +
-	    sizeof(struct tls_tcb);
-
-	return tlsdesc->index;
-}
-
-int64_t
-_rtld_tlsdesc_handle(struct tls_data *tlsdesc, u_int flags)
-{
-	sigset_t mask;
-
-	/* We have already found the index, return it */
-	if (tlsdesc->index >= 0)
-		return tlsdesc->index;
-
-	_rtld_exclusive_enter(&mask);
-	/* tlsdesc->index may have been set by another thread */
-	if (tlsdesc->index == -1)
-		_rtld_tlsdesc_handle_locked(tlsdesc, flags);
-	_rtld_exclusive_exit(&mask);
-
-	return tlsdesc->index;
-}
-
-static void
-_rtld_tlsdesc_fill(Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr *where)
-{
-	if (ELF_R_SYM(rela->r_info) == 0) {
-		where[0] = (Elf_Addr)_rtld_tlsdesc;
-		where[1] = obj->tlsoffset + rela->r_addend +
-		    sizeof(struct tls_tcb);
+			return;
+		}
+		offs = def->st_value;
 	} else {
+		defobj = obj;
+	}
+	offs += rela->r_addend;
+
+	if (defobj->tls_done) {
+		/* Variable is in the initially allocated TLS segment */
+		where[0] = (Elf_Addr)_rtld_tlsdesc_static;
+		where[1] = defobj->tlsoffset + offs +
+		    sizeof(struct tls_tcb);
+
+		rdbg(("TLSDESC %s --> %p static",
+		    obj->path, (void *)where[1]));
+	} else {
+		/* TLS offset is unknown at load time, use dynamic resolving */
 		where[0] = (Elf_Addr)_rtld_tlsdesc_dynamic;
-		where[1] = (Elf_Addr)_rtld_tlsdesc_alloc(obj, rela);
+		where[1] = (Elf_Addr)_rtld_tlsdesc_alloc(defobj->tlsindex, offs);
+
+		rdbg(("TLSDESC %s in %s --> %p dynamic (%zu, %p)",
+		    obj->strtab + obj->symtab[symnum].st_name,
+		    obj->path, (void *)where[1], defobj->tlsindex, (void *)offs));
 	}
 }
 
@@ -276,7 +271,7 @@ _rtld_relocate_nonplt_objects(Obj_Entry *obj)
 			break;
 
 		case R_TYPE(TLSDESC):
-			_rtld_tlsdesc_fill(obj, rela, where);
+			_rtld_tlsdesc_fill(obj, rela, where, 0);
 			break;
 
 		case R_TLS_TYPE(TLS_DTPREL):
@@ -344,7 +339,7 @@ _rtld_relocate_plt_lazy(Obj_Entry *obj)
 			rdbg(("fixup !main in %s --> %p", obj->path, (void *)*where));
 			break;
 		case R_TYPE(TLSDESC):
-			_rtld_tlsdesc_fill(obj, rela, where);
+			_rtld_tlsdesc_fill(obj, rela, where, SYMLOOK_IN_PLT);
 			break;
 		}
 	}
@@ -408,11 +403,7 @@ _rtld_relocate_plt_object(const Obj_Entry *obj, const Elf_Rela *rela,
 			*tp = new_value;
 		break;
 	case R_TYPE(TLSDESC):
-		if (ELF_R_SYM(rela->r_info) != 0) {
-			struct tls_data *tlsdesc = (struct tls_data *)where[1];
-			if (tlsdesc->index == -1)
-				_rtld_tlsdesc_handle_locked(tlsdesc, SYMLOOK_IN_PLT);
-		}
+		_rtld_tlsdesc_fill(obj, rela, where, SYMLOOK_IN_PLT);
 		break;
 	}
 
diff --git a/libexec/ld.elf_so/arch/aarch64/rtld_start.S b/libexec/ld.elf_so/arch/aarch64/rtld_start.S
index b361809a3a7d..319d80feb479 100644
--- a/libexec/ld.elf_so/arch/aarch64/rtld_start.S
+++ b/libexec/ld.elf_so/arch/aarch64/rtld_start.S
@@ -1,4 +1,4 @@
-/* $NetBSD: rtld_start.S,v 1.3 2018/09/20 18:41:05 jakllsch Exp $ */
+/* $NetBSD: rtld_start.S,v 1.4 2019/01/18 11:59:03 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: rtld_start.S,v 1.3 2018/09/20 18:41:05 jakllsch Exp $")
+RCSID("$NetBSD: rtld_start.S,v 1.4 2019/01/18 11:59:03 skrll Exp $")
 
 /*
  * void _rtld_start(void (*cleanup)(void), const Obj_Entry *obj,
@@ -145,47 +145,145 @@ ENTRY_NP(_rtld_bind_start)
 	br	x17			/* call bound function */
 END(_rtld_bind_start)
 
-
-ENTRY(_rtld_tlsdesc)
+/*
+ * struct rel_tlsdesc {
+ *  uint64_t resolver_fnc;
+ *  uint64_t resolver_arg;
+ * };
+ *
+ * uint64_t _rtld_tlsdesc_static(struct rel_tlsdesc *);
+ *
+ * Resolver function for TLS symbols resolved at load time
+ */
+ENTRY(_rtld_tlsdesc_static)
+	.cfi_startproc
 	ldr	x0, [x0, #8]
 	ret
-END(_rtld_tlsdesc)
+	.cfi_endproc
+END(_rtld_tlsdesc_static)
 
 /*
- * uint64_t _rtld_tlsdesc_dynamic(struct tlsdesc *);
+ * uint64_t _rtld_tlsdesc_undef(struct rel_tlsdesc *);
  *
- * TODO: We could lookup the saved index here to skip saving the entire stack.
+ * Resolver function for weak and undefined TLS symbols
+ */
+ENTRY(_rtld_tlsdesc_undef)
+	.cfi_startproc
+	str	x1, [sp, #-16]!
+	.cfi_adjust_cfa_offset	16
+
+	mrs	x1, tpidr_el0
+	ldr	x0, [x0, #8]
+	sub	x0, x0, x1
+
+	ldr	x1, [sp], #16
+	.cfi_adjust_cfa_offset 	-16
+	.cfi_endproc
+	ret
+END(_rtld_tlsdesc_undef)
+
+/*
+ * uint64_t _rtld_tlsdesc_dynamic(struct rel_tlsdesc *);
+ *
+ * Resolver function for TLS symbols from dlopen()
  */
 ENTRY(_rtld_tlsdesc_dynamic)
-	/* Store any registers we may use in rtld_tlsdesc_handle */
-	stp	x29, x30, [sp, #-(10 * 16)]!
-	mov	x29, sp
-	stp	x1, x2,   [sp, #(1 * 16)]
-	stp	x3, x4,   [sp, #(2 * 16)]
-	stp	x5, x6,   [sp, #(3 * 16)]
-	stp	x7, x8,   [sp, #(4 * 16)]
-	stp	x9, x10,  [sp, #(5 * 16)]
-	stp	x11, x12, [sp, #(6 * 16)]
-	stp	x13, x14, [sp, #(7 * 16)]
-	stp	x15, x16, [sp, #(8 * 16)]
-	stp	x17, x18, [sp, #(9 * 16)]
+	.cfi_startproc
+
+	/* Save registers used in fast path */
+	stp	x1,  x2, [sp, #(-2 * 16)]!
+	stp	x3,  x4, [sp, #(1 * 16)]
+	.cfi_adjust_cfa_offset	2 * 16
+	.cfi_rel_offset		x1, 0
+	.cfi_rel_offset		x2, 8
+	.cfi_rel_offset		x3, 16
+	.cfi_rel_offset		x4, 24
+
+	/* Test fastpath - inlined version of __tls_get_addr. */
+
+	ldr	x1, [x0, #8]		/* tlsdesc ptr */
+	mrs	x4, tpidr_el0
+	ldr	x0, [x4]		/* DTV pointer (tcb->tcb_dtv) */
+
+	ldr	x3, [x0, #-8]		/* DTV_MAX_INDEX(dtv) */
+	ldr	x2, [x1, #0]		/* tlsdesc->td_tlsindex */
+	cmp	x2, x3
+	b.hi	1f			/* Slow path: index beyond DTV */
+
+	ldr     x3, [x0, x2, lsl #3]	/* dtv[tlsdesc->td_tlsindex] */
+	cbz	x3, 1f
+
+	/* Return (dtv[tlsdesc->td_tlsindex] + tlsdesc->td_tlsoffs - tp) */
+	ldr	x2, [x1, #8]		/* tlsdesc->td_tlsoffs */
+	add 	x2, x2, x3
+	sub	x0, x2, x4
+
+	/* Restore registers and return */
+	ldp	 x3,  x4, [sp, #(1 * 16)]
+	ldp	 x1,  x2, [sp], #(2 * 16)
+	.cfi_adjust_cfa_offset 	-2 * 16
+	ret
+
+	/*
+	 * Slow path
+	 * return _rtld_tls_get_addr(tp, tlsdesc->td_tlsindex, tlsdesc->td_tlsoffs);
+	 *
+	 */
+1:
+	/* Save all integer registers */
+	stp	x29, x30, [sp, #-(8 * 16)]!
+	.cfi_adjust_cfa_offset	8 * 16
+	.cfi_rel_offset		x29, 0
+	.cfi_rel_offset		x30, 8
+
+	stp	x5,   x6, [sp, #(1 * 16)]
+	stp	x7,   x8, [sp, #(2 * 16)]
+	stp	x9,  x10, [sp, #(3 * 16)]
+	stp	x11, x12, [sp, #(4 * 16)]
+	stp	x13, x14, [sp, #(5 * 16)]
+	stp	x15, x16, [sp, #(6 * 16)]
+	stp	x17, x18, [sp, #(7 * 16)]
+	.cfi_rel_offset		 x5, 16
+	.cfi_rel_offset		 x6, 24
+	.cfi_rel_offset		 x7, 32
+	.cfi_rel_offset		 x8, 40
+	.cfi_rel_offset		 x9, 48
+	.cfi_rel_offset		x10, 56
+	.cfi_rel_offset		x11, 64
+	.cfi_rel_offset		x12, 72
+	.cfi_rel_offset		x13, 80
+	.cfi_rel_offset		x14, 88
+	.cfi_rel_offset		x15, 96
+	.cfi_rel_offset		x16, 104
+	.cfi_rel_offset		x17, 112
+	.cfi_rel_offset		x18, 120
 
 	/* Find the tls offset */
-	ldr	x0, [x0, #8]
-	mov	x1, #1
-	bl	_rtld_tlsdesc_handle
+	mov	x0, x4			/* tp */
+	mov	x3, x1			/* tlsdesc ptr */
+	ldr	x1, [x3, #0]		/* tlsdesc->td_tlsindex */
+	ldr	x2, [x3, #8]		/* tlsdesc->td_tlsoffs */
+	bl	_rtld_tls_get_addr
+	mrs	x1, tpidr_el0
+	sub	x0, x0, x1
 
-	/* Restore the registers */
-	ldp	x17, x18, [sp, #(9 * 16)]
-	ldp	x15, x16, [sp, #(8 * 16)]
-	ldp	x13, x14, [sp, #(7 * 16)]
-	ldp	x11, x12, [sp, #(6 * 16)]
-	ldp	x9, x10,  [sp, #(5 * 16)]
-	ldp	x7, x8,   [sp, #(4 * 16)]
-	ldp	x5, x6,   [sp, #(3 * 16)]
-	ldp	x3, x4,   [sp, #(2 * 16)]
-	ldp	x1, x2,   [sp, #(1 * 16)]
-	ldp	x29, x30, [sp], #(10 * 16)
+	/* Restore slow path registers */
+	ldp	x17, x18, [sp, #(7 * 16)]
+	ldp	x15, x16, [sp, #(6 * 16)]
+	ldp	x13, x14, [sp, #(5 * 16)]
+	ldp	x11, x12, [sp, #(4 * 16)]
+	ldp	x9, x10,  [sp, #(3 * 16)]
+	ldp	x7, x8,   [sp, #(2 * 16)]
+	ldp	x5, x6,   [sp, #(1 * 16)]
+	ldp	x29, x30, [sp], #(8 * 16)
+	.cfi_adjust_cfa_offset 	-8 * 16
+	.cfi_restore		x29
+	.cfi_restore		x30
 
+	/* Restore fast path registers and return */
+	ldp	 x3,  x4, [sp, #16]
+	ldp	 x1,  x2, [sp], #(2 * 16)
+	.cfi_adjust_cfa_offset	-2 * 16
+	.cfi_endproc
 	ret
 END(_rtld_tlsdesc_dynamic)