rework arm atomic/tp backends to be thumb-compatible and fdpic-ready

three problems are addressed:

- use of pc arithmetic, which was difficult if not impossible to make
  correct in thumb mode on all models. it existed so that relative
  rather than absolute pointers to the backends could be used; this
  was designed back when there was no coherent model for the early
  stages of the dynamic linker before relocations, and is no longer
  necessary.

- assumption that data (the relative pointers to the backends) can be
  accessed at a constant displacement from the code. this will not be
  possible on future fdpic subarchs (for cortex-m), so move
  responsibility for loading the backend code address to the caller.

- hard-coded arm opcodes using the .word directive. instead, use the
  .arch directive to work around the assembler's refusal to assemble
  instructions not available (or in some cases, available but just
  considered deprecated) in the target isa level. the obscure v6t2
  arch is used for v6 code so as to (1) allow generation of thumb2
  output if -mthumb is active, and (2) avoid warnings/errors for mcr
  barriers that clang would produce if we just set arch to v7-a.

in addition, the __aeabi_read_tp function is moved out of the inner
workings and implemented as an asm wrapper around a C function, so
that asm code does not need to read global data. the asm wrapper
serves to satisfy the ABI calling convention requirements for this
function.
This commit is contained in:
Rich Felker 2016-12-18 19:38:53 -05:00
parent 9067a3006e
commit 29237f7f5c
6 changed files with 92 additions and 70 deletions

View File

@ -1,5 +1,11 @@
__attribute__((__visibility__("hidden")))
extern const void *__arm_atomics[3]; /* gettp, cas, barrier */
/* BLX is the asm text for an indirect call through a register; the
 * register operand is appended at the use site (e.g. BLX " r3").
 * When targeting ARMv4/ARMv4T it expands to a "mov lr,pc" followed by
 * "bx" instead of the single "blx" instruction used otherwise. */
#if __ARM_ARCH_4__ || __ARM_ARCH_4T__ || __ARM_ARCH == 4
#define BLX "mov lr,pc\n\tbx"
#else
#define BLX "blx"
#endif
extern uintptr_t __attribute__((__visibility__("hidden")))
__a_cas_ptr, __a_barrier_ptr;
#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
@ -42,11 +48,12 @@ static inline int a_cas(volatile int *p, int t, int s)
register int r0 __asm__("r0") = t;
register int r1 __asm__("r1") = s;
register volatile int *r2 __asm__("r2") = p;
register uintptr_t r3 __asm__("r3") = __a_cas_ptr;
int old;
__asm__ __volatile__ (
"bl __a_cas"
: "+r"(r0) : "r"(r1), "r"(r2)
: "memory", "r3", "lr", "ip", "cc" );
BLX " r3"
: "+r"(r0), "+r"(r3) : "r"(r1), "r"(r2)
: "memory", "lr", "ip", "cc" );
if (!r0) return t;
if ((old=*p)!=t) return old;
}
@ -58,8 +65,8 @@ static inline int a_cas(volatile int *p, int t, int s)
#define a_barrier a_barrier
static inline void a_barrier()
{
__asm__ __volatile__("bl __a_barrier"
: : : "memory", "cc", "ip", "lr" );
register uintptr_t ip __asm__("ip") = __a_barrier_ptr;
__asm__ __volatile__( BLX " ip" : "+r"(ip) : : "memory", "cc", "lr" );
}
#endif

View File

@ -10,15 +10,17 @@ static inline pthread_t __pthread_self()
#else
/* BLX is the asm text for an indirect call through a register; the
 * register operand is appended at the use site (e.g. BLX " %1").
 * When targeting ARMv4/ARMv4T it expands to a "mov lr,pc" followed by
 * "bx" instead of the single "blx" instruction used otherwise. */
#if __ARM_ARCH_4__ || __ARM_ARCH_4T__ || __ARM_ARCH == 4
#define BLX "mov lr,pc\n\tbx"
#else
#define BLX "blx"
#endif
static inline pthread_t __pthread_self()
{
#ifdef __clang__
char *p;
__asm__ __volatile__ ( "bl __a_gettp\n\tmov %0,r0" : "=r"(p) : : "cc", "r0", "lr" );
#else
register char *p __asm__("r0");
__asm__ __volatile__ ( "bl __a_gettp" : "=r"(p) : : "cc", "lr" );
#endif
extern uintptr_t __attribute__((__visibility__("hidden"))) __a_gettp_ptr;
register uintptr_t p __asm__("r0");
__asm__ __volatile__ ( BLX " %1" : "=r"(p) : "r"(__a_gettp_ptr) : "cc", "lr" );
return (void *)(p+8-sizeof(struct pthread));
}

View File

@ -0,0 +1,8 @@
/* __aeabi_read_tp: asm entry point that forwards to the hidden C helper
 * __aeabi_read_tp_c. r1-r3 and lr are pushed before the call and popped
 * after it, so they hold their entry values again on return. r0 is not
 * saved and is left as the helper set it (presumably the result
 * register -- confirm against the ARM run-time ABI). */
.syntax unified
.global __aeabi_read_tp
.type __aeabi_read_tp,%function
__aeabi_read_tp:
/* save the registers the C helper is allowed to overwrite, plus lr */
push {r1,r2,r3,lr}
bl __aeabi_read_tp_c
/* restore them; lr again holds the original return address */
pop {r1,r2,r3,lr}
bx lr

View File

@ -0,0 +1,8 @@
#include "pthread_impl.h"
#include <stdint.h>
/* C helper called by the __aeabi_read_tp asm wrapper.
 * Takes the value returned by __pthread_self(), subtracts 8 and adds
 * sizeof(struct pthread), and returns the result as a pointer. This
 * mirrors the "+8-sizeof(struct pthread)" adjustment that the
 * __pthread_self() shown in this commit applies to the raw thread
 * pointer. */
__attribute__((__visibility__("hidden")))
void *__aeabi_read_tp_c(void)
{
	uintptr_t self = (uintptr_t)__pthread_self();

	return (void *)(self - 8 + sizeof(struct pthread));
}

View File

@ -6,43 +6,47 @@
#define HWCAP_TLS (1 << 15)
extern const unsigned char __attribute__((__visibility__("hidden")))
__a_barrier_dummy[], __a_barrier_oldkuser[],
__a_barrier_v6[], __a_barrier_v7[],
__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
__a_gettp_dummy[];
__a_barrier_oldkuser[], __a_barrier_v6[], __a_barrier_v7[],
__a_cas_v6[], __a_cas_v7[],
__a_gettp_cp15[];
#define __a_barrier_kuser 0xffff0fa0
#define __a_barrier_oldkuser (uintptr_t)__a_barrier_oldkuser
#define __a_barrier_v6 (uintptr_t)__a_barrier_v6
#define __a_barrier_v7 (uintptr_t)__a_barrier_v7
#define __a_cas_kuser 0xffff0fc0
#define __a_cas_v6 (uintptr_t)__a_cas_v6
#define __a_cas_v7 (uintptr_t)__a_cas_v7
#define __a_gettp_kuser 0xffff0fe0
#define __a_gettp_cp15 (uintptr_t)__a_gettp_cp15
extern uintptr_t __attribute__((__visibility__("hidden")))
__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
#define SET(op,ver) (__a_##op##_ptr = \
(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
int __set_thread_area(void *p)
{
#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
if (__hwcap & HWCAP_TLS) {
size_t *aux;
SET(cas, v7);
SET(barrier, v7);
__a_cas_ptr = __a_cas_v7;
__a_barrier_ptr = __a_barrier_v7;
for (aux=libc.auxv; *aux; aux+=2) {
if (*aux != AT_PLATFORM) continue;
const char *s = (void *)aux[1];
if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
SET(cas, v6);
SET(barrier, v6);
__a_cas_ptr = __a_cas_v6;
__a_barrier_ptr = __a_barrier_v6;
break;
}
} else {
int ver = *(int *)0xffff0ffc;
SET(gettp, kuser);
SET(cas, kuser);
SET(barrier, kuser);
__a_gettp_ptr = __a_gettp_kuser;
__a_cas_ptr = __a_cas_kuser;
__a_barrier_ptr = __a_barrier_kuser;
if (ver < 2) a_crash();
if (ver < 3) SET(barrier, oldkuser);
if (ver < 3) __a_barrier_ptr = __a_barrier_oldkuser;
}
#endif
return __syscall(0xf0005, p);

View File

@ -1,20 +1,15 @@
.syntax unified
.text
.global __a_barrier
.hidden __a_barrier
.type __a_barrier,%function
__a_barrier:
ldr ip,1f
ldr ip,[pc,ip]
add pc,pc,ip
1: .word __a_barrier_ptr-1b
.global __a_barrier_dummy
.hidden __a_barrier_dummy
.type __a_barrier_dummy,%function
__a_barrier_dummy:
bx lr
.global __a_barrier_oldkuser
.hidden __a_barrier_oldkuser
.type __a_barrier_oldkuser,%function
__a_barrier_oldkuser:
push {r0,r1,r2,r3,ip,lr}
mov r1,r0
@ -24,90 +19,88 @@ __a_barrier_oldkuser:
mov pc,ip
pop {r0,r1,r2,r3,ip,lr}
bx lr
.global __a_barrier_v6
.hidden __a_barrier_v6
.type __a_barrier_v6,%function
__a_barrier_v6:
.arch armv6t2
mcr p15,0,r0,c7,c10,5
bx lr
.global __a_barrier_v7
.hidden __a_barrier_v7
.type __a_barrier_v7,%function
__a_barrier_v7:
.word 0xf57ff05b /* dmb ish */
.arch armv7-a
dmb ish
bx lr
.global __a_cas
.hidden __a_cas
.type __a_cas,%function
__a_cas:
ldr ip,1f
ldr ip,[pc,ip]
add pc,pc,ip
1: .word __a_cas_ptr-1b
.global __a_cas_dummy
.hidden __a_cas_dummy
.type __a_cas_dummy,%function
__a_cas_dummy:
mov r3,r0
ldr r0,[r2]
subs r0,r3,r0
streq r1,[r2]
bx lr
.global __a_cas_v6
.hidden __a_cas_v6
.type __a_cas_v6,%function
__a_cas_v6:
.arch armv6t2
mov r3,r0
mcr p15,0,r0,c7,c10,5
1: .word 0xe1920f9f /* ldrex r0,[r2] */
1: ldrex r0,[r2]
subs r0,r3,r0
.word 0x01820f91 /* strexeq r0,r1,[r2] */
strexeq r0,r1,[r2]
teqeq r0,#1
beq 1b
mcr p15,0,r0,c7,c10,5
bx lr
.global __a_cas_v7
.hidden __a_cas_v7
.type __a_cas_v7,%function
__a_cas_v7:
.arch armv7-a
mov r3,r0
.word 0xf57ff05b /* dmb ish */
1: .word 0xe1920f9f /* ldrex r0,[r2] */
dmb ish
1: ldrex r0,[r2]
subs r0,r3,r0
.word 0x01820f91 /* strexeq r0,r1,[r2] */
strexeq r0,r1,[r2]
teqeq r0,#1
beq 1b
.word 0xf57ff05b /* dmb ish */
dmb ish
bx lr
.global __aeabi_read_tp
.type __aeabi_read_tp,%function
__aeabi_read_tp:
.global __a_gettp
.hidden __a_gettp
.type __a_gettp,%function
__a_gettp:
ldr r0,1f
ldr r0,[pc,r0]
add pc,pc,r0
1: .word __a_gettp_ptr-1b
.global __a_gettp_dummy
.hidden __a_gettp_dummy
__a_gettp_dummy:
.global __a_gettp_cp15
.hidden __a_gettp_cp15
.type __a_gettp_cp15,%function
__a_gettp_cp15:
mrc p15,0,r0,c13,c0,3
bx lr
/* Tag this file with minimum ISA level so as not to affect linking. */
.arch armv4t
.eabi_attribute 6,2
.data
.align 2
.global __a_barrier_ptr
.hidden __a_barrier_ptr
__a_barrier_ptr:
.word 0
.word __a_barrier_dummy
.global __a_cas_ptr
.hidden __a_cas_ptr
__a_cas_ptr:
.word 0
.word __a_cas_dummy
.global __a_gettp_ptr
.hidden __a_gettp_ptr
__a_gettp_ptr:
.word 0
.word __a_gettp_cp15