#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
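
/* On ARMv7 and later, the atomics are implemented inline using ldrex/strex
 * exclusive-access loops bracketed by "dmb ish" barriers, so each operation
 * acts as a full memory barrier for the inner shareable domain. */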

#define a_barrier a_barrier
static inline void a_barrier()
{
	__asm__ __volatile__("dmb ish");
}
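
/* a_cas: if *p equals t, atomically replace it with s.  Returns the value
 * observed in *p: t on success, the differing value on failure.  The
 * "bne 1f" branch skips the store and goes straight to the final barrier
 * when the comparison fails. */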
#define a_cas a_cas
static inline int a_cas(volatile int *p, int t, int s)
{
	int old;
	__asm__ __volatile__(
		" dmb ish\n"
		"1: ldrex %0,%3\n"
		" cmp %0,%1\n"
		" bne 1f\n"
		" strex %0,%2,%3\n"
		" cmp %0, #0\n"
		" bne 1b\n"
		" mov %0, %1\n"
		"1: dmb ish\n"
		: "=&r"(old)
		: "r"(t), "r"(s), "Q"(*p)
		: "memory", "cc" );
	return old;
}
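
/* a_swap: atomically store v into *x and return the previous contents. */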
#define a_swap a_swap
static inline int a_swap(volatile int *x, int v)
{
	int old, tmp;
	__asm__ __volatile__(
		" dmb ish\n"
		"1: ldrex %0,%3\n"
		" strex %1,%2,%3\n"
		" cmp %1, #0\n"
		" bne 1b\n"
		" dmb ish\n"
		: "=&r"(old), "=&r"(tmp)
		: "r"(v), "Q"(*x)
		: "memory", "cc" );
	return old;
}
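
/* a_fetch_add: atomically add v to *x and return the old value.  The loop
 * leaves the updated value in the result register, so the old value is
 * recovered by subtracting v on return. */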
#define a_fetch_add a_fetch_add
static inline int a_fetch_add(volatile int *x, int v)
{
	int old, tmp;
	__asm__ __volatile__(
		" dmb ish\n"
		"1: ldrex %0,%3\n"
		" add %0,%0,%2\n"
		" strex %1,%0,%3\n"
		" cmp %1, #0\n"
		" bne 1b\n"
		" dmb ish\n"
		: "=&r"(old), "=&r"(tmp)
		: "r"(v), "Q"(*x)
		: "memory", "cc" );
	return old-v;
}
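
/* a_inc/a_dec: atomic increment and decrement; no value is returned. */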
#define a_inc a_inc
static inline void a_inc(volatile int *x)
{
	int tmp, tmp2;
	__asm__ __volatile__(
		" dmb ish\n"
		"1: ldrex %0,%2\n"
		" add %0,%0,#1\n"
		" strex %1,%0,%2\n"
		" cmp %1, #0\n"
		" bne 1b\n"
		" dmb ish\n"
		: "=&r"(tmp), "=&r"(tmp2)
		: "Q"(*x)
		: "memory", "cc" );
}

#define a_dec a_dec
static inline void a_dec(volatile int *x)
{
	int tmp, tmp2;
	__asm__ __volatile__(
		" dmb ish\n"
		"1: ldrex %0,%2\n"
		" sub %0,%0,#1\n"
		" strex %1,%0,%2\n"
		" cmp %1, #0\n"
		" bne 1b\n"
		" dmb ish\n"
		: "=&r"(tmp), "=&r"(tmp2)
		: "Q"(*x)
		: "memory", "cc" );
}
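
/* a_and/a_or: atomically AND or OR the value v into *x. */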
#define a_and a_and
static inline void a_and(volatile int *x, int v)
{
	int tmp, tmp2;
	__asm__ __volatile__(
		" dmb ish\n"
		"1: ldrex %0,%3\n"
		" and %0,%0,%2\n"
		" strex %1,%0,%3\n"
		" cmp %1, #0\n"
		" bne 1b\n"
		" dmb ish\n"
		: "=&r"(tmp), "=&r"(tmp2)
		: "r"(v), "Q"(*x)
		: "memory", "cc" );
}

#define a_or a_or
static inline void a_or(volatile int *x, int v)
{
	int tmp, tmp2;
	__asm__ __volatile__(
		" dmb ish\n"
		"1: ldrex %0,%3\n"
		" orr %0,%0,%2\n"
		" strex %1,%0,%3\n"
		" cmp %1, #0\n"
		" bne 1b\n"
		" dmb ish\n"
		: "=&r"(tmp), "=&r"(tmp2)
		: "r"(v), "Q"(*x)
		: "memory", "cc" );
}
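
/* a_store: plain store bracketed by barriers, ordering it against both
 * earlier and later memory accesses. */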
#define a_store a_store
static inline void a_store(volatile int *p, int x)
{
	__asm__ __volatile__(
		" dmb ish\n"
		" str %1,%0\n"
		" dmb ish\n"
		: "=m"(*p)
		: "r"(x)
		: "memory", "cc" );
}

#else
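
/* Fallback for ARM variants without ldrex/strex: __a_cas and __a_barrier
 * are external helpers (see the declaration below and the "bl" call in
 * a_barrier) that rely on the kernel-provided cas and barrier support for
 * legacy hardware. */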
int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden")));
#define __k_cas __a_cas

#define a_barrier a_barrier
static inline void a_barrier()
{
	__asm__ __volatile__("bl __a_barrier"
		: : : "memory", "cc", "ip", "lr" );
}
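
/* a_cas built on the cas helper: __k_cas returns zero on success.  On
 * failure, *p is reread; if it no longer equals t the new value is
 * returned, otherwise the failure was spurious and the operation retries. */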
#define a_cas a_cas
static inline int a_cas(volatile int *p, int t, int s)
{
	int old;
	for (;;) {
		if (!__k_cas(t, s, p))
			return t;
		if ((old=*p) != t)
			return old;
	}
}

#endif