Import compiler-rt r208593. Fix a build bug in __clear_cache by not explicitly forcing the ABI. Add first part of IEEE 754 quad support.
joerg 2014-05-16 00:04:17 +00:00
parent 9b139c3c53
commit 61f2f2562d
131 changed files with 2473 additions and 125 deletions


@ -0,0 +1,57 @@
This file is a list of the people responsible for ensuring that patches for a
particular part of compiler-rt are reviewed, either by themselves or by
someone else. They are also the gatekeepers for their part of compiler-rt, with
the final word on what goes in or not.
The list is sorted by surname and formatted to allow easy grepping and
beautification by scripts. The fields are: name (N), email (E), web-address
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
(S).
N: Peter Collingbourne
E: peter@pcc.me.uk
D: DataFlowSanitizer
N: Daniel Dunbar
E: daniel@zuster.org
D: Makefile build
N: Timur Iskhodzhanov
E: timurrrr@google.com
D: AddressSanitizer for Windows
N: Howard Hinnant
E: howard.hinnant@gmail.com
D: builtins library
N: Sergey Matveev
E: earthdok@google.com
D: LeakSanitizer
N: Alexander Potapenko
E: glider@google.com
D: MacOS/iOS port of sanitizers
N: Alexey Samsonov
E: samsonov@google.com
D: CMake build, test suite
N: Kostya Serebryany
E: kcc@google.com
D: AddressSanitizer, sanitizer_common, porting sanitizers to other platforms
N: Richard Smith
E: richard-llvm@metafoo.co.uk
D: UndefinedBehaviorSanitizer
N: Evgeniy Stepanov
E: eugenis@google.com
D: MemorySanitizer, Android port of sanitizers
N: Dmitry Vyukov
E: dvyukov@google.com
D: ThreadSanitizer
N: Bill Wendling
E: isanbard@gmail.com
D: Profile runtime library


@ -23,10 +23,10 @@ N: Howard Hinnant
E: hhinnant@apple.com
D: Architect and primary author of compiler-rt
N: Joerg Sonnenberger
E: joerg@NetBSD.org
D: Maintains NetBSD port.
N: Matt Thomas
E: matt@NetBSD.org
D: ARM improvements.
N: Joerg Sonnenberger
E: joerg@NetBSD.org
D: Maintains port.


@ -89,9 +89,3 @@ other licenses gives permission to use the names of the LLVM Team or the
University of Illinois to endorse or promote products derived from this
Software.
The following pieces of software have additional or alternate copyrights,
licenses, and/or restrictions:
Program            Directory
-------            ---------
android headers    third_party/android


@ -16,7 +16,7 @@
// calling convention where double arguments are passed in GPR pairs
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__adddf3vfp)
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7
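The .align to .p2align change above recurs through the ARM files in this import. Under common GNU assembler semantics, .align N means 2^N bytes on ARM ELF targets but N bytes on some others, so the directive is ambiguous across platforms; .p2align and .balign state explicitly what they mean. A short illustration, not part of the diff:

.align 2    // ambiguous: 4-byte (2^2) alignment on ARM ELF, 2-byte on some targets
.p2align 2  // always power-of-two: 2^2 = 4-byte alignment
.balign 4   // always a byte count: 4-byte alignment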


@ -16,7 +16,7 @@
// calling convention where single arguments are passed in GPRs
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__addsf3vfp)
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register


@ -20,7 +20,7 @@
#define DEFINE_AEABI_DCMP(cond) \
.syntax unified SEPARATOR \
.align 2 SEPARATOR \
.p2align 2 SEPARATOR \
DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \
push { r4, lr } SEPARATOR \
bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \


@ -20,7 +20,7 @@
#define DEFINE_AEABI_FCMP(cond) \
.syntax unified SEPARATOR \
.align 2 SEPARATOR \
.p2align 2 SEPARATOR \
DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \
push { r4, lr } SEPARATOR \
bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \


@ -16,7 +16,7 @@
// }
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
push { lr }
sub sp, sp, #4


@ -17,7 +17,7 @@
// }
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
push {r11, lr}
sub sp, sp, #16


@ -11,7 +11,7 @@
// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
b memcmp
END_COMPILERRT_FUNCTION(__aeabi_memcmp)


@ -11,7 +11,7 @@
// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
b memcpy
END_COMPILERRT_FUNCTION(__aeabi_memcpy)


@ -11,7 +11,7 @@
// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
b memmove
END_COMPILERRT_FUNCTION(__aeabi_memmove)


@ -12,7 +12,7 @@
// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
mov r3, r1
mov r1, r2


@ -17,7 +17,7 @@
// }
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
push { lr }
sub sp, sp, #4


@ -17,7 +17,7 @@
// }
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
push {r11, lr}
sub sp, sp, #16


@ -14,7 +14,7 @@
//
// Reverse all the bytes in a 64-bit integer.
//
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__bswapdi2)
#if __ARM_ARCH < 6
// cores before armv6 do not have the "rev" instruction


@ -14,7 +14,7 @@
//
// Reverse all the bytes in a 32-bit integer.
//
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__bswapsi2)
#if __ARM_ARCH < 6
// cores before armv6 do not have the "rev" instruction
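The fallback below (not shown in this hunk) reverses the bytes with shifts and masks; on armv6 and later, "rev" does the whole swap in one instruction. A minimal C model of the fallback, as a sketch only (bswap32_model is a hypothetical name, not code from this file):

#include <stdint.h>

/* Sketch of the pre-armv6 shift-and-mask byte reversal. */
static uint32_t bswap32_model(uint32_t x) {
    return (x << 24) | ((x << 8) & 0x00ff0000u) |
           ((x >> 8) & 0x0000ff00u) | (x >> 24);
}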


@ -16,7 +16,7 @@
.syntax unified
.text
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__clzdi2)
#ifdef __ARM_FEATURE_CLZ
#ifdef __ARMEB__


@ -16,7 +16,7 @@
.syntax unified
.text
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__clzsi2)
#ifdef __ARM_FEATURE_CLZ
clz r0, r0


@ -40,7 +40,7 @@
#include "../assembly.h"
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqsf2)
// Make copies of a and b with the sign bit shifted off the top. These will
// be used to detect zeros and NaNs.
@ -105,7 +105,7 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtsf2)
// Identical to the preceding except that we return -1 for NaN values.
// Given that the two paths share so much code, one might be tempted to
@ -132,7 +132,7 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2)
END_COMPILERRT_FUNCTION(__gtsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unordsf2)
// Return 1 for NaN values, 0 otherwise.
mov r2, r0, lsl #1


@ -16,7 +16,7 @@
// calling convention where double arguments are passed in GPR pairs
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__divdf3vfp)
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7


@ -22,7 +22,7 @@
pop {r4-r7, pc}
.syntax unified
.align 3
.p2align 3
DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
#if __ARM_ARCH_EXT_IDIV__
tst r1, r1


@ -16,7 +16,7 @@
// calling convention where single arguments are passed like 32-bit ints.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register


@ -21,7 +21,7 @@
pop {r4, r7, pc}
.syntax unified
.align 3
.p2align 3
// OK, APCS and AAPCS agree on 32-bit args, so it's safe to use the same routine.
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3)
DEFINE_COMPILERRT_FUNCTION(__divsi3)


@ -17,7 +17,7 @@
// like in GPR pairs.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register


@ -17,7 +17,7 @@
// like 32-bit ints
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register


@ -17,7 +17,7 @@
// passed in a GPR and a double precision result is returned in R0/R1 pair.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
vmov s15, r0 // load float register from R0
vcvt.f64.f32 d7, s15 // convert single to double


@ -17,7 +17,7 @@
// passed in GPR register pair.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
vmov d7, r0, r1 // load double register from R0/R1
vcvt.s32.f64 s15, d7 // convert double to 32-bit int into s15


@ -17,7 +17,7 @@
// passed in a GPR.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
vmov s15, r0 // load float register from R0
vcvt.s32.f32 s15, s15 // convert single to 32-bit int into s15


@ -18,7 +18,7 @@
// passed in GPR register pair.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
vmov d7, r0, r1 // load double register from R0/R1
vcvt.u32.f64 s15, d7 // convert double to 32-bit int into s15


@ -18,7 +18,7 @@
// passed in a GPR.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
vmov s15, r0 // load float register from R0
vcvt.u32.f32 s15, s15 // convert single to 32-bit unsigned into s15


@ -17,7 +17,7 @@
// return in GPR register pair.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
vmov s15, r0 // move int to float register s15
vcvt.f64.s32 d7, s15 // convert 32-bit int in s15 to double in d7


@ -17,7 +17,7 @@
// return in a GPR.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
vmov s15, r0 // move int to float register s15
vcvt.f32.s32 s15, s15 // convert 32-bit int in s15 to float in s15


@ -17,7 +17,7 @@
// return in GPR register pair.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
vmov s15, r0 // move int to float register s15
vcvt.f64.u32 d7, s15 // convert 32-bit int in s15 to double in d7


@ -17,7 +17,7 @@
// return in a GPR.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
vmov s15, r0 // move int to float register s15
vcvt.f32.u32 s15, s15 // convert 32-bit int in s15 to float in s15


@ -17,7 +17,7 @@
// like in GPR pairs.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register


@ -17,7 +17,7 @@
// like 32-bit ints
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register


@ -17,7 +17,7 @@
// like in GPR pairs.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register


@ -17,7 +17,7 @@
// like 32-bit ints
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register


@ -17,7 +17,7 @@
// like in GPR pairs.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register


@ -17,7 +17,7 @@
// like 32-bit ints
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register


@ -17,7 +17,7 @@
// like in GPR pairs.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register


@ -17,7 +17,7 @@
// like 32-bit ints
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp)
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register


@ -21,7 +21,7 @@
pop {r4, r7, pc}
.syntax unified
.align 3
.p2align 3
DEFINE_COMPILERRT_FUNCTION(__modsi3)
#if __ARM_ARCH_EXT_IDIV__
tst r1, r1


@ -16,7 +16,7 @@
// calling convention where double arguments are passed in GPR pairs
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__muldf3vfp)
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7


@ -16,7 +16,7 @@
// calling convention where single arguments are passed like 32-bit ints.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp)
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register


@ -17,7 +17,7 @@
// like in GPR pairs.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__nedf2vfp)
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register


@ -16,7 +16,7 @@
// Darwin calling convention where double arguments are passed in GPR pairs.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__negdf2vfp)
eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair
bx lr


@ -16,7 +16,7 @@
// Darwin calling convention where single arguments are passed like 32-bit ints
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__negsf2vfp)
eor r0, r0, #-2147483648 // flip sign bit on float in r0
bx lr


@ -17,7 +17,7 @@
// like 32-bit ints
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__nesf2vfp)
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register


@ -25,7 +25,7 @@
//
// Restore registers d8-d15 from stack
//
.align 2
.p2align 2
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs)
vldmia sp!, {d8-d15} // pop registers d8-d15 off stack
bx lr // return to prolog


@ -25,7 +25,7 @@
//
// Save registers d8-d15 onto stack
//
.align 2
.p2align 2
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs)
vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack
bx lr // return to prologue


@ -16,7 +16,7 @@
// the Darwin calling convention where double arguments are passed in GPR pairs
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__subdf3vfp)
vmov d6, r0, r1 // move first param from r0/r1 pair into d6
vmov d7, r2, r3 // move second param from r2/r3 pair into d7


@ -17,7 +17,7 @@
// like 32-bit ints.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__subsf3vfp)
vmov s14, r0 // move first param from r0 into float register
vmov s15, r1 // move second param from r1 into float register


@ -29,7 +29,7 @@
// The table contains signed 2-byte sized elements which are 1/2 the distance
// from lr to the target label.
//
.align 2
.p2align 2
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16)
ldrh ip, [lr, #-1] // get first 16-bit word in table
cmp r0, ip // compare with index


@ -29,7 +29,7 @@
// The table contains signed 4-byte sized elements which are the distance
// from lr to the target label.
//
.align 2
.p2align 2
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32)
ldr ip, [lr, #-1] // get first 32-bit word in table
cmp r0, ip // compare with index


@ -29,7 +29,7 @@
// The table contains signed byte sized elements which are 1/2 the distance
// from lr to the target label.
//
.align 2
.p2align 2
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8)
ldrb ip, [lr, #-1] // get first byte in table
cmp r0, ip // signed compare with index


@ -29,7 +29,7 @@
// The table contains unsigned byte sized elements which are 1/2 the distance
// from lr to the target label.
//
.align 2
.p2align 2
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8)
ldrb ip, [lr, #-1] // get first byte in table
cmp r0, ip // compare with index


@ -0,0 +1,60 @@
/*===-- sync-ops.h - --===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements outline macros for the __sync_fetch_and_*
* operations. Different instantiations will generate appropriate assembly for
* ARM and Thumb-2 versions of the functions.
*
*===----------------------------------------------------------------------===*/
#include "../assembly.h"
#define SYNC_OP_4(op) \
.p2align 2 ; \
.thumb ; \
DEFINE_COMPILERRT_FUNCTION(__sync_fetch_and_ ## op) \
dmb ; \
mov r12, r0 ; \
LOCAL_LABEL(tryatomic_ ## op): \
ldrex r0, [r12] ; \
op(r2, r0, r1) ; \
strex r3, r2, [r12] ; \
cbnz r3, LOCAL_LABEL(tryatomic_ ## op) ; \
dmb ; \
bx lr
#define SYNC_OP_8(op) \
.p2align 2 ; \
.thumb ; \
DEFINE_COMPILERRT_FUNCTION(__sync_fetch_and_ ## op) \
push {r4, r5, r6, lr} ; \
dmb ; \
mov r12, r0 ; \
LOCAL_LABEL(tryatomic_ ## op): \
ldrexd r0, r1, [r12] ; \
op(r4, r5, r0, r1, r2, r3) ; \
strexd r6, r4, r5, [r12] ; \
cbnz r6, LOCAL_LABEL(tryatomic_ ## op) ; \
dmb ; \
pop {r4, r5, r6, pc}
#define MINMAX_4(rD, rN, rM, cmp_kind) \
cmp rN, rM ; \
mov rD, rM ; \
it cmp_kind ; \
mov##cmp_kind rD, rN
#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \
cmp rN_LO, rM_LO ; \
sbcs rN_HI, rM_HI ; \
mov rD_LO, rM_LO ; \
mov rD_HI, rM_HI ; \
itt cmp_kind ; \
mov##cmp_kind rD_LO, rN_LO ; \
mov##cmp_kind rD_HI, rN_HI
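A rough C model of what a SYNC_OP_4 instantiation implements, assuming the usual __sync semantics of returning the old value; this is a sketch with a hypothetical name, not the real implementation, which is the ldrex/strex loop above:

#include <stdint.h>

/* Sketch: retry loop returning the old value, with the compare-and-swap
 * builtin standing in for the ldrex/strex pair.  The MINMAX_* macros plug
 * a conditional-select "op" into the same loop. */
static uint32_t model_fetch_and_and_4(uint32_t *p, uint32_t v) {
    for (;;) {
        uint32_t old = *p;                 /* ldrex r0, [r12]        */
        uint32_t desired = old & v;        /* op(r2, r0, r1) = and_4 */
        if (__sync_val_compare_and_swap(p, old, desired) == old)
            return old;                    /* strex succeeded: bx lr */
    }
}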


@ -0,0 +1,21 @@
/*===-- sync_fetch_and_add_4.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_add_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
/* "adds" is 2 bytes shorter than "add". */
#define add_4(rD, rN, rM) add rD, rN, rM
SYNC_OP_4(add_4)


@ -0,0 +1,22 @@
/*===-- sync_fetch_and_add_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_add_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
adds rD_LO, rN_LO, rM_LO ; \
adc rD_HI, rN_HI, rM_HI
SYNC_OP_8(add_8)


@ -0,0 +1,19 @@
/*===-- sync_fetch_and_and_4.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_and_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define and_4(rD, rN, rM) and rD, rN, rM
SYNC_OP_4(and_4)


@ -0,0 +1,21 @@
/*===-- sync_fetch_and_and_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_and_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
and rD_LO, rN_LO, rM_LO ; \
and rD_HI, rN_HI, rM_HI
SYNC_OP_8(and_8)


@ -0,0 +1,20 @@
/*===-- sync_fetch_and_max_4.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_max_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt)
SYNC_OP_4(max_4)


@ -0,0 +1,19 @@
/*===-- sync_fetch_and_max_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_max_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt)
SYNC_OP_8(max_8)


@ -0,0 +1,20 @@
/*===-- sync_fetch_and_min_4.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_min_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt)
SYNC_OP_4(min_4)


@ -0,0 +1,19 @@
/*===-- sync_fetch_and_min_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_min_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt)
SYNC_OP_8(min_8)


@ -0,0 +1,20 @@
/*===-- sync_fetch_and_nand_4.S - -----------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_nand_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define nand_4(rD, rN, rM) bic rD, rN, rM
SYNC_OP_4(nand_4)


@ -0,0 +1,22 @@
/*===-- sync_fetch_and_nand_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_nand_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
bic rD_LO, rN_LO, rM_LO ; \
bic rD_HI, rN_HI, rM_HI
SYNC_OP_8(nand_8)


@ -0,0 +1,20 @@
/*===-- sync_fetch_and_or_4.S - -------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_or_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define or_4(rD, rN, rM) orr rD, rN, rM
SYNC_OP_4(or_4)


@ -0,0 +1,22 @@
/*===-- sync_fetch_and_or_8.S - -------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_or_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
orr rD_LO, rN_LO, rM_LO ; \
orr rD_HI, rN_HI, rM_HI
SYNC_OP_8(or_8)


@ -0,0 +1,21 @@
/*===-- sync_fetch_and_sub_4.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_sub_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
/* "subs" is 2 bytes shorter than "sub". */
#define sub_4(rD, rN, rM) sub rD, rN, rM
SYNC_OP_4(sub_4)


@ -0,0 +1,22 @@
/*===-- sync_fetch_and_sub_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_sub_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
subs rD_LO, rN_LO, rM_LO ; \
sbc rD_HI, rN_HI, rM_HI
SYNC_OP_8(sub_8)


@ -0,0 +1,20 @@
/*===-- sync_fetch_and_umax_4.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_umax_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi)
SYNC_OP_4(umax_4)


@ -0,0 +1,19 @@
/*===-- sync_fetch_and_umax_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_umax_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi)
SYNC_OP_8(umax_8)


@ -0,0 +1,20 @@
/*===-- sync_fetch_and_umin_4.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_umin_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo)
SYNC_OP_4(umin_4)


@ -0,0 +1,19 @@
/*===-- sync_fetch_and_umin_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_umin_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo)
SYNC_OP_8(umin_8)


@ -0,0 +1,20 @@
/*===-- sync_fetch_and_xor_4.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_xor_4 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define xor_4(rD, rN, rM) eor rD, rN, rM
SYNC_OP_4(xor_4)


@ -0,0 +1,22 @@
/*===-- sync_fetch_and_xor_8.S - ------------------------------------------===//
*
* The LLVM Compiler Infrastructure
*
* This file is dual licensed under the MIT and the University of Illinois Open
* Source Licenses. See LICENSE.TXT for details.
*
*===----------------------------------------------------------------------===//
*
* This file implements the __sync_fetch_and_xor_8 function for the ARM
* architecture.
*
*===----------------------------------------------------------------------===*/
#include "sync-ops.h"
#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
eor rD_LO, rN_LO, rM_LO ; \
eor rD_HI, rN_HI, rM_HI
SYNC_OP_8(xor_8)


@ -21,7 +21,7 @@
#if __APPLE__
.align 2
.p2align 2
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize)
stmfd sp!, {r7, lr}
add r7, sp, #0


@ -17,7 +17,7 @@
// passed in an R0/R1 pair and a single precision result is returned in R0.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp)
vmov d7, r0, r1 // load double from r0/r1 pair
vcvt.f32.f64 s15, d7 // convert double to single (truncate precision)


@ -17,7 +17,7 @@
// like in GPR pairs.
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp)
vmov d6, r0, r1 // load r0/r1 pair in double register
vmov d7, r2, r3 // load r2/r3 pair in double register


@ -17,7 +17,7 @@
// like 32-bit ints
//
.syntax unified
.align 2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp)
vmov s14, r0 // move from GPR 0 to float register
vmov s15, r1 // move from GPR 1 to float register


@ -17,6 +17,14 @@
#include <machine/sysarch.h>
#endif
#if defined(ANDROID) && defined(__mips__)
#include <sys/cachectl.h>
#endif
#if defined(ANDROID) && defined(__arm__)
#include <asm/unistd.h>
#endif
/*
* The compiler generates calls to __clear_cache() when creating
* trampoline functions on the stack for use with nested functions.
@ -24,7 +32,7 @@
* specified range.
*/
COMPILER_RT_ABI void
COMPILER_RT_EXPORT void
__clear_cache(void* start, void* end)
{
#if __i386__ || __x86_64__
@ -32,13 +40,31 @@ __clear_cache(void* start, void* end)
* Intel processors have a unified instruction and data cache
* so there is nothing to do
*/
#elif defined(__NetBSD__) && defined(__arm__)
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__NetBSD__)
struct arm_sync_icache_args arg;
arg.addr = (uintptr_t)start;
arg.len = (uintptr_t)end - (uintptr_t)start;
sysarch(ARM_SYNC_ICACHE, &arg);
#elif defined(ANDROID)
const register int start_reg __asm("r0") = (int) (intptr_t) start;
const register int end_reg __asm("r1") = (int) (intptr_t) end;
const register int flags __asm("r2") = 0;
const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
__asm __volatile("svc 0x0" : "=r"(start_reg)
: "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags) : "r0");
if (start_reg != 0) {
compilerrt_abort();
}
#else
compilerrt_abort();
#endif
#elif defined(ANDROID) && defined(__mips__)
const uintptr_t start_int = (uintptr_t) start;
const uintptr_t end_int = (uintptr_t) end;
_flush_cache(start, (end_int - start_int), BCACHE);
#elif defined(__aarch64__) && !defined(__APPLE__)
uint64_t xstart = (uint64_t)(uintptr_t) start;
uint64_t xend = (uint64_t)(uintptr_t) end;
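The compiler emits __clear_cache calls itself for nested-function trampolines, but the routine is also used directly whenever code is written to memory and then executed. A usage sketch, assuming a writable and executable buffer is already mapped (install_code is a hypothetical helper, not part of compiler-rt):

#include <string.h>

extern void __clear_cache(void *start, void *end);

/* After writing instructions into memory that will be executed, flush the
 * range so the instruction cache observes the new bytes. */
static void install_code(void *buf, const void *insns, size_t len) {
    memcpy(buf, insns, len);
    __clear_cache(buf, (char *)buf + len);
}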


@ -0,0 +1,133 @@
//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the following soft-float comparison routines:
//
// __eqtf2 __getf2 __unordtf2
// __letf2 __gttf2
// __lttf2
// __netf2
//
// The semantics of the routines grouped in each column are identical, so there
// is a single implementation for each, and wrappers to provide the other names.
//
// The main routines behave as follows:
//
// __letf2(a,b) returns -1 if a < b
// 0 if a == b
// 1 if a > b
// 1 if either a or b is NaN
//
// __getf2(a,b) returns -1 if a < b
// 0 if a == b
// 1 if a > b
// -1 if either a or b is NaN
//
// __unordtf2(a,b) returns 0 if both a and b are numbers
// 1 if either a or b is NaN
//
// Note that __letf2() and __getf2() are identical except in their handling of
// NaN values.
//
//===----------------------------------------------------------------------===//
#define QUAD_PRECISION
#include "fp_lib.h"
#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
enum LE_RESULT {
LE_LESS = -1,
LE_EQUAL = 0,
LE_GREATER = 1,
LE_UNORDERED = 1
};
COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) {
const srep_t aInt = toRep(a);
const srep_t bInt = toRep(b);
const rep_t aAbs = aInt & absMask;
const rep_t bAbs = bInt & absMask;
// If either a or b is NaN, they are unordered.
if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED;
// If a and b are both zeros, they are equal.
if ((aAbs | bAbs) == 0) return LE_EQUAL;
// If at least one of a and b is positive, we get the same result comparing
// a and b as signed integers as we would with a floating-point compare.
if ((aInt & bInt) >= 0) {
if (aInt < bInt) return LE_LESS;
else if (aInt == bInt) return LE_EQUAL;
else return LE_GREATER;
}
else {
// Otherwise, both are negative, so we need to flip the sense of the
// comparison to get the correct result. (This assumes a twos- or ones-
// complement integer representation; if integers are represented in a
// sign-magnitude representation, then this flip is incorrect).
if (aInt > bInt) return LE_LESS;
else if (aInt == bInt) return LE_EQUAL;
else return LE_GREATER;
}
}
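/* Worked example of the signed-integer trick, using binary32 values for
 * brevity (this file applies the same idea to binary128): 1.0f is
 * 0x3f800000 and 2.0f is 0x40000000, and the signed-integer order matches
 * the floating-point order.  For -1.0f (0xbf800000) and -2.0f (0xc0000000)
 * the signed-integer order is reversed, hence the flipped comparison in
 * the else branch above. */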
enum GE_RESULT {
GE_LESS = -1,
GE_EQUAL = 0,
GE_GREATER = 1,
GE_UNORDERED = -1 // Note: different from LE_UNORDERED
};
COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) {
const srep_t aInt = toRep(a);
const srep_t bInt = toRep(b);
const rep_t aAbs = aInt & absMask;
const rep_t bAbs = bInt & absMask;
if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED;
if ((aAbs | bAbs) == 0) return GE_EQUAL;
if ((aInt & bInt) >= 0) {
if (aInt < bInt) return GE_LESS;
else if (aInt == bInt) return GE_EQUAL;
else return GE_GREATER;
} else {
if (aInt > bInt) return GE_LESS;
else if (aInt == bInt) return GE_EQUAL;
else return GE_GREATER;
}
}
COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) {
const rep_t aAbs = toRep(a) & absMask;
const rep_t bAbs = toRep(b) & absMask;
return aAbs > infRep || bAbs > infRep;
}
// The following are alternative names for the preceding routines.
COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) {
return __letf2(a, b);
}
COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) {
return __letf2(a, b);
}
COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) {
return __letf2(a, b);
}
COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) {
return __getf2(a, b);
}
#endif
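These routines are what a compiler calls when comparing quad-precision values on targets without hardware support. A sketch of the typical lowering (target-dependent; the prototypes assume fp_t is long double and that the result enums are ABI-compatible with int):

extern int __lttf2(long double a, long double b);
extern int __unordtf2(long double a, long double b);

/* a < b  lowers to  __lttf2(a, b) < 0 */
int quad_less(long double a, long double b) {
    return __lttf2(a, b) < 0;
}

/* isunordered(a, b)  lowers to  __unordtf2(a, b) != 0 */
int quad_unordered(long double a, long double b) {
    return __unordtf2(a, b) != 0;
}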


@ -12,9 +12,9 @@
// many useful constants and utility routines that are used in the
// implementation of the soft-float routines in compiler-rt.
//
// Assumes that float and double correspond to the IEEE-754 binary32 and
// binary64 types, respectively, and that integer endianness matches floating
// point endianness on the target platform.
// Assumes that float, double and long double correspond to the IEEE-754
// binary32, binary64 and binary128 types, respectively, and that integer
// endianness matches floating point endianness on the target platform.
//
//===----------------------------------------------------------------------===//
@ -44,6 +44,7 @@ static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
*hi = product >> 32;
*lo = product;
}
COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
#elif defined DOUBLE_PRECISION
@ -59,7 +60,7 @@ static inline int rep_clz(rep_t a) {
#else
if (a & REP_C(0xffffffff00000000))
return __builtin_clz(a >> 32);
else
return 32 + __builtin_clz(a & REP_C(0xffffffff));
#endif
}
@ -86,10 +87,120 @@ static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
#undef loWord
#undef hiWord
COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
#elif defined QUAD_PRECISION
#if __LDBL_MANT_DIG__ == 113
#define CRT_LDBL_128BIT
typedef __uint128_t rep_t;
typedef __int128_t srep_t;
typedef long double fp_t;
#define REP_C (__uint128_t)
// Note: there is no suffix for 128-bit integer constants, so REP_C simply
// casts the constant to __uint128_t.
#define significandBits 112
static inline int rep_clz(rep_t a) {
const union
{
__uint128_t ll;
#if _YUGA_BIG_ENDIAN
struct { uint64_t high, low; } s;
#else
struct { uint64_t low, high; } s;
#endif
} uu = { .ll = a };
uint64_t word;
uint64_t add;
if (uu.s.high){
word = uu.s.high;
add = 0;
}
else{
word = uu.s.low;
add = 64;
}
return __builtin_clzll(word) + add;
}
#define Word_LoMask UINT64_C(0x00000000ffffffff)
#define Word_HiMask UINT64_C(0xffffffff00000000)
#define Word_FullMask UINT64_C(0xffffffffffffffff)
#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask)
#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask)
#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask)
#define Word_4(a) (uint64_t)(a & Word_LoMask)
// 128x128 -> 256 wide multiply for platforms that don't have such an operation;
// many 64-bit platforms have this operation, but they tend to have hardware
// floating-point, so we don't bother with a special case for them here.
static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
const uint64_t product11 = Word_1(a) * Word_1(b);
const uint64_t product12 = Word_1(a) * Word_2(b);
const uint64_t product13 = Word_1(a) * Word_3(b);
const uint64_t product14 = Word_1(a) * Word_4(b);
const uint64_t product21 = Word_2(a) * Word_1(b);
const uint64_t product22 = Word_2(a) * Word_2(b);
const uint64_t product23 = Word_2(a) * Word_3(b);
const uint64_t product24 = Word_2(a) * Word_4(b);
const uint64_t product31 = Word_3(a) * Word_1(b);
const uint64_t product32 = Word_3(a) * Word_2(b);
const uint64_t product33 = Word_3(a) * Word_3(b);
const uint64_t product34 = Word_3(a) * Word_4(b);
const uint64_t product41 = Word_4(a) * Word_1(b);
const uint64_t product42 = Word_4(a) * Word_2(b);
const uint64_t product43 = Word_4(a) * Word_3(b);
const uint64_t product44 = Word_4(a) * Word_4(b);
const __uint128_t sum0 = (__uint128_t)product44;
const __uint128_t sum1 = (__uint128_t)product34 +
(__uint128_t)product43;
const __uint128_t sum2 = (__uint128_t)product24 +
(__uint128_t)product33 +
(__uint128_t)product42;
const __uint128_t sum3 = (__uint128_t)product14 +
(__uint128_t)product23 +
(__uint128_t)product32 +
(__uint128_t)product41;
const __uint128_t sum4 = (__uint128_t)product13 +
(__uint128_t)product22 +
(__uint128_t)product31;
const __uint128_t sum5 = (__uint128_t)product12 +
(__uint128_t)product21;
const __uint128_t sum6 = (__uint128_t)product11;
const __uint128_t r0 = (sum0 & Word_FullMask) +
((sum1 & Word_LoMask) << 32);
const __uint128_t r1 = (sum0 >> 64) +
((sum1 >> 32) & Word_FullMask) +
(sum2 & Word_FullMask) +
((sum3 << 32) & Word_HiMask);
*lo = r0 + (r1 << 64);
*hi = (r1 >> 64) +
(sum1 >> 96) +
(sum2 >> 64) +
(sum3 >> 32) +
sum4 +
(sum5 << 32) +
(sum6 << 64);
}
#undef Word_1
#undef Word_2
#undef Word_3
#undef Word_4
#undef Word_HiMask
#undef Word_LoMask
#undef Word_FullMask
#endif // __LDBL_MANT_DIG__ == 113
#else
#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
#endif
#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT)
#define typeWidth (sizeof(rep_t)*CHAR_BIT)
#define exponentBits (typeWidth - significandBits - 1)
#define maxExponent ((1 << exponentBits) - 1)
@ -142,8 +253,6 @@ static inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int c
*hi = 0;
}
}
COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
#endif
#endif // FP_LIB_HEADER
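The wideMultiply above is schoolbook multiplication on 32-bit words. The same technique at half scale, as a self-contained sketch (wide_mul_64 is a hypothetical name, not part of fp_lib.h):

#include <stdint.h>

/* 64x64 -> 128 multiply built from 32-bit partial products, mirroring the
 * structure of the 128x128 -> 256 wideMultiply. */
static void wide_mul_64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) {
    const uint64_t alo = a & 0xffffffffu, ahi = a >> 32;
    const uint64_t blo = b & 0xffffffffu, bhi = b >> 32;
    const uint64_t p0 = alo * blo;
    const uint64_t p1 = alo * bhi;
    const uint64_t p2 = ahi * blo;
    const uint64_t p3 = ahi * bhi;
    /* middle column: carry-out of p0 plus the low halves of p1 and p2 */
    const uint64_t mid = (p0 >> 32) + (p1 & 0xffffffffu) + (p2 & 0xffffffffu);
    *lo = (mid << 32) | (p0 & 0xffffffffu);
    *hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
}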


@ -16,7 +16,7 @@
#ifdef __SSE2__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
movd 12(%esp), %xmm2 // Load count
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
@ -36,7 +36,7 @@ END_COMPILERRT_FUNCTION(__ashldi3)
#else // Use GPRs instead of SSE2 instructions, if they aren't available.
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
movl 12(%esp), %ecx // Load count
movl 8(%esp), %edx // Load high


@ -9,7 +9,7 @@
#ifdef __SSE2__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__ashrdi3)
movd 12(%esp), %xmm2 // Load count
movl 8(%esp), %eax
@ -46,7 +46,7 @@ END_COMPILERRT_FUNCTION(__ashrdi3)
#else // Use GPRs instead of SSE2 instructions, if they aren't available.
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__ashrdi3)
movl 12(%esp), %ecx // Load count
movl 8(%esp), %edx // Load high


@ -19,7 +19,7 @@
#ifdef __i386__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__divdi3)
/* This is currently implemented by wrapping the unsigned divide up in an absolute


@ -10,14 +10,14 @@
#ifndef __ELF__
.const
#endif
.align 4
.balign 4
twop52: .quad 0x4330000000000000
twop32: .quad 0x41f0000000000000
#define REL_ADDR(_a) (_a)-0b(%eax)
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__floatdidf)
cvtsi2sd 8(%esp), %xmm1
movss 4(%esp), %xmm0 // low 32 bits of a


@ -15,7 +15,7 @@
#ifdef __i386__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__floatdisf)
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
movd 4(%esp), %xmm0


@ -15,7 +15,7 @@
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__floatdixf)
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
movd 4(%esp), %xmm0


@ -20,7 +20,7 @@
#ifndef __ELF__
.const
#endif
.align 4
.balign 4
twop52: .quad 0x4330000000000000
twop84_plus_twop52:
.quad 0x4530000000100000
@ -29,7 +29,7 @@ twop84: .quad 0x4530000000000000
#define REL_ADDR(_a) (_a)-0b(%eax)
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__floatundidf)
movss 8(%esp), %xmm1 // high 32 bits of a
movss 4(%esp), %xmm0 // low 32 bits of a


@ -19,7 +19,7 @@
#ifdef __i386__
.const
.align 3
.balign 3
.quad 0x43f0000000000000
twop64: .quad 0x0000000000000000
@ -27,7 +27,7 @@ twop64: .quad 0x0000000000000000
#define TWOp64 twop64-0b(%ecx,%eax,8)
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__floatundisf)
movl 8(%esp), %eax
movd 8(%esp), %xmm1
@ -54,10 +54,8 @@ END_COMPILERRT_FUNCTION(__floatundisf)
#ifndef __ELF__
.const
.align 3
#else
.align 8
#endif
.balign 8
twop52: .quad 0x4330000000000000
.quad 0x0000000000000fff
sticky: .quad 0x0000000000000000
@ -68,7 +66,7 @@ twelve: .long 0x00000000
#define STICKY sticky-0b(%ecx,%eax,8)
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__floatundisf)
movl 8(%esp), %eax
movd 8(%esp), %xmm1


@ -10,7 +10,7 @@
#ifndef __ELF__
.const
#endif
.align 4
.balign 4
twop52: .quad 0x4330000000000000
twop84_plus_twop52_neg:
.quad 0xc530000000100000
@ -19,7 +19,7 @@ twop84: .quad 0x4530000000000000
#define REL_ADDR(_a) (_a)-0b(%eax)
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__floatundixf)
calll 0f
0: popl %eax


@ -16,7 +16,7 @@
#ifdef __SSE2__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
movd 12(%esp), %xmm2 // Load count
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
@ -36,7 +36,7 @@ END_COMPILERRT_FUNCTION(__lshrdi3)
#else // Use GPRs instead of SSE2 instructions, if they aren't available.
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
movl 12(%esp), %ecx // Load count
movl 8(%esp), %edx // Load high


@ -20,7 +20,7 @@
#ifdef __i386__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__moddi3)
/* This is currently implemented by wrapping the unsigned modulus up in an absolute


@ -8,7 +8,7 @@
#ifdef __i386__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__muldi3)
pushl %ebx
movl 16(%esp), %eax // b.lo


@ -19,7 +19,7 @@
#ifdef __i386__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__udivdi3)
pushl %ebx


@ -20,7 +20,7 @@
#ifdef __i386__
.text
.align 4
.balign 4
DEFINE_COMPILERRT_FUNCTION(__umoddi3)
pushl %ebx

Some files were not shown because too many files have changed in this diff.