diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index e865a9ad3..9ed8cebd9 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -608,10 +608,12 @@ #define AESNI_ALIGN 16 #endif - #ifndef _MSC_VER - #define XASM_LINK(f) asm(f) - #else + #ifdef _MSC_VER #define XASM_LINK(f) + #elif defined(__APPLE__) + #define XASM_LINK(f) asm("_" f) + #else + #define XASM_LINK(f) asm(f) #endif /* _MSC_VER */ static int checkAESNI = 0; diff --git a/wolfcrypt/src/aes_asm.S b/wolfcrypt/src/aes_asm.S index cff57db9d..6fff275b4 100644 --- a/wolfcrypt/src/aes_asm.S +++ b/wolfcrypt/src/aes_asm.S @@ -36,8 +36,13 @@ AES_CBC_encrypt (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_CBC_encrypt AES_CBC_encrypt: +#else +.globl _AES_CBC_encrypt +_AES_CBC_encrypt: +#endif # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx @@ -96,8 +101,13 @@ AES_CBC_decrypt_by4 (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_CBC_decrypt_by4 AES_CBC_decrypt_by4: +#else +.globl _AES_CBC_decrypt_by4 +_AES_CBC_decrypt_by4: +#endif # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx @@ -272,8 +282,13 @@ AES_CBC_decrypt_by6 (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_CBC_decrypt_by6 AES_CBC_decrypt_by6: +#else +.globl _AES_CBC_decrypt_by6 +_AES_CBC_decrypt_by6: +#endif # parameter 1: %rdi - in # parameter 2: %rsi - out # parameter 3: %rdx - ivec @@ -495,8 +510,13 @@ AES_CBC_decrypt_by8 (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_CBC_decrypt_by8 AES_CBC_decrypt_by8: +#else +.globl _AES_CBC_decrypt_by8 +_AES_CBC_decrypt_by8: +#endif # parameter 1: %rdi - in # parameter 2: %rsi - out # parameter 3: %rdx - ivec @@ -746,8 +766,13 @@ AES_ECB_encrypt (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_ECB_encrypt AES_ECB_encrypt: +#else +.globl _AES_ECB_encrypt +_AES_ECB_encrypt: +#endif # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx @@ -905,8 +930,13 @@ AES_ECB_decrypt (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_ECB_decrypt AES_ECB_decrypt: +#else +.globl _AES_ECB_decrypt +_AES_ECB_decrypt: +#endif # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx @@ -1065,8 +1095,13 @@ void AES_128_Key_Expansion(const unsigned char* userkey, unsigned char* key_schedule); */ .align 16,0x90 +#ifndef __APPLE__ .globl AES_128_Key_Expansion AES_128_Key_Expansion: +#else +.globl _AES_128_Key_Expansion +_AES_128_Key_Expansion: +#endif # parameter 1: %rdi # parameter 2: %rsi movl $10, 240(%rsi) @@ -1125,8 +1160,13 @@ ret void AES_192_Key_Expansion (const unsigned char *userkey, unsigned char *key) */ +#ifndef __APPLE__ .globl AES_192_Key_Expansion AES_192_Key_Expansion: +#else +.globl _AES_192_Key_Expansion +_AES_192_Key_Expansion: +#endif # parameter 1: %rdi # parameter 2: %rsi @@ -1211,8 +1251,13 @@ ret void AES_256_Key_Expansion (const unsigned char *userkey, unsigned char *key) */ +#ifndef __APPLE__ .globl AES_256_Key_Expansion AES_256_Key_Expansion: +#else +.globl _AES_256_Key_Expansion +_AES_256_Key_Expansion: +#endif # parameter 1: %rdi # parameter 2: %rsi diff --git a/wolfcrypt/src/aes_gcm_asm.S b/wolfcrypt/src/aes_gcm_asm.S index f6f9f3274..19e3f7db8 100644 --- a/wolfcrypt/src/aes_gcm_asm.S +++ b/wolfcrypt/src/aes_gcm_asm.S @@ -26,43 +26,150 @@ #define HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ -.align 16 +#ifndef __APPLE__ +.data +#else 
+.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_one: .quad 0x0, 0x1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_two: .quad 0x0, 0x2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_three: .quad 0x0, 0x3 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_four: .quad 0x0, 0x4 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_five: .quad 0x0, 0x5 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_six: .quad 0x0, 0x6 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_seven: .quad 0x0, 0x7 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_eight: .quad 0x0, 0x8 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_bswap_epi64: .quad 0x1020304050607, 0x8090a0b0c0d0e0f -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_bswap_mask: .quad 0x8090a0b0c0d0e0f, 0x1020304050607 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_mod2_128: .quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text .globl AES_GCM_encrypt .type AES_GCM_encrypt,@function .align 4 AES_GCM_encrypt: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt +.p2align 2 +_AES_GCM_encrypt: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %rbx @@ -70,11 +177,11 @@ AES_GCM_encrypt: pushq %r15 movq %rdx, %r12 movq %rcx, %rax - movq 48(%rsp), %r11 - movq 56(%rsp), %rbx - movq 64(%rsp), %r14 + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d movq 72(%rsp), %r15 - movq 80(%rsp), %r10 + movl 80(%rsp), %r10d subq $0xa0, %rsp pxor %xmm4, %xmm4 pxor %xmm6, %xmm6 @@ -1902,11 +2009,21 @@ L_AES_GCM_encrypt_store_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_encrypt,.-AES_GCM_encrypt +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_GCM_decrypt .type AES_GCM_decrypt,@function .align 4 AES_GCM_decrypt: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt +.p2align 2 +_AES_GCM_decrypt: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %rbx @@ -1915,11 +2032,11 @@ AES_GCM_decrypt: pushq %rbp movq %rdx, %r12 movq %rcx, %rax - movq 56(%rsp), %r11 - movq 64(%rsp), %rbx - movq 72(%rsp), %r14 + movl 56(%rsp), %r11d + movl 64(%rsp), %ebx + movl 72(%rsp), %r14d movq 80(%rsp), %r15 - movq 88(%rsp), %r10 + movl 88(%rsp), %r10d movq 96(%rsp), %rbp subq $0xa8, %rsp pxor %xmm4, %xmm4 @@ -3298,45 
+3415,154 @@ L_AES_GCM_decrypt_cmp_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_decrypt,.-AES_GCM_decrypt +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_one: .quad 0x0, 0x1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_two: .quad 0x0, 0x2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_three: .quad 0x0, 0x3 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_four: .quad 0x0, 0x4 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_five: .quad 0x0, 0x5 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_six: .quad 0x0, 0x6 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_seven: .quad 0x0, 0x7 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_eight: .quad 0x0, 0x8 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_bswap_epi64: .quad 0x1020304050607, 0x8090a0b0c0d0e0f -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_bswap_mask: .quad 0x8090a0b0c0d0e0f, 0x1020304050607 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_mod2_128: .quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text .globl AES_GCM_encrypt_avx1 .type AES_GCM_encrypt_avx1,@function .align 4 AES_GCM_encrypt_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_avx1 +.p2align 2 +_AES_GCM_encrypt_avx1: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %rbx @@ -3344,11 +3570,11 @@ AES_GCM_encrypt_avx1: pushq %r15 movq %rdx, %r12 movq %rcx, %rax - movq 48(%rsp), %r11 - movq 56(%rsp), %rbx - movq 64(%rsp), %r14 + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d movq 72(%rsp), %r15 - movq 80(%rsp), %r10 + movl 80(%rsp), %r10d subq $0xa0, %rsp vpxor %xmm4, %xmm4, %xmm4 vpxor %xmm6, %xmm6, %xmm6 @@ -4902,11 +5128,21 @@ L_AES_GCM_encrypt_avx1_store_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_encrypt_avx1,.-AES_GCM_encrypt_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_GCM_decrypt_avx1 .type AES_GCM_decrypt_avx1,@function .align 4 AES_GCM_decrypt_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_avx1 +.p2align 2 +_AES_GCM_decrypt_avx1: +#endif /* __APPLE__ */ pushq %r13 pushq 
%r12 pushq %rbx @@ -4915,11 +5151,11 @@ AES_GCM_decrypt_avx1: pushq %rbp movq %rdx, %r12 movq %rcx, %rax - movq 56(%rsp), %r11 - movq 64(%rsp), %rbx - movq 72(%rsp), %r14 + movl 56(%rsp), %r11d + movl 64(%rsp), %ebx + movl 72(%rsp), %r14d movq 80(%rsp), %r15 - movq 88(%rsp), %r10 + movl 88(%rsp), %r10d movq 96(%rsp), %rbp subq $0xa8, %rsp vpxor %xmm4, %xmm4, %xmm4 @@ -6066,49 +6302,167 @@ L_AES_GCM_decrypt_avx1_cmp_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_decrypt_avx1,.-AES_GCM_decrypt_avx1 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_one: .quad 0x0, 0x1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_two: .quad 0x0, 0x2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_three: .quad 0x0, 0x3 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_four: .quad 0x0, 0x4 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_five: .quad 0x0, 0x5 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_six: .quad 0x0, 0x6 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_seven: .quad 0x0, 0x7 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_eight: .quad 0x0, 0x8 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_one: .quad 0x0, 0x100000000000000 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_epi64: .quad 0x1020304050607, 0x8090a0b0c0d0e0f -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_mask: .quad 0x8090a0b0c0d0e0f, 0x1020304050607 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_mod2_128: .quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text .globl AES_GCM_encrypt_avx2 .type AES_GCM_encrypt_avx2,@function .align 4 AES_GCM_encrypt_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_avx2 +.p2align 2 +_AES_GCM_encrypt_avx2: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %r15 @@ -6118,12 +6472,12 @@ AES_GCM_encrypt_avx2: movq %rcx, %rax movq %r8, %r15 movq %rsi, %r8 - movq %r9, %r10 - movq 48(%rsp), %r11 - movq 56(%rsp), %rbx - movq 
64(%rsp), %r14 + movl %r9d, %r10d + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d movq 72(%rsp), %rsi - movq 80(%rsp), %r9 + movl 80(%rsp), %r9d subq $0xa0, %rsp vpxor %xmm4, %xmm4, %xmm4 vpxor %xmm6, %xmm6, %xmm6 @@ -7402,11 +7756,21 @@ L_AES_GCM_encrypt_avx2_store_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_encrypt_avx2,.-AES_GCM_encrypt_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_GCM_decrypt_avx2 .type AES_GCM_decrypt_avx2,@function .align 4 AES_GCM_decrypt_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_avx2 +.p2align 2 +_AES_GCM_decrypt_avx2: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %r14 @@ -7417,12 +7781,12 @@ AES_GCM_decrypt_avx2: movq %rcx, %rax movq %r8, %r14 movq %rsi, %r8 - movq %r9, %r10 - movq 56(%rsp), %r11 - movq 64(%rsp), %rbx - movq 72(%rsp), %r15 + movl %r9d, %r10d + movl 56(%rsp), %r11d + movl 64(%rsp), %ebx + movl 72(%rsp), %r15d movq 80(%rsp), %rsi - movq 88(%rsp), %r9 + movl 88(%rsp), %r9d movq 96(%rsp), %rbp subq $0xa8, %rsp vpxor %xmm4, %xmm4, %xmm4 @@ -8363,5 +8727,7 @@ L_AES_GCM_decrypt_avx2_cmp_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_decrypt_avx2,.-AES_GCM_decrypt_avx2 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/chacha_asm.S b/wolfcrypt/src/chacha_asm.S index 4be9cdb0f..9800ce6c3 100644 --- a/wolfcrypt/src/chacha_asm.S +++ b/wolfcrypt/src/chacha_asm.S @@ -26,10 +26,18 @@ #define HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ +#ifndef __APPLE__ +.text .globl chacha_encrypt_x64 .type chacha_encrypt_x64,@function .align 4 chacha_encrypt_x64: +#else +.section __TEXT,__text +.globl _chacha_encrypt_x64 +.p2align 2 +_chacha_encrypt_x64: +#endif /* __APPLE__ */ pushq %rbx pushq %rbp pushq %r12 @@ -440,24 +448,70 @@ L_chacha_x64_done: popq %rbp popq %rbx repz retq +#ifndef __APPLE__ .size chacha_encrypt_x64,.-chacha_encrypt_x64 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_chacha20_avx1_rotl8: .quad 0x605040702010003, 0xe0d0c0f0a09080b -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_chacha20_avx1_rotl16: .quad 0x504070601000302, 0xd0c0f0e09080b0a -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_chacha20_avx1_add: .quad 0x100000000, 0x300000002 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_chacha20_avx1_four: .quad 0x400000004, 0x400000004 +#ifndef __APPLE__ +.text .globl chacha_encrypt_avx1 .type chacha_encrypt_avx1,@function .align 4 chacha_encrypt_avx1: +#else +.section __TEXT,__text +.globl _chacha_encrypt_avx1 +.p2align 2 +_chacha_encrypt_avx1: +#endif /* __APPLE__ */ subq $0x190, %rsp movq %rsp, %r9 leaq 256(%rsp), %r10 @@ -943,29 +997,75 @@ L_chacha20_avx1_partial_end64: L_chacha20_avx1_partial_done: addq $0x190, %rsp repz retq +#ifndef __APPLE__ .size chacha_encrypt_avx1,.-chacha_encrypt_avx1 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 
+#else +.p2align 5 +#endif /* __APPLE__ */ L_chacha20_avx2_rotl8: .quad 0x605040702010003, 0xe0d0c0f0a09080b .quad 0x605040702010003, 0xe0d0c0f0a09080b -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_chacha20_avx2_rotl16: .quad 0x504070601000302, 0xd0c0f0e09080b0a .quad 0x504070601000302, 0xd0c0f0e09080b0a -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_chacha20_avx2_add: .quad 0x100000000, 0x300000002 .quad 0x500000004, 0x700000006 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_chacha20_avx2_eight: .quad 0x800000008, 0x800000008 .quad 0x800000008, 0x800000008 +#ifndef __APPLE__ +.text .globl chacha_encrypt_avx2 .type chacha_encrypt_avx2,@function .align 4 chacha_encrypt_avx2: +#else +.section __TEXT,__text +.globl _chacha_encrypt_avx2 +.p2align 2 +_chacha_encrypt_avx2: +#endif /* __APPLE__ */ subq $0x310, %rsp movq %rsp, %r9 leaq 512(%rsp), %r10 @@ -1307,8 +1407,14 @@ L_chacha20_avx2_done256: shl $3, %eax addl %eax, 48(%rdi) L_chacha20_avx2_end256: +#ifndef __APPLE__ callq chacha_encrypt_avx1@plt +#else + callq _chacha_encrypt_avx1 +#endif /* __APPLE__ */ addq $0x310, %rsp repz retq +#ifndef __APPLE__ .size chacha_encrypt_avx2,.-chacha_encrypt_avx2 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/poly1305_asm.S b/wolfcrypt/src/poly1305_asm.S index ef7fab433..dd7a48224 100644 --- a/wolfcrypt/src/poly1305_asm.S +++ b/wolfcrypt/src/poly1305_asm.S @@ -27,10 +27,18 @@ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.text .globl poly1305_setkey_avx .type poly1305_setkey_avx,@function .align 4 poly1305_setkey_avx: +#else +.section __TEXT,__text +.globl _poly1305_setkey_avx +.p2align 2 +_poly1305_setkey_avx: +#endif /* __APPLE__ */ movabsq $0xffffffc0fffffff, %r10 movabsq $0xffffffc0ffffffc, %r11 movq (%rsi), %rdx @@ -76,11 +84,21 @@ poly1305_setkey_avx: movq %r9, 608(%rdi) movb $0x01, 616(%rdi) repz retq +#ifndef __APPLE__ .size poly1305_setkey_avx,.-poly1305_setkey_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_block_avx .type poly1305_block_avx,@function .align 4 poly1305_block_avx: +#else +.section __TEXT,__text +.globl _poly1305_block_avx +.p2align 2 +_poly1305_block_avx: +#endif /* __APPLE__ */ pushq %r15 pushq %rbx pushq %r12 @@ -150,11 +168,21 @@ poly1305_block_avx: popq %rbx popq %r15 repz retq +#ifndef __APPLE__ .size poly1305_block_avx,.-poly1305_block_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_blocks_avx .type poly1305_blocks_avx,@function .align 4 poly1305_blocks_avx: +#else +.section __TEXT,__text +.globl _poly1305_blocks_avx +.p2align 2 +_poly1305_blocks_avx: +#endif /* __APPLE__ */ pushq %r15 pushq %rbx pushq %r12 @@ -228,11 +256,21 @@ L_poly1305_avx_blocks_start: popq %rbx popq %r15 repz retq +#ifndef __APPLE__ .size poly1305_blocks_avx,.-poly1305_blocks_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_final_avx .type poly1305_final_avx,@function .align 4 poly1305_final_avx: +#else +.section __TEXT,__text +.globl _poly1305_final_avx +.p2align 2 +_poly1305_final_avx: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 movq %rsi, %rbx @@ -249,7 +287,11 @@ L_poly1305_avx_final_cmp_rem: jl 
L_poly1305_avx_final_zero_rem movb $0x00, 616(%rdi) leaq 480(%rdi), %rsi +#ifndef __APPLE__ callq poly1305_block_avx@plt +#else + callq _poly1305_block_avx +#endif /* __APPLE__ */ L_poly1305_avx_final_no_more: movq 24(%rdi), %rax movq 32(%rdi), %rdx @@ -295,13 +337,23 @@ L_poly1305_avx_final_no_more: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size poly1305_final_avx,.-poly1305_final_avx +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.text .globl poly1305_calc_powers_avx2 .type poly1305_calc_powers_avx2,@function .align 4 poly1305_calc_powers_avx2: +#else +.section __TEXT,__text +.globl _poly1305_calc_powers_avx2 +.p2align 2 +_poly1305_calc_powers_avx2: +#endif /* __APPLE__ */ pushq %r12 pushq %r13 pushq %r14 @@ -554,12 +606,26 @@ poly1305_calc_powers_avx2: popq %r13 popq %r12 repz retq +#ifndef __APPLE__ .size poly1305_calc_powers_avx2,.-poly1305_calc_powers_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_setkey_avx2 .type poly1305_setkey_avx2,@function .align 4 poly1305_setkey_avx2: +#else +.section __TEXT,__text +.globl _poly1305_setkey_avx2 +.p2align 2 +_poly1305_setkey_avx2: +#endif /* __APPLE__ */ +#ifndef __APPLE__ callq poly1305_setkey_avx@plt +#else + callq _poly1305_setkey_avx +#endif /* __APPLE__ */ vpxor %ymm0, %ymm0, %ymm0 vmovdqu %ymm0, 64(%rdi) vmovdqu %ymm0, 96(%rdi) @@ -569,19 +635,47 @@ poly1305_setkey_avx2: movq $0x00, 608(%rdi) movw $0x00, 616(%rdi) repz retq +#ifndef __APPLE__ .size poly1305_setkey_avx2,.-poly1305_setkey_avx2 -.align 32 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_poly1305_avx2_blocks_mask: .quad 0x3ffffff, 0x3ffffff .quad 0x3ffffff, 0x3ffffff -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_poly1305_avx2_blocks_hibit: .quad 0x1000000, 0x1000000 .quad 0x1000000, 0x1000000 +#ifndef __APPLE__ +.text .globl poly1305_blocks_avx2 .type poly1305_blocks_avx2,@function .align 4 poly1305_blocks_avx2: +#else +.section __TEXT,__text +.globl _poly1305_blocks_avx2 +.p2align 2 +_poly1305_blocks_avx2: +#endif /* __APPLE__ */ pushq %r12 pushq %rbx subq $0x140, %rsp @@ -927,11 +1021,21 @@ L_poly1305_avx2_blocks_complete: popq %rbx popq %r12 repz retq +#ifndef __APPLE__ .size poly1305_blocks_avx2,.-poly1305_blocks_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_final_avx2 .type poly1305_final_avx2,@function .align 4 poly1305_final_avx2: +#else +.section __TEXT,__text +.globl _poly1305_final_avx2 +.p2align 2 +_poly1305_final_avx2: +#endif /* __APPLE__ */ movb $0x01, 616(%rdi) movb 617(%rdi), %cl cmpb $0x00, %cl @@ -939,7 +1043,11 @@ poly1305_final_avx2: pushq %rsi movq $0x40, %rdx xorq %rsi, %rsi +#ifndef __APPLE__ callq poly1305_blocks_avx2@plt +#else + callq _poly1305_blocks_avx2 +#endif /* __APPLE__ */ popq %rsi L_poly1305_avx2_final_done_blocks_X4: movq 608(%rdi), %rax @@ -952,7 +1060,11 @@ L_poly1305_avx2_final_done_blocks_X4: pushq %rsi movq %rcx, %rdx leaq 480(%rdi), %rsi +#ifndef __APPLE__ callq poly1305_blocks_avx@plt +#else + callq _poly1305_blocks_avx +#endif /* __APPLE__ */ popq %rsi popq %rax popq %rcx @@ -968,7 +1080,11 @@ L_poly1305_avx2_final_start_copy: L_poly1305_avx2_final_cmp_copy: cmp %rcx, %rax jne L_poly1305_avx2_final_start_copy +#ifndef __APPLE__ callq poly1305_final_avx@plt +#else + callq 
_poly1305_final_avx +#endif /* __APPLE__ */ vpxor %ymm0, %ymm0, %ymm0 vmovdqu %ymm0, 64(%rdi) vmovdqu %ymm0, 96(%rdi) @@ -982,5 +1098,7 @@ L_poly1305_avx2_final_cmp_copy: movq $0x00, 608(%rdi) movw $0x00, 616(%rdi) repz retq +#ifndef __APPLE__ .size poly1305_final_avx2,.-poly1305_final_avx2 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/sha256_asm.S b/wolfcrypt/src/sha256_asm.S index 09db6eb50..df3cbb259 100644 --- a/wolfcrypt/src/sha256_asm.S +++ b/wolfcrypt/src/sha256_asm.S @@ -27,6 +27,11 @@ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ L_avx1_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 @@ -44,19 +49,54 @@ L_avx1_sha256_k: .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX1 .type Transform_Sha256_AVX1,@function .align 4 Transform_Sha256_AVX1: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1 +.p2align 2 +_Transform_Sha256_AVX1: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -2432,11 +2472,21 @@ Transform_Sha256_AVX1: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX1,.-Transform_Sha256_AVX1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX1_Len .type Transform_Sha256_AVX1_Len,@function .align 4 Transform_Sha256_AVX1_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_Len +.p2align 2 +_Transform_Sha256_AVX1_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -4828,7 +4878,14 @@ L_sha256_len_avx1_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX1_Len,.-Transform_Sha256_AVX1_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ L_avx1_rorx_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 @@ -4846,19 +4903,54 @@ L_avx1_rorx_sha256_k: .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data 
+#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX1_RORX .type Transform_Sha256_AVX1_RORX,@function .align 4 Transform_Sha256_AVX1_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_RORX +.p2align 2 +_Transform_Sha256_AVX1_RORX: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -7192,11 +7284,21 @@ Transform_Sha256_AVX1_RORX: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX1_RORX,.-Transform_Sha256_AVX1_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX1_RORX_Len .type Transform_Sha256_AVX1_RORX_Len,@function .align 4 Transform_Sha256_AVX1_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_RORX_Len +.p2align 2 +_Transform_Sha256_AVX1_RORX_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -9547,9 +9649,16 @@ L_sha256_len_avx1_len_rorx_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX1_RORX_Len,.-Transform_Sha256_AVX1_RORX_Len +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ L_avx2_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -9583,22 +9692,57 @@ L_avx2_sha256_k: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff .quad 0xb0a090803020100, 0xffffffffffffffff -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 .quad 0xffffffffffffffff, 0xb0a090803020100 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b .quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX2 .type Transform_Sha256_AVX2,@function .align 4 Transform_Sha256_AVX2: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2 +.p2align 2 +_Transform_Sha256_AVX2: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -11974,11 +12118,21 @@ Transform_Sha256_AVX2: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX2,.-Transform_Sha256_AVX2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX2_Len .type Transform_Sha256_AVX2_Len,@function .align 4 Transform_Sha256_AVX2_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_Len +.p2align 2 +_Transform_Sha256_AVX2_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -11994,7 +12148,11 @@ Transform_Sha256_AVX2_Len: vmovdqu 32(%rbp), %ymm1 vmovups %ymm0, 32(%rdi) vmovups %ymm1, 64(%rdi) +#ifndef __APPLE__ call Transform_Sha256_AVX2@plt +#else + call _Transform_Sha256_AVX2 +#endif /* __APPLE__ */ addq $0x40, %rbp subl $0x40, %esi jz L_sha256_len_avx2_done @@ -16089,7 +16247,14 @@ 
L_sha256_len_avx2_done: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX2_Len,.-Transform_Sha256_AVX2_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ L_avx2_rorx_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -16123,22 +16288,57 @@ L_avx2_rorx_sha256_k: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_rorx_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b .quad 0x405060700010203, 0xc0d0e0f08090a0b -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_rorx_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff .quad 0xb0a090803020100, 0xffffffffffffffff -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_rorx_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 .quad 0xffffffffffffffff, 0xb0a090803020100 +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX2_RORX .type Transform_Sha256_AVX2_RORX,@function .align 4 Transform_Sha256_AVX2_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_RORX +.p2align 2 +_Transform_Sha256_AVX2_RORX: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -18489,11 +18689,21 @@ Transform_Sha256_AVX2_RORX: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX2_RORX,.-Transform_Sha256_AVX2_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX2_RORX_Len .type Transform_Sha256_AVX2_RORX_Len,@function .align 4 Transform_Sha256_AVX2_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_RORX_Len +.p2align 2 +_Transform_Sha256_AVX2_RORX_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -18509,7 +18719,11 @@ Transform_Sha256_AVX2_RORX_Len: vmovdqu 32(%rbp), %ymm1 vmovups %ymm0, 32(%rdi) vmovups %ymm1, 64(%rdi) +#ifndef __APPLE__ call Transform_Sha256_AVX2_RORX@plt +#else + call _Transform_Sha256_AVX2_RORX +#endif /* __APPLE__ */ addq $0x40, %rbp subl $0x40, %esi jz L_sha256_len_avx2_rorx_done @@ -22433,5 +22647,7 @@ L_sha256_len_avx2_rorx_done: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/sha512_asm.S b/wolfcrypt/src/sha512_asm.S index dded1c009..96166344c 100644 --- a/wolfcrypt/src/sha512_asm.S +++ b/wolfcrypt/src/sha512_asm.S @@ -27,7 +27,16 @@ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -69,13 +78,30 @@ L_avx1_sha512_k: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef 
__APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha512_flip_mask: .quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX1 .type Transform_Sha512_AVX1,@function .align 4 Transform_Sha512_AVX1: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1 +.p2align 2 +_Transform_Sha512_AVX1: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -1335,11 +1361,21 @@ L_sha256_len_avx1_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX1,.-Transform_Sha512_AVX1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX1_Len .type Transform_Sha512_AVX1_Len,@function .align 4 Transform_Sha512_AVX1_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_Len +.p2align 2 +_Transform_Sha512_AVX1_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -2618,8 +2654,19 @@ L_sha512_len_avx1_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX1_Len,.-Transform_Sha512_AVX1_Len -.align 16 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -2661,13 +2708,30 @@ L_avx1_rorx_sha512_k: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha512_flip_mask: .quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX1_RORX .type Transform_Sha512_AVX1_RORX,@function .align 4 Transform_Sha512_AVX1_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_RORX +.p2align 2 +_Transform_Sha512_AVX1_RORX: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -3856,11 +3920,21 @@ L_sha256_len_avx1_rorx_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX1_RORX,.-Transform_Sha512_AVX1_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX1_RORX_Len .type Transform_Sha512_AVX1_RORX_Len,@function .align 4 Transform_Sha512_AVX1_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_RORX_Len +.p2align 2 +_Transform_Sha512_AVX1_RORX_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -5084,10 +5158,21 @@ L_sha512_len_avx1_rorx_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX1_RORX_Len,.-Transform_Sha512_AVX1_RORX_Len +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -5129,7 +5214,16 @@ L_avx2_sha512_k: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_sha512_k_2: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -5211,17 
+5305,43 @@ L_avx2_sha512_k_2: .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 8 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 8 +#else +.p2align 3 +#endif /* __APPLE__ */ L_avx2_sha512_k_2_end: .quad 1024+L_avx2_sha512_k_2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_sha512_flip_mask: .quad 0x1020304050607, 0x8090a0b0c0d0e0f .quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX2 .type Transform_Sha512_AVX2,@function .align 4 Transform_Sha512_AVX2: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2 +.p2align 2 +_Transform_Sha512_AVX2: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -6269,11 +6389,21 @@ L_sha256_avx2_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX2,.-Transform_Sha512_AVX2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX2_Len .type Transform_Sha512_AVX2_Len,@function .align 4 Transform_Sha512_AVX2_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_Len +.p2align 2 +_Transform_Sha512_AVX2_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -6292,7 +6422,11 @@ Transform_Sha512_AVX2_Len: vmovups %ymm1, 96(%rdi) vmovups %ymm2, 128(%rdi) vmovups %ymm3, 160(%rdi) +#ifndef __APPLE__ call Transform_Sha512_AVX2@plt +#else + call _Transform_Sha512_AVX2 +#endif /* __APPLE__ */ addq $0x80, 224(%rdi) subl $0x80, %ebp jz L_sha512_len_avx2_done @@ -7881,8 +8015,19 @@ L_sha512_len_avx2_done: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX2_Len,.-Transform_Sha512_AVX2_Len -.align 16 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_rorx_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -7924,7 +8069,16 @@ L_avx2_rorx_sha512_k: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_rorx_sha512_k_2: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -8006,17 +8160,43 @@ L_avx2_rorx_sha512_k_2: .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 8 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 8 +#else +.p2align 3 +#endif /* __APPLE__ */ L_avx2_rorx_sha512_k_2_end: .quad 1024+L_avx2_rorx_sha512_k_2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_rorx_sha512_flip_mask: .quad 0x1020304050607, 0x8090a0b0c0d0e0f .quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX2_RORX .type Transform_Sha512_AVX2_RORX,@function .align 4 Transform_Sha512_AVX2_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_RORX +.p2align 2 +_Transform_Sha512_AVX2_RORX: +#endif /* 
__APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -9006,11 +9186,21 @@ L_sha256_len_avx2_rorx_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX2_RORX,.-Transform_Sha512_AVX2_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX2_RORX_Len .type Transform_Sha512_AVX2_RORX_Len,@function .align 4 Transform_Sha512_AVX2_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_RORX_Len +.p2align 2 +_Transform_Sha512_AVX2_RORX_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -9029,7 +9219,11 @@ Transform_Sha512_AVX2_RORX_Len: vmovups %ymm1, 96(%rdi) vmovups %ymm2, 128(%rdi) vmovups %ymm3, 160(%rdi) +#ifndef __APPLE__ call Transform_Sha512_AVX2_RORX@plt +#else + call _Transform_Sha512_AVX2_RORX +#endif /* __APPLE__ */ pop %rsi addq $0x80, 224(%rdi) subl $0x80, %esi @@ -10541,5 +10735,7 @@ L_sha512_len_avx2_rorx_done: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX2_RORX_Len,.-Transform_Sha512_AVX2_RORX_Len +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 0d73aa7cb..d40a658ed 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -37,10 +37,16 @@ * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_mul_16 .type sp_2048_mul_16,@function .align 16 sp_2048_mul_16: +#else +.globl _sp_2048_mul_16 +.p2align 4 +_sp_2048_mul_16: +#endif /* __APPLE__ */ movq %rdx, %rcx subq $128, %rsp # A[0] * B[0] @@ -1672,16 +1678,24 @@ sp_2048_mul_16: movq %r9, 120(%rdi) addq $128, %rsp repz retq +#ifndef __APPLE__ .size sp_2048_mul_16,.-sp_2048_mul_16 +#endif /* __APPLE__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_sqr_16 .type sp_2048_sqr_16,@function .align 16 sp_2048_sqr_16: +#else +.globl _sp_2048_sqr_16 +.p2align 4 +_sp_2048_sqr_16: +#endif /* __APPLE__ */ push %r12 subq $128, %rsp # A[0] * A[0] @@ -2756,7 +2770,9 @@ sp_2048_sqr_16: addq $128, %rsp pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_sqr_16,.-sp_2048_sqr_16 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Multiply a and b into r. (r = a * b) * @@ -2764,10 +2780,16 @@ sp_2048_sqr_16: * a First number to multiply. * b Second number to multiply. */ +#ifndef __APPLE__ .globl sp_2048_mul_avx2_16 .type sp_2048_mul_avx2_16,@function .align 16 sp_2048_mul_avx2_16: +#else +.globl _sp_2048_mul_avx2_16 +.p2align 4 +_sp_2048_mul_avx2_16: +#endif /* __APPLE__ */ push %rbp push %r12 push %r13 @@ -4427,7 +4449,9 @@ sp_2048_mul_avx2_16: pop %r12 pop %rbp repz retq +#ifndef __APPLE__ .size sp_2048_mul_avx2_16,.-sp_2048_mul_avx2_16 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 /* Square a and put result in r. (r = a * a) @@ -4435,10 +4459,16 @@ sp_2048_mul_avx2_16: * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_sqr_avx2_16 .type sp_2048_sqr_avx2_16,@function .align 16 sp_2048_sqr_avx2_16: +#else +.globl _sp_2048_sqr_avx2_16 +.p2align 4 +_sp_2048_sqr_avx2_16: +#endif /* __APPLE__ */ push %rbp push %r12 push %r13 @@ -5461,7 +5491,9 @@ L_end_2048_sqr_avx2_16: pop %r12 pop %rbp repz retq +#ifndef __APPLE__ .size sp_2048_sqr_avx2_16,.-sp_2048_sqr_avx2_16 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Add b to a into r. 
(r = a + b) * @@ -5469,10 +5501,16 @@ L_end_2048_sqr_avx2_16: * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_add_16 .type sp_2048_add_16,@function .align 16 sp_2048_add_16: +#else +.globl _sp_2048_add_16 +.p2align 4 +_sp_2048_add_16: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -5524,16 +5562,24 @@ sp_2048_add_16: movq %rcx, 120(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_2048_add_16,.-sp_2048_add_16 +#endif /* __APPLE__ */ /* Sub b from a into a. (a -= b) * * a A single precision integer and result. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_sub_in_place_32 .type sp_2048_sub_in_place_32,@function .align 16 sp_2048_sub_in_place_32: +#else +.globl _sp_2048_sub_in_place_32 +.p2align 4 +_sp_2048_sub_in_place_32: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rdi), %rdx movq 8(%rdi), %rcx @@ -5665,17 +5711,25 @@ sp_2048_sub_in_place_32: movq %rcx, 248(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_2048_sub_in_place_32,.-sp_2048_sub_in_place_32 +#endif /* __APPLE__ */ /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_add_32 .type sp_2048_add_32,@function .align 16 sp_2048_add_32: +#else +.globl _sp_2048_add_32 +.p2align 4 +_sp_2048_add_32: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -5775,7 +5829,9 @@ sp_2048_add_32: movq %rcx, 248(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_2048_add_32,.-sp_2048_add_32 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 @@ -5788,10 +5844,16 @@ sp_2048_add_32: * a A single precision integer and result. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_sub_in_place_16 .type sp_2048_sub_in_place_16,@function .align 16 sp_2048_sub_in_place_16: +#else +.globl _sp_2048_sub_in_place_16 +.p2align 4 +_sp_2048_sub_in_place_16: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rdi), %rdx movq 8(%rdi), %rcx @@ -5859,7 +5921,9 @@ sp_2048_sub_in_place_16: movq %rcx, 120(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_2048_sub_in_place_16,.-sp_2048_sub_in_place_16 +#endif /* __APPLE__ */ /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -5868,10 +5932,16 @@ sp_2048_sub_in_place_16: * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef __APPLE__ .globl sp_2048_cond_sub_16 .type sp_2048_cond_sub_16,@function .align 16 sp_2048_cond_sub_16: +#else +.globl _sp_2048_cond_sub_16 +.p2align 4 +_sp_2048_cond_sub_16: +#endif /* __APPLE__ */ subq $128, %rsp movq $0, %rax movq (%rdx), %r8 @@ -5989,17 +6059,25 @@ sp_2048_cond_sub_16: sbbq $0, %rax addq $128, %rsp repz retq +#ifndef __APPLE__ .size sp_2048_cond_sub_16,.-sp_2048_cond_sub_16 +#endif /* __APPLE__ */ /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef __APPLE__ .globl sp_2048_mont_reduce_16 .type sp_2048_mont_reduce_16,@function .align 16 sp_2048_mont_reduce_16: +#else +.globl _sp_2048_mont_reduce_16 +.p2align 4 +_sp_2048_mont_reduce_16: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -6182,23 +6260,35 @@ L_mont_loop_16: movq %rdi, %rsi movq %rdi, %rdi subq $128, %rdi +#ifndef __APPLE__ callq sp_2048_cond_sub_16@plt +#else + callq _sp_2048_cond_sub_16 +#endif /* __APPLE__ */ pop %r15 pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_mont_reduce_16,.-sp_2048_mont_reduce_16 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_2048_mul_d_16 .type sp_2048_mul_d_16,@function .align 16 sp_2048_mul_d_16: +#else +.globl _sp_2048_mul_d_16 +.p2align 4 +_sp_2048_mul_d_16: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -6327,7 +6417,9 @@ sp_2048_mul_d_16: movq %r8, 120(%rdi) movq %r9, 128(%rdi) repz retq +#ifndef __APPLE__ .size sp_2048_mul_d_16,.-sp_2048_mul_d_16 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -6335,10 +6427,16 @@ sp_2048_mul_d_16: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_2048_mul_d_avx2_16 .type sp_2048_mul_d_avx2_16,@function .align 16 sp_2048_mul_d_avx2_16: +#else +.globl _sp_2048_mul_d_avx2_16 +.p2align 4 +_sp_2048_mul_d_avx2_16: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -6438,7 +6536,9 @@ sp_2048_mul_d_avx2_16: movq %r10, 120(%rdi) movq %r9, 128(%rdi) repz retq +#ifndef __APPLE__ .size sp_2048_mul_d_avx2_16,.-sp_2048_mul_d_avx2_16 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Compare a with b in constant time. * @@ -6447,10 +6547,16 @@ sp_2048_mul_d_avx2_16: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_2048_cmp_16 .type sp_2048_cmp_16,@function .align 16 sp_2048_cmp_16: +#else +.globl _sp_2048_cmp_16 +.p2align 4 +_sp_2048_cmp_16: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -6585,7 +6691,9 @@ sp_2048_cmp_16: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_2048_cmp_16,.-sp_2048_cmp_16 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Reduce the number back to 2048 bits using Montgomery reduction. * @@ -6593,10 +6701,16 @@ sp_2048_cmp_16: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_2048_mont_reduce_avx2_16 .type sp_2048_mont_reduce_avx2_16,@function .align 16 sp_2048_mont_reduce_avx2_16: +#else +.globl _sp_2048_mont_reduce_avx2_16 +.p2align 4 +_sp_2048_mont_reduce_avx2_16: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -6721,12 +6835,18 @@ L_mont_loop_avx2_16: movq %rdi, %rsi movq %rdi, %rdi subq $128, %rdi +#ifndef __APPLE__ callq sp_2048_cond_sub_16@plt +#else + callq _sp_2048_cond_sub_16 +#endif /* __APPLE__ */ pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_mont_reduce_avx2_16,.-sp_2048_mont_reduce_avx2_16 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -6745,10 +6865,16 @@ L_mont_loop_avx2_16: * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef __APPLE__ .globl sp_2048_cond_sub_32 .type sp_2048_cond_sub_32,@function .align 16 sp_2048_cond_sub_32: +#else +.globl _sp_2048_cond_sub_32 +.p2align 4 +_sp_2048_cond_sub_32: +#endif /* __APPLE__ */ subq $256, %rsp movq $0, %rax movq (%rdx), %r8 @@ -6978,17 +7104,25 @@ sp_2048_cond_sub_32: sbbq $0, %rax addq $256, %rsp repz retq +#ifndef __APPLE__ .size sp_2048_cond_sub_32,.-sp_2048_cond_sub_32 +#endif /* __APPLE__ */ /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_2048_mont_reduce_32 .type sp_2048_mont_reduce_32,@function .align 16 sp_2048_mont_reduce_32: +#else +.globl _sp_2048_mont_reduce_32 +.p2align 4 +_sp_2048_mont_reduce_32: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -7331,23 +7465,35 @@ L_mont_loop_32: movq %rdi, %rsi movq %rdi, %rdi subq $256, %rdi +#ifndef __APPLE__ callq sp_2048_cond_sub_32@plt +#else + callq _sp_2048_cond_sub_32 +#endif /* __APPLE__ */ pop %r15 pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_mont_reduce_32,.-sp_2048_mont_reduce_32 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_2048_mul_d_32 .type sp_2048_mul_d_32,@function .align 16 sp_2048_mul_d_32: +#else +.globl _sp_2048_mul_d_32 +.p2align 4 +_sp_2048_mul_d_32: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -7604,7 +7750,9 @@ sp_2048_mul_d_32: movq %r9, 248(%rdi) movq %r10, 256(%rdi) repz retq +#ifndef __APPLE__ .size sp_2048_mul_d_32,.-sp_2048_mul_d_32 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -7612,10 +7760,16 @@ sp_2048_mul_d_32: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_2048_mul_d_avx2_32 .type sp_2048_mul_d_avx2_32,@function .align 16 sp_2048_mul_d_avx2_32: +#else +.globl _sp_2048_mul_d_avx2_32 +.p2align 4 +_sp_2048_mul_d_avx2_32: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -7811,7 +7965,9 @@ sp_2048_mul_d_avx2_32: movq %r10, 248(%rdi) movq %r9, 256(%rdi) repz retq +#ifndef __APPLE__ .size sp_2048_mul_d_avx2_32,.-sp_2048_mul_d_avx2_32 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Compare a with b in constant time. * @@ -7820,10 +7976,16 @@ sp_2048_mul_d_avx2_32: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_2048_cmp_32 .type sp_2048_cmp_32,@function .align 16 sp_2048_cmp_32: +#else +.globl _sp_2048_cmp_32 +.p2align 4 +_sp_2048_cmp_32: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -8086,7 +8248,9 @@ sp_2048_cmp_32: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_2048_cmp_32,.-sp_2048_cmp_32 +#endif /* __APPLE__ */ #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH) #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */ #ifdef HAVE_INTEL_AVX2 @@ -8096,10 +8260,16 @@ sp_2048_cmp_32: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef __APPLE__ .globl sp_2048_mont_reduce_avx2_32 .type sp_2048_mont_reduce_avx2_32,@function .align 16 sp_2048_mont_reduce_avx2_32: +#else +.globl _sp_2048_mont_reduce_avx2_32 +.p2align 4 +_sp_2048_mont_reduce_avx2_32: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -8320,12 +8490,18 @@ L_mont_loop_avx2_32: movq %rdi, %rsi movq %rdi, %rdi subq $256, %rdi +#ifndef __APPLE__ callq sp_2048_cond_sub_32@plt +#else + callq _sp_2048_cond_sub_32 +#endif /* __APPLE__ */ pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_mont_reduce_avx2_32,.-sp_2048_mont_reduce_avx2_32 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -8345,10 +8521,16 @@ L_mont_loop_avx2_32: * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_mul_24 .type sp_3072_mul_24,@function .align 16 sp_3072_mul_24: +#else +.globl _sp_3072_mul_24 +.p2align 4 +_sp_3072_mul_24: +#endif /* __APPLE__ */ movq %rdx, %rcx subq $192, %rsp # A[0] * B[0] @@ -11948,16 +12130,24 @@ sp_3072_mul_24: movq %r9, 184(%rdi) addq $192, %rsp repz retq +#ifndef __APPLE__ .size sp_3072_mul_24,.-sp_3072_mul_24 +#endif /* __APPLE__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_sqr_24 .type sp_3072_sqr_24,@function .align 16 sp_3072_sqr_24: +#else +.globl _sp_3072_sqr_24 +.p2align 4 +_sp_3072_sqr_24: +#endif /* __APPLE__ */ push %r12 subq $192, %rsp # A[0] * A[0] @@ -14160,7 +14350,9 @@ sp_3072_sqr_24: addq $192, %rsp pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_sqr_24,.-sp_3072_sqr_24 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Multiply a and b into r. (r = a * b) * @@ -14168,10 +14360,16 @@ sp_3072_sqr_24: * a First number to multiply. * b Second number to multiply. */ +#ifndef __APPLE__ .globl sp_3072_mul_avx2_24 .type sp_3072_mul_avx2_24,@function .align 16 sp_3072_mul_avx2_24: +#else +.globl _sp_3072_mul_avx2_24 +.p2align 4 +_sp_3072_mul_avx2_24: +#endif /* __APPLE__ */ push %rbp push %r12 push %r13 @@ -17807,7 +18005,9 @@ sp_3072_mul_avx2_24: pop %r12 pop %rbp repz retq +#ifndef __APPLE__ .size sp_3072_mul_avx2_24,.-sp_3072_mul_avx2_24 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 /* Square a and put result in r. (r = a * a) @@ -17815,10 +18015,16 @@ sp_3072_mul_avx2_24: * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_sqr_avx2_24 .type sp_3072_sqr_avx2_24,@function .align 16 sp_3072_sqr_avx2_24: +#else +.globl _sp_3072_sqr_avx2_24 +.p2align 4 +_sp_3072_sqr_avx2_24: +#endif /* __APPLE__ */ push %rbp push %r12 push %r13 @@ -19923,7 +20129,9 @@ L_end_3072_sqr_avx2_24: pop %r12 pop %rbp repz retq +#ifndef __APPLE__ .size sp_3072_sqr_avx2_24,.-sp_3072_sqr_avx2_24 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Add b to a into r. (r = a + b) * @@ -19931,10 +20139,16 @@ L_end_3072_sqr_avx2_24: * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_add_24 .type sp_3072_add_24,@function .align 16 sp_3072_add_24: +#else +.globl _sp_3072_add_24 +.p2align 4 +_sp_3072_add_24: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -20010,16 +20224,24 @@ sp_3072_add_24: movq %rcx, 184(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_3072_add_24,.-sp_3072_add_24 +#endif /* __APPLE__ */ /* Sub b from a into a. 
(a -= b) * * a A single precision integer and result. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_sub_in_place_48 .type sp_3072_sub_in_place_48,@function .align 16 sp_3072_sub_in_place_48: +#else +.globl _sp_3072_sub_in_place_48 +.p2align 4 +_sp_3072_sub_in_place_48: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rdi), %rdx movq 8(%rdi), %rcx @@ -20215,17 +20437,25 @@ sp_3072_sub_in_place_48: movq %rcx, 376(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_3072_sub_in_place_48,.-sp_3072_sub_in_place_48 +#endif /* __APPLE__ */ /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_add_48 .type sp_3072_add_48,@function .align 16 sp_3072_add_48: +#else +.globl _sp_3072_add_48 +.p2align 4 +_sp_3072_add_48: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -20373,7 +20603,9 @@ sp_3072_add_48: movq %rcx, 376(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_3072_add_48,.-sp_3072_add_48 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 @@ -20386,10 +20618,16 @@ sp_3072_add_48: * a A single precision integer and result. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_sub_in_place_24 .type sp_3072_sub_in_place_24,@function .align 16 sp_3072_sub_in_place_24: +#else +.globl _sp_3072_sub_in_place_24 +.p2align 4 +_sp_3072_sub_in_place_24: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rdi), %rdx movq 8(%rdi), %rcx @@ -20489,7 +20727,9 @@ sp_3072_sub_in_place_24: movq %rcx, 184(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_3072_sub_in_place_24,.-sp_3072_sub_in_place_24 +#endif /* __APPLE__ */ /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -20498,10 +20738,16 @@ sp_3072_sub_in_place_24: * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef __APPLE__ .globl sp_3072_cond_sub_24 .type sp_3072_cond_sub_24,@function .align 16 sp_3072_cond_sub_24: +#else +.globl _sp_3072_cond_sub_24 +.p2align 4 +_sp_3072_cond_sub_24: +#endif /* __APPLE__ */ subq $192, %rsp movq $0, %rax movq (%rdx), %r8 @@ -20675,17 +20921,25 @@ sp_3072_cond_sub_24: sbbq $0, %rax addq $192, %rsp repz retq +#ifndef __APPLE__ .size sp_3072_cond_sub_24,.-sp_3072_cond_sub_24 +#endif /* __APPLE__ */ /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_3072_mont_reduce_24 .type sp_3072_mont_reduce_24,@function .align 16 sp_3072_mont_reduce_24: +#else +.globl _sp_3072_mont_reduce_24 +.p2align 4 +_sp_3072_mont_reduce_24: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -20948,23 +21202,35 @@ L_mont_loop_24: movq %rdi, %rsi movq %rdi, %rdi subq $192, %rdi +#ifndef __APPLE__ callq sp_3072_cond_sub_24@plt +#else + callq _sp_3072_cond_sub_24 +#endif /* __APPLE__ */ pop %r15 pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_mont_reduce_24,.-sp_3072_mont_reduce_24 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ +#ifndef __APPLE__ .globl sp_3072_mul_d_24 .type sp_3072_mul_d_24,@function .align 16 sp_3072_mul_d_24: +#else +.globl _sp_3072_mul_d_24 +.p2align 4 +_sp_3072_mul_d_24: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -21157,7 +21423,9 @@ sp_3072_mul_d_24: movq %r10, 184(%rdi) movq %r8, 192(%rdi) repz retq +#ifndef __APPLE__ .size sp_3072_mul_d_24,.-sp_3072_mul_d_24 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -21165,10 +21433,16 @@ sp_3072_mul_d_24: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_3072_mul_d_avx2_24 .type sp_3072_mul_d_avx2_24,@function .align 16 sp_3072_mul_d_avx2_24: +#else +.globl _sp_3072_mul_d_avx2_24 +.p2align 4 +_sp_3072_mul_d_avx2_24: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -21316,7 +21590,9 @@ sp_3072_mul_d_avx2_24: movq %r10, 184(%rdi) movq %r9, 192(%rdi) repz retq +#ifndef __APPLE__ .size sp_3072_mul_d_avx2_24,.-sp_3072_mul_d_avx2_24 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Compare a with b in constant time. * @@ -21325,10 +21601,16 @@ sp_3072_mul_d_avx2_24: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_3072_cmp_24 .type sp_3072_cmp_24,@function .align 16 sp_3072_cmp_24: +#else +.globl _sp_3072_cmp_24 +.p2align 4 +_sp_3072_cmp_24: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -21527,7 +21809,9 @@ sp_3072_cmp_24: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_3072_cmp_24,.-sp_3072_cmp_24 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Reduce the number back to 3072 bits using Montgomery reduction. * @@ -21535,10 +21819,16 @@ sp_3072_cmp_24: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_3072_mont_reduce_avx2_24 .type sp_3072_mont_reduce_avx2_24,@function .align 16 sp_3072_mont_reduce_avx2_24: +#else +.globl _sp_3072_mont_reduce_avx2_24 +.p2align 4 +_sp_3072_mont_reduce_avx2_24: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -21711,12 +22001,18 @@ L_mont_loop_avx2_24: movq %rdi, %rsi movq %rdi, %rdi subq $192, %rdi +#ifndef __APPLE__ callq sp_3072_cond_sub_24@plt +#else + callq _sp_3072_cond_sub_24 +#endif /* __APPLE__ */ pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_mont_reduce_avx2_24,.-sp_3072_mont_reduce_avx2_24 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -21735,10 +22031,16 @@ L_mont_loop_avx2_24: * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef __APPLE__ .globl sp_3072_cond_sub_48 .type sp_3072_cond_sub_48,@function .align 16 sp_3072_cond_sub_48: +#else +.globl _sp_3072_cond_sub_48 +.p2align 4 +_sp_3072_cond_sub_48: +#endif /* __APPLE__ */ subq $384, %rsp movq $0, %rax movq (%rdx), %r8 @@ -22080,17 +22382,25 @@ sp_3072_cond_sub_48: sbbq $0, %rax addq $384, %rsp repz retq +#ifndef __APPLE__ .size sp_3072_cond_sub_48,.-sp_3072_cond_sub_48 +#endif /* __APPLE__ */ /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef __APPLE__ .globl sp_3072_mont_reduce_48 .type sp_3072_mont_reduce_48,@function .align 16 sp_3072_mont_reduce_48: +#else +.globl _sp_3072_mont_reduce_48 +.p2align 4 +_sp_3072_mont_reduce_48: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -22593,23 +22903,35 @@ L_mont_loop_48: movq %rdi, %rsi movq %rdi, %rdi subq $384, %rdi +#ifndef __APPLE__ callq sp_3072_cond_sub_48@plt +#else + callq _sp_3072_cond_sub_48 +#endif /* __APPLE__ */ pop %r15 pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_mont_reduce_48,.-sp_3072_mont_reduce_48 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_3072_mul_d_48 .type sp_3072_mul_d_48,@function .align 16 sp_3072_mul_d_48: +#else +.globl _sp_3072_mul_d_48 +.p2align 4 +_sp_3072_mul_d_48: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -22994,7 +23316,9 @@ sp_3072_mul_d_48: movq %r10, 376(%rdi) movq %r8, 384(%rdi) repz retq +#ifndef __APPLE__ .size sp_3072_mul_d_48,.-sp_3072_mul_d_48 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -23002,10 +23326,16 @@ sp_3072_mul_d_48: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_3072_mul_d_avx2_48 .type sp_3072_mul_d_avx2_48,@function .align 16 sp_3072_mul_d_avx2_48: +#else +.globl _sp_3072_mul_d_avx2_48 +.p2align 4 +_sp_3072_mul_d_avx2_48: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -23297,7 +23627,9 @@ sp_3072_mul_d_avx2_48: movq %r10, 376(%rdi) movq %r9, 384(%rdi) repz retq +#ifndef __APPLE__ .size sp_3072_mul_d_avx2_48,.-sp_3072_mul_d_avx2_48 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Compare a with b in constant time. * @@ -23306,10 +23638,16 @@ sp_3072_mul_d_avx2_48: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_3072_cmp_48 .type sp_3072_cmp_48,@function .align 16 sp_3072_cmp_48: +#else +.globl _sp_3072_cmp_48 +.p2align 4 +_sp_3072_cmp_48: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -23700,7 +24038,9 @@ sp_3072_cmp_48: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_3072_cmp_48,.-sp_3072_cmp_48 +#endif /* __APPLE__ */ #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH) #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */ #ifdef HAVE_INTEL_AVX2 @@ -23710,10 +24050,16 @@ sp_3072_cmp_48: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_3072_mont_reduce_avx2_48 .type sp_3072_mont_reduce_avx2_48,@function .align 16 sp_3072_mont_reduce_avx2_48: +#else +.globl _sp_3072_mont_reduce_avx2_48 +.p2align 4 +_sp_3072_mont_reduce_avx2_48: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24030,12 +24376,18 @@ L_mont_loop_avx2_48: movq %rdi, %rsi movq %rdi, %rdi subq $384, %rdi +#ifndef __APPLE__ callq sp_3072_cond_sub_48@plt +#else + callq _sp_3072_cond_sub_48 +#endif /* __APPLE__ */ pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_mont_reduce_avx2_48,.-sp_3072_mont_reduce_avx2_48 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -24058,10 +24410,16 @@ L_mont_loop_avx2_48: * a A single precision number to copy. * m Mask value to apply. 
*/ +#ifndef __APPLE__ .globl sp_256_cond_copy_4 .type sp_256_cond_copy_4,@function .align 16 sp_256_cond_copy_4: +#else +.globl _sp_256_cond_copy_4 +.p2align 4 +_sp_256_cond_copy_4: +#endif /* __APPLE__ */ movq (%rdi), %rax movq 8(%rdi), %rcx movq 16(%rdi), %r8 @@ -24079,7 +24437,9 @@ sp_256_cond_copy_4: xorq %r8, 16(%rdi) xorq %r9, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_cond_copy_4,.-sp_256_cond_copy_4 +#endif /* __APPLE__ */ /* Compare a with b in constant time. * * a A single precision integer. @@ -24087,10 +24447,16 @@ sp_256_cond_copy_4: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_256_cmp_4 .type sp_256_cmp_4,@function .align 16 sp_256_cmp_4: +#else +.globl _sp_256_cmp_4 +.p2align 4 +_sp_256_cmp_4: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -24129,7 +24495,9 @@ sp_256_cmp_4: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_256_cmp_4,.-sp_256_cmp_4 +#endif /* __APPLE__ */ /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -24138,10 +24506,16 @@ sp_256_cmp_4: * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef __APPLE__ .globl sp_256_cond_sub_4 .type sp_256_cond_sub_4,@function .align 16 sp_256_cond_sub_4: +#else +.globl _sp_256_cond_sub_4 +.p2align 4 +_sp_256_cond_sub_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24173,17 +24547,25 @@ sp_256_cond_sub_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_cond_sub_4,.-sp_256_cond_sub_4 +#endif /* __APPLE__ */ /* Sub b from a into r. (r = a - b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_sub_4 .type sp_256_sub_4,@function .align 16 sp_256_sub_4: +#else +.globl _sp_256_sub_4 +.p2align 4 +_sp_256_sub_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24211,17 +24593,25 @@ sp_256_sub_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_sub_4,.-sp_256_sub_4 +#endif /* __APPLE__ */ /* Reduce the number back to 256 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_256_mont_reduce_4 .type sp_256_mont_reduce_4,@function .align 16 sp_256_mont_reduce_4: +#else +.globl _sp_256_mont_reduce_4 +.p2align 4 +_sp_256_mont_reduce_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24309,7 +24699,9 @@ L_mont_loop_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_reduce_4,.-sp_256_mont_reduce_4 +#endif /* __APPLE__ */ /* Multiply two Montogmery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -24319,10 +24711,16 @@ L_mont_loop_4: * m Modulus (prime). * mp Montogmery mulitplier. */ +#ifndef __APPLE__ .globl sp_256_mont_mul_4 .type sp_256_mont_mul_4,@function .align 16 sp_256_mont_mul_4: +#else +.globl _sp_256_mont_mul_4 +.p2align 4 +_sp_256_mont_mul_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24504,7 +24902,9 @@ sp_256_mont_mul_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_mul_4,.-sp_256_mont_mul_4 +#endif /* __APPLE__ */ /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -24512,10 +24912,16 @@ sp_256_mont_mul_4: * m Modulus (prime). * mp Montogmery mulitplier. 
*/ +#ifndef __APPLE__ .globl sp_256_mont_sqr_4 .type sp_256_mont_sqr_4,@function .align 16 sp_256_mont_sqr_4: +#else +.globl _sp_256_mont_sqr_4 +.p2align 4 +_sp_256_mont_sqr_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24675,7 +25081,9 @@ sp_256_mont_sqr_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_sqr_4,.-sp_256_mont_sqr_4 +#endif /* __APPLE__ */ /* Add two Montgomery form numbers (r = a + b % m). * * r Result of addition. @@ -24683,10 +25091,16 @@ sp_256_mont_sqr_4: * b Second number to add in Montogmery form. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_mont_add_4 .type sp_256_mont_add_4,@function .align 16 sp_256_mont_add_4: +#else +.globl _sp_256_mont_add_4 +.p2align 4 +_sp_256_mont_add_4: +#endif /* __APPLE__ */ movq (%rsi), %rax movq 8(%rsi), %rcx movq 16(%rsi), %r8 @@ -24710,17 +25124,25 @@ sp_256_mont_add_4: movq %r8, 16(%rdi) movq %r9, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mont_add_4,.-sp_256_mont_add_4 +#endif /* __APPLE__ */ /* Double a Montgomery form number (r = a + a % m). * * r Result of doubling. * a Number to double in Montogmery form. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_mont_dbl_4 .type sp_256_mont_dbl_4,@function .align 16 sp_256_mont_dbl_4: +#else +.globl _sp_256_mont_dbl_4 +.p2align 4 +_sp_256_mont_dbl_4: +#endif /* __APPLE__ */ movq (%rsi), %rdx movq 8(%rsi), %rax movq 16(%rsi), %rcx @@ -24744,17 +25166,25 @@ sp_256_mont_dbl_4: movq %rcx, 16(%rdi) movq %r8, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mont_dbl_4,.-sp_256_mont_dbl_4 +#endif /* __APPLE__ */ /* Triple a Montgomery form number (r = a + a + a % m). * * r Result of Tripling. * a Number to triple in Montogmery form. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_mont_tpl_4 .type sp_256_mont_tpl_4,@function .align 16 sp_256_mont_tpl_4: +#else +.globl _sp_256_mont_tpl_4 +.p2align 4 +_sp_256_mont_tpl_4: +#endif /* __APPLE__ */ movq (%rsi), %rdx movq 8(%rsi), %rax movq 16(%rsi), %rcx @@ -24792,7 +25222,9 @@ sp_256_mont_tpl_4: movq %rcx, 16(%rdi) movq %r8, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mont_tpl_4,.-sp_256_mont_tpl_4 +#endif /* __APPLE__ */ /* Subtract two Montgomery form numbers (r = a - b % m). * * r Result of subtration. @@ -24800,10 +25232,16 @@ sp_256_mont_tpl_4: * b Number to subtract with in Montogmery form. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_mont_sub_4 .type sp_256_mont_sub_4,@function .align 16 sp_256_mont_sub_4: +#else +.globl _sp_256_mont_sub_4 +.p2align 4 +_sp_256_mont_sub_4: +#endif /* __APPLE__ */ movq (%rsi), %rax movq 8(%rsi), %rcx movq 16(%rsi), %r8 @@ -24827,17 +25265,25 @@ sp_256_mont_sub_4: movq %r8, 16(%rdi) movq %r9, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mont_sub_4,.-sp_256_mont_sub_4 +#endif /* __APPLE__ */ /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) * * r Result of division by 2. * a Number to divide. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_div2_4 .type sp_256_div2_4,@function .align 16 sp_256_div2_4: +#else +.globl _sp_256_div2_4 +.p2align 4 +_sp_256_div2_4: +#endif /* __APPLE__ */ movq (%rsi), %rdx movq 8(%rsi), %rax movq 16(%rsi), %rcx @@ -24864,7 +25310,9 @@ sp_256_div2_4: movq %rcx, 16(%rdi) movq %r8, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_div2_4,.-sp_256_div2_4 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Multiply two Montogmery form numbers mod the modulus (prime). * (r = a * b mod m) @@ -24875,10 +25323,16 @@ sp_256_div2_4: * m Modulus (prime). * mp Montogmery mulitplier. 
*/ +#ifndef __APPLE__ .globl sp_256_mont_mul_avx2_4 .type sp_256_mont_mul_avx2_4,@function .align 16 sp_256_mont_mul_avx2_4: +#else +.globl _sp_256_mont_mul_avx2_4 +.p2align 4 +_sp_256_mont_mul_avx2_4: +#endif /* __APPLE__ */ push %rbx push %rbp push %r12 @@ -25040,7 +25494,9 @@ sp_256_mont_mul_avx2_4: pop %rbp pop %rbx repz retq +#ifndef __APPLE__ .size sp_256_mont_mul_avx2_4,.-sp_256_mont_mul_avx2_4 +#endif /* __APPLE__ */ /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -25048,10 +25504,16 @@ sp_256_mont_mul_avx2_4: * m Modulus (prime). * mp Montogmery mulitplier. */ +#ifndef __APPLE__ .globl sp_256_mont_sqr_avx2_4 .type sp_256_mont_sqr_avx2_4,@function .align 16 sp_256_mont_sqr_avx2_4: +#else +.globl _sp_256_mont_sqr_avx2_4 +.p2align 4 +_sp_256_mont_sqr_avx2_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -25186,7 +25648,9 @@ sp_256_mont_sqr_avx2_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_sqr_avx2_4,.-sp_256_mont_sqr_avx2_4 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -25201,26 +25665,40 @@ sp_256_mont_sqr_avx2_4: * * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_add_one_4 .type sp_256_add_one_4,@function .align 16 sp_256_add_one_4: +#else +.globl _sp_256_add_one_4 +.p2align 4 +_sp_256_add_one_4: +#endif /* __APPLE__ */ addq $1, (%rdi) adcq $0, 8(%rdi) adcq $0, 16(%rdi) adcq $0, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_add_one_4,.-sp_256_add_one_4 +#endif /* __APPLE__ */ /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_add_4 .type sp_256_add_4,@function .align 16 sp_256_add_4: +#else +.globl _sp_256_add_4 +.p2align 4 +_sp_256_add_4: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -25236,17 +25714,25 @@ sp_256_add_4: movq %rcx, 24(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_256_add_4,.-sp_256_add_4 +#endif /* __APPLE__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_mul_4 .type sp_256_mul_4,@function .align 16 sp_256_mul_4: +#else +.globl _sp_256_mul_4 +.p2align 4 +_sp_256_mul_4: +#endif /* __APPLE__ */ movq %rdx, %rcx subq $32, %rsp # A[0] * B[0] @@ -25366,17 +25852,25 @@ sp_256_mul_4: movq %r9, 24(%rdi) addq $32, %rsp repz retq +#ifndef __APPLE__ .size sp_256_mul_4,.-sp_256_mul_4 +#endif /* __APPLE__ */ /* Multiply a and b into r. (r = a * b) * * r Result of multiplication. * a First number to multiply. * b Second number to multiply. */ +#ifndef __APPLE__ .globl sp_256_mul_avx2_4 .type sp_256_mul_avx2_4,@function .align 16 sp_256_mul_avx2_4: +#else +.globl _sp_256_mul_avx2_4 +.p2align 4 +_sp_256_mul_avx2_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -25474,16 +25968,24 @@ sp_256_mul_avx2_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mul_avx2_4,.-sp_256_mul_avx2_4 +#endif /* __APPLE__ */ /* Sub b from a into a. (a -= b) * * a A single precision integer and result. * b A single precision integer. 
*/ +#ifndef __APPLE__ .globl sp_256_sub_in_place_4 .type sp_256_sub_in_place_4,@function .align 16 sp_256_sub_in_place_4: +#else +.globl _sp_256_sub_in_place_4 +.p2align 4 +_sp_256_sub_in_place_4: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rdx movq 8(%rsi), %rcx @@ -25495,17 +25997,25 @@ sp_256_sub_in_place_4: sbbq %r9, 24(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_256_sub_in_place_4,.-sp_256_sub_in_place_4 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_256_mul_d_4 .type sp_256_mul_d_4,@function .align 16 sp_256_mul_d_4: +#else +.globl _sp_256_mul_d_4 +.p2align 4 +_sp_256_mul_d_4: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -25538,7 +26048,9 @@ sp_256_mul_d_4: movq %r8, 24(%rdi) movq %r9, 32(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mul_d_4,.-sp_256_mul_d_4 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -25546,10 +26058,16 @@ sp_256_mul_d_4: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_256_mul_d_avx2_4 .type sp_256_mul_d_avx2_4,@function .align 16 sp_256_mul_d_avx2_4: +#else +.globl _sp_256_mul_d_avx2_4 +.p2align 4 +_sp_256_mul_d_avx2_4: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -25577,17 +26095,25 @@ sp_256_mul_d_avx2_4: movq %r10, 24(%rdi) movq %r9, 32(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mul_d_avx2_4,.-sp_256_mul_d_avx2_4 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_sqr_4 .type sp_256_sqr_4,@function .align 16 sp_256_sqr_4: +#else +.globl _sp_256_sqr_4 +.p2align 4 +_sp_256_sqr_4: +#endif /* __APPLE__ */ push %r12 subq $32, %rsp # A[0] * A[0] @@ -25690,17 +26216,25 @@ sp_256_sqr_4: addq $32, %rsp pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_sqr_4,.-sp_256_sqr_4 +#endif /* __APPLE__ */ /* Square a and put result in r. (r = a * a) * * r Result of squaring. * a Number to square in Montogmery form. */ +#ifndef __APPLE__ .globl sp_256_sqr_avx2_4 .type sp_256_sqr_avx2_4,@function .align 16 sp_256_sqr_avx2_4: +#else +.globl _sp_256_sqr_avx2_4 +.p2align 4 +_sp_256_sqr_avx2_4: +#endif /* __APPLE__ */ push %rbx push %r12 push %r13 @@ -25773,7 +26307,9 @@ sp_256_sqr_avx2_4: pop %r12 pop %rbx repz retq +#ifndef __APPLE__ .size sp_256_sqr_avx2_4,.-sp_256_sqr_avx2_4 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Reduce the number back to 256 bits using Montgomery reduction. * @@ -25781,10 +26317,16 @@ sp_256_sqr_avx2_4: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_256_mont_reduce_avx2_4 .type sp_256_mont_reduce_avx2_4,@function .align 16 sp_256_mont_reduce_avx2_4: +#else +.globl _sp_256_mont_reduce_avx2_4 +.p2align 4 +_sp_256_mont_reduce_avx2_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -25927,7 +26469,9 @@ sp_256_mont_reduce_avx2_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_reduce_avx2_4,.-sp_256_mont_reduce_avx2_4 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #endif /* !WOLFSSL_SP_NO_256 */ #endif /* WOLFSSL_HAVE_SP_ECC */
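Note on the symbol renames in the hunks above: Mach-O toolchains prepend a leading underscore to C-visible symbols, so the sp_* entry points have to be exported as _sp_* for the existing C callers to resolve on Darwin, and the internal calls drop the ELF-specific @plt suffix because Mach-O does not use PLT-style relocations for them. The sketch below shows how a C translation unit could reference one of the patched routines; the sp_digit typedef, the prototype and the reduce_tail_example() helper are illustrative assumptions, not wolfSSL's actual declarations. The point is that the same C declaration links against sp_2048_cond_sub_32 on ELF and _sp_2048_cond_sub_32 on Mach-O with no source change.

#include <stdint.h>

typedef uint64_t sp_digit;            /* assumption: 64-bit limbs, as the asm implies */

/* Illustrative prototype: r = a - (b & m); the return value reports the borrow.
 * On Mach-O the compiler emits a reference to _sp_2048_cond_sub_32, which is
 * exactly the label added above under __APPLE__. */
extern sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
                                    const sp_digit* b, sp_digit m);

/* Hypothetical caller mirroring the tail of the Montgomery reduction:
 * subtract the modulus from the upper half only when the mask is all-ones,
 * writing the result to the lower half. */
static void reduce_tail_example(sp_digit* a, const sp_digit* m, sp_digit carry)
{
    (void)sp_2048_cond_sub_32(a, a + 32, m, (sp_digit)0 - carry);
}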
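The sp_*_mont_reduce_* bodies patched above all finish the same way: they call the matching sp_*_cond_sub_* routine with a mask derived from the reduction carry, which is the constant-time final subtraction step of Montgomery reduction. Below is a reference sketch of that structure, assuming 64-bit limbs, a 2*n-limb input and compiler __int128 support; it models the algorithm, not the assembly's register scheduling, and mont_reduce_sketch() is our name, not a wolfSSL function.

#include <stdint.h>

typedef uint64_t sp_digit;
typedef unsigned __int128 sp_word;    /* assumption: GCC/Clang __int128 support */

/* a: 2*n limbs holding the value to reduce; m: n-limb modulus;
 * mp: -m[0]^-1 mod 2^64. The reduced value is left in a[0..n-1]. */
static void mont_reduce_sketch(sp_digit* a, const sp_digit* m,
                               sp_digit mp, int n)
{
    sp_digit carry = 0;
    for (int i = 0; i < n; i++) {
        sp_digit mu = a[i] * mp;      /* limb multiplier that zeroes a[i] */
        sp_word c = 0;
        for (int j = 0; j < n; j++) {
            c += (sp_word)mu * m[j] + a[i + j];
            a[i + j] = (sp_digit)c;
            c >>= 64;
        }
        c += (sp_word)a[i + n] + carry;
        a[i + n] = (sp_digit)c;
        carry = (sp_digit)(c >> 64);
    }

    /* Final subtraction: the mask is all-ones exactly when the reduction
     * overflowed, matching the mask handed to sp_*_cond_sub_* above. */
    sp_digit mask = (sp_digit)0 - carry;
    sp_digit borrow = 0;
    for (int i = 0; i < n; i++) {
        sp_word d = (sp_word)a[i + n] - (m[i] & mask) - borrow;
        a[i] = (sp_digit)d;
        borrow = (sp_digit)((d >> 64) & 1);
    }
}

Keeping the subtraction masked rather than branch-guarded is what keeps the sequence constant-time with respect to the value being reduced.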
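The sp_*_cmp_* routines above return a sign in constant time by scanning from the most significant limb down and committing to the first limb that differs, using cmov chains rather than branches. A branch-free C equivalent for orientation; sp_cmp_sketch() is again an illustrative name:

#include <stdint.h>

typedef uint64_t sp_digit;

/* Returns -1, 0 or +1 for a < b, a == b, a > b over n limbs, deciding on the
 * most significant limb that differs, without branching on the data. */
static int sp_cmp_sketch(const sp_digit* a, const sp_digit* b, int n)
{
    sp_digit gt = 0, lt = 0;          /* become all-ones once decided */
    for (int i = n - 1; i >= 0; i--) {
        sp_digit decided = gt | lt;
        gt |= ((sp_digit)0 - (sp_digit)(a[i] > b[i])) & ~decided;
        lt |= ((sp_digit)0 - (sp_digit)(a[i] < b[i])) & ~decided;
    }
    return (int)(gt & 1) - (int)(lt & 1);
}

In C the branch-freedom still depends on the compiler lowering the limb comparisons to setcc/cmov, which is part of why these routines are carried as hand-written assembly in the first place.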