diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index e865a9ad3..9ed8cebd9 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -608,10 +608,12 @@ #define AESNI_ALIGN 16 #endif - #ifndef _MSC_VER - #define XASM_LINK(f) asm(f) - #else + #ifdef _MSC_VER #define XASM_LINK(f) + #elif defined(__APPLE__) + #define XASM_LINK(f) asm("_" f) + #else + #define XASM_LINK(f) asm(f) #endif /* _MSC_VER */ static int checkAESNI = 0; diff --git a/wolfcrypt/src/aes_asm.S b/wolfcrypt/src/aes_asm.S index cff57db9d..6fff275b4 100644 --- a/wolfcrypt/src/aes_asm.S +++ b/wolfcrypt/src/aes_asm.S @@ -36,8 +36,13 @@ AES_CBC_encrypt (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_CBC_encrypt AES_CBC_encrypt: +#else +.globl _AES_CBC_encrypt +_AES_CBC_encrypt: +#endif # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx @@ -96,8 +101,13 @@ AES_CBC_decrypt_by4 (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_CBC_decrypt_by4 AES_CBC_decrypt_by4: +#else +.globl _AES_CBC_decrypt_by4 +_AES_CBC_decrypt_by4: +#endif # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx @@ -272,8 +282,13 @@ AES_CBC_decrypt_by6 (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_CBC_decrypt_by6 AES_CBC_decrypt_by6: +#else +.globl _AES_CBC_decrypt_by6 +_AES_CBC_decrypt_by6: +#endif # parameter 1: %rdi - in # parameter 2: %rsi - out # parameter 3: %rdx - ivec @@ -495,8 +510,13 @@ AES_CBC_decrypt_by8 (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_CBC_decrypt_by8 AES_CBC_decrypt_by8: +#else +.globl _AES_CBC_decrypt_by8 +_AES_CBC_decrypt_by8: +#endif # parameter 1: %rdi - in # parameter 2: %rsi - out # parameter 3: %rdx - ivec @@ -746,8 +766,13 @@ AES_ECB_encrypt (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_ECB_encrypt AES_ECB_encrypt: +#else +.globl _AES_ECB_encrypt +_AES_ECB_encrypt: +#endif # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx @@ -905,8 +930,13 @@ AES_ECB_decrypt (const unsigned char *in, const unsigned char *KS, int nr) */ +#ifndef __APPLE__ .globl AES_ECB_decrypt AES_ECB_decrypt: +#else +.globl _AES_ECB_decrypt +_AES_ECB_decrypt: +#endif # parameter 1: %rdi # parameter 2: %rsi # parameter 3: %rdx @@ -1065,8 +1095,13 @@ void AES_128_Key_Expansion(const unsigned char* userkey, unsigned char* key_schedule); */ .align 16,0x90 +#ifndef __APPLE__ .globl AES_128_Key_Expansion AES_128_Key_Expansion: +#else +.globl _AES_128_Key_Expansion +_AES_128_Key_Expansion: +#endif # parameter 1: %rdi # parameter 2: %rsi movl $10, 240(%rsi) @@ -1125,8 +1160,13 @@ ret void AES_192_Key_Expansion (const unsigned char *userkey, unsigned char *key) */ +#ifndef __APPLE__ .globl AES_192_Key_Expansion AES_192_Key_Expansion: +#else +.globl _AES_192_Key_Expansion +_AES_192_Key_Expansion: +#endif # parameter 1: %rdi # parameter 2: %rsi @@ -1211,8 +1251,13 @@ ret void AES_256_Key_Expansion (const unsigned char *userkey, unsigned char *key) */ +#ifndef __APPLE__ .globl AES_256_Key_Expansion AES_256_Key_Expansion: +#else +.globl _AES_256_Key_Expansion +_AES_256_Key_Expansion: +#endif # parameter 1: %rdi # parameter 2: %rsi diff --git a/wolfcrypt/src/aes_gcm_asm.S b/wolfcrypt/src/aes_gcm_asm.S index f6f9f3274..19e3f7db8 100644 --- a/wolfcrypt/src/aes_gcm_asm.S +++ b/wolfcrypt/src/aes_gcm_asm.S @@ -26,43 +26,150 @@ #define HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ -.align 16 +#ifndef __APPLE__ +.data +#else 
+.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_one: .quad 0x0, 0x1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_two: .quad 0x0, 0x2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_three: .quad 0x0, 0x3 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_four: .quad 0x0, 0x4 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_five: .quad 0x0, 0x5 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_six: .quad 0x0, 0x6 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_seven: .quad 0x0, 0x7 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_eight: .quad 0x0, 0x8 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_bswap_epi64: .quad 0x1020304050607, 0x8090a0b0c0d0e0f -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_bswap_mask: .quad 0x8090a0b0c0d0e0f, 0x1020304050607 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_aes_gcm_mod2_128: .quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text .globl AES_GCM_encrypt .type AES_GCM_encrypt,@function .align 4 AES_GCM_encrypt: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt +.p2align 2 +_AES_GCM_encrypt: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %rbx @@ -70,11 +177,11 @@ AES_GCM_encrypt: pushq %r15 movq %rdx, %r12 movq %rcx, %rax - movq 48(%rsp), %r11 - movq 56(%rsp), %rbx - movq 64(%rsp), %r14 + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d movq 72(%rsp), %r15 - movq 80(%rsp), %r10 + movl 80(%rsp), %r10d subq $0xa0, %rsp pxor %xmm4, %xmm4 pxor %xmm6, %xmm6 @@ -1902,11 +2009,21 @@ L_AES_GCM_encrypt_store_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_encrypt,.-AES_GCM_encrypt +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_GCM_decrypt .type AES_GCM_decrypt,@function .align 4 AES_GCM_decrypt: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt +.p2align 2 +_AES_GCM_decrypt: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %rbx @@ -1915,11 +2032,11 @@ AES_GCM_decrypt: pushq %rbp movq %rdx, %r12 movq %rcx, %rax - movq 56(%rsp), %r11 - movq 64(%rsp), %rbx - movq 72(%rsp), %r14 + movl 56(%rsp), %r11d + movl 64(%rsp), %ebx + movl 72(%rsp), %r14d movq 80(%rsp), %r15 - movq 88(%rsp), %r10 + movl 88(%rsp), %r10d movq 96(%rsp), %rbp subq $0xa8, %rsp pxor %xmm4, %xmm4 @@ -3298,45 
+3415,154 @@ L_AES_GCM_decrypt_cmp_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_decrypt,.-AES_GCM_decrypt +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_one: .quad 0x0, 0x1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_two: .quad 0x0, 0x2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_three: .quad 0x0, 0x3 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_four: .quad 0x0, 0x4 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_five: .quad 0x0, 0x5 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_six: .quad 0x0, 0x6 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_seven: .quad 0x0, 0x7 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_eight: .quad 0x0, 0x8 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_bswap_epi64: .quad 0x1020304050607, 0x8090a0b0c0d0e0f -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_bswap_mask: .quad 0x8090a0b0c0d0e0f, 0x1020304050607 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_aes_gcm_mod2_128: .quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text .globl AES_GCM_encrypt_avx1 .type AES_GCM_encrypt_avx1,@function .align 4 AES_GCM_encrypt_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_avx1 +.p2align 2 +_AES_GCM_encrypt_avx1: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %rbx @@ -3344,11 +3570,11 @@ AES_GCM_encrypt_avx1: pushq %r15 movq %rdx, %r12 movq %rcx, %rax - movq 48(%rsp), %r11 - movq 56(%rsp), %rbx - movq 64(%rsp), %r14 + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d movq 72(%rsp), %r15 - movq 80(%rsp), %r10 + movl 80(%rsp), %r10d subq $0xa0, %rsp vpxor %xmm4, %xmm4, %xmm4 vpxor %xmm6, %xmm6, %xmm6 @@ -4902,11 +5128,21 @@ L_AES_GCM_encrypt_avx1_store_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_encrypt_avx1,.-AES_GCM_encrypt_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_GCM_decrypt_avx1 .type AES_GCM_decrypt_avx1,@function .align 4 AES_GCM_decrypt_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_avx1 +.p2align 2 +_AES_GCM_decrypt_avx1: +#endif /* __APPLE__ */ pushq %r13 pushq 
%r12 pushq %rbx @@ -4915,11 +5151,11 @@ AES_GCM_decrypt_avx1: pushq %rbp movq %rdx, %r12 movq %rcx, %rax - movq 56(%rsp), %r11 - movq 64(%rsp), %rbx - movq 72(%rsp), %r14 + movl 56(%rsp), %r11d + movl 64(%rsp), %ebx + movl 72(%rsp), %r14d movq 80(%rsp), %r15 - movq 88(%rsp), %r10 + movl 88(%rsp), %r10d movq 96(%rsp), %rbp subq $0xa8, %rsp vpxor %xmm4, %xmm4, %xmm4 @@ -6066,49 +6302,167 @@ L_AES_GCM_decrypt_avx1_cmp_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_decrypt_avx1,.-AES_GCM_decrypt_avx1 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_one: .quad 0x0, 0x1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_two: .quad 0x0, 0x2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_three: .quad 0x0, 0x3 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_four: .quad 0x0, 0x4 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_five: .quad 0x0, 0x5 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_six: .quad 0x0, 0x6 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_seven: .quad 0x0, 0x7 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_eight: .quad 0x0, 0x8 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_one: .quad 0x0, 0x100000000000000 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_epi64: .quad 0x1020304050607, 0x8090a0b0c0d0e0f -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_bswap_mask: .quad 0x8090a0b0c0d0e0f, 0x1020304050607 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_aes_gcm_mod2_128: .quad 0x1, 0xc200000000000000 +#ifndef __APPLE__ +.text .globl AES_GCM_encrypt_avx2 .type AES_GCM_encrypt_avx2,@function .align 4 AES_GCM_encrypt_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_avx2 +.p2align 2 +_AES_GCM_encrypt_avx2: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %r15 @@ -6118,12 +6472,12 @@ AES_GCM_encrypt_avx2: movq %rcx, %rax movq %r8, %r15 movq %rsi, %r8 - movq %r9, %r10 - movq 48(%rsp), %r11 - movq 56(%rsp), %rbx - movq 
64(%rsp), %r14 + movl %r9d, %r10d + movl 48(%rsp), %r11d + movl 56(%rsp), %ebx + movl 64(%rsp), %r14d movq 72(%rsp), %rsi - movq 80(%rsp), %r9 + movl 80(%rsp), %r9d subq $0xa0, %rsp vpxor %xmm4, %xmm4, %xmm4 vpxor %xmm6, %xmm6, %xmm6 @@ -7402,11 +7756,21 @@ L_AES_GCM_encrypt_avx2_store_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_encrypt_avx2,.-AES_GCM_encrypt_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_GCM_decrypt_avx2 .type AES_GCM_decrypt_avx2,@function .align 4 AES_GCM_decrypt_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_avx2 +.p2align 2 +_AES_GCM_decrypt_avx2: +#endif /* __APPLE__ */ pushq %r13 pushq %r12 pushq %r14 @@ -7417,12 +7781,12 @@ AES_GCM_decrypt_avx2: movq %rcx, %rax movq %r8, %r14 movq %rsi, %r8 - movq %r9, %r10 - movq 56(%rsp), %r11 - movq 64(%rsp), %rbx - movq 72(%rsp), %r15 + movl %r9d, %r10d + movl 56(%rsp), %r11d + movl 64(%rsp), %ebx + movl 72(%rsp), %r15d movq 80(%rsp), %rsi - movq 88(%rsp), %r9 + movl 88(%rsp), %r9d movq 96(%rsp), %rbp subq $0xa8, %rsp vpxor %xmm4, %xmm4, %xmm4 @@ -8363,5 +8727,7 @@ L_AES_GCM_decrypt_avx2_cmp_tag_done: popq %r12 popq %r13 repz retq +#ifndef __APPLE__ .size AES_GCM_decrypt_avx2,.-AES_GCM_decrypt_avx2 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/chacha_asm.S b/wolfcrypt/src/chacha_asm.S index 4be9cdb0f..9800ce6c3 100644 --- a/wolfcrypt/src/chacha_asm.S +++ b/wolfcrypt/src/chacha_asm.S @@ -26,10 +26,18 @@ #define HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ +#ifndef __APPLE__ +.text .globl chacha_encrypt_x64 .type chacha_encrypt_x64,@function .align 4 chacha_encrypt_x64: +#else +.section __TEXT,__text +.globl _chacha_encrypt_x64 +.p2align 2 +_chacha_encrypt_x64: +#endif /* __APPLE__ */ pushq %rbx pushq %rbp pushq %r12 @@ -440,24 +448,70 @@ L_chacha_x64_done: popq %rbp popq %rbx repz retq +#ifndef __APPLE__ .size chacha_encrypt_x64,.-chacha_encrypt_x64 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_chacha20_avx1_rotl8: .quad 0x605040702010003, 0xe0d0c0f0a09080b -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_chacha20_avx1_rotl16: .quad 0x504070601000302, 0xd0c0f0e09080b0a -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_chacha20_avx1_add: .quad 0x100000000, 0x300000002 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_chacha20_avx1_four: .quad 0x400000004, 0x400000004 +#ifndef __APPLE__ +.text .globl chacha_encrypt_avx1 .type chacha_encrypt_avx1,@function .align 4 chacha_encrypt_avx1: +#else +.section __TEXT,__text +.globl _chacha_encrypt_avx1 +.p2align 2 +_chacha_encrypt_avx1: +#endif /* __APPLE__ */ subq $0x190, %rsp movq %rsp, %r9 leaq 256(%rsp), %r10 @@ -943,29 +997,75 @@ L_chacha20_avx1_partial_end64: L_chacha20_avx1_partial_done: addq $0x190, %rsp repz retq +#ifndef __APPLE__ .size chacha_encrypt_avx1,.-chacha_encrypt_avx1 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 
+#else +.p2align 5 +#endif /* __APPLE__ */ L_chacha20_avx2_rotl8: .quad 0x605040702010003, 0xe0d0c0f0a09080b .quad 0x605040702010003, 0xe0d0c0f0a09080b -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_chacha20_avx2_rotl16: .quad 0x504070601000302, 0xd0c0f0e09080b0a .quad 0x504070601000302, 0xd0c0f0e09080b0a -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_chacha20_avx2_add: .quad 0x100000000, 0x300000002 .quad 0x500000004, 0x700000006 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_chacha20_avx2_eight: .quad 0x800000008, 0x800000008 .quad 0x800000008, 0x800000008 +#ifndef __APPLE__ +.text .globl chacha_encrypt_avx2 .type chacha_encrypt_avx2,@function .align 4 chacha_encrypt_avx2: +#else +.section __TEXT,__text +.globl _chacha_encrypt_avx2 +.p2align 2 +_chacha_encrypt_avx2: +#endif /* __APPLE__ */ subq $0x310, %rsp movq %rsp, %r9 leaq 512(%rsp), %r10 @@ -1307,8 +1407,14 @@ L_chacha20_avx2_done256: shl $3, %eax addl %eax, 48(%rdi) L_chacha20_avx2_end256: +#ifndef __APPLE__ callq chacha_encrypt_avx1@plt +#else + callq _chacha_encrypt_avx1 +#endif /* __APPLE__ */ addq $0x310, %rsp repz retq +#ifndef __APPLE__ .size chacha_encrypt_avx2,.-chacha_encrypt_avx2 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/poly1305_asm.S b/wolfcrypt/src/poly1305_asm.S index ef7fab433..dd7a48224 100644 --- a/wolfcrypt/src/poly1305_asm.S +++ b/wolfcrypt/src/poly1305_asm.S @@ -27,10 +27,18 @@ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.text .globl poly1305_setkey_avx .type poly1305_setkey_avx,@function .align 4 poly1305_setkey_avx: +#else +.section __TEXT,__text +.globl _poly1305_setkey_avx +.p2align 2 +_poly1305_setkey_avx: +#endif /* __APPLE__ */ movabsq $0xffffffc0fffffff, %r10 movabsq $0xffffffc0ffffffc, %r11 movq (%rsi), %rdx @@ -76,11 +84,21 @@ poly1305_setkey_avx: movq %r9, 608(%rdi) movb $0x01, 616(%rdi) repz retq +#ifndef __APPLE__ .size poly1305_setkey_avx,.-poly1305_setkey_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_block_avx .type poly1305_block_avx,@function .align 4 poly1305_block_avx: +#else +.section __TEXT,__text +.globl _poly1305_block_avx +.p2align 2 +_poly1305_block_avx: +#endif /* __APPLE__ */ pushq %r15 pushq %rbx pushq %r12 @@ -150,11 +168,21 @@ poly1305_block_avx: popq %rbx popq %r15 repz retq +#ifndef __APPLE__ .size poly1305_block_avx,.-poly1305_block_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_blocks_avx .type poly1305_blocks_avx,@function .align 4 poly1305_blocks_avx: +#else +.section __TEXT,__text +.globl _poly1305_blocks_avx +.p2align 2 +_poly1305_blocks_avx: +#endif /* __APPLE__ */ pushq %r15 pushq %rbx pushq %r12 @@ -228,11 +256,21 @@ L_poly1305_avx_blocks_start: popq %rbx popq %r15 repz retq +#ifndef __APPLE__ .size poly1305_blocks_avx,.-poly1305_blocks_avx +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_final_avx .type poly1305_final_avx,@function .align 4 poly1305_final_avx: +#else +.section __TEXT,__text +.globl _poly1305_final_avx +.p2align 2 +_poly1305_final_avx: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 movq %rsi, %rbx @@ -249,7 +287,11 @@ L_poly1305_avx_final_cmp_rem: jl 
L_poly1305_avx_final_zero_rem movb $0x00, 616(%rdi) leaq 480(%rdi), %rsi +#ifndef __APPLE__ callq poly1305_block_avx@plt +#else + callq _poly1305_block_avx +#endif /* __APPLE__ */ L_poly1305_avx_final_no_more: movq 24(%rdi), %rax movq 32(%rdi), %rdx @@ -295,13 +337,23 @@ L_poly1305_avx_final_no_more: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size poly1305_final_avx,.-poly1305_final_avx +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.text .globl poly1305_calc_powers_avx2 .type poly1305_calc_powers_avx2,@function .align 4 poly1305_calc_powers_avx2: +#else +.section __TEXT,__text +.globl _poly1305_calc_powers_avx2 +.p2align 2 +_poly1305_calc_powers_avx2: +#endif /* __APPLE__ */ pushq %r12 pushq %r13 pushq %r14 @@ -554,12 +606,26 @@ poly1305_calc_powers_avx2: popq %r13 popq %r12 repz retq +#ifndef __APPLE__ .size poly1305_calc_powers_avx2,.-poly1305_calc_powers_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_setkey_avx2 .type poly1305_setkey_avx2,@function .align 4 poly1305_setkey_avx2: +#else +.section __TEXT,__text +.globl _poly1305_setkey_avx2 +.p2align 2 +_poly1305_setkey_avx2: +#endif /* __APPLE__ */ +#ifndef __APPLE__ callq poly1305_setkey_avx@plt +#else + callq _poly1305_setkey_avx +#endif /* __APPLE__ */ vpxor %ymm0, %ymm0, %ymm0 vmovdqu %ymm0, 64(%rdi) vmovdqu %ymm0, 96(%rdi) @@ -569,19 +635,47 @@ poly1305_setkey_avx2: movq $0x00, 608(%rdi) movw $0x00, 616(%rdi) repz retq +#ifndef __APPLE__ .size poly1305_setkey_avx2,.-poly1305_setkey_avx2 -.align 32 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_poly1305_avx2_blocks_mask: .quad 0x3ffffff, 0x3ffffff .quad 0x3ffffff, 0x3ffffff -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_poly1305_avx2_blocks_hibit: .quad 0x1000000, 0x1000000 .quad 0x1000000, 0x1000000 +#ifndef __APPLE__ +.text .globl poly1305_blocks_avx2 .type poly1305_blocks_avx2,@function .align 4 poly1305_blocks_avx2: +#else +.section __TEXT,__text +.globl _poly1305_blocks_avx2 +.p2align 2 +_poly1305_blocks_avx2: +#endif /* __APPLE__ */ pushq %r12 pushq %rbx subq $0x140, %rsp @@ -927,11 +1021,21 @@ L_poly1305_avx2_blocks_complete: popq %rbx popq %r12 repz retq +#ifndef __APPLE__ .size poly1305_blocks_avx2,.-poly1305_blocks_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl poly1305_final_avx2 .type poly1305_final_avx2,@function .align 4 poly1305_final_avx2: +#else +.section __TEXT,__text +.globl _poly1305_final_avx2 +.p2align 2 +_poly1305_final_avx2: +#endif /* __APPLE__ */ movb $0x01, 616(%rdi) movb 617(%rdi), %cl cmpb $0x00, %cl @@ -939,7 +1043,11 @@ poly1305_final_avx2: pushq %rsi movq $0x40, %rdx xorq %rsi, %rsi +#ifndef __APPLE__ callq poly1305_blocks_avx2@plt +#else + callq _poly1305_blocks_avx2 +#endif /* __APPLE__ */ popq %rsi L_poly1305_avx2_final_done_blocks_X4: movq 608(%rdi), %rax @@ -952,7 +1060,11 @@ L_poly1305_avx2_final_done_blocks_X4: pushq %rsi movq %rcx, %rdx leaq 480(%rdi), %rsi +#ifndef __APPLE__ callq poly1305_blocks_avx@plt +#else + callq _poly1305_blocks_avx +#endif /* __APPLE__ */ popq %rsi popq %rax popq %rcx @@ -968,7 +1080,11 @@ L_poly1305_avx2_final_start_copy: L_poly1305_avx2_final_cmp_copy: cmp %rcx, %rax jne L_poly1305_avx2_final_start_copy +#ifndef __APPLE__ callq poly1305_final_avx@plt +#else + callq 
_poly1305_final_avx +#endif /* __APPLE__ */ vpxor %ymm0, %ymm0, %ymm0 vmovdqu %ymm0, 64(%rdi) vmovdqu %ymm0, 96(%rdi) @@ -982,5 +1098,7 @@ L_poly1305_avx2_final_cmp_copy: movq $0x00, 608(%rdi) movw $0x00, 616(%rdi) repz retq +#ifndef __APPLE__ .size poly1305_final_avx2,.-poly1305_final_avx2 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/sha256_asm.S b/wolfcrypt/src/sha256_asm.S index 09db6eb50..df3cbb259 100644 --- a/wolfcrypt/src/sha256_asm.S +++ b/wolfcrypt/src/sha256_asm.S @@ -27,6 +27,11 @@ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX1 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ L_avx1_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 @@ -44,19 +49,54 @@ L_avx1_sha256_k: .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX1 .type Transform_Sha256_AVX1,@function .align 4 Transform_Sha256_AVX1: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1 +.p2align 2 +_Transform_Sha256_AVX1: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -2432,11 +2472,21 @@ Transform_Sha256_AVX1: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX1,.-Transform_Sha256_AVX1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX1_Len .type Transform_Sha256_AVX1_Len,@function .align 4 Transform_Sha256_AVX1_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_Len +.p2align 2 +_Transform_Sha256_AVX1_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -4828,7 +4878,14 @@ L_sha256_len_avx1_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX1_Len,.-Transform_Sha256_AVX1_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ L_avx1_rorx_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 @@ -4846,19 +4903,54 @@ L_avx1_rorx_sha256_k: .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data 
+#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX1_RORX .type Transform_Sha256_AVX1_RORX,@function .align 4 Transform_Sha256_AVX1_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_RORX +.p2align 2 +_Transform_Sha256_AVX1_RORX: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -7192,11 +7284,21 @@ Transform_Sha256_AVX1_RORX: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX1_RORX,.-Transform_Sha256_AVX1_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX1_RORX_Len .type Transform_Sha256_AVX1_RORX_Len,@function .align 4 Transform_Sha256_AVX1_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX1_RORX_Len +.p2align 2 +_Transform_Sha256_AVX1_RORX_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -9547,9 +9649,16 @@ L_sha256_len_avx1_len_rorx_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX1_RORX_Len,.-Transform_Sha256_AVX1_RORX_Len +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ L_avx2_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -9583,22 +9692,57 @@ L_avx2_sha256_k: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff .quad 0xb0a090803020100, 0xffffffffffffffff -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 .quad 0xffffffffffffffff, 0xb0a090803020100 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b .quad 0x405060700010203, 0xc0d0e0f08090a0b +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX2 .type Transform_Sha256_AVX2,@function .align 4 Transform_Sha256_AVX2: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2 +.p2align 2 +_Transform_Sha256_AVX2: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -11974,11 +12118,21 @@ Transform_Sha256_AVX2: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX2,.-Transform_Sha256_AVX2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX2_Len .type Transform_Sha256_AVX2_Len,@function .align 4 Transform_Sha256_AVX2_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_Len +.p2align 2 +_Transform_Sha256_AVX2_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -11994,7 +12148,11 @@ Transform_Sha256_AVX2_Len: vmovdqu 32(%rbp), %ymm1 vmovups %ymm0, 32(%rdi) vmovups %ymm1, 64(%rdi) +#ifndef __APPLE__ call Transform_Sha256_AVX2@plt +#else + call _Transform_Sha256_AVX2 +#endif /* __APPLE__ */ addq $0x40, %rbp subl $0x40, %esi jz L_sha256_len_avx2_done @@ -16089,7 +16247,14 @@ 
L_sha256_len_avx2_done: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX2_Len,.-Transform_Sha256_AVX2_Len +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ L_avx2_rorx_sha256_k: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -16123,22 +16288,57 @@ L_avx2_rorx_sha256_k: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_rorx_sha256_flip_mask: .quad 0x405060700010203, 0xc0d0e0f08090a0b .quad 0x405060700010203, 0xc0d0e0f08090a0b -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_rorx_sha256_shuf_00BA: .quad 0xb0a090803020100, 0xffffffffffffffff .quad 0xb0a090803020100, 0xffffffffffffffff -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_rorx_sha256_shuf_DC00: .quad 0xffffffffffffffff, 0xb0a090803020100 .quad 0xffffffffffffffff, 0xb0a090803020100 +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX2_RORX .type Transform_Sha256_AVX2_RORX,@function .align 4 Transform_Sha256_AVX2_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_RORX +.p2align 2 +_Transform_Sha256_AVX2_RORX: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -18489,11 +18689,21 @@ Transform_Sha256_AVX2_RORX: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX2_RORX,.-Transform_Sha256_AVX2_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha256_AVX2_RORX_Len .type Transform_Sha256_AVX2_RORX_Len,@function .align 4 Transform_Sha256_AVX2_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha256_AVX2_RORX_Len +.p2align 2 +_Transform_Sha256_AVX2_RORX_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -18509,7 +18719,11 @@ Transform_Sha256_AVX2_RORX_Len: vmovdqu 32(%rbp), %ymm1 vmovups %ymm0, 32(%rdi) vmovups %ymm1, 64(%rdi) +#ifndef __APPLE__ call Transform_Sha256_AVX2_RORX@plt +#else + call _Transform_Sha256_AVX2_RORX +#endif /* __APPLE__ */ addq $0x40, %rbp subl $0x40, %esi jz L_sha256_len_avx2_rorx_done @@ -22433,5 +22647,7 @@ L_sha256_len_avx2_rorx_done: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/sha512_asm.S b/wolfcrypt/src/sha512_asm.S index dded1c009..96166344c 100644 --- a/wolfcrypt/src/sha512_asm.S +++ b/wolfcrypt/src/sha512_asm.S @@ -27,7 +27,16 @@ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX1 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -69,13 +78,30 @@ L_avx1_sha512_k: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef 
__APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_sha512_flip_mask: .quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX1 .type Transform_Sha512_AVX1,@function .align 4 Transform_Sha512_AVX1: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1 +.p2align 2 +_Transform_Sha512_AVX1: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -1335,11 +1361,21 @@ L_sha256_len_avx1_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX1,.-Transform_Sha512_AVX1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX1_Len .type Transform_Sha512_AVX1_Len,@function .align 4 Transform_Sha512_AVX1_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_Len +.p2align 2 +_Transform_Sha512_AVX1_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -2618,8 +2654,19 @@ L_sha512_len_avx1_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX1_Len,.-Transform_Sha512_AVX1_Len -.align 16 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -2661,13 +2708,30 @@ L_avx1_rorx_sha512_k: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx1_rorx_sha512_flip_mask: .quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX1_RORX .type Transform_Sha512_AVX1_RORX,@function .align 4 Transform_Sha512_AVX1_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_RORX +.p2align 2 +_Transform_Sha512_AVX1_RORX: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -3856,11 +3920,21 @@ L_sha256_len_avx1_rorx_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX1_RORX,.-Transform_Sha512_AVX1_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX1_RORX_Len .type Transform_Sha512_AVX1_RORX_Len,@function .align 4 Transform_Sha512_AVX1_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX1_RORX_Len +.p2align 2 +_Transform_Sha512_AVX1_RORX_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -5084,10 +5158,21 @@ L_sha512_len_avx1_rorx_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX1_RORX_Len,.-Transform_Sha512_AVX1_RORX_Len +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -5129,7 +5214,16 @@ L_avx2_sha512_k: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_sha512_k_2: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -5211,17 
+5305,43 @@ L_avx2_sha512_k_2: .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 8 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 8 +#else +.p2align 3 +#endif /* __APPLE__ */ L_avx2_sha512_k_2_end: .quad 1024+L_avx2_sha512_k_2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_sha512_flip_mask: .quad 0x1020304050607, 0x8090a0b0c0d0e0f .quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX2 .type Transform_Sha512_AVX2,@function .align 4 Transform_Sha512_AVX2: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2 +.p2align 2 +_Transform_Sha512_AVX2: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -6269,11 +6389,21 @@ L_sha256_avx2_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX2,.-Transform_Sha512_AVX2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX2_Len .type Transform_Sha512_AVX2_Len,@function .align 4 Transform_Sha512_AVX2_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_Len +.p2align 2 +_Transform_Sha512_AVX2_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -6292,7 +6422,11 @@ Transform_Sha512_AVX2_Len: vmovups %ymm1, 96(%rdi) vmovups %ymm2, 128(%rdi) vmovups %ymm3, 160(%rdi) +#ifndef __APPLE__ call Transform_Sha512_AVX2@plt +#else + call _Transform_Sha512_AVX2 +#endif /* __APPLE__ */ addq $0x80, 224(%rdi) subl $0x80, %ebp jz L_sha512_len_avx2_done @@ -7881,8 +8015,19 @@ L_sha512_len_avx2_done: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX2_Len,.-Transform_Sha512_AVX2_Len -.align 16 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_rorx_sha512_k: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc @@ -7924,7 +8069,16 @@ L_avx2_rorx_sha512_k: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 16 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 16 +#else +.p2align 4 +#endif /* __APPLE__ */ L_avx2_rorx_sha512_k_2: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0x428a2f98d728ae22,0x7137449123ef65cd @@ -8006,17 +8160,43 @@ L_avx2_rorx_sha512_k_2: .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -.align 8 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 8 +#else +.p2align 3 +#endif /* __APPLE__ */ L_avx2_rorx_sha512_k_2_end: .quad 1024+L_avx2_rorx_sha512_k_2 -.align 32 +#ifndef __APPLE__ +.data +#else +.section __DATA,__data +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.align 32 +#else +.p2align 5 +#endif /* __APPLE__ */ L_avx2_rorx_sha512_flip_mask: .quad 0x1020304050607, 0x8090a0b0c0d0e0f .quad 0x1020304050607, 0x8090a0b0c0d0e0f +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX2_RORX .type Transform_Sha512_AVX2_RORX,@function .align 4 Transform_Sha512_AVX2_RORX: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_RORX +.p2align 2 +_Transform_Sha512_AVX2_RORX: +#endif /* 
__APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -9006,11 +9186,21 @@ L_sha256_len_avx2_rorx_start: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX2_RORX,.-Transform_Sha512_AVX2_RORX +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl Transform_Sha512_AVX2_RORX_Len .type Transform_Sha512_AVX2_RORX_Len,@function .align 4 Transform_Sha512_AVX2_RORX_Len: +#else +.section __TEXT,__text +.globl _Transform_Sha512_AVX2_RORX_Len +.p2align 2 +_Transform_Sha512_AVX2_RORX_Len: +#endif /* __APPLE__ */ pushq %rbx pushq %r12 pushq %r13 @@ -9029,7 +9219,11 @@ Transform_Sha512_AVX2_RORX_Len: vmovups %ymm1, 96(%rdi) vmovups %ymm2, 128(%rdi) vmovups %ymm3, 160(%rdi) +#ifndef __APPLE__ call Transform_Sha512_AVX2_RORX@plt +#else + call _Transform_Sha512_AVX2_RORX +#endif /* __APPLE__ */ pop %rsi addq $0x80, 224(%rdi) subl $0x80, %esi @@ -10541,5 +10735,7 @@ L_sha512_len_avx2_rorx_done: popq %r12 popq %rbx repz retq +#ifndef __APPLE__ .size Transform_Sha512_AVX2_RORX_Len,.-Transform_Sha512_AVX2_RORX_Len +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 0d73aa7cb..d40a658ed 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -37,10 +37,16 @@ * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_mul_16 .type sp_2048_mul_16,@function .align 16 sp_2048_mul_16: +#else +.globl _sp_2048_mul_16 +.p2align 4 +_sp_2048_mul_16: +#endif /* __APPLE__ */ movq %rdx, %rcx subq $128, %rsp # A[0] * B[0] @@ -1672,16 +1678,24 @@ sp_2048_mul_16: movq %r9, 120(%rdi) addq $128, %rsp repz retq +#ifndef __APPLE__ .size sp_2048_mul_16,.-sp_2048_mul_16 +#endif /* __APPLE__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_sqr_16 .type sp_2048_sqr_16,@function .align 16 sp_2048_sqr_16: +#else +.globl _sp_2048_sqr_16 +.p2align 4 +_sp_2048_sqr_16: +#endif /* __APPLE__ */ push %r12 subq $128, %rsp # A[0] * A[0] @@ -2756,7 +2770,9 @@ sp_2048_sqr_16: addq $128, %rsp pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_sqr_16,.-sp_2048_sqr_16 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Multiply a and b into r. (r = a * b) * @@ -2764,10 +2780,16 @@ sp_2048_sqr_16: * a First number to multiply. * b Second number to multiply. */ +#ifndef __APPLE__ .globl sp_2048_mul_avx2_16 .type sp_2048_mul_avx2_16,@function .align 16 sp_2048_mul_avx2_16: +#else +.globl _sp_2048_mul_avx2_16 +.p2align 4 +_sp_2048_mul_avx2_16: +#endif /* __APPLE__ */ push %rbp push %r12 push %r13 @@ -4427,7 +4449,9 @@ sp_2048_mul_avx2_16: pop %r12 pop %rbp repz retq +#ifndef __APPLE__ .size sp_2048_mul_avx2_16,.-sp_2048_mul_avx2_16 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 /* Square a and put result in r. (r = a * a) @@ -4435,10 +4459,16 @@ sp_2048_mul_avx2_16: * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_sqr_avx2_16 .type sp_2048_sqr_avx2_16,@function .align 16 sp_2048_sqr_avx2_16: +#else +.globl _sp_2048_sqr_avx2_16 +.p2align 4 +_sp_2048_sqr_avx2_16: +#endif /* __APPLE__ */ push %rbp push %r12 push %r13 @@ -5461,7 +5491,9 @@ L_end_2048_sqr_avx2_16: pop %r12 pop %rbp repz retq +#ifndef __APPLE__ .size sp_2048_sqr_avx2_16,.-sp_2048_sqr_avx2_16 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Add b to a into r. 
(r = a + b) * @@ -5469,10 +5501,16 @@ L_end_2048_sqr_avx2_16: * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_add_16 .type sp_2048_add_16,@function .align 16 sp_2048_add_16: +#else +.globl _sp_2048_add_16 +.p2align 4 +_sp_2048_add_16: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -5524,16 +5562,24 @@ sp_2048_add_16: movq %rcx, 120(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_2048_add_16,.-sp_2048_add_16 +#endif /* __APPLE__ */ /* Sub b from a into a. (a -= b) * * a A single precision integer and result. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_sub_in_place_32 .type sp_2048_sub_in_place_32,@function .align 16 sp_2048_sub_in_place_32: +#else +.globl _sp_2048_sub_in_place_32 +.p2align 4 +_sp_2048_sub_in_place_32: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rdi), %rdx movq 8(%rdi), %rcx @@ -5665,17 +5711,25 @@ sp_2048_sub_in_place_32: movq %rcx, 248(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_2048_sub_in_place_32,.-sp_2048_sub_in_place_32 +#endif /* __APPLE__ */ /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_add_32 .type sp_2048_add_32,@function .align 16 sp_2048_add_32: +#else +.globl _sp_2048_add_32 +.p2align 4 +_sp_2048_add_32: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -5775,7 +5829,9 @@ sp_2048_add_32: movq %rcx, 248(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_2048_add_32,.-sp_2048_add_32 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 @@ -5788,10 +5844,16 @@ sp_2048_add_32: * a A single precision integer and result. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_2048_sub_in_place_16 .type sp_2048_sub_in_place_16,@function .align 16 sp_2048_sub_in_place_16: +#else +.globl _sp_2048_sub_in_place_16 +.p2align 4 +_sp_2048_sub_in_place_16: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rdi), %rdx movq 8(%rdi), %rcx @@ -5859,7 +5921,9 @@ sp_2048_sub_in_place_16: movq %rcx, 120(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_2048_sub_in_place_16,.-sp_2048_sub_in_place_16 +#endif /* __APPLE__ */ /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -5868,10 +5932,16 @@ sp_2048_sub_in_place_16: * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef __APPLE__ .globl sp_2048_cond_sub_16 .type sp_2048_cond_sub_16,@function .align 16 sp_2048_cond_sub_16: +#else +.globl _sp_2048_cond_sub_16 +.p2align 4 +_sp_2048_cond_sub_16: +#endif /* __APPLE__ */ subq $128, %rsp movq $0, %rax movq (%rdx), %r8 @@ -5989,17 +6059,25 @@ sp_2048_cond_sub_16: sbbq $0, %rax addq $128, %rsp repz retq +#ifndef __APPLE__ .size sp_2048_cond_sub_16,.-sp_2048_cond_sub_16 +#endif /* __APPLE__ */ /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef __APPLE__ .globl sp_2048_mont_reduce_16 .type sp_2048_mont_reduce_16,@function .align 16 sp_2048_mont_reduce_16: +#else +.globl _sp_2048_mont_reduce_16 +.p2align 4 +_sp_2048_mont_reduce_16: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -6182,23 +6260,35 @@ L_mont_loop_16: movq %rdi, %rsi movq %rdi, %rdi subq $128, %rdi +#ifndef __APPLE__ callq sp_2048_cond_sub_16@plt +#else + callq _sp_2048_cond_sub_16 +#endif /* __APPLE__ */ pop %r15 pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_mont_reduce_16,.-sp_2048_mont_reduce_16 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_2048_mul_d_16 .type sp_2048_mul_d_16,@function .align 16 sp_2048_mul_d_16: +#else +.globl _sp_2048_mul_d_16 +.p2align 4 +_sp_2048_mul_d_16: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -6327,7 +6417,9 @@ sp_2048_mul_d_16: movq %r8, 120(%rdi) movq %r9, 128(%rdi) repz retq +#ifndef __APPLE__ .size sp_2048_mul_d_16,.-sp_2048_mul_d_16 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -6335,10 +6427,16 @@ sp_2048_mul_d_16: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_2048_mul_d_avx2_16 .type sp_2048_mul_d_avx2_16,@function .align 16 sp_2048_mul_d_avx2_16: +#else +.globl _sp_2048_mul_d_avx2_16 +.p2align 4 +_sp_2048_mul_d_avx2_16: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -6438,7 +6536,9 @@ sp_2048_mul_d_avx2_16: movq %r10, 120(%rdi) movq %r9, 128(%rdi) repz retq +#ifndef __APPLE__ .size sp_2048_mul_d_avx2_16,.-sp_2048_mul_d_avx2_16 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Compare a with b in constant time. * @@ -6447,10 +6547,16 @@ sp_2048_mul_d_avx2_16: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_2048_cmp_16 .type sp_2048_cmp_16,@function .align 16 sp_2048_cmp_16: +#else +.globl _sp_2048_cmp_16 +.p2align 4 +_sp_2048_cmp_16: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -6585,7 +6691,9 @@ sp_2048_cmp_16: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_2048_cmp_16,.-sp_2048_cmp_16 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Reduce the number back to 2048 bits using Montgomery reduction. * @@ -6593,10 +6701,16 @@ sp_2048_cmp_16: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_2048_mont_reduce_avx2_16 .type sp_2048_mont_reduce_avx2_16,@function .align 16 sp_2048_mont_reduce_avx2_16: +#else +.globl _sp_2048_mont_reduce_avx2_16 +.p2align 4 +_sp_2048_mont_reduce_avx2_16: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -6721,12 +6835,18 @@ L_mont_loop_avx2_16: movq %rdi, %rsi movq %rdi, %rdi subq $128, %rdi +#ifndef __APPLE__ callq sp_2048_cond_sub_16@plt +#else + callq _sp_2048_cond_sub_16 +#endif /* __APPLE__ */ pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_mont_reduce_avx2_16,.-sp_2048_mont_reduce_avx2_16 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -6745,10 +6865,16 @@ L_mont_loop_avx2_16: * b A single precision number to subtract. * m Mask value to apply. 
*/ +#ifndef __APPLE__ .globl sp_2048_cond_sub_32 .type sp_2048_cond_sub_32,@function .align 16 sp_2048_cond_sub_32: +#else +.globl _sp_2048_cond_sub_32 +.p2align 4 +_sp_2048_cond_sub_32: +#endif /* __APPLE__ */ subq $256, %rsp movq $0, %rax movq (%rdx), %r8 @@ -6978,17 +7104,25 @@ sp_2048_cond_sub_32: sbbq $0, %rax addq $256, %rsp repz retq +#ifndef __APPLE__ .size sp_2048_cond_sub_32,.-sp_2048_cond_sub_32 +#endif /* __APPLE__ */ /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_2048_mont_reduce_32 .type sp_2048_mont_reduce_32,@function .align 16 sp_2048_mont_reduce_32: +#else +.globl _sp_2048_mont_reduce_32 +.p2align 4 +_sp_2048_mont_reduce_32: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -7331,23 +7465,35 @@ L_mont_loop_32: movq %rdi, %rsi movq %rdi, %rdi subq $256, %rdi +#ifndef __APPLE__ callq sp_2048_cond_sub_32@plt +#else + callq _sp_2048_cond_sub_32 +#endif /* __APPLE__ */ pop %r15 pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_mont_reduce_32,.-sp_2048_mont_reduce_32 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_2048_mul_d_32 .type sp_2048_mul_d_32,@function .align 16 sp_2048_mul_d_32: +#else +.globl _sp_2048_mul_d_32 +.p2align 4 +_sp_2048_mul_d_32: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -7604,7 +7750,9 @@ sp_2048_mul_d_32: movq %r9, 248(%rdi) movq %r10, 256(%rdi) repz retq +#ifndef __APPLE__ .size sp_2048_mul_d_32,.-sp_2048_mul_d_32 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -7612,10 +7760,16 @@ sp_2048_mul_d_32: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_2048_mul_d_avx2_32 .type sp_2048_mul_d_avx2_32,@function .align 16 sp_2048_mul_d_avx2_32: +#else +.globl _sp_2048_mul_d_avx2_32 +.p2align 4 +_sp_2048_mul_d_avx2_32: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -7811,7 +7965,9 @@ sp_2048_mul_d_avx2_32: movq %r10, 248(%rdi) movq %r9, 256(%rdi) repz retq +#ifndef __APPLE__ .size sp_2048_mul_d_avx2_32,.-sp_2048_mul_d_avx2_32 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Compare a with b in constant time. * @@ -7820,10 +7976,16 @@ sp_2048_mul_d_avx2_32: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_2048_cmp_32 .type sp_2048_cmp_32,@function .align 16 sp_2048_cmp_32: +#else +.globl _sp_2048_cmp_32 +.p2align 4 +_sp_2048_cmp_32: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -8086,7 +8248,9 @@ sp_2048_cmp_32: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_2048_cmp_32,.-sp_2048_cmp_32 +#endif /* __APPLE__ */ #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH) #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */ #ifdef HAVE_INTEL_AVX2 @@ -8096,10 +8260,16 @@ sp_2048_cmp_32: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef __APPLE__ .globl sp_2048_mont_reduce_avx2_32 .type sp_2048_mont_reduce_avx2_32,@function .align 16 sp_2048_mont_reduce_avx2_32: +#else +.globl _sp_2048_mont_reduce_avx2_32 +.p2align 4 +_sp_2048_mont_reduce_avx2_32: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -8320,12 +8490,18 @@ L_mont_loop_avx2_32: movq %rdi, %rsi movq %rdi, %rdi subq $256, %rdi +#ifndef __APPLE__ callq sp_2048_cond_sub_32@plt +#else + callq _sp_2048_cond_sub_32 +#endif /* __APPLE__ */ pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_2048_mont_reduce_avx2_32,.-sp_2048_mont_reduce_avx2_32 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -8345,10 +8521,16 @@ L_mont_loop_avx2_32: * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_mul_24 .type sp_3072_mul_24,@function .align 16 sp_3072_mul_24: +#else +.globl _sp_3072_mul_24 +.p2align 4 +_sp_3072_mul_24: +#endif /* __APPLE__ */ movq %rdx, %rcx subq $192, %rsp # A[0] * B[0] @@ -11948,16 +12130,24 @@ sp_3072_mul_24: movq %r9, 184(%rdi) addq $192, %rsp repz retq +#ifndef __APPLE__ .size sp_3072_mul_24,.-sp_3072_mul_24 +#endif /* __APPLE__ */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_sqr_24 .type sp_3072_sqr_24,@function .align 16 sp_3072_sqr_24: +#else +.globl _sp_3072_sqr_24 +.p2align 4 +_sp_3072_sqr_24: +#endif /* __APPLE__ */ push %r12 subq $192, %rsp # A[0] * A[0] @@ -14160,7 +14350,9 @@ sp_3072_sqr_24: addq $192, %rsp pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_sqr_24,.-sp_3072_sqr_24 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Multiply a and b into r. (r = a * b) * @@ -14168,10 +14360,16 @@ sp_3072_sqr_24: * a First number to multiply. * b Second number to multiply. */ +#ifndef __APPLE__ .globl sp_3072_mul_avx2_24 .type sp_3072_mul_avx2_24,@function .align 16 sp_3072_mul_avx2_24: +#else +.globl _sp_3072_mul_avx2_24 +.p2align 4 +_sp_3072_mul_avx2_24: +#endif /* __APPLE__ */ push %rbp push %r12 push %r13 @@ -17807,7 +18005,9 @@ sp_3072_mul_avx2_24: pop %r12 pop %rbp repz retq +#ifndef __APPLE__ .size sp_3072_mul_avx2_24,.-sp_3072_mul_avx2_24 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 /* Square a and put result in r. (r = a * a) @@ -17815,10 +18015,16 @@ sp_3072_mul_avx2_24: * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_sqr_avx2_24 .type sp_3072_sqr_avx2_24,@function .align 16 sp_3072_sqr_avx2_24: +#else +.globl _sp_3072_sqr_avx2_24 +.p2align 4 +_sp_3072_sqr_avx2_24: +#endif /* __APPLE__ */ push %rbp push %r12 push %r13 @@ -19923,7 +20129,9 @@ L_end_3072_sqr_avx2_24: pop %r12 pop %rbp repz retq +#ifndef __APPLE__ .size sp_3072_sqr_avx2_24,.-sp_3072_sqr_avx2_24 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Add b to a into r. (r = a + b) * @@ -19931,10 +20139,16 @@ L_end_3072_sqr_avx2_24: * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_add_24 .type sp_3072_add_24,@function .align 16 sp_3072_add_24: +#else +.globl _sp_3072_add_24 +.p2align 4 +_sp_3072_add_24: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -20010,16 +20224,24 @@ sp_3072_add_24: movq %rcx, 184(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_3072_add_24,.-sp_3072_add_24 +#endif /* __APPLE__ */ /* Sub b from a into a. 
(a -= b) * * a A single precision integer and result. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_sub_in_place_48 .type sp_3072_sub_in_place_48,@function .align 16 sp_3072_sub_in_place_48: +#else +.globl _sp_3072_sub_in_place_48 +.p2align 4 +_sp_3072_sub_in_place_48: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rdi), %rdx movq 8(%rdi), %rcx @@ -20215,17 +20437,25 @@ sp_3072_sub_in_place_48: movq %rcx, 376(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_3072_sub_in_place_48,.-sp_3072_sub_in_place_48 +#endif /* __APPLE__ */ /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_add_48 .type sp_3072_add_48,@function .align 16 sp_3072_add_48: +#else +.globl _sp_3072_add_48 +.p2align 4 +_sp_3072_add_48: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -20373,7 +20603,9 @@ sp_3072_add_48: movq %rcx, 376(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_3072_add_48,.-sp_3072_add_48 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 @@ -20386,10 +20618,16 @@ sp_3072_add_48: * a A single precision integer and result. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_3072_sub_in_place_24 .type sp_3072_sub_in_place_24,@function .align 16 sp_3072_sub_in_place_24: +#else +.globl _sp_3072_sub_in_place_24 +.p2align 4 +_sp_3072_sub_in_place_24: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rdi), %rdx movq 8(%rdi), %rcx @@ -20489,7 +20727,9 @@ sp_3072_sub_in_place_24: movq %rcx, 184(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_3072_sub_in_place_24,.-sp_3072_sub_in_place_24 +#endif /* __APPLE__ */ /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -20498,10 +20738,16 @@ sp_3072_sub_in_place_24: * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef __APPLE__ .globl sp_3072_cond_sub_24 .type sp_3072_cond_sub_24,@function .align 16 sp_3072_cond_sub_24: +#else +.globl _sp_3072_cond_sub_24 +.p2align 4 +_sp_3072_cond_sub_24: +#endif /* __APPLE__ */ subq $192, %rsp movq $0, %rax movq (%rdx), %r8 @@ -20675,17 +20921,25 @@ sp_3072_cond_sub_24: sbbq $0, %rax addq $192, %rsp repz retq +#ifndef __APPLE__ .size sp_3072_cond_sub_24,.-sp_3072_cond_sub_24 +#endif /* __APPLE__ */ /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_3072_mont_reduce_24 .type sp_3072_mont_reduce_24,@function .align 16 sp_3072_mont_reduce_24: +#else +.globl _sp_3072_mont_reduce_24 +.p2align 4 +_sp_3072_mont_reduce_24: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -20948,23 +21202,35 @@ L_mont_loop_24: movq %rdi, %rsi movq %rdi, %rdi subq $192, %rdi +#ifndef __APPLE__ callq sp_3072_cond_sub_24@plt +#else + callq _sp_3072_cond_sub_24 +#endif /* __APPLE__ */ pop %r15 pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_mont_reduce_24,.-sp_3072_mont_reduce_24 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ +#ifndef __APPLE__ .globl sp_3072_mul_d_24 .type sp_3072_mul_d_24,@function .align 16 sp_3072_mul_d_24: +#else +.globl _sp_3072_mul_d_24 +.p2align 4 +_sp_3072_mul_d_24: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -21157,7 +21423,9 @@ sp_3072_mul_d_24: movq %r10, 184(%rdi) movq %r8, 192(%rdi) repz retq +#ifndef __APPLE__ .size sp_3072_mul_d_24,.-sp_3072_mul_d_24 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -21165,10 +21433,16 @@ sp_3072_mul_d_24: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_3072_mul_d_avx2_24 .type sp_3072_mul_d_avx2_24,@function .align 16 sp_3072_mul_d_avx2_24: +#else +.globl _sp_3072_mul_d_avx2_24 +.p2align 4 +_sp_3072_mul_d_avx2_24: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -21316,7 +21590,9 @@ sp_3072_mul_d_avx2_24: movq %r10, 184(%rdi) movq %r9, 192(%rdi) repz retq +#ifndef __APPLE__ .size sp_3072_mul_d_avx2_24,.-sp_3072_mul_d_avx2_24 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Compare a with b in constant time. * @@ -21325,10 +21601,16 @@ sp_3072_mul_d_avx2_24: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_3072_cmp_24 .type sp_3072_cmp_24,@function .align 16 sp_3072_cmp_24: +#else +.globl _sp_3072_cmp_24 +.p2align 4 +_sp_3072_cmp_24: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -21527,7 +21809,9 @@ sp_3072_cmp_24: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_3072_cmp_24,.-sp_3072_cmp_24 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Reduce the number back to 3072 bits using Montgomery reduction. * @@ -21535,10 +21819,16 @@ sp_3072_cmp_24: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_3072_mont_reduce_avx2_24 .type sp_3072_mont_reduce_avx2_24,@function .align 16 sp_3072_mont_reduce_avx2_24: +#else +.globl _sp_3072_mont_reduce_avx2_24 +.p2align 4 +_sp_3072_mont_reduce_avx2_24: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -21711,12 +22001,18 @@ L_mont_loop_avx2_24: movq %rdi, %rsi movq %rdi, %rdi subq $192, %rdi +#ifndef __APPLE__ callq sp_3072_cond_sub_24@plt +#else + callq _sp_3072_cond_sub_24 +#endif /* __APPLE__ */ pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_mont_reduce_avx2_24,.-sp_3072_mont_reduce_avx2_24 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -21735,10 +22031,16 @@ L_mont_loop_avx2_24: * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef __APPLE__ .globl sp_3072_cond_sub_48 .type sp_3072_cond_sub_48,@function .align 16 sp_3072_cond_sub_48: +#else +.globl _sp_3072_cond_sub_48 +.p2align 4 +_sp_3072_cond_sub_48: +#endif /* __APPLE__ */ subq $384, %rsp movq $0, %rax movq (%rdx), %r8 @@ -22080,17 +22382,25 @@ sp_3072_cond_sub_48: sbbq $0, %rax addq $384, %rsp repz retq +#ifndef __APPLE__ .size sp_3072_cond_sub_48,.-sp_3072_cond_sub_48 +#endif /* __APPLE__ */ /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ +#ifndef __APPLE__ .globl sp_3072_mont_reduce_48 .type sp_3072_mont_reduce_48,@function .align 16 sp_3072_mont_reduce_48: +#else +.globl _sp_3072_mont_reduce_48 +.p2align 4 +_sp_3072_mont_reduce_48: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -22593,23 +22903,35 @@ L_mont_loop_48: movq %rdi, %rsi movq %rdi, %rdi subq $384, %rdi +#ifndef __APPLE__ callq sp_3072_cond_sub_48@plt +#else + callq _sp_3072_cond_sub_48 +#endif /* __APPLE__ */ pop %r15 pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_mont_reduce_48,.-sp_3072_mont_reduce_48 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_3072_mul_d_48 .type sp_3072_mul_d_48,@function .align 16 sp_3072_mul_d_48: +#else +.globl _sp_3072_mul_d_48 +.p2align 4 +_sp_3072_mul_d_48: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -22994,7 +23316,9 @@ sp_3072_mul_d_48: movq %r10, 376(%rdi) movq %r8, 384(%rdi) repz retq +#ifndef __APPLE__ .size sp_3072_mul_d_48,.-sp_3072_mul_d_48 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -23002,10 +23326,16 @@ sp_3072_mul_d_48: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_3072_mul_d_avx2_48 .type sp_3072_mul_d_avx2_48,@function .align 16 sp_3072_mul_d_avx2_48: +#else +.globl _sp_3072_mul_d_avx2_48 +.p2align 4 +_sp_3072_mul_d_avx2_48: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -23297,7 +23627,9 @@ sp_3072_mul_d_avx2_48: movq %r10, 376(%rdi) movq %r9, 384(%rdi) repz retq +#ifndef __APPLE__ .size sp_3072_mul_d_avx2_48,.-sp_3072_mul_d_avx2_48 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Compare a with b in constant time. * @@ -23306,10 +23638,16 @@ sp_3072_mul_d_avx2_48: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_3072_cmp_48 .type sp_3072_cmp_48,@function .align 16 sp_3072_cmp_48: +#else +.globl _sp_3072_cmp_48 +.p2align 4 +_sp_3072_cmp_48: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -23700,7 +24038,9 @@ sp_3072_cmp_48: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_3072_cmp_48,.-sp_3072_cmp_48 +#endif /* __APPLE__ */ #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH) #endif /* SP_RSA_PRIVATE_EXP_D || WOLFSSL_HAVE_SP_DH */ #ifdef HAVE_INTEL_AVX2 @@ -23710,10 +24050,16 @@ sp_3072_cmp_48: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_3072_mont_reduce_avx2_48 .type sp_3072_mont_reduce_avx2_48,@function .align 16 sp_3072_mont_reduce_avx2_48: +#else +.globl _sp_3072_mont_reduce_avx2_48 +.p2align 4 +_sp_3072_mont_reduce_avx2_48: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24030,12 +24376,18 @@ L_mont_loop_avx2_48: movq %rdi, %rsi movq %rdi, %rdi subq $384, %rdi +#ifndef __APPLE__ callq sp_3072_cond_sub_48@plt +#else + callq _sp_3072_cond_sub_48 +#endif /* __APPLE__ */ pop %r14 pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_3072_mont_reduce_avx2_48,.-sp_3072_mont_reduce_avx2_48 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -24058,10 +24410,16 @@ L_mont_loop_avx2_48: * a A single precision number to copy. * m Mask value to apply. 
*/ +#ifndef __APPLE__ .globl sp_256_cond_copy_4 .type sp_256_cond_copy_4,@function .align 16 sp_256_cond_copy_4: +#else +.globl _sp_256_cond_copy_4 +.p2align 4 +_sp_256_cond_copy_4: +#endif /* __APPLE__ */ movq (%rdi), %rax movq 8(%rdi), %rcx movq 16(%rdi), %r8 @@ -24079,7 +24437,9 @@ sp_256_cond_copy_4: xorq %r8, 16(%rdi) xorq %r9, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_cond_copy_4,.-sp_256_cond_copy_4 +#endif /* __APPLE__ */ /* Compare a with b in constant time. * * a A single precision integer. @@ -24087,10 +24447,16 @@ sp_256_cond_copy_4: * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#ifndef __APPLE__ .globl sp_256_cmp_4 .type sp_256_cmp_4,@function .align 16 sp_256_cmp_4: +#else +.globl _sp_256_cmp_4 +.p2align 4 +_sp_256_cmp_4: +#endif /* __APPLE__ */ xorq %rcx, %rcx movq $-1, %rdx movq $-1, %rax @@ -24129,7 +24495,9 @@ sp_256_cmp_4: cmovnz %rcx, %rdx xorq %rdx, %rax repz retq +#ifndef __APPLE__ .size sp_256_cmp_4,.-sp_256_cmp_4 +#endif /* __APPLE__ */ /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -24138,10 +24506,16 @@ sp_256_cmp_4: * b A single precision number to subtract. * m Mask value to apply. */ +#ifndef __APPLE__ .globl sp_256_cond_sub_4 .type sp_256_cond_sub_4,@function .align 16 sp_256_cond_sub_4: +#else +.globl _sp_256_cond_sub_4 +.p2align 4 +_sp_256_cond_sub_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24173,17 +24547,25 @@ sp_256_cond_sub_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_cond_sub_4,.-sp_256_cond_sub_4 +#endif /* __APPLE__ */ /* Sub b from a into r. (r = a - b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_sub_4 .type sp_256_sub_4,@function .align 16 sp_256_sub_4: +#else +.globl _sp_256_sub_4 +.p2align 4 +_sp_256_sub_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24211,17 +24593,25 @@ sp_256_sub_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_sub_4,.-sp_256_sub_4 +#endif /* __APPLE__ */ /* Reduce the number back to 256 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_256_mont_reduce_4 .type sp_256_mont_reduce_4,@function .align 16 sp_256_mont_reduce_4: +#else +.globl _sp_256_mont_reduce_4 +.p2align 4 +_sp_256_mont_reduce_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24309,7 +24699,9 @@ L_mont_loop_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_reduce_4,.-sp_256_mont_reduce_4 +#endif /* __APPLE__ */ /* Multiply two Montogmery form numbers mod the modulus (prime). * (r = a * b mod m) * @@ -24319,10 +24711,16 @@ L_mont_loop_4: * m Modulus (prime). * mp Montogmery mulitplier. */ +#ifndef __APPLE__ .globl sp_256_mont_mul_4 .type sp_256_mont_mul_4,@function .align 16 sp_256_mont_mul_4: +#else +.globl _sp_256_mont_mul_4 +.p2align 4 +_sp_256_mont_mul_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24504,7 +24902,9 @@ sp_256_mont_mul_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_mul_4,.-sp_256_mont_mul_4 +#endif /* __APPLE__ */ /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -24512,10 +24912,16 @@ sp_256_mont_mul_4: * m Modulus (prime). * mp Montogmery mulitplier. 
*/ +#ifndef __APPLE__ .globl sp_256_mont_sqr_4 .type sp_256_mont_sqr_4,@function .align 16 sp_256_mont_sqr_4: +#else +.globl _sp_256_mont_sqr_4 +.p2align 4 +_sp_256_mont_sqr_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -24675,7 +25081,9 @@ sp_256_mont_sqr_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_sqr_4,.-sp_256_mont_sqr_4 +#endif /* __APPLE__ */ /* Add two Montgomery form numbers (r = a + b % m). * * r Result of addition. @@ -24683,10 +25091,16 @@ sp_256_mont_sqr_4: * b Second number to add in Montogmery form. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_mont_add_4 .type sp_256_mont_add_4,@function .align 16 sp_256_mont_add_4: +#else +.globl _sp_256_mont_add_4 +.p2align 4 +_sp_256_mont_add_4: +#endif /* __APPLE__ */ movq (%rsi), %rax movq 8(%rsi), %rcx movq 16(%rsi), %r8 @@ -24710,17 +25124,25 @@ sp_256_mont_add_4: movq %r8, 16(%rdi) movq %r9, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mont_add_4,.-sp_256_mont_add_4 +#endif /* __APPLE__ */ /* Double a Montgomery form number (r = a + a % m). * * r Result of doubling. * a Number to double in Montogmery form. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_mont_dbl_4 .type sp_256_mont_dbl_4,@function .align 16 sp_256_mont_dbl_4: +#else +.globl _sp_256_mont_dbl_4 +.p2align 4 +_sp_256_mont_dbl_4: +#endif /* __APPLE__ */ movq (%rsi), %rdx movq 8(%rsi), %rax movq 16(%rsi), %rcx @@ -24744,17 +25166,25 @@ sp_256_mont_dbl_4: movq %rcx, 16(%rdi) movq %r8, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mont_dbl_4,.-sp_256_mont_dbl_4 +#endif /* __APPLE__ */ /* Triple a Montgomery form number (r = a + a + a % m). * * r Result of Tripling. * a Number to triple in Montogmery form. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_mont_tpl_4 .type sp_256_mont_tpl_4,@function .align 16 sp_256_mont_tpl_4: +#else +.globl _sp_256_mont_tpl_4 +.p2align 4 +_sp_256_mont_tpl_4: +#endif /* __APPLE__ */ movq (%rsi), %rdx movq 8(%rsi), %rax movq 16(%rsi), %rcx @@ -24792,7 +25222,9 @@ sp_256_mont_tpl_4: movq %rcx, 16(%rdi) movq %r8, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mont_tpl_4,.-sp_256_mont_tpl_4 +#endif /* __APPLE__ */ /* Subtract two Montgomery form numbers (r = a - b % m). * * r Result of subtration. @@ -24800,10 +25232,16 @@ sp_256_mont_tpl_4: * b Number to subtract with in Montogmery form. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_mont_sub_4 .type sp_256_mont_sub_4,@function .align 16 sp_256_mont_sub_4: +#else +.globl _sp_256_mont_sub_4 +.p2align 4 +_sp_256_mont_sub_4: +#endif /* __APPLE__ */ movq (%rsi), %rax movq 8(%rsi), %rcx movq 16(%rsi), %r8 @@ -24827,17 +25265,25 @@ sp_256_mont_sub_4: movq %r8, 16(%rdi) movq %r9, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mont_sub_4,.-sp_256_mont_sub_4 +#endif /* __APPLE__ */ /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) * * r Result of division by 2. * a Number to divide. * m Modulus (prime). */ +#ifndef __APPLE__ .globl sp_256_div2_4 .type sp_256_div2_4,@function .align 16 sp_256_div2_4: +#else +.globl _sp_256_div2_4 +.p2align 4 +_sp_256_div2_4: +#endif /* __APPLE__ */ movq (%rsi), %rdx movq 8(%rsi), %rax movq 16(%rsi), %rcx @@ -24864,7 +25310,9 @@ sp_256_div2_4: movq %rcx, 16(%rdi) movq %r8, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_div2_4,.-sp_256_div2_4 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Multiply two Montogmery form numbers mod the modulus (prime). * (r = a * b mod m) @@ -24875,10 +25323,16 @@ sp_256_div2_4: * m Modulus (prime). * mp Montogmery mulitplier. 
*/ +#ifndef __APPLE__ .globl sp_256_mont_mul_avx2_4 .type sp_256_mont_mul_avx2_4,@function .align 16 sp_256_mont_mul_avx2_4: +#else +.globl _sp_256_mont_mul_avx2_4 +.p2align 4 +_sp_256_mont_mul_avx2_4: +#endif /* __APPLE__ */ push %rbx push %rbp push %r12 @@ -25040,7 +25494,9 @@ sp_256_mont_mul_avx2_4: pop %rbp pop %rbx repz retq +#ifndef __APPLE__ .size sp_256_mont_mul_avx2_4,.-sp_256_mont_mul_avx2_4 +#endif /* __APPLE__ */ /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) * * r Result of squaring. @@ -25048,10 +25504,16 @@ sp_256_mont_mul_avx2_4: * m Modulus (prime). * mp Montogmery mulitplier. */ +#ifndef __APPLE__ .globl sp_256_mont_sqr_avx2_4 .type sp_256_mont_sqr_avx2_4,@function .align 16 sp_256_mont_sqr_avx2_4: +#else +.globl _sp_256_mont_sqr_avx2_4 +.p2align 4 +_sp_256_mont_sqr_avx2_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -25186,7 +25648,9 @@ sp_256_mont_sqr_avx2_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_sqr_avx2_4,.-sp_256_mont_sqr_avx2_4 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #ifdef HAVE_INTEL_AVX2 #endif /* HAVE_INTEL_AVX2 */ @@ -25201,26 +25665,40 @@ sp_256_mont_sqr_avx2_4: * * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_add_one_4 .type sp_256_add_one_4,@function .align 16 sp_256_add_one_4: +#else +.globl _sp_256_add_one_4 +.p2align 4 +_sp_256_add_one_4: +#endif /* __APPLE__ */ addq $1, (%rdi) adcq $0, 8(%rdi) adcq $0, 16(%rdi) adcq $0, 24(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_add_one_4,.-sp_256_add_one_4 +#endif /* __APPLE__ */ /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_add_4 .type sp_256_add_4,@function .align 16 sp_256_add_4: +#else +.globl _sp_256_add_4 +.p2align 4 +_sp_256_add_4: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rcx addq (%rdx), %rcx @@ -25236,17 +25714,25 @@ sp_256_add_4: movq %rcx, 24(%rdi) adcq $0, %rax repz retq +#ifndef __APPLE__ .size sp_256_add_4,.-sp_256_add_4 +#endif /* __APPLE__ */ /* Multiply a and b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_mul_4 .type sp_256_mul_4,@function .align 16 sp_256_mul_4: +#else +.globl _sp_256_mul_4 +.p2align 4 +_sp_256_mul_4: +#endif /* __APPLE__ */ movq %rdx, %rcx subq $32, %rsp # A[0] * B[0] @@ -25366,17 +25852,25 @@ sp_256_mul_4: movq %r9, 24(%rdi) addq $32, %rsp repz retq +#ifndef __APPLE__ .size sp_256_mul_4,.-sp_256_mul_4 +#endif /* __APPLE__ */ /* Multiply a and b into r. (r = a * b) * * r Result of multiplication. * a First number to multiply. * b Second number to multiply. */ +#ifndef __APPLE__ .globl sp_256_mul_avx2_4 .type sp_256_mul_avx2_4,@function .align 16 sp_256_mul_avx2_4: +#else +.globl _sp_256_mul_avx2_4 +.p2align 4 +_sp_256_mul_avx2_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -25474,16 +25968,24 @@ sp_256_mul_avx2_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mul_avx2_4,.-sp_256_mul_avx2_4 +#endif /* __APPLE__ */ /* Sub b from a into a. (a -= b) * * a A single precision integer and result. * b A single precision integer. 
*/ +#ifndef __APPLE__ .globl sp_256_sub_in_place_4 .type sp_256_sub_in_place_4,@function .align 16 sp_256_sub_in_place_4: +#else +.globl _sp_256_sub_in_place_4 +.p2align 4 +_sp_256_sub_in_place_4: +#endif /* __APPLE__ */ xorq %rax, %rax movq (%rsi), %rdx movq 8(%rsi), %rcx @@ -25495,17 +25997,25 @@ sp_256_sub_in_place_4: sbbq %r9, 24(%rdi) sbbq $0, %rax repz retq +#ifndef __APPLE__ .size sp_256_sub_in_place_4,.-sp_256_sub_in_place_4 +#endif /* __APPLE__ */ /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_256_mul_d_4 .type sp_256_mul_d_4,@function .align 16 sp_256_mul_d_4: +#else +.globl _sp_256_mul_d_4 +.p2align 4 +_sp_256_mul_d_4: +#endif /* __APPLE__ */ movq %rdx, %rcx # A[0] * B movq %rcx, %rax @@ -25538,7 +26048,9 @@ sp_256_mul_d_4: movq %r8, 24(%rdi) movq %r9, 32(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mul_d_4,.-sp_256_mul_d_4 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Mul a by digit b into r. (r = a * b) * @@ -25546,10 +26058,16 @@ sp_256_mul_d_4: * a A single precision integer. * b A single precision digit. */ +#ifndef __APPLE__ .globl sp_256_mul_d_avx2_4 .type sp_256_mul_d_avx2_4,@function .align 16 sp_256_mul_d_avx2_4: +#else +.globl _sp_256_mul_d_avx2_4 +.p2align 4 +_sp_256_mul_d_avx2_4: +#endif /* __APPLE__ */ movq %rdx, %rax # A[0] * B movq %rax, %rdx @@ -25577,17 +26095,25 @@ sp_256_mul_d_avx2_4: movq %r10, 24(%rdi) movq %r9, 32(%rdi) repz retq +#ifndef __APPLE__ .size sp_256_mul_d_avx2_4,.-sp_256_mul_d_avx2_4 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ /* Square a and put result in r. (r = a * a) * * r A single precision integer. * a A single precision integer. */ +#ifndef __APPLE__ .globl sp_256_sqr_4 .type sp_256_sqr_4,@function .align 16 sp_256_sqr_4: +#else +.globl _sp_256_sqr_4 +.p2align 4 +_sp_256_sqr_4: +#endif /* __APPLE__ */ push %r12 subq $32, %rsp # A[0] * A[0] @@ -25690,17 +26216,25 @@ sp_256_sqr_4: addq $32, %rsp pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_sqr_4,.-sp_256_sqr_4 +#endif /* __APPLE__ */ /* Square a and put result in r. (r = a * a) * * r Result of squaring. * a Number to square in Montogmery form. */ +#ifndef __APPLE__ .globl sp_256_sqr_avx2_4 .type sp_256_sqr_avx2_4,@function .align 16 sp_256_sqr_avx2_4: +#else +.globl _sp_256_sqr_avx2_4 +.p2align 4 +_sp_256_sqr_avx2_4: +#endif /* __APPLE__ */ push %rbx push %r12 push %r13 @@ -25773,7 +26307,9 @@ sp_256_sqr_avx2_4: pop %r12 pop %rbx repz retq +#ifndef __APPLE__ .size sp_256_sqr_avx2_4,.-sp_256_sqr_avx2_4 +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX2 /* Reduce the number back to 256 bits using Montgomery reduction. * @@ -25781,10 +26317,16 @@ sp_256_sqr_avx2_4: * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ +#ifndef __APPLE__ .globl sp_256_mont_reduce_avx2_4 .type sp_256_mont_reduce_avx2_4,@function .align 16 sp_256_mont_reduce_avx2_4: +#else +.globl _sp_256_mont_reduce_avx2_4 +.p2align 4 +_sp_256_mont_reduce_avx2_4: +#endif /* __APPLE__ */ push %r12 push %r13 push %r14 @@ -25927,7 +26469,9 @@ sp_256_mont_reduce_avx2_4: pop %r13 pop %r12 repz retq +#ifndef __APPLE__ .size sp_256_mont_reduce_avx2_4,.-sp_256_mont_reduce_avx2_4 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX2 */ #endif /* !WOLFSSL_SP_NO_256 */ #endif /* WOLFSSL_HAVE_SP_ECC */
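Note on the symbol renames in the hunks above: Mach-O toolchains prepend a leading underscore to C-visible symbols, so the sp_* entry points have to be exported as _sp_* for the existing C callers to resolve on Darwin, and the internal calls drop the ELF-specific @plt suffix because Mach-O does not use PLT-style relocations for them. The sketch below shows how a C translation unit could reference one of the patched routines; the sp_digit typedef, the prototype and the reduce_tail_example() helper are illustrative assumptions, not wolfSSL's actual declarations. The point is that the same C declaration links against sp_2048_cond_sub_32 on ELF and _sp_2048_cond_sub_32 on Mach-O with no source change.

#include <stdint.h>

typedef uint64_t sp_digit;            /* assumption: 64-bit limbs, as the asm implies */

/* Illustrative prototype: r = a - (b & m); the return value reports the borrow.
 * On Mach-O the compiler emits a reference to _sp_2048_cond_sub_32, which is
 * exactly the label added above under __APPLE__. */
extern sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
                                    const sp_digit* b, sp_digit m);

/* Hypothetical caller mirroring the tail of the Montgomery reduction:
 * subtract the modulus from the upper half only when the mask is all-ones,
 * writing the result to the lower half. */
static void reduce_tail_example(sp_digit* a, const sp_digit* m, sp_digit carry)
{
    (void)sp_2048_cond_sub_32(a, a + 32, m, (sp_digit)0 - carry);
}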
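The sp_*_mont_reduce_* bodies patched above all finish the same way: they call the matching sp_*_cond_sub_* routine with a mask derived from the reduction carry, which is the constant-time final subtraction step of Montgomery reduction. Below is a reference sketch of that structure, assuming 64-bit limbs, a 2*n-limb input and compiler __int128 support; it models the algorithm, not the assembly's register scheduling, and mont_reduce_sketch() is our name, not a wolfSSL function.

#include <stdint.h>

typedef uint64_t sp_digit;
typedef unsigned __int128 sp_word;    /* assumption: GCC/Clang __int128 support */

/* a: 2*n limbs holding the value to reduce; m: n-limb modulus;
 * mp: -m[0]^-1 mod 2^64. The reduced value is left in a[0..n-1]. */
static void mont_reduce_sketch(sp_digit* a, const sp_digit* m,
                               sp_digit mp, int n)
{
    sp_digit carry = 0;
    for (int i = 0; i < n; i++) {
        sp_digit mu = a[i] * mp;      /* limb multiplier that zeroes a[i] */
        sp_word c = 0;
        for (int j = 0; j < n; j++) {
            c += (sp_word)mu * m[j] + a[i + j];
            a[i + j] = (sp_digit)c;
            c >>= 64;
        }
        c += (sp_word)a[i + n] + carry;
        a[i + n] = (sp_digit)c;
        carry = (sp_digit)(c >> 64);
    }

    /* Final subtraction: the mask is all-ones exactly when the reduction
     * overflowed, matching the mask handed to sp_*_cond_sub_* above. */
    sp_digit mask = (sp_digit)0 - carry;
    sp_digit borrow = 0;
    for (int i = 0; i < n; i++) {
        sp_word d = (sp_word)a[i + n] - (m[i] & mask) - borrow;
        a[i] = (sp_digit)d;
        borrow = (sp_digit)((d >> 64) & 1);
    }
}

Keeping the subtraction masked rather than branch-guarded is what keeps the sequence constant-time with respect to the value being reduced.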
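The sp_*_cmp_* routines above return a sign in constant time by scanning from the most significant limb down and committing to the first limb that differs, using cmov chains rather than branches. A branch-free C equivalent for orientation; sp_cmp_sketch() is again an illustrative name:

#include <stdint.h>

typedef uint64_t sp_digit;

/* Returns -1, 0 or +1 for a < b, a == b, a > b over n limbs, deciding on the
 * most significant limb that differs, without branching on the data. */
static int sp_cmp_sketch(const sp_digit* a, const sp_digit* b, int n)
{
    sp_digit gt = 0, lt = 0;          /* become all-ones once decided */
    for (int i = n - 1; i >= 0; i--) {
        sp_digit decided = gt | lt;
        gt |= ((sp_digit)0 - (sp_digit)(a[i] > b[i])) & ~decided;
        lt |= ((sp_digit)0 - (sp_digit)(a[i] < b[i])) & ~decided;
    }
    return (int)(gt & 1) - (int)(lt & 1);
}

In C the branch-freedom still depends on the compiler lowering the limb comparisons to setcc/cmov, which is part of why these routines are carried as hand-written assembly in the first place.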