diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index b993f18a1..794fc7de0 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -221,7 +221,8 @@ static void sp_2048_to_bin(sp_digit* r, byte* a) SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[8]; + sp_digit tmp_arr[8]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( /* A[0] * B[0] */ @@ -727,7 +728,8 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, */ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) { - sp_digit tmp[8]; + sp_digit tmp_arr[8]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( /* A[0] * A[0] */ "ldr r6, [%[a], #0]\n\t" @@ -2097,7 +2099,7 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -2135,7 +2137,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -2155,7 +2157,8 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[64 * 2]; + sp_digit tmp_arr[64 * 2]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" @@ -2192,11 +2195,11 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -2209,7 +2212,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -2254,7 +2257,7 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq.n 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -2266,7 +2269,7 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ - "bal 5f\n\t" + "bal.n 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -2282,13 +2285,13 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt.n 3f\n\t" "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -2302,7 +2305,7 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -2314,7 +2317,7 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" - "bge 4b\n\t" + "bge.n 4b\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" "add sp, sp, r6\n\t" @@ -2372,7 +2375,7 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -2410,7 +2413,7 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -2430,7 +2433,8 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[32 * 2]; + sp_digit tmp_arr[32 * 2]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" @@ -2466,11 +2470,11 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -2481,7 +2485,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "mov r9, r8\n\t" "mov r6, #248\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -2526,7 +2530,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq.n 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -2538,7 +2542,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ - "bal 5f\n\t" + "bal.n 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -2553,13 +2557,13 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "mov r6, #128\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt.n 3f\n\t" "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -2571,7 +2575,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "mov r9, r8\n\t" "mov r6, #248\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -2581,7 +2585,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" - "bge 4b\n\t" + "bge.n 4b\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add sp, sp, r6\n\t" @@ -2643,7 +2647,7 @@ SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -2694,7 +2698,7 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -2753,7 +2757,7 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" - "blt 2b\n\t" + "blt.n 2b\n\t" /* a[i+30] += m[30] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -2786,7 +2790,7 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #120\n\t" "cmp r10, r11\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -2857,7 +2861,7 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -2951,7 +2955,7 @@ SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" - "bge 1b\n\t" + "bge.n 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -3366,7 +3370,7 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -3425,7 +3429,7 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" - "blt 2b\n\t" + "blt.n 2b\n\t" /* a[i+62] += m[62] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -3458,7 +3462,7 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #248\n\t" "cmp r10, r11\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -3615,7 +3619,7 @@ SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" - "bge 1b\n\t" + "bge.n 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -4197,7 +4201,7 @@ SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -5310,7 +5314,8 @@ static void sp_3072_to_bin(sp_digit* r, byte* a) SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[12 * 2]; + sp_digit tmp_arr[12 * 2]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" @@ -5346,11 +5351,11 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -5361,7 +5366,7 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, "mov r9, r8\n\t" "mov r6, #88\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -5405,7 +5410,7 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq.n 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -5417,7 +5422,7 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ - "bal 5f\n\t" + "bal.n 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -5432,13 +5437,13 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "mov r6, #48\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt.n 3f\n\t" "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -5450,7 +5455,7 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "mov r9, r8\n\t" "mov r6, #88\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -5460,7 +5465,7 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" - "bge 4b\n\t" + "bge.n 4b\n\t" "mov r6, #96\n\t" "add sp, sp, r6\n\t" : @@ -6790,7 +6795,7 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -6828,7 +6833,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -6848,7 +6853,8 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[96 * 2]; + sp_digit tmp_arr[96 * 2]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" @@ -6888,11 +6894,11 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -6905,7 +6911,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -6952,7 +6958,7 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq.n 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -6964,7 +6970,7 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ - "bal 5f\n\t" + "bal.n 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -6981,13 +6987,13 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "add r6, r6, #128\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt.n 3f\n\t" "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -7001,7 +7007,7 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -7013,7 +7019,7 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" - "bge 4b\n\t" + "bge.n 4b\n\t" "mov r6, #3\n\t" "lsl r6, r6, #8\n\t" "add sp, sp, r6\n\t" @@ -7071,7 +7077,7 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -7109,7 +7115,7 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -7129,7 +7135,8 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[48 * 2]; + sp_digit tmp_arr[48 * 2]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" @@ -7165,11 +7172,11 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -7182,7 +7189,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "lsl r6, r6, #8\n\t" "add r6, r6, #120\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -7228,7 +7235,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq.n 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -7240,7 +7247,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ - "bal 5f\n\t" + "bal.n 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -7255,13 +7262,13 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "mov r6, #192\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt.n 3f\n\t" "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -7275,7 +7282,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, #120\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -7287,7 +7294,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" - "bge 4b\n\t" + "bge.n 4b\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #128\n\t" @@ -7350,7 +7357,7 @@ SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -7401,7 +7408,7 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -7460,7 +7467,7 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" - "blt 2b\n\t" + "blt.n 2b\n\t" /* a[i+46] += m[46] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -7493,7 +7500,7 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #184\n\t" "cmp r10, r11\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -7564,7 +7571,7 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -7658,7 +7665,7 @@ SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" - "bge 1b\n\t" + "bge.n 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -8074,7 +8081,7 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -8133,7 +8140,7 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" - "blt 2b\n\t" + "blt.n 2b\n\t" /* a[i+94] += m[94] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -8166,7 +8173,7 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #376\n\t" "cmp r10, r11\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -8325,7 +8332,7 @@ SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" - "bge 1b\n\t" + "bge.n 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -8907,7 +8914,7 @@ SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -10980,7 +10987,7 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -11018,7 +11025,7 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -11038,7 +11045,8 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[128 * 2]; + sp_digit tmp_arr[128 * 2]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" @@ -11077,11 +11085,11 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -11094,7 +11102,7 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -11141,7 +11149,7 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq.n 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -11153,7 +11161,7 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ - "bal 5f\n\t" + "bal.n 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -11169,13 +11177,13 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt.n 3f\n\t" "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -11189,7 +11197,7 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -11201,7 +11209,7 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" - "bge 4b\n\t" + "bge.n 4b\n\t" "mov r6, #4\n\t" "lsl r6, r6, #8\n\t" "add sp, sp, r6\n\t" @@ -11261,7 +11269,7 @@ SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -11314,7 +11322,7 @@ SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -11373,7 +11381,7 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" - "blt 2b\n\t" + "blt.n 2b\n\t" /* a[i+126] += m[126] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -11406,7 +11414,7 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #504\n\t" "cmp r10, r11\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -11565,7 +11573,7 @@ SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" - "bge 1b\n\t" + "bge.n 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -12148,7 +12156,7 @@ SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -15283,7 +15291,7 @@ SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" - "bge 1b\n\t" + "bge.n 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -15326,7 +15334,7 @@ SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -15416,7 +15424,7 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, "add %[a], %[a], #4\n\t" "mov r6, #8\n\t" "cmp r9, r6\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "sub %[a], %[a], #32\n\t" "mov r3, r1\n\t" "sub r1, r1, #1\n\t" @@ -15505,7 +15513,7 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" - "blt 2b\n\t" + "blt.n 2b\n\t" /* a[i+6] += m[6] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -15538,7 +15546,7 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* /* Next word in a */ "sub r10, r10, #24\n\t" "cmp r10, r11\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -15620,7 +15628,7 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -16261,7 +16269,7 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6" @@ -19627,7 +19635,8 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out, SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[8]; + sp_digit tmp_arr[8]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( /* A[0] * B[0] */ @@ -20156,7 +20165,7 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -20238,7 +20247,7 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -20383,7 +20392,8 @@ static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit */ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) { - sp_digit tmp[8]; + sp_digit tmp_arr[8]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( /* A[0] * A[0] */ "ldr r6, [%[a], #0]\n\t" @@ -22550,7 +22560,8 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[12 * 2]; + sp_digit tmp_arr[12 * 2]; + sp_digit* tmp = tmp_arr; __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" @@ -22586,11 +22597,11 @@ SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -22601,7 +22612,7 @@ SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "mov r9, r8\n\t" "mov r6, #88\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -22641,7 +22652,7 @@ SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -22702,7 +22713,7 @@ SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" - "blt 2b\n\t" + "blt.n 2b\n\t" /* a[i+10] += m[10] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -22735,7 +22746,7 @@ SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #40\n\t" "cmp r10, r11\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -22794,7 +22805,7 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq.n 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -22806,7 +22817,7 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ - "bal 5f\n\t" + "bal.n 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -22821,13 +22832,13 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "mov r6, #48\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq.n 3f\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt.n 3f\n\t" "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" - "ble 2b\n\t" + "ble.n 2b\n\t" "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -22839,7 +22850,7 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "mov r9, r8\n\t" "mov r6, #88\n\t" "cmp r8, r6\n\t" - "ble 1b\n\t" + "ble.n 1b\n\t" "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -22849,7 +22860,7 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" - "bge 4b\n\t" + "bge.n 4b\n\t" "mov r6, #96\n\t" "add sp, sp, r6\n\t" : @@ -23018,7 +23029,7 @@ SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" - "bge 1b\n\t" + "bge.n 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -23104,7 +23115,7 @@ SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -23240,7 +23251,7 @@ SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6" @@ -23348,7 +23359,7 @@ SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, c "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -27043,7 +27054,7 @@ SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" - "bne 1b\n\t" + "bne.n 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -27135,7 +27146,7 @@ SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt.n 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b)