diff --git a/configure.ac b/configure.ac index 6e4c6b2ad..ed89c6f97 100644 --- a/configure.ac +++ b/configure.ac @@ -5134,6 +5134,10 @@ do AM_CFLAGS="$AM_CFLAGS -DSP_INT_BITS=$v" ENABLED_SP_MATH_ALL="yes" ;; + nomalloc) + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_NO_MALLOC" + ENABLED_SP_MATH_ALL="yes" + ;; *) AC_MSG_ERROR([Support SP int bit sizes: 256, 384, 521, 1024, 2048, 3072, 4096. $ENABLED_SP_MATH_ALL not supported]) ;; diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index fbb97e5c8..d51ab9a57 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -83,6 +83,177 @@ This library provides single precision (SP) integer math functions. #include +/* DECL_SP_INT: Declare one variable of type 'sp_int'. */ +#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \ + !defined(WOLFSSL_SP_NO_MALLOC) + /* Declare a variable that will be assigned a value on XMALLOC. */ + #define DECL_SP_INT(n, s) \ + sp_int* n = NULL +#else + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + defined(WOLFSSL_SP_SMALL) + /* Declare a variable on the stack with the required data size. */ + #define DECL_SP_INT(n, s) \ + byte n##d[MP_INT_SIZEOF(s)]; \ + sp_int* n = (sp_int*)n##d + #else + /* Declare a variable on the stack. */ + #define DECL_SP_INT(n, s) \ + sp_int n[1] + #endif +#endif + +/* ALLOC_SP_INT: Allocate an 'sp_int' of reqired size. */ +#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \ + !defined(WOLFSSL_SP_NO_MALLOC) + /* Dynamically allocate just enough data to support size. */ + #define ALLOC_SP_INT(n, s, err, h) \ + do { \ + if (err == MP_OKAY) { \ + n = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), h, DYNAMIC_TYPE_BIGINT); \ + if (n == NULL) { \ + err = MP_MEM; \ + } \ + } \ + } \ + while (0) + + /* Dynamically allocate just enough data to support size - and set size. 
*/ + #define ALLOC_SP_INT_SIZE(n, s, err, h) \ + do { \ + ALLOC_SP_INT(n, s, err, h); \ + if (err == MP_OKAY) { \ + n->size = s; \ + } \ + } \ + while (0) +#else + /* Array declared on stack - nothing to do. */ + #define ALLOC_SP_INT(n, s, err, h) + /* Array declared on stack - set the size field. */ + #define ALLOC_SP_INT_SIZE(n, s, err, h) \ + n->size = s; +#endif + +/* FREE_SP_INT: Free an 'sp_int' variable. */ +#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \ + !defined(WOLFSSL_SP_NO_MALLOC) + /* Free dynamically allocated data. */ + #define FREE_SP_INT(n, h) \ + do { \ + if (n != NULL) { \ + XFREE(n, h, DYNAMIC_TYPE_BIGINT); \ + } \ + } \ + while (0) +#else + /* Nothing to do as declared on stack. */ + #define FREE_SP_INT(n, h) +#endif + + +/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'. */ +#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \ + !defined(WOLFSSL_SP_NO_MALLOC) + /* Declare a variable that will be assigned a value on XMALLOC. */ + #define DECL_SP_INT_ARRAY(n, s, c) \ + sp_int* n##d = NULL; \ + sp_int* n[c] = { NULL, } +#else + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + defined(WOLFSSL_SP_SMALL) + /* Declare a variable on the stack with the required data size. */ + #define DECL_SP_INT_ARRAY(n, s, c) \ + byte n##d[MP_INT_SIZEOF(s) * (c)]; \ + sp_int* n[c] + #else + /* Declare a variable on the stack. */ + #define DECL_SP_INT_ARRAY(n, s, c) \ + sp_int n##d[c]; \ + sp_int* n[c] + #endif +#endif + +/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of reqired size. */ +#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \ + !defined(WOLFSSL_SP_NO_MALLOC) + /* Dynamically allocate just enough data to support multiple sp_ints of the + * required size. Use pointers into data to make up array and set sizes. 
+ */ + #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \ + do { \ + if (err == MP_OKAY) { \ + n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), h, \ + DYNAMIC_TYPE_BIGINT); \ + if (n##d == NULL) { \ + err = MP_MEM; \ + } \ + else { \ + int n##ii; \ + n[0] = n##d; \ + n[0]->size = s; \ + for (n##ii = 1; n##ii < (c); n##ii++) { \ + n[n##ii] = MP_INT_NEXT(n[n##ii-1], s); \ + n[n##ii]->size = s; \ + } \ + } \ + } \ + } \ + while (0) +#else + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + defined(WOLFSSL_SP_SMALL) + /* Data declared on stack that supports multiple sp_ints of the + * required size. Use pointers into data to make up array and set sizes. + */ + #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \ + do { \ + if (err == MP_OKAY) { \ + int n##ii; \ + n[0] = (sp_int*)n##d; \ + n[0]->size = s; \ + for (n##ii = 1; n##ii < (c); n##ii++) { \ + n[n##ii] = MP_INT_NEXT(n[n##ii-1], s); \ + n[n##ii]->size = s; \ + } \ + } \ + } \ + while (0) + #else + /* Data declared on stack that supports multiple sp_ints of the + * required size. Set into array and set sizes. + */ + #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \ + do { \ + if (err == MP_OKAY) { \ + int n##ii; \ + for (n##ii = 0; n##ii < (c); n##ii++) { \ + n[n##ii] = &n##d[n##ii]; \ + n[n##ii]->size = s; \ + } \ + } \ + } \ + while (0) + #endif +#endif + +/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'. */ +#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \ + !defined(WOLFSSL_SP_NO_MALLOC) + /* Free data variable that was dynamically allocated. */ + #define FREE_SP_INT_ARRAY(n, h) \ + do { \ + if (n##d != NULL) { \ + XFREE(n##d, h, DYNAMIC_TYPE_BIGINT); \ + } \ + } \ + while (0) +#else + /* Nothing to do as data declared on stack. 
*/ + #define FREE_SP_INT_ARRAY(n, h) +#endif + + #ifndef WOLFSSL_NO_ASM #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64 @@ -2249,36 +2420,20 @@ int sp_init_copy(sp_int* r, sp_int* a) int sp_exch(sp_int* a, sp_int* b) { int err = MP_OKAY; -#ifndef WOLFSSL_SMALL_STACK - sp_int t[1]; -#else - sp_int* t = NULL; -#endif + DECL_SP_INT(t, (a != NULL) ? a->used : 1); if ((a == NULL) || (b == NULL)) { err = MP_VAL; } -#ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT(t, a->used, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } -#endif - - if (err == MP_OKAY) { - *t = *a; - *a = *b; - *b = *t; + XMEMCPY(t, a, MP_INT_SIZEOF(a->used)); + XMEMCPY(a, b, MP_INT_SIZEOF(b->used)); + XMEMCPY(b, t, MP_INT_SIZEOF(a->used)); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif + FREE_SP_INT(t, NULL); return err; } #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || @@ -2289,44 +2444,36 @@ int sp_exch(sp_int* a, sp_int* b) int sp_cond_swap_ct(sp_int * a, sp_int * b, int c, int m) { int i; + int err = MP_OKAY; sp_digit mask = (sp_digit)0 - m; -#ifndef WOLFSSL_SMALL_STACK - sp_int t[1]; -#else - sp_int* t; -#endif + DECL_SP_INT(t, c); -#ifdef WOLFSSL_SMALL_STACK - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) - return MP_MEM; -#endif - - t->used = (int)((a->used ^ b->used) & mask); -#ifdef WOLFSSL_SP_INT_NEGATIVE - t->sign = (int)((a->sign ^ b->sign) & mask); -#endif - for (i = 0; i < c; i++) { - t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask; - } - a->used ^= t->used; -#ifdef WOLFSSL_SP_INT_NEGATIVE - a->sign ^= t->sign; -#endif - for (i = 0; i < c; i++) { - a->dp[i] ^= t->dp[i]; - } - b->used ^= t->used; -#ifdef WOLFSSL_SP_INT_NEGATIVE - b->sign ^= b->sign; -#endif - for (i = 0; i < c; i++) { - b->dp[i] ^= t->dp[i]; + ALLOC_SP_INT(t, c, err, NULL); + if (err == MP_OKAY) { + t->used = (int)((a->used ^ 
b->used) & mask); + #ifdef WOLFSSL_SP_INT_NEGATIVE + t->sign = (int)((a->sign ^ b->sign) & mask); + #endif + for (i = 0; i < c; i++) { + t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask; + } + a->used ^= t->used; + #ifdef WOLFSSL_SP_INT_NEGATIVE + a->sign ^= t->sign; + #endif + for (i = 0; i < c; i++) { + a->dp[i] ^= t->dp[i]; + } + b->used ^= t->used; + #ifdef WOLFSSL_SP_INT_NEGATIVE + b->sign ^= b->sign; + #endif + for (i = 0; i < c; i++) { + b->dp[i] ^= t->dp[i]; + } } -#ifdef WOLFSSL_SMALL_STACK - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); -#endif + FREE_SP_INT(t, NULL); return MP_OKAY; } #endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */ @@ -2579,7 +2726,7 @@ int sp_count_bits(sp_int* a) /* Mask to apply to check with array. */ #define SP_LNZ_MASK 0xf /* Number of least significant zero bits in first SP_LNZ_CNT numbers. */ -static const int lnz[SP_LNZ_CNT] = { +static const int sp_lnz[SP_LNZ_CNT] = { 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 }; @@ -2607,7 +2754,7 @@ int sp_cnt_lsb(sp_int* a) } for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) { - bc = lnz[(a->dp[i] >> j) & SP_LNZ_MASK]; + bc = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK]; if (bc != 4) { bc += cnt + j; break; @@ -3180,8 +3327,8 @@ static void _sp_div_3(sp_int* a, sp_int* r, sp_int_digit* rem) sp_int_word t; sp_int_digit tr = 0; sp_int_digit tt; - static const char r6[6] = { 0, 0, 0, 1, 1, 1 }; - static const char rem6[6] = { 0, 1, 2, 0, 1, 2 }; + static const char sp_r6[6] = { 0, 0, 0, 1, 1, 1 }; + static const char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 }; if (r == NULL) { for (i = a->used - 1; i >= 0; i--) { @@ -3196,7 +3343,7 @@ static void _sp_div_3(sp_int* a, sp_int* r, sp_int_digit* rem) tt = (t * 0x55) >> 8; #endif tr = (sp_int_digit)(t - (sp_int_word)tt * 3); - tr = rem6[tr]; + tr = sp_rem6[tr]; } *rem = tr; } @@ -3213,8 +3360,8 @@ static void _sp_div_3(sp_int* a, sp_int* r, sp_int_digit* rem) tt = (t * 0x55) >> 8; #endif tr = (sp_int_digit)(t - (sp_int_word)tt * 3); - tt += r6[tr]; - tr = 
rem6[tr]; + tt += sp_r6[tr]; + tr = sp_rem6[tr]; r->dp[i] = tt; } r->used = a->used; @@ -3709,7 +3856,10 @@ int sp_add(sp_int* a, sp_int* b, sp_int* r) if ((a == NULL) || (b == NULL) || (r == NULL)) { err = MP_VAL; } - else { + if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) { + err = MP_VAL; + } + if (err == MP_OKAY) { #ifndef WOLFSSL_SP_INT_NEGATIVE err = _sp_add_off(a, b, r, 0); #else @@ -3816,25 +3966,15 @@ int sp_sub(sp_int* a, sp_int* b, sp_int* r) int sp_addmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) { int err = MP_OKAY; -#ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; -#else - sp_int t[1]; -#endif /* WOLFSSL_SMALL_STACK */ + int used = ((a == NULL) || (b == NULL)) ? 1 : + ((a->used >= b->used) ? a->used + 1 : b->used + 1); + DECL_SP_INT(t, used); if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; } -#ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } -#endif /* WOLFSSL_SMALL_STACK */ - + ALLOC_SP_INT_SIZE(t, used, err, NULL); if (0 && (err == MP_OKAY)) { sp_print(a, "a"); sp_print(b, "b"); @@ -3852,11 +3992,7 @@ int sp_addmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) sp_print(r, "rma"); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif /* WOLFSSL_SMALL_STACK */ + FREE_SP_INT(t, NULL); return err; } #endif /* WOLFSSL_SP_MATH_ALL || (!WOLFSSL_SP_MATH && WOLFSSL_CUSTOM_CURVES) */ @@ -3878,48 +4014,36 @@ int sp_submod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) { #ifndef WOLFSSL_SP_INT_NEGATIVE int err = MP_OKAY; -#ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; -#else - sp_int t[2]; -#endif /* WOLFSSL_SMALL_STACK */ + DECL_SP_INT_ARRAY(t, (m == NULL) ? 
1 : m->used + 1, 2); if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; } -#ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } -#endif /* WOLFSSL_SMALL_STACK */ - if (0 && (err == MP_OKAY)) { sp_print(a, "a"); sp_print(b, "b"); sp_print(m, "m"); } + ALLOC_SP_INT_ARRAY(t, m->used + 1, 2, err, NULL); if (err == MP_OKAY) { if (_sp_cmp(a, m) == MP_GT) { - err = sp_mod(a, m, &t[0]); - a = &t[0]; + err = sp_mod(a, m, t[0]); + a = t[0]; } } if (err == MP_OKAY) { if (_sp_cmp(b, m) == MP_GT) { - err = sp_mod(b, m, &t[1]); - b = &t[1]; + err = sp_mod(b, m, t[1]); + b = t[1]; } } if (err == MP_OKAY) { if (_sp_cmp(a, b) == MP_LT) { - err = sp_add(a, m, &t[0]); + err = sp_add(a, m, t[0]); if (err == MP_OKAY) { - err = sp_sub(&t[0], b, r); + err = sp_sub(t[0], b, r); } } else { @@ -3931,39 +4055,27 @@ int sp_submod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) sp_print(r, "rms"); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); -#endif /* WOLFSSL_SMALL_STACK */ + FREE_SP_INT_ARRAY(t, NULL); return err; #else /* WOLFSSL_SP_INT_NEGATIVE */ int err = MP_OKAY; -#ifdef WOLFSSL_SMALL_STACK - sp_int* t; -#else - sp_int t[1]; -#endif + int used = ((a == NULL) || (b == NULL)) ? 1 : + ((a->used >= b->used) ? 
a->used + 1 : b->used + 1); + DECL_SP_INT(t, used); if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; } -#ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } -#endif - if (0 && (err == MP_OKAY)) { sp_print(a, "a"); sp_print(b, "b"); sp_print(m, "m"); } + + ALLOC_SP_INT_SIZE(t, used, err, NULL); if (err == MP_OKAY) { err = sp_sub(a, b, t); } @@ -3975,10 +4087,7 @@ int sp_submod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) sp_print(r, "rms"); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); -#endif + FREE_SP_INT(t, NULL); return err; #endif /* WOLFSSL_SP_INT_NEGATIVE */ } @@ -4311,28 +4420,15 @@ int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem) int s = 0; sp_int_digit dt; sp_int_digit t; -#ifdef WOLFSSL_SMALL_STACK sp_int* sa = NULL; sp_int* sd = NULL; sp_int* tr = NULL; sp_int* trial = NULL; -#else - sp_int sa[1]; - sp_int sd[1]; - sp_int tr[1]; - sp_int trial[1]; -#endif /* WOLFSSL_SMALL_STACK */ -#ifdef WOLFSSL_SP_SMALL - int c; -#else - int j, o; - sp_int_word tw; - sp_int_sword sw; -#endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_INT_NEGATIVE int aSign = MP_ZPOS; int dSign = MP_ZPOS; #endif /* WOLFSSL_SP_INT_NEGATIVE */ + DECL_SP_INT_ARRAY(td, (a == NULL) ? 1 : a->used + 1, 4); if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) { err = MP_VAL; @@ -4389,26 +4485,21 @@ int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem) } } -#ifdef WOLFSSL_SMALL_STACK - if ((!done) && (err == MP_OKAY)) { - sa = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); - if (sa == NULL) { - err = MP_MEM; - } + if (!done) { + /* Macro always has code associated with it and checks err first. 
*/ + ALLOC_SP_INT_ARRAY(td, a->used + 1, 4, err, NULL); } -#endif /* WOLFSSL_SMALL_STACK */ if ((!done) && (err == MP_OKAY)) { -#ifdef WOLFSSL_SMALL_STACK - sd = &sa[1]; - tr = &sa[2]; - trial = &sa[3]; -#endif /* WOLFSSL_SMALL_STACK */ + sa = td[0]; + sd = td[1]; + tr = td[2]; + trial = td[3]; - sp_init(sa); - sp_init(sd); - sp_init(tr); - sp_init(trial); + sp_init_size(sa, a->used + 1); + sp_init_size(sd, d->used + 1); + sp_init_size(tr, a->used - d->used + 2); + sp_init_size(trial, a->used + 1); s = sp_count_bits(d); s = SP_WORD_SIZE - (s & SP_WORD_MASK); @@ -4423,6 +4514,13 @@ int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem) } } if ((!done) && (err == MP_OKAY) && (d->used > 0)) { +#ifdef WOLFSSL_SP_SMALL + int c; +#else + int j; + int o; + sp_int_sword sw; +#endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_INT_NEGATIVE sa->sign = MP_ZPOS; sd->sign = MP_ZPOS; @@ -4473,7 +4571,7 @@ int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem) #else o = i - d->used; do { - tw = 0; + sp_int_word tw = 0; for (j = 0; j < d->used; j++) { tw += (sp_int_word)d->dp[j] * t; trial->dp[j] = (sp_int_digit)tw; @@ -4534,11 +4632,6 @@ int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem) } } -#ifdef WOLFSSL_SMALL_STACK - if (sa != NULL) - XFREE(sa, NULL, DYNAMIC_TYPE_BIGINT); -#endif /* WOLFSSL_SMALL_STACK */ - if (0 && (err == MP_OKAY)) { if (rem != NULL) { sp_print(rem, "rdr"); @@ -4548,6 +4641,7 @@ int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem) } } + FREE_SP_INT_ARRAY(td, NULL); return err; } #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \ @@ -4570,11 +4664,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) { int err = MP_OKAY; #ifdef WOLFSSL_SP_INT_NEGATIVE - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif /* WOLFSSL_SMALL_STACK */ + DECL_SP_INT(t, (m == NULL) ? 
1 : m->used); #endif /* WOLFSSL_SP_INT_NEGATIVE */ if ((a == NULL) || (m == NULL) || (r == NULL)) { @@ -4586,16 +4676,9 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) err = sp_div(a, m, NULL, r); } #else + ALLOC_SP_INT(t, m->used, err, NULL); if (err == MP_OKAY) { - #ifdef WOLFSSL_SMALL_STACK - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - #endif /* WOLFSSL_SMALL_STACK */ - } - if (err == MP_OKAY) { - sp_init(t); + sp_init_size(t, m->used); err = sp_div(a, m, NULL, t); } if (err == MP_OKAY) { @@ -4607,11 +4690,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) } } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif /* WOLFSSL_SMALL_STACK */ + FREE_SP_INT(t, NULL); #endif /* WOLFSSL_SP_INT_NEGATIVE */ return err; @@ -4646,21 +4725,21 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) int i; int j; int k; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; + #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && defined(WOLFSSL_SP_SMALL) + sp_int_digit t[a->used * 2]; #else - sp_int t[1]; + sp_int_digit t[SP_INT_DIGITS]; #endif - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL, + DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } #endif - if (err == MP_OKAY) { sp_int_digit l, h, o; sp_int_digit* dp; @@ -4668,7 +4747,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) h = 0; l = 0; SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); - t->dp[0] = h; + t[0] = h; h = 0; o = 0; for (k = 1; k <= a->used - 1; k++) { @@ -4677,7 +4756,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) for (; j >= 0; dp++, j--) { SP_ASM_MUL_ADD(l, h, 
o, dp[0], b->dp[j]); } - t->dp[k] = l; + t[k] = l; l = h; h = o; o = 0; @@ -4688,27 +4767,22 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) for (; i < a->used; i++, dp--) { SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]); } - t->dp[k] = l; + t[k] = l; l = h; h = o; o = 0; } - t->dp[k] = l; - t->dp[k+1] = h; - t->used = k + 2; - - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + t[k] = l; + r->used = k + 1; + XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit)); sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); } #endif - return err; } @@ -4727,21 +4801,21 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) int i; int j; int k; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; + #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && defined(WOLFSSL_SP_SMALL) + sp_int_digit t[a->used + b->used]; #else - sp_int t[1]; + sp_int_digit t[SP_INT_DIGITS]; #endif - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), + NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } #endif - if (err == MP_OKAY) { sp_int_digit l; sp_int_digit h; @@ -4750,7 +4824,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) h = 0; l = 0; SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); - t->dp[0] = h; + t[0] = h; h = 0; o = 0; for (k = 1; k <= b->used - 1; k++) { @@ -4759,7 +4833,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) for (; (i < a->used) && (j >= 0); i++, j--) { SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]); } - t->dp[k] = l; + t[k] = l; l = h; h = o; o = 0; @@ -4770,27 +4844,22 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) for (; (i < 
a->used) && (j >= 0); i++, j--) { SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]); } - t->dp[k] = l; + t[k] = l; l = h; h = o; o = 0; } - t->dp[k] = l; - t->dp[k+1] = h; - t->used = k + 2; - - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + t[k] = l; + r->used = k + 1; + XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit)); sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); } #endif - return err; } #else @@ -4809,21 +4878,21 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) int i; int j; int k; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; + #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && defined(WOLFSSL_SP_SMALL) + sp_int_digit t[a->used + b->used]; #else - sp_int t[1]; + sp_int_digit t[SP_INT_DIGITS]; #endif - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), + NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } #endif - if (err == MP_OKAY) { sp_int_word w; sp_int_word l; @@ -4833,7 +4902,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) #endif w = (sp_int_word)a->dp[0] * b->dp[0]; - t->dp[0] = (sp_int_digit)w; + t[0] = (sp_int_digit)w; l = (sp_int_digit)(w >> SP_WORD_SIZE); h = 0; #ifdef SP_WORD_OVERFLOW @@ -4854,7 +4923,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) h &= SP_MASK; #endif } - t->dp[k] = (sp_int_digit)l; + t[k] = (sp_int_digit)l; l >>= SP_WORD_SIZE; l += (sp_int_digit)h; h >>= SP_WORD_SIZE; @@ -4863,22 +4932,17 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) o >>= SP_WORD_SIZE; #endif } - t->dp[k] = (sp_int_digit)l; - t->dp[k+1] = (sp_int_digit)h; - t->used = k 
+ 2; - - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + t[k] = (sp_int_digit)l; + r->used = k + 1; + XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit)); sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); } #endif - return err; } #endif @@ -4899,7 +4963,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r) { int err = MP_OKAY; - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) sp_int_word* w = NULL; #else sp_int_word w[16]; @@ -4907,7 +4971,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_int_digit* da = a->dp; sp_int_digit* db = b->dp; - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL, DYNAMIC_TYPE_BIGINT); if (w == NULL) { @@ -4998,7 +5062,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (w != NULL) { XFREE(w, NULL, DYNAMIC_TYPE_BIGINT); } @@ -5017,82 +5081,55 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) */ static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r) { - int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit t[4]; - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif + SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); + t[0] = h; + h = 0; + SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); + t[1] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, 
a->dp[1], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); + t[2] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); + t[3] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); + r->dp[4] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); + r->dp[5] = l; + l = h; + h = o; + SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]); + r->dp[6] = l; + r->dp[7] = h; + XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit)); + r->used = 8; + sp_clamp(r); - if (err == MP_OKAY) { - h = 0; - l = 0; - SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); - t->dp[0] = h; - h = 0; - o = 0; - SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); - t->dp[1] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); - t->dp[2] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); - t->dp[3] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); - t->dp[4] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); - t->dp[5] = l; - l = h; - h = o; - SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]); - t->dp[6] = l; - t->dp[7] = h; - t->used = 8; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { - sp_clamp(r); - } - - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - 
XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - - return err; + return MP_OKAY; } #endif /* SQR_MUL_ASM */ #endif /* SP_WORD_SIZE == 64 */ @@ -5109,118 +5146,91 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) */ static int _sp_mul_6(sp_int* a, sp_int* b, sp_int* r) { - int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit t[6]; - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif + SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); + t[0] = h; + h = 0; + SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); + t[1] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); + t[2] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); + t[3] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]); + t[4] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]); + t[5] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]); + SP_ASM_MUL_ADD(l, h, 
o, a->dp[3], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]); + r->dp[6] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]); + r->dp[7] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]); + r->dp[8] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]); + r->dp[9] = l; + l = h; + h = o; + SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]); + r->dp[10] = l; + r->dp[11] = h; + XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit)); + r->used = 12; + sp_clamp(r); - if (err == MP_OKAY) { - h = 0; - l = 0; - SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); - t->dp[0] = h; - h = 0; - o = 0; - SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); - t->dp[1] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); - t->dp[2] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); - t->dp[3] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]); - t->dp[4] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); - 
SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]); - t->dp[5] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]); - t->dp[6] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]); - t->dp[7] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]); - t->dp[8] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]); - t->dp[9] = l; - l = h; - h = o; - SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]); - t->dp[10] = l; - t->dp[11] = h; - t->used = 12; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { - sp_clamp(r); - } - - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - - return err; + return MP_OKAY; } #endif /* SQR_MUL_ASM */ #endif /* SP_WORD_SIZE == 64 */ @@ -5237,162 +5247,135 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) */ static int _sp_mul_8(sp_int* a, sp_int* b, sp_int* r) { - int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit t[8]; - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif + SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); + t[0] = h; + h = 0; + SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, 
a->dp[1], b->dp[0]); + t[1] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); + t[2] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); + t[3] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]); + t[4] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]); + t[5] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]); + t[6] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]); + t[7] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]); + 
SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]); + r->dp[8] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]); + r->dp[9] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]); + r->dp[10] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]); + r->dp[11] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]); + r->dp[12] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]); + r->dp[13] = l; + l = h; + h = o; + SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]); + r->dp[14] = l; + r->dp[15] = h; + XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit)); + r->used = 16; + sp_clamp(r); - if (err == MP_OKAY) { - h = 0; - l = 0; - SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); - t->dp[0] = h; - h = 0; - o = 0; - SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); - t->dp[1] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); - t->dp[2] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, 
o, a->dp[0], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); - t->dp[3] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]); - t->dp[4] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]); - t->dp[5] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]); - t->dp[6] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]); - t->dp[7] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]); - t->dp[8] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, 
a->dp[2], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]); - t->dp[9] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]); - t->dp[10] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]); - t->dp[11] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]); - t->dp[12] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]); - t->dp[13] = l; - l = h; - h = o; - SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]); - t->dp[14] = l; - t->dp[15] = h; - t->used = 16; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { - sp_clamp(r); - } - - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - - return err; + return MP_OKAY; } #endif /* SQR_MUL_ASM */ #endif /* SP_WORD_SIZE == 32 */ @@ -5409,274 +5392,247 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) */ static int _sp_mul_12(sp_int* a, sp_int* b, sp_int* r) { - int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit t[12]; - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { 
- err = MP_MEM; - } - } - #endif + SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); + t[0] = h; + h = 0; + SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); + t[1] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); + t[2] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); + t[3] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]); + t[4] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]); + t[5] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]); + t[6] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]); + t[7] = l; + l = h; + h = 
o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]); + t[8] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]); + t[9] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]); + t[10] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, 
a->dp[9], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]); + t[11] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]); + r->dp[12] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]); + r->dp[13] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]); + r->dp[14] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]); + 
SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]); + r->dp[15] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]); + r->dp[16] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]); + r->dp[17] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]); + r->dp[18] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]); + r->dp[19] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]); + r->dp[20] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]); + SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]); + r->dp[21] = l; + l = h; + h = o; + SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]); + r->dp[22] = l; + r->dp[23] = h; + XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit)); + r->used = 24; + sp_clamp(r); - if (err == MP_OKAY) { - h = 0; - l = 0; - SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); - t->dp[0] = h; - h = 0; - o = 0; - SP_ASM_MUL_ADD_NO(l, h, a->dp[0], 
b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); - t->dp[1] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); - t->dp[2] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); - t->dp[3] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]); - t->dp[4] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]); - t->dp[5] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]); - t->dp[6] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]); - t->dp[7] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], 
b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]); - t->dp[8] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]); - t->dp[9] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]); - t->dp[10] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]); - SP_ASM_MUL_ADD(l, 
h, o, a->dp[11], b->dp[0]); - t->dp[11] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]); - t->dp[12] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]); - t->dp[13] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]); - t->dp[14] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], 
b->dp[4]); - t->dp[15] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]); - t->dp[16] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]); - t->dp[17] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]); - t->dp[18] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]); - t->dp[19] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]); - t->dp[20] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]); - SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]); - t->dp[21] = l; - l = h; - h = o; - SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]); - t->dp[22] = l; - t->dp[23] = h; - t->used = 24; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { - sp_clamp(r); - } - - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - - return err; + return MP_OKAY; } #endif /* SQR_MUL_ASM */ #endif /* SP_WORD_SIZE == 32 */ @@ -5696,41 +5652,36 @@ int 
sp_mod(sp_int* a, sp_int* m, sp_int* r) static int _sp_mul_16(sp_int* a, sp_int* b, sp_int* r) { int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; #else - sp_int t[1]; + sp_int_digit t[16]; #endif - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL, + DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } #endif - if (err == MP_OKAY) { - h = 0; - l = 0; SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); - t->dp[0] = h; + t[0] = h; h = 0; - o = 0; SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); - t->dp[1] = l; + t[1] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); - t->dp[2] = l; + t[2] = l; l = h; h = o; o = 0; @@ -5738,7 +5689,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); - t->dp[3] = l; + t[3] = l; l = h; h = o; o = 0; @@ -5747,7 +5698,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]); - t->dp[4] = l; + t[4] = l; l = h; h = o; o = 0; @@ -5757,7 +5708,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]); - t->dp[5] = l; + t[5] = l; l = h; h = o; o = 0; @@ 
-5768,7 +5719,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]); - t->dp[6] = l; + t[6] = l; l = h; h = o; o = 0; @@ -5780,7 +5731,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]); - t->dp[7] = l; + t[7] = l; l = h; h = o; o = 0; @@ -5793,7 +5744,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]); - t->dp[8] = l; + t[8] = l; l = h; h = o; o = 0; @@ -5807,7 +5758,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]); - t->dp[9] = l; + t[9] = l; l = h; h = o; o = 0; @@ -5822,7 +5773,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]); - t->dp[10] = l; + t[10] = l; l = h; h = o; o = 0; @@ -5838,7 +5789,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]); - t->dp[11] = l; + t[11] = l; l = h; h = o; o = 0; @@ -5855,7 +5806,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]); - t->dp[12] = l; + t[12] = l; l = h; h = o; o = 0; @@ -5873,7 +5824,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]); - t->dp[13] = l; + t[13] = l; l = h; 
h = o; o = 0; @@ -5892,7 +5843,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]); - t->dp[14] = l; + t[14] = l; l = h; h = o; o = 0; @@ -5912,7 +5863,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]); - t->dp[15] = l; + t[15] = l; l = h; h = o; o = 0; @@ -5931,7 +5882,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]); - t->dp[16] = l; + r->dp[16] = l; l = h; h = o; o = 0; @@ -5949,7 +5900,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]); - t->dp[17] = l; + r->dp[17] = l; l = h; h = o; o = 0; @@ -5966,7 +5917,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]); - t->dp[18] = l; + r->dp[18] = l; l = h; h = o; o = 0; @@ -5982,7 +5933,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]); - t->dp[19] = l; + r->dp[19] = l; l = h; h = o; o = 0; @@ -5997,7 +5948,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]); - t->dp[20] = l; + r->dp[20] = l; l = h; h = o; o = 0; @@ -6011,7 +5962,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]); SP_ASM_MUL_ADD(l, h, 
o, a->dp[15], b->dp[6]); - t->dp[21] = l; + r->dp[21] = l; l = h; h = o; o = 0; @@ -6024,7 +5975,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]); - t->dp[22] = l; + r->dp[22] = l; l = h; h = o; o = 0; @@ -6036,7 +5987,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]); - t->dp[23] = l; + r->dp[23] = l; l = h; h = o; o = 0; @@ -6047,7 +5998,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]); - t->dp[24] = l; + r->dp[24] = l; l = h; h = o; o = 0; @@ -6057,7 +6008,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]); - t->dp[25] = l; + r->dp[25] = l; l = h; h = o; o = 0; @@ -6066,7 +6017,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]); - t->dp[26] = l; + r->dp[26] = l; l = h; h = o; o = 0; @@ -6074,38 +6025,35 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]); - t->dp[27] = l; + r->dp[27] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]); - t->dp[28] = l; + r->dp[28] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]); - t->dp[29] = l; + r->dp[29] = l; l = h; h = o; 
SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]); - t->dp[30] = l; - t->dp[31] = h; - t->used = 32; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + r->dp[30] = l; + r->dp[31] = h; + XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit)); + r->used = 32; sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); } #endif - return err; } #endif /* SP_INT_DIGITS >= 32 */ @@ -6123,41 +6071,36 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) static int _sp_mul_24(sp_int* a, sp_int* b, sp_int* r) { int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; #else - sp_int t[1]; + sp_int_digit t[24]; #endif - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL, + DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } #endif - if (err == MP_OKAY) { - h = 0; - l = 0; SP_ASM_MUL(h, l, a->dp[0], b->dp[0]); - t->dp[0] = h; + t[0] = h; h = 0; - o = 0; SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]); - t->dp[1] = l; + t[1] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]); - t->dp[2] = l; + t[2] = l; l = h; h = o; o = 0; @@ -6165,7 +6108,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]); - t->dp[3] = l; + t[3] = l; l = h; h = o; o = 0; @@ 
-6174,7 +6117,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]); - t->dp[4] = l; + t[4] = l; l = h; h = o; o = 0; @@ -6184,7 +6127,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]); - t->dp[5] = l; + t[5] = l; l = h; h = o; o = 0; @@ -6195,7 +6138,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]); - t->dp[6] = l; + t[6] = l; l = h; h = o; o = 0; @@ -6207,7 +6150,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]); - t->dp[7] = l; + t[7] = l; l = h; h = o; o = 0; @@ -6220,7 +6163,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]); - t->dp[8] = l; + t[8] = l; l = h; h = o; o = 0; @@ -6234,7 +6177,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]); - t->dp[9] = l; + t[9] = l; l = h; h = o; o = 0; @@ -6249,7 +6192,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]); - t->dp[10] = l; + t[10] = l; l = h; h = o; o = 0; @@ -6265,7 +6208,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]); - t->dp[11] = l; + t[11] = l; l = h; h = o; o 
= 0; @@ -6282,7 +6225,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]); - t->dp[12] = l; + t[12] = l; l = h; h = o; o = 0; @@ -6300,7 +6243,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]); - t->dp[13] = l; + t[13] = l; l = h; h = o; o = 0; @@ -6319,7 +6262,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]); - t->dp[14] = l; + t[14] = l; l = h; h = o; o = 0; @@ -6339,7 +6282,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]); - t->dp[15] = l; + t[15] = l; l = h; h = o; o = 0; @@ -6360,7 +6303,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]); - t->dp[16] = l; + t[16] = l; l = h; h = o; o = 0; @@ -6382,7 +6325,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]); - t->dp[17] = l; + t[17] = l; l = h; h = o; o = 0; @@ -6405,7 +6348,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]); - t->dp[18] = l; + t[18] = l; l = h; h = o; o = 0; @@ -6429,7 +6372,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]); - 
t->dp[19] = l; + t[19] = l; l = h; h = o; o = 0; @@ -6454,7 +6397,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]); - t->dp[20] = l; + t[20] = l; l = h; h = o; o = 0; @@ -6480,7 +6423,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]); - t->dp[21] = l; + t[21] = l; l = h; h = o; o = 0; @@ -6507,7 +6450,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]); - t->dp[22] = l; + t[22] = l; l = h; h = o; o = 0; @@ -6535,7 +6478,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]); - t->dp[23] = l; + t[23] = l; l = h; h = o; o = 0; @@ -6562,7 +6505,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]); - t->dp[24] = l; + r->dp[24] = l; l = h; h = o; o = 0; @@ -6588,7 +6531,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]); - t->dp[25] = l; + r->dp[25] = l; l = h; h = o; o = 0; @@ -6613,7 +6556,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]); - t->dp[26] = l; + r->dp[26] = l; l = h; h = o; o = 0; @@ -6637,7 +6580,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], 
b->dp[5]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]); - t->dp[27] = l; + r->dp[27] = l; l = h; h = o; o = 0; @@ -6660,7 +6603,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]); - t->dp[28] = l; + r->dp[28] = l; l = h; h = o; o = 0; @@ -6682,7 +6625,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]); - t->dp[29] = l; + r->dp[29] = l; l = h; h = o; o = 0; @@ -6703,7 +6646,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]); - t->dp[30] = l; + r->dp[30] = l; l = h; h = o; o = 0; @@ -6723,7 +6666,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]); - t->dp[31] = l; + r->dp[31] = l; l = h; h = o; o = 0; @@ -6742,7 +6685,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]); - t->dp[32] = l; + r->dp[32] = l; l = h; h = o; o = 0; @@ -6760,7 +6703,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]); - t->dp[33] = l; + r->dp[33] = l; l = h; h = o; o = 0; @@ -6777,7 +6720,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]); - t->dp[34] = l; + r->dp[34] = l; l = h; h = o; o = 0; @@ -6793,7 +6736,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) 
SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]); - t->dp[35] = l; + r->dp[35] = l; l = h; h = o; o = 0; @@ -6808,7 +6751,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]); - t->dp[36] = l; + r->dp[36] = l; l = h; h = o; o = 0; @@ -6822,7 +6765,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]); - t->dp[37] = l; + r->dp[37] = l; l = h; h = o; o = 0; @@ -6835,7 +6778,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]); - t->dp[38] = l; + r->dp[38] = l; l = h; h = o; o = 0; @@ -6847,7 +6790,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]); - t->dp[39] = l; + r->dp[39] = l; l = h; h = o; o = 0; @@ -6858,7 +6801,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]); - t->dp[40] = l; + r->dp[40] = l; l = h; h = o; o = 0; @@ -6868,7 +6811,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]); - t->dp[41] = l; + r->dp[41] = l; l = h; h = o; o = 0; @@ -6877,7 +6820,7 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]); - t->dp[42] = l; + 
r->dp[42] = l; l = h; h = o; o = 0; @@ -6885,38 +6828,35 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]); - t->dp[43] = l; + r->dp[43] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]); SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]); - t->dp[44] = l; + r->dp[44] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]); SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]); - t->dp[45] = l; + r->dp[45] = l; l = h; h = o; SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]); - t->dp[46] = l; - t->dp[47] = h; - t->used = 48; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + r->dp[46] = l; + r->dp[47] = h; + XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit)); + r->used = 48; sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); } #endif - return err; } #endif /* SP_INT_DIGITS >= 48 */ @@ -6937,11 +6877,6 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) int i; sp_int_digit l; sp_int_digit h; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[4]; - #endif sp_int* a1; sp_int* b1; sp_int* z0; @@ -6949,22 +6884,17 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_int* z2; sp_int_digit ca; sp_int_digit cb; + DECL_SP_INT_ARRAY(t, 16, 2); + DECL_SP_INT_ARRAY(z, 33, 2); - #ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL); + ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif - - if (err == MP_OKAY) { - a1 = &t[0]; - b1 = &t[1]; + a1 = t[0]; + b1 = t[1]; + z1 = z[0]; + z2 = z[1]; z0 = r; - z1 = &t[2]; - z2 = &t[3]; XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16); a1->used = 16; @@ -7017,9 +6947,8 
@@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) /* r = z0 */ /* r += (z1 - z0 - z2) << 16 */ z1->dp[32] = ca & cb; - z1->used = 33; + l = 0; if (ca) { - l = 0; h = 0; for (i = 0; i < 16; i++) { SP_ASM_ADDC(l, h, z1->dp[i + 16]); @@ -7028,10 +6957,10 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) l = h; h = 0; } - z1->dp[32] += l; } + z1->dp[32] += l; + l = 0; if (cb) { - l = 0; h = 0; for (i = 0; i < 16; i++) { SP_ASM_ADDC(l, h, z1->dp[i + 16]); @@ -7040,8 +6969,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) l = h; h = 0; } - z1->dp[32] += l; } + z1->dp[32] += l; /* z1 = z1 - z0 - z1 */ l = 0; h = 0; @@ -7090,12 +7019,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - + FREE_SP_INT_ARRAY(z, NULL); + FREE_SP_INT_ARRAY(t, NULL); return err; } #endif /* SP_INT_DIGITS >= 64 */ @@ -7116,11 +7041,6 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) int i; sp_int_digit l; sp_int_digit h; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[4]; - #endif sp_int* a1; sp_int* b1; sp_int* z0; @@ -7128,22 +7048,17 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_int* z2; sp_int_digit ca; sp_int_digit cb; + DECL_SP_INT_ARRAY(t, 24, 2); + DECL_SP_INT_ARRAY(z, 49, 2); - #ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL); + ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif - - if (err == MP_OKAY) { - a1 = &t[0]; - b1 = &t[1]; + a1 = t[0]; + b1 = t[1]; + z1 = z[0]; + z2 = z[1]; z0 = r; - z1 = &t[2]; - z2 = &t[3]; XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24); a1->used = 24; @@ -7196,9 +7111,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) /* r = z0 */ /* r += (z1 - z0 - z2) << 24 */ z1->dp[48] = ca & cb; - z1->used = 49; + l = 0; if (ca) { - l = 0; h = 0; for (i = 0; i < 24; i++) { SP_ASM_ADDC(l, 
h, z1->dp[i + 24]); @@ -7207,10 +7121,10 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) l = h; h = 0; } - z1->dp[48] += l; } + z1->dp[48] += l; + l = 0; if (cb) { - l = 0; h = 0; for (i = 0; i < 24; i++) { SP_ASM_ADDC(l, h, z1->dp[i + 24]); @@ -7219,8 +7133,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) l = h; h = 0; } - z1->dp[48] += l; } + z1->dp[48] += l; /* z1 = z1 - z0 - z1 */ l = 0; h = 0; @@ -7269,12 +7183,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - + FREE_SP_INT_ARRAY(z, NULL); + FREE_SP_INT_ARRAY(t, NULL); return err; } #endif /* SP_INT_DIGITS >= 96 */ @@ -7295,11 +7205,6 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) int i; sp_int_digit l; sp_int_digit h; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[4]; - #endif sp_int* a1; sp_int* b1; sp_int* z0; @@ -7307,22 +7212,17 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_int* z2; sp_int_digit ca; sp_int_digit cb; + DECL_SP_INT_ARRAY(t, 32, 2); + DECL_SP_INT_ARRAY(z, 65, 2); - #ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL); + ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif - - if (err == MP_OKAY) { - a1 = &t[0]; - b1 = &t[1]; + a1 = t[0]; + b1 = t[1]; + z1 = z[0]; + z2 = z[1]; z0 = r; - z1 = &t[2]; - z2 = &t[3]; XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32); a1->used = 32; @@ -7375,9 +7275,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) /* r = z0 */ /* r += (z1 - z0 - z2) << 32 */ z1->dp[64] = ca & cb; - z1->used = 65; + l = 0; if (ca) { - l = 0; h = 0; for (i = 0; i < 32; i++) { SP_ASM_ADDC(l, h, z1->dp[i + 32]); @@ -7386,10 +7285,10 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) l = h; h = 0; } - z1->dp[64] += l; } + z1->dp[64] += l; + l = 0; if (cb) { - l = 0; h = 0; for (i = 0; i < 32; i++) { 
SP_ASM_ADDC(l, h, z1->dp[i + 32]); @@ -7398,8 +7297,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) l = h; h = 0; } - z1->dp[64] += l; } + z1->dp[64] += l; /* z1 = z1 - z0 - z1 */ l = 0; h = 0; @@ -7448,12 +7347,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - + FREE_SP_INT_ARRAY(z, NULL); + FREE_SP_INT_ARRAY(t, NULL); return err; } #endif /* SP_INT_DIGITS >= 128 */ @@ -7474,11 +7369,6 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) int i; sp_int_digit l; sp_int_digit h; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[4]; - #endif sp_int* a1; sp_int* b1; sp_int* z0; @@ -7486,22 +7376,17 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_int* z2; sp_int_digit ca; sp_int_digit cb; + DECL_SP_INT_ARRAY(t, 48, 2); + DECL_SP_INT_ARRAY(z, 97, 2); - #ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL); + ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif - - if (err == MP_OKAY) { - a1 = &t[0]; - b1 = &t[1]; + a1 = t[0]; + b1 = t[1]; + z1 = z[0]; + z2 = z[1]; z0 = r; - z1 = &t[2]; - z2 = &t[3]; XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48); a1->used = 48; @@ -7554,9 +7439,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) /* r = z0 */ /* r += (z1 - z0 - z2) << 48 */ z1->dp[96] = ca & cb; - z1->used = 97; + l = 0; if (ca) { - l = 0; h = 0; for (i = 0; i < 48; i++) { SP_ASM_ADDC(l, h, z1->dp[i + 48]); @@ -7565,10 +7449,10 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) l = h; h = 0; } - z1->dp[96] += l; } + z1->dp[96] += l; + l = 0; if (cb) { - l = 0; h = 0; for (i = 0; i < 48; i++) { SP_ASM_ADDC(l, h, z1->dp[i + 48]); @@ -7577,8 +7461,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) l = h; h = 0; } - z1->dp[96] += l; } + z1->dp[96] += l; /* z1 = z1 - z0 - z1 */ l = 0; h = 0; @@ -7627,12 
+7511,8 @@ int sp_mod(sp_int* a, sp_int* m, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - + FREE_SP_INT_ARRAY(z, NULL); + FREE_SP_INT_ARRAY(t, NULL); return err; } #endif /* SP_INT_DIGITS >= 192 */ @@ -7663,7 +7543,7 @@ int sp_mul(sp_int* a, sp_int* b, sp_int* r) } /* Need extra digit during calculation. */ - if ((err == MP_OKAY) && (a->used + b->used >= r->size)) { + if ((err == MP_OKAY) && (a->used + b->used > r->size)) { err = MP_VAL; } @@ -7795,30 +7675,18 @@ int sp_mul(sp_int* a, sp_int* b, sp_int* r) int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) { int err = MP_OKAY; -#ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; -#else - sp_int t[1]; -#endif + DECL_SP_INT(t, ((a == NULL) || (b == NULL)) ? 1 : a->used + b->used); if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; } - /* Need extra digit during calculation. */ - if ((err == MP_OKAY) && (a->used + b->used >= r->size)) { + if ((err == MP_OKAY) && (a->used + b->used > r->size)) { err = MP_VAL; } -#ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT(t, a->used + b->used, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } -#endif - if (err == MP_OKAY) { - err = sp_init(t); + err = sp_init_size(t, a->used + b->used); } if (err == MP_OKAY) { err = sp_mul(a, b, t); @@ -7827,11 +7695,7 @@ int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) err = sp_mod(t, m, r); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif + FREE_SP_INT(t, NULL); return err; } #endif @@ -7853,17 +7717,11 @@ int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) int sp_invmod(sp_int* a, sp_int* m, sp_int* r) { int err = MP_OKAY; -#ifdef WOLFSSL_SMALL_STACK - sp_int* u = NULL; + sp_int* u; sp_int* v; sp_int* b; sp_int* c; -#else - sp_int u[1]; - sp_int v[1]; - sp_int b[1]; - sp_int c[1]; 
-#endif + DECL_SP_INT_ARRAY(t, (m == NULL) ? 0 : m->used + 1, 4); if ((a == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; @@ -7875,22 +7733,13 @@ int sp_invmod(sp_int* a, sp_int* m, sp_int* r) } #endif -#ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT_ARRAY(t, (m == NULL) ? 0 : m->used + 1, 4, err, NULL); if (err == MP_OKAY) { - u = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); - if (u == NULL) { - err = MP_MEM; - } - } -#endif - - if (err == MP_OKAY) { -#ifdef WOLFSSL_SMALL_STACK - v = &u[1]; - b = &u[2]; - c = &u[3]; -#endif - sp_init(v); + u = t[0]; + v = t[1]; + b = t[2]; + c = t[3]; + sp_init_size(v, m->used + 1); if (_sp_cmp_abs(a, m) != MP_LT) { err = sp_mod(a, m, v); @@ -7938,9 +7787,9 @@ int sp_invmod(sp_int* a, sp_int* m, sp_int* r) } } else { - sp_init(u); - sp_init(b); - sp_init(c); + sp_init_size(u, m->used + 1); + sp_init_size(b, m->used + 1); + sp_init_size(c, m->used + 1); sp_copy(m, u); sp_copy(a, v); @@ -7985,12 +7834,7 @@ int sp_invmod(sp_int* a, sp_int* m, sp_int* r) } } -#ifdef WOLFSSL_SMALL_STACK - if (u != NULL) { - XFREE(u, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif - + FREE_SP_INT_ARRAY(t, NULL); return err; } #endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA || \ @@ -8018,15 +7862,10 @@ int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp) int err = MP_OKAY; int i; int j; -#ifndef WOLFSSL_SMALL_STACK - sp_int t[1]; - sp_int e[1]; - sp_int pre[CT_INV_MOD_PRE_CNT]; -#else - sp_int* t = NULL; + sp_int* t; sp_int* e; - sp_int* pre; -#endif /* WOLFSSL_SMALL_STACK */ + DECL_SP_INT_ARRAY(pre, (m == NULL) ? 
1 : m->used * 2 + 1, + CT_INV_MOD_PRE_CNT + 2); if ((a == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; @@ -8037,37 +7876,27 @@ int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp) (m->used == 1 && m->dp[0] < 3))) { err = MP_VAL; } -#ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * (2 + CT_INV_MOD_PRE_CNT), NULL, - DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } -#endif /* WOLFSSL_SMALL_STACK */ + ALLOC_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err, NULL); if (err == MP_OKAY) { -#ifdef WOLFSSL_SMALL_STACK - e = &t[1]; - pre = t + 2; -#endif /* WOLFSSL_SMALL_STACK */ - sp_init(t); - sp_init(e); + t = pre[CT_INV_MOD_PRE_CNT + 0]; + e = pre[CT_INV_MOD_PRE_CNT + 1]; + sp_init_size(t, m->used * 2 + 1); + sp_init_size(e, m->used * 2 + 1); - sp_init(&pre[0]); - err = sp_copy(a, &pre[0]); + sp_init_size(pre[0], m->used * 2 + 1); + err = sp_copy(a, pre[0]); for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) { - sp_init(&pre[i]); - err = sp_sqr(&pre[i-1], &pre[i]); + sp_init_size(pre[i], m->used * 2 + 1); + err = sp_sqr(pre[i-1], pre[i]); if (err == MP_OKAY) { - err = _sp_mont_red(&pre[i], m, mp); + err = _sp_mont_red(pre[i], m, mp); } if (err == MP_OKAY) { - err = sp_mul(&pre[i], a, &pre[i]); + err = sp_mul(pre[i], a, pre[i]); } if (err == MP_OKAY) { - err = _sp_mont_red(&pre[i], m, mp); + err = _sp_mont_red(pre[i], m, mp); } } } @@ -8079,12 +7908,12 @@ int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp) break; } } - err = sp_copy(&pre[j-1], t); + err = sp_copy(pre[j-1], t); for (j = 0; (err == MP_OKAY) && (i >= 0); i--) { int set = sp_is_bit_set(e, i); if ((j == CT_INV_MOD_PRE_CNT) || ((!set) && j > 0)) { - err = sp_mul(t, &pre[j-1], t); + err = sp_mul(t, pre[j-1], t); if (err == MP_OKAY) { err = _sp_mont_red(t, m, mp); } @@ -8101,7 +7930,7 @@ int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp) } if (err == MP_OKAY) { if 
(j > 0) { - err = sp_mul(t, &pre[j-1], r); + err = sp_mul(t, pre[j-1], r); if (err == MP_OKAY) { err = _sp_mont_red(r, m, mp); } @@ -8111,11 +7940,7 @@ int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp) } } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif /* WOLFSSL_SMALL_STACK */ + FREE_SP_INT_ARRAY(pre, NULL); return err; } @@ -8150,97 +7975,82 @@ static int _sp_exptmod_ex(sp_int* b, sp_int* e, int bits, sp_int* m, sp_int* r) int j; int y; int seenTopBit = 0; -#ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; -#else #ifdef WC_NO_CACHE_RESISTANT - sp_int t[2]; + DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2); #else - sp_int t[3]; -#endif + DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3); #endif -#ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { #ifdef WC_NO_CACHE_RESISTANT - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); + ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL); #else - t = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); -#endif - if (t == NULL) { - err = MP_MEM; - } - } + ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL); #endif if (err == MP_OKAY) { - sp_init(&t[0]); - sp_init(&t[1]); -#ifndef WC_NO_CACHE_RESISTANT - sp_init(&t[2]); -#endif + sp_init_size(t[0], 2 * m->used + 1); + sp_init_size(t[1], 2 * m->used + 1); + #ifndef WC_NO_CACHE_RESISTANT + sp_init_size(t[2], 2 * m->used + 1); + #endif /* Ensure base is less than exponent. 
*/ if (_sp_cmp(b, m) != MP_LT) { - err = sp_mod(b, m, &t[0]); - if ((err == MP_OKAY) && sp_iszero(t)) { + err = sp_mod(b, m, t[0]); + if ((err == MP_OKAY) && sp_iszero(t[0])) { sp_set(r, 0); done = 1; } } else { - err = sp_copy(b, &t[0]); + err = sp_copy(b, t[0]); } } if ((!done) && (err == MP_OKAY)) { /* t[0] is dummy value and t[1] is result */ - err = sp_copy(&t[0], &t[1]); + err = sp_copy(t[0], t[1]); for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) { #ifdef WC_NO_CACHE_RESISTANT /* Square real result if seen the top bit. */ - err = sp_sqrmod(&t[seenTopBit], m, &t[seenTopBit]); + err = sp_sqrmod(t[seenTopBit], m, t[seenTopBit]); if (err == MP_OKAY) { y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1; j = y & seenTopBit; seenTopBit |= y; /* Multiply real result if bit is set and seen the top bit. */ - err = sp_mulmod(&t[j], b, m, &t[j]); + err = sp_mulmod(t[j], b, m, t[j]); } #else /* Square real result if seen the top bit. */ - sp_copy((sp_int*)(((size_t)&t[0] & sp_off_on_addr[seenTopBit^1]) + - ((size_t)&t[1] & sp_off_on_addr[seenTopBit ])), - &t[2]); - err = sp_sqrmod(&t[2], m, &t[2]); - sp_copy(&t[2], - (sp_int*)(((size_t)&t[0] & sp_off_on_addr[seenTopBit^1]) + - ((size_t)&t[1] & sp_off_on_addr[seenTopBit ]))); + sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) + + ((size_t)t[1] & sp_off_on_addr[seenTopBit ])), + t[2]); + err = sp_sqrmod(t[2], m, t[2]); + sp_copy(t[2], + (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) + + ((size_t)t[1] & sp_off_on_addr[seenTopBit ]))); if (err == MP_OKAY) { y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1; j = y & seenTopBit; seenTopBit |= y; /* Multiply real result if bit is set and seen the top bit. 
*/ - sp_copy((sp_int*)(((size_t)&t[0] & sp_off_on_addr[j^1]) + - ((size_t)&t[1] & sp_off_on_addr[j ])), - &t[2]); - err = sp_mulmod(&t[2], b, m, &t[2]); - sp_copy(&t[2], - (sp_int*)(((size_t)&t[0] & sp_off_on_addr[j^1]) + - ((size_t)&t[1] & sp_off_on_addr[j ]))); + sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) + + ((size_t)t[1] & sp_off_on_addr[j ])), + t[2]); + err = sp_mulmod(t[2], b, m, t[2]); + sp_copy(t[2], + (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) + + ((size_t)t[1] & sp_off_on_addr[j ]))); } #endif } } if ((!done) && (err == MP_OKAY)) { - err = sp_copy(&t[1], r); + err = sp_copy(t[1], r); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif + FREE_SP_INT_ARRAY(t, NULL); return err; } #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || @@ -8273,34 +8083,26 @@ static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m, int j; int y; int seenTopBit = 0; -#ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; -#else - sp_int t[4]; -#endif sp_int_digit mp; + DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4); -#ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } -#endif - if (err == MP_OKAY) { - sp_init_multi(&t[0], &t[1], &t[2], &t[3], NULL, NULL); + sp_init_size(t[0], m->used * 2 + 1); + sp_init_size(t[1], m->used * 2 + 1); + sp_init_size(t[2], m->used * 2 + 1); + sp_init_size(t[3], m->used * 2 + 1); /* Ensure base is less than exponent. 
*/ if (_sp_cmp(b, m) != MP_LT) { - err = sp_mod(b, m, &t[0]); - if ((err == MP_OKAY) && sp_iszero(&t[0])) { + err = sp_mod(b, m, t[0]); + if ((err == MP_OKAY) && sp_iszero(t[0])) { sp_set(r, 0); done = 1; } } else { - err = sp_copy(b, &t[0]); + err = sp_copy(b, t[0]); } } @@ -8308,66 +8110,66 @@ static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m, if ((!done) && (err == MP_OKAY)) { err = sp_mont_setup(m, &mp); if (err == MP_OKAY) { - err = sp_mont_norm(&t[1], m); + err = sp_mont_norm(t[1], m); } if (err == MP_OKAY) { /* Convert to montgomery form. */ - err = sp_mulmod(&t[0], &t[1], m, &t[0]); + err = sp_mulmod(t[0], t[1], m, t[0]); } if (err == MP_OKAY) { /* t[0] is fake working value and t[1] is real working value. */ - sp_copy(&t[0], &t[1]); + sp_copy(t[0], t[1]); /* Montgomert form of base to multiply by. */ - sp_copy(&t[0], &t[2]); + sp_copy(t[0], t[2]); } for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) { /* Square real working value if seen the top bit. */ - sp_copy((sp_int*)(((size_t)&t[0] & sp_off_on_addr[seenTopBit^1]) + - ((size_t)&t[1] & sp_off_on_addr[seenTopBit ])), - &t[3]); - err = sp_sqr(&t[3], &t[3]); + sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) + + ((size_t)t[1] & sp_off_on_addr[seenTopBit ])), + t[3]); + err = sp_sqr(t[3], t[3]); if (err == MP_OKAY) { - err = _sp_mont_red(&t[3], m, mp); + err = _sp_mont_red(t[3], m, mp); } - sp_copy(&t[3], - (sp_int*)(((size_t)&t[0] & sp_off_on_addr[seenTopBit^1]) + - ((size_t)&t[1] & sp_off_on_addr[seenTopBit ]))); + sp_copy(t[3], + (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) + + ((size_t)t[1] & sp_off_on_addr[seenTopBit ]))); if (err == MP_OKAY) { y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1; j = y & seenTopBit; seenTopBit |= y; /* Multiply real value if bit is set and seen the top bit. 
*/ - sp_copy((sp_int*)(((size_t)&t[0] & sp_off_on_addr[j^1]) + - ((size_t)&t[1] & sp_off_on_addr[j ])), - &t[3]); - err = sp_mul(&t[3], &t[2], &t[3]); + sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) + + ((size_t)t[1] & sp_off_on_addr[j ])), + t[3]); + err = sp_mul(t[3], t[2], t[3]); if (err == MP_OKAY) { - err = _sp_mont_red(&t[3], m, mp); + err = _sp_mont_red(t[3], m, mp); } - sp_copy(&t[3], - (sp_int*)(((size_t)&t[0] & sp_off_on_addr[j^1]) + - ((size_t)&t[1] & sp_off_on_addr[j ]))); + sp_copy(t[3], + (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) + + ((size_t)t[1] & sp_off_on_addr[j ]))); } } if (err == MP_OKAY) { /* Convert from montgomery form. */ - err = _sp_mont_red(&t[1], m, mp); + err = _sp_mont_red(t[1], m, mp); /* Reduction implementation returns number to range < m. */ } } if ((!done) && (err == MP_OKAY)) { - err = sp_copy(&t[1], r); + err = sp_copy(t[1], r); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif + FREE_SP_INT_ARRAY(t, NULL); return err; } #else + +/* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */ +#define SP_ALLOC + /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m * Creates a window of precalculated exponents with base in montgomery form. * Is constant time but NOT cache attack resistant. @@ -8393,11 +8195,11 @@ static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m, int preCnt; int err = MP_OKAY; int done = 0; - sp_int* t = NULL; - sp_int* tr = NULL; sp_int_digit mp; sp_int_digit n; sp_int_digit mask; + sp_int* tr = NULL; + DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1); if (bits > 450) { winBits = 6; @@ -8417,33 +8219,25 @@ static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m, preCnt = 1 << winBits; mask = preCnt - 1; + ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL); if (err == MP_OKAY) { - /* Allocate memory for window. 
*/ - t = (sp_int*)XMALLOC(sizeof(sp_int) * (preCnt + 1), NULL, - DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + tr = t[preCnt]; - if (err == MP_OKAY) { - /* Initialize window numbers and temporary result. */ - tr = t + preCnt; for (i = 0; i < preCnt; i++) { - sp_init(&t[i]); + sp_init_size(t[i], m->used * 2 + 1); } - sp_init(tr); + sp_init_size(tr, m->used * 2 + 1); /* Ensure base is less than exponent. */ if (_sp_cmp(b, m) != MP_LT) { - err = sp_mod(b, m, &t[1]); - if ((err == MP_OKAY) && sp_iszero(&t[1])) { + err = sp_mod(b, m, t[1]); + if ((err == MP_OKAY) && sp_iszero(t[1])) { sp_set(r, 0); done = 1; } } else { - err = sp_copy(b, &t[1]); + err = sp_copy(b, t[1]); } } @@ -8451,23 +8245,23 @@ static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m, err = sp_mont_setup(m, &mp); if (err == MP_OKAY) { /* Norm value is 1 in montgomery form. */ - err = sp_mont_norm(&t[0], m); + err = sp_mont_norm(t[0], m); } if (err == MP_OKAY) { /* Convert base to montgomery form. */ - err = sp_mulmod(&t[1], &t[0], m, &t[1]); + err = sp_mulmod(t[1], t[0], m, t[1]); } /* Pre-calculate values */ for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) { if ((i & 1) == 0) { - err = sp_sqr(&t[i/2], &t[i]); + err = sp_sqr(t[i/2], t[i]); } else { - err = sp_mul(&t[i-1], &t[1], &t[i]); + err = sp_mul(t[i-1], t[1], t[i]); } if (err == MP_OKAY) { - err = _sp_mont_red(&t[i], m, mp); + err = _sp_mont_red(t[i], m, mp); } } @@ -8483,7 +8277,7 @@ static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m, y = (int)(n >> c); n <<= SP_WORD_SIZE - c; /* Copy window number for top bits. */ - sp_copy(&t[y], tr); + sp_copy(t[y], tr); for (; (i >= 0) || (c >= winBits); ) { if (c == 0) { /* Bits up to end of digit */ @@ -8517,7 +8311,7 @@ static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m, } /* Multiply by window number for next set of bits. 
*/ if (err == MP_OKAY) { - err = sp_mul(tr, &t[y], tr); + err = sp_mul(tr, t[y], tr); } if (err == MP_OKAY) { err = _sp_mont_red(tr, m, mp); @@ -8535,11 +8329,12 @@ static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m, err = sp_copy(tr, r); } - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } + FREE_SP_INT_ARRAY(t, NULL); return err; } + +#undef SP_ALLOC + #endif /* !WC_NO_CACHE_RESISTANT */ #endif /* !WC_NO_HARDEN */ @@ -8572,14 +8367,10 @@ static int _sp_exptmod_base_2(sp_int* e, int digits, sp_int* m, sp_int* r) int c; int y; int err = MP_OKAY; -#ifdef WOLFSSL_SMALL_STACK sp_int* t = NULL; sp_int* tr = NULL; -#else - sp_int t[1]; - sp_int tr[1]; -#endif sp_int_digit mp = 0, n; + DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2); if (0) { sp_print_int(2, "a"); @@ -8587,20 +8378,13 @@ static int _sp_exptmod_base_2(sp_int* e, int digits, sp_int* m, sp_int* r) sp_print(m, "m"); } -#ifdef WOLFSSL_SMALL_STACK - /* Allocate memory for window. */ - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } -#endif - + ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL); if (err == MP_OKAY) { -#ifdef WOLFSSL_SMALL_STACK - tr = t + 1; -#endif - sp_init(t); - sp_init(tr); + t = d[0]; + tr = d[1]; + + sp_init_size(t, m->used * 2 + 1); + sp_init_size(tr, m->used * 2 + 1); if (m->used > 1) { err = sp_mont_setup(m, &mp); @@ -8711,16 +8495,11 @@ static int _sp_exptmod_base_2(sp_int* e, int digits, sp_int* m, sp_int* r) err = sp_copy(tr, r); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif - if (0) { sp_print(r, "rme"); } + FREE_SP_INT_ARRAY(d, NULL); return err; } #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */ @@ -8901,6 +8680,10 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) #if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL) + +/* Always 
allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */ +#define SP_ALLOC + /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m * Creates a window of precalculated exponents with base in montgomery form. * Sliding window and is NOT constant time. @@ -8926,10 +8709,11 @@ static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r) int preCnt; int err = MP_OKAY; int done = 0; - sp_int* t = NULL; sp_int* tr = NULL; sp_int* bm = NULL; sp_int_digit mask; + /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */ + DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2); bits = sp_count_bits(e); @@ -8951,24 +8735,17 @@ static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r) preCnt = 1 << (winBits - 1); mask = preCnt - 1; - if (err == MP_OKAY) { - /* Allocate memory for window. */ - t = (sp_int*)XMALLOC(sizeof(sp_int) * (preCnt + 2), NULL, - DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - + ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 2, err, NULL); if (err == MP_OKAY) { /* Initialize window numbers and temporary result. */ - tr = t + preCnt; - bm = t + preCnt + 1; + tr = t[preCnt + 0]; + bm = t[preCnt + 1]; + for (i = 0; i < preCnt; i++) { - sp_init(&t[i]); + sp_init_size(t[i], m->used * 2 + 1); } - sp_init(tr); - sp_init(bm); + sp_init_size(tr, m->used * 2 + 1); + sp_init_size(bm, m->used * 2 + 1); /* Ensure base is less than exponent. 
*/ if (_sp_cmp(b, m) != MP_LT) { @@ -8989,24 +8766,24 @@ static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r) err = sp_mont_setup(m, &mp); if (err == MP_OKAY) { - err = sp_mont_norm(&t[0], m); + err = sp_mont_norm(t[0], m); } if (err == MP_OKAY) { - err = sp_mulmod(bm, &t[0], m, bm); + err = sp_mulmod(bm, t[0], m, bm); } if (err == MP_OKAY) { - err = sp_copy(bm, &t[0]); + err = sp_copy(bm, t[0]); } for (i = 1; (i < winBits) && (err == MP_OKAY); i++) { - err = sp_sqr(&t[0], &t[0]); + err = sp_sqr(t[0], t[0]); if (err == MP_OKAY) { - err = _sp_mont_red(&t[0], m, mp); + err = _sp_mont_red(t[0], m, mp); } } for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) { - err = sp_mul(&t[i-1], bm, &t[i]); + err = sp_mul(t[i-1], bm, t[i]); if (err == MP_OKAY) { - err = _sp_mont_red(&t[i], m, mp); + err = _sp_mont_red(t[i], m, mp); } } @@ -9038,7 +8815,7 @@ static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r) n <<= winBits; c -= winBits; } - err = sp_copy(&t[y], tr); + err = sp_copy(t[y], tr); } else { /* 1 in Montgomery form. */ @@ -9114,7 +8891,7 @@ static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r) } /* Multiply by window number for next set of bits. */ if (err == MP_OKAY) { - err = sp_mul(tr, &t[y], tr); + err = sp_mul(tr, t[y], tr); } if (err == MP_OKAY) { err = _sp_mont_red(tr, m, mp); @@ -9149,11 +8926,12 @@ static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r) err = sp_copy(tr, r); } - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } + FREE_SP_INT_ARRAY(t, NULL); return err; } + +#undef SP_ALLOC + #else /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m * Non-constant time implementation. 
@@ -9174,86 +8952,70 @@ static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r) int done = 0; int y; int bits = sp_count_bits(e); -#ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; -#else - sp_int t[2]; -#endif sp_int_digit mp; + DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2); -#ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } -#endif - - if (err == MP_OKAY) { - sp_init(&t[0]); - sp_init(&t[1]); + sp_init_size(t[0], m->used * 2 + 1); + sp_init_size(t[1], m->used * 2 + 1); /* Ensure base is less than exponent. */ if (_sp_cmp(b, m) != MP_LT) { - err = sp_mod(b, m, &t[0]); - if ((err == MP_OKAY) && sp_iszero(&t[0])) { + err = sp_mod(b, m, t[0]); + if ((err == MP_OKAY) && sp_iszero(t[0])) { sp_set(r, 0); done = 1; } } else { - err = sp_copy(b, &t[0]); + err = sp_copy(b, t[0]); } } if ((!done) && (err == MP_OKAY)) { err = sp_mont_setup(m, &mp); if (err == MP_OKAY) { - err = sp_mont_norm(&t[1], m); + err = sp_mont_norm(t[1], m); } if (err == MP_OKAY) { /* Convert to montgomery form. */ - err = sp_mulmod(&t[0], &t[1], m, &t[0]); + err = sp_mulmod(t[0], t[1], m, t[0]); } if (err == MP_OKAY) { /* Montgomert form of base to multiply by. */ - sp_copy(&t[0], &t[1]); + sp_copy(t[0], t[1]); } for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) { - err = sp_sqr(&t[0], &t[0]); + err = sp_sqr(t[0], t[0]); if (err == MP_OKAY) { - err = _sp_mont_red(&t[0], m, mp); + err = _sp_mont_red(t[0], m, mp); } if (err == MP_OKAY) { y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1; if (y != 0) { - err = sp_mul(&t[0], &t[1], &t[0]); + err = sp_mul(t[0], t[1], t[0]); if (err == MP_OKAY) { - err = _sp_mont_red(&t[0], m, mp); + err = _sp_mont_red(t[0], m, mp); } } } } if (err == MP_OKAY) { /* Convert from montgomery form. 
*/ - err = _sp_mont_red(&t[0], m, mp); + err = _sp_mont_red(t[0], m, mp); /* Reduction implementation returns number to range < m. */ } } if ((!done) && (err == MP_OKAY)) { - err = sp_copy(&t[0], r); + err = sp_copy(t[0], r); } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif + FREE_SP_INT_ARRAY(t, NULL); return err; } -#endif /* WOLFSSL_SP_SMALL */ +#endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */ /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m * Non-constant time implementation. @@ -9491,19 +9253,21 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) int i; int j; int k; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; + #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + defined(WOLFSSL_SP_SMALL) + sp_int_digit t[a->used * 2]; #else - sp_int t[1]; + sp_int_digit t[SP_INT_DIGITS]; #endif - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL, + DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } #endif if ((err == MP_OKAY) && (a->used <= 1)) { sp_int_digit l, h; @@ -9511,9 +9275,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) h = 0; l = 0; SP_ASM_SQR(h, l, a->dp[0]); - t->dp[0] = h; - t->dp[1] = l; - t->used = a->used * 2; + t[0] = h; + t[1] = l; } else if (err == MP_OKAY) { sp_int_digit l, h, o; @@ -9521,7 +9284,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) h = 0; l = 0; SP_ASM_SQR(h, l, a->dp[0]); - t->dp[0] = h; + t[0] = h; h = 0; o = 0; for (k = 1; k < (a->used + 1) / 2; k++) { @@ -9530,7 +9293,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) for (; (j >= 0); i++, j--) { SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]); 
} - t->dp[k * 2 - 1] = l; + t[k * 2 - 1] = l; l = h; h = o; o = 0; @@ -9541,7 +9304,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) for (; (j >= 0); i++, j--) { SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]); } - t->dp[k * 2] = l; + t[k * 2] = l; l = h; h = o; o = 0; @@ -9552,7 +9315,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) for (; (i < a->used); i++, j--) { SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]); } - t->dp[k * 2 - 1] = l; + t[k * 2 - 1] = l; l = h; h = o; o = 0; @@ -9563,28 +9326,25 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) for (; (i < a->used); i++, j--) { SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]); } - t->dp[k * 2] = l; + t[k * 2] = l; l = h; h = o; o = 0; } - t->dp[k * 2 - 1] = l; - t->dp[k * 2] = h; - t->used = a->used * 2; + t[k * 2 - 1] = l; } if (err == MP_OKAY) { - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + r->used = a->used * 2; + XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit)); sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); + } #endif - return err; } #else /* !SQR_MUL_ASM */ @@ -9602,21 +9362,22 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) int i; int j; int k; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; + #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + defined(WOLFSSL_SP_SMALL) + sp_int_digit t[a->used * 2]; #else - sp_int t[1]; + sp_int_digit t[SP_INT_DIGITS]; #endif - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL, + DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } #endif - if (err == MP_OKAY) { 
sp_int_word w; sp_int_word l; @@ -9626,7 +9387,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) #endif w = (sp_int_word)a->dp[0] * a->dp[0]; - t->dp[0] = (sp_int_digit)w; + t[0] = (sp_int_digit)w; l = (sp_int_digit)(w >> SP_WORD_SIZE); h = 0; #ifdef SP_WORD_OVERFLOW @@ -9665,7 +9426,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) h &= SP_MASK; #endif } - t->dp[k] = (sp_int_digit)l; + t[k] = (sp_int_digit)l; l >>= SP_WORD_SIZE; l += (sp_int_digit)h; h >>= SP_WORD_SIZE; @@ -9674,16 +9435,13 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) o >>= SP_WORD_SIZE; #endif } - t->dp[k] = (sp_int_digit)l; - t->dp[k+1] = (sp_int_digit)h; - t->used = k + 2; - - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + t[k] = (sp_int_digit)l; + r->used = k + 1; + XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit)); sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); } @@ -9708,14 +9466,14 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) static int _sp_sqr_4(sp_int* a, sp_int* r) { int err = MP_OKAY; - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) sp_int_word* w = NULL; #else sp_int_word w[10]; #endif sp_int_digit* da = a->dp; - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL, DYNAMIC_TYPE_BIGINT); if (w == NULL) { @@ -9795,7 +9553,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (w != NULL) { XFREE(w, NULL, DYNAMIC_TYPE_BIGINT); } @@ -9813,76 +9571,49 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) */ static int _sp_sqr_4(sp_int* a, sp_int* r) { - int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif + sp_int_digit l = 
0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit t[4]; - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif + SP_ASM_SQR(h, l, a->dp[0]); + t[0] = h; + h = 0; + SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); + t[1] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); + SP_ASM_SQR_ADD(l, h, o, a->dp[1]); + t[2] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); + t[3] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); + SP_ASM_SQR_ADD(l, h, o, a->dp[2]); + r->dp[4] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]); + r->dp[5] = l; + l = h; + h = o; + SP_ASM_SQR_ADD_NO(l, h, a->dp[3]); + r->dp[6] = l; + r->dp[7] = h; + XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit)); + r->used = 8; + sp_clamp(r); - if (err == MP_OKAY) { - h = 0; - l = 0; - SP_ASM_SQR(h, l, a->dp[0]); - t->dp[0] = h; - h = 0; - o = 0; - SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); - t->dp[1] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); - SP_ASM_SQR_ADD(l, h, o, a->dp[1]); - t->dp[2] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); - t->dp[3] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); - SP_ASM_SQR_ADD(l, h, o, a->dp[2]); - t->dp[4] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]); - t->dp[5] = l; - l = h; - h = o; - SP_ASM_SQR_ADD_NO(l, h, a->dp[3]); - t->dp[6] = l; - t->dp[7] = h; - t->used = 8; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { - sp_clamp(r); - } - - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - - return err; + return MP_OKAY; } #endif /* SQR_MUL_ASM */ 
#endif /* SP_WORD_SIZE == 64 */ @@ -9898,109 +9629,80 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) */ static int _sp_sqr_6(sp_int* a, sp_int* r) { - int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - sp_int_digit tl; - sp_int_digit th; + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit tl = 0; + sp_int_digit th = 0; sp_int_digit to; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif - tl = 0; - th = 0; + sp_int_digit t[6]; - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif + SP_ASM_SQR(h, l, a->dp[0]); + t[0] = h; + h = 0; + SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); + t[1] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); + SP_ASM_SQR_ADD(l, h, o, a->dp[1]); + t[2] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); + t[3] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); + SP_ASM_SQR_ADD(l, h, o, a->dp[2]); + t[4] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[5] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]); + SP_ASM_SQR_ADD(l, h, o, a->dp[3]); + r->dp[6] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]); + r->dp[7] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]); + SP_ASM_SQR_ADD(l, h, o, a->dp[4]); + r->dp[8] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]); + r->dp[9] = l; + l 
= h; + h = o; + SP_ASM_SQR_ADD_NO(l, h, a->dp[5]); + r->dp[10] = l; + r->dp[11] = h; + XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit)); + r->used = 12; + sp_clamp(r); - if (err == MP_OKAY) { - h = 0; - l = 0; - SP_ASM_SQR(h, l, a->dp[0]); - t->dp[0] = h; - h = 0; - o = 0; - SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); - t->dp[1] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); - SP_ASM_SQR_ADD(l, h, o, a->dp[1]); - t->dp[2] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); - t->dp[3] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); - SP_ASM_SQR_ADD(l, h, o, a->dp[2]); - t->dp[4] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[5] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]); - SP_ASM_SQR_ADD(l, h, o, a->dp[3]); - t->dp[6] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]); - t->dp[7] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]); - SP_ASM_SQR_ADD(l, h, o, a->dp[4]); - t->dp[8] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]); - t->dp[9] = l; - l = h; - h = o; - SP_ASM_SQR_ADD_NO(l, h, a->dp[5]); - t->dp[10] = l; - t->dp[11] = h; - t->used = 12; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { - sp_clamp(r); - } - - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - - return err; + return MP_OKAY; } #endif /* SQR_MUL_ASM */ #endif /* SP_WORD_SIZE == 64 */ @@ -10016,144 +9718,115 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) */ static 
int _sp_sqr_8(sp_int* a, sp_int* r) { - int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - sp_int_digit tl; - sp_int_digit th; + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit tl = 0; + sp_int_digit th = 0; sp_int_digit to; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif - tl = 0; - th = 0; + sp_int_digit t[8]; - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif + SP_ASM_SQR(h, l, a->dp[0]); + t[0] = h; + h = 0; + SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); + t[1] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); + SP_ASM_SQR_ADD(l, h, o, a->dp[1]); + t[2] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); + t[3] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); + SP_ASM_SQR_ADD(l, h, o, a->dp[2]); + t[4] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[5] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]); + SP_ASM_SQR_ADD(l, h, o, a->dp[3]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[6] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[7] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]); + SP_ASM_MUL_ADD(tl, 
th, to, a->dp[2], a->dp[6]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]); + SP_ASM_SQR_ADD(l, h, o, a->dp[4]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + r->dp[8] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + r->dp[9] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]); + SP_ASM_SQR_ADD(l, h, o, a->dp[5]); + r->dp[10] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]); + r->dp[11] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]); + SP_ASM_SQR_ADD(l, h, o, a->dp[6]); + r->dp[12] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]); + r->dp[13] = l; + l = h; + h = o; + SP_ASM_SQR_ADD_NO(l, h, a->dp[7]); + r->dp[14] = l; + r->dp[15] = h; + XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit)); + r->used = 16; + sp_clamp(r); - if (err == MP_OKAY) { - h = 0; - l = 0; - SP_ASM_SQR(h, l, a->dp[0]); - t->dp[0] = h; - h = 0; - o = 0; - SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); - t->dp[1] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); - SP_ASM_SQR_ADD(l, h, o, a->dp[1]); - t->dp[2] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); - t->dp[3] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); - SP_ASM_SQR_ADD(l, h, o, a->dp[2]); - t->dp[4] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[5] = l; - l = h; - h = o; - o = 0; - 
SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]); - SP_ASM_SQR_ADD(l, h, o, a->dp[3]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[6] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[7] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]); - SP_ASM_SQR_ADD(l, h, o, a->dp[4]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[8] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[9] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]); - SP_ASM_SQR_ADD(l, h, o, a->dp[5]); - t->dp[10] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]); - t->dp[11] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]); - SP_ASM_SQR_ADD(l, h, o, a->dp[6]); - t->dp[12] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]); - t->dp[13] = l; - l = h; - h = o; - SP_ASM_SQR_ADD_NO(l, h, a->dp[7]); - t->dp[14] = l; - t->dp[15] = h; - t->used = 16; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { - sp_clamp(r); - } - - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - - return err; + return MP_OKAY; } #endif /* SQR_MUL_ASM */ #endif /* SP_WORD_SIZE == 32 */ @@ -10169,226 +9842,197 @@ int sp_mul_2d(sp_int* a, int e, 
sp_int* r) */ static int _sp_sqr_12(sp_int* a, sp_int* r) { - int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - sp_int_digit tl; - sp_int_digit th; + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit tl = 0; + sp_int_digit th = 0; sp_int_digit to; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif - tl = 0; - th = 0; + sp_int_digit t[12]; - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif + SP_ASM_SQR(h, l, a->dp[0]); + t[0] = h; + h = 0; + SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); + t[1] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); + SP_ASM_SQR_ADD(l, h, o, a->dp[1]); + t[2] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); + t[3] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); + SP_ASM_SQR_ADD(l, h, o, a->dp[2]); + t[4] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[5] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]); + SP_ASM_SQR_ADD(l, h, o, a->dp[3]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[6] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[7] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]); 
+ SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]); + SP_ASM_SQR_ADD(l, h, o, a->dp[4]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[8] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[9] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]); + SP_ASM_SQR_ADD(l, h, o, a->dp[5]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[10] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + t[11] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]); + SP_ASM_SQR_ADD(l, h, o, a->dp[6]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + r->dp[12] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]); + SP_ASM_ADD_DBL_3(l, h, 
o, tl, th, to); + r->dp[13] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]); + SP_ASM_SQR_ADD(l, h, o, a->dp[7]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + r->dp[14] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + r->dp[15] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]); + SP_ASM_SQR_ADD(l, h, o, a->dp[8]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + r->dp[16] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]); + SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]); + SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); + r->dp[17] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]); + SP_ASM_SQR_ADD(l, h, o, a->dp[9]); + r->dp[18] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]); + SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]); + r->dp[19] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]); + SP_ASM_SQR_ADD(l, h, o, a->dp[10]); + r->dp[20] = l; + l = h; + h = o; + o = 0; + SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]); + r->dp[21] = l; + l = h; + h = o; + SP_ASM_SQR_ADD_NO(l, h, a->dp[11]); + r->dp[22] = l; + r->dp[23] = h; + XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit)); + r->used = 24; + sp_clamp(r); - if (err == MP_OKAY) { - h = 0; - l = 0; - SP_ASM_SQR(h, l, a->dp[0]); - t->dp[0] = h; - h = 0; - o = 0; - SP_ASM_MUL_ADD2_NO(l, 
h, o, a->dp[0], a->dp[1]); - t->dp[1] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); - SP_ASM_SQR_ADD(l, h, o, a->dp[1]); - t->dp[2] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); - t->dp[3] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); - SP_ASM_SQR_ADD(l, h, o, a->dp[2]); - t->dp[4] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[5] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]); - SP_ASM_SQR_ADD(l, h, o, a->dp[3]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[6] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[7] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]); - SP_ASM_SQR_ADD(l, h, o, a->dp[4]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[8] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[9] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], 
a->dp[10]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]); - SP_ASM_SQR_ADD(l, h, o, a->dp[5]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[10] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[11] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]); - SP_ASM_SQR_ADD(l, h, o, a->dp[6]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[12] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[13] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]); - SP_ASM_SQR_ADD(l, h, o, a->dp[7]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[14] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); 
- t->dp[15] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]); - SP_ASM_SQR_ADD(l, h, o, a->dp[8]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[16] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]); - SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]); - SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[17] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]); - SP_ASM_SQR_ADD(l, h, o, a->dp[9]); - t->dp[18] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]); - SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]); - t->dp[19] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]); - SP_ASM_SQR_ADD(l, h, o, a->dp[10]); - t->dp[20] = l; - l = h; - h = o; - o = 0; - SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]); - t->dp[21] = l; - l = h; - h = o; - SP_ASM_SQR_ADD_NO(l, h, a->dp[11]); - t->dp[22] = l; - t->dp[23] = h; - t->used = 24; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { - sp_clamp(r); - } - - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - - return err; + return MP_OKAY; } #endif /* SQR_MUL_ASM */ #endif /* SP_WORD_SIZE == 32 */ @@ -10407,57 +10051,50 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) static int _sp_sqr_16(sp_int* a, sp_int* r) { int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - sp_int_digit tl; - sp_int_digit th; + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit tl = 0; + sp_int_digit th = 0; sp_int_digit to; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; #else - sp_int t[1]; - #endif - tl = 0; - th = 0; - - 
#ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + sp_int_digit t[16]; #endif + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL, + DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } + #endif if (err == MP_OKAY) { - h = 0; - l = 0; SP_ASM_SQR(h, l, a->dp[0]); - t->dp[0] = h; + t[0] = h; h = 0; - o = 0; SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); - t->dp[1] = l; + t[1] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); SP_ASM_SQR_ADD(l, h, o, a->dp[1]); - t->dp[2] = l; + t[2] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); - t->dp[3] = l; + t[3] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]); SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); SP_ASM_SQR_ADD(l, h, o, a->dp[2]); - t->dp[4] = l; + t[4] = l; l = h; h = o; o = 0; @@ -10465,7 +10102,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]); SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[5] = l; + t[5] = l; l = h; h = o; o = 0; @@ -10474,7 +10111,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]); SP_ASM_SQR_ADD(l, h, o, a->dp[3]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[6] = l; + t[6] = l; l = h; h = o; o = 0; @@ -10483,7 +10120,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]); SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[7] = l; + t[7] = l; l = h; h = o; o = 0; @@ -10493,7 +10130,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]); SP_ASM_SQR_ADD(l, h, o, a->dp[4]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[8] = l; + 
t[8] = l; l = h; h = o; o = 0; @@ -10503,7 +10140,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]); SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[9] = l; + t[9] = l; l = h; h = o; o = 0; @@ -10514,7 +10151,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]); SP_ASM_SQR_ADD(l, h, o, a->dp[5]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[10] = l; + t[10] = l; l = h; h = o; o = 0; @@ -10525,7 +10162,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]); SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[11] = l; + t[11] = l; l = h; h = o; o = 0; @@ -10537,7 +10174,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]); SP_ASM_SQR_ADD(l, h, o, a->dp[6]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[12] = l; + t[12] = l; l = h; h = o; o = 0; @@ -10549,7 +10186,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]); SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[13] = l; + t[13] = l; l = h; h = o; o = 0; @@ -10562,7 +10199,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]); SP_ASM_SQR_ADD(l, h, o, a->dp[7]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[14] = l; + t[14] = l; l = h; h = o; o = 0; @@ -10575,7 +10212,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]); SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[15] = l; + t[15] = l; l = h; h = o; o = 0; @@ -10588,7 +10225,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]); SP_ASM_SQR_ADD(l, h, o, a->dp[8]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[16] = l; + r->dp[16] = l; l = h; h = o; o 
= 0; @@ -10600,7 +10237,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]); SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[17] = l; + r->dp[17] = l; l = h; h = o; o = 0; @@ -10612,7 +10249,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]); SP_ASM_SQR_ADD(l, h, o, a->dp[9]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[18] = l; + r->dp[18] = l; l = h; h = o; o = 0; @@ -10623,7 +10260,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]); SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[19] = l; + r->dp[19] = l; l = h; h = o; o = 0; @@ -10634,7 +10271,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]); SP_ASM_SQR_ADD(l, h, o, a->dp[10]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[20] = l; + r->dp[20] = l; l = h; h = o; o = 0; @@ -10644,7 +10281,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]); SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[21] = l; + r->dp[21] = l; l = h; h = o; o = 0; @@ -10654,7 +10291,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]); SP_ASM_SQR_ADD(l, h, o, a->dp[11]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[22] = l; + r->dp[22] = l; l = h; h = o; o = 0; @@ -10663,7 +10300,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]); SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[23] = l; + r->dp[23] = l; l = h; h = o; o = 0; @@ -10672,7 +10309,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]); SP_ASM_SQR_ADD(l, h, o, a->dp[12]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[24] = l; + 
r->dp[24] = l; l = h; h = o; o = 0; @@ -10680,49 +10317,46 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]); SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[25] = l; + r->dp[25] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]); SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]); SP_ASM_SQR_ADD(l, h, o, a->dp[13]); - t->dp[26] = l; + r->dp[26] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]); SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]); - t->dp[27] = l; + r->dp[27] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]); SP_ASM_SQR_ADD(l, h, o, a->dp[14]); - t->dp[28] = l; + r->dp[28] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]); - t->dp[29] = l; + r->dp[29] = l; l = h; h = o; SP_ASM_SQR_ADD_NO(l, h, a->dp[15]); - t->dp[30] = l; - t->dp[31] = h; - t->used = 32; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + r->dp[30] = l; + r->dp[31] = h; + XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit)); + r->used = 32; sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); } #endif - return err; } #endif /* SP_INT_DIGITS >= 32 */ @@ -10739,57 +10373,50 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) static int _sp_sqr_24(sp_int* a, sp_int* r) { int err = MP_OKAY; - sp_int_digit l; - sp_int_digit h; - sp_int_digit o; - sp_int_digit tl; - sp_int_digit th; + sp_int_digit l = 0; + sp_int_digit h = 0; + sp_int_digit o = 0; + sp_int_digit tl = 0; + sp_int_digit th = 0; sp_int_digit to; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_int_digit* t = NULL; #else - sp_int t[1]; - #endif - tl = 0; - th = 0; - - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, 
DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } + sp_int_digit t[24]; #endif + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL, + DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } + #endif if (err == MP_OKAY) { - h = 0; - l = 0; SP_ASM_SQR(h, l, a->dp[0]); - t->dp[0] = h; + t[0] = h; h = 0; - o = 0; SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]); - t->dp[1] = l; + t[1] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]); SP_ASM_SQR_ADD(l, h, o, a->dp[1]); - t->dp[2] = l; + t[2] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]); SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]); - t->dp[3] = l; + t[3] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]); SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]); SP_ASM_SQR_ADD(l, h, o, a->dp[2]); - t->dp[4] = l; + t[4] = l; l = h; h = o; o = 0; @@ -10797,7 +10424,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]); SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[5] = l; + t[5] = l; l = h; h = o; o = 0; @@ -10806,7 +10433,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]); SP_ASM_SQR_ADD(l, h, o, a->dp[3]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[6] = l; + t[6] = l; l = h; h = o; o = 0; @@ -10815,7 +10442,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]); SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[7] = l; + t[7] = l; l = h; h = o; o = 0; @@ -10825,7 +10452,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]); SP_ASM_SQR_ADD(l, h, o, a->dp[4]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[8] = l; + t[8] = l; l = h; h = o; o = 0; @@ -10835,7 +10462,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* 
r) SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]); SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[9] = l; + t[9] = l; l = h; h = o; o = 0; @@ -10846,7 +10473,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]); SP_ASM_SQR_ADD(l, h, o, a->dp[5]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[10] = l; + t[10] = l; l = h; h = o; o = 0; @@ -10857,7 +10484,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]); SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[11] = l; + t[11] = l; l = h; h = o; o = 0; @@ -10869,7 +10496,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]); SP_ASM_SQR_ADD(l, h, o, a->dp[6]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[12] = l; + t[12] = l; l = h; h = o; o = 0; @@ -10881,7 +10508,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]); SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[13] = l; + t[13] = l; l = h; h = o; o = 0; @@ -10894,7 +10521,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]); SP_ASM_SQR_ADD(l, h, o, a->dp[7]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[14] = l; + t[14] = l; l = h; h = o; o = 0; @@ -10907,7 +10534,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]); SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[15] = l; + t[15] = l; l = h; h = o; o = 0; @@ -10921,7 +10548,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]); SP_ASM_SQR_ADD(l, h, o, a->dp[8]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[16] = l; + t[16] = l; l = h; h = o; o = 0; @@ -10935,7 +10562,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, 
a->dp[7], a->dp[10]); SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[17] = l; + t[17] = l; l = h; h = o; o = 0; @@ -10950,7 +10577,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]); SP_ASM_SQR_ADD(l, h, o, a->dp[9]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[18] = l; + t[18] = l; l = h; h = o; o = 0; @@ -10965,7 +10592,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]); SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[19] = l; + t[19] = l; l = h; h = o; o = 0; @@ -10981,7 +10608,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]); SP_ASM_SQR_ADD(l, h, o, a->dp[10]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[20] = l; + t[20] = l; l = h; h = o; o = 0; @@ -10997,7 +10624,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]); SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[21] = l; + t[21] = l; l = h; h = o; o = 0; @@ -11014,7 +10641,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]); SP_ASM_SQR_ADD(l, h, o, a->dp[11]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[22] = l; + t[22] = l; l = h; h = o; o = 0; @@ -11031,7 +10658,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]); SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[23] = l; + t[23] = l; l = h; h = o; o = 0; @@ -11048,7 +10675,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]); SP_ASM_SQR_ADD(l, h, o, a->dp[12]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[24] = l; + r->dp[24] = l; l = h; h = o; o = 0; @@ -11064,7 +10691,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, 
a->dp[11], a->dp[14]); SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[25] = l; + r->dp[25] = l; l = h; h = o; o = 0; @@ -11080,7 +10707,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]); SP_ASM_SQR_ADD(l, h, o, a->dp[13]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[26] = l; + r->dp[26] = l; l = h; h = o; o = 0; @@ -11095,7 +10722,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]); SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[27] = l; + r->dp[27] = l; l = h; h = o; o = 0; @@ -11110,7 +10737,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]); SP_ASM_SQR_ADD(l, h, o, a->dp[14]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[28] = l; + r->dp[28] = l; l = h; h = o; o = 0; @@ -11124,7 +10751,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]); SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[29] = l; + r->dp[29] = l; l = h; h = o; o = 0; @@ -11138,7 +10765,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]); SP_ASM_SQR_ADD(l, h, o, a->dp[15]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[30] = l; + r->dp[30] = l; l = h; h = o; o = 0; @@ -11151,7 +10778,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]); SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[31] = l; + r->dp[31] = l; l = h; h = o; o = 0; @@ -11164,7 +10791,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]); SP_ASM_SQR_ADD(l, h, o, a->dp[16]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[32] = l; + r->dp[32] = l; l = h; h = o; o = 0; @@ -11176,7 +10803,7 @@ int sp_mul_2d(sp_int* a, int e, 
sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]); SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[33] = l; + r->dp[33] = l; l = h; h = o; o = 0; @@ -11188,7 +10815,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]); SP_ASM_SQR_ADD(l, h, o, a->dp[17]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[34] = l; + r->dp[34] = l; l = h; h = o; o = 0; @@ -11199,7 +10826,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]); SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[35] = l; + r->dp[35] = l; l = h; h = o; o = 0; @@ -11210,7 +10837,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]); SP_ASM_SQR_ADD(l, h, o, a->dp[18]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[36] = l; + r->dp[36] = l; l = h; h = o; o = 0; @@ -11220,7 +10847,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]); SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[37] = l; + r->dp[37] = l; l = h; h = o; o = 0; @@ -11230,7 +10857,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]); SP_ASM_SQR_ADD(l, h, o, a->dp[19]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[38] = l; + r->dp[38] = l; l = h; h = o; o = 0; @@ -11239,7 +10866,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]); SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[39] = l; + r->dp[39] = l; l = h; h = o; o = 0; @@ -11248,7 +10875,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]); SP_ASM_SQR_ADD(l, h, o, a->dp[20]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[40] = l; + r->dp[40] = l; l = h; h = o; o = 0; @@ -11256,49 +10883,46 
@@ int sp_mul_2d(sp_int* a, int e, sp_int* r) SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]); SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]); SP_ASM_ADD_DBL_3(l, h, o, tl, th, to); - t->dp[41] = l; + r->dp[41] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]); SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]); SP_ASM_SQR_ADD(l, h, o, a->dp[21]); - t->dp[42] = l; + r->dp[42] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]); SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]); - t->dp[43] = l; + r->dp[43] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]); SP_ASM_SQR_ADD(l, h, o, a->dp[22]); - t->dp[44] = l; + r->dp[44] = l; l = h; h = o; o = 0; SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]); - t->dp[45] = l; + r->dp[45] = l; l = h; h = o; SP_ASM_SQR_ADD_NO(l, h, a->dp[23]); - t->dp[46] = l; - t->dp[47] = h; - t->used = 48; - err = sp_copy(t, r); - } - if (err == MP_OKAY) { + r->dp[46] = l; + r->dp[47] = h; + XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit)); + r->used = 48; sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); } #endif - return err; } #endif /* SP_INT_DIGITS >= 48 */ @@ -11318,31 +10942,19 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) int i; sp_int_digit l; sp_int_digit h; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[3]; - #endif - sp_int* a1; sp_int* z0; sp_int* z1; sp_int* z2; sp_int_digit ca; + DECL_SP_INT(a1, 16); + DECL_SP_INT_ARRAY(z, 33, 2); - #ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT(a1, 16, err, NULL); + ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif - - if (err == MP_OKAY) { - a1 = &t[0]; + z1 = z[0]; + z2 = z[1]; z0 = r; - z1 = &t[1]; - z2 = &t[2]; XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16); 
a1->used = 16; @@ -11374,6 +10986,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) /* r = z0 */ /* r += (z1 - z0 - z2) << 16 */ z1->dp[32] = ca; + l = 0; if (ca) { l = z1->dp[0 + 16]; h = 0; @@ -11390,8 +11003,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) l = h; h = 0; } - z1->dp[32] += l; } + z1->dp[32] += l; /* z1 = z1 - z0 - z1 */ l = z1->dp[0]; h = 0; @@ -11445,12 +11058,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - + FREE_SP_INT_ARRAY(z, NULL); + FREE_SP_INT(a1, NULL); return err; } #endif /* SP_INT_DIGITS >= 64 */ @@ -11470,31 +11079,19 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) int i; sp_int_digit l; sp_int_digit h; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[3]; - #endif - sp_int* a1; sp_int* z0; sp_int* z1; sp_int* z2; sp_int_digit ca; + DECL_SP_INT(a1, 24); + DECL_SP_INT_ARRAY(z, 49, 2); - #ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT(a1, 24, err, NULL); + ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif - - if (err == MP_OKAY) { - a1 = &t[0]; + z1 = z[0]; + z2 = z[1]; z0 = r; - z1 = &t[1]; - z2 = &t[2]; XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24); a1->used = 24; @@ -11526,6 +11123,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) /* r = z0 */ /* r += (z1 - z0 - z2) << 24 */ z1->dp[48] = ca; + l = 0; if (ca) { l = z1->dp[0 + 24]; h = 0; @@ -11542,8 +11140,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) l = h; h = 0; } - z1->dp[48] += l; } + z1->dp[48] += l; /* z1 = z1 - z0 - z1 */ l = z1->dp[0]; h = 0; @@ -11597,12 +11195,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - + FREE_SP_INT_ARRAY(z, NULL); + FREE_SP_INT(a1, NULL); return err; } #endif /* 
SP_INT_DIGITS >= 96 */ @@ -11622,31 +11216,19 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) int i; sp_int_digit l; sp_int_digit h; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[3]; - #endif - sp_int* a1; sp_int* z0; sp_int* z1; sp_int* z2; sp_int_digit ca; + DECL_SP_INT(a1, 32); + DECL_SP_INT_ARRAY(z, 65, 2); - #ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT(a1, 32, err, NULL); + ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif - - if (err == MP_OKAY) { - a1 = &t[0]; + z1 = z[0]; + z2 = z[1]; z0 = r; - z1 = &t[1]; - z2 = &t[2]; XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32); a1->used = 32; @@ -11678,6 +11260,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) /* r = z0 */ /* r += (z1 - z0 - z2) << 32 */ z1->dp[64] = ca; + l = 0; if (ca) { l = z1->dp[0 + 32]; h = 0; @@ -11694,8 +11277,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) l = h; h = 0; } - z1->dp[64] += l; } + z1->dp[64] += l; /* z1 = z1 - z0 - z1 */ l = z1->dp[0]; h = 0; @@ -11749,12 +11332,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - + FREE_SP_INT_ARRAY(z, NULL); + FREE_SP_INT(a1, NULL); return err; } #endif /* SP_INT_DIGITS >= 128 */ @@ -11774,31 +11353,19 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) int i; sp_int_digit l; sp_int_digit h; - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[3]; - #endif - sp_int* a1; sp_int* z0; sp_int* z1; sp_int* z2; sp_int_digit ca; + DECL_SP_INT(a1, 48); + DECL_SP_INT_ARRAY(z, 97, 2); - #ifdef WOLFSSL_SMALL_STACK + ALLOC_SP_INT(a1, 48, err, NULL); + ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif - - if (err == MP_OKAY) { - a1 = &t[0]; + z1 = 
z[0]; + z2 = z[1]; z0 = r; - z1 = &t[1]; - z2 = &t[2]; XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48); a1->used = 48; @@ -11830,6 +11397,7 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) /* r = z0 */ /* r += (z1 - z0 - z2) << 48 */ z1->dp[96] = ca; + l = 0; if (ca) { l = z1->dp[0 + 48]; h = 0; @@ -11846,8 +11414,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) l = h; h = 0; } - z1->dp[96] += l; } + z1->dp[96] += l; /* z1 = z1 - z0 - z1 */ l = z1->dp[0]; h = 0; @@ -11901,12 +11469,8 @@ int sp_mul_2d(sp_int* a, int e, sp_int* r) sp_clamp(r); } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif - + FREE_SP_INT_ARRAY(z, NULL); + FREE_SP_INT(a1, NULL); return err; } #endif /* SP_INT_DIGITS >= 192 */ @@ -11935,7 +11499,7 @@ int sp_sqr(sp_int* a, sp_int* r) err = MP_VAL; } /* Need extra digit during calculation. */ - if ((err == MP_OKAY) && (a->used * 2 >= r->size)) { + if ((err == MP_OKAY) && (a->used * 2 > r->size)) { err = MP_VAL; } @@ -12062,8 +11626,7 @@ int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r) if ((a == NULL) || (m == NULL) || (r == NULL)) { err = MP_VAL; } - /* Need extra digit during calculation. 
*/ - if ((err == MP_OKAY) && (a->used * 2 >= r->size)) { + if ((err == MP_OKAY) && (a->used * 2 > r->size)) { err = MP_VAL; } @@ -12975,20 +12538,12 @@ int sp_todecimal(sp_int* a, char* str) *str = '\0'; } else { - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif /* WOLFSSL_SMALL_STACK */ + DECL_SP_INT(t, a->used + 1); - #ifdef WOLFSSL_SMALL_STACK - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - #endif /* WOLFSSL_SMALL_STACK */ - if (err == MP_OKAY) + ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL); + if (err == MP_OKAY) { err = sp_copy(a, t); + } if (err == MP_OKAY) { #ifdef WOLFSSL_SP_INT_NEGATIVE @@ -13012,11 +12567,7 @@ int sp_todecimal(sp_int* a, char* str) } } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif /* WOLFSSL_SMALL_STACK */ + FREE_SP_INT(t, NULL); } return err; @@ -13115,21 +12666,15 @@ int sp_radix_size(sp_int* a, int radix, int* size) *size = 1 + 1; } else { - #ifdef WOLFSSL_SMALL_STACK - sp_int* t = NULL; - #else - sp_int t[1]; - #endif /* WOLFSSL_SMALL_STACK */ - #ifdef WOLFSSL_SMALL_STACK + DECL_SP_INT(t, a->used + 1); + + ALLOC_SP_INT(t, a->used + 1, err, NULL); if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif /* WOLFSSL_SMALL_STACK */ - if (err == MP_OKAY) + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + t->size = a->used + 1; + #endif /* WOLFSSL_SMALL_STACK && !WOLFSSL_SP_NO_MALLOC */ err = sp_copy(a, t); + } if (err == MP_OKAY) { @@ -13145,11 +12690,7 @@ int sp_radix_size(sp_int* a, int radix, int* size) *size = i + 1; } - #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif /* WOLFSSL_SMALL_STACK */ + FREE_SP_INT(t, NULL); } } #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */ @@ -13373,31 +12914,21 @@ static int 
sp_prime_miller_rabin_ex(sp_int* a, sp_int* b, int* result, static int sp_prime_miller_rabin(sp_int* a, sp_int* b, int* result) { int err = MP_OKAY; -#ifndef WOLFSSL_SMALL_STACK - sp_int n1[1]; - sp_int y[1]; - sp_int r[1]; -#else - sp_int *n1 = NULL; + sp_int *n1; sp_int *y; sp_int *r; -#endif /* WOLFSSL_SMALL_STACK */ - -#ifdef WOLFSSL_SMALL_STACK - n1 = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); - if (n1 == NULL) { - err = MP_MEM; - } - else { - y = &n1[1]; - r = &n1[2]; - } -#endif /* WOLFSSL_SMALL_STACK */ + DECL_SP_INT_ARRAY(t, a->used * 2 + 1, 3); + ALLOC_SP_INT_ARRAY(t, a->used * 2 + 1, 3, err, NULL); if (err == MP_OKAY) { - sp_init(n1); - sp_init(y); - sp_init(r); + n1 = t[0]; + y = t[1]; + r = t[2]; + + /* Only 'y' needs to be twice as big. */ + sp_init_size(n1, a->used * 2 + 1); + sp_init_size(y, a->used * 2 + 1); + sp_init_size(r, a->used * 2 + 1); err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r); @@ -13406,12 +12937,7 @@ static int sp_prime_miller_rabin(sp_int* a, sp_int* b, int* result) sp_clear(r); } -#ifdef WOLFSSL_SMALL_STACK - if (n1 != NULL) { - XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT); - } -#endif /* WOLFSSL_SMALL_STACK */ - + FREE_SP_INT_ARRAY(t, NULL); return err; } @@ -13419,7 +12945,7 @@ static int sp_prime_miller_rabin(sp_int* a, sp_int* b, int* result) /* Number of pre-computed primes. First n primes - fitting in a digit. */ #define SP_PRIME_SIZE 54 -static const sp_int_digit primes[SP_PRIME_SIZE] = { +static const sp_int_digit sp_primes[SP_PRIME_SIZE] = { 0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13, 0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35, 0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59, @@ -13433,7 +12959,7 @@ static const sp_int_digit primes[SP_PRIME_SIZE] = { #define SP_PRIME_SIZE 256 /* The first 256 primes. 
*/ -static const sp_int_digit primes[SP_PRIME_SIZE] = { +static const sp_int_digit sp_primes[SP_PRIME_SIZE] = { 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, @@ -13490,12 +13016,8 @@ int sp_prime_is_prime(sp_int* a, int t, int* result) int err = MP_OKAY; int i; int haveRes = 0; -#ifndef WOLFSSL_SMALL_STACK - sp_int b[1]; -#else - sp_int *b = NULL; -#endif /* WOLFSSL_SMALL_STACK */ sp_int_digit d; + DECL_SP_INT(b, 2); if ((a == NULL) || (result == NULL)) { if (result != NULL) { @@ -13517,7 +13039,7 @@ int sp_prime_is_prime(sp_int* a, int t, int* result) if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) { /* check against primes table */ for (i = 0; i < SP_PRIME_SIZE; i++) { - if (sp_cmp_d(a, primes[i]) == MP_EQ) { + if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) { *result = MP_YES; haveRes = 1; break; @@ -13528,7 +13050,7 @@ int sp_prime_is_prime(sp_int* a, int t, int* result) if ((err == MP_OKAY) && (!haveRes)) { /* do trial division */ for (i = 0; i < SP_PRIME_SIZE; i++) { - err = sp_mod_d(a, primes[i], &d); + err = sp_mod_d(a, sp_primes[i], &d); if ((err != MP_OKAY) || (d == 0)) { *result = MP_NO; haveRes = 1; @@ -13537,33 +13059,22 @@ int sp_prime_is_prime(sp_int* a, int t, int* result) } } -#ifdef WOLFSSL_SMALL_STACK if ((err == MP_OKAY) && (!haveRes)) { - b = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (b == NULL) { - err = MP_MEM; - } - } -#endif /* WOLFSSL_SMALL_STACK */ - - if ((err == MP_OKAY) && (!haveRes)) { - /* now do 't' miller rabins */ - sp_init(b); - for (i = 0; i < t; i++) { - sp_set(b, primes[i]); - err = sp_prime_miller_rabin(a, b, result); - if ((err != MP_OKAY) || (*result == MP_NO)) { - break; + ALLOC_SP_INT(b, 1, err, NULL); + if (err == MP_OKAY) { + /* now do 't' miller rabins */ + sp_init_size(b, 1); + for (i = 0; i < t; i++) { + sp_set(b, sp_primes[i]); + err = 
sp_prime_miller_rabin(a, b, result); + if ((err != MP_OKAY) || (*result == MP_NO)) { + break; + } } } - } - -#ifdef WOLFSSL_SMALL_STACK - if (b != NULL) { - XFREE(b, NULL, DYNAMIC_TYPE_BIGINT); } -#endif + FREE_SP_INT(b, NULL); return err; } @@ -13588,19 +13099,11 @@ int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng) int haveRes = 0; int i; #ifndef WC_NO_RNG - #ifndef WOLFSSL_SMALL_STACK - sp_int b[1]; - sp_int c[1]; - sp_int n1[1]; - sp_int y[1]; - sp_int r[1]; - #else - sp_int *b = NULL; - sp_int *c = NULL; - sp_int *n1 = NULL; - sp_int *y = NULL; - sp_int *r = NULL; - #endif /* WOLFSSL_SMALL_STACK */ + sp_int *b = NULL; + sp_int *c = NULL; + sp_int *n1 = NULL; + sp_int *y = NULL; + sp_int *r = NULL; #endif /* WC_NO_RNG */ if ((a == NULL) || (result == NULL) || (rng == NULL)) { @@ -13615,7 +13118,7 @@ int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng) if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) { /* check against primes table */ for (i = 0; i < SP_PRIME_SIZE; i++) { - if (sp_cmp_d(a, primes[i]) == MP_EQ) { + if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) { ret = MP_YES; haveRes = 1; break; @@ -13628,7 +13131,7 @@ int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng) /* do trial division */ for (i = 0; i < SP_PRIME_SIZE; i++) { - err = sp_mod_d(a, primes[i], &d); + err = sp_mod_d(a, sp_primes[i], &d); if ((err != MP_OKAY) || (d == 0)) { ret = MP_NO; haveRes = 1; @@ -13640,71 +13143,62 @@ int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng) #ifndef WC_NO_RNG /* now do a miller rabin with up to t random numbers, this should * give a (1/4)^t chance of a false prime. 
*/ - #ifdef WOLFSSL_SMALL_STACK - if ((err == MP_OKAY) && (!haveRes)) { - b = (sp_int*)XMALLOC(sizeof(sp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT); - if (b == NULL) { - err = MP_MEM; - } - else { - c = &b[1]; - n1 = &b[2]; - y = &b[3]; - r = &b[4]; - } - } - #endif /* WOLFSSL_SMALL_STACK */ - - if ((err == MP_OKAY) && (!haveRes)) { - sp_init(b); - sp_init(c); - sp_init(n1); - sp_init(y); - sp_init(r); - - _sp_sub_d(a, 2, c); - } - if ((err == MP_OKAY) && (!haveRes)) { int bits = sp_count_bits(a); word32 baseSz = (bits + 7) / 8; + DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 5); - bits &= SP_WORD_MASK; + ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 5, err, NULL); + if (err == MP_OKAY) { + b = d[0]; + c = d[1]; + n1 = d[2]; + y = d[3]; + r = d[4]; - while (t > 0) { - err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz); - if (err != MP_OKAY) { - break; - } - b->used = a->used; - /* Ensure the top word has no more bits than necessary. */ - if (bits > 0) { - b->dp[b->used - 1] &= (1L << bits) - 1; + /* Only 'y' needs to be twice as big. */ + sp_init_size(b , a->used * 2 + 1); + sp_init_size(c , a->used * 2 + 1); + sp_init_size(n1, a->used * 2 + 1); + sp_init_size(y , a->used * 2 + 1); + sp_init_size(r , a->used * 2 + 1); + + _sp_sub_d(a, 2, c); + + bits &= SP_WORD_MASK; + + while (t > 0) { + err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz); + if (err != MP_OKAY) { + break; + } + b->used = a->used; + /* Ensure the top word has no more bits than necessary. 
*/ + if (bits > 0) { + b->dp[b->used - 1] &= (1L << bits) - 1; + } + + if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) { + continue; + } + + err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r); + if ((err != MP_OKAY) || (ret == MP_NO)) { + break; + } + + t--; } - if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) { - continue; - } - - err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r); - if ((err != MP_OKAY) || (ret == MP_NO)) { - break; - } - - t--; + sp_clear(n1); + sp_clear(y); + sp_clear(r); + sp_clear(b); + sp_clear(c); } - sp_clear(n1); - sp_clear(y); - sp_clear(r); - sp_clear(b); - sp_clear(c); + FREE_SP_INT_ARRAY(d, NULL); } - - #ifdef WOLFSSL_SMALL_STACK - if (b != NULL) - XFREE(b, NULL, DYNAMIC_TYPE_BIGINT); - #endif /* WOLFSSL_SMALL_STACK */ #else (void)t; #endif /* !WC_NO_RNG */ @@ -13748,31 +13242,20 @@ int sp_gcd(sp_int* a, sp_int* b, sp_int* r) err = sp_copy(a, r); } else { - #ifdef WOLFSSL_SMALL_STACK sp_int* u = NULL; - sp_int* v; - sp_int* t; - #else - sp_int u[1]; - sp_int v[1]; - sp_int t[1]; - #endif /* WOLFSSL_SMALL_STACK */ - - #ifdef WOLFSSL_SMALL_STACK - u = (sp_int*)XMALLOC(sizeof(sp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT); - if (u == NULL) { - err = MP_MEM; - } - else { - v = &u[1]; - t = &u[2]; - } - #endif /* WOLFSSL_SMALL_STACK */ + sp_int* v = NULL; + sp_int* t = NULL; + int used = (a->used >= b->used) ? 
a->used : b->used; + DECL_SP_INT_ARRAY(d, used, 3); + ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL); if (err == MP_OKAY) { - sp_init(u); - sp_init(v); - sp_init(t); + u = d[0]; + v = d[1]; + t = d[2]; + sp_init_size(u, used); + sp_init_size(v, used); + sp_init_size(t, used); if (_sp_cmp(a, b) != MP_LT) { sp_copy(b, u); @@ -13828,11 +13311,7 @@ int sp_gcd(sp_int* a, sp_int* b, sp_int* r) err = sp_copy(u, r); } - #ifdef WOLFSSL_SMALL_STACK - if (u != NULL) { - XFREE(u, NULL, DYNAMIC_TYPE_BIGINT); - } - #endif /* WOLFSSL_SMALL_STACK */ + FREE_SP_INT_ARRAY(d, NULL); } return err; @@ -13855,56 +13334,42 @@ int sp_gcd(sp_int* a, sp_int* b, sp_int* r) */ int sp_lcm(sp_int* a, sp_int* b, sp_int* r) { - int err = MP_OKAY; -#ifndef WOLFSSL_SMALL_STACK - sp_int t[2]; -#else - sp_int* t = NULL; -#endif /* WOLFSSL_SMALL_STACK */ + int err = MP_OKAY; + int used = ((a == NULL) || (b == NULL)) ? 1 : + (a->used >= b->used ? a->used : b->used); + DECL_SP_INT_ARRAY(t, used, 2); if ((a == NULL) || (b == NULL) || (r == NULL)) { err = MP_VAL; } - if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) { err = MP_VAL; } - #ifdef WOLFSSL_SMALL_STACK - if (err == MP_OKAY) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; - } - } - #endif /* WOLFSSL_SMALL_STACK */ + + ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL); if (err == MP_OKAY) { - sp_init(&t[0]); - sp_init(&t[1]); + sp_init_size(t[0], used); + sp_init_size(t[1], used); - err = sp_gcd(a, b, &t[0]); + err = sp_gcd(a, b, t[0]); if (err == MP_OKAY) { if (_sp_cmp_abs(a, b) == MP_GT) { - err = sp_div(a, &t[0], &t[1], NULL); + err = sp_div(a, t[0], t[1], NULL); if (err == MP_OKAY) { - err = sp_mul(b, &t[1], r); + err = sp_mul(b, t[1], r); } } else { - err = sp_div(b, &t[0], &t[1], NULL); + err = sp_div(b, t[0], t[1], NULL); if (err == MP_OKAY) { - err = sp_mul(a, &t[1], r); + err = sp_mul(a, t[1], r); } } } } -#ifdef WOLFSSL_SMALL_STACK - if (t != NULL) { - XFREE(t, NULL, 
DYNAMIC_TYPE_BIGINT); - } -#endif /* WOLFSSL_SMALL_STACK */ - + FREE_SP_INT_ARRAY(t, NULL); return err; } diff --git a/wolfssl/wolfcrypt/sp_int.h b/wolfssl/wolfcrypt/sp_int.h index abf5a45c6..562603e81 100644 --- a/wolfssl/wolfcrypt/sp_int.h +++ b/wolfssl/wolfcrypt/sp_int.h @@ -629,23 +629,12 @@ typedef struct sp_ecc_ctx { #define CheckFastMathSettings() (SP_WORD_SIZE == CheckRunTimeFastMath()) -#ifdef WOLFSSL_SP_INT_NEGATIVE - #ifdef HAVE_WOLF_BIGINT - #define SP_INT_EXTRA_OVERHEAD sizeof(int) + sizeof(struct WC_BIGINT) - #else - #define SP_INT_EXTRA_OVERHEAD sizeof(int) - #endif -#elif defined(HAVE_WOLF_BIGINT) - #define SP_INT_EXTRA_OVERHEAD sizeof(struct WC_BIGINT) -#else - #define SP_INT_EXTRA_OVERHEAD 0 -#endif -#define WOLFSSL_SP_INT_OVERHEAD \ - (sizeof(int) + sizeof(int) + SP_INT_EXTRA_OVERHEAD) - +/* The number of bytes in an sp_int with 'cnt' digits. */ #define MP_INT_SIZEOF(cnt) \ - (WOLFSSL_SP_INT_OVERHEAD + ((cnt) * SP_WORD_SIZEOF)) - + (sizeof(sp_int) - (SP_INT_DIGITS - (cnt)) * sizeof(sp_int_digit)) +/* The address of the next sp_int after one with 'cnt' digits. */ +#define MP_INT_NEXT(t, cnt) \ + (sp_int*)(((byte*)(t)) + MP_INT_SIZEOF(cnt)) /** * A result of NO.