diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c
index c00e03b30..d4d00e452 100644
--- a/wolfcrypt/src/sp_arm32.c
+++ b/wolfcrypt/src/sp_arm32.c
@@ -36326,15 +36326,16 @@ static int sp_256_mont_inv_order_8_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_256_mont_sqr_order_8(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p256_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
             sp_256_mont_mul_order_8(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 8U);
         err = MP_OKAY;
         break;
@@ -36826,55 +36827,58 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_256_from_mp(ctx->p2.x, 8, pX);
         sp_256_from_mp(ctx->p2.y, 8, pY);
         sp_256_from_mp(ctx->p2.z, 8, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_256_mul_8(ctx->s, ctx->s, p256_norm_order);
         err = sp_256_mod_8(ctx->s, ctx->s, p256_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_256_norm_8(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_256_mont_inv_order_8_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_256_mont_mul_order_8(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* on to NORMS4; staying at 4 would repeat this step forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_256_mont_mul_order_8(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p256_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_256_proj_point_add_8_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_256_iszero_8(ctx->p1.z)) {
             if (sp_256_iszero_8(ctx->p1.x) && sp_256_iszero_8(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -36886,33 +36890,33 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_256_from_mp(ctx->u2, 8, r);
         err = sp_256_mod_mul_norm_8(ctx->u2, ctx->u2, p256_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_8(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_256_cmp_8(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -36943,7 +36947,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
@@ -45092,15 +45096,16 @@ static int sp_384_mont_inv_order_12_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const
         break;
     case 1:
         sp_384_mont_sqr_order_12(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p384_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
             sp_384_mont_mul_order_12(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 12U);
         err = MP_OKAY;
         break;
@@ -45563,55 +45568,58 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_384_from_mp(ctx->p2.x, 12, pX);
         sp_384_from_mp(ctx->p2.y, 12, pY);
         sp_384_from_mp(ctx->p2.z, 12, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_384_mul_12(ctx->s, ctx->s, p384_norm_order);
         err = sp_384_mod_12(ctx->s, ctx->s, p384_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_384_norm_12(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_384_mont_inv_order_12_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_384_mont_mul_order_12(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* on to NORMS4; staying at 4 would repeat this step forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_384_mont_mul_order_12(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_384_proj_point_add_12_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_384_iszero_12(ctx->p1.z)) {
             if (sp_384_iszero_12(ctx->p1.x) && sp_384_iszero_12(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -45623,33 +45631,33 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_384_from_mp(ctx->u2, 12, r);
         err = sp_384_mod_mul_norm_12(ctx->u2, ctx->u2, p384_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_384_mont_sqr_12(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_384_cmp_12(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -45680,7 +45688,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c
index 18e0c1fe1..70119e911 100644
--- a/wolfcrypt/src/sp_arm64.c
+++ b/wolfcrypt/src/sp_arm64.c
@@ -36730,15 +36730,16 @@ static int sp_256_mont_inv_order_4_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_256_mont_sqr_order_4(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p256_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) {
             sp_256_mont_mul_order_4(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 4U);
         err = MP_OKAY;
         break;
@@ -37230,55 +37231,58 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_256_from_mp(ctx->p2.x, 4, pX);
         sp_256_from_mp(ctx->p2.y, 4, pY);
         sp_256_from_mp(ctx->p2.z, 4, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_256_mul_4(ctx->s, ctx->s, p256_norm_order);
         err = sp_256_mod_4(ctx->s, ctx->s, p256_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_256_norm_4(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_256_mont_inv_order_4_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_256_mont_mul_order_4(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* on to NORMS4; staying at 4 would repeat this step forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_256_mont_mul_order_4(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_256_ecc_mulmod_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p256_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_256_ecc_mulmod_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_256_ecc_mulmod_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_256_proj_point_add_4_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_256_iszero_4(ctx->p1.z)) {
             if (sp_256_iszero_4(ctx->p1.x) && sp_256_iszero_4(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -37290,33 +37294,33 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_256_proj_point_dbl_4_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_256_from_mp(ctx->u2, 4, r);
         err = sp_256_mod_mul_norm_4(ctx->u2, ctx->u2, p256_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_4(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_256_mont_mul_4(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_256_mont_mul_4(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_256_cmp_4(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -37347,7 +37351,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
@@ -43200,15 +43204,16 @@ static int sp_384_mont_inv_order_6_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_384_mont_sqr_order_6(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p384_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) {
             sp_384_mont_mul_order_6(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 6U);
         err = MP_OKAY;
         break;
@@ -43671,55 +43676,58 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_384_from_mp(ctx->p2.x, 6, pX);
         sp_384_from_mp(ctx->p2.y, 6, pY);
         sp_384_from_mp(ctx->p2.z, 6, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_384_mul_6(ctx->s, ctx->s, p384_norm_order);
         err = sp_384_mod_6(ctx->s, ctx->s, p384_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_384_norm_6(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_384_mont_inv_order_6_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_384_mont_mul_order_6(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* on to NORMS4; staying at 4 would repeat this step forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_384_mont_mul_order_6(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_384_ecc_mulmod_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_384_ecc_mulmod_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_384_ecc_mulmod_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_384_proj_point_add_6_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_384_iszero_6(ctx->p1.z)) {
             if (sp_384_iszero_6(ctx->p1.x) && sp_384_iszero_6(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -43731,33 +43739,33 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_384_proj_point_dbl_6_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_384_from_mp(ctx->u2, 6, r);
         err = sp_384_mod_mul_norm_6(ctx->u2, ctx->u2, p384_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_384_mont_sqr_6(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_384_mont_mul_6(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_384_mont_mul_6(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_384_cmp_6(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -43788,7 +43796,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c
index 3b2c1844c..348e7f20d 100644
--- a/wolfcrypt/src/sp_armthumb.c
+++ b/wolfcrypt/src/sp_armthumb.c
@@ -21099,15 +21099,16 @@ static int sp_256_mont_inv_order_8_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_256_mont_sqr_order_8(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p256_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
             sp_256_mont_mul_order_8(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 8U);
         err = MP_OKAY;
         break;
@@ -21599,55 +21600,58 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_256_from_mp(ctx->p2.x, 8, pX);
         sp_256_from_mp(ctx->p2.y, 8, pY);
         sp_256_from_mp(ctx->p2.z, 8, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_256_mul_8(ctx->s, ctx->s, p256_norm_order);
         err = sp_256_mod_8(ctx->s, ctx->s, p256_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_256_norm_8(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_256_mont_inv_order_8_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_256_mont_mul_order_8(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* on to NORMS4; staying at 4 would repeat this step forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_256_mont_mul_order_8(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p256_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_256_proj_point_add_8_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_256_iszero_8(ctx->p1.z)) {
             if (sp_256_iszero_8(ctx->p1.x) && sp_256_iszero_8(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -21659,33 +21663,33 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_256_from_mp(ctx->u2, 8, r);
         err = sp_256_mod_mul_norm_8(ctx->u2, ctx->u2, p256_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_8(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_256_cmp_8(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -21716,7 +21720,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
@@ -27934,15 +27938,16 @@ static int sp_384_mont_inv_order_12_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const
         break;
     case 1:
         sp_384_mont_sqr_order_12(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p384_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
             sp_384_mont_mul_order_12(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 12U);
         err = MP_OKAY;
         break;
@@ -28405,55 +28410,58 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_384_from_mp(ctx->p2.x, 12, pX);
         sp_384_from_mp(ctx->p2.y, 12, pY);
         sp_384_from_mp(ctx->p2.z, 12, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_384_mul_12(ctx->s, ctx->s, p384_norm_order);
         err = sp_384_mod_12(ctx->s, ctx->s, p384_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_384_norm_12(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_384_mont_inv_order_12_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_384_mont_mul_order_12(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* on to NORMS4; staying at 4 would repeat this step forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_384_mont_mul_order_12(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_384_proj_point_add_12_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_384_iszero_12(ctx->p1.z)) {
             if (sp_384_iszero_12(ctx->p1.x) && sp_384_iszero_12(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -28465,33 +28473,33 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_384_from_mp(ctx->u2, 12, r);
         err = sp_384_mod_mul_norm_12(ctx->u2, ctx->u2, p384_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_384_mont_sqr_12(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_384_cmp_12(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -28522,7 +28530,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c
index 6bf859bc1..3aa8c9ccb 100644
--- a/wolfcrypt/src/sp_c32.c
+++ b/wolfcrypt/src/sp_c32.c
@@ -17633,15 +17633,16 @@ static int sp_256_mont_inv_order_10_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const
         break;
     case 1:
         sp_256_mont_sqr_order_10(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p256_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
             sp_256_mont_mul_order_10(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 10U);
         err = MP_OKAY;
         break;
@@ -18133,55 +18134,58 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_256_from_mp(ctx->p2.x, 10, pX);
         sp_256_from_mp(ctx->p2.y, 10, pY);
         sp_256_from_mp(ctx->p2.z, 10, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_256_mul_10(ctx->s, ctx->s, p256_norm_order);
         err = sp_256_mod_10(ctx->s, ctx->s, p256_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_256_norm_10(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_256_mont_inv_order_10_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_256_mont_mul_order_10(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* advance to NORMS4; 4 would re-enter NORMS3 forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_256_mont_mul_order_10(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_256_ecc_mulmod_10_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p256_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_256_ecc_mulmod_10_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_256_ecc_mulmod_10_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_256_proj_point_add_10_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_256_iszero_10(ctx->p1.z)) {
             if (sp_256_iszero_10(ctx->p1.x) && sp_256_iszero_10(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -18193,33 +18197,33 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_256_proj_point_dbl_10_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_256_from_mp(ctx->u2, 10, r);
         err = sp_256_mod_mul_norm_10(ctx->u2, ctx->u2, p256_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_10(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_256_mont_mul_10(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_256_mont_mul_10(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_256_cmp_10(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -18250,7 +18254,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
@@ -24978,15 +24982,16 @@ static int sp_384_mont_inv_order_15_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const
         break;
     case 1:
         sp_384_mont_sqr_order_15(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p384_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
             sp_384_mont_mul_order_15(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 15U);
         err = MP_OKAY;
         break;
@@ -25449,55 +25454,58 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_384_from_mp(ctx->p2.x, 15, pX);
         sp_384_from_mp(ctx->p2.y, 15, pY);
         sp_384_from_mp(ctx->p2.z, 15, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_384_mul_15(ctx->s, ctx->s, p384_norm_order);
         err = sp_384_mod_15(ctx->s, ctx->s, p384_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_384_norm_15(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_384_mont_inv_order_15_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_384_mont_mul_order_15(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* advance to NORMS4; 4 would re-enter NORMS3 forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_384_mont_mul_order_15(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_384_ecc_mulmod_15_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_384_ecc_mulmod_15_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_384_ecc_mulmod_15_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_384_proj_point_add_15_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_384_iszero_15(ctx->p1.z)) {
             if (sp_384_iszero_15(ctx->p1.x) && sp_384_iszero_15(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -25509,33 +25517,33 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_384_proj_point_dbl_15_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_384_from_mp(ctx->u2, 15, r);
         err = sp_384_mod_mul_norm_15(ctx->u2, ctx->u2, p384_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_384_mont_sqr_15(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_384_mont_mul_15(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_384_mont_mul_15(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_384_cmp_15(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -25566,7 +25574,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c
index 8c0034bfb..70afc97fd 100644
--- a/wolfcrypt/src/sp_c64.c
+++ b/wolfcrypt/src/sp_c64.c
@@ -17374,15 +17374,16 @@ static int sp_256_mont_inv_order_5_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_256_mont_sqr_order_5(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p256_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) {
             sp_256_mont_mul_order_5(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 5U);
         err = MP_OKAY;
         break;
@@ -17874,55 +17875,58 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_256_from_mp(ctx->p2.x, 5, pX);
         sp_256_from_mp(ctx->p2.y, 5, pY);
         sp_256_from_mp(ctx->p2.z, 5, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_256_mul_5(ctx->s, ctx->s, p256_norm_order);
         err = sp_256_mod_5(ctx->s, ctx->s, p256_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_256_norm_5(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_256_mont_inv_order_5_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_256_mont_mul_order_5(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* advance to NORMS4; 4 would re-enter NORMS3 forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_256_mont_mul_order_5(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_256_ecc_mulmod_5_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p256_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_256_ecc_mulmod_5_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_256_ecc_mulmod_5_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_256_proj_point_add_5_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_256_iszero_5(ctx->p1.z)) {
             if (sp_256_iszero_5(ctx->p1.x) && sp_256_iszero_5(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -17934,33 +17938,33 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_256_proj_point_dbl_5_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_256_from_mp(ctx->u2, 5, r);
         err = sp_256_mod_mul_norm_5(ctx->u2, ctx->u2, p256_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_5(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_256_mont_mul_5(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_256_mont_mul_5(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_256_cmp_5(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -17991,7 +17995,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
@@ -24193,15 +24197,16 @@ static int sp_384_mont_inv_order_7_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_384_mont_sqr_order_7(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p384_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) {
             sp_384_mont_mul_order_7(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 7U);
         err = MP_OKAY;
         break;
@@ -24664,55 +24669,58 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_384_from_mp(ctx->p2.x, 7, pX);
         sp_384_from_mp(ctx->p2.y, 7, pY);
         sp_384_from_mp(ctx->p2.z, 7, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_384_mul_7(ctx->s, ctx->s, p384_norm_order);
         err = sp_384_mod_7(ctx->s, ctx->s, p384_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_384_norm_7(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_384_mont_inv_order_7_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_384_mont_mul_order_7(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* advance to NORMS4; 4 would re-enter NORMS3 forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_384_mont_mul_order_7(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_384_ecc_mulmod_7_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_384_ecc_mulmod_7_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_384_ecc_mulmod_7_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_384_proj_point_add_7_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_384_iszero_7(ctx->p1.z)) {
             if (sp_384_iszero_7(ctx->p1.x) && sp_384_iszero_7(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -24724,33 +24732,33 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_384_proj_point_dbl_7_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_384_from_mp(ctx->u2, 7, r);
         err = sp_384_mod_mul_norm_7(ctx->u2, ctx->u2, p384_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_384_mont_sqr_7(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_384_mont_mul_7(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_384_mont_mul_7(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_384_cmp_7(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -24781,7 +24789,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c
index b4bfc4692..c5ccef721 100644
--- a/wolfcrypt/src/sp_cortexm.c
+++ b/wolfcrypt/src/sp_cortexm.c
@@ -20667,15 +20667,16 @@ static int sp_256_mont_inv_order_8_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_256_mont_sqr_order_8(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p256_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
             sp_256_mont_mul_order_8(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 8U);
         err = MP_OKAY;
         break;
@@ -21167,55 +21168,58 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_256_from_mp(ctx->p2.x, 8, pX);
         sp_256_from_mp(ctx->p2.y, 8, pY);
         sp_256_from_mp(ctx->p2.z, 8, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_256_mul_8(ctx->s, ctx->s, p256_norm_order);
         err = sp_256_mod_8(ctx->s, ctx->s, p256_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_256_norm_8(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_256_mont_inv_order_8_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_256_mont_mul_order_8(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* advance to NORMS4; 4 would re-enter NORMS3 forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_256_mont_mul_order_8(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p256_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_256_proj_point_add_8_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_256_iszero_8(ctx->p1.z)) {
             if (sp_256_iszero_8(ctx->p1.x) && sp_256_iszero_8(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -21227,33 +21231,33 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_256_from_mp(ctx->u2, 8, r);
         err = sp_256_mod_mul_norm_8(ctx->u2, ctx->u2, p256_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_8(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_256_cmp_8(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -21284,7 +21288,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
@@ -27215,15 +27219,16 @@ static int sp_384_mont_inv_order_12_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const
         break;
     case 1:
         sp_384_mont_sqr_order_12(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p384_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
             sp_384_mont_mul_order_12(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 12U);
         err = MP_OKAY;
         break;
@@ -27686,55 +27691,58 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_384_from_mp(ctx->p2.x, 12, pX);
         sp_384_from_mp(ctx->p2.y, 12, pY);
         sp_384_from_mp(ctx->p2.z, 12, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_384_mul_12(ctx->s, ctx->s, p384_norm_order);
         err = sp_384_mod_12(ctx->s, ctx->s, p384_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_384_norm_12(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_384_mont_inv_order_12_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_384_mont_mul_order_12(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* advance to NORMS4; 4 would re-enter NORMS3 forever */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_384_mont_mul_order_12(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_384_proj_point_add_12_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_384_iszero_12(ctx->p1.z)) {
             if (sp_384_iszero_12(ctx->p1.x) && sp_384_iszero_12(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -27746,33 +27754,33 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_384_from_mp(ctx->u2, 12, r);
         err = sp_384_mod_mul_norm_12(ctx->u2, ctx->u2, p384_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_384_mont_sqr_12(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_384_cmp_12(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -27803,7 +27811,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c
index 22e0e4476..fa0c43e4f 100644
--- a/wolfcrypt/src/sp_x86_64.c
+++ b/wolfcrypt/src/sp_x86_64.c
@@ -22529,15 +22529,16 @@ static int sp_256_mont_inv_order_4_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_256_mont_sqr_order_4(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p256_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) {
             sp_256_mont_mul_order_4(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 4U);
         err = MP_OKAY;
         break;
@@ -22715,15 +22716,16 @@ static int sp_256_mont_inv_order_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, co
         break;
     case 1:
         sp_256_mont_sqr_order_avx2_4(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p256_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) {
             sp_256_mont_mul_order_avx2_4(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 4U);
         err = MP_OKAY;
         break;
@@ -23244,55 +23246,58 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_256_from_mp(ctx->p2.x, 4, pX);
         sp_256_from_mp(ctx->p2.y, 4, pY);
         sp_256_from_mp(ctx->p2.z, 4, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_256_mul_4(ctx->s, ctx->s, p256_norm_order);
         err = sp_256_mod_4(ctx->s, ctx->s, p256_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_256_norm_4(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_256_mont_inv_order_4_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_256_mont_mul_order_4(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* NORMS4 */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_256_mont_mul_order_4(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_256_ecc_mulmod_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p256_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_256_ecc_mulmod_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_256_ecc_mulmod_4_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_256_proj_point_add_4_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_256_iszero_4(ctx->p1.z)) {
             if (sp_256_iszero_4(ctx->p1.x) && sp_256_iszero_4(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -23304,33 +23309,33 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_256_proj_point_dbl_4_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_256_from_mp(ctx->u2, 4, r);
         err = sp_256_mod_mul_norm_4(ctx->u2, ctx->u2, p256_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_256_mont_sqr_4(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_256_mont_mul_4(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_256_mont_mul_4(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_256_cmp_4(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -23361,7 +23366,7 @@ int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }
 
@@ -29573,15 +29578,16 @@ static int sp_384_mont_inv_order_6_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const s
         break;
     case 1:
         sp_384_mont_sqr_order_6(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p384_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) {
             sp_384_mont_mul_order_6(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 6U);
         err = MP_OKAY;
         break;
@@ -29726,15 +29732,16 @@ static int sp_384_mont_inv_order_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, co
         break;
     case 1:
         sp_384_mont_sqr_order_avx2_6(t, t);
+        ctx->state = 2;
+        break;
+    case 2:
         if ((p384_order_minus_2[ctx->i / 64] & ((sp_int_digit)1 << (ctx->i % 64))) != 0) {
             sp_384_mont_mul_order_avx2_6(t, t, a);
         }
         ctx->i--;
-        if (ctx->i == 0) {
-            ctx->state = 2;
-        }
+        ctx->state = (ctx->i == 0) ? 3 : 1;
         break;
-    case 2:
+    case 3:
         XMEMCPY(r, t, sizeof(sp_digit) * 6U);
         err = MP_OKAY;
         break;
@@ -30226,55 +30233,58 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         sp_384_from_mp(ctx->p2.x, 6, pX);
         sp_384_from_mp(ctx->p2.y, 6, pY);
         sp_384_from_mp(ctx->p2.z, 6, pZ);
+        ctx->state = 1;
+        break;
+    case 1: /* NORMS0 */
         sp_384_mul_6(ctx->s, ctx->s, p384_norm_order);
         err = sp_384_mod_6(ctx->s, ctx->s, p384_order);
         if (err == MP_OKAY)
-            ctx->state = 1;
+            ctx->state = 2;
         break;
-    case 1: /* NORMS1 */
+    case 2: /* NORMS1 */
         sp_384_norm_6(ctx->s);
         XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
-        ctx->state = 2;
+        ctx->state = 3;
         break;
-    case 2: /* NORMS2 */
+    case 3: /* NORMS2 */
         err = sp_384_mont_inv_order_6_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 3;
+            ctx->state = 4;
         }
         break;
-    case 3: /* NORMS3 */
+    case 4: /* NORMS3 */
         sp_384_mont_mul_order_6(ctx->u1, ctx->u1, ctx->s);
-        ctx->state = 4;
+        ctx->state = 5; /* NORMS4 */
         break;
-    case 4: /* NORMS4 */
+    case 5: /* NORMS4 */
         sp_384_mont_mul_order_6(ctx->u2, ctx->u2, ctx->s);
         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-        ctx->state = 5;
+        ctx->state = 6;
         break;
-    case 5: /* MULBASE */
+    case 6: /* MULBASE */
         err = sp_384_ecc_mulmod_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
         if (err == MP_OKAY) {
             XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
-            ctx->state = 6;
-        }
-        break;
-    case 6: /* MULMOD */
-        err = sp_384_ecc_mulmod_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
-        if (err == MP_OKAY) {
-            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
             ctx->state = 7;
         }
         break;
-    case 7: /* ADD */
+    case 7: /* MULMOD */
+        err = sp_384_ecc_mulmod_6_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
+        if (err == MP_OKAY) {
+            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
+            ctx->state = 8;
+        }
+        break;
+    case 8: /* ADD */
         err = sp_384_proj_point_add_6_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
         if (err == MP_OKAY)
-            ctx->state = 8;
+            ctx->state = 9;
         break;
-    case 8: /* DBLPREP */
+    case 9: /* DBLPREP */
         if (sp_384_iszero_6(ctx->p1.z)) {
             if (sp_384_iszero_6(ctx->p1.x) && sp_384_iszero_6(ctx->p1.y)) {
                 XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
-                ctx->state = 9;
+                ctx->state = 10;
                 break;
             }
             else {
@@ -30286,33 +30296,33 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
                 XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
             }
         }
-        ctx->state = 10;
+        ctx->state = 11;
         break;
-    case 9: /* DBL */
+    case 10: /* DBL */
         err = sp_384_proj_point_dbl_6_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, &ctx->p1, 
             &ctx->p2, ctx->tmp);
         if (err == MP_OKAY) {
-            ctx->state = 10;
+            ctx->state = 11;
         }
         break;
-    case 10: /* MONT */
+    case 11: /* MONT */
         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
         /* Reload r and convert to Montgomery form. */
         sp_384_from_mp(ctx->u2, 6, r);
         err = sp_384_mod_mul_norm_6(ctx->u2, ctx->u2, p384_mod);
         if (err == MP_OKAY)
-            ctx->state = 11;
+            ctx->state = 12;
         break;
-    case 11: /* SQR */
+    case 12: /* SQR */
         /* u1 = r.z'.z' mod prime */
         sp_384_mont_sqr_6(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
-        ctx->state = 12;
-        break;
-    case 12: /* MUL */
-        sp_384_mont_mul_6(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
         ctx->state = 13;
         break;
-    case 13: /* RES */
+    case 13: /* MUL */
+        sp_384_mont_mul_6(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
+        ctx->state = 14;
+        break;
+    case 14: /* RES */
         err = MP_OKAY; /* math okay, now check result */
         *res = (int)(sp_384_cmp_6(ctx->p1.x, ctx->u1) == 0);
         if (*res == 0) {
@@ -30343,7 +30353,7 @@ int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen,
         break;
     }
 
-    if (err == MP_OKAY && ctx->state != 13) {
+    if (err == MP_OKAY && ctx->state != 14) {
         err = FP_WOULDBLOCK;
     }