mirror of https://github.com/wolfSSL/wolfssl
Merge pull request #7706 from SparkiDev/kyber_thumb2_asm
Kyber ASM ARMv7E-M/ARMv7-M: added assembly code
This commit is contained in:
commit
afe5209427
|
@ -1192,6 +1192,15 @@ endif
|
|||
if BUILD_WC_KYBER
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber.c
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_poly.c
|
||||
if BUILD_ARMASM
|
||||
if BUILD_ARM_THUMB
|
||||
if BUILD_ARMASM_INLINE
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm_c.c
|
||||
else
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-kyber-asm.S
|
||||
endif !BUILD_ARMASM_INLINE
|
||||
endif BUILD_ARM_THUMB
|
||||
endif BUILD_ARMASM
|
||||
if !BUILD_X86_ASM
|
||||
if BUILD_INTELASM
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/wc_kyber_asm.S
|
||||
|
|
|
@ -1511,7 +1511,7 @@ fe_cmov_table:
|
|||
#endif /* WC_NO_CACHE_RESISTANT */
|
||||
#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */
|
||||
#endif /* HAVE_ED25519 */
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
.text
|
||||
.align 4
|
||||
.globl fe_mul_op
|
||||
|
@ -2023,7 +2023,7 @@ fe_mul_op:
|
|||
POP {pc}
|
||||
/* Cycle Count = 239 */
|
||||
.size fe_mul_op,.-fe_mul_op
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
.text
|
||||
.align 4
|
||||
.globl fe_mul
|
||||
|
@ -2034,7 +2034,7 @@ fe_mul:
|
|||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
/* Cycle Count = 24 */
|
||||
.size fe_mul,.-fe_mul
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
.text
|
||||
.align 4
|
||||
.globl fe_sq_op
|
||||
|
@ -2425,7 +2425,7 @@ fe_sq_op:
|
|||
POP {pc}
|
||||
/* Cycle Count = 179 */
|
||||
.size fe_sq_op,.-fe_sq_op
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
.text
|
||||
.align 4
|
||||
.globl fe_sq
|
||||
|
@ -2437,7 +2437,7 @@ fe_sq:
|
|||
/* Cycle Count = 24 */
|
||||
.size fe_sq,.-fe_sq
|
||||
#ifdef HAVE_CURVE25519
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
.text
|
||||
.align 4
|
||||
.globl fe_mul121666
|
||||
|
@ -2524,7 +2524,7 @@ fe_mul121666:
|
|||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
/* Cycle Count = 69 */
|
||||
.size fe_mul121666,.-fe_mul121666
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#ifndef WC_NO_CACHE_RESISTANT
|
||||
.text
|
||||
.align 4
|
||||
|
@ -3466,7 +3466,7 @@ L_fe_invert8:
|
|||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
/* Cycle Count = 292 */
|
||||
.size fe_invert,.-fe_invert
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
.text
|
||||
.align 4
|
||||
.globl fe_sq2
|
||||
|
@ -3925,7 +3925,7 @@ fe_sq2:
|
|||
POP {pc}
|
||||
/* Cycle Count = 213 */
|
||||
.size fe_sq2,.-fe_sq2
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
.text
|
||||
.align 4
|
||||
.globl fe_pow22523
|
||||
|
@ -4535,7 +4535,7 @@ ge_sub:
|
|||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
/* Cycle Count = 138 */
|
||||
.size ge_sub,.-ge_sub
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
.text
|
||||
.align 4
|
||||
.globl sc_reduce
|
||||
|
@ -5258,9 +5258,9 @@ sc_reduce:
|
|||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
/* Cycle Count = 502 */
|
||||
.size sc_reduce,.-sc_reduce
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#ifdef HAVE_ED25519_SIGN
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
.text
|
||||
.align 4
|
||||
.globl sc_muladd
|
||||
|
@ -6470,7 +6470,7 @@ sc_muladd:
|
|||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
/* Cycle Count = 752 */
|
||||
.size sc_muladd,.-sc_muladd
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#endif /* HAVE_ED25519_SIGN */
|
||||
#endif /* HAVE_ED25519 */
|
||||
|
||||
|
|
|
@ -1667,7 +1667,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b)
|
|||
#endif /* WC_NO_CACHE_RESISTANT */
|
||||
#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */
|
||||
#endif /* HAVE_ED25519 */
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
void fe_mul_op(void);
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void fe_mul_op()
|
||||
|
@ -2193,7 +2193,7 @@ void fe_mul_op()
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void fe_mul(fe r_p, const fe a_p, const fe b_p)
|
||||
#else
|
||||
|
@ -2214,7 +2214,7 @@ void fe_mul(fe r, const fe a, const fe b)
|
|||
);
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
void fe_sq_op(void);
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void fe_sq_op()
|
||||
|
@ -2619,7 +2619,7 @@ void fe_sq_op()
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void fe_sq(fe r_p, const fe a_p)
|
||||
#else
|
||||
|
@ -2640,7 +2640,7 @@ void fe_sq(fe r, const fe a)
|
|||
}
|
||||
|
||||
#ifdef HAVE_CURVE25519
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void fe_mul121666(fe r_p, fe a_p)
|
||||
#else
|
||||
|
@ -2745,7 +2745,7 @@ void fe_mul121666(fe r, fe a)
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#ifndef WC_NO_CACHE_RESISTANT
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
|
||||
|
@ -3907,7 +3907,7 @@ void fe_invert(fe r, const fe a)
|
|||
);
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void fe_sq2(fe r_p, const fe a_p)
|
||||
#else
|
||||
|
@ -4384,7 +4384,7 @@ void fe_sq2(fe r, const fe a)
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void fe_pow22523(fe r_p, const fe a_p)
|
||||
#else
|
||||
|
@ -5126,7 +5126,7 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
|
|||
);
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void sc_reduce(byte* s_p)
|
||||
#else
|
||||
|
@ -5865,9 +5865,9 @@ void sc_reduce(byte* s)
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#ifdef HAVE_ED25519_SIGN
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
|
||||
void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
|
||||
#else
|
||||
|
@ -7099,7 +7099,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#endif /* HAVE_ED25519_SIGN */
|
||||
#endif /* HAVE_ED25519 */
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -67,17 +67,17 @@ L_poly1305_thumb2_16_loop:
|
|||
ADCS r7, r7, r10
|
||||
ADD r1, r1, #0x10
|
||||
ADC r8, r8, r11
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
STM lr, {r4, r5, r6, r7, r8}
|
||||
#else
|
||||
/* h[0]-h[2] in r4-r6 for multiplication. */
|
||||
STR r7, [lr, #12]
|
||||
STR r8, [lr, #16]
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
STR r1, [sp, #16]
|
||||
LDR r1, [sp, #12]
|
||||
/* Multiply h by r */
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
/* r0 = #0, r1 = r, lr = h, r2 = h[j], r3 = r[i] */
|
||||
LDR r3, [r1]
|
||||
EOR r0, r0, r0
|
||||
|
@ -218,7 +218,7 @@ L_poly1305_thumb2_16_loop:
|
|||
UMAAL r11, r12, r3, r5
|
||||
/* DONE */
|
||||
LDM sp, {r4, r5, r6}
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
/* r12 will be zero because r is masked. */
|
||||
/* Load length */
|
||||
LDR r2, [sp, #20]
|
||||
|
|
|
@ -93,17 +93,17 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len, int not
|
|||
"ADCS r7, r7, r10\n\t"
|
||||
"ADD %[m], %[m], #0x10\n\t"
|
||||
"ADC r8, r8, r11\n\t"
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
"STM lr, {r4, r5, r6, r7, r8}\n\t"
|
||||
#else
|
||||
/* h[0]-h[2] in r4-r6 for multiplication. */
|
||||
"STR r7, [lr, #12]\n\t"
|
||||
"STR r8, [lr, #16]\n\t"
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
"STR %[m], [sp, #16]\n\t"
|
||||
"LDR %[m], [sp, #12]\n\t"
|
||||
/* Multiply h by r */
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
/* r0 = #0, r1 = r, lr = h, r2 = h[j], r3 = r[i] */
|
||||
"LDR %[notLast], [%[m]]\n\t"
|
||||
"EOR %[ctx], %[ctx], %[ctx]\n\t"
|
||||
|
@ -244,7 +244,7 @@ void poly1305_blocks_thumb2_16(Poly1305* ctx, const byte* m, word32 len, int not
|
|||
"UMAAL r11, r12, %[notLast], r5\n\t"
|
||||
/* DONE */
|
||||
"LDM sp, {r4, r5, r6}\n\t"
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
/* r12 will be zero because r is masked. */
|
||||
/* Load length */
|
||||
"LDR %[len], [sp, #20]\n\t"
|
||||
|
|
|
@ -240,7 +240,7 @@ static void sp_2048_to_bin_64(sp_digit* r, byte* a)
|
|||
#define sp_2048_norm_64(a)
|
||||
|
||||
#ifndef WOLFSSL_SP_SMALL
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
/* Multiply a and b into r. (r = a * b)
|
||||
*
|
||||
* r A single precision integer.
|
||||
|
@ -736,7 +736,7 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_d
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
/* Add b to a into r. (r = a + b)
|
||||
*
|
||||
* r A single precision integer.
|
||||
|
@ -1533,7 +1533,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
|
|||
(void)sp_2048_add_32(r + 96, r + 96, a1);
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
/* Square a and put result in r. (r = a * a)
|
||||
*
|
||||
* r A single precision integer.
|
||||
|
@ -1899,7 +1899,7 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
/* Sub b from a into r. (r = a - b)
|
||||
*
|
||||
* r A single precision integer.
|
||||
|
@ -31605,7 +31605,7 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
|
|||
}
|
||||
|
||||
#else
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
/* Multiply a and b into r. (r = a * b)
|
||||
*
|
||||
* r A single precision integer.
|
||||
|
@ -32101,7 +32101,7 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_di
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#endif /* WOLFSSL_SP_SMALL */
|
||||
#ifdef WOLFSSL_SP_SMALL
|
||||
/* Square a and put result in r. (r = a * a)
|
||||
|
@ -32222,7 +32222,7 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
|
|||
}
|
||||
|
||||
#else
|
||||
#ifdef WOLFSSL_SP_NO_UMAAL
|
||||
#ifdef WOLFSSL_ARM_ARCH_7M
|
||||
/* Square a and put result in r. (r = a * a)
|
||||
*
|
||||
* r A single precision integer.
|
||||
|
@ -32588,7 +32588,7 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
|
|||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SP_NO_UMAAL */
|
||||
#endif /* WOLFSSL_ARM_ARCH_7M */
|
||||
#endif /* WOLFSSL_SP_SMALL */
|
||||
#ifdef WOLFSSL_SP_SMALL
|
||||
/* Add b to a into r. (r = a + b)
|
||||
|
|
|
@ -173,8 +173,16 @@ const sword16 zetas_inv[KYBER_N / 2] = {
|
|||
3127, 3042, 1907, 1836, 1517, 359, 758, 1441
|
||||
};
|
||||
|
||||
/* Inline-assembly string fragment: Barrett-style reduction of the two signed
 * 16-bit halves packed in register `a`, updating `a` in place.
 *
 * Instruction by instruction:
 *   SMULWB / SMULWT  r10 / r11 = bits [47:16] of r14 * (bottom / top half of a)
 *   SMULBT           r10 / r11 = (bottom half of r12) * (top half of r10 / r11)
 *   PKHBT            r10 = low half of r10 in bits [15:0], low half of r11 in
 *                    bits [31:16]
 *   SSUB16           a = a - r10, computed independently on each halfword
 *
 * Clobbers r10 and r11. Reads r12 and r14, which the code expanding this macro
 * must have loaded beforehand.
 * NOTE(review): presumably the bottom half of r12 holds the modulus q and r14
 * the Barrett multiplier -- confirm against the assembly that sets them up.
 */
#define KYBER_BARRETT(a) \
|
||||
"SMULWB r10, r14, " #a "\n\t" \
|
||||
"SMULWT r11, r14, " #a "\n\t" \
|
||||
"SMULBT r10, r12, r10\n\t" \
|
||||
"SMULBT r11, r12, r11\n\t" \
|
||||
"PKHBT r10, r10, r11, LSL #16\n\t" \
|
||||
"SSUB16 " #a ", " #a ", r10\n\t"
|
||||
|
||||
#if !(defined(__aarch64__) && defined(WOLFSSL_ARMASM))
|
||||
|
||||
/* Compile the C implementations that follow unless ARM assembly replaces them.
 * The assembly (AArch64 or Thumb-2) is only built when WOLFSSL_ARMASM is
 * defined, so that test has to apply to both architectures: a Thumb target
 * built without WOLFSSL_ARMASM must still get the C code. */
#if !(defined(WOLFSSL_ARMASM) && (defined(__aarch64__) || defined(__thumb__)))
|
||||
/* Number-Theoretic Transform.
|
||||
*
|
||||
* @param [in, out] r Polynomial to transform.
|
||||
|
@ -939,15 +947,16 @@ static void kyber_basemul(sword16* r, const sword16* a, const sword16* b,
|
|||
*/
|
||||
static void kyber_basemul_mont(sword16* r, const sword16* a, const sword16* b)
|
||||
{
|
||||
unsigned int i;
|
||||
const sword16* zeta = zetas + 64;
|
||||
|
||||
#ifdef WOLFSSL_KYBER_SMALL
|
||||
#if defined(WOLFSSL_KYBER_SMALL)
|
||||
unsigned int i;
|
||||
for (i = 0; i < KYBER_N; i += 4, zeta++) {
|
||||
kyber_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]);
|
||||
kyber_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]);
|
||||
}
|
||||
#elif defined(WOLFSSL_KYBER_NO_LARGE_CODE)
|
||||
unsigned int i;
|
||||
for (i = 0; i < KYBER_N; i += 8, zeta += 2) {
|
||||
kyber_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]);
|
||||
kyber_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]);
|
||||
|
@ -955,6 +964,7 @@ static void kyber_basemul_mont(sword16* r, const sword16* a, const sword16* b)
|
|||
kyber_basemul(r + i + 6, a + i + 6, b + i + 6, -zeta[1]);
|
||||
}
|
||||
#else
|
||||
unsigned int i;
|
||||
for (i = 0; i < KYBER_N; i += 16, zeta += 4) {
|
||||
kyber_basemul(r + i + 0, a + i + 0, b + i + 0, zeta[0]);
|
||||
kyber_basemul(r + i + 2, a + i + 2, b + i + 2, -zeta[0]);
|
||||
|
@ -977,10 +987,10 @@ static void kyber_basemul_mont(sword16* r, const sword16* a, const sword16* b)
|
|||
static void kyber_basemul_mont_add(sword16* r, const sword16* a,
|
||||
const sword16* b)
|
||||
{
|
||||
unsigned int i;
|
||||
const sword16* zeta = zetas + 64;
|
||||
|
||||
#ifdef WOLFSSL_KYBER_SMALL
|
||||
#if defined(WOLFSSL_KYBER_SMALL)
|
||||
unsigned int i;
|
||||
for (i = 0; i < KYBER_N; i += 4, zeta++) {
|
||||
sword16 t0[2];
|
||||
sword16 t2[2];
|
||||
|
@ -994,6 +1004,7 @@ static void kyber_basemul_mont_add(sword16* r, const sword16* a,
|
|||
r[i + 3] += t2[1];
|
||||
}
|
||||
#elif defined(WOLFSSL_KYBER_NO_LARGE_CODE)
|
||||
unsigned int i;
|
||||
for (i = 0; i < KYBER_N; i += 8, zeta += 2) {
|
||||
sword16 t0[2];
|
||||
sword16 t2[2];
|
||||
|
@ -1015,6 +1026,7 @@ static void kyber_basemul_mont_add(sword16* r, const sword16* a,
|
|||
r[i + 7] += t6[1];
|
||||
}
|
||||
#else
|
||||
unsigned int i;
|
||||
for (i = 0; i < KYBER_N; i += 16, zeta += 4) {
|
||||
sword16 t0[2];
|
||||
sword16 t2[2];
|
||||
|
@ -2142,7 +2154,7 @@ int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if !(defined(WOLFSSL_ARMASM) && defined(__aarch64__))
|
||||
#if !(defined(WOLFSSL_ARMASM) && (defined(__aarch64__) || defined(__thumb__)))
|
||||
/* Rejection sampling on uniform random bytes to generate uniform random
|
||||
* integers mod q.
|
||||
*
|
||||
|
@ -3338,7 +3350,7 @@ int kyber_cmp(const byte* a, const byte* b, int sz)
|
|||
|
||||
/******************************************************************************/
|
||||
|
||||
#if !(defined(__aarch64__) && defined(WOLFSSL_ARMASM))
|
||||
/* Use the C conditional-subtraction code unless an ARM assembly version is
 * available. Assembly is only present when WOLFSSL_ARMASM is defined, so the
 * test must cover both AArch64 and Thumb; otherwise a Thumb build without
 * WOLFSSL_ARMASM would be routed to an assembly routine that is never built. */
#if !(defined(WOLFSSL_ARMASM) && (defined(__aarch64__) || defined(__thumb__)))
|
||||
|
||||
/* Conditional subtraction of q to each coefficient of a polynomial.
|
||||
*
|
||||
|
@ -3355,10 +3367,14 @@ static KYBER_NOINLINE void kyber_csubq_c(sword16* p)
|
|||
}
|
||||
}
|
||||
|
||||
#else
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
#define kyber_csubq_c kyber_csubq_neon
|
||||
|
||||
#else
|
||||
|
||||
#define kyber_csubq_c kyber_thumb2_csubq
|
||||
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
|
|
|
@ -310,6 +310,22 @@ WOLFSSL_LOCAL int kyber_cmp_neon(const byte* a, const byte* b, int sz);
|
|||
WOLFSSL_LOCAL void kyber_csubq_neon(sword16* p);
|
||||
WOLFSSL_LOCAL void kyber_from_msg_neon(sword16* p, const byte* msg);
|
||||
WOLFSSL_LOCAL void kyber_to_msg_neon(byte* msg, sword16* p);
|
||||
#elif defined(__thumb__) && defined(WOLFSSL_ARMASM)
/* Thumb build with ARM assembly enabled: route the generic kyber_* names to
 * the kyber_thumb2_* routines declared further down in this branch. */
|
||||
#define kyber_ntt kyber_thumb2_ntt
|
||||
#define kyber_invntt kyber_thumb2_invntt
|
||||
#define kyber_basemul_mont kyber_thumb2_basemul_mont
|
||||
#define kyber_basemul_mont_add kyber_thumb2_basemul_mont_add
|
||||
#define kyber_rej_uniform_c kyber_thumb2_rej_uniform
|
||||
|
||||
/* Prototypes only; the bodies are not in this header.
 * NOTE(review): presumably implemented in the thumb2-kyber-asm sources --
 * confirm against the build files. */
/* Transforms operating in place on the polynomial r. */
WOLFSSL_LOCAL void kyber_thumb2_ntt(sword16* r);
|
||||
WOLFSSL_LOCAL void kyber_thumb2_invntt(sword16* r);
|
||||
/* Base multiplication of a and b with the result in r; going by the name, the
 * _add variant accumulates into r instead of overwriting it -- TODO confirm. */
WOLFSSL_LOCAL void kyber_thumb2_basemul_mont(sword16* r, const sword16* a,
|
||||
const sword16* b);
|
||||
WOLFSSL_LOCAL void kyber_thumb2_basemul_mont_add(sword16* r, const sword16* a,
|
||||
const sword16* b);
|
||||
/* No alias for this routine is defined in this branch. */
WOLFSSL_LOCAL void kyber_thumb2_csubq(sword16* p);
|
||||
/* NOTE(review): presumably returns the number of coefficients written to p
 * (at most len) from the rLen bytes at r -- confirm against the
 * implementation. */
WOLFSSL_LOCAL unsigned int kyber_thumb2_rej_uniform(sword16* p,
|
||||
unsigned int len, const byte* r, unsigned int rLen);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
Loading…
Reference in New Issue