ARM asm: fixes for compiling on Mac and ChaCha20 streaming
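Don't pass -mcpu=generic+crypto when building for Mac (darwin) hosts. Implement streaming for ChaCha20: the unused keystream of a partial block is saved in the context (over/left) and consumed by the next call.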
This commit is contained in:
parent
46b9531bec
commit
66ed9b1522
10
configure.ac
10
configure.ac
@ -1083,8 +1083,14 @@ then
|
||||
*)
|
||||
case $host_cpu in
|
||||
*aarch64*)
|
||||
# +crypto needed for hardware acceleration
|
||||
AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto"
|
||||
case $host_os in
|
||||
*darwin*)
|
||||
;;
|
||||
*)
|
||||
# +crypto needed for hardware acceleration
|
||||
AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto"
|
||||
;;
|
||||
esac
|
||||
# Include options.h
|
||||
AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN"
|
||||
|
||||
|
@ -92,6 +92,7 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
|
||||
|
||||
XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
|
||||
|
||||
ctx->left = 0;
|
||||
ctx->X[CHACHA_IV_BYTES+0] = counter; /* block counter */
|
||||
ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
|
||||
ctx->X[CHACHA_IV_BYTES+2] = LITTLE32(temp[1]); /* counter from nonce */
|
||||
@ -166,6 +167,7 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
|
||||
ctx->X[ 1] = constants[1];
|
||||
ctx->X[ 2] = constants[2];
|
||||
ctx->X[ 3] = constants[3];
|
||||
ctx->left = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1673,7 +1675,7 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS
|
||||
}
|
||||
|
||||
static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
||||
byte* c, word32 bytes)
|
||||
byte* c, word32 bytes, byte* over)
|
||||
{
|
||||
#ifdef CHACHA_TEST
|
||||
printf("Entering wc_Chacha_encrypt_64 with %d bytes\n", bytes);
|
||||
@ -2154,6 +2156,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
||||
"B L_chacha20_arm64_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm64_64_lt_64_%=: \n\t"
|
||||
"ST1 {v0.4s-v3.4s}, [%[over]]\n\t"
|
||||
"CMP %[bytes], #32 \n\t"
|
||||
"BLT L_chacha20_arm64_64_lt_32_%= \n\t"
|
||||
"LD1 {v4.4S, v5.4S}, [%[m]], #32 \n\t"
|
||||
@ -2199,7 +2202,8 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
||||
"L_chacha20_arm64_64_done_%=: \n\t"
|
||||
: [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes64)
|
||||
: [L_chacha20_neon_rol8] "r" (L_chacha20_neon_rol8),
|
||||
[L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word)
|
||||
[L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word),
|
||||
[over] "r" (over)
|
||||
: "memory", "x4", "x5", "x6", "x7", "v0", "v1", "v2", "v3",
|
||||
"v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11"
|
||||
);
|
||||
@ -2719,6 +2723,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
||||
"B L_chacha20_arm32_64_done_%= \n\t"
|
||||
"\n"
|
||||
"L_chacha20_arm32_64_lt_64_%=: \n\t"
|
||||
"VSTM %[over], {q0-q3} \n\t"
|
||||
/* XOR 32 bytes */
|
||||
"CMP %[bytes], #32 \n\t"
|
||||
"BLT L_chacha20_arm32_64_lt_32_%= \n\t"
|
||||
@ -2785,13 +2790,16 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m,
|
||||
"\n"
|
||||
"L_chacha20_arm32_64_done_%=: \n\t"
|
||||
: [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes)
|
||||
: [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word)
|
||||
: [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word),
|
||||
[over] "r" (over)
|
||||
: "memory", "cc",
|
||||
"q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q14", "r12", "r14"
|
||||
);
|
||||
#endif /* __aarch64__ */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Encrypt a stream of bytes
|
||||
*/
|
||||
@ -2830,9 +2838,11 @@ static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
|
||||
ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], processed / CHACHA_CHUNK_BYTES);
|
||||
}
|
||||
if (bytes > 0) {
|
||||
wc_Chacha_encrypt_64(ctx->X, m, c, bytes);
|
||||
wc_Chacha_encrypt_64(ctx->X, m, c, bytes, (byte*)ctx->over);
|
||||
if (bytes > 64)
|
||||
ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
|
||||
else
|
||||
ctx->left = CHACHA_CHUNK_BYTES - bytes;
|
||||
ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]);
|
||||
}
|
||||
}
|
||||
@ -2846,6 +2856,26 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
|
||||
if (ctx == NULL || output == NULL || input == NULL)
|
||||
return BAD_FUNC_ARG;
|
||||
|
||||
/* handle left overs */
|
||||
if (msglen > 0 && ctx->left > 0) {
|
||||
byte* out;
|
||||
word32 i;
|
||||
|
||||
out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
|
||||
for (i = 0; i < msglen && i < ctx->left; i++) {
|
||||
output[i] = (byte)(input[i] ^ out[i]);
|
||||
}
|
||||
ctx->left -= i;
|
||||
|
||||
msglen -= i;
|
||||
output += i;
|
||||
input += i;
|
||||
}
|
||||
|
||||
if (msglen == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
|
||||
|
||||
return 0;
|
||||
|
@ -79,7 +79,7 @@ typedef struct ChaCha {
|
||||
byte extra[12];
|
||||
#endif
|
||||
word32 left; /* number of bytes leftover */
|
||||
#ifdef USE_INTEL_CHACHA_SPEEDUP
|
||||
#if defined(USE_INTEL_CHACHA_SPEEDUP) || defined(WOLFSSL_ARMASM)
|
||||
word32 over[CHACHA_CHUNK_WORDS];
|
||||
#endif
|
||||
} ChaCha;
|
||||
|
Loading…
x
Reference in New Issue
Block a user