From 4c709f1f2c7396e2b26cfcad699825fc9f1e94db Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 25 Sep 2019 12:47:12 -0700 Subject: [PATCH] Improvements to SHA-1, SHA-256 and MD5 performance: * Added detection for buffer alignment to avoid memcpy. * Added MD5 and SHA-1 support for XTRANSFORM_LEN to process blocks. * Cleanups for consistency between algorithms and code commenting. * Enhancement for NXP MMCAU to process more than one block at a time. * Improved MMCAU performance: SHA-1 by 35%, SHA-256 by 20% and MD5 by 78%. ``` NXP K64 w/MMCAU after: MD5 8 MB took 1.000 seconds, 7.910 MB/s SHA 4 MB took 1.005 seconds, 3.644 MB/s SHA-256 2 MB took 1.006 seconds, 2.306 MB/s NXP K64 w/MMCAU before: MD5 4 MB took 1.004 seconds, 4.450 MB/s SHA 3 MB took 1.006 seconds, 2.670 MB/s SHA-256 2 MB took 1.008 seconds, 1.913 MB/s ``` --- IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c | 1 - IDE/ROWLEY-CROSSWORKS-ARM/test_main.c | 2 +- IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h | 1 + wolfcrypt/src/md5.c | 438 +++++++++++------- wolfcrypt/src/port/Espressif/esp32_sha.c | 10 +- wolfcrypt/src/sha.c | 210 ++++++--- wolfcrypt/src/sha256.c | 319 +++++++------ .../wolfcrypt/port/Espressif/esp32-crypt.h | 6 +- 8 files changed, 605 insertions(+), 382 deletions(-) diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c b/IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c index 7e5f2d36d..d2d1f0370 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c +++ b/IDE/ROWLEY-CROSSWORKS-ARM/kinetis_hw.c @@ -89,7 +89,6 @@ /* Note: You will also need to update the UART clock gate in hw_uart_init (SIM_SCGC1_UART5_MASK) */ /* Note: TWR-K60 is UART3, PTC17 */ -/* Note: FRDM-K64 is UART4, PTE24 */ /* Note: FRDM-K64 is UART4, PTE24 or UART0 PTB17 for OpenOCD (SIM_SCGC4_UART0_MASK)*/ /* Note: TWR-K64 is UART5, PTE8 */ /* Note: FRDM-K82F is LPUART0 A2, LPUART4 PTC15 */ diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/test_main.c b/IDE/ROWLEY-CROSSWORKS-ARM/test_main.c index 6fb50311f..c3d306e46 100644 --- 
a/IDE/ROWLEY-CROSSWORKS-ARM/test_main.c +++ b/IDE/ROWLEY-CROSSWORKS-ARM/test_main.c @@ -65,7 +65,7 @@ void main(void) test_num++; } while(args.return_code == 0); - /*Print this again for redundancy*/ + /* Print this again for redundancy */ #ifdef WOLFSSL_FRDM_K64_JENKINS printf("\n&&&&&&&&&&&&&& done &&&&&&&&&&&&&\n"); delay_us(1000000); diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h index 63279511e..e99aa7aff 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h +++ b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h @@ -208,6 +208,7 @@ extern "C" { /* MD5 */ #undef NO_MD5 #if 1 +#else #define NO_MD5 #endif diff --git a/wolfcrypt/src/md5.c b/wolfcrypt/src/md5.c index ad0ea18f7..1b5a86e6e 100644 --- a/wolfcrypt/src/md5.c +++ b/wolfcrypt/src/md5.c @@ -22,7 +22,7 @@ #ifdef HAVE_CONFIG_H - #include +#include #endif #include @@ -30,7 +30,7 @@ #if !defined(NO_MD5) #if defined(WOLFSSL_TI_HASH) - /* #include included by wc_port.c */ +/* #include included by wc_port.c */ #else @@ -40,197 +40,225 @@ #include #ifdef NO_INLINE - #include +#include #else - #define WOLFSSL_MISC_INCLUDED - #include +#define WOLFSSL_MISC_INCLUDED +#include #endif /* Hardware Acceleration */ #if defined(STM32_HASH) - /* Supports CubeMX HAL or Standard Peripheral Library */ - #define HAVE_MD5_CUST_API +/* Supports CubeMX HAL or Standard Peripheral Library */ +#define HAVE_MD5_CUST_API - int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId) - { - if (md5 == NULL) { - return BAD_FUNC_ARG; - } - - (void)devId; - (void)heap; - - wc_Stm32_Hash_Init(&md5->stmCtx); - - return 0; +int wc_InitMd5_ex(wc_Md5* md5, void* heap, int devId) +{ + if (md5 == NULL) { + return BAD_FUNC_ARG; } - int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) - { - int ret; + (void)devId; + (void)heap; - if (md5 == NULL || (data == NULL && len > 0)) { - return BAD_FUNC_ARG; - } + wc_Stm32_Hash_Init(&md5->stmCtx); - ret = wolfSSL_CryptHwMutexLock(); - if (ret == 0) { - ret = 
wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5, - data, len); - wolfSSL_CryptHwMutexUnLock(); - } - return ret; + return 0; +} + +int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) +{ + int ret; + + if (md5 == NULL || (data == NULL && len > 0)) { + return BAD_FUNC_ARG; } - int wc_Md5Final(wc_Md5* md5, byte* hash) - { - int ret; - - if (md5 == NULL || hash == NULL) { - return BAD_FUNC_ARG; - } - - ret = wolfSSL_CryptHwMutexLock(); - if (ret == 0) { - ret = wc_Stm32_Hash_Final(&md5->stmCtx, HASH_AlgoSelection_MD5, - hash, WC_MD5_DIGEST_SIZE); - wolfSSL_CryptHwMutexUnLock(); - } - - (void)wc_InitMd5(md5); /* reset state */ - - return ret; + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + ret = wc_Stm32_Hash_Update(&md5->stmCtx, HASH_AlgoSelection_MD5, + data, len); + wolfSSL_CryptHwMutexUnLock(); } + return ret; +} + +int wc_Md5Final(wc_Md5* md5, byte* hash) +{ + int ret; + + if (md5 == NULL || hash == NULL) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + ret = wc_Stm32_Hash_Final(&md5->stmCtx, HASH_AlgoSelection_MD5, + hash, WC_MD5_DIGEST_SIZE); + wolfSSL_CryptHwMutexUnLock(); + } + + (void)wc_InitMd5(md5); /* reset state */ + + return ret; +} #elif defined(FREESCALE_MMCAU_SHA) - #include "cau_api.h" - #define XTRANSFORM(S,B) Transform((S), (B)) - static int Transform(wc_Md5* md5, byte* data) - { - int ret = wolfSSL_CryptHwMutexLock(); - if(ret == 0) { - #ifdef FREESCALE_MMCAU_CLASSIC_SHA - cau_md5_hash_n(data, 1, (unsigned char*)md5->digest); - #else - MMCAU_MD5_HashN(data, 1, (uint32_t*)md5->digest); - #endif - wolfSSL_CryptHwMutexUnLock(); - } - return ret; +#ifdef FREESCALE_MMCAU_CLASSIC_SHA + #include "cau_api.h" +#else + #include "fsl_mmcau.h" +#endif + +#define XTRANSFORM(S,B) Transform((S), (B)) +#define XTRANSFORM_LEN(S,B,L) Transform_Len((S), (B), (L)) + +static int Transform(wc_Md5* md5, const byte* data) +{ + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { +#ifdef 
FREESCALE_MMCAU_CLASSIC_SHA + cau_md5_hash_n((byte*)data, 1, (unsigned char*)md5->digest); +#else + MMCAU_MD5_HashN((byte*)data, 1, (uint32_t*)md5->digest); +#endif + wolfSSL_CryptHwMutexUnLock(); } + return ret; +} + +static int Transform_Len(wc_Md5* md5, const byte* data, word32 len) +{ + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { +#ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_md5_hash_n((byte*)data, len / WC_MD5_BLOCK_SIZE, + (unsigned char*)md5->digest); +#else + MMCAU_MD5_HashN((byte*)data, len / WC_MD5_BLOCK_SIZE, + (uint32_t*)md5->digest); +#endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; +} #elif defined(WOLFSSL_PIC32MZ_HASH) - #include - #define HAVE_MD5_CUST_API +#include +#define HAVE_MD5_CUST_API #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_HASH) - /* functions implemented in wolfcrypt/src/port/caam/caam_sha.c */ - #define HAVE_MD5_CUST_API +/* functions implemented in wolfcrypt/src/port/caam/caam_sha.c */ +#define HAVE_MD5_CUST_API #else - #define NEED_SOFT_MD5 +#define NEED_SOFT_MD5 #endif /* End Hardware Acceleration */ +#ifndef WC_MD5_DATA_ALIGNMENT + /* default to 32-bit alignment */ + #define WC_MD5_DATA_ALIGNMENT 4 +#endif #ifdef NEED_SOFT_MD5 - #define XTRANSFORM(S,B) Transform((S)) +#define XTRANSFORM(S,B) Transform((S),(B)) - #define F1(x, y, z) (z ^ (x & (y ^ z))) - #define F2(x, y, z) F1(z, x, y) - #define F3(x, y, z) (x ^ y ^ z) - #define F4(x, y, z) (y ^ (x | ~z)) +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) - #define MD5STEP(f, w, x, y, z, data, s) \ +#define MD5STEP(f, w, x, y, z, data, s) \ w = rotlFixed(w + f(x, y, z) + data, s) + x - static int Transform(wc_Md5* md5) - { - /* Copy context->state[] to working vars */ - word32 a = md5->digest[0]; - word32 b = md5->digest[1]; - word32 c = md5->digest[2]; - word32 d = md5->digest[3]; +static int Transform(wc_Md5* md5, const byte* data) +{ + word32* buffer = 
(word32*)data; + /* Copy context->state[] to working vars */ + word32 a = md5->digest[0]; + word32 b = md5->digest[1]; + word32 c = md5->digest[2]; + word32 d = md5->digest[3]; - MD5STEP(F1, a, b, c, d, md5->buffer[0] + 0xd76aa478, 7); - MD5STEP(F1, d, a, b, c, md5->buffer[1] + 0xe8c7b756, 12); - MD5STEP(F1, c, d, a, b, md5->buffer[2] + 0x242070db, 17); - MD5STEP(F1, b, c, d, a, md5->buffer[3] + 0xc1bdceee, 22); - MD5STEP(F1, a, b, c, d, md5->buffer[4] + 0xf57c0faf, 7); - MD5STEP(F1, d, a, b, c, md5->buffer[5] + 0x4787c62a, 12); - MD5STEP(F1, c, d, a, b, md5->buffer[6] + 0xa8304613, 17); - MD5STEP(F1, b, c, d, a, md5->buffer[7] + 0xfd469501, 22); - MD5STEP(F1, a, b, c, d, md5->buffer[8] + 0x698098d8, 7); - MD5STEP(F1, d, a, b, c, md5->buffer[9] + 0x8b44f7af, 12); - MD5STEP(F1, c, d, a, b, md5->buffer[10] + 0xffff5bb1, 17); - MD5STEP(F1, b, c, d, a, md5->buffer[11] + 0x895cd7be, 22); - MD5STEP(F1, a, b, c, d, md5->buffer[12] + 0x6b901122, 7); - MD5STEP(F1, d, a, b, c, md5->buffer[13] + 0xfd987193, 12); - MD5STEP(F1, c, d, a, b, md5->buffer[14] + 0xa679438e, 17); - MD5STEP(F1, b, c, d, a, md5->buffer[15] + 0x49b40821, 22); + MD5STEP(F1, a, b, c, d, buffer[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, buffer[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, buffer[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, buffer[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, buffer[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, buffer[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, buffer[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, buffer[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, buffer[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, buffer[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, buffer[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, buffer[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, buffer[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, buffer[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, buffer[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, 
buffer[15] + 0x49b40821, 22); - MD5STEP(F2, a, b, c, d, md5->buffer[1] + 0xf61e2562, 5); - MD5STEP(F2, d, a, b, c, md5->buffer[6] + 0xc040b340, 9); - MD5STEP(F2, c, d, a, b, md5->buffer[11] + 0x265e5a51, 14); - MD5STEP(F2, b, c, d, a, md5->buffer[0] + 0xe9b6c7aa, 20); - MD5STEP(F2, a, b, c, d, md5->buffer[5] + 0xd62f105d, 5); - MD5STEP(F2, d, a, b, c, md5->buffer[10] + 0x02441453, 9); - MD5STEP(F2, c, d, a, b, md5->buffer[15] + 0xd8a1e681, 14); - MD5STEP(F2, b, c, d, a, md5->buffer[4] + 0xe7d3fbc8, 20); - MD5STEP(F2, a, b, c, d, md5->buffer[9] + 0x21e1cde6, 5); - MD5STEP(F2, d, a, b, c, md5->buffer[14] + 0xc33707d6, 9); - MD5STEP(F2, c, d, a, b, md5->buffer[3] + 0xf4d50d87, 14); - MD5STEP(F2, b, c, d, a, md5->buffer[8] + 0x455a14ed, 20); - MD5STEP(F2, a, b, c, d, md5->buffer[13] + 0xa9e3e905, 5); - MD5STEP(F2, d, a, b, c, md5->buffer[2] + 0xfcefa3f8, 9); - MD5STEP(F2, c, d, a, b, md5->buffer[7] + 0x676f02d9, 14); - MD5STEP(F2, b, c, d, a, md5->buffer[12] + 0x8d2a4c8a, 20); + MD5STEP(F2, a, b, c, d, buffer[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, buffer[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, buffer[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, buffer[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, buffer[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, buffer[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, buffer[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, buffer[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, buffer[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, buffer[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, buffer[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, buffer[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, buffer[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, buffer[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, buffer[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, buffer[12] + 0x8d2a4c8a, 20); - MD5STEP(F3, a, b, c, d, md5->buffer[5] + 0xfffa3942, 4); - MD5STEP(F3, d, a, b, c, md5->buffer[8] + 0x8771f681, 11); - MD5STEP(F3, 
c, d, a, b, md5->buffer[11] + 0x6d9d6122, 16); - MD5STEP(F3, b, c, d, a, md5->buffer[14] + 0xfde5380c, 23); - MD5STEP(F3, a, b, c, d, md5->buffer[1] + 0xa4beea44, 4); - MD5STEP(F3, d, a, b, c, md5->buffer[4] + 0x4bdecfa9, 11); - MD5STEP(F3, c, d, a, b, md5->buffer[7] + 0xf6bb4b60, 16); - MD5STEP(F3, b, c, d, a, md5->buffer[10] + 0xbebfbc70, 23); - MD5STEP(F3, a, b, c, d, md5->buffer[13] + 0x289b7ec6, 4); - MD5STEP(F3, d, a, b, c, md5->buffer[0] + 0xeaa127fa, 11); - MD5STEP(F3, c, d, a, b, md5->buffer[3] + 0xd4ef3085, 16); - MD5STEP(F3, b, c, d, a, md5->buffer[6] + 0x04881d05, 23); - MD5STEP(F3, a, b, c, d, md5->buffer[9] + 0xd9d4d039, 4); - MD5STEP(F3, d, a, b, c, md5->buffer[12] + 0xe6db99e5, 11); - MD5STEP(F3, c, d, a, b, md5->buffer[15] + 0x1fa27cf8, 16); - MD5STEP(F3, b, c, d, a, md5->buffer[2] + 0xc4ac5665, 23); + MD5STEP(F3, a, b, c, d, buffer[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, buffer[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, buffer[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, buffer[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, buffer[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, buffer[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, buffer[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, buffer[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, buffer[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, buffer[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, buffer[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, buffer[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, buffer[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, buffer[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, buffer[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, buffer[2] + 0xc4ac5665, 23); - MD5STEP(F4, a, b, c, d, md5->buffer[0] + 0xf4292244, 6); - MD5STEP(F4, d, a, b, c, md5->buffer[7] + 0x432aff97, 10); - MD5STEP(F4, c, d, a, b, md5->buffer[14] + 0xab9423a7, 15); - MD5STEP(F4, b, c, d, a, md5->buffer[5] + 0xfc93a039, 21); - MD5STEP(F4, a, b, c, d, md5->buffer[12] + 
0x655b59c3, 6); - MD5STEP(F4, d, a, b, c, md5->buffer[3] + 0x8f0ccc92, 10); - MD5STEP(F4, c, d, a, b, md5->buffer[10] + 0xffeff47d, 15); - MD5STEP(F4, b, c, d, a, md5->buffer[1] + 0x85845dd1, 21); - MD5STEP(F4, a, b, c, d, md5->buffer[8] + 0x6fa87e4f, 6); - MD5STEP(F4, d, a, b, c, md5->buffer[15] + 0xfe2ce6e0, 10); - MD5STEP(F4, c, d, a, b, md5->buffer[6] + 0xa3014314, 15); - MD5STEP(F4, b, c, d, a, md5->buffer[13] + 0x4e0811a1, 21); - MD5STEP(F4, a, b, c, d, md5->buffer[4] + 0xf7537e82, 6); - MD5STEP(F4, d, a, b, c, md5->buffer[11] + 0xbd3af235, 10); - MD5STEP(F4, c, d, a, b, md5->buffer[2] + 0x2ad7d2bb, 15); - MD5STEP(F4, b, c, d, a, md5->buffer[9] + 0xeb86d391, 21); + MD5STEP(F4, a, b, c, d, buffer[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, buffer[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, buffer[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, buffer[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, buffer[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, buffer[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, buffer[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, buffer[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, buffer[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, buffer[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, buffer[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, buffer[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, buffer[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, buffer[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, buffer[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, buffer[9] + 0xeb86d391, 21); - /* Add the working vars back into digest state[] */ - md5->digest[0] += a; - md5->digest[1] += b; - md5->digest[2] += c; - md5->digest[3] += d; + /* Add the working vars back into digest state[] */ + md5->digest[0] += a; + md5->digest[1] += b; + md5->digest[2] += c; + md5->digest[3] += d; - return 0; - } + return 0; +} #endif /* NEED_SOFT_MD5 */ #ifndef HAVE_MD5_CUST_API @@ -277,17 +305,20 @@ int wc_InitMd5_ex(wc_Md5* md5, void* 
heap, int devId) #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) ret = wolfAsync_DevCtxInit(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5, - md5->heap, devId); + md5->heap, devId); #else (void)devId; #endif return ret; } +/* do block size increments/updates */ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) { int ret = 0; - byte* local; + word32 blocksLen; + byte* local; + word32* local32; if (md5 == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; @@ -295,36 +326,92 @@ int wc_Md5Update(wc_Md5* md5, const byte* data, word32 len) #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) { - #if defined(HAVE_INTEL_QA) +#if defined(HAVE_INTEL_QA) return IntelQaSymMd5(&md5->asyncDev, NULL, data, len); - #endif +#endif } #endif /* WOLFSSL_ASYNC_CRYPT */ - /* do block size increments */ - local = (byte*)md5->buffer; - /* check that internal buffLen is valid */ if (md5->buffLen >= WC_MD5_BLOCK_SIZE) return BUFFER_E; - while (len) { - word32 add = min(len, WC_MD5_BLOCK_SIZE - md5->buffLen); - XMEMCPY(&local[md5->buffLen], data, add); + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } - md5->buffLen += add; - data += add; - len -= add; + /* add length for final */ + AddLength(md5, len); + + local = (byte*)md5->buffer; + local32 = md5->buffer; + + /* process any remainder from previous operation */ + if (md5->buffLen > 0) { + blocksLen = min(len, WC_MD5_BLOCK_SIZE - md5->buffLen); + XMEMCPY(&local[md5->buffLen], data, blocksLen); + + md5->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; if (md5->buffLen == WC_MD5_BLOCK_SIZE) { #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE); + ByteReverseWords(local32, local32, WC_MD5_BLOCK_SIZE); #endif - XTRANSFORM(md5, local); - AddLength(md5, WC_MD5_BLOCK_SIZE); + + ret = XTRANSFORM(md5, (const byte*)local); + if (ret != 0) + 
return ret; + md5->buffLen = 0; } } + + /* process blocks */ +#ifdef XTRANSFORM_LEN + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_MD5_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal performed in function if required. */ + XTRANSFORM_LEN(md5, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } +#else + while (len >= WC_MD5_BLOCK_SIZE) { + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Big Endian requires byte swap, so can't use data directly */ + #if defined(WC_MD5_DATA_ALIGNMENT) && !defined(BIG_ENDIAN_ORDER) + if (((size_t)data % WC_MD5_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_MD5_BLOCK_SIZE); + } + + data += WC_MD5_BLOCK_SIZE; + len -= WC_MD5_BLOCK_SIZE; + + #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(local32, local32, WC_MD5_BLOCK_SIZE); + #endif + + ret = XTRANSFORM(md5, (const byte*)local32); + } +#endif /* XTRANSFORM_LEN */ + + /* save remainder */ + if (len > 0) { + XMEMCPY(local, data, len); + md5->buffLen = len; + } + return ret; } @@ -338,15 +425,14 @@ int wc_Md5Final(wc_Md5* md5, byte* hash) #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5) if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) { - #if defined(HAVE_INTEL_QA) +#if defined(HAVE_INTEL_QA) return IntelQaSymMd5(&md5->asyncDev, hash, NULL, WC_MD5_DIGEST_SIZE); - #endif +#endif } #endif /* WOLFSSL_ASYNC_CRYPT */ local = (byte*)md5->buffer; - AddLength(md5, md5->buffLen); /* before adding pads */ local[md5->buffLen++] = 0x80; /* add 1 */ /* pad with zeros */ @@ -354,9 +440,9 @@ int wc_Md5Final(wc_Md5* md5, byte* hash) XMEMSET(&local[md5->buffLen], 0, WC_MD5_BLOCK_SIZE - md5->buffLen); md5->buffLen += WC_MD5_BLOCK_SIZE - md5->buffLen; - #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) +#if 
defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) ByteReverseWords(md5->buffer, md5->buffer, WC_MD5_BLOCK_SIZE); - #endif +#endif XTRANSFORM(md5, local); md5->buffLen = 0; } @@ -367,7 +453,7 @@ int wc_Md5Final(wc_Md5* md5, byte* hash) #endif /* put lengths in bits */ - md5->hiLen = (md5->loLen >> (8*sizeof(md5->loLen) - 3)) + + md5->hiLen = (md5->loLen >> (8 * sizeof(md5->loLen) - 3)) + (md5->hiLen << 3); md5->loLen = md5->loLen << 3; @@ -441,7 +527,7 @@ int wc_Md5Copy(wc_Md5* src, wc_Md5* dst) ret = wc_Pic32HashCopy(&src->cache, &dst->cache); #endif #if defined(WOLFSSL_HASH_FLAGS) || defined(WOLF_CRYPTO_CB) - dst->flags |= WC_HASH_FLAG_ISCOPY; + dst->flags |= WC_HASH_FLAG_ISCOPY; #endif return ret; diff --git a/wolfcrypt/src/port/Espressif/esp32_sha.c b/wolfcrypt/src/port/Espressif/esp32_sha.c index c60f31d23..57377aef6 100644 --- a/wolfcrypt/src/port/Espressif/esp32_sha.c +++ b/wolfcrypt/src/port/Espressif/esp32_sha.c @@ -279,7 +279,7 @@ static void esp_digest_state(WC_ESP32SHA* ctx, byte* hash, enum SHA_TYPE sha_typ /* * sha1 process */ -int esp_sha_process(struct wc_Sha* sha) +int esp_sha_process(struct wc_Sha* sha, const byte* data) { int ret = 0; @@ -287,7 +287,7 @@ int esp_sha_process(struct wc_Sha* sha) word32 SHA_START_REG = SHA_1_START_REG; - esp_process_block(&sha->ctx, SHA_START_REG, sha->buffer, + esp_process_block(&sha->ctx, SHA_START_REG, (const word32*)data, WC_SHA_BLOCK_SIZE); ESP_LOGV(TAG, "leave esp_sha_process"); @@ -322,7 +322,7 @@ int esp_sha_digest_process(struct wc_Sha* sha, byte blockproc) /* * sha256 process */ -int esp_sha256_process(struct wc_Sha256* sha) +int esp_sha256_process(struct wc_Sha256* sha, const byte* data) { int ret = 0; word32 SHA_START_REG = SHA_1_START_REG; @@ -332,8 +332,8 @@ int esp_sha256_process(struct wc_Sha256* sha) /* start register offset */ SHA_START_REG += (SHA2_256 << 4); - esp_process_block(&sha->ctx, SHA_START_REG, sha->buffer, - WC_SHA256_BLOCK_SIZE); + esp_process_block(&sha->ctx, SHA_START_REG, 
(const word32*)data, + WC_SHA256_BLOCK_SIZE); ESP_LOGV(TAG, "leave esp_sha256_process"); diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c index 72f3af886..a601e9e28 100644 --- a/wolfcrypt/src/sha.c +++ b/wolfcrypt/src/sha.c @@ -205,7 +205,9 @@ #endif #define USE_SHA_SOFTWARE_IMPL /* Only for API's, actual transform is here */ - #define XTRANSFORM(S,B) Transform((S),(B)) + + #define XTRANSFORM(S,B) Transform((S),(B)) + #define XTRANSFORM_LEN(S,B,L) Transform_Len((S),(B),(L)) static int InitSha(wc_Sha* sha) { @@ -228,14 +230,29 @@ return ret; } - static int Transform(wc_Sha* sha, byte* data) + static int Transform(wc_Sha* sha, const byte* data) { int ret = wolfSSL_CryptHwMutexLock(); if(ret == 0) { #ifdef FREESCALE_MMCAU_CLASSIC_SHA - cau_sha1_hash_n(data, 1, sha->digest); + cau_sha1_hash_n((byte*)data, 1, sha->digest); #else - MMCAU_SHA1_HashN(data, 1, (uint32_t*)sha->digest); + MMCAU_SHA1_HashN((byte*)data, 1, (uint32_t*)sha->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + + static int Transform_Len(wc_Sha* sha, const byte* data, word32 len) + { + int ret = wolfSSL_CryptHwMutexLock(); + if(ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha1_hash_n((byte*)data, len/WC_SHA_BLOCK_SIZE, sha->digest); + #else + MMCAU_SHA1_HashN((byte*)data, len/WC_SHA_BLOCK_SIZE, + (uint32_t*)sha->digest); #endif wolfSSL_CryptHwMutexUnLock(); } @@ -280,12 +297,12 @@ return ret; } - + #elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) - + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ - + #else /* Software implementation */ #define USE_SHA_SOFTWARE_IMPL @@ -312,6 +329,10 @@ #endif /* End Hardware Acceleration */ +#ifndef WC_SHA_DATA_ALIGNMENT + /* default to 32-bit alignment */ + #define WC_SHA_DATA_ALIGNMENT 4 +#endif /* Software implementation */ #ifdef USE_SHA_SOFTWARE_IMPL @@ -327,7 +348,7 @@ static WC_INLINE void AddLength(wc_Sha* sha, word32 len) #ifndef XTRANSFORM #define 
XTRANSFORM(S,B) Transform((S),(B)) - #define blk0(i) (W[i] = sha->buffer[i]) + #define blk0(i) (W[i] = *((word32*)&data[i*sizeof(word32)])) #define blk1(i) (W[(i)&15] = \ rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1)) @@ -356,7 +377,7 @@ static WC_INLINE void AddLength(wc_Sha* sha, word32 len) #define R4(v,w,x,y,z,i) (z)+= f4((w),(x),(y)) + blk1((i)) + 0xCA62C1D6+ \ rotlFixed((v),5); (w) = rotlFixed((w),30); - static void Transform(wc_Sha* sha, byte* data) + static int Transform(wc_Sha* sha, const byte* data) { word32 W[WC_SHA_BLOCK_SIZE / sizeof(word32)]; @@ -431,6 +452,8 @@ static WC_INLINE void AddLength(wc_Sha* sha, word32 len) sha->digest[4] += e; (void)data; /* Not used */ + + return 0; } #endif /* !USE_CUSTOM_SHA_TRANSFORM */ @@ -466,17 +489,18 @@ int wc_InitSha_ex(wc_Sha* sha, void* heap, int devId) return ret; } +/* do block size increments/updates */ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) { + int ret = 0; + word32 blocksLen; byte* local; + word32* local32; - if (sha == NULL ||(data == NULL && len > 0)) { + if (sha == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; } - /* do block size increments */ - local = (byte*)sha->buffer; - #ifdef WOLF_CRYPTO_CB if (sha->devId != INVALID_DEVID) { int ret = wc_CryptoCb_ShaHash(sha, data, len, NULL); @@ -497,37 +521,107 @@ int wc_ShaUpdate(wc_Sha* sha, const byte* data, word32 len) if (sha->buffLen >= WC_SHA_BLOCK_SIZE) return BUFFER_E; - while (len) { - word32 add = min(len, WC_SHA_BLOCK_SIZE - sha->buffLen); - XMEMCPY(&local[sha->buffLen], data, add); + if (data == NULL && len == 0) { + /* valid, but do nothing */ + return 0; + } - sha->buffLen += add; - data += add; - len -= add; + /* add length for final */ + AddLength(sha, len); + + local = (byte*)sha->buffer; + local32 = sha->buffer; + + /* process any remainder from previous operation */ + if (sha->buffLen > 0) { + blocksLen = min(len, WC_SHA_BLOCK_SIZE - sha->buffLen); + XMEMCPY(&local[sha->buffLen], data, 
blocksLen); + + sha->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; if (sha->buffLen == WC_SHA_BLOCK_SIZE) { -#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - ByteReverseWords(sha->buffer, sha->buffer, WC_SHA_BLOCK_SIZE); -#endif -#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - XTRANSFORM(sha, local); -#else - if(sha->ctx.mode == ESP32_SHA_INIT){ + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(local32, local32, WC_SHA_BLOCK_SIZE); + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { esp_sha_try_hw_lock(&sha->ctx); } - if(sha->ctx.mode == ESP32_SHA_SW){ - XTRANSFORM(sha, local); + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); } else { - esp_sha_process(sha); + esp_sha_process(sha, (const byte*)local); } -#endif - AddLength(sha, WC_SHA_BLOCK_SIZE); + #else + ret = XTRANSFORM(sha, (const byte*)local); + #endif + if (ret != 0) + return ret; + sha->buffLen = 0; } } - return 0; + /* process blocks */ +#ifdef XTRANSFORM_LEN + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_SHA_BLOCK_SIZE-1); + if (blocksLen > 0) { + /* Byte reversal performed in function if required. 
*/ + XTRANSFORM_LEN(sha, data, blocksLen); + data += blocksLen; + len -= blocksLen; + } +#else + while (len >= WC_SHA_BLOCK_SIZE) { + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Little Endian requires byte swap, so can't use data directly */ + #if defined(WC_SHA_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) + if (((size_t)data % WC_SHA_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_SHA_BLOCK_SIZE); + } + + data += WC_SHA_BLOCK_SIZE; + len -= WC_SHA_BLOCK_SIZE; + + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + ByteReverseWords(local32, local32, WC_SHA_BLOCK_SIZE); + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT){ + esp_sha_try_hw_lock(&sha->ctx); + } + if (sha->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha, (const byte*)local32); + } else { + esp_sha_process(sha, (const byte*)local32); + } + #else + ret = XTRANSFORM(sha, (const byte*)local32); + #endif + } +#endif /* XTRANSFORM_LEN */ + + /* save remainder */ + if (len > 0) { + XMEMCPY(local, data, len); + sha->buffLen = len; + } + + return ret; } int wc_ShaFinalRaw(wc_Sha* sha, byte* hash) @@ -552,6 +646,7 @@ int wc_ShaFinalRaw(wc_Sha* sha, byte* hash) int wc_ShaFinal(wc_Sha* sha, byte* hash) { + int ret; byte* local; if (sha == NULL || hash == NULL) { @@ -576,8 +671,6 @@ int wc_ShaFinal(wc_Sha* sha, byte* hash) } #endif /* WOLFSSL_ASYNC_CRYPT */ - AddLength(sha, sha->buffLen); /* before adding pads */ - local[sha->buffLen++] = 0x80; /* add 1 */ /* pad with zeros */ @@ -585,22 +678,26 @@ int wc_ShaFinal(wc_Sha* sha, byte* hash) XMEMSET(&local[sha->buffLen], 0, WC_SHA_BLOCK_SIZE - sha->buffLen); sha->buffLen += WC_SHA_BLOCK_SIZE - sha->buffLen; -#if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) ByteReverseWords(sha->buffer, 
sha->buffer, WC_SHA_BLOCK_SIZE); -#endif -#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - XTRANSFORM(sha, local); -#else - if(sha->ctx.mode == ESP32_SHA_INIT){ + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { esp_sha_try_hw_lock(&sha->ctx); } - if(sha->ctx.mode == ESP32_SHA_SW){ - XTRANSFORM(sha, local); + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); } else { - esp_sha_process(sha); + esp_sha_process(sha, (const byte*)local); } -#endif + #else + ret = XTRANSFORM(sha, (const byte*)local); + #endif + if (ret != 0) + return ret; + sha->buffLen = 0; } XMEMSET(&local[sha->buffLen], 0, WC_SHA_PAD_SIZE - sha->buffLen); @@ -625,26 +722,29 @@ int wc_ShaFinal(wc_Sha* sha, byte* hash) 2 * sizeof(word32)); #endif -#if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - XTRANSFORM(sha, local); -#else - if(sha->ctx.mode == ESP32_SHA_INIT){ +#if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha->ctx.mode == ESP32_SHA_INIT) { esp_sha_try_hw_lock(&sha->ctx); } - if(sha->ctx.mode == ESP32_SHA_SW){ - XTRANSFORM(sha, local); + if (sha->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha, (const byte*)local); } else { esp_sha_digest_process(sha, 1); } +#else + ret = XTRANSFORM(sha, (const byte*)local); #endif #ifdef LITTLE_ENDIAN_ORDER ByteReverseWords(sha->digest, sha->digest, WC_SHA_DIGEST_SIZE); #endif + XMEMCPY(hash, sha->digest, WC_SHA_DIGEST_SIZE); - return InitSha(sha); /* reset state */ + (void)InitSha(sha); /* reset state */ + + return ret; } #endif /* USE_SHA_SOFTWARE_IMPL */ @@ -707,7 +807,7 @@ int wc_ShaGetHash(wc_Sha* sha, byte* hash) sha->ctx.mode = ESP32_SHA_SW; #endif - + } return ret; } diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index f48a412ee..23dbbc410 100644 --- a/wolfcrypt/src/sha256.c 
+++ b/wolfcrypt/src/sha256.c @@ -165,7 +165,7 @@ !defined(WOLFSSL_AFALG_HASH) && !defined(WOLFSSL_DEVCRYPTO_HASH) && \ (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH)) && \ (!defined(WOLFSSL_RENESAS_TSIP_CRYPT) || defined(NO_WOLFSSL_RENESAS_TSIP_HASH)) - + static int InitSha256(wc_Sha256* sha256) { int ret = 0; @@ -201,6 +201,9 @@ static int InitSha256(wc_Sha256* sha256) /* in case intel instructions aren't available, plus we need the K[] global */ #define NEED_SOFT_SHA256 + /* requires 128-bit alignment */ + #define WC_SHA256_DATA_ALIGNMENT 16 + /***** Intel AVX1/AVX2 Macro Control Structure @@ -258,43 +261,44 @@ static int InitSha256(wc_Sha256* sha256) */ /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */ - static int Transform_Sha256(wc_Sha256* sha256); + static int Transform_Sha256(wc_Sha256* sha256, const byte* data); #ifdef __cplusplus extern "C" { #endif #if defined(HAVE_INTEL_AVX1) - extern int Transform_Sha256_AVX1(wc_Sha256 *sha256); + extern int Transform_Sha256_AVX1(wc_Sha256 *sha256, const byte* data); extern int Transform_Sha256_AVX1_Len(wc_Sha256* sha256, const byte* data, word32 len); #endif #if defined(HAVE_INTEL_AVX2) - extern int Transform_Sha256_AVX2(wc_Sha256 *sha256); + extern int Transform_Sha256_AVX2(wc_Sha256 *sha256, const byte* data); extern int Transform_Sha256_AVX2_Len(wc_Sha256* sha256, const byte* data, word32 len); #ifdef HAVE_INTEL_RORX - extern int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256); + extern int Transform_Sha256_AVX1_RORX(wc_Sha256 *sha256, const byte* data); extern int Transform_Sha256_AVX1_RORX_Len(wc_Sha256* sha256, const byte* data, word32 len); - extern int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256); + extern int Transform_Sha256_AVX2_RORX(wc_Sha256 *sha256, const byte* data); extern int Transform_Sha256_AVX2_RORX_Len(wc_Sha256* sha256, const byte* data, word32 len); - #endif - #endif + #endif /* HAVE_INTEL_RORX */ + #endif /* HAVE_INTEL_AVX2 */ #ifdef __cplusplus } 
/* extern "C" */ #endif - static int (*Transform_Sha256_p)(wc_Sha256* sha256); + static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data); /* = _Transform_Sha256 */ static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data, word32 len); /* = NULL */ static int transform_check = 0; static word32 intel_flags; - #define XTRANSFORM(S) (*Transform_Sha256_p)((S)) + + #define XTRANSFORM(S, D) (*Transform_Sha256_p)((S),(D)) #define XTRANSFORM_LEN(S, D, L) (*Transform_Sha256_Len_p)((S),(D),(L)) static void Sha256_SetTransform(void) @@ -390,7 +394,7 @@ static int InitSha256(wc_Sha256* sha256) #include "fsl_mmcau.h" #endif - #define XTRANSFORM(S) Transform_Sha256((S)) + #define XTRANSFORM(S, D) Transform_Sha256((S),(D)) #define XTRANSFORM_LEN(S, D, L) Transform_Sha256_Len((S),(D),(L)) int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId) @@ -418,14 +422,31 @@ static int InitSha256(wc_Sha256* sha256) return ret; } - static int Transform_Sha256(wc_Sha256* sha256) + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) { int ret = wolfSSL_CryptHwMutexLock(); if (ret == 0) { #ifdef FREESCALE_MMCAU_CLASSIC_SHA - cau_sha256_hash_n((byte*)sha256->buffer, 1, sha256->digest); + cau_sha256_hash_n((byte*)data, 1, sha256->digest); #else - MMCAU_SHA256_HashN((byte*)sha256->buffer, 1, sha256->digest); + MMCAU_SHA256_HashN((byte*)data, 1, sha256->digest); + #endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + + static int Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, + word32 len) + { + int ret = wolfSSL_CryptHwMutexLock(); + if (ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC_SHA + cau_sha256_hash_n((byte*)data, len/WC_SHA256_BLOCK_SIZE, + sha256->digest); + #else + MMCAU_SHA256_HashN((byte*)data, len/WC_SHA256_BLOCK_SIZE, + sha256->digest); #endif wolfSSL_CryptHwMutexUnLock(); } @@ -553,12 +574,12 @@ static int InitSha256(wc_Sha256* sha256) return ret; } - + #elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ 
!defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) - + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ - + #else #define NEED_SOFT_SHA256 @@ -593,6 +614,11 @@ static int InitSha256(wc_Sha256* sha256) } #endif /* End Hardware Acceleration */ +#ifndef WC_SHA256_DATA_ALIGNMENT + /* default is 32-bit alignment required */ + #define WC_SHA256_DATA_ALIGNMENT 4 +#endif + #ifdef NEED_SOFT_SHA256 static const ALIGN32 word32 K[64] = { @@ -639,6 +665,10 @@ static int InitSha256(wc_Sha256* sha256) #define g(i) S[(6-i) & 7] #define h(i) S[(7-i) & 7] + #ifndef XTRANSFORM + #define XTRANSFORM(S, D) Transform_Sha256((S),(D)) + #endif + #ifndef SHA256_MANY_REGISTERS #define RND(j) \ t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \ @@ -646,12 +676,7 @@ static int InitSha256(wc_Sha256* sha256) d(j) += t0; \ h(j) = t0 + t1 - #ifndef XTRANSFORM - #define XTRANSFORM(S) Transform_Sha256((S)) - #define XTRANSFORM_LEN(S, D, L) Transform_Sha256_Len((S),(D),(L)) - #endif - - static int Transform_Sha256(wc_Sha256* sha256) + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) { word32 S[8], t0, t1; int i; @@ -680,7 +705,7 @@ static int InitSha256(wc_Sha256* sha256) S[i] = sha256->digest[i]; for (i = 0; i < 16; i++) - W[i] = sha256->buffer[i]; + W[i] = *((word32*)&data[i*sizeof(word32)]); for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++) W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16]; @@ -713,7 +738,7 @@ static int InitSha256(wc_Sha256* sha256) } #else /* SHA256 version that keeps all data in registers */ - #define SCHED1(j) (W[j] = sha256->buffer[j]) + #define SCHED1(j) (W[j] = *((word32*)&data[j*sizeof(word32)])) #define SCHED(j) ( \ W[ j & 15] += \ Gamma1(W[(j-2) & 15])+ \ @@ -732,12 +757,7 @@ static int InitSha256(wc_Sha256* sha256) d(j) += t0; \ h(j) = t0 + t1 - #ifndef XTRANSFORM - #define XTRANSFORM(S) Transform_Sha256((S)) - #define XTRANSFORM_LEN(S, D, L) Transform_Sha256_Len((S),(D),(L)) - #endif - - static int 
Transform_Sha256(wc_Sha256* sha256) + static int Transform_Sha256(wc_Sha256* sha256, const byte* data) { word32 S[8], t0, t1; int i; @@ -788,14 +808,18 @@ static int InitSha256(wc_Sha256* sha256) static WC_INLINE void AddLength(wc_Sha256* sha256, word32 len) { word32 tmp = sha256->loLen; - if ((sha256->loLen += len) < tmp) + if ((sha256->loLen += len) < tmp) { sha256->hiLen++; /* carry low to high */ + } } + /* do block size increments/updates */ static WC_INLINE int Sha256Update(wc_Sha256* sha256, const byte* data, word32 len) { int ret = 0; - byte* local; + word32 blocksLen; + byte* local; + word32* local32; if (sha256 == NULL || (data == NULL && len > 0)) { return BAD_FUNC_ARG; @@ -806,117 +830,128 @@ static int InitSha256(wc_Sha256* sha256) return 0; } + /* check that internal buffLen is valid */ + if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) { + return BUFFER_E; + } + + /* add length for final */ AddLength(sha256, len); - /* do block size increments */ local = (byte*)sha256->buffer; + local32 = sha256->buffer; - /* check that internal buffLen is valid */ - if (sha256->buffLen >= WC_SHA256_BLOCK_SIZE) - return BUFFER_E; - + /* process any remainder from previous operation */ if (sha256->buffLen > 0) { - word32 add = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); - XMEMCPY(&local[sha256->buffLen], data, add); + blocksLen = min(len, WC_SHA256_BLOCK_SIZE - sha256->buffLen); + XMEMCPY(&local[sha256->buffLen], data, blocksLen); - sha256->buffLen += add; - data += add; - len -= add; + sha256->buffLen += blocksLen; + data += blocksLen; + len -= blocksLen; if (sha256->buffLen == WC_SHA256_BLOCK_SIZE) { - #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) - #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) - #endif + #endif { - 
ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); + ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE); } - #endif + #endif - #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - ret = XTRANSFORM(sha256); - #else - if(sha256->ctx.mode == ESP32_SHA_INIT) { + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT){ esp_sha_try_hw_lock(&sha256->ctx); } - if(sha256->ctx.mode == ESP32_SHA_SW){ - ret = XTRANSFORM(sha256); + if (sha256->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha256, (const byte*)local); } else { - esp_sha256_process(sha256); + esp_sha256_process(sha256, (const byte*)local); } - #endif + #else + ret = XTRANSFORM(sha256, (const byte*)local); + #endif + if (ret == 0) sha256->buffLen = 0; else - len = 0; + len = 0; /* error */ } } - #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) - if (Transform_Sha256_Len_p != NULL) { - word32 blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1); + /* process blocks */ + #ifdef XTRANSFORM_LEN + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + if (Transform_Sha256_Len_p != NULL) + #endif + { + /* get number of blocks */ + /* 64-1 = 0x3F (~ Inverted = 0xFFFFFFC0) */ + /* len (masked by 0xFFFFFFC0) returns block aligned length */ + blocksLen = len & ~(WC_SHA256_BLOCK_SIZE-1); if (blocksLen > 0) { - /* Byte reversal performed in function if required. 
*/ + /* Byte reversal and alignment handled in function if required */ XTRANSFORM_LEN(sha256, data, blocksLen); data += blocksLen; len -= blocksLen; } } + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) else - #endif - #if !defined(LITTLE_ENDIAN_ORDER) || defined(FREESCALE_MMCAU_SHA) || \ - defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + #endif + #endif /* XTRANSFORM_LEN */ + #if !defined(XTRANSFORM_LEN) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) { while (len >= WC_SHA256_BLOCK_SIZE) { - XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE); + /* optimization to avoid memcpy if data pointer is properly aligned */ + /* Intel transform function requires use of sha256->buffer */ + /* Little Endian requires byte swap, so can't use data directly */ + #if defined(WC_SHA256_DATA_ALIGNMENT) && !defined(LITTLE_ENDIAN_ORDER) && \ + !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) + if (((size_t)data % WC_SHA256_DATA_ALIGNMENT) == 0) { + local32 = (word32*)data; + } + else + #endif + { + XMEMCPY(local32, data, WC_SHA256_BLOCK_SIZE); + } data += WC_SHA256_BLOCK_SIZE; len -= WC_SHA256_BLOCK_SIZE; - #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) + #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) + #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + #endif { - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); + ByteReverseWords(local32, local32, WC_SHA256_BLOCK_SIZE); } #endif - ret = XTRANSFORM(sha256); - if (ret != 0) - break; - } - } - #else - { - while (len >= WC_SHA256_BLOCK_SIZE) { - XMEMCPY(local, data, WC_SHA256_BLOCK_SIZE); - data += WC_SHA256_BLOCK_SIZE; - len -= WC_SHA256_BLOCK_SIZE; - - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); - #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - ret = XTRANSFORM(sha256); - #else - if(sha256->ctx.mode == 
ESP32_SHA_INIT){ + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT){ esp_sha_try_hw_lock(&sha256->ctx); } - - if(sha256->ctx.mode == ESP32_SHA_SW){ - ret = XTRANSFORM(sha256); + if (sha256->ctx.mode == ESP32_SHA_SW){ + ret = XTRANSFORM(sha256, (const byte*)local32); } else { - esp_sha256_process(sha256); + esp_sha256_process(sha256, (const byte*)local32); } - #endif + #else + ret = XTRANSFORM(sha256, (const byte*)local32); + #endif + if (ret != 0) break; } } #endif + /* save remainder */ if (len > 0) { XMEMCPY(local, data, len); sha256->buffLen = len; @@ -959,13 +994,14 @@ static int InitSha256(wc_Sha256* sha256) { int ret; - byte* local = (byte*)sha256->buffer; + byte* local; if (sha256 == NULL) { return BAD_FUNC_ARG; } - local[sha256->buffLen++] = 0x80; /* add 1 */ + local = (byte*)sha256->buffer; + local[sha256->buffLen++] = 0x80; /* add 1 */ /* pad with zeros */ if (sha256->buffLen > WC_SHA256_PAD_SIZE) { @@ -973,36 +1009,36 @@ static int InitSha256(wc_Sha256* sha256) WC_SHA256_BLOCK_SIZE - sha256->buffLen); sha256->buffLen += WC_SHA256_BLOCK_SIZE - sha256->buffLen; - { #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) - if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif - { - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); - } - #endif + { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } + #endif + + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha256->ctx); + } + if (sha256->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha256, (const byte*)local); + } else { + ret = esp_sha256_process(sha256, (const byte*)local); } - #if 
!defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - ret = XTRANSFORM(sha256); #else - if(sha256->ctx.mode == ESP32_SHA_INIT){ - esp_sha_try_hw_lock(&sha256->ctx); - } - if(sha256->ctx.mode == ESP32_SHA_SW){ - ret = XTRANSFORM(sha256); - } else { - ret = esp_sha256_process(sha256); - } + ret = XTRANSFORM(sha256, (const byte*)local); #endif if (ret != 0) return ret; sha256->buffLen = 0; } - XMEMSET(&local[sha256->buffLen], 0, WC_SHA256_PAD_SIZE - sha256->buffLen); + XMEMSET(&local[sha256->buffLen], 0, + WC_SHA256_PAD_SIZE - sha256->buffLen); /* put lengths in bits */ sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) + @@ -1012,12 +1048,12 @@ static int InitSha256(wc_Sha256* sha256) /* store lengths */ #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA) #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) - if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) + if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags)) #endif - { - ByteReverseWords(sha256->buffer, sha256->buffer, - WC_SHA256_BLOCK_SIZE); - } + { + ByteReverseWords(sha256->buffer, sha256->buffer, + WC_SHA256_BLOCK_SIZE); + } #endif /* ! length ordering dependent on digest endian type ! 
*/ XMEMCPY(&local[WC_SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32)); @@ -1028,30 +1064,31 @@ static int InitSha256(wc_Sha256* sha256) defined(HAVE_INTEL_AVX2) /* Kinetis requires only these bytes reversed */ #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2) - if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) + if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags)) #endif - { - ByteReverseWords( - &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], - &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], - 2 * sizeof(word32)); - } + { + ByteReverseWords( + &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], + &sha256->buffer[WC_SHA256_PAD_SIZE / sizeof(word32)], + 2 * sizeof(word32)); + } #endif - #if !defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ - defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) - return XTRANSFORM(sha256); + #if defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH) + if (sha256->ctx.mode == ESP32_SHA_INIT) { + esp_sha_try_hw_lock(&sha256->ctx); + } + if (sha256->ctx.mode == ESP32_SHA_SW) { + ret = XTRANSFORM(sha256, (const byte*)local); + } else { + ret = esp_sha256_digest_process(sha256, 1); + } #else - if(sha256->ctx.mode == ESP32_SHA_INIT){ - esp_sha_try_hw_lock(&sha256->ctx); - } - if(sha256->ctx.mode == ESP32_SHA_SW){ - return XTRANSFORM(sha256); - } else { - ret = esp_sha256_digest_process(sha256, 1); - } - return ret; + ret = XTRANSFORM(sha256, (const byte*)local); #endif + + return ret; } int wc_Sha256FinalRaw(wc_Sha256* sha256, byte* hash) @@ -1439,10 +1476,10 @@ void wc_Sha256Free(wc_Sha256* sha256) #elif defined(WOLFSSL_DEVCRYPTO_HASH) /* implemented in wolfcrypt/src/port/devcrypto/devcrypt_hash.c */ - + #elif defined(WOLFSSL_RENESAS_TSIP_CRYPT) && \ !defined(NO_WOLFSSL_RENESAS_TSIP_CRYPT_HASH) - + /* implemented in wolfcrypt/src/port/Renesas/renesas_tsip_sha.c */ #else diff --git a/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h b/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h 
index 0bb2492a9..433066af1 100644 --- a/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h +++ b/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h @@ -114,12 +114,12 @@ void esp_sha_hw_unlock( void ); struct wc_Sha; int esp_sha_digest_process(struct wc_Sha* sha, byte bockprocess); -int esp_sha_process(struct wc_Sha* sha); +int esp_sha_process(struct wc_Sha* sha, const byte* data); #ifndef NO_SHA256 struct wc_Sha256; int esp_sha256_digest_process(struct wc_Sha256* sha, byte bockprocess); - int esp_sha256_process(struct wc_Sha256* sha); + int esp_sha256_process(struct wc_Sha256* sha, const byte* data); #endif #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) @@ -140,7 +140,7 @@ struct fp_int; int esp_mp_mul(struct fp_int* X, struct fp_int* Y, struct fp_int* Z); int esp_mp_exptmod(struct fp_int* G, struct fp_int* X, word32 Xbits, struct fp_int* P, struct fp_int* Y); -int esp_mp_mulmod(struct fp_int* X, struct fp_int* Y, struct fp_int* M, +int esp_mp_mulmod(struct fp_int* X, struct fp_int* Y, struct fp_int* M, struct fp_int* Z); #endif /* NO_RSA || HAVE_ECC*/