Poly1305 Intel Assembly code - AVX and AVX2
This commit is contained in:
parent
108f6a4958
commit
5c2736f1a9
@ -404,10 +404,12 @@ static void chacha_encrypt_avx(ChaCha* ctx, const byte* m, byte* c,
|
||||
byte* output;
|
||||
word32 i;
|
||||
word32 cnt = 0;
|
||||
static const __m128i add = { 0x0000000100000000UL,0x0000000300000002UL };
|
||||
static const __m128i four = { 0x0000000400000004UL,0x0000000400000004UL };
|
||||
static const __m128i rotl8 = { 0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
|
||||
static const __m128i rotl16 = { 0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
|
||||
static const word64 add[2] = { 0x0000000100000000UL,0x0000000300000002UL };
|
||||
static const word64 four[2] = { 0x0000000400000004UL,0x0000000400000004UL };
|
||||
static const word64 rotl8[2] =
|
||||
{ 0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
|
||||
static const word64 rotl16[2] =
|
||||
{ 0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
|
||||
|
||||
if (bytes == 0)
|
||||
return;
|
||||
@ -632,8 +634,8 @@ static void chacha_encrypt_avx(ChaCha* ctx, const byte* m, byte* c,
|
||||
: [bytes] "+r" (bytes), [cnt] "+r" (cnt),
|
||||
[in] "+r" (m), [out] "+r" (c)
|
||||
: [X] "r" (X), [x] "r" (x), [key] "r" (ctx->X),
|
||||
[add] "xrm" (add), [four] "xrm" (four),
|
||||
[rotl8] "xrm" (rotl8), [rotl16] "xrm" (rotl16)
|
||||
[add] "m" (add), [four] "m" (four),
|
||||
[rotl8] "m" (rotl8), [rotl16] "m" (rotl16)
|
||||
: "xmm0", "xmm1", "xmm2", "xmm3",
|
||||
"xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"xmm8", "xmm9", "xmm10", "xmm11",
|
||||
@ -669,14 +671,17 @@ static void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
|
||||
byte* output;
|
||||
word32 i;
|
||||
word32 cnt = 0;
|
||||
static const __m256i add = { 0x0000000100000000UL,0x0000000300000002UL,
|
||||
0x0000000500000004UL,0x0000000700000006UL };
|
||||
static const __m256i eight = { 0x0000000800000008UL,0x0000000800000008UL,
|
||||
0x0000000800000008UL,0x0000000800000008UL };
|
||||
static const __m256i rotl8 = { 0x0605040702010003UL,0x0e0d0c0f0a09080bUL,
|
||||
0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
|
||||
static const __m256i rotl16 = { 0x0504070601000302UL,0x0d0c0f0e09080b0aUL,
|
||||
0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
|
||||
static const word64 add[4] = { 0x0000000100000000UL, 0x0000000300000002UL,
|
||||
0x0000000500000004UL, 0x0000000700000006UL };
|
||||
static const word64 eight[4] =
|
||||
{ 0x0000000800000008UL, 0x0000000800000008UL,
|
||||
0x0000000800000008UL, 0x0000000800000008UL };
|
||||
static const word64 rotl8[4] =
|
||||
{ 0x0605040702010003UL, 0x0e0d0c0f0a09080bUL,
|
||||
0x0605040702010003UL, 0x0e0d0c0f0a09080bUL };
|
||||
static const word64 rotl16[4] =
|
||||
{ 0x0504070601000302UL, 0x0d0c0f0e09080b0aUL,
|
||||
0x0504070601000302UL, 0x0d0c0f0e09080b0aUL };
|
||||
|
||||
if (bytes == 0)
|
||||
return;
|
||||
@ -917,8 +922,8 @@ static void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
|
||||
: [bytes] "+r" (bytes), [cnt] "+r" (cnt),
|
||||
[in] "+r" (m), [out] "+r" (c)
|
||||
: [X] "r" (X), [x] "r" (x), [key] "r" (ctx->X),
|
||||
[add] "rm" (add), [eight] "rm" (eight),
|
||||
[rotl8] "rm" (rotl8), [rotl16] "rm" (rotl16)
|
||||
[add] "m" (add), [eight] "m" (eight),
|
||||
[rotl8] "m" (rotl8), [rotl16] "m" (rotl16)
|
||||
: "ymm0", "ymm1", "ymm2", "ymm3",
|
||||
"ymm4", "ymm5", "ymm6", "ymm7",
|
||||
"ymm8", "ymm9", "ymm10", "ymm11",
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -3188,7 +3188,9 @@ int poly1305_test(void)
|
||||
byte tag[16];
|
||||
Poly1305 enc;
|
||||
|
||||
static const byte msg[] =
|
||||
static const byte empty[] = { };
|
||||
|
||||
static const byte msg1[] =
|
||||
{
|
||||
0x43,0x72,0x79,0x70,0x74,0x6f,0x67,0x72,
|
||||
0x61,0x70,0x68,0x69,0x63,0x20,0x46,0x6f,
|
||||
@ -3230,17 +3232,28 @@ int poly1305_test(void)
|
||||
0x61,0x16
|
||||
};
|
||||
|
||||
static const byte msg5[] =
|
||||
{
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
|
||||
};
|
||||
|
||||
byte additional[] =
|
||||
{
|
||||
0x50,0x51,0x52,0x53,0xc0,0xc1,0xc2,0xc3,
|
||||
0xc4,0xc5,0xc6,0xc7
|
||||
};
|
||||
|
||||
static const byte correct[] =
|
||||
static const byte correct0[] =
|
||||
{
|
||||
0x01,0x03,0x80,0x8a,0xfb,0x0d,0xb2,0xfd,
|
||||
0x4a,0xbf,0xf6,0xaf,0x41,0x49,0xf5,0x1b
|
||||
};
|
||||
|
||||
static const byte correct1[] =
|
||||
{
|
||||
0xa8,0x06,0x1d,0xc1,0x30,0x51,0x36,0xc6,
|
||||
0xc2,0x2b,0x8b,0xaf,0x0c,0x01,0x27,0xa9
|
||||
|
||||
};
|
||||
|
||||
static const byte correct2[] =
|
||||
@ -3261,6 +3274,12 @@ int poly1305_test(void)
|
||||
0x7e,0x90,0x2e,0xcb,0xd0,0x60,0x06,0x91
|
||||
};
|
||||
|
||||
static const byte correct5[] =
|
||||
{
|
||||
0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
};
|
||||
|
||||
static const byte key[] = {
|
||||
0x85,0xd6,0xbe,0x78,0x57,0x55,0x6d,0x33,
|
||||
0x7f,0x44,0x52,0xfe,0x42,0xd5,0x06,0xa8,
|
||||
@ -3282,41 +3301,49 @@ int poly1305_test(void)
|
||||
0x2a,0x93,0x75,0x78,0x3e,0xd5,0x53,0xff
|
||||
};
|
||||
|
||||
const byte* msgs[] = {msg, msg2, msg3};
|
||||
word32 szm[] = {sizeof(msg),sizeof(msg2),sizeof(msg3)};
|
||||
const byte* keys[] = {key, key2, key2};
|
||||
const byte* tests[] = {correct, correct2, correct3};
|
||||
static const byte key5[] = {
|
||||
0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
|
||||
};
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
const byte* msgs[] = {empty, msg1, msg2, msg3, msg5};
|
||||
word32 szm[] = {sizeof(empty), sizeof(msg1), sizeof(msg2),
|
||||
sizeof(msg3), sizeof(msg5)};
|
||||
const byte* keys[] = {key, key, key2, key2, key5};
|
||||
const byte* tests[] = {correct0, correct1, correct2, correct3, correct5};
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
ret = wc_Poly1305SetKey(&enc, keys[i], 32);
|
||||
if (ret != 0)
|
||||
return -3600;
|
||||
return -3600 + i;
|
||||
|
||||
ret = wc_Poly1305Update(&enc, msgs[i], szm[i]);
|
||||
if (ret != 0)
|
||||
return -3601;
|
||||
return -3605 + i;
|
||||
|
||||
ret = wc_Poly1305Final(&enc, tag);
|
||||
if (ret != 0)
|
||||
return -3602;
|
||||
return -36108 + i;
|
||||
|
||||
if (XMEMCMP(tag, tests[i], sizeof(tag)))
|
||||
return -3603;
|
||||
return -3615 + i;
|
||||
}
|
||||
|
||||
/* Check the TLS MAC function against the test vector in section 2.8.2 of
 * RFC 7539: https://tools.ietf.org/html/rfc7539 */
|
||||
XMEMSET(tag, 0, sizeof(tag));
|
||||
ret = wc_Poly1305SetKey(&enc, key4, sizeof(key4));
|
||||
if (ret != 0)
|
||||
return -3604;
|
||||
return -3614;
|
||||
|
||||
ret = wc_Poly1305_MAC(&enc, additional, sizeof(additional),
|
||||
(byte*)msg4, sizeof(msg4), tag, sizeof(tag));
|
||||
if (ret != 0)
|
||||
return -3605;
|
||||
return -3615;
|
||||
|
||||
if (XMEMCMP(tag, correct4, sizeof(tag)))
|
||||
return -3606;
|
||||
return -3616;
|
||||
|
||||
/* Check that the TLS MAC function no longer produces the expected tag when
 * the additional data is altered */
|
||||
XMEMSET(tag, 0, sizeof(tag));
|
||||
@ -3324,10 +3351,10 @@ int poly1305_test(void)
|
||||
ret = wc_Poly1305_MAC(&enc, additional, sizeof(additional),
|
||||
(byte*)msg4, sizeof(msg4), tag, sizeof(tag));
|
||||
if (ret != 0)
|
||||
return -3607;
|
||||
return -3617;
|
||||
|
||||
if (XMEMCMP(tag, correct4, sizeof(tag)) == 0)
|
||||
return -3608;
|
||||
return -3618;
|
||||
|
||||
|
||||
return 0;
|
||||
|
@ -45,8 +45,9 @@
|
||||
#define WC_HAS_GCC_4_4_64BIT
|
||||
#endif
|
||||
|
||||
#if (defined(WC_HAS_SIZEOF_INT128_64BIT) || defined(WC_HAS_MSVC_64BIT) || \
|
||||
defined(WC_HAS_GCC_4_4_64BIT))
|
||||
#ifdef USE_INTEL_SPEEDUP
|
||||
#elif (defined(WC_HAS_SIZEOF_INT128_64BIT) || defined(WC_HAS_MSVC_64BIT) || \
|
||||
defined(WC_HAS_GCC_4_4_64BIT))
|
||||
#define POLY130564
|
||||
#else
|
||||
#define POLY130532
|
||||
@ -63,24 +64,44 @@ enum {
|
||||
|
||||
/* Poly1305 state */
|
||||
typedef struct Poly1305 {
|
||||
#if defined(POLY130564)
|
||||
word64 r[3];
|
||||
word64 h[3];
|
||||
word64 pad[2];
|
||||
#ifdef USE_INTEL_SPEEDUP
|
||||
word64 r[3];
|
||||
word64 h[3];
|
||||
word64 pad[2];
|
||||
word64 t0[6];
|
||||
word64 t1[6];
|
||||
word64 hh[12];
|
||||
word32 r0[8];
|
||||
word32 r1[8];
|
||||
word32 r2[8];
|
||||
word32 r3[8];
|
||||
word32 r4[8];
|
||||
word32* rp[4];
|
||||
word64 hibit[4];
|
||||
size_t leftover;
|
||||
unsigned char buffer[4*POLY1305_BLOCK_SIZE];
|
||||
unsigned char finished;
|
||||
unsigned char started;
|
||||
#else
|
||||
word32 r[5];
|
||||
word32 h[5];
|
||||
word32 pad[4];
|
||||
#if defined(POLY130564)
|
||||
word64 r[3];
|
||||
word64 h[3];
|
||||
word64 pad[2];
|
||||
#else
|
||||
word32 r[5];
|
||||
word32 h[5];
|
||||
word32 pad[4];
|
||||
#endif
|
||||
size_t leftover;
|
||||
unsigned char buffer[POLY1305_BLOCK_SIZE];
|
||||
unsigned char finished;
|
||||
#endif
|
||||
size_t leftover;
|
||||
unsigned char buffer[POLY1305_BLOCK_SIZE];
|
||||
unsigned char final;
|
||||
} Poly1305;
|
||||
|
||||
|
||||
/* wc_Poly1305SetKey also performs the initialization of the Poly1305 state */
|
||||
|
||||
WOLFSSL_API int wc_Poly1305SetKey(Poly1305* poly1305, const byte* key, word32 kySz);
|
||||
WOLFSSL_API int wc_Poly1305SetKey(Poly1305* poly1305, const byte* key,
|
||||
word32 kySz);
|
||||
WOLFSSL_API int wc_Poly1305Update(Poly1305* poly1305, const byte*, word32);
|
||||
WOLFSSL_API int wc_Poly1305Final(Poly1305* poly1305, byte* tag);
|
||||
WOLFSSL_API int wc_Poly1305_MAC(Poly1305* ctx, byte* additional, word32 addSz,
|
||||
|
Loading…
x
Reference in New Issue
Block a user