Poly1305 Intel Assembly code - AVX and AVX2

This commit is contained in:
Sean Parkinson 2017-07-25 15:45:43 +10:00
parent 108f6a4958
commit 5c2736f1a9
4 changed files with 1252 additions and 113 deletions

View File

@ -404,10 +404,12 @@ static void chacha_encrypt_avx(ChaCha* ctx, const byte* m, byte* c,
byte* output;
word32 i;
word32 cnt = 0;
static const __m128i add = { 0x0000000100000000UL,0x0000000300000002UL };
static const __m128i four = { 0x0000000400000004UL,0x0000000400000004UL };
static const __m128i rotl8 = { 0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
static const __m128i rotl16 = { 0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
static const word64 add[2] = { 0x0000000100000000UL,0x0000000300000002UL };
static const word64 four[2] = { 0x0000000400000004UL,0x0000000400000004UL };
static const word64 rotl8[2] =
{ 0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
static const word64 rotl16[2] =
{ 0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
if (bytes == 0)
return;
@ -632,8 +634,8 @@ static void chacha_encrypt_avx(ChaCha* ctx, const byte* m, byte* c,
: [bytes] "+r" (bytes), [cnt] "+r" (cnt),
[in] "+r" (m), [out] "+r" (c)
: [X] "r" (X), [x] "r" (x), [key] "r" (ctx->X),
[add] "xrm" (add), [four] "xrm" (four),
[rotl8] "xrm" (rotl8), [rotl16] "xrm" (rotl16)
[add] "m" (add), [four] "m" (four),
[rotl8] "m" (rotl8), [rotl16] "m" (rotl16)
: "xmm0", "xmm1", "xmm2", "xmm3",
"xmm4", "xmm5", "xmm6", "xmm7",
"xmm8", "xmm9", "xmm10", "xmm11",
@ -669,14 +671,17 @@ static void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
byte* output;
word32 i;
word32 cnt = 0;
static const __m256i add = { 0x0000000100000000UL,0x0000000300000002UL,
0x0000000500000004UL,0x0000000700000006UL };
static const __m256i eight = { 0x0000000800000008UL,0x0000000800000008UL,
0x0000000800000008UL,0x0000000800000008UL };
static const __m256i rotl8 = { 0x0605040702010003UL,0x0e0d0c0f0a09080bUL,
0x0605040702010003UL,0x0e0d0c0f0a09080bUL };
static const __m256i rotl16 = { 0x0504070601000302UL,0x0d0c0f0e09080b0aUL,
0x0504070601000302UL,0x0d0c0f0e09080b0aUL };
static const word64 add[4] = { 0x0000000100000000UL, 0x0000000300000002UL,
0x0000000500000004UL, 0x0000000700000006UL };
static const word64 eight[4] =
{ 0x0000000800000008UL, 0x0000000800000008UL,
0x0000000800000008UL, 0x0000000800000008UL };
static const word64 rotl8[4] =
{ 0x0605040702010003UL, 0x0e0d0c0f0a09080bUL,
0x0605040702010003UL, 0x0e0d0c0f0a09080bUL };
static const word64 rotl16[4] =
{ 0x0504070601000302UL, 0x0d0c0f0e09080b0aUL,
0x0504070601000302UL, 0x0d0c0f0e09080b0aUL };
if (bytes == 0)
return;
@ -917,8 +922,8 @@ static void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
: [bytes] "+r" (bytes), [cnt] "+r" (cnt),
[in] "+r" (m), [out] "+r" (c)
: [X] "r" (X), [x] "r" (x), [key] "r" (ctx->X),
[add] "rm" (add), [eight] "rm" (eight),
[rotl8] "rm" (rotl8), [rotl16] "rm" (rotl16)
[add] "m" (add), [eight] "m" (eight),
[rotl8] "m" (rotl8), [rotl16] "m" (rotl16)
: "ymm0", "ymm1", "ymm2", "ymm3",
"ymm4", "ymm5", "ymm6", "ymm7",
"ymm8", "ymm9", "ymm10", "ymm11",

File diff suppressed because it is too large Load Diff

View File

@ -3188,7 +3188,9 @@ int poly1305_test(void)
byte tag[16];
Poly1305 enc;
static const byte msg[] =
static const byte empty[] = { };
static const byte msg1[] =
{
0x43,0x72,0x79,0x70,0x74,0x6f,0x67,0x72,
0x61,0x70,0x68,0x69,0x63,0x20,0x46,0x6f,
@ -3230,17 +3232,28 @@ int poly1305_test(void)
0x61,0x16
};
static const byte msg5[] =
{
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
};
byte additional[] =
{
0x50,0x51,0x52,0x53,0xc0,0xc1,0xc2,0xc3,
0xc4,0xc5,0xc6,0xc7
};
static const byte correct[] =
static const byte correct0[] =
{
0x01,0x03,0x80,0x8a,0xfb,0x0d,0xb2,0xfd,
0x4a,0xbf,0xf6,0xaf,0x41,0x49,0xf5,0x1b
};
static const byte correct1[] =
{
0xa8,0x06,0x1d,0xc1,0x30,0x51,0x36,0xc6,
0xc2,0x2b,0x8b,0xaf,0x0c,0x01,0x27,0xa9
};
static const byte correct2[] =
@ -3261,6 +3274,12 @@ int poly1305_test(void)
0x7e,0x90,0x2e,0xcb,0xd0,0x60,0x06,0x91
};
static const byte correct5[] =
{
0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
};
static const byte key[] = {
0x85,0xd6,0xbe,0x78,0x57,0x55,0x6d,0x33,
0x7f,0x44,0x52,0xfe,0x42,0xd5,0x06,0xa8,
@ -3282,41 +3301,49 @@ int poly1305_test(void)
0x2a,0x93,0x75,0x78,0x3e,0xd5,0x53,0xff
};
const byte* msgs[] = {msg, msg2, msg3};
word32 szm[] = {sizeof(msg),sizeof(msg2),sizeof(msg3)};
const byte* keys[] = {key, key2, key2};
const byte* tests[] = {correct, correct2, correct3};
static const byte key5[] = {
0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
for (i = 0; i < 3; i++) {
const byte* msgs[] = {empty, msg1, msg2, msg3, msg5};
word32 szm[] = {sizeof(empty), sizeof(msg1), sizeof(msg2),
sizeof(msg3), sizeof(msg5)};
const byte* keys[] = {key, key, key2, key2, key5};
const byte* tests[] = {correct0, correct1, correct2, correct3, correct5};
for (i = 0; i < 5; i++) {
ret = wc_Poly1305SetKey(&enc, keys[i], 32);
if (ret != 0)
return -3600;
return -3600 + i;
ret = wc_Poly1305Update(&enc, msgs[i], szm[i]);
if (ret != 0)
return -3601;
return -3605 + i;
ret = wc_Poly1305Final(&enc, tag);
if (ret != 0)
return -3602;
return -36108 + i;
if (XMEMCMP(tag, tests[i], sizeof(tag)))
return -3603;
return -3615 + i;
}
/* Check TLS MAC function from 2.8.2 https://tools.ietf.org/html/rfc7539 */
XMEMSET(tag, 0, sizeof(tag));
ret = wc_Poly1305SetKey(&enc, key4, sizeof(key4));
if (ret != 0)
return -3604;
return -3614;
ret = wc_Poly1305_MAC(&enc, additional, sizeof(additional),
(byte*)msg4, sizeof(msg4), tag, sizeof(tag));
if (ret != 0)
return -3605;
return -3615;
if (XMEMCMP(tag, correct4, sizeof(tag)))
return -3606;
return -3616;
/* Check fail of TLS MAC function if altering additional data */
XMEMSET(tag, 0, sizeof(tag));
@ -3324,10 +3351,10 @@ int poly1305_test(void)
ret = wc_Poly1305_MAC(&enc, additional, sizeof(additional),
(byte*)msg4, sizeof(msg4), tag, sizeof(tag));
if (ret != 0)
return -3607;
return -3617;
if (XMEMCMP(tag, correct4, sizeof(tag)) == 0)
return -3608;
return -3618;
return 0;

View File

@ -45,8 +45,9 @@
#define WC_HAS_GCC_4_4_64BIT
#endif
#if (defined(WC_HAS_SIZEOF_INT128_64BIT) || defined(WC_HAS_MSVC_64BIT) || \
defined(WC_HAS_GCC_4_4_64BIT))
#ifdef USE_INTEL_SPEEDUP
#elif (defined(WC_HAS_SIZEOF_INT128_64BIT) || defined(WC_HAS_MSVC_64BIT) || \
defined(WC_HAS_GCC_4_4_64BIT))
#define POLY130564
#else
#define POLY130532
@ -63,24 +64,44 @@ enum {
/* Poly1305 state */
typedef struct Poly1305 {
#if defined(POLY130564)
word64 r[3];
word64 h[3];
word64 pad[2];
#ifdef USE_INTEL_SPEEDUP
word64 r[3];
word64 h[3];
word64 pad[2];
word64 t0[6];
word64 t1[6];
word64 hh[12];
word32 r0[8];
word32 r1[8];
word32 r2[8];
word32 r3[8];
word32 r4[8];
word32* rp[4];
word64 hibit[4];
size_t leftover;
unsigned char buffer[4*POLY1305_BLOCK_SIZE];
unsigned char finished;
unsigned char started;
#else
word32 r[5];
word32 h[5];
word32 pad[4];
#if defined(POLY130564)
word64 r[3];
word64 h[3];
word64 pad[2];
#else
word32 r[5];
word32 h[5];
word32 pad[4];
#endif
size_t leftover;
unsigned char buffer[POLY1305_BLOCK_SIZE];
unsigned char finished;
#endif
size_t leftover;
unsigned char buffer[POLY1305_BLOCK_SIZE];
unsigned char final;
} Poly1305;
/* does init */
WOLFSSL_API int wc_Poly1305SetKey(Poly1305* poly1305, const byte* key, word32 kySz);
WOLFSSL_API int wc_Poly1305SetKey(Poly1305* poly1305, const byte* key,
word32 kySz);
WOLFSSL_API int wc_Poly1305Update(Poly1305* poly1305, const byte*, word32);
WOLFSSL_API int wc_Poly1305Final(Poly1305* poly1305, byte* tag);
WOLFSSL_API int wc_Poly1305_MAC(Poly1305* ctx, byte* additional, word32 addSz,