Fix ARMv8 debug builds performing 128-bit math against integer constants incorrectly. The fix declares those constants as `const __int128_t` values instead of using plain integer literals. Also add a `./configure --enable-curve25519=no128bit` option that forces the field element (FE) code not to use `__int128_t` math.

This commit is contained in:
David Garske 2017-07-14 10:39:30 -07:00
parent 43260f02f4
commit 8612d52844
6 changed files with 73 additions and 56 deletions

View File

@ -1163,6 +1163,12 @@ then
ENABLED_CURVE25519=yes
fi
if test "$ENABLED_CURVE25519" = "no128bit"
then
AM_CFLAGS="$AM_CFLAGS -DNO_CURVED25519_128BIT"
ENABLED_CURVE25519=yes
fi
if test "$ENABLED_CURVE25519" = "yes"
then
AM_CFLAGS="$AM_CFLAGS -DHAVE_CURVE25519"

View File

@ -41,7 +41,7 @@
#include <wolfcrypt/src/misc.c>
#endif
#ifdef HAVE___UINT128_T
#ifdef CURVED25519_128BIT
#include "fe_x25519_128.i"
#else

View File

@ -1,4 +1,4 @@
/* fp_mont_small.i
/* fp_x25519_128.i
*
* Copyright (C) 2006-2017 wolfSSL Inc.
*
@ -253,6 +253,7 @@ void fe_add(fe r, const fe a, const fe b)
*/
void fe_mul(fe r, const fe a, const fe b)
{
const __int128_t k19 = 19;
__int128_t t0 = ((__int128_t)a[0]) * b[0];
__int128_t t1 = ((__int128_t)a[0]) * b[1]
+ ((__int128_t)a[1]) * b[0];
@ -280,19 +281,19 @@ void fe_mul(fe r, const fe a, const fe b)
__int128_t t8 = ((__int128_t)a[4]) * b[4];
/* Modulo reduce double long word. */
t0 += t5 * 19;
t1 += t6 * 19;
t2 += t7 * 19;
t3 += t8 * 19;
t0 += t5 * k19;
t1 += t6 * k19;
t2 += t7 * k19;
t3 += t8 * k19;
/* Normalize to 51-bits of data per word. */
t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
r[0] += (t4 >> 51) * 19;
r[0] += (t4 >> 51) * k19;
r[4] = t4 & 0x7ffffffffffff;
}
@ -304,36 +305,38 @@ void fe_mul(fe r, const fe a, const fe b)
*/
void fe_sq(fe r, const fe a)
{
const __int128_t k19 = 19;
const __int128_t k2 = 2;
__int128_t t0 = ((__int128_t)a[0]) * a[0];
__int128_t t1 = ((__int128_t)a[0]) * a[1] * 2;
__int128_t t2 = ((__int128_t)a[0]) * a[2] * 2
__int128_t t1 = ((__int128_t)a[0]) * a[1] * k2;
__int128_t t2 = ((__int128_t)a[0]) * a[2] * k2
+ ((__int128_t)a[1]) * a[1];
__int128_t t3 = ((__int128_t)a[0]) * a[3] * 2
+ ((__int128_t)a[1]) * a[2] * 2;
__int128_t t4 = ((__int128_t)a[0]) * a[4] * 2
+ ((__int128_t)a[1]) * a[3] * 2
__int128_t t3 = ((__int128_t)a[0]) * a[3] * k2
+ ((__int128_t)a[1]) * a[2] * k2;
__int128_t t4 = ((__int128_t)a[0]) * a[4] * k2
+ ((__int128_t)a[1]) * a[3] * k2
+ ((__int128_t)a[2]) * a[2];
__int128_t t5 = ((__int128_t)a[1]) * a[4] * 2
+ ((__int128_t)a[2]) * a[3] * 2;
__int128_t t6 = ((__int128_t)a[2]) * a[4] * 2
__int128_t t5 = ((__int128_t)a[1]) * a[4] * k2
+ ((__int128_t)a[2]) * a[3] * k2;
__int128_t t6 = ((__int128_t)a[2]) * a[4] * k2
+ ((__int128_t)a[3]) * a[3];
__int128_t t7 = ((__int128_t)a[3]) * a[4] * 2;
__int128_t t7 = ((__int128_t)a[3]) * a[4] * k2;
__int128_t t8 = ((__int128_t)a[4]) * a[4];
/* Modulo reduce double long word. */
t0 += t5 * 19;
t1 += t6 * 19;
t2 += t7 * 19;
t3 += t8 * 19;
t0 += t5 * k19;
t1 += t6 * k19;
t2 += t7 * k19;
t3 += t8 * k19;
/* Normalize to 51-bits of data per word. */
t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
r[0] += (t4 >> 51) * 19;
r[0] += (t4 >> 51) * k19;
r[4] = t4 & 0x7ffffffffffff;
}
@ -345,20 +348,22 @@ void fe_sq(fe r, const fe a)
*/
void fe_mul121666(fe r, fe a)
{
__int128_t t0 = ((__int128_t)a[0]) * (int64_t)121666;
__int128_t t1 = ((__int128_t)a[1]) * (int64_t)121666;
__int128_t t2 = ((__int128_t)a[2]) * (int64_t)121666;
__int128_t t3 = ((__int128_t)a[3]) * (int64_t)121666;
__int128_t t4 = ((__int128_t)a[4]) * (int64_t)121666;
const __int128_t k19 = 19;
const __int128_t k121666 = 121666;
__int128_t t0 = ((__int128_t)a[0]) * k121666;
__int128_t t1 = ((__int128_t)a[1]) * k121666;
__int128_t t2 = ((__int128_t)a[2]) * k121666;
__int128_t t3 = ((__int128_t)a[3]) * k121666;
__int128_t t4 = ((__int128_t)a[4]) * k121666;
/* Normalize to 51-bits of data per word. */
t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
r[0] += (t4 >> 51) * 19;
r[0] += (t4 >> 51) * k19;
r[4] = t4 & 0x7ffffffffffff;
}
@ -546,36 +551,38 @@ void fe_pow22523(fe r, const fe a)
*/
void fe_sq2(fe r, const fe a)
{
__int128_t t0 = 2 * (((__int128_t)a[0]) * a[0]);
__int128_t t1 = 2 * (((__int128_t)a[0]) * a[1] * 2);
__int128_t t2 = 2 * (((__int128_t)a[0]) * a[2] * 2
const __int128_t k2 = 2;
const __int128_t k19 = 19;
__int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]);
__int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2);
__int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2
+ ((__int128_t)a[1]) * a[1]);
__int128_t t3 = 2 * (((__int128_t)a[0]) * a[3] * 2
+ ((__int128_t)a[1]) * a[2] * 2);
__int128_t t4 = 2 * (((__int128_t)a[0]) * a[4] * 2
+ ((__int128_t)a[1]) * a[3] * 2
__int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2
+ ((__int128_t)a[1]) * a[2] * k2);
__int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2
+ ((__int128_t)a[1]) * a[3] * k2
+ ((__int128_t)a[2]) * a[2]);
__int128_t t5 = 2 * (((__int128_t)a[1]) * a[4] * 2
+ ((__int128_t)a[2]) * a[3] * 2);
__int128_t t6 = 2 * (((__int128_t)a[2]) * a[4] * 2
__int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2
+ ((__int128_t)a[2]) * a[3] * k2);
__int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2
+ ((__int128_t)a[3]) * a[3]);
__int128_t t7 = 2 * (((__int128_t)a[3]) * a[4] * 2);
__int128_t t8 = 2 * (((__int128_t)a[4]) * a[4]);
__int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2);
__int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]);
/* Modulo reduce double long word. */
t0 += t5 * 19;
t1 += t6 * 19;
t2 += t7 * 19;
t3 += t8 * 19;
t0 += t5 * k19;
t1 += t6 * k19;
t2 += t7 * k19;
t3 += t8 * k19;
/* Normalize to 51-bits of data per word. */
t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff;
t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
r[0] += (t4 >> 51) * 19;
r[0] += (t4 >> 51) * k19;
r[4] = t4 & 0x7ffffffffffff;
}

View File

@ -765,7 +765,7 @@ static void cmov(ge_precomp *t,const ge_precomp *u,unsigned char b)
fe_cmov(t->xy2d,u->xy2d,b);
}
#ifdef HAVE___UINT128_T
#ifdef CURVED25519_128BIT
static const ge_precomp base[32][8] = {
{
{
@ -3569,7 +3569,7 @@ static void slide(signed char *r,const unsigned char *a)
}
}
#ifdef HAVE___UINT128_T
#ifdef CURVED25519_128BIT
static const ge_precomp Bi[8] = {
{
{ 0x493c6f58c3b85, 0x0df7181c325f7, 0x0f50b0b3e4cb7, 0x5329385a44c32, 0x07cf9d3a33d4b },
@ -3719,7 +3719,7 @@ int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
return 0;
}
#ifdef HAVE___UINT128_T
#ifdef CURVED25519_128BIT
static const ge d = {
0x34dca135978a3, 0x1a8283b156ebd, 0x5e7a26001c029, 0x739c663a03cbb,
0x52036cee2b6ff
@ -3732,7 +3732,7 @@ static const ge d = {
#endif
#ifdef HAVE___UINT128_T
#ifdef CURVED25519_128BIT
static const ge sqrtm1 = {
0x61b274a0ea0b0, 0x0d5a5fc8f189d, 0x7ef5e9cbd0c60, 0x78595a6804c9e,
0x2b8324804fc1d
@ -3921,7 +3921,7 @@ void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p)
r = p
*/
#ifdef HAVE___UINT128_T
#ifdef CURVED25519_128BIT
static const ge d2 = {
0x69b9426b2f159, 0x35050762add7a, 0x3cf44c0038052, 0x6738cc7407977,
0x2406d9dc56dff

View File

@ -33,6 +33,10 @@
#include <wolfssl/wolfcrypt/types.h>
#if defined(HAVE___UINT128_T) && !defined(NO_CURVED25519_128BIT)
#define CURVED25519_128BIT
#endif
/*
fe means field element.
Here the field is \Z/(2^255-19).
@ -60,7 +64,7 @@ WOLFSSL_LOCAL int curve25519(byte * q, byte * n, byte * p);
/* default to be faster but take more memory */
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
#if defined(HAVE___UINT128_T)
#if defined(CURVED25519_128BIT)
typedef int64_t fe[5];
#else
typedef int32_t fe[10];

View File

@ -47,7 +47,7 @@ Representations:
#ifdef ED25519_SMALL
typedef byte ge[F25519_SIZE];
#elif defined(HAVE___UINT128_T)
#elif defined(CURVED25519_128BIT)
typedef int64_t ge[5];
#else
typedef int32_t ge[10];