New permutation-based AES implementation using SSSE3.
This covers a lot of CPUs -- particularly lower-end CPUs over the past decade which lack AES-NI. Derived from Mike Hamburg's public domain vpaes software; see <https://crypto.stanford.edu/vpaes/> for details.
This commit is contained in:
parent
4809cab8b6
commit
c057901613
|
@ -1,4 +1,4 @@
|
|||
# $NetBSD: files.x86,v 1.115 2020/06/29 23:47:54 riastradh Exp $
|
||||
# $NetBSD: files.x86,v 1.116 2020/06/29 23:51:35 riastradh Exp $
|
||||
|
||||
# options for MP configuration through the MP spec
|
||||
defflag opt_mpbios.h MPBIOS MPDEBUG MPBIOS_SCANPCI
|
||||
|
@ -174,3 +174,6 @@ include "crypto/aes/arch/x86/files.aesvia"
|
|||
|
||||
# Bitsliced AES with SSE2
|
||||
include "crypto/aes/arch/x86/files.aessse2"
|
||||
|
||||
# Permutation-based AES with PSHUFB
|
||||
include "crypto/aes/arch/x86/files.aesssse3"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: identcpu.c,v 1.110 2020/06/29 23:47:54 riastradh Exp $ */
|
||||
/* $NetBSD: identcpu.c,v 1.111 2020/06/29 23:51:35 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1999, 2000, 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc.
|
||||
|
@ -30,7 +30,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: identcpu.c,v 1.110 2020/06/29 23:47:54 riastradh Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: identcpu.c,v 1.111 2020/06/29 23:51:35 riastradh Exp $");
|
||||
|
||||
#include "opt_xen.h"
|
||||
|
||||
|
@ -41,6 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: identcpu.c,v 1.110 2020/06/29 23:47:54 riastradh Exp
|
|||
|
||||
#include <crypto/aes/arch/x86/aes_ni.h>
|
||||
#include <crypto/aes/arch/x86/aes_sse2.h>
|
||||
#include <crypto/aes/arch/x86/aes_ssse3.h>
|
||||
#include <crypto/aes/arch/x86/aes_via.h>
|
||||
|
||||
#include <uvm/uvm_extern.h>
|
||||
|
@ -1006,6 +1007,10 @@ cpu_probe(struct cpu_info *ci)
|
|||
#endif
|
||||
if (cpu_feature[4] & CPUID_VIA_HAS_ACE)
|
||||
aes_md_init(&aes_via_impl);
|
||||
else if (i386_has_sse && i386_has_sse2 &&
|
||||
(cpu_feature[1] & CPUID2_SSE3) &&
|
||||
(cpu_feature[1] & CPUID2_SSSE3))
|
||||
aes_md_init(&aes_ssse3_impl);
|
||||
else if (i386_has_sse && i386_has_sse2)
|
||||
aes_md_init(&aes_sse2_impl);
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,556 @@
|
|||
/* $NetBSD: aes_ssse3.c,v 1.1 2020/06/29 23:51:35 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2020 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Permutation-based AES using SSSE3, derived from Mike Hamburg's VPAES
|
||||
* software, at <https://crypto.stanford.edu/vpaes/>, described in
|
||||
*
|
||||
* Mike Hamburg, `Accelerating AES with Vector Permute
|
||||
* Instructions', in Christophe Clavier and Kris Gaj (eds.),
|
||||
* Cryptographic Hardware and Embedded Systems -- CHES 2009,
|
||||
* Springer LNCS 5747, pp. 18-32.
|
||||
*
|
||||
* https://link.springer.com/chapter/10.1007/978-3-642-04138-9_2
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(1, "$NetBSD: aes_ssse3.c,v 1.1 2020/06/29 23:51:35 riastradh Exp $");
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include "aes_ssse3_impl.h"
|
||||
|
||||
/*
 * Lookup tables for the vpaes (vector-permute AES) algorithm, kept as
 * 16-byte vectors; each is written as two little-endian 64-bit halves.
 * The two-element tables are pairs of nybble-indexed PSHUFB maps whose
 * results are combined by XOR (see aes_schedule_transform).  Values
 * come from Hamburg's vpaes software (see the file header comment).
 */
static const union m128const {
	uint64_t u64[2];
	__m128i m;
}
/* Byte permutations for the round mixing step, indexed by round mod 4
 * (used with mc_backward in aes_ssse3_enc1 and in key mangling). */
mc_forward[4] = {
	{.u64 = {0x0407060500030201, 0x0C0F0E0D080B0A09}},
	{.u64 = {0x080B0A0904070605, 0x000302010C0F0E0D}},
	{.u64 = {0x0C0F0E0D080B0A09, 0x0407060500030201}},
	{.u64 = {0x000302010C0F0E0D, 0x080B0A0904070605}},
},
/* Reverse-direction counterpart of mc_forward, same round indexing. */
mc_backward[4] = {
	{.u64 = {0x0605040702010003, 0x0E0D0C0F0A09080B}},
	{.u64 = {0x020100030E0D0C0F, 0x0A09080B06050407}},
	{.u64 = {0x0E0D0C0F0A09080B, 0x0605040702010003}},
	{.u64 = {0x0A09080B06050407, 0x020100030E0D0C0F}},
},
/* Input transform into the vpaes basis (entry to enc1 and the key
 * schedule; see the `input transform' comments there). */
ipt[2] = {
	{.u64 = {0xC2B2E8985A2A7000, 0xCABAE09052227808}},
	{.u64 = {0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81}},
},
/* Output transform for the last mangled encryption round key
 * (aes_schedule_mangle_last_enc). */
opt[2] = {
	{.u64 = {0xFF9F4929D6B66000, 0xF7974121DEBE6808}},
	{.u64 = {0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0}},
},
/* Input transform for decryption (entry to aes_ssse3_dec1). */
dipt[2] = {
	{.u64 = {0x0F505B040B545F00, 0x154A411E114E451A}},
	{.u64 = {0x86E383E660056500, 0x12771772F491F194}},
},
/* Split substitution tables for encryption rounds, fed by the io/jo
 * indices computed by subbytes(). */
sb1[2] = {
	{.u64 = {0xB19BE18FCB503E00, 0xA5DF7A6E142AF544}},
	{.u64 = {0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF}},
},
sb2[2] = {
	{.u64 = {0xE27A93C60B712400, 0x5EB7E955BC982FCD}},
	{.u64 = {0x69EB88400AE12900, 0xC2A163C8AB82234A}},
},
/* Substitution table for the final encryption round. */
sbo[2] = {
	{.u64 = {0xD0D26D176FBDC700, 0x15AABF7AC502A878}},
	{.u64 = {0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA}},
},
/* Split substitution tables for decryption rounds (aes_ssse3_dec1). */
dsb9[2] = {
	{.u64 = {0x851C03539A86D600, 0xCAD51F504F994CC9}},
	{.u64 = {0xC03B1789ECD74900, 0x725E2C9EB2FBA565}},
},
dsbd[2] = {
	{.u64 = {0x7D57CCDFE6B1A200, 0xF56E9B13882A4439}},
	{.u64 = {0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3}},
},
dsbb[2] = {
	{.u64 = {0xD022649296B44200, 0x602646F6B0F2D404}},
	{.u64 = {0xC19498A6CD596700, 0xF3FF0C3E3255AA6B}},
},
dsbe[2] = {
	{.u64 = {0x46F2929626D4D000, 0x2242600464B4F6B0}},
	{.u64 = {0x0C55A6CDFFAAC100, 0x9467F36B98593E32}},
},
/* Substitution table for the final decryption round. */
dsbo[2] = {
	{.u64 = {0x1387EA537EF94000, 0xC7AA6DB9D4943E2D}},
	{.u64 = {0x12D7560F93441D00, 0xCA4B8159D8C58E9C}},
},
/* Decryption key-schedule transforms, applied in sequence by
 * aes_schedule_mangle_dec. */
dks1[2] = {
	{.u64 = {0xB6116FC87ED9A700, 0x4AED933482255BFC}},
	{.u64 = {0x4576516227143300, 0x8BB89FACE9DAFDCE}},
},
dks2[2] = {
	{.u64 = {0x27438FEBCCA86400, 0x4622EE8AADC90561}},
	{.u64 = {0x815C13CE4F92DD00, 0x73AEE13CBD602FF2}},
},
dks3[2] = {
	{.u64 = {0x03C4C50201C6C700, 0xF83F3EF9FA3D3CFB}},
	{.u64 = {0xEE1921D638CFF700, 0xA5526A9D7384BC4B}},
},
dks4[2] = {
	{.u64 = {0xE3C390B053732000, 0xA080D3F310306343}},
	{.u64 = {0xA0CA214B036982E8, 0x2F45AEC48CE60D67}},
},
/* Transform for the last decryption round key
 * (aes_schedule_mangle_last_dec). */
deskew[2] = {
	{.u64 = {0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A}},
	{.u64 = {0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77}},
},
/* Byte permutations indexed by round mod 4; sr[0] is the identity
 * permutation (0x00..0x0F in order). */
sr[4] = {
	{.u64 = {0x0706050403020100, 0x0F0E0D0C0B0A0908}},
	{.u64 = {0x030E09040F0A0500, 0x0B06010C07020D08}},
	{.u64 = {0x0F060D040B020900, 0x070E050C030A0108}},
	{.u64 = {0x0B0E0104070A0D00, 0x0306090C0F020508}},
},
/* Packed round-constant bytes; rotated one byte per aes_schedule_round. */
rcon = {.u64 = {0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81}},
/* 0x5B in every byte: bias added/removed while mangling round keys. */
s63 = {.u64 = {0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B}},
/* Low-nybble mask, 0x0F in every byte (see bytes2nybbles). */
of = {.u64 = {0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F}},
/* GF(16) inversion tables, applied via PSHUFB (gf16_inv/gf16_inva). */
inv = {.u64 = {0x0E05060F0D080180, 0x040703090A0B0C02}},
inva = {.u64 = {0x01040A060F0B0780, 0x030D0E0C02050809}};
||||
|
||||
static inline __m128i
|
||||
loadroundkey(const uint32_t *rk32)
|
||||
{
|
||||
return _mm_load_si128((const void *)rk32);
|
||||
}
|
||||
|
||||
static inline void
|
||||
storeroundkey(uint32_t *rk32, __m128i rk)
|
||||
{
|
||||
_mm_store_si128((void *)rk32, rk);
|
||||
}
|
||||
|
||||
/* Given abcdefgh, set *lo = 0b0d0f0h and *hi = 0a0c0e0g. */
|
||||
static inline void
|
||||
bytes2nybbles(__m128i *restrict lo, __m128i *restrict hi, __m128i x)
|
||||
{
|
||||
|
||||
*lo = x & of.m;
|
||||
*hi = _mm_srli_epi32(x & ~of.m, 4);
|
||||
}
|
||||
|
||||
/* Given 0p0q0r0s, return 0x0y0z0w where x = a/p, y = a/q, &c. */
/* Table-driven GF(16) arithmetic: one PSHUFB through the inva table. */
static inline __m128i
gf16_inva(__m128i x)
{
	return _mm_shuffle_epi8(inva.m, x);
}
|
||||
|
||||
/* Given 0p0q0r0s, return 0x0y0z0w where x = 1/p, y = 1/q, &c. */
/* Table-driven GF(16) inversion: one PSHUFB through the inv table. */
static inline __m128i
gf16_inv(__m128i x)
{
	return _mm_shuffle_epi8(inv.m, x);
}
|
||||
|
||||
/*
|
||||
* t is a pair of maps respectively from low and high nybbles to bytes.
|
||||
* Apply t the nybbles, and add the results in GF(2).
|
||||
*/
|
||||
static __m128i
|
||||
aes_schedule_transform(__m128i x, const union m128const t[static 2])
|
||||
{
|
||||
__m128i lo, hi;
|
||||
|
||||
bytes2nybbles(&lo, &hi, x);
|
||||
return _mm_shuffle_epi8(t[0].m, lo) ^ _mm_shuffle_epi8(t[1].m, hi);
|
||||
}
|
||||
|
||||
/*
 * Compute the pair of PSHUFB indices (*io, *jo) that feed the split
 * substitution tables: callers combine them as
 * _mm_shuffle_epi8(tab[0], *io) ^ _mm_shuffle_epi8(tab[1], *jo)
 * (see aes_schedule_low_round, aes_ssse3_enc1, aes_ssse3_dec1).
 * The inversion is done via the GF(16) inv/inva tables, following
 * Hamburg's tower-field construction cited in the file header.
 */
static inline void
subbytes(__m128i *io, __m128i *jo, __m128i x)
{
	__m128i k, i, ak, j;

	bytes2nybbles(&k, &i, x);	/* k = low nybbles, i = high nybbles */
	ak = gf16_inva(k);
	j = i ^ k;
	*io = j ^ gf16_inv(ak ^ gf16_inv(i));
	*jo = i ^ gf16_inv(ak ^ gf16_inv(j));
}
|
||||
|
||||
/*
 * One `low' round of the key schedule: smear prk across the vector,
 * substitute rk's bytes through the sb1 tables, and combine.
 */
static __m128i
aes_schedule_low_round(__m128i rk, __m128i prk)
{
	__m128i io, jo;

	/* smear prk: fold in byte-shifted copies, then add the s63 bias */
	prk ^= _mm_slli_si128(prk, 4);
	prk ^= _mm_slli_si128(prk, 8);
	prk ^= s63.m;

	/* subbytes: run rk through the split sb1 substitution tables */
	subbytes(&io, &jo, rk);
	rk = _mm_shuffle_epi8(sb1[0].m, io) ^ _mm_shuffle_epi8(sb1[1].m, jo);

	/* add in smeared stuff */
	return rk ^ prk;
}
|
||||
|
||||
/*
 * One full round of the key schedule: mix in the next round-constant
 * byte, rotate rk's high word, and run a low round.  *rcon_rot is
 * rotated in place so successive calls consume successive constants.
 */
static __m128i
aes_schedule_round(__m128i rk, __m128i prk, __m128i *rcon_rot)
{

	/* extract rcon from rcon_rot (its top byte), then rotate it */
	prk ^= _mm_alignr_epi8(_mm_setzero_si128(), *rcon_rot, 15);
	*rcon_rot = _mm_alignr_epi8(*rcon_rot, *rcon_rot, 15);

	/* rotate: broadcast rk's high 32-bit word, then rotate bytes by 1 */
	rk = _mm_shuffle_epi32(rk, 0xff);
	rk = _mm_alignr_epi8(rk, rk, 1);

	return aes_schedule_low_round(rk, prk);
}
|
||||
|
||||
static __m128i
|
||||
aes_schedule_mangle_enc(__m128i x, __m128i sr_i)
|
||||
{
|
||||
__m128i y = _mm_setzero_si128();
|
||||
|
||||
x ^= s63.m;
|
||||
|
||||
x = _mm_shuffle_epi8(x, mc_forward[0].m);
|
||||
y ^= x;
|
||||
x = _mm_shuffle_epi8(x, mc_forward[0].m);
|
||||
y ^= x;
|
||||
x = _mm_shuffle_epi8(x, mc_forward[0].m);
|
||||
y ^= x;
|
||||
|
||||
return _mm_shuffle_epi8(y, sr_i);
|
||||
}
|
||||
|
||||
/*
 * Mangle the final encryption round key: permute by sr_i, add the
 * s63 bias, and apply the opt output transform.
 */
static __m128i
aes_schedule_mangle_last_enc(__m128i x, __m128i sr_i)
{

	return aes_schedule_transform(_mm_shuffle_epi8(x, sr_i) ^ s63.m, opt);
}
|
||||
|
||||
static __m128i
|
||||
aes_schedule_mangle_dec(__m128i x, __m128i sr_i)
|
||||
{
|
||||
__m128i y = _mm_setzero_si128();
|
||||
|
||||
x = aes_schedule_transform(x, dks1);
|
||||
y = _mm_shuffle_epi8(y ^ x, mc_forward[0].m);
|
||||
x = aes_schedule_transform(x, dks2);
|
||||
y = _mm_shuffle_epi8(y ^ x, mc_forward[0].m);
|
||||
x = aes_schedule_transform(x, dks3);
|
||||
y = _mm_shuffle_epi8(y ^ x, mc_forward[0].m);
|
||||
x = aes_schedule_transform(x, dks4);
|
||||
y = _mm_shuffle_epi8(y ^ x, mc_forward[0].m);
|
||||
|
||||
return _mm_shuffle_epi8(y, sr_i);
|
||||
}
|
||||
|
||||
/*
 * Mangle the final decryption round key: add the s63 bias and apply
 * the deskew transform.  No sr permutation here, unlike the enc side.
 */
static __m128i
aes_schedule_mangle_last_dec(__m128i x)
{

	return aes_schedule_transform(x ^ s63.m, deskew);
}
|
||||
|
||||
/*
 * AES-192 key-schedule smear: combine prkhi with a partial broadcast
 * of itself (word pattern c.aa, imm 0x80) and the top words of prk
 * (pattern dddc, imm 0xfe), all added in GF(2).
 */
static __m128i
aes_schedule_192_smear(__m128i prkhi, __m128i prk)
{
	__m128i from_prkhi, from_prk;

	from_prkhi = _mm_shuffle_epi32(prkhi, 0x80);
	from_prk = _mm_shuffle_epi32(prk, 0xfe);

	return prkhi ^ from_prkhi ^ from_prk;
}
|
||||
|
||||
/*
 * Zero the low 64 bits of rk and keep the high 64 bits in place:
 * MOVHLPS(rk, 0) yields {0, 0, rk[2], rk[3]} (32-bit lanes).
 */
static __m128i
aes_schedule_192_smearhi(__m128i rk)
{
	return (__m128i)_mm_movehl_ps((__m128)rk, _mm_setzero_ps());
}
|
||||
|
||||
/*
 * aes_ssse3_setenckey(enc, key, nrounds)
 *
 *	Expand `key' into the vpaes `mangled' encryption round-key
 *	schedule in enc->aese_aes.aes_rk.  nrounds selects the key
 *	size: 10 => 16-byte key, 12 => 24-byte key (loaded as two
 *	overlapping 16-byte loads at key and key+8), 14 => 32-byte
 *	key; any other value panics.
 *
 *	Caller must have the vector unit enabled (see the
 *	fpu_kern_enter wrappers in aes_ssse3_impl.c).
 */
void
aes_ssse3_setenckey(struct aesenc *enc, const uint8_t *key, unsigned nrounds)
{
	uint32_t *rk32 = enc->aese_aes.aes_rk;
	__m128i mrk;		/* mangled round key */
	__m128i rk;		/* round key */
	__m128i prk;		/* previous round key */
	__m128i rcon_rot = rcon.m;
	uint64_t i = 3;		/* sr[] index, counts down mod 4 */

	/* input transform */
	rk = aes_schedule_transform(_mm_loadu_epi8(key), ipt);
	storeroundkey(rk32, rk);
	rk32 += 4;

	switch (nrounds) {
	case 10:
		/* AES-128: one schedule round per stored round key */
		for (;;) {
			rk = aes_schedule_round(rk, rk, &rcon_rot);
			if (--nrounds == 0)
				break;
			mrk = aes_schedule_mangle_enc(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 += 4;
		}
		break;
	case 12: {
		/* AES-192: produce three round keys per loop iteration */
		__m128i prkhi;		/* high half of previous round key */

		prk = rk;
		rk = aes_schedule_transform(_mm_loadu_epi8(key + 8), ipt);
		prkhi = aes_schedule_192_smearhi(rk);
		for (;;) {
			prk = aes_schedule_round(rk, prk, &rcon_rot);
			rk = _mm_alignr_epi8(prk, prkhi, 8);

			mrk = aes_schedule_mangle_enc(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 += 4;
			rk = aes_schedule_192_smear(prkhi, prk);
			prkhi = aes_schedule_192_smearhi(rk);

			mrk = aes_schedule_mangle_enc(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 += 4;
			rk = prk = aes_schedule_round(rk, prk, &rcon_rot);
			if ((nrounds -= 3) == 0)
				break;

			mrk = aes_schedule_mangle_enc(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 += 4;
			rk = aes_schedule_192_smear(prkhi, prk);
			prkhi = aes_schedule_192_smearhi(rk);
		}
		break;
	}
	case 14: {
		/* AES-256: alternate high and low schedule rounds */
		__m128i pprk;		/* previous previous round key */

		prk = rk;
		rk = aes_schedule_transform(_mm_loadu_epi8(key + 16), ipt);
		for (;;) {
			mrk = aes_schedule_mangle_enc(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 += 4;
			pprk = rk;

			/* high round */
			rk = prk = aes_schedule_round(rk, prk, &rcon_rot);
			if ((nrounds -= 2) == 0)
				break;
			mrk = aes_schedule_mangle_enc(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 += 4;

			/* low round */
			rk = _mm_shuffle_epi32(rk, 0xff);
			rk = aes_schedule_low_round(rk, pprk);
		}
		break;
	}
	default:
		panic("invalid number of AES rounds: %u", nrounds);
	}
	/* last round key gets the output transform instead of mangling */
	storeroundkey(rk32, aes_schedule_mangle_last_enc(rk, sr[i-- % 4].m));
}
|
||||
|
||||
/*
 * aes_ssse3_setdeckey(dec, key, nrounds)
 *
 *	Expand `key' into the vpaes decryption round-key schedule in
 *	dec->aesd_aes.aes_rk, stored in reverse order (filled from the
 *	end of the array toward the start).  nrounds is 10, 12, or 14
 *	as in aes_ssse3_setenckey; any other value panics.
 *
 *	Caller must have the vector unit enabled (see the
 *	fpu_kern_enter wrappers in aes_ssse3_impl.c).
 */
void
aes_ssse3_setdeckey(struct aesdec *dec, const uint8_t *key, unsigned nrounds)
{
	uint32_t *rk32 = dec->aesd_aes.aes_rk;
	__m128i mrk;		/* mangled round key */
	__m128i ork;		/* original round key */
	__m128i rk;		/* round key */
	__m128i prk;		/* previous round key */
	__m128i rcon_rot = rcon.m;
	unsigned i = nrounds == 12 ? 0 : 2;	/* initial sr[] index */

	ork = _mm_loadu_epi8(key);

	/* input transform */
	rk = aes_schedule_transform(ork, ipt);

	/* go from end: last slot gets the permuted original key */
	rk32 += 4*nrounds;
	storeroundkey(rk32, _mm_shuffle_epi8(ork, sr[i].m));
	rk32 -= 4;
	i ^= 3;

	switch (nrounds) {
	case 10:
		/* AES-128: one schedule round per stored round key */
		for (;;) {
			rk = aes_schedule_round(rk, rk, &rcon_rot);
			if (--nrounds == 0)
				break;
			mrk = aes_schedule_mangle_dec(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 -= 4;
		}
		break;
	case 12: {
		/* AES-192: produce three round keys per loop iteration */
		__m128i prkhi;		/* high half of previous round key */

		prk = rk;
		rk = aes_schedule_transform(_mm_loadu_epi8(key + 8), ipt);
		prkhi = aes_schedule_192_smearhi(rk);
		for (;;) {
			prk = aes_schedule_round(rk, prk, &rcon_rot);
			rk = _mm_alignr_epi8(prk, prkhi, 8);

			mrk = aes_schedule_mangle_dec(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 -= 4;
			rk = aes_schedule_192_smear(prkhi, prk);
			prkhi = aes_schedule_192_smearhi(rk);

			mrk = aes_schedule_mangle_dec(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 -= 4;
			rk = prk = aes_schedule_round(rk, prk, &rcon_rot);
			if ((nrounds -= 3) == 0)
				break;

			mrk = aes_schedule_mangle_dec(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 -= 4;
			rk = aes_schedule_192_smear(prkhi, prk);
			prkhi = aes_schedule_192_smearhi(rk);
		}
		break;
	}
	case 14: {
		/* AES-256: alternate high and low schedule rounds */
		__m128i pprk;		/* previous previous round key */

		prk = rk;
		rk = aes_schedule_transform(_mm_loadu_epi8(key + 16), ipt);
		for (;;) {
			mrk = aes_schedule_mangle_dec(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 -= 4;
			pprk = rk;

			/* high round */
			rk = prk = aes_schedule_round(rk, prk, &rcon_rot);
			if ((nrounds -= 2) == 0)
				break;
			mrk = aes_schedule_mangle_dec(rk, sr[i-- % 4].m);
			storeroundkey(rk32, mrk);
			rk32 -= 4;

			/* low round */
			rk = _mm_shuffle_epi32(rk, 0xff);
			rk = aes_schedule_low_round(rk, pprk);
		}
		break;
	}
	default:
		panic("invalid number of AES rounds: %u", nrounds);
	}
	/* first slot gets the deskewed last round key */
	storeroundkey(rk32, aes_schedule_mangle_last_dec(rk));
}
|
||||
|
||||
/*
 * aes_ssse3_enc1(enc, x, nrounds)
 *
 *	Encrypt a single 16-byte block x with the expanded key in
 *	enc->aese_aes.aes_rk and return the ciphertext block.
 *	Vector-register calling convention: only for use from within
 *	this implementation (see aes_ssse3_impl.h).
 */
__m128i
aes_ssse3_enc1(const struct aesenc *enc, __m128i x, unsigned nrounds)
{
	const uint32_t *rk32 = enc->aese_aes.aes_rk;
	__m128i io, jo;
	unsigned rmod4 = 0;	/* round number mod 4, for mc_*/sr tables */

	/* input transform, then add the first round key */
	x = aes_schedule_transform(x, ipt);
	x ^= loadroundkey(rk32);
	for (;;) {
		__m128i A, A2, A2_B, A2_B_D;

		subbytes(&io, &jo, x);

		rk32 += 4;
		rmod4 = (rmod4 + 1) % 4;
		if (--nrounds == 0)
			break;

		/* substitution via split sb1/sb2 tables, plus round key */
		A = _mm_shuffle_epi8(sb1[0].m, io) ^
		    _mm_shuffle_epi8(sb1[1].m, jo);
		A ^= loadroundkey(rk32);
		A2 = _mm_shuffle_epi8(sb2[0].m, io) ^
		    _mm_shuffle_epi8(sb2[1].m, jo);
		/* mixing step via forward/backward byte rotations */
		A2_B = A2 ^ _mm_shuffle_epi8(A, mc_forward[rmod4].m);
		A2_B_D = A2_B ^ _mm_shuffle_epi8(A, mc_backward[rmod4].m);
		x = A2_B_D ^ _mm_shuffle_epi8(A2_B, mc_forward[rmod4].m);
	}
	/* final round: sbo substitution, last round key, sr permutation */
	x = _mm_shuffle_epi8(sbo[0].m, io) ^ _mm_shuffle_epi8(sbo[1].m, jo);
	x ^= loadroundkey(rk32);
	return _mm_shuffle_epi8(x, sr[rmod4].m);
}
|
||||
|
||||
/*
 * aes_ssse3_dec1(dec, x, nrounds)
 *
 *	Decrypt a single 16-byte block x with the expanded key in
 *	dec->aesd_aes.aes_rk and return the plaintext block.
 *	Vector-register calling convention: only for use from within
 *	this implementation (see aes_ssse3_impl.h).
 */
__m128i
aes_ssse3_dec1(const struct aesdec *dec, __m128i x, unsigned nrounds)
{
	const uint32_t *rk32 = dec->aesd_aes.aes_rk;
	unsigned i = 3 & ~(nrounds - 1);	/* final sr[] index */
	__m128i io, jo, mc;

	/* decryption input transform, then add the first round key */
	x = aes_schedule_transform(x, dipt);
	x ^= loadroundkey(rk32);
	rk32 += 4;

	mc = mc_forward[3].m;
	for (;;) {
		subbytes(&io, &jo, x);
		if (--nrounds == 0)
			break;

		/* chain the four split decryption tables, rotating by mc */
		x = _mm_shuffle_epi8(dsb9[0].m, io) ^
		    _mm_shuffle_epi8(dsb9[1].m, jo);
		x ^= loadroundkey(rk32);
		rk32 += 4;	/* next round key */

		x = _mm_shuffle_epi8(x, mc);
		x ^= _mm_shuffle_epi8(dsbd[0].m, io) ^
		    _mm_shuffle_epi8(dsbd[1].m, jo);

		x = _mm_shuffle_epi8(x, mc);
		x ^= _mm_shuffle_epi8(dsbb[0].m, io) ^
		    _mm_shuffle_epi8(dsbb[1].m, jo);

		x = _mm_shuffle_epi8(x, mc);
		x ^= _mm_shuffle_epi8(dsbe[0].m, io) ^
		    _mm_shuffle_epi8(dsbe[1].m, jo);

		/* rotate mc by 4 bytes for the next round */
		mc = _mm_alignr_epi8(mc, mc, 12);
	}
	/* final round: dsbo substitution, last round key, sr permutation */
	x = _mm_shuffle_epi8(dsbo[0].m, io) ^ _mm_shuffle_epi8(dsbo[1].m, jo);
	x ^= loadroundkey(rk32);
	return _mm_shuffle_epi8(x, sr[i].m);
}
|
|
@ -0,0 +1,62 @@
|
|||
/* $NetBSD: aes_ssse3.h,v 1.1 2020/06/29 23:51:35 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2020 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _CRYPTO_AES_ARCH_X86_AES_SSSE3_H
|
||||
#define _CRYPTO_AES_ARCH_X86_AES_SSSE3_H
|
||||
|
||||
#include <crypto/aes/aes.h>
|
||||
|
||||
/*
|
||||
* These functions MUST NOT use any vector registers for parameters or
|
||||
* results -- the caller is compiled with -mno-sse &c. in the kernel,
|
||||
* and dynamically turns on the vector unit just before calling them.
|
||||
* Internal subroutines that use the vector unit for parameters are
|
||||
* declared in aes_ssse3_impl.h instead.
|
||||
*/
|
||||
|
||||
void aes_ssse3_setenckey(struct aesenc *, const uint8_t *, unsigned);
|
||||
void aes_ssse3_setdeckey(struct aesdec *, const uint8_t *, unsigned);
|
||||
|
||||
void aes_ssse3_enc(const struct aesenc *, const uint8_t[static 16],
|
||||
uint8_t[static 16], uint32_t);
|
||||
void aes_ssse3_dec(const struct aesdec *, const uint8_t[static 16],
|
||||
uint8_t[static 16], uint32_t);
|
||||
void aes_ssse3_cbc_enc(const struct aesenc *, const uint8_t[static 16],
|
||||
uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
|
||||
void aes_ssse3_cbc_dec(const struct aesdec *, const uint8_t[static 16],
|
||||
uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
|
||||
void aes_ssse3_xts_enc(const struct aesenc *, const uint8_t[static 16],
|
||||
uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
|
||||
void aes_ssse3_xts_dec(const struct aesdec *, const uint8_t[static 16],
|
||||
uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
|
||||
|
||||
int aes_ssse3_selftest(void);
|
||||
|
||||
extern struct aes_impl aes_ssse3_impl;
|
||||
|
||||
#endif /* _CRYPTO_AES_ARCH_X86_AES_SSSE3_H */
|
|
@ -0,0 +1,165 @@
|
|||
/* $NetBSD: aes_ssse3_impl.c,v 1.1 2020/06/29 23:51:35 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2020 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_impl.c,v 1.1 2020/06/29 23:51:35 riastradh Exp $");
|
||||
|
||||
#include <crypto/aes/aes.h>
|
||||
#include <crypto/aes/arch/x86/aes_ssse3.h>
|
||||
|
||||
#include <x86/cpu.h>
|
||||
#include <x86/cpuvar.h>
|
||||
#include <x86/fpu.h>
|
||||
#include <x86/specialreg.h>
|
||||
|
||||
/*
 * AES framework entry point for encryption key setup: bracket the
 * SSSE3 code with kernel FPU enable/disable, since callers are
 * compiled without SSE (see aes_ssse3.h).
 */
static void
aes_ssse3_setenckey_impl(struct aesenc *enc, const uint8_t *key,
    uint32_t nrounds)
{

	fpu_kern_enter();
	aes_ssse3_setenckey(enc, key, nrounds);
	fpu_kern_leave();
}
|
||||
|
||||
/*
 * AES framework entry point for decryption key setup: bracket the
 * SSSE3 code with kernel FPU enable/disable.
 */
static void
aes_ssse3_setdeckey_impl(struct aesdec *dec, const uint8_t *key,
    uint32_t nrounds)
{

	fpu_kern_enter();
	aes_ssse3_setdeckey(dec, key, nrounds);
	fpu_kern_leave();
}
|
||||
|
||||
/*
 * AES framework entry point for single-block encryption: bracket the
 * SSSE3 code with kernel FPU enable/disable.
 */
static void
aes_ssse3_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{

	fpu_kern_enter();
	aes_ssse3_enc(enc, in, out, nrounds);
	fpu_kern_leave();
}
|
||||
|
||||
/*
 * AES framework entry point for single-block decryption: bracket the
 * SSSE3 code with kernel FPU enable/disable.
 */
static void
aes_ssse3_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{

	fpu_kern_enter();
	aes_ssse3_dec(dec, in, out, nrounds);
	fpu_kern_leave();
}
|
||||
|
||||
/*
 * AES framework entry point for CBC encryption: bracket the SSSE3
 * code with kernel FPU enable/disable.  A zero-length request is a
 * no-op, so skip FPU entry entirely in that case.
 */
static void
aes_ssse3_cbc_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{

	if (nbytes == 0)
		return;
	fpu_kern_enter();
	aes_ssse3_cbc_enc(enc, in, out, nbytes, iv, nrounds);
	fpu_kern_leave();
}
|
||||
|
||||
/*
 * AES framework entry point for CBC decryption: bracket the SSSE3
 * code with kernel FPU enable/disable; zero-length requests skip it.
 */
static void
aes_ssse3_cbc_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{

	if (nbytes == 0)
		return;
	fpu_kern_enter();
	aes_ssse3_cbc_dec(dec, in, out, nbytes, iv, nrounds);
	fpu_kern_leave();
}
|
||||
|
||||
/*
 * AES framework entry point for XTS encryption: bracket the SSSE3
 * code with kernel FPU enable/disable; zero-length requests skip it.
 */
static void
aes_ssse3_xts_enc_impl(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{

	if (nbytes == 0)
		return;
	fpu_kern_enter();
	aes_ssse3_xts_enc(enc, in, out, nbytes, iv, nrounds);
	fpu_kern_leave();
}
|
||||
|
||||
/*
 * AES framework entry point for XTS decryption: bracket the SSSE3
 * code with kernel FPU enable/disable; zero-length requests skip it.
 */
static void
aes_ssse3_xts_dec_impl(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{

	if (nbytes == 0)
		return;
	fpu_kern_enter();
	aes_ssse3_xts_dec(dec, in, out, nbytes, iv, nrounds);
	fpu_kern_leave();
}
|
||||
|
||||
static int
|
||||
aes_ssse3_probe(void)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
/* Verify that the CPU supports SSE, SSE2, SSE3, and SSSE3. */
|
||||
if (!i386_has_sse)
|
||||
return -1;
|
||||
if (!i386_has_sse2)
|
||||
return -1;
|
||||
if (((cpu_feature[1]) & CPUID2_SSE3) == 0)
|
||||
return -1;
|
||||
if (((cpu_feature[1]) & CPUID2_SSSE3) == 0)
|
||||
return -1;
|
||||
|
||||
fpu_kern_enter();
|
||||
result = aes_ssse3_selftest();
|
||||
fpu_kern_leave();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Method table exported to the machine-dependent AES framework;
 * registered via aes_md_init (see identcpu.c) when aes_ssse3_probe
 * succeeds.
 */
struct aes_impl aes_ssse3_impl = {
	.ai_name = "Intel SSSE3 vpaes",
	.ai_probe = aes_ssse3_probe,
	.ai_setenckey = aes_ssse3_setenckey_impl,
	.ai_setdeckey = aes_ssse3_setdeckey_impl,
	.ai_enc = aes_ssse3_enc_impl,
	.ai_dec = aes_ssse3_dec_impl,
	.ai_cbc_enc = aes_ssse3_cbc_enc_impl,
	.ai_cbc_dec = aes_ssse3_cbc_dec_impl,
	.ai_xts_enc = aes_ssse3_xts_enc_impl,
	.ai_xts_dec = aes_ssse3_xts_dec_impl,
};
|
|
@ -0,0 +1,42 @@
|
|||
/* $NetBSD: aes_ssse3_impl.h,v 1.1 2020/06/29 23:51:35 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2020 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _CRYPTO_AES_ARCH_X86_AES_SSSE3_IMPL_H
|
||||
#define _CRYPTO_AES_ARCH_X86_AES_SSSE3_IMPL_H
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <crypto/aes/aes.h>
|
||||
#include <crypto/aes/arch/x86/aes_ssse3.h>
|
||||
#include <crypto/aes/arch/x86/immintrin.h>
|
||||
#include <crypto/aes/arch/x86/immintrin_ext.h>
|
||||
|
||||
__m128i aes_ssse3_enc1(const struct aesenc *, __m128i, unsigned);
|
||||
__m128i aes_ssse3_dec1(const struct aesdec *, __m128i, unsigned);
|
||||
|
||||
#endif /* _CRYPTO_AES_ARCH_X86_AES_SSSE3_IMPL_H */
|
|
@ -0,0 +1,213 @@
|
|||
/* $NetBSD: aes_ssse3_subr.c,v 1.1 2020/06/29 23:51:35 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2020 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.1 2020/06/29 23:51:35 riastradh Exp $");
|
||||
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <lib/libkern/libkern.h>
|
||||
|
||||
#include "aes_ssse3_impl.h"
|
||||
|
||||
/*
 * loadblock(in)
 *
 *	Load a 16-byte AES block from a possibly unaligned buffer.
 */
static inline __m128i
loadblock(const void *in)
{
	return _mm_loadu_epi8(in);
}
|
||||
|
||||
/*
 * storeblock(out, block)
 *
 *	Store a 16-byte AES block to a possibly unaligned buffer.
 */
static inline void
storeblock(void *out, __m128i block)
{
	_mm_storeu_epi8(out, block);
}
|
||||
|
||||
void
|
||||
aes_ssse3_enc(const struct aesenc *enc, const uint8_t in[static 16],
|
||||
uint8_t out[static 16], uint32_t nrounds)
|
||||
{
|
||||
__m128i block;
|
||||
|
||||
block = loadblock(in);
|
||||
block = aes_ssse3_enc1(enc, block, nrounds);
|
||||
storeblock(out, block);
|
||||
}
|
||||
|
||||
void
|
||||
aes_ssse3_dec(const struct aesdec *dec, const uint8_t in[static 16],
|
||||
uint8_t out[static 16], uint32_t nrounds)
|
||||
{
|
||||
__m128i block;
|
||||
|
||||
block = loadblock(in);
|
||||
block = aes_ssse3_dec1(dec, block, nrounds);
|
||||
storeblock(out, block);
|
||||
}
|
||||
|
||||
void
|
||||
aes_ssse3_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
|
||||
uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
|
||||
uint32_t nrounds)
|
||||
{
|
||||
__m128i cv;
|
||||
|
||||
KASSERT(nbytes);
|
||||
|
||||
cv = loadblock(iv);
|
||||
for (; nbytes; nbytes -= 16, in += 16, out += 16) {
|
||||
cv ^= loadblock(in);
|
||||
cv = aes_ssse3_enc1(enc, cv, nrounds);
|
||||
storeblock(out, cv);
|
||||
}
|
||||
storeblock(iv, cv);
|
||||
}
|
||||
|
||||
/*
 * aes_ssse3_cbc_dec(dec, in, out, nbytes, iv, nrounds)
 *
 *	Decrypt nbytes (a positive multiple of 16) of AES-CBC ciphertext
 *	from in to out.  On entry iv holds the initialization vector; on
 *	exit it holds the last ciphertext block, ready to continue the
 *	stream in a subsequent call.  Blocks are processed from the end
 *	backwards, and each ciphertext block is loaded before the
 *	corresponding output block is stored, so in and out may alias.
 */
void
aes_ssse3_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	__m128i iv0, cv, b;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/*
	 * Save the caller's iv, and immediately replace it with the
	 * last ciphertext block -- the iv for the next call.
	 */
	iv0 = loadblock(iv);
	cv = loadblock(in + nbytes - 16);
	storeblock(iv, cv);

	/* Decrypt from the last block toward the first. */
	for (;;) {
		b = aes_ssse3_dec1(dec, cv, nrounds);
		if ((nbytes -= 16) == 0)
			break;
		/* Chain with the preceding ciphertext block. */
		cv = loadblock(in + nbytes - 16);
		storeblock(out + nbytes, b ^ cv);
	}
	/* The first block chains with the original iv. */
	storeblock(out, b ^ iv0);
}
|
||||
|
||||
/*
 * aes_ssse3_xts_update(t)
 *
 *	Multiply the 128-bit XTS tweak t by x in GF(2^128) modulo
 *	x^128 + x^7 + x^2 + x + 1 (reduction constant 0x87), with the
 *	tweak stored little-endian: shift left by one bit, propagating
 *	the low lane's carry into the high lane and folding the high
 *	lane's carry back into the low lane as 0x87.
 */
static inline __m128i
aes_ssse3_xts_update(__m128i t)
{
	const __m128i one = _mm_set_epi64x(1, 1);
	const __m128i carry = _mm_set_epi64x(1, 0x87);
	__m128i msb, mask;

	msb = _mm_srli_epi64(t, 63);	/* 1 where a lane's MSB is set */
	mask = _mm_sub_epi64(msb, one);	/* 0 if MSB set, else all ones */
	mask = _mm_shuffle_epi32(mask, 0x4e);	/* exchange 64-bit halves */

	return _mm_slli_epi64(t, 1) ^ (carry & ~mask);
}
|
||||
|
||||
static int
|
||||
aes_ssse3_xts_update_selftest(void)
|
||||
{
|
||||
static const struct {
|
||||
uint32_t in[4], out[4];
|
||||
} cases[] = {
|
||||
[0] = { {1}, {2} },
|
||||
[1] = { {0x80000000U,0,0,0}, {0,1,0,0} },
|
||||
[2] = { {0,0x80000000U,0,0}, {0,0,1,0} },
|
||||
[3] = { {0,0,0x80000000U,0}, {0,0,0,1} },
|
||||
[4] = { {0,0,0,0x80000000U}, {0x87,0,0,0} },
|
||||
[5] = { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
|
||||
};
|
||||
unsigned i;
|
||||
uint32_t t[4];
|
||||
int result = 0;
|
||||
|
||||
for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
|
||||
t[0] = cases[i].in[0];
|
||||
t[1] = cases[i].in[1];
|
||||
t[2] = cases[i].in[2];
|
||||
t[3] = cases[i].in[3];
|
||||
storeblock(t, aes_ssse3_xts_update(loadblock(t)));
|
||||
if (t[0] != cases[i].out[0] ||
|
||||
t[1] != cases[i].out[1] ||
|
||||
t[2] != cases[i].out[2] ||
|
||||
t[3] != cases[i].out[3]) {
|
||||
printf("%s %u:"
|
||||
" %"PRIx32" %"PRIx32" %"PRIx32" %"PRIx32"\n",
|
||||
__func__, i, t[0], t[1], t[2], t[3]);
|
||||
result = -1;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
aes_ssse3_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
|
||||
uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
|
||||
uint32_t nrounds)
|
||||
{
|
||||
__m128i t, b;
|
||||
|
||||
KASSERT(nbytes);
|
||||
KASSERT(nbytes % 16 == 0);
|
||||
|
||||
t = loadblock(tweak);
|
||||
for (; nbytes; nbytes -= 16, in += 16, out += 16) {
|
||||
b = t ^ loadblock(in);
|
||||
b = aes_ssse3_enc1(enc, b, nrounds);
|
||||
storeblock(out, t ^ b);
|
||||
t = aes_ssse3_xts_update(t);
|
||||
}
|
||||
storeblock(tweak, t);
|
||||
}
|
||||
|
||||
void
|
||||
aes_ssse3_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
|
||||
uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
|
||||
uint32_t nrounds)
|
||||
{
|
||||
__m128i t, b;
|
||||
|
||||
KASSERT(nbytes);
|
||||
KASSERT(nbytes % 16 == 0);
|
||||
|
||||
t = loadblock(tweak);
|
||||
for (; nbytes; nbytes -= 16, in += 16, out += 16) {
|
||||
b = t ^ loadblock(in);
|
||||
b = aes_ssse3_dec1(dec, b, nrounds);
|
||||
storeblock(out, t ^ b);
|
||||
t = aes_ssse3_xts_update(t);
|
||||
}
|
||||
storeblock(tweak, t);
|
||||
}
|
||||
|
||||
/*
 * aes_ssse3_selftest()
 *
 *	Sanity-check the SSSE3 AES building blocks.  Returns 0 on
 *	success, -1 on failure.
 */
int
aes_ssse3_selftest(void)
{

	return aes_ssse3_xts_update_selftest() ? -1 : 0;
}
|
|
@ -0,0 +1,8 @@
|
|||
# $NetBSD: files.aesssse3,v 1.1 2020/06/29 23:51:35 riastradh Exp $
|
||||
|
||||
makeoptions aes "COPTS.aes_ssse3.c"+="-msse -msse2 -msse3 -mssse3"
|
||||
makeoptions aes "COPTS.aes_ssse3_subr.c"+="-msse -msse2 -msse3 -mssse3"
|
||||
|
||||
file crypto/aes/arch/x86/aes_ssse3.c aes
|
||||
file crypto/aes/arch/x86/aes_ssse3_subr.c aes
|
||||
file crypto/aes/arch/x86/aes_ssse3_impl.c aes
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: immintrin.h,v 1.1 2020/06/29 23:47:54 riastradh Exp $ */
|
||||
/* $NetBSD: immintrin.h,v 1.2 2020/06/29 23:51:35 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2020 The NetBSD Foundation, Inc.
|
||||
|
@ -53,6 +53,7 @@ typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
|
|||
typedef int __v4si __attribute__((__vector_size__(16)));
|
||||
typedef float __v4sf __attribute__((__vector_size__(16)));
|
||||
typedef short __v8hi __attribute__((__vector_size__(16)));
|
||||
typedef char __v16qi __attribute__((__vector_size__(16)));
|
||||
|
||||
#elif defined(__clang__)
|
||||
|
||||
|
@ -66,6 +67,7 @@ typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
|
|||
typedef int __v4si __attribute__((__vector_size__(16)));
|
||||
typedef float __v4sf __attribute__((__vector_size__(16)));
|
||||
typedef short __v8hi __attribute__((__vector_size__(16)));
|
||||
typedef char __v16qi __attribute__((__vector_size__(16)));
|
||||
|
||||
#define _INTRINSATTR \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse2"), \
|
||||
|
@ -79,6 +81,18 @@ typedef short __v8hi __attribute__((__vector_size__(16)));
|
|||
|
||||
#endif
|
||||
|
||||
#define _SSSE3_ATTR __attribute__((target("ssse3")))
|
||||
|
||||
/*
 * _mm_alignr_epi8(hi, lo, bytes): PALIGNR -- concatenate hi:lo and
 * extract 16 bytes starting `bytes' bytes into lo.  GCC's
 * __builtin_ia32_palignr128 takes the shift count in bits (hence the
 * 8*) on __v2di operands; clang's takes it in bytes on __v16qi.
 */
#if defined(__GNUC__) && !defined(__clang__)
#define	_mm_alignr_epi8(hi,lo,bytes)					      \
	(__m128i)__builtin_ia32_palignr128((__v2di)(__m128i)(hi),	      \
	    (__v2di)(__m128i)(lo), 8*(int)(bytes))
#elif defined(__clang__)
#define	_mm_alignr_epi8(hi,lo,bytes)					      \
	(__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(hi),	      \
	    (__v16qi)(__m128i)(lo), (int)(bytes))
#endif
|
||||
|
||||
_INTRINSATTR
|
||||
static __inline __m128i
|
||||
_mm_loadu_si32(const void *__p)
|
||||
|
@ -95,6 +109,25 @@ _mm_loadu_si64(const void *__p)
|
|||
return __extension__ (__m128i)(__v2di){ __v, 0 };
|
||||
}
|
||||
|
||||
/*
 * Load a 128-bit value through a __m128i pointer, which must be
 * suitably (16-byte) aligned.
 */
_INTRINSATTR
static __inline __m128i
_mm_load_si128(const __m128i *__p)
{
	return *__p;
}
|
||||
|
||||
/*
 * MOVHLPS: return { __v1[2], __v1[3], __v0[2], __v0[3] } -- the high
 * two float lanes of __v1 in the low half of the result, and the high
 * two lanes of __v0 in the high half.
 */
_INTRINSATTR _SSSE3_ATTR
static __inline __m128
_mm_movehl_ps(__m128 __v0, __m128 __v1)
{
#if defined(__GNUC__) && !defined(__clang__)
	return (__m128)__builtin_ia32_movhlps((__v4sf)__v0, (__v4sf)__v1);
#elif defined(__clang__)
	return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1,
	    6, 7, 2, 3);
#endif
}
|
||||
|
||||
_INTRINSATTR
|
||||
static __inline __m128i
|
||||
_mm_set1_epi16(int16_t __v)
|
||||
|
@ -132,6 +165,13 @@ _mm_set_epi64x(int64_t __v1, int64_t __v0)
|
|||
return __extension__ (__m128i)(__v2di){ __v0, __v1 };
|
||||
}
|
||||
|
||||
/*
 * Return a 128-bit vector of four zero single-precision floats.
 */
_INTRINSATTR
static __inline __m128
_mm_setzero_ps(void)
{
	return __extension__ (__m128){ 0, 0, 0, 0 };
}
|
||||
|
||||
_INTRINSATTR
|
||||
static __inline __m128i
|
||||
_mm_setzero_si128(void)
|
||||
|
@ -139,6 +179,14 @@ _mm_setzero_si128(void)
|
|||
return _mm_set1_epi64x(0);
|
||||
}
|
||||
|
||||
/*
 * PSHUFB: byte table lookup -- each output byte i is
 * __vtbl[__vidx[i] & 0xf], or zero when bit 7 of __vidx[i] is set.
 */
_INTRINSATTR _SSSE3_ATTR
static __inline __m128i
_mm_shuffle_epi8(__m128i __vtbl, __m128i __vidx)
{
	return (__m128i)__builtin_ia32_pshufb128((__v16qi)__vtbl,
	    (__v16qi)__vidx);
}
|
||||
|
||||
#define _mm_shuffle_epi32(v,m) \
|
||||
(__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(v), (int)(m))
|
||||
|
||||
|
@ -163,6 +211,13 @@ _mm_slli_epi64(__m128i __v, uint8_t __bits)
|
|||
(int)(bytes))
|
||||
#endif
|
||||
|
||||
/*
 * PSRLD: logical right shift of each 32-bit lane by __bits.
 */
_INTRINSATTR
static __inline __m128i
_mm_srli_epi32(__m128i __v, uint8_t __bits)
{
	return (__m128i)__builtin_ia32_psrldi128((__v4si)__v, (int)__bits);
}
|
||||
|
||||
_INTRINSATTR
|
||||
static __inline __m128i
|
||||
_mm_srli_epi64(__m128i __v, uint8_t __bits)
|
||||
|
@ -193,6 +248,13 @@ _mm_storeu_si64(void *__p, __m128i __v)
|
|||
((struct { int64_t __v; } _PACKALIAS *)__p)->__v = ((__v2di)__v)[0];
|
||||
}
|
||||
|
||||
/*
 * Store a 128-bit value through a __m128i pointer, which must be
 * suitably (16-byte) aligned.
 */
_INTRINSATTR
static __inline void
_mm_store_si128(__m128i *__p, __m128i __v)
{
	*__p = __v;
}
|
||||
|
||||
_INTRINSATTR
|
||||
static __inline __m128i
|
||||
_mm_sub_epi64(__m128i __x, __m128i __y)
|
||||
|
|
Loading…
Reference in New Issue