ARMv8: load the pointers to the AES key and counter, along with the pointer to the SHA256 K table, into registers to handle tightly optimized loops on function calls with -flto

This commit is contained in:
Jacob Barthelmeh 2016-12-06 21:42:15 +00:00
parent 2cbc6ed673
commit 944e5fba03
2 changed files with 43 additions and 36 deletions

View File

@ -476,6 +476,8 @@ int wc_InitAes_h(Aes* aes, void* h)
/* do as many block size ops as possible */
if (numBlocks > 0) {
word32* key = aes->key;
word32* reg = aes->reg;
/*
AESE exor's input with round key
shift rows of exor'ed result
@ -487,10 +489,10 @@ int wc_InitAes_h(Aes* aes, void* h)
case 10: /* AES 128 BLOCK */
__asm__ __volatile__ (
"MOV w11, %w[blocks] \n"
"LD1 {v1.2d-v4.2d}, %[Key], #64 \n"
"LD1 {v5.2d-v8.2d}, %[Key], #64 \n"
"LD1 {v9.2d-v11.2d},%[Key], #48 \n"
"LD1 {v0.2d}, %[reg] \n"
"LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
"LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
"LD1 {v9.2d-v11.2d},[%[Key]], #48 \n"
"LD1 {v0.2d}, [%[reg]] \n"
"LD1 {v12.2d}, [%[input]], #16 \n"
"1:\n"
@ -525,11 +527,11 @@ int wc_InitAes_h(Aes* aes, void* h)
"2:\n"
"#store current counter value at the end \n"
"ST1 {v0.2d}, %[regOut] \n"
"ST1 {v0.2d}, [%[regOut]] \n"
:[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in)
:"0" (out), [Key] "m" (aes->key), [input] "2" (in),
[blocks] "r" (numBlocks), [reg] "m" (aes->reg)
:[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
:"0" (out), [Key] "r" (key), [input] "2" (in),
[blocks] "r" (numBlocks), [reg] "1" (reg)
: "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
"v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13"
);
@ -675,14 +677,17 @@ int wc_InitAes_h(Aes* aes, void* h)
/* do as many block size ops as possible */
if (numBlocks > 0) {
word32* key = aes->key;
word32* reg = aes->reg;
switch(aes->rounds) {
case 10: /* AES 128 BLOCK */
__asm__ __volatile__ (
"MOV w11, %w[blocks] \n"
"LD1 {v1.2d-v4.2d}, %[Key], #64 \n"
"LD1 {v5.2d-v8.2d}, %[Key], #64 \n"
"LD1 {v9.2d-v11.2d},%[Key], #48 \n"
"LD1 {v13.2d}, %[reg] \n"
"LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
"LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
"LD1 {v9.2d-v11.2d},[%[Key]], #48 \n"
"LD1 {v13.2d}, [%[reg]] \n"
"1:\n"
"LD1 {v0.2d}, [%[input]], #16 \n"
@ -718,11 +723,11 @@ int wc_InitAes_h(Aes* aes, void* h)
"2: \n"
"#store current counter value at the end \n"
"ST1 {v13.2d}, %[regOut] \n"
"ST1 {v13.2d}, [%[regOut]] \n"
:[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in)
:"0" (out), [Key] "m" (aes->key), [input] "2" (in),
[blocks] "r" (numBlocks), [reg] "m" (aes->reg)
:[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
:"0" (out), [Key] "r" (key), [input] "2" (in),
[blocks] "r" (numBlocks), [reg] "1" (reg)
: "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
"v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13"
);
@ -731,11 +736,11 @@ int wc_InitAes_h(Aes* aes, void* h)
case 12: /* AES 192 BLOCK */
__asm__ __volatile__ (
"MOV w11, %w[blocks] \n"
"LD1 {v1.2d-v4.2d}, %[Key], #64 \n"
"LD1 {v5.2d-v8.2d}, %[Key], #64 \n"
"LD1 {v9.2d-v12.2d},%[Key], #64 \n"
"LD1 {v13.16b}, %[Key], #16 \n"
"LD1 {v15.2d}, %[reg] \n"
"LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
"LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
"LD1 {v9.2d-v12.2d},[%[Key]], #64 \n"
"LD1 {v13.16b}, [%[Key]], #16 \n"
"LD1 {v15.2d}, [%[reg]] \n"
"LD1 {v0.2d}, [%[input]], #16 \n"
"1: \n"
@ -776,11 +781,11 @@ int wc_InitAes_h(Aes* aes, void* h)
"2:\n"
"#store current counter value at the end \n"
"ST1 {v15.2d}, %[regOut] \n"
"ST1 {v15.2d}, [%[regOut]] \n"
:[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in)
:"0" (out), [Key] "m" (aes->key), [input] "2" (in),
[blocks] "r" (numBlocks), [reg] "m" (aes->reg)
:[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
:"0" (out), [Key] "r" (key), [input] "2" (in),
[blocks] "r" (numBlocks), [reg] "1" (reg)
: "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
"v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15"
);
@ -789,11 +794,11 @@ int wc_InitAes_h(Aes* aes, void* h)
case 14: /* AES 256 BLOCK */
__asm__ __volatile__ (
"MOV w11, %w[blocks] \n"
"LD1 {v1.2d-v4.2d}, %[Key], #64 \n"
"LD1 {v5.2d-v8.2d}, %[Key], #64 \n"
"LD1 {v9.2d-v12.2d}, %[Key], #64 \n"
"LD1 {v13.2d-v15.2d}, %[Key], #48 \n"
"LD1 {v17.2d}, %[reg] \n"
"LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n"
"LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n"
"LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n"
"LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n"
"LD1 {v17.2d}, [%[reg]] \n"
"LD1 {v0.2d}, [%[input]], #16 \n"
"1: \n"
@ -838,11 +843,11 @@ int wc_InitAes_h(Aes* aes, void* h)
"2:\n"
"#store current counter value at the end \n"
"ST1 {v17.2d}, %[regOut] \n"
"ST1 {v17.2d}, [%[regOut]] \n"
:[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in)
:"0" (out), [Key] "m" (aes->key), [input] "2" (in),
[blocks] "r" (numBlocks), [reg] "m" (aes->reg)
:[out] "=r" (out), [regOut] "=r" (reg), "=r" (in)
:"0" (out), [Key] "r" (key), [input] "2" (in),
[blocks] "r" (numBlocks), [reg] "1" (reg)
: "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5",
"v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15",
"v16", "v17"

View File

@ -133,6 +133,8 @@ int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
numBlocks = (len + sha256->buffLen)/SHA256_BLOCK_SIZE;
if (numBlocks > 0) {
word32* k = (word32*)K;
/* get leftover amount after blocks */
add = (len + sha256->buffLen) - numBlocks * SHA256_BLOCK_SIZE;
__asm__ volatile (
@ -300,8 +302,8 @@ int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
"STP q12, q13, %[out] \n"
: [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" (numBlocks),
"=r" (data)
: [k] "r" (K), [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer),
"=r" (data), "=r" (k)
: [k] "4" (k), [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer),
[blocks] "2" (numBlocks), [dataIn] "3" (data)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14",