Reallocate registers to avoid abusing callee-saves registers, v8-v15.

Forgot to consult the AAPCS before the previous commit -- oops!  While
here, take advantage of the 32 AArch64 SIMD registers to avoid all
stack spills.
commit aac1a7e566
parent 6d5a7eed7d
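Background for the change (not part of the diff): under AAPCS64, the low halves d8-d15 of v8-v15 must be preserved by the callee, so the old code was silently corrupting caller state; v16-v31 are caller-saved and may be trashed freely. A minimal C sketch of the cost difference, assuming GCC or Clang on AArch64 (function names illustrative):

	/*
	 * Clobbering v16-v31 in inline asm costs nothing extra;
	 * clobbering v8-v15 forces the compiler to spill d8/d9 in the
	 * prologue, because AAPCS64 makes the callee preserve them.
	 */
	void
	trash_caller_saved(void)
	{
		__asm__ volatile ("" ::: "v16", "v17", "v31");	/* free */
	}

	void
	trash_callee_saved(void)
	{
		__asm__ volatile ("" ::: "v8", "v9");	/* spills d8/d9 */
	}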
@@ -1,4 +1,4 @@
-/* $NetBSD: aes_armv8_64.S,v 1.3 2020/06/30 21:53:39 riastradh Exp $ */
+/* $NetBSD: aes_armv8_64.S,v 1.4 2020/06/30 23:06:02 riastradh Exp $ */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -116,7 +116,7 @@ ENTRY(aesarmv8_setenckey128)
 
 	adrl	x4, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q8, [x4]	/* q8 := unshiftrows_rotword_3 table */
+	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 table */
 
 	str	q1, [x0], #0x10	/* store master key as first round key */
 	mov	x2, #10	/* round count */
@@ -136,7 +136,7 @@ ENTRY(aesarmv8_setenckey128)
 
 	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
 	ld1r	{v4.4s}, [x3], #4
-	tbl	v3.16b, {v3.16b}, v8.16b
+	tbl	v3.16b, {v3.16b}, v16.16b
 	eor	v3.16b, v3.16b, v4.16b
 
 	/*
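For reference, the ld1r/tbl/eor sequence above is one AES-128 key-schedule step: substitute and rotate the last word of the previous round key, fold in the round constant, then xor across the four words. A C sketch of the word-level structure, with SubWord passed as a callback (names illustrative, not from the NetBSD sources; little-endian word layout assumed):

	#include <stdint.h>

	/* One AES-128 key-schedule step: expand prk[0..3] into rk[0..3]. */
	static void
	aes128_keystep(uint32_t rk[4], const uint32_t prk[4], uint32_t rcon,
	    uint32_t (*subw)(uint32_t))
	{
		uint32_t t;

		t = (*subw)(prk[3]);		/* SubBytes (tbl above) */
		t = (t >> 8) | (t << 24);	/* RotWord */
		t ^= rcon;			/* ld1r/eor above */

		rk[0] = prk[0] ^ t;
		rk[1] = prk[1] ^ rk[0];
		rk[2] = prk[2] ^ rk[1];
		rk[3] = prk[3] ^ rk[2];
	}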
@@ -175,8 +175,8 @@ ENTRY(aesarmv8_setenckey192)
 	adrl	x4, unshiftrows_rotword_1
 	adrl	x5, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q8, [x4]	/* q8 := unshiftrows_rotword_1 */
-	ldr	q9, [x5]	/* q9 := unshiftrows_rotword_3 */
+	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_1 */
+	ldr	q17, [x5]	/* q17 := unshiftrows_rotword_3 */
 
 	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
 	mov	x2, #12	/* round count */
@@ -197,7 +197,7 @@ ENTRY(aesarmv8_setenckey192)
 
 	/* v3.4s[i] := RotWords(SubBytes(rklo[1])) ^ RCON */
 	ld1r	{v4.4s}, [x3], #4
-	tbl	v3.16b, {v3.16b}, v8.16b
+	tbl	v3.16b, {v3.16b}, v16.16b
 	eor	v3.16b, v3.16b, v4.16b
 
 	/*
@@ -269,8 +269,8 @@ ENTRY(aesarmv8_setenckey192)
  * q2 = rk
  * q3 = nrk
  * v5.4s = (rk[2], rk[3], nrk[0], nrk[1])
- * q8 = unshiftrows_rotword_1
- * q9 = unshiftrows_rotword_3
+ * q16 = unshiftrows_rotword_1
+ * q17 = unshiftrows_rotword_3
  *
  * We have to compute, in q1:
  *
@@ -294,7 +294,7 @@ ENTRY(aesarmv8_setenckey192)
 
 	/* v1.4s[i] := RotWords(SubBytes(nrk[3])) ^ RCON' */
 	ld1r	{v4.4s}, [x3], #4
-	tbl	v1.16b, {v1.16b}, v9.16b
+	tbl	v1.16b, {v1.16b}, v17.16b
 	eor	v1.16b, v1.16b, v4.16b
 
 	/*
@@ -354,8 +354,8 @@ ENTRY(aesarmv8_setenckey256)
 	adrl	x4, unshiftrows_rotword_3
 	adrl	x5, unshiftrows_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q8, [x4]	/* q8 := unshiftrows_rotword_3 */
-	ldr	q9, [x5]	/* q9 := unshiftrows_3 */
+	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 */
+	ldr	q17, [x5]	/* q17 := unshiftrows_3 */
 
 	/* store master key as first two round keys */
 	stp	q1, q2, [x0], #0x20
@@ -376,7 +376,7 @@ ENTRY(aesarmv8_setenckey256)
 
 	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
 	ld1r	{v4.4s}, [x3], #4
-	tbl	v3.16b, {v3.16b}, v8.16b
+	tbl	v3.16b, {v3.16b}, v16.16b
 	eor	v3.16b, v3.16b, v4.16b
 
 	/*
@@ -402,7 +402,7 @@ ENTRY(aesarmv8_setenckey256)
 	aese	v3.16b, v0.16b
 
 	/* v3.4s[i] := SubBytes(rk[3]) */
-	tbl	v3.16b, {v3.16b}, v9.16b
+	tbl	v3.16b, {v3.16b}, v17.16b
 
 	/*
 	 * v5.4s := (0,prk[0],prk[1],prk[2])
@@ -458,9 +458,9 @@ END(aesarmv8_enctodec)
 ENTRY(aesarmv8_enc)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := block */
-	bl	aesarmv8_enc1
-	str	q0, [x2]	/* store block */
+	ldr	q0, [x1]	/* q0 := ptxt */
+	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
+	str	q0, [x2]	/* store ctxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_enc)
@@ -476,9 +476,9 @@ END(aesarmv8_enc)
 ENTRY(aesarmv8_dec)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := block */
-	bl	aesarmv8_dec1
-	str	q0, [x2]	/* store block */
+	ldr	q0, [x1]	/* q0 := ctxt */
+	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
+	str	q0, [x2]	/* store ptxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_dec)
@@ -505,7 +505,7 @@ ENTRY(aesarmv8_cbc_enc)
 	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
 	mov	x0, x9	/* x0 := enckey */
 	mov	x3, x5	/* x3 := nrounds */
-	bl	aesarmv8_enc1	/* q0 := ciphertext block */
+	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
 	subs	x10, x10, #0x10	/* count down nbytes */
 	str	q0, [x2], #0x10	/* store ciphertext block */
 	b.ne	1b	/* repeat if x10 is nonzero */
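CBC encryption stays one block at a time because it is inherently serial: each ciphertext block is the chaining value for the next, so the eight-wide aese pipelines cannot help here. A C sketch of the same loop, with the block cipher passed as a callback (names illustrative):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	typedef void (*aes_block_fn)(uint8_t out[16], const uint8_t in[16]);

	static void
	cbc_enc(aes_block_fn enc, const uint8_t *in, uint8_t *out,
	    size_t nbytes, uint8_t iv[16])
	{
		uint8_t cv[16];
		size_t i;
		unsigned j;

		memcpy(cv, iv, 16);		/* cv := iv (q0 above) */
		for (i = 0; i < nbytes; i += 16) {
			for (j = 0; j < 16; j++)
				cv[j] ^= in[i + j];	/* cv ^= ptxt */
			(*enc)(cv, cv);		/* cv := E_k(cv ^ ptxt) */
			memcpy(out + i, cv, 16);	/* store ctxt block */
		}
		memcpy(iv, cv, 16);	/* updated iv for the next call */
	}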
@@ -527,10 +527,9 @@ END(aesarmv8_cbc_enc)
  * Standard ABI calling convention.
  */
 ENTRY(aesarmv8_cbc_dec1)
-	stp	fp, lr, [sp, #-32]!	/* push stack frame with uint128 */
+	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q8, [x4]	/* q8 := iv */
-	str	q8, [sp, #16]	/* save iv */
+	ldr	q24, [x4]	/* q24 := iv */
 	mov	x9, x0	/* x9 := enckey */
 	mov	x10, x3	/* x10 := nbytes */
 	add	x1, x1, x3	/* x1 := pointer past end of in */
@@ -539,18 +538,17 @@ ENTRY(aesarmv8_cbc_dec1)
 	str	q0, [x4]	/* update iv */
 1:	mov	x0, x9	/* x0 := enckey */
 	mov	x3, x5	/* x3 := nrounds */
-	bl	aesarmv8_dec1	/* q0 := cv ^ ptxt; trash x0/x3 */
+	bl	aesarmv8_dec1	/* q0 := cv ^ ptxt; trash x0/x3/q16 */
 	subs	x10, x10, #0x10	/* count down nbytes */
 	b.eq	2f	/* stop if this is the first block */
-	ldr	q8, [x1, #-0x10]!	/* q8 := chaining value */
-	eor	v0.16b, v0.16b, v8.16b	/* q0 := plaintext block */
+	ldr	q31, [x1, #-0x10]!	/* q31 := chaining value */
+	eor	v0.16b, v0.16b, v31.16b	/* q0 := plaintext block */
 	str	q0, [x2, #-0x10]!	/* store plaintext block */
-	mov	v0.16b, v8.16b	/* move cv = ciphertext block */
+	mov	v0.16b, v31.16b	/* move cv = ciphertext block */
 	b	1b
-2:	ldr	q8, [sp, #16]	/* q8 := iv */
-	eor	v0.16b, v0.16b, v8.16b	/* q0 := first plaintext block */
+2:	eor	v0.16b, v0.16b, v24.16b	/* q0 := first plaintext block */
 	str	q0, [x2, #-0x10]!	/* store first plaintext block */
-	ldp	fp, lr, [sp], #32	/* pop stack frame */
+	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_cbc_dec1)
 
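aesarmv8_cbc_dec1 walks the buffer from the end toward the start, so each block's chaining value is simply the preceding ciphertext block still sitting in memory; only the IV must be held across the loop, and after this change it lives in q24 rather than a stack slot. The same traversal in C (reusing the aes_block_fn callback convention from the sketch above):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	typedef void (*aes_block_fn)(uint8_t out[16], const uint8_t in[16]);

	static void
	cbc_dec(aes_block_fn dec, const uint8_t *in, uint8_t *out,
	    size_t nbytes, uint8_t iv[16])
	{
		uint8_t newiv[16];
		size_t i;
		unsigned j;

		/* The last ctxt block is the next call's iv ("update iv"). */
		memcpy(newiv, in + nbytes - 16, 16);

		for (i = nbytes; i >= 16; i -= 16) {
			(*dec)(out + i - 16, in + i - 16);	/* D_k(ctxt) */
			if (i == 16) {			/* first block */
				for (j = 0; j < 16; j++)
					out[j] ^= iv[j];	/* ^ iv (q24) */
			} else {			/* ^ previous ctxt */
				for (j = 0; j < 16; j++)
					out[i - 16 + j] ^= in[i - 32 + j];
			}
		}
		memcpy(iv, newiv, 16);
	}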
@@ -566,10 +564,9 @@ END(aesarmv8_cbc_dec1)
  * Standard ABI calling convention.
  */
 ENTRY(aesarmv8_cbc_dec8)
-	stp	fp, lr, [sp, #-32]!	/* push stack frame with uint128 */
+	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q8, [x4]	/* q8 := iv */
-	str	q8, [sp, #16]	/* save iv */
+	ldr	q24, [x4]	/* q24 := iv */
 	mov	x9, x0	/* x9 := enckey */
 	mov	x10, x3	/* x10 := nbytes */
 	add	x1, x1, x3	/* x1 := pointer past end of in */
@@ -579,23 +576,24 @@ ENTRY(aesarmv8_cbc_dec8)
 1:	ldp	q4, q5, [x1, #-0x20]!
 	ldp	q2, q3, [x1, #-0x20]!
 	ldp	q0, q1, [x1, #-0x20]!
-	mov	v15.16b, v6.16b	/* q[8+i] := cv[i], 0<i<8 */
-	mov	v14.16b, v5.16b
-	mov	v13.16b, v4.16b
-	mov	v12.16b, v3.16b
-	mov	v11.16b, v2.16b
-	mov	v10.16b, v1.16b
-	mov	v9.16b, v0.16b
+	mov	v31.16b, v6.16b	/* q[24+i] := cv[i], 0<i<8 */
+	mov	v30.16b, v5.16b
+	mov	v29.16b, v4.16b
+	mov	v28.16b, v3.16b
+	mov	v27.16b, v2.16b
+	mov	v26.16b, v1.16b
+	mov	v25.16b, v0.16b
 	mov	x0, x9	/* x0 := enckey */
 	mov	x3, x5	/* x3 := nrounds */
-	bl	aesarmv8_dec8	/* q[i] := cv[i] ^ pt[i] */
-	eor	v7.16b, v7.16b, v15.16b	/* q[i] := pt[i] */
-	eor	v6.16b, v6.16b, v14.16b
-	eor	v5.16b, v5.16b, v13.16b
-	eor	v4.16b, v4.16b, v12.16b
-	eor	v3.16b, v3.16b, v11.16b
-	eor	v2.16b, v2.16b, v10.16b
-	eor	v1.16b, v1.16b, v9.16b
+	bl	aesarmv8_dec8	/* q[i] := cv[i] ^ pt[i];
+				 * trash x0/x3/q16 */
+	eor	v7.16b, v7.16b, v31.16b	/* q[i] := pt[i] */
+	eor	v6.16b, v6.16b, v30.16b
+	eor	v5.16b, v5.16b, v29.16b
+	eor	v4.16b, v4.16b, v28.16b
+	eor	v3.16b, v3.16b, v27.16b
+	eor	v2.16b, v2.16b, v26.16b
+	eor	v1.16b, v1.16b, v25.16b
 	subs	x10, x10, #0x80	/* count down nbytes */
 	stp	q6, q7, [x2, #-0x20]!	/* store plaintext blocks */
 	stp	q4, q5, [x2, #-0x20]!
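Unlike encryption, CBC decryption parallelizes: the eight aesd streams are independent, and the chaining values (the IV or the preceding ciphertext blocks) are now parked in q24-q31 instead of the callee-saved q8-q15. One batch in C (aes_block_fn as in the sketches above; cv holds the IV followed by ciphertext blocks 0-6):

	static void
	cbc_dec_batch8(aes_block_fn dec, const uint8_t ct[8 * 16],
	    uint8_t pt[8 * 16], const uint8_t cv[8 * 16])
	{
		unsigned b, j;

		/* All eight decryptions are independent, which is what
		 * keeps the eight aesd pipelines busy in aesarmv8_dec8. */
		for (b = 0; b < 8; b++) {
			(*dec)(pt + 16 * b, ct + 16 * b);
			for (j = 0; j < 16; j++)
				pt[16 * b + j] ^= cv[16 * b + j];
		}
	}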
@@ -605,10 +603,9 @@ ENTRY(aesarmv8_cbc_dec8)
 	eor	v0.16b, v0.16b, v7.16b	/* q0 := pt0 */
 	stp	q0, q1, [x2, #-0x20]!
 	b	1b
-2:	ldr	q8, [sp, #16]	/* q8 := iv */
-	eor	v0.16b, v0.16b, v8.16b	/* q0 := pt0 */
+2:	eor	v0.16b, v0.16b, v24.16b	/* q0 := pt0 */
 	stp	q0, q1, [x2, #-0x20]!	/* store first two plaintext blocks */
-	ldp	fp, lr, [sp], #32	/* pop stack frame */
+	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_cbc_dec8)
 
@@ -629,18 +626,18 @@ ENTRY(aesarmv8_xts_enc1)
 	mov	fp, sp
 	mov	x9, x0	/* x9 := enckey */
 	mov	x10, x3	/* x10 := nbytes */
-	ldr	q9, [x4]	/* q9 := tweak */
+	ldr	q31, [x4]	/* q31 := tweak */
 1:	ldr	q0, [x1], #0x10	/* q0 := ptxt */
 	mov	x0, x9	/* x0 := enckey */
 	mov	x3, x5	/* x3 := nrounds */
-	eor	v0.16b, v0.16b, v9.16b	/* q0 := ptxt ^ tweak */
-	bl	aesarmv8_enc1	/* q0 := AES(ptxt ^ tweak) */
-	eor	v0.16b, v0.16b, v9.16b	/* q0 := AES(ptxt ^ tweak) ^ tweak */
+	eor	v0.16b, v0.16b, v31.16b	/* q0 := ptxt ^ tweak */
+	bl	aesarmv8_enc1	/* q0 := AES(...); trash x0/x3/q16 */
+	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ptxt ^ tweak) ^ tweak */
 	str	q0, [x2], #0x10	/* store ciphertext block */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	subs	x10, x10, #0x10	/* count down nbytes */
 	b.ne	1b	/* repeat if more blocks */
-	str	q9, [x4]	/* update tweak */
+	str	q31, [x4]	/* update tweak */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_xts_enc1)
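XTS applies the tweak on both sides of the block cipher and steps it by a multiplication by x in GF(2^128) between blocks; keeping the tweak in q31 for the whole loop is part of what the reallocation buys. The per-block recipe in C (aes_block_fn as above; xts_mulx is sketched after the aesarmv8_xts_mulx hunk below):

	#include <stddef.h>
	#include <stdint.h>

	typedef void (*aes_block_fn)(uint8_t out[16], const uint8_t in[16]);
	void xts_mulx(uint8_t t[16]);	/* defined in the sketch below */

	static void
	xts_enc(aes_block_fn enc, const uint8_t *in, uint8_t *out,
	    size_t nbytes, uint8_t tweak[16])
	{
		uint8_t buf[16];
		size_t i;
		unsigned j;

		for (i = 0; i < nbytes; i += 16) {
			for (j = 0; j < 16; j++)
				buf[j] = in[i + j] ^ tweak[j];	/* ptxt ^ T */
			(*enc)(buf, buf);		/* E_k(ptxt ^ T) */
			for (j = 0; j < 16; j++)
				out[i + j] = buf[j] ^ tweak[j];	/* ... ^ T */
			xts_mulx(tweak);		/* T := T * x */
		}
	}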
@@ -657,61 +654,58 @@ END(aesarmv8_xts_enc1)
  * Standard ABI calling convention.
  */
 ENTRY(aesarmv8_xts_enc8)
-	stp	fp, lr, [sp, #-48]!	/* push stack frame uint128[2] */
+	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
 	mov	x9, x0	/* x9 := enckey */
 	mov	x10, x3	/* x10 := nbytes */
-	ldr	q9, [x4]	/* q9 := tweak */
-1:	str	q9, [sp, #16]	/* save tweak[0] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	str	q9, [sp, #32]	/* save tweak[1] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v10.16b, v9.16b	/* q10 := tweak[2] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v11.16b, v9.16b	/* q11 := tweak[3] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v12.16b, v9.16b	/* q11 := tweak[4] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v13.16b, v9.16b	/* q11 := tweak[5] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v14.16b, v9.16b	/* q11 := tweak[6] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v15.16b, v9.16b	/* q11 := tweak[7] */
-	ldp	q8, q9, [sp, #16]	/* q8 := tweak[0], q9 := tweak[1] */
-	ldp	q0, q1, [x1], #0x20	/* q[i] := pt[i] */
+	ldr	q31, [x4]	/* q31 := tweak */
+1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	/* q31 := tweak[7] */
+	ldp	q0, q1, [x1], #0x20	/* q[i] := ptxt[i] */
 	ldp	q2, q3, [x1], #0x20
 	ldp	q4, q5, [x1], #0x20
 	ldp	q6, q7, [x1], #0x20
-	eor	v0.16b, v0.16b, v8.16b	/* q[i] := pt[i] ^ tweak[i] */
-	eor	v1.16b, v1.16b, v9.16b
-	eor	v2.16b, v2.16b, v10.16b
-	eor	v3.16b, v3.16b, v11.16b
-	eor	v4.16b, v4.16b, v12.16b
-	eor	v5.16b, v5.16b, v13.16b
-	eor	v6.16b, v6.16b, v14.16b
-	eor	v7.16b, v7.16b, v15.16b
+	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ptxt[i] ^ tweak[i] */
+	eor	v1.16b, v1.16b, v25.16b
+	eor	v2.16b, v2.16b, v26.16b
+	eor	v3.16b, v3.16b, v27.16b
+	eor	v4.16b, v4.16b, v28.16b
+	eor	v5.16b, v5.16b, v29.16b
+	eor	v6.16b, v6.16b, v30.16b
+	eor	v7.16b, v7.16b, v31.16b
 	mov	x0, x9	/* x0 := enckey */
 	mov	x3, x5	/* x3 := nrounds */
-	bl	aesarmv8_enc8	/* encrypt q0,...,q7; trash x0/x3/q8 */
-	ldr	q8, [sp, #16]	/* reload q8 := tweak[0] */
-	eor	v1.16b, v1.16b, v9.16b	/* q[i] := AES(...) ^ tweak[i] */
-	eor	v2.16b, v2.16b, v10.16b
-	eor	v3.16b, v3.16b, v11.16b
-	eor	v0.16b, v0.16b, v8.16b
-	eor	v4.16b, v4.16b, v12.16b
-	eor	v5.16b, v5.16b, v13.16b
-	eor	v6.16b, v6.16b, v14.16b
-	eor	v7.16b, v7.16b, v15.16b
+	bl	aesarmv8_enc8	/* encrypt q0-q7; trash x0/x3/q16 */
+	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
+	eor	v1.16b, v1.16b, v25.16b
+	eor	v2.16b, v2.16b, v26.16b
+	eor	v3.16b, v3.16b, v27.16b
+	eor	v4.16b, v4.16b, v28.16b
+	eor	v5.16b, v5.16b, v29.16b
+	eor	v6.16b, v6.16b, v30.16b
+	eor	v7.16b, v7.16b, v31.16b
 	stp	q0, q1, [x2], #0x20	/* store ciphertext blocks */
-	stp	q2, q3, [x2], #0x20	/* store ciphertext blocks */
-	stp	q4, q5, [x2], #0x20	/* store ciphertext blocks */
-	stp	q6, q7, [x2], #0x20	/* store ciphertext blocks */
-	mov	v9.16b, v15.16b	/* q9 := q15 = tweak[7] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
+	stp	q2, q3, [x2], #0x20
+	stp	q4, q5, [x2], #0x20
+	stp	q6, q7, [x2], #0x20
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	subs	x10, x10, #0x80	/* count down nbytes */
 	b.ne	1b	/* repeat if more block groups */
-	str	q9, [x4]	/* update tweak */
-	ldp	fp, lr, [sp], #48	/* pop stack frame */
+	str	q31, [x4]	/* update tweak */
+	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_xts_enc8)
 
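The loop head above now materializes the eight consecutive tweaks T*x^0 through T*x^7 in q24-q31, where the old code spilled two of them to the stack. The precomputation in C (xts_mulx from the sketch further down; names illustrative):

	static void
	xts_tweaks8(uint8_t t8[8][16], uint8_t tweak[16])
	{
		unsigned i, j;

		for (i = 0; i < 8; i++) {
			for (j = 0; j < 16; j++)
				t8[i][j] = tweak[j];	/* t8[i] := T * x^i */
			/* Advance T; the eighth call corresponds to the
			 * trailing bl aesarmv8_xts_mulx after the stores. */
			xts_mulx(tweak);
		}
	}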
@@ -720,7 +714,7 @@ END(aesarmv8_xts_enc8)
  * uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
  * uint32_t nrounds@x5)
  *
- * Decrypt a contiguous sequence of blocks with AES-XTS.
+ * Decrypt a contiguous sequdece of blocks with AES-XTS.
  *
  * nbytes must be a positive integral multiple of 16. This routine
  * is not vectorized; use aesarmv8_xts_dec8 for >=8 blocks at once.
@@ -732,18 +726,18 @@ ENTRY(aesarmv8_xts_dec1)
 	mov	fp, sp
 	mov	x9, x0	/* x9 := deckey */
 	mov	x10, x3	/* x10 := nbytes */
-	ldr	q9, [x4]	/* q9 := tweak */
-1:	ldr	q0, [x1], #0x10	/* q0 := ptxt */
+	ldr	q31, [x4]	/* q31 := tweak */
+1:	ldr	q0, [x1], #0x10	/* q0 := ctxt */
 	mov	x0, x9	/* x0 := deckey */
 	mov	x3, x5	/* x3 := nrounds */
-	eor	v0.16b, v0.16b, v9.16b	/* q0 := ptxt ^ tweak */
-	bl	aesarmv8_dec1	/* q0 := AES(ptxt ^ tweak) */
-	eor	v0.16b, v0.16b, v9.16b	/* q0 := AES(ptxt ^ tweak) ^ tweak */
-	str	q0, [x2], #0x10	/* store ciphertext block */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
+	eor	v0.16b, v0.16b, v31.16b	/* q0 := ctxt ^ tweak */
+	bl	aesarmv8_dec1	/* q0 := AES(...); trash x0/x3/q16 */
+	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ctxt ^ tweak) ^ tweak */
+	str	q0, [x2], #0x10	/* store plaintext block */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	subs	x10, x10, #0x10	/* count down nbytes */
 	b.ne	1b	/* repeat if more blocks */
-	str	q9, [x4]	/* update tweak */
+	str	q31, [x4]	/* update tweak */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_xts_dec1)
@@ -753,75 +747,72 @@ END(aesarmv8_xts_dec1)
  * uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
  * uint32_t nrounds@x5)
  *
- * Decrypt a contiguous sequence of blocks with AES-XTS.
+ * Decrypt a contiguous sequdece of blocks with AES-XTS.
  *
  * nbytes must be a positive integral multiple of 128.
  *
 * Standard ABI calling convention.
  */
 ENTRY(aesarmv8_xts_dec8)
-	stp	fp, lr, [sp, #-48]!	/* push stack frame uint128[2] */
+	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
 	mov	x9, x0	/* x9 := deckey */
 	mov	x10, x3	/* x10 := nbytes */
-	ldr	q9, [x4]	/* q9 := tweak */
-1:	str	q9, [sp, #16]	/* save tweak[0] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	str	q9, [sp, #32]	/* save tweak[1] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v10.16b, v9.16b	/* q10 := tweak[2] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v11.16b, v9.16b	/* q11 := tweak[3] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v12.16b, v9.16b	/* q11 := tweak[4] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v13.16b, v9.16b	/* q11 := tweak[5] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v14.16b, v9.16b	/* q11 := tweak[6] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
-	mov	v15.16b, v9.16b	/* q11 := tweak[7] */
-	ldp	q8, q9, [sp, #16]	/* q8 := tweak[0], q9 := tweak[1] */
-	ldp	q0, q1, [x1], #0x20	/* q[i] := pt[i] */
+	ldr	q31, [x4]	/* q31 := tweak */
+1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	/* q31 := tweak[7] */
+	ldp	q0, q1, [x1], #0x20	/* q[i] := ctxt[i] */
 	ldp	q2, q3, [x1], #0x20
 	ldp	q4, q5, [x1], #0x20
 	ldp	q6, q7, [x1], #0x20
-	eor	v0.16b, v0.16b, v8.16b	/* q[i] := pt[i] ^ tweak[i] */
-	eor	v1.16b, v1.16b, v9.16b
-	eor	v2.16b, v2.16b, v10.16b
-	eor	v3.16b, v3.16b, v11.16b
-	eor	v4.16b, v4.16b, v12.16b
-	eor	v5.16b, v5.16b, v13.16b
-	eor	v6.16b, v6.16b, v14.16b
-	eor	v7.16b, v7.16b, v15.16b
+	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ctxt[i] ^ tweak[i] */
+	eor	v1.16b, v1.16b, v25.16b
+	eor	v2.16b, v2.16b, v26.16b
+	eor	v3.16b, v3.16b, v27.16b
+	eor	v4.16b, v4.16b, v28.16b
+	eor	v5.16b, v5.16b, v29.16b
+	eor	v6.16b, v6.16b, v30.16b
+	eor	v7.16b, v7.16b, v31.16b
 	mov	x0, x9	/* x0 := deckey */
 	mov	x3, x5	/* x3 := nrounds */
-	bl	aesarmv8_dec8	/* decrypt q0,...,q7; trash x0/x3/q8 */
-	ldr	q8, [sp, #16]	/* reload q8 := tweak[0] */
-	eor	v1.16b, v1.16b, v9.16b	/* q[i] := AES(...) ^ tweak[i] */
-	eor	v2.16b, v2.16b, v10.16b
-	eor	v3.16b, v3.16b, v11.16b
-	eor	v0.16b, v0.16b, v8.16b
-	eor	v4.16b, v4.16b, v12.16b
-	eor	v5.16b, v5.16b, v13.16b
-	eor	v6.16b, v6.16b, v14.16b
-	eor	v7.16b, v7.16b, v15.16b
-	stp	q0, q1, [x2], #0x20	/* store ciphertext blocks */
-	stp	q2, q3, [x2], #0x20	/* store ciphertext blocks */
-	stp	q4, q5, [x2], #0x20	/* store ciphertext blocks */
-	stp	q6, q7, [x2], #0x20	/* store ciphertext blocks */
-	mov	v9.16b, v15.16b	/* q9 := q15 = tweak[7] */
-	bl	aesarmv8_xts_mulx	/* q9 *= x; trash x0/q0/q1 */
+	bl	aesarmv8_dec8	/* decrypt q0-q7; trash x0/x3/q16 */
+	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
+	eor	v1.16b, v1.16b, v25.16b
+	eor	v2.16b, v2.16b, v26.16b
+	eor	v3.16b, v3.16b, v27.16b
+	eor	v4.16b, v4.16b, v28.16b
+	eor	v5.16b, v5.16b, v29.16b
+	eor	v6.16b, v6.16b, v30.16b
+	eor	v7.16b, v7.16b, v31.16b
+	stp	q0, q1, [x2], #0x20	/* store plaintext blocks */
+	stp	q2, q3, [x2], #0x20
+	stp	q4, q5, [x2], #0x20
+	stp	q6, q7, [x2], #0x20
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	subs	x10, x10, #0x80	/* count down nbytes */
 	b.ne	1b	/* repeat if more block groups */
-	str	q9, [x4]	/* update tweak */
-	ldp	fp, lr, [sp], #48	/* pop stack frame */
+	str	q31, [x4]	/* update tweak */
+	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_xts_dec8)
 
 /*
- * aesarmv8_xts_mulx(tweak@q9)
+ * aesarmv8_xts_mulx(tweak@q31)
  *
- * Multiply q9 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
+ * Multiply q31 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
  * Uses x0 and q0/q1 as temporaries.
  */
 	.text
@@ -836,12 +827,12 @@ aesarmv8_xts_mulx:
 	 * carried into x^128 = x^7 + x^2 + x + 1.
 	 */
 	adrl	x0, xtscarry
-	cmlt	v1.2d, v9.2d, #0	/* v1.2d[i] := -1 if v9.2d[i] < 0, else 0 */
+	cmlt	v1.2d, v31.2d, #0	/* v1.2d[i] := -1 if v9.2d[i] < 0, else 0 */
 	ldr	q0, [x0]	/* q0 := xtscarry */
 	ext	v1.16b, v1.16b, v1.16b, #8	/* swap halves of q1 */
-	shl	v9.2d, v9.2d, #1	/* shift */
+	shl	v31.2d, v31.2d, #1	/* shift */
 	and	v0.16b, v0.16b, v1.16b	/* copy xtscarry according to mask */
-	eor	v9.16b, v9.16b, v0.16b	/* incorporate (a) and (b) */
+	eor	v31.16b, v31.16b, v0.16b	/* incorporate (a) and (b) */
 	ret
 END(aesarmv8_xts_mulx)
 
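The cmlt/ext/shl/and/eor sequence above is a branch-free doubling in GF(2^128): shift the 128-bit value left by one and, if the top bit fell off, fold in x^7 + x^2 + x + 1 (0x87). A scalar C equivalent (little-endian host assumed; name matches the sketches above, not the NetBSD sources):

	#include <stdint.h>
	#include <string.h>

	void
	xts_mulx(uint8_t t[16])
	{
		uint64_t lo, hi, carry;

		memcpy(&lo, t, 8);		/* little-endian host assumed */
		memcpy(&hi, t + 8, 8);
		carry = hi >> 63;		/* bit shifted out of x^127 */
		hi = (hi << 1) | (lo >> 63);	/* 128-bit shift left by one */
		lo = (lo << 1) ^ (carry * 0x87);/* fold in x^7 + x^2 + x + 1 */
		memcpy(t, &lo, 8);
		memcpy(t + 8, &hi, 8);
	}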
@@ -862,9 +853,9 @@ END(xtscarry)
 ENTRY(aesarmv8_xts_update)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q9, [x0]	/* load tweak */
-	bl	aesarmv8_xts_mulx	/* q9 *= x */
-	str	q9, [x1]	/* store tweak */
+	ldr	q31, [x0]	/* load tweak */
+	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
+	str	q31, [x1]	/* store tweak */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_xts_update)
@@ -875,22 +866,22 @@ END(aesarmv8_xts_update)
  *
  * Encrypt a single AES block in q0.
  *
- * Internal ABI. Uses q8 as temporary. Destroys x0 and x3.
+ * Internal ABI. Uses q16 as temporary. Destroys x0 and x3.
  */
 	.text
 	_ALIGN_TEXT
 	.type	aesarmv8_enc1,@function
 aesarmv8_enc1:
-	ldr	q8, [x0], #0x10	/* load round key */
+	ldr	q16, [x0], #0x10	/* load round key */
 1:	subs	x3, x3, #1
-	/* q0 := ShiftRows(SubBytes(AddRoundKey_q8(q0))) */
-	aese	v0.16b, v8.16b
-	ldr	q8, [x0], #0x10	/* load next round key */
+	/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
+	aese	v0.16b, v16.16b
+	ldr	q16, [x0], #0x10	/* load next round key */
 	b.eq	2f
 	/* q0 := MixColumns(q0) */
 	aesmc	v0.16b, v0.16b
 	b	1b
-2:	eor	v0.16b, v0.16b, v8.16b
+2:	eor	v0.16b, v0.16b, v16.16b
 	ret
 END(aesarmv8_enc1)
 
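aese folds AddRoundKey, SubBytes, and ShiftRows into one instruction, with the next round key (q16 after this commit) loaded while it executes; the last round skips MixColumns and ends with a plain eor. The control flow in C, with the round primitives passed as callbacks (names illustrative):

	#include <stdint.h>

	struct aesblk { uint8_t b[16]; };

	static void
	aes_rounds(struct aesblk *blk, const struct aesblk *rk,
	    unsigned nrounds,
	    void (*addkey)(struct aesblk *, const struct aesblk *),
	    void (*subshift)(struct aesblk *),	/* SubBytes + ShiftRows */
	    void (*mixcols)(struct aesblk *))
	{
		unsigned i;

		for (i = 0; i < nrounds; i++) {
			(*addkey)(blk, &rk[i]);	/* AddRoundKey_q16 */
			(*subshift)(blk);	/* aese, minus the key xor */
			if (i + 1 < nrounds)	/* b.eq 2f on last round */
				(*mixcols)(blk);	/* aesmc */
		}
		(*addkey)(blk, &rk[nrounds]);	/* final eor above */
	}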
@@ -901,24 +892,24 @@ END(aesarmv8_enc1)
 *
 * Encrypt eight AES blocks in q0 through q7 in parallel.
 *
- * Internal ABI. Uses q8 as temporary. Destroys x0 and x3.
+ * Internal ABI. Uses q16 as temporary. Destroys x0 and x3.
 */
 	.text
 	_ALIGN_TEXT
 	.type	aesarmv8_enc8,@function
 aesarmv8_enc8:
-	ldr	q8, [x0], #0x10	/* load round key */
+	ldr	q16, [x0], #0x10	/* load round key */
 1:	subs	x3, x3, #1
-	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q8(q[i]))) */
-	aese	v0.16b, v8.16b
-	aese	v1.16b, v8.16b
-	aese	v2.16b, v8.16b
-	aese	v3.16b, v8.16b
-	aese	v4.16b, v8.16b
-	aese	v5.16b, v8.16b
-	aese	v6.16b, v8.16b
-	aese	v7.16b, v8.16b
-	ldr	q8, [x0], #0x10	/* load next round key */
+	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
+	aese	v0.16b, v16.16b
+	aese	v1.16b, v16.16b
+	aese	v2.16b, v16.16b
+	aese	v3.16b, v16.16b
+	aese	v4.16b, v16.16b
+	aese	v5.16b, v16.16b
+	aese	v6.16b, v16.16b
+	aese	v7.16b, v16.16b
+	ldr	q16, [x0], #0x10	/* load next round key */
 	b.eq	2f
 	/* q[i] := MixColumns(q[i]) */
 	aesmc	v0.16b, v0.16b
@@ -930,14 +921,14 @@ aesarmv8_enc8:
 	aesmc	v6.16b, v6.16b
 	aesmc	v7.16b, v7.16b
 	b	1b
-2:	eor	v0.16b, v0.16b, v8.16b	/* AddRoundKey */
-	eor	v1.16b, v1.16b, v8.16b
-	eor	v2.16b, v2.16b, v8.16b
-	eor	v3.16b, v3.16b, v8.16b
-	eor	v4.16b, v4.16b, v8.16b
-	eor	v5.16b, v5.16b, v8.16b
-	eor	v6.16b, v6.16b, v8.16b
-	eor	v7.16b, v7.16b, v8.16b
+2:	eor	v0.16b, v0.16b, v16.16b	/* AddRoundKey */
+	eor	v1.16b, v1.16b, v16.16b
+	eor	v2.16b, v2.16b, v16.16b
+	eor	v3.16b, v3.16b, v16.16b
+	eor	v4.16b, v4.16b, v16.16b
+	eor	v5.16b, v5.16b, v16.16b
+	eor	v6.16b, v6.16b, v16.16b
+	eor	v7.16b, v7.16b, v16.16b
 	ret
 END(aesarmv8_enc8)
 
@@ -947,22 +938,22 @@ END(aesarmv8_enc8)
 *
 * Decrypt a single AES block in q0.
 *
- * Internal ABI. Uses q8 as temporary. Destroys x0 and x3.
+ * Internal ABI. Uses q16 as temporary. Destroys x0 and x3.
 */
 	.text
 	_ALIGN_TEXT
 	.type	aesarmv8_dec1,@function
 aesarmv8_dec1:
-	ldr	q8, [x0], #0x10	/* load round key */
+	ldr	q16, [x0], #0x10	/* load round key */
 1:	subs	x3, x3, #1
-	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q8(q0))) */
-	aesd	v0.16b, v8.16b
-	ldr	q8, [x0], #0x10	/* load next round key */
+	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
+	aesd	v0.16b, v16.16b
+	ldr	q16, [x0], #0x10	/* load next round key */
 	b.eq	2f
 	/* q0 := InMixColumns(q0) */
 	aesimc	v0.16b, v0.16b
 	b	1b
-2:	eor	v0.16b, v0.16b, v8.16b
+2:	eor	v0.16b, v0.16b, v16.16b
 	ret
 END(aesarmv8_dec1)
 
@@ -973,24 +964,24 @@ END(aesarmv8_dec1)
 *
 * Decrypt eight AES blocks in q0 through q7 in parallel.
 *
- * Internal ABI. Uses q8 as temporary. Destroys x0 and x3.
+ * Internal ABI. Uses q16 as temporary. Destroys x0 and x3.
 */
 	.text
 	_ALIGN_TEXT
 	.type	aesarmv8_dec8,@function
 aesarmv8_dec8:
-	ldr	q8, [x0], #0x10	/* load round key */
+	ldr	q16, [x0], #0x10	/* load round key */
 1:	subs	x3, x3, #1
-	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q8(q[i]))) */
-	aesd	v0.16b, v8.16b
-	aesd	v1.16b, v8.16b
-	aesd	v2.16b, v8.16b
-	aesd	v3.16b, v8.16b
-	aesd	v4.16b, v8.16b
-	aesd	v5.16b, v8.16b
-	aesd	v6.16b, v8.16b
-	aesd	v7.16b, v8.16b
-	ldr	q8, [x0], #0x10	/* load next round key */
+	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
+	aesd	v0.16b, v16.16b
+	aesd	v1.16b, v16.16b
+	aesd	v2.16b, v16.16b
+	aesd	v3.16b, v16.16b
+	aesd	v4.16b, v16.16b
+	aesd	v5.16b, v16.16b
+	aesd	v6.16b, v16.16b
+	aesd	v7.16b, v16.16b
+	ldr	q16, [x0], #0x10	/* load next round key */
 	b.eq	2f
 	/* q[i] := InMixColumns(q[i]) */
 	aesimc	v0.16b, v0.16b
@@ -1002,13 +993,13 @@ aesarmv8_dec8:
 	aesimc	v6.16b, v6.16b
 	aesimc	v7.16b, v7.16b
 	b	1b
-2:	eor	v0.16b, v0.16b, v8.16b	/* AddRoundKey */
-	eor	v1.16b, v1.16b, v8.16b
-	eor	v2.16b, v2.16b, v8.16b
-	eor	v3.16b, v3.16b, v8.16b
-	eor	v4.16b, v4.16b, v8.16b
-	eor	v5.16b, v5.16b, v8.16b
-	eor	v6.16b, v6.16b, v8.16b
-	eor	v7.16b, v7.16b, v8.16b
+2:	eor	v0.16b, v0.16b, v16.16b	/* AddRoundKey */
+	eor	v1.16b, v1.16b, v16.16b
+	eor	v2.16b, v2.16b, v16.16b
+	eor	v3.16b, v3.16b, v16.16b
+	eor	v4.16b, v4.16b, v16.16b
+	eor	v5.16b, v5.16b, v16.16b
+	eor	v6.16b, v6.16b, v16.16b
+	eor	v7.16b, v7.16b, v16.16b
 	ret
 END(aesarmv8_dec8)