regen
parent 5a016eb166
commit 631c5ab455
@@ -1318,7 +1318,7 @@ AES_cbc_encrypt:
 leaq .LAES_Td(%rip),%r14
 .Lcbc_picked_te:
 
-movl OPENSSL_ia32cap_P@GOTPCREL(%rip),%r10d
+movl OPENSSL_ia32cap_P(%rip),%r10d
 cmpq $512,%rdx
 jb .Lcbc_slow_prologue
 testq $15,%rdx
@@ -1,16 +1,754 @@
 #include <machine/asm.h>
 .text
 
-.globl aesni_gcm_encrypt
-.type aesni_gcm_encrypt,@function
-aesni_gcm_encrypt:
-xorl %eax,%eax
-.byte 0xf3,0xc3
-.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
+.type _aesni_ctr32_ghash_6x,@function
+.align 32
+_aesni_ctr32_ghash_6x:
+vmovdqu 32(%r11),%xmm2
+subq $6,%rdx
+vpxor %xmm4,%xmm4,%xmm4
||||||
|
vmovdqu 0-128(%rcx),%xmm15
|
||||||
|
vpaddb %xmm2,%xmm1,%xmm10
|
||||||
|
vpaddb %xmm2,%xmm10,%xmm11
|
||||||
|
vpaddb %xmm2,%xmm11,%xmm12
|
||||||
|
vpaddb %xmm2,%xmm12,%xmm13
|
||||||
|
vpaddb %xmm2,%xmm13,%xmm14
|
||||||
|
vpxor %xmm15,%xmm1,%xmm9
|
||||||
|
vmovdqu %xmm4,16+8(%rsp)
|
||||||
|
jmp .Loop6x
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Loop6x:
|
||||||
|
addl $100663296,%ebx
|
||||||
|
jc .Lhandle_ctr32
|
||||||
|
vmovdqu 0-32(%r9),%xmm3
|
||||||
|
vpaddb %xmm2,%xmm14,%xmm1
|
||||||
|
vpxor %xmm15,%xmm10,%xmm10
|
||||||
|
vpxor %xmm15,%xmm11,%xmm11
|
||||||
|
|
||||||
|
.Lresume_ctr32:
|
||||||
|
vmovdqu %xmm1,(%r8)
|
||||||
|
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
|
||||||
|
vpxor %xmm15,%xmm12,%xmm12
|
||||||
|
vmovups 16-128(%rcx),%xmm2
|
||||||
|
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
|
||||||
|
xorq %r12,%r12
|
||||||
|
cmpq %r14,%r15
|
||||||
|
|
||||||
|
vaesenc %xmm2,%xmm9,%xmm9
|
||||||
|
vmovdqu 48+8(%rsp),%xmm0
|
||||||
|
vpxor %xmm15,%xmm13,%xmm13
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
|
||||||
|
vaesenc %xmm2,%xmm10,%xmm10
|
||||||
|
vpxor %xmm15,%xmm14,%xmm14
|
||||||
|
setnc %r12b
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
|
||||||
|
vaesenc %xmm2,%xmm11,%xmm11
|
||||||
|
vmovdqu 16-32(%r9),%xmm3
|
||||||
|
negq %r12
|
||||||
|
vaesenc %xmm2,%xmm12,%xmm12
|
||||||
|
vpxor %xmm5,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
|
||||||
|
vpxor %xmm4,%xmm8,%xmm8
|
||||||
|
vaesenc %xmm2,%xmm13,%xmm13
|
||||||
|
vpxor %xmm5,%xmm1,%xmm4
|
||||||
|
andq $0x60,%r12
|
||||||
|
vmovups 32-128(%rcx),%xmm15
|
||||||
|
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
|
||||||
|
vaesenc %xmm2,%xmm14,%xmm14
|
||||||
|
|
||||||
|
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
|
||||||
|
leaq (%r14,%r12,1),%r14
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vpxor 16+8(%rsp),%xmm8,%xmm8
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
|
||||||
|
vmovdqu 64+8(%rsp),%xmm0
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
movbeq 88(%r14),%r13
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
movbeq 80(%r14),%r12
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
movq %r13,32+8(%rsp)
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
movq %r12,40+8(%rsp)
|
||||||
|
vmovdqu 48-32(%r9),%xmm5
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
|
||||||
|
vmovups 48-128(%rcx),%xmm15
|
||||||
|
vpxor %xmm1,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vpxor %xmm2,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
vpxor %xmm3,%xmm7,%xmm7
|
||||||
|
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
|
||||||
|
vmovdqu 80+8(%rsp),%xmm0
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vmovdqu 64-32(%r9),%xmm1
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
|
||||||
|
vmovups 64-128(%rcx),%xmm15
|
||||||
|
vpxor %xmm2,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vpxor %xmm3,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
movbeq 72(%r14),%r13
|
||||||
|
vpxor %xmm5,%xmm7,%xmm7
|
||||||
|
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
movbeq 64(%r14),%r12
|
||||||
|
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
|
||||||
|
vmovdqu 96+8(%rsp),%xmm0
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
movq %r13,48+8(%rsp)
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
movq %r12,56+8(%rsp)
|
||||||
|
vpxor %xmm2,%xmm4,%xmm4
|
||||||
|
vmovdqu 96-32(%r9),%xmm2
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
|
||||||
|
vmovups 80-128(%rcx),%xmm15
|
||||||
|
vpxor %xmm3,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vpxor %xmm5,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
movbeq 56(%r14),%r13
|
||||||
|
vpxor %xmm1,%xmm7,%xmm7
|
||||||
|
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
|
||||||
|
vpxor 112+8(%rsp),%xmm8,%xmm8
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
movbeq 48(%r14),%r12
|
||||||
|
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
movq %r13,64+8(%rsp)
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
movq %r12,72+8(%rsp)
|
||||||
|
vpxor %xmm3,%xmm4,%xmm4
|
||||||
|
vmovdqu 112-32(%r9),%xmm3
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
|
||||||
|
vmovups 96-128(%rcx),%xmm15
|
||||||
|
vpxor %xmm5,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vpxor %xmm1,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
movbeq 40(%r14),%r13
|
||||||
|
vpxor %xmm2,%xmm7,%xmm7
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
movbeq 32(%r14),%r12
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
movq %r13,80+8(%rsp)
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
movq %r12,88+8(%rsp)
|
||||||
|
vpxor %xmm5,%xmm6,%xmm6
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
vpxor %xmm1,%xmm6,%xmm6
|
||||||
|
|
||||||
|
vmovups 112-128(%rcx),%xmm15
|
||||||
|
vpslldq $8,%xmm6,%xmm5
|
||||||
|
vpxor %xmm2,%xmm4,%xmm4
|
||||||
|
vmovdqu 16(%r11),%xmm3
|
||||||
|
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vpxor %xmm8,%xmm7,%xmm7
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
vpxor %xmm5,%xmm4,%xmm4
|
||||||
|
movbeq 24(%r14),%r13
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
movbeq 16(%r14),%r12
|
||||||
|
vpalignr $8,%xmm4,%xmm4,%xmm0
|
||||||
|
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
|
||||||
|
movq %r13,96+8(%rsp)
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
movq %r12,104+8(%rsp)
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
vmovups 128-128(%rcx),%xmm1
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 144-128(%rcx),%xmm15
|
||||||
|
vaesenc %xmm1,%xmm10,%xmm10
|
||||||
|
vpsrldq $8,%xmm6,%xmm6
|
||||||
|
vaesenc %xmm1,%xmm11,%xmm11
|
||||||
|
vpxor %xmm6,%xmm7,%xmm7
|
||||||
|
vaesenc %xmm1,%xmm12,%xmm12
|
||||||
|
vpxor %xmm0,%xmm4,%xmm4
|
||||||
|
movbeq 8(%r14),%r13
|
||||||
|
vaesenc %xmm1,%xmm13,%xmm13
|
||||||
|
movbeq 0(%r14),%r12
|
||||||
|
vaesenc %xmm1,%xmm14,%xmm14
|
||||||
|
vmovups 160-128(%rcx),%xmm1
|
||||||
|
cmpl $11,%ebp
|
||||||
|
jb .Lenc_tail
|
||||||
|
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vaesenc %xmm1,%xmm10,%xmm10
|
||||||
|
vaesenc %xmm1,%xmm11,%xmm11
|
||||||
|
vaesenc %xmm1,%xmm12,%xmm12
|
||||||
|
vaesenc %xmm1,%xmm13,%xmm13
|
||||||
|
vmovups 176-128(%rcx),%xmm15
|
||||||
|
vaesenc %xmm1,%xmm14,%xmm14
|
||||||
|
vmovups 192-128(%rcx),%xmm1
|
||||||
|
je .Lenc_tail
|
||||||
|
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vaesenc %xmm1,%xmm10,%xmm10
|
||||||
|
vaesenc %xmm1,%xmm11,%xmm11
|
||||||
|
vaesenc %xmm1,%xmm12,%xmm12
|
||||||
|
vaesenc %xmm1,%xmm13,%xmm13
|
||||||
|
vmovups 208-128(%rcx),%xmm15
|
||||||
|
vaesenc %xmm1,%xmm14,%xmm14
|
||||||
|
vmovups 224-128(%rcx),%xmm1
|
||||||
|
jmp .Lenc_tail
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Lhandle_ctr32:
|
||||||
|
vmovdqu (%r11),%xmm0
|
||||||
|
vpshufb %xmm0,%xmm1,%xmm6
|
||||||
|
vmovdqu 48(%r11),%xmm5
|
||||||
|
vpaddd 64(%r11),%xmm6,%xmm10
|
||||||
|
vpaddd %xmm5,%xmm6,%xmm11
|
||||||
|
vmovdqu 0-32(%r9),%xmm3
|
||||||
|
vpaddd %xmm5,%xmm10,%xmm12
|
||||||
|
vpshufb %xmm0,%xmm10,%xmm10
|
||||||
|
vpaddd %xmm5,%xmm11,%xmm13
|
||||||
|
vpshufb %xmm0,%xmm11,%xmm11
|
||||||
|
vpxor %xmm15,%xmm10,%xmm10
|
||||||
|
vpaddd %xmm5,%xmm12,%xmm14
|
||||||
|
vpshufb %xmm0,%xmm12,%xmm12
|
||||||
|
vpxor %xmm15,%xmm11,%xmm11
|
||||||
|
vpaddd %xmm5,%xmm13,%xmm1
|
||||||
|
vpshufb %xmm0,%xmm13,%xmm13
|
||||||
|
vpshufb %xmm0,%xmm14,%xmm14
|
||||||
|
vpshufb %xmm0,%xmm1,%xmm1
|
||||||
|
jmp .Lresume_ctr32
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Lenc_tail:
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vmovdqu %xmm7,16+8(%rsp)
|
||||||
|
vpalignr $8,%xmm4,%xmm4,%xmm8
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
|
||||||
|
vpxor 0(%rdi),%xmm1,%xmm2
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
vpxor 16(%rdi),%xmm1,%xmm0
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
vpxor 32(%rdi),%xmm1,%xmm5
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
vpxor 48(%rdi),%xmm1,%xmm6
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
vpxor 64(%rdi),%xmm1,%xmm7
|
||||||
|
vpxor 80(%rdi),%xmm1,%xmm3
|
||||||
|
vmovdqu (%r8),%xmm1
|
||||||
|
|
||||||
|
vaesenclast %xmm2,%xmm9,%xmm9
|
||||||
|
vmovdqu 32(%r11),%xmm2
|
||||||
|
vaesenclast %xmm0,%xmm10,%xmm10
|
||||||
|
vpaddb %xmm2,%xmm1,%xmm0
|
||||||
|
movq %r13,112+8(%rsp)
|
||||||
|
leaq 96(%rdi),%rdi
|
||||||
|
vaesenclast %xmm5,%xmm11,%xmm11
|
||||||
|
vpaddb %xmm2,%xmm0,%xmm5
|
||||||
|
movq %r12,120+8(%rsp)
|
||||||
|
leaq 96(%rsi),%rsi
|
||||||
|
vmovdqu 0-128(%rcx),%xmm15
|
||||||
|
vaesenclast %xmm6,%xmm12,%xmm12
|
||||||
|
vpaddb %xmm2,%xmm5,%xmm6
|
||||||
|
vaesenclast %xmm7,%xmm13,%xmm13
|
||||||
|
vpaddb %xmm2,%xmm6,%xmm7
|
||||||
|
vaesenclast %xmm3,%xmm14,%xmm14
|
||||||
|
vpaddb %xmm2,%xmm7,%xmm3
|
||||||
|
|
||||||
|
addq $0x60,%r10
|
||||||
|
subq $0x6,%rdx
|
||||||
|
jc .L6x_done
|
||||||
|
|
||||||
|
vmovups %xmm9,-96(%rsi)
|
||||||
|
vpxor %xmm15,%xmm1,%xmm9
|
||||||
|
vmovups %xmm10,-80(%rsi)
|
||||||
|
vmovdqa %xmm0,%xmm10
|
||||||
|
vmovups %xmm11,-64(%rsi)
|
||||||
|
vmovdqa %xmm5,%xmm11
|
||||||
|
vmovups %xmm12,-48(%rsi)
|
||||||
|
vmovdqa %xmm6,%xmm12
|
||||||
|
vmovups %xmm13,-32(%rsi)
|
||||||
|
vmovdqa %xmm7,%xmm13
|
||||||
|
vmovups %xmm14,-16(%rsi)
|
||||||
|
vmovdqa %xmm3,%xmm14
|
||||||
|
vmovdqu 32+8(%rsp),%xmm7
|
||||||
|
jmp .Loop6x
|
||||||
|
|
||||||
|
.L6x_done:
|
||||||
|
vpxor 16+8(%rsp),%xmm8,%xmm8
|
||||||
|
vpxor %xmm4,%xmm8,%xmm8
|
||||||
|
|
||||||
|
.byte 0xf3,0xc3
|
||||||
|
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
|
 .globl aesni_gcm_decrypt
 .type aesni_gcm_decrypt,@function
+.align 32
 aesni_gcm_decrypt:
-xorl %eax,%eax
+xorq %r10,%r10
||||||
|
cmpq $0x60,%rdx
|
||||||
|
jb .Lgcm_dec_abort
|
||||||
|
|
||||||
|
leaq (%rsp),%rax
|
||||||
|
pushq %rbx
|
||||||
|
pushq %rbp
|
||||||
|
pushq %r12
|
||||||
|
pushq %r13
|
||||||
|
pushq %r14
|
||||||
|
pushq %r15
|
||||||
|
vzeroupper
|
||||||
|
|
||||||
|
vmovdqu (%r8),%xmm1
|
||||||
|
addq $-128,%rsp
|
||||||
|
movl 12(%r8),%ebx
|
||||||
|
leaq .Lbswap_mask(%rip),%r11
|
||||||
|
leaq -128(%rcx),%r14
|
||||||
|
movq $0xf80,%r15
|
||||||
|
vmovdqu (%r9),%xmm8
|
||||||
|
andq $-128,%rsp
|
||||||
|
vmovdqu (%r11),%xmm0
|
||||||
|
leaq 128(%rcx),%rcx
|
||||||
|
leaq 32+32(%r9),%r9
|
||||||
|
movl 240-128(%rcx),%ebp
|
||||||
|
vpshufb %xmm0,%xmm8,%xmm8
|
||||||
|
|
||||||
|
andq %r15,%r14
|
||||||
|
andq %rsp,%r15
|
||||||
|
subq %r14,%r15
|
||||||
|
jc .Ldec_no_key_aliasing
|
||||||
|
cmpq $768,%r15
|
||||||
|
jnc .Ldec_no_key_aliasing
|
||||||
|
subq %r15,%rsp
|
||||||
|
.Ldec_no_key_aliasing:
|
||||||
|
|
||||||
|
vmovdqu 80(%rdi),%xmm7
|
||||||
|
leaq (%rdi),%r14
|
||||||
|
vmovdqu 64(%rdi),%xmm4
|
||||||
|
leaq -192(%rdi,%rdx,1),%r15
|
||||||
|
vmovdqu 48(%rdi),%xmm5
|
||||||
|
shrq $4,%rdx
|
||||||
|
xorq %r10,%r10
|
||||||
|
vmovdqu 32(%rdi),%xmm6
|
||||||
|
vpshufb %xmm0,%xmm7,%xmm7
|
||||||
|
vmovdqu 16(%rdi),%xmm2
|
||||||
|
vpshufb %xmm0,%xmm4,%xmm4
|
||||||
|
vmovdqu (%rdi),%xmm3
|
||||||
|
vpshufb %xmm0,%xmm5,%xmm5
|
||||||
|
vmovdqu %xmm4,48(%rsp)
|
||||||
|
vpshufb %xmm0,%xmm6,%xmm6
|
||||||
|
vmovdqu %xmm5,64(%rsp)
|
||||||
|
vpshufb %xmm0,%xmm2,%xmm2
|
||||||
|
vmovdqu %xmm6,80(%rsp)
|
||||||
|
vpshufb %xmm0,%xmm3,%xmm3
|
||||||
|
vmovdqu %xmm2,96(%rsp)
|
||||||
|
vmovdqu %xmm3,112(%rsp)
|
||||||
|
|
||||||
|
call _aesni_ctr32_ghash_6x
|
||||||
|
|
||||||
|
vmovups %xmm9,-96(%rsi)
|
||||||
|
vmovups %xmm10,-80(%rsi)
|
||||||
|
vmovups %xmm11,-64(%rsi)
|
||||||
|
vmovups %xmm12,-48(%rsi)
|
||||||
|
vmovups %xmm13,-32(%rsi)
|
||||||
|
vmovups %xmm14,-16(%rsi)
|
||||||
|
|
||||||
|
vpshufb (%r11),%xmm8,%xmm8
|
||||||
|
vmovdqu %xmm8,-64(%r9)
|
||||||
|
|
||||||
|
vzeroupper
|
||||||
|
movq -48(%rax),%r15
|
||||||
|
movq -40(%rax),%r14
|
||||||
|
movq -32(%rax),%r13
|
||||||
|
movq -24(%rax),%r12
|
||||||
|
movq -16(%rax),%rbp
|
||||||
|
movq -8(%rax),%rbx
|
||||||
|
leaq (%rax),%rsp
|
||||||
|
.Lgcm_dec_abort:
|
||||||
|
movq %r10,%rax
|
 .byte 0xf3,0xc3
 .size aesni_gcm_decrypt,.-aesni_gcm_decrypt
||||||
|
.type _aesni_ctr32_6x,@function
|
||||||
|
.align 32
|
||||||
|
_aesni_ctr32_6x:
|
||||||
|
vmovdqu 0-128(%rcx),%xmm4
|
||||||
|
vmovdqu 32(%r11),%xmm2
|
||||||
|
leaq -1(%rbp),%r13
|
||||||
|
vmovups 16-128(%rcx),%xmm15
|
||||||
|
leaq 32-128(%rcx),%r12
|
||||||
|
vpxor %xmm4,%xmm1,%xmm9
|
||||||
|
addl $100663296,%ebx
|
||||||
|
jc .Lhandle_ctr32_2
|
||||||
|
vpaddb %xmm2,%xmm1,%xmm10
|
||||||
|
vpaddb %xmm2,%xmm10,%xmm11
|
||||||
|
vpxor %xmm4,%xmm10,%xmm10
|
||||||
|
vpaddb %xmm2,%xmm11,%xmm12
|
||||||
|
vpxor %xmm4,%xmm11,%xmm11
|
||||||
|
vpaddb %xmm2,%xmm12,%xmm13
|
||||||
|
vpxor %xmm4,%xmm12,%xmm12
|
||||||
|
vpaddb %xmm2,%xmm13,%xmm14
|
||||||
|
vpxor %xmm4,%xmm13,%xmm13
|
||||||
|
vpaddb %xmm2,%xmm14,%xmm1
|
||||||
|
vpxor %xmm4,%xmm14,%xmm14
|
||||||
|
jmp .Loop_ctr32
|
||||||
|
|
||||||
|
.align 16
|
||||||
|
.Loop_ctr32:
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
vmovups (%r12),%xmm15
|
||||||
|
leaq 16(%r12),%r12
|
||||||
|
decl %r13d
|
||||||
|
jnz .Loop_ctr32
|
||||||
|
|
||||||
|
vmovdqu (%r12),%xmm3
|
||||||
|
vaesenc %xmm15,%xmm9,%xmm9
|
||||||
|
vpxor 0(%rdi),%xmm3,%xmm4
|
||||||
|
vaesenc %xmm15,%xmm10,%xmm10
|
||||||
|
vpxor 16(%rdi),%xmm3,%xmm5
|
||||||
|
vaesenc %xmm15,%xmm11,%xmm11
|
||||||
|
vpxor 32(%rdi),%xmm3,%xmm6
|
||||||
|
vaesenc %xmm15,%xmm12,%xmm12
|
||||||
|
vpxor 48(%rdi),%xmm3,%xmm8
|
||||||
|
vaesenc %xmm15,%xmm13,%xmm13
|
||||||
|
vpxor 64(%rdi),%xmm3,%xmm2
|
||||||
|
vaesenc %xmm15,%xmm14,%xmm14
|
||||||
|
vpxor 80(%rdi),%xmm3,%xmm3
|
||||||
|
leaq 96(%rdi),%rdi
|
||||||
|
|
||||||
|
vaesenclast %xmm4,%xmm9,%xmm9
|
||||||
|
vaesenclast %xmm5,%xmm10,%xmm10
|
||||||
|
vaesenclast %xmm6,%xmm11,%xmm11
|
||||||
|
vaesenclast %xmm8,%xmm12,%xmm12
|
||||||
|
vaesenclast %xmm2,%xmm13,%xmm13
|
||||||
|
vaesenclast %xmm3,%xmm14,%xmm14
|
||||||
|
vmovups %xmm9,0(%rsi)
|
||||||
|
vmovups %xmm10,16(%rsi)
|
||||||
|
vmovups %xmm11,32(%rsi)
|
||||||
|
vmovups %xmm12,48(%rsi)
|
||||||
|
vmovups %xmm13,64(%rsi)
|
||||||
|
vmovups %xmm14,80(%rsi)
|
||||||
|
leaq 96(%rsi),%rsi
|
||||||
|
|
||||||
|
.byte 0xf3,0xc3
|
||||||
|
.align 32
|
||||||
|
.Lhandle_ctr32_2:
|
||||||
|
vpshufb %xmm0,%xmm1,%xmm6
|
||||||
|
vmovdqu 48(%r11),%xmm5
|
||||||
|
vpaddd 64(%r11),%xmm6,%xmm10
|
||||||
|
vpaddd %xmm5,%xmm6,%xmm11
|
||||||
|
vpaddd %xmm5,%xmm10,%xmm12
|
||||||
|
vpshufb %xmm0,%xmm10,%xmm10
|
||||||
|
vpaddd %xmm5,%xmm11,%xmm13
|
||||||
|
vpshufb %xmm0,%xmm11,%xmm11
|
||||||
|
vpxor %xmm4,%xmm10,%xmm10
|
||||||
|
vpaddd %xmm5,%xmm12,%xmm14
|
||||||
|
vpshufb %xmm0,%xmm12,%xmm12
|
||||||
|
vpxor %xmm4,%xmm11,%xmm11
|
||||||
|
vpaddd %xmm5,%xmm13,%xmm1
|
||||||
|
vpshufb %xmm0,%xmm13,%xmm13
|
||||||
|
vpxor %xmm4,%xmm12,%xmm12
|
||||||
|
vpshufb %xmm0,%xmm14,%xmm14
|
||||||
|
vpxor %xmm4,%xmm13,%xmm13
|
||||||
|
vpshufb %xmm0,%xmm1,%xmm1
|
||||||
|
vpxor %xmm4,%xmm14,%xmm14
|
||||||
|
jmp .Loop_ctr32
|
||||||
|
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
|
||||||
|
|
||||||
|
.globl aesni_gcm_encrypt
|
||||||
|
.type aesni_gcm_encrypt,@function
|
||||||
|
.align 32
|
||||||
|
aesni_gcm_encrypt:
|
||||||
|
xorq %r10,%r10
|
||||||
|
cmpq $288,%rdx
|
||||||
|
jb .Lgcm_enc_abort
|
||||||
|
|
||||||
|
leaq (%rsp),%rax
|
||||||
|
pushq %rbx
|
||||||
|
pushq %rbp
|
||||||
|
pushq %r12
|
||||||
|
pushq %r13
|
||||||
|
pushq %r14
|
||||||
|
pushq %r15
|
||||||
|
vzeroupper
|
||||||
|
|
||||||
|
vmovdqu (%r8),%xmm1
|
||||||
|
addq $-128,%rsp
|
||||||
|
movl 12(%r8),%ebx
|
||||||
|
leaq .Lbswap_mask(%rip),%r11
|
||||||
|
leaq -128(%rcx),%r14
|
||||||
|
movq $0xf80,%r15
|
||||||
|
leaq 128(%rcx),%rcx
|
||||||
|
vmovdqu (%r11),%xmm0
|
||||||
|
andq $-128,%rsp
|
||||||
|
movl 240-128(%rcx),%ebp
|
||||||
|
|
||||||
|
andq %r15,%r14
|
||||||
|
andq %rsp,%r15
|
||||||
|
subq %r14,%r15
|
||||||
|
jc .Lenc_no_key_aliasing
|
||||||
|
cmpq $768,%r15
|
||||||
|
jnc .Lenc_no_key_aliasing
|
||||||
|
subq %r15,%rsp
|
||||||
|
.Lenc_no_key_aliasing:
|
||||||
|
|
||||||
|
leaq (%rsi),%r14
|
||||||
|
leaq -192(%rsi,%rdx,1),%r15
|
||||||
|
shrq $4,%rdx
|
||||||
|
|
||||||
|
call _aesni_ctr32_6x
|
||||||
|
vpshufb %xmm0,%xmm9,%xmm8
|
||||||
|
vpshufb %xmm0,%xmm10,%xmm2
|
||||||
|
vmovdqu %xmm8,112(%rsp)
|
||||||
|
vpshufb %xmm0,%xmm11,%xmm4
|
||||||
|
vmovdqu %xmm2,96(%rsp)
|
||||||
|
vpshufb %xmm0,%xmm12,%xmm5
|
||||||
|
vmovdqu %xmm4,80(%rsp)
|
||||||
|
vpshufb %xmm0,%xmm13,%xmm6
|
||||||
|
vmovdqu %xmm5,64(%rsp)
|
||||||
|
vpshufb %xmm0,%xmm14,%xmm7
|
||||||
|
vmovdqu %xmm6,48(%rsp)
|
||||||
|
|
||||||
|
call _aesni_ctr32_6x
|
||||||
|
|
||||||
|
vmovdqu (%r9),%xmm8
|
||||||
|
leaq 32+32(%r9),%r9
|
||||||
|
subq $12,%rdx
|
||||||
|
movq $192,%r10
|
||||||
|
vpshufb %xmm0,%xmm8,%xmm8
|
||||||
|
|
||||||
|
call _aesni_ctr32_ghash_6x
|
||||||
|
vmovdqu 32(%rsp),%xmm7
|
||||||
|
vmovdqu (%r11),%xmm0
|
||||||
|
vmovdqu 0-32(%r9),%xmm3
|
||||||
|
vpunpckhqdq %xmm7,%xmm7,%xmm1
|
||||||
|
vmovdqu 32-32(%r9),%xmm15
|
||||||
|
vmovups %xmm9,-96(%rsi)
|
||||||
|
vpshufb %xmm0,%xmm9,%xmm9
|
||||||
|
vpxor %xmm7,%xmm1,%xmm1
|
||||||
|
vmovups %xmm10,-80(%rsi)
|
||||||
|
vpshufb %xmm0,%xmm10,%xmm10
|
||||||
|
vmovups %xmm11,-64(%rsi)
|
||||||
|
vpshufb %xmm0,%xmm11,%xmm11
|
||||||
|
vmovups %xmm12,-48(%rsi)
|
||||||
|
vpshufb %xmm0,%xmm12,%xmm12
|
||||||
|
vmovups %xmm13,-32(%rsi)
|
||||||
|
vpshufb %xmm0,%xmm13,%xmm13
|
||||||
|
vmovups %xmm14,-16(%rsi)
|
||||||
|
vpshufb %xmm0,%xmm14,%xmm14
|
||||||
|
vmovdqu %xmm9,16(%rsp)
|
||||||
|
vmovdqu 48(%rsp),%xmm6
|
||||||
|
vmovdqu 16-32(%r9),%xmm0
|
||||||
|
vpunpckhqdq %xmm6,%xmm6,%xmm2
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
|
||||||
|
vpxor %xmm6,%xmm2,%xmm2
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
|
||||||
|
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
|
||||||
|
|
||||||
|
vmovdqu 64(%rsp),%xmm9
|
||||||
|
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
|
||||||
|
vmovdqu 48-32(%r9),%xmm3
|
||||||
|
vpxor %xmm5,%xmm4,%xmm4
|
||||||
|
vpunpckhqdq %xmm9,%xmm9,%xmm5
|
||||||
|
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
|
||||||
|
vpxor %xmm9,%xmm5,%xmm5
|
||||||
|
vpxor %xmm7,%xmm6,%xmm6
|
||||||
|
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
|
||||||
|
vmovdqu 80-32(%r9),%xmm15
|
||||||
|
vpxor %xmm1,%xmm2,%xmm2
|
||||||
|
|
||||||
|
vmovdqu 80(%rsp),%xmm1
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
|
||||||
|
vmovdqu 64-32(%r9),%xmm0
|
||||||
|
vpxor %xmm4,%xmm7,%xmm7
|
||||||
|
vpunpckhqdq %xmm1,%xmm1,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpxor %xmm6,%xmm9,%xmm9
|
||||||
|
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
|
||||||
|
vmovdqu 96(%rsp),%xmm2
|
||||||
|
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
|
||||||
|
vmovdqu 96-32(%r9),%xmm3
|
||||||
|
vpxor %xmm7,%xmm6,%xmm6
|
||||||
|
vpunpckhqdq %xmm2,%xmm2,%xmm7
|
||||||
|
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
|
||||||
|
vpxor %xmm2,%xmm7,%xmm7
|
||||||
|
vpxor %xmm9,%xmm1,%xmm1
|
||||||
|
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
|
||||||
|
vmovdqu 128-32(%r9),%xmm15
|
||||||
|
vpxor %xmm5,%xmm4,%xmm4
|
||||||
|
|
||||||
|
vpxor 112(%rsp),%xmm8,%xmm8
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
|
||||||
|
vmovdqu 112-32(%r9),%xmm0
|
||||||
|
vpunpckhqdq %xmm8,%xmm8,%xmm9
|
||||||
|
vpxor %xmm6,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
|
||||||
|
vpxor %xmm8,%xmm9,%xmm9
|
||||||
|
vpxor %xmm1,%xmm2,%xmm2
|
||||||
|
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
|
||||||
|
vpxor %xmm4,%xmm7,%xmm4
|
||||||
|
|
||||||
|
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
|
||||||
|
vmovdqu 0-32(%r9),%xmm3
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm1
|
||||||
|
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
|
||||||
|
vpxor %xmm14,%xmm1,%xmm1
|
||||||
|
vpxor %xmm5,%xmm6,%xmm5
|
||||||
|
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
|
||||||
|
vmovdqu 32-32(%r9),%xmm15
|
||||||
|
vpxor %xmm2,%xmm8,%xmm7
|
||||||
|
vpxor %xmm4,%xmm9,%xmm6
|
||||||
|
|
||||||
|
vmovdqu 16-32(%r9),%xmm0
|
||||||
|
vpxor %xmm5,%xmm7,%xmm9
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
|
||||||
|
vpxor %xmm9,%xmm6,%xmm6
|
||||||
|
vpunpckhqdq %xmm13,%xmm13,%xmm2
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
|
||||||
|
vpxor %xmm13,%xmm2,%xmm2
|
||||||
|
vpslldq $8,%xmm6,%xmm9
|
||||||
|
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
|
||||||
|
vpxor %xmm9,%xmm5,%xmm8
|
||||||
|
vpsrldq $8,%xmm6,%xmm6
|
||||||
|
vpxor %xmm6,%xmm7,%xmm7
|
||||||
|
|
||||||
|
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
|
||||||
|
vmovdqu 48-32(%r9),%xmm3
|
||||||
|
vpxor %xmm4,%xmm5,%xmm5
|
||||||
|
vpunpckhqdq %xmm12,%xmm12,%xmm9
|
||||||
|
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
|
||||||
|
vpxor %xmm12,%xmm9,%xmm9
|
||||||
|
vpxor %xmm14,%xmm13,%xmm13
|
||||||
|
vpalignr $8,%xmm8,%xmm8,%xmm14
|
||||||
|
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
|
||||||
|
vmovdqu 80-32(%r9),%xmm15
|
||||||
|
vpxor %xmm1,%xmm2,%xmm2
|
||||||
|
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
|
||||||
|
vmovdqu 64-32(%r9),%xmm0
|
||||||
|
vpxor %xmm5,%xmm4,%xmm4
|
||||||
|
vpunpckhqdq %xmm11,%xmm11,%xmm1
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
|
||||||
|
vpxor %xmm11,%xmm1,%xmm1
|
||||||
|
vpxor %xmm13,%xmm12,%xmm12
|
||||||
|
vxorps 16(%rsp),%xmm7,%xmm7
|
||||||
|
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
|
||||||
|
vpxor %xmm2,%xmm9,%xmm9
|
||||||
|
|
||||||
|
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
|
||||||
|
vxorps %xmm14,%xmm8,%xmm8
|
||||||
|
|
||||||
|
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
|
||||||
|
vmovdqu 96-32(%r9),%xmm3
|
||||||
|
vpxor %xmm4,%xmm5,%xmm5
|
||||||
|
vpunpckhqdq %xmm10,%xmm10,%xmm2
|
||||||
|
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
|
||||||
|
vpxor %xmm10,%xmm2,%xmm2
|
||||||
|
vpalignr $8,%xmm8,%xmm8,%xmm14
|
||||||
|
vpxor %xmm12,%xmm11,%xmm11
|
||||||
|
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
|
||||||
|
vmovdqu 128-32(%r9),%xmm15
|
||||||
|
vpxor %xmm9,%xmm1,%xmm1
|
||||||
|
|
||||||
|
vxorps %xmm7,%xmm14,%xmm14
|
||||||
|
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
|
||||||
|
vxorps %xmm14,%xmm8,%xmm8
|
||||||
|
|
||||||
|
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
|
||||||
|
vmovdqu 112-32(%r9),%xmm0
|
||||||
|
vpxor %xmm5,%xmm4,%xmm4
|
||||||
|
vpunpckhqdq %xmm8,%xmm8,%xmm9
|
||||||
|
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
|
||||||
|
vpxor %xmm8,%xmm9,%xmm9
|
||||||
|
vpxor %xmm11,%xmm10,%xmm10
|
||||||
|
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
|
||||||
|
vpxor %xmm1,%xmm2,%xmm2
|
||||||
|
|
||||||
|
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
|
||||||
|
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
|
||||||
|
vpxor %xmm4,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
|
||||||
|
vpxor %xmm10,%xmm7,%xmm7
|
||||||
|
vpxor %xmm2,%xmm6,%xmm6
|
||||||
|
|
||||||
|
vpxor %xmm5,%xmm7,%xmm4
|
||||||
|
vpxor %xmm4,%xmm6,%xmm6
|
||||||
|
vpslldq $8,%xmm6,%xmm1
|
||||||
|
vmovdqu 16(%r11),%xmm3
|
||||||
|
vpsrldq $8,%xmm6,%xmm6
|
||||||
|
vpxor %xmm1,%xmm5,%xmm8
|
||||||
|
vpxor %xmm6,%xmm7,%xmm7
|
||||||
|
|
||||||
|
vpalignr $8,%xmm8,%xmm8,%xmm2
|
||||||
|
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
|
||||||
|
vpxor %xmm2,%xmm8,%xmm8
|
||||||
|
|
||||||
|
vpalignr $8,%xmm8,%xmm8,%xmm2
|
||||||
|
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
|
||||||
|
vpxor %xmm7,%xmm2,%xmm2
|
||||||
|
vpxor %xmm2,%xmm8,%xmm8
|
||||||
|
vpshufb (%r11),%xmm8,%xmm8
|
||||||
|
vmovdqu %xmm8,-64(%r9)
|
||||||
|
|
||||||
|
vzeroupper
|
||||||
|
movq -48(%rax),%r15
|
||||||
|
movq -40(%rax),%r14
|
||||||
|
movq -32(%rax),%r13
|
||||||
|
movq -24(%rax),%r12
|
||||||
|
movq -16(%rax),%rbp
|
||||||
|
movq -8(%rax),%rbx
|
||||||
|
leaq (%rax),%rsp
|
||||||
|
.Lgcm_enc_abort:
|
||||||
|
movq %r10,%rax
|
||||||
|
.byte 0xf3,0xc3
|
||||||
|
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
|
||||||
|
.align 64
|
||||||
|
.Lbswap_mask:
|
||||||
|
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||||
|
.Lpoly:
|
||||||
|
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
|
||||||
|
.Lone_msb:
|
||||||
|
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||||
|
.Ltwo_lsb:
|
||||||
|
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||||
|
.Lone_lsb:
|
||||||
|
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
|
||||||
|
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||||
|
.align 64
|
||||||
|
|
|
@@ -7,6 +7,14 @@
 .type aesni_multi_cbc_encrypt,@function
 .align 32
 aesni_multi_cbc_encrypt:
+cmpl $2,%edx
+jb .Lenc_non_avx
+movl OPENSSL_ia32cap_P+4(%rip),%ecx
+testl $268435456,%ecx
+jnz _avx_cbc_enc_shortcut
+jmp .Lenc_non_avx
+.align 16
+.Lenc_non_avx:
 movq %rsp,%rax
 pushq %rbx
 pushq %rbp
@@ -263,6 +271,14 @@ aesni_multi_cbc_encrypt:
 .type aesni_multi_cbc_decrypt,@function
 .align 32
 aesni_multi_cbc_decrypt:
+cmpl $2,%edx
+jb .Ldec_non_avx
+movl OPENSSL_ia32cap_P+4(%rip),%ecx
+testl $268435456,%ecx
+jnz _avx_cbc_dec_shortcut
+jmp .Ldec_non_avx
+.align 16
+.Ldec_non_avx:
 movq %rsp,%rax
 pushq %rbx
 pushq %rbp
@@ -505,3 +521,916 @@ aesni_multi_cbc_decrypt:
 .Ldec4x_epilogue:
 .byte 0xf3,0xc3
 .size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
||||||
|
.type aesni_multi_cbc_encrypt_avx,@function
|
||||||
|
.align 32
|
||||||
|
aesni_multi_cbc_encrypt_avx:
|
||||||
|
_avx_cbc_enc_shortcut:
|
||||||
|
movq %rsp,%rax
|
||||||
|
pushq %rbx
|
||||||
|
pushq %rbp
|
||||||
|
pushq %r12
|
||||||
|
pushq %r13
|
||||||
|
pushq %r14
|
||||||
|
pushq %r15
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subq $192,%rsp
|
||||||
|
andq $-128,%rsp
|
||||||
|
movq %rax,16(%rsp)
|
||||||
|
|
||||||
|
.Lenc8x_body:
|
||||||
|
vzeroupper
|
||||||
|
vmovdqu (%rsi),%xmm15
|
||||||
|
leaq 120(%rsi),%rsi
|
||||||
|
leaq 160(%rdi),%rdi
|
||||||
|
shrl $1,%edx
|
||||||
|
|
||||||
|
.Lenc8x_loop_grande:
|
||||||
|
|
||||||
|
xorl %edx,%edx
|
||||||
|
movl -144(%rdi),%ecx
|
||||||
|
movq -160(%rdi),%r8
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq -152(%rdi),%rbx
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu -136(%rdi),%xmm2
|
||||||
|
movl %ecx,32(%rsp)
|
||||||
|
cmovleq %rsp,%r8
|
||||||
|
subq %r8,%rbx
|
||||||
|
movq %rbx,64(%rsp)
|
||||||
|
movl -104(%rdi),%ecx
|
||||||
|
movq -120(%rdi),%r9
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq -112(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu -96(%rdi),%xmm3
|
||||||
|
movl %ecx,36(%rsp)
|
||||||
|
cmovleq %rsp,%r9
|
||||||
|
subq %r9,%rbp
|
||||||
|
movq %rbp,72(%rsp)
|
||||||
|
movl -64(%rdi),%ecx
|
||||||
|
movq -80(%rdi),%r10
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq -72(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu -56(%rdi),%xmm4
|
||||||
|
movl %ecx,40(%rsp)
|
||||||
|
cmovleq %rsp,%r10
|
||||||
|
subq %r10,%rbp
|
||||||
|
movq %rbp,80(%rsp)
|
||||||
|
movl -24(%rdi),%ecx
|
||||||
|
movq -40(%rdi),%r11
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq -32(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu -16(%rdi),%xmm5
|
||||||
|
movl %ecx,44(%rsp)
|
||||||
|
cmovleq %rsp,%r11
|
||||||
|
subq %r11,%rbp
|
||||||
|
movq %rbp,88(%rsp)
|
||||||
|
movl 16(%rdi),%ecx
|
||||||
|
movq 0(%rdi),%r12
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq 8(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu 24(%rdi),%xmm6
|
||||||
|
movl %ecx,48(%rsp)
|
||||||
|
cmovleq %rsp,%r12
|
||||||
|
subq %r12,%rbp
|
||||||
|
movq %rbp,96(%rsp)
|
||||||
|
movl 56(%rdi),%ecx
|
||||||
|
movq 40(%rdi),%r13
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq 48(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu 64(%rdi),%xmm7
|
||||||
|
movl %ecx,52(%rsp)
|
||||||
|
cmovleq %rsp,%r13
|
||||||
|
subq %r13,%rbp
|
||||||
|
movq %rbp,104(%rsp)
|
||||||
|
movl 96(%rdi),%ecx
|
||||||
|
movq 80(%rdi),%r14
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq 88(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu 104(%rdi),%xmm8
|
||||||
|
movl %ecx,56(%rsp)
|
||||||
|
cmovleq %rsp,%r14
|
||||||
|
subq %r14,%rbp
|
||||||
|
movq %rbp,112(%rsp)
|
||||||
|
movl 136(%rdi),%ecx
|
||||||
|
movq 120(%rdi),%r15
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq 128(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu 144(%rdi),%xmm9
|
||||||
|
movl %ecx,60(%rsp)
|
||||||
|
cmovleq %rsp,%r15
|
||||||
|
subq %r15,%rbp
|
||||||
|
movq %rbp,120(%rsp)
|
||||||
|
testl %edx,%edx
|
||||||
|
jz .Lenc8x_done
|
||||||
|
|
||||||
|
vmovups 16-120(%rsi),%xmm1
|
||||||
|
vmovups 32-120(%rsi),%xmm0
|
||||||
|
movl 240-120(%rsi),%eax
|
||||||
|
|
||||||
|
vpxor (%r8),%xmm15,%xmm10
|
||||||
|
leaq 128(%rsp),%rbp
|
||||||
|
vpxor (%r9),%xmm15,%xmm11
|
||||||
|
vpxor (%r10),%xmm15,%xmm12
|
||||||
|
vpxor (%r11),%xmm15,%xmm13
|
||||||
|
vpxor %xmm10,%xmm2,%xmm2
|
||||||
|
vpxor (%r12),%xmm15,%xmm10
|
||||||
|
vpxor %xmm11,%xmm3,%xmm3
|
||||||
|
vpxor (%r13),%xmm15,%xmm11
|
||||||
|
vpxor %xmm12,%xmm4,%xmm4
|
||||||
|
vpxor (%r14),%xmm15,%xmm12
|
||||||
|
vpxor %xmm13,%xmm5,%xmm5
|
||||||
|
vpxor (%r15),%xmm15,%xmm13
|
||||||
|
vpxor %xmm10,%xmm6,%xmm6
|
||||||
|
movl $1,%ecx
|
||||||
|
vpxor %xmm11,%xmm7,%xmm7
|
||||||
|
vpxor %xmm12,%xmm8,%xmm8
|
||||||
|
vpxor %xmm13,%xmm9,%xmm9
|
||||||
|
jmp .Loop_enc8x
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Loop_enc8x:
|
||||||
|
vaesenc %xmm1,%xmm2,%xmm2
|
||||||
|
cmpl 32+0(%rsp),%ecx
|
||||||
|
vaesenc %xmm1,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r8)
|
||||||
|
vaesenc %xmm1,%xmm4,%xmm4
|
||||||
|
vaesenc %xmm1,%xmm5,%xmm5
|
||||||
|
leaq (%r8,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r8
|
||||||
|
vaesenc %xmm1,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesenc %xmm1,%xmm7,%xmm7
|
||||||
|
subq %r8,%rbx
|
||||||
|
vaesenc %xmm1,%xmm8,%xmm8
|
||||||
|
vpxor 16(%r8),%xmm15,%xmm10
|
||||||
|
movq %rbx,64+0(%rsp)
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups -72(%rsi),%xmm1
|
||||||
|
leaq 16(%r8,%rbx,1),%r8
|
||||||
|
vmovdqu %xmm10,0(%rbp)
|
||||||
|
vaesenc %xmm0,%xmm2,%xmm2
|
||||||
|
cmpl 32+4(%rsp),%ecx
|
||||||
|
movq 64+8(%rsp),%rbx
|
||||||
|
vaesenc %xmm0,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r9)
|
||||||
|
vaesenc %xmm0,%xmm4,%xmm4
|
||||||
|
vaesenc %xmm0,%xmm5,%xmm5
|
||||||
|
leaq (%r9,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r9
|
||||||
|
vaesenc %xmm0,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesenc %xmm0,%xmm7,%xmm7
|
||||||
|
subq %r9,%rbx
|
||||||
|
vaesenc %xmm0,%xmm8,%xmm8
|
||||||
|
vpxor 16(%r9),%xmm15,%xmm11
|
||||||
|
movq %rbx,64+8(%rsp)
|
||||||
|
vaesenc %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups -56(%rsi),%xmm0
|
||||||
|
leaq 16(%r9,%rbx,1),%r9
|
||||||
|
vmovdqu %xmm11,16(%rbp)
|
||||||
|
vaesenc %xmm1,%xmm2,%xmm2
|
||||||
|
cmpl 32+8(%rsp),%ecx
|
||||||
|
movq 64+16(%rsp),%rbx
|
||||||
|
vaesenc %xmm1,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r10)
|
||||||
|
vaesenc %xmm1,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r8)
|
||||||
|
vaesenc %xmm1,%xmm5,%xmm5
|
||||||
|
leaq (%r10,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r10
|
||||||
|
vaesenc %xmm1,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesenc %xmm1,%xmm7,%xmm7
|
||||||
|
subq %r10,%rbx
|
||||||
|
vaesenc %xmm1,%xmm8,%xmm8
|
||||||
|
vpxor 16(%r10),%xmm15,%xmm12
|
||||||
|
movq %rbx,64+16(%rsp)
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups -40(%rsi),%xmm1
|
||||||
|
leaq 16(%r10,%rbx,1),%r10
|
||||||
|
vmovdqu %xmm12,32(%rbp)
|
||||||
|
vaesenc %xmm0,%xmm2,%xmm2
|
||||||
|
cmpl 32+12(%rsp),%ecx
|
||||||
|
movq 64+24(%rsp),%rbx
|
||||||
|
vaesenc %xmm0,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r11)
|
||||||
|
vaesenc %xmm0,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r9)
|
||||||
|
vaesenc %xmm0,%xmm5,%xmm5
|
||||||
|
leaq (%r11,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r11
|
||||||
|
vaesenc %xmm0,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesenc %xmm0,%xmm7,%xmm7
|
||||||
|
subq %r11,%rbx
|
||||||
|
vaesenc %xmm0,%xmm8,%xmm8
|
||||||
|
vpxor 16(%r11),%xmm15,%xmm13
|
||||||
|
movq %rbx,64+24(%rsp)
|
||||||
|
vaesenc %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups -24(%rsi),%xmm0
|
||||||
|
leaq 16(%r11,%rbx,1),%r11
|
||||||
|
vmovdqu %xmm13,48(%rbp)
|
||||||
|
vaesenc %xmm1,%xmm2,%xmm2
|
||||||
|
cmpl 32+16(%rsp),%ecx
|
||||||
|
movq 64+32(%rsp),%rbx
|
||||||
|
vaesenc %xmm1,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r12)
|
||||||
|
vaesenc %xmm1,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r10)
|
||||||
|
vaesenc %xmm1,%xmm5,%xmm5
|
||||||
|
leaq (%r12,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r12
|
||||||
|
vaesenc %xmm1,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesenc %xmm1,%xmm7,%xmm7
|
||||||
|
subq %r12,%rbx
|
||||||
|
vaesenc %xmm1,%xmm8,%xmm8
|
||||||
|
vpxor 16(%r12),%xmm15,%xmm10
|
||||||
|
movq %rbx,64+32(%rsp)
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups -8(%rsi),%xmm1
|
||||||
|
leaq 16(%r12,%rbx,1),%r12
|
||||||
|
vaesenc %xmm0,%xmm2,%xmm2
|
||||||
|
cmpl 32+20(%rsp),%ecx
|
||||||
|
movq 64+40(%rsp),%rbx
|
||||||
|
vaesenc %xmm0,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r13)
|
||||||
|
vaesenc %xmm0,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r11)
|
||||||
|
vaesenc %xmm0,%xmm5,%xmm5
|
||||||
|
leaq (%rbx,%r13,1),%rbx
|
||||||
|
cmovgeq %rsp,%r13
|
||||||
|
vaesenc %xmm0,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesenc %xmm0,%xmm7,%xmm7
|
||||||
|
subq %r13,%rbx
|
||||||
|
vaesenc %xmm0,%xmm8,%xmm8
|
||||||
|
vpxor 16(%r13),%xmm15,%xmm11
|
||||||
|
movq %rbx,64+40(%rsp)
|
||||||
|
vaesenc %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 8(%rsi),%xmm0
|
||||||
|
leaq 16(%r13,%rbx,1),%r13
|
||||||
|
vaesenc %xmm1,%xmm2,%xmm2
|
||||||
|
cmpl 32+24(%rsp),%ecx
|
||||||
|
movq 64+48(%rsp),%rbx
|
||||||
|
vaesenc %xmm1,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r14)
|
||||||
|
vaesenc %xmm1,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r12)
|
||||||
|
vaesenc %xmm1,%xmm5,%xmm5
|
||||||
|
leaq (%r14,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r14
|
||||||
|
vaesenc %xmm1,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesenc %xmm1,%xmm7,%xmm7
|
||||||
|
subq %r14,%rbx
|
||||||
|
vaesenc %xmm1,%xmm8,%xmm8
|
||||||
|
vpxor 16(%r14),%xmm15,%xmm12
|
||||||
|
movq %rbx,64+48(%rsp)
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 24(%rsi),%xmm1
|
||||||
|
leaq 16(%r14,%rbx,1),%r14
|
||||||
|
vaesenc %xmm0,%xmm2,%xmm2
|
||||||
|
cmpl 32+28(%rsp),%ecx
|
||||||
|
movq 64+56(%rsp),%rbx
|
||||||
|
vaesenc %xmm0,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r15)
|
||||||
|
vaesenc %xmm0,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r13)
|
||||||
|
vaesenc %xmm0,%xmm5,%xmm5
|
||||||
|
leaq (%r15,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r15
|
||||||
|
vaesenc %xmm0,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesenc %xmm0,%xmm7,%xmm7
|
||||||
|
subq %r15,%rbx
|
||||||
|
vaesenc %xmm0,%xmm8,%xmm8
|
||||||
|
vpxor 16(%r15),%xmm15,%xmm13
|
||||||
|
movq %rbx,64+56(%rsp)
|
||||||
|
vaesenc %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 40(%rsi),%xmm0
|
||||||
|
leaq 16(%r15,%rbx,1),%r15
|
||||||
|
vmovdqu 32(%rsp),%xmm14
|
||||||
|
prefetcht0 15(%r14)
|
||||||
|
prefetcht0 15(%r15)
|
||||||
|
cmpl $11,%eax
|
||||||
|
jb .Lenc8x_tail
|
||||||
|
|
||||||
|
vaesenc %xmm1,%xmm2,%xmm2
|
||||||
|
vaesenc %xmm1,%xmm3,%xmm3
|
||||||
|
vaesenc %xmm1,%xmm4,%xmm4
|
||||||
|
vaesenc %xmm1,%xmm5,%xmm5
|
||||||
|
vaesenc %xmm1,%xmm6,%xmm6
|
||||||
|
vaesenc %xmm1,%xmm7,%xmm7
|
||||||
|
vaesenc %xmm1,%xmm8,%xmm8
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 176-120(%rsi),%xmm1
|
||||||
|
|
||||||
|
vaesenc %xmm0,%xmm2,%xmm2
|
||||||
|
vaesenc %xmm0,%xmm3,%xmm3
|
||||||
|
vaesenc %xmm0,%xmm4,%xmm4
|
||||||
|
vaesenc %xmm0,%xmm5,%xmm5
|
||||||
|
vaesenc %xmm0,%xmm6,%xmm6
|
||||||
|
vaesenc %xmm0,%xmm7,%xmm7
|
||||||
|
vaesenc %xmm0,%xmm8,%xmm8
|
||||||
|
vaesenc %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 192-120(%rsi),%xmm0
|
||||||
|
je .Lenc8x_tail
|
||||||
|
|
||||||
|
vaesenc %xmm1,%xmm2,%xmm2
|
||||||
|
vaesenc %xmm1,%xmm3,%xmm3
|
||||||
|
vaesenc %xmm1,%xmm4,%xmm4
|
||||||
|
vaesenc %xmm1,%xmm5,%xmm5
|
||||||
|
vaesenc %xmm1,%xmm6,%xmm6
|
||||||
|
vaesenc %xmm1,%xmm7,%xmm7
|
||||||
|
vaesenc %xmm1,%xmm8,%xmm8
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 208-120(%rsi),%xmm1
|
||||||
|
|
||||||
|
vaesenc %xmm0,%xmm2,%xmm2
|
||||||
|
vaesenc %xmm0,%xmm3,%xmm3
|
||||||
|
vaesenc %xmm0,%xmm4,%xmm4
|
||||||
|
vaesenc %xmm0,%xmm5,%xmm5
|
||||||
|
vaesenc %xmm0,%xmm6,%xmm6
|
||||||
|
vaesenc %xmm0,%xmm7,%xmm7
|
||||||
|
vaesenc %xmm0,%xmm8,%xmm8
|
||||||
|
vaesenc %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 224-120(%rsi),%xmm0
|
||||||
|
|
||||||
|
.Lenc8x_tail:
|
||||||
|
vaesenc %xmm1,%xmm2,%xmm2
|
||||||
|
vpxor %xmm15,%xmm15,%xmm15
|
||||||
|
vaesenc %xmm1,%xmm3,%xmm3
|
||||||
|
vaesenc %xmm1,%xmm4,%xmm4
|
||||||
|
vpcmpgtd %xmm15,%xmm14,%xmm15
|
||||||
|
vaesenc %xmm1,%xmm5,%xmm5
|
||||||
|
vaesenc %xmm1,%xmm6,%xmm6
|
||||||
|
vpaddd %xmm14,%xmm15,%xmm15
|
||||||
|
vmovdqu 48(%rsp),%xmm14
|
||||||
|
vaesenc %xmm1,%xmm7,%xmm7
|
||||||
|
movq 64(%rsp),%rbx
|
||||||
|
vaesenc %xmm1,%xmm8,%xmm8
|
||||||
|
vaesenc %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 16-120(%rsi),%xmm1
|
||||||
|
|
||||||
|
vaesenclast %xmm0,%xmm2,%xmm2
|
||||||
|
vmovdqa %xmm15,32(%rsp)
|
||||||
|
vpxor %xmm15,%xmm15,%xmm15
|
||||||
|
vaesenclast %xmm0,%xmm3,%xmm3
|
||||||
|
vaesenclast %xmm0,%xmm4,%xmm4
|
||||||
|
vpcmpgtd %xmm15,%xmm14,%xmm15
|
||||||
|
vaesenclast %xmm0,%xmm5,%xmm5
|
||||||
|
vaesenclast %xmm0,%xmm6,%xmm6
|
||||||
|
vpaddd %xmm15,%xmm14,%xmm14
|
||||||
|
vmovdqu -120(%rsi),%xmm15
|
||||||
|
vaesenclast %xmm0,%xmm7,%xmm7
|
||||||
|
vaesenclast %xmm0,%xmm8,%xmm8
|
||||||
|
vmovdqa %xmm14,48(%rsp)
|
||||||
|
vaesenclast %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 32-120(%rsi),%xmm0
|
||||||
|
|
||||||
|
vmovups %xmm2,-16(%r8)
|
||||||
|
subq %rbx,%r8
|
||||||
|
vpxor 0(%rbp),%xmm2,%xmm2
|
||||||
|
vmovups %xmm3,-16(%r9)
|
||||||
|
subq 72(%rsp),%r9
|
||||||
|
vpxor 16(%rbp),%xmm3,%xmm3
|
||||||
|
vmovups %xmm4,-16(%r10)
|
||||||
|
subq 80(%rsp),%r10
|
||||||
|
vpxor 32(%rbp),%xmm4,%xmm4
|
||||||
|
vmovups %xmm5,-16(%r11)
|
||||||
|
subq 88(%rsp),%r11
|
||||||
|
vpxor 48(%rbp),%xmm5,%xmm5
|
||||||
|
vmovups %xmm6,-16(%r12)
|
||||||
|
subq 96(%rsp),%r12
|
||||||
|
vpxor %xmm10,%xmm6,%xmm6
|
||||||
|
vmovups %xmm7,-16(%r13)
|
||||||
|
subq 104(%rsp),%r13
|
||||||
|
vpxor %xmm11,%xmm7,%xmm7
|
||||||
|
vmovups %xmm8,-16(%r14)
|
||||||
|
subq 112(%rsp),%r14
|
||||||
|
vpxor %xmm12,%xmm8,%xmm8
|
||||||
|
vmovups %xmm9,-16(%r15)
|
||||||
|
subq 120(%rsp),%r15
|
||||||
|
vpxor %xmm13,%xmm9,%xmm9
|
||||||
|
|
||||||
|
decl %edx
|
||||||
|
jnz .Loop_enc8x
|
||||||
|
|
||||||
|
movq 16(%rsp),%rax
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.Lenc8x_done:
|
||||||
|
vzeroupper
|
||||||
|
movq -48(%rax),%r15
|
||||||
|
movq -40(%rax),%r14
|
||||||
|
movq -32(%rax),%r13
|
||||||
|
movq -24(%rax),%r12
|
||||||
|
movq -16(%rax),%rbp
|
||||||
|
movq -8(%rax),%rbx
|
||||||
|
leaq (%rax),%rsp
|
||||||
|
.Lenc8x_epilogue:
|
||||||
|
.byte 0xf3,0xc3
|
||||||
|
.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
|
||||||
|
|
||||||
|
.type aesni_multi_cbc_decrypt_avx,@function
|
||||||
|
.align 32
|
||||||
|
aesni_multi_cbc_decrypt_avx:
|
||||||
|
_avx_cbc_dec_shortcut:
|
||||||
|
movq %rsp,%rax
|
||||||
|
pushq %rbx
|
||||||
|
pushq %rbp
|
||||||
|
pushq %r12
|
||||||
|
pushq %r13
|
||||||
|
pushq %r14
|
||||||
|
pushq %r15
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subq $256,%rsp
|
||||||
|
andq $-256,%rsp
|
||||||
|
subq $192,%rsp
|
||||||
|
movq %rax,16(%rsp)
|
||||||
|
|
||||||
|
.Ldec8x_body:
|
||||||
|
vzeroupper
|
||||||
|
vmovdqu (%rsi),%xmm15
|
||||||
|
leaq 120(%rsi),%rsi
|
||||||
|
leaq 160(%rdi),%rdi
|
||||||
|
shrl $1,%edx
|
||||||
|
|
||||||
|
.Ldec8x_loop_grande:
|
||||||
|
|
||||||
|
xorl %edx,%edx
|
||||||
|
movl -144(%rdi),%ecx
|
||||||
|
movq -160(%rdi),%r8
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq -152(%rdi),%rbx
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu -136(%rdi),%xmm2
|
||||||
|
movl %ecx,32(%rsp)
|
||||||
|
cmovleq %rsp,%r8
|
||||||
|
subq %r8,%rbx
|
||||||
|
movq %rbx,64(%rsp)
|
||||||
|
vmovdqu %xmm2,192(%rsp)
|
||||||
|
movl -104(%rdi),%ecx
|
||||||
|
movq -120(%rdi),%r9
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq -112(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu -96(%rdi),%xmm3
|
||||||
|
movl %ecx,36(%rsp)
|
||||||
|
cmovleq %rsp,%r9
|
||||||
|
subq %r9,%rbp
|
||||||
|
movq %rbp,72(%rsp)
|
||||||
|
vmovdqu %xmm3,208(%rsp)
|
||||||
|
movl -64(%rdi),%ecx
|
||||||
|
movq -80(%rdi),%r10
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq -72(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu -56(%rdi),%xmm4
|
||||||
|
movl %ecx,40(%rsp)
|
||||||
|
cmovleq %rsp,%r10
|
||||||
|
subq %r10,%rbp
|
||||||
|
movq %rbp,80(%rsp)
|
||||||
|
vmovdqu %xmm4,224(%rsp)
|
||||||
|
movl -24(%rdi),%ecx
|
||||||
|
movq -40(%rdi),%r11
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq -32(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu -16(%rdi),%xmm5
|
||||||
|
movl %ecx,44(%rsp)
|
||||||
|
cmovleq %rsp,%r11
|
||||||
|
subq %r11,%rbp
|
||||||
|
movq %rbp,88(%rsp)
|
||||||
|
vmovdqu %xmm5,240(%rsp)
|
||||||
|
movl 16(%rdi),%ecx
|
||||||
|
movq 0(%rdi),%r12
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq 8(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu 24(%rdi),%xmm6
|
||||||
|
movl %ecx,48(%rsp)
|
||||||
|
cmovleq %rsp,%r12
|
||||||
|
subq %r12,%rbp
|
||||||
|
movq %rbp,96(%rsp)
|
||||||
|
vmovdqu %xmm6,256(%rsp)
|
||||||
|
movl 56(%rdi),%ecx
|
||||||
|
movq 40(%rdi),%r13
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq 48(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu 64(%rdi),%xmm7
|
||||||
|
movl %ecx,52(%rsp)
|
||||||
|
cmovleq %rsp,%r13
|
||||||
|
subq %r13,%rbp
|
||||||
|
movq %rbp,104(%rsp)
|
||||||
|
vmovdqu %xmm7,272(%rsp)
|
||||||
|
movl 96(%rdi),%ecx
|
||||||
|
movq 80(%rdi),%r14
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq 88(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu 104(%rdi),%xmm8
|
||||||
|
movl %ecx,56(%rsp)
|
||||||
|
cmovleq %rsp,%r14
|
||||||
|
subq %r14,%rbp
|
||||||
|
movq %rbp,112(%rsp)
|
||||||
|
vmovdqu %xmm8,288(%rsp)
|
||||||
|
movl 136(%rdi),%ecx
|
||||||
|
movq 120(%rdi),%r15
|
||||||
|
cmpl %edx,%ecx
|
||||||
|
movq 128(%rdi),%rbp
|
||||||
|
cmovgl %ecx,%edx
|
||||||
|
testl %ecx,%ecx
|
||||||
|
vmovdqu 144(%rdi),%xmm9
|
||||||
|
movl %ecx,60(%rsp)
|
||||||
|
cmovleq %rsp,%r15
|
||||||
|
subq %r15,%rbp
|
||||||
|
movq %rbp,120(%rsp)
|
||||||
|
vmovdqu %xmm9,304(%rsp)
|
||||||
|
testl %edx,%edx
|
||||||
|
jz .Ldec8x_done
|
||||||
|
|
||||||
|
vmovups 16-120(%rsi),%xmm1
|
||||||
|
vmovups 32-120(%rsi),%xmm0
|
||||||
|
movl 240-120(%rsi),%eax
|
||||||
|
leaq 192+128(%rsp),%rbp
|
||||||
|
|
||||||
|
vmovdqu (%r8),%xmm2
|
||||||
|
vmovdqu (%r9),%xmm3
|
||||||
|
vmovdqu (%r10),%xmm4
|
||||||
|
vmovdqu (%r11),%xmm5
|
||||||
|
vmovdqu (%r12),%xmm6
|
||||||
|
vmovdqu (%r13),%xmm7
|
||||||
|
vmovdqu (%r14),%xmm8
|
||||||
|
vmovdqu (%r15),%xmm9
|
||||||
|
vmovdqu %xmm2,0(%rbp)
|
||||||
|
vpxor %xmm15,%xmm2,%xmm2
|
||||||
|
vmovdqu %xmm3,16(%rbp)
|
||||||
|
vpxor %xmm15,%xmm3,%xmm3
|
||||||
|
vmovdqu %xmm4,32(%rbp)
|
||||||
|
vpxor %xmm15,%xmm4,%xmm4
|
||||||
|
vmovdqu %xmm5,48(%rbp)
|
||||||
|
vpxor %xmm15,%xmm5,%xmm5
|
||||||
|
vmovdqu %xmm6,64(%rbp)
|
||||||
|
vpxor %xmm15,%xmm6,%xmm6
|
||||||
|
vmovdqu %xmm7,80(%rbp)
|
||||||
|
vpxor %xmm15,%xmm7,%xmm7
|
||||||
|
vmovdqu %xmm8,96(%rbp)
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vmovdqu %xmm9,112(%rbp)
|
||||||
|
vpxor %xmm15,%xmm9,%xmm9
|
||||||
|
xorq $0x80,%rbp
|
||||||
|
movl $1,%ecx
|
||||||
|
jmp .Loop_dec8x
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Loop_dec8x:
|
||||||
|
vaesdec %xmm1,%xmm2,%xmm2
|
||||||
|
cmpl 32+0(%rsp),%ecx
|
||||||
|
vaesdec %xmm1,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r8)
|
||||||
|
vaesdec %xmm1,%xmm4,%xmm4
|
||||||
|
vaesdec %xmm1,%xmm5,%xmm5
|
||||||
|
leaq (%r8,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r8
|
||||||
|
vaesdec %xmm1,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesdec %xmm1,%xmm7,%xmm7
|
||||||
|
subq %r8,%rbx
|
||||||
|
vaesdec %xmm1,%xmm8,%xmm8
|
||||||
|
vmovdqu 16(%r8),%xmm10
|
||||||
|
movq %rbx,64+0(%rsp)
|
||||||
|
vaesdec %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups -72(%rsi),%xmm1
|
||||||
|
leaq 16(%r8,%rbx,1),%r8
|
||||||
|
vmovdqu %xmm10,128(%rsp)
|
||||||
|
vaesdec %xmm0,%xmm2,%xmm2
|
||||||
|
cmpl 32+4(%rsp),%ecx
|
||||||
|
movq 64+8(%rsp),%rbx
|
||||||
|
vaesdec %xmm0,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r9)
|
||||||
|
vaesdec %xmm0,%xmm4,%xmm4
|
||||||
|
vaesdec %xmm0,%xmm5,%xmm5
|
||||||
|
leaq (%r9,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r9
|
||||||
|
vaesdec %xmm0,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesdec %xmm0,%xmm7,%xmm7
|
||||||
|
subq %r9,%rbx
|
||||||
|
vaesdec %xmm0,%xmm8,%xmm8
|
||||||
|
vmovdqu 16(%r9),%xmm11
|
||||||
|
movq %rbx,64+8(%rsp)
|
||||||
|
vaesdec %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups -56(%rsi),%xmm0
|
||||||
|
leaq 16(%r9,%rbx,1),%r9
|
||||||
|
vmovdqu %xmm11,144(%rsp)
|
||||||
|
vaesdec %xmm1,%xmm2,%xmm2
|
||||||
|
cmpl 32+8(%rsp),%ecx
|
||||||
|
movq 64+16(%rsp),%rbx
|
||||||
|
vaesdec %xmm1,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r10)
|
||||||
|
vaesdec %xmm1,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r8)
|
||||||
|
vaesdec %xmm1,%xmm5,%xmm5
|
||||||
|
leaq (%r10,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r10
|
||||||
|
vaesdec %xmm1,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesdec %xmm1,%xmm7,%xmm7
|
||||||
|
subq %r10,%rbx
|
||||||
|
vaesdec %xmm1,%xmm8,%xmm8
|
||||||
|
vmovdqu 16(%r10),%xmm12
|
||||||
|
movq %rbx,64+16(%rsp)
|
||||||
|
vaesdec %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups -40(%rsi),%xmm1
|
||||||
|
leaq 16(%r10,%rbx,1),%r10
|
||||||
|
vmovdqu %xmm12,160(%rsp)
|
||||||
|
vaesdec %xmm0,%xmm2,%xmm2
|
||||||
|
cmpl 32+12(%rsp),%ecx
|
||||||
|
movq 64+24(%rsp),%rbx
|
||||||
|
vaesdec %xmm0,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r11)
|
||||||
|
vaesdec %xmm0,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r9)
|
||||||
|
vaesdec %xmm0,%xmm5,%xmm5
|
||||||
|
leaq (%r11,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r11
|
||||||
|
vaesdec %xmm0,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesdec %xmm0,%xmm7,%xmm7
|
||||||
|
subq %r11,%rbx
|
||||||
|
vaesdec %xmm0,%xmm8,%xmm8
|
||||||
|
vmovdqu 16(%r11),%xmm13
|
||||||
|
movq %rbx,64+24(%rsp)
|
||||||
|
vaesdec %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups -24(%rsi),%xmm0
|
||||||
|
leaq 16(%r11,%rbx,1),%r11
|
||||||
|
vmovdqu %xmm13,176(%rsp)
|
||||||
|
vaesdec %xmm1,%xmm2,%xmm2
|
||||||
|
cmpl 32+16(%rsp),%ecx
|
||||||
|
movq 64+32(%rsp),%rbx
|
||||||
|
vaesdec %xmm1,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r12)
|
||||||
|
vaesdec %xmm1,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r10)
|
||||||
|
vaesdec %xmm1,%xmm5,%xmm5
|
||||||
|
leaq (%r12,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r12
|
||||||
|
vaesdec %xmm1,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesdec %xmm1,%xmm7,%xmm7
|
||||||
|
subq %r12,%rbx
|
||||||
|
vaesdec %xmm1,%xmm8,%xmm8
|
||||||
|
vmovdqu 16(%r12),%xmm10
|
||||||
|
movq %rbx,64+32(%rsp)
|
||||||
|
vaesdec %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups -8(%rsi),%xmm1
|
||||||
|
leaq 16(%r12,%rbx,1),%r12
|
||||||
|
vaesdec %xmm0,%xmm2,%xmm2
|
||||||
|
cmpl 32+20(%rsp),%ecx
|
||||||
|
movq 64+40(%rsp),%rbx
|
||||||
|
vaesdec %xmm0,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r13)
|
||||||
|
vaesdec %xmm0,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r11)
|
||||||
|
vaesdec %xmm0,%xmm5,%xmm5
|
||||||
|
leaq (%rbx,%r13,1),%rbx
|
||||||
|
cmovgeq %rsp,%r13
|
||||||
|
vaesdec %xmm0,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesdec %xmm0,%xmm7,%xmm7
|
||||||
|
subq %r13,%rbx
|
||||||
|
vaesdec %xmm0,%xmm8,%xmm8
|
||||||
|
vmovdqu 16(%r13),%xmm11
|
||||||
|
movq %rbx,64+40(%rsp)
|
||||||
|
vaesdec %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 8(%rsi),%xmm0
|
||||||
|
leaq 16(%r13,%rbx,1),%r13
|
||||||
|
vaesdec %xmm1,%xmm2,%xmm2
|
||||||
|
cmpl 32+24(%rsp),%ecx
|
||||||
|
movq 64+48(%rsp),%rbx
|
||||||
|
vaesdec %xmm1,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r14)
|
||||||
|
vaesdec %xmm1,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r12)
|
||||||
|
vaesdec %xmm1,%xmm5,%xmm5
|
||||||
|
leaq (%r14,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r14
|
||||||
|
vaesdec %xmm1,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesdec %xmm1,%xmm7,%xmm7
|
||||||
|
subq %r14,%rbx
|
||||||
|
vaesdec %xmm1,%xmm8,%xmm8
|
||||||
|
vmovdqu 16(%r14),%xmm12
|
||||||
|
movq %rbx,64+48(%rsp)
|
||||||
|
vaesdec %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 24(%rsi),%xmm1
|
||||||
|
leaq 16(%r14,%rbx,1),%r14
|
||||||
|
vaesdec %xmm0,%xmm2,%xmm2
|
||||||
|
cmpl 32+28(%rsp),%ecx
|
||||||
|
movq 64+56(%rsp),%rbx
|
||||||
|
vaesdec %xmm0,%xmm3,%xmm3
|
||||||
|
prefetcht0 31(%r15)
|
||||||
|
vaesdec %xmm0,%xmm4,%xmm4
|
||||||
|
prefetcht0 15(%r13)
|
||||||
|
vaesdec %xmm0,%xmm5,%xmm5
|
||||||
|
leaq (%r15,%rbx,1),%rbx
|
||||||
|
cmovgeq %rsp,%r15
|
||||||
|
vaesdec %xmm0,%xmm6,%xmm6
|
||||||
|
cmovgq %rsp,%rbx
|
||||||
|
vaesdec %xmm0,%xmm7,%xmm7
|
||||||
|
subq %r15,%rbx
|
||||||
|
vaesdec %xmm0,%xmm8,%xmm8
|
||||||
|
vmovdqu 16(%r15),%xmm13
|
||||||
|
movq %rbx,64+56(%rsp)
|
||||||
|
vaesdec %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 40(%rsi),%xmm0
|
||||||
|
leaq 16(%r15,%rbx,1),%r15
|
||||||
|
vmovdqu 32(%rsp),%xmm14
|
||||||
|
prefetcht0 15(%r14)
|
||||||
|
prefetcht0 15(%r15)
|
||||||
|
cmpl $11,%eax
|
||||||
|
jb .Ldec8x_tail
|
||||||
|
|
||||||
|
vaesdec %xmm1,%xmm2,%xmm2
|
||||||
|
vaesdec %xmm1,%xmm3,%xmm3
|
||||||
|
vaesdec %xmm1,%xmm4,%xmm4
|
||||||
|
vaesdec %xmm1,%xmm5,%xmm5
|
||||||
|
vaesdec %xmm1,%xmm6,%xmm6
|
||||||
|
vaesdec %xmm1,%xmm7,%xmm7
|
||||||
|
vaesdec %xmm1,%xmm8,%xmm8
|
||||||
|
vaesdec %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 176-120(%rsi),%xmm1
|
||||||
|
|
||||||
|
vaesdec %xmm0,%xmm2,%xmm2
|
||||||
|
vaesdec %xmm0,%xmm3,%xmm3
|
||||||
|
vaesdec %xmm0,%xmm4,%xmm4
|
||||||
|
vaesdec %xmm0,%xmm5,%xmm5
|
||||||
|
vaesdec %xmm0,%xmm6,%xmm6
|
||||||
|
vaesdec %xmm0,%xmm7,%xmm7
|
||||||
|
vaesdec %xmm0,%xmm8,%xmm8
|
||||||
|
vaesdec %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 192-120(%rsi),%xmm0
|
||||||
|
je .Ldec8x_tail
|
||||||
|
|
||||||
|
vaesdec %xmm1,%xmm2,%xmm2
|
||||||
|
vaesdec %xmm1,%xmm3,%xmm3
|
||||||
|
vaesdec %xmm1,%xmm4,%xmm4
|
||||||
|
vaesdec %xmm1,%xmm5,%xmm5
|
||||||
|
vaesdec %xmm1,%xmm6,%xmm6
|
||||||
|
vaesdec %xmm1,%xmm7,%xmm7
|
||||||
|
vaesdec %xmm1,%xmm8,%xmm8
|
||||||
|
vaesdec %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 208-120(%rsi),%xmm1
|
||||||
|
|
||||||
|
vaesdec %xmm0,%xmm2,%xmm2
|
||||||
|
vaesdec %xmm0,%xmm3,%xmm3
|
||||||
|
vaesdec %xmm0,%xmm4,%xmm4
|
||||||
|
vaesdec %xmm0,%xmm5,%xmm5
|
||||||
|
vaesdec %xmm0,%xmm6,%xmm6
|
||||||
|
vaesdec %xmm0,%xmm7,%xmm7
|
||||||
|
vaesdec %xmm0,%xmm8,%xmm8
|
||||||
|
vaesdec %xmm0,%xmm9,%xmm9
|
||||||
|
vmovups 224-120(%rsi),%xmm0
|
||||||
|
|
||||||
|
.Ldec8x_tail:
|
||||||
|
vaesdec %xmm1,%xmm2,%xmm2
|
||||||
|
vpxor %xmm15,%xmm15,%xmm15
|
||||||
|
vaesdec %xmm1,%xmm3,%xmm3
|
||||||
|
vaesdec %xmm1,%xmm4,%xmm4
|
||||||
|
vpcmpgtd %xmm15,%xmm14,%xmm15
|
||||||
|
vaesdec %xmm1,%xmm5,%xmm5
|
||||||
|
vaesdec %xmm1,%xmm6,%xmm6
|
||||||
|
vpaddd %xmm14,%xmm15,%xmm15
|
||||||
|
vmovdqu 48(%rsp),%xmm14
|
||||||
|
vaesdec %xmm1,%xmm7,%xmm7
|
||||||
|
movq 64(%rsp),%rbx
|
||||||
|
vaesdec %xmm1,%xmm8,%xmm8
|
||||||
|
vaesdec %xmm1,%xmm9,%xmm9
|
||||||
|
vmovups 16-120(%rsi),%xmm1
|
||||||
|
|
||||||
|
vaesdeclast %xmm0,%xmm2,%xmm2
|
||||||
|
vmovdqa %xmm15,32(%rsp)
|
||||||
|
vpxor %xmm15,%xmm15,%xmm15
|
||||||
|
vaesdeclast %xmm0,%xmm3,%xmm3
|
||||||
|
vpxor 0(%rbp),%xmm2,%xmm2
|
||||||
|
vaesdeclast %xmm0,%xmm4,%xmm4
|
||||||
|
vpxor 16(%rbp),%xmm3,%xmm3
|
||||||
|
vpcmpgtd %xmm15,%xmm14,%xmm15
|
||||||
|
vaesdeclast %xmm0,%xmm5,%xmm5
|
||||||
|
vpxor 32(%rbp),%xmm4,%xmm4
|
||||||
|
vaesdeclast %xmm0,%xmm6,%xmm6
|
||||||
|
vpxor 48(%rbp),%xmm5,%xmm5
|
||||||
|
vpaddd %xmm15,%xmm14,%xmm14
|
||||||
|
vmovdqu -120(%rsi),%xmm15
|
||||||
|
vaesdeclast %xmm0,%xmm7,%xmm7
|
||||||
|
vpxor 64(%rbp),%xmm6,%xmm6
|
||||||
|
vaesdeclast %xmm0,%xmm8,%xmm8
|
||||||
|
vpxor 80(%rbp),%xmm7,%xmm7
|
||||||
|
vmovdqa %xmm14,48(%rsp)
|
||||||
|
vaesdeclast %xmm0,%xmm9,%xmm9
|
||||||
|
vpxor 96(%rbp),%xmm8,%xmm8
|
||||||
|
vmovups 32-120(%rsi),%xmm0
|
||||||
|
|
||||||
|
vmovups %xmm2,-16(%r8)
|
||||||
|
subq %rbx,%r8
|
||||||
|
vmovdqu 128+0(%rsp),%xmm2
|
||||||
|
vpxor 112(%rbp),%xmm9,%xmm9
|
||||||
|
vmovups %xmm3,-16(%r9)
|
||||||
|
subq 72(%rsp),%r9
|
||||||
|
vmovdqu %xmm2,0(%rbp)
|
||||||
|
vpxor %xmm15,%xmm2,%xmm2
|
||||||
|
vmovdqu 128+16(%rsp),%xmm3
|
||||||
|
vmovups %xmm4,-16(%r10)
|
||||||
|
subq 80(%rsp),%r10
|
||||||
|
vmovdqu %xmm3,16(%rbp)
|
||||||
|
vpxor %xmm15,%xmm3,%xmm3
|
||||||
|
vmovdqu 128+32(%rsp),%xmm4
|
||||||
|
vmovups %xmm5,-16(%r11)
|
||||||
|
subq 88(%rsp),%r11
|
||||||
|
vmovdqu %xmm4,32(%rbp)
|
||||||
|
vpxor %xmm15,%xmm4,%xmm4
|
||||||
|
vmovdqu 128+48(%rsp),%xmm5
|
||||||
|
vmovups %xmm6,-16(%r12)
|
||||||
|
subq 96(%rsp),%r12
|
||||||
|
vmovdqu %xmm5,48(%rbp)
|
||||||
|
vpxor %xmm15,%xmm5,%xmm5
|
||||||
|
vmovdqu %xmm10,64(%rbp)
|
||||||
|
vpxor %xmm10,%xmm15,%xmm6
|
||||||
|
vmovups %xmm7,-16(%r13)
|
||||||
|
subq 104(%rsp),%r13
|
||||||
|
vmovdqu %xmm11,80(%rbp)
|
||||||
|
vpxor %xmm11,%xmm15,%xmm7
|
||||||
|
vmovups %xmm8,-16(%r14)
|
||||||
|
subq 112(%rsp),%r14
|
||||||
|
vmovdqu %xmm12,96(%rbp)
|
||||||
|
vpxor %xmm12,%xmm15,%xmm8
|
||||||
|
vmovups %xmm9,-16(%r15)
|
||||||
|
subq 120(%rsp),%r15
|
||||||
|
vmovdqu %xmm13,112(%rbp)
|
||||||
|
vpxor %xmm13,%xmm15,%xmm9
|
||||||
|
|
||||||
|
xorq $128,%rbp
|
||||||
|
decl %edx
|
||||||
|
jnz .Loop_dec8x
|
||||||
|
|
||||||
|
movq 16(%rsp),%rax
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.Ldec8x_done:
|
||||||
|
vzeroupper
|
||||||
|
movq -48(%rax),%r15
|
||||||
|
movq -40(%rax),%r14
|
||||||
|
movq -32(%rax),%r13
|
||||||
|
movq -24(%rax),%r12
|
||||||
|
movq -16(%rax),%rbp
|
||||||
|
movq -8(%rax),%rbx
|
||||||
|
leaq (%rax),%rsp
|
||||||
|
.Ldec8x_epilogue:
|
||||||
|
.byte 0xf3,0xc3
|
||||||
|
.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
|
||||||
|
|
|
@ -1250,7 +1250,108 @@ gcm_ghash_clmul:
|
||||||
.type gcm_init_avx,@function
|
.type gcm_init_avx,@function
|
||||||
.align 32
|
.align 32
|
||||||
gcm_init_avx:
|
gcm_init_avx:
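# AVX path: precompute the Htable (powers of the hash key H) consumed by gcm_ghash_avx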
|
||||||
jmp .L_init_clmul
|
vzeroupper
|
||||||
|
|
||||||
|
vmovdqu (%rsi),%xmm2
|
||||||
|
vpshufd $78,%xmm2,%xmm2
|
||||||
|
|
||||||
|
|
||||||
|
vpshufd $255,%xmm2,%xmm4
|
||||||
|
vpsrlq $63,%xmm2,%xmm3
|
||||||
|
vpsllq $1,%xmm2,%xmm2
|
||||||
|
vpxor %xmm5,%xmm5,%xmm5
|
||||||
|
vpcmpgtd %xmm4,%xmm5,%xmm5
|
||||||
|
vpslldq $8,%xmm3,%xmm3
|
||||||
|
vpor %xmm3,%xmm2,%xmm2
|
||||||
|
|
||||||
|
|
||||||
|
vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
|
||||||
|
vpxor %xmm5,%xmm2,%xmm2
|
||||||
|
|
||||||
|
vpunpckhqdq %xmm2,%xmm2,%xmm6
|
||||||
|
vmovdqa %xmm2,%xmm0
|
||||||
|
vpxor %xmm2,%xmm6,%xmm6
|
||||||
|
movq $4,%r10
|
||||||
|
jmp .Linit_start_avx
|
||||||
|
.align 32
|
||||||
|
.Linit_loop_avx:
|
||||||
|
vpalignr $8,%xmm3,%xmm4,%xmm5
|
||||||
|
vmovdqu %xmm5,-16(%rdi)
|
||||||
|
vpunpckhqdq %xmm0,%xmm0,%xmm3
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
|
||||||
|
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
|
||||||
|
vpxor %xmm0,%xmm1,%xmm4
|
||||||
|
vpxor %xmm4,%xmm3,%xmm3
|
||||||
|
|
||||||
|
vpslldq $8,%xmm3,%xmm4
|
||||||
|
vpsrldq $8,%xmm3,%xmm3
|
||||||
|
vpxor %xmm4,%xmm0,%xmm0
|
||||||
|
vpxor %xmm3,%xmm1,%xmm1
|
||||||
|
vpsllq $57,%xmm0,%xmm3
|
||||||
|
vpsllq $62,%xmm0,%xmm4
|
||||||
|
vpxor %xmm3,%xmm4,%xmm4
|
||||||
|
vpsllq $63,%xmm0,%xmm3
|
||||||
|
vpxor %xmm3,%xmm4,%xmm4
|
||||||
|
vpslldq $8,%xmm4,%xmm3
|
||||||
|
vpsrldq $8,%xmm4,%xmm4
|
||||||
|
vpxor %xmm3,%xmm0,%xmm0
|
||||||
|
vpxor %xmm4,%xmm1,%xmm1
|
||||||
|
|
||||||
|
vpsrlq $1,%xmm0,%xmm4
|
||||||
|
vpxor %xmm0,%xmm1,%xmm1
|
||||||
|
vpxor %xmm4,%xmm0,%xmm0
|
||||||
|
vpsrlq $5,%xmm4,%xmm4
|
||||||
|
vpxor %xmm4,%xmm0,%xmm0
|
||||||
|
vpsrlq $1,%xmm0,%xmm0
|
||||||
|
vpxor %xmm1,%xmm0,%xmm0
|
||||||
|
.Linit_start_avx:
|
||||||
|
vmovdqa %xmm0,%xmm5
|
||||||
|
vpunpckhqdq %xmm0,%xmm0,%xmm3
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
|
||||||
|
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
|
||||||
|
vpxor %xmm0,%xmm1,%xmm4
|
||||||
|
vpxor %xmm4,%xmm3,%xmm3
|
||||||
|
|
||||||
|
vpslldq $8,%xmm3,%xmm4
|
||||||
|
vpsrldq $8,%xmm3,%xmm3
|
||||||
|
vpxor %xmm4,%xmm0,%xmm0
|
||||||
|
vpxor %xmm3,%xmm1,%xmm1
|
||||||
|
vpsllq $57,%xmm0,%xmm3
|
||||||
|
vpsllq $62,%xmm0,%xmm4
|
||||||
|
vpxor %xmm3,%xmm4,%xmm4
|
||||||
|
vpsllq $63,%xmm0,%xmm3
|
||||||
|
vpxor %xmm3,%xmm4,%xmm4
|
||||||
|
vpslldq $8,%xmm4,%xmm3
|
||||||
|
vpsrldq $8,%xmm4,%xmm4
|
||||||
|
vpxor %xmm3,%xmm0,%xmm0
|
||||||
|
vpxor %xmm4,%xmm1,%xmm1
|
||||||
|
|
||||||
|
vpsrlq $1,%xmm0,%xmm4
|
||||||
|
vpxor %xmm0,%xmm1,%xmm1
|
||||||
|
vpxor %xmm4,%xmm0,%xmm0
|
||||||
|
vpsrlq $5,%xmm4,%xmm4
|
||||||
|
vpxor %xmm4,%xmm0,%xmm0
|
||||||
|
vpsrlq $1,%xmm0,%xmm0
|
||||||
|
vpxor %xmm1,%xmm0,%xmm0
|
||||||
|
vpshufd $78,%xmm5,%xmm3
|
||||||
|
vpshufd $78,%xmm0,%xmm4
|
||||||
|
vpxor %xmm5,%xmm3,%xmm3
|
||||||
|
vmovdqu %xmm5,0(%rdi)
|
||||||
|
vpxor %xmm0,%xmm4,%xmm4
|
||||||
|
vmovdqu %xmm0,16(%rdi)
|
||||||
|
leaq 48(%rdi),%rdi
|
||||||
|
subq $1,%r10
|
||||||
|
jnz .Linit_loop_avx
|
||||||
|
|
||||||
|
vpalignr $8,%xmm4,%xmm3,%xmm5
|
||||||
|
vmovdqu %xmm5,-16(%rdi)
|
||||||
|
|
||||||
|
vzeroupper
|
||||||
|
.byte 0xf3,0xc3
|
||||||
.size gcm_init_avx,.-gcm_init_avx
|
.size gcm_init_avx,.-gcm_init_avx
|
||||||
.globl gcm_gmult_avx
|
.globl gcm_gmult_avx
|
||||||
.type gcm_gmult_avx,@function
|
.type gcm_gmult_avx,@function
|
||||||
|
@ -1262,7 +1363,377 @@ gcm_gmult_avx:
|
||||||
.type gcm_ghash_avx,@function
|
.type gcm_ghash_avx,@function
|
||||||
.align 32
|
.align 32
|
||||||
gcm_ghash_avx:
|
gcm_ghash_avx:
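# AVX GHASH: hashes up to 0x80 bytes (8 blocks) per pass using PCLMULQDQ with aggregated reduction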
|
||||||
jmp .L_ghash_clmul
|
vzeroupper
|
||||||
|
|
||||||
|
vmovdqu (%rdi),%xmm10
|
||||||
|
leaq .L0x1c2_polynomial(%rip),%r10
|
||||||
|
leaq 64(%rsi),%rsi
|
||||||
|
vmovdqu .Lbswap_mask(%rip),%xmm13
|
||||||
|
vpshufb %xmm13,%xmm10,%xmm10
|
||||||
|
cmpq $0x80,%rcx
|
||||||
|
jb .Lshort_avx
|
||||||
|
subq $0x80,%rcx
|
||||||
|
|
||||||
|
vmovdqu 112(%rdx),%xmm14
|
||||||
|
vmovdqu 0-64(%rsi),%xmm6
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm14
|
||||||
|
vmovdqu 32-64(%rsi),%xmm7
|
||||||
|
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm9
|
||||||
|
vmovdqu 96(%rdx),%xmm15
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
|
||||||
|
vpxor %xmm14,%xmm9,%xmm9
|
||||||
|
vpshufb %xmm13,%xmm15,%xmm15
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
|
||||||
|
vmovdqu 16-64(%rsi),%xmm6
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vmovdqu 80(%rdx),%xmm14
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm14
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm9
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
|
||||||
|
vmovdqu 48-64(%rsi),%xmm6
|
||||||
|
vpxor %xmm14,%xmm9,%xmm9
|
||||||
|
vmovdqu 64(%rdx),%xmm15
|
||||||
|
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
|
||||||
|
vmovdqu 80-64(%rsi),%xmm7
|
||||||
|
|
||||||
|
vpshufb %xmm13,%xmm15,%xmm15
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
|
||||||
|
vmovdqu 64-64(%rsi),%xmm6
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
|
||||||
|
vmovdqu 48(%rdx),%xmm14
|
||||||
|
vpxor %xmm3,%xmm0,%xmm0
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
|
||||||
|
vpxor %xmm4,%xmm1,%xmm1
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm14
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
|
||||||
|
vmovdqu 96-64(%rsi),%xmm6
|
||||||
|
vpxor %xmm5,%xmm2,%xmm2
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm9
|
||||||
|
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
|
||||||
|
vmovdqu 128-64(%rsi),%xmm7
|
||||||
|
vpxor %xmm14,%xmm9,%xmm9
|
||||||
|
|
||||||
|
vmovdqu 32(%rdx),%xmm15
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpshufb %xmm13,%xmm15,%xmm15
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
|
||||||
|
vmovdqu 112-64(%rsi),%xmm6
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
|
||||||
|
vmovdqu 16(%rdx),%xmm14
|
||||||
|
vpxor %xmm3,%xmm0,%xmm0
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
|
||||||
|
vpxor %xmm4,%xmm1,%xmm1
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm14
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
|
||||||
|
vmovdqu 144-64(%rsi),%xmm6
|
||||||
|
vpxor %xmm5,%xmm2,%xmm2
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm9
|
||||||
|
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
|
||||||
|
vmovdqu 176-64(%rsi),%xmm7
|
||||||
|
vpxor %xmm14,%xmm9,%xmm9
|
||||||
|
|
||||||
|
vmovdqu (%rdx),%xmm15
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpshufb %xmm13,%xmm15,%xmm15
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
|
||||||
|
vmovdqu 160-64(%rsi),%xmm6
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
|
||||||
|
|
||||||
|
leaq 128(%rdx),%rdx
|
||||||
|
cmpq $0x80,%rcx
|
||||||
|
jb .Ltail_avx
|
||||||
|
|
||||||
|
vpxor %xmm10,%xmm15,%xmm15
|
||||||
|
subq $0x80,%rcx
|
||||||
|
jmp .Loop8x_avx
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Loop8x_avx:
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vmovdqu 112(%rdx),%xmm14
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm14
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
|
||||||
|
vmovdqu 0-64(%rsi),%xmm6
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm9
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
|
||||||
|
vmovdqu 32-64(%rsi),%xmm7
|
||||||
|
vpxor %xmm14,%xmm9,%xmm9
|
||||||
|
|
||||||
|
vmovdqu 96(%rdx),%xmm15
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
|
||||||
|
vpxor %xmm3,%xmm10,%xmm10
|
||||||
|
vpshufb %xmm13,%xmm15,%xmm15
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
|
||||||
|
vxorps %xmm4,%xmm11,%xmm11
|
||||||
|
vmovdqu 16-64(%rsi),%xmm6
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
|
||||||
|
vpxor %xmm5,%xmm12,%xmm12
|
||||||
|
vxorps %xmm15,%xmm8,%xmm8
|
||||||
|
|
||||||
|
vmovdqu 80(%rdx),%xmm14
|
||||||
|
vpxor %xmm10,%xmm12,%xmm12
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
|
||||||
|
vpxor %xmm11,%xmm12,%xmm12
|
||||||
|
vpslldq $8,%xmm12,%xmm9
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
|
||||||
|
vpsrldq $8,%xmm12,%xmm12
|
||||||
|
vpxor %xmm9,%xmm10,%xmm10
|
||||||
|
vmovdqu 48-64(%rsi),%xmm6
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm14
|
||||||
|
vxorps %xmm12,%xmm11,%xmm11
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm9
|
||||||
|
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
|
||||||
|
vmovdqu 80-64(%rsi),%xmm7
|
||||||
|
vpxor %xmm14,%xmm9,%xmm9
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
|
||||||
|
vmovdqu 64(%rdx),%xmm15
|
||||||
|
vpalignr $8,%xmm10,%xmm10,%xmm12
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
|
||||||
|
vpshufb %xmm13,%xmm15,%xmm15
|
||||||
|
vpxor %xmm3,%xmm0,%xmm0
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
|
||||||
|
vmovdqu 64-64(%rsi),%xmm6
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm4,%xmm1,%xmm1
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
|
||||||
|
vxorps %xmm15,%xmm8,%xmm8
|
||||||
|
vpxor %xmm5,%xmm2,%xmm2
|
||||||
|
|
||||||
|
vmovdqu 48(%rdx),%xmm14
|
||||||
|
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm14
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
|
||||||
|
vmovdqu 96-64(%rsi),%xmm6
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm9
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
|
||||||
|
vmovdqu 128-64(%rsi),%xmm7
|
||||||
|
vpxor %xmm14,%xmm9,%xmm9
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
|
||||||
|
vmovdqu 32(%rdx),%xmm15
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
|
||||||
|
vpshufb %xmm13,%xmm15,%xmm15
|
||||||
|
vpxor %xmm3,%xmm0,%xmm0
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
|
||||||
|
vmovdqu 112-64(%rsi),%xmm6
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm4,%xmm1,%xmm1
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vpxor %xmm5,%xmm2,%xmm2
|
||||||
|
vxorps %xmm12,%xmm10,%xmm10
|
||||||
|
|
||||||
|
vmovdqu 16(%rdx),%xmm14
|
||||||
|
vpalignr $8,%xmm10,%xmm10,%xmm12
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm14
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
|
||||||
|
vmovdqu 144-64(%rsi),%xmm6
|
||||||
|
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
|
||||||
|
vxorps %xmm11,%xmm12,%xmm12
|
||||||
|
vpunpckhqdq %xmm14,%xmm14,%xmm9
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
|
||||||
|
vmovdqu 176-64(%rsi),%xmm7
|
||||||
|
vpxor %xmm14,%xmm9,%xmm9
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
|
||||||
|
vmovdqu (%rdx),%xmm15
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
|
||||||
|
vpshufb %xmm13,%xmm15,%xmm15
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
|
||||||
|
vmovdqu 160-64(%rsi),%xmm6
|
||||||
|
vpxor %xmm12,%xmm15,%xmm15
|
||||||
|
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
|
||||||
|
vpxor %xmm10,%xmm15,%xmm15
|
||||||
|
|
||||||
|
leaq 128(%rdx),%rdx
|
||||||
|
subq $0x80,%rcx
|
||||||
|
jnc .Loop8x_avx
|
||||||
|
|
||||||
|
addq $0x80,%rcx
|
||||||
|
jmp .Ltail_no_xor_avx
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Lshort_avx:
|
||||||
|
vmovdqu -16(%rdx,%rcx,1),%xmm14
|
||||||
|
leaq (%rdx,%rcx,1),%rdx
|
||||||
|
vmovdqu 0-64(%rsi),%xmm6
|
||||||
|
vmovdqu 32-64(%rsi),%xmm7
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm15
|
||||||
|
|
||||||
|
vmovdqa %xmm0,%xmm3
|
||||||
|
vmovdqa %xmm1,%xmm4
|
||||||
|
vmovdqa %xmm2,%xmm5
|
||||||
|
subq $0x10,%rcx
|
||||||
|
jz .Ltail_avx
|
||||||
|
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vmovdqu -32(%rdx),%xmm14
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
|
||||||
|
vmovdqu 16-64(%rsi),%xmm6
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm15
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
|
||||||
|
vpsrldq $8,%xmm7,%xmm7
|
||||||
|
subq $0x10,%rcx
|
||||||
|
jz .Ltail_avx
|
||||||
|
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vmovdqu -48(%rdx),%xmm14
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
|
||||||
|
vmovdqu 48-64(%rsi),%xmm6
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm15
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
|
||||||
|
vmovdqu 80-64(%rsi),%xmm7
|
||||||
|
subq $0x10,%rcx
|
||||||
|
jz .Ltail_avx
|
||||||
|
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vmovdqu -64(%rdx),%xmm14
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
|
||||||
|
vmovdqu 64-64(%rsi),%xmm6
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm15
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
|
||||||
|
vpsrldq $8,%xmm7,%xmm7
|
||||||
|
subq $0x10,%rcx
|
||||||
|
jz .Ltail_avx
|
||||||
|
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vmovdqu -80(%rdx),%xmm14
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
|
||||||
|
vmovdqu 96-64(%rsi),%xmm6
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm15
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
|
||||||
|
vmovdqu 128-64(%rsi),%xmm7
|
||||||
|
subq $0x10,%rcx
|
||||||
|
jz .Ltail_avx
|
||||||
|
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vmovdqu -96(%rdx),%xmm14
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
|
||||||
|
vmovdqu 112-64(%rsi),%xmm6
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm15
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
|
||||||
|
vpsrldq $8,%xmm7,%xmm7
|
||||||
|
subq $0x10,%rcx
|
||||||
|
jz .Ltail_avx
|
||||||
|
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vmovdqu -112(%rdx),%xmm14
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
|
||||||
|
vmovdqu 144-64(%rsi),%xmm6
|
||||||
|
vpshufb %xmm13,%xmm14,%xmm15
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
|
||||||
|
vmovq 184-64(%rsi),%xmm7
|
||||||
|
subq $0x10,%rcx
|
||||||
|
jmp .Ltail_avx
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Ltail_avx:
|
||||||
|
vpxor %xmm10,%xmm15,%xmm15
|
||||||
|
.Ltail_no_xor_avx:
|
||||||
|
vpunpckhqdq %xmm15,%xmm15,%xmm8
|
||||||
|
vpxor %xmm0,%xmm3,%xmm3
|
||||||
|
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
|
||||||
|
vpxor %xmm15,%xmm8,%xmm8
|
||||||
|
vpxor %xmm1,%xmm4,%xmm4
|
||||||
|
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
|
||||||
|
|
||||||
|
vmovdqu (%r10),%xmm12
|
||||||
|
|
||||||
|
vpxor %xmm0,%xmm3,%xmm10
|
||||||
|
vpxor %xmm1,%xmm4,%xmm11
|
||||||
|
vpxor %xmm2,%xmm5,%xmm5
|
||||||
|
|
||||||
|
vpxor %xmm10,%xmm5,%xmm5
|
||||||
|
vpxor %xmm11,%xmm5,%xmm5
|
||||||
|
vpslldq $8,%xmm5,%xmm9
|
||||||
|
vpsrldq $8,%xmm5,%xmm5
|
||||||
|
vpxor %xmm9,%xmm10,%xmm10
|
||||||
|
vpxor %xmm5,%xmm11,%xmm11
|
||||||
|
|
||||||
|
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
|
||||||
|
vpalignr $8,%xmm10,%xmm10,%xmm10
|
||||||
|
vpxor %xmm9,%xmm10,%xmm10
|
||||||
|
|
||||||
|
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
|
||||||
|
vpalignr $8,%xmm10,%xmm10,%xmm10
|
||||||
|
vpxor %xmm11,%xmm10,%xmm10
|
||||||
|
vpxor %xmm9,%xmm10,%xmm10
|
||||||
|
|
||||||
|
cmpq $0,%rcx
|
||||||
|
jne .Lshort_avx
|
||||||
|
|
||||||
|
vpshufb %xmm13,%xmm10,%xmm10
|
||||||
|
vmovdqu %xmm10,(%rdi)
|
||||||
|
vzeroupper
|
||||||
|
.byte 0xf3,0xc3
|
||||||
.size gcm_ghash_avx,.-gcm_ghash_avx
|
.size gcm_ghash_avx,.-gcm_ghash_avx
|
||||||
.align 64
|
.align 64
|
||||||
.Lbswap_mask:
|
.Lbswap_mask:
|
||||||
|
|
|
@ -24,7 +24,7 @@ RC4: orq %rsi,%rsi
|
||||||
movb -4(%rdi),%cl
|
movb -4(%rdi),%cl
|
||||||
cmpl $-1,256(%rdi)
|
cmpl $-1,256(%rdi)
|
||||||
je .LRC4_CHAR
|
je .LRC4_CHAR
|
||||||
movl OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8d
|
movl OPENSSL_ia32cap_P(%rip),%r8d
|
||||||
xorq %rbx,%rbx
|
xorq %rbx,%rbx
|
||||||
incb %r10b
|
incb %r10b
|
||||||
subq %r10,%rbx
|
subq %r10,%rbx
|
||||||
|
@ -531,7 +531,7 @@ private_RC4_set_key:
|
||||||
xorq %r10,%r10
|
xorq %r10,%r10
|
||||||
xorq %r11,%r11
|
xorq %r11,%r11
|
||||||
|
|
||||||
movl OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8d
|
movl OPENSSL_ia32cap_P(%rip),%r8d
|
||||||
btl $20,%r8d
|
btl $20,%r8d
|
||||||
jc .Lc1stloop
|
jc .Lc1stloop
|
||||||
jmp .Lw1stloop
|
jmp .Lw1stloop
|
||||||
|
@ -595,7 +595,7 @@ private_RC4_set_key:
|
||||||
.align 16
|
.align 16
|
||||||
RC4_options:
|
RC4_options:
|
||||||
leaq .Lopts(%rip),%rax
|
leaq .Lopts(%rip),%rax
|
||||||
movl OPENSSL_ia32cap_P@GOTPCREL(%rip),%edx
|
movl OPENSSL_ia32cap_P(%rip),%edx
|
||||||
btl $20,%edx
|
btl $20,%edx
|
||||||
jc .L8xchar
|
jc .L8xchar
|
||||||
btl $30,%edx
|
btl $30,%edx
|
||||||
|
|
|
@ -20,6 +20,10 @@ rsaz_512_sqr:
|
||||||
movq (%rsi),%rdx
|
movq (%rsi),%rdx
|
||||||
movq 8(%rsi),%rax
|
movq 8(%rsi),%rax
|
||||||
movq %rcx,128(%rsp)
|
movq %rcx,128(%rsp)
|
||||||
|
movl $0x80100,%r11d
|
||||||
|
andl OPENSSL_ia32cap_P+8(%rip),%r11d
|
||||||
|
cmpl $0x80100,%r11d
|
||||||
|
je .Loop_sqrx
|
||||||
jmp .Loop_sqr
|
jmp .Loop_sqr
|
||||||
|
|
||||||
.align 32
|
.align 32
|
||||||
|
@ -383,6 +387,276 @@ rsaz_512_sqr:
|
||||||
|
|
||||||
decl %r8d
|
decl %r8d
|
||||||
jnz .Loop_sqr
|
jnz .Loop_sqr
|
||||||
|
jmp .Lsqr_tail
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Loop_sqrx:
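# squaring path using mulx/adcx/adox; taken only when BMI2 and ADX are available (0x80100 in OPENSSL_ia32cap_P+8)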
|
||||||
|
movl %r8d,128+8(%rsp)
|
||||||
|
.byte 102,72,15,110,199
|
||||||
|
.byte 102,72,15,110,205
|
||||||
|
|
||||||
|
mulxq %rax,%r8,%r9
|
||||||
|
|
||||||
|
mulxq 16(%rsi),%rcx,%r10
|
||||||
|
xorq %rbp,%rbp
|
||||||
|
|
||||||
|
mulxq 24(%rsi),%rax,%r11
|
||||||
|
adcxq %rcx,%r9
|
||||||
|
|
||||||
|
mulxq 32(%rsi),%rcx,%r12
|
||||||
|
adcxq %rax,%r10
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rax,%r13
|
||||||
|
adcxq %rcx,%r11
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r12
|
||||||
|
adcxq %rcx,%r13
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r14
|
||||||
|
adcxq %rbp,%r15
|
||||||
|
|
||||||
|
movq %r9,%rcx
|
||||||
|
shldq $1,%r8,%r9
|
||||||
|
shlq $1,%r8
|
||||||
|
|
||||||
|
xorl %ebp,%ebp
|
||||||
|
mulxq %rdx,%rax,%rdx
|
||||||
|
adcxq %rdx,%r8
|
||||||
|
movq 8(%rsi),%rdx
|
||||||
|
adcxq %rbp,%r9
|
||||||
|
|
||||||
|
movq %rax,(%rsp)
|
||||||
|
movq %r8,8(%rsp)
|
||||||
|
|
||||||
|
|
||||||
|
mulxq 16(%rsi),%rax,%rbx
|
||||||
|
adoxq %rax,%r10
|
||||||
|
adcxq %rbx,%r11
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
|
||||||
|
adoxq %rdi,%r11
|
||||||
|
adcxq %r8,%r12
|
||||||
|
|
||||||
|
mulxq 32(%rsi),%rax,%rbx
|
||||||
|
adoxq %rax,%r12
|
||||||
|
adcxq %rbx,%r13
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rdi,%r8
|
||||||
|
adoxq %rdi,%r13
|
||||||
|
adcxq %r8,%r14
|
||||||
|
|
||||||
|
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
|
||||||
|
adoxq %rax,%r14
|
||||||
|
adcxq %rbx,%r15
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
|
||||||
|
adoxq %rdi,%r15
|
||||||
|
adcxq %rbp,%r8
|
||||||
|
adoxq %rbp,%r8
|
||||||
|
|
||||||
|
movq %r11,%rbx
|
||||||
|
shldq $1,%r10,%r11
|
||||||
|
shldq $1,%rcx,%r10
|
||||||
|
|
||||||
|
xorl %ebp,%ebp
|
||||||
|
mulxq %rdx,%rax,%rcx
|
||||||
|
movq 16(%rsi),%rdx
|
||||||
|
adcxq %rax,%r9
|
||||||
|
adcxq %rcx,%r10
|
||||||
|
adcxq %rbp,%r11
|
||||||
|
|
||||||
|
movq %r9,16(%rsp)
|
||||||
|
.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
|
||||||
|
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
|
||||||
|
adoxq %rdi,%r12
|
||||||
|
adcxq %r9,%r13
|
||||||
|
|
||||||
|
mulxq 32(%rsi),%rax,%rcx
|
||||||
|
adoxq %rax,%r13
|
||||||
|
adcxq %rcx,%r14
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rdi,%r9
|
||||||
|
adoxq %rdi,%r14
|
||||||
|
adcxq %r9,%r15
|
||||||
|
|
||||||
|
.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
|
||||||
|
adoxq %rax,%r15
|
||||||
|
adcxq %rcx,%r8
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
|
||||||
|
adoxq %rdi,%r8
|
||||||
|
adcxq %rbp,%r9
|
||||||
|
adoxq %rbp,%r9
|
||||||
|
|
||||||
|
movq %r13,%rcx
|
||||||
|
shldq $1,%r12,%r13
|
||||||
|
shldq $1,%rbx,%r12
|
||||||
|
|
||||||
|
xorl %ebp,%ebp
|
||||||
|
mulxq %rdx,%rax,%rdx
|
||||||
|
adcxq %rax,%r11
|
||||||
|
adcxq %rdx,%r12
|
||||||
|
movq 24(%rsi),%rdx
|
||||||
|
adcxq %rbp,%r13
|
||||||
|
|
||||||
|
movq %r11,32(%rsp)
|
||||||
|
.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
|
||||||
|
|
||||||
|
|
||||||
|
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
|
||||||
|
adoxq %rax,%r14
|
||||||
|
adcxq %rbx,%r15
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rdi,%r10
|
||||||
|
adoxq %rdi,%r15
|
||||||
|
adcxq %r10,%r8
|
||||||
|
|
||||||
|
mulxq 48(%rsi),%rax,%rbx
|
||||||
|
adoxq %rax,%r8
|
||||||
|
adcxq %rbx,%r9
|
||||||
|
|
||||||
|
mulxq 56(%rsi),%rdi,%r10
|
||||||
|
adoxq %rdi,%r9
|
||||||
|
adcxq %rbp,%r10
|
||||||
|
adoxq %rbp,%r10
|
||||||
|
|
||||||
|
.byte 0x66
|
||||||
|
movq %r15,%rbx
|
||||||
|
shldq $1,%r14,%r15
|
||||||
|
shldq $1,%rcx,%r14
|
||||||
|
|
||||||
|
xorl %ebp,%ebp
|
||||||
|
mulxq %rdx,%rax,%rdx
|
||||||
|
adcxq %rax,%r13
|
||||||
|
adcxq %rdx,%r14
|
||||||
|
movq 32(%rsi),%rdx
|
||||||
|
adcxq %rbp,%r15
|
||||||
|
|
||||||
|
movq %r13,48(%rsp)
|
||||||
|
movq %r14,56(%rsp)
|
||||||
|
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
|
||||||
|
adoxq %rdi,%r8
|
||||||
|
adcxq %r11,%r9
|
||||||
|
|
||||||
|
mulxq 48(%rsi),%rax,%rcx
|
||||||
|
adoxq %rax,%r9
|
||||||
|
adcxq %rcx,%r10
|
||||||
|
|
||||||
|
mulxq 56(%rsi),%rdi,%r11
|
||||||
|
adoxq %rdi,%r10
|
||||||
|
adcxq %rbp,%r11
|
||||||
|
adoxq %rbp,%r11
|
||||||
|
|
||||||
|
movq %r9,%rcx
|
||||||
|
shldq $1,%r8,%r9
|
||||||
|
shldq $1,%rbx,%r8
|
||||||
|
|
||||||
|
xorl %ebp,%ebp
|
||||||
|
mulxq %rdx,%rax,%rdx
|
||||||
|
adcxq %rax,%r15
|
||||||
|
adcxq %rdx,%r8
|
||||||
|
movq 40(%rsi),%rdx
|
||||||
|
adcxq %rbp,%r9
|
||||||
|
|
||||||
|
movq %r15,64(%rsp)
|
||||||
|
movq %r8,72(%rsp)
|
||||||
|
|
||||||
|
|
||||||
|
.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
|
||||||
|
adoxq %rax,%r10
|
||||||
|
adcxq %rbx,%r11
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
|
||||||
|
adoxq %rdi,%r11
|
||||||
|
adcxq %rbp,%r12
|
||||||
|
adoxq %rbp,%r12
|
||||||
|
|
||||||
|
movq %r11,%rbx
|
||||||
|
shldq $1,%r10,%r11
|
||||||
|
shldq $1,%rcx,%r10
|
||||||
|
|
||||||
|
xorl %ebp,%ebp
|
||||||
|
mulxq %rdx,%rax,%rdx
|
||||||
|
adcxq %rax,%r9
|
||||||
|
adcxq %rdx,%r10
|
||||||
|
movq 48(%rsi),%rdx
|
||||||
|
adcxq %rbp,%r11
|
||||||
|
|
||||||
|
movq %r9,80(%rsp)
|
||||||
|
movq %r10,88(%rsp)
|
||||||
|
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
|
||||||
|
adoxq %rax,%r12
|
||||||
|
adoxq %rbp,%r13
|
||||||
|
|
||||||
|
xorq %r14,%r14
|
||||||
|
shldq $1,%r13,%r14
|
||||||
|
shldq $1,%r12,%r13
|
||||||
|
shldq $1,%rbx,%r12
|
||||||
|
|
||||||
|
xorl %ebp,%ebp
|
||||||
|
mulxq %rdx,%rax,%rdx
|
||||||
|
adcxq %rax,%r11
|
||||||
|
adcxq %rdx,%r12
|
||||||
|
movq 56(%rsi),%rdx
|
||||||
|
adcxq %rbp,%r13
|
||||||
|
|
||||||
|
.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
|
||||||
|
.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
|
||||||
|
|
||||||
|
|
||||||
|
mulxq %rdx,%rax,%rdx
|
||||||
|
adoxq %rax,%r13
|
||||||
|
adoxq %rbp,%rdx
|
||||||
|
|
||||||
|
.byte 0x66
|
||||||
|
addq %rdx,%r14
|
||||||
|
|
||||||
|
movq %r13,112(%rsp)
|
||||||
|
movq %r14,120(%rsp)
|
||||||
|
.byte 102,72,15,126,199
|
||||||
|
.byte 102,72,15,126,205
|
||||||
|
|
||||||
|
movq 128(%rsp),%rdx
|
||||||
|
movq (%rsp),%r8
|
||||||
|
movq 8(%rsp),%r9
|
||||||
|
movq 16(%rsp),%r10
|
||||||
|
movq 24(%rsp),%r11
|
||||||
|
movq 32(%rsp),%r12
|
||||||
|
movq 40(%rsp),%r13
|
||||||
|
movq 48(%rsp),%r14
|
||||||
|
movq 56(%rsp),%r15
|
||||||
|
|
||||||
|
call __rsaz_512_reducex
|
||||||
|
|
||||||
|
addq 64(%rsp),%r8
|
||||||
|
adcq 72(%rsp),%r9
|
||||||
|
adcq 80(%rsp),%r10
|
||||||
|
adcq 88(%rsp),%r11
|
||||||
|
adcq 96(%rsp),%r12
|
||||||
|
adcq 104(%rsp),%r13
|
||||||
|
adcq 112(%rsp),%r14
|
||||||
|
adcq 120(%rsp),%r15
|
||||||
|
sbbq %rcx,%rcx
|
||||||
|
|
||||||
|
call __rsaz_512_subtract
|
||||||
|
|
||||||
|
movq %r8,%rdx
|
||||||
|
movq %r9,%rax
|
||||||
|
movl 128+8(%rsp),%r8d
|
||||||
|
movq %rdi,%rsi
|
||||||
|
|
||||||
|
decl %r8d
|
||||||
|
jnz .Loop_sqrx
|
||||||
|
|
||||||
|
.Lsqr_tail:
|
||||||
|
|
||||||
leaq 128+24+48(%rsp),%rax
|
leaq 128+24+48(%rsp),%rax
|
||||||
movq -48(%rax),%r15
|
movq -48(%rax),%r15
|
||||||
|
@ -411,6 +685,10 @@ rsaz_512_mul:
|
||||||
.byte 102,72,15,110,199
|
.byte 102,72,15,110,199
|
||||||
.byte 102,72,15,110,201
|
.byte 102,72,15,110,201
|
||||||
movq %r8,128(%rsp)
|
movq %r8,128(%rsp)
|
||||||
|
movl $0x80100,%r11d
|
||||||
|
andl OPENSSL_ia32cap_P+8(%rip),%r11d
|
||||||
|
cmpl $0x80100,%r11d
|
||||||
|
je .Lmulx
|
||||||
movq (%rdx),%rbx
|
movq (%rdx),%rbx
|
||||||
movq %rdx,%rbp
|
movq %rdx,%rbp
|
||||||
call __rsaz_512_mul
|
call __rsaz_512_mul
|
||||||
|
@ -428,6 +706,29 @@ rsaz_512_mul:
|
||||||
movq 56(%rsp),%r15
|
movq 56(%rsp),%r15
|
||||||
|
|
||||||
call __rsaz_512_reduce
|
call __rsaz_512_reduce
|
||||||
|
jmp .Lmul_tail
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Lmulx:
|
||||||
|
movq %rdx,%rbp
|
||||||
|
movq (%rdx),%rdx
|
||||||
|
call __rsaz_512_mulx
|
||||||
|
|
||||||
|
.byte 102,72,15,126,199
|
||||||
|
.byte 102,72,15,126,205
|
||||||
|
|
||||||
|
movq 128(%rsp),%rdx
|
||||||
|
movq (%rsp),%r8
|
||||||
|
movq 8(%rsp),%r9
|
||||||
|
movq 16(%rsp),%r10
|
||||||
|
movq 24(%rsp),%r11
|
||||||
|
movq 32(%rsp),%r12
|
||||||
|
movq 40(%rsp),%r13
|
||||||
|
movq 48(%rsp),%r14
|
||||||
|
movq 56(%rsp),%r15
|
||||||
|
|
||||||
|
call __rsaz_512_reducex
|
||||||
|
.Lmul_tail:
|
||||||
addq 64(%rsp),%r8
|
addq 64(%rsp),%r8
|
||||||
adcq 72(%rsp),%r9
|
adcq 72(%rsp),%r9
|
||||||
adcq 80(%rsp),%r10
|
adcq 80(%rsp),%r10
|
||||||
|
@ -518,6 +819,10 @@ rsaz_512_mul_gather4:
|
||||||
por %xmm9,%xmm8
|
por %xmm9,%xmm8
|
||||||
pshufd $0x4e,%xmm8,%xmm9
|
pshufd $0x4e,%xmm8,%xmm9
|
||||||
por %xmm9,%xmm8
|
por %xmm9,%xmm8
|
||||||
|
movl $0x80100,%r11d
|
||||||
|
andl OPENSSL_ia32cap_P+8(%rip),%r11d
|
||||||
|
cmpl $0x80100,%r11d
|
||||||
|
je .Lmulx_gather
|
||||||
.byte 102,76,15,126,195
|
.byte 102,76,15,126,195
|
||||||
|
|
||||||
movq %r8,128(%rsp)
|
movq %r8,128(%rsp)
|
||||||
|
@ -698,6 +1003,142 @@ rsaz_512_mul_gather4:
|
||||||
movq 56(%rsp),%r15
|
movq 56(%rsp),%r15
|
||||||
|
|
||||||
call __rsaz_512_reduce
|
call __rsaz_512_reduce
|
||||||
|
jmp .Lmul_gather_tail
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Lmulx_gather:
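# mulx path of rsaz_512_mul_gather4: multiply while gathering the selected power from the pre-scattered table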
|
||||||
|
.byte 102,76,15,126,194
|
||||||
|
|
||||||
|
movq %r8,128(%rsp)
|
||||||
|
movq %rdi,128+8(%rsp)
|
||||||
|
movq %rcx,128+16(%rsp)
|
||||||
|
|
||||||
|
mulxq (%rsi),%rbx,%r8
|
||||||
|
movq %rbx,(%rsp)
|
||||||
|
xorl %edi,%edi
|
||||||
|
|
||||||
|
mulxq 8(%rsi),%rax,%r9
|
||||||
|
|
||||||
|
mulxq 16(%rsi),%rbx,%r10
|
||||||
|
adcxq %rax,%r8
|
||||||
|
|
||||||
|
mulxq 24(%rsi),%rax,%r11
|
||||||
|
adcxq %rbx,%r9
|
||||||
|
|
||||||
|
mulxq 32(%rsi),%rbx,%r12
|
||||||
|
adcxq %rax,%r10
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rax,%r13
|
||||||
|
adcxq %rbx,%r11
|
||||||
|
|
||||||
|
mulxq 48(%rsi),%rbx,%r14
|
||||||
|
adcxq %rax,%r12
|
||||||
|
|
||||||
|
mulxq 56(%rsi),%rax,%r15
|
||||||
|
adcxq %rbx,%r13
|
||||||
|
adcxq %rax,%r14
|
||||||
|
.byte 0x67
|
||||||
|
movq %r8,%rbx
|
||||||
|
adcxq %rdi,%r15
|
||||||
|
|
||||||
|
movq $-7,%rcx
|
||||||
|
jmp .Loop_mulx_gather
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Loop_mulx_gather:
|
||||||
|
movdqa 0(%rbp),%xmm8
|
||||||
|
movdqa 16(%rbp),%xmm9
|
||||||
|
movdqa 32(%rbp),%xmm10
|
||||||
|
movdqa 48(%rbp),%xmm11
|
||||||
|
pand %xmm0,%xmm8
|
||||||
|
movdqa 64(%rbp),%xmm12
|
||||||
|
pand %xmm1,%xmm9
|
||||||
|
movdqa 80(%rbp),%xmm13
|
||||||
|
pand %xmm2,%xmm10
|
||||||
|
movdqa 96(%rbp),%xmm14
|
||||||
|
pand %xmm3,%xmm11
|
||||||
|
movdqa 112(%rbp),%xmm15
|
||||||
|
leaq 128(%rbp),%rbp
|
||||||
|
pand %xmm4,%xmm12
|
||||||
|
pand %xmm5,%xmm13
|
||||||
|
pand %xmm6,%xmm14
|
||||||
|
pand %xmm7,%xmm15
|
||||||
|
por %xmm10,%xmm8
|
||||||
|
por %xmm11,%xmm9
|
||||||
|
por %xmm12,%xmm8
|
||||||
|
por %xmm13,%xmm9
|
||||||
|
por %xmm14,%xmm8
|
||||||
|
por %xmm15,%xmm9
|
||||||
|
|
||||||
|
por %xmm9,%xmm8
|
||||||
|
pshufd $0x4e,%xmm8,%xmm9
|
||||||
|
por %xmm9,%xmm8
|
||||||
|
.byte 102,76,15,126,194
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%rbx
|
||||||
|
adoxq %r9,%r8
|
||||||
|
|
||||||
|
mulxq 8(%rsi),%rax,%r9
|
||||||
|
adcxq %rax,%r8
|
||||||
|
adoxq %r10,%r9
|
||||||
|
|
||||||
|
mulxq 16(%rsi),%rax,%r10
|
||||||
|
adcxq %rax,%r9
|
||||||
|
adoxq %r11,%r10
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r10
|
||||||
|
adoxq %r12,%r11
|
||||||
|
|
||||||
|
mulxq 32(%rsi),%rax,%r12
|
||||||
|
adcxq %rax,%r11
|
||||||
|
adoxq %r13,%r12
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rax,%r13
|
||||||
|
adcxq %rax,%r12
|
||||||
|
adoxq %r14,%r13
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r13
|
||||||
|
.byte 0x67
|
||||||
|
adoxq %r15,%r14
|
||||||
|
|
||||||
|
mulxq 56(%rsi),%rax,%r15
|
||||||
|
movq %rbx,64(%rsp,%rcx,8)
|
||||||
|
adcxq %rax,%r14
|
||||||
|
adoxq %rdi,%r15
|
||||||
|
movq %r8,%rbx
|
||||||
|
adcxq %rdi,%r15
|
||||||
|
|
||||||
|
incq %rcx
|
||||||
|
jnz .Loop_mulx_gather
|
||||||
|
|
||||||
|
movq %r8,64(%rsp)
|
||||||
|
movq %r9,64+8(%rsp)
|
||||||
|
movq %r10,64+16(%rsp)
|
||||||
|
movq %r11,64+24(%rsp)
|
||||||
|
movq %r12,64+32(%rsp)
|
||||||
|
movq %r13,64+40(%rsp)
|
||||||
|
movq %r14,64+48(%rsp)
|
||||||
|
movq %r15,64+56(%rsp)
|
||||||
|
|
||||||
|
movq 128(%rsp),%rdx
|
||||||
|
movq 128+8(%rsp),%rdi
|
||||||
|
movq 128+16(%rsp),%rbp
|
||||||
|
|
||||||
|
movq (%rsp),%r8
|
||||||
|
movq 8(%rsp),%r9
|
||||||
|
movq 16(%rsp),%r10
|
||||||
|
movq 24(%rsp),%r11
|
||||||
|
movq 32(%rsp),%r12
|
||||||
|
movq 40(%rsp),%r13
|
||||||
|
movq 48(%rsp),%r14
|
||||||
|
movq 56(%rsp),%r15
|
||||||
|
|
||||||
|
call __rsaz_512_reducex
|
||||||
|
|
||||||
|
.Lmul_gather_tail:
|
||||||
addq 64(%rsp),%r8
|
addq 64(%rsp),%r8
|
||||||
adcq 72(%rsp),%r9
|
adcq 72(%rsp),%r9
|
||||||
adcq 80(%rsp),%r10
|
adcq 80(%rsp),%r10
|
||||||
|
@ -742,6 +1183,10 @@ rsaz_512_mul_scatter4:
|
||||||
movq %rcx,128(%rsp)
|
movq %rcx,128(%rsp)
|
||||||
|
|
||||||
movq %rdi,%rbp
|
movq %rdi,%rbp
|
||||||
|
movl $0x80100,%r11d
|
||||||
|
andl OPENSSL_ia32cap_P+8(%rip),%r11d
|
||||||
|
cmpl $0x80100,%r11d
|
||||||
|
je .Lmulx_scatter
|
||||||
movq (%rdi),%rbx
|
movq (%rdi),%rbx
|
||||||
call __rsaz_512_mul
|
call __rsaz_512_mul
|
||||||
|
|
||||||
|
@ -758,6 +1203,29 @@ rsaz_512_mul_scatter4:
|
||||||
movq 56(%rsp),%r15
|
movq 56(%rsp),%r15
|
||||||
|
|
||||||
call __rsaz_512_reduce
|
call __rsaz_512_reduce
|
||||||
|
jmp .Lmul_scatter_tail
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Lmulx_scatter:
|
||||||
|
movq (%rdi),%rdx
|
||||||
|
call __rsaz_512_mulx
|
||||||
|
|
||||||
|
.byte 102,72,15,126,199
|
||||||
|
.byte 102,72,15,126,205
|
||||||
|
|
||||||
|
movq 128(%rsp),%rdx
|
||||||
|
movq (%rsp),%r8
|
||||||
|
movq 8(%rsp),%r9
|
||||||
|
movq 16(%rsp),%r10
|
||||||
|
movq 24(%rsp),%r11
|
||||||
|
movq 32(%rsp),%r12
|
||||||
|
movq 40(%rsp),%r13
|
||||||
|
movq 48(%rsp),%r14
|
||||||
|
movq 56(%rsp),%r15
|
||||||
|
|
||||||
|
call __rsaz_512_reducex
|
||||||
|
|
||||||
|
.Lmul_scatter_tail:
|
||||||
addq 64(%rsp),%r8
|
addq 64(%rsp),%r8
|
||||||
adcq 72(%rsp),%r9
|
adcq 72(%rsp),%r9
|
||||||
adcq 80(%rsp),%r10
|
adcq 80(%rsp),%r10
|
||||||
|
@ -804,6 +1272,7 @@ rsaz_512_mul_by_one:
|
||||||
|
|
||||||
subq $128+24,%rsp
|
subq $128+24,%rsp
|
||||||
.Lmul_by_one_body:
|
.Lmul_by_one_body:
|
||||||
|
movl OPENSSL_ia32cap_P+8(%rip),%eax
|
||||||
movq %rdx,%rbp
|
movq %rdx,%rbp
|
||||||
movq %rcx,128(%rsp)
|
movq %rcx,128(%rsp)
|
||||||
|
|
||||||
|
@ -824,7 +1293,16 @@ rsaz_512_mul_by_one:
|
||||||
movdqa %xmm0,64(%rsp)
|
movdqa %xmm0,64(%rsp)
|
||||||
movdqa %xmm0,80(%rsp)
|
movdqa %xmm0,80(%rsp)
|
||||||
movdqa %xmm0,96(%rsp)
|
movdqa %xmm0,96(%rsp)
|
||||||
|
andl $0x80100,%eax
|
||||||
|
cmpl $0x80100,%eax
|
||||||
|
je .Lby_one_callx
|
||||||
call __rsaz_512_reduce
|
call __rsaz_512_reduce
|
||||||
|
jmp .Lby_one_tail
|
||||||
|
.align 32
|
||||||
|
.Lby_one_callx:
|
||||||
|
movq 128(%rsp),%rdx
|
||||||
|
call __rsaz_512_reducex
|
||||||
|
.Lby_one_tail:
|
||||||
movq %r8,(%rdi)
|
movq %r8,(%rdi)
|
||||||
movq %r9,8(%rdi)
|
movq %r9,8(%rdi)
|
||||||
movq %r10,16(%rdi)
|
movq %r10,16(%rdi)
|
||||||
|
@ -928,6 +1406,62 @@ __rsaz_512_reduce:
|
||||||
|
|
||||||
.byte 0xf3,0xc3
|
.byte 0xf3,0xc3
|
||||||
.size __rsaz_512_reduce,.-__rsaz_512_reduce
|
.size __rsaz_512_reduce,.-__rsaz_512_reduce
|
||||||
|
.type __rsaz_512_reducex,@function
|
||||||
|
.align 32
|
||||||
|
__rsaz_512_reducex:
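# mulx/adcx/adox variant of the 512-bit Montgomery reduction (8 iterations)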
|
||||||
|
|
||||||
|
imulq %r8,%rdx
|
||||||
|
xorq %rsi,%rsi
|
||||||
|
movl $8,%ecx
|
||||||
|
jmp .Lreduction_loopx
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Lreduction_loopx:
|
||||||
|
movq %r8,%rbx
|
||||||
|
mulxq 0(%rbp),%rax,%r8
|
||||||
|
adcxq %rbx,%rax
|
||||||
|
adoxq %r9,%r8
|
||||||
|
|
||||||
|
mulxq 8(%rbp),%rax,%r9
|
||||||
|
adcxq %rax,%r8
|
||||||
|
adoxq %r10,%r9
|
||||||
|
|
||||||
|
mulxq 16(%rbp),%rbx,%r10
|
||||||
|
adcxq %rbx,%r9
|
||||||
|
adoxq %r11,%r10
|
||||||
|
|
||||||
|
mulxq 24(%rbp),%rbx,%r11
|
||||||
|
adcxq %rbx,%r10
|
||||||
|
adoxq %r12,%r11
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
|
||||||
|
movq %rdx,%rax
|
||||||
|
movq %r8,%rdx
|
||||||
|
adcxq %rbx,%r11
|
||||||
|
adoxq %r13,%r12
|
||||||
|
|
||||||
|
mulxq 128+8(%rsp),%rbx,%rdx
|
||||||
|
movq %rax,%rdx
|
||||||
|
|
||||||
|
mulxq 40(%rbp),%rax,%r13
|
||||||
|
adcxq %rax,%r12
|
||||||
|
adoxq %r14,%r13
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r13
|
||||||
|
adoxq %r15,%r14
|
||||||
|
|
||||||
|
mulxq 56(%rbp),%rax,%r15
|
||||||
|
movq %rbx,%rdx
|
||||||
|
adcxq %rax,%r14
|
||||||
|
adoxq %rsi,%r15
|
||||||
|
adcxq %rsi,%r15
|
||||||
|
|
||||||
|
decl %ecx
|
||||||
|
jne .Lreduction_loopx
|
||||||
|
|
||||||
|
.byte 0xf3,0xc3
|
||||||
|
.size __rsaz_512_reducex,.-__rsaz_512_reducex
|
||||||
.type __rsaz_512_subtract,@function
|
.type __rsaz_512_subtract,@function
|
||||||
.align 32
|
.align 32
|
||||||
__rsaz_512_subtract:
|
__rsaz_512_subtract:
|
||||||
|
@ -1127,6 +1661,126 @@ __rsaz_512_mul:
|
||||||
|
|
||||||
.byte 0xf3,0xc3
|
.byte 0xf3,0xc3
|
||||||
.size __rsaz_512_mul,.-__rsaz_512_mul
|
.size __rsaz_512_mul,.-__rsaz_512_mul
|
||||||
|
.type __rsaz_512_mulx,@function
|
||||||
|
.align 32
|
||||||
|
__rsaz_512_mulx:
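# 512x512-bit multiply using mulx with adcx/adox carry chains; the 1024-bit product is left on the stack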
|
||||||
|
mulxq (%rsi),%rbx,%r8
|
||||||
|
movq $-6,%rcx
|
||||||
|
|
||||||
|
mulxq 8(%rsi),%rax,%r9
|
||||||
|
movq %rbx,8(%rsp)
|
||||||
|
|
||||||
|
mulxq 16(%rsi),%rbx,%r10
|
||||||
|
adcq %rax,%r8
|
||||||
|
|
||||||
|
mulxq 24(%rsi),%rax,%r11
|
||||||
|
adcq %rbx,%r9
|
||||||
|
|
||||||
|
mulxq 32(%rsi),%rbx,%r12
|
||||||
|
adcq %rax,%r10
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rax,%r13
|
||||||
|
adcq %rbx,%r11
|
||||||
|
|
||||||
|
mulxq 48(%rsi),%rbx,%r14
|
||||||
|
adcq %rax,%r12
|
||||||
|
|
||||||
|
mulxq 56(%rsi),%rax,%r15
|
||||||
|
movq 8(%rbp),%rdx
|
||||||
|
adcq %rbx,%r13
|
||||||
|
adcq %rax,%r14
|
||||||
|
adcq $0,%r15
|
||||||
|
|
||||||
|
xorq %rdi,%rdi
|
||||||
|
jmp .Loop_mulx
|
||||||
|
|
||||||
|
.align 32
|
||||||
|
.Loop_mulx:
|
||||||
|
movq %r8,%rbx
|
||||||
|
mulxq (%rsi),%rax,%r8
|
||||||
|
adcxq %rax,%rbx
|
||||||
|
adoxq %r9,%r8
|
||||||
|
|
||||||
|
mulxq 8(%rsi),%rax,%r9
|
||||||
|
adcxq %rax,%r8
|
||||||
|
adoxq %r10,%r9
|
||||||
|
|
||||||
|
mulxq 16(%rsi),%rax,%r10
|
||||||
|
adcxq %rax,%r9
|
||||||
|
adoxq %r11,%r10
|
||||||
|
|
||||||
|
mulxq 24(%rsi),%rax,%r11
|
||||||
|
adcxq %rax,%r10
|
||||||
|
adoxq %r12,%r11
|
||||||
|
|
||||||
|
.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r11
|
||||||
|
adoxq %r13,%r12
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rax,%r13
|
||||||
|
adcxq %rax,%r12
|
||||||
|
adoxq %r14,%r13
|
||||||
|
|
||||||
|
mulxq 48(%rsi),%rax,%r14
|
||||||
|
adcxq %rax,%r13
|
||||||
|
adoxq %r15,%r14
|
||||||
|
|
||||||
|
mulxq 56(%rsi),%rax,%r15
|
||||||
|
movq 64(%rbp,%rcx,8),%rdx
|
||||||
|
movq %rbx,8+64-8(%rsp,%rcx,8)
|
||||||
|
adcxq %rax,%r14
|
||||||
|
adoxq %rdi,%r15
|
||||||
|
adcxq %rdi,%r15
|
||||||
|
|
||||||
|
incq %rcx
|
||||||
|
jnz .Loop_mulx
|
||||||
|
|
||||||
|
movq %r8,%rbx
|
||||||
|
mulxq (%rsi),%rax,%r8
|
||||||
|
adcxq %rax,%rbx
|
||||||
|
adoxq %r9,%r8
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r8
|
||||||
|
adoxq %r10,%r9
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r9
|
||||||
|
adoxq %r11,%r10
|
||||||
|
|
||||||
|
mulxq 24(%rsi),%rax,%r11
|
||||||
|
adcxq %rax,%r10
|
||||||
|
adoxq %r12,%r11
|
||||||
|
|
||||||
|
mulxq 32(%rsi),%rax,%r12
|
||||||
|
adcxq %rax,%r11
|
||||||
|
adoxq %r13,%r12
|
||||||
|
|
||||||
|
mulxq 40(%rsi),%rax,%r13
|
||||||
|
adcxq %rax,%r12
|
||||||
|
adoxq %r14,%r13
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r13
|
||||||
|
adoxq %r15,%r14
|
||||||
|
|
||||||
|
.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
|
||||||
|
adcxq %rax,%r14
|
||||||
|
adoxq %rdi,%r15
|
||||||
|
adcxq %rdi,%r15
|
||||||
|
|
||||||
|
movq %rbx,8+64-8(%rsp)
|
||||||
|
movq %r8,8+64(%rsp)
|
||||||
|
movq %r9,8+64+8(%rsp)
|
||||||
|
movq %r10,8+64+16(%rsp)
|
||||||
|
movq %r11,8+64+24(%rsp)
|
||||||
|
movq %r12,8+64+32(%rsp)
|
||||||
|
movq %r13,8+64+40(%rsp)
|
||||||
|
movq %r14,8+64+48(%rsp)
|
||||||
|
movq %r15,8+64+56(%rsp)
|
||||||
|
|
||||||
|
.byte 0xf3,0xc3
|
||||||
|
.size __rsaz_512_mulx,.-__rsaz_512_mulx
|
||||||
.globl rsaz_512_scatter4
|
.globl rsaz_512_scatter4
|
||||||
.type rsaz_512_scatter4,@function
|
.type rsaz_512_scatter4,@function
|
||||||
.align 16
|
.align 16
|
||||||
|
|
|
@ -10,14 +10,14 @@ _lazy1:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $1,%rdx
|
movq $1,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *8(%rax)
|
jmp *8(%rax)
|
||||||
_lazy1_end:
|
_lazy1_end:
|
||||||
|
@ -30,14 +30,14 @@ _lazy2:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $2,%rdx
|
movq $2,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *16(%rax)
|
jmp *16(%rax)
|
||||||
_lazy2_end:
|
_lazy2_end:
|
||||||
|
@ -50,14 +50,14 @@ _lazy3:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $3,%rdx
|
movq $3,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *24(%rax)
|
jmp *24(%rax)
|
||||||
_lazy3_end:
|
_lazy3_end:
|
||||||
|
@ -70,14 +70,14 @@ _lazy4:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $4,%rdx
|
movq $4,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *32(%rax)
|
jmp *32(%rax)
|
||||||
_lazy4_end:
|
_lazy4_end:
|
||||||
|
@ -90,14 +90,14 @@ _lazy5:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $5,%rdx
|
movq $5,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *40(%rax)
|
jmp *40(%rax)
|
||||||
_lazy5_end:
|
_lazy5_end:
|
||||||
|
@ -110,14 +110,14 @@ _lazy6:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $6,%rdx
|
movq $6,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *48(%rax)
|
jmp *48(%rax)
|
||||||
_lazy6_end:
|
_lazy6_end:
|
||||||
|
@ -130,14 +130,14 @@ _lazy7:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $7,%rdx
|
movq $7,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *56(%rax)
|
jmp *56(%rax)
|
||||||
_lazy7_end:
|
_lazy7_end:
|
||||||
|
@ -150,14 +150,14 @@ _lazy8:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $8,%rdx
|
movq $8,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *64(%rax)
|
jmp *64(%rax)
|
||||||
_lazy8_end:
|
_lazy8_end:
|
||||||
|
@ -170,14 +170,14 @@ _lazy9:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $9,%rdx
|
movq $9,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *72(%rax)
|
jmp *72(%rax)
|
||||||
_lazy9_end:
|
_lazy9_end:
|
||||||
|
@ -190,14 +190,14 @@ _lazy10:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $10,%rdx
|
movq $10,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *80(%rax)
|
jmp *80(%rax)
|
||||||
_lazy10_end:
|
_lazy10_end:
|
||||||
|
@ -210,14 +210,14 @@ _lazy11:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $11,%rdx
|
movq $11,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *88(%rax)
|
jmp *88(%rax)
|
||||||
_lazy11_end:
|
_lazy11_end:
|
||||||
|
@ -230,14 +230,14 @@ _lazy12:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $12,%rdx
|
movq $12,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *96(%rax)
|
jmp *96(%rax)
|
||||||
_lazy12_end:
|
_lazy12_end:
|
||||||
|
@ -250,14 +250,14 @@ _lazy13:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $13,%rdx
|
movq $13,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *104(%rax)
|
jmp *104(%rax)
|
||||||
_lazy13_end:
|
_lazy13_end:
|
||||||
|
@ -270,14 +270,14 @@ _lazy14:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $14,%rdx
|
movq $14,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *112(%rax)
|
jmp *112(%rax)
|
||||||
_lazy14_end:
|
_lazy14_end:
|
||||||
|
@ -290,14 +290,14 @@ _lazy15:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $15,%rdx
|
movq $15,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *120(%rax)
|
jmp *120(%rax)
|
||||||
_lazy15_end:
|
_lazy15_end:
|
||||||
|
@ -310,14 +310,14 @@ _lazy16:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $16,%rdx
|
movq $16,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *128(%rax)
|
jmp *128(%rax)
|
||||||
_lazy16_end:
|
_lazy16_end:
|
||||||
|
@ -330,14 +330,14 @@ _lazy17:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $17,%rdx
|
movq $17,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *136(%rax)
|
jmp *136(%rax)
|
||||||
_lazy17_end:
|
_lazy17_end:
|
||||||
|
@ -350,14 +350,14 @@ _lazy18:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $18,%rdx
|
movq $18,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *144(%rax)
|
jmp *144(%rax)
|
||||||
_lazy18_end:
|
_lazy18_end:
|
||||||
|
@ -370,14 +370,14 @@ _lazy19:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $19,%rdx
|
movq $19,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *152(%rax)
|
jmp *152(%rax)
|
||||||
_lazy19_end:
|
_lazy19_end:
|
||||||
|
@ -390,14 +390,14 @@ _lazy20:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $20,%rdx
|
movq $20,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *160(%rax)
|
jmp *160(%rax)
|
||||||
_lazy20_end:
|
_lazy20_end:
|
||||||
|
@ -410,14 +410,14 @@ _lazy21:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $21,%rdx
|
movq $21,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *168(%rax)
|
jmp *168(%rax)
|
||||||
_lazy21_end:
|
_lazy21_end:
|
||||||
|
@ -430,14 +430,14 @@ _lazy22:
|
||||||
movq %rdx,56(%rsp)
|
movq %rdx,56(%rsp)
|
||||||
movq %r8,64(%rsp)
|
movq %r8,64(%rsp)
|
||||||
movq %r9,72(%rsp)
|
movq %r9,72(%rsp)
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rcx
|
leaq OPENSSL_UplinkTable(%rip),%rcx
|
||||||
movq $22,%rdx
|
movq $22,%rdx
|
||||||
call OPENSSL_Uplink
|
call OPENSSL_Uplink
|
||||||
movq 48(%rsp),%rcx
|
movq 48(%rsp),%rcx
|
||||||
movq 56(%rsp),%rdx
|
movq 56(%rsp),%rdx
|
||||||
movq 64(%rsp),%r8
|
movq 64(%rsp),%r8
|
||||||
movq 72(%rsp),%r9
|
movq 72(%rsp),%r9
|
||||||
leaq OPENSSL_UplinkTable@GOTPCREL(%rip),%rax
|
leaq OPENSSL_UplinkTable(%rip),%rax
|
||||||
addq $40,%rsp
|
addq $40,%rsp
|
||||||
jmp *176(%rax)
|
jmp *176(%rax)
|
||||||
_lazy22_end:
|
_lazy22_end:
|
||||||
|
|
|
@@ -202,8 +202,9 @@ _mul_1x1:
 .type bn_GF2m_mul_2x2,@function
 .align 16
 bn_GF2m_mul_2x2:
-movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%rax
-btq $33,%rax
+movq OPENSSL_ia32cap_P+4(%rip),%rax
+btq $1,%rax

 jnc .Lvanilla_mul_2x2

 .byte 102,72,15,110,198

@@ -13,6 +13,7 @@ bn_mul_mont:
 jnz .Lmul_enter
 cmpl $8,%r9d
 jb .Lmul_enter
+movl OPENSSL_ia32cap_P+8(%rip),%r11d
 cmpq %rsi,%rdx
 jne .Lmul4x_enter
 testl $7,%r9d

@@ -241,6 +242,9 @@ bn_mul4x_mont:
 movl %r9d,%r9d
 movq %rsp,%rax
 .Lmul4x_enter:
+andl $0x80100,%r11d
+cmpl $0x80100,%r11d
+je .Lmulx4x_enter
 pushq %rbx
 pushq %rbp
 pushq %r12

@@ -651,6 +655,7 @@ bn_mul4x_mont:
 .size bn_mul4x_mont,.-bn_mul4x_mont



 .type bn_sqr8x_mont,@function
 .align 32
 bn_sqr8x_mont:

@@ -723,6 +728,25 @@ bn_sqr8x_mont:
 pxor %xmm0,%xmm0
 .byte 102,72,15,110,207
 .byte 102,73,15,110,218
+movl OPENSSL_ia32cap_P+8(%rip),%eax
+andl $0x80100,%eax
+cmpl $0x80100,%eax
+jne .Lsqr8x_nox
+
+call bn_sqrx8x_internal
+
+
+
+
+leaq (%r8,%rcx,1),%rbx
+movq %rcx,%r9
+movq %rcx,%rdx
+.byte 102,72,15,126,207
+sarq $3+2,%rcx
+jmp .Lsqr8x_sub
+
+.align 32
+.Lsqr8x_nox:
 call bn_sqr8x_internal


@@ -801,5 +825,344 @@ bn_sqr8x_mont:
 .Lsqr8x_epilogue:
 .byte 0xf3,0xc3
 .size bn_sqr8x_mont,.-bn_sqr8x_mont
+.type bn_mulx4x_mont,@function
+.align 32
+bn_mulx4x_mont:
+movq %rsp,%rax
+.Lmulx4x_enter:
+pushq %rbx
+pushq %rbp
+pushq %r12
+pushq %r13
+pushq %r14
+pushq %r15
+.Lmulx4x_prologue:
+
+shll $3,%r9d
+xorq %r10,%r10
+subq %r9,%r10
+movq (%r8),%r8
+leaq -72(%rsp,%r10,1),%rbp
+andq $-128,%rbp
+movq %rsp,%r11
+subq %rbp,%r11
+andq $-4096,%r11
+leaq (%r11,%rbp,1),%rsp
+movq (%rsp),%r10
+cmpq %rbp,%rsp
+ja .Lmulx4x_page_walk
+jmp .Lmulx4x_page_walk_done
+
+.align 16
+.Lmulx4x_page_walk:
+leaq -4096(%rsp),%rsp
+movq (%rsp),%r10
+cmpq %rbp,%rsp
+ja .Lmulx4x_page_walk
+.Lmulx4x_page_walk_done:
+
+leaq (%rdx,%r9,1),%r10
+
+
+
+
+
+
+
+
+
+
+
+
+movq %r9,0(%rsp)
+shrq $5,%r9
+movq %r10,16(%rsp)
+subq $1,%r9
+movq %r8,24(%rsp)
+movq %rdi,32(%rsp)
+movq %rax,40(%rsp)
+movq %r9,48(%rsp)
+jmp .Lmulx4x_body
+
+.align 32
+.Lmulx4x_body:
+leaq 8(%rdx),%rdi
+movq (%rdx),%rdx
+leaq 64+32(%rsp),%rbx
+movq %rdx,%r9
+
+mulxq 0(%rsi),%r8,%rax
+mulxq 8(%rsi),%r11,%r14
+addq %rax,%r11
+movq %rdi,8(%rsp)
+mulxq 16(%rsi),%r12,%r13
+adcq %r14,%r12
+adcq $0,%r13
+
+movq %r8,%rdi
+imulq 24(%rsp),%r8
+xorq %rbp,%rbp
+
+mulxq 24(%rsi),%rax,%r14
+movq %r8,%rdx
+leaq 32(%rsi),%rsi
+adcxq %rax,%r13
+adcxq %rbp,%r14
+
+mulxq 0(%rcx),%rax,%r10
+adcxq %rax,%rdi
+adoxq %r11,%r10
+mulxq 8(%rcx),%rax,%r11
+adcxq %rax,%r10
+adoxq %r12,%r11
+.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
+movq 48(%rsp),%rdi
+movq %r10,-32(%rbx)
+adcxq %rax,%r11
+adoxq %r13,%r12
+mulxq 24(%rcx),%rax,%r15
+movq %r9,%rdx
+movq %r11,-24(%rbx)
+adcxq %rax,%r12
+adoxq %rbp,%r15
+leaq 32(%rcx),%rcx
+movq %r12,-16(%rbx)
+
+jmp .Lmulx4x_1st
+
+.align 32
+.Lmulx4x_1st:
+adcxq %rbp,%r15
+mulxq 0(%rsi),%r10,%rax
+adcxq %r14,%r10
+mulxq 8(%rsi),%r11,%r14
+adcxq %rax,%r11
+mulxq 16(%rsi),%r12,%rax
+adcxq %r14,%r12
+mulxq 24(%rsi),%r13,%r14
+.byte 0x67,0x67
+movq %r8,%rdx
+adcxq %rax,%r13
+adcxq %rbp,%r14
+leaq 32(%rsi),%rsi
+leaq 32(%rbx),%rbx
+
+adoxq %r15,%r10
+mulxq 0(%rcx),%rax,%r15
+adcxq %rax,%r10
+adoxq %r15,%r11
+mulxq 8(%rcx),%rax,%r15
+adcxq %rax,%r11
+adoxq %r15,%r12
+mulxq 16(%rcx),%rax,%r15
+movq %r10,-40(%rbx)
+adcxq %rax,%r12
+movq %r11,-32(%rbx)
+adoxq %r15,%r13
+mulxq 24(%rcx),%rax,%r15
+movq %r9,%rdx
+movq %r12,-24(%rbx)
+adcxq %rax,%r13
+adoxq %rbp,%r15
+leaq 32(%rcx),%rcx
+movq %r13,-16(%rbx)
+
+decq %rdi
+jnz .Lmulx4x_1st
+
+movq 0(%rsp),%rax
+movq 8(%rsp),%rdi
+adcq %rbp,%r15
+addq %r15,%r14
+sbbq %r15,%r15
+movq %r14,-8(%rbx)
+jmp .Lmulx4x_outer
+
+.align 32
+.Lmulx4x_outer:
+movq (%rdi),%rdx
+leaq 8(%rdi),%rdi
+subq %rax,%rsi
+movq %r15,(%rbx)
+leaq 64+32(%rsp),%rbx
+subq %rax,%rcx
+
+mulxq 0(%rsi),%r8,%r11
+xorl %ebp,%ebp
+movq %rdx,%r9
+mulxq 8(%rsi),%r14,%r12
+adoxq -32(%rbx),%r8
+adcxq %r14,%r11
+mulxq 16(%rsi),%r15,%r13
+adoxq -24(%rbx),%r11
+adcxq %r15,%r12
+adoxq %rbp,%r12
+adcxq %rbp,%r13
+
+movq %rdi,8(%rsp)
+.byte 0x67
+movq %r8,%r15
+imulq 24(%rsp),%r8
+xorl %ebp,%ebp
+
+mulxq 24(%rsi),%rax,%r14
+movq %r8,%rdx
+adoxq -16(%rbx),%r12
+adcxq %rax,%r13
+adoxq -8(%rbx),%r13
+adcxq %rbp,%r14
+leaq 32(%rsi),%rsi
+adoxq %rbp,%r14
+
+mulxq 0(%rcx),%rax,%r10
+adcxq %rax,%r15
+adoxq %r11,%r10
+mulxq 8(%rcx),%rax,%r11
+adcxq %rax,%r10
+adoxq %r12,%r11
+mulxq 16(%rcx),%rax,%r12
+movq %r10,-32(%rbx)
+adcxq %rax,%r11
+adoxq %r13,%r12
+mulxq 24(%rcx),%rax,%r15
+movq %r9,%rdx
+movq %r11,-24(%rbx)
+leaq 32(%rcx),%rcx
+adcxq %rax,%r12
+adoxq %rbp,%r15
+movq 48(%rsp),%rdi
+movq %r12,-16(%rbx)
+
+jmp .Lmulx4x_inner
+
+.align 32
+.Lmulx4x_inner:
+mulxq 0(%rsi),%r10,%rax
+adcxq %rbp,%r15
+adoxq %r14,%r10
+mulxq 8(%rsi),%r11,%r14
+adcxq 0(%rbx),%r10
+adoxq %rax,%r11
+mulxq 16(%rsi),%r12,%rax
+adcxq 8(%rbx),%r11
+adoxq %r14,%r12
+mulxq 24(%rsi),%r13,%r14
+movq %r8,%rdx
+adcxq 16(%rbx),%r12
+adoxq %rax,%r13
+adcxq 24(%rbx),%r13
+adoxq %rbp,%r14
+leaq 32(%rsi),%rsi
+leaq 32(%rbx),%rbx
+adcxq %rbp,%r14
+
+adoxq %r15,%r10
+mulxq 0(%rcx),%rax,%r15
+adcxq %rax,%r10
+adoxq %r15,%r11
+mulxq 8(%rcx),%rax,%r15
+adcxq %rax,%r11
+adoxq %r15,%r12
+mulxq 16(%rcx),%rax,%r15
+movq %r10,-40(%rbx)
+adcxq %rax,%r12
+adoxq %r15,%r13
+mulxq 24(%rcx),%rax,%r15
+movq %r9,%rdx
+movq %r11,-32(%rbx)
+movq %r12,-24(%rbx)
+adcxq %rax,%r13
+adoxq %rbp,%r15
+leaq 32(%rcx),%rcx
+movq %r13,-16(%rbx)
+
+decq %rdi
+jnz .Lmulx4x_inner
+
+movq 0(%rsp),%rax
+movq 8(%rsp),%rdi
+adcq %rbp,%r15
+subq 0(%rbx),%rbp
+adcq %r15,%r14
+sbbq %r15,%r15
+movq %r14,-8(%rbx)
+
+cmpq 16(%rsp),%rdi
+jne .Lmulx4x_outer
+
+leaq 64(%rsp),%rbx
+subq %rax,%rcx
+negq %r15
+movq %rax,%rdx
+shrq $3+2,%rax
+movq 32(%rsp),%rdi
+jmp .Lmulx4x_sub
+
+.align 32
+.Lmulx4x_sub:
+movq 0(%rbx),%r11
+movq 8(%rbx),%r12
+movq 16(%rbx),%r13
+movq 24(%rbx),%r14
+leaq 32(%rbx),%rbx
+sbbq 0(%rcx),%r11
+sbbq 8(%rcx),%r12
+sbbq 16(%rcx),%r13
+sbbq 24(%rcx),%r14
+leaq 32(%rcx),%rcx
+movq %r11,0(%rdi)
+movq %r12,8(%rdi)
+movq %r13,16(%rdi)
+movq %r14,24(%rdi)
+leaq 32(%rdi),%rdi
+decq %rax
+jnz .Lmulx4x_sub
+
+sbbq $0,%r15
+leaq 64(%rsp),%rbx
+subq %rdx,%rdi
+
+.byte 102,73,15,110,207
+pxor %xmm0,%xmm0
+pshufd $0,%xmm1,%xmm1
+movq 40(%rsp),%rsi
+jmp .Lmulx4x_cond_copy
+
+.align 32
+.Lmulx4x_cond_copy:
+movdqa 0(%rbx),%xmm2
+movdqa 16(%rbx),%xmm3
+leaq 32(%rbx),%rbx
+movdqu 0(%rdi),%xmm4
+movdqu 16(%rdi),%xmm5
+leaq 32(%rdi),%rdi
+movdqa %xmm0,-32(%rbx)
+movdqa %xmm0,-16(%rbx)
+pcmpeqd %xmm1,%xmm0
+pand %xmm1,%xmm2
+pand %xmm1,%xmm3
+pand %xmm0,%xmm4
+pand %xmm0,%xmm5
+pxor %xmm0,%xmm0
+por %xmm2,%xmm4
+por %xmm3,%xmm5
+movdqu %xmm4,-32(%rdi)
+movdqu %xmm5,-16(%rdi)
+subq $32,%rdx
+jnz .Lmulx4x_cond_copy
+
+movq %rdx,(%rbx)
+
+movq $1,%rax
+movq -48(%rsi),%r15
+movq -40(%rsi),%r14
+movq -32(%rsi),%r13
+movq -24(%rsi),%r12
+movq -16(%rsi),%rbp
+movq -8(%rsi),%rbx
+leaq (%rsi),%rsp
+.Lmulx4x_epilogue:
+.byte 0xf3,0xc3
+.size bn_mulx4x_mont,.-bn_mulx4x_mont
 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 .align 16

File diff suppressed because it is too large