Find CPU-specific variants of the long number routines. Regenerate.
commit d555156c33
parent df78cf4a7d
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.7 2015/05/16 19:08:37 joerg Exp $
+# $NetBSD: Makefile,v 1.8 2015/05/16 22:23:31 joerg Exp $
 
 .include "bsd.own.mk"
 
@@ -11,6 +11,7 @@ CC+= -fno-integrated-as
 
 regen:
 	for i in $$(find ${OPENSSLSRC} -name \*${MACHINE_ARCH}.pl) \
+	    $$(find ${OPENSSLSRC}/crypto/bn/asm -name ${MACHINE_ARCH}-\*.pl) \
 	    ${OPENSSLSRC}/crypto/${MACHINE_ARCH}cpuid.pl ; do \
 		(echo "#include <machine/asm.h>"; CC=${CC:Q} perl $$i elf | sed \
 		-e 's/\(OPENSSL[A-Za-z0-9_+]*\)(%rip)/\1@GOTPCREL(%rip)/' \
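
The sed expression above rewrites direct RIP-relative references to OPENSSL symbols into GOT-indirect ones, so the generated assembly can live in a shared libcrypto. A minimal C illustration, not part of the commit (the array declaration matches OpenSSL of this era only by assumption):

#include <stdint.h>

extern unsigned int OPENSSL_ia32cap_P[2];   /* CPU capability bits */

/* With -fPIC the compiler reaches this global through the GOT, e.g.
 *   movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%rax
 *   movl 4(%rax),%eax
 * which is the form the sed pass produces; a bare
 * OPENSSL_ia32cap_P(%rip) reference would require the symbol to
 * resolve without a run-time relocation against the text segment. */
unsigned int cpu_caps_high(void)
{
    return OPENSSL_ia32cap_P[1];
}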
crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/x86_64-gf2m.S (vendored, new file, +292)
@@ -0,0 +1,292 @@
#include <machine/asm.h>
.text

.type _mul_1x1,@function
.align 16
_mul_1x1:
	subq $128+8,%rsp
	movq $-1,%r9
	leaq (%rax,%rax,1),%rsi
	shrq $3,%r9
	leaq (,%rax,4),%rdi
	andq %rax,%r9
	leaq (,%rax,8),%r12
	sarq $63,%rax
	leaq (%r9,%r9,1),%r10
	sarq $63,%rsi
	leaq (,%r9,4),%r11
	andq %rbp,%rax
	sarq $63,%rdi
	movq %rax,%rdx
	shlq $63,%rax
	andq %rbp,%rsi
	shrq $1,%rdx
	movq %rsi,%rcx
	shlq $62,%rsi
	andq %rbp,%rdi
	shrq $2,%rcx
	xorq %rsi,%rax
	movq %rdi,%rbx
	shlq $61,%rdi
	xorq %rcx,%rdx
	shrq $3,%rbx
	xorq %rdi,%rax
	xorq %rbx,%rdx

	movq %r9,%r13
	movq $0,0(%rsp)
	xorq %r10,%r13
	movq %r9,8(%rsp)
	movq %r11,%r14
	movq %r10,16(%rsp)
	xorq %r12,%r14
	movq %r13,24(%rsp)

	xorq %r11,%r9
	movq %r11,32(%rsp)
	xorq %r11,%r10
	movq %r9,40(%rsp)
	xorq %r11,%r13
	movq %r10,48(%rsp)
	xorq %r14,%r9
	movq %r13,56(%rsp)
	xorq %r14,%r10

	movq %r12,64(%rsp)
	xorq %r14,%r13
	movq %r9,72(%rsp)
	xorq %r11,%r9
	movq %r10,80(%rsp)
	xorq %r11,%r10
	movq %r13,88(%rsp)

	xorq %r11,%r13
	movq %r14,96(%rsp)
	movq %r8,%rsi
	movq %r9,104(%rsp)
	andq %rbp,%rsi
	movq %r10,112(%rsp)
	shrq $4,%rbp
	movq %r13,120(%rsp)
	movq %r8,%rdi
	andq %rbp,%rdi
	shrq $4,%rbp

	movq (%rsp,%rsi,8),%xmm0
	movq %r8,%rsi
	andq %rbp,%rsi
	shrq $4,%rbp
	movq (%rsp,%rdi,8),%rcx
	movq %r8,%rdi
	movq %rcx,%rbx
	shlq $4,%rcx
	andq %rbp,%rdi
	movq (%rsp,%rsi,8),%xmm1
	shrq $60,%rbx
	xorq %rcx,%rax
	pslldq $1,%xmm1
	movq %r8,%rsi
	shrq $4,%rbp
	xorq %rbx,%rdx
	andq %rbp,%rsi
	shrq $4,%rbp
	pxor %xmm1,%xmm0
	movq (%rsp,%rdi,8),%rcx
	movq %r8,%rdi
	movq %rcx,%rbx
	shlq $12,%rcx
	andq %rbp,%rdi
	movq (%rsp,%rsi,8),%xmm1
	shrq $52,%rbx
	xorq %rcx,%rax
	pslldq $2,%xmm1
	movq %r8,%rsi
	shrq $4,%rbp
	xorq %rbx,%rdx
	andq %rbp,%rsi
	shrq $4,%rbp
	pxor %xmm1,%xmm0
	movq (%rsp,%rdi,8),%rcx
	movq %r8,%rdi
	movq %rcx,%rbx
	shlq $20,%rcx
	andq %rbp,%rdi
	movq (%rsp,%rsi,8),%xmm1
	shrq $44,%rbx
	xorq %rcx,%rax
	pslldq $3,%xmm1
	movq %r8,%rsi
	shrq $4,%rbp
	xorq %rbx,%rdx
	andq %rbp,%rsi
	shrq $4,%rbp
	pxor %xmm1,%xmm0
	movq (%rsp,%rdi,8),%rcx
	movq %r8,%rdi
	movq %rcx,%rbx
	shlq $28,%rcx
	andq %rbp,%rdi
	movq (%rsp,%rsi,8),%xmm1
	shrq $36,%rbx
	xorq %rcx,%rax
	pslldq $4,%xmm1
	movq %r8,%rsi
	shrq $4,%rbp
	xorq %rbx,%rdx
	andq %rbp,%rsi
	shrq $4,%rbp
	pxor %xmm1,%xmm0
	movq (%rsp,%rdi,8),%rcx
	movq %r8,%rdi
	movq %rcx,%rbx
	shlq $36,%rcx
	andq %rbp,%rdi
	movq (%rsp,%rsi,8),%xmm1
	shrq $28,%rbx
	xorq %rcx,%rax
	pslldq $5,%xmm1
	movq %r8,%rsi
	shrq $4,%rbp
	xorq %rbx,%rdx
	andq %rbp,%rsi
	shrq $4,%rbp
	pxor %xmm1,%xmm0
	movq (%rsp,%rdi,8),%rcx
	movq %r8,%rdi
	movq %rcx,%rbx
	shlq $44,%rcx
	andq %rbp,%rdi
	movq (%rsp,%rsi,8),%xmm1
	shrq $20,%rbx
	xorq %rcx,%rax
	pslldq $6,%xmm1
	movq %r8,%rsi
	shrq $4,%rbp
	xorq %rbx,%rdx
	andq %rbp,%rsi
	shrq $4,%rbp
	pxor %xmm1,%xmm0
	movq (%rsp,%rdi,8),%rcx
	movq %r8,%rdi
	movq %rcx,%rbx
	shlq $52,%rcx
	andq %rbp,%rdi
	movq (%rsp,%rsi,8),%xmm1
	shrq $12,%rbx
	xorq %rcx,%rax
	pslldq $7,%xmm1
	movq %r8,%rsi
	shrq $4,%rbp
	xorq %rbx,%rdx
	andq %rbp,%rsi
	shrq $4,%rbp
	pxor %xmm1,%xmm0
	movq (%rsp,%rdi,8),%rcx
	movq %rcx,%rbx
	shlq $60,%rcx
	.byte 102,72,15,126,198
	shrq $4,%rbx
	xorq %rcx,%rax
	psrldq $8,%xmm0
	xorq %rbx,%rdx
	.byte 102,72,15,126,199
	xorq %rsi,%rax
	xorq %rdi,%rdx

	addq $128+8,%rsp
	.byte 0xf3,0xc3
.Lend_mul_1x1:
.size _mul_1x1,.-_mul_1x1

.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,@function
.align 16
bn_GF2m_mul_2x2:
	movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%rax
	btq $33,%rax
	jnc .Lvanilla_mul_2x2

	.byte 102,72,15,110,198
	.byte 102,72,15,110,201
	.byte 102,72,15,110,210
	.byte 102,73,15,110,216
	movdqa %xmm0,%xmm4
	movdqa %xmm1,%xmm5
	.byte 102,15,58,68,193,0
	pxor %xmm2,%xmm4
	pxor %xmm3,%xmm5
	.byte 102,15,58,68,211,0
	.byte 102,15,58,68,229,0
	xorps %xmm0,%xmm4
	xorps %xmm2,%xmm4
	movdqa %xmm4,%xmm5
	pslldq $8,%xmm4
	psrldq $8,%xmm5
	pxor %xmm4,%xmm2
	pxor %xmm5,%xmm0
	movdqu %xmm2,0(%rdi)
	movdqu %xmm0,16(%rdi)
	.byte 0xf3,0xc3

.align 16
.Lvanilla_mul_2x2:
	leaq -136(%rsp),%rsp
	movq %r14,80(%rsp)
	movq %r13,88(%rsp)
	movq %r12,96(%rsp)
	movq %rbp,104(%rsp)
	movq %rbx,112(%rsp)
.Lbody_mul_2x2:
	movq %rdi,32(%rsp)
	movq %rsi,40(%rsp)
	movq %rdx,48(%rsp)
	movq %rcx,56(%rsp)
	movq %r8,64(%rsp)

	movq $15,%r8
	movq %rsi,%rax
	movq %rcx,%rbp
	call _mul_1x1
	movq %rax,16(%rsp)
	movq %rdx,24(%rsp)

	movq 48(%rsp),%rax
	movq 64(%rsp),%rbp
	call _mul_1x1
	movq %rax,0(%rsp)
	movq %rdx,8(%rsp)

	movq 40(%rsp),%rax
	movq 56(%rsp),%rbp
	xorq 48(%rsp),%rax
	xorq 64(%rsp),%rbp
	call _mul_1x1
	movq 0(%rsp),%rbx
	movq 8(%rsp),%rcx
	movq 16(%rsp),%rdi
	movq 24(%rsp),%rsi
	movq 32(%rsp),%rbp

	xorq %rdx,%rax
	xorq %rcx,%rdx
	xorq %rbx,%rax
	movq %rbx,0(%rbp)
	xorq %rdi,%rdx
	movq %rsi,24(%rbp)
	xorq %rsi,%rax
	xorq %rsi,%rdx
	xorq %rdx,%rax
	movq %rdx,16(%rbp)
	movq %rax,8(%rbp)

	movq 80(%rsp),%r14
	movq 88(%rsp),%r13
	movq 96(%rsp),%r12
	movq 104(%rsp),%rbp
	movq 112(%rsp),%rbx
	leaq 136(%rsp),%rsp
	.byte 0xf3,0xc3
.Lend_mul_2x2:
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
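
For orientation: _mul_1x1 above computes a 64x64-bit carry-less (polynomial) product over GF(2), via a 128-byte window table on the scalar path and PCLMULQDQ (the .byte 102,15,58,68,... encodings) on the SSE path. A minimal C sketch of the operation, not the vendored code:

#include <stdint.h>

/* Carry-less multiply of two 64-bit polynomials over GF(2)[x]:
 * schoolbook reference for what _mul_1x1 computes with its table. */
static void gf2m_mul_1x1(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
    uint64_t h = 0, l = 0;
    for (int i = 0; i < 64; i++) {
        if ((b >> i) & 1) {                 /* XOR in a shifted copy of a */
            l ^= a << i;
            h ^= i ? a >> (64 - i) : 0;     /* bits carried past 2^64 */
        }
    }
    *hi = h;
    *lo = l;
}

/* bn_GF2m_mul_2x2(r, a1, a0, b1, b0) then needs only three such 1x1
 * products, Karatsuba-style over GF(2):
 *   hi = a1*b1, lo = a0*b0, mid = (a1^a0)*(b1^b0) ^ hi ^ lo. */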
crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/x86_64-mont.S (vendored, new file, +1375)
(File diff suppressed because it is too large.)
crypto/external/bsd/openssl/lib/libcrypto/arch/x86_64/x86_64-mont5.S (vendored, new file, +785)
@@ -0,0 +1,785 @@
#include <machine/asm.h>
.text

.globl bn_mul_mont_gather5
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
	testl $3,%r9d
	jnz .Lmul_enter
	cmpl $8,%r9d
	jb .Lmul_enter
	jmp .Lmul4x_enter

.align 16
.Lmul_enter:
	movl %r9d,%r9d
	movl 8(%rsp),%r10d
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	movq %rsp,%rax
	leaq 2(%r9),%r11
	negq %r11
	leaq (%rsp,%r11,8),%rsp
	andq $-1024,%rsp

	movq %rax,8(%rsp,%r9,8)
.Lmul_body:
	movq %rdx,%r12
	movq %r10,%r11
	shrq $3,%r10
	andq $7,%r11
	notq %r10
	leaq .Lmagic_masks(%rip),%rax
	andq $3,%r10
	leaq 96(%r12,%r11,8),%r12
	movq 0(%rax,%r10,8),%xmm4
	movq 8(%rax,%r10,8),%xmm5
	movq 16(%rax,%r10,8),%xmm6
	movq 24(%rax,%r10,8),%xmm7

	movq -96(%r12),%xmm0
	movq -32(%r12),%xmm1
	pand %xmm4,%xmm0
	movq 32(%r12),%xmm2
	pand %xmm5,%xmm1
	movq 96(%r12),%xmm3
	pand %xmm6,%xmm2
	por %xmm1,%xmm0
	pand %xmm7,%xmm3
	por %xmm2,%xmm0
	leaq 256(%r12),%r12
	por %xmm3,%xmm0

	.byte 102,72,15,126,195

	movq (%r8),%r8
	movq (%rsi),%rax

	xorq %r14,%r14
	xorq %r15,%r15

	movq -96(%r12),%xmm0
	movq -32(%r12),%xmm1
	pand %xmm4,%xmm0
	movq 32(%r12),%xmm2
	pand %xmm5,%xmm1

	movq %r8,%rbp
	mulq %rbx
	movq %rax,%r10
	movq (%rcx),%rax

	movq 96(%r12),%xmm3
	pand %xmm6,%xmm2
	por %xmm1,%xmm0
	pand %xmm7,%xmm3

	imulq %r10,%rbp
	movq %rdx,%r11

	por %xmm2,%xmm0
	leaq 256(%r12),%r12
	por %xmm3,%xmm0

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%r13

	leaq 1(%r15),%r15
	jmp .L1st_enter

.align 16
.L1st:
	addq %rax,%r13
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%r13
	movq %r10,%r11
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

.L1st_enter:
	mulq %rbx
	addq %rax,%r11
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	leaq 1(%r15),%r15
	movq %rdx,%r10

	mulq %rbp
	cmpq %r9,%r15
	jne .L1st

	.byte 102,72,15,126,195

	addq %rax,%r13
	movq (%rsi),%rax
	adcq $0,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13
	movq %r10,%r11

	xorq %rdx,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r9,8)
	movq %rdx,(%rsp,%r9,8)

	leaq 1(%r14),%r14
	jmp .Louter
.align 16
.Louter:
	xorq %r15,%r15
	movq %r8,%rbp
	movq (%rsp),%r10

	movq -96(%r12),%xmm0
	movq -32(%r12),%xmm1
	pand %xmm4,%xmm0
	movq 32(%r12),%xmm2
	pand %xmm5,%xmm1

	mulq %rbx
	addq %rax,%r10
	movq (%rcx),%rax
	adcq $0,%rdx

	movq 96(%r12),%xmm3
	pand %xmm6,%xmm2
	por %xmm1,%xmm0
	pand %xmm7,%xmm3

	imulq %r10,%rbp
	movq %rdx,%r11

	por %xmm2,%xmm0
	leaq 256(%r12),%r12
	por %xmm3,%xmm0

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq 8(%rsp),%r10
	movq %rdx,%r13

	leaq 1(%r15),%r15
	jmp .Linner_enter

.align 16
.Linner:
	addq %rax,%r13
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	movq (%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

.Linner_enter:
	mulq %rbx
	addq %rax,%r11
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%r10
	movq %rdx,%r11
	adcq $0,%r11
	leaq 1(%r15),%r15

	mulq %rbp
	cmpq %r9,%r15
	jne .Linner

	.byte 102,72,15,126,195

	addq %rax,%r13
	movq (%rsi),%rax
	adcq $0,%rdx
	addq %r10,%r13
	movq (%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

	xorq %rdx,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r9,8)
	movq %rdx,(%rsp,%r9,8)

	leaq 1(%r14),%r14
	cmpq %r9,%r14
	jl .Louter

	xorq %r14,%r14
	movq (%rsp),%rax
	leaq (%rsp),%rsi
	movq %r9,%r15
	jmp .Lsub
.align 16
.Lsub:	sbbq (%rcx,%r14,8),%rax
	movq %rax,(%rdi,%r14,8)
	movq 8(%rsi,%r14,8),%rax
	leaq 1(%r14),%r14
	decq %r15
	jnz .Lsub

	sbbq $0,%rax
	xorq %r14,%r14
	andq %rax,%rsi
	notq %rax
	movq %rdi,%rcx
	andq %rax,%rcx
	movq %r9,%r15
	orq %rcx,%rsi
.align 16
.Lcopy:
	movq (%rsi,%r14,8),%rax
	movq %r14,(%rsp,%r14,8)
	movq %rax,(%rdi,%r14,8)
	leaq 1(%r14),%r14
	subq $1,%r15
	jnz .Lcopy

	movq 8(%rsp,%r9,8),%rsi
	movq $1,%rax
	movq (%rsi),%r15
	movq 8(%rsi),%r14
	movq 16(%rsi),%r13
	movq 24(%rsi),%r12
	movq 32(%rsi),%rbp
	movq 40(%rsi),%rbx
	leaq 48(%rsi),%rsp
.Lmul_epilogue:
	.byte 0xf3,0xc3
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
.type bn_mul4x_mont_gather5,@function
.align 16
bn_mul4x_mont_gather5:
.Lmul4x_enter:
	movl %r9d,%r9d
	movl 8(%rsp),%r10d
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	movq %rsp,%rax
	leaq 4(%r9),%r11
	negq %r11
	leaq (%rsp,%r11,8),%rsp
	andq $-1024,%rsp

	movq %rax,8(%rsp,%r9,8)
.Lmul4x_body:
	movq %rdi,16(%rsp,%r9,8)
	movq %rdx,%r12
	movq %r10,%r11
	shrq $3,%r10
	andq $7,%r11
	notq %r10
	leaq .Lmagic_masks(%rip),%rax
	andq $3,%r10
	leaq 96(%r12,%r11,8),%r12
	movq 0(%rax,%r10,8),%xmm4
	movq 8(%rax,%r10,8),%xmm5
	movq 16(%rax,%r10,8),%xmm6
	movq 24(%rax,%r10,8),%xmm7

	movq -96(%r12),%xmm0
	movq -32(%r12),%xmm1
	pand %xmm4,%xmm0
	movq 32(%r12),%xmm2
	pand %xmm5,%xmm1
	movq 96(%r12),%xmm3
	pand %xmm6,%xmm2
	por %xmm1,%xmm0
	pand %xmm7,%xmm3
	por %xmm2,%xmm0
	leaq 256(%r12),%r12
	por %xmm3,%xmm0

	.byte 102,72,15,126,195
	movq (%r8),%r8
	movq (%rsi),%rax

	xorq %r14,%r14
	xorq %r15,%r15

	movq -96(%r12),%xmm0
	movq -32(%r12),%xmm1
	pand %xmm4,%xmm0
	movq 32(%r12),%xmm2
	pand %xmm5,%xmm1

	movq %r8,%rbp
	mulq %rbx
	movq %rax,%r10
	movq (%rcx),%rax

	movq 96(%r12),%xmm3
	pand %xmm6,%xmm2
	por %xmm1,%xmm0
	pand %xmm7,%xmm3

	imulq %r10,%rbp
	movq %rdx,%r11

	por %xmm2,%xmm0
	leaq 256(%r12),%r12
	por %xmm3,%xmm0

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 4(%r15),%r15
	adcq $0,%rdx
	movq %rdi,(%rsp)
	movq %rdx,%r13
	jmp .L1st4x
.align 16
.L1st4x:
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%rsp,%r15,8)
	movq %rdx,%r13

	mulq %rbx
	addq %rax,%r10
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq 8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	leaq 4(%r15),%r15
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq -16(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-32(%rsp,%r15,8)
	movq %rdx,%r13
	cmpq %r9,%r15
	jl .L1st4x

	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%rsp,%r15,8)
	movq %rdx,%r13

	.byte 102,72,15,126,195

	xorq %rdi,%rdi
	addq %r10,%r13
	adcq $0,%rdi
	movq %r13,-8(%rsp,%r15,8)
	movq %rdi,(%rsp,%r15,8)

	leaq 1(%r14),%r14
.align 4
.Louter4x:
	xorq %r15,%r15
	movq -96(%r12),%xmm0
	movq -32(%r12),%xmm1
	pand %xmm4,%xmm0
	movq 32(%r12),%xmm2
	pand %xmm5,%xmm1

	movq (%rsp),%r10
	movq %r8,%rbp
	mulq %rbx
	addq %rax,%r10
	movq (%rcx),%rax
	adcq $0,%rdx

	movq 96(%r12),%xmm3
	pand %xmm6,%xmm2
	por %xmm1,%xmm0
	pand %xmm7,%xmm3

	imulq %r10,%rbp
	movq %rdx,%r11

	por %xmm2,%xmm0
	leaq 256(%r12),%r12
	por %xmm3,%xmm0

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	addq 8(%rsp),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 4(%r15),%r15
	adcq $0,%rdx
	movq %rdx,%r13
	jmp .Linner4x
.align 16
.Linner4x:
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq -16(%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-32(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq -8(%rsp,%r15,8),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %r13,-24(%rsp,%r15,8)
	movq %rdx,%r13

	mulq %rbx
	addq %rax,%r10
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq (%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq 8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-16(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq 8(%rsp,%r15,8),%r11
	adcq $0,%rdx
	leaq 4(%r15),%r15
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq -16(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %r13,-40(%rsp,%r15,8)
	movq %rdx,%r13
	cmpq %r9,%r15
	jl .Linner4x

	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq -16(%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %rdi,-32(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq -8(%rsp,%r15,8),%r11
	adcq $0,%rdx
	leaq 1(%r14),%r14
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %r13,-24(%rsp,%r15,8)
	movq %rdx,%r13

	.byte 102,72,15,126,195
	movq %rdi,-16(%rsp,%r15,8)

	xorq %rdi,%rdi
	addq %r10,%r13
	adcq $0,%rdi
	addq (%rsp,%r9,8),%r13
	adcq $0,%rdi
	movq %r13,-8(%rsp,%r15,8)
	movq %rdi,(%rsp,%r15,8)

	cmpq %r9,%r14
	jl .Louter4x
	movq 16(%rsp,%r9,8),%rdi
	movq 0(%rsp),%rax
	pxor %xmm0,%xmm0
	movq 8(%rsp),%rdx
	shrq $2,%r9
	leaq (%rsp),%rsi
	xorq %r14,%r14

	subq 0(%rcx),%rax
	movq 16(%rsi),%rbx
	movq 24(%rsi),%rbp
	sbbq 8(%rcx),%rdx
	leaq -1(%r9),%r15
	jmp .Lsub4x
.align 16
.Lsub4x:
	movq %rax,0(%rdi,%r14,8)
	movq %rdx,8(%rdi,%r14,8)
	sbbq 16(%rcx,%r14,8),%rbx
	movq 32(%rsi,%r14,8),%rax
	movq 40(%rsi,%r14,8),%rdx
	sbbq 24(%rcx,%r14,8),%rbp
	movq %rbx,16(%rdi,%r14,8)
	movq %rbp,24(%rdi,%r14,8)
	sbbq 32(%rcx,%r14,8),%rax
	movq 48(%rsi,%r14,8),%rbx
	movq 56(%rsi,%r14,8),%rbp
	sbbq 40(%rcx,%r14,8),%rdx
	leaq 4(%r14),%r14
	decq %r15
	jnz .Lsub4x

	movq %rax,0(%rdi,%r14,8)
	movq 32(%rsi,%r14,8),%rax
	sbbq 16(%rcx,%r14,8),%rbx
	movq %rdx,8(%rdi,%r14,8)
	sbbq 24(%rcx,%r14,8),%rbp
	movq %rbx,16(%rdi,%r14,8)

	sbbq $0,%rax
	movq %rbp,24(%rdi,%r14,8)
	xorq %r14,%r14
	andq %rax,%rsi
	notq %rax
	movq %rdi,%rcx
	andq %rax,%rcx
	leaq -1(%r9),%r15
	orq %rcx,%rsi

	movdqu (%rsi),%xmm1
	movdqa %xmm0,(%rsp)
	movdqu %xmm1,(%rdi)
	jmp .Lcopy4x
.align 16
.Lcopy4x:
	movdqu 16(%rsi,%r14,1),%xmm2
	movdqu 32(%rsi,%r14,1),%xmm1
	movdqa %xmm0,16(%rsp,%r14,1)
	movdqu %xmm2,16(%rdi,%r14,1)
	movdqa %xmm0,32(%rsp,%r14,1)
	movdqu %xmm1,32(%rdi,%r14,1)
	leaq 32(%r14),%r14
	decq %r15
	jnz .Lcopy4x

	shlq $2,%r9
	movdqu 16(%rsi,%r14,1),%xmm2
	movdqa %xmm0,16(%rsp,%r14,1)
	movdqu %xmm2,16(%rdi,%r14,1)
	movq 8(%rsp,%r9,8),%rsi
	movq $1,%rax
	movq (%rsi),%r15
	movq 8(%rsi),%r14
	movq 16(%rsi),%r13
	movq 24(%rsi),%r12
	movq 32(%rsi),%rbp
	movq 40(%rsi),%rbx
	leaq 48(%rsi),%rsp
.Lmul4x_epilogue:
	.byte 0xf3,0xc3
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
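
bn_mul_mont_gather5 and bn_mul4x_mont_gather5 above implement word-level Montgomery multiplication (the mulq/adcq chains of .L1st/.Louter and their four-way unrolled variants), with the b operand fetched through the masked gather defined below. A rough C sketch of the underlying algorithm, not a drop-in replacement; it assumes 64-bit limbs, gcc/clang unsigned __int128, and n0 = -n[0]^-1 mod 2^64 as stored in the BN_MONT_CTX:

#include <stdint.h>

/* r = a * b * 2^(-64*num) mod n (CIOS form), num >= 1 */
static void mont_mul(uint64_t *r, const uint64_t *a, const uint64_t *b,
                     const uint64_t *n, uint64_t n0, int num)
{
    uint64_t t[num + 2], s[num];             /* C99 VLAs as scratch */
    for (int j = 0; j < num + 2; j++) t[j] = 0;

    for (int i = 0; i < num; i++) {
        unsigned __int128 c = 0;
        for (int j = 0; j < num; j++) {      /* t += a * b[i] */
            c += (unsigned __int128)a[j] * b[i] + t[j];
            t[j] = (uint64_t)c;
            c >>= 64;
        }
        c += t[num];
        t[num] = (uint64_t)c;
        t[num + 1] = (uint64_t)(c >> 64);

        uint64_t m = t[0] * n0;              /* the asm's imulq %r10,%rbp */
        c = (unsigned __int128)m * n[0] + t[0];
        c >>= 64;
        for (int j = 1; j < num; j++) {      /* t = (t + m*n) / 2^64 */
            c += (unsigned __int128)m * n[j] + t[j];
            t[j - 1] = (uint64_t)c;
            c >>= 64;
        }
        c += t[num];
        t[num - 1] = (uint64_t)c;
        t[num] = t[num + 1] + (uint64_t)(c >> 64);
        t[num + 1] = 0;
    }

    __int128 d = 0;                          /* trial t - n, as .Lsub does */
    for (int j = 0; j < num; j++) {
        d += (__int128)t[j] - n[j];
        s[j] = (uint64_t)d;
        d >>= 64;                            /* borrow stays 0 or -1 */
    }
    d += t[num];                             /* top carry decides */
    for (int j = 0; j < num; j++)            /* select, as .Lcopy does */
        r[j] = (d >= 0) ? s[j] : t[j];
}

The asm performs the final selection with masks rather than a branch; this sketch uses a plain conditional for clarity only.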
.globl bn_scatter5
.type bn_scatter5,@function
.align 16
bn_scatter5:
	cmpq $0,%rsi
	jz .Lscatter_epilogue
	leaq (%rdx,%rcx,8),%rdx
.Lscatter:
	movq (%rdi),%rax
	leaq 8(%rdi),%rdi
	movq %rax,(%rdx)
	leaq 256(%rdx),%rdx
	subq $1,%rsi
	jnz .Lscatter
.Lscatter_epilogue:
	.byte 0xf3,0xc3
.size bn_scatter5,.-bn_scatter5

.globl bn_gather5
.type bn_gather5,@function
.align 16
bn_gather5:
	movq %rcx,%r11
	shrq $3,%rcx
	andq $7,%r11
	notq %rcx
	leaq .Lmagic_masks(%rip),%rax
	andq $3,%rcx
	leaq 96(%rdx,%r11,8),%rdx
	movq 0(%rax,%rcx,8),%xmm4
	movq 8(%rax,%rcx,8),%xmm5
	movq 16(%rax,%rcx,8),%xmm6
	movq 24(%rax,%rcx,8),%xmm7
	jmp .Lgather
.align 16
.Lgather:
	movq -96(%rdx),%xmm0
	movq -32(%rdx),%xmm1
	pand %xmm4,%xmm0
	movq 32(%rdx),%xmm2
	pand %xmm5,%xmm1
	movq 96(%rdx),%xmm3
	pand %xmm6,%xmm2
	por %xmm1,%xmm0
	pand %xmm7,%xmm3
	por %xmm2,%xmm0
	leaq 256(%rdx),%rdx
	por %xmm3,%xmm0

	movq %xmm0,(%rdi)
	leaq 8(%rdi),%rdi
	subq $1,%rsi
	jnz .Lgather
	.byte 0xf3,0xc3
.LSEH_end_bn_gather5:
.size bn_gather5,.-bn_gather5
.align 64
.Lmagic_masks:
.long 0,0, 0,0, 0,0, -1,-1
.long 0,0, 0,0, 0,0, 0,0
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
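
The scatter/gather pair and the .Lmagic_masks table place table entries 256 bytes apart and reconstruct the wanted entry from masked loads, so the low bits of the (secret) exponent-window index do not directly pick a load address. A simplified, assumption-labeled C sketch of mask-based selection; the real bn_gather5 masks among four qwords per group rather than scanning a whole table:

#include <stddef.h>
#include <stdint.h>

/* Read every candidate and keep only the one whose index matches,
 * using an all-ones/all-zeros mask instead of a data-dependent
 * address or branch. */
static uint64_t ct_select(const uint64_t *table, size_t nelem,
                          size_t secret_idx)
{
    uint64_t result = 0;
    for (size_t i = 0; i < nelem; i++) {
        uint64_t mask = -(uint64_t)(i == secret_idx); /* ~0 or 0 */
        result |= table[i] & mask;
    }
    return result;
}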