Back out my over-eager backout - we need this for sparc64 when building the netbsd32
compat libs, but lots of #ifdefs are still missing before this is ready for use by sparc.
This commit is contained in:
parent
32bba72871
commit
031637411f
@ -1,3 +1,9 @@
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
#endif
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
.align 256
|
||||
@ -296,8 +302,8 @@ AES_Te:
|
||||
.align 64
|
||||
.skip 16
|
||||
_sparcv9_AES_encrypt:
|
||||
save %sp,-112-16,%sp
|
||||
stx %i7,[%sp+0+112+0] ! off-load return address
|
||||
save %sp,-STACK_FRAME-16,%sp
|
||||
stx %i7,[%sp+STACK_BIAS+STACK_FRAME+0] ! off-load return address
|
||||
ld [%i5+240],%i7
|
||||
ld [%i5+0],%l4
|
||||
ld [%i5+4],%l5 !
|
||||
@ -585,7 +591,7 @@ _sparcv9_AES_encrypt:
|
||||
ldub [%i7+%g5],%g5
|
||||
sll %o0,16,%o0
|
||||
xor %l0,%i0,%i0
|
||||
ldx [%sp+0+112+0],%i7 ! restore return address
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+0],%i7 ! restore return address
|
||||
|
||||
sll %o1,8,%o1 !
|
||||
xor %o0,%i0,%i0
|
||||
@ -623,7 +629,7 @@ AES_encrypt:
|
||||
or %o0,%o1,%g1
|
||||
andcc %g1,3,%g0
|
||||
bnz,pn %xcc,.Lunaligned_enc
|
||||
save %sp,-112,%sp
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
|
||||
ld [%i0+0],%o0
|
||||
ld [%i0+4],%o1
|
||||
@ -1028,8 +1034,8 @@ AES_Td:
|
||||
.align 64
|
||||
.skip 16
|
||||
_sparcv9_AES_decrypt:
|
||||
save %sp,-112-16,%sp
|
||||
stx %i7,[%sp+0+112+0] ! off-load return address
|
||||
save %sp,-STACK_FRAME-16,%sp
|
||||
stx %i7,[%sp+STACK_BIAS+STACK_FRAME+0] ! off-load return address
|
||||
ld [%i5+240],%i7
|
||||
ld [%i5+0],%l4
|
||||
ld [%i5+4],%l5 !
|
||||
@ -1317,7 +1323,7 @@ _sparcv9_AES_decrypt:
|
||||
ldub [%i7+%g5],%g5
|
||||
sll %o0,16,%o0
|
||||
xor %l0,%i0,%i0
|
||||
ldx [%sp+0+112+0],%i7 ! restore return address
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+0],%i7 ! restore return address
|
||||
|
||||
sll %o1,8,%o1 !
|
||||
xor %o0,%i0,%i0
|
||||
@ -1355,7 +1361,7 @@ AES_decrypt:
|
||||
or %o0,%o1,%g1
|
||||
andcc %g1,3,%g0
|
||||
bnz,pn %xcc,.Lunaligned_dec
|
||||
save %sp,-112,%sp
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
|
||||
ld [%i0+0],%o0
|
||||
ld [%i0+4],%o1
|
||||
|
1180
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/aesfx-sparcv9.S
vendored
Normal file
1180
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/aesfx-sparcv9.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,10 @@
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
.globl aes_t4_encrypt
|
||||
@ -508,9 +515,9 @@ _aes128_load_deckey=_aes128_loadkey
|
||||
.globl aes128_t4_cbc_encrypt
|
||||
.align 32
|
||||
aes128_t4_cbc_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L128_cbc_enc_abort
|
||||
be,pn SIZE_T_CC, .L128_cbc_enc_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f0
|
||||
@ -529,7 +536,7 @@ aes128_t4_cbc_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 127
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<128 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
|
||||
brnz,pn %l5, .L128cbc_enc_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -655,7 +662,7 @@ aes128_t4_cbc_encrypt:
|
||||
.globl aes128_t4_ctr32_encrypt
|
||||
.align 32
|
||||
aes128_t4_ctr32_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
prefetch [%i0], 20
|
||||
@ -685,7 +692,7 @@ aes128_t4_ctr32_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L128_ctr32_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -904,7 +911,7 @@ aes128_t4_ctr32_encrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L128_ctr32_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L128_ctr32_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -922,17 +929,17 @@ aes128_t4_ctr32_encrypt:
|
||||
.globl aes128_t4_xts_encrypt
|
||||
.align 32
|
||||
aes128_t4_xts_encrypt:
|
||||
save %sp, -112-16, %sp
|
||||
save %sp, -STACK_FRAME-16, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
mov %i5, %o0
|
||||
add %fp, 0-16, %o1
|
||||
add %fp, STACK_BIAS-16, %o1
|
||||
call aes_t4_encrypt
|
||||
mov %i4, %o2
|
||||
|
||||
add %fp, 0-16, %l7
|
||||
add %fp, STACK_BIAS-16, %l7
|
||||
ldxa [%l7]0x88, %g2
|
||||
add %fp, 0-8, %l7
|
||||
add %fp, STACK_BIAS-8, %l7
|
||||
ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
|
||||
|
||||
sethi %hi(0x76543210), %l7
|
||||
@ -955,7 +962,7 @@ aes128_t4_xts_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L128_xts_enblk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -1253,7 +1260,7 @@ aes128_t4_xts_encrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L128_xts_enblk2x
|
||||
bgu,pt SIZE_T_CC, .L128_xts_enblk2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -1274,11 +1281,11 @@ aes128_t4_xts_encrypt:
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
.align 32
|
||||
.L128_xts_ensteal:
|
||||
std %f0, [%fp + 0-16] ! copy of output
|
||||
std %f2, [%fp + 0-8]
|
||||
std %f0, [%fp + STACK_BIAS-16] ! copy of output
|
||||
std %f2, [%fp + STACK_BIAS-8]
|
||||
|
||||
srl %l0, 3, %l0
|
||||
add %fp, 0-16, %l7
|
||||
add %fp, STACK_BIAS-16, %l7
|
||||
add %i0, %l0, %i0 ! original %i0+%i2&-15
|
||||
add %i1, %l2, %i1 ! original %i1+%i2&-15
|
||||
mov 0, %l0
|
||||
@ -1306,17 +1313,17 @@ aes128_t4_xts_encrypt:
|
||||
.globl aes128_t4_xts_decrypt
|
||||
.align 32
|
||||
aes128_t4_xts_decrypt:
|
||||
save %sp, -112-16, %sp
|
||||
save %sp, -STACK_FRAME-16, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
mov %i5, %o0
|
||||
add %fp, 0-16, %o1
|
||||
add %fp, STACK_BIAS-16, %o1
|
||||
call aes_t4_encrypt
|
||||
mov %i4, %o2
|
||||
|
||||
add %fp, 0-16, %l7
|
||||
add %fp, STACK_BIAS-16, %l7
|
||||
ldxa [%l7]0x88, %g2
|
||||
add %fp, 0-8, %l7
|
||||
add %fp, STACK_BIAS-8, %l7
|
||||
ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
|
||||
|
||||
sethi %hi(0x76543210), %l7
|
||||
@ -1342,7 +1349,7 @@ aes128_t4_xts_decrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L128_xts_deblk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -1641,7 +1648,7 @@ aes128_t4_xts_decrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L128_xts_deblk2x
|
||||
bgu,pt SIZE_T_CC, .L128_xts_deblk2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -1699,11 +1706,11 @@ aes128_t4_xts_decrypt:
|
||||
.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
|
||||
.word 0x85b38d82 !fxor %f14,%f2,%f2
|
||||
|
||||
std %f0, [%fp + 0-16]
|
||||
std %f2, [%fp + 0-8]
|
||||
std %f0, [%fp + STACK_BIAS-16]
|
||||
std %f2, [%fp + STACK_BIAS-8]
|
||||
|
||||
srl %l0, 3, %l0
|
||||
add %fp, 0-16, %l7
|
||||
add %fp, STACK_BIAS-16, %l7
|
||||
add %i0, %l0, %i0 ! original %i0+%i2&-15
|
||||
add %i1, %l2, %i1 ! original %i1+%i2&-15
|
||||
mov 0, %l0
|
||||
@ -1732,9 +1739,9 @@ aes128_t4_xts_decrypt:
|
||||
.globl aes128_t4_cbc_decrypt
|
||||
.align 32
|
||||
aes128_t4_cbc_decrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L128_cbc_dec_abort
|
||||
be,pn SIZE_T_CC, .L128_cbc_dec_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f12 ! load ivec
|
||||
@ -1753,7 +1760,7 @@ aes128_t4_cbc_decrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L128cbc_dec_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -1979,7 +1986,7 @@ aes128_t4_cbc_decrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L128_cbc_dec_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L128_cbc_dec_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -2304,9 +2311,9 @@ _aes256_load_deckey=_aes192_loadkey
|
||||
.globl aes256_t4_cbc_encrypt
|
||||
.align 32
|
||||
aes256_t4_cbc_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L256_cbc_enc_abort
|
||||
be,pn SIZE_T_CC, .L256_cbc_enc_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f0
|
||||
@ -2325,7 +2332,7 @@ aes256_t4_cbc_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 127
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<128 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
|
||||
brnz,pn %l5, .L256cbc_enc_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -2451,9 +2458,9 @@ aes256_t4_cbc_encrypt:
|
||||
.globl aes192_t4_cbc_encrypt
|
||||
.align 32
|
||||
aes192_t4_cbc_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L192_cbc_enc_abort
|
||||
be,pn SIZE_T_CC, .L192_cbc_enc_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f0
|
||||
@ -2472,7 +2479,7 @@ aes192_t4_cbc_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 127
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<128 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
|
||||
brnz,pn %l5, .L192cbc_enc_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -2598,7 +2605,7 @@ aes192_t4_cbc_encrypt:
|
||||
.globl aes256_t4_ctr32_encrypt
|
||||
.align 32
|
||||
aes256_t4_ctr32_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
prefetch [%i0], 20
|
||||
@ -2628,7 +2635,7 @@ aes256_t4_ctr32_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L256_ctr32_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -2847,7 +2854,7 @@ aes256_t4_ctr32_encrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L256_ctr32_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L256_ctr32_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -2865,17 +2872,17 @@ aes256_t4_ctr32_encrypt:
|
||||
.globl aes256_t4_xts_encrypt
|
||||
.align 32
|
||||
aes256_t4_xts_encrypt:
|
||||
save %sp, -112-16, %sp
|
||||
save %sp, -STACK_FRAME-16, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
mov %i5, %o0
|
||||
add %fp, 0-16, %o1
|
||||
add %fp, STACK_BIAS-16, %o1
|
||||
call aes_t4_encrypt
|
||||
mov %i4, %o2
|
||||
|
||||
add %fp, 0-16, %l7
|
||||
add %fp, STACK_BIAS-16, %l7
|
||||
ldxa [%l7]0x88, %g2
|
||||
add %fp, 0-8, %l7
|
||||
add %fp, STACK_BIAS-8, %l7
|
||||
ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
|
||||
|
||||
sethi %hi(0x76543210), %l7
|
||||
@ -2898,7 +2905,7 @@ aes256_t4_xts_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L256_xts_enblk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -3196,7 +3203,7 @@ aes256_t4_xts_encrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L256_xts_enblk2x
|
||||
bgu,pt SIZE_T_CC, .L256_xts_enblk2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -3217,11 +3224,11 @@ aes256_t4_xts_encrypt:
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
.align 32
|
||||
.L256_xts_ensteal:
|
||||
std %f0, [%fp + 0-16] ! copy of output
|
||||
std %f2, [%fp + 0-8]
|
||||
std %f0, [%fp + STACK_BIAS-16] ! copy of output
|
||||
std %f2, [%fp + STACK_BIAS-8]
|
||||
|
||||
srl %l0, 3, %l0
|
||||
add %fp, 0-16, %l7
|
||||
add %fp, STACK_BIAS-16, %l7
|
||||
add %i0, %l0, %i0 ! original %i0+%i2&-15
|
||||
add %i1, %l2, %i1 ! original %i1+%i2&-15
|
||||
mov 0, %l0
|
||||
@ -3249,17 +3256,17 @@ aes256_t4_xts_encrypt:
|
||||
.globl aes256_t4_xts_decrypt
|
||||
.align 32
|
||||
aes256_t4_xts_decrypt:
|
||||
save %sp, -112-16, %sp
|
||||
save %sp, -STACK_FRAME-16, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
mov %i5, %o0
|
||||
add %fp, 0-16, %o1
|
||||
add %fp, STACK_BIAS-16, %o1
|
||||
call aes_t4_encrypt
|
||||
mov %i4, %o2
|
||||
|
||||
add %fp, 0-16, %l7
|
||||
add %fp, STACK_BIAS-16, %l7
|
||||
ldxa [%l7]0x88, %g2
|
||||
add %fp, 0-8, %l7
|
||||
add %fp, STACK_BIAS-8, %l7
|
||||
ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
|
||||
|
||||
sethi %hi(0x76543210), %l7
|
||||
@ -3285,7 +3292,7 @@ aes256_t4_xts_decrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L256_xts_deblk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -3584,7 +3591,7 @@ aes256_t4_xts_decrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L256_xts_deblk2x
|
||||
bgu,pt SIZE_T_CC, .L256_xts_deblk2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -3642,11 +3649,11 @@ aes256_t4_xts_decrypt:
|
||||
.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
|
||||
.word 0x85b38d82 !fxor %f14,%f2,%f2
|
||||
|
||||
std %f0, [%fp + 0-16]
|
||||
std %f2, [%fp + 0-8]
|
||||
std %f0, [%fp + STACK_BIAS-16]
|
||||
std %f2, [%fp + STACK_BIAS-8]
|
||||
|
||||
srl %l0, 3, %l0
|
||||
add %fp, 0-16, %l7
|
||||
add %fp, STACK_BIAS-16, %l7
|
||||
add %i0, %l0, %i0 ! original %i0+%i2&-15
|
||||
add %i1, %l2, %i1 ! original %i1+%i2&-15
|
||||
mov 0, %l0
|
||||
@ -3675,7 +3682,7 @@ aes256_t4_xts_decrypt:
|
||||
.globl aes192_t4_ctr32_encrypt
|
||||
.align 32
|
||||
aes192_t4_ctr32_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
prefetch [%i0], 20
|
||||
@ -3705,7 +3712,7 @@ aes192_t4_ctr32_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L192_ctr32_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -3924,7 +3931,7 @@ aes192_t4_ctr32_encrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L192_ctr32_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L192_ctr32_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -3942,9 +3949,9 @@ aes192_t4_ctr32_encrypt:
|
||||
.globl aes192_t4_cbc_decrypt
|
||||
.align 32
|
||||
aes192_t4_cbc_decrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L192_cbc_dec_abort
|
||||
be,pn SIZE_T_CC, .L192_cbc_dec_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f12 ! load ivec
|
||||
@ -3963,7 +3970,7 @@ aes192_t4_cbc_decrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L192cbc_dec_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -4189,7 +4196,7 @@ aes192_t4_cbc_decrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L192_cbc_dec_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L192_cbc_dec_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -4210,9 +4217,9 @@ aes192_t4_cbc_decrypt:
|
||||
.globl aes256_t4_cbc_decrypt
|
||||
.align 32
|
||||
aes256_t4_cbc_decrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L256_cbc_dec_abort
|
||||
be,pn SIZE_T_CC, .L256_cbc_dec_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f12 ! load ivec
|
||||
@ -4231,7 +4238,7 @@ aes256_t4_cbc_decrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L256cbc_dec_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -4457,7 +4464,7 @@ aes256_t4_cbc_decrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L256_cbc_dec_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L256_cbc_dec_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
|
@ -3,12 +3,12 @@
|
||||
|
||||
/*
|
||||
* ====================================================================
|
||||
* Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
* project.
|
||||
* Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Rights for redistribution and usage in source and binary forms are
|
||||
* granted according to the OpenSSL license. Warranty of any kind is
|
||||
* disclaimed.
|
||||
* Licensed under the OpenSSL license (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
#include "sparc_arch.h"
|
||||
|
||||
.text
|
||||
|
||||
.globl cmll_t4_encrypt
|
||||
@ -974,9 +976,9 @@ _cmll256_decrypt_2x:
|
||||
.globl cmll128_t4_cbc_encrypt
|
||||
.align 32
|
||||
cmll128_t4_cbc_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L128_cbc_enc_abort
|
||||
be,pn SIZE_T_CC, .L128_cbc_enc_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f0
|
||||
@ -995,7 +997,7 @@ cmll128_t4_cbc_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 127
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<128 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
|
||||
brnz,pn %l5, .L128cbc_enc_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -1121,9 +1123,9 @@ cmll128_t4_cbc_encrypt:
|
||||
.globl cmll256_t4_cbc_encrypt
|
||||
.align 32
|
||||
cmll256_t4_cbc_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L256_cbc_enc_abort
|
||||
be,pn SIZE_T_CC, .L256_cbc_enc_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f0
|
||||
@ -1142,7 +1144,7 @@ cmll256_t4_cbc_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 127
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<128 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
|
||||
brnz,pn %l5, .L256cbc_enc_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -1268,9 +1270,9 @@ cmll256_t4_cbc_encrypt:
|
||||
.globl cmll128_t4_cbc_decrypt
|
||||
.align 32
|
||||
cmll128_t4_cbc_decrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L128_cbc_dec_abort
|
||||
be,pn SIZE_T_CC, .L128_cbc_dec_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f12 ! load ivec
|
||||
@ -1289,7 +1291,7 @@ cmll128_t4_cbc_decrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L128cbc_dec_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -1515,7 +1517,7 @@ cmll128_t4_cbc_decrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L128_cbc_dec_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L128_cbc_dec_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -1536,9 +1538,9 @@ cmll128_t4_cbc_decrypt:
|
||||
.globl cmll256_t4_cbc_decrypt
|
||||
.align 32
|
||||
cmll256_t4_cbc_decrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
cmp %i2, 0
|
||||
be,pn %icc, .L256_cbc_dec_abort
|
||||
be,pn SIZE_T_CC, .L256_cbc_dec_abort
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
sub %i0, %i1, %l5 ! %i0!=%i1
|
||||
ld [%i4 + 0], %f12 ! load ivec
|
||||
@ -1557,7 +1559,7 @@ cmll256_t4_cbc_decrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L256cbc_dec_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -1783,7 +1785,7 @@ cmll256_t4_cbc_decrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L256_cbc_dec_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L256_cbc_dec_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -1804,7 +1806,7 @@ cmll256_t4_cbc_decrypt:
|
||||
.globl cmll128_t4_ctr32_encrypt
|
||||
.align 32
|
||||
cmll128_t4_ctr32_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
prefetch [%i0], 20
|
||||
@ -1834,7 +1836,7 @@ cmll128_t4_ctr32_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L128_ctr32_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -2053,7 +2055,7 @@ cmll128_t4_ctr32_encrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L128_ctr32_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L128_ctr32_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
@ -2071,7 +2073,7 @@ cmll128_t4_ctr32_encrypt:
|
||||
.globl cmll256_t4_ctr32_encrypt
|
||||
.align 32
|
||||
cmll256_t4_ctr32_encrypt:
|
||||
save %sp, -112, %sp
|
||||
save %sp, -STACK_FRAME, %sp
|
||||
srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
|
||||
|
||||
prefetch [%i0], 20
|
||||
@ -2101,7 +2103,7 @@ cmll256_t4_ctr32_encrypt:
|
||||
and %i1, 7, %l2
|
||||
cmp %i2, 255
|
||||
movrnz %l2, 0, %l5 ! if ( %i1&7 ||
|
||||
movleu %icc, 0, %l5 ! %i2<256 ||
|
||||
movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
|
||||
brnz,pn %l5, .L256_ctr32_blk ! %i0==%i1)
|
||||
srl %l3, %l2, %l3
|
||||
|
||||
@ -2320,7 +2322,7 @@ cmll256_t4_ctr32_encrypt:
|
||||
stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
add %i1, 8, %i1
|
||||
stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
|
||||
bgu,pt %icc, .L256_ctr32_blk_loop2x
|
||||
bgu,pt SIZE_T_CC, .L256_ctr32_blk_loop2x
|
||||
add %i1, 8, %i1
|
||||
|
||||
add %l5, %i2, %i2
|
||||
|
@ -1,26 +1,9 @@
|
||||
! des_enc.m4
|
||||
! des_enc.S (generated from des_enc.m4)
|
||||
! Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
!
|
||||
! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
|
||||
!
|
||||
! Version 1.0. 32-bit version.
|
||||
!
|
||||
! June 8, 2000.
|
||||
!
|
||||
! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
|
||||
! by Andy Polyakov.
|
||||
!
|
||||
! January 1, 2003.
|
||||
!
|
||||
! Assembler version: Copyright Svend Olaf Mikkelsen.
|
||||
!
|
||||
! Original C code: Copyright Eric A. Young.
|
||||
!
|
||||
! This code can be freely used by LibDES/SSLeay/OpenSSL users.
|
||||
!
|
||||
! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
|
||||
!
|
||||
! This version can be redistributed.
|
||||
! Licensed under the OpenSSL license (the "License"). You may not use
|
||||
! this file except in compliance with the License. You can obtain a copy
|
||||
! in the file LICENSE in the source distribution or at
|
||||
! https://www.openssl.org/source/license.html
|
||||
!
|
||||
! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
|
||||
!
|
||||
@ -48,6 +31,10 @@
|
||||
|
||||
#include <openssl/opensslconf.h>
|
||||
|
||||
#ifdef OPENSSL_FIPSCANISTER
|
||||
#include <openssl/fipssyms.h>
|
||||
#endif
|
||||
|
||||
#if defined(__SUNPRO_C) && defined(__sparcv9)
|
||||
# define ABI64 /* They've said -xarch=v9 at command line */
|
||||
#elif defined(__GNUC__) && defined(__arch64__)
|
||||
@ -63,9 +50,6 @@
|
||||
# define STPTR stx
|
||||
# define ARG0 128
|
||||
# define ARGSZ 8
|
||||
# ifndef __sparc_v9__
|
||||
# define __sparc_v9__
|
||||
# endif
|
||||
#else
|
||||
# define FRAME -96
|
||||
# define BIAS 0
|
||||
@ -159,7 +143,7 @@
|
||||
! other half (use).
|
||||
!
|
||||
! In this version we do two rounds in a loop repeated 7 times
|
||||
! and two rounds seperately.
|
||||
! and two rounds separately.
|
||||
!
|
||||
! One half has the bits for the sboxes in the following positions:
|
||||
!
|
||||
@ -430,11 +414,7 @@
|
||||
xor out5, local1, out5 ! 1 finished
|
||||
|
||||
xor out5, local2, out5 ! 3 finished
|
||||
#ifdef __sparc_v9__
|
||||
bne,pt %icc, .des_enc.1
|
||||
#else
|
||||
bne .des_enc.1
|
||||
#endif
|
||||
and local4, 252, local1 ! sbox 1 next round
|
||||
|
||||
! two rounds more:
|
||||
@ -688,11 +668,7 @@
|
||||
xor in5, local1, in5 ! 1 finished
|
||||
|
||||
xor in5, local2, in5 ! 3 finished
|
||||
#ifdef __sparc_v9__
|
||||
bne,pt %icc, .des_dec.1
|
||||
#else
|
||||
bne .des_dec.1
|
||||
#endif
|
||||
and local4, 252, local1 ! sbox 1 next round
|
||||
|
||||
! two rounds more:
|
||||
@ -829,11 +805,7 @@ DES_encrypt1:
|
||||
ld [in0], in5 ! left
|
||||
cmp in2, 0 ! enc
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
be,pn %icc, .encrypt.dec ! enc/dec
|
||||
#else
|
||||
be .encrypt.dec
|
||||
#endif
|
||||
ld [in0+4], out5 ! right
|
||||
|
||||
! parameter 6 1/2 for include encryption/decryption
|
||||
@ -1061,11 +1033,7 @@ DES_encrypt1:
|
||||
xor out5, local1, out5 ! 1 finished
|
||||
|
||||
xor out5, local2, out5 ! 3 finished
|
||||
#ifdef __sparc_v9__
|
||||
bne,pt %icc, .des_encrypt1.1
|
||||
#else
|
||||
bne .des_encrypt1.1
|
||||
#endif
|
||||
and local4, 252, local1 ! sbox 1 next round
|
||||
|
||||
! two rounds more:
|
||||
@ -1505,11 +1473,7 @@ DES_encrypt2:
|
||||
|
||||
! we use our own stackframe
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
be,pn %icc, .encrypt2.dec ! decryption
|
||||
#else
|
||||
be .encrypt2.dec
|
||||
#endif
|
||||
STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ]
|
||||
|
||||
ld [in3], out0 ! key 7531 first round
|
||||
@ -2033,11 +1997,7 @@ DES_ncbc_encrypt:
|
||||
|
||||
cmp in5, 0 ! enc
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
be,pn %icc, .ncbc.dec
|
||||
#else
|
||||
be .ncbc.dec
|
||||
#endif
|
||||
STPTR in4, [%sp+BIAS+ARG0+4*ARGSZ]
|
||||
|
||||
! addr left right temp label
|
||||
@ -2048,18 +2008,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in4, 3, global0
|
||||
bne,pn %icc, .LLE1
|
||||
nop
|
||||
|
||||
lda [in4] 0x88, in5
|
||||
add in4, 4, local3
|
||||
|
||||
ba,pt %icc, .LLE1a
|
||||
lda [local3] 0x88, out5
|
||||
#endif
|
||||
|
||||
.LLE1:
|
||||
ldub [in4+3], in5
|
||||
|
||||
@ -2095,11 +2043,7 @@ DES_ncbc_encrypt:
|
||||
|
||||
addcc in2, -8, in2 ! bytes missing when first block done
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
bl,pn %icc, .ncbc.enc.seven.or.less
|
||||
#else
|
||||
bl .ncbc.enc.seven.or.less
|
||||
#endif
|
||||
mov in3, in4 ! schedule
|
||||
|
||||
.ncbc.enc.next.block:
|
||||
@ -2111,18 +2055,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in0, 3, global0
|
||||
bne,pn %icc, .LLE2
|
||||
nop
|
||||
|
||||
lda [in0] 0x88, out4
|
||||
add in0, 4, local3
|
||||
|
||||
ba,pt %icc, .LLE2a
|
||||
lda [local3] 0x88, global4
|
||||
#endif
|
||||
|
||||
.LLE2:
|
||||
ldub [in0+3], out4
|
||||
|
||||
@ -2390,11 +2322,7 @@ DES_ncbc_encrypt:
|
||||
xor out5, local1, out5 ! 1 finished
|
||||
|
||||
xor out5, local2, out5 ! 3 finished
|
||||
#ifdef __sparc_v9__
|
||||
bne,pt %icc, .ncbc.enc.1
|
||||
#else
|
||||
bne .ncbc.enc.1
|
||||
#endif
|
||||
and local4, 252, local1 ! sbox 1 next round
|
||||
|
||||
! two rounds more:
|
||||
@ -2509,11 +2437,7 @@ DES_ncbc_encrypt:
|
||||
xor out5, local2, out5
|
||||
! include encryption ks in3
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
bl,pn %icc, .ncbc.enc.next.block_fp
|
||||
#else
|
||||
bl .ncbc.enc.next.block_fp
|
||||
#endif
|
||||
add in0, 8, in0 ! input address
|
||||
|
||||
! If 8 or more bytes are to be encrypted after this block,
|
||||
@ -2527,18 +2451,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in0, 3, global0
|
||||
bne,pn %icc, .LLE12
|
||||
nop
|
||||
|
||||
lda [in0] 0x88, global3
|
||||
add in0, 4, local5
|
||||
|
||||
ba,pt %icc, .LLE12a
|
||||
lda [local5] 0x88, global4
|
||||
#endif
|
||||
|
||||
.LLE12:
|
||||
ldub [in0+3], global3
|
||||
|
||||
@ -2712,18 +2624,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in1, 3, global0
|
||||
bne,pn %icc, .SLE10
|
||||
nop
|
||||
|
||||
sta out0, [in1] 0x88
|
||||
add in1, 4, local3
|
||||
|
||||
ba,pt %icc, .SLE10a
|
||||
sta out1, [local3] 0x88
|
||||
#endif
|
||||
|
||||
.SLE10:
|
||||
and out0, 255, local3
|
||||
stub local3, [in1+0]
|
||||
@ -2767,7 +2667,7 @@ DES_ncbc_encrypt:
|
||||
xor global4, local1, out5 ! iv xor next block
|
||||
|
||||
ba .ncbc.enc.next.block_2
|
||||
add in1, 8, in1 ! output adress
|
||||
add in1, 8, in1 ! output address
|
||||
|
||||
.ncbc.enc.next.block_fp:
|
||||
|
||||
@ -2854,18 +2754,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in1, 3, global0
|
||||
bne,pn %icc, .SLE1
|
||||
nop
|
||||
|
||||
sta in5, [in1] 0x88
|
||||
add in1, 4, local3
|
||||
|
||||
ba,pt %icc, .SLE1a
|
||||
sta out5, [local3] 0x88
|
||||
#endif
|
||||
|
||||
.SLE1:
|
||||
and in5, 255, local3
|
||||
stub local3, [in1+0]
|
||||
@ -2902,22 +2790,14 @@ DES_ncbc_encrypt:
|
||||
|
||||
addcc in2, -8, in2 ! bytes missing when next block done
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
|
||||
#else
|
||||
bpos .ncbc.enc.next.block
|
||||
#endif
|
||||
add in1, 8, in1
|
||||
|
||||
.ncbc.enc.seven.or.less:
|
||||
|
||||
cmp in2, -8
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
ble,pt %icc, .ncbc.enc.finish
|
||||
#else
|
||||
ble .ncbc.enc.finish
|
||||
#endif
|
||||
nop
|
||||
|
||||
add in2, 8, local1 ! bytes to load
|
||||
@ -2995,18 +2875,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local4, 3, global0
|
||||
bne,pn %icc, .SLE2
|
||||
nop
|
||||
|
||||
sta in5, [local4] 0x88
|
||||
add local4, 4, local5
|
||||
|
||||
ba,pt %icc, .SLE2a
|
||||
sta out5, [local5] 0x88
|
||||
#endif
|
||||
|
||||
.SLE2:
|
||||
and in5, 255, local5
|
||||
stub local5, [local4+0]
|
||||
@ -3052,11 +2920,7 @@ DES_ncbc_encrypt:
|
||||
add in3, 120, in3
|
||||
|
||||
LDPTR [%sp+BIAS+ARG0+4*ARGSZ] , local7 ! ivec
|
||||
#ifdef __sparc_v9__
|
||||
ble,pn %icc, .ncbc.dec.finish
|
||||
#else
|
||||
ble .ncbc.dec.finish
|
||||
#endif
|
||||
mov in3, in4 ! schedule
|
||||
|
||||
STPTR in1, [%sp+BIAS+ARG0+1*ARGSZ]
|
||||
@ -3069,18 +2933,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local7, 3, global0
|
||||
bne,pn %icc, .LLE3
|
||||
nop
|
||||
|
||||
lda [local7] 0x88, in0
|
||||
add local7, 4, local3
|
||||
|
||||
ba,pt %icc, .LLE3a
|
||||
lda [local3] 0x88, in1
|
||||
#endif
|
||||
|
||||
.LLE3:
|
||||
ldub [local7+3], in0
|
||||
|
||||
@ -3123,18 +2975,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local5, 3, global0
|
||||
bne,pn %icc, .LLE4
|
||||
nop
|
||||
|
||||
lda [local5] 0x88, in5
|
||||
add local5, 4, local3
|
||||
|
||||
ba,pt %icc, .LLE4a
|
||||
lda [local3] 0x88, out5
|
||||
#endif
|
||||
|
||||
.LLE4:
|
||||
ldub [local5+3], in5
|
||||
|
||||
@ -3354,11 +3194,7 @@ DES_ncbc_encrypt:
|
||||
! in2 is compared to 8 in the rounds
|
||||
|
||||
xor out5, in0, out4 ! iv xor
|
||||
#ifdef __sparc_v9__
|
||||
bl,pn %icc, .ncbc.dec.seven.or.less
|
||||
#else
|
||||
bl .ncbc.dec.seven.or.less
|
||||
#endif
|
||||
xor in5, in1, global4 ! iv xor
|
||||
|
||||
! Load ivec next block now, since input and output address might be the same.
|
||||
@ -3370,19 +3206,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local5, 3, global0
|
||||
bne,pn %icc, .LLE5
|
||||
nop
|
||||
|
||||
lda [local5] 0x88, in0
|
||||
add local5, 4, local5
|
||||
|
||||
lda [local5] 0x88, in1
|
||||
ba,pt %icc, .LLE5a
|
||||
add local5, 4, local5
|
||||
#endif
|
||||
|
||||
.LLE5:
|
||||
ldub [local5+3], in0
|
||||
|
||||
@ -3423,18 +3246,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local7, 3, global0
|
||||
bne,pn %icc, .SLE3
|
||||
nop
|
||||
|
||||
sta out4, [local7] 0x88
|
||||
add local7, 4, local3
|
||||
|
||||
ba,pt %icc, .SLE3a
|
||||
sta global4, [local3] 0x88
|
||||
#endif
|
||||
|
||||
.SLE3:
|
||||
and out4, 255, local3
|
||||
stub local3, [local7+0]
|
||||
@ -3473,11 +3284,7 @@ DES_ncbc_encrypt:
|
||||
add local7, 8, local7
|
||||
addcc in2, -8, in2
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
bg,pt %icc, .ncbc.dec.next.block
|
||||
#else
|
||||
bg .ncbc.dec.next.block
|
||||
#endif
|
||||
STPTR local7, [%sp+BIAS+ARG0+1*ARGSZ]
|
||||
|
||||
|
||||
@ -3491,18 +3298,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local4, 3, global0
|
||||
bne,pn %icc, .SLE4
|
||||
nop
|
||||
|
||||
sta in0, [local4] 0x88
|
||||
add local4, 4, local5
|
||||
|
||||
ba,pt %icc, .SLE4a
|
||||
sta in1, [local5] 0x88
|
||||
#endif
|
||||
|
||||
.SLE4:
|
||||
and in0, 255, local5
|
||||
stub local5, [local4+0]
|
||||
@ -3551,19 +3346,6 @@ DES_ncbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local5, 3, global0
|
||||
bne,pn %icc, .LLE13
|
||||
nop
|
||||
|
||||
lda [local5] 0x88, in0
|
||||
add local5, 4, local5
|
||||
|
||||
lda [local5] 0x88, in1
|
||||
ba,pt %icc, .LLE13a
|
||||
add local5, 4, local5
|
||||
#endif
|
||||
|
||||
.LLE13:
|
||||
ldub [local5+3], in0
|
||||
|
||||
@ -3688,11 +3470,7 @@ DES_ede3_cbc_encrypt:
|
||||
LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
|
||||
cmp local3, 0 ! enc
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
be,pn %icc, .ede3.dec
|
||||
#else
|
||||
be .ede3.dec
|
||||
#endif
|
||||
STPTR in4, [%sp+BIAS+ARG0+4*ARGSZ]
|
||||
|
||||
STPTR in5, [%sp+BIAS+ARG0+5*ARGSZ]
|
||||
@ -3704,18 +3482,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local4, 3, global0
|
||||
bne,pn %icc, .LLE6
|
||||
nop
|
||||
|
||||
lda [local4] 0x88, in5
|
||||
add local4, 4, local3
|
||||
|
||||
ba,pt %icc, .LLE6a
|
||||
lda [local3] 0x88, out5
|
||||
#endif
|
||||
|
||||
.LLE6:
|
||||
ldub [local4+3], in5
|
||||
|
||||
@ -3751,11 +3517,7 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
addcc in2, -8, in2 ! bytes missing after next block
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
bl,pn %icc, .ede3.enc.seven.or.less
|
||||
#else
|
||||
bl .ede3.enc.seven.or.less
|
||||
#endif
|
||||
STPTR in3, [%sp+BIAS+ARG0+3*ARGSZ]
|
||||
|
||||
.ede3.enc.next.block:
|
||||
@ -3767,18 +3529,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in0, 3, global0
|
||||
bne,pn %icc, .LLE7
|
||||
nop
|
||||
|
||||
lda [in0] 0x88, out4
|
||||
add in0, 4, local3
|
||||
|
||||
ba,pt %icc, .LLE7a
|
||||
lda [local3] 0x88, global4
|
||||
#endif
|
||||
|
||||
.LLE7:
|
||||
ldub [in0+3], out4
|
||||
|
||||
@ -3926,11 +3676,7 @@ DES_ede3_cbc_encrypt:
|
||||
call .des_enc ! ks3 in3 compares in2 to 8
|
||||
nop
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
bl,pn %icc, .ede3.enc.next.block_fp
|
||||
#else
|
||||
bl .ede3.enc.next.block_fp
|
||||
#endif
|
||||
add in0, 8, in0
|
||||
|
||||
! If 8 or more bytes are to be encrypted after this block,
|
||||
@ -3944,18 +3690,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in0, 3, global0
|
||||
bne,pn %icc, .LLE11
|
||||
nop
|
||||
|
||||
lda [in0] 0x88, global3
|
||||
add in0, 4, local5
|
||||
|
||||
ba,pt %icc, .LLE11a
|
||||
lda [local5] 0x88, global4
|
||||
#endif
|
||||
|
||||
.LLE11:
|
||||
ldub [in0+3], global3
|
||||
|
||||
@ -4129,18 +3863,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in1, 3, global0
|
||||
bne,pn %icc, .SLE9
|
||||
nop
|
||||
|
||||
sta out0, [in1] 0x88
|
||||
add in1, 4, local3
|
||||
|
||||
ba,pt %icc, .SLE9a
|
||||
sta out1, [local3] 0x88
|
||||
#endif
|
||||
|
||||
.SLE9:
|
||||
and out0, 255, local3
|
||||
stub local3, [in1+0]
|
||||
@ -4272,18 +3994,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc in1, 3, global0
|
||||
bne,pn %icc, .SLE5
|
||||
nop
|
||||
|
||||
sta in5, [in1] 0x88
|
||||
add in1, 4, local3
|
||||
|
||||
ba,pt %icc, .SLE5a
|
||||
sta out5, [local3] 0x88
|
||||
#endif
|
||||
|
||||
.SLE5:
|
||||
and in5, 255, local3
|
||||
stub local3, [in1+0]
|
||||
@ -4320,22 +4030,14 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
addcc in2, -8, in2 ! bytes missing when next block done
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
bpos,pt %icc, .ede3.enc.next.block
|
||||
#else
|
||||
bpos .ede3.enc.next.block
|
||||
#endif
|
||||
add in1, 8, in1
|
||||
|
||||
.ede3.enc.seven.or.less:
|
||||
|
||||
cmp in2, -8
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
ble,pt %icc, .ede3.enc.finish
|
||||
#else
|
||||
ble .ede3.enc.finish
|
||||
#endif
|
||||
nop
|
||||
|
||||
add in2, 8, local1 ! bytes to load
|
||||
@ -4410,18 +4112,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local4, 3, global0
|
||||
bne,pn %icc, .SLE6
|
||||
nop
|
||||
|
||||
sta in5, [local4] 0x88
|
||||
add local4, 4, local5
|
||||
|
||||
ba,pt %icc, .SLE6a
|
||||
sta out5, [local5] 0x88
|
||||
#endif
|
||||
|
||||
.SLE6:
|
||||
and in5, 255, local5
|
||||
stub local5, [local4+0]
|
||||
@ -4471,11 +4161,7 @@ DES_ede3_cbc_encrypt:
|
||||
STPTR in3, [%sp+BIAS+ARG0+3*ARGSZ]
|
||||
cmp in2, 0
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
ble %icc, .ede3.dec.finish
|
||||
#else
|
||||
ble .ede3.dec.finish
|
||||
#endif
|
||||
STPTR in5, [%sp+BIAS+ARG0+5*ARGSZ]
|
||||
|
||||
LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
|
||||
@ -4486,18 +4172,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local7, 3, global0
|
||||
bne,pn %icc, .LLE8
|
||||
nop
|
||||
|
||||
lda [local7] 0x88, in0
|
||||
add local7, 4, local3
|
||||
|
||||
ba,pt %icc, .LLE8a
|
||||
lda [local3] 0x88, in1
|
||||
#endif
|
||||
|
||||
.LLE8:
|
||||
ldub [local7+3], in0
|
||||
|
||||
@ -4540,18 +4214,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local5, 3, global0
|
||||
bne,pn %icc, .LLE9
|
||||
nop
|
||||
|
||||
lda [local5] 0x88, in5
|
||||
add local5, 4, local3
|
||||
|
||||
ba,pt %icc, .LLE9a
|
||||
lda [local3] 0x88, out5
|
||||
#endif
|
||||
|
||||
.LLE9:
|
||||
ldub [local5+3], in5
|
||||
|
||||
@ -4778,11 +4440,7 @@ DES_ede3_cbc_encrypt:
|
||||
! in2 is compared to 8 in the rounds
|
||||
|
||||
xor out5, in0, out4
|
||||
#ifdef __sparc_v9__
|
||||
bl,pn %icc, .ede3.dec.seven.or.less
|
||||
#else
|
||||
bl .ede3.dec.seven.or.less
|
||||
#endif
|
||||
xor in5, in1, global4
|
||||
|
||||
|
||||
@ -4792,19 +4450,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local5, 3, global0
|
||||
bne,pn %icc, .LLE10
|
||||
nop
|
||||
|
||||
lda [local5] 0x88, in0
|
||||
add local5, 4, local5
|
||||
|
||||
lda [local5] 0x88, in1
|
||||
ba,pt %icc, .LLE10a
|
||||
add local5, 4, local5
|
||||
#endif
|
||||
|
||||
.LLE10:
|
||||
ldub [local5+3], in0
|
||||
|
||||
@ -4845,18 +4490,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local7, 3, global0
|
||||
bne,pn %icc, .SLE7
|
||||
nop
|
||||
|
||||
sta out4, [local7] 0x88
|
||||
add local7, 4, local3
|
||||
|
||||
ba,pt %icc, .SLE7a
|
||||
sta global4, [local3] 0x88
|
||||
#endif
|
||||
|
||||
.SLE7:
|
||||
and out4, 255, local3
|
||||
stub local3, [local7+0]
|
||||
@ -4895,11 +4528,7 @@ DES_ede3_cbc_encrypt:
|
||||
addcc in2, -8, in2
|
||||
add local7, 8, local7
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
bg,pt %icc, .ede3.dec.next.block
|
||||
#else
|
||||
bg .ede3.dec.next.block
|
||||
#endif
|
||||
STPTR local7, [%sp+BIAS+ARG0+1*ARGSZ]
|
||||
|
||||
.ede3.dec.store.iv:
|
||||
@ -4912,18 +4541,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! rightmost in register to first in memory
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local4, 3, global0
|
||||
bne,pn %icc, .SLE8
|
||||
nop
|
||||
|
||||
sta in0, [local4] 0x88
|
||||
add local4, 4, local5
|
||||
|
||||
ba,pt %icc, .SLE8a
|
||||
sta in1, [local5] 0x88
|
||||
#endif
|
||||
|
||||
.SLE8:
|
||||
and in0, 255, local5
|
||||
stub local5, [local4+0]
|
||||
@ -4972,19 +4589,6 @@ DES_ede3_cbc_encrypt:
|
||||
|
||||
! first in memory to rightmost in register
|
||||
|
||||
#ifdef __sparc_v9__
|
||||
andcc local5, 3, global0
|
||||
bne,pn %icc, .LLE14
|
||||
nop
|
||||
|
||||
lda [local5] 0x88, in0
|
||||
add local5, 4, local5
|
||||
|
||||
lda [local5] 0x88, in1
|
||||
ba,pt %icc, .LLE14a
|
||||
add local5, 4, local5
|
||||
#endif
|
||||
|
||||
.LLE14:
|
||||
ldub [local5+3], in0
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
#endif
|
||||
|
||||
.text
|
||||
.align 32
|
||||
.globl des_t4_key_expand
|
||||
@ -47,7 +54,7 @@ des_t4_key_expand:
|
||||
.align 32
|
||||
des_t4_cbc_encrypt:
|
||||
cmp %o2, 0
|
||||
be,pn %icc, .Lcbc_abort
|
||||
be,pn SIZE_T_CC, .Lcbc_abort
|
||||
srln %o2, 0, %o2 ! needed on v8+, "nop" on v9
|
||||
ld [%o4 + 0], %f0 ! load ivec
|
||||
ld [%o4 + 4], %f1
|
||||
@ -148,7 +155,7 @@ des_t4_cbc_encrypt:
|
||||
.align 32
|
||||
des_t4_cbc_decrypt:
|
||||
cmp %o2, 0
|
||||
be,pn %icc, .Lcbc_abort
|
||||
be,pn SIZE_T_CC, .Lcbc_abort
|
||||
srln %o2, 0, %o2 ! needed on v8+, "nop" on v9
|
||||
ld [%o4 + 0], %f2 ! load ivec
|
||||
ld [%o4 + 4], %f3
|
||||
@ -247,7 +254,7 @@ des_t4_cbc_decrypt:
|
||||
.align 32
|
||||
des_t4_ede3_cbc_encrypt:
|
||||
cmp %o2, 0
|
||||
be,pn %icc, .Lcbc_abort
|
||||
be,pn SIZE_T_CC, .Lcbc_abort
|
||||
srln %o2, 0, %o2 ! needed on v8+, "nop" on v9
|
||||
ld [%o4 + 0], %f0 ! load ivec
|
||||
ld [%o4 + 4], %f1
|
||||
@ -399,7 +406,7 @@ des_t4_ede3_cbc_encrypt:
|
||||
.align 32
|
||||
des_t4_ede3_cbc_decrypt:
|
||||
cmp %o2, 0
|
||||
be,pn %icc, .Lcbc_abort
|
||||
be,pn SIZE_T_CC, .Lcbc_abort
|
||||
srln %o2, 0, %o2 ! needed on v8+, "nop" on v9
|
||||
ld [%o4 + 0], %f2 ! load ivec
|
||||
ld [%o4 + 4], %f3
|
||||
|
6
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/ec.inc
vendored
Normal file
6
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/ec.inc
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
# Makefile fragment for this architecture directory.
# - .PATH.S adds the directory of this fragment (.PARSEDIR) to the search
#   path used for .S sources.
# - EC_SRCS gains ecp_nistz256-sparcv9.S.
# - ECNI is set to yes before the shared ../../ec.inc is included.
#   NOTE(review): ECNI is presumably tested inside ../../ec.inc -- confirm.
.PATH.S: ${.PARSEDIR}
|
||||
EC_SRCS += \
|
||||
ecp_nistz256-sparcv9.S
|
||||
|
||||
ECNI = yes
|
||||
.include "../../ec.inc"
|
5873
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/ecp_nistz256-sparcv9.S
vendored
Normal file
5873
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/ecp_nistz256-sparcv9.S
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,10 @@
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
#endif
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
.align 64
|
||||
@ -12,7 +19,7 @@ rem_4bit:
|
||||
.globl gcm_ghash_4bit
|
||||
.align 32
|
||||
gcm_ghash_4bit:
|
||||
save %sp,-112,%sp
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
ldub [%i2+15],%l1
|
||||
ldub [%i0+15],%l2
|
||||
ldub [%i0+14],%l3
|
||||
@ -101,7 +108,7 @@ gcm_ghash_4bit:
|
||||
|
||||
add %i2,16,%i2
|
||||
cmp %i2,%i3
|
||||
be,pn %icc,.Ldone
|
||||
be,pn SIZE_T_CC,.Ldone
|
||||
and %o1,0xf,%l5
|
||||
|
||||
ldx [%l6+%l0],%o3
|
||||
@ -147,7 +154,7 @@ gcm_ghash_4bit:
|
||||
.globl gcm_gmult_4bit
|
||||
.align 32
|
||||
gcm_gmult_4bit:
|
||||
save %sp,-112,%sp
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
ldub [%i0+15],%l1
|
||||
add %i1,8,%l6
|
||||
|
||||
@ -249,7 +256,7 @@ gcm_gmult_4bit:
|
||||
.globl gcm_init_vis3
|
||||
.align 32
|
||||
gcm_init_vis3:
|
||||
save %sp,-112,%sp
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
|
||||
ldx [%i1+0],%o2
|
||||
ldx [%i1+8],%o1
|
||||
@ -282,7 +289,7 @@ gcm_init_vis3:
|
||||
.globl gcm_gmult_vis3
|
||||
.align 32
|
||||
gcm_gmult_vis3:
|
||||
save %sp,-112,%sp
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
|
||||
ldx [%i0+8],%o3 ! load Xi
|
||||
ldx [%i0+0],%o4
|
||||
@ -336,7 +343,7 @@ gcm_gmult_vis3:
|
||||
.globl gcm_ghash_vis3
|
||||
.align 32
|
||||
gcm_ghash_vis3:
|
||||
save %sp,-112,%sp
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
nop
|
||||
srln %i3,0,%i3 ! needed on v8+, "nop" on v9
|
||||
|
||||
|
946
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/poly1305-sparcv9.S
vendored
Normal file
946
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/poly1305-sparcv9.S
vendored
Normal file
@ -0,0 +1,946 @@
|
||||
! File preamble: ABI-dependent helper macros, section selection and the
! PIC helper.  STACK_BIAS, STACK_FRAME, SPARC_PIC_THUNK and friends are
! expected to come from sparc_arch.h (not visible here).
#include "sparc_arch.h"
|
||||
|
||||
! 64-bit builds: tell the assembler that %g2 and %g3 are used as scratch
! registers, and pick pointer-sized store and pointer width of 8 bytes.
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
# define STPTR stx
|
||||
# define SIZE_T 8
|
||||
! Otherwise: pointers are stored with a 32-bit store and are 4 bytes wide.
#else
|
||||
# define STPTR st
|
||||
# define SIZE_T 4
|
||||
#endif
|
||||
! Offset added to %sp to reach the extra bytes that the functions in this
! file reserve on top of STACK_FRAME in their save instruction.
#define LOCALS (STACK_BIAS+STACK_FRAME)
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
! NOTE(review): SPARC_PIC_THUNK presumably emits the helper that
! SPARC_LOAD_ADDRESS relies on in PIC builds -- confirm in sparc_arch.h.
#ifdef __PIC__
|
||||
SPARC_PIC_THUNK(%g1)
|
||||
#endif
|
||||
|
||||
! poly1305_init -- set up the Poly1305 state (integer and VIS3 paths).
! In:  %i0 = state block: hash at +0..+23 is zeroed, masked key goes to +32
!      %i1 = key pointer, any alignment; may be null
!      %i2 = table that receives two function pointers
! Out: caller %o0 = 1 when the two function pointers were stored, else 0
!      (null key, or the VIS3 capability bit is clear).
! When the capability word has SPARCV9_FMADD set and SPARCV9_VIS3 clear,
! control is transferred to .Lpoly1305_init_fma instead.
.globl poly1305_init
|
||||
.align 32
|
||||
poly1305_init:
|
||||
save %sp,-STACK_FRAME-16,%sp
|
||||
nop
|
||||
|
||||
! Fetch the first word of OPENSSL_sparcv9cap_P and keep only the FMADD and
! VIS3 bits; equality with FMADD alone selects the FMA implementation.
SPARC_LOAD_ADDRESS(OPENSSL_sparcv9cap_P,%g1)
|
||||
ld [%g1],%g1
|
||||
|
||||
and %g1,SPARCV9_FMADD|SPARCV9_VIS3,%g1
|
||||
cmp %g1,SPARCV9_FMADD
|
||||
be .Lpoly1305_init_fma
|
||||
nop
|
||||
|
||||
stx %g0,[%i0+0]
|
||||
stx %g0,[%i0+8] ! zero hash value
|
||||
! Null key: return 0.  The store below sits in the delay slot of a
! non-annulled branch, so the third hash word is cleared on both paths.
brz,pn %i1,.Lno_key
|
||||
stx %g0,[%i0+16]
|
||||
|
||||
! %i5 = misalignment of the key in bits, %i4 = its negation (used as the
! complementary shift count); %i1 is rounded down to an 8-byte boundary.
and %i1,7,%i5 ! alignment factor
|
||||
andn %i1,7,%i1
|
||||
sll %i5,3,%i5 ! *8
|
||||
neg %i5,%i4
|
||||
|
||||
! Build the two 64-bit key masks in %o4 and %o5, interleaved with loading
! the byte offsets 8 and 16 into %o1 and %o2.
sethi %hi(0x0ffffffc),%o4
|
||||
set 8,%o1
|
||||
or %o4,%lo(0x0ffffffc),%o4
|
||||
set 16,%o2
|
||||
sllx %o4,32,%o5
|
||||
or %o4,%o5,%o5 ! 0x0ffffffc0ffffffc
|
||||
or %o5,3,%o4 ! 0x0ffffffc0fffffff
|
||||
|
||||
! Loads through ASI 0x88 read the key as little-endian 64-bit words.  The
! second load is in the branch delay slot and runs for both outcomes.
ldxa [%i1+%g0]0x88,%o0 ! load little-endian key
|
||||
brz,pt %i5,.Lkey_aligned
|
||||
ldxa [%i1+%o1]0x88,%o1
|
||||
|
||||
! Unaligned key: fetch a third word and shift/merge the three words into
! the two 64-bit key halves.
ldxa [%i1+%o2]0x88,%o2
|
||||
srlx %o0,%i5,%o0
|
||||
sllx %o1,%i4,%o7
|
||||
srlx %o1,%i5,%o1
|
||||
or %o7,%o0,%o0
|
||||
sllx %o2,%i4,%o2
|
||||
or %o2,%o1,%o1
|
||||
|
||||
.Lkey_aligned:
|
||||
! Apply the masks built above and store the masked key in the state.
and %o4,%o0,%o0
|
||||
and %o5,%o1,%o1
|
||||
stx %o0,[%i0+32+0] ! store key
|
||||
stx %o1,[%i0+32+8]
|
||||
|
||||
! %g1 still holds only the FMADD and VIS3 bits.  Without VIS3 the function
! returns 0 and leaves the table at %i2 untouched.
andcc %g1,SPARCV9_VIS3,%g0
|
||||
be .Lno_key
|
||||
nop
|
||||
|
||||
! Position-independent address computation: call leaves the address of the
! call instruction (label 1) in %o7, and the delay-slot add turns it into
! the address of poly1305_blocks_vis3.
1: call .+8
|
||||
add %o7,poly1305_blocks_vis3-1b,%o7
|
||||
|
||||
! Store the addresses of poly1305_blocks_vis3 and poly1305_emit as two
! consecutive pointers at %i2.
add %o7,poly1305_emit-poly1305_blocks_vis3,%o5
|
||||
STPTR %o7,[%i2]
|
||||
STPTR %o5,[%i2+SIZE_T]
|
||||
|
||||
ret
|
||||
restore %g0,1,%o0 ! return 1
|
||||
|
||||
.Lno_key:
|
||||
ret
|
||||
restore %g0,%g0,%o0 ! return 0
|
||||
.type poly1305_init,#function
|
||||
.size poly1305_init,.-poly1305_init
|
||||
|
||||
! poly1305_blocks -- absorb input into the hash using 32-bit limbs.
! In:  %i0 = state: hash words at +0..+16, key words at +32..+44
!      %i1 = input pointer, any alignment
!      %i2 = length; passed through srln by 4, after which it counts loop
!            iterations (each iteration consumes 16 bytes of input)
!      %i3 = value added, together with the carry, into the top hash word
!            on every iteration
! The updated hash is written back to the state; nothing is returned.
! Naming used in the comments below: r0..r3 are %l0..%l3, s1..s3 are
! %l4..%l6, h0..h3 are %o0..%o3 and h4 is %l7.
.globl poly1305_blocks
|
||||
.align 32
|
||||
poly1305_blocks:
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
srln %i2,4,%i2
|
||||
|
||||
brz,pn %i2,.Lno_data
|
||||
nop
|
||||
|
||||
! Each pair of 32-bit words is loaded in swapped order (the word at the
! higher address goes to the lower-numbered register).
ld [%i0+32+0],%l1 ! load key
|
||||
ld [%i0+32+4],%l0
|
||||
ld [%i0+32+8],%l3
|
||||
ld [%i0+32+12],%l2
|
||||
|
||||
ld [%i0+0],%o1 ! load hash value
|
||||
ld [%i0+4],%o0
|
||||
ld [%i0+8],%o3
|
||||
ld [%i0+12],%o2
|
||||
ld [%i0+16],%l7
|
||||
|
||||
! %i5 = input misalignment in bits, %i4 = its negation; %g2 and %g3 hold
! the byte offsets 8 and 16 used by the loads at the top of the loop.
and %i1,7,%i5 ! alignment factor
|
||||
andn %i1,7,%i1
|
||||
set 8,%g2
|
||||
sll %i5,3,%i5 ! *8
|
||||
set 16,%g3
|
||||
neg %i5,%i4
|
||||
|
||||
! s1..s3 = r1..r3 plus the same value shifted right by two.
srl %l1,2,%l4
|
||||
srl %l2,2,%l5
|
||||
add %l1,%l4,%l4
|
||||
srl %l3,2,%l6
|
||||
add %l2,%l5,%l5
|
||||
add %l3,%l6,%l6
|
||||
|
||||
.Loop:
|
||||
! The second load is in the delay slot and replaces the offset in %g2 with
! data; %g2 and %g3 are reloaded with 8 and 16 further down.
ldxa [%i1+%g0]0x88,%g1 ! load little-endian input
|
||||
brz,pt %i5,.Linp_aligned
|
||||
ldxa [%i1+%g2]0x88,%g2
|
||||
|
||||
! Unaligned input: fetch a third word and shift/merge into %g1 and %g2.
ldxa [%i1+%g3]0x88,%g3
|
||||
srlx %g1,%i5,%g1
|
||||
sllx %g2,%i4,%o5
|
||||
srlx %g2,%i5,%g2
|
||||
or %o5,%g1,%g1
|
||||
sllx %g3,%i4,%g3
|
||||
or %g3,%g2,%g2
|
||||
|
||||
.Linp_aligned:
|
||||
! Add the four 32-bit input words to h0..h3 with carry propagation; the
! final carry and %i3 go into h4.
srlx %g1,32,%o4
|
||||
addcc %g1,%o0,%o0 ! accumulate input
|
||||
srlx %g2,32,%o5
|
||||
addccc %o4,%o1,%o1
|
||||
addccc %g2,%o2,%o2
|
||||
addccc %o5,%o3,%o3
|
||||
addc %i3,%l7,%l7
|
||||
|
||||
! h0 times r0,r1,r2,r3 start the four column sums %g1..%g4; the block
! counter and the input pointer are advanced in between.
umul %l0,%o0,%g1
|
||||
umul %l1,%o0,%g2
|
||||
umul %l2,%o0,%g3
|
||||
umul %l3,%o0,%g4
|
||||
sub %i2,1,%i2
|
||||
add %i1,16,%i1
|
||||
|
||||
! h1 times s3,r0,r1,r2
umul %l6,%o1,%o4
|
||||
umul %l0,%o1,%o5
|
||||
umul %l1,%o1,%o7
|
||||
add %o4,%g1,%g1
|
||||
add %o5,%g2,%g2
|
||||
umul %l2,%o1,%o4
|
||||
add %o7,%g3,%g3
|
||||
add %o4,%g4,%g4
|
||||
|
||||
! h2 times s2,s3,r0,r1
umul %l5,%o2,%o5
|
||||
umul %l6,%o2,%o7
|
||||
umul %l0,%o2,%o4
|
||||
add %o5,%g1,%g1
|
||||
add %o7,%g2,%g2
|
||||
umul %l1,%o2,%o5
|
||||
add %o4,%g3,%g3
|
||||
add %o5,%g4,%g4
|
||||
|
||||
! h3 times s1,s2,s3,r0
umul %l4,%o3,%o7
|
||||
umul %l5,%o3,%o4
|
||||
umul %l6,%o3,%o5
|
||||
add %o7,%g1,%g1
|
||||
add %o4,%g2,%g2
|
||||
umul %l0,%o3,%o7
|
||||
add %o5,%g3,%g3
|
||||
add %o7,%g4,%g4
|
||||
|
||||
! h4 times s1,s2,s3 go to columns %g2..%g4; h4 times r0 becomes the new
! top word.  The upper halves of the column sums are then extracted so
! they can be carried into the next column.
umul %l4,%l7,%o4
|
||||
umul %l5,%l7,%o5
|
||||
umul %l6,%l7,%o7
|
||||
umul %l0,%l7,%l7
|
||||
add %o4,%g2,%g2
|
||||
add %o5,%g3,%g3
|
||||
srlx %g1,32,%o1
|
||||
add %o7,%g4,%g4
|
||||
srlx %g2,32,%o2
|
||||
|
||||
! Carry the upper halves upward.  %g2 and %g3 are reloaded with the byte
! offsets 8 and 16 as soon as their column sums have been consumed.
addcc %g2,%o1,%o1
|
||||
srlx %g3,32,%o3
|
||||
set 8,%g2
|
||||
addccc %g3,%o2,%o2
|
||||
srlx %g4,32,%o4
|
||||
set 16,%g3
|
||||
addccc %g4,%o3,%o3
|
||||
addc %o4,%l7,%l7
|
||||
|
||||
! Fold the bits of h4 above bit 1 back into the low word: %o4 receives
! (h4 with its low two bits cleared) + (h4 >> 2), and h4 keeps only its
! low two bits.
srl %l7,2,%o4 ! final reduction step
|
||||
andn %l7,3,%o5
|
||||
and %l7,3,%l7
|
||||
add %o5,%o4,%o4
|
||||
|
||||
! The final addc is in the delay slot of the loop branch and therefore
! runs on every iteration, including the last one.
addcc %o4,%g1,%o0
|
||||
addccc %g0,%o1,%o1
|
||||
addccc %g0,%o2,%o2
|
||||
addccc %g0,%o3,%o3
|
||||
brnz,pt %i2,.Loop
|
||||
addc %g0,%l7,%l7
|
||||
|
||||
! Write the hash back using the same swapped word order as the loads.
st %o1,[%i0+0] ! store hash value
|
||||
st %o0,[%i0+4]
|
||||
st %o3,[%i0+8]
|
||||
st %o2,[%i0+12]
|
||||
st %l7,[%i0+16]
|
||||
|
||||
! Shared exit; poly1305_blocks_vis3 also branches here when it has no data.
.Lno_data:
|
||||
ret
|
||||
restore
|
||||
.type poly1305_blocks,#function
|
||||
.size poly1305_blocks,.-poly1305_blocks
|
||||
! poly1305_blocks_vis3 -- same job as poly1305_blocks, with 64-bit limbs.
! In:  %i0 = state: hash at +0, +8 (64-bit) and +16 (32-bit), key at +32, +40
!      %i1 = input pointer, any alignment
!      %i2 = length; passed through srln by 4, after which it counts loop
!            iterations (each iteration consumes 16 bytes of input)
!      %i3 = value added, together with the carry, into the top hash word
! Not exported; poly1305_init stores its address in the function table.
! Several instructions are emitted as raw .word values; the mnemonic each
! one stands for is given in the trailing comment on the same line.
.align 32
|
||||
poly1305_blocks_vis3:
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
srln %i2,4,%i2
|
||||
|
||||
! No input: leave through the exit label inside poly1305_blocks.
brz,pn %i2,.Lno_data
|
||||
nop
|
||||
|
||||
! r0 in %o3, r1 in %o4; h0, h1, h2 in %o0, %o1, %o2.
ldx [%i0+32+0],%o3 ! load key
|
||||
ldx [%i0+32+8],%o4
|
||||
|
||||
ldx [%i0+0],%o0 ! load hash value
|
||||
ldx [%i0+8],%o1
|
||||
ld [%i0+16],%o2
|
||||
|
||||
! %i5 = input misalignment in bits, %i4 = its negation; %l1 and %l2 hold
! the byte offsets 8 and 16 for the loads in the loop.
and %i1,7,%i5 ! alignment factor
|
||||
andn %i1,7,%i1
|
||||
set 8,%l1
|
||||
sll %i5,3,%i5 ! *8
|
||||
set 16,%l2
|
||||
neg %i5,%i4
|
||||
|
||||
! s1 = r1 + (r1 >> 2), kept in %o5; the add is in the branch delay slot.
srlx %o4,2,%o5
|
||||
b .Loop_vis3
|
||||
add %o4,%o5,%o5
|
||||
|
||||
.Loop_vis3:
|
||||
! The second load is in the delay slot and runs for both branch outcomes.
ldxa [%i1+%g0]0x88,%g1 ! load little-endian input
|
||||
brz,pt %i5,.Linp_aligned_vis3
|
||||
ldxa [%i1+%l1]0x88,%g2
|
||||
|
||||
! Unaligned input: fetch a third word and shift/merge into %g1 and %g2.
ldxa [%i1+%l2]0x88,%g3
|
||||
srlx %g1,%i5,%g1
|
||||
sllx %g2,%i4,%o7
|
||||
srlx %g2,%i5,%g2
|
||||
or %o7,%g1,%g1
|
||||
sllx %g3,%i4,%g3
|
||||
or %g3,%g2,%g2
|
||||
|
||||
.Linp_aligned_vis3:
|
||||
! Add the two input words to h0 and h1; the block counter and the input
! pointer are advanced between the carry-chain steps.
addcc %g1,%o0,%o0 ! accumulate input
|
||||
sub %i2,1,%i2
|
||||
.word 0x93b08269 !addxccc %g2,%o1,%o1
|
||||
add %i1,16,%i1
|
||||
|
||||
! Multiply-accumulate into the three result words %g1, %g2, %g3.  mulx
! gives the low 64 bits of a product; the umulxhi named in the comments
! supplies the high 64 bits.
mulx %o3,%o0,%g1 ! r0*h0
|
||||
.word 0x95b6c22a !addxc %i3,%o2,%o2
|
||||
.word 0x85b2c2c8 !umulxhi %o3,%o0,%g2
|
||||
mulx %o5,%o1,%g4 ! s1*h1
|
||||
.word 0x9fb342c9 !umulxhi %o5,%o1,%o7
|
||||
addcc %g4,%g1,%g1
|
||||
mulx %o4,%o0,%g4 ! r1*h0
|
||||
.word 0x85b3c222 !addxc %o7,%g2,%g2
|
||||
.word 0x87b302c8 !umulxhi %o4,%o0,%g3
|
||||
addcc %g4,%g2,%g2
|
||||
mulx %o3,%o1,%g4 ! r0*h1
|
||||
.word 0x87b00223 !addxc %g0,%g3,%g3
|
||||
.word 0x9fb2c2c9 !umulxhi %o3,%o1,%o7
|
||||
addcc %g4,%g2,%g2
|
||||
mulx %o5,%o2,%g4 ! s1*h2
|
||||
.word 0x87b3c223 !addxc %o7,%g3,%g3
|
||||
mulx %o3,%o2,%o7 ! r0*h2
|
||||
addcc %g4,%g2,%g2
|
||||
.word 0x87b3c223 !addxc %o7,%g3,%g3
|
||||
|
||||
! Fold the bits of the top word above bit 1 back into the low word: %g4
! receives (top with its low two bits cleared) + (top >> 2), and h2 keeps
! only the low two bits.
srlx %g3,2,%g4 ! final reduction step
|
||||
andn %g3,3,%o7
|
||||
and %g3,3,%o2
|
||||
add %o7,%g4,%g4
|
||||
|
||||
! The last carry step is in the delay slot of the loop branch and runs on
! every iteration, including the final one.
addcc %g4,%g1,%o0
|
||||
.word 0x93b00262 !addxccc %g0,%g2,%o1
|
||||
brnz,pt %i2,.Loop_vis3
|
||||
.word 0x95b0022a !addxc %g0,%o2,%o2
|
||||
|
||||
stx %o0,[%i0+0] ! store hash value
|
||||
stx %o1,[%i0+8]
|
||||
st %o2,[%i0+16]
|
||||
|
||||
ret
|
||||
restore
|
||||
.type poly1305_blocks_vis3,#function
|
||||
.size poly1305_blocks_vis3,.-poly1305_blocks_vis3
|
||||
! poly1305_emit -- produce the 16-byte result from the hash and a nonce.
! In:  %i0 = state (five 32-bit hash words at +0..+16)
!      %i1 = output buffer; 16 bytes are written one byte at a time
!      %i2 = nonce; four 32-bit words are read from +0..+12
! Nothing is returned and the state is not modified.
.globl poly1305_emit
|
||||
.align 32
|
||||
poly1305_emit:
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
|
||||
! Each pair of words is loaded in swapped order, so %o0..%o3 run from the
! least to the most significant 32-bit word; %l7 is the top word.
ld [%i0+0],%o1 ! load hash value
|
||||
ld [%i0+4],%o0
|
||||
ld [%i0+8],%o3
|
||||
ld [%i0+12],%o2
|
||||
ld [%i0+16],%l7
|
||||
|
||||
! Compute hash + 5 into %l0..%l3 and %l7.  Bit 2 of the top word is set
! exactly when that sum reached bit 130.
addcc %o0,5,%l0 ! compare to modulus
|
||||
addccc %o1,0,%l1
|
||||
addccc %o2,0,%l2
|
||||
addccc %o3,0,%l3
|
||||
addc %l7,0,%l7
|
||||
andcc %l7,4,%g0 ! did it carry/borrow?
|
||||
|
||||
! If that bit was set, take the hash + 5 words instead of the hash words.
! The condition codes stay valid across the interleaved nonce loads, which
! reuse %l0..%l3 right after each conditional move has read them.
movnz %icc,%l0,%o0
|
||||
ld [%i2+0],%l0 ! load nonce
|
||||
movnz %icc,%l1,%o1
|
||||
ld [%i2+4],%l1
|
||||
movnz %icc,%l2,%o2
|
||||
ld [%i2+8],%l2
|
||||
movnz %icc,%l3,%o3
|
||||
ld [%i2+12],%l3
|
||||
|
||||
! 128-bit add of the nonce; the carry out of the top word is discarded.
addcc %l0,%o0,%o0 ! accumulate nonce
|
||||
addccc %l1,%o1,%o1
|
||||
addccc %l2,%o2,%o2
|
||||
addc %l3,%o3,%o3
|
||||
|
||||
! Byte-wise stores, least significant byte first, so the output does not
! need to be aligned.
srl %o0,8,%l0
|
||||
stb %o0,[%i1+0] ! store little-endian result
|
||||
srl %o0,16,%l1
|
||||
stb %l0,[%i1+1]
|
||||
srl %o0,24,%l2
|
||||
stb %l1,[%i1+2]
|
||||
stb %l2,[%i1+3]
|
||||
|
||||
srl %o1,8,%l0
|
||||
stb %o1,[%i1+4]
|
||||
srl %o1,16,%l1
|
||||
stb %l0,[%i1+5]
|
||||
srl %o1,24,%l2
|
||||
stb %l1,[%i1+6]
|
||||
stb %l2,[%i1+7]
|
||||
|
||||
srl %o2,8,%l0
|
||||
stb %o2,[%i1+8]
|
||||
srl %o2,16,%l1
|
||||
stb %l0,[%i1+9]
|
||||
srl %o2,24,%l2
|
||||
stb %l1,[%i1+10]
|
||||
stb %l2,[%i1+11]
|
||||
|
||||
srl %o3,8,%l0
|
||||
stb %o3,[%i1+12]
|
||||
srl %o3,16,%l1
|
||||
stb %l0,[%i1+13]
|
||||
srl %o3,24,%l2
|
||||
stb %l1,[%i1+14]
|
||||
stb %l2,[%i1+15]
|
||||
|
||||
ret
|
||||
restore
|
||||
.type poly1305_emit,#function
|
||||
.size poly1305_emit,.-poly1305_emit
|
||||
! poly1305_init_fma -- set up the Poly1305 state for the floating-point path.
! In:  %i0 = state block: four doubles of hash at +0, key material at
!            +8*4 .. +8*17
!      %i1 = key pointer, any alignment; may be null
!      %i2 = table that receives two function pointers
! Out: caller %o0 = 1 when the key was processed and the pointers to
!      poly1305_blocks_fma and poly1305_emit_fma were stored, 0 for a null key.
! poly1305_init branches to the inner label .Lpoly1305_init_fma after it has
! already executed a save of the same size, which is why that label sits
! after the save here.
! NOTE(review): the table at .Lconsts_fma is outside this view; the meaning
! of its individual entries is taken from the existing comments only.
.align 32
|
||||
poly1305_init_fma:
|
||||
save %sp,-STACK_FRAME-16,%sp
|
||||
nop
|
||||
|
||||
.Lpoly1305_init_fma:
|
||||
! call leaves the address of label 1 in %o7; the delay-slot add turns it
! into the address of .Lconsts_fma, which stays in %o7 for the whole body.
1: call .+8
|
||||
add %o7,.Lconsts_fma-1b,%o7
|
||||
|
||||
ldd [%o7+8*0],%f16 ! load constants
|
||||
ldd [%o7+8*1],%f18
|
||||
ldd [%o7+8*2],%f20
|
||||
ldd [%o7+8*3],%f22
|
||||
ldd [%o7+8*5],%f26
|
||||
|
||||
std %f16,[%i0+8*0] ! initial hash value, biased 0
|
||||
std %f18,[%i0+8*1]
|
||||
std %f20,[%i0+8*2]
|
||||
std %f22,[%i0+8*3]
|
||||
|
||||
! Null key: return 0.  %fsr has not been touched at this point.
brz,pn %i1,.Lno_key_fma
|
||||
nop
|
||||
|
||||
! The original %fsr is parked in the extra bytes reserved by the save and
! is reloaded before the results are stored.
stx %fsr,[%sp+LOCALS] ! save original %fsr
|
||||
ldx [%o7+8*6],%fsr ! load new %fsr
|
||||
|
||||
! Seed four state slots with the first four constants; the second 32-bit
! word of each slot is overwritten with a key word further down.
std %f16,[%i0+8*4] ! key "template"
|
||||
std %f18,[%i0+8*5]
|
||||
std %f20,[%i0+8*6]
|
||||
std %f22,[%i0+8*7]
|
||||
|
||||
! %l2 = key misalignment in bits, %l3 = its negation; %l0 and %l1 hold
! the byte offsets 8 and 16.
and %i1,7,%l2
|
||||
andn %i1,7,%i1 ! align pointer
|
||||
mov 8,%l0
|
||||
sll %l2,3,%l2
|
||||
mov 16,%l1
|
||||
neg %l2,%l3
|
||||
|
||||
ldxa [%i1+%g0]0x88,%o0 ! load little-endian key
|
||||
ldxa [%i1+%l0]0x88,%o2
|
||||
|
||||
! The sethi is in the delay slot: %l0 becomes the mask base on both paths.
brz %l2,.Lkey_aligned_fma
|
||||
sethi %hi(0xf0000000),%l0 ! 0xf0000000
|
||||
|
||||
! Unaligned key: fetch a third word and shift/merge into %o0 and %o2.
ldxa [%i1+%l1]0x88,%o4
|
||||
|
||||
srlx %o0,%l2,%o0 ! align data
|
||||
sllx %o2,%l3,%o1
|
||||
srlx %o2,%l2,%o2
|
||||
or %o1,%o0,%o0
|
||||
sllx %o4,%l3,%o3
|
||||
or %o3,%o2,%o2
|
||||
|
||||
.Lkey_aligned_fma:
|
||||
! Split the key into four 32-bit words %o0..%o3 and clear the masked bits.
! The st instructions below store only the low 32 bits of each register.
or %l0,3,%l1 ! 0xf0000003
|
||||
srlx %o0,32,%o1
|
||||
andn %o0,%l0,%o0 ! &=0x0fffffff
|
||||
andn %o1,%l1,%o1 ! &=0x0ffffffc
|
||||
srlx %o2,32,%o3
|
||||
andn %o2,%l1,%o2
|
||||
andn %o3,%l1,%o3
|
||||
|
||||
! Offsets 36, 44, 52 and 60 are the second 32-bit word of the doubles at
! +8*4 .. +8*7.
st %o0,[%i0+36] ! fill "template"
|
||||
st %o1,[%i0+44]
|
||||
st %o2,[%i0+52]
|
||||
st %o3,[%i0+60]
|
||||
|
||||
ldd [%i0+8*4],%f0 ! load [biased] key
|
||||
ldd [%i0+8*5],%f4
|
||||
ldd [%i0+8*6],%f8
|
||||
ldd [%i0+8*7],%f12
|
||||
|
||||
! Subtract the template constants again, leaving r0..r3 as doubles; the
! next group of constants is loaded into the freed registers in between.
fsubd %f0,%f16, %f0 ! r0
|
||||
ldd [%o7+8*7],%f16 ! more constants
|
||||
fsubd %f4,%f18,%f4 ! r1
|
||||
ldd [%o7+8*8],%f18
|
||||
fsubd %f8,%f20,%f8 ! r2
|
||||
ldd [%o7+8*9],%f20
|
||||
fsubd %f12,%f22,%f12 ! r3
|
||||
ldd [%o7+8*10],%f22
|
||||
|
||||
! s1..s3 = r1..r3 multiplied by the constant loaded from entry 5.
fmuld %f26,%f4,%f52 ! s1
|
||||
fmuld %f26,%f8,%f40 ! s2
|
||||
fmuld %f26,%f12,%f44 ! s3
|
||||
|
||||
! Split each r value into a hi and a lo part: add a constant, subtract the
! same constant again to get the hi part, then subtract the hi part from
! the original to get the lo part.
! NOTE(review): this presumably relies on the rounding behaviour selected
! by the %fsr value loaded above -- confirm against .Lconsts_fma.
faddd %f0,%f16, %f2
|
||||
faddd %f4,%f18,%f6
|
||||
faddd %f8,%f20,%f10
|
||||
faddd %f12,%f22,%f14
|
||||
|
||||
fsubd %f2,%f16, %f2
|
||||
ldd [%o7+8*11],%f16 ! more constants
|
||||
fsubd %f6,%f18,%f6
|
||||
ldd [%o7+8*12],%f18
|
||||
fsubd %f10,%f20,%f10
|
||||
ldd [%o7+8*13],%f20
|
||||
fsubd %f14,%f22,%f14
|
||||
|
||||
fsubd %f0,%f2,%f0
|
||||
std %f2,[%i0+8*5] ! r0hi
|
||||
fsubd %f4,%f6,%f4
|
||||
std %f6,[%i0+8*7] ! r1hi
|
||||
fsubd %f8,%f10,%f8
|
||||
std %f10,[%i0+8*9] ! r2hi
|
||||
fsubd %f12,%f14,%f12
|
||||
std %f14,[%i0+8*11] ! r3hi
|
||||
|
||||
! Same hi/lo split for s1..s3, using the constants from entries 11..13.
faddd %f52,%f16, %f54
|
||||
faddd %f40,%f18,%f42
|
||||
faddd %f44,%f20,%f46
|
||||
|
||||
fsubd %f54,%f16, %f54
|
||||
fsubd %f42,%f18,%f42
|
||||
fsubd %f46,%f20,%f46
|
||||
|
||||
fsubd %f52,%f54,%f52
|
||||
fsubd %f40,%f42,%f40
|
||||
fsubd %f44,%f46,%f44
|
||||
|
||||
ldx [%sp+LOCALS],%fsr ! restore %fsr
|
||||
|
||||
std %f0,[%i0+8*4] ! r0lo
|
||||
std %f4,[%i0+8*6] ! r1lo
|
||||
std %f8,[%i0+8*8] ! r2lo
|
||||
std %f12,[%i0+8*10] ! r3lo
|
||||
|
||||
! hi parts of s1, s2, s3 go to the odd slots 13, 15, 17.
std %f54,[%i0+8*13]
|
||||
std %f42,[%i0+8*15]
|
||||
std %f46,[%i0+8*17]
|
||||
|
||||
! lo parts of s1, s2, s3 go to the even slots 12, 14, 16.
std %f52,[%i0+8*12]
|
||||
std %f40,[%i0+8*14]
|
||||
std %f44,[%i0+8*16]
|
||||
|
||||
! %o7 still holds the address of .Lconsts_fma, so both function addresses
! are formed relative to it and stored as consecutive pointers at %i2.
add %o7,poly1305_blocks_fma-.Lconsts_fma,%o0
|
||||
add %o7,poly1305_emit_fma-.Lconsts_fma,%o1
|
||||
STPTR %o0,[%i2]
|
||||
STPTR %o1,[%i2+SIZE_T]
|
||||
|
||||
ret
|
||||
restore %g0,1,%o0 ! return 1
|
||||
|
||||
.Lno_key_fma:
|
||||
ret
|
||||
restore %g0,%g0,%o0 ! return 0
|
||||
.type poly1305_init_fma,#function
|
||||
.size poly1305_init_fma,.-poly1305_init_fma
|
||||
|
||||
.align 32
|
||||
poly1305_blocks_fma:
|
||||
save %sp,-STACK_FRAME-48,%sp
|
||||
srln %i2,4,%i2
|
||||
|
||||
brz,pn %i2,.Labort
|
||||
sub %i2,1,%i2
|
||||
|
||||
1: call .+8
|
||||
add %o7,.Lconsts_fma-1b,%o7
|
||||
|
||||
ldd [%o7+8*0],%f16 ! load constants
|
||||
ldd [%o7+8*1],%f18
|
||||
ldd [%o7+8*2],%f20
|
||||
ldd [%o7+8*3],%f22
|
||||
ldd [%o7+8*4],%f24
|
||||
ldd [%o7+8*5],%f26
|
||||
|
||||
ldd [%i0+8*0],%f0 ! load [biased] hash value
|
||||
ldd [%i0+8*1],%f4
|
||||
ldd [%i0+8*2],%f8
|
||||
ldd [%i0+8*3],%f12
|
||||
|
||||
std %f16,[%sp+LOCALS+8*0] ! input "template"
|
||||
sethi %hi((1023+52+96)<<20),%o3
|
||||
std %f18,[%sp+LOCALS+8*1]
|
||||
or %i3,%o3,%o3
|
||||
std %f20,[%sp+LOCALS+8*2]
|
||||
st %o3,[%sp+LOCALS+8*3]
|
||||
|
||||
and %i1,7,%l2
|
||||
andn %i1,7,%i1 ! align pointer
|
||||
mov 8,%l0
|
||||
sll %l2,3,%l2
|
||||
mov 16,%l1
|
||||
neg %l2,%l3
|
||||
|
||||
ldxa [%i1+%g0]0x88,%o0 ! load little-endian input
|
||||
brz %l2,.Linp_aligned_fma
|
||||
ldxa [%i1+%l0]0x88,%o2
|
||||
|
||||
ldxa [%i1+%l1]0x88,%o4
|
||||
add %i1,8,%i1
|
||||
|
||||
srlx %o0,%l2,%o0 ! align data
|
||||
sllx %o2,%l3,%o1
|
||||
srlx %o2,%l2,%o2
|
||||
or %o1,%o0,%o0
|
||||
sllx %o4,%l3,%o3
|
||||
srlx %o4,%l2,%o4 ! pre-shift
|
||||
or %o3,%o2,%o2
|
||||
|
||||
.Linp_aligned_fma:
|
||||
srlx %o0,32,%o1
|
||||
movrz %i2,0,%l1
|
||||
srlx %o2,32,%o3
|
||||
add %l1,%i1,%i1 ! conditional advance
|
||||
|
||||
st %o0,[%sp+LOCALS+8*0+4] ! fill "template"
|
||||
st %o1,[%sp+LOCALS+8*1+4]
|
||||
st %o2,[%sp+LOCALS+8*2+4]
|
||||
st %o3,[%sp+LOCALS+8*3+4]
|
||||
|
||||
ldd [%i0+8*4],%f28 ! load key
|
||||
ldd [%i0+8*5],%f30
|
||||
ldd [%i0+8*6],%f32
|
||||
ldd [%i0+8*7],%f34
|
||||
ldd [%i0+8*8],%f36
|
||||
ldd [%i0+8*9],%f38
|
||||
ldd [%i0+8*10],%f48
|
||||
ldd [%i0+8*11],%f50
|
||||
ldd [%i0+8*12],%f52
|
||||
ldd [%i0+8*13],%f54
|
||||
ldd [%i0+8*14],%f40
|
||||
ldd [%i0+8*15],%f42
|
||||
ldd [%i0+8*16],%f44
|
||||
ldd [%i0+8*17],%f46
|
||||
|
||||
stx %fsr,[%sp+LOCALS+8*4] ! save original %fsr
|
||||
ldx [%o7+8*6],%fsr ! load new %fsr
|
||||
|
||||
subcc %i2,1,%i2
|
||||
movrz %i2,0,%l1
|
||||
|
||||
ldd [%sp+LOCALS+8*0],%f56 ! load biased input
|
||||
ldd [%sp+LOCALS+8*1],%f58
|
||||
ldd [%sp+LOCALS+8*2],%f60
|
||||
ldd [%sp+LOCALS+8*3],%f62
|
||||
|
||||
fsubd %f0,%f16, %f0 ! de-bias hash value
|
||||
fsubd %f4,%f18,%f4
|
||||
ldxa [%i1+%g0]0x88,%o0 ! modulo-scheduled input load
|
||||
fsubd %f8,%f20,%f8
|
||||
fsubd %f12,%f22,%f12
|
||||
ldxa [%i1+%l0]0x88,%o2
|
||||
|
||||
fsubd %f56,%f16, %f56 ! de-bias input
|
||||
fsubd %f58,%f18,%f58
|
||||
fsubd %f60,%f20,%f60
|
||||
fsubd %f62,%f22,%f62
|
||||
|
||||
brz %l2,.Linp_aligned_fma2
|
||||
add %l1,%i1,%i1 ! conditional advance
|
||||
|
||||
sllx %o0,%l3,%o1 ! align data
|
||||
srlx %o0,%l2,%o3
|
||||
or %o1,%o4,%o0
|
||||
sllx %o2,%l3,%o1
|
||||
srlx %o2,%l2,%o4 ! pre-shift
|
||||
or %o3,%o1,%o2
|
||||
.Linp_aligned_fma2:
|
||||
srlx %o0,32,%o1
|
||||
srlx %o2,32,%o3
|
||||
|
||||
faddd %f0,%f56,%f56 ! accumulate input
|
||||
stw %o0,[%sp+LOCALS+8*0+4]
|
||||
faddd %f4,%f58,%f58
|
||||
stw %o1,[%sp+LOCALS+8*1+4]
|
||||
faddd %f8,%f60,%f60
|
||||
stw %o2,[%sp+LOCALS+8*2+4]
|
||||
faddd %f12,%f62,%f62
|
||||
stw %o3,[%sp+LOCALS+8*3+4]
|
||||
|
||||
b .Lentry_fma
|
||||
nop
|
||||
|
||||
.align 16
|
||||
.Loop_fma:
|
||||
ldxa [%i1+%g0]0x88,%o0 ! modulo-scheduled input load
|
||||
ldxa [%i1+%l0]0x88,%o2
|
||||
movrz %i2,0,%l1
|
||||
|
||||
faddd %f52,%f0,%f0 ! accumulate input
|
||||
faddd %f54,%f2,%f2
|
||||
faddd %f62,%f8,%f8
|
||||
faddd %f60,%f10,%f10
|
||||
|
||||
brz,pn %l2,.Linp_aligned_fma3
|
||||
add %l1,%i1,%i1 ! conditional advance
|
||||
|
||||
sllx %o0,%l3,%o1 ! align data
|
||||
srlx %o0,%l2,%o3
|
||||
or %o1,%o4,%o0
|
||||
sllx %o2,%l3,%o1
|
||||
srlx %o2,%l2,%o4 ! pre-shift
|
||||
or %o3,%o1,%o2
|
||||
|
||||
.Linp_aligned_fma3:
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! base 2^48 -> base 2^32
|
||||
faddd %f20,%f4,%f52
|
||||
srlx %o0,32,%o1
|
||||
faddd %f20,%f6,%f54
|
||||
srlx %o2,32,%o3
|
||||
faddd %f24,%f12,%f60
|
||||
st %o0,[%sp+LOCALS+8*0+4] ! fill "template"
|
||||
faddd %f24,%f14,%f62
|
||||
st %o1,[%sp+LOCALS+8*1+4]
|
||||
faddd %f18,%f0,%f48
|
||||
st %o2,[%sp+LOCALS+8*2+4]
|
||||
faddd %f18,%f2,%f50
|
||||
st %o3,[%sp+LOCALS+8*3+4]
|
||||
faddd %f22,%f8,%f56
|
||||
faddd %f22,%f10,%f58
|
||||
|
||||
fsubd %f52,%f20,%f52
|
||||
fsubd %f54,%f20,%f54
|
||||
fsubd %f60,%f24,%f60
|
||||
fsubd %f62,%f24,%f62
|
||||
fsubd %f48,%f18,%f48
|
||||
fsubd %f50,%f18,%f50
|
||||
fsubd %f56,%f22,%f56
|
||||
fsubd %f58,%f22,%f58
|
||||
|
||||
fsubd %f4,%f52,%f4
|
||||
fsubd %f6,%f54,%f6
|
||||
fsubd %f12,%f60,%f12
|
||||
fsubd %f14,%f62,%f14
|
||||
fsubd %f8,%f56,%f8
|
||||
fsubd %f10,%f58,%f10
|
||||
fsubd %f0,%f48,%f0
|
||||
fsubd %f2,%f50,%f2
|
||||
|
||||
faddd %f4,%f48,%f4
|
||||
faddd %f6,%f50,%f6
|
||||
faddd %f12,%f56,%f12
|
||||
faddd %f14,%f58,%f14
|
||||
faddd %f8,%f52,%f8
|
||||
faddd %f10,%f54,%f10
|
||||
.word 0x81be805d !fmaddd %f26,%f60,%f0,%f0
|
||||
.word 0x85be845f !fmaddd %f26,%f62,%f2,%f2
|
||||
|
||||
faddd %f4,%f6,%f58
|
||||
ldd [%i0+8*12],%f52 ! reload constants
|
||||
faddd %f12,%f14,%f62
|
||||
ldd [%i0+8*13],%f54
|
||||
faddd %f8,%f10,%f60
|
||||
ldd [%i0+8*10],%f48
|
||||
faddd %f0,%f2,%f56
|
||||
ldd [%i0+8*11],%f50
|
||||
|
||||
.Lentry_fma:
|
||||
fmuld %f58,%f44,%f0
|
||||
fmuld %f58,%f46,%f2
|
||||
fmuld %f58,%f32,%f8
|
||||
fmuld %f58,%f34,%f10
|
||||
fmuld %f58,%f28,%f4
|
||||
fmuld %f58,%f30,%f6
|
||||
fmuld %f58,%f36,%f12
|
||||
fmuld %f58,%f38,%f14
|
||||
|
||||
.word 0x81bfc055 !fmaddd %f62,%f52,%f0,%f0
|
||||
.word 0x85bfc457 !fmaddd %f62,%f54,%f2,%f2
|
||||
.word 0x91bfd04d !fmaddd %f62,%f44,%f8,%f8
|
||||
.word 0x95bfd44f !fmaddd %f62,%f46,%f10,%f10
|
||||
.word 0x89bfc849 !fmaddd %f62,%f40,%f4,%f4
|
||||
.word 0x8dbfcc4b !fmaddd %f62,%f42,%f6,%f6
|
||||
.word 0x99bfd85c !fmaddd %f62,%f28,%f12,%f12
|
||||
.word 0x9dbfdc5e !fmaddd %f62,%f30,%f14,%f14
|
||||
|
||||
.word 0x81bf4049 !fmaddd %f60,%f40,%f0,%f0
|
||||
.word 0x85bf444b !fmaddd %f60,%f42,%f2,%f2
|
||||
.word 0x91bf505c !fmaddd %f60,%f28,%f8,%f8
|
||||
.word 0x95bf545e !fmaddd %f60,%f30,%f10,%f10
|
||||
.word 0x89bf484d !fmaddd %f60,%f44,%f4,%f4
|
||||
ldd [%sp+LOCALS+8*0],%f52 ! load [biased] input
|
||||
.word 0x8dbf4c4f !fmaddd %f60,%f46,%f6,%f6
|
||||
ldd [%sp+LOCALS+8*1],%f54
|
||||
.word 0x99bf5841 !fmaddd %f60,%f32,%f12,%f12
|
||||
ldd [%sp+LOCALS+8*2],%f62
|
||||
.word 0x9dbf5c43 !fmaddd %f60,%f34,%f14,%f14
|
||||
ldd [%sp+LOCALS+8*3],%f60
|
||||
|
||||
.word 0x81be405c !fmaddd %f56,%f28,%f0,%f0
|
||||
fsubd %f52,%f16, %f52 ! de-bias input
|
||||
.word 0x85be445e !fmaddd %f56,%f30,%f2,%f2
|
||||
fsubd %f54,%f18,%f54
|
||||
.word 0x91be5045 !fmaddd %f56,%f36,%f8,%f8
|
||||
fsubd %f62,%f20,%f62
|
||||
.word 0x95be5447 !fmaddd %f56,%f38,%f10,%f10
|
||||
fsubd %f60,%f22,%f60
|
||||
.word 0x89be4841 !fmaddd %f56,%f32,%f4,%f4
|
||||
.word 0x8dbe4c43 !fmaddd %f56,%f34,%f6,%f6
|
||||
.word 0x99be5851 !fmaddd %f56,%f48,%f12,%f12
|
||||
.word 0x9dbe5c53 !fmaddd %f56,%f50,%f14,%f14
|
||||
|
||||
bcc SIZE_T_CC,.Loop_fma
|
||||
subcc %i2,1,%i2
|
||||
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! base 2^48 -> base 2^32
|
||||
faddd %f0,%f18,%f48
|
||||
faddd %f2,%f18,%f50
|
||||
faddd %f8,%f22,%f56
|
||||
faddd %f10,%f22,%f58
|
||||
faddd %f4,%f20,%f52
|
||||
faddd %f6,%f20,%f54
|
||||
faddd %f12,%f24,%f60
|
||||
faddd %f14,%f24,%f62
|
||||
|
||||
fsubd %f48,%f18,%f48
|
||||
fsubd %f50,%f18,%f50
|
||||
fsubd %f56,%f22,%f56
|
||||
fsubd %f58,%f22,%f58
|
||||
fsubd %f52,%f20,%f52
|
||||
fsubd %f54,%f20,%f54
|
||||
fsubd %f60,%f24,%f60
|
||||
fsubd %f62,%f24,%f62
|
||||
|
||||
fsubd %f4,%f52,%f4
|
||||
fsubd %f6,%f54,%f6
|
||||
fsubd %f12,%f60,%f12
|
||||
fsubd %f14,%f62,%f14
|
||||
fsubd %f8,%f56,%f8
|
||||
fsubd %f10,%f58,%f10
|
||||
fsubd %f0,%f48,%f0
|
||||
fsubd %f2,%f50,%f2
|
||||
|
||||
faddd %f4,%f48,%f4
|
||||
faddd %f6,%f50,%f6
|
||||
faddd %f12,%f56,%f12
|
||||
faddd %f14,%f58,%f14
|
||||
faddd %f8,%f52,%f8
|
||||
faddd %f10,%f54,%f10
|
||||
.word 0x81be805d !fmaddd %f26,%f60,%f0,%f0
|
||||
.word 0x85be845f !fmaddd %f26,%f62,%f2,%f2
|
||||
|
||||
faddd %f4,%f6,%f58
|
||||
faddd %f12,%f14,%f62
|
||||
faddd %f8,%f10,%f60
|
||||
faddd %f0,%f2,%f56
|
||||
|
||||
faddd %f58,%f18,%f58 ! bias
|
||||
faddd %f62,%f22,%f62
|
||||
faddd %f60,%f20,%f60
|
||||
faddd %f56,%f16, %f56
|
||||
|
||||
ldx [%sp+LOCALS+8*4],%fsr ! restore saved %fsr
|
||||
|
||||
std %f58,[%i0+8*1] ! store [biased] hash value
|
||||
std %f62,[%i0+8*3]
|
||||
std %f60,[%i0+8*2]
|
||||
std %f56,[%i0+8*0]
|
||||
|
||||
.Labort:
|
||||
ret
|
||||
restore
|
||||
.type poly1305_blocks_fma,#function
|
||||
.size poly1305_blocks_fma,.-poly1305_blocks_fma
|
||||
.align 32
|
||||
! poly1305_emit_fma: final reduction of the accumulated hash, addition of
! the nonce, and output of the 16-byte result for the FMA code path.
!
! Register roles after the save below (as used by this routine):
!   %i0 - hash state: four 8-byte slots, each read as two 32-bit words
!   %i1 - output buffer: 16 bytes, written one byte at a time
!   %i2 - nonce: four 32-bit words at offsets 0, 4, 8, 12
!   %i3 - used only as a scratch register here
! Working registers: %l0-%l3 hold the four 32-bit limbs, %l4 the overflow
! limb; %l5,%o0-%o3 are temporaries. Nothing is returned in a register.
poly1305_emit_fma:
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
|
||||
! Offset +0 of each slot goes to a temporary, offset +4 to the limb
! registers %l0-%l3.
ld [%i0+8*0+0],%l5 ! load hash
|
||||
ld [%i0+8*0+4],%l0
|
||||
ld [%i0+8*1+0],%o0
|
||||
ld [%i0+8*1+4],%l1
|
||||
ld [%i0+8*2+0],%o1
|
||||
ld [%i0+8*2+4],%l2
|
||||
ld [%i0+8*3+0],%o2
|
||||
ld [%i0+8*3+4],%l3
|
||||
|
||||
! 0xfff00000 selects the top 12 bits of each +0 word; clearing them leaves
! only the low 20 bits, which are treated as excess to carry upward.
sethi %hi(0xfff00000),%o3
|
||||
andn %l5,%o3,%l5 ! mask exponent
|
||||
andn %o0,%o3,%o0
|
||||
andn %o1,%o3,%o1
|
||||
andn %o2,%o3,%o2 ! can be partially reduced...
|
||||
mov 3,%o3
|
||||
|
||||
! Split the top excess word x: %l4 = x & 3 stays as the overflow limb,
! and the rest wraps around as 5*(x>>2), formed as (x & ~3) + (x >> 2).
srl %o2,2,%i3 ! ... so reduce
|
||||
and %o2,%o3,%l4
|
||||
andn %o2,%o3,%o2
|
||||
add %i3,%o2,%o2
|
||||
|
||||
! Add each excess word into the limb one position up; the wrapped top word
! goes into the lowest limb. The carry travels through the condition codes,
! so the order of these five instructions must not change.
addcc %o2,%l0,%l0
|
||||
addccc %l5,%l1,%l1
|
||||
addccc %o0,%l2,%l2
|
||||
addccc %o1,%l3,%l3
|
||||
addc %g0,%l4,%l4
|
||||
|
||||
! Form h+5 in %l5,%o0,%o1,%o2 with its overflow limb in %o3, leaving h
! itself untouched in %l0-%l4.
addcc %l0,5,%l5 ! compare to modulus
|
||||
addccc %l1,0,%o0
|
||||
addccc %l2,0,%o1
|
||||
addccc %l3,0,%o2
|
||||
addc %l4,0,%o3
|
||||
|
||||
! If h+5 reached bit 2 of the overflow limb (bit 130 overall) then h was
! at least 2^130-5. Turn that condition into an all-ones or all-zero mask.
srl %o3,2,%o3 ! did it carry/borrow?
|
||||
neg %o3,%o3
|
||||
sra %o3,31,%o3 ! mask
|
||||
|
||||
! Branch-free select per limb: take the h+5 word where the mask is set,
! the h word otherwise. The nonce loads are interleaved with the select
! and reuse the temporaries as soon as each one is consumed.
andn %l0,%o3,%l0
|
||||
and %l5,%o3,%l5
|
||||
andn %l1,%o3,%l1
|
||||
and %o0,%o3,%o0
|
||||
or %l5,%l0,%l0
|
||||
ld [%i2+0],%l5 ! load nonce
|
||||
andn %l2,%o3,%l2
|
||||
and %o1,%o3,%o1
|
||||
or %o0,%l1,%l1
|
||||
ld [%i2+4],%o0
|
||||
andn %l3,%o3,%l3
|
||||
and %o2,%o3,%o2
|
||||
or %o1,%l2,%l2
|
||||
ld [%i2+8],%o1
|
||||
or %o2,%l3,%l3
|
||||
ld [%i2+12],%o2
|
||||
|
||||
! 128-bit addition of the nonce; the carry out of the top limb is dropped
! (the last step is addc, which does not update the condition codes).
addcc %l5,%l0,%l0 ! accumulate nonce
|
||||
addccc %o0,%l1,%l1
|
||||
addccc %o1,%l2,%l2
|
||||
addc %o2,%l3,%l3
|
||||
|
||||
! Byte k of limb j is stored at offset 4*j+k, lowest byte first. Each limb
! is shifted right by 8 between rounds; the four limbs are processed in
! parallel, one byte position per round.
stb %l0,[%i1+0] ! write little-endian result
|
||||
srl %l0,8,%l0
|
||||
stb %l1,[%i1+4]
|
||||
srl %l1,8,%l1
|
||||
stb %l2,[%i1+8]
|
||||
srl %l2,8,%l2
|
||||
stb %l3,[%i1+12]
|
||||
srl %l3,8,%l3
|
||||
|
||||
stb %l0,[%i1+1]
|
||||
srl %l0,8,%l0
|
||||
stb %l1,[%i1+5]
|
||||
srl %l1,8,%l1
|
||||
stb %l2,[%i1+9]
|
||||
srl %l2,8,%l2
|
||||
stb %l3,[%i1+13]
|
||||
srl %l3,8,%l3
|
||||
|
||||
stb %l0,[%i1+2]
|
||||
srl %l0,8,%l0
|
||||
stb %l1,[%i1+6]
|
||||
srl %l1,8,%l1
|
||||
stb %l2,[%i1+10]
|
||||
srl %l2,8,%l2
|
||||
stb %l3,[%i1+14]
|
||||
srl %l3,8,%l3
|
||||
|
||||
stb %l0,[%i1+3]
|
||||
stb %l1,[%i1+7]
|
||||
stb %l2,[%i1+11]
|
||||
stb %l3,[%i1+15]
|
||||
|
||||
! restore executes in the delay slot of ret and pops the register window.
ret
|
||||
restore
|
||||
.type poly1305_emit_fma,#function
|
||||
.size poly1305_emit_fma,.-poly1305_emit_fma
|
||||
.align 64
|
||||
! Constant pool for the FMA code path, followed by the module ident string.
! Each floating-point constant is a double written as two 32-bit words,
! high word first (sign, 11-bit exponent, top 20 mantissa bits), then the
! low 32 mantissa bits. All entries are 8 bytes, so entry k sits at offset
! 8*k from the label.
! NOTE(review): the code that loads this table is outside this excerpt;
! confirm the offsets it uses before reordering or inserting entries.
.Lconsts_fma:
|
||||
! Entries 0-4: powers of two with exponent 52 + bit position.
.word 0x43300000,0x00000000 ! 2^(52+0)
|
||||
.word 0x45300000,0x00000000 ! 2^(52+32)
|
||||
.word 0x47300000,0x00000000 ! 2^(52+64)
|
||||
.word 0x49300000,0x00000000 ! 2^(52+96)
|
||||
.word 0x4b500000,0x00000000 ! 2^(52+130)
|
||||
|
||||
! Entry 5: exponent field 0x37f is 2^-128 and mantissa 1.25, i.e. 5*2^-130.
.word 0x37f40000,0x00000000 ! 5/2^130
|
||||
! Entry 6: not a double but an 8-byte %fsr image; only bit 30 is set.
.word 0,1<<30 ! fsr: truncate, no exceptions
|
||||
|
||||
! Entries 7-13: powers of two with exponent 52+16 + bit position, the last
! three scaled down by 2^96.
.word 0x44300000,0x00000000 ! 2^(52+16+0)
|
||||
.word 0x46300000,0x00000000 ! 2^(52+16+32)
|
||||
.word 0x48300000,0x00000000 ! 2^(52+16+64)
|
||||
.word 0x4a300000,0x00000000 ! 2^(52+16+96)
|
||||
.word 0x3e300000,0x00000000 ! 2^(52+16+0-96)
|
||||
.word 0x40300000,0x00000000 ! 2^(52+16+32-96)
|
||||
.word 0x42300000,0x00000000 ! 2^(52+16+64-96)
|
||||
! NUL-terminated ident string; the .align restores 4-byte alignment after it.
.asciz "Poly1305 for SPARCv9/VIS3/FMA, CRYPTOGAMS by <appro@openssl.org>"
|
||||
.align 4
|
6
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/poly1305.inc
vendored
Normal file
6
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/poly1305.inc
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
.PATH.S: ${.PARSEDIR}
|
||||
|
||||
POLY1305_SRCS = poly1305-sparcv9.S
|
||||
POLY1305_CPPFLAGS+=-DPOLY1305_ASM
|
||||
|
||||
.include "../../poly1305.inc"
|
@ -1,3 +1,5 @@
|
||||
#include "sparc_arch.h"
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
.global bn_mul_mont_int
|
||||
@ -10,7 +12,7 @@ bn_mul_mont_int:
|
||||
clr %o0
|
||||
.align 32
|
||||
.Lenter:
|
||||
save %sp,-128,%sp
|
||||
save %sp,-STACK_FRAME,%sp
|
||||
sll %i5,2,%i5 ! num*=4
|
||||
or %g1,%lo(0xffffffff),%g1
|
||||
ld [%i4],%i4
|
||||
@ -19,21 +21,21 @@ bn_mul_mont_int:
|
||||
ld [%i2],%l2 ! bp[0]
|
||||
nop
|
||||
|
||||
add %sp,0,%o7 ! real top of stack
|
||||
add %sp,STACK_BIAS,%o7 ! real top of stack
|
||||
ld [%i1],%o0 ! ap[0] ! redundant in squaring context
|
||||
sub %o7,%i5,%o7
|
||||
ld [%i1+4],%l5 ! ap[1]
|
||||
and %o7,-1024,%o7
|
||||
ld [%i3],%o1 ! np[0]
|
||||
sub %o7,0,%sp ! alloca
|
||||
sub %o7,STACK_BIAS,%sp ! alloca
|
||||
ld [%i3+4],%l6 ! np[1]
|
||||
be,pt %icc,.Lbn_sqr_mont
|
||||
be,pt SIZE_T_CC,.Lbn_sqr_mont
|
||||
mov 12,%l1
|
||||
|
||||
mulx %o0,%l2,%o0 ! ap[0]*bp[0]
|
||||
mulx %l5,%l2,%g4 !prologue! ap[1]*bp[0]
|
||||
and %o0,%g1,%o3
|
||||
add %sp,0+128,%l4
|
||||
add %sp,STACK_BIAS+STACK_FRAME,%l4
|
||||
ld [%i1+8],%l5 !prologue!
|
||||
|
||||
mulx %i4,%o3,%l3 ! "t[0]"*n0
|
||||
@ -92,7 +94,7 @@ bn_mul_mont_int:
|
||||
mov 4,%l0 ! i++
|
||||
ld [%i2+4],%l2 ! bp[1]
|
||||
.Louter:
|
||||
add %sp,0+128,%l4
|
||||
add %sp,STACK_BIAS+STACK_FRAME,%l4
|
||||
ld [%i1],%o0 ! ap[0]
|
||||
ld [%i1+4],%l5 ! ap[1]
|
||||
ld [%i3],%o1 ! np[0]
|
||||
@ -211,7 +213,7 @@ bn_mul_mont_int:
|
||||
mulx %l2,%l2,%o0 ! ap[0]*ap[0]
|
||||
mulx %l5,%l2,%g4 !prologue!
|
||||
and %o0,%g1,%o3
|
||||
add %sp,0+128,%l4
|
||||
add %sp,STACK_BIAS+STACK_FRAME,%l4
|
||||
ld [%i1+8],%l5 !prologue!
|
||||
|
||||
mulx %i4,%o3,%l3 ! "t[0]"*n0
|
||||
@ -220,7 +222,7 @@ bn_mul_mont_int:
|
||||
|
||||
mulx %o1,%l3,%o1 ! np[0]*"t[0]"*n0
|
||||
mulx %l6,%l3,%o4 !prologue!
|
||||
and %o0,1,%i2
|
||||
and %o0,1,%o5
|
||||
ld [%i3+8],%l6 !prologue!
|
||||
srlx %o0,1,%o0
|
||||
add %o3,%o1,%o1
|
||||
@ -237,9 +239,9 @@ bn_mul_mont_int:
|
||||
ld [%i3+%l1],%l6 ! np[j]
|
||||
srlx %o0,32,%o0
|
||||
add %o3,%o3,%o3
|
||||
or %i2,%o3,%o3
|
||||
or %o5,%o3,%o3
|
||||
mov %g5,%o4
|
||||
srlx %o3,32,%i2
|
||||
srlx %o3,32,%o5
|
||||
add %l1,4,%l1 ! j++
|
||||
and %o3,%g1,%o3
|
||||
cmp %l1,%i5
|
||||
@ -258,8 +260,8 @@ bn_mul_mont_int:
|
||||
and %o0,%g1,%o3
|
||||
srlx %o0,32,%o0
|
||||
add %o3,%o3,%o3
|
||||
or %i2,%o3,%o3
|
||||
srlx %o3,32,%i2
|
||||
or %o5,%o3,%o3
|
||||
srlx %o3,32,%o5
|
||||
and %o3,%g1,%o3
|
||||
add %o3,%o1,%o1
|
||||
st %o1,[%l4]
|
||||
@ -270,22 +272,22 @@ bn_mul_mont_int:
|
||||
and %o0,%g1,%o3
|
||||
srlx %o0,32,%o0
|
||||
add %o3,%o3,%o3
|
||||
or %i2,%o3,%o3
|
||||
srlx %o3,32,%i2
|
||||
or %o5,%o3,%o3
|
||||
srlx %o3,32,%o5
|
||||
and %o3,%g1,%o3
|
||||
add %o3,%o1,%o1
|
||||
st %o1,[%l4+4]
|
||||
srlx %o1,32,%o1
|
||||
|
||||
add %o0,%o0,%o0
|
||||
or %i2,%o0,%o0
|
||||
or %o5,%o0,%o0
|
||||
add %o0,%o1,%o1
|
||||
st %o1,[%l4+8]
|
||||
srlx %o1,32,%o2
|
||||
|
||||
ld [%sp+0+128],%g4 ! tp[0]
|
||||
ld [%sp+0+128+4],%g5 ! tp[1]
|
||||
ld [%sp+0+128+8],%l7 ! tp[2]
|
||||
ld [%sp+STACK_BIAS+STACK_FRAME],%g4 ! tp[0]
|
||||
ld [%sp+STACK_BIAS+STACK_FRAME+4],%g5 ! tp[1]
|
||||
ld [%sp+STACK_BIAS+STACK_FRAME+8],%l7 ! tp[2]
|
||||
ld [%i1+4],%l2 ! ap[1]
|
||||
ld [%i1+8],%l5 ! ap[2]
|
||||
ld [%i3],%o1 ! np[0]
|
||||
@ -304,19 +306,19 @@ bn_mul_mont_int:
|
||||
add %g5,%o1,%o1
|
||||
srlx %o0,32,%o0
|
||||
add %o3,%o1,%o1
|
||||
and %o0,1,%i2
|
||||
and %o0,1,%o5
|
||||
add %o4,%o1,%o1
|
||||
srlx %o0,1,%o0
|
||||
mov 12,%l1
|
||||
st %o1,[%sp+0+128] ! tp[0]=
|
||||
st %o1,[%sp+STACK_BIAS+STACK_FRAME] ! tp[0]=
|
||||
srlx %o1,32,%o1
|
||||
add %sp,0+128+4,%l4
|
||||
add %sp,STACK_BIAS+STACK_FRAME+4,%l4
|
||||
|
||||
.Lsqr_2nd:
|
||||
mulx %l5,%l2,%o3
|
||||
mulx %l6,%l3,%o4
|
||||
add %o3,%o0,%o0
|
||||
add %l7,%o1,%o1
|
||||
add %l7,%o5,%o5
|
||||
ld [%i1+%l1],%l5 ! ap[j]
|
||||
and %o0,%g1,%o3
|
||||
ld [%i3+%l1],%l6 ! np[j]
|
||||
@ -325,8 +327,8 @@ bn_mul_mont_int:
|
||||
ld [%l4+8],%l7 ! tp[j]
|
||||
add %o3,%o3,%o3
|
||||
add %l1,4,%l1 ! j++
|
||||
or %i2,%o3,%o3
|
||||
srlx %o3,32,%i2
|
||||
add %o5,%o3,%o3
|
||||
srlx %o3,32,%o5
|
||||
and %o3,%g1,%o3
|
||||
cmp %l1,%i5
|
||||
add %o3,%o1,%o1
|
||||
@ -339,27 +341,27 @@ bn_mul_mont_int:
|
||||
mulx %l5,%l2,%o3
|
||||
mulx %l6,%l3,%o4
|
||||
add %o3,%o0,%o0
|
||||
add %l7,%o1,%o1
|
||||
add %l7,%o5,%o5
|
||||
and %o0,%g1,%o3
|
||||
srlx %o0,32,%o0
|
||||
add %o4,%o1,%o1
|
||||
add %o3,%o3,%o3
|
||||
or %i2,%o3,%o3
|
||||
srlx %o3,32,%i2
|
||||
add %o5,%o3,%o3
|
||||
srlx %o3,32,%o5
|
||||
and %o3,%g1,%o3
|
||||
add %o3,%o1,%o1
|
||||
st %o1,[%l4] ! tp[j-1]
|
||||
srlx %o1,32,%o1
|
||||
|
||||
add %o0,%o0,%o0
|
||||
or %i2,%o0,%o0
|
||||
add %o5,%o0,%o0
|
||||
add %o0,%o1,%o1
|
||||
add %o2,%o1,%o1
|
||||
st %o1,[%l4+4]
|
||||
srlx %o1,32,%o2
|
||||
|
||||
ld [%sp+0+128],%g5 ! tp[0]
|
||||
ld [%sp+0+128+4],%l7 ! tp[1]
|
||||
ld [%sp+STACK_BIAS+STACK_FRAME],%g5 ! tp[0]
|
||||
ld [%sp+STACK_BIAS+STACK_FRAME+4],%l7 ! tp[1]
|
||||
ld [%i1+8],%l2 ! ap[2]
|
||||
ld [%i3],%o1 ! np[0]
|
||||
ld [%i3+4],%l6 ! np[1]
|
||||
@ -372,9 +374,9 @@ bn_mul_mont_int:
|
||||
and %o0,%g1,%o3
|
||||
add %g5,%o1,%o1
|
||||
srlx %o0,32,%o0
|
||||
add %sp,0+128,%l4
|
||||
add %sp,STACK_BIAS+STACK_FRAME,%l4
|
||||
srlx %o1,32,%o1
|
||||
and %o0,1,%i2
|
||||
and %o0,1,%o5
|
||||
srlx %o0,1,%o0
|
||||
mov 4,%l1
|
||||
|
||||
@ -412,7 +414,7 @@ bn_mul_mont_int:
|
||||
.Lsqr_inner2:
|
||||
mulx %l5,%l2,%o3
|
||||
mulx %l6,%l3,%o4
|
||||
add %l7,%o1,%o1
|
||||
add %l7,%o5,%o5
|
||||
add %o3,%o0,%o0
|
||||
ld [%i1+%l1],%l5 ! ap[j]
|
||||
and %o0,%g1,%o3
|
||||
@ -420,9 +422,9 @@ bn_mul_mont_int:
|
||||
srlx %o0,32,%o0
|
||||
add %o3,%o3,%o3
|
||||
ld [%l4+8],%l7 ! tp[j]
|
||||
or %i2,%o3,%o3
|
||||
add %o5,%o3,%o3
|
||||
add %l1,4,%l1 ! j++
|
||||
srlx %o3,32,%i2
|
||||
srlx %o3,32,%o5
|
||||
and %o3,%g1,%o3
|
||||
cmp %l1,%i5
|
||||
add %o3,%o1,%o1
|
||||
@ -435,13 +437,13 @@ bn_mul_mont_int:
|
||||
.Lsqr_no_inner2:
|
||||
mulx %l5,%l2,%o3
|
||||
mulx %l6,%l3,%o4
|
||||
add %l7,%o1,%o1
|
||||
add %l7,%o5,%o5
|
||||
add %o3,%o0,%o0
|
||||
and %o0,%g1,%o3
|
||||
srlx %o0,32,%o0
|
||||
add %o3,%o3,%o3
|
||||
or %i2,%o3,%o3
|
||||
srlx %o3,32,%i2
|
||||
add %o5,%o3,%o3
|
||||
srlx %o3,32,%o5
|
||||
and %o3,%g1,%o3
|
||||
add %o3,%o1,%o1
|
||||
add %o4,%o1,%o1
|
||||
@ -449,15 +451,15 @@ bn_mul_mont_int:
|
||||
srlx %o1,32,%o1
|
||||
|
||||
add %o0,%o0,%o0
|
||||
or %i2,%o0,%o0
|
||||
add %o5,%o0,%o0
|
||||
add %o0,%o1,%o1
|
||||
add %o2,%o1,%o1
|
||||
st %o1,[%l4+4]
|
||||
srlx %o1,32,%o2
|
||||
|
||||
add %l0,4,%l0 ! i++
|
||||
ld [%sp+0+128],%g5 ! tp[0]
|
||||
ld [%sp+0+128+4],%l7 ! tp[1]
|
||||
ld [%sp+STACK_BIAS+STACK_FRAME],%g5 ! tp[0]
|
||||
ld [%sp+STACK_BIAS+STACK_FRAME+4],%l7 ! tp[1]
|
||||
ld [%i1+%l0],%l2 ! ap[j]
|
||||
ld [%i3],%o1 ! np[0]
|
||||
ld [%i3+4],%l6 ! np[1]
|
||||
@ -470,9 +472,9 @@ bn_mul_mont_int:
|
||||
and %o0,%g1,%o3
|
||||
add %g5,%o1,%o1
|
||||
srlx %o0,32,%o0
|
||||
add %sp,0+128,%l4
|
||||
add %sp,STACK_BIAS+STACK_FRAME,%l4
|
||||
srlx %o1,32,%o1
|
||||
and %o0,1,%i2
|
||||
and %o0,1,%o5
|
||||
srlx %o0,1,%o0
|
||||
|
||||
cmp %g4,%i5 ! i<num-1
|
||||
@ -494,14 +496,17 @@ bn_mul_mont_int:
|
||||
!.Lsqr_last
|
||||
|
||||
mulx %l6,%l3,%o4
|
||||
add %l7,%o1,%o1
|
||||
add %l7,%o3,%o3
|
||||
srlx %o3,32,%g4
|
||||
and %o3,%g1,%o3
|
||||
add %g4,%o5,%o5
|
||||
add %o3,%o1,%o1
|
||||
add %o4,%o1,%o1
|
||||
st %o1,[%l4]
|
||||
srlx %o1,32,%o1
|
||||
|
||||
add %o0,%o0,%o0 ! recover %o0
|
||||
or %i2,%o0,%o0
|
||||
add %o5,%o0,%o0
|
||||
add %o0,%o1,%o1
|
||||
add %o2,%o1,%o1
|
||||
st %o1,[%l4+4]
|
||||
|
0
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/sparcv9_modes.S
vendored
Normal file
0
crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/sparcv9_modes.S
vendored
Normal file
@ -1,9 +1,11 @@
|
||||
#include "sparc_arch.h"
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
.global bn_mul_mont_fpu
|
||||
.align 32
|
||||
bn_mul_mont_fpu:
|
||||
save %sp,-128-64,%sp
|
||||
save %sp,-STACK_FRAME-64,%sp
|
||||
|
||||
cmp %i5,4
|
||||
bl,a,pn %icc,.Lret
|
||||
@ -22,15 +24,15 @@ bn_mul_mont_fpu:
|
||||
|
||||
sll %i5,3,%i5 ! num*=8
|
||||
|
||||
add %sp,0,%o0 ! real top of stack
|
||||
add %sp,STACK_BIAS,%o0 ! real top of stack
|
||||
sll %i5,2,%o1
|
||||
add %o1,%i5,%o1 ! %o1=num*5
|
||||
sub %o0,%o1,%o0
|
||||
and %o0,-2048,%o0 ! optimize TLB utilization
|
||||
sub %o0,0,%sp ! alloca(5*num*8)
|
||||
sub %o0,STACK_BIAS,%sp ! alloca(5*num*8)
|
||||
|
||||
rd %asi,%o7 ! save %asi
|
||||
add %sp,0+128+64,%l0
|
||||
add %sp,STACK_BIAS+STACK_FRAME+64,%l0
|
||||
add %l0,%i5,%l1
|
||||
add %l1,%i5,%l1 ! [an]p_[lh] point at the vectors' ends !
|
||||
add %l1,%i5,%l2
|
||||
@ -44,7 +46,7 @@ bn_mul_mont_fpu:
|
||||
add %i2,%i5,%i2
|
||||
add %i3,%i5,%i3
|
||||
|
||||
stx %o7,[%sp+0+128+48] ! save %asi
|
||||
stx %o7,[%sp+STACK_BIAS+STACK_FRAME+48] ! save %asi
|
||||
|
||||
sub %g0,%i5,%l5 ! i=-num
|
||||
sub %g0,%i5,%l6 ! j=-num
|
||||
@ -65,7 +67,7 @@ bn_mul_mont_fpu:
|
||||
|
||||
mulx %o1,%o0,%o0 ! ap[0]*bp[0]
|
||||
mulx %g4,%o0,%o0 ! ap[0]*bp[0]*n0
|
||||
stx %o0,[%sp+0+128+0]
|
||||
stx %o0,[%sp+STACK_BIAS+STACK_FRAME+0]
|
||||
|
||||
ld [%o3+0],%f17 ! load a[j] as pair of 32-bit words
|
||||
.word 0xa1b00c20 ! fzeros %f16
|
||||
@ -87,13 +89,13 @@ bn_mul_mont_fpu:
|
||||
fxtod %f22,%f22
|
||||
|
||||
! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
|
||||
ldda [%sp+0+128+6]%asi,%f8
|
||||
ldda [%sp+STACK_BIAS+STACK_FRAME+6]%asi,%f8
|
||||
fxtod %f0,%f0
|
||||
ldda [%sp+0+128+4]%asi,%f10
|
||||
ldda [%sp+STACK_BIAS+STACK_FRAME+4]%asi,%f10
|
||||
fxtod %f2,%f2
|
||||
ldda [%sp+0+128+2]%asi,%f12
|
||||
ldda [%sp+STACK_BIAS+STACK_FRAME+2]%asi,%f12
|
||||
fxtod %f4,%f4
|
||||
ldda [%sp+0+128+0]%asi,%f14
|
||||
ldda [%sp+STACK_BIAS+STACK_FRAME+0]%asi,%f14
|
||||
fxtod %f6,%f6
|
||||
|
||||
std %f16,[%l1+%l6] ! save smashed ap[j] in double format
|
||||
@ -139,13 +141,13 @@ bn_mul_mont_fpu:
|
||||
fdtox %f52,%f52
|
||||
fdtox %f54,%f54
|
||||
|
||||
std %f48,[%sp+0+128+0]
|
||||
std %f48,[%sp+STACK_BIAS+STACK_FRAME+0]
|
||||
add %l6,8,%l6
|
||||
std %f50,[%sp+0+128+8]
|
||||
std %f50,[%sp+STACK_BIAS+STACK_FRAME+8]
|
||||
add %i1,%l6,%o4
|
||||
std %f52,[%sp+0+128+16]
|
||||
std %f52,[%sp+STACK_BIAS+STACK_FRAME+16]
|
||||
add %i3,%l6,%o5
|
||||
std %f54,[%sp+0+128+24]
|
||||
std %f54,[%sp+STACK_BIAS+STACK_FRAME+24]
|
||||
|
||||
ld [%o4+0],%f17 ! load a[j] as pair of 32-bit words
|
||||
.word 0xa1b00c20 ! fzeros %f16
|
||||
@ -161,13 +163,13 @@ bn_mul_mont_fpu:
|
||||
fxtod %f20,%f20
|
||||
fxtod %f22,%f22
|
||||
|
||||
ldx [%sp+0+128+0],%o0
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+0],%o0
|
||||
fmuld %f16,%f0,%f32
|
||||
ldx [%sp+0+128+8],%o1
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+8],%o1
|
||||
fmuld %f20,%f8,%f48
|
||||
ldx [%sp+0+128+16],%o2
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+16],%o2
|
||||
fmuld %f16,%f2,%f34
|
||||
ldx [%sp+0+128+24],%o3
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+24],%o3
|
||||
fmuld %f20,%f10,%f50
|
||||
|
||||
srlx %o0,16,%o7
|
||||
@ -223,12 +225,12 @@ bn_mul_mont_fpu:
|
||||
fdtox %f52,%f52
|
||||
fdtox %f54,%f54
|
||||
|
||||
std %f48,[%sp+0+128+0]
|
||||
std %f50,[%sp+0+128+8]
|
||||
std %f48,[%sp+STACK_BIAS+STACK_FRAME+0]
|
||||
std %f50,[%sp+STACK_BIAS+STACK_FRAME+8]
|
||||
addcc %l6,8,%l6
|
||||
std %f52,[%sp+0+128+16]
|
||||
std %f52,[%sp+STACK_BIAS+STACK_FRAME+16]
|
||||
bz,pn %icc,.L1stskip
|
||||
std %f54,[%sp+0+128+24]
|
||||
std %f54,[%sp+STACK_BIAS+STACK_FRAME+24]
|
||||
|
||||
.align 32 ! incidentally already aligned !
|
||||
.L1st:
|
||||
@ -248,13 +250,13 @@ bn_mul_mont_fpu:
|
||||
fxtod %f20,%f20
|
||||
fxtod %f22,%f22
|
||||
|
||||
ldx [%sp+0+128+0],%o0
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+0],%o0
|
||||
fmuld %f16,%f0,%f32
|
||||
ldx [%sp+0+128+8],%o1
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+8],%o1
|
||||
fmuld %f20,%f8,%f48
|
||||
ldx [%sp+0+128+16],%o2
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+16],%o2
|
||||
fmuld %f16,%f2,%f34
|
||||
ldx [%sp+0+128+24],%o3
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+24],%o3
|
||||
fmuld %f20,%f10,%f50
|
||||
|
||||
srlx %o0,16,%o7
|
||||
@ -314,10 +316,10 @@ bn_mul_mont_fpu:
|
||||
fdtox %f52,%f52
|
||||
fdtox %f54,%f54
|
||||
|
||||
std %f48,[%sp+0+128+0]
|
||||
std %f50,[%sp+0+128+8]
|
||||
std %f52,[%sp+0+128+16]
|
||||
std %f54,[%sp+0+128+24]
|
||||
std %f48,[%sp+STACK_BIAS+STACK_FRAME+0]
|
||||
std %f50,[%sp+STACK_BIAS+STACK_FRAME+8]
|
||||
std %f52,[%sp+STACK_BIAS+STACK_FRAME+16]
|
||||
std %f54,[%sp+STACK_BIAS+STACK_FRAME+24]
|
||||
|
||||
addcc %l6,8,%l6
|
||||
bnz,pt %icc,.L1st
|
||||
@ -327,15 +329,15 @@ bn_mul_mont_fpu:
|
||||
fdtox %f24,%f24
|
||||
fdtox %f26,%f26
|
||||
|
||||
ldx [%sp+0+128+0],%o0
|
||||
ldx [%sp+0+128+8],%o1
|
||||
ldx [%sp+0+128+16],%o2
|
||||
ldx [%sp+0+128+24],%o3
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+0],%o0
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+8],%o1
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+16],%o2
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+24],%o3
|
||||
|
||||
srlx %o0,16,%o7
|
||||
std %f24,[%sp+0+128+32]
|
||||
std %f24,[%sp+STACK_BIAS+STACK_FRAME+32]
|
||||
add %o7,%o1,%o1
|
||||
std %f26,[%sp+0+128+40]
|
||||
std %f26,[%sp+STACK_BIAS+STACK_FRAME+40]
|
||||
srlx %o1,16,%o7
|
||||
add %o7,%o2,%o2
|
||||
srlx %o2,16,%o7
|
||||
@ -349,9 +351,9 @@ bn_mul_mont_fpu:
|
||||
or %o1,%o0,%o0
|
||||
or %o2,%o0,%o0
|
||||
or %o7,%o0,%o0 ! 64-bit result
|
||||
ldx [%sp+0+128+32],%o4
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+32],%o4
|
||||
addcc %g1,%o0,%o0
|
||||
ldx [%sp+0+128+40],%o5
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+40],%o5
|
||||
srlx %o3,16,%g1 ! 34-bit carry
|
||||
bcs,a %xcc,.+8
|
||||
add %g1,1,%g1
|
||||
@ -377,7 +379,7 @@ bn_mul_mont_fpu:
|
||||
.align 32
|
||||
.Louter:
|
||||
sub %g0,%i5,%l6 ! j=-num
|
||||
add %sp,0+128+64,%l0
|
||||
add %sp,STACK_BIAS+STACK_FRAME+64,%l0
|
||||
|
||||
add %i1,%l6,%o3
|
||||
add %i2,%l5,%o4
|
||||
@ -395,7 +397,7 @@ bn_mul_mont_fpu:
|
||||
mulx %o1,%o0,%o0
|
||||
addcc %o2,%o0,%o0
|
||||
mulx %g4,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0
|
||||
stx %o0,[%sp+0+128+0]
|
||||
stx %o0,[%sp+STACK_BIAS+STACK_FRAME+0]
|
||||
|
||||
! transfer b[i] to FPU as 4x16-bit values
|
||||
ldda [%o4+2]%asi,%f0
|
||||
@ -404,13 +406,13 @@ bn_mul_mont_fpu:
|
||||
ldda [%o4+4]%asi,%f6
|
||||
|
||||
! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
|
||||
ldda [%sp+0+128+6]%asi,%f8
|
||||
ldda [%sp+STACK_BIAS+STACK_FRAME+6]%asi,%f8
|
||||
fxtod %f0,%f0
|
||||
ldda [%sp+0+128+4]%asi,%f10
|
||||
ldda [%sp+STACK_BIAS+STACK_FRAME+4]%asi,%f10
|
||||
fxtod %f2,%f2
|
||||
ldda [%sp+0+128+2]%asi,%f12
|
||||
ldda [%sp+STACK_BIAS+STACK_FRAME+2]%asi,%f12
|
||||
fxtod %f4,%f4
|
||||
ldda [%sp+0+128+0]%asi,%f14
|
||||
ldda [%sp+STACK_BIAS+STACK_FRAME+0]%asi,%f14
|
||||
fxtod %f6,%f6
|
||||
ldd [%l1+%l6],%f16 ! load a[j] in double format
|
||||
fxtod %f8,%f8
|
||||
@ -455,11 +457,11 @@ bn_mul_mont_fpu:
|
||||
fdtox %f52,%f52
|
||||
fdtox %f54,%f54
|
||||
|
||||
std %f48,[%sp+0+128+0]
|
||||
std %f50,[%sp+0+128+8]
|
||||
std %f52,[%sp+0+128+16]
|
||||
std %f48,[%sp+STACK_BIAS+STACK_FRAME+0]
|
||||
std %f50,[%sp+STACK_BIAS+STACK_FRAME+8]
|
||||
std %f52,[%sp+STACK_BIAS+STACK_FRAME+16]
|
||||
add %l6,8,%l6
|
||||
std %f54,[%sp+0+128+24]
|
||||
std %f54,[%sp+STACK_BIAS+STACK_FRAME+24]
|
||||
|
||||
ldd [%l1+%l6],%f16 ! load a[j] in double format
|
||||
ldd [%l2+%l6],%f18
|
||||
@ -471,15 +473,15 @@ bn_mul_mont_fpu:
|
||||
fmuld %f16,%f2,%f34
|
||||
fmuld %f20,%f10,%f50
|
||||
fmuld %f16,%f4,%f36
|
||||
ldx [%sp+0+128+0],%o0
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+0],%o0
|
||||
faddd %f32,%f48,%f48
|
||||
fmuld %f20,%f12,%f52
|
||||
ldx [%sp+0+128+8],%o1
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+8],%o1
|
||||
fmuld %f16,%f6,%f38
|
||||
ldx [%sp+0+128+16],%o2
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+16],%o2
|
||||
faddd %f34,%f50,%f50
|
||||
fmuld %f20,%f14,%f54
|
||||
ldx [%sp+0+128+24],%o3
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+24],%o3
|
||||
fmuld %f18,%f0,%f40
|
||||
|
||||
srlx %o0,16,%o7
|
||||
@ -527,12 +529,12 @@ bn_mul_mont_fpu:
|
||||
fdtox %f52,%f52
|
||||
fdtox %f54,%f54
|
||||
|
||||
std %f48,[%sp+0+128+0]
|
||||
std %f50,[%sp+0+128+8]
|
||||
std %f48,[%sp+STACK_BIAS+STACK_FRAME+0]
|
||||
std %f50,[%sp+STACK_BIAS+STACK_FRAME+8]
|
||||
addcc %l6,8,%l6
|
||||
std %f52,[%sp+0+128+16]
|
||||
std %f52,[%sp+STACK_BIAS+STACK_FRAME+16]
|
||||
bz,pn %icc,.Linnerskip
|
||||
std %f54,[%sp+0+128+24]
|
||||
std %f54,[%sp+STACK_BIAS+STACK_FRAME+24]
|
||||
|
||||
ba .Linner
|
||||
nop
|
||||
@ -548,15 +550,15 @@ bn_mul_mont_fpu:
|
||||
fmuld %f16,%f2,%f34
|
||||
fmuld %f20,%f10,%f50
|
||||
fmuld %f16,%f4,%f36
|
||||
ldx [%sp+0+128+0],%o0
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+0],%o0
|
||||
faddd %f32,%f48,%f48
|
||||
fmuld %f20,%f12,%f52
|
||||
ldx [%sp+0+128+8],%o1
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+8],%o1
|
||||
fmuld %f16,%f6,%f38
|
||||
ldx [%sp+0+128+16],%o2
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+16],%o2
|
||||
faddd %f34,%f50,%f50
|
||||
fmuld %f20,%f14,%f54
|
||||
ldx [%sp+0+128+24],%o3
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+24],%o3
|
||||
fmuld %f18,%f0,%f40
|
||||
|
||||
srlx %o0,16,%o7
|
||||
@ -606,11 +608,11 @@ bn_mul_mont_fpu:
|
||||
stx %o0,[%l0] ! tp[j-1]
|
||||
fdtox %f54,%f54
|
||||
|
||||
std %f48,[%sp+0+128+0]
|
||||
std %f50,[%sp+0+128+8]
|
||||
std %f52,[%sp+0+128+16]
|
||||
std %f48,[%sp+STACK_BIAS+STACK_FRAME+0]
|
||||
std %f50,[%sp+STACK_BIAS+STACK_FRAME+8]
|
||||
std %f52,[%sp+STACK_BIAS+STACK_FRAME+16]
|
||||
addcc %l6,8,%l6
|
||||
std %f54,[%sp+0+128+24]
|
||||
std %f54,[%sp+STACK_BIAS+STACK_FRAME+24]
|
||||
bnz,pt %icc,.Linner
|
||||
add %l0,8,%l0
|
||||
|
||||
@ -618,15 +620,15 @@ bn_mul_mont_fpu:
|
||||
fdtox %f24,%f24
|
||||
fdtox %f26,%f26
|
||||
|
||||
ldx [%sp+0+128+0],%o0
|
||||
ldx [%sp+0+128+8],%o1
|
||||
ldx [%sp+0+128+16],%o2
|
||||
ldx [%sp+0+128+24],%o3
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+0],%o0
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+8],%o1
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+16],%o2
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+24],%o3
|
||||
|
||||
srlx %o0,16,%o7
|
||||
std %f24,[%sp+0+128+32]
|
||||
std %f24,[%sp+STACK_BIAS+STACK_FRAME+32]
|
||||
add %o7,%o1,%o1
|
||||
std %f26,[%sp+0+128+40]
|
||||
std %f26,[%sp+STACK_BIAS+STACK_FRAME+40]
|
||||
srlx %o1,16,%o7
|
||||
add %o7,%o2,%o2
|
||||
srlx %o2,16,%o7
|
||||
@ -639,9 +641,9 @@ bn_mul_mont_fpu:
|
||||
sllx %o3,48,%o7
|
||||
or %o1,%o0,%o0
|
||||
or %o2,%o0,%o0
|
||||
ldx [%sp+0+128+32],%o4
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+32],%o4
|
||||
or %o7,%o0,%o0 ! 64-bit result
|
||||
ldx [%sp+0+128+40],%o5
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+40],%o5
|
||||
addcc %g1,%o0,%o0
|
||||
ldx [%l0+8],%o7 ! tp[j]
|
||||
srlx %o3,16,%g1 ! 34-bit carry
|
||||
@ -728,7 +730,7 @@ bn_mul_mont_fpu:
|
||||
brnz,pt %o7,.Lzap
|
||||
nop
|
||||
|
||||
ldx [%sp+0+128+48],%o7
|
||||
ldx [%sp+STACK_BIAS+STACK_FRAME+48],%o7
|
||||
wr %g0,%o7,%asi ! restore %asi
|
||||
|
||||
mov 1,%i0
|
||||
|
@ -1,8 +1,15 @@
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
#endif
|
||||
|
||||
.section ".text",#alloc,#execinstr
|
||||
.globl bn_mul_mont_vis3
|
||||
.align 32
|
||||
bn_mul_mont_vis3:
|
||||
add %sp, 0, %g4 ! real top of stack
|
||||
add %sp, STACK_BIAS, %g4 ! real top of stack
|
||||
sll %o5, 2, %o5 ! size in bytes
|
||||
add %o5, 63, %g5
|
||||
andn %g5, 63, %g5 ! buffer size rounded up to 64 bytes
|
||||
@ -10,12 +17,12 @@ bn_mul_mont_vis3:
|
||||
add %g5, %g1, %g1 ! 3*buffer size
|
||||
sub %g4, %g1, %g1
|
||||
andn %g1, 63, %g1 ! align at 64 byte
|
||||
sub %g1, 112, %g1 ! new top of stack
|
||||
sub %g1, STACK_FRAME, %g1 ! new top of stack
|
||||
sub %g1, %g4, %g1
|
||||
|
||||
save %sp, %g1, %sp
|
||||
ld [%i4+0], %l0 ! pull n0[0..1] value
|
||||
add %sp, 0+112, %l5
|
||||
add %sp, STACK_BIAS+STACK_FRAME, %l5
|
||||
ld [%i4+4], %l1
|
||||
add %l5, %g5, %l7
|
||||
ld [%i2+0], %l2 ! m0=bp[0]
|
||||
|
Loading…
Reference in New Issue
Block a user