use assembly

This commit is contained in:
Russ Cox 2005-11-04 16:05:20 +00:00
parent f493c9c544
commit 93a43be0ea
12 changed files with 569 additions and 43 deletions

View File

@ -1,5 +1,6 @@
# Unix
AR=ar
AS=as
CC=gcc
RANLIB=ranlib
CFLAGS=-I$(ROOT) -I$(ROOT)/include -I$(ROOT)/kern -c -ggdb -D_THREAD_SAFE -pthread # not ready for this yet: -Wall

View File

@ -7,6 +7,7 @@ MING=mingw32-
#MING=
AR=$(MING)ar
CC=$(MING)gcc
AS=$(MING)as
RANLIB=$(MING)ranlib
CFLAGS=-I$(ROOT)/include -I$(ROOT) -I$(ROOT)/kern -c -D_X86_ -DIS_32 -DWINDOWS
O=o
@ -28,7 +29,7 @@ TARG=drawterm.exe
#OS=win32
#GUI=win32
all: $(TARG)
all: default
# for root
libmachdep.a:

View File

@ -33,7 +33,6 @@ OFILES=\
hmac.$O\
md4.$O\
md5.$O\
md5block.$O\
md5pickle.$O\
nfastrand.$O\
prng.$O\
@ -46,7 +45,6 @@ OFILES=\
rsagen.$O\
rsaprivtopub.$O\
sha1.$O\
sha1block.$O\
sha1pickle.$O\
smallprimes.$O

View File

@ -4,6 +4,8 @@ LIB=../libmachdep.a
OFILES=\
getcallerpc.$O\
md5block.$O\
sha1block.$O\
tas.$O
default: $(LIB)
@ -14,3 +16,10 @@ $(LIB): $(OFILES)
%.$O: %.c
$(CC) $(CFLAGS) $*.c
%.$O: %.s
$(AS) -o $*.$O $*.s
%.s: %.spp
cpp $*.spp >$*.s

View File

@ -43,7 +43,7 @@
#define S43 15
#define S44 21
#define PAYME(x) $##x
#define PAYME(x) $ ## x
/*
* SI is data
@ -102,14 +102,16 @@
orl B, %edi;\
xorl C, %edi;\
#define DATA 8
#define LEN 12
#define STATE 16
#define STACKSIZE 20
#define EDATA (-4)
#define OLDEBX (-8)
#define OLDESI (-12)
#define OLDEDI (-16)
#define DATA (STACKSIZE+8)
#define LEN (STACKSIZE+12)
#define STATE (STACKSIZE+16)
#define EDATA (STACKSIZE-4)
#define OLDEBX (STACKSIZE-8)
#define OLDESI (STACKSIZE-12)
#define OLDEDI (STACKSIZE-16)
.text
@ -120,6 +122,7 @@
/* Prelude */
pushl %ebp
subl $(STACKSIZE), %esp
movl %ebx, OLDEBX(%esp)
movl %esi, OLDESI(%esp)
movl %edi, OLDEDI(%esp)
@ -130,7 +133,7 @@
movl DATA(%esp), %ebp
mainloop:
0:
movl STATE(%esp), %esi
movl (%esi), %eax
movl 4(%esi), %ebx
@ -229,13 +232,13 @@ mainloop:
movl EDATA(%esp), %edi
cmpl %edi, %ebp
jb mainloop
jb 0b
/* Postlude */
movl OLDEBX(%esp), %ebx
movl OLDESI(%esp), %esi
movl OLDEDI(%esp), %edi
movl %esp, %ebp
leave
addl $(STACKSIZE), %esp
popl %ebp
ret

View File

@ -75,9 +75,10 @@ _sha1block:
* stack offsets
* void sha1block(uchar *DATA, int LEN, ulong *STATE)
*/
#define DATA 8
#define LEN 12
#define STATE 16
#define STACKSIZE (48+80*4)
#define DATA (STACKSIZE+8)
#define LEN (STACKSIZE+12)
#define STATE (STACKSIZE+16)
/*
* stack offsets for locals
@ -89,20 +90,22 @@ _sha1block:
* ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
* ulong tmp = edi
*/
#define WARRAY (-4-(80*4))
#define TMP1 (-8-(80*4))
#define TMP2 (-12-(80*4))
#define W15 (-16-(80*4))
#define W40 (-20-(80*4))
#define W60 (-24-(80*4))
#define W80 (-28-(80*4))
#define EDATA (-32-(80*4))
#define OLDEBX (-36-(80*4))
#define OLDESI (-40-(80*4))
#define OLDEDI (-44-(80*4))
#define WARRAY (STACKSIZE-4-(80*4))
#define TMP1 (STACKSIZE-8-(80*4))
#define TMP2 (STACKSIZE-12-(80*4))
#define W15 (STACKSIZE-16-(80*4))
#define W40 (STACKSIZE-20-(80*4))
#define W60 (STACKSIZE-24-(80*4))
#define W80 (STACKSIZE-28-(80*4))
#define EDATA (STACKSIZE-32-(80*4))
#define OLDEBX (STACKSIZE-36-(80*4))
#define OLDESI (STACKSIZE-40-(80*4))
#define OLDEDI (STACKSIZE-44-(80*4))
/* Prelude */
pushl %ebp
subl $(STACKSIZE), %esp
mov %ebx, OLDEBX(%esp)
mov %esi, OLDESI(%esp)
mov %edi, OLDEDI(%esp)
@ -120,7 +123,7 @@ _sha1block:
leal (WARRAY+80*4)(%esp), %edi /* aw80 */
movl %edi, W80(%esp)
mainloop:
0:
leal WARRAY(%esp), %ebp /* warray */
movl STATE(%esp), %edi /* state */
@ -133,7 +136,7 @@ mainloop:
movl DATA(%esp), %ebx /* data */
loop1:
1:
BODY0(0,FN1,0x5a827999,%eax,TMP1(%esp),%ecx,%edx,%esi)
movl %esi,TMP2(%esp)
BODY0(4,FN1,0x5a827999,%esi,%eax,TMP1(%esp),%ecx,%edx)
@ -147,7 +150,7 @@ loop1:
addl $20, %ebx
addl $20, %ebp
cmpl W15(%esp), %ebp /* w15 */
jb loop1
jb 1b
BODY0(0,FN1,0x5a827999,%eax,TMP1(%esp),%ecx,%edx,%esi)
addl $4, %ebx
@ -161,7 +164,7 @@ loop1:
addl $20, %ebp
loop2:
2:
BODY(0,FN24,0x6ed9eba1,%eax,%ebx,%ecx,%edx,%esi)
BODY(4,FN24,0x6ed9eba1,%esi,%eax,%ebx,%ecx,%edx)
BODY(8,FN24,0x6ed9eba1,%edx,%esi,%eax,%ebx,%ecx)
@ -170,9 +173,9 @@ loop2:
addl $20,%ebp
cmpl W40(%esp), %ebp
jb loop2
jb 2b
loop3:
3:
BODY(0,FN3,0x8f1bbcdc,%eax,%ebx,%ecx,%edx,%esi)
BODY(4,FN3,0x8f1bbcdc,%esi,%eax,%ebx,%ecx,%edx)
BODY(8,FN3,0x8f1bbcdc,%edx,%esi,%eax,%ebx,%ecx)
@ -181,9 +184,9 @@ loop3:
addl $20, %ebp
cmpl W60(%esp), %ebp /* w60 */
jb loop3
jb 3b
loop4:
4:
BODY(0,FN24,0xca62c1d6,%eax,%ebx,%ecx,%edx,%esi)
BODY(4,FN24,0xca62c1d6,%esi,%eax,%ebx,%ecx,%edx)
BODY(8,FN24,0xca62c1d6,%edx,%esi,%eax,%ebx,%ecx)
@ -192,7 +195,7 @@ loop4:
addl $20, %ebp
cmpl W80(%esp), %ebp /* w80 */
jb loop4
jb 4b
movl STATE(%esp), %edi /* state */
addl %eax, 0(%edi)
@ -203,12 +206,12 @@ loop4:
movl EDATA(%esp), %edi /* edata */
cmpl %edi, DATA(%esp) /* data */
jb mainloop
jb 0b
/* Postlude */
mov OLDEBX(%esp), %ebx
mov OLDESI(%esp), %esi
mov OLDEDI(%esp), %edi
movl %esp, %ebp
leave
addl $(STACKSIZE), %esp
popl %ebp
ret

View File

@ -4,6 +4,8 @@ LIB=../libmachdep.a
OFILES=\
getcallerpc.$O\
md5block.$O\
sha1block.$O\
tas.$O
default: $(LIB)
@ -14,3 +16,10 @@ $(LIB): $(OFILES)
%.$O: %.c
$(CC) $(CFLAGS) $*.c
%.$O: %.s
$(AS) -o $*.$O $*.s
%.s: %.spp
cpp $*.spp >$*.s

267
posix-power/md5block.c Normal file
View File

@ -0,0 +1,267 @@
#include "os.h"
#include <libsec.h>
/*
* rfc1321 requires that I include this. The code is new. The constants
* all come from the rfc (hence the copyright). We trade a table for the
* macros in rfc. The total size is a lot less. -- presotto
*
* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
* rights reserved.
*
* License to copy and use this software is granted provided that it
* is identified as the "RSA Data Security, Inc. MD5 Message-Digest
* Algorithm" in all material mentioning or referencing this software
* or this function.
*
* License is also granted to make and use derivative works provided
* that such works are identified as "derived from the RSA Data
* Security, Inc. MD5 Message-Digest Algorithm" in all material
* mentioning or referencing the derived work.
*
* RSA Data Security, Inc. makes no representations concerning either
* the merchantability of this software or the suitability of this
* software for any particular purpose. It is provided "as is"
* without express or implied warranty of any kind.
* These notices must be retained in any copies of any part of this
* documentation and/or software.
*/
/*
 * Rotate amounts used in the algorithm.
 * Srn is the left-rotate count applied in step n (1-4) of round r (1-4).
 */
enum
{
	/* round 1 */
	S11 = 7,	S12 = 12,	S13 = 17,	S14 = 22,

	/* round 2 */
	S21 = 5,	S22 = 9,	S23 = 14,	S24 = 20,

	/* round 3 */
	S31 = 4,	S32 = 11,	S33 = 16,	S34 = 23,

	/* round 4 */
	S41 = 6,	S42 = 10,	S43 = 15,	S44 = 21,
};
/*
 * Per-step additive constants, 16 per round.  _md5block walks this
 * table front to back, consuming four entries per inner-loop pass.
 */
static u32int md5tab[] =
{
	/* round 1: [0]..[15] */
	0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
	0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
	0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
	0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,

	/* round 2: [16]..[31] */
	0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
	0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
	0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
	0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,

	/* round 3: [32]..[47] */
	0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
	0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
	0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
	0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,

	/* round 4: [48]..[63] */
	0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
	0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
	0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
	0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
};
static void decode(u32int*, uchar*, ulong);
extern void _md5block(uchar *p, ulong len, u32int *s);
/* rotate the 32-bit value v left by n bits; only used with 0 < n < 32 */
#define ROL32(v, n)	(((v) << (n)) | ((v) >> (32 - (n))))

/*
 * Fold the len bytes at p into the four-word state s, consuming
 * 64 bytes per pass of the outer loop.
 *
 *	p	input bytes
 *	len	byte count; the loop advances in 64-byte steps, so callers
 *		are presumably expected to pass a multiple of 64
 *	s	state words s[0..3], updated in place after every block
 *
 * Each block is unpacked by decode() into 16 words, then run through
 * four rounds of 16 steps.  k walks md5tab four constants at a time;
 * i selects the message words, masked with 0xf to wrap within m[].
 */
void
_md5block(uchar *p, ulong len, u32int *s)
{
	u32int a, b, c, d, i;
	u32int *k;
	uchar *ep;
	u32int m[16];

	for(ep = p + len; p < ep; p += 64){
		a = s[0];
		b = s[1];
		c = s[2];
		d = s[3];

		decode(m, p, 64);
		k = md5tab;

		/* round 1: message words in order */
		for(i = 0; i != 16; i += 4, k += 4){
			a += (((c ^ d) & b) ^ d) + m[i] + k[0];
			a = ROL32(a, S11) + b;

			d += (((b ^ c) & a) ^ c) + m[i + 1] + k[1];
			d = ROL32(d, S12) + a;

			c += (((a ^ b) & d) ^ b) + m[i + 2] + k[2];
			c = ROL32(c, S13) + d;

			b += (((d ^ a) & c) ^ a) + m[i + 3] + k[3];
			b = ROL32(b, S14) + c;
		}

		/* round 2: word index starts at 1 and advances by 5 per step */
		for(i = 1; i != 1+20*4; i += 20, k += 4){
			a += (((b ^ c) & d) ^ c) + m[i & 0xf] + k[0];
			a = ROL32(a, S21) + b;

			d += (((a ^ b) & c) ^ b) + m[(i + 5) & 0xf] + k[1];
			d = ROL32(d, S22) + a;

			c += (((d ^ a) & b) ^ a) + m[(i + 10) & 0xf] + k[2];
			c = ROL32(c, S23) + d;

			b += (((c ^ d) & a) ^ d) + m[(i + 15) & 0xf] + k[3];
			b = ROL32(b, S24) + c;
		}

		/* round 3: word index starts at 5 and advances by 3 per step */
		for(i = 5; i != 5+12*4; i += 12, k += 4){
			a += (b ^ c ^ d) + m[i & 0xf] + k[0];
			a = ROL32(a, S31) + b;

			d += (a ^ b ^ c) + m[(i + 3) & 0xf] + k[1];
			d = ROL32(d, S32) + a;

			c += (d ^ a ^ b) + m[(i + 6) & 0xf] + k[2];
			c = ROL32(c, S33) + d;

			b += (c ^ d ^ a) + m[(i + 9) & 0xf] + k[3];
			b = ROL32(b, S34) + c;
		}

		/* round 4: word index starts at 0 and advances by 7 per step */
		for(i = 0; i != 28*4; i += 28, k += 4){
			a += (c ^ (b | ~d)) + m[i & 0xf] + k[0];
			a = ROL32(a, S41) + b;

			d += (b ^ (a | ~c)) + m[(i + 7) & 0xf] + k[1];
			d = ROL32(d, S42) + a;

			c += (a ^ (d | ~b)) + m[(i + 14) & 0xf] + k[2];
			c = ROL32(c, S43) + d;

			b += (d ^ (c | ~a)) + m[(i + 21) & 0xf] + k[3];
			b = ROL32(b, S44) + c;
		}

		/* add this block's result back into the running state */
		s[0] += a;
		s[1] += b;
		s[2] += c;
		s[3] += d;
	}
}
/*
 * decodes input (uchar) into output (u32int), reading each group of
 * four bytes as one little-endian word.  Assumes len is a multiple
 * of 4; output must have room for len/4 words.
 *
 * Every byte is widened to u32int before it is shifted.  Without the
 * cast the uchar is promoted to (signed) int, and shifting a byte
 * >= 0x80 left by 24 overflows into the sign bit, which ISO C leaves
 * undefined.
 */
static void
decode(u32int *output, uchar *input, ulong len)
{
	uchar *e;

	for(e = input+len; input < e; input += 4)
		*output++ = (u32int)input[0] | ((u32int)input[1] << 8) |
			((u32int)input[2] << 16) | ((u32int)input[3] << 24);
}

187
posix-power/sha1block.c Normal file
View File

@ -0,0 +1,187 @@
#include "os.h"
/*
 * Load the big-endian 32-bit word held in the four bytes at q.
 * Each byte is widened to u32int before shifting: a bare uchar is
 * promoted to (signed) int, and shifting a byte >= 0x80 left by 24
 * would overflow into the sign bit, which ISO C leaves undefined.
 */
#define BE32WORD(q)	(((u32int)(q)[0]<<24) | ((u32int)(q)[1]<<16) | \
			((u32int)(q)[2]<<8) | (u32int)(q)[3])

/*
 * Fold the len bytes at p into the five-word state s, 64 bytes per
 * pass of the outer loop.
 *
 *	p	input bytes, read as big-endian 32-bit words
 *	len	byte count; expected to be a multiple of 64
 *	s	state words s[0..4], updated in place after every block
 *
 * w[] holds the 80-word schedule for the current block.  w[0..15] are
 * loaded from the input; each later word is the XOR of the words 3, 8,
 * 14 and 16 places back, rotated left by one.  The steps are unrolled
 * five at a time so that the roles of a..e rotate back to their
 * starting assignment at the end of each group.
 */
void
_sha1block(uchar *p, ulong len, u32int *s)
{
	u32int a, b, c, d, e, x;
	uchar *end;
	u32int *wp, *wend;
	u32int w[80];

	/* at this point, we have a multiple of 64 bytes */
	for(end = p+len; p < end;){
		a = s[0];
		b = s[1];
		c = s[2];
		d = s[3];
		e = s[4];

		/* steps 0-14: schedule words come straight from the input */
		wend = w + 15;
		for(wp = w; wp < wend; wp += 5){
			wp[0] = BE32WORD(p);
			e += ((a<<5) | (a>>27)) + wp[0];
			e += 0x5a827999 + (((c^d)&b)^d);
			b = (b<<30)|(b>>2);

			wp[1] = BE32WORD(p+4);
			d += ((e<<5) | (e>>27)) + wp[1];
			d += 0x5a827999 + (((b^c)&a)^c);
			a = (a<<30)|(a>>2);

			wp[2] = BE32WORD(p+8);
			c += ((d<<5) | (d>>27)) + wp[2];
			c += 0x5a827999 + (((a^b)&e)^b);
			e = (e<<30)|(e>>2);

			wp[3] = BE32WORD(p+12);
			b += ((c<<5) | (c>>27)) + wp[3];
			b += 0x5a827999 + (((e^a)&d)^a);
			d = (d<<30)|(d>>2);

			wp[4] = BE32WORD(p+16);
			a += ((b<<5) | (b>>27)) + wp[4];
			a += 0x5a827999 + (((d^e)&c)^e);
			c = (c<<30)|(c>>2);

			p += 20;
		}

		/*
		 * steps 15-19: the last input word (w[15]), then the first
		 * four computed words (w[16..19]); wp == w+15 here.
		 */
		wp[0] = BE32WORD(p);
		e += ((a<<5) | (a>>27)) + wp[0];
		e += 0x5a827999 + (((c^d)&b)^d);
		b = (b<<30)|(b>>2);

		x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
		wp[1] = (x<<1) | (x>>31);
		d += ((e<<5) | (e>>27)) + wp[1];
		d += 0x5a827999 + (((b^c)&a)^c);
		a = (a<<30)|(a>>2);

		x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
		wp[2] = (x<<1) | (x>>31);
		c += ((d<<5) | (d>>27)) + wp[2];
		c += 0x5a827999 + (((a^b)&e)^b);
		e = (e<<30)|(e>>2);

		x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
		wp[3] = (x<<1) | (x>>31);
		b += ((c<<5) | (c>>27)) + wp[3];
		b += 0x5a827999 + (((e^a)&d)^a);
		d = (d<<30)|(d>>2);

		x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
		wp[4] = (x<<1) | (x>>31);
		a += ((b<<5) | (b>>27)) + wp[4];
		a += 0x5a827999 + (((d^e)&c)^e);
		c = (c<<30)|(c>>2);

		wp += 5;
		p += 4;		/* 3*20 + 4 = 64 input bytes consumed */

		/* steps 20-39 */
		wend = w + 40;
		for(; wp < wend; wp += 5){
			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
			wp[0] = (x<<1) | (x>>31);
			e += ((a<<5) | (a>>27)) + wp[0];
			e += 0x6ed9eba1 + (b^c^d);
			b = (b<<30)|(b>>2);

			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
			wp[1] = (x<<1) | (x>>31);
			d += ((e<<5) | (e>>27)) + wp[1];
			d += 0x6ed9eba1 + (a^b^c);
			a = (a<<30)|(a>>2);

			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
			wp[2] = (x<<1) | (x>>31);
			c += ((d<<5) | (d>>27)) + wp[2];
			c += 0x6ed9eba1 + (e^a^b);
			e = (e<<30)|(e>>2);

			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
			wp[3] = (x<<1) | (x>>31);
			b += ((c<<5) | (c>>27)) + wp[3];
			b += 0x6ed9eba1 + (d^e^a);
			d = (d<<30)|(d>>2);

			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
			wp[4] = (x<<1) | (x>>31);
			a += ((b<<5) | (b>>27)) + wp[4];
			a += 0x6ed9eba1 + (c^d^e);
			c = (c<<30)|(c>>2);
		}

		/* steps 40-59 */
		wend = w + 60;
		for(; wp < wend; wp += 5){
			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
			wp[0] = (x<<1) | (x>>31);
			e += ((a<<5) | (a>>27)) + wp[0];
			e += 0x8f1bbcdc + ((b&c)|((b|c)&d));
			b = (b<<30)|(b>>2);

			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
			wp[1] = (x<<1) | (x>>31);
			d += ((e<<5) | (e>>27)) + wp[1];
			d += 0x8f1bbcdc + ((a&b)|((a|b)&c));
			a = (a<<30)|(a>>2);

			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
			wp[2] = (x<<1) | (x>>31);
			c += ((d<<5) | (d>>27)) + wp[2];
			c += 0x8f1bbcdc + ((e&a)|((e|a)&b));
			e = (e<<30)|(e>>2);

			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
			wp[3] = (x<<1) | (x>>31);
			b += ((c<<5) | (c>>27)) + wp[3];
			b += 0x8f1bbcdc + ((d&e)|((d|e)&a));
			d = (d<<30)|(d>>2);

			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
			wp[4] = (x<<1) | (x>>31);
			a += ((b<<5) | (b>>27)) + wp[4];
			a += 0x8f1bbcdc + ((c&d)|((c|d)&e));
			c = (c<<30)|(c>>2);
		}

		/* steps 60-79 */
		wend = w + 80;
		for(; wp < wend; wp += 5){
			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
			wp[0] = (x<<1) | (x>>31);
			e += ((a<<5) | (a>>27)) + wp[0];
			e += 0xca62c1d6 + (b^c^d);
			b = (b<<30)|(b>>2);

			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
			wp[1] = (x<<1) | (x>>31);
			d += ((e<<5) | (e>>27)) + wp[1];
			d += 0xca62c1d6 + (a^b^c);
			a = (a<<30)|(a>>2);

			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
			wp[2] = (x<<1) | (x>>31);
			c += ((d<<5) | (d>>27)) + wp[2];
			c += 0xca62c1d6 + (e^a^b);
			e = (e<<30)|(e>>2);

			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
			wp[3] = (x<<1) | (x>>31);
			b += ((c<<5) | (c>>27)) + wp[3];
			b += 0xca62c1d6 + (d^e^a);
			d = (d<<30)|(d>>2);

			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
			wp[4] = (x<<1) | (x>>31);
			a += ((b<<5) | (b>>27)) + wp[4];
			a += 0xca62c1d6 + (c^d^e);
			c = (c<<30)|(c>>2);
		}

		/* save state */
		s[0] += a;
		s[1] += b;
		s[2] += c;
		s[3] += d;
		s[4] += e;
	}
}

View File

@ -115,11 +115,22 @@
.text
.p2align 2,0x90
.globl ___md5block
___md5block:
.p2align 2,0x90
.globl __md5block
__md5block:
.p2align 2,0x90
.globl _md5block
.type _md5block, @function
_md5block:
.p2align 2,0x90
.globl md5block
md5block:
/* Prelude */
pushl %ebp
subl $(STACKSIZE), %esp

View File

@ -1,9 +1,23 @@
.text
.p2align 2,0x90
.globl ___sha1block
___sha1block:
jmp sha1block
.p2align 2,0x90
.globl __sha1block
__sha1block:
jmp sha1block
.p2align 2,0x90
.globl _sha1block
.type _sha1block, @function
_sha1block:
jmp sha1block
.p2align 2,0x90
.globl sha1block
sha1block:
/* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
* wp[off] = x;

23
win32-386/tas.c Normal file
View File

@ -0,0 +1,23 @@
#include "u.h"
#include "libc.h"
/*
 * Test-and-set: store 1 in *x and return the value it held before.
 *
 * Returns 0 if the word was clear (the caller now owns it) and 1 if it
 * was already set.  Any other previous value is reported through
 * print() and treated as "already set".
 *
 * The exchange is a single xchgl with a memory operand, which the x86
 * performs as a locked read-modify-write.  The asm is marked volatile
 * and lists *x as a read-write memory operand plus a "memory" clobber:
 * without these GCC is not told that *x changes and is free to move or
 * cache surrounding loads and stores across the exchange.
 */
int
tas(long *x)
{
	int v;

	__asm__ __volatile__(
		"xchgl	%0, %1"
		: "=r" (v), "+m" (*x)
		: "0" (1)
		: "memory"
	);

	switch(v) {
	case 0:
	case 1:
		return v;
	default:
		/* %lux takes an unsigned long; v is an int, so widen it explicitly */
		print("canlock: corrupted 0x%lux\n", (unsigned long)v);
		return 1;
	}
}