Use the assembler version of memcpy().
This commit is contained in:
parent
e1e9dd27c5
commit
6e8ffbdd58
|
@ -1,10 +1,10 @@
|
|||
# $NetBSD: Makefile.inc,v 1.15 1998/02/22 06:44:44 mycroft Exp $
|
||||
# $NetBSD: Makefile.inc,v 1.16 1998/02/22 06:49:57 mycroft Exp $
|
||||
|
||||
SRCS+= __main.c __assert.c \
|
||||
imax.c imin.c lmax.c lmin.c max.c min.c ulmax.c ulmin.c \
|
||||
bswap16.S bswap32.S bswap64.c \
|
||||
bcmp.c bzero.S ffs.S \
|
||||
memchr.c memcpy.c memset.c \
|
||||
memchr.c memcpy.S memset.c \
|
||||
strcat.c strcmp.c strcpy.c strlen.c strncasecmp.c strncmp.c strncpy.c \
|
||||
scanc.c skpc.c \
|
||||
htonl.S htons.S ntohl.S ntohs.S \
|
||||
|
|
|
@ -0,0 +1,288 @@
|
|||
/* $NetBSD: bcopy.S,v 1.1 1998/02/22 06:49:58 mycroft Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1995 Carnegie-Mellon University.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Author: Trevor Blackwell. Support for use as memcpy() and memmove()
|
||||
* added by Chris Demetriou.
|
||||
*
|
||||
* Permission to use, copy, modify and distribute this software and
|
||||
* its documentation is hereby granted, provided that both the copyright
|
||||
* notice and this permission notice appear in all copies of the
|
||||
* software, derivative works or modified versions, and any portions
|
||||
* thereof, and that both notices appear in supporting documentation.
|
||||
*
|
||||
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
|
||||
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
|
||||
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
|
||||
*
|
||||
* Carnegie Mellon requests users of this software to return to
|
||||
*
|
||||
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
|
||||
* School of Computer Science
|
||||
* Carnegie Mellon University
|
||||
* Pittsburgh PA 15213-3890
|
||||
*
|
||||
* any improvements or extensions that they make and grant Carnegie the
|
||||
* rights to redistribute these changes.
|
||||
*/
|
||||
|
||||
#include <machine/asm.h>
|
||||
|
||||
#if defined(MEMCOPY) || defined(MEMMOVE)
|
||||
#ifdef MEMCOPY
|
||||
#define FUNCTION memcpy
|
||||
#else
|
||||
#define FUNCTION memmove
|
||||
#endif
|
||||
#define SRCREG a1  /* mem*(to, from, len): source is the second argument */
|
||||
#define DSTREG a0  /* mem*(to, from, len): destination is the first argument */
|
||||
#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
|
||||
#define FUNCTION bcopy
|
||||
#define SRCREG a0  /* bcopy(from, to, len): source is the first argument */
|
||||
#define DSTREG a1  /* bcopy(from, to, len): destination is the second argument */
|
||||
#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
|
||||
|
||||
#define SIZEREG a2  /* len is the third argument for all three entry points */
|
||||
|
||||
/*
|
||||
* Copy bytes.
|
||||
*
|
||||
* void bcopy(char *from, char *to, size_t len);
|
||||
* void *memcpy(void *to, const void *from, size_t len);
|
||||
* void *memmove(void *to, const void *from, size_t len);
|
||||
*
|
||||
* No matter how invoked, the source and destination registers are used directly
|
||||
* for calculation. There's no point in copying them to "working"
|
||||
* registers, since the code uses their values "in place," and
|
||||
* copying them would be slower.
|
||||
*/
|
||||
|
||||
LEAF(FUNCTION,3)
|
||||
|
||||
#if defined(MEMCOPY) || defined(MEMMOVE)
|
||||
/* set up return value (v0 = original dst) while DSTREG is still unmodified */
|
||||
mov DSTREG,v0
|
||||
#endif
|
||||
|
||||
/* Nothing to copy if the length is zero or negative (signed test) */
|
||||
ble SIZEREG,bcopy_done
|
||||
|
||||
/* Check for overlap: unsigned (dst - src) < len means dst starts inside the source range */
|
||||
subq DSTREG,SRCREG,t5
|
||||
cmpult t5,SIZEREG,t5
|
||||
bne t5,bcopy_overlap
|
||||
|
||||
/* a3 = end address of the source (src + len) */
|
||||
addq SRCREG,SIZEREG,a3
|
||||
|
||||
/* Get the first word (the aligned quadword holding the first source byte) */
|
||||
ldq_u t2,0(SRCREG)
|
||||
|
||||
/* Do they have the same alignment? t0 = (src ^ dst) & 7, t1 = dst & 7 */
|
||||
xor SRCREG,DSTREG,t0
|
||||
and t0,7,t0
|
||||
and DSTREG,7,t1
|
||||
bne t0,bcopy_different_alignment
|
||||
|
||||
/* src & dst have same alignment; if dst is already 8-byte aligned no head merge is needed */
|
||||
beq t1,bcopy_all_aligned
|
||||
|
||||
ldq_u t3,0(DSTREG)  /* t3 = existing contents of the first destination word */
|
||||
addq SIZEREG,t1,SIZEREG  /* count from the start of that aligned word */
|
||||
mskqh t2,SRCREG,t2  /* keep source bytes at and above the starting byte offset */
|
||||
mskql t3,SRCREG,t3  /* keep destination bytes below the starting byte offset */
|
||||
or t2,t3,t2  /* t2 = merged first word, ready to be stored */
|
||||
|
||||
/* Dst is treated as 8-byte aligned from here: ldq_u/stq_u ignore the low three address bits */
|
||||
|
||||
bcopy_all_aligned:
|
||||
/* If 8 bytes or fewer remain, skip loop */
|
||||
subq SIZEREG,1,t0
|
||||
and SIZEREG,7,SIZEREG  /* SIZEREG = bytes used in the final word (0 means all 8) */
|
||||
bic t0,7,t0  /* t0 = (count - 1) & ~7 = bytes stored by the loop, a multiple of 8 */
|
||||
beq t0,bcopy_samealign_lp_end
|
||||
|
||||
bcopy_samealign_lp:
|
||||
stq_u t2,0(DSTREG)  /* store the word currently held in t2 */
|
||||
addq DSTREG,8,DSTREG
|
||||
ldq_u t2,8(SRCREG)  /* fetch the next source word */
|
||||
subq t0,8,t0
|
||||
addq SRCREG,8,SRCREG
|
||||
bne t0,bcopy_samealign_lp
|
||||
|
||||
bcopy_samealign_lp_end:
|
||||
/* t2 holds the final word: store it whole if all 8 bytes are wanted, else merge it */
|
||||
bne SIZEREG,bcopy_small_left
|
||||
stq_u t2,0(DSTREG)
|
||||
RET
|
||||
|
||||
bcopy_small_left:
|
||||
mskql t2,SIZEREG,t4  /* keep the low SIZEREG bytes of the source data */
|
||||
ldq_u t3,0(DSTREG)  /* t3 = existing contents of the last destination word */
|
||||
mskqh t3,SIZEREG,t3  /* keep destination bytes from offset SIZEREG upward */
|
||||
or t4,t3,t4
|
||||
stq_u t4,0(DSTREG)
|
||||
RET
|
||||
|
||||
bcopy_different_alignment:
|
||||
/*
|
||||
||||
* this is the fun part: src and dst are aligned differently, so each destination word is assembled from two source words
|
||||
*/
|
||||
addq SRCREG,SIZEREG,a3  /* a3 = src + len (same value the entry code already computed) */
|
||||
cmpule SIZEREG,8,t0  /* 8 bytes or fewer: handled in one shot by bcopy_da_finish */
|
||||
bne t0,bcopy_da_finish
|
||||
|
||||
beq t1,bcopy_da_noentry  /* t1 = dst & 7; dst already aligned, no head fixup needed */
|
||||
|
||||
/* Do the initial partial word, which brings dst up to an 8-byte boundary */
|
||||
subq zero,DSTREG,t0
|
||||
and t0,7,t0  /* t0 = (-dst) & 7 = bytes up to the next dst boundary */
|
||||
ldq_u t3,7(SRCREG)  /* second source word covering src..src+7 */
|
||||
extql t2,SRCREG,t2
|
||||
extqh t3,SRCREG,t3
|
||||
or t2,t3,t5  /* t5 = the 8 source bytes starting at src */
|
||||
insql t5,DSTREG,t5  /* shift them up to dst's byte offset within its word */
|
||||
ldq_u t6,0(DSTREG)
|
||||
mskql t6,DSTREG,t6  /* keep destination bytes below dst's byte offset */
|
||||
or t5,t6,t5
|
||||
stq_u t5,0(DSTREG)
|
||||
addq SRCREG,t0,SRCREG  /* step src, dst and the count past the bytes just written */
|
||||
addq DSTREG,t0,DSTREG
|
||||
subq SIZEREG,t0,SIZEREG
|
||||
ldq_u t2,0(SRCREG)  /* t2 = source word holding the new src, as the loop expects */
|
||||
|
||||
bcopy_da_noentry:
|
||||
subq SIZEREG,1,t0
|
||||
bic t0,7,t0  /* t0 = (len - 1) & ~7 = bytes stored by the loop, a multiple of 8 */
|
||||
and SIZEREG,7,SIZEREG  /* SIZEREG = bytes used in the final word (0 means all 8) */
|
||||
beq t0,bcopy_da_finish2
|
||||
|
||||
bcopy_da_lp:
|
||||
/* Unrolled twice: t2 and t3 alternate as the older and newer source word */
|
ldq_u t3,7(SRCREG)
|
||||
addq SRCREG,8,SRCREG  /* low three bits unchanged, so the extract shift below is unaffected */
|
||||
extql t2,SRCREG,t4
|
||||
extqh t3,SRCREG,t5
|
||||
subq t0,8,t0
|
||||
or t4,t5,t5  /* t5 = 8 source bytes assembled from the two words */
|
||||
stq t5,0(DSTREG)
|
||||
addq DSTREG,8,DSTREG
|
||||
beq t0,bcopy_da_finish1  /* newest source word is already in t3 */
|
||||
ldq_u t2,7(SRCREG)
|
||||
addq SRCREG,8,SRCREG
|
||||
extql t3,SRCREG,t4
|
||||
extqh t2,SRCREG,t5
|
||||
subq t0,8,t0
|
||||
or t4,t5,t5
|
||||
stq t5,0(DSTREG)
|
||||
addq DSTREG,8,DSTREG
|
||||
bne t0,bcopy_da_lp
|
||||
|
||||
bcopy_da_finish2:
|
||||
/* The newest source word is in t2 here; move it to t3, where bcopy_da_finish1 expects it */
|
||||
mov t2,t3
|
||||
|
||||
bcopy_da_finish1:
|
||||
/* Build the last (possibly partial) word from t3 and the word holding the final source byte */
|
||||
ldq_u t2,-1(a3)  /* a3 = src + len, so this word holds the last source byte */
|
||||
extql t3,SRCREG,t3
|
||||
extqh t2,SRCREG,t2
|
||||
or t2,t3,t2  /* t2 = final data word */
|
||||
br zero,bcopy_samealign_lp_end  /* shared tail stores it whole or merged, per SIZEREG */
|
||||
|
||||
bcopy_da_finish:
|
||||
/*
 * 8 bytes or fewer (callers branch here only when len <= 8).
 * On entry t2 = ldq_u 0(src) and a3 = src + len.  Gather the source
 * bytes, then merge them into the one or two destination words they touch.
 */
|
||||
ldq_u t3,-1(a3)  /* word holding the last source byte */
|
||||
extql t2,SRCREG,t2
|
||||
extqh t3,SRCREG,t3
|
||||
or t2,t3,t2  /* t2 = source bytes, starting at byte 0 */
|
||||
insqh t2,DSTREG,t3  /* t3 = data bytes that spill into the following dst word */
|
||||
insql t2,DSTREG,t2  /* t2 = data bytes positioned within the first dst word */
|
||||
lda t4,-1(zero)  /* t4 = all ones */
|
||||
mskql t4,SIZEREG,t5  /* ones in the low (SIZEREG & 7) bytes; zero when SIZEREG is 8 */
|
||||
cmovne t5,t5,t4  /* t4 = byte mask for len bytes (stays all ones for a full 8) */
|
||||
insqh t4,DSTREG,t5  /* t5 = mask for the following dst word */
|
||||
insql t4,DSTREG,t4  /* t4 = mask for the first dst word */
|
||||
addq DSTREG,SIZEREG,a4  /* a4 = dst + len */
|
||||
ldq_u t6,0(DSTREG)  /* existing first dst word */
|
||||
ldq_u t7,-1(a4)  /* existing dst word holding the last byte to write */
|
||||
bic t6,t4,t6  /* clear the bytes about to be replaced */
|
||||
bic t7,t5,t7
|
||||
and t2,t4,t2  /* trim the data to the bytes being written */
|
||||
and t3,t5,t3
|
||||
or t2,t6,t2
|
||||
or t3,t7,t3
|
||||
stq_u t3,-1(a4)
|
||||
stq_u t2,0(DSTREG)  /* first word is stored last, so it wins if both stores hit the same quadword */
|
||||
RET
|
||||
|
||||
bcopy_overlap:
|
||||
/*
|
||||
* Basically equivalent to previous case, only backwards.
|
||||
* Not quite as highly optimized
|
||||
*/
|
||||
addq SRCREG,SIZEREG,a3  /* a3 = src + len */
|
||||
addq DSTREG,SIZEREG,a4  /* a4 = dst + len */
|
||||
|
||||
/* 8 bytes or fewer - the short path loads all source data before storing, so overlap is harmless */
|
||||
cmpule SIZEREG,8,t0
|
||||
bne t0,bcopy_ov_short
|
||||
|
||||
/* Possibly do a partial first word (the highest-addressed dst word, written first when going backwards) */
|
||||
and a4,7,t4  /* t4 = bytes of the copy that fall in that last dst word */
|
||||
beq t4,bcopy_ov_nostart2
|
||||
subq a3,t4,a3
|
||||
subq a4,t4,a4  /* a4 is now 8-byte aligned; a3 is the matching source address */
|
||||
ldq_u t1,0(a3)
|
||||
subq SIZEREG,t4,SIZEREG
|
||||
ldq_u t2,7(a3)
|
||||
ldq t3,0(a4)  /* existing contents of the last dst word */
|
||||
extql t1,a3,t1
|
||||
extqh t2,a3,t2
|
||||
or t1,t2,t1  /* t1 = 8 source bytes starting at a3 */
|
||||
mskqh t3,t4,t3  /* keep destination bytes from offset t4 upward */
|
||||
mskql t1,t4,t1  /* keep the low t4 source bytes */
|
||||
or t1,t3,t1
|
||||
stq t1,0(a4)
|
||||
|
||||
bcopy_ov_nostart2:
|
||||
bic SIZEREG,7,t4  /* t4 = bytes to move as whole words, a multiple of 8 */
|
||||
and SIZEREG,7,SIZEREG  /* SIZEREG = bytes left over at the low end of the copy */
|
||||
beq t4,bcopy_ov_lp_end
|
||||
|
||||
bcopy_ov_lp:
|
||||
/* This could be more pipelined, but it doesn't seem worth it */
|
||||
ldq_u t0,-8(a3)  /* the two source words covering a3-8 .. a3-1 */
|
||||
subq a4,8,a4
|
||||
ldq_u t1,-1(a3)
|
||||
subq a3,8,a3
|
||||
extql t0,a3,t0
|
||||
extqh t1,a3,t1
|
||||
subq t4,8,t4
|
||||
or t0,t1,t0  /* t0 = 8 source bytes starting at the new a3 */
|
||||
stq t0,0(a4)  /* a4 is 8-byte aligned here, so a plain stq is used */
|
||||
bne t4,bcopy_ov_lp
|
||||
|
||||
bcopy_ov_lp_end:
|
||||
beq SIZEREG,bcopy_done  /* no leftover bytes at the low end */
|
||||
|
||||
/* Copy the remaining leading bytes, which end at dst's next 8-byte boundary */
|
ldq_u t0,0(SRCREG)
|
||||
ldq_u t1,7(SRCREG)
|
||||
ldq_u t2,0(DSTREG)  /* existing contents of the first dst word */
|
||||
extql t0,SRCREG,t0
|
||||
extqh t1,SRCREG,t1
|
||||
or t0,t1,t0  /* t0 = 8 source bytes starting at src */
|
||||
insql t0,DSTREG,t0  /* shift them up to dst's byte offset within its word */
|
||||
mskql t2,DSTREG,t2  /* keep destination bytes below dst's byte offset */
|
||||
or t2,t0,t2
|
||||
stq_u t2,0(DSTREG)
|
||||
|
||||
bcopy_done:
|
||||
RET
|
||||
|
||||
bcopy_ov_short:
|
||||
/* Overlapping copy of 8 bytes or fewer: load t2 as bcopy_da_finish expects (a3 = src + len is already set) */
|
ldq_u t2,0(SRCREG)
|
||||
br zero,bcopy_da_finish
|
||||
|
||||
END(FUNCTION)
|
|
@ -0,0 +1,4 @@
|
|||
/* $NetBSD: memcpy.S,v 1.1 1998/02/22 06:49:58 mycroft Exp $ */
|
||||
|
||||
#define MEMCOPY  /* makes bcopy.S define FUNCTION as memcpy with the (to, from, len) register mapping */
|
||||
#include "bcopy.S"
|
Loading…
Reference in New Issue