/*
* NetBSD/lib/libc/arch/mips/string/bcopy.S
*
* 293 lines
* 5.9 KiB
* ArmAsm
*/

/* $NetBSD: bcopy.S,v 1.8 2000/10/10 21:51:54 jeffs Exp $ */
/*
* Mach Operating System
* Copyright (c) 1993 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
/*
* File: mips_bcopy.s
* Author: Chris Maeda
* Date: June 1993
*
* Fast copy routine. Derived from aligned_block_copy.
*/
#include <mips/asm.h>
#define _LOCORE /* XXX not really, just assembly-code source */
#include <machine/endian.h>
#if defined(LIBC_SCCS) && !defined(lint)
ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
ASMSTR("$NetBSD: bcopy.S,v 1.8 2000/10/10 21:51:54 jeffs Exp $")
#endif /* LIBC_SCCS and not lint */
#ifdef ABICALLS
.abicalls
#endif
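/*
* bcopy(caddr_t src, caddr_t dst, unsigned int len)
*
* a0 src address
* a1 dst address
* a2 length
*
* When MEMCOPY or MEMMOVE is defined the same code is assembled as
* memcpy or memmove instead. In that case the first two arguments are
* swapped (a0 is the dst address, a1 is the src address) and the
* original dst address is returned in v0.
*/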
/*
 * Pick the exported symbol name and the argument registers.
 *
 * memcpy and memmove take (dst, src, len), so dst arrives in a0 and
 * src in a1; bcopy takes (src, dst, len), so the two are swapped.
 * MEMCOPY takes precedence if both MEMCOPY and MEMMOVE are defined.
 * The length is in a2 for all three entry points.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	DSTREG		a0
#define	SRCREG		a1
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	DSTREG		a0
#define	SRCREG		a1
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif
#define	SIZEREG		a2
LEAF(FUNCTION)
	/*
	 * Block copy that tolerates overlapping buffers: it copies
	 * forwards when src >= dst and backwards when src < dst.
	 *
	 * In:	SRCREG	source address
	 *	DSTREG	destination address
	 *	SIZEREG	number of bytes to copy
	 * Out:	v0	original destination address (only when built
	 *		with MEMCOPY or MEMMOVE defined)
	 * Scratch: AT, v1, a3, t0-t3.  SRCREG, DSTREG and SIZEREG are
	 *	modified as the copy proceeds.
	 *
	 * The body is assembled with .set noreorder, so the instruction
	 * written after each branch or jump is its delay slot and is
	 * executed whether or not the branch is taken.  Several delay
	 * slots below do useful work; do not reorder them casually.
	 *
	 * LWHI/LWLO are not defined in this file; presumably they are the
	 * endian-appropriate lwl/lwr pair -- confirm in <mips/asm.h>.
	 */
	.set	noat
	.set	noreorder
#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 * Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards
	/*
	 * There are four alignment cases (with frequency)
	 * (Based on measurements taken with a DECstation 5000/200
	 * inside a Mach kernel.)
	 *
	 * aligned -> aligned (mostly)
	 * unaligned -> aligned (sometimes)
	 * aligned,unaligned -> unaligned (almost never)
	 *
	 * Note that we could add another case that checks if
	 * the destination and source are unaligned but the
	 * copy is alignable. eg if src and dest are both
	 * on a halfword boundary.
	 */
	andi	t1,DSTREG,3		# (delay slot) low 2 bits of dest
	bne	t1,zero,3f		# dest unaligned: byte copy it all
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned
	/*
	 * Forward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask off the low 5 bits
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	/*
	 * loop body: the first four words are moved using offsets from
	 * the old pointers, the last four using negative offsets from
	 * the already advanced pointers.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the group
	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# leave only the trailing bytes
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test on the remaining count
	addu	DSTREG,1		# (delay slot)
4:	# copydone
	j	ra
	nop
	/*
	 * Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# a3 = bytes that make up whole words
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point
1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	/*
	 * Pointer increments use addu (non-trapping) like the rest of
	 * this file; addi would raise an overflow exception if the
	 * address crossed the signed 32-bit boundary.
	 */
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)
	j	3b
	nop
6:	# backcopy -- based on above
	/*
	 * Point both registers one byte past the end of their buffers;
	 * everything below uses negative offsets and decrements.
	 */
	addu	SRCREG,SIZEREG
	addu	DSTREG,SIZEREG
	andi	t1,DSTREG,3		# low 2 bits of dest end
	bne	t1,zero,3f		# dest end unaligned: byte copy it all
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src end
	bne	t0,zero,5f		# src end unaligned, dest end aligned
	/*
	 * Backward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask off the low 5 bits
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	subu	a3,SRCREG,t0		# run fast loop down to this address
	/*
	 * loop body
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the group
	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# leave only the leading bytes
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)
3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test on the remaining count
	subu	DSTREG,1		# (delay slot)
4:	# copydone
	j	ra
	nop
	/*
	 * Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# a3 = bytes that make up whole words
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point
1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)
	j	3b
	nop
	.set	reorder
	.set	at
END(FUNCTION)