From 9cc0f333456619e05fa4cd6d18514f65fe153b52 Mon Sep 17 00:00:00 2001
From: chris <chris@NetBSD.org>
Date: Tue, 20 Nov 2001 00:29:19 +0000
Subject: [PATCH] Tidy up some memory copying cruft: bcopy.S is no longer
 needed memmove and memcpy were both stacking r0 and unstacking it to keep the
 return value, so push this down into _memcpy. rename _memcpy.S to memcpy.S.
 memmove.S is now just a placeholder otherwise the make system automagically
 adds a memmove.c file to libkern. memmove is just another entry point for
 memcpy.

---
 sys/lib/libkern/arch/arm/Makefile.inc |   4 +-
 sys/lib/libkern/arch/arm/_memcpy.S    | 481 --------------------------
 sys/lib/libkern/arch/arm/bcopy.S      |  48 ---
 sys/lib/libkern/arch/arm/memcpy.S     | 459 +++++++++++++++++++++++-
 sys/lib/libkern/arch/arm/memmove.S    |  49 +--
 5 files changed, 457 insertions(+), 584 deletions(-)
 delete mode 100644 sys/lib/libkern/arch/arm/_memcpy.S
 delete mode 100644 sys/lib/libkern/arch/arm/bcopy.S

diff --git a/sys/lib/libkern/arch/arm/Makefile.inc b/sys/lib/libkern/arch/arm/Makefile.inc
index 9970ea591f15..6d5800aa1967 100644
--- a/sys/lib/libkern/arch/arm/Makefile.inc
+++ b/sys/lib/libkern/arch/arm/Makefile.inc
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile.inc,v 1.2 2001/07/06 17:35:44 toshii Exp $
+#	$NetBSD: Makefile.inc,v 1.3 2001/11/20 00:29:19 chris Exp $
 
 SRCS+=	__assert.c __main.c bswap64.c byte_swap_2.S byte_swap_4.S \
 	ffs.c imax.c imin.c lmax.c lmin.c max.c min.c random.c scanc.c \
@@ -7,4 +7,4 @@ SRCS+=	__assert.c __main.c bswap64.c byte_swap_2.S byte_swap_4.S \
 	strncpy.c strtoul.c ulmax.c ulmin.c
 
 SRCS+=	divsi3.S
-SRCS+=	memchr.c memcmp.c _memcpy.S memcpy.S memmove.S memset.S
+SRCS+=	memchr.c memcmp.c memcpy.S memset.S memmove.S
diff --git a/sys/lib/libkern/arch/arm/_memcpy.S b/sys/lib/libkern/arch/arm/_memcpy.S
deleted file mode 100644
index 9a6b4e5e33fa..000000000000
--- a/sys/lib/libkern/arch/arm/_memcpy.S
+++ /dev/null
@@ -1,481 +0,0 @@
-/*	$NetBSD: _memcpy.S,v 1.2 2001/01/23 19:12:29 bjh21 Exp $	*/
-
-/*-
- * Copyright (c) 1997 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Neil A. Carson and Mark Brinicombe
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *        This product includes software developed by the NetBSD
- *        Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <machine/asm.h>
-
-/*
- * This is one fun bit of code ...
- * Some easy listening music is suggested while trying to understand this
- * code e.g. Iron Maiden
- *
- * For anyone attempting to understand it :
- *
- * The core code is implemented here with simple stubs for memcpy()
- * memmove() and bcopy().
- *
- * All local labels are prefixed with Lmemcpy_
- * Following the prefix a label starting f is used in the forward copy code
- * while a label using b is used in the backwards copy code
- * The source and destination addresses determine whether a forward or
- * backward copy is performed.
- * Separate bits of code are used to deal with the following situations
- * for both the forward and backwards copy.
- * unaligned source address
- * unaligned destination address
- * Separate copy routines are used to produce an optimised result for each
- * of these cases.
- * The copy code will use LDM/STM instructions to copy up to 32 bytes at
- * a time where possible.
- *
- * Note: r12 (aka ip) can be trashed during the function along with
- * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
- * Additional registers are preserved prior to use i.e. r4, r5 & lr
- *
- * Apologies for the state of the comments ;-)
- */
-
-ENTRY(_memcpy)
-	/* Determine copy direction */
-	cmp	r1, r0
-	bcc	Lmemcpy_backwards
-
-	moveq	r0, #0			/* Quick abort for len=0 */
-#ifdef __APCS_26__
-	moveqs	pc, lr
-#else
-	moveq	pc, lr
-#endif
-
-	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
-	subs	r2, r2, #4
-	blt	Lmemcpy_fl4		/* less than 4 bytes */
-	ands	r12, r0, #3
-	bne	Lmemcpy_fdestul		/* oh unaligned destination addr */
-	ands	r12, r1, #3
-	bne	Lmemcpy_fsrcul		/* oh unaligned source addr */
-
-Lmemcpy_ft8:
-	/* We have aligned source and destination */
-	subs	r2, r2, #8
-	blt	Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
-	subs	r2, r2, #0x14         
-	blt	Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
-	stmdb	sp!, {r4}		/* borrow r4 */
-
-	/* blat 32 bytes at a time */
-	/* XXX for really big copies perhaps we should use more registers */
-Lmemcpy_floop32:	
-	ldmia	r1!, {r3, r4, r12, lr}
-	stmia	r0!, {r3, r4, r12, lr}
-	ldmia	r1!, {r3, r4, r12, lr}
-	stmia	r0!, {r3, r4, r12, lr}
-	subs	r2, r2, #0x20         
-	bge	Lmemcpy_floop32
-
-	cmn	r2, #0x10
-	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
-	stmgeia	r0!, {r3, r4, r12, lr}
-	subge	r2, r2, #0x10         
-	ldmia	sp!, {r4}		/* return r4 */
-
-Lmemcpy_fl32:
-	adds	r2, r2, #0x14         
-
-	/* blat 12 bytes at a time */
-Lmemcpy_floop12:
-	ldmgeia	r1!, {r3, r12, lr}
-	stmgeia	r0!, {r3, r12, lr}
-	subges	r2, r2, #0x0c         
-	bge	Lmemcpy_floop12
-
-Lmemcpy_fl12:
-	adds	r2, r2, #8
-	blt	Lmemcpy_fl4
-
-	subs	r2, r2, #4
-	ldrlt	r3, [r1], #4
-	strlt	r3, [r0], #4
-	ldmgeia	r1!, {r3, r12}
-	stmgeia	r0!, {r3, r12}
-	subge	r2, r2, #4
-
-Lmemcpy_fl4:
-	/* less than 4 bytes to go */
-	adds	r2, r2, #4
-	ldmeqia	sp!, {r0, pc}		/* done */
-
-	/* copy the crud byte at a time */
-	cmp	r2, #2
-	ldrb	r3, [r1], #1
-	strb	r3, [r0], #1
-	ldrgeb	r3, [r1], #1
-	strgeb	r3, [r0], #1
-	ldrgtb	r3, [r1], #1
-	strgtb	r3, [r0], #1
-#ifdef __APCS_26__
-	ldmia	sp!, {r0, pc}^
-#else
-	ldmia	sp!, {r0, pc}
-#endif
-
-	/* erg - unaligned destination */
-Lmemcpy_fdestul:
-	rsb	r12, r12, #4
-	cmp	r12, #2
-
-	/* align destination with byte copies */
-	ldrb	r3, [r1], #1
-	strb	r3, [r0], #1
-	ldrgeb	r3, [r1], #1
-	strgeb	r3, [r0], #1
-	ldrgtb	r3, [r1], #1
-	strgtb	r3, [r0], #1
-	subs	r2, r2, r12
-	blt	Lmemcpy_fl4		/* less the 4 bytes */
-
-	ands	r12, r1, #3
-	beq	Lmemcpy_ft8		/* we have an aligned source */
-
-	/* erg - unaligned source */
-	/* This is where it gets nasty ... */
-Lmemcpy_fsrcul:
-	bic	r1, r1, #3
-	ldr	lr, [r1], #4
-	cmp	r12, #2
-	bgt	Lmemcpy_fsrcul3
-	beq	Lmemcpy_fsrcul2
-	cmp	r2, #0x0c            
-	blt	Lmemcpy_fsrcul1loop4
-	sub	r2, r2, #0x0c         
-	stmdb	sp!, {r4, r5}
-
-Lmemcpy_fsrcul1loop16:
-	mov	r3, lr, lsr #8
-	ldmia	r1!, {r4, r5, r12, lr}
-	orr	r3, r3, r4, lsl #24
-	mov	r4, r4, lsr #8
-	orr	r4, r4, r5, lsl #24
-	mov	r5, r5, lsr #8
-	orr	r5, r5, r12, lsl #24
-	mov	r12, r12, lsr #8
-	orr	r12, r12, lr, lsl #24
-	stmia	r0!, {r3-r5, r12}
-	subs	r2, r2, #0x10         
-	bge	Lmemcpy_fsrcul1loop16
-	ldmia	sp!, {r4, r5}
-	adds	r2, r2, #0x0c         
-	blt	Lmemcpy_fsrcul1l4
-
-Lmemcpy_fsrcul1loop4:
-	mov	r12, lr, lsr #8
-	ldr	lr, [r1], #4
-	orr	r12, r12, lr, lsl #24
-	str	r12, [r0], #4
-	subs	r2, r2, #4
-	bge	Lmemcpy_fsrcul1loop4
-
-Lmemcpy_fsrcul1l4:
-	sub	r1, r1, #3
-	b	Lmemcpy_fl4
-
-Lmemcpy_fsrcul2:
-	cmp	r2, #0x0c            
-	blt	Lmemcpy_fsrcul2loop4
-	sub	r2, r2, #0x0c         
-	stmdb	sp!, {r4, r5}
-
-Lmemcpy_fsrcul2loop16:
-	mov	r3, lr, lsr #16
-	ldmia	r1!, {r4, r5, r12, lr}
-	orr	r3, r3, r4, lsl #16
-	mov	r4, r4, lsr #16
-	orr	r4, r4, r5, lsl #16
-	mov	r5, r5, lsr #16
-	orr	r5, r5, r12, lsl #16
-	mov	r12, r12, lsr #16
-	orr	r12, r12, lr, lsl #16
-	stmia	r0!, {r3-r5, r12}
-	subs	r2, r2, #0x10         
-	bge	Lmemcpy_fsrcul2loop16
-	ldmia	sp!, {r4, r5}
-	adds	r2, r2, #0x0c         
-	blt	Lmemcpy_fsrcul2l4
-
-Lmemcpy_fsrcul2loop4:
-	mov	r12, lr, lsr #16
-	ldr	lr, [r1], #4
-	orr	r12, r12, lr, lsl #16
-	str	r12, [r0], #4
-	subs	r2, r2, #4
-	bge	Lmemcpy_fsrcul2loop4
-
-Lmemcpy_fsrcul2l4:
-	sub	r1, r1, #2
-	b	Lmemcpy_fl4
-
-Lmemcpy_fsrcul3:
-	cmp	r2, #0x0c            
-	blt	Lmemcpy_fsrcul3loop4
-	sub	r2, r2, #0x0c         
-	stmdb	sp!, {r4, r5}
-
-Lmemcpy_fsrcul3loop16:
-	mov	r3, lr, lsr #24
-	ldmia	r1!, {r4, r5, r12, lr}
-	orr	r3, r3, r4, lsl #8
-	mov	r4, r4, lsr #24
-	orr	r4, r4, r5, lsl #8
-	mov	r5, r5, lsr #24
-	orr	r5, r5, r12, lsl #8
-	mov	r12, r12, lsr #24
-	orr	r12, r12, lr, lsl #8
-	stmia	r0!, {r3-r5, r12}
-	subs	r2, r2, #0x10         
-	bge	Lmemcpy_fsrcul3loop16
-	ldmia	sp!, {r4, r5}
-	adds	r2, r2, #0x0c         
-	blt	Lmemcpy_fsrcul3l4
-
-Lmemcpy_fsrcul3loop4:
-	mov	r12, lr, lsr #24
-	ldr	lr, [r1], #4
-	orr	r12, r12, lr, lsl #8
-	str	r12, [r0], #4
-	subs	r2, r2, #4
-	bge	Lmemcpy_fsrcul3loop4
-
-Lmemcpy_fsrcul3l4:
-	sub	r1, r1, #1
-	b	Lmemcpy_fl4
-
-Lmemcpy_backwards:
-	add	r1, r1, r2
-	add	r0, r0, r2
-	subs	r2, r2, #4
-	blt	Lmemcpy_bl4		/* less than 4 bytes */
-	ands	r12, r0, #3
-	bne	Lmemcpy_bdestul		/* oh unaligned destination addr */
-	ands	r12, r1, #3
-	bne	Lmemcpy_bsrcul		/* oh unaligned source addr */
-
-Lmemcpy_bt8:
-	/* We have aligned source and destination */
-	subs	r2, r2, #8
-	blt	Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
-	stmdb	sp!, {r4, lr}
-	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
-	blt	Lmemcpy_bl32
-
-	/* blat 32 bytes at a time */
-	/* XXX for really big copies perhaps we should use more registers */
-Lmemcpy_bloop32:
-	ldmdb	r1!, {r3, r4, r12, lr}
-	stmdb	r0!, {r3, r4, r12, lr}
-	ldmdb	r1!, {r3, r4, r12, lr}
-	stmdb	r0!, {r3, r4, r12, lr}
-	subs	r2, r2, #0x20         
-	bge	Lmemcpy_bloop32
-
-Lmemcpy_bl32:
-	cmn	r2, #0x10            
-	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
-	stmgedb	r0!, {r3, r4, r12, lr}
-	subge	r2, r2, #0x10         
-	adds	r2, r2, #0x14         
-	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
-	stmgedb	r0!, {r3, r12, lr}
-	subge	r2, r2, #0x0c         
-	ldmia	sp!, {r4, lr}
-
-Lmemcpy_bl12:
-	adds	r2, r2, #8
-	blt	Lmemcpy_bl4
-	subs	r2, r2, #4
-	ldrlt	r3, [r1, #-4]!
-	strlt	r3, [r0, #-4]!
-	ldmgedb	r1!, {r3, r12}
-	stmgedb	r0!, {r3, r12}
-	subge	r2, r2, #4
-
-Lmemcpy_bl4:
-	/* less than 4 bytes to go */
-	adds	r2, r2, #4
-	moveq	pc, lr			/* done */
-
-	/* copy the crud byte at a time */
-	cmp	r2, #2
-	ldrb	r3, [r1, #-1]!
-	strb	r3, [r0, #-1]!
-	ldrgeb	r3, [r1, #-1]!
-	strgeb	r3, [r0, #-1]!
-	ldrgtb	r3, [r1, #-1]!
-	strgtb	r3, [r0, #-1]!
-#ifdef __APCS_26__
-	movs	pc, lr
-#else
-	mov	pc, lr
-#endif
-
-	/* erg - unaligned destination */
-Lmemcpy_bdestul:
-	cmp	r12, #2
-
-	/* align destination with byte copies */
-	ldrb	r3, [r1, #-1]!
-	strb	r3, [r0, #-1]!
-	ldrgeb	r3, [r1, #-1]!
-	strgeb	r3, [r0, #-1]!
-	ldrgtb	r3, [r1, #-1]!
-	strgtb	r3, [r0, #-1]!
-	subs	r2, r2, r12
-	blt	Lmemcpy_bl4		/* less than 4 bytes to go */
-	ands	r12, r1, #3
-	beq	Lmemcpy_bt8		/* we have an aligned source */
-
-	/* erg - unaligned source */
-	/* This is where it gets nasty ... */
-Lmemcpy_bsrcul:
-	bic	r1, r1, #3
-	ldr	r3, [r1, #0]
-	cmp	r12, #2
-	blt	Lmemcpy_bsrcul1
-	beq	Lmemcpy_bsrcul2
-	cmp	r2, #0x0c            
-	blt	Lmemcpy_bsrcul3loop4
-	sub	r2, r2, #0x0c         
-	stmdb	sp!, {r4, r5, lr}
-
-Lmemcpy_bsrcul3loop16:
-	mov	lr, r3, lsl #8
-	ldmdb	r1!, {r3-r5, r12}
-	orr	lr, lr, r12, lsr #24
-	mov	r12, r12, lsl #8
-	orr	r12, r12, r5, lsr #24
-	mov	r5, r5, lsl #8
-	orr	r5, r5, r4, lsr #24
-	mov	r4, r4, lsl #8
-	orr	r4, r4, r3, lsr #24
-	stmdb	r0!, {r4, r5, r12, lr}
-	subs	r2, r2, #0x10         
-	bge	Lmemcpy_bsrcul3loop16
-	ldmia	sp!, {r4, r5, lr}
-	adds	r2, r2, #0x0c         
-	blt	Lmemcpy_bsrcul3l4
-
-Lmemcpy_bsrcul3loop4:
-	mov	r12, r3, lsl #8
-	ldr	r3, [r1, #-4]!
-	orr	r12, r12, r3, lsr #24
-	str	r12, [r0, #-4]!
-	subs	r2, r2, #4
-	bge	Lmemcpy_bsrcul3loop4
-
-Lmemcpy_bsrcul3l4:
-	add	r1, r1, #3
-	b	Lmemcpy_bl4
-
-Lmemcpy_bsrcul2:
-	cmp	r2, #0x0c            
-	blt	Lmemcpy_bsrcul2loop4
-	sub	r2, r2, #0x0c         
-	stmdb	sp!, {r4, r5, lr}
-
-Lmemcpy_bsrcul2loop16:
-	mov	lr, r3, lsl #16
-	ldmdb	r1!, {r3-r5, r12}
-	orr	lr, lr, r12, lsr #16
-	mov	r12, r12, lsl #16
-	orr	r12, r12, r5, lsr #16
-	mov	r5, r5, lsl #16
-	orr	r5, r5, r4, lsr #16
-	mov	r4, r4, lsl #16
-	orr	r4, r4, r3, lsr #16
-	stmdb	r0!, {r4, r5, r12, lr}
-	subs	r2, r2, #0x10         
-	bge	Lmemcpy_bsrcul2loop16
-	ldmia	sp!, {r4, r5, lr}
-	adds	r2, r2, #0x0c         
-	blt	Lmemcpy_bsrcul2l4
-
-Lmemcpy_bsrcul2loop4:
-	mov	r12, r3, lsl #16
-	ldr	r3, [r1, #-4]!
-	orr	r12, r12, r3, lsr #16
-	str	r12, [r0, #-4]!
-	subs	r2, r2, #4
-	bge	Lmemcpy_bsrcul2loop4
-
-Lmemcpy_bsrcul2l4:
-	add	r1, r1, #2
-	b	Lmemcpy_bl4
-
-Lmemcpy_bsrcul1:
-	cmp	r2, #0x0c            
-	blt	Lmemcpy_bsrcul1loop4
-	sub	r2, r2, #0x0c         
-	stmdb	sp!, {r4, r5, lr}
-
-Lmemcpy_bsrcul1loop32:
-	mov	lr, r3, lsl #24
-	ldmdb	r1!, {r3-r5, r12}
-	orr	lr, lr, r12, lsr #8
-	mov	r12, r12, lsl #24
-	orr	r12, r12, r5, lsr #8
-	mov	r5, r5, lsl #24
-	orr	r5, r5, r4, lsr #8
-	mov	r4, r4, lsl #24
-	orr	r4, r4, r3, lsr #8
-	stmdb	r0!, {r4, r5, r12, lr}
-	subs	r2, r2, #0x10         
-	bge	Lmemcpy_bsrcul1loop32
-	ldmia	sp!, {r4, r5, lr}
-	adds	r2, r2, #0x0c         
-	blt	Lmemcpy_bsrcul1l4
-
-Lmemcpy_bsrcul1loop4:
-	mov	r12, r3, lsl #24
-	ldr	r3, [r1, #-4]!
-	orr	r12, r12, r3, lsr #8
-	str	r12, [r0, #-4]!
-	subs	r2, r2, #4
-	bge	Lmemcpy_bsrcul1loop4
-
-Lmemcpy_bsrcul1l4:
-	add	r1, r1, #1
-	b	Lmemcpy_bl4
-	
diff --git a/sys/lib/libkern/arch/arm/bcopy.S b/sys/lib/libkern/arch/arm/bcopy.S
deleted file mode 100644
index 2ea46f6bb203..000000000000
--- a/sys/lib/libkern/arch/arm/bcopy.S
+++ /dev/null
@@ -1,48 +0,0 @@
-/*	$NetBSD: bcopy.S,v 1.1 2000/12/29 20:51:56 bjh21 Exp $	*/
-
-/*-
- * Copyright (c) 1997 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Neil A. Carson and Mark Brinicombe
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the NetBSD
- *	Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <machine/asm.h>
-
-/* bcopy = memcpy/memmove with arguments reversed. */
-
-ENTRY(bcopy)
-	/* switch the source and destination registers */
-	eor     r0, r1, r0 
-	eor     r1, r0, r1 
-	eor     r0, r1, r0 
-	b	_C_LABEL(_memcpy)
diff --git a/sys/lib/libkern/arch/arm/memcpy.S b/sys/lib/libkern/arch/arm/memcpy.S
index 7f64e4a64118..1e9552612dec 100644
--- a/sys/lib/libkern/arch/arm/memcpy.S
+++ b/sys/lib/libkern/arch/arm/memcpy.S
@@ -1,4 +1,4 @@
-/*	$NetBSD: memcpy.S,v 1.1 2000/12/29 20:51:57 bjh21 Exp $	*/
+/*	$NetBSD: memcpy.S,v 1.2 2001/11/20 00:29:20 chris Exp $	*/
 
 /*-
  * Copyright (c) 1997 The NetBSD Foundation, Inc.
@@ -17,8 +17,8 @@
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
- *	This product includes software developed by the NetBSD
- *	Foundation, Inc. and its contributors.
+ *        This product includes software developed by the NetBSD
+ *        Foundation, Inc. and its contributors.
  * 4. Neither the name of The NetBSD Foundation nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
@@ -38,11 +38,456 @@
 
 #include <machine/asm.h>
 
+/*
+ * This is one fun bit of code ...
+ * Some easy listening music is suggested while trying to understand this
+ * code e.g. Iron Maiden
+ *
+ * For anyone attempting to understand it :
+ *
+ * The core code is implemented here with simple stubs for memcpy()
+ * memmove() and bcopy().
+ *
+ * All local labels are prefixed with Lmemcpy_
+ * Following the prefix a label starting f is used in the forward copy code
+ * while a label using b is used in the backwards copy code
+ * The source and destination addresses determine whether a forward or
+ * backward copy is performed.
+ * Separate bits of code are used to deal with the following situations
+ * for both the forward and backwards copy.
+ * unaligned source address
+ * unaligned destination address
+ * Separate copy routines are used to produce an optimised result for each
+ * of these cases.
+ * The copy code will use LDM/STM instructions to copy up to 32 bytes at
+ * a time where possible.
+ *
+ * Note: r12 (aka ip) can be trashed during the function along with
+ * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
+ * Additional registers are preserved prior to use i.e. r4, r5 & lr
+ *
+ * Apologies for the state of the comments ;-)
+ */
+
 ENTRY(memcpy)
-	stmfd	sp!, {r0, lr}
-	bl	_C_LABEL(_memcpy)
+ENTRY_NP(memmove)
+	/* Determine copy direction */
+	cmp	r1, r0
+
+	moveq	r0, #0			/* Quick abort for len=0 */
 #ifdef __APCS_26__
-	ldmfd	sp!, {r0, pc}^
+	moveqs	pc, lr
 #else
-	ldmfd	sp!, {r0, pc}
+	moveq	pc, lr
 #endif
+
+	/* save leaf functions having to store this away */
+	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
+
+	bcc	Lmemcpy_backwards
+
+	/* start of forwards copy */	
+	subs	r2, r2, #4
+	blt	Lmemcpy_fl4		/* less than 4 bytes */
+	ands	r12, r0, #3
+	bne	Lmemcpy_fdestul		/* oh unaligned destination addr */
+	ands	r12, r1, #3
+	bne	Lmemcpy_fsrcul		/* oh unaligned source addr */
+
+Lmemcpy_ft8:
+	/* We have aligned source and destination */
+	subs	r2, r2, #8
+	blt	Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
+	subs	r2, r2, #0x14         
+	blt	Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
+	stmdb	sp!, {r4}		/* borrow r4 */
+
+	/* blat 32 bytes at a time */
+	/* XXX for really big copies perhaps we should use more registers */
+Lmemcpy_floop32:	
+	ldmia	r1!, {r3, r4, r12, lr}
+	stmia	r0!, {r3, r4, r12, lr}
+	ldmia	r1!, {r3, r4, r12, lr}
+	stmia	r0!, {r3, r4, r12, lr}
+	subs	r2, r2, #0x20         
+	bge	Lmemcpy_floop32
+
+	cmn	r2, #0x10
+	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
+	stmgeia	r0!, {r3, r4, r12, lr}
+	subge	r2, r2, #0x10         
+	ldmia	sp!, {r4}		/* return r4 */
+
+Lmemcpy_fl32:
+	adds	r2, r2, #0x14         
+
+	/* blat 12 bytes at a time */
+Lmemcpy_floop12:
+	ldmgeia	r1!, {r3, r12, lr}
+	stmgeia	r0!, {r3, r12, lr}
+	subges	r2, r2, #0x0c         
+	bge	Lmemcpy_floop12
+
+Lmemcpy_fl12:
+	adds	r2, r2, #8
+	blt	Lmemcpy_fl4
+
+	subs	r2, r2, #4
+	ldrlt	r3, [r1], #4
+	strlt	r3, [r0], #4
+	ldmgeia	r1!, {r3, r12}
+	stmgeia	r0!, {r3, r12}
+	subge	r2, r2, #4
+
+Lmemcpy_fl4:
+	/* less than 4 bytes to go */
+	adds	r2, r2, #4
+#ifdef __APCS_26_
+	ldmeqia sp!, {r0, pc}^		/* done */
+#else
+	ldmeqia	sp!, {r0, pc}		/* done */
+#endif
+	/* copy the crud byte at a time */
+	cmp	r2, #2
+	ldrb	r3, [r1], #1
+	strb	r3, [r0], #1
+	ldrgeb	r3, [r1], #1
+	strgeb	r3, [r0], #1
+	ldrgtb	r3, [r1], #1
+	strgtb	r3, [r0], #1
+#ifdef __APCS_26__
+	ldmia	sp!, {r0, pc}^
+#else
+	ldmia	sp!, {r0, pc}
+#endif
+
+	/* erg - unaligned destination */
+Lmemcpy_fdestul:
+	rsb	r12, r12, #4
+	cmp	r12, #2
+
+	/* align destination with byte copies */
+	ldrb	r3, [r1], #1
+	strb	r3, [r0], #1
+	ldrgeb	r3, [r1], #1
+	strgeb	r3, [r0], #1
+	ldrgtb	r3, [r1], #1
+	strgtb	r3, [r0], #1
+	subs	r2, r2, r12
+	blt	Lmemcpy_fl4		/* less the 4 bytes */
+
+	ands	r12, r1, #3
+	beq	Lmemcpy_ft8		/* we have an aligned source */
+
+	/* erg - unaligned source */
+	/* This is where it gets nasty ... */
+Lmemcpy_fsrcul:
+	bic	r1, r1, #3
+	ldr	lr, [r1], #4
+	cmp	r12, #2
+	bgt	Lmemcpy_fsrcul3
+	beq	Lmemcpy_fsrcul2
+	cmp	r2, #0x0c            
+	blt	Lmemcpy_fsrcul1loop4
+	sub	r2, r2, #0x0c         
+	stmdb	sp!, {r4, r5}
+
+Lmemcpy_fsrcul1loop16:
+	mov	r3, lr, lsr #8
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsl #24
+	mov	r4, r4, lsr #8
+	orr	r4, r4, r5, lsl #24
+	mov	r5, r5, lsr #8
+	orr	r5, r5, r12, lsl #24
+	mov	r12, r12, lsr #8
+	orr	r12, r12, lr, lsl #24
+	stmia	r0!, {r3-r5, r12}
+	subs	r2, r2, #0x10         
+	bge	Lmemcpy_fsrcul1loop16
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c         
+	blt	Lmemcpy_fsrcul1l4
+
+Lmemcpy_fsrcul1loop4:
+	mov	r12, lr, lsr #8
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsl #24
+	str	r12, [r0], #4
+	subs	r2, r2, #4
+	bge	Lmemcpy_fsrcul1loop4
+
+Lmemcpy_fsrcul1l4:
+	sub	r1, r1, #3
+	b	Lmemcpy_fl4
+
+Lmemcpy_fsrcul2:
+	cmp	r2, #0x0c            
+	blt	Lmemcpy_fsrcul2loop4
+	sub	r2, r2, #0x0c         
+	stmdb	sp!, {r4, r5}
+
+Lmemcpy_fsrcul2loop16:
+	mov	r3, lr, lsr #16
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsl #16
+	mov	r4, r4, lsr #16
+	orr	r4, r4, r5, lsl #16
+	mov	r5, r5, lsr #16
+	orr	r5, r5, r12, lsl #16
+	mov	r12, r12, lsr #16
+	orr	r12, r12, lr, lsl #16
+	stmia	r0!, {r3-r5, r12}
+	subs	r2, r2, #0x10         
+	bge	Lmemcpy_fsrcul2loop16
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c         
+	blt	Lmemcpy_fsrcul2l4
+
+Lmemcpy_fsrcul2loop4:
+	mov	r12, lr, lsr #16
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsl #16
+	str	r12, [r0], #4
+	subs	r2, r2, #4
+	bge	Lmemcpy_fsrcul2loop4
+
+Lmemcpy_fsrcul2l4:
+	sub	r1, r1, #2
+	b	Lmemcpy_fl4
+
+Lmemcpy_fsrcul3:
+	cmp	r2, #0x0c            
+	blt	Lmemcpy_fsrcul3loop4
+	sub	r2, r2, #0x0c         
+	stmdb	sp!, {r4, r5}
+
+Lmemcpy_fsrcul3loop16:
+	mov	r3, lr, lsr #24
+	ldmia	r1!, {r4, r5, r12, lr}
+	orr	r3, r3, r4, lsl #8
+	mov	r4, r4, lsr #24
+	orr	r4, r4, r5, lsl #8
+	mov	r5, r5, lsr #24
+	orr	r5, r5, r12, lsl #8
+	mov	r12, r12, lsr #24
+	orr	r12, r12, lr, lsl #8
+	stmia	r0!, {r3-r5, r12}
+	subs	r2, r2, #0x10         
+	bge	Lmemcpy_fsrcul3loop16
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c         
+	blt	Lmemcpy_fsrcul3l4
+
+Lmemcpy_fsrcul3loop4:
+	mov	r12, lr, lsr #24
+	ldr	lr, [r1], #4
+	orr	r12, r12, lr, lsl #8
+	str	r12, [r0], #4
+	subs	r2, r2, #4
+	bge	Lmemcpy_fsrcul3loop4
+
+Lmemcpy_fsrcul3l4:
+	sub	r1, r1, #1
+	b	Lmemcpy_fl4
+
+Lmemcpy_backwards:
+	add	r1, r1, r2
+	add	r0, r0, r2
+	subs	r2, r2, #4
+	blt	Lmemcpy_bl4		/* less than 4 bytes */
+	ands	r12, r0, #3
+	bne	Lmemcpy_bdestul		/* oh unaligned destination addr */
+	ands	r12, r1, #3
+	bne	Lmemcpy_bsrcul		/* oh unaligned source addr */
+
+Lmemcpy_bt8:
+	/* We have aligned source and destination */
+	subs	r2, r2, #8
+	blt	Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
+	stmdb	sp!, {r4}
+	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
+	blt	Lmemcpy_bl32
+
+	/* blat 32 bytes at a time */
+	/* XXX for really big copies perhaps we should use more registers */
+Lmemcpy_bloop32:
+	ldmdb	r1!, {r3, r4, r12, lr}
+	stmdb	r0!, {r3, r4, r12, lr}
+	ldmdb	r1!, {r3, r4, r12, lr}
+	stmdb	r0!, {r3, r4, r12, lr}
+	subs	r2, r2, #0x20         
+	bge	Lmemcpy_bloop32
+
+Lmemcpy_bl32:
+	cmn	r2, #0x10            
+	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
+	stmgedb	r0!, {r3, r4, r12, lr}
+	subge	r2, r2, #0x10         
+	adds	r2, r2, #0x14         
+	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
+	stmgedb	r0!, {r3, r12, lr}
+	subge	r2, r2, #0x0c         
+	ldmia	sp!, {r4}
+
+Lmemcpy_bl12:
+	adds	r2, r2, #8
+	blt	Lmemcpy_bl4
+	subs	r2, r2, #4
+	ldrlt	r3, [r1, #-4]!
+	strlt	r3, [r0, #-4]!
+	ldmgedb	r1!, {r3, r12}
+	stmgedb	r0!, {r3, r12}
+	subge	r2, r2, #4
+
+Lmemcpy_bl4:
+	/* less than 4 bytes to go */
+	adds	r2, r2, #4
+#ifdef __APCS_26__
+	ldmeqia	sp!, {r0, pc}^
+#else
+	ldmeqia	sp!, {r0, pc}
+#endif
+
+	/* copy the crud byte at a time */
+	cmp	r2, #2
+	ldrb	r3, [r1, #-1]!
+	strb	r3, [r0, #-1]!
+	ldrgeb	r3, [r1, #-1]!
+	strgeb	r3, [r0, #-1]!
+	ldrgtb	r3, [r1, #-1]!
+	strgtb	r3, [r0, #-1]!
+#ifdef __APCS_26__
+	ldmia	sp!, {r0, pc}^
+#else
+	ldmia	sp!, {r0, pc}
+#endif
+
+	/* erg - unaligned destination */
+Lmemcpy_bdestul:
+	cmp	r12, #2
+
+	/* align destination with byte copies */
+	ldrb	r3, [r1, #-1]!
+	strb	r3, [r0, #-1]!
+	ldrgeb	r3, [r1, #-1]!
+	strgeb	r3, [r0, #-1]!
+	ldrgtb	r3, [r1, #-1]!
+	strgtb	r3, [r0, #-1]!
+	subs	r2, r2, r12
+	blt	Lmemcpy_bl4		/* less than 4 bytes to go */
+	ands	r12, r1, #3
+	beq	Lmemcpy_bt8		/* we have an aligned source */
+
+	/* erg - unaligned source */
+	/* This is where it gets nasty ... */
+Lmemcpy_bsrcul:
+	bic	r1, r1, #3
+	ldr	r3, [r1, #0]
+	cmp	r12, #2
+	blt	Lmemcpy_bsrcul1
+	beq	Lmemcpy_bsrcul2
+	cmp	r2, #0x0c            
+	blt	Lmemcpy_bsrcul3loop4
+	sub	r2, r2, #0x0c         
+	stmdb	sp!, {r4, r5}
+
+Lmemcpy_bsrcul3loop16:
+	mov	lr, r3, lsl #8
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsr #24
+	mov	r12, r12, lsl #8
+	orr	r12, r12, r5, lsr #24
+	mov	r5, r5, lsl #8
+	orr	r5, r5, r4, lsr #24
+	mov	r4, r4, lsl #8
+	orr	r4, r4, r3, lsr #24
+	stmdb	r0!, {r4, r5, r12, lr}
+	subs	r2, r2, #0x10         
+	bge	Lmemcpy_bsrcul3loop16
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c         
+	blt	Lmemcpy_bsrcul3l4
+
+Lmemcpy_bsrcul3loop4:
+	mov	r12, r3, lsl #8
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsr #24
+	str	r12, [r0, #-4]!
+	subs	r2, r2, #4
+	bge	Lmemcpy_bsrcul3loop4
+
+Lmemcpy_bsrcul3l4:
+	add	r1, r1, #3
+	b	Lmemcpy_bl4
+
+Lmemcpy_bsrcul2:
+	cmp	r2, #0x0c            
+	blt	Lmemcpy_bsrcul2loop4
+	sub	r2, r2, #0x0c         
+	stmdb	sp!, {r4, r5}
+
+Lmemcpy_bsrcul2loop16:
+	mov	lr, r3, lsl #16
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsr #16
+	mov	r12, r12, lsl #16
+	orr	r12, r12, r5, lsr #16
+	mov	r5, r5, lsl #16
+	orr	r5, r5, r4, lsr #16
+	mov	r4, r4, lsl #16
+	orr	r4, r4, r3, lsr #16
+	stmdb	r0!, {r4, r5, r12, lr}
+	subs	r2, r2, #0x10         
+	bge	Lmemcpy_bsrcul2loop16
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c         
+	blt	Lmemcpy_bsrcul2l4
+
+Lmemcpy_bsrcul2loop4:
+	mov	r12, r3, lsl #16
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsr #16
+	str	r12, [r0, #-4]!
+	subs	r2, r2, #4
+	bge	Lmemcpy_bsrcul2loop4
+
+Lmemcpy_bsrcul2l4:
+	add	r1, r1, #2
+	b	Lmemcpy_bl4
+
+Lmemcpy_bsrcul1:
+	cmp	r2, #0x0c            
+	blt	Lmemcpy_bsrcul1loop4
+	sub	r2, r2, #0x0c         
+	stmdb	sp!, {r4, r5}
+
+Lmemcpy_bsrcul1loop32:
+	mov	lr, r3, lsl #24
+	ldmdb	r1!, {r3-r5, r12}
+	orr	lr, lr, r12, lsr #8
+	mov	r12, r12, lsl #24
+	orr	r12, r12, r5, lsr #8
+	mov	r5, r5, lsl #24
+	orr	r5, r5, r4, lsr #8
+	mov	r4, r4, lsl #24
+	orr	r4, r4, r3, lsr #8
+	stmdb	r0!, {r4, r5, r12, lr}
+	subs	r2, r2, #0x10         
+	bge	Lmemcpy_bsrcul1loop32
+	ldmia	sp!, {r4, r5}
+	adds	r2, r2, #0x0c         
+	blt	Lmemcpy_bsrcul1l4
+
+Lmemcpy_bsrcul1loop4:
+	mov	r12, r3, lsl #24
+	ldr	r3, [r1, #-4]!
+	orr	r12, r12, r3, lsr #8
+	str	r12, [r0, #-4]!
+	subs	r2, r2, #4
+	bge	Lmemcpy_bsrcul1loop4
+
+Lmemcpy_bsrcul1l4:
+	add	r1, r1, #1
+	b	Lmemcpy_bl4
+	
diff --git a/sys/lib/libkern/arch/arm/memmove.S b/sys/lib/libkern/arch/arm/memmove.S
index 22fbd87d33fa..ceb29afe5111 100644
--- a/sys/lib/libkern/arch/arm/memmove.S
+++ b/sys/lib/libkern/arch/arm/memmove.S
@@ -1,48 +1,5 @@
-/*	$NetBSD: memmove.S,v 1.1 2000/12/29 20:51:57 bjh21 Exp $	*/
+/*	$NetBSD: memmove.S,v 1.2 2001/11/20 00:29:20 chris Exp $	*/
 
-/*-
- * Copyright (c) 1997 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by Neil A. Carson and Mark Brinicombe
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the NetBSD
- *	Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+/*
+ * placeholder to keep the make system happy, memove is actually in memcpy.S
  */
-
-#include <machine/asm.h>
-
-ENTRY(memmove)
-	stmfd	sp!, {r0, lr}
-	bl	_C_LABEL(_memcpy)
-#ifdef __APCS_26__
-	ldmfd	sp!, {r0, pc}^
-#else
-	ldmfd	sp!, {r0, pc}
-#endif