Add assembler versions of ffs, bzero and memset.

This commit is contained in:
mjl 2001-11-29 00:27:07 +00:00
parent 7f0157c400
commit 4ee3b0f097
3 changed files with 388 additions and 3 deletions
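For reference, the C-level behaviour the new assembler routines take over, shown as a minimal byte-at-a-time sketch. This is not the removed libkern C source and not the optimized code below; it only states the contract: bzero clears len bytes, memset fills len bytes with the low byte of c and returns the original pointer.

#include <stddef.h>

/* Reference-only sketch of the semantics; the assembler versions below
 * do the same job word- and cache-block-wise. */
void
bzero(void *b, size_t len)
{
	unsigned char *p = b;

	while (len-- != 0)
		*p++ = 0;
}

void *
memset(void *b, int c, size_t len)
{
	unsigned char *p = b;

	while (len-- != 0)
		*p++ = (unsigned char)c;
	return (b);
}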

Makefile.inc

@@ -1,13 +1,15 @@
-# $NetBSD: Makefile.inc,v 1.16 2000/11/01 19:37:19 thorpej Exp $
+# $NetBSD: Makefile.inc,v 1.17 2001/11/29 00:27:07 mjl Exp $
 
 SRCS+=	__main.c __assert.c \
 	imax.c imin.c lmax.c lmin.c max.c min.c ulmax.c ulmin.c \
 	bswap16.c bswap32.c bswap64.c \
-	bcmp.c bzero.c ffs.c \
-	memchr.c memcmp.c memset.c \
+	bcmp.c \
+	memchr.c memcmp.c \
 	strcat.c strcmp.c strcpy.c strlen.c strcasecmp.c \
 	strncasecmp.c strncmp.c strncpy.c \
 	scanc.c skpc.c \
 	htonl.c htons.c ntohl.c ntohs.c \
 	random.c strtoul.c \
 	syncicache.c
+
+SRCS+=	ffs.S bzero.S

bzero.S

@@ -0,0 +1,334 @@
/* $NetBSD: bzero.S,v 1.1 2001/11/29 00:27:07 mjl Exp $ */
/*-
* Copyright (C) 2001 Martin J. Laubach <mjl@netbsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*----------------------------------------------------------------------*/
#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif
#define USE_STSWX 0 /* don't: stswx is slower than the trivial copy loop */
/*----------------------------------------------------------------------*/
/*
void bzero(void *b r3, size_t len r4);
void * memset(void *b r3, int c r4, size_t len r5);
*/
/*----------------------------------------------------------------------*/
#define r_dst r3
#define r_len r4
#define r_val r0
.text
.align 4
ENTRY(bzero)
li r_val, 0 /* Value to stuff in */
b cb_memset
ENTRY(memset)
cmplwi cr1, r5, 0
mr. r0, r4
mr r8, r3
beqlr- cr1 /* Nothing to do */
rlwimi r0, r4, 8, 16, 23 /* word extend fill value */
rlwimi r0, r0, 16, 0, 15
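/* r0 now holds the fill byte replicated into all four byte lanes */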
mr r4, r5
bne- simple_fill /* != 0, use trivial fill */
cb_memset:
/*----------------------------------------------------------------------*/
#ifndef _KERNEL
/* First find out cache line size */
#ifdef PIC
mflr r9
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr r10
mtlr r9
lwz r5,cache_size@got(r10)
#else
lis r5,cache_size@h
ori r5,r5,cache_size@l
#endif
lwz r6, 0(r5)
cmpwi r6, -1
bne+ cb_cacheline_known
/*----------------------------------------------------------------------*/
#define CTL_MACHDEP 7
#define CPU_CACHELINE 1
#define STKFRAME_SZ 48
#define MIB 8
#define OLDPLEN 16
#define R3_SAVE 20
#define R4_SAVE 24
#define R0_SAVE 28
#define R8_SAVE 32
mflr r6
stw r6, 4(r1)
stwu r1, -STKFRAME_SZ(r1)
stw r8, R8_SAVE(r1)
stw r3, R3_SAVE(r1)
stw r4, R4_SAVE(r1)
stw r0, R0_SAVE(r1)
li r0, CTL_MACHDEP /* Construct MIB */
stw r0, MIB(r1)
li r0, CPU_CACHELINE
stw r0, MIB+4(r1)
li r0, 4 /* Oldlenp := 4 */
stw r0, OLDPLEN(r1)
addi r3, r1, MIB
li r4, 2 /* namelen */
/* r5 already contains &cache_size */
addi r6, r1, OLDPLEN
li r7, 0
li r8, 0
bl PIC_PLT(_C_LABEL(sysctl))
lwz r8, R8_SAVE(r1)
lwz r3, R3_SAVE(r1)
lwz r4, R4_SAVE(r1)
lwz r0, R0_SAVE(r1)
#ifdef PIC
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr r10
lwz r9, cache_size@got(r10)
lwz r9, 0(r9)
#else
lis r5, cache_size@ha
lwz r9, cache_size@l(r5)
#endif
la r1, STKFRAME_SZ(r1)
lwz r5, 4(r1)
mtlr r5
cntlzw r6, r9 /* compute shift value */
li r5, 31
subf r5, r6, r5
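/* r5 = 31 - cntlzw(size) = log2(cache line size) */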
#ifdef PIC
lwz r6, cache_sh@got(r10)
stw r5, 0(r6)
#else
lis r6, cache_sh@ha
stw r5, cache_sh@l(r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (r9) and shift value (r10) */
cb_cacheline_known:
#ifdef PIC
lwz r5, cache_size@got(r10)
lwz r9, 0(r5)
lwz r5, cache_sh@got(r10)
lwz r10, 0(r5)
#else
lis r9, cache_size@ha
lwz r9, cache_size@l(r9)
lis r10, cache_sh@ha
lwz r10, cache_sh@l(r10)
#endif
#else /* _KERNEL */
li r9, CACHELINESIZE
#if CACHELINESIZE == 32
#define CACHELINESHIFT 5
#else
#error Define CACHELINESHIFT for your CACHELINESIZE
#endif
li r10, CACHELINESHIFT
#endif /* _KERNEL */
/* Back in memory filling business */
cmplwi cr1, r_len, 0 /* Nothing to do? */
add r5, r9, r9
cmplw r_len, r5 /* <= 2*CL bytes to move? */
beqlr- cr1 /* then do nothing */
blt+ simple_fill /* a trivial fill routine */
/* Word align the block: fill bytewise until dst is word aligned */
andi. r5, r_dst, 0x03
li r6, 4
beq+ cb_aligned_w /* already aligned to word? */
subf r5, r5, r6 /* bytes to fill to align4 */
#if USE_STSWX
mtxer r5
stswx r0, 0, r_dst
add r_dst, r5, r_dst
#else
mtctr r5
subi r_dst, r_dst, 1
1: stbu r_val, 1(r_dst) /* Fill bytewise */
bdnz 1b
addi r_dst, r_dst, 1
#endif
subf r_len, r5, r_len
cb_aligned_w: /* Cache block align, fill wordwise until dst aligned */
/* We know there is work left to do since we started with > 2*CL bytes, */
/* so there is no need to check for r_len == 0 */
rlwinm. r5, r_dst, 30, 29, 31
srwi r6, r9, 2
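/* r5 = (dst >> 2) & 7, r6 = cache line size in words */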
beq cb_aligned_cb /* already on CL boundary? */
subf r5, r5, r6 /* words to fill to alignment */
mtctr r5
slwi r5, r5, 2
subf r_len, r5, r_len
subi r_dst, r_dst, 4
1: stwu r_val, 4(r_dst) /* Fill wordwise */
bdnz 1b
addi r_dst, r_dst, 4
cb_aligned_cb: /* no need to check r_len, see above */
srw. r5, r_len, r10 /* Number of cache blocks */
mtctr r5
beq cblocks_done
slw r5, r5, r10
subf r_len, r5, r_len
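/* dcbz zeroes one entire cache block per iteration */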
1: dcbz 0, r_dst /* Clear blockwise */
add r_dst, r_dst, r9
bdnz 1b
cblocks_done: /* still CL aligned, but less than CL bytes left */
cmplwi cr1, r_len, 0
cmplwi r_len, 8
beq- cr1, sf_return
blt- sf_bytewise /* <8 remaining? */
b sf_aligned_w
/*----------------------------------------------------------------------*/
wbzero: li r_val, 0
cmplwi r_len, 0
beqlr- /* Nothing to do */
simple_fill:
#if USE_STSWX
cmplwi cr1, r_len, 12 /* < 12 bytes to move? */
#else
cmplwi cr1, r_len, 8 /* < 8 bytes to move? */
#endif
andi. r5, r_dst, 0x03 /* bytes to fill to align4 */
blt cr1, sf_bytewise /* trivial byte mover */
li r6, 4
subf r5, r5, r6
beq+ sf_aligned_w /* dest is word aligned */
#if USE_STSWX
mtxer r5
stswx r0, 0, r_dst
add r_dst, r5, r_dst
#else
mtctr r5 /* nope, then fill bytewise */
subi r_dst, r_dst, 1 /* until it is */
1: stbu r_val, 1(r_dst)
bdnz 1b
addi r_dst, r_dst, 1
#endif
subf r_len, r5, r_len
sf_aligned_w: /* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
mr r6, r0
mr r7, r0
srwi r5, r_len, 3
mtctr r5
slwi r5, r5, 3 /* adjust len */
subf. r_len, r5, r_len
1: stswi r6, r_dst, 8
addi r_dst, r_dst, 8
bdnz 1b
#else
srwi r5, r_len, 2 /* words to fill */
mtctr r5
slwi r5, r5, 2
subf. r_len, r5, r_len /* adjust len for fill */
subi r_dst, r_dst, 4
1: stwu r_val, 4(r_dst)
bdnz 1b
addi r_dst, r_dst, 4
#endif
sf_word_done: bne- sf_bytewise
sf_return: mr r3, r8 /* restore orig ptr */
blr /* for memset functionality */
sf_bytewise:
#if USE_STSWX
mr r5, r0
mr r6, r0
mr r7, r0
mtxer r_len
stswx r5, 0, r_dst
#else
mtctr r_len
subi r_dst, r_dst, 1
1: stbu r_val, 1(r_dst)
bdnz 1b
#endif
mr r3, r8 /* restore orig ptr */
blr /* for memset functionality */
/*----------------------------------------------------------------------*/
.data
cache_size: .long -1
cache_sh: .long 0
/*----------------------------------------------------------------------*/
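The userland (!_KERNEL) path above discovers the cache line size once at run time through sysctl(CTL_MACHDEP, CPU_CACHELINE) and caches it, together with its log2, in cache_size and cache_sh. A rough C equivalent of that discovery step, as a sketch only: the helper name and the fallback value are ours, not part of the commit.

#include <sys/param.h>
#include <sys/sysctl.h>

#ifndef CPU_CACHELINE
#define CPU_CACHELINE	1		/* value the asm above #defines */
#endif

static int cache_size = -1;		/* mirrors the .data cells in bzero.S */
static int cache_sh;

/* Hypothetical helper, not in the commit: probe the line size once
 * and derive the shift value, as the assembler fast path does. */
static void
find_cacheline(void)
{
	int mib[2] = { CTL_MACHDEP, CPU_CACHELINE };
	size_t len = sizeof(cache_size);

	if (cache_size != -1)
		return;			/* already probed */
	if (sysctl(mib, 2, &cache_size, &len, NULL, 0) == -1)
		cache_size = 32;	/* assumed fallback; the asm has none */
	for (cache_sh = 0; (1 << cache_sh) < cache_size; cache_sh++)
		continue;		/* cache_sh = log2(cache_size) */
}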

ffs.S

@@ -0,0 +1,49 @@
/* $NetBSD: ffs.S,v 1.1 2001/11/29 00:27:07 mjl Exp $ */
/*-
* Copyright (c) 2001 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Martin J. Laubach
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
.align 4
ENTRY(ffs)
neg r4,r3 /* r4 = -mask */
and r3,r4,r3 /* r3 = mask & -mask: isolate lowest set bit */
cntlzw r3,r3 /* count leading zeros (32 if mask was 0) */
li r0,32
subf r3,r3,r0 /* return 32 - cntlzw = 1-based bit index, or 0 */
blr
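The routine relies on the two's-complement identity mask & -mask, which isolates the least significant set bit; cntlzw then counts its leading zeros, and 32 minus that count is the 1-based bit index (mask == 0 falls out naturally, since cntlzw of 0 is 32). A minimal C sketch of the same computation, assuming 32-bit int and a compiler that provides __builtin_clz; the name ffs_sketch is ours, not part of the commit.

/* Sketch only: mirrors the cntlzw-based assembler above. */
static int
ffs_sketch(int mask)
{
	unsigned int lsb;

	lsb = (unsigned int)mask & -(unsigned int)mask;	/* lowest set bit */
	if (lsb == 0)
		return 0;			/* no bits set */
	return 32 - __builtin_clz(lsb);		/* 1-based index of that bit */
}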