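Fixed to not use the "br" instruction. Branch Target Identification (BTI) doesn't like "br": when BTI is enforced, an indirect branch must land on a BTI landing-pad instruction, so a computed "br" into the middle of an unrolled copy/fill sequence would trap.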

requested by maxv@
This commit is contained in:
ryo 2020-04-11 05:12:52 +00:00
parent 407ffe11fc
commit adc5085fcd
2 changed files with 110 additions and 54 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: bcopy.S,v 1.1 2018/02/04 21:52:16 skrll Exp $ */
/* $NetBSD: bcopy.S,v 1.2 2020/04/11 05:12:52 ryo Exp $ */
/*
* Copyright (c) 2018 Ryo Shimizu <ryo@nerv.org>
@ -29,7 +29,7 @@
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: bcopy.S,v 1.1 2018/02/04 21:52:16 skrll Exp $")
RCSID("$NetBSD: bcopy.S,v 1.2 2020/04/11 05:12:52 ryo Exp $")
#endif
#if defined(MEMCOPY)
@ -207,32 +207,60 @@ copy_backward:
#endif /* (STP_ALIGN > 8) */
9:
backward_copy1k:
/* while (len >= 1024) */
/* { src -= 1024; dst -= 1024; copy1024(dst, src); len -= 1024; } */
cmp LEN, #1024
bhs backward_copy1k
backward_less1k:
/* copy 16*n bytes */
and TMP_D, LEN, #(1023-15) /* len &= 1023; len &= ~15; */
adr TMP_X, 8f
sub LEN, LEN, TMP_D
sub TMP_X, TMP_X, TMP_D, lsr #1 /* jump to (8f - len/2) */
br TMP_X
backward_copy1k: /* copy 16*64 bytes */
blo 9f
1:
sub LEN, LEN, #1024
.rept (1024 / 16)
ldp DATA0, DATA1, [SRC0, #-16]! /* *--dst = *--src; */
stp DATA0, DATA1, [DST, #-16]!
.endr
8:
cbz LEN, done
cmp LEN, #1024
bhs backward_copy1k
cmp LEN, #16
bhs backward_less1k
bhs 1b
9:
/* if (len & 512) { src -= 512; dst -= 512; copy512(dst, src); } */
tbz LEN, #9, 1f
.rept (512 / 16)
ldp DATA0, DATA1, [SRC0, #-16]!
stp DATA0, DATA1, [DST, #-16]!
.endr
1:
/* if (len & 256) { src -= 256; dst -= 256; copy256(dst, src); } */
tbz LEN, #8, 1f
.rept (256 / 16)
ldp DATA0, DATA1, [SRC0, #-16]!
stp DATA0, DATA1, [DST, #-16]!
.endr
1:
/* if (len & 128) { src -= 128; dst -= 128; copy128(dst, src); } */
tbz LEN, #7, 1f
.rept (128 / 16)
ldp DATA0, DATA1, [SRC0, #-16]!
stp DATA0, DATA1, [DST, #-16]!
.endr
1:
/* if (len & 64) { src -= 64; dst -= 64; copy64(dst, src); } */
tbz LEN, #6, 1f
.rept (64 / 16)
ldp DATA0, DATA1, [SRC0, #-16]!
stp DATA0, DATA1, [DST, #-16]!
.endr
1:
/* if (len & 32) { src -= 32; dst -= 32; copy32(dst, src); } */
tbz LEN, #5, 1f
.rept (32 / 16)
ldp DATA0, DATA1, [SRC0, #-16]!
stp DATA0, DATA1, [DST, #-16]!
.endr
1:
/* if (len & 16) { *--(uint128_t *)dst = *--(uint128_t *)src; } */
tbz LEN, #4, 1f
ldp DATA0, DATA1, [SRC0, #-16]!
ldp DATA0, DATA1, [DST, #-16]!
stp DATA0, DATA1, [DST, #-16]!
1:
/* if (len & 8) { *--(uint64_t *)dst = *--(uint64_t *)src; } */
tbz LEN, #3, 1f
@ -271,14 +299,10 @@ backward_copy:
bcs 9f
backward_tiny:
/* copy 1-10 bytes */
adr TMP_X, 8f
sub TMP_X, TMP_X, LEN, lsl #3 /* jump to (8f - len*2) */
br TMP_X
.rept 10
1: sub LEN, LEN, #1
ldrb TMP_Xw, [SRC0, #-1]!
strb TMP_Xw, [DST, #-1]!
.endr
8:
cbz LEN, 1b
ret
9:
/* length is small(<32), and src or dst may be unaligned */
@ -548,14 +572,10 @@ ENTRY(FUNCTION)
bcs 9f
forward_tiny:
/* copy 1-10 bytes */
adr TMP_X, 8f
sub TMP_X, TMP_X, LEN, lsl #3 /* jump to (8f - len*2) */
br TMP_X
.rept 10
1: sub LEN, LEN, #1
ldrb TMP_Xw, [SRC0], #1
strb TMP_Xw, [DST], #1
.endr
8:
cbz LEN, 1b
ret
9:
/* length is small(<32), and src or dst may be unaligned */
@ -938,28 +958,56 @@ copy_forward:
#endif /* (STP_ALIGN > 8) */
9:
forward_copy1k:
/* while (len >= 1024) */
/* { copy1024(dst, src); src += 1024; dst += 1024; len -= 1024; } */
cmp LEN, #1024
bhs forward_copy1k
forward_less1k:
/* copy 16*n bytes */
and TMP_D, LEN, #(1023-15) /* len &= 1023; len &= ~15; */
adr TMP_X, 8f
sub LEN, LEN, TMP_D
sub TMP_X, TMP_X, TMP_D, lsr #1 /* jump to (8f - len/2) */
br TMP_X
forward_copy1k: /* copy 16*64 bytes */
blo 9f
1:
sub LEN, LEN, #1024
.rept (1024 / 16)
ldp DATA0, DATA1, [SRC0], #16 /* *dst++ = *src++; */
stp DATA0, DATA1, [DST], #16
.endr
8:
cbz LEN, done
cmp LEN, #1024
bhs forward_copy1k
cmp LEN, #16
bhs forward_less1k
bhs 1b
9:
/* if (len & 512) { copy512(dst, src); src += 512; dst += 512; } */
tbz LEN, #9, 1f
.rept (512 / 16)
ldp DATA0, DATA1, [SRC0], #16
stp DATA0, DATA1, [DST], #16
.endr
1:
/* if (len & 256) { copy256(dst, src); src += 256; dst += 256; } */
tbz LEN, #8, 1f
.rept (256 / 16)
ldp DATA0, DATA1, [SRC0], #16
stp DATA0, DATA1, [DST], #16
.endr
1:
/* if (len & 128) { copy128(dst, src); src += 128; dst += 128; } */
tbz LEN, #7, 1f
.rept (128 / 16)
ldp DATA0, DATA1, [SRC0], #16
stp DATA0, DATA1, [DST], #16
.endr
1:
/* if (len & 64) { copy64(dst, src); src += 64; dst += 64; } */
tbz LEN, #6, 1f
.rept (64 / 16)
ldp DATA0, DATA1, [SRC0], #16
stp DATA0, DATA1, [DST], #16
.endr
1:
/* if (len & 32) { copy32(dst, src); src += 32; dst += 32; } */
tbz LEN, #5, 1f
.rept (32 / 16)
ldp DATA0, DATA1, [SRC0], #16
stp DATA0, DATA1, [DST], #16
.endr
1:
/* if (len & 16) { *(uint128_t *)dst++ = *(uint128_t *)src++; } */
tbz LEN, #4, 1f
ldp DATA0, DATA1, [SRC0], #16

View File

@ -1,4 +1,4 @@
/* $NetBSD: memset.S,v 1.2 2017/08/29 15:00:23 ryo Exp $ */
/* $NetBSD: memset.S,v 1.3 2020/04/11 05:12:52 ryo Exp $ */
/*-
* Copyright (c) 2014 The NetBSD Foundation, Inc.
@ -158,18 +158,26 @@ ENTRY(memset)
tbz x15, #3, .Lzero_qword_aligned
str xzr, [x15], #8
.Lzero_qword_aligned:
cbz x7, .Lblock_aligned /* less than 16 bytes? just branch */
adr x6, .Lunrolled_end
sub x6, x6, x7, lsl #2 /* backup to write the last N insn */
br x6 /* and do it */
cbz x7, .Lblock_aligned /* aligned? just branch */
/*
* The maximum size of DCZID_EL0:BS supported is 2048 bytes.
*/
.rept (2048 / 16) - 1
/* align to DCZID_EL0:BS boundary */
tbz x7, #0, 0f /* fill 16byte? */
stp xzr, xzr, [x15], #16
.endr
.Lunrolled_end:
0:
tbz x7, #1, 1f /* fill 32byte? */
stp xzr, xzr, [x15], #16
stp xzr, xzr, [x15], #16
1:
lsr x7, x7, #2
cbz x7, 9f
.L64bytes_fill:
sub x7, x7, #1
stp xzr, xzr, [x15], #16
stp xzr, xzr, [x15], #16
stp xzr, xzr, [x15], #16
stp xzr, xzr, [x15], #16
cbnz x7, .L64bytes_fill
9:
/*
* Now we are block aligned.