mips/cavium: Take advantage of Octeon's guaranteed r/rw ordering.

This commit is contained in:
riastradh 2022-04-21 12:06:31 +00:00
parent f03a8d394a
commit 4a6459a8d1
2 changed files with 83 additions and 38 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: membar_ops.S,v 1.12 2022/04/09 23:32:51 riastradh Exp $ */
/* $NetBSD: membar_ops.S,v 1.13 2022/04/21 12:06:31 riastradh Exp $ */
/*-
* Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
@ -38,44 +38,80 @@ LEAF(_membar_sync)
j ra
BDSYNC
END(_membar_sync)
ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
STRONG_ALIAS(_membar_enter,_membar_sync)
ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
#ifdef __OCTEON__
/*
* cnMIPS guarantees load-before-load/store ordering without any
* barriers. So the only barriers we need are store-before-load (sync)
* and store-before-store (syncw, i.e., sync 4). See Table 2-32
* `Execution Ordering Rules' on p. 104 of Cavium OCTEON III CN78XX
* Hardware Reference Manual, CN78XX-HM-0.99E, September 2014:
*
* First Operation DLD [load instruction to a physical
* address that is L2/DRAM]
* Second Operation Any
* Execution Ordering Comments
*
* The second operation cannot appear to execute before
* the first (DLD) operation, regardless of the presence
* or absence of SYNC* instructions.
*
* Note: I'm not sure if this applies to earlier cnMIPS -- can't find
* it in the Cavium Networks OCTEON Plus CN50XX Hardware Reference
* Manual CN50XX-HM-0.99E, July 2008. Experimentally, on an erlite3
* (Cavium Octeon CN5020-500), I can easily detect reordering of
* store-before-store and store-before-load, but I haven't been able to
* detect any reordering of load-before-load or load-before-store.
*
* Note: On early cnMIPS (CN3xxx), there is an erratum which sometimes
* requires issuing two syncw's in a row. I don't know the details --
* don't have documentation -- and in Linux it is only used for I/O
* purposes.
*
* Currently we don't build kernels that work on both Octeon and
* non-Octeon MIPS CPUs, so none of this is done with binary patching.
* For userlands we could use a separate shared library on Octeon with
* ld.so.conf to override the symbols with cheaper definitions, but we
* don't do that now.
*/
LEAF(_membar_acquire)
/*
 * No barrier instruction is issued here: this relies on cnMIPS
 * guaranteeing load-before-load/store ordering without any
 * barriers (see the notes above), so the routine only returns.
 * The nop occupies the slot following the jump.
 */
j ra
nop
END(_membar_acquire)
ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire)
STRONG_ALIAS(_membar_consumer,_membar_acquire)
ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire)
LEAF(_membar_release)
/*
 * Release barrier for Octeon: issues only syncw, placed in the
 * slot following the return jump, rather than a full sync.
 *
 * syncw is documented as ordering store-before-store in
 *
 * Cavium OCTEON III CN78XX Hardware Reference Manual,
 * CN78XX-HM-0.99E, September 2014.
 *
 * It's unclear from the documentation whether the architecture
 * guarantees load-before-store ordering without barriers, but
 * this code assumes it does. If that assumption is wrong, we
 * can only use syncw for membar_producer -- membar_release has
 * to use the full sync.
 */
j ra
syncw
END(_membar_release)
#endif
ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
ATOMIC_OP_ALIAS(membar_acquire,_membar_sync)
STRONG_ALIAS(_membar_acquire,_membar_sync)
ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
STRONG_ALIAS(_membar_enter,_membar_sync)
#ifdef __OCTEON__
ATOMIC_OP_ALIAS(membar_exit,_membar_release)
STRONG_ALIAS(_membar_exit,_membar_release)
ATOMIC_OP_ALIAS(membar_release,_membar_release)
ATOMIC_OP_ALIAS(membar_producer,_membar_release)
STRONG_ALIAS(_membar_exit,_membar_release)
ATOMIC_OP_ALIAS(membar_exit,_membar_release)
STRONG_ALIAS(_membar_producer,_membar_release)
#else
ATOMIC_OP_ALIAS(membar_exit,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_sync)
ATOMIC_OP_ALIAS(membar_release,_membar_sync)
ATOMIC_OP_ALIAS(membar_producer,_membar_release)
#else /* !__OCTEON__ */
STRONG_ALIAS(_membar_acquire,_membar_sync)
ATOMIC_OP_ALIAS(membar_acquire,_membar_sync)
STRONG_ALIAS(_membar_release,_membar_sync)
ATOMIC_OP_ALIAS(membar_producer,_membar_sync)
STRONG_ALIAS(_membar_producer,_membar_sync)
#endif
ATOMIC_OP_ALIAS(membar_consumer,_membar_sync)
ATOMIC_OP_ALIAS(membar_release,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_sync)
ATOMIC_OP_ALIAS(membar_exit,_membar_sync)
STRONG_ALIAS(_membar_consumer,_membar_sync)
ATOMIC_OP_ALIAS(membar_consumer,_membar_sync)
STRONG_ALIAS(_membar_producer,_membar_sync)
ATOMIC_OP_ALIAS(membar_producer,_membar_sync)
#endif

View File

@ -1,4 +1,4 @@
/* $NetBSD: asm.h,v 1.70 2022/04/09 14:09:32 riastradh Exp $ */
/* $NetBSD: asm.h,v 1.71 2022/04/21 12:06:31 riastradh Exp $ */
/*
* Copyright (c) 1992, 1993
@ -574,12 +574,21 @@ _C_LABEL(x):
/* compiler define */
#if defined(__OCTEON__)
/* early cnMIPS have erratum which means 2 */
#define LLSCSYNC sync 4; sync 4
/*
* See common/lib/libc/arch/mips/atomic/membar_ops.S for notes on
* Octeon memory ordering guarantees and barriers.
*
* cnMIPS also has a quirk where the store buffer can get clogged and
* we need to apply a plunger to it _after_ releasing a lock or else
* other CPUs may spin for hundreds of thousands of cycles before they
* see the lock is released. So we also have the quirky SYNC_PLUNGER
* barrier as syncw.
*/
#define LLSCSYNC /* nothing */
#define BDSYNC sync
#define BDSYNC_ACQ sync
#define SYNC_ACQ sync
#define SYNC_REL sync
#define BDSYNC_ACQ nop
#define SYNC_ACQ /* nothing */
#define SYNC_REL sync 4
#define BDSYNC_PLUNGER sync 4
#define SYNC_PLUNGER sync 4
#elif __mips >= 3 || !defined(__mips_o32)