mips/cavium: Take advantage of Octeon's guaranteed r/rw ordering.
This commit is contained in:
parent
f03a8d394a
commit
4a6459a8d1
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: membar_ops.S,v 1.12 2022/04/09 23:32:51 riastradh Exp $ */
|
||||
/* $NetBSD: membar_ops.S,v 1.13 2022/04/21 12:06:31 riastradh Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
|
||||
|
@ -38,44 +38,80 @@ LEAF(_membar_sync)
|
|||
j ra
|
||||
BDSYNC
|
||||
END(_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
|
||||
|
||||
STRONG_ALIAS(_membar_enter,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
|
||||
|
||||
#ifdef __OCTEON__
|
||||
LEAF(_membar_release)
|
||||
/*
|
||||
* syncw is documented as ordering store-before-store in
|
||||
|
||||
/*
|
||||
* cnMIPS guarantees load-before-load/store ordering without any
|
||||
* barriers. So the only barriers we need are store-before-load (sync)
|
||||
* and store-before-store (syncw, i.e., sync 4). See Table 2-32
|
||||
* `Execution Ordering Rules' on p. 104 of Cavium OCTEON III CN78XX
|
||||
* Hardware Reference Manual, CN78XX-HM-0.99E, September 2014:
|
||||
*
|
||||
* Cavium OCTEON III CN78XX Hardware Reference Manual,
|
||||
* CN78XX-HM-0.99E, September 2014.
|
||||
* First Operation DLD [load instruction to a physical
|
||||
* address that is L2/DRAM]
|
||||
* Second Operation Any
|
||||
* Execution Ordering Comments
|
||||
*
|
||||
* It's unclear from the documentation whether the architecture
|
||||
* guarantees load-before-store ordering without barriers, but
|
||||
* this code assumes it does. If that assumption is wrong, we
|
||||
* can only use syncw for membar_producer -- membar_release has
|
||||
* to use the full sync.
|
||||
* The second operation cannot appear to execute before
|
||||
* the first (DLD) operation, regardless of the presence
|
||||
* or absence of SYNC* instructions.
|
||||
*
|
||||
* Note: I'm not sure if this applies to earlier cnMIPS -- can't find
|
||||
* it in the Cavium Networks OCTEON Plus CN50XX Hardware Reference
|
||||
* Manual CN50XX-HM-0.99E, July 2008. Experimentally, on an erlite3
|
||||
* (Cavium Octeon CN5020-500), I can easily detect reordering of
|
||||
* store-before-store and store-before-load, but I haven't been able to
|
||||
* detect any reordering of load-before-load or load-before-store.
|
||||
*
|
||||
* Note: On early cnMIPS (CN3xxx), there is an erratum which sometimes
|
||||
* requires issuing two syncw's in a row. I don't know the details --
|
||||
* don't have documentation -- and in Linux it is only used for I/O
|
||||
* purposes.
|
||||
*
|
||||
* Currently we don't build kernels that work on both Octeon and
|
||||
* non-Octeon MIPS CPUs, so none of this is done with binary patching.
|
||||
* For userlands we could use a separate shared library on Octeon with
|
||||
* ld.so.conf to override the symbols with cheaper definitions, but we
|
||||
* don't do that now.
|
||||
*/
|
||||
|
||||
/*
 * _membar_acquire (in the __OCTEON__ section): returns to the caller
 * without issuing any barrier instruction -- the slot after the jump
 * holds only a nop.  Per the notes earlier in this file, cnMIPS
 * guarantees load-before-load/store ordering without any barriers,
 * so there is nothing to issue here.
 */
LEAF(_membar_acquire)
|
||||
j ra
|
||||
nop
|
||||
END(_membar_acquire)
|
||||
ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire)
|
||||
|
||||
STRONG_ALIAS(_membar_consumer,_membar_acquire)
|
||||
ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire)
|
||||
|
||||
/*
 * _membar_release (in the __OCTEON__ section): returns to the caller,
 * issuing a single syncw in the slot after the jump.  Per the notes
 * earlier in this file, syncw (i.e., sync 4) is the store-before-store
 * barrier, and load-before-store ordering is guaranteed by cnMIPS
 * without any barrier.
 */
LEAF(_membar_release)
|
||||
j ra
|
||||
syncw
|
||||
END(_membar_release)
|
||||
#endif
|
||||
|
||||
ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_acquire,_membar_sync)
|
||||
STRONG_ALIAS(_membar_acquire,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_enter,_membar_sync)
|
||||
STRONG_ALIAS(_membar_enter,_membar_sync)
|
||||
#ifdef __OCTEON__
|
||||
ATOMIC_OP_ALIAS(membar_exit,_membar_release)
|
||||
STRONG_ALIAS(_membar_exit,_membar_release)
|
||||
ATOMIC_OP_ALIAS(membar_release,_membar_release)
|
||||
ATOMIC_OP_ALIAS(membar_producer,_membar_release)
|
||||
|
||||
STRONG_ALIAS(_membar_exit,_membar_release)
|
||||
ATOMIC_OP_ALIAS(membar_exit,_membar_release)
|
||||
|
||||
STRONG_ALIAS(_membar_producer,_membar_release)
|
||||
#else
|
||||
ATOMIC_OP_ALIAS(membar_exit,_membar_sync)
|
||||
STRONG_ALIAS(_membar_exit,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_release,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_producer,_membar_release)
|
||||
|
||||
#else /* !__OCTEON__ */
|
||||
|
||||
STRONG_ALIAS(_membar_acquire,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_acquire,_membar_sync)
|
||||
STRONG_ALIAS(_membar_release,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_producer,_membar_sync)
|
||||
STRONG_ALIAS(_membar_producer,_membar_sync)
|
||||
#endif
|
||||
ATOMIC_OP_ALIAS(membar_consumer,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_release,_membar_sync)
|
||||
STRONG_ALIAS(_membar_exit,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_exit,_membar_sync)
|
||||
STRONG_ALIAS(_membar_consumer,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_consumer,_membar_sync)
|
||||
STRONG_ALIAS(_membar_producer,_membar_sync)
|
||||
ATOMIC_OP_ALIAS(membar_producer,_membar_sync)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: asm.h,v 1.70 2022/04/09 14:09:32 riastradh Exp $ */
|
||||
/* $NetBSD: asm.h,v 1.71 2022/04/21 12:06:31 riastradh Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1992, 1993
|
||||
|
@ -574,12 +574,21 @@ _C_LABEL(x):
|
|||
|
||||
/* compiler define */
|
||||
#if defined(__OCTEON__)
|
||||
/* early cnMIPS have an erratum which means 2 syncw's (sync 4) are needed */
|
||||
#define LLSCSYNC sync 4; sync 4
|
||||
/*
|
||||
* See common/lib/libc/arch/mips/atomic/membar_ops.S for notes on
|
||||
* Octeon memory ordering guarantees and barriers.
|
||||
*
|
||||
* cnMIPS also has a quirk where the store buffer can get clogged and
|
||||
* we need to apply a plunger to it _after_ releasing a lock or else
|
||||
* other CPUs may spin for hundreds of thousands of cycles before they
|
||||
* see the lock is released. So we also have the quirky SYNC_PLUNGER
|
||||
* barrier as syncw.
|
||||
*/
|
||||
#define LLSCSYNC /* nothing */
|
||||
#define BDSYNC sync
|
||||
#define BDSYNC_ACQ sync
|
||||
#define SYNC_ACQ sync
|
||||
#define SYNC_REL sync
|
||||
#define BDSYNC_ACQ nop
|
||||
#define SYNC_ACQ /* nothing */
|
||||
#define SYNC_REL sync 4
|
||||
#define BDSYNC_PLUNGER sync 4
|
||||
#define SYNC_PLUNGER sync 4
|
||||
#elif __mips >= 3 || !defined(__mips_o32)
|
||||
|
|
Loading…
Reference in New Issue