809 lines
19 KiB
Groff
809 lines
19 KiB
Groff
.\" $NetBSD: atomic_loadstore.9,v 1.7 2022/04/09 23:32:53 riastradh Exp $
|
|
.\"
|
|
.\" Copyright (c) 2019 The NetBSD Foundation
|
|
.\" All rights reserved.
|
|
.\"
|
|
.\" This code is derived from software contributed to The NetBSD Foundation
|
|
.\" by Taylor R. Campbell.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
.\" POSSIBILITY OF SUCH DAMAGE.
|
|
.\"
|
|
.Dd February 11, 2022
|
|
.Dt ATOMIC_LOADSTORE 9
|
|
.Os
|
|
.Sh NAME
|
|
.Nm atomic_load_relaxed ,
|
|
.Nm atomic_load_acquire ,
|
|
.Nm atomic_load_consume ,
|
|
.Nm atomic_store_relaxed ,
|
|
.Nm atomic_store_release
|
|
.Nd atomic and ordered memory operations
|
|
.Sh SYNOPSIS
|
|
.In sys/atomic.h
|
|
.Ft T
|
|
.Fn atomic_load_relaxed "const volatile T *p"
|
|
.Ft T
|
|
.Fn atomic_load_acquire "const volatile T *p"
|
|
.Ft T
|
|
.Fn atomic_load_consume "const volatile T *p"
|
|
.Ft void
|
|
.Fn atomic_store_relaxed "volatile T *p" "T v"
|
|
.Ft void
|
|
.Fn atomic_store_release "volatile T *p" "T v"
|
|
.Sh DESCRIPTION
|
|
These type-generic macros implement memory operations that are
|
|
.Em atomic
|
|
and that have
|
|
.Em memory ordering constraints .
|
|
Aside from atomicity and ordering, the load operations are equivalent
|
|
to
|
|
.Li * Ns Fa p
|
|
and the store operations are equivalent to
|
|
.Li * Ns Fa p Li "=" Fa v .
|
|
The pointer
|
|
.Fa p
|
|
must be aligned, even on architectures like x86 which generally lack
|
|
strict alignment requirements; see
|
|
.Sx SIZE AND ALIGNMENT
|
|
for details.
|
|
.Pp
|
|
.Em Atomic
|
|
means that the memory operations cannot be
|
|
.Em fused
|
|
or
|
|
.Em torn :
|
|
.Bl -bullet
|
|
.It
|
|
.Em Fusing
|
|
is combining multiple memory operations on a single object into one
|
|
memory operation, such as replacing
|
|
.Bd -literal -compact
|
|
*p = v;
|
|
x = *p;
|
|
.Ed
|
|
by
|
|
.Bd -literal -compact
|
|
*p = v;
|
|
x = v;
|
|
.Ed
|
|
since the compiler can prove that
|
|
.Li \&*p
|
|
will yield
|
|
.Li v
|
|
after
|
|
.Li \&*p\ =\ v .
|
|
For
|
|
.Em atomic
|
|
memory operations, the implementation
|
|
.Em will not
|
|
assume that
|
|
.Bl -dash -compact
|
|
.It
|
|
consecutive loads of the same object will return the same value, or
|
|
.It
|
|
a store followed by a load of the same object will return the value
|
|
stored, or
|
|
.It
|
|
consecutive stores of the same object are redundant.
|
|
.El
|
|
Thus, the implementation will not replace two consecutive atomic loads
|
|
by one, will not elide an atomic load following a store, and will not
|
|
combine two consecutive atomic stores into one.
|
|
.Pp
|
|
For example,
|
|
.Bd -literal
|
|
atomic_store_relaxed(&flag, 1);
|
|
while (atomic_load_relaxed(&flag))
|
|
continue;
|
|
.Ed
|
|
.Pp
|
|
may be used to set a flag and then busy-wait until another thread
|
|
clears it, whereas
|
|
.Bd -literal
|
|
flag = 1;
|
|
while (flag)
|
|
continue;
|
|
.Ed
|
|
.Pp
|
|
may be transformed into the infinite loop
|
|
.Bd -literal
|
|
flag = 1;
|
|
while (1)
|
|
continue;
|
|
.Ed
|
|
.It
|
|
.Em Tearing
|
|
is implementing a memory operation on a large data unit such as a
|
|
32-bit word by issuing multiple memory operations on smaller data units
|
|
such as 8-bit bytes.
|
|
The implementation will not tear
|
|
.Em atomic
|
|
loads or stores into smaller ones.
|
|
Thus, as far as any interrupt, other thread, or other CPU can tell, an
|
|
atomic memory operation is issued either all at once or not at all.
|
|
.Pp
|
|
For example, if a 32-bit word
|
|
.Va w
|
|
is written with
|
|
.Pp
|
|
.Dl atomic_store_relaxed(&w,\ 0x00010002);
|
|
.Pp
|
|
then an interrupt, other thread, or other CPU reading it with
|
|
.Li atomic_load_relaxed(&w)
|
|
will never witness it partially written, whereas
|
|
.Pp
|
|
.Dl w\ =\ 0x00010002;
|
|
.Pp
|
|
might be compiled into a pair of separate 16-bit store instructions
|
|
instead of one single word-sized store instruction, in which case other
|
|
threads may see the intermediate state with only one of the halves
|
|
written.
|
|
.El
|
|
.Pp
|
|
Atomic operations on any single object occur in a total order shared by
|
|
all interrupts, threads, and CPUs, which is consistent with the program
|
|
order in every interrupt, thread, and CPU.
|
|
A single program without interruption or other threads or CPUs will
|
|
always observe its own loads and stores in program order, but another
|
|
program in an interrupt handler, in another thread, or on another CPU
|
|
may issue loads that return values as if the first program's stores
|
|
occurred out of program order, and vice versa.
|
|
Two different threads might each observe a third thread's memory
|
|
operations in different orders.
|
|
.Pp
|
|
The
|
|
.Em memory ordering constraints
|
|
make limited guarantees of ordering relative to memory operations on
|
|
.Em other
|
|
objects as witnessed by interrupts, other threads, or other CPUs, and
|
|
have the following meanings:
|
|
.Bl -tag -width relaxed
|
|
.It relaxed
|
|
No ordering relative to memory operations on any other objects is
|
|
guaranteed.
|
|
Relaxed ordering is the default for ordinary non-atomic memory
|
|
operations like
|
|
.Li "*p"
|
|
and
|
|
.Li "*p = v" .
|
|
.Pp
|
|
Atomic operations with relaxed ordering are cheap: they are not
|
|
read/modify/write atomic operations, and they do not involve any kind
|
|
of inter-CPU ordering barriers.
|
|
.It acquire
|
|
This memory operation happens before all subsequent memory operations
|
|
in program order.
|
|
However, prior memory operations in program order may be reordered to
|
|
happen after this one.
|
|
For example, assuming no aliasing between the pointers, the
|
|
implementation is allowed to treat
|
|
.Bd -literal
|
|
int x = *p;
|
|
if (atomic_load_acquire(q)) {
|
|
int y = *r;
|
|
*s = x + y;
|
|
return 1;
|
|
}
|
|
.Ed
|
|
.Pp
|
|
as if it were
|
|
.Bd -literal
|
|
if (atomic_load_acquire(q)) {
|
|
int x = *p;
|
|
int y = *r;
|
|
*s = x + y;
|
|
return 1;
|
|
}
|
|
.Ed
|
|
.Pp
|
|
but
|
|
.Em not
|
|
as if it were
|
|
.Bd -literal
|
|
int x = *p;
|
|
int y = *r;
|
|
*s = x + y;
|
|
if (atomic_load_acquire(q)) {
|
|
return 1;
|
|
}
|
|
.Ed
|
|
.It consume
|
|
This memory operation happens before all memory operations on objects
|
|
at addresses that are computed from the value returned by this one.
|
|
Otherwise, no ordering relative to memory operations on other objects
|
|
is implied.
|
|
.Pp
|
|
For example, the implementation is allowed to treat
|
|
.Bd -literal
|
|
struct foo *foo0, *foo1;
|
|
|
|
struct foo *f0 = atomic_load_consume(&foo0);
|
|
struct foo *f1 = atomic_load_consume(&foo1);
|
|
int x = f0->x;
|
|
int y = f1->y;
|
|
.Ed
|
|
.Pp
|
|
as if it were
|
|
.Bd -literal
|
|
struct foo *foo0, *foo1;
|
|
|
|
struct foo *f1 = atomic_load_consume(&foo1);
|
|
struct foo *f0 = atomic_load_consume(&foo0);
|
|
int y = f1->y;
|
|
int x = f0->x;
|
|
.Ed
|
|
.Pp
|
|
but loading
|
|
.Li f0->x
|
|
is guaranteed to happen after loading
|
|
.Li foo0
|
|
even if the CPU had a cached value for the address that
|
|
.Li f0->x
|
|
happened to be at, and likewise for
|
|
.Li f1->y
|
|
and
|
|
.Li foo1 .
|
|
.Pp
|
|
.Fn atomic_load_consume
|
|
functions like
|
|
.Fn atomic_load_acquire
|
|
as long as the memory operations that must happen after it are limited
|
|
to addresses that depend on the value returned by it, but it is almost
|
|
always as cheap as
|
|
.Fn atomic_load_relaxed .
|
|
See
|
|
.Sx ACQUIRE OR CONSUME?
|
|
below for more details.
|
|
.It release
|
|
All prior memory operations in program order happen before this one.
|
|
However, subsequent memory operations in program order may be reordered
|
|
to happen before this one too.
|
|
For example, assuming no aliasing between the pointers, the
|
|
implementation is allowed to treat
|
|
.Bd -literal
|
|
int x = *p;
|
|
*q = x;
|
|
atomic_store_release(r, 0);
|
|
int y = *s;
|
|
return x + y;
|
|
.Ed
|
|
.Pp
|
|
as if it were
|
|
.Bd -literal
|
|
int y = *s;
|
|
int x = *p;
|
|
*q = x;
|
|
atomic_store_release(r, 0);
|
|
return x + y;
|
|
.Ed
|
|
.Pp
|
|
but
|
|
.Em not
|
|
as if it were
|
|
.Bd -literal
|
|
atomic_store_release(r, 0);
|
|
int x = *p;
|
|
int y = *s;
|
|
*q = x;
|
|
return x + y;
|
|
.Ed
|
|
.El
|
|
.Ss PAIRING ORDERED MEMORY OPERATIONS
|
|
In general, each
|
|
.Fn atomic_store_release
|
|
.Em must
|
|
be paired with either
|
|
.Fn atomic_load_acquire
|
|
or
|
|
.Fn atomic_load_consume
|
|
in order to have an effect \(em it is only when a release operation
|
|
synchronizes with an acquire or consume operation that any ordering
|
|
is guaranteed between memory operations
|
|
.Em before
|
|
the release operation and memory operations
|
|
.Em after
|
|
the acquire/consume operation.
|
|
.Pp
|
|
For example, to set up an entry in a table and then mark the entry
|
|
ready, you should:
|
|
.Bl -enum
|
|
.It
|
|
Perform memory operations to initialize the data.
|
|
.Bd -literal
|
|
tab[i].x = ...;
|
|
tab[i].y = ...;
|
|
.Ed
|
|
.It
|
|
Issue
|
|
.Fn atomic_store_release
|
|
to mark it ready.
|
|
.Bd -literal
|
|
atomic_store_release(&tab[i].ready, 1);
|
|
.Ed
|
|
.It
|
|
Possibly in another thread, issue
|
|
.Fn atomic_load_acquire
|
|
to ascertain whether it is ready.
|
|
.Bd -literal
|
|
if (atomic_load_acquire(&tab[i].ready) == 0)
|
|
return EWOULDBLOCK;
|
|
.Ed
|
|
.It
|
|
Perform memory operations to use the data.
|
|
.Bd -literal
|
|
do_stuff(tab[i].x, tab[i].y);
|
|
.Ed
|
|
.El
|
|
.Pp
|
|
Similarly, if you want to create an object, initialize it, and then
|
|
publish it to be used by another thread, then you should:
|
|
.Bl -enum
|
|
.It
|
|
Perform memory operations to initialize the object.
|
|
.Bd -literal
|
|
struct mumble *m = kmem_alloc(sizeof(*m), KM_SLEEP);
|
|
m->x = x;
|
|
m->y = y;
|
|
m->z = m->x + m->y;
|
|
.Ed
|
|
.It
|
|
Issue
|
|
.Fn atomic_store_release
|
|
to publish it.
|
|
.Bd -literal
|
|
atomic_store_release(&the_mumble, m);
|
|
.Ed
|
|
.It
|
|
Possibly in another thread, issue
|
|
.Fn atomic_load_consume
|
|
to get it.
|
|
.Bd -literal
|
|
struct mumble *m = atomic_load_consume(&the_mumble);
|
|
.Ed
|
|
.It
|
|
Perform memory operations to use the object's members.
|
|
.Bd -literal
|
|
m->y &= m->x;
|
|
do_things(m->x, m->y, m->z);
|
|
.Ed
|
|
.El
|
|
.Pp
|
|
In both examples, assuming that the value written by
|
|
.Fn atomic_store_release
|
|
in step\~2
|
|
is read by
|
|
.Fn atomic_load_acquire
|
|
or
|
|
.Fn atomic_load_consume
|
|
in step\~3, this guarantees that all of the memory operations in
|
|
step\~1 complete before any of the memory operations in step\~4 \(em
|
|
even if they happen on different CPUs.
|
|
.Pp
|
|
Without
|
|
.Em both
|
|
the release operation in step\~2
|
|
.Em and
|
|
the acquire or consume operation in step\~3, no ordering is guaranteed
|
|
between the memory operations in steps\~1 and\~4.
|
|
In fact, without
|
|
.Em both
|
|
release and acquire/consume, even the assignment
|
|
.Li m->z\ =\ m->x\ +\ m->y
|
|
in step\~1 might read values of
|
|
.Li m->x
|
|
and
|
|
.Li m->y
|
|
that were written in step\~4.
|
|
.Ss ACQUIRE OR CONSUME?
|
|
You must use
|
|
.Fn atomic_load_acquire
|
|
when subsequent memory operations in program order that must happen
|
|
after the load are on objects at
|
|
.Em addresses that might not depend arithmetically on the resulting value .
|
|
This applies particularly when the choice of whether to do the
|
|
subsequent memory operation depends on a
|
|
.Em control-flow decision based on the resulting value :
|
|
.Bd -literal
|
|
struct gadget {
|
|
int ready, x;
|
|
} the_gadget;
|
|
|
|
/* Producer */
|
|
the_gadget.x = 42;
|
|
atomic_store_release(&the_gadget.ready, 1);
|
|
|
|
/* Consumer */
|
|
if (atomic_load_acquire(&the_gadget.ready) == 0)
|
|
return EWOULDBLOCK;
|
|
int x = the_gadget.x;
|
|
.Ed
|
|
.Pp
|
|
Here the
|
|
.Em decision of whether to load
|
|
.Li the_gadget.x
|
|
depends on a control-flow decision depending on the value loaded from
|
|
.Li the_gadget.ready ,
|
|
and loading
|
|
.Li the_gadget.x
|
|
must happen after loading
|
|
.Li the_gadget.ready .
|
|
Using
|
|
.Fn atomic_load_acquire
|
|
guarantees that the compiler and CPU do not conspire to load
|
|
.Li the_gadget.x
|
|
before we have ascertained that it is ready.
|
|
.Pp
|
|
You may use
|
|
.Fn atomic_load_consume
|
|
if all subsequent memory operations in program order that must happen
|
|
after the load are performed on objects at
|
|
.Em addresses computed arithmetically from the resulting value ,
|
|
such as loading a pointer to a structure object and then dereferencing
|
|
it:
|
|
.Bd -literal
|
|
struct gizmo {
|
|
int x, y, z;
|
|
};
|
|
struct gizmo null_gizmo;
|
|
struct gizmo *the_gizmo = &null_gizmo;
|
|
|
|
/* Producer */
|
|
struct gizmo *g = kmem_alloc(sizeof(*g), KM_SLEEP);
|
|
g->x = 12;
|
|
g->y = 34;
|
|
g->z = 56;
|
|
atomic_store_release(&the_gizmo, g);
|
|
|
|
/* Consumer */
|
|
struct gizmo *g = atomic_load_consume(&the_gizmo);
|
|
int y = g->y;
|
|
.Ed
|
|
.Pp
|
|
Here the
|
|
.Em address
|
|
of
|
|
.Li g->y
|
|
depends on the value of the pointer loaded from
|
|
.Li the_gizmo .
|
|
Using
|
|
.Fn atomic_load_consume
|
|
guarantees that we do not witness a stale cache for that address.
|
|
.Pp
|
|
In some cases it may be unclear.
|
|
For example:
|
|
.Bd -literal
|
|
int x[2];
|
|
bool b;
|
|
|
|
/* Producer */
|
|
x[0] = 42;
|
|
atomic_store_release(&b, 1);
|
|
|
|
/* Consumer 1 */
|
|
int y = atomic_load_???(&b) ? x[0] : x[1];
|
|
|
|
/* Consumer 2 */
|
|
int y = x[atomic_load_???(&b) ? 0 : 1];
|
|
|
|
/* Consumer 3 */
|
|
int y = x[atomic_load_???(&b) ^ 1];
|
|
.Ed
|
|
.Pp
|
|
Although the three consumers seem to be equivalent, by the letter of
|
|
C11 consumers\~1 and\~2 require
|
|
.Fn atomic_load_acquire
|
|
because the value determines the address of a subsequent load only via
|
|
control-flow decisions in the
|
|
.Li ?:
|
|
operator, whereas consumer\~3 can use
|
|
.Fn atomic_load_consume .
|
|
However, if you're not sure, you should err on the side of
|
|
.Fn atomic_load_acquire
|
|
until C11 implementations have ironed out the kinks in the semantics.
|
|
.Pp
|
|
On all CPUs other than DEC Alpha,
|
|
.Fn atomic_load_consume
|
|
is cheap \(em it is identical to
|
|
.Fn atomic_load_relaxed .
|
|
In contrast,
|
|
.Fn atomic_load_acquire
|
|
usually implies an expensive memory barrier.
|
|
.Ss SIZE AND ALIGNMENT
|
|
The pointer
|
|
.Fa p
|
|
must be aligned \(em that is, if the object it points to is
|
|
.\"
|
|
2\c
|
|
.ie t \s-2\v'-0.4m'n\v'+0.4m'\s+2
|
|
.el ^n
|
|
.\"
|
|
bytes long, then the low-order
|
|
.Ar n
|
|
bits of
|
|
.Fa p
|
|
must be zero.
|
|
.Pp
|
|
All
|
|
.Nx
|
|
ports support atomic loads and stores on units of data up to 32 bits.
|
|
Some ports additionally support atomic loads and stores on larger
|
|
quantities, like 64-bit quantities, if
|
|
.Dv __HAVE_ATOMIC64_LOADSTORE
|
|
is defined.
|
|
The macros are not allowed on larger quantities of data than the port
|
|
supports atomically; attempts to use them for such quantities should
|
|
result in a compile-time assertion failure.
|
|
.Pp
|
|
For example, as long as you use
|
|
.Fn atomic_store_*
|
|
to write a 32-bit quantity, you can safely use
|
|
.Fn atomic_load_relaxed
|
|
to optimistically read it outside a lock, but for a 64-bit quantity it
|
|
must be conditional on
|
|
.Dv __HAVE_ATOMIC64_LOADSTORE
|
|
\(em otherwise it will lead to compile-time errors on platforms without
|
|
64-bit atomic loads and stores:
|
|
.Bd -literal
|
|
struct foo {
|
|
kmutex_t f_lock;
|
|
uint32_t f_refcnt;
|
|
uint64_t f_ticket;
|
|
};
|
|
|
|
if (atomic_load_relaxed(&foo->f_refcnt) == 0)
|
|
return 123;
|
|
#ifdef __HAVE_ATOMIC64_LOADSTORE
|
|
if (atomic_load_relaxed(&foo->f_ticket) == ticket)
|
|
return 123;
|
|
#endif
|
|
mutex_enter(&foo->f_lock);
|
|
if (foo->f_refcnt == 0 || foo->f_ticket == ticket)
|
|
ret = 123;
|
|
...
|
|
#ifdef __HAVE_ATOMIC64_LOADSTORE
|
|
atomic_store_relaxed(&foo->f_ticket, foo->f_ticket + 1);
|
|
#else
|
|
foo->f_ticket++;
|
|
#endif
|
|
...
|
|
mutex_exit(&foo->f_lock);
|
|
.Ed
|
|
.Sh C11 COMPATIBILITY
|
|
These macros are meant to follow
|
|
.Tn C11
|
|
semantics, in terms of
|
|
.Li atomic_load_explicit()
|
|
and
|
|
.Li atomic_store_explicit()
|
|
with the appropriate memory order specifiers, and are meant to make
|
|
future adoption of the
|
|
.Tn C11
|
|
atomic API easier.
|
|
Eventually it may be mandatory to use the
|
|
.Tn C11
|
|
.Vt _Atomic
|
|
type qualifier or equivalent for the operands.
|
|
.Sh LINUX ANALOGUES
|
|
The Linux kernel provides two macros
|
|
.Li READ_ONCE(x)
|
|
and
|
|
.Li WRITE_ONCE(x,\ v)
|
|
which are similar to
|
|
.Li atomic_load_consume(&x)
|
|
and
|
|
.Li atomic_store_relaxed(&x,\ v) ,
|
|
respectively.
|
|
However, while Linux's
|
|
.Li READ_ONCE
|
|
and
|
|
.Li WRITE_ONCE
|
|
prevent fusing, they may in some cases be torn \(em and therefore fail
|
|
to guarantee atomicity \(em because:
|
|
.Bl -bullet
|
|
.It
|
|
They do not require the address
|
|
.Li "&x"
|
|
to be aligned.
|
|
.It
|
|
They do not require
|
|
.Li sizeof(x)
|
|
to be at most the largest size of available atomic loads and stores on
|
|
the host architecture.
|
|
.El
|
|
.Sh MEMORY BARRIERS AND ATOMIC READ/MODIFY/WRITE
|
|
The atomic read/modify/write operations in
|
|
.Xr atomic_ops 3
|
|
have relaxed ordering by default, but can be combined with the memory
|
|
barriers in
|
|
.Xr membar_ops 3
|
|
for the same effect as an acquire operation and a release operation for
|
|
the purposes of pairing with
|
|
.Fn atomic_store_release
|
|
and
|
|
.Fn atomic_load_acquire
|
|
or
|
|
.Fn atomic_load_consume .
|
|
If
|
|
.Li atomic_r/m/w()
|
|
is an atomic read/modify/write operation in
|
|
.Xr atomic_ops 3 ,
|
|
then
|
|
.Bd -literal
|
|
membar_release();
|
|
atomic_r/m/w(obj, ...);
|
|
.Ed
|
|
.Pp
|
|
functions like a release operation on
|
|
.Va obj ,
|
|
and
|
|
.Bd -literal
|
|
atomic_r/m/w(obj, ...);
|
|
membar_acquire();
|
|
.Ed
|
|
.Pp
|
|
functions like an acquire operation on
|
|
.Va obj .
|
|
.Pp
|
|
On architectures where
|
|
.Dv __HAVE_ATOMIC_AS_MEMBAR
|
|
is defined, all the
|
|
.Xr atomic_ops 3
|
|
imply release and acquire operations, so the
|
|
.Xr membar_acquire 3
|
|
and
|
|
.Xr membar_release 3
|
|
are redundant.
|
|
.Pp
|
|
The combination of
|
|
.Fn atomic_load_relaxed
|
|
and
|
|
.Xr membar_acquire 3
|
|
in that order is equivalent to
|
|
.Fn atomic_load_acquire ,
|
|
and the combination of
|
|
.Xr membar_release 3
|
|
and
|
|
.Fn atomic_store_relaxed
|
|
in that order is equivalent to
|
|
.Fn atomic_store_release .
|
|
.Sh EXAMPLES
|
|
Maintaining lossy counters.
|
|
These may lose some counts, because the read/modify/write cycle as a
|
|
whole is not atomic.
|
|
But this guarantees that the count will increase by at most one each
|
|
time.
|
|
In contrast, without atomic operations, in principle a write to a
|
|
32-bit counter might be torn into multiple smaller stores, which could
|
|
appear to happen out of order from another CPU's perspective, leading
|
|
to nonsensical counter readouts.
|
|
(For frequent events, consider using per-CPU counters instead in
|
|
practice.)
|
|
.Bd -literal
|
|
unsigned count;
|
|
|
|
void
|
|
record_event(void)
|
|
{
|
|
atomic_store_relaxed(&count,
|
|
1 + atomic_load_relaxed(&count));
|
|
}
|
|
|
|
unsigned
|
|
read_event_count(void)
|
|
{
|
|
return atomic_load_relaxed(&count);
|
|
}
|
|
.Ed
|
|
.Pp
|
|
Initialization barrier.
|
|
.Bd -literal
|
|
int ready;
|
|
struct data d;
|
|
|
|
void
|
|
setup_and_notify(void)
|
|
{
|
|
setup_data(&d.things);
|
|
atomic_store_release(&ready, 1);
|
|
}
|
|
|
|
void
|
|
try_if_ready(void)
|
|
{
|
|
if (atomic_load_acquire(&ready))
|
|
do_stuff(d.things);
|
|
}
|
|
.Ed
|
|
.Pp
|
|
Publishing a pointer to the current snapshot of data.
|
|
(Caller must arrange that only one call to
|
|
.Li take_snapshot()
|
|
happens at any
|
|
given time; generally this should be done in coordination with
|
|
.Xr pserialize 9
|
|
or similar to enable resource reclamation.)
|
|
.Bd -literal
|
|
struct data *current_d;
|
|
|
|
void
|
|
take_snapshot(void)
|
|
{
|
|
struct data *d = kmem_alloc(sizeof(*d), KM_SLEEP);
|
|
|
|
d->things = ...;
|
|
|
|
atomic_store_release(&current_d, d);
|
|
}
|
|
|
|
struct data *
|
|
get_snapshot(void)
|
|
{
|
|
return atomic_load_consume(&current_d);
|
|
}
|
|
.Ed
|
|
.Sh CODE REFERENCES
|
|
.Pa sys/sys/atomic.h
|
|
.Sh SEE ALSO
|
|
.Xr atomic_ops 3 ,
|
|
.Xr membar_ops 3 ,
|
|
.Xr pserialize 9
|
|
.Sh HISTORY
|
|
These atomic operations first appeared in
|
|
.Nx 9.0 .
|
|
.Sh CAVEATS
|
|
C11 formally specifies that all subexpressions, except the left
|
|
operands of the
|
|
.Ql && ,
|
|
.Ql || ,
|
|
.Ql ?: ,
|
|
and
|
|
.Ql \&,
|
|
operators and the
|
|
.Li kill_dependency()
|
|
macro, carry dependencies for which
|
|
.Dv memory_order_consume
|
|
guarantees ordering, but most or all implementations to date simply
|
|
treat
|
|
.Dv memory_order_consume
|
|
as
|
|
.Dv memory_order_acquire
|
|
and do not take advantage of data dependencies to elide costly memory
|
|
barriers or load-acquire CPU instructions.
|
|
.Pp
|
|
Instead, we implement
|
|
.Fn atomic_load_consume
|
|
as
|
|
.Fn atomic_load_relaxed
|
|
followed by
|
|
.Xr membar_datadep_consumer 3 ,
|
|
which is equivalent to
|
|
.Xr membar_consumer 3
|
|
on DEC Alpha and
|
|
.Xr __insn_barrier 3
|
|
elsewhere.
|
|
.Sh BUGS
|
|
Some idiot decided to call it
|
|
.Em tearing ,
|
|
depriving us of the opportunity to say that atomic operations prevent
|
|
fusion and
|
|
.Em fission .
|