New man page bufferio(9) for I/O transfer view of struct buf.
Move biowait/biodone from buffercache(9) man page.
This commit is contained in:
parent
5a1645e4da
commit
1098217eb7
@ -1,4 +1,4 @@
|
||||
.\" $NetBSD: buffercache.9,v 1.27 2015/03/28 19:24:04 maxv Exp $
|
||||
.\" $NetBSD: buffercache.9,v 1.28 2015/03/29 20:56:01 riastradh Exp $
|
||||
.\"
|
||||
.\" Copyright (c)2003 YAMAMOTO Takashi,
|
||||
.\" All rights reserved.
|
||||
@ -115,9 +115,7 @@
|
||||
.Nm geteblk ,
|
||||
.Nm incore ,
|
||||
.Nm allocbuf ,
|
||||
.Nm brelse ,
|
||||
.Nm biodone ,
|
||||
.Nm biowait
|
||||
.Nm brelse
|
||||
.Nd buffer cache interfaces
|
||||
.\" ------------------------------------------------------------
|
||||
.Sh SYNOPSIS
|
||||
@ -146,10 +144,6 @@
|
||||
.Fn allocbuf "buf_t *bp" "int size" "int preserve"
|
||||
.Ft void
|
||||
.Fn brelse "buf_t *bp" "int set"
|
||||
.Ft void
|
||||
.Fn biodone "buf_t *bp"
|
||||
.Ft int
|
||||
.Fn biowait "buf_t *bp"
|
||||
.\" ------------------------------------------------------------
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
@ -319,18 +313,6 @@ the buffer's additional contents.
|
||||
.\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
.It Fn brelse "bp" "set"
|
||||
Unbusy a buffer and release it to the free lists.
|
||||
.\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
.It Fn biodone "bp"
|
||||
Mark I/O complete on a buffer.
|
||||
If a callback has been requested by
|
||||
.Dv B_CALL ,
|
||||
do so.
|
||||
Otherwise, wakeup waiters.
|
||||
.\" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
.It Fn biowait "bp"
|
||||
Wait for operations on the buffer to complete.
|
||||
When they do, extract and return the I/O's error value.
|
||||
.El
|
||||
.\" ------------------------------------------------------------
|
||||
.Sh CODE REFERENCES
|
||||
The buffer cache subsystem is implemented within the file
|
||||
@ -359,11 +341,3 @@ In the current implementation,
|
||||
.Fn bread
|
||||
and its variants
|
||||
don't use a specified credential.
|
||||
.Pp
|
||||
Because
|
||||
.Fn biodone
|
||||
and
|
||||
.Fn biowait
|
||||
do not really belong to
|
||||
.Nm ,
|
||||
they shouldn't be documented here.
|
||||
|
390
share/man/man9/bufferio.9
Normal file
390
share/man/man9/bufferio.9
Normal file
@ -0,0 +1,390 @@
|
||||
.\" $NetBSD: bufferio.9,v 1.1 2015/03/29 20:56:01 riastradh Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 2015 The NetBSD Foundation, Inc.
|
||||
.\" All rights reserved.
|
||||
.\"
|
||||
.\" This code is derived from software contributed to The NetBSD Foundation
|
||||
.\" by Taylor R. Campbell.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
.\" POSSIBILITY OF SUCH DAMAGE.
|
||||
.\"
|
||||
.Dd March 29, 2015
|
||||
.Dt BUFFERIO 9
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm BUFFERIO ,
|
||||
.Nm biodone ,
|
||||
.Nm biowait ,
|
||||
.Nm getiobuf ,
|
||||
.Nm putiobuf ,
|
||||
.Nm nestiobuf_setup ,
|
||||
.Nm nestiobuf_done
|
||||
.Nd block I/O buffer transfers
|
||||
.Sh SYNOPSIS
|
||||
.In sys/buf.h
|
||||
.Ft void
|
||||
.Fn biodone "struct buf *bp"
|
||||
.Ft int
|
||||
.Fn biowait "struct buf *bp"
|
||||
.Ft struct buf *
|
||||
.Fn getiobuf "struct vnode *vp" "bool waitok"
|
||||
.Ft void
|
||||
.Fn putiobuf "struct buf *bp"
|
||||
.Ft void
|
||||
.Fn nestiobuf_setup "struct buf *mbp" "struct buf *bp" "int offset" \
|
||||
"size_t size"
|
||||
.Ft void
|
||||
.Fn nestiobuf_done "struct buf *mbp" "int donebytes" "int error"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
subsystem manages block I/O buffer transfers, described by the
|
||||
.Vt "struct buf"
|
||||
structure, which serves multiple purposes between users in
|
||||
.Nm ,
|
||||
users in
|
||||
.Xr buffercache 9 ,
|
||||
and users in block device drivers to execute transfers to physical
|
||||
disks.
|
||||
.Sh BLOCK DEVICE USERS
|
||||
Users of
|
||||
.Nm
|
||||
wishing to submit a buffer for block I/O transfer must obtain a
|
||||
.Vt "struct buf" ,
|
||||
e.g. via
|
||||
.Fn getiobuf ,
|
||||
fill its parameters, and submit it to a block device with
|
||||
.Xr bdev_strategy 9 ,
|
||||
usually via
|
||||
.Xr VOP_STRATEGY 9 .
|
||||
.Pp
|
||||
The parameters to an I/O transfer described by
|
||||
.Fa bp
|
||||
are specified by the following
|
||||
.Vt "struct buf"
|
||||
fields:
|
||||
.Bl -tag -offset abcd
|
||||
.It Fa bp Ns Li "->b_flags"
|
||||
Flags specifying the type of transfer.
|
||||
.Bl -tag -compact
|
||||
.It Dv B_READ
|
||||
Transfer is read from device.
|
||||
If not set, transfer is write to device.
|
||||
.It Dv B_ASYNC
|
||||
Asynchronous I/O.
|
||||
Caller must provide
|
||||
.Fa bp Ns Li "->b_iodone"
|
||||
and must not call
|
||||
.Fn biowait bp .
|
||||
.El
|
||||
.It Fa bp Ns Li "->b_data"
|
||||
Pointer to kernel virtual address of source/target for transfer.
|
||||
.It Fa bp Ns Li "->b_bcount"
|
||||
Nonnegative number of bytes requested for transfer.
|
||||
.It Fa bp Ns Li "->b_blkno"
|
||||
Block number at which to do transfer.
|
||||
.It Fa bp Ns Li "->b_iodone"
|
||||
If
|
||||
.Dv B_ASYNC
|
||||
is set in
|
||||
.Fa bp Ns Li "->b_flags" ,
|
||||
an I/O completion callback.
|
||||
.El
|
||||
.Pp
|
||||
Additionally, if the I/O transfer is a write associated with a
|
||||
.Xr vnode 9
|
||||
.Fa vp ,
|
||||
then before the user submits it to a block device, the user must
|
||||
increment
|
||||
.Fa vp Ns Li "->v_numoutput" .
|
||||
The user must not acquire
|
||||
.Fa vp Ns No 's
|
||||
vnode lock between incrementing
|
||||
.Fa vp Ns Li "->v_numoutput"
|
||||
and submitting
|
||||
.Fa bp
|
||||
to a block device -- doing so will likely cause deadlock with the
|
||||
syncer.
|
||||
.Pp
|
||||
Block I/O transfers may be synchronous or asynchronous:
|
||||
.Bl -dash
|
||||
.It
|
||||
If synchronous, after submitting the transfer to a block device, the
|
||||
user must call
|
||||
.Fn biowait bp
|
||||
in order to wait until the transfer has completed.
|
||||
.It
|
||||
If asynchronous, the user must set
|
||||
.Dv B_ASYNC
|
||||
in
|
||||
.Fa bp Ns Li "->b_flags"
|
||||
and provide
|
||||
.Fa bp Ns Li "->b_iodone" .
|
||||
After submitting the transfer to a block device,
|
||||
.Fa bp Ns Li "->b_iodone"
|
||||
will eventually be called with
|
||||
.Fa bp
|
||||
as its argument when the transfer has completed.
|
||||
The user
|
||||
.Em may not
|
||||
call
|
||||
.Fn biowait bp
|
||||
in this case.
|
||||
.El
|
||||
.Sh NESTED I/O TRANSFERS
|
||||
Sometimes an I/O transfer from a single buffer in memory cannot go to a
|
||||
single location on a block device: it must be split up into smaller
|
||||
transfers for each segment of the memory buffer.
|
||||
.Pp
|
||||
After initializing the
|
||||
.Li b_flags ,
|
||||
.Li b_data ,
|
||||
and
|
||||
.Li b_bcount
|
||||
parameters of an I/O transfer for the buffer, called the
|
||||
.Em master
|
||||
buffer, the user can issue smaller transfers for segments of the buffer
|
||||
using
|
||||
.Fn nestiobuf_setup .
|
||||
The nested I/O transfers are asynchronous -- when they complete, they
|
||||
debit from the amount of work left to be done in the master buffer.
|
||||
If any segments of the buffer were skipped, the user can report this
|
||||
with
|
||||
.Fn nestiobuf_done
|
||||
to debit the skipped part of the work.
|
||||
.Pp
|
||||
The master buffer's I/O transfer is completed when all nested buffers'
|
||||
I/O transfers are completed, and if
|
||||
.Fn nestiobuf_done
|
||||
is called in the case of skipped segments.
|
||||
.Pp
|
||||
For writes associated with a vnode
|
||||
.Fa vp ,
|
||||
.Fn nestiobuf_setup
|
||||
accounts for
|
||||
.Fa vp Ns Li "->v_numoutput" ,
|
||||
so the caller is not allowed to acquire
|
||||
.Fa vp Ns No 's
|
||||
vnode lock before submitting the nested I/O transfer to a block
|
||||
device.
|
||||
However, the caller is responsible for accounting the master buffer in
|
||||
.Fa vp Ns Li "->v_numoutput" .
|
||||
This must be done very carefully because after incrementing
|
||||
.Fa vp Ns Li "->v_numoutput" ,
|
||||
the caller is not allowed to acquire
|
||||
.Fa vp Ns No 's
|
||||
vnode lock before either calling
|
||||
.Fn nestiobuf_done
|
||||
or submitting the last nested I/O transfer to a block device.
|
||||
.Pp
|
||||
For example:
|
||||
.Bd -literal -offset abcd
|
||||
\&struct buf *mbp, *bp;
|
||||
\&size_t skipped = 0;
|
||||
\&unsigned i;
|
||||
\&int error = 0;
|
||||
\&
|
||||
\&mbp = getiobuf(vp, true);
|
||||
\&mbp->b_data = data;
|
||||
\&mbp->b_resid = mbp->b_bcount = datalen;
|
||||
\&mbp->b_flags = B_WRITE;
|
||||
\&
|
||||
\&KASSERT(i < nsegs);
|
||||
\&for (i = 0; i < nsegs; i++) {
|
||||
\& struct vnode *devvp;
|
||||
\& daddr_t blkno;
|
||||
\&
|
||||
\& vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
\& error = VOP_BMAP(vp, i*segsz, &devvp, &blkno, NULL);
|
||||
\& VOP_UNLOCK(vp);
|
||||
\& if (error == 0 && nbn == -1)
|
||||
\& error = EIO;
|
||||
\& if (error) {
|
||||
\& skipped += segsz;
|
||||
\& break;
|
||||
\& }
|
||||
\&
|
||||
\& bp = getiobuf(vp, true);
|
||||
\& nestiobuf_setup(bp, mbp, i*segsz, segsz);
|
||||
\& bp->b_blkno = blkno;
|
||||
\& if (i == nsegs - 1) /* Last segment. */
|
||||
\& break;
|
||||
\& VOP_STRATEGY(devvp, bp);
|
||||
\&}
|
||||
\&
|
||||
\&/*
|
||||
\& * Account v_numoutput for master write.
|
||||
\& * (Must not vn_lock before last VOP_STRATEGY!)
|
||||
\& */
|
||||
\&mutex_enter(&vp->v_interlock);
|
||||
\&vp->v_numoutput++;
|
||||
\&mutex_exit(&vp->v_interlock);
|
||||
\&
|
||||
\&if (skipped)
|
||||
\& nestiobuf_done(mbp, skipped, error);
|
||||
\&else
|
||||
\& VOP_STRATEGY(devvp, bp);
|
||||
.Ed
|
||||
.Sh BLOCK DEVICE DRIVERS
|
||||
Block device drivers implement a
|
||||
.Sq strategy
|
||||
method, in the
|
||||
.Li d_strategy
|
||||
member of
|
||||
.Li struct bdevsw
|
||||
.Pq Xr driver 9 ,
|
||||
to queue a buffer for disk I/O.
|
||||
The inputs to the strategy method are:
|
||||
.Bl -tag -offset abcd
|
||||
.It Fa bp Ns Li "->b_flags"
|
||||
Flags specifying the type of transfer.
|
||||
.Bl -tag -compact
|
||||
.It Dv B_READ
|
||||
Transfer is read from device.
|
||||
If not set, transfer is write to device.
|
||||
.El
|
||||
.It Fa bp Ns Li "->b_data"
|
||||
Pointer to kernel virtual address of source/target for transfer.
|
||||
.It Fa bp Ns Li "->b_bcount"
|
||||
Nonnegative number of bytes requested for transfer.
|
||||
.It Fa bp Ns Li "->b_blkno"
|
||||
Block number at which to do transfer, relative to partition start.
|
||||
.El
|
||||
.Pp
|
||||
If the strategy method uses
|
||||
.Xr bufq 9 ,
|
||||
it must additionally initialize the following fields before queueing
|
||||
.Fa bp
|
||||
with
|
||||
.Xr bufq_put 9 :
|
||||
.Bl -tag -offset abcd
|
||||
.It Fa bp Ns Li "->b_rawblkno"
|
||||
Block number relative to volume start.
|
||||
.El
|
||||
.Pp
|
||||
When the I/O transfer is complete, whether it succeeded or failed, the
|
||||
strategy method must:
|
||||
.Bl -dash
|
||||
.It
|
||||
Set
|
||||
.Fa bp Ns Li "->b_error"
|
||||
to zero on success, or to an
|
||||
.Xr errno 2
|
||||
error code on failure.
|
||||
.It
|
||||
Set
|
||||
.Fa bp Ns Li "->b_resid"
|
||||
to the number of bytes remaining to transfer, whether on success or
|
||||
on failure.
|
||||
If no bytes are transferred, this must be set to
|
||||
.Fa bp Ns Li "->b_bcount" .
|
||||
.It
|
||||
Call
|
||||
.Li "biodone(" Ns Fa bp Ns Li ")" .
|
||||
.El
|
||||
.Sh FUNCTIONS
|
||||
.Bl -tag -width abcd
|
||||
.It Fn biodone bp
|
||||
Notify that the I/O transfer described by
|
||||
.Fa bp
|
||||
has completed.
|
||||
.Pp
|
||||
To be called by a block device driver.
|
||||
Caller must first set
|
||||
.Fa bp Ns Li "->b_error"
|
||||
to an error code and
|
||||
.Fa bp Ns Li "->b_resid"
|
||||
to the number of bytes remaining to transfer.
|
||||
.It Fn biowait bp
|
||||
Wait for the synchronous I/O transfer described by
|
||||
.Fa bp
|
||||
to complete.
|
||||
Returns the value of
|
||||
.Fa bp Ns Li "->b_error" .
|
||||
.Pp
|
||||
To be called by a user requesting the I/O transfer.
|
||||
.Pp
|
||||
May not be called if
|
||||
.Fa bp
|
||||
represents an asynchronous transfer, i.e. if
|
||||
.Dv B_ASYNC
|
||||
is set in
|
||||
.Fa bp Ns Li "->b_flags" .
|
||||
.It Fn getiobuf vp waitok
|
||||
Allocate a
|
||||
.Fa struct buf
|
||||
for an I/O transfer.
|
||||
If
|
||||
.Fa vp
|
||||
is nonnull, the transfer is associated with it.
|
||||
If
|
||||
.Fa waitok
|
||||
is false,
|
||||
returns null if none can be allocated immediately.
|
||||
.Pp
|
||||
May sleep if
|
||||
.Fa waitok
|
||||
is true.
|
||||
.It Fn putiobuf bp
|
||||
Free
|
||||
.Fa bp ,
|
||||
which must have been allocated by
|
||||
.Fn getiobuf .
|
||||
Either
|
||||
.Fa bp
|
||||
must never have been submitted to a block device, or the I/O transfer
|
||||
must have completed.
|
||||
.El
|
||||
.Sh CODE REFERENCES
|
||||
The
|
||||
.Nm
|
||||
subsystem is implemented in
|
||||
.Pa sys/kern/vfs_bio.c .
|
||||
.Sh SEE ALSO
|
||||
.Xr buffercache 9 ,
|
||||
.Xr bufq 9
|
||||
.Sh BUGS
|
||||
The
|
||||
.Nm
|
||||
abstraction provides no way to cancel an I/O transfers once it has been
|
||||
submitted to a block device.
|
||||
.Pp
|
||||
The
|
||||
.Nm
|
||||
abstraction provides no way to do I/O transfers with non-kernel pages,
|
||||
e.g. directly to buffers in userland without copying into the kernel
|
||||
first.
|
||||
.Pp
|
||||
The
|
||||
.Vt "struct buf"
|
||||
type is all mixed up with the
|
||||
.Xr buffercache 9 .
|
||||
.Pp
|
||||
The
|
||||
.Nm
|
||||
abstraction is a totally idiotic API design.
|
||||
.Pp
|
||||
The
|
||||
.Li v_numoutput
|
||||
accounting required of
|
||||
.Nm
|
||||
callers is asinine.
|
Loading…
Reference in New Issue
Block a user