NetBSD/share/man/man9/vnodeops.9
2006-12-23 09:36:56 +00:00

1478 lines
42 KiB
Groff

.\" $NetBSD: vnodeops.9,v 1.60 2006/12/23 09:36:56 wiz Exp $
.\"
.\" Copyright (c) 2001, 2005, 2006 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" This code is derived from software contributed to The NetBSD Foundation
.\" by Gregory McGarry.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" This product includes software developed by the NetBSD
.\" Foundation, Inc. and its contributors.
.\" 4. Neither the name of The NetBSD Foundation nor the names of its
.\" contributors may be used to endorse or promote products derived
.\" from this software without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd December 9, 2006
.Dt VNODEOPS 9
.Os
.Sh NAME
.Nm vnodeops ,
.Nm VOP_LOOKUP ,
.Nm VOP_CREATE ,
.Nm VOP_MKNOD ,
.Nm VOP_OPEN ,
.Nm VOP_CLOSE ,
.Nm VOP_ACCESS ,
.Nm VOP_GETATTR ,
.Nm VOP_SETATTR ,
.Nm VOP_READ ,
.Nm VOP_WRITE ,
.Nm VOP_IOCTL ,
.Nm VOP_FCNTL ,
.Nm VOP_POLL ,
.Nm VOP_KQFILTER ,
.Nm VOP_REVOKE ,
.Nm VOP_MMAP ,
.Nm VOP_FSYNC ,
.Nm VOP_SEEK ,
.Nm VOP_REMOVE ,
.Nm VOP_LINK ,
.Nm VOP_RENAME ,
.Nm VOP_MKDIR ,
.Nm VOP_RMDIR ,
.Nm VOP_SYMLINK ,
.Nm VOP_READDIR ,
.Nm VOP_READLINK ,
.Nm VOP_ABORTOP ,
.Nm VOP_INACTIVE ,
.Nm VOP_RECLAIM ,
.Nm VOP_LOCK ,
.Nm VOP_UNLOCK ,
.Nm VOP_ISLOCKED ,
.Nm VOP_BMAP ,
.Nm VOP_PRINT ,
.Nm VOP_PATHCONF ,
.Nm VOP_ADVLOCK ,
.Nm VOP_LEASE ,
.Nm VOP_WHITEOUT ,
.Nm VOP_GETPAGES ,
.Nm VOP_PUTPAGES ,
.Nm VOP_STRATEGY ,
.Nm VOP_BWRITE ,
.Nm VOP_GETEXTATTR ,
.Nm VOP_SETEXTATTR ,
.Nm VOP_LISTEXTATTR ,
.Nd vnode operations
.Sh SYNOPSIS
.In sys/param.h
.In sys/buf.h
.In sys/dirent.h
.In sys/lock.h
.In sys/vnode.h
.In sys/mount.h
.In sys/namei.h
.In sys/unistd.h
.In sys/fcntl.h
.In sys/lockf.h
.In sys/extattr.h
.Ft int
.Fn VOP_LOOKUP "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp"
.Ft int
.Fn VOP_CREATE "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp" "struct vattr *vap"
.Ft int
.Fn VOP_MKNOD "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp" "struct vattr *vap"
.Ft int
.Fn VOP_OPEN "struct vnode *vp" "int mode" "kauth_cred_t cred" \
"struct lwp *l"
.Ft int
.Fn VOP_CLOSE "struct vnode *vp" "int fflag" "kauth_cred_t cred" \
"struct lwp *l"
.Ft int
.Fn VOP_ACCESS "struct vnode *vp" "int mode" "kauth_cred_t cred" \
"struct lwp *l"
.Ft int
.Fn VOP_GETATTR "struct vnode *vp" "struct vattr *vap" \
"kauth_cred_t cred" "struct lwp *l"
.Ft int
.Fn VOP_SETATTR "struct vnode *vp" "struct vattr *vap" \
"kauth_cred_t cred" "struct lwp *l"
.Ft int
.Fn VOP_READ "struct vnode *vp" "struct uio *uio" "int ioflag" \
"kauth_cred_t cred"
.Ft int
.Fn VOP_WRITE "struct vnode *vp" "struct uio *uio" "int ioflag" \
"kauth_cred_t cred"
.Ft int
.Fn VOP_IOCTL "struct vnode *vp" "u_long command" "void *data" \
"int fflag" "kauth_cred_t cred" "struct lwp *l"
.Ft int
.Fn VOP_FCNTL "struct vnode *vp" "u_int command" "void *data" \
"int fflag" "kauth_cred_t cred" "struct lwp *l"
.Ft int
.Fn VOP_POLL "struct vnode *vp" "int events" "struct lwp *l"
.Ft int
.Fn VOP_KQFILTER "struct vnode *vp" "struct knote *kn"
.Ft int
.Fn VOP_REVOKE "struct vnode *vp" "int flags"
.Ft int
.Fn VOP_MMAP "struct vnode *vp" "int fflags" \
"kauth_cred_t cred" "struct lwp *l"
.Ft int
.Fn VOP_FSYNC "struct vnode *vp" "kauth_cred_t cred" "int flags" \
"off_t offlo" "off_t offhi" "struct lwp *l"
.Ft int
.Fn VOP_SEEK "struct vnode *vp" "off_t oldoff" "off_t newoff" \
"kauth_cred_t cred"
.Ft int
.Fn VOP_REMOVE "struct vnode *dvp" "struct vnode *vp" \
"struct componentname *cnp"
.Ft int
.Fn VOP_LINK "struct vnode *dvp" "struct vnode *vp" \
"struct componentname *cnp"
.Ft int
.Fn VOP_RENAME "struct vnode *fdvp" "struct vnode *fvp" \
"struct componentname *fcnp" "struct vnode *tdvp" \
"struct vnode *tvp" "struct componentname *tcnp"
.Ft int
.Fn VOP_MKDIR "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp" "struct vattr *vap"
.Ft int
.Fn VOP_RMDIR "struct vnode *dvp" "struct vnode *vp" \
"struct componentname *cnp"
.Ft int
.Fn VOP_SYMLINK "struct vnode *dvp" "struct vnode **vpp" \
"struct componentname *cnp" "struct vattr *vap" "char *target"
.Ft int
.Fn VOP_READDIR "struct vnode *vp" "struct uio *uio" \
"kauth_cred_t cred" "int *eofflag" "off_t **cookies" "int *ncookies"
.Ft int
.Fn VOP_READLINK "struct vnode *vp" "struct uio *uio" "kauth_cred_t cred"
.Ft int
.Fn VOP_ABORTOP "struct vnode *dvp" "struct componentname *cnp"
.Ft int
.Fn VOP_INACTIVE "struct vnode *vp" "struct lwp *l"
.Ft int
.Fn VOP_RECLAIM "struct vnode *vp" "struct lwp *l"
.Ft int
.Fn VOP_LOCK "struct vnode *vp" "int flags"
.Ft int
.Fn VOP_UNLOCK "struct vnode *vp" "int flags"
.Ft int
.Fn VOP_ISLOCKED "struct vnode *vp"
.Ft int
.Fn VOP_BMAP "struct vnode *vp" "daddr_t bn" "struct vnode **vpp" \
"daddr_t *bnp" "int *runp"
.Ft int
.Fn VOP_PRINT "struct vnode *vp"
.Ft int
.Fn VOP_PATHCONF "struct vnode *vp" "int name" "register_t *retval"
.Ft int
.Fn VOP_ADVLOCK "struct vnode *vp" "void *id" "int op" \
"struct flock *fl" "int flags"
.Ft int
.Fn VOP_LEASE "struct vnode *vp" "struct lwp *l" "kauth_cred_t cred" \
"int flag"
.Ft int
.Fn VOP_WHITEOUT "struct vnode *dvp" "struct componentname *cnp" \
"int flags"
.Ft int
.Fn VOP_GETPAGES "struct vnode *vp" "voff_t offset" "struct vm_page **m" \
"int *count" "int centeridx" "vm_prot_t access_type" "int advice" "int flags"
.Ft int
.Fn VOP_PUTPAGES "struct vnode *vp" "voff_t offlo" "voff_t offhi" \
"int flags"
.Ft int
.Fn VOP_STRATEGY "struct vnode *vp" "struct buf *bp"
.Ft int
.Fn VOP_BWRITE "struct buf *bp"
.Ft int
.Fn VOP_GETEXTATTR "struct vnode *vp" "int attrnamespace" "const char *name" \
"struct uio *uio" "size_t *size" "kauth_cred_t cred" "struct lwp *l"
.Ft int
.Fn VOP_SETEXTATTR "struct vnode *vp" "int attrnamespace" "const char *name" \
"struct uio *uio" "kauth_cred_t cred" "struct lwp *l"
.Ft int
.Fn VOP_LISTEXTATTR "struct vnode *vp" "int attrnamespace" "struct uio *uio" \
"size_t *size" "kauth_cred_t cred" "struct lwp *l"
.Pp
Not all header files are required for each function.
.Sh DESCRIPTION
The vnode operations vector describes what operations can be done to
the file associated with the vnode.
The system maintains one vnode operations vector for each file system
type configured into the kernel.
The vnode operations vector contains a pointer to a function for each
operation supported by the file system.
Many of the functions described in the vnode operations vector are
closely related to their corresponding system calls.
In most cases, they are called as a result of the system call
associated with the operation being invoked.
.Pp
Functions in the vnode operations vector are invoked using specialised
macros.
The following table lists the elements of the vnode operations vector,
the corresponding invocation macro, and a description of the element.
.Pp
.nf
.ta \w'int (*vop_listextattr)()'u+2n +\w'VOP_LISTEXTATTR'u+2n +\w'Map file into user address space'u
\fIVector element\fP \fIMacro\fP \fIDescription\fP
.ta \w'int (*vop_listextattr)()'u+2n +\w'VOP_LISTEXTATTR'u+2n +\w'Map file into user address space'u+6nC
.sp 5p
int (*vop_lookup)() VOP_LOOKUP Lookup file name in name cache
int (*vop_create)() VOP_CREATE Create a new file
int (*vop_mknod)() VOP_MKNOD Make a new device
int (*vop_open)() VOP_OPEN Open a file
int (*vop_close)() VOP_CLOSE Close a file
int (*vop_access)() VOP_ACCESS Determine file accessibility
int (*vop_getattr)() VOP_GETATTR Get file attributes
int (*vop_setattr)() VOP_SETATTR Set file attributes
int (*vop_read)() VOP_READ Read from a file
int (*vop_write)() VOP_WRITE Write to a file
int (*vop_ioctl)() VOP_IOCTL Perform device-specific I/O
int (*vop_fcntl)() VOP_FCNTL Perform file control
int (*vop_poll)() VOP_POLL Test if poll event has occurred
int (*vop_kqfilter)() VOP_KQFILTER Register a knote
int (*vop_revoke)() VOP_REVOKE Eliminate vnode activity
int (*vop_mmap)() VOP_MMAP Map file into user address space
int (*vop_fsync)() VOP_FSYNC Flush pending data to disk
int (*vop_seek)() VOP_SEEK Test if file is seekable
int (*vop_remove)() VOP_REMOVE Remove a file
int (*vop_link)() VOP_LINK Link a file
int (*vop_rename)() VOP_RENAME Rename a file
int (*vop_mkdir)() VOP_MKDIR Make a new directory
int (*vop_rmdir)() VOP_RMDIR Remove a directory
int (*vop_symlink)() VOP_SYMLINK Create a symbolic link
int (*vop_readdir)() VOP_READDIR Read directory entry
int (*vop_readlink)() VOP_READLINK Read contents of a symlink
int (*vop_abortop)() VOP_ABORTOP Abort pending operation
int (*vop_inactive)() VOP_INACTIVE Release the inactive vnode
int (*vop_reclaim)() VOP_RECLAIM Reclaim vnode for another file
int (*vop_lock)() VOP_LOCK Sleep until vnode lock is free
int (*vop_unlock)() VOP_UNLOCK Wake up process sleeping on lock
int (*vop_islocked)() VOP_ISLOCKED Test if vnode is locked
int (*vop_bmap)() VOP_BMAP Logical block number conversion
int (*vop_print)() VOP_PRINT Print debugging information
int (*vop_pathconf)() VOP_PATHCONF Return POSIX pathconf data
int (*vop_advlock)() VOP_ADVLOCK Advisory record locking
int (*vop_lease)() VOP_LEASE Validate vnode credentials
int (*vop_whiteout)() VOP_WHITEOUT Whiteout vnode
int (*vop_getpages)() VOP_GETPAGES Read VM pages from file
int (*vop_putpages)() VOP_PUTPAGES Write VM pages to file
int (*vop_strategy)() VOP_STRATEGY Read/write a file system buffer
int (*vop_bwrite)() VOP_BWRITE Write a file system buffer
int (*vop_getextattr)() VOP_GETEXTATTR Get extended attribute
int (*vop_setextattr)() VOP_SETEXTATTR Set extended attribute
int (*vop_listextattr)() VOP_LISTEXTATTR List extended attributes
.fi
.Pp
The implementation details of the vnode operations vector are not
quite what is described here.
.Pp
If the file system type does not support a specific operation, it must
nevertheless assign an appropriate function in the vnode operations
vector to do the minimum required of it.
In most cases, such functions either do nothing or return an error
value to the effect that it is not supported.
.Pp
Many of the functions in the vnode operations vector take a
componentname structure.
It is used to encapsulate many parameters into a single function
argument.
It has the following structure:
.Bd -literal
struct componentname {
/*
* Arguments to lookup.
*/
u_long cn_nameiop; /* namei operation */
u_long cn_flags; /* flags to namei */
struct lwp *cn_lwp; /* lwp requesting lookup */
kauth_cred_t cn_cred; /* credentials */
/*
* Shared between lookup and commit routines.
*/
char *cn_pnbuf; /* pathname buffer */
const char *cn_nameptr; /* pointer to looked up name */
long cn_namelen; /* length of looked up component */
u_long cn_hash; /* hash value of looked up name */
long cn_consume; /* chars to consume in lookup() */
};
.Ed
.Pp
The top half of the structure is used exclusively for the pathname
lookups using
.Fn VOP_LOOKUP
and is initialised by the caller.
The semantics of the lookup are affected by the lookup operation
specified in
.Em cn_nameiop
and the flags specified in
.Em cn_flags .
Valid operations are:
.Pp
.Bl -tag -offset indent -width LOOKUP -compact
.It LOOKUP
perform name lookup only
.It CREATE
set up for file creation
.It DELETE
set up for file deletion
.It RENAME
set up for file renaming
.It OPMASK
mask for operation
.El
.Pp
Valid values for
.Em cnp-\*[Gt]cn_flags
are:
.Pp
.Bl -tag -offset indent -width LOCKPARENT -compact
.It LOCKLEAF
lock inode on return
.It LOCKPARENT
want parent vnode returned locked
.It NOCACHE
name must not be left in name cache (see
.Xr namecache 9 )
.It FOLLOW
follow symbolic links
.It NOFOLLOW
do not follow symbolic links (pseudo)
.It MODMASK
mask of operational modifiers
.El
.Pp
No vnode operations may be called from interrupt context.
Most operations also require the vnode to be locked on entry.
To prevent deadlocks, when acquiring locks on multiple vnodes, the
lock on the parent directory must be acquired before the lock on the child
directory.
.Pp
Vnode operations for a file system type generally should not be
called directly from the kernel, but accessed indirectly through the
high-level convenience functions discussed in
.Xr vnsubr 9 .
.Sh FUNCTIONS
.Bl -tag -width compact
.It Fn VOP_LOOKUP "dvp" "vpp" "cnp"
Lookup a single pathname component in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to search and
.Fa cnp
is the pathname component to be searched for.
If the pathname component is found, the address of the resulting
locked vnode is returned in
.Fa vpp .
The operation specified in
.Em cnp-\*[Gt]cn_nameiop
gives
.Fn VOP_LOOKUP
hints about the reason for requesting the lookup and uses it to cache
file system type specific information in the vnode for subsequent
operations.
.Pp
There are three types of lookups: ".", ".." (ISDOTDOT), and other.
If the pathname component being searched for is ".", then
.Fa dvp
has an extra reference added to it and it is returned in
.Fa *vpp .
If the pathname component being searched for is ".." (ISDOTDOT),
.Fa dvp
is unlocked, the ".." node is locked and then
.Fa dvp
is relocked.
This process preserves the protocol of always locking nodes from root
downward and prevents deadlock.
For other pathname components,
.Fn VOP_LOOKUP
checks the accessibility of the directory and searches the name cache
for the pathname component.
See
.Xr namecache 9 .
If the pathname is not found in the name cache, the directory is
searched for the pathname.
The resulting locked vnode is returned in
.Fa vpp .
.Fa dvp
is always returned locked.
.Pp
On failure
.Fa *vpp
is
.Dv NULL ,
and
.Fa dvp
is left locked.
If the operation is successful
.Fa *vpp
is locked and zero is returned.
Typically, if
.Fa *vpp
and
.Fa dvp
are the same vnode the caller will need to release twice (decrement
the reference count) and unlock once.
.It Fn VOP_CREATE "dvp" "vpp" "cnp" "vap"
Create a new file in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to create the new file in and
.Fa cnp
is the pathname component of the new file.
The argument
.Fa vap
specifies the attributes that the new file should be created with.
If the file is successfully created, the address of the resulting
locked vnode is returned in
.Fa vpp
and zero is returned.
Regardless of the return value, the directory vnode
.Fa dvp
will be unlocked on return.
.Pp
This function is called after
.Fn VOP_LOOKUP
when a file is being created.
Normally,
.Fn VOP_LOOKUP
will have set the SAVENAME flag in
.Em cnp-\*[Gt]cn_flags
to keep the memory pointed to by
.Em cnp-\*[Gt]cn_pnbuf
valid.
If an error is detected when creating the file, this memory is
released.
If the file is created successfully it will be released unless the
SAVESTART flag is specified in
.Em cnp-\*[Gt]cn_flags .
.It Fn VOP_MKNOD "dvp" "vpp" "cnp" "vap"
Make a new device-special file in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to create the new device-special
file in and
.Fa cnp
is the pathname component of the new device-special file.
The argument
.Fa vap
specifies the attributes that the new device-special file should be
created with.
If the file is successfully created, the address of the resulting
locked vnode is returned in
.Fa vpp
and zero is returned.
.Pp
This function is called after
.Fn VOP_LOOKUP
when a device-special file is being created.
Normally,
.Fn VOP_LOOKUP
will have set the SAVENAME flag in
.Em cnp-\*[Gt]cn_flags
to keep the memory pointed to by
.Em cnp-\*[Gt]cn_pnbuf
valid.
If an error is detected when creating the device-special file,
this memory is released.
If the device-special file is created successfully it will be released
unless the SAVESTART flag is specified in
.Em cnp-\*[Gt]cn_flags .
.It Fn VOP_OPEN "vp" "mode" "cred" "l"
Open a file.
The argument
.Fa vp
is the vnode of the file to open and
.Fa mode
specifies the access mode required by the calling process.
The calling process and its credentials are specified by
.Fa l
and
.Fa cred
respectively.
The access mode is a set of flags, including FREAD, FWRITE,
O_NONBLOCK, O_APPEND, etc.
.Fn VOP_OPEN
must be called before a file can be accessed by a thread.
The vnode reference count is incremented.
.Pp
.Fn VOP_OPEN
expects the vnode
.Fa vp
to be locked on entry and will leave it locked on return.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_CLOSE "vp" "fflag" "cred" "l"
Close a file.
The argument
.Fa vp
is the vnode of the file to close and
.Fa fflag
specifies the access mode by the calling process.
The calling process and its credentials are specified by
.Fa l
and
.Fa cred
respectively.
.Fn VOP_CLOSE
must be called after a file is finished with.
.Pp
.Fn VOP_CLOSE
expects at least a reference to be associated with the vnode and does
not care whether the vnode is locked.
The lock and reference state is left unchanged on return.
.It Fn VOP_ACCESS "vp" "mode" "cred" "l"
Determine the accessibility (permissions) of the file against the
specified credentials.
The argument
.Fa vp
is the vnode of the file to check,
.Fa mode
is the type of access required,
.Fa cred
contains the user credentials to check and
.Fa l
is the process which is checking the credentials.
The argument
.Fa mode
is a mask which can contain VREAD, VWRITE or VEXEC.
If the file is accessible in the specified way, zero is returned,
otherwise an appropriate error code is returned.
.Pp
The vnode
.Fa vp
will be locked on entry and should remain locked on return.
.It Fn VOP_GETATTR "vp" "vap" "cred" "l"
Get specific vnode attributes on a file.
The argument
.Fa vp
is the vnode of the file to get the attributes for.
The arguments
.Fa l
and
.Fa cred
specify the calling process and its credentials respectively.
.Fn VOP_GETATTR
uses the file system type specific data object
.Em vp-\*[Gt]v_data
to reference the underlying file attributes.
Attributes associated with the file are collected by setting the
required attribute bits in
.Em vap-\*[Gt]va_mask .
The attributes are returned in
.Fa vap .
Attributes which are not available are set to the value VNOVAL.
.Pp
For more information on vnode attributes see
.Xr vattr 9 .
.It Fn VOP_SETATTR "vp" "vap" "cred" "l"
Set specific vnode attributes on a file.
The argument
.Fa vp
is the locked vnode of the file to set the attributes for.
The arguments
.Fa l
and
.Fa cred
specify the calling process and its credentials respectively.
.Fn VOP_SETATTR
uses the file system type specific data object
.Em vp-\*[Gt]v_data
to reference the underlying file attributes.
The new attributes are defined in
.Fa vap .
Attributes associated with the file are set by setting the required
attribute bits in
.Em vap-\*[Gt]va_mask .
Attributes which are not being modified by
.Fn VOP_SETATTR
should be set to the value VNOVAL.
If the operation is successful zero is returned, otherwise an
appropriate error is returned.
.Pp
For more information on vnode attributes see
.Xr vattr 9 .
.It Fn VOP_READ "vp" "uio" "ioflag" "cred"
Read the contents of a file.
The argument
.Fa vp
is the vnode of the file to read from,
.Fa uio
is the location to read the data into,
.Fa ioflag
is a set of flags and
.Fa cred
are the credentials of the calling process.
.Pp
The
.Fa ioflag
argument is used to give directives and hints to the file system.
When attempting a read, the high 16 bits are used to provide a
read-ahead hint (in units of file system blocks) that the file system
should attempt.
The low 16 bits are a bit mask which can contain the following flags:
.Pp
.Bl -tag -offset indent -width IO_ALTSEMANTICS -compact
.It IO_UNIT
do I/O as atomic unit
.It IO_APPEND
append write to end
.It IO_SYNC
sync I/O file integrity completion
.It IO_NODELOCKED
underlying node already locked
.It IO_NDELAY
FNDELAY flag set in file table
.It IO_DSYNC
sync I/O data integrity completion
.It IO_ALTSEMANTICS
use alternate I/O semantics
.It IO_NORMAL
operate on regular data
.It IO_EXT
operate on extended attributes
.It IO_DIRECT
do not buffer data in the kernel
.El
.Pp
Zero is returned on success, otherwise an error is returned.
The vnode should be locked on entry and remains locked on exit.
.It Fn VOP_WRITE "vp" "uio" "ioflag" "cred"
Write to a file.
The argument
.Fa vp
is the vnode of the file to write to,
.Fa uio
is the location of the data to write,
.Fa ioflag
is a set of flags and
.Fa cred
are the credentials of the calling process.
.Pp
The
.Fa ioflag
argument is used to give directives and hints to the file system.
The low 16 bits are a bit mask which can contain the same flags as
.Fn VOP_READ .
.Pp
Zero is returned on success, otherwise an error is returned.
The vnode should be locked on entry and remains locked on exit.
.It Fn VOP_IOCTL "vp" "command" "data" "fflag" "cred" "l"
Perform device-specific I/O.
The argument
.Fa vp
is the locked vnode of the file, normally representing a device.
The argument
.Fa command
specifies the device-specific operation to perform and
.Fa data
provides extra data for the specified operation.
The argument
.Fa fflag
is a set of flags.
The argument
.Fa cred
is the caller's credentials and
.Fa l
the calling process.
If the operation is successful, zero is
returned, otherwise an appropriate error code is returned.
.Pp
Most file systems do not supply a function for
.Fn VOP_IOCTL .
This function implements the
.Xr ioctl 2
system call.
.It Fn VOP_FCNTL "vp" "command" "data" "fflag" "cred" "l"
Perform file control.
The argument
.Fa vp
is the locked vnode of the file.
The argument
.Fa command
specifies the operation to perform and
.Fa data
provides extra data for the specified operation.
The argument
.Fa fflag
is a set of flags.
The argument
.Fa cred
is the caller's credentials and
.Fa l
the calling process.
If the operation is successful, zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_POLL "vp" "events" "l"
Test if a poll event has occurred.
The argument
.Fa vp
is the vnode of the file to poll and
.Fa l
is the calling process.
It returns any events of interest as specified by
.Fa events
that may have occurred for the file.
The argument
.Fa events
is a set of flags as specified by
.Xr poll 2 .
.It Fn VOP_KQFILTER "vp" "kn"
Register a knote
.Fa kn
with the vnode
.Fa vp .
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_REVOKE "vp" "flags"
Eliminate all activity associated with the vnode
.Fa vp .
The argument
.Fa flags
is a set of flags.
If REVOKEALL is set in
.Fa flags
all vnodes aliased to the vnode
.Fa vp
are also eliminated.
If the operation is successful zero is returned, otherwise an
appropriate error is returned.
.It Fn VOP_MMAP "vp" "fflags" "cred" "l"
Map file into user address space.
The argument
.Fa vp
is the locked vnode of the file to map into an address space.
The argument
.Fa fflags
is a set of flags.
The argument
.Fa cred
is the caller's credentials and
.Fa l
the calling process requesting the map.
If the operation is successful, zero is returned, otherwise an
appropriate error code is returned.
.Pp
Most file systems do not supply a function for
.Fn VOP_MMAP .
This function implements the
.Xr mmap 2
system call.
.It Fn VOP_FSYNC "vp" "cred" "flags" "offlo" "offhi" "l"
Flush pending data buffers for a file to disk.
The argument
.Fa vp
is the locked vnode of the file to flush.
The argument
.Fa cred
is the caller's credentials and
.Fa l
the calling process.
The argument
.Fa flags
is a set of flags.
If FSYNC_WAIT is specified in
.Fa flags ,
the function should wait for I/O to complete before returning.
The arguments
.Fa offlo
and
.Fa offhi
specify the range of the file to flush.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Pp
This function implements the
.Xr sync 2
and
.Xr fsync 2
system calls.
.It Fn VOP_SEEK "vp" "oldoff" "newoff" "cred"
Test if the file is seekable for the specified offset
.Fa newoff .
The argument
.Fa vp
is the locked vnode of the file to test.
For most filesystems this function simply tests if
.Fa newoff
is valid.
If the specified
.Fa newoff
is less than zero, the function returns error code EINVAL.
.It Fn VOP_REMOVE "dvp" "vp" "cnp"
Remove a file.
The argument
.Fa dvp
is the locked vnode of the directory to remove the file from and
.Fa vp
is the locked vnode of the file to remove.
The argument
.Fa cnp
is the pathname component about the file to remove.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
Both
.Fa dvp
and
.Fa vp
are locked on entry and are to be unlocked before returning.
.It Fn VOP_LINK "dvp" "vp" "cnp"
Link to a file.
The argument
.Fa dvp
is the locked vnode of the directory to create the new link in and
.Fa vp
is the vnode of the file to be linked.
The argument
.Fa cnp
is the pathname component of the new link.
If the operation is successful zero is returned, otherwise an error
code is returned.
The directory vnode
.Fa dvp
should be locked on entry and will be released and unlocked on return.
The vnode
.Fa vp
should not be locked on entry and will remain unlocked on return.
.It Fn VOP_RENAME "fdvp" "fvp" "fcnp" "tdvp" "tvp" "tcnp"
Rename a file.
The argument
.Fa fdvp
is the vnode of the old parent directory containing the file to be
renamed and
.Fa fvp
is the vnode of the file to be renamed.
The argument
.Fa fcnp
is the pathname component about the file to be renamed.
The argument
.Fa tdvp
is the vnode of the new directory of the target file and
.Fa tvp
is the vnode of the target file (if it exists).
The argument
.Fa tcnp
is the pathname component about the file's new name.
If the operation is successful zero is returned, otherwise an error
code is returned.
.Pp
The source directory and file vnodes should be unlocked and their
reference counts should be incremented before entry.
The target directory and file vnodes should both be locked on entry.
.Fn VOP_RENAME
updates the reference counts prior to returning.
.It Fn VOP_MKDIR "dvp" "vpp" "cnp" "vap"
Make a new directory in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to create the new directory in and
.Fa cnp
is the pathname component of the new directory.
The argument
.Fa vap
specifies the attributes that the new directory should be created
with.
If the directory is successfully created, the address of the resulting
locked vnode is returned in
.Fa vpp
and zero is returned.
.Pp
This function is called after
.Fn VOP_LOOKUP
when a directory is being created.
Normally,
.Fn VOP_LOOKUP
will have set the SAVENAME flag in
.Em cnp-\*[Gt]cn_flags
to keep the memory pointed to by
.Em cnp-\*[Gt]cn_pnbuf
valid.
If an error is detected when creating the directory, this memory is
released.
If the directory is created successfully it will be released unless
the SAVESTART flag is specified in
.Em cnp-\*[Gt]cn_flags .
.It Fn VOP_RMDIR "dvp" "vp" "cnp"
Remove a directory in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to remove the directory from and
.Fa vp
is the locked vnode of the directory to remove.
The argument
.Fa cnp
is the pathname component of the directory.
Zero is returned on success, otherwise an error code is returned.
Both
.Fa dvp
and
.Fa vp
should be locked on entry and will be released and unlocked on return.
.It Fn VOP_SYMLINK "dvp" "vpp" "cnp" "vap" "target"
Create a symbolic link in a given directory.
The argument
.Fa dvp
is the locked vnode of the directory to create the symbolic link in
and
.Fa cnp
is the pathname component of the symbolic link.
The argument
.Fa vap
specifies the attributes that the symbolic link should be created
with and
.Fa target
specifies the pathname of the target of the symbolic link.
If the symbolic link is successfully created, the address of the
resulting locked vnode is returned in
.Fa vpp
and zero is returned.
.Pp
This function is called after
.Fn VOP_LOOKUP
when a symbolic link is being created.
Normally,
.Fn VOP_LOOKUP
will have set the SAVENAME flag in
.Em cnp-\*[Gt]cn_flags
to keep the memory pointed to by
.Em cnp-\*[Gt]cn_pnbuf
valid.
If an error is detected when creating the symbolic link, this memory
is released.
If the symbolic link is created successfully it will be released
unless the SAVESTART flag is specified in
.Em cnp-\*[Gt]cn_flags .
.It Fn VOP_READDIR "vp" "uio" "cred" "eofflag" "cookies" "ncookies"
Read directory entry.
The argument
.Fa vp
is the vnode of the directory to read the contents of and
.Fa uio
is the destination location to read the contents into.
The argument
.Fa cred
is the caller's credentials.
The argument
.Fa eofflag
is the pointer to a flag which is set by
.Fn VOP_READDIR
to indicate an end-of-file condition.
If
.Fa eofflag
is
.Dv NULL ,
the end-of-file condition is not returned.
The arguments
.Fa cookies
and
.Fa ncookies
specify the addresses for the list and number of directory seek
cookies generated for NFS.
Both
.Fa cookies
and
.Fa ncookies
should be
.Dv NULL
if they aren't required to be returned by
.Fn VOP_READDIR .
The directory contents are read into struct dirent structures.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Pp
The directory should be locked on entry and will remain locked on
return.
.Pp
If
.Fn VOP_READDIR
is called from the NFS server, the extra arguments
.Fa eofflag ,
.Fa ncookies
and
.Fa cookies
are used.
The value of
.Fa *eofflag
will be set to TRUE if the end of the directory is reached while
reading.
The directory seek cookies are returned to the NFS client and may be
used later to restart a directory read part way through the directory.
There should be one cookie returned per directory entry.
The value of the cookie should be the offset within the directory
where the on-disk version of the appropriate directory entry starts.
.It Fn VOP_READLINK "vp" "uio" "cred"
Read the contents of a symbolic link.
The argument
.Fa vp
is the locked vnode of the symlink and
.Fa uio
is the destination location to read the contents into.
The argument
.Fa cred
is the credentials of the caller.
If the operation is successful zero is returned, otherwise an error
code is returned.
.Pp
The vnode should be locked on entry and will remain locked on return.
.It Fn VOP_ABORTOP "dvp" "cnp"
Abort pending operation on vnode
.Fa dvp .
This operation is rarely implemented in file systems.
.It Fn VOP_INACTIVE "vp" "l"
Release the inactive vnode.
.Fn VOP_INACTIVE
is called when the kernel is no longer using the vnode.
This may be because the reference count reaches zero or it may be that
the file system is being forcibly unmounted while there are open
files.
It can be used to reclaim space for open but deleted files.
The argument
.Fa vp
is the locked vnode to be released.
The argument
.Fa l
is the calling process.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
The vnode
.Fa vp
must be locked on entry, and will be unlocked on return.
.It Fn VOP_RECLAIM "vp" "l"
Reclaim the vnode for another file system.
.Fn VOP_RECLAIM
is called when a vnode is being reused for a different file system.
Any file system specific resources associated with the vnode should be
freed.
The argument
.Fa vp
is the vnode to be reclaimed.
The argument
.Fa l
is the calling process.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
The vnode
.Fa vp
should not be locked on entry, and will remain unlocked on return.
.It Fn VOP_LOCK "vp" "flags"
Sleep until vnode lock is free.
The argument
.Fa vp
is the vnode of the file to be locked.
The argument
.Fa flags
is a set of
.Xr lockmgr 9
flags.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Fn VOP_LOCK
is used to serialise access to the file system such as to prevent two
writes to the same file from happening at the same time.
Kernel code should use
.Xr vn_lock 9
to lock a vnode rather than calling
.Fn VOP_LOCK
directly.
.It Fn VOP_UNLOCK "vp" "flags"
Wake up process sleeping on lock.
The argument
.Fa vp
is the vnode of the file to be unlocked.
The argument
.Fa flags
is a set of
.Xr lockmgr 9
flags.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Fn VOP_UNLOCK
is used to serialise access to the file system such as to prevent two
writes to the same file from happening at the same time.
.It Fn VOP_ISLOCKED "vp"
Test if the vnode
.Fa vp
is locked.
A non-zero value is returned if the vnode is locked, otherwise
zero is returned.
.It Fn VOP_BMAP "vp" "bn" "vpp" "bnp" "runp"
Convert the logical block number
.Fa bn
of a file specified by vnode
.Fa vp
to its physical block number on the disk.
If
.Fa vpp
is not
.Dv NULL ,
the device vnode for the file system is
returned in the address specified by
.Fa vpp .
If
.Fa runp
is not
.Dv NULL ,
the maximum blocksize is returned in the address specified by
.Fa runp .
.It Fn VOP_PRINT "vp"
Print debugging information.
The argument
.Fa vp
is the vnode to print.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_PATHCONF "vp" "name" "retval"
Implement POSIX
.Xr pathconf 2
and
.Xr fpathconf 2
support.
The argument
.Fa vp
is the locked vnode to get information about.
The argument
.Fa name
specifies the type of information to return.
The information is returned in the address specified by
.Fa retval .
Valid values for
.Fa name
are:
.Pp
.Bl -tag -offset indent -width _PC_CHOWN_RESTRICTED -compact
.It _PC_LINK_MAX
return the maximum number of links to a file
.It _PC_NAME_MAX
return the maximum number of bytes in a file name
.It _PC_PATH_MAX
return the maximum number of bytes in a pathname
.It _PC_PIPE_BUF
return the maximum number of bytes which will be written atomically to
a pipe
.It _PC_CHOWN_RESTRICTED
return 1 if appropriate privileges are required for the
.Xr chown 2
system call, otherwise zero
.It _PC_NO_TRUNC
return if file names longer than KERN_NAME_MAX are truncated
.El
.Pp
If
.Fa name
is recognised,
.Fa *retval
is set to the specified value and zero is returned, otherwise an
appropriate error is returned.
.It Fn VOP_ADVLOCK "vp" "id" "op" "fl" "flags"
Manipulate advisory record locks on a vnode.
The argument
.Fa vp
is the vnode on which locks are manipulated.
The argument
.Fa id
is the id token which is changing the lock and
.Fa op
is the
.Xr fcntl 2
operation to perform.
Valid values are:
.Pp
.Bl -tag -offset indent -width F_UNLCK -compact
.It F_SETLK
set lock
.It F_GETLK
get the first conflicted lock
.It F_UNLCK
clear lock
.El
.Pp
The argument
.Fa fl
is a description of the lock.
In the case of
.Dv SEEK_CUR ,
the caller should add the current file offset to
fl-\*[Gt]l_start beforehand.
.Fn VOP_ADVLOCK
treats
.Dv SEEK_CUR
as
.Dv SEEK_SET .
.Pp
The argument
.Fa flags
is the set of flags.
Valid values are:
.Pp
.Bl -tag -offset indent -width F_FLOCK -compact
.It F_WAIT
wait until lock is granted
.It F_FLOCK
use
.Xr flock 2
semantics for lock
.It F_POSIX
use POSIX semantics for lock
.El
.Pp
If the operation is successful zero is returned, otherwise an
appropriate error is returned.
.It Fn VOP_LEASE "vp" "l" "cred" "flags"
Validate vnode credentials and operation type.
The argument
.Fa vp
is the locked vnode of the file for which to validate the credentials
.Fa cred .
The argument
.Fa l
specifies the calling process and
.Fa flags
specifies the operation flags.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
The vnode must be locked on entry and remains locked on return.
.It Fn VOP_WHITEOUT "dvp" "cnp" "flags"
Whiteout pathname component in directory with vnode
.Fa dvp .
The argument
.Fa cnp
specifies the pathname component to whiteout.
.It Fn VOP_GETPAGES "vp" "offset" "m" "count" "centeridx" "access_type" "advice" "flags"
Read VM pages from file.
The argument
.Fa vp
is the locked vnode to read the VM pages from.
The argument
.Fa offset
is the offset in the file at which to start accessing and
.Fa m
is an array of VM pages.
The argument
.Fa count
points to a variable that specifies the number of pages to read.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
If
.Dv PGO_LOCKED
is specified in
.Fa flags ,
.Fn VOP_GETPAGES
might return fewer pages than requested.
In that case, the variable pointed to by
.Fa count
will be updated.
.Pp
This function is primarily used by the page-fault handling mechanism.
.It Fn VOP_PUTPAGES "vp" "offlo" "offhi" "flags"
Write modified (dirty) VM pages to file.
The argument
.Fa vp
is the vnode to write the VM pages to.
The vnode's vm object lock
.Va ( v_uobj.vmobjlock )
must be held by the caller and will be released upon return.
The arguments
.Fa offlo
and
.Fa offhi
specify the range of VM pages to write.
In case
.Fa offhi
is given as 0, all pages at and after the start offset
.Fa offlo
belonging to the vnode
.Fa vp
will be written.
The argument
.Fa flags
controls the behaviour of the routine and takes the vm pager's
flags
.Dv ( PGO_ -prefixed).
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.Pp
The function is primarily used by the pageout handling mechanism and
is commonly implemented indirectly
by
.Fn genfs_putpages
with the help of
.Fn VOP_STRATEGY
and
.Fn VOP_BMAP .
.It Fn VOP_STRATEGY "vp" "bp"
Read/write a file system buffer.
The argument
.Fa vp
is the vnode to read/write to.
The argument
.Fa bp
is the buffer to be read or written.
.Fn VOP_STRATEGY
will either read or write data to the file depending on the value of
.Em bp-\*[Gt]b_flags .
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_BWRITE "bp"
Write a file system buffer.
The argument
.Fa bp
specifies the buffer to be written.
If the operation is successful zero is returned, otherwise an
appropriate error code is returned.
.It Fn VOP_GETEXTATTR "vp" "attrnamespace" "name" "uio" "size" "cred" "l"
Get an extended attribute.
The argument
.Fa vp
is the locked vnode of the file or directory from which to retrieve the
attribute.
The argument
.Fa attrnamespace
specifies the extended attribute namespace.
The argument
.Fa name
is a nul-terminated character string naming the attribute to retrieve.
The argument
.Fa uio ,
if not
.Dv NULL ,
specifies where the extended attribute value is to be written.
The argument
.Fa size ,
if not
.Dv NULL ,
will contain the number of bytes required to read all of
the attribute data upon return.
In most cases,
.Fa uio
will be
.Dv NULL
when
.Fa size
is not, and vice versa.
The argument
.Fa cred
specifies the user credentials to use when authorizing the request.
The argument
.Fa l
specifies the process requesting the extended attribute.
.It Fn VOP_SETEXTATTR "vp" "attrnamespace" "name" "uio" "cred" "l"
Set an extended attribute.
The argument
.Fa vp
is the locked vnode of the file or directory to which to store the
attribute.
The argument
.Fa attrnamespace
specifies the extended attribute namespace.
The argument
.Fa name
is a nul-terminated character string naming the attribute to store.
The argument
.Fa uio
specifies the source of the extended attribute data.
The argument
.Fa cred
specifies the user credentials to use when authorizing the request.
The argument
.Fa l
specifies the process setting the extended attribute.
.It Fn VOP_LISTEXTATTR "vp" "attrnamespace" "uio" "size" "cred" "l"
Retrieve the list of extended attributes.
The argument
.Fa vp
is the locked vnode of the file or directory whose attributes are to be listed.
The argument
.Fa attrnamespace
specifies the extended attribute namespace.
The argument
.Fa uio ,
if not
.Dv NULL ,
specifies where the extended attribute list is to be written.
The argument
.Fa size ,
if not
.Dv NULL ,
will contain the number of bytes required to read all of
the attribute names upon return.
In most cases,
.Fa uio
will be
.Dv NULL
when
.Fa size
is not, and vice versa.
The argument
.Fa cred
specifies the user credentials to use when authorizing the request.
The argument
.Fa l
specifies the process requesting the extended attribute list.
.El
.Sh ERRORS
.Bl -tag -width Er
.It Bq Er ENOATTR
The requested attribute is not defined for this vnode.
.It Bq Er ENOTDIR
The vnode does not represent a directory.
.It Bq Er ENOENT
The component was not found in the directory.
.It Bq Er ENOSPC
The file system is full.
.It Bq Er EDQUOT
Quota exceeded.
.It Bq Er EACCES
Access for the specified operation is denied.
.It Bq Er EJUSTRETURN
A CREATE or RENAME operation would be successful.
.It Bq Er EPERM
An attempt was made to change an immutable file.
.It Bq Er ENOTEMPTY
An attempt was made to remove a directory which is not empty.
.It Bq Er EINVAL
An attempt was made to read from an illegal offset in the directory;
unrecognised input.
.It Bq Er EIO
A read error occurred while reading the directory or reading the
contents of a symbolic link.
.It Bq Er EROFS
The file system is read-only.
.El
.Sh SEE ALSO
.Xr extattr 9 ,
.Xr intro 9 ,
.Xr lock 9 ,
.Xr namei 9 ,
.Xr vattr 9 ,
.Xr vfs 9 ,
.Xr vfsops 9 ,
.Xr vnode 9
.Sh HISTORY
The vnode operations vector, its functions and the corresponding
macros appeared in
.Bx 4.3 .