7a5be5a9ff
doing copy-on-write. - Change VFS_SNAPSHOT() to return the snapshot vnode locked. - Make the IO path for copy-on-write and snapshot-read more lightweight. Avoids deadlocks where vn_rdwr(...READ...) has a shared lock and needs to copy-on-write. Avoids deadlocks/panics where to clean pages the copy-on-write needs to allocate pages for its VOP_PUTPAGES(). L_COWINPROGRESS part approved by: Jason R. Thorpe <thorpej@netbsd.org>
387 lines
13 KiB
Groff
387 lines
13 KiB
Groff
.\" $NetBSD: vfsops.9,v 1.17 2004/06/20 18:55:58 hannken Exp $
|
|
.\"
|
|
.\" Copyright (c) 2001 The NetBSD Foundation, Inc.
|
|
.\" All rights reserved.
|
|
.\"
|
|
.\" This code is derived from software contributed to The NetBSD Foundation
|
|
.\" by Gregory McGarry.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\" 3. All advertising materials mentioning features or use of this software
|
|
.\" must display the following acknowledgement:
|
|
.\" This product includes software developed by the NetBSD
|
|
.\" Foundation, Inc. and its contributors.
|
|
.\" 4. Neither the name of The NetBSD Foundation nor the names of its
|
|
.\" contributors may be used to endorse or promote products derived
|
|
.\" from this software without specific prior written permission.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
.\" POSSIBILITY OF SUCH DAMAGE.
|
|
.\"
|
|
.Dd June 20, 2004
|
|
.Dt VFSOPS 9
|
|
.Os
|
|
.Sh NAME
|
|
.Nm vfsops ,
|
|
.Nm VFS_MOUNT ,
|
|
.Nm VFS_START ,
|
|
.Nm VFS_UNMOUNT ,
|
|
.Nm VFS_ROOT ,
|
|
.Nm VFS_QUOTACTL ,
|
|
.Nm VFS_STATFS ,
|
|
.Nm VFS_STATFS ,
|
|
.Nm VFS_SYNC ,
|
|
.Nm VFS_VGET ,
|
|
.Nm VFS_FHTOVP ,
|
|
.Nm VFS_VPTOFH ,
|
|
.Nm VFS_CHECKEXP ,
|
|
.Nm VFS_SNAPSHOT
|
|
.Nd kernel file system interface
|
|
.Sh SYNOPSIS
|
|
.In sys/param.h
|
|
.In sys/mount.h
|
|
.In sys/vnode.h
|
|
.Ft int
|
|
.Fn VFS_MOUNT "struct mount *mp" "const char *path" "void *data" \
|
|
"struct nameidata *ndp" "struct proc *p"
|
|
.Ft int
|
|
.Fn VFS_START "struct mount *mp" "int flags" "struct proc *p"
|
|
.Ft int
|
|
.Fn VFS_UNMOUNT "struct mount *mp" "int mntflags" "struct proc *p"
|
|
.Ft int
|
|
.Fn VFS_ROOT "struct mount *mp" "struct vnode **vpp"
|
|
.Ft int
|
|
.Fn VFS_QUOTACTL "struct mount *mp" "int cmds" "uid_t uid" \
|
|
"caddr_t arg" "struct proc *p"
|
|
.Ft int
|
|
.Fn VFS_STATFS "struct mount *mp" "struct statfs *sbp" "struct proc *p"
|
|
.Ft int
|
|
.Fn VFS_SYNC "struct mount *mp" "int waitfor" "struct ucred *cred" \
|
|
"struct proc *p"
|
|
.Ft int
|
|
.Fn VFS_VGET "struct mount *mp" "ino_t ino" "struct vnode **vpp"
|
|
.Ft int
|
|
.Fn VFS_FHTOVP "struct mount *mp" "struct fid *fhp" "struct vnode **vpp"
|
|
.Ft int
|
|
.Fn VFS_VPTOFH "struct vnode *vp" "struct fid *fhp"
|
|
.Ft int
|
|
.Fn VFS_CHECKEXP "struct mount *mp" "struct mbuf *nam" \
|
|
"int *extflagsp" "struct ucred **credanonp"
|
|
.Ft int
|
|
.Fn VFS_SNAPSHOT "struct mount *mp" "struct vnode *vp" "struct timespec *ts"
|
|
.Sh DESCRIPTION
|
|
In a similar fashion to the
|
|
.Xr vnode 9
|
|
interface, all operations that are done on a file system are conducted
|
|
through a single interface that allows the system to carry out
|
|
operations on a file system without knowing its construction or type.
|
|
.Pp
|
|
All supported file systems in the kernel have an entry in the
|
|
.Va vfs_list_initial
|
|
table.
|
|
This table is generated by
|
|
.Xr config 8
|
|
and is a
|
|
.Dv NULL Ns No -terminated
|
|
list of
|
|
.Em vfsops
|
|
structures.
|
|
The vfsops structure describes the operations that can be done to a
|
|
specific file system type.
|
|
The following table lists the elements of the vfsops vector, the
|
|
corresponding invocation macro, and a description of the element.
|
|
.Pp
|
|
.nf
|
|
.ta \w'int (*vfs_mountroot)()'u+2n +\w'VFS_QUOTACTL'u+2n +\w'Get the file system root vnode'u
|
|
\fIVector element\fP \fIMacro\fP \fIDescription\fP
|
|
.ta \w'int (*vfs_mountroot)()'u+2n +\w'VFS_QUOTACTL'u+2n +\w'Get the file system root vnode'u+6nC
|
|
.sp 5p
|
|
int (*vfs_mount)() VFS_MOUNT Mount a file system
|
|
int (*vfs_start)() VFS_START Make operational
|
|
int (*vfs_unmount)() VFS_UNMOUNT Unmount a file system
|
|
int (*vfs_root)() VFS_ROOT Get the file system root vnode
|
|
int (*vfs_quotactl)() VFS_QUOTACTL Query/modify space quotas
|
|
int (*vfs_statfs)() VFS_STATFS Get file system statistics
|
|
int (*vfs_sync)() VFS_SYNC Flush file system buffers
|
|
int (*vfs_vget)() VFS_VGET Get vnode from file id
|
|
int (*vfs_fhtovp)() VFS_FHTOVP NFS file handle to vnode lookup
|
|
int (*vfs_vptofh)() VFS_VPTOFH Vnode to NFS file handle lookup
|
|
void (*vfs_init)() - Initialise file system
|
|
void (*vfs_reinit)() - Reinitialise file system
|
|
void (*vfs_done)() - Cleanup unmounted file system
|
|
int (*vfs_mountroot)() - Mount the root file system
|
|
int (*vfs_checkexp)() VFS_CHECKEXP Check if file system is exported
|
|
int (*vfs_snapshot)() VFS_SNAPSHOT Take a snapshot
|
|
.fi
|
|
.Pp
|
|
Some additional non-function members of the vfsops structure are the
|
|
file system name
|
|
.Ns Em vfs_name
|
|
and a reference count
|
|
.Ns Em vfs_refcount .
|
|
It is not mandatory for a file system type to support a particular
|
|
operation, but it must assign each member function pointer to a
|
|
suitable function to do the minimum required of it.
|
|
In most cases, such functions either do nothing or return an error
|
|
value to the effect that it is not supported.
|
|
.Em vfs_reinit
|
|
and
|
|
.Em vfs_mountroot
|
|
may
|
|
be
|
|
.Dv NULL .
|
|
.Pp
|
|
At system boot, each file system with an entry in
|
|
.Va vfs_list_initial
|
|
is established and initialised.
|
|
Each initialised file system is recorded by the kernel in the list
|
|
.Va vfs_list
|
|
and the file system specific initialisation function
|
|
.Em vfs_init
|
|
in its vfsops vector is invoked.
|
|
When the file system is no longer needed
|
|
.Em vfs_done
|
|
is invoked to run file system specific cleanups and the file system is
|
|
removed from the kernel list.
|
|
.Pp
|
|
At system boot, the root filesystem is mounted by invoking the file
|
|
system type specific
|
|
.Em vfs_mountroot
|
|
function in the vfsops vector.
|
|
All filesystems that can be mounted as a root file system must define
|
|
this function.
|
|
It is responsible for initialising the list of mount structures for
|
|
all future mounted file systems.
|
|
.Pp
|
|
Kernel state which affects a specific file system type can be
|
|
queried and modified using the
|
|
.Xr sysctl 8
|
|
interface.
|
|
.Sh FUNCTIONS
|
|
.Bl -tag -width compact
|
|
.It Fn VFS_MOUNT "mp" "path" "data" "ndp" "p"
|
|
Mount a file system specified by the mount structure
|
|
.Fa mp
|
|
on the mount point described by
|
|
.Fa path .
|
|
The argument
|
|
.Fa data
|
|
contains file system type specific data and is read into the kernel
|
|
using
|
|
.Xr copyin 9 .
|
|
The argument
|
|
.Fa ndp
|
|
contains the result of a
|
|
.Xr namei 9
|
|
call on the pathname of the mount point and
|
|
.Fa p
|
|
is the calling process.
|
|
.Fn VFS_MOUNT
|
|
initialises the mount structure for the mounted file system.
|
|
This structure records mount-specific information for the file system and
|
|
records the list of vnodes associated with the file system.
|
|
This function is invoked both to mount new file systems and to change the
|
|
attributes of an existing file system.
|
|
If the MNT_UPDATE flag is set in
|
|
.Em mp-\*[Gt]mnt_flag
|
|
then the filesystem should update its internal state from the value of
|
|
.Em mp-\*[Gt]mnt_flag .
|
|
This can be used, for instance, to convert a read-only filesystem to
|
|
read-write.
|
|
If the MNT_UPDATE flag is not specified, then this is a newly
|
|
mounted filesystem.
|
|
.It Fn VFS_START "mp" "flags" "p"
|
|
Make the file system specified by the mount structure
|
|
.Fa mp
|
|
operational.
|
|
The argument
|
|
.Fa p
|
|
is the calling process.
|
|
The argument
|
|
.Fa flags
|
|
is a set of flags for controlling the operation of
|
|
.Fn VFS_START .
|
|
This function is invoked after
|
|
.Fn VFS_MOUNT
|
|
and before the first access to the file system.
|
|
.It Fn VFS_UNMOUNT "mp" "mntflags" "p"
|
|
Unmount a file system specified by the mount structure
|
|
.Fa mp .
|
|
The argument
|
|
.Fa p
|
|
is the calling process.
|
|
.Fn VFS_UNMOUNT
|
|
performs any file system type specific operations required before the
|
|
file system is unmounted, such as flushing buffers.
|
|
If MNT_FORCE is specified in the flags
|
|
.Fa mntflags
|
|
then open files are forcibly closed.
|
|
The function also deallocates space associated with data structures
|
|
that were allocated for the file system when it was mounted.
|
|
.It Fn VFS_ROOT "mp" "vpp"
|
|
Get the root vnode of the file system specified by the mount
|
|
structure
|
|
.Fa mp .
|
|
The vnode is returned in the address given by
|
|
.Fa vpp .
|
|
This function is used by the pathname translation algorithms when a
|
|
vnode that has been covered by a mounted file system is encountered.
|
|
While resolving the pathname, the pathname translation algorithm will
|
|
have to go through the directory tree in the file system associated
|
|
with that mount point and therefore requires the root vnode of the
|
|
file system.
|
|
.It Fn VFS_QUOTACTL "mp" "cmds" "uid" "arg" "p"
|
|
Query/modify user space quotas for the file system specified by the
|
|
mount structure
|
|
.Fa mp .
|
|
The argument
.Fa cmds
specifies the control command to perform.
|
|
The userid is specified in
|
|
.Fa uid ,
|
|
the calling process is
|
|
.Fa p
|
|
and
|
|
.Fa arg
|
|
allows command-specific data to be returned to the system call
|
|
interface.
|
|
.Fn VFS_QUOTACTL
|
|
is the file system type specific implementation of the
|
|
.Xr quotactl 2
|
|
system call.
|
|
.It Fn VFS_STATFS "mp" "sbp" "p"
|
|
Get file system statistics for the file system specified by the mount
|
|
structure
|
|
.Fa mp .
|
|
The argument
|
|
.Fa p
|
|
is the calling process.
|
|
A statfs structure filled with the statistics is returned in
|
|
.Fa sbp .
|
|
.Fn VFS_STATFS
|
|
is the file system type specific implementation of the
|
|
.Xr statfs 2
|
|
and
|
|
.Xr fstatfs 2
|
|
system calls.
|
|
.It Fn VFS_SYNC "mp" "waitfor" "cred" "p"
|
|
Flush file system I/O buffers for the file system specified by the mount
|
|
structure
|
|
.Fa mp .
|
|
The
|
|
.Fa waitfor
|
|
argument indicates whether a partial flush or complete flush should be
|
|
performed.
|
|
The arguments
|
|
.Fa p
|
|
and
|
|
.Fa cred
|
|
specify the calling process and its credentials respectively.
|
|
.Fn VFS_SYNC
|
|
does not provide any return value since the operation can never fail.
|
|
.It Fn VFS_VGET "mp" "ino" "vpp"
|
|
Get vnode for a file system type specific file id
|
|
.Fa ino
|
|
for the file system specified by the mount structure
|
|
.Fa mp .
|
|
The vnode is returned in the address specified by
|
|
.Fa vpp .
|
|
The function is optional for file systems which have a unique id
|
|
number for every file in the file system.
|
|
It is used internally by the UFS file system and also by the NFSv3
|
|
server to implement the READDIRPLUS nfs call.
|
|
If the file system does not support this function, it should return
|
|
.Er EOPNOTSUPP .
|
|
.It Fn VFS_FHTOVP "mp" "fhp" "vpp"
|
|
Get the vnode for the NFS file specified by the file handle
|
|
.Fa fhp
|
|
in the file system specified by the mount structure
|
|
.Fa mp .
|
|
The locked vnode is returned in
|
|
.Fa vpp .
|
|
.Pp
|
|
A call to
|
|
.Fn VFS_FHTOVP
|
|
should generally be followed by a call to
|
|
.Fn VFS_CHECKEXP
|
|
to check if the file is accessible to the client.
|
|
.It Fn VFS_VPTOFH "vp" "fhp"
|
|
Get a unique NFS file handle for the file specified by the vnode
|
|
.Fa vp .
|
|
The file handle is returned in
|
|
.Fa fhp .
|
|
The contents of the file handle are defined by the file system and are
|
|
not examined by any other subsystems.
|
|
It should contain enough information to uniquely identify a file within
|
|
the file system as well as noticing when a file has been removed and
|
|
the file system resources have been recycled for a new file.
|
|
.It Fn VFS_CHECKEXP "mp" "nam" "extflagsp" "credanonp"
|
|
Check if the file system specified by the mount structure
|
|
.Fa mp
|
|
is exported to a client with anonymous credentials
|
|
.Fa credanonp .
|
|
The argument
|
|
.Fa nam
|
|
is an mbuf containing the network address of the client.
|
|
The return parameters for the export flags for the client are returned
|
|
in the address specified by
|
|
.Fa extflagsp .
|
|
This function is used by the NFS server.
|
|
It is generally invoked before
|
|
.Fn VFS_FHTOVP
|
|
to validate that the client has access to the file system.
|
|
The file system should call
|
|
.Fn vfs_export_lookup
|
|
with the address of an appropriate netexport structure and the address
|
|
of the client to verify that the client can access this file system.
|
|
.It Fn VFS_SNAPSHOT "mp" "vp" "ts"
|
|
Take a snapshot of the file system specified by the mount structure
|
|
.Fa mp
|
|
and make it accessible through the locked vnode
|
|
.Fa vp .
|
|
If
|
|
.Fa ts
|
|
is not
|
|
.Dv NULL
|
|
it will receive the time this snapshot was taken.
|
|
If the file system does not support this function, it should return
|
|
.Er EOPNOTSUPP .
|
|
.El
|
|
.Sh CODE REFERENCES
|
|
This section describes places within the
|
|
.Nx
|
|
source tree where actual code implementing or using the vfs
|
|
operations can be found.
|
|
All pathnames are relative to
|
|
.Pa /usr/src .
|
|
.Pp
|
|
The vfs operations are implemented within the files
|
|
.Pa sys/kern/vfs_subr.c
|
|
and
|
|
.Pa sys/kern/vfs_init.c .
|
|
.Sh SEE ALSO
|
|
.Xr intro 9 ,
|
|
.Xr namei 9 ,
|
|
.Xr vfs 9 ,
|
|
.Xr vfssubr 9 ,
|
|
.Xr vnode 9 ,
|
|
.Xr vnodeops 9
|
|
.Sh HISTORY
|
|
The vfs operations vector, its functions and the corresponding macros
|
|
appeared in
|
|
.Bx 4.3 .
|