diff --git a/share/man/man9/genfs_rename.9 b/share/man/man9/genfs_rename.9 new file mode 100644 index 000000000000..e289b7330b7e --- /dev/null +++ b/share/man/man9/genfs_rename.9 @@ -0,0 +1,621 @@ +.\" $NetBSD: genfs_rename.9,v 1.1 2013/05/01 18:56:48 riastradh Exp $ +.\" +.\" Copyright (c) 2013 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This documentation is derived from text contributed to The NetBSD +.\" Foundation by Taylor R. Campbell. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd May 1, 2013 +.Dt GENFS_RENAME 9 +.Os +.Sh NAME +.Nm genfs_rename , +.Nm genfs_insane_rename , +.Nm genfs_sane_rename +.Nd generic framework for implementing +.Xr VOP_RENAME 9 +.Sh SYNOPSIS +.Ft int +.Fo genfs_insane_rename +.Fa "struct vop_rename_args *v" +.Fa "int (*sane_rename)(struct vnode *fdvp, struct componentname *fcnp, struct vnode *tdvp, struct componentname *tcnp, kauth_cred_t, bool)" +.Fc +.Ft int +.Fo genfs_sane_rename +.Fa "const struct genfs_rename_ops *gro" +.Fa "struct vnode *fdvp" +.Fa "struct componentname *fcnp" +.Fa "void *fde" +.Fa "struct vnode *tdvp" +.Fa "struct componentname *tcnp" +.Fa "void *tde" +.Fa "kauth_cred_t cred" +.Fa "bool posixly_correct" +.Fc +.Ft int +.Fo genfs_rename_knote +.Fa "struct vnode *fdvp" +.Fa "struct vnode *fvp" +.Fa "struct vnode *tdvp" +.Fa "struct vnode *tvp" +.Fc +.Ft void +.Fo genfs_rename_cache_purge +.Fa "struct vnode *fdvp" +.Fa "struct vnode *fvp" +.Fa "struct vnode *tdvp" +.Fa "struct vnode *tvp" +.Fc +.Ft int +.Fo genfs_ufslike_rename_check_possible +.Fa "unsigned long fdflags" +.Fa "unsigned long fflags" +.Fa "unsigned long tdflags" +.Fa "unsigned long tflags" +.Fa "bool clobber" +.Fa "unsigned long immutable" +.Fa "unsigned long append" +.Fc +.Ft int +.Fo genfs_ufslike_rename_check_permitted +.Fa "kauth_cred_t cred" +.Fa "struct vnode *fdvp" +.Fa "mode_t fdmode" +.Fa "uid_t fduid" +.Fa "struct vnode *fvp" +.Fa "uid_t fuid" +.Fa "struct vnode *tdvp" +.Fa "mode_t tdmode" +.Fa "uid_t tduid" +.Fa "struct vnode *tvp" +.Fa "uid_t tuid" +.Fc +.Ft int +.Fo genfs_ufslike_remove_check_possible +.Fa "unsigned long dflags" +.Fa "unsigned long flags" +.Fa "unsigned long immutable" +.Fa "unsigned long append" +.Fc +.Ft int +.Fo genfs_ufslike_remove_check_permitted +.Fa "kauth_cred_t cred" +.Fa "struct vnode *dvp" +.Fa "mode_t dmode" +.Fa "uid_t duid" +.Fa "struct vnode *vp" +.Fa "uid_t uid" +.Fc +.Sh DESCRIPTION +The +.Nm +functions provide a file-system-independent framework for implementing +.Xr VOP_RENAME 9 +with correct locking and error-checking. +.Pp +Implementing rename is nontrivial. +If you are doing it for a new file system, you should consider starting +from +.Fn tmpfs_rename +as implemented in +.Pa sys/fs/tmpfs/tmpfs_rename.c +and adapting it to your file system's physical operations. +.Pp +Because there are so many moving parts to a rename operation, +.Nm +uses the following naming conventions: +.Bl -tag -width indent +.It Fa mp Pq "mount point" +mount point of the file system in question +.It Fa fdvp Pq "from directory vnode pointer" +directory from which we are removing an entry +.It Fa fcnp Pq "from componentname pointer" +name of entry to remove from +.Fa fdvp +.It Fa fde Pq "from directory entry" +fs-specific data about the entry in +.Fa fdvp +.It Fa fvp Pq "from vnode pointer" +file at the entry named +.Fa fcnp +in +.Fa fdvp +.It Fa tdvp Pq "to directory vnode pointer" +directory to which we are adding an entry +.It Fa tcnp Pq "to componentname pointer" +name of entry to add to +.Fa tdvp +.It Fa tde Pq "to directory entry" +fs-specific data about the entry in +.Fa tdvp +.It Fa tvp Pq "to vnode pointer" +file previously at the entry named +.Fa tcnp +in +.Fa tdvp , +to be replaced, if any, or +.Dv NULL +if there was no entry before +.It Fa vp Pq "vnode pointer" +any file +.It Fa dvp Pq "directory vnode pointer" +any directory with an entry for +.Fa vp +.El +.Pp +A file system mumblefs should implement various file-system-dependent +parts of the rename operation in a +.Vt struct genfs_rename_ops , +and use +.Nm +to implement +.Fn mumblefs_rename +for +.Xr VOP_RENAME 9 +as follows: +.Bd -literal -offset indent +static const struct genfs_rename_ops mumblefs_genfs_rename_ops; + +static int +mumblefs_sane_rename( + struct vnode *fdvp, struct componentname *fcnp, + struct vnode *tdvp, struct componentname *tcnp, + kauth_cred_t cred, bool posixly_correct) +{ + struct mumblefs_lookup_results fulr, tulr; + + return genfs_sane_rename(&mumblefs_genfs_rename_ops, + fdvp, fcnp, &fulr, tdvp, tcnp, &tulr, + cred, posixly_correct); +} + +int +mumblefs_rename(void *v) +{ + + return genfs_insane_rename(v, &mumblefs_sane_rename); +} +.Ed +.Pp +The split between +.Fn mumblefs_rename +and +.Fn mumblefs_sane_rename +is designed to enable us to easily change the +.Xr VOP_RENAME 9 +interface, which is currently designed for a broken (hence +.Sq insane ) +locking scheme, to a more sensible locking scheme once all the file +systems have their rename operations split up thus. +.Pp +The +.Vt struct mumblefs_lookup_results +structure is storage for information about directory entries which +needs to pass from the lookups of the children (see the +.Fa gro_lookup +member of +.Vt "struct genfs_rename_ops" ) +to the physical on-disk rename or remove operations (see the +.Fa gro_rename +and +.Fa gro_remove +members of +.Vt "struct genfs_rename_ops" ) . +.Pp +Callers must implement the following operations as members in a +.Vt struct genfs_rename_ops +structure passed to +.Nm : +.Bl -tag -width indent +.It Ft int Fn "(*gro_genealogy)" "struct mount *mp" "kauth_cred_t cred" "struct vnode *fdvp" "struct vnode *tdvp" "struct vnode **intermediate_node_ret" +Walk up the directory tree from the directory vnode +.Fa tdvp +until hitting either +.Fa fdvp +or the root. +If +.Fa fdvp +is hit, store the child of +.Fa fdvp +through which the path from +.Fa tdvp +passed in +.Fa *intermediate_node_ret , +referenced but unlocked. +If +.Fa fdvp +is not hit, store +.Dv NULL +in +.Fa *intermediate_node_ret . +Return zero on success or error on failure. +(Failure means file-system-specific failures, not hitting or missing +.Fa fdvp . ) +.Pp +.Fa fdvp +and +.Fa tdvp +are guaranteed to be distinct, nonnull, referenced, and unlocked. +Since no locks are held on entry except for the file-system-wide rename +lock, +.Fa gro_genealogy +may take any locks it pleases. +.It Ft int Fn "(*gro_lock_directory)" "struct mount *mp" "struct vnode *vp" +Lock the directory vnode +.Fa vp , +but fail if it has been rmdired already. +Return zero on success or error on failure. +.It Ft int Fn "(*gro_lookup)" "struct mount *mp" "struct vnode *dvp" "struct componentname *cnp" "void *de" "struct vnode **vpp" +Look up the entry in +.Fa dvp +for +.Fa cnp , +storing the vnode in +.Fa "*vpp" +and using +.Fa de , +one of the pointers passed to +.Nm genfs_sane_rename , +to store information about the directory entry as needed by the file +system's +.Fa gro_rename +operation, and return zero. +If there is no such entry, return error. +.Pp +.Fa dvp +is guaranteed to be locked, and the vnode returned in +.Fa "*vpp" +must be unlocked. +However, +.Fa gro_lookup +may temporarily lock the vnode without causing deadlock. +.It Ft bool Fn "(*gro_directory_empty_p)" "struct mount *mp" "kauth_cred_t cred" "struct vnode *vp" "struct vnode *dvp" +Return true if the directory vnode +.Fa vp +is empty. +The argument +.Fa dvp +is the parent of +.Fa vp , +as required for this check by some file systems. +.Pp +.Fa dvp +and +.Fa vp +are guaranteed to be distinct, nonnull, referenced, and locked. +.It Ft int Fn "(*gro_rename_check_possible)" "struct mount *mp" "struct vnode *fdvp" "struct vnode *fvp" "struct vnode *tdvp" "struct vnode *tvp" +Return zero if the file system might allow the rename independent of +credentials, or error if not. +This should check, for example, any immutability flags in the vnodes in +question, and should use +.Fn genfs_ufslike_rename_check_possible +for file systems similar to UFS/FFS. +.Pp +.Fa fdvp +and +.Fa tdvp +may be the same; every other pair of vnodes is guaranteed to be +distinct. +.Fa tvp +may be +.Dv NULL ; +every other vnode is guaranteed to be nonnull. +All three or four vnodes are guaranteed to be referenced and locked. +.It Ft int Fn "(*gro_rename_check_permitted)" "struct mount *mp" "kauth_cred_t cred" "struct vnode *fdvp" "struct vnode *fvp" "struct vnode *tdvp" "struct vnode *tvp" +Return zero if the file system allows the rename given the credentials +.Fa cred , +or error if not. +This should check, for example, the ownership and permissions bits of +the vnodes in question, and should use +.Fn genfs_ufslike_rename_check_permitted +for file systems similar to UFS/FFS. +.Pp +.Fa fdvp +and +.Fa tdvp +may be the same; every other pair of vnodes is guaranteed to be +distinct. +.Fa tvp +may be +.Dv NULL ; +every other vnode is guaranteed to be nonnull. +All three or four vnodes are guaranteed to be referenced and locked. +.It Ft int Fn "(*gro_rename)" "struct mount *mp" "kauth_cred_t cred" "struct vnode *fdvp" "struct componentname *fcnp" "void *fde" "struct vnode *fvp" "struct vnode *tdvp" "struct componentname *tcnp" "void *tde" "struct vnode *tvp" +Perform the physical file system rename operation, report any knotes, +and purge the namecache entries. +Return zero on success or error on failure. +All file-system-independent error cases have been handled already. +.Pp +File systems using +.Xr fstrans 9 +should use +.Xr fstrans_start 9 +and +.Xr fstrans_done 9 +here. +.Fa fde +and +.Fa tde +are the pointers that were supplied to +.Fn genfs_sane_rename +and got passed to the +.Fa gro_lookup +operation to find information about directory entries. +.Pp +This may use +.Fn genfs_rename_knote +to report any knotes, if the various file-system-dependent routines it +uses to edit links don't do that already. +This should use +.Fn genfs_rename_cache_purge +to purge the namecache. +.Pp +.Fa fdvp +and +.Fa tdvp +may be the same; every other pair of vnodes is guaranteed to be +distinct. +.Fa tvp +may be null; every other vnode is guaranteed to be nonnull. +All three or four vnodes are guaranteed to be referenced and locked. +.It Ft int Fn "(*gro_remove_check_possible)" "struct mount *mp" "struct vnode *dvp" "struct vnode *vp" +Return zero if the file system might allow removing an entry in +.Fa dvp +for +.Fa vp +independent of credentials, or error if not. +This should use +.Fn genfs_ufslike_remove_check_possible +for file systems similar to UFS/FFS. +.Pp +.Fa dvp +and +.Fa vp +are guaranteed to be distinct, nonnull, referenced, and locked. +.Pp +This, and +.Fa gro_remove_check_permitted +below, are for renames that reduce to a remove; that is, renaming one +entry to another when both entries refer to the same file. +For reasons of locking insanity, +.Nm +cannot simply call +.Xr VOP_REMOVE 9 +instead. +.It Ft int Fn "(*gro_remove_check_permitted)" "struct mount *mp" "kauth_cred_t cred" "struct vnode *dvp" "struct vnode *vp" +Return zero if the file system allows removing an entry in +.Fa dvp +for +.Fa vp +given the credentials +.Fa cred , +or error if not. +This should use +.Fn genfs_ufslike_remove_check_permitted +for file systems similar to UFS/FFS. +.Pp +.Fa dvp +and +.Fa vp +are guaranteed to be distinct, nonnull, referenced, and locked. +.It Ft int Fn "(*gro_remove)" "struct mount *mp" "kauth_cred_t cred" "struct vnode *dvp" "struct componentname *cnp" "void *de" "struct vnode *vp" +For a rename that is effectively a remove, perform the physical file +system remove operation, report any knotes, and purge the namecache +entries. +Return zero on success or error on failure. +All file-system-independent error cases have been handled already. +.Pp +File systems using +.Xr fstrans 9 +should use +.Xr fstrans_start 9 +and +.Xr fstrans_done 9 +here. +.Fa de +is one of the pointers that were supplied to +.Fn genfs_sane_rename +and got passed to the +.Fa gro_lookup +operation to find information about directory entries. +.Pp +This should signal a +.Dv NOTE_WRITE +knote for +.Fa dvp , +and either a +.Dv NOTE_DELETE +or a +.Dv NOTE_LINK +knote for +.Fa vp , +depending on whether this removed the last link to it or not. +.Pp +.Fa dvp +and +.Fa vp +are guaranteed to be distinct, nonnull, referenced, and locked. +.El +.Pp +The following utilities are provided for implementing the +.Vt struct genfs_rename_ops +operations: +.Bl -tag -width indent +.It Fn genfs_rename_knote fdvp fvp tdvp tvp +Signal all the knotes relevant for the rename operation. +.It Fn genfs_rename_cache_purge fdvp fvp tdvp tvp +Purge any namecache entries that the rename operation invalidates. +.It Fn genfs_ufslike_rename_check_possible fdflags fflags tdflags tflags clobber immutable append +Check whether the UFS/FFS-like flags of the files involved a rename +allow it. +Return zero if allowed or error if not. +.Pp +.Bl -tag -width immutable -compact +.It Fa fdflags +flags of source directory +.It Fa fflags +flags of source file +.It Fa tdflags +flags of target directory +.It Fa tflags +flags of target file, if there is one and +.Fa clobber +is true, or ignored otherwise +.It Fa clobber +true if there is a target file whose entry will be clobbered or false +if not +.It Fa immutable +bit mask for the file system's immutable bit, like the UFS/FFS +.Dv IMMUTABLE +.It Fa append +bit mask for the file system's append-only bit, like the UFS/FFS +.Dv APPEND +.El +.It Fn genfs_ufslike_rename_check_permitted cred fdvp fdmode fduid fvp fuid tdvp tdmode tduid tvp tuid +Check whether the credentials +.Fa cred +are permitted by the file ownership and permissions bits to perform a +rename. +Return zero if permitted or error if not. +.Pp +.Bl -tag -width fdmode -compact +.It Fa cred +caller's credentials +.It Fa fdvp +source directory +.It Fa fdmode +file permissions bits of +.Fa fdvp +.It Fa fduid +uid of the owner of +.Fa fdvp +.It Fa fvp +source file +.It Fa fuid +uid of owner of +.Fa fvp +.It Fa tdvp +target directory +.It Fa tdmode +file permissions bits of +.Fa tdvp +.It Fa tduid +uid of owner of +.Fa tdvp +.It Fa tvp +target file, if there is one, or +.Dv NULL +if not +.It Fa tuid +uid of owner of +.Fa tvp , +if there is a target file, or ignored otherwise +.El +.It Fn genfs_ufslike_remove_check_possible dflags flags immutable append +Check whether the UFS/FFS-like flags of the files involved a remove +allow it. +Return zero if allowed or error if not. +.Pp +.Bl -tag -width immutable -compact +.It Fa dflags +flags of the directory +.It Fa flags +flags of the file in the directory +.It Fa immutable +bit mask for the file system's immutable bit, like the UFS/FFS +.Dv IMMUTABLE +.It Fa append +bit mask for the file system's append-only bit, like the UFS/FFS +.Dv APPEND +.El +.It Fn genfs_ufslike_remove_check_permitted cred dvp dmode duid vp uid +Check whether the credentials +.Fa cred +are permitted by the file ownership and permissions bits to perform a +remove. +Return zero if permitted or error if not. +.Pp +.Bl -tag -width fdmode -compact +.It Fa cred +caller's credentials +.It Fa dvp +directory +.It Fa dmode +file permissions bits of +.Fa dvp +.It Fa duid +uid of owner of +.Fa dvp +.It Fa vp +file in +.Fa dvp +.It Fa uid +uid of owner of +.Fa vp +.El +.El +.Sh NOTES +Because there are so many cases of rename, it cannot be assumed a +priori that any pairs of +.Fa fdvp , +.Fa fvp , +.Fa tdvp , +or +.Fa fvp +are distinct: +.Bl -column -offset indent \ +"fdvp = tdvp" "rename(\*qa/b\*q, \*qa/c\*q)" +.It Li "fdvp = fvp" Ta Li "rename(\*qa/.\*q, \*qb\*q)" +.It Li "fdvp = tdvp" Ta Li "rename(\*qa/b\*q, \*qa/c\*q)" +.It Li "fdvp = tvp" Ta Li "rename(\*qa/b\*q, \*qa\*q)" +.It Li "fvp = tdvp" Ta Li "rename(\*qa\*q, \*qa/b\*q)" +.It Li "fvp = tvp" Ta Li "rename(\*qa\*q, \*qa\*q)" +.It Li "tdvp = tvp" Ta Li "rename(\*qa\*q, \*qb/.\*q)" +.El +.Pp +Handling all these cases correctly, and getting the locking correct and +deadlock-free, is very tricky, which is why +.Nm +exists. +The interface to +.Nm +is very complicated because it must fit the insane +.Xr VOP_RENAME 9 +and +.Xr VOP_LOOKUP 9 +protocols until we can fix them, and because it must accomodate a +variety of crufty file systems. +.Sh SEE ALSO +.Xr genfs 9 , +.Xr vfs 9 , +.Xr vnodeops 9 +.Sh HISTORY +.Nm +was designed and implemented by +.An Taylor R. Campbell Aq riastradh@NetBSD.org +after many discussions with +.An David Holland Aq dholland@NetBSD.org , +and first appeared in +.Nx 6.0 .