- Rework and document inode reference counting.  Also document inode life
  cycle (destruction part).  Perform link counting in tmpfs_dir_attach()
  and tmpfs_dir_detach(), instead of alloc/free and arbitrary places.
  Fixes PR/44285, PR/44288, PR/44657 and likely PR/42484.

- Fix the race between the lookup and inode destruction.  Fixes PR/43167
  and its duplicates PR/40088, PR/40757.

- Improve tmpfs_rename() locking a little, fix kqueue event notifications
  and also fix PR/43617.  Add simplistic tmpfs_parentcheck_p(); to be
  expanded and used for further rename() locking fixes.

- Cache directory entry "hint" in the tmpfs node, add tmpfs_dir_cached(),
  and thus avoid unnecessary lookup in tmpfs_remove() and tmpfs_rmdir().

- Set correct _PC_FILESIZEBITS value in tmpfs_pathconf().  Fixes PR/43576.

- Few minor fixes.
This commit is contained in:
rmind 2011-05-29 22:29:06 +00:00
parent 41ceba0065
commit 4781942ccb
4 changed files with 405 additions and 255 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: tmpfs.h,v 1.43 2011/05/29 01:14:31 christos Exp $ */
/* $NetBSD: tmpfs.h,v 1.44 2011/05/29 22:29:06 rmind Exp $ */
/*
* Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@ -110,11 +110,29 @@ tmpfs_dircookie(tmpfs_dirent_t *de)
typedef struct tmpfs_node {
LIST_ENTRY(tmpfs_node) tn_entries;
/*
* Each inode has a corresponding vnode. It is a bi-directional
* association. Whenever a vnode is allocated, its v_data field is
* set to the inode it references, and tmpfs_node_t::tn_vnode is
* set to point to the said vnode.
*
* Further attempts to allocate a vnode for this same node will
* result in returning a new reference to the value stored in
* tn_vnode. It may be NULL when the node is unused (that is,
* no vnode has been allocated or it has been reclaimed).
*/
kmutex_t tn_vlock;
vnode_t * tn_vnode;
/* Directory entry. Only a hint, since hard link can have multiple. */
tmpfs_dirent_t * tn_dirent_hint;
/* The inode type: VBLK, VCHR, VDIR, VFIFO, VLNK, VREG or VSOCK. */
enum vtype tn_type;
/* Inode identifier. */
/* Inode identifier and generation number. */
ino_t tn_id;
unsigned long tn_gen;
/* Inode status flags (for operations in delayed manner). */
int tn_status;
@ -132,25 +150,10 @@ typedef struct tmpfs_node {
struct timespec tn_mtime;
struct timespec tn_ctime;
struct timespec tn_birthtime;
unsigned long tn_gen;
/* Head of byte-level lock list (used by tmpfs_advlock). */
struct lockf * tn_lockf;
/*
* Each inode has a corresponding vnode. It is a bi-directional
* association. Whenever vnode is allocated, its v_data field is
* set to the inode it reference, and tmpfs_node_t::tn_vnode is
* set to point to the said vnode.
*
* Further attempts to allocate a vnode for this same node will
* result in returning a new reference to the value stored in
* tn_vnode. It may be NULL when the node is unused (that is,
* no vnode has been allocated or it has been reclaimed).
*/
kmutex_t tn_vlock;
vnode_t * tn_vnode;
union {
/* Type case: VBLK or VCHR. */
struct {
@ -200,6 +203,19 @@ LIST_HEAD(tmpfs_node_list, tmpfs_node);
#define TMPFS_NODE_STATUSALL \
(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)
/*
* Bit indicating vnode reclamation.
* We abuse tmpfs_node_t::tn_gen for that.
*/
#define TMPFS_NODE_GEN_MASK (~0UL >> 1)
#define TMPFS_RECLAIMING_BIT (~TMPFS_NODE_GEN_MASK)
#define TMPFS_NODE_RECLAIMING(node) \
(((node)->tn_gen & TMPFS_RECLAIMING_BIT) != 0)
#define TMPFS_NODE_GEN(node) \
((node)->tn_gen & TMPFS_NODE_GEN_MASK)
/* White-out inode indicator. */
#define TMPFS_NODE_WHITEOUT ((tmpfs_node_t *)-1)
@ -242,22 +258,23 @@ typedef struct tmpfs_fid {
*/
int tmpfs_alloc_node(tmpfs_mount_t *, enum vtype, uid_t, gid_t,
mode_t, tmpfs_node_t *, char *, dev_t, tmpfs_node_t **);
mode_t, char *, dev_t, tmpfs_node_t **);
void tmpfs_free_node(tmpfs_mount_t *, tmpfs_node_t *);
int tmpfs_alloc_file(vnode_t *, vnode_t **, struct vattr *,
struct componentname *, char *);
int tmpfs_alloc_vp(struct mount *, tmpfs_node_t *, vnode_t **);
void tmpfs_free_vp(vnode_t *);
int tmpfs_vnode_get(struct mount *, tmpfs_node_t *, vnode_t **);
int tmpfs_alloc_dirent(tmpfs_mount_t *, tmpfs_node_t *,
const char *, uint16_t, tmpfs_dirent_t **);
void tmpfs_free_dirent(tmpfs_mount_t *, tmpfs_dirent_t *, bool);
void tmpfs_dir_attach(vnode_t *, tmpfs_dirent_t *);
int tmpfs_alloc_dirent(tmpfs_mount_t *, const char *, uint16_t,
tmpfs_dirent_t **);
void tmpfs_free_dirent(tmpfs_mount_t *, tmpfs_dirent_t *);
void tmpfs_dir_attach(vnode_t *, tmpfs_dirent_t *, tmpfs_node_t *);
void tmpfs_dir_detach(vnode_t *, tmpfs_dirent_t *);
tmpfs_dirent_t *tmpfs_dir_lookup(tmpfs_node_t *, struct componentname *);
tmpfs_dirent_t *tmpfs_dir_cached(tmpfs_node_t *);
int tmpfs_dir_getdotdent(tmpfs_node_t *, struct uio *);
int tmpfs_dir_getdotdotdent(tmpfs_node_t *, struct uio *);
tmpfs_dirent_t *tmpfs_dir_lookupbycookie(tmpfs_node_t *, off_t);

View File

@ -1,12 +1,12 @@
/* $NetBSD: tmpfs_subr.c,v 1.70 2011/05/25 02:03:22 rmind Exp $ */
/* $NetBSD: tmpfs_subr.c,v 1.71 2011/05/29 22:29:06 rmind Exp $ */
/*
* Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
* Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Julio M. Merino Vidal, developed as part of Google's Summer of Code
* 2005 program.
* 2005 program, and by Mindaugas Rasiukevicius.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -31,12 +31,50 @@
*/
/*
* Efficient memory file system: functions for inode and directory entry
* construction and destruction.
* Efficient memory file system: interfaces for inode and directory entry
* construction, destruction and manipulation.
*
* Reference counting
*
* The link count of inode (tmpfs_node_t::tn_links) is used as a
* reference counter. However, it has slightly different semantics.
*
* For directories - link count represents directory entries, which
* refer to the directories. In other words, it represents the count
* of sub-directories. It also takes into account the virtual '.'
* entry (which has no real entry in the list). For files - link count
* represents the hard links. Since only empty directories can be
* removed - link count aligns the reference counting requirements
* enough. Note: to check whether directory is not empty, the inode
* size (tmpfs_node_t::tn_size) can be used.
*
* The inode itself, as an object, gathers its first reference when
* directory entry is attached via tmpfs_dir_attach(9). For instance,
* after regular tmpfs_create(), a file would have a link count of 1,
* while directory after tmpfs_mkdir() would have 2 (due to '.').
*
* Reclamation
*
* It should be noted that tmpfs inodes rely on a combination of vnode
* reference counting and link counting. That is, an inode can only be
* destroyed if its associated vnode is inactive. The destruction is
* done on vnode reclamation i.e. tmpfs_reclaim(). It should be noted
* that tmpfs_node_t::tn_links being 0 is a destruction criterion.
*
* If an inode has references within the file system (tn_links > 0) and
* its inactive vnode gets reclaimed/recycled - then the association is
* broken in tmpfs_reclaim(). In such case, an inode will always pass
* tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode.
*
* Lock order
*
* tmpfs_node_t::tn_vlock ->
* vnode_t::v_vlock ->
* vnode_t::v_interlock
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.70 2011/05/25 02:03:22 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.71 2011/05/29 22:29:06 rmind Exp $");
#include <sys/param.h>
#include <sys/dirent.h>
@ -65,9 +103,8 @@ __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.70 2011/05/25 02:03:22 rmind Exp $"
* insert it into the list of specified mount point.
*/
int
tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid,
gid_t gid, mode_t mode, tmpfs_node_t *parent, char *target, dev_t rdev,
tmpfs_node_t **node)
tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid,
mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node)
{
tmpfs_node_t *nnode;
@ -76,22 +113,25 @@ tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid,
return ENOSPC;
}
/* Initially, no references and no associations. */
nnode->tn_links = 0;
nnode->tn_vnode = NULL;
nnode->tn_dirent_hint = NULL;
/*
* XXX Where the pool is backed by a map larger than (4GB *
* sizeof(*nnode)), this may produce duplicate inode numbers
* for applications that do not understand 64-bit ino_t.
*/
nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode));
nnode->tn_gen = arc4random();
nnode->tn_gen = TMPFS_NODE_GEN_MASK & arc4random();
/* Generic initialization. */
nnode->tn_type = type;
nnode->tn_size = 0;
nnode->tn_status = 0;
nnode->tn_flags = 0;
nnode->tn_links = 0;
nnode->tn_lockf = NULL;
nnode->tn_vnode = NULL;
vfs_timestamp(&nnode->tn_atime);
nnode->tn_birthtime = nnode->tn_atime;
@ -112,18 +152,13 @@ tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid,
nnode->tn_spec.tn_dev.tn_rdev = rdev;
break;
case VDIR:
/*
* Directory. Parent must be specified, unless allocating
* the root inode.
*/
KASSERT(parent || tmp->tm_root == NULL);
KASSERT(parent != nnode);
/* Directory. */
TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
nnode->tn_spec.tn_dir.tn_parent =
(parent == NULL) ? nnode : parent;
nnode->tn_spec.tn_dir.tn_parent = NULL;
nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
/* Extra link count for the virtual '.' entry. */
nnode->tn_links++;
break;
case VFIFO:
@ -200,8 +235,11 @@ tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
}
break;
case VDIR:
/* KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); */
KASSERT(node->tn_spec.tn_dir.tn_parent || node == tmp->tm_root);
/*
* KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
* KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
* node == tmp->tm_root);
*/
break;
default:
break;
@ -212,28 +250,34 @@ tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
}
/*
* tmpfs_alloc_vp: allocate or reclaim a vnode for a specified inode.
* tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode.
*
* => Must be called with tmpfs_node_t::tn_vlock held.
* => Returns vnode (*vpp) locked.
*/
int
tmpfs_alloc_vp(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp)
tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp)
{
vnode_t *vp;
int error;
again:
/* If there is already a vnode, try to reclaim it. */
mutex_enter(&node->tn_vlock);
if ((vp = node->tn_vnode) != NULL) {
atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT);
mutex_enter(&vp->v_interlock);
mutex_exit(&node->tn_vlock);
error = vget(vp, LK_EXCLUSIVE);
if (error == ENOENT) {
mutex_enter(&node->tn_vlock);
goto again;
}
atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
*vpp = vp;
return error;
}
if (TMPFS_NODE_RECLAIMING(node)) {
atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
}
/* Get a new vnode and associate it with our node. */
error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
@ -277,21 +321,6 @@ again:
return 0;
}
/*
* tmpfs_free_vp: destroys the association between the vnode and the
* inode it references.
*/
void
tmpfs_free_vp(vnode_t *vp)
{
tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
mutex_enter(&node->tn_vlock);
node->tn_vnode = NULL;
mutex_exit(&node->tn_vlock);
vp->v_data = NULL;
}
/*
* tmpfs_alloc_file: allocate a new file of specified type and adds it
* into the parent directory.
@ -303,7 +332,7 @@ tmpfs_alloc_file(vnode_t *dvp, vnode_t **vpp, struct vattr *vap,
struct componentname *cnp, char *target)
{
tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node, *parent;
tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node;
tmpfs_dirent_t *de;
int error;
@ -313,45 +342,37 @@ tmpfs_alloc_file(vnode_t *dvp, vnode_t **vpp, struct vattr *vap,
/* Check for the maximum number of links limit. */
if (vap->va_type == VDIR) {
/* Check for maximum links limit. */
KASSERT(dnode->tn_links <= LINK_MAX);
if (dnode->tn_links == LINK_MAX) {
error = EMLINK;
goto out;
}
parent = dnode;
} else {
parent = NULL;
KASSERT(dnode->tn_links < LINK_MAX);
}
/* Allocate a node that represents the new file. */
error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred),
dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node);
dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node);
if (error)
goto out;
/* Allocate a directory entry that points to the new file. */
error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
&de);
error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de);
if (error) {
tmpfs_free_node(tmp, node);
goto out;
}
/* Allocate a vnode for the new file. */
error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
/* Get a vnode for the new file. */
mutex_enter(&node->tn_vlock);
error = tmpfs_vnode_get(dvp->v_mount, node, vpp);
if (error) {
tmpfs_free_dirent(tmp, de, true);
tmpfs_free_dirent(tmp, de);
tmpfs_free_node(tmp, node);
goto out;
}
/* Attach directory entry into the directory inode. */
tmpfs_dir_attach(dvp, de);
if (vap->va_type == VDIR) {
dnode->tn_links++;
KASSERT(dnode->tn_links <= LINK_MAX);
VN_KNOTE(dvp, NOTE_LINK);
}
/* Associate inode and attach the entry into the directory. */
tmpfs_dir_attach(dvp, de, node);
out:
vput(dvp);
return error;
@ -359,14 +380,11 @@ out:
/*
* tmpfs_alloc_dirent: allocates a new directory entry for the inode.
*
* The link count of node is increased by one to reflect the new object
* referencing it. This takes care of notifying kqueue listeners about
* this change.
* The directory entry contains a path name component.
*/
int
tmpfs_alloc_dirent(tmpfs_mount_t *tmp, tmpfs_node_t *node,
const char *name, uint16_t len, tmpfs_dirent_t **de)
tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len,
tmpfs_dirent_t **de)
{
tmpfs_dirent_t *nde;
@ -381,13 +399,6 @@ tmpfs_alloc_dirent(tmpfs_mount_t *tmp, tmpfs_node_t *node,
}
nde->td_namelen = len;
memcpy(nde->td_name, name, len);
nde->td_node = node;
if (node != TMPFS_NODE_WHITEOUT) {
node->tn_links++;
if (node->tn_links > 1 && node->tn_vnode != NULL)
VN_KNOTE(node->tn_vnode, NOTE_LINK);
}
*de = nde;
return 0;
@ -395,72 +406,109 @@ tmpfs_alloc_dirent(tmpfs_mount_t *tmp, tmpfs_node_t *node,
/*
* tmpfs_free_dirent: free a directory entry.
*
* => It is the caller's responsibility to destroy the referenced inode.
* => The link count of inode is decreased by one to reflect the removal of
* an object that referenced it. This only happens if 'node_exists' is true;
* otherwise the function will not access the node referred to by the
* directory entry, as it may already have been released from the outside.
*
* Interested parties (kqueue) are notified of the link count change; note
* that this can include both the node pointed to by the directory entry
* as well as its parent.
*/
void
tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de, bool node_exists)
tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de)
{
if (node_exists && de->td_node != TMPFS_NODE_WHITEOUT) {
tmpfs_node_t *node = de->td_node;
KASSERT(node->tn_links > 0);
node->tn_links--;
if (node->tn_vnode != NULL) {
VN_KNOTE(node->tn_vnode, node->tn_links == 0 ?
NOTE_DELETE : NOTE_LINK);
}
if (node->tn_type == VDIR) {
VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode,
NOTE_LINK);
}
}
/* KASSERT(de->td_node == NULL); */
tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
tmpfs_dirent_put(tmp, de);
}
/*
* tmpfs_dir_attach: attach the directory entry to the specified vnode.
* tmpfs_dir_attach: associate directory entry with a specified inode,
* and attach the entry into the directory, specified by vnode.
*
* => The link count of inode is not changed; done by tmpfs_alloc_dirent().
* => Triggers NOTE_WRITE event here.
* => Increases link count on the associated node.
* => Increases link count on directory node, if our node is VDIR.
* It is caller's responsibility to check for the LINK_MAX limit.
* => Triggers kqueue events here.
*/
void
tmpfs_dir_attach(vnode_t *vp, tmpfs_dirent_t *de)
tmpfs_dir_attach(vnode_t *dvp, tmpfs_dirent_t *de, tmpfs_node_t *node)
{
tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(vp);
tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
int events = NOTE_WRITE;
KASSERT(VOP_ISLOCKED(vp));
KASSERT(VOP_ISLOCKED(dvp));
/* Associate directory entry and the inode. */
if (node != TMPFS_NODE_WHITEOUT) {
de->td_node = node;
KASSERT(node->tn_links < LINK_MAX);
node->tn_links++;
/* Save the hint (might overwrite). */
node->tn_dirent_hint = de;
}
/* Insert the entry to the directory (parent of inode). */
TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
dnode->tn_size += sizeof(tmpfs_dirent_t);
dnode->tn_status |= TMPFS_NODE_STATUSALL;
uvm_vnp_setsize(vp, dnode->tn_size);
VN_KNOTE(vp, NOTE_WRITE);
uvm_vnp_setsize(dvp, dnode->tn_size);
if (node != TMPFS_NODE_WHITEOUT && node->tn_type == VDIR) {
/* Set parent. */
KASSERT(node->tn_spec.tn_dir.tn_parent == NULL);
node->tn_spec.tn_dir.tn_parent = dnode;
/* Increase the link count of parent. */
KASSERT(dnode->tn_links < LINK_MAX);
dnode->tn_links++;
events |= NOTE_LINK;
TMPFS_VALIDATE_DIR(node);
}
VN_KNOTE(dvp, events);
}
/*
* tmpfs_dir_detach: detache the directory entry from the specified vnode.
* tmpfs_dir_detach: disassociate directory entry and its inode,
* and detach the entry from the directory, specified by vnode.
*
* => The link count of inode is not changed; done by tmpfs_free_dirent().
* => Triggers NOTE_WRITE event here.
* => Decreases link count on the associated node.
* => Decreases the link count on directory node, if our node is VDIR.
* => Triggers kqueue events here.
*/
void
tmpfs_dir_detach(vnode_t *vp, tmpfs_dirent_t *de)
tmpfs_dir_detach(vnode_t *dvp, tmpfs_dirent_t *de)
{
tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(vp);
tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
tmpfs_node_t *node = de->td_node;
int events = NOTE_WRITE;
KASSERT(VOP_ISLOCKED(vp));
KASSERT(VOP_ISLOCKED(dvp));
if (node != TMPFS_NODE_WHITEOUT) {
vnode_t *vp = node->tn_vnode;
KASSERT(VOP_ISLOCKED(vp));
/* Deassociate the inode and entry. */
de->td_node = NULL;
node->tn_dirent_hint = NULL;
KASSERT(node->tn_links > 0);
node->tn_links--;
if (node->tn_vnode) {
VN_KNOTE(node->tn_vnode,
node->tn_links ? NOTE_LINK : NOTE_DELETE);
}
/* If directory - decrease the link count of parent. */
if (node->tn_type == VDIR) {
KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
node->tn_spec.tn_dir.tn_parent = NULL;
KASSERT(dnode->tn_links > 0);
dnode->tn_links--;
events |= NOTE_LINK;
}
}
/* Remove the entry from the directory. */
if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
@ -469,8 +517,8 @@ tmpfs_dir_detach(vnode_t *vp, tmpfs_dirent_t *de)
dnode->tn_size -= sizeof(tmpfs_dirent_t);
dnode->tn_status |= TMPFS_NODE_STATUSALL;
uvm_vnp_setsize(vp, dnode->tn_size);
VN_KNOTE(vp, NOTE_WRITE);
uvm_vnp_setsize(dvp, dnode->tn_size);
VN_KNOTE(dvp, events);
}
/*
@ -487,9 +535,9 @@ tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
tmpfs_dirent_t *de;
KASSERT(VOP_ISLOCKED(node->tn_vnode));
TMPFS_VALIDATE_DIR(node);
KASSERT(nlen != 1 || !(name[0] == '.'));
KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.'));
TMPFS_VALIDATE_DIR(node);
TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
if (de->td_namelen != nlen)
@ -502,6 +550,31 @@ tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
return de;
}
/*
 * tmpfs_dir_cached: return the directory entry hint stored in the inode,
 * but only when it can be trusted.  Used to avoid an unnecessary
 * tmpfs_dir_lookup().
 *
 * => The vnode must be locked.
 * => Returns NULL if no hint is stored or if the hint might be invalid;
 *    the caller is then expected to perform a regular lookup.
 */
tmpfs_dirent_t *
tmpfs_dir_cached(tmpfs_node_t *node)
{
	tmpfs_dirent_t *hint = node->tn_dirent_hint;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));

	/* Nothing cached for this inode. */
	if (hint == NULL) {
		return NULL;
	}
	KASSERT(hint->td_node == node);

	/*
	 * Only a single hint is kept per inode.  For a directory it is
	 * always valid.  A file with more than one hard link has several
	 * directory entries, so the hint may not be the entry the caller
	 * wants - do not return it in that case.
	 */
	if (node->tn_type != VDIR && node->tn_links > 1) {
		return NULL;
	}
	return hint;
}
/*
* tmpfs_dir_getdotdent: helper function for tmpfs_readdir. Creates a
* '.' entry for the given directory and returns it in the uio space.

View File

@ -1,4 +1,4 @@
/* $NetBSD: tmpfs_vfsops.c,v 1.50 2011/05/24 20:17:49 rmind Exp $ */
/* $NetBSD: tmpfs_vfsops.c,v 1.51 2011/05/29 22:29:07 rmind Exp $ */
/*
* Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@ -42,7 +42,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.50 2011/05/24 20:17:49 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.51 2011/05/29 22:29:07 rmind Exp $");
#include <sys/param.h>
#include <sys/types.h>
@ -166,10 +166,17 @@ tmpfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
/* Allocate the root node. */
error = tmpfs_alloc_node(tmp, VDIR, args->ta_root_uid,
args->ta_root_gid, args->ta_root_mode & ALLPERMS, NULL, NULL,
args->ta_root_gid, args->ta_root_mode & ALLPERMS, NULL,
VNOVAL, &root);
KASSERT(error == 0 && root != NULL);
/*
* Parent of the root inode is itself. Also, root inode has no
* directory entry (i.e. is never attached), thus hold an extra
* reference (link) for it.
*/
root->tn_links++;
root->tn_spec.tn_dir.tn_parent = root;
tmp->tm_root = root;
mp->mnt_data = tmp;
@ -224,7 +231,7 @@ tmpfs_unmount(struct mount *mp, int mntflags)
tmpfs_dirent_t *nde;
nde = TAILQ_NEXT(de, td_entries);
tmpfs_free_dirent(tmp, de, false);
tmpfs_free_dirent(tmp, de);
node->tn_size -= sizeof(tmpfs_dirent_t);
de = nde;
}
@ -245,8 +252,10 @@ tmpfs_unmount(struct mount *mp, int mntflags)
static int
tmpfs_root(struct mount *mp, vnode_t **vpp)
{
tmpfs_node_t *node = VFS_TO_TMPFS(mp)->tm_root;
return tmpfs_alloc_vp(mp, VFS_TO_TMPFS(mp)->tm_root, vpp);
mutex_enter(&node->tn_vlock);
return tmpfs_vnode_get(mp, node, vpp);
}
static int
@ -260,31 +269,30 @@ tmpfs_vget(struct mount *mp, ino_t ino, vnode_t **vpp)
static int
tmpfs_fhtovp(struct mount *mp, struct fid *fhp, vnode_t **vpp)
{
tmpfs_mount_t *tmp;
tmpfs_mount_t *tmp = VFS_TO_TMPFS(mp);
tmpfs_node_t *node;
tmpfs_fid_t tfh;
bool found;
tmp = VFS_TO_TMPFS(mp);
if (fhp->fid_len != sizeof(tmpfs_fid_t))
if (fhp->fid_len != sizeof(tmpfs_fid_t)) {
return EINVAL;
}
memcpy(&tfh, fhp, sizeof(tmpfs_fid_t));
found = false;
mutex_enter(&tmp->tm_lock);
LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) {
if (node->tn_id == tfh.tf_id &&
node->tn_gen == tfh.tf_gen) {
found = true;
break;
if (node->tn_id != tfh.tf_id) {
continue;
}
if (TMPFS_NODE_GEN(node) != tfh.tf_gen) {
continue;
}
mutex_enter(&node->tn_vlock);
break;
}
mutex_exit(&tmp->tm_lock);
/* XXXAD nothing to prevent 'node' from being removed. */
return found ? tmpfs_alloc_vp(mp, node, vpp) : ESTALE;
/* Will release the tn_vlock. */
return node ? tmpfs_vnode_get(mp, node, vpp) : ESTALE;
}
static int
@ -302,7 +310,7 @@ tmpfs_vptofh(vnode_t *vp, struct fid *fhp, size_t *fh_size)
memset(&tfh, 0, sizeof(tfh));
tfh.tf_len = sizeof(tmpfs_fid_t);
tfh.tf_gen = node->tn_gen;
tfh.tf_gen = TMPFS_NODE_GEN(node);
tfh.tf_id = node->tn_id;
memcpy(fhp, &tfh, sizeof(tfh));

View File

@ -1,4 +1,4 @@
/* $NetBSD: tmpfs_vnops.c,v 1.84 2011/05/24 23:16:16 rmind Exp $ */
/* $NetBSD: tmpfs_vnops.c,v 1.85 2011/05/29 22:29:07 rmind Exp $ */
/*
* Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.84 2011/05/24 23:16:16 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.85 2011/05/29 22:29:07 rmind Exp $");
#include <sys/param.h>
#include <sys/dirent.h>
@ -142,11 +142,12 @@ tmpfs_lookup(void *v)
dnode = VP_TO_TMPFS_DIR(dvp);
*vpp = NULL;
/* Check accessibility of requested node as a first step. */
/* Check accessibility of directory. */
error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
if (error) {
goto out;
}
/*
* If requesting the last path component on a read-only file system
* with a write operation, deny it.
@ -169,16 +170,33 @@ tmpfs_lookup(void *v)
if (cnp->cn_flags & ISDOTDOT) {
tmpfs_node_t *pnode;
/*
* Lookup of ".." case.
*/
if (lastcn && cnp->cn_nameiop == RENAME) {
error = EINVAL;
goto out;
}
KASSERT(dnode->tn_type == VDIR);
pnode = dnode->tn_spec.tn_dir.tn_parent;
KASSERT(dnode->tn_type == VDIR && pnode != dnode);
if (pnode == NULL) {
error = ENOENT;
goto out;
}
/*
* Lock the parent tn_vlock before releasing the vnode lock,
* thus preventing the parent from disappearing.
*/
mutex_enter(&pnode->tn_vlock);
VOP_UNLOCK(dvp);
/* Allocate a new vnode on the matching entry. */
error = tmpfs_alloc_vp(dvp->v_mount, pnode, vpp);
/*
* Get a vnode of the '..' entry and re-acquire the lock.
* Release the tn_vlock.
*/
error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
goto out;
@ -261,9 +279,9 @@ tmpfs_lookup(void *v)
}
}
/* Allocate a new vnode on the matching entry. */
error = tmpfs_alloc_vp(dvp->v_mount, tnode, vpp);
/* Get a vnode for the matching entry. */
mutex_enter(&tnode->tn_vlock);
error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
done:
/*
* Cache the result, unless request was for creation (as it does
@ -359,13 +377,10 @@ tmpfs_close(void *v)
kauth_cred_t a_cred;
} */ *ap = v;
vnode_t *vp = ap->a_vp;
tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
KASSERT(VOP_ISLOCKED(vp));
if (node->tn_links > 0) {
tmpfs_update(vp, NULL, NULL, NULL, UPDATE_CLOSE);
}
tmpfs_update(vp, NULL, NULL, NULL, UPDATE_CLOSE);
return 0;
}
@ -454,7 +469,7 @@ tmpfs_getattr(void *v)
vap->va_mtime = node->tn_mtime;
vap->va_ctime = node->tn_ctime;
vap->va_birthtime = node->tn_birthtime;
vap->va_gen = node->tn_gen;
vap->va_gen = TMPFS_NODE_GEN(node);
vap->va_flags = node->tn_flags;
vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
node->tn_spec.tn_dev.tn_rdev : VNOVAL;
@ -667,10 +682,8 @@ tmpfs_remove(void *v)
struct componentname *a_cnp;
} */ *ap = v;
vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
struct componentname *cnp = ap->a_cnp;
tmpfs_node_t *dnode, *node;
tmpfs_node_t *node;
tmpfs_dirent_t *de;
tmpfs_mount_t *tmp;
int error;
KASSERT(VOP_ISLOCKED(dvp));
@ -688,21 +701,22 @@ tmpfs_remove(void *v)
goto out;
}
/*
* Lookup and remove the entry from the directory. Note that since
* it is a file, we do not need to change the number of hard links.
*/
dnode = VP_TO_TMPFS_DIR(dvp);
de = tmpfs_dir_lookup(dnode, cnp);
/* Lookup the directory entry (check the cached hint first). */
de = tmpfs_dir_cached(node);
if (de == NULL) {
tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
struct componentname *cnp = ap->a_cnp;
de = tmpfs_dir_lookup(dnode, cnp);
}
KASSERT(de && de->td_node == node);
tmpfs_dir_detach(dvp, de);
/*
* Free removed directory entry. Note that the node referred by it
* will not be removed until the vnode is really reclaimed.
* Remove the entry from the directory (drops the link count) and
* destroy it. Note: the inode referred by it will not be destroyed
* until the vnode is reclaimed/recycled.
*/
tmp = VFS_TO_TMPFS(vp->v_mount);
tmpfs_free_dirent(tmp, de, true);
tmpfs_dir_detach(dvp, de);
tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
error = 0;
out:
/* Drop the references and unlock the vnodes. */
@ -744,11 +758,11 @@ tmpfs_link(void *v)
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
/* Check for maximum number of links limit. */
KASSERT(node->tn_links <= LINK_MAX);
if (node->tn_links == LINK_MAX) {
error = EMLINK;
goto out;
}
KASSERT(node->tn_links < LINK_MAX);
/* We cannot create links of files marked immutable or append-only. */
if (node->tn_flags & (IMMUTABLE | APPEND)) {
@ -756,17 +770,23 @@ tmpfs_link(void *v)
goto out;
}
/* Allocate a new directory entry to represent the node. */
error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
/* Allocate a new directory entry to represent the inode. */
error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
cnp->cn_nameptr, cnp->cn_namelen, &de);
if (error) {
goto out;
}
/* Insert the new directory entry into the directory. */
tmpfs_dir_attach(dvp, de);
/*
* Insert the entry into the directory.
* It will increase the inode link count.
*/
tmpfs_dir_attach(dvp, de, node);
/* Node link count has changed, so update node times. */
/* Update the timestamps and trigger the event. */
if (node->tn_vnode) {
VN_KNOTE(node->tn_vnode, NOTE_LINK);
}
node->tn_status |= TMPFS_NODE_CHANGED;
tmpfs_update(vp, NULL, NULL, NULL, 0);
error = 0;
@ -776,6 +796,26 @@ out:
return error;
}
/*
 * tmpfs_parentcheck_p: check if 'lower' is a descendant of 'upper'.
 *
 * Walks the tn_parent chain upwards, starting at 'lower' itself, and
 * compares each directory inode on the way with 'upper'.
 *
 * => Returns 'true' if 'upper' is found on the chain (this includes the
 *    case of 'lower' == 'upper'), and 'false' otherwise.
 * => The walk stops at an inode which is its own parent; that inode is
 *    not compared with 'upper'.
 * => The walk also stops at a NULL parent pointer and 'false' is
 *    returned, instead of dereferencing NULL.
 */
static inline bool
tmpfs_parentcheck_p(tmpfs_node_t *lower, tmpfs_node_t *upper)
{
	tmpfs_node_t *un = lower;

	/* Check for NULL before reading tn_parent in the loop condition. */
	while (un != NULL && un != un->tn_spec.tn_dir.tn_parent) {
		KASSERT(un->tn_type == VDIR);
		if (un == upper) {
			return true;
		}
		un = un->tn_spec.tn_dir.tn_parent;
	}
	return false;
}
/*
* tmpfs_rename: rename routine.
*
@ -814,6 +854,8 @@ tmpfs_rename(void *v)
KASSERT(VOP_ISLOCKED(tdvp));
KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
KASSERT((fcnp->cn_flags & ISDOTDOT) == 0);
KASSERT((tcnp->cn_flags & ISDOTDOT) == 0);
newname = NULL;
namelen = 0;
@ -848,9 +890,12 @@ tmpfs_rename(void *v)
}
/* XXX: Lock order violation! */
if (fdnode != tdnode) {
if (fdvp != tdvp) {
vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
}
if (fvp != tvp) {
vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
}
/* If the inode we were renaming has scarpered, just give up. */
de = tmpfs_dir_lookup(fdnode, fcnp);
@ -859,15 +904,23 @@ tmpfs_rename(void *v)
goto out;
}
/* If source and target is the same vnode, remove the source link. */
/*
* If source and target are the same vnode - it is either an invalid
* rename of a directory, or a hard link. Remove the source link,
* if the latter.
*/
if (fvp == tvp) {
if (fvp->v_type == VDIR) {
error = EINVAL;
goto out;
}
/*
* Detach and free the directory entry. Drops the link
* count on the inode.
*/
KASSERT(fnode == tnode);
tmpfs_dir_detach(fdvp, de);
tmpfs_free_dirent(VFS_TO_TMPFS(fvp->v_mount), de, true);
VN_KNOTE(fdvp, NOTE_WRITE);
tmpfs_free_dirent(tmp, de);
goto out_ok;
}
@ -892,32 +945,16 @@ tmpfs_rename(void *v)
}
/* Are we moving the inode to a different directory? */
if (fdnode != tdnode) {
/* Are we moving a directory? */
if (de->td_node->tn_type == VDIR) {
tmpfs_node_t *upnode;
/*
* Ensure the target directory is not a child of the
* directory being moved. Otherwise, it would result
* in stale nodes.
*/
upnode = tdnode;
while (upnode != upnode->tn_spec.tn_dir.tn_parent) {
if (upnode == fnode) {
error = EINVAL;
goto out;
}
upnode = upnode->tn_spec.tn_dir.tn_parent;
}
/* Adjust the parent pointer. */
TMPFS_VALIDATE_DIR(fnode);
de->td_node->tn_spec.tn_dir.tn_parent = tdnode;
/* Adjust the link counts. */
fdnode->tn_links--;
tdnode->tn_links++;
if (fdvp != tdvp) {
/*
* If we are moving a directory - ensure that it is not
* parent of a target directory. Otherwise, it would
* result in stale nodes.
*/
if (fnode->tn_type == VDIR &&
tmpfs_parentcheck_p(tdnode, fnode)) {
error = EINVAL;
goto out;
}
/*
@ -925,27 +962,30 @@ tmpfs_rename(void *v)
* attach into the target directory.
*/
tmpfs_dir_detach(fdvp, de);
tmpfs_dir_attach(tdvp, de);
tmpfs_dir_attach(tdvp, de, fnode);
/* Trigger the event. */
VN_KNOTE(fdvp, NOTE_WRITE);
} else if (tvp == NULL) {
/* Trigger the event, if not overwriting. */
VN_KNOTE(tdvp, NOTE_WRITE);
}
/* Are we overwriting the entry? */
if (tvp != NULL) {
tmpfs_dirent_t *de2;
tmpfs_dirent_t *tde;
tde = tmpfs_dir_cached(tnode);
if (tde == NULL) {
tde = tmpfs_dir_lookup(tdnode, tcnp);
}
KASSERT(tde && tde->td_node == tnode);
KASSERT(tnode->tn_type == fnode->tn_type);
/*
* Remove the old entry from the target directory.
* Note: This relies on tmpfs_dir_attach() putting the new
* node on the end of the target's node list.
* Remove and destroy the directory entry on the target
* directory, since we overwrite it.
*/
de2 = tmpfs_dir_lookup(tdnode, tcnp);
KASSERT(de2 && de2->td_node == tnode);
tmpfs_dir_detach(tdvp, de2);
/* Destroy the detached directory entry. */
tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de2, true);
tmpfs_dir_detach(tdvp, tde);
tmpfs_free_dirent(tmp, tde);
}
/* If the name has changed, update directory entry. */
@ -962,14 +1002,16 @@ tmpfs_rename(void *v)
tdnode->tn_status |= TMPFS_NODE_MODIFIED;
}
out_ok:
/* Notify listeners of source and target directories. */
VN_KNOTE(tdvp, NOTE_WRITE);
/* Trigger the rename event. */
VN_KNOTE(fvp, NOTE_RENAME);
error = 0;
out:
if (fdnode != tdnode) {
if (fdvp != tdvp) {
VOP_UNLOCK(fdvp);
}
if (fvp != tvp) {
VOP_UNLOCK(fvp);
}
out_unlocked:
/* Release target nodes. */
if (tdvp == tvp) {
@ -977,7 +1019,7 @@ out_unlocked:
} else {
vput(tdvp);
}
if (tvp != NULL) {
if (tvp) {
vput(tvp);
}
@ -1019,7 +1061,6 @@ tmpfs_rmdir(void *v)
} */ *ap = v;
vnode_t *dvp = ap->a_dvp;
vnode_t *vp = ap->a_vp;
struct componentname *cnp = ap->a_cnp;
tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
@ -1039,8 +1080,12 @@ tmpfs_rmdir(void *v)
goto out;
}
/* Get the directory entry associated with inode (vp). */
de = tmpfs_dir_lookup(dnode, cnp);
/* Lookup the directory entry (check the cached hint first). */
de = tmpfs_dir_cached(node);
if (de == NULL) {
struct componentname *cnp = ap->a_cnp;
de = tmpfs_dir_lookup(dnode, cnp);
}
KASSERT(de && de->td_node == node);
/* Check flags to see if we are allowed to remove the directory. */
@ -1049,13 +1094,12 @@ tmpfs_rmdir(void *v)
goto out;
}
/* Detach the directory entry from the directory (dnode). */
/* Detach the directory entry from the directory. */
tmpfs_dir_detach(dvp, de);
/* Decrement the link count for the virtual '.' entry. */
node->tn_links--;
node->tn_status |= TMPFS_NODE_STATUSALL;
node->tn_spec.tn_dir.tn_parent->tn_links--;
node->tn_spec.tn_dir.tn_parent->tn_status |= TMPFS_NODE_STATUSALL;
/* Purge the cache for parent. */
cache_purge(dvp);
@ -1064,7 +1108,7 @@ tmpfs_rmdir(void *v)
* Destroy the directory entry. Note: the inode referred by it
* will not be destroyed until the vnode is reclaimed.
*/
tmpfs_free_dirent(tmp, de, true);
tmpfs_free_dirent(tmp, de);
KASSERT(node->tn_links == 0);
out:
/* Release the nodes. */
@ -1240,13 +1284,21 @@ tmpfs_reclaim(void *v)
vnode_t *vp = ap->a_vp;
tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
bool racing;
/* Disassociate inode from vnode. */
tmpfs_free_vp(vp);
KASSERT(vp->v_data == NULL);
mutex_enter(&node->tn_vlock);
node->tn_vnode = NULL;
vp->v_data = NULL;
/* Check if tmpfs_vnode_get() is racing with us. */
racing = TMPFS_NODE_RECLAIMING(node);
mutex_exit(&node->tn_vlock);
/* If inode is not referenced, i.e. no links, then destroy it. */
if (node->tn_links == 0) {
/*
* If inode is not referenced, i.e. no links, then destroy it.
* Note: if racing - inode is about to get a new vnode, leave it.
*/
if (node->tn_links == 0 && !racing) {
tmpfs_free_node(tmp, node);
}
return 0;
@ -1287,7 +1339,7 @@ tmpfs_pathconf(void *v)
*retval = 1;
break;
case _PC_FILESIZEBITS:
*retval = 0; /* FIXME */
*retval = sizeof(off_t) * CHAR_BIT;
break;
default:
error = EINVAL;
@ -1447,11 +1499,11 @@ tmpfs_whiteout(void *v)
case LOOKUP:
break;
case CREATE:
error = tmpfs_alloc_dirent(tmp, TMPFS_NODE_WHITEOUT,
cnp->cn_nameptr, cnp->cn_namelen, &de);
error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
cnp->cn_namelen, &de);
if (error)
return error;
tmpfs_dir_attach(dvp, de);
tmpfs_dir_attach(dvp, de, TMPFS_NODE_WHITEOUT);
break;
case DELETE:
cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
@ -1459,7 +1511,7 @@ tmpfs_whiteout(void *v)
if (de == NULL)
return ENOENT;
tmpfs_dir_detach(dvp, de);
tmpfs_free_dirent(tmp, de, true);
tmpfs_free_dirent(tmp, de);
break;
}
return 0;