virtiofsd: passthrough_ll: add fallback for racy ops

We have two operations that cannot be done race-free on a symlink in
certain cases: utimes and link.

Add a racy fallback for these, used when the race-free method doesn't work.
We do our best to avoid races even in this case:

  - get absolute path by reading /proc/self/fd/NN symlink

  - lookup parent directory: after this we are safe against renames in
    ancestors

  - look up the name in the parent directory and verify that we got to the
    original inode; if not, retry the whole thing

Both utimes(2) and link(2) hold i_lock on the inode across the operation,
so a racing rename/delete by this fuse instance is not possible; a race can
only come from other entities changing the filesystem.

If the "norace" option is given, then disable the racy fallbacks.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
Miklos Szeredi 2018-11-14 16:52:03 +01:00 committed by Dr. David Alan Gilbert
parent 73b4d19dfc
commit 5fe319a7b1
2 changed files with 145 additions and 17 deletions

View File

@ -142,7 +142,10 @@ void fuse_cmdline_help(void)
" --daemonize run in background\n" " --daemonize run in background\n"
" -o max_idle_threads the maximum number of idle worker " " -o max_idle_threads the maximum number of idle worker "
"threads\n" "threads\n"
" allowed (default: 10)\n"); " allowed (default: 10)\n"
" -o norace disable racy fallback\n"
" default: false\n"
);
} }
static int fuse_helper_opt_proc(void *data, const char *arg, int key, static int fuse_helper_opt_proc(void *data, const char *arg, int key,

View File

@ -98,6 +98,7 @@ enum {
struct lo_data { struct lo_data {
pthread_mutex_t mutex; pthread_mutex_t mutex;
int debug; int debug;
int norace;
int writeback; int writeback;
int flock; int flock;
int xattr; int xattr;
@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
{ "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
{ "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
{ "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
{ "norace", offsetof(struct lo_data, norace), 1 },
FUSE_OPT_END FUSE_OPT_END
}; };
static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
static struct lo_data *lo_data(fuse_req_t req) static struct lo_data *lo_data(fuse_req_t req)
{ {
return (struct lo_data *)fuse_req_userdata(req); return (struct lo_data *)fuse_req_userdata(req);
@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
fuse_reply_attr(req, &buf, lo->timeout); fuse_reply_attr(req, &buf, lo->timeout);
} }
static int utimensat_empty_nofollow(struct lo_inode *inode, static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
const struct timespec *tv) char path[PATH_MAX], struct lo_inode **parent)
{
char procname[64];
char *last;
struct stat stat;
struct lo_inode *p;
int retries = 2;
int res;
retry:
sprintf(procname, "/proc/self/fd/%i", inode->fd);
res = readlink(procname, path, PATH_MAX);
if (res < 0) {
fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
goto fail_noretry;
}
if (res >= PATH_MAX) {
fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
goto fail_noretry;
}
path[res] = '\0';
last = strrchr(path, '/');
if (last == NULL) {
/* Shouldn't happen */
fuse_log(
FUSE_LOG_WARNING,
"%s: INTERNAL ERROR: bad path read from proc\n", __func__);
goto fail_noretry;
}
if (last == path) {
p = &lo->root;
pthread_mutex_lock(&lo->mutex);
p->refcount++;
pthread_mutex_unlock(&lo->mutex);
} else {
*last = '\0';
res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
if (res == -1) {
if (!retries) {
fuse_log(FUSE_LOG_WARNING,
"%s: failed to stat parent: %m\n", __func__);
}
goto fail;
}
p = lo_find(lo, &stat);
if (p == NULL) {
if (!retries) {
fuse_log(FUSE_LOG_WARNING,
"%s: failed to find parent\n", __func__);
}
goto fail;
}
}
last++;
res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
if (res == -1) {
if (!retries) {
fuse_log(FUSE_LOG_WARNING,
"%s: failed to stat last\n", __func__);
}
goto fail_unref;
}
if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
if (!retries) {
fuse_log(FUSE_LOG_WARNING,
"%s: failed to match last\n", __func__);
}
goto fail_unref;
}
*parent = p;
memmove(path, last, strlen(last) + 1);
return 0;
fail_unref:
unref_inode(lo, p, 1);
fail:
if (retries) {
retries--;
goto retry;
}
fail_noretry:
errno = EIO;
return -1;
}
/*
 * Set the timestamps of @inode to @tv.
 *
 * Non-symlinks are addressed through their /proc/self/fd/<fd> path.
 *
 * For a symlink, utimensat() is first tried on the inode's own fd with an
 * empty path.  If that fails with EINVAL there is no race-free way to do it:
 *   - with lo->norace set, the call fails with errno = EPERM;
 *   - otherwise the parent directory and name are resolved with
 *     lo_parent_and_name() and the times are set by name, without following
 *     the symlink.
 *
 * Returns the result of the last utimensat()/lo_parent_and_name() call
 * (-1 on failure).
 */
static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
                           const struct timespec *tv)
{
    char buf[PATH_MAX];
    struct lo_inode *dir;
    int rc;

    if (!inode->is_symlink) {
        sprintf(buf, "/proc/self/fd/%i", inode->fd);
        return utimensat(AT_FDCWD, buf, tv, 0);
    }

    rc = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
    if (rc != -1 || errno != EINVAL) {
        return rc;
    }

    /* Sorry, no race free way to set times on symlink. */
    if (lo->norace) {
        errno = EPERM;
        return rc;
    }

    /* Racy fallback: operate on the name inside the parent directory. */
    rc = lo_parent_and_name(lo, inode, buf, &dir);
    if (rc == -1) {
        return rc;
    }

    rc = utimensat(dir->fd, buf, tv, AT_SYMLINK_NOFOLLOW);
    unref_inode(lo, dir, 1);

    return rc;
}
static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
{ {
int saverr; int saverr;
char procname[64]; char procname[64];
struct lo_data *lo = lo_data(req);
struct lo_inode *inode; struct lo_inode *inode;
int ifd; int ifd;
int res; int res;
@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
if (fi) { if (fi) {
res = futimens(fd, tv); res = futimens(fd, tv);
} else { } else {
res = utimensat_empty_nofollow(inode, tv); res = utimensat_empty(lo, inode, tv);
} }
if (res == -1) { if (res == -1) {
goto out_err; goto out_err;
@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
} }
static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
const char *name) int dfd, const char *name)
{ {
int res; int res;
char procname[64]; struct lo_inode *parent;
char path[PATH_MAX];
if (inode->is_symlink) { if (inode->is_symlink) {
res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
if (res == -1 && (errno == ENOENT || errno == EINVAL)) { if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
/* Sorry, no race free way to hard-link a symlink. */ /* Sorry, no race free way to hard-link a symlink. */
errno = EPERM; if (lo->norace) {
errno = EPERM;
} else {
goto fallback;
}
} }
return res; return res;
} }
sprintf(procname, "/proc/self/fd/%i", inode->fd); sprintf(path, "/proc/self/fd/%i", inode->fd);
return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
fallback:
res = lo_parent_and_name(lo, inode, path, &parent);
if (res != -1) {
res = linkat(parent->fd, path, dfd, name, 0);
unref_inode(lo, parent, 1);
}
return res;
} }
static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
e.attr_timeout = lo->timeout; e.attr_timeout = lo->timeout;
e.entry_timeout = lo->timeout; e.entry_timeout = lo->timeout;
res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
if (res == -1) { if (res == -1) {
goto out_err; goto out_err;
} }