virtiofsd: Fix xattr operations
Current virtiofsd has problems about xattr operations and they does not work properly for directory/symlink/special file. The fundamental cause is that virtiofsd uses openat() + f...xattr() systemcalls for xattr operation but we should not open symlink/special file in the daemon. Therefore the function is restricted. Fix this problem by: 1. during setup of each thread, call unshare(CLONE_FS) 2. in xattr operations (i.e. lo_getxattr), if inode is not a regular file or directory, use fchdir(proc_loot_fd) + ...xattr() + fchdir(root.fd) instead of openat() + f...xattr() (Note: for a regular file/directory openat() + f...xattr() is still used for performance reason) With this patch, xfstests generic/062 passes on virtiofs. This fix is suggested by Miklos Szeredi and Stefan Hajnoczi. The original discussion can be found here: https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com> Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com> Acked-by: Vivek Goyal <vgoyal@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
parent
16e15a7308
commit
bdfd667883
@ -426,6 +426,8 @@ err:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static __thread bool clone_fs_called;
|
||||||
|
|
||||||
/* Process one FVRequest in a thread pool */
|
/* Process one FVRequest in a thread pool */
|
||||||
static void fv_queue_worker(gpointer data, gpointer user_data)
|
static void fv_queue_worker(gpointer data, gpointer user_data)
|
||||||
{
|
{
|
||||||
@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
|
|||||||
|
|
||||||
assert(se->bufsize > sizeof(struct fuse_in_header));
|
assert(se->bufsize > sizeof(struct fuse_in_header));
|
||||||
|
|
||||||
|
if (!clone_fs_called) {
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* unshare FS for xattr operation */
|
||||||
|
ret = unshare(CLONE_FS);
|
||||||
|
/* should not fail */
|
||||||
|
assert(ret == 0);
|
||||||
|
|
||||||
|
clone_fs_called = true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* An element contains one request and the space to send our response
|
* An element contains one request and the space to send our response
|
||||||
* They're spread over multiple descriptors in a scatter/gather set
|
* They're spread over multiple descriptors in a scatter/gather set
|
||||||
|
@ -123,7 +123,7 @@ struct lo_inode {
|
|||||||
pthread_mutex_t plock_mutex;
|
pthread_mutex_t plock_mutex;
|
||||||
GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
|
GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
|
||||||
|
|
||||||
bool is_symlink;
|
mode_t filetype;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct lo_cred {
|
struct lo_cred {
|
||||||
@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
|
|||||||
struct lo_inode *parent;
|
struct lo_inode *parent;
|
||||||
char path[PATH_MAX];
|
char path[PATH_MAX];
|
||||||
|
|
||||||
if (inode->is_symlink) {
|
if (S_ISLNK(inode->filetype)) {
|
||||||
res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
|
res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
|
||||||
if (res == -1 && errno == EINVAL) {
|
if (res == -1 && errno == EINVAL) {
|
||||||
/* Sorry, no race free way to set times on symlink. */
|
/* Sorry, no race free way to set times on symlink. */
|
||||||
@ -928,7 +928,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
|
|||||||
goto out_err;
|
goto out_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
inode->is_symlink = S_ISLNK(e->attr.st_mode);
|
/* cache only filetype */
|
||||||
|
inode->filetype = (e->attr.st_mode & S_IFMT);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* One for the caller and one for nlookup (released in
|
* One for the caller and one for nlookup (released in
|
||||||
@ -1135,7 +1136,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
|
|||||||
struct lo_inode *parent;
|
struct lo_inode *parent;
|
||||||
char path[PATH_MAX];
|
char path[PATH_MAX];
|
||||||
|
|
||||||
if (inode->is_symlink) {
|
if (S_ISLNK(inode->filetype)) {
|
||||||
res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
|
res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
|
||||||
if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
|
if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
|
||||||
/* Sorry, no race free way to hard-link a symlink. */
|
/* Sorry, no race free way to hard-link a symlink. */
|
||||||
@ -2189,12 +2190,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
|
|||||||
fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
|
fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
|
||||||
ino, name, size);
|
ino, name, size);
|
||||||
|
|
||||||
if (inode->is_symlink) {
|
|
||||||
/* Sorry, no race free way to getxattr on symlink. */
|
|
||||||
saverr = EPERM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size) {
|
if (size) {
|
||||||
value = malloc(size);
|
value = malloc(size);
|
||||||
if (!value) {
|
if (!value) {
|
||||||
@ -2203,12 +2198,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
|
|||||||
}
|
}
|
||||||
|
|
||||||
sprintf(procname, "%i", inode->fd);
|
sprintf(procname, "%i", inode->fd);
|
||||||
fd = openat(lo->proc_self_fd, procname, O_RDONLY);
|
/*
|
||||||
if (fd < 0) {
|
* It is not safe to open() non-regular/non-dir files in file server
|
||||||
goto out_err;
|
* unless O_PATH is used, so use that method for regular files/dir
|
||||||
|
* only (as it seems giving less performance overhead).
|
||||||
|
* Otherwise, call fchdir() to avoid open().
|
||||||
|
*/
|
||||||
|
if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
|
||||||
|
fd = openat(lo->proc_self_fd, procname, O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
goto out_err;
|
||||||
|
}
|
||||||
|
ret = fgetxattr(fd, name, value, size);
|
||||||
|
} else {
|
||||||
|
/* fchdir should not fail here */
|
||||||
|
assert(fchdir(lo->proc_self_fd) == 0);
|
||||||
|
ret = getxattr(procname, name, value, size);
|
||||||
|
assert(fchdir(lo->root.fd) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = fgetxattr(fd, name, value, size);
|
|
||||||
if (ret == -1) {
|
if (ret == -1) {
|
||||||
goto out_err;
|
goto out_err;
|
||||||
}
|
}
|
||||||
@ -2262,12 +2270,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
|
|||||||
fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino,
|
fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino,
|
||||||
size);
|
size);
|
||||||
|
|
||||||
if (inode->is_symlink) {
|
|
||||||
/* Sorry, no race free way to listxattr on symlink. */
|
|
||||||
saverr = EPERM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size) {
|
if (size) {
|
||||||
value = malloc(size);
|
value = malloc(size);
|
||||||
if (!value) {
|
if (!value) {
|
||||||
@ -2276,12 +2278,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
sprintf(procname, "%i", inode->fd);
|
sprintf(procname, "%i", inode->fd);
|
||||||
fd = openat(lo->proc_self_fd, procname, O_RDONLY);
|
if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
|
||||||
if (fd < 0) {
|
fd = openat(lo->proc_self_fd, procname, O_RDONLY);
|
||||||
goto out_err;
|
if (fd < 0) {
|
||||||
|
goto out_err;
|
||||||
|
}
|
||||||
|
ret = flistxattr(fd, value, size);
|
||||||
|
} else {
|
||||||
|
/* fchdir should not fail here */
|
||||||
|
assert(fchdir(lo->proc_self_fd) == 0);
|
||||||
|
ret = listxattr(procname, value, size);
|
||||||
|
assert(fchdir(lo->root.fd) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = flistxattr(fd, value, size);
|
|
||||||
if (ret == -1) {
|
if (ret == -1) {
|
||||||
goto out_err;
|
goto out_err;
|
||||||
}
|
}
|
||||||
@ -2335,20 +2344,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
|
|||||||
fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
|
fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
|
||||||
", name=%s value=%s size=%zd)\n", ino, name, value, size);
|
", name=%s value=%s size=%zd)\n", ino, name, value, size);
|
||||||
|
|
||||||
if (inode->is_symlink) {
|
|
||||||
/* Sorry, no race free way to setxattr on symlink. */
|
|
||||||
saverr = EPERM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
sprintf(procname, "%i", inode->fd);
|
sprintf(procname, "%i", inode->fd);
|
||||||
fd = openat(lo->proc_self_fd, procname, O_RDWR);
|
if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
|
||||||
if (fd < 0) {
|
fd = openat(lo->proc_self_fd, procname, O_RDONLY);
|
||||||
saverr = errno;
|
if (fd < 0) {
|
||||||
goto out;
|
saverr = errno;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
ret = fsetxattr(fd, name, value, size, flags);
|
||||||
|
} else {
|
||||||
|
/* fchdir should not fail here */
|
||||||
|
assert(fchdir(lo->proc_self_fd) == 0);
|
||||||
|
ret = setxattr(procname, name, value, size, flags);
|
||||||
|
assert(fchdir(lo->root.fd) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = fsetxattr(fd, name, value, size, flags);
|
|
||||||
saverr = ret == -1 ? errno : 0;
|
saverr = ret == -1 ? errno : 0;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
@ -2383,20 +2393,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
|
|||||||
fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino,
|
fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino,
|
||||||
name);
|
name);
|
||||||
|
|
||||||
if (inode->is_symlink) {
|
|
||||||
/* Sorry, no race free way to setxattr on symlink. */
|
|
||||||
saverr = EPERM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
sprintf(procname, "%i", inode->fd);
|
sprintf(procname, "%i", inode->fd);
|
||||||
fd = openat(lo->proc_self_fd, procname, O_RDWR);
|
if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
|
||||||
if (fd < 0) {
|
fd = openat(lo->proc_self_fd, procname, O_RDONLY);
|
||||||
saverr = errno;
|
if (fd < 0) {
|
||||||
goto out;
|
saverr = errno;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
ret = fremovexattr(fd, name);
|
||||||
|
} else {
|
||||||
|
/* fchdir should not fail here */
|
||||||
|
assert(fchdir(lo->proc_self_fd) == 0);
|
||||||
|
ret = removexattr(procname, name);
|
||||||
|
assert(fchdir(lo->root.fd) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = fremovexattr(fd, name);
|
|
||||||
saverr = ret == -1 ? errno : 0;
|
saverr = ret == -1 ? errno : 0;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
@ -2796,7 +2807,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
root->is_symlink = false;
|
root->filetype = S_IFDIR;
|
||||||
root->fd = fd;
|
root->fd = fd;
|
||||||
root->key.ino = stat.st_ino;
|
root->key.ino = stat.st_ino;
|
||||||
root->key.dev = stat.st_dev;
|
root->key.dev = stat.st_dev;
|
||||||
|
@ -41,6 +41,7 @@ static const int syscall_whitelist[] = {
|
|||||||
SCMP_SYS(exit),
|
SCMP_SYS(exit),
|
||||||
SCMP_SYS(exit_group),
|
SCMP_SYS(exit_group),
|
||||||
SCMP_SYS(fallocate),
|
SCMP_SYS(fallocate),
|
||||||
|
SCMP_SYS(fchdir),
|
||||||
SCMP_SYS(fchmodat),
|
SCMP_SYS(fchmodat),
|
||||||
SCMP_SYS(fchownat),
|
SCMP_SYS(fchownat),
|
||||||
SCMP_SYS(fcntl),
|
SCMP_SYS(fcntl),
|
||||||
@ -62,7 +63,9 @@ static const int syscall_whitelist[] = {
|
|||||||
SCMP_SYS(getpid),
|
SCMP_SYS(getpid),
|
||||||
SCMP_SYS(gettid),
|
SCMP_SYS(gettid),
|
||||||
SCMP_SYS(gettimeofday),
|
SCMP_SYS(gettimeofday),
|
||||||
|
SCMP_SYS(getxattr),
|
||||||
SCMP_SYS(linkat),
|
SCMP_SYS(linkat),
|
||||||
|
SCMP_SYS(listxattr),
|
||||||
SCMP_SYS(lseek),
|
SCMP_SYS(lseek),
|
||||||
SCMP_SYS(madvise),
|
SCMP_SYS(madvise),
|
||||||
SCMP_SYS(mkdirat),
|
SCMP_SYS(mkdirat),
|
||||||
@ -85,6 +88,7 @@ static const int syscall_whitelist[] = {
|
|||||||
SCMP_SYS(recvmsg),
|
SCMP_SYS(recvmsg),
|
||||||
SCMP_SYS(renameat),
|
SCMP_SYS(renameat),
|
||||||
SCMP_SYS(renameat2),
|
SCMP_SYS(renameat2),
|
||||||
|
SCMP_SYS(removexattr),
|
||||||
SCMP_SYS(rt_sigaction),
|
SCMP_SYS(rt_sigaction),
|
||||||
SCMP_SYS(rt_sigprocmask),
|
SCMP_SYS(rt_sigprocmask),
|
||||||
SCMP_SYS(rt_sigreturn),
|
SCMP_SYS(rt_sigreturn),
|
||||||
@ -98,10 +102,12 @@ static const int syscall_whitelist[] = {
|
|||||||
SCMP_SYS(setresuid32),
|
SCMP_SYS(setresuid32),
|
||||||
#endif
|
#endif
|
||||||
SCMP_SYS(set_robust_list),
|
SCMP_SYS(set_robust_list),
|
||||||
|
SCMP_SYS(setxattr),
|
||||||
SCMP_SYS(symlinkat),
|
SCMP_SYS(symlinkat),
|
||||||
SCMP_SYS(time), /* Rarely needed, except on static builds */
|
SCMP_SYS(time), /* Rarely needed, except on static builds */
|
||||||
SCMP_SYS(tgkill),
|
SCMP_SYS(tgkill),
|
||||||
SCMP_SYS(unlinkat),
|
SCMP_SYS(unlinkat),
|
||||||
|
SCMP_SYS(unshare),
|
||||||
SCMP_SYS(utimensat),
|
SCMP_SYS(utimensat),
|
||||||
SCMP_SYS(write),
|
SCMP_SYS(write),
|
||||||
SCMP_SYS(writev),
|
SCMP_SYS(writev),
|
||||||
|
Loading…
Reference in New Issue
Block a user