scsi, file-posix: add support for persistent reservation management
It is a common requirement for virtual machines to send persistent reservations, but this currently requires either running QEMU with CAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged QEMU bypass Linux's filter on SG_IO commands. As an alternative mechanism, the next patches will introduce a privileged helper to run persistent reservation commands without expanding QEMU's attack surface unnecessarily. The helper is invoked through a "pr-manager" QOM object, to which file-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and PERSISTENT RESERVE IN commands. For example: $ qemu-system-x86_64 -device virtio-scsi \ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock -drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 -device scsi-block,drive=hd or: $ qemu-system-x86_64 -device virtio-scsi \ -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 -device scsi-block,drive=hd Multiple pr-manager implementations are conceivable and possible, though only one is implemented right now. For example, a pr-manager could: - talk directly to the multipath daemon from a privileged QEMU (i.e. QEMU links to libmpathpersist); this makes reservation work properly with multipath, but still requires CAP_SYS_RAWIO - use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though) - more interestingly, implement reservations directly in QEMU through file system locks or a shared database (e.g. sqlite) Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
092aa2fc65
commit
7c9e527659
@ -171,6 +171,7 @@ trace-events-subdirs += qapi
|
||||
trace-events-subdirs += accel/tcg
|
||||
trace-events-subdirs += accel/kvm
|
||||
trace-events-subdirs += nbd
|
||||
trace-events-subdirs += scsi
|
||||
|
||||
trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)
|
||||
|
||||
|
@ -33,6 +33,9 @@
|
||||
#include "block/raw-aio.h"
|
||||
#include "qapi/qmp/qstring.h"
|
||||
|
||||
#include "scsi/pr-manager.h"
|
||||
#include "scsi/constants.h"
|
||||
|
||||
#if defined(__APPLE__) && (__MACH__)
|
||||
#include <paths.h>
|
||||
#include <sys/param.h>
|
||||
@ -155,6 +158,8 @@ typedef struct BDRVRawState {
|
||||
bool page_cache_inconsistent:1;
|
||||
bool has_fallocate;
|
||||
bool needs_alignment;
|
||||
|
||||
PRManager *pr_mgr;
|
||||
} BDRVRawState;
|
||||
|
||||
typedef struct BDRVRawReopenState {
|
||||
@ -402,6 +407,11 @@ static QemuOptsList raw_runtime_opts = {
|
||||
.type = QEMU_OPT_STRING,
|
||||
.help = "file locking mode (on/off/auto, default: auto)",
|
||||
},
|
||||
{
|
||||
.name = "pr-manager",
|
||||
.type = QEMU_OPT_STRING,
|
||||
.help = "id of persistent reservation manager object (default: none)",
|
||||
},
|
||||
{ /* end of list */ }
|
||||
},
|
||||
};
|
||||
@ -413,6 +423,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
QemuOpts *opts;
|
||||
Error *local_err = NULL;
|
||||
const char *filename = NULL;
|
||||
const char *str;
|
||||
BlockdevAioOptions aio, aio_default;
|
||||
int fd, ret;
|
||||
struct stat st;
|
||||
@ -476,6 +487,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||
abort();
|
||||
}
|
||||
|
||||
str = qemu_opt_get(opts, "pr-manager");
|
||||
if (str) {
|
||||
s->pr_mgr = pr_manager_lookup(str, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
s->open_flags = open_flags;
|
||||
raw_parse_flags(bdrv_flags, &s->open_flags);
|
||||
|
||||
@ -2597,6 +2618,15 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
|
||||
if (fd_open(bs) < 0)
|
||||
return NULL;
|
||||
|
||||
if (req == SG_IO && s->pr_mgr) {
|
||||
struct sg_io_hdr *io_hdr = buf;
|
||||
if (io_hdr->cmdp[0] == PERSISTENT_RESERVE_OUT ||
|
||||
io_hdr->cmdp[0] == PERSISTENT_RESERVE_IN) {
|
||||
return pr_manager_execute(s->pr_mgr, bdrv_get_aio_context(bs),
|
||||
s->fd, io_hdr, cb, opaque);
|
||||
}
|
||||
}
|
||||
|
||||
acb = g_new(RawPosixAIOData, 1);
|
||||
acb->bs = bs;
|
||||
acb->aio_type = QEMU_AIO_IOCTL;
|
||||
|
51
docs/pr-manager.rst
Normal file
51
docs/pr-manager.rst
Normal file
@ -0,0 +1,51 @@
|
||||
======================================
|
||||
Persistent reservation managers
|
||||
======================================
|
||||
|
||||
SCSI persistent reservations allow restricting access to block devices
|
||||
to specific initiators in a shared storage setup. When implementing
|
||||
clustering of virtual machines, it is a common requirement for virtual
|
||||
machines to send persistent reservation SCSI commands. However,
|
||||
the operating system restricts sending these commands to unprivileged
|
||||
programs because incorrect usage can disrupt regular operation of the
|
||||
storage fabric.
|
||||
|
||||
For this reason, QEMU's SCSI passthrough devices, ``scsi-block``
|
||||
and ``scsi-generic`` (both are only available on Linux) can delegate
|
||||
implementation of persistent reservations to a separate object,
|
||||
the "persistent reservation manager". Only PERSISTENT RESERVE OUT and
|
||||
PERSISTENT RESERVE IN commands are passed to the persistent reservation
|
||||
manager object; other commands are processed by QEMU as usual.
|
||||
|
||||
-----------------------------------------
|
||||
Defining a persistent reservation manager
|
||||
-----------------------------------------
|
||||
|
||||
A persistent reservation manager is an instance of a subclass of the
|
||||
"pr-manager" QOM class.
|
||||
|
||||
Right now only one subclass is defined, ``pr-manager-helper``, which
|
||||
forwards the commands to an external privileged helper program
|
||||
over Unix sockets. The helper program only allows sending persistent
|
||||
reservation commands to devices for which QEMU has a file descriptor,
|
||||
so that QEMU will not be able to effect persistent reservations
|
||||
unless it has access to both the socket and the device.
|
||||
|
||||
``pr-manager-helper`` has a single string property, ``path``, which
|
||||
accepts the path to the helper program's Unix socket. For example,
|
||||
the following command line defines a ``pr-manager-helper`` object and
|
||||
attaches it to a SCSI passthrough device::
|
||||
|
||||
$ qemu-system-x86_64 \
|
||||
-device virtio-scsi \
|
||||
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
|
||||
-drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 \
|
||||
-device scsi-block,drive=hd
|
||||
|
||||
Alternatively, using ``-blockdev``::
|
||||
|
||||
$ qemu-system-x86_64 \
|
||||
-device virtio-scsi \
|
||||
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
|
||||
-blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 \
|
||||
-device scsi-block,drive=hd
|
56
include/scsi/pr-manager.h
Normal file
56
include/scsi/pr-manager.h
Normal file
@ -0,0 +1,56 @@
|
||||
#ifndef PR_MANAGER_H
|
||||
#define PR_MANAGER_H
|
||||
|
||||
#include "qom/object.h"
|
||||
#include "qapi/qmp/qdict.h"
|
||||
#include "qapi/visitor.h"
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "block/aio.h"
|
||||
|
||||
#define TYPE_PR_MANAGER "pr-manager"
|
||||
|
||||
/*
 * QOM helper macros for TYPE_PR_MANAGER: PR_MANAGER_CLASS() and
 * PR_MANAGER_GET_CLASS() yield a PRManagerClass pointer from a class or an
 * object respectively, PR_MANAGER() yields a PRManager pointer from an object.
 */
#define PR_MANAGER_CLASS(klass) \
|
||||
OBJECT_CLASS_CHECK(PRManagerClass, (klass), TYPE_PR_MANAGER)
|
||||
#define PR_MANAGER_GET_CLASS(obj) \
|
||||
OBJECT_GET_CLASS(PRManagerClass, (obj), TYPE_PR_MANAGER)
|
||||
#define PR_MANAGER(obj) \
|
||||
OBJECT_CHECK(PRManager, (obj), TYPE_PR_MANAGER)
|
||||
|
||||
struct sg_io_hdr;
|
||||
|
||||
/*
 * Instance structure of the abstract "pr-manager" type.  It carries no state
 * of its own beyond the embedded QOM parent object.
 */
typedef struct PRManager {
|
||||
/* <private> */
|
||||
Object parent;
|
||||
} PRManager;
|
||||
|
||||
/**
|
||||
* PRManagerClass:
|
||||
* @parent_class: the base class
|
||||
* @run: callback invoked in thread pool context; it receives the manager,
 *       the file descriptor and the SG_IO header that were passed to
 *       pr_manager_execute(), and its return value becomes the result of
 *       the thread pool work item
|
||||
*/
|
||||
typedef struct PRManagerClass {
|
||||
/* <private> */
|
||||
ObjectClass parent_class;
|
||||
|
||||
/* <public> */
|
||||
int (*run)(PRManager *pr_mgr, int fd, struct sg_io_hdr *hdr);
|
||||
} PRManagerClass;
|
||||
|
||||
/**
 * pr_manager_execute:
 * @pr_mgr: manager whose class @run callback will process the command
 * @ctx: AioContext whose thread pool runs the request
 * @fd: file descriptor handed to the @run callback
 * @hdr: SG_IO header describing the command; it is not copied, and it is
 *       still dereferenced when the worker runs
 * @complete: completion function passed to the thread pool
 * @opaque: argument for @complete
 *
 * Queue a persistent reservation command on the thread pool of @ctx.
 * A reference to @pr_mgr is held until the worker has finished.
 *
 * Returns: the BlockAIOCB produced by the thread pool submission.
 */
BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,
|
||||
AioContext *ctx, int fd,
|
||||
struct sg_io_hdr *hdr,
|
||||
BlockCompletionFunc *complete,
|
||||
void *opaque);
|
||||
|
||||
/*
 * pr_manager_lookup: find the persistent reservation manager object named
 * @id.  Returns the manager, or NULL with @errp set on failure.
 *
 * Without CONFIG_LINUX no implementation is built, so the inline stub below
 * always fails.
 *
 * NOTE(review): the stub calls error_setg(), but this header does not include
 * "qapi/error.h" itself -- confirm that every includer provides it on
 * non-Linux builds.
 */
#ifdef CONFIG_LINUX
|
||||
PRManager *pr_manager_lookup(const char *id, Error **errp);
|
||||
#else
|
||||
static inline PRManager *pr_manager_lookup(const char *id, Error **errp)
|
||||
{
|
||||
/* The classes do not exist at all! */
|
||||
error_setg(errp, "No persistent reservation manager with id '%s'", id);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -2241,6 +2241,9 @@
|
||||
# Driver specific block device options for the file backend.
|
||||
#
|
||||
# @filename: path to the image file
|
||||
# @pr-manager: the id for the object that will handle persistent reservations
|
||||
# for this device (default: none, forward the commands via SG_IO;
|
||||
# since 2.11)
|
||||
# @aio: AIO backend (default: threads) (since: 2.8)
|
||||
# @locking: whether to enable file locking. If set to 'auto', only enable
|
||||
# when Open File Descriptor (OFD) locking API is available
|
||||
@ -2250,6 +2253,7 @@
|
||||
##
|
||||
{ 'struct': 'BlockdevOptionsFile',
|
||||
'data': { 'filename': 'str',
|
||||
'*pr-manager': 'str',
|
||||
'*locking': 'OnOffAuto',
|
||||
'*aio': 'BlockdevAioOptions' } }
|
||||
|
||||
|
@ -1 +1,3 @@
|
||||
block-obj-y += utils.o
|
||||
|
||||
block-obj-$(CONFIG_LINUX) += pr-manager.o
|
||||
|
109
scsi/pr-manager.c
Normal file
109
scsi/pr-manager.c
Normal file
@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Persistent reservation manager abstract class
|
||||
*
|
||||
* Copyright (c) 2017 Red Hat, Inc.
|
||||
*
|
||||
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
||||
*
|
||||
* This code is licensed under the LGPL.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include <scsi/sg.h>
|
||||
|
||||
#include "qapi/error.h"
|
||||
#include "block/aio.h"
|
||||
#include "block/thread-pool.h"
|
||||
#include "scsi/pr-manager.h"
|
||||
#include "trace.h"
|
||||
|
||||
/*
 * Per-request arguments carried from pr_manager_execute() to
 * pr_manager_worker(): allocated and filled in by the former, read and
 * freed by the latter.
 */
typedef struct PRManagerData {
|
||||
/* Manager whose class ->run() callback the worker invokes. */
PRManager *pr_mgr;
|
||||
/* SG_IO header of the command; points at the caller's header. */
struct sg_io_hdr *hdr;
|
||||
/* File descriptor passed through to ->run(). */
int fd;
|
||||
} PRManagerData;
|
||||
|
||||
static int pr_manager_worker(void *opaque)
|
||||
{
|
||||
PRManagerData *data = opaque;
|
||||
PRManager *pr_mgr = data->pr_mgr;
|
||||
PRManagerClass *pr_mgr_class =
|
||||
PR_MANAGER_GET_CLASS(pr_mgr);
|
||||
struct sg_io_hdr *hdr = data->hdr;
|
||||
int fd = data->fd;
|
||||
int r;
|
||||
|
||||
g_free(data);
|
||||
trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]);
|
||||
|
||||
/* The reference was taken in pr_manager_execute. */
|
||||
r = pr_mgr_class->run(pr_mgr, fd, hdr);
|
||||
object_unref(OBJECT(pr_mgr));
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,
|
||||
AioContext *ctx, int fd,
|
||||
struct sg_io_hdr *hdr,
|
||||
BlockCompletionFunc *complete,
|
||||
void *opaque)
|
||||
{
|
||||
PRManagerData *data = g_new(PRManagerData, 1);
|
||||
ThreadPool *pool = aio_get_thread_pool(ctx);
|
||||
|
||||
trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1], opaque);
|
||||
data->pr_mgr = pr_mgr;
|
||||
data->fd = fd;
|
||||
data->hdr = hdr;
|
||||
|
||||
/* The matching object_unref is in pr_manager_worker. */
|
||||
object_ref(OBJECT(pr_mgr));
|
||||
return thread_pool_submit_aio(pool, pr_manager_worker,
|
||||
data, complete, opaque);
|
||||
}
|
||||
|
||||
static const TypeInfo pr_manager_info = {
|
||||
.parent = TYPE_OBJECT,
|
||||
.name = TYPE_PR_MANAGER,
|
||||
.class_size = sizeof(PRManagerClass),
|
||||
.abstract = true,
|
||||
.interfaces = (InterfaceInfo[]) {
|
||||
{ TYPE_USER_CREATABLE },
|
||||
{ }
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Resolve @id under the objects root and return it as a PRManager.
 *
 * Returns NULL and sets @errp if no object with that id exists, or if the
 * object is not an instance of TYPE_PR_MANAGER.  No reference is taken on
 * the returned object.
 */
PRManager *pr_manager_lookup(const char *id, Error **errp)
{
    Object *found;
    PRManager *mgr;

    found = object_resolve_path_component(object_get_objects_root(), id);
    if (found == NULL) {
        error_setg(errp, "No persistent reservation manager with id '%s'", id);
        return NULL;
    }

    mgr = (PRManager *)object_dynamic_cast(found, TYPE_PR_MANAGER);
    if (mgr == NULL) {
        error_setg(errp,
                   "Object with id '%s' is not a persistent reservation manager",
                   id);
    }

    /* NULL here if the dynamic cast failed; errp has been set above. */
    return mgr;
}
|
||||
|
||||
static void
|
||||
pr_manager_register_types(void)
|
||||
{
|
||||
type_register_static(&pr_manager_info);
|
||||
}
|
||||
|
||||
|
||||
type_init(pr_manager_register_types);
|
3
scsi/trace-events
Normal file
3
scsi/trace-events
Normal file
@ -0,0 +1,3 @@
|
||||
# scsi/pr-manager.c
|
||||
pr_manager_execute(int fd, int cmd, int sa, void *opaque) "fd=%d cmd=0x%02x service action=0x%02x opaque=%p"
|
||||
pr_manager_run(int fd, int cmd, int sa) "fd=%d cmd=0x%02x service action=0x%02x"
|
Loading…
Reference in New Issue
Block a user