scsi, file-posix: add support for persistent reservation management
It is a common requirement for virtual machines to send persistent
reservations, but this currently requires either running QEMU with
CAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged
QEMU bypass Linux's filter on SG_IO commands.
As an alternative mechanism, the next patches will introduce a
privileged helper to run persistent reservation commands without
expanding QEMU's attack surface unnecessarily.
The helper is invoked through a "pr-manager" QOM object, to which
file-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and
PERSISTENT RESERVE IN commands. For example:
$ qemu-system-x86_64 \
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
-drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 \
-device scsi-block,drive=hd
or:
$ qemu-system-x86_64 \
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
-blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 \
-device scsi-block,drive=hd
Multiple pr-manager implementations are conceivable and possible, though
only one is implemented right now. For example, a pr-manager could:
- talk directly to the multipath daemon from a privileged QEMU
(i.e. QEMU links to libmpathpersist); this makes reservation work
properly with multipath, but still requires CAP_SYS_RAWIO
- use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though)
- more interestingly, implement reservations directly in QEMU
through file system locks or a shared database (e.g. sqlite)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-08-21 19:58:56 +03:00
|
|
|
/*
|
|
|
|
* Persistent reservation manager abstract class
|
|
|
|
*
|
|
|
|
* Copyright (c) 2017 Red Hat, Inc.
|
|
|
|
*
|
|
|
|
* Author: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
|
*
|
|
|
|
* This code is licensed under the LGPL.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "qemu/osdep.h"
|
|
|
|
#include <scsi/sg.h>
|
|
|
|
|
|
|
|
#include "qapi/error.h"
|
|
|
|
#include "block/aio.h"
|
|
|
|
#include "block/thread-pool.h"
|
|
|
|
#include "scsi/pr-manager.h"
|
|
|
|
#include "trace.h"
|
2018-02-28 20:47:57 +03:00
|
|
|
#include "qapi/qapi-types-block.h"
|
|
|
|
#include "qapi/qapi-commands-block.h"
|
|
|
|
|
|
|
|
/*
 * Path, relative to the QOM root, of the container whose children
 * qmp_query_pr_managers walks when listing pr-manager objects.
 */
#define PR_MANAGER_PATH "/objects"
|
scsi, file-posix: add support for persistent reservation management
It is a common requirement for virtual machines to send persistent
reservations, but this currently requires either running QEMU with
CAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged
QEMU bypass Linux's filter on SG_IO commands.
As an alternative mechanism, the next patches will introduce a
privileged helper to run persistent reservation commands without
expanding QEMU's attack surface unnecessarily.
The helper is invoked through a "pr-manager" QOM object, to which
file-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and
PERSISTENT RESERVE IN commands. For example:
$ qemu-system-x86_64 \
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
-drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 \
-device scsi-block,drive=hd
or:
$ qemu-system-x86_64 \
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
-blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 \
-device scsi-block,drive=hd
Multiple pr-manager implementations are conceivable and possible, though
only one is implemented right now. For example, a pr-manager could:
- talk directly to the multipath daemon from a privileged QEMU
(i.e. QEMU links to libmpathpersist); this makes reservation work
properly with multipath, but still requires CAP_SYS_RAWIO
- use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though)
- more interestingly, implement reservations directly in QEMU
through file system locks or a shared database (e.g. sqlite)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-08-21 19:58:56 +03:00
|
|
|
|
|
|
|
typedef struct PRManagerData {
|
|
|
|
PRManager *pr_mgr;
|
|
|
|
struct sg_io_hdr *hdr;
|
|
|
|
int fd;
|
|
|
|
} PRManagerData;
|
|
|
|
|
|
|
|
static int pr_manager_worker(void *opaque)
|
|
|
|
{
|
|
|
|
PRManagerData *data = opaque;
|
|
|
|
PRManager *pr_mgr = data->pr_mgr;
|
|
|
|
PRManagerClass *pr_mgr_class =
|
|
|
|
PR_MANAGER_GET_CLASS(pr_mgr);
|
|
|
|
struct sg_io_hdr *hdr = data->hdr;
|
|
|
|
int fd = data->fd;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
g_free(data);
|
|
|
|
trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]);
|
|
|
|
|
|
|
|
/* The reference was taken in pr_manager_execute. */
|
|
|
|
r = pr_mgr_class->run(pr_mgr, fd, hdr);
|
|
|
|
object_unref(OBJECT(pr_mgr));
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
BlockAIOCB *pr_manager_execute(PRManager *pr_mgr,
|
|
|
|
AioContext *ctx, int fd,
|
|
|
|
struct sg_io_hdr *hdr,
|
|
|
|
BlockCompletionFunc *complete,
|
|
|
|
void *opaque)
|
|
|
|
{
|
|
|
|
PRManagerData *data = g_new(PRManagerData, 1);
|
|
|
|
ThreadPool *pool = aio_get_thread_pool(ctx);
|
|
|
|
|
|
|
|
trace_pr_manager_execute(fd, hdr->cmdp[0], hdr->cmdp[1], opaque);
|
|
|
|
data->pr_mgr = pr_mgr;
|
|
|
|
data->fd = fd;
|
|
|
|
data->hdr = hdr;
|
|
|
|
|
|
|
|
/* The matching object_unref is in pr_manager_worker. */
|
|
|
|
object_ref(OBJECT(pr_mgr));
|
|
|
|
return thread_pool_submit_aio(pool, pr_manager_worker,
|
|
|
|
data, complete, opaque);
|
|
|
|
}
|
|
|
|
|
2018-02-28 20:47:57 +03:00
|
|
|
/*
 * Report whether @pr_mgr is connected.
 *
 * A class that does not provide an is_connected method is treated as
 * always connected; otherwise the method's answer is returned.
 */
bool pr_manager_is_connected(PRManager *pr_mgr)
{
    PRManagerClass *klass = PR_MANAGER_GET_CLASS(pr_mgr);

    if (!klass->is_connected) {
        return true;
    }

    return klass->is_connected(pr_mgr);
}
|
|
|
|
|
scsi, file-posix: add support for persistent reservation management
It is a common requirement for virtual machines to send persistent
reservations, but this currently requires either running QEMU with
CAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged
QEMU bypass Linux's filter on SG_IO commands.
As an alternative mechanism, the next patches will introduce a
privileged helper to run persistent reservation commands without
expanding QEMU's attack surface unnecessarily.
The helper is invoked through a "pr-manager" QOM object, to which
file-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and
PERSISTENT RESERVE IN commands. For example:
$ qemu-system-x86_64 \
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
-drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 \
-device scsi-block,drive=hd
or:
$ qemu-system-x86_64 \
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
-blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 \
-device scsi-block,drive=hd
Multiple pr-manager implementations are conceivable and possible, though
only one is implemented right now. For example, a pr-manager could:
- talk directly to the multipath daemon from a privileged QEMU
(i.e. QEMU links to libmpathpersist); this makes reservation work
properly with multipath, but still requires CAP_SYS_RAWIO
- use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though)
- more interestingly, implement reservations directly in QEMU
through file system locks or a shared database (e.g. sqlite)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-08-21 19:58:56 +03:00
|
|
|
static const TypeInfo pr_manager_info = {
|
|
|
|
.parent = TYPE_OBJECT,
|
|
|
|
.name = TYPE_PR_MANAGER,
|
|
|
|
.class_size = sizeof(PRManagerClass),
|
|
|
|
.abstract = true,
|
|
|
|
.interfaces = (InterfaceInfo[]) {
|
|
|
|
{ TYPE_USER_CREATABLE },
|
|
|
|
{ }
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Find the persistent reservation manager named @id.
 *
 * @id:   name of a child of the object returned by object_get_objects_root()
 * @errp: set when no such child exists, or when the child is not of type
 *        TYPE_PR_MANAGER
 *
 * Returns the manager, or NULL with @errp set on failure.
 */
PRManager *pr_manager_lookup(const char *id, Error **errp)
{
    Object *found = object_resolve_path_component(object_get_objects_root(), id);
    PRManager *mgr;

    if (found == NULL) {
        error_setg(errp, "No persistent reservation manager with id '%s'", id);
        return NULL;
    }

    mgr = (PRManager *)object_dynamic_cast(found, TYPE_PR_MANAGER);
    if (mgr == NULL) {
        error_setg(errp,
                   "Object with id '%s' is not a persistent reservation manager",
                   id);
    }

    /* mgr is NULL here exactly when the cast above failed. */
    return mgr;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
pr_manager_register_types(void)
|
|
|
|
{
|
|
|
|
type_register_static(&pr_manager_info);
|
|
|
|
}
|
|
|
|
|
2018-02-28 20:47:57 +03:00
|
|
|
/*
 * object_child_foreach callback used by qmp_query_pr_managers.
 *
 * @object: child being visited; ignored unless it is a TYPE_PR_MANAGER
 * @opaque: address of the caller's "tail" pointer, which itself points at
 *          the list slot where the next element must be stored
 *
 * For each pr-manager a new PRManagerInfoList element is appended and the
 * tail pointer is advanced to the new element's next field.  Always
 * returns 0.
 */
static int query_one_pr_manager(Object *object, void *opaque)
{
    PRManagerInfoList ***tail = opaque;
    PRManager *mgr = (PRManager *)object_dynamic_cast(object, TYPE_PR_MANAGER);
    PRManagerInfoList *node;
    PRManagerInfo *entry;

    if (mgr) {
        node = g_new0(PRManagerInfoList, 1);
        entry = g_new0(PRManagerInfo, 1);

        entry->id = object_get_canonical_path_component(object);
        entry->connected = pr_manager_is_connected(mgr);

        node->value = entry;
        node->next = NULL;

        /* Store the node in the current slot, then move the tail past it. */
        **tail = node;
        *tail = &node->next;
    }

    return 0;
}
|
|
|
|
|
|
|
|
/*
 * Build a list describing every pr-manager found among the children of
 * the PR_MANAGER_PATH container (presumably the handler behind the QMP
 * query-pr-managers command -- confirm against the QAPI schema).
 *
 * @errp: not used by this function
 *
 * Returns the head of the list, or NULL when no pr-manager was found.
 */
PRManagerInfoList *qmp_query_pr_managers(Error **errp)
{
    PRManagerInfoList *list = NULL;
    /* Slot that the next element gets written to; advanced by the callback. */
    PRManagerInfoList **tail = &list;
    Object *objects = container_get(object_get_root(), PR_MANAGER_PATH);

    object_child_foreach(objects, query_one_pr_manager, &tail);

    return list;
}
|
scsi, file-posix: add support for persistent reservation management
It is a common requirement for virtual machines to send persistent
reservations, but this currently requires either running QEMU with
CAP_SYS_RAWIO, or using out-of-tree patches that let an unprivileged
QEMU bypass Linux's filter on SG_IO commands.
As an alternative mechanism, the next patches will introduce a
privileged helper to run persistent reservation commands without
expanding QEMU's attack surface unnecessarily.
The helper is invoked through a "pr-manager" QOM object, to which
file-posix.c passes SG_IO requests for PERSISTENT RESERVE OUT and
PERSISTENT RESERVE IN commands. For example:
$ qemu-system-x86_64 \
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
-drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 \
-device scsi-block,drive=hd
or:
$ qemu-system-x86_64 \
-device virtio-scsi \
-object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock \
-blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 \
-device scsi-block,drive=hd
Multiple pr-manager implementations are conceivable and possible, though
only one is implemented right now. For example, a pr-manager could:
- talk directly to the multipath daemon from a privileged QEMU
(i.e. QEMU links to libmpathpersist); this makes reservation work
properly with multipath, but still requires CAP_SYS_RAWIO
- use the Linux IOC_PR_* ioctls (they require CAP_SYS_ADMIN though)
- more interestingly, implement reservations directly in QEMU
through file system locks or a shared database (e.g. sqlite)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-08-21 19:58:56 +03:00
|
|
|
|
|
|
|
/* Hand pr_manager_register_types to the type_init hook. */
type_init(pr_manager_register_types);
|