2010-12-06 22:53:01 +03:00
|
|
|
/*
|
|
|
|
* QEMU Block driver for RADOS (Ceph)
|
|
|
|
*
|
2011-05-27 03:07:31 +04:00
|
|
|
* Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
|
|
|
|
* Josh Durgin <josh.durgin@dreamhost.com>
|
2010-12-06 22:53:01 +03:00
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
2012-01-13 20:44:23 +04:00
|
|
|
* Contributions after 2012-01-13 are licensed under the terms of the
|
|
|
|
* GNU GPL, version 2 or (at your option) any later version.
|
2010-12-06 22:53:01 +03:00
|
|
|
*/
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
#include <inttypes.h>
|
|
|
|
|
2010-12-06 22:53:01 +03:00
|
|
|
#include "qemu-common.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/error-report.h"
|
2012-12-17 21:19:44 +04:00
|
|
|
#include "block/block_int.h"
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
#include <rbd/librbd.h>
|
2010-12-06 22:53:01 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* When specifying the image filename use:
|
|
|
|
*
|
2011-05-27 03:07:32 +04:00
|
|
|
* rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]]
|
2010-12-06 22:53:01 +03:00
|
|
|
*
|
2011-09-16 01:11:10 +04:00
|
|
|
* poolname must be the name of an existing rados pool.
|
2010-12-06 22:53:01 +03:00
|
|
|
*
|
2011-09-16 01:11:10 +04:00
|
|
|
* devicename is the name of the rbd image.
|
2010-12-06 22:53:01 +03:00
|
|
|
*
|
2011-09-16 01:11:10 +04:00
|
|
|
* Each option given is used to configure rados, and may be any valid
|
|
|
|
* Ceph option, "id", or "conf".
|
2011-05-27 03:07:32 +04:00
|
|
|
*
|
2011-09-16 01:11:10 +04:00
|
|
|
* The "id" option indicates what user we should authenticate as to
|
|
|
|
* the Ceph cluster. If it is excluded we will use the Ceph default
|
|
|
|
* (normally 'admin').
|
2010-12-06 22:53:01 +03:00
|
|
|
*
|
2011-09-16 01:11:10 +04:00
|
|
|
* The "conf" option specifies a Ceph configuration file to read. If
|
|
|
|
* it is not specified, we will read from the default Ceph locations
|
|
|
|
* (e.g., /etc/ceph/ceph.conf). To avoid reading _any_ configuration
|
|
|
|
* file, specify conf=/dev/null.
|
2010-12-06 22:53:01 +03:00
|
|
|
*
|
2011-09-16 01:11:10 +04:00
|
|
|
* Configuration values containing :, @, or = can be escaped with a
|
|
|
|
* leading "\".
|
2010-12-06 22:53:01 +03:00
|
|
|
*/
|
|
|
|
|
2012-05-01 10:16:45 +04:00
|
|
|
/*
 * rbd_aio_discard() was added in librbd 0.1.2, so discard support is only
 * advertised when building against that version or a newer one.
 */
#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
#define LIBRBD_SUPPORTS_DISCARD
#else
#undef LIBRBD_SUPPORTS_DISCARD
#endif
|
|
|
|
|
2010-12-06 22:53:01 +03:00
|
|
|
/*
 * NOTE(review): OBJ_DEFAULT_OBJ_ORDER is not defined in the visible part of
 * this file; this macro only expands correctly if it is provided elsewhere.
 */
#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)

/*
 * Sizes, in bytes, of the fixed buffers used while parsing an rbd: filename.
 * The tokenizer rejects tokens whose length is >= the buffer size, so each
 * size includes room for the terminating NUL.
 */
#define RBD_MAX_CONF_NAME_SIZE 128
#define RBD_MAX_CONF_VAL_SIZE 512
#define RBD_MAX_CONF_SIZE 1024
#define RBD_MAX_POOL_NAME_SIZE 128
#define RBD_MAX_SNAP_NAME_SIZE 128
#define RBD_MAX_SNAPS 100
|
|
|
|
|
2012-05-01 10:16:45 +04:00
|
|
|
/* Kind of asynchronous request carried by an RBDAIOCB. */
typedef enum {
    RBD_AIO_READ,
    RBD_AIO_WRITE,
    RBD_AIO_DISCARD,
    RBD_AIO_FLUSH
} RBDAIOCmd;
|
|
|
|
|
2010-12-06 22:53:01 +03:00
|
|
|
typedef struct RBDAIOCB {
|
|
|
|
BlockDriverAIOCB common;
|
|
|
|
QEMUBH *bh;
|
2012-11-20 16:44:55 +04:00
|
|
|
int64_t ret;
|
2010-12-06 22:53:01 +03:00
|
|
|
QEMUIOVector *qiov;
|
|
|
|
char *bounce;
|
2012-05-01 10:16:45 +04:00
|
|
|
RBDAIOCmd cmd;
|
2010-12-06 22:53:01 +03:00
|
|
|
int64_t sector_num;
|
|
|
|
int error;
|
|
|
|
struct BDRVRBDState *s;
|
|
|
|
int cancelled;
|
2012-11-30 12:55:46 +04:00
|
|
|
int status;
|
2010-12-06 22:53:01 +03:00
|
|
|
} RBDAIOCB;
|
|
|
|
|
|
|
|
typedef struct RADOSCB {
|
|
|
|
int rcbid;
|
|
|
|
RBDAIOCB *acb;
|
|
|
|
struct BDRVRBDState *s;
|
|
|
|
int done;
|
2011-05-27 03:07:31 +04:00
|
|
|
int64_t size;
|
2010-12-06 22:53:01 +03:00
|
|
|
char *buf;
|
2012-11-20 16:44:55 +04:00
|
|
|
int64_t ret;
|
2010-12-06 22:53:01 +03:00
|
|
|
} RADOSCB;
|
|
|
|
|
|
|
|
/*
 * NOTE(review): neither macro is referenced in the visible part of this
 * file; presumably indices of a read/write descriptor pair -- confirm
 * before relying on them.
 */
#define RBD_FD_READ 0
#define RBD_FD_WRITE 1
|
|
|
|
|
|
|
|
typedef struct BDRVRBDState {
|
2011-05-27 03:07:31 +04:00
|
|
|
rados_t cluster;
|
|
|
|
rados_ioctx_t io_ctx;
|
|
|
|
rbd_image_t image;
|
|
|
|
char name[RBD_MAX_IMAGE_NAME_SIZE];
|
|
|
|
char *snap;
|
2010-12-06 22:53:01 +03:00
|
|
|
} BDRVRBDState;
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
/*
 * Copy the next token of @src into @dst.
 *
 * If @delim is not '\0', @src is scanned for the first unescaped occurrence
 * of @delim (a backslash escapes the character that follows it).  When one
 * is found it is overwritten with '\0' -- so @src is modified -- and *@p is
 * set to the character after it.  If no delimiter is found, or @delim is
 * '\0', the whole of @src is the token and *@p is set to NULL.
 *
 * The token is copied verbatim, escape sequences included; callers run
 * qemu_rbd_unescape() on @dst afterwards.
 *
 * Returns 0 on success.  Returns -EINVAL and sets @errp (using @name to
 * describe the token) if the token is empty or does not fit into @dst_len
 * bytes including the terminating NUL; @dst is left untouched in that case.
 */
static int qemu_rbd_next_tok(char *dst, int dst_len,
                             char *src, char delim,
                             const char *name,
                             char **p, Error **errp)
{
    int l;
    char *end;

    *p = NULL;

    if (delim != '\0') {
        for (end = src; *end; ++end) {
            if (*end == delim) {
                break;
            }
            /* skip the escaped character so it cannot act as a delimiter */
            if (*end == '\\' && end[1] != '\0') {
                end++;
            }
        }
        if (*end == delim) {
            *p = end + 1;
            *end = '\0';
        }
    }

    l = strlen(src);
    if (l >= dst_len) {
        error_setg(errp, "%s too long", name);
        return -EINVAL;
    } else if (l == 0) {
        error_setg(errp, "%s too short", name);
        return -EINVAL;
    }

    pstrcpy(dst, dst_len, src);

    return 0;
}
|
|
|
|
|
2011-09-20 00:35:26 +04:00
|
|
|
/*
 * Remove backslash escapes from @src in place: every backslash that is
 * followed by another character is dropped and that character is kept
 * literally.  A backslash that is the last character of the string is kept.
 */
static void qemu_rbd_unescape(char *src)
{
    char *p;

    for (p = src; *src; ++src, ++p) {
        if (*src == '\\' && src[1] != '\0') {
            src++;
        }
        *p = *src;
    }
    *p = '\0';
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
static int qemu_rbd_parsename(const char *filename,
|
|
|
|
char *pool, int pool_len,
|
|
|
|
char *snap, int snap_len,
|
2011-05-27 03:07:32 +04:00
|
|
|
char *name, int name_len,
|
2014-05-16 13:00:11 +04:00
|
|
|
char *conf, int conf_len,
|
|
|
|
Error **errp)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
|
|
|
const char *start;
|
|
|
|
char *p, *buf;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!strstart(filename, "rbd:", &start)) {
|
2014-05-16 13:00:11 +04:00
|
|
|
error_setg(errp, "File name must start with 'rbd:'");
|
2010-12-06 22:53:01 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2011-08-21 07:09:37 +04:00
|
|
|
buf = g_strdup(start);
|
2010-12-06 22:53:01 +03:00
|
|
|
p = buf;
|
2011-05-27 03:07:32 +04:00
|
|
|
*snap = '\0';
|
|
|
|
*conf = '\0';
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2014-05-16 13:00:11 +04:00
|
|
|
ret = qemu_rbd_next_tok(pool, pool_len, p,
|
|
|
|
'/', "pool name", &p, errp);
|
2010-12-06 22:53:01 +03:00
|
|
|
if (ret < 0 || !p) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto done;
|
|
|
|
}
|
2011-09-20 00:35:26 +04:00
|
|
|
qemu_rbd_unescape(pool);
|
2011-05-27 03:07:32 +04:00
|
|
|
|
|
|
|
if (strchr(p, '@')) {
|
2014-05-16 13:00:11 +04:00
|
|
|
ret = qemu_rbd_next_tok(name, name_len, p,
|
|
|
|
'@', "object name", &p, errp);
|
2011-05-27 03:07:32 +04:00
|
|
|
if (ret < 0) {
|
|
|
|
goto done;
|
|
|
|
}
|
2014-05-16 13:00:11 +04:00
|
|
|
ret = qemu_rbd_next_tok(snap, snap_len, p,
|
|
|
|
':', "snap name", &p, errp);
|
2011-09-20 00:35:26 +04:00
|
|
|
qemu_rbd_unescape(snap);
|
2011-05-27 03:07:32 +04:00
|
|
|
} else {
|
2014-05-16 13:00:11 +04:00
|
|
|
ret = qemu_rbd_next_tok(name, name_len, p,
|
|
|
|
':', "object name", &p, errp);
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
2011-09-20 00:35:26 +04:00
|
|
|
qemu_rbd_unescape(name);
|
2011-05-27 03:07:32 +04:00
|
|
|
if (ret < 0 || !p) {
|
2010-12-06 22:53:01 +03:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2014-05-16 13:00:11 +04:00
|
|
|
ret = qemu_rbd_next_tok(conf, conf_len, p,
|
|
|
|
'\0', "configuration", &p, errp);
|
2010-12-06 22:53:01 +03:00
|
|
|
|
|
|
|
done:
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(buf);
|
2010-12-06 22:53:01 +03:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-09-07 20:28:04 +04:00
|
|
|
/*
 * Look for an "id=<value>" entry in the colon-separated option string @conf.
 *
 * If one is found, <value> is copied (NUL-terminated, not unescaped) into
 * @clientname and @clientname is returned; otherwise NULL is returned and
 * @clientname is left untouched.  @clientname must be large enough to hold
 * any single entry of @conf.
 *
 * Entries are split on every ':' with no regard to backslash escapes.
 */
static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
{
    const char *p = conf;

    while (*p) {
        int len;
        const char *end = strchr(p, ':');

        if (end) {
            len = end - p;
        } else {
            len = strlen(p);
        }

        if (strncmp(p, "id=", 3) == 0) {
            len -= 3;
            /* the value is exactly len bytes long, so a plain copy suffices */
            memcpy(clientname, p + 3, len);
            clientname[len] = '\0';
            return clientname;
        }
        if (end == NULL) {
            break;
        }
        p = end + 1;
    }
    return NULL;
}
|
|
|
|
|
2014-05-16 13:00:11 +04:00
|
|
|
/*
 * Apply the colon-separated "name=value" options in @conf to @cluster.
 *
 * "conf=<file>" makes librados read that configuration file, "id=..." is
 * skipped here (qemu_rbd_parse_clientname() handles it), and every other
 * option is passed to rados_conf_set().  Names and values are unescaped
 * before use.
 *
 * Returns 0 on success.  On failure @errp is set and a negative value is
 * returned: the result of rados_conf_read_file() if reading a file failed,
 * -EINVAL otherwise.  Processing stops at the first error.
 */
static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
{
    char *p, *buf;
    char name[RBD_MAX_CONF_NAME_SIZE];
    char value[RBD_MAX_CONF_VAL_SIZE];
    int ret = 0;

    /* the tokenizer modifies its input, so work on a copy */
    buf = g_strdup(conf);
    p = buf;

    while (p) {
        ret = qemu_rbd_next_tok(name, sizeof(name), p,
                                '=', "conf option name", &p, errp);
        if (ret < 0) {
            break;
        }
        qemu_rbd_unescape(name);

        if (!p) {
            error_setg(errp, "conf option %s has no value", name);
            ret = -EINVAL;
            break;
        }

        ret = qemu_rbd_next_tok(value, sizeof(value), p,
                                ':', "conf option value", &p, errp);
        if (ret < 0) {
            break;
        }
        qemu_rbd_unescape(value);

        if (strcmp(name, "conf") == 0) {
            ret = rados_conf_read_file(cluster, value);
            if (ret < 0) {
                error_setg(errp, "error reading conf file %s", value);
                break;
            }
        } else if (strcmp(name, "id") == 0) {
            /* ignore, this is parsed by qemu_rbd_parse_clientname() */
        } else {
            ret = rados_conf_set(cluster, name, value);
            if (ret < 0) {
                error_setg(errp, "invalid conf option %s", name);
                ret = -EINVAL;
                break;
            }
        }
    }

    g_free(buf);
    return ret;
}
|
|
|
|
|
2014-06-05 13:21:04 +04:00
|
|
|
/*
 * Create a new rbd image.
 *
 * @filename is an "rbd:pool/image[...]" specification.  BLOCK_OPT_SIZE gives
 * the image size in bytes; BLOCK_OPT_CLUSTER_SIZE, if non-zero, gives the
 * object size, which must be a power of two and at least 4096.  Both options
 * are removed from @opts.
 *
 * Returns 0 on success.  On failure @errp is set and a negative value is
 * returned: -EINVAL for a bad filename or option, -EIO if the cluster
 * connection could not be set up, otherwise the result of rbd_create().
 */
static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
{
    Error *local_err = NULL;
    int64_t bytes = 0;
    int64_t objsize;
    int obj_order = 0;
    char pool[RBD_MAX_POOL_NAME_SIZE];
    char name[RBD_MAX_IMAGE_NAME_SIZE];
    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
    char conf[RBD_MAX_CONF_SIZE];
    char clientname_buf[RBD_MAX_CONF_SIZE];
    char *clientname;
    rados_t cluster;
    rados_ioctx_t io_ctx;
    int ret;

    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
                           snap_buf, sizeof(snap_buf),
                           name, sizeof(name),
                           conf, sizeof(conf), &local_err) < 0) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }

    /* Read out options */
    bytes = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
    objsize = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 0);
    if (objsize) {
        if ((objsize - 1) & objsize) {    /* not a power of 2? */
            error_setg(errp, "obj size needs to be power of 2");
            return -EINVAL;
        }
        if (objsize < 4096) {
            error_setg(errp, "obj size too small");
            return -EINVAL;
        }
        /*
         * objsize is a positive power of two here, so this computes its
         * base-2 logarithm.  ffs() is not used because it takes an int and
         * would truncate 64-bit sizes.
         */
        while ((objsize >> obj_order) > 1) {
            obj_order++;
        }
    }

    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
    if (rados_create(&cluster, clientname) < 0) {
        error_setg(errp, "error initializing");
        return -EIO;
    }

    if (strstr(conf, "conf=") == NULL) {
        /* try default location, but ignore failure */
        rados_conf_read_file(cluster, NULL);
    }

    if (conf[0] != '\0' &&
        qemu_rbd_set_conf(cluster, conf, &local_err) < 0) {
        rados_shutdown(cluster);
        error_propagate(errp, local_err);
        return -EIO;
    }

    if (rados_connect(cluster) < 0) {
        error_setg(errp, "error connecting");
        rados_shutdown(cluster);
        return -EIO;
    }

    if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
        error_setg(errp, "error opening pool %s", pool);
        rados_shutdown(cluster);
        return -EIO;
    }

    ret = rbd_create(io_ctx, name, bytes, &obj_order);
    if (ret < 0) {
        /* report the failure instead of returning a bare error code */
        error_setg(errp, "error creating image %s: %s", name, strerror(-ret));
    }
    rados_ioctx_destroy(io_ctx);
    rados_shutdown(cluster);

    return ret;
}
|
|
|
|
|
|
|
|
/*
|
2013-12-05 19:38:33 +04:00
|
|
|
* This aio completion is being called from rbd_finish_bh() and runs in qemu
|
|
|
|
* BH context.
|
2010-12-06 22:53:01 +03:00
|
|
|
*/
|
2011-05-27 03:07:31 +04:00
|
|
|
static void qemu_rbd_complete_aio(RADOSCB *rcb)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
|
|
|
RBDAIOCB *acb = rcb->acb;
|
|
|
|
int64_t r;
|
|
|
|
|
|
|
|
r = rcb->ret;
|
|
|
|
|
2013-03-30 00:03:23 +04:00
|
|
|
if (acb->cmd != RBD_AIO_READ) {
|
2010-12-06 22:53:01 +03:00
|
|
|
if (r < 0) {
|
|
|
|
acb->ret = r;
|
|
|
|
acb->error = 1;
|
|
|
|
} else if (!acb->error) {
|
2011-05-27 03:07:31 +04:00
|
|
|
acb->ret = rcb->size;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
} else {
|
2011-05-27 03:07:31 +04:00
|
|
|
if (r < 0) {
|
|
|
|
memset(rcb->buf, 0, rcb->size);
|
2010-12-06 22:53:01 +03:00
|
|
|
acb->ret = r;
|
|
|
|
acb->error = 1;
|
2011-05-27 03:07:31 +04:00
|
|
|
} else if (r < rcb->size) {
|
|
|
|
memset(rcb->buf + r, 0, rcb->size - r);
|
2010-12-06 22:53:01 +03:00
|
|
|
if (!acb->error) {
|
2011-05-27 03:07:31 +04:00
|
|
|
acb->ret = rcb->size;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
} else if (!acb->error) {
|
2011-05-27 03:07:31 +04:00
|
|
|
acb->ret = r;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-05 19:38:33 +04:00
|
|
|
g_free(rcb);
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2013-12-05 19:38:33 +04:00
|
|
|
if (acb->cmd == RBD_AIO_READ) {
|
|
|
|
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
|
|
|
|
}
|
|
|
|
qemu_vfree(acb->bounce);
|
|
|
|
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
|
|
|
|
acb->status = 0;
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2013-12-05 19:38:33 +04:00
|
|
|
if (!acb->cancelled) {
|
|
|
|
qemu_aio_release(acb);
|
|
|
|
}
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2013-04-12 20:05:35 +04:00
|
|
|
/* TODO Convert to fine grained options */
|
|
|
|
static QemuOptsList runtime_opts = {
|
|
|
|
.name = "rbd",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
|
|
|
|
.desc = {
|
|
|
|
{
|
|
|
|
.name = "filename",
|
|
|
|
.type = QEMU_OPT_STRING,
|
|
|
|
.help = "Specification of the rbd image",
|
|
|
|
},
|
|
|
|
{ /* end of list */ }
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
2013-09-05 16:22:29 +04:00
|
|
|
static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
|
|
|
|
Error **errp)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
|
|
|
BDRVRBDState *s = bs->opaque;
|
2011-05-27 03:07:31 +04:00
|
|
|
char pool[RBD_MAX_POOL_NAME_SIZE];
|
|
|
|
char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
|
2011-05-27 03:07:32 +04:00
|
|
|
char conf[RBD_MAX_CONF_SIZE];
|
2011-09-07 20:28:04 +04:00
|
|
|
char clientname_buf[RBD_MAX_CONF_SIZE];
|
|
|
|
char *clientname;
|
2013-04-12 20:05:35 +04:00
|
|
|
QemuOpts *opts;
|
|
|
|
Error *local_err = NULL;
|
|
|
|
const char *filename;
|
2010-12-06 22:53:01 +03:00
|
|
|
int r;
|
|
|
|
|
2014-01-02 06:49:17 +04:00
|
|
|
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
|
2013-04-12 20:05:35 +04:00
|
|
|
qemu_opts_absorb_qdict(opts, options, &local_err);
|
2014-01-30 18:07:28 +04:00
|
|
|
if (local_err) {
|
2014-05-16 13:00:11 +04:00
|
|
|
error_propagate(errp, local_err);
|
2013-04-12 20:05:35 +04:00
|
|
|
qemu_opts_del(opts);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
filename = qemu_opt_get(opts, "filename");
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
|
|
|
|
snap_buf, sizeof(snap_buf),
|
2011-05-27 03:07:32 +04:00
|
|
|
s->name, sizeof(s->name),
|
2014-05-16 13:00:11 +04:00
|
|
|
conf, sizeof(conf), errp) < 0) {
|
2013-04-25 17:59:27 +04:00
|
|
|
r = -EINVAL;
|
|
|
|
goto failed_opts;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2011-09-07 20:28:04 +04:00
|
|
|
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
|
|
|
|
r = rados_create(&s->cluster, clientname);
|
2011-05-27 03:07:31 +04:00
|
|
|
if (r < 0) {
|
2014-05-16 13:00:11 +04:00
|
|
|
error_setg(&local_err, "error initializing");
|
2013-04-25 17:59:27 +04:00
|
|
|
goto failed_opts;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2011-09-07 20:28:06 +04:00
|
|
|
s->snap = NULL;
|
|
|
|
if (snap_buf[0] != '\0') {
|
|
|
|
s->snap = g_strdup(snap_buf);
|
|
|
|
}
|
|
|
|
|
2012-05-18 00:42:29 +04:00
|
|
|
/*
|
|
|
|
* Fallback to more conservative semantics if setting cache
|
|
|
|
* options fails. Ignore errors from setting rbd_cache because the
|
|
|
|
* only possible error is that the option does not exist, and
|
|
|
|
* librbd defaults to no caching. If write through caching cannot
|
|
|
|
* be set up, fall back to no caching.
|
|
|
|
*/
|
|
|
|
if (flags & BDRV_O_NOCACHE) {
|
|
|
|
rados_conf_set(s->cluster, "rbd_cache", "false");
|
|
|
|
} else {
|
|
|
|
rados_conf_set(s->cluster, "rbd_cache", "true");
|
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:32 +04:00
|
|
|
if (strstr(conf, "conf=") == NULL) {
|
2011-09-16 01:11:08 +04:00
|
|
|
/* try default location, but ignore failure */
|
|
|
|
rados_conf_read_file(s->cluster, NULL);
|
2011-05-27 03:07:32 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (conf[0] != '\0') {
|
2014-05-16 13:00:11 +04:00
|
|
|
r = qemu_rbd_set_conf(s->cluster, conf, errp);
|
2011-05-27 03:07:32 +04:00
|
|
|
if (r < 0) {
|
2011-09-07 20:28:06 +04:00
|
|
|
goto failed_shutdown;
|
2011-05-27 03:07:32 +04:00
|
|
|
}
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
r = rados_connect(s->cluster);
|
|
|
|
if (r < 0) {
|
2014-05-16 13:00:11 +04:00
|
|
|
error_setg(&local_err, "error connecting");
|
2011-09-07 20:28:06 +04:00
|
|
|
goto failed_shutdown;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
|
|
|
|
if (r < 0) {
|
2014-05-16 13:00:11 +04:00
|
|
|
error_setg(&local_err, "error opening pool %s", pool);
|
2011-09-07 20:28:06 +04:00
|
|
|
goto failed_shutdown;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
|
2010-12-06 22:53:01 +03:00
|
|
|
if (r < 0) {
|
2014-05-16 13:00:11 +04:00
|
|
|
error_setg(&local_err, "error reading header from %s", s->name);
|
2011-09-07 20:28:06 +04:00
|
|
|
goto failed_open;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
bs->read_only = (s->snap != NULL);
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2013-04-25 17:59:27 +04:00
|
|
|
qemu_opts_del(opts);
|
2010-12-06 22:53:01 +03:00
|
|
|
return 0;
|
|
|
|
|
2011-09-07 20:28:06 +04:00
|
|
|
failed_open:
|
2011-05-27 03:07:31 +04:00
|
|
|
rados_ioctx_destroy(s->io_ctx);
|
2011-09-07 20:28:06 +04:00
|
|
|
failed_shutdown:
|
2011-05-27 03:07:31 +04:00
|
|
|
rados_shutdown(s->cluster);
|
2011-09-07 20:28:06 +04:00
|
|
|
g_free(s->snap);
|
2013-04-25 17:59:27 +04:00
|
|
|
failed_opts:
|
|
|
|
qemu_opts_del(opts);
|
2010-12-06 22:53:01 +03:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
/*
 * Close the image and release everything qemu_rbd_open() acquired: the
 * image handle, the pool I/O context, the snapshot name and the cluster
 * handle.
 */
static void qemu_rbd_close(BlockDriverState *bs)
{
    BDRVRBDState *s = bs->opaque;

    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
    rados_shutdown(s->cluster);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Cancel aio. Since we don't reference acb in a non qemu threads,
|
|
|
|
* it is safe to access it here.
|
|
|
|
*/
|
2011-05-27 03:07:31 +04:00
|
|
|
static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
|
|
|
RBDAIOCB *acb = (RBDAIOCB *) blockacb;
|
|
|
|
acb->cancelled = 1;
|
2012-11-30 12:55:46 +04:00
|
|
|
|
|
|
|
while (acb->status == -EINPROGRESS) {
|
2014-05-08 18:34:51 +04:00
|
|
|
aio_poll(bdrv_get_aio_context(acb->common.bs), true);
|
2012-11-30 12:55:46 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
qemu_aio_release(acb);
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
static const AIOCBInfo rbd_aiocb_info = {
|
2010-12-06 22:53:01 +03:00
|
|
|
.aiocb_size = sizeof(RBDAIOCB),
|
2011-05-27 03:07:31 +04:00
|
|
|
.cancel = qemu_rbd_aio_cancel,
|
2010-12-06 22:53:01 +03:00
|
|
|
};
|
|
|
|
|
2013-12-05 19:38:33 +04:00
|
|
|
static void rbd_finish_bh(void *opaque)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
2013-12-05 19:38:33 +04:00
|
|
|
RADOSCB *rcb = opaque;
|
|
|
|
qemu_bh_delete(rcb->acb->bh);
|
|
|
|
qemu_rbd_complete_aio(rcb);
|
2011-05-27 03:07:31 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the callback function for rbd_aio_read and _write
|
|
|
|
*
|
|
|
|
* Note: this function is being called from a non qemu thread so
|
|
|
|
* we need to be careful about what we do here. Generally we only
|
2013-12-05 19:38:33 +04:00
|
|
|
* schedule a BH, and do the rest of the io completion handling
|
|
|
|
* from rbd_finish_bh() which runs in a qemu context.
|
2011-05-27 03:07:31 +04:00
|
|
|
*/
|
|
|
|
static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
|
|
|
|
{
|
2013-12-05 19:38:33 +04:00
|
|
|
RBDAIOCB *acb = rcb->acb;
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
rcb->ret = rbd_aio_get_return_value(c);
|
|
|
|
rbd_aio_release(c);
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2014-05-08 18:34:51 +04:00
|
|
|
acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
|
|
|
|
rbd_finish_bh, rcb);
|
2013-12-05 19:38:33 +04:00
|
|
|
qemu_bh_schedule(acb->bh);
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2012-05-01 10:16:45 +04:00
|
|
|
static int rbd_aio_discard_wrapper(rbd_image_t image,
|
|
|
|
uint64_t off,
|
|
|
|
uint64_t len,
|
|
|
|
rbd_completion_t comp)
|
|
|
|
{
|
|
|
|
#ifdef LIBRBD_SUPPORTS_DISCARD
|
|
|
|
return rbd_aio_discard(image, off, len, comp);
|
|
|
|
#else
|
|
|
|
return -ENOTSUP;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2013-03-30 00:03:23 +04:00
|
|
|
static int rbd_aio_flush_wrapper(rbd_image_t image,
|
|
|
|
rbd_completion_t comp)
|
|
|
|
{
|
|
|
|
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
|
|
|
|
return rbd_aio_flush(image, comp);
|
|
|
|
#else
|
|
|
|
return -ENOTSUP;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2012-05-01 10:16:45 +04:00
|
|
|
static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
int nb_sectors,
|
|
|
|
BlockDriverCompletionFunc *cb,
|
|
|
|
void *opaque,
|
|
|
|
RBDAIOCmd cmd)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
|
|
|
RBDAIOCB *acb;
|
2014-05-21 20:11:48 +04:00
|
|
|
RADOSCB *rcb = NULL;
|
2011-05-27 03:07:31 +04:00
|
|
|
rbd_completion_t c;
|
2010-12-06 22:53:01 +03:00
|
|
|
int64_t off, size;
|
|
|
|
char *buf;
|
2011-05-27 03:07:33 +04:00
|
|
|
int r;
|
2010-12-06 22:53:01 +03:00
|
|
|
|
|
|
|
BDRVRBDState *s = bs->opaque;
|
|
|
|
|
2012-10-31 19:34:37 +04:00
|
|
|
acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
|
2012-05-01 10:16:45 +04:00
|
|
|
acb->cmd = cmd;
|
2010-12-06 22:53:01 +03:00
|
|
|
acb->qiov = qiov;
|
2013-03-30 00:03:23 +04:00
|
|
|
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
|
2012-05-01 10:16:45 +04:00
|
|
|
acb->bounce = NULL;
|
|
|
|
} else {
|
2014-05-21 20:11:48 +04:00
|
|
|
acb->bounce = qemu_try_blockalign(bs, qiov->size);
|
|
|
|
if (acb->bounce == NULL) {
|
|
|
|
goto failed;
|
|
|
|
}
|
2012-05-01 10:16:45 +04:00
|
|
|
}
|
2010-12-06 22:53:01 +03:00
|
|
|
acb->ret = 0;
|
|
|
|
acb->error = 0;
|
|
|
|
acb->s = s;
|
|
|
|
acb->cancelled = 0;
|
|
|
|
acb->bh = NULL;
|
2012-11-30 12:55:46 +04:00
|
|
|
acb->status = -EINPROGRESS;
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2012-05-01 10:16:45 +04:00
|
|
|
if (cmd == RBD_AIO_WRITE) {
|
2012-06-07 20:21:06 +04:00
|
|
|
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
buf = acb->bounce;
|
|
|
|
|
|
|
|
off = sector_num * BDRV_SECTOR_SIZE;
|
|
|
|
size = nb_sectors * BDRV_SECTOR_SIZE;
|
|
|
|
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 12:31:08 +04:00
|
|
|
rcb = g_new(RADOSCB, 1);
|
2011-05-27 03:07:31 +04:00
|
|
|
rcb->done = 0;
|
|
|
|
rcb->acb = acb;
|
|
|
|
rcb->buf = buf;
|
|
|
|
rcb->s = acb->s;
|
|
|
|
rcb->size = size;
|
2011-05-27 03:07:33 +04:00
|
|
|
r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
|
|
|
|
if (r < 0) {
|
|
|
|
goto failed;
|
|
|
|
}
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2012-05-01 10:16:45 +04:00
|
|
|
switch (cmd) {
|
|
|
|
case RBD_AIO_WRITE:
|
2011-05-27 03:07:33 +04:00
|
|
|
r = rbd_aio_write(s->image, off, size, buf, c);
|
2012-05-01 10:16:45 +04:00
|
|
|
break;
|
|
|
|
case RBD_AIO_READ:
|
2011-05-27 03:07:33 +04:00
|
|
|
r = rbd_aio_read(s->image, off, size, buf, c);
|
2012-05-01 10:16:45 +04:00
|
|
|
break;
|
|
|
|
case RBD_AIO_DISCARD:
|
|
|
|
r = rbd_aio_discard_wrapper(s->image, off, size, c);
|
|
|
|
break;
|
2013-03-30 00:03:23 +04:00
|
|
|
case RBD_AIO_FLUSH:
|
|
|
|
r = rbd_aio_flush_wrapper(s->image, c);
|
|
|
|
break;
|
2012-05-01 10:16:45 +04:00
|
|
|
default:
|
|
|
|
r = -EINVAL;
|
2011-05-27 03:07:33 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (r < 0) {
|
2014-06-05 18:19:26 +04:00
|
|
|
goto failed_completion;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return &acb->common;
|
2011-05-27 03:07:33 +04:00
|
|
|
|
2014-06-05 18:19:26 +04:00
|
|
|
failed_completion:
|
|
|
|
rbd_aio_release(c);
|
2011-05-27 03:07:33 +04:00
|
|
|
failed:
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(rcb);
|
2014-06-05 18:19:26 +04:00
|
|
|
qemu_vfree(acb->bounce);
|
2011-05-27 03:07:33 +04:00
|
|
|
qemu_aio_release(acb);
|
|
|
|
return NULL;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
int nb_sectors,
|
|
|
|
BlockDriverCompletionFunc *cb,
|
|
|
|
void *opaque)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
2012-05-01 10:16:45 +04:00
|
|
|
return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
|
|
|
|
RBD_AIO_READ);
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
QEMUIOVector *qiov,
|
|
|
|
int nb_sectors,
|
|
|
|
BlockDriverCompletionFunc *cb,
|
|
|
|
void *opaque)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
2012-05-01 10:16:45 +04:00
|
|
|
return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
|
|
|
|
RBD_AIO_WRITE);
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
2013-03-30 00:03:23 +04:00
|
|
|
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
|
|
|
|
static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
|
|
|
|
BlockDriverCompletionFunc *cb,
|
|
|
|
void *opaque)
|
|
|
|
{
|
|
|
|
return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2011-10-20 15:16:24 +04:00
|
|
|
static int qemu_rbd_co_flush(BlockDriverState *bs)
|
2011-09-16 01:11:11 +04:00
|
|
|
{
|
|
|
|
#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
|
|
|
|
/* rbd_flush added in 0.1.1 */
|
|
|
|
BDRVRBDState *s = bs->opaque;
|
|
|
|
return rbd_flush(s->image);
|
|
|
|
#else
|
|
|
|
return 0;
|
|
|
|
#endif
|
|
|
|
}
|
2013-03-30 00:03:23 +04:00
|
|
|
#endif
|
2011-09-16 01:11:11 +04:00
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
/*
 * Fill in @bdi: the cluster size is the image's object size as reported by
 * rbd_stat().  Returns 0 on success or the negative result of rbd_stat().
 */
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BDRVRBDState *s = bs->opaque;
    rbd_image_info_t info;
    int r;

    r = rbd_stat(s->image, &info, sizeof(info));
    if (r < 0) {
        return r;
    }

    bdi->cluster_size = info.obj_size;
    return 0;
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
/*
 * Return the image size as reported by rbd_stat(), or the negative result
 * of rbd_stat() on failure.
 */
static int64_t qemu_rbd_getlength(BlockDriverState *bs)
{
    BDRVRBDState *s = bs->opaque;
    rbd_image_info_t info;
    int r;

    r = rbd_stat(s->image, &info, sizeof(info));
    if (r < 0) {
        return r;
    }

    return info.size;
}
|
|
|
|
|
2011-05-27 03:07:34 +04:00
|
|
|
/*
 * Resize the image to @offset bytes.  Returns 0 on success or the negative
 * result of rbd_resize().
 */
static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRBDState *s = bs->opaque;
    int r;

    r = rbd_resize(s->image, offset);
    if (r < 0) {
        return r;
    }

    return 0;
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
static int qemu_rbd_snap_create(BlockDriverState *bs,
|
|
|
|
QEMUSnapshotInfo *sn_info)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
|
|
|
BDRVRBDState *s = bs->opaque;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (sn_info->name[0] == '\0') {
|
|
|
|
return -EINVAL; /* we need a name for rbd snapshots */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* rbd snapshots are using the name as the user controlled unique identifier
|
|
|
|
* we can't use the rbd snapid for that purpose, as it can't be set
|
|
|
|
*/
|
|
|
|
if (sn_info->id_str[0] != '\0' &&
|
|
|
|
strcmp(sn_info->id_str, sn_info->name) != 0) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strlen(sn_info->name) >= sizeof(sn_info->id_str)) {
|
|
|
|
return -ERANGE;
|
|
|
|
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
r = rbd_snap_create(s->image, sn_info->name);
|
2010-12-06 22:53:01 +03:00
|
|
|
if (r < 0) {
|
2011-05-27 03:07:31 +04:00
|
|
|
error_report("failed to create snap: %s", strerror(-r));
|
2010-12-06 22:53:01 +03:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-01-11 23:53:52 +04:00
|
|
|
static int qemu_rbd_snap_remove(BlockDriverState *bs,
|
snapshot: distinguish id and name in snapshot delete
Snapshot creation actually already distinguish id and name since it take
a structured parameter *sn, but delete can't. Later an accurate delete
is needed in qmp_transaction abort and blockdev-snapshot-delete-sync,
so change its prototype. Also *errp is added to tip error, but return
value is kepted to let caller check what kind of error happens. Existing
caller for it are savevm, delvm and qemu-img, they are not impacted by
introducing a new function bdrv_snapshot_delete_by_id_or_name(), which
check the return value and do the operation again.
Before this patch:
For qcow2, it search id first then name to find the one to delete.
For rbd, it search name.
For sheepdog, it does nothing.
After this patch:
For qcow2, logic is the same by call it twice in caller.
For rbd, it always fails in delete with id, but still search for name
in second try, no change to user.
Some code for *errp is based on Pavel's patch.
Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2013-09-11 10:04:33 +04:00
|
|
|
const char *snapshot_id,
|
|
|
|
const char *snapshot_name,
|
|
|
|
Error **errp)
|
2012-01-11 23:53:52 +04:00
|
|
|
{
|
|
|
|
BDRVRBDState *s = bs->opaque;
|
|
|
|
int r;
|
|
|
|
|
snapshot: distinguish id and name in snapshot delete
Snapshot creation actually already distinguish id and name since it take
a structured parameter *sn, but delete can't. Later an accurate delete
is needed in qmp_transaction abort and blockdev-snapshot-delete-sync,
so change its prototype. Also *errp is added to tip error, but return
value is kepted to let caller check what kind of error happens. Existing
caller for it are savevm, delvm and qemu-img, they are not impacted by
introducing a new function bdrv_snapshot_delete_by_id_or_name(), which
check the return value and do the operation again.
Before this patch:
For qcow2, it search id first then name to find the one to delete.
For rbd, it search name.
For sheepdog, it does nothing.
After this patch:
For qcow2, logic is the same by call it twice in caller.
For rbd, it always fails in delete with id, but still search for name
in second try, no change to user.
Some code for *errp is based on Pavel's patch.
Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2013-09-11 10:04:33 +04:00
|
|
|
if (!snapshot_name) {
|
|
|
|
error_setg(errp, "rbd need a valid snapshot name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If snapshot_id is specified, it must be equal to name, see
|
|
|
|
qemu_rbd_snap_list() */
|
|
|
|
if (snapshot_id && strcmp(snapshot_id, snapshot_name)) {
|
|
|
|
error_setg(errp,
|
|
|
|
"rbd do not support snapshot id, it should be NULL or "
|
|
|
|
"equal to snapshot name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2012-01-11 23:53:52 +04:00
|
|
|
r = rbd_snap_remove(s->image, snapshot_name);
|
snapshot: distinguish id and name in snapshot delete
Snapshot creation actually already distinguish id and name since it take
a structured parameter *sn, but delete can't. Later an accurate delete
is needed in qmp_transaction abort and blockdev-snapshot-delete-sync,
so change its prototype. Also *errp is added to tip error, but return
value is kepted to let caller check what kind of error happens. Existing
caller for it are savevm, delvm and qemu-img, they are not impacted by
introducing a new function bdrv_snapshot_delete_by_id_or_name(), which
check the return value and do the operation again.
Before this patch:
For qcow2, it search id first then name to find the one to delete.
For rbd, it search name.
For sheepdog, it does nothing.
After this patch:
For qcow2, logic is the same by call it twice in caller.
For rbd, it always fails in delete with id, but still search for name
in second try, no change to user.
Some code for *errp is based on Pavel's patch.
Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2013-09-11 10:04:33 +04:00
|
|
|
if (r < 0) {
|
|
|
|
error_setg_errno(errp, -r, "Failed to remove the snapshot");
|
|
|
|
}
|
2012-01-11 23:53:52 +04:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Roll the rbd image backing @bs back to the snapshot @snapshot_name.
 *
 * Returns the result of rbd_snap_rollback() unchanged.
 */
static int qemu_rbd_snap_rollback(BlockDriverState *bs,
                                  const char *snapshot_name)
{
    BDRVRBDState *state = bs->opaque;

    return rbd_snap_rollback(state->image, snapshot_name);
}
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
static int qemu_rbd_snap_list(BlockDriverState *bs,
|
|
|
|
QEMUSnapshotInfo **psn_tab)
|
2010-12-06 22:53:01 +03:00
|
|
|
{
|
|
|
|
BDRVRBDState *s = bs->opaque;
|
|
|
|
QEMUSnapshotInfo *sn_info, *sn_tab = NULL;
|
2011-05-27 03:07:31 +04:00
|
|
|
int i, snap_count;
|
|
|
|
rbd_snap_info_t *snaps;
|
|
|
|
int max_snaps = RBD_MAX_SNAPS;
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
do {
|
2014-08-19 12:31:09 +04:00
|
|
|
snaps = g_new(rbd_snap_info_t, max_snaps);
|
2011-05-27 03:07:31 +04:00
|
|
|
snap_count = rbd_snap_list(s->image, snaps, &max_snaps);
|
2013-09-25 18:00:48 +04:00
|
|
|
if (snap_count <= 0) {
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(snaps);
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
2011-05-27 03:07:31 +04:00
|
|
|
} while (snap_count == -ERANGE);
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
if (snap_count <= 0) {
|
2011-12-07 05:05:10 +04:00
|
|
|
goto done;
|
2010-12-06 22:53:01 +03:00
|
|
|
}
|
|
|
|
|
block: Use g_new() & friends where that makes obvious sense
g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer,
for two reasons. One, it catches multiplication overflowing size_t.
Two, it returns T * rather than void *, which lets the compiler catch
more type errors.
Patch created with Coccinelle, with two manual changes on top:
* Add const to bdrv_iterate_format() to keep the types straight
* Convert the allocation in bdrv_drop_intermediate(), which Coccinelle
inexplicably misses
Coccinelle semantic patch:
@@
type T;
@@
-g_malloc(sizeof(T))
+g_new(T, 1)
@@
type T;
@@
-g_try_malloc(sizeof(T))
+g_try_new(T, 1)
@@
type T;
@@
-g_malloc0(sizeof(T))
+g_new0(T, 1)
@@
type T;
@@
-g_try_malloc0(sizeof(T))
+g_try_new0(T, 1)
@@
type T;
expression n;
@@
-g_malloc(sizeof(T) * (n))
+g_new(T, n)
@@
type T;
expression n;
@@
-g_try_malloc(sizeof(T) * (n))
+g_try_new(T, n)
@@
type T;
expression n;
@@
-g_malloc0(sizeof(T) * (n))
+g_new0(T, n)
@@
type T;
expression n;
@@
-g_try_malloc0(sizeof(T) * (n))
+g_try_new0(T, n)
@@
type T;
expression p, n;
@@
-g_realloc(p, sizeof(T) * (n))
+g_renew(T, p, n)
@@
type T;
expression p, n;
@@
-g_try_realloc(p, sizeof(T) * (n))
+g_try_renew(T, p, n)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-08-19 12:31:08 +04:00
|
|
|
sn_tab = g_new0(QEMUSnapshotInfo, snap_count);
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
for (i = 0; i < snap_count; i++) {
|
|
|
|
const char *snap_name = snaps[i].name;
|
2010-12-06 22:53:01 +03:00
|
|
|
|
|
|
|
sn_info = sn_tab + i;
|
|
|
|
pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), snap_name);
|
|
|
|
pstrcpy(sn_info->name, sizeof(sn_info->name), snap_name);
|
|
|
|
|
2011-05-27 03:07:31 +04:00
|
|
|
sn_info->vm_state_size = snaps[i].size;
|
2010-12-06 22:53:01 +03:00
|
|
|
sn_info->date_sec = 0;
|
|
|
|
sn_info->date_nsec = 0;
|
|
|
|
sn_info->vm_clock_nsec = 0;
|
|
|
|
}
|
2011-05-27 03:07:31 +04:00
|
|
|
rbd_snap_list_end(snaps);
|
2013-09-25 18:00:48 +04:00
|
|
|
g_free(snaps);
|
2011-05-27 03:07:31 +04:00
|
|
|
|
2011-12-07 05:05:10 +04:00
|
|
|
done:
|
2010-12-06 22:53:01 +03:00
|
|
|
*psn_tab = sn_tab;
|
|
|
|
return snap_count;
|
|
|
|
}
|
|
|
|
|
2012-05-01 10:16:45 +04:00
|
|
|
#ifdef LIBRBD_SUPPORTS_DISCARD
|
|
|
|
static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
|
|
|
|
int64_t sector_num,
|
|
|
|
int nb_sectors,
|
|
|
|
BlockDriverCompletionFunc *cb,
|
|
|
|
void *opaque)
|
|
|
|
{
|
|
|
|
return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
|
|
|
|
RBD_AIO_DISCARD);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2014-06-05 13:21:04 +04:00
|
|
|
static QemuOptsList qemu_rbd_create_opts = {
|
|
|
|
.name = "rbd-create-opts",
|
|
|
|
.head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head),
|
|
|
|
.desc = {
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "Virtual disk size"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = BLOCK_OPT_CLUSTER_SIZE,
|
|
|
|
.type = QEMU_OPT_SIZE,
|
|
|
|
.help = "RBD object size"
|
|
|
|
},
|
|
|
|
{ /* end of list */ }
|
|
|
|
}
|
2010-12-06 22:53:01 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static BlockDriver bdrv_rbd = {
|
|
|
|
.format_name = "rbd",
|
|
|
|
.instance_size = sizeof(BDRVRBDState),
|
2013-09-24 19:07:04 +04:00
|
|
|
.bdrv_needs_filename = true,
|
2011-05-27 03:07:31 +04:00
|
|
|
.bdrv_file_open = qemu_rbd_open,
|
|
|
|
.bdrv_close = qemu_rbd_close,
|
2014-06-05 13:21:11 +04:00
|
|
|
.bdrv_create = qemu_rbd_create,
|
2013-06-28 14:47:42 +04:00
|
|
|
.bdrv_has_zero_init = bdrv_has_zero_init_1,
|
2011-05-27 03:07:31 +04:00
|
|
|
.bdrv_get_info = qemu_rbd_getinfo,
|
2014-06-05 13:21:04 +04:00
|
|
|
.create_opts = &qemu_rbd_create_opts,
|
2011-05-27 03:07:31 +04:00
|
|
|
.bdrv_getlength = qemu_rbd_getlength,
|
2011-05-27 03:07:34 +04:00
|
|
|
.bdrv_truncate = qemu_rbd_truncate,
|
2010-12-06 22:53:01 +03:00
|
|
|
.protocol_name = "rbd",
|
|
|
|
|
2011-11-10 20:25:44 +04:00
|
|
|
.bdrv_aio_readv = qemu_rbd_aio_readv,
|
|
|
|
.bdrv_aio_writev = qemu_rbd_aio_writev,
|
2013-03-30 00:03:23 +04:00
|
|
|
|
|
|
|
#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
|
|
|
|
.bdrv_aio_flush = qemu_rbd_aio_flush,
|
|
|
|
#else
|
2011-11-10 20:25:44 +04:00
|
|
|
.bdrv_co_flush_to_disk = qemu_rbd_co_flush,
|
2013-03-30 00:03:23 +04:00
|
|
|
#endif
|
2010-12-06 22:53:01 +03:00
|
|
|
|
2012-05-01 10:16:45 +04:00
|
|
|
#ifdef LIBRBD_SUPPORTS_DISCARD
|
|
|
|
.bdrv_aio_discard = qemu_rbd_aio_discard,
|
|
|
|
#endif
|
|
|
|
|
2011-11-10 20:25:44 +04:00
|
|
|
.bdrv_snapshot_create = qemu_rbd_snap_create,
|
2012-01-11 23:53:52 +04:00
|
|
|
.bdrv_snapshot_delete = qemu_rbd_snap_remove,
|
2011-11-10 20:25:44 +04:00
|
|
|
.bdrv_snapshot_list = qemu_rbd_snap_list,
|
2012-01-11 23:53:52 +04:00
|
|
|
.bdrv_snapshot_goto = qemu_rbd_snap_rollback,
|
2010-12-06 22:53:01 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static void bdrv_rbd_init(void)
|
|
|
|
{
|
|
|
|
bdrv_register(&bdrv_rbd);
|
|
|
|
}
|
|
|
|
|
|
|
|
block_init(bdrv_rbd_init);
|