Block layer patches for 2.4.0-rc1

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABAgAGBQJVpSeXAAoJEH8JsnLIjy/WhyQP/3w/Nq1MnsT14Ak79LYsOd4C
 PpwgZF9Z3RZZoe+9bBO9RWzP1y/Pmxnui73A+1d+B4+moKswbvJW4lwx1aaP3Igq
 9XzMA3/xwqdx5kGX7PDSZkdVZC513hI8rIxV9Ly+yE3h729dTHlrULfrgO8MWieQ
 zoWQAMk55tDmazuKKHXAHRphicWvg71+xOHCsJt/MMz/5u8ShHssoIqNXGj8G28m
 AZu2tzkBF4c6Y4b1aghsDpMnc269wD/4UNegpkTz66S9FpSix/f4GOtLGmwXJSrg
 zz4SDMNx7b/U/mZzV94LEd4GP2hEI2tzIKvkd3RHaeyVVlLqnxCHA4W0lpoTO4Ud
 QR8aWjFLIq4HqFrY3JC1EhpGMF0M3PAXMqtau1bCmLOr/+TZWazP0H/WJW65MCwM
 5ertw87apRsIVPUhV22fKNUXary+GE7OHKEklUopFzUfgzzMq9uSTLbBAHX7j2jA
 FDl9NqbBc99FYe8KAwZwjaASavelodc7rnw6vxtWL1ERJPVqbuPpRZAlgGDAuoEN
 DWqnY+LQhPJNLNLeRTVvnWxAIIFBJRJUEF7Ekwabsz8CQ/8pq9XrX8KfStx3TUAm
 OHcw7XJqyirklmI987LuBADtE5SIK4Jbpg8m5UgS+hDqIRSt5WFrzS6Depu7JA/F
 WFfiNg3Y4Kkw236I8UHs
 =zRxM
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches for 2.4.0-rc1

# gpg: Signature made Tue Jul 14 16:15:35 2015 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream:
  rbd: fix ceph settings precedence
  rbd: make qemu's cache setting override any ceph setting
  MAINTAINERS: update email address
  rbd: remove unused constants and fields
  block: Fix backing file child when modifying graph
  block: Reorder cleanups in bdrv_close()
  block: Introduce bdrv_unref_child()
  block: Introduce bdrv_open_child()
  block: Move bdrv_attach_child() calls up the call chain
  nvme: properly report volatile write caches
  nvme: implement the Flush command

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2015-07-14 16:51:44 +01:00
commit f3947986d9
7 changed files with 187 additions and 94 deletions

View File

@ -1169,7 +1169,7 @@ S: Supported
F: block/vmdk.c
RBD
M: Josh Durgin <josh.durgin@inktank.com>
M: Josh Durgin <jdurgin@redhat.com>
M: Jeff Cody <jcody@redhat.com>
L: qemu-block@nongnu.org
S: Supported

164
block.c
View File

@ -1102,12 +1102,46 @@ static int bdrv_fill_options(QDict **options, const char **pfilename,
return 0;
}
static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const BdrvChildRole *child_role)
{
BdrvChild *child = g_new(BdrvChild, 1);
*child = (BdrvChild) {
.bs = child_bs,
.role = child_role,
};
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
return child;
}
static void bdrv_detach_child(BdrvChild *child)
{
QLIST_REMOVE(child, next);
g_free(child);
}
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
{
BlockDriverState *child_bs = child->bs;
if (child->bs->inherits_from == parent) {
child->bs->inherits_from = NULL;
}
bdrv_detach_child(child);
bdrv_unref(child_bs);
}
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{
if (bs->backing_hd) {
assert(bs->backing_blocker);
bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
bdrv_detach_child(bs->backing_child);
} else if (backing_hd) {
error_setg(&bs->backing_blocker,
"node is used as backing hd of '%s'",
@ -1118,8 +1152,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
if (!backing_hd) {
error_free(bs->backing_blocker);
bs->backing_blocker = NULL;
bs->backing_child = NULL;
goto out;
}
bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
bs->open_flags &= ~BDRV_O_NO_BACKING;
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
pstrcpy(bs->backing_format, sizeof(bs->backing_format),
@ -1202,6 +1238,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
error_free(local_err);
goto free_exit;
}
bdrv_set_backing_hd(bs, backing_hd);
free_exit:
@ -1214,7 +1251,7 @@ free_exit:
* device's options.
*
* If allow_none is true, no image will be opened if filename is false and no
* BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
* BlockdevRef is given. NULL will be returned, but errp remains unset.
*
* bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
* That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
@ -1222,6 +1259,56 @@ free_exit:
* BlockdevRef.
*
* The BlockdevRef will be removed from the options QDict.
*/
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState* parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
BdrvChild *c = NULL;
BlockDriverState *bs;
QDict *image_options;
int ret;
char *bdref_key_dot;
const char *reference;
assert(child_role != NULL);
bdref_key_dot = g_strdup_printf("%s.", bdref_key);
qdict_extract_subqdict(options, &image_options, bdref_key_dot);
g_free(bdref_key_dot);
reference = qdict_get_try_str(options, bdref_key);
if (!filename && !reference && !qdict_size(image_options)) {
if (!allow_none) {
error_setg(errp, "A block device must be specified for \"%s\"",
bdref_key);
}
QDECREF(image_options);
goto done;
}
bs = NULL;
ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
parent, child_role, NULL, errp);
if (ret < 0) {
goto done;
}
c = bdrv_attach_child(parent, bs, child_role);
done:
qdict_del(options, bdref_key);
return c;
}
/*
* This is a version of bdrv_open_child() that returns 0/-EINVAL instead of
* a BdrvChild object.
*
* If allow_none is true, no image will be opened if filename is false and no
* BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
*
* To conform with the behavior of bdrv_open(), *pbs has to be NULL.
*/
@ -1230,37 +1317,24 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
BlockDriverState* parent, const BdrvChildRole *child_role,
bool allow_none, Error **errp)
{
QDict *image_options;
int ret;
char *bdref_key_dot;
const char *reference;
Error *local_err = NULL;
BdrvChild *c;
assert(pbs);
assert(*pbs == NULL);
bdref_key_dot = g_strdup_printf("%s.", bdref_key);
qdict_extract_subqdict(options, &image_options, bdref_key_dot);
g_free(bdref_key_dot);
reference = qdict_get_try_str(options, bdref_key);
if (!filename && !reference && !qdict_size(image_options)) {
if (allow_none) {
ret = 0;
} else {
error_setg(errp, "A block device must be specified for \"%s\"",
bdref_key);
ret = -EINVAL;
}
QDECREF(image_options);
goto done;
c = bdrv_open_child(filename, options, bdref_key, parent, child_role,
allow_none, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
}
ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0,
parent, child_role, NULL, errp);
if (c != NULL) {
*pbs = c->bs;
}
done:
qdict_del(options, bdref_key);
return ret;
return 0;
}
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
@ -1328,19 +1402,6 @@ out:
return ret;
}
static void bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const BdrvChildRole *child_role)
{
BdrvChild *child = g_new(BdrvChild, 1);
*child = (BdrvChild) {
.bs = child_bs,
.role = child_role,
};
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
}
/*
* Opens a disk image (raw, qcow2, vmdk, ...)
*
@ -1393,9 +1454,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
return -ENODEV;
}
bdrv_ref(bs);
if (child_role) {
bdrv_attach_child(parent, bs, child_role);
}
*pbs = bs;
return 0;
}
@ -1540,10 +1598,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
goto close_and_fail;
}
if (child_role) {
bdrv_attach_child(parent, bs, child_role);
}
QDECREF(options);
*pbs = bs;
return 0;
@ -1849,20 +1903,23 @@ void bdrv_close(BlockDriverState *bs)
if (bs->drv) {
BdrvChild *child, *next;
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
if (child->bs->inherits_from == bs) {
child->bs->inherits_from = NULL;
}
QLIST_REMOVE(child, next);
g_free(child);
}
bs->drv->bdrv_close(bs);
if (bs->backing_hd) {
BlockDriverState *backing_hd = bs->backing_hd;
bdrv_set_backing_hd(bs, NULL);
bdrv_unref(backing_hd);
}
bs->drv->bdrv_close(bs);
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
/* TODO Remove bdrv_unref() from drivers' close function and use
* bdrv_unref_child() here */
if (child->bs->inherits_from == bs) {
child->bs->inherits_from = NULL;
}
bdrv_detach_child(child);
}
g_free(bs->opaque);
bs->opaque = NULL;
bs->drv = NULL;
@ -2116,7 +2173,6 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
/* The contents of 'tmp' will become bs_top, as we are
* swapping bs_new and bs_top contents. */
bdrv_set_backing_hd(bs_top, bs_new);
bdrv_attach_child(bs_top, bs_new, &child_backing);
}
static void bdrv_delete(BlockDriverState *bs)

View File

@ -74,25 +74,18 @@ typedef struct RBDAIOCB {
QEMUIOVector *qiov;
char *bounce;
RBDAIOCmd cmd;
int64_t sector_num;
int error;
struct BDRVRBDState *s;
int status;
} RBDAIOCB;
typedef struct RADOSCB {
int rcbid;
RBDAIOCB *acb;
struct BDRVRBDState *s;
int done;
int64_t size;
char *buf;
int64_t ret;
} RADOSCB;
#define RBD_FD_READ 0
#define RBD_FD_WRITE 1
typedef struct BDRVRBDState {
rados_t cluster;
rados_ioctx_t io_ctx;
@ -235,7 +228,9 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
return NULL;
}
static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
bool only_read_conf_file,
Error **errp)
{
char *p, *buf;
char name[RBD_MAX_CONF_NAME_SIZE];
@ -267,14 +262,18 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
qemu_rbd_unescape(value);
if (strcmp(name, "conf") == 0) {
/* read the conf file alone, so it doesn't override more
specific settings for a particular device */
if (only_read_conf_file) {
ret = rados_conf_read_file(cluster, value);
if (ret < 0) {
error_setg(errp, "error reading conf file %s", value);
break;
}
}
} else if (strcmp(name, "id") == 0) {
/* ignore, this is parsed by qemu_rbd_parse_clientname() */
} else {
} else if (!only_read_conf_file) {
ret = rados_conf_set(cluster, name, value);
if (ret < 0) {
error_setg(errp, "invalid conf option %s", name);
@ -337,10 +336,15 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
if (strstr(conf, "conf=") == NULL) {
/* try default location, but ignore failure */
rados_conf_read_file(cluster, NULL);
} else if (conf[0] != '\0' &&
qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
rados_shutdown(cluster);
error_propagate(errp, local_err);
return -EIO;
}
if (conf[0] != '\0' &&
qemu_rbd_set_conf(cluster, conf, &local_err) < 0) {
qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
rados_shutdown(cluster);
error_propagate(errp, local_err);
return -EIO;
@ -405,7 +409,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
acb->status = 0;
qemu_aio_unref(acb);
}
@ -468,6 +471,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
s->snap = g_strdup(snap_buf);
}
if (strstr(conf, "conf=") == NULL) {
/* try default location, but ignore failure */
rados_conf_read_file(s->cluster, NULL);
} else if (conf[0] != '\0') {
r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
if (r < 0) {
goto failed_shutdown;
}
}
if (conf[0] != '\0') {
r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
if (r < 0) {
goto failed_shutdown;
}
}
/*
* Fallback to more conservative semantics if setting cache
* options fails. Ignore errors from setting rbd_cache because the
@ -481,18 +501,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
rados_conf_set(s->cluster, "rbd_cache", "true");
}
if (strstr(conf, "conf=") == NULL) {
/* try default location, but ignore failure */
rados_conf_read_file(s->cluster, NULL);
}
if (conf[0] != '\0') {
r = qemu_rbd_set_conf(s->cluster, conf, errp);
if (r < 0) {
goto failed_shutdown;
}
}
r = rados_connect(s->cluster);
if (r < 0) {
error_setg(errp, "error connecting");
@ -621,7 +629,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->error = 0;
acb->s = s;
acb->bh = NULL;
acb->status = -EINPROGRESS;
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@ -633,7 +640,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
size = nb_sectors * BDRV_SECTOR_SIZE;
rcb = g_new(RADOSCB, 1);
rcb->done = 0;
rcb->acb = acb;
rcb->buf = buf;
rcb->s = acb->s;

View File

@ -207,11 +207,23 @@ static void nvme_rw_cb(void *opaque, int ret)
} else {
req->status = NVME_INTERNAL_DEV_ERROR;
}
if (req->has_sg) {
qemu_sglist_destroy(&req->qsg);
}
nvme_enqueue_req_completion(cq, req);
}
static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
NvmeRequest *req)
{
req->has_sg = false;
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
BLOCK_ACCT_FLUSH);
req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
return NVME_NO_COMPLETE;
}
static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
NvmeRequest *req)
{
@ -235,6 +247,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
}
assert((nlb << data_shift) == req->qsg.size);
req->has_sg = true;
dma_acct_start(n->conf.blk, &req->acct, &req->qsg,
is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
req->aiocb = is_write ?
@ -256,7 +269,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
ns = &n->namespaces[nsid - 1];
switch (cmd->opcode) {
case NVME_CMD_FLUSH:
return NVME_SUCCESS;
return nvme_flush(n, ns, cmd, req);
case NVME_CMD_WRITE:
case NVME_CMD_READ:
return nvme_rw(n, ns, cmd, req);
@ -474,26 +487,32 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
uint32_t result;
switch (dw10) {
case NVME_NUMBER_OF_QUEUES:
req->cqe.result =
cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
break;
case NVME_VOLATILE_WRITE_CACHE:
req->cqe.result = cpu_to_le32(1);
result = blk_enable_write_cache(n->conf.blk);
break;
case NVME_NUMBER_OF_QUEUES:
result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
break;
default:
return NVME_INVALID_FIELD | NVME_DNR;
}
req->cqe.result = result;
return NVME_SUCCESS;
}
static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
uint32_t dw11 = le32_to_cpu(cmd->cdw11);
switch (dw10) {
case NVME_VOLATILE_WRITE_CACHE:
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
break;
case NVME_NUMBER_OF_QUEUES:
req->cqe.result =
cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
@ -818,6 +837,9 @@ static int nvme_init(PCIDevice *pci_dev)
id->psd[0].mp = cpu_to_le16(0x9c4);
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
if (blk_enable_write_cache(n->conf.blk)) {
id->vwc = 1;
}
n->bar.cap = 0;
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);

View File

@ -638,6 +638,7 @@ typedef struct NvmeRequest {
struct NvmeSQueue *sq;
BlockAIOCB *aiocb;
uint16_t status;
bool has_sg;
NvmeCqe cqe;
BlockAcctCookie acct;
QEMUSGList qsg;

View File

@ -12,6 +12,7 @@
/* block.c */
typedef struct BlockDriver BlockDriver;
typedef struct BlockJob BlockJob;
typedef struct BdrvChild BdrvChild;
typedef struct BdrvChildRole BdrvChildRole;
typedef struct BlockDriverInfo {
@ -208,6 +209,11 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState* parent, const BdrvChildRole *child_role,
bool allow_none, Error **errp);
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState* parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp);
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
@ -507,6 +513,7 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
void bdrv_ref(BlockDriverState *bs);
void bdrv_unref(BlockDriverState *bs);
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);

View File

@ -335,11 +335,11 @@ struct BdrvChildRole {
extern const BdrvChildRole child_file;
extern const BdrvChildRole child_format;
typedef struct BdrvChild {
struct BdrvChild {
BlockDriverState *bs;
const BdrvChildRole *role;
QLIST_ENTRY(BdrvChild) next;
} BdrvChild;
};
/*
* Note: the function bdrv_append() copies and swaps contents of
@ -379,6 +379,7 @@ struct BlockDriverState {
char exact_filename[PATH_MAX];
BlockDriverState *backing_hd;
BdrvChild *backing_child;
BlockDriverState *file;
NotifierList close_notifiers;