diff --git a/MAINTAINERS b/MAINTAINERS index 411da3cf57..978b7174f3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1169,7 +1169,7 @@ S: Supported F: block/vmdk.c RBD -M: Josh Durgin +M: Josh Durgin M: Jeff Cody L: qemu-block@nongnu.org S: Supported diff --git a/block.c b/block.c index 5e80336e9f..d088ee02ff 100644 --- a/block.c +++ b/block.c @@ -1102,12 +1102,46 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, return 0; } +static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const BdrvChildRole *child_role) +{ + BdrvChild *child = g_new(BdrvChild, 1); + *child = (BdrvChild) { + .bs = child_bs, + .role = child_role, + }; + + QLIST_INSERT_HEAD(&parent_bs->children, child, next); + + return child; +} + +static void bdrv_detach_child(BdrvChild *child) +{ + QLIST_REMOVE(child, next); + g_free(child); +} + +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) +{ + BlockDriverState *child_bs = child->bs; + + if (child->bs->inherits_from == parent) { + child->bs->inherits_from = NULL; + } + + bdrv_detach_child(child); + bdrv_unref(child_bs); +} + void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) { if (bs->backing_hd) { assert(bs->backing_blocker); bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); + bdrv_detach_child(bs->backing_child); } else if (backing_hd) { error_setg(&bs->backing_blocker, "node is used as backing hd of '%s'", @@ -1118,8 +1152,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) if (!backing_hd) { error_free(bs->backing_blocker); bs->backing_blocker = NULL; + bs->backing_child = NULL; goto out; } + bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing); bs->open_flags &= ~BDRV_O_NO_BACKING; pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), @@ -1202,6 +1238,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) error_free(local_err); goto free_exit; } + bdrv_set_backing_hd(bs, backing_hd); free_exit: @@ -1214,7 +1251,7 @@ free_exit: * device's options. * * If allow_none is true, no image will be opened if filename is false and no - * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. + * BlockdevRef is given. NULL will be returned, but errp remains unset. * * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict @@ -1222,6 +1259,56 @@ free_exit: * BlockdevRef. * * The BlockdevRef will be removed from the options QDict. + */ +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, + const BdrvChildRole *child_role, + bool allow_none, Error **errp) +{ + BdrvChild *c = NULL; + BlockDriverState *bs; + QDict *image_options; + int ret; + char *bdref_key_dot; + const char *reference; + + assert(child_role != NULL); + + bdref_key_dot = g_strdup_printf("%s.", bdref_key); + qdict_extract_subqdict(options, &image_options, bdref_key_dot); + g_free(bdref_key_dot); + + reference = qdict_get_try_str(options, bdref_key); + if (!filename && !reference && !qdict_size(image_options)) { + if (!allow_none) { + error_setg(errp, "A block device must be specified for \"%s\"", + bdref_key); + } + QDECREF(image_options); + goto done; + } + + bs = NULL; + ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0, + parent, child_role, NULL, errp); + if (ret < 0) { + goto done; + } + + c = bdrv_attach_child(parent, bs, child_role); + +done: + qdict_del(options, bdref_key); + return c; +} + +/* + * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of + * a BdrvChild object. + * + * If allow_none is true, no image will be opened if filename is false and no + * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. * * To conform with the behavior of bdrv_open(), *pbs has to be NULL. */ @@ -1230,37 +1317,24 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp) { - QDict *image_options; - int ret; - char *bdref_key_dot; - const char *reference; + Error *local_err = NULL; + BdrvChild *c; assert(pbs); assert(*pbs == NULL); - bdref_key_dot = g_strdup_printf("%s.", bdref_key); - qdict_extract_subqdict(options, &image_options, bdref_key_dot); - g_free(bdref_key_dot); - - reference = qdict_get_try_str(options, bdref_key); - if (!filename && !reference && !qdict_size(image_options)) { - if (allow_none) { - ret = 0; - } else { - error_setg(errp, "A block device must be specified for \"%s\"", - bdref_key); - ret = -EINVAL; - } - QDECREF(image_options); - goto done; + c = bdrv_open_child(filename, options, bdref_key, parent, child_role, + allow_none, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; } - ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0, - parent, child_role, NULL, errp); + if (c != NULL) { + *pbs = c->bs; + } -done: - qdict_del(options, bdref_key); - return ret; + return 0; } int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) @@ -1328,19 +1402,6 @@ out: return ret; } -static void bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const BdrvChildRole *child_role) -{ - BdrvChild *child = g_new(BdrvChild, 1); - *child = (BdrvChild) { - .bs = child_bs, - .role = child_role, - }; - - QLIST_INSERT_HEAD(&parent_bs->children, child, next); -} - /* * Opens a disk image (raw, qcow2, vmdk, ...) * @@ -1393,9 +1454,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, return -ENODEV; } bdrv_ref(bs); - if (child_role) { - bdrv_attach_child(parent, bs, child_role); - } *pbs = bs; return 0; } @@ -1540,10 +1598,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, goto close_and_fail; } - if (child_role) { - bdrv_attach_child(parent, bs, child_role); - } - QDECREF(options); *pbs = bs; return 0; @@ -1849,20 +1903,23 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { BdrvChild *child, *next; - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - if (child->bs->inherits_from == bs) { - child->bs->inherits_from = NULL; - } - QLIST_REMOVE(child, next); - g_free(child); - } + bs->drv->bdrv_close(bs); if (bs->backing_hd) { BlockDriverState *backing_hd = bs->backing_hd; bdrv_set_backing_hd(bs, NULL); bdrv_unref(backing_hd); } - bs->drv->bdrv_close(bs); + + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + /* TODO Remove bdrv_unref() from drivers' close function and use + * bdrv_unref_child() here */ + if (child->bs->inherits_from == bs) { + child->bs->inherits_from = NULL; + } + bdrv_detach_child(child); + } + g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; @@ -2116,7 +2173,6 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) /* The contents of 'tmp' will become bs_top, as we are * swapping bs_new and bs_top contents. */ bdrv_set_backing_hd(bs_top, bs_new); - bdrv_attach_child(bs_top, bs_new, &child_backing); } static void bdrv_delete(BlockDriverState *bs) diff --git a/block/rbd.c b/block/rbd.c index fbe87e035b..a60a19d58d 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -74,25 +74,18 @@ typedef struct RBDAIOCB { QEMUIOVector *qiov; char *bounce; RBDAIOCmd cmd; - int64_t sector_num; int error; struct BDRVRBDState *s; - int status; } RBDAIOCB; typedef struct RADOSCB { - int rcbid; RBDAIOCB *acb; struct BDRVRBDState *s; - int done; int64_t size; char *buf; int64_t ret; } RADOSCB; -#define RBD_FD_READ 0 -#define RBD_FD_WRITE 1 - typedef struct BDRVRBDState { rados_t cluster; rados_ioctx_t io_ctx; @@ -235,7 +228,9 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname) return NULL; } -static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp) +static int qemu_rbd_set_conf(rados_t cluster, const char *conf, + bool only_read_conf_file, + Error **errp) { char *p, *buf; char name[RBD_MAX_CONF_NAME_SIZE]; @@ -267,14 +262,18 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp) qemu_rbd_unescape(value); if (strcmp(name, "conf") == 0) { - ret = rados_conf_read_file(cluster, value); - if (ret < 0) { - error_setg(errp, "error reading conf file %s", value); - break; + /* read the conf file alone, so it doesn't override more + specific settings for a particular device */ + if (only_read_conf_file) { + ret = rados_conf_read_file(cluster, value); + if (ret < 0) { + error_setg(errp, "error reading conf file %s", value); + break; + } } } else if (strcmp(name, "id") == 0) { /* ignore, this is parsed by qemu_rbd_parse_clientname() */ - } else { + } else if (!only_read_conf_file) { ret = rados_conf_set(cluster, name, value); if (ret < 0) { error_setg(errp, "invalid conf option %s", name); @@ -337,10 +336,15 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) if (strstr(conf, "conf=") == NULL) { /* try default location, but ignore failure */ rados_conf_read_file(cluster, NULL); + } else if (conf[0] != '\0' && + qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) { + rados_shutdown(cluster); + error_propagate(errp, local_err); + return -EIO; } if (conf[0] != '\0' && - qemu_rbd_set_conf(cluster, conf, &local_err) < 0) { + qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) { rados_shutdown(cluster); error_propagate(errp, local_err); return -EIO; @@ -405,7 +409,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) } qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); - acb->status = 0; qemu_aio_unref(acb); } @@ -468,6 +471,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, s->snap = g_strdup(snap_buf); } + if (strstr(conf, "conf=") == NULL) { + /* try default location, but ignore failure */ + rados_conf_read_file(s->cluster, NULL); + } else if (conf[0] != '\0') { + r = qemu_rbd_set_conf(s->cluster, conf, true, errp); + if (r < 0) { + goto failed_shutdown; + } + } + + if (conf[0] != '\0') { + r = qemu_rbd_set_conf(s->cluster, conf, false, errp); + if (r < 0) { + goto failed_shutdown; + } + } + /* * Fallback to more conservative semantics if setting cache * options fails. Ignore errors from setting rbd_cache because the @@ -481,18 +501,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, rados_conf_set(s->cluster, "rbd_cache", "true"); } - if (strstr(conf, "conf=") == NULL) { - /* try default location, but ignore failure */ - rados_conf_read_file(s->cluster, NULL); - } - - if (conf[0] != '\0') { - r = qemu_rbd_set_conf(s->cluster, conf, errp); - if (r < 0) { - goto failed_shutdown; - } - } - r = rados_connect(s->cluster); if (r < 0) { error_setg(errp, "error connecting"); @@ -621,7 +629,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, acb->error = 0; acb->s = s; acb->bh = NULL; - acb->status = -EINPROGRESS; if (cmd == RBD_AIO_WRITE) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); @@ -633,7 +640,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, size = nb_sectors * BDRV_SECTOR_SIZE; rcb = g_new(RADOSCB, 1); - rcb->done = 0; rcb->acb = acb; rcb->buf = buf; rcb->s = acb->s; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index c6a6a0e49a..40d4880326 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -207,11 +207,23 @@ static void nvme_rw_cb(void *opaque, int ret) } else { req->status = NVME_INTERNAL_DEV_ERROR; } - - qemu_sglist_destroy(&req->qsg); + if (req->has_sg) { + qemu_sglist_destroy(&req->qsg); + } nvme_enqueue_req_completion(cq, req); } +static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + req->has_sg = false; + block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, + BLOCK_ACCT_FLUSH); + req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req); + + return NVME_NO_COMPLETE; +} + static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, NvmeRequest *req) { @@ -235,6 +247,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, } assert((nlb << data_shift) == req->qsg.size); + req->has_sg = true; dma_acct_start(n->conf.blk, &req->acct, &req->qsg, is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); req->aiocb = is_write ? @@ -256,7 +269,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) ns = &n->namespaces[nsid - 1]; switch (cmd->opcode) { case NVME_CMD_FLUSH: - return NVME_SUCCESS; + return nvme_flush(n, ns, cmd, req); case NVME_CMD_WRITE: case NVME_CMD_READ: return nvme_rw(n, ns, cmd, req); @@ -474,26 +487,32 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t result; switch (dw10) { - case NVME_NUMBER_OF_QUEUES: - req->cqe.result = - cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); - break; case NVME_VOLATILE_WRITE_CACHE: - req->cqe.result = cpu_to_le32(1); + result = blk_enable_write_cache(n->conf.blk); + break; + case NVME_NUMBER_OF_QUEUES: + result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); break; default: return NVME_INVALID_FIELD | NVME_DNR; } + + req->cqe.result = result; return NVME_SUCCESS; } static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); switch (dw10) { + case NVME_VOLATILE_WRITE_CACHE: + blk_set_enable_write_cache(n->conf.blk, dw11 & 1); + break; case NVME_NUMBER_OF_QUEUES: req->cqe.result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); @@ -818,6 +837,9 @@ static int nvme_init(PCIDevice *pci_dev) id->psd[0].mp = cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); + if (blk_enable_write_cache(n->conf.blk)) { + id->vwc = 1; + } n->bar.cap = 0; NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index b6ccb655a6..bf3a3ccac8 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -638,6 +638,7 @@ typedef struct NvmeRequest { struct NvmeSQueue *sq; BlockAIOCB *aiocb; uint16_t status; + bool has_sg; NvmeCqe cqe; BlockAcctCookie acct; QEMUSGList qsg; diff --git a/include/block/block.h b/include/block/block.h index 06e4137008..37916f7208 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -12,6 +12,7 @@ /* block.c */ typedef struct BlockDriver BlockDriver; typedef struct BlockJob BlockJob; +typedef struct BdrvChild BdrvChild; typedef struct BdrvChildRole BdrvChildRole; typedef struct BlockDriverInfo { @@ -208,6 +209,11 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, QDict *options, const char *bdref_key, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, + const BdrvChildRole *child_role, + bool allow_none, Error **errp); void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp); @@ -507,6 +513,7 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs); void bdrv_ref(BlockDriverState *bs); void bdrv_unref(BlockDriverState *bs); +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); diff --git a/include/block/block_int.h b/include/block/block_int.h index 8996baf2f0..14ad4c334b 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -335,11 +335,11 @@ struct BdrvChildRole { extern const BdrvChildRole child_file; extern const BdrvChildRole child_format; -typedef struct BdrvChild { +struct BdrvChild { BlockDriverState *bs; const BdrvChildRole *role; QLIST_ENTRY(BdrvChild) next; -} BdrvChild; +}; /* * Note: the function bdrv_append() copies and swaps contents of @@ -379,6 +379,7 @@ struct BlockDriverState { char exact_filename[PATH_MAX]; BlockDriverState *backing_hd; + BdrvChild *backing_child; BlockDriverState *file; NotifierList close_notifiers;