2013-06-04 19:17:10 +04:00
|
|
|
/*
|
|
|
|
* QEMU NVM Express Controller
|
|
|
|
*
|
|
|
|
* Copyright (c) 2012, Intel Corporation
|
|
|
|
*
|
|
|
|
* Written by Keith Busch <keith.busch@intel.com>
|
|
|
|
*
|
|
|
|
* This code is licensed under the GNU GPL v2 or later.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
2017-05-16 22:10:59 +03:00
|
|
|
* Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e
|
2013-06-04 19:17:10 +04:00
|
|
|
*
|
|
|
|
* http://www.nvmexpress.org/resources/
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Usage: add options:
|
|
|
|
* -drive file=<file>,if=none,id=<drive_id>
|
2017-05-16 22:10:59 +03:00
|
|
|
* -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
|
2018-06-26 04:44:56 +03:00
|
|
|
* cmb_size_mb=<cmb_size_mb[optional]>, \
|
|
|
|
* num_queues=<N[optional]>
|
2017-05-16 22:10:59 +03:00
|
|
|
*
|
|
|
|
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
|
2017-06-13 13:08:35 +03:00
|
|
|
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
|
2013-06-04 19:17:10 +04:00
|
|
|
*/
|
|
|
|
|
2016-01-18 21:01:42 +03:00
|
|
|
#include "qemu/osdep.h"
|
2018-06-25 15:42:05 +03:00
|
|
|
#include "qemu/units.h"
|
2016-06-22 20:11:19 +03:00
|
|
|
#include "hw/block/block.h"
|
|
|
|
#include "hw/hw.h"
|
|
|
|
#include "hw/pci/msix.h"
|
|
|
|
#include "hw/pci/pci.h"
|
2014-10-07 12:00:34 +04:00
|
|
|
#include "sysemu/sysemu.h"
|
include/qemu/osdep.h: Don't include qapi/error.h
Commit 57cb38b included qapi/error.h into qemu/osdep.h to get the
Error typedef. Since then, we've moved to include qemu/osdep.h
everywhere. Its file comment explains: "To avoid getting into
possible circular include dependencies, this file should not include
any other QEMU headers, with the exceptions of config-host.h,
compiler.h, os-posix.h and os-win32.h, all of which are doing a
similar job to this file and are under similar constraints."
qapi/error.h doesn't do a similar job, and it doesn't adhere to
similar constraints: it includes qapi-types.h. That's in excess of
100KiB of crap most .c files don't actually need.
Add the typedef to qemu/typedefs.h, and include that instead of
qapi/error.h. Include qapi/error.h in .c files that need it and don't
get it now. Include qapi-types.h in qom/object.h for uint16List.
Update scripts/clean-includes accordingly. Update it further to match
reality: replace config.h by config-target.h, add sysemu/os-posix.h,
sysemu/os-win32.h. Update the list of includes in the qemu/osdep.h
comment quoted above similarly.
This reduces the number of objects depending on qapi/error.h from "all
of them" to less than a third. Unfortunately, the number depending on
qapi-types.h shrinks only a little. More work is needed for that one.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
[Fix compilation without the spice devel packages. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-14 11:01:28 +03:00
|
|
|
#include "qapi/error.h"
|
2014-10-07 12:00:34 +04:00
|
|
|
#include "qapi/visitor.h"
|
2014-10-07 15:59:18 +04:00
|
|
|
#include "sysemu/block-backend.h"
|
2013-06-04 19:17:10 +04:00
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
#include "qemu/log.h"
|
2018-05-29 02:27:13 +03:00
|
|
|
#include "qemu/cutils.h"
|
2017-11-03 16:37:53 +03:00
|
|
|
#include "trace.h"
|
2013-06-04 19:17:10 +04:00
|
|
|
#include "nvme.h"
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
#define NVME_GUEST_ERR(trace, fmt, ...) \
|
|
|
|
do { \
|
|
|
|
(trace_##trace)(__VA_ARGS__); \
|
|
|
|
qemu_log_mask(LOG_GUEST_ERROR, #trace \
|
|
|
|
" in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
|
|
|
|
} while (0)
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
static void nvme_process_sq(void *opaque);
|
|
|
|
|
2017-05-16 22:10:59 +03:00
|
|
|
static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
|
|
|
|
{
|
|
|
|
if (n->cmbsz && addr >= n->ctrl_mem.addr &&
|
|
|
|
addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
|
|
|
|
memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
|
|
|
|
} else {
|
|
|
|
pci_dma_read(&n->parent_obj, addr, buf, size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
|
|
|
|
{
|
|
|
|
return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
|
|
|
|
{
|
|
|
|
return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_inc_cq_tail(NvmeCQueue *cq)
|
|
|
|
{
|
|
|
|
cq->tail++;
|
|
|
|
if (cq->tail >= cq->size) {
|
|
|
|
cq->tail = 0;
|
|
|
|
cq->phase = !cq->phase;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_inc_sq_head(NvmeSQueue *sq)
|
|
|
|
{
|
|
|
|
sq->head = (sq->head + 1) % sq->size;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint8_t nvme_cq_full(NvmeCQueue *cq)
|
|
|
|
{
|
|
|
|
return (cq->tail + 1) % cq->size == cq->head;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint8_t nvme_sq_empty(NvmeSQueue *sq)
|
|
|
|
{
|
|
|
|
return sq->head == sq->tail;
|
|
|
|
}
|
|
|
|
|
2017-12-18 08:00:43 +03:00
|
|
|
static void nvme_irq_check(NvmeCtrl *n)
|
|
|
|
{
|
|
|
|
if (msix_enabled(&(n->parent_obj))) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (~n->bar.intms & n->irq_status) {
|
|
|
|
pci_irq_assert(&n->parent_obj);
|
|
|
|
} else {
|
|
|
|
pci_irq_deassert(&n->parent_obj);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
|
2013-06-04 19:17:10 +04:00
|
|
|
{
|
|
|
|
if (cq->irq_enabled) {
|
|
|
|
if (msix_enabled(&(n->parent_obj))) {
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_irq_msix(cq->vector);
|
2013-06-04 19:17:10 +04:00
|
|
|
msix_notify(&(n->parent_obj), cq->vector);
|
|
|
|
} else {
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_irq_pin();
|
2017-12-18 08:00:43 +03:00
|
|
|
assert(cq->cqid < 64);
|
|
|
|
n->irq_status |= 1 << cq->cqid;
|
|
|
|
nvme_irq_check(n);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
} else {
|
|
|
|
trace_nvme_irq_masked();
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-18 08:00:43 +03:00
|
|
|
static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
|
|
|
|
{
|
|
|
|
if (cq->irq_enabled) {
|
|
|
|
if (msix_enabled(&(n->parent_obj))) {
|
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
assert(cq->cqid < 64);
|
|
|
|
n->irq_status &= ~(1 << cq->cqid);
|
|
|
|
nvme_irq_check(n);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-13 13:08:35 +03:00
|
|
|
static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
|
|
|
|
uint64_t prp2, uint32_t len, NvmeCtrl *n)
|
2013-06-04 19:17:10 +04:00
|
|
|
{
|
|
|
|
hwaddr trans_len = n->page_size - (prp1 % n->page_size);
|
|
|
|
trans_len = MIN(len, trans_len);
|
|
|
|
int num_prps = (len >> n->page_bits) + 1;
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!prp1)) {
|
|
|
|
trace_nvme_err_invalid_prp();
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
2017-06-13 13:08:35 +03:00
|
|
|
} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
|
|
|
|
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
|
|
|
|
qsg->nsg = 0;
|
|
|
|
qemu_iovec_init(iov, num_prps);
|
|
|
|
qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
|
|
|
|
} else {
|
|
|
|
pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
|
|
|
|
qemu_sglist_add(qsg, prp1, trans_len);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
len -= trans_len;
|
|
|
|
if (len) {
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!prp2)) {
|
|
|
|
trace_nvme_err_invalid_prp2_missing();
|
2013-06-04 19:17:10 +04:00
|
|
|
goto unmap;
|
|
|
|
}
|
|
|
|
if (len > n->page_size) {
|
|
|
|
uint64_t prp_list[n->max_prp_ents];
|
|
|
|
uint32_t nents, prp_trans;
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
nents = (len + n->page_size - 1) >> n->page_bits;
|
|
|
|
prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
|
2017-06-13 13:08:35 +03:00
|
|
|
nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
|
2013-06-04 19:17:10 +04:00
|
|
|
while (len != 0) {
|
|
|
|
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
|
|
|
|
|
|
|
|
if (i == n->max_prp_ents - 1 && len > n->page_size) {
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
|
|
|
|
trace_nvme_err_invalid_prplist_ent(prp_ent);
|
2013-06-04 19:17:10 +04:00
|
|
|
goto unmap;
|
|
|
|
}
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
nents = (len + n->page_size - 1) >> n->page_bits;
|
|
|
|
prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
|
2017-06-13 13:08:35 +03:00
|
|
|
nvme_addr_read(n, prp_ent, (void *)prp_list,
|
2013-06-04 19:17:10 +04:00
|
|
|
prp_trans);
|
|
|
|
prp_ent = le64_to_cpu(prp_list[i]);
|
|
|
|
}
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
|
|
|
|
trace_nvme_err_invalid_prplist_ent(prp_ent);
|
2013-06-04 19:17:10 +04:00
|
|
|
goto unmap;
|
|
|
|
}
|
|
|
|
|
|
|
|
trans_len = MIN(len, n->page_size);
|
2017-06-13 13:08:35 +03:00
|
|
|
if (qsg->nsg){
|
|
|
|
qemu_sglist_add(qsg, prp_ent, trans_len);
|
|
|
|
} else {
|
|
|
|
qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
len -= trans_len;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
} else {
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(prp2 & (n->page_size - 1))) {
|
|
|
|
trace_nvme_err_invalid_prp2_align(prp2);
|
2013-06-04 19:17:10 +04:00
|
|
|
goto unmap;
|
|
|
|
}
|
2017-06-13 13:08:35 +03:00
|
|
|
if (qsg->nsg) {
|
|
|
|
qemu_sglist_add(qsg, prp2, len);
|
|
|
|
} else {
|
|
|
|
qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return NVME_SUCCESS;
|
|
|
|
|
|
|
|
unmap:
|
|
|
|
qemu_sglist_destroy(qsg);
|
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
|
|
|
|
uint64_t prp1, uint64_t prp2)
|
|
|
|
{
|
|
|
|
QEMUSGList qsg;
|
2017-06-13 13:08:35 +03:00
|
|
|
QEMUIOVector iov;
|
|
|
|
uint16_t status = NVME_SUCCESS;
|
2013-06-04 19:17:10 +04:00
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_dma_read(prp1, prp2);
|
|
|
|
|
2017-06-13 13:08:35 +03:00
|
|
|
if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
2017-06-13 13:08:35 +03:00
|
|
|
if (qsg.nsg > 0) {
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(dma_buf_read(ptr, len, &qsg))) {
|
|
|
|
trace_nvme_err_invalid_dma();
|
2017-06-13 13:08:35 +03:00
|
|
|
status = NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
qemu_sglist_destroy(&qsg);
|
2017-06-13 13:08:35 +03:00
|
|
|
} else {
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
|
|
|
|
trace_nvme_err_invalid_dma();
|
2017-06-13 13:08:35 +03:00
|
|
|
status = NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
|
|
|
qemu_iovec_destroy(&iov);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
2017-06-13 13:08:35 +03:00
|
|
|
return status;
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_post_cqes(void *opaque)
|
|
|
|
{
|
|
|
|
NvmeCQueue *cq = opaque;
|
|
|
|
NvmeCtrl *n = cq->ctrl;
|
|
|
|
NvmeRequest *req, *next;
|
|
|
|
|
|
|
|
QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
|
|
|
|
NvmeSQueue *sq;
|
|
|
|
hwaddr addr;
|
|
|
|
|
|
|
|
if (nvme_cq_full(cq)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
QTAILQ_REMOVE(&cq->req_list, req, entry);
|
|
|
|
sq = req->sq;
|
|
|
|
req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
|
|
|
|
req->cqe.sq_id = cpu_to_le16(sq->sqid);
|
|
|
|
req->cqe.sq_head = cpu_to_le16(sq->head);
|
|
|
|
addr = cq->dma_addr + cq->tail * n->cqe_size;
|
|
|
|
nvme_inc_cq_tail(cq);
|
|
|
|
pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
|
|
|
|
sizeof(req->cqe));
|
|
|
|
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
|
|
|
|
}
|
2018-11-26 20:17:45 +03:00
|
|
|
if (cq->tail != cq->head) {
|
|
|
|
nvme_irq_assert(n, cq);
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
|
|
|
|
{
|
|
|
|
assert(cq->cqid == req->sq->cqid);
|
|
|
|
QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
|
|
|
|
QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
|
2013-08-21 19:03:08 +04:00
|
|
|
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_rw_cb(void *opaque, int ret)
|
|
|
|
{
|
|
|
|
NvmeRequest *req = opaque;
|
|
|
|
NvmeSQueue *sq = req->sq;
|
|
|
|
NvmeCtrl *n = sq->ctrl;
|
|
|
|
NvmeCQueue *cq = n->cq[sq->cqid];
|
|
|
|
|
|
|
|
if (!ret) {
|
2015-10-28 18:33:11 +03:00
|
|
|
block_acct_done(blk_get_stats(n->conf.blk), &req->acct);
|
2013-06-04 19:17:10 +04:00
|
|
|
req->status = NVME_SUCCESS;
|
|
|
|
} else {
|
2015-10-28 18:33:11 +03:00
|
|
|
block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
|
2013-06-04 19:17:10 +04:00
|
|
|
req->status = NVME_INTERNAL_DEV_ERROR;
|
|
|
|
}
|
2015-06-11 13:01:38 +03:00
|
|
|
if (req->has_sg) {
|
|
|
|
qemu_sglist_destroy(&req->qsg);
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
nvme_enqueue_req_completion(cq, req);
|
|
|
|
}
|
|
|
|
|
2015-06-11 13:01:38 +03:00
|
|
|
static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
|
|
|
|
NvmeRequest *req)
|
|
|
|
{
|
|
|
|
req->has_sg = false;
|
|
|
|
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
|
|
|
|
BLOCK_ACCT_FLUSH);
|
|
|
|
req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
|
|
|
|
|
|
|
|
return NVME_NO_COMPLETE;
|
|
|
|
}
|
|
|
|
|
2017-05-05 12:58:07 +03:00
|
|
|
static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
|
|
|
|
NvmeRequest *req)
|
|
|
|
{
|
|
|
|
NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
|
|
|
|
const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
|
|
|
|
const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
|
|
|
|
uint64_t slba = le64_to_cpu(rw->slba);
|
|
|
|
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
|
|
|
|
uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
|
|
|
|
uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(slba + nlb > ns->id_ns.nsze)) {
|
|
|
|
trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
|
2017-05-05 12:58:07 +03:00
|
|
|
return NVME_LBA_RANGE | NVME_DNR;
|
|
|
|
}
|
|
|
|
|
|
|
|
req->has_sg = false;
|
|
|
|
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
|
|
|
|
BLOCK_ACCT_WRITE);
|
|
|
|
req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, aio_slba, aio_nlb,
|
|
|
|
BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
|
|
|
|
return NVME_NO_COMPLETE;
|
|
|
|
}
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
|
|
|
|
NvmeRequest *req)
|
|
|
|
{
|
|
|
|
NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
|
|
|
|
uint32_t nlb = le32_to_cpu(rw->nlb) + 1;
|
|
|
|
uint64_t slba = le64_to_cpu(rw->slba);
|
|
|
|
uint64_t prp1 = le64_to_cpu(rw->prp1);
|
|
|
|
uint64_t prp2 = le64_to_cpu(rw->prp2);
|
|
|
|
|
|
|
|
uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
|
|
|
|
uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
|
2015-03-14 19:00:44 +03:00
|
|
|
uint64_t data_size = (uint64_t)nlb << data_shift;
|
2016-05-23 15:54:05 +03:00
|
|
|
uint64_t data_offset = slba << data_shift;
|
2013-06-04 19:17:10 +04:00
|
|
|
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
|
2015-10-28 18:33:11 +03:00
|
|
|
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
|
2013-06-04 19:17:10 +04:00
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
|
|
|
|
|
|
|
|
if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
|
2015-10-28 18:33:11 +03:00
|
|
|
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_LBA_RANGE | NVME_DNR;
|
|
|
|
}
|
2015-10-28 18:33:11 +03:00
|
|
|
|
2017-06-13 13:08:35 +03:00
|
|
|
if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
|
2015-10-28 18:33:11 +03:00
|
|
|
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
2015-10-28 18:33:11 +03:00
|
|
|
|
|
|
|
dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
|
2017-06-13 13:08:35 +03:00
|
|
|
if (req->qsg.nsg > 0) {
|
|
|
|
req->has_sg = true;
|
|
|
|
req->aiocb = is_write ?
|
|
|
|
dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
|
|
|
|
nvme_rw_cb, req) :
|
|
|
|
dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
|
|
|
|
nvme_rw_cb, req);
|
|
|
|
} else {
|
|
|
|
req->has_sg = false;
|
|
|
|
req->aiocb = is_write ?
|
|
|
|
blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
|
|
|
|
req) :
|
|
|
|
blk_aio_preadv(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
|
|
|
|
req);
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
return NVME_NO_COMPLETE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
|
|
|
|
{
|
|
|
|
NvmeNamespace *ns;
|
|
|
|
uint32_t nsid = le32_to_cpu(cmd->nsid);
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
|
|
|
|
trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_NSID | NVME_DNR;
|
|
|
|
}
|
|
|
|
|
|
|
|
ns = &n->namespaces[nsid - 1];
|
|
|
|
switch (cmd->opcode) {
|
|
|
|
case NVME_CMD_FLUSH:
|
2015-06-11 13:01:38 +03:00
|
|
|
return nvme_flush(n, ns, cmd, req);
|
2017-05-05 12:58:07 +03:00
|
|
|
case NVME_CMD_WRITE_ZEROS:
|
|
|
|
return nvme_write_zeros(n, ns, cmd, req);
|
2013-06-04 19:17:10 +04:00
|
|
|
case NVME_CMD_WRITE:
|
|
|
|
case NVME_CMD_READ:
|
|
|
|
return nvme_rw(n, ns, cmd, req);
|
|
|
|
default:
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_err_invalid_opc(cmd->opcode);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_OPCODE | NVME_DNR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
|
|
|
|
{
|
|
|
|
n->sq[sq->sqid] = NULL;
|
2013-08-21 19:03:08 +04:00
|
|
|
timer_del(sq->timer);
|
|
|
|
timer_free(sq->timer);
|
2013-06-04 19:17:10 +04:00
|
|
|
g_free(sq->io_req);
|
|
|
|
if (sq->sqid) {
|
|
|
|
g_free(sq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
|
|
|
|
{
|
|
|
|
NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
|
|
|
|
NvmeRequest *req, *next;
|
|
|
|
NvmeSQueue *sq;
|
|
|
|
NvmeCQueue *cq;
|
|
|
|
uint16_t qid = le16_to_cpu(c->qid);
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!qid || nvme_check_sqid(n, qid))) {
|
|
|
|
trace_nvme_err_invalid_del_sq(qid);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_QID | NVME_DNR;
|
|
|
|
}
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_del_sq(qid);
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
sq = n->sq[qid];
|
|
|
|
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
|
|
|
|
req = QTAILQ_FIRST(&sq->out_req_list);
|
|
|
|
assert(req->aiocb);
|
2014-10-07 15:59:18 +04:00
|
|
|
blk_aio_cancel(req->aiocb);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
if (!nvme_check_cqid(n, sq->cqid)) {
|
|
|
|
cq = n->cq[sq->cqid];
|
|
|
|
QTAILQ_REMOVE(&cq->sq_list, sq, entry);
|
|
|
|
|
|
|
|
nvme_post_cqes(cq);
|
|
|
|
QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
|
|
|
|
if (req->sq == sq) {
|
|
|
|
QTAILQ_REMOVE(&cq->req_list, req, entry);
|
|
|
|
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
nvme_free_sq(sq, n);
|
|
|
|
return NVME_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
|
|
|
|
uint16_t sqid, uint16_t cqid, uint16_t size)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
NvmeCQueue *cq;
|
|
|
|
|
|
|
|
sq->ctrl = n;
|
|
|
|
sq->dma_addr = dma_addr;
|
|
|
|
sq->sqid = sqid;
|
|
|
|
sq->size = size;
|
|
|
|
sq->cqid = cqid;
|
|
|
|
sq->head = sq->tail = 0;
|
2014-08-19 12:31:09 +04:00
|
|
|
sq->io_req = g_new(NvmeRequest, sq->size);
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
QTAILQ_INIT(&sq->req_list);
|
|
|
|
QTAILQ_INIT(&sq->out_req_list);
|
|
|
|
for (i = 0; i < sq->size; i++) {
|
|
|
|
sq->io_req[i].sq = sq;
|
|
|
|
QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
|
|
|
|
}
|
2013-08-21 19:03:08 +04:00
|
|
|
sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
assert(n->cq[cqid]);
|
|
|
|
cq = n->cq[cqid];
|
|
|
|
QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
|
|
|
|
n->sq[sqid] = sq;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
|
|
|
|
{
|
|
|
|
NvmeSQueue *sq;
|
|
|
|
NvmeCreateSq *c = (NvmeCreateSq *)cmd;
|
|
|
|
|
|
|
|
uint16_t cqid = le16_to_cpu(c->cqid);
|
|
|
|
uint16_t sqid = le16_to_cpu(c->sqid);
|
|
|
|
uint16_t qsize = le16_to_cpu(c->qsize);
|
|
|
|
uint16_t qflags = le16_to_cpu(c->sq_flags);
|
|
|
|
uint64_t prp1 = le64_to_cpu(c->prp1);
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
|
|
|
|
|
|
|
|
if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
|
|
|
|
trace_nvme_err_invalid_create_sq_cqid(cqid);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_CQID | NVME_DNR;
|
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) {
|
|
|
|
trace_nvme_err_invalid_create_sq_sqid(sqid);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_QID | NVME_DNR;
|
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
|
|
|
|
trace_nvme_err_invalid_create_sq_size(qsize);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
|
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!prp1 || prp1 & (n->page_size - 1))) {
|
|
|
|
trace_nvme_err_invalid_create_sq_addr(prp1);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
|
|
|
|
trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
|
|
|
sq = g_malloc0(sizeof(*sq));
|
|
|
|
nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1);
|
|
|
|
return NVME_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
|
|
|
|
{
|
|
|
|
n->cq[cq->cqid] = NULL;
|
2013-08-21 19:03:08 +04:00
|
|
|
timer_del(cq->timer);
|
|
|
|
timer_free(cq->timer);
|
2013-06-04 19:17:10 +04:00
|
|
|
msix_vector_unuse(&n->parent_obj, cq->vector);
|
|
|
|
if (cq->cqid) {
|
|
|
|
g_free(cq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
|
|
|
|
{
|
|
|
|
NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
|
|
|
|
NvmeCQueue *cq;
|
|
|
|
uint16_t qid = le16_to_cpu(c->qid);
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!qid || nvme_check_cqid(n, qid))) {
|
|
|
|
trace_nvme_err_invalid_del_cq_cqid(qid);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_CQID | NVME_DNR;
|
|
|
|
}
|
|
|
|
|
|
|
|
cq = n->cq[qid];
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
|
|
|
|
trace_nvme_err_invalid_del_cq_notempty(qid);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_QUEUE_DEL;
|
|
|
|
}
|
2018-11-21 21:10:13 +03:00
|
|
|
nvme_irq_deassert(n, cq);
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_del_cq(qid);
|
2013-06-04 19:17:10 +04:00
|
|
|
nvme_free_cq(cq, n);
|
|
|
|
return NVME_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
|
|
|
|
uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled)
|
|
|
|
{
|
|
|
|
cq->ctrl = n;
|
|
|
|
cq->cqid = cqid;
|
|
|
|
cq->size = size;
|
|
|
|
cq->dma_addr = dma_addr;
|
|
|
|
cq->phase = 1;
|
|
|
|
cq->irq_enabled = irq_enabled;
|
|
|
|
cq->vector = vector;
|
|
|
|
cq->head = cq->tail = 0;
|
|
|
|
QTAILQ_INIT(&cq->req_list);
|
|
|
|
QTAILQ_INIT(&cq->sq_list);
|
|
|
|
msix_vector_use(&n->parent_obj, cq->vector);
|
|
|
|
n->cq[cqid] = cq;
|
2013-08-21 19:03:08 +04:00
|
|
|
cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
|
|
|
|
{
|
|
|
|
NvmeCQueue *cq;
|
|
|
|
NvmeCreateCq *c = (NvmeCreateCq *)cmd;
|
|
|
|
uint16_t cqid = le16_to_cpu(c->cqid);
|
|
|
|
uint16_t vector = le16_to_cpu(c->irq_vector);
|
|
|
|
uint16_t qsize = le16_to_cpu(c->qsize);
|
|
|
|
uint16_t qflags = le16_to_cpu(c->cq_flags);
|
|
|
|
uint64_t prp1 = le64_to_cpu(c->prp1);
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
|
|
|
|
NVME_CQ_FLAGS_IEN(qflags) != 0);
|
|
|
|
|
|
|
|
if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) {
|
|
|
|
trace_nvme_err_invalid_create_cq_cqid(cqid);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_CQID | NVME_DNR;
|
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
|
|
|
|
trace_nvme_err_invalid_create_cq_size(qsize);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
|
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!prp1)) {
|
|
|
|
trace_nvme_err_invalid_create_cq_addr(prp1);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(vector > n->num_queues)) {
|
|
|
|
trace_nvme_err_invalid_create_cq_vector(vector);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
|
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
|
|
|
|
trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
|
|
|
|
|
|
|
cq = g_malloc0(sizeof(*cq));
|
|
|
|
nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
|
|
|
|
NVME_CQ_FLAGS_IEN(qflags));
|
|
|
|
return NVME_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2016-08-04 22:42:14 +03:00
|
|
|
static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
|
|
|
|
{
|
|
|
|
uint64_t prp1 = le64_to_cpu(c->prp1);
|
|
|
|
uint64_t prp2 = le64_to_cpu(c->prp2);
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_identify_ctrl();
|
|
|
|
|
2016-08-04 22:42:14 +03:00
|
|
|
return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
|
|
|
|
prp1, prp2);
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
|
2013-06-04 19:17:10 +04:00
|
|
|
{
|
|
|
|
NvmeNamespace *ns;
|
|
|
|
uint32_t nsid = le32_to_cpu(c->nsid);
|
|
|
|
uint64_t prp1 = le64_to_cpu(c->prp1);
|
|
|
|
uint64_t prp2 = le64_to_cpu(c->prp2);
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_identify_ns(nsid);
|
|
|
|
|
|
|
|
if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
|
|
|
|
trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_NSID | NVME_DNR;
|
|
|
|
}
|
|
|
|
|
|
|
|
ns = &n->namespaces[nsid - 1];
|
2017-11-03 16:37:53 +03:00
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
|
|
|
|
prp1, prp2);
|
|
|
|
}
|
|
|
|
|
2016-08-04 22:42:14 +03:00
|
|
|
static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
|
|
|
|
{
|
2018-06-25 15:42:05 +03:00
|
|
|
static const int data_len = 4 * KiB;
|
2016-08-04 22:42:14 +03:00
|
|
|
uint32_t min_nsid = le32_to_cpu(c->nsid);
|
|
|
|
uint64_t prp1 = le64_to_cpu(c->prp1);
|
|
|
|
uint64_t prp2 = le64_to_cpu(c->prp2);
|
|
|
|
uint32_t *list;
|
|
|
|
uint16_t ret;
|
|
|
|
int i, j = 0;
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_identify_nslist(min_nsid);
|
|
|
|
|
2016-08-04 22:42:14 +03:00
|
|
|
list = g_malloc0(data_len);
|
|
|
|
for (i = 0; i < n->num_namespaces; i++) {
|
|
|
|
if (i < min_nsid) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
list[j++] = cpu_to_le32(i + 1);
|
|
|
|
if (j == data_len / sizeof(uint32_t)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2);
|
|
|
|
g_free(list);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
|
|
|
|
{
|
|
|
|
NvmeIdentify *c = (NvmeIdentify *)cmd;
|
|
|
|
|
|
|
|
switch (le32_to_cpu(c->cns)) {
|
|
|
|
case 0x00:
|
|
|
|
return nvme_identify_ns(n, c);
|
|
|
|
case 0x01:
|
|
|
|
return nvme_identify_ctrl(n, c);
|
|
|
|
case 0x02:
|
|
|
|
return nvme_identify_nslist(n, c);
|
|
|
|
default:
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
|
2016-08-04 22:42:14 +03:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
|
|
|
|
{
|
|
|
|
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
|
2015-06-11 13:01:39 +03:00
|
|
|
uint32_t result;
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
switch (dw10) {
|
2015-04-30 12:44:17 +03:00
|
|
|
case NVME_VOLATILE_WRITE_CACHE:
|
2015-06-11 13:01:39 +03:00
|
|
|
result = blk_enable_write_cache(n->conf.blk);
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
|
2015-06-11 13:01:39 +03:00
|
|
|
break;
|
|
|
|
case NVME_NUMBER_OF_QUEUES:
|
2017-05-28 16:06:49 +03:00
|
|
|
result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_getfeat_numq(result);
|
2015-04-30 12:44:17 +03:00
|
|
|
break;
|
2013-06-04 19:17:10 +04:00
|
|
|
default:
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_err_invalid_getfeat(dw10);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
2015-06-11 13:01:39 +03:00
|
|
|
|
|
|
|
req->cqe.result = result;
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
|
|
|
|
{
|
|
|
|
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
|
2015-06-11 13:01:39 +03:00
|
|
|
uint32_t dw11 = le32_to_cpu(cmd->cdw11);
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
switch (dw10) {
|
2015-06-11 13:01:39 +03:00
|
|
|
case NVME_VOLATILE_WRITE_CACHE:
|
|
|
|
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
|
|
|
|
break;
|
2013-06-04 19:17:10 +04:00
|
|
|
case NVME_NUMBER_OF_QUEUES:
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
|
|
|
|
((dw11 >> 16) & 0xFFFF) + 1,
|
|
|
|
n->num_queues - 1, n->num_queues - 1);
|
2014-12-05 15:40:24 +03:00
|
|
|
req->cqe.result =
|
2017-05-28 16:06:49 +03:00
|
|
|
cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
|
2013-06-04 19:17:10 +04:00
|
|
|
break;
|
|
|
|
default:
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_err_invalid_setfeat(dw10);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_FIELD | NVME_DNR;
|
|
|
|
}
|
|
|
|
return NVME_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
|
|
|
|
{
|
|
|
|
switch (cmd->opcode) {
|
|
|
|
case NVME_ADM_CMD_DELETE_SQ:
|
|
|
|
return nvme_del_sq(n, cmd);
|
|
|
|
case NVME_ADM_CMD_CREATE_SQ:
|
|
|
|
return nvme_create_sq(n, cmd);
|
|
|
|
case NVME_ADM_CMD_DELETE_CQ:
|
|
|
|
return nvme_del_cq(n, cmd);
|
|
|
|
case NVME_ADM_CMD_CREATE_CQ:
|
|
|
|
return nvme_create_cq(n, cmd);
|
|
|
|
case NVME_ADM_CMD_IDENTIFY:
|
|
|
|
return nvme_identify(n, cmd);
|
|
|
|
case NVME_ADM_CMD_SET_FEATURES:
|
|
|
|
return nvme_set_feature(n, cmd, req);
|
|
|
|
case NVME_ADM_CMD_GET_FEATURES:
|
|
|
|
return nvme_get_feature(n, cmd, req);
|
|
|
|
default:
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_err_invalid_admin_opc(cmd->opcode);
|
2013-06-04 19:17:10 +04:00
|
|
|
return NVME_INVALID_OPCODE | NVME_DNR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_process_sq(void *opaque)
|
|
|
|
{
|
|
|
|
NvmeSQueue *sq = opaque;
|
|
|
|
NvmeCtrl *n = sq->ctrl;
|
|
|
|
NvmeCQueue *cq = n->cq[sq->cqid];
|
|
|
|
|
|
|
|
uint16_t status;
|
|
|
|
hwaddr addr;
|
|
|
|
NvmeCmd cmd;
|
|
|
|
NvmeRequest *req;
|
|
|
|
|
|
|
|
while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
|
|
|
|
addr = sq->dma_addr + sq->head * n->sqe_size;
|
2017-05-16 22:10:59 +03:00
|
|
|
nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
|
2013-06-04 19:17:10 +04:00
|
|
|
nvme_inc_sq_head(sq);
|
|
|
|
|
|
|
|
req = QTAILQ_FIRST(&sq->req_list);
|
|
|
|
QTAILQ_REMOVE(&sq->req_list, req, entry);
|
|
|
|
QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
|
|
|
|
memset(&req->cqe, 0, sizeof(req->cqe));
|
|
|
|
req->cqe.cid = cmd.cid;
|
|
|
|
|
|
|
|
status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
|
|
|
|
nvme_admin_cmd(n, &cmd, req);
|
|
|
|
if (status != NVME_NO_COMPLETE) {
|
|
|
|
req->status = status;
|
|
|
|
nvme_enqueue_req_completion(cq, req);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_clear_ctrl(NvmeCtrl *n)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2018-11-06 15:16:55 +03:00
|
|
|
blk_drain(n->conf.blk);
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
for (i = 0; i < n->num_queues; i++) {
|
|
|
|
if (n->sq[i] != NULL) {
|
|
|
|
nvme_free_sq(n->sq[i], n);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i = 0; i < n->num_queues; i++) {
|
|
|
|
if (n->cq[i] != NULL) {
|
|
|
|
nvme_free_cq(n->cq[i], n);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:18 +04:00
|
|
|
blk_flush(n->conf.blk);
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.cc = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nvme_start_ctrl(NvmeCtrl *n)
|
|
|
|
{
|
|
|
|
uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
|
|
|
|
uint32_t page_size = 1 << page_bits;
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(n->cq[0])) {
|
|
|
|
trace_nvme_err_startfail_cq();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(n->sq[0])) {
|
|
|
|
trace_nvme_err_startfail_sq();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(!n->bar.asq)) {
|
|
|
|
trace_nvme_err_startfail_nbarasq();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(!n->bar.acq)) {
|
|
|
|
trace_nvme_err_startfail_nbaracq();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(n->bar.asq & (page_size - 1))) {
|
|
|
|
trace_nvme_err_startfail_asq_misaligned(n->bar.asq);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(n->bar.acq & (page_size - 1))) {
|
|
|
|
trace_nvme_err_startfail_acq_misaligned(n->bar.acq);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(NVME_CC_MPS(n->bar.cc) <
|
|
|
|
NVME_CAP_MPSMIN(n->bar.cap))) {
|
|
|
|
trace_nvme_err_startfail_page_too_small(
|
|
|
|
NVME_CC_MPS(n->bar.cc),
|
|
|
|
NVME_CAP_MPSMIN(n->bar.cap));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(NVME_CC_MPS(n->bar.cc) >
|
|
|
|
NVME_CAP_MPSMAX(n->bar.cap))) {
|
|
|
|
trace_nvme_err_startfail_page_too_large(
|
|
|
|
NVME_CC_MPS(n->bar.cc),
|
|
|
|
NVME_CAP_MPSMAX(n->bar.cap));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
|
|
|
|
NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
|
|
|
|
trace_nvme_err_startfail_cqent_too_small(
|
|
|
|
NVME_CC_IOCQES(n->bar.cc),
|
|
|
|
NVME_CTRL_CQES_MIN(n->bar.cap));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
|
|
|
|
NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
|
|
|
|
trace_nvme_err_startfail_cqent_too_large(
|
|
|
|
NVME_CC_IOCQES(n->bar.cc),
|
|
|
|
NVME_CTRL_CQES_MAX(n->bar.cap));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
|
|
|
|
NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
|
|
|
|
trace_nvme_err_startfail_sqent_too_small(
|
|
|
|
NVME_CC_IOSQES(n->bar.cc),
|
|
|
|
NVME_CTRL_SQES_MIN(n->bar.cap));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
|
|
|
|
NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
|
|
|
|
trace_nvme_err_startfail_sqent_too_large(
|
|
|
|
NVME_CC_IOSQES(n->bar.cc),
|
|
|
|
NVME_CTRL_SQES_MAX(n->bar.cap));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
|
|
|
|
trace_nvme_err_startfail_asqent_sz_zero();
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
|
|
|
|
trace_nvme_err_startfail_acqent_sz_zero();
|
2013-06-04 19:17:10 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
n->page_bits = page_bits;
|
|
|
|
n->page_size = page_size;
|
|
|
|
n->max_prp_ents = n->page_size / sizeof(uint64_t);
|
|
|
|
n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
|
|
|
|
n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
|
|
|
|
nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
|
|
|
|
NVME_AQA_ACQS(n->bar.aqa) + 1, 1);
|
|
|
|
nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
|
|
|
|
NVME_AQA_ASQS(n->bar.aqa) + 1);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
|
|
|
|
unsigned size)
|
|
|
|
{
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(offset & (sizeof(uint32_t) - 1))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32,
|
|
|
|
"MMIO write not 32-bit aligned,"
|
|
|
|
" offset=0x%"PRIx64"", offset);
|
|
|
|
/* should be ignored, fall through for now */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unlikely(size < sizeof(uint32_t))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall,
|
|
|
|
"MMIO write smaller than 32-bits,"
|
|
|
|
" offset=0x%"PRIx64", size=%u",
|
|
|
|
offset, size);
|
|
|
|
/* should be ignored, fall through for now */
|
|
|
|
}
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
switch (offset) {
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0xc: /* INTMS */
|
|
|
|
if (unlikely(msix_enabled(&(n->parent_obj)))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
|
|
|
|
"undefined access to interrupt mask set"
|
|
|
|
" when MSI-X is enabled");
|
|
|
|
/* should be ignored, fall through for now */
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.intms |= data & 0xffffffff;
|
|
|
|
n->bar.intmc = n->bar.intms;
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_intm_set(data & 0xffffffff,
|
|
|
|
n->bar.intmc);
|
2017-12-18 08:00:43 +03:00
|
|
|
nvme_irq_check(n);
|
2013-06-04 19:17:10 +04:00
|
|
|
break;
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0x10: /* INTMC */
|
|
|
|
if (unlikely(msix_enabled(&(n->parent_obj)))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
|
|
|
|
"undefined access to interrupt mask clr"
|
|
|
|
" when MSI-X is enabled");
|
|
|
|
/* should be ignored, fall through for now */
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.intms &= ~(data & 0xffffffff);
|
|
|
|
n->bar.intmc = n->bar.intms;
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_intm_clr(data & 0xffffffff,
|
|
|
|
n->bar.intmc);
|
2017-12-18 08:00:43 +03:00
|
|
|
nvme_irq_check(n);
|
2013-06-04 19:17:10 +04:00
|
|
|
break;
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0x14: /* CC */
|
|
|
|
trace_nvme_mmio_cfg(data & 0xffffffff);
|
2015-04-24 21:55:42 +03:00
|
|
|
/* Windows first sends data, then sends enable bit */
|
|
|
|
if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
|
|
|
|
!NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
|
|
|
|
{
|
|
|
|
n->bar.cc = data;
|
|
|
|
}
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
|
|
|
|
n->bar.cc = data;
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(nvme_start_ctrl(n))) {
|
|
|
|
trace_nvme_err_startfail();
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.csts = NVME_CSTS_FAILED;
|
|
|
|
} else {
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_start_success();
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.csts = NVME_CSTS_READY;
|
|
|
|
}
|
|
|
|
} else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_stopped();
|
2013-06-04 19:17:10 +04:00
|
|
|
nvme_clear_ctrl(n);
|
|
|
|
n->bar.csts &= ~NVME_CSTS_READY;
|
|
|
|
}
|
|
|
|
if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_shutdown_set();
|
|
|
|
nvme_clear_ctrl(n);
|
|
|
|
n->bar.cc = data;
|
|
|
|
n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
|
2013-06-04 19:17:10 +04:00
|
|
|
} else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_shutdown_cleared();
|
|
|
|
n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
|
|
|
|
n->bar.cc = data;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x1C: /* CSTS */
|
|
|
|
if (data & (1 << 4)) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported,
|
|
|
|
"attempted to W1C CSTS.NSSRO"
|
|
|
|
" but CAP.NSSRS is zero (not supported)");
|
|
|
|
} else if (data != 0) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts,
|
|
|
|
"attempted to set a read only bit"
|
|
|
|
" of controller status");
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 0x20: /* NSSR */
|
|
|
|
if (data == 0x4E564D65) {
|
|
|
|
trace_nvme_ub_mmiowr_ssreset_unsupported();
|
|
|
|
} else {
|
|
|
|
/* The spec says that writes of other values have no effect */
|
|
|
|
return;
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
break;
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0x24: /* AQA */
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.aqa = data & 0xffffffff;
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_aqattr(data & 0xffffffff);
|
2013-06-04 19:17:10 +04:00
|
|
|
break;
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0x28: /* ASQ */
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.asq = data;
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_asqaddr(data);
|
2013-06-04 19:17:10 +04:00
|
|
|
break;
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0x2c: /* ASQ hi */
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.asq |= data << 32;
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_asqaddr_hi(data, n->bar.asq);
|
2013-06-04 19:17:10 +04:00
|
|
|
break;
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0x30: /* ACQ */
|
|
|
|
trace_nvme_mmio_acqaddr(data);
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.acq = data;
|
|
|
|
break;
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0x34: /* ACQ hi */
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.acq |= data << 32;
|
2017-11-03 16:37:53 +03:00
|
|
|
trace_nvme_mmio_acqaddr_hi(data, n->bar.acq);
|
2013-06-04 19:17:10 +04:00
|
|
|
break;
|
2017-11-03 16:37:53 +03:00
|
|
|
case 0x38: /* CMBLOC */
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved,
|
|
|
|
"invalid write to reserved CMBLOC"
|
|
|
|
" when CMBSZ is zero, ignored");
|
|
|
|
return;
|
|
|
|
case 0x3C: /* CMBSZ */
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
|
|
|
|
"invalid write to read only CMBSZ, ignored");
|
|
|
|
return;
|
2013-06-04 19:17:10 +04:00
|
|
|
default:
|
2017-11-03 16:37:53 +03:00
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
|
|
|
|
"invalid MMIO write,"
|
|
|
|
" offset=0x%"PRIx64", data=%"PRIx64"",
|
|
|
|
offset, data);
|
2013-06-04 19:17:10 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
|
|
|
|
{
|
|
|
|
NvmeCtrl *n = (NvmeCtrl *)opaque;
|
|
|
|
uint8_t *ptr = (uint8_t *)&n->bar;
|
|
|
|
uint64_t val = 0;
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(addr & (sizeof(uint32_t) - 1))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32,
|
|
|
|
"MMIO read not 32-bit aligned,"
|
|
|
|
" offset=0x%"PRIx64"", addr);
|
|
|
|
/* should RAZ, fall through for now */
|
|
|
|
} else if (unlikely(size < sizeof(uint32_t))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiord_toosmall,
|
|
|
|
"MMIO read smaller than 32-bits,"
|
|
|
|
" offset=0x%"PRIx64"", addr);
|
|
|
|
/* should RAZ, fall through for now */
|
|
|
|
}
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
if (addr < sizeof(n->bar)) {
|
|
|
|
memcpy(&val, ptr + addr, size);
|
2017-11-03 16:37:53 +03:00
|
|
|
} else {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
|
|
|
|
"MMIO read beyond last register,"
|
|
|
|
" offset=0x%"PRIx64", returning 0", addr);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
2017-11-03 16:37:53 +03:00
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
|
|
|
|
{
|
|
|
|
uint32_t qid;
|
|
|
|
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(addr & ((1 << 2) - 1))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_db_wr_misaligned,
|
|
|
|
"doorbell write not 32-bit aligned,"
|
|
|
|
" offset=0x%"PRIx64", ignoring", addr);
|
2013-06-04 19:17:10 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (((addr - 0x1000) >> 2) & 1) {
|
2017-11-03 16:37:53 +03:00
|
|
|
/* Completion queue doorbell write */
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
uint16_t new_head = val & 0xffff;
|
|
|
|
int start_sqs;
|
|
|
|
NvmeCQueue *cq;
|
|
|
|
|
|
|
|
qid = (addr - (0x1000 + (1 << 2))) >> 3;
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(nvme_check_cqid(n, qid))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq,
|
|
|
|
"completion queue doorbell write"
|
|
|
|
" for nonexistent queue,"
|
|
|
|
" sqid=%"PRIu32", ignoring", qid);
|
2013-06-04 19:17:10 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
cq = n->cq[qid];
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(new_head >= cq->size)) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead,
|
|
|
|
"completion queue doorbell write value"
|
|
|
|
" beyond queue size, sqid=%"PRIu32","
|
|
|
|
" new_head=%"PRIu16", ignoring",
|
|
|
|
qid, new_head);
|
2013-06-04 19:17:10 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
start_sqs = nvme_cq_full(cq) ? 1 : 0;
|
|
|
|
cq->head = new_head;
|
|
|
|
if (start_sqs) {
|
|
|
|
NvmeSQueue *sq;
|
|
|
|
QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
|
2013-08-21 19:03:08 +04:00
|
|
|
timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
2013-08-21 19:03:08 +04:00
|
|
|
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
|
2017-12-18 08:00:43 +03:00
|
|
|
if (cq->tail == cq->head) {
|
|
|
|
nvme_irq_deassert(n, cq);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
} else {
|
2017-11-03 16:37:53 +03:00
|
|
|
/* Submission queue doorbell write */
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
uint16_t new_tail = val & 0xffff;
|
|
|
|
NvmeSQueue *sq;
|
|
|
|
|
|
|
|
qid = (addr - 0x1000) >> 3;
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(nvme_check_sqid(n, qid))) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq,
|
|
|
|
"submission queue doorbell write"
|
|
|
|
" for nonexistent queue,"
|
|
|
|
" sqid=%"PRIu32", ignoring", qid);
|
2013-06-04 19:17:10 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
sq = n->sq[qid];
|
2017-11-03 16:37:53 +03:00
|
|
|
if (unlikely(new_tail >= sq->size)) {
|
|
|
|
NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail,
|
|
|
|
"submission queue doorbell write value"
|
|
|
|
" beyond queue size, sqid=%"PRIu32","
|
|
|
|
" new_tail=%"PRIu16", ignoring",
|
|
|
|
qid, new_tail);
|
2013-06-04 19:17:10 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
sq->tail = new_tail;
|
2013-08-21 19:03:08 +04:00
|
|
|
timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data,
|
|
|
|
unsigned size)
|
|
|
|
{
|
|
|
|
NvmeCtrl *n = (NvmeCtrl *)opaque;
|
|
|
|
if (addr < sizeof(n->bar)) {
|
|
|
|
nvme_write_bar(n, addr, data, size);
|
|
|
|
} else if (addr >= 0x1000) {
|
|
|
|
nvme_process_db(n, addr, data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static const MemoryRegionOps nvme_mmio_ops = {
|
|
|
|
.read = nvme_mmio_read,
|
|
|
|
.write = nvme_mmio_write,
|
|
|
|
.endianness = DEVICE_LITTLE_ENDIAN,
|
|
|
|
.impl = {
|
|
|
|
.min_access_size = 2,
|
|
|
|
.max_access_size = 8,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
2017-05-16 22:10:59 +03:00
|
|
|
static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data,
|
|
|
|
unsigned size)
|
|
|
|
{
|
|
|
|
NvmeCtrl *n = (NvmeCtrl *)opaque;
|
2018-11-22 21:23:35 +03:00
|
|
|
stn_le_p(&n->cmbuf[addr], size, data);
|
2017-05-16 22:10:59 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size)
|
|
|
|
{
|
|
|
|
NvmeCtrl *n = (NvmeCtrl *)opaque;
|
2018-11-22 21:23:35 +03:00
|
|
|
return ldn_le_p(&n->cmbuf[addr], size);
|
2017-05-16 22:10:59 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static const MemoryRegionOps nvme_cmb_ops = {
|
|
|
|
.read = nvme_cmb_read,
|
|
|
|
.write = nvme_cmb_write,
|
|
|
|
.endianness = DEVICE_LITTLE_ENDIAN,
|
|
|
|
.impl = {
|
2018-11-20 21:41:48 +03:00
|
|
|
.min_access_size = 1,
|
2017-05-16 22:10:59 +03:00
|
|
|
.max_access_size = 8,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
2017-11-22 06:08:43 +03:00
|
|
|
static void nvme_realize(PCIDevice *pci_dev, Error **errp)
|
2013-06-04 19:17:10 +04:00
|
|
|
{
|
|
|
|
NvmeCtrl *n = NVME(pci_dev);
|
|
|
|
NvmeIdCtrl *id = &n->id_ctrl;
|
|
|
|
|
|
|
|
int i;
|
|
|
|
int64_t bs_size;
|
|
|
|
uint8_t *pci_conf;
|
|
|
|
|
nvme: ensure the num_queues is not zero
When it is zero, it causes segv.
Using following command:
"-drive file=//home/test/test1.img,if=none,id=id0
-device nvme,drive=id0,serial=test,num_queues=0"
causes following Backtrack:
Thread 4 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffe9735700 (LWP 30952)]
0x0000555555a7a77c in nvme_start_ctrl (n=0x5555577473f0) at hw/block/nvme.c:825
825 if (unlikely(n->cq[0])) {
(gdb) bt
0 0x0000555555a7a77c in nvme_start_ctrl (n=0x5555577473f0)
at hw/block/nvme.c:825
1 0x0000555555a7af7f in nvme_write_bar (n=0x5555577473f0, offset=20,
data=4587521, size=4) at hw/block/nvme.c:969
2 0x0000555555a7b81a in nvme_mmio_write (opaque=0x5555577473f0, addr=20,
data=4587521, size=4) at hw/block/nvme.c:1163
3 0x0000555555869236 in memory_region_write_accessor (mr=0x555557747cd0,
addr=20, value=0x7fffe97320f8, size=4, shift=0, mask=4294967295, attrs=...)
at /home/test/qemu1/qemu/memory.c:502
4 0x0000555555869446 in access_with_adjusted_size (addr=20,
value=0x7fffe97320f8, size=4, access_size_min=2, access_size_max=8,
access_fn=0x55555586914d <memory_region_write_accessor>,
mr=0x555557747cd0, attrs=...) at /home/test/qemu1/qemu/memory.c:568
5 0x000055555586c479 in memory_region_dispatch_write (mr=0x555557747cd0,
addr=20, data=4587521, size=4, attrs=...)
at /home/test/qemu1/qemu/memory.c:1499
6 0x00005555558030af in flatview_write_continue (fv=0x7fffe0061130,
addr=4273930260, attrs=..., buf=0x7ffff7ff0028 "\001", len=4, addr1=20,
l=4, mr=0x555557747cd0) at /home/test/qemu1/qemu/exec.c:3234
7 0x00005555558031f9 in flatview_write (fv=0x7fffe0061130, addr=4273930260,
attrs=..., buf=0x7ffff7ff0028 "\001", len=4)
at /home/test/qemu1/qemu/exec.c:3273
8 0x00005555558034ff in address_space_write (
---Type <return> to continue, or q <return> to quit---
as=0x555556758480 <address_space_memory>, addr=4273930260, attrs=...,
buf=0x7ffff7ff0028 "\001", len=4) at /home/test/qemu1/qemu/exec.c:3363
9 0x0000555555803550 in address_space_rw (
as=0x555556758480 <address_space_memory>, addr=4273930260, attrs=...,
buf=0x7ffff7ff0028 "\001", len=4, is_write=true)
at /home/test/qemu1/qemu/exec.c:3374
10 0x00005555558884a1 in kvm_cpu_exec (cpu=0x555556920e40)
at /home/test/qemu1/qemu/accel/kvm/kvm-all.c:2031
11 0x000055555584cd9d in qemu_kvm_cpu_thread_fn (arg=0x555556920e40)
at /home/test/qemu1/qemu/cpus.c:1281
12 0x0000555555dbaf6d in qemu_thread_start (args=0x5555569438a0)
at util/qemu-thread-posix.c:502
13 0x00007ffff5dc86db in start_thread (arg=0x7fffe9735700)
at pthread_create.c:463
14 0x00007ffff5af188f in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Signed-off-by: Li Qiang <liq3ea@163.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20190120055558.32984-3-liq3ea@163.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2019-01-20 08:55:57 +03:00
|
|
|
if (!n->num_queues) {
|
|
|
|
error_setg(errp, "num_queues can't be zero");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:18 +04:00
|
|
|
if (!n->conf.blk) {
|
2017-11-22 06:08:43 +03:00
|
|
|
error_setg(errp, "drive property not set");
|
|
|
|
return;
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
|
2014-10-07 15:59:18 +04:00
|
|
|
bs_size = blk_getlength(n->conf.blk);
|
2014-02-22 01:18:31 +04:00
|
|
|
if (bs_size < 0) {
|
2017-11-22 06:08:43 +03:00
|
|
|
error_setg(errp, "could not get backing file size");
|
|
|
|
return;
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!n->serial) {
|
2017-11-22 06:08:43 +03:00
|
|
|
error_setg(errp, "serial property not set");
|
|
|
|
return;
|
2013-06-04 19:17:10 +04:00
|
|
|
}
|
2015-02-16 14:47:58 +03:00
|
|
|
blkconf_blocksizes(&n->conf);
|
2017-11-22 06:08:45 +03:00
|
|
|
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
|
|
|
|
false, errp)) {
|
2017-11-22 06:08:43 +03:00
|
|
|
return;
|
2017-01-24 15:43:31 +03:00
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
pci_conf = pci_dev->config;
|
|
|
|
pci_conf[PCI_INTERRUPT_PIN] = 1;
|
|
|
|
pci_config_set_prog_interface(pci_dev->config, 0x2);
|
|
|
|
pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
|
2019-01-20 08:55:58 +03:00
|
|
|
pcie_endpoint_cap_init(pci_dev, 0x80);
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
n->num_namespaces = 1;
|
2015-07-24 15:33:09 +03:00
|
|
|
n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4);
|
2013-06-04 19:17:10 +04:00
|
|
|
n->ns_size = bs_size / (uint64_t)n->num_namespaces;
|
|
|
|
|
2014-08-19 12:31:09 +04:00
|
|
|
n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
|
|
|
|
n->sq = g_new0(NvmeSQueue *, n->num_queues);
|
|
|
|
n->cq = g_new0(NvmeCQueue *, n->num_queues);
|
2013-06-04 19:17:10 +04:00
|
|
|
|
2013-06-07 05:25:08 +04:00
|
|
|
memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
|
|
|
|
"nvme", n->reg_size);
|
2019-01-20 08:55:58 +03:00
|
|
|
pci_register_bar(pci_dev, 0,
|
2013-06-04 19:17:10 +04:00
|
|
|
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
|
|
|
|
&n->iomem);
|
2019-01-20 08:55:58 +03:00
|
|
|
msix_init_exclusive_bar(pci_dev, n->num_queues, 4, NULL);
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
|
|
|
|
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
|
|
|
|
strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
|
|
|
|
strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
|
|
|
|
strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' ');
|
|
|
|
id->rab = 6;
|
|
|
|
id->ieee[0] = 0x00;
|
|
|
|
id->ieee[1] = 0x02;
|
|
|
|
id->ieee[2] = 0xb3;
|
|
|
|
id->oacs = cpu_to_le16(0);
|
|
|
|
id->frmw = 7 << 1;
|
|
|
|
id->lpa = 1 << 0;
|
|
|
|
id->sqes = (0x6 << 4) | 0x6;
|
|
|
|
id->cqes = (0x4 << 4) | 0x4;
|
|
|
|
id->nn = cpu_to_le32(n->num_namespaces);
|
2017-05-05 12:58:07 +03:00
|
|
|
id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS);
|
2013-06-04 19:17:10 +04:00
|
|
|
id->psd[0].mp = cpu_to_le16(0x9c4);
|
|
|
|
id->psd[0].enlat = cpu_to_le32(0x10);
|
|
|
|
id->psd[0].exlat = cpu_to_le32(0x4);
|
2015-06-11 13:01:39 +03:00
|
|
|
if (blk_enable_write_cache(n->conf.blk)) {
|
|
|
|
id->vwc = 1;
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
|
|
|
|
n->bar.cap = 0;
|
|
|
|
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
|
|
|
|
NVME_CAP_SET_CQR(n->bar.cap, 1);
|
|
|
|
NVME_CAP_SET_AMS(n->bar.cap, 1);
|
|
|
|
NVME_CAP_SET_TO(n->bar.cap, 0xf);
|
|
|
|
NVME_CAP_SET_CSS(n->bar.cap, 1);
|
2014-11-27 06:39:21 +03:00
|
|
|
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
|
2013-06-04 19:17:10 +04:00
|
|
|
|
2017-05-16 22:10:59 +03:00
|
|
|
n->bar.vs = 0x00010200;
|
2013-06-04 19:17:10 +04:00
|
|
|
n->bar.intmc = n->bar.intms = 0;
|
|
|
|
|
2017-05-16 22:10:59 +03:00
|
|
|
if (n->cmb_size_mb) {
|
|
|
|
|
|
|
|
NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
|
|
|
|
NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
|
|
|
|
|
|
|
|
NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
|
|
|
|
NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
|
|
|
|
NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
|
2017-06-13 13:08:35 +03:00
|
|
|
NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
|
|
|
|
NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
|
2017-05-16 22:10:59 +03:00
|
|
|
NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
|
|
|
|
NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb);
|
|
|
|
|
2017-06-13 13:08:35 +03:00
|
|
|
n->cmbloc = n->bar.cmbloc;
|
|
|
|
n->cmbsz = n->bar.cmbsz;
|
|
|
|
|
2017-05-16 22:10:59 +03:00
|
|
|
n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
|
|
|
|
memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
|
|
|
|
"nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
|
2019-01-20 08:55:58 +03:00
|
|
|
pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
|
2017-05-16 22:10:59 +03:00
|
|
|
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
|
|
|
|
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
for (i = 0; i < n->num_namespaces; i++) {
|
|
|
|
NvmeNamespace *ns = &n->namespaces[i];
|
|
|
|
NvmeIdNs *id_ns = &ns->id_ns;
|
|
|
|
id_ns->nsfeat = 0;
|
|
|
|
id_ns->nlbaf = 0;
|
|
|
|
id_ns->flbas = 0;
|
|
|
|
id_ns->mc = 0;
|
|
|
|
id_ns->dpc = 0;
|
|
|
|
id_ns->dps = 0;
|
|
|
|
id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
|
|
|
|
id_ns->ncap = id_ns->nuse = id_ns->nsze =
|
|
|
|
cpu_to_le64(n->ns_size >>
|
|
|
|
id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_exit(PCIDevice *pci_dev)
|
|
|
|
{
|
|
|
|
NvmeCtrl *n = NVME(pci_dev);
|
|
|
|
|
|
|
|
nvme_clear_ctrl(n);
|
|
|
|
g_free(n->namespaces);
|
|
|
|
g_free(n->cq);
|
|
|
|
g_free(n->sq);
|
2017-05-16 22:10:59 +03:00
|
|
|
|
2018-10-29 09:29:41 +03:00
|
|
|
if (n->cmb_size_mb) {
|
|
|
|
g_free(n->cmbuf);
|
|
|
|
}
|
2013-06-04 19:17:10 +04:00
|
|
|
msix_uninit_exclusive_bar(pci_dev);
|
|
|
|
}
|
|
|
|
|
|
|
|
static Property nvme_props[] = {
|
|
|
|
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
|
|
|
|
DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
|
2017-05-16 22:10:59 +03:00
|
|
|
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
|
2018-06-26 04:44:56 +03:00
|
|
|
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
|
2013-06-04 19:17:10 +04:00
|
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
|
|
};
|
|
|
|
|
|
|
|
static const VMStateDescription nvme_vmstate = {
|
|
|
|
.name = "nvme",
|
|
|
|
.unmigratable = 1,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void nvme_class_init(ObjectClass *oc, void *data)
|
|
|
|
{
|
|
|
|
DeviceClass *dc = DEVICE_CLASS(oc);
|
|
|
|
PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
|
|
|
|
|
2017-11-22 06:08:43 +03:00
|
|
|
pc->realize = nvme_realize;
|
2013-06-04 19:17:10 +04:00
|
|
|
pc->exit = nvme_exit;
|
|
|
|
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
|
|
|
|
pc->vendor_id = PCI_VENDOR_ID_INTEL;
|
|
|
|
pc->device_id = 0x5845;
|
2016-08-04 22:42:15 +03:00
|
|
|
pc->revision = 2;
|
2013-06-04 19:17:10 +04:00
|
|
|
|
2013-07-29 18:17:45 +04:00
|
|
|
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
|
2013-06-04 19:17:10 +04:00
|
|
|
dc->desc = "Non-Volatile Memory Express";
|
|
|
|
dc->props = nvme_props;
|
|
|
|
dc->vmsd = &nvme_vmstate;
|
|
|
|
}
|
|
|
|
|
nvme: generate OpenFirmware device path in the "bootorder" fw_cfg file
Background on QEMU boot indices
-------------------------------
Normally, the "bootindex" property is configured for bootable devices
with:
DEVICE_instance_init()
device_add_bootindex_property(..., "bootindex", ...)
object_property_add(..., device_get_bootindex,
device_set_bootindex, ...)
and when the bootindex is set on the QEMU command line, with
-device DEVICE,...,bootindex=N
the setter that was configured above is invoked:
device_set_bootindex()
/* parse boot index */
visit_type_int32()
/* verify unicity */
check_boot_index()
/* store parsed boot index */
...
/* insert device path to boot order */
add_boot_device_path()
In the last step, add_boot_device_path() ensures that an OpenFirmware
device path will show up in the "bootorder" fw_cfg file, at a position
corresponding to the device's boot index. Thus guest firmware (SeaBIOS and
OVMF) can try to boot off the device with the right priority.
NVMe boot index
---------------
In QEMU commit 33739c712982,
nvma: ide: add bootindex to qom property
the following generic setters / getters:
- device_set_bootindex()
- device_get_bootindex()
were open-coded for NVMe, under the names
- nvme_set_bootindex()
- nvme_get_bootindex()
Plus nvme_instance_init() was added to configure the "bootindex" property
manually, designating the open-coded getter & setter, rather than calling
device_add_bootindex_property().
Crucially, nvme_set_bootindex() avoided the final add_boot_device_path()
call. This fact is spelled out in the message of commit 33739c712982, and
it was presumably the entire reason for all of the code duplication.
Now, Vladislav filed an RFE for OVMF
<https://github.com/tianocore/edk2/issues/48>; OVMF should boot off NVMe
devices. It is simple to build edk2's existent NvmExpressDxe driver into
OVMF, but the boot order matching logic in OVMF can only handle NVMe if
the "bootorder" fw_cfg file includes such devices.
Therefore this patch converts the NVMe device model to
device_set_bootindex() all the way.
Device paths
------------
device_set_bootindex() accepts an optional parameter called "suffix". When
present, it is expected to take the form of an OpenFirmware device path
node, and it gets appended as last node to the otherwise auto-generated
OFW path.
For NVMe, the auto-generated part is
/pci@i0cf8/pci8086,5845@6[,1]
^ ^ ^ ^
| | PCI slot and (present when nonzero)
| | function of the NVMe controller, both hex
| "driver name" component, built from PCI vendor & device IDs
PCI root at system bus port, PIO
to which here we append the suffix
/namespace@1,0
^ ^
| big endian (MSB at lowest address) numeric interpretation
| of the 64-bit IEEE Extended Unique Identifier, aka EUI-64,
| hex
32-bit NVMe namespace identifier, aka NSID, hex
resulting in the OFW device path
/pci@i0cf8/pci8086,5845@6[,1]/namespace@1,0
The reason for including the NSID and the EUI-64 is that an NVMe device
can in theory produce several different namespaces (distinguished by
NSID). Additionally, each of those may (optionally) have an EUI-64 value.
For now, QEMU only provides namespace 1.
Furthermore, QEMU doesn't even represent the EUI-64 as a standalone field;
it is embedded (and left unused) inside the "NvmeIdNs.res30" array, at the
last eight bytes. (Which is fine, since EUI-64 can be left zero-filled if
unsupported by the device.)
Based on the above, we set the "unit address" part of the last
("namespace") node to fixed "1,0".
OVMF will then map the above OFW device path to the following UEFI device
path fragment, for boot order processing:
PciRoot(0x0)/Pci(0x6,0x1)/NVMe(0x1,00-00-00-00-00-00-00-00)
^ ^ ^ ^ ^ ^
| | | | | octets of the EUI-64 in address order
| | | | NSID
| | | NVMe namespace messaging device path node
| PCI slot and function
PCI root bridge
Cc: Keith Busch <keith.busch@intel.com> (supporter:nvme)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:nvme)
Cc: Gonglei <arei.gonglei@huawei.com>
Cc: Vladislav Vovchenko <vladislav.vovchenko@sk.com>
Cc: Feng Tian <feng.tian@intel.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Acked-by: Gonglei <arei.gonglei@huawei.com>
Acked-by: Keith Busch <keith.busch@intel.com>
Tested-by: Vladislav Vovchenko <vladislav.vovchenko@sk.com>
Message-id: 1453850483-27511-1-git-send-email-lersek@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-01-27 02:21:23 +03:00
|
|
|
static void nvme_instance_init(Object *obj)
|
2014-10-07 12:00:34 +04:00
|
|
|
{
|
|
|
|
NvmeCtrl *s = NVME(obj);
|
|
|
|
|
nvme: generate OpenFirmware device path in the "bootorder" fw_cfg file
Background on QEMU boot indices
-------------------------------
Normally, the "bootindex" property is configured for bootable devices
with:
DEVICE_instance_init()
device_add_bootindex_property(..., "bootindex", ...)
object_property_add(..., device_get_bootindex,
device_set_bootindex, ...)
and when the bootindex is set on the QEMU command line, with
-device DEVICE,...,bootindex=N
the setter that was configured above is invoked:
device_set_bootindex()
/* parse boot index */
visit_type_int32()
/* verify unicity */
check_boot_index()
/* store parsed boot index */
...
/* insert device path to boot order */
add_boot_device_path()
In the last step, add_boot_device_path() ensures that an OpenFirmware
device path will show up in the "bootorder" fw_cfg file, at a position
corresponding to the device's boot index. Thus guest firmware (SeaBIOS and
OVMF) can try to boot off the device with the right priority.
NVMe boot index
---------------
In QEMU commit 33739c712982,
nvma: ide: add bootindex to qom property
the following generic setters / getters:
- device_set_bootindex()
- device_get_bootindex()
were open-coded for NVMe, under the names
- nvme_set_bootindex()
- nvme_get_bootindex()
Plus nvme_instance_init() was added to configure the "bootindex" property
manually, designating the open-coded getter & setter, rather than calling
device_add_bootindex_property().
Crucially, nvme_set_bootindex() avoided the final add_boot_device_path()
call. This fact is spelled out in the message of commit 33739c712982, and
it was presumably the entire reason for all of the code duplication.
Now, Vladislav filed an RFE for OVMF
<https://github.com/tianocore/edk2/issues/48>; OVMF should boot off NVMe
devices. It is simple to build edk2's existent NvmExpressDxe driver into
OVMF, but the boot order matching logic in OVMF can only handle NVMe if
the "bootorder" fw_cfg file includes such devices.
Therefore this patch converts the NVMe device model to
device_set_bootindex() all the way.
Device paths
------------
device_set_bootindex() accepts an optional parameter called "suffix". When
present, it is expected to take the form of an OpenFirmware device path
node, and it gets appended as last node to the otherwise auto-generated
OFW path.
For NVMe, the auto-generated part is
/pci@i0cf8/pci8086,5845@6[,1]
^ ^ ^ ^
| | PCI slot and (present when nonzero)
| | function of the NVMe controller, both hex
| "driver name" component, built from PCI vendor & device IDs
PCI root at system bus port, PIO
to which here we append the suffix
/namespace@1,0
^ ^
| big endian (MSB at lowest address) numeric interpretation
| of the 64-bit IEEE Extended Unique Identifier, aka EUI-64,
| hex
32-bit NVMe namespace identifier, aka NSID, hex
resulting in the OFW device path
/pci@i0cf8/pci8086,5845@6[,1]/namespace@1,0
The reason for including the NSID and the EUI-64 is that an NVMe device
can in theory produce several different namespaces (distinguished by
NSID). Additionally, each of those may (optionally) have an EUI-64 value.
For now, QEMU only provides namespace 1.
Furthermore, QEMU doesn't even represent the EUI-64 as a standalone field;
it is embedded (and left unused) inside the "NvmeIdNs.res30" array, at the
last eight bytes. (Which is fine, since EUI-64 can be left zero-filled if
unsupported by the device.)
Based on the above, we set the "unit address" part of the last
("namespace") node to fixed "1,0".
OVMF will then map the above OFW device path to the following UEFI device
path fragment, for boot order processing:
PciRoot(0x0)/Pci(0x6,0x1)/NVMe(0x1,00-00-00-00-00-00-00-00)
^ ^ ^ ^ ^ ^
| | | | | octets of the EUI-64 in address order
| | | | NSID
| | | NVMe namespace messaging device path node
| PCI slot and function
PCI root bridge
Cc: Keith Busch <keith.busch@intel.com> (supporter:nvme)
Cc: Kevin Wolf <kwolf@redhat.com> (supporter:Block layer core)
Cc: qemu-block@nongnu.org (open list:nvme)
Cc: Gonglei <arei.gonglei@huawei.com>
Cc: Vladislav Vovchenko <vladislav.vovchenko@sk.com>
Cc: Feng Tian <feng.tian@intel.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Acked-by: Gonglei <arei.gonglei@huawei.com>
Acked-by: Keith Busch <keith.busch@intel.com>
Tested-by: Vladislav Vovchenko <vladislav.vovchenko@sk.com>
Message-id: 1453850483-27511-1-git-send-email-lersek@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2016-01-27 02:21:23 +03:00
|
|
|
device_add_bootindex_property(obj, &s->conf.bootindex,
|
|
|
|
"bootindex", "/namespace@1,0",
|
|
|
|
DEVICE(obj), &error_abort);
|
2014-10-07 12:00:34 +04:00
|
|
|
}
|
|
|
|
|
2013-06-04 19:17:10 +04:00
|
|
|
static const TypeInfo nvme_info = {
|
2019-01-20 08:55:56 +03:00
|
|
|
.name = TYPE_NVME,
|
2013-06-04 19:17:10 +04:00
|
|
|
.parent = TYPE_PCI_DEVICE,
|
|
|
|
.instance_size = sizeof(NvmeCtrl),
|
|
|
|
.class_init = nvme_class_init,
|
2014-10-07 12:00:34 +04:00
|
|
|
.instance_init = nvme_instance_init,
|
2017-09-27 22:56:33 +03:00
|
|
|
.interfaces = (InterfaceInfo[]) {
|
|
|
|
{ INTERFACE_PCIE_DEVICE },
|
|
|
|
{ }
|
|
|
|
},
|
2013-06-04 19:17:10 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
static void nvme_register_types(void)
|
|
|
|
{
|
|
|
|
type_register_static(&nvme_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_init(nvme_register_types)
|