emulated nvme docs and fixes for -rc3
- documentation - fixes -----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmBz7wkACgkQTeGvMW1P DelIZgf9H3XReeMUBtXL7vftG4tfrdBMmJn1uyaqdlxiwJSuAbIgefwFgUaebkmO zKcu6NEUCoyDfpOqES3RNmEUFbyTau1+nhjVQvde6Ewt/tle+XyVJLp6G7VHQY1u DKxsT1bqP7Mu1QHxn3F03spdvwMXNXlGNyOLec6TA2yxOmWDSwr4GVapB2YhBrzH u6DHmbsh2nmOxX+UWrlJf+F0t61ZxogBevaXTkM5bYAPlPgeJdQBRKSHE6Z4Ta9B Xj46XC/wHXQsrTTAJ9eAiXfyA5nDYco5RtB8GtjElHPZ3CqJKkvQPISW+noCve/D ldVRXAO3CHxvg9WY3UzAQMvQ7Muaeg== =o+in -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/nvme/tags/nvme-fixes-20210412-pull-request' into staging emulated nvme docs and fixes for -rc3 - documentation - fixes # gpg: Signature made Mon 12 Apr 2021 07:56:09 BST # gpg: using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9 # gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown] # gpg: aka "Klaus Jensen <k.jensen@samsung.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468 4272 63D5 6FC5 E55D A838 # Subkey fingerprint: 5228 33AA 75E2 DCE6 A247 66C0 4DE1 AF31 6D4F 0DE9 * remotes/nvme/tags/nvme-fixes-20210412-pull-request: hw/block/nvme: drain namespaces on sq deletion hw/block/nvme: store aiocb in compare hw/block/nvme: map prp fix if prp2 contains non-zero offset docs: add nvme emulation documentation Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
f2afdc2ad9
@ -1974,7 +1974,7 @@ S: Supported
|
||||
F: hw/block/nvme*
|
||||
F: include/block/nvme.h
|
||||
F: tests/qtest/nvme-test.c
|
||||
F: docs/specs/nvme.txt
|
||||
F: docs/system/nvme.rst
|
||||
T: git git://git.infradead.org/qemu-nvme.git nvme-next
|
||||
|
||||
megasas
|
||||
|
@ -1,23 +0,0 @@
|
||||
NVM Express Controller
|
||||
======================
|
||||
|
||||
The nvme device (-device nvme) emulates an NVM Express Controller.
|
||||
|
||||
|
||||
Reference Specifications
|
||||
------------------------
|
||||
|
||||
The device currently implements most mandatory features of NVMe v1.3d, see
|
||||
|
||||
https://nvmexpress.org/resources/specifications/
|
||||
|
||||
for the specification.
|
||||
|
||||
|
||||
Known issues
|
||||
------------
|
||||
|
||||
* The accounting numbers in the SMART/Health are reset across power cycles
|
||||
|
||||
* Interrupt Coalescing is not supported and is disabled by default in violation
|
||||
of the specification.
|
@ -23,6 +23,7 @@ Contents:
|
||||
net
|
||||
virtio-net-failover
|
||||
usb
|
||||
nvme
|
||||
ivshmem
|
||||
linuxboot
|
||||
generic-loader
|
||||
|
225
docs/system/nvme.rst
Normal file
225
docs/system/nvme.rst
Normal file
@ -0,0 +1,225 @@
|
||||
==============
|
||||
NVMe Emulation
|
||||
==============
|
||||
|
||||
QEMU provides NVMe emulation through the ``nvme``, ``nvme-ns`` and
|
||||
``nvme-subsys`` devices.
|
||||
|
||||
See the following sections for specific information on
|
||||
|
||||
* `Adding NVMe Devices`_, `additional namespaces`_ and `NVM subsystems`_.
|
||||
* Configuration of `Optional Features`_ such as `Controller Memory Buffer`_,
|
||||
`Simple Copy`_, `Zoned Namespaces`_, `metadata`_ and `End-to-End Data
|
||||
Protection`_.
|
||||
|
||||
Adding NVMe Devices
|
||||
===================
|
||||
|
||||
Controller Emulation
|
||||
--------------------
|
||||
|
||||
The QEMU emulated NVMe controller implements version 1.4 of the NVM Express
|
||||
specification. All mandatory features are implemented with a couple of exceptions
|
||||
and limitations:
|
||||
|
||||
* Accounting numbers in the SMART/Health log page are reset when the device
|
||||
is power cycled.
|
||||
* Interrupt Coalescing is not supported and is disabled by default.
|
||||
|
||||
The simplest way to attach an NVMe controller on the QEMU PCI bus is to add the
|
||||
following parameters:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
-drive file=nvm.img,if=none,id=nvm
|
||||
-device nvme,serial=deadbeef,drive=nvm
|
||||
|
||||
There are a number of optional general parameters for the ``nvme`` device. Some
|
||||
are mentioned here, but see ``-device nvme,help`` to list all possible
|
||||
parameters.
|
||||
|
||||
``max_ioqpairs=UINT32`` (default: ``64``)
|
||||
Set the maximum number of allowed I/O queue pairs. This replaces the
|
||||
deprecated ``num_queues`` parameter.
|
||||
|
||||
``msix_qsize=UINT16`` (default: ``65``)
|
||||
The number of MSI-X vectors that the device should support.
|
||||
|
||||
``mdts=UINT8`` (default: ``7``)
|
||||
Set the Maximum Data Transfer Size of the device.
|
||||
|
||||
``use-intel-id`` (default: ``off``)
|
||||
Since QEMU 5.2, the device uses a QEMU allocated "Red Hat" PCI Device and
|
||||
Vendor ID. Set this to ``on`` to revert to the unallocated Intel ID
|
||||
previously used.
|
||||
|
||||
Additional Namespaces
|
||||
---------------------
|
||||
|
||||
In the simplest possible invocation sketched above, the device only supports a
|
||||
single namespace with the namespace identifier ``1``. To support multiple
|
||||
namespaces and additional features, the ``nvme-ns`` device must be used.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
-device nvme,id=nvme-ctrl-0,serial=deadbeef
|
||||
-drive file=nvm-1.img,if=none,id=nvm-1
|
||||
-device nvme-ns,drive=nvm-1
|
||||
-drive file=nvm-2.img,if=none,id=nvm-2
|
||||
-device nvme-ns,drive=nvm-2
|
||||
|
||||
The namespaces defined by the ``nvme-ns`` device will attach to the most
|
||||
recently defined ``nvme-bus`` that is created by the ``nvme`` device. Namespace
|
||||
identifiers are allocated automatically, starting from ``1``.
|
||||
|
||||
There are a number of parameters available:
|
||||
|
||||
``nsid`` (default: ``0``)
|
||||
Explicitly set the namespace identifier.
|
||||
|
||||
``uuid`` (default: *autogenerated*)
|
||||
Set the UUID of the namespace. This will be reported as a "Namespace UUID"
|
||||
descriptor in the Namespace Identification Descriptor List.
|
||||
|
||||
``bus``
|
||||
If there are more ``nvme`` devices defined, this parameter may be used to
|
||||
attach the namespace to a specific ``nvme`` device (identified by an ``id``
|
||||
parameter on the controller device).
|
||||
|
||||
NVM Subsystems
|
||||
--------------
|
||||
|
||||
Additional features become available if the controller device (``nvme``) is
|
||||
linked to an NVM Subsystem device (``nvme-subsys``).
|
||||
|
||||
The NVM Subsystem emulation allows features such as shared namespaces and
|
||||
multipath I/O.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
-device nvme-subsys,id=nvme-subsys-0,nqn=subsys0
|
||||
-device nvme,serial=a,subsys=nvme-subsys-0
|
||||
-device nvme,serial=b,subsys=nvme-subsys-0
|
||||
|
||||
This will create an NVM subsystem with two controllers. Having controllers
|
||||
linked to an ``nvme-subsys`` device allows additional ``nvme-ns`` parameters:
|
||||
|
||||
``shared`` (default: ``off``)
|
||||
Specifies that the namespace will be attached to all controllers in the
|
||||
subsystem. If set to ``off`` (the default), the namespace will remain a
|
||||
private namespace and may only be attached to a single controller at a time.
|
||||
|
||||
``detached`` (default: ``off``)
|
||||
If set to ``on``, the namespace will be available in the subsystem, but
|
||||
not attached to any controllers initially.
|
||||
|
||||
Thus, adding
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
-drive file=nvm-1.img,if=none,id=nvm-1
|
||||
-device nvme-ns,drive=nvm-1,nsid=1,shared=on
|
||||
-drive file=nvm-2.img,if=none,id=nvm-2
|
||||
-device nvme-ns,drive=nvm-2,nsid=3,detached=on
|
||||
|
||||
will cause NSID 1 to be a shared namespace (due to ``shared=on``) that is
|
||||
initially attached to both controllers. NSID 3 will be a private namespace
|
||||
(i.e. only attachable to a single controller at a time) and will not be
|
||||
attached to any controller initially (due to ``detached=on``).
|
||||
|
||||
Optional Features
|
||||
=================
|
||||
|
||||
Controller Memory Buffer
|
||||
------------------------
|
||||
|
||||
``nvme`` device parameters related to the Controller Memory Buffer support:
|
||||
|
||||
``cmb_size_mb=UINT32`` (default: ``0``)
|
||||
This adds a Controller Memory Buffer of the given size at offset zero in BAR
|
||||
2.
|
||||
|
||||
``legacy-cmb`` (default: ``off``)
|
||||
By default, the device uses the "v1.4 scheme" for the Controller Memory
|
||||
Buffer support (i.e., the CMB is initially disabled and must be explicitly
|
||||
enabled by the host). Set this to ``on`` to behave as a v1.3 device wrt. the
|
||||
CMB.
|
||||
|
||||
Simple Copy
|
||||
-----------
|
||||
|
||||
The device includes support for TP 4065 ("Simple Copy Command"). A number of
|
||||
additional ``nvme-ns`` device parameters may be used to control the Copy
|
||||
command limits:
|
||||
|
||||
``mssrl=UINT16`` (default: ``128``)
|
||||
Set the Maximum Single Source Range Length (``MSSRL``). This is the maximum
|
||||
number of logical blocks that may be specified in each source range.
|
||||
|
||||
``mcl=UINT32`` (default: ``128``)
|
||||
Set the Maximum Copy Length (``MCL``). This is the maximum number of logical
|
||||
blocks that may be specified in a Copy command (the total for all source
|
||||
ranges).
|
||||
|
||||
``msrc=UINT8`` (default: ``127``)
|
||||
Set the Maximum Source Range Count (``MSRC``). This is the maximum number of
|
||||
source ranges that may be used in a Copy command. This is a 0's based value.
|
||||
|
||||
Zoned Namespaces
|
||||
----------------
|
||||
|
||||
A namespace may be "Zoned" as defined by TP 4053 ("Zoned Namespaces"). Set
|
||||
``zoned=on`` on an ``nvme-ns`` device to configure it as a zoned namespace.
|
||||
|
||||
The namespace may be configured with additional parameters:
|
||||
|
||||
``zoned.zone_size=SIZE`` (default: ``128MiB``)
|
||||
Define the zone size (``ZSZE``).
|
||||
|
||||
``zoned.zone_capacity=SIZE`` (default: ``0``)
|
||||
Define the zone capacity (``ZCAP``). If left at the default (``0``), the zone
|
||||
capacity will equal the zone size.
|
||||
|
||||
``zoned.descr_ext_size=UINT32`` (default: ``0``)
|
||||
Set the Zone Descriptor Extension Size (``ZDES``). Must be a multiple of 64
|
||||
bytes.
|
||||
|
||||
``zoned.cross_read=BOOL`` (default: ``off``)
|
||||
Set to ``on`` to allow reads to cross zone boundaries.
|
||||
|
||||
``zoned.max_active=UINT32`` (default: ``0``)
|
||||
Set the maximum number of active resources (``MAR``). The default (``0``)
|
||||
allows all zones to be active.
|
||||
|
||||
``zoned.max_open=UINT32`` (default: ``0``)
|
||||
Set the maximum number of open resources (``MOR``). The default (``0``)
|
||||
allows all zones to be open. If ``zoned.max_active`` is specified, this value
|
||||
must be less than or equal to that.
|
||||
|
||||
Metadata
|
||||
--------
|
||||
|
||||
The virtual namespace device supports LBA metadata in the form of separate
|
||||
metadata (``MPTR``-based) and extended LBAs.
|
||||
|
||||
``ms=UINT16`` (default: ``0``)
|
||||
Defines the number of metadata bytes per LBA.
|
||||
|
||||
``mset=UINT8`` (default: ``0``)
|
||||
Set to ``1`` to enable extended LBAs.
|
||||
|
||||
End-to-End Data Protection
|
||||
--------------------------
|
||||
|
||||
The virtual namespace device supports DIF- and DIX-based protection information
|
||||
(depending on ``mset``).
|
||||
|
||||
``pi=UINT8`` (default: ``0``)
|
||||
Enable protection information of the specified type (type ``1``, ``2`` or
|
||||
``3``).
|
||||
|
||||
``pil=UINT8`` (default: ``0``)
|
||||
Controls the location of the protection information within the metadata. Set
|
||||
to ``1`` to transfer protection information as the first eight bytes of
|
||||
metadata. Otherwise, the protection information is transferred as the last
|
||||
eight bytes.
|
@ -470,6 +470,7 @@ static void nvme_req_clear(NvmeRequest *req)
|
||||
{
|
||||
req->ns = NULL;
|
||||
req->opaque = NULL;
|
||||
req->aiocb = NULL;
|
||||
memset(&req->cqe, 0x0, sizeof(req->cqe));
|
||||
req->status = NVME_SUCCESS;
|
||||
}
|
||||
@ -655,7 +656,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1,
|
||||
uint32_t nents, prp_trans;
|
||||
int i = 0;
|
||||
|
||||
nents = (len + n->page_size - 1) >> n->page_bits;
|
||||
/*
|
||||
* The first PRP list entry, pointed to by PRP2 may contain offset.
|
||||
* Hence, we need to calculate the number of entries based on
|
||||
* that offset.
|
||||
*/
|
||||
nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3;
|
||||
prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
|
||||
ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
|
||||
if (ret) {
|
||||
@ -666,7 +672,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1,
|
||||
while (len != 0) {
|
||||
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
|
||||
|
||||
if (i == n->max_prp_ents - 1 && len > n->page_size) {
|
||||
if (i == nents - 1 && len > n->page_size) {
|
||||
if (unlikely(prp_ent & (n->page_size - 1))) {
|
||||
trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
|
||||
status = NVME_INVALID_PRP_OFFSET | NVME_DNR;
|
||||
@ -675,7 +681,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1,
|
||||
|
||||
i = 0;
|
||||
nents = (len + n->page_size - 1) >> n->page_bits;
|
||||
prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
|
||||
nents = MIN(nents, n->max_prp_ents);
|
||||
prp_trans = nents * sizeof(uint64_t);
|
||||
ret = nvme_addr_read(n, prp_ent, (void *)prp_list,
|
||||
prp_trans);
|
||||
if (ret) {
|
||||
@ -2837,7 +2844,8 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
|
||||
|
||||
block_acct_start(blk_get_stats(blk), &req->acct, data_len,
|
||||
BLOCK_ACCT_READ);
|
||||
blk_aio_preadv(blk, offset, &ctx->data.iov, 0, nvme_compare_data_cb, req);
|
||||
req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0,
|
||||
nvme_compare_data_cb, req);
|
||||
|
||||
return NVME_NO_COMPLETE;
|
||||
}
|
||||
@ -3680,6 +3688,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
|
||||
NvmeSQueue *sq;
|
||||
NvmeCQueue *cq;
|
||||
uint16_t qid = le16_to_cpu(c->qid);
|
||||
uint32_t nsid;
|
||||
|
||||
if (unlikely(!qid || nvme_check_sqid(n, qid))) {
|
||||
trace_pci_nvme_err_invalid_del_sq(qid);
|
||||
@ -3691,9 +3700,26 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
|
||||
sq = n->sq[qid];
|
||||
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
|
||||
r = QTAILQ_FIRST(&sq->out_req_list);
|
||||
assert(r->aiocb);
|
||||
blk_aio_cancel(r->aiocb);
|
||||
if (r->aiocb) {
|
||||
blk_aio_cancel(r->aiocb);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Drain all namespaces if there are still outstanding requests that we
|
||||
* could not cancel explicitly.
|
||||
*/
|
||||
if (!QTAILQ_EMPTY(&sq->out_req_list)) {
|
||||
for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) {
|
||||
NvmeNamespace *ns = nvme_ns(n, nsid);
|
||||
if (ns) {
|
||||
nvme_ns_drain(ns);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(QTAILQ_EMPTY(&sq->out_req_list));
|
||||
|
||||
if (!nvme_check_cqid(n, sq->cqid)) {
|
||||
cq = n->cq[sq->cqid];
|
||||
QTAILQ_REMOVE(&cq->sq_list, sq, entry);
|
||||
|
Loading…
x
Reference in New Issue
Block a user