Add NVMe command passthrough support.

This commit is contained in:
nonaka 2016-06-04 16:11:50 +00:00
parent 3f81b60716
commit e7c0cc5d9a
9 changed files with 469 additions and 10 deletions

View File

@ -1,4 +1,4 @@
# $NetBSD: mi,v 1.2038 2016/06/03 21:55:50 joerg Exp $
# $NetBSD: mi,v 1.2039 2016/06/04 16:11:50 nonaka Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
./etc/mtree/set.comp comp-sys-root
@ -498,6 +498,8 @@
./usr/include/dev/ic/nec765reg.h comp-obsolete obsolete
./usr/include/dev/ic/ns16450reg.h comp-obsolete obsolete
./usr/include/dev/ic/ns16550reg.h comp-obsolete obsolete
./usr/include/dev/ic/nvmeio.h comp-c-include
./usr/include/dev/ic/nvmereg.h comp-c-include
./usr/include/dev/ic/opl3sa3.h comp-obsolete obsolete
./usr/include/dev/ic/opl3sa3reg.h comp-obsolete obsolete
./usr/include/dev/ic/pcdisplay.h comp-obsolete obsolete

View File

@ -1,5 +1,5 @@
#!/bin/sh -
# $NetBSD: MAKEDEV.tmpl,v 1.179 2016/01/28 19:06:39 riz Exp $
# $NetBSD: MAKEDEV.tmpl,v 1.180 2016/06/04 16:11:50 nonaka Exp $
#
# Copyright (c) 2003,2007,2008 The NetBSD Foundation, Inc.
# All rights reserved.
@ -264,6 +264,8 @@
# np* UNIBUS Ethernet co-processor interface, for downloading.
# npf NPF packet filter
# nsmb* SMB requester
# nvme* Non-Volatile Memory Host Controller Interface device driver
# nvme*ns* Non-Volatile Memory namespace
# openfirm OpenFirmware accessor
# pad* Pseudo-audio device driver
# pci* PCI bus access devices
@ -2201,6 +2203,22 @@ vchiq)
mkdev vchiq c %vchiq_chr% 0 600
;;
nvme[0-9]*ns[0-9]*)
unit=${i#nvme}
unit=${unit%ns*}
subunit=${i#nvme${unit}ns}
if [ 0$subunit -le 0 -o 0$subunit -ge 65536 ]; then
warn "bad nsid for $i: $subunit"
break
fi
mkdev nvme${unit}ns$subunit c %nvmens_chr% $(($unit * 65536 + $subunit))
;;
nvme[0-9]*)
unit=${i#nvme}
mkdev nvme$unit c %nvme_chr% $unit
;;
midevend)
%MI_DEVICES_END%
local)

View File

@ -1,4 +1,4 @@
# $NetBSD: MAKEDEV.conf,v 1.21 2015/02/22 14:42:44 christos Exp $
# $NetBSD: MAKEDEV.conf,v 1.22 2016/06/04 16:11:50 nonaka Exp $
# As of 2003-04-17, the "init" case must not create more than 890 entries.
all_md)
@ -12,6 +12,10 @@ all_md)
makedev ccd0 md0 random
makedev cgd0 cgd1
makedev amr0 iop0 mfi0 mlx0 mly0 dpti0 dpt0 twe0
# NVMe controllers 0-3, each with namespace nodes 1-4.
makedev nvme0 nvme0ns1 nvme0ns2 nvme0ns3 nvme0ns4
makedev nvme1 nvme1ns1 nvme1ns2 nvme1ns3 nvme1ns4
makedev nvme2 nvme2ns1 nvme2ns2 nvme2ns3 nvme2ns4
makedev nvme3 nvme3ns1 nvme3ns2 nvme3ns3 nvme3ns4
makedev raid0 raid1 raid2 raid3
makedev ld0 ld1 ld2 ld3
makedev xbd0 xbd1 xbd2 xbd3 xen

View File

@ -1,4 +1,4 @@
# $NetBSD: majors,v 1.73 2016/05/13 07:41:47 skrll Exp $
# $NetBSD: majors,v 1.74 2016/06/04 16:11:50 nonaka Exp $
#
# Device majors for Machine-Independent drivers.
#
@ -72,3 +72,5 @@ device-major lua char 209 lua
# 310-339 reserved for previously not MI storage devices
device-major hdmicec char 340 hdmicec
device-major nvme char 341 nvme
device-major nvmens char 342 nvmens

View File

@ -1,9 +1,10 @@
# $NetBSD: Makefile,v 1.24 2005/12/11 12:21:25 christos Exp $
# $NetBSD: Makefile,v 1.25 2016/06/04 16:11:51 nonaka Exp $
INCSDIR= /usr/include/dev/ic
# Only install includes which are used by userland
INCS= athioctl.h bt8xx.h hd44780var.h icpreg.h icp_ioctl.h isp_ioctl.h \
mlxreg.h mlxio.h rrunnerreg.h rrunnervar.h wdcreg.h wi_ieee.h
mlxreg.h mlxio.h nvmeio.h nvmereg.h rrunnerreg.h rrunnervar.h \
wdcreg.h wi_ieee.h
.include <bsd.kinc.mk>

View File

@ -1,4 +1,4 @@
/* $NetBSD: nvme.c,v 1.2 2016/05/02 19:18:29 christos Exp $ */
/* $NetBSD: nvme.c,v 1.3 2016/06/04 16:11:51 nonaka Exp $ */
/* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */
/*
@ -18,7 +18,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.2 2016/05/02 19:18:29 christos Exp $");
__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.3 2016/06/04 16:11:51 nonaka Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -26,14 +26,19 @@ __KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.2 2016/05/02 19:18:29 christos Exp $");
#include <sys/atomic.h>
#include <sys/bus.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/kmem.h>
#include <sys/once.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/mutex.h>
#include <uvm/uvm_extern.h>
#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>
#include <dev/ic/nvmeio.h>
int nvme_adminq_size = 128;
int nvme_ioq_size = 128;
@ -95,6 +100,13 @@ static void nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *,
static void nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *,
struct nvme_cqe *);
static void nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *,
void *);
static void nvme_pt_done(struct nvme_queue *, struct nvme_ccb *,
struct nvme_cqe *);
static int nvme_command_passthrough(struct nvme_softc *,
struct nvme_pt_command *, uint16_t, struct lwp *, bool);
#define nvme_read4(_s, _r) \
bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r))
#define nvme_write4(_s, _r, _v) \
@ -728,6 +740,136 @@ nvme_ns_free(struct nvme_softc *sc, uint16_t nsid)
kmem_free(identify, sizeof(*identify));
}
static void
nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
struct nvme_softc *sc = q->q_sc;
struct nvme_sqe *sqe = slot;
struct nvme_pt_command *pt = ccb->ccb_cookie;
bus_dmamap_t dmap = ccb->ccb_dmamap;
int i;
sqe->opcode = pt->cmd.opcode;
htolem32(&sqe->nsid, pt->cmd.nsid);
if (pt->buf != NULL && pt->len > 0) {
htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
switch (dmap->dm_nsegs) {
case 1:
break;
case 2:
htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
break;
default:
for (i = 1; i < dmap->dm_nsegs; i++) {
htolem64(&ccb->ccb_prpl[i - 1],
dmap->dm_segs[i].ds_addr);
}
bus_dmamap_sync(sc->sc_dmat,
NVME_DMA_MAP(q->q_ccb_prpls),
ccb->ccb_prpl_off,
sizeof(*ccb->ccb_prpl) * dmap->dm_nsegs - 1,
BUS_DMASYNC_PREWRITE);
htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
break;
}
}
htolem32(&sqe->cdw10, pt->cmd.cdw10);
htolem32(&sqe->cdw11, pt->cmd.cdw11);
htolem32(&sqe->cdw12, pt->cmd.cdw12);
htolem32(&sqe->cdw13, pt->cmd.cdw13);
htolem32(&sqe->cdw14, pt->cmd.cdw14);
htolem32(&sqe->cdw15, pt->cmd.cdw15);
}
/*
 * Completion handler for a passthrough command.
 *
 * q   - queue the command completed on
 * ccb - command control block; ccb_cookie holds the struct nvme_pt_command
 * cqe - completion queue entry reported by the controller
 *
 * Finishes the DMA transaction for the data buffer (if any), unloads the
 * map, and copies cdw0 and the status flags (with the phase bit cleared)
 * into the caller's completion structure.
 */
static void
nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_pt_command *pt = ccb->ccb_cookie;
	bus_dmamap_t dmap = ccb->ccb_dmamap;

	if (pt->buf != NULL && pt->len > 0) {
		if (dmap->dm_nsegs > 2) {
			/*
			 * The PRP list holds (dm_nsegs - 1) entries; the
			 * length must match the PREWRITE sync done when the
			 * command was filled in.
			 */
			bus_dmamap_sync(sc->sc_dmat,
			    NVME_DMA_MAP(q->q_ccb_prpls),
			    ccb->ccb_prpl_off,
			    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
			    BUS_DMASYNC_POSTWRITE);
		}

		bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
		    pt->is_read ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->sc_dmat, dmap);
	}

	/*
	 * NOTE(review): cdw0 and flags are copied without byte-order
	 * conversion, whereas the submission side uses htolem32(); confirm
	 * whether a little-endian-to-host conversion is wanted here.
	 */
	pt->cpl.cdw0 = cqe->cdw0;
	pt->cpl.flags = cqe->flags & ~NVME_CQE_PHASE;
}
/*
 * Execute a user-supplied NVMe command on behalf of an ioctl caller.
 *
 * sc        - controller to issue the command on
 * pt        - passthrough request (kernel copy of the ioctl argument);
 *             pt->cmd.nsid is overwritten with nsid, pt->cpl is filled in
 *             by the completion handler
 * nsid      - namespace id to place in the command
 * l         - calling lwp (currently unused)
 * is_adminq - true to use the admin queue, false to use an I/O queue
 *
 * The user data buffer is never mapped directly; a kernel bounce buffer of
 * pt->len bytes is used and copied in (writes) or out (reads).
 *
 * Returns 0 on success, EINVAL if buf/len are inconsistent, EBUSY if no ccb
 * is available, ENOMEM if the bounce buffer cannot be allocated, EIO if
 * nvme_poll() reports failure, or the error from copyin(), copyout() or
 * bus_dmamap_load().
 */
static int
nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt,
    uint16_t nsid, struct lwp *l, bool is_adminq)
{
	struct nvme_queue *q;
	struct nvme_ccb *ccb;
	void *buf = NULL;
	int error;

	/* A buffer and a length must be given together or not at all. */
	if ((pt->buf == NULL && pt->len > 0) ||
	    (pt->buf != NULL && pt->len == 0))
		return EINVAL;

	q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc);
	ccb = nvme_ccb_get(q);
	if (ccb == NULL)
		return EBUSY;

	if (pt->buf != NULL && pt->len > 0) {
		/*
		 * Zero the bounce buffer: on a read the whole of it is
		 * copied out afterwards, and the controller may transfer
		 * fewer than pt->len bytes.  Uninitialized memory would
		 * otherwise be disclosed to userland.
		 */
		buf = kmem_zalloc(pt->len, KM_SLEEP);
		if (buf == NULL) {
			error = ENOMEM;
			goto ccb_put;
		}
		if (!pt->is_read) {
			error = copyin(pt->buf, buf, pt->len);
			if (error)
				goto free_buf;
		}
		error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf,
		    pt->len, NULL,
		    BUS_DMA_WAITOK |
		      (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE));
		if (error)
			goto free_buf;
		bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap,
		    0, ccb->ccb_dmamap->dm_mapsize,
		    pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
	}

	ccb->ccb_done = nvme_pt_done;
	ccb->ccb_cookie = pt;

	pt->cmd.nsid = nsid;
	error = nvme_poll(sc, q, ccb, nvme_pt_fill) ? EIO : 0;

	/* Hand read data back only if the command itself succeeded. */
	if (error == 0 && buf != NULL && pt->is_read)
		error = copyout(buf, pt->buf, pt->len);

free_buf:
	if (buf != NULL)
		kmem_free(buf, pt->len);
ccb_put:
	nvme_ccb_put(q, ccb);
	return error;
}
static void
nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *))
@ -1289,3 +1431,188 @@ nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
kmem_free(ndm, sizeof(*ndm));
}
/*
* ioctl
*/
/* nvme */
dev_type_open(nvmeopen);
dev_type_close(nvmeclose);
dev_type_ioctl(nvmeioctl);
/*
 * Character device switch for the nvme controller node.
 * Only open, close and ioctl are implemented in this file; every other
 * entry points at the corresponding no* symbol.
 */
const struct cdevsw nvme_cdevsw = {
.d_open = nvmeopen,
.d_close = nvmeclose,
.d_read = noread,
.d_write = nowrite,
.d_ioctl = nvmeioctl,
.d_stop = nostop,
.d_tty = notty,
.d_poll = nopoll,
.d_mmap = nommap,
.d_kqfilter = nokqfilter,
.d_discard = nodiscard,
.d_flag = D_OTHER,
};
extern struct cfdriver nvme_cd;
/*
 * Open the controller device node.
 *
 * The minor number selects the nvme unit.  Fails with ENXIO when the unit
 * does not exist or has not finished attaching, and with EBUSY when the
 * node is already open; otherwise marks the controller open and returns 0.
 */
int
nvmeopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	const int unit = minor(dev);
	struct nvme_softc *sc = device_lookup_private(&nvme_cd, unit);

	if (sc == NULL)
		return ENXIO;
	if (!(sc->sc_flags & NVME_F_ATTACHED))
		return ENXIO;

	if (ISSET(sc->sc_flags, NVME_F_OPEN))
		return EBUSY;

	SET(sc->sc_flags, NVME_F_OPEN);
	return 0;
}
/*
 * Close the controller device node.
 *
 * Clears the open flag of the unit selected by the minor number.
 * Returns ENXIO if the unit does not exist, 0 otherwise.
 */
int
nvmeclose(dev_t dev, int flag, int mode, struct lwp *l)
{
	const int unit = minor(dev);
	struct nvme_softc *sc = device_lookup_private(&nvme_cd, unit);

	if (sc != NULL) {
		CLR(sc->sc_flags, NVME_F_OPEN);
		return 0;
	}

	return ENXIO;
}
/*
 * ioctl entry point of the controller device node.
 *
 * NVME_PASSTHROUGH_CMD issues the supplied command on the admin queue,
 * using the namespace id given in the command itself.  Any other request
 * yields ENOTTY; a missing unit yields ENXIO.
 */
int
nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	const int unit = minor(dev);
	struct nvme_softc *sc = device_lookup_private(&nvme_cd, unit);
	struct nvme_pt_command *pt;

	if (sc == NULL)
		return ENXIO;

	if (cmd != NVME_PASSTHROUGH_CMD)
		return ENOTTY;

	pt = (struct nvme_pt_command *)data;
	return nvme_command_passthrough(sc, pt, pt->cmd.nsid, l, true);
}
/* nvmens */
dev_type_open(nvmensopen);
dev_type_close(nvmensclose);
dev_type_ioctl(nvmensioctl);
/*
 * Character device switch for the per-namespace nodes.
 * Only open, close and ioctl are implemented in this file; every other
 * entry points at the corresponding no* symbol.
 */
const struct cdevsw nvmens_cdevsw = {
.d_open = nvmensopen,
.d_close = nvmensclose,
.d_read = noread,
.d_write = nowrite,
.d_ioctl = nvmensioctl,
.d_stop = nostop,
.d_tty = notty,
.d_poll = nopoll,
.d_mmap = nommap,
.d_kqfilter = nokqfilter,
.d_discard = nodiscard,
.d_flag = D_OTHER,
};
extern struct cfdriver nvmens_cd;
/*
 * Accept an open operation on a namespace device.
 *
 * The minor number encodes the controller unit in the bits above the low
 * 16 and the namespace id (1-based) in the low 16 bits.
 *
 * Returns ENXIO if the controller does not exist or is not attached, or if
 * the namespace id is 0, out of range, or has no device attached; EBUSY if
 * the namespace node is already open; 0 on success.
 */
int
nvmensopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct nvme_softc *sc;
	int unit = minor(dev) / 0x10000;
	int nsid = minor(dev) & 0xffff;
	int nsidx;

	if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL)
		return ENXIO;
	if ((sc->sc_flags & NVME_F_ATTACHED) == 0)
		return ENXIO;
	if (nsid == 0)
		return ENXIO;

	/*
	 * Namespace ids start at 1 but the array is 0-based, so valid
	 * indices are 0 .. sc_nn - 1; nsidx == sc_nn is already one past
	 * the end.
	 */
	nsidx = nsid - 1;
	if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL)
		return ENXIO;
	if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN))
		return EBUSY;

	SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN);
	return 0;
}
/*
 * Accept the last close on a namespace device.
 *
 * The minor number encodes the controller unit in the bits above the low
 * 16 and the namespace id (1-based) in the low 16 bits.
 *
 * Returns ENXIO if the controller does not exist or the namespace id is 0
 * or out of range; otherwise clears the namespace's open flag and
 * returns 0.
 */
int
nvmensclose(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct nvme_softc *sc;
	int unit = minor(dev) / 0x10000;
	int nsid = minor(dev) & 0xffff;
	int nsidx;

	sc = device_lookup_private(&nvme_cd, unit);
	if (sc == NULL)
		return ENXIO;
	if (nsid == 0)
		return ENXIO;

	/* Valid indices are 0 .. sc_nn - 1; reject nsidx == sc_nn too. */
	nsidx = nsid - 1;
	if (nsidx >= sc->sc_nn)
		return ENXIO;

	CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN);
	return 0;
}
/*
 * ioctl entry point of a namespace device node.
 *
 * The controller unit and namespace id are decoded from the minor number.
 * NVME_PASSTHROUGH_CMD issues the supplied command on an I/O queue against
 * that namespace.  Any other request yields ENOTTY; a missing unit or a
 * namespace id of 0 yields ENXIO.
 */
int
nvmensioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	const int unit = minor(dev) / 0x10000;
	const int nsid = minor(dev) & 0xffff;
	struct nvme_softc *sc = device_lookup_private(&nvme_cd, unit);

	if (sc == NULL || nsid == 0)
		return ENXIO;

	if (cmd == NVME_PASSTHROUGH_CMD)
		return nvme_command_passthrough(sc, data, nsid, l, false);

	return ENOTTY;
}

97
sys/dev/ic/nvmeio.h Normal file
View File

@ -0,0 +1,97 @@
/* $NetBSD: nvmeio.h,v 1.1 2016/06/04 16:11:51 nonaka Exp $ */
/*-
* Copyright (C) 2012-2013 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: head/sys/dev/nvme/nvme.h 296617 2016-03-10 17:13:10Z mav $
*/
#ifndef __NVMEIO_H__
#define __NVMEIO_H__
#include <sys/ioccom.h>
#include <dev/ic/nvmereg.h>
#define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command)
#define nvme_completion_is_error(cpl) \
((NVME_CQE_SC((cpl)->flags) != NVME_CQE_SC_SUCCESS) \
|| (NVME_CQE_SCT((cpl)->flags) != NVME_CQE_SCT_GENERIC))
/*
 * Argument of the NVME_PASSTHROUGH_CMD ioctl: one NVMe command, its
 * optional data buffer, and the completion reported for it.
 */
struct nvme_pt_command {
/*
* cmd is used to specify a passthrough command to a controller or
* namespace.
*
* The following fields from cmd may be specified by the caller:
* * opcode
* * nsid (namespace id) - for admin commands only; on a namespace
*   device node the driver replaces it with the id taken from the
*   device minor
* * cdw10-cdw15
*
* Remaining fields must be set to 0 by the caller.
*/
struct nvme_sqe cmd;
/*
* cpl returns completion status for the passthrough command
* specified by cmd.
*
* The following fields will be filled out by the driver, for
* consumption by the caller:
* * cdw0
* * flags (except for phase)
*
* Remaining fields will be set to 0 by the driver.
*/
struct nvme_cqe cpl;
/*
* buf is the data buffer associated with this passthrough command.
* buf and len must be given together: a NULL buf with a non-zero len,
* or a non-NULL buf with a zero len, is rejected with EINVAL.
*/
void *buf;
/*
* len is the length of the data buffer associated with this
* passthrough command.
*/
uint32_t len;
/*
* is_read = 1 if the passthrough command will read data into the
* supplied buffer from the controller.
*
* is_read = 0 if the passthrough command will write data from the
* supplied buffer to the controller.
*/
uint32_t is_read;
/*
* timeout (unit: ms)
*
* 0: use default timeout value
*
* NOTE(review): the passthrough code in nvme.c does not appear to
* read this field yet - confirm before relying on it.
*/
uint32_t timeout;
};
#endif /* __NVMEIO_H__ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: nvmereg.h,v 1.1 2016/05/01 10:21:02 nonaka Exp $ */
/* $NetBSD: nvmereg.h,v 1.2 2016/06/04 16:11:51 nonaka Exp $ */
/* $OpenBSD: nvmereg.h,v 1.10 2016/04/14 11:18:32 dlg Exp $ */
/*
@ -17,6 +17,9 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef __NVMEREG_H__
#define __NVMEREG_H__
#define NVME_CAP 0x0000 /* Controller Capabilities */
#define NVME_CAP_MPSMAX(_r) (12 + (((_r) >> 52) & 0xf)) /* shift */
#define NVME_CAP_MPSMIN(_r) (12 + (((_r) >> 48) & 0xf)) /* shift */
@ -385,3 +388,5 @@ struct nvm_identify_namespace {
uint8_t vs[3712];
} __packed __aligned(8);
#endif /* __NVMEREG_H__ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: nvmevar.h,v 1.1 2016/05/01 10:21:02 nonaka Exp $ */
/* $NetBSD: nvmevar.h,v 1.2 2016/06/04 16:11:51 nonaka Exp $ */
/* $OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */
/*
@ -79,6 +79,8 @@ struct nvme_queue {
struct nvme_namespace {
struct nvm_identify_namespace *ident;
device_t dev;
uint32_t flags;
#define NVME_NS_F_OPEN __BIT(0)
};
struct nvme_softc {
@ -113,6 +115,7 @@ struct nvme_softc {
uint32_t sc_flags;
#define NVME_F_ATTACHED __BIT(0)
#define NVME_F_OPEN __BIT(1)
};
#define lemtoh16(p) le16toh(*((uint16_t *)(p)))