Added nvme(4) for Non-Volatile Memory Host Controller Interface devices.

Ported from OpenBSD.
This commit is contained in:
nonaka 2016-05-01 10:21:01 +00:00
parent 8e6585046e
commit 8b5163f01f
16 changed files with 2691 additions and 12 deletions

View File

@ -1,4 +1,4 @@
# $NetBSD: mi,v 1.1523 2016/04/28 15:55:15 christos Exp $
# $NetBSD: mi,v 1.1524 2016/05/01 10:21:01 nonaka Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
#
@ -1468,6 +1468,7 @@
./usr/share/man/cat4/ntwo.0 man-sys-catman .cat
./usr/share/man/cat4/ntwoc.0 man-sys-catman .cat
./usr/share/man/cat4/null.0 man-sys-catman .cat
./usr/share/man/cat4/nvme.0 man-sys-catman .cat
./usr/share/man/cat4/nxt2k.0 man-sys-catman .cat
./usr/share/man/cat4/oak.0 man-sys-catman .cat
./usr/share/man/cat4/oboe.0 man-sys-catman .cat
@ -4500,6 +4501,7 @@
./usr/share/man/html4/ntwo.html man-sys-htmlman html
./usr/share/man/html4/ntwoc.html man-sys-htmlman html
./usr/share/man/html4/null.html man-sys-htmlman html
./usr/share/man/html4/nvme.html man-sys-htmlman html
./usr/share/man/html4/nxt2k.html man-sys-htmlman html
./usr/share/man/html4/oak.html man-sys-htmlman html
./usr/share/man/html4/oboe.html man-sys-htmlman html
@ -7390,6 +7392,7 @@
./usr/share/man/man4/ntwo.4 man-sys-man .man
./usr/share/man/man4/ntwoc.4 man-sys-man .man
./usr/share/man/man4/null.4 man-sys-man .man
./usr/share/man/man4/nvme.4 man-sys-man .man
./usr/share/man/man4/nxt2k.4 man-sys-man .man
./usr/share/man/man4/oak.4 man-sys-man .man
./usr/share/man/man4/oboe.4 man-sys-man .man

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.627 2016/01/18 00:34:43 kamil Exp $
# $NetBSD: Makefile,v 1.628 2016/05/01 10:21:01 nonaka Exp $
# @(#)Makefile 8.1 (Berkeley) 6/18/93
MAN= aac.4 ac97.4 acardide.4 aceride.4 acphy.4 \
@ -45,6 +45,7 @@ MAN= aac.4 ac97.4 acardide.4 aceride.4 acphy.4 \
mtio.4 msm6242b.4 multicast.4 mvsata.4 \
nadb.4 ne.4 neo.4 netintro.4 nfe.4 nfsmb.4 njata.4 njs.4 \
nsclpcsio.4 nside.4 nsp.4 nsphy.4 nsphyter.4 ntwoc.4 null.4 nsmb.4 \
nvme.4 \
oak.4 oosiop.4 opl.4 options.4 optiide.4 osiop.4 otus.4 \
pad.4 pas.4 pcdisplay.4 pcf8563rtc.4 pciide.4 pckbc.4 pckbd.4 pcn.4 \
pcppi.4 pcscp.4 pcweasel.4 pdcide.4 pdcsata.4 piixide.4 piixpcib.4 \

View File

@ -1,4 +1,4 @@
.\" $NetBSD: ld.4,v 1.19 2011/11/05 09:22:44 hannken Exp $
.\" $NetBSD: ld.4,v 1.20 2016/05/01 10:21:01 nonaka Exp $
.\"
.\" Copyright (c) 2000 The NetBSD Foundation, Inc.
.\" All rights reserved.
@ -41,6 +41,7 @@
.Cd "ld* at icp? unit ?"
.Cd "ld* at iop? tid ?"
.Cd "ld* at mlx? unit ?"
.Cd "ld* at nvme? nsid ?"
.Cd "ld* at sdmmc?"
.Cd "ld* at twa? unit ?"
.Cd "ld* at twe? unit ?"
@ -71,6 +72,7 @@ partition
.Xr intro 4 ,
.Xr iop 4 ,
.Xr mlx 4 ,
.Xr nvme 4 ,
.Xr sdmmc 4 ,
.Xr twa 4 ,
.Xr twe 4 ,

56
share/man/man4/nvme.4 Normal file
View File

@ -0,0 +1,56 @@
.\" $NetBSD: nvme.4,v 1.1 2016/05/01 10:21:01 nonaka Exp $
.\" $OpenBSD: nvme.4,v 1.2 2016/04/14 11:53:37 jmc Exp $
.\"
.\" Copyright (c) 2016 David Gwynne <dlg@openbsd.org>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
.\" copyright notice and this permission notice appear in all copies.
.\"
.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
.Dd May 1, 2016
.Dt NVME 4
.Os
.Sh NAME
.Nm nvme
.Nd Non-Volatile Memory Host Controller Interface
.Sh SYNOPSIS
.Cd "nvme* at pci? dev ? function ?"
.Sh DESCRIPTION
The
.Nm
driver provides support for NVMe, or NVM Express,
storage controllers conforming to the
Non-Volatile Memory Host Controller Interface specification.
.Sh SEE ALSO
.Xr intro 4 ,
.Xr ld 4 ,
.Xr pci 4
.Sh HISTORY
The
.Nm
driver first appeared in
.Ox 6.0
and in
.Nx 8.0 .
.Sh AUTHORS
.An -nosplit
The
.Nm
driver was written by
.An David Gwynne
.Aq dlg@openbsd.org
for
.Ox
and ported to
.Nx
by
.An NONAKA Kimihiro
.Aq nonaka@NetBSD.org .

View File

@ -1,4 +1,4 @@
.\" $NetBSD: pci.4,v 1.95 2015/08/28 08:01:15 wiz Exp $
.\" $NetBSD: pci.4,v 1.96 2016/05/01 10:21:01 nonaka Exp $
.\"
.\" Copyright (c) 1997 Jason R. Thorpe. All rights reserved.
.\" Copyright (c) 1997 Jonathan Stone
@ -404,6 +404,8 @@ USB EHCI host controllers.
I2O I/O processors.
.It mr
Guillemot Maxi Radio FM 2000 FM radio device.
.It nvme
NVM Express (Non-Volatile Memory Host Controller Interface) storage controllers.
.It oboe
Toshiba OBOE IrDA SIR/FIR controller.
.It ohci
@ -483,6 +485,7 @@ VGA graphics boards.
.Xr neo 4 ,
.Xr nfe 4 ,
.Xr ntwoc 4 ,
.Xr nvme 4 ,
.Xr oboe 4 ,
.Xr ohci 4 ,
.Xr pcic 4 ,

View File

@ -1,4 +1,4 @@
# $NetBSD: ALL,v 1.33 2015/11/10 13:01:41 tnn Exp $
# $NetBSD: ALL,v 1.34 2016/05/01 10:21:01 nonaka Exp $
# From NetBSD: GENERIC,v 1.787 2006/10/01 18:37:54 bouyer Exp
#
# ALL machine description file
@ -17,7 +17,7 @@ include "arch/amd64/conf/std.amd64"
options INCLUDE_CONFIG_FILE # embed config file in kernel binary
#ident "ALL-$Revision: 1.33 $"
#ident "ALL-$Revision: 1.34 $"
maxusers 64 # estimated number of users
@ -865,6 +865,11 @@ st* at atapibus? drive ? flags 0x0000 # ATAPI tape drives
uk* at atapibus? drive ? flags 0x0000 # ATAPI unknown
# NVM Express controllers and devices
nvme* at pci? dev ? function ?
ld* at nvme? nsid ?
# Miscellaneous mass storage devices
# ISA floppy

View File

@ -1,4 +1,4 @@
# $NetBSD: GENERIC,v 1.431 2016/04/23 10:15:27 skrll Exp $
# $NetBSD: GENERIC,v 1.432 2016/05/01 10:21:01 nonaka Exp $
#
# GENERIC machine description file
#
@ -22,7 +22,7 @@ include "arch/amd64/conf/std.amd64"
options INCLUDE_CONFIG_FILE # embed config file in kernel binary
#ident "GENERIC-$Revision: 1.431 $"
#ident "GENERIC-$Revision: 1.432 $"
maxusers 64 # estimated number of users
@ -703,6 +703,11 @@ st* at atapibus? drive ? flags 0x0000 # ATAPI tape drives
uk* at atapibus? drive ? flags 0x0000 # ATAPI unknown
# NVM Express controllers and devices
nvme* at pci? dev ? function ?
ld* at nvme? nsid ?
# Miscellaneous mass storage devices
# ISA floppy

View File

@ -1,4 +1,4 @@
# $NetBSD: XEN3_DOM0,v 1.117 2016/03/19 23:21:02 gdt Exp $
# $NetBSD: XEN3_DOM0,v 1.118 2016/05/01 10:21:01 nonaka Exp $
include "arch/amd64/conf/std.xen"
@ -529,6 +529,10 @@ ld* at mlx? unit ?
icpsp* at icp? unit ? # SCSI pass-through
# NVM Express controllers and devices
nvme* at pci? dev ? function ?
ld* at nvme? nsid ?
# wscons
pckbc0 at isa? # pc keyboard controller
pckbd* at pckbc? # PC keyboard

View File

@ -1,4 +1,4 @@
# $NetBSD: files,v 1.1157 2016/04/27 19:46:11 christos Exp $
# $NetBSD: files,v 1.1158 2016/05/01 10:21:02 nonaka Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
version 20150846
@ -1374,6 +1374,14 @@ file dev/ic/bwi.c bwi
device dme: arp, ether, ifnet
file dev/ic/dm9000.c dme
# NVM Express Controller
#
device nvme {nsid = -1}
file dev/ic/nvme.c nvme
attach ld at nvme with ld_nvme
file dev/ic/ld_nvme.c ld_nvme
# legitimate pseudo-devices
#
defpseudodev vnd: disk

View File

@ -1,4 +1,4 @@
# $NetBSD: DEVNAMES,v 1.297 2016/01/05 13:16:37 msaitoh Exp $
# $NetBSD: DEVNAMES,v 1.298 2016/05/01 10:21:02 nonaka Exp $
#
# This file contains all used device names and defined attributes in
# alphabetical order. New devices added to the system somewhere should first
@ -970,6 +970,7 @@ nsphy MI
nsphyter MI
ntwoc MI
nubus mac68k
nvme MI
nvr atari
nvram macppc
oak MI

236
sys/dev/ic/ld_nvme.c Normal file
View File

@ -0,0 +1,236 @@
/* $NetBSD: ld_nvme.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $ */
/*-
* Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/buf.h>
#include <sys/disk.h>
#include <sys/kmem.h>
#include <dev/ldvar.h>
#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>
/* Per-instance state of an ld(4) disk that fronts a single NVMe namespace. */
struct ld_nvme_softc {
/* generic ld(4) state; this is what gets handed to ldattach() */
struct ld_softc sc_ld;
/* softc of the parent nvme controller, recorded by ld_nvme_attach() */
struct nvme_softc *sc_nvme;
/* namespace ID copied from the attach arguments */
uint16_t sc_nsid;
};
static int ld_nvme_match(device_t, cfdata_t, void *);
static void ld_nvme_attach(device_t, device_t, void *);
static int ld_nvme_detach(device_t, int);
CFATTACH_DECL_NEW(ld_nvme, sizeof(struct ld_nvme_softc),
ld_nvme_match, ld_nvme_attach, ld_nvme_detach, NULL);
static int ld_nvme_start(struct ld_softc *, struct buf *);
static int ld_nvme_dump(struct ld_softc *, void *, int, int);
static int ld_nvme_flush(struct ld_softc *, int);
static int ld_nvme_dobio(struct ld_nvme_softc *, void *, int, daddr_t,
int, struct buf *);
static void ld_nvme_biodone(struct nvme_ns_context *);
static void ld_nvme_syncdone(struct nvme_ns_context *);
/*
 * Autoconfiguration match routine.
 *
 * Accepts the attachment (returns 1) whenever the namespace ID supplied in
 * the attach arguments is non-zero, and rejects it (returns 0) otherwise.
 * The parent and cfdata arguments are not consulted.
 */
static int
ld_nvme_match(device_t parent, cfdata_t match, void *aux)
{
	const struct nvme_attach_args *args = aux;

	return (args->naa_nsid != 0) ? 1 : 0;
}
/*
 * Autoconfiguration attach routine.
 *
 * Records the parent controller and namespace ID, asks the controller to
 * identify the namespace, derives the disk geometry from the identify data
 * and finally registers the disk with ld(4).  If the namespace cannot be
 * identified an error is printed and the function returns before
 * ldattach() is reached.
 */
static void
ld_nvme_attach(device_t parent, device_t self, void *aux)
{
	struct nvme_attach_args *args = aux;
	struct ld_nvme_softc *lsc = device_private(self);
	struct ld_softc *ld = &lsc->sc_ld;
	struct nvme_namespace *ns;
	struct nvm_namespace_format *fmt;

	lsc->sc_nvme = device_private(parent);
	lsc->sc_nsid = args->naa_nsid;
	ld->sc_dv = self;

	aprint_naive("\n");
	aprint_normal("\n");

	if (nvme_ns_identify(lsc->sc_nvme, lsc->sc_nsid) != 0) {
		aprint_error_dev(self, "couldn't identify namespace\n");
		return;
	}

	ns = nvme_ns_get(lsc->sc_nvme, lsc->sc_nsid);
	KASSERT(ns);

	/* The low nibble of FLBAS selects the LBA format currently in use. */
	fmt = &ns->ident->lbaf[NVME_ID_NS_FLBAS(ns->ident->flbas)];

	/* Geometry: namespace size in blocks, block size of 2^lbads bytes. */
	ld->sc_secperunit = lemtoh64(&ns->ident->nsze);
	ld->sc_secsize = 1 << fmt->lbads;

	/* Transfer limits. */
	ld->sc_maxqueuecnt = args->naa_qentries;
	ld->sc_maxxfer = MAXPHYS;

	/* Back-end entry points. */
	ld->sc_flush = ld_nvme_flush;
	ld->sc_dump = ld_nvme_dump;
	ld->sc_start = ld_nvme_start;

	ld->sc_flags = LDF_ENABLED;
	ldattach(ld);
}
/*
 * Autoconfiguration detach routine.
 *
 * Starts the ld(4) detach; if that is refused its error code is returned
 * unchanged.  Otherwise the ld(4) detach is completed, the namespace is
 * released on the controller and 0 is returned.
 */
static int
ld_nvme_detach(device_t self, int flags)
{
	struct ld_nvme_softc *lsc = device_private(self);
	int error;

	error = ldbegindetach(&lsc->sc_ld, flags);
	if (error != 0)
		return error;

	ldenddetach(&lsc->sc_ld);
	nvme_ns_free(lsc->sc_nvme, lsc->sc_nsid);

	return 0;
}
/*
 * ld(4) start hook: submit the transfer described by bp.
 *
 * The data pointer, byte count, raw block number and direction are taken
 * from the buf; the buf itself is passed along so that completion can be
 * reported through it.  Returns whatever ld_nvme_dobio() returns.
 */
static int
ld_nvme_start(struct ld_softc *ld, struct buf *bp)
{
	struct ld_nvme_softc *lsc = device_private(ld->sc_dv);
	const int is_write = BUF_ISWRITE(bp);

	return ld_nvme_dobio(lsc, bp->b_data, bp->b_bcount, bp->b_rawblkno,
	    is_write, bp);
}
/*
 * ld(4) dump hook: write blkcnt sectors starting at blkno.
 *
 * The request is always a write and carries no buf, which makes
 * ld_nvme_dobio() issue it in polled mode.
 */
static int
ld_nvme_dump(struct ld_softc *ld, void *data, int blkno, int blkcnt)
{
	struct ld_nvme_softc *lsc = device_private(ld->sc_dv);
	const int nbytes = blkcnt * ld->sc_secsize;

	return ld_nvme_dobio(lsc, data, nbytes, blkno, 1, NULL);
}
/*
 * Build a namespace I/O context for one transfer and submit it.
 *
 *	sc	 - disk instance the transfer belongs to
 *	data	 - buffer to read into / write from
 *	datasize - transfer length in bytes
 *	blkno	 - starting block number
 *	dowrite	 - non-zero for a write, zero for a read
 *	bp	 - buf to complete when the transfer finishes, or NULL for a
 *		   polled request (this is how ld_nvme_dump() calls us)
 *
 * Returns ENOMEM when no context can be allocated for a polled request,
 * otherwise the result of nvme_ns_dobio().  The context is released by
 * ld_nvme_biodone().
 *
 * NOTE(review): whether nvme_ns_dobio() runs the done callback (and so
 * frees the context) when it fails is not visible from this file - confirm.
 */
static int
ld_nvme_dobio(struct ld_nvme_softc *sc, void *data, int datasize, daddr_t blkno,
    int dowrite, struct buf *bp)
{
	const int polled = (bp == NULL);
	struct nvme_ns_context *ctx;
	int error;
	int s = 0;

	/*
	 * Polled requests must not sleep, so they allocate with PR_NOWAIT;
	 * that allocation may fail and has to be checked before use.
	 */
	ctx = nvme_ns_get_ctx(polled ? PR_NOWAIT : PR_WAITOK);
	if (ctx == NULL)
		return ENOMEM;

	ctx->nnc_cookie = sc;
	ctx->nnc_nsid = sc->sc_nsid;
	ctx->nnc_done = ld_nvme_biodone;
	ctx->nnc_buf = bp;
	ctx->nnc_data = data;
	ctx->nnc_datasize = datasize;
	ctx->nnc_secsize = sc->sc_ld.sc_secsize;
	ctx->nnc_blkno = blkno;
	ctx->nnc_flags = dowrite ? 0 : NVME_NS_CTX_F_READ;

	if (polled) {
		SET(ctx->nnc_flags, NVME_NS_CTX_F_POLL);
		/* Raise to splbio for the duration of the polled command. */
		s = splbio();
	}

	error = nvme_ns_dobio(sc->sc_nvme, ctx);

	if (polled)
		splx(s);

	return error;
}
static void
ld_nvme_biodone(struct nvme_ns_context *ctx)
{
struct ld_nvme_softc *sc = ctx->nnc_cookie;
struct buf *bp = ctx->nnc_buf;
int status = NVME_CQE_SC(ctx->nnc_status);
if (bp != NULL) {
if (status != NVME_CQE_SC_SUCCESS) {
bp->b_error = EIO;
bp->b_resid = bp->b_bcount;
aprint_error_dev(sc->sc_ld.sc_dv, "I/O error\n");
} else {
bp->b_resid = 0;
}
lddone(&sc->sc_ld, bp);
} else {
if (status != NVME_CQE_SC_SUCCESS) {
aprint_error_dev(sc->sc_ld.sc_dv, "I/O error\n");
}
}
nvme_ns_put_ctx(ctx);
}
static int
ld_nvme_flush(struct ld_softc *ld, int flags)
{
struct ld_nvme_softc *sc = device_private(ld->sc_dv);
struct nvme_ns_context *ctx;
int error;
int s;
ctx = nvme_ns_get_ctx((flags & LDFL_POLL) ? PR_NOWAIT : PR_WAITOK);
ctx->nnc_cookie = sc;
ctx->nnc_nsid = sc->sc_nsid;
ctx->nnc_done = ld_nvme_syncdone;
ctx->nnc_flags = 0;
if (flags & LDFL_POLL) {
SET(ctx->nnc_flags, NVME_NS_CTX_F_POLL);
s = splbio();
}
error = nvme_ns_sync(sc->sc_nvme, ctx);
if (flags & LDFL_POLL) {
splx(s);
}
return error;
}
/*
 * Completion callback for contexts created by ld_nvme_flush(): nothing to
 * report, so the context simply goes back to its cache.
 */
static void
ld_nvme_syncdone(struct nvme_ns_context *ctx)
{

	nvme_ns_put_ctx(ctx);
}

1333
sys/dev/ic/nvme.c Normal file

File diff suppressed because it is too large Load Diff

387
sys/dev/ic/nvmereg.h Normal file
View File

@ -0,0 +1,387 @@
/* $NetBSD: nvmereg.h,v 1.1 2016/05/01 10:21:02 nonaka Exp $ */
/* $OpenBSD: nvmereg.h,v 1.10 2016/04/14 11:18:32 dlg Exp $ */
/*
* Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
 * Controller register offsets and field accessors: Controller Capabilities,
 * Version and the interrupt mask registers.
 *
 * The NVME_CAP_*(_r) accessors take the full 64-bit CAP value.
 */
#define NVME_CAP	0x0000	/* Controller Capabilities */
#define  NVME_CAP_MPSMAX(_r)	(12 + (((_r) >> 52) & 0xf)) /* shift */
#define  NVME_CAP_MPSMIN(_r)	(12 + (((_r) >> 48) & 0xf)) /* shift */
/* Command Sets Supported occupies CAP bits 44:37, i.e. eight bits. */
#define  NVME_CAP_CSS(_r)	(((_r) >> 37) & 0xff)
#define  NVME_CAP_CSS_NVM	__BIT(0)
#define  NVME_CAP_NSSRS(_r)	ISSET((_r), __BIT(36))
#define  NVME_CAP_DSTRD(_r)	__BIT(2 + (((_r) >> 32) & 0xf)) /* bytes */
#define  NVME_CAP_TO(_r)	(500 * (((_r) >> 24) & 0xff)) /* ms */
#define  NVME_CAP_AMS(_r)	(((_r) >> 17) & 0x3)
#define  NVME_CAP_AMS_WRR	__BIT(0)
#define  NVME_CAP_AMS_VENDOR	__BIT(1)
#define  NVME_CAP_CQR(_r)	ISSET((_r), __BIT(16))
#define  NVME_CAP_MQES(_r)	(((_r) & 0xffff) + 1)
/* Offsets of the two 32-bit halves of CAP. */
#define NVME_CAP_LO	0x0000
#define NVME_CAP_HI	0x0004
#define NVME_VS		0x0008	/* Version */
#define  NVME_VS_MJR(_r)	(((_r) >> 16) & 0xffff)
#define  NVME_VS_MNR(_r)	((_r) & 0xffff)
#define  NVME_VS_1_0		0x00010000
#define  NVME_VS_1_1		0x00010100
#define  NVME_VS_1_2		0x00010200
#define NVME_INTMS	0x000c	/* Interrupt Mask Set */
#define NVME_INTMC	0x0010	/* Interrupt Mask Clear */
/*
 * Controller Configuration register.
 *
 * For each field FOO, NVME_CC_FOO(_v) places a value into the field,
 * NVME_CC_FOO_MASK covers the field and NVME_CC_FOO_R(_v) extracts the
 * field from a register value.  Setter and extractor must use the same
 * shift and width so that NVME_CC_FOO_R(NVME_CC_FOO(x)) == x.
 */
#define NVME_CC		0x0014	/* Controller Configuration */
#define  NVME_CC_IOCQES(_v)	(((_v) & 0xf) << 20)
#define  NVME_CC_IOCQES_MASK	NVME_CC_IOCQES(0xf)
#define  NVME_CC_IOCQES_R(_v)	(((_v) >> 20) & 0xf)
#define  NVME_CC_IOSQES(_v)	(((_v) & 0xf) << 16)
#define  NVME_CC_IOSQES_MASK	NVME_CC_IOSQES(0xf)
#define  NVME_CC_IOSQES_R(_v)	(((_v) >> 16) & 0xf)
#define  NVME_CC_SHN(_v)	(((_v) & 0x3) << 14)
#define  NVME_CC_SHN_MASK	NVME_CC_SHN(0x3)
/* SHN lives in bits 15:14; extract with the same shift the setter uses. */
#define  NVME_CC_SHN_R(_v)	(((_v) >> 14) & 0x3)
#define  NVME_CC_SHN_NONE	0
#define  NVME_CC_SHN_NORMAL	1
#define  NVME_CC_SHN_ABRUPT	2
#define  NVME_CC_AMS(_v)	(((_v) & 0x7) << 11)
#define  NVME_CC_AMS_MASK	NVME_CC_AMS(0x7)
/* AMS is three bits wide (13:11); a wider mask would leak SHN bit 14. */
#define  NVME_CC_AMS_R(_v)	(((_v) >> 11) & 0x7)
#define  NVME_CC_AMS_RR		0 /* round-robin */
#define  NVME_CC_AMS_WRR_U	1 /* weighted round-robin w/ urgent */
#define  NVME_CC_AMS_VENDOR	7 /* vendor */
/* MPS is stored as (page shift - 12). */
#define  NVME_CC_MPS(_v)	((((_v) - 12) & 0xf) << 7)
#define  NVME_CC_MPS_MASK	(0xf << 7)
#define  NVME_CC_MPS_R(_v)	(12 + (((_v) >> 7) & 0xf))
#define  NVME_CC_CSS(_v)	(((_v) & 0x7) << 4)
#define  NVME_CC_CSS_MASK	NVME_CC_CSS(0x7)
#define  NVME_CC_CSS_R(_v)	(((_v) >> 4) & 0x7)
#define  NVME_CC_CSS_NVM	0
#define  NVME_CC_EN		__BIT(0)
/*
 * Controller Status, subsystem reset, admin queue setup registers and the
 * per-queue doorbell offsets.
 */
#define NVME_CSTS 0x001c /* Controller Status */
#define NVME_CSTS_SHST_MASK (0x3 << 2)
#define NVME_CSTS_SHST_NONE (0x0 << 2) /* normal operation */
#define NVME_CSTS_SHST_WAIT (0x1 << 2) /* shutdown processing occurring */
#define NVME_CSTS_SHST_DONE (0x2 << 2) /* shutdown processing complete */
#define NVME_CSTS_CFS (1 << 1)
#define NVME_CSTS_RDY (1 << 0)
#define NVME_NSSR 0x0020 /* NVM Subsystem Reset (Optional) */
#define NVME_AQA 0x0024 /* Admin Queue Attributes */
/* Admin Completion Queue Size */
/* (both AQA size fields are encoded as the entry count minus one) */
#define NVME_AQA_ACQS(_v) (((_v) - 1) << 16)
/* Admin Submission Queue Size */
#define NVME_AQA_ASQS(_v) (((_v) - 1) << 0)
#define NVME_ASQ 0x0028 /* Admin Submission Queue Base Address */
#define NVME_ACQ 0x0030 /* Admin Completion Queue Base Address */
/* Queue number used with the doorbell macros for the admin queue. */
#define NVME_ADMIN_Q 0
/*
 * Doorbells start at offset 0x1000.  Queue _q owns two consecutive slots of
 * _s bytes each: the submission tail doorbell first, the completion head
 * doorbell second.
 */
/* Submission Queue Tail Doorbell */
#define NVME_SQTDBL(_q, _s) (0x1000 + (2 * (_q) + 0) * (_s))
/* Completion Queue Head Doorbell */
#define NVME_CQHDBL(_q, _s) (0x1000 + (2 * (_q) + 1) * (_s))
/*
 * Scatter/gather list entries.  Every variant below is 16 bytes and starts
 * with a one-byte id.
 */
/* Generic entry: only the id is named, the rest is opaque. */
struct nvme_sge {
uint8_t id;
uint8_t _reserved[15];
} __packed __aligned(8);
/* Entry carrying a length and a 64-bit address. */
struct nvme_sge_data {
uint8_t id;
uint8_t _reserved[3];
uint32_t length;
uint64_t address;
} __packed __aligned(8);
/* Same layout as nvme_sge_data under a separate name. */
struct nvme_sge_bit_bucket {
uint8_t id;
uint8_t _reserved[3];
uint32_t length;
uint64_t address;
} __packed __aligned(8);
/*
 * Generic submission queue entry; the members add up to 64 bytes.  The
 * command-specific dwords are exposed as cdw10..cdw15.
 */
struct nvme_sqe {
uint8_t opcode;
uint8_t flags;
uint16_t cid;
uint32_t nsid;
uint8_t _reserved[8];
uint64_t mptr;
/* data pointer: either two PRP entries or one SGL entry (both 16 bytes) */
union {
uint64_t prp[2];
struct nvme_sge sge;
} __packed entry;
uint32_t cdw10;
uint32_t cdw11;
uint32_t cdw12;
uint32_t cdw13;
uint32_t cdw14;
uint32_t cdw15;
} __packed __aligned(8);
/*
 * Submission queue entry layout carrying queue parameters (queue id, size
 * and flags); the members add up to 64 bytes.
 * NOTE(review): presumably used with the NVM_ADMIN_{ADD,DEL}_IO{SQ,CQ}
 * opcodes - the users are not visible here, confirm in nvme.c.
 */
struct nvme_sqe_q {
uint8_t opcode;
uint8_t flags;
uint16_t cid;
uint8_t _reserved1[20];
uint64_t prp1;
uint8_t _reserved2[8];
uint16_t qid;
uint16_t qsize;
uint8_t qflags;
/* qflags values; the SQ priority and CQ interrupt-enable bits share bit 1 */
#define NVM_SQE_SQ_QPRIO_URG (0x0 << 1)
#define NVM_SQE_SQ_QPRIO_HI (0x1 << 1)
#define NVM_SQE_SQ_QPRIO_MED (0x2 << 1)
#define NVM_SQE_SQ_QPRIO_LOW (0x3 << 1)
#define NVM_SQE_CQ_IEN (1 << 1)
#define NVM_SQE_Q_PC (1 << 0)
uint8_t _reserved3;
uint16_t cqid; /* XXX interrupt vector for cq */
uint8_t _reserved4[16];
} __packed __aligned(8);
/*
 * Submission queue entry layout for block I/O commands; the members add up
 * to 64 bytes.  The first 40 bytes match struct nvme_sqe, the remainder
 * replaces cdw10..cdw15 with named I/O fields.
 */
struct nvme_sqe_io {
uint8_t opcode;
uint8_t flags;
uint16_t cid;
uint32_t nsid;
uint8_t _reserved[8];
uint64_t mptr;
/* data pointer: either two PRP entries or one SGL entry (both 16 bytes) */
union {
uint64_t prp[2];
struct nvme_sge sge;
} __packed entry;
uint64_t slba; /* Starting LBA */
uint16_t nlb; /* Number of Logical Blocks */
uint16_t ioflags;
uint8_t dsm; /* Dataset Management */
uint8_t _reserved2[3];
uint32_t eilbrt; /* Expected Initial Logical Block
Reference Tag */
uint16_t elbat; /* Expected Logical Block
Application Tag */
uint16_t elbatm; /* Expected Logical Block
Application Tag Mask */
} __packed __aligned(8);
/*
 * Completion queue entry (16 bytes).
 *
 * "flags" is the upper 16 bits of the last completion dword:
 *
 *	bit  0		phase tag
 *	bits 8:1	status code (SC), eight bits wide
 *	bits 11:9	status code type (SCT)
 *	bit  14		more (M)
 *	bit  15		do not retry (DNR)
 *
 * NVME_CQE_SC() and NVME_CQE_SCT() return the field still in place (not
 * shifted down), so their results are compared against the NVME_CQE_SC_*
 * and NVME_CQE_SCT_* constants, which are defined pre-shifted.  The SC mask
 * must cover all eight bits: the codes from 0x80 upwards would otherwise be
 * truncated to 0 and read back as NVME_CQE_SC_SUCCESS.
 */
struct nvme_cqe {
	uint32_t	cdw0;
	uint32_t	_reserved;
	uint16_t	sqhd; /* SQ Head Pointer */
	uint16_t	sqid; /* SQ Identifier */
	uint16_t	cid; /* Command Identifier */
	uint16_t	flags;
#define NVME_CQE_DNR			__BIT(15)
#define NVME_CQE_M			__BIT(14)
#define NVME_CQE_SCT(_f)		((_f) & (0x07 << 9))
#define  NVME_CQE_SCT_GENERIC		(0x00 << 9)
#define  NVME_CQE_SCT_COMMAND		(0x01 << 9)
#define  NVME_CQE_SCT_MEDIAERR		(0x02 << 9)
#define  NVME_CQE_SCT_VENDOR		(0x07 << 9)
#define NVME_CQE_SC(_f)			((_f) & (0xff << 1))
#define  NVME_CQE_SC_SUCCESS		(0x00 << 1)
#define  NVME_CQE_SC_INVALID_OPCODE	(0x01 << 1)
#define  NVME_CQE_SC_INVALID_FIELD	(0x02 << 1)
#define  NVME_CQE_SC_CID_CONFLICT	(0x03 << 1)
#define  NVME_CQE_SC_DATA_XFER_ERR	(0x04 << 1)
#define  NVME_CQE_SC_ABRT_BY_NO_PWR	(0x05 << 1)
#define  NVME_CQE_SC_INTERNAL_DEV_ERR	(0x06 << 1)
#define  NVME_CQE_SC_CMD_ABRT_REQD	(0x07 << 1)
#define  NVME_CQE_SC_CMD_ABDR_SQ_DEL	(0x08 << 1)
#define  NVME_CQE_SC_CMD_ABDR_FUSE_ERR	(0x09 << 1)
#define  NVME_CQE_SC_CMD_ABDR_FUSE_MISS	(0x0a << 1)
#define  NVME_CQE_SC_INVALID_NS		(0x0b << 1)
#define  NVME_CQE_SC_CMD_SEQ_ERR	(0x0c << 1)
#define  NVME_CQE_SC_INVALID_LAST_SGL	(0x0d << 1)
#define  NVME_CQE_SC_INVALID_NUM_SGL	(0x0e << 1)
#define  NVME_CQE_SC_DATA_SGL_LEN	(0x0f << 1)
#define  NVME_CQE_SC_MDATA_SGL_LEN	(0x10 << 1)
#define  NVME_CQE_SC_SGL_TYPE_INVALID	(0x11 << 1)
#define  NVME_CQE_SC_LBA_RANGE		(0x80 << 1)
#define  NVME_CQE_SC_CAP_EXCEEDED	(0x81 << 1)
#define  NVME_CQE_NS_NOT_RDY		(0x82 << 1)
#define  NVME_CQE_RSV_CONFLICT		(0x83 << 1)
#define NVME_CQE_PHASE			__BIT(0)
} __packed __aligned(8);
/* Admin command opcodes (NVM_ADMIN_*). */
#define NVM_ADMIN_DEL_IOSQ 0x00 /* Delete I/O Submission Queue */
#define NVM_ADMIN_ADD_IOSQ 0x01 /* Create I/O Submission Queue */
#define NVM_ADMIN_GET_LOG_PG 0x02 /* Get Log Page */
#define NVM_ADMIN_DEL_IOCQ 0x04 /* Delete I/O Completion Queue */
#define NVM_ADMIN_ADD_IOCQ 0x05 /* Create I/O Completion Queue */
#define NVM_ADMIN_IDENTIFY 0x06 /* Identify */
#define NVM_ADMIN_ABORT 0x08 /* Abort */
#define NVM_ADMIN_SET_FEATURES 0x09 /* Set Features */
#define NVM_ADMIN_GET_FEATURES 0x0a /* Get Features */
#define NVM_ADMIN_ASYNC_EV_REQ 0x0c /* Asynchronous Event Request */
#define NVM_ADMIN_FW_ACTIVATE 0x10 /* Firmware Activate */
#define NVM_ADMIN_FW_DOWNLOAD 0x11 /* Firmware Image Download */
/*
 * I/O command opcodes (NVM_CMD_*).  The numeric values overlap with the
 * admin opcodes above, so an opcode is only meaningful together with the
 * kind of queue it is submitted on.
 */
#define NVM_CMD_FLUSH 0x00 /* Flush */
#define NVM_CMD_WRITE 0x01 /* Write */
#define NVM_CMD_READ 0x02 /* Read */
#define NVM_CMD_WR_UNCOR 0x04 /* Write Uncorrectable */
#define NVM_CMD_COMPARE 0x05 /* Compare */
#define NVM_CMD_DSM 0x09 /* Dataset Management */
/* Power State Descriptor Data */
/*
 * One 32-byte descriptor; struct nvm_identify_controller embeds an array
 * of 32 of them.
 */
struct nvm_identify_psd {
uint16_t mp; /* Max Power */
uint16_t flags;
uint32_t enlat; /* Entry Latency */
uint32_t exlat; /* Exit Latency */
uint8_t rrt; /* Relative Read Throughput */
uint8_t rrl; /* Relative Read Latency */
uint8_t rwt; /* Relative Write Throughput */
uint8_t rwl; /* Relative Write Latency */
uint8_t _reserved[16];
} __packed __aligned(8);
/*
 * Identify Controller data.  The members add up to exactly 4096 bytes; the
 * _reservedN arrays pad each section out to its fixed offset, so fields must
 * not be added or resized without adjusting the neighbouring padding.
 * Multi-byte members are read elsewhere in this change with the lemtoh*()
 * helpers, i.e. they are stored little-endian.
 */
struct nvm_identify_controller {
/* Controller Capabilities and Features */
/* (this section spans bytes 0-255) */
uint16_t vid; /* PCI Vendor ID */
uint16_t ssvid; /* PCI Subsystem Vendor ID */
uint8_t sn[20]; /* Serial Number */
uint8_t mn[40]; /* Model Number */
uint8_t fr[8]; /* Firmware Revision */
uint8_t rab; /* Recommended Arbitration Burst */
uint8_t ieee[3]; /* IEEE OUI Identifier */
uint8_t cmic; /* Controller Multi-Path I/O and
Namespace Sharing Capabilities */
uint8_t mdts; /* Maximum Data Transfer Size */
uint16_t cntlid; /* Controller ID */
uint8_t _reserved1[176];
/* Admin Command Set Attributes & Optional Controller Capabilities */
/* (this section spans bytes 256-511) */
uint16_t oacs; /* Optional Admin Command Support */
uint8_t acl; /* Abort Command Limit */
uint8_t aerl; /* Asynchronous Event Request Limit */
uint8_t frmw; /* Firmware Updates */
uint8_t lpa; /* Log Page Attributes */
uint8_t elpe; /* Error Log Page Entries */
uint8_t npss; /* Number of Power States Support */
uint8_t avscc; /* Admin Vendor Specific Command
Configuration */
uint8_t apsta; /* Autonomous Power State Transition
Attributes */
uint8_t _reserved2[246];
/* NVM Command Set Attributes */
/* (this section spans bytes 512-703) */
uint8_t sqes; /* Submission Queue Entry Size */
uint8_t cqes; /* Completion Queue Entry Size */
uint8_t _reserved3[2];
uint32_t nn; /* Number of Namespaces */
uint16_t oncs; /* Optional NVM Command Support */
uint16_t fuses; /* Fused Operation Support */
uint8_t fna; /* Format NVM Attributes */
uint8_t vwc; /* Volatile Write Cache */
uint16_t awun; /* Atomic Write Unit Normal */
uint16_t awupf; /* Atomic Write Unit Power Fail */
uint8_t nvscc; /* NVM Vendor Specific Command */
uint8_t _reserved4[1];
uint16_t acwu; /* Atomic Compare & Write Unit */
uint8_t _reserved5[2];
uint32_t sgls; /* SGL Support */
uint8_t _reserved6[164];
/* I/O Command Set Attributes */
/* (bytes 704-2047, entirely reserved here) */
uint8_t _reserved7[1344];
/* Power State Descriptors */
/* (bytes 2048-3071: 32 descriptors of 32 bytes) */
struct nvm_identify_psd psd[32]; /* Power State Descriptors */
/* Vendor Specific */
/* (bytes 3072-4095) */
uint8_t _reserved8[1024];
} __packed __aligned(8);
/*
 * One 4-byte LBA format descriptor.  ld_nvme_attach() computes the sector
 * size as 1 << lbads, i.e. lbads is a power-of-two exponent.
 */
struct nvm_namespace_format {
uint16_t ms; /* Metadata Size */
uint8_t lbads; /* LBA Data Size */
uint8_t rp; /* Relative Performance */
} __packed __aligned(4);
/*
 * Identify Namespace data.  The members add up to exactly 4096 bytes.
 * ld_nvme_attach() reads nsze through lemtoh64() and uses the low nibble of
 * flbas as an index into lbaf[].
 */
struct nvm_identify_namespace {
uint64_t nsze; /* Namespace Size */
uint64_t ncap; /* Namespace Capacity */
uint64_t nuse; /* Namespace Utilization */
uint8_t nsfeat; /* Namespace Features */
uint8_t nlbaf; /* Number of LBA Formats */
uint8_t flbas; /* Formatted LBA Size */
/* low four bits of flbas: index of the LBA format in use */
#define NVME_ID_NS_FLBAS(_f) ((_f) & 0x0f)
#define NVME_ID_NS_FLBAS_MD 0x10
uint8_t mc; /* Metadata Capabilities */
uint8_t dpc; /* End-to-end Data Protection
Capabilities */
uint8_t dps; /* End-to-end Data Protection Type Settings */
uint8_t _reserved1[98];
struct nvm_namespace_format
lbaf[16]; /* LBA Format Support */
uint8_t _reserved2[192];
uint8_t vs[3712];
} __packed __aligned(8);

180
sys/dev/ic/nvmevar.h Normal file
View File

@ -0,0 +1,180 @@
/* $NetBSD: nvmevar.h,v 1.1 2016/05/01 10:21:02 nonaka Exp $ */
/* $OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */
/*
* Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/device.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/queue.h>
/*
 * Bookkeeping for one DMA memory region: its map, backing segment, size and
 * kernel virtual address.
 */
struct nvme_dmamem {
bus_dmamap_t ndm_map;
bus_dma_segment_t ndm_seg;
size_t ndm_size;
void *ndm_kva;
};
/*
 * Accessors.  LEN and DVA look only at the first segment of the map.
 * NOTE(review): that presumes the region is always loaded as a single
 * segment - confirm against the allocator in nvme.c.
 */
#define NVME_DMA_MAP(_ndm) ((_ndm)->ndm_map)
#define NVME_DMA_LEN(_ndm) ((_ndm)->ndm_map->dm_segs[0].ds_len)
#define NVME_DMA_DVA(_ndm) ((uint64_t)(_ndm)->ndm_map->dm_segs[0].ds_addr)
#define NVME_DMA_KVA(_ndm) ((void *)(_ndm)->ndm_kva)
struct nvme_softc;
struct nvme_queue;
/*
 * Per-command control block, linked on a SIMPLEQ through ccb_entry.
 * NOTE(review): field roles below are read off the names and types only;
 * the code that uses them (nvme.c) is not visible here.
 */
struct nvme_ccb {
SIMPLEQ_ENTRY(nvme_ccb) ccb_entry;
bus_dmamap_t ccb_dmamap;
void *ccb_cookie;
/* completion callback; receives the queue, this ccb and the CQE */
void (*ccb_done)(struct nvme_queue *,
struct nvme_ccb *, struct nvme_cqe *);
/* presumably this ccb's PRP list: offset, device address and pointer */
bus_addr_t ccb_prpl_off;
uint64_t ccb_prpl_dva;
uint64_t *ccb_prpl;
uint16_t ccb_id;
};
/* List head type for nvme_ccb entries. */
SIMPLEQ_HEAD(nvme_ccb_list, nvme_ccb);
/*
 * One submission/completion queue pair together with its pool of command
 * control blocks.
 * NOTE(review): which mutex guards which members is inferred from the names
 * (q_sq_mtx, q_cq_mtx, q_ccb_mtx) - confirm in nvme.c.
 */
struct nvme_queue {
/* owning controller */
struct nvme_softc *q_sc;
kmutex_t q_sq_mtx;
kmutex_t q_cq_mtx;
struct nvme_dmamem *q_sq_dmamem;
struct nvme_dmamem *q_cq_dmamem;
bus_size_t q_sqtdbl; /* submission queue tail doorbell */
bus_size_t q_cqhdbl; /* completion queue head doorbell */
uint16_t q_id;
uint32_t q_entries;
uint32_t q_sq_tail;
uint32_t q_cq_head;
uint16_t q_cq_phase;
kmutex_t q_ccb_mtx;
u_int q_nccbs;
struct nvme_ccb *q_ccbs;
struct nvme_ccb_list q_ccb_list;
struct nvme_dmamem *q_ccb_prpls;
};
/*
 * Per-namespace slot kept by the controller; nvme_ns_get() returns a
 * pointer into an array of these.
 */
struct nvme_namespace {
/* identify data; dereferenced by ld_nvme_attach() after nvme_ns_identify() */
struct nvm_identify_namespace *ident;
/* NOTE(review): presumably the attached child device - confirm in nvme.c */
device_t dev;
};
/*
 * Bus-independent controller state.  The bus front-end embeds this in its
 * own softc (see struct nvme_pci_softc) and supplies the interrupt hooks.
 */
struct nvme_softc {
device_t sc_dev;
/* register window: tag, handle and size */
bus_space_tag_t sc_iot;
bus_space_handle_t sc_ioh;
bus_size_t sc_ios;
bus_dma_tag_t sc_dmat;
/* bus-specific hooks to set up / tear down the interrupt of queue qid */
int (*sc_intr_establish)(struct nvme_softc *,
uint16_t qid, struct nvme_queue *);
int (*sc_intr_disestablish)(struct nvme_softc *,
uint16_t qid);
void **sc_ih;
u_int sc_rdy_to;
size_t sc_mps;
size_t sc_mdts;
u_int sc_max_sgl;
struct nvm_identify_controller
sc_identify;
/* number of entries in sc_namespaces; nvme_ns_get() bounds-checks with it */
u_int sc_nn;
struct nvme_namespace *sc_namespaces;
bool sc_use_mq;
u_int sc_nq; /* # of io queue (sc_q) */
struct nvme_queue *sc_admin_q;
/* I/O queues; nvme_get_q() indexes this array modulo sc_nq */
struct nvme_queue **sc_q;
uint32_t sc_flags;
#define NVME_F_ATTACHED __BIT(0)
};
/*
 * Little-endian memory accessors: lemtohN(p) loads an N-bit little-endian
 * value from *p and returns it in host order; htolemN(p, x) stores x at *p
 * in little-endian order.
 * NOTE(review): the pointer is cast and dereferenced directly, so callers
 * must pass a pointer that is suitably aligned for the access width.
 */
#define lemtoh16(p) le16toh(*((uint16_t *)(p)))
#define lemtoh32(p) le32toh(*((uint32_t *)(p)))
#define lemtoh64(p) le64toh(*((uint64_t *)(p)))
#define htolem16(p, x) (*((uint16_t *)(p)) = htole16(x))
#define htolem32(p, x) (*((uint32_t *)(p)) = htole32(x))
#define htolem64(p, x) (*((uint64_t *)(p)) = htole64(x))
/* Arguments handed to a child (ld at nvme) when it is matched and attached. */
struct nvme_attach_args {
/* namespace ID; ld_nvme_match() rejects the value 0 */
uint16_t naa_nsid;
/* queue depth; ld_nvme_attach() uses it as the ld(4) maximum queue count */
uint32_t naa_qentries;
};
int nvme_attach(struct nvme_softc *);
int nvme_detach(struct nvme_softc *, int flags);
void nvme_childdet(device_t, device_t);
int nvme_intr(void *);
int nvme_mq_msi_intr(void *);
int nvme_mq_msix_intr(void *);
static inline struct nvme_queue *
nvme_get_q(struct nvme_softc *sc)
{
return sc->sc_q[cpu_index(curcpu()) % sc->sc_nq];
}
/*
 * namespace
 */

/*
 * Look up the slot for a namespace ID.  IDs are 1-based: ID n maps to
 * sc_namespaces[n - 1].  Returns NULL for ID 0 and for IDs beyond sc_nn.
 */
static inline struct nvme_namespace *
nvme_ns_get(struct nvme_softc *sc, uint16_t nsid)
{
	if (nsid == 0)
		return NULL;
	if (nsid - 1 >= sc->sc_nn)
		return NULL;

	return sc->sc_namespaces + (nsid - 1);
}
int nvme_ns_identify(struct nvme_softc *, uint16_t);
void nvme_ns_free(struct nvme_softc *, uint16_t);
/*
 * Description of one namespace request (I/O or sync) passed between a
 * namespace consumer such as ld_nvme and the controller code.
 */
struct nvme_ns_context {
/* opaque consumer pointer; ld_nvme stores its softc here */
void *nnc_cookie;
/* completion callback; ld_nvme's callbacks return the context to the cache */
void (*nnc_done)(struct nvme_ns_context *);
uint16_t nnc_nsid;
/* buf to complete, or NULL for requests without one (dump) */
struct buf *nnc_buf;
void *nnc_data;
/* transfer length in bytes */
int nnc_datasize;
int nnc_secsize;
daddr_t nnc_blkno;
u_int nnc_flags;
/* READ clear means write; POLL is set by ld_nvme for dump and polled flush */
#define NVME_NS_CTX_F_READ __BIT(0)
#define NVME_NS_CTX_F_POLL __BIT(1)
/* completion status; ld_nvme_biodone() decodes it with NVME_CQE_SC() */
int nnc_status;
};
/* Cache from which nvme_ns_context objects are allocated. */
extern pool_cache_t nvme_ns_ctx_cache;
/*
 * Allocate / release a context.  "flags" is passed straight through to
 * pool_cache_get(); with PR_NOWAIT the result can be NULL and must be
 * checked by the caller.
 */
#define nvme_ns_get_ctx(flags) pool_cache_get(nvme_ns_ctx_cache, (flags))
#define nvme_ns_put_ctx(ctx) pool_cache_put(nvme_ns_ctx_cache, (ctx))
int nvme_ns_dobio(struct nvme_softc *, struct nvme_ns_context *);
int nvme_ns_sync(struct nvme_softc *, struct nvme_ns_context *);

View File

@ -1,4 +1,4 @@
# $NetBSD: files.pci,v 1.380 2016/01/05 12:18:42 msaitoh Exp $
# $NetBSD: files.pci,v 1.381 2016/05/01 10:21:02 nonaka Exp $
#
# Config file and device description for machine-independent PCI code.
# Included by ports that need it. Requires that the SCSI files be
@ -1175,6 +1175,10 @@ defflag opt_gffb.h GFFB_DEBUG
attach rtsx at pci with rtsx_pci
file dev/pci/rtsx_pci.c rtsx_pci
# NVM Express Controller
attach nvme at pci with nvme_pci
file dev/pci/nvme_pci.c nvme_pci
# PCI graphics devices with DRM/KMS
include "external/bsd/drm2/pci/files.drmkms_pci"

451
sys/dev/pci/nvme_pci.c Normal file
View File

@ -0,0 +1,451 @@
/* $NetBSD: nvme_pci.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $ */
/* $OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
/*
* Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*-
* Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.1 2016/05/01 10:21:02 nonaka Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/bitops.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/interrupt.h>
#include <sys/kmem.h>
#include <sys/pmf.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>
/*
 * Driver tunables.
 *
 * nvme_pci_force_intx: when non-zero, nvme_pci_setup_intr() skips the
 *	MSI-X and MSI probing and allocates a legacy INTx interrupt only.
 * nvme_pci_mpsafe: when non-zero, interrupt handles are flagged
 *	PCI_INTR_MPSAFE before being established and io queue interrupts
 *	are given a CPU affinity; when zero, MSI/MSI-X allocations are
 *	capped at two vectors.
 * nvme_pci_mq: when zero, MSI/MSI-X allocations are capped at two
 *	vectors (admin queue + one io queue).
 */
int nvme_pci_force_intx = 0;
int nvme_pci_mpsafe = 0;
int nvme_pci_mq = 1; /* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */
/* Configuration-space offset of the BAR holding the controller registers. */
#define NVME_PCI_BAR 0x10
/*
 * PCI front-end softc.  The bus-independent nvme_softc must remain the
 * first member: the interrupt hooks in this file cast a
 * struct nvme_softc pointer back to a struct nvme_pci_softc pointer.
 */
struct nvme_pci_softc {
/* bus-independent controller state */
struct nvme_softc psc_nvme;
/* PCI chipset tag, saved from the attach arguments */
pci_chipset_tag_t psc_pc;
/* interrupt handles returned by pci_intr_alloc() */
pci_intr_handle_t *psc_intrs;
/* number of entries in psc_intrs */
int psc_nintrs;
};
/* Autoconfiguration entry points. */
static int nvme_pci_match(device_t, cfdata_t, void *);
static void nvme_pci_attach(device_t, device_t, void *);
static int nvme_pci_detach(device_t, int);
/* Attachment glue; the softc allocated per device is a nvme_pci_softc. */
CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, NULL,
nvme_childdet, DVF_DETACH_SHUTDOWN);
/*
 * Per-queue interrupt hooks installed into the nvme_softc by
 * nvme_pci_attach(), plus the vector allocation helper it calls.
 */
static int nvme_pci_intr_establish(struct nvme_softc *,
uint16_t, struct nvme_queue *);
static int nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int nvme_pci_setup_intr(struct pci_attach_args *,
struct nvme_pci_softc *);
/*
 * Autoconfiguration match routine.
 *
 * Returns 1 when the PCI class register in the attach arguments carries
 * the mass-storage class, the NVM subclass and the NVMe programming
 * interface; returns 0 otherwise.
 */
static int
nvme_pci_match(device_t parent, cfdata_t match, void *aux)
{
	struct pci_attach_args *args = aux;

	if (PCI_CLASS(args->pa_class) != PCI_CLASS_MASS_STORAGE)
		return 0;
	if (PCI_SUBCLASS(args->pa_class) != PCI_SUBCLASS_MASS_STORAGE_NVM)
		return 0;
	if (PCI_INTERFACE(args->pa_class) != PCI_INTERFACE_NVM_NVME)
		return 0;

	return 1;
}
/*
 * Autoconfiguration attach routine for a PCI NVMe controller.
 *
 * Picks the DMA tag, maps the register BAR (NVME_PCI_BAR), allocates
 * interrupt vectors through nvme_pci_setup_intr() and then calls
 * nvme_attach().  When sc_use_mq is clear the shared nvme_intr handler is
 * established here; when it is set, no handler is established in this
 * function and only the sc_intr_establish/sc_intr_disestablish hooks are
 * installed.
 *
 * On failure the resources acquired so far are released and
 * NVME_F_ATTACHED stays clear, so nvme_pci_detach() returns immediately.
 */
static void
nvme_pci_attach(device_t parent, device_t self, void *aux)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	struct pci_attach_args *pa = aux;
	pcireg_t memtype;
	char intr_xname[INTRDEVNAMEBUF];
	char intrbuf[PCI_INTRSTR_LEN];
	const char *intrstr = NULL;
	bus_addr_t memaddr;
	int flags, msixoff;
	int i, nq, error;

	sc->sc_dev = self;
	psc->psc_pc = pa->pa_pc;

	/* Prefer the 64-bit DMA tag when the bus supports it. */
	if (pci_dma64_available(pa))
		sc->sc_dmat = pa->pa_dmat64;
	else
		sc->sc_dmat = pa->pa_dmat;

	pci_aprint_devinfo(pa, NULL);

	/* Map registers */
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
		return;
	}
	sc->sc_iot = pa->pa_memt;
	/* Query the same BAR whose type was just read. */
	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR,
	    memtype, &memaddr, &sc->sc_ios, &flags);
	if (error) {
		aprint_error_dev(self, "can't get map info\n");
		return;
	}

	/*
	 * If the device has an MSI-X capability and the table's BAR
	 * indicator is 0, shrink the mapping so that it ends where the
	 * MSI-X table begins (presumably so the driver's register mapping
	 * does not cover the MSI-X table).
	 */
	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
	    NULL)) {
		pcireg_t msixtbl;
		uint32_t table_offset;
		int bir;

		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
		    msixoff + PCI_MSIX_TBLOFFSET);
		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
		/*
		 * NOTE(review): this is the table-offset register, yet the
		 * PBA BIR mask is applied; presumably both BIR masks cover
		 * the same low bits -- confirm against pcireg.h.
		 */
		bir = msixtbl & PCI_MSIX_PBABIR_MASK;
		if (bir == 0) {
			sc->sc_ios = table_offset;
		}
	}

	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
	    &sc->sc_ioh);
	if (error != 0) {
		aprint_error_dev(self, "can't map mem space (error=%d)\n",
		    error);
		return;
	}

	/* Establish interrupts */
	if (nvme_pci_setup_intr(pa, psc) != 0) {
		aprint_error_dev(self, "unable to allocate interrupt\n");
		goto unmap;
	}
	sc->sc_intr_establish = nvme_pci_intr_establish;
	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;

	/* One handler slot per io queue, plus one for the admin queue in mq mode. */
	nq = sc->sc_nq + (sc->sc_use_mq ? 1 : 0);
	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * nq, KM_SLEEP);
	if (sc->sc_ih == NULL) {
		aprint_error_dev(self, "unable to allocate ih memory\n");
		goto intr_release;
	}

	/*
	 * "i" counts the handlers established here; it stays 0 in mq mode
	 * so the error path below disestablishes nothing in that case.
	 */
	i = 0;
	if (!sc->sc_use_mq) {
		for (; i < nq; i++) {
			if (nvme_pci_mpsafe) {
				pci_intr_setattr(pa->pa_pc, &psc->psc_intrs[i],
				    PCI_INTR_MPSAFE, true);
			}
			snprintf(intr_xname, sizeof(intr_xname), "%s",
			    device_xname(self));
			sc->sc_ih[i] = pci_intr_establish_xname(pa->pa_pc,
			    psc->psc_intrs[i], IPL_BIO, nvme_intr, sc,
			    intr_xname);
			if (sc->sc_ih[i] == NULL) {
				aprint_error_dev(self,
				    "unable to establish %s interrupt\n",
				    intr_xname);
				goto intr_disestablish;
			}
			intrstr = pci_intr_string(pa->pa_pc, psc->psc_intrs[i],
			    intrbuf, sizeof(intrbuf));
			aprint_normal_dev(sc->sc_dev, "interrupting at %s\n",
			    intrstr);
		}
	}

	if (nvme_attach(sc) != 0) {
		/* error printed by nvme_attach() */
		goto intr_disestablish;
	}

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	SET(sc->sc_flags, NVME_F_ATTACHED);
	return;

intr_disestablish:
	while (--i >= 0)
		pci_intr_disestablish(pa->pa_pc, sc->sc_ih[i]);
	/*
	 * Free with the element count used for the allocation ("nq"), not
	 * sc_nq: in mq mode the array has one extra slot for the admin
	 * queue and kmem_free() must be given the allocated size.
	 */
	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * nq);
	sc->sc_ih = NULL;
	sc->sc_nq = 0;
intr_release:
	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
	psc->psc_nintrs = 0;
unmap:
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	sc->sc_ios = 0;
}
/*
 * Autoconfiguration detach routine.
 *
 * Does nothing (and reports success) unless attach set NVME_F_ATTACHED.
 * Otherwise asks nvme_detach() to shut the controller down, returning its
 * error unchanged if it fails, and then releases the interrupt handlers
 * established by attach, the handler array, the interrupt handles and
 * the register mapping.  Returns 0 on success.
 */
static int
nvme_pci_detach(device_t self, int flags)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	int rv, slots, idx;

	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
		return 0;

	rv = nvme_detach(sc, flags);
	if (rv)
		return rv;

	/* Same slot count that attach used to size the handler array. */
	slots = sc->sc_nq + (sc->sc_use_mq ? 1 : 0);

	/* Only the single-vector handlers were established by attach. */
	if (!sc->sc_use_mq) {
		idx = 0;
		while (idx < slots) {
			pci_intr_disestablish(psc->psc_pc, sc->sc_ih[idx]);
			idx++;
		}
	}

	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * slots);
	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);

	return 0;
}
/*
 * sc_intr_establish hook: establish the interrupt handler for queue "qid".
 *
 * In single-vector mode requests for qid > 0 are accepted without doing
 * anything.  In multi-queue mode the handler argument is the queue "q"
 * and the handler is chosen by the type of the interrupt handle (MSI-X
 * or MSI).  When nvme_pci_mpsafe is set, io queue interrupts are also
 * given an affinity to cpu (qid - 1) % ncpu.
 *
 * Returns 0 on success (or when nothing had to be done) and 1 when the
 * handler could not be established.
 */
static int
nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
    struct nvme_queue *q)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
	char name[INTRDEVNAMEBUF];
	char where_buf[PCI_INTRSTR_LEN];
	const char *where;
	int (*handler)(void *);
	void *cookie;
	kcpuset_t *cpus;
	cpuid_t target;
	int rv;

	if (!sc->sc_use_mq && qid > 0)
		return 0;

	KASSERT(sc->sc_ih[qid] == NULL);

	if (nvme_pci_mpsafe) {
		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
		    PCI_INTR_MPSAFE, true);
	}

	/* Pick the interrupt name, handler and handler argument. */
	if (sc->sc_use_mq) {
		if (qid != 0) {
			snprintf(name, sizeof(name), "%s ioq%d",
			    device_xname(sc->sc_dev), qid);
		} else {
			snprintf(name, sizeof(name), "%s adminq",
			    device_xname(sc->sc_dev));
		}
		cookie = q;
		handler =
		    (pci_intr_type(psc->psc_intrs[qid]) == PCI_INTR_TYPE_MSIX)
		    ? nvme_mq_msix_intr : nvme_mq_msi_intr;
	} else {
		snprintf(name, sizeof(name), "%s", device_xname(sc->sc_dev));
		cookie = sc;
		handler = nvme_intr;
	}

	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
	    psc->psc_intrs[qid], IPL_BIO, handler, cookie, name);
	if (sc->sc_ih[qid] == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s interrupt\n", name);
		return 1;
	}

	where = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], where_buf,
	    sizeof(where_buf));

	/* Report where the interrupt landed. */
	if (!sc->sc_use_mq) {
		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", where);
		return 0;
	}
	if (qid == 0) {
		aprint_normal_dev(sc->sc_dev,
		    "for admin queue interrupting at %s\n", where);
		return 0;
	}
	if (!nvme_pci_mpsafe) {
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s\n", qid, where);
		return 0;
	}

	/* MP-safe io queue: try to bind the interrupt to one cpu. */
	kcpuset_create(&cpus, true);
	target = (qid - 1) % ncpu;
	kcpuset_set(cpus, target);
	rv = interrupt_distribute(sc->sc_ih[qid], cpus, NULL);
	kcpuset_destroy(cpus);

	aprint_normal_dev(sc->sc_dev,
	    "for io queue %d interrupting at %s", qid, where);
	if (rv == 0)
		aprint_normal(" affinity to cpu%lu", target);
	aprint_normal("\n");

	return 0;
}
/*
 * sc_intr_disestablish hook: tear down the interrupt handler of queue
 * "qid" and clear its slot in sc_ih.  In single-vector mode requests for
 * qid > 0 are ignored.  Always returns 0.
 */
static int
nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;

	if (sc->sc_use_mq || qid == 0) {
		KASSERT(sc->sc_ih[qid] != NULL);

		pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
		sc->sc_ih[qid] = NULL;
	}

	return 0;
}
/*
 * Allocate the interrupt vectors for the controller.
 *
 * Unless nvme_pci_force_intx is set, MSI-X and then MSI are probed with a
 * trial allocation (immediately released) to learn how many vectors can
 * actually be obtained.  The final pci_intr_alloc() call is made with the
 * resulting counts; if it fails, or hands back fewer vectors than were
 * asked for, the next lower interrupt type is tried.
 *
 * On success the handles and their count are stored in psc_intrs and
 * psc_nintrs, and sc_use_mq / sc_nq are derived from the vector count
 * (more than one vector selects multi-queue mode, with one vector set
 * aside for the admin queue).
 *
 * Returns 0 on success, the pci_intr_alloc() error when even INTx cannot
 * be allocated, or EBUSY when an INTx allocation comes back short.
 */
static int
nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
{
	struct nvme_softc *sc = &psc->psc_nvme;
	pci_intr_handle_t *ihps;
	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
	int max_type, intr_type;
	int error;

	/*
	 * Start from all-zero counts so the forced-INTx path below does not
	 * copy uninitialised MSI/MSI-X slots into alloced_counts.
	 */
	memset(counts, 0, sizeof(counts));

	if (nvme_pci_force_intx) {
		max_type = PCI_INTR_TYPE_INTX;
		goto force_intx;
	}

	/* MSI-X */
	max_type = PCI_INTR_TYPE_MSIX;
	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
	    ncpu + 1);
	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
		/* Trial allocation: find out how many vectors we really get. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSIX)) {
			counts[PCI_INTR_TYPE_MSIX] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSIX] =
			    alloced_counts[PCI_INTR_TYPE_MSIX];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSIX]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
		/* Fewer than two MSI-X vectors: do not use MSI-X at all. */
		counts[PCI_INTR_TYPE_MSIX] = 0;
		max_type = PCI_INTR_TYPE_MSI;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
	}

retry_msi:
	/* MSI */
	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
	if (counts[PCI_INTR_TYPE_MSI] > 0) {
		/*
		 * Halve the request for as long as the halved value would
		 * still be larger than ncpu + 1.
		 */
		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
				break;
			counts[PCI_INTR_TYPE_MSI] /= 2;
		}
		/* Trial allocation, as for MSI-X above. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSI)) {
			counts[PCI_INTR_TYPE_MSI] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSI] =
			    alloced_counts[PCI_INTR_TYPE_MSI];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSI]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSI] < 1) {
		counts[PCI_INTR_TYPE_MSI] = 0;
		if (max_type == PCI_INTR_TYPE_MSI)
			max_type = PCI_INTR_TYPE_INTX;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSI] > 2)
			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
	}

force_intx:
	/* INTx */
	counts[PCI_INTR_TYPE_INTX] = 1;

	/* The real allocation; alloced_counts is updated with what we got. */
	memcpy(alloced_counts, counts, sizeof(counts));
	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
	if (error) {
		if (max_type != PCI_INTR_TYPE_INTX) {
			/*
			 * Step down one interrupt type and try again.  This
			 * label is also entered from the short-allocation
			 * check below.
			 */
retry:
			memset(counts, 0, sizeof(counts));
			if (max_type == PCI_INTR_TYPE_MSIX) {
				max_type = PCI_INTR_TYPE_MSI;
				goto retry_msi;
			} else {
				max_type = PCI_INTR_TYPE_INTX;
				goto force_intx;
			}
		}
		return error;
	}

	intr_type = pci_intr_type(ihps[0]);
	if (alloced_counts[intr_type] < counts[intr_type]) {
		/* Got fewer vectors than requested: give them back. */
		if (intr_type != PCI_INTR_TYPE_INTX) {
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[intr_type]);
			max_type = intr_type;
			goto retry;
		}
		return EBUSY;
	}

	/* psc_nintrs keeps the full allocated count for the later release. */
	psc->psc_intrs = ihps;
	psc->psc_nintrs = alloced_counts[intr_type];
	if (intr_type == PCI_INTR_TYPE_MSI) {
		/* Use at most ncpu + 1 of the MSI vectors for queues. */
		if (alloced_counts[intr_type] > ncpu + 1)
			alloced_counts[intr_type] = ncpu + 1;
	}
	sc->sc_use_mq = alloced_counts[intr_type] > 1;
	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;
	return 0;
}