/* * QEMU NVM Express End-to-End Data Protection support * * Copyright (c) 2021 Samsung Electronics Co., Ltd. * * Authors: * Klaus Jensen * Gollu Appalanaidu */ #include "qemu/osdep.h" #include "qapi/error.h" #include "sysemu/block-backend.h" #include "nvme.h" #include "trace.h" uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, uint32_t reftag) { if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { return NVME_INVALID_PROT_INFO | NVME_DNR; } return NVME_SUCCESS; } /* from Linux kernel (crypto/crct10dif_common.c) */ static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, size_t len) { unsigned int i; for (i = 0; i < len; i++) { crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; } return crc; } void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint16_t apptag, uint32_t reftag) { uint8_t *end = buf + len; int16_t pil = 0; if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil, apptag, reftag); for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); if (pil) { crc = crc_t10dif(crc, mbuf, pil); } dif->guard = cpu_to_be16(crc); dif->apptag = cpu_to_be16(apptag); dif->reftag = cpu_to_be32(reftag); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { reftag++; } } } static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, uint8_t *buf, uint8_t *mbuf, size_t pil, uint16_t ctrl, uint16_t apptag, uint16_t appmask, uint32_t reftag) { switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { case NVME_ID_NS_DPS_TYPE_3: if (be32_to_cpu(dif->reftag) != 0xffffffff) { break; } /* fallthrough */ case NVME_ID_NS_DPS_TYPE_1: case NVME_ID_NS_DPS_TYPE_2: if (be16_to_cpu(dif->apptag) != 0xffff) { break; } trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag), be32_to_cpu(dif->reftag)); return NVME_SUCCESS; } if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) { uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); if (pil) { crc = crc_t10dif(crc, mbuf, pil); } trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc); if (be16_to_cpu(dif->guard) != crc) { return NVME_E2E_GUARD_ERROR; } } if (ctrl & NVME_RW_PRINFO_PRCHK_APP) { trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, appmask); if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { return NVME_E2E_APP_ERROR; } } if (ctrl & NVME_RW_PRINFO_PRCHK_REF) { trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); if (be32_to_cpu(dif->reftag) != reftag) { return NVME_E2E_REF_ERROR; } } return NVME_SUCCESS; } uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint16_t ctrl, uint64_t slba, uint16_t apptag, uint16_t appmask, uint32_t reftag) { uint8_t *end = buf + len; int16_t pil = 0; uint16_t status; status = nvme_check_prinfo(ns, ctrl, slba, reftag); if (status) { return status; } if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), ns->lbasz + pil); for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag, appmask, reftag); if (status) { return status; } if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { reftag++; } } return NVME_SUCCESS; } uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, uint64_t slba) { BlockBackend *blk = ns->blkconf.blk; BlockDriverState *bs = blk_bs(blk); int64_t moffset = 0, offset = nvme_l2b(ns, slba); uint8_t *mbufp, *end; bool zeroed; int16_t pil = 0; int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds; int64_t pnum = 0; Error *err = NULL; if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } do { int ret; bytes -= pnum; ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); if (ret < 0) { error_setg_errno(&err, -ret, "unable to get block status"); error_report_err(err); return NVME_INTERNAL_DEV_ERROR; } zeroed = !!(ret & BDRV_BLOCK_ZERO); trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); if (zeroed) { mbufp = mbuf + moffset; mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms; end = mbufp + mlen; for (; mbufp < end; mbufp += ns->lbaf.ms) { memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); } } moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms; offset += pnum; } while (pnum != bytes); return NVME_SUCCESS; } static void nvme_dif_rw_cb(void *opaque, int ret) { NvmeBounceContext *ctx = opaque; NvmeRequest *req = ctx->req; NvmeNamespace *ns = req->ns; BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk)); qemu_iovec_destroy(&ctx->data.iov); g_free(ctx->data.bounce); qemu_iovec_destroy(&ctx->mdata.iov); g_free(ctx->mdata.bounce); g_free(ctx); nvme_rw_complete_cb(req, ret); } static void nvme_dif_rw_check_cb(void *opaque, int ret) { NvmeBounceContext *ctx = opaque; NvmeRequest *req = ctx->req; NvmeNamespace *ns = req->ns; NvmeCtrl *n = nvme_ctrl(req); NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); uint16_t ctrl = le16_to_cpu(rw->control); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); uint32_t reftag = le32_to_cpu(rw->reftag); uint16_t status; trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, appmask, reftag); if (ret) { goto out; } status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size, slba); if (status) { req->status = status; goto out; } status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, slba, apptag, appmask, reftag); if (status) { req->status = status; goto out; } status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, NVME_TX_DIRECTION_FROM_DEVICE, req); if (status) { req->status = status; goto out; } if (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8) { goto out; } status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, NVME_TX_DIRECTION_FROM_DEVICE, req); if (status) { req->status = status; } out: nvme_dif_rw_cb(ctx, ret); } static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret) { NvmeBounceContext *ctx = opaque; NvmeRequest *req = ctx->req; NvmeNamespace *ns = req->ns; NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t mlen = nvme_m2b(ns, nlb); uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk)); if (ret) { goto out; } ctx->mdata.bounce = g_malloc(mlen); qemu_iovec_reset(&ctx->mdata.iov); qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, nvme_dif_rw_check_cb, ctx); return; out: nvme_dif_rw_cb(ctx, ret); } static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret) { NvmeBounceContext *ctx = opaque; NvmeRequest *req = ctx->req; NvmeNamespace *ns = req->ns; NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk)); if (ret) { goto out; } req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0, nvme_dif_rw_cb, ctx); return; out: nvme_dif_rw_cb(ctx, ret); } uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) { NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; NvmeNamespace *ns = req->ns; BlockBackend *blk = ns->blkconf.blk; bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES; uint32_t nlb = le16_to_cpu(rw->nlb) + 1; uint64_t slba = le64_to_cpu(rw->slba); size_t len = nvme_l2b(ns, nlb); size_t mlen = nvme_m2b(ns, nlb); size_t mapped_len = len; int64_t offset = nvme_l2b(ns, slba); uint16_t ctrl = le16_to_cpu(rw->control); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); uint32_t reftag = le32_to_cpu(rw->reftag); bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT); NvmeBounceContext *ctx; uint16_t status; trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl)); ctx = g_new0(NvmeBounceContext, 1); ctx->req = req; if (wrz) { BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP; if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) { status = NVME_INVALID_PROT_INFO | NVME_DNR; goto err; } if (pract) { uint8_t *mbuf, *end; int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple); status = nvme_check_prinfo(ns, ctrl, slba, reftag); if (status) { goto err; } flags = 0; ctx->mdata.bounce = g_malloc0(mlen); qemu_iovec_init(&ctx->mdata.iov, 1); qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); mbuf = ctx->mdata.bounce; end = mbuf + mlen; if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) { pil = 0; } for (; mbuf < end; mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); dif->apptag = cpu_to_be16(apptag); dif->reftag = cpu_to_be32(reftag); switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { case NVME_ID_NS_DPS_TYPE_1: case NVME_ID_NS_DPS_TYPE_2: reftag++; } } } req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags, nvme_dif_rw_mdata_out_cb, ctx); return NVME_NO_COMPLETE; } if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) { mapped_len += mlen; } status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd); if (status) { goto err; } ctx->data.bounce = g_malloc(len); qemu_iovec_init(&ctx->data.iov, 1); qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); if (req->cmd.opcode == NVME_CMD_READ) { block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, BLOCK_ACCT_READ); req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, nvme_dif_rw_mdata_in_cb, ctx); return NVME_NO_COMPLETE; } status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, NVME_TX_DIRECTION_TO_DEVICE, req); if (status) { goto err; } ctx->mdata.bounce = g_malloc(mlen); qemu_iovec_init(&ctx->mdata.iov, 1); qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); if (!(pract && ns->lbaf.ms == 8)) { status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, NVME_TX_DIRECTION_TO_DEVICE, req); if (status) { goto err; } } status = nvme_check_prinfo(ns, ctrl, slba, reftag); if (status) { goto err; } if (pract) { /* splice generated protection information into the buffer */ nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, apptag, reftag); } else { status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, slba, apptag, appmask, reftag); if (status) { goto err; } } block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, BLOCK_ACCT_WRITE); req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0, nvme_dif_rw_mdata_out_cb, ctx); return NVME_NO_COMPLETE; err: qemu_iovec_destroy(&ctx->data.iov); g_free(ctx->data.bounce); qemu_iovec_destroy(&ctx->mdata.iov); g_free(ctx->mdata.bounce); g_free(ctx); return status; }