From e926c9f1bc8864d4d5ef6613bea76d4f825e30c0 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 3 Apr 2019 14:33:40 +0300 Subject: [PATCH 1/4] hw/rdma: Add SRQ support to backend layer Add the required functions and definitions to support shared receive queues (SRQs) in the backend layer. Signed-off-by: Kamal Heib Message-Id: <20190403113343.26384-2-kamalheib1@gmail.com> Reviewed-by: Yuval Shaia Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 116 +++++++++++++++++++++++++++++++++++- hw/rdma/rdma_backend.h | 12 ++++ hw/rdma/rdma_backend_defs.h | 5 ++ hw/rdma/rdma_rm.c | 2 + hw/rdma/rdma_rm_defs.h | 1 + 5 files changed, 134 insertions(+), 2 deletions(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index d1660b6474..04dfd63a57 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -40,6 +40,7 @@ typedef struct BackendCtx { void *up_ctx; struct ibv_sge sge; /* Used to save MAD recv buffer */ RdmaBackendQP *backend_qp; /* To maintain recv buffers */ + RdmaBackendSRQ *backend_srq; } BackendCtx; struct backend_umad { @@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) int i, ne, total_ne = 0; BackendCtx *bctx; struct ibv_wc wc[2]; + RdmaProtectedGSList *cqe_ctx_list; qemu_mutex_lock(&rdma_dev_res->lock); do { @@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) comp_handler(bctx->up_ctx, &wc[i]); - rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list, - wc[i].wr_id); + if (bctx->backend_qp) { + cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list; + } else { + cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list; + } + + rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id); rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); g_free(bctx); } @@ -662,6 +669,60 @@ err_free_bctx: g_free(bctx); } +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev, + RdmaBackendSRQ *srq, struct ibv_sge *sge, + uint32_t num_sge, void *ctx) +{ + BackendCtx *bctx; + struct ibv_sge new_sge[MAX_SGE]; + uint32_t bctx_id; + int rc; + struct ibv_recv_wr wr = {}, *bad_wr; + + bctx = g_malloc0(sizeof(*bctx)); + bctx->up_ctx = ctx; + bctx->backend_srq = srq; + + rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); + if (unlikely(rc)) { + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); + goto err_free_bctx; + } + + rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id); + + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge, + &backend_dev->rdma_dev_res->stats.rx_bufs_len); + if (rc) { + complete_work(IBV_WC_GENERAL_ERR, rc, ctx); + goto err_dealloc_cqe_ctx; + } + + wr.num_sge = num_sge; + wr.sg_list = new_sge; + wr.wr_id = bctx_id; + rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr); + if (rc) { + rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d", + srq->ibsrq->handle, rc, errno); + complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); + goto err_dealloc_cqe_ctx; + } + + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); + backend_dev->rdma_dev_res->stats.rx_bufs++; + backend_dev->rdma_dev_res->stats.rx_srq++; + + return; + +err_dealloc_cqe_ctx: + backend_dev->rdma_dev_res->stats.rx_bufs_err++; + rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id); + +err_free_bctx: + g_free(bctx); +} + int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd) { pd->ibpd = ibv_alloc_pd(backend_dev->context); @@ -938,6 +999,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res) rdma_protected_gslist_destroy(&qp->cqe_ctx_list); } +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, + uint32_t max_wr, uint32_t max_sge, + uint32_t srq_limit) +{ + struct ibv_srq_init_attr srq_init_attr = {}; + + srq_init_attr.attr.max_wr = max_wr; + srq_init_attr.attr.max_sge = max_sge; + srq_init_attr.attr.srq_limit = srq_limit; + + srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr); + if (!srq->ibsrq) { + rdma_error_report("ibv_create_srq failed, errno=%d", errno); + return -EIO; + } + + rdma_protected_gslist_init(&srq->cqe_ctx_list); + + return 0; +} + +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr) +{ + if (!srq->ibsrq) { + return -EINVAL; + } + + return ibv_query_srq(srq->ibsrq, srq_attr); +} + +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr, + int srq_attr_mask) +{ + if (!srq->ibsrq) { + return -EINVAL; + } + + return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask); +} + +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res) +{ + if (srq->ibsrq) { + ibv_destroy_srq(srq->ibsrq); + } + g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res); + rdma_protected_gslist_destroy(&srq->cqe_ctx_list); +} + #define CHK_ATTR(req, dev, member, fmt) ({ \ trace_rdma_check_dev_attr(#member, dev.member, req->member); \ if (req->member > dev.member) { \ @@ -960,6 +1070,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev, } dev_attr->max_sge = MAX_SGE; + dev_attr->max_srq_sge = MAX_SGE; CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64); CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d"); @@ -970,6 +1081,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev, CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d"); + CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d"); return 0; } diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 38056d97c7..cad7956d98 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, struct ibv_sge *sge, uint32_t num_sge, void *ctx); +int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd, + uint32_t max_wr, uint32_t max_sge, + uint32_t srq_limit); +int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr); +int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr, + int srq_attr_mask); +void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, + RdmaDeviceResources *dev_res); +void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev, + RdmaBackendSRQ *srq, struct ibv_sge *sge, + uint32_t num_sge, void *ctx); + #endif diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index 817153dc8c..0b55be3503 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -68,4 +68,9 @@ typedef struct RdmaBackendQP { RdmaProtectedGSList cqe_ctx_list; } RdmaBackendQP; +typedef struct RdmaBackendSRQ { + struct ibv_srq *ibsrq; + RdmaProtectedGSList cqe_ctx_list; +} RdmaBackendSRQ; + #endif diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index bac3b2f4a6..b683506b86 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res) dev_res->stats.tx_err); monitor_printf(mon, "\trx_bufs : %" PRId64 "\n", dev_res->stats.rx_bufs); + monitor_printf(mon, "\trx_srq : %" PRId64 "\n", + dev_res->stats.rx_srq); monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n", dev_res->stats.rx_bufs_len); monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n", diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index c200d311de..e774af5280 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -106,6 +106,7 @@ typedef struct RdmaRmStats { uint64_t rx_bufs; uint64_t rx_bufs_len; uint64_t rx_bufs_err; + uint64_t rx_srq; uint64_t completions; uint64_t mad_tx; uint64_t mad_tx_err; From cdc84058bc6412179b22ce61f29db104cfcba9fc Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 3 Apr 2019 14:33:41 +0300 Subject: [PATCH 2/4] hw/rdma: Add support for managing SRQ resource Adding the required functions and definitions for support managing the shared receive queues (SRQs). Signed-off-by: Kamal Heib Message-Id: <20190403113343.26384-3-kamalheib1@gmail.com> Reviewed-by: Yuval Shaia Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_rm.c | 93 ++++++++++++++++++++++++++++++++++++++++++ hw/rdma/rdma_rm.h | 10 +++++ hw/rdma/rdma_rm_defs.h | 8 ++++ 3 files changed, 111 insertions(+) diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index b683506b86..c4fb140dcd 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -544,6 +544,96 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle) rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); } +RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle) +{ + return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle); +} + +int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle, + uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit, + uint32_t *srq_handle, void *opaque) +{ + RdmaRmSRQ *srq; + RdmaRmPD *pd; + int rc; + + pd = rdma_rm_get_pd(dev_res, pd_handle); + if (!pd) { + return -EINVAL; + } + + srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle); + if (!srq) { + return -ENOMEM; + } + + rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd, + max_wr, max_sge, srq_limit); + if (rc) { + rc = -EIO; + goto out_dealloc_srq; + } + + srq->opaque = opaque; + + return 0; + +out_dealloc_srq: + rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle); + + return rc; +} + +int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle, + struct ibv_srq_attr *srq_attr) +{ + RdmaRmSRQ *srq; + + srq = rdma_rm_get_srq(dev_res, srq_handle); + if (!srq) { + return -EINVAL; + } + + return rdma_backend_query_srq(&srq->backend_srq, srq_attr); +} + +int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle, + struct ibv_srq_attr *srq_attr, int srq_attr_mask) +{ + RdmaRmSRQ *srq; + + srq = rdma_rm_get_srq(dev_res, srq_handle); + if (!srq) { + return -EINVAL; + } + + if ((srq_attr_mask & IBV_SRQ_LIMIT) && + (srq_attr->srq_limit == 0)) { + return -EINVAL; + } + + if ((srq_attr_mask & IBV_SRQ_MAX_WR) && + (srq_attr->max_wr == 0)) { + return -EINVAL; + } + + return rdma_backend_modify_srq(&srq->backend_srq, srq_attr, + srq_attr_mask); +} + +void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle) +{ + RdmaRmSRQ *srq; + + srq = rdma_rm_get_srq(dev_res, srq_handle); + if (!srq) { + return; + } + + rdma_backend_destroy_srq(&srq->backend_srq, dev_res); + rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle); +} + void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id) { void **cqe_ctx; @@ -673,6 +763,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr) res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp * dev_attr->max_qp_wr, sizeof(void *)); res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC)); + res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq, + sizeof(RdmaRmSRQ)); init_ports(dev_res); @@ -691,6 +783,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, fini_ports(dev_res, backend_dev, ifname); + res_tbl_free(&dev_res->srq_tbl); res_tbl_free(&dev_res->uc_tbl); res_tbl_free(&dev_res->cqe_ctx_tbl); res_tbl_free(&dev_res->qp_tbl); diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index 4f03f9b8c5..e88ab95e26 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -65,6 +65,16 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, int attr_mask, struct ibv_qp_init_attr *init_attr); void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle); +RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle); +int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle, + uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit, + uint32_t *srq_handle, void *opaque); +int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle, + struct ibv_srq_attr *srq_attr); +int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle, + struct ibv_srq_attr *srq_attr, int srq_attr_mask); +void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle); + int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id, void *ctx); void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id); diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index e774af5280..7bdd9f291f 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -33,6 +33,7 @@ #define MAX_QP_RD_ATOM 16 #define MAX_QP_INIT_RD_ATOM 16 #define MAX_AH 64 +#define MAX_SRQ 512 #define MAX_RM_TBL_NAME 16 #define MAX_CONSEQ_EMPTY_POLL_CQ 4096 /* considered as error above this */ @@ -89,6 +90,12 @@ typedef struct RdmaRmQP { enum ibv_qp_state qp_state; } RdmaRmQP; +typedef struct RdmaRmSRQ { + RdmaBackendSRQ backend_srq; + uint32_t recv_cq_handle; + void *opaque; +} RdmaRmSRQ; + typedef struct RdmaRmGid { union ibv_gid gid; int backend_gid_index; @@ -129,6 +136,7 @@ struct RdmaDeviceResources { RdmaRmResTbl qp_tbl; RdmaRmResTbl cq_tbl; RdmaRmResTbl cqe_ctx_tbl; + RdmaRmResTbl srq_tbl; GHashTable *qp_hash; /* Keeps mapping between real and emulated */ QemuMutex lock; RdmaRmStats stats; From 8b42cfab82f404389746dc71852dc88eacb40e8e Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 3 Apr 2019 14:33:42 +0300 Subject: [PATCH 3/4] hw/rdma: Modify create/destroy QP to support SRQ Modify create/destroy QP to support shared receive queue and rearrange the destroy_qp() code to avoid touching the QP after calling rdma_rm_dealloc_qp(). Signed-off-by: Kamal Heib Message-Id: <20190403113343.26384-4-kamalheib1@gmail.com> Reviewed-by: Yuval Shaia Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 9 ++++-- hw/rdma/rdma_backend.h | 6 ++-- hw/rdma/rdma_rm.c | 22 +++++++++++++-- hw/rdma/rdma_rm.h | 3 +- hw/rdma/rdma_rm_defs.h | 1 + hw/rdma/vmw/pvrdma_cmd.c | 59 ++++++++++++++++++++++++---------------- 6 files changed, 67 insertions(+), 33 deletions(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index 04dfd63a57..cf34874e9d 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -794,9 +794,9 @@ void rdma_backend_destroy_cq(RdmaBackendCQ *cq) int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, RdmaBackendPD *pd, RdmaBackendCQ *scq, - RdmaBackendCQ *rcq, uint32_t max_send_wr, - uint32_t max_recv_wr, uint32_t max_send_sge, - uint32_t max_recv_sge) + RdmaBackendCQ *rcq, RdmaBackendSRQ *srq, + uint32_t max_send_wr, uint32_t max_recv_wr, + uint32_t max_send_sge, uint32_t max_recv_sge) { struct ibv_qp_init_attr attr = {}; @@ -824,6 +824,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, attr.cap.max_recv_wr = max_recv_wr; attr.cap.max_send_sge = max_send_sge; attr.cap.max_recv_sge = max_recv_sge; + if (srq) { + attr.srq = srq->ibsrq; + } qp->ibqp = ibv_create_qp(pd->ibpd, &attr); if (!qp->ibqp) { diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index cad7956d98..7c1a19a2b5 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -89,9 +89,9 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq); int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, RdmaBackendPD *pd, RdmaBackendCQ *scq, - RdmaBackendCQ *rcq, uint32_t max_send_wr, - uint32_t max_recv_wr, uint32_t max_send_sge, - uint32_t max_recv_sge); + RdmaBackendCQ *rcq, RdmaBackendSRQ *srq, + uint32_t max_send_wr, uint32_t max_recv_wr, + uint32_t max_send_sge, uint32_t max_recv_sge); int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, uint32_t qkey); int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index c4fb140dcd..1927f85472 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -386,12 +386,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, uint8_t qp_type, uint32_t max_send_wr, uint32_t max_send_sge, uint32_t send_cq_handle, uint32_t max_recv_wr, uint32_t max_recv_sge, - uint32_t recv_cq_handle, void *opaque, uint32_t *qpn) + uint32_t recv_cq_handle, void *opaque, uint32_t *qpn, + uint8_t is_srq, uint32_t srq_handle) { int rc; RdmaRmQP *qp; RdmaRmCQ *scq, *rcq; RdmaRmPD *pd; + RdmaRmSRQ *srq = NULL; uint32_t rm_qpn; pd = rdma_rm_get_pd(dev_res, pd_handle); @@ -408,6 +410,16 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, return -EINVAL; } + if (is_srq) { + srq = rdma_rm_get_srq(dev_res, srq_handle); + if (!srq) { + rdma_error_report("Invalid srqn %d", srq_handle); + return -EINVAL; + } + + srq->recv_cq_handle = recv_cq_handle; + } + if (qp_type == IBV_QPT_GSI) { scq->notify = CNT_SET; rcq->notify = CNT_SET; @@ -424,10 +436,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, qp->send_cq_handle = send_cq_handle; qp->recv_cq_handle = recv_cq_handle; qp->opaque = opaque; + qp->is_srq = is_srq; rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd, - &scq->backend_cq, &rcq->backend_cq, max_send_wr, - max_recv_wr, max_send_sge, max_recv_sge); + &scq->backend_cq, &rcq->backend_cq, + is_srq ? &srq->backend_srq : NULL, + max_send_wr, max_recv_wr, max_send_sge, + max_recv_sge); + if (rc) { rc = -EIO; goto out_dealloc_qp; diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index e88ab95e26..e8639909cd 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -53,7 +53,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, uint8_t qp_type, uint32_t max_send_wr, uint32_t max_send_sge, uint32_t send_cq_handle, uint32_t max_recv_wr, uint32_t max_recv_sge, - uint32_t recv_cq_handle, void *opaque, uint32_t *qpn); + uint32_t recv_cq_handle, void *opaque, uint32_t *qpn, + uint8_t is_srq, uint32_t srq_handle); RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn); int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx, diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index 7bdd9f291f..534f2f74d3 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -88,6 +88,7 @@ typedef struct RdmaRmQP { uint32_t send_cq_handle; uint32_t recv_cq_handle; enum ibv_qp_state qp_state; + uint8_t is_srq; } RdmaRmQP; typedef struct RdmaRmSRQ { diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 4afcd2037d..b931bb6dc9 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -357,7 +357,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req, static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, PvrdmaRing **rings, uint32_t scqe, uint32_t smax_sge, uint32_t spages, uint32_t rcqe, uint32_t rmax_sge, - uint32_t rpages) + uint32_t rpages, uint8_t is_srq) { uint64_t *dir = NULL, *tbl = NULL; PvrdmaRing *sr, *rr; @@ -365,9 +365,14 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, char ring_name[MAX_RING_NAME_SZ]; uint32_t wqe_sz; - if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES - || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) { - rdma_error_report("Got invalid page count for QP ring: %d, %d", spages, + if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES) { + rdma_error_report("Got invalid send page count for QP ring: %d", + spages); + return rc; + } + + if (!is_srq && (!rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES)) { + rdma_error_report("Got invalid recv page count for QP ring: %d", rpages); return rc; } @@ -384,8 +389,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, goto out; } - sr = g_malloc(2 * sizeof(*rr)); - rr = &sr[1]; + if (!is_srq) { + sr = g_malloc(2 * sizeof(*rr)); + rr = &sr[1]; + } else { + sr = g_malloc(sizeof(*sr)); + } *rings = sr; @@ -407,15 +416,18 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, goto out_unmap_ring_state; } - /* Create recv ring */ - rr->ring_state = &sr->ring_state[1]; - wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + - sizeof(struct pvrdma_sge) * rmax_sge - 1); - sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma); - rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state, - rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], rpages); - if (rc) { - goto out_free_sr; + if (!is_srq) { + /* Create recv ring */ + rr->ring_state = &sr->ring_state[1]; + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge) * rmax_sge - 1); + sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma); + rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state, + rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], + rpages); + if (rc) { + goto out_free_sr; + } } goto out; @@ -436,10 +448,12 @@ out: return rc; } -static void destroy_qp_rings(PvrdmaRing *ring) +static void destroy_qp_rings(PvrdmaRing *ring, uint8_t is_srq) { pvrdma_ring_free(&ring[0]); - pvrdma_ring_free(&ring[1]); + if (!is_srq) { + pvrdma_ring_free(&ring[1]); + } rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); g_free(ring); @@ -458,7 +472,7 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings, cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks, cmd->max_recv_wr, cmd->max_recv_sge, - cmd->total_chunks - cmd->send_chunks - 1); + cmd->total_chunks - cmd->send_chunks - 1, cmd->is_srq); if (rc) { return rc; } @@ -467,9 +481,9 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, cmd->max_send_wr, cmd->max_send_sge, cmd->send_cq_handle, cmd->max_recv_wr, cmd->max_recv_sge, cmd->recv_cq_handle, rings, - &resp->qpn); + &resp->qpn, cmd->is_srq, cmd->srq_handle); if (rc) { - destroy_qp_rings(rings); + destroy_qp_rings(rings, cmd->is_srq); return rc; } @@ -531,10 +545,9 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, return -EINVAL; } - rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle); - ring = (PvrdmaRing *)qp->opaque; - destroy_qp_rings(ring); + destroy_qp_rings(ring, qp->is_srq); + rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle); return 0; } From 355b7cf356b7f506ca1b716647cc0cf19d312fd2 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 3 Apr 2019 14:33:43 +0300 Subject: [PATCH 4/4] hw/pvrdma: Add support for SRQ Implement the pvrdma device commands for supporting SRQ Signed-off-by: Kamal Heib Message-Id: <20190403113343.26384-5-kamalheib1@gmail.com> Reviewed-by: Yuval Shaia Signed-off-by: Marcel Apfelbaum --- hw/rdma/vmw/pvrdma_cmd.c | 147 ++++++++++++++++++++++++++++++++++++ hw/rdma/vmw/pvrdma_main.c | 16 ++++ hw/rdma/vmw/pvrdma_qp_ops.c | 46 ++++++++++- hw/rdma/vmw/pvrdma_qp_ops.h | 1 + 4 files changed, 209 insertions(+), 1 deletion(-) diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index b931bb6dc9..8d70c0d23d 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -609,6 +609,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, return 0; } +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring, + uint64_t pdir_dma, uint32_t max_wr, + uint32_t max_sge, uint32_t nchunks) +{ + uint64_t *dir = NULL, *tbl = NULL; + PvrdmaRing *r; + int rc = -EINVAL; + char ring_name[MAX_RING_NAME_SZ]; + uint32_t wqe_sz; + + if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) { + rdma_error_report("Got invalid page count for SRQ ring: %d", + nchunks); + return rc; + } + + dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); + if (!dir) { + rdma_error_report("Failed to map to SRQ page directory"); + goto out; + } + + tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); + if (!tbl) { + rdma_error_report("Failed to map to SRQ page table"); + goto out; + } + + r = g_malloc(sizeof(*r)); + *ring = r; + + r->ring_state = (struct pvrdma_ring *) + rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); + if (!r->ring_state) { + rdma_error_report("Failed to map tp SRQ ring state"); + goto out_free_ring_mem; + } + + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge) * max_sge - 1); + sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma); + rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr, + wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1); + if (rc) { + goto out_unmap_ring_state; + } + + goto out; + +out_unmap_ring_state: + rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE); + +out_free_ring_mem: + g_free(r); + +out: + rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); + rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); + + return rc; +} + +static void destroy_srq_ring(PvrdmaRing *ring) +{ + pvrdma_ring_free(ring); + rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); + g_free(ring); +} + +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_create_srq *cmd = &req->create_srq; + struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp; + PvrdmaRing *ring = NULL; + int rc; + + memset(resp, 0, sizeof(*resp)); + + rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, + cmd->attrs.max_wr, cmd->attrs.max_sge, + cmd->nchunks); + if (rc) { + return rc; + } + + rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle, + cmd->attrs.max_wr, cmd->attrs.max_sge, + cmd->attrs.srq_limit, &resp->srqn, ring); + if (rc) { + destroy_srq_ring(ring); + return rc; + } + + return 0; +} + +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_query_srq *cmd = &req->query_srq; + struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp; + + memset(resp, 0, sizeof(*resp)); + + return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle, + (struct ibv_srq_attr *)&resp->attrs); +} + +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq; + + /* Only support SRQ limit */ + if (!(cmd->attr_mask & IBV_SRQ_LIMIT) || + (cmd->attr_mask & IBV_SRQ_MAX_WR)) + return -EINVAL; + + return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle, + (struct ibv_srq_attr *)&cmd->attrs, + cmd->attr_mask); +} + +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq; + RdmaRmSRQ *srq; + PvrdmaRing *ring; + + srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle); + if (!srq) { + return -EINVAL; + } + + ring = (PvrdmaRing *)srq->opaque; + destroy_srq_ring(ring); + rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle); + + return 0; +} + struct cmd_handler { uint32_t cmd; uint32_t ack; @@ -634,6 +777,10 @@ static struct cmd_handler cmd_handlers[] = { {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc}, {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind}, {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind}, + {PVRDMA_CMD_CREATE_SRQ, PVRDMA_CMD_CREATE_SRQ_RESP, create_srq}, + {PVRDMA_CMD_QUERY_SRQ, PVRDMA_CMD_QUERY_SRQ_RESP, query_srq}, + {PVRDMA_CMD_MODIFY_SRQ, PVRDMA_CMD_MODIFY_SRQ_RESP, modify_srq}, + {PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_DESTROY_SRQ_RESP, destroy_srq}, }; int pvrdma_exec_cmd(PVRDMADev *dev) diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 0b46561bad..769f7990f8 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = { DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), + DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ), DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), DEFINE_PROP_END_OF_LIST(), }; @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev) dsr->caps.max_mr = dev->dev_attr.max_mr; dsr->caps.max_pd = dev->dev_attr.max_pd; dsr->caps.max_ah = dev->dev_attr.max_ah; + dsr->caps.max_srq = dev->dev_attr.max_srq; + dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr; + dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge; dsr->caps.gid_tbl_len = MAX_GIDS; dsr->caps.sys_image_guid = 0; dsr->caps.node_guid = dev->node_guid; @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); } break; + case PVRDMA_UAR_SRQ_OFFSET: + if (val & PVRDMA_UAR_SRQ_RECV) { + trace_pvrdma_uar_write(addr, val, "QP", "SRQ", + val & PVRDMA_UAR_HANDLE_MASK, 0); + pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); + } + break; default: rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, addr, val); @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev) dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - TARGET_PAGE_SIZE; /* First page is ring state */ + + dev->dev_attr.max_srq_wr = pg_tbl_bytes / + ((sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge)) * + dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; } static int pvrdma_check_ram_shared(Object *obj, void *opaque) diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index 5b9786efbe..bd6db858de 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, memset(cqe1, 0, sizeof(*cqe1)); cqe1->wr_id = cqe->wr_id; - cqe1->qp = cqe->qp; + cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num; cqe1->opcode = cqe->opcode; cqe1->status = wc->status; cqe1->byte_len = wc->byte_len; @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) } } +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle) +{ + RdmaRmSRQ *srq; + PvrdmaRqWqe *wqe; + PvrdmaRing *ring; + + srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle); + if (unlikely(!srq)) { + return; + } + + ring = (PvrdmaRing *)srq->opaque; + + wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring); + while (wqe) { + CompHandlerCtx *comp_ctx; + + /* Prepare CQE */ + comp_ctx = g_malloc(sizeof(CompHandlerCtx)); + comp_ctx->dev = dev; + comp_ctx->cq_handle = srq->recv_cq_handle; + comp_ctx->cqe.wr_id = wqe->hdr.wr_id; + comp_ctx->cqe.qp = 0; + comp_ctx->cqe.opcode = IBV_WC_RECV; + + if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { + rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, + dev->dev_attr.max_sge); + complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); + continue; + } + + rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq, + (struct ibv_sge *)&wqe->sge[0], + wqe->hdr.num_sge, + comp_ctx); + + pvrdma_ring_read_inc(ring); + + wqe = pvrdma_ring_next_elem_read(ring); + } + +} + void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle) { RdmaRmCQ *cq; diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h index 31cb48ba29..82e720a76f 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.h +++ b/hw/rdma/vmw/pvrdma_qp_ops.h @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void); void pvrdma_qp_ops_fini(void); void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle); void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle); +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle); void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle); #endif