RDMA queue

* pvrdma: Add support for SRQ
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJczZUsAAoJEDbUwPDPL+Rt/ooH/jZ6Gn325xTl8HYyJ/jJauM1
 OTQpY0NuMR30lFc16pGezOuvkahrh5Nwf0oKBCDbiJGVz+6qfCsmUCeem6zKQuCs
 e6VEGZPDpK3FG/2gZ4zguy0C2YPsjsDNXXRS0e6Xu5dnLrsaZ+dzoRTtjKf783qr
 xVQ6wxDvMICVxtcCglWSd4U5azttpjxccj4nqCg9+q/R7YMTw+Gsp7e2Cti8Uc2I
 LaOgm4+waK5Y8cLNRazPfy1Oonx5+YWPJ5KuNG+AUhm2F6pdsqxh8YT2BHdhp3XC
 G6Spa9Lw0pORxOXWvZVqGTpATa0zpW2v4NuTeM/EoT5DZ89uY5cCfOz+mbLPtQY=
 =UW7h
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/marcel/tags/rdma-pull-request' into staging

RDMA queue

* pvrdma: Add support for SRQ

# gpg: Signature made Sat 04 May 2019 14:35:40 BST
# gpg:                using RSA key 36D4C0F0CF2FE46D
# gpg: Good signature from "Marcel Apfelbaum <marcel.apfelbaum@zoho.com>" [marginal]
# gpg:                 aka "Marcel Apfelbaum <marcel@redhat.com>" [marginal]
# gpg:                 aka "Marcel Apfelbaum <marcel.apfelbaum@gmail.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B1C6 3A57 F92E 08F2 640F  31F5 36D4 C0F0 CF2F E46D

* remotes/marcel/tags/rdma-pull-request:
  hw/pvrdma: Add support for SRQ
  hw/rdma: Modify create/destroy QP to support SRQ
  hw/rdma: Add support for managing SRQ resource
  hw/rdma: Add SRQ support to backend layer

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2019-05-07 12:36:50 +01:00
commit 1efede74f6
10 changed files with 521 additions and 36 deletions

View File

@ -40,6 +40,7 @@ typedef struct BackendCtx {
void *up_ctx;
struct ibv_sge sge; /* Used to save MAD recv buffer */
RdmaBackendQP *backend_qp; /* To maintain recv buffers */
RdmaBackendSRQ *backend_srq;
} BackendCtx;
struct backend_umad {
@ -99,6 +100,7 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
int i, ne, total_ne = 0;
BackendCtx *bctx;
struct ibv_wc wc[2];
RdmaProtectedGSList *cqe_ctx_list;
qemu_mutex_lock(&rdma_dev_res->lock);
do {
@ -116,8 +118,13 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
comp_handler(bctx->up_ctx, &wc[i]);
rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list,
wc[i].wr_id);
if (bctx->backend_qp) {
cqe_ctx_list = &bctx->backend_qp->cqe_ctx_list;
} else {
cqe_ctx_list = &bctx->backend_srq->cqe_ctx_list;
}
rdma_protected_gslist_remove_int32(cqe_ctx_list, wc[i].wr_id);
rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
g_free(bctx);
}
@ -662,6 +669,60 @@ err_free_bctx:
g_free(bctx);
}
void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
RdmaBackendSRQ *srq, struct ibv_sge *sge,
uint32_t num_sge, void *ctx)
{
BackendCtx *bctx;
struct ibv_sge new_sge[MAX_SGE];
uint32_t bctx_id;
int rc;
struct ibv_recv_wr wr = {}, *bad_wr;
bctx = g_malloc0(sizeof(*bctx));
bctx->up_ctx = ctx;
bctx->backend_srq = srq;
rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
if (unlikely(rc)) {
complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
goto err_free_bctx;
}
rdma_protected_gslist_append_int32(&srq->cqe_ctx_list, bctx_id);
rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge,
&backend_dev->rdma_dev_res->stats.rx_bufs_len);
if (rc) {
complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
goto err_dealloc_cqe_ctx;
}
wr.num_sge = num_sge;
wr.sg_list = new_sge;
wr.wr_id = bctx_id;
rc = ibv_post_srq_recv(srq->ibsrq, &wr, &bad_wr);
if (rc) {
rdma_error_report("ibv_post_srq_recv fail, srqn=0x%x, rc=%d, errno=%d",
srq->ibsrq->handle, rc, errno);
complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
goto err_dealloc_cqe_ctx;
}
atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe);
backend_dev->rdma_dev_res->stats.rx_bufs++;
backend_dev->rdma_dev_res->stats.rx_srq++;
return;
err_dealloc_cqe_ctx:
backend_dev->rdma_dev_res->stats.rx_bufs_err++;
rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
err_free_bctx:
g_free(bctx);
}
int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
{
pd->ibpd = ibv_alloc_pd(backend_dev->context);
@ -733,9 +794,9 @@ void rdma_backend_destroy_cq(RdmaBackendCQ *cq)
int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
RdmaBackendPD *pd, RdmaBackendCQ *scq,
RdmaBackendCQ *rcq, uint32_t max_send_wr,
uint32_t max_recv_wr, uint32_t max_send_sge,
uint32_t max_recv_sge)
RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
uint32_t max_send_wr, uint32_t max_recv_wr,
uint32_t max_send_sge, uint32_t max_recv_sge)
{
struct ibv_qp_init_attr attr = {};
@ -763,6 +824,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
attr.cap.max_recv_wr = max_recv_wr;
attr.cap.max_send_sge = max_send_sge;
attr.cap.max_recv_sge = max_recv_sge;
if (srq) {
attr.srq = srq->ibsrq;
}
qp->ibqp = ibv_create_qp(pd->ibpd, &attr);
if (!qp->ibqp) {
@ -938,6 +1002,55 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res)
rdma_protected_gslist_destroy(&qp->cqe_ctx_list);
}
int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
uint32_t max_wr, uint32_t max_sge,
uint32_t srq_limit)
{
struct ibv_srq_init_attr srq_init_attr = {};
srq_init_attr.attr.max_wr = max_wr;
srq_init_attr.attr.max_sge = max_sge;
srq_init_attr.attr.srq_limit = srq_limit;
srq->ibsrq = ibv_create_srq(pd->ibpd, &srq_init_attr);
if (!srq->ibsrq) {
rdma_error_report("ibv_create_srq failed, errno=%d", errno);
return -EIO;
}
rdma_protected_gslist_init(&srq->cqe_ctx_list);
return 0;
}
int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr)
{
if (!srq->ibsrq) {
return -EINVAL;
}
return ibv_query_srq(srq->ibsrq, srq_attr);
}
int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
int srq_attr_mask)
{
if (!srq->ibsrq) {
return -EINVAL;
}
return ibv_modify_srq(srq->ibsrq, srq_attr, srq_attr_mask);
}
void rdma_backend_destroy_srq(RdmaBackendSRQ *srq, RdmaDeviceResources *dev_res)
{
if (srq->ibsrq) {
ibv_destroy_srq(srq->ibsrq);
}
g_slist_foreach(srq->cqe_ctx_list.list, free_cqe_ctx, dev_res);
rdma_protected_gslist_destroy(&srq->cqe_ctx_list);
}
#define CHK_ATTR(req, dev, member, fmt) ({ \
trace_rdma_check_dev_attr(#member, dev.member, req->member); \
if (req->member > dev.member) { \
@ -960,6 +1073,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
}
dev_attr->max_sge = MAX_SGE;
dev_attr->max_srq_sge = MAX_SGE;
CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64);
CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d");
@ -970,6 +1084,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev,
CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d");
CHK_ATTR(dev_attr, bk_dev_attr, max_qp_init_rd_atom, "%d");
CHK_ATTR(dev_attr, bk_dev_attr, max_ah, "%d");
CHK_ATTR(dev_attr, bk_dev_attr, max_srq, "%d");
return 0;
}

View File

@ -89,9 +89,9 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq);
int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
RdmaBackendPD *pd, RdmaBackendCQ *scq,
RdmaBackendCQ *rcq, uint32_t max_send_wr,
uint32_t max_recv_wr, uint32_t max_send_sge,
uint32_t max_recv_sge);
RdmaBackendCQ *rcq, RdmaBackendSRQ *srq,
uint32_t max_send_wr, uint32_t max_recv_wr,
uint32_t max_send_sge, uint32_t max_recv_sge);
int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
uint8_t qp_type, uint32_t qkey);
int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
@ -114,4 +114,16 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
RdmaBackendQP *qp, uint8_t qp_type,
struct ibv_sge *sge, uint32_t num_sge, void *ctx);
int rdma_backend_create_srq(RdmaBackendSRQ *srq, RdmaBackendPD *pd,
uint32_t max_wr, uint32_t max_sge,
uint32_t srq_limit);
int rdma_backend_query_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr);
int rdma_backend_modify_srq(RdmaBackendSRQ *srq, struct ibv_srq_attr *srq_attr,
int srq_attr_mask);
void rdma_backend_destroy_srq(RdmaBackendSRQ *srq,
RdmaDeviceResources *dev_res);
void rdma_backend_post_srq_recv(RdmaBackendDev *backend_dev,
RdmaBackendSRQ *srq, struct ibv_sge *sge,
uint32_t num_sge, void *ctx);
#endif

View File

@ -68,4 +68,9 @@ typedef struct RdmaBackendQP {
RdmaProtectedGSList cqe_ctx_list;
} RdmaBackendQP;
typedef struct RdmaBackendSRQ {
struct ibv_srq *ibsrq;
RdmaProtectedGSList cqe_ctx_list;
} RdmaBackendSRQ;
#endif

View File

@ -37,6 +37,8 @@ void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
dev_res->stats.tx_err);
monitor_printf(mon, "\trx_bufs : %" PRId64 "\n",
dev_res->stats.rx_bufs);
monitor_printf(mon, "\trx_srq : %" PRId64 "\n",
dev_res->stats.rx_srq);
monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n",
dev_res->stats.rx_bufs_len);
monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n",
@ -384,12 +386,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
uint8_t qp_type, uint32_t max_send_wr,
uint32_t max_send_sge, uint32_t send_cq_handle,
uint32_t max_recv_wr, uint32_t max_recv_sge,
uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
uint8_t is_srq, uint32_t srq_handle)
{
int rc;
RdmaRmQP *qp;
RdmaRmCQ *scq, *rcq;
RdmaRmPD *pd;
RdmaRmSRQ *srq = NULL;
uint32_t rm_qpn;
pd = rdma_rm_get_pd(dev_res, pd_handle);
@ -406,6 +410,16 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
return -EINVAL;
}
if (is_srq) {
srq = rdma_rm_get_srq(dev_res, srq_handle);
if (!srq) {
rdma_error_report("Invalid srqn %d", srq_handle);
return -EINVAL;
}
srq->recv_cq_handle = recv_cq_handle;
}
if (qp_type == IBV_QPT_GSI) {
scq->notify = CNT_SET;
rcq->notify = CNT_SET;
@ -422,10 +436,14 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
qp->send_cq_handle = send_cq_handle;
qp->recv_cq_handle = recv_cq_handle;
qp->opaque = opaque;
qp->is_srq = is_srq;
rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
&scq->backend_cq, &rcq->backend_cq, max_send_wr,
max_recv_wr, max_send_sge, max_recv_sge);
&scq->backend_cq, &rcq->backend_cq,
is_srq ? &srq->backend_srq : NULL,
max_send_wr, max_recv_wr, max_send_sge,
max_recv_sge);
if (rc) {
rc = -EIO;
goto out_dealloc_qp;
@ -542,6 +560,96 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
}
RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
}
int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
uint32_t *srq_handle, void *opaque)
{
RdmaRmSRQ *srq;
RdmaRmPD *pd;
int rc;
pd = rdma_rm_get_pd(dev_res, pd_handle);
if (!pd) {
return -EINVAL;
}
srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
if (!srq) {
return -ENOMEM;
}
rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd,
max_wr, max_sge, srq_limit);
if (rc) {
rc = -EIO;
goto out_dealloc_srq;
}
srq->opaque = opaque;
return 0;
out_dealloc_srq:
rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);
return rc;
}
int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
struct ibv_srq_attr *srq_attr)
{
RdmaRmSRQ *srq;
srq = rdma_rm_get_srq(dev_res, srq_handle);
if (!srq) {
return -EINVAL;
}
return rdma_backend_query_srq(&srq->backend_srq, srq_attr);
}
int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
struct ibv_srq_attr *srq_attr, int srq_attr_mask)
{
RdmaRmSRQ *srq;
srq = rdma_rm_get_srq(dev_res, srq_handle);
if (!srq) {
return -EINVAL;
}
if ((srq_attr_mask & IBV_SRQ_LIMIT) &&
(srq_attr->srq_limit == 0)) {
return -EINVAL;
}
if ((srq_attr_mask & IBV_SRQ_MAX_WR) &&
(srq_attr->max_wr == 0)) {
return -EINVAL;
}
return rdma_backend_modify_srq(&srq->backend_srq, srq_attr,
srq_attr_mask);
}
void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
RdmaRmSRQ *srq;
srq = rdma_rm_get_srq(dev_res, srq_handle);
if (!srq) {
return;
}
rdma_backend_destroy_srq(&srq->backend_srq, dev_res);
rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
}
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
void **cqe_ctx;
@ -671,6 +779,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
dev_attr->max_qp_wr, sizeof(void *));
res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
sizeof(RdmaRmSRQ));
init_ports(dev_res);
@ -689,6 +799,7 @@ void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
fini_ports(dev_res, backend_dev, ifname);
res_tbl_free(&dev_res->srq_tbl);
res_tbl_free(&dev_res->uc_tbl);
res_tbl_free(&dev_res->cqe_ctx_tbl);
res_tbl_free(&dev_res->qp_tbl);

View File

@ -53,7 +53,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
uint8_t qp_type, uint32_t max_send_wr,
uint32_t max_send_sge, uint32_t send_cq_handle,
uint32_t max_recv_wr, uint32_t max_recv_sge,
uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
uint8_t is_srq, uint32_t srq_handle);
RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
@ -65,6 +66,16 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
int attr_mask, struct ibv_qp_init_attr *init_attr);
void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle);
RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
uint32_t *srq_handle, void *opaque);
int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
struct ibv_srq_attr *srq_attr);
int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
struct ibv_srq_attr *srq_attr, int srq_attr_mask);
void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle);
int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
void *ctx);
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);

View File

@ -33,6 +33,7 @@
#define MAX_QP_RD_ATOM 16
#define MAX_QP_INIT_RD_ATOM 16
#define MAX_AH 64
#define MAX_SRQ 512
#define MAX_RM_TBL_NAME 16
#define MAX_CONSEQ_EMPTY_POLL_CQ 4096 /* considered as error above this */
@ -87,8 +88,15 @@ typedef struct RdmaRmQP {
uint32_t send_cq_handle;
uint32_t recv_cq_handle;
enum ibv_qp_state qp_state;
uint8_t is_srq;
} RdmaRmQP;
typedef struct RdmaRmSRQ {
RdmaBackendSRQ backend_srq;
uint32_t recv_cq_handle;
void *opaque;
} RdmaRmSRQ;
typedef struct RdmaRmGid {
union ibv_gid gid;
int backend_gid_index;
@ -106,6 +114,7 @@ typedef struct RdmaRmStats {
uint64_t rx_bufs;
uint64_t rx_bufs_len;
uint64_t rx_bufs_err;
uint64_t rx_srq;
uint64_t completions;
uint64_t mad_tx;
uint64_t mad_tx_err;
@ -128,6 +137,7 @@ struct RdmaDeviceResources {
RdmaRmResTbl qp_tbl;
RdmaRmResTbl cq_tbl;
RdmaRmResTbl cqe_ctx_tbl;
RdmaRmResTbl srq_tbl;
GHashTable *qp_hash; /* Keeps mapping between real and emulated */
QemuMutex lock;
RdmaRmStats stats;

View File

@ -357,7 +357,7 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
PvrdmaRing **rings, uint32_t scqe, uint32_t smax_sge,
uint32_t spages, uint32_t rcqe, uint32_t rmax_sge,
uint32_t rpages)
uint32_t rpages, uint8_t is_srq)
{
uint64_t *dir = NULL, *tbl = NULL;
PvrdmaRing *sr, *rr;
@ -365,9 +365,14 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
char ring_name[MAX_RING_NAME_SZ];
uint32_t wqe_sz;
if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES
|| !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) {
rdma_error_report("Got invalid page count for QP ring: %d, %d", spages,
if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES) {
rdma_error_report("Got invalid send page count for QP ring: %d",
spages);
return rc;
}
if (!is_srq && (!rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES)) {
rdma_error_report("Got invalid recv page count for QP ring: %d",
rpages);
return rc;
}
@ -384,8 +389,12 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
goto out;
}
sr = g_malloc(2 * sizeof(*rr));
rr = &sr[1];
if (!is_srq) {
sr = g_malloc(2 * sizeof(*rr));
rr = &sr[1];
} else {
sr = g_malloc(sizeof(*sr));
}
*rings = sr;
@ -407,15 +416,18 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
goto out_unmap_ring_state;
}
/* Create recv ring */
rr->ring_state = &sr->ring_state[1];
wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
sizeof(struct pvrdma_sge) * rmax_sge - 1);
sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], rpages);
if (rc) {
goto out_free_sr;
if (!is_srq) {
/* Create recv ring */
rr->ring_state = &sr->ring_state[1];
wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
sizeof(struct pvrdma_sge) * rmax_sge - 1);
sprintf(ring_name, "qp_rring_%" PRIx64, pdir_dma);
rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages],
rpages);
if (rc) {
goto out_free_sr;
}
}
goto out;
@ -436,10 +448,12 @@ out:
return rc;
}
static void destroy_qp_rings(PvrdmaRing *ring)
static void destroy_qp_rings(PvrdmaRing *ring, uint8_t is_srq)
{
pvrdma_ring_free(&ring[0]);
pvrdma_ring_free(&ring[1]);
if (!is_srq) {
pvrdma_ring_free(&ring[1]);
}
rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
g_free(ring);
@ -458,7 +472,7 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks,
cmd->max_recv_wr, cmd->max_recv_sge,
cmd->total_chunks - cmd->send_chunks - 1);
cmd->total_chunks - cmd->send_chunks - 1, cmd->is_srq);
if (rc) {
return rc;
}
@ -467,9 +481,9 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
cmd->max_send_wr, cmd->max_send_sge,
cmd->send_cq_handle, cmd->max_recv_wr,
cmd->max_recv_sge, cmd->recv_cq_handle, rings,
&resp->qpn);
&resp->qpn, cmd->is_srq, cmd->srq_handle);
if (rc) {
destroy_qp_rings(rings);
destroy_qp_rings(rings, cmd->is_srq);
return rc;
}
@ -531,10 +545,9 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
return -EINVAL;
}
rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
ring = (PvrdmaRing *)qp->opaque;
destroy_qp_rings(ring);
destroy_qp_rings(ring, qp->is_srq);
rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);
return 0;
}
@ -596,6 +609,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
return 0;
}
static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring,
uint64_t pdir_dma, uint32_t max_wr,
uint32_t max_sge, uint32_t nchunks)
{
uint64_t *dir = NULL, *tbl = NULL;
PvrdmaRing *r;
int rc = -EINVAL;
char ring_name[MAX_RING_NAME_SZ];
uint32_t wqe_sz;
if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
rdma_error_report("Got invalid page count for SRQ ring: %d",
nchunks);
return rc;
}
dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
if (!dir) {
rdma_error_report("Failed to map to SRQ page directory");
goto out;
}
tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
if (!tbl) {
rdma_error_report("Failed to map to SRQ page table");
goto out;
}
r = g_malloc(sizeof(*r));
*ring = r;
r->ring_state = (struct pvrdma_ring *)
rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
if (!r->ring_state) {
rdma_error_report("Failed to map tp SRQ ring state");
goto out_free_ring_mem;
}
wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
sizeof(struct pvrdma_sge) * max_sge - 1);
sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma);
rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr,
wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1);
if (rc) {
goto out_unmap_ring_state;
}
goto out;
out_unmap_ring_state:
rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE);
out_free_ring_mem:
g_free(r);
out:
rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
return rc;
}
static void destroy_srq_ring(PvrdmaRing *ring)
{
pvrdma_ring_free(ring);
rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
g_free(ring);
}
static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
struct pvrdma_cmd_create_srq *cmd = &req->create_srq;
struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp;
PvrdmaRing *ring = NULL;
int rc;
memset(resp, 0, sizeof(*resp));
rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
cmd->attrs.max_wr, cmd->attrs.max_sge,
cmd->nchunks);
if (rc) {
return rc;
}
rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle,
cmd->attrs.max_wr, cmd->attrs.max_sge,
cmd->attrs.srq_limit, &resp->srqn, ring);
if (rc) {
destroy_srq_ring(ring);
return rc;
}
return 0;
}
static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
struct pvrdma_cmd_query_srq *cmd = &req->query_srq;
struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp;
memset(resp, 0, sizeof(*resp));
return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle,
(struct ibv_srq_attr *)&resp->attrs);
}
static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq;
/* Only support SRQ limit */
if (!(cmd->attr_mask & IBV_SRQ_LIMIT) ||
(cmd->attr_mask & IBV_SRQ_MAX_WR))
return -EINVAL;
return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle,
(struct ibv_srq_attr *)&cmd->attrs,
cmd->attr_mask);
}
static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
union pvrdma_cmd_resp *rsp)
{
struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq;
RdmaRmSRQ *srq;
PvrdmaRing *ring;
srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle);
if (!srq) {
return -EINVAL;
}
ring = (PvrdmaRing *)srq->opaque;
destroy_srq_ring(ring);
rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle);
return 0;
}
struct cmd_handler {
uint32_t cmd;
uint32_t ack;
@ -621,6 +777,10 @@ static struct cmd_handler cmd_handlers[] = {
{PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc},
{PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind},
{PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
{PVRDMA_CMD_CREATE_SRQ, PVRDMA_CMD_CREATE_SRQ_RESP, create_srq},
{PVRDMA_CMD_QUERY_SRQ, PVRDMA_CMD_QUERY_SRQ_RESP, query_srq},
{PVRDMA_CMD_MODIFY_SRQ, PVRDMA_CMD_MODIFY_SRQ_RESP, modify_srq},
{PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_DESTROY_SRQ_RESP, destroy_srq},
};
int pvrdma_exec_cmd(PVRDMADev *dev)

View File

@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = {
DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
DEFINE_PROP_END_OF_LIST(),
};
@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
dsr->caps.max_mr = dev->dev_attr.max_mr;
dsr->caps.max_pd = dev->dev_attr.max_pd;
dsr->caps.max_ah = dev->dev_attr.max_ah;
dsr->caps.max_srq = dev->dev_attr.max_srq;
dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
dsr->caps.gid_tbl_len = MAX_GIDS;
dsr->caps.sys_image_guid = 0;
dsr->caps.node_guid = dev->node_guid;
@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
}
break;
case PVRDMA_UAR_SRQ_OFFSET:
if (val & PVRDMA_UAR_SRQ_RECV) {
trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
val & PVRDMA_UAR_HANDLE_MASK, 0);
pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
}
break;
default:
rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
addr, val);
@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev)
dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
TARGET_PAGE_SIZE; /* First page is ring state */
dev->dev_attr.max_srq_wr = pg_tbl_bytes /
((sizeof(struct pvrdma_rq_wqe_hdr) +
sizeof(struct pvrdma_sge)) *
dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
}
static int pvrdma_check_ram_shared(Object *obj, void *opaque)

View File

@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
memset(cqe1, 0, sizeof(*cqe1));
cqe1->wr_id = cqe->wr_id;
cqe1->qp = cqe->qp;
cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
cqe1->opcode = cqe->opcode;
cqe1->status = wc->status;
cqe1->byte_len = wc->byte_len;
@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
}
}
void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
{
RdmaRmSRQ *srq;
PvrdmaRqWqe *wqe;
PvrdmaRing *ring;
srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
if (unlikely(!srq)) {
return;
}
ring = (PvrdmaRing *)srq->opaque;
wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
while (wqe) {
CompHandlerCtx *comp_ctx;
/* Prepare CQE */
comp_ctx = g_malloc(sizeof(CompHandlerCtx));
comp_ctx->dev = dev;
comp_ctx->cq_handle = srq->recv_cq_handle;
comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
comp_ctx->cqe.qp = 0;
comp_ctx->cqe.opcode = IBV_WC_RECV;
if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
dev->dev_attr.max_sge);
complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
continue;
}
rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
(struct ibv_sge *)&wqe->sge[0],
wqe->hdr.num_sge,
comp_ctx);
pvrdma_ring_read_inc(ring);
wqe = pvrdma_ring_next_elem_read(ring);
}
}
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
RdmaRmCQ *cq;

View File

@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void);
void pvrdma_qp_ops_fini(void);
void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
#endif