diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index e9ced6f9ef..647e16399f 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -35,6 +35,7 @@ #define VENDOR_ERR_MR_SMALL 0x208 #define THR_NAME_LEN 16 +#define THR_POLL_TO 5000 typedef struct BackendCtx { uint64_t req_id; @@ -91,35 +92,82 @@ static void *comp_handler_thread(void *arg) int rc; struct ibv_cq *ev_cq; void *ev_ctx; + int flags; + GPollFD pfds[1]; + + /* Change to non-blocking mode */ + flags = fcntl(backend_dev->channel->fd, F_GETFL); + rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK); + if (rc < 0) { + pr_dbg("Fail to change to non-blocking mode\n"); + return NULL; + } pr_dbg("Starting\n"); + pfds[0].fd = backend_dev->channel->fd; + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; + + backend_dev->comp_thread.is_running = true; + while (backend_dev->comp_thread.run) { - pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel); - rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx); - pr_dbg("ibv_get_cq_event=%d\n", rc); - if (unlikely(rc)) { - pr_dbg("---> ibv_get_cq_event (%d)\n", rc); - continue; + do { + rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS); + } while (!rc && backend_dev->comp_thread.run); + + if (backend_dev->comp_thread.run) { + pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel); + rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx); + pr_dbg("ibv_get_cq_event=%d\n", rc); + if (unlikely(rc)) { + pr_dbg("---> ibv_get_cq_event (%d)\n", rc); + continue; + } + + rc = ibv_req_notify_cq(ev_cq, 0); + if (unlikely(rc)) { + pr_dbg("Error %d from ibv_req_notify_cq\n", rc); + } + + poll_cq(backend_dev->rdma_dev_res, ev_cq); + + ibv_ack_cq_events(ev_cq, 1); } - - rc = ibv_req_notify_cq(ev_cq, 0); - if (unlikely(rc)) { - pr_dbg("Error %d from ibv_req_notify_cq\n", rc); - } - - poll_cq(backend_dev->rdma_dev_res, ev_cq); - - ibv_ack_cq_events(ev_cq, 1); } pr_dbg("Going down\n"); /* TODO: Post cqe for all remaining buffs that were posted */ + backend_dev->comp_thread.is_running = false; + + qemu_thread_exit(0); + return NULL; } +static void stop_comp_thread(RdmaBackendDev *backend_dev) +{ + backend_dev->comp_thread.run = false; + while (backend_dev->comp_thread.is_running) { + pr_dbg("Waiting for thread to complete\n"); + sleep(THR_POLL_TO / SCALE_US / 2); + } +} + +static void start_comp_thread(RdmaBackendDev *backend_dev) +{ + char thread_name[THR_NAME_LEN] = {0}; + + stop_comp_thread(backend_dev); + + snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s", + ibv_get_device_name(backend_dev->ib_dev)); + backend_dev->comp_thread.run = true; + qemu_thread_create(&backend_dev->comp_thread.thread, thread_name, + comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED); +} + void rdma_backend_register_comp_handler(void (*handler)(int status, unsigned int vendor_err, void *ctx)) { @@ -706,7 +754,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, int i; int ret = 0; int num_ibv_devices; - char thread_name[THR_NAME_LEN] = {0}; struct ibv_device **dev_list; struct ibv_port_attr port_attr; @@ -800,11 +847,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, pr_dbg("interface_id=0x%" PRIx64 "\n", be64_to_cpu(backend_dev->gid.global.interface_id)); - snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s", - ibv_get_device_name(backend_dev->ib_dev)); - backend_dev->comp_thread.run = true; - qemu_thread_create(&backend_dev->comp_thread.thread, thread_name, - comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED); + backend_dev->comp_thread.run = false; + backend_dev->comp_thread.is_running = false; ah_cache_init(); @@ -823,8 +867,22 @@ out: return ret; } + +void rdma_backend_start(RdmaBackendDev *backend_dev) +{ + pr_dbg("Starting rdma_backend\n"); + start_comp_thread(backend_dev); +} + +void rdma_backend_stop(RdmaBackendDev *backend_dev) +{ + pr_dbg("Stopping rdma_backend\n"); + stop_comp_thread(backend_dev); +} + void rdma_backend_fini(RdmaBackendDev *backend_dev) { + rdma_backend_stop(backend_dev); g_hash_table_destroy(ah_hash); ibv_destroy_comp_channel(backend_dev->channel); ibv_close_device(backend_dev->context); diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 3cd636dd88..3049a73962 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -52,6 +52,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, Error **errp); void rdma_backend_fini(RdmaBackendDev *backend_dev); +void rdma_backend_start(RdmaBackendDev *backend_dev); +void rdma_backend_stop(RdmaBackendDev *backend_dev); void rdma_backend_register_comp_handler(void (*handler)(int status, unsigned int vendor_err, void *ctx)); void rdma_backend_unregister_comp_handler(void); diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index ff5cfc26eb..7404f64002 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -24,7 +24,8 @@ typedef struct RdmaDeviceResources RdmaDeviceResources; typedef struct RdmaBackendThread { QemuThread thread; QemuMutex mutex; - bool run; + bool run; /* Set by thread manager to let thread know it should exit */ + bool is_running; /* Set by the thread to report its status */ } RdmaBackendThread; typedef struct RdmaBackendDev { diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 3ed7409763..6a5073974d 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -286,8 +286,78 @@ static void init_ports(PVRDMADev *dev, Error **errp) } } +static void uninit_msix(PCIDevice *pdev, int used_vectors) +{ + PVRDMADev *dev = PVRDMA_DEV(pdev); + int i; + + for (i = 0; i < used_vectors; i++) { + msix_vector_unuse(pdev, i); + } + + msix_uninit(pdev, &dev->msix, &dev->msix); +} + +static int init_msix(PCIDevice *pdev, Error **errp) +{ + PVRDMADev *dev = PVRDMA_DEV(pdev); + int i; + int rc; + + rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX, + RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX, + RDMA_MSIX_PBA, 0, NULL); + + if (rc < 0) { + error_setg(errp, "Failed to initialize MSI-X"); + return rc; + } + + for (i = 0; i < RDMA_MAX_INTRS; i++) { + rc = msix_vector_use(PCI_DEVICE(dev), i); + if (rc < 0) { + error_setg(errp, "Fail mark MSI-X vector %d", i); + uninit_msix(pdev, i); + return rc; + } + } + + return 0; +} + +static void pvrdma_fini(PCIDevice *pdev) +{ + PVRDMADev *dev = PVRDMA_DEV(pdev); + + pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + + pvrdma_qp_ops_fini(); + + rdma_rm_fini(&dev->rdma_dev_res); + + rdma_backend_fini(&dev->backend_dev); + + free_dsr(dev); + + if (msix_enabled(pdev)) { + uninit_msix(pdev, RDMA_MAX_INTRS); + } +} + +static void pvrdma_stop(PVRDMADev *dev) +{ + rdma_backend_stop(&dev->backend_dev); +} + +static void pvrdma_start(PVRDMADev *dev) +{ + rdma_backend_start(&dev->backend_dev); +} + static void activate_device(PVRDMADev *dev) { + pvrdma_start(dev); set_reg_val(dev, PVRDMA_REG_ERR, 0); pr_dbg("Device activated\n"); } @@ -300,7 +370,10 @@ static int unquiesce_device(PVRDMADev *dev) static int reset_device(PVRDMADev *dev) { + pvrdma_stop(dev); + pr_dbg("Device reset complete\n"); + return 0; } @@ -469,45 +542,6 @@ static void init_regs(PCIDevice *pdev) set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF); } -static void uninit_msix(PCIDevice *pdev, int used_vectors) -{ - PVRDMADev *dev = PVRDMA_DEV(pdev); - int i; - - for (i = 0; i < used_vectors; i++) { - msix_vector_unuse(pdev, i); - } - - msix_uninit(pdev, &dev->msix, &dev->msix); -} - -static int init_msix(PCIDevice *pdev, Error **errp) -{ - PVRDMADev *dev = PVRDMA_DEV(pdev); - int i; - int rc; - - rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX, - RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX, - RDMA_MSIX_PBA, 0, NULL); - - if (rc < 0) { - error_setg(errp, "Failed to initialize MSI-X"); - return rc; - } - - for (i = 0; i < RDMA_MAX_INTRS; i++) { - rc = msix_vector_use(PCI_DEVICE(dev), i); - if (rc < 0) { - error_setg(errp, "Fail mark MSI-X vercor %d", i); - uninit_msix(pdev, i); - return rc; - } - } - - return 0; -} - static void init_dev_caps(PVRDMADev *dev) { size_t pg_tbl_bytes = TARGET_PAGE_SIZE * @@ -602,22 +636,7 @@ out: static void pvrdma_exit(PCIDevice *pdev) { - PVRDMADev *dev = PVRDMA_DEV(pdev); - - pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); - - pvrdma_qp_ops_fini(); - - rdma_rm_fini(&dev->rdma_dev_res); - - rdma_backend_fini(&dev->backend_dev); - - free_dsr(dev); - - if (msix_enabled(pdev)) { - uninit_msix(pdev, RDMA_MAX_INTRS); - } + pvrdma_fini(pdev); } static void pvrdma_class_init(ObjectClass *klass, void *data)