block: change reqs_lock to QemuMutex

CoMutex has poor performance when lock contention is high. The tracked requests list is accessed frequently and performance suffers in QEMU multi-queue block layer scenarios. It is not necessary to use CoMutex for the requests lock. The lock is always released across coroutine yield operations. It is held for relatively short periods of time and it is not beneficial to yield when the lock is held by another coroutine. Change the lock type from CoMutex to QemuMutex to improve multi-queue block layer performance. fio randread bs=4k iodepth=64 with 4 IOThreads handling a virtio-blk device with 8 virtqueues improves from 254k to 517k IOPS (+203%). Full benchmark results and configuration details are available here: 980c40845d In the future we may wish to introduce thread-local tracked requests lists to avoid lock contention completely. That would be much more involved though. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20230808155852.2745350-3-stefanha@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2023-08-08 11:58:52 -04:00 · 2023-08-08 11:58:52 -04:00 · fa9185fcdf
commit fa9185fcdf
parent 3480ce69a9
3 changed files with 16 additions and 14 deletions
--- a/block.c
+++ b/block.c
@ -415,7 +415,7 @@ BlockDriverState *bdrv_new(void)
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
-    qemu_co_mutex_init(&bs->reqs_lock);
+    qemu_mutex_init(&bs->reqs_lock);
    qemu_mutex_init(&bs->dirty_bitmap_mutex);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();
@ -5476,6 +5476,8 @@ static void bdrv_delete(BlockDriverState *bs)
    bdrv_close(bs);
    qemu_mutex_destroy(&bs->reqs_lock);
    g_free(bs);
 }
--- a/block/io.c
+++ b/block/io.c
@ -591,9 +591,9 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
        qatomic_dec(&req->bs->serialising_in_flight);
    }
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
    QLIST_REMOVE(req, list);
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
    /*
     * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
@ -627,9 +627,9 @@ static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
    qemu_co_queue_init(&req->wait_queue);
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
@ -793,9 +793,9 @@ bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
        return;
    }
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
    bdrv_wait_serialising_requests_locked(self);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
@ -803,12 +803,12 @@ void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
 {
    IO_CODE();
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
    tracked_request_set_serialising(req, align);
    bdrv_wait_serialising_requests_locked(req);
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
 }
 int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
@ -3002,7 +3002,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
        goto early_exit;
    }
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
    current_gen = qatomic_read(&bs->write_gen);
    /* Wait until any previous flushes are completed */
@ -3012,7 +3012,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
    /* Flushes reach this point in nondecreasing current_gen order.  */
    bs->active_flush_req = true;
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
    /* Write back all layers by calling one driver function */
    if (bs->drv->bdrv_co_flush) {
@ -3100,11 +3100,11 @@ out:
        bs->flushed_gen = current_gen;
    }
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    /* Return value is ignored - it's ok if wait queue is empty */
    qemu_co_queue_next(&bs->flush_queue);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 early_exit:
    bdrv_dec_in_flight(bs);
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@ -1231,7 +1231,7 @@ struct BlockDriverState {
    unsigned int write_gen;               /* Current data generation */
    /* Protected by reqs_lock.  */
-    CoMutex reqs_lock;
+    QemuMutex reqs_lock;
    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
    CoQueue flush_queue;                  /* Serializing flush queue */
    bool active_flush_req;                /* Flush request in flight? */