aio-posix: avoid reacquiring rcu_read_lock() when polling
The first (outermost) rcu_read_lock()/rcu_read_unlock() pair is expensive. Nested calls are cheap.

This optimization increases IOPS from 73k to 162k with a Linux guest that has 2 virtio-blk,num-queues=1 devices and 99 virtio-blk,num-queues=32 devices.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20200218182708.914552-1-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
parent
c9b7d9ec21
commit
f25c0b5479
@@ -15,6 +15,7 @@
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "block/block.h"
|
||||
#include "qemu/rcu.h"
|
||||
#include "qemu/rcu_queue.h"
|
||||
#include "qemu/sockets.h"
|
||||
#include "qemu/cutils.h"
|
||||
@@ -514,6 +515,16 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
|
||||
bool progress = false;
|
||||
AioHandler *node;
|
||||
|
||||
/*
|
||||
* Optimization: ->io_poll() handlers often contain RCU read critical
|
||||
* sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
|
||||
* -> rcu_read_lock() -> ... sequences with expensive memory
|
||||
* synchronization primitives. Make the entire polling loop an RCU
|
||||
* critical section because nested rcu_read_lock()/rcu_read_unlock() calls
|
||||
* are cheap.
|
||||
*/
|
||||
RCU_READ_LOCK_GUARD();
|
||||
|
||||
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
|
||||
if (!node->deleted && node->io_poll &&
|
||||
aio_node_check(ctx, node->is_external) &&
|
||||
|
Loading…
Reference in New Issue
Block a user