2020-09-18 11:09:08 +03:00
|
|
|
/*
|
|
|
|
* Sharing QEMU devices via vhost-user protocol
|
|
|
|
*
|
|
|
|
* Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
|
|
|
|
* Copyright (c) 2020 Red Hat, Inc.
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or
|
|
|
|
* later. See the COPYING file in the top-level directory.
|
|
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
2023-02-10 14:19:31 +03:00
|
|
|
#include "qemu/error-report.h"
|
2020-09-18 11:09:08 +03:00
|
|
|
#include "qemu/main-loop.h"
|
2020-09-24 18:15:48 +03:00
|
|
|
#include "qemu/vhost-user-server.h"
|
2020-09-24 18:15:45 +03:00
|
|
|
#include "block/aio-wait.h"
|
2020-09-18 11:09:08 +03:00
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
/*
|
|
|
|
* Theory of operation:
|
|
|
|
*
|
|
|
|
* VuServer is started and stopped by vhost_user_server_start() and
|
|
|
|
* vhost_user_server_stop() from the main loop thread. Starting the server
|
|
|
|
* opens a vhost-user UNIX domain socket and listens for incoming connections.
|
|
|
|
* Only one connection is allowed at a time.
|
|
|
|
*
|
|
|
|
* The connection is handled by the vu_client_trip() coroutine in the
|
|
|
|
* VuServer->ctx AioContext. The coroutine consists of a vu_dispatch() loop
|
|
|
|
* where libvhost-user calls vu_message_read() to receive the next vhost-user
|
|
|
|
* protocol messages over the UNIX domain socket.
|
|
|
|
*
|
|
|
|
* When virtqueues are set up libvhost-user calls set_watch() to monitor kick
|
|
|
|
* fds. These fds are also handled in the VuServer->ctx AioContext.
|
|
|
|
*
|
|
|
|
* Both vu_client_trip() and kick fd monitoring can be stopped by shutting down
|
|
|
|
* the socket connection. Shutting down the socket connection causes
|
|
|
|
* vu_message_read() to fail since no more data can be received from the socket.
|
|
|
|
* After vu_dispatch() fails, vu_client_trip() calls vu_deinit() to stop
|
|
|
|
* libvhost-user before terminating the coroutine. vu_deinit() calls
|
|
|
|
* remove_watch() to stop monitoring kick fds and this stops virtqueue
|
|
|
|
* processing.
|
|
|
|
*
|
|
|
|
* When vu_client_trip() has finished cleaning up it schedules a BH in the main
|
|
|
|
* loop thread to accept the next client connection.
|
|
|
|
*
|
|
|
|
* When libvhost-user detects an error it calls panic_cb() and sets the
|
|
|
|
* dev->broken flag. Both vu_client_trip() and kick fd processing stop when
|
|
|
|
* the dev->broken flag is set.
|
|
|
|
*
|
|
|
|
* It is possible to switch AioContexts using
|
|
|
|
* vhost_user_server_detach_aio_context() and
|
|
|
|
* vhost_user_server_attach_aio_context(). They stop monitoring fds in the old
|
|
|
|
* AioContext and resume monitoring in the new AioContext. The vu_client_trip()
|
|
|
|
* coroutine remains in a yielded state during the switch. This is made
|
|
|
|
* possible by QIOChannel's support for spurious coroutine re-entry in
|
|
|
|
* qio_channel_yield(). The coroutine will restart I/O when re-entered from the
|
|
|
|
* new AioContext.
|
|
|
|
*/
|
|
|
|
|
2020-09-18 11:09:08 +03:00
|
|
|
/* Close each file descriptor stored in vmsg->fds[0 .. vmsg->fd_num). */
static void vmsg_close_fds(VhostUserMsg *vmsg)
{
    int idx = 0;

    while (idx < vmsg->fd_num) {
        close(vmsg->fds[idx]);
        idx++;
    }
}
|
|
|
|
|
|
|
|
/*
 * Switch each file descriptor stored in vmsg->fds[0 .. vmsg->fd_num) to
 * non-blocking mode.
 */
static void vmsg_unblock_fds(VhostUserMsg *vmsg)
{
    int idx = 0;

    while (idx < vmsg->fd_num) {
        qemu_socket_set_nonblock(vmsg->fds[idx]);
        idx++;
    }
}
|
|
|
|
|
|
|
|
/*
 * Panic callback handed to vu_init(): log the message supplied by
 * libvhost-user through error_report(). @vu_dev is not used here.
 */
static void panic_cb(VuDev *vu_dev, const char *buf)
{
    const char *reason = buf;

    error_report("vu_panic: %s", reason);
}
|
|
|
|
|
2023-05-16 22:02:23 +03:00
|
|
|
/*
 * Account for one more in-flight request on @server.
 *
 * Asserts that server->wait_idle is not set, i.e. that vu_client_trip() is
 * not currently waiting for the in-flight counter to drop to zero.
 */
void vhost_user_server_inc_in_flight(VuServer *server)
{
    assert(!server->wait_idle);

    qatomic_inc(&server->in_flight);
}
|
|
|
|
|
2023-05-16 22:02:23 +03:00
|
|
|
/*
 * Account for one completed request on @server.
 *
 * When the counter drops to zero and server->wait_idle is set, wake the
 * server->co_trip coroutine (vu_client_trip() yields with wait_idle set
 * while requests are still in flight).
 */
void vhost_user_server_dec_in_flight(VuServer *server)
{
    /* qatomic_fetch_dec() returns the value before the decrement */
    if (qatomic_fetch_dec(&server->in_flight) != 1) {
        return;
    }

    if (!server->wait_idle) {
        return;
    }

    aio_co_wake(server->co_trip);
}
|
|
|
|
|
2023-05-16 22:02:24 +03:00
|
|
|
/* Return true if @server's in-flight request counter is greater than zero. */
bool vhost_user_server_has_in_flight(VuServer *server)
{
    if (qatomic_load_acquire(&server->in_flight) > 0) {
        return true;
    }

    return false;
}
|
|
|
|
|
2020-09-18 11:09:08 +03:00
|
|
|
static bool coroutine_fn
|
|
|
|
vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
|
|
|
|
{
|
|
|
|
struct iovec iov = {
|
|
|
|
.iov_base = (char *)vmsg,
|
|
|
|
.iov_len = VHOST_USER_HDR_SIZE,
|
|
|
|
};
|
|
|
|
int rc, read_bytes = 0;
|
|
|
|
Error *local_err = NULL;
|
|
|
|
const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
|
|
|
|
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
|
|
|
|
QIOChannel *ioc = server->ioc;
|
|
|
|
|
2020-09-24 18:15:43 +03:00
|
|
|
vmsg->fd_num = 0;
|
2020-09-18 11:09:08 +03:00
|
|
|
if (!ioc) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(qemu_in_coroutine());
|
|
|
|
do {
|
2020-09-24 18:15:43 +03:00
|
|
|
size_t nfds = 0;
|
|
|
|
int *fds = NULL;
|
|
|
|
|
2020-09-18 11:09:08 +03:00
|
|
|
/*
|
|
|
|
* qio_channel_readv_full may have short reads, keeping calling it
|
|
|
|
* until getting VHOST_USER_HDR_SIZE or 0 bytes in total
|
|
|
|
*/
|
2022-12-20 21:44:17 +03:00
|
|
|
rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
|
2020-09-18 11:09:08 +03:00
|
|
|
if (rc < 0) {
|
|
|
|
if (rc == QIO_CHANNEL_ERR_BLOCK) {
|
2020-09-24 18:15:43 +03:00
|
|
|
assert(local_err == NULL);
|
2023-08-31 01:48:02 +03:00
|
|
|
if (server->ctx) {
|
|
|
|
server->in_qio_channel_yield = true;
|
|
|
|
qio_channel_yield(ioc, G_IO_IN);
|
|
|
|
server->in_qio_channel_yield = false;
|
|
|
|
} else {
|
export/vhost-user-blk: Fix consecutive drains
The vhost-user-blk export implement AioContext switches in its drain
implementation. This means that on drain_begin, it detaches the server
from its AioContext and on drain_end, attaches it again and schedules
the server->co_trip coroutine in the updated AioContext.
However, nothing guarantees that server->co_trip is even safe to be
scheduled. Not only is it unclear that the coroutine is actually in a
state where it can be reentered externally without causing problems, but
with two consecutive drains, it is possible that the scheduled coroutine
didn't have a chance yet to run and trying to schedule an already
scheduled coroutine a second time crashes with an assertion failure.
Following the model of NBD, this commit makes the vhost-user-blk export
shut down server->co_trip during drain so that resuming the export means
creating and scheduling a new coroutine, which is always safe.
There is one exception: If the drain call didn't poll (for example, this
happens in the context of bdrv_graph_wrlock()), then the coroutine
didn't have a chance to shut down. However, in this case the AioContext
can't have changed; changing the AioContext always involves a polling
drain. So in this case we can simply assert that the AioContext is
unchanged and just leave the coroutine running or wake it up if it has
yielded to wait for the AioContext to be attached again.
Fixes: e1054cd4aad03a493a5d1cded7508f7c348205bf
Fixes: https://issues.redhat.com/browse/RHEL-1708
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20231127115755.22846-1-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2023-11-27 14:57:55 +03:00
|
|
|
return false;
|
2023-08-31 01:48:02 +03:00
|
|
|
}
|
2020-09-18 11:09:08 +03:00
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
error_report_err(local_err);
|
2020-09-24 18:15:43 +03:00
|
|
|
goto fail;
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
|
|
|
}
|
2020-09-24 18:15:43 +03:00
|
|
|
|
|
|
|
if (nfds > 0) {
|
|
|
|
if (vmsg->fd_num + nfds > max_fds) {
|
2020-09-18 11:09:08 +03:00
|
|
|
error_report("A maximum of %zu fds are allowed, "
|
|
|
|
"however got %zu fds now",
|
2020-09-24 18:15:43 +03:00
|
|
|
max_fds, vmsg->fd_num + nfds);
|
|
|
|
g_free(fds);
|
2020-09-18 11:09:08 +03:00
|
|
|
goto fail;
|
|
|
|
}
|
2020-09-24 18:15:43 +03:00
|
|
|
memcpy(vmsg->fds + vmsg->fd_num, fds, nfds * sizeof(vmsg->fds[0]));
|
|
|
|
vmsg->fd_num += nfds;
|
|
|
|
g_free(fds);
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
2020-09-24 18:15:43 +03:00
|
|
|
|
|
|
|
if (rc == 0) { /* socket closed */
|
|
|
|
goto fail;
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
|
|
|
|
2020-09-24 18:15:43 +03:00
|
|
|
iov.iov_base += rc;
|
|
|
|
iov.iov_len -= rc;
|
|
|
|
read_bytes += rc;
|
|
|
|
} while (read_bytes != VHOST_USER_HDR_SIZE);
|
|
|
|
|
2020-09-18 11:09:08 +03:00
|
|
|
/* qio_channel_readv_full will make socket fds blocking, unblock them */
|
|
|
|
vmsg_unblock_fds(vmsg);
|
|
|
|
if (vmsg->size > sizeof(vmsg->payload)) {
|
|
|
|
error_report("Error: too big message request: %d, "
|
|
|
|
"size: vmsg->size: %u, "
|
|
|
|
"while sizeof(vmsg->payload) = %zu",
|
|
|
|
vmsg->request, vmsg->size, sizeof(vmsg->payload));
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct iovec iov_payload = {
|
|
|
|
.iov_base = (char *)&vmsg->payload,
|
|
|
|
.iov_len = vmsg->size,
|
|
|
|
};
|
|
|
|
if (vmsg->size) {
|
|
|
|
rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
|
2020-09-24 18:15:44 +03:00
|
|
|
if (rc != 1) {
|
|
|
|
if (local_err) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
}
|
2020-09-18 11:09:08 +03:00
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
vmsg_close_fds(vmsg);
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static coroutine_fn void vu_client_trip(void *opaque)
|
|
|
|
{
|
|
|
|
VuServer *server = opaque;
|
2020-09-24 18:15:45 +03:00
|
|
|
VuDev *vu_dev = &server->vu_dev;
|
2020-09-18 11:09:08 +03:00
|
|
|
|
export/vhost-user-blk: Fix consecutive drains
The vhost-user-blk export implement AioContext switches in its drain
implementation. This means that on drain_begin, it detaches the server
from its AioContext and on drain_end, attaches it again and schedules
the server->co_trip coroutine in the updated AioContext.
However, nothing guarantees that server->co_trip is even safe to be
scheduled. Not only is it unclear that the coroutine is actually in a
state where it can be reentered externally without causing problems, but
with two consecutive drains, it is possible that the scheduled coroutine
didn't have a chance yet to run and trying to schedule an already
scheduled coroutine a second time crashes with an assertion failure.
Following the model of NBD, this commit makes the vhost-user-blk export
shut down server->co_trip during drain so that resuming the export means
creating and scheduling a new coroutine, which is always safe.
There is one exception: If the drain call didn't poll (for example, this
happens in the context of bdrv_graph_wrlock()), then the coroutine
didn't have a chance to shut down. However, in this case the AioContext
can't have changed; changing the AioContext always involves a polling
drain. So in this case we can simply assert that the AioContext is
unchanged and just leave the coroutine running or wake it up if it has
yielded to wait for the AioContext to be attached again.
Fixes: e1054cd4aad03a493a5d1cded7508f7c348205bf
Fixes: https://issues.redhat.com/browse/RHEL-1708
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20231127115755.22846-1-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2023-11-27 14:57:55 +03:00
|
|
|
while (!vu_dev->broken) {
|
|
|
|
if (server->quiescing) {
|
|
|
|
server->co_trip = NULL;
|
|
|
|
aio_wait_kick();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* vu_dispatch() returns false if server->ctx went away */
|
|
|
|
if (!vu_dispatch(vu_dev) && server->ctx) {
|
|
|
|
break;
|
|
|
|
}
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
|
|
|
|
2023-05-16 22:02:24 +03:00
|
|
|
if (vhost_user_server_has_in_flight(server)) {
|
2022-01-25 18:14:35 +03:00
|
|
|
/* Wait for requests to complete before we can unmap the memory */
|
|
|
|
server->wait_idle = true;
|
|
|
|
qemu_coroutine_yield();
|
|
|
|
server->wait_idle = false;
|
|
|
|
}
|
2023-05-16 22:02:24 +03:00
|
|
|
assert(!vhost_user_server_has_in_flight(server));
|
2022-01-25 18:14:35 +03:00
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
vu_deinit(vu_dev);
|
2020-09-18 11:09:08 +03:00
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
/* vu_deinit() should have called remove_watch() */
|
|
|
|
assert(QTAILQ_EMPTY(&server->vu_fd_watches));
|
|
|
|
|
|
|
|
object_unref(OBJECT(server->sioc));
|
|
|
|
server->sioc = NULL;
|
|
|
|
|
|
|
|
object_unref(OBJECT(server->ioc));
|
|
|
|
server->ioc = NULL;
|
|
|
|
|
|
|
|
server->co_trip = NULL;
|
|
|
|
if (server->restart_listener_bh) {
|
|
|
|
qemu_bh_schedule(server->restart_listener_bh);
|
|
|
|
}
|
|
|
|
aio_wait_kick();
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* a wrapper for vu_kick_cb
|
|
|
|
*
|
|
|
|
* since aio_dispatch can only pass one user data pointer to the
|
|
|
|
* callback function, pack VuDev and pvt into a struct. Then unpack it
|
|
|
|
* and pass them to vu_kick_cb
|
|
|
|
*/
|
|
|
|
static void kick_handler(void *opaque)
|
|
|
|
{
|
|
|
|
VuFdWatch *vu_fd_watch = opaque;
|
2020-09-24 18:15:45 +03:00
|
|
|
VuDev *vu_dev = vu_fd_watch->vu_dev;
|
2020-09-18 11:09:08 +03:00
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
vu_fd_watch->cb(vu_dev, 0, vu_fd_watch->pvt);
|
|
|
|
|
|
|
|
/* Stop vu_client_trip() if an error occurred in vu_fd_watch->cb() */
|
|
|
|
if (vu_dev->broken) {
|
|
|
|
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
|
|
|
|
|
|
|
|
qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
|
|
|
|
}
|
|
|
|
}
|
2020-09-18 11:09:08 +03:00
|
|
|
|
|
|
|
/*
 * Look up the watch registered for @fd in server->vu_fd_watches.
 *
 * Returns the first matching VuFdWatch, or NULL if @fd is not watched.
 */
static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
{
    VuFdWatch *watch;

    /* Nothing is removed while iterating, so the plain FOREACH suffices */
    QTAILQ_FOREACH(watch, &server->vu_fd_watches, next) {
        if (watch->fd == fd) {
            return watch;
        }
    }

    return NULL;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
set_watch(VuDev *vu_dev, int fd, int vu_evt,
|
|
|
|
vu_watch_cb cb, void *pvt)
|
|
|
|
{
|
|
|
|
|
|
|
|
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
|
|
|
|
g_assert(vu_dev);
|
|
|
|
g_assert(fd >= 0);
|
|
|
|
g_assert(cb);
|
|
|
|
|
|
|
|
VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
|
|
|
|
|
|
|
|
if (!vu_fd_watch) {
|
2023-09-04 19:12:29 +03:00
|
|
|
vu_fd_watch = g_new0(VuFdWatch, 1);
|
2020-09-18 11:09:08 +03:00
|
|
|
|
|
|
|
QTAILQ_INSERT_TAIL(&server->vu_fd_watches, vu_fd_watch, next);
|
|
|
|
|
|
|
|
vu_fd_watch->fd = fd;
|
|
|
|
vu_fd_watch->cb = cb;
|
2022-04-25 16:33:47 +03:00
|
|
|
qemu_socket_set_nonblock(fd);
|
2023-08-31 01:48:02 +03:00
|
|
|
aio_set_fd_handler(server->ctx, fd, kick_handler,
|
aio-posix: split poll check from ready handler
Adaptive polling measures the execution time of the polling check plus
handlers called when a polled event becomes ready. Handlers can take a
significant amount of time, making it look like polling was running for
a long time when in fact the event handler was running for a long time.
For example, on Linux the io_submit(2) syscall invoked when a virtio-blk
device's virtqueue becomes ready can take 10s of microseconds. This
can exceed the default polling interval (32 microseconds) and cause
adaptive polling to stop polling.
By excluding the handler's execution time from the polling check we make
the adaptive polling calculation more accurate. As a result, the event
loop now stays in polling mode where previously it would have fallen
back to file descriptor monitoring.
The following data was collected with virtio-blk num-queues=2
event_idx=off using an IOThread. Before:
168k IOPS, IOThread syscalls:
9837.115 ( 0.020 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 16, iocbpp: 0x7fcb9f937db0) = 16
9837.158 ( 0.002 ms): IO iothread1/620155 write(fd: 103, buf: 0x556a2ef71b88, count: 8) = 8
9837.161 ( 0.001 ms): IO iothread1/620155 write(fd: 104, buf: 0x556a2ef71b88, count: 8) = 8
9837.163 ( 0.001 ms): IO iothread1/620155 ppoll(ufds: 0x7fcb90002800, nfds: 4, tsp: 0x7fcb9f1342d0, sigsetsize: 8) = 3
9837.164 ( 0.001 ms): IO iothread1/620155 read(fd: 107, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.174 ( 0.001 ms): IO iothread1/620155 read(fd: 105, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.176 ( 0.001 ms): IO iothread1/620155 read(fd: 106, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.209 ( 0.035 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 32, iocbpp: 0x7fca7d0cebe0) = 32
174k IOPS (+3.6%), IOThread syscalls:
9809.566 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0cdd62be0) = 32
9809.625 ( 0.001 ms): IO iothread1/623061 write(fd: 103, buf: 0x5647cfba5f58, count: 8) = 8
9809.627 ( 0.002 ms): IO iothread1/623061 write(fd: 104, buf: 0x5647cfba5f58, count: 8) = 8
9809.663 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0d0388b50) = 32
Notice that ppoll(2) and eventfd read(2) syscalls are eliminated because
the IOThread stays in polling mode instead of falling back to file
descriptor monitoring.
As usual, polling is not implemented on Windows so this patch ignores
the new io_poll_read() callback in aio-win32.c.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20211207132336.36627-2-stefanha@redhat.com
[Fixed up aio_set_event_notifier() calls in
tests/unit/test-fdmon-epoll.c added after this series was queued.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2021-12-07 16:23:31 +03:00
|
|
|
NULL, NULL, NULL, vu_fd_watch);
|
2020-09-18 11:09:08 +03:00
|
|
|
vu_fd_watch->vu_dev = vu_dev;
|
|
|
|
vu_fd_watch->pvt = pvt;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * remove_watch callback passed to vu_init(): stop monitoring @fd.
 *
 * Unregisters the fd handler from server->ctx, unlinks the VuFdWatch from
 * server->vu_fd_watches and frees it. Does nothing if @fd is not watched.
 */
static void remove_watch(VuDev *vu_dev, int fd)
{
    VuServer *server;
    VuFdWatch *watch;

    g_assert(vu_dev);
    g_assert(fd >= 0);

    server = container_of(vu_dev, VuServer, vu_dev);
    watch = find_vu_fd_watch(server, fd);

    if (watch == NULL) {
        return;
    }

    /* All-NULL handlers remove the fd from the AioContext */
    aio_set_fd_handler(server->ctx, fd, NULL, NULL, NULL, NULL, NULL);

    QTAILQ_REMOVE(&server->vu_fd_watches, watch, next);
    g_free(watch);
}
|
|
|
|
|
|
|
|
|
|
|
|
static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
|
|
|
|
gpointer opaque)
|
|
|
|
{
|
|
|
|
VuServer *server = opaque;
|
|
|
|
|
|
|
|
if (server->sioc) {
|
|
|
|
warn_report("Only one vhost-user client is allowed to "
|
|
|
|
"connect the server one time");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!vu_init(&server->vu_dev, server->max_queues, sioc->fd, panic_cb,
|
|
|
|
vu_message_read, set_watch, remove_watch, server->vu_iface)) {
|
|
|
|
error_report("Failed to initialize libvhost-user");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Unset the callback function for network listener to make another
|
|
|
|
* vhost-user client keeping waiting until this client disconnects
|
|
|
|
*/
|
|
|
|
qio_net_listener_set_client_func(server->listener,
|
|
|
|
NULL,
|
|
|
|
NULL,
|
|
|
|
NULL);
|
|
|
|
server->sioc = sioc;
|
|
|
|
/*
|
|
|
|
* Increase the object reference, so sioc will not freed by
|
|
|
|
* qio_net_listener_channel_func which will call object_unref(OBJECT(sioc))
|
|
|
|
*/
|
|
|
|
object_ref(OBJECT(server->sioc));
|
|
|
|
qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");
|
|
|
|
server->ioc = QIO_CHANNEL(sioc);
|
|
|
|
object_ref(OBJECT(server->ioc));
|
2020-09-24 18:15:45 +03:00
|
|
|
|
|
|
|
/* TODO vu_message_write() spins if non-blocking! */
|
2020-09-24 18:15:39 +03:00
|
|
|
qio_channel_set_blocking(server->ioc, false, NULL);
|
2020-09-18 11:09:08 +03:00
|
|
|
|
2023-08-31 01:48:02 +03:00
|
|
|
qio_channel_set_follow_coroutine_ctx(server->ioc, true);
|
|
|
|
|
export/vhost-user-blk: Fix consecutive drains
The vhost-user-blk export implement AioContext switches in its drain
implementation. This means that on drain_begin, it detaches the server
from its AioContext and on drain_end, attaches it again and schedules
the server->co_trip coroutine in the updated AioContext.
However, nothing guarantees that server->co_trip is even safe to be
scheduled. Not only is it unclear that the coroutine is actually in a
state where it can be reentered externally without causing problems, but
with two consecutive drains, it is possible that the scheduled coroutine
didn't have a chance yet to run and trying to schedule an already
scheduled coroutine a second time crashes with an assertion failure.
Following the model of NBD, this commit makes the vhost-user-blk export
shut down server->co_trip during drain so that resuming the export means
creating and scheduling a new coroutine, which is always safe.
There is one exception: If the drain call didn't poll (for example, this
happens in the context of bdrv_graph_wrlock()), then the coroutine
didn't have a chance to shut down. However, in this case the AioContext
can't have changed; changing the AioContext always involves a polling
drain. So in this case we can simply assert that the AioContext is
unchanged and just leave the coroutine running or wake it up if it has
yielded to wait for the AioContext to be attached again.
Fixes: e1054cd4aad03a493a5d1cded7508f7c348205bf
Fixes: https://issues.redhat.com/browse/RHEL-1708
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20231127115755.22846-1-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2023-11-27 14:57:55 +03:00
|
|
|
/* Attaching the AioContext starts the vu_client_trip coroutine */
|
2020-09-24 18:15:45 +03:00
|
|
|
aio_context_acquire(server->ctx);
|
|
|
|
vhost_user_server_attach_aio_context(server, server->ctx);
|
|
|
|
aio_context_release(server->ctx);
|
|
|
|
}
|
2020-09-18 11:09:08 +03:00
|
|
|
|
2023-03-23 17:58:53 +03:00
|
|
|
/* server->ctx acquired by caller */
|
2020-09-18 11:09:08 +03:00
|
|
|
void vhost_user_server_stop(VuServer *server)
|
|
|
|
{
|
2020-09-24 18:15:45 +03:00
|
|
|
qemu_bh_delete(server->restart_listener_bh);
|
|
|
|
server->restart_listener_bh = NULL;
|
|
|
|
|
2020-09-18 11:09:08 +03:00
|
|
|
if (server->sioc) {
|
2020-09-24 18:15:45 +03:00
|
|
|
VuFdWatch *vu_fd_watch;
|
|
|
|
|
|
|
|
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
|
aio: remove aio_disable_external() API
All callers now pass is_external=false to aio_set_fd_handler() and
aio_set_event_notifier(). The aio_disable_external() API that
temporarily disables fd handlers that were registered is_external=true
is therefore dead code.
Remove aio_disable_external(), aio_enable_external(), and the
is_external arguments to aio_set_fd_handler() and
aio_set_event_notifier().
The entire test-fdmon-epoll test is removed because its sole purpose was
testing aio_disable_external().
Parts of this patch were generated using the following coccinelle
(https://coccinelle.lip6.fr/) semantic patch:
@@
expression ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque;
@@
- aio_set_fd_handler(ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque)
+ aio_set_fd_handler(ctx, fd, io_read, io_write, io_poll, io_poll_ready, opaque)
@@
expression ctx, notifier, is_external, io_read, io_poll, io_poll_ready;
@@
- aio_set_event_notifier(ctx, notifier, is_external, io_read, io_poll, io_poll_ready)
+ aio_set_event_notifier(ctx, notifier, io_read, io_poll, io_poll_ready)
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-21-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2023-05-16 22:02:38 +03:00
|
|
|
aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
|
aio-posix: split poll check from ready handler
Adaptive polling measures the execution time of the polling check plus
handlers called when a polled event becomes ready. Handlers can take a
significant amount of time, making it look like polling was running for
a long time when in fact the event handler was running for a long time.
For example, on Linux the io_submit(2) syscall invoked when a virtio-blk
device's virtqueue becomes ready can take 10s of microseconds. This
can exceed the default polling interval (32 microseconds) and cause
adaptive polling to stop polling.
By excluding the handler's execution time from the polling check we make
the adaptive polling calculation more accurate. As a result, the event
loop now stays in polling mode where previously it would have fallen
back to file descriptor monitoring.
The following data was collected with virtio-blk num-queues=2
event_idx=off using an IOThread. Before:
168k IOPS, IOThread syscalls:
9837.115 ( 0.020 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 16, iocbpp: 0x7fcb9f937db0) = 16
9837.158 ( 0.002 ms): IO iothread1/620155 write(fd: 103, buf: 0x556a2ef71b88, count: 8) = 8
9837.161 ( 0.001 ms): IO iothread1/620155 write(fd: 104, buf: 0x556a2ef71b88, count: 8) = 8
9837.163 ( 0.001 ms): IO iothread1/620155 ppoll(ufds: 0x7fcb90002800, nfds: 4, tsp: 0x7fcb9f1342d0, sigsetsize: 8) = 3
9837.164 ( 0.001 ms): IO iothread1/620155 read(fd: 107, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.174 ( 0.001 ms): IO iothread1/620155 read(fd: 105, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.176 ( 0.001 ms): IO iothread1/620155 read(fd: 106, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.209 ( 0.035 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 32, iocbpp: 0x7fca7d0cebe0) = 32
174k IOPS (+3.6%), IOThread syscalls:
9809.566 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0cdd62be0) = 32
9809.625 ( 0.001 ms): IO iothread1/623061 write(fd: 103, buf: 0x5647cfba5f58, count: 8) = 8
9809.627 ( 0.002 ms): IO iothread1/623061 write(fd: 104, buf: 0x5647cfba5f58, count: 8) = 8
9809.663 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0d0388b50) = 32
Notice that ppoll(2) and eventfd read(2) syscalls are eliminated because
the IOThread stays in polling mode instead of falling back to file
descriptor monitoring.
As usual, polling is not implemented on Windows so this patch ignores
the new io_poll_read() callback in aio-win32.c.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20211207132336.36627-2-stefanha@redhat.com
[Fixed up aio_set_event_notifier() calls in
tests/unit/test-fdmon-epoll.c added after this series was queued.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2021-12-07 16:23:31 +03:00
|
|
|
NULL, NULL, NULL, NULL, vu_fd_watch);
|
2020-09-24 18:15:45 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
|
|
|
|
|
|
|
|
AIO_WAIT_WHILE(server->ctx, server->co_trip);
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (server->listener) {
|
|
|
|
qio_net_listener_disconnect(server->listener);
|
|
|
|
object_unref(OBJECT(server->listener));
|
|
|
|
}
|
2020-09-24 18:15:45 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allow the next client to connect to the server. Called from a BH in the main
|
|
|
|
* loop.
|
|
|
|
*/
|
|
|
|
static void restart_listener_bh(void *opaque)
|
|
|
|
{
|
|
|
|
VuServer *server = opaque;
|
2020-09-18 11:09:08 +03:00
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
qio_net_listener_set_client_func(server->listener, vu_accept, server,
|
|
|
|
NULL);
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
/* Called with ctx acquired */
|
|
|
|
void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
|
2020-09-18 11:09:08 +03:00
|
|
|
{
|
2020-09-24 18:15:45 +03:00
|
|
|
VuFdWatch *vu_fd_watch;
|
2020-09-18 11:09:08 +03:00
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
server->ctx = ctx;
|
2020-09-18 11:09:08 +03:00
|
|
|
|
|
|
|
if (!server->sioc) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
|
aio: remove aio_disable_external() API
All callers now pass is_external=false to aio_set_fd_handler() and
aio_set_event_notifier(). The aio_disable_external() API that
temporarily disables fd handlers that were registered is_external=true
is therefore dead code.
Remove aio_disable_external(), aio_enable_external(), and the
is_external arguments to aio_set_fd_handler() and
aio_set_event_notifier().
The entire test-fdmon-epoll test is removed because its sole purpose was
testing aio_disable_external().
Parts of this patch were generated using the following coccinelle
(https://coccinelle.lip6.fr/) semantic patch:
@@
expression ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque;
@@
- aio_set_fd_handler(ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque)
+ aio_set_fd_handler(ctx, fd, io_read, io_write, io_poll, io_poll_ready, opaque)
@@
expression ctx, notifier, is_external, io_read, io_poll, io_poll_ready;
@@
- aio_set_event_notifier(ctx, notifier, is_external, io_read, io_poll, io_poll_ready)
+ aio_set_event_notifier(ctx, notifier, io_read, io_poll, io_poll_ready)
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-21-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2023-05-16 22:02:38 +03:00
|
|
|
aio_set_fd_handler(ctx, vu_fd_watch->fd, kick_handler, NULL,
|
aio-posix: split poll check from ready handler
Adaptive polling measures the execution time of the polling check plus
handlers called when a polled event becomes ready. Handlers can take a
significant amount of time, making it look like polling was running for
a long time when in fact the event handler was running for a long time.
For example, on Linux the io_submit(2) syscall invoked when a virtio-blk
device's virtqueue becomes ready can take 10s of microseconds. This
can exceed the default polling interval (32 microseconds) and cause
adaptive polling to stop polling.
By excluding the handler's execution time from the polling check we make
the adaptive polling calculation more accurate. As a result, the event
loop now stays in polling mode where previously it would have fallen
back to file descriptor monitoring.
The following data was collected with virtio-blk num-queues=2
event_idx=off using an IOThread. Before:
168k IOPS, IOThread syscalls:
9837.115 ( 0.020 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 16, iocbpp: 0x7fcb9f937db0) = 16
9837.158 ( 0.002 ms): IO iothread1/620155 write(fd: 103, buf: 0x556a2ef71b88, count: 8) = 8
9837.161 ( 0.001 ms): IO iothread1/620155 write(fd: 104, buf: 0x556a2ef71b88, count: 8) = 8
9837.163 ( 0.001 ms): IO iothread1/620155 ppoll(ufds: 0x7fcb90002800, nfds: 4, tsp: 0x7fcb9f1342d0, sigsetsize: 8) = 3
9837.164 ( 0.001 ms): IO iothread1/620155 read(fd: 107, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.174 ( 0.001 ms): IO iothread1/620155 read(fd: 105, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.176 ( 0.001 ms): IO iothread1/620155 read(fd: 106, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.209 ( 0.035 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 32, iocbpp: 0x7fca7d0cebe0) = 32
174k IOPS (+3.6%), IOThread syscalls:
9809.566 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0cdd62be0) = 32
9809.625 ( 0.001 ms): IO iothread1/623061 write(fd: 103, buf: 0x5647cfba5f58, count: 8) = 8
9809.627 ( 0.002 ms): IO iothread1/623061 write(fd: 104, buf: 0x5647cfba5f58, count: 8) = 8
9809.663 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0d0388b50) = 32
Notice that ppoll(2) and eventfd read(2) syscalls are eliminated because
the IOThread stays in polling mode instead of falling back to file
descriptor monitoring.
As usual, polling is not implemented on Windows so this patch ignores
the new io_poll_read() callback in aio-win32.c.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20211207132336.36627-2-stefanha@redhat.com
[Fixed up aio_set_event_notifier() calls in
tests/unit/test-fdmon-epoll.c added after this series was queued.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2021-12-07 16:23:31 +03:00
|
|
|
NULL, NULL, vu_fd_watch);
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
|
|
|
|
export/vhost-user-blk: Fix consecutive drains
The vhost-user-blk export implement AioContext switches in its drain
implementation. This means that on drain_begin, it detaches the server
from its AioContext and on drain_end, attaches it again and schedules
the server->co_trip coroutine in the updated AioContext.
However, nothing guarantees that server->co_trip is even safe to be
scheduled. Not only is it unclear that the coroutine is actually in a
state where it can be reentered externally without causing problems, but
with two consecutive drains, it is possible that the scheduled coroutine
didn't have a chance yet to run and trying to schedule an already
scheduled coroutine a second time crashes with an assertion failure.
Following the model of NBD, this commit makes the vhost-user-blk export
shut down server->co_trip during drain so that resuming the export means
creating and scheduling a new coroutine, which is always safe.
There is one exception: If the drain call didn't poll (for example, this
happens in the context of bdrv_graph_wrlock()), then the coroutine
didn't have a chance to shut down. However, in this case the AioContext
can't have changed; changing the AioContext always involves a polling
drain. So in this case we can simply assert that the AioContext is
unchanged and just leave the coroutine running or wake it up if it has
yielded to wait for the AioContext to be attached again.
Fixes: e1054cd4aad03a493a5d1cded7508f7c348205bf
Fixes: https://issues.redhat.com/browse/RHEL-1708
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20231127115755.22846-1-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2023-11-27 14:57:55 +03:00
|
|
|
if (server->co_trip) {
|
|
|
|
/*
|
|
|
|
* The caller didn't fully shut down co_trip (this can happen on
|
|
|
|
* non-polling drains like in bdrv_graph_wrlock()). This is okay as long
|
|
|
|
* as it no longer tries to shut it down and we're guaranteed to still
|
|
|
|
* be in the same AioContext as before.
|
|
|
|
*
|
|
|
|
* co_ctx can still be NULL if we get multiple calls and only just
|
|
|
|
* scheduled a new coroutine in the else branch.
|
|
|
|
*/
|
|
|
|
AioContext *co_ctx = qemu_coroutine_get_aio_context(server->co_trip);
|
|
|
|
|
|
|
|
assert(!server->quiescing);
|
|
|
|
assert(!co_ctx || co_ctx == ctx);
|
|
|
|
} else {
|
|
|
|
server->co_trip = qemu_coroutine_create(vu_client_trip, server);
|
|
|
|
assert(!server->in_qio_channel_yield);
|
|
|
|
aio_co_schedule(ctx, server->co_trip);
|
|
|
|
}
|
2020-09-24 18:15:45 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Called with server->ctx acquired */
|
|
|
|
void vhost_user_server_detach_aio_context(VuServer *server)
|
|
|
|
{
|
|
|
|
if (server->sioc) {
|
|
|
|
VuFdWatch *vu_fd_watch;
|
|
|
|
|
|
|
|
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
|
aio: remove aio_disable_external() API
All callers now pass is_external=false to aio_set_fd_handler() and
aio_set_event_notifier(). The aio_disable_external() API that
temporarily disables fd handlers that were registered is_external=true
is therefore dead code.
Remove aio_disable_external(), aio_enable_external(), and the
is_external arguments to aio_set_fd_handler() and
aio_set_event_notifier().
The entire test-fdmon-epoll test is removed because its sole purpose was
testing aio_disable_external().
Parts of this patch were generated using the following coccinelle
(https://coccinelle.lip6.fr/) semantic patch:
@@
expression ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque;
@@
- aio_set_fd_handler(ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque)
+ aio_set_fd_handler(ctx, fd, io_read, io_write, io_poll, io_poll_ready, opaque)
@@
expression ctx, notifier, is_external, io_read, io_poll, io_poll_ready;
@@
- aio_set_event_notifier(ctx, notifier, is_external, io_read, io_poll, io_poll_ready)
+ aio_set_event_notifier(ctx, notifier, io_read, io_poll, io_poll_ready)
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230516190238.8401-21-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2023-05-16 22:02:38 +03:00
|
|
|
aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
|
aio-posix: split poll check from ready handler
Adaptive polling measures the execution time of the polling check plus
handlers called when a polled event becomes ready. Handlers can take a
significant amount of time, making it look like polling was running for
a long time when in fact the event handler was running for a long time.
For example, on Linux the io_submit(2) syscall invoked when a virtio-blk
device's virtqueue becomes ready can take 10s of microseconds. This
can exceed the default polling interval (32 microseconds) and cause
adaptive polling to stop polling.
By excluding the handler's execution time from the polling check we make
the adaptive polling calculation more accurate. As a result, the event
loop now stays in polling mode where previously it would have fallen
back to file descriptor monitoring.
The following data was collected with virtio-blk num-queues=2
event_idx=off using an IOThread. Before:
168k IOPS, IOThread syscalls:
9837.115 ( 0.020 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 16, iocbpp: 0x7fcb9f937db0) = 16
9837.158 ( 0.002 ms): IO iothread1/620155 write(fd: 103, buf: 0x556a2ef71b88, count: 8) = 8
9837.161 ( 0.001 ms): IO iothread1/620155 write(fd: 104, buf: 0x556a2ef71b88, count: 8) = 8
9837.163 ( 0.001 ms): IO iothread1/620155 ppoll(ufds: 0x7fcb90002800, nfds: 4, tsp: 0x7fcb9f1342d0, sigsetsize: 8) = 3
9837.164 ( 0.001 ms): IO iothread1/620155 read(fd: 107, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.174 ( 0.001 ms): IO iothread1/620155 read(fd: 105, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.176 ( 0.001 ms): IO iothread1/620155 read(fd: 106, buf: 0x7fcb9f939cc0, count: 512) = 8
9837.209 ( 0.035 ms): IO iothread1/620155 io_submit(ctx_id: 140512552468480, nr: 32, iocbpp: 0x7fca7d0cebe0) = 32
174k IOPS (+3.6%), IOThread syscalls:
9809.566 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0cdd62be0) = 32
9809.625 ( 0.001 ms): IO iothread1/623061 write(fd: 103, buf: 0x5647cfba5f58, count: 8) = 8
9809.627 ( 0.002 ms): IO iothread1/623061 write(fd: 104, buf: 0x5647cfba5f58, count: 8) = 8
9809.663 ( 0.036 ms): IO iothread1/623061 io_submit(ctx_id: 140539805028352, nr: 32, iocbpp: 0x7fd0d0388b50) = 32
Notice that ppoll(2) and eventfd read(2) syscalls are eliminated because
the IOThread stays in polling mode instead of falling back to file
descriptor monitoring.
As usual, polling is not implemented on Windows so this patch ignores
the new io_poll_read() callback in aio-win32.c.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20211207132336.36627-2-stefanha@redhat.com
[Fixed up aio_set_event_notifier() calls in
tests/unit/test-fdmon-epoll.c added after this series was queued.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2021-12-07 16:23:31 +03:00
|
|
|
NULL, NULL, NULL, NULL, vu_fd_watch);
|
2020-09-18 11:09:08 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
server->ctx = NULL;
|
2023-08-31 01:48:02 +03:00
|
|
|
|
|
|
|
if (server->ioc) {
|
|
|
|
if (server->in_qio_channel_yield) {
|
|
|
|
/* Stop receiving the next vhost-user message */
|
|
|
|
qio_channel_wake_read(server->ioc);
|
|
|
|
}
|
|
|
|
}
|
2020-09-24 18:15:45 +03:00
|
|
|
}
|
2020-09-18 11:09:08 +03:00
|
|
|
|
|
|
|
bool vhost_user_server_start(VuServer *server,
|
|
|
|
SocketAddress *socket_addr,
|
|
|
|
AioContext *ctx,
|
|
|
|
uint16_t max_queues,
|
|
|
|
const VuDevIface *vu_iface,
|
|
|
|
Error **errp)
|
|
|
|
{
|
2020-09-24 18:15:45 +03:00
|
|
|
QEMUBH *bh;
|
2020-09-24 18:15:47 +03:00
|
|
|
QIONetListener *listener;
|
|
|
|
|
|
|
|
if (socket_addr->type != SOCKET_ADDRESS_TYPE_UNIX &&
|
|
|
|
socket_addr->type != SOCKET_ADDRESS_TYPE_FD) {
|
|
|
|
error_setg(errp, "Only socket address types 'unix' and 'fd' are supported");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
listener = qio_net_listener_new();
|
2020-09-18 11:09:08 +03:00
|
|
|
if (qio_net_listener_open_sync(listener, socket_addr, 1,
|
|
|
|
errp) < 0) {
|
|
|
|
object_unref(OBJECT(listener));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-09-24 18:15:45 +03:00
|
|
|
bh = qemu_bh_new(restart_listener_bh, server);
|
|
|
|
|
2020-09-24 18:15:38 +03:00
|
|
|
/* zero out unspecified fields */
|
2020-09-18 11:09:08 +03:00
|
|
|
*server = (VuServer) {
|
|
|
|
.listener = listener,
|
2020-09-24 18:15:45 +03:00
|
|
|
.restart_listener_bh = bh,
|
2020-09-18 11:09:08 +03:00
|
|
|
.vu_iface = vu_iface,
|
|
|
|
.max_queues = max_queues,
|
|
|
|
.ctx = ctx,
|
|
|
|
};
|
|
|
|
|
|
|
|
qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
|
|
|
|
|
|
|
|
qio_net_listener_set_client_func(server->listener,
|
|
|
|
vu_accept,
|
|
|
|
server,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
QTAILQ_INIT(&server->vu_fd_watches);
|
|
|
|
return true;
|
|
|
|
}
|