From 7b223e38603de3a75602e14914d26f9d4baf52eb Mon Sep 17 00:00:00 2001
From: Christian Ehrhardt
Date: Wed, 9 Feb 2022 12:14:56 +0100
Subject: [PATCH 1/3] tools/virtiofsd: Add rseq syscall to the seccomp
 allowlist

virtiofsd currently crashes when used with glibc 2.35, because glibc
now issues the rseq system call as part of every thread creation
[1][2].

[1]: https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/
[2]: https://sourceware.org/pipermail/libc-alpha/2022-February/136040.html

The crash happens not at daemon start, but when a guest connects:

    /usr/lib/qemu/virtiofsd -f --socket-path=/tmp/testvfsd -o sandbox=chroot \
      -o source=/var/guests/j-virtiofs --socket-group=kvm
    virtio_session_mount: Waiting for vhost-user socket connection...
    # start ok, now guest will connect
    virtio_session_mount: Received vhost-user socket connection
    virtio_loop: Entry
    fv_queue_set_started: qidx=0 started=1
    fv_queue_set_started: qidx=1 started=1
    Bad system call (core dumped)

Add rseq to the seccomp allowlist so that the daemon no longer crashes
in this case.

Reported-by: Michael Hudson-Doyle
Signed-off-by: Christian Ehrhardt
Reviewed-by: Dr. David Alan Gilbert
Message-id: 20220209111456.3328420-1-christian.ehrhardt@canonical.com

[Moved rseq to its alphabetically ordered position in the seccomp
allowlist. --Stefan]

Signed-off-by: Stefan Hajnoczi
---
 tools/virtiofsd/passthrough_seccomp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index a3ce9f898d..2bc0127b69 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -91,6 +91,9 @@ static const int syscall_allowlist[] = {
     SCMP_SYS(renameat2),
     SCMP_SYS(removexattr),
     SCMP_SYS(restart_syscall),
+#ifdef __NR_rseq
+    SCMP_SYS(rseq), /* required since glibc 2.35 */
+#endif
     SCMP_SYS(rt_sigaction),
     SCMP_SYS(rt_sigprocmask),
     SCMP_SYS(rt_sigreturn),

From 34deee7b6a1418f3d62a91ff0a9d156e60a788a5 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert"
Date: Thu, 10 Feb 2022 17:47:14 +0000
Subject: [PATCH 2/3] Deprecate C virtiofsd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's a nice new Rust implementation out there; recommend that people
do new work on that.

Signed-off-by: Dr. David Alan Gilbert
Reviewed-by: Daniel P. Berrangé
Message-id: 20220210174714.19843-1-dgilbert@redhat.com
Signed-off-by: Stefan Hajnoczi
---
 docs/about/deprecated.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 25b1fb8677..26d00812ba 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -447,3 +447,20 @@ nanoMIPS ISA
 The ``nanoMIPS`` ISA has never been upstreamed to any compiler
 toolchain. As it is hard to generate binaries for it, declare it
 deprecated.
+
+Tools
+-----
+
+virtiofsd
+'''''''''
+
+There is a new Rust implementation of ``virtiofsd`` at
+``https://gitlab.com/virtio-fs/virtiofsd``;
+since this is now marked stable, new development should be done on that
+rather than the existing C version in the QEMU tree.
+For the moment, the C version will still accept fixes and patches that
+are already in development, but it will eventually be deleted from this
+tree.
+New deployments should use the Rust version, and existing systems
+should consider moving to it. The command line and feature set are very
+close, so moving should be simple.
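The failure mode in the first patch is worth seeing in isolation: glibc
2.35 registers rseq inside every newly created thread, so a seccomp
filter installed after the main thread has started only fires once an
extra thread is spawned; for virtiofsd that is when the guest
connection brings up the queue threads (fv_queue_set_started in the log
above). The standalone sketch below is an assumption, not code from the
QEMU tree: rather than rebuilding virtiofsd's long allowlist, it
inverts the filter and traps only rseq, so on glibc >= 2.35 the first
pthread_create() dies with SIGSYS ("Bad system call") while older glibc
runs the thread normally. Build with something like
"gcc rseq-repro.c -lseccomp -lpthread"; the file and helper names are
illustrative.

    #include <pthread.h>
    #include <seccomp.h>
    #include <stdio.h>
    #include <sys/syscall.h> /* for __NR_rseq, where the headers have it */

    static void *thread_fn(void *arg)
    {
        puts("thread ran: glibc < 2.35, or rseq permitted");
        return NULL;
    }

    int main(void)
    {
        /* Allow everything by default; virtiofsd allowlists instead. */
        scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
        if (!ctx) {
            return 1;
        }
    #ifdef __NR_rseq
        /* Trap only rseq: the inverse of leaving it off an allowlist. */
        if (seccomp_rule_add(ctx, SCMP_ACT_TRAP, SCMP_SYS(rseq), 0) < 0) {
            return 1;
        }
    #endif
        if (seccomp_load(ctx) < 0) {
            return 1;
        }
        seccomp_release(ctx);

        /* glibc >= 2.35 issues rseq inside the new thread: SIGSYS here,
         * reproducing the "Bad system call (core dumped)" from the log. */
        pthread_t t;
        if (pthread_create(&t, NULL, thread_fn, NULL) == 0) {
            pthread_join(t, NULL);
        }
        return 0;
    }

Note that the main thread survives either way: its rseq registration
happened at program startup, before the filter was loaded, which is why
virtiofsd itself started fine and died only on the first connection.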
From 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b Mon Sep 17 00:00:00 2001
From: Hiroki Narukawa
Date: Mon, 14 Feb 2022 20:53:02 +0900
Subject: [PATCH 3/3] util: adjust coroutine pool size to virtio block queue

The coroutine pool size has been 64 since long ago; the rationale was
laid out in the commit message of 4d68e86b.

At that time, the virtio-blk queue-size and num-queues properties were
not configurable, and the equivalent values were 128 and 1, so a pool
size of 64 was fine.

Later, queue-size and num-queues became configurable, and their default
values were increased. With the new sizes, a coroutine pool of 64 is
exhausted frequently under random disk I/O, and performance slows down.

This commit adjusts the coroutine pool size adaptively to the
configured values.

The initial pool size is still 64 by default, but coroutines are no
longer used only for block devices, and 64 is not too much of a burden
compared with the new default pool size of 128 * vCPUs.

Signed-off-by: Hiroki Narukawa
Message-id: 20220214115302.13294-2-hnarukaw@yahoo-corp.jp
Signed-off-by: Stefan Hajnoczi
---
 hw/block/virtio-blk.c    |  5 +++++
 include/qemu/coroutine.h | 10 ++++++++++
 util/qemu-coroutine.c    | 20 ++++++++++++++++----
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 82676cdd01..540c38f829 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -32,6 +32,7 @@
 #include "hw/virtio/virtio-bus.h"
 #include "migration/qemu-file-types.h"
 #include "hw/virtio/virtio-access.h"
+#include "qemu/coroutine.h"

 /* Config size before the discard support (hide associated config fields) */
 #define VIRTIO_BLK_CFG_SIZE offsetof(struct virtio_blk_config, \
@@ -1214,6 +1215,8 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < conf->num_queues; i++) {
         virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
     }
+    qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size
+                                            / 2);
     virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
     if (err != NULL) {
         error_propagate(errp, err);
@@ -1250,6 +1253,8 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
     for (i = 0; i < conf->num_queues; i++) {
         virtio_del_queue(vdev, i);
     }
+    qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size
+                                            / 2);
     qemu_del_vm_change_state_handler(s->change);
     blockdev_mark_auto_del(s->blk);
     virtio_cleanup(vdev);
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index 4829ff373d..c828a95ee0 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -331,6 +331,16 @@ void qemu_co_sleep_wake(QemuCoSleep *w)
  */
 void coroutine_fn yield_until_fd_readable(int fd);

+/**
+ * Increase coroutine pool size
+ */
+void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size);
+
+/**
+ * Decrease coroutine pool size
+ */
+void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size);
+
 #include "qemu/lockable.h"

 #endif /* QEMU_COROUTINE_H */
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 38fb6d3084..c03b2422ff 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -20,12 +20,14 @@
 #include "qemu/coroutine_int.h"
 #include "block/aio.h"

+/** Initial batch size is 64, and is increased on demand */
 enum {
-    POOL_BATCH_SIZE = 64,
+    POOL_INITIAL_BATCH_SIZE = 64,
 };

 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
 static unsigned int release_pool_size;
 static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
 static __thread unsigned int alloc_pool_size;
@@ -49,7 +51,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
     if (CONFIG_COROUTINE_POOL) {
         co = QSLIST_FIRST(&alloc_pool);
         if (!co) {
-            if (release_pool_size > POOL_BATCH_SIZE) {
+            if (release_pool_size > qatomic_read(&pool_batch_size)) {
                 /* Slow path; a good place to register the destructor, too.  */
                 if (!coroutine_pool_cleanup_notifier.notify) {
                     coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
@@ -86,12 +88,12 @@ static void coroutine_delete(Coroutine *co)
     co->caller = NULL;

     if (CONFIG_COROUTINE_POOL) {
-        if (release_pool_size < POOL_BATCH_SIZE * 2) {
+        if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
             QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
             qatomic_inc(&release_pool_size);
             return;
         }
-        if (alloc_pool_size < POOL_BATCH_SIZE) {
+        if (alloc_pool_size < qatomic_read(&pool_batch_size)) {
             QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
             alloc_pool_size++;
             return;
@@ -202,3 +204,13 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
 {
     return co->ctx;
 }
+
+void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size)
+{
+    qatomic_add(&pool_batch_size, additional_pool_size);
+}
+
+void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size)
+{
+    qatomic_sub(&pool_batch_size, removing_pool_size);
+}
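To make the arithmetic in this last patch concrete, here is a
standalone sketch. It is an assumption, not QEMU code: it uses C11
stdatomic in place of QEMU's qatomic helpers, and
increase_pool_batch_size()/decrease_pool_batch_size() are local
stand-ins for the two new qemu_coroutine_* functions. It models only
the batch-size counter, not the coroutine free lists. With the
virtio-blk defaults the commit message implies (queue-size 256, one
queue per vCPU), each disk contributes num_queues * queue_size / 2
coroutines of head-room, which is where the "128 * vCPUs" figure comes
from.

    #include <stdatomic.h>
    #include <stdio.h>

    /* Stand-in for POOL_INITIAL_BATCH_SIZE in util/qemu-coroutine.c */
    enum { POOL_INITIAL_BATCH_SIZE = 64 };

    static atomic_uint pool_batch_size = POOL_INITIAL_BATCH_SIZE;

    /* Models qemu_coroutine_increase_pool_batch_size() */
    static void increase_pool_batch_size(unsigned int additional_pool_size)
    {
        atomic_fetch_add(&pool_batch_size, additional_pool_size);
    }

    /* Models qemu_coroutine_decrease_pool_batch_size() */
    static void decrease_pool_batch_size(unsigned int removing_pool_size)
    {
        atomic_fetch_sub(&pool_batch_size, removing_pool_size);
    }

    int main(void)
    {
        unsigned int num_queues = 4;   /* default: one queue per vCPU */
        unsigned int queue_size = 256; /* virtio-blk default queue-size */

        /* What virtio_blk_device_realize() now does for one device: */
        increase_pool_batch_size(num_queues * queue_size / 2);
        printf("pool batch size after realize:   %u\n",
               atomic_load(&pool_batch_size)); /* 64 + 512 = 576 */

        /* ...and virtio_blk_device_unrealize() gives it back: */
        decrease_pool_batch_size(num_queues * queue_size / 2);
        printf("pool batch size after unrealize: %u\n",
               atomic_load(&pool_batch_size)); /* back to 64 */
        return 0;
    }

Reading the counter with qatomic_read() on the create/delete fast paths
fits this design: devices can be realized or unrealized while other
threads are concurrently creating and recycling pooled coroutines.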