virtio/vhost-user: fix qemu abort when hotunplug vhost-user-net device
During the hot-unplugging of vhost-user-net type network cards,
the vhost_user_cleanup function may add the same rcu node to
the rcu linked list twice. The call chain in this case is as follows:
vhost_user_cleanup
->vhost_user_host_notifier_remove
->call_rcu(n, vhost_user_host_notifier_free, rcu);
->g_free_rcu(n, rcu);
When this happens, QEMU will abort in try_dequeue:
if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
abort();
}
backtrace is as follows:
0 __pthread_kill_implementation () at /usr/lib64/libc.so.6
1 raise () at /usr/lib64/libc.so.6
2 abort () at /usr/lib64/libc.so.6
3 try_dequeue () at ../util/rcu.c:235
4 call_rcu_thread (0) at ../util/rcu.c:288
5 qemu_thread_start (0) at ../util/qemu-thread-posix.c:541
6 start_thread () at /usr/lib64/libc.so.6
7 clone3 () at /usr/lib64/libc.so.6
The reason for the abort is that adding the same node to the rcu
linked list twice causes the list to become a ring. When the dummy
node is later added after the duplicated node, the ring is opened
again, but the list then contains that node only once while
rcu_call_count has been incremented twice. This mismatch causes
try_dequeue to abort.
This happens when n->addr != 0 at cleanup time, which does occur in
practice. For example, it occurs when using a 32-queue DPU
vhost-user-net type network card for hot-unplug testing, because
VhostUserHostNotifier->addr is cleared during the processing of
VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG. However, that processing
is asynchronous, so we cannot guarantee that
VhostUserHostNotifier->addr is zero in vhost_user_cleanup. Therefore,
it is necessary to merge g_free_rcu and vhost_user_host_notifier_free
into one rcu node.
Fixes: 503e355465
("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
Message-Id: <20241011102913.45582-1-yaozhenguo@jd.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
parent
55fa4be6f7
commit
963b027645
@ -1185,9 +1185,16 @@ static int vhost_user_set_vring_num(struct vhost_dev *dev,
|
||||
|
||||
static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
|
||||
{
|
||||
assert(n && n->unmap_addr);
|
||||
if (n->unmap_addr) {
|
||||
munmap(n->unmap_addr, qemu_real_host_page_size());
|
||||
n->unmap_addr = NULL;
|
||||
}
|
||||
if (n->destroy) {
|
||||
memory_region_transaction_begin();
|
||||
object_unparent(OBJECT(&n->mr));
|
||||
memory_region_transaction_commit();
|
||||
g_free(n);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1195,17 +1202,28 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
|
||||
* under rcu.
|
||||
*/
|
||||
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
|
||||
VirtIODevice *vdev)
|
||||
VirtIODevice *vdev, bool destroy)
|
||||
{
|
||||
/*
|
||||
* if destroy == false and n->addr == NULL, we have nothing to do.
|
||||
* so, just return.
|
||||
*/
|
||||
if (!n || (!destroy && !n->addr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (n->addr) {
|
||||
if (vdev) {
|
||||
memory_region_transaction_begin();
|
||||
virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
|
||||
memory_region_transaction_commit();
|
||||
}
|
||||
assert(!n->unmap_addr);
|
||||
n->unmap_addr = n->addr;
|
||||
n->addr = NULL;
|
||||
call_rcu(n, vhost_user_host_notifier_free, rcu);
|
||||
}
|
||||
n->destroy = destroy;
|
||||
call_rcu(n, vhost_user_host_notifier_free, rcu);
|
||||
}
|
||||
|
||||
static int vhost_user_set_vring_base(struct vhost_dev *dev,
|
||||
@ -1279,9 +1297,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
|
||||
struct vhost_user *u = dev->opaque;
|
||||
|
||||
VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
|
||||
if (n) {
|
||||
vhost_user_host_notifier_remove(n, dev->vdev);
|
||||
}
|
||||
vhost_user_host_notifier_remove(n, dev->vdev, false);
|
||||
|
||||
ret = vhost_user_write(dev, &msg, NULL, 0);
|
||||
if (ret < 0) {
|
||||
@ -1562,7 +1578,7 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
|
||||
* new mapped address.
|
||||
*/
|
||||
n = fetch_or_create_notifier(user, queue_idx);
|
||||
vhost_user_host_notifier_remove(n, vdev);
|
||||
vhost_user_host_notifier_remove(n, vdev, false);
|
||||
|
||||
if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
|
||||
return 0;
|
||||
@ -2736,15 +2752,7 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
|
||||
static void vhost_user_state_destroy(gpointer data)
|
||||
{
|
||||
VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
|
||||
if (n) {
|
||||
vhost_user_host_notifier_remove(n, NULL);
|
||||
object_unparent(OBJECT(&n->mr));
|
||||
/*
|
||||
* We can't free until vhost_user_host_notifier_remove has
|
||||
* done it's thing so schedule the free with RCU.
|
||||
*/
|
||||
g_free_rcu(n, rcu);
|
||||
}
|
||||
vhost_user_host_notifier_remove(n, NULL, true);
|
||||
}
|
||||
|
||||
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
|
||||
@ -2765,9 +2773,7 @@ void vhost_user_cleanup(VhostUserState *user)
|
||||
if (!user->chr) {
|
||||
return;
|
||||
}
|
||||
memory_region_transaction_begin();
|
||||
user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
|
||||
memory_region_transaction_commit();
|
||||
user->chr = NULL;
|
||||
}
|
||||
|
||||
|
@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
|
||||
void *addr;
|
||||
void *unmap_addr;
|
||||
int idx;
|
||||
bool destroy;
|
||||
} VhostUserHostNotifier;
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user