/*
 * Multifd common functions
 *
 * Copyright (c) 2019-2020 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#ifndef QEMU_MIGRATION_MULTIFD_H
#define QEMU_MIGRATION_MULTIFD_H

#include "exec/target_page.h"
#include "ram.h"

typedef struct MultiFDRecvData MultiFDRecvData;
typedef struct MultiFDSendData MultiFDSendData;

bool multifd_send_setup(void);
void multifd_send_shutdown(void);
void multifd_send_channel_created(void);
int multifd_recv_setup(Error **errp);
void multifd_recv_cleanup(void);
void multifd_recv_shutdown(void);
bool multifd_recv_all_channels_created(void);
void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
int multifd_send_sync_main(void);
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
bool multifd_recv(void);
MultiFDRecvData *multifd_get_recv_data(void);

/* Multifd Compression flags */
#define MULTIFD_FLAG_SYNC (1 << 0)

/* We reserve 4 bits for compression methods */
#define MULTIFD_FLAG_COMPRESSION_MASK (0xf << 1)
/* We need to be compatible: before compression was added, the value was 0 */
#define MULTIFD_FLAG_NOCOMP (0 << 1)
#define MULTIFD_FLAG_ZLIB (1 << 1)
#define MULTIFD_FLAG_ZSTD (2 << 1)
#define MULTIFD_FLAG_QPL (4 << 1)
#define MULTIFD_FLAG_UADK (8 << 1)
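
/*
 * Illustrative sketch (not part of the API above): a recv hook can mask out
 * the compression method carried in a packet's flags and compare it against
 * the value it expects, e.g. for a zlib channel:
 *
 *     uint32_t method = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
 *     if (method != MULTIFD_FLAG_ZLIB) {
 *         error_setg(errp, "multifd: flags received %x, expected %x",
 *                    method, MULTIFD_FLAG_ZLIB);
 *         return -1;
 *     }
 */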

/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)

typedef struct {
    uint32_t magic;
    uint32_t version;
    uint32_t flags;
    /* maximum number of allocated pages */
    uint32_t pages_alloc;
    /* non zero pages */
    uint32_t normal_pages;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    uint64_t packet_num;
    /* zero pages */
    uint32_t zero_pages;
    uint32_t unused32[1]; /* Reserved for future use */
    uint64_t unused64[3]; /* Reserved for future use */
    char ramblock[256];
    /*
     * This array contains the pointers to:
     *  - normal pages (initial normal_pages entries)
     *  - zero pages (following zero_pages entries)
     */
    uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;

typedef struct {
    /* number of used pages */
    uint32_t num;
    /* number of normal pages */
    uint32_t normal_num;
    RAMBlock *block;
    /* offset of each page */
    ram_addr_t offset[];
} MultiFDPages_t;

struct MultiFDRecvData {
    void *opaque;
    size_t size;
    /* for preadv */
    off_t file_offset;
};

typedef enum {
    MULTIFD_PAYLOAD_NONE,
    MULTIFD_PAYLOAD_RAM,
} MultiFDPayloadType;

typedef union MultiFDPayload {
    MultiFDPages_t ram;
} MultiFDPayload;

struct MultiFDSendData {
    MultiFDPayloadType type;
    MultiFDPayload u;
};

static inline bool multifd_payload_empty(MultiFDSendData *data)
{
    return data->type == MULTIFD_PAYLOAD_NONE;
}

static inline void multifd_set_payload_type(MultiFDSendData *data,
                                            MultiFDPayloadType type)
{
    data->type = type;
}

typedef struct {
    /* Fields are only written at creating/deletion time */
    /* No lock required for them, they are read only */

    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    bool thread_created;
    QemuThread tls_thread;
    bool tls_thread_created;
    /* communication channel */
    QIOChannel *c;
    /* packet allocated len */
    uint32_t packet_len;
    /* multifd flags for sending ram */
    int write_flags;

    /* sem where to wait for more work */
    QemuSemaphore sem;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;

    /* multifd flags for each packet */
    uint32_t flags;
    /*
     * The sender thread has work to do if either of the booleans
     * below is set.
     *
     * @pending_job:  a job is pending
     * @pending_sync: a sync request is pending
     *
     * For both of these fields, they're only set by the requesters, and
     * cleared by the multifd sender threads.
     */
    bool pending_job;
    bool pending_sync;
    MultiFDSendData *data;

    /* thread local variables. No locking required */

    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets sent through this channel */
    uint64_t packets_sent;
    /* buffers to send */
    struct iovec *iov;
    /* number of iovs used */
    uint32_t iovs_num;
    /* used for compression methods */
    void *compress_data;
} MultiFDSendParams;

typedef struct {
    /* Fields are only written at creating/deletion time */
    /* No lock required for them, they are read only */

    /* channel number */
    uint8_t id;
    /* channel thread name */
    char *name;
    /* channel thread id */
    QemuThread thread;
    bool thread_created;
    /* communication channel */
    QIOChannel *c;
    /* packet allocated len */
    uint32_t packet_len;

    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* sem where to wait for more work */
    QemuSemaphore sem;

    /* this mutex protects the following parameters */
    QemuMutex mutex;
    /* should this thread finish */
    bool quit;
    /* multifd flags for each packet */
    uint32_t flags;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    int pending_job;
    MultiFDRecvData *data;

    /* thread local variables. No locking required */

    /* pointer to the packet */
    MultiFDPacket_t *packet;
    /* size of the next packet that contains pages */
    uint32_t next_packet_size;
    /* packets received through this channel */
    uint64_t packets_recved;
    /* ramblock */
    RAMBlock *block;
    /* ramblock host address */
    uint8_t *host;
    /* buffers to recv */
    struct iovec *iov;
    /* Pages that are not zero */
    ram_addr_t *normal;
    /* num of non zero pages */
    uint32_t normal_num;
    /* Pages that are zero */
    ram_addr_t *zero;
    /* num of zero pages */
    uint32_t zero_num;
    /* used for de-compression methods */
    void *compress_data;
} MultiFDRecvParams;

typedef struct {
    /*
     * The send_setup, send_cleanup, send_prepare are only called on
     * the QEMU instance at the migration source.
     */

    /*
     * Setup for sending side. Called once per channel during channel
     * setup phase.
     *
     * Must allocate p->iov. If packets are in use (default), one
     * extra iovec must be allocated for the packet header. Any memory
     * allocated in this hook must be released at send_cleanup.
     *
     * p->write_flags may be used for passing flags to the QIOChannel.
     *
     * p->compress_data may be used by compression methods to store
     * compression data.
     */
    int (*send_setup)(MultiFDSendParams *p, Error **errp);

    /*
     * Cleanup for sending side. Called once per channel during
     * channel cleanup phase.
     */
    void (*send_cleanup)(MultiFDSendParams *p, Error **errp);

    /*
     * Prepare the send packet. Called as a result of multifd_send()
     * on the client side, with p pointing to the MultiFDSendParams of
     * a channel that is currently idle.
     *
     * Must populate p->iov with the data to be sent, increment
     * p->iovs_num to match the amount of iovecs used and set
     * p->next_packet_size with the amount of data currently present
     * in p->iov.
     *
     * Must indicate whether this is a compression packet by setting
     * p->flags.
     *
     * As a last step, if packets are in use (default), must prepare
     * the packet by calling multifd_send_fill_packet().
     */
    int (*send_prepare)(MultiFDSendParams *p, Error **errp);

    /*
     * The recv_setup, recv_cleanup, recv are only called on the QEMU
     * instance at the migration destination.
     */

    /*
     * Setup for receiving side. Called once per channel during
     * channel setup phase. May be empty.
     *
     * May allocate data structures for the receiving of data. May use
     * p->iov. Compression methods may use p->compress_data.
     */
    int (*recv_setup)(MultiFDRecvParams *p, Error **errp);

    /*
     * Cleanup for receiving side. Called once per channel during
     * channel cleanup phase. May be empty.
     */
    void (*recv_cleanup)(MultiFDRecvParams *p);

    /*
     * Data receive method. Called as a result of multifd_recv() on
     * the client side, with p pointing to the MultiFDRecvParams of a
     * channel that is currently idle. Only called if there is data
     * available to receive.
     *
     * Must validate p->flags according to what was set at
     * send_prepare.
     *
     * Must read the data from the QIOChannel p->c.
     */
    int (*recv)(MultiFDRecvParams *p, Error **errp);
} MultiFDMethods;

void multifd_register_ops(int method, const MultiFDMethods *ops);
void multifd_send_fill_packet(MultiFDSendParams *p);
bool multifd_send_prepare_common(MultiFDSendParams *p);
void multifd_send_zero_page_detect(MultiFDSendParams *p);
void multifd_recv_zero_page_process(MultiFDRecvParams *p);
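
/*
 * A minimal sketch of how a backend might fill and register the hook table
 * above (the example_* names are hypothetical; the real implementations live
 * in the in-tree compression backends). multifd_register_ops() keys the
 * table off the multifd compression method:
 *
 *     static const MultiFDMethods example_ops = {
 *         .send_setup = example_send_setup,
 *         .send_cleanup = example_send_cleanup,
 *         .send_prepare = example_send_prepare,
 *         .recv_setup = example_recv_setup,
 *         .recv_cleanup = example_recv_cleanup,
 *         .recv = example_recv,
 *     };
 *
 *     static void example_register(void)
 *     {
 *         multifd_register_ops(MULTIFD_COMPRESSION_ZLIB, &example_ops);
 *     }
 */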

static inline void multifd_send_prepare_header(MultiFDSendParams *p)
{
    p->iov[0].iov_len = p->packet_len;
    p->iov[0].iov_base = p->packet;
    p->iovs_num++;
}
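
/*
 * A minimal sketch of the send_prepare flow, assuming a no-compression style
 * backend (hypothetical example; see the in-tree no-compression backend for
 * the real one): multifd_send_prepare_common() runs zero page detection and
 * reserves the packet header iovec, the hook then appends one iovec per
 * normal page, sets p->next_packet_size and p->flags, and finally calls
 * multifd_send_fill_packet():
 *
 *     static int example_send_prepare(MultiFDSendParams *p, Error **errp)
 *     {
 *         MultiFDPages_t *pages = &p->data->u.ram;
 *         uint32_t page_size = multifd_ram_page_size();
 *         int i;
 *
 *         if (multifd_send_prepare_common(p)) {
 *             for (i = 0; i < pages->normal_num; i++) {
 *                 p->iov[p->iovs_num].iov_base =
 *                     pages->block->host + pages->offset[i];
 *                 p->iov[p->iovs_num].iov_len = page_size;
 *                 p->iovs_num++;
 *             }
 *             p->next_packet_size = pages->normal_num * page_size;
 *         }
 *
 *         p->flags |= MULTIFD_FLAG_NOCOMP;
 *         multifd_send_fill_packet(p);
 *         return 0;
 *     }
 */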

void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc);
bool multifd_send(MultiFDSendData **send_data);
MultiFDSendData *multifd_send_data_alloc(void);
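
/*
 * A minimal sketch of how a client enqueues a payload (hypothetical usage;
 * the RAM migration code is the in-tree user): fill the payload union, tag
 * it, and hand it to an idle channel. On success multifd_send() takes the
 * buffer and swaps an empty one back into *send_data, so the same pointer
 * can be refilled:
 *
 *     MultiFDSendData *data = multifd_send_data_alloc();
 *
 *     // ... fill data->u.ram ...
 *     multifd_set_payload_type(data, MULTIFD_PAYLOAD_RAM);
 *     if (!multifd_send(&data)) {
 *         // channels are being torn down; treat as error
 *     }
 *     assert(multifd_payload_empty(data));
 */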

static inline uint32_t multifd_ram_page_size(void)
{
    return qemu_target_page_size();
}

static inline uint32_t multifd_ram_page_count(void)
{
    return MULTIFD_PACKET_SIZE / qemu_target_page_size();
}
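
/*
 * Worked example: with the default MULTIFD_PACKET_SIZE of 512 KiB and a
 * 4 KiB target page size, multifd_ram_page_count() yields 128 pages per
 * packet.
 */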

void multifd_ram_save_setup(void);
void multifd_ram_save_cleanup(void);
int multifd_ram_flush_and_sync(void);
size_t multifd_ram_payload_size(void);
void multifd_ram_fill_packet(MultiFDSendParams *p);
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp);

#endif