/*
 * Multifd RAM migration without compression
 *
 * Copyright (c) 2019-2020 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "exec/ramblock.h"
#include "exec/target_page.h"
#include "file.h"
#include "multifd.h"
#include "options.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "trace.h"

static MultiFDSendData *multifd_ram_send;

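/*
 * Size in bytes of the RAM payload of a MultiFDSendData: the
 * MultiFDPages_t struct plus the page offset array appended to it.
 */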
size_t multifd_ram_payload_size(void)
{
    uint32_t n = multifd_ram_page_count();

    /*
     * We keep an array of page offsets at the end of MultiFDPages_t,
     * so add space for it in the allocation.
     */
    return sizeof(MultiFDPages_t) + n * sizeof(ram_addr_t);
}

void multifd_ram_save_setup(void)
{
    multifd_ram_send = multifd_send_data_alloc();
}

void multifd_ram_save_cleanup(void)
{
    g_free(multifd_ram_send);
    multifd_ram_send = NULL;
}

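/*
 * For mapped-ram: update the ramblock's file bitmap for the current
 * batch, setting the bit for every non-zero page and clearing it for
 * every page that zero-page detection found to be zero.
 */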
static void multifd_set_file_bitmap(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;

    assert(pages->block);

    for (int i = 0; i < pages->normal_num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], true);
    }

    for (int i = pages->normal_num; i < pages->num; i++) {
        ramblock_set_file_bmap_atomic(pages->block, pages->offset[i], false);
    }
}

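/*
 * Per-channel send setup: allocate one iovec slot per page, plus one
 * extra slot for the packet header when a header is sent over the
 * channel (i.e. not mapped-ram), and request zero-copy writes if that
 * capability is enabled.
 */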
static int multifd_nocomp_send_setup(MultiFDSendParams *p, Error **errp)
{
    uint32_t page_count = multifd_ram_page_count();

    if (migrate_zero_copy_send()) {
        p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
    }

    if (!migrate_mapped_ram()) {
        /* We need one extra place for the packet header */
        p->iov = g_new0(struct iovec, page_count + 1);
    } else {
        p->iov = g_new0(struct iovec, page_count);
    }

    return 0;
}

static void multifd_nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
{
    g_free(p->iov);
    p->iov = NULL;
}

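/*
 * Point one iovec at each non-zero page of the current batch in the
 * ramblock's host memory and account for them in next_packet_size.
 */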
static void multifd_send_prepare_iovs(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t page_size = multifd_ram_page_size();

    for (int i = 0; i < pages->normal_num; i++) {
        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
        p->iov[p->iovs_num].iov_len = page_size;
        p->iovs_num++;
    }

    p->next_packet_size = pages->normal_num * page_size;
}

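/*
 * Prepare an uncompressed packet: run zero-page detection, build the
 * iovec list and fill in the packet header.  For mapped-ram there is
 * no packet header, only the iovecs and the file bitmap.  With
 * zero-copy enabled, the header is written here separately and
 * without zero-copy, so only guest pages take the zero-copy path.
 */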
static int multifd_nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
{
    bool use_zero_copy_send = migrate_zero_copy_send();
    int ret;

    multifd_send_zero_page_detect(p);

    if (migrate_mapped_ram()) {
        multifd_send_prepare_iovs(p);
        multifd_set_file_bitmap(p);

        return 0;
    }

    if (!use_zero_copy_send) {
        /*
         * Only !zerocopy needs the header in IOV; zerocopy will
         * send it separately.
         */
        multifd_send_prepare_header(p);
    }

    multifd_send_prepare_iovs(p);
    p->flags |= MULTIFD_FLAG_NOCOMP;

    multifd_send_fill_packet(p);

    if (use_zero_copy_send) {
        /* Send header first, without zerocopy */
        ret = qio_channel_write_all(p->c, (void *)p->packet,
                                    p->packet_len, errp);
        if (ret != 0) {
            return -1;
        }
    }

    return 0;
}

static int multifd_nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
{
    p->iov = g_new0(struct iovec, multifd_ram_page_count());
    return 0;
}

static void multifd_nocomp_recv_cleanup(MultiFDRecvParams *p)
{
    g_free(p->iov);
    p->iov = NULL;
}

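/*
 * Receive one uncompressed packet.  Mapped-ram data is read by the
 * file code instead; otherwise validate the compression flag, process
 * the zero pages, and read the non-zero pages straight into guest
 * memory with a single vectored read.
 */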
static int multifd_nocomp_recv(MultiFDRecvParams *p, Error **errp)
{
    uint32_t flags;

    if (migrate_mapped_ram()) {
        return multifd_file_recv_data(p, errp);
    }

    flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;

    if (flags != MULTIFD_FLAG_NOCOMP) {
        error_setg(errp, "multifd %u: flags received %x flags expected %x",
                   p->id, flags, MULTIFD_FLAG_NOCOMP);
        return -1;
    }

    multifd_recv_zero_page_process(p);

    if (!p->normal_num) {
        return 0;
    }

    for (int i = 0; i < p->normal_num; i++) {
        p->iov[i].iov_base = p->host + p->normal[i];
        p->iov[i].iov_len = multifd_ram_page_size();
        ramblock_recv_bitmap_set_offset(p->block, p->normal[i]);
    }
    return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
}

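/* Reset a batch so it can be reused for the next series of pages. */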
static void multifd_pages_reset(MultiFDPages_t *pages)
{
    /*
     * We don't need to touch the offset[] array, because it will be
     * overwritten when the batch is reused.
     */
    pages->num = 0;
    pages->normal_num = 0;
    pages->block = NULL;
}

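/*
 * Fill the RAM part of the outgoing packet header: page counts, the
 * ramblock name and the per-page offsets, all converted to big-endian
 * for the wire.
 */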
void multifd_ram_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    MultiFDPages_t *pages = &p->data->u.ram;
    uint32_t zero_num = pages->num - pages->normal_num;

    packet->pages_alloc = cpu_to_be32(multifd_ram_page_count());
    packet->normal_pages = cpu_to_be32(pages->normal_num);
    packet->zero_pages = cpu_to_be32(zero_num);

    if (pages->block) {
        pstrcpy(packet->ramblock, sizeof(packet->ramblock),
                pages->block->idstr);
    }

    for (int i = 0; i < pages->num; i++) {
        /* there are architectures where ram_addr_t is 32 bit */
        uint64_t temp = pages->offset[i];

        packet->offset[i] = cpu_to_be64(temp);
    }

    trace_multifd_send_ram_fill(p->id, pages->normal_num,
                                zero_num);
}

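/*
 * Parse and validate the RAM part of an incoming packet: check the
 * page counts against what a packet may carry, look up the ramblock
 * by name, and convert the normal and zero page offsets back to host
 * byte order, rejecting any offset outside the block.
 */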
int multifd_ram_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    uint32_t page_count = multifd_ram_page_count();
    uint32_t page_size = multifd_ram_page_size();
    uint32_t pages_per_packet = be32_to_cpu(packet->pages_alloc);
    int i;

    if (pages_per_packet > page_count) {
        error_setg(errp, "multifd: received packet with %u pages, expected %u",
                   pages_per_packet, page_count);
        return -1;
    }

    p->normal_num = be32_to_cpu(packet->normal_pages);
    if (p->normal_num > pages_per_packet) {
        error_setg(errp, "multifd: received packet with %u non-zero pages, "
                   "which exceeds maximum expected pages %u",
                   p->normal_num, pages_per_packet);
        return -1;
    }

    p->zero_num = be32_to_cpu(packet->zero_pages);
    if (p->zero_num > pages_per_packet - p->normal_num) {
        error_setg(errp,
                   "multifd: received packet with %u zero pages, expected maximum %u",
                   p->zero_num, pages_per_packet - p->normal_num);
        return -1;
    }

    if (p->normal_num == 0 && p->zero_num == 0) {
        return 0;
    }

    /* make sure that ramblock is 0 terminated */
    packet->ramblock[255] = 0;
    p->block = qemu_ram_block_by_name(packet->ramblock);
    if (!p->block) {
        error_setg(errp, "multifd: unknown ram block %s",
                   packet->ramblock);
        return -1;
    }

    p->host = p->block->host;
    for (i = 0; i < p->normal_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[i]);

        if (offset > (p->block->used_length - page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, p->block->used_length);
            return -1;
        }
        p->normal[i] = offset;
    }

    for (i = 0; i < p->zero_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);

        if (offset > (p->block->used_length - page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, p->block->used_length);
            return -1;
        }
        p->zero[i] = offset;
    }

    return 0;
}

static inline bool multifd_queue_empty(MultiFDPages_t *pages)
{
    return pages->num == 0;
}

static inline bool multifd_queue_full(MultiFDPages_t *pages)
{
    return pages->num == multifd_ram_page_count();
}

static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset)
{
    pages->offset[pages->num++] = offset;
}

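/*
 * Queue a page for sending.  Pages are batched per ramblock: when the
 * batch is full or the page belongs to a different ramblock than the
 * queued ones, the pending batch is handed to a send channel first.
 */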
/* Returns true if enqueue successful, false otherwise */
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages;

retry:
    pages = &multifd_ram_send->u.ram;

    if (multifd_payload_empty(multifd_ram_send)) {
        multifd_pages_reset(pages);
        multifd_set_payload_type(multifd_ram_send, MULTIFD_PAYLOAD_RAM);
    }

    /* If the queue is empty, we can already enqueue now */
    if (multifd_queue_empty(pages)) {
        pages->block = block;
        multifd_enqueue(pages, offset);
        return true;
    }

    /*
     * The queue is not empty; a flush is needed if either:
     *
     * (1) The page is not on the same ramblock as the previous ones, or,
     * (2) The queue is full.
     *
     * After the flush, always retry.
     */
    if (pages->block != block || multifd_queue_full(pages)) {
        if (!multifd_send(&multifd_ram_send)) {
            return false;
        }
        goto retry;
    }

    /* Not empty, and we still have space, do it! */
    multifd_enqueue(pages, offset);
    return true;
}

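/*
 * Send out any partially filled batch and then synchronize with the
 * multifd send channels.
 */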
int multifd_ram_flush_and_sync(void)
{
    if (!migrate_multifd()) {
        return 0;
    }

    if (!multifd_payload_empty(multifd_ram_send)) {
        if (!multifd_send(&multifd_ram_send)) {
            error_report("%s: multifd_send fail", __func__);
            return -1;
        }
    }

    return multifd_send_sync_main();
}

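/*
 * Common first half of a send_prepare hook: run zero-page detection
 * and prepare the packet header.  Returns false when every page in
 * the batch turned out to be zero and there is no payload to send.
 */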
bool multifd_send_prepare_common(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = &p->data->u.ram;
    multifd_send_zero_page_detect(p);

    if (!pages->normal_num) {
        p->next_packet_size = 0;
        return false;
    }

    multifd_send_prepare_header(p);

    return true;
}

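/* Send/recv hooks used when multifd compression is set to "none". */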
static const MultiFDMethods multifd_nocomp_ops = {
    .send_setup = multifd_nocomp_send_setup,
    .send_cleanup = multifd_nocomp_send_cleanup,
    .send_prepare = multifd_nocomp_send_prepare,
    .recv_setup = multifd_nocomp_recv_setup,
    .recv_cleanup = multifd_nocomp_recv_cleanup,
    .recv = multifd_nocomp_recv
};

static void multifd_nocomp_register(void)
{
    multifd_register_ops(MULTIFD_COMPRESSION_NONE, &multifd_nocomp_ops);
}

migration_init(multifd_nocomp_register);