mirror of https://gitlab.com/qemu-project/qemu
Migration Pull request (3rd try)
Hi This should fix all the freebsd problems. Please apply, -----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmFuj9MACgkQ9IfvGFhy 1yOmphAAxXZ4yLJP9TjhPnjngzENClk9JdRy7eH6Z3wAvx8wXGYY9gBFSxqybdsY 9WClDARPBcKFgUo6WtoSd3uolT67QaMMH/m8lggJ3D/J8DiIQrF999f57a/SKsEf y/PoiaWdPy23KtAD+G/HXYWVraH6ub5OHhRveObb0EzepsramcT55Soa1JGiUyb0 O3DONlKOfBaVc27VpWtKbw5epoa9sxqfnvo1qdv2iXn0aEtRa05X4pMhaI1FaBcP z913Ez5fbejLyS719lawlzDXdJgDf8SGMqr4CUYXZyzKvf1iz9YFxpQHR8Q/h8oH Pck4HsMoPXtecvFguLCsUXkXk2PpSfClvOtsDRVpP1RvA/CxamFkTSrIgHxpWzan MWbaaTa32UWwFMvOEARRNd1obGTgOyBue7lm68wbKdmQDYqRCbiENmV1FDhIIj27 JK2bv3QQr9Y7a3ohMcPG4bGEvMWSMj+jnZr1cRFkL4yJO8qTyRrBn0M0H5ANm1Ni Jj1bx6Q4QXAeWEdZD9jMHIB+6TU75arElPeCAlcnyNLmQ/ejP9mQIoraIn79RUCJ borVhpyPMtwA5BKoYajvfiFz6oSc4mvFLNEXKYJtiQpmbXdBfNoj40hxCSEJxgtc xm2nFN4d2i0SRcbJsCzT7ogrWYgUnZ7ppvPM93AKHQQvgSdUfOw= =ue55 -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/juanquintela/tags/migration.next-pull-request' into staging Migration Pull request (3rd try) Hi This should fix all the freebsd problems. Please apply, # gpg: Signature made Tue 19 Oct 2021 02:28:51 AM PDT # gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723 # gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full] # gpg: aka "Juan Quintela <quintela@trasno.org>" [full] * remotes/juanquintela/tags/migration.next-pull-request: migration/rdma: advise prefetch write for ODP region migration/rdma: Try to register On-Demand Paging memory region migration: allow enabling mutilfd for specific protocol only migration: allow multifd for socket protocol only migration/ram: Don't passs RAMState to migration_clear_memory_region_dirty_bitmap_*() multifd: Unconditionally unregister yank function multifd: Implement yank for multifd send side Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
commit
50352cce13
|
@ -1530,6 +1530,12 @@ config_host_data.set('HAVE_COPY_FILE_RANGE', cc.has_function('copy_file_range'))
|
|||
config_host_data.set('HAVE_OPENPTY', cc.has_function('openpty', dependencies: util))
|
||||
config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul'))
|
||||
config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: '#include <stdlib.h>'))
|
||||
if rdma.found()
|
||||
config_host_data.set('HAVE_IBV_ADVISE_MR',
|
||||
cc.has_function('ibv_advise_mr',
|
||||
args: config_host['RDMA_LIBS'].split(),
|
||||
prefix: '#include <infiniband/verbs.h>'))
|
||||
endif
|
||||
|
||||
# has_header_symbol
|
||||
config_host_data.set('CONFIG_BYTESWAP_H',
|
||||
|
|
|
@ -453,10 +453,12 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp)
|
|||
{
|
||||
const char *p = NULL;
|
||||
|
||||
migrate_protocol_allow_multifd(false); /* reset it anyway */
|
||||
qapi_event_send_migration(MIGRATION_STATUS_SETUP);
|
||||
if (strstart(uri, "tcp:", &p) ||
|
||||
strstart(uri, "unix:", NULL) ||
|
||||
strstart(uri, "vsock:", NULL)) {
|
||||
migrate_protocol_allow_multifd(true);
|
||||
socket_start_incoming_migration(p ? p : uri, errp);
|
||||
#ifdef CONFIG_RDMA
|
||||
} else if (strstart(uri, "rdma:", &p)) {
|
||||
|
@ -1235,6 +1237,14 @@ static bool migrate_caps_check(bool *cap_list,
|
|||
}
|
||||
}
|
||||
|
||||
/* incoming side only */
|
||||
if (runstate_check(RUN_STATE_INMIGRATE) &&
|
||||
!migrate_multifd_is_allowed() &&
|
||||
cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
|
||||
error_setg(errp, "multifd is not supported by current protocol");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2280,9 +2290,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
|
|||
}
|
||||
}
|
||||
|
||||
migrate_protocol_allow_multifd(false);
|
||||
if (strstart(uri, "tcp:", &p) ||
|
||||
strstart(uri, "unix:", NULL) ||
|
||||
strstart(uri, "vsock:", NULL)) {
|
||||
migrate_protocol_allow_multifd(true);
|
||||
socket_start_outgoing_migration(s, p ? p : uri, &local_err);
|
||||
#ifdef CONFIG_RDMA
|
||||
} else if (strstart(uri, "rdma:", &p)) {
|
||||
|
|
|
@ -531,7 +531,7 @@ void multifd_save_cleanup(void)
|
|||
{
|
||||
int i;
|
||||
|
||||
if (!migrate_use_multifd()) {
|
||||
if (!migrate_use_multifd() || !migrate_multifd_is_allowed()) {
|
||||
return;
|
||||
}
|
||||
multifd_send_terminate_threads(NULL);
|
||||
|
@ -546,6 +546,9 @@ void multifd_save_cleanup(void)
|
|||
MultiFDSendParams *p = &multifd_send_state->params[i];
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (p->registered_yank) {
|
||||
migration_ioc_unregister_yank(p->c);
|
||||
}
|
||||
socket_send_channel_destroy(p->c);
|
||||
p->c = NULL;
|
||||
qemu_mutex_destroy(&p->mutex);
|
||||
|
@ -813,7 +816,8 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
|
|||
return false;
|
||||
}
|
||||
} else {
|
||||
/* update for tls qio channel */
|
||||
migration_ioc_register_yank(ioc);
|
||||
p->registered_yank = true;
|
||||
p->c = ioc;
|
||||
qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
|
||||
QEMU_THREAD_JOINABLE);
|
||||
|
@ -864,6 +868,17 @@ cleanup:
|
|||
multifd_new_send_channel_cleanup(p, sioc, local_err);
|
||||
}
|
||||
|
||||
/*
 * Whether the protocol chosen for the current migration may be used
 * together with multifd.  Permissive by default; the incoming and
 * outgoing migration entry points reset and set it through
 * migrate_protocol_allow_multifd() once the URI scheme is known.
 */
static bool migrate_allow_multifd = true;

/*
 * Record whether the current migration protocol supports multifd.
 *
 * @allow: true when multifd may be used, false otherwise.
 */
void migrate_protocol_allow_multifd(bool allow)
{
    migrate_allow_multifd = allow;
}

/*
 * Returns the value most recently stored by
 * migrate_protocol_allow_multifd() (true if it was never called).
 */
bool migrate_multifd_is_allowed(void)
{
    return migrate_allow_multifd;
}
|
||||
|
||||
int multifd_save_setup(Error **errp)
|
||||
{
|
||||
int thread_count;
|
||||
|
@ -874,6 +889,11 @@ int multifd_save_setup(Error **errp)
|
|||
if (!migrate_use_multifd()) {
|
||||
return 0;
|
||||
}
|
||||
if (!migrate_multifd_is_allowed()) {
|
||||
error_setg(errp, "multifd is not supported by current protocol");
|
||||
return -1;
|
||||
}
|
||||
|
||||
s = migrate_get_current();
|
||||
thread_count = migrate_multifd_channels();
|
||||
multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
|
||||
|
@ -967,7 +987,7 @@ int multifd_load_cleanup(Error **errp)
|
|||
{
|
||||
int i;
|
||||
|
||||
if (!migrate_use_multifd()) {
|
||||
if (!migrate_use_multifd() || !migrate_multifd_is_allowed()) {
|
||||
return 0;
|
||||
}
|
||||
multifd_recv_terminate_threads(NULL);
|
||||
|
@ -987,10 +1007,7 @@ int multifd_load_cleanup(Error **errp)
|
|||
for (i = 0; i < migrate_multifd_channels(); i++) {
|
||||
MultiFDRecvParams *p = &multifd_recv_state->params[i];
|
||||
|
||||
if (OBJECT(p->c)->ref == 1) {
|
||||
migration_ioc_unregister_yank(p->c);
|
||||
}
|
||||
|
||||
migration_ioc_unregister_yank(p->c);
|
||||
object_unref(OBJECT(p->c));
|
||||
p->c = NULL;
|
||||
qemu_mutex_destroy(&p->mutex);
|
||||
|
@ -1119,6 +1136,10 @@ int multifd_load_setup(Error **errp)
|
|||
if (!migrate_use_multifd()) {
|
||||
return 0;
|
||||
}
|
||||
if (!migrate_multifd_is_allowed()) {
|
||||
error_setg(errp, "multifd is not supported by current protocol");
|
||||
return -1;
|
||||
}
|
||||
thread_count = migrate_multifd_channels();
|
||||
multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
|
||||
multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
#ifndef QEMU_MIGRATION_MULTIFD_H
|
||||
#define QEMU_MIGRATION_MULTIFD_H
|
||||
|
||||
bool migrate_multifd_is_allowed(void);
|
||||
void migrate_protocol_allow_multifd(bool allow);
|
||||
int multifd_save_setup(Error **errp);
|
||||
void multifd_save_cleanup(void);
|
||||
int multifd_load_setup(Error **errp);
|
||||
|
@ -85,6 +87,8 @@ typedef struct {
|
|||
bool running;
|
||||
/* should this thread finish */
|
||||
bool quit;
|
||||
/* is the yank function registered */
|
||||
bool registered_yank;
|
||||
/* thread has work to do */
|
||||
int pending_job;
|
||||
/* array of pages to send */
|
||||
|
|
|
@ -789,8 +789,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
|
|||
return find_next_bit(bitmap, size, start);
|
||||
}
|
||||
|
||||
static void migration_clear_memory_region_dirty_bitmap(RAMState *rs,
|
||||
RAMBlock *rb,
|
||||
static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
|
||||
unsigned long page)
|
||||
{
|
||||
uint8_t shift;
|
||||
|
@ -818,8 +817,7 @@ static void migration_clear_memory_region_dirty_bitmap(RAMState *rs,
|
|||
}
|
||||
|
||||
static void
|
||||
migration_clear_memory_region_dirty_bitmap_range(RAMState *rs,
|
||||
RAMBlock *rb,
|
||||
migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
|
||||
unsigned long start,
|
||||
unsigned long npages)
|
||||
{
|
||||
|
@ -832,7 +830,7 @@ migration_clear_memory_region_dirty_bitmap_range(RAMState *rs,
|
|||
* exclusive.
|
||||
*/
|
||||
for (i = chunk_start; i < chunk_end; i += chunk_pages) {
|
||||
migration_clear_memory_region_dirty_bitmap(rs, rb, i);
|
||||
migration_clear_memory_region_dirty_bitmap(rb, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -850,7 +848,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
|
|||
* the page in the chunk we clear the remote dirty bitmap for all.
|
||||
* Clearing it earlier won't be a problem, but too late will.
|
||||
*/
|
||||
migration_clear_memory_region_dirty_bitmap(rs, rb, page);
|
||||
migration_clear_memory_region_dirty_bitmap(rb, page);
|
||||
|
||||
ret = test_and_clear_bit(page, rb->bmap);
|
||||
if (ret) {
|
||||
|
@ -2777,8 +2775,7 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
|
|||
* are initially set. Otherwise those skipped pages will be sent in
|
||||
* the next round after syncing from the memory region bitmap.
|
||||
*/
|
||||
migration_clear_memory_region_dirty_bitmap_range(ram_state, block,
|
||||
start, npages);
|
||||
migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
|
||||
ram_state->migration_dirty_pages -=
|
||||
bitmap_count_one_with_offset(block->bmap, start, npages);
|
||||
bitmap_clear(block->bmap, start, npages);
|
||||
|
|
113
migration/rdma.c
113
migration/rdma.c
|
@ -1117,19 +1117,82 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Check whether On-Demand Paging is supported by RDAM device */
|
||||
static bool rdma_support_odp(struct ibv_context *dev)
|
||||
{
|
||||
struct ibv_device_attr_ex attr = {0};
|
||||
int ret = ibv_query_device_ex(dev, NULL, &attr);
|
||||
if (ret) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * ibv_advise_mr to avoid RNR NAK error as far as possible.
 * The responder mr registering with ODP will send RNR NAK back to
 * the requester in the face of the page fault.
 *
 * @pd:   protection domain passed through to ibv_advise_mr()
 * @addr: start address of the range to prefetch
 * @len:  length of the range in bytes
 * @lkey: local key of the memory region covering the range
 * @name: block name, used only for the trace event
 * @wr:   true to advise a prefetch for write access,
 *        false for a plain prefetch
 *
 * The advice is best effort: a failure is traced and otherwise ignored.
 * When HAVE_IBV_ADVISE_MR is not defined this function does nothing.
 */
static void qemu_rdma_advise_prefetch_mr(struct ibv_pd *pd, uint64_t addr,
                                         uint32_t len,  uint32_t lkey,
                                         const char *name, bool wr)
{
#ifdef HAVE_IBV_ADVISE_MR
    int ret;
    int advice = wr ? IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE :
                 IBV_ADVISE_MR_ADVICE_PREFETCH;
    struct ibv_sge sg_list = {.lkey = lkey, .addr = addr, .length = len};

    ret = ibv_advise_mr(pd, advice,
                        IBV_ADVISE_MR_FLAG_FLUSH, &sg_list, 1);
    /* ignore the error */
    if (ret) {
        /*
         * ibv_advise_mr() hands back the error code as its return
         * value; errno is not guaranteed to describe this failure,
         * so format the returned code instead.
         */
        trace_qemu_rdma_advise_mr(name, len, addr, strerror(ret));
    } else {
        trace_qemu_rdma_advise_mr(name, len, addr, "successed");
    }
#endif
}
|
||||
|
||||
static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
|
||||
{
|
||||
int i;
|
||||
RDMALocalBlocks *local = &rdma->local_ram_blocks;
|
||||
|
||||
for (i = 0; i < local->nb_blocks; i++) {
|
||||
int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
|
||||
|
||||
local->block[i].mr =
|
||||
ibv_reg_mr(rdma->pd,
|
||||
local->block[i].local_host_addr,
|
||||
local->block[i].length,
|
||||
IBV_ACCESS_LOCAL_WRITE |
|
||||
IBV_ACCESS_REMOTE_WRITE
|
||||
local->block[i].length, access
|
||||
);
|
||||
|
||||
if (!local->block[i].mr &&
|
||||
errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
|
||||
access |= IBV_ACCESS_ON_DEMAND;
|
||||
/* register ODP mr */
|
||||
local->block[i].mr =
|
||||
ibv_reg_mr(rdma->pd,
|
||||
local->block[i].local_host_addr,
|
||||
local->block[i].length, access);
|
||||
trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
|
||||
|
||||
if (local->block[i].mr) {
|
||||
qemu_rdma_advise_prefetch_mr(rdma->pd,
|
||||
(uintptr_t)local->block[i].local_host_addr,
|
||||
local->block[i].length,
|
||||
local->block[i].mr->lkey,
|
||||
local->block[i].block_name,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
if (!local->block[i].mr) {
|
||||
perror("Failed to register local dest ram block!");
|
||||
break;
|
||||
|
@ -1215,28 +1278,40 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma,
|
|||
*/
|
||||
if (!block->pmr[chunk]) {
|
||||
uint64_t len = chunk_end - chunk_start;
|
||||
int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE :
|
||||
0;
|
||||
|
||||
trace_qemu_rdma_register_and_get_keys(len, chunk_start);
|
||||
|
||||
block->pmr[chunk] = ibv_reg_mr(rdma->pd,
|
||||
chunk_start, len,
|
||||
(rkey ? (IBV_ACCESS_LOCAL_WRITE |
|
||||
IBV_ACCESS_REMOTE_WRITE) : 0));
|
||||
block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
|
||||
if (!block->pmr[chunk] &&
|
||||
errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
|
||||
access |= IBV_ACCESS_ON_DEMAND;
|
||||
/* register ODP mr */
|
||||
block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
|
||||
trace_qemu_rdma_register_odp_mr(block->block_name);
|
||||
|
||||
if (!block->pmr[chunk]) {
|
||||
perror("Failed to register chunk!");
|
||||
fprintf(stderr, "Chunk details: block: %d chunk index %d"
|
||||
" start %" PRIuPTR " end %" PRIuPTR
|
||||
" host %" PRIuPTR
|
||||
" local %" PRIuPTR " registrations: %d\n",
|
||||
block->index, chunk, (uintptr_t)chunk_start,
|
||||
(uintptr_t)chunk_end, host_addr,
|
||||
(uintptr_t)block->local_host_addr,
|
||||
rdma->total_registrations);
|
||||
return -1;
|
||||
if (block->pmr[chunk]) {
|
||||
qemu_rdma_advise_prefetch_mr(rdma->pd, (uintptr_t)chunk_start,
|
||||
len, block->pmr[chunk]->lkey,
|
||||
block->block_name, rkey);
|
||||
|
||||
}
|
||||
}
|
||||
rdma->total_registrations++;
|
||||
}
|
||||
if (!block->pmr[chunk]) {
|
||||
perror("Failed to register chunk!");
|
||||
fprintf(stderr, "Chunk details: block: %d chunk index %d"
|
||||
" start %" PRIuPTR " end %" PRIuPTR
|
||||
" host %" PRIuPTR
|
||||
" local %" PRIuPTR " registrations: %d\n",
|
||||
block->index, chunk, (uintptr_t)chunk_start,
|
||||
(uintptr_t)chunk_end, host_addr,
|
||||
(uintptr_t)block->local_host_addr,
|
||||
rdma->total_registrations);
|
||||
return -1;
|
||||
}
|
||||
rdma->total_registrations++;
|
||||
|
||||
if (lkey) {
|
||||
*lkey = block->pmr[chunk]->lkey;
|
||||
|
|
|
@ -212,6 +212,8 @@ qemu_rdma_poll_write(const char *compstr, int64_t comp, int left, uint64_t block
|
|||
qemu_rdma_poll_other(const char *compstr, int64_t comp, int left) "other completion %s (%" PRId64 ") received left %d"
|
||||
qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.."
|
||||
qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu64 " bytes @ %p"
|
||||
qemu_rdma_register_odp_mr(const char *name) "Try to register On-Demand Paging memory region: %s"
|
||||
qemu_rdma_advise_mr(const char *name, uint32_t len, uint64_t addr, const char *res) "Try to advise block %s prefetch at %" PRIu32 "@0x%" PRIx64 ": %s"
|
||||
qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64
|
||||
qemu_rdma_registration_handle_finished(void) ""
|
||||
qemu_rdma_registration_handle_ram_blocks(void) ""
|
||||
|
|
Loading…
Reference in New Issue