Merge tag 'next-pull-request' of https://gitlab.com/juan.quintela/qemu into staging

Migration PULL request (take 3)

Hi

Drop everything that is not a bug fix:
- fixes by peter
- fix comment on block creation (me)
- fix return values from qio_channel_block()

Please, apply.

(take 1)
It includes:
- Leonardo fix for zero_copy flush
- Fiona fix for return value of readv/writev
- Peter Xu cleanups
- Peter Xu preempt patches
- Patches ready from zero page (me)
- AVX2 support (ling)
- fix for slow networking and reordering of first packets (manish)

Please, apply.

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmN7dhUACgkQ9IfvGFhy
# 1yN0GhAAmpBGFomPXqOhixXcZdCOpFvLVKU13O+okp2NgY9W5Qlicf6ANo0cbvUh
# VVLCnXToySbP+7TLLqZjT4mVgM6EUIk1xqUXXICJ1mXIznvMnMtnseMNX033E2RL
# mhIVx+2AsoClWR9AdQVrzvjwR/gmzEa915w1HnHVfLFSPWmIfd9iWvOEenf5SYY5
# R7yAq0tWohOAtPiyrFAchcyTidW7pB2ZqD85ZEuGQ6EBpPxHM2NZ46NuK52j02k3
# eKGrKBFAh4QTRf5+QT0ASAGUqxPYM3iT/WOw3FZkZDQoedcReeECgDh1gfdd27iH
# Rebn+UHThgofBAspFVrJs9rSVlOnDdDp7yY1YDC6s6285Dci9JyWe0raIyvfdBK7
# h+AtBFLZVkIR0LXu4NlVe4IHnO5t/XVsLPwZ+7SQ9fc3gezAn4kAiEf+m8umTgho
# n3Jo+2dl52QoMOW2OsX9199g0lorQAby6bJVG4xbq82ijE9N1NHuLe44w9OGZTKg
# 697cNPDaoSRrvAdCPPh5KaZXsxpfLPxoMlZWxCTsNvs/jCzGs7AnvbU0QHlB+skU
# R2Ae42QBq6ZSogtN8tNZFPH82Z6xTOJNILtmMgEQGAjLf3yOd8T5gZLsYNujTOyJ
# ZsahXU0yRTkGmCkzCyr//mGu4KEPWtDOq27QqQPFfayvhr16ECw=
# =dosb
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 21 Nov 2022 07:59:01 EST
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* tag 'next-pull-request' of https://gitlab.com/juan.quintela/qemu:
  migration: Block migration comment or code is wrong
  migration: Disable multifd explicitly with compression
  migration: Use non-atomic ops for clear log bitmap
  migration: Disallow postcopy preempt to be used with compress
  migration: Fix race on qemu_file_shutdown()
  migration: Fix possible infinite loop of ram save process
  migration/multifd/zero-copy: Create helper function for flushing
  migration/channel-block: fix return value for qio_channel_block_{readv,writev}

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Stefan Hajnoczi 2022-11-21 09:26:18 -05:00
commit af29446f32
10 changed files with 139 additions and 33 deletions

@@ -42,7 +42,8 @@ static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
}
/**
* clear_bmap_set: set clear bitmap for the page range
* clear_bmap_set: set clear bitmap for the page range. Must be with
* bitmap_mutex held.
*
* @rb: the ramblock to operate on
* @start: the start page number
@@ -55,12 +56,12 @@ static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
{
uint8_t shift = rb->clear_bmap_shift;
bitmap_set_atomic(rb->clear_bmap, start >> shift,
clear_bmap_size(npages, shift));
bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}
/**
* clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
* clear_bmap_test_and_clear: test clear bitmap for the page, clear if set.
* Must be with bitmap_mutex held.
*
* @rb: the ramblock to operate on
* @page: the page number to check
@@ -71,7 +72,7 @@ static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
uint8_t shift = rb->clear_bmap_shift;
return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}
static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
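
The two hunks above switch the clear_bmap helpers from the atomic bitmap primitives to the plain ones, on the grounds that every caller already holds ram_state.bitmap_mutex (see the RAMBlock comment in the next hunk). A minimal standalone sketch of that pattern, using POSIX threads and invented names rather than QEMU's APIs:

/* Sketch only: once every access to the bitmap is serialized by one mutex,
 * plain read-modify-write bit operations are safe and the atomic variants
 * merely add overhead. */
#include <pthread.h>
#include <stdbool.h>

#define BITS_PER_WORD (8 * sizeof(unsigned long))

static pthread_mutex_t bmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned long bmap[4];                  /* 256 bits on a 64-bit host */

static void bmap_set_locked(unsigned long bit)
{
    pthread_mutex_lock(&bmap_mutex);
    bmap[bit / BITS_PER_WORD] |= 1UL << (bit % BITS_PER_WORD);    /* non-atomic */
    pthread_mutex_unlock(&bmap_mutex);
}

static bool bmap_test_and_clear_locked(unsigned long bit)
{
    unsigned long mask = 1UL << (bit % BITS_PER_WORD);
    bool was_set;

    pthread_mutex_lock(&bmap_mutex);
    was_set = bmap[bit / BITS_PER_WORD] & mask;
    bmap[bit / BITS_PER_WORD] &= ~mask;                           /* non-atomic */
    pthread_mutex_unlock(&bmap_mutex);
    return was_set;
}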

@@ -53,6 +53,9 @@ struct RAMBlock {
* and split clearing of dirty bitmap on the remote node (e.g.,
* KVM). The bitmap will be set only when doing global sync.
*
* It is only used during src side of ram migration, and it is
* protected by the global ram_state.bitmap_mutex.
*
* NOTE: this bitmap is different comparing to the other bitmaps
* in that one bit can represent multiple guest pages (which is
* decided by the `clear_bmap_shift' variable below). On
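
The comment above (truncated by the diff view) describes how one clear_bmap bit covers a power-of-two run of guest pages, chosen by clear_bmap_shift. A small worked example, assuming the usual 4 KiB page size and a shift of 18; the value 18 is my assumption about the default, not something shown in this diff, and clear_bmap_size() is re-implemented here from its documented behaviour:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the documented behaviour of clear_bmap_size(): round the page
 * count up to whole chunks of 2^shift pages. */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return (pages + (1ULL << shift) - 1) >> shift;
}

int main(void)
{
    uint64_t pages = (16ULL << 30) / 4096;   /* a 16 GiB guest in 4 KiB pages */
    uint8_t shift = 18;                      /* 2^18 pages * 4 KiB = 1 GiB per bit */

    printf("clear_bmap needs %ld bits\n", clear_bmap_size(pages, shift));   /* 16 */
    return 0;
}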

@@ -253,6 +253,7 @@ void bitmap_set(unsigned long *map, long i, long len);
void bitmap_set_atomic(unsigned long *map, long i, long len);
void bitmap_clear(unsigned long *map, long start, long nr);
bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr);
bool bitmap_test_and_clear(unsigned long *map, long start, long nr);
void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src,
long nr);
unsigned long bitmap_find_next_zero_area(unsigned long *map,

@@ -880,8 +880,8 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
blk_mig_unlock();
/* Report at least one block pending during bulk phase */
if (pending <= max_size && !block_mig_state.bulk_completed) {
pending = max_size + BLK_MIG_BLOCK_SIZE;
if (!pending && !block_mig_state.bulk_completed) {
pending = BLK_MIG_BLOCK_SIZE;
}
trace_migration_block_save_pending(pending);

@@ -62,7 +62,8 @@ qio_channel_block_readv(QIOChannel *ioc,
qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
ret = bdrv_readv_vmstate(bioc->bs, &qiov, bioc->offset);
if (ret < 0) {
return ret;
error_setg_errno(errp, -ret, "bdrv_readv_vmstate failed");
return -1;
}
bioc->offset += qiov.size;
@@ -86,7 +87,8 @@ qio_channel_block_writev(QIOChannel *ioc,
qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov);
ret = bdrv_writev_vmstate(bioc->bs, &qiov, bioc->offset);
if (ret < 0) {
return ret;
error_setg_errno(errp, -ret, "bdrv_writev_vmstate failed");
return -1;
}
bioc->offset += qiov.size;
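
Both hunks above apply the same fix: the block-channel readv/writev handlers were passing the raw negative errno from the vmstate block-layer helpers straight back to their callers without filling in errp, whereas the channel contract expects -1 plus a populated Error on failure. A standalone sketch of the two error conventions, with invented helper names in place of the QEMU APIs:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Inner layer, errno-style: 0 on success or a negative errno value. */
static int inner_write(int simulate_failure)
{
    return simulate_failure ? -ENOSPC : 0;
}

/* Outer layer, channel-style: -1 on failure plus a human-readable message,
 * never a raw negative errno passed straight through. */
static int outer_write(int simulate_failure, char *errmsg, size_t errmsg_len)
{
    int ret = inner_write(simulate_failure);

    if (ret < 0) {
        snprintf(errmsg, errmsg_len, "vmstate write failed: %s", strerror(-ret));
        return -1;
    }
    return 0;
}

int main(void)
{
    char msg[128];

    if (outer_write(1, msg, sizeof(msg)) < 0) {
        fprintf(stderr, "%s\n", msg);
    }
    return 0;
}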

@@ -1337,6 +1337,24 @@ static bool migrate_caps_check(bool *cap_list,
error_setg(errp, "Postcopy preempt requires postcopy-ram");
return false;
}
/*
* Preempt mode requires urgent pages to be sent in separate
* channel, OTOH compression logic will disorder all pages into
* different compression channels, which is not compatible with the
* preempt assumptions on channel assignments.
*/
if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
error_setg(errp, "Postcopy preempt not compatible with compress");
return false;
}
}
if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
error_setg(errp, "Multifd is not compatible with compress");
return false;
}
}
return true;

@@ -566,6 +566,23 @@ void multifd_save_cleanup(void)
multifd_send_state = NULL;
}
static int multifd_zero_copy_flush(QIOChannel *c)
{
int ret;
Error *err = NULL;
ret = qio_channel_flush(c, &err);
if (ret < 0) {
error_report_err(err);
return -1;
}
if (ret == 1) {
dirty_sync_missed_zero_copy();
}
return ret;
}
int multifd_send_sync_main(QEMUFile *f)
{
int i;
@@ -616,17 +633,8 @@ int multifd_send_sync_main(QEMUFile *f)
qemu_mutex_unlock(&p->mutex);
qemu_sem_post(&p->sem);
if (flush_zero_copy && p->c) {
int ret;
Error *err = NULL;
ret = qio_channel_flush(p->c, &err);
if (ret < 0) {
error_report_err(err);
if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
return -1;
} else if (ret == 1) {
dirty_sync_missed_zero_copy();
}
}
}
for (i = 0; i < migrate_multifd_channels(); i++) {

@@ -79,6 +79,30 @@ int qemu_file_shutdown(QEMUFile *f)
int ret = 0;
f->shutdown = true;
/*
* We must set qemufile error before the real shutdown(), otherwise
* there can be a race window where we thought IO all went though
* (because last_error==NULL) but actually IO has already stopped.
*
* If without correct ordering, the race can happen like this:
*
* page receiver other thread
* ------------- ------------
* qemu_get_buffer()
* do shutdown()
* returns 0 (buffer all zero)
* (we didn't check this retcode)
* try to detect IO error
* last_error==NULL, IO okay
* install ALL-ZERO page
* set last_error
* --> guest crash!
*/
if (!f->last_error) {
qemu_file_set_error(f, -EIO);
}
if (!qio_channel_has_feature(f->ioc,
QIO_CHANNEL_FEATURE_SHUTDOWN)) {
return -ENOSYS;
@@ -88,9 +112,6 @@ int qemu_file_shutdown(QEMUFile *f)
ret = -EIO;
}
if (!f->last_error) {
qemu_file_set_error(f, -EIO);
}
return ret;
}
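
The comment added above explains the ordering requirement: the qemufile error must be visible before the channel is actually shut down, otherwise a reader can consume the zero-filled result of a dead channel, check last_error, find nothing, and install a bogus all-zero page. A standalone sketch of the same publish-then-shutdown ordering, using a socketpair and an atomic flag instead of QEMUFile (all names invented):

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

static atomic_int last_error;   /* plays the role of f->last_error */
static int fds[2];

static void *receiver(void *arg)
{
    char buf[16];
    ssize_t n;

    (void)arg;
    n = read(fds[0], buf, sizeof(buf));   /* returns 0 once the peer shuts down */

    if (n <= 0 && atomic_load(&last_error) == 0) {
        /* The window the patch closes: empty read seen, but no error seen. */
        printf("BUG: would have used bogus data\n");
    } else if (n <= 0) {
        printf("aborted cleanly, error %d already visible\n",
               atomic_load(&last_error));
    }
    return NULL;
}

int main(void)
{
    pthread_t t;

    if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
        return 1;
    }
    pthread_create(&t, NULL, receiver, NULL);

    atomic_store(&last_error, EIO);   /* 1) publish the error state first ... */
    shutdown(fds[1], SHUT_WR);        /* 2) ... then do the real shutdown()   */

    pthread_join(t, NULL);
    close(fds[0]);
    close(fds[1]);
    return 0;
}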

@@ -2305,13 +2305,12 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
}
/*
* Do not use multifd for:
* 1. Compression as the first page in the new block should be posted out
* before sending the compressed page
* 2. In postcopy as one whole host page should be placed
* Do not use multifd in postcopy as one whole host page should be
* placed. Meanwhile postcopy requires atomic update of pages, so even
* if host page size == guest page size the dest guest during run may
* still see partially copied pages which is data corruption.
*/
if (!save_page_use_compression(rs) && migrate_use_multifd()
&& !migration_in_postcopy()) {
if (migrate_use_multifd() && !migration_in_postcopy()) {
return ram_save_multifd_page(rs, block, offset);
}
@@ -2546,14 +2545,22 @@ static int ram_find_and_save_block(RAMState *rs)
return pages;
}
/*
* Always keep last_seen_block/last_page valid during this procedure,
* because find_dirty_block() relies on these values (e.g., we compare
* last_seen_block with pss.block to see whether we searched all the
* ramblocks) to detect the completion of migration. Having NULL value
* of last_seen_block can conditionally cause below loop to run forever.
*/
if (!rs->last_seen_block) {
rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks);
rs->last_page = 0;
}
pss.block = rs->last_seen_block;
pss.page = rs->last_page;
pss.complete_round = false;
if (!pss.block) {
pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
}
do {
again = true;
found = get_queued_page(rs, &pss);
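
The second hunk keeps rs->last_seen_block pointing at a real RAMBlock before the scan starts: as the added comment says, find_dirty_block() detects the end of a full pass by comparing the current block against last_seen_block, and a NULL anchor can turn that comparison into one that never succeeds. A standalone sketch of the same round-trip detection over a circular list (types and names invented, non-empty list assumed):

#include <stdbool.h>
#include <stddef.h>

typedef struct Block {
    struct Block *next;    /* circular list: the last block links back to the first */
    bool dirty;
} Block;

static Block *head;        /* first block in the (non-empty) list */
static Block *last_seen;   /* where the previous scan left off */

/* Return the next dirty block, or NULL after exactly one full round with
 * nothing found.  The round is over when the cursor gets back to last_seen,
 * so last_seen must name a real block before the walk starts: if it stayed
 * NULL while the cursor silently started at head, the b != last_seen test
 * below could never become false and a fully clean list would loop forever. */
Block *find_next_dirty(void)
{
    Block *b;

    if (!last_seen) {
        last_seen = head;  /* the fix: anchor the round on a valid block */
    }

    b = last_seen;
    do {
        b = b->next;
        if (b->dirty) {
            last_seen = b;
            return b;
        }
    } while (b != last_seen);

    return NULL;
}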

@@ -240,6 +240,51 @@ void bitmap_clear(unsigned long *map, long start, long nr)
}
}
bool bitmap_test_and_clear(unsigned long *map, long start, long nr)
{
unsigned long *p = map + BIT_WORD(start);
const long size = start + nr;
int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
bool dirty = false;
assert(start >= 0 && nr >= 0);
/* First word */
if (nr - bits_to_clear > 0) {
if ((*p) & mask_to_clear) {
dirty = true;
}
*p &= ~mask_to_clear;
nr -= bits_to_clear;
bits_to_clear = BITS_PER_LONG;
mask_to_clear = ~0UL;
p++;
}
/* Full words */
if (bits_to_clear == BITS_PER_LONG) {
while (nr >= BITS_PER_LONG) {
if (*p) {
dirty = true;
*p = 0;
}
nr -= BITS_PER_LONG;
p++;
}
}
/* Last word */
if (nr) {
mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
if ((*p) & mask_to_clear) {
dirty = true;
}
*p &= ~mask_to_clear;
}
return dirty;
}
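
/*
 * Worked example for bitmap_test_and_clear() above; not part of the patch,
 * and it assumes BITS_PER_LONG == 64.  Clearing 10 bits starting at bit 60
 * touches two words: the first-word branch clears bits 60..63 of word 0 via
 * BITMAP_FIRST_WORD_MASK(60), the full-word loop is skipped (only 6 bits
 * remain), and the last-word branch clears bits 0..5 of word 1 via
 * BITMAP_LAST_WORD_MASK(70).
 *
 *     unsigned long map[2] = { ~0UL, ~0UL };            // 128 bits, all set
 *     bool dirty = bitmap_test_and_clear(map, 60, 10);  // clear bits 60..69
 *     // dirty == true
 *     // map[0] == 0x0fffffffffffffff   (bits 0..59 still set)
 *     // map[1] == 0xffffffffffffffc0   (bits 6..63 still set)
 */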
bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
{
unsigned long *p = map + BIT_WORD(start);