migration/ram: Optimize ram_save_host_page()

Starting from pss->page, ram_save_host_page() checks every page
and sends the dirty ones, up to the end of the current host page or
the used_length boundary of the block. If the host page is a huge
page, checking every page one by one takes a lot of time.

Using migration_bitmap_find_dirty() to locate the next dirty page,
instead of testing each page in turn, improves performance.

Tested on Kunpeng 920; VM parameters: 1U 4G (page size 1G)
Time spent in ram_save_host_page() during the last round of RAM saving:
before optimization: 9250us, after optimization: 34us

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <20210316125716.1243-3-jiangkunkun@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
Kunkun Jiang 2021-03-16 20:57:16 +08:00 committed by Dr. David Alan Gilbert
parent 23feba906e
commit ba1b7c812c

View File

@ -2013,6 +2013,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
int tmppages, pages = 0;
size_t pagesize_bits =
qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
unsigned long hostpage_boundary =
QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
unsigned long start_page = pss->page;
int res;
@ -2023,18 +2025,13 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
do {
/* Check the pages is dirty and if it is send it */
if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
pss->page++;
continue;
}
if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
tmppages = ram_save_target_page(rs, pss, last_stage);
if (tmppages < 0) {
return tmppages;
}
pages += tmppages;
pss->page++;
/*
* Allow rate limiting to happen in the middle of huge pages if
* something is sent in the current iteration.
@ -2042,11 +2039,13 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
if (pagesize_bits > 1 && tmppages > 0) {
migration_rate_limit();
}
} while ((pss->page & (pagesize_bits - 1)) &&
}
pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
} while ((pss->page < hostpage_boundary) &&
offset_in_ramblock(pss->block,
((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
/* The offset we leave with is the last one we looked at */
pss->page--;
/* The offset we leave with is the min boundary of host page and block */
pss->page = MIN(pss->page, hostpage_boundary) - 1;
res = ram_save_release_protection(rs, pss, start_page);
return (res < 0 ? res : pages);