kvm: Support KVM_CLEAR_DIRTY_LOG
First, detect the interface using KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and mark it. If enabling the new feature fails, we fall back to the old sync. Provide the log_clear() hook for the memory listeners of both KVM address spaces (normal system memory and SMM) and deliver the clear message to the kernel. Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Message-Id: <20190603065056.25211-11-peterx@redhat.com> Signed-off-by: Juan Quintela <quintela@redhat.com>
This commit is contained in:
parent
36adac4934
commit
ff4aa11419
@ -91,6 +91,7 @@ struct KVMState
|
||||
int many_ioeventfds;
|
||||
int intx_set_mask;
|
||||
bool sync_mmu;
|
||||
bool manual_dirty_log_protect;
|
||||
/* The man page (and posix) say ioctl numbers are signed int, but
|
||||
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
|
||||
* unsigned, and treating them as signed here can break things */
|
||||
@ -560,6 +561,159 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Alignment requirement for KVM_CLEAR_DIRTY_LOG - 64 pages */
|
||||
#define KVM_CLEAR_LOG_SHIFT 6
|
||||
#define KVM_CLEAR_LOG_ALIGN (qemu_real_host_page_size << KVM_CLEAR_LOG_SHIFT)
|
||||
#define KVM_CLEAR_LOG_MASK (-KVM_CLEAR_LOG_ALIGN)
|
||||
|
||||
/**
|
||||
* kvm_physical_log_clear - Clear the kernel's dirty bitmap for range
|
||||
*
|
||||
* NOTE: this will be a no-op if we haven't enabled manual dirty log
|
||||
* protection in the host kernel because in that case this operation
|
||||
* will be done within log_sync().
|
||||
*
|
||||
* @kml: the kvm memory listener
|
||||
* @section: the memory range to clear dirty bitmap
|
||||
*/
|
||||
static int kvm_physical_log_clear(KVMMemoryListener *kml,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
KVMState *s = kvm_state;
|
||||
struct kvm_clear_dirty_log d;
|
||||
uint64_t start, end, bmap_start, start_delta, bmap_npages, size;
|
||||
unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size;
|
||||
KVMSlot *mem = NULL;
|
||||
int ret, i;
|
||||
|
||||
if (!s->manual_dirty_log_protect) {
|
||||
/* No need to do explicit clear */
|
||||
return 0;
|
||||
}
|
||||
|
||||
start = section->offset_within_address_space;
|
||||
size = int128_get64(section->size);
|
||||
|
||||
if (!size) {
|
||||
/* Nothing more we can do... */
|
||||
return 0;
|
||||
}
|
||||
|
||||
kvm_slots_lock(kml);
|
||||
|
||||
/* Find any possible slot that covers the section */
|
||||
for (i = 0; i < s->nr_slots; i++) {
|
||||
mem = &kml->slots[i];
|
||||
if (mem->start_addr <= start &&
|
||||
start + size <= mem->start_addr + mem->memory_size) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We should always find one memslot until this point, otherwise
|
||||
* there could be something wrong from the upper layer
|
||||
*/
|
||||
assert(mem && i != s->nr_slots);
|
||||
|
||||
/*
|
||||
* We need to extend either the start or the size or both to
|
||||
* satisfy the KVM interface requirement. Firstly, do the start
|
||||
* page alignment on 64 host pages
|
||||
*/
|
||||
bmap_start = (start - mem->start_addr) & KVM_CLEAR_LOG_MASK;
|
||||
start_delta = start - mem->start_addr - bmap_start;
|
||||
bmap_start /= psize;
|
||||
|
||||
/*
|
||||
* The kernel interface has restriction on the size too, that either:
|
||||
*
|
||||
* (1) the size is 64 host pages aligned (just like the start), or
|
||||
* (2) the size fills up until the end of the KVM memslot.
|
||||
*/
|
||||
bmap_npages = DIV_ROUND_UP(size + start_delta, KVM_CLEAR_LOG_ALIGN)
|
||||
<< KVM_CLEAR_LOG_SHIFT;
|
||||
end = mem->memory_size / psize;
|
||||
if (bmap_npages > end - bmap_start) {
|
||||
bmap_npages = end - bmap_start;
|
||||
}
|
||||
start_delta /= psize;
|
||||
|
||||
/*
|
||||
* Prepare the bitmap to clear dirty bits. Here we must guarantee
|
||||
* that we won't clear any unknown dirty bits otherwise we might
|
||||
* accidentally clear some set bits which are not yet synced from
|
||||
* the kernel into QEMU's bitmap, then we'll lose track of the
|
||||
* guest modifications upon those pages (which can directly lead
|
||||
* to guest data loss or panic after migration).
|
||||
*
|
||||
* Layout of the KVMSlot.dirty_bmap:
|
||||
*
|
||||
* |<-------- bmap_npages -----------..>|
|
||||
* [1]
|
||||
* start_delta size
|
||||
* |----------------|-------------|------------------|------------|
|
||||
* ^ ^ ^ ^
|
||||
* | | | |
|
||||
* start bmap_start (start) end
|
||||
* of memslot of memslot
|
||||
*
|
||||
* [1] bmap_npages can be aligned to either 64 pages or the end of slot
|
||||
*/
|
||||
|
||||
assert(bmap_start % BITS_PER_LONG == 0);
|
||||
/* We should never do log_clear before log_sync */
|
||||
assert(mem->dirty_bmap);
|
||||
if (start_delta) {
|
||||
/* Slow path - we need to manipulate a temp bitmap */
|
||||
bmap_clear = bitmap_new(bmap_npages);
|
||||
bitmap_copy_with_src_offset(bmap_clear, mem->dirty_bmap,
|
||||
bmap_start, start_delta + size / psize);
|
||||
/*
|
||||
* We need to fill the holes at start because that was not
|
||||
* specified by the caller and we extended the bitmap only for
|
||||
* 64 pages alignment
|
||||
*/
|
||||
bitmap_clear(bmap_clear, 0, start_delta);
|
||||
d.dirty_bitmap = bmap_clear;
|
||||
} else {
|
||||
/* Fast path - start address aligns well with BITS_PER_LONG */
|
||||
d.dirty_bitmap = mem->dirty_bmap + BIT_WORD(bmap_start);
|
||||
}
|
||||
|
||||
d.first_page = bmap_start;
|
||||
/* It should never overflow. If it happens, say something */
|
||||
assert(bmap_npages <= UINT32_MAX);
|
||||
d.num_pages = bmap_npages;
|
||||
d.slot = mem->slot | (kml->as_id << 16);
|
||||
|
||||
if (kvm_vm_ioctl(s, KVM_CLEAR_DIRTY_LOG, &d) == -1) {
|
||||
ret = -errno;
|
||||
error_report("%s: KVM_CLEAR_DIRTY_LOG failed, slot=%d, "
|
||||
"start=0x%"PRIx64", size=0x%"PRIx32", errno=%d",
|
||||
__func__, d.slot, (uint64_t)d.first_page,
|
||||
(uint32_t)d.num_pages, ret);
|
||||
} else {
|
||||
ret = 0;
|
||||
trace_kvm_clear_dirty_log(d.slot, d.first_page, d.num_pages);
|
||||
}
|
||||
|
||||
/*
|
||||
* After we have updated the remote dirty bitmap, we update the
|
||||
* cached bitmap as well for the memslot, then if another user
|
||||
* clears the same region we know we shouldn't clear it again on
|
||||
* the remote otherwise it's data loss as well.
|
||||
*/
|
||||
bitmap_clear(mem->dirty_bmap, bmap_start + start_delta,
|
||||
size / psize);
|
||||
/* This handles the NULL case well */
|
||||
g_free(bmap_clear);
|
||||
|
||||
kvm_slots_unlock(kml);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_coalesce_mmio_region(MemoryListener *listener,
|
||||
MemoryRegionSection *secion,
|
||||
hwaddr start, hwaddr size)
|
||||
@ -894,6 +1048,22 @@ static void kvm_log_sync(MemoryListener *listener,
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_log_clear(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
|
||||
int r;
|
||||
|
||||
r = kvm_physical_log_clear(kml, section);
|
||||
if (r < 0) {
|
||||
error_report_once("%s: kvm log clear failed: mr=%s "
|
||||
"offset=%"HWADDR_PRIx" size=%"PRIx64, __func__,
|
||||
section->mr->name, section->offset_within_region,
|
||||
int128_get64(section->size));
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
static void kvm_mem_ioeventfd_add(MemoryListener *listener,
|
||||
MemoryRegionSection *section,
|
||||
bool match_data, uint64_t data,
|
||||
@ -985,6 +1155,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
|
||||
kml->listener.log_start = kvm_log_start;
|
||||
kml->listener.log_stop = kvm_log_stop;
|
||||
kml->listener.log_sync = kvm_log_sync;
|
||||
kml->listener.log_clear = kvm_log_clear;
|
||||
kml->listener.priority = 10;
|
||||
|
||||
memory_listener_register(&kml->listener, as);
|
||||
@ -1709,6 +1880,17 @@ static int kvm_init(MachineState *ms)
|
||||
s->coalesced_pio = s->coalesced_mmio &&
|
||||
kvm_check_extension(s, KVM_CAP_COALESCED_PIO);
|
||||
|
||||
s->manual_dirty_log_protect =
|
||||
kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
|
||||
if (s->manual_dirty_log_protect) {
|
||||
ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, 1);
|
||||
if (ret) {
|
||||
warn_report("Trying to enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 "
|
||||
"but failed. Falling back to the legacy mode. ");
|
||||
s->manual_dirty_log_protect = false;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KVM_CAP_VCPU_EVENTS
|
||||
s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
|
||||
#endif
|
||||
|
@ -15,4 +15,5 @@ kvm_irqchip_release_virq(int virq) "virq %d"
|
||||
kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
|
||||
kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
|
||||
kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
|
||||
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user