Machine queue, 2021-07-07
Deprecation: * Deprecate pmem=on with non-DAX capable backend file (Igor Mammedov) Feature: * virtio-mem: vfio support (David Hildenbrand) Cleanup: * vmbus: Don't make QOM property registration conditional (Eduardo Habkost) -----BEGIN PGP SIGNATURE----- iQJIBAABCAAyFiEEWjIv1avE09usz9GqKAeTb5hNxaYFAmDnWBgUHGVoYWJrb3N0 QHJlZGhhdC5jb20ACgkQKAeTb5hNxaaH4A//SNp6wZ5VtK59KtnrVgGSeeqJFHC8 NyRDy9vQgYJHwBdvQ77IahAovjV2vSD7m1bl2R4ZGUkUNl0t34oq/BvW9olZHJ6R Z52L92kZ8KjfWq99DbMvz3n7maR4mvTLmDcksi459V2+nf+pn9iI9Bux+dRVy6as u85yK7rVmkwNKakYSBHsFQBzImkf7ufWvRVe200c5rm46z3t9jVxI7p/q49J8bgi OurdkxXHIOAjkVbiWjxIW9pL+uf81+UUPrn6v3Pw62su47Ra5edtHopdTVJb35rh YTLnJnnFqXTFn+s9RZWdR8okJZdWU3PA2opeT+pFXqPP11etL59l/j1zCWuVxYCa afbEaZiFLTS7vhy8aXpCVi2jI+3OBDvK2+UyS4zcUxs5T25eqTUqUKHhU0zNwK0s srBaRbl7Clj1keV6SYRSCh79NxjMskLE9bb36fY3XaUTVoQQ5+SvvErzsZmr4U4p /zGf+ilQhLCOgkDxpO0NEAtWV2UlQPhdFJDTMQHACC9GCQvU0meJhwi0UuAZ2QXj Yoo+yhcBnOfpbqKaX+Qoc7fKruRNNM7be130ESC3AqeC2NEPXenonnkBFbCYChvB elMYABjsKfYwf56n4pa9PKSidDS1ld0XImcqobobqpZ4Fd6rzyPocvz1Q63zPYkd presZ5ePekGcW+M= =NEaj -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/ehabkost-gl/tags/machine-next-pull-request' into staging Machine queue, 2021-07-07 Deprecation: * Deprecate pmem=on with non-DAX capable backend file (Igor Mammedov) Feature: * virtio-mem: vfio support (David Hildenbrand) Cleanup: * vmbus: Don't make QOM property registration conditional (Eduardo Habkost) # gpg: Signature made Thu 08 Jul 2021 20:55:04 BST # gpg: using RSA key 5A322FD5ABC4D3DBACCFD1AA2807936F984DC5A6 # gpg: issuer "ehabkost@redhat.com" # gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>" [full] # Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF D1AA 2807 936F 984D C5A6 * remotes/ehabkost-gl/tags/machine-next-pull-request: vfio: Disable only uncoordinated discards for VFIO_TYPE1 iommus virtio-mem: Require only coordinated discards softmmu/physmem: Extend ram_block_discard_(require|disable) by two discard types softmmu/physmem: Don't 
use atomic operations in ram_block_discard_(disable|require) vfio: Support for RamDiscardManager in the vIOMMU case vfio: Sanity check maximum number of DMA mappings with RamDiscardManager vfio: Query and store the maximum number of possible DMA mappings vfio: Support for RamDiscardManager in the !vIOMMU case virtio-mem: Implement RamDiscardManager interface virtio-mem: Don't report errors when ram_block_discard_range() fails virtio-mem: Factor out traversing unplugged ranges memory: Helpers to copy/free a MemoryRegionSection memory: Introduce RamDiscardManager for RAM memory regions Deprecate pmem=on with non-DAX capable backend file vmbus: Don't make QOM property registration conditional Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
ebd1f71002
@ -221,6 +221,24 @@ This machine is deprecated because we have enough AST2500 based OpenPOWER
|
||||
machines. It can be easily replaced by the ``witherspoon-bmc`` or the
|
||||
``romulus-bmc`` machines.
|
||||
|
||||
Backend options
|
||||
---------------
|
||||
|
||||
Using non-persistent backing file with pmem=on (since 6.1)
|
||||
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
|
||||
|
||||
This option is used when ``memory-backend-file`` is consumed by emulated NVDIMM
|
||||
device. However enabling ``memory-backend-file.pmem`` option, when backing file
|
||||
is (a) not DAX capable or (b) not on a filesystem that supports direct mapping
|
||||
of persistent memory, is not safe and may lead to data loss or corruption in case
|
||||
of host crash.
|
||||
Options are:
|
||||
|
||||
- modify VM configuration to set ``pmem=off`` to continue using fake NVDIMM
|
||||
(without persistence guarantees) with backing file on non-DAX storage
|
||||
- move backing file to NVDIMM storage and keep ``pmem=on``
|
||||
(to have NVDIMM with persistence guarantees).
|
||||
|
||||
Device options
|
||||
--------------
|
||||
|
||||
|
@ -2372,6 +2372,14 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
assert(!qemu_uuid_is_null(&vdev->instanceid));
|
||||
|
||||
if (!qemu_uuid_is_null(&vdc->instanceid)) {
|
||||
/* Class wants to only have a single instance with a fixed UUID */
|
||||
if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
|
||||
error_setg(&err, "instance id can't be changed");
|
||||
goto error_out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for instance id collision for this class id */
|
||||
QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
|
||||
VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
|
||||
@ -2438,18 +2446,22 @@ static void vmbus_dev_unrealize(DeviceState *dev)
|
||||
free_channels(vdev);
|
||||
}
|
||||
|
||||
static Property vmbus_dev_props[] = {
|
||||
DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
|
||||
DEFINE_PROP_END_OF_LIST()
|
||||
};
|
||||
|
||||
|
||||
static void vmbus_dev_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
DeviceClass *kdev = DEVICE_CLASS(klass);
|
||||
device_class_set_props(kdev, vmbus_dev_props);
|
||||
kdev->bus_type = TYPE_VMBUS;
|
||||
kdev->realize = vmbus_dev_realize;
|
||||
kdev->unrealize = vmbus_dev_unrealize;
|
||||
kdev->reset = vmbus_dev_reset;
|
||||
}
|
||||
|
||||
static Property vmbus_dev_instanceid =
|
||||
DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid);
|
||||
|
||||
static void vmbus_dev_instance_init(Object *obj)
|
||||
{
|
||||
VMBusDevice *vdev = VMBUS_DEVICE(obj);
|
||||
@ -2458,8 +2470,6 @@ static void vmbus_dev_instance_init(Object *obj)
|
||||
if (!qemu_uuid_is_null(&vdc->instanceid)) {
|
||||
/* Class wants to only have a single instance with a fixed UUID */
|
||||
vdev->instanceid = vdc->instanceid;
|
||||
} else {
|
||||
qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid);
|
||||
}
|
||||
}
|
||||
|
||||
|
315
hw/vfio/common.c
315
hw/vfio/common.c
@ -36,6 +36,7 @@
|
||||
#include "qemu/range.h"
|
||||
#include "sysemu/kvm.h"
|
||||
#include "sysemu/reset.h"
|
||||
#include "sysemu/runstate.h"
|
||||
#include "trace.h"
|
||||
#include "qapi/error.h"
|
||||
#include "migration/migration.h"
|
||||
@ -134,6 +135,29 @@ static const char *index_to_str(VFIODevice *vbasedev, int index)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Disable (state == true) or re-enable (state == false) RAM block discards
 * on behalf of @container, picking the discard type by IOMMU type.
 *
 * Returns the value of the underlying ram_block_*_discard_disable() call.
 */
static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
{
    const bool is_type1 = container->iommu_type == VFIO_TYPE1v2_IOMMU ||
                          container->iommu_type == VFIO_TYPE1_IOMMU;

    if (is_type1) {
        /*
         * We support coordinated discarding of RAM via the RamDiscardManager,
         * so only uncoordinated discards have to be disabled.
         */
        return ram_block_uncoordinated_discard_disable(state);
    }

    /*
     * VFIO_SPAPR_TCE_IOMMU most probably works just fine with
     * RamDiscardManager, however, it is completely untested.
     *
     * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
     * completely the opposite of managing mapping/pinning dynamically as
     * required by RamDiscardManager. We would have to special-case sections
     * with a RamDiscardManager.
     */
    return ram_block_discard_disable(state);
}
|
||||
|
||||
int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
|
||||
int action, int fd, Error **errp)
|
||||
{
|
||||
@ -569,6 +593,44 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
error_report("iommu map to non memory area %"HWADDR_PRIx"",
|
||||
xlat);
|
||||
return false;
|
||||
} else if (memory_region_has_ram_discard_manager(mr)) {
|
||||
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
|
||||
MemoryRegionSection tmp = {
|
||||
.mr = mr,
|
||||
.offset_within_region = xlat,
|
||||
.size = int128_make64(len),
|
||||
};
|
||||
|
||||
/*
|
||||
* Malicious VMs can map memory into the IOMMU, which is expected
|
||||
* to remain discarded. vfio will pin all pages, populating memory.
|
||||
* Disallow that. vmstate priorities make sure any RamDiscardManager
|
||||
* were already restored before IOMMUs are restored.
|
||||
*/
|
||||
if (!ram_discard_manager_is_populated(rdm, &tmp)) {
|
||||
error_report("iommu map to discarded memory (e.g., unplugged via"
|
||||
" virtio-mem): %"HWADDR_PRIx"",
|
||||
iotlb->translated_addr);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Malicious VMs might trigger discarding of IOMMU-mapped memory. The
|
||||
* pages will remain pinned inside vfio until unmapped, resulting in a
|
||||
* higher memory consumption than expected. If memory would get
|
||||
* populated again later, there would be an inconsistency between pages
|
||||
* pinned by vfio and pages seen by QEMU. This is the case until
|
||||
* unmapped from the IOMMU (e.g., during device reset).
|
||||
*
|
||||
* With malicious guests, we really only care about pinning more memory
|
||||
* than expected. RLIMIT_MEMLOCK set for the user/process can never be
|
||||
* exceeded and can be used to mitigate this problem.
|
||||
*/
|
||||
warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
|
||||
" RAM (e.g., virtio-mem) works, however, malicious"
|
||||
" guests can trigger pinning of more memory than"
|
||||
" intended via an IOMMU. It's possible to mitigate "
|
||||
" by setting/adjusting RLIMIT_MEMLOCK.");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -649,6 +711,153 @@ out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||
listener);
|
||||
const hwaddr size = int128_get64(section->size);
|
||||
const hwaddr iova = section->offset_within_address_space;
|
||||
int ret;
|
||||
|
||||
/* Unmap with a single call. */
|
||||
ret = vfio_dma_unmap(vrdl->container, iova, size , NULL);
|
||||
if (ret) {
|
||||
error_report("%s: vfio_dma_unmap() failed: %s", __func__,
|
||||
strerror(-ret));
|
||||
}
|
||||
}
|
||||
|
||||
static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||
listener);
|
||||
const hwaddr end = section->offset_within_region +
|
||||
int128_get64(section->size);
|
||||
hwaddr start, next, iova;
|
||||
void *vaddr;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Map in (aligned within memory region) minimum granularity, so we can
|
||||
* unmap in minimum granularity later.
|
||||
*/
|
||||
for (start = section->offset_within_region; start < end; start = next) {
|
||||
next = ROUND_UP(start + 1, vrdl->granularity);
|
||||
next = MIN(next, end);
|
||||
|
||||
iova = start - section->offset_within_region +
|
||||
section->offset_within_address_space;
|
||||
vaddr = memory_region_get_ram_ptr(section->mr) + start;
|
||||
|
||||
ret = vfio_dma_map(vrdl->container, iova, next - start,
|
||||
vaddr, section->readonly);
|
||||
if (ret) {
|
||||
/* Rollback */
|
||||
vfio_ram_discard_notify_discard(rdl, section);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||
VFIORamDiscardListener *vrdl;
|
||||
|
||||
/* Ignore some corner cases not relevant in practice. */
|
||||
g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
|
||||
g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space,
|
||||
TARGET_PAGE_SIZE));
|
||||
g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));
|
||||
|
||||
vrdl = g_new0(VFIORamDiscardListener, 1);
|
||||
vrdl->container = container;
|
||||
vrdl->mr = section->mr;
|
||||
vrdl->offset_within_address_space = section->offset_within_address_space;
|
||||
vrdl->size = int128_get64(section->size);
|
||||
vrdl->granularity = ram_discard_manager_get_min_granularity(rdm,
|
||||
section->mr);
|
||||
|
||||
g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
|
||||
g_assert(vrdl->granularity >= 1 << ctz64(container->pgsizes));
|
||||
|
||||
ram_discard_listener_init(&vrdl->listener,
|
||||
vfio_ram_discard_notify_populate,
|
||||
vfio_ram_discard_notify_discard, true);
|
||||
ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
|
||||
QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next);
|
||||
|
||||
/*
|
||||
* Sanity-check if we have a theoretically problematic setup where we could
|
||||
* exceed the maximum number of possible DMA mappings over time. We assume
|
||||
* that each mapped section in the same address space as a RamDiscardManager
|
||||
* section consumes exactly one DMA mapping, with the exception of
|
||||
* RamDiscardManager sections; i.e., we don't expect to have gIOMMU sections
|
||||
* in the same address space as RamDiscardManager sections.
|
||||
*
|
||||
* We assume that each section in the address space consumes one memslot.
|
||||
* We take the number of KVM memory slots as a best guess for the maximum
|
||||
* number of sections in the address space we could have over time,
|
||||
* also consuming DMA mappings.
|
||||
*/
|
||||
if (container->dma_max_mappings) {
|
||||
unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512;
|
||||
|
||||
#ifdef CONFIG_KVM
|
||||
if (kvm_enabled()) {
|
||||
max_memslots = kvm_get_max_memslots();
|
||||
}
|
||||
#endif
|
||||
|
||||
QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||
hwaddr start, end;
|
||||
|
||||
start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space,
|
||||
vrdl->granularity);
|
||||
end = ROUND_UP(vrdl->offset_within_address_space + vrdl->size,
|
||||
vrdl->granularity);
|
||||
vrdl_mappings += (end - start) / vrdl->granularity;
|
||||
vrdl_count++;
|
||||
}
|
||||
|
||||
if (vrdl_mappings + max_memslots - vrdl_count >
|
||||
container->dma_max_mappings) {
|
||||
warn_report("%s: possibly running out of DMA mappings. E.g., try"
|
||||
" increasing the 'block-size' of virtio-mem devies."
|
||||
" Maximum possible DMA mappings: %d, Maximum possible"
|
||||
" memslots: %d", __func__, container->dma_max_mappings,
|
||||
max_memslots);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||
VFIORamDiscardListener *vrdl = NULL;
|
||||
|
||||
QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||
if (vrdl->mr == section->mr &&
|
||||
vrdl->offset_within_address_space ==
|
||||
section->offset_within_address_space) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!vrdl) {
|
||||
hw_error("vfio: Trying to unregister missing RAM discard listener");
|
||||
}
|
||||
|
||||
ram_discard_manager_unregister_listener(rdm, &vrdl->listener);
|
||||
QLIST_REMOVE(vrdl, next);
|
||||
g_free(vrdl);
|
||||
}
|
||||
|
||||
static void vfio_listener_region_add(MemoryListener *listener,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
@ -810,6 +1019,16 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||
|
||||
/* Here we assume that memory_region_is_ram(section->mr)==true */
|
||||
|
||||
/*
|
||||
* For RAM memory regions with a RamDiscardManager, we only want to map the
|
||||
* actually populated parts - and update the mapping whenever we're notified
|
||||
* about changes.
|
||||
*/
|
||||
if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||
vfio_register_ram_discard_listener(container, section);
|
||||
return;
|
||||
}
|
||||
|
||||
vaddr = memory_region_get_ram_ptr(section->mr) +
|
||||
section->offset_within_region +
|
||||
(iova - section->offset_within_address_space);
|
||||
@ -947,6 +1166,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||
|
||||
pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
|
||||
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
|
||||
} else if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||
vfio_unregister_ram_discard_listener(container, section);
|
||||
/* Unregistering will trigger an unmap. */
|
||||
try_unmap = false;
|
||||
}
|
||||
|
||||
if (try_unmap) {
|
||||
@ -1108,6 +1331,49 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
 * Replay callback: fetch the dirty bitmap for one populated @section.
 * @opaque is interpreted as the VFIORamDiscardListener the section belongs
 * to. Returns the result of vfio_get_dirty_bitmap().
 */
static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
                                             void *opaque)
{
    VFIORamDiscardListener *vrdl = opaque;
    const hwaddr iova = section->offset_within_address_space;
    const hwaddr len = int128_get64(section->size);
    const ram_addr_t base = memory_region_get_ram_addr(section->mr);

    /*
     * Sync the whole mapped region (spanning multiple individual mappings)
     * in one go.
     */
    return vfio_get_dirty_bitmap(vrdl->container, iova, len,
                                 base + section->offset_within_region);
}
|
||||
|
||||
static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||
VFIORamDiscardListener *vrdl = NULL;
|
||||
|
||||
QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||
if (vrdl->mr == section->mr &&
|
||||
vrdl->offset_within_address_space ==
|
||||
section->offset_within_address_space) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!vrdl) {
|
||||
hw_error("vfio: Trying to sync missing RAM discard listener");
|
||||
}
|
||||
|
||||
/*
|
||||
* We only want/can synchronize the bitmap for actually mapped parts -
|
||||
* which correspond to populated parts. Replay all populated parts.
|
||||
*/
|
||||
return ram_discard_manager_replay_populated(rdm, section,
|
||||
vfio_ram_discard_get_dirty_bitmap,
|
||||
&vrdl);
|
||||
}
|
||||
|
||||
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
@ -1139,6 +1405,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||
return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
|
||||
}
|
||||
|
||||
ram_addr = memory_region_get_ram_addr(section->mr) +
|
||||
@ -1732,15 +2000,25 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
* new memory, it will not yet set ram_block_discard_set_required() and
|
||||
* therefore, neither stops us here or deals with the sudden memory
|
||||
* consumption of inflated memory.
|
||||
*
|
||||
* We do support discarding of memory coordinated via the RamDiscardManager
|
||||
* with some IOMMU types. vfio_ram_block_discard_disable() handles the
|
||||
* details once we know which type of IOMMU we are using.
|
||||
*/
|
||||
ret = ram_block_discard_disable(true);
|
||||
if (ret) {
|
||||
error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
|
||||
return ret;
|
||||
}
|
||||
|
||||
QLIST_FOREACH(container, &space->containers, next) {
|
||||
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
|
||||
ret = vfio_ram_block_discard_disable(container, true);
|
||||
if (ret) {
|
||||
error_setg_errno(errp, -ret,
|
||||
"Cannot set discarding of RAM broken");
|
||||
if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER,
|
||||
&container->fd)) {
|
||||
error_report("vfio: error disconnecting group %d from"
|
||||
" container", group->groupid);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
group->container = container;
|
||||
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
|
||||
vfio_kvm_device_add_group(group);
|
||||
@ -1768,14 +2046,22 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
container->fd = fd;
|
||||
container->error = NULL;
|
||||
container->dirty_pages_supported = false;
|
||||
container->dma_max_mappings = 0;
|
||||
QLIST_INIT(&container->giommu_list);
|
||||
QLIST_INIT(&container->hostwin_list);
|
||||
QLIST_INIT(&container->vrdl_list);
|
||||
|
||||
ret = vfio_init_container(container, group->fd, errp);
|
||||
if (ret) {
|
||||
goto free_container_exit;
|
||||
}
|
||||
|
||||
ret = vfio_ram_block_discard_disable(container, true);
|
||||
if (ret) {
|
||||
error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
|
||||
goto free_container_exit;
|
||||
}
|
||||
|
||||
switch (container->iommu_type) {
|
||||
case VFIO_TYPE1v2_IOMMU:
|
||||
case VFIO_TYPE1_IOMMU:
|
||||
@ -1798,7 +2084,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes);
|
||||
container->pgsizes = info->iova_pgsizes;
|
||||
|
||||
/* The default in the kernel ("dma_entry_limit") is 65535. */
|
||||
container->dma_max_mappings = 65535;
|
||||
if (!ret) {
|
||||
vfio_get_info_dma_avail(info, &container->dma_max_mappings);
|
||||
vfio_get_iommu_info_migration(container, info);
|
||||
}
|
||||
g_free(info);
|
||||
@ -1820,7 +2109,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
if (ret) {
|
||||
error_setg_errno(errp, errno, "failed to enable container");
|
||||
ret = -errno;
|
||||
goto free_container_exit;
|
||||
goto enable_discards_exit;
|
||||
}
|
||||
} else {
|
||||
container->prereg_listener = vfio_prereg_listener;
|
||||
@ -1832,7 +2121,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
ret = -1;
|
||||
error_propagate_prepend(errp, container->error,
|
||||
"RAM memory listener initialization failed: ");
|
||||
goto free_container_exit;
|
||||
goto enable_discards_exit;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1845,7 +2134,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
if (v2) {
|
||||
memory_listener_unregister(&container->prereg_listener);
|
||||
}
|
||||
goto free_container_exit;
|
||||
goto enable_discards_exit;
|
||||
}
|
||||
|
||||
if (v2) {
|
||||
@ -1860,7 +2149,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||
if (ret) {
|
||||
error_setg_errno(errp, -ret,
|
||||
"failed to remove existing window");
|
||||
goto free_container_exit;
|
||||
goto enable_discards_exit;
|
||||
}
|
||||
} else {
|
||||
/* The default table uses 4K pages */
|
||||
@ -1901,6 +2190,9 @@ listener_release_exit:
|
||||
vfio_kvm_device_del_group(group);
|
||||
vfio_listener_release(container);
|
||||
|
||||
enable_discards_exit:
|
||||
vfio_ram_block_discard_disable(container, false);
|
||||
|
||||
free_container_exit:
|
||||
g_free(container);
|
||||
|
||||
@ -1908,7 +2200,6 @@ close_fd_exit:
|
||||
close(fd);
|
||||
|
||||
put_space_exit:
|
||||
ram_block_discard_disable(false);
|
||||
vfio_put_address_space(space);
|
||||
|
||||
return ret;
|
||||
@ -2030,7 +2321,7 @@ void vfio_put_group(VFIOGroup *group)
|
||||
}
|
||||
|
||||
if (!group->ram_block_discard_allowed) {
|
||||
ram_block_discard_disable(false);
|
||||
vfio_ram_block_discard_disable(group->container, false);
|
||||
}
|
||||
vfio_kvm_device_del_group(group);
|
||||
vfio_disconnect_container(group);
|
||||
@ -2084,7 +2375,7 @@ int vfio_get_device(VFIOGroup *group, const char *name,
|
||||
|
||||
if (!group->ram_block_discard_allowed) {
|
||||
group->ram_block_discard_allowed = true;
|
||||
ram_block_discard_disable(false);
|
||||
vfio_ram_block_discard_disable(group->container, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -145,7 +145,173 @@ static bool virtio_mem_is_busy(void)
|
||||
return migration_in_incoming_postcopy() || !migration_is_idle();
|
||||
}
|
||||
|
||||
static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
|
||||
typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
|
||||
uint64_t offset, uint64_t size);
|
||||
|
||||
static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
|
||||
virtio_mem_range_cb cb)
|
||||
{
|
||||
unsigned long first_zero_bit, last_zero_bit;
|
||||
uint64_t offset, size;
|
||||
int ret = 0;
|
||||
|
||||
first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
|
||||
while (first_zero_bit < vmem->bitmap_size) {
|
||||
offset = first_zero_bit * vmem->block_size;
|
||||
last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
|
||||
first_zero_bit + 1) - 1;
|
||||
size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
|
||||
|
||||
ret = cb(vmem, arg, offset, size);
|
||||
if (ret) {
|
||||
break;
|
||||
}
|
||||
first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
|
||||
last_zero_bit + 2);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust the memory section to cover the intersection with the given range.
|
||||
*
|
||||
* Returns false if the intersection is empty, otherwise returns true.
|
||||
*/
|
||||
static bool virito_mem_intersect_memory_section(MemoryRegionSection *s,
|
||||
uint64_t offset, uint64_t size)
|
||||
{
|
||||
uint64_t start = MAX(s->offset_within_region, offset);
|
||||
uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
|
||||
offset + size);
|
||||
|
||||
if (end <= start) {
|
||||
return false;
|
||||
}
|
||||
|
||||
s->offset_within_address_space += start - s->offset_within_region;
|
||||
s->offset_within_region = start;
|
||||
s->size = int128_make64(end - start);
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
|
||||
|
||||
static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
|
||||
MemoryRegionSection *s,
|
||||
void *arg,
|
||||
virtio_mem_section_cb cb)
|
||||
{
|
||||
unsigned long first_bit, last_bit;
|
||||
uint64_t offset, size;
|
||||
int ret = 0;
|
||||
|
||||
first_bit = s->offset_within_region / vmem->bitmap_size;
|
||||
first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
|
||||
while (first_bit < vmem->bitmap_size) {
|
||||
MemoryRegionSection tmp = *s;
|
||||
|
||||
offset = first_bit * vmem->block_size;
|
||||
last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
|
||||
first_bit + 1) - 1;
|
||||
size = (last_bit - first_bit + 1) * vmem->block_size;
|
||||
|
||||
if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
|
||||
break;
|
||||
}
|
||||
ret = cb(&tmp, arg);
|
||||
if (ret) {
|
||||
break;
|
||||
}
|
||||
first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
|
||||
last_bit + 2);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Section callback: forward @s to the populate hook of the listener passed
 * in @arg and return the hook's result.
 */
static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *listener = arg;
    int rc;

    rc = listener->notify_populate(listener, s);
    return rc;
}
|
||||
|
||||
/*
 * Section callback: forward @s to the discard hook of the listener passed
 * in @arg. The hook returns nothing, so this always returns 0.
 */
static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
{
    RamDiscardListener *listener = arg;

    listener->notify_discard(listener, s);

    return 0;
}
|
||||
|
||||
/*
 * Tell every registered listener that the range [@offset, @offset + @size)
 * was discarded. Each listener only sees the part of the range that
 * intersects its own section; listeners without overlap are skipped.
 */
static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
                                     uint64_t size)
{
    RamDiscardListener *listener;

    QLIST_FOREACH(listener, &vmem->rdl_list, next) {
        MemoryRegionSection clipped = *listener->section;

        if (virito_mem_intersect_memory_section(&clipped, offset, size)) {
            listener->notify_discard(listener, &clipped);
        }
    }
}
|
||||
|
||||
/*
 * Tell every registered listener that the range [@offset, @offset + @size)
 * got populated. Each listener only sees the part of the range that
 * intersects its own section.
 *
 * If a listener's populate hook fails, all listeners that were notified
 * before it receive a discard notification for their part of the range, and
 * the error is returned. Returns 0 if all listeners accepted the range.
 */
static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
                                  uint64_t size)
{
    RamDiscardListener *rdl, *rdl2;
    int ret = 0;

    QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
        MemoryRegionSection tmp = *rdl->section;

        if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
            continue;
        }
        ret = rdl->notify_populate(rdl, &tmp);
        if (ret) {
            break;
        }
    }

    if (ret) {
        /* Notify all already-notified listeners. */
        QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
            MemoryRegionSection tmp;

            /* rdl is the listener that failed; stop right before it. */
            if (rdl2 == rdl) {
                break;
            }

            /*
             * Clip against the section of the listener being rolled back,
             * not the section of the listener that failed.
             */
            tmp = *rdl2->section;
            if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
                continue;
            }
            rdl2->notify_discard(rdl2, &tmp);
        }
    }
    return ret;
}
|
||||
|
||||
/*
 * Send discard notifications to all listeners for an "unplug everything"
 * operation. Does nothing when vmem->size is zero.
 *
 * Listeners that set double_discard_supported get one notification for
 * their whole section; the others are only notified for the parts of their
 * section that are currently marked plugged in the bitmap.
 */
static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
{
    RamDiscardListener *listener;

    if (vmem->size == 0) {
        return;
    }

    QLIST_FOREACH(listener, &vmem->rdl_list, next) {
        if (!listener->double_discard_supported) {
            virtio_mem_for_each_plugged_section(vmem, listener->section,
                                                listener,
                                                virtio_mem_notify_discard_cb);
            continue;
        }
        listener->notify_discard(listener, listener->section);
    }
}
|
||||
|
||||
static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
|
||||
uint64_t size, bool plugged)
|
||||
{
|
||||
const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
|
||||
@ -198,7 +364,8 @@ static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
|
||||
virtio_mem_send_response(vmem, elem, &resp);
|
||||
}
|
||||
|
||||
static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t size)
|
||||
static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
|
||||
uint64_t size)
|
||||
{
|
||||
if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
|
||||
return false;
|
||||
@ -219,19 +386,21 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
|
||||
uint64_t size, bool plug)
|
||||
{
|
||||
const uint64_t offset = start_gpa - vmem->addr;
|
||||
int ret;
|
||||
RAMBlock *rb = vmem->memdev->mr.ram_block;
|
||||
|
||||
if (virtio_mem_is_busy()) {
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
if (!plug) {
|
||||
ret = ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
|
||||
if (ret) {
|
||||
error_report("Unexpected error discarding RAM: %s",
|
||||
strerror(-ret));
|
||||
if (ram_block_discard_range(rb, offset, size)) {
|
||||
return -EBUSY;
|
||||
}
|
||||
virtio_mem_notify_unplug(vmem, offset, size);
|
||||
} else if (virtio_mem_notify_plug(vmem, offset, size)) {
|
||||
/* Could be a mapping attempt resulted in memory getting populated. */
|
||||
ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
|
||||
return -EBUSY;
|
||||
}
|
||||
virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
|
||||
return 0;
|
||||
@ -318,17 +487,16 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
|
||||
static int virtio_mem_unplug_all(VirtIOMEM *vmem)
|
||||
{
|
||||
RAMBlock *rb = vmem->memdev->mr.ram_block;
|
||||
int ret;
|
||||
|
||||
if (virtio_mem_is_busy()) {
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
|
||||
if (ret) {
|
||||
error_report("Unexpected error discarding RAM: %s", strerror(-ret));
|
||||
if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
|
||||
return -EBUSY;
|
||||
}
|
||||
virtio_mem_notify_unplug_all(vmem);
|
||||
|
||||
bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
|
||||
if (vmem->size) {
|
||||
vmem->size = 0;
|
||||
@ -551,7 +719,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_block_discard_require(true)) {
|
||||
if (ram_block_coordinated_discard_require(true)) {
|
||||
error_setg(errp, "Discarding RAM is disabled");
|
||||
return;
|
||||
}
|
||||
@ -559,7 +727,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
||||
ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
|
||||
if (ret) {
|
||||
error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
|
||||
ram_block_discard_require(false);
|
||||
ram_block_coordinated_discard_require(false);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -577,6 +745,13 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
||||
vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
|
||||
qemu_register_reset(virtio_mem_system_reset, vmem);
|
||||
precopy_add_notifier(&vmem->precopy_notifier);
|
||||
|
||||
/*
|
||||
* Set ourselves as RamDiscardManager before the plug handler maps the
|
||||
* memory region and exposes it via an address space.
|
||||
*/
|
||||
memory_region_set_ram_discard_manager(&vmem->memdev->mr,
|
||||
RAM_DISCARD_MANAGER(vmem));
|
||||
}
|
||||
|
||||
static void virtio_mem_device_unrealize(DeviceState *dev)
|
||||
@ -584,6 +759,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
|
||||
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
||||
VirtIOMEM *vmem = VIRTIO_MEM(dev);
|
||||
|
||||
/*
|
||||
* The unplug handler unmapped the memory region, it cannot be
|
||||
* found via an address space anymore. Unset ourselves.
|
||||
*/
|
||||
memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
|
||||
precopy_remove_notifier(&vmem->precopy_notifier);
|
||||
qemu_unregister_reset(virtio_mem_system_reset, vmem);
|
||||
vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
|
||||
@ -591,43 +771,47 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
|
||||
virtio_del_queue(vdev, 0);
|
||||
virtio_cleanup(vdev);
|
||||
g_free(vmem->bitmap);
|
||||
ram_block_discard_require(false);
|
||||
ram_block_coordinated_discard_require(false);
|
||||
}
|
||||
|
||||
static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
|
||||
uint64_t offset, uint64_t size)
|
||||
{
|
||||
RAMBlock *rb = vmem->memdev->mr.ram_block;
|
||||
|
||||
return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
|
||||
{
|
||||
RAMBlock *rb = vmem->memdev->mr.ram_block;
|
||||
unsigned long first_zero_bit, last_zero_bit;
|
||||
uint64_t offset, length;
|
||||
int ret;
|
||||
|
||||
/* Find consecutive unplugged blocks and discard the consecutive range. */
|
||||
first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
|
||||
while (first_zero_bit < vmem->bitmap_size) {
|
||||
offset = first_zero_bit * vmem->block_size;
|
||||
last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
|
||||
first_zero_bit + 1) - 1;
|
||||
length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
|
||||
|
||||
ret = ram_block_discard_range(rb, offset, length);
|
||||
if (ret) {
|
||||
error_report("Unexpected error discarding RAM: %s",
|
||||
strerror(-ret));
|
||||
return -EINVAL;
|
||||
}
|
||||
first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
|
||||
last_zero_bit + 2);
|
||||
}
|
||||
return 0;
|
||||
/* Make sure all memory is really discarded after migration. */
|
||||
return virtio_mem_for_each_unplugged_range(vmem, NULL,
|
||||
virtio_mem_discard_range_cb);
|
||||
}
|
||||
|
||||
static int virtio_mem_post_load(void *opaque, int version_id)
|
||||
{
|
||||
VirtIOMEM *vmem = VIRTIO_MEM(opaque);
|
||||
RamDiscardListener *rdl;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We started out with all memory discarded and our memory region is mapped
|
||||
* into an address space. Replay, now that we updated the bitmap.
|
||||
*/
|
||||
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
|
||||
ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
|
||||
virtio_mem_notify_populate_cb);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (migration_in_incoming_postcopy()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque));
|
||||
return virtio_mem_restore_unplugged(vmem);
|
||||
}
|
||||
|
||||
typedef struct VirtIOMEMMigSanityChecks {
|
||||
@ -702,6 +886,7 @@ static const VMStateDescription vmstate_virtio_mem_device = {
|
||||
.name = "virtio-mem-device",
|
||||
.minimum_version_id = 1,
|
||||
.version_id = 1,
|
||||
.priority = MIG_PRI_VIRTIO_MEM,
|
||||
.post_load = virtio_mem_post_load,
|
||||
.fields = (VMStateField[]) {
|
||||
VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
|
||||
@ -872,28 +1057,19 @@ static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
|
||||
vmem->block_size = value;
|
||||
}
|
||||
|
||||
static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem)
|
||||
static int virtio_mem_precopy_exclude_range_cb(const VirtIOMEM *vmem, void *arg,
|
||||
uint64_t offset, uint64_t size)
|
||||
{
|
||||
void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block);
|
||||
unsigned long first_zero_bit, last_zero_bit;
|
||||
uint64_t offset, length;
|
||||
|
||||
/*
|
||||
* Find consecutive unplugged blocks and exclude them from migration.
|
||||
*
|
||||
* Note: Blocks cannot get (un)plugged during precopy, no locking needed.
|
||||
*/
|
||||
first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
|
||||
while (first_zero_bit < vmem->bitmap_size) {
|
||||
offset = first_zero_bit * vmem->block_size;
|
||||
last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
|
||||
first_zero_bit + 1) - 1;
|
||||
length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
|
||||
qemu_guest_free_page_hint(host + offset, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
qemu_guest_free_page_hint(host + offset, length);
|
||||
first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
|
||||
last_zero_bit + 2);
|
||||
}
|
||||
static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem)
|
||||
{
|
||||
virtio_mem_for_each_unplugged_range(vmem, NULL,
|
||||
virtio_mem_precopy_exclude_range_cb);
|
||||
}
|
||||
|
||||
static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data)
|
||||
@ -918,6 +1094,7 @@ static void virtio_mem_instance_init(Object *obj)
|
||||
|
||||
notifier_list_init(&vmem->size_change_notifiers);
|
||||
vmem->precopy_notifier.notify = virtio_mem_precopy_notify;
|
||||
QLIST_INIT(&vmem->rdl_list);
|
||||
|
||||
object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
|
||||
NULL, NULL, NULL);
|
||||
@ -937,11 +1114,107 @@ static Property virtio_mem_properties[] = {
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
|
||||
const MemoryRegion *mr)
|
||||
{
|
||||
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
|
||||
|
||||
g_assert(mr == &vmem->memdev->mr);
|
||||
return vmem->block_size;
|
||||
}
|
||||
|
||||
static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
|
||||
const MemoryRegionSection *s)
|
||||
{
|
||||
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
|
||||
uint64_t start_gpa = vmem->addr + s->offset_within_region;
|
||||
uint64_t end_gpa = start_gpa + int128_get64(s->size);
|
||||
|
||||
g_assert(s->mr == &vmem->memdev->mr);
|
||||
|
||||
start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
|
||||
end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
|
||||
|
||||
if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
|
||||
}
|
||||
|
||||
struct VirtIOMEMReplayData {
|
||||
void *fn;
|
||||
void *opaque;
|
||||
};
|
||||
|
||||
/*
 * Per-section trampoline used by virtio_mem_rdm_replay_populated(): unpack
 * the VirtIOMEMReplayData passed as @arg and invoke the stored
 * #ReplayRamPopulate callback on @s, forwarding its return value.
 */
static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
    struct VirtIOMEMReplayData *ctx = arg;
    ReplayRamPopulate replay = (ReplayRamPopulate)ctx->fn;

    return replay(s, ctx->opaque);
}
|
||||
|
||||
static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
|
||||
MemoryRegionSection *s,
|
||||
ReplayRamPopulate replay_fn,
|
||||
void *opaque)
|
||||
{
|
||||
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
|
||||
struct VirtIOMEMReplayData data = {
|
||||
.fn = replay_fn,
|
||||
.opaque = opaque,
|
||||
};
|
||||
|
||||
g_assert(s->mr == &vmem->memdev->mr);
|
||||
return virtio_mem_for_each_plugged_section(vmem, s, &data,
|
||||
virtio_mem_rdm_replay_populated_cb);
|
||||
}
|
||||
|
||||
static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
|
||||
RamDiscardListener *rdl,
|
||||
MemoryRegionSection *s)
|
||||
{
|
||||
VirtIOMEM *vmem = VIRTIO_MEM(rdm);
|
||||
int ret;
|
||||
|
||||
g_assert(s->mr == &vmem->memdev->mr);
|
||||
rdl->section = memory_region_section_new_copy(s);
|
||||
|
||||
QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
|
||||
ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
|
||||
virtio_mem_notify_populate_cb);
|
||||
if (ret) {
|
||||
error_report("%s: Replaying plugged ranges failed: %s", __func__,
|
||||
strerror(-ret));
|
||||
}
|
||||
}
|
||||
|
||||
static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
|
||||
RamDiscardListener *rdl)
|
||||
{
|
||||
VirtIOMEM *vmem = VIRTIO_MEM(rdm);
|
||||
|
||||
g_assert(rdl->section->mr == &vmem->memdev->mr);
|
||||
if (vmem->size) {
|
||||
if (rdl->double_discard_supported) {
|
||||
rdl->notify_discard(rdl, rdl->section);
|
||||
} else {
|
||||
virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
|
||||
virtio_mem_notify_discard_cb);
|
||||
}
|
||||
}
|
||||
|
||||
memory_region_section_free_copy(rdl->section);
|
||||
rdl->section = NULL;
|
||||
QLIST_REMOVE(rdl, next);
|
||||
}
|
||||
|
||||
static void virtio_mem_class_init(ObjectClass *klass, void *data)
|
||||
{
|
||||
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
|
||||
VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
|
||||
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
|
||||
|
||||
device_class_set_props(dc, virtio_mem_properties);
|
||||
dc->vmsd = &vmstate_virtio_mem;
|
||||
@ -957,6 +1230,12 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)
|
||||
vmc->get_memory_region = virtio_mem_get_memory_region;
|
||||
vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
|
||||
vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
|
||||
|
||||
rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
|
||||
rdmc->is_populated = virtio_mem_rdm_is_populated;
|
||||
rdmc->replay_populated = virtio_mem_rdm_replay_populated;
|
||||
rdmc->register_listener = virtio_mem_rdm_register_listener;
|
||||
rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
|
||||
}
|
||||
|
||||
static const TypeInfo virtio_mem_info = {
|
||||
@ -966,6 +1245,10 @@ static const TypeInfo virtio_mem_info = {
|
||||
.instance_init = virtio_mem_instance_init,
|
||||
.class_init = virtio_mem_class_init,
|
||||
.class_size = sizeof(VirtIOMEMClass),
|
||||
.interfaces = (InterfaceInfo[]) {
|
||||
{ TYPE_RAM_DISCARD_MANAGER },
|
||||
{ }
|
||||
},
|
||||
};
|
||||
|
||||
static void virtio_register_types(void)
|
||||
|
@ -42,6 +42,12 @@ typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass;
|
||||
DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass,
|
||||
IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION)
|
||||
|
||||
#define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager"
|
||||
typedef struct RamDiscardManagerClass RamDiscardManagerClass;
|
||||
typedef struct RamDiscardManager RamDiscardManager;
|
||||
DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass,
|
||||
RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER);
|
||||
|
||||
#ifdef CONFIG_FUZZ
|
||||
void fuzz_dma_read_cb(size_t addr,
|
||||
size_t len,
|
||||
@ -65,6 +71,28 @@ struct ReservedRegion {
|
||||
unsigned type;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct MemoryRegionSection: describes a fragment of a #MemoryRegion
|
||||
*
|
||||
* @mr: the region, or %NULL if empty
|
||||
* @fv: the flat view of the address space the region is mapped in
|
||||
* @offset_within_region: the beginning of the section, relative to @mr's start
|
||||
* @size: the size of the section; will not exceed @mr's boundaries
|
||||
* @offset_within_address_space: the address of the first byte of the section
|
||||
* relative to the region's address space
|
||||
* @readonly: writes to this section are ignored
|
||||
* @nonvolatile: this section is non-volatile
|
||||
*/
|
||||
struct MemoryRegionSection {
|
||||
Int128 size;
|
||||
MemoryRegion *mr;
|
||||
FlatView *fv;
|
||||
hwaddr offset_within_region;
|
||||
hwaddr offset_within_address_space;
|
||||
bool readonly;
|
||||
bool nonvolatile;
|
||||
};
|
||||
|
||||
typedef struct IOMMUTLBEntry IOMMUTLBEntry;
|
||||
|
||||
/* See address_space_translate: bit 0 is read, bit 1 is write. */
|
||||
@ -448,6 +476,206 @@ struct IOMMUMemoryRegionClass {
|
||||
Error **errp);
|
||||
};
|
||||
|
||||
typedef struct RamDiscardListener RamDiscardListener;
|
||||
typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl,
|
||||
MemoryRegionSection *section);
|
||||
typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl,
|
||||
MemoryRegionSection *section);
|
||||
|
||||
struct RamDiscardListener {
|
||||
/*
|
||||
* @notify_populate:
|
||||
*
|
||||
* Notification that previously discarded memory is about to get populated.
|
||||
* Listeners are able to object. If any listener objects, already
|
||||
* successfully notified listeners are notified about a discard again.
|
||||
*
|
||||
* @rdl: the #RamDiscardListener getting notified
|
||||
* @section: the #MemoryRegionSection to get populated. The section
|
||||
* is aligned within the memory region to the minimum granularity
|
||||
* unless it would exceed the registered section.
|
||||
*
|
||||
* Returns 0 on success. If the notification is rejected by the listener,
|
||||
* an error is returned.
|
||||
*/
|
||||
NotifyRamPopulate notify_populate;
|
||||
|
||||
/*
|
||||
* @notify_discard:
|
||||
*
|
||||
* Notification that previously populated memory was discarded successfully
|
||||
* and listeners should drop all references to such memory and prevent
|
||||
* new population (e.g., unmap).
|
||||
*
|
||||
* @rdl: the #RamDiscardListener getting notified
|
||||
* @section: the #MemoryRegionSection that got discarded. The section
|
||||
* is aligned within the memory region to the minimum granularity
|
||||
* unless it would exceed the registered section.
|
||||
*/
|
||||
NotifyRamDiscard notify_discard;
|
||||
|
||||
/*
|
||||
* @double_discard_supported:
|
||||
*
|
||||
* The listener supports getting @notify_discard notifications that span
|
||||
* already discarded parts.
|
||||
*/
|
||||
bool double_discard_supported;
|
||||
|
||||
MemoryRegionSection *section;
|
||||
QLIST_ENTRY(RamDiscardListener) next;
|
||||
};
|
||||
|
||||
static inline void ram_discard_listener_init(RamDiscardListener *rdl,
|
||||
NotifyRamPopulate populate_fn,
|
||||
NotifyRamDiscard discard_fn,
|
||||
bool double_discard_supported)
|
||||
{
|
||||
rdl->notify_populate = populate_fn;
|
||||
rdl->notify_discard = discard_fn;
|
||||
rdl->double_discard_supported = double_discard_supported;
|
||||
}
|
||||
|
||||
typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
|
||||
|
||||
/*
|
||||
* RamDiscardManagerClass:
|
||||
*
|
||||
* A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion
|
||||
* regions are currently populated to be used/accessed by the VM, notifying
|
||||
* after parts were discarded (freeing up memory) and before parts will be
|
||||
* populated (consuming memory), to be used/accessed by the VM.
|
||||
*
|
||||
* A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
|
||||
* #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is
|
||||
* mapped.
|
||||
*
|
||||
* The #RamDiscardManager is intended to be used by technologies that are
|
||||
* incompatible with discarding of RAM (e.g., VFIO, which may pin all
|
||||
* memory inside a #MemoryRegion), and require proper coordination to only
|
||||
* map the currently populated parts, to hinder parts that are expected to
|
||||
* remain discarded from silently getting populated and consuming memory.
|
||||
* Technologies that support discarding of RAM don't have to bother and can
|
||||
* simply map the whole #MemoryRegion.
|
||||
*
|
||||
* An example #RamDiscardManager is virtio-mem, which logically (un)plugs
|
||||
* memory within an assigned RAM #MemoryRegion, coordinated with the VM.
|
||||
* Logically unplugging memory consists of discarding RAM. The VM agreed to not
|
||||
* access unplugged (discarded) memory - especially via DMA. virtio-mem will
|
||||
* properly coordinate with listeners before memory is plugged (populated),
|
||||
* and after memory is unplugged (discarded).
|
||||
*
|
||||
* Listeners are called in multiples of the minimum granularity (unless it
|
||||
* would exceed the registered range) and changes are aligned to the minimum
|
||||
* granularity within the #MemoryRegion. Listeners have to prepare for memory
|
||||
* becoming discarded in a different granularity than it was populated and the
|
||||
* other way around.
|
||||
*/
|
||||
struct RamDiscardManagerClass {
|
||||
/* private */
|
||||
InterfaceClass parent_class;
|
||||
|
||||
/* public */
|
||||
|
||||
/**
|
||||
* @get_min_granularity:
|
||||
*
|
||||
* Get the minimum granularity in which listeners will get notified
|
||||
* about changes within the #MemoryRegion via the #RamDiscardManager.
|
||||
*
|
||||
* @rdm: the #RamDiscardManager
|
||||
* @mr: the #MemoryRegion
|
||||
*
|
||||
* Returns the minimum granularity.
|
||||
*/
|
||||
uint64_t (*get_min_granularity)(const RamDiscardManager *rdm,
|
||||
const MemoryRegion *mr);
|
||||
|
||||
/**
|
||||
* @is_populated:
|
||||
*
|
||||
* Check whether the given #MemoryRegionSection is completely populated
|
||||
* (i.e., no parts are currently discarded) via the #RamDiscardManager.
|
||||
* There are no alignment requirements.
|
||||
*
|
||||
* @rdm: the #RamDiscardManager
|
||||
* @section: the #MemoryRegionSection
|
||||
*
|
||||
* Returns whether the given range is completely populated.
|
||||
*/
|
||||
bool (*is_populated)(const RamDiscardManager *rdm,
|
||||
const MemoryRegionSection *section);
|
||||
|
||||
/**
|
||||
* @replay_populated:
|
||||
*
|
||||
* Call the #ReplayRamPopulate callback for all populated parts within the
|
||||
* #MemoryRegionSection via the #RamDiscardManager.
|
||||
*
|
||||
* In case any call fails, no further calls are made.
|
||||
*
|
||||
* @rdm: the #RamDiscardManager
|
||||
* @section: the #MemoryRegionSection
|
||||
* @replay_fn: the #ReplayRamPopulate callback
|
||||
* @opaque: pointer to forward to the callback
|
||||
*
|
||||
* Returns 0 on success, or a negative error if any notification failed.
|
||||
*/
|
||||
int (*replay_populated)(const RamDiscardManager *rdm,
|
||||
MemoryRegionSection *section,
|
||||
ReplayRamPopulate replay_fn, void *opaque);
|
||||
|
||||
/**
|
||||
* @register_listener:
|
||||
*
|
||||
* Register a #RamDiscardListener for the given #MemoryRegionSection and
|
||||
* immediately notify the #RamDiscardListener about all populated parts
|
||||
* within the #MemoryRegionSection via the #RamDiscardManager.
|
||||
*
|
||||
* In case any notification fails, no further notifications are triggered
|
||||
* and an error is logged.
|
||||
*
|
||||
* @rdm: the #RamDiscardManager
|
||||
* @rdl: the #RamDiscardListener
|
||||
* @section: the #MemoryRegionSection
|
||||
*/
|
||||
void (*register_listener)(RamDiscardManager *rdm,
|
||||
RamDiscardListener *rdl,
|
||||
MemoryRegionSection *section);
|
||||
|
||||
/**
|
||||
* @unregister_listener:
|
||||
*
|
||||
* Unregister a previously registered #RamDiscardListener via the
|
||||
* #RamDiscardManager after notifying the #RamDiscardListener about all
|
||||
* populated parts becoming unpopulated within the registered
|
||||
* #MemoryRegionSection.
|
||||
*
|
||||
* @rdm: the #RamDiscardManager
|
||||
* @rdl: the #RamDiscardListener
|
||||
*/
|
||||
void (*unregister_listener)(RamDiscardManager *rdm,
|
||||
RamDiscardListener *rdl);
|
||||
};
|
||||
|
||||
uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
|
||||
const MemoryRegion *mr);
|
||||
|
||||
bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
|
||||
const MemoryRegionSection *section);
|
||||
|
||||
int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
|
||||
MemoryRegionSection *section,
|
||||
ReplayRamPopulate replay_fn,
|
||||
void *opaque);
|
||||
|
||||
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
|
||||
RamDiscardListener *rdl,
|
||||
MemoryRegionSection *section);
|
||||
|
||||
void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
|
||||
RamDiscardListener *rdl);
|
||||
|
||||
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
|
||||
typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
|
||||
|
||||
@ -494,6 +722,7 @@ struct MemoryRegion {
|
||||
const char *name;
|
||||
unsigned ioeventfd_nb;
|
||||
MemoryRegionIoeventfd *ioeventfds;
|
||||
RamDiscardManager *rdm; /* Only for RAM */
|
||||
};
|
||||
|
||||
struct IOMMUMemoryRegion {
|
||||
@ -825,28 +1054,6 @@ typedef bool (*flatview_cb)(Int128 start,
|
||||
*/
|
||||
void flatview_for_each_range(FlatView *fv, flatview_cb cb, void *opaque);
|
||||
|
||||
/**
|
||||
* struct MemoryRegionSection: describes a fragment of a #MemoryRegion
|
||||
*
|
||||
* @mr: the region, or %NULL if empty
|
||||
* @fv: the flat view of the address space the region is mapped in
|
||||
* @offset_within_region: the beginning of the section, relative to @mr's start
|
||||
* @size: the size of the section; will not exceed @mr's boundaries
|
||||
* @offset_within_address_space: the address of the first byte of the section
|
||||
* relative to the region's address space
|
||||
* @readonly: writes to this section are ignored
|
||||
* @nonvolatile: this section is non-volatile
|
||||
*/
|
||||
struct MemoryRegionSection {
|
||||
Int128 size;
|
||||
MemoryRegion *mr;
|
||||
FlatView *fv;
|
||||
hwaddr offset_within_region;
|
||||
hwaddr offset_within_address_space;
|
||||
bool readonly;
|
||||
bool nonvolatile;
|
||||
};
|
||||
|
||||
static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
|
||||
MemoryRegionSection *b)
|
||||
{
|
||||
@ -859,6 +1066,26 @@ static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
|
||||
a->nonvolatile == b->nonvolatile;
|
||||
}
|
||||
|
||||
/**
|
||||
* memory_region_section_new_copy: Copy a memory region section
|
||||
*
|
||||
* Allocate memory for a new copy, copy the memory region section, and
|
||||
* properly take a reference on all relevant members.
|
||||
*
|
||||
* @s: the #MemoryRegionSection to copy
|
||||
*/
|
||||
MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s);
|
||||
|
||||
/**
|
||||
* memory_region_section_free_copy: Free a copied memory region section
|
||||
*
|
||||
* Free a copy of a memory section created via memory_region_section_new_copy(),
|
||||
* properly dropping references on all relevant members.
|
||||
*
|
||||
* @s: the #MemoryRegionSection to free
|
||||
*/
|
||||
void memory_region_section_free_copy(MemoryRegionSection *s);
|
||||
|
||||
/**
|
||||
* memory_region_init: Initialize a memory region
|
||||
*
|
||||
@ -2023,6 +2250,41 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr);
|
||||
*/
|
||||
bool memory_region_is_mapped(MemoryRegion *mr);
|
||||
|
||||
/**
|
||||
* memory_region_get_ram_discard_manager: get the #RamDiscardManager for a
|
||||
* #MemoryRegion
|
||||
*
|
||||
* The #RamDiscardManager cannot change while a memory region is mapped.
|
||||
*
|
||||
* @mr: the #MemoryRegion
|
||||
*/
|
||||
RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr);
|
||||
|
||||
/**
|
||||
* memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a
|
||||
* #RamDiscardManager assigned
|
||||
*
|
||||
* @mr: the #MemoryRegion
|
||||
*/
|
||||
static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
|
||||
{
|
||||
return !!memory_region_get_ram_discard_manager(mr);
|
||||
}
|
||||
|
||||
/**
|
||||
* memory_region_set_ram_discard_manager: set the #RamDiscardManager for a
|
||||
* #MemoryRegion
|
||||
*
|
||||
* This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
|
||||
* that does not cover RAM, or a #MemoryRegion that already has a
|
||||
* #RamDiscardManager assigned.
|
||||
*
|
||||
* @mr: the #MemoryRegion
|
||||
* @rdm: #RamDiscardManager to set
|
||||
*/
|
||||
void memory_region_set_ram_discard_manager(MemoryRegion *mr,
|
||||
RamDiscardManager *rdm);
|
||||
|
||||
/**
|
||||
* memory_region_find: translate an address/size relative to a
|
||||
* MemoryRegion into a #MemoryRegionSection.
|
||||
@ -2631,6 +2893,12 @@ static inline MemOp devend_memop(enum device_endian end)
|
||||
*/
|
||||
int ram_block_discard_disable(bool state);
|
||||
|
||||
/*
|
||||
* See ram_block_discard_disable(): only disable uncoordinated discards,
|
||||
* keeping coordinated discards (via the RamDiscardManager) enabled.
|
||||
*/
|
||||
int ram_block_uncoordinated_discard_disable(bool state);
|
||||
|
||||
/*
|
||||
* Inhibit technologies that disable discarding of pages in RAM blocks.
|
||||
*
|
||||
@ -2640,12 +2908,20 @@ int ram_block_discard_disable(bool state);
|
||||
int ram_block_discard_require(bool state);
|
||||
|
||||
/*
|
||||
* Test if discarding of memory in ram blocks is disabled.
|
||||
* See ram_block_discard_require(): only inhibit technologies that disable
|
||||
* uncoordinated discarding of pages in RAM blocks, allowing co-existence with
|
||||
* technologies that only inhibit uncoordinated discards (via the
|
||||
* RamDiscardManager).
|
||||
*/
|
||||
int ram_block_coordinated_discard_require(bool state);
|
||||
|
||||
/*
|
||||
* Test if any discarding of memory in ram blocks is disabled.
|
||||
*/
|
||||
bool ram_block_discard_is_disabled(void);
|
||||
|
||||
/*
|
||||
* Test if discarding of memory in ram blocks is required to work reliably.
|
||||
* Test if any discarding of memory in ram blocks is required to work reliably.
|
||||
*/
|
||||
bool ram_block_discard_is_required(void);
|
||||
|
||||
|
@ -88,9 +88,11 @@ typedef struct VFIOContainer {
|
||||
uint64_t dirty_pgsizes;
|
||||
uint64_t max_dirty_bitmap_size;
|
||||
unsigned long pgsizes;
|
||||
unsigned int dma_max_mappings;
|
||||
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||
QLIST_HEAD(, VFIOGroup) group_list;
|
||||
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||
QLIST_ENTRY(VFIOContainer) next;
|
||||
} VFIOContainer;
|
||||
|
||||
@ -102,6 +104,16 @@ typedef struct VFIOGuestIOMMU {
|
||||
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
|
||||
} VFIOGuestIOMMU;
|
||||
|
||||
typedef struct VFIORamDiscardListener {
|
||||
VFIOContainer *container;
|
||||
MemoryRegion *mr;
|
||||
hwaddr offset_within_address_space;
|
||||
hwaddr size;
|
||||
uint64_t granularity;
|
||||
RamDiscardListener listener;
|
||||
QLIST_ENTRY(VFIORamDiscardListener) next;
|
||||
} VFIORamDiscardListener;
|
||||
|
||||
typedef struct VFIOHostDMAWindow {
|
||||
hwaddr min_iova;
|
||||
hwaddr max_iova;
|
||||
|
@ -67,6 +67,9 @@ struct VirtIOMEM {
|
||||
|
||||
/* don't migrate unplugged memory */
|
||||
NotifierWithReturn precopy_notifier;
|
||||
|
||||
/* listeners to notify on plug/unplug activity. */
|
||||
QLIST_HEAD(, RamDiscardListener) rdl_list;
|
||||
};
|
||||
|
||||
struct VirtIOMEMClass {
|
||||
|
@ -153,6 +153,7 @@ typedef enum {
|
||||
MIG_PRI_DEFAULT = 0,
|
||||
MIG_PRI_IOMMU, /* Must happen before PCI devices */
|
||||
MIG_PRI_PCI_BUS, /* Must happen before IOMMU */
|
||||
MIG_PRI_VIRTIO_MEM, /* Must happen before IOMMU */
|
||||
MIG_PRI_GICV3_ITS, /* Must happen before PCI devices */
|
||||
MIG_PRI_GICV3, /* Must happen before the ITS */
|
||||
MIG_PRI_MAX,
|
||||
|
@ -2027,6 +2027,70 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr)
|
||||
return imrc->num_indexes(iommu_mr);
|
||||
}
|
||||
|
||||
/*
 * Return the #RamDiscardManager assigned to @mr, or NULL if @mr is not a RAM
 * region or has no manager assigned.
 *
 * The lookup intentionally depends only on @mr being RAM, not on @mr being
 * mapped. NOTE(review): memory_region_is_mapped() presumably inspects only
 * @mr itself, so a RAM region reachable solely through an alias would never
 * pass such a check and its manager would be hidden from callers that found
 * the region via the alias -- confirm against memory_region_is_mapped().
 * The manager can only be changed while the region is unmapped (see
 * memory_region_set_ram_discard_manager()), so returning it early is safe.
 */
RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
{
    if (!memory_region_is_ram(mr)) {
        return NULL;
    }
    return mr->rdm;
}
|
||||
|
||||
void memory_region_set_ram_discard_manager(MemoryRegion *mr,
|
||||
RamDiscardManager *rdm)
|
||||
{
|
||||
g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr));
|
||||
g_assert(!rdm || !mr->rdm);
|
||||
mr->rdm = rdm;
|
||||
}
|
||||
|
||||
uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
|
||||
const MemoryRegion *mr)
|
||||
{
|
||||
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
|
||||
|
||||
g_assert(rdmc->get_min_granularity);
|
||||
return rdmc->get_min_granularity(rdm, mr);
|
||||
}
|
||||
|
||||
bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
|
||||
const MemoryRegionSection *section)
|
||||
{
|
||||
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
|
||||
|
||||
g_assert(rdmc->is_populated);
|
||||
return rdmc->is_populated(rdm, section);
|
||||
}
|
||||
|
||||
int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
|
||||
MemoryRegionSection *section,
|
||||
ReplayRamPopulate replay_fn,
|
||||
void *opaque)
|
||||
{
|
||||
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
|
||||
|
||||
g_assert(rdmc->replay_populated);
|
||||
return rdmc->replay_populated(rdm, section, replay_fn, opaque);
|
||||
}
|
||||
|
||||
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
|
||||
RamDiscardListener *rdl,
|
||||
MemoryRegionSection *section)
|
||||
{
|
||||
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
|
||||
|
||||
g_assert(rdmc->register_listener);
|
||||
rdmc->register_listener(rdm, rdl, section);
|
||||
}
|
||||
|
||||
void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
|
||||
RamDiscardListener *rdl)
|
||||
{
|
||||
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
|
||||
|
||||
g_assert(rdmc->unregister_listener);
|
||||
rdmc->unregister_listener(rdm, rdl);
|
||||
}
|
||||
|
||||
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
|
||||
{
|
||||
uint8_t mask = 1 << client;
|
||||
@ -2637,6 +2701,33 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Allocate a new #MemoryRegionSection holding a member-wise copy of @s and
 * take a reference on the memory region and on the flat view it points to,
 * if they are set. Release it with memory_region_section_free_copy().
 */
MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s)
{
    MemoryRegionSection *copy = g_new(MemoryRegionSection, 1);

    *copy = *s;

    if (copy->mr) {
        memory_region_ref(copy->mr);
    }

    if (copy->fv) {
        /*
         * Keep the call outside of g_assert() so the reference is taken
         * regardless of how the assertion macro is compiled.
         */
        bool got_ref = flatview_ref(copy->fv);

        g_assert(got_ref);
    }

    return copy;
}
|
||||
|
||||
/*
 * Counterpart of memory_region_section_new_copy(): drop the references held
 * on the flat view and on the memory region (when set), then free @s itself.
 */
void memory_region_section_free_copy(MemoryRegionSection *s)
{
    FlatView *view = s->fv;
    MemoryRegion *region = s->mr;

    if (view) {
        flatview_unref(view);
    }
    if (region) {
        memory_region_unref(region);
    }

    g_free(s);
}
|
||||
|
||||
bool memory_region_present(MemoryRegion *container, hwaddr addr)
|
||||
{
|
||||
MemoryRegion *mr;
|
||||
@ -3320,10 +3411,17 @@ static const TypeInfo iommu_memory_region_info = {
|
||||
.abstract = true,
|
||||
};
|
||||
|
||||
static const TypeInfo ram_discard_manager_info = {
|
||||
.parent = TYPE_INTERFACE,
|
||||
.name = TYPE_RAM_DISCARD_MANAGER,
|
||||
.class_size = sizeof(RamDiscardManagerClass),
|
||||
};
|
||||
|
||||
static void memory_register_types(void)
|
||||
{
|
||||
type_register_static(&memory_region_info);
|
||||
type_register_static(&iommu_memory_region_info);
|
||||
type_register_static(&ram_discard_manager_info);
|
||||
}
|
||||
|
||||
type_init(memory_register_types)
|
||||
|
@ -3684,56 +3684,106 @@ void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If positive, discarding RAM is disabled. If negative, discarding RAM is
|
||||
* required to work and cannot be disabled.
|
||||
*/
|
||||
static int ram_block_discard_disabled;
|
||||
/* Require any discards to work. */
|
||||
static unsigned int ram_block_discard_required_cnt;
|
||||
/* Require only coordinated discards to work. */
|
||||
static unsigned int ram_block_coordinated_discard_required_cnt;
|
||||
/* Disable any discards. */
|
||||
static unsigned int ram_block_discard_disabled_cnt;
|
||||
/* Disable only uncoordinated discards. */
|
||||
static unsigned int ram_block_uncoordinated_discard_disabled_cnt;
|
||||
static QemuMutex ram_block_discard_disable_mutex;
|
||||
|
||||
static void ram_block_discard_disable_mutex_lock(void)
|
||||
{
|
||||
static gsize initialized;
|
||||
|
||||
if (g_once_init_enter(&initialized)) {
|
||||
qemu_mutex_init(&ram_block_discard_disable_mutex);
|
||||
g_once_init_leave(&initialized, 1);
|
||||
}
|
||||
qemu_mutex_lock(&ram_block_discard_disable_mutex);
|
||||
}
|
||||
|
||||
static void ram_block_discard_disable_mutex_unlock(void)
|
||||
{
|
||||
qemu_mutex_unlock(&ram_block_discard_disable_mutex);
|
||||
}
|
||||
|
||||
int ram_block_discard_disable(bool state)
|
||||
{
|
||||
int old;
|
||||
int ret = 0;
|
||||
|
||||
ram_block_discard_disable_mutex_lock();
|
||||
if (!state) {
|
||||
qatomic_dec(&ram_block_discard_disabled);
|
||||
return 0;
|
||||
ram_block_discard_disabled_cnt--;
|
||||
} else if (ram_block_discard_required_cnt ||
|
||||
ram_block_coordinated_discard_required_cnt) {
|
||||
ret = -EBUSY;
|
||||
} else {
|
||||
ram_block_discard_disabled_cnt++;
|
||||
}
|
||||
ram_block_discard_disable_mutex_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
do {
|
||||
old = qatomic_read(&ram_block_discard_disabled);
|
||||
if (old < 0) {
|
||||
return -EBUSY;
|
||||
}
|
||||
} while (qatomic_cmpxchg(&ram_block_discard_disabled,
|
||||
old, old + 1) != old);
|
||||
return 0;
|
||||
int ram_block_uncoordinated_discard_disable(bool state)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
ram_block_discard_disable_mutex_lock();
|
||||
if (!state) {
|
||||
ram_block_uncoordinated_discard_disabled_cnt--;
|
||||
} else if (ram_block_discard_required_cnt) {
|
||||
ret = -EBUSY;
|
||||
} else {
|
||||
ram_block_uncoordinated_discard_disabled_cnt++;
|
||||
}
|
||||
ram_block_discard_disable_mutex_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ram_block_discard_require(bool state)
|
||||
{
|
||||
int old;
|
||||
int ret = 0;
|
||||
|
||||
ram_block_discard_disable_mutex_lock();
|
||||
if (!state) {
|
||||
qatomic_inc(&ram_block_discard_disabled);
|
||||
return 0;
|
||||
ram_block_discard_required_cnt--;
|
||||
} else if (ram_block_discard_disabled_cnt ||
|
||||
ram_block_uncoordinated_discard_disabled_cnt) {
|
||||
ret = -EBUSY;
|
||||
} else {
|
||||
ram_block_discard_required_cnt++;
|
||||
}
|
||||
ram_block_discard_disable_mutex_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
do {
|
||||
old = qatomic_read(&ram_block_discard_disabled);
|
||||
if (old > 0) {
|
||||
return -EBUSY;
|
||||
}
|
||||
} while (qatomic_cmpxchg(&ram_block_discard_disabled,
|
||||
old, old - 1) != old);
|
||||
return 0;
|
||||
int ram_block_coordinated_discard_require(bool state)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
ram_block_discard_disable_mutex_lock();
|
||||
if (!state) {
|
||||
ram_block_coordinated_discard_required_cnt--;
|
||||
} else if (ram_block_discard_disabled_cnt) {
|
||||
ret = -EBUSY;
|
||||
} else {
|
||||
ram_block_coordinated_discard_required_cnt++;
|
||||
}
|
||||
ram_block_discard_disable_mutex_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool ram_block_discard_is_disabled(void)
|
||||
{
|
||||
return qatomic_read(&ram_block_discard_disabled) > 0;
|
||||
return qatomic_read(&ram_block_discard_disabled_cnt) ||
|
||||
qatomic_read(&ram_block_uncoordinated_discard_disabled_cnt);
|
||||
}
|
||||
|
||||
bool ram_block_discard_is_required(void)
|
||||
{
|
||||
return qatomic_read(&ram_block_discard_disabled) < 0;
|
||||
return qatomic_read(&ram_block_discard_required_cnt) ||
|
||||
qatomic_read(&ram_block_coordinated_discard_required_cnt);
|
||||
}
|
||||
|
@ -225,6 +225,8 @@ static void *mmap_activate(void *ptr, size_t size, int fd,
|
||||
"crash.\n", file_name);
|
||||
g_free(proc_link);
|
||||
g_free(file_name);
|
||||
warn_report("Using non DAX backing file with 'pmem=on' option"
|
||||
" is deprecated");
|
||||
}
|
||||
/*
|
||||
* If mmap failed with MAP_SHARED_VALIDATE | MAP_SYNC, we will try
|
||||
|
Loading…
Reference in New Issue
Block a user