Hi,

"Host Memory Backends" and "Memory devices" queue ("mem"): - Support memory devices with multiple memslots - Support memory devices that dynamically consume memslots - Support memory devices that can automatically decide on the number of memslots to use - virtio-mem support for exposing memory dynamically via multiple memslots - Some required cleanups/refactorings -----BEGIN PGP SIGNATURE----- iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUn+XMRHGRhdmlkQHJl ZGhhdC5jb20ACgkQTd4Q9wD/g1qDHA//T01suTa+uzrcoJHoMWN11S47WnAmbuTo vVakucLBPMJAa9xZeCy3OavXaVGpHkw+t6g3OFknof0LfQ5/j9iE3Q1PxURN7g5j SJ2WJXCoceM6T4TMhPvVvgEaYjFmESqZB5FZgedMT0QRyhAxMuF9pCkWhk1O3OAV JqQKqLFiGcv60AEuBYGZGzgiOUv8EJ5gKwRF4VOdyHIxqZDw1aZXzlcd4TzFZBQ7 rwW/3ef+sFmUJdmfrSrqcIlQSRrqZ2w95xATDzLTIEEUT3SWqh/E95EZWIz1M0oQ NgWgFiLCR1KOj7bWFhLXT7IfyLh0mEysD+P/hY6QwQ4RewWG7EW5UK+JFswssdcZ rEj5XpHZzev/wx7hM4bWsoQ+VIvrH7j3uYGyWkcgYRbdDEkWDv2rsT23lwGYNhht oBsrdEBELRw6v4C8doq/+sCmHmuxUMqTGwbArCQVnB1XnLxOEkuqlnfq5MORkzNF fxbIRx+LRluOllC0HVaDQd8qxRq1+UC5WIpAcDcrouy4HGgi1onWKrXpgjIAbVyH M6cENkK7rnRk96gpeXdmrf0h9HqRciAOY8oUsFsvLyKBOCPBWDrLyOQEY5UoSdtD m4QpEVgywCy2z1uU/UObeT/UxJy/9EL/Zb+DHoEK06iEhwONoUJjEBYMJD38RMkk mwPTB4UAk9g= =s69t -----END PGP SIGNATURE----- Merge tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu into staging Hi, "Host Memory Backends" and "Memory devices" queue ("mem"): - Support memory devices with multiple memslots - Support memory devices that dynamically consume memslots - Support memory devices that can automatically decide on the number of memslots to use - virtio-mem support for exposing memory dynamically via multiple memslots - Some required cleanups/refactorings # -----BEGIN PGP SIGNATURE----- # # iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUn+XMRHGRhdmlkQHJl # ZGhhdC5jb20ACgkQTd4Q9wD/g1qDHA//T01suTa+uzrcoJHoMWN11S47WnAmbuTo # vVakucLBPMJAa9xZeCy3OavXaVGpHkw+t6g3OFknof0LfQ5/j9iE3Q1PxURN7g5j # SJ2WJXCoceM6T4TMhPvVvgEaYjFmESqZB5FZgedMT0QRyhAxMuF9pCkWhk1O3OAV # JqQKqLFiGcv60AEuBYGZGzgiOUv8EJ5gKwRF4VOdyHIxqZDw1aZXzlcd4TzFZBQ7 # rwW/3ef+sFmUJdmfrSrqcIlQSRrqZ2w95xATDzLTIEEUT3SWqh/E95EZWIz1M0oQ # NgWgFiLCR1KOj7bWFhLXT7IfyLh0mEysD+P/hY6QwQ4RewWG7EW5UK+JFswssdcZ # rEj5XpHZzev/wx7hM4bWsoQ+VIvrH7j3uYGyWkcgYRbdDEkWDv2rsT23lwGYNhht # oBsrdEBELRw6v4C8doq/+sCmHmuxUMqTGwbArCQVnB1XnLxOEkuqlnfq5MORkzNF # fxbIRx+LRluOllC0HVaDQd8qxRq1+UC5WIpAcDcrouy4HGgi1onWKrXpgjIAbVyH # M6cENkK7rnRk96gpeXdmrf0h9HqRciAOY8oUsFsvLyKBOCPBWDrLyOQEY5UoSdtD # m4QpEVgywCy2z1uU/UObeT/UxJy/9EL/Zb+DHoEK06iEhwONoUJjEBYMJD38RMkk # mwPTB4UAk9g= # =s69t # -----END PGP SIGNATURE----- # gpg: Signature made Thu 12 Oct 2023 09:49:39 EDT # gpg: using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A # gpg: issuer "david@redhat.com" # gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown] # gpg: aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full] # gpg: aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D FCCA 4DDE 10F7 00FF 835A * tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu: virtio-mem: Mark memslot alias memory regions unmergeable memory,vhost: Allow for marking memory device memory regions unmergeable virtio-mem: Expose device memory dynamically via multiple memslots if enabled virtio-mem: Update state to match bitmap as soon as it's been migrated virtio-mem: Pass non-const VirtIOMEM via virtio_mem_range_cb memory: Clarify mapping requirements for RamDiscardManager memory-device,vhost: Support automatic decision on the number of memslots vhost: Add vhost_get_max_memslots() kvm: Add stub for kvm_get_max_memslots() memory-device,vhost: Support memory devices that dynamically consume memslots memory-device: Track required and actually used memslots in DeviceMemoryState stubs: Rename qmp_memory_device.c to memory_device.c memory-device: Support memory devices with multiple memslots vhost: Return number of free memslots kvm: Return number of free memslots softmmu/physmem: Fixup qemu_ram_block_from_host() documentation vhost: Remove vhost_backend_can_merge() callback vhost: Rework memslot filtering and fix "used_memslot" tracking Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-10-16 12:34:17 -04:00 · 2023-10-16 12:34:17 -04:00 · bc2b89b385
commit bc2b89b385
parent 63011373ad ee6398d862
23 changed files with 839 additions and 113 deletions
--- a/1
+++ b/1
@ -2891,6 +2891,7 @@ F: hw/mem/pc-dimm.c
 F: include/hw/mem/memory-device.h
 F: include/hw/mem/nvdimm.h
 F: include/hw/mem/pc-dimm.h
+F: stubs/memory_device.c
 F: docs/nvdimm.txt

 SPICE
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@ -174,13 +174,31 @@ void kvm_resample_fd_notify(int gsi)
    }
 }

-int kvm_get_max_memslots(void)
+unsigned int kvm_get_max_memslots(void)
 {
    KVMState *s = KVM_STATE(current_accel());

    return s->nr_slots;
 }

+unsigned int kvm_get_free_memslots(void)
+{
+    unsigned int used_slots = 0;
+    KVMState *s = kvm_state;
+    int i;
+
+    kvm_slots_lock();
+    for (i = 0; i < s->nr_as; i++) {
+        if (!s->as[i].ml) {
+            continue;
+        }
+        used_slots = MAX(used_slots, s->as[i].ml->nr_used_slots);
+    }
+    kvm_slots_unlock();
+
+    return s->nr_slots - used_slots;
+}
+
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
 {
@ -196,19 +214,6 @@ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
    return NULL;
 }

-bool kvm_has_free_slot(MachineState *ms)
-{
-    KVMState *s = KVM_STATE(ms->accelerator);
-    bool result;
-    KVMMemoryListener *kml = &s->memory_listener;
-
-    kvm_slots_lock();
-    result = !!kvm_get_free_slot(kml);
-    kvm_slots_unlock();
-
-    return result;
-}
-
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
 {
@ -1387,6 +1392,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
            }
            start_addr += slot_size;
            size -= slot_size;
+            kml->nr_used_slots--;
        } while (size);
        return;
    }
@ -1412,6 +1418,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
        ram_start_offset += slot_size;
        ram += slot_size;
        size -= slot_size;
+        kml->nr_used_slots++;
    } while (size);
 }

--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@ -109,9 +109,14 @@ int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
    return -ENOSYS;
 }

-bool kvm_has_free_slot(MachineState *ms)
+unsigned int kvm_get_max_memslots(void)
 {
-    return false;
+    return 0;
+}
+
+unsigned int kvm_get_free_memslots(void)
+{
+    return 0;
 }

 void kvm_init_cpu_signals(CPUState *cpu)
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@ -52,19 +52,135 @@ static int memory_device_build_list(Object *obj, void *opaque)
    return 0;
 }

-static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr,
-                                        Error **errp)
+static unsigned int memory_device_get_memslots(MemoryDeviceState *md)
 {
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+
+    if (mdc->get_memslots) {
+        return mdc->get_memslots(md);
+    }
+    return 1;
+}
+
+/*
+ * Memslots that are reserved by memory devices (required but still reported
+ * as free from KVM / vhost).
+ */
+static unsigned int get_reserved_memslots(MachineState *ms)
+{
+    if (ms->device_memory->used_memslots >
+        ms->device_memory->required_memslots) {
+        /* This is unexpected, and we warned already in the memory notifier. */
+        return 0;
+    }
+    return ms->device_memory->required_memslots -
+           ms->device_memory->used_memslots;
+}
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+    if (!current_machine->device_memory) {
+        return 0;
+    }
+    return get_reserved_memslots(current_machine);
+}
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    if (!current_machine->device_memory) {
+        return false;
+    }
+
+    return current_machine->device_memory->memslot_auto_decision_active;
+}
+
+static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
+                                                         MemoryRegion *mr)
+{
+    const unsigned int reserved = get_reserved_memslots(ms);
+    const uint64_t size = memory_region_size(mr);
+    unsigned int max = vhost_get_max_memslots();
+    unsigned int free = vhost_get_free_memslots();
+    uint64_t available_space;
+    unsigned int memslots;
+
+    if (kvm_enabled()) {
+        max = MIN(max, kvm_get_max_memslots());
+        free = MIN(free, kvm_get_free_memslots());
+    }
+
+    /*
+     * If we only have less overall memslots than what we consider reasonable,
+     * just keep it to a minimum.
+     */
+    if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
+        return 1;
+    }
+
+    /*
+     * Consider our soft-limit across all memory devices. We don't really
+     * expect to exceed this limit in reasonable configurations.
+     */
+    if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
+        ms->device_memory->required_memslots) {
+        return 1;
+    }
+    memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
+               ms->device_memory->required_memslots;
+
+    /*
+     * Consider the actually still free memslots. This is only relevant if
+     * other memslot consumers would consume *significantly* more memslots than
+     * what we prepared for (> 253). Unlikely, but let's just handle it
+     * cleanly.
+     */
+    memslots = MIN(memslots, free - reserved);
+    if (memslots < 1 || unlikely(free < reserved)) {
+        return 1;
+    }
+
+    /* We cannot have any other memory devices? So give all to this device. */
+    if (size == ms->maxram_size - ms->ram_size) {
+        return memslots;
+    }
+
+    /*
+     * Simple heuristic: equally distribute the memslots over the space
+     * still available for memory devices.
+     */
+    available_space = ms->maxram_size - ms->ram_size -
+                      ms->device_memory->used_region_size;
+    memslots = (double)memslots * size / available_space;
+    return memslots < 1 ? 1 : memslots;
+}
+
+static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
+                                        MemoryRegion *mr, Error **errp)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
    const uint64_t used_region_size = ms->device_memory->used_region_size;
    const uint64_t size = memory_region_size(mr);
+    const unsigned int reserved_memslots = get_reserved_memslots(ms);
+    unsigned int required_memslots, memslot_limit;

-    /* we will need a new memory slot for kvm and vhost */
-    if (kvm_enabled() && !kvm_has_free_slot(ms)) {
-        error_setg(errp, "hypervisor has no free memory slots left");
+    /*
+     * Instruct the device to decide how many memslots to use, if applicable,
+     * before we query the number of required memslots the first time.
+     */
+    if (mdc->decide_memslots) {
+        memslot_limit = memory_device_memslot_decision_limit(ms, mr);
+        mdc->decide_memslots(md, memslot_limit);
+    }
+    required_memslots = memory_device_get_memslots(md);
+
+    /* we will need memory slots for kvm and vhost */
+    if (kvm_enabled() &&
+        kvm_get_free_memslots() < required_memslots + reserved_memslots) {
+        error_setg(errp, "hypervisor has not enough free memory slots left");
        return;
    }
-    if (!vhost_has_free_slot()) {
-        error_setg(errp, "a used vhost backend has no free memory slots left");
+    if (vhost_get_free_memslots() < required_memslots + reserved_memslots) {
+        error_setg(errp, "a used vhost backend has not enough free memory slots left");
        return;
    }

@ -233,7 +349,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
        goto out;
    }

-    memory_device_check_addable(ms, mr, &local_err);
+    memory_device_check_addable(ms, md, mr, &local_err);
    if (local_err) {
        goto out;
    }
@ -264,6 +380,7 @@ out:
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
 {
    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    const unsigned int memslots = memory_device_get_memslots(md);
    const uint64_t addr = mdc->get_addr(md);
    MemoryRegion *mr;

@ -275,6 +392,11 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
    g_assert(ms->device_memory);

    ms->device_memory->used_region_size += memory_region_size(mr);
+    ms->device_memory->required_memslots += memslots;
+    if (mdc->decide_memslots && memslots > 1) {
+        ms->device_memory->memslot_auto_decision_active++;
+    }
+
    memory_region_add_subregion(&ms->device_memory->mr,
                                addr - ms->device_memory->base, mr);
    trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
@ -283,6 +405,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
 void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
 {
    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    const unsigned int memslots = memory_device_get_memslots(md);
    MemoryRegion *mr;

    /*
@ -293,7 +416,12 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
    g_assert(ms->device_memory);

    memory_region_del_subregion(&ms->device_memory->mr, mr);
+
+    if (mdc->decide_memslots && memslots > 1) {
+        ms->device_memory->memslot_auto_decision_active--;
+    }
    ms->device_memory->used_region_size -= memory_region_size(mr);
+    ms->device_memory->required_memslots -= memslots;
    trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
                               mdc->get_addr(md));
 }
@ -313,6 +441,50 @@ uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
    return memory_region_size(mr);
 }

+static void memory_devices_region_mod(MemoryListener *listener,
+                                      MemoryRegionSection *mrs, bool add)
+{
+    DeviceMemoryState *dms = container_of(listener, DeviceMemoryState,
+                                          listener);
+
+    if (!memory_region_is_ram(mrs->mr)) {
+        warn_report("Unexpected memory region mapped into device memory region.");
+        return;
+    }
+
+    /*
+     * The expectation is that each distinct RAM memory region section in
+     * our region for memory devices consumes exactly one memslot in KVM
+     * and in vhost. For vhost, this is true, except:
+     * * ROM memory regions don't consume a memslot. These get used very
+     *   rarely for memory devices (R/O NVDIMMs).
+     * * Memslots without a fd (memory-backend-ram) don't necessarily
+     *   consume a memslot. Such setups are quite rare and possibly bogus:
+     *   the memory would be inaccessible by such vhost devices.
+     *
+     * So for vhost, in corner cases we might over-estimate the number of
+     * memslots that are currently used or that might still be reserved
+     * (required - used).
+     */
+    dms->used_memslots += add ? 1 : -1;
+
+    if (dms->used_memslots > dms->required_memslots) {
+        warn_report("Memory devices use more memory slots than indicated as required.");
+    }
+}
+
+static void memory_devices_region_add(MemoryListener *listener,
+                                      MemoryRegionSection *mrs)
+{
+    return memory_devices_region_mod(listener, mrs, true);
+}
+
+static void memory_devices_region_del(MemoryListener *listener,
+                                      MemoryRegionSection *mrs)
+{
+    return memory_devices_region_mod(listener, mrs, false);
+}
+
 void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
 {
    g_assert(size);
@ -322,8 +494,16 @@ void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)

    memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory",
                       size);
+    address_space_init(&ms->device_memory->as, &ms->device_memory->mr,
+                       "device-memory");
    memory_region_add_subregion(get_system_memory(), ms->device_memory->base,
                                &ms->device_memory->mr);
+
+    /* Track the number of memslots used by memory devices. */
+    ms->device_memory->listener.region_add = memory_devices_region_add;
+    ms->device_memory->listener.region_del = memory_devices_region_del;
+    memory_listener_register(&ms->device_memory->listener,
+                             &ms->device_memory->as);
 }

 static const TypeInfo memory_device_info = {
--- a/hw/virtio/vhost-stub.c
+++ b/hw/virtio/vhost-stub.c
@ -2,9 +2,14 @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"

-bool vhost_has_free_slot(void)
+unsigned int vhost_get_max_memslots(void)
 {
-    return true;
+    return UINT_MAX;
+}
+
+unsigned int vhost_get_free_memslots(void)
+{
+    return UINT_MAX;
 }

 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@ -2327,19 +2327,6 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
    return -ENOTSUP;
 }

-static bool vhost_user_can_merge(struct vhost_dev *dev,
-                                 uint64_t start1, uint64_t size1,
-                                 uint64_t start2, uint64_t size2)
-{
-    ram_addr_t offset;
-    int mfd, rfd;
-
-    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
-    (void)vhost_user_get_mr_data(start2, &offset, &rfd);
-
-    return mfd == rfd;
-}
-
 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
 {
    VhostUserMsg msg;
@ -2622,10 +2609,9 @@ vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
    return 0;
 }

-static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
-                                          MemoryRegionSection *section)
+static bool vhost_user_no_private_memslots(struct vhost_dev *dev)
 {
-    return memory_region_get_fd(section->mr) >= 0;
+    return true;
 }

 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
@ -2868,6 +2854,7 @@ const VhostOps user_ops = {
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
+        .vhost_backend_no_private_memslots = vhost_user_no_private_memslots,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
@ -2886,7 +2873,6 @@ const VhostOps user_ops = {
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
-        .vhost_backend_can_merge = vhost_user_can_merge,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
@ -2894,7 +2880,6 @@ const VhostOps user_ops = {
        .vhost_set_config = vhost_user_set_config,
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
-        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
        .vhost_dev_start = vhost_user_dev_start,
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@ -1512,7 +1512,6 @@ const VhostOps vdpa_ops = {
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
-        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@ -23,6 +23,7 @@
 #include "qemu/log.h"
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/virtio-bus.h"
+#include "hw/mem/memory-device.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file-types.h"
 #include "sysemu/dma.h"
@ -45,20 +46,44 @@
 static struct vhost_log *vhost_log;
 static struct vhost_log *vhost_log_shm;

+/* Memslots used by backends that support private memslots (without an fd). */
 static unsigned int used_memslots;
+
+/* Memslots used by backends that only support shared memslots (with an fd). */
+static unsigned int used_shared_memslots;
+
 static QLIST_HEAD(, vhost_dev) vhost_devices =
    QLIST_HEAD_INITIALIZER(vhost_devices);

-bool vhost_has_free_slot(void)
+unsigned int vhost_get_max_memslots(void)
 {
-    unsigned int slots_limit = ~0U;
+    unsigned int max = UINT_MAX;
+    struct vhost_dev *hdev;
+
+    QLIST_FOREACH(hdev, &vhost_devices, entry) {
+        max = MIN(max, hdev->vhost_ops->vhost_backend_memslots_limit(hdev));
+    }
+    return max;
+}
+
+unsigned int vhost_get_free_memslots(void)
+{
+    unsigned int free = UINT_MAX;
    struct vhost_dev *hdev;

    QLIST_FOREACH(hdev, &vhost_devices, entry) {
        unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
-        slots_limit = MIN(slots_limit, r);
+        unsigned int cur_free;
+
+        if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
+            hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
+            cur_free = r - used_shared_memslots;
+        } else {
+            cur_free = r - used_memslots;
+        }
+        free = MIN(free, cur_free);
    }
-    return slots_limit > used_memslots;
+    return free;
 }

 static void vhost_dev_sync_region(struct vhost_dev *dev,
@ -474,8 +499,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
 * vhost_section: identify sections needed for vhost access
 *
 * We only care about RAM sections here (where virtqueue and guest
- * internals accessed by virtio might live). If we find one we still
- * allow the backend to potentially filter it out of our list.
+ * internals accessed by virtio might live).
 */
 static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
 {
@ -502,8 +526,16 @@ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
            return false;
        }

-        if (dev->vhost_ops->vhost_backend_mem_section_filter &&
-            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {
+        /*
+         * Some backends (like vhost-user) can only handle memory regions
+         * that have an fd (can be mapped into a different process). Filter
+         * the ones without an fd out, if requested.
+         *
+         * TODO: we might have to limit to MAP_SHARED as well.
+         */
+        if (memory_region_get_fd(section->mr) < 0 &&
+            dev->vhost_ops->vhost_backend_no_private_memslots &&
+            dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
            trace_vhost_reject_section(mr->name, 2);
            return false;
        }
@ -568,7 +600,14 @@ static void vhost_commit(MemoryListener *listener)
                       dev->n_mem_sections * sizeof dev->mem->regions[0];
    dev->mem = g_realloc(dev->mem, regions_size);
    dev->mem->nregions = dev->n_mem_sections;
-    used_memslots = dev->mem->nregions;
+
+    if (dev->vhost_ops->vhost_backend_no_private_memslots &&
+        dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
+        used_shared_memslots = dev->mem->nregions;
+    } else {
+        used_memslots = dev->mem->nregions;
+    }
+
    for (i = 0; i < dev->n_mem_sections; i++) {
        struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
        struct MemoryRegionSection *mrs = dev->mem_sections + i;
@ -668,7 +707,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
                                               mrs_size, mrs_host);
    }

-    if (dev->n_tmp_sections) {
+    if (dev->n_tmp_sections && !section->unmergeable) {
        /* Since we already have at least one section, lets see if
         * this extends it; since we're scanning in order, we only
         * have to look at the last one, and the FlatView that calls
@ -701,11 +740,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
            size_t offset = mrs_gpa - prev_gpa_start;

            if (prev_host_start + offset == mrs_host &&
-                section->mr == prev_sec->mr &&
-                (!dev->vhost_ops->vhost_backend_can_merge ||
-                 dev->vhost_ops->vhost_backend_can_merge(dev,
-                    mrs_host, mrs_size,
-                    prev_host_start, prev_size))) {
+                section->mr == prev_sec->mr && !prev_sec->unmergeable) {
                uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
                need_add = false;
                prev_sec->offset_within_address_space =
@ -1400,6 +1435,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                   VhostBackendType backend_type, uint32_t busyloop_timeout,
                   Error **errp)
 {
+    unsigned int used, reserved, limit;
    uint64_t features;
    int i, r, n_initialized_vqs = 0;

@ -1426,6 +1462,19 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
        goto fail;
    }

+    limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+    if (limit < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS &&
+        memory_devices_memslot_auto_decision_active()) {
+        error_setg(errp, "some memory device (like virtio-mem)"
+            " decided how many memory slots to use based on the overall"
+            " number of memory slots; this vhost backend would further"
+            " restricts the overall number of memory slots");
+        error_append_hint(errp, "Try plugging this vhost backend before"
+            " plugging such memory devices.\n");
+        r = -EINVAL;
+        goto fail;
+    }
+
    for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
        r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
        if (r < 0) {
@ -1495,9 +1544,27 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
    memory_listener_register(&hdev->memory_listener, &address_space_memory);
    QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);

-    if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
-        error_setg(errp, "vhost backend memory slots limit is less"
-                   " than current number of present memory slots");
+    /*
+     * The listener we registered properly updated the corresponding counter.
+     * So we can trust that these values are accurate.
+     */
+    if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
+        hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
+        used = used_shared_memslots;
+    } else {
+        used = used_memslots;
+    }
+    /*
+     * We assume that all reserved memslots actually require a real memslot
+     * in our vhost backend. This might not be true, for example, if the
+     * memslot would be ROM. If ever relevant, we can optimize for that --
+     * but we'll need additional information about the reservations.
+     */
+    reserved = memory_devices_get_reserved_memslots();
+    if (used + reserved > limit) {
+        error_setg(errp, "vhost backend memory slots limit (%d) is less"
+                   " than current number of used (%d) and reserved (%d)"
+                   " memory slots for memory devices.", limit, used, reserved);
        r = -EINVAL;
        goto fail_busyloop;
    }
--- a/hw/virtio/virtio-mem-pci.c
+++ b/hw/virtio/virtio-mem-pci.c
@ -48,6 +48,25 @@ static MemoryRegion *virtio_mem_pci_get_memory_region(MemoryDeviceState *md,
    return vmc->get_memory_region(vmem, errp);
 }

+static void virtio_mem_pci_decide_memslots(MemoryDeviceState *md,
+                                           unsigned int limit)
+{
+    VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+    VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);
+    VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+    vmc->decide_memslots(vmem, limit);
+}
+
+static unsigned int virtio_mem_pci_get_memslots(MemoryDeviceState *md)
+{
+    VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+    VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);
+    VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+    return vmc->get_memslots(vmem);
+}
+
 static uint64_t virtio_mem_pci_get_plugged_size(const MemoryDeviceState *md,
                                                Error **errp)
 {
@ -150,6 +169,8 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, void *data)
    mdc->set_addr = virtio_mem_pci_set_addr;
    mdc->get_plugged_size = virtio_mem_pci_get_plugged_size;
    mdc->get_memory_region = virtio_mem_pci_get_memory_region;
+    mdc->decide_memslots = virtio_mem_pci_decide_memslots;
+    mdc->get_memslots = virtio_mem_pci_get_memslots;
    mdc->fill_device_info = virtio_mem_pci_fill_device_info;
    mdc->get_min_alignment = virtio_mem_pci_get_min_alignment;

--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@ -66,6 +66,13 @@ static uint32_t virtio_mem_default_thp_size(void)
    return default_thp_size;
 }

+/*
+ * The minimum memslot size depends on this setting ("sane default"), the
+ * device block size, and the memory backend page size. The last (or single)
+ * memslot might be smaller than this constant.
+ */
+#define VIRTIO_MEM_MIN_MEMSLOT_SIZE (1 * GiB)
+
 /*
 * We want to have a reasonable default block size such that
 * 1. We avoid splitting THPs when unplugging memory, which degrades
@ -177,10 +184,10 @@ static bool virtio_mem_is_busy(void)
    return migration_in_incoming_postcopy() || !migration_is_idle();
 }

-typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
+typedef int (*virtio_mem_range_cb)(VirtIOMEM *vmem, void *arg,
                                   uint64_t offset, uint64_t size);

-static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_for_each_unplugged_range(VirtIOMEM *vmem, void *arg,
                                               virtio_mem_range_cb cb)
 {
    unsigned long first_zero_bit, last_zero_bit;
@ -204,7 +211,7 @@ static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
    return ret;
 }

-static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
                                             virtio_mem_range_cb cb)
 {
    unsigned long first_bit, last_bit;
@ -483,6 +490,96 @@ static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
    return true;
 }

+static void virtio_mem_activate_memslot(VirtIOMEM *vmem, unsigned int idx)
+{
+    const uint64_t memslot_offset = idx * vmem->memslot_size;
+
+    assert(vmem->memslots);
+
+    /*
+     * Instead of enabling/disabling memslots, we add/remove them. This should
+     * make address space updates faster, because we don't have to loop over
+     * many disabled subregions.
+     */
+    if (memory_region_is_mapped(&vmem->memslots[idx])) {
+        return;
+    }
+    memory_region_add_subregion(vmem->mr, memslot_offset, &vmem->memslots[idx]);
+}
+
+static void virtio_mem_deactivate_memslot(VirtIOMEM *vmem, unsigned int idx)
+{
+    assert(vmem->memslots);
+
+    if (!memory_region_is_mapped(&vmem->memslots[idx])) {
+        return;
+    }
+    memory_region_del_subregion(vmem->mr, &vmem->memslots[idx]);
+}
+
+static void virtio_mem_activate_memslots_to_plug(VirtIOMEM *vmem,
+                                                 uint64_t offset, uint64_t size)
+{
+    const unsigned int start_idx = offset / vmem->memslot_size;
+    const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /
+                                 vmem->memslot_size;
+    unsigned int idx;
+
+    if (!vmem->dynamic_memslots) {
+        return;
+    }
+
+    /* Activate all involved memslots in a single transaction. */
+    memory_region_transaction_begin();
+    for (idx = start_idx; idx < end_idx; idx++) {
+        virtio_mem_activate_memslot(vmem, idx);
+    }
+    memory_region_transaction_commit();
+}
+
+static void virtio_mem_deactivate_unplugged_memslots(VirtIOMEM *vmem,
+                                                     uint64_t offset,
+                                                     uint64_t size)
+{
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+    const unsigned int start_idx = offset / vmem->memslot_size;
+    const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /
+                                 vmem->memslot_size;
+    unsigned int idx;
+
+    if (!vmem->dynamic_memslots) {
+        return;
+    }
+
+    /* Deactivate all memslots with unplugged blocks in a single transaction. */
+    memory_region_transaction_begin();
+    for (idx = start_idx; idx < end_idx; idx++) {
+        const uint64_t memslot_offset = idx * vmem->memslot_size;
+        uint64_t memslot_size = vmem->memslot_size;
+
+        /* The size of the last memslot might be smaller. */
+        if (idx == vmem->nb_memslots - 1) {
+            memslot_size = region_size - memslot_offset;
+        }
+
+        /*
+         * Partially covered memslots might still have some blocks plugged and
+         * have to remain active if that's the case.
+         */
+        if (offset > memslot_offset ||
+            offset + size < memslot_offset + memslot_size) {
+            const uint64_t gpa = vmem->addr + memslot_offset;
+
+            if (!virtio_mem_is_range_unplugged(vmem, gpa, memslot_size)) {
+                continue;
+            }
+        }
+
+        virtio_mem_deactivate_memslot(vmem, idx);
+    }
+    memory_region_transaction_commit();
+}
+
 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
                                      uint64_t size, bool plug)
 {
@ -500,6 +597,8 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
        }
        virtio_mem_notify_unplug(vmem, offset, size);
        virtio_mem_set_range_unplugged(vmem, start_gpa, size);
+        /* Deactivate completely unplugged memslots after updating the state. */
+        virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
        return 0;
    }

@ -527,7 +626,20 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
    }

    if (!ret) {
+        /*
+         * Activate before notifying and rollback in case of any errors.
+         *
+         * When activating a yet inactive memslot, memory notifiers will get
+         * notified about the added memory region and can register with the
+         * RamDiscardManager; this will traverse all plugged blocks and skip the
+         * blocks we are plugging here. The following notification will inform
+         * registered listeners about the blocks we're plugging.
+         */
+        virtio_mem_activate_memslots_to_plug(vmem, offset, size);
        ret = virtio_mem_notify_plug(vmem, offset, size);
+        if (ret) {
+            virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
+        }
    }
    if (ret) {
        /* Could be preallocation or a notifier populated memory. */
@ -620,6 +732,7 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,

 static int virtio_mem_unplug_all(VirtIOMEM *vmem)
 {
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
    RAMBlock *rb = vmem->memdev->mr.ram_block;

    if (vmem->size) {
@ -634,6 +747,9 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem)
        bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
        vmem->size = 0;
        notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
+
+        /* Deactivate all memslots after updating the state. */
+        virtio_mem_deactivate_unplugged_memslots(vmem, 0, region_size);
    }

    trace_virtio_mem_unplugged_all();
@ -790,6 +906,49 @@ static void virtio_mem_system_reset(void *opaque)
    virtio_mem_unplug_all(vmem);
 }

+static void virtio_mem_prepare_mr(VirtIOMEM *vmem)
+{
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+
+    assert(!vmem->mr && vmem->dynamic_memslots);
+    vmem->mr = g_new0(MemoryRegion, 1);
+    memory_region_init(vmem->mr, OBJECT(vmem), "virtio-mem",
+                       region_size);
+    vmem->mr->align = memory_region_get_alignment(&vmem->memdev->mr);
+}
+
+static void virtio_mem_prepare_memslots(VirtIOMEM *vmem)
+{
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+    unsigned int idx;
+
+    g_assert(!vmem->memslots && vmem->nb_memslots && vmem->dynamic_memslots);
+    vmem->memslots = g_new0(MemoryRegion, vmem->nb_memslots);
+
+    /* Initialize our memslots, but don't map them yet. */
+    for (idx = 0; idx < vmem->nb_memslots; idx++) {
+        const uint64_t memslot_offset = idx * vmem->memslot_size;
+        uint64_t memslot_size = vmem->memslot_size;
+        char name[20];
+
+        /* The size of the last memslot might be smaller. */
+        if (idx == vmem->nb_memslots - 1) {
+            memslot_size = region_size - memslot_offset;
+        }
+
+        snprintf(name, sizeof(name), "memslot-%u", idx);
+        memory_region_init_alias(&vmem->memslots[idx], OBJECT(vmem), name,
+                                 &vmem->memdev->mr, memslot_offset,
+                                 memslot_size);
+        /*
+         * We want to be able to atomically and efficiently activate/deactivate
+         * individual memslots without affecting adjacent memslots in memory
+         * notifiers.
+         */
+        memory_region_set_unmergeable(&vmem->memslots[idx], true);
+    }
+}
+
 static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
 {
    MachineState *ms = MACHINE(qdev_get_machine());
@ -861,6 +1020,14 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
    vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */

+    if (vmem->dynamic_memslots &&
+        vmem->unplugged_inaccessible != ON_OFF_AUTO_ON) {
+        error_setg(errp, "'%s' property set to 'on' requires '%s' to be 'on'",
+                   VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP,
+                   VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
+        return;
+    }
+
    /*
     * If the block size wasn't configured by the user, use a sane default. This
     * allows using hugetlbfs backends of any page size without manual
@ -930,6 +1097,25 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
    virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
    vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);

+    /*
+     * With "dynamic-memslots=off" (old behavior) we always map the whole
+     * RAM memory region directly.
+     */
+    if (vmem->dynamic_memslots) {
+        if (!vmem->mr) {
+            virtio_mem_prepare_mr(vmem);
+        }
+        if (vmem->nb_memslots <= 1) {
+            vmem->nb_memslots = 1;
+            vmem->memslot_size = memory_region_size(&vmem->memdev->mr);
+        }
+        if (!vmem->memslots) {
+            virtio_mem_prepare_memslots(vmem);
+        }
+    } else {
+        assert(!vmem->mr && !vmem->nb_memslots && !vmem->memslots);
+    }
+
    host_memory_backend_set_mapped(vmem->memdev, true);
    vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
    if (vmem->early_migration) {
@ -969,7 +1155,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
    ram_block_coordinated_discard_require(false);
 }

-static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_discard_range_cb(VirtIOMEM *vmem, void *arg,
                                       uint64_t offset, uint64_t size)
 {
    RAMBlock *rb = vmem->memdev->mr.ram_block;
@ -984,12 +1170,31 @@ static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
                                               virtio_mem_discard_range_cb);
 }

-static int virtio_mem_post_load(void *opaque, int version_id)
+static int virtio_mem_activate_memslot_range_cb(VirtIOMEM *vmem, void *arg,
+                                                uint64_t offset, uint64_t size)
+{
+    virtio_mem_activate_memslots_to_plug(vmem, offset, size);
+    return 0;
+}
+
+static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem)
 {
-    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
    RamDiscardListener *rdl;
    int ret;

+    /*
+     * We restored the bitmap and updated the requested size; activate all
+     * memslots (so listeners register) before notifying about plugged blocks.
+     */
+    if (vmem->dynamic_memslots) {
+        /*
+         * We don't expect any active memslots at this point to deactivate: no
+         * memory was plugged on the migration destination.
+         */
+        virtio_mem_for_each_plugged_range(vmem, NULL,
+                                          virtio_mem_activate_memslot_range_cb);
+    }
+
    /*
     * We started out with all memory discarded and our memory region is mapped
     * into an address space. Replay, now that we updated the bitmap.
@ -1001,6 +1206,20 @@ static int virtio_mem_post_load(void *opaque, int version_id)
            return ret;
        }
    }
+    return 0;
+}
+
+static int virtio_mem_post_load(void *opaque, int version_id)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+    int ret;
+
+    if (!vmem->early_migration) {
+        ret = virtio_mem_post_load_bitmap(vmem);
+        if (ret) {
+            return ret;
+        }
+    }

    /*
     * If shared RAM is migrated using the file content and not using QEMU,
@ -1021,7 +1240,7 @@ static int virtio_mem_post_load(void *opaque, int version_id)
    return virtio_mem_restore_unplugged(vmem);
 }

-static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg,
                                        uint64_t offset, uint64_t size)
 {
    void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
@ -1043,7 +1262,7 @@ static int virtio_mem_post_load_early(void *opaque, int version_id)
    int ret;

    if (!vmem->prealloc) {
-        return 0;
+        goto post_load_bitmap;
    }

    /*
@ -1051,7 +1270,7 @@ static int virtio_mem_post_load_early(void *opaque, int version_id)
     * don't mess with preallocation and postcopy.
     */
    if (migrate_ram_is_ignored(rb)) {
-        return 0;
+        goto post_load_bitmap;
    }

    /*
@ -1084,7 +1303,10 @@ static int virtio_mem_post_load_early(void *opaque, int version_id)
            return -EBUSY;
        }
    }
-    return 0;
+
+post_load_bitmap:
+    /* Finally, update any other state to be consistent with the new bitmap. */
+    return virtio_mem_post_load_bitmap(vmem);
 }

 typedef struct VirtIOMEMMigSanityChecks {
@ -1235,11 +1457,79 @@ static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
    if (!vmem->memdev) {
        error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
        return NULL;
+    } else if (vmem->dynamic_memslots) {
+        if (!vmem->mr) {
+            virtio_mem_prepare_mr(vmem);
+        }
+        return vmem->mr;
    }

    return &vmem->memdev->mr;
 }

+static void virtio_mem_decide_memslots(VirtIOMEM *vmem, unsigned int limit)
+{
+    uint64_t region_size, memslot_size, min_memslot_size;
+    unsigned int memslots;
+    RAMBlock *rb;
+
+    if (!vmem->dynamic_memslots) {
+        return;
+    }
+
+    /* We're called exactly once, before realizing the device. */
+    assert(!vmem->nb_memslots);
+
+    /* If realizing the device will fail, just assume a single memslot. */
+    if (limit <= 1 || !vmem->memdev || !vmem->memdev->mr.ram_block) {
+        vmem->nb_memslots = 1;
+        return;
+    }
+
+    rb = vmem->memdev->mr.ram_block;
+    region_size = memory_region_size(&vmem->memdev->mr);
+
+    /*
+     * Determine the default block size now, to determine the minimum memslot
+     * size. We want the minimum slot size to be at least the device block size.
+     */
+    if (!vmem->block_size) {
+        vmem->block_size = virtio_mem_default_block_size(rb);
+    }
+    /* If realizing the device will fail, just assume a single memslot. */
+    if (vmem->block_size < qemu_ram_pagesize(rb) ||
+        !QEMU_IS_ALIGNED(region_size, vmem->block_size)) {
+        vmem->nb_memslots = 1;
+        return;
+    }
+
+    /*
+     * All memslots except the last one have a reasonable minimum size, and
+     * and all memslot sizes are aligned to the device block size.
+     */
+    memslot_size = QEMU_ALIGN_UP(region_size / limit, vmem->block_size);
+    min_memslot_size = MAX(vmem->block_size, VIRTIO_MEM_MIN_MEMSLOT_SIZE);
+    memslot_size = MAX(memslot_size, min_memslot_size);
+
+    memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;
+    if (memslots != 1) {
+        vmem->memslot_size = memslot_size;
+    }
+    vmem->nb_memslots = memslots;
+}
+
+static unsigned int virtio_mem_get_memslots(VirtIOMEM *vmem)
+{
+    if (!vmem->dynamic_memslots) {
+        /* Exactly one static RAM memory region. */
+        return 1;
+    }
+
+    /* We're called after instructed to make a decision. */
+    g_assert(vmem->nb_memslots);
+    return vmem->nb_memslots;
+}
+
 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
                                                Notifier *notifier)
 {
@ -1377,6 +1667,21 @@ static void virtio_mem_instance_init(Object *obj)
                        NULL, NULL);
 }

+static void virtio_mem_instance_finalize(Object *obj)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(obj);
+
+    /*
+     * Note: the core already dropped the references on all memory regions
+     * (it's passed as the owner to memory_region_init_*()) and finalized
+     * these objects. We can simply free the memory.
+     */
+    g_free(vmem->memslots);
+    vmem->memslots = NULL;
+    g_free(vmem->mr);
+    vmem->mr = NULL;
+}
+
 static Property virtio_mem_properties[] = {
    DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
    DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
@ -1389,6 +1694,8 @@ static Property virtio_mem_properties[] = {
 #endif
    DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
                     early_migration, true),
+    DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM,
+                     dynamic_memslots, false),
    DEFINE_PROP_END_OF_LIST(),
 };

@ -1556,6 +1863,8 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)

    vmc->fill_device_info = virtio_mem_fill_device_info;
    vmc->get_memory_region = virtio_mem_get_memory_region;
+    vmc->decide_memslots = virtio_mem_decide_memslots;
+    vmc->get_memslots = virtio_mem_get_memslots;
    vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
    vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
    vmc->unplug_request_check = virtio_mem_unplug_request_check;
@ -1573,6 +1882,7 @@ static const TypeInfo virtio_mem_info = {
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOMEM),
    .instance_init = virtio_mem_instance_init,
+    .instance_finalize = virtio_mem_instance_finalize,
    .class_init = virtio_mem_class_init,
    .class_size = sizeof(VirtIOMEMClass),
    .interfaces = (InterfaceInfo[]) {
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@ -83,6 +83,21 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 ram_addr_t qemu_ram_addr_from_host(void *ptr);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 RAMBlock *qemu_ram_block_by_name(const char *name);
+
+/*
+ * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock.
+ *
+ * @ptr: The host pointer to translate.
+ * @round_offset: Whether to round the result offset down to a target page
+ * @offset: Will be set to the offset within the returned RAMBlock.
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore.  If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the memory region that owns the RAMBlock.
+ */
 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *offset);
 ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@ -95,6 +95,7 @@ struct ReservedRegion {
 *     relative to the region's address space
 * @readonly: writes to this section are ignored
 * @nonvolatile: this section is non-volatile
+ * @unmergeable: this section should not get merged with adjacent sections
 */
 struct MemoryRegionSection {
    Int128 size;
@ -104,6 +105,7 @@ struct MemoryRegionSection {
    hwaddr offset_within_address_space;
    bool readonly;
    bool nonvolatile;
+    bool unmergeable;
 };

 typedef struct IOMMUTLBEntry IOMMUTLBEntry;
@ -599,8 +601,9 @@ typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
 * populated (consuming memory), to be used/accessed by the VM.
 *
 * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
- * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is
- * mapped.
+ * #MemoryRegion isn't mapped into an address space yet (either directly
+ * or via an alias); it cannot change while the #MemoryRegion is
+ * mapped into an address space.
 *
 * The #RamDiscardManager is intended to be used by technologies that are
 * incompatible with discarding of RAM (e.g., VFIO, which may pin all
@ -772,6 +775,7 @@ struct MemoryRegion {
    bool nonvolatile;
    bool rom_device;
    bool flush_coalesced_mmio;
+    bool unmergeable;
    uint8_t dirty_log_mask;
    bool is_iommu;
    RAMBlock *ram_block;
@ -2350,6 +2354,25 @@ void memory_region_set_size(MemoryRegion *mr, uint64_t size);
 void memory_region_set_alias_offset(MemoryRegion *mr,
                                    hwaddr offset);

+/*
+ * memory_region_set_unmergeable: Set a memory region unmergeable
+ *
+ * Mark a memory region unmergeable, resulting in the memory region (or
+ * everything contained in a memory region container) not getting merged when
+ * simplifying the address space and notifying memory listeners. Consequently,
+ * memory listeners will never get notified about ranges that are larger than
+ * the original memory regions.
+ *
+ * This is primarily useful when multiple aliases to a RAM memory region are
+ * mapped into a memory region container, and updates (e.g., enable/disable or
+ * map/unmap) of individual memory region aliases are not supposed to affect
+ * other memory regions in the same container.
+ *
+ * @mr: the #MemoryRegion to be updated
+ * @unmergeable: whether to mark the #MemoryRegion unmergeable
+ */
+void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable);
+
 /**
 * memory_region_present: checks if an address relative to a @container
 * translates into #MemoryRegion within @container
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@ -297,15 +297,27 @@ struct MachineClass {
 * DeviceMemoryState:
 * @base: address in guest physical address space where the memory
 * address space for memory devices starts
- * @mr: address space container for memory devices
+ * @mr: memory region container for memory devices
+ * @as: address space for memory devices
+ * @listener: memory listener used to track used memslots in the address space
 * @dimm_size: the sum of plugged DIMMs' sizes
 * @used_region_size: the part of @mr already used by memory devices
+ * @required_memslots: the number of memslots required by memory devices
+ * @used_memslots: the number of memslots currently used by memory devices
+ * @memslot_auto_decision_active: whether any plugged memory device
+ *                                automatically decided to use more than
+ *                                one memslot
 */
 typedef struct DeviceMemoryState {
    hwaddr base;
    MemoryRegion mr;
+    AddressSpace as;
+    MemoryListener listener;
    uint64_t dimm_size;
    uint64_t used_region_size;
+    unsigned int required_memslots;
+    unsigned int used_memslots;
+    unsigned int memslot_auto_decision_active;
 } DeviceMemoryState;

 /**
--- a/include/hw/mem/memory-device.h
+++ b/include/hw/mem/memory-device.h
@ -14,6 +14,7 @@
 #define MEMORY_DEVICE_H

 #include "hw/qdev-core.h"
+#include "qemu/typedefs.h"
 #include "qapi/qapi-types-machine.h"
 #include "qom/object.h"

@ -41,6 +42,17 @@ typedef struct MemoryDeviceState MemoryDeviceState;
 * successive memory regions are used, a covering memory region has to
 * be provided. Scattered memory regions are not supported for single
 * devices.
+ *
+ * The device memory region returned via @get_memory_region may either be a
+ * single RAM memory region or a memory region container with subregions
+ * that are RAM memory regions or aliases to RAM memory regions. Other
+ * memory regions or subregions are not supported.
+ *
+ * If the device memory region returned via @get_memory_region is a
+ * memory region container, it's supported to dynamically (un)map subregions
+ * as long as the number of memslots returned by @get_memslots() won't
+ * be exceeded and as long as all memory regions are of the same kind (e.g.,
+ * all RAM or all ROM).
 */
 struct MemoryDeviceClass {
    /* private */
@ -88,6 +100,28 @@ struct MemoryDeviceClass {
     */
    MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);

+    /*
+     * Optional: Instruct the memory device to decide how many memory slots
+     * it requires, not exceeding the given limit.
+     *
+     * Called exactly once when pre-plugging the memory device, before
+     * querying the number of memslots using @get_memslots the first time.
+     */
+    void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit);
+
+    /*
+     * Optional for memory devices that require only a single memslot,
+     * required for all other memory devices: Return the number of memslots
+     * (distinct RAM memory regions in the device memory region) that are
+     * required by the device.
+     *
+     * If this function is not implemented, the assumption is "1".
+     *
+     * Called when (un)plugging the memory device, to check if the requirements
+     * can be satisfied, and to do proper accounting.
+     */
+    unsigned int (*get_memslots)(MemoryDeviceState *md);
+
    /*
     * Optional: Return the desired minimum alignment of the device in guest
     * physical address space. The final alignment is computed based on this
@ -105,8 +139,31 @@ struct MemoryDeviceClass {
                             MemoryDeviceInfo *info);
 };

+/*
+ * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby
+ * 253 memslots were "reserved" for boot memory and other devices (such
+ * as PCI BARs, which can get mapped dynamically) and 256 memslots were
+ * dedicated for DIMMs. These magic numbers worked reliably in the past.
+ *
+ * Further, using many memslots can negatively affect performance, so setting
+ * the soft-limit of memslots used by memory devices to the traditional
+ * DIMM limit of 256 sounds reasonable.
+ *
+ * If we have less than 509 memslots, we will instruct memory devices that
+ * support automatically deciding how many memslots to use to only use a single
+ * one.
+ *
+ * Hotplugging vhost devices with at least 509 memslots is not expected to
+ * cause problems, not even when devices automatically decided how many memslots
+ * to use.
+ */
+#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256
+#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509
+
 MemoryDeviceInfoList *qmp_memory_device_list(void);
 uint64_t get_plugged_memory_size(void);
+unsigned int memory_devices_get_reserved_memslots(void);
+bool memory_devices_memslot_auto_decision_active(void);
 void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
                            const uint64_t *legacy_align, Error **errp);
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@ -86,9 +86,6 @@ typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev,
 typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
 typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
                                       char *mac_addr);
-typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev,
-                                           uint64_t start1, uint64_t size1,
-                                           uint64_t start2, uint64_t size2);
 typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev,
                                            uint64_t guest_cid);
 typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start);
@ -108,8 +105,7 @@ typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev,
 typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev,
                                             uint64_t session_id);

-typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev,
-                                                MemoryRegionSection *section);
+typedef bool (*vhost_backend_no_private_memslots_op)(struct vhost_dev *dev);

 typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
                                        uint16_t queue_size,
@ -138,6 +134,7 @@ typedef struct VhostOps {
    vhost_backend_init vhost_backend_init;
    vhost_backend_cleanup vhost_backend_cleanup;
    vhost_backend_memslots_limit vhost_backend_memslots_limit;
+    vhost_backend_no_private_memslots_op vhost_backend_no_private_memslots;
    vhost_net_set_backend_op vhost_net_set_backend;
    vhost_net_set_mtu_op vhost_net_set_mtu;
    vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint;
@ -163,7 +160,6 @@ typedef struct VhostOps {
    vhost_set_vring_enable_op vhost_set_vring_enable;
    vhost_requires_shm_log_op vhost_requires_shm_log;
    vhost_migration_done_op vhost_migration_done;
-    vhost_backend_can_merge_op vhost_backend_can_merge;
    vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid;
    vhost_vsock_set_running_op vhost_vsock_set_running;
    vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
@ -172,7 +168,6 @@ typedef struct VhostOps {
    vhost_set_config_op vhost_set_config;
    vhost_crypto_create_session_op vhost_crypto_create_session;
    vhost_crypto_close_session_op vhost_crypto_close_session;
-    vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
    vhost_get_inflight_fd_op vhost_get_inflight_fd;
    vhost_set_inflight_fd_op vhost_set_inflight_fd;
    vhost_dev_start_op vhost_dev_start;
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@ -315,7 +315,8 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
 */
 void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
                        uint64_t features);
-bool vhost_has_free_slot(void);
+unsigned int vhost_get_max_memslots(void);
+unsigned int vhost_get_free_memslots(void);

 int vhost_net_set_backend(struct vhost_dev *hdev,
                          struct vhost_vring_file *file);
--- a/include/hw/virtio/virtio-mem.h
+++ b/include/hw/virtio/virtio-mem.h
@ -33,6 +33,7 @@ OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass,
 #define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible"
 #define VIRTIO_MEM_EARLY_MIGRATION_PROP "x-early-migration"
 #define VIRTIO_MEM_PREALLOC_PROP "prealloc"
+#define VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP "dynamic-memslots"

 struct VirtIOMEM {
    VirtIODevice parent_obj;
@ -44,7 +45,28 @@ struct VirtIOMEM {
    int32_t bitmap_size;
    unsigned long *bitmap;

-    /* assigned memory backend and memory region */
+    /*
+     * With "dynamic-memslots=on": Device memory region in which we dynamically
+     * map the memslots.
+     */
+    MemoryRegion *mr;
+
+    /*
+     * With "dynamic-memslots=on": The individual memslots (aliases into the
+     * memory backend).
+     */
+    MemoryRegion *memslots;
+
+    /* With "dynamic-memslots=on": The total number of memslots. */
+    uint16_t nb_memslots;
+
+    /*
+     * With "dynamic-memslots=on": Size of one memslot (the size of the
+     * last one can differ).
+     */
+    uint64_t memslot_size;
+
+    /* Assigned memory backend with the RAM memory region. */
    HostMemoryBackend *memdev;

    /* NUMA node */
@ -82,6 +104,12 @@ struct VirtIOMEM {
     */
    bool early_migration;

+    /*
+     * Whether we dynamically map (multiple, if possible) memslots instead of
+     * statically mapping the whole RAM memory region.
+     */
+    bool dynamic_memslots;
+
    /* notifiers to notify when "size" changes */
    NotifierList size_change_notifiers;

@ -96,6 +124,8 @@ struct VirtIOMEMClass {
    /* public */
    void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi);
    MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp);
+    void (*decide_memslots)(VirtIOMEM *vmem, unsigned int limit);
+    unsigned int (*get_memslots)(VirtIOMEM *vmem);
    void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
    void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
    void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp);
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@ -215,7 +215,8 @@ typedef struct KVMRouteChange {

 /* external API */

-bool kvm_has_free_slot(MachineState *ms);
+unsigned int kvm_get_max_memslots(void);
+unsigned int kvm_get_free_memslots(void);
 bool kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
 int kvm_has_robust_singlestep(void);
@ -552,7 +553,6 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source);
 */
 int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
 struct ppc_radix_page_info *kvm_get_radix_page_info(void);
-int kvm_get_max_memslots(void);

 /* Notify resamplefd for EOI of specific interrupts. */
 void kvm_resample_fd_notify(int gsi);
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@ -40,6 +40,7 @@ typedef struct KVMMemoryUpdate {
 typedef struct KVMMemoryListener {
    MemoryListener listener;
    KVMSlot *slots;
+    unsigned int nr_used_slots;
    int as_id;
    QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
    QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
--- a/stubs/qmp_memory_device.c
+++ b/stubs/qmp_memory_device.c
@ -10,3 +10,13 @@ uint64_t get_plugged_memory_size(void)
 {
    return (uint64_t)-1;
 }
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+    return 0;
+}
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    return false;
+}
--- a/stubs/meson.build
+++ b/stubs/meson.build
@ -32,7 +32,7 @@ stub_ss.add(files('monitor.c'))
 stub_ss.add(files('monitor-core.c'))
 stub_ss.add(files('physmem.c'))
 stub_ss.add(files('qemu-timer-notify-cb.c'))
-stub_ss.add(files('qmp_memory_device.c'))
+stub_ss.add(files('memory_device.c'))
 stub_ss.add(files('qmp-command-available.c'))
 stub_ss.add(files('qmp-quit.c'))
 stub_ss.add(files('qtest.c'))
--- a/system/memory.c
+++ b/system/memory.c
@ -224,6 +224,7 @@ struct FlatRange {
    bool romd_mode;
    bool readonly;
    bool nonvolatile;
+    bool unmergeable;
 };

 #define FOR_EACH_FLAT_RANGE(var, view)          \
@ -240,6 +241,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv)
        .offset_within_address_space = int128_get64(fr->addr.start),
        .readonly = fr->readonly,
        .nonvolatile = fr->nonvolatile,
+        .unmergeable = fr->unmergeable,
    };
 }

@ -250,7 +252,8 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
        && a->offset_in_region == b->offset_in_region
        && a->romd_mode == b->romd_mode
        && a->readonly == b->readonly
-        && a->nonvolatile == b->nonvolatile;
+        && a->nonvolatile == b->nonvolatile
+        && a->unmergeable == b->unmergeable;
 }

 static FlatView *flatview_new(MemoryRegion *mr_root)
@ -323,7 +326,8 @@ static bool can_merge(FlatRange *r1, FlatRange *r2)
        && r1->dirty_log_mask == r2->dirty_log_mask
        && r1->romd_mode == r2->romd_mode
        && r1->readonly == r2->readonly
-        && r1->nonvolatile == r2->nonvolatile;
+        && r1->nonvolatile == r2->nonvolatile
+        && !r1->unmergeable && !r2->unmergeable;
 }

 /* Attempt to simplify a view by merging adjacent ranges */
@ -599,7 +603,8 @@ static void render_memory_region(FlatView *view,
                                 Int128 base,
                                 AddrRange clip,
                                 bool readonly,
-                                 bool nonvolatile)
+                                 bool nonvolatile,
+                                 bool unmergeable)
 {
    MemoryRegion *subregion;
    unsigned i;
@ -616,6 +621,7 @@ static void render_memory_region(FlatView *view,
    int128_addto(&base, int128_make64(mr->addr));
    readonly |= mr->readonly;
    nonvolatile |= mr->nonvolatile;
+    unmergeable |= mr->unmergeable;

    tmp = addrrange_make(base, mr->size);

@ -629,14 +635,14 @@ static void render_memory_region(FlatView *view,
        int128_subfrom(&base, int128_make64(mr->alias->addr));
        int128_subfrom(&base, int128_make64(mr->alias_offset));
        render_memory_region(view, mr->alias, base, clip,
-                             readonly, nonvolatile);
+                             readonly, nonvolatile, unmergeable);
        return;
    }

    /* Render subregions in priority order. */
    QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
        render_memory_region(view, subregion, base, clip,
-                             readonly, nonvolatile);
+                             readonly, nonvolatile, unmergeable);
    }

    if (!mr->terminates) {
@ -652,6 +658,7 @@ static void render_memory_region(FlatView *view,
    fr.romd_mode = mr->romd_mode;
    fr.readonly = readonly;
    fr.nonvolatile = nonvolatile;
+    fr.unmergeable = unmergeable;

    /* Render the region itself into any gaps left by the current view. */
    for (i = 0; i < view->nr && int128_nz(remain); ++i) {
@ -753,7 +760,7 @@ static FlatView *generate_memory_topology(MemoryRegion *mr)
    if (mr) {
        render_memory_region(view, mr, int128_zero(),
                             addrrange_make(int128_zero(), int128_2_64()),
-                             false, false);
+                             false, false, false);
    }
    flatview_simplify(view);

@ -2085,7 +2092,7 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr)

 RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
 {
-    if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) {
+    if (!memory_region_is_ram(mr)) {
        return NULL;
    }
    return mr->rdm;
@ -2094,7 +2101,7 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
 void memory_region_set_ram_discard_manager(MemoryRegion *mr,
                                           RamDiscardManager *rdm)
 {
-    g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr));
+    g_assert(memory_region_is_ram(mr));
    g_assert(!rdm || !mr->rdm);
    mr->rdm = rdm;
 }
@ -2755,6 +2762,18 @@ void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset)
    memory_region_transaction_commit();
 }

+void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable)
+{
+    if (unmergeable == mr->unmergeable) {
+        return;
+    }
+
+    memory_region_transaction_begin();
+    mr->unmergeable = unmergeable;
+    memory_region_update_pending |= mr->enabled;
+    memory_region_transaction_commit();
+}
+
 uint64_t memory_region_get_alignment(const MemoryRegion *mr)
 {
    return mr->align;
--- a/system/physmem.c
+++ b/system/physmem.c
@ -2221,23 +2221,6 @@ ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
    return res;
 }

-/*
- * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
- * in that RAMBlock.
- *
- * ptr: Host pointer to look up
- * round_offset: If true round the result offset down to a page boundary
- * *ram_addr: set to result ram_addr
- * *offset: set to result offset within the RAMBlock
- *
- * Returns: RAMBlock (or NULL if not found)
- *
- * By the time this function returns, the returned pointer is not protected
- * by RCU anymore.  If the caller is not within an RCU critical section and
- * does not hold the iothread lock, it must have other means of protecting the
- * pointer, such as a reference to the region that includes the incoming
- * ram_addr_t.
- */
 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *offset)
 {