Merge tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu into staging

Hi,

"Host Memory Backends" and "Memory devices" queue ("mem"):

- Support memory devices with multiple memslots
- Support memory devices that dynamically consume memslots
- Support memory devices that can automatically decide on the number of
  memslots to use
- virtio-mem support for exposing memory dynamically via multiple memslots
- Some required cleanups/refactorings

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUn+XMRHGRhdmlkQHJl
# ZGhhdC5jb20ACgkQTd4Q9wD/g1qDHA//T01suTa+uzrcoJHoMWN11S47WnAmbuTo
# vVakucLBPMJAa9xZeCy3OavXaVGpHkw+t6g3OFknof0LfQ5/j9iE3Q1PxURN7g5j
# SJ2WJXCoceM6T4TMhPvVvgEaYjFmESqZB5FZgedMT0QRyhAxMuF9pCkWhk1O3OAV
# JqQKqLFiGcv60AEuBYGZGzgiOUv8EJ5gKwRF4VOdyHIxqZDw1aZXzlcd4TzFZBQ7
# rwW/3ef+sFmUJdmfrSrqcIlQSRrqZ2w95xATDzLTIEEUT3SWqh/E95EZWIz1M0oQ
# NgWgFiLCR1KOj7bWFhLXT7IfyLh0mEysD+P/hY6QwQ4RewWG7EW5UK+JFswssdcZ
# rEj5XpHZzev/wx7hM4bWsoQ+VIvrH7j3uYGyWkcgYRbdDEkWDv2rsT23lwGYNhht
# oBsrdEBELRw6v4C8doq/+sCmHmuxUMqTGwbArCQVnB1XnLxOEkuqlnfq5MORkzNF
# fxbIRx+LRluOllC0HVaDQd8qxRq1+UC5WIpAcDcrouy4HGgi1onWKrXpgjIAbVyH
# M6cENkK7rnRk96gpeXdmrf0h9HqRciAOY8oUsFsvLyKBOCPBWDrLyOQEY5UoSdtD
# m4QpEVgywCy2z1uU/UObeT/UxJy/9EL/Zb+DHoEK06iEhwONoUJjEBYMJD38RMkk
# mwPTB4UAk9g=
# =s69t
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 12 Oct 2023 09:49:39 EDT
# gpg:                using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A
# gpg:                issuer "david@redhat.com"
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown]
# gpg:                 aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]
# gpg:                 aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D FCCA 4DDE 10F7 00FF 835A

* tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu:
  virtio-mem: Mark memslot alias memory regions unmergeable
  memory,vhost: Allow for marking memory device memory regions unmergeable
  virtio-mem: Expose device memory dynamically via multiple memslots if enabled
  virtio-mem: Update state to match bitmap as soon as it's been migrated
  virtio-mem: Pass non-const VirtIOMEM via virtio_mem_range_cb
  memory: Clarify mapping requirements for RamDiscardManager
  memory-device,vhost: Support automatic decision on the number of memslots
  vhost: Add vhost_get_max_memslots()
  kvm: Add stub for kvm_get_max_memslots()
  memory-device,vhost: Support memory devices that dynamically consume memslots
  memory-device: Track required and actually used memslots in DeviceMemoryState
  stubs: Rename qmp_memory_device.c to memory_device.c
  memory-device: Support memory devices with multiple memslots
  vhost: Return number of free memslots
  kvm: Return number of free memslots
  softmmu/physmem: Fixup qemu_ram_block_from_host() documentation
  vhost: Remove vhost_backend_can_merge() callback
  vhost: Rework memslot filtering and fix "used_memslot" tracking

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
commit bc2b89b385
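The new behaviour is opt-in per virtio-mem device. A minimal command-line sketch (the backend/device ids and sizes below are invented for illustration; only the dynamic-memslots property is introduced by this series):

    qemu-system-x86_64 -accel kvm -m 4G,maxmem=260G \
        -object memory-backend-ram,id=mem0,size=256G,reserve=off \
        -device virtio-mem-pci,id=vmem0,memdev=mem0,requested-size=0,dynamic-memslots=on

With dynamic-memslots=on, the device asks the memory-device core for a memslot limit at pre-plug time, carves its device memory region into that many unmergeable aliases, and maps an alias only once blocks inside it get plugged.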
diff --git a/MAINTAINERS b/MAINTAINERS
@@ -2891,6 +2891,7 @@ F: hw/mem/pc-dimm.c
 F: include/hw/mem/memory-device.h
 F: include/hw/mem/nvdimm.h
 F: include/hw/mem/pc-dimm.h
+F: stubs/memory_device.c
 F: docs/nvdimm.txt
 
 SPICE
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
@@ -174,13 +174,31 @@ void kvm_resample_fd_notify(int gsi)
     }
 }
 
-int kvm_get_max_memslots(void)
+unsigned int kvm_get_max_memslots(void)
 {
     KVMState *s = KVM_STATE(current_accel());
 
     return s->nr_slots;
 }
 
+unsigned int kvm_get_free_memslots(void)
+{
+    unsigned int used_slots = 0;
+    KVMState *s = kvm_state;
+    int i;
+
+    kvm_slots_lock();
+    for (i = 0; i < s->nr_as; i++) {
+        if (!s->as[i].ml) {
+            continue;
+        }
+        used_slots = MAX(used_slots, s->as[i].ml->nr_used_slots);
+    }
+    kvm_slots_unlock();
+
+    return s->nr_slots - used_slots;
+}
+
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
 {
@@ -196,19 +214,6 @@ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
     return NULL;
 }
 
-bool kvm_has_free_slot(MachineState *ms)
-{
-    KVMState *s = KVM_STATE(ms->accelerator);
-    bool result;
-    KVMMemoryListener *kml = &s->memory_listener;
-
-    kvm_slots_lock();
-    result = !!kvm_get_free_slot(kml);
-    kvm_slots_unlock();
-
-    return result;
-}
-
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
 {
@@ -1387,6 +1392,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
             }
             start_addr += slot_size;
             size -= slot_size;
+            kml->nr_used_slots--;
         } while (size);
         return;
     }
@@ -1412,6 +1418,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
         ram_start_offset += slot_size;
         ram += slot_size;
         size -= slot_size;
+        kml->nr_used_slots++;
     } while (size);
 }
 
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
@@ -109,9 +109,14 @@ int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
     return -ENOSYS;
 }
 
-bool kvm_has_free_slot(MachineState *ms)
+unsigned int kvm_get_max_memslots(void)
 {
-    return false;
+    return 0;
+}
+
+unsigned int kvm_get_free_memslots(void)
+{
+    return 0;
 }
 
 void kvm_init_cpu_signals(CPUState *cpu)
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
@@ -52,19 +52,135 @@ static int memory_device_build_list(Object *obj, void *opaque)
     return 0;
 }
 
-static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr,
-                                        Error **errp)
+static unsigned int memory_device_get_memslots(MemoryDeviceState *md)
 {
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+
+    if (mdc->get_memslots) {
+        return mdc->get_memslots(md);
+    }
+    return 1;
+}
+
+/*
+ * Memslots that are reserved by memory devices (required but still reported
+ * as free from KVM / vhost).
+ */
+static unsigned int get_reserved_memslots(MachineState *ms)
+{
+    if (ms->device_memory->used_memslots >
+        ms->device_memory->required_memslots) {
+        /* This is unexpected, and we warned already in the memory notifier. */
+        return 0;
+    }
+    return ms->device_memory->required_memslots -
+           ms->device_memory->used_memslots;
+}
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+    if (!current_machine->device_memory) {
+        return 0;
+    }
+    return get_reserved_memslots(current_machine);
+}
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    if (!current_machine->device_memory) {
+        return false;
+    }
+
+    return current_machine->device_memory->memslot_auto_decision_active;
+}
+
+static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
+                                                         MemoryRegion *mr)
+{
+    const unsigned int reserved = get_reserved_memslots(ms);
+    const uint64_t size = memory_region_size(mr);
+    unsigned int max = vhost_get_max_memslots();
+    unsigned int free = vhost_get_free_memslots();
+    uint64_t available_space;
+    unsigned int memslots;
+
+    if (kvm_enabled()) {
+        max = MIN(max, kvm_get_max_memslots());
+        free = MIN(free, kvm_get_free_memslots());
+    }
+
+    /*
+     * If we only have less overall memslots than what we consider reasonable,
+     * just keep it to a minimum.
+     */
+    if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
+        return 1;
+    }
+
+    /*
+     * Consider our soft-limit across all memory devices. We don't really
+     * expect to exceed this limit in reasonable configurations.
+     */
+    if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
+        ms->device_memory->required_memslots) {
+        return 1;
+    }
+    memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
+               ms->device_memory->required_memslots;
+
+    /*
+     * Consider the actually still free memslots. This is only relevant if
+     * other memslot consumers would consume *significantly* more memslots than
+     * what we prepared for (> 253). Unlikely, but let's just handle it
+     * cleanly.
+     */
+    memslots = MIN(memslots, free - reserved);
+    if (memslots < 1 || unlikely(free < reserved)) {
+        return 1;
+    }
+
+    /* We cannot have any other memory devices? So give all to this device. */
+    if (size == ms->maxram_size - ms->ram_size) {
+        return memslots;
+    }
+
+    /*
+     * Simple heuristic: equally distribute the memslots over the space
+     * still available for memory devices.
+     */
+    available_space = ms->maxram_size - ms->ram_size -
+                      ms->device_memory->used_region_size;
+    memslots = (double)memslots * size / available_space;
+    return memslots < 1 ? 1 : memslots;
+}
+
+static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
+                                        MemoryRegion *mr, Error **errp)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
     const uint64_t used_region_size = ms->device_memory->used_region_size;
     const uint64_t size = memory_region_size(mr);
+    const unsigned int reserved_memslots = get_reserved_memslots(ms);
+    unsigned int required_memslots, memslot_limit;
 
-    /* we will need a new memory slot for kvm and vhost */
-    if (kvm_enabled() && !kvm_has_free_slot(ms)) {
-        error_setg(errp, "hypervisor has no free memory slots left");
+    /*
+     * Instruct the device to decide how many memslots to use, if applicable,
+     * before we query the number of required memslots the first time.
+     */
+    if (mdc->decide_memslots) {
+        memslot_limit = memory_device_memslot_decision_limit(ms, mr);
+        mdc->decide_memslots(md, memslot_limit);
+    }
+    required_memslots = memory_device_get_memslots(md);
+
+    /* we will need memory slots for kvm and vhost */
+    if (kvm_enabled() &&
+        kvm_get_free_memslots() < required_memslots + reserved_memslots) {
+        error_setg(errp, "hypervisor has not enough free memory slots left");
        return;
     }
-    if (!vhost_has_free_slot()) {
-        error_setg(errp, "a used vhost backend has no free memory slots left");
+    if (vhost_get_free_memslots() < required_memslots + reserved_memslots) {
+        error_setg(errp, "a used vhost backend has not enough free memory slots left");
         return;
     }
 
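To make the distribution heuristic above concrete, a worked example with made-up numbers: assume KVM and every vhost backend expose at least 509 memslots, nothing is reserved, no memory device is plugged yet (required_memslots = 0, used_region_size = 0), the machine was started with -m 4G,maxmem=68G (so 64 GiB are available for memory devices), and a device with a 32 GiB memory region is being pre-plugged. The soft limit leaves 256 - 0 = 256 memslots, the free/reserved check does not reduce that, and because the device does not cover the whole 64 GiB it is offered roughly 256 * 32 GiB / 64 GiB = 128 memslots; a device covering the full 64 GiB would be offered all 256.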
@@ -233,7 +349,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
         goto out;
     }
 
-    memory_device_check_addable(ms, mr, &local_err);
+    memory_device_check_addable(ms, md, mr, &local_err);
     if (local_err) {
         goto out;
     }
@@ -264,6 +380,7 @@ out:
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
 {
     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    const unsigned int memslots = memory_device_get_memslots(md);
     const uint64_t addr = mdc->get_addr(md);
     MemoryRegion *mr;
 
@@ -275,6 +392,11 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
     g_assert(ms->device_memory);
 
     ms->device_memory->used_region_size += memory_region_size(mr);
+    ms->device_memory->required_memslots += memslots;
+    if (mdc->decide_memslots && memslots > 1) {
+        ms->device_memory->memslot_auto_decision_active++;
+    }
+
     memory_region_add_subregion(&ms->device_memory->mr,
                                 addr - ms->device_memory->base, mr);
     trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
@@ -283,6 +405,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
 void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
 {
     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    const unsigned int memslots = memory_device_get_memslots(md);
     MemoryRegion *mr;
 
     /*
@@ -293,7 +416,12 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
     g_assert(ms->device_memory);
 
     memory_region_del_subregion(&ms->device_memory->mr, mr);
+
+    if (mdc->decide_memslots && memslots > 1) {
+        ms->device_memory->memslot_auto_decision_active--;
+    }
     ms->device_memory->used_region_size -= memory_region_size(mr);
+    ms->device_memory->required_memslots -= memslots;
     trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
                                mdc->get_addr(md));
 }
@@ -313,6 +441,50 @@ uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
     return memory_region_size(mr);
 }
 
+static void memory_devices_region_mod(MemoryListener *listener,
+                                      MemoryRegionSection *mrs, bool add)
+{
+    DeviceMemoryState *dms = container_of(listener, DeviceMemoryState,
+                                          listener);
+
+    if (!memory_region_is_ram(mrs->mr)) {
+        warn_report("Unexpected memory region mapped into device memory region.");
+        return;
+    }
+
+    /*
+     * The expectation is that each distinct RAM memory region section in
+     * our region for memory devices consumes exactly one memslot in KVM
+     * and in vhost. For vhost, this is true, except:
+     * * ROM memory regions don't consume a memslot. These get used very
+     *   rarely for memory devices (R/O NVDIMMs).
+     * * Memslots without a fd (memory-backend-ram) don't necessarily
+     *   consume a memslot. Such setups are quite rare and possibly bogus:
+     *   the memory would be inaccessible by such vhost devices.
+     *
+     * So for vhost, in corner cases we might over-estimate the number of
+     * memslots that are currently used or that might still be reserved
+     * (required - used).
+     */
+    dms->used_memslots += add ? 1 : -1;
+
+    if (dms->used_memslots > dms->required_memslots) {
+        warn_report("Memory devices use more memory slots than indicated as required.");
+    }
+}
+
+static void memory_devices_region_add(MemoryListener *listener,
+                                      MemoryRegionSection *mrs)
+{
+    return memory_devices_region_mod(listener, mrs, true);
+}
+
+static void memory_devices_region_del(MemoryListener *listener,
+                                      MemoryRegionSection *mrs)
+{
+    return memory_devices_region_mod(listener, mrs, false);
+}
+
 void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
 {
     g_assert(size);
@@ -322,8 +494,16 @@ void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
 
     memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory",
                        size);
+    address_space_init(&ms->device_memory->as, &ms->device_memory->mr,
+                       "device-memory");
     memory_region_add_subregion(get_system_memory(), ms->device_memory->base,
                                 &ms->device_memory->mr);
+
+    /* Track the number of memslots used by memory devices. */
+    ms->device_memory->listener.region_add = memory_devices_region_add;
+    ms->device_memory->listener.region_del = memory_devices_region_del;
+    memory_listener_register(&ms->device_memory->listener,
+                             &ms->device_memory->as);
 }
 
 static const TypeInfo memory_device_info = {
diff --git a/hw/virtio/vhost-stub.c b/hw/virtio/vhost-stub.c
@@ -2,9 +2,14 @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"
 
-bool vhost_has_free_slot(void)
+unsigned int vhost_get_max_memslots(void)
 {
-    return true;
+    return UINT_MAX;
+}
+
+unsigned int vhost_get_free_memslots(void)
+{
+    return UINT_MAX;
 }
 
 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
@@ -2327,19 +2327,6 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
     return -ENOTSUP;
 }
 
-static bool vhost_user_can_merge(struct vhost_dev *dev,
-                                 uint64_t start1, uint64_t size1,
-                                 uint64_t start2, uint64_t size2)
-{
-    ram_addr_t offset;
-    int mfd, rfd;
-
-    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
-    (void)vhost_user_get_mr_data(start2, &offset, &rfd);
-
-    return mfd == rfd;
-}
-
 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
 {
     VhostUserMsg msg;
@@ -2622,10 +2609,9 @@ vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
     return 0;
 }
 
-static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
-                                          MemoryRegionSection *section)
+static bool vhost_user_no_private_memslots(struct vhost_dev *dev)
 {
-    return memory_region_get_fd(section->mr) >= 0;
+    return true;
 }
 
 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
@@ -2868,6 +2854,7 @@ const VhostOps user_ops = {
         .vhost_backend_init = vhost_user_backend_init,
         .vhost_backend_cleanup = vhost_user_backend_cleanup,
         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
+        .vhost_backend_no_private_memslots = vhost_user_no_private_memslots,
         .vhost_set_log_base = vhost_user_set_log_base,
         .vhost_set_mem_table = vhost_user_set_mem_table,
         .vhost_set_vring_addr = vhost_user_set_vring_addr,
@@ -2886,7 +2873,6 @@ const VhostOps user_ops = {
         .vhost_set_vring_enable = vhost_user_set_vring_enable,
         .vhost_requires_shm_log = vhost_user_requires_shm_log,
         .vhost_migration_done = vhost_user_migration_done,
-        .vhost_backend_can_merge = vhost_user_can_merge,
         .vhost_net_set_mtu = vhost_user_net_set_mtu,
         .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
         .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
@@ -2894,7 +2880,6 @@ const VhostOps user_ops = {
         .vhost_set_config = vhost_user_set_config,
         .vhost_crypto_create_session = vhost_user_crypto_create_session,
         .vhost_crypto_close_session = vhost_user_crypto_close_session,
-        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
         .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
         .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
         .vhost_dev_start = vhost_user_dev_start,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
@@ -1512,7 +1512,6 @@ const VhostOps vdpa_ops = {
         .vhost_set_config = vhost_vdpa_set_config,
         .vhost_requires_shm_log = NULL,
         .vhost_migration_done = NULL,
-        .vhost_backend_can_merge = NULL,
         .vhost_net_set_mtu = NULL,
         .vhost_set_iotlb_callback = NULL,
         .vhost_send_device_iotlb_msg = NULL,
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
@@ -23,6 +23,7 @@
 #include "qemu/log.h"
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/virtio-bus.h"
+#include "hw/mem/memory-device.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file-types.h"
 #include "sysemu/dma.h"
@@ -45,20 +46,44 @@
 static struct vhost_log *vhost_log;
 static struct vhost_log *vhost_log_shm;
 
+/* Memslots used by backends that support private memslots (without an fd). */
 static unsigned int used_memslots;
+
+/* Memslots used by backends that only support shared memslots (with an fd). */
+static unsigned int used_shared_memslots;
+
 static QLIST_HEAD(, vhost_dev) vhost_devices =
     QLIST_HEAD_INITIALIZER(vhost_devices);
 
-bool vhost_has_free_slot(void)
+unsigned int vhost_get_max_memslots(void)
 {
-    unsigned int slots_limit = ~0U;
+    unsigned int max = UINT_MAX;
     struct vhost_dev *hdev;
 
     QLIST_FOREACH(hdev, &vhost_devices, entry) {
+        max = MIN(max, hdev->vhost_ops->vhost_backend_memslots_limit(hdev));
+    }
+    return max;
+}
+
+unsigned int vhost_get_free_memslots(void)
+{
+    unsigned int free = UINT_MAX;
+    struct vhost_dev *hdev;
+
+    QLIST_FOREACH(hdev, &vhost_devices, entry) {
         unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
-        slots_limit = MIN(slots_limit, r);
+        unsigned int cur_free;
+
+        if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
+            hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
+            cur_free = r - used_shared_memslots;
+        } else {
+            cur_free = r - used_memslots;
+        }
+        free = MIN(free, cur_free);
     }
-    return slots_limit > used_memslots;
+    return free;
 }
 
 static void vhost_dev_sync_region(struct vhost_dev *dev,
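To make the free-memslot accounting above concrete (all numbers invented): suppose one vhost-user device, which only handles fd-backed memory and is therefore accounted via used_shared_memslots, currently sees 8 memslots against a backend limit of 509, while one vhost kernel device sees 10 memslots against a limit of 64. vhost_get_free_memslots() then returns MIN(509 - 8, 64 - 10) = 54, and vhost_get_max_memslots() returns MIN(509, 64) = 64.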
@@ -474,8 +499,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
  * vhost_section: identify sections needed for vhost access
  *
  * We only care about RAM sections here (where virtqueue and guest
- * internals accessed by virtio might live). If we find one we still
- * allow the backend to potentially filter it out of our list.
+ * internals accessed by virtio might live).
  */
 static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
 {
@@ -502,8 +526,16 @@ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
             return false;
         }
 
-        if (dev->vhost_ops->vhost_backend_mem_section_filter &&
-            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {
+        /*
+         * Some backends (like vhost-user) can only handle memory regions
+         * that have an fd (can be mapped into a different process). Filter
+         * the ones without an fd out, if requested.
+         *
+         * TODO: we might have to limit to MAP_SHARED as well.
+         */
+        if (memory_region_get_fd(section->mr) < 0 &&
+            dev->vhost_ops->vhost_backend_no_private_memslots &&
+            dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
             trace_vhost_reject_section(mr->name, 2);
             return false;
         }
@@ -568,7 +600,14 @@ static void vhost_commit(MemoryListener *listener)
                        dev->n_mem_sections * sizeof dev->mem->regions[0];
     dev->mem = g_realloc(dev->mem, regions_size);
     dev->mem->nregions = dev->n_mem_sections;
-    used_memslots = dev->mem->nregions;
+
+    if (dev->vhost_ops->vhost_backend_no_private_memslots &&
+        dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
+        used_shared_memslots = dev->mem->nregions;
+    } else {
+        used_memslots = dev->mem->nregions;
+    }
+
     for (i = 0; i < dev->n_mem_sections; i++) {
         struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
         struct MemoryRegionSection *mrs = dev->mem_sections + i;
@@ -668,7 +707,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
                                        mrs_size, mrs_host);
     }
 
-    if (dev->n_tmp_sections) {
+    if (dev->n_tmp_sections && !section->unmergeable) {
         /* Since we already have at least one section, lets see if
          * this extends it; since we're scanning in order, we only
          * have to look at the last one, and the FlatView that calls
@@ -701,11 +740,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
             size_t offset = mrs_gpa - prev_gpa_start;
 
             if (prev_host_start + offset == mrs_host &&
-                section->mr == prev_sec->mr &&
-                (!dev->vhost_ops->vhost_backend_can_merge ||
-                 dev->vhost_ops->vhost_backend_can_merge(dev,
-                                                         mrs_host, mrs_size,
-                                                         prev_host_start, prev_size))) {
+                section->mr == prev_sec->mr && !prev_sec->unmergeable) {
                 uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
                 need_add = false;
                 prev_sec->offset_within_address_space =
@@ -1400,6 +1435,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                    VhostBackendType backend_type, uint32_t busyloop_timeout,
                    Error **errp)
 {
+    unsigned int used, reserved, limit;
     uint64_t features;
     int i, r, n_initialized_vqs = 0;
 
@@ -1426,6 +1462,19 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
         goto fail;
     }
 
+    limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+    if (limit < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS &&
+        memory_devices_memslot_auto_decision_active()) {
+        error_setg(errp, "some memory device (like virtio-mem)"
+                   " decided how many memory slots to use based on the overall"
+                   " number of memory slots; this vhost backend would further"
+                   " restricts the overall number of memory slots");
+        error_append_hint(errp, "Try plugging this vhost backend before"
+                          " plugging such memory devices.\n");
+        r = -EINVAL;
+        goto fail;
+    }
+
     for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
         r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
         if (r < 0) {
@@ -1495,9 +1544,27 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
     memory_listener_register(&hdev->memory_listener, &address_space_memory);
     QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
 
-    if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
-        error_setg(errp, "vhost backend memory slots limit is less"
-                   " than current number of present memory slots");
+    /*
+     * The listener we registered properly updated the corresponding counter.
+     * So we can trust that these values are accurate.
+     */
+    if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
+        hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
+        used = used_shared_memslots;
+    } else {
+        used = used_memslots;
+    }
+    /*
+     * We assume that all reserved memslots actually require a real memslot
+     * in our vhost backend. This might not be true, for example, if the
+     * memslot would be ROM. If ever relevant, we can optimize for that --
+     * but we'll need additional information about the reservations.
+     */
+    reserved = memory_devices_get_reserved_memslots();
+    if (used + reserved > limit) {
+        error_setg(errp, "vhost backend memory slots limit (%d) is less"
+                   " than current number of used (%d) and reserved (%d)"
+                   " memory slots for memory devices.", limit, used, reserved);
         r = -EINVAL;
         goto fail_busyloop;
     }
 
diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c
@@ -48,6 +48,25 @@ static MemoryRegion *virtio_mem_pci_get_memory_region(MemoryDeviceState *md,
     return vmc->get_memory_region(vmem, errp);
 }
 
+static void virtio_mem_pci_decide_memslots(MemoryDeviceState *md,
+                                           unsigned int limit)
+{
+    VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+    VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);
+    VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+    vmc->decide_memslots(vmem, limit);
+}
+
+static unsigned int virtio_mem_pci_get_memslots(MemoryDeviceState *md)
+{
+    VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md);
+    VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev);
+    VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem);
+
+    return vmc->get_memslots(vmem);
+}
+
 static uint64_t virtio_mem_pci_get_plugged_size(const MemoryDeviceState *md,
                                                 Error **errp)
 {
@@ -150,6 +169,8 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, void *data)
     mdc->set_addr = virtio_mem_pci_set_addr;
     mdc->get_plugged_size = virtio_mem_pci_get_plugged_size;
     mdc->get_memory_region = virtio_mem_pci_get_memory_region;
+    mdc->decide_memslots = virtio_mem_pci_decide_memslots;
+    mdc->get_memslots = virtio_mem_pci_get_memslots;
     mdc->fill_device_info = virtio_mem_pci_fill_device_info;
     mdc->get_min_alignment = virtio_mem_pci_get_min_alignment;
 
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
@@ -66,6 +66,13 @@ static uint32_t virtio_mem_default_thp_size(void)
     return default_thp_size;
 }
 
+/*
+ * The minimum memslot size depends on this setting ("sane default"), the
+ * device block size, and the memory backend page size. The last (or single)
+ * memslot might be smaller than this constant.
+ */
+#define VIRTIO_MEM_MIN_MEMSLOT_SIZE (1 * GiB)
+
 /*
  * We want to have a reasonable default block size such that
  * 1. We avoid splitting THPs when unplugging memory, which degrades
@@ -177,10 +184,10 @@ static bool virtio_mem_is_busy(void)
     return migration_in_incoming_postcopy() || !migration_is_idle();
 }
 
-typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
+typedef int (*virtio_mem_range_cb)(VirtIOMEM *vmem, void *arg,
                                    uint64_t offset, uint64_t size);
 
-static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_for_each_unplugged_range(VirtIOMEM *vmem, void *arg,
                                                virtio_mem_range_cb cb)
 {
     unsigned long first_zero_bit, last_zero_bit;
@@ -204,7 +211,7 @@ static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
     return ret;
 }
 
-static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
                                              virtio_mem_range_cb cb)
 {
     unsigned long first_bit, last_bit;
@@ -483,6 +490,96 @@ static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
     return true;
 }
 
+static void virtio_mem_activate_memslot(VirtIOMEM *vmem, unsigned int idx)
+{
+    const uint64_t memslot_offset = idx * vmem->memslot_size;
+
+    assert(vmem->memslots);
+
+    /*
+     * Instead of enabling/disabling memslots, we add/remove them. This should
+     * make address space updates faster, because we don't have to loop over
+     * many disabled subregions.
+     */
+    if (memory_region_is_mapped(&vmem->memslots[idx])) {
+        return;
+    }
+    memory_region_add_subregion(vmem->mr, memslot_offset, &vmem->memslots[idx]);
+}
+
+static void virtio_mem_deactivate_memslot(VirtIOMEM *vmem, unsigned int idx)
+{
+    assert(vmem->memslots);
+
+    if (!memory_region_is_mapped(&vmem->memslots[idx])) {
+        return;
+    }
+    memory_region_del_subregion(vmem->mr, &vmem->memslots[idx]);
+}
+
+static void virtio_mem_activate_memslots_to_plug(VirtIOMEM *vmem,
+                                                 uint64_t offset, uint64_t size)
+{
+    const unsigned int start_idx = offset / vmem->memslot_size;
+    const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /
+                                 vmem->memslot_size;
+    unsigned int idx;
+
+    if (!vmem->dynamic_memslots) {
+        return;
+    }
+
+    /* Activate all involved memslots in a single transaction. */
+    memory_region_transaction_begin();
+    for (idx = start_idx; idx < end_idx; idx++) {
+        virtio_mem_activate_memslot(vmem, idx);
+    }
+    memory_region_transaction_commit();
+}
+
+static void virtio_mem_deactivate_unplugged_memslots(VirtIOMEM *vmem,
+                                                     uint64_t offset,
+                                                     uint64_t size)
+{
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+    const unsigned int start_idx = offset / vmem->memslot_size;
+    const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) /
+                                 vmem->memslot_size;
+    unsigned int idx;
+
+    if (!vmem->dynamic_memslots) {
+        return;
+    }
+
+    /* Deactivate all memslots with unplugged blocks in a single transaction. */
+    memory_region_transaction_begin();
+    for (idx = start_idx; idx < end_idx; idx++) {
+        const uint64_t memslot_offset = idx * vmem->memslot_size;
+        uint64_t memslot_size = vmem->memslot_size;
+
+        /* The size of the last memslot might be smaller. */
+        if (idx == vmem->nb_memslots - 1) {
+            memslot_size = region_size - memslot_offset;
+        }
+
+        /*
+         * Partially covered memslots might still have some blocks plugged and
+         * have to remain active if that's the case.
+         */
+        if (offset > memslot_offset ||
+            offset + size < memslot_offset + memslot_size) {
+            const uint64_t gpa = vmem->addr + memslot_offset;
+
+            if (!virtio_mem_is_range_unplugged(vmem, gpa, memslot_size)) {
+                continue;
+            }
+        }
+
+        virtio_mem_deactivate_memslot(vmem, idx);
+    }
+    memory_region_transaction_commit();
+}
+
 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
                                       uint64_t size, bool plug)
 {
@@ -500,6 +597,8 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
         }
         virtio_mem_notify_unplug(vmem, offset, size);
         virtio_mem_set_range_unplugged(vmem, start_gpa, size);
+        /* Deactivate completely unplugged memslots after updating the state. */
+        virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
         return 0;
     }
 
@@ -527,7 +626,20 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
     }
 
     if (!ret) {
+        /*
+         * Activate before notifying and rollback in case of any errors.
+         *
+         * When activating a yet inactive memslot, memory notifiers will get
+         * notified about the added memory region and can register with the
+         * RamDiscardManager; this will traverse all plugged blocks and skip the
+         * blocks we are plugging here. The following notification will inform
+         * registered listeners about the blocks we're plugging.
+         */
+        virtio_mem_activate_memslots_to_plug(vmem, offset, size);
         ret = virtio_mem_notify_plug(vmem, offset, size);
+        if (ret) {
+            virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
+        }
     }
     if (ret) {
         /* Could be preallocation or a notifier populated memory. */
@@ -620,6 +732,7 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
 
 static int virtio_mem_unplug_all(VirtIOMEM *vmem)
 {
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
     RAMBlock *rb = vmem->memdev->mr.ram_block;
 
     if (vmem->size) {
@@ -634,6 +747,9 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem)
         bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
         vmem->size = 0;
         notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
+
+        /* Deactivate all memslots after updating the state. */
+        virtio_mem_deactivate_unplugged_memslots(vmem, 0, region_size);
     }
 
     trace_virtio_mem_unplugged_all();
@@ -790,6 +906,49 @@ static void virtio_mem_system_reset(void *opaque)
     virtio_mem_unplug_all(vmem);
 }
 
+static void virtio_mem_prepare_mr(VirtIOMEM *vmem)
+{
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+
+    assert(!vmem->mr && vmem->dynamic_memslots);
+    vmem->mr = g_new0(MemoryRegion, 1);
+    memory_region_init(vmem->mr, OBJECT(vmem), "virtio-mem",
+                       region_size);
+    vmem->mr->align = memory_region_get_alignment(&vmem->memdev->mr);
+}
+
+static void virtio_mem_prepare_memslots(VirtIOMEM *vmem)
+{
+    const uint64_t region_size = memory_region_size(&vmem->memdev->mr);
+    unsigned int idx;
+
+    g_assert(!vmem->memslots && vmem->nb_memslots && vmem->dynamic_memslots);
+    vmem->memslots = g_new0(MemoryRegion, vmem->nb_memslots);
+
+    /* Initialize our memslots, but don't map them yet. */
+    for (idx = 0; idx < vmem->nb_memslots; idx++) {
+        const uint64_t memslot_offset = idx * vmem->memslot_size;
+        uint64_t memslot_size = vmem->memslot_size;
+        char name[20];
+
+        /* The size of the last memslot might be smaller. */
+        if (idx == vmem->nb_memslots - 1) {
+            memslot_size = region_size - memslot_offset;
+        }
+
+        snprintf(name, sizeof(name), "memslot-%u", idx);
+        memory_region_init_alias(&vmem->memslots[idx], OBJECT(vmem), name,
+                                 &vmem->memdev->mr, memslot_offset,
+                                 memslot_size);
+        /*
+         * We want to be able to atomically and efficiently activate/deactivate
+         * individual memslots without affecting adjacent memslots in memory
+         * notifiers.
+         */
+        memory_region_set_unmergeable(&vmem->memslots[idx], true);
+    }
+}
+
 static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
 {
     MachineState *ms = MACHINE(qdev_get_machine());
@@ -861,6 +1020,14 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
     vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
 
+    if (vmem->dynamic_memslots &&
+        vmem->unplugged_inaccessible != ON_OFF_AUTO_ON) {
+        error_setg(errp, "'%s' property set to 'on' requires '%s' to be 'on'",
+                   VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP,
+                   VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
+        return;
+    }
+
     /*
      * If the block size wasn't configured by the user, use a sane default. This
      * allows using hugetlbfs backends of any page size without manual
@@ -930,6 +1097,25 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
     virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
     vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
 
+    /*
+     * With "dynamic-memslots=off" (old behavior) we always map the whole
+     * RAM memory region directly.
+     */
+    if (vmem->dynamic_memslots) {
+        if (!vmem->mr) {
+            virtio_mem_prepare_mr(vmem);
+        }
+        if (vmem->nb_memslots <= 1) {
+            vmem->nb_memslots = 1;
+            vmem->memslot_size = memory_region_size(&vmem->memdev->mr);
+        }
+        if (!vmem->memslots) {
+            virtio_mem_prepare_memslots(vmem);
+        }
+    } else {
+        assert(!vmem->mr && !vmem->nb_memslots && !vmem->memslots);
+    }
+
     host_memory_backend_set_mapped(vmem->memdev, true);
     vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
     if (vmem->early_migration) {
@@ -969,7 +1155,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
     ram_block_coordinated_discard_require(false);
 }
 
-static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_discard_range_cb(VirtIOMEM *vmem, void *arg,
                                        uint64_t offset, uint64_t size)
 {
     RAMBlock *rb = vmem->memdev->mr.ram_block;
@@ -984,12 +1170,31 @@ static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
                                              virtio_mem_discard_range_cb);
 }
 
-static int virtio_mem_post_load(void *opaque, int version_id)
+static int virtio_mem_activate_memslot_range_cb(VirtIOMEM *vmem, void *arg,
+                                                uint64_t offset, uint64_t size)
 {
+    virtio_mem_activate_memslots_to_plug(vmem, offset, size);
+    return 0;
+}
+
+static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem)
+{
-    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
     RamDiscardListener *rdl;
     int ret;
 
+    /*
+     * We restored the bitmap and updated the requested size; activate all
+     * memslots (so listeners register) before notifying about plugged blocks.
+     */
+    if (vmem->dynamic_memslots) {
+        /*
+         * We don't expect any active memslots at this point to deactivate: no
+         * memory was plugged on the migration destination.
+         */
+        virtio_mem_for_each_plugged_range(vmem, NULL,
+                                          virtio_mem_activate_memslot_range_cb);
+    }
+
     /*
      * We started out with all memory discarded and our memory region is mapped
      * into an address space. Replay, now that we updated the bitmap.
@@ -1001,6 +1206,20 @@ static int virtio_mem_post_load(void *opaque, int version_id)
             return ret;
         }
     }
+    return 0;
+}
+
+static int virtio_mem_post_load(void *opaque, int version_id)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(opaque);
+    int ret;
+
+    if (!vmem->early_migration) {
+        ret = virtio_mem_post_load_bitmap(vmem);
+        if (ret) {
+            return ret;
+        }
+    }
 
     /*
      * If shared RAM is migrated using the file content and not using QEMU,
@@ -1021,7 +1240,7 @@ static int virtio_mem_post_load(void *opaque, int version_id)
     return virtio_mem_restore_unplugged(vmem);
 }
 
-static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
+static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg,
                                         uint64_t offset, uint64_t size)
 {
     void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
@@ -1043,7 +1262,7 @@ static int virtio_mem_post_load_early(void *opaque, int version_id)
     int ret;
 
     if (!vmem->prealloc) {
-        return 0;
+        goto post_load_bitmap;
     }
 
     /*
@@ -1051,7 +1270,7 @@ static int virtio_mem_post_load_early(void *opaque, int version_id)
      * don't mess with preallocation and postcopy.
      */
     if (migrate_ram_is_ignored(rb)) {
-        return 0;
+        goto post_load_bitmap;
     }
 
     /*
@@ -1084,7 +1303,10 @@ static int virtio_mem_post_load_early(void *opaque, int version_id)
             return -EBUSY;
         }
     }
-    return 0;
+
+post_load_bitmap:
+    /* Finally, update any other state to be consistent with the new bitmap. */
+    return virtio_mem_post_load_bitmap(vmem);
 }
 
 typedef struct VirtIOMEMMigSanityChecks {
@@ -1235,11 +1457,79 @@ static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
     if (!vmem->memdev) {
         error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
         return NULL;
+    } else if (vmem->dynamic_memslots) {
+        if (!vmem->mr) {
+            virtio_mem_prepare_mr(vmem);
+        }
+        return vmem->mr;
     }
 
     return &vmem->memdev->mr;
 }
 
+static void virtio_mem_decide_memslots(VirtIOMEM *vmem, unsigned int limit)
+{
+    uint64_t region_size, memslot_size, min_memslot_size;
+    unsigned int memslots;
+    RAMBlock *rb;
+
+    if (!vmem->dynamic_memslots) {
+        return;
+    }
+
+    /* We're called exactly once, before realizing the device. */
+    assert(!vmem->nb_memslots);
+
+    /* If realizing the device will fail, just assume a single memslot. */
+    if (limit <= 1 || !vmem->memdev || !vmem->memdev->mr.ram_block) {
+        vmem->nb_memslots = 1;
+        return;
+    }
+
+    rb = vmem->memdev->mr.ram_block;
+    region_size = memory_region_size(&vmem->memdev->mr);
+
+    /*
+     * Determine the default block size now, to determine the minimum memslot
+     * size. We want the minimum slot size to be at least the device block size.
+     */
+    if (!vmem->block_size) {
+        vmem->block_size = virtio_mem_default_block_size(rb);
+    }
+    /* If realizing the device will fail, just assume a single memslot. */
+    if (vmem->block_size < qemu_ram_pagesize(rb) ||
+        !QEMU_IS_ALIGNED(region_size, vmem->block_size)) {
+        vmem->nb_memslots = 1;
+        return;
+    }
+
+    /*
+     * All memslots except the last one have a reasonable minimum size, and
+     * and all memslot sizes are aligned to the device block size.
+     */
+    memslot_size = QEMU_ALIGN_UP(region_size / limit, vmem->block_size);
+    min_memslot_size = MAX(vmem->block_size, VIRTIO_MEM_MIN_MEMSLOT_SIZE);
+    memslot_size = MAX(memslot_size, min_memslot_size);
+
+    memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size;
+    if (memslots != 1) {
+        vmem->memslot_size = memslot_size;
+    }
+    vmem->nb_memslots = memslots;
+}
+
+static unsigned int virtio_mem_get_memslots(VirtIOMEM *vmem)
+{
+    if (!vmem->dynamic_memslots) {
+        /* Exactly one static RAM memory region. */
+        return 1;
+    }
+
+    /* We're called after instructed to make a decision. */
+    g_assert(vmem->nb_memslots);
+    return vmem->nb_memslots;
+}
+
 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
                                                 Notifier *notifier)
 {
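A worked example for the sizing logic in virtio_mem_decide_memslots() above (made-up values): with a 16 GiB memory backend, a 2 MiB device block size and a limit of 128 memslots, region_size / limit is 128 MiB, which VIRTIO_MEM_MIN_MEMSLOT_SIZE raises to a 1 GiB memslot size, giving 16 memslots; a 1 TiB backend with the same limit ends up with 128 memslots of 8 GiB each.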
@@ -1377,6 +1667,21 @@ static void virtio_mem_instance_init(Object *obj)
                         NULL, NULL);
 }
 
+static void virtio_mem_instance_finalize(Object *obj)
+{
+    VirtIOMEM *vmem = VIRTIO_MEM(obj);
+
+    /*
+     * Note: the core already dropped the references on all memory regions
+     * (it's passed as the owner to memory_region_init_*()) and finalized
+     * these objects. We can simply free the memory.
+     */
+    g_free(vmem->memslots);
+    vmem->memslots = NULL;
+    g_free(vmem->mr);
+    vmem->mr = NULL;
+}
+
 static Property virtio_mem_properties[] = {
     DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
     DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
@@ -1389,6 +1694,8 @@ static Property virtio_mem_properties[] = {
 #endif
     DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
                      early_migration, true),
+    DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM,
+                     dynamic_memslots, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1556,6 +1863,8 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)
 
     vmc->fill_device_info = virtio_mem_fill_device_info;
     vmc->get_memory_region = virtio_mem_get_memory_region;
+    vmc->decide_memslots = virtio_mem_decide_memslots;
+    vmc->get_memslots = virtio_mem_get_memslots;
     vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
     vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
     vmc->unplug_request_check = virtio_mem_unplug_request_check;
@@ -1573,6 +1882,7 @@ static const TypeInfo virtio_mem_info = {
     .parent = TYPE_VIRTIO_DEVICE,
     .instance_size = sizeof(VirtIOMEM),
     .instance_init = virtio_mem_instance_init,
+    .instance_finalize = virtio_mem_instance_finalize,
     .class_init = virtio_mem_class_init,
     .class_size = sizeof(VirtIOMEMClass),
     .interfaces = (InterfaceInfo[]) {
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
@@ -83,6 +83,21 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 ram_addr_t qemu_ram_addr_from_host(void *ptr);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 RAMBlock *qemu_ram_block_by_name(const char *name);
+
+/*
+ * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock.
+ *
+ * @ptr: The host pointer to translate.
+ * @round_offset: Whether to round the result offset down to a target page
+ * @offset: Will be set to the offset within the returned RAMBlock.
+ *
+ * Returns: RAMBlock (or NULL if not found)
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore. If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the memory region that owns the RAMBlock.
+ */
 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                    ram_addr_t *offset);
 ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
@@ -95,6 +95,7 @@ struct ReservedRegion {
  *     relative to the region's address space
  * @readonly: writes to this section are ignored
  * @nonvolatile: this section is non-volatile
+ * @unmergeable: this section should not get merged with adjacent sections
  */
 struct MemoryRegionSection {
     Int128 size;
@@ -104,6 +105,7 @@ struct MemoryRegionSection {
     hwaddr offset_within_address_space;
     bool readonly;
     bool nonvolatile;
+    bool unmergeable;
 };
 
 typedef struct IOMMUTLBEntry IOMMUTLBEntry;
@@ -599,8 +601,9 @@ typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
  * populated (consuming memory), to be used/accessed by the VM.
  *
  * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
- * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is
- * mapped.
+ * #MemoryRegion isn't mapped into an address space yet (either directly
+ * or via an alias); it cannot change while the #MemoryRegion is
+ * mapped into an address space.
  *
  * The #RamDiscardManager is intended to be used by technologies that are
 * incompatible with discarding of RAM (e.g., VFIO, which may pin all
@@ -772,6 +775,7 @@ struct MemoryRegion {
     bool nonvolatile;
     bool rom_device;
     bool flush_coalesced_mmio;
+    bool unmergeable;
     uint8_t dirty_log_mask;
     bool is_iommu;
     RAMBlock *ram_block;
@@ -2350,6 +2354,25 @@ void memory_region_set_size(MemoryRegion *mr, uint64_t size);
 void memory_region_set_alias_offset(MemoryRegion *mr,
                                     hwaddr offset);
 
+/*
+ * memory_region_set_unmergeable: Set a memory region unmergeable
+ *
+ * Mark a memory region unmergeable, resulting in the memory region (or
+ * everything contained in a memory region container) not getting merged when
+ * simplifying the address space and notifying memory listeners. Consequently,
+ * memory listeners will never get notified about ranges that are larger than
+ * the original memory regions.
+ *
+ * This is primarily useful when multiple aliases to a RAM memory region are
+ * mapped into a memory region container, and updates (e.g., enable/disable or
+ * map/unmap) of individual memory region aliases are not supposed to affect
+ * other memory regions in the same container.
+ *
+ * @mr: the #MemoryRegion to be updated
+ * @unmergeable: whether to mark the #MemoryRegion unmergeable
+ */
+void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable);
+
 /**
  * memory_region_present: checks if an address relative to a @container
  * translates into #MemoryRegion within @container
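A minimal sketch of how the new API is meant to be used (the helper below is invented for illustration; virtio_mem_prepare_memslots() in this series is the real in-tree user). It maps per-slot aliases of one RAM region into a container and marks each alias unmergeable, so listeners such as KVM and vhost keep seeing one section per alias instead of one merged section:

    /* Illustrative only: split "ram" into unmergeable aliases inside "container". */
    static void map_unmergeable_aliases(Object *owner, MemoryRegion *container,
                                        MemoryRegion *ram, uint64_t slot_size)
    {
        const uint64_t ram_size = memory_region_size(ram);
        uint64_t offset;

        for (offset = 0; offset < ram_size; offset += slot_size) {
            const uint64_t size = MIN(slot_size, ram_size - offset);
            MemoryRegion *alias = g_new0(MemoryRegion, 1);
            char *name = g_strdup_printf("slot-%" PRIu64, offset / slot_size);

            memory_region_init_alias(alias, owner, name, ram, offset, size);
            /* Without this, adjacent aliases would be merged into one section. */
            memory_region_set_unmergeable(alias, true);
            memory_region_add_subregion(container, offset, alias);
            g_free(name);
        }
    }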
diff --git a/include/hw/boards.h b/include/hw/boards.h
@@ -297,15 +297,27 @@ struct MachineClass {
  * DeviceMemoryState:
  * @base: address in guest physical address space where the memory
  *        address space for memory devices starts
- * @mr: address space container for memory devices
+ * @mr: memory region container for memory devices
+ * @as: address space for memory devices
+ * @listener: memory listener used to track used memslots in the address space
  * @dimm_size: the sum of plugged DIMMs' sizes
  * @used_region_size: the part of @mr already used by memory devices
+ * @required_memslots: the number of memslots required by memory devices
+ * @used_memslots: the number of memslots currently used by memory devices
+ * @memslot_auto_decision_active: whether any plugged memory device
+ *                                automatically decided to use more than
+ *                                one memslot
  */
 typedef struct DeviceMemoryState {
     hwaddr base;
     MemoryRegion mr;
+    AddressSpace as;
+    MemoryListener listener;
     uint64_t dimm_size;
     uint64_t used_region_size;
+    unsigned int required_memslots;
+    unsigned int used_memslots;
+    unsigned int memslot_auto_decision_active;
 } DeviceMemoryState;
 
 /**
@ -14,6 +14,7 @@
|
||||
#define MEMORY_DEVICE_H
|
||||
|
||||
#include "hw/qdev-core.h"
|
||||
#include "qemu/typedefs.h"
|
||||
#include "qapi/qapi-types-machine.h"
|
||||
#include "qom/object.h"
|
||||
|
||||
@ -41,6 +42,17 @@ typedef struct MemoryDeviceState MemoryDeviceState;
|
||||
* successive memory regions are used, a covering memory region has to
|
||||
* be provided. Scattered memory regions are not supported for single
|
||||
* devices.
|
||||
*
|
||||
* The device memory region returned via @get_memory_region may either be a
|
||||
* single RAM memory region or a memory region container with subregions
|
||||
* that are RAM memory regions or aliases to RAM memory regions. Other
|
||||
* memory regions or subregions are not supported.
|
||||
*
|
||||
* If the device memory region returned via @get_memory_region is a
|
||||
* memory region container, it's supported to dynamically (un)map subregions
|
||||
* as long as the number of memslots returned by @get_memslots() won't
|
||||
* be exceeded and as long as all memory regions are of the same kind (e.g.,
|
||||
* all RAM or all ROM).
|
||||
*/
|
||||
struct MemoryDeviceClass {
|
||||
/* private */
|
||||
@ -88,6 +100,28 @@ struct MemoryDeviceClass {
|
||||
*/
|
||||
MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);
|
||||
|
||||
/*
|
||||
* Optional: Instruct the memory device to decide how many memory slots
|
||||
* it requires, not exceeding the given limit.
|
||||
*
|
||||
* Called exactly once when pre-plugging the memory device, before
|
||||
* querying the number of memslots using @get_memslots the first time.
|
||||
*/
|
||||
void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit);
|
||||
|
||||
/*
|
||||
* Optional for memory devices that require only a single memslot,
|
||||
* required for all other memory devices: Return the number of memslots
|
||||
* (distinct RAM memory regions in the device memory region) that are
|
||||
* required by the device.
|
||||
*
|
||||
* If this function is not implemented, the assumption is "1".
|
||||
*
|
||||
* Called when (un)plugging the memory device, to check if the requirements
|
||||
* can be satisfied, and to do proper accounting.
|
||||
*/
|
||||
unsigned int (*get_memslots)(MemoryDeviceState *md);
|
||||
|
||||
/*
|
||||
* Optional: Return the desired minimum alignment of the device in guest
|
||||
* physical address space. The final alignment is computed based on this
|
||||
@ -105,8 +139,31 @@ struct MemoryDeviceClass {
|
||||
MemoryDeviceInfo *info);
|
||||
};
|
||||
|
||||
+/*
+ * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby
+ * 253 memslots were "reserved" for boot memory and other devices (such
+ * as PCI BARs, which can get mapped dynamically) and 256 memslots were
+ * dedicated for DIMMs. These magic numbers worked reliably in the past.
+ *
+ * Further, using many memslots can negatively affect performance, so setting
+ * the soft-limit of memslots used by memory devices to the traditional
+ * DIMM limit of 256 sounds reasonable.
+ *
+ * If we have less than 509 memslots, we will instruct memory devices that
+ * support automatically deciding how many memslots to use to only use a single
+ * one.
+ *
+ * Hotplugging vhost devices with at least 509 memslots is not expected to
+ * cause problems, not even when devices automatically decided how many memslots
+ * to use.
+ */
+#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256
+#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509

MemoryDeviceInfoList *qmp_memory_device_list(void);
uint64_t get_plugged_memory_size(void);
+unsigned int memory_devices_get_reserved_memslots(void);
+bool memory_devices_memslot_auto_decision_active(void);
void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
                            const uint64_t *legacy_align, Error **errp);
void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
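As a back-of-the-envelope illustration of the policy described in the comment above, the limit handed to decide_memslots() could be derived from the soft limit and the memslots that are actually free. This is a sketch only, not the in-tree implementation, and it assumes QEMU's MIN() macro from osdep.h:

    /* Sketch: clamp the memslot budget handed to an auto-deciding memory device. */
    static unsigned int example_memslot_limit(unsigned int total_memslots,
                                              unsigned int free_memslots,
                                              unsigned int used_by_memory_devices)
    {
        unsigned int limit;

        /* Fewer than 509 memslots overall: instruct the device to use one only. */
        if (total_memslots < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
            return 1;
        }

        /* Stay within the traditional DIMM budget of 256 memslots ... */
        limit = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT > used_by_memory_devices
                ? MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT - used_by_memory_devices : 1;

        /* ... and never hand out more memslots than are actually free. */
        return MIN(limit, free_memslots);
    }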
@@ -86,9 +86,6 @@ typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev,
typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
                                       char *mac_addr);
-typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev,
-                                           uint64_t start1, uint64_t size1,
-                                           uint64_t start2, uint64_t size2);
typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev,
                                            uint64_t guest_cid);
typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start);
@@ -108,8 +105,7 @@ typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev,
typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev,
                                             uint64_t session_id);

-typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev,
-                                                    MemoryRegionSection *section);
+typedef bool (*vhost_backend_no_private_memslots_op)(struct vhost_dev *dev);

typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
                                        uint16_t queue_size,
@@ -138,6 +134,7 @@ typedef struct VhostOps {
    vhost_backend_init vhost_backend_init;
    vhost_backend_cleanup vhost_backend_cleanup;
    vhost_backend_memslots_limit vhost_backend_memslots_limit;
+    vhost_backend_no_private_memslots_op vhost_backend_no_private_memslots;
    vhost_net_set_backend_op vhost_net_set_backend;
    vhost_net_set_mtu_op vhost_net_set_mtu;
    vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint;
@@ -163,7 +160,6 @@ typedef struct VhostOps {
    vhost_set_vring_enable_op vhost_set_vring_enable;
    vhost_requires_shm_log_op vhost_requires_shm_log;
    vhost_migration_done_op vhost_migration_done;
-    vhost_backend_can_merge_op vhost_backend_can_merge;
    vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid;
    vhost_vsock_set_running_op vhost_vsock_set_running;
    vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
@@ -172,7 +168,6 @@ typedef struct VhostOps {
    vhost_set_config_op vhost_set_config;
    vhost_crypto_create_session_op vhost_crypto_create_session;
    vhost_crypto_close_session_op vhost_crypto_close_session;
-    vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
    vhost_get_inflight_fd_op vhost_get_inflight_fd;
    vhost_set_inflight_fd_op vhost_set_inflight_fd;
    vhost_dev_start_op vhost_dev_start;
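The hunks above drop the vhost_backend_can_merge() and vhost_backend_mem_section_filter() callbacks and introduce vhost_backend_no_private_memslots(). As a purely illustrative sketch (not the in-tree vhost-user or vhost-kernel code), a backend whose memslots are shared across all of its devices might implement the new callback like this:

    /* Sketch: this example backend has no per-device ("private") memslots. */
    static bool example_backend_no_private_memslots(struct vhost_dev *dev)
    {
        return true;
    }

    /* Wired up in the backend's ops table (other mandatory callbacks omitted). */
    static const VhostOps example_vhost_ops = {
        .vhost_backend_no_private_memslots = example_backend_no_private_memslots,
    };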
@@ -315,7 +315,8 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
 */
void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
                        uint64_t features);
-bool vhost_has_free_slot(void);
+unsigned int vhost_get_max_memslots(void);
+unsigned int vhost_get_free_memslots(void);

int vhost_net_set_backend(struct vhost_dev *hdev,
                          struct vhost_vring_file *file);
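Together with the KVM counterparts further down, these helpers let the memory-device core reason about how many memslots remain available across all memslot consumers. A rough sketch of how a caller might combine them (illustrative; MIN() from osdep.h assumed):

    static unsigned int example_free_memslots(void)
    {
        /* The effective number of free memslots is bounded by every consumer. */
        return MIN(kvm_get_free_memslots(), vhost_get_free_memslots());
    }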
@@ -33,6 +33,7 @@ OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass,
#define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible"
#define VIRTIO_MEM_EARLY_MIGRATION_PROP "x-early-migration"
#define VIRTIO_MEM_PREALLOC_PROP "prealloc"
+#define VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP "dynamic-memslots"

struct VirtIOMEM {
    VirtIODevice parent_obj;
@@ -44,7 +45,28 @@ struct VirtIOMEM {
    int32_t bitmap_size;
    unsigned long *bitmap;

-    /* assigned memory backend and memory region */
+    /*
+     * With "dynamic-memslots=on": Device memory region in which we dynamically
+     * map the memslots.
+     */
+    MemoryRegion *mr;
+
+    /*
+     * With "dynamic-memslots=on": The individual memslots (aliases into the
+     * memory backend).
+     */
+    MemoryRegion *memslots;
+
+    /* With "dynamic-memslots=on": The total number of memslots. */
+    uint16_t nb_memslots;
+
+    /*
+     * With "dynamic-memslots=on": Size of one memslot (the size of the
+     * last one can differ).
+     */
+    uint64_t memslot_size;
+
+    /* Assigned memory backend with the RAM memory region. */
    HostMemoryBackend *memdev;

    /* NUMA node */
@@ -82,6 +104,12 @@ struct VirtIOMEM {
     */
    bool early_migration;

+    /*
+     * Whether we dynamically map (multiple, if possible) memslots instead of
+     * statically mapping the whole RAM memory region.
+     */
+    bool dynamic_memslots;

    /* notifiers to notify when "size" changes */
    NotifierList size_change_notifiers;

@@ -96,6 +124,8 @@ struct VirtIOMEMClass {
    /* public */
    void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi);
    MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp);
+    void (*decide_memslots)(VirtIOMEM *vmem, unsigned int limit);
+    unsigned int (*get_memslots)(VirtIOMEM *vmem);
    void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
    void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier);
    void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp);
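A small illustration of how nb_memslots and memslot_size describe the layout of the dynamically mapped aliases; the helpers below are a sketch, not the mapping logic in hw/virtio/virtio-mem.c, and MIN() from osdep.h is assumed:

    /* Sketch: offset of memslot `idx` within the memory backend. */
    static uint64_t example_memslot_offset(const VirtIOMEM *vmem, unsigned int idx)
    {
        return (uint64_t)idx * vmem->memslot_size;
    }

    /* Sketch: all memslots share the same size; only the last one may be smaller. */
    static uint64_t example_memslot_length(const VirtIOMEM *vmem, unsigned int idx,
                                           uint64_t backend_size)
    {
        return MIN(vmem->memslot_size,
                   backend_size - example_memslot_offset(vmem, idx));
    }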
@@ -215,7 +215,8 @@ typedef struct KVMRouteChange {

/* external API */

-bool kvm_has_free_slot(MachineState *ms);
+unsigned int kvm_get_max_memslots(void);
+unsigned int kvm_get_free_memslots(void);
bool kvm_has_sync_mmu(void);
int kvm_has_vcpu_events(void);
int kvm_has_robust_singlestep(void);
@@ -552,7 +553,6 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source);
 */
int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
struct ppc_radix_page_info *kvm_get_radix_page_info(void);
-int kvm_get_max_memslots(void);

/* Notify resamplefd for EOI of specific interrupts. */
void kvm_resample_fd_notify(int gsi);
@@ -40,6 +40,7 @@ typedef struct KVMMemoryUpdate {
typedef struct KVMMemoryListener {
    MemoryListener listener;
    KVMSlot *slots;
+    unsigned int nr_used_slots;
    int as_id;
    QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
    QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
@@ -10,3 +10,13 @@ uint64_t get_plugged_memory_size(void)
{
    return (uint64_t)-1;
}

+unsigned int memory_devices_get_reserved_memslots(void)
+{
+    return 0;
+}
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    return false;
+}
@@ -32,7 +32,7 @@ stub_ss.add(files('monitor.c'))
stub_ss.add(files('monitor-core.c'))
stub_ss.add(files('physmem.c'))
stub_ss.add(files('qemu-timer-notify-cb.c'))
-stub_ss.add(files('qmp_memory_device.c'))
+stub_ss.add(files('memory_device.c'))
stub_ss.add(files('qmp-command-available.c'))
stub_ss.add(files('qmp-quit.c'))
stub_ss.add(files('qtest.c'))
@@ -224,6 +224,7 @@ struct FlatRange {
    bool romd_mode;
    bool readonly;
    bool nonvolatile;
+    bool unmergeable;
};

#define FOR_EACH_FLAT_RANGE(var, view) \
@@ -240,6 +241,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv)
        .offset_within_address_space = int128_get64(fr->addr.start),
        .readonly = fr->readonly,
        .nonvolatile = fr->nonvolatile,
+        .unmergeable = fr->unmergeable,
    };
}

@@ -250,7 +252,8 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
        && a->offset_in_region == b->offset_in_region
        && a->romd_mode == b->romd_mode
        && a->readonly == b->readonly
-        && a->nonvolatile == b->nonvolatile;
+        && a->nonvolatile == b->nonvolatile
+        && a->unmergeable == b->unmergeable;
}

static FlatView *flatview_new(MemoryRegion *mr_root)
@@ -323,7 +326,8 @@ static bool can_merge(FlatRange *r1, FlatRange *r2)
        && r1->dirty_log_mask == r2->dirty_log_mask
        && r1->romd_mode == r2->romd_mode
        && r1->readonly == r2->readonly
-        && r1->nonvolatile == r2->nonvolatile;
+        && r1->nonvolatile == r2->nonvolatile
+        && !r1->unmergeable && !r2->unmergeable;
}

/* Attempt to simplify a view by merging adjacent ranges */
@@ -599,7 +603,8 @@ static void render_memory_region(FlatView *view,
                                 Int128 base,
                                 AddrRange clip,
                                 bool readonly,
-                                bool nonvolatile)
+                                bool nonvolatile,
+                                bool unmergeable)
{
    MemoryRegion *subregion;
    unsigned i;
@@ -616,6 +621,7 @@ static void render_memory_region(FlatView *view,
    int128_addto(&base, int128_make64(mr->addr));
    readonly |= mr->readonly;
    nonvolatile |= mr->nonvolatile;
+    unmergeable |= mr->unmergeable;

    tmp = addrrange_make(base, mr->size);

@@ -629,14 +635,14 @@ static void render_memory_region(FlatView *view,
        int128_subfrom(&base, int128_make64(mr->alias->addr));
        int128_subfrom(&base, int128_make64(mr->alias_offset));
        render_memory_region(view, mr->alias, base, clip,
-                            readonly, nonvolatile);
+                            readonly, nonvolatile, unmergeable);
        return;
    }

    /* Render subregions in priority order. */
    QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
        render_memory_region(view, subregion, base, clip,
-                            readonly, nonvolatile);
+                            readonly, nonvolatile, unmergeable);
    }

    if (!mr->terminates) {
@@ -652,6 +658,7 @@ static void render_memory_region(FlatView *view,
    fr.romd_mode = mr->romd_mode;
    fr.readonly = readonly;
    fr.nonvolatile = nonvolatile;
+    fr.unmergeable = unmergeable;

    /* Render the region itself into any gaps left by the current view. */
    for (i = 0; i < view->nr && int128_nz(remain); ++i) {
@@ -753,7 +760,7 @@ static FlatView *generate_memory_topology(MemoryRegion *mr)
    if (mr) {
        render_memory_region(view, mr, int128_zero(),
                             addrrange_make(int128_zero(), int128_2_64()),
-                            false, false);
+                            false, false, false);
    }
    flatview_simplify(view);

@@ -2085,7 +2092,7 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr)

RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
{
-    if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) {
+    if (!memory_region_is_ram(mr)) {
        return NULL;
    }
    return mr->rdm;
@@ -2094,7 +2101,7 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
void memory_region_set_ram_discard_manager(MemoryRegion *mr,
                                           RamDiscardManager *rdm)
{
-    g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr));
+    g_assert(memory_region_is_ram(mr));
    g_assert(!rdm || !mr->rdm);
    mr->rdm = rdm;
}
@@ -2755,6 +2762,18 @@ void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset)
    memory_region_transaction_commit();
}

+void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable)
+{
+    if (unmergeable == mr->unmergeable) {
+        return;
+    }
+
+    memory_region_transaction_begin();
+    mr->unmergeable = unmergeable;
+    memory_region_update_pending |= mr->enabled;
+    memory_region_transaction_commit();
+}

uint64_t memory_region_get_alignment(const MemoryRegion *mr)
{
    return mr->align;
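The unmergeable flag is what keeps individual device memslots (e.g., per-memslot aliases) visible as distinct FlatView sections instead of being coalesced by flatview_simplify(). A minimal usage sketch, assuming an alias into a backend RAM region (the helper name is illustrative):

    static void example_init_unmergeable_alias(Object *owner, MemoryRegion *alias,
                                               const char *name,
                                               MemoryRegion *backend_mr,
                                               hwaddr offset, uint64_t size)
    {
        /* Alias [offset, offset + size) of the backend and keep it unmerged. */
        memory_region_init_alias(alias, owner, name, backend_mr, offset, size);
        memory_region_set_unmergeable(alias, true);
    }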
@@ -2221,23 +2221,6 @@ ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
    return res;
}

-/*
- * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
- * in that RAMBlock.
- *
- * ptr: Host pointer to look up
- * round_offset: If true round the result offset down to a page boundary
- * *ram_addr: set to result ram_addr
- * *offset: set to result offset within the RAMBlock
- *
- * Returns: RAMBlock (or NULL if not found)
- *
- * By the time this function returns, the returned pointer is not protected
- * by RCU anymore. If the caller is not within an RCU critical section and
- * does not hold the iothread lock, it must have other means of protecting the
- * pointer, such as a reference to the region that includes the incoming
- * ram_addr_t.
- */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *offset)
{
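For reference, a hedged usage sketch of qemu_ram_block_from_host() following the documentation removed above; taking the RCU read lock via RCU_READ_LOCK_GUARD() is an assumption about a typical call site, not code from this series:

    /* Sketch: translate a host pointer into a ram_addr_t. */
    static bool example_host_ptr_to_ram_addr(void *host_ptr, ram_addr_t *ram_addr)
    {
        ram_addr_t offset;
        RAMBlock *block;

        RCU_READ_LOCK_GUARD();

        block = qemu_ram_block_from_host(host_ptr, false, &offset);
        if (!block) {
            return false;
        }

        /* The block's base ram_addr plus the pointer's offset within the block. */
        *ram_addr = block->offset + offset;
        return true;
    }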