virtio-mem: Probe THP size to determine default block size
Let's allow a minimum block size of 1 MiB in all configurations. Select the default block size based on - The page size of the memory backend. - The THP size if the memory backend size corresponds to the real host page size. - The global minimum of 1 MiB. and warn if something smaller is configured by the user. VIRTIO_MEM only supports Linux (depends on LINUX), so we can probe the THP size unconditionally. For now we only support virtio-mem on x86-64 - there isn't a user-visible change (x86-64 only supports 2 MiB THP on the PMD level) - the default was, and will be 2 MiB. If we ever have THP on the PUD level (e.g., 1 GiB THP on x86-64), we expect it to be more transparent - e.g., to only optimize fully populated ranges unless explicitly told /configured otherwise (in contrast to PMD THP). Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Wei Yang <richardw.yang@linux.intel.com> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com> Cc: Igor Mammedov <imammedo@redhat.com> Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com> Signed-off-by: David Hildenbrand <david@redhat.com> Message-Id: <20201008083029.9504-4-david@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
parent
0aed280061
commit
228957fea3
@ -33,10 +33,83 @@
|
|||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging
|
* Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
|
||||||
* memory (e.g., 2MB on x86_64).
|
* bitmap small.
|
||||||
*/
|
*/
|
||||||
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)QEMU_VMALLOC_ALIGN)
|
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
|
||||||
|
|
||||||
|
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \
|
||||||
|
defined(__powerpc64__)
|
||||||
|
#define VIRTIO_MEM_DEFAULT_THP_SIZE ((uint32_t)(2 * MiB))
|
||||||
|
#else
|
||||||
|
/* fallback to 1 MiB (e.g., the THP size on s390x) */
|
||||||
|
#define VIRTIO_MEM_DEFAULT_THP_SIZE VIRTIO_MEM_MIN_BLOCK_SIZE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We want to have a reasonable default block size such that
|
||||||
|
* 1. We avoid splitting THPs when unplugging memory, which degrades
|
||||||
|
* performance.
|
||||||
|
* 2. We avoid placing THPs for plugged blocks that also cover unplugged
|
||||||
|
* blocks.
|
||||||
|
*
|
||||||
|
* The actual THP size might differ between Linux kernels, so we try to probe
|
||||||
|
* it. In the future (if we ever run into issues regarding 2.), we might want
|
||||||
|
* to disable THP in case we fail to properly probe the THP size, or if the
|
||||||
|
* block size is configured smaller than the THP size.
|
||||||
|
*/
|
||||||
|
static uint32_t thp_size;
|
||||||
|
|
||||||
|
#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
|
||||||
|
static uint32_t virtio_mem_thp_size(void)
|
||||||
|
{
|
||||||
|
gchar *content = NULL;
|
||||||
|
const char *endptr;
|
||||||
|
uint64_t tmp;
|
||||||
|
|
||||||
|
if (thp_size) {
|
||||||
|
return thp_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to probe the actual THP size, fallback to (sane but eventually
|
||||||
|
* incorrect) default sizes.
|
||||||
|
*/
|
||||||
|
if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
|
||||||
|
!qemu_strtou64(content, &endptr, 0, &tmp) &&
|
||||||
|
(!endptr || *endptr == '\n')) {
|
||||||
|
/*
|
||||||
|
* Sanity-check the value, if it's too big (e.g., aarch64 with 64k base
|
||||||
|
* pages) or weird, fallback to something smaller.
|
||||||
|
*/
|
||||||
|
if (!tmp || !is_power_of_2(tmp) || tmp > 16 * MiB) {
|
||||||
|
warn_report("Read unsupported THP size: %" PRIx64, tmp);
|
||||||
|
} else {
|
||||||
|
thp_size = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!thp_size) {
|
||||||
|
thp_size = VIRTIO_MEM_DEFAULT_THP_SIZE;
|
||||||
|
warn_report("Could not detect THP size, falling back to %" PRIx64
|
||||||
|
" MiB.", thp_size / MiB);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free(content);
|
||||||
|
return thp_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
|
||||||
|
{
|
||||||
|
const uint64_t page_size = qemu_ram_pagesize(rb);
|
||||||
|
|
||||||
|
/* We can have hugetlbfs with a page size smaller than the THP size. */
|
||||||
|
if (page_size == qemu_real_host_page_size) {
|
||||||
|
return MAX(page_size, virtio_mem_thp_size());
|
||||||
|
}
|
||||||
|
return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Size the usable region bigger than the requested size if possible. Esp.
|
* Size the usable region bigger than the requested size if possible. Esp.
|
||||||
* Linux guests will only add (aligned) memory blocks in case they fully
|
* Linux guests will only add (aligned) memory blocks in case they fully
|
||||||
@ -443,10 +516,23 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
|||||||
rb = vmem->memdev->mr.ram_block;
|
rb = vmem->memdev->mr.ram_block;
|
||||||
page_size = qemu_ram_pagesize(rb);
|
page_size = qemu_ram_pagesize(rb);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the block size wasn't configured by the user, use a sane default. This
|
||||||
|
* allows using hugetlbfs backends of any page size without manual
|
||||||
|
* intervention.
|
||||||
|
*/
|
||||||
|
if (!vmem->block_size) {
|
||||||
|
vmem->block_size = virtio_mem_default_block_size(rb);
|
||||||
|
}
|
||||||
|
|
||||||
if (vmem->block_size < page_size) {
|
if (vmem->block_size < page_size) {
|
||||||
error_setg(errp, "'%s' property has to be at least the page size (0x%"
|
error_setg(errp, "'%s' property has to be at least the page size (0x%"
|
||||||
PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
|
PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
|
||||||
return;
|
return;
|
||||||
|
} else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
|
||||||
|
warn_report("'%s' property is smaller than the default block size (%"
|
||||||
|
PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
|
||||||
|
virtio_mem_default_block_size(rb) / MiB);
|
||||||
} else if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
|
} else if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
|
||||||
error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
|
error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
|
||||||
")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
|
")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
|
||||||
@ -742,6 +828,18 @@ static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
|
|||||||
const VirtIOMEM *vmem = VIRTIO_MEM(obj);
|
const VirtIOMEM *vmem = VIRTIO_MEM(obj);
|
||||||
uint64_t value = vmem->block_size;
|
uint64_t value = vmem->block_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If not configured by the user (and we're not realized yet), use the
|
||||||
|
* default block size we would use with the current memory backend.
|
||||||
|
*/
|
||||||
|
if (!value) {
|
||||||
|
if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
|
||||||
|
value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
|
||||||
|
} else {
|
||||||
|
value = virtio_mem_thp_size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
visit_type_size(v, name, &value, errp);
|
visit_type_size(v, name, &value, errp);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -821,7 +919,6 @@ static void virtio_mem_instance_init(Object *obj)
|
|||||||
{
|
{
|
||||||
VirtIOMEM *vmem = VIRTIO_MEM(obj);
|
VirtIOMEM *vmem = VIRTIO_MEM(obj);
|
||||||
|
|
||||||
vmem->block_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
|
|
||||||
notifier_list_init(&vmem->size_change_notifiers);
|
notifier_list_init(&vmem->size_change_notifiers);
|
||||||
vmem->precopy_notifier.notify = virtio_mem_precopy_notify;
|
vmem->precopy_notifier.notify = virtio_mem_precopy_notify;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user