From 314aec4a6e06844937f1677f6cba21981005f389 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 14 Feb 2019 11:10:04 +0800 Subject: [PATCH 1/7] hostmem-file: reject invalid pmem file sizes Guests started with NVDIMMs larger than the underlying host file produce confusing errors inside the guest. This happens because the guest accesses pages beyond the end of the file. Check the pmem file size on startup and print a clear error message if the size is invalid. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1669053 Cc: Wei Yang Cc: Zhang Yi Cc: Eduardo Habkost Cc: Igor Mammedov Signed-off-by: Stefan Hajnoczi Message-Id: <20190214031004.32522-3-stefanha@redhat.com> Reviewed-by: Wei Yang Reviewed-by: Igor Mammedov Reviewed-by: Pankaj Gupta Signed-off-by: Eduardo Habkost --- backends/hostmem-file.c | 23 ++++++++++++++++++ include/qemu/osdep.h | 13 ++++++++++ util/oslib-posix.c | 53 +++++++++++++++++++++++++++++++++++++++++ util/oslib-win32.c | 5 ++++ 4 files changed, 94 insertions(+) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index ce54788048..37ac6445d2 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -56,6 +56,29 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) error_setg(errp, "mem-path property not set"); return; } + + /* + * Verify pmem file size since starting a guest with an incorrect size + * leads to confusing failures inside the guest. + */ + if (fb->is_pmem) { + Error *local_err = NULL; + uint64_t size; + + size = qemu_get_pmem_size(fb->mem_path, &local_err); + if (!size) { + error_propagate(errp, local_err); + return; + } + + if (backend->size > size) { + error_setg(errp, "size property %" PRIu64 " is larger than " + "pmem file \"%s\" size %" PRIu64, backend->size, + fb->mem_path, size); + return; + } + } + backend->force_prealloc = mem_prealloc; name = host_memory_backend_get_name(backend); memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 840af09cb0..303d315c5d 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -570,6 +570,19 @@ void qemu_set_tty_echo(int fd, bool echo); void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, Error **errp); +/** + * qemu_get_pmem_size: + * @filename: path to a pmem file + * @errp: pointer to a NULL-initialized error object + * + * Determine the size of a persistent memory file. Besides supporting files on + * DAX file systems, this function also supports Linux devdax character + * devices. + * + * Returns: the size or 0 on failure + */ +uint64_t qemu_get_pmem_size(const char *filename, Error **errp); + /** * qemu_get_pid_name: * @pid: pid of a process diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 37c5854b9c..10d90d1783 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -500,6 +500,59 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, } } +uint64_t qemu_get_pmem_size(const char *filename, Error **errp) +{ + struct stat st; + + if (stat(filename, &st) < 0) { + error_setg(errp, "unable to stat pmem file \"%s\"", filename); + return 0; + } + +#if defined(__linux__) + /* Special handling for devdax character devices */ + if (S_ISCHR(st.st_mode)) { + char *subsystem_path = NULL; + char *subsystem = NULL; + char *size_path = NULL; + char *size_str = NULL; + uint64_t ret = 0; + + subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", + major(st.st_rdev), minor(st.st_rdev)); + subsystem = g_file_read_link(subsystem_path, NULL); + if (!subsystem) { + error_setg(errp, "unable to read subsystem for pmem file \"%s\"", + filename); + goto devdax_err; + } + + if (!g_str_has_suffix(subsystem, "/dax")) { + error_setg(errp, "pmem file \"%s\" is not a dax device", filename); + goto devdax_err; + } + + size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", + major(st.st_rdev), minor(st.st_rdev)); + if (!g_file_get_contents(size_path, &size_str, NULL, NULL)) { + error_setg(errp, "unable to read size for pmem file \"%s\"", + size_path); + goto devdax_err; + } + + ret = g_ascii_strtoull(size_str, NULL, 0); + +devdax_err: + g_free(size_str); + g_free(size_path); + g_free(subsystem); + g_free(subsystem_path); + return ret; + } +#endif /* defined(__linux__) */ + + return st.st_size; +} char *qemu_get_pid_name(pid_t pid) { diff --git a/util/oslib-win32.c b/util/oslib-win32.c index b4c17f5dfa..bd633afab6 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -560,6 +560,11 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, } } +uint64_t qemu_get_pmem_size(const char *filename, Error **errp) +{ + error_setg(errp, "pmem support not available"); + return 0; +} char *qemu_get_pid_name(pid_t pid) { From c1404bde9cb326a42327c892e25ecbfcce806a3d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 8 Mar 2019 19:20:52 +0100 Subject: [PATCH 2/7] nvdimm: Rename AcpiNVDIMMState into NVDIMMState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we intend to migrate the acpi_nvdimm_state into the base machine with a new dimms_state name, let's also rename the datatype. Signed-off-by: Eric Auger Suggested-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20190308182053.5487-2-eric.auger@redhat.com> Reviewed-by: Igor Mammedov Signed-off-by: Eduardo Habkost --- hw/acpi/nvdimm.c | 18 +++++++++--------- hw/i386/pc.c | 2 +- include/hw/i386/pc.h | 2 +- include/hw/mem/nvdimm.h | 10 +++++----- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index e53b2cb681..f73cfb9d90 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -382,7 +382,7 @@ nvdimm_build_structure_caps(GArray *structures, uint32_t capabilities) nfit_caps->capabilities = cpu_to_le32(capabilities); } -static GArray *nvdimm_build_device_structure(AcpiNVDIMMState *state) +static GArray *nvdimm_build_device_structure(NVDIMMState *state) { GSList *device_list = nvdimm_get_device_list(); GArray *structures = g_array_new(false, true /* clear */, 1); @@ -416,7 +416,7 @@ static void nvdimm_init_fit_buffer(NvdimmFitBuffer *fit_buf) fit_buf->fit = g_array_new(false, true /* clear */, 1); } -static void nvdimm_build_fit_buffer(AcpiNVDIMMState *state) +static void nvdimm_build_fit_buffer(NVDIMMState *state) { NvdimmFitBuffer *fit_buf = &state->fit_buf; @@ -425,12 +425,12 @@ static void nvdimm_build_fit_buffer(AcpiNVDIMMState *state) fit_buf->dirty = true; } -void nvdimm_plug(AcpiNVDIMMState *state) +void nvdimm_plug(NVDIMMState *state) { nvdimm_build_fit_buffer(state); } -static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets, +static void nvdimm_build_nfit(NVDIMMState *state, GArray *table_offsets, GArray *table_data, BIOSLinker *linker) { NvdimmFitBuffer *fit_buf = &state->fit_buf; @@ -570,7 +570,7 @@ nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr) #define NVDIMM_QEMU_RSVD_HANDLE_ROOT 0x10000 /* Read FIT data, defined in docs/specs/acpi_nvdimm.txt. */ -static void nvdimm_dsm_func_read_fit(AcpiNVDIMMState *state, NvdimmDsmIn *in, +static void nvdimm_dsm_func_read_fit(NVDIMMState *state, NvdimmDsmIn *in, hwaddr dsm_mem_addr) { NvdimmFitBuffer *fit_buf = &state->fit_buf; @@ -619,7 +619,7 @@ exit: } static void -nvdimm_dsm_handle_reserved_root_method(AcpiNVDIMMState *state, +nvdimm_dsm_handle_reserved_root_method(NVDIMMState *state, NvdimmDsmIn *in, hwaddr dsm_mem_addr) { switch (in->function) { @@ -863,7 +863,7 @@ nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size) static void nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { - AcpiNVDIMMState *state = opaque; + NVDIMMState *state = opaque; NvdimmDsmIn *in; hwaddr dsm_mem_addr = val; @@ -925,7 +925,7 @@ void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev) } } -void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, +void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, FWCfgState *fw_cfg, Object *owner) { memory_region_init_io(&state->io_mr, owner, &nvdimm_dsm_ops, state, @@ -1319,7 +1319,7 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, } void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, - BIOSLinker *linker, AcpiNVDIMMState *state, + BIOSLinker *linker, NVDIMMState *state, uint32_t ram_slots) { GSList *device_list; diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 42128183e9..0338dbe9da 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2571,7 +2571,7 @@ static void pc_machine_set_nvdimm_persistence(Object *obj, const char *value, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); - AcpiNVDIMMState *nvdimm_state = &pcms->acpi_nvdimm_state; + NVDIMMState *nvdimm_state = &pcms->acpi_nvdimm_state; if (strcmp(value, "cpu") == 0) nvdimm_state->persistence = 3; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 54222a202d..94fb620d65 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -45,7 +45,7 @@ struct PCMachineState { OnOffAuto vmport; OnOffAuto smm; - AcpiNVDIMMState acpi_nvdimm_state; + NVDIMMState acpi_nvdimm_state; bool acpi_build_enabled; bool smbus_enabled; diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index c5c9b3c7f8..523a9b3d4a 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -123,7 +123,7 @@ struct NvdimmFitBuffer { }; typedef struct NvdimmFitBuffer NvdimmFitBuffer; -struct AcpiNVDIMMState { +struct NVDIMMState { /* detect if NVDIMM support is enabled. */ bool is_enabled; @@ -141,13 +141,13 @@ struct AcpiNVDIMMState { int32_t persistence; char *persistence_string; }; -typedef struct AcpiNVDIMMState AcpiNVDIMMState; +typedef struct NVDIMMState NVDIMMState; -void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, +void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, FWCfgState *fw_cfg, Object *owner); void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, - BIOSLinker *linker, AcpiNVDIMMState *state, + BIOSLinker *linker, NVDIMMState *state, uint32_t ram_slots); -void nvdimm_plug(AcpiNVDIMMState *state); +void nvdimm_plug(NVDIMMState *state); void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev); #endif From f6a0d06ba72a0c493e1e05872caf2f3781af316d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 8 Mar 2019 19:20:53 +0100 Subject: [PATCH 3/7] machine: Move nvdimms state into struct MachineState As NVDIMM support is looming for ARM and SPAPR, let's move the acpi_nvdimm_state to the generic machine struct instead of duplicating the same code in several machines. It is also renamed into nvdimms_state and becomes a pointer. nvdimm and nvdimm-persistence become generic machine options. They become guarded by a nvdimm_supported machine class member. We also add a description for those options. Signed-off-by: Eric Auger Suggested-by: Igor Mammedov Message-Id: <20190308182053.5487-3-eric.auger@redhat.com> Reviewed-by: Igor Mammedov Signed-off-by: Eduardo Habkost --- hw/core/machine.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ hw/i386/acpi-build.c | 6 ++-- hw/i386/pc.c | 57 ++++---------------------------------- hw/i386/pc_piix.c | 4 +-- hw/i386/pc_q35.c | 4 +-- include/hw/boards.h | 2 ++ include/hw/i386/pc.h | 4 --- 7 files changed, 79 insertions(+), 63 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 766ca5899d..743fef2898 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -22,6 +22,7 @@ #include "qemu/error-report.h" #include "sysemu/qtest.h" #include "hw/pci/pci.h" +#include "hw/mem/nvdimm.h" GlobalProperty hw_compat_3_1[] = { { "pcie-root-port", "x-speed", "2_5" }, @@ -481,6 +482,47 @@ static void machine_set_memory_encryption(Object *obj, const char *value, ms->memory_encryption = g_strdup(value); } +static bool machine_get_nvdimm(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->nvdimms_state->is_enabled; +} + +static void machine_set_nvdimm(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->nvdimms_state->is_enabled = value; +} + +static char *machine_get_nvdimm_persistence(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return g_strdup(ms->nvdimms_state->persistence_string); +} + +static void machine_set_nvdimm_persistence(Object *obj, const char *value, + Error **errp) +{ + MachineState *ms = MACHINE(obj); + NVDIMMState *nvdimms_state = ms->nvdimms_state; + + if (strcmp(value, "cpu") == 0) { + nvdimms_state->persistence = 3; + } else if (strcmp(value, "mem-ctrl") == 0) { + nvdimms_state->persistence = 2; + } else { + error_setg(errp, "-machine nvdimm-persistence=%s: unsupported option", + value); + return; + } + + g_free(nvdimms_state->persistence_string); + nvdimms_state->persistence_string = g_strdup(value); +} + void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type) { strList *item = g_new0(strList, 1); @@ -791,6 +833,28 @@ static void machine_initfn(Object *obj) ms->mem_merge = true; ms->enable_graphics = true; + if (mc->nvdimm_supported) { + Object *obj = OBJECT(ms); + + ms->nvdimms_state = g_new0(NVDIMMState, 1); + object_property_add_bool(obj, "nvdimm", + machine_get_nvdimm, machine_set_nvdimm, + &error_abort); + object_property_set_description(obj, "nvdimm", + "Set on/off to enable/disable " + "NVDIMM instantiation", NULL); + + object_property_add_str(obj, "nvdimm-persistence", + machine_get_nvdimm_persistence, + machine_set_nvdimm_persistence, + &error_abort); + object_property_set_description(obj, "nvdimm-persistence", + "Set NVDIMM persistence" + "Valid values are cpu, mem-ctrl", + NULL); + } + + /* Register notifier when init is done for sysbus sanity checks */ ms->sysbus_notifier.notify = machine_init_notify; qemu_add_machine_init_done_notifier(&ms->sysbus_notifier); @@ -809,6 +873,7 @@ static void machine_finalize(Object *obj) g_free(ms->dt_compatible); g_free(ms->firmware); g_free(ms->device_memory); + g_free(ms->nvdimms_state); } bool machine_usb(MachineState *machine) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 9ecc96dcc7..416da318ae 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1867,7 +1867,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(scope, method); } - if (pcms->acpi_nvdimm_state.is_enabled) { + if (machine->nvdimms_state->is_enabled) { method = aml_method("_E04", 0, AML_NOTSERIALIZED); aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), aml_int(0x80))); @@ -2704,9 +2704,9 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_dmar_q35(tables_blob, tables->linker); } } - if (pcms->acpi_nvdimm_state.is_enabled) { + if (machine->nvdimms_state->is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, - &pcms->acpi_nvdimm_state, machine->ram_slots); + machine->nvdimms_state, machine->ram_slots); } /* Add tables supplied by user (if any) */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 0338dbe9da..71385cfac9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2069,6 +2069,7 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, { const PCMachineState *pcms = PC_MACHINE(hotplug_dev); const PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + const MachineState *ms = MACHINE(hotplug_dev); const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); const uint64_t legacy_align = TARGET_PAGE_SIZE; @@ -2083,7 +2084,7 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, return; } - if (is_nvdimm && !pcms->acpi_nvdimm_state.is_enabled) { + if (is_nvdimm && !ms->nvdimms_state->is_enabled) { error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'"); return; } @@ -2097,6 +2098,7 @@ static void pc_memory_plug(HotplugHandler *hotplug_dev, { Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); pc_dimm_plug(PC_DIMM(dev), MACHINE(pcms), &local_err); @@ -2105,7 +2107,7 @@ static void pc_memory_plug(HotplugHandler *hotplug_dev, } if (is_nvdimm) { - nvdimm_plug(&pcms->acpi_nvdimm_state); + nvdimm_plug(ms->nvdimms_state); } hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); @@ -2546,47 +2548,6 @@ static void pc_machine_set_smm(Object *obj, Visitor *v, const char *name, visit_type_OnOffAuto(v, name, &pcms->smm, errp); } -static bool pc_machine_get_nvdimm(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return pcms->acpi_nvdimm_state.is_enabled; -} - -static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - pcms->acpi_nvdimm_state.is_enabled = value; -} - -static char *pc_machine_get_nvdimm_persistence(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return g_strdup(pcms->acpi_nvdimm_state.persistence_string); -} - -static void pc_machine_set_nvdimm_persistence(Object *obj, const char *value, - Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - NVDIMMState *nvdimm_state = &pcms->acpi_nvdimm_state; - - if (strcmp(value, "cpu") == 0) - nvdimm_state->persistence = 3; - else if (strcmp(value, "mem-ctrl") == 0) - nvdimm_state->persistence = 2; - else { - error_setg(errp, "-machine nvdimm-persistence=%s: unsupported option", - value); - return; - } - - g_free(nvdimm_state->persistence_string); - nvdimm_state->persistence_string = g_strdup(value); -} - static bool pc_machine_get_smbus(Object *obj, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); @@ -2636,8 +2597,6 @@ static void pc_machine_initfn(Object *obj) pcms->max_ram_below_4g = 0; /* use default */ pcms->smm = ON_OFF_AUTO_AUTO; pcms->vmport = ON_OFF_AUTO_AUTO; - /* nvdimm is disabled on default. */ - pcms->acpi_nvdimm_state.is_enabled = false; /* acpi build is enabled by default if machine supports it */ pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; pcms->smbus_enabled = true; @@ -2774,6 +2733,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) hc->unplug = pc_machine_device_unplug_cb; nc->nmi_monitor_handler = x86_nmi; mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE; + mc->nvdimm_supported = true; object_class_property_add(oc, PC_MACHINE_DEVMEM_REGION_SIZE, "int", pc_machine_get_device_memory_region_size, NULL, @@ -2798,13 +2758,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, PC_MACHINE_VMPORT, "Enable vmport (pc & q35)", &error_abort); - object_class_property_add_bool(oc, PC_MACHINE_NVDIMM, - pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort); - - object_class_property_add_str(oc, PC_MACHINE_NVDIMM_PERSIST, - pc_machine_get_nvdimm_persistence, - pc_machine_set_nvdimm_persistence, &error_abort); - object_class_property_add_bool(oc, PC_MACHINE_SMBUS, pc_machine_get_smbus, pc_machine_set_smbus, &error_abort); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 8770ecada9..8ad8e885c6 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -297,8 +297,8 @@ static void pc_init1(MachineState *machine, PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); } - if (pcms->acpi_nvdimm_state.is_enabled) { - nvdimm_init_acpi_state(&pcms->acpi_nvdimm_state, system_io, + if (machine->nvdimms_state->is_enabled) { + nvdimm_init_acpi_state(machine->nvdimms_state, system_io, pcms->fw_cfg, OBJECT(pcms)); } } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index cfb9043e12..372c6b73be 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -329,8 +329,8 @@ static void pc_q35_init(MachineState *machine) pc_vga_init(isa_bus, host_bus); pc_nic_init(pcmc, isa_bus, host_bus); - if (pcms->acpi_nvdimm_state.is_enabled) { - nvdimm_init_acpi_state(&pcms->acpi_nvdimm_state, system_io, + if (machine->nvdimms_state->is_enabled) { + nvdimm_init_acpi_state(machine->nvdimms_state, system_io, pcms->fw_cfg, OBJECT(pcms)); } } diff --git a/include/hw/boards.h b/include/hw/boards.h index 9690c71a6d..e231860666 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -210,6 +210,7 @@ struct MachineClass { int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; bool smbus_no_migration_support; + bool nvdimm_supported; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); @@ -272,6 +273,7 @@ struct MachineState { const char *cpu_type; AccelState *accelerator; CPUArchIdList *possible_cpus; + struct NVDIMMState *nvdimms_state; }; #define DEFINE_MACHINE(namestr, machine_initfn) \ diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 94fb620d65..263a6343ff 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -45,8 +45,6 @@ struct PCMachineState { OnOffAuto vmport; OnOffAuto smm; - NVDIMMState acpi_nvdimm_state; - bool acpi_build_enabled; bool smbus_enabled; bool sata_enabled; @@ -74,8 +72,6 @@ struct PCMachineState { #define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" #define PC_MACHINE_VMPORT "vmport" #define PC_MACHINE_SMM "smm" -#define PC_MACHINE_NVDIMM "nvdimm" -#define PC_MACHINE_NVDIMM_PERSIST "nvdimm-persistence" #define PC_MACHINE_SMBUS "smbus" #define PC_MACHINE_SATA "sata" #define PC_MACHINE_PIT "pit" From 4a66c7a9996bb1bb1191170b172daa54e226d7ff Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 11 Mar 2019 16:58:47 +0300 Subject: [PATCH 4/7] hostmem-memfd: disable for systems without sealing support If seals are not supported, memfd_create() will fail. Furthermore, there is no way to disable it in this case because '.seal' property is not registered. This issue leads to vhost-user-test failures on RHEL 7.2: qemu-system-x86_64: -object memory-backend-memfd,id=mem,size=2M,: \ failed to create memfd: Invalid argument and actually breaks the feature on such systems. Let's restrict memfd backend to systems with sealing support. Signed-off-by: Ilya Maximets Message-Id: <20190311135850.6537-2-i.maximets@samsung.com> Signed-off-by: Eduardo Habkost --- backends/hostmem-memfd.c | 18 ++++++++---------- tests/vhost-user-test.c | 5 +++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index 98c9bf3240..46b15b916a 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -154,15 +154,13 @@ memfd_backend_class_init(ObjectClass *oc, void *data) "Huge pages size (ex: 2M, 1G)", &error_abort); } - if (qemu_memfd_check(MFD_ALLOW_SEALING)) { - object_class_property_add_bool(oc, "seal", - memfd_backend_get_seal, - memfd_backend_set_seal, - &error_abort); - object_class_property_set_description(oc, "seal", - "Seal growing & shrinking", - &error_abort); - } + object_class_property_add_bool(oc, "seal", + memfd_backend_get_seal, + memfd_backend_set_seal, + &error_abort); + object_class_property_set_description(oc, "seal", + "Seal growing & shrinking", + &error_abort); } static const TypeInfo memfd_backend_info = { @@ -175,7 +173,7 @@ static const TypeInfo memfd_backend_info = { static void register_types(void) { - if (qemu_memfd_check(0)) { + if (qemu_memfd_check(MFD_ALLOW_SEALING)) { type_register_static(&memfd_backend_info); } } diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 0c965b3b1e..3817966010 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -178,7 +178,8 @@ static void append_mem_opts(TestServer *server, GString *cmd_line, int size, enum test_memfd memfd) { if (memfd == TEST_MEMFD_AUTO) { - memfd = qemu_memfd_check(0) ? TEST_MEMFD_YES : TEST_MEMFD_NO; + memfd = qemu_memfd_check(MFD_ALLOW_SEALING) ? TEST_MEMFD_YES + : TEST_MEMFD_NO; } if (memfd == TEST_MEMFD_YES) { @@ -930,7 +931,7 @@ static void register_vhost_user_test(void) "virtio-net", test_read_guest_mem, &opts); - if (qemu_memfd_check(0)) { + if (qemu_memfd_check(MFD_ALLOW_SEALING)) { opts.before = vhost_user_test_setup_memfd; qos_add_test("vhost-user/read-guest-mem/memfd", "virtio-net", From 92db922f66cde45bb216c36eb43b762429c96a74 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 11 Mar 2019 16:58:48 +0300 Subject: [PATCH 5/7] memfd: always check for MFD_CLOEXEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU always sets this flag unconditionally. We need to check if it's supported. Signed-off-by: Ilya Maximets Reviewed-by: Marc-André Lureau Message-Id: <20190311135850.6537-3-i.maximets@samsung.com> Signed-off-by: Eduardo Habkost --- util/memfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/memfd.c b/util/memfd.c index 8debd0d037..d74ce4d793 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -188,7 +188,7 @@ bool qemu_memfd_alloc_check(void) bool qemu_memfd_check(unsigned int flags) { #ifdef CONFIG_LINUX - int mfd = memfd_create("test", flags); + int mfd = memfd_create("test", flags | MFD_CLOEXEC); if (mfd >= 0) { close(mfd); From df20819328d6fa3cb9d4a259a58cebbee35cdd09 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 11 Mar 2019 16:58:49 +0300 Subject: [PATCH 6/7] memfd: set up correct errno if not supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qemu_memfd_create() prints the value of 'errno' which is not set in this case. Signed-off-by: Ilya Maximets Reviewed-by: Marc-André Lureau Message-Id: <20190311135850.6537-4-i.maximets@samsung.com> Signed-off-by: Eduardo Habkost --- util/memfd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/util/memfd.c b/util/memfd.c index d74ce4d793..393d23da96 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -40,6 +40,7 @@ static int memfd_create(const char *name, unsigned int flags) #ifdef __NR_memfd_create return syscall(__NR_memfd_create, name, flags); #else + errno = ENOSYS; return -1; #endif } From edaed6c711f07267785a05a633d97dc9268a7385 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 11 Mar 2019 16:58:50 +0300 Subject: [PATCH 7/7] memfd: improve error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gives more information about the failure. Additionally 'ENOSYS' returned for a non-Linux platforms instead of 'errno', which is not initilaized in this case. Signed-off-by: Ilya Maximets Reviewed-by: Marc-André Lureau Message-Id: <20190311135850.6537-5-i.maximets@samsung.com> Signed-off-by: Eduardo Habkost --- util/memfd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/util/memfd.c b/util/memfd.c index 393d23da96..00334e5b21 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -71,14 +71,18 @@ int qemu_memfd_create(const char *name, size_t size, bool hugetlb, } mfd = memfd_create(name, flags); if (mfd < 0) { + error_setg_errno(errp, errno, + "failed to create memfd with flags 0x%x", flags); goto err; } if (ftruncate(mfd, size) == -1) { + error_setg_errno(errp, errno, "failed to resize memfd to %zu", size); goto err; } if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) { + error_setg_errno(errp, errno, "failed to add seals 0x%x", seals); goto err; } @@ -88,8 +92,9 @@ err: if (mfd >= 0) { close(mfd); } +#else + error_setg_errno(errp, ENOSYS, "failed to create memfd"); #endif - error_setg_errno(errp, errno, "failed to create memfd"); return -1; }