/* File: qemu/include/hw/mem/nvdimm.h (C header, 162 lines, 4.9 KiB) */


/*
* Non-Volatile Dual In-line Memory Module Virtualization Implementation
*
* Copyright(C) 2015 Intel Corporation.
*
* Author:
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
*
* NVDIMM specifications and some documents can be found at:
* NVDIMM ACPI device and NFIT are introduced in ACPI 6:
* http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
* NVDIMM Namespace specification:
* http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
* DSM Interface Example:
* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
* Driver Writer's Guide:
* http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef QEMU_NVDIMM_H
#define QEMU_NVDIMM_H
#include "hw/mem/pc-dimm.h"
#include "hw/acpi/bios-linker-loader.h"
#include "qemu/uuid.h"
#include "hw/acpi/aml-build.h"
#include "qom/object.h"
/*
 * Minimum size of the namespace label data area: 128 KiB (128UL << 10).
 *
 * The lower bound comes from the NVDIMM Namespace specification,
 * chapter 2 "Namespaces":
 * "NVDIMMs following the NVDIMM Block Mode Specification use an area
 * at least 128KB in size, which holds around 1000 labels."
 */
#define MIN_NAMESPACE_LABEL_SIZE (128UL << 10)
/* Type name string of the NVDIMM device. */
#define TYPE_NVDIMM "nvdimm"
/*
 * Declares the QOM boilerplate tying NVDIMMDevice / NVDIMMClass to the
 * NVDIMM type (macro provided by "qom/object.h").
 */
OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM)
/* Names of the device properties ("label-size", "uuid", "unarmed"). */
#define NVDIMM_LABEL_SIZE_PROP "label-size"
#define NVDIMM_UUID_PROP "uuid"
#define NVDIMM_UNARMED_PROP "unarmed"
struct NVDIMMDevice {
/* private */
PCDIMMDevice parent_obj;
/* public */
/*
* the size of label data in NVDIMM device which is presented to
* guest via __DSM "Get Namespace Label Size" function.
*/
uint64_t label_size;
/*
* the address of label data which is read by __DSM "Get Namespace
* Label Data" function and written by __DSM "Set Namespace Label
* Data" function.
*/
void *label_data;
/*
* it's the PMEM region in NVDIMM device, which is presented to
* guest via ACPI NFIT and _FIT method if NVDIMM hotplug is supported.
*/
MemoryRegion *nvdimm_mr;
/*
* The 'on' value results in the unarmed flag set in ACPI NFIT,
* which can be used to notify guest implicitly that the host
* backend (e.g., files on HDD, /dev/pmemX, etc.) cannot guarantee
* the guest write persistence.
*/
bool unarmed;
nvdimm: Reject writing label data to ROM instead of crashing QEMU Currently, when using a true R/O NVDIMM (ROM memory backend) with a label area, the VM can easily crash QEMU by trying to write to the label area, because the ROM memory is mmap'ed without PROT_WRITE. [root@vm-0 ~]# ndctl disable-region region0 disabled 1 region [root@vm-0 ~]# ndctl zero-labels nmem0 -> QEMU segfaults Let's remember whether we have a ROM memory backend and properly reject the write request: [root@vm-0 ~]# ndctl disable-region region0 disabled 1 region [root@vm-0 ~]# ndctl zero-labels nmem0 zeroed 0 nmem In comparison, on a system with a R/W NVDIMM: [root@vm-0 ~]# ndctl disable-region region0 disabled 1 region [root@vm-0 ~]# ndctl zero-labels nmem0 zeroed 1 nmem For ACPI, just return "unsupported", like if no label exists. For spapr, return "H_P2", similar to when no label area exists. Could we rely on the "unarmed" property? Maybe, but it looks cleaner to only disallow what certainly cannot work. After all "unarmed=on" primarily means: cannot accept persistent writes. In theory, there might be setups where devices with "unarmed=on" set could be used to host non-persistent data (temporary files, system RAM, ...); for example, in Linux, admins can overwrite the "readonly" setting and still write to the device -- which will work as long as we're not using ROM. Allowing writing label data in such configurations can make sense. Message-ID: <20230906120503.359863-2-david@redhat.com> Fixes: dbd730e85987 ("nvdimm: check -object memory-backend-file, readonly=on option") Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Signed-off-by: David Hildenbrand <david@redhat.com>
2023-09-06 15:04:53 +03:00
/*
* Whether our DIMM is backed by ROM, and even label data cannot be
* written. If set, implies that "unarmed" is also set.
*/
bool readonly;
/*
* The PPC64 - spapr requires each nvdimm device have a uuid.
*/
QemuUUID uuid;
};
/*
 * NVDIMMClass: class structure of the NVDIMM type. It extends
 * PCDIMMDeviceClass with label-data accessors and realize/unrealize
 * hooks that take an NVDIMMDevice.
 */
struct NVDIMMClass {
/* private */
PCDIMMDeviceClass parent_class;
/* public */
/* Read @size bytes from the NVDIMM label data at @offset into @buf. */
void (*read_label_data)(NVDIMMDevice *nvdimm, void *buf,
uint64_t size, uint64_t offset);
/* Write @size bytes from @buf to the NVDIMM label data at @offset. */
void (*write_label_data)(NVDIMMDevice *nvdimm, const void *buf,
uint64_t size, uint64_t offset);
/*
 * NVDIMM-specific realize hook; errors are reported through @errp.
 * NOTE(review): when and by whom this is invoked is not visible in
 * this header -- confirm against the device implementation.
 */
void (*realize)(NVDIMMDevice *nvdimm, Error **errp);
/*
 * NVDIMM-specific unrealize hook (presumably the counterpart of
 * realize -- confirm against the device implementation).
 */
void (*unrealize)(NVDIMMDevice *nvdimm);
};
/* Name of the fw_cfg file whose data is held in NVDIMMState.dsm_mem. */
#define NVDIMM_DSM_MEM_FILE "etc/acpi/nvdimm-mem"
/*
 * A 32-bit IO port range starting at 0x0a18 in the guest is reserved
 * for NVDIMM ACPI emulation: base address and length in bytes.
 */
#define NVDIMM_ACPI_IO_BASE 0x0a18
#define NVDIMM_ACPI_IO_LEN 4
/*
 * NvdimmFitBuffer - FIT data for the NVDIMMs that are present.
 *
 * @fit:   the FIT structures of the present NVDIMMs; updated whenever
 *         an NVDIMM device is plugged or unplugged.
 * @dirty: allows OSPM to detect a change and restart a read that is
 *         in progress, if there is any.
 */
typedef struct NvdimmFitBuffer {
    GArray *fit;
    bool dirty;
} NvdimmFitBuffer;
/*
 * NVDIMMState - state of the NVDIMM ACPI support.
 */
typedef struct NVDIMMState {
    /* Tells whether NVDIMM support is enabled. */
    bool is_enabled;

    /* Data of the fw_cfg file NVDIMM_DSM_MEM_FILE. */
    GArray *dsm_mem;

    /* FIT structures and their dirty flag, see NvdimmFitBuffer. */
    NvdimmFitBuffer fit_buf;

    /* IO region used by OSPM to transfer control to QEMU. */
    MemoryRegion io_mr;

    /*
     * Platform capabilities, see section 5.2.25.9 of ACPI 6.2 Errata A.
     */
    int32_t persistence;
    char *persistence_string;

    struct AcpiGenericAddress dsm_io;
} NVDIMMState;
/*
 * Initialize the NVDIMM ACPI state @state.
 *
 * NOTE(review): the implementation is not visible from this header.
 * Judging by the parameter types, @io is the IO address space in which
 * the NVDIMM IO region gets mapped, @dsm_io describes that region,
 * @fw_cfg receives the NVDIMM_DSM_MEM_FILE file and @owner owns the
 * created region -- confirm against hw/acpi/nvdimm.c.
 */
void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io,
struct AcpiGenericAddress dsm_io,
FWCfgState *fw_cfg, Object *owner);
hw/acpi/nvdimm: add a helper to augment SRAT generation NVDIMMs can belong to their own proximity domains, as described by the NFIT. In such cases, the SRAT needs to have Memory Affinity structures in the SRAT for these NVDIMMs, otherwise Linux doesn't populate node data structures properly during NUMA initialization. See the following for an example failure case. https://lore.kernel.org/linux-nvdimm/20200416225438.15208-1-vishal.l.verma@intel.com/ Introduce a new helper, nvdimm_build_srat(), and call it for both the i386 and arm versions of 'build_srat()' to augment the SRAT with memory affinity information for NVDIMMs. The relevant command line options to exercise this are below. Nodes 0-1 contain CPUs and regular memory, and nodes 2-3 are the NVDIMM address space. -object memory-backend-ram,id=mem0,size=2048M -numa node,nodeid=0,memdev=mem0, -numa cpu,node-id=0,socket-id=0 -object memory-backend-ram,id=mem1,size=2048M -numa node,nodeid=1,memdev=mem1, -numa cpu,node-id=1,socket-id=1 -numa node,nodeid=2, -object memory-backend-file,id=nvmem0,share,mem-path=nvdimm-0,size=16384M,align=1G -device nvdimm,memdev=nvmem0,id=nv0,label-size=2M,node=2 -numa node,nodeid=3, -object memory-backend-file,id=nvmem1,share,mem-path=nvdimm-1,size=16384M,align=1G -device nvdimm,memdev=nvmem1,id=nv1,label-size=2M,node=3 Cc: Jingqi Liu <jingqi.liu@intel.com> Cc: Michael S. Tsirkin <mst@redhat.com> Reviewed-by: Jingqi Liu <jingqi.liu@intel.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com> Message-Id: <20200606000911.9896-3-vishal.l.verma@intel.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2020-06-06 03:09:10 +03:00
void nvdimm_build_srat(GArray *table_data);
/*
 * Build the NVDIMM-related ACPI content for @state.
 *
 * NOTE(review): the implementation is not visible from this header.
 * Presumably the generated tables are appended to @table_data with
 * their offsets recorded in @table_offsets, @linker is used for
 * load-time patching, @ram_slots is the number of memory slots, and
 * @oem_id / @oem_table_id fill the ACPI table headers -- confirm
 * against hw/acpi/nvdimm.c.
 */
void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,
BIOSLinker *linker, NVDIMMState *state,
uint32_t ram_slots, const char *oem_id,
const char *oem_table_id);
/*
 * Notify the NVDIMM ACPI state @state that an NVDIMM has been plugged.
 * NOTE(review): presumably this refreshes the FIT buffer, which is
 * documented as updated on plug/unplug -- confirm in the implementation.
 */
void nvdimm_plug(NVDIMMState *state);
/*
 * ACPI plug callback for the NVDIMM device @dev handled by @hotplug_dev.
 * NOTE(review): the implementation is not visible from this header;
 * presumably it notifies the guest of the hotplug event -- confirm.
 */
void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev);
#endif