qemu/hw/mem/pc-dimm.c

301 lines
9.3 KiB
C
Raw Normal View History

/*
* Dimm device for Memory Hotplug
*
* Copyright ProfitBricks GmbH 2012
* Copyright (C) 2014 Red Hat Inc
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>
*/
#include "qemu/osdep.h"
#include "hw/boards.h"
#include "hw/mem/pc-dimm.h"
#include "hw/qdev-properties.h"
#include "migration/vmstate.h"
#include "hw/mem/nvdimm.h"
#include "hw/mem/memory-device.h"
2016-03-14 11:01:28 +03:00
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "qemu/module.h"
#include "sysemu/hostmem.h"
#include "sysemu/numa.h"
#include "trace.h"
/*
 * Forward declaration: the definition lives further down in this file,
 * but pc_dimm_pre_plug() needs to call it before that point.
 */
static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
/*
 * Look up the memory region that backs @dimm.
 *
 * The region is obtained from the host memory backend linked through the
 * PC_DIMM_MEMDEV_PROP property.  When no backend is linked, an error is
 * stored in @errp and NULL is returned.
 */
static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp)
{
    if (dimm->hostmem) {
        return host_memory_backend_get_memory(dimm->hostmem);
    }

    error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set");
    return NULL;
}
/*
 * Pre-plug handling for a DIMM: settle the slot number, then hand over to
 * the generic memory device pre-plug code.
 *
 * @dimm:         device about to be plugged
 * @machine:      machine providing 'ram_slots'
 * @legacy_align: forwarded unchanged to memory_device_pre_plug()
 * @errp:         receives an error if the slot is invalid or unavailable,
 *                or whatever memory_device_pre_plug() reports
 *
 * A slot value of PC_DIMM_UNASSIGNED_SLOT means "pick any free slot";
 * any other value is range-checked and used as a hint.
 */
void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine,
                      const uint64_t *legacy_align, Error **errp)
{
    Error *err = NULL;
    const int *hint = NULL;
    int slot = object_property_get_int(OBJECT(dimm), PC_DIMM_SLOT_PROP,
                                       &error_abort);

    if (slot != PC_DIMM_UNASSIGNED_SLOT) {
        /* An explicitly requested slot has to lie within the machine's range. */
        if (slot < 0 || slot >= machine->ram_slots) {
            error_setg(errp,
                       "invalid slot number %d, valid range is [0-%" PRIu64 "]",
                       slot, machine->ram_slots - 1);
            return;
        }
        hint = &slot;
    }

    slot = pc_dimm_get_free_slot(hint, machine->ram_slots, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    /* Write the (possibly auto-selected) slot back into the property. */
    object_property_set_int(OBJECT(dimm), PC_DIMM_SLOT_PROP, slot,
                            &error_abort);
    trace_mhp_pc_dimm_assigned_slot(slot);

    memory_device_pre_plug(MEMORY_DEVICE(dimm), machine, legacy_align, errp);
}
/*
 * Plug @dimm into @machine and register its RAM with vmstate.
 *
 * The memory region lookup uses &error_abort, i.e. a DIMM without a
 * backend at this stage is treated as a programming error.
 */
void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine)
{
    MemoryRegion *mr;

    mr = pc_dimm_get_memory_region(dimm, &error_abort);

    memory_device_plug(MEMORY_DEVICE(dimm), machine);
    vmstate_register_ram(mr, DEVICE(dimm));
}
/*
 * Unplug @dimm from @machine and drop its RAM from vmstate.
 *
 * Mirrors pc_dimm_plug(); the memory region lookup uses &error_abort,
 * so a missing backend aborts.
 */
void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
{
    MemoryRegion *mr;

    mr = pc_dimm_get_memory_region(dimm, &error_abort);

    memory_device_unplug(MEMORY_DEVICE(dimm), machine);
    vmstate_unregister_ram(mr, DEVICE(dimm));
}
/*
 * object_child_foreach() callback that records occupied DIMM slots.
 *
 * @obj:    object being visited
 * @opaque: bitmap (unsigned long *) in which the slot of every realized
 *          PC DIMM is set
 *
 * The callback recurses into the children of @obj, so the whole subtree
 * is scanned.  Always returns 0.
 */
static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
{
    unsigned long *used_slots = opaque;

    /* Only realized DIMMs occupy a slot. */
    if (object_dynamic_cast(obj, TYPE_PC_DIMM) && DEVICE(obj)->realized) {
        set_bit(PC_DIMM(obj)->slot, used_slots);
    }

    object_child_foreach(obj, pc_dimm_slot2bitmap, opaque);
    return 0;
}
/*
 * Choose the slot a DIMM should occupy.
 *
 * @hint:      requested slot, or NULL to take the first free one
 * @max_slots: number of slots the machine provides
 * @errp:      set when no slots exist, the hint is out of range or busy,
 *             or every slot is taken
 *
 * Returns the selected slot.  On error the return value is not meaningful
 * (0, or @max_slots when the search found nothing); callers must check
 * @errp.
 */
static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp)
{
    unsigned long *used_slots;
    int slot = 0;

    if (max_slots <= 0) {
        error_setg(errp, "no slots where allocated, please specify "
                   "the 'slots' option");
        return 0;
    }

    /* Collect the slots of all realized DIMMs below the machine object. */
    used_slots = bitmap_new(max_slots);
    object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, used_slots);

    if (!hint) {
        /* No preference: take the lowest free slot. */
        slot = find_first_zero_bit(used_slots, max_slots);
        if (slot == max_slots) {
            error_setg(errp, "no free slots available");
        }
    } else if (*hint >= max_slots) {
        error_setg(errp, "invalid slot# %d, should be less than %d",
                   *hint, max_slots);
    } else if (test_bit(*hint, used_slots)) {
        error_setg(errp, "slot %d is busy", *hint);
    } else {
        slot = *hint;
    }

    g_free(used_slots);
    return slot;
}
/*
 * qdev properties of the DIMM device; pc_dimm_class_init() installs this
 * table with device_class_set_props().
 */
static Property pc_dimm_properties[] = {
/* stored in PCDIMMDevice.addr, initial value 0 */
DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0),
/* stored in PCDIMMDevice.node, initial value 0; checked in pc_dimm_realize() */
DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0),
/*
 * stored in PCDIMMDevice.slot; PC_DIMM_UNASSIGNED_SLOT makes
 * pc_dimm_pre_plug() pick a free slot instead of using a hint
 */
DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot,
PC_DIMM_UNASSIGNED_SLOT),
/* link to the host memory backend, stored in PCDIMMDevice.hostmem */
DEFINE_PROP_LINK(PC_DIMM_MEMDEV_PROP, PCDIMMDevice, hostmem,
TYPE_MEMORY_BACKEND, HostMemoryBackend *),
DEFINE_PROP_END_OF_LIST(),
};
/*
 * Property getter for PC_DIMM_SIZE_PROP.
 *
 * Queries the region size of the memory device @obj and passes it to the
 * visitor @v under @name.  A failure to obtain the size is propagated to
 * @errp and nothing is visited.  @opaque is unused.
 */
static void pc_dimm_get_size(Object *obj, Visitor *v, const char *name,
                             void *opaque, Error **errp)
{
    Error *err = NULL;
    uint64_t size = memory_device_get_region_size(MEMORY_DEVICE(obj), &err);

    if (!err) {
        visit_type_uint64(v, name, &size, errp);
        return;
    }

    error_propagate(errp, err);
}
/*
 * Instance initializer: adds the "uint64" PC_DIMM_SIZE_PROP property.
 *
 * Only a getter (pc_dimm_get_size) is supplied; the remaining callback
 * and opaque arguments are NULL.
 */
static void pc_dimm_init(Object *obj)
{
    object_property_add(obj, PC_DIMM_SIZE_PROP, "uint64",
                        pc_dimm_get_size, NULL, NULL, NULL);
}
/*
 * Realize a DIMM device.
 *
 * Checks, in order:
 *  - a host memory backend is linked (PC_DIMM_MEMDEV_PROP) and is not
 *    already mapped by another user;
 *  - the requested NUMA node exists (node 0 is the only valid node when
 *    no NUMA nodes are configured);
 *  - the subclass realize hook, if any, succeeds.
 *
 * The backend is marked as mapped only after every check has passed.  If
 * the subclass hook fails, its error is propagated to @errp and the
 * backend is left unmapped, so a failed realize does not leave the memdev
 * permanently "busy".
 */
static void pc_dimm_realize(DeviceState *dev, Error **errp)
{
    PCDIMMDevice *dimm = PC_DIMM(dev);
    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
    MachineState *ms = MACHINE(qdev_get_machine());
    Error *local_err = NULL;

    if (!dimm->hostmem) {
        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
        return;
    } else if (host_memory_backend_is_mapped(dimm->hostmem)) {
        error_setg(errp, "can't use already busy memdev: %s",
                   object_get_canonical_path_component(OBJECT(dimm->hostmem)));
        return;
    }

    if (ms->numa_state) {
        int nb_numa_nodes = ms->numa_state->num_nodes;

        if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) ||
            (!nb_numa_nodes && dimm->node)) {
            error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
                       PRIu32 "' which exceeds the number of numa nodes: %d",
                       dimm->node, nb_numa_nodes ? nb_numa_nodes : 1);
            return;
        }
    } else if (dimm->node > 0) {
        /*
         * No NUMA state on this machine: avoid dereferencing a NULL
         * pointer and reject any non-zero node.
         */
        error_setg(errp, "machine doesn't support NUMA");
        return;
    }

    if (ddc->realize) {
        /*
         * Use a local error so failure can be detected even when the
         * caller passed a NULL @errp.
         */
        ddc->realize(dimm, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    host_memory_backend_set_mapped(dimm->hostmem, true);
}
qdev: Unrealize must not fail Devices may have component devices and buses. Device realization may fail. Realization is recursive: a device's realize() method realizes its components, and device_set_realized() realizes its buses (which should in turn realize the devices on that bus, except bus_set_realized() doesn't implement that, yet). When realization of a component or bus fails, we need to roll back: unrealize everything we realized so far. If any of these unrealizes failed, the device would be left in an inconsistent state. Must not happen. device_set_realized() lets it happen: it ignores errors in the roll back code starting at label child_realize_fail. Since realization is recursive, unrealization must be recursive, too. But how could a partly failed unrealize be rolled back? We'd have to re-realize, which can fail. This design is fundamentally broken. device_set_realized() does not roll back at all. Instead, it keeps unrealizing, ignoring further errors. It can screw up even for a device with no buses: if the lone dc->unrealize() fails, it still unregisters vmstate, and calls listeners' unrealize() callback. bus_set_realized() does not roll back either. Instead, it stops unrealizing. Fortunately, no unrealize method can fail, as we'll see below. To fix the design error, drop parameter @errp from all the unrealize methods. Any unrealize method that uses @errp now needs an update. This leads us to unrealize() methods that can fail. Merely passing it to another unrealize method cannot cause failure, though. Here are the ones that do other things with @errp: * virtio_serial_device_unrealize() Fails when qbus_set_hotplug_handler() fails, but still does all the other work. On failure, the device would stay realized with its resources completely gone. Oops. Can't happen, because qbus_set_hotplug_handler() can't actually fail here. Pass &error_abort to qbus_set_hotplug_handler() instead. 
* hw/ppc/spapr_drc.c's unrealize() Fails when object_property_del() fails, but all the other work is already done. On failure, the device would stay realized with its vmstate registration gone. Oops. Can't happen, because object_property_del() can't actually fail here. Pass &error_abort to object_property_del() instead. * spapr_phb_unrealize() Fails and bails out when remove_drcs() fails, but other work is already done. On failure, the device would stay realized with some of its resources gone. Oops. remove_drcs() fails only when chassis_from_bus()'s object_property_get_uint() fails, and it can't here. Pass &error_abort to remove_drcs() instead. Therefore, no unrealize method can fail before this patch. device_set_realized()'s recursive unrealization via bus uses object_property_set_bool(). Can't drop @errp there, so pass &error_abort. We similarly unrealize with object_property_set_bool() elsewhere, always ignoring errors. Pass &error_abort instead. Several unrealize methods no longer handle errors from other unrealize methods: virtio_9p_device_unrealize(), virtio_input_device_unrealize(), scsi_qdev_unrealize(), ... Much of the deleted error handling looks wrong anyway. One unrealize methods no longer ignore such errors: usb_ehci_pci_exit(). Several realize methods no longer ignore errors when rolling back: v9fs_device_realize_common(), pci_qdev_unrealize(), spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(), virtio_device_realize(). Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
/*
 * Unrealize a DIMM device: clear the "mapped" flag on its host memory
 * backend, undoing what pc_dimm_realize() set.
 */
static void pc_dimm_unrealize(DeviceState *dev)
{
    host_memory_backend_set_mapped(PC_DIMM(dev)->hostmem, false);
}
/*
 * MemoryDeviceClass::get_addr implementation: read the address from the
 * PC_DIMM_ADDR_PROP property.  Uses &error_abort, so a failing property
 * read aborts.
 */
static uint64_t pc_dimm_md_get_addr(const MemoryDeviceState *md)
{
    uint64_t addr;

    addr = object_property_get_uint(OBJECT(md), PC_DIMM_ADDR_PROP,
                                    &error_abort);
    return addr;
}
/*
 * MemoryDeviceClass::set_addr implementation: store @addr through the
 * PC_DIMM_ADDR_PROP property.  Any failure of the property setter is
 * reported via @errp.
 */
static void pc_dimm_md_set_addr(MemoryDeviceState *md, uint64_t addr,
                                Error **errp)
{
    object_property_set_uint(OBJECT(md), PC_DIMM_ADDR_PROP, addr, errp);
}
/*
 * MemoryDeviceClass::get_memory_region implementation: casts @md to a
 * PC DIMM and defers to pc_dimm_get_memory_region(), which returns NULL
 * and sets @errp when no backend is linked.
 */
static MemoryRegion *pc_dimm_md_get_memory_region(MemoryDeviceState *md,
Error **errp)
{
return pc_dimm_get_memory_region(PC_DIMM(md), errp);
}
/*
 * MemoryDeviceClass::fill_device_info implementation.
 *
 * Allocates a PCDIMMDeviceInfo describing @md (id, hotplug flags, address,
 * slot, node, size and canonical path of the backend) and attaches it to
 * @info.  The info is tagged as NVDIMM when the device is a TYPE_NVDIMM
 * instance, and as plain DIMM otherwise.
 */
static void pc_dimm_md_fill_device_info(const MemoryDeviceState *md,
                                        MemoryDeviceInfo *info)
{
    const DeviceState *dev = DEVICE(md);
    const DeviceClass *dc = DEVICE_GET_CLASS(md);
    const PCDIMMDevice *dimm = PC_DIMM(md);
    PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);

    /* The id is optional; has_id stays false (zeroed) when absent. */
    if (dev->id) {
        di->has_id = true;
        di->id = g_strdup(dev->id);
    }

    di->hotplugged = dev->hotplugged;
    di->hotpluggable = dc->hotpluggable;

    di->addr = dimm->addr;
    di->slot = dimm->slot;
    di->node = dimm->node;
    /* Errors while reading the size property are deliberately ignored. */
    di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
                                        NULL);
    di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));

    if (!object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
        info->u.dimm.data = di;
        info->type = MEMORY_DEVICE_INFO_KIND_DIMM;
        return;
    }

    info->u.nvdimm.data = di;
    info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM;
}
/*
 * Class initializer: wires up the device callbacks, the property table
 * and the memory device interface methods.  @data is unused.
 */
static void pc_dimm_class_init(ObjectClass *oc, void *data)
{
    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc);
    DeviceClass *dc = DEVICE_CLASS(oc);

    /* Generic device hooks. */
    dc->desc = "DIMM memory module";
    dc->realize = pc_dimm_realize;
    dc->unrealize = pc_dimm_unrealize;
    device_class_set_props(dc, pc_dimm_properties);

    /* Memory device interface. */
    mdc->get_addr = pc_dimm_md_get_addr;
    mdc->set_addr = pc_dimm_md_set_addr;
    mdc->get_memory_region = pc_dimm_md_get_memory_region;
    mdc->fill_device_info = pc_dimm_md_fill_device_info;
    /* for a dimm plugged_size == region_size */
    mdc->get_plugged_size = memory_device_get_region_size;
}
/*
 * QOM type description of the PC DIMM: a TYPE_DEVICE subclass that
 * implements the TYPE_MEMORY_DEVICE interface.
 */
static TypeInfo pc_dimm_info = {
    .name          = TYPE_PC_DIMM,
    .parent        = TYPE_DEVICE,
    .class_size    = sizeof(PCDIMMDeviceClass),
    .class_init    = pc_dimm_class_init,
    .instance_size = sizeof(PCDIMMDevice),
    .instance_init = pc_dimm_init,
    .interfaces    = (InterfaceInfo[]) {
        { TYPE_MEMORY_DEVICE },
        { }
    },
};
/* Register the PC DIMM type described by pc_dimm_info. */
static void pc_dimm_register_types(void)
{
type_register_static(&pc_dimm_info);
}
/* Hand the registration function to type_init(). */
type_init(pc_dimm_register_types)