qemu/include/hw/ppc/spapr_drc.h

269 lines
9.2 KiB
C
Raw Normal View History

/*
* QEMU SPAPR Dynamic Reconfiguration Connector Implementation
*
* Copyright IBM Corp. 2014
*
* Authors:
* Michael Roth <mdroth@linux.vnet.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef HW_SPAPR_DRC_H
#define HW_SPAPR_DRC_H
#include <libfdt.h>
#include "qapi/qapi-types-run-state.h"
#include "qom/object.h"
spapr: Treat devices added before inbound migration as coldplugged When migrating a guest which has already had devices hotplugged, libvirt typically starts the destination qemu with -incoming defer, adds those hotplugged devices with qmp, then initiates the incoming migration. This causes problems for the management of spapr DRC state. Because the device is treated as hotplugged, it goes into a DRC state for a device immediately after it's plugged, but before the guest has acknowledged its presence. However, chances are the guest on the source machine *has* acknowledged the device's presence and configured it. If the source has fully configured the device, then DRC state won't be sent in the migration stream: for maximum migration compatibility with earlier versions we don't migrate DRCs in coldplug-equivalent state. That means that the DRC effectively changes state over the migrate, causing problems later on. In addition, logging hotplug events for these devices isn't what we want because a) those events should already have been issued on the source host and b) the event queue should get wiped out by the incoming state anyway. In short, what we really want is to treat devices added before an incoming migration as if they were coldplugged. To do this, we first add a spapr_drc_hotplugged() helper which determines if the device is hotplugged in the sense relevant for DRC state management. We only send hotplug events when this is true. Second, when we add a device which isn't hotplugged in this sense, we force a reset of the DRC state - this ensures the DRC is in a coldplug-equivalent state (there isn't usually a system reset between these device adds and the incoming migration). This is based on an earlier patch by Laurent Vivier, cleaned up and extended. Signed-off-by: Laurent Vivier <lvivier@redhat.com> Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Greg Kurz <groug@kaod.org> Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-06-09 14:08:10 +03:00
#include "sysemu/sysemu.h"
#include "hw/qdev.h"
#define TYPE_SPAPR_DR_CONNECTOR "spapr-dr-connector"
#define SPAPR_DR_CONNECTOR_GET_CLASS(obj) \
OBJECT_GET_CLASS(sPAPRDRConnectorClass, obj, TYPE_SPAPR_DR_CONNECTOR)
#define SPAPR_DR_CONNECTOR_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, \
TYPE_SPAPR_DR_CONNECTOR)
#define SPAPR_DR_CONNECTOR(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \
TYPE_SPAPR_DR_CONNECTOR)
#define TYPE_SPAPR_DRC_PHYSICAL "spapr-drc-physical"
#define SPAPR_DRC_PHYSICAL_GET_CLASS(obj) \
OBJECT_GET_CLASS(sPAPRDRConnectorClass, obj, TYPE_SPAPR_DRC_PHYSICAL)
#define SPAPR_DRC_PHYSICAL_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, \
TYPE_SPAPR_DRC_PHYSICAL)
#define SPAPR_DRC_PHYSICAL(obj) OBJECT_CHECK(sPAPRDRCPhysical, (obj), \
TYPE_SPAPR_DRC_PHYSICAL)
#define TYPE_SPAPR_DRC_LOGICAL "spapr-drc-logical"
#define SPAPR_DRC_LOGICAL_GET_CLASS(obj) \
OBJECT_GET_CLASS(sPAPRDRConnectorClass, obj, TYPE_SPAPR_DRC_LOGICAL)
#define SPAPR_DRC_LOGICAL_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, \
TYPE_SPAPR_DRC_LOGICAL)
#define SPAPR_DRC_LOGICAL(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \
TYPE_SPAPR_DRC_LOGICAL)
#define TYPE_SPAPR_DRC_CPU "spapr-drc-cpu"
#define SPAPR_DRC_CPU_GET_CLASS(obj) \
OBJECT_GET_CLASS(sPAPRDRConnectorClass, obj, TYPE_SPAPR_DRC_CPU)
#define SPAPR_DRC_CPU_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, TYPE_SPAPR_DRC_CPU)
#define SPAPR_DRC_CPU(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \
TYPE_SPAPR_DRC_CPU)
#define TYPE_SPAPR_DRC_PCI "spapr-drc-pci"
#define SPAPR_DRC_PCI_GET_CLASS(obj) \
OBJECT_GET_CLASS(sPAPRDRConnectorClass, obj, TYPE_SPAPR_DRC_PCI)
#define SPAPR_DRC_PCI_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, TYPE_SPAPR_DRC_PCI)
#define SPAPR_DRC_PCI(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \
TYPE_SPAPR_DRC_PCI)
#define TYPE_SPAPR_DRC_LMB "spapr-drc-lmb"
#define SPAPR_DRC_LMB_GET_CLASS(obj) \
OBJECT_GET_CLASS(sPAPRDRConnectorClass, obj, TYPE_SPAPR_DRC_LMB)
#define SPAPR_DRC_LMB_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRDRConnectorClass, klass, TYPE_SPAPR_DRC_LMB)
#define SPAPR_DRC_LMB(obj) OBJECT_CHECK(sPAPRDRConnector, (obj), \
TYPE_SPAPR_DRC_LMB)
/*
* Various hotplug types managed by sPAPRDRConnector
*
* these are somewhat arbitrary, but to make things easier
* when generating DRC indexes later we've aligned the bit
* positions with the values used to assign DRC indexes on
* pSeries. we use those values as bit shifts to allow for
* the OR'ing of these values in various QEMU routines, but
* for values exposed to the guest (via DRC indexes for
* instance) we will use the shift amounts.
*/
typedef enum {
SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU = 1,
SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB = 2,
SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO = 3,
SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI = 4,
SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB = 8,
} sPAPRDRConnectorTypeShift;
typedef enum {
SPAPR_DR_CONNECTOR_TYPE_ANY = ~0,
SPAPR_DR_CONNECTOR_TYPE_CPU = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU,
SPAPR_DR_CONNECTOR_TYPE_PHB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB,
SPAPR_DR_CONNECTOR_TYPE_VIO = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_VIO,
SPAPR_DR_CONNECTOR_TYPE_PCI = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI,
SPAPR_DR_CONNECTOR_TYPE_LMB = 1 << SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB,
} sPAPRDRConnectorType;
/*
* set via set-indicator RTAS calls
* as documented by PAPR+ 2.7 13.5.3.4, Table 177
*
* isolated: put device under firmware control
* unisolated: claim OS control of device (may or may not be in use)
*/
typedef enum {
SPAPR_DR_ISOLATION_STATE_ISOLATED = 0,
SPAPR_DR_ISOLATION_STATE_UNISOLATED = 1
} sPAPRDRIsolationState;
/*
* set via set-indicator RTAS calls
* as documented by PAPR+ 2.7 13.5.3.4, Table 177
*
* unusable: mark device as unavailable to OS
* usable: mark device as available to OS
* exchange: (currently unused)
* recover: (currently unused)
*/
typedef enum {
SPAPR_DR_ALLOCATION_STATE_UNUSABLE = 0,
SPAPR_DR_ALLOCATION_STATE_USABLE = 1,
SPAPR_DR_ALLOCATION_STATE_EXCHANGE = 2,
SPAPR_DR_ALLOCATION_STATE_RECOVER = 3
} sPAPRDRAllocationState;
/*
* DR-indicator (LED/visual indicator)
*
* set via set-indicator RTAS calls
* as documented by PAPR+ 2.7 13.5.3.4, Table 177,
* and PAPR+ 2.7 13.5.4.1, Table 180
*
* inactive: hotpluggable entity inactive and safely removable
* active: hotpluggable entity in use and not safely removable
* identify: (currently unused)
* action: (currently unused)
*/
typedef enum {
SPAPR_DR_INDICATOR_INACTIVE = 0,
SPAPR_DR_INDICATOR_ACTIVE = 1,
SPAPR_DR_INDICATOR_IDENTIFY = 2,
SPAPR_DR_INDICATOR_ACTION = 3,
} sPAPRDRIndicatorState;
/*
* returned via get-sensor-state RTAS calls
* as documented by PAPR+ 2.7 13.5.3.3, Table 175:
*
* empty: connector slot empty (e.g. empty hotpluggable PCI slot)
* present: connector slot populated and device available to OS
* unusable: device not currently available to OS
* exchange: (currently unused)
* recover: (currently unused)
*/
typedef enum {
SPAPR_DR_ENTITY_SENSE_EMPTY = 0,
SPAPR_DR_ENTITY_SENSE_PRESENT = 1,
SPAPR_DR_ENTITY_SENSE_UNUSABLE = 2,
SPAPR_DR_ENTITY_SENSE_EXCHANGE = 3,
SPAPR_DR_ENTITY_SENSE_RECOVER = 4,
} sPAPRDREntitySense;
typedef enum {
SPAPR_DR_CC_RESPONSE_NEXT_SIB = 1, /* currently unused */
SPAPR_DR_CC_RESPONSE_NEXT_CHILD = 2,
SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY = 3,
SPAPR_DR_CC_RESPONSE_PREV_PARENT = 4,
SPAPR_DR_CC_RESPONSE_SUCCESS = 0,
SPAPR_DR_CC_RESPONSE_ERROR = -1,
SPAPR_DR_CC_RESPONSE_CONTINUE = -2,
SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE = -9003,
} sPAPRDRCCResponse;
typedef enum {
/*
* Values come from Fig. 12 in LoPAPR section 13.4
*
* These are exposed in the migration stream, so don't change
* them.
*/
SPAPR_DRC_STATE_INVALID = 0,
SPAPR_DRC_STATE_LOGICAL_UNUSABLE = 1,
SPAPR_DRC_STATE_LOGICAL_AVAILABLE = 2,
SPAPR_DRC_STATE_LOGICAL_UNISOLATE = 3,
SPAPR_DRC_STATE_LOGICAL_CONFIGURED = 4,
SPAPR_DRC_STATE_PHYSICAL_AVAILABLE = 5,
SPAPR_DRC_STATE_PHYSICAL_POWERON = 6,
SPAPR_DRC_STATE_PHYSICAL_UNISOLATE = 7,
SPAPR_DRC_STATE_PHYSICAL_CONFIGURED = 8,
} sPAPRDRCState;
typedef struct sPAPRDRConnector {
/*< private >*/
DeviceState parent;
uint32_t id;
Object *owner;
uint32_t state;
spapr: Remove sPAPRConfigureConnectorState sub-structure Most of the time, the state of a DRC object is contained in the single 'state' variable. However, during the transition from UNISOLATE to CONFIGURED state requires multiple calls to the ibm,configure-connector RTAS call to retrieve the device tree for the attached device. We need some extra state to keep track of where we're up to in delivering the device tree information to the guest. Currently that extra state is in a sPAPRConfigureConnectorState substructure which is only allocated when we're in the middle of the configure connector process. That sounds like a good idea, but the extra state is only two integers - on many platforms that will take up the same room as the (maybe NULL) ccs pointer even before malloc() overhead. Plus it's another object whose lifetime we need to manage. In short, it's not worth it. So, fold the sPAPRConfigureConnectorState substructure directly into the DRC object. Previously the structure was allocated lazily when the configure-connector call discovers it's not there. Now, we need to initialize the subfields pre-emptively, as soon as we enter UNISOLATE state. Although it's not strictly necessary (the field values should only ever be consulted when in UNISOLATE state), we try to keep them at -1 when in other states, as a debugging aid. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com> Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-06-21 12:12:14 +03:00
/* RTAS ibm,configure-connector state */
/* (only valid in UNISOLATE state) */
int ccs_offset;
int ccs_depth;
/* device pointer, via link property */
DeviceState *dev;
bool unplug_requested;
spapr: Remove sPAPRConfigureConnectorState sub-structure Most of the time, the state of a DRC object is contained in the single 'state' variable. However, during the transition from UNISOLATE to CONFIGURED state requires multiple calls to the ibm,configure-connector RTAS call to retrieve the device tree for the attached device. We need some extra state to keep track of where we're up to in delivering the device tree information to the guest. Currently that extra state is in a sPAPRConfigureConnectorState substructure which is only allocated when we're in the middle of the configure connector process. That sounds like a good idea, but the extra state is only two integers - on many platforms that will take up the same room as the (maybe NULL) ccs pointer even before malloc() overhead. Plus it's another object whose lifetime we need to manage. In short, it's not worth it. So, fold the sPAPRConfigureConnectorState substructure directly into the DRC object. Previously the structure was allocated lazily when the configure-connector call discovers it's not there. Now, we need to initialize the subfields pre-emptively, as soon as we enter UNISOLATE state. Although it's not strictly necessary (the field values should only ever be consulted when in UNISOLATE state), we try to keep them at -1 when in other states, as a debugging aid. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Daniel Barboza <danielhb@linux.vnet.ibm.com> Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-06-21 12:12:14 +03:00
void *fdt;
int fdt_start_offset;
} sPAPRDRConnector;
typedef struct sPAPRDRConnectorClass {
/*< private >*/
DeviceClass parent;
sPAPRDRCState empty_state;
sPAPRDRCState ready_state;
/*< public >*/
sPAPRDRConnectorTypeShift typeshift;
const char *typename; /* used in device tree, PAPR 13.5.2.6 & C.6.1 */
const char *drc_name_prefix; /* used other places in device tree */
sPAPRDREntitySense (*dr_entity_sense)(sPAPRDRConnector *drc);
uint32_t (*isolate)(sPAPRDRConnector *drc);
uint32_t (*unisolate)(sPAPRDRConnector *drc);
void (*release)(DeviceState *dev);
} sPAPRDRConnectorClass;
typedef struct sPAPRDRCPhysical {
/*< private >*/
sPAPRDRConnector parent;
/* DR-indicator */
uint32_t dr_indicator;
} sPAPRDRCPhysical;
spapr: Treat devices added before inbound migration as coldplugged When migrating a guest which has already had devices hotplugged, libvirt typically starts the destination qemu with -incoming defer, adds those hotplugged devices with qmp, then initiates the incoming migration. This causes problems for the management of spapr DRC state. Because the device is treated as hotplugged, it goes into a DRC state for a device immediately after it's plugged, but before the guest has acknowledged its presence. However, chances are the guest on the source machine *has* acknowledged the device's presence and configured it. If the source has fully configured the device, then DRC state won't be sent in the migration stream: for maximum migration compatibility with earlier versions we don't migrate DRCs in coldplug-equivalent state. That means that the DRC effectively changes state over the migrate, causing problems later on. In addition, logging hotplug events for these devices isn't what we want because a) those events should already have been issued on the source host and b) the event queue should get wiped out by the incoming state anyway. In short, what we really want is to treat devices added before an incoming migration as if they were coldplugged. To do this, we first add a spapr_drc_hotplugged() helper which determines if the device is hotplugged in the sense relevant for DRC state management. We only send hotplug events when this is true. Second, when we add a device which isn't hotplugged in this sense, we force a reset of the DRC state - this ensures the DRC is in a coldplug-equivalent state (there isn't usually a system reset between these device adds and the incoming migration). This is based on an earlier patch by Laurent Vivier, cleaned up and extended. Signed-off-by: Laurent Vivier <lvivier@redhat.com> Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Greg Kurz <groug@kaod.org> Tested-by: Daniel Barboza <danielhb@linux.vnet.ibm.com>
2017-06-09 14:08:10 +03:00
static inline bool spapr_drc_hotplugged(DeviceState *dev)
{
return dev->hotplugged && !runstate_check(RUN_STATE_INMIGRATE);
}
void spapr_drc_reset(sPAPRDRConnector *drc);
uint32_t spapr_drc_index(sPAPRDRConnector *drc);
sPAPRDRConnectorType spapr_drc_type(sPAPRDRConnector *drc);
sPAPRDRConnector *spapr_dr_connector_new(Object *owner, const char *type,
uint32_t id);
sPAPRDRConnector *spapr_drc_by_index(uint32_t index);
sPAPRDRConnector *spapr_drc_by_id(const char *type, uint32_t id);
int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner,
uint32_t drc_type_mask);
void spapr_drc_attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
int fdt_start_offset, Error **errp);
void spapr_drc_detach(sPAPRDRConnector *drc);
hw/ppc: CAS reset on early device hotplug This patch is a follow up on the discussions made in patch "hw/ppc: disable hotplug before CAS is completed" that can be found at [1]. At this moment, we do not support CPU/memory hotplug in early boot stages, before CAS. When a hotplug occurs, the event is logged in an internal RTAS event log queue and an IRQ pulse is fired. In regular conditions, the guest handles the interrupt by executing check_exception, fetching the generated hotplug event and enabling the device for use. In early boot, this IRQ isn't caught (SLOF does not handle hotplug events), leaving the event in the rtas event log queue. If the guest executes check_exception due to another hotplug event, the re-assertion of the IRQ ends up de-queuing the first hotplug event as well. In short, a device hotplugged before CAS is considered coldplugged by SLOF. This leads to device misbehavior and, in some cases, guest kernel Ooops when trying to unplug the device. A proper fix would be to turn every device hotplugged before CAS as a colplugged device. This is not trivial to do with the current code base though - the FDT is written in the guest memory at ppc_spapr_reset and can't be retrieved without adding extra state (fdt_size for example) that will need to managed and migrated. Adding the hotplugged DT in the middle of CAS negotiation via the updated DT tree works with CPU devs, but panics the guest kernel at boot. Additional analysis would be necessary for LMBs and PCI devices. There are questions to be made in QEMU/SLOF/kernel level about how we can make this change in a sustainable way. With Linux guests, a fix would be the kernel executing check_exception at boot time, de-queueing the events that happened in early boot and processing them. However, even if/when the newer kernels start fetching these events at boot time, we need to take care of older kernels that won't be doing that. This patch works around the situation by issuing a CAS reset if a hotplugged device is detected during CAS: - the DRC conditions that warrant a CAS reset is the same as those that triggers a DRC migration - the DRC must have a device attached and the DRC state is not equal to its ready_state. With that in mind, this patch makes use of 'spapr_drc_needed' to determine if a CAS reset is needed. - In the middle of CAS negotiations, the function 'spapr_hotplugged_dev_before_cas' goes through all the DRCs to see if there are any DRC that requires a reset, using spapr_drc_needed. If that happens, returns '1' in 'spapr_h_cas_compose_response' which will set spapr->cas_reboot to true, causing the machine to reboot. No changes are made for coldplug devices. [1] http://lists.nongnu.org/archive/html/qemu-devel/2017-08/msg02855.html Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2017-08-30 21:21:41 +03:00
bool spapr_drc_needed(void *opaque);
static inline bool spapr_drc_unplug_requested(sPAPRDRConnector *drc)
{
return drc->unplug_requested;
}
#endif /* HW_SPAPR_DRC_H */