#ifndef HW_SPAPR_H
#define HW_SPAPR_H

#include "qemu/units.h"
#include "sysemu/dma.h"
#include "hw/boards.h"
#include "hw/ppc/spapr_drc.h"
#include "hw/mem/pc-dimm.h"
#include "hw/ppc/spapr_ovec.h"
#include "hw/ppc/spapr_irq.h"

struct VIOsPAPRBus;
struct sPAPRPHBState;
struct sPAPRNVRAM;
typedef struct sPAPREventLogEntry sPAPREventLogEntry;
typedef struct sPAPREventSource sPAPREventSource;
typedef struct sPAPRPendingHPT sPAPRPendingHPT;
typedef struct ICSState ICSState;

#define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
#define SPAPR_ENTRY_POINT       0x100

#define SPAPR_TIMEBASE_FREQ     512000000ULL

#define TYPE_SPAPR_RTC "spapr-rtc"
#define SPAPR_RTC(obj)                                  \
    OBJECT_CHECK(sPAPRRTCState, (obj), TYPE_SPAPR_RTC)

typedef struct sPAPRRTCState sPAPRRTCState;
struct sPAPRRTCState {
    /*< private >*/
    DeviceState parent_obj;
    int64_t ns_offset;
};

typedef struct sPAPRDIMMState sPAPRDIMMState;
typedef struct sPAPRMachineClass sPAPRMachineClass;

#define TYPE_SPAPR_MACHINE      "spapr-machine"
#define SPAPR_MACHINE(obj) \
    OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
#define SPAPR_MACHINE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(sPAPRMachineClass, obj, TYPE_SPAPR_MACHINE)
#define SPAPR_MACHINE_CLASS(klass) \
    OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE)

typedef enum {
    SPAPR_RESIZE_HPT_DEFAULT = 0,
    SPAPR_RESIZE_HPT_DISABLED,
    SPAPR_RESIZE_HPT_ENABLED,
    SPAPR_RESIZE_HPT_REQUIRED,
} sPAPRResizeHPT;

/**
 * Capabilities
 */

/* Hardware Transactional Memory */
#define SPAPR_CAP_HTM                   0x00
/* Vector Scalar Extensions */
#define SPAPR_CAP_VSX                   0x01
/* Decimal Floating Point */
#define SPAPR_CAP_DFP                   0x02
/* Cache Flush on Privilege Change */
#define SPAPR_CAP_CFPC                  0x03
/* Speculation Barrier Bounds Checking */
#define SPAPR_CAP_SBBC                  0x04
/* Indirect Branch Serialisation */
#define SPAPR_CAP_IBS                   0x05
/* HPT Maximum Page Size (encoded as a shift) */
#define SPAPR_CAP_HPT_MAXPAGESIZE       0x06
/* Nested KVM-HV */
#define SPAPR_CAP_NESTED_KVM_HV         0x07
/* Num Caps */
#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_KVM_HV + 1)

/*
 * Capability Values
 */
/* Bool Caps */
#define SPAPR_CAP_OFF                   0x00
#define SPAPR_CAP_ON                    0x01

/* Custom Caps */
#define SPAPR_CAP_BROKEN                0x00
#define SPAPR_CAP_WORKAROUND            0x01
#define SPAPR_CAP_FIXED                 0x02
#define SPAPR_CAP_FIXED_IBS             0x02
#define SPAPR_CAP_FIXED_CCD             0x03

typedef struct sPAPRCapabilities sPAPRCapabilities;
struct sPAPRCapabilities {
    uint8_t caps[SPAPR_CAP_NUM];
};
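
/*
 * Illustrative use (hypothetical caller, not an API defined here):
 * capability values are stored per machine, so code that needs the
 * effective setting of, say, HTM could read
 *
 *     if (spapr->eff.caps[SPAPR_CAP_HTM] == SPAPR_CAP_ON) { ... }
 *
 * where "eff" is the effective sPAPRCapabilities instance held in
 * sPAPRMachineState (declared below).
 */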

/**
 * sPAPRMachineClass:
 */
struct sPAPRMachineClass {
    /*< private >*/
    MachineClass parent_class;

    /*< public >*/
    bool dr_lmb_enabled;       /* enable dynamic-reconfig/hotplug of LMBs */
    bool use_ohci_by_default;  /* use USB-OHCI instead of XHCI */
    bool pre_2_10_has_unused_icps;
    bool legacy_irq_allocation;

    void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index,
                          uint64_t *buid, hwaddr *pio,
                          hwaddr *mmio32, hwaddr *mmio64,
                          unsigned n_dma, uint32_t *liobns, Error **errp);
    sPAPRResizeHPT resize_hpt_default;
    sPAPRCapabilities default_caps;
    sPAPRIrq *irq;
};

/**
 * sPAPRMachineState:
 */
struct sPAPRMachineState {
    /*< private >*/
    MachineState parent_obj;

    struct VIOsPAPRBus *vio_bus;
    QLIST_HEAD(, sPAPRPHBState) phbs;
    struct sPAPRNVRAM *nvram;
    ICSState *ics;
    sPAPRRTCState rtc;

    sPAPRResizeHPT resize_hpt;
    void *htab;
    uint32_t htab_shift;
    uint64_t patb_entry; /* Process tbl registered in H_REGISTER_PROCESS_TABLE */
    sPAPRPendingHPT *pending_hpt; /* in-progress resize */

    hwaddr rma_size;
    int vrma_adjust;
    ssize_t rtas_size;
    void *rtas_blob;
    long kernel_size;
    bool kernel_le;
    uint32_t initrd_base;
    long initrd_size;
    uint64_t rtc_offset; /* Now used only during incoming migration */
    struct PPCTimebase tb;
    bool has_graphics;
    uint32_t vsmt; /* Virtual SMT mode (KVM's "core stride") */

    Notifier epow_notifier;
    QTAILQ_HEAD(, sPAPREventLogEntry) pending_events;
    bool use_hotplug_event_source;
    sPAPREventSource *event_sources;

    /* ibm,client-architecture-support option negotiation */
    bool cas_reboot;
    bool cas_legacy_guest_workaround;
    sPAPROptionVector *ov5;         /* QEMU-supported option vectors */
    sPAPROptionVector *ov5_cas;     /* negotiated (via CAS) option vectors */
    uint32_t max_compat_pvr;

    /* Migration state */
    int htab_save_index;
    bool htab_first_pass;
    int htab_fd;

    /* Pending DIMM unplug cache. It is populated when an LMB
     * unplug starts. It can be regenerated if a migration
     * occurs during the unplug process. */
    QTAILQ_HEAD(, sPAPRDIMMState) pending_dimm_unplugs;

    /*< public >*/
    char *kvm_type;

    const char *icp_type;
    int32_t irq_map_nr;
    unsigned long *irq_map;

    bool cmd_line_caps[SPAPR_CAP_NUM];
    sPAPRCapabilities def, eff, mig;
};

#define H_SUCCESS         0
#define H_BUSY            1  /* Hardware busy -- retry later */
#define H_CLOSED          2  /* Resource closed */
#define H_NOT_AVAILABLE   3
#define H_CONSTRAINED     4  /* Resource request constrained to max allowed */
#define H_PARTIAL         5
#define H_IN_PROGRESS     14 /* Kind of like busy */
#define H_PAGE_REGISTERED 15
#define H_PARTIAL_STORE   16
#define H_PENDING         17 /* returned from H_POLL_PENDING */
#define H_CONTINUE        18 /* Returned from H_Join on success */
#define H_LONG_BUSY_START_RANGE         9900  /* Start of long busy range */
#define H_LONG_BUSY_ORDER_1_MSEC        9900  /* Long busy, hint that 1msec \
                                                 is a good time to retry */
#define H_LONG_BUSY_ORDER_10_MSEC       9901  /* Long busy, hint that 10msec \
                                                 is a good time to retry */
#define H_LONG_BUSY_ORDER_100_MSEC      9902  /* Long busy, hint that 100msec \
                                                 is a good time to retry */
#define H_LONG_BUSY_ORDER_1_SEC         9903  /* Long busy, hint that 1sec \
                                                 is a good time to retry */
#define H_LONG_BUSY_ORDER_10_SEC        9904  /* Long busy, hint that 10sec \
                                                 is a good time to retry */
#define H_LONG_BUSY_ORDER_100_SEC       9905  /* Long busy, hint that 100sec \
                                                 is a good time to retry */
#define H_LONG_BUSY_END_RANGE           9905  /* End of long busy range */
#define H_HARDWARE        -1  /* Hardware error */
#define H_FUNCTION        -2  /* Function not supported */
#define H_PRIVILEGE       -3  /* Caller not privileged */
#define H_PARAMETER       -4  /* Parameter invalid, out-of-range or conflicting */
#define H_BAD_MODE        -5  /* Illegal msr value */
#define H_PTEG_FULL       -6  /* PTEG is full */
#define H_NOT_FOUND       -7  /* PTE was not found */
#define H_RESERVED_DABR   -8  /* DABR address is reserved by the hypervisor on this processor */
#define H_NO_MEM          -9
#define H_AUTHORITY       -10
#define H_PERMISSION      -11
#define H_DROPPED         -12
#define H_SOURCE_PARM     -13
#define H_DEST_PARM       -14
#define H_REMOTE_PARM     -15
#define H_RESOURCE        -16
#define H_ADAPTER_PARM    -17
#define H_RH_PARM         -18
#define H_RCQ_PARM        -19
#define H_SCQ_PARM        -20
#define H_EQ_PARM         -21
#define H_RT_PARM         -22
#define H_ST_PARM         -23
#define H_SIGT_PARM       -24
#define H_TOKEN_PARM      -25
#define H_MLENGTH_PARM    -27
#define H_MEM_PARM        -28
#define H_MEM_ACCESS_PARM -29
#define H_ATTR_PARM       -30
#define H_PORT_PARM       -31
#define H_MCG_PARM        -32
#define H_VL_PARM         -33
#define H_TSIZE_PARM      -34
#define H_TRACE_PARM      -35

#define H_MASK_PARM       -37
#define H_MCG_FULL        -38
#define H_ALIAS_EXIST     -39
#define H_P_COUNTER       -40
#define H_TABLE_FULL      -41
#define H_ALT_TABLE       -42
#define H_MR_CONDITION    -43
#define H_NOT_ENOUGH_RESOURCES -44
#define H_R_STATE         -45
#define H_RESCINDEND      -46
#define H_P2              -55
#define H_P3              -56
#define H_P4              -57
#define H_P5              -58
#define H_P6              -59
#define H_P7              -60
#define H_P8              -61
#define H_P9              -62
#define H_UNSUPPORTED_FLAG -256
#define H_MULTI_THREADS_ACTIVE -9005

/* Long Busy is a condition that can be returned by the firmware
 * when a call cannot be completed now, but the identical call
 * should be retried later. This prevents calls blocking in the
 * firmware for long periods of time. Annoyingly the firmware can return
 * a range of return codes, hinting at how long we should wait before
 * retrying. If you don't care for the hint, the macro below is a good
 * way to check for the long_busy return codes
 */
#define H_IS_LONG_BUSY(x)   ((x >= H_LONG_BUSY_START_RANGE) \
                             && (x <= H_LONG_BUSY_END_RANGE))
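
/*
 * Illustrative sketch only (not part of this header's API): a caller that
 * ignores the delay hints could implement the retry semantics described
 * above as
 *
 *     target_ulong rc;
 *     do {
 *         rc = spapr_hypercall(cpu, opcode, args);
 *     } while (H_IS_LONG_BUSY(rc));
 *
 * spapr_hypercall() is declared later in this header; any back-off or
 * delay policy is up to the caller.
 */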

/* Flags */
#define H_LARGE_PAGE             (1ULL<<(63-16))
#define H_EXACT                  (1ULL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */
#define H_R_XLATE                (1ULL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */
#define H_READ_4                 (1ULL<<(63-26)) /* Return 4 PTEs */
#define H_PAGE_STATE_CHANGE      (1ULL<<(63-28))
#define H_PAGE_UNUSED            ((1ULL<<(63-29)) | (1ULL<<(63-30)))
#define H_PAGE_SET_UNUSED        (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
#define H_PAGE_SET_LOANED        (H_PAGE_SET_UNUSED | (1ULL<<(63-31)))
#define H_PAGE_SET_ACTIVE        H_PAGE_STATE_CHANGE
#define H_AVPN                   (1ULL<<(63-32)) /* An avpn is provided as a sanity test */
#define H_ANDCOND                (1ULL<<(63-33))
#define H_ICACHE_INVALIDATE      (1ULL<<(63-40)) /* icbi, etc. (ignored for IO pages) */
#define H_ICACHE_SYNCHRONIZE     (1ULL<<(63-41)) /* dcbst, icbi, etc (ignored for IO pages) */
#define H_ZERO_PAGE              (1ULL<<(63-48)) /* zero the page before mapping (ignored for IO pages) */
#define H_COPY_PAGE              (1ULL<<(63-49))
#define H_N                      (1ULL<<(63-61))
#define H_PP1                    (1ULL<<(63-62))
#define H_PP2                    (1ULL<<(63-63))

/* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR           1
#define H_SET_MODE_RESOURCE_SET_DAWR            2
#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE     3
#define H_SET_MODE_RESOURCE_LE                  4

/* Flags for H_SET_MODE_RESOURCE_LE */
#define H_SET_MODE_ENDIAN_BIG    0
#define H_SET_MODE_ENDIAN_LITTLE 1

/* VASI States */
#define H_VASI_INVALID    0
#define H_VASI_ENABLED    1
#define H_VASI_ABORTED    2
#define H_VASI_SUSPENDING 3
#define H_VASI_SUSPENDED  4
#define H_VASI_RESUMED    5
#define H_VASI_COMPLETED  6

/* DABRX flags */
#define H_DABRX_HYPERVISOR       (1ULL<<(63-61))
#define H_DABRX_KERNEL           (1ULL<<(63-62))
#define H_DABRX_USER             (1ULL<<(63-63))

/* Values for KVM_PPC_GET_CPU_CHAR & H_GET_CPU_CHARACTERISTICS */
#define H_CPU_CHAR_SPEC_BAR_ORI31               PPC_BIT(0)
#define H_CPU_CHAR_BCCTRL_SERIALISED            PPC_BIT(1)
#define H_CPU_CHAR_L1D_FLUSH_ORI30              PPC_BIT(2)
#define H_CPU_CHAR_L1D_FLUSH_TRIG2              PPC_BIT(3)
#define H_CPU_CHAR_L1D_THREAD_PRIV              PPC_BIT(4)
#define H_CPU_CHAR_HON_BRANCH_HINTS             PPC_BIT(5)
#define H_CPU_CHAR_THR_RECONF_TRIG              PPC_BIT(6)
#define H_CPU_CHAR_CACHE_COUNT_DIS              PPC_BIT(7)
#define H_CPU_BEHAV_FAVOUR_SECURITY             PPC_BIT(0)
#define H_CPU_BEHAV_L1D_FLUSH_PR                PPC_BIT(1)
#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR           PPC_BIT(2)

/* Each control block has to be on a 4K boundary */
#define H_CB_ALIGNMENT     4096

/* pSeries hypervisor opcodes */
#define H_REMOVE                0x04
#define H_ENTER                 0x08
#define H_READ                  0x0c
#define H_CLEAR_MOD             0x10
#define H_CLEAR_REF             0x14
#define H_PROTECT               0x18
#define H_GET_TCE               0x1c
#define H_PUT_TCE               0x20
#define H_SET_SPRG0             0x24
#define H_SET_DABR              0x28
#define H_PAGE_INIT             0x2c
#define H_SET_ASR               0x30
#define H_ASR_ON                0x34
#define H_ASR_OFF               0x38
#define H_LOGICAL_CI_LOAD       0x3c
#define H_LOGICAL_CI_STORE      0x40
#define H_LOGICAL_CACHE_LOAD    0x44
#define H_LOGICAL_CACHE_STORE   0x48
#define H_LOGICAL_ICBI          0x4c
#define H_LOGICAL_DCBF          0x50
#define H_GET_TERM_CHAR         0x54
#define H_PUT_TERM_CHAR         0x58
#define H_REAL_TO_LOGICAL       0x5c
#define H_HYPERVISOR_DATA       0x60
#define H_EOI                   0x64
#define H_CPPR                  0x68
#define H_IPI                   0x6c
#define H_IPOLL                 0x70
#define H_XIRR                  0x74
#define H_MIGRATE_DMA           0x78
#define H_PERFMON               0x7c
#define H_REGISTER_VPA          0xDC
#define H_CEDE                  0xE0
#define H_CONFER                0xE4
#define H_PROD                  0xE8
#define H_GET_PPP               0xEC
#define H_SET_PPP               0xF0
#define H_PURR                  0xF4
#define H_PIC                   0xF8
#define H_REG_CRQ               0xFC
#define H_FREE_CRQ              0x100
#define H_VIO_SIGNAL            0x104
#define H_SEND_CRQ              0x108
#define H_COPY_RDMA             0x110
#define H_REGISTER_LOGICAL_LAN  0x114
#define H_FREE_LOGICAL_LAN      0x118
#define H_ADD_LOGICAL_LAN_BUFFER 0x11C
#define H_SEND_LOGICAL_LAN      0x120
#define H_BULK_REMOVE           0x124
#define H_MULTICAST_CTRL        0x130
#define H_SET_XDABR             0x134
#define H_STUFF_TCE             0x138
#define H_PUT_TCE_INDIRECT      0x13C
#define H_CHANGE_LOGICAL_LAN_MAC 0x14C
#define H_VTERM_PARTNER_INFO    0x150
#define H_REGISTER_VTERM        0x154
#define H_FREE_VTERM            0x158
#define H_RESET_EVENTS          0x15C
#define H_ALLOC_RESOURCE        0x160
#define H_FREE_RESOURCE         0x164
#define H_MODIFY_QP             0x168
#define H_QUERY_QP              0x16C
#define H_REREGISTER_PMR        0x170
#define H_REGISTER_SMR          0x174
#define H_QUERY_MR              0x178
#define H_QUERY_MW              0x17C
#define H_QUERY_HCA             0x180
#define H_QUERY_PORT            0x184
#define H_MODIFY_PORT           0x188
#define H_DEFINE_AQP1           0x18C
#define H_GET_TRACE_BUFFER      0x190
#define H_DEFINE_AQP0           0x194
#define H_RESIZE_MR             0x198
#define H_ATTACH_MCQP           0x19C
#define H_DETACH_MCQP           0x1A0
#define H_CREATE_RPT            0x1A4
#define H_REMOVE_RPT            0x1A8
#define H_REGISTER_RPAGES       0x1AC
#define H_DISABLE_AND_GETC      0x1B0
#define H_ERROR_DATA            0x1B4
#define H_GET_HCA_INFO          0x1B8
#define H_GET_PERF_COUNT        0x1BC
#define H_MANAGE_TRACE          0x1C0
#define H_GET_CPU_CHARACTERISTICS 0x1C8
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
#define H_POLL_PENDING          0x1D8
#define H_QUERY_INT_STATE       0x1E4
#define H_ILLAN_ATTRIBUTES      0x244
#define H_MODIFY_HEA_QP         0x250
#define H_QUERY_HEA_QP          0x254
#define H_QUERY_HEA             0x258
#define H_QUERY_HEA_PORT        0x25C
#define H_MODIFY_HEA_PORT       0x260
#define H_REG_BCMC              0x264
#define H_DEREG_BCMC            0x268
#define H_REGISTER_HEA_RPAGES   0x26C
#define H_DISABLE_AND_GET_HEA   0x270
#define H_GET_HEA_INFO          0x274
#define H_ALLOC_HEA_RESOURCE    0x278
#define H_ADD_CONN              0x284
#define H_DEL_CONN              0x288
#define H_JOIN                  0x298
#define H_VASI_STATE            0x2A4
#define H_ENABLE_CRQ            0x2B0
#define H_GET_EM_PARMS          0x2B8
#define H_SET_MPP               0x2D0
#define H_GET_MPP               0x2D4
#define H_XIRR_X                0x2FC
#define H_RANDOM                0x300
#define H_SET_MODE              0x31C
#define H_RESIZE_HPT_PREPARE    0x36C
#define H_RESIZE_HPT_COMMIT     0x370
#define H_CLEAN_SLB             0x374
#define H_INVALIDATE_PID        0x378
#define H_REGISTER_PROC_TBL     0x37C
#define H_SIGNAL_SYS_RESET      0x380
#define MAX_HCALL_OPCODE        H_SIGNAL_SYS_RESET

/* The hcalls above are standardized in PAPR and implemented by pHyp
 * as well.
 *
 * We also need some hcalls which are specific to qemu / KVM-on-POWER.
 * We put those into the 0xf000-0xfffc range which is reserved by PAPR
 * for "platform-specific" hcalls.
 */
#define KVMPPC_HCALL_BASE       0xf000
#define KVMPPC_H_RTAS           (KVMPPC_HCALL_BASE + 0x0)
#define KVMPPC_H_LOGICAL_MEMOP  (KVMPPC_HCALL_BASE + 0x1)
/* Client Architecture support */
#define KVMPPC_H_CAS            (KVMPPC_HCALL_BASE + 0x2)
#define KVMPPC_HCALL_MAX        KVMPPC_H_CAS

typedef struct sPAPRDeviceTreeUpdateHeader {
    uint32_t version_id;
} sPAPRDeviceTreeUpdateHeader;

#define hcall_dprintf(fmt, ...) \
    do { \
        qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } while (0)
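
/*
 * Example (illustrative): a hypercall handler reporting a guest-triggered
 * error might do
 *
 *     hcall_dprintf("Invalid flags 0x" TARGET_FMT_lx "\n", flags);
 *
 * which logs under LOG_GUEST_ERROR with the calling function's name
 * prepended.
 */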

typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm,
                                       target_ulong opcode,
                                       target_ulong *args);

void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn);
target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
                             target_ulong *args);
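
/*
 * Sketch of how a handler is typically wired up (the handler below is
 * hypothetical, shown only to illustrate the spapr_hcall_fn signature):
 *
 *     static target_ulong h_example(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 *                                   target_ulong opcode, target_ulong *args)
 *     {
 *         return H_SUCCESS;
 *     }
 *
 *     spapr_register_hypercall(H_CEDE, h_example);
 *
 * spapr_hypercall() then dispatches incoming opcodes to the registered
 * functions.
 */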

/* ibm,set-eeh-option */
#define RTAS_EEH_DISABLE                 0
#define RTAS_EEH_ENABLE                  1
#define RTAS_EEH_THAW_IO                 2
#define RTAS_EEH_THAW_DMA                3

/* ibm,get-config-addr-info2 */
#define RTAS_GET_PE_ADDR                 0
#define RTAS_GET_PE_MODE                 1
#define RTAS_PE_MODE_NONE                0
#define RTAS_PE_MODE_NOT_SHARED          1
#define RTAS_PE_MODE_SHARED              2

/* ibm,read-slot-reset-state2 */
#define RTAS_EEH_PE_STATE_NORMAL         0
#define RTAS_EEH_PE_STATE_RESET          1
#define RTAS_EEH_PE_STATE_STOPPED_IO_DMA 2
#define RTAS_EEH_PE_STATE_STOPPED_DMA    4
#define RTAS_EEH_PE_STATE_UNAVAIL        5
#define RTAS_EEH_NOT_SUPPORT             0
#define RTAS_EEH_SUPPORT                 1
#define RTAS_EEH_PE_UNAVAIL_INFO         1000
#define RTAS_EEH_PE_RECOVER_INFO         0

/* ibm,set-slot-reset */
#define RTAS_SLOT_RESET_DEACTIVATE       0
#define RTAS_SLOT_RESET_HOT              1
#define RTAS_SLOT_RESET_FUNDAMENTAL      3

/* ibm,slot-error-detail */
#define RTAS_SLOT_TEMP_ERR_LOG           1
#define RTAS_SLOT_PERM_ERR_LOG           2

/* RTAS return codes */
#define RTAS_OUT_SUCCESS                        0
#define RTAS_OUT_NO_ERRORS_FOUND                1
#define RTAS_OUT_HW_ERROR                       -1
#define RTAS_OUT_BUSY                           -2
#define RTAS_OUT_PARAM_ERROR                    -3
#define RTAS_OUT_NOT_SUPPORTED                  -3
#define RTAS_OUT_NO_SUCH_INDICATOR              -3
#define RTAS_OUT_NOT_AUTHORIZED                 -9002
#define RTAS_OUT_SYSPARM_PARAM_ERROR            -9999

/* DDW pagesize mask values from ibm,query-pe-dma-window */
#define RTAS_DDW_PGSIZE_4K       0x01
#define RTAS_DDW_PGSIZE_64K      0x02
#define RTAS_DDW_PGSIZE_16M      0x04
#define RTAS_DDW_PGSIZE_32M      0x08
#define RTAS_DDW_PGSIZE_64M      0x10
#define RTAS_DDW_PGSIZE_128M     0x20
#define RTAS_DDW_PGSIZE_256M     0x40
#define RTAS_DDW_PGSIZE_16G      0x80

/* RTAS tokens */
#define RTAS_TOKEN_BASE      0x2000

#define RTAS_DISPLAY_CHARACTER                  (RTAS_TOKEN_BASE + 0x00)
#define RTAS_GET_TIME_OF_DAY                    (RTAS_TOKEN_BASE + 0x01)
#define RTAS_SET_TIME_OF_DAY                    (RTAS_TOKEN_BASE + 0x02)
#define RTAS_POWER_OFF                          (RTAS_TOKEN_BASE + 0x03)
#define RTAS_SYSTEM_REBOOT                      (RTAS_TOKEN_BASE + 0x04)
#define RTAS_QUERY_CPU_STOPPED_STATE            (RTAS_TOKEN_BASE + 0x05)
#define RTAS_START_CPU                          (RTAS_TOKEN_BASE + 0x06)
#define RTAS_STOP_SELF                          (RTAS_TOKEN_BASE + 0x07)
#define RTAS_IBM_GET_SYSTEM_PARAMETER           (RTAS_TOKEN_BASE + 0x08)
#define RTAS_IBM_SET_SYSTEM_PARAMETER           (RTAS_TOKEN_BASE + 0x09)
#define RTAS_IBM_SET_XIVE                       (RTAS_TOKEN_BASE + 0x0A)
#define RTAS_IBM_GET_XIVE                       (RTAS_TOKEN_BASE + 0x0B)
#define RTAS_IBM_INT_OFF                        (RTAS_TOKEN_BASE + 0x0C)
#define RTAS_IBM_INT_ON                         (RTAS_TOKEN_BASE + 0x0D)
#define RTAS_CHECK_EXCEPTION                    (RTAS_TOKEN_BASE + 0x0E)
#define RTAS_EVENT_SCAN                         (RTAS_TOKEN_BASE + 0x0F)
#define RTAS_IBM_SET_TCE_BYPASS                 (RTAS_TOKEN_BASE + 0x10)
#define RTAS_QUIESCE                            (RTAS_TOKEN_BASE + 0x11)
#define RTAS_NVRAM_FETCH                        (RTAS_TOKEN_BASE + 0x12)
#define RTAS_NVRAM_STORE                        (RTAS_TOKEN_BASE + 0x13)
#define RTAS_READ_PCI_CONFIG                    (RTAS_TOKEN_BASE + 0x14)
#define RTAS_WRITE_PCI_CONFIG                   (RTAS_TOKEN_BASE + 0x15)
#define RTAS_IBM_READ_PCI_CONFIG                (RTAS_TOKEN_BASE + 0x16)
#define RTAS_IBM_WRITE_PCI_CONFIG               (RTAS_TOKEN_BASE + 0x17)
#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER  (RTAS_TOKEN_BASE + 0x18)
#define RTAS_IBM_CHANGE_MSI                     (RTAS_TOKEN_BASE + 0x19)
#define RTAS_SET_INDICATOR                      (RTAS_TOKEN_BASE + 0x1A)
#define RTAS_SET_POWER_LEVEL                    (RTAS_TOKEN_BASE + 0x1B)
#define RTAS_GET_POWER_LEVEL                    (RTAS_TOKEN_BASE + 0x1C)
#define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
#define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
#define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
#define RTAS_IBM_SET_EEH_OPTION                 (RTAS_TOKEN_BASE + 0x20)
#define RTAS_IBM_GET_CONFIG_ADDR_INFO2          (RTAS_TOKEN_BASE + 0x21)
#define RTAS_IBM_READ_SLOT_RESET_STATE2         (RTAS_TOKEN_BASE + 0x22)
#define RTAS_IBM_SET_SLOT_RESET                 (RTAS_TOKEN_BASE + 0x23)
#define RTAS_IBM_CONFIGURE_PE                   (RTAS_TOKEN_BASE + 0x24)
#define RTAS_IBM_SLOT_ERROR_DETAIL              (RTAS_TOKEN_BASE + 0x25)
#define RTAS_IBM_QUERY_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x26)
#define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
#define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
#define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)

#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)

/* RTAS ibm,get-system-parameter token values */
#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE        42
#define RTAS_SYSPARM_UUID                        48

/* RTAS indicator/sensor types
 *
 * as defined by PAPR+ 2.7 7.3.5.4, Table 41
 *
 * NOTE: currently only DR-related sensors are implemented here
 */
#define RTAS_SENSOR_TYPE_ISOLATION_STATE        9001
#define RTAS_SENSOR_TYPE_DR                     9002
#define RTAS_SENSOR_TYPE_ALLOCATION_STATE       9003
#define RTAS_SENSOR_TYPE_ENTITY_SENSE           RTAS_SENSOR_TYPE_ALLOCATION_STATE

/* Possible values for the platform-processor-diagnostics-run-mode parameter
 * of the RTAS ibm,get-system-parameter call.
 */
#define DIAGNOSTICS_RUN_MODE_DISABLED  0
#define DIAGNOSTICS_RUN_MODE_STAGGERED 1
#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2
#define DIAGNOSTICS_RUN_MODE_PERIODIC  3

static inline uint64_t ppc64_phys_to_real(uint64_t addr)
{
    return addr & ~0xF000000000000000ULL;
}

static inline uint32_t rtas_ld(target_ulong phys, int n)
{
    return ldl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n));
}

static inline uint64_t rtas_ldq(target_ulong phys, int n)
{
    return (uint64_t)rtas_ld(phys, n) << 32 | rtas_ld(phys, n + 1);
}

static inline void rtas_st(target_ulong phys, int n, uint32_t val)
{
    stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val);
}
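
/*
 * Illustrative sketch: an RTAS call handler typically pulls its inputs out
 * of guest memory with rtas_ld() and writes its status/results back with
 * rtas_st() (the argument layout below is hypothetical):
 *
 *     uint32_t sensor = rtas_ld(args, 0);
 *     ...
 *     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 */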

typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm,
                              uint32_t token,
                              uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets);
void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *sm,
                             uint32_t token, uint32_t nargs, target_ulong args,
                             uint32_t nret, target_ulong rets);
void spapr_dt_rtas_tokens(void *fdt, int rtas);
void spapr_load_rtas(sPAPRMachineState *spapr, void *fdt, hwaddr addr);

#define SPAPR_TCE_PAGE_SHIFT   12
#define SPAPR_TCE_PAGE_SIZE    (1ULL << SPAPR_TCE_PAGE_SHIFT)
#define SPAPR_TCE_PAGE_MASK    (SPAPR_TCE_PAGE_SIZE - 1)

#define SPAPR_VIO_BASE_LIOBN    0x00000000
#define SPAPR_VIO_LIOBN(reg)    (0x00000000 | (reg))
#define SPAPR_PCI_LIOBN(phb_index, window_num)  \
    (0x80000000 | ((phb_index) << 8) | (window_num))
#define SPAPR_IS_PCI_LIOBN(liobn)   (!!((liobn) & 0x80000000))
#define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff)
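
/*
 * For example (illustrative): SPAPR_PCI_LIOBN(1, 0) encodes to 0x80000100;
 * SPAPR_IS_PCI_LIOBN() then recognises the 0x80000000 marker bit, and
 * SPAPR_PCI_DMA_WINDOW_NUM() recovers window 0 from the low byte.
 */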

#define RTAS_ERROR_LOG_MAX      2048

#define RTAS_EVENT_SCAN_RATE    1

/* This helper should be used to encode interrupt specifiers when the related
 * "interrupt-controller" node has its "#interrupt-cells" property set to 2
 * (i.e. VIO devices, RTAS event sources and PHBs).
 */
static inline void spapr_dt_xics_irq(uint32_t *intspec, int irq, bool is_lsi)
{
    intspec[0] = cpu_to_be32(irq);
    intspec[1] = is_lsi ? cpu_to_be32(1) : 0;
}
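
/*
 * E.g. (sketch): when building the device tree for a VIO device, the
 * two-cell interrupt specifier could be filled in as
 *
 *     uint32_t intspec[2];
 *     spapr_dt_xics_irq(intspec, irq, false);
 *     fdt_setprop(fdt, node, "interrupts", intspec, sizeof(intspec));
 *
 * (error handling of the fdt_setprop() return value is omitted here;
 * real callers should check it).
 */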

typedef struct sPAPRTCETable sPAPRTCETable;

#define TYPE_SPAPR_TCE_TABLE "spapr-tce-table"
#define SPAPR_TCE_TABLE(obj) \
    OBJECT_CHECK(sPAPRTCETable, (obj), TYPE_SPAPR_TCE_TABLE)

#define TYPE_SPAPR_IOMMU_MEMORY_REGION "spapr-iommu-memory-region"
#define SPAPR_IOMMU_MEMORY_REGION(obj) \
    OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_SPAPR_IOMMU_MEMORY_REGION)

struct sPAPRTCETable {
    DeviceState parent;
    uint32_t liobn;
    uint32_t nb_table;
    uint64_t bus_offset;
    uint32_t page_shift;
    uint64_t *table;
    uint32_t mig_nb_table;
    uint64_t *mig_table;
    bool bypass;
    bool need_vfio;
    int fd;
    MemoryRegion root;
    IOMMUMemoryRegion iommu;
    struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
    QLIST_ENTRY(sPAPRTCETable) list;
};

sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn);

struct sPAPREventLogEntry {
    uint32_t summary;
    uint32_t extended_length;
    void *extended_log;
    QTAILQ_ENTRY(sPAPREventLogEntry) next;
};
2015-07-02 09:23:04 +03:00
|
|
|
void spapr_events_init(sPAPRMachineState *sm);
|
2016-10-27 05:20:26 +03:00
|
|
|
void spapr_dt_events(sPAPRMachineState *sm, void *fdt);
|
2015-07-02 09:23:04 +03:00
|
|
|
int spapr_h_cas_compose_response(sPAPRMachineState *sm,
                                 target_ulong addr, target_ulong size,
                                 sPAPROptionVector *ov5_updates);
void close_htab_fd(sPAPRMachineState *spapr);
void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr);
void spapr_free_hpt(sPAPRMachineState *spapr);

sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn);
void spapr_tce_table_enable(sPAPRTCETable *tcet,
                            uint32_t page_shift, uint64_t bus_offset,
                            uint32_t nb_table);
void spapr_tce_table_disable(sPAPRTCETable *tcet);
void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio);
MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
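
/*
 * Illustrative sketch only (owner, liobn, window_size and root_mr are
 * hypothetical; SPAPR_TCE_PAGE_SHIFT assumed from earlier in this
 * header): the typical lifecycle suggested by the prototypes above.
 *
 *   sPAPRTCETable *tcet = spapr_tce_new_table(owner, liobn);
 *   spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, 0,
 *                          window_size >> SPAPR_TCE_PAGE_SHIFT);
 *   memory_region_add_subregion(root_mr, 0, spapr_tce_get_iommu(tcet));
 *   ...
 *   spapr_tce_table_disable(tcet);
 */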

int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                 uint32_t liobn, uint64_t window, uint32_t size);
int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
                      sPAPRTCETable *tcet);
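
/*
 * Sketch (fdt, node_off and tcet are hypothetical; "ibm,my-dma-window"
 * is the property name used for VIO devices): emit a device's DMA
 * window from its TCE table:
 *
 *   spapr_tcet_dma_dt(fdt, node_off, "ibm,my-dma-window", tcet);
 */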

void spapr_pci_switch_vga(bool big_endian);

void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc);
void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc);
void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
                                    uint32_t count);
void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
                                       uint32_t count);
void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type,
                                            uint32_t count, uint32_t index);
void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type,
                                               uint32_t count, uint32_t index);
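
/*
 * Sketch (the count and drc_index values are hypothetical): signal
 * hot-add of four LMBs starting at a given DRC index:
 *
 *   spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
 *                                          4, drc_index);
 */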

int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
                          Error **errp);
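
/*
 * Sketch of how these two fit together (machine->maxram_size and the
 * use of &error_fatal are assumptions): size the HPT for the maximum
 * RAM and (re)allocate it:
 *
 *   int shift = spapr_hpt_shift_for_ramsize(machine->maxram_size);
 *   spapr_reallocate_hpt(spapr, shift, &error_fatal);
 */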
void spapr_clear_pending_events(sPAPRMachineState *spapr);

/* CPU and LMB DRC release callbacks. */
void spapr_core_release(DeviceState *dev);
void spapr_lmb_release(DeviceState *dev);

void spapr_rtc_read(sPAPRRTCState *rtc, struct tm *tm, uint32_t *ns);
int spapr_rtc_import_offset(sPAPRRTCState *rtc, int64_t legacy_offset);
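
/*
 * Sketch (caller context assumed): read the guest-visible time of day:
 *
 *   struct tm tm;
 *   uint32_t ns;
 *   spapr_rtc_read(&spapr->rtc, &tm, &ns);
 */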

#define TYPE_SPAPR_RNG "spapr-rng"

#define SPAPR_MEMORY_BLOCK_SIZE ((hwaddr)1 << 28) /* 256MB */

/*
 * This defines the maximum number of DIMM slots we can have for an sPAPR
 * guest. This is not defined by sPAPR, but we are defining it as 32 slots
 * based on the default number of slots provided by the PowerPC kernel.
 */
#define SPAPR_MAX_RAM_SLOTS 32

/* 1 GiB alignment for the hotplug memory region */
#define SPAPR_DEVICE_MEM_ALIGN (1 * GiB)

/*
 * Number of 32-bit words in each LMB list entry in the ibm,dynamic-memory
 * property under the ibm,dynamic-reconfiguration-memory node.
 */
#define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6
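
/*
 * For reference, the six words per entry are believed to be: the 64-bit
 * LMB base address (two words), the DRC index, a reserved word, the
 * associativity list index, and the flags word (SPAPR_LMB_FLAGS_* below).
 */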

/*
 * Defines for the flag value in the ibm,dynamic-memory property under
 * the ibm,dynamic-reconfiguration-memory node.
 */
#define SPAPR_LMB_FLAGS_ASSIGNED    0x00000008
#define SPAPR_LMB_FLAGS_DRC_INVALID 0x00000020
#define SPAPR_LMB_FLAGS_RESERVED    0x00000080

void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg);

#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
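/* e.g. htab_shift == 24 yields a 16 MiB (1ULL << 24) hash page table */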

int spapr_get_vcpu_id(PowerPCCPU *cpu);
void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp);
PowerPCCPU *spapr_find_cpu(int vcpu_id);
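
/*
 * Sketch (the hcall-handler context is hypothetical): map a
 * guest-supplied vcpu id back to its CPU and fail the call otherwise:
 *
 *   PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
 *   if (!cpu) {
 *       return H_PARAMETER;
 *   }
 */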

int spapr_caps_pre_load(void *opaque);
int spapr_caps_pre_save(void *opaque);

/*
 * Handling of optional capabilities
 */
extern const VMStateDescription vmstate_spapr_cap_htm;
extern const VMStateDescription vmstate_spapr_cap_vsx;
extern const VMStateDescription vmstate_spapr_cap_dfp;
extern const VMStateDescription vmstate_spapr_cap_cfpc;
extern const VMStateDescription vmstate_spapr_cap_sbbc;
extern const VMStateDescription vmstate_spapr_cap_ibs;
extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;

static inline uint8_t spapr_get_cap(sPAPRMachineState *spapr, int cap)
{
    return spapr->eff.caps[cap];
}
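
/*
 * Sketch (assumes the SPAPR_CAP_HTM and SPAPR_CAP_ON constants defined
 * earlier in this header): gate a feature on a negotiated capability:
 *
 *   if (spapr_get_cap(spapr, SPAPR_CAP_HTM) == SPAPR_CAP_ON) {
 *       ... advertise transactional memory to the guest ...
 *   }
 */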

void spapr_caps_init(sPAPRMachineState *spapr);
void spapr_caps_apply(sPAPRMachineState *spapr);
void spapr_caps_cpu_apply(sPAPRMachineState *spapr, PowerPCCPU *cpu);
void spapr_caps_add_properties(sPAPRMachineClass *smc, Error **errp);
int spapr_caps_post_migration(sPAPRMachineState *spapr);

void spapr_check_pagesize(sPAPRMachineState *spapr, hwaddr pagesize,
                          Error **errp);

#endif /* HW_SPAPR_H */