qemu/hw/ppc/spapr_cpu_core.c

396 lines
11 KiB
C
Raw Normal View History

/*
* sPAPR CPU core device, acts as container of CPU thread devices.
*
* Copyright (C) 2016 Bharata B Rao <bharata@linux.vnet.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "hw/cpu/core.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "target/ppc/cpu.h"
#include "hw/ppc/spapr.h"
#include "hw/boards.h"
#include "qapi/error.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "target/ppc/kvm_ppc.h"
#include "hw/ppc/ppc.h"
#include "target/ppc/mmu-hash64.h"
#include "sysemu/numa.h"
#include "sysemu/hw_accel.h"
#include "qemu/error-report.h"
/*
 * Reset handler for a single sPAPR vCPU thread.
 *
 * @opaque: the PowerPCCPU being reset (this function is registered with
 *          qemu_register_reset() and also called directly at realize time).
 *
 * Performs the generic CPU reset, then applies the pseries specific
 * state: compatibility mode, halted state, LPCR/AMOR setup, clearing of
 * the guest registered VPA/SLB shadow/DTL areas and the machine
 * capabilities.
 */
static void spapr_cpu_reset(void *opaque)
{
    PowerPCCPU *cpu = opaque;
    CPUState *cpu_state = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    PowerPCCPUClass *cpu_class = POWERPC_CPU_GET_CLASS(cpu);
    sPAPRCPUState *machine_data = spapr_cpu_state(cpu);
    target_ulong new_lpcr;

    cpu_reset(cpu_state);

    /*
     * The compatibility mode must match the one of the boot CPU, which
     * was set up either by the machine reset code or by CAS.  Failure is
     * not expected here, hence error_abort.
     */
    ppc_set_compat(cpu, POWERPC_CPU(first_cpu)->compat_pvr, &error_abort);

    /*
     * Every CPU starts out halted.  CPU0 gets unhalted by the machine
     * level reset code; the others are started explicitly by the guest
     * through an RTAS call.
     */
    cpu_state->halted = 1;

    env->spr[SPR_HIOR] = 0;

    /*
     * Program the emulated LPCR so that interrupts are not sent to the
     * hypervisor.  Under KVM the real HW LPCR is set up differently by
     * KVM itself; the values below make TCG behave correctly without a
     * real hypervisor.
     *
     * With VPM0 cleared, mmu-hash64.c uses RMOR for real mode accesses;
     * RMOR defaults to 0 and is not accessible in guest mode.
     *
     * The Power-saving mode Exit Cause bits are cleared too, so that no
     * spurious wakeups happen before an RTAS start-cpu call.
     */
    new_lpcr = env->spr[SPR_LPCR];
    new_lpcr = (new_lpcr & ~(LPCR_VPM0 | LPCR_VPM1 | LPCR_ISL | LPCR_KBV |
                             cpu_class->lpcr_pm))
               | LPCR_LPES0 | LPCR_LPES1;

    /* RMLS is set to its maximum (ie, 16G) */
    new_lpcr = (new_lpcr & ~LPCR_RMLS) | (1ull << LPCR_RMLS_SHIFT);

    ppc_store_lpcr(cpu, new_lpcr);

    /* A full AMOR lets the guest use the AMR as it sees fit */
    env->spr[SPR_AMOR] = 0xffffffffffffffffull;

    /* Forget every per-vCPU area the guest may have registered */
    machine_data->vpa_addr = 0;
    machine_data->slb_shadow_addr = 0;
    machine_data->slb_shadow_size = 0;
    machine_data->dtl_addr = 0;
    machine_data->dtl_size = 0;

    spapr_caps_cpu_apply(SPAPR_MACHINE(qdev_get_machine()), cpu);
}
/*
 * Prepare @cpu to start executing guest code.
 *
 * @cpu: the CPU to start
 * @nip: value loaded into the next instruction pointer
 * @r3:  value loaded into GPR3
 *
 * The CPU is unhalted and the Power-saving mode Exit Cause bits of its
 * class (lpcr_pm) are turned on in the LPCR.
 */
void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip, target_ulong r3)
{
    PowerPCCPUClass *cpu_class = POWERPC_CPU_GET_CLASS(cpu);
    CPUPPCState *env = &cpu->env;
    target_ulong lpcr_with_pm;

    env->nip = nip;
    env->gpr[3] = r3;
    CPU(cpu)->halted = 0;

    /* Enable Power-saving mode Exit Cause exceptions */
    lpcr_with_pm = env->spr[SPR_LPCR] | cpu_class->lpcr_pm;
    ppc_store_lpcr(cpu, lpcr_with_pm);
}
/*
 * Return the sPAPR CPU core type name matching @cpu_type.
 *
 * @cpu_type: a CPU type name; its trailing POWERPC_CPU_TYPE_SUFFIX sized
 *            part is dropped and the remainder is used to build the core
 *            type name with SPAPR_CPU_CORE_TYPE_NAME().
 *
 * Returns the name of the registered core class, or NULL when no class
 * with that name exists.  The returned string is the one provided by
 * object_class_get_name() and is not allocated by this function.
 */
const char *spapr_get_cpu_core_type(const char *cpu_type)
{
    /* Number of characters of @cpu_type that precede the type suffix */
    int model_len = strlen(cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX);
    char *core_type_name;
    ObjectClass *core_class;

    core_type_name = g_strdup_printf(SPAPR_CPU_CORE_TYPE_NAME("%.*s"),
                                     model_len, cpu_type);
    core_class = object_class_by_name(core_type_name);
    g_free(core_type_name);

    return core_class ? object_class_get_name(core_class) : NULL;
}
/*
 * Undo spapr_realize_vcpu() for @cpu: drop its reset handler, unparent
 * its interrupt controller object, remove the CPU with
 * cpu_remove_sync() and finally unparent the CPU object itself.
 */
static void spapr_unrealize_vcpu(PowerPCCPU *cpu)
{
    CPUState *cpu_state = CPU(cpu);
    Object *cpu_obj = OBJECT(cpu);

    qemu_unregister_reset(spapr_cpu_reset, cpu);
    object_unparent(cpu->intc);
    cpu_remove_sync(cpu_state);
    object_unparent(cpu_obj);
}
/*
 * DeviceClass::unrealize handler of the sPAPR CPU core.
 *
 * @dev:  the core device
 * @errp: unused by this implementation
 *
 * Unrealizes every thread of the core, then releases the thread array.
 *
 * NOTE(review): spapr_create_vcpu() allocates cpu->machine_data and may
 * register vmstate_spapr_cpu_state for it, but nothing on this path
 * unregisters or frees them - confirm whether this is handled elsewhere.
 */
static void spapr_cpu_core_unrealize(DeviceState *dev, Error **errp)
{
    sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
    CPUCore *cpu_core = CPU_CORE(dev);
    int thread = 0;

    while (thread < cpu_core->nr_threads) {
        spapr_unrealize_vcpu(core->threads[thread]);
        thread++;
    }

    g_free(core->threads);
}
/*
 * spapr_cpu_core: migrate VPA related state
 *
 * QEMU implements the "Shared Processor LPAR" (SPLPAR) option, which
 * allows the hypervisor to time-slice a physical processor into multiple
 * virtual processors. The intent is to allow more guests to run, and to
 * optimize processor utilization.
 *
 * The guest OS can cede idle VCPUs, so that their processing capacity may
 * be used by other VCPUs, with the H_CEDE hcall. The guest OS can also
 * optimize spinlocks, by conferring the time-slice of a spinning VCPU to
 * the spinlock holder if it's currently not running, with the H_CONFER
 * hcall.
 *
 * Both hcalls depend on a "Virtual Processor Area" (VPA) to be registered
 * by the guest OS, generally during early boot. Other per-VCPU areas can
 * be registered: the "SLB Shadow Buffer" which allows a more efficient
 * dispatching of VCPUs, and the "Dispatch Trace Log Buffer" (DTL) which
 * is used to compute time stolen by the hypervisor. Both DTL and SLB
 * Shadow areas depend on the VPA to be registered.
 *
 * The VPA/SLB Shadow/DTL are state that QEMU should migrate, but this
 * doesn't happen, for no apparent reason other than it was just never
 * coded. This causes the features listed above to stop working after
 * migration, and it breaks the logic of the H_REGISTER_VPA hcall in the
 * destination.
 *
 * The VPA is set at the guest request, ie, we don't have to migrate it
 * before the guest has actually set it. This patch hence adds an
 * "spapr_cpu/vpa" subsection to the recently introduced per-CPU machine
 * data migration stream. Since DTL and SLB Shadow are optional and both
 * depend on VPA, they get their own subsections
 * "spapr_cpu/vpa/slb_shadow" and "spapr_cpu/vpa/dtl" hanging from the
 * "spapr_cpu/vpa" subsection.
 *
 * Note that this won't break migration to older QEMUs. It is already
 * handled by only registering the vmstate handler for per-CPU data with
 * newer machine types.
 *
 * Signed-off-by: Greg Kurz <groug@kaod.org>
 * Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
 * 2018-06-18 15:26:49 +03:00
 */
static bool slb_shadow_needed(void *opaque)
{
sPAPRCPUState *spapr_cpu = opaque;
return spapr_cpu->slb_shadow_addr != 0;
}
/*
 * Migration subsection carrying the SLB shadow buffer registration
 * (address and size) of one CPU.  It is gated by slb_shadow_needed(),
 * i.e. by a non-zero slb_shadow_addr, and is listed as a subsection of
 * vmstate_spapr_cpu_vpa.
 */
static const VMStateDescription vmstate_spapr_cpu_slb_shadow = {
.name = "spapr_cpu/vpa/slb_shadow",
.version_id = 1,
.minimum_version_id = 1,
.needed = slb_shadow_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(slb_shadow_addr, sPAPRCPUState),
VMSTATE_UINT64(slb_shadow_size, sPAPRCPUState),
VMSTATE_END_OF_LIST()
}
};
static bool dtl_needed(void *opaque)
{
sPAPRCPUState *spapr_cpu = opaque;
return spapr_cpu->dtl_addr != 0;
}
/*
 * Migration subsection carrying the dispatch trace log (DTL)
 * registration (address and size) of one CPU.  It is gated by
 * dtl_needed(), i.e. by a non-zero dtl_addr, and is listed as a
 * subsection of vmstate_spapr_cpu_vpa.
 */
static const VMStateDescription vmstate_spapr_cpu_dtl = {
.name = "spapr_cpu/vpa/dtl",
.version_id = 1,
.minimum_version_id = 1,
.needed = dtl_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(dtl_addr, sPAPRCPUState),
VMSTATE_UINT64(dtl_size, sPAPRCPUState),
VMSTATE_END_OF_LIST()
}
};
static bool vpa_needed(void *opaque)
{
sPAPRCPUState *spapr_cpu = opaque;
return spapr_cpu->vpa_addr != 0;
}
/*
 * Migration subsection carrying the VPA address of one CPU.  It is gated
 * by vpa_needed(), i.e. by a non-zero vpa_addr.  The SLB shadow and DTL
 * descriptions hang from it as nested subsections, each with its own
 * .needed callback.
 */
static const VMStateDescription vmstate_spapr_cpu_vpa = {
.name = "spapr_cpu/vpa",
.version_id = 1,
.minimum_version_id = 1,
.needed = vpa_needed,
.fields = (VMStateField[]) {
VMSTATE_UINT64(vpa_addr, sPAPRCPUState),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription * []) {
&vmstate_spapr_cpu_slb_shadow,
&vmstate_spapr_cpu_dtl,
NULL
}
};
/*
 * Per-CPU machine data migration description ("spapr_cpu").
 *
 * The top-level section has no fields of its own.  The VPA related
 * state (VPA address, plus the optional SLB shadow and DTL areas that
 * hang from it) is carried by the "spapr_cpu/vpa" subsection, whose
 * .needed callback only selects it once a VPA address is recorded.
 *
 * This description is registered per CPU by spapr_create_vcpu(), unless
 * the core has its "pre-3.0-migration" property set.
 */
static const VMStateDescription vmstate_spapr_cpu_state = {
    .name = "spapr_cpu",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_spapr_cpu_vpa,
        NULL
    }
};
/*
 * Realize one vCPU thread of a core and hook it into the machine.
 *
 * @cpu:   the CPU thread to realize
 * @spapr: the pseries machine the CPU belongs to
 * @errp:  set on failure
 *
 * On success the CPU is realized, has its time base, virtual hypervisor
 * and reset handler set up, has been reset once and owns an interrupt
 * controller object in cpu->intc.
 *
 * On failure everything done here is rolled back before returning.
 * This matters for hotplug: the CPU thread is started under
 * object_property_set_bool() and assumes it can access the CPU object,
 * so a failing icp_create() must not leave the reset handler registered
 * or the CPU thread alive.
 */
static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                               Error **errp)
{
    CPUPPCState *env = &cpu->env;
    Error *local_err = NULL;

    object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
    if (local_err) {
        goto error;
    }

    /* Set time-base frequency to 512 MHz */
    cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);

    cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
    kvmppc_set_papr(cpu);

    qemu_register_reset(spapr_cpu_reset, cpu);
    spapr_cpu_reset(cpu);

    cpu->intc = icp_create(OBJECT(cpu), spapr->icp_type, XICS_FABRIC(spapr),
                           &local_err);
    if (local_err) {
        goto error_unregister;
    }

    return;

error_unregister:
    /* Undo the reset handler registration and remove the CPU again */
    qemu_unregister_reset(spapr_cpu_reset, cpu);
    cpu_remove_sync(CPU(cpu));
error:
    error_propagate(errp, local_err);
}
/*
 * Create (but do not realize) thread number @i of core @sc.
 *
 * @sc:   the core the thread belongs to
 * @i:    index of the thread within the core
 * @errp: set on failure
 *
 * The new CPU object gets cpu_index core_id + @i, the matching vcpu id,
 * the node id of the core, and is added to the core as child
 * "thread[@i]".  Its machine_data is allocated zeroed and, unless the
 * core has pre_3_0_migration set, registered for migration.
 *
 * Returns the new CPU, or NULL with @errp set.  On success the
 * reference obtained from object_new() is dropped after the CPU has
 * been added as a child of the core.
 */
static PowerPCCPU *spapr_create_vcpu(sPAPRCPUCore *sc, int i, Error **errp)
{
    sPAPRCPUCoreClass *core_class = SPAPR_CPU_CORE_GET_CLASS(sc);
    CPUCore *cpu_core = CPU_CORE(sc);
    Error *local_err = NULL;
    Object *cpu_obj = object_new(core_class->cpu_type);
    CPUState *cpu_state = CPU(cpu_obj);
    PowerPCCPU *cpu = POWERPC_CPU(cpu_obj);

    cpu_state->cpu_index = cpu_core->core_id + i;
    spapr_set_vcpu_id(cpu, cpu_state->cpu_index, &local_err);

    if (!local_err) {
        char *child_name = g_strdup_printf("thread[%d]", i);

        cpu->node_id = sc->node_id;
        object_property_add_child(OBJECT(sc), child_name, cpu_obj, &local_err);
        g_free(child_name);
    }

    if (local_err) {
        /* Drop our reference and report the failure */
        object_unref(cpu_obj);
        error_propagate(errp, local_err);
        return NULL;
    }

    cpu->machine_data = g_new0(sPAPRCPUState, 1);
    if (!sc->pre_3_0_migration) {
        vmstate_register(NULL, cpu_state->cpu_index, &vmstate_spapr_cpu_state,
                         cpu->machine_data);
    }

    object_unref(cpu_obj);
    return cpu;
}
/*
 * Undo spapr_create_vcpu() for @cpu of core @sc: unregister the per-CPU
 * migration state (when it was registered, i.e. pre_3_0_migration is
 * not set), free the machine data and unparent the CPU object.
 */
static void spapr_delete_vcpu(PowerPCCPU *cpu, sPAPRCPUCore *sc)
{
    sPAPRCPUState *machine_data = spapr_cpu_state(cpu);

    if (!sc->pre_3_0_migration) {
        vmstate_unregister(NULL, &vmstate_spapr_cpu_state, cpu->machine_data);
    }

    /* Clear the pointer before releasing the memory it refers to */
    cpu->machine_data = NULL;
    g_free(machine_data);

    object_unparent(OBJECT(cpu));
}
/*
 * DeviceClass::realize handler of the sPAPR CPU core.
 *
 * @dev:  the core device
 * @errp: set on failure
 *
 * Creates all threads of the core first, then realizes them one by one.
 * On failure the work done so far is rolled back: threads that were
 * already realized are unrealized, every created thread is deleted and
 * the thread array is freed.  Without this rollback a hotplug failure
 * would free CPU objects that a still running CPU thread keeps using.
 */
static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
{
    /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user
     * tries to add a sPAPR CPU core to a non-pseries machine.
     */
    sPAPRMachineState *spapr =
        (sPAPRMachineState *) object_dynamic_cast(qdev_get_machine(),
                                                  TYPE_SPAPR_MACHINE);
    sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
    CPUCore *cc = CPU_CORE(OBJECT(dev));
    Error *local_err = NULL;
    int i, j;

    if (!spapr) {
        error_setg(errp, TYPE_SPAPR_CPU_CORE " needs a pseries machine");
        return;
    }

    sc->threads = g_new(PowerPCCPU *, cc->nr_threads);
    for (i = 0; i < cc->nr_threads; i++) {
        sc->threads[i] = spapr_create_vcpu(sc, i, &local_err);
        if (local_err) {
            goto err;
        }
    }

    for (j = 0; j < cc->nr_threads; j++) {
        spapr_realize_vcpu(sc->threads[j], spapr, &local_err);
        if (local_err) {
            goto err_unrealize;
        }
    }
    return;

err_unrealize:
    /*
     * Thread j rolled itself back in spapr_realize_vcpu(); only the
     * threads before it need to be unrealized here.
     *
     * NOTE(review): spapr_unrealize_vcpu() unparents the CPU object and
     * the loop below then hands the same CPUs to spapr_delete_vcpu(),
     * which reads cpu->machine_data and unparents again - confirm the
     * objects are still alive at that point.
     */
    while (--j >= 0) {
        spapr_unrealize_vcpu(sc->threads[j]);
    }
err:
    /* Delete every thread that was successfully created (indexes < i) */
    while (--i >= 0) {
        spapr_delete_vcpu(sc->threads[i], sc);
    }
    g_free(sc->threads);
    error_propagate(errp, local_err);
}
/*
 * qdev properties of the sPAPR CPU core:
 *  - "node-id": stored in sPAPRCPUCore::node_id, defaults to
 *    CPU_UNSET_NUMA_NODE_ID; copied to each thread by spapr_create_vcpu().
 *  - "pre-3.0-migration": stored in sPAPRCPUCore::pre_3_0_migration,
 *    defaults to false; when set, the per-CPU vmstate is neither
 *    registered nor unregistered for the threads of this core.
 */
static Property spapr_cpu_core_properties[] = {
DEFINE_PROP_INT32("node-id", sPAPRCPUCore, node_id, CPU_UNSET_NUMA_NODE_ID),
DEFINE_PROP_BOOL("pre-3.0-migration", sPAPRCPUCore, pre_3_0_migration,
false),
DEFINE_PROP_END_OF_LIST()
};
/*
 * Class initializer shared by all concrete sPAPR CPU core types.
 *
 * @oc:   the class being initialized
 * @data: the class_data of the TypeInfo; stored as the CPU type name
 *        used by spapr_create_vcpu() to instantiate the threads
 */
static void spapr_cpu_core_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *device_class = DEVICE_CLASS(oc);
    sPAPRCPUCoreClass *core_class = SPAPR_CPU_CORE_CLASS(oc);

    /* Device life cycle hooks and properties */
    device_class->realize = spapr_cpu_core_realize;
    device_class->unrealize = spapr_cpu_core_unrealize;
    device_class->props = spapr_cpu_core_properties;

    /* CPU type instantiated for each thread of the core */
    core_class->cpu_type = data;
}
/*
 * Expands to a TypeInfo initializer for the concrete sPAPR CPU core type
 * of @cpu_model (a string literal).  The type derives from
 * TYPE_SPAPR_CPU_CORE, uses spapr_cpu_core_class_init() as class
 * initializer and passes the PowerPC CPU type name of @cpu_model as
 * class_data.
 */
#define DEFINE_SPAPR_CPU_CORE_TYPE(cpu_model) \
{ \
.parent = TYPE_SPAPR_CPU_CORE, \
.class_data = (void *) POWERPC_CPU_TYPE_NAME(cpu_model), \
.class_init = spapr_cpu_core_class_init, \
.name = SPAPR_CPU_CORE_TYPE_NAME(cpu_model), \
}
/*
 * Types provided by this file: the abstract TYPE_SPAPR_CPU_CORE base
 * type (derived from TYPE_CPU_CORE) followed by one concrete core type
 * per supported CPU model.  The "host" core type is only built when
 * CONFIG_KVM is defined.
 */
static const TypeInfo spapr_cpu_core_type_infos[] = {
{
.name = TYPE_SPAPR_CPU_CORE,
.parent = TYPE_CPU_CORE,
.abstract = true,
.instance_size = sizeof(sPAPRCPUCore),
.class_size = sizeof(sPAPRCPUCoreClass),
},
DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"),
DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"),
DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"),
DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"),
DEFINE_SPAPR_CPU_CORE_TYPE("power8e_v2.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power8nvl_v1.0"),
DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"),
DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"),
#ifdef CONFIG_KVM
DEFINE_SPAPR_CPU_CORE_TYPE("host"),
#endif
};
/* Register all the types listed above */
DEFINE_TYPES(spapr_cpu_core_type_infos)