qemu/hw/core/cpu.c

459 lines
12 KiB
C
Raw Normal View History

/*
* QEMU CPU model
*
* Copyright (c) 2012-2014 SUSE LINUX Products GmbH
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see
* <http://www.gnu.org/licenses/gpl-2.0.html>
*/
#include "qemu/osdep.h"
2016-03-14 11:01:28 +03:00
#include "qapi/error.h"
#include "hw/core/cpu.h"
#include "sysemu/hw_accel.h"
#include "qemu/notify.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "exec/log.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "sysemu/tcg.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
#include "trace-root.h"
#include "qemu/plugin.h"
CPUInterruptHandler cpu_interrupt_handler;
CPUState *cpu_by_arch_id(int64_t id)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->get_arch_id(cpu) == id) {
return cpu;
}
}
return NULL;
}
bool cpu_exists(int64_t id)
{
return !!cpu_by_arch_id(id);
}
CPUState *cpu_create(const char *typename)
{
Error *err = NULL;
CPUState *cpu = CPU(object_new(typename));
if (!qdev_realize(DEVICE(cpu), NULL, &err)) {
error_report_err(err);
object_unref(OBJECT(cpu));
exit(EXIT_FAILURE);
}
return cpu;
}
bool cpu_paging_enabled(const CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
return cc->get_paging_enabled(cpu);
}
static bool cpu_common_get_paging_enabled(const CPUState *cpu)
{
return false;
}
void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list,
Error **errp)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
cc->get_memory_mapping(cpu, list, errp);
}
static void cpu_common_get_memory_mapping(CPUState *cpu,
MemoryMappingList *list,
Error **errp)
{
error_setg(errp, "Obtaining memory mappings is unsupported on this CPU.");
}
tcg: drop global lock during TCG code execution This finally allows TCG to benefit from the iothread introduction: Drop the global mutex while running pure TCG CPU code. Reacquire the lock when entering MMIO or PIO emulation, or when leaving the TCG loop. We have to revert a few optimization for the current TCG threading model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not kicking it in qemu_cpu_kick. We also need to disable RAM block reordering until we have a more efficient locking mechanism at hand. Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. These numbers demonstrate where we gain something: 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm The guest CPU was fully loaded, but the iothread could still run mostly independent on a second core. Without the patch we don't get beyond 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm We don't benefit significantly, though, when the guest is not fully loading a host CPU. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> [FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex] Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> [EGC: fixed iothread lock for cpu-exec IRQ handling] Signed-off-by: Emilio G. Cota <cota@braap.org> [AJB: -smp single-threaded fix, clean commit msg, BQL fixes] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Pranith Kumar <bobby.prani@gmail.com> [PM: target-arm changes] Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 21:29:11 +03:00
/* Resetting the IRQ comes from across the code base so we take the
* BQL here if we need to. cpu_interrupt assumes it is held.*/
void cpu_reset_interrupt(CPUState *cpu, int mask)
{
tcg: drop global lock during TCG code execution This finally allows TCG to benefit from the iothread introduction: Drop the global mutex while running pure TCG CPU code. Reacquire the lock when entering MMIO or PIO emulation, or when leaving the TCG loop. We have to revert a few optimization for the current TCG threading model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not kicking it in qemu_cpu_kick. We also need to disable RAM block reordering until we have a more efficient locking mechanism at hand. Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. These numbers demonstrate where we gain something: 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm The guest CPU was fully loaded, but the iothread could still run mostly independent on a second core. Without the patch we don't get beyond 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm We don't benefit significantly, though, when the guest is not fully loading a host CPU. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> [FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex] Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> [EGC: fixed iothread lock for cpu-exec IRQ handling] Signed-off-by: Emilio G. Cota <cota@braap.org> [AJB: -smp single-threaded fix, clean commit msg, BQL fixes] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Pranith Kumar <bobby.prani@gmail.com> [PM: target-arm changes] Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 21:29:11 +03:00
bool need_lock = !qemu_mutex_iothread_locked();
if (need_lock) {
qemu_mutex_lock_iothread();
}
cpu->interrupt_request &= ~mask;
tcg: drop global lock during TCG code execution This finally allows TCG to benefit from the iothread introduction: Drop the global mutex while running pure TCG CPU code. Reacquire the lock when entering MMIO or PIO emulation, or when leaving the TCG loop. We have to revert a few optimization for the current TCG threading model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not kicking it in qemu_cpu_kick. We also need to disable RAM block reordering until we have a more efficient locking mechanism at hand. Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. These numbers demonstrate where we gain something: 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm The guest CPU was fully loaded, but the iothread could still run mostly independent on a second core. Without the patch we don't get beyond 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm We don't benefit significantly, though, when the guest is not fully loading a host CPU. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> [FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex] Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> [EGC: fixed iothread lock for cpu-exec IRQ handling] Signed-off-by: Emilio G. Cota <cota@braap.org> [AJB: -smp single-threaded fix, clean commit msg, BQL fixes] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Pranith Kumar <bobby.prani@gmail.com> [PM: target-arm changes] Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 21:29:11 +03:00
if (need_lock) {
qemu_mutex_unlock_iothread();
}
}
void cpu_exit(CPUState *cpu)
{
atomic_set(&cpu->exit_request, 1);
/* Ensure cpu_exec will see the exit request after TCG has exited. */
smp_wmb();
atomic_set(&cpu->icount_decr_ptr->u16.high, -1);
}
int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu,
void *opaque)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
return (*cc->write_elf32_qemunote)(f, cpu, opaque);
}
static int cpu_common_write_elf32_qemunote(WriteCoreDumpFunction f,
CPUState *cpu, void *opaque)
{
return 0;
}
int cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu,
int cpuid, void *opaque)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
return (*cc->write_elf32_note)(f, cpu, cpuid, opaque);
}
static int cpu_common_write_elf32_note(WriteCoreDumpFunction f,
CPUState *cpu, int cpuid,
void *opaque)
{
return -1;
}
int cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu,
void *opaque)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
return (*cc->write_elf64_qemunote)(f, cpu, opaque);
}
static int cpu_common_write_elf64_qemunote(WriteCoreDumpFunction f,
CPUState *cpu, void *opaque)
{
return 0;
}
int cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu,
int cpuid, void *opaque)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
return (*cc->write_elf64_note)(f, cpu, cpuid, opaque);
}
static int cpu_common_write_elf64_note(WriteCoreDumpFunction f,
CPUState *cpu, int cpuid,
void *opaque)
{
return -1;
}
static int cpu_common_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg)
{
return 0;
}
static int cpu_common_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg)
{
return 0;
}
static bool cpu_common_debug_check_watchpoint(CPUState *cpu, CPUWatchpoint *wp)
{
/* If no extra check is required, QEMU watchpoint match can be considered
* as an architectural match.
*/
return true;
}
static bool cpu_common_virtio_is_big_endian(CPUState *cpu)
{
return target_words_bigendian();
}
static void cpu_common_noop(CPUState *cpu)
{
}
static bool cpu_common_exec_interrupt(CPUState *cpu, int int_req)
{
return false;
}
#if !defined(CONFIG_USER_ONLY)
GuestPanicInformation *cpu_get_crash_info(CPUState *cpu)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
GuestPanicInformation *res = NULL;
if (cc->get_crash_info) {
res = cc->get_crash_info(cpu);
}
return res;
}
#endif
void cpu_dump_state(CPUState *cpu, FILE *f, int flags)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->dump_state) {
cpu_synchronize_state(cpu);
cc->dump_state(cpu, f, flags);
}
}
void cpu_dump_statistics(CPUState *cpu, int flags)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->dump_statistics) {
cc->dump_statistics(cpu, flags);
}
}
void cpu_reset(CPUState *cpu)
{
cpu: Use DeviceClass reset instead of a special CPUClass reset The CPUClass has a 'reset' method. This is a legacy from when TYPE_CPU used not to inherit from TYPE_DEVICE. We don't need it any more, as we can simply use the TYPE_DEVICE reset. The 'cpu_reset()' function is kept as the API which most places use to reset a CPU; it is now a wrapper which calls device_cold_reset() and then the tracepoint function. This change should not cause CPU objects to be reset more often than they are at the moment, because: * nobody is directly calling device_cold_reset() or qdev_reset_all() on CPU objects * no CPU object is on a qbus, so they will not be reset either by somebody calling qbus_reset_all()/bus_cold_reset(), or by the main "reset sysbus and everything in the qbus tree" reset that most devices are reset by Note that this does not change the need for each machine or whatever to use qemu_register_reset() to arrange to call cpu_reset() -- that is necessary because CPU objects are not on any qbus, so they don't get reset when the qbus tree rooted at the sysbus bus is reset, and this isn't being changed here. All the changes to the files under target/ were made using the included Coccinelle script, except: (1) the deletion of the now-inaccurate and not terribly useful "CPUClass::reset" comments was done with a perl one-liner afterwards: perl -n -i -e '/ CPUClass::reset/ or print' target/*/*.c (2) this bit of the s390 change was done by hand, because the Coccinelle script is not sophisticated enough to handle the parent_reset call being inside another function: | @@ -96,8 +96,9 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) | S390CPU *cpu = S390_CPU(s); | S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); | CPUS390XState *env = &cpu->env; |+ DeviceState *dev = DEVICE(s); | |- scc->parent_reset(s); |+ scc->parent_reset(dev); | cpu->env.sigp_order = 0; | s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Message-Id: <20200303100511.5498-1-peter.maydell@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2020-03-03 13:05:11 +03:00
device_cold_reset(DEVICE(cpu));
trace_guest_cpu_reset(cpu);
}
cpu: Use DeviceClass reset instead of a special CPUClass reset The CPUClass has a 'reset' method. This is a legacy from when TYPE_CPU used not to inherit from TYPE_DEVICE. We don't need it any more, as we can simply use the TYPE_DEVICE reset. The 'cpu_reset()' function is kept as the API which most places use to reset a CPU; it is now a wrapper which calls device_cold_reset() and then the tracepoint function. This change should not cause CPU objects to be reset more often than they are at the moment, because: * nobody is directly calling device_cold_reset() or qdev_reset_all() on CPU objects * no CPU object is on a qbus, so they will not be reset either by somebody calling qbus_reset_all()/bus_cold_reset(), or by the main "reset sysbus and everything in the qbus tree" reset that most devices are reset by Note that this does not change the need for each machine or whatever to use qemu_register_reset() to arrange to call cpu_reset() -- that is necessary because CPU objects are not on any qbus, so they don't get reset when the qbus tree rooted at the sysbus bus is reset, and this isn't being changed here. All the changes to the files under target/ were made using the included Coccinelle script, except: (1) the deletion of the now-inaccurate and not terribly useful "CPUClass::reset" comments was done with a perl one-liner afterwards: perl -n -i -e '/ CPUClass::reset/ or print' target/*/*.c (2) this bit of the s390 change was done by hand, because the Coccinelle script is not sophisticated enough to handle the parent_reset call being inside another function: | @@ -96,8 +96,9 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) | S390CPU *cpu = S390_CPU(s); | S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); | CPUS390XState *env = &cpu->env; |+ DeviceState *dev = DEVICE(s); | |- scc->parent_reset(s); |+ scc->parent_reset(dev); | cpu->env.sigp_order = 0; | s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Message-Id: <20200303100511.5498-1-peter.maydell@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2020-03-03 13:05:11 +03:00
static void cpu_common_reset(DeviceState *dev)
{
cpu: Use DeviceClass reset instead of a special CPUClass reset The CPUClass has a 'reset' method. This is a legacy from when TYPE_CPU used not to inherit from TYPE_DEVICE. We don't need it any more, as we can simply use the TYPE_DEVICE reset. The 'cpu_reset()' function is kept as the API which most places use to reset a CPU; it is now a wrapper which calls device_cold_reset() and then the tracepoint function. This change should not cause CPU objects to be reset more often than they are at the moment, because: * nobody is directly calling device_cold_reset() or qdev_reset_all() on CPU objects * no CPU object is on a qbus, so they will not be reset either by somebody calling qbus_reset_all()/bus_cold_reset(), or by the main "reset sysbus and everything in the qbus tree" reset that most devices are reset by Note that this does not change the need for each machine or whatever to use qemu_register_reset() to arrange to call cpu_reset() -- that is necessary because CPU objects are not on any qbus, so they don't get reset when the qbus tree rooted at the sysbus bus is reset, and this isn't being changed here. All the changes to the files under target/ were made using the included Coccinelle script, except: (1) the deletion of the now-inaccurate and not terribly useful "CPUClass::reset" comments was done with a perl one-liner afterwards: perl -n -i -e '/ CPUClass::reset/ or print' target/*/*.c (2) this bit of the s390 change was done by hand, because the Coccinelle script is not sophisticated enough to handle the parent_reset call being inside another function: | @@ -96,8 +96,9 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) | S390CPU *cpu = S390_CPU(s); | S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); | CPUS390XState *env = &cpu->env; |+ DeviceState *dev = DEVICE(s); | |- scc->parent_reset(s); |+ scc->parent_reset(dev); | cpu->env.sigp_order = 0; | s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Message-Id: <20200303100511.5498-1-peter.maydell@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2020-03-03 13:05:11 +03:00
CPUState *cpu = CPU(dev);
CPUClass *cc = CPU_GET_CLASS(cpu);
if (qemu_loglevel_mask(CPU_LOG_RESET)) {
qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
log_cpu_state(cpu, cc->reset_dump_flags);
}
cpu->interrupt_request = 0;
cpu->halted = 0;
cpu->mem_io_pc = 0;
cpu->icount_extra = 0;
atomic_set(&cpu->icount_decr_ptr->u32, 0);
cpu->can_do_io = 1;
cpu->exception_index = -1;
cpu->crash_occurred = false;
cpu->cflags_next_tb = -1;
if (tcg_enabled()) {
tcg: consistently access cpu->tb_jmp_cache atomically Some code paths can lead to atomic accesses racing with memset() on cpu->tb_jmp_cache, which can result in torn reads/writes and is undefined behaviour in C11. These torn accesses are unlikely to show up as bugs, but from code inspection they seem possible. For example, tb_phys_invalidate does: /* remove the TB from the hash list */ h = tb_jmp_cache_hash_func(tb->pc); CPU_FOREACH(cpu) { if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) { atomic_set(&cpu->tb_jmp_cache[h], NULL); } } Here atomic_set might race with a concurrent memset (such as the ones scheduled via "unsafe" async work, e.g. tlb_flush_page) and therefore we might end up with a torn pointer (or who knows what, because we are under undefined behaviour). This patch converts parallel accesses to cpu->tb_jmp_cache to use atomic primitives, thereby bringing these accesses back to defined behaviour. The price to pay is to potentially execute more instructions when clearing cpu->tb_jmp_cache, but given how infrequently they happen and the small size of the cache, the performance impact I have measured is within noise range when booting debian-arm. Note that under "safe async" work (e.g. do_tb_flush) we could use memset because no other vcpus are running. However I'm keeping these accesses atomic as well to keep things simple and to avoid confusing analysis tools such as ThreadSanitizer. Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1497486973-25845-1-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net>
2017-06-15 03:36:13 +03:00
cpu_tb_jmp_cache_clear(cpu);
tcg_flush_softmmu_tlb(cpu);
}
}
static bool cpu_common_has_work(CPUState *cs)
{
return false;
}
ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model)
{
CPUClass *cc = CPU_CLASS(object_class_by_name(typename));
assert(cpu_model && cc->class_by_name);
return cc->class_by_name(cpu_model);
}
static void cpu_common_parse_features(const char *typename, char *features,
Error **errp)
{
char *val;
static bool cpu_globals_initialized;
/* Single "key=value" string being parsed */
char *featurestr = features ? strtok(features, ",") : NULL;
/* should be called only once, catch invalid users */
assert(!cpu_globals_initialized);
cpu_globals_initialized = true;
while (featurestr) {
val = strchr(featurestr, '=');
if (val) {
GlobalProperty *prop = g_new0(typeof(*prop), 1);
*val = 0;
val++;
prop->driver = typename;
prop->property = g_strdup(featurestr);
prop->value = g_strdup(val);
qdev_prop_register_global(prop);
} else {
error_setg(errp, "Expected key=value format, found %s.",
featurestr);
return;
}
featurestr = strtok(NULL, ",");
}
}
static void cpu_common_realizefn(DeviceState *dev, Error **errp)
{
CPUState *cpu = CPU(dev);
Object *machine = qdev_get_machine();
/* qdev_get_machine() can return something that's not TYPE_MACHINE
* if this is one of the user-only emulators; in that case there's
* no need to check the ignore_memory_transaction_failures board flag.
*/
if (object_dynamic_cast(machine, TYPE_MACHINE)) {
ObjectClass *oc = object_get_class(machine);
MachineClass *mc = MACHINE_CLASS(oc);
if (mc) {
cpu->ignore_memory_transaction_failures =
mc->ignore_memory_transaction_failures;
}
}
if (dev->hotplugged) {
cpu_synchronize_post_init(cpu);
cpu_resume(cpu);
}
/* NOTE: latest generic point where the cpu is fully realized */
trace_init_vcpu(cpu);
}
qdev: Unrealize must not fail Devices may have component devices and buses. Device realization may fail. Realization is recursive: a device's realize() method realizes its components, and device_set_realized() realizes its buses (which should in turn realize the devices on that bus, except bus_set_realized() doesn't implement that, yet). When realization of a component or bus fails, we need to roll back: unrealize everything we realized so far. If any of these unrealizes failed, the device would be left in an inconsistent state. Must not happen. device_set_realized() lets it happen: it ignores errors in the roll back code starting at label child_realize_fail. Since realization is recursive, unrealization must be recursive, too. But how could a partly failed unrealize be rolled back? We'd have to re-realize, which can fail. This design is fundamentally broken. device_set_realized() does not roll back at all. Instead, it keeps unrealizing, ignoring further errors. It can screw up even for a device with no buses: if the lone dc->unrealize() fails, it still unregisters vmstate, and calls listeners' unrealize() callback. bus_set_realized() does not roll back either. Instead, it stops unrealizing. Fortunately, no unrealize method can fail, as we'll see below. To fix the design error, drop parameter @errp from all the unrealize methods. Any unrealize method that uses @errp now needs an update. This leads us to unrealize() methods that can fail. Merely passing it to another unrealize method cannot cause failure, though. Here are the ones that do other things with @errp: * virtio_serial_device_unrealize() Fails when qbus_set_hotplug_handler() fails, but still does all the other work. On failure, the device would stay realized with its resources completely gone. Oops. Can't happen, because qbus_set_hotplug_handler() can't actually fail here. Pass &error_abort to qbus_set_hotplug_handler() instead. * hw/ppc/spapr_drc.c's unrealize() Fails when object_property_del() fails, but all the other work is already done. On failure, the device would stay realized with its vmstate registration gone. Oops. Can't happen, because object_property_del() can't actually fail here. Pass &error_abort to object_property_del() instead. * spapr_phb_unrealize() Fails and bails out when remove_drcs() fails, but other work is already done. On failure, the device would stay realized with some of its resources gone. Oops. remove_drcs() fails only when chassis_from_bus()'s object_property_get_uint() fails, and it can't here. Pass &error_abort to remove_drcs() instead. Therefore, no unrealize method can fail before this patch. device_set_realized()'s recursive unrealization via bus uses object_property_set_bool(). Can't drop @errp there, so pass &error_abort. We similarly unrealize with object_property_set_bool() elsewhere, always ignoring errors. Pass &error_abort instead. Several unrealize methods no longer handle errors from other unrealize methods: virtio_9p_device_unrealize(), virtio_input_device_unrealize(), scsi_qdev_unrealize(), ... Much of the deleted error handling looks wrong anyway. One unrealize methods no longer ignore such errors: usb_ehci_pci_exit(). Several realize methods no longer ignore errors when rolling back: v9fs_device_realize_common(), pci_qdev_unrealize(), spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(), virtio_device_realize(). Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
static void cpu_common_unrealizefn(DeviceState *dev)
{
CPUState *cpu = CPU(dev);
/* NOTE: latest generic point before the cpu is fully unrealized */
trace_fini_vcpu(cpu);
qemu_plugin_vcpu_exit_hook(cpu);
cpu_exec_unrealizefn(cpu);
}
static void cpu_common_initfn(Object *obj)
{
CPUState *cpu = CPU(obj);
CPUClass *cc = CPU_GET_CLASS(obj);
cpu->cpu_index = UNASSIGNED_CPU_INDEX;
cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX;
cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs;
linux-user-i386: Fix crash on cpuid Running cpuid instructions with a simple run like: i386-linux-user/qemu-i386 tests/tcg/sha1-i386 Results in the following assert: #0 0x00007ffff64246f5 in raise () from /lib64/libc.so.6 #1 0x00007ffff64262fa in abort () from /lib64/libc.so.6 #2 0x00007ffff7937ec5 in g_assertion_message () from /lib64/libglib-2.0.so.0 #3 0x00007ffff7937f5a in g_assertion_message_expr () from /lib64/libglib-2.0.so.0 #4 0x000055555561b54c in apicid_bitwidth_for_count (count=0) at /home/elmarco/src/qemu/include/hw/i386/topology.h:58 #5 0x000055555561b58a in apicid_smt_width (nr_cores=0, nr_threads=0) at /home/elmarco/src/qemu/include/hw/i386/topology.h:67 #6 0x000055555561b5c3 in apicid_core_offset (nr_cores=0, nr_threads=0) at /home/elmarco/src/qemu/include/hw/i386/topology.h:82 #7 0x000055555561b5e3 in apicid_pkg_offset (nr_cores=0, nr_threads=0) at /home/elmarco/src/qemu/include/hw/i386/topology.h:89 #8 0x000055555561dd86 in cpu_x86_cpuid (env=0x555557999550, index=4, count=3, eax=0x7fffffffcae8, ebx=0x7fffffffcaec, ecx=0x7fffffffcaf0, edx=0x7fffffffcaf4) at /home/elmarco/src/qemu/target-i386/cpu.c:2405 #9 0x0000555555638e8e in helper_cpuid (env=0x555557999550) at /home/elmarco/src/qemu/target-i386/misc_helper.c:106 #10 0x000055555599dc5e in static_code_gen_buffer () #11 0x00005555555952f8 in cpu_tb_exec (cpu=0x5555579912d0, itb=0x7ffff4371ab0) at /home/elmarco/src/qemu/cpu-exec.c:166 #12 0x0000555555595c8e in cpu_loop_exec_tb (cpu=0x5555579912d0, tb=0x7ffff4371ab0, last_tb=0x7fffffffd088, tb_exit=0x7fffffffd084, sc=0x7fffffffd0a0) at /home/elmarco/src/qemu/cpu-exec.c:517 #13 0x0000555555595e50 in cpu_exec (cpu=0x5555579912d0) at /home/elmarco/src/qemu/cpu-exec.c:612 #14 0x00005555555c065b in cpu_loop (env=0x555557999550) at /home/elmarco/src/qemu/linux-user/main.c:297 #15 0x00005555555c25b2 in main (argc=2, argv=0x7fffffffd848, envp=0x7fffffffd860) at /home/elmarco/src/qemu/linux-user/main.c:4803 The fields are set in qemu_init_vcpu() with softmmu, but it's a stub with linux-user. Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2016-09-16 18:50:23 +03:00
/* *-user doesn't have configurable SMP topology */
/* the default value is changed by qemu_init_vcpu() for softmmu */
cpu->nr_cores = 1;
cpu->nr_threads = 1;
qemu_mutex_init(&cpu->work_mutex);
QSIMPLEQ_INIT(&cpu->work_list);
QTAILQ_INIT(&cpu->breakpoints);
QTAILQ_INIT(&cpu->watchpoints);
cpu_exec_initfn(cpu);
}
cpu: Convert cpu_index into a bitmap Currently CPUState::cpu_index is monotonically increasing and a newly created CPU always gets the next higher index. The next available index is calculated by counting the existing number of CPUs. This is fine as long as we only add CPUs, but there are architectures which are starting to support CPU removal, too. For an architecture like PowerPC which derives its CPU identifier (device tree ID) from cpu_index, the existing logic of generating cpu_index values causes problems. With the currently proposed method of handling vCPU removal by parking the vCPU fd in QEMU (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html), generating cpu_index this way will not work for PowerPC. This patch changes the way cpu_index is handed out by maintaining a bit map of the CPUs that tracks both addition and removal of CPUs. The CPU bitmap allocation logic is part of cpu_exec_init(), which is called by instance_init routines of various CPU targets. Newly added cpu_exec_exit() API handles the deallocation part and this routine is called from generic CPU instance_finalize. Note: This new CPU enumeration is for !CONFIG_USER_ONLY only. CONFIG_USER_ONLY continues to have the old enumeration logic. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com> [AF: max_cpus -> MAX_CPUMASK_BITS] Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-06-24 05:31:13 +03:00
static void cpu_common_finalize(Object *obj)
{
CPUState *cpu = CPU(obj);
qemu_mutex_destroy(&cpu->work_mutex);
cpu: Convert cpu_index into a bitmap Currently CPUState::cpu_index is monotonically increasing and a newly created CPU always gets the next higher index. The next available index is calculated by counting the existing number of CPUs. This is fine as long as we only add CPUs, but there are architectures which are starting to support CPU removal, too. For an architecture like PowerPC which derives its CPU identifier (device tree ID) from cpu_index, the existing logic of generating cpu_index values causes problems. With the currently proposed method of handling vCPU removal by parking the vCPU fd in QEMU (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html), generating cpu_index this way will not work for PowerPC. This patch changes the way cpu_index is handed out by maintaining a bit map of the CPUs that tracks both addition and removal of CPUs. The CPU bitmap allocation logic is part of cpu_exec_init(), which is called by instance_init routines of various CPU targets. Newly added cpu_exec_exit() API handles the deallocation part and this routine is called from generic CPU instance_finalize. Note: This new CPU enumeration is for !CONFIG_USER_ONLY only. CONFIG_USER_ONLY continues to have the old enumeration logic. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com> [AF: max_cpus -> MAX_CPUMASK_BITS] Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-06-24 05:31:13 +03:00
}
static int64_t cpu_common_get_arch_id(CPUState *cpu)
{
return cpu->cpu_index;
}
static vaddr cpu_adjust_watchpoint_address(CPUState *cpu, vaddr addr, int len)
{
return addr;
}
static void generic_handle_interrupt(CPUState *cpu, int mask)
{
cpu->interrupt_request |= mask;
if (!qemu_cpu_is_self(cpu)) {
qemu_cpu_kick(cpu);
}
}
CPUInterruptHandler cpu_interrupt_handler = generic_handle_interrupt;
static void cpu_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
CPUClass *k = CPU_CLASS(klass);
k->parse_features = cpu_common_parse_features;
k->get_arch_id = cpu_common_get_arch_id;
k->has_work = cpu_common_has_work;
k->get_paging_enabled = cpu_common_get_paging_enabled;
k->get_memory_mapping = cpu_common_get_memory_mapping;
k->write_elf32_qemunote = cpu_common_write_elf32_qemunote;
k->write_elf32_note = cpu_common_write_elf32_note;
k->write_elf64_qemunote = cpu_common_write_elf64_qemunote;
k->write_elf64_note = cpu_common_write_elf64_note;
k->gdb_read_register = cpu_common_gdb_read_register;
k->gdb_write_register = cpu_common_gdb_write_register;
k->virtio_is_big_endian = cpu_common_virtio_is_big_endian;
k->debug_excp_handler = cpu_common_noop;
k->debug_check_watchpoint = cpu_common_debug_check_watchpoint;
k->cpu_exec_enter = cpu_common_noop;
k->cpu_exec_exit = cpu_common_noop;
k->cpu_exec_interrupt = cpu_common_exec_interrupt;
k->adjust_watchpoint_address = cpu_adjust_watchpoint_address;
set_bit(DEVICE_CATEGORY_CPU, dc->categories);
dc->realize = cpu_common_realizefn;
dc->unrealize = cpu_common_unrealizefn;
cpu: Use DeviceClass reset instead of a special CPUClass reset The CPUClass has a 'reset' method. This is a legacy from when TYPE_CPU used not to inherit from TYPE_DEVICE. We don't need it any more, as we can simply use the TYPE_DEVICE reset. The 'cpu_reset()' function is kept as the API which most places use to reset a CPU; it is now a wrapper which calls device_cold_reset() and then the tracepoint function. This change should not cause CPU objects to be reset more often than they are at the moment, because: * nobody is directly calling device_cold_reset() or qdev_reset_all() on CPU objects * no CPU object is on a qbus, so they will not be reset either by somebody calling qbus_reset_all()/bus_cold_reset(), or by the main "reset sysbus and everything in the qbus tree" reset that most devices are reset by Note that this does not change the need for each machine or whatever to use qemu_register_reset() to arrange to call cpu_reset() -- that is necessary because CPU objects are not on any qbus, so they don't get reset when the qbus tree rooted at the sysbus bus is reset, and this isn't being changed here. All the changes to the files under target/ were made using the included Coccinelle script, except: (1) the deletion of the now-inaccurate and not terribly useful "CPUClass::reset" comments was done with a perl one-liner afterwards: perl -n -i -e '/ CPUClass::reset/ or print' target/*/*.c (2) this bit of the s390 change was done by hand, because the Coccinelle script is not sophisticated enough to handle the parent_reset call being inside another function: | @@ -96,8 +96,9 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) | S390CPU *cpu = S390_CPU(s); | S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); | CPUS390XState *env = &cpu->env; |+ DeviceState *dev = DEVICE(s); | |- scc->parent_reset(s); |+ scc->parent_reset(dev); | cpu->env.sigp_order = 0; | s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Message-Id: <20200303100511.5498-1-peter.maydell@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2020-03-03 13:05:11 +03:00
dc->reset = cpu_common_reset;
device_class_set_props(dc, cpu_common_props);
/*
* Reason: CPUs still need special care by board code: wiring up
* IRQs, adding reset handlers, halting non-first CPUs, ...
*/
qdev: Replace cannot_instantiate_with_device_add_yet with !user_creatable cannot_instantiate_with_device_add_yet was introduced by commit efec3dd631d94160288392721a5f9c39e50fb2bc to replace no_user. It was supposed to be a temporary measure. When it was introduced, we had 54 cannot_instantiate_with_device_add_yet=true lines in the code. Today (3 years later) this number has not shrunk: we now have 57 cannot_instantiate_with_device_add_yet=true lines. I think it is safe to say it is not a temporary measure, and we won't see the flag go away soon. Instead of a long field name that misleads people to believe it is temporary, replace it a shorter and less misleading field: user_creatable. Except for code comments, changes were generated using the following Coccinelle patch: @@ expression DC; @@ ( -DC->cannot_instantiate_with_device_add_yet = false; +DC->user_creatable = true; | -DC->cannot_instantiate_with_device_add_yet = true; +DC->user_creatable = false; ) @@ typedef ObjectClass; expression dc; identifier class, data; @@ static void device_class_init(ObjectClass *class, void *data) { ... dc->hotpluggable = true; +dc->user_creatable = true; ... } @@ @@ struct DeviceClass { ... -bool cannot_instantiate_with_device_add_yet; +bool user_creatable; ... } @@ expression DC; @@ ( -!DC->cannot_instantiate_with_device_add_yet +DC->user_creatable | -DC->cannot_instantiate_with_device_add_yet +!DC->user_creatable ) Cc: Alistair Francis <alistair.francis@xilinx.com> Cc: Laszlo Ersek <lersek@redhat.com> Cc: Marcel Apfelbaum <marcel@redhat.com> Cc: Markus Armbruster <armbru@redhat.com> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: Thomas Huth <thuth@redhat.com> Acked-by: Alistair Francis <alistair.francis@xilinx.com> Reviewed-by: Thomas Huth <thuth@redhat.com> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com> Acked-by: Marcel Apfelbaum <marcel@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <20170503203604.31462-2-ehabkost@redhat.com> [ehabkost: kept "TODO remove once we're there" comment] Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-03 23:35:44 +03:00
dc->user_creatable = false;
}
static const TypeInfo cpu_type_info = {
.name = TYPE_CPU,
.parent = TYPE_DEVICE,
.instance_size = sizeof(CPUState),
.instance_init = cpu_common_initfn,
cpu: Convert cpu_index into a bitmap Currently CPUState::cpu_index is monotonically increasing and a newly created CPU always gets the next higher index. The next available index is calculated by counting the existing number of CPUs. This is fine as long as we only add CPUs, but there are architectures which are starting to support CPU removal, too. For an architecture like PowerPC which derives its CPU identifier (device tree ID) from cpu_index, the existing logic of generating cpu_index values causes problems. With the currently proposed method of handling vCPU removal by parking the vCPU fd in QEMU (Ref: http://lists.gnu.org/archive/html/qemu-devel/2015-02/msg02604.html), generating cpu_index this way will not work for PowerPC. This patch changes the way cpu_index is handed out by maintaining a bit map of the CPUs that tracks both addition and removal of CPUs. The CPU bitmap allocation logic is part of cpu_exec_init(), which is called by instance_init routines of various CPU targets. Newly added cpu_exec_exit() API handles the deallocation part and this routine is called from generic CPU instance_finalize. Note: This new CPU enumeration is for !CONFIG_USER_ONLY only. CONFIG_USER_ONLY continues to have the old enumeration logic. Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com> Reviewed-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com> [AF: max_cpus -> MAX_CPUMASK_BITS] Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-06-24 05:31:13 +03:00
.instance_finalize = cpu_common_finalize,
.abstract = true,
.class_size = sizeof(CPUClass),
.class_init = cpu_class_init,
};
static void cpu_register_types(void)
{
type_register_static(&cpu_type_info);
}
type_init(cpu_register_types)