qemu/cpus.c

1974 lines
52 KiB
C
Raw Normal View History

/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"
#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif
#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif
#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif
#endif /* CONFIG_LINUX */
int64_t max_delay;
int64_t max_advance;
/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
bool cpu_is_stopped(CPUState *cpu)
{
return cpu->stopped || !runstate_is_running();
}
static bool cpu_thread_is_idle(CPUState *cpu)
{
if (cpu->stop || cpu->queued_work_first) {
return false;
}
if (cpu_is_stopped(cpu)) {
return true;
}
if (!cpu->halted || cpu_has_work(cpu) ||
kvm_halt_in_kernel()) {
return false;
}
return true;
}
static bool all_cpu_threads_idle(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu_thread_is_idle(cpu)) {
return false;
}
}
return true;
}
/***********************************************************/
/* guest cycle counter */
/* Protected by TimersState seqlock */
static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
typedef struct TimersState {
/* Protected by BQL. */
int64_t cpu_ticks_prev;
int64_t cpu_ticks_offset;
/* cpu_clock_offset can be read out of BQL, so protect it with
* this lock.
*/
QemuSeqLock vm_clock_seqlock;
int64_t cpu_clock_offset;
int32_t cpu_ticks_enabled;
int64_t dummy;
/* Compensate for varying guest execution speed. */
int64_t qemu_icount_bias;
/* Only written by TCG thread */
int64_t qemu_icount;
} TimersState;
static TimersState timers_state;
bool mttcg_enabled;
/*
* We default to false if we know other options have been enabled
* which are currently incompatible with MTTCG. Otherwise when each
* guest (target) has been updated to support:
* - atomic instructions
* - memory ordering primitives (barriers)
* they can set the appropriate CONFIG flags in ${target}-softmmu.mak
*
* Once a guest architecture has been converted to the new primitives
* there are two remaining limitations to check.
*
* - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
* - The host must have a stronger memory order than the guest
*
* It may be possible in future to support strong guests on weak hosts
* but that will require tagging all load/stores in a guest with their
* implicit memory order requirements which would likely slow things
* down a lot.
*/
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
return false;
#endif
}
static bool default_mttcg_enabled(void)
{
if (use_icount || TCG_OVERSIZED_GUEST) {
return false;
} else {
#ifdef TARGET_SUPPORTS_MTTCG
return check_tcg_memory_orders_compatible();
#else
return false;
#endif
}
}
void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
const char *t = qemu_opt_get(opts, "thread");
if (t) {
if (strcmp(t, "multi") == 0) {
if (TCG_OVERSIZED_GUEST) {
error_setg(errp, "No MTTCG when guest word size > hosts");
} else if (use_icount) {
error_setg(errp, "No MTTCG when icount is enabled");
} else {
#ifndef TARGET_SUPPORT_MTTCG
error_report("Guest not yet converted to MTTCG - "
"you may get unexpected results");
#endif
if (!check_tcg_memory_orders_compatible()) {
error_report("Guest expects a stronger memory ordering "
"than the host provides");
error_printf("This may cause strange/hard to debug errors\n");
}
mttcg_enabled = true;
}
} else if (strcmp(t, "single") == 0) {
mttcg_enabled = false;
} else {
error_setg(errp, "Invalid 'thread' setting %s", t);
}
} else {
mttcg_enabled = default_mttcg_enabled();
}
}
int64_t cpu_get_icount_raw(void)
{
int64_t icount;
CPUState *cpu = current_cpu;
icount = timers_state.qemu_icount;
if (cpu) {
if (!cpu->can_do_io) {
fprintf(stderr, "Bad icount read\n");
exit(1);
}
icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
}
return icount;
}
/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
int64_t icount = cpu_get_icount_raw();
return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}
int64_t cpu_get_icount(void)
{
int64_t icount;
unsigned start;
do {
start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
icount = cpu_get_icount_locked();
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
return icount;
}
int64_t cpu_icount_to_ns(int64_t icount)
{
return icount << icount_time_shift;
}
/* return the time elapsed in VM between vm_start and vm_stop. Unless
* icount is active, cpu_get_ticks() uses units of the host CPU cycle
* counter.
*
* Caller must hold the BQL
*/
int64_t cpu_get_ticks(void)
{
int64_t ticks;
if (use_icount) {
return cpu_get_icount();
}
ticks = timers_state.cpu_ticks_offset;
if (timers_state.cpu_ticks_enabled) {
ticks += cpu_get_host_ticks();
}
if (timers_state.cpu_ticks_prev > ticks) {
/* Note: non increasing ticks may happen if the host uses
software suspend */
timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
ticks = timers_state.cpu_ticks_prev;
}
timers_state.cpu_ticks_prev = ticks;
return ticks;
}
static int64_t cpu_get_clock_locked(void)
{
int64_t time;
time = timers_state.cpu_clock_offset;
if (timers_state.cpu_ticks_enabled) {
time += get_clock();
}
return time;
}
/* Return the monotonic time elapsed in VM, i.e.,
* minor patches here and there * MTTCG: lock-free TB lookup * SCSI: bugfixes for MPTSAS, MegaSAS, LSI53c, vmw_pvscsi * buffer_is_zero rewrite (except for one patch) * chardev: qemu_chr_fe_write checks * checkpatch improvement for markdown preformatted text * default-configs cleanups * atomics cleanups -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQEcBAABAgAGBQJX2DP2AAoJEL/70l94x66DIBYH/2pW+/HYexCobNn9eVD0Wm08 im0mRHIU0vjfTaeZSasJPXvA2FyYQLl9KnSFvUFcRiLILpp+hE3QdZ8o0QGlfAmE +5MWsPJDXMbOaCOfMKZpZvPfJ6q/lSTg6eiJTPiRgyU7fQgjMDAot1s44ETYGVRu myeheEvjSwm/aT9sRIUK6KC7LWXGHFYRYzYJDnvoN6svHZ10DcEDhve8bdmixFk0 0zUY4RmPk8n46SntDG65tgAlKlzfSuPOesvbpcQIYe1H+r+uJt9BST7MjKdbdDQv b/LDzMx8CTbd2tDPL6JWgjBGBZ6SZ4Q6x0a45kzJRtkS+BPtNeGGzBVwULVN4RY= =eAJS -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging * minor patches here and there * MTTCG: lock-free TB lookup * SCSI: bugfixes for MPTSAS, MegaSAS, LSI53c, vmw_pvscsi * buffer_is_zero rewrite (except for one patch) * chardev: qemu_chr_fe_write checks * checkpatch improvement for markdown preformatted text * default-configs cleanups * atomics cleanups # gpg: Signature made Tue 13 Sep 2016 18:14:30 BST # gpg: using RSA key 0xBFFBD25F78C7AE83 # gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" # gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * remotes/bonzini/tags/for-upstream: (58 commits) cutils: Add generic prefetch cutils: Add SSE4 version cutils: Add test for buffer_is_zero cutils: Remove ppc buffer zero checking cutils: Remove aarch64 buffer zero checking cutils: Rearrange buffer_is_zero acceleration cutils: Export only buffer_is_zero cutils: Remove SPLAT macro cutils: Move buffer_is_zero and subroutines to a new file ppc: do not redefine CPUPPCState x86/lapic: Load LAPIC state at post_load optionrom: do not rely on compiler's bswap optimization checkpatch: Fix whitespace checks for documentation code blocks atomics: Use __atomic_*_n() variant primitives atomics: Remove redundant barrier()'s kvm-all: drop kvm_setup_guest_memory i8257: Make device "i8257" unavailable with -device Revert "megasas: remove useless check for cmd->frame" char: convert qemu_chr_fe_write to qemu_chr_fe_write_all hw: replace most use of qemu_chr_fe_write with qemu_chr_fe_write_all ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Conflicts: cpus.c tests/Makefile.include
2016-09-15 12:24:22 +03:00
* the time between vm_start and vm_stop
*/
int64_t cpu_get_clock(void)
{
int64_t ti;
unsigned start;
do {
start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
ti = cpu_get_clock_locked();
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
return ti;
}
/* enable cpu_get_ticks()
* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
void cpu_enable_ticks(void)
{
/* Here, the really thing protected by seqlock is cpu_clock_offset. */
seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (!timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* disable cpu_get_ticks() : the clock is stopped. You must not call
* cpu_get_ticks() after that.
* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
void cpu_disable_ticks(void)
{
/* Here, the really thing protected by seqlock is cpu_clock_offset. */
seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset += cpu_get_host_ticks();
timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_enabled = 0;
}
seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* Correlation between real and virtual time is always going to be
fairly approximate, so ignore small variation.
When the guest is idle real and virtual time will be aligned in
the IO wait loop. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
static void icount_adjust(void)
{
int64_t cur_time;
int64_t cur_icount;
int64_t delta;
/* Protected by TimersState mutex. */
static int64_t last_delta;
/* If the VM is not running, then do nothing. */
if (!runstate_is_running()) {
return;
}
seqlock_write_begin(&timers_state.vm_clock_seqlock);
cur_time = cpu_get_clock_locked();
cur_icount = cpu_get_icount_locked();
delta = cur_icount - cur_time;
/* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
if (delta > 0
&& last_delta + ICOUNT_WOBBLE < delta * 2
&& icount_time_shift > 0) {
/* The guest is getting too far ahead. Slow time down. */
icount_time_shift--;
}
if (delta < 0
&& last_delta - ICOUNT_WOBBLE > delta * 2
&& icount_time_shift < MAX_ICOUNT_SHIFT) {
/* The guest is getting too far behind. Speed time up. */
icount_time_shift++;
}
last_delta = delta;
timers_state.qemu_icount_bias = cur_icount
- (timers_state.qemu_icount << icount_time_shift);
seqlock_write_end(&timers_state.vm_clock_seqlock);
}
static void icount_adjust_rt(void *opaque)
{
timer_mod(icount_rt_timer,
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
icount_adjust();
}
static void icount_adjust_vm(void *opaque)
{
timer_mod(icount_vm_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
NANOSECONDS_PER_SECOND / 10);
icount_adjust();
}
static int64_t qemu_icount_round(int64_t count)
{
return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
static void icount_warp_rt(void)
{
unsigned seq;
int64_t warp_start;
/* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
* changes from -1 to another value, so the race here is okay.
*/
do {
seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
warp_start = vm_clock_warp_start;
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
if (warp_start == -1) {
return;
}
seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (runstate_is_running()) {
int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
cpu_get_clock_locked());
int64_t warp_delta;
warp_delta = clock - vm_clock_warp_start;
if (use_icount == 2) {
/*
* In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
* far ahead of real time.
*/
int64_t cur_icount = cpu_get_icount_locked();
int64_t delta = clock - cur_icount;
warp_delta = MIN(warp_delta, delta);
}
timers_state.qemu_icount_bias += warp_delta;
}
vm_clock_warp_start = -1;
seqlock_write_end(&timers_state.vm_clock_seqlock);
if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
}
static void icount_timer_cb(void *opaque)
{
/* No need for a checkpoint because the timer already synchronizes
* with CHECKPOINT_CLOCK_VIRTUAL_RT.
*/
icount_warp_rt();
}
void qtest_clock_warp(int64_t dest)
{
int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
AioContext *aio_context;
assert(qtest_enabled());
aio_context = qemu_get_aio_context();
while (clock < dest) {
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
seqlock_write_begin(&timers_state.vm_clock_seqlock);
timers_state.qemu_icount_bias += warp;
seqlock_write_end(&timers_state.vm_clock_seqlock);
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
}
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
void qemu_start_warp_timer(void)
{
int64_t clock;
int64_t deadline;
if (!use_icount) {
return;
}
/* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
* do not fire, so computing the deadline does not make sense.
*/
if (!runstate_is_running()) {
return;
}
/* warp clock deterministically in record/replay mode */
if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
return;
}
if (!all_cpu_threads_idle()) {
return;
}
if (qtest_enabled()) {
/* When testing, qtest commands advance icount. */
return;
}
/* We want to use the earliest deadline from ALL vm_clocks */
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
if (deadline < 0) {
static bool notified;
if (!icount_sleep && !notified) {
error_report("WARNING: icount sleep disabled and no active timers");
notified = true;
}
return;
}
if (deadline > 0) {
/*
* Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
* sleep. Otherwise, the CPU might be waiting for a future timer
* interrupt to wake it up, but the interrupt never comes because
* the vCPU isn't running any insns and thus doesn't advance the
* QEMU_CLOCK_VIRTUAL.
*/
if (!icount_sleep) {
/*
* We never let VCPUs sleep in no sleep icount mode.
* If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
* to the next QEMU_CLOCK_VIRTUAL event and notify it.
* It is useful when we want a deterministic execution time,
* isolated from host latencies.
*/
seqlock_write_begin(&timers_state.vm_clock_seqlock);
timers_state.qemu_icount_bias += deadline;
seqlock_write_end(&timers_state.vm_clock_seqlock);
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
} else {
/*
* We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
* "real" time, (related to the time left until the next event) has
* passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
* This avoids that the warps are visible externally; for example,
* you will not be sending network packets continuously instead of
* every 100ms.
*/
seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
vm_clock_warp_start = clock;
}
seqlock_write_end(&timers_state.vm_clock_seqlock);
timer_mod_anticipate(icount_warp_timer, clock + deadline);
}
} else if (deadline == 0) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
}
static void qemu_account_warp_timer(void)
{
if (!use_icount || !icount_sleep) {
return;
}
/* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
* do not fire, so computing the deadline does not make sense.
*/
if (!runstate_is_running()) {
return;
}
/* warp clock deterministically in record/replay mode */
if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
return;
}
timer_del(icount_warp_timer);
icount_warp_rt();
}
static bool icount_state_needed(void *opaque)
{
return use_icount;
}
/*
* This is a subsection for icount migration.
*/
static const VMStateDescription icount_vmstate_timers = {
.name = "timer/icount",
.version_id = 1,
.minimum_version_id = 1,
.needed = icount_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT64(qemu_icount_bias, TimersState),
VMSTATE_INT64(qemu_icount, TimersState),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_timers = {
.name = "timer",
.version_id = 2,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_INT64(cpu_ticks_offset, TimersState),
VMSTATE_INT64(dummy, TimersState),
VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
&icount_vmstate_timers,
NULL
}
};
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
double pct;
double throttle_ratio;
long sleeptime_ns;
if (!cpu_throttle_get_percentage()) {
return;
}
pct = (double)cpu_throttle_get_percentage()/100;
throttle_ratio = pct / (1 - pct);
sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
qemu_mutex_unlock_iothread();
atomic_set(&cpu->throttle_thread_scheduled, 0);
g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
qemu_mutex_lock_iothread();
}
static void cpu_throttle_timer_tick(void *opaque)
{
CPUState *cpu;
double pct;
/* Stop the timer if needed */
if (!cpu_throttle_get_percentage()) {
return;
}
CPU_FOREACH(cpu) {
if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
async_run_on_cpu(cpu, cpu_throttle_thread,
RUN_ON_CPU_NULL);
}
}
pct = (double)cpu_throttle_get_percentage()/100;
timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}
void cpu_throttle_set(int new_throttle_pct)
{
/* Ensure throttle percentage is within valid range */
new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
atomic_set(&throttle_percentage, new_throttle_pct);
timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
CPU_THROTTLE_TIMESLICE_NS);
}
void cpu_throttle_stop(void)
{
atomic_set(&throttle_percentage, 0);
}
bool cpu_throttle_active(void)
{
return (cpu_throttle_get_percentage() != 0);
}
int cpu_throttle_get_percentage(void)
{
return atomic_read(&throttle_percentage);
}
void cpu_ticks_init(void)
{
seqlock_init(&timers_state.vm_clock_seqlock);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
cpu_throttle_timer_tick, NULL);
}
void configure_icount(QemuOpts *opts, Error **errp)
{
const char *option;
char *rem_str = NULL;
option = qemu_opt_get(opts, "shift");
if (!option) {
if (qemu_opt_get(opts, "align") != NULL) {
error_setg(errp, "Please specify shift option when using align");
}
return;
}
icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
if (icount_sleep) {
icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
icount_timer_cb, NULL);
}
icount_align_option = qemu_opt_get_bool(opts, "align", false);
if (icount_align_option && !icount_sleep) {
error_setg(errp, "align=on and sleep=off are incompatible");
}
if (strcmp(option, "auto") != 0) {
errno = 0;
icount_time_shift = strtol(option, &rem_str, 0);
if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
error_setg(errp, "icount: Invalid shift value");
}
use_icount = 1;
return;
} else if (icount_align_option) {
error_setg(errp, "shift=auto and align=on are incompatible");
} else if (!icount_sleep) {
error_setg(errp, "shift=auto and sleep=off are incompatible");
}
use_icount = 2;
/* 125MIPS seems a reasonable initial guess at the guest speed.
It will be corrected fairly quickly anyway. */
icount_time_shift = 3;
/* Have both realtime and virtual time triggers for speed adjustment.
The realtime trigger catches emulated time passing too slowly,
the virtual time trigger catches emulated time passing too fast.
Realtime triggers occur even when idle, so use them less frequently
than VM triggers. */
icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
icount_adjust_rt, NULL);
timer_mod(icount_rt_timer,
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
icount_adjust_vm, NULL);
timer_mod(icount_vm_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
NANOSECONDS_PER_SECOND / 10);
}
/***********************************************************/
/* TCG vCPU kick timer
*
* The kick timer is responsible for moving single threaded vCPU
* emulation on to the next vCPU. If more than one vCPU is running a
* timer event with force a cpu->exit so the next vCPU can get
* scheduled.
*
* The timer is removed if all vCPUs are idle and restarted again once
* idleness is complete.
*/
static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
static inline int64_t qemu_tcg_next_kick(void)
{
return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}
/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
CPUState *cpu;
do {
cpu = atomic_mb_read(&tcg_current_rr_cpu);
if (cpu) {
cpu_exit(cpu);
}
} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread icount has become much slower after tcg_cpu_exec has stopped using the BQL. There is also a latent bug that is masked by the slowness. The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL timer now has to wake up the I/O thread and wait for it. The rendez-vous is mediated by the BQL QemuMutex: - handle_icount_deadline wakes up the I/O thread with BQL taken - the I/O thread wakes up and waits on the BQL - the VCPU thread releases the BQL a little later - the I/O thread raises an interrupt, which calls qemu_cpu_kick - the VCPU thread notices the interrupt, takes the BQL to process it and waits on it All this back and forth is extremely expensive, causing a 6 to 8-fold slowdown when icount is turned on. One may think that the issue is that the VCPU thread is too dependent on the BQL, but then the latent bug comes in. I first tried removing the BQL completely from the x86 cpu_exec, only to see everything break. The only way to fix it (and make everything slow again) was to add a dummy BQL lock/unlock pair. This is because in -icount mode you really have to process the events before the CPU restarts executing the next instruction. Therefore, this series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in the vCPU thread when running in icount mode. The required changes include: - make the timer notification callback wake up TCG's single vCPU thread when run from another thread. By using async_run_on_cpu, the callback can override all_cpu_threads_idle() when the CPU is halted. - move handle_icount_deadline after qemu_tcg_wait_io_event, so that the timer notification callback is invoked after the dummy work item wakes up the vCPU thread - make handle_icount_deadline run the timers instead of just waking the I/O thread. - stop processing the timers in the main loop Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-02 21:56:40 +03:00
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}
void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread icount has become much slower after tcg_cpu_exec has stopped using the BQL. There is also a latent bug that is masked by the slowness. The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL timer now has to wake up the I/O thread and wait for it. The rendez-vous is mediated by the BQL QemuMutex: - handle_icount_deadline wakes up the I/O thread with BQL taken - the I/O thread wakes up and waits on the BQL - the VCPU thread releases the BQL a little later - the I/O thread raises an interrupt, which calls qemu_cpu_kick - the VCPU thread notices the interrupt, takes the BQL to process it and waits on it All this back and forth is extremely expensive, causing a 6 to 8-fold slowdown when icount is turned on. One may think that the issue is that the VCPU thread is too dependent on the BQL, but then the latent bug comes in. I first tried removing the BQL completely from the x86 cpu_exec, only to see everything break. The only way to fix it (and make everything slow again) was to add a dummy BQL lock/unlock pair. This is because in -icount mode you really have to process the events before the CPU restarts executing the next instruction. Therefore, this series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in the vCPU thread when running in icount mode. The required changes include: - make the timer notification callback wake up TCG's single vCPU thread when run from another thread. By using async_run_on_cpu, the callback can override all_cpu_threads_idle() when the CPU is halted. - move handle_icount_deadline after qemu_tcg_wait_io_event, so that the timer notification callback is invoked after the dummy work item wakes up the vCPU thread - make handle_icount_deadline run the timers instead of just waking the I/O thread. - stop processing the timers in the main loop Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-02 21:56:40 +03:00
if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
qemu_notify_event();
return;
}
if (!qemu_in_vcpu_thread() && first_cpu) {
/* qemu_cpu_kick is not enough to kick a halted CPU out of
* qemu_tcg_wait_io_event. async_run_on_cpu, instead,
* causes cpu_thread_is_idle to return false. This way,
* handle_icount_deadline can run.
*/
async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
}
}
static void kick_tcg_thread(void *opaque)
{
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
qemu_cpu_kick_rr_cpu();
}
static void start_tcg_kick_timer(void)
{
if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
kick_tcg_thread, NULL);
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
}
}
static void stop_tcg_kick_timer(void)
{
if (tcg_kick_vcpu_timer) {
timer_del(tcg_kick_vcpu_timer);
tcg_kick_vcpu_timer = NULL;
}
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
va_list ap;
CPUState *cpu;
va_start(ap, fmt);
fprintf(stderr, "qemu: hardware error: ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
CPU_FOREACH(cpu) {
fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
}
va_end(ap);
abort();
}
void cpu_synchronize_all_states(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_state(cpu);
}
}
void cpu_synchronize_all_post_reset(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_post_reset(cpu);
}
}
void cpu_synchronize_all_post_init(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
cpu_synchronize_post_init(cpu);
}
}
static int do_vm_stop(RunState state)
{
int ret = 0;
if (runstate_is_running()) {
cpu_disable_ticks();
pause_all_vcpus();
runstate_set(state);
vm_state_notify(0, state);
qapi_event_send_stop(&error_abort);
}
bdrv_drain_all();
replay_disable_events();
ret = bdrv_flush_all();
return ret;
}
static bool cpu_can_run(CPUState *cpu)
{
if (cpu->stop) {
return false;
}
if (cpu_is_stopped(cpu)) {
return false;
}
return true;
}
static void cpu_handle_guest_debug(CPUState *cpu)
{
gdb_set_stop_cpu(cpu);
qemu_system_debug_request();
cpu->stopped = true;
}
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
sigset_t set;
struct sigaction action;
memset(&action, 0, sizeof(action));
action.sa_handler = SIG_DFL;
if (!sigaction(SIGBUS, &action, NULL)) {
raise(SIGBUS);
sigemptyset(&set);
sigaddset(&set, SIGBUS);
pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
perror("Failed to re-raise SIGBUS!\n");
abort();
}
static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
sigbus_reraise();
}
if (current_cpu) {
/* Called asynchronously in VCPU thread. */
if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
} else {
/* Called synchronously (via signalfd) in main thread. */
if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
sigbus_reraise();
}
}
}
static void qemu_init_sigbus(void)
{
struct sigaction action;
memset(&action, 0, sizeof(action));
action.sa_flags = SA_SIGINFO;
action.sa_sigaction = sigbus_handler;
sigaction(SIGBUS, &action, NULL);
prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */
static QemuMutex qemu_global_mutex;
static QemuThread io_thread;
/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
void qemu_init_cpu_loop(void)
{
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
if (kvm_destroy_vcpu(cpu) < 0) {
error_report("kvm_destroy_vcpu failed");
exit(EXIT_FAILURE);
}
}
static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
static void qemu_wait_io_event_common(CPUState *cpu)
{
atomic_mb_set(&cpu->thread_kicked, false);
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
process_queued_cpu_work(cpu);
}
static bool qemu_tcg_should_sleep(CPUState *cpu)
{
if (mttcg_enabled) {
return cpu_thread_is_idle(cpu);
} else {
return all_cpu_threads_idle();
}
}
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
while (qemu_tcg_should_sleep(cpu)) {
stop_tcg_kick_timer();
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
start_tcg_kick_timer();
qemu_wait_io_event_common(cpu);
}
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
while (cpu_thread_is_idle(cpu)) {
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
qemu_wait_io_event_common(cpu);
}
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
int r;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
current_cpu = cpu;
r = kvm_init_vcpu(cpu);
if (r < 0) {
fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
exit(1);
}
kvm_init_cpu_signals(cpu);
/* signal CPU creation */
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
do {
if (cpu_can_run(cpu)) {
r = kvm_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
}
}
qemu_kvm_wait_io_event(cpu);
} while (!cpu->unplug || cpu_can_run(cpu));
qemu_kvm_destroy_vcpu(cpu);
cpu->created = false;
qemu_cond_signal(&qemu_cpu_cond);
qemu_mutex_unlock_iothread();
return NULL;
}
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
fprintf(stderr, "qtest is not supported under Windows\n");
exit(1);
#else
CPUState *cpu = arg;
sigset_t waitset;
int r;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
current_cpu = cpu;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
/* signal CPU creation */
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
while (1) {
qemu_mutex_unlock_iothread();
do {
int sig;
r = sigwait(&waitset, &sig);
} while (r == -1 && (errno == EAGAIN || errno == EINTR));
if (r == -1) {
perror("sigwait");
exit(1);
}
qemu_mutex_lock_iothread();
qemu_wait_io_event_common(cpu);
}
return NULL;
#endif
}
static int64_t tcg_get_icount_limit(void)
{
int64_t deadline;
if (replay_mode != REPLAY_MODE_PLAY) {
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
/* Maintain prior (possibly buggy) behaviour where if no deadline
* was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
* INT32_MAX nanoseconds ahead, we still use INT32_MAX
* nanoseconds.
*/
if ((deadline < 0) || (deadline > INT32_MAX)) {
deadline = INT32_MAX;
}
return qemu_icount_round(deadline);
} else {
return replay_get_instructions();
}
}
static void handle_icount_deadline(void)
{
icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread icount has become much slower after tcg_cpu_exec has stopped using the BQL. There is also a latent bug that is masked by the slowness. The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL timer now has to wake up the I/O thread and wait for it. The rendez-vous is mediated by the BQL QemuMutex: - handle_icount_deadline wakes up the I/O thread with BQL taken - the I/O thread wakes up and waits on the BQL - the VCPU thread releases the BQL a little later - the I/O thread raises an interrupt, which calls qemu_cpu_kick - the VCPU thread notices the interrupt, takes the BQL to process it and waits on it All this back and forth is extremely expensive, causing a 6 to 8-fold slowdown when icount is turned on. One may think that the issue is that the VCPU thread is too dependent on the BQL, but then the latent bug comes in. I first tried removing the BQL completely from the x86 cpu_exec, only to see everything break. The only way to fix it (and make everything slow again) was to add a dummy BQL lock/unlock pair. This is because in -icount mode you really have to process the events before the CPU restarts executing the next instruction. Therefore, this series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in the vCPU thread when running in icount mode. The required changes include: - make the timer notification callback wake up TCG's single vCPU thread when run from another thread. By using async_run_on_cpu, the callback can override all_cpu_threads_idle() when the CPU is halted. - move handle_icount_deadline after qemu_tcg_wait_io_event, so that the timer notification callback is invoked after the dummy work item wakes up the vCPU thread - make handle_icount_deadline run the timers instead of just waking the I/O thread. - stop processing the timers in the main loop Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-02 21:56:40 +03:00
assert(qemu_in_vcpu_thread());
if (use_icount) {
int64_t deadline =
qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
if (deadline == 0) {
icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread icount has become much slower after tcg_cpu_exec has stopped using the BQL. There is also a latent bug that is masked by the slowness. The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL timer now has to wake up the I/O thread and wait for it. The rendez-vous is mediated by the BQL QemuMutex: - handle_icount_deadline wakes up the I/O thread with BQL taken - the I/O thread wakes up and waits on the BQL - the VCPU thread releases the BQL a little later - the I/O thread raises an interrupt, which calls qemu_cpu_kick - the VCPU thread notices the interrupt, takes the BQL to process it and waits on it All this back and forth is extremely expensive, causing a 6 to 8-fold slowdown when icount is turned on. One may think that the issue is that the VCPU thread is too dependent on the BQL, but then the latent bug comes in. I first tried removing the BQL completely from the x86 cpu_exec, only to see everything break. The only way to fix it (and make everything slow again) was to add a dummy BQL lock/unlock pair. This is because in -icount mode you really have to process the events before the CPU restarts executing the next instruction. Therefore, this series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in the vCPU thread when running in icount mode. The required changes include: - make the timer notification callback wake up TCG's single vCPU thread when run from another thread. By using async_run_on_cpu, the callback can override all_cpu_threads_idle() when the CPU is halted. - move handle_icount_deadline after qemu_tcg_wait_io_event, so that the timer notification callback is invoked after the dummy work item wakes up the vCPU thread - make handle_icount_deadline run the timers instead of just waking the I/O thread. - stop processing the timers in the main loop Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-02 21:56:40 +03:00
/* Wake up other AioContexts. */
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread icount has become much slower after tcg_cpu_exec has stopped using the BQL. There is also a latent bug that is masked by the slowness. The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL timer now has to wake up the I/O thread and wait for it. The rendez-vous is mediated by the BQL QemuMutex: - handle_icount_deadline wakes up the I/O thread with BQL taken - the I/O thread wakes up and waits on the BQL - the VCPU thread releases the BQL a little later - the I/O thread raises an interrupt, which calls qemu_cpu_kick - the VCPU thread notices the interrupt, takes the BQL to process it and waits on it All this back and forth is extremely expensive, causing a 6 to 8-fold slowdown when icount is turned on. One may think that the issue is that the VCPU thread is too dependent on the BQL, but then the latent bug comes in. I first tried removing the BQL completely from the x86 cpu_exec, only to see everything break. The only way to fix it (and make everything slow again) was to add a dummy BQL lock/unlock pair. This is because in -icount mode you really have to process the events before the CPU restarts executing the next instruction. Therefore, this series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in the vCPU thread when running in icount mode. The required changes include: - make the timer notification callback wake up TCG's single vCPU thread when run from another thread. By using async_run_on_cpu, the callback can override all_cpu_threads_idle() when the CPU is halted. - move handle_icount_deadline after qemu_tcg_wait_io_event, so that the timer notification callback is invoked after the dummy work item wakes up the vCPU thread - make handle_icount_deadline run the timers instead of just waking the I/O thread. - stop processing the timers in the main loop Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-02 21:56:40 +03:00
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
}
}
}
static int tcg_cpu_exec(CPUState *cpu)
{
int ret;
#ifdef CONFIG_PROFILER
int64_t ti;
#endif
#ifdef CONFIG_PROFILER
ti = profile_getclock();
#endif
if (use_icount) {
int64_t count;
int decr;
timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+ cpu->icount_extra);
cpu->icount_decr.u16.low = 0;
cpu->icount_extra = 0;
count = tcg_get_icount_limit();
timers_state.qemu_icount += count;
decr = (count > 0xffff) ? 0xffff : count;
count -= decr;
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
tcg: drop global lock during TCG code execution This finally allows TCG to benefit from the iothread introduction: Drop the global mutex while running pure TCG CPU code. Reacquire the lock when entering MMIO or PIO emulation, or when leaving the TCG loop. We have to revert a few optimization for the current TCG threading model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not kicking it in qemu_cpu_kick. We also need to disable RAM block reordering until we have a more efficient locking mechanism at hand. Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. These numbers demonstrate where we gain something: 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm The guest CPU was fully loaded, but the iothread could still run mostly independent on a second core. Without the patch we don't get beyond 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm We don't benefit significantly, though, when the guest is not fully loading a host CPU. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> [FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex] Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> [EGC: fixed iothread lock for cpu-exec IRQ handling] Signed-off-by: Emilio G. Cota <cota@braap.org> [AJB: -smp single-threaded fix, clean commit msg, BQL fixes] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Pranith Kumar <bobby.prani@gmail.com> [PM: target-arm changes] Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 21:29:11 +03:00
qemu_mutex_unlock_iothread();
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
tcg: drop global lock during TCG code execution This finally allows TCG to benefit from the iothread introduction: Drop the global mutex while running pure TCG CPU code. Reacquire the lock when entering MMIO or PIO emulation, or when leaving the TCG loop. We have to revert a few optimization for the current TCG threading model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not kicking it in qemu_cpu_kick. We also need to disable RAM block reordering until we have a more efficient locking mechanism at hand. Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. These numbers demonstrate where we gain something: 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm The guest CPU was fully loaded, but the iothread could still run mostly independent on a second core. Without the patch we don't get beyond 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm We don't benefit significantly, though, when the guest is not fully loading a host CPU. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> [FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex] Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> [EGC: fixed iothread lock for cpu-exec IRQ handling] Signed-off-by: Emilio G. Cota <cota@braap.org> [AJB: -smp single-threaded fix, clean commit msg, BQL fixes] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Pranith Kumar <bobby.prani@gmail.com> [PM: target-arm changes] Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 21:29:11 +03:00
qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
if (use_icount) {
/* Fold pending instructions back into the
instruction counter, and clear the interrupt flag. */
timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+ cpu->icount_extra);
cpu->icount_decr.u32 = 0;
cpu->icount_extra = 0;
replay_account_executed_instructions();
}
return ret;
}
/* Destroy any remaining vCPUs which have been unplugged and have
* finished running
*/
static void deal_with_unplugged_cpus(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (cpu->unplug && !cpu_can_run(cpu)) {
qemu_tcg_destroy_vcpu(cpu);
cpu->created = false;
qemu_cond_signal(&qemu_cpu_cond);
break;
}
}
}
/* Single-threaded TCG
*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
* the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
* This is done explicitly rather than relying on side-effects
* elsewhere.
*/
static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
CPU_FOREACH(cpu) {
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
cpu->can_do_io = 1;
}
qemu_cond_signal(&qemu_cpu_cond);
/* wait for initial kick-off after machine start */
while (first_cpu->stopped) {
qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
/* process any pending work */
CPU_FOREACH(cpu) {
current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
}
start_tcg_kick_timer();
cpu = first_cpu;
/* process any pending work */
cpu->exit_request = 1;
while (1) {
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
qemu_account_warp_timer();
icount: process QEMU_CLOCK_VIRTUAL timers in vCPU thread icount has become much slower after tcg_cpu_exec has stopped using the BQL. There is also a latent bug that is masked by the slowness. The slowness happens because every occurrence of a QEMU_CLOCK_VIRTUAL timer now has to wake up the I/O thread and wait for it. The rendez-vous is mediated by the BQL QemuMutex: - handle_icount_deadline wakes up the I/O thread with BQL taken - the I/O thread wakes up and waits on the BQL - the VCPU thread releases the BQL a little later - the I/O thread raises an interrupt, which calls qemu_cpu_kick - the VCPU thread notices the interrupt, takes the BQL to process it and waits on it All this back and forth is extremely expensive, causing a 6 to 8-fold slowdown when icount is turned on. One may think that the issue is that the VCPU thread is too dependent on the BQL, but then the latent bug comes in. I first tried removing the BQL completely from the x86 cpu_exec, only to see everything break. The only way to fix it (and make everything slow again) was to add a dummy BQL lock/unlock pair. This is because in -icount mode you really have to process the events before the CPU restarts executing the next instruction. Therefore, this series moves the processing of QEMU_CLOCK_VIRTUAL timers straight in the vCPU thread when running in icount mode. The required changes include: - make the timer notification callback wake up TCG's single vCPU thread when run from another thread. By using async_run_on_cpu, the callback can override all_cpu_threads_idle() when the CPU is halted. - move handle_icount_deadline after qemu_tcg_wait_io_event, so that the timer notification callback is invoked after the dummy work item wakes up the vCPU thread - make handle_icount_deadline run the timers instead of just waking the I/O thread. - stop processing the timers in the main loop Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-03-02 21:56:40 +03:00
/* Run the timers here. This is much more efficient than
* waking up the I/O thread and waiting for completion.
*/
handle_icount_deadline();
if (!cpu) {
cpu = first_cpu;
}
while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
atomic_mb_set(&tcg_current_rr_cpu, cpu);
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
if (cpu_can_run(cpu)) {
int r;
r = tcg_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
} else if (r == EXCP_ATOMIC) {
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
break;
}
} else if (cpu->stop) {
if (cpu->unplug) {
cpu = CPU_NEXT(cpu);
}
break;
}
cpu = CPU_NEXT(cpu);
} /* while (cpu && !cpu->exit_request).. */
/* Does not need atomic_mb_set because a spurious wakeup is okay. */
atomic_set(&tcg_current_rr_cpu, NULL);
if (cpu && cpu->exit_request) {
atomic_mb_set(&cpu->exit_request, 0);
}
qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
deal_with_unplugged_cpus();
}
return NULL;
}
static void *qemu_hax_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
int r;
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
cpu->halted = 0;
current_cpu = cpu;
hax_init_vcpu(cpu);
qemu_cond_signal(&qemu_cpu_cond);
while (1) {
if (cpu_can_run(cpu)) {
r = hax_smp_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
}
}
while (cpu_thread_is_idle(cpu)) {
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
#ifdef _WIN32
SleepEx(0, TRUE);
#endif
qemu_wait_io_event_common(cpu);
}
return NULL;
}
#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
/* Multi-threaded TCG
*
* In the multi-threaded case each vCPU has its own thread. The TLS
* variable current_cpu can be used deep in the code to find the
* current CPUState for a given thread.
*/
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
rcu_register_thread();
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
cpu->can_do_io = 1;
current_cpu = cpu;
qemu_cond_signal(&qemu_cpu_cond);
/* process any pending work */
cpu->exit_request = 1;
while (1) {
if (cpu_can_run(cpu)) {
int r;
r = tcg_cpu_exec(cpu);
switch (r) {
case EXCP_DEBUG:
cpu_handle_guest_debug(cpu);
break;
case EXCP_HALTED:
/* during start-up the vCPU is reset and the thread is
* kicked several times. If we don't ensure we go back
* to sleep in the halted state we won't cleanly
* start-up when the vCPU is enabled.
*
* cpu->halted should ensure we sleep in wait_io_event
*/
g_assert(cpu->halted);
break;
case EXCP_ATOMIC:
qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu);
qemu_mutex_lock_iothread();
default:
/* Ignore everything else? */
break;
}
}
handle_icount_deadline();
atomic_mb_set(&cpu->exit_request, 0);
qemu_tcg_wait_io_event(cpu);
}
return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
int err;
if (cpu->thread_kicked) {
return;
}
cpu->thread_kicked = true;
err = pthread_kill(cpu->thread->thread, SIG_IPI);
if (err) {
fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
exit(1);
}
#else /* _WIN32 */
if (!qemu_cpu_is_self(cpu)) {
if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
__func__, GetLastError());
exit(1);
}
}
#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (tcg_enabled()) {
cpu_exit(cpu);
/* NOP unless doing single-thread RR */
qemu_cpu_kick_rr_cpu();
} else {
if (hax_enabled()) {
/*
* FIXME: race condition with the exit_request check in
* hax_vcpu_hax_exec
*/
cpu->exit_request = 1;
}
qemu_cpu_kick_thread(cpu);
}
}
void qemu_cpu_kick_self(void)
{
assert(current_cpu);
qemu_cpu_kick_thread(current_cpu);
}
bool qemu_cpu_is_self(CPUState *cpu)
{
return qemu_thread_is_self(cpu->thread);
}
bool qemu_in_vcpu_thread(void)
{
return current_cpu && qemu_cpu_is_self(current_cpu);
}
static __thread bool iothread_locked = false;
bool qemu_mutex_iothread_locked(void)
{
return iothread_locked;
}
void qemu_mutex_lock_iothread(void)
{
tcg: drop global lock during TCG code execution This finally allows TCG to benefit from the iothread introduction: Drop the global mutex while running pure TCG CPU code. Reacquire the lock when entering MMIO or PIO emulation, or when leaving the TCG loop. We have to revert a few optimization for the current TCG threading model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not kicking it in qemu_cpu_kick. We also need to disable RAM block reordering until we have a more efficient locking mechanism at hand. Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. These numbers demonstrate where we gain something: 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm The guest CPU was fully loaded, but the iothread could still run mostly independent on a second core. Without the patch we don't get beyond 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm We don't benefit significantly, though, when the guest is not fully loading a host CPU. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> [FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex] Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> [EGC: fixed iothread lock for cpu-exec IRQ handling] Signed-off-by: Emilio G. Cota <cota@braap.org> [AJB: -smp single-threaded fix, clean commit msg, BQL fixes] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Pranith Kumar <bobby.prani@gmail.com> [PM: target-arm changes] Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 21:29:11 +03:00
g_assert(!qemu_mutex_iothread_locked());
qemu_mutex_lock(&qemu_global_mutex);
iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
tcg: drop global lock during TCG code execution This finally allows TCG to benefit from the iothread introduction: Drop the global mutex while running pure TCG CPU code. Reacquire the lock when entering MMIO or PIO emulation, or when leaving the TCG loop. We have to revert a few optimization for the current TCG threading model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not kicking it in qemu_cpu_kick. We also need to disable RAM block reordering until we have a more efficient locking mechanism at hand. Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. These numbers demonstrate where we gain something: 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm The guest CPU was fully loaded, but the iothread could still run mostly independent on a second core. Without the patch we don't get beyond 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm We don't benefit significantly, though, when the guest is not fully loading a host CPU. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> [FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex] Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> [EGC: fixed iothread lock for cpu-exec IRQ handling] Signed-off-by: Emilio G. Cota <cota@braap.org> [AJB: -smp single-threaded fix, clean commit msg, BQL fixes] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Pranith Kumar <bobby.prani@gmail.com> [PM: target-arm changes] Acked-by: Peter Maydell <peter.maydell@linaro.org>
2017-02-23 21:29:11 +03:00
g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
static bool all_vcpus_paused(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu->stopped) {
return false;
}
}
return true;
}
void pause_all_vcpus(void)
{
CPUState *cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
CPU_FOREACH(cpu) {
cpu->stop = true;
qemu_cpu_kick(cpu);
}
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
}
while (!all_vcpus_paused()) {
qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
CPU_FOREACH(cpu) {
qemu_cpu_kick(cpu);
}
}
}
void cpu_resume(CPUState *cpu)
{
cpu->stop = false;
cpu->stopped = false;
qemu_cpu_kick(cpu);
}
void resume_all_vcpus(void)
{
CPUState *cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
CPU_FOREACH(cpu) {
cpu_resume(cpu);
}
}
void cpu_remove(CPUState *cpu)
{
cpu->stop = true;
cpu->unplug = true;
qemu_cpu_kick(cpu);
}
void cpu_remove_sync(CPUState *cpu)
{
cpu_remove(cpu);
while (cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
static QemuCond *single_tcg_halt_cond;
static QemuThread *single_tcg_cpu_thread;
if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
if (qemu_tcg_mttcg_enabled()) {
/* create a thread per vCPU with TCG (MTTCG) */
parallel_cpus = true;
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
} else {
/* share a single thread for all cpus with TCG */
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
qemu_thread_create(cpu->thread, thread_name,
qemu_tcg_rr_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
single_tcg_halt_cond = cpu->halt_cond;
single_tcg_cpu_thread = cpu->thread;
}
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
} else {
/* For non-MTTCG cases we share the thread */
cpu->thread = single_tcg_cpu_thread;
cpu->halt_cond = single_tcg_halt_cond;
}
}
static void qemu_hax_start_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
cpu->cpu_index);
qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
QEMU_THREAD_JOINABLE);
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
void qemu_init_vcpu(CPUState *cpu)
{
cpu->nr_cores = smp_cores;
cpu->nr_threads = smp_threads;
cpu->stopped = true;
if (!cpu->as) {
/* If the target cpu hasn't set up any address spaces itself,
* give it the default one.
*/
AddressSpace *as = address_space_init_shareable(cpu->memory,
"cpu-memory");
cpu->num_ases = 1;
cpu_address_space_init(cpu, as, 0);
}
if (kvm_enabled()) {
qemu_kvm_start_vcpu(cpu);
} else if (hax_enabled()) {
qemu_hax_start_vcpu(cpu);
} else if (tcg_enabled()) {
qemu_tcg_init_vcpu(cpu);
} else {
qemu_dummy_start_vcpu(cpu);
}
}
void cpu_stop_current(void)
{
if (current_cpu) {
current_cpu->stop = false;
current_cpu->stopped = true;
cpu_exit(current_cpu);
qemu_cond_broadcast(&qemu_pause_cond);
}
}
int vm_stop(RunState state)
{
if (qemu_in_vcpu_thread()) {
qemu_system_vmstop_request_prepare();
qemu_system_vmstop_request(state);
/*
* FIXME: should not return to device code in case
* vm_stop() has been requested.
*/
cpu_stop_current();
return 0;
}
return do_vm_stop(state);
}
/**
* Prepare for (re)starting the VM.
* Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
* running or in case of an error condition), 0 otherwise.
*/
int vm_prepare_start(void)
{
RunState requested;
int res = 0;
qemu_vmstop_requested(&requested);
if (runstate_is_running() && requested == RUN_STATE__MAX) {
return -1;
}
/* Ensure that a STOP/RESUME pair of events is emitted if a
* vmstop request was pending. The BLOCK_IO_ERROR event, for
* example, according to documentation is always followed by
* the STOP event.
*/
if (runstate_is_running()) {
qapi_event_send_stop(&error_abort);
res = -1;
} else {
replay_enable_events();
cpu_enable_ticks();
runstate_set(RUN_STATE_RUNNING);
vm_state_notify(1, RUN_STATE_RUNNING);
}
/* We are sending this now, but the CPUs will be resumed shortly later */
qapi_event_send_resume(&error_abort);
return res;
}
void vm_start(void)
{
if (!vm_prepare_start()) {
resume_all_vcpus();
}
}
/* does a state transition even if the VM is already stopped,
current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
if (runstate_is_running()) {
return vm_stop(state);
} else {
runstate_set(state);
bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
return bdrv_flush_all();
}
}
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
/* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
cpu_list(f, cpu_fprintf);
#endif
}
CpuInfoList *qmp_query_cpus(Error **errp)
{
CpuInfoList *head = NULL, *cur_item = NULL;
CPUState *cpu;
CPU_FOREACH(cpu) {
CpuInfoList *info;
#if defined(TARGET_I386)
X86CPU *x86_cpu = X86_CPU(cpu);
CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
MIPSCPU *mips_cpu = MIPS_CPU(cpu);
CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
CPUTriCoreState *env = &tricore_cpu->env;
#endif
cpu_synchronize_state(cpu);
info = g_malloc0(sizeof(*info));
info->value = g_malloc0(sizeof(*info->value));
info->value->CPU = cpu->cpu_index;
info->value->current = (cpu == first_cpu);
info->value->halted = cpu->halted;
info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
info->value->arch = CPU_INFO_ARCH_X86;
qapi: Don't box branches of flat unions There's no reason to do two malloc's for a flat union; let's just inline the branch struct directly into the C union branch of the flat union. Surprisingly, fewer clients were actually using explicit references to the branch types in comparison to the number of flat unions thus modified. This lets us reduce the hack in qapi-types:gen_variants() added in the previous patch; we no longer need to distinguish between alternates and flat unions. The change to unboxed structs means that u.data (added in commit cee2dedb) is now coincident with random fields of each branch of the flat union, whereas beforehand it was only coincident with pointers (since all branches of a flat union have to be objects). Note that this was already the case for simple unions - but there we got lucky. Remember, visit_start_union() blindly returns true for all visitors except for the dealloc visitor, where it returns the value !!obj->u.data, and that this result then controls whether to proceed with the visit to the variant. Pre-patch, this meant that flat unions were testing whether the boxed pointer was still NULL, and thereby skipping visit_end_implicit_struct() and avoiding a NULL dereference if the pointer had not been allocated. The same was true for simple unions where the current branch had pointer type, except there we bypassed visit_type_FOO(). But for simple unions where the current branch had scalar type, the contents of that scalar meant that the decision to call visit_type_FOO() was data-dependent - the reason we got lucky there is that visit_type_FOO() for all scalar types in the dealloc visitor is a no-op (only the pointer variants had anything to free), so it did not matter whether the dealloc visit was skipped. But with this patch, we would risk leaking memory if we could skip a call to visit_type_FOO_fields() based solely on a data-dependent decision. But notice: in the dealloc visitor, visit_type_FOO() already handles a NULL obj - it was only the visit_type_implicit_FOO() that was failing to check for NULL. And now that we have refactored things to have the branch be part of the parent struct, we no longer have a separate pointer that can be NULL in the first place. So we can just delete the call to visit_start_union() altogether, and blindly visit the branch type; there is no change in behavior except to the dealloc visitor, where we now unconditionally visit the branch, but where that visit is now always safe (for a flat union, we can no longer dereference NULL, and for a simple union, visit_type_FOO() was already safely handling NULL on pointer types). Unfortunately, simple unions are not as easy to switch to unboxed layout; because we are special-casing the hidden implicit type with a single 'data' member, we really DO need to keep calling another layer of visit_start_struct(), with a second malloc; although there are some cleanups planned for simple unions in later patches. visit_start_union() and gen_visit_implicit_struct() are now unused. Drop them. Note that after this patch, the only remaining use of visit_start_implicit_struct() is for alternate types; the next patch will do further cleanup based on that fact. Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1455778109-6278-14-git-send-email-eblake@redhat.com> [Dead code deletion squashed in, commit message updated accordingly] Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-02-18 09:48:27 +03:00
info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
info->value->arch = CPU_INFO_ARCH_PPC;
qapi: Don't box branches of flat unions There's no reason to do two malloc's for a flat union; let's just inline the branch struct directly into the C union branch of the flat union. Surprisingly, fewer clients were actually using explicit references to the branch types in comparison to the number of flat unions thus modified. This lets us reduce the hack in qapi-types:gen_variants() added in the previous patch; we no longer need to distinguish between alternates and flat unions. The change to unboxed structs means that u.data (added in commit cee2dedb) is now coincident with random fields of each branch of the flat union, whereas beforehand it was only coincident with pointers (since all branches of a flat union have to be objects). Note that this was already the case for simple unions - but there we got lucky. Remember, visit_start_union() blindly returns true for all visitors except for the dealloc visitor, where it returns the value !!obj->u.data, and that this result then controls whether to proceed with the visit to the variant. Pre-patch, this meant that flat unions were testing whether the boxed pointer was still NULL, and thereby skipping visit_end_implicit_struct() and avoiding a NULL dereference if the pointer had not been allocated. The same was true for simple unions where the current branch had pointer type, except there we bypassed visit_type_FOO(). But for simple unions where the current branch had scalar type, the contents of that scalar meant that the decision to call visit_type_FOO() was data-dependent - the reason we got lucky there is that visit_type_FOO() for all scalar types in the dealloc visitor is a no-op (only the pointer variants had anything to free), so it did not matter whether the dealloc visit was skipped. But with this patch, we would risk leaking memory if we could skip a call to visit_type_FOO_fields() based solely on a data-dependent decision. But notice: in the dealloc visitor, visit_type_FOO() already handles a NULL obj - it was only the visit_type_implicit_FOO() that was failing to check for NULL. And now that we have refactored things to have the branch be part of the parent struct, we no longer have a separate pointer that can be NULL in the first place. So we can just delete the call to visit_start_union() altogether, and blindly visit the branch type; there is no change in behavior except to the dealloc visitor, where we now unconditionally visit the branch, but where that visit is now always safe (for a flat union, we can no longer dereference NULL, and for a simple union, visit_type_FOO() was already safely handling NULL on pointer types). Unfortunately, simple unions are not as easy to switch to unboxed layout; because we are special-casing the hidden implicit type with a single 'data' member, we really DO need to keep calling another layer of visit_start_struct(), with a second malloc; although there are some cleanups planned for simple unions in later patches. visit_start_union() and gen_visit_implicit_struct() are now unused. Drop them. Note that after this patch, the only remaining use of visit_start_implicit_struct() is for alternate types; the next patch will do further cleanup based on that fact. Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1455778109-6278-14-git-send-email-eblake@redhat.com> [Dead code deletion squashed in, commit message updated accordingly] Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-02-18 09:48:27 +03:00
info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
info->value->arch = CPU_INFO_ARCH_SPARC;
qapi: Don't box branches of flat unions There's no reason to do two malloc's for a flat union; let's just inline the branch struct directly into the C union branch of the flat union. Surprisingly, fewer clients were actually using explicit references to the branch types in comparison to the number of flat unions thus modified. This lets us reduce the hack in qapi-types:gen_variants() added in the previous patch; we no longer need to distinguish between alternates and flat unions. The change to unboxed structs means that u.data (added in commit cee2dedb) is now coincident with random fields of each branch of the flat union, whereas beforehand it was only coincident with pointers (since all branches of a flat union have to be objects). Note that this was already the case for simple unions - but there we got lucky. Remember, visit_start_union() blindly returns true for all visitors except for the dealloc visitor, where it returns the value !!obj->u.data, and that this result then controls whether to proceed with the visit to the variant. Pre-patch, this meant that flat unions were testing whether the boxed pointer was still NULL, and thereby skipping visit_end_implicit_struct() and avoiding a NULL dereference if the pointer had not been allocated. The same was true for simple unions where the current branch had pointer type, except there we bypassed visit_type_FOO(). But for simple unions where the current branch had scalar type, the contents of that scalar meant that the decision to call visit_type_FOO() was data-dependent - the reason we got lucky there is that visit_type_FOO() for all scalar types in the dealloc visitor is a no-op (only the pointer variants had anything to free), so it did not matter whether the dealloc visit was skipped. But with this patch, we would risk leaking memory if we could skip a call to visit_type_FOO_fields() based solely on a data-dependent decision. But notice: in the dealloc visitor, visit_type_FOO() already handles a NULL obj - it was only the visit_type_implicit_FOO() that was failing to check for NULL. And now that we have refactored things to have the branch be part of the parent struct, we no longer have a separate pointer that can be NULL in the first place. So we can just delete the call to visit_start_union() altogether, and blindly visit the branch type; there is no change in behavior except to the dealloc visitor, where we now unconditionally visit the branch, but where that visit is now always safe (for a flat union, we can no longer dereference NULL, and for a simple union, visit_type_FOO() was already safely handling NULL on pointer types). Unfortunately, simple unions are not as easy to switch to unboxed layout; because we are special-casing the hidden implicit type with a single 'data' member, we really DO need to keep calling another layer of visit_start_struct(), with a second malloc; although there are some cleanups planned for simple unions in later patches. visit_start_union() and gen_visit_implicit_struct() are now unused. Drop them. Note that after this patch, the only remaining use of visit_start_implicit_struct() is for alternate types; the next patch will do further cleanup based on that fact. Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1455778109-6278-14-git-send-email-eblake@redhat.com> [Dead code deletion squashed in, commit message updated accordingly] Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-02-18 09:48:27 +03:00
info->value->u.q_sparc.pc = env->pc;
info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
info->value->arch = CPU_INFO_ARCH_MIPS;
qapi: Don't box branches of flat unions There's no reason to do two malloc's for a flat union; let's just inline the branch struct directly into the C union branch of the flat union. Surprisingly, fewer clients were actually using explicit references to the branch types in comparison to the number of flat unions thus modified. This lets us reduce the hack in qapi-types:gen_variants() added in the previous patch; we no longer need to distinguish between alternates and flat unions. The change to unboxed structs means that u.data (added in commit cee2dedb) is now coincident with random fields of each branch of the flat union, whereas beforehand it was only coincident with pointers (since all branches of a flat union have to be objects). Note that this was already the case for simple unions - but there we got lucky. Remember, visit_start_union() blindly returns true for all visitors except for the dealloc visitor, where it returns the value !!obj->u.data, and that this result then controls whether to proceed with the visit to the variant. Pre-patch, this meant that flat unions were testing whether the boxed pointer was still NULL, and thereby skipping visit_end_implicit_struct() and avoiding a NULL dereference if the pointer had not been allocated. The same was true for simple unions where the current branch had pointer type, except there we bypassed visit_type_FOO(). But for simple unions where the current branch had scalar type, the contents of that scalar meant that the decision to call visit_type_FOO() was data-dependent - the reason we got lucky there is that visit_type_FOO() for all scalar types in the dealloc visitor is a no-op (only the pointer variants had anything to free), so it did not matter whether the dealloc visit was skipped. But with this patch, we would risk leaking memory if we could skip a call to visit_type_FOO_fields() based solely on a data-dependent decision. But notice: in the dealloc visitor, visit_type_FOO() already handles a NULL obj - it was only the visit_type_implicit_FOO() that was failing to check for NULL. And now that we have refactored things to have the branch be part of the parent struct, we no longer have a separate pointer that can be NULL in the first place. So we can just delete the call to visit_start_union() altogether, and blindly visit the branch type; there is no change in behavior except to the dealloc visitor, where we now unconditionally visit the branch, but where that visit is now always safe (for a flat union, we can no longer dereference NULL, and for a simple union, visit_type_FOO() was already safely handling NULL on pointer types). Unfortunately, simple unions are not as easy to switch to unboxed layout; because we are special-casing the hidden implicit type with a single 'data' member, we really DO need to keep calling another layer of visit_start_struct(), with a second malloc; although there are some cleanups planned for simple unions in later patches. visit_start_union() and gen_visit_implicit_struct() are now unused. Drop them. Note that after this patch, the only remaining use of visit_start_implicit_struct() is for alternate types; the next patch will do further cleanup based on that fact. Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1455778109-6278-14-git-send-email-eblake@redhat.com> [Dead code deletion squashed in, commit message updated accordingly] Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-02-18 09:48:27 +03:00
info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
info->value->arch = CPU_INFO_ARCH_TRICORE;
qapi: Don't box branches of flat unions There's no reason to do two malloc's for a flat union; let's just inline the branch struct directly into the C union branch of the flat union. Surprisingly, fewer clients were actually using explicit references to the branch types in comparison to the number of flat unions thus modified. This lets us reduce the hack in qapi-types:gen_variants() added in the previous patch; we no longer need to distinguish between alternates and flat unions. The change to unboxed structs means that u.data (added in commit cee2dedb) is now coincident with random fields of each branch of the flat union, whereas beforehand it was only coincident with pointers (since all branches of a flat union have to be objects). Note that this was already the case for simple unions - but there we got lucky. Remember, visit_start_union() blindly returns true for all visitors except for the dealloc visitor, where it returns the value !!obj->u.data, and that this result then controls whether to proceed with the visit to the variant. Pre-patch, this meant that flat unions were testing whether the boxed pointer was still NULL, and thereby skipping visit_end_implicit_struct() and avoiding a NULL dereference if the pointer had not been allocated. The same was true for simple unions where the current branch had pointer type, except there we bypassed visit_type_FOO(). But for simple unions where the current branch had scalar type, the contents of that scalar meant that the decision to call visit_type_FOO() was data-dependent - the reason we got lucky there is that visit_type_FOO() for all scalar types in the dealloc visitor is a no-op (only the pointer variants had anything to free), so it did not matter whether the dealloc visit was skipped. But with this patch, we would risk leaking memory if we could skip a call to visit_type_FOO_fields() based solely on a data-dependent decision. But notice: in the dealloc visitor, visit_type_FOO() already handles a NULL obj - it was only the visit_type_implicit_FOO() that was failing to check for NULL. And now that we have refactored things to have the branch be part of the parent struct, we no longer have a separate pointer that can be NULL in the first place. So we can just delete the call to visit_start_union() altogether, and blindly visit the branch type; there is no change in behavior except to the dealloc visitor, where we now unconditionally visit the branch, but where that visit is now always safe (for a flat union, we can no longer dereference NULL, and for a simple union, visit_type_FOO() was already safely handling NULL on pointer types). Unfortunately, simple unions are not as easy to switch to unboxed layout; because we are special-casing the hidden implicit type with a single 'data' member, we really DO need to keep calling another layer of visit_start_struct(), with a second malloc; although there are some cleanups planned for simple unions in later patches. visit_start_union() and gen_visit_implicit_struct() are now unused. Drop them. Note that after this patch, the only remaining use of visit_start_implicit_struct() is for alternate types; the next patch will do further cleanup based on that fact. Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1455778109-6278-14-git-send-email-eblake@redhat.com> [Dead code deletion squashed in, commit message updated accordingly] Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-02-18 09:48:27 +03:00
info->value->u.tricore.PC = env->PC;
#else
info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
/* XXX: waiting for the qapi to support GSList */
if (!cur_item) {
head = cur_item = info;
} else {
cur_item->next = info;
cur_item = info;
}
}
return head;
}
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
bool has_cpu, int64_t cpu_index, Error **errp)
{
FILE *f;
uint32_t l;
CPUState *cpu;
uint8_t buf[1024];
int64_t orig_addr = addr, orig_size = size;
if (!has_cpu) {
cpu_index = 0;
}
cpu = qemu_get_cpu(cpu_index);
if (cpu == NULL) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
"a CPU number");
return;
}
f = fopen(filename, "wb");
if (!f) {
error_setg_file_open(errp, errno, filename);
return;
}
while (size != 0) {
l = sizeof(buf);
if (l > size)
l = size;
if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
" specified", orig_addr, orig_size);
goto exit;
}
if (fwrite(buf, 1, l, f) != l) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
size -= l;
}
exit:
fclose(f);
}
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
Error **errp)
{
FILE *f;
uint32_t l;
uint8_t buf[1024];
f = fopen(filename, "wb");
if (!f) {
error_setg_file_open(errp, errno, filename);
return;
}
while (size != 0) {
l = sizeof(buf);
if (l > size)
l = size;
cpu_physical_memory_read(addr, buf, l);
if (fwrite(buf, 1, l, f) != l) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
size -= l;
}
exit:
fclose(f);
}
void qmp_inject_nmi(Error **errp)
{
nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
if (!use_icount) {
return;
}
cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
(cpu_get_clock() - cpu_get_icount())/SCALE_MS);
if (icount_align_option) {
cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
} else {
cpu_fprintf(f, "Max guest delay NA\n");
cpu_fprintf(f, "Max guest advance NA\n");
}
}