0f9668e0c1
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <20220323155743.1585078-33-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
489 lines
16 KiB
C
489 lines
16 KiB
C
/*
|
|
* QEMU System Emulator
|
|
*
|
|
* Copyright (c) 2003-2008 Fabrice Bellard
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qemu/cutils.h"
|
|
#include "migration/vmstate.h"
|
|
#include "qapi/error.h"
|
|
#include "qemu/error-report.h"
|
|
#include "exec/exec-all.h"
|
|
#include "sysemu/cpus.h"
|
|
#include "sysemu/qtest.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/option.h"
|
|
#include "qemu/seqlock.h"
|
|
#include "sysemu/replay.h"
|
|
#include "sysemu/runstate.h"
|
|
#include "hw/core/cpu.h"
|
|
#include "sysemu/cpu-timers.h"
|
|
#include "sysemu/cpu-throttle.h"
|
|
#include "timers-state.h"
|
|
|
|
/*
|
|
* ICOUNT: Instruction Counter
|
|
*
|
|
* this module is split off from cpu-timers because the icount part
|
|
* is TCG-specific, and does not need to be built for other accels.
|
|
*/
|
|
/*
 * Mirrors the "sleep" option parsed by icount_configure().  When false,
 * icount_start_warp_timer() advances QEMU_CLOCK_VIRTUAL straight to the
 * next deadline instead of arming the warp timer.
 */
static bool icount_sleep = true;

/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/*
 * Instruction counting mode:
 *   0 = Do not count executed instructions.
 *   1 = Fixed conversion of insn to ns via "shift" option
 *   2 = Runtime adaptive algorithm to compute shift
 */
int use_icount;
|
|
|
|
static void icount_enable_precise(void)
|
|
{
|
|
use_icount = 1;
|
|
}
|
|
|
|
static void icount_enable_adaptive(void)
|
|
{
|
|
use_icount = 2;
|
|
}
|
|
|
|
/*
|
|
* The current number of executed instructions is based on what we
|
|
* originally budgeted minus the current state of the decrementing
|
|
* icount counters in extra/u16.low.
|
|
*/
|
|
static int64_t icount_get_executed(CPUState *cpu)
|
|
{
|
|
return (cpu->icount_budget -
|
|
(cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
|
|
}
|
|
|
|
/*
|
|
* Update the global shared timer_state.qemu_icount to take into
|
|
* account executed instructions. This is done by the TCG vCPU
|
|
* thread so the main-loop can see time has moved forward.
|
|
*/
|
|
static void icount_update_locked(CPUState *cpu)
|
|
{
|
|
int64_t executed = icount_get_executed(cpu);
|
|
cpu->icount_budget -= executed;
|
|
|
|
qatomic_set_i64(&timers_state.qemu_icount,
|
|
timers_state.qemu_icount + executed);
|
|
}
|
|
|
|
/*
|
|
* Update the global shared timer_state.qemu_icount to take into
|
|
* account executed instructions. This is done by the TCG vCPU
|
|
* thread so the main-loop can see time has moved forward.
|
|
*/
|
|
void icount_update(CPUState *cpu)
|
|
{
|
|
seqlock_write_lock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
icount_update_locked(cpu);
|
|
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
}
|
|
|
|
static int64_t icount_get_raw_locked(void)
|
|
{
|
|
CPUState *cpu = current_cpu;
|
|
|
|
if (cpu && cpu->running) {
|
|
if (!cpu->can_do_io) {
|
|
error_report("Bad icount read");
|
|
exit(1);
|
|
}
|
|
/* Take into account what has run */
|
|
icount_update_locked(cpu);
|
|
}
|
|
/* The read is protected by the seqlock, but needs atomic64 to avoid UB */
|
|
return qatomic_read_i64(&timers_state.qemu_icount);
|
|
}
|
|
|
|
static int64_t icount_get_locked(void)
|
|
{
|
|
int64_t icount = icount_get_raw_locked();
|
|
return qatomic_read_i64(&timers_state.qemu_icount_bias) +
|
|
icount_to_ns(icount);
|
|
}
|
|
|
|
int64_t icount_get_raw(void)
|
|
{
|
|
int64_t icount;
|
|
unsigned start;
|
|
|
|
do {
|
|
start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
|
|
icount = icount_get_raw_locked();
|
|
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
|
|
|
|
return icount;
|
|
}
|
|
|
|
/* Return the virtual CPU time, based on the instruction counter. */
|
|
int64_t icount_get(void)
|
|
{
|
|
int64_t icount;
|
|
unsigned start;
|
|
|
|
do {
|
|
start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
|
|
icount = icount_get_locked();
|
|
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
|
|
|
|
return icount;
|
|
}
|
|
|
|
int64_t icount_to_ns(int64_t icount)
|
|
{
|
|
return icount << qatomic_read(&timers_state.icount_time_shift);
|
|
}
|
|
|
|
/*
 * Tolerance used by icount_adjust() before it changes the shift: one
 * tenth of a second.
 *
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so small variations are ignored.  When the guest
 * is idle, real and virtual time will be aligned in the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
|
|
|
|
static void icount_adjust(void)
|
|
{
|
|
int64_t cur_time;
|
|
int64_t cur_icount;
|
|
int64_t delta;
|
|
|
|
/* If the VM is not running, then do nothing. */
|
|
if (!runstate_is_running()) {
|
|
return;
|
|
}
|
|
|
|
seqlock_write_lock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
|
|
cpu_get_clock_locked());
|
|
cur_icount = icount_get_locked();
|
|
|
|
delta = cur_icount - cur_time;
|
|
/* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
|
|
if (delta > 0
|
|
&& timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
|
|
&& timers_state.icount_time_shift > 0) {
|
|
/* The guest is getting too far ahead. Slow time down. */
|
|
qatomic_set(&timers_state.icount_time_shift,
|
|
timers_state.icount_time_shift - 1);
|
|
}
|
|
if (delta < 0
|
|
&& timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
|
|
&& timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
|
|
/* The guest is getting too far behind. Speed time up. */
|
|
qatomic_set(&timers_state.icount_time_shift,
|
|
timers_state.icount_time_shift + 1);
|
|
}
|
|
timers_state.last_delta = delta;
|
|
qatomic_set_i64(&timers_state.qemu_icount_bias,
|
|
cur_icount - (timers_state.qemu_icount
|
|
<< timers_state.icount_time_shift));
|
|
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
}
|
|
|
|
static void icount_adjust_rt(void *opaque)
|
|
{
|
|
timer_mod(timers_state.icount_rt_timer,
|
|
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
|
|
icount_adjust();
|
|
}
|
|
|
|
static void icount_adjust_vm(void *opaque)
|
|
{
|
|
timer_mod(timers_state.icount_vm_timer,
|
|
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
|
|
NANOSECONDS_PER_SECOND / 10);
|
|
icount_adjust();
|
|
}
|
|
|
|
int64_t icount_round(int64_t count)
|
|
{
|
|
int shift = qatomic_read(&timers_state.icount_time_shift);
|
|
return (count + (1 << shift) - 1) >> shift;
|
|
}
|
|
|
|
static void icount_warp_rt(void)
|
|
{
|
|
unsigned seq;
|
|
int64_t warp_start;
|
|
|
|
/*
|
|
* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
|
|
* changes from -1 to another value, so the race here is okay.
|
|
*/
|
|
do {
|
|
seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
|
|
warp_start = timers_state.vm_clock_warp_start;
|
|
} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
|
|
|
|
if (warp_start == -1) {
|
|
return;
|
|
}
|
|
|
|
seqlock_write_lock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
if (runstate_is_running()) {
|
|
int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
|
|
cpu_get_clock_locked());
|
|
int64_t warp_delta;
|
|
|
|
warp_delta = clock - timers_state.vm_clock_warp_start;
|
|
if (icount_enabled() == 2) {
|
|
/*
|
|
* In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
|
|
* far ahead of real time.
|
|
*/
|
|
int64_t cur_icount = icount_get_locked();
|
|
int64_t delta = clock - cur_icount;
|
|
warp_delta = MIN(warp_delta, delta);
|
|
}
|
|
qatomic_set_i64(&timers_state.qemu_icount_bias,
|
|
timers_state.qemu_icount_bias + warp_delta);
|
|
}
|
|
timers_state.vm_clock_warp_start = -1;
|
|
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
|
|
if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
|
|
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
|
|
}
|
|
}
|
|
|
|
/*
 * Callback of icount_warp_timer: account the pending warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
|
|
|
|
void icount_start_warp_timer(void)
|
|
{
|
|
int64_t clock;
|
|
int64_t deadline;
|
|
|
|
assert(icount_enabled());
|
|
|
|
/*
|
|
* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
|
|
* do not fire, so computing the deadline does not make sense.
|
|
*/
|
|
if (!runstate_is_running()) {
|
|
return;
|
|
}
|
|
|
|
if (replay_mode != REPLAY_MODE_PLAY) {
|
|
if (!all_cpu_threads_idle()) {
|
|
return;
|
|
}
|
|
|
|
if (qtest_enabled()) {
|
|
/* When testing, qtest commands advance icount. */
|
|
return;
|
|
}
|
|
|
|
replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
|
|
} else {
|
|
/* warp clock deterministically in record/replay mode */
|
|
if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
|
|
/*
|
|
* vCPU is sleeping and warp can't be started.
|
|
* It is probably a race condition: notification sent
|
|
* to vCPU was processed in advance and vCPU went to sleep.
|
|
* Therefore we have to wake it up for doing someting.
|
|
*/
|
|
if (replay_has_checkpoint()) {
|
|
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* We want to use the earliest deadline from ALL vm_clocks */
|
|
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
|
|
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
|
|
~QEMU_TIMER_ATTR_EXTERNAL);
|
|
if (deadline < 0) {
|
|
static bool notified;
|
|
if (!icount_sleep && !notified) {
|
|
warn_report("icount sleep disabled and no active timers");
|
|
notified = true;
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (deadline > 0) {
|
|
/*
|
|
* Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
|
|
* sleep. Otherwise, the CPU might be waiting for a future timer
|
|
* interrupt to wake it up, but the interrupt never comes because
|
|
* the vCPU isn't running any insns and thus doesn't advance the
|
|
* QEMU_CLOCK_VIRTUAL.
|
|
*/
|
|
if (!icount_sleep) {
|
|
/*
|
|
* We never let VCPUs sleep in no sleep icount mode.
|
|
* If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
|
|
* to the next QEMU_CLOCK_VIRTUAL event and notify it.
|
|
* It is useful when we want a deterministic execution time,
|
|
* isolated from host latencies.
|
|
*/
|
|
seqlock_write_lock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
qatomic_set_i64(&timers_state.qemu_icount_bias,
|
|
timers_state.qemu_icount_bias + deadline);
|
|
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
|
|
} else {
|
|
/*
|
|
* We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
|
|
* "real" time, (related to the time left until the next event) has
|
|
* passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
|
|
* This avoids that the warps are visible externally; for example,
|
|
* you will not be sending network packets continuously instead of
|
|
* every 100ms.
|
|
*/
|
|
seqlock_write_lock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
if (timers_state.vm_clock_warp_start == -1
|
|
|| timers_state.vm_clock_warp_start > clock) {
|
|
timers_state.vm_clock_warp_start = clock;
|
|
}
|
|
seqlock_write_unlock(&timers_state.vm_clock_seqlock,
|
|
&timers_state.vm_clock_lock);
|
|
timer_mod_anticipate(timers_state.icount_warp_timer,
|
|
clock + deadline);
|
|
}
|
|
} else if (deadline == 0) {
|
|
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
|
|
}
|
|
}
|
|
|
|
void icount_account_warp_timer(void)
|
|
{
|
|
if (!icount_sleep) {
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
|
|
* do not fire, so computing the deadline does not make sense.
|
|
*/
|
|
if (!runstate_is_running()) {
|
|
return;
|
|
}
|
|
|
|
/* warp clock deterministically in record/replay mode */
|
|
if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
|
|
return;
|
|
}
|
|
|
|
timer_del(timers_state.icount_warp_timer);
|
|
icount_warp_rt();
|
|
}
|
|
|
|
/*
 * Configure instruction counting from the -icount option group.
 *
 * @opts: parsed options; "shift" (a number or "auto"), "sleep" (default
 *        on) and "align" (default off) are consulted.
 * @errp: set when the option combination is invalid.
 *
 * Without a "shift" option icount stays disabled; giving "align" alone is
 * reported as an error.  A numeric shift in [0, MAX_ICOUNT_SHIFT] selects
 * the precise mode; "auto" selects the adaptive mode and starts the two
 * periodic adjustment timers.  On any error the function returns before
 * touching global state.
 */
void icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return;
    }

    /*
     * Validate against the options being parsed (the locals), not the
     * icount_align_option/icount_sleep globals: those are only assigned
     * further down and still hold their previous values here.
     */
    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return;
        }
    } else if (align) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return;
    } else if (!sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return;
    }

    /* All checks passed: commit the settings. */
    icount_sleep = sleep;
    if (icount_sleep) {
        /* The warp timer is only needed when vCPUs are allowed to sleep. */
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        /* Fixed shift given on the command line. */
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}
|