cpu: Move icount_decr to CPUNegativeOffsetState

Amusingly, we had already ignored the comment to keep this value
at the end of CPUState.  This restores the minimum negative offset
from TCG_AREG0 for code generation.

For the couple of uses within qom/cpu.c, without NEED_CPU_H, add
a pointer from the CPUState object to the IcountDecr object within
CPUNegativeOffsetState.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Richard Henderson 2019-03-28 11:54:23 -10:00
parent 5b146dc716
commit 5e1401969b
9 changed files with 56 additions and 54 deletions
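For orientation, a minimal sketch of the layout this change relies on. The field names mirror the tree at this commit, but the sketch is illustrative, assuming the CPUNegativeOffsetState placement and the cpu_neg() accessor introduced by the parent commit:

    /*
     * Each target's ArchCPU places the negative-offset state immediately
     * before env, so TCG's AREG0 (which points at env) reaches icount_decr
     * with a small negative displacement:
     */
    struct ArchCPU {
        CPUState parent_obj;           /* large positive offsets from env */
        CPUNegativeOffsetState neg;    /* small negative offsets from env */
        CPUArchState env;              /* TCG_AREG0 points here */
    };

    /* cpu_neg() recovers the neg block from a generic CPUState pointer,
     * which is how the accel/tcg code below reaches icount_decr.
     * (container_of is QEMU's, from include/qemu/osdep.h.) */
    static inline CPUNegativeOffsetState *cpu_neg(CPUState *cpu)
    {
        ArchCPU *arch_cpu = container_of(cpu, ArchCPU, parent_obj);
        return &arch_cpu->neg;
    }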

accel/tcg/cpu-exec.c

@@ -54,7 +54,7 @@ typedef struct SyncClocks {
 #define MAX_DELAY_PRINT_RATE 2000000000LL
 #define MAX_NB_PRINTS 100
-static void align_clocks(SyncClocks *sc, const CPUState *cpu)
+static void align_clocks(SyncClocks *sc, CPUState *cpu)
 {
     int64_t cpu_icount;
@@ -62,7 +62,7 @@ static void align_clocks(SyncClocks *sc, const CPUState *cpu)
         return;
     }
-    cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low;
+    cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
     sc->diff_clk += cpu_icount_to_ns(sc->last_cpu_icount - cpu_icount);
     sc->last_cpu_icount = cpu_icount;
@@ -105,15 +105,15 @@ static void print_delay(const SyncClocks *sc)
     }
 }
-static void init_delay_params(SyncClocks *sc,
-                              const CPUState *cpu)
+static void init_delay_params(SyncClocks *sc, CPUState *cpu)
 {
     if (!icount_align_option) {
         return;
     }
     sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
     sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
-    sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low;
+    sc->last_cpu_icount
+        = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
     if (sc->diff_clk < max_delay) {
         max_delay = sc->diff_clk;
     }
@@ -467,7 +467,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
     if (cpu->exception_index < 0) {
 #ifndef CONFIG_USER_ONLY
         if (replay_has_exception()
-            && cpu->icount_decr.u16.low + cpu->icount_extra == 0) {
+            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
             /* try to cause an exception pending in the log */
             cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0, curr_cflags()), true);
         }
@@ -525,7 +525,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
      * Ensure zeroing happens before reading cpu->exit_request or
      * cpu->interrupt_request (see also smp_wmb in cpu_exit())
      */
-    atomic_mb_set(&cpu->icount_decr.u16.high, 0);
+    atomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0);
     if (unlikely(atomic_read(&cpu->interrupt_request))) {
         int interrupt_request;
@@ -596,8 +596,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
     }
     /* Finally, check if we need to exit to the main loop. */
-    if (unlikely(atomic_read(&cpu->exit_request)
-        || (use_icount && cpu->icount_decr.u16.low + cpu->icount_extra == 0))) {
+    if (unlikely(atomic_read(&cpu->exit_request))
+        || (use_icount
+            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) {
         atomic_set(&cpu->exit_request, 0);
         if (cpu->exception_index == -1) {
             cpu->exception_index = EXCP_INTERRUPT;
@@ -624,7 +625,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
     }
     *last_tb = NULL;
-    insns_left = atomic_read(&cpu->icount_decr.u32);
+    insns_left = atomic_read(&cpu_neg(cpu)->icount_decr.u32);
     if (insns_left < 0) {
         /* Something asked us to stop executing chained TBs; just
          * continue round the main loop. Whatever requested the exit
@@ -643,7 +644,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
     cpu_update_icount(cpu);
     /* Refill decrementer and continue execution. */
     insns_left = MIN(0xffff, cpu->icount_budget);
-    cpu->icount_decr.u16.low = insns_left;
+    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
     cpu->icount_extra = cpu->icount_budget - insns_left;
     if (!cpu->icount_extra) {
         /* Execute any remaining instructions, then let the main loop
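An aside on the refill logic in cpu_loop_exec_tb() above: the decrementer holds only 16 bits, so the icount budget is split between u16.low and icount_extra. A standalone sketch of that split, with a purely hypothetical budget value:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        uint64_t icount_budget = 0x12345;           /* hypothetical budget */
        uint16_t low = MIN(0xffff, icount_budget);  /* fits the decrementer */
        uint64_t extra = icount_budget - low;       /* banked for later refills */

        /* Prints: u16.low = 0xffff, icount_extra = 0x2346 */
        printf("u16.low = %#x, icount_extra = %#" PRIx64 "\n",
               (unsigned)low, extra);
        return 0;
    }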

accel/tcg/tcg-all.c

@@ -28,13 +28,12 @@
 #include "sysemu/sysemu.h"
 #include "qom/object.h"
 #include "qemu-common.h"
-#include "qom/cpu.h"
+#include "cpu.h"
 #include "sysemu/cpus.h"
 #include "qemu/main-loop.h"
 unsigned long tcg_tb_size;
-#ifndef CONFIG_USER_ONLY
 /* mask must never be zero, except for A20 change call */
 static void tcg_handle_interrupt(CPUState *cpu, int mask)
 {
@@ -51,7 +50,7 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
     if (!qemu_cpu_is_self(cpu)) {
         qemu_cpu_kick(cpu);
     } else {
-        atomic_set(&cpu->icount_decr.u16.high, -1);
+        atomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
         if (use_icount &&
             !cpu->can_do_io
             && (mask & ~old_mask) != 0) {
@@ -59,7 +58,6 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask)
         }
     }
 }
-#endif
 static int tcg_init(MachineState *ms)
 {

accel/tcg/translate-all.c

@@ -364,7 +364,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
         assert(use_icount);
         /* Reset the cycle counter to the start of the block
            and shift if to the number of actually executed instructions */
-        cpu->icount_decr.u16.low += num_insns - i;
+        cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
     }
     restore_state_to_opc(env, tb, data);
@@ -2200,7 +2200,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     if ((env->hflags & MIPS_HFLAG_BMASK) != 0
         && env->active_tc.PC != tb->pc) {
         env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
-        cpu->icount_decr.u16.low++;
+        cpu_neg(cpu)->icount_decr.u16.low++;
         env->hflags &= ~MIPS_HFLAG_BMASK;
         n = 2;
     }
@@ -2208,7 +2208,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
         && env->pc != tb->pc) {
         env->pc -= 2;
-        cpu->icount_decr.u16.low++;
+        cpu_neg(cpu)->icount_decr.u16.low++;
         env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
         n = 2;
     }
@@ -2382,7 +2382,7 @@ void cpu_interrupt(CPUState *cpu, int mask)
 {
     g_assert(qemu_mutex_iothread_locked());
     cpu->interrupt_request |= mask;
-    atomic_set(&cpu->icount_decr.u16.high, -1);
+    atomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
 }
 /*

cpus.c

@@ -239,7 +239,8 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp)
  */
 static int64_t cpu_get_icount_executed(CPUState *cpu)
 {
-    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
+    return (cpu->icount_budget -
+            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
 }
 /*
@@ -1389,12 +1390,12 @@ static void prepare_icount_for_run(CPUState *cpu)
      * each vCPU execution. However u16.high can be raised
      * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
      */
-    g_assert(cpu->icount_decr.u16.low == 0);
+    g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
     g_assert(cpu->icount_extra == 0);
     cpu->icount_budget = tcg_get_icount_limit();
     insns_left = MIN(0xffff, cpu->icount_budget);
-    cpu->icount_decr.u16.low = insns_left;
+    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
     cpu->icount_extra = cpu->icount_budget - insns_left;
     replay_mutex_lock();
@@ -1408,7 +1409,7 @@ static void process_icount_data(CPUState *cpu)
     cpu_update_icount(cpu);
     /* Reset the counters */
-    cpu->icount_decr.u16.low = 0;
+    cpu_neg(cpu)->icount_decr.u16.low = 0;
     cpu->icount_extra = 0;
     cpu->icount_budget = 0;
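The accounting in cpu_get_icount_executed() above is the inverse of that split: executed = budget minus what is still pending in the decrementer and the banked remainder. A toy check with invented numbers:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Invented snapshot: budget 0x12345, the TB chain has run 40 insns. */
        uint64_t budget = 0x12345;
        uint16_t low    = 0xffff - 40;   /* decrementer after 40 insns */
        uint64_t extra  = 0x2346;        /* untouched remainder */

        uint64_t executed = budget - (low + extra);
        printf("executed = %" PRIu64 "\n", executed);   /* prints 40 */
        return 0;
    }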

include/exec/cpu-all.h

@@ -380,6 +380,7 @@ int cpu_exec(CPUState *cpu);
 static inline void cpu_set_cpustate_pointers(ArchCPU *cpu)
 {
     cpu->parent_obj.env_ptr = &cpu->env;
+    cpu->parent_obj.icount_decr_ptr = &cpu->neg.icount_decr;
 }
 /**

include/exec/cpu-defs.h

@@ -33,6 +33,7 @@
 #include "exec/hwaddr.h"
 #endif
 #include "exec/memattrs.h"
+#include "qom/cpu.h"
 #include "cpu-param.h"
@@ -232,7 +233,7 @@ typedef struct CPUTLB {
  * before CPUArchState, as a field named "neg".
  */
 typedef struct CPUNegativeOffsetState {
-    /* Empty */
+    IcountDecr icount_decr;
 } CPUNegativeOffsetState;
 #endif

include/exec/gen-icount.h

@@ -5,8 +5,6 @@
 /* Helpers for instruction counting code generation. */
-#define ENV_OFFSET offsetof(ArchCPU, env)
-
 static TCGOp *icount_start_insn;
 static inline void gen_tb_start(TranslationBlock *tb)
@@ -21,7 +19,8 @@ static inline void gen_tb_start(TranslationBlock *tb)
     }
     tcg_gen_ld_i32(count, cpu_env,
-                   -ENV_OFFSET + offsetof(CPUState, icount_decr.u32));
+                   offsetof(ArchCPU, neg.icount_decr.u32) -
+                   offsetof(ArchCPU, env));
     if (tb_cflags(tb) & CF_USE_ICOUNT) {
         imm = tcg_temp_new_i32();
@@ -39,7 +38,8 @@ static inline void gen_tb_start(TranslationBlock *tb)
     if (tb_cflags(tb) & CF_USE_ICOUNT) {
         tcg_gen_st16_i32(count, cpu_env,
-                         -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low));
+                         offsetof(ArchCPU, neg.icount_decr.u16.low) -
+                         offsetof(ArchCPU, env));
     }
     tcg_temp_free_i32(count);
@@ -60,14 +60,18 @@ static inline void gen_tb_end(TranslationBlock *tb, int num_insns)
 static inline void gen_io_start(void)
 {
     TCGv_i32 tmp = tcg_const_i32(1);
-    tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io));
+    tcg_gen_st_i32(tmp, cpu_env,
+                   offsetof(ArchCPU, parent_obj.can_do_io) -
+                   offsetof(ArchCPU, env));
     tcg_temp_free_i32(tmp);
 }
 static inline void gen_io_end(void)
 {
     TCGv_i32 tmp = tcg_const_i32(0);
-    tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io));
+    tcg_gen_st_i32(tmp, cpu_env,
+                   offsetof(ArchCPU, parent_obj.can_do_io) -
+                   offsetof(ArchCPU, env));
     tcg_temp_free_i32(tmp);
 }
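To see why the new offsetof() arithmetic above produces a negative displacement, here is a self-contained mock-up; the stand-in struct sizes are arbitrary, and only the field order matters:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Shrunken stand-ins for the real structures. */
    typedef struct { char opaque[4096]; } CPUState;
    typedef union { uint32_t u32; } IcountDecr;
    typedef struct { IcountDecr icount_decr; } CPUNegativeOffsetState;
    typedef struct { char regs[512]; } CPUArchState;

    typedef struct ArchCPU {
        CPUState parent_obj;
        CPUNegativeOffsetState neg;   /* immediately before env */
        CPUArchState env;             /* cpu_env / AREG0 points here */
    } ArchCPU;

    int main(void)
    {
        long off = (long)offsetof(ArchCPU, neg.icount_decr.u32)
                 - (long)offsetof(ArchCPU, env);
        /* A small negative value (-4 here): cheap to encode on most hosts. */
        printf("displacement from env = %ld\n", off);
        return 0;
    }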

include/qom/cpu.h

@@ -232,17 +232,25 @@ typedef struct CPUClass {
     bool gdb_stop_before_watchpoint;
 } CPUClass;
+/*
+ * Low 16 bits: number of cycles left, used only in icount mode.
+ * High 16 bits: Set to -1 to force TCG to stop executing linked TBs
+ * for this CPU and return to its top level loop (even in non-icount mode).
+ * This allows a single read-compare-cbranch-write sequence to test
+ * for both decrementer underflow and exceptions.
+ */
+typedef union IcountDecr {
+    uint32_t u32;
+    struct {
 #ifdef HOST_WORDS_BIGENDIAN
-typedef struct icount_decr_u16 {
-    uint16_t high;
-    uint16_t low;
-} icount_decr_u16;
+        uint16_t high;
+        uint16_t low;
 #else
-typedef struct icount_decr_u16 {
-    uint16_t low;
-    uint16_t high;
-} icount_decr_u16;
+        uint16_t low;
+        uint16_t high;
 #endif
+    } u16;
+} IcountDecr;
 typedef struct CPUBreakpoint {
     vaddr pc;
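A standalone illustration of the IcountDecr trick documented above: writing -1 into the high half forces the combined 32-bit value negative, so generated code can test for both icount underflow and an asynchronous exit request with one signed comparison. (GCC/Clang's __BYTE_ORDER__ stands in here for QEMU's HOST_WORDS_BIGENDIAN.)

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef union IcountDecr {
        uint32_t u32;
        struct {
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
            uint16_t high;
            uint16_t low;
    #else
            uint16_t low;
            uint16_t high;
    #endif
        } u16;
    } IcountDecr;

    int main(void)
    {
        IcountDecr d = { .u32 = 0 };
        d.u16.low = 100;             /* icount: 100 instructions remaining */
        d.u16.high = (uint16_t)-1;   /* cpu_exit(): request a TB exit */

        /* One load and signed compare now catches both conditions. */
        printf("insns_left = %" PRId32 " (< 0 leaves the TB chain)\n",
               (int32_t)d.u32);
        return 0;
    }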
@@ -314,11 +322,6 @@ struct qemu_work_item;
  * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU
  * @singlestep_enabled: Flags for single-stepping.
  * @icount_extra: Instructions until next timer event.
- * @icount_decr: Low 16 bits: number of cycles left, only used in icount mode.
- * High 16 bits: Set to -1 to force TCG to stop executing linked TBs for this
- * CPU and return to its top level loop (even in non-icount mode).
- * This allows a single read-compare-cbranch-write sequence to test
- * for both decrementer underflow and exceptions.
  * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution
  * requires that IO only be performed on the last instruction of a TB
  * so that interrupts take effect immediately.
@@ -328,6 +331,7 @@ struct qemu_work_item;
  * @as: Pointer to the first AddressSpace, for the convenience of targets which
  *      only have a single AddressSpace
  * @env_ptr: Pointer to subclass-specific CPUArchState field.
+ * @icount_decr_ptr: Pointer to IcountDecr field within subclass.
  * @gdb_regs: Additional GDB registers.
  * @gdb_num_regs: Number of total registers accessible to GDB.
  * @gdb_num_g_regs: Number of registers in GDB 'g' packets.
@@ -387,6 +391,7 @@ struct CPUState {
     MemoryRegion *memory;
     void *env_ptr; /* CPUArchState */
+    IcountDecr *icount_decr_ptr;
     /* Accessed in parallel; all accesses must be atomic */
     struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
@@ -441,15 +446,6 @@ struct CPUState {
     bool ignore_memory_transaction_failures;
-    /* Note that this is accessed at the start of every TB via a negative
-       offset from AREG0.  Leave this field at the end so as to make the
-       (absolute value) offset as small as possible.  This reduces code
-       size, especially for hosts without large memory offsets. */
-    union {
-        uint32_t u32;
-        icount_decr_u16 u16;
-    } icount_decr;
     struct hax_vcpu_state *hax_vcpu;
     int hvf_fd;

qom/cpu.c

@@ -115,7 +115,7 @@ void cpu_exit(CPUState *cpu)
     atomic_set(&cpu->exit_request, 1);
     /* Ensure cpu_exec will see the exit request after TCG has exited. */
     smp_wmb();
-    atomic_set(&cpu->icount_decr.u16.high, -1);
+    atomic_set(&cpu->icount_decr_ptr->u16.high, -1);
 }
 int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu,
@@ -264,7 +264,7 @@ static void cpu_common_reset(CPUState *cpu)
     cpu->mem_io_pc = 0;
     cpu->mem_io_vaddr = 0;
     cpu->icount_extra = 0;
-    atomic_set(&cpu->icount_decr.u32, 0);
+    atomic_set(&cpu->icount_decr_ptr->u32, 0);
     cpu->can_do_io = 1;
     cpu->exception_index = -1;
     cpu->crash_occurred = false;