Fix tcg constant temp overflow.

Fix running during atomic single-step.
 Partial support for apple silicon.
 Cleanups for accel/tcg.
 -----BEGIN PGP SIGNATURE-----
 
 iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmANt7kdHHJpY2hhcmQu
 aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV/qwQgApfJEYmEURz2ETsBv
 z1D/lVM6sjVky7Rk0GiCePf9GZhpXuLK0tEDxWPhHp8d466KfvU3iiXLv0xGbFV5
 MBYXPw2uJWLfYNqfi3YGJYSGAdESKJSx27hMMb7RkElbbKqmFGVyPTkZG0VDoiCZ
 hEUMGFXa1e0WjTQchm7AUw8CtATHUTmC2wkVqA+Z+BNqoU2WKbvy6f5QgM2CdWEX
 bFoC/K0o0vIr8OrwS76ZyVDwbGG7s8r9gA2B9a2Nw73/n4q6DdZPwcUIR23blHa/
 A9NmdU4HRNt07nfJN9DQ2qe9utNkoaFj7SpRW2+RJCVkTz5hqbiZTO1c3ZJa0MTi
 QbyMNQ==
 =KGcn
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210124' into staging

Fix tcg constant temp overflow.
Fix running during atomic single-step.
Partial support for apple silicon.
Cleanups for accel/tcg.

# gpg: Signature made Sun 24 Jan 2021 18:08:57 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20210124:
  tcg: Restart code generation when we run out of temps
  tcg: Toggle page execution for Apple Silicon
  accel/tcg: Restrict cpu_io_recompile() from other accelerators
  accel/tcg: Declare missing cpu_loop_exit*() stubs
  accel/tcg: Restrict tb_gen_code() from other accelerators
  accel/tcg: Move tb_flush_jmp_cache() to cputlb.c
  accel/tcg: Make cpu_gen_init() static
  tcg: Optimize inline dup_const for MO_64
  qemu/compiler: Split out qemu_build_not_reached_always
  tcg: update the cpu running flag in cpu_exec_step_atomic

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-01-24 19:36:45 +00:00
commit e672f1d397
10 changed files with 120 additions and 36 deletions

View File

@ -28,3 +28,13 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
/* Handled by hardware accelerator. */
g_assert_not_reached();
}
void QEMU_NORETURN cpu_loop_exit(CPUState *cpu)
{
g_assert_not_reached();
}
void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
{
g_assert_not_reached();
}

View File

@ -41,6 +41,7 @@
#include "exec/cpu-all.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/replay.h"
#include "internal.h"
/* -icount align implementation. */
@ -185,6 +186,7 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
}
#endif /* DEBUG_DISAS */
qemu_thread_jit_execute();
ret = tcg_qemu_tb_exec(env, tb_ptr);
cpu->can_do_io = 1;
/*
@ -285,6 +287,9 @@ void cpu_exec_step_atomic(CPUState *cpu)
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
start_exclusive();
g_assert(cpu == current_cpu);
g_assert(!cpu->running);
cpu->running = true;
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
if (tb == NULL) {
@ -323,6 +328,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
*/
g_assert(cpu_in_exclusive_context(cpu));
parallel_cpus = true;
cpu->running = false;
end_exclusive();
}
@ -405,6 +411,7 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
{
uintptr_t old;
qemu_thread_jit_write();
assert(n < ARRAY_SIZE(tb->jmp_list_next));
qemu_spin_lock(&tb_next->jmp_lock);

View File

@ -25,6 +25,7 @@
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/tb-hash.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
@ -36,6 +37,7 @@
#include "exec/translate-all.h"
#include "trace/trace-root.h"
#include "trace/mem.h"
#include "internal.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif
@ -97,6 +99,23 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
desc->window_max_entries = max_entries;
}
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
{
unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
}
}
static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
{
/* Discard jump cache entries for any tb which might potentially
overlap the flushed page. */
tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
tb_jmp_cache_clear_page(cpu, addr);
}
/**
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
* @desc: The CPUTLBDesc portion of the TLB

20
accel/tcg/internal.h Normal file
View File

@ -0,0 +1,20 @@
/*
* Internal execution defines for qemu
*
* Copyright (c) 2003 Fabrice Bellard
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/
#ifndef ACCEL_TCG_INTERNAL_H
#define ACCEL_TCG_INTERNAL_H
#include "exec/exec-all.h"
TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
int cflags);
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
#endif /* ACCEL_TCG_INTERNAL_H */

View File

@ -60,6 +60,7 @@
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "internal.h"
/* #define DEBUG_TB_INVALIDATE */
/* #define DEBUG_TB_FLUSH */
@ -243,7 +244,7 @@ static void page_table_config_init(void)
assert(v_l2_levels >= 0);
}
void cpu_gen_init(void)
static void cpu_gen_init(void)
{
tcg_context_init(&tcg_init_ctx);
}
@ -1669,7 +1670,9 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
static void tb_phys_invalidate__locked(TranslationBlock *tb)
{
qemu_thread_jit_write();
do_tb_phys_invalidate(tb, true);
qemu_thread_jit_execute();
}
/* invalidate one TB
@ -1871,6 +1874,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#endif
assert_memory_lock();
qemu_thread_jit_write();
phys_pc = get_page_addr_code(env, pc);
@ -1922,11 +1926,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
ti = profile_getclock();
#endif
gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
if (unlikely(gen_code_size != 0)) {
goto error_return;
}
tcg_func_start(tcg_ctx);
tcg_ctx->cpu = env_cpu(env);
gen_intermediate_code(cpu, tb, max_insns);
tcg_ctx->cpu = NULL;
max_insns = tb->icount;
trace_translate_block(tb, tb->pc, tb->tc.ptr);
@ -1951,6 +1961,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
gen_code_size = tcg_gen_code(tcg_ctx, tb);
if (unlikely(gen_code_size < 0)) {
error_return:
switch (gen_code_size) {
case -1:
/*
@ -1962,6 +1973,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* flush the TBs, allocate a new TB, re-initialize it per
* above, and re-do the actual code generation.
*/
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation for "
"code_gen_buffer overflow\n");
goto buffer_overflow;
case -2:
@ -1974,9 +1988,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* Try again with half as many insns as we attempted this time.
* If a single insn overflows, there's a bug somewhere...
*/
max_insns = tb->icount;
assert(max_insns > 1);
max_insns /= 2;
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation with "
"smaller translation block (max %d insns)\n",
max_insns);
goto tb_overflow;
default:
@ -2461,23 +2478,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
cpu_loop_exit_noexc(cpu);
}
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
{
unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
}
}
void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
{
/* Discard jump cache entries for any tb which might potentially
overlap the flushed page. */
tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
tb_jmp_cache_clear_page(cpu, addr);
}
static void print_qht_statistics(struct qht_stats hst)
{
uint32_t hgram_opts;

View File

@ -47,8 +47,6 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb,
target_ulong *data);
void cpu_gen_init(void);
/**
* cpu_restore_state:
* @cpu: the vCPU state is to be restore to
@ -65,12 +63,6 @@ void cpu_gen_init(void);
bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit);
void QEMU_NORETURN cpu_loop_exit_noexc(CPUState *cpu);
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
TranslationBlock *tb_gen_code(CPUState *cpu,
target_ulong pc, target_ulong cs_base,
uint32_t flags,
int cflags);
void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
@ -665,9 +657,6 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
/* exec.c */
void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
hwaddr *xlat, hwaddr *plen,

View File

@ -215,9 +215,10 @@
* supports QEMU_ERROR, this will be reported at compile time; otherwise
* this will be reported at link time due to the missing symbol.
*/
#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
extern void QEMU_NORETURN QEMU_ERROR("code path is reachable")
qemu_build_not_reached(void);
qemu_build_not_reached_always(void);
#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
#define qemu_build_not_reached() qemu_build_not_reached_always()
#else
#define qemu_build_not_reached() g_assert_not_reached()
#endif

View File

@ -119,6 +119,10 @@ extern int daemon(int, int);
#include "sysemu/os-posix.h"
#endif
#ifdef __APPLE__
#include <AvailabilityMacros.h>
#endif
#include "glib-compat.h"
#include "qemu/typedefs.h"
@ -682,4 +686,28 @@ char *qemu_get_host_name(Error **errp);
*/
size_t qemu_get_host_physmem(void);
/*
* Toggle write/execute on the pages marked MAP_JIT
* for the current thread.
*/
#if defined(MAC_OS_VERSION_11_0) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
static inline void qemu_thread_jit_execute(void)
{
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(true);
}
}
static inline void qemu_thread_jit_write(void)
{
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(false);
}
}
#else
static inline void qemu_thread_jit_write(void) {}
static inline void qemu_thread_jit_execute(void) {}
#endif
#endif

View File

@ -680,6 +680,9 @@ struct TCGContext {
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
/* Exit to translator on overflow. */
sigjmp_buf jmp_trans;
};
static inline bool temp_readonly(TCGTemp *ts)
@ -1325,7 +1328,8 @@ uint64_t dup_const(unsigned vece, uint64_t c);
? ( (VECE) == MO_8 ? 0x0101010101010101ull * (uint8_t)(C) \
: (VECE) == MO_16 ? 0x0001000100010001ull * (uint16_t)(C) \
: (VECE) == MO_32 ? 0x0000000100000001ull * (uint32_t)(C) \
: dup_const(VECE, C)) \
: (VECE) == MO_64 ? (uint64_t)(C) \
: (qemu_build_not_reached_always(), 0)) \
: dup_const(VECE, C))

View File

@ -1112,6 +1112,7 @@ void tcg_prologue_init(TCGContext *s)
s->pool_labels = NULL;
#endif
qemu_thread_jit_write();
/* Generate the prologue. */
tcg_target_qemu_prologue(s);
@ -1204,18 +1205,23 @@ void tcg_func_start(TCGContext *s)
QSIMPLEQ_INIT(&s->labels);
}
static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
int n = s->nb_temps++;
tcg_debug_assert(n < TCG_MAX_TEMPS);
if (n >= TCG_MAX_TEMPS) {
/* Signal overflow, starting over with fewer guest insns. */
siglongjmp(s->jmp_trans, -2);
}
return memset(&s->temps[n], 0, sizeof(TCGTemp));
}
static inline TCGTemp *tcg_global_alloc(TCGContext *s)
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
TCGTemp *ts;
tcg_debug_assert(s->nb_globals == s->nb_temps);
tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
s->nb_globals++;
ts = tcg_temp_alloc(s);
ts->kind = TEMP_GLOBAL;