qemu/accel/tcg/translate-all.c
Stefan Hajnoczi 195801d700 system/cpus: rename qemu_mutex_lock_iothread() to bql_lock()
The Big QEMU Lock (BQL) has many names and they are confusing. The
actual QemuMutex variable is called qemu_global_mutex but it's commonly
referred to as the BQL in discussions and some code comments. The
locking APIs, however, are called qemu_mutex_lock_iothread() and
qemu_mutex_unlock_iothread().

The "iothread" name is historic and comes from when the main thread was
split into into KVM vcpu threads and the "iothread" (now called the main
loop thread). I have contributed to the confusion myself by introducing
a separate --object iothread, a separate concept unrelated to the BQL.

The "iothread" name is no longer appropriate for the BQL. Rename the
locking APIs to:
- void bql_lock(void)
- void bql_unlock(void)
- bool bql_locked(void)

There are more APIs with "iothread" in their names. Subsequent patches
will rename them. There are also comments and documentation that will be
updated in later patches.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paul Durrant <paul@xen.org>
Acked-by: Fabiano Rosas <farosas@suse.de>
Acked-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Acked-by: Peter Xu <peterx@redhat.com>
Acked-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
Acked-by: Hyman Huang <yong.huang@smartx.com>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Message-id: 20240102153529.486531-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2024-01-08 10:45:43 -05:00

676 lines
21 KiB
C

/*
* Host code generation
*
* Copyright (c) 2003 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/ram_addr.h"
#endif
#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "exec/translator.h"
#include "exec/tb-flush.h"
#include "qemu/bitmap.h"
#include "qemu/qemu-print.h"
#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/log.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal-common.h"
#include "internal-target.h"
#include "perf.h"
#include "tcg/insn-start-words.h"
TBContext tb_ctx;
/*
* Encode VAL as a signed leb128 sequence at P.
* Return P incremented past the encoded value.
*/
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
int more, byte;
do {
byte = val & 0x7f;
val >>= 7;
more = !((val == 0 && (byte & 0x40) == 0)
|| (val == -1 && (byte & 0x40) != 0));
if (more) {
byte |= 0x80;
}
*p++ = byte;
} while (more);
return p;
}
/*
* Decode a signed leb128 sequence at *PP; increment *PP past the
* decoded value. Return the decoded value.
*/
static int64_t decode_sleb128(const uint8_t **pp)
{
const uint8_t *p = *pp;
int64_t val = 0;
int byte, shift = 0;
do {
byte = *p++;
val |= (int64_t)(byte & 0x7f) << shift;
shift += 7;
} while (byte & 0x80);
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
val |= -(int64_t)1 << shift;
}
*pp = p;
return val;
}
/* Encode the data collected about the instructions while compiling TB.
Place the data at BLOCK, and return the number of bytes consumed.
The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
which come from the target's insn_start data, followed by a uintptr_t
which comes from the host pc of the end of the code implementing the insn.
Each line of the table is encoded as sleb128 deltas from the previous
line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
That is, the first column is seeded with the guest pc, the last column
with the host pc, and the middle columns with zeros. */
static int encode_search(TranslationBlock *tb, uint8_t *block)
{
uint8_t *highwater = tcg_ctx->code_gen_highwater;
uint64_t *insn_data = tcg_ctx->gen_insn_data;
uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
uint8_t *p = block;
int i, j, n;
for (i = 0, n = tb->icount; i < n; ++i) {
uint64_t prev, curr;
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
if (i == 0) {
prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
} else {
prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
}
curr = insn_data[i * TARGET_INSN_START_WORDS + j];
p = encode_sleb128(p, curr - prev);
}
prev = (i == 0 ? 0 : insn_end_off[i - 1]);
curr = insn_end_off[i];
p = encode_sleb128(p, curr - prev);
/* Test for (pending) buffer overflow. The assumption is that any
one row beginning below the high water mark cannot overrun
the buffer completely. Thus we can test for overflow after
encoding a row without having to check during encoding. */
if (unlikely(p > highwater)) {
return -1;
}
}
return p - block;
}
static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
uint64_t *data)
{
uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
const uint8_t *p = tb->tc.ptr + tb->tc.size;
int i, j, num_insns = tb->icount;
host_pc -= GETPC_ADJ;
if (host_pc < iter_pc) {
return -1;
}
memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
if (!(tb_cflags(tb) & CF_PCREL)) {
data[0] = tb->pc;
}
/*
* Reconstruct the stored insn data while looking for the point
* at which the end of the insn exceeds host_pc.
*/
for (i = 0; i < num_insns; ++i) {
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
data[j] += decode_sleb128(&p);
}
iter_pc += decode_sleb128(&p);
if (iter_pc > host_pc) {
return num_insns - i;
}
}
return -1;
}
/*
* The cpu state corresponding to 'host_pc' is restored in
* preparation for exiting the TB.
*/
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
uintptr_t host_pc)
{
uint64_t data[TARGET_INSN_START_WORDS];
int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
if (insns_left < 0) {
return;
}
if (tb_cflags(tb) & CF_USE_ICOUNT) {
assert(icount_enabled());
/*
* Reset the cycle counter to the start of the block and
* shift if to the number of actually executed instructions.
*/
cpu->neg.icount_decr.u16.low += insns_left;
}
cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
}
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
/*
* The host_pc has to be in the rx region of the code buffer.
* If it is not we will not be able to resolve it here.
* The two cases where host_pc will not be correct are:
*
* - fault during translation (instruction fetch)
* - fault from helper (not using GETPC() macro)
*
* Either way we need return early as we can't resolve it here.
*/
if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
TranslationBlock *tb = tcg_tb_lookup(host_pc);
if (tb) {
cpu_restore_state_from_tb(cpu, tb, host_pc);
return true;
}
}
return false;
}
bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
TranslationBlock *tb = tcg_tb_lookup(host_pc);
if (tb) {
return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
}
}
return false;
}
void page_init(void)
{
page_size_init();
page_table_config_init();
}
/*
* Isolate the portion of code gen which can setjmp/longjmp.
* Return the size of the generated code, or negative on error.
*/
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
vaddr pc, void *host_pc,
int *max_insns, int64_t *ti)
{
int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
if (unlikely(ret != 0)) {
return ret;
}
tcg_func_start(tcg_ctx);
tcg_ctx->cpu = env_cpu(env);
gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
assert(tb->size != 0);
tcg_ctx->cpu = NULL;
*max_insns = tb->icount;
return tcg_gen_code(tcg_ctx, tb, pc);
}
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
vaddr pc, uint64_t cs_base,
uint32_t flags, int cflags)
{
CPUArchState *env = cpu_env(cpu);
TranslationBlock *tb, *existing_tb;
tb_page_addr_t phys_pc, phys_p2;
tcg_insn_unit *gen_code_buf;
int gen_code_size, search_size, max_insns;
int64_t ti;
void *host_pc;
assert_memory_lock();
qemu_thread_jit_write();
phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
if (phys_pc == -1) {
/* Generate a one-shot TB with 1 insn in it */
cflags = (cflags & ~CF_COUNT_MASK) | 1;
}
max_insns = cflags & CF_COUNT_MASK;
if (max_insns == 0) {
max_insns = TCG_MAX_INSNS;
}
QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
buffer_overflow:
assert_no_pages_locked();
tb = tcg_tb_alloc(tcg_ctx);
if (unlikely(!tb)) {
/* flush must be done */
tb_flush(cpu);
mmap_unlock();
/* Make the execution loop process the flush as soon as possible. */
cpu->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cpu);
}
gen_code_buf = tcg_ctx->code_gen_ptr;
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
if (!(cflags & CF_PCREL)) {
tb->pc = pc;
}
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
tb_set_page_addr0(tb, phys_pc);
tb_set_page_addr1(tb, -1);
if (phys_pc != -1) {
tb_lock_page0(phys_pc);
}
tcg_ctx->gen_tb = tb;
tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
#ifdef CONFIG_SOFTMMU
tcg_ctx->page_bits = TARGET_PAGE_BITS;
tcg_ctx->page_mask = TARGET_PAGE_MASK;
tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
#endif
tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
#ifdef TCG_GUEST_DEFAULT_MO
tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
#else
tcg_ctx->guest_mo = TCG_MO_ALL;
#endif
restart_translate:
trace_translate_block(tb, pc, tb->tc.ptr);
gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
if (unlikely(gen_code_size < 0)) {
switch (gen_code_size) {
case -1:
/*
* Overflow of code_gen_buffer, or the current slice of it.
*
* TODO: We don't need to re-do gen_intermediate_code, nor
* should we re-do the tcg optimization currently hidden
* inside tcg_gen_code. All that should be required is to
* flush the TBs, allocate a new TB, re-initialize it per
* above, and re-do the actual code generation.
*/
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation for "
"code_gen_buffer overflow\n");
tb_unlock_pages(tb);
tcg_ctx->gen_tb = NULL;
goto buffer_overflow;
case -2:
/*
* The code generated for the TranslationBlock is too large.
* The maximum size allowed by the unwind info is 64k.
* There may be stricter constraints from relocations
* in the tcg backend.
*
* Try again with half as many insns as we attempted this time.
* If a single insn overflows, there's a bug somewhere...
*/
assert(max_insns > 1);
max_insns /= 2;
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation with "
"smaller translation block (max %d insns)\n",
max_insns);
/*
* The half-sized TB may not cross pages.
* TODO: Fix all targets that cross pages except with
* the first insn, at which point this can't be reached.
*/
phys_p2 = tb_page_addr1(tb);
if (unlikely(phys_p2 != -1)) {
tb_unlock_page1(phys_pc, phys_p2);
tb_set_page_addr1(tb, -1);
}
goto restart_translate;
case -3:
/*
* We had a page lock ordering problem. In order to avoid
* deadlock we had to drop the lock on page0, which means
* that everything we translated so far is compromised.
* Restart with locks held on both pages.
*/
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation with re-locked pages");
goto restart_translate;
default:
g_assert_not_reached();
}
}
tcg_ctx->gen_tb = NULL;
search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
if (unlikely(search_size < 0)) {
tb_unlock_pages(tb);
goto buffer_overflow;
}
tb->tc.size = gen_code_size;
/*
* For CF_PCREL, attribute all executions of the generated code
* to its first mapping.
*/
perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
qemu_log_in_addr_range(pc)) {
FILE *logfile = qemu_log_trylock();
if (logfile) {
int code_size, data_size;
const tcg_target_ulong *rx_data_gen_ptr;
size_t chunk_start;
int insn = 0;
if (tcg_ctx->data_gen_ptr) {
rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
data_size = gen_code_size - code_size;
} else {
rx_data_gen_ptr = 0;
code_size = gen_code_size;
data_size = 0;
}
/* Dump header and the first instruction */
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
fprintf(logfile,
" -- guest addr 0x%016" PRIx64 " + tb prologue\n",
tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
chunk_start = tcg_ctx->gen_insn_end_off[insn];
disas(logfile, tb->tc.ptr, chunk_start);
/*
* Dump each instruction chunk, wrapping up empty chunks into
* the next instruction. The whole array is offset so the
* first entry is the beginning of the 2nd instruction.
*/
while (insn < tb->icount) {
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
if (chunk_end > chunk_start) {
fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
disas(logfile, tb->tc.ptr + chunk_start,
chunk_end - chunk_start);
chunk_start = chunk_end;
}
insn++;
}
if (chunk_start < code_size) {
fprintf(logfile, " -- tb slow paths + alignment\n");
disas(logfile, tb->tc.ptr + chunk_start,
code_size - chunk_start);
}
/* Finally dump any data we may have after the block */
if (data_size) {
int i;
fprintf(logfile, " data: [size=%d]\n", data_size);
for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
if (sizeof(tcg_target_ulong) == 8) {
fprintf(logfile,
"0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
(uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
} else if (sizeof(tcg_target_ulong) == 4) {
fprintf(logfile,
"0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
(uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
} else {
qemu_build_not_reached();
}
}
}
fprintf(logfile, "\n");
qemu_log_unlock(logfile);
}
}
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
CODE_GEN_ALIGN));
/* init jump list */
qemu_spin_init(&tb->jmp_lock);
tb->jmp_list_head = (uintptr_t)NULL;
tb->jmp_list_next[0] = (uintptr_t)NULL;
tb->jmp_list_next[1] = (uintptr_t)NULL;
tb->jmp_dest[0] = (uintptr_t)NULL;
tb->jmp_dest[1] = (uintptr_t)NULL;
/* init original jump addresses which have been set during tcg_gen_code() */
if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
tb_reset_jump(tb, 0);
}
if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
tb_reset_jump(tb, 1);
}
/*
* If the TB is not associated with a physical RAM page then it must be
* a temporary one-insn TB, and we have nothing left to do. Return early
* before attempting to link to other TBs or add to the lookup table.
*/
if (tb_page_addr0(tb) == -1) {
assert_no_pages_locked();
return tb;
}
/*
* Insert TB into the corresponding region tree before publishing it
* through QHT. Otherwise rewinding happened in the TB might fail to
* lookup itself using host PC.
*/
tcg_tb_insert(tb);
/*
* No explicit memory barrier is required -- tb_link_page() makes the
* TB visible in a consistent state.
*/
existing_tb = tb_link_page(tb);
assert_no_pages_locked();
/* if the TB already exists, discard what we just translated */
if (unlikely(existing_tb != tb)) {
uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
tcg_tb_remove(tb);
return existing_tb;
}
return tb;
}
/* user-mode: call with mmap_lock held */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
TranslationBlock *tb;
assert_memory_lock();
tb = tcg_tb_lookup(retaddr);
if (tb) {
/* We can use retranslation to find the PC. */
cpu_restore_state_from_tb(cpu, tb, retaddr);
tb_phys_invalidate(tb, -1);
} else {
/* The exception probably happened in a helper. The CPU state should
have been saved before calling it. Fetch the PC from there. */
CPUArchState *env = cpu_env(cpu);
vaddr pc;
uint64_t cs_base;
tb_page_addr_t addr;
uint32_t flags;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
addr = get_page_addr_code(env, pc);
if (addr != -1) {
tb_invalidate_phys_range(addr, addr);
}
}
}
#ifndef CONFIG_USER_ONLY
/*
* In deterministic execution mode, instructions doing device I/Os
* must be at the end of the TB.
*
* Called by softmmu_template.h, with iothread mutex not held.
*/
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
TranslationBlock *tb;
CPUClass *cc;
uint32_t n;
tb = tcg_tb_lookup(retaddr);
if (!tb) {
cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
(void *)retaddr);
}
cpu_restore_state_from_tb(cpu, tb, retaddr);
/*
* Some guests must re-execute the branch when re-executing a delay
* slot instruction. When this is the case, adjust icount and N
* to account for the re-execution of the branch.
*/
n = 1;
cc = CPU_GET_CLASS(cpu);
if (cc->tcg_ops->io_recompile_replay_branch &&
cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
cpu->neg.icount_decr.u16.low++;
n = 2;
}
/*
* Exit the loop and potentially generate a new TB executing the
* just the I/O insns. We also limit instrumentation to memory
* operations only (which execute after completion) so we don't
* double instrument the instruction.
*/
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | n;
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
vaddr pc = log_pc(cpu, tb);
if (qemu_log_in_addr_range(pc)) {
qemu_log("cpu_io_recompile: rewound execution of TB to %016"
VADDR_PRIx "\n", pc);
}
}
cpu_loop_exit_noexc(cpu);
}
#else /* CONFIG_USER_ONLY */
void cpu_interrupt(CPUState *cpu, int mask)
{
g_assert(bql_locked());
cpu->interrupt_request |= mask;
qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
}
#endif /* CONFIG_USER_ONLY */
/*
* Called by generic code at e.g. cpu reset after cpu creation,
* therefore we must be prepared to allocate the jump cache.
*/
void tcg_flush_jmp_cache(CPUState *cpu)
{
CPUJumpCache *jc = cpu->tb_jmp_cache;
/* During early initialization, the cache may not yet be allocated. */
if (unlikely(jc == NULL)) {
return;
}
for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
qatomic_set(&jc->array[i].tb, NULL);
}
}