/* * Host code generation * * Copyright (c) 2003 Fabrice Bellard * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see . */ #include "qemu/osdep.h" #define NO_CPU_IO_DEFS #include "trace.h" #include "disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg.h" #if defined(CONFIG_USER_ONLY) #include "qemu.h" #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) #include #if __FreeBSD_version >= 700104 #define HAVE_KINFO_GETVMMAP #define sigqueue sigqueue_freebsd /* avoid redefinition */ #include #include #define _KERNEL #include #undef _KERNEL #undef sigqueue #include #endif #endif #else #include "exec/ram_addr.h" #endif #include "exec/cputlb.h" #include "exec/translate-all.h" #include "exec/translator.h" #include "qemu/bitmap.h" #include "qemu/qemu-print.h" #include "qemu/timer.h" #include "qemu/main-loop.h" #include "qemu/cacheinfo.h" #include "exec/log.h" #include "sysemu/cpus.h" #include "sysemu/cpu-timers.h" #include "sysemu/tcg.h" #include "qapi/error.h" #include "hw/core/tcg-cpu-ops.h" #include "tb-jmp-cache.h" #include "tb-hash.h" #include "tb-context.h" #include "internal.h" /* make various TB consistency checks */ /** * struct page_entry - page descriptor entry * @pd: pointer to the &struct PageDesc of the page this entry represents * @index: page index of the page * @locked: whether the page is locked * * This struct helps us keep track of the locked state of a page, without * bloating &struct PageDesc. * * A page lock protects accesses to all fields of &struct PageDesc. * * See also: &struct page_collection. */ struct page_entry { PageDesc *pd; tb_page_addr_t index; bool locked; }; /** * struct page_collection - tracks a set of pages (i.e. &struct page_entry's) * @tree: Binary search tree (BST) of the pages, with key == page index * @max: Pointer to the page in @tree with the highest page index * * To avoid deadlock we lock pages in ascending order of page index. * When operating on a set of pages, we need to keep track of them so that * we can lock them in order and also unlock them later. For this we collect * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the * @tree implementation we use does not provide an O(1) operation to obtain the * highest-ranked element, we use @max to keep track of the inserted page * with the highest index. This is valuable because if a page is not in * the tree and its index is higher than @max's, then we can lock it * without breaking the locking order rule. * * Note on naming: 'struct page_set' would be shorter, but we already have a few * page_set_*() helpers, so page_collection is used instead to avoid confusion. * * See also: page_collection_lock(). */ struct page_collection { GTree *tree; struct page_entry *max; }; /* * In system mode we want L1_MAP to be based on ram offsets, * while in user mode we want it to be based on virtual addresses. * * TODO: For user mode, see the caveat re host vs guest virtual * address spaces near GUEST_ADDR_MAX. */ #if !defined(CONFIG_USER_ONLY) #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS #else # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS #endif #else # define L1_MAP_ADDR_SPACE_BITS MIN(HOST_LONG_BITS, TARGET_ABI_BITS) #endif /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */ QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS > sizeof_field(TranslationBlock, trace_vcpu_dstate) * BITS_PER_BYTE); /* * L1 Mapping properties */ int v_l1_size; int v_l1_shift; int v_l2_levels; void *l1_map[V_L1_MAX_SIZE]; TBContext tb_ctx; static void page_table_config_init(void) { uint32_t v_l1_bits; assert(TARGET_PAGE_BITS); /* The bits remaining after N lower levels of page tables. */ v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS; if (v_l1_bits < V_L1_MIN_BITS) { v_l1_bits += V_L2_BITS; } v_l1_size = 1 << v_l1_bits; v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits; v_l2_levels = v_l1_shift / V_L2_BITS - 1; assert(v_l1_bits <= V_L1_MAX_BITS); assert(v_l1_shift % V_L2_BITS == 0); assert(v_l2_levels >= 0); } /* Encode VAL as a signed leb128 sequence at P. Return P incremented past the encoded value. */ static uint8_t *encode_sleb128(uint8_t *p, target_long val) { int more, byte; do { byte = val & 0x7f; val >>= 7; more = !((val == 0 && (byte & 0x40) == 0) || (val == -1 && (byte & 0x40) != 0)); if (more) { byte |= 0x80; } *p++ = byte; } while (more); return p; } /* Decode a signed leb128 sequence at *PP; increment *PP past the decoded value. Return the decoded value. */ static target_long decode_sleb128(const uint8_t **pp) { const uint8_t *p = *pp; target_long val = 0; int byte, shift = 0; do { byte = *p++; val |= (target_ulong)(byte & 0x7f) << shift; shift += 7; } while (byte & 0x80); if (shift < TARGET_LONG_BITS && (byte & 0x40)) { val |= -(target_ulong)1 << shift; } *pp = p; return val; } /* Encode the data collected about the instructions while compiling TB. Place the data at BLOCK, and return the number of bytes consumed. The logical table consists of TARGET_INSN_START_WORDS target_ulong's, which come from the target's insn_start data, followed by a uintptr_t which comes from the host pc of the end of the code implementing the insn. Each line of the table is encoded as sleb128 deltas from the previous line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }. That is, the first column is seeded with the guest pc, the last column with the host pc, and the middle columns with zeros. */ static int encode_search(TranslationBlock *tb, uint8_t *block) { uint8_t *highwater = tcg_ctx->code_gen_highwater; uint8_t *p = block; int i, j, n; for (i = 0, n = tb->icount; i < n; ++i) { target_ulong prev; for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { if (i == 0) { prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0); } else { prev = tcg_ctx->gen_insn_data[i - 1][j]; } p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev); } prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]); p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev); /* Test for (pending) buffer overflow. The assumption is that any one row beginning below the high water mark cannot overrun the buffer completely. Thus we can test for overflow after encoding a row without having to check during encoding. */ if (unlikely(p > highwater)) { return -1; } } return p - block; } static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, uint64_t *data) { uintptr_t iter_pc = (uintptr_t)tb->tc.ptr; const uint8_t *p = tb->tc.ptr + tb->tc.size; int i, j, num_insns = tb->icount; host_pc -= GETPC_ADJ; if (host_pc < iter_pc) { return -1; } memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); if (!TARGET_TB_PCREL) { data[0] = tb_pc(tb); } /* * Reconstruct the stored insn data while looking for the point * at which the end of the insn exceeds host_pc. */ for (i = 0; i < num_insns; ++i) { for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { data[j] += decode_sleb128(&p); } iter_pc += decode_sleb128(&p); if (iter_pc > host_pc) { return num_insns - i; } } return -1; } /* * The cpu state corresponding to 'host_pc' is restored in * preparation for exiting the TB. */ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t host_pc) { uint64_t data[TARGET_INSN_START_WORDS]; #ifdef CONFIG_PROFILER TCGProfile *prof = &tcg_ctx->prof; int64_t ti = profile_getclock(); #endif int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); if (insns_left < 0) { return; } if (tb_cflags(tb) & CF_USE_ICOUNT) { assert(icount_enabled()); /* * Reset the cycle counter to the start of the block and * shift if to the number of actually executed instructions. */ cpu_neg(cpu)->icount_decr.u16.low += insns_left; } cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); #ifdef CONFIG_PROFILER qatomic_set(&prof->restore_time, prof->restore_time + profile_getclock() - ti); qatomic_set(&prof->restore_count, prof->restore_count + 1); #endif } bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) { /* * The host_pc has to be in the rx region of the code buffer. * If it is not we will not be able to resolve it here. * The two cases where host_pc will not be correct are: * * - fault during translation (instruction fetch) * - fault from helper (not using GETPC() macro) * * Either way we need return early as we can't resolve it here. */ if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { TranslationBlock *tb = tcg_tb_lookup(host_pc); if (tb) { cpu_restore_state_from_tb(cpu, tb, host_pc); return true; } } return false; } bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) { if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { TranslationBlock *tb = tcg_tb_lookup(host_pc); if (tb) { return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0; } } return false; } void page_init(void) { page_size_init(); page_table_config_init(); #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) { #ifdef HAVE_KINFO_GETVMMAP struct kinfo_vmentry *freep; int i, cnt; freep = kinfo_getvmmap(getpid(), &cnt); if (freep) { mmap_lock(); for (i = 0; i < cnt; i++) { unsigned long startaddr, endaddr; startaddr = freep[i].kve_start; endaddr = freep[i].kve_end; if (h2g_valid(startaddr)) { startaddr = h2g(startaddr) & TARGET_PAGE_MASK; if (h2g_valid(endaddr)) { endaddr = h2g(endaddr); page_set_flags(startaddr, endaddr, PAGE_RESERVED); } else { #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS endaddr = ~0ul; page_set_flags(startaddr, endaddr, PAGE_RESERVED); #endif } } } free(freep); mmap_unlock(); } #else FILE *f; last_brk = (unsigned long)sbrk(0); f = fopen("/compat/linux/proc/self/maps", "r"); if (f) { mmap_lock(); do { unsigned long startaddr, endaddr; int n; n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr); if (n == 2 && h2g_valid(startaddr)) { startaddr = h2g(startaddr) & TARGET_PAGE_MASK; if (h2g_valid(endaddr)) { endaddr = h2g(endaddr); } else { endaddr = ~0ul; } page_set_flags(startaddr, endaddr, PAGE_RESERVED); } } while (!feof(f)); fclose(f); mmap_unlock(); } #endif } #endif } PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc) { PageDesc *pd; void **lp; int i; /* Level 1. Always allocated. */ lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1)); /* Level 2..N-1. */ for (i = v_l2_levels; i > 0; i--) { void **p = qatomic_rcu_read(lp); if (p == NULL) { void *existing; if (!alloc) { return NULL; } p = g_new0(void *, V_L2_SIZE); existing = qatomic_cmpxchg(lp, NULL, p); if (unlikely(existing)) { g_free(p); p = existing; } } lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1)); } pd = qatomic_rcu_read(lp); if (pd == NULL) { void *existing; if (!alloc) { return NULL; } pd = g_new0(PageDesc, V_L2_SIZE); #ifndef CONFIG_USER_ONLY { int i; for (i = 0; i < V_L2_SIZE; i++) { qemu_spin_init(&pd[i].lock); } } #endif existing = qatomic_cmpxchg(lp, NULL, pd); if (unlikely(existing)) { #ifndef CONFIG_USER_ONLY { int i; for (i = 0; i < V_L2_SIZE; i++) { qemu_spin_destroy(&pd[i].lock); } } #endif g_free(pd); pd = existing; } } return pd + (index & (V_L2_SIZE - 1)); } /* In user-mode page locks aren't used; mmap_lock is enough */ #ifdef CONFIG_USER_ONLY struct page_collection * page_collection_lock(tb_page_addr_t start, tb_page_addr_t end) { return NULL; } void page_collection_unlock(struct page_collection *set) { } #else /* !CONFIG_USER_ONLY */ #ifdef CONFIG_DEBUG_TCG static __thread GHashTable *ht_pages_locked_debug; static void ht_pages_locked_debug_init(void) { if (ht_pages_locked_debug) { return; } ht_pages_locked_debug = g_hash_table_new(NULL, NULL); } static bool page_is_locked(const PageDesc *pd) { PageDesc *found; ht_pages_locked_debug_init(); found = g_hash_table_lookup(ht_pages_locked_debug, pd); return !!found; } static void page_lock__debug(PageDesc *pd) { ht_pages_locked_debug_init(); g_assert(!page_is_locked(pd)); g_hash_table_insert(ht_pages_locked_debug, pd, pd); } static void page_unlock__debug(const PageDesc *pd) { bool removed; ht_pages_locked_debug_init(); g_assert(page_is_locked(pd)); removed = g_hash_table_remove(ht_pages_locked_debug, pd); g_assert(removed); } void do_assert_page_locked(const PageDesc *pd, const char *file, int line) { if (unlikely(!page_is_locked(pd))) { error_report("assert_page_lock: PageDesc %p not locked @ %s:%d", pd, file, line); abort(); } } void assert_no_pages_locked(void) { ht_pages_locked_debug_init(); g_assert(g_hash_table_size(ht_pages_locked_debug) == 0); } #else /* !CONFIG_DEBUG_TCG */ static inline void page_lock__debug(const PageDesc *pd) { } static inline void page_unlock__debug(const PageDesc *pd) { } #endif /* CONFIG_DEBUG_TCG */ void page_lock(PageDesc *pd) { page_lock__debug(pd); qemu_spin_lock(&pd->lock); } void page_unlock(PageDesc *pd) { qemu_spin_unlock(&pd->lock); page_unlock__debug(pd); } static inline struct page_entry * page_entry_new(PageDesc *pd, tb_page_addr_t index) { struct page_entry *pe = g_malloc(sizeof(*pe)); pe->index = index; pe->pd = pd; pe->locked = false; return pe; } static void page_entry_destroy(gpointer p) { struct page_entry *pe = p; g_assert(pe->locked); page_unlock(pe->pd); g_free(pe); } /* returns false on success */ static bool page_entry_trylock(struct page_entry *pe) { bool busy; busy = qemu_spin_trylock(&pe->pd->lock); if (!busy) { g_assert(!pe->locked); pe->locked = true; page_lock__debug(pe->pd); } return busy; } static void do_page_entry_lock(struct page_entry *pe) { page_lock(pe->pd); g_assert(!pe->locked); pe->locked = true; } static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data) { struct page_entry *pe = value; do_page_entry_lock(pe); return FALSE; } static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data) { struct page_entry *pe = value; if (pe->locked) { pe->locked = false; page_unlock(pe->pd); } return FALSE; } /* * Trylock a page, and if successful, add the page to a collection. * Returns true ("busy") if the page could not be locked; false otherwise. */ static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr) { tb_page_addr_t index = addr >> TARGET_PAGE_BITS; struct page_entry *pe; PageDesc *pd; pe = g_tree_lookup(set->tree, &index); if (pe) { return false; } pd = page_find(index); if (pd == NULL) { return false; } pe = page_entry_new(pd, index); g_tree_insert(set->tree, &pe->index, pe); /* * If this is either (1) the first insertion or (2) a page whose index * is higher than any other so far, just lock the page and move on. */ if (set->max == NULL || pe->index > set->max->index) { set->max = pe; do_page_entry_lock(pe); return false; } /* * Try to acquire out-of-order lock; if busy, return busy so that we acquire * locks in order. */ return page_entry_trylock(pe); } static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata) { tb_page_addr_t a = *(const tb_page_addr_t *)ap; tb_page_addr_t b = *(const tb_page_addr_t *)bp; if (a == b) { return 0; } else if (a < b) { return -1; } return 1; } /* * Lock a range of pages ([@start,@end[) as well as the pages of all * intersecting TBs. * Locking order: acquire locks in ascending order of page index. */ struct page_collection * page_collection_lock(tb_page_addr_t start, tb_page_addr_t end) { struct page_collection *set = g_malloc(sizeof(*set)); tb_page_addr_t index; PageDesc *pd; start >>= TARGET_PAGE_BITS; end >>= TARGET_PAGE_BITS; g_assert(start <= end); set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL, page_entry_destroy); set->max = NULL; assert_no_pages_locked(); retry: g_tree_foreach(set->tree, page_entry_lock, NULL); for (index = start; index <= end; index++) { TranslationBlock *tb; int n; pd = page_find(index); if (pd == NULL) { continue; } if (page_trylock_add(set, index << TARGET_PAGE_BITS)) { g_tree_foreach(set->tree, page_entry_unlock, NULL); goto retry; } assert_page_locked(pd); PAGE_FOR_EACH_TB(pd, tb, n) { if (page_trylock_add(set, tb_page_addr0(tb)) || (tb_page_addr1(tb) != -1 && page_trylock_add(set, tb_page_addr1(tb)))) { /* drop all locks, and reacquire in order */ g_tree_foreach(set->tree, page_entry_unlock, NULL); goto retry; } } } return set; } void page_collection_unlock(struct page_collection *set) { /* entries are unlocked and freed via page_entry_destroy */ g_tree_destroy(set->tree); g_free(set); } #endif /* !CONFIG_USER_ONLY */ /* Called with mmap_lock held for user mode emulation. */ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, int cflags) { CPUArchState *env = cpu->env_ptr; TranslationBlock *tb, *existing_tb; tb_page_addr_t phys_pc; tcg_insn_unit *gen_code_buf; int gen_code_size, search_size, max_insns; #ifdef CONFIG_PROFILER TCGProfile *prof = &tcg_ctx->prof; int64_t ti; #endif void *host_pc; assert_memory_lock(); qemu_thread_jit_write(); phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); if (phys_pc == -1) { /* Generate a one-shot TB with 1 insn in it */ cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1; } max_insns = cflags & CF_COUNT_MASK; if (max_insns == 0) { max_insns = TCG_MAX_INSNS; } QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); buffer_overflow: tb = tcg_tb_alloc(tcg_ctx); if (unlikely(!tb)) { /* flush must be done */ tb_flush(cpu); mmap_unlock(); /* Make the execution loop process the flush as soon as possible. */ cpu->exception_index = EXCP_INTERRUPT; cpu_loop_exit(cpu); } gen_code_buf = tcg_ctx->code_gen_ptr; tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); #if !TARGET_TB_PCREL tb->pc = pc; #endif tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; tb->trace_vcpu_dstate = *cpu->trace_dstate; tb_set_page_addr0(tb, phys_pc); tb_set_page_addr1(tb, -1); tcg_ctx->tb_cflags = cflags; tb_overflow: #ifdef CONFIG_PROFILER /* includes aborted translations because of exceptions */ qatomic_set(&prof->tb_count1, prof->tb_count1 + 1); ti = profile_getclock(); #endif gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0); if (unlikely(gen_code_size != 0)) { goto error_return; } tcg_func_start(tcg_ctx); tcg_ctx->cpu = env_cpu(env); gen_intermediate_code(cpu, tb, max_insns, pc, host_pc); assert(tb->size != 0); tcg_ctx->cpu = NULL; max_insns = tb->icount; trace_translate_block(tb, pc, tb->tc.ptr); /* generate machine code */ #ifdef CONFIG_PROFILER qatomic_set(&prof->tb_count, prof->tb_count + 1); qatomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti); ti = profile_getclock(); #endif gen_code_size = tcg_gen_code(tcg_ctx, tb, pc); if (unlikely(gen_code_size < 0)) { error_return: switch (gen_code_size) { case -1: /* * Overflow of code_gen_buffer, or the current slice of it. * * TODO: We don't need to re-do gen_intermediate_code, nor * should we re-do the tcg optimization currently hidden * inside tcg_gen_code. All that should be required is to * flush the TBs, allocate a new TB, re-initialize it per * above, and re-do the actual code generation. */ qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, "Restarting code generation for " "code_gen_buffer overflow\n"); goto buffer_overflow; case -2: /* * The code generated for the TranslationBlock is too large. * The maximum size allowed by the unwind info is 64k. * There may be stricter constraints from relocations * in the tcg backend. * * Try again with half as many insns as we attempted this time. * If a single insn overflows, there's a bug somewhere... */ assert(max_insns > 1); max_insns /= 2; qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, "Restarting code generation with " "smaller translation block (max %d insns)\n", max_insns); goto tb_overflow; default: g_assert_not_reached(); } } search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); if (unlikely(search_size < 0)) { goto buffer_overflow; } tb->tc.size = gen_code_size; #ifdef CONFIG_PROFILER qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); #endif #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && qemu_log_in_addr_range(pc)) { FILE *logfile = qemu_log_trylock(); if (logfile) { int code_size, data_size; const tcg_target_ulong *rx_data_gen_ptr; size_t chunk_start; int insn = 0; if (tcg_ctx->data_gen_ptr) { rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr); code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr; data_size = gen_code_size - code_size; } else { rx_data_gen_ptr = 0; code_size = gen_code_size; data_size = 0; } /* Dump header and the first instruction */ fprintf(logfile, "OUT: [size=%d]\n", gen_code_size); fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n", tcg_ctx->gen_insn_data[insn][0]); chunk_start = tcg_ctx->gen_insn_end_off[insn]; disas(logfile, tb->tc.ptr, chunk_start); /* * Dump each instruction chunk, wrapping up empty chunks into * the next instruction. The whole array is offset so the * first entry is the beginning of the 2nd instruction. */ while (insn < tb->icount) { size_t chunk_end = tcg_ctx->gen_insn_end_off[insn]; if (chunk_end > chunk_start) { fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n", tcg_ctx->gen_insn_data[insn][0]); disas(logfile, tb->tc.ptr + chunk_start, chunk_end - chunk_start); chunk_start = chunk_end; } insn++; } if (chunk_start < code_size) { fprintf(logfile, " -- tb slow paths + alignment\n"); disas(logfile, tb->tc.ptr + chunk_start, code_size - chunk_start); } /* Finally dump any data we may have after the block */ if (data_size) { int i; fprintf(logfile, " data: [size=%d]\n", data_size); for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { if (sizeof(tcg_target_ulong) == 8) { fprintf(logfile, "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); } else if (sizeof(tcg_target_ulong) == 4) { fprintf(logfile, "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n", (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); } else { qemu_build_not_reached(); } } } fprintf(logfile, "\n"); qemu_log_unlock(logfile); } } #endif qatomic_set(&tcg_ctx->code_gen_ptr, (void *) ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, CODE_GEN_ALIGN)); /* init jump list */ qemu_spin_init(&tb->jmp_lock); tb->jmp_list_head = (uintptr_t)NULL; tb->jmp_list_next[0] = (uintptr_t)NULL; tb->jmp_list_next[1] = (uintptr_t)NULL; tb->jmp_dest[0] = (uintptr_t)NULL; tb->jmp_dest[1] = (uintptr_t)NULL; /* init original jump addresses which have been set during tcg_gen_code() */ if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { tb_reset_jump(tb, 0); } if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { tb_reset_jump(tb, 1); } /* * If the TB is not associated with a physical RAM page then it must be * a temporary one-insn TB, and we have nothing left to do. Return early * before attempting to link to other TBs or add to the lookup table. */ if (tb_page_addr0(tb) == -1) { return tb; } /* * Insert TB into the corresponding region tree before publishing it * through QHT. Otherwise rewinding happened in the TB might fail to * lookup itself using host PC. */ tcg_tb_insert(tb); /* * No explicit memory barrier is required -- tb_link_page() makes the * TB visible in a consistent state. */ existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb)); /* if the TB already exists, discard what we just translated */ if (unlikely(existing_tb != tb)) { uintptr_t orig_aligned = (uintptr_t)gen_code_buf; orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); tcg_tb_remove(tb); return existing_tb; } return tb; } /* user-mode: call with mmap_lock held */ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) { TranslationBlock *tb; assert_memory_lock(); tb = tcg_tb_lookup(retaddr); if (tb) { /* We can use retranslation to find the PC. */ cpu_restore_state_from_tb(cpu, tb, retaddr); tb_phys_invalidate(tb, -1); } else { /* The exception probably happened in a helper. The CPU state should have been saved before calling it. Fetch the PC from there. */ CPUArchState *env = cpu->env_ptr; target_ulong pc, cs_base; tb_page_addr_t addr; uint32_t flags; cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); addr = get_page_addr_code(env, pc); if (addr != -1) { tb_invalidate_phys_range(addr, addr + 1); } } } #ifndef CONFIG_USER_ONLY /* * In deterministic execution mode, instructions doing device I/Os * must be at the end of the TB. * * Called by softmmu_template.h, with iothread mutex not held. */ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) { TranslationBlock *tb; CPUClass *cc; uint32_t n; tb = tcg_tb_lookup(retaddr); if (!tb) { cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", (void *)retaddr); } cpu_restore_state_from_tb(cpu, tb, retaddr); /* * Some guests must re-execute the branch when re-executing a delay * slot instruction. When this is the case, adjust icount and N * to account for the re-execution of the branch. */ n = 1; cc = CPU_GET_CLASS(cpu); if (cc->tcg_ops->io_recompile_replay_branch && cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { cpu_neg(cpu)->icount_decr.u16.low++; n = 2; } /* * Exit the loop and potentially generate a new TB executing the * just the I/O insns. We also limit instrumentation to memory * operations only (which execute after completion) so we don't * double instrument the instruction. */ cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; if (qemu_loglevel_mask(CPU_LOG_EXEC)) { target_ulong pc = log_pc(cpu, tb); if (qemu_log_in_addr_range(pc)) { qemu_log("cpu_io_recompile: rewound execution of TB to " TARGET_FMT_lx "\n", pc); } } cpu_loop_exit_noexc(cpu); } static void print_qht_statistics(struct qht_stats hst, GString *buf) { uint32_t hgram_opts; size_t hgram_bins; char *hgram; if (!hst.head_buckets) { return; } g_string_append_printf(buf, "TB hash buckets %zu/%zu " "(%0.2f%% head buckets used)\n", hst.used_head_buckets, hst.head_buckets, (double)hst.used_head_buckets / hst.head_buckets * 100); hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { hgram_opts |= QDIST_PR_NODECIMAL; } hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " "Histogram: %s\n", qdist_avg(&hst.occupancy) * 100, hgram); g_free(hgram); hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); if (hgram_bins > 10) { hgram_bins = 10; } else { hgram_bins = 0; hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; } hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " "Histogram: %s\n", qdist_avg(&hst.chain), hgram); g_free(hgram); } struct tb_tree_stats { size_t nb_tbs; size_t host_size; size_t target_size; size_t max_target_size; size_t direct_jmp_count; size_t direct_jmp2_count; size_t cross_page; }; static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) { const TranslationBlock *tb = value; struct tb_tree_stats *tst = data; tst->nb_tbs++; tst->host_size += tb->tc.size; tst->target_size += tb->size; if (tb->size > tst->max_target_size) { tst->max_target_size = tb->size; } if (tb_page_addr1(tb) != -1) { tst->cross_page++; } if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { tst->direct_jmp_count++; if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { tst->direct_jmp2_count++; } } return false; } void dump_exec_info(GString *buf) { struct tb_tree_stats tst = {}; struct qht_stats hst; size_t nb_tbs, flush_full, flush_part, flush_elide; tcg_tb_foreach(tb_tree_stats_iter, &tst); nb_tbs = tst.nb_tbs; /* XXX: avoid using doubles ? */ g_string_append_printf(buf, "Translation buffer state:\n"); /* * Report total code size including the padding and TB structs; * otherwise users might think "-accel tcg,tb-size" is not honoured. * For avg host size we use the precise numbers from tb_tree_stats though. */ g_string_append_printf(buf, "gen code size %zu/%zu\n", tcg_code_size(), tcg_code_capacity()); g_string_append_printf(buf, "TB count %zu\n", nb_tbs); g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", nb_tbs ? tst.target_size / nb_tbs : 0, tst.max_target_size); g_string_append_printf(buf, "TB avg host size %zu bytes " "(expansion ratio: %0.1f)\n", nb_tbs ? tst.host_size / nb_tbs : 0, tst.target_size ? (double)tst.host_size / tst.target_size : 0); g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", tst.cross_page, nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); g_string_append_printf(buf, "direct jump count %zu (%zu%%) " "(2 jumps=%zu %zu%%)\n", tst.direct_jmp_count, nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, tst.direct_jmp2_count, nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); qht_statistics_init(&tb_ctx.htable, &hst); print_qht_statistics(hst, buf); qht_statistics_destroy(&hst); g_string_append_printf(buf, "\nStatistics:\n"); g_string_append_printf(buf, "TB flush count %u\n", qatomic_read(&tb_ctx.tb_flush_count)); g_string_append_printf(buf, "TB invalidate count %u\n", qatomic_read(&tb_ctx.tb_phys_invalidate_count)); tlb_flush_counts(&flush_full, &flush_part, &flush_elide); g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); tcg_dump_info(buf); } #else /* CONFIG_USER_ONLY */ void cpu_interrupt(CPUState *cpu, int mask) { g_assert(qemu_mutex_iothread_locked()); cpu->interrupt_request |= mask; qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); } /* * Walks guest process memory "regions" one by one * and calls callback function 'fn' for each region. */ struct walk_memory_regions_data { walk_memory_regions_fn fn; void *priv; target_ulong start; int prot; }; static int walk_memory_regions_end(struct walk_memory_regions_data *data, target_ulong end, int new_prot) { if (data->start != -1u) { int rc = data->fn(data->priv, data->start, end, data->prot); if (rc != 0) { return rc; } } data->start = (new_prot ? end : -1u); data->prot = new_prot; return 0; } static int walk_memory_regions_1(struct walk_memory_regions_data *data, target_ulong base, int level, void **lp) { target_ulong pa; int i, rc; if (*lp == NULL) { return walk_memory_regions_end(data, base, 0); } if (level == 0) { PageDesc *pd = *lp; for (i = 0; i < V_L2_SIZE; ++i) { int prot = pd[i].flags; pa = base | (i << TARGET_PAGE_BITS); if (prot != data->prot) { rc = walk_memory_regions_end(data, pa, prot); if (rc != 0) { return rc; } } } } else { void **pp = *lp; for (i = 0; i < V_L2_SIZE; ++i) { pa = base | ((target_ulong)i << (TARGET_PAGE_BITS + V_L2_BITS * level)); rc = walk_memory_regions_1(data, pa, level - 1, pp + i); if (rc != 0) { return rc; } } } return 0; } int walk_memory_regions(void *priv, walk_memory_regions_fn fn) { struct walk_memory_regions_data data; uintptr_t i, l1_sz = v_l1_size; data.fn = fn; data.priv = priv; data.start = -1u; data.prot = 0; for (i = 0; i < l1_sz; i++) { target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS); int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i); if (rc != 0) { return rc; } } return walk_memory_regions_end(&data, 0, 0); } static int dump_region(void *priv, target_ulong start, target_ulong end, unsigned long prot) { FILE *f = (FILE *)priv; (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx " "TARGET_FMT_lx" %c%c%c\n", start, end, end - start, ((prot & PAGE_READ) ? 'r' : '-'), ((prot & PAGE_WRITE) ? 'w' : '-'), ((prot & PAGE_EXEC) ? 'x' : '-')); return 0; } /* dump memory mappings */ void page_dump(FILE *f) { const int length = sizeof(target_ulong) * 2; (void) fprintf(f, "%-*s %-*s %-*s %s\n", length, "start", length, "end", length, "size", "prot"); walk_memory_regions(f, dump_region); } int page_get_flags(target_ulong address) { PageDesc *p; p = page_find(address >> TARGET_PAGE_BITS); if (!p) { return 0; } return p->flags; } /* * Allow the target to decide if PAGE_TARGET_[12] may be reset. * By default, they are not kept. */ #ifndef PAGE_TARGET_STICKY #define PAGE_TARGET_STICKY 0 #endif #define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY) /* Modify the flags of a page and invalidate the code if necessary. The flag PAGE_WRITE_ORG is positioned automatically depending on PAGE_WRITE. The mmap_lock should already be held. */ void page_set_flags(target_ulong start, target_ulong end, int flags) { target_ulong addr, len; bool reset, inval_tb = false; /* This function should never be called with addresses outside the guest address space. If this assert fires, it probably indicates a missing call to h2g_valid. */ assert(end - 1 <= GUEST_ADDR_MAX); assert(start < end); /* Only set PAGE_ANON with new mappings. */ assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET)); assert_memory_lock(); start = start & TARGET_PAGE_MASK; end = TARGET_PAGE_ALIGN(end); if (flags & PAGE_WRITE) { flags |= PAGE_WRITE_ORG; } reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET); if (reset) { page_reset_target_data(start, end); } flags &= ~PAGE_RESET; for (addr = start, len = end - start; len != 0; len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true); /* * If the page was executable, but is reset, or is no longer * executable, or has become writable, then invalidate any code. */ if ((p->flags & PAGE_EXEC) && (reset || !(flags & PAGE_EXEC) || (flags & ~p->flags & PAGE_WRITE))) { inval_tb = true; } /* Using mprotect on a page does not change sticky bits. */ p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags; } if (inval_tb) { tb_invalidate_phys_range(start, end); } } int page_check_range(target_ulong start, target_ulong len, int flags) { PageDesc *p; target_ulong end; target_ulong addr; /* This function should never be called with addresses outside the guest address space. If this assert fires, it probably indicates a missing call to h2g_valid. */ if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) { assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); } if (len == 0) { return 0; } if (start + len - 1 < start) { /* We've wrapped around. */ return -1; } /* must do before we loose bits in the next step */ end = TARGET_PAGE_ALIGN(start + len); start = start & TARGET_PAGE_MASK; for (addr = start, len = end - start; len != 0; len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { p = page_find(addr >> TARGET_PAGE_BITS); if (!p) { return -1; } if (!(p->flags & PAGE_VALID)) { return -1; } if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) { return -1; } if (flags & PAGE_WRITE) { if (!(p->flags & PAGE_WRITE_ORG)) { return -1; } /* unprotect the page if it was put read-only because it contains translated code */ if (!(p->flags & PAGE_WRITE)) { if (!page_unprotect(addr, 0)) { return -1; } } } } return 0; } void page_protect(tb_page_addr_t page_addr) { target_ulong addr; PageDesc *p; int prot; p = page_find(page_addr >> TARGET_PAGE_BITS); if (p && (p->flags & PAGE_WRITE)) { /* * Force the host page as non writable (writes will have a page fault + * mprotect overhead). */ page_addr &= qemu_host_page_mask; prot = 0; for (addr = page_addr; addr < page_addr + qemu_host_page_size; addr += TARGET_PAGE_SIZE) { p = page_find(addr >> TARGET_PAGE_BITS); if (!p) { continue; } prot |= p->flags; p->flags &= ~PAGE_WRITE; } mprotect(g2h_untagged(page_addr), qemu_host_page_size, (prot & PAGE_BITS) & ~PAGE_WRITE); } } /* called from signal handler: invalidate the code and unprotect the * page. Return 0 if the fault was not handled, 1 if it was handled, * and 2 if it was handled but the caller must cause the TB to be * immediately exited. (We can only return 2 if the 'pc' argument is * non-zero.) */ int page_unprotect(target_ulong address, uintptr_t pc) { unsigned int prot; bool current_tb_invalidated; PageDesc *p; target_ulong host_start, host_end, addr; /* Technically this isn't safe inside a signal handler. However we know this only ever happens in a synchronous SEGV handler, so in practice it seems to be ok. */ mmap_lock(); p = page_find(address >> TARGET_PAGE_BITS); if (!p) { mmap_unlock(); return 0; } /* if the page was really writable, then we change its protection back to writable */ if (p->flags & PAGE_WRITE_ORG) { current_tb_invalidated = false; if (p->flags & PAGE_WRITE) { /* If the page is actually marked WRITE then assume this is because * this thread raced with another one which got here first and * set the page to PAGE_WRITE and did the TB invalidate for us. */ #ifdef TARGET_HAS_PRECISE_SMC TranslationBlock *current_tb = tcg_tb_lookup(pc); if (current_tb) { current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; } #endif } else { host_start = address & qemu_host_page_mask; host_end = host_start + qemu_host_page_size; prot = 0; for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) { p = page_find(addr >> TARGET_PAGE_BITS); p->flags |= PAGE_WRITE; prot |= p->flags; /* and since the content will be modified, we must invalidate the corresponding translated code. */ current_tb_invalidated |= tb_invalidate_phys_page_unwind(addr, pc); } mprotect((void *)g2h_untagged(host_start), qemu_host_page_size, prot & PAGE_BITS); } mmap_unlock(); /* If current TB was invalidated return to main loop */ return current_tb_invalidated ? 2 : 1; } mmap_unlock(); return 0; } #endif /* CONFIG_USER_ONLY */ /* * Called by generic code at e.g. cpu reset after cpu creation, * therefore we must be prepared to allocate the jump cache. */ void tcg_flush_jmp_cache(CPUState *cpu) { CPUJumpCache *jc = cpu->tb_jmp_cache; /* During early initialization, the cache may not yet be allocated. */ if (unlikely(jc == NULL)) { return; } for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { qatomic_set(&jc->array[i].tb, NULL); } } /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ void tcg_flush_softmmu_tlb(CPUState *cs) { #ifdef CONFIG_SOFTMMU tlb_flush(cs); #endif }