tcg: Create struct CPUTLB

Move all softmmu tlb data into this structure.  Arrange the
members so that we are able to place mask+table together and
at a smaller absolute offset from ENV.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2019-03-22 13:52:09 -07:00
parent 74433bf083
commit a40ec84ee2
12 changed files with 146 additions and 163 deletions

View File

@ -76,7 +76,7 @@ QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
{ {
return env->tlb_mask[mmu_idx] + (1 << CPU_TLB_ENTRY_BITS); return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
} }
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
@ -91,14 +91,14 @@ static void tlb_dyn_init(CPUArchState *env)
int i; int i;
for (i = 0; i < NB_MMU_MODES; i++) { for (i = 0; i < NB_MMU_MODES; i++) {
CPUTLBDesc *desc = &env->tlb_d[i]; CPUTLBDesc *desc = &env_tlb(env)->d[i];
size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
tlb_window_reset(desc, get_clock_realtime(), 0); tlb_window_reset(desc, get_clock_realtime(), 0);
desc->n_used_entries = 0; desc->n_used_entries = 0;
env->tlb_mask[i] = (n_entries - 1) << CPU_TLB_ENTRY_BITS; env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
env->tlb_table[i] = g_new(CPUTLBEntry, n_entries); env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
env->iotlb[i] = g_new(CPUIOTLBEntry, n_entries); env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
} }
} }
@ -144,7 +144,7 @@ static void tlb_dyn_init(CPUArchState *env)
*/ */
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
{ {
CPUTLBDesc *desc = &env->tlb_d[mmu_idx]; CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
size_t old_size = tlb_n_entries(env, mmu_idx); size_t old_size = tlb_n_entries(env, mmu_idx);
size_t rate; size_t rate;
size_t new_size = old_size; size_t new_size = old_size;
@ -187,14 +187,14 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
return; return;
} }
g_free(env->tlb_table[mmu_idx]); g_free(env_tlb(env)->f[mmu_idx].table);
g_free(env->iotlb[mmu_idx]); g_free(env_tlb(env)->d[mmu_idx].iotlb);
tlb_window_reset(desc, now, 0); tlb_window_reset(desc, now, 0);
/* desc->n_used_entries is cleared by the caller */ /* desc->n_used_entries is cleared by the caller */
env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS; env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size); env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size); env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
/* /*
* If the allocations fail, try smaller sizes. We just freed some * If the allocations fail, try smaller sizes. We just freed some
* memory, so going back to half of new_size has a good chance of working. * memory, so going back to half of new_size has a good chance of working.
@ -202,46 +202,47 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
* allocations to fail though, so we progressively reduce the allocation * allocations to fail though, so we progressively reduce the allocation
* size, aborting if we cannot even allocate the smallest TLB we support. * size, aborting if we cannot even allocate the smallest TLB we support.
*/ */
while (env->tlb_table[mmu_idx] == NULL || env->iotlb[mmu_idx] == NULL) { while (env_tlb(env)->f[mmu_idx].table == NULL ||
env_tlb(env)->d[mmu_idx].iotlb == NULL) {
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
error_report("%s: %s", __func__, strerror(errno)); error_report("%s: %s", __func__, strerror(errno));
abort(); abort();
} }
new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
env->tlb_mask[mmu_idx] = (new_size - 1) << CPU_TLB_ENTRY_BITS; env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
g_free(env->tlb_table[mmu_idx]); g_free(env_tlb(env)->f[mmu_idx].table);
g_free(env->iotlb[mmu_idx]); g_free(env_tlb(env)->d[mmu_idx].iotlb);
env->tlb_table[mmu_idx] = g_try_new(CPUTLBEntry, new_size); env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
env->iotlb[mmu_idx] = g_try_new(CPUIOTLBEntry, new_size); env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
} }
} }
static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx) static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
{ {
tlb_mmu_resize_locked(env, mmu_idx); tlb_mmu_resize_locked(env, mmu_idx);
memset(env->tlb_table[mmu_idx], -1, sizeof_tlb(env, mmu_idx)); memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
env->tlb_d[mmu_idx].n_used_entries = 0; env_tlb(env)->d[mmu_idx].n_used_entries = 0;
} }
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{ {
env->tlb_d[mmu_idx].n_used_entries++; env_tlb(env)->d[mmu_idx].n_used_entries++;
} }
static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{ {
env->tlb_d[mmu_idx].n_used_entries--; env_tlb(env)->d[mmu_idx].n_used_entries--;
} }
void tlb_init(CPUState *cpu) void tlb_init(CPUState *cpu)
{ {
CPUArchState *env = cpu->env_ptr; CPUArchState *env = cpu->env_ptr;
qemu_spin_init(&env->tlb_c.lock); qemu_spin_init(&env_tlb(env)->c.lock);
/* Ensure that cpu_reset performs a full flush. */ /* Ensure that cpu_reset performs a full flush. */
env->tlb_c.dirty = ALL_MMUIDX_BITS; env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
tlb_dyn_init(env); tlb_dyn_init(env);
} }
@ -273,9 +274,9 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
CPU_FOREACH(cpu) { CPU_FOREACH(cpu) {
CPUArchState *env = cpu->env_ptr; CPUArchState *env = cpu->env_ptr;
full += atomic_read(&env->tlb_c.full_flush_count); full += atomic_read(&env_tlb(env)->c.full_flush_count);
part += atomic_read(&env->tlb_c.part_flush_count); part += atomic_read(&env_tlb(env)->c.part_flush_count);
elide += atomic_read(&env->tlb_c.elide_flush_count); elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
} }
*pfull = full; *pfull = full;
*ppart = part; *ppart = part;
@ -285,10 +286,11 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
{ {
tlb_table_flush_by_mmuidx(env, mmu_idx); tlb_table_flush_by_mmuidx(env, mmu_idx);
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); env_tlb(env)->d[mmu_idx].large_page_addr = -1;
env->tlb_d[mmu_idx].large_page_addr = -1; env_tlb(env)->d[mmu_idx].large_page_mask = -1;
env->tlb_d[mmu_idx].large_page_mask = -1; env_tlb(env)->d[mmu_idx].vindex = 0;
env->tlb_d[mmu_idx].vindex = 0; memset(env_tlb(env)->d[mmu_idx].vtable, -1,
sizeof(env_tlb(env)->d[0].vtable));
} }
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
@ -301,31 +303,31 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked); tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
qemu_spin_lock(&env->tlb_c.lock); qemu_spin_lock(&env_tlb(env)->c.lock);
all_dirty = env->tlb_c.dirty; all_dirty = env_tlb(env)->c.dirty;
to_clean = asked & all_dirty; to_clean = asked & all_dirty;
all_dirty &= ~to_clean; all_dirty &= ~to_clean;
env->tlb_c.dirty = all_dirty; env_tlb(env)->c.dirty = all_dirty;
for (work = to_clean; work != 0; work &= work - 1) { for (work = to_clean; work != 0; work &= work - 1) {
int mmu_idx = ctz32(work); int mmu_idx = ctz32(work);
tlb_flush_one_mmuidx_locked(env, mmu_idx); tlb_flush_one_mmuidx_locked(env, mmu_idx);
} }
qemu_spin_unlock(&env->tlb_c.lock); qemu_spin_unlock(&env_tlb(env)->c.lock);
cpu_tb_jmp_cache_clear(cpu); cpu_tb_jmp_cache_clear(cpu);
if (to_clean == ALL_MMUIDX_BITS) { if (to_clean == ALL_MMUIDX_BITS) {
atomic_set(&env->tlb_c.full_flush_count, atomic_set(&env_tlb(env)->c.full_flush_count,
env->tlb_c.full_flush_count + 1); env_tlb(env)->c.full_flush_count + 1);
} else { } else {
atomic_set(&env->tlb_c.part_flush_count, atomic_set(&env_tlb(env)->c.part_flush_count,
env->tlb_c.part_flush_count + ctpop16(to_clean)); env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
if (to_clean != asked) { if (to_clean != asked) {
atomic_set(&env->tlb_c.elide_flush_count, atomic_set(&env_tlb(env)->c.elide_flush_count,
env->tlb_c.elide_flush_count + env_tlb(env)->c.elide_flush_count +
ctpop16(asked & ~to_clean)); ctpop16(asked & ~to_clean));
} }
} }
@ -410,11 +412,12 @@ static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx, static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
target_ulong page) target_ulong page)
{ {
CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
int k; int k;
assert_cpu_is_self(ENV_GET_CPU(env)); assert_cpu_is_self(ENV_GET_CPU(env));
for (k = 0; k < CPU_VTLB_SIZE; k++) { for (k = 0; k < CPU_VTLB_SIZE; k++) {
if (tlb_flush_entry_locked(&env->tlb_v_table[mmu_idx][k], page)) { if (tlb_flush_entry_locked(&d->vtable[k], page)) {
tlb_n_used_entries_dec(env, mmu_idx); tlb_n_used_entries_dec(env, mmu_idx);
} }
} }
@ -423,8 +426,8 @@ static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
static void tlb_flush_page_locked(CPUArchState *env, int midx, static void tlb_flush_page_locked(CPUArchState *env, int midx,
target_ulong page) target_ulong page)
{ {
target_ulong lp_addr = env->tlb_d[midx].large_page_addr; target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
target_ulong lp_mask = env->tlb_d[midx].large_page_mask; target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
/* Check if we need to flush due to large pages. */ /* Check if we need to flush due to large pages. */
if ((page & lp_mask) == lp_addr) { if ((page & lp_mask) == lp_addr) {
@ -459,13 +462,13 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n", tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
addr, mmu_idx_bitmap); addr, mmu_idx_bitmap);
qemu_spin_lock(&env->tlb_c.lock); qemu_spin_lock(&env_tlb(env)->c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if (test_bit(mmu_idx, &mmu_idx_bitmap)) { if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
tlb_flush_page_locked(env, mmu_idx, addr); tlb_flush_page_locked(env, mmu_idx, addr);
} }
} }
qemu_spin_unlock(&env->tlb_c.lock); qemu_spin_unlock(&env_tlb(env)->c.lock);
tb_flush_jmp_cache(cpu, addr); tb_flush_jmp_cache(cpu, addr);
} }
@ -609,22 +612,22 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
int mmu_idx; int mmu_idx;
env = cpu->env_ptr; env = cpu->env_ptr;
qemu_spin_lock(&env->tlb_c.lock); qemu_spin_lock(&env_tlb(env)->c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
unsigned int i; unsigned int i;
unsigned int n = tlb_n_entries(env, mmu_idx); unsigned int n = tlb_n_entries(env, mmu_idx);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
tlb_reset_dirty_range_locked(&env->tlb_table[mmu_idx][i], start1, tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
length); start1, length);
} }
for (i = 0; i < CPU_VTLB_SIZE; i++) { for (i = 0; i < CPU_VTLB_SIZE; i++) {
tlb_reset_dirty_range_locked(&env->tlb_v_table[mmu_idx][i], start1, tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
length); start1, length);
} }
} }
qemu_spin_unlock(&env->tlb_c.lock); qemu_spin_unlock(&env_tlb(env)->c.lock);
} }
/* Called with tlb_c.lock held */ /* Called with tlb_c.lock held */
@ -646,7 +649,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
assert_cpu_is_self(cpu); assert_cpu_is_self(cpu);
vaddr &= TARGET_PAGE_MASK; vaddr &= TARGET_PAGE_MASK;
qemu_spin_lock(&env->tlb_c.lock); qemu_spin_lock(&env_tlb(env)->c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr); tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
} }
@ -654,10 +657,10 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
int k; int k;
for (k = 0; k < CPU_VTLB_SIZE; k++) { for (k = 0; k < CPU_VTLB_SIZE; k++) {
tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr); tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
} }
} }
qemu_spin_unlock(&env->tlb_c.lock); qemu_spin_unlock(&env_tlb(env)->c.lock);
} }
/* Our TLB does not support large pages, so remember the area covered by /* Our TLB does not support large pages, so remember the area covered by
@ -665,7 +668,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
static void tlb_add_large_page(CPUArchState *env, int mmu_idx, static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
target_ulong vaddr, target_ulong size) target_ulong vaddr, target_ulong size)
{ {
target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr; target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
target_ulong lp_mask = ~(size - 1); target_ulong lp_mask = ~(size - 1);
if (lp_addr == (target_ulong)-1) { if (lp_addr == (target_ulong)-1) {
@ -675,13 +678,13 @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
/* Extend the existing region to include the new page. /* Extend the existing region to include the new page.
This is a compromise between unnecessary flushes and This is a compromise between unnecessary flushes and
the cost of maintaining a full variable size TLB. */ the cost of maintaining a full variable size TLB. */
lp_mask &= env->tlb_d[mmu_idx].large_page_mask; lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
while (((lp_addr ^ vaddr) & lp_mask) != 0) { while (((lp_addr ^ vaddr) & lp_mask) != 0) {
lp_mask <<= 1; lp_mask <<= 1;
} }
} }
env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask; env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
env->tlb_d[mmu_idx].large_page_mask = lp_mask; env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
} }
/* Add a new TLB entry. At most one entry for a given virtual address /* Add a new TLB entry. At most one entry for a given virtual address
@ -696,6 +699,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
int mmu_idx, target_ulong size) int mmu_idx, target_ulong size)
{ {
CPUArchState *env = cpu->env_ptr; CPUArchState *env = cpu->env_ptr;
CPUTLB *tlb = env_tlb(env);
CPUTLBDesc *desc = &tlb->d[mmu_idx];
MemoryRegionSection *section; MemoryRegionSection *section;
unsigned int index; unsigned int index;
target_ulong address; target_ulong address;
@ -757,10 +762,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
* a longer critical section, but this is not a concern since the TLB lock * a longer critical section, but this is not a concern since the TLB lock
* is unlikely to be contended. * is unlikely to be contended.
*/ */
qemu_spin_lock(&env->tlb_c.lock); qemu_spin_lock(&tlb->c.lock);
/* Note that the tlb is no longer clean. */ /* Note that the tlb is no longer clean. */
env->tlb_c.dirty |= 1 << mmu_idx; tlb->c.dirty |= 1 << mmu_idx;
/* Make sure there's no cached translation for the new page. */ /* Make sure there's no cached translation for the new page. */
tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page); tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
@ -770,12 +775,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
* different page; otherwise just overwrite the stale data. * different page; otherwise just overwrite the stale data.
*/ */
if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) { if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE; unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx]; CPUTLBEntry *tv = &desc->vtable[vidx];
/* Evict the old entry into the victim tlb. */ /* Evict the old entry into the victim tlb. */
copy_tlb_helper_locked(tv, te); copy_tlb_helper_locked(tv, te);
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; desc->viotlb[vidx] = desc->iotlb[index];
tlb_n_used_entries_dec(env, mmu_idx); tlb_n_used_entries_dec(env, mmu_idx);
} }
@ -792,8 +797,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
* subtract here is that of the page base, and not the same as the * subtract here is that of the page base, and not the same as the
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code(). * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
*/ */
env->iotlb[mmu_idx][index].addr = iotlb - vaddr_page; desc->iotlb[index].addr = iotlb - vaddr_page;
env->iotlb[mmu_idx][index].attrs = attrs; desc->iotlb[index].attrs = attrs;
/* Now calculate the new entry */ /* Now calculate the new entry */
tn.addend = addend - vaddr_page; tn.addend = addend - vaddr_page;
@ -829,7 +834,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
copy_tlb_helper_locked(te, &tn); copy_tlb_helper_locked(te, &tn);
tlb_n_used_entries_inc(env, mmu_idx); tlb_n_used_entries_inc(env, mmu_idx);
qemu_spin_unlock(&env->tlb_c.lock); qemu_spin_unlock(&tlb->c.lock);
} }
/* Add a new TLB entry, but without specifying the memory /* Add a new TLB entry, but without specifying the memory
@ -976,21 +981,28 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
assert_cpu_is_self(ENV_GET_CPU(env)); assert_cpu_is_self(ENV_GET_CPU(env));
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) { for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx]; CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
target_ulong cmp = tlb_read_ofs(vtlb, elt_ofs); target_ulong cmp;
/* elt_ofs might correspond to .addr_write, so use atomic_read */
#if TCG_OVERSIZED_GUEST
cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
#else
cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
#endif
if (cmp == page) { if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */ /* Found entry in victim tlb, swap tlb and iotlb. */
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
qemu_spin_lock(&env->tlb_c.lock); qemu_spin_lock(&env_tlb(env)->c.lock);
copy_tlb_helper_locked(&tmptlb, tlb); copy_tlb_helper_locked(&tmptlb, tlb);
copy_tlb_helper_locked(tlb, vtlb); copy_tlb_helper_locked(tlb, vtlb);
copy_tlb_helper_locked(vtlb, &tmptlb); copy_tlb_helper_locked(vtlb, &tmptlb);
qemu_spin_unlock(&env->tlb_c.lock); qemu_spin_unlock(&env_tlb(env)->c.lock);
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
tmpio = *io; *io = *vio; *vio = tmpio; tmpio = *io; *io = *vio; *vio = tmpio;
return true; return true;
} }
@ -1293,8 +1305,8 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
} }
} }
res = io_readx(env, &env->iotlb[mmu_idx][index], mmu_idx, addr, res = io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
retaddr, access_type, size); mmu_idx, addr, retaddr, access_type, size);
return handle_bswap(res, size, big_endian); return handle_bswap(res, size, big_endian);
} }
@ -1541,7 +1553,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
} }
} }
io_writex(env, &env->iotlb[mmu_idx][index], mmu_idx, io_writex(env, &env_tlb(env)->d[mmu_idx].iotlb[index], mmu_idx,
handle_bswap(val, size, big_endian), handle_bswap(val, size, big_endian),
addr, retaddr, size); addr, retaddr, size);
return; return;

View File

@ -78,6 +78,7 @@ typedef uint64_t target_ulong;
#endif #endif
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
/* use a fully associative victim tlb of 8 entries */ /* use a fully associative victim tlb of 8 entries */
#define CPU_VTLB_SIZE 8 #define CPU_VTLB_SIZE 8
@ -147,6 +148,10 @@ typedef struct CPUIOTLBEntry {
MemTxAttrs attrs; MemTxAttrs attrs;
} CPUIOTLBEntry; } CPUIOTLBEntry;
/*
* Data elements that are per MMU mode, minus the bits accessed by
* the TCG fast path.
*/
typedef struct CPUTLBDesc { typedef struct CPUTLBDesc {
/* /*
* Describe a region covering all of the large pages allocated * Describe a region covering all of the large pages allocated
@ -160,16 +165,31 @@ typedef struct CPUTLBDesc {
int64_t window_begin_ns; int64_t window_begin_ns;
/* maximum number of entries observed in the window */ /* maximum number of entries observed in the window */
size_t window_max_entries; size_t window_max_entries;
size_t n_used_entries;
/* The next index to use in the tlb victim table. */ /* The next index to use in the tlb victim table. */
size_t vindex; size_t vindex;
size_t n_used_entries; /* The tlb victim table, in two parts. */
CPUTLBEntry vtable[CPU_VTLB_SIZE];
CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
/* The iotlb. */
CPUIOTLBEntry *iotlb;
} CPUTLBDesc; } CPUTLBDesc;
/*
* Data elements that are per MMU mode, accessed by the fast path.
*/
typedef struct CPUTLBDescFast {
/* Contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */
uintptr_t mask;
/* The array of tlb entries itself. */
CPUTLBEntry *table;
} CPUTLBDescFast;
/* /*
* Data elements that are shared between all MMU modes. * Data elements that are shared between all MMU modes.
*/ */
typedef struct CPUTLBCommon { typedef struct CPUTLBCommon {
/* Serialize updates to tlb_table and tlb_v_table, and others as noted. */ /* Serialize updates to f.table and d.vtable, and others as noted. */
QemuSpin lock; QemuSpin lock;
/* /*
* Within dirty, for each bit N, modifications have been made to * Within dirty, for each bit N, modifications have been made to
@ -187,35 +207,24 @@ typedef struct CPUTLBCommon {
size_t elide_flush_count; size_t elide_flush_count;
} CPUTLBCommon; } CPUTLBCommon;
# define CPU_TLB \
/* tlb_mask[i] contains (n_entries - 1) << CPU_TLB_ENTRY_BITS */ \
uintptr_t tlb_mask[NB_MMU_MODES]; \
CPUTLBEntry *tlb_table[NB_MMU_MODES];
# define CPU_IOTLB \
CPUIOTLBEntry *iotlb[NB_MMU_MODES];
/* /*
* The entire softmmu tlb, for all MMU modes.
* The meaning of each of the MMU modes is defined in the target code. * The meaning of each of the MMU modes is defined in the target code.
* Note that NB_MMU_MODES is not yet defined; we can only reference it
* within preprocessor defines that will be expanded later.
*/ */
#define CPU_COMMON_TLB \ typedef struct CPUTLB {
CPUTLBCommon tlb_c; \ CPUTLBDescFast f[NB_MMU_MODES];
CPUTLBDesc tlb_d[NB_MMU_MODES]; \ CPUTLBDesc d[NB_MMU_MODES];
CPU_TLB \ CPUTLBCommon c;
CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \ } CPUTLB;
CPU_IOTLB \
CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE]; /* There are target-specific members named "tlb". This is temporary. */
#define CPU_COMMON CPUTLB tlb_;
#define env_tlb(ENV) (&(ENV)->tlb_)
#else #else
#define CPU_COMMON_TLB #define CPU_COMMON /* Nothing */
#endif #endif /* !CONFIG_USER_ONLY && CONFIG_TCG */
#define CPU_COMMON \
/* soft mmu support */ \
CPU_COMMON_TLB \
#endif #endif

View File

@ -139,21 +139,21 @@ static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
target_ulong addr) target_ulong addr)
{ {
uintptr_t size_mask = env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS; uintptr_t size_mask = env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
return (addr >> TARGET_PAGE_BITS) & size_mask; return (addr >> TARGET_PAGE_BITS) & size_mask;
} }
static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx) static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
{ {
return (env->tlb_mask[mmu_idx] >> CPU_TLB_ENTRY_BITS) + 1; return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
} }
/* Find the TLB entry corresponding to the mmu_idx + address pair. */ /* Find the TLB entry corresponding to the mmu_idx + address pair. */
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
target_ulong addr) target_ulong addr)
{ {
return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)]; return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)];
} }
#ifdef MMU_MODE0_SUFFIX #ifdef MMU_MODE0_SUFFIX

View File

@ -14134,7 +14134,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
* table entry even for that case. * table entry even for that case.
*/ */
return (tlb_hit(entry->addr_code, addr) && return (tlb_hit(entry->addr_code, addr) &&
env->iotlb[mmu_idx][index].attrs.target_tlb_bit0); env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0);
#endif #endif
} }

View File

@ -1637,12 +1637,8 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
label->label_ptr[0] = label_ptr; label->label_ptr[0] = label_ptr;
} }
/* We expect tlb_mask to be before tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
offsetof(CPUArchState, tlb_mask));
/* We expect to use a 24-bit unsigned offset from ENV. */ /* We expect to use a 24-bit unsigned offset from ENV. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1]) QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table)
> 0xffffff); > 0xffffff);
/* Load and compare a TLB entry, emitting the conditional jump to the /* Load and compare a TLB entry, emitting the conditional jump to the
@ -1653,8 +1649,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
tcg_insn_unit **label_ptr, int mem_index, tcg_insn_unit **label_ptr, int mem_index,
bool is_read) bool is_read)
{ {
int mask_ofs = offsetof(CPUArchState, tlb_mask[mem_index]); int mask_ofs = offsetof(CPUArchState, tlb_.f[mem_index].mask);
int table_ofs = offsetof(CPUArchState, tlb_table[mem_index]); int table_ofs = offsetof(CPUArchState, tlb_.f[mem_index].table);
unsigned a_bits = get_alignment_bits(opc); unsigned a_bits = get_alignment_bits(opc);
unsigned s_bits = opc & MO_SIZE; unsigned s_bits = opc & MO_SIZE;
unsigned a_mask = (1u << a_bits) - 1; unsigned a_mask = (1u << a_bits) - 1;

View File

@ -1220,12 +1220,8 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
/* We expect tlb_mask to be before tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
offsetof(CPUArchState, tlb_mask));
/* We expect to use a 20-bit unsigned offset from ENV. */ /* We expect to use a 20-bit unsigned offset from ENV. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1]) QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table)
> 0xfffff); > 0xfffff);
/* Load and compare a TLB entry, leaving the flags set. Returns the register /* Load and compare a TLB entry, leaving the flags set. Returns the register
@ -1236,8 +1232,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
{ {
int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write)); : offsetof(CPUTLBEntry, addr_write));
int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
int table_off = offsetof(CPUArchState, tlb_table[mem_index]); int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
unsigned s_bits = opc & MO_SIZE; unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc); unsigned a_bits = get_alignment_bits(opc);

View File

@ -1730,10 +1730,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0, tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0,
offsetof(CPUArchState, tlb_mask[mem_index])); offsetof(CPUArchState, tlb_.f[mem_index].mask));
tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0, tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0,
offsetof(CPUArchState, tlb_table[mem_index])); offsetof(CPUArchState, tlb_.f[mem_index].table));
/* If the required alignment is at least as large as the access, simply /* If the required alignment is at least as large as the access, simply
copy the address and mask. For lesser alignments, check that we don't copy the address and mask. For lesser alignments, check that we don't

View File

@ -1202,14 +1202,6 @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
return i; return i;
} }
/* We expect tlb_mask to be before tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
offsetof(CPUArchState, tlb_mask));
/* We expect tlb_mask to be "near" tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) -
offsetof(CPUArchState, tlb_mask) >= 0x8000);
/* /*
* Perform the tlb comparison operation. * Perform the tlb comparison operation.
* The complete host address is placed in BASE. * The complete host address is placed in BASE.
@ -1223,8 +1215,8 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
unsigned s_bits = opc & MO_SIZE; unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc); unsigned a_bits = get_alignment_bits(opc);
int mem_index = get_mmuidx(oi); int mem_index = get_mmuidx(oi);
int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
int table_off = offsetof(CPUArchState, tlb_table[mem_index]); int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
int add_off = offsetof(CPUTLBEntry, addend); int add_off = offsetof(CPUTLBEntry, addend);
int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read) int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write)); : offsetof(CPUTLBEntry, addr_write));

View File

@ -1498,10 +1498,6 @@ static void * const qemu_st_helpers[16] = {
[MO_BEQ] = helper_be_stq_mmu, [MO_BEQ] = helper_be_stq_mmu,
}; };
/* We expect tlb_mask to be before tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
offsetof(CPUArchState, tlb_mask));
/* Perform the TLB load and compare. Places the result of the comparison /* Perform the TLB load and compare. Places the result of the comparison
in CR7, loads the addend of the TLB into R3, and returns the register in CR7, loads the addend of the TLB into R3, and returns the register
containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
@ -1514,8 +1510,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
= (is_read = (is_read
? offsetof(CPUTLBEntry, addr_read) ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write)); : offsetof(CPUTLBEntry, addr_write));
int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
int table_off = offsetof(CPUArchState, tlb_table[mem_index]); int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
unsigned s_bits = opc & MO_SIZE; unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc); unsigned a_bits = get_alignment_bits(opc);

View File

@ -962,14 +962,6 @@ static void * const qemu_st_helpers[16] = {
/* We don't support oversize guests */ /* We don't support oversize guests */
QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS); QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS);
/* We expect tlb_mask to be before tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
offsetof(CPUArchState, tlb_mask));
/* We expect tlb_mask to be "near" tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) -
offsetof(CPUArchState, tlb_mask) >= 0x800);
static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
TCGReg addrh, TCGMemOpIdx oi, TCGReg addrh, TCGMemOpIdx oi,
tcg_insn_unit **label_ptr, bool is_load) tcg_insn_unit **label_ptr, bool is_load)
@ -982,8 +974,8 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
int mask_off, table_off; int mask_off, table_off;
TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0; TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
table_off = offsetof(CPUArchState, tlb_table[mem_index]); table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
if (table_off > 0x7ff) { if (table_off > 0x7ff) {
int mask_hi = mask_off - sextreg(mask_off, 0, 12); int mask_hi = mask_off - sextreg(mask_off, 0, 12);
int table_hi = table_off - sextreg(table_off, 0, 12); int table_hi = table_off - sextreg(table_off, 0, 12);

View File

@ -1539,9 +1539,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc, TCGReg data,
#include "tcg-ldst.inc.c" #include "tcg-ldst.inc.c"
/* We're expecting to use a 20-bit signed offset on the tlb memory ops. */ /* We're expecting to use a 20-bit signed offset on the tlb memory ops. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_mask[NB_MMU_MODES - 1]) QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_.f[NB_MMU_MODES - 1].table)
> 0x7ffff);
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1])
> 0x7ffff); > 0x7ffff);
/* Load and compare a TLB entry, leaving the flags set. Loads the TLB /* Load and compare a TLB entry, leaving the flags set. Loads the TLB
@ -1553,8 +1551,8 @@ static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
unsigned a_bits = get_alignment_bits(opc); unsigned a_bits = get_alignment_bits(opc);
unsigned s_mask = (1 << s_bits) - 1; unsigned s_mask = (1 << s_bits) - 1;
unsigned a_mask = (1 << a_bits) - 1; unsigned a_mask = (1 << a_bits) - 1;
int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
int table_off = offsetof(CPUArchState, tlb_table[mem_index]); int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
int ofs, a_off; int ofs, a_off;
uint64_t tlb_mask; uint64_t tlb_mask;

View File

@ -1075,19 +1075,11 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
The result of the TLB comparison is in %[ix]cc. The sanitized address The result of the TLB comparison is in %[ix]cc. The sanitized address
is in the returned register, maybe %o0. The TLB addend is in %o1. */ is in the returned register, maybe %o0. The TLB addend is in %o1. */
/* We expect tlb_mask to be before tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
offsetof(CPUArchState, tlb_mask));
/* We expect tlb_mask to be "near" tlb_table. */
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) -
offsetof(CPUArchState, tlb_mask) >= (1 << 13));
static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
TCGMemOp opc, int which) TCGMemOp opc, int which)
{ {
int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]); int mask_off = offsetof(CPUArchState, tlb_.f[mem_index].mask);
int table_off = offsetof(CPUArchState, tlb_table[mem_index]); int table_off = offsetof(CPUArchState, tlb_.f[mem_index].table);
TCGReg base = TCG_AREG0; TCGReg base = TCG_AREG0;
const TCGReg r0 = TCG_REG_O0; const TCGReg r0 = TCG_REG_O0;
const TCGReg r1 = TCG_REG_O1; const TCGReg r1 = TCG_REG_O1;