From 7b7d00e0a714e0bdcd4c8a76f0927e1c8f1b2121 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 11 Nov 2019 14:53:30 +0100 Subject: [PATCH 01/16] cputlb: Handle NB_MMU_MODES > TARGET_PAGE_BITS_MIN In target/arm we will shortly have "too many" mmu_idx. The current minimum barrier is caused by the way in which tlb_flush_page_by_mmuidx is coded. We can remove this limitation by allocating memory for consumption by the worker. Let us assume that this is the unlikely case, as will be the case for the majority of targets which have so far satisfied the BUILD_BUG_ON, and only allocate memory when necessary. Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 167 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 132 insertions(+), 35 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index a991ea2964..02b381cf50 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -449,28 +449,29 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, } } -/* As we are going to hijack the bottom bits of the page address for a - * mmuidx bit mask we need to fail to build if we can't do that +/** + * tlb_flush_page_by_mmuidx_async_0: + * @cpu: cpu on which to flush + * @addr: page of virtual address to flush + * @idxmap: set of mmu_idx to flush + * + * Helper for tlb_flush_page_by_mmuidx and friends, flush one page + * at @addr from the tlbs indicated by @idxmap from @cpu. */ -QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN); - -static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, - run_on_cpu_data data) +static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, + target_ulong addr, + uint16_t idxmap) { CPUArchState *env = cpu->env_ptr; - target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; - target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; - unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; int mmu_idx; assert_cpu_is_self(cpu); - tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n", - addr, mmu_idx_bitmap); + tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap); qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - if (test_bit(mmu_idx, &mmu_idx_bitmap)) { + if ((idxmap >> mmu_idx) & 1) { tlb_flush_page_locked(env, mmu_idx, addr); } } @@ -479,22 +480,75 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, tb_flush_jmp_cache(cpu, addr); } +/** + * tlb_flush_page_by_mmuidx_async_1: + * @cpu: cpu on which to flush + * @data: encoded addr + idxmap + * + * Helper for tlb_flush_page_by_mmuidx and friends, called through + * async_run_on_cpu. The idxmap parameter is encoded in the page + * offset of the target_ptr field. This limits the set of mmu_idx + * that can be passed via this method. + */ +static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu, + run_on_cpu_data data) +{ + target_ulong addr_and_idxmap = (target_ulong) data.target_ptr; + target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK; + uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK; + + tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); +} + +typedef struct { + target_ulong addr; + uint16_t idxmap; +} TLBFlushPageByMMUIdxData; + +/** + * tlb_flush_page_by_mmuidx_async_2: + * @cpu: cpu on which to flush + * @data: allocated addr + idxmap + * + * Helper for tlb_flush_page_by_mmuidx and friends, called through + * async_run_on_cpu. 
The addr+idxmap parameters are stored in a + * TLBFlushPageByMMUIdxData structure that has been allocated + * specifically for this helper. Free the structure when done. + */ +static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, + run_on_cpu_data data) +{ + TLBFlushPageByMMUIdxData *d = data.host_ptr; + + tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap); + g_free(d); +} + void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) { - target_ulong addr_and_mmu_idx; - tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); /* This should already be page aligned */ - addr_and_mmu_idx = addr & TARGET_PAGE_MASK; - addr_and_mmu_idx |= idxmap; + addr &= TARGET_PAGE_MASK; - if (!qemu_cpu_is_self(cpu)) { - async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work, - RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + if (qemu_cpu_is_self(cpu)) { + tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap); + } else if (idxmap < TARGET_PAGE_SIZE) { + /* + * Most targets have only a few mmu_idx. In the case where + * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid + * allocating memory for this operation. + */ + async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1, + RUN_ON_CPU_TARGET_PTR(addr | idxmap)); } else { - tlb_flush_page_by_mmuidx_async_work( - cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1); + + /* Otherwise allocate a structure, freed by the worker. */ + d->addr = addr; + d->idxmap = idxmap; + async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2, + RUN_ON_CPU_HOST_PTR(d)); } } @@ -506,17 +560,36 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, uint16_t idxmap) { - const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work; - target_ulong addr_and_mmu_idx; - tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); /* This should already be page aligned */ - addr_and_mmu_idx = addr & TARGET_PAGE_MASK; - addr_and_mmu_idx |= idxmap; + addr &= TARGET_PAGE_MASK; - flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); - fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + /* + * Allocate memory to hold addr+idxmap only when needed. + * See tlb_flush_page_by_mmuidx for details. + */ + if (idxmap < TARGET_PAGE_SIZE) { + flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1, + RUN_ON_CPU_TARGET_PTR(addr | idxmap)); + } else { + CPUState *dst_cpu; + + /* Allocate a separate data block for each destination cpu. 
*/
+    CPU_FOREACH(dst_cpu) {
+        if (dst_cpu != src_cpu) {
+            TLBFlushPageByMMUIdxData *d
+                = g_new(TLBFlushPageByMMUIdxData, 1);
+
+            d->addr = addr;
+            d->idxmap = idxmap;
+            async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
+                             RUN_ON_CPU_HOST_PTR(d));
+        }
+    }
+    }
+
+    tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
 }
 
 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
@@ -528,17 +601,41 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
                                               target_ulong addr,
                                               uint16_t idxmap)
 {
-    const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
-    target_ulong addr_and_mmu_idx;
-
     tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
 
     /* This should already be page aligned */
-    addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
-    addr_and_mmu_idx |= idxmap;
+    addr &= TARGET_PAGE_MASK;
 
-    flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
-    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
+    /*
+     * Allocate memory to hold addr+idxmap only when needed.
+     * See tlb_flush_page_by_mmuidx for details.
+     */
+    if (idxmap < TARGET_PAGE_SIZE) {
+        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
+                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
+        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
+                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
+    } else {
+        CPUState *dst_cpu;
+        TLBFlushPageByMMUIdxData *d;
+
+        /* Allocate a separate data block for each destination cpu. */
+        CPU_FOREACH(dst_cpu) {
+            if (dst_cpu != src_cpu) {
+                d = g_new(TLBFlushPageByMMUIdxData, 1);
+                d->addr = addr;
+                d->idxmap = idxmap;
+                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
+                                 RUN_ON_CPU_HOST_PTR(d));
+            }
+        }
+
+        d = g_new(TLBFlushPageByMMUIdxData, 1);
+        d->addr = addr;
+        d->idxmap = idxmap;
+        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
+                              RUN_ON_CPU_HOST_PTR(d));
+    }
 }
 
 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)

From 00b5032eaddb7193f03f0a28b10286244d2e2a7b Mon Sep 17 00:00:00 2001
From: Carlos Santos
Date: Thu, 17 Oct 2019 09:37:13 -0300
Subject: [PATCH 02/16] util/cacheinfo: fix crash when compiling with uClibc

uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE
but the corresponding sysconf calls return -1, which is a valid result,
meaning that the limit is indeterminate.

Handle this situation using the fallback values instead of crashing due
to an assertion failure.
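For context, POSIX allows sysconf() to report an indeterminate limit by
returning -1 *without* changing errno, so -1 by itself is not an error.
A minimal sketch of the resulting defensive pattern (query_line_size is
a hypothetical helper, not part of this patch):

    #include <errno.h>
    #include <unistd.h>

    /* Return sysconf(name), or fallback when the limit is
     * indeterminate (-1 with errno unchanged) or the call fails. */
    static long query_line_size(int name, long fallback)
    {
        errno = 0;
        long v = sysconf(name);
        return v > 0 ? v : fallback;
    }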
Signed-off-by: Carlos Santos
Message-Id: <20191017123713.30192-1-casantos@redhat.com>
Signed-off-by: Richard Henderson
---
 util/cacheinfo.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index ea6f3e99bf..d94dc6adc8 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -93,10 +93,16 @@ static void sys_cache_info(int *isize, int *dsize)
 static void sys_cache_info(int *isize, int *dsize)
 {
 # ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
 # endif
 # ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
 # endif
 }
 #endif /* sys_cache_info */

From 7970dc12e9b0688e3bed7dd366e76532fea501f7 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 9 Jan 2020 12:10:00 +1100
Subject: [PATCH 03/16] vl: Remove unused variable in configure_accelerators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The accel_initialised variable no longer has any setters.

Fixes: 6f6e1698a68c
Acked-by: Paolo Bonzini
Reviewed-by: Alex Bennée
Reviewed-by: Philippe Mathieu-Daudé
Reviewed-by: Aleksandar Markovic
Signed-off-by: Richard Henderson
---
 vl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vl.c b/vl.c
index 71d3e7eefb..da5bf2de27 100644
--- a/vl.c
+++ b/vl.c
@@ -2756,7 +2756,6 @@ static void configure_accelerators(const char *progname)
 {
     const char *accel;
     char **accel_list, **tmp;
-    bool accel_initialised = false;
    bool init_failed = false;
 
     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -2783,7 +2782,7 @@ static void configure_accelerators(const char *progname)
 
         accel_list = g_strsplit(accel, ":", 0);
 
-        for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
+        for (tmp = accel_list; tmp && *tmp; tmp++) {
             /*
              * Filter invalid accelerators here, to prevent obscenities
              * such as "-machine accel=tcg,,thread=single".

From 60ee355276a17f21aec4bfa9bdffa16102aa0b8b Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 9 Jan 2020 12:14:32 +1100
Subject: [PATCH 04/16] vl: Reduce scope of variables in configure_accelerators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The accel_list and tmp variables are only used when manufacturing
-machine accel options based on -accel.
Acked-by: Paolo Bonzini
Reviewed-by: Alex Bennée
Reviewed-by: Aleksandar Markovic
Signed-off-by: Richard Henderson
---
 vl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vl.c b/vl.c
index da5bf2de27..94c9301e7f 100644
--- a/vl.c
+++ b/vl.c
@@ -2755,7 +2755,6 @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
 static void configure_accelerators(const char *progname)
 {
     const char *accel;
-    char **accel_list, **tmp;
     bool init_failed = false;
 
     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -2763,6 +2762,8 @@ static void configure_accelerators(const char *progname)
 
     accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
     if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
+        char **accel_list, **tmp;
+
         if (accel == NULL) {
             /* Select the default accelerator */
             if (!accel_find("tcg") && !accel_find("kvm")) {

From a024b0906779e4355b6d04f67c01d1bc0dcc2699 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 9 Jan 2020 13:10:07 +1100
Subject: [PATCH 05/16] vl: Remove useless test in configure_accelerators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The result of g_strsplit is never NULL.

Acked-by: Paolo Bonzini
Reviewed-by: Alex Bennée
Reviewed-by: Philippe Mathieu-Daudé
Reviewed-by: Aleksandar Markovic
Signed-off-by: Richard Henderson
---
 vl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vl.c b/vl.c
index 94c9301e7f..1e5e9b9aeb 100644
--- a/vl.c
+++ b/vl.c
@@ -2783,7 +2783,7 @@ static void configure_accelerators(const char *progname)
 
         accel_list = g_strsplit(accel, ":", 0);
 
-        for (tmp = accel_list; tmp && *tmp; tmp++) {
+        for (tmp = accel_list; *tmp; tmp++) {
             /*
              * Filter invalid accelerators here, to prevent obscenities
              * such as "-machine accel=tcg,,thread=single".

From 755ee1f301b30d2cd248e162e3a438473eed3767 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 9 Jan 2020 12:07:30 +1100
Subject: [PATCH 06/16] vl: Only choose enabled accelerators in
 configure_accelerators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

By choosing "tcg:kvm" when kvm is not enabled, we generate an
incorrect warning: "invalid accelerator kvm". Presumably the
inverse is also true with --disable-tcg.

At the same time, use g_str_has_suffix rather than open-coding
the same operation.
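For illustration, the open-coded suffix test and its GLib equivalent,
side by side (both taken from the hunk below):

    /* before: open-coded */
    int pnlen = strlen(progname);
    if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) {

    /* after */
    if (g_str_has_suffix(progname, "kvm")) {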
Fixes: 28a0961757fc
Acked-by: Paolo Bonzini
Reviewed-by: Alex Bennée
Reviewed-by: Aleksandar Markovic
Signed-off-by: Richard Henderson
---
 vl.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/vl.c b/vl.c
index 1e5e9b9aeb..4c5033842c 100644
--- a/vl.c
+++ b/vl.c
@@ -2766,21 +2766,26 @@ static void configure_accelerators(const char *progname)
 
         if (accel == NULL) {
             /* Select the default accelerator */
-            if (!accel_find("tcg") && !accel_find("kvm")) {
-                error_report("No accelerator selected and"
-                             " no default accelerator available");
-                exit(1);
-            } else {
-                int pnlen = strlen(progname);
-                if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) {
+            bool have_tcg = accel_find("tcg");
+            bool have_kvm = accel_find("kvm");
+
+            if (have_tcg && have_kvm) {
+                if (g_str_has_suffix(progname, "kvm")) {
                     /* If the program name ends with "kvm", we prefer KVM */
                     accel = "kvm:tcg";
                 } else {
                     accel = "tcg:kvm";
                 }
+            } else if (have_kvm) {
+                accel = "kvm";
+            } else if (have_tcg) {
+                accel = "tcg";
+            } else {
+                error_report("No accelerator selected and"
+                             " no default accelerator available");
+                exit(1);
             }
         }
-
         accel_list = g_strsplit(accel, ":", 0);
 
         for (tmp = accel_list; *tmp; tmp++) {

From f1293145d6063296bc4221269e8dec258cf6a33b Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sat, 7 Dec 2019 11:07:05 -0800
Subject: [PATCH 07/16] cputlb: Merge tlb_table_flush_by_mmuidx into
 tlb_flush_one_mmuidx_locked
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is only one caller for tlb_table_flush_by_mmuidx. Place
the result at the earlier line number, due to an expected user
in the near future.

Reviewed-by: Alex Bennée
Reviewed-by: Alistair Francis
Signed-off-by: Richard Henderson
---
 accel/tcg/cputlb.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 02b381cf50..3a4881cf69 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -224,11 +224,16 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
     }
 }
 
-static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
 {
     tlb_mmu_resize_locked(env, mmu_idx);
-    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
     env_tlb(env)->d[mmu_idx].n_used_entries = 0;
+    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
+    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
+    env_tlb(env)->d[mmu_idx].vindex = 0;
+    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
+    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
+           sizeof(env_tlb(env)->d[0].vtable));
 }
 
 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
@@ -289,16 +294,6 @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
     *pelide = elide;
 }
 
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
-{
-    tlb_table_flush_by_mmuidx(env, mmu_idx);
-    env_tlb(env)->d[mmu_idx].large_page_addr = -1;
-    env_tlb(env)->d[mmu_idx].large_page_mask = -1;
-    env_tlb(env)->d[mmu_idx].vindex = 0;
-    memset(env_tlb(env)->d[mmu_idx].vtable, -1,
-           sizeof(env_tlb(env)->d[0].vtable));
-}
-
 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 {
     CPUArchState *env = cpu->env_ptr;

From 7a1efe1b97c4f68f2f78cbca2e512324ed07524d Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sat, 7 Dec 2019 11:37:57 -0800
Subject: [PATCH 08/16] cputlb: Make tlb_n_entries private to
cputlb.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no users of this function outside cputlb.c, and its interface will change in the next patch. Reviewed-by: Alex Bennée Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 5 +++++ include/exec/cpu_ldst.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 3a4881cf69..500c56d74d 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -80,6 +80,11 @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) +static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx) +{ + return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1; +} + static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) { return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS); diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index a46116167c..53de19753a 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -234,11 +234,6 @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx, return (addr >> TARGET_PAGE_BITS) & size_mask; } -static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx) -{ - return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1; -} - /* Find the TLB entry corresponding to the mmu_idx + address pair. */ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, target_ulong addr) From 722a1c1e97c1edeca5ad458ad2ce3441b47f0440 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 7 Dec 2019 11:47:41 -0800 Subject: [PATCH 09/16] cputlb: Pass CPUTLBDescFast to tlb_n_entries and sizeof_tlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not need the entire CPUArchState to compute these values. 
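Both values derive from the fast path's byte mask alone; a worked
example, assuming CPU_TLB_ENTRY_BITS == 5 (32-byte entries) purely for
illustration:

    mask      = (n_entries - 1) << CPU_TLB_ENTRY_BITS  /* 256 entries -> 0x1fe0 */
    n_entries = (mask >> CPU_TLB_ENTRY_BITS) + 1       /* (0x1fe0 >> 5) + 1 = 256 */
    bytes     = mask + (1 << CPU_TLB_ENTRY_BITS)       /* 0x1fe0 + 32 = 8192 = 256 * 32 */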
Reviewed-by: Alex Bennée Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 500c56d74d..cc6f4c994b 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -80,14 +80,14 @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) -static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx) +static inline size_t tlb_n_entries(CPUTLBDescFast *fast) { - return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1; + return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1; } -static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx) +static inline size_t sizeof_tlb(CPUTLBDescFast *fast) { - return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS); + return fast->mask + (1 << CPU_TLB_ENTRY_BITS); } static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, @@ -156,7 +156,7 @@ static void tlb_dyn_init(CPUArchState *env) static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) { CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; - size_t old_size = tlb_n_entries(env, mmu_idx); + size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]); size_t rate; size_t new_size = old_size; int64_t now = get_clock_realtime(); @@ -236,7 +236,8 @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) env_tlb(env)->d[mmu_idx].large_page_addr = -1; env_tlb(env)->d[mmu_idx].large_page_mask = -1; env_tlb(env)->d[mmu_idx].vindex = 0; - memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx)); + memset(env_tlb(env)->f[mmu_idx].table, -1, + sizeof_tlb(&env_tlb(env)->f[mmu_idx])); memset(env_tlb(env)->d[mmu_idx].vtable, -1, sizeof(env_tlb(env)->d[0].vtable)); } @@ -719,7 +720,7 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { unsigned int i; - unsigned int n = tlb_n_entries(env, mmu_idx); + unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]); for (i = 0; i < n; i++) { tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i], From 71ccd47ba5509f36a3dee54fe009529b67ccbd7c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 7 Dec 2019 11:58:50 -0800 Subject: [PATCH 10/16] cputlb: Hoist tlb portions in tlb_mmu_resize_locked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change, but the smaller expressions make the code easier to read. Reviewed-by: Alex Bennée Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index cc6f4c994b..5252bcb949 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -115,8 +115,8 @@ static void tlb_dyn_init(CPUArchState *env) /** * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary - * @env: CPU that owns the TLB - * @mmu_idx: MMU index of the TLB + * @desc: The CPUTLBDesc portion of the TLB + * @fast: The CPUTLBDescFast portion of the same TLB * * Called with tlb_lock_held. 
* @@ -153,10 +153,9 @@ static void tlb_dyn_init(CPUArchState *env) * high), since otherwise we are likely to have a significant amount of * conflict misses. */ -static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) +static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) { - CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; - size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]); + size_t old_size = tlb_n_entries(fast); size_t rate; size_t new_size = old_size; int64_t now = get_clock_realtime(); @@ -198,14 +197,15 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) return; } - g_free(env_tlb(env)->f[mmu_idx].table); - g_free(env_tlb(env)->d[mmu_idx].iotlb); + g_free(fast->table); + g_free(desc->iotlb); tlb_window_reset(desc, now, 0); /* desc->n_used_entries is cleared by the caller */ - env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; - env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); - env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); + fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; + fast->table = g_try_new(CPUTLBEntry, new_size); + desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); + /* * If the allocations fail, try smaller sizes. We just freed some * memory, so going back to half of new_size has a good chance of working. @@ -213,25 +213,24 @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx) * allocations to fail though, so we progressively reduce the allocation * size, aborting if we cannot even allocate the smallest TLB we support. */ - while (env_tlb(env)->f[mmu_idx].table == NULL || - env_tlb(env)->d[mmu_idx].iotlb == NULL) { + while (fast->table == NULL || desc->iotlb == NULL) { if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) { error_report("%s: %s", __func__, strerror(errno)); abort(); } new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS); - env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; + fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS; - g_free(env_tlb(env)->f[mmu_idx].table); - g_free(env_tlb(env)->d[mmu_idx].iotlb); - env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size); - env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size); + g_free(fast->table); + g_free(desc->iotlb); + fast->table = g_try_new(CPUTLBEntry, new_size); + desc->iotlb = g_try_new(CPUIOTLBEntry, new_size); } } static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) { - tlb_mmu_resize_locked(env, mmu_idx); + tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]); env_tlb(env)->d[mmu_idx].n_used_entries = 0; env_tlb(env)->d[mmu_idx].large_page_addr = -1; env_tlb(env)->d[mmu_idx].large_page_mask = -1; From 5c948e3175e87620f5330e18873e1f8190f10ec0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 7 Dec 2019 12:00:56 -0800 Subject: [PATCH 11/16] cputlb: Hoist tlb portions in tlb_flush_one_mmuidx_locked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change, but the smaller expressions make the code easier to read. 
Reviewed-by: Alex Bennée Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 5252bcb949..bffca0e7e7 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -230,15 +230,16 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) { - tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]); - env_tlb(env)->d[mmu_idx].n_used_entries = 0; - env_tlb(env)->d[mmu_idx].large_page_addr = -1; - env_tlb(env)->d[mmu_idx].large_page_mask = -1; - env_tlb(env)->d[mmu_idx].vindex = 0; - memset(env_tlb(env)->f[mmu_idx].table, -1, - sizeof_tlb(&env_tlb(env)->f[mmu_idx])); - memset(env_tlb(env)->d[mmu_idx].vtable, -1, - sizeof(env_tlb(env)->d[0].vtable)); + CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; + CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx]; + + tlb_mmu_resize_locked(desc, fast); + desc->n_used_entries = 0; + desc->large_page_addr = -1; + desc->large_page_mask = -1; + desc->vindex = 0; + memset(fast->table, -1, sizeof_tlb(fast)); + memset(desc->vtable, -1, sizeof(desc->vtable)); } static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) From bbf021b04a57e95b6b4fde882b33c5363f94373f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 7 Dec 2019 12:08:04 -0800 Subject: [PATCH 12/16] cputlb: Split out tlb_mmu_flush_locked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will want to be able to flush a tlb without resizing. Reviewed-by: Alex Bennée Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index bffca0e7e7..28cfff1556 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -228,12 +228,8 @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) } } -static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) +static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast) { - CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; - CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx]; - - tlb_mmu_resize_locked(desc, fast); desc->n_used_entries = 0; desc->large_page_addr = -1; desc->large_page_mask = -1; @@ -242,6 +238,15 @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) memset(desc->vtable, -1, sizeof(desc->vtable)); } +static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) +{ + CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx]; + CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx]; + + tlb_mmu_resize_locked(desc, fast); + tlb_mmu_flush_locked(desc, fast); +} + static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) { env_tlb(env)->d[mmu_idx].n_used_entries++; From 56e89f76fdf0dc8162e28105055570a83a93b15e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 7 Dec 2019 13:22:19 -0800 Subject: [PATCH 13/16] cputlb: Partially merge tlb_dyn_init into tlb_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge into the only caller, but at the same time split out tlb_mmu_init to initialize a single tlb entry. 
Reviewed-by: Alex Bennée Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 28cfff1556..360495468e 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -97,22 +97,6 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, desc->window_max_entries = max_entries; } -static void tlb_dyn_init(CPUArchState *env) -{ - int i; - - for (i = 0; i < NB_MMU_MODES; i++) { - CPUTLBDesc *desc = &env_tlb(env)->d[i]; - size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; - - tlb_window_reset(desc, get_clock_realtime(), 0); - desc->n_used_entries = 0; - env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; - env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries); - env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries); - } -} - /** * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary * @desc: The CPUTLBDesc portion of the TLB @@ -247,6 +231,17 @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx) tlb_mmu_flush_locked(desc, fast); } +static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now) +{ + size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS; + + tlb_window_reset(desc, now, 0); + desc->n_used_entries = 0; + fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; + fast->table = g_new(CPUTLBEntry, n_entries); + desc->iotlb = g_new(CPUIOTLBEntry, n_entries); +} + static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) { env_tlb(env)->d[mmu_idx].n_used_entries++; @@ -260,13 +255,17 @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx) void tlb_init(CPUState *cpu) { CPUArchState *env = cpu->env_ptr; + int64_t now = get_clock_realtime(); + int i; qemu_spin_init(&env_tlb(env)->c.lock); /* Ensure that cpu_reset performs a full flush. */ env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; - tlb_dyn_init(env); + for (i = 0; i < NB_MMU_MODES; i++) { + tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now); + } } /* flush_all_helper: run fn across all cpus From 3c16304af4241f242eeacae646457b9720aa71db Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 9 Jan 2020 11:23:56 +1100 Subject: [PATCH 14/16] cputlb: Initialize tlbs as flushed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's little point in leaving these data structures half initialized, and relying on a flush to be done during reset. Reviewed-by: Alex Bennée Reviewed-by: Alistair Francis Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 360495468e..5a35386224 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -240,6 +240,7 @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now) fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS; fast->table = g_new(CPUTLBEntry, n_entries); desc->iotlb = g_new(CPUIOTLBEntry, n_entries); + tlb_mmu_flush_locked(desc, fast); } static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx) @@ -260,8 +261,8 @@ void tlb_init(CPUState *cpu) qemu_spin_init(&env_tlb(env)->c.lock); - /* Ensure that cpu_reset performs a full flush. */ - env_tlb(env)->c.dirty = ALL_MMUIDX_BITS; + /* All tlbs are initialized flushed. 
*/
+    env_tlb(env)->c.dirty = 0;
 
     for (i = 0; i < NB_MMU_MODES; i++) {
         tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);

From 3c3959f2d9af919e562d37f6d40322a80a90469d Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sat, 7 Dec 2019 14:36:01 -0800
Subject: [PATCH 15/16] cputlb: Hoist timestamp outside of loops over tlbs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do not call get_clock_realtime() in tlb_mmu_resize_locked; hoist it
outside of any loop over a set of tlbs. There are only two (indirect)
callers, tlb_flush_by_mmuidx_async_work and tlb_flush_page_locked,
so this is not onerous.

Reviewed-by: Alex Bennée
Reviewed-by: Alistair Francis
Reviewed-by: Philippe Mathieu-Daudé
Signed-off-by: Richard Henderson
---
 accel/tcg/cputlb.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5a35386224..e3b5750c3b 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -137,12 +137,12 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
  * high), since otherwise we are likely to have a significant amount of
  * conflict misses.
  */
-static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
+                                  int64_t now)
 {
     size_t old_size = tlb_n_entries(fast);
     size_t rate;
     size_t new_size = old_size;
-    int64_t now = get_clock_realtime();
     int64_t window_len_ms = 100;
     int64_t window_len_ns = window_len_ms * 1000 * 1000;
     bool window_expired = now > desc->window_begin_ns + window_len_ns;
@@ -222,12 +222,13 @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
     memset(desc->vtable, -1, sizeof(desc->vtable));
 }
 
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
+                                        int64_t now)
 {
     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
     CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
 
-    tlb_mmu_resize_locked(desc, fast);
+    tlb_mmu_resize_locked(desc, fast, now);
     tlb_mmu_flush_locked(desc, fast);
 }
 
@@ -310,6 +311,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
     CPUArchState *env = cpu->env_ptr;
     uint16_t asked = data.host_int;
     uint16_t all_dirty, work, to_clean;
+    int64_t now = get_clock_realtime();
 
     assert_cpu_is_self(cpu);
 
@@ -324,7 +326,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 
     for (work = to_clean; work != 0; work &= work - 1) {
         int mmu_idx = ctz32(work);
-        tlb_flush_one_mmuidx_locked(env, mmu_idx);
+        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
     }
 
     qemu_spin_unlock(&env_tlb(env)->c.lock);
@@ -446,7 +448,7 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
         tlb_debug("forcing full flush midx %d ("
                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                   midx, lp_addr, lp_mask);
-        tlb_flush_one_mmuidx_locked(env, midx);
+        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
     } else {
         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
             tlb_n_used_entries_dec(env, midx);

From 75fa376cdab5e5db2c7fdd107358e16f95503ac6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?=
Date: Mon, 30 Dec 2019 09:28:56 +0100
Subject: [PATCH 16/16] scripts/git.orderfile: Display decodetree before C
 source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To avoid scrolling each instruction when reviewing tcg helpers
written for the decodetree script, display the
.decode files (similar to header declarations) before the C source (implementation of previous declarations). Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Stefano Garzarella Reviewed-by: Alex Bennée Message-Id: <20191230082856.30556-1-philmd@redhat.com> Signed-off-by: Richard Henderson --- scripts/git.orderfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/git.orderfile b/scripts/git.orderfile index e89790941c..1f747b583a 100644 --- a/scripts/git.orderfile +++ b/scripts/git.orderfile @@ -25,5 +25,8 @@ qga/*.json # headers *.h +# decoding tree specification +*.decode + # code *.c