notdirty_write: fix store-related performance problems

Every store would always cause the tb_invalidate_phys_page_fast path to be invoked,
amounting to a 40x slowdown of stores compared to loads.

Change this code to only worry about TB invalidation for regions marked as
executable (i.e., regions that contain emulated executable code).

Even without uc_set_native_thunks, this change fixes most of the performance
issues seen with thunking to native calls.

Signed-off-by: Andrei Warkentin <andrei.warkentin@intel.com>
This commit is contained in:
Andrei Warkentin 2022-12-13 00:13:58 -06:00 committed by mio
parent 9f21566b53
commit d01035767e
No known key found for this signature in database
GPG Key ID: DFF27E34A47CB873
20 changed files with 76 additions and 23 deletions

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_aarch64
#define tlb_unprotect_code tlb_unprotect_code_aarch64
#define tlb_reset_dirty tlb_reset_dirty_aarch64
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_aarch64
#define tlb_set_dirty tlb_set_dirty_aarch64
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_aarch64
#define tlb_set_page tlb_set_page_aarch64

View File

@ -661,6 +661,25 @@ static void tlb_reset_dirty_range_locked(struct uc_struct *uc, CPUTLBEntry *tlb_
}
}
/*
 * Mark a single TLB entry as not-dirty if its writable target page falls
 * inside [start, start + length). Setting TLB_NOTDIRTY forces the next
 * store through this entry onto the slow notdirty_write() path.
 *
 * Must be called with tlb_c.lock held (the "_locked" suffix convention
 * used by the sibling tlb_reset_dirty_range_locked).
 */
static void tlb_reset_dirty_range_by_vaddr_locked(struct uc_struct *uc, CPUTLBEntry *tlb_entry,
                                         target_ulong start, target_ulong length)
{
    uintptr_t write_addr = tlb_entry->addr_write;

    /* Skip entries that are invalid, MMIO, discard-write, or already marked. */
    if (write_addr & (TLB_INVALID_MASK | TLB_MMIO |
                      TLB_DISCARD_WRITE | TLB_NOTDIRTY)) {
        return;
    }

    /* Compare page-aligned addresses; the subtraction-based range test
       also rejects pages below `start` via unsigned wraparound. */
    write_addr &= TARGET_PAGE_MASK;
    if ((write_addr - start) >= length) {
        return;
    }

#if TCG_OVERSIZED_GUEST
    tlb_entry->addr_write |= TLB_NOTDIRTY;
#else
    tlb_entry->addr_write = tlb_entry->addr_write | TLB_NOTDIRTY;
#endif
}
/*
* Called with tlb_c.lock held.
* Called only from the vCPU context, i.e. the TLB's owner thread.
@ -699,6 +718,30 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
}
}
/*
 * Mark every TLB entry covering the virtual-address range
 * [start1, start1 + length) as not-dirty, across all MMU modes,
 * in both the main TLB table and the victim TLB.
 *
 * Counterpart of tlb_reset_dirty(), which works on ram_addr_t instead
 * of guest virtual addresses. Called from the vCPU's own thread.
 */
void tlb_reset_dirty_by_vaddr(CPUState *cpu, target_ulong start1, target_ulong length)
{
    struct uc_struct *uc = cpu->uc;
    CPUArchState *env = cpu->env_ptr;

    for (int mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        /* Main table: size is dynamic, query it once per MMU mode. */
        unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);

        for (unsigned int i = 0; i < n; i++) {
            tlb_reset_dirty_range_by_vaddr_locked(uc, &env_tlb(env)->f[mmu_idx].table[i],
                                                  start1, length);
        }

        /* Victim TLB: fixed size. */
        for (unsigned int i = 0; i < CPU_VTLB_SIZE; i++) {
            tlb_reset_dirty_range_by_vaddr_locked(uc, &env_tlb(env)->d[mmu_idx].vtable[i],
                                                  start1, length);
        }
    }
}
/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
target_ulong vaddr)
@ -1144,30 +1187,24 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
}
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
CPUIOTLBEntry *iotlbentry, uintptr_t retaddr,
MemoryRegion *mr)
{
ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
// trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
if (mr == NULL) {
mr = memory_mapping(cpu->uc, mem_vaddr);
}
if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
if ((mr->perms & UC_PROT_EXEC) != 0) {
struct page_collection *pages
= page_collection_lock(cpu->uc, ram_addr, ram_addr + size);
tb_invalidate_phys_page_fast(cpu->uc, pages, ram_addr, size, retaddr);
page_collection_unlock(pages);
}
/*
* Set both VGA and migration bits for simplicity and to remove
* the notdirty callback faster.
*/
cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
/* We remove the notdirty callback only if the code has been flushed. */
if (!cpu_physical_memory_is_clean(ram_addr)) {
// trace_memory_notdirty_set_dirty(mem_vaddr);
/* For exec pages, this is cleared in tb_gen_code. */
tlb_set_dirty(cpu, mem_vaddr);
}
}
/*
@ -1244,7 +1281,7 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
/* Handle clean RAM pages. */
if (tlb_addr & TLB_NOTDIRTY) {
notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr, NULL);
}
}
@ -1370,7 +1407,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
notdirty_write(env_cpu(env), addr, 1 << s_bits,
&env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
&env_tlb(env)->d[mmu_idx].iotlb[index], retaddr, NULL);
}
return hostaddr;
@ -2216,7 +2253,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
/* Handle clean RAM pages. */
if (tlb_addr & TLB_NOTDIRTY) {
notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr, mr);
}
haddr = (void *)((uintptr_t)addr + entry->addend);

View File

@ -1843,6 +1843,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
if ((pc & TARGET_PAGE_MASK) != virt_page2) {
phys_page2 = get_page_addr_code(env, virt_page2);
}
/* Undoes tlb_set_dirty in notdirty_write. */
tlb_reset_dirty_by_vaddr(cpu, pc & TARGET_PAGE_MASK,
(pc & ~TARGET_PAGE_MASK) + tb->size);
/*
* No explicit memory barrier is required -- tb_link_page() makes the
* TB visible in a consistent state.

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_arm
#define tlb_unprotect_code tlb_unprotect_code_arm
#define tlb_reset_dirty tlb_reset_dirty_arm
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_arm
#define tlb_set_dirty tlb_set_dirty_arm
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_arm
#define tlb_set_page tlb_set_page_arm

View File

@ -464,6 +464,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
void **hostp);
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
void tlb_reset_dirty_by_vaddr(CPUState *cpu, target_ulong start1, target_ulong length);
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
/* exec.c */

View File

@ -67,12 +67,6 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
return false;
}
static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
unsigned client)
{
return cpu_physical_memory_get_dirty(addr, 1, client);
}
static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
return true;

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_m68k
#define tlb_unprotect_code tlb_unprotect_code_m68k
#define tlb_reset_dirty tlb_reset_dirty_m68k
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_m68k
#define tlb_set_dirty tlb_set_dirty_m68k
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_m68k
#define tlb_set_page tlb_set_page_m68k

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_mips
#define tlb_unprotect_code tlb_unprotect_code_mips
#define tlb_reset_dirty tlb_reset_dirty_mips
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_mips
#define tlb_set_dirty tlb_set_dirty_mips
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_mips
#define tlb_set_page tlb_set_page_mips

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_mips64
#define tlb_unprotect_code tlb_unprotect_code_mips64
#define tlb_reset_dirty tlb_reset_dirty_mips64
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_mips64
#define tlb_set_dirty tlb_set_dirty_mips64
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_mips64
#define tlb_set_page tlb_set_page_mips64

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_mips64el
#define tlb_unprotect_code tlb_unprotect_code_mips64el
#define tlb_reset_dirty tlb_reset_dirty_mips64el
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_mips64el
#define tlb_set_dirty tlb_set_dirty_mips64el
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_mips64el
#define tlb_set_page tlb_set_page_mips64el

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_mipsel
#define tlb_unprotect_code tlb_unprotect_code_mipsel
#define tlb_reset_dirty tlb_reset_dirty_mipsel
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_mipsel
#define tlb_set_dirty tlb_set_dirty_mipsel
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_mipsel
#define tlb_set_page tlb_set_page_mipsel

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_ppc
#define tlb_unprotect_code tlb_unprotect_code_ppc
#define tlb_reset_dirty tlb_reset_dirty_ppc
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_ppc
#define tlb_set_dirty tlb_set_dirty_ppc
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_ppc
#define tlb_set_page tlb_set_page_ppc

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_ppc64
#define tlb_unprotect_code tlb_unprotect_code_ppc64
#define tlb_reset_dirty tlb_reset_dirty_ppc64
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_ppc64
#define tlb_set_dirty tlb_set_dirty_ppc64
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_ppc64
#define tlb_set_page tlb_set_page_ppc64

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_riscv32
#define tlb_unprotect_code tlb_unprotect_code_riscv32
#define tlb_reset_dirty tlb_reset_dirty_riscv32
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_riscv32
#define tlb_set_dirty tlb_set_dirty_riscv32
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_riscv32
#define tlb_set_page tlb_set_page_riscv32

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_riscv64
#define tlb_unprotect_code tlb_unprotect_code_riscv64
#define tlb_reset_dirty tlb_reset_dirty_riscv64
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_riscv64
#define tlb_set_dirty tlb_set_dirty_riscv64
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_riscv64
#define tlb_set_page tlb_set_page_riscv64

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_s390x
#define tlb_unprotect_code tlb_unprotect_code_s390x
#define tlb_reset_dirty tlb_reset_dirty_s390x
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_s390x
#define tlb_set_dirty tlb_set_dirty_s390x
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_s390x
#define tlb_set_page tlb_set_page_s390x

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_sparc
#define tlb_unprotect_code tlb_unprotect_code_sparc
#define tlb_reset_dirty tlb_reset_dirty_sparc
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_sparc
#define tlb_set_dirty tlb_set_dirty_sparc
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_sparc
#define tlb_set_page tlb_set_page_sparc

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_sparc64
#define tlb_unprotect_code tlb_unprotect_code_sparc64
#define tlb_reset_dirty tlb_reset_dirty_sparc64
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_sparc64
#define tlb_set_dirty tlb_set_dirty_sparc64
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_sparc64
#define tlb_set_page tlb_set_page_sparc64

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_tricore
#define tlb_unprotect_code tlb_unprotect_code_tricore
#define tlb_reset_dirty tlb_reset_dirty_tricore
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_tricore
#define tlb_set_dirty tlb_set_dirty_tricore
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_tricore
#define tlb_set_page tlb_set_page_tricore

View File

@ -787,6 +787,7 @@
#define tlb_protect_code tlb_protect_code_x86_64
#define tlb_unprotect_code tlb_unprotect_code_x86_64
#define tlb_reset_dirty tlb_reset_dirty_x86_64
#define tlb_reset_dirty_by_vaddr tlb_reset_dirty_by_vaddr_x86_64
#define tlb_set_dirty tlb_set_dirty_x86_64
#define tlb_set_page_with_attrs tlb_set_page_with_attrs_x86_64
#define tlb_set_page tlb_set_page_x86_64