From e8ca3cbea54b215a5ff0e71442d1bcdb0d4a6e87 Mon Sep 17 00:00:00 2001 From: PhilippTakacs <76390863+PhilippTakacs@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:51:13 +0200 Subject: [PATCH] Optimize memory handling (#1963) * optimize ram block handling Save the last element of the ram_list. This makes it faster to find where to add new elements when they are not bigger than the page size. * save ram_list freed This keeps the optimization for find_ram_offset() intact after snapshot restore. * cow only clear the tlb of affected pages * update flatview when possible Building each flatview anew when the memory has changed is quite expensive when many MemoryRegions are used. This is an issue when using snapshots. * update benchmark for new api * save flatview in context This avoids rebuilding the flatview when restoring a context. * init context flatview with zero * address_space_dispatch_clear remove subpage with higher priority * document the options for UC_CTL_CONTEXT_MODE Especially stress that with UC_CTL_CONTEXT_MEMORY it is not possible to use the context with a different unicorn object. 
--- include/qemu.h | 1 + include/uc_priv.h | 6 ++ include/unicorn/unicorn.h | 10 +++ qemu/aarch64.h | 2 + qemu/arm.h | 2 + qemu/exec.c | 40 +++++++-- qemu/include/exec/memory-internal.h | 1 + qemu/include/exec/memory.h | 1 + qemu/m68k.h | 2 + qemu/mips.h | 2 + qemu/mips64.h | 2 + qemu/mips64el.h | 2 + qemu/mipsel.h | 2 + qemu/ppc.h | 2 + qemu/ppc64.h | 2 + qemu/riscv32.h | 2 + qemu/riscv64.h | 2 + qemu/s390x.h | 2 + qemu/softmmu/memory.c | 123 ++++++++++++++++++++++++---- qemu/sparc.h | 2 + qemu/sparc64.h | 2 + qemu/tricore.h | 2 + qemu/unicorn_common.h | 1 + qemu/x86_64.h | 2 + symbols.sh | 2 + tests/benchmarks/cow/benchmark.c | 41 +++++----- uc.c | 23 ++++++ 27 files changed, 236 insertions(+), 45 deletions(-) diff --git a/include/qemu.h b/include/qemu.h index 8705a1a6..b6fedfac 100644 --- a/include/qemu.h +++ b/include/qemu.h @@ -43,6 +43,7 @@ typedef struct { typedef struct RAMList { bool freed; RAMBlock *mru_block; + RAMBlock *last_block; QLIST_HEAD(, RAMBlock) blocks; } RAMList; diff --git a/include/uc_priv.h b/include/uc_priv.h index fed75600..f9195b4d 100644 --- a/include/uc_priv.h +++ b/include/uc_priv.h @@ -108,6 +108,8 @@ typedef MemoryRegion *(*uc_memory_mapping_t)(struct uc_struct *, hwaddr addr); typedef void (*uc_memory_filter_t)(MemoryRegion *, int32_t); +typedef bool (*uc_flatview_copy_t)(struct uc_struct *, FlatView *, FlatView *, bool); + typedef void (*uc_readonly_mem_t)(MemoryRegion *mr, bool readonly); typedef int (*uc_cpus_init)(struct uc_struct *, const char *); @@ -288,6 +290,7 @@ struct uc_struct { uc_args_uc_ram_size_ptr_t memory_map_ptr; uc_memory_mapping_t memory_mapping; uc_memory_filter_t memory_filter_subregions; + uc_flatview_copy_t flatview_copy; uc_mem_unmap_t memory_unmap; uc_mem_unmap_t memory_moveout; uc_mem_unmap_t memory_movein; @@ -427,6 +430,9 @@ struct uc_context { uc_mode mode; // the mode of this context uc_arch arch; // the arch of this context int snapshot_level; // the memory snapshot level to restore + bool 
ramblock_freed; // wheter there was a some ramblock freed + RAMBlock *last_block;// The last element of the ramblock list + FlatView *fv; // The current flatview of the memory char data[0]; // context }; diff --git a/include/unicorn/unicorn.h b/include/unicorn/unicorn.h index 791cfa6d..c057dcc9 100644 --- a/include/unicorn/unicorn.h +++ b/include/unicorn/unicorn.h @@ -1018,6 +1018,16 @@ struct uc_tlb_entry { uc_prot perms; }; +/* + Variables to control which state should be stored in the context. + Defaults to UC_CTL_CONTEXT_CPU. The options are used in a bitfield + so to enable more then one content the binary or of the required + contents can be use. + The UC_CTL_CONTEXT_MEMORY stores some pointers to internal allocated + memory. Therefor it's not possible to use this context with another + unicorn object. +*/ + typedef enum uc_context_content { UC_CTL_CONTEXT_CPU = 1, UC_CTL_CONTEXT_MEMORY = 2, diff --git a/qemu/aarch64.h b/qemu/aarch64.h index fd70593c..642bf3f7 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -54,6 +54,7 @@ #define vm_start vm_start_aarch64 #define address_space_dispatch_compact address_space_dispatch_compact_aarch64 #define flatview_translate flatview_translate_aarch64 +#define flatview_copy flatview_copy_aarch64 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_aarch64 #define qemu_get_cpu qemu_get_cpu_aarch64 #define cpu_address_space_init cpu_address_space_init_aarch64 @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_aarch64 #define address_space_dispatch_new address_space_dispatch_new_aarch64 #define address_space_dispatch_free address_space_dispatch_free_aarch64 +#define address_space_dispatch_clear address_space_dispatch_clear_aarch64 #define flatview_read_continue flatview_read_continue_aarch64 #define address_space_read_full address_space_read_full_aarch64 #define address_space_write address_space_write_aarch64 diff --git a/qemu/arm.h b/qemu/arm.h index 894eba5a..1199603c 100644 --- a/qemu/arm.h 
+++ b/qemu/arm.h @@ -54,6 +54,7 @@ #define vm_start vm_start_arm #define address_space_dispatch_compact address_space_dispatch_compact_arm #define flatview_translate flatview_translate_arm +#define flatview_copy flatview_copy_arm #define address_space_translate_for_iotlb address_space_translate_for_iotlb_arm #define qemu_get_cpu qemu_get_cpu_arm #define cpu_address_space_init cpu_address_space_init_arm @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_arm #define address_space_dispatch_new address_space_dispatch_new_arm #define address_space_dispatch_free address_space_dispatch_free_arm +#define address_space_dispatch_clear address_space_dispatch_clear_arm #define flatview_read_continue flatview_read_continue_arm #define address_space_read_full address_space_read_full_arm #define address_space_write address_space_write_arm diff --git a/qemu/exec.c b/qemu/exec.c index af77e5de..9786b195 100644 --- a/qemu/exec.c +++ b/qemu/exec.c @@ -957,12 +957,10 @@ void flatview_add_to_dispatch(struct uc_struct *uc, FlatView *fv, MemoryRegionSe static ram_addr_t find_ram_offset_last(struct uc_struct *uc, ram_addr_t size) { - RAMBlock *block; ram_addr_t result = 0; + RAMBlock *block = uc->ram_list.last_block; - RAMBLOCK_FOREACH(block) { - result = MAX(block->offset + block->max_length, result); - } + result = block->offset + block->max_length; if (result + size > RAM_ADDR_MAX) { abort(); @@ -1076,18 +1074,26 @@ static void ram_block_add(struct uc_struct *uc, RAMBlock *new_block) * QLIST (which has an RCU-friendly variant) does not have insertion at * tail, so save the last element in last_block. 
*/ - RAMBLOCK_FOREACH(block) { - last_block = block; - if (block->max_length < new_block->max_length) { - break; + if (uc->ram_list.freed || new_block->max_length > uc->target_page_size) { + RAMBLOCK_FOREACH(block) { + last_block = block; + if (block->max_length < new_block->max_length) { + break; + } } + } else { + last_block = uc->ram_list.last_block; + block = NULL; } + if (block) { QLIST_INSERT_BEFORE_RCU(block, new_block, next); } else if (last_block) { QLIST_INSERT_AFTER_RCU(last_block, new_block, next); + uc->ram_list.last_block = new_block; } else { /* list is empty */ QLIST_INSERT_HEAD_RCU(&uc->ram_list.blocks, new_block, next); + uc->ram_list.last_block = new_block; } uc->ram_list.mru_block = NULL; @@ -1165,6 +1171,7 @@ void qemu_ram_free(struct uc_struct *uc, RAMBlock *block) QLIST_REMOVE_RCU(block, next); uc->ram_list.mru_block = NULL; uc->ram_list.freed = true; + uc->ram_list.last_block = NULL; /* Write list before version */ //smp_wmb(); // call_rcu(block, reclaim_ramblock, rcu); @@ -1388,6 +1395,7 @@ static subpage_t *subpage_init(struct uc_struct *uc, FlatView *fv, hwaddr base) memory_region_init_io(fv->root->uc, &mmio->iomem, &subpage_ops, mmio, TARGET_PAGE_SIZE); mmio->iomem.subpage = true; + mmio->iomem.priority = uc->snapshot_level; #if defined(DEBUG_SUBPAGE) printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__, mmio, base, TARGET_PAGE_SIZE); @@ -1448,6 +1456,22 @@ AddressSpaceDispatch *address_space_dispatch_new(struct uc_struct *uc, FlatView return d; } +void address_space_dispatch_clear(AddressSpaceDispatch *d) +{ + MemoryRegionSection *section; + struct uc_struct *uc = d->uc; + while (d->map.sections_nb > 0) { + d->map.sections_nb--; + section = &d->map.sections[d->map.sections_nb]; + if (section->mr->priority > uc->snapshot_level) { + phys_section_destroy(section->mr); + } + } + g_free(d->map.sections); + g_free(d->map.nodes); + g_free(d); +} + void address_space_dispatch_free(AddressSpaceDispatch *d) { phys_sections_free(&d->map); 
diff --git a/qemu/include/exec/memory-internal.h b/qemu/include/exec/memory-internal.h index d8f05c1b..3f4d666d 100644 --- a/qemu/include/exec/memory-internal.h +++ b/qemu/include/exec/memory-internal.h @@ -45,6 +45,7 @@ void flatview_add_to_dispatch(struct uc_struct *uc, FlatView *fv, MemoryRegionSe AddressSpaceDispatch *address_space_dispatch_new(struct uc_struct *uc, FlatView *fv); void address_space_dispatch_compact(AddressSpaceDispatch *d); void address_space_dispatch_free(AddressSpaceDispatch *d); +void address_space_dispatch_clear(AddressSpaceDispatch *d); void mtree_print_dispatch(struct AddressSpaceDispatch *d, MemoryRegion *root); diff --git a/qemu/include/exec/memory.h b/qemu/include/exec/memory.h index 8a8d13c3..eafc1a5d 100644 --- a/qemu/include/exec/memory.h +++ b/qemu/include/exec/memory.h @@ -1220,5 +1220,6 @@ void memory_unmap(struct uc_struct *uc, MemoryRegion *mr); void memory_moveout(struct uc_struct *uc, MemoryRegion *mr); void memory_movein(struct uc_struct *uc, MemoryRegion *mr); int memory_free(struct uc_struct *uc); +bool flatview_copy(struct uc_struct *uc, FlatView *dst, FlatView *src, bool update_dispatcher); #endif diff --git a/qemu/m68k.h b/qemu/m68k.h index 9cdc982a..2a1ffd67 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -54,6 +54,7 @@ #define vm_start vm_start_m68k #define address_space_dispatch_compact address_space_dispatch_compact_m68k #define flatview_translate flatview_translate_m68k +#define flatview_copy flatview_copy_m68k #define address_space_translate_for_iotlb address_space_translate_for_iotlb_m68k #define qemu_get_cpu qemu_get_cpu_m68k #define cpu_address_space_init cpu_address_space_init_m68k @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_m68k #define address_space_dispatch_new address_space_dispatch_new_m68k #define address_space_dispatch_free address_space_dispatch_free_m68k +#define address_space_dispatch_clear address_space_dispatch_clear_m68k #define flatview_read_continue flatview_read_continue_m68k 
#define address_space_read_full address_space_read_full_m68k #define address_space_write address_space_write_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 1ef392f3..30cd7ef5 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -54,6 +54,7 @@ #define vm_start vm_start_mips #define address_space_dispatch_compact address_space_dispatch_compact_mips #define flatview_translate flatview_translate_mips +#define flatview_copy flatview_copy_mips #define address_space_translate_for_iotlb address_space_translate_for_iotlb_mips #define qemu_get_cpu qemu_get_cpu_mips #define cpu_address_space_init cpu_address_space_init_mips @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_mips #define address_space_dispatch_new address_space_dispatch_new_mips #define address_space_dispatch_free address_space_dispatch_free_mips +#define address_space_dispatch_clear address_space_dispatch_clear_mips #define flatview_read_continue flatview_read_continue_mips #define address_space_read_full address_space_read_full_mips #define address_space_write address_space_write_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 6b6c6c3f..ad9033e8 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -54,6 +54,7 @@ #define vm_start vm_start_mips64 #define address_space_dispatch_compact address_space_dispatch_compact_mips64 #define flatview_translate flatview_translate_mips64 +#define flatview_copy flatview_copy_mips64 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_mips64 #define qemu_get_cpu qemu_get_cpu_mips64 #define cpu_address_space_init cpu_address_space_init_mips64 @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_mips64 #define address_space_dispatch_new address_space_dispatch_new_mips64 #define address_space_dispatch_free address_space_dispatch_free_mips64 +#define address_space_dispatch_clear address_space_dispatch_clear_mips64 #define flatview_read_continue flatview_read_continue_mips64 #define address_space_read_full address_space_read_full_mips64 
#define address_space_write address_space_write_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 583d2e55..ad65fc39 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -54,6 +54,7 @@ #define vm_start vm_start_mips64el #define address_space_dispatch_compact address_space_dispatch_compact_mips64el #define flatview_translate flatview_translate_mips64el +#define flatview_copy flatview_copy_mips64el #define address_space_translate_for_iotlb address_space_translate_for_iotlb_mips64el #define qemu_get_cpu qemu_get_cpu_mips64el #define cpu_address_space_init cpu_address_space_init_mips64el @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_mips64el #define address_space_dispatch_new address_space_dispatch_new_mips64el #define address_space_dispatch_free address_space_dispatch_free_mips64el +#define address_space_dispatch_clear address_space_dispatch_clear_mips64el #define flatview_read_continue flatview_read_continue_mips64el #define address_space_read_full address_space_read_full_mips64el #define address_space_write address_space_write_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index 388b46ce..0ca05035 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -54,6 +54,7 @@ #define vm_start vm_start_mipsel #define address_space_dispatch_compact address_space_dispatch_compact_mipsel #define flatview_translate flatview_translate_mipsel +#define flatview_copy flatview_copy_mipsel #define address_space_translate_for_iotlb address_space_translate_for_iotlb_mipsel #define qemu_get_cpu qemu_get_cpu_mipsel #define cpu_address_space_init cpu_address_space_init_mipsel @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_mipsel #define address_space_dispatch_new address_space_dispatch_new_mipsel #define address_space_dispatch_free address_space_dispatch_free_mipsel +#define address_space_dispatch_clear address_space_dispatch_clear_mipsel #define flatview_read_continue flatview_read_continue_mipsel #define address_space_read_full 
address_space_read_full_mipsel #define address_space_write address_space_write_mipsel diff --git a/qemu/ppc.h b/qemu/ppc.h index 8979bc95..b1ac72fd 100644 --- a/qemu/ppc.h +++ b/qemu/ppc.h @@ -54,6 +54,7 @@ #define vm_start vm_start_ppc #define address_space_dispatch_compact address_space_dispatch_compact_ppc #define flatview_translate flatview_translate_ppc +#define flatview_copy flatview_copy_ppc #define address_space_translate_for_iotlb address_space_translate_for_iotlb_ppc #define qemu_get_cpu qemu_get_cpu_ppc #define cpu_address_space_init cpu_address_space_init_ppc @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_ppc #define address_space_dispatch_new address_space_dispatch_new_ppc #define address_space_dispatch_free address_space_dispatch_free_ppc +#define address_space_dispatch_clear address_space_dispatch_clear_ppc #define flatview_read_continue flatview_read_continue_ppc #define address_space_read_full address_space_read_full_ppc #define address_space_write address_space_write_ppc diff --git a/qemu/ppc64.h b/qemu/ppc64.h index 0780f09c..43c1570d 100644 --- a/qemu/ppc64.h +++ b/qemu/ppc64.h @@ -54,6 +54,7 @@ #define vm_start vm_start_ppc64 #define address_space_dispatch_compact address_space_dispatch_compact_ppc64 #define flatview_translate flatview_translate_ppc64 +#define flatview_copy flatview_copy_ppc64 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_ppc64 #define qemu_get_cpu qemu_get_cpu_ppc64 #define cpu_address_space_init cpu_address_space_init_ppc64 @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_ppc64 #define address_space_dispatch_new address_space_dispatch_new_ppc64 #define address_space_dispatch_free address_space_dispatch_free_ppc64 +#define address_space_dispatch_clear address_space_dispatch_clear_ppc64 #define flatview_read_continue flatview_read_continue_ppc64 #define address_space_read_full address_space_read_full_ppc64 #define address_space_write address_space_write_ppc64 diff --git 
a/qemu/riscv32.h b/qemu/riscv32.h index ab5886bc..dde114c5 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -54,6 +54,7 @@ #define vm_start vm_start_riscv32 #define address_space_dispatch_compact address_space_dispatch_compact_riscv32 #define flatview_translate flatview_translate_riscv32 +#define flatview_copy flatview_copy_riscv32 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_riscv32 #define qemu_get_cpu qemu_get_cpu_riscv32 #define cpu_address_space_init cpu_address_space_init_riscv32 @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_riscv32 #define address_space_dispatch_new address_space_dispatch_new_riscv32 #define address_space_dispatch_free address_space_dispatch_free_riscv32 +#define address_space_dispatch_clear address_space_dispatch_clear_riscv32 #define flatview_read_continue flatview_read_continue_riscv32 #define address_space_read_full address_space_read_full_riscv32 #define address_space_write address_space_write_riscv32 diff --git a/qemu/riscv64.h b/qemu/riscv64.h index 59bc60db..32e0894d 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -54,6 +54,7 @@ #define vm_start vm_start_riscv64 #define address_space_dispatch_compact address_space_dispatch_compact_riscv64 #define flatview_translate flatview_translate_riscv64 +#define flatview_copy flatview_copy_riscv64 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_riscv64 #define qemu_get_cpu qemu_get_cpu_riscv64 #define cpu_address_space_init cpu_address_space_init_riscv64 @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_riscv64 #define address_space_dispatch_new address_space_dispatch_new_riscv64 #define address_space_dispatch_free address_space_dispatch_free_riscv64 +#define address_space_dispatch_clear address_space_dispatch_clear_riscv64 #define flatview_read_continue flatview_read_continue_riscv64 #define address_space_read_full address_space_read_full_riscv64 #define address_space_write address_space_write_riscv64 diff 
--git a/qemu/s390x.h b/qemu/s390x.h index e8fefc10..53823df6 100644 --- a/qemu/s390x.h +++ b/qemu/s390x.h @@ -54,6 +54,7 @@ #define vm_start vm_start_s390x #define address_space_dispatch_compact address_space_dispatch_compact_s390x #define flatview_translate flatview_translate_s390x +#define flatview_copy flatview_copy_s390x #define address_space_translate_for_iotlb address_space_translate_for_iotlb_s390x #define qemu_get_cpu qemu_get_cpu_s390x #define cpu_address_space_init cpu_address_space_init_s390x @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_s390x #define address_space_dispatch_new address_space_dispatch_new_s390x #define address_space_dispatch_free address_space_dispatch_free_s390x +#define address_space_dispatch_clear address_space_dispatch_clear_s390x #define flatview_read_continue flatview_read_continue_s390x #define address_space_read_full address_space_read_full_s390x #define address_space_write address_space_write_s390x diff --git a/qemu/softmmu/memory.c b/qemu/softmmu/memory.c index ee72511e..2bf6d0d3 100644 --- a/qemu/softmmu/memory.c +++ b/qemu/softmmu/memory.c @@ -27,7 +27,7 @@ //#define DEBUG_UNASSIGNED void memory_region_transaction_begin(void); -void memory_region_transaction_commit(MemoryRegion *mr); +static void memory_region_transaction_commit(MemoryRegion *mr); typedef struct AddrRange AddrRange; @@ -94,6 +94,7 @@ static void make_contained(struct uc_struct *uc, MemoryRegion *current) MemoryRegion *memory_cow(struct uc_struct *uc, MemoryRegion *current, hwaddr begin, size_t size) { + hwaddr addr; hwaddr offset; hwaddr current_offset; MemoryRegion *ram = g_new(MemoryRegion, 1); @@ -112,18 +113,16 @@ MemoryRegion *memory_cow(struct uc_struct *uc, MemoryRegion *current, hwaddr beg g_free(ram); return NULL; } - memory_region_transaction_begin(); memcpy(ramblock_ptr(ram->ram_block, 0), ramblock_ptr(current->ram_block, current_offset), size); memory_region_add_subregion_overlap(current->container, offset, ram, uc->snapshot_level); 
if (uc->cpu) { - tlb_flush(uc->cpu); + for (addr = ram->addr; (int64_t)(ram->end - addr) > 0; addr += uc->target_page_size) { + tlb_flush_page(uc->cpu, addr); + } } - uc->memory_region_update_pending = true; - memory_region_transaction_commit(ram); - return ram; } @@ -196,19 +195,33 @@ MemoryRegion *memory_map_io(struct uc_struct *uc, ram_addr_t begin, size_t size, return mmio; } +static void memory_region_remove_subregion(MemoryRegion *mr, + MemoryRegion *subregion) +{ + assert(subregion->container == mr); + subregion->container = NULL; + QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link); +} + void memory_region_filter_subregions(MemoryRegion *mr, int32_t level) { MemoryRegion *subregion, *subregion_next; - memory_region_transaction_begin(); + /* + * memory transaction/commit are only to rebuild the flatview. At + * this point there is need to rebuild the flatview, because this + * function is either called as part of a destructor or as part of + * a context restore. In the destructor case the caller remove the + * complete memory region and should do a transaction/commit. In + * the context restore case the flatview is taken from the context so + * no need to rebuild it. 
+ */ QTAILQ_FOREACH_SAFE(subregion, &mr->subregions, subregions_link, subregion_next) { if (subregion->priority >= level) { - memory_region_del_subregion(mr, subregion); + memory_region_remove_subregion(mr, subregion); subregion->destructor(subregion); g_free(subregion); - mr->uc->memory_region_update_pending = true; } } - memory_region_transaction_commit(mr); } static void memory_region_remove_mapped_block(struct uc_struct *uc, MemoryRegion *mr, bool free) @@ -909,6 +922,77 @@ static void flatviews_init(struct uc_struct *uc) } } +bool flatview_copy(struct uc_struct *uc, FlatView *dst, FlatView *src, bool update_dispatcher) +{ + if (!dst->ranges || !dst->nr_allocated || dst->nr_allocated < src->nr) { + if (dst->ranges && dst->nr_allocated) { + free(dst->ranges); + } + dst->ranges = calloc(src->nr_allocated, sizeof(*dst->ranges)); + if (!dst->ranges) { + return false; + } + dst->nr_allocated = src->nr_allocated; + } + memcpy(dst->ranges, src->ranges, src->nr*sizeof(*dst->ranges)); + dst->nr = src->nr; + if (!update_dispatcher) { + return true; + } + MEMORY_LISTENER_CALL_GLOBAL(uc, begin, Forward); + if (dst->dispatch) { + address_space_dispatch_clear(dst->dispatch); + } + dst->dispatch = address_space_dispatch_new(uc, dst); + for (size_t j = 0; j < dst->nr; j++) { + MemoryRegionSection mrs = + section_from_flat_range(&dst->ranges[j], dst); + mrs.mr->subpage = false; + flatview_add_to_dispatch(uc, dst, &mrs); + } + address_space_dispatch_compact(dst->dispatch); + MEMORY_LISTENER_CALL_GLOBAL(uc, commit, Forward); + return true; +} + +static bool flatview_update(FlatView *fv, MemoryRegion *mr) +{ + struct uc_struct *uc = mr->uc; + MemoryRegion *c = mr; + AddrRange r; + hwaddr addr = 0; + r.size = mr->size; + do { + addr += c->addr; + } while ((c = c->container)); + r.start = int128_make64(addr); + + if (!mr->container || !QTAILQ_EMPTY(&mr->subregions)) + return false; + + for (size_t i = 0; i < fv->nr; i++) { + if (!addrrange_intersects(fv->ranges[i].addr, r)) { + 
continue; + } + if (!addrrange_equal(fv->ranges[i].addr, r)) { + break; + } + fv->ranges[i].mr = mr; + fv->ranges[i].offset_in_region = 0; + fv->ranges[i].readonly = mr->readonly; + address_space_dispatch_clear(fv->dispatch); + fv->dispatch = address_space_dispatch_new(uc, fv); + for (size_t j = 0; j < fv->nr; j++) { + MemoryRegionSection mrs = + section_from_flat_range(&fv->ranges[j], fv); + flatview_add_to_dispatch(uc, fv, &mrs); + } + address_space_dispatch_compact(fv->dispatch); + return true; + } + return false; +} + static void flatviews_reset(struct uc_struct *uc) { AddressSpace *as; @@ -975,18 +1059,23 @@ void memory_region_transaction_begin(void) { } -void memory_region_transaction_commit(MemoryRegion *mr) +static void memory_region_transaction_commit(MemoryRegion *mr) { - AddressSpace *as; + AddressSpace *as = memory_region_to_address_space(mr); + FlatView *fv = NULL; + if (as) + fv = address_space_to_flatview(as); if (mr->uc->memory_region_update_pending) { - flatviews_reset(mr->uc); - MEMORY_LISTENER_CALL_GLOBAL(mr->uc, begin, Forward); - QTAILQ_FOREACH(as, &mr->uc->address_spaces, address_spaces_link) { - address_space_set_flatview(as); + if (!fv || !flatview_update(fv, mr)) { + flatviews_reset(mr->uc); + QTAILQ_FOREACH(as, &mr->uc->address_spaces, address_spaces_link) { + address_space_set_flatview(as); + } } + mr->uc->memory_region_update_pending = false; MEMORY_LISTENER_CALL_GLOBAL(mr->uc, commit, Forward); } @@ -1238,7 +1327,7 @@ static void memory_region_update_container_subregions(MemoryRegion *subregion) done: mr->uc->memory_region_update_pending = true; - memory_region_transaction_commit(mr); + memory_region_transaction_commit(subregion); } static void memory_region_add_subregion_common(MemoryRegion *mr, diff --git a/qemu/sparc.h b/qemu/sparc.h index 6e726c99..99416167 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -54,6 +54,7 @@ #define vm_start vm_start_sparc #define address_space_dispatch_compact address_space_dispatch_compact_sparc #define 
flatview_translate flatview_translate_sparc +#define flatview_copy flatview_copy_sparc #define address_space_translate_for_iotlb address_space_translate_for_iotlb_sparc #define qemu_get_cpu qemu_get_cpu_sparc #define cpu_address_space_init cpu_address_space_init_sparc @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_sparc #define address_space_dispatch_new address_space_dispatch_new_sparc #define address_space_dispatch_free address_space_dispatch_free_sparc +#define address_space_dispatch_clear address_space_dispatch_clear_sparc #define flatview_read_continue flatview_read_continue_sparc #define address_space_read_full address_space_read_full_sparc #define address_space_write address_space_write_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index 747cfe32..17fdd759 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -54,6 +54,7 @@ #define vm_start vm_start_sparc64 #define address_space_dispatch_compact address_space_dispatch_compact_sparc64 #define flatview_translate flatview_translate_sparc64 +#define flatview_copy flatview_copy_sparc64 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_sparc64 #define qemu_get_cpu qemu_get_cpu_sparc64 #define cpu_address_space_init cpu_address_space_init_sparc64 @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_sparc64 #define address_space_dispatch_new address_space_dispatch_new_sparc64 #define address_space_dispatch_free address_space_dispatch_free_sparc64 +#define address_space_dispatch_clear address_space_dispatch_clear_sparc64 #define flatview_read_continue flatview_read_continue_sparc64 #define address_space_read_full address_space_read_full_sparc64 #define address_space_write address_space_write_sparc64 diff --git a/qemu/tricore.h b/qemu/tricore.h index c4967b59..d074ac28 100644 --- a/qemu/tricore.h +++ b/qemu/tricore.h @@ -54,6 +54,7 @@ #define vm_start vm_start_tricore #define address_space_dispatch_compact address_space_dispatch_compact_tricore #define 
flatview_translate flatview_translate_tricore +#define flatview_copy flatview_copy_tricore #define address_space_translate_for_iotlb address_space_translate_for_iotlb_tricore #define qemu_get_cpu qemu_get_cpu_tricore #define cpu_address_space_init cpu_address_space_init_tricore @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_tricore #define address_space_dispatch_new address_space_dispatch_new_tricore #define address_space_dispatch_free address_space_dispatch_free_tricore +#define address_space_dispatch_clear address_space_dispatch_clear_tricore #define flatview_read_continue flatview_read_continue_tricore #define address_space_read_full address_space_read_full_tricore #define address_space_write address_space_write_tricore diff --git a/qemu/unicorn_common.h b/qemu/unicorn_common.h index 9bf24c7a..b557c490 100644 --- a/qemu/unicorn_common.h +++ b/qemu/unicorn_common.h @@ -140,6 +140,7 @@ static inline void uc_common_init(struct uc_struct* uc) uc->set_tlb = uc_set_tlb; uc->memory_mapping = find_memory_mapping; uc->memory_filter_subregions = memory_region_filter_subregions; + uc->flatview_copy = flatview_copy; uc->memory_cow = memory_cow; if (!uc->release) diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 1a941508..77749382 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -54,6 +54,7 @@ #define vm_start vm_start_x86_64 #define address_space_dispatch_compact address_space_dispatch_compact_x86_64 #define flatview_translate flatview_translate_x86_64 +#define flatview_copy flatview_copy_x86_64 #define address_space_translate_for_iotlb address_space_translate_for_iotlb_x86_64 #define qemu_get_cpu qemu_get_cpu_x86_64 #define cpu_address_space_init cpu_address_space_init_x86_64 @@ -90,6 +91,7 @@ #define iotlb_to_section iotlb_to_section_x86_64 #define address_space_dispatch_new address_space_dispatch_new_x86_64 #define address_space_dispatch_free address_space_dispatch_free_x86_64 +#define address_space_dispatch_clear address_space_dispatch_clear_x86_64 #define 
flatview_read_continue flatview_read_continue_x86_64 #define address_space_read_full address_space_read_full_x86_64 #define address_space_write address_space_write_x86_64 diff --git a/symbols.sh b/symbols.sh index a33065b1..29600c7e 100755 --- a/symbols.sh +++ b/symbols.sh @@ -54,6 +54,7 @@ resume_all_vcpus \ vm_start \ address_space_dispatch_compact \ flatview_translate \ +flatview_copy \ address_space_translate_for_iotlb \ qemu_get_cpu \ cpu_address_space_init \ @@ -90,6 +91,7 @@ cpu_check_watchpoint \ iotlb_to_section \ address_space_dispatch_new \ address_space_dispatch_free \ +address_space_dispatch_clear \ flatview_read_continue \ address_space_read_full \ address_space_write \ diff --git a/tests/benchmarks/cow/benchmark.c b/tests/benchmarks/cow/benchmark.c index 327886ce..265959ac 100644 --- a/tests/benchmarks/cow/benchmark.c +++ b/tests/benchmarks/cow/benchmark.c @@ -9,6 +9,8 @@ struct data { gsl_rstat_workspace *rstat_p; struct timespec start; + size_t nc; + uc_context **c; }; @@ -22,11 +24,8 @@ void update_stats(gsl_rstat_workspace *rstat_p, struct timespec *start, struct t static uint64_t CODEADDR = 0x1000; static uint64_t DATABASE = 0x40000000; static uint64_t BLOCKSIZE = 0x10000; +static size_t NRUNS = 200; -/*static void callback_mem(uc_engine *uc, uc_mem_type type, uint64_t addr, uint32_t size, uint64_t value, void *data) -{ - printf("callback mem valid: 0x%lX, value: 0x%lX\n", addr, value); -}*/ static int callback_mem_prot(uc_engine *uc, uc_mem_type type, uint64_t addr, uint32_t size, int64_t value, void *data) { printf("callback mem prot: 0x%lX, type: %X\n", addr, type); @@ -50,27 +49,21 @@ static void callback_block(uc_engine *uc, uint64_t addr, uint32_t size, void *da d->rstat_p = gsl_rstat_alloc(); } run = gsl_rstat_n(d->rstat_p); - if ((run >> 4) >= 20) { + if (run && !(run % 128)) { uc_emu_stop(uc); return; - } else if (run > 0 && run % 16 == 0) { - uc_snapshot(uc); } -/* if (run > 0 && run % 16 == 0) { - uc_emu_stop(uc); - return; - }*/ rsi 
= random(); memblock = random() & 15; offset = random() & (BLOCKSIZE - 1) & (~0xf); -// memblock = 0; -// offset = 0; if (memblock == 15 && (offset + 0x1000) > BLOCKSIZE) { offset -= 0x1000; } rbx += (memblock * BLOCKSIZE) + offset; +#ifndef NDEBUG printf("write at 0x%lX\n", rbx); printf("[%li] callback block: 0x%lX\n", run, addr); +#endif uc_reg_write(uc, UC_X86_REG_RBX, &rbx); uc_reg_write(uc, UC_X86_REG_RAX, &rax); uc_reg_write(uc, UC_X86_REG_RSI, &rsi); @@ -80,7 +73,9 @@ static void callback_block(uc_engine *uc, uint64_t addr, uint32_t size, void *da static void prepare_mapping(uc_engine *uc) { for (size_t i = 0; i < 16; i++) { +#ifndef NDEBUG printf("mem map: 0x%lX\n", DATABASE+i*BLOCKSIZE); +#endif uc_mem_map(uc, DATABASE+i*BLOCKSIZE, BLOCKSIZE, UC_PROT_READ|UC_PROT_WRITE); } } @@ -145,6 +140,7 @@ int main(int argc, char *argv[]) uc_err err; uc_hook hook_block; uc_hook hook_mem; + uc_context **con = calloc(NRUNS, sizeof(*con)); struct data d; uint64_t rax = 5; uint64_t rbx = DATABASE; @@ -156,9 +152,13 @@ int main(int argc, char *argv[]) } d.rstat_p = NULL; + d.c = con; + d.nc = 0; srandom(time(NULL)); uc_open(UC_ARCH_X86, UC_MODE_64, &uc); + uc_ctl_context_mode(uc, UC_CTL_CONTEXT_MEMORY); + uc_ctl_tlb_mode(uc, UC_TLB_VIRTUAL); prepare_code(uc, argv[1], &bin_mmap); prepare_mapping(uc); err = uc_hook_add(uc, &hook_block, UC_HOOK_BLOCK, &callback_block, &d, CODEADDR, 0x1000); @@ -168,18 +168,19 @@ int main(int argc, char *argv[]) uc_hook_add(uc, &hook_mem, UC_HOOK_MEM_INVALID, &callback_mem_prot, NULL, CODEADDR, 0x1000); uc_reg_write(uc, UC_X86_REG_RBX, &rbx); uc_reg_write(uc, UC_X86_REG_RAX, &rax); -/* err = uc_hook_add(uc, &hook_mem, UC_HOOK_MEM_VALID, &callback_mem, NULL, DATABASE, 16*BLOCKSIZE); - if (err) { - printf("err: %s\n", uc_strerror(err)); - return 1; - }*/ - for (int i = 0; i < 1; i++) { + + for (int i = 0; i < NRUNS; i++) { +#ifndef NDEBUG + printf("============ run: %i\n", i); +#endif err = uc_emu_start(uc, CODEADDR, -1, 0, 0); if (err) { 
printf("err: %s\n", uc_strerror(err)); return 1; } - uc_snapshot(uc); + uc_context_alloc(uc, &d.c[d.nc]); + uc_context_save(uc, d.c[d.nc]); + d.nc++; } print_stats(d.rstat_p); return 0; diff --git a/uc.c b/uc.c index dfddbec7..4ba27d8a 100644 --- a/uc.c +++ b/uc.c @@ -2106,6 +2106,7 @@ uc_err uc_context_alloc(uc_engine *uc, uc_context **context) (*_context)->context_size = size - sizeof(uc_context); (*_context)->arch = uc->arch; (*_context)->mode = uc->mode; + (*_context)->fv = NULL; restore_jit_state(uc); return UC_ERR_OK; } else { @@ -2142,11 +2143,23 @@ uc_err uc_context_save(uc_engine *uc, uc_context *context) uc_err ret = UC_ERR_OK; if (uc->context_content & UC_CTL_CONTEXT_MEMORY) { + if (!context->fv) { + context->fv = g_malloc0(sizeof(*context->fv)); + } + if (!context->fv) { + return UC_ERR_NOMEM; + } + if (!uc->flatview_copy(uc, context->fv, uc->address_space_memory.current_map, false)) { + restore_jit_state(uc); + return UC_ERR_NOMEM; + } ret = uc_snapshot(uc); if (ret != UC_ERR_OK) { restore_jit_state(uc); return ret; } + context->ramblock_freed = uc->ram_list.freed; + context->last_block = uc->ram_list.last_block; } context->snapshot_level = uc->snapshot_level; @@ -2418,6 +2431,11 @@ uc_err uc_context_restore(uc_engine *uc, uc_context *context) return ret; } uc_snapshot(uc); + uc->ram_list.freed = context->ramblock_freed; + uc->ram_list.last_block = context->last_block; + if (!uc->flatview_copy(uc, uc->address_space_memory.current_map, context->fv, true)) { + return UC_ERR_NOMEM; + } } if (uc->context_content & UC_CTL_CONTEXT_CPU) { @@ -2434,6 +2452,10 @@ uc_err uc_context_restore(uc_engine *uc, uc_context *context) UNICORN_EXPORT uc_err uc_context_free(uc_context *context) { + if (context->fv) { + free(context->fv->ranges); + g_free(context->fv); + } return uc_free(context); } @@ -2870,6 +2892,7 @@ static uc_err uc_restore_latest_snapshot(struct uc_struct *uc) g_array_remove_range(uc->unmapped_regions, i, 1); } uc->snapshot_level--; + return UC_ERR_OK; 
}