From 12a79192ee6a4ec5b69130ff3481ac64d93a9b25 Mon Sep 17 00:00:00 2001 From: mio Date: Sat, 28 Jan 2023 22:04:43 +0100 Subject: [PATCH] Demand paging on Windows --- msvc/config-host.h | 7 +++++++ qemu/accel/tcg/translate-all.c | 18 ++++++++++++++++++ qemu/include/hw/core/cpu.h | 1 + qemu/include/tcg/tcg.h | 5 +++++ qemu/tcg/tcg.c | 18 ++++++++++++++++++ 5 files changed, 49 insertions(+) diff --git a/msvc/config-host.h b/msvc/config-host.h index 943d8593..bf750d2d 100644 --- a/msvc/config-host.h +++ b/msvc/config-host.h @@ -7,3 +7,10 @@ #define CONFIG_CMPXCHG128 1 // #define CONFIG_ATOMIC64 1 #define CONFIG_PLUGIN 1 + +// QEMU by default allocates (and commits) 1GB of memory on Windows, so multiple Unicorn instances easily run into OOM errors. +// Unfortunately, Windows has no demand-paging facility comparable to mmap(), so the workaround is to use the TCG regions mechanism. +// Note that most Unicorn hacks (and even QEMU!) rely on the assumption that the translation memory won't run out, so this might result +// in some unexpected errors. If that is the case, define the macro below to align with the behavior of QEMU and Unicorn <= 2.0.1. 
+// +// #define USE_STATIC_CODE_GEN_BUFFER \ No newline at end of file diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index 91dd5ce7..aaebd4c5 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -869,6 +869,7 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc) return buf; } #elif defined(_WIN32) +#ifdef USE_STATIC_CODE_GEN_BUFFER static inline void *alloc_code_gen_buffer(struct uc_struct *uc) { TCGContext *tcg_ctx = uc->tcg_ctx; @@ -876,6 +877,23 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc) return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE); } +#else +static inline void *alloc_code_gen_buffer(struct uc_struct *uc) +{ + TCGContext *tcg_ctx = uc->tcg_ctx; + size_t size = tcg_ctx->code_gen_buffer_size; + + void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE, + PAGE_EXECUTE_READWRITE); + + // The buffer is only reserved above; commit just the first UC_TCG_REGION_PAGES_COUNT host pages here, which prolog init needs + VirtualAlloc(ptr, + uc->qemu_real_host_page_size * UC_TCG_REGION_PAGES_COUNT, + MEM_COMMIT, + PAGE_EXECUTE_READWRITE); + return ptr; +} +#endif void free_code_gen_buffer(struct uc_struct *uc) { TCGContext *tcg_ctx = uc->tcg_ctx; diff --git a/qemu/include/hw/core/cpu.h b/qemu/include/hw/core/cpu.h index d1b7c250..097a7f63 100644 --- a/qemu/include/hw/core/cpu.h +++ b/qemu/include/hw/core/cpu.h @@ -20,6 +20,7 @@ #ifndef QEMU_CPU_H #define QEMU_CPU_H +#include <stdint.h> #include "exec/hwaddr.h" #include "exec/memattrs.h" #include "qemu/bitmap.h" diff --git a/qemu/include/tcg/tcg.h b/qemu/include/tcg/tcg.h index f3643fe3..c7eb23ef 100644 --- a/qemu/include/tcg/tcg.h +++ b/qemu/include/tcg/tcg.h @@ -35,6 +35,11 @@ #include "tcg-apple-jit.h" #include "qemu/int128.h" +// Unicorn: default TCG region size on win32, counted in host pages (not defined when USE_STATIC_CODE_GEN_BUFFER is set) +#if defined(_WIN32) && !defined(USE_STATIC_CODE_GEN_BUFFER) +#define UC_TCG_REGION_PAGES_COUNT (128) +#endif + /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 4910c67c..d5fba106 
100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -23,6 +23,8 @@ */ /* define it to use liveness analysis (better code) */ +#include "tcg/tcg.h" +#include <stdint.h> #define USE_TCG_OPTIMIZATIONS #include "qemu/osdep.h" @@ -406,6 +408,13 @@ static void tcg_region_assign(TCGContext *s, size_t curr_region) s->code_gen_buffer = start; s->code_gen_ptr = start; s->code_gen_buffer_size = (char *)end - (char *)start; +#if defined(_WIN32) && !defined(USE_STATIC_CODE_GEN_BUFFER) + VirtualAlloc( + s->code_gen_buffer, + ROUND_UP(s->code_gen_buffer_size, s->uc->qemu_real_host_page_size), + MEM_COMMIT, + PAGE_EXECUTE_READWRITE); +#endif memset(s->code_gen_buffer, 0x00, s->code_gen_buffer_size); s->code_gen_highwater = (char *)end - TCG_HIGHWATER; } @@ -500,7 +509,11 @@ void tcg_region_init(TCGContext *tcg_ctx) size_t n_regions; size_t i; +#if defined(_WIN32) && !defined(USE_STATIC_CODE_GEN_BUFFER) + n_regions = size / (tcg_ctx->uc->qemu_real_host_page_size * UC_TCG_REGION_PAGES_COUNT); +#else n_regions = 1; +#endif /* The first region will be 'aligned - buf' bytes larger than the others */ aligned = (void *)QEMU_ALIGN_PTR_UP(buf, page_size); @@ -537,6 +550,11 @@ void tcg_region_init(TCGContext *tcg_ctx) } tcg_ctx->tree = g_tree_new(tb_tc_cmp); + +#if defined(_WIN32) && !defined(USE_STATIC_CODE_GEN_BUFFER) + // Allocate a region immediately, or the highwater is not set correctly. + tcg_region_alloc(tcg_ctx); +#endif } /*