diff --git a/CMakeLists.txt b/CMakeLists.txt
index d94bd464..965fe77f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1106,6 +1106,7 @@ set(UNICORN_SRCS
 set(UNICORN_COMMON_SRCS
 
     list.c
+    interval.c
 
     glib_compat/glib_compat.c
     glib_compat/gtestutils.c
diff --git a/include/interval.h b/include/interval.h
new file mode 100644
index 00000000..af7e207e
--- /dev/null
+++ b/include/interval.h
@@ -0,0 +1,43 @@
+/* Unicorn Emulator Engine, 2023 */
+/* This code is released under the BSD license */
+
+// This implements an interval tree to efficiently manage hooks
+// with callbacks installed in memory ranges.
+
+#ifndef UC_INTERVAL_H
+#define UC_INTERVAL_H
+
+#include <stdint.h>
+
+typedef struct interval_node {
+    uint64_t begin, end;   // [begin, end] inclusive range
+    void *data;            // user data attached to the interval
+    uint64_t max_endpoint; // largest end stored in this subtree
+    struct interval_node *left, *right, *parent;
+} interval_node;
+
+// Create a standalone node for [begin, end] with user data.
+// Returns NULL on allocation failure; release the node with free().
+interval_node *interval_new(uint64_t begin, uint64_t end, void *data);
+
+// Insert a new interval [begin, end] into the tree at @root and return its
+// node (NULL on allocation failure, in which case the tree is unchanged).
+// begin > end selects the full range [0, UINT64_MAX].
+// The node belongs to the tree and is released by interval_free().
+interval_node *interval_insert(interval_node **root,
+                               uint64_t begin, uint64_t end, void *data);
+
+// Find a node, given its data (compared by pointer value)
+interval_node *interval_find_data(interval_node *root, void *data);
+
+// Find all intervals containing n (begin <= n <= end).
+// On return *@results is a malloc'ed array holding a copy of every matching
+// node and *@count is its length; the caller releases it with free().
+// *@results is NULL when *@count is 0.
+void interval_find_n(interval_node *root, uint64_t n,
+                     interval_node **results, int *count);
+
+// Free the tree
+void interval_free(interval_node *root);
+
+#endif
diff --git a/include/uc_priv.h b/include/uc_priv.h
index 60a9b486..2c3bcb9d 100644
--- a/include/uc_priv.h
+++ b/include/uc_priv.h
@@ -12,6 +12,7 @@
 #include "qemu/xxhash.h"
 #include "unicorn/unicorn.h"
 #include "list.h"
+#include "interval.h"
 
 // The max recursive nested uc_emu_start levels
 #define UC_MAX_NESTED_LEVEL (64)
@@ -341,6 +342,8 @@ struct uc_struct {
     struct list hook[UC_HOOK_MAX];
     struct list hooks_to_del;
     int hooks_count[UC_HOOK_MAX];
+    // interval tree for BLOCK & CODE hooks
+    interval_node *interval_block, *interval_code;
 
     // hook to count number of instructions for uc_emu_start()
     uc_hook count_hook;
diff --git a/interval.c b/interval.c
new file mode 100644
index 00000000..f052848d
--- /dev/null
+++ b/interval.c
@@ -0,0 +1,228 @@
+/* Unicorn Emulator Engine, 2023 */
+/* This code is released under the BSD license */
+
+// This implements an interval tree to efficiently manage hooks
+// with callbacks installed in memory ranges.
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "include/interval.h"
+
+// Create a standalone node for [begin, end] with user data.
+// Returns NULL on allocation failure; release the node with free().
+interval_node *interval_new(uint64_t begin, uint64_t end, void *data)
+{
+    interval_node *node = calloc(1, sizeof(*node));
+    if (!node) {
+        return NULL;
+    }
+
+    node->begin = begin;
+    node->end = end;
+    // a node without children only covers its own interval
+    node->max_endpoint = end;
+    node->data = data;
+    // left = right = parent = NULL, courtesy of calloc()
+
+    return node;
+}
+
+// Insert a new interval [begin, end] into the tree at @root and return its
+// node (NULL on allocation failure, in which case the tree is unchanged).
+// begin > end selects the full range [0, UINT64_MAX].
+// The node belongs to the tree and is released by interval_free().
+interval_node *interval_insert(interval_node **root, uint64_t begin,
+                               uint64_t end, void *data)
+{
+    interval_node *current, *node;
+
+    if (begin > end) {
+        begin = 0;
+        end = UINT64_MAX;
+    }
+
+    node = interval_new(begin, end, data);
+    if (!node) {
+        return NULL;
+    }
+
+    if (*root == NULL) {
+        // first node ever is root
+        *root = node;
+        return node;
+    }
+
+    // plain (unbalanced) binary-search descent keyed on begin; ties go right
+    current = *root;
+    while (true) {
+        interval_node **link =
+            begin < current->begin ? &current->left : &current->right;
+
+        if (*link == NULL) {
+            *link = node;
+            node->parent = current;
+            break;
+        }
+        current = *link;
+    }
+
+    // raise max_endpoint on the way back to the root; as soon as one ancestor
+    // already reaches end, every ancestor above it does as well
+    while (current != NULL && current->max_endpoint < end) {
+        current->max_endpoint = end;
+        current = current->parent;
+    }
+
+    return node;
+}
+
+// Find a node, given its data (compared by pointer value).
+// Returns the first match in pre-order, or NULL when there is none or the
+// traversal stack cannot be allocated.
+interval_node *interval_find_data(interval_node *root, void *data)
+{
+    size_t capacity = 16; // slots in the traversal stack
+    size_t depth = 0;     // slots in use
+    interval_node *found = NULL;
+    interval_node **stack;
+
+    if (root == NULL) {
+        return NULL;
+    }
+
+    stack = malloc(capacity * sizeof(*stack));
+    if (!stack) {
+        return NULL;
+    }
+    stack[depth++] = root;
+
+    while (depth > 0) {
+        interval_node *current = stack[--depth];
+
+        if (current->data == data) {
+            found = current;
+            break;
+        }
+
+        // up to two children are pushed below: make room for both first
+        if (depth + 2 > capacity) {
+            interval_node **grown =
+                realloc(stack, 2 * capacity * sizeof(*stack));
+
+            if (!grown) {
+                break;
+            }
+            stack = grown;
+            capacity *= 2;
+        }
+
+        // push right before left so that the left subtree is visited first
+        if (current->right != NULL) {
+            stack[depth++] = current->right;
+        }
+        if (current->left != NULL) {
+            stack[depth++] = current->left;
+        }
+    }
+
+    free(stack);
+    return found;
+}
+
+// Append a copy of @node to the array *@result, growing it when it is full.
+// Returns false when the array cannot grow; *@result stays valid then.
+static bool results_append(interval_node **result, int *count, int *capacity,
+                           const interval_node *node)
+{
+    if (*count == *capacity) {
+        int grown;
+        interval_node *bigger;
+
+        if (*capacity > INT_MAX / 2) {
+            return false;
+        }
+        grown = *capacity ? *capacity * 2 : 4;
+        bigger = realloc(*result, (size_t)grown * sizeof(*bigger));
+        if (!bigger) {
+            return false;
+        }
+        *result = bigger;
+        *capacity = grown;
+    }
+
+    (*result)[(*count)++] = *node;
+    return true;
+}
+
+// Collect, in ascending order of begin, every interval below @node that
+// contains n. Returns false on allocation failure.
+static bool collect_containing(const interval_node *node, uint64_t n,
+                               interval_node **result, int *count,
+                               int *capacity)
+{
+    // a subtree whose largest endpoint is below n cannot contain n
+    while (node != NULL && node->max_endpoint >= n) {
+        if (!collect_containing(node->left, n, result, count, capacity)) {
+            return false;
+        }
+
+        if (n < node->begin) {
+            // every interval in the right subtree starts at or after begin
+            break;
+        }
+
+        if (n <= node->end &&
+            !results_append(result, count, capacity, node)) {
+            return false;
+        }
+
+        node = node->right;
+    }
+
+    return true;
+}
+
+// Find all intervals containing n (begin <= n <= end).
+// On return *@result is a malloc'ed array holding a copy of every matching
+// node and *@count is its length; the caller releases it with free().
+// *@result is NULL when *@count is 0, which is also what an allocation
+// failure reports. The left/right/parent links of the copies still point
+// into the live tree.
+void interval_find_n(interval_node *root, uint64_t n, interval_node **result,
+                     int *count)
+{
+    int capacity = 0;
+
+    *result = NULL;
+    *count = 0;
+
+    if (!collect_containing(root, n, result, count, &capacity)) {
+        free(*result);
+        *result = NULL;
+        *count = 0;
+    }
+}
+
+// Free the tree.
+// Works without recursion or extra memory: while the current node has a left
+// child, that child is rotated above it; a node without a left child is
+// freed and the walk continues with its right child.
+void interval_free(interval_node *root)
+{
+    while (root != NULL) {
+        interval_node *next;
+
+        if (root->left != NULL) {
+            next = root->left;
+            root->left = next->right;
+            next->right = root;
+        } else {
+            next = root->right;
+            free(root);
+        }
+        root = next;
+    }
+}
diff --git a/uc.c b/uc.c
index 76c4eb28..8a7b0437 100644
--- a/uc.c
+++ b/uc.c
@@ -488,6 +488,9 @@ uc_err uc_close(uc_engine *uc)
         list_clear(&uc->hook[i]);
     }
 
+    interval_free(uc->interval_block);
+    interval_free(uc->interval_code);
+
     free(uc->mapped_blocks);
 
     g_tree_destroy(uc->ctl_exits);
@@ -1647,6 +1650,24 @@ uc_err uc_hook_add(uc_engine *uc, uc_hook *hh, int type, void *callback,
         i++;
     }
 
+    // NOTE(review): `type` is the int passed by the API caller, while the
+    // case labels are uc_hook_idx values (see helper_uc_tracecode); confirm
+    // both share one encoding, otherwise the trees may stay empty.
+    // NOTE(review): the trees keep the raw hook pointer and nothing in this
+    // patch removes a node when its hook is deleted; confirm the lifetime.
+    switch (type) {
+    default:
+        break;
+
+    case UC_HOOK_BLOCK_IDX:
+        interval_insert(&uc->interval_block, begin, end, hook);
+        break;
+
+    case UC_HOOK_CODE_IDX:
+        interval_insert(&uc->interval_code, begin, end, hook);
+        break;
+    }
+
     // we didn't use the hook
     // TODO: return an error?
     if (hook->refs == 0) {
@@ -1723,8 +1744,9 @@ void helper_uc_tracecode(int32_t size, uc_hook_idx index, void *handle,
                          int64_t address)
 {
     struct uc_struct *uc = handle;
-    struct list_item *cur;
     struct hook *hook;
+    interval_node *interval_root, *nodes;
+    int i, count;
     int hook_flags = index & UC_HOOK_FLAG_MASK;
 
     // The index here may contain additional flags. See
@@ -1743,39 +1765,46 @@ void helper_uc_tracecode(int32_t size, uc_hook_idx index, void *handle,
         return;
     }
 
-    for (cur = uc->hook[index].head;
-         cur != NULL && (hook = (struct hook *)cur->data); cur = cur->next) {
-        if (hook->to_delete) {
-            continue;
-        }
+    // index can only be UC_HOOK_CODE_IDX or UC_HOOK_BLOCK_IDX
+    interval_root = uc->interval_block;
+    if (index == UC_HOOK_CODE_IDX)
+        interval_root = uc->interval_code;
 
-        // on invalid block/instruction, call instruction counter (if enable),
-        // then quit
-        if (size == 0) {
-            if (index == UC_HOOK_CODE_IDX && uc->count_hook) {
-                // this is the instruction counter (first hook in the list)
-                ((uc_cb_hookcode_t)hook->callback)(uc, address, size,
-                                                   hook->user_data);
-            }
+    interval_find_n(interval_root, address, &nodes, &count);
 
-            return;
-        }
+    for(i = 0; i < count; i++) {
+        hook = (struct hook *)nodes[i].data;
+        if (hook->to_delete) {
+            continue;
+        }
 
-        if (HOOK_BOUND_CHECK(hook, (uint64_t)address)) {
-            ((uc_cb_hookcode_t)hook->callback)(uc, address, size,
-                                               hook->user_data);
-        }
+        // on invalid block/instruction, call instruction counter (if enable),
+        // then quit
+        if (size == 0) {
+            if (index == UC_HOOK_CODE_IDX && uc->count_hook) {
+                // this is the instruction counter (first hook in the list)
+                ((uc_cb_hookcode_t)hook->callback)(uc, address, size,
+                                                   hook->user_data);
+            }
 
-        // the last callback may already asked to stop emulation
-        // Unicorn:
-        //   In an ARM IT block, we behave like the emulation continues
-        //   normally. No check_exit_request is generated and the hooks are
-        //   triggered normally. In other words, the whole IT block is treated
-        //   as a single instruction.
-        if (uc->stop_request && !(hook_flags & UC_HOOK_FLAG_NO_STOP)) {
-            break;
-        }
+            free(nodes);
+            return;
+        }
+
+        ((uc_cb_hookcode_t)hook->callback)(uc, address, size, hook->user_data);
+
+        // the last callback may already asked to stop emulation
+        // Unicorn:
+        //   In an ARM IT block, we behave like the emulation continues
+        //   normally. No check_exit_request is generated and the hooks are
+        //   triggered normally. In other words, the whole IT block is treated
+        //   as a single instruction.
+        if (uc->stop_request && !(hook_flags & UC_HOOK_FLAG_NO_STOP)) {
+            break;
+        }
     }
+
+    free(nodes);
 }
 
 UNICORN_EXPORT