qemu/tests/bench/qtree-bench.c

287 lines
7.0 KiB
C
Raw Normal View History

util: import GTree as QTree

The only reason to add this implementation is to control the memory
allocator used. Some users (e.g. TCG) cannot work reliably in
multi-threaded environments (e.g. forking in user-mode) with GTree's
allocator, GSlice.
See https://gitlab.com/qemu-project/qemu/-/issues/285 for details.

Importing GTree is a temporary workaround until GTree migrates away
from GSlice.

This implementation is identical to that in glib v2.75.0, except that we
don't import recent additions to the API nor deprecated API calls, none
of which are used in QEMU.

I've imported tests from glib and added a benchmark just to make sure
that performance is similar. Note: it cannot be identical because (1) we
are not using GSlice, (2) we use different compilation flags (e.g. -fPIC)
and (3) we're linking statically.

$ cat /proc/cpuinfo| grep 'model name' | head -1
model name : AMD Ryzen 7 PRO 5850U with Radeon Graphics
$ echo '0' | sudo tee /sys/devices/system/cpu/cpufreq/boost
$ tests/bench/qtree-bench

 Tree         Op      32            1024            4096          131072         1048576
------------------------------------------------------------------------------------------------
GTree     Lookup   83.23           43.08           25.31           19.40           16.22
QTree     Lookup  113.42 (1.36x)   53.83 (1.25x)   28.38 (1.12x)   17.64 (0.91x)   13.04 (0.80x)
GTree     Insert   44.23           29.37           25.83           19.49           17.03
QTree     Insert   46.87 (1.06x)   25.62 (0.87x)   24.29 (0.94x)   16.83 (0.86x)   12.97 (0.76x)
GTree     Remove   53.27           35.15           31.43           24.64           16.70
QTree     Remove   57.32 (1.08x)   41.76 (1.19x)   38.37 (1.22x)   29.30 (1.19x)   15.07 (0.90x)
GTree  RemoveAll  135.44          127.52          126.72          120.11           64.34
QTree  RemoveAll  127.15 (0.94x)  110.37 (0.87x)  107.97 (0.85x)   97.13 (0.81x)   55.10 (0.86x)
GTree   Traverse  277.71          276.09          272.78          246.72           98.47
QTree   Traverse  370.33 (1.33x)  411.97 (1.49x)  400.23 (1.47x)  262.82 (1.07x)   78.52 (0.80x)
------------------------------------------------------------------------------------------------

As a sanity check, the same benchmark when Glib's version
is >= $glib_dropped_gslice_version (i.e. QTree == GTree):

 Tree         Op      32            1024            4096          131072         1048576
------------------------------------------------------------------------------------------------
GTree     Lookup   82.72           43.09           24.18           19.73           16.09
QTree     Lookup   81.82 (0.99x)   43.10 (1.00x)   24.20 (1.00x)   19.76 (1.00x)   16.26 (1.01x)
GTree     Insert   45.07           29.62           26.34           19.90           17.18
QTree     Insert   45.72 (1.01x)   29.60 (1.00x)   26.38 (1.00x)   19.71 (0.99x)   17.20 (1.00x)
GTree     Remove   54.48           35.36           31.77           24.97           16.95
QTree     Remove   54.46 (1.00x)   35.32 (1.00x)   31.77 (1.00x)   24.91 (1.00x)   17.15 (1.01x)
GTree  RemoveAll  140.68          127.36          125.43          121.45           68.20
QTree  RemoveAll  140.65 (1.00x)  127.64 (1.00x)  125.01 (1.00x)  121.73 (1.00x)   67.06 (0.98x)
GTree   Traverse  278.68          276.05          266.75          251.65          104.93
QTree   Traverse  278.31 (1.00x)  275.78 (1.00x)  266.42 (1.00x)  247.89 (0.99x)  104.58 (1.00x)
------------------------------------------------------------------------------------------------

Signed-off-by: Emilio Cota <cota@braap.org>
Message-Id: <20230205163758.416992-2-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-02-05 19:37:57 +03:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "qemu/osdep.h"
#include "qemu/qtree.h"
#include "qemu/timer.h"
/* Tree operations that a benchmark can time. */
enum tree_op {
    OP_LOOKUP = 0,  /* look up every key */
    OP_INSERT,      /* insert every key */
    OP_REMOVE,      /* remove every key, one at a time */
    OP_REMOVE_ALL,  /* destroy the whole tree in one call */
    OP_TRAVERSE,    /* visit every node with a no-op callback */
};
/* Description of one benchmark: what to time and how to prepare the tree. */
struct benchmark {
    /* Label printed in the "Op" column of the results table. */
    const char *const name;
    /* Operation executed inside the timed region. */
    enum tree_op op;
    /* When true, the tree is populated with all keys before timing starts. */
    bool fill_on_init;
};
/* Tree implementations compared by the benchmark. */
enum impl_type {
    IMPL_GTREE = 0, /* driven through the g_tree_* calls */
    IMPL_QTREE,     /* driven through the q_tree_* calls */
};
/* Pairs an implementation selector with the label used when reporting. */
struct tree_implementation {
    /* Label printed in the "Tree" column of the results table. */
    const char *const name;
    /* Selector used to dispatch to the matching tree API. */
    enum impl_type type;
};
/*
 * Benchmarks to run, in the order their rows appear in the report.
 * Only "Insert" starts from an empty tree; every other benchmark needs
 * the tree to be populated before the timed operation runs.
 */
static const struct benchmark benchmarks[] = {
    { .name = "Lookup",    .op = OP_LOOKUP,     .fill_on_init = true  },
    { .name = "Insert",    .op = OP_INSERT,     .fill_on_init = false },
    { .name = "Remove",    .op = OP_REMOVE,     .fill_on_init = true  },
    { .name = "RemoveAll", .op = OP_REMOVE_ALL, .fill_on_init = true  },
    { .name = "Traverse",  .op = OP_TRAVERSE,   .fill_on_init = true  },
};
/*
 * Implementations under test. The first entry is the baseline: main()
 * reports every later entry's throughput relative to entry 0.
 */
static const struct tree_implementation impls[] = {
    { .name = "GTree", .type = IMPL_GTREE },
    { .name = "QTree", .type = IMPL_QTREE },
};
/*
 * Three-way comparison of two size_t keys, passed by pointer.
 *
 * Returns a negative value if *ap < *bp, zero if they are equal and a
 * positive value if *ap > *bp.
 *
 * The result is built from two comparisons rather than a subtraction:
 * narrowing a size_t difference to int loses the sign (or the whole
 * value) once the keys are more than INT_MAX apart.
 */
static int compare_func(const void *ap, const void *bp)
{
    const size_t *a = ap;
    const size_t *b = bp;

    return (*a > *b) - (*a < *b);
}
/*
 * Prepare the inputs for one benchmark run.
 *
 * Allocates an array of @n_elems keys where element i holds the value i,
 * and creates an empty tree of the flavour selected by @impl, ordered by
 * compare_func().
 *
 * @impl:     which tree implementation to instantiate
 * @ret_tree: receives the newly created, empty tree
 * @ret_keys: receives the key array, allocated with g_malloc_n()
 * @n_elems:  number of keys to generate
 */
static void init_empty_tree_and_keys(enum impl_type impl,
                                     void **ret_tree, size_t **ret_keys,
                                     size_t n_elems)
{
    size_t *key_array = g_malloc_n(n_elems, sizeof(*key_array));
    size_t idx = 0;
    void *new_tree;

    /* Keys are simply 0 .. n_elems - 1, stored in ascending order. */
    while (idx < n_elems) {
        key_array[idx] = idx;
        idx++;
    }

    if (impl == IMPL_GTREE) {
        new_tree = g_tree_new(compare_func);
    } else if (impl == IMPL_QTREE) {
        new_tree = q_tree_new(compare_func);
    } else {
        g_assert_not_reached();
    }

    *ret_keys = key_array;
    *ret_tree = new_tree;
}
/*
 * Per-node callback for the traversal benchmark. It does no work and
 * ignores all of its arguments, so the measurement reflects only the
 * cost of walking the tree. Always returns FALSE.
 */
static gboolean traverse_func(gpointer key, gpointer value, gpointer data)
{
    (void)key;
    (void)value;
    (void)data;

    return FALSE;
}
/*
 * Destroy @tree through the destroy call that matches @impl.
 * Aborts via g_assert_not_reached() when @impl is not a known
 * implementation.
 */
static inline void remove_all(void *tree, enum impl_type impl)
{
    if (impl == IMPL_GTREE) {
        g_tree_destroy(tree);
    } else if (impl == IMPL_QTREE) {
        q_tree_destroy(tree);
    } else {
        g_assert_not_reached();
    }
}
/*
 * Run one benchmark once and return how long the timed operation took.
 *
 * @bench:   benchmark description (operation and whether to pre-fill)
 * @impl:    tree implementation to exercise
 * @n_elems: number of keys to operate on
 *
 * A fresh tree and key array are created on every call. Only the
 * operation selected by bench->op sits between the two get_clock()
 * calls; setup, the optional pre-fill and the final cleanup are outside
 * the measured interval.
 *
 * Returns the difference between the two get_clock() readings; the
 * variable names treat it as nanoseconds (NOTE(review): confirm against
 * get_clock() in qemu/timer.h).
 */
static int64_t run_benchmark(const struct benchmark *bench,
enum impl_type impl,
size_t n_elems)
{
void *tree;
size_t *keys;
init_empty_tree_and_keys(impl, &tree, &keys, n_elems);
/* Untimed setup: populate the tree for benchmarks that need existing data. */
if (bench->fill_on_init) {
for (size_t i = 0; i < n_elems; i++) {
switch (impl) {
case IMPL_GTREE:
/* Each key's address is used as both the key and the value. */
g_tree_insert(tree, &keys[i], &keys[i]);
break;
case IMPL_QTREE:
q_tree_insert(tree, &keys[i], &keys[i]);
break;
default:
g_assert_not_reached();
}
}
}
/* Timed region starts here. */
int64_t start_ns = get_clock();
switch (bench->op) {
case OP_LOOKUP:
/* Look up every key, in the order the keys are stored in the array. */
for (size_t i = 0; i < n_elems; i++) {
void *value;
switch (impl) {
case IMPL_GTREE:
value = g_tree_lookup(tree, &keys[i]);
break;
case IMPL_QTREE:
value = q_tree_lookup(tree, &keys[i]);
break;
default:
g_assert_not_reached();
}
/* The result is deliberately discarded; only the call is of interest. */
(void)value;
}
break;
case OP_INSERT:
/* Insert every key; "Insert" is configured to start from an empty tree. */
for (size_t i = 0; i < n_elems; i++) {
switch (impl) {
case IMPL_GTREE:
g_tree_insert(tree, &keys[i], &keys[i]);
break;
case IMPL_QTREE:
q_tree_insert(tree, &keys[i], &keys[i]);
break;
default:
g_assert_not_reached();
}
}
break;
case OP_REMOVE:
/* Remove the keys one by one, in array order. */
for (size_t i = 0; i < n_elems; i++) {
switch (impl) {
case IMPL_GTREE:
g_tree_remove(tree, &keys[i]);
break;
case IMPL_QTREE:
q_tree_remove(tree, &keys[i]);
break;
default:
g_assert_not_reached();
}
}
break;
case OP_REMOVE_ALL:
/* Destroying the tree is itself the measured operation here. */
remove_all(tree, impl);
break;
case OP_TRAVERSE:
/* Walk the whole tree with a callback that does nothing. */
switch (impl) {
case IMPL_GTREE:
g_tree_foreach(tree, traverse_func, NULL);
break;
case IMPL_QTREE:
q_tree_foreach(tree, traverse_func, NULL);
break;
default:
g_assert_not_reached();
}
break;
default:
g_assert_not_reached();
}
/* Timed region ends here. */
int64_t ns = get_clock() - start_ns;
/* OP_REMOVE_ALL already destroyed the tree inside the timed region. */
if (bench->op != OP_REMOVE_ALL) {
remove_all(tree, impl);
}
/* The keys are freed only after the tree that pointed at them is gone. */
g_free(keys);
return ns;
}
/*
 * Benchmark driver.
 *
 * Runs every benchmark against every tree implementation for each tree
 * size, then prints a table of throughputs in Mops/s. Rows of every
 * implementation other than the first also show the throughput relative
 * to the first (baseline) implementation.
 *
 * @argc, @argv: unused.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    size_t sizes[] = {
        32,
        1024,
        1024 * 4,
        1024 * 128,
        1024 * 1024,
    };
    /* Throughput indexed as [benchmark][implementation][size]. */
    double res[ARRAY_SIZE(benchmarks)][ARRAY_SIZE(impls)][ARRAY_SIZE(sizes)];

    for (size_t i = 0; i < ARRAY_SIZE(sizes); i++) {
        size_t size = sizes[i];

        for (size_t j = 0; j < ARRAY_SIZE(impls); j++) {
            const struct tree_implementation *impl = &impls[j];

            for (size_t k = 0; k < ARRAY_SIZE(benchmarks); k++) {
                const struct benchmark *bench = &benchmarks[k];

                /* warm-up run */
                run_benchmark(bench, impl->type, size);

                /*
                 * Repeat until at least 2e8 ns of measured time has been
                 * accumulated and at least 5 runs have been completed.
                 */
                int64_t total_ns = 0;
                int64_t n_runs = 0;
                while (total_ns < 2e8 || n_runs < 5) {
                    total_ns += run_benchmark(bench, impl->type, size);
                    n_runs++;
                }
                double ns_per_run = (double)total_ns / n_runs;

                /* Throughput, in Mops/s */
                res[k][j][i] = size / ns_per_run * 1e3;
            }
        }
    }

    printf("# Results' breakdown: Tree, Op and #Elements. Units: Mops/s\n");
    printf("%5s %10s ", "Tree", "Op");
    for (size_t i = 0; i < ARRAY_SIZE(sizes); i++) {
        /*
         * Each data cell is "%7.2f " followed by an 8-character ratio
         * field, so the header cell is padded to the same 16 characters.
         */
        printf("%7zu %8s", sizes[i], "");
    }
    printf("\n");

    char separator[97];
    for (size_t i = 0; i < ARRAY_SIZE(separator) - 1; i++) {
        separator[i] = '-';
    }
    separator[ARRAY_SIZE(separator) - 1] = '\0';
    printf("%s\n", separator);

    for (size_t i = 0; i < ARRAY_SIZE(benchmarks); i++) {
        for (size_t j = 0; j < ARRAY_SIZE(impls); j++) {
            printf("%5s %10s ", impls[j].name, benchmarks[i].name);
            for (size_t k = 0; k < ARRAY_SIZE(sizes); k++) {
                printf("%7.2f ", res[i][j][k]);
                if (j == 0) {
                    /* Baseline row: blank field as wide as "(x.xxx) ". */
                    printf("%8s", "");
                } else if (res[i][0][k] != 0) {
                    double speedup = res[i][j][k] / res[i][0][k];
                    printf("(%4.2fx) ", speedup);
                } else {
                    /* No ratio can be computed against a zero baseline. */
                    printf("(%5s) ", "");
                }
            }
            printf("\n");
        }
    }
    printf("%s\n", separator);
    return 0;
}