qemu/tests/bench/qtree-bench.c
Emilio Cota e3feb2cc22 util: import GTree as QTree
The only reason to add this implementation is to control the memory allocator
used. Some users (e.g. TCG) cannot work reliably in multi-threaded
environments (e.g. forking in user-mode) with GTree's allocator, GSlice.
See https://gitlab.com/qemu-project/qemu/-/issues/285 for details.

Importing GTree is a temporary workaround until GTree migrates away
from GSlice.

This implementation is identical to that in glib v2.75.0, except that
we don't import recent additions to the API nor deprecated API calls,
none of which are used in QEMU.

I've imported tests from glib and added a benchmark just to
make sure that performance is similar. Note: it cannot be identical
because (1) we are not using GSlice, (2) we use different compilation flags
(e.g. -fPIC) and (3) we're linking statically.

$ cat /proc/cpuinfo| grep 'model name' | head -1
model name      : AMD Ryzen 7 PRO 5850U with Radeon Graphics
$ echo '0' | sudo tee /sys/devices/system/cpu/cpufreq/boost
$ tests/bench/qtree-bench

 Tree         Op      32            1024            4096          131072         1048576
------------------------------------------------------------------------------------------------
GTree     Lookup   83.23           43.08           25.31           19.40           16.22
QTree     Lookup  113.42 (1.36x)   53.83 (1.25x)   28.38 (1.12x)   17.64 (0.91x)   13.04 (0.80x)
GTree     Insert   44.23           29.37           25.83           19.49           17.03
QTree     Insert   46.87 (1.06x)   25.62 (0.87x)   24.29 (0.94x)   16.83 (0.86x)   12.97 (0.76x)
GTree     Remove   53.27           35.15           31.43           24.64           16.70
QTree     Remove   57.32 (1.08x)   41.76 (1.19x)   38.37 (1.22x)   29.30 (1.19x)   15.07 (0.90x)
GTree  RemoveAll  135.44          127.52          126.72          120.11           64.34
QTree  RemoveAll  127.15 (0.94x)  110.37 (0.87x)  107.97 (0.85x)   97.13 (0.81x)   55.10 (0.86x)
GTree   Traverse  277.71          276.09          272.78          246.72           98.47
QTree   Traverse  370.33 (1.33x)  411.97 (1.49x)  400.23 (1.47x)  262.82 (1.07x)   78.52 (0.80x)
------------------------------------------------------------------------------------------------

As a sanity check, here are the results of the same benchmark when GLib's
version is >= $glib_dropped_gslice_version (i.e. QTree == GTree):

 Tree         Op      32            1024            4096          131072         1048576
------------------------------------------------------------------------------------------------
GTree     Lookup   82.72           43.09           24.18           19.73           16.09
QTree     Lookup   81.82 (0.99x)   43.10 (1.00x)   24.20 (1.00x)   19.76 (1.00x)   16.26 (1.01x)
GTree     Insert   45.07           29.62           26.34           19.90           17.18
QTree     Insert   45.72 (1.01x)   29.60 (1.00x)   26.38 (1.00x)   19.71 (0.99x)   17.20 (1.00x)
GTree     Remove   54.48           35.36           31.77           24.97           16.95
QTree     Remove   54.46 (1.00x)   35.32 (1.00x)   31.77 (1.00x)   24.91 (1.00x)   17.15 (1.01x)
GTree  RemoveAll  140.68          127.36          125.43          121.45           68.20
QTree  RemoveAll  140.65 (1.00x)  127.64 (1.00x)  125.01 (1.00x)  121.73 (1.00x)   67.06 (0.98x)
GTree   Traverse  278.68          276.05          266.75          251.65          104.93
QTree   Traverse  278.31 (1.00x)  275.78 (1.00x)  266.42 (1.00x)  247.89 (0.99x)  104.58 (1.00x)
------------------------------------------------------------------------------------------------

Signed-off-by: Emilio Cota <cota@braap.org>
Message-Id: <20230205163758.416992-2-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-03-28 15:23:10 -07:00

287 lines
7.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "qemu/osdep.h"
#include "qemu/qtree.h"
#include "qemu/timer.h"
/* Tree operation whose cost a benchmark measures. */
enum tree_op {
    OP_LOOKUP     = 0,  /* look up every key */
    OP_INSERT     = 1,  /* insert every key */
    OP_REMOVE     = 2,  /* remove every key, one at a time */
    OP_REMOVE_ALL = 3,  /* destroy the whole tree in one call */
    OP_TRAVERSE   = 4,  /* visit every node with a no-op callback */
};
/*
 * Description of a single benchmark.
 *
 * @name: label printed in the "Op" column of the results table.
 * @op: operation executed inside the timed section.
 * @fill_on_init: when true, the tree is populated with every key before the
 *                timed section starts; when false, it starts out empty.
 */
struct benchmark {
    const char *const name;
    enum tree_op op;
    bool fill_on_init;
};
/* Tree implementation under test. */
enum impl_type {
    IMPL_GTREE = 0,  /* driven through the g_tree_*() calls */
    IMPL_QTREE = 1,  /* driven through the q_tree_*() calls */
};
/*
 * Description of a tree implementation.
 *
 * @name: label printed in the "Tree" column of the results table.
 * @type: selector used to dispatch to the matching tree API.
 */
struct tree_implementation {
    const char *const name;
    enum impl_type type;
};
/*
 * Benchmarks to run; results are printed in this order.
 * "Insert" is the only one that starts from an empty tree, because filling
 * the tree is the work it measures.
 */
static const struct benchmark benchmarks[] = {
    { .name = "Lookup",    .op = OP_LOOKUP,     .fill_on_init = true  },
    { .name = "Insert",    .op = OP_INSERT,     .fill_on_init = false },
    { .name = "Remove",    .op = OP_REMOVE,     .fill_on_init = true  },
    { .name = "RemoveAll", .op = OP_REMOVE_ALL, .fill_on_init = true  },
    { .name = "Traverse",  .op = OP_TRAVERSE,   .fill_on_init = true  },
};
/*
 * Implementations to compare. The first entry is the baseline that the
 * speedup figures of the remaining entries are computed against.
 */
static const struct tree_implementation impls[] = {
    { .name = "GTree", .type = IMPL_GTREE },
    { .name = "QTree", .type = IMPL_QTREE },
};
/*
 * Three-way comparison of two size_t keys; this is the ordering function
 * handed to g_tree_new() and q_tree_new().
 *
 * @ap, @bp: pointers to the size_t keys to compare.
 *
 * Returns a negative value if *ap < *bp, zero if both keys are equal, and a
 * positive value if *ap > *bp.
 *
 * The keys are compared explicitly instead of being subtracted: the unsigned
 * difference, once narrowed to int, has the wrong sign (or collapses to
 * zero) as soon as the keys are more than INT_MAX apart.
 */
static int compare_func(const void *ap, const void *bp)
{
    const size_t *a = ap;
    const size_t *b = bp;

    return (*a > *b) - (*a < *b);
}
/*
 * Allocate the key array and create an empty tree of the requested kind.
 *
 * @impl: tree implementation to instantiate.
 * @ret_tree: receives the newly created, empty tree.
 * @ret_keys: receives an array allocated with g_malloc_n() that holds the
 *            keys 0 .. n_elems - 1; the caller frees it with g_free().
 * @n_elems: number of keys to generate.
 */
static void init_empty_tree_and_keys(enum impl_type impl,
                                     void **ret_tree, size_t **ret_keys,
                                     size_t n_elems)
{
    size_t *key_array = g_malloc_n(n_elems, sizeof(*key_array));
    size_t idx = 0;

    /* Each key is simply its own position in the array. */
    while (idx < n_elems) {
        key_array[idx] = idx;
        idx++;
    }

    if (impl == IMPL_GTREE) {
        *ret_tree = g_tree_new(compare_func);
    } else if (impl == IMPL_QTREE) {
        *ret_tree = q_tree_new(compare_func);
    } else {
        g_assert_not_reached();
    }
    *ret_keys = key_array;
}
/*
 * Per-node callback passed to g_tree_foreach()/q_tree_foreach() by the
 * Traverse benchmark. It does no work and always returns FALSE, which per
 * GLib's GTraverseFunc contract lets the traversal continue to the next node.
 */
static gboolean traverse_func(gpointer key, gpointer value, gpointer data)
{
    (void)key;
    (void)value;
    (void)data;

    return FALSE;
}
/*
 * Destroy @tree using the destroy call that matches @impl.
 *
 * @tree: tree to destroy; it must have been created for @impl.
 * @impl: implementation that @tree belongs to.
 */
static inline void remove_all(void *tree, enum impl_type impl)
{
    if (impl == IMPL_GTREE) {
        g_tree_destroy(tree);
        return;
    }
    if (impl == IMPL_QTREE) {
        q_tree_destroy(tree);
        return;
    }
    g_assert_not_reached();
}
/*
 * Execute one benchmark once and report how long its timed section took.
 *
 * @bench: benchmark to run.
 * @impl: tree implementation to exercise.
 * @n_elems: number of keys the benchmark operates on.
 *
 * Creating the tree and keys, the optional pre-fill and the final teardown
 * all happen outside the timed section.
 *
 * Returns the difference between the get_clock() readings taken right after
 * and right before the timed section (the caller treats it as nanoseconds).
 */
static int64_t run_benchmark(const struct benchmark *bench,
                             enum impl_type impl,
                             size_t n_elems)
{
    void *tree;
    size_t *keys;

    init_empty_tree_and_keys(impl, &tree, &keys, n_elems);

    /* Untimed pre-fill; zero iterations when the benchmark starts empty. */
    const size_t n_prefill = bench->fill_on_init ? n_elems : 0;
    for (size_t idx = 0; idx < n_prefill; idx++) {
        if (impl == IMPL_GTREE) {
            g_tree_insert(tree, &keys[idx], &keys[idx]);
        } else if (impl == IMPL_QTREE) {
            q_tree_insert(tree, &keys[idx], &keys[idx]);
        } else {
            g_assert_not_reached();
        }
    }

    const int64_t t_begin = get_clock();

    switch (bench->op) {
    case OP_LOOKUP:
        for (size_t idx = 0; idx < n_elems; idx++) {
            void *found;

            if (impl == IMPL_GTREE) {
                found = g_tree_lookup(tree, &keys[idx]);
            } else if (impl == IMPL_QTREE) {
                found = q_tree_lookup(tree, &keys[idx]);
            } else {
                g_assert_not_reached();
            }
            /* The value itself is not needed, only the cost of the lookup. */
            (void)found;
        }
        break;

    case OP_INSERT:
        for (size_t idx = 0; idx < n_elems; idx++) {
            if (impl == IMPL_GTREE) {
                g_tree_insert(tree, &keys[idx], &keys[idx]);
            } else if (impl == IMPL_QTREE) {
                q_tree_insert(tree, &keys[idx], &keys[idx]);
            } else {
                g_assert_not_reached();
            }
        }
        break;

    case OP_REMOVE:
        for (size_t idx = 0; idx < n_elems; idx++) {
            if (impl == IMPL_GTREE) {
                g_tree_remove(tree, &keys[idx]);
            } else if (impl == IMPL_QTREE) {
                q_tree_remove(tree, &keys[idx]);
            } else {
                g_assert_not_reached();
            }
        }
        break;

    case OP_REMOVE_ALL:
        remove_all(tree, impl);
        break;

    case OP_TRAVERSE:
        if (impl == IMPL_GTREE) {
            g_tree_foreach(tree, traverse_func, NULL);
        } else if (impl == IMPL_QTREE) {
            q_tree_foreach(tree, traverse_func, NULL);
        } else {
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    const int64_t elapsed = get_clock() - t_begin;

    /* OP_REMOVE_ALL already destroyed the tree inside the timed section. */
    if (bench->op != OP_REMOVE_ALL) {
        remove_all(tree, impl);
    }
    g_free(keys);

    return elapsed;
}
/*
 * Run every benchmark for every tree implementation and tree size, then
 * print the throughputs (in Mops/s) as a table.
 *
 * Each (size, implementation, benchmark) combination gets one warm-up run
 * followed by repeated measured runs until at least 5 runs have completed
 * and their accumulated time reaches 2e8 ns. Rows of implementations other
 * than the first also show their speedup relative to the first one.
 *
 * @argc, @argv: unused.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    /*
     * Table geometry. A row starts with a label produced by "%5s %10s "
     * (17 characters); every size then takes one 16-character column:
     * "%7.2f " (8) followed by an 8-character speedup field.
     */
    enum {
        LABEL_WIDTH = 17,
        COLUMN_WIDTH = 16,
    };
    static const size_t sizes[] = {
        32,
        1024,
        1024 * 4,
        1024 * 128,
        1024 * 1024,
    };
    double res[ARRAY_SIZE(benchmarks)][ARRAY_SIZE(impls)][ARRAY_SIZE(sizes)];

    for (size_t i = 0; i < ARRAY_SIZE(sizes); i++) {
        size_t size = sizes[i];

        for (size_t j = 0; j < ARRAY_SIZE(impls); j++) {
            const struct tree_implementation *impl = &impls[j];

            for (size_t k = 0; k < ARRAY_SIZE(benchmarks); k++) {
                const struct benchmark *bench = &benchmarks[k];
                int64_t total_ns = 0;
                int64_t n_runs = 0;

                /* warm-up run */
                run_benchmark(bench, impl->type, size);

                while (total_ns < 2e8 || n_runs < 5) {
                    total_ns += run_benchmark(bench, impl->type, size);
                    n_runs++;
                }

                double ns_per_run = (double)total_ns / n_runs;

                /* Throughput, in Mops/s */
                res[k][j][i] = size / ns_per_run * 1e3;
            }
        }
    }

    printf("# Results' breakdown: Tree, Op and #Elements. Units: Mops/s\n");
    printf("%5s %10s ", "Tree", "Op");
    for (size_t i = 0; i < ARRAY_SIZE(sizes); i++) {
        /* Pad the 7-character size up to the full column width. */
        printf("%7zu%9s", sizes[i], "");
    }
    printf("\n");

    /*
     * One dash per character of a table row, minus the trailing space of
     * the last column; the slot saved that way holds the terminating NUL.
     */
    char separator[LABEL_WIDTH + COLUMN_WIDTH * ARRAY_SIZE(sizes)];
    for (size_t i = 0; i < ARRAY_SIZE(separator) - 1; i++) {
        separator[i] = '-';
    }
    separator[ARRAY_SIZE(separator) - 1] = '\0';
    printf("%s\n", separator);

    for (size_t i = 0; i < ARRAY_SIZE(benchmarks); i++) {
        for (size_t j = 0; j < ARRAY_SIZE(impls); j++) {
            printf("%5s %10s ", impls[j].name, benchmarks[i].name);
            for (size_t k = 0; k < ARRAY_SIZE(sizes); k++) {
                printf("%7.2f ", res[i][j][k]);
                if (j == 0) {
                    /* Baseline row: blank out the speedup field. */
                    printf("%8s", "");
                } else if (res[i][0][k] != 0) {
                    double speedup = res[i][j][k] / res[i][0][k];

                    printf("(%4.2fx) ", speedup);
                } else {
                    /* No meaningful ratio against a zero baseline. */
                    printf("(%5s) ", "");
                }
            }
            printf("\n");
        }
    }
    printf("%s\n", separator);

    return 0;
}