tests: add atomic_add-bench
With this microbenchmark we can measure the overhead of emulating atomic instructions with a configurable degree of contention. The benchmark spawns $n threads, each performing $o atomic ops (additions) in a loop. Each atomic operation is performed on a different cache line (assuming cache lines are 64 bytes long) that is randomly selected from a range [0, $r). [ Note: each $foo corresponds to a -foo flag ] Signed-off-by: Emilio G. Cota <cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> Message-Id: <1467054136-10430-20-git-send-email-cota@braap.org>
This commit is contained in:
parent
37b995f6e7
commit
070e3edcea
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
atomic_add-bench
|
||||
check-qdict
|
||||
check-qfloat
|
||||
check-qint
|
||||
|
@ -460,7 +460,8 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
|
||||
tests/test-opts-visitor.o tests/test-qmp-event.o \
|
||||
tests/rcutorture.o tests/test-rcu-list.o \
|
||||
tests/test-qdist.o \
|
||||
tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o
|
||||
tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \
|
||||
tests/atomic_add-bench.o
|
||||
|
||||
$(test-obj-y): QEMU_INCLUDES += -Itests
|
||||
QEMU_CFLAGS += -I$(SRC_PATH)/tests
|
||||
@ -507,6 +508,7 @@ tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
|
||||
tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y)
|
||||
tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
|
||||
tests/test-bufferiszero$(EXESUF): tests/test-bufferiszero.o $(test-util-obj-y)
|
||||
tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y)
|
||||
|
||||
tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
|
||||
hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
|
||||
|
163
tests/atomic_add-bench.c
Normal file
163
tests/atomic_add-bench.c
Normal file
@ -0,0 +1,163 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/thread.h"
|
||||
#include "qemu/host-utils.h"
|
||||
#include "qemu/processor.h"
|
||||
|
||||
/*
 * Per-thread private state. The struct is aligned to 64 bytes so that
 * each element of an array of these starts on its own 64-byte boundary.
 */
struct thread_info {
    /* Current state of this thread's xorshift64* generator. */
    uint64_t r;
} QEMU_ALIGNED(64);
|
||||
|
||||
/*
 * One shared counter, the target of the atomic increments. Aligned to
 * 64 bytes so that consecutive counters do not share a 64-byte line.
 */
struct count {
    /* Number of increments applied to this slot. */
    unsigned long val;
} QEMU_ALIGNED(64);
|
||||
|
||||
/* Benchmark parameters; the defaults may be overridden in parse_args(). */
static unsigned int n_threads = 1;
static unsigned int duration = 1;
static unsigned int range = 1024;

/* Arrays allocated in create_threads(). */
static QemuThread *threads;
static struct thread_info *th_info;
static struct count *counts;

/* Start/stop handshake between run_test() and the worker threads. */
static unsigned int n_ready_threads;
static bool test_start;
static bool test_stop;
|
||||
|
||||
/* Option summary printed by usage_complete(). */
static const char commands_string[] =
    " -n = number of threads\n"
    " -d = duration in seconds\n"
    " -r = range (will be rounded up to pow2)";
|
||||
|
||||
static void usage_complete(char *argv[])
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [options]\n", argv[0]);
|
||||
fprintf(stderr, "options:\n%s\n", commands_string);
|
||||
}
|
||||
|
||||
/*
 * One step of the xorshift64* pseudo-random generator, as described at
 * https://en.wikipedia.org/wiki/Xorshift
 *
 * Takes the current 64-bit state and returns the next value; the caller
 * feeds the result back in as the next state. Chosen over rand_r() for
 * speed and because it yields a full 64-bit range (the C standard only
 * guarantees RAND_MAX to be at least 32767).
 *
 * Note that a state of 0 maps to 0, so seeds must be non-zero.
 */
static uint64_t xorshift64star(uint64_t x)
{
    const uint64_t multiplier = UINT64_C(2685821657736338717);
    uint64_t state = x;

    state = state ^ (state >> 12); /* a */
    state = state ^ (state << 25); /* b */
    state = state ^ (state >> 27); /* c */

    return state * multiplier;
}
|
||||
|
||||
static void *thread_func(void *arg)
|
||||
{
|
||||
struct thread_info *info = arg;
|
||||
|
||||
atomic_inc(&n_ready_threads);
|
||||
while (!atomic_read(&test_start)) {
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
while (!atomic_read(&test_stop)) {
|
||||
unsigned int index;
|
||||
|
||||
info->r = xorshift64star(info->r);
|
||||
index = info->r & (range - 1);
|
||||
atomic_inc(&counts[index].val);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void run_test(void)
|
||||
{
|
||||
unsigned int remaining;
|
||||
unsigned int i;
|
||||
|
||||
while (atomic_read(&n_ready_threads) != n_threads) {
|
||||
cpu_relax();
|
||||
}
|
||||
atomic_set(&test_start, true);
|
||||
do {
|
||||
remaining = sleep(duration);
|
||||
} while (remaining);
|
||||
atomic_set(&test_stop, true);
|
||||
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
qemu_thread_join(&threads[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void create_threads(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
threads = g_new(QemuThread, n_threads);
|
||||
th_info = g_new(struct thread_info, n_threads);
|
||||
counts = qemu_memalign(64, sizeof(*counts) * range);
|
||||
memset(counts, 0, sizeof(*counts) * range);
|
||||
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
struct thread_info *info = &th_info[i];
|
||||
|
||||
info->r = (i + 1) ^ time(NULL);
|
||||
qemu_thread_create(&threads[i], NULL, thread_func, info,
|
||||
QEMU_THREAD_JOINABLE);
|
||||
}
|
||||
}
|
||||
|
||||
static void pr_params(void)
|
||||
{
|
||||
printf("Parameters:\n");
|
||||
printf(" # of threads: %u\n", n_threads);
|
||||
printf(" duration: %u\n", duration);
|
||||
printf(" ops' range: %u\n", range);
|
||||
}
|
||||
|
||||
static void pr_stats(void)
|
||||
{
|
||||
unsigned long long val = 0;
|
||||
unsigned int i;
|
||||
double tx;
|
||||
|
||||
for (i = 0; i < range; i++) {
|
||||
val += counts[i].val;
|
||||
}
|
||||
tx = val / duration / 1e6;
|
||||
|
||||
printf("Results:\n");
|
||||
printf("Duration: %u s\n", duration);
|
||||
printf(" Throughput: %.2f Mops/s\n", tx);
|
||||
printf(" Throughput/thread: %.2f Mops/s/thread\n", tx / n_threads);
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char *argv[])
|
||||
{
|
||||
int c;
|
||||
|
||||
for (;;) {
|
||||
c = getopt(argc, argv, "hd:n:r:");
|
||||
if (c < 0) {
|
||||
break;
|
||||
}
|
||||
switch (c) {
|
||||
case 'h':
|
||||
usage_complete(argv);
|
||||
exit(0);
|
||||
case 'd':
|
||||
duration = atoi(optarg);
|
||||
break;
|
||||
case 'n':
|
||||
n_threads = atoi(optarg);
|
||||
break;
|
||||
case 'r':
|
||||
range = pow2ceil(atoi(optarg));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Program entry point: parse the options, echo the parameters, start
 * the workers, run the timed test and report the results.
 */
int main(int argc, char *argv[])
{
    parse_args(argc, argv);
    pr_params();

    create_threads();
    run_test();

    pr_stats();
    return EXIT_SUCCESS;
}
|
Loading…
Reference in New Issue
Block a user