From e92f4887531780c93fe21ed5f89f263eb0bb89e5 Mon Sep 17 00:00:00 2001
From: Martin Whitaker
Date: Sat, 5 Mar 2022 20:04:32 +0000
Subject: [PATCH] Improve efficiency of random number generation (discussion #8).

Use a more efficient algorithm that can be in-lined, and keep the
generator state in a local variable.
---
Review notes (free-form text between the '---' line and the first diff is
not part of the commit message):

* Layout: the line breaks and indentation of this copy were restored from
  a whitespace-collapsed text. The hunk line counts agree with the @@
  headers and the diffstat, but the leading whitespace of each line is a
  best guess - verify against the tree, or apply with whitespace
  tolerance.
* Zero state: every step of the new prsg() XORs the state with a shifted
  copy of itself, so a state of zero maps to zero and the sequence would
  stay at zero. The removed random_seed() remapped the illegal state of
  the old LFSR (all ones); none of the seeding code added below checks
  for a zero state after the multiply.
* Seed scaling: test 5 multiplies its seed by 0x12345678, which is even.
  Assuming testword_t is an unsigned integer type (its definition is not
  part of this patch - TODO confirm), that product can wrap to zero for
  some non-zero inputs. The other two seeding sites use the odd constant
  0x87654321.
* Width selection: prsg() picks its shift constants with
  #ifdef __x86_64__, whereas the removed random() keyed on
  TESTWORD_WIDTH > 32. Presumably the two conditions are equivalent for
  all supported builds - confirm.

 tests/mov_inv_random.c | 15 ++++++-----
 tests/test_helper.c    | 61 ------------------------------------------
 tests/test_helper.h    | 25 ++++++++++-------
 tests/tests.c          | 22 ++++++++++-----
 4 files changed, 41 insertions(+), 82 deletions(-)

diff --git a/tests/mov_inv_random.c b/tests/mov_inv_random.c
index db4ba06..cbdaf34 100644
--- a/tests/mov_inv_random.c
+++ b/tests/mov_inv_random.c
@@ -34,19 +34,20 @@ int test_mov_inv_random(int my_cpu)
 {
     int ticks = 0;
 
-    uint64_t seed;
+    testword_t seed;
     if (cpuid_info.flags.rdtsc) {
         seed = get_tsc();
     } else {
-        seed = UINT64_C(0x12345678) * (1 + pass_num);
+        seed = 1 + pass_num;
     }
+    seed *= 0x87654321;
 
     if (my_cpu == master_cpu) {
         display_test_pattern_value(seed);
     }
 
     // Initialize memory with the initial pattern.
-    random_seed(my_cpu, seed);
+    testword_t prsg_state = seed;
     for (int i = 0; i < vm_map_size; i++) {
         testword_t *start, *end;
         calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
@@ -69,7 +70,8 @@ int test_mov_inv_random(int my_cpu)
             }
             test_addr[my_cpu] = (uintptr_t)p;
             do {
-                write_word(p, random(my_cpu));
+                prsg_state = prsg(prsg_state);
+                write_word(p, prsg_state);
             } while (p++ < pe); // test before increment in case pointer overflows
             do_tick(my_cpu);
             BAILOUT;
@@ -82,7 +84,7 @@ int test_mov_inv_random(int my_cpu)
     for (int i = 0; i < 2; i++) {
         flush_caches(my_cpu);
 
-        random_seed(my_cpu, seed);
+        prsg_state = seed;
         for (int j = 0; j < vm_map_size; j++) {
             testword_t *start, *end;
             calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
@@ -105,7 +107,8 @@ int test_mov_inv_random(int my_cpu)
                 }
                 test_addr[my_cpu] = (uintptr_t)p;
                 do {
-                    testword_t expect = random(my_cpu) ^ invert;
+                    prsg_state = prsg(prsg_state);
+                    testword_t expect = prsg_state ^ invert;
                     testword_t actual = read_word(p);
                     if (unlikely(actual != expect)) {
                         data_error(p, expect, actual, true);
diff --git a/tests/test_helper.c b/tests/test_helper.c
index cb15abb..169be4f 100644
--- a/tests/test_helper.c
+++ b/tests/test_helper.c
@@ -25,71 +25,10 @@
 
 #include "test_helper.h"
 
-//------------------------------------------------------------------------------
-// Types
-//------------------------------------------------------------------------------
-
-// We keep a separate LFSR for each CPU. Space them out by at least a cache line,
-// otherwise performance suffers.
-
-typedef struct {
-    uint64_t lfsr;
-    uint64_t pad[7];
-} prsg_state_t;
-
-//------------------------------------------------------------------------------
-// Private Variables
-//------------------------------------------------------------------------------
-
-static prsg_state_t prsg_state[MAX_CPUS];
-
-//------------------------------------------------------------------------------
-// Private Functions
-//------------------------------------------------------------------------------
-
-static inline uint32_t prsg(int my_cpu)
-{
-    // This implements a 64 bit linear feedback shift register with XNOR
-    // feedback from taps 64, 63, 61, 60. It generates 32 new bits each
-    // time the function is called. Because the feedback taps are all in
-    // the upper 32 bits, we can generate the new bits in parallel.
-
-    uint64_t lfsr = prsg_state[my_cpu].lfsr;
-    uint32_t feedback = ~((lfsr >> 32) ^ (lfsr >> 31) ^ (lfsr >> 29) ^ (lfsr >> 28));
-    prsg_state[my_cpu].lfsr = (lfsr << 32) | feedback;
-    return feedback;
-}
-
 //------------------------------------------------------------------------------
 // Public Functions
 //------------------------------------------------------------------------------
 
-void random_seed(int my_cpu, uint64_t seed)
-{
-    if (my_cpu < 0) {
-        return;
-    }
-
-    // Avoid the PRSG illegal state.
-    if (~seed == 0) {
-        seed = 0;
-    }
-    prsg_state[my_cpu].lfsr = seed;
-}
-
-testword_t random(int my_cpu)
-{
-    if (my_cpu < 0) {
-        return 0;
-    }
-
-    testword_t value = prsg(my_cpu);
-#if TESTWORD_WIDTH > 32
-    value = value << 32 | prsg(my_cpu);
-#endif
-    return value;
-}
-
 void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segment, size_t chunk_align)
 {
     if (my_cpu < 0) {
diff --git a/tests/test_helper.h b/tests/test_helper.h
index 7d6772c..25a49f1 100644
--- a/tests/test_helper.h
+++ b/tests/test_helper.h
@@ -63,16 +63,23 @@ static inline uintptr_t round_up(uintptr_t value, size_t align_size)
 }
 
 /**
- * Seeds the psuedo-random number generator for my_cpu.
+ * Returns the next word in a pseudo-random sequence where state was the
+ * previous word in that sequence.
  */
-void random_seed(int my_cpu, uint64_t seed);
-
-/**
- * Returns a psuedo-random number for my_cpu. The sequence of numbers returned
- * is repeatable for a given starting seed. The sequence repeats after 2^64 - 1
- * numbers. Within that period, no number is repeated.
- */
-testword_t random(int my_cpu);
+static inline testword_t prsg(testword_t state)
+{
+    // This uses the algorithms described at https://en.wikipedia.org/wiki/Xorshift
+#ifdef __x86_64__
+    state ^= state << 13;
+    state ^= state >> 7;
+    state ^= state << 17;
+#else
+    state ^= state << 13;
+    state ^= state >> 17;
+    state ^= state << 5;
+#endif
+    return state;
+}
 
 /**
  * Calculates the start and end word address for the chunk of segment that is
diff --git a/tests/tests.c b/tests/tests.c
index 1969204..3bc7e13 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -106,6 +106,8 @@ int run_test(int my_cpu, int test, int stage, int iterations)
     }
     BARRIER;
 
+    testword_t prsg_state;
+
     int ticks = 0;
 
     switch (test) {
@@ -168,12 +170,16 @@ int run_test(int my_cpu, int test, int stage, int iterations)
         // Moving inversions, fixed random pattern.
       case 5:
         if (cpuid_info.flags.rdtsc) {
-            random_seed(my_cpu, get_tsc());
+            prsg_state = get_tsc();
         } else {
-            random_seed(my_cpu, UINT64_C(0x12345678) * (1 + pass_num));
+            prsg_state = 1 + pass_num;
         }
+        prsg_state *= 0x12345678;
+
         for (int i = 0; i < iterations; i++) {
-            testword_t pattern1 = random(my_cpu);
+            prsg_state = prsg(prsg_state);
+
+            testword_t pattern1 = prsg_state;
             testword_t pattern2 = ~pattern1;
 
             BARRIER;
@@ -213,13 +219,17 @@ int run_test(int my_cpu, int test, int stage, int iterations)
         // Modulo 20 check, fixed random pattern.
       case 9:
         if (cpuid_info.flags.rdtsc) {
-            random_seed(my_cpu, get_tsc());
+            prsg_state = get_tsc();
         } else {
-            random_seed(my_cpu, UINT64_C(0x12345678) * (1 + pass_num));
+            prsg_state = 1 + pass_num;
         }
+        prsg_state *= 0x87654321;
+
         for (int i = 0; i < iterations; i++) {
             for (int offset = 0; offset < MODULO_N; offset++) {
-                testword_t pattern1 = random(my_cpu);
+                prsg_state = prsg(prsg_state);
+
+                testword_t pattern1 = prsg_state;
                 testword_t pattern2 = ~pattern1;
 
                 BARRIER;