Improve efficiency of random number generation (discussion #8).

Use a more efficient algorithm that can be in-lined, and keep the
generator state in a local variable.
This commit is contained in:
Martin Whitaker 2022-03-05 20:04:32 +00:00
parent 5e2ab9289b
commit e92f488753
4 changed files with 41 additions and 82 deletions

View File

@ -34,19 +34,20 @@ int test_mov_inv_random(int my_cpu)
{
int ticks = 0;
uint64_t seed;
testword_t seed;
if (cpuid_info.flags.rdtsc) {
seed = get_tsc();
} else {
seed = UINT64_C(0x12345678) * (1 + pass_num);
seed = 1 + pass_num;
}
seed *= 0x87654321;
if (my_cpu == master_cpu) {
display_test_pattern_value(seed);
}
// Initialize memory with the initial pattern.
random_seed(my_cpu, seed);
testword_t prsg_state = seed;
for (int i = 0; i < vm_map_size; i++) {
testword_t *start, *end;
calculate_chunk(&start, &end, my_cpu, i, sizeof(testword_t));
@ -69,7 +70,8 @@ int test_mov_inv_random(int my_cpu)
}
test_addr[my_cpu] = (uintptr_t)p;
do {
write_word(p, random(my_cpu));
prsg_state = prsg(prsg_state);
write_word(p, prsg_state);
} while (p++ < pe); // test before increment in case pointer overflows
do_tick(my_cpu);
BAILOUT;
@ -82,7 +84,7 @@ int test_mov_inv_random(int my_cpu)
for (int i = 0; i < 2; i++) {
flush_caches(my_cpu);
random_seed(my_cpu, seed);
prsg_state = seed;
for (int j = 0; j < vm_map_size; j++) {
testword_t *start, *end;
calculate_chunk(&start, &end, my_cpu, j, sizeof(testword_t));
@ -105,7 +107,8 @@ int test_mov_inv_random(int my_cpu)
}
test_addr[my_cpu] = (uintptr_t)p;
do {
testword_t expect = random(my_cpu) ^ invert;
prsg_state = prsg(prsg_state);
testword_t expect = prsg_state ^ invert;
testword_t actual = read_word(p);
if (unlikely(actual != expect)) {
data_error(p, expect, actual, true);

View File

@ -25,71 +25,10 @@
#include "test_helper.h"
//------------------------------------------------------------------------------
// Types
//------------------------------------------------------------------------------
// We keep a separate LFSR for each CPU. Space them out by at least a cache line,
// otherwise performance suffers.
typedef struct {
// Current contents of this CPU's 64-bit shift register.
uint64_t lfsr;
// Unused filler: 7 x 8 bytes brings each entry to 64 bytes in total, so that
// adjacent entries do not share a cache line (assumes lines of at most 64 bytes).
uint64_t pad[7];
} prsg_state_t;
//------------------------------------------------------------------------------
// Private Variables
//------------------------------------------------------------------------------
// One generator state per CPU; the functions below index it by my_cpu.
static prsg_state_t prsg_state[MAX_CPUS];
//------------------------------------------------------------------------------
// Private Functions
//------------------------------------------------------------------------------
static inline uint32_t prsg(int my_cpu)
{
    // 64 bit linear feedback shift register using XNOR feedback taken from
    // taps 64, 63, 61 and 60. All of the taps sit in the upper half of the
    // register, so one call can produce 32 fresh bits at once instead of
    // stepping the register a bit at a time.
    const uint64_t current = prsg_state[my_cpu].lfsr;

    // XOR the four tap positions together...
    uint64_t taps = current >> 32;
    taps ^= current >> 31;
    taps ^= current >> 29;
    taps ^= current >> 28;

    // ...then invert (XNOR) and keep only the low 32 bits.
    const uint32_t fresh_bits = (uint32_t)~taps;

    // The old low half moves up and the new bits take its place.
    prsg_state[my_cpu].lfsr = (current << 32) | fresh_bits;

    return fresh_bits;
}
//------------------------------------------------------------------------------
// Public Functions
//------------------------------------------------------------------------------
void random_seed(int my_cpu, uint64_t seed)
{
    // A negative CPU number has no generator slot; silently ignore it.
    if (my_cpu >= 0) {
        // All ones is the PRSG illegal state: with every bit set, the XNOR
        // feedback in prsg() regenerates all ones and the register never
        // changes. Substitute zero for that one seed value.
        const uint64_t all_ones = ~(uint64_t)0;
        prsg_state[my_cpu].lfsr = (seed == all_ones) ? 0 : seed;
    }
}
testword_t random(int my_cpu)
{
    // A negative CPU number has no generator slot; the result is then zero.
    testword_t word = 0;

    if (my_cpu >= 0) {
        word = prsg(my_cpu);
#if TESTWORD_WIDTH > 32
        // prsg() yields 32 bits per call. For wider test words the first
        // call supplies the upper half and a second call the lower half.
        const testword_t low_half = prsg(my_cpu);
        word = (word << 32) | low_half;
#endif
    }

    return word;
}
void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segment, size_t chunk_align)
{
if (my_cpu < 0) {

View File

@ -63,16 +63,23 @@ static inline uintptr_t round_up(uintptr_t value, size_t align_size)
}
/**
* Seeds the pseudo-random number generator for my_cpu.
* Returns the next word in a pseudo-random sequence where state was the
* previous word in that sequence.
*/
void random_seed(int my_cpu, uint64_t seed);
/**
* Returns a pseudo-random number for my_cpu. The sequence of numbers returned
* is repeatable for a given starting seed. The sequence repeats after 2^64 - 1
* numbers. Within that period, no number is repeated.
*/
testword_t random(int my_cpu);
static inline testword_t prsg(testword_t state)
{
    // Xorshift generator, as described at https://en.wikipedia.org/wiki/Xorshift
    // The step is always shift-left, shift-right, shift-left, each XORed back
    // into the word; only the three shift distances differ between the
    // x86_64 build and the other build.
#ifdef __x86_64__
    enum { PRSG_SHIFT_A = 13, PRSG_SHIFT_B = 7, PRSG_SHIFT_C = 17 };
#else
    enum { PRSG_SHIFT_A = 13, PRSG_SHIFT_B = 17, PRSG_SHIFT_C = 5 };
#endif
    testword_t next = state;

    next = next ^ (next << PRSG_SHIFT_A);
    next = next ^ (next >> PRSG_SHIFT_B);
    next = next ^ (next << PRSG_SHIFT_C);

    return next;
}
/**
* Calculates the start and end word address for the chunk of segment that is

View File

@ -106,6 +106,8 @@ int run_test(int my_cpu, int test, int stage, int iterations)
}
BARRIER;
testword_t prsg_state;
int ticks = 0;
switch (test) {
@ -168,12 +170,16 @@ int run_test(int my_cpu, int test, int stage, int iterations)
// Moving inversions, fixed random pattern.
case 5:
if (cpuid_info.flags.rdtsc) {
random_seed(my_cpu, get_tsc());
prsg_state = get_tsc();
} else {
random_seed(my_cpu, UINT64_C(0x12345678) * (1 + pass_num));
prsg_state = 1 + pass_num;
}
prsg_state *= 0x12345678;
for (int i = 0; i < iterations; i++) {
testword_t pattern1 = random(my_cpu);
prsg_state = prsg(prsg_state);
testword_t pattern1 = prsg_state;
testword_t pattern2 = ~pattern1;
BARRIER;
@ -213,13 +219,17 @@ int run_test(int my_cpu, int test, int stage, int iterations)
// Modulo 20 check, fixed random pattern.
case 9:
if (cpuid_info.flags.rdtsc) {
random_seed(my_cpu, get_tsc());
prsg_state = get_tsc();
} else {
random_seed(my_cpu, UINT64_C(0x12345678) * (1 + pass_num));
prsg_state = 1 + pass_num;
}
prsg_state *= 0x87654321;
for (int i = 0; i < iterations; i++) {
for (int offset = 0; offset < MODULO_N; offset++) {
testword_t pattern1 = random(my_cpu);
prsg_state = prsg(prsg_state);
testword_t pattern1 = prsg_state;
testword_t pattern2 = ~pattern1;
BARRIER;