Faster barrier implementation.
The old barrier implementation was very slow when running on a multi-socket machine (pcmemtest issue 16). The new implementation provides two options:
- when blocked, spin on a thread-local flag
- when blocked, execute a HLT instruction and wait for an NMI
The first option might be faster, but we need to measure it to find out. A new boot command line option ("powersave", taking "off", "low" or "high") is provided to select between the two, with a third setting that uses a mixture of the two.
This commit is contained in:
parent
311a597766
commit
4078b7760e
15
app/config.c
15
app/config.c
|
@ -92,10 +92,11 @@ cpu_state_t cpu_state[MAX_CPUS];
|
|||
|
||||
bool enable_temperature = false;
|
||||
bool enable_trace = false;
|
||||
bool enable_halt = true;
|
||||
|
||||
bool pause_at_start = true;
|
||||
|
||||
power_save_t power_save = POWER_SAVE_HIGH;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Private Functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -115,8 +116,14 @@ static void parse_option(const char *option, const char *params)
|
|||
}
|
||||
} else if (strncmp(option, "nopause", 8) == 0) {
|
||||
pause_at_start = false;
|
||||
} else if (strncmp(option, "nohalt", 7) == 0) {
|
||||
enable_halt = false;
|
||||
} else if (strncmp(option, "powersave", 10) == 0) {
|
||||
if (strncmp(params, "off", 4) == 0) {
|
||||
power_save = POWER_SAVE_OFF;
|
||||
} else if (strncmp(params, "low", 4) == 0) {
|
||||
power_save = POWER_SAVE_LOW;
|
||||
} else if (strncmp(params, "high", 5) == 0) {
|
||||
power_save = POWER_SAVE_HIGH;
|
||||
}
|
||||
} else if (strncmp(option, "smp", 4) == 0) {
|
||||
smp_enabled = true;
|
||||
} else if (strncmp(option, "trace", 6) == 0) {
|
||||
|
@ -653,6 +660,8 @@ void config_init(void)
|
|||
|
||||
enable_temperature = !no_temperature;
|
||||
|
||||
power_save = POWER_SAVE_HIGH;
|
||||
|
||||
const boot_params_t *boot_params = (boot_params_t *)boot_params_addr;
|
||||
|
||||
uintptr_t cmd_line_addr = boot_params->cmd_line_ptr;
|
||||
|
|
|
@ -28,6 +28,12 @@ typedef enum {
|
|||
ERROR_MODE_BADRAM
|
||||
} error_mode_t;
|
||||
|
||||
typedef enum {
|
||||
POWER_SAVE_OFF,
|
||||
POWER_SAVE_LOW,
|
||||
POWER_SAVE_HIGH
|
||||
} power_save_t;
|
||||
|
||||
extern uintptr_t pm_limit_lower;
|
||||
extern uintptr_t pm_limit_upper;
|
||||
|
||||
|
@ -41,10 +47,11 @@ extern cpu_state_t cpu_state[MAX_CPUS];
|
|||
|
||||
extern bool enable_temperature;
|
||||
extern bool enable_trace;
|
||||
extern bool enable_halt;
|
||||
|
||||
extern bool pause_at_start;
|
||||
|
||||
extern power_save_t power_save;
|
||||
|
||||
void config_init(void);
|
||||
|
||||
void config_menu(bool initial);
|
||||
|
|
|
@ -231,12 +231,21 @@ void scroll(void)
|
|||
|
||||
void do_tick(int my_cpu)
|
||||
{
|
||||
barrier_wait(run_barrier);
|
||||
bool use_spin_wait = (power_save < POWER_SAVE_HIGH);
|
||||
if (use_spin_wait) {
|
||||
barrier_spin_wait(run_barrier);
|
||||
} else {
|
||||
barrier_halt_wait(run_barrier);
|
||||
}
|
||||
if (master_cpu == my_cpu) {
|
||||
check_input();
|
||||
error_update();
|
||||
}
|
||||
barrier_wait(run_barrier);
|
||||
if (use_spin_wait) {
|
||||
barrier_spin_wait(run_barrier);
|
||||
} else {
|
||||
barrier_halt_wait(run_barrier);
|
||||
}
|
||||
|
||||
// Only the master CPU does the update.
|
||||
if (master_cpu != my_cpu) {
|
||||
|
|
75
app/main.c
75
app/main.c
|
@ -118,21 +118,42 @@ uintptr_t test_addr[MAX_CPUS];
|
|||
// Private Functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Waits on start_barrier, optionally tracing the call site first. Spins while
// waiting unless power_save is POWER_SAVE_HIGH, in which case the core halts.
// Requires my_cpu to be in scope at the point of use. Wrapped in
// do { } while (0) so the macro expands to a single statement and is safe as
// the body of an unbraced if/else.
#define SHORT_BARRIER \
    do { \
        if (TRACE_BARRIERS) { \
            trace(my_cpu, "Start barrier wait at %s line %i", __FILE__, __LINE__); \
        } \
        if (power_save < POWER_SAVE_HIGH) { \
            barrier_spin_wait(start_barrier); \
        } else { \
            barrier_halt_wait(start_barrier); \
        } \
    } while (0)

// Waits on start_barrier, optionally tracing the call site first. Halts while
// waiting unless power_save is POWER_SAVE_OFF, in which case the core spins.
// Requires my_cpu to be in scope at the point of use. Wrapped in
// do { } while (0) for the same single-statement guarantee as SHORT_BARRIER.
#define LONG_BARRIER \
    do { \
        if (TRACE_BARRIERS) { \
            trace(my_cpu, "Start barrier wait at %s line %i", __FILE__, __LINE__); \
        } \
        if (power_save > POWER_SAVE_OFF) { \
            barrier_halt_wait(start_barrier); \
        } else { \
            barrier_spin_wait(start_barrier); \
        } \
    } while (0)
|
||||
|
||||
static void run_at(uintptr_t addr, int my_cpu)
|
||||
{
|
||||
uintptr_t *new_start_addr = (uintptr_t *)(addr + startup - _start);
|
||||
|
||||
|
||||
if (my_cpu == 0) {
|
||||
// Copy the program code and all data except the stacks.
|
||||
memcpy((void *)addr, &_start, _stacks - _start);
|
||||
memcpy((void *)addr, (void *)_start, _stacks - _start);
|
||||
// Copy the thread-local storage.
|
||||
size_t locals_offset = _stacks - _start + BSP_STACK_SIZE - LOCALS_SIZE;
|
||||
for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
|
||||
memcpy((void *)(addr + locals_offset), (void *)(_start + locals_offset), LOCALS_SIZE);
|
||||
locals_offset += AP_STACK_SIZE;
|
||||
}
|
||||
}
|
||||
BARRIER;
|
||||
LONG_BARRIER;
|
||||
|
||||
#ifndef __x86_64__
|
||||
// The 32-bit startup code needs to know where it is located.
|
||||
|
@ -317,7 +338,7 @@ static void test_all_windows(int my_cpu)
|
|||
display_active_cpu(my_cpu);
|
||||
}
|
||||
}
|
||||
barrier_init(run_barrier, num_active_cpus);
|
||||
barrier_reset(run_barrier, num_active_cpus);
|
||||
}
|
||||
|
||||
int iterations = test_list[test_num].iterations;
|
||||
|
@ -328,7 +349,7 @@ static void test_all_windows(int my_cpu)
|
|||
|
||||
// Loop through all possible windows.
|
||||
do {
|
||||
BARRIER;
|
||||
LONG_BARRIER;
|
||||
if (bail) {
|
||||
break;
|
||||
}
|
||||
|
@ -344,7 +365,7 @@ static void test_all_windows(int my_cpu)
|
|||
window_num = 1;
|
||||
}
|
||||
}
|
||||
BARRIER;
|
||||
SHORT_BARRIER;
|
||||
|
||||
// Relocate if necessary.
|
||||
if (window_num > 0) {
|
||||
|
@ -374,16 +395,9 @@ static void test_all_windows(int my_cpu)
|
|||
}
|
||||
setup_vm_map(window_start, window_end);
|
||||
}
|
||||
BARRIER;
|
||||
SHORT_BARRIER;
|
||||
|
||||
// There is a significant overhead in restarting halted CPU cores, so only enable
|
||||
// halting if the memory present in the window is a reasonable size.
|
||||
bool halt_if_inactive = enable_halt && num_enabled_cpus > num_active_cpus && num_mapped_pages > PAGE_C(16,MB);
|
||||
if (!i_am_active) {
|
||||
if (!dummy_run && halt_if_inactive) {
|
||||
cpu_state[my_cpu] = CPU_STATE_HALTED;
|
||||
__asm__ __volatile__ ("hlt");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -408,29 +422,6 @@ static void test_all_windows(int my_cpu)
|
|||
}
|
||||
|
||||
if (i_am_master) {
|
||||
if (!dummy_run && halt_if_inactive) {
|
||||
int cpu_num = 0;
|
||||
int retries = 0;
|
||||
while (cpu_num < num_available_cpus) {
|
||||
if (cpu_num == my_cpu) {
|
||||
cpu_num++;
|
||||
continue;
|
||||
}
|
||||
if (cpu_state[cpu_num] == CPU_STATE_ENABLED) {
|
||||
// This catches a potential race between the inactive CPU halting and the master CPU waking
|
||||
// it up. This should be an unlikely event, so just spin until the inactive CPU catches up.
|
||||
usleep(10);
|
||||
if (++retries < 1000) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (cpu_state[cpu_num] == CPU_STATE_HALTED) {
|
||||
smp_send_nmi(cpu_num);
|
||||
}
|
||||
retries = 0;
|
||||
cpu_num++;
|
||||
}
|
||||
}
|
||||
window_num++;
|
||||
}
|
||||
} while (window_end < pm_map[pm_map_size - 1].end);
|
||||
|
@ -467,7 +458,7 @@ void main(void)
|
|||
set_scroll_lock(false);
|
||||
trace(0, "starting other CPUs");
|
||||
}
|
||||
barrier_init(start_barrier, num_enabled_cpus);
|
||||
barrier_reset(start_barrier, num_enabled_cpus);
|
||||
int failed = smp_start(cpu_state);
|
||||
if (failed) {
|
||||
const char *message = "Failed to start CPU core %i. Press any key to reboot...";
|
||||
|
@ -501,7 +492,7 @@ void main(void)
|
|||
// where we left off after each relocation.
|
||||
|
||||
while (1) {
|
||||
BARRIER;
|
||||
SHORT_BARRIER;
|
||||
if (my_cpu == 0) {
|
||||
if (start_run) {
|
||||
pass_num = 0;
|
||||
|
@ -542,11 +533,11 @@ void main(void)
|
|||
start_test = false;
|
||||
rerun_test = false;
|
||||
}
|
||||
BARRIER;
|
||||
SHORT_BARRIER;
|
||||
if (test_list[test_num].enabled) {
|
||||
test_all_windows(my_cpu);
|
||||
}
|
||||
BARRIER;
|
||||
SHORT_BARRIER;
|
||||
if (my_cpu != 0) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
#define STACKS_SIZE (BSP_STACK_SIZE + MAX_APS * AP_STACK_SIZE)
|
||||
|
||||
#define LOCALS_SIZE 16 /* Stack region reserved for thread-local storage */
|
||||
|
||||
#define LOW_LOAD_ADDR 0x00010000 /* The low load address for the main program */
|
||||
#define HIGH_LOAD_ADDR 0x00100000 /* The high load address for the main program */
|
||||
|
||||
|
|
|
@ -122,7 +122,7 @@ startup:
|
|||
call smp_my_cpu_num
|
||||
movl $AP_STACK_SIZE, %edx
|
||||
mul %edx
|
||||
addl $BSP_STACK_SIZE, %eax
|
||||
addl $(BSP_STACK_SIZE - LOCALS_SIZE), %eax
|
||||
leal _stacks@GOTOFF(%ebx), %esp
|
||||
addl %eax, %esp
|
||||
|
||||
|
|
|
@ -158,7 +158,7 @@ startup:
|
|||
call smp_my_cpu_num
|
||||
movl $AP_STACK_SIZE, %edx
|
||||
mul %edx
|
||||
addq $BSP_STACK_SIZE, %rax
|
||||
addq $(BSP_STACK_SIZE - LOCALS_SIZE), %rax
|
||||
leaq _stacks(%rip), %rsp
|
||||
addq %rax, %rsp
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ INC_DIRS = -I../boot -I../system -I../lib -I../tests -I../app
|
|||
|
||||
SYS_OBJS = system/cpuid.o \
|
||||
system/cpuinfo.o \
|
||||
system/cpulocal.o \
|
||||
system/ehci.o \
|
||||
system/font.o \
|
||||
system/hwctrl.o \
|
||||
|
|
|
@ -8,6 +8,7 @@ INC_DIRS = -I../boot -I../system -I../lib -I../tests -I../app
|
|||
|
||||
SYS_OBJS = system/cpuid.o \
|
||||
system/cpuinfo.o \
|
||||
system/cpulocal.o \
|
||||
system/ehci.o \
|
||||
system/font.o \
|
||||
system/hwctrl.o \
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#ifndef ASSERT_H
|
||||
#define ASSERT_H
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* Provides a function to terminate the program if an unexpected and fatal
|
||||
* error is detected.
|
||||
*
|
||||
*//*
|
||||
* Copyright (C) 2022 Martin Whitaker.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Terminates the program (using a breakpoint exception) if expr is equal
|
||||
* to zero.
|
||||
*/
|
||||
static inline void assert(int expr)
|
||||
{
|
||||
if (!expr) {
|
||||
__asm__ __volatile__ ("int $3");
|
||||
}
|
||||
}
|
||||
|
||||
#endif // ASSERT_H
|
|
@ -1,19 +1,14 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (C) 2020 Martin Whitaker.
|
||||
//
|
||||
// Derived from an extract of memtest86+ smp.c:
|
||||
//
|
||||
// MemTest86+ V5 Specific code (GPL V2.0)
|
||||
// By Samuel DEMEULEMEESTER, sdemeule@memtest.org
|
||||
// http://www.canardpc.com - http://www.memtest.org
|
||||
// ------------------------------------------------
|
||||
// smp.c - MemTest-86 Version 3.5
|
||||
//
|
||||
// Released under version 2 of the Gnu Public License.
|
||||
// By Chris Brady
|
||||
// Copyright (C) 2020-2022 Martin Whitaker.
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "cpulocal.h"
|
||||
#include "smp.h"
|
||||
|
||||
#include "assert.h"
|
||||
|
||||
#include "barrier.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -22,34 +17,67 @@
|
|||
|
||||
void barrier_init(barrier_t *barrier, int num_threads)
|
||||
{
|
||||
barrier->num_threads = num_threads;
|
||||
barrier->count = num_threads;
|
||||
spin_unlock(&barrier->lock);
|
||||
spin_unlock(&barrier->st1);
|
||||
spin_unlock(&barrier->st2);
|
||||
spin_lock(&barrier->st2);
|
||||
barrier->flag_num = allocate_local_flag();
|
||||
assert(barrier->flag_num >= 0);
|
||||
|
||||
barrier_reset(barrier, num_threads);
|
||||
}
|
||||
|
||||
void barrier_wait(barrier_t *barrier)
|
||||
void barrier_reset(barrier_t *barrier, int num_threads)
|
||||
{
|
||||
barrier->num_threads = num_threads;
|
||||
barrier->count = num_threads;
|
||||
|
||||
local_flag_t *waiting_flags = local_flags(barrier->flag_num);
|
||||
for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
|
||||
waiting_flags[cpu_num].flag = false;
|
||||
}
|
||||
}
|
||||
|
||||
void barrier_spin_wait(barrier_t *barrier)
|
||||
{
|
||||
if (barrier == NULL || barrier->num_threads < 2) {
|
||||
return;
|
||||
}
|
||||
spin_wait(&barrier->st1); // Wait if the barrier is active.
|
||||
spin_lock(&barrier->lock); // Get lock for barrier struct.
|
||||
if (--barrier->count == 0) { // Last process?
|
||||
spin_lock(&barrier->st1); // Hold up any processes re-entering.
|
||||
spin_unlock(&barrier->st2); // Release the other processes.
|
||||
barrier->count++;
|
||||
spin_unlock(&barrier->lock);
|
||||
} else {
|
||||
spin_unlock(&barrier->lock);
|
||||
spin_wait(&barrier->st2); // Wait for peers to arrive.
|
||||
spin_lock(&barrier->lock);
|
||||
if (++barrier->count == barrier->num_threads) {
|
||||
spin_unlock(&barrier->st1);
|
||||
spin_lock(&barrier->st2);
|
||||
local_flag_t *waiting_flags = local_flags(barrier->flag_num);
|
||||
int my_cpu = smp_my_cpu_num();
|
||||
waiting_flags[my_cpu].flag = true;
|
||||
if (__sync_fetch_and_sub(&barrier->count, 1) > 1) {
|
||||
volatile bool *i_am_blocked = &waiting_flags[my_cpu].flag;
|
||||
while (*i_am_blocked) {
|
||||
__builtin_ia32_pause();
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Last one here, so reset the barrier and wake the others. No need to
|
||||
// check if a CPU core is actually waiting - just clear all the flags.
|
||||
barrier->count = barrier->num_threads;
|
||||
__sync_synchronize();
|
||||
for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
|
||||
waiting_flags[cpu_num].flag = false;
|
||||
}
|
||||
}
|
||||
|
||||
void barrier_halt_wait(barrier_t *barrier)
|
||||
{
|
||||
if (barrier == NULL || barrier->num_threads < 2) {
|
||||
return;
|
||||
}
|
||||
local_flag_t *waiting_flags = local_flags(barrier->flag_num);
|
||||
int my_cpu = smp_my_cpu_num();
|
||||
waiting_flags[my_cpu].flag = true;
|
||||
if (__sync_fetch_and_sub(&barrier->count, 1) > 1) {
|
||||
__asm__ __volatile__ ("hlt");
|
||||
return;
|
||||
}
|
||||
// Last one here, so reset the barrier and wake the others.
|
||||
barrier->count = barrier->num_threads;
|
||||
__sync_synchronize();
|
||||
waiting_flags[my_cpu].flag = false;
|
||||
for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
|
||||
if (waiting_flags[cpu_num].flag) {
|
||||
waiting_flags[cpu_num].flag = false;
|
||||
smp_send_nmi(cpu_num);
|
||||
}
|
||||
spin_unlock(&barrier->lock);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
* Copyright (C) 2020-2022 Martin Whitaker.
|
||||
*/
|
||||
|
||||
#include "cpulocal.h"
|
||||
|
||||
#include "spinlock.h"
|
||||
|
||||
/**
|
||||
|
@ -17,21 +19,31 @@
|
|||
*/
|
||||
typedef struct
|
||||
{
|
||||
int num_threads;
|
||||
volatile int count;
|
||||
spinlock_t lock;
|
||||
spinlock_t st1;
|
||||
spinlock_t st2;
|
||||
int flag_num;
|
||||
int num_threads;
|
||||
int count;
|
||||
} barrier_t;
|
||||
|
||||
/**
|
||||
* Initialises the barrier to block the specified number of threads.
|
||||
* Initialises a new barrier to block the specified number of threads.
|
||||
*/
|
||||
void barrier_init(barrier_t *barrier, int num_threads);
|
||||
|
||||
/**
|
||||
* Waits for all threads to arrive at the barrier.
|
||||
* Resets an existing barrier to block the specified number of threads.
|
||||
*/
|
||||
void barrier_wait(barrier_t *barrier);
|
||||
void barrier_reset(barrier_t *barrier, int num_threads);
|
||||
|
||||
/**
|
||||
* Waits for all threads to arrive at the barrier. A CPU core spins in an
|
||||
* idle loop when waiting.
|
||||
*/
|
||||
void barrier_spin_wait(barrier_t *barrier);
|
||||
|
||||
/**
|
||||
* Waits for all threads to arrive at the barrier. A CPU core halts when
|
||||
* waiting.
|
||||
*/
|
||||
void barrier_halt_wait(barrier_t *barrier);
|
||||
|
||||
#endif // BARRIER_H
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (C) 2022 Martin Whitaker.
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
#include "cpulocal.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Variables
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
int local_bytes_used = 0;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Public Functions
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
int allocate_local_flag(void)
|
||||
{
|
||||
if (local_bytes_used == LOCALS_SIZE) {
|
||||
return -1;
|
||||
}
|
||||
return local_bytes_used += sizeof(bool);
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#ifndef CPULOCAL_H
|
||||
#define CPULOCAL_H
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* Provides functions to allocate and access thread-local flags.
|
||||
*
|
||||
*//*
|
||||
* Copyright (C) 2022 Martin Whitaker.
|
||||
*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "boot.h"
|
||||
|
||||
/**
|
||||
* A single thread-local flag. These are spaced out in memory to ensure each
|
||||
* flag occupies a different cache line.
|
||||
*/
|
||||
typedef struct __attribute__((packed)) {
|
||||
bool flag;
|
||||
uint8_t spacing[AP_STACK_SIZE - sizeof(bool)];
|
||||
} local_flag_t;
|
||||
|
||||
/**
|
||||
* Allocates an array of thread-local flags, one per CPU core, and returns
|
||||
* an ID number that identifies the allocated array. Returns -1 if there is
|
||||
* insufficient thread local storage remaining to allocate a new array of
|
||||
* flags.
|
||||
*/
|
||||
int allocate_local_flag(void);
|
||||
|
||||
/**
|
||||
* Returns a pointer to the previously allocated array of thread-local flags
|
||||
* identified by flag_num.
|
||||
*/
|
||||
static inline local_flag_t *local_flags(int flag_num)
|
||||
{
|
||||
// The number returned by allocate_local_flag is the byte offset of the
|
||||
// flag from the start of the thread-local storage.
|
||||
return (local_flag_t *)(_stacks + BSP_STACK_SIZE - LOCALS_SIZE + flag_num);
|
||||
}
|
||||
|
||||
#endif // CPULOCAL_H
|
|
@ -1,5 +1,5 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (C) 2020 Martin Whitaker.
|
||||
// Copyright (C) 2020-2022 Martin Whitaker.
|
||||
//
|
||||
// Derived from memtest86+ reloc.c:
|
||||
//
|
||||
|
@ -11,6 +11,8 @@
|
|||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "assert.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Constants
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -61,13 +63,6 @@ typedef struct
|
|||
|
||||
#define ELF32_R_TYPE(r_info) ((r_info) & 0xff)
|
||||
|
||||
static inline void assert(int expr)
|
||||
{
|
||||
if (!expr) {
|
||||
__asm__ __volatile__ ("int $3");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the run-time load address of the shared object. This must be inlined
|
||||
* in a function which uses global data.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (C) 2020 Martin Whitaker.
|
||||
// Copyright (C) 2020-2022 Martin Whitaker.
|
||||
//
|
||||
// Derived from memtest86+ reloc.c:
|
||||
//
|
||||
|
@ -11,6 +11,8 @@
|
|||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "assert.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Constants
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -62,13 +64,6 @@ typedef struct
|
|||
|
||||
#define ELF64_R_TYPE(r_info) ((r_info) & 0xffffffff)
|
||||
|
||||
static inline void assert(int expr)
|
||||
{
|
||||
if (!expr) {
|
||||
__asm__ __volatile__ ("int $3");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the run-time load address of the shared object.
|
||||
*/
|
||||
|
|
26
system/smp.c
26
system/smp.c
|
@ -63,6 +63,10 @@
|
|||
#define APIC_DELMODE_STARTUP 6
|
||||
#define APIC_DELMODE_EXTINT 7
|
||||
|
||||
// APIC ICR busy flag
|
||||
|
||||
#define APIC_ICR_BUSY (1 << 12)
|
||||
|
||||
// IA32_APIC_BASE MSR bits
|
||||
|
||||
#define IA32_APIC_ENABLED (1 << 11)
|
||||
|
@ -614,18 +618,23 @@ static bool find_cpus_in_rsdp(void)
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool send_ipi(int apic_id, int trigger, int level, int mode, uint8_t vector, int delay_before_poll)
|
||||
static inline void send_ipi(int apic_id, int trigger, int level, int mode, uint8_t vector)
|
||||
{
|
||||
apic_write(APIC_REG_ICRHI, apic_id << 24);
|
||||
|
||||
apic_write(APIC_REG_ICRLO, trigger << 15 | level << 14 | mode << 8 | vector);
|
||||
}
|
||||
|
||||
static bool send_ipi_and_wait(int apic_id, int trigger, int level, int mode, uint8_t vector, int delay_before_poll)
|
||||
{
|
||||
send_ipi(apic_id, trigger, level, mode, vector);
|
||||
|
||||
usleep(delay_before_poll);
|
||||
|
||||
// Wait for send complete or timeout after 100ms.
|
||||
int timeout = 1000;
|
||||
while (timeout > 0) {
|
||||
bool send_pending = (apic_read(APIC_REG_ICRLO) & 0x00001000);
|
||||
bool send_pending = (apic_read(APIC_REG_ICRLO) & APIC_ICR_BUSY);
|
||||
if (!send_pending) {
|
||||
return true;
|
||||
}
|
||||
|
@ -663,13 +672,13 @@ static bool start_cpu(int cpu_num)
|
|||
(void)read_apic_esr(is_p5);
|
||||
|
||||
// Pulse the INIT IPI.
|
||||
if (!send_ipi(apic_id, APIC_TRIGGER_LEVEL, 1, APIC_DELMODE_INIT, 0, 0)) {
|
||||
if (!send_ipi_and_wait(apic_id, APIC_TRIGGER_LEVEL, 1, APIC_DELMODE_INIT, 0, 0)) {
|
||||
return false;
|
||||
}
|
||||
if (use_long_delays) {
|
||||
usleep(10*1000); // 10ms
|
||||
}
|
||||
if (!send_ipi(apic_id, APIC_TRIGGER_LEVEL, 0, APIC_DELMODE_INIT, 0, 0)) {
|
||||
if (!send_ipi_and_wait(apic_id, APIC_TRIGGER_LEVEL, 0, APIC_DELMODE_INIT, 0, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -679,7 +688,7 @@ static bool start_cpu(int cpu_num)
|
|||
(void)read_apic_esr(is_p5);
|
||||
|
||||
// Send the STARTUP IPI.
|
||||
if (!send_ipi(apic_id, 0, 0, APIC_DELMODE_STARTUP, AP_TRAMPOLINE_PAGE, use_long_delays ? 300 : 10)) {
|
||||
if (!send_ipi_and_wait(apic_id, 0, 0, APIC_DELMODE_STARTUP, AP_TRAMPOLINE_PAGE, use_long_delays ? 300 : 10)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -785,9 +794,12 @@ int smp_start(cpu_state_t cpu_state[MAX_CPUS])
|
|||
#endif
|
||||
}
|
||||
|
||||
bool smp_send_nmi(int cpu_num)
|
||||
void smp_send_nmi(int cpu_num)
|
||||
{
|
||||
return send_ipi(cpu_num_to_apic_id[cpu_num], 0, 0, APIC_DELMODE_NMI, 0, 200);
|
||||
while (apic_read(APIC_REG_ICRLO) & APIC_ICR_BUSY) {
|
||||
__builtin_ia32_pause();
|
||||
}
|
||||
send_ipi(cpu_num_to_apic_id[cpu_num], 0, 0, APIC_DELMODE_NMI, 0);
|
||||
}
|
||||
|
||||
int smp_my_cpu_num(void)
|
||||
|
|
|
@ -29,8 +29,7 @@
|
|||
typedef enum __attribute__ ((packed)) {
|
||||
CPU_STATE_DISABLED = 0,
|
||||
CPU_STATE_ENABLED = 1,
|
||||
CPU_STATE_RUNNING = 2,
|
||||
CPU_STATE_HALTED = 3
|
||||
CPU_STATE_RUNNING = 2
|
||||
} cpu_state_t;
|
||||
|
||||
/**
|
||||
|
@ -63,7 +62,7 @@ int smp_start(cpu_state_t cpu_state[MAX_CPUS]);
|
|||
* Sends a non-maskable interrupt to the CPU core whose ordinal number
|
||||
* is cpu_num.
|
||||
*/
|
||||
bool smp_send_nmi(int cpu_num);
|
||||
void smp_send_nmi(int cpu_num);
|
||||
|
||||
/**
|
||||
* Returns the ordinal number of the calling CPU core.
|
||||
|
|
|
@ -117,10 +117,19 @@ void calculate_chunk(testword_t **start, testword_t **end, int my_cpu, int segme
|
|||
void flush_caches(int my_cpu)
|
||||
{
|
||||
if (my_cpu >= 0) {
|
||||
barrier_wait(run_barrier);
|
||||
bool use_spin_wait = (power_save < POWER_SAVE_HIGH);
|
||||
if (use_spin_wait) {
|
||||
barrier_spin_wait(run_barrier);
|
||||
} else {
|
||||
barrier_halt_wait(run_barrier);
|
||||
}
|
||||
if (my_cpu == master_cpu) {
|
||||
cache_flush();
|
||||
}
|
||||
barrier_wait(run_barrier);
|
||||
if (use_spin_wait) {
|
||||
barrier_spin_wait(run_barrier);
|
||||
} else {
|
||||
barrier_halt_wait(run_barrier);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -79,7 +79,11 @@ int ticks_per_test[NUM_PASS_TYPES][NUM_TEST_PATTERNS];
|
|||
if (TRACE_BARRIERS) { \
|
||||
trace(my_cpu, "Run barrier wait at %s line %i", __FILE__, __LINE__); \
|
||||
} \
|
||||
barrier_wait(run_barrier); \
|
||||
if (power_save < POWER_SAVE_HIGH) { \
|
||||
barrier_spin_wait(run_barrier); \
|
||||
} else { \
|
||||
barrier_halt_wait(run_barrier); \
|
||||
} \
|
||||
}
|
||||
|
||||
int run_test(int my_cpu, int test, int stage, int iterations)
|
||||
|
|
Loading…
Reference in New Issue