esp32: Fix hang in taskYIELD() on riscv CPUs when IRQs disabled.

Regression introduced in 337742f.

The hang occurs because the esp32 port was calling "from ISR" port-layer
functions to set/clear the interrupt mask. FreeRTOS kernel therefore
doesn't know the CPU is in a critical section. In taskYIELD() the riscv
port layer blocks after yielding until it knows the yield has happened, and
would block indefinitely if IRQs are disabled (until INT WDT triggers).

Moving to the "public" portENTER_CRITICAL/portEXIT_CRITICAL API means that
FreeRTOS knows we're in a critical section and can react accordingly.

Adds a regression test for this case (should be safe to run on all ports).

On single core CPUs, this should result in almost exactly the same
behaviour apart from fixing this case.

On dual core CPUs, we now have cross-CPU mutual exclusion for atomic
sections. This also shouldn't change anything, mostly because all the code
which enters an atomic section runs on the same CPU. If it does change
something, it will be to fix a thread safety bug.

There is some risk that this change triggers a FreeRTOS crash where there
is a call to a blocking FreeRTOS API with interrupts disabled. Previously
this code might have worked, but was probably thread unsafe and would have
hung in some circumstances.

This work was funded through GitHub Sponsors.

Signed-off-by: Angus Gratton <angus@redyak.com.au>
This commit is contained in:
Angus Gratton 2024-09-25 18:18:22 +10:00
parent 197becbdcc
commit 05ac69329d
4 changed files with 48 additions and 6 deletions

View File

@ -57,6 +57,8 @@ TaskHandle_t mp_main_task_handle;
static uint8_t stdin_ringbuf_array[260]; static uint8_t stdin_ringbuf_array[260];
ringbuf_t stdin_ringbuf = {stdin_ringbuf_array, sizeof(stdin_ringbuf_array), 0, 0}; ringbuf_t stdin_ringbuf = {stdin_ringbuf_array, sizeof(stdin_ringbuf_array), 0, 0};
// Lock passed to portENTER_CRITICAL()/portEXIT_CRITICAL() by
// mp_begin_atomic_section()/mp_end_atomic_section().
portMUX_TYPE mp_atomic_mux = portMUX_INITIALIZER_UNLOCKED;
// Check the ESP-IDF error code and raise an OSError if it's not ESP_OK. // Check the ESP-IDF error code and raise an OSError if it's not ESP_OK.
#if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_NORMAL #if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_NORMAL
void check_esp_err_(esp_err_t code) void check_esp_err_(esp_err_t code)

View File

@ -54,6 +54,8 @@ extern TaskHandle_t mp_main_task_handle;
extern ringbuf_t stdin_ringbuf; extern ringbuf_t stdin_ringbuf;
// Lock taken by mp_begin_atomic_section() and released by
// mp_end_atomic_section(); the definition lives in the port's C source.
extern portMUX_TYPE mp_atomic_mux;
// Check the ESP-IDF error code and raise an OSError if it's not ESP_OK. // Check the ESP-IDF error code and raise an OSError if it's not ESP_OK.
#if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_NORMAL #if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_NORMAL
#define check_esp_err(code) check_esp_err_(code) #define check_esp_err(code) check_esp_err_(code)
@ -63,12 +65,21 @@ void check_esp_err_(esp_err_t code);
void check_esp_err_(esp_err_t code, const char *func, const int line, const char *file); void check_esp_err_(esp_err_t code, const char *func, const int line, const char *file);
#endif #endif
// Note: these "critical nested" macros do not ensure cross-CPU exclusion, static inline mp_uint_t mp_begin_atomic_section(void) {
// the only disable interrupts on the current CPU. To full manage exclusion portENTER_CRITICAL(&mp_atomic_mux);
// one should use portENTER_CRITICAL/portEXIT_CRITICAL instead. return 0;
#include "freertos/FreeRTOS.h" }
#define MICROPY_BEGIN_ATOMIC_SECTION() portSET_INTERRUPT_MASK_FROM_ISR()
#define MICROPY_END_ATOMIC_SECTION(state) portCLEAR_INTERRUPT_MASK_FROM_ISR(state) static inline void mp_end_atomic_section(mp_uint_t state) {
(void)state;
portEXIT_CRITICAL(&mp_atomic_mux);
}
// Note: These atomic macros disable interrupts on the calling CPU, and on SMP
// systems also protect against concurrent access to an atomic section on the
// other CPU.
// The "state" value is a placeholder kept for the macro interface:
// mp_begin_atomic_section() always returns 0 and mp_end_atomic_section()
// ignores its argument.
#define MICROPY_BEGIN_ATOMIC_SECTION() mp_begin_atomic_section()
#define MICROPY_END_ATOMIC_SECTION(state) mp_end_atomic_section(state)
uint32_t mp_hal_ticks_us(void); uint32_t mp_hal_ticks_us(void);
__attribute__((always_inline)) static inline uint32_t mp_hal_ticks_cpu(void) { __attribute__((always_inline)) static inline uint32_t mp_hal_ticks_cpu(void) {

View File

@ -0,0 +1,26 @@
# Test executing Python code while IRQs disabled.
try:
from machine import disable_irq, enable_irq
from time import ticks_us
except ImportError:
print("SKIP")
raise SystemExit
# Recursive so that disable_irq() calls are also nested: f(n) disables IRQs
# n + 1 times before doing any work, then re-enables them in reverse order.
def f(n):
    irq_state = disable_irq()
    if not n:
        # Innermost level: spin for a fixed number of iterations to simulate
        # doing some work in a critical section. (Can't wait for ticks_us()
        # to advance, as not all ports will "tick" with interrupts off.)
        for _ in range(100):
            ticks_us()
    else:
        f(n - 1)
    enable_irq(irq_state)
# For each nesting depth 0..2, print the depth and then run f() five times.
for depth in range(3):
    print(depth)
    for _attempt in range(5):
        f(depth)

View File

@ -0,0 +1,3 @@
0
1
2