diff --git a/src/backend/utils/misc/timeout.c b/src/backend/utils/misc/timeout.c index b47e8ba2fb..88139ab82b 100644 --- a/src/backend/utils/misc/timeout.c +++ b/src/backend/utils/misc/timeout.c @@ -53,18 +53,29 @@ static timeout_params *volatile active_timeouts[MAX_TIMEOUTS]; /* * Flag controlling whether the signal handler is allowed to do anything. - * We leave this "false" when we're not expecting interrupts, just in case. + * This is useful to avoid race conditions with the handler. Note in + * particular that this lets us make changes in the data structures without + * tediously disabling and re-enabling the timer signal. Most of the time, + * no interrupt would happen anyway during such critical sections, but if + * one does, this rule ensures it's safe. Leaving the signal enabled across + * multiple operations can greatly reduce the number of kernel calls we make, + * too. See comments in schedule_alarm() about that. * - * Note that we don't bother to reset any pending timer interrupt when we - * disable the signal handler; it's not really worth the cycles to do so, - * since the probability of the interrupt actually occurring while we have - * it disabled is low. See comments in schedule_alarm() about that. + * We leave this "false" when we're not expecting interrupts, just in case. */ static volatile sig_atomic_t alarm_enabled = false; #define disable_alarm() (alarm_enabled = false) #define enable_alarm() (alarm_enabled = true) +/* + * State recording if and when we next expect the interrupt to fire. + * Note that the signal handler will unconditionally reset signal_pending to + * false, so that can change asynchronously even when alarm_enabled is false. + */ +static volatile sig_atomic_t signal_pending = false; +static TimestampTz signal_due_at = 0; /* valid only when signal_pending */ + /***************************************************************************** * Internal helper functions @@ -185,7 +196,11 @@ enable_timeout(TimeoutId id, TimestampTz now, TimestampTz fin_time) * Schedule alarm for the next active timeout, if any * * We assume the caller has obtained the current time, or a close-enough - * approximation. + * approximation. (It's okay if a tick or two has passed since "now", or + * if a little more time elapses before we reach the kernel call; that will + * cause us to ask for an interrupt a tick or two later than the nearest + * timeout, which is no big deal. Passing a "now" value that's in the future + * would be bad though.) */ static void schedule_alarm(TimestampTz now) @@ -193,21 +208,38 @@ schedule_alarm(TimestampTz now) if (num_active_timeouts > 0) { struct itimerval timeval; + TimestampTz nearest_timeout; long secs; int usecs; MemSet(&timeval, 0, sizeof(struct itimerval)); - /* Get the time remaining till the nearest pending timeout */ - TimestampDifference(now, active_timeouts[0]->fin_time, - &secs, &usecs); - /* - * It's possible that the difference is less than a microsecond; - * ensure we don't cancel, rather than set, the interrupt. + * Get the time remaining till the nearest pending timeout. If it is + * negative, assume that we somehow missed an interrupt, and force + * signal_pending off. This gives us a chance to recover if the + * kernel drops a timeout request for some reason. */ - if (secs == 0 && usecs == 0) + nearest_timeout = active_timeouts[0]->fin_time; + if (now > nearest_timeout) + { + signal_pending = false; + /* force an interrupt as soon as possible */ + secs = 0; usecs = 1; + } + else + { + TimestampDifference(now, nearest_timeout, + &secs, &usecs); + + /* + * It's possible that the difference is less than a microsecond; + * ensure we don't cancel, rather than set, the interrupt. + */ + if (secs == 0 && usecs == 0) + usecs = 1; + } timeval.it_value.tv_sec = secs; timeval.it_value.tv_usec = usecs; @@ -218,7 +250,7 @@ schedule_alarm(TimestampTz now) * interrupt could occur before we can set alarm_enabled, so that the * signal handler would fail to do anything. * - * Because we didn't bother to reset the timer in disable_alarm(), + * Because we didn't bother to disable the timer in disable_alarm(), * it's possible that a previously-set interrupt will fire between * enable_alarm() and setitimer(). This is safe, however. There are * two possible outcomes: @@ -244,9 +276,53 @@ schedule_alarm(TimestampTz now) */ enable_alarm(); + /* + * If there is already an interrupt pending that's at or before the + * needed time, we need not do anything more. The signal handler will + * do the right thing in the first case, and re-schedule the interrupt + * for later in the second case. It might seem that the extra + * interrupt is wasted work, but it's not terribly much work, and this + * method has very significant advantages in the common use-case where + * we repeatedly set a timeout that we don't expect to reach and then + * cancel it. Instead of invoking setitimer() every time the timeout + * is set or canceled, we perform one interrupt and a re-scheduling + * setitimer() call at intervals roughly equal to the timeout delay. + * For example, with statement_timeout = 1s and a throughput of + * thousands of queries per second, this method requires an interrupt + * and setitimer() call roughly once a second, rather than thousands + * of setitimer() calls per second. + * + * Because of the possible passage of time between when we obtained + * "now" and when we reach setitimer(), the kernel's opinion of when + * to trigger the interrupt is likely to be a bit later than + * signal_due_at. That's fine, for the same reasons described above. + */ + if (signal_pending && nearest_timeout >= signal_due_at) + return; + + /* + * As with calling enable_alarm(), we must set signal_pending *before* + * calling setitimer(); if we did it after, the signal handler could + * trigger before we set it, leaving us with a false opinion that a + * signal is still coming. + * + * Other race conditions involved with setting/checking signal_pending + * are okay, for the reasons described above. + */ + signal_due_at = nearest_timeout; + signal_pending = true; + /* Set the alarm timer */ if (setitimer(ITIMER_REAL, &timeval, NULL) != 0) + { + /* + * Clearing signal_pending here is a bit pro forma, but not + * entirely so, since something in the FATAL exit path could try + * to use timeout facilities. + */ + signal_pending = false; elog(FATAL, "could not enable SIGALRM timer: %m"); + } } } @@ -279,6 +355,12 @@ handle_sig_alarm(SIGNAL_ARGS) */ SetLatch(MyLatch); + /* + * Always reset signal_pending, even if !alarm_enabled, since indeed no + * signal is now pending. + */ + signal_pending = false; + /* * Fire any pending timeouts, but only if we're enabled to do so. */ @@ -591,7 +673,7 @@ disable_timeouts(const DisableTimeoutParams *timeouts, int count) } /* - * Disable SIGALRM and remove all timeouts from the active list, + * Disable the signal handler, remove all timeouts from the active list, * and optionally reset their timeout indicators. */ void @@ -602,18 +684,10 @@ disable_all_timeouts(bool keep_indicators) disable_alarm(); /* - * Only bother to reset the timer if we think it's active. We could just - * let the interrupt happen anyway, but it's probably a bit cheaper to do - * setitimer() than to let the useless interrupt happen. + * We used to disable the timer interrupt here, but in common usage + * patterns it's cheaper to leave it enabled; that may save us from having + * to enable it again shortly. See comments in schedule_alarm(). */ - if (num_active_timeouts > 0) - { - struct itimerval timeval; - - MemSet(&timeval, 0, sizeof(struct itimerval)); - if (setitimer(ITIMER_REAL, &timeval, NULL) != 0) - elog(FATAL, "could not disable SIGALRM timer: %m"); - } num_active_timeouts = 0;