Fix recovery conflict SIGUSR1 handling.

We shouldn't be doing non-trivial work in signal handlers in general,
and in this case the handler could reach unsafe code and corrupt state.
It also clobbered its own "reason" code.

Move all recovery conflict decision logic into the next
CHECK_FOR_INTERRUPTS(), and have the signal handler just set flags and
the latch, following the standard pattern.  Since there are several
different "reasons", use a separate flag for each.

With this refactoring, the recovery conflict system no longer
piggy-backs on top of the regular query cancellation mechanism, but
instead raises an error directly if it decides that is necessary.  It
still needs to respect QueryCancelHoldoffCount, because otherwise the
FEBE protocol might get out of sync (see commit 2b3a8b20c2d).

This fixes one class of intermittent failure in the new
031_recovery_conflict.pl test added by commit 9f8a050f, though the buggy
coding is much older.  Failures outside contrived testing seem to be
very rare (or perhaps incorrectly attributed) in the field, based on
the lack of reports.

No back-patch for now due to complexity and release schedule.  We have
the option to back-patch into 16 later, as 16 has prerequisite commit
bea3d7e.

Reviewed-by: Andres Freund <andres@anarazel.de> (earlier version)
Reviewed-by: Michael Paquier <michael@paquier.xyz> (earlier version)
Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier version)
Tested-by: Christoph Berg <myon@debian.org>
Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com
Discussion: https://postgr.es/m/CALj2ACVr8au2J_9D88UfRCi0JdWhyQDDxAcSVav0B0irx9nXEg%40mail.gmail.com
This commit is contained in:
Thomas Munro 2023-09-07 12:38:23 +12:00
parent 8c16ad3b43
commit 0da096d78e
5 changed files with 193 additions and 177 deletions

View File

@ -4923,8 +4923,8 @@ LockBufferForCleanup(Buffer buffer)
} }
/* /*
* Check called from RecoveryConflictInterrupt handler when Startup * Check called from ProcessRecoveryConflictInterrupts() when Startup process
* process requests cancellation of all pin holders that are blocking it. * requests cancellation of all pin holders that are blocking it.
*/ */
bool bool
HoldingBufferPinThatDelaysRecovery(void) HoldingBufferPinThatDelaysRecovery(void)

View File

@ -662,25 +662,25 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
HandleParallelApplyMessageInterrupt(); HandleParallelApplyMessageInterrupt();
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_TABLESPACE)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_TABLESPACE))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_TABLESPACE); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_TABLESPACE);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOCK)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOCK))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOCK); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOCK);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
SetLatch(MyLatch); SetLatch(MyLatch);

View File

@ -161,9 +161,8 @@ static bool EchoQuery = false; /* -E switch */
static bool UseSemiNewlineNewline = false; /* -j switch */ static bool UseSemiNewlineNewline = false; /* -j switch */
/* whether or not, and why, we were canceled by conflict with recovery */ /* whether or not, and why, we were canceled by conflict with recovery */
static bool RecoveryConflictPending = false; static volatile sig_atomic_t RecoveryConflictPending = false;
static bool RecoveryConflictRetryable = true; static volatile sig_atomic_t RecoveryConflictPendingReasons[NUM_PROCSIGNALS];
static ProcSignalReason RecoveryConflictReason;
/* reused buffer to pass to SendRowDescriptionMessage() */ /* reused buffer to pass to SendRowDescriptionMessage() */
static MemoryContext row_description_context = NULL; static MemoryContext row_description_context = NULL;
@ -182,7 +181,6 @@ static bool check_log_statement(List *stmt_list);
static int errdetail_execute(List *raw_parsetree_list); static int errdetail_execute(List *raw_parsetree_list);
static int errdetail_params(ParamListInfo params); static int errdetail_params(ParamListInfo params);
static int errdetail_abort(void); static int errdetail_abort(void);
static int errdetail_recovery_conflict(void);
static void bind_param_error_callback(void *arg); static void bind_param_error_callback(void *arg);
static void start_xact_command(void); static void start_xact_command(void);
static void finish_xact_command(void); static void finish_xact_command(void);
@ -2510,9 +2508,9 @@ errdetail_abort(void)
* Add an errdetail() line showing conflict source. * Add an errdetail() line showing conflict source.
*/ */
static int static int
errdetail_recovery_conflict(void) errdetail_recovery_conflict(ProcSignalReason reason)
{ {
switch (RecoveryConflictReason) switch (reason)
{ {
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
errdetail("User was holding shared buffer pin for too long."); errdetail("User was holding shared buffer pin for too long.");
@ -3040,22 +3038,24 @@ FloatExceptionHandler(SIGNAL_ARGS)
} }
/* /*
* RecoveryConflictInterrupt: out-of-line portion of recovery conflict * Tell the next CHECK_FOR_INTERRUPTS() to check for a particular type of
* handling following receipt of SIGUSR1. Designed to be similar to die() * recovery conflict. Runs in a SIGUSR1 handler.
* and StatementCancelHandler(). Called only by a normal user backend
* that begins a transaction during recovery.
*/ */
void void
RecoveryConflictInterrupt(ProcSignalReason reason) HandleRecoveryConflictInterrupt(ProcSignalReason reason)
{ {
int save_errno = errno; RecoveryConflictPendingReasons[reason] = true;
RecoveryConflictPending = true;
InterruptPending = true;
/* latch will be set by procsignal_sigusr1_handler */
}
/* /*
* Don't joggle the elbow of proc_exit * Check one individual conflict reason.
*/ */
if (!proc_exit_inprogress) static void
{ ProcessRecoveryConflictInterrupt(ProcSignalReason reason)
RecoveryConflictReason = reason; {
switch (reason) switch (reason)
{ {
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
@ -3073,14 +3073,13 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
/* /*
* If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we * If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we
* aren't blocking the Startup process there is nothing more * aren't blocking the Startup process there is nothing more to
* to do. * do.
* *
* When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is * When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is requested,
* requested, if we're waiting for locks and the startup * if we're waiting for locks and the startup process is not
* process is not waiting for buffer pin (i.e., also waiting * waiting for buffer pin (i.e., also waiting for locks), we set
* for locks), we set the flag so that ProcSleep() will check * the flag so that ProcSleep() will check for deadlocks.
* for deadlocks.
*/ */
if (!HoldingBufferPinThatDelaysRecovery()) if (!HoldingBufferPinThatDelaysRecovery())
{ {
@ -3105,78 +3104,137 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
if (!IsTransactionOrTransactionBlock()) if (!IsTransactionOrTransactionBlock())
return; return;
/* FALLTHROUGH */
case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
/* /*
* If we can abort just the current subtransaction then we are * If we're not in a subtransaction then we are OK to throw an
* OK to throw an ERROR to resolve the conflict. Otherwise * ERROR to resolve the conflict. Otherwise drop through to the
* drop through to the FATAL case. * FATAL case.
*
* PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT is a special case that
* always throws an ERROR (ie never promotes to FATAL), though it
* still has to respect QueryCancelHoldoffCount, so it shares this
* code path. Logical decoding slots are only acquired while
* performing logical decoding. During logical decoding no user
* controlled code is run. During [sub]transaction abort, the
* slot is released. Therefore user controlled code cannot
* intercept an error before the replication slot is released.
* *
* XXX other times that we can throw just an ERROR *may* be * XXX other times that we can throw just an ERROR *may* be
* PROCSIG_RECOVERY_CONFLICT_LOCK if no locks are held in * PROCSIG_RECOVERY_CONFLICT_LOCK if no locks are held in parent
* parent transactions * transactions
* *
* PROCSIG_RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held * PROCSIG_RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by
* by parent transactions and the transaction is not * parent transactions and the transaction is not
* transaction-snapshot mode * transaction-snapshot mode
* *
* PROCSIG_RECOVERY_CONFLICT_TABLESPACE if no temp files or * PROCSIG_RECOVERY_CONFLICT_TABLESPACE if no temp files or
* cursors open in parent transactions * cursors open in parent transactions
*/ */
if (!IsSubTransaction()) if (reason == PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT ||
!IsSubTransaction())
{ {
/* /*
* If we already aborted then we no longer need to cancel. * If we already aborted then we no longer need to cancel. We
* We do this here since we do not wish to ignore aborted * do this here since we do not wish to ignore aborted
* subtransactions, which must cause FATAL, currently. * subtransactions, which must cause FATAL, currently.
*/ */
if (IsAbortedTransactionBlockState()) if (IsAbortedTransactionBlockState())
return; return;
/*
* If a recovery conflict happens while we are waiting for
* input from the client, the client is presumably just
* sitting idle in a transaction, preventing recovery from
* making progress. We'll drop through to the FATAL case
* below to dislodge it, in that case.
*/
if (!DoingCommandRead)
{
/* Avoid losing sync in the FE/BE protocol. */
if (QueryCancelHoldoffCount != 0)
{
/*
* Re-arm and defer this interrupt until later. See
* similar code in ProcessInterrupts().
*/
RecoveryConflictPendingReasons[reason] = true;
RecoveryConflictPending = true; RecoveryConflictPending = true;
QueryCancelPending = true;
InterruptPending = true; InterruptPending = true;
return;
}
/*
* We are cleared to throw an ERROR. Either it's the
* logical slot case, or we have a top-level transaction
* that we can abort and a conflict that isn't inherently
* non-retryable.
*/
LockErrorCleanup();
pgstat_report_recovery_conflict(reason);
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("canceling statement due to conflict with recovery"),
errdetail_recovery_conflict(reason)));
break; break;
} }
}
/* Intentional fall through to session cancel */ /* Intentional fall through to session cancel */
/* FALLTHROUGH */ /* FALLTHROUGH */
case PROCSIG_RECOVERY_CONFLICT_DATABASE: case PROCSIG_RECOVERY_CONFLICT_DATABASE:
RecoveryConflictPending = true;
ProcDiePending = true;
InterruptPending = true;
break;
case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT: /*
RecoveryConflictPending = true; * Retrying is not possible because the database is dropped, or we
QueryCancelPending = true; * decided above that we couldn't resolve the conflict with an
InterruptPending = true; * ERROR and fell through. Terminate the session.
*/
pgstat_report_recovery_conflict(reason);
ereport(FATAL,
(errcode(reason == PROCSIG_RECOVERY_CONFLICT_DATABASE ?
ERRCODE_DATABASE_DROPPED :
ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict(reason),
errhint("In a moment you should be able to reconnect to the"
" database and repeat your command.")));
break; break;
default: default:
elog(FATAL, "unrecognized conflict mode: %d", elog(FATAL, "unrecognized conflict mode: %d", (int) reason);
(int) reason);
} }
}
Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending)); /*
* Check each possible recovery conflict reason.
/*
* All conflicts apart from database cause dynamic errors where the
* command or transaction can be retried at a later point with some
* potential for success. No need to reset this, since non-retryable
* conflict errors are currently FATAL.
*/ */
if (reason == PROCSIG_RECOVERY_CONFLICT_DATABASE) static void
RecoveryConflictRetryable = false; ProcessRecoveryConflictInterrupts(void)
{
/*
* We don't need to worry about joggling the elbow of proc_exit, because
* proc_exit_prepare() holds interrupts, so ProcessInterrupts() won't call
* us.
*/
Assert(!proc_exit_inprogress);
Assert(InterruptHoldoffCount == 0);
Assert(RecoveryConflictPending);
RecoveryConflictPending = false;
for (ProcSignalReason reason = PROCSIG_RECOVERY_CONFLICT_FIRST;
reason <= PROCSIG_RECOVERY_CONFLICT_LAST;
reason++)
{
if (RecoveryConflictPendingReasons[reason])
{
RecoveryConflictPendingReasons[reason] = false;
ProcessRecoveryConflictInterrupt(reason);
}
} }
/*
* Set the process latch. This function essentially emulates signal
* handlers like die() and StatementCancelHandler() and it seems prudent
* to behave similarly as they do.
*/
SetLatch(MyLatch);
errno = save_errno;
} }
/* /*
@ -3231,24 +3289,6 @@ ProcessInterrupts(void)
*/ */
proc_exit(1); proc_exit(1);
} }
else if (RecoveryConflictPending && RecoveryConflictRetryable)
{
pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(FATAL,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict()));
}
else if (RecoveryConflictPending)
{
/* Currently there is only one non-retryable recovery conflict */
Assert(RecoveryConflictReason == PROCSIG_RECOVERY_CONFLICT_DATABASE);
pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(FATAL,
(errcode(ERRCODE_DATABASE_DROPPED),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict()));
}
else if (IsBackgroundWorker) else if (IsBackgroundWorker)
ereport(FATAL, ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN), (errcode(ERRCODE_ADMIN_SHUTDOWN),
@ -3291,31 +3331,13 @@ ProcessInterrupts(void)
errmsg("connection to client lost"))); errmsg("connection to client lost")));
} }
/*
* If a recovery conflict happens while we are waiting for input from the
* client, the client is presumably just sitting idle in a transaction,
* preventing recovery from making progress. Terminate the connection to
* dislodge it.
*/
if (RecoveryConflictPending && DoingCommandRead)
{
QueryCancelPending = false; /* this trumps QueryCancel */
RecoveryConflictPending = false;
LockErrorCleanup();
pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(FATAL,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict(),
errhint("In a moment you should be able to reconnect to the"
" database and repeat your command.")));
}
/* /*
* Don't allow query cancel interrupts while reading input from the * Don't allow query cancel interrupts while reading input from the
* client, because we might lose sync in the FE/BE protocol. (Die * client, because we might lose sync in the FE/BE protocol. (Die
* interrupts are OK, because we won't read any further messages from the * interrupts are OK, because we won't read any further messages from the
* client in that case.) * client in that case.)
*
* See similar logic in ProcessRecoveryConflictInterrupts().
*/ */
if (QueryCancelPending && QueryCancelHoldoffCount != 0) if (QueryCancelPending && QueryCancelHoldoffCount != 0)
{ {
@ -3374,16 +3396,6 @@ ProcessInterrupts(void)
(errcode(ERRCODE_QUERY_CANCELED), (errcode(ERRCODE_QUERY_CANCELED),
errmsg("canceling autovacuum task"))); errmsg("canceling autovacuum task")));
} }
if (RecoveryConflictPending)
{
RecoveryConflictPending = false;
LockErrorCleanup();
pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("canceling statement due to conflict with recovery"),
errdetail_recovery_conflict()));
}
/* /*
* If we are reading a command from the client, just ignore the cancel * If we are reading a command from the client, just ignore the cancel
@ -3399,6 +3411,9 @@ ProcessInterrupts(void)
} }
} }
if (RecoveryConflictPending)
ProcessRecoveryConflictInterrupts();
if (IdleInTransactionSessionTimeoutPending) if (IdleInTransactionSessionTimeoutPending)
{ {
/* /*

View File

@ -38,13 +38,15 @@ typedef enum
PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */ PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */
/* Recovery conflict reasons */ /* Recovery conflict reasons */
PROCSIG_RECOVERY_CONFLICT_DATABASE, PROCSIG_RECOVERY_CONFLICT_FIRST,
PROCSIG_RECOVERY_CONFLICT_DATABASE = PROCSIG_RECOVERY_CONFLICT_FIRST,
PROCSIG_RECOVERY_CONFLICT_TABLESPACE, PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
PROCSIG_RECOVERY_CONFLICT_LOCK, PROCSIG_RECOVERY_CONFLICT_LOCK,
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT, PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT,
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
PROCSIG_RECOVERY_CONFLICT_LAST = PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
NUM_PROCSIGNALS /* Must be last! */ NUM_PROCSIGNALS /* Must be last! */
} ProcSignalReason; } ProcSignalReason;

View File

@ -70,8 +70,7 @@ extern void die(SIGNAL_ARGS);
extern void quickdie(SIGNAL_ARGS) pg_attribute_noreturn(); extern void quickdie(SIGNAL_ARGS) pg_attribute_noreturn();
extern void StatementCancelHandler(SIGNAL_ARGS); extern void StatementCancelHandler(SIGNAL_ARGS);
extern void FloatExceptionHandler(SIGNAL_ARGS) pg_attribute_noreturn(); extern void FloatExceptionHandler(SIGNAL_ARGS) pg_attribute_noreturn();
extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from SIGUSR1 extern void HandleRecoveryConflictInterrupt(ProcSignalReason reason);
* handler */
extern void ProcessClientReadInterrupt(bool blocked); extern void ProcessClientReadInterrupt(bool blocked);
extern void ProcessClientWriteInterrupt(bool blocked); extern void ProcessClientWriteInterrupt(bool blocked);