Fix recovery conflict SIGUSR1 handling.

We shouldn't be doing non-trivial work in signal handlers in general,
and in this case the handler could reach unsafe code and corrupt state.
It also clobbered its own "reason" code.

Move all recovery conflict decision logic into the next
CHECK_FOR_INTERRUPTS(), and have the signal handler just set flags and
the latch, following the standard pattern.  Since there are several
different "reasons", use a separate flag for each.

With this refactoring, the recovery conflict system no longer
piggy-backs on top of the regular query cancellation mechanism, but
instead raises an error directly if it decides that is necessary.  It
still needs to respect QueryCancelHoldoffCount, because otherwise the
FEBE protocol might get out of sync (see commit 2b3a8b20c2d).

This fixes one class of intermittent failure in the new
031_recovery_conflict.pl test added by commit 9f8a050f, though the buggy
coding is much older.  Failures outside contrived testing seem to be
very rare (or perhaps incorrectly attributed) in the field, based on
the lack of reports.

No back-patch for now due to complexity and release schedule.  We have
the option to back-patch into 16 later, as 16 has prerequisite commit
bea3d7e.

Reviewed-by: Andres Freund <andres@anarazel.de> (earlier version)
Reviewed-by: Michael Paquier <michael@paquier.xyz> (earlier version)
Reviewed-by: Robert Haas <robertmhaas@gmail.com> (earlier version)
Tested-by: Christoph Berg <myon@debian.org>
Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com
Discussion: https://postgr.es/m/CALj2ACVr8au2J_9D88UfRCi0JdWhyQDDxAcSVav0B0irx9nXEg%40mail.gmail.com
This commit is contained in:
Thomas Munro 2023-09-07 12:38:23 +12:00
parent 8c16ad3b43
commit 0da096d78e
5 changed files with 193 additions and 177 deletions

View File

@ -4923,8 +4923,8 @@ LockBufferForCleanup(Buffer buffer)
} }
/* /*
* Check called from RecoveryConflictInterrupt handler when Startup * Check called from ProcessRecoveryConflictInterrupts() when Startup process
* process requests cancellation of all pin holders that are blocking it. * requests cancellation of all pin holders that are blocking it.
*/ */
bool bool
HoldingBufferPinThatDelaysRecovery(void) HoldingBufferPinThatDelaysRecovery(void)

View File

@ -662,25 +662,25 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
HandleParallelApplyMessageInterrupt(); HandleParallelApplyMessageInterrupt();
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_TABLESPACE)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_TABLESPACE))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_TABLESPACE); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_TABLESPACE);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOCK)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOCK))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOCK); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOCK);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
SetLatch(MyLatch); SetLatch(MyLatch);

View File

@ -161,9 +161,8 @@ static bool EchoQuery = false; /* -E switch */
static bool UseSemiNewlineNewline = false; /* -j switch */ static bool UseSemiNewlineNewline = false; /* -j switch */
/* whether or not, and why, we were canceled by conflict with recovery */ /* whether or not, and why, we were canceled by conflict with recovery */
static bool RecoveryConflictPending = false; static volatile sig_atomic_t RecoveryConflictPending = false;
static bool RecoveryConflictRetryable = true; static volatile sig_atomic_t RecoveryConflictPendingReasons[NUM_PROCSIGNALS];
static ProcSignalReason RecoveryConflictReason;
/* reused buffer to pass to SendRowDescriptionMessage() */ /* reused buffer to pass to SendRowDescriptionMessage() */
static MemoryContext row_description_context = NULL; static MemoryContext row_description_context = NULL;
@ -182,7 +181,6 @@ static bool check_log_statement(List *stmt_list);
static int errdetail_execute(List *raw_parsetree_list); static int errdetail_execute(List *raw_parsetree_list);
static int errdetail_params(ParamListInfo params); static int errdetail_params(ParamListInfo params);
static int errdetail_abort(void); static int errdetail_abort(void);
static int errdetail_recovery_conflict(void);
static void bind_param_error_callback(void *arg); static void bind_param_error_callback(void *arg);
static void start_xact_command(void); static void start_xact_command(void);
static void finish_xact_command(void); static void finish_xact_command(void);
@ -2510,9 +2508,9 @@ errdetail_abort(void)
* Add an errdetail() line showing conflict source. * Add an errdetail() line showing conflict source.
*/ */
static int static int
errdetail_recovery_conflict(void) errdetail_recovery_conflict(ProcSignalReason reason)
{ {
switch (RecoveryConflictReason) switch (reason)
{ {
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
errdetail("User was holding shared buffer pin for too long."); errdetail("User was holding shared buffer pin for too long.");
@ -3040,143 +3038,203 @@ FloatExceptionHandler(SIGNAL_ARGS)
} }
/* /*
* RecoveryConflictInterrupt: out-of-line portion of recovery conflict * Tell the next CHECK_FOR_INTERRUPTS() to check for a particular type of
* handling following receipt of SIGUSR1. Designed to be similar to die() * recovery conflict. Runs in a SIGUSR1 handler.
* and StatementCancelHandler(). Called only by a normal user backend
* that begins a transaction during recovery.
*/ */
void void
RecoveryConflictInterrupt(ProcSignalReason reason) HandleRecoveryConflictInterrupt(ProcSignalReason reason)
{ {
int save_errno = errno; RecoveryConflictPendingReasons[reason] = true;
RecoveryConflictPending = true;
InterruptPending = true;
/* latch will be set by procsignal_sigusr1_handler */
}
/* /*
* Don't joggle the elbow of proc_exit * Check one individual conflict reason.
*/ */
if (!proc_exit_inprogress) static void
ProcessRecoveryConflictInterrupt(ProcSignalReason reason)
{
switch (reason)
{ {
RecoveryConflictReason = reason; case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
switch (reason)
{
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
/*
* If we aren't waiting for a lock we can never deadlock.
*/
if (!IsWaitingForLock())
return;
/* Intentional fall through to check wait for pin */
/* FALLTHROUGH */
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
/*
* If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we
* aren't blocking the Startup process there is nothing more to
* do.
*
* When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is requested,
* if we're waiting for locks and the startup process is not
* waiting for buffer pin (i.e., also waiting for locks), we set
* the flag so that ProcSleep() will check for deadlocks.
*/
if (!HoldingBufferPinThatDelaysRecovery())
{
if (reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK &&
GetStartupBufferPinWaitBufId() < 0)
CheckDeadLockAlert();
return;
}
MyProc->recoveryConflictPending = true;
/* Intentional fall through to error handling */
/* FALLTHROUGH */
case PROCSIG_RECOVERY_CONFLICT_LOCK:
case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
/*
* If we aren't in a transaction any longer then ignore.
*/
if (!IsTransactionOrTransactionBlock())
return;
/* FALLTHROUGH */
case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
/*
* If we're not in a subtransaction then we are OK to throw an
* ERROR to resolve the conflict. Otherwise drop through to the
* FATAL case.
*
* PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT is a special case that
* always throws an ERROR (ie never promotes to FATAL), though it
* still has to respect QueryCancelHoldoffCount, so it shares this
* code path. Logical decoding slots are only acquired while
* performing logical decoding. During logical decoding no user
* controlled code is run. During [sub]transaction abort, the
* slot is released. Therefore user controlled code cannot
* intercept an error before the replication slot is released.
*
* XXX other times that we can throw just an ERROR *may* be
* PROCSIG_RECOVERY_CONFLICT_LOCK if no locks are held in parent
* transactions
*
* PROCSIG_RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by
* parent transactions and the transaction is not
* transaction-snapshot mode
*
* PROCSIG_RECOVERY_CONFLICT_TABLESPACE if no temp files or
* cursors open in parent transactions
*/
if (reason == PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT ||
!IsSubTransaction())
{
/* /*
* If we aren't waiting for a lock we can never deadlock. * If we already aborted then we no longer need to cancel. We
* do this here since we do not wish to ignore aborted
* subtransactions, which must cause FATAL, currently.
*/ */
if (!IsWaitingForLock()) if (IsAbortedTransactionBlockState())
return; return;
/* Intentional fall through to check wait for pin */
/* FALLTHROUGH */
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
/* /*
* If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we * If a recovery conflict happens while we are waiting for
* aren't blocking the Startup process there is nothing more * input from the client, the client is presumably just
* to do. * sitting idle in a transaction, preventing recovery from
* * making progress. We'll drop through to the FATAL case
* When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is * below to dislodge it, in that case.
* requested, if we're waiting for locks and the startup
* process is not waiting for buffer pin (i.e., also waiting
* for locks), we set the flag so that ProcSleep() will check
* for deadlocks.
*/ */
if (!HoldingBufferPinThatDelaysRecovery()) if (!DoingCommandRead)
{ {
if (reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK && /* Avoid losing sync in the FE/BE protocol. */
GetStartupBufferPinWaitBufId() < 0) if (QueryCancelHoldoffCount != 0)
CheckDeadLockAlert(); {
return; /*
} * Re-arm and defer this interrupt until later. See
* similar code in ProcessInterrupts().
MyProc->recoveryConflictPending = true; */
RecoveryConflictPendingReasons[reason] = true;
/* Intentional fall through to error handling */ RecoveryConflictPending = true;
/* FALLTHROUGH */ InterruptPending = true;
case PROCSIG_RECOVERY_CONFLICT_LOCK:
case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
/*
* If we aren't in a transaction any longer then ignore.
*/
if (!IsTransactionOrTransactionBlock())
return;
/*
* If we can abort just the current subtransaction then we are
* OK to throw an ERROR to resolve the conflict. Otherwise
* drop through to the FATAL case.
*
* XXX other times that we can throw just an ERROR *may* be
* PROCSIG_RECOVERY_CONFLICT_LOCK if no locks are held in
* parent transactions
*
* PROCSIG_RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held
* by parent transactions and the transaction is not
* transaction-snapshot mode
*
* PROCSIG_RECOVERY_CONFLICT_TABLESPACE if no temp files or
* cursors open in parent transactions
*/
if (!IsSubTransaction())
{
/*
* If we already aborted then we no longer need to cancel.
* We do this here since we do not wish to ignore aborted
* subtransactions, which must cause FATAL, currently.
*/
if (IsAbortedTransactionBlockState())
return; return;
}
RecoveryConflictPending = true; /*
QueryCancelPending = true; * We are cleared to throw an ERROR. Either it's the
InterruptPending = true; * logical slot case, or we have a top-level transaction
* that we can abort and a conflict that isn't inherently
* non-retryable.
*/
LockErrorCleanup();
pgstat_report_recovery_conflict(reason);
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("canceling statement due to conflict with recovery"),
errdetail_recovery_conflict(reason)));
break; break;
} }
}
/* Intentional fall through to session cancel */ /* Intentional fall through to session cancel */
/* FALLTHROUGH */ /* FALLTHROUGH */
case PROCSIG_RECOVERY_CONFLICT_DATABASE: case PROCSIG_RECOVERY_CONFLICT_DATABASE:
RecoveryConflictPending = true;
ProcDiePending = true;
InterruptPending = true;
break;
case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT: /*
RecoveryConflictPending = true; * Retrying is not possible because the database is dropped, or we
QueryCancelPending = true; * decided above that we couldn't resolve the conflict with an
InterruptPending = true; * ERROR and fell through. Terminate the session.
break; */
pgstat_report_recovery_conflict(reason);
ereport(FATAL,
(errcode(reason == PROCSIG_RECOVERY_CONFLICT_DATABASE ?
ERRCODE_DATABASE_DROPPED :
ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict(reason),
errhint("In a moment you should be able to reconnect to the"
" database and repeat your command.")));
break;
default: default:
elog(FATAL, "unrecognized conflict mode: %d", elog(FATAL, "unrecognized conflict mode: %d", (int) reason);
(int) reason);
}
Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending));
/*
* All conflicts apart from database cause dynamic errors where the
* command or transaction can be retried at a later point with some
* potential for success. No need to reset this, since non-retryable
* conflict errors are currently FATAL.
*/
if (reason == PROCSIG_RECOVERY_CONFLICT_DATABASE)
RecoveryConflictRetryable = false;
} }
}
/*
* Check each possible recovery conflict reason.
*/
static void
ProcessRecoveryConflictInterrupts(void)
{
/* /*
* Set the process latch. This function essentially emulates signal * We don't need to worry about joggling the elbow of proc_exit, because
* handlers like die() and StatementCancelHandler() and it seems prudent * proc_exit_prepare() holds interrupts, so ProcessInterrupts() won't call
* to behave similarly as they do. * us.
*/ */
SetLatch(MyLatch); Assert(!proc_exit_inprogress);
Assert(InterruptHoldoffCount == 0);
Assert(RecoveryConflictPending);
errno = save_errno; RecoveryConflictPending = false;
for (ProcSignalReason reason = PROCSIG_RECOVERY_CONFLICT_FIRST;
reason <= PROCSIG_RECOVERY_CONFLICT_LAST;
reason++)
{
if (RecoveryConflictPendingReasons[reason])
{
RecoveryConflictPendingReasons[reason] = false;
ProcessRecoveryConflictInterrupt(reason);
}
}
} }
/* /*
@ -3231,24 +3289,6 @@ ProcessInterrupts(void)
*/ */
proc_exit(1); proc_exit(1);
} }
else if (RecoveryConflictPending && RecoveryConflictRetryable)
{
pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(FATAL,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict()));
}
else if (RecoveryConflictPending)
{
/* Currently there is only one non-retryable recovery conflict */
Assert(RecoveryConflictReason == PROCSIG_RECOVERY_CONFLICT_DATABASE);
pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(FATAL,
(errcode(ERRCODE_DATABASE_DROPPED),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict()));
}
else if (IsBackgroundWorker) else if (IsBackgroundWorker)
ereport(FATAL, ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN), (errcode(ERRCODE_ADMIN_SHUTDOWN),
@ -3291,31 +3331,13 @@ ProcessInterrupts(void)
errmsg("connection to client lost"))); errmsg("connection to client lost")));
} }
/*
* If a recovery conflict happens while we are waiting for input from the
* client, the client is presumably just sitting idle in a transaction,
* preventing recovery from making progress. Terminate the connection to
* dislodge it.
*/
if (RecoveryConflictPending && DoingCommandRead)
{
QueryCancelPending = false; /* this trumps QueryCancel */
RecoveryConflictPending = false;
LockErrorCleanup();
pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(FATAL,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("terminating connection due to conflict with recovery"),
errdetail_recovery_conflict(),
errhint("In a moment you should be able to reconnect to the"
" database and repeat your command.")));
}
/* /*
* Don't allow query cancel interrupts while reading input from the * Don't allow query cancel interrupts while reading input from the
* client, because we might lose sync in the FE/BE protocol. (Die * client, because we might lose sync in the FE/BE protocol. (Die
* interrupts are OK, because we won't read any further messages from the * interrupts are OK, because we won't read any further messages from the
* client in that case.) * client in that case.)
*
* See similar logic in ProcessRecoveryConflictInterrupts().
*/ */
if (QueryCancelPending && QueryCancelHoldoffCount != 0) if (QueryCancelPending && QueryCancelHoldoffCount != 0)
{ {
@ -3374,16 +3396,6 @@ ProcessInterrupts(void)
(errcode(ERRCODE_QUERY_CANCELED), (errcode(ERRCODE_QUERY_CANCELED),
errmsg("canceling autovacuum task"))); errmsg("canceling autovacuum task")));
} }
if (RecoveryConflictPending)
{
RecoveryConflictPending = false;
LockErrorCleanup();
pgstat_report_recovery_conflict(RecoveryConflictReason);
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("canceling statement due to conflict with recovery"),
errdetail_recovery_conflict()));
}
/* /*
* If we are reading a command from the client, just ignore the cancel * If we are reading a command from the client, just ignore the cancel
@ -3399,6 +3411,9 @@ ProcessInterrupts(void)
} }
} }
if (RecoveryConflictPending)
ProcessRecoveryConflictInterrupts();
if (IdleInTransactionSessionTimeoutPending) if (IdleInTransactionSessionTimeoutPending)
{ {
/* /*

View File

@ -38,13 +38,15 @@ typedef enum
PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */ PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */
/* Recovery conflict reasons */ /* Recovery conflict reasons */
PROCSIG_RECOVERY_CONFLICT_DATABASE, PROCSIG_RECOVERY_CONFLICT_FIRST,
PROCSIG_RECOVERY_CONFLICT_DATABASE = PROCSIG_RECOVERY_CONFLICT_FIRST,
PROCSIG_RECOVERY_CONFLICT_TABLESPACE, PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
PROCSIG_RECOVERY_CONFLICT_LOCK, PROCSIG_RECOVERY_CONFLICT_LOCK,
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT, PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT,
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
PROCSIG_RECOVERY_CONFLICT_LAST = PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
NUM_PROCSIGNALS /* Must be last! */ NUM_PROCSIGNALS /* Must be last! */
} ProcSignalReason; } ProcSignalReason;

View File

@ -70,8 +70,7 @@ extern void die(SIGNAL_ARGS);
extern void quickdie(SIGNAL_ARGS) pg_attribute_noreturn(); extern void quickdie(SIGNAL_ARGS) pg_attribute_noreturn();
extern void StatementCancelHandler(SIGNAL_ARGS); extern void StatementCancelHandler(SIGNAL_ARGS);
extern void FloatExceptionHandler(SIGNAL_ARGS) pg_attribute_noreturn(); extern void FloatExceptionHandler(SIGNAL_ARGS) pg_attribute_noreturn();
extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from SIGUSR1 extern void HandleRecoveryConflictInterrupt(ProcSignalReason reason);
* handler */
extern void ProcessClientReadInterrupt(bool blocked); extern void ProcessClientReadInterrupt(bool blocked);
extern void ProcessClientWriteInterrupt(bool blocked); extern void ProcessClientWriteInterrupt(bool blocked);