Move replication slot release to before_shmem_exit().

Previously, replication slots were released in ProcKill() on error, resulting
in reporting replication slot drop of ephemeral slots after the stats
subsystem was already shut down.

To fix this problem, move replication slot release to a before_shmem_exit()
hook that is called before the stats collector shuts down. There wasn't really
a good reason for the slot handling to be in ProcKill() anyway.

Patch by Masahiko Sawada, with very minor polishing by me.

I, Andres, wrote a test for dropping slots during process exit, but there may
be some OS dependent issues around the number of times FATAL error messages
are displayed due to a still debated libpq issue. So that test will be
committed separately / later.

Reviewed-By: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Author: Masahiko Sawada <sawada.mshk@gmail.com>
Discussion: https://postgr.es/m/CAD21AoDAeEpAbZEyYJsPZJUmSPaRicVSBObaL7sPaofnKz+9zg@mail.gmail.com
This commit is contained in:
Andres Freund 2022-02-14 16:44:28 -08:00
parent b45fa79340
commit 2f6501fa3c
5 changed files with 35 additions and 9 deletions

View File

@ -46,6 +46,7 @@
#include "pgstat.h" #include "pgstat.h"
#include "replication/slot.h" #include "replication/slot.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/proc.h" #include "storage/proc.h"
#include "storage/procarray.h" #include "storage/procarray.h"
#include "utils/builtins.h" #include "utils/builtins.h"
@ -99,6 +100,7 @@ ReplicationSlot *MyReplicationSlot = NULL;
int max_replication_slots = 0; /* the maximum number of replication int max_replication_slots = 0; /* the maximum number of replication
* slots */ * slots */
static void ReplicationSlotShmemExit(int code, Datum arg);
static void ReplicationSlotDropAcquired(void); static void ReplicationSlotDropAcquired(void);
static void ReplicationSlotDropPtr(ReplicationSlot *slot); static void ReplicationSlotDropPtr(ReplicationSlot *slot);
@ -160,6 +162,29 @@ ReplicationSlotsShmemInit(void)
} }
} }
/*
* Register the callback for replication slot cleanup and releasing.
*/
void
ReplicationSlotInitialize(void)
{
before_shmem_exit(ReplicationSlotShmemExit, 0);
}
/*
* Release and cleanup replication slots.
*/
static void
ReplicationSlotShmemExit(int code, Datum arg)
{
/* Make sure active replication slots are released */
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
/* Also cleanup all the temporary slots. */
ReplicationSlotCleanup();
}
/* /*
* Check whether the passed slot name is valid and report errors at elevel. * Check whether the passed slot name is valid and report errors at elevel.
* *

View File

@ -831,13 +831,6 @@ ProcKill(int code, Datum arg)
/* Cancel any pending condition variable sleep, too */ /* Cancel any pending condition variable sleep, too */
ConditionVariableCancelSleep(); ConditionVariableCancelSleep();
/* Make sure active replication slots are released */
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
/* Also cleanup all the temporary slots. */
ReplicationSlotCleanup();
/* /*
* Detach from any lock group of which we are a member. If the leader * Detach from any lock group of which we are a member. If the leader
* exist before all other group members, its PGPROC will remain allocated * exist before all other group members, its PGPROC will remain allocated

View File

@ -4261,8 +4261,8 @@ PostgresMain(const char *dbname, const char *username)
* We can't release replication slots inside AbortTransaction() as we * We can't release replication slots inside AbortTransaction() as we
* need to be able to start and abort transactions while having a slot * need to be able to start and abort transactions while having a slot
* acquired. But we never need to hold them across top level errors, * acquired. But we never need to hold them across top level errors,
* so releasing here is fine. There's another cleanup in ProcKill() * so releasing here is fine. There also is a before_shmem_exit()
* ensuring we'll correctly cleanup on FATAL errors as well. * callback ensuring correct cleanup on FATAL errors.
*/ */
if (MyReplicationSlot != NULL) if (MyReplicationSlot != NULL)
ReplicationSlotRelease(); ReplicationSlotRelease();

View File

@ -43,6 +43,7 @@
#include "pgstat.h" #include "pgstat.h"
#include "postmaster/autovacuum.h" #include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h" #include "postmaster/postmaster.h"
#include "replication/slot.h"
#include "replication/walsender.h" #include "replication/walsender.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/fd.h" #include "storage/fd.h"
@ -626,6 +627,12 @@ BaseInit(void)
* ever try to insert XLOG. * ever try to insert XLOG.
*/ */
InitXLogInsert(); InitXLogInsert();
/*
* Initialize replication slots after pgstat. The exit hook might need to
* drop ephemeral slots, which in turn triggers stats reporting.
*/
ReplicationSlotInitialize();
} }

View File

@ -206,6 +206,7 @@ extern void ReplicationSlotSave(void);
extern void ReplicationSlotMarkDirty(void); extern void ReplicationSlotMarkDirty(void);
/* misc stuff */ /* misc stuff */
extern void ReplicationSlotInitialize(void);
extern bool ReplicationSlotValidateName(const char *name, int elevel); extern bool ReplicationSlotValidateName(const char *name, int elevel);
extern void ReplicationSlotReserveWal(void); extern void ReplicationSlotReserveWal(void);
extern void ReplicationSlotsComputeRequiredXmin(bool already_locked); extern void ReplicationSlotsComputeRequiredXmin(bool already_locked);