From b36fbd9f8da140f5b0ef9f7daa6b3cb4cae8a69b Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 11 Mar 2024 10:25:01 +0900 Subject: [PATCH] Improve consistency of replication slot statistics The replication slot stats stored in shared memory rely on an internal index number. Both pgstat_reset_replslot() and pgstat_fetch_replslot() lacked some LWLock protections with ReplicationSlotControlLock while operating on these index numbers. This issue could cause these two functions to potentially operate on incorrect slots when taken in isolation in the event of slots dropped and/or re-created concurrently. Note that pg_stat_get_replication_slot() is called once per slot when querying pg_stat_replication_slots, meaning that the stats are retrieved across multiple ReplicationSlotControlLock acquisitions. So, while this commit improves more consistency, it may still be possible that statistics are not completely consistent for a single scan of pg_stat_replication_slots under concurrent replication slot drop or creation activity. The issue should unlikely be a problem in practice, causing the report of inconsistent stats or or the stats reset of an incorrect slot, so no backpatch is done. Author: Bertrand Drouvot Reviewed-by: Heikki Linnakangas, Shveta Malik, Michael Paquier Discussion: https://postgr.es/m/ZeGq1HDWFfLkjh4o@ip-10-97-1-34.eu-west-3.compute.internal --- src/backend/utils/activity/pgstat_replslot.c | 42 ++++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/backend/utils/activity/pgstat_replslot.c b/src/backend/utils/activity/pgstat_replslot.c index c94a3fb513..c61bad1627 100644 --- a/src/backend/utils/activity/pgstat_replslot.c +++ b/src/backend/utils/activity/pgstat_replslot.c @@ -29,7 +29,7 @@ #include "utils/pgstat_internal.h" -static int get_replslot_index(const char *name); +static int get_replslot_index(const char *name, bool need_lock); /* @@ -45,8 +45,10 @@ pgstat_reset_replslot(const char *name) Assert(name != NULL); + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); + /* Check if the slot exits with the given name. */ - slot = SearchNamedReplicationSlot(name, true); + slot = SearchNamedReplicationSlot(name, false); if (!slot) ereport(ERROR, @@ -55,15 +57,14 @@ pgstat_reset_replslot(const char *name) name))); /* - * Nothing to do for physical slots as we collect stats only for logical - * slots. + * Reset stats if it is a logical slot. Nothing to do for physical slots + * as we collect stats only for logical slots. */ - if (SlotIsPhysical(slot)) - return; + if (SlotIsLogical(slot)) + pgstat_reset(PGSTAT_KIND_REPLSLOT, InvalidOid, + ReplicationSlotIndex(slot)); - /* reset this one entry */ - pgstat_reset(PGSTAT_KIND_REPLSLOT, InvalidOid, - ReplicationSlotIndex(slot)); + LWLockRelease(ReplicationSlotControlLock); } /* @@ -163,13 +164,20 @@ pgstat_drop_replslot(ReplicationSlot *slot) PgStat_StatReplSlotEntry * pgstat_fetch_replslot(NameData slotname) { - int idx = get_replslot_index(NameStr(slotname)); + int idx; + PgStat_StatReplSlotEntry *slotentry = NULL; - if (idx == -1) - return NULL; + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); - return (PgStat_StatReplSlotEntry *) - pgstat_fetch_entry(PGSTAT_KIND_REPLSLOT, InvalidOid, idx); + idx = get_replslot_index(NameStr(slotname), false); + + if (idx != -1) + slotentry = (PgStat_StatReplSlotEntry *) pgstat_fetch_entry(PGSTAT_KIND_REPLSLOT, + InvalidOid, idx); + + LWLockRelease(ReplicationSlotControlLock); + + return slotentry; } void @@ -188,7 +196,7 @@ pgstat_replslot_to_serialized_name_cb(const PgStat_HashKey *key, const PgStatSha bool pgstat_replslot_from_serialized_name_cb(const NameData *name, PgStat_HashKey *key) { - int idx = get_replslot_index(NameStr(*name)); + int idx = get_replslot_index(NameStr(*name), true); /* slot might have been deleted */ if (idx == -1) @@ -208,13 +216,13 @@ pgstat_replslot_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts) } static int -get_replslot_index(const char *name) +get_replslot_index(const char *name, bool need_lock) { ReplicationSlot *slot; Assert(name != NULL); - slot = SearchNamedReplicationSlot(name, true); + slot = SearchNamedReplicationSlot(name, need_lock); if (!slot) return -1;