Fix creation of partition descriptor during concurrent detach
When a partition is being detached in concurrent mode, it is possible for find_inheritance_children_extended() to return that partition in the list, and immediately after that receive an invalidation message that sets its relpartbound to NULL just before we read it. (This can happen because table_open() reads invalidation messages.) Currently we raise the error "ERROR: missing relpartbound for relation %u" in that situation, but that's bogus because the table is no longer a partition, so we shouldn't be complaining about it. A better reaction is to retry the find_inheritance_children_extended() call to get a new list, which will no longer contain the partition being detached. Noticed while investigating bug #18377. Backpatch to 14, where DETACH CONCURRENTLY appeared. Discussion: https://postgr.es/m/202405201616.y4ht2qe5ihoy@alvherre.pgsql
This commit is contained in:
parent
d1ffcc7fa3
commit
c2fab70248
@ -24,6 +24,7 @@
|
|||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
#include "utils/fmgroids.h"
|
#include "utils/fmgroids.h"
|
||||||
#include "utils/hsearch.h"
|
#include "utils/hsearch.h"
|
||||||
|
#include "utils/inval.h"
|
||||||
#include "utils/lsyscache.h"
|
#include "utils/lsyscache.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
#include "utils/partcache.h"
|
#include "utils/partcache.h"
|
||||||
@ -144,16 +145,19 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
|
|||||||
ListCell *cell;
|
ListCell *cell;
|
||||||
int i,
|
int i,
|
||||||
nparts;
|
nparts;
|
||||||
|
bool retried = false;
|
||||||
PartitionKey key = RelationGetPartitionKey(rel);
|
PartitionKey key = RelationGetPartitionKey(rel);
|
||||||
MemoryContext new_pdcxt;
|
MemoryContext new_pdcxt;
|
||||||
MemoryContext oldcxt;
|
MemoryContext oldcxt;
|
||||||
int *mapping;
|
int *mapping;
|
||||||
|
|
||||||
|
retry:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get partition oids from pg_inherits. This uses a single snapshot to
|
* Get partition oids from pg_inherits. This uses a single snapshot to
|
||||||
* fetch the list of children, so while more children may be getting added
|
* fetch the list of children, so while more children may be getting added
|
||||||
* concurrently, whatever this function returns will be accurate as of
|
* or removed concurrently, whatever this function returns will be
|
||||||
* some well-defined point in time.
|
* accurate as of some well-defined point in time.
|
||||||
*/
|
*/
|
||||||
detached_exist = false;
|
detached_exist = false;
|
||||||
detached_xmin = InvalidTransactionId;
|
detached_xmin = InvalidTransactionId;
|
||||||
@ -196,18 +200,23 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The system cache may be out of date; if so, we may find no pg_class
|
* Two problems are possible here. First, a concurrent ATTACH
|
||||||
* tuple or an old one where relpartbound is NULL. In that case, try
|
* PARTITION might be in the process of adding a new partition, but
|
||||||
* the table directly. We can't just AcceptInvalidationMessages() and
|
* the syscache doesn't have it, or its copy of it does not yet have
|
||||||
* retry the system cache lookup because it's possible that a
|
* its relpartbound set. We cannot just AcceptInvalidationMessages(),
|
||||||
* concurrent ATTACH PARTITION operation has removed itself from the
|
* because the other process might have already removed itself from
|
||||||
* ProcArray but not yet added invalidation messages to the shared
|
* the ProcArray but not yet added its invalidation messages to the
|
||||||
* queue; InvalidateSystemCaches() would work, but seems excessive.
|
* shared queue. We solve this problem by reading pg_class directly
|
||||||
|
* for the desired tuple.
|
||||||
*
|
*
|
||||||
* Note that this algorithm assumes that PartitionBoundSpec we manage
|
* The other problem is that DETACH CONCURRENTLY is in the process of
|
||||||
* to fetch is the right one -- so this is only good enough for
|
* removing a partition, which happens in two steps: first it marks it
|
||||||
* concurrent ATTACH PARTITION, not concurrent DETACH PARTITION or
|
* as "detach pending", commits, then unsets relpartbound. If
|
||||||
* some hypothetical operation that changes the partition bounds.
|
* find_inheritance_children_extended included that partition but
|
||||||
|
* below we see that DETACH CONCURRENTLY has reset relpartbound for
|
||||||
|
* it, we'd see an inconsistent view. (The inconsistency is seen
|
||||||
|
* because table_open below reads invalidation messages.) We protect
|
||||||
|
* against this by retrying find_inheritance_children_extended().
|
||||||
*/
|
*/
|
||||||
if (boundspec == NULL)
|
if (boundspec == NULL)
|
||||||
{
|
{
|
||||||
@ -231,6 +240,25 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached)
|
|||||||
boundspec = stringToNode(TextDatumGetCString(datum));
|
boundspec = stringToNode(TextDatumGetCString(datum));
|
||||||
systable_endscan(scan);
|
systable_endscan(scan);
|
||||||
table_close(pg_class, AccessShareLock);
|
table_close(pg_class, AccessShareLock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we still don't get a relpartbound value, then it must be
|
||||||
|
* because of DETACH CONCURRENTLY. Restart from the top, as
|
||||||
|
* explained above. We only do this once, for two reasons: first,
|
||||||
|
* only one DETACH CONCURRENTLY session could affect us at a time,
|
||||||
|
* since each of them would have to wait for the snapshot under
|
||||||
|
* which this is running; and second, to avoid possible infinite
|
||||||
|
* loops in case of catalog corruption.
|
||||||
|
*
|
||||||
|
* Note that the current memory context is short-lived enough, so
|
||||||
|
* we needn't worry about memory leaks here.
|
||||||
|
*/
|
||||||
|
if (!boundspec && !retried)
|
||||||
|
{
|
||||||
|
AcceptInvalidationMessages();
|
||||||
|
retried = true;
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sanity checks. */
|
/* Sanity checks. */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user