diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index a2b1b1036b..9a62e84def 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -11,6 +11,8 @@ PGFILEDESC = "amcheck - function for verifying relation integrity"
 
 REGRESS = check check_btree
 
+TAP_TESTS = 1
+
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/contrib/amcheck/t/002_cic.pl b/contrib/amcheck/t/002_cic.pl
new file mode 100644
index 0000000000..26b56054b1
--- /dev/null
+++ b/contrib/amcheck/t/002_cic.pl
@@ -0,0 +1,78 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test CREATE INDEX CONCURRENTLY with concurrent modifications
+use strict;
+use warnings;
+
+use Config;
+use PostgresNode;
+use TestLib;
+
+use Test::More tests => 4;
+
+my ($node, $result);
+
+#
+# Test set-up
+#
+$node = get_new_node('CIC_test');
+$node->init;
+$node->append_conf('postgresql.conf', 'lock_timeout = 180000');
+$node->start;
+$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
+$node->safe_psql('postgres', q(CREATE TABLE tbl(i int)));
+$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i)));
+
+#
+# Stress CIC with pgbench
+#
+
+# Run background pgbench with CIC.  We cannot mix this script into the
+# single pgbench run below: CIC will deadlock with itself occasionally.
+my $pgbench_out   = '';
+my $pgbench_timer = IPC::Run::timeout(180);
+my $pgbench_h     = $node->background_pgbench(
+	'--no-vacuum --client=1 --transactions=200',
+	{
+		'002_pgbench_concurrent_cic' => q(
+			DROP INDEX CONCURRENTLY idx;
+			CREATE INDEX CONCURRENTLY idx ON tbl(i);
+			SELECT bt_index_check('idx',true);
+		   )
+	},
+	\$pgbench_out,
+	$pgbench_timer);
+
+# Run pgbench.
+$node->pgbench(
+	'--no-vacuum --client=5 --transactions=200',
+	0,
+	[qr{actually processed}],
+	[qr{^$}],
+	'concurrent INSERTs',
+	{
+		'002_pgbench_concurrent_transaction' => q(
+			BEGIN;
+			INSERT INTO tbl VALUES(0);
+			COMMIT;
+		  ),
+		'002_pgbench_concurrent_transaction_savepoints' => q(
+			BEGIN;
+			SAVEPOINT s1;
+			INSERT INTO tbl VALUES(0);
+			COMMIT;
+		  )
+	});
+
+$pgbench_h->pump_nb;
+$pgbench_h->finish();
+$result =
+    ($Config{osname} eq "MSWin32")
+  ? ($pgbench_h->full_results)[0]
+  : $pgbench_h->result(0);
+is($result, 0, "pgbench with CIC works");
+
+# done
+$node->stop;
+done_testing();
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 4fbc6d0720..809c3a0972 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -584,7 +584,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
 			int			i;
 
 			if (msg->rc.relId == InvalidOid)
-				RelationCacheInvalidate();
+				RelationCacheInvalidate(false);
 			else
 				RelationCacheInvalidateEntry(msg->rc.relId);
 
@@ -641,12 +641,18 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
  */
 void
 InvalidateSystemCaches(void)
+{
+	InvalidateSystemCachesExtended(false);
+}
+
+void
+InvalidateSystemCachesExtended(bool debug_discard)
 {
 	int			i;
 
 	InvalidateCatalogSnapshot();
 	ResetCatalogCaches();
-	RelationCacheInvalidate();	/* gets smgr and relmap too */
+	RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */
 
 	for (i = 0; i < syscache_callback_count; i++)
 	{
@@ -717,7 +723,7 @@ AcceptInvalidationMessages(void)
 		if (recursion_depth < 3)
 		{
 			recursion_depth++;
-			InvalidateSystemCaches();
+			InvalidateSystemCachesExtended(true);
 			recursion_depth--;
 		}
 	}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 32e75560ea..7976a0d66d 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -150,6 +150,24 @@ bool		criticalSharedRelcachesBuilt = false;
  */
 static long relcacheInvalsReceived = 0L;
 
+/*
+ * in_progress_list is a stack of ongoing RelationBuildDesc() calls.  CREATE
+ * INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
+ * It critically relies on each backend absorbing those changes no later than
+ * next transaction start.  Hence, RelationBuildDesc() loops until it finishes
+ * without accepting a relevant invalidation.  (Most invalidation consumers
+ * don't do this.)
+ */
+typedef struct inprogressent
+{
+	Oid			reloid;			/* OID of relation being built */
+	bool		invalidated;	/* whether an invalidation arrived for it */
+} InProgressEnt;
+
+static InProgressEnt *in_progress_list;
+static int	in_progress_list_len;
+static int	in_progress_list_maxlen;
+
 /*
  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
  * cleanup work.  This list intentionally has limited size; if it overflows,
This list intentionally has limited size; if it overflows, @@ -1043,6 +1061,7 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2) static Relation RelationBuildDesc(Oid targetRelId, bool insertIt) { + int in_progress_offset; Relation relation; Oid relid; HeapTuple pg_class_tuple; @@ -1070,6 +1089,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) oldcxt = MemoryContextSwitchTo(tmpcxt); #endif + /* Register to catch invalidation messages */ + if (in_progress_list_len >= in_progress_list_maxlen) + { + int allocsize; + + allocsize = in_progress_list_maxlen * 2; + in_progress_list = repalloc(in_progress_list, + allocsize * sizeof(*in_progress_list)); + in_progress_list_maxlen = allocsize; + } + in_progress_offset = in_progress_list_len++; + in_progress_list[in_progress_offset].reloid = targetRelId; +retry: + in_progress_list[in_progress_offset].invalidated = false; + /* * find the tuple in pg_class corresponding to the given relation id */ @@ -1085,6 +1119,8 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) MemoryContextSwitchTo(oldcxt); MemoryContextDelete(tmpcxt); #endif + Assert(in_progress_offset + 1 == in_progress_list_len); + in_progress_list_len--; return NULL; } @@ -1244,6 +1280,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) */ heap_freetuple(pg_class_tuple); + /* + * If an invalidation arrived mid-build, start over. Between here and the + * end of this function, don't add code that does or reasonably could read + * system catalogs. That range must be free from invalidation processing + * for the !insertIt case. For the insertIt case, RelationCacheInsert() + * will enroll this relation in ordinary relcache invalidation processing, + */ + if (in_progress_list[in_progress_offset].invalidated) + { + RelationDestroyRelation(relation, false); + goto retry; + } + Assert(in_progress_offset + 1 == in_progress_list_len); + in_progress_list_len--; + /* * Insert newly created relation into relcache hash table, if requested. * @@ -2586,6 +2637,14 @@ RelationClearRelation(Relation relation, bool rebuild) /* Build temporary entry, but don't link it into hashtable */ newrel = RelationBuildDesc(save_relid, false); + + /* + * Between here and the end of the swap, don't add code that does or + * reasonably could read system catalogs. That range must be free + * from invalidation processing. See RelationBuildDesc() manipulation + * of in_progress_list. + */ + if (newrel == NULL) { /* @@ -2816,6 +2875,14 @@ RelationCacheInvalidateEntry(Oid relationId) relcacheInvalsReceived++; RelationFlushRelation(relation); } + else + { + int i; + + for (i = 0; i < in_progress_list_len; i++) + if (in_progress_list[i].reloid == relationId) + in_progress_list[i].invalidated = true; + } } /* @@ -2824,11 +2891,11 @@ RelationCacheInvalidateEntry(Oid relationId) * and rebuild those with positive reference counts. Also reset the smgr * relation cache and re-read relation mapping data. * - * This is currently used only to recover from SI message buffer overflow, - * so we do not touch relations having new-in-transaction relfilenodes; they - * cannot be targets of cross-backend SI updates (and our own updates now go - * through a separate linked list that isn't limited by the SI message - * buffer size). 
+ *	 Apart from debug_discard_caches, this is currently used only to recover
+ *	 from SI message buffer overflow, so we do not touch relations having
+ *	 new-in-transaction relfilenodes; they cannot be targets of cross-backend
+ *	 SI updates (and our own updates now go through a separate linked list
+ *	 that isn't limited by the SI message buffer size).
  *
  *	 We do this in two phases: the first pass deletes deletable items, and
  *	 the second one rebuilds the rebuildable items.  This is essential for
@@ -2846,9 +2913,14 @@ RelationCacheInvalidateEntry(Oid relationId)
  *	 second pass processes nailed-in-cache items before other nondeletable
  *	 items.  This should ensure that system catalogs are up to date before
  *	 we attempt to use them to reload information about other open relations.
+ *
+ *	 After those two phases, which take effect immediately, we normally
+ *	 signal any RelationBuildDesc() on the stack to start over.  However, we
+ *	 don't do this if called as part of debug_discard_caches.  Otherwise,
+ *	 RelationBuildDesc() would become an infinite loop.
  */
 void
-RelationCacheInvalidate(void)
+RelationCacheInvalidate(bool debug_discard)
 {
 	HASH_SEQ_STATUS status;
 	RelIdCacheEnt *idhentry;
@@ -2856,6 +2928,7 @@ RelationCacheInvalidate(void)
 	List	   *rebuildFirstList = NIL;
 	List	   *rebuildList = NIL;
 	ListCell   *l;
+	int			i;
 
 	/*
 	 * Reload relation mapping data before starting to reconstruct cache.
@@ -2942,6 +3015,11 @@ RelationCacheInvalidate(void)
 		RelationClearRelation(relation, true);
 	}
 	list_free(rebuildList);
+
+	if (!debug_discard)
+		/* Any RelationBuildDesc() on the stack must start over. */
+		for (i = 0; i < in_progress_list_len; i++)
+			in_progress_list[i].invalidated = true;
 }
 
 /*
@@ -3092,6 +3170,13 @@ AtEOXact_RelationCache(bool isCommit)
 	RelIdCacheEnt *idhentry;
 	int			i;
 
+	/*
+	 * Forget in_progress_list.  This is relevant when we're aborting due to
+	 * an error during RelationBuildDesc().
+	 */
+	Assert(in_progress_list_len == 0 || !isCommit);
+	in_progress_list_len = 0;
+
 	/*
 	 * Unless the eoxact_list[] overflowed, we only need to examine the rels
 	 * listed in it.  Otherwise fall back on a hash_seq_search scan.
@@ -3238,6 +3323,14 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
 	RelIdCacheEnt *idhentry;
 	int			i;
 
+	/*
+	 * Forget in_progress_list.  This is relevant when we're aborting due to
+	 * an error during RelationBuildDesc().  We don't commit subtransactions
+	 * during RelationBuildDesc().
+	 */
+	Assert(in_progress_list_len == 0 || !isCommit);
+	in_progress_list_len = 0;
+
 	/*
 	 * Unless the eoxact_list[] overflowed, we only need to examine the rels
 	 * listed in it.  Otherwise fall back on a hash_seq_search scan.  Same
@@ -3786,6 +3879,7 @@ void
 RelationCacheInitialize(void)
 {
 	HASHCTL		ctl;
+	int			allocsize;
 
 	/*
 	 * make sure cache memory context exists
@@ -3802,6 +3896,15 @@ RelationCacheInitialize(void)
 	RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
 								  &ctl, HASH_ELEM | HASH_BLOBS);
 
+	/*
+	 * reserve enough in_progress_list slots for many cases
+	 */
+	allocsize = 4;
+	in_progress_list =
+		MemoryContextAlloc(CacheMemoryContext,
+						   allocsize * sizeof(*in_progress_list));
+	in_progress_list_maxlen = allocsize;
+
 	/*
 	 * relation mapper needs to be initialized too
 	 */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index bc5081cf72..4c6b86c961 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -61,4 +61,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
 extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
 
 extern void InvalidateSystemCaches(void);
+extern void InvalidateSystemCachesExtended(bool debug_discard);
 
 #endif							/* INVAL_H */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index a432a407b0..ff478c3ba7 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -120,7 +120,7 @@ extern void RelationForgetRelation(Oid rid);
 
 extern void RelationCacheInvalidateEntry(Oid relationId);
 
-extern void RelationCacheInvalidate(void);
+extern void RelationCacheInvalidate(bool debug_discard);
 
 extern void RelationCloseSmgrByOid(Oid relationId);
 
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 52f552be85..cbcb74866b 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -1638,6 +1638,141 @@ sub interactive_psql
 	return $harness;
 }
 
+# Common sub of pgbench-invoking interfaces.  Makes any requested script files
+# and returns pgbench command-line options causing use of those files.
+sub _pgbench_make_files
+{
+	my ($self, $files) = @_;
+	my @file_opts;
+
+	if (defined $files)
+	{
+
+		# note: files are ordered for determinism
+		for my $fn (sort keys %$files)
+		{
+			my $filename = $self->basedir . '/' . $fn;
+			push @file_opts, '-f', $filename;
+
+			# strip any pgbench script weight suffix (e.g. "@2")
+			$filename =~ s/\@\d+$//;
+
+			#push @filenames, $filename;
+			# filenames are expected to be unique within a test
+			if (-e $filename)
+			{
+				ok(0, "$filename must not already exist");
+				unlink $filename or die "cannot unlink $filename: $!";
+			}
+			TestLib::append_to_file($filename, $$files{$fn});
+		}
+	}
+
+	return @file_opts;
+}
+
+=pod
+
+=item $node->pgbench($opts, $stat, $out, $err, $name, $files, @args)
+
+Invoke B<pgbench>, with parameters and files.
+
+=over
+
+=item $opts
+
+Options as a string to be split on spaces.
+
+=item $stat
+
+Expected exit status.
+
+=item $out
+
+Reference to a regexp list that must match stdout.
+
+=item $err
+
+Reference to a regexp list that must match stderr.
+
+=item $name
+
+Name of test for error messages.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=item @args
+
+Further raw options or arguments.
+
+=back
+
+=cut
+
+sub pgbench
+{
+	local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+	my ($self, $opts, $stat, $out, $err, $name, $files, @args) = @_;
+	my @cmd = (
+		'pgbench',
+		split(/\s+/, $opts),
+		$self->_pgbench_make_files($files), @args);
+
+	$self->command_checks_all(\@cmd, $stat, $out, $err, $name);
+}
+
+=pod
+
+=item $node->background_pgbench($opts, $files, \$stdout, $timer) => harness
+
+Invoke B<pgbench> and return an IPC::Run harness object.  The process's stdin
+is empty, and its stdout and stderr go to the $stdout scalar reference.  This
+allows the caller to act on other parts of the system while B<pgbench> is
+running.  Errors from B<pgbench> are the caller's problem.
+
+The specified timer object is attached to the harness, as well.  It's the
+caller's responsibility to select the timeout length, and to restart the
+timer after each command if the timeout is per-command.
+
+Be sure to "finish" the harness when done with it.
+
+=over
+
+=item $opts
+
+Options as a string to be split on spaces.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=back
+
+=cut
+
+sub background_pgbench
+{
+	my ($self, $opts, $files, $stdout, $timer) = @_;
+
+	my @cmd =
+	  ('pgbench', split(/\s+/, $opts), $self->_pgbench_make_files($files));
+
+	local $ENV{PGHOST} = $self->host;
+	local $ENV{PGPORT} = $self->port;
+
+	my $stdin = "";
+	# IPC::Run would otherwise append to existing contents:
+	$$stdout = "" if ref($stdout);
+
+	my $harness = IPC::Run::start \@cmd, '<', \$stdin, '>', $stdout, '2>&1',
+	  $timer;
+
+	return $harness;
+}
+
 =pod
 
 =item $node->poll_query_until($dbname, $query [, $expected ])
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 918d14bcae..50fdb17fa3 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1044,6 +1044,7 @@ ImportForeignSchemaStmt
 ImportForeignSchemaType
 ImportForeignSchema_function
 ImportQual
+InProgressEnt
 IncludeWal
 InclusionOpaque
 IncrementVarSublevelsUp_context
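
To see the relcache.c protocol in isolation, here is a standalone C model of
it.  Everything in it is invented scaffolding for illustration only:
deliver_inval() and build_entry() stand in for RelationCacheInvalidateEntry()
and RelationBuildDesc().  Only the bookkeeping mirrors the patch: register the
build on a stack, record a concurrent invalidation as a flag on the stack
entry, and restart the build until one pass completes with the flag clear.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int Oid;

typedef struct
{
	Oid			reloid;			/* OID of relation being built */
	bool		invalidated;	/* invalidation arrived mid-build? */
} InProgressEnt;

static InProgressEnt *in_progress_list;
static int	in_progress_list_len;
static int	in_progress_list_maxlen = 4;

/* Model of RelationCacheInvalidateEntry()'s new else-branch: flag any
 * in-progress build of this relation. */
static void
deliver_inval(Oid relid)
{
	for (int i = 0; i < in_progress_list_len; i++)
		if (in_progress_list[i].reloid == relid)
			in_progress_list[i].invalidated = true;
}

/* Model of RelationBuildDesc(): retry until one pass sees no inval. */
static int
build_entry(Oid relid, bool inval_once)
{
	int			attempts = 0;
	int			slot;

	/* grow the stack by doubling, as the patch does with repalloc() */
	if (in_progress_list_len >= in_progress_list_maxlen)
	{
		in_progress_list_maxlen *= 2;
		in_progress_list = realloc(in_progress_list,
								   in_progress_list_maxlen * sizeof(*in_progress_list));
	}
	slot = in_progress_list_len++;
	in_progress_list[slot].reloid = relid;

retry:
	in_progress_list[slot].invalidated = false;
	attempts++;

	/* catalog reads happen here; simulate one inval arriving mid-build */
	if (inval_once && attempts == 1)
		deliver_inval(relid);

	if (in_progress_list[slot].invalidated)
		goto retry;				/* discard the partial build, start over */

	assert(slot + 1 == in_progress_list_len);	/* stack discipline */
	in_progress_list_len--;
	return attempts;
}

int
main(void)
{
	in_progress_list = malloc(in_progress_list_maxlen * sizeof(*in_progress_list));
	printf("clean build: %d attempt(s)\n", build_entry(16384, false));
	printf("raced build: %d attempt(s)\n", build_entry(16384, true));
	free(in_progress_list);
	return 0;
}

Compiled with any C99 compiler, this prints "clean build: 1 attempt(s)" and
"raced build: 2 attempt(s)", matching the at-most-one-extra-pass behavior the
test above exercises under pgbench load.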
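A second, equally hypothetical sketch shows why RelationCacheInvalidate()
gained the debug_discard parameter and why AcceptInvalidationMessages() passes
true from its recursion block: with debug_discard_caches, every catalog read
ends in a full cache flush, so a flush that also flagged in-progress builds
would turn RelationBuildDesc()'s retry loop into the infinite loop its comment
warns about.  flush_all() and build_invalidated are invented names modeling
the guarded loop at the end of RelationCacheInvalidate().

#include <stdbool.h>
#include <stdio.h>

static bool build_invalidated;	/* a single in-progress build, for brevity */

/* Model of RelationCacheInvalidate(bool debug_discard): completed entries
 * are rebuilt either way, but only a non-debug flush restarts in-progress
 * builds. */
static void
flush_all(bool debug_discard)
{
	/* ... delete/rebuild completed relcache entries here ... */
	if (!debug_discard)
		build_invalidated = true;
}

int
main(void)
{
	int			attempts = 0;

	/*
	 * Model RelationBuildDesc() under debug_discard_caches, where every
	 * catalog read flushes all caches.  Because flush_all(true) leaves the
	 * flag alone, this loop ends after one pass; flush_all(false) in its
	 * place would spin forever.
	 */
	do
	{
		build_invalidated = false;
		attempts++;
		flush_all(true);		/* stands in for a catalog read's flush */
	} while (build_invalidated);

	printf("build finished after %d attempt(s)\n", attempts);
	return 0;
}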