Avoid looping over all type cache entries in TypeCacheRelCallback()

Currently, when a single relcache entry gets invalidated,
TypeCacheRelCallback() has to loop over all type cache entries to find
appropriate typentry to invalidate.  Unfortunately, using the syscache here
is impossible, because this callback could be called outside a transaction
and this makes impossible catalog lookups.  This is why present commit
introduces RelIdToTypeIdCacheHash to map relation OID to its composite type
OID.

We are keeping RelIdToTypeIdCacheHash entry while corresponding type cache
entry have something to clean.  Therefore, RelIdToTypeIdCacheHash shouldn't
get bloat in the case of temporary tables flood.

There are many places in lookup_type_cache() where syscache invalidation,
user interruption, or even error could occur.  In order to handle this, we
keep an array of in-progress type cache entries.  In the case of
lookup_type_cache() interruption this array is processed to keep
RelIdToTypeIdCacheHash in a consistent state.

Discussion: https://postgr.es/m/5812a6e5-68ae-4d84-9d85-b443176966a1%40sigaev.ru
Author: Teodor Sigaev
Reviewed-by: Aleksander Alekseev, Tom Lane, Michael Paquier, Roman Zharkov
Reviewed-by: Andrei Lepikhov, Pavel Borisov, Jian He, Alexander Lakhin
Reviewed-by: Artur Zakirov
This commit is contained in:
Alexander Korotkov 2024-10-24 14:35:52 +03:00
parent c1500a1ba7
commit b85a9d046e
11 changed files with 433 additions and 46 deletions

View File

@ -70,6 +70,7 @@
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/typcache.h"
/*
* User-tweakable parameters
@ -2407,6 +2408,9 @@ CommitTransaction(void)
/* Clean up the relation cache */
AtEOXact_RelationCache(true);
/* Clean up the type cache */
AtEOXact_TypeCache();
/*
* Make catalog changes visible to all backends. This has to happen after
* relcache references are dropped (see comments for
@ -2709,6 +2713,9 @@ PrepareTransaction(void)
/* Clean up the relation cache */
AtEOXact_RelationCache(true);
/* Clean up the type cache */
AtEOXact_TypeCache();
/* notify doesn't need a postprepare call */
PostPrepare_PgStat();
@ -2951,6 +2958,7 @@ AbortTransaction(void)
false, true);
AtEOXact_Buffers(false);
AtEOXact_RelationCache(false);
AtEOXact_TypeCache();
AtEOXact_Inval(false);
AtEOXact_MultiXact();
ResourceOwnerRelease(TopTransactionResourceOwner,
@ -5153,6 +5161,7 @@ CommitSubTransaction(void)
true, false);
AtEOSubXact_RelationCache(true, s->subTransactionId,
s->parent->subTransactionId);
AtEOSubXact_TypeCache();
AtEOSubXact_Inval(true);
AtSubCommit_smgr();
@ -5328,6 +5337,7 @@ AbortSubTransaction(void)
AtEOSubXact_RelationCache(false, s->subTransactionId,
s->parent->subTransactionId);
AtEOSubXact_TypeCache();
AtEOSubXact_Inval(false);
ResourceOwnerRelease(s->curTransactionOwner,
RESOURCE_RELEASE_LOCKS,

View File

@ -66,6 +66,7 @@
#include "utils/builtins.h"
#include "utils/catcache.h"
#include "utils/fmgroids.h"
#include "utils/injection_point.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@ -77,6 +78,20 @@
/* The main type cache hashtable searched by lookup_type_cache */
static HTAB *TypeCacheHash = NULL;
/*
* The mapping of relation's OID to the corresponding composite type OID.
* We're keeping the map entry when the corresponding typentry has something
* to clear i.e it has either TCFLAGS_HAVE_PG_TYPE_DATA, or
* TCFLAGS_OPERATOR_FLAGS, or tupdesc.
*/
static HTAB *RelIdToTypeIdCacheHash = NULL;
typedef struct RelIdToTypeIdCacheEntry
{
Oid relid; /* OID of the relation */
Oid composite_typid; /* OID of the relation's composite type */
} RelIdToTypeIdCacheEntry;
/* List of type cache entries for domain types */
static TypeCacheEntry *firstDomainTypeEntry = NULL;
@ -208,6 +223,10 @@ typedef struct SharedTypmodTableEntry
dsa_pointer shared_tupdesc;
} SharedTypmodTableEntry;
static Oid *in_progress_list;
static int in_progress_list_len;
static int in_progress_list_maxlen;
/*
* A comparator function for SharedRecordTableKey.
*/
@ -329,6 +348,8 @@ static void shared_record_typmod_registry_detach(dsm_segment *segment,
static TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc);
static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc,
uint32 typmod);
static void insert_rel_type_cache_if_needed(TypeCacheEntry *typentry);
static void delete_rel_type_cache_if_needed(TypeCacheEntry *typentry);
/*
@ -366,11 +387,13 @@ lookup_type_cache(Oid type_id, int flags)
{
TypeCacheEntry *typentry;
bool found;
int in_progress_offset;
if (TypeCacheHash == NULL)
{
/* First time through: initialize the hash table */
HASHCTL ctl;
int allocsize;
ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(TypeCacheEntry);
@ -385,6 +408,13 @@ lookup_type_cache(Oid type_id, int flags)
TypeCacheHash = hash_create("Type information cache", 64,
&ctl, HASH_ELEM | HASH_FUNCTION);
Assert(RelIdToTypeIdCacheHash == NULL);
ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(RelIdToTypeIdCacheEntry);
RelIdToTypeIdCacheHash = hash_create("Map from relid to OID of cached composite type", 64,
&ctl, HASH_ELEM | HASH_BLOBS);
/* Also set up callbacks for SI invalidations */
CacheRegisterRelcacheCallback(TypeCacheRelCallback, (Datum) 0);
CacheRegisterSyscacheCallback(TYPEOID, TypeCacheTypCallback, (Datum) 0);
@ -394,8 +424,32 @@ lookup_type_cache(Oid type_id, int flags)
/* Also make sure CacheMemoryContext exists */
if (!CacheMemoryContext)
CreateCacheMemoryContext();
/*
* reserve enough in_progress_list slots for many cases
*/
allocsize = 4;
in_progress_list =
MemoryContextAlloc(CacheMemoryContext,
allocsize * sizeof(*in_progress_list));
in_progress_list_maxlen = allocsize;
}
Assert(TypeCacheHash != NULL && RelIdToTypeIdCacheHash != NULL);
/* Register to catch invalidation messages */
if (in_progress_list_len >= in_progress_list_maxlen)
{
int allocsize;
allocsize = in_progress_list_maxlen * 2;
in_progress_list = repalloc(in_progress_list,
allocsize * sizeof(*in_progress_list));
in_progress_list_maxlen = allocsize;
}
in_progress_offset = in_progress_list_len++;
in_progress_list[in_progress_offset] = type_id;
/* Try to look up an existing entry */
typentry = (TypeCacheEntry *) hash_search(TypeCacheHash,
&type_id,
@ -896,6 +950,13 @@ lookup_type_cache(Oid type_id, int flags)
load_domaintype_info(typentry);
}
INJECTION_POINT("typecache-before-rel-type-cache-insert");
Assert(in_progress_offset + 1 == in_progress_list_len);
in_progress_list_len--;
insert_rel_type_cache_if_needed(typentry);
return typentry;
}
@ -2290,6 +2351,53 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
CurrentSession->shared_typmod_table = typmod_table;
}
/*
* InvalidateCompositeTypeCacheEntry
* Invalidate particular TypeCacheEntry on Relcache inval callback
*
* Delete the cached tuple descriptor (if any) for the given composite
* type, and reset whatever info we have cached about the composite type's
* comparability.
*/
static void
InvalidateCompositeTypeCacheEntry(TypeCacheEntry *typentry)
{
bool hadTupDescOrOpclass;
Assert(typentry->typtype == TYPTYPE_COMPOSITE &&
OidIsValid(typentry->typrelid));
hadTupDescOrOpclass = (typentry->tupDesc != NULL) ||
(typentry->flags & TCFLAGS_OPERATOR_FLAGS);
/* Delete tupdesc if we have it */
if (typentry->tupDesc != NULL)
{
/*
* Release our refcount and free the tupdesc if none remain. We can't
* use DecrTupleDescRefCount here because this reference is not logged
* by the current resource owner.
*/
Assert(typentry->tupDesc->tdrefcount > 0);
if (--typentry->tupDesc->tdrefcount == 0)
FreeTupleDesc(typentry->tupDesc);
typentry->tupDesc = NULL;
/*
* Also clear tupDesc_identifier, so that anyone watching it will
* realize that the tupdesc has changed.
*/
typentry->tupDesc_identifier = 0;
}
/* Reset equality/comparison/hashing validity information */
typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
/* Call delete_rel_type_cache() if we actually cleared something */
if (hadTupDescOrOpclass)
delete_rel_type_cache_if_needed(typentry);
}
/*
* TypeCacheRelCallback
* Relcache inval callback function
@ -2299,63 +2407,55 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
* whatever info we have cached about the composite type's comparability.
*
* This is called when a relcache invalidation event occurs for the given
* relid. We must scan the whole typcache hash since we don't know the
* type OID corresponding to the relid. We could do a direct search if this
* were a syscache-flush callback on pg_type, but then we would need all
* ALTER-TABLE-like commands that could modify a rowtype to issue syscache
* invals against the rel's pg_type OID. The extra SI signaling could very
* well cost more than we'd save, since in most usages there are not very
* many entries in a backend's typcache. The risk of bugs-of-omission seems
* high, too.
*
* Another possibility, with only localized impact, is to maintain a second
* hashtable that indexes composite-type typcache entries by their typrelid.
* But it's still not clear it's worth the trouble.
* relid. We can't use syscache to find a type corresponding to the given
* relation because the code can be called outside of transaction. Thus, we
* use the RelIdToTypeIdCacheHash map to locate appropriate typcache entry.
*/
static void
TypeCacheRelCallback(Datum arg, Oid relid)
{
HASH_SEQ_STATUS status;
TypeCacheEntry *typentry;
/* TypeCacheHash must exist, else this callback wouldn't be registered */
hash_seq_init(&status, TypeCacheHash);
while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
/*
* RelIdToTypeIdCacheHash and TypeCacheHash should exist, otherwise this
* callback wouldn't be registered
*/
if (OidIsValid(relid))
{
if (typentry->typtype == TYPTYPE_COMPOSITE)
RelIdToTypeIdCacheEntry *relentry;
/*
* Find an RelIdToTypeIdCacheHash entry, which should exist as soon as
* corresponding typcache entry has something to clean.
*/
relentry = (RelIdToTypeIdCacheEntry *) hash_search(RelIdToTypeIdCacheHash,
&relid,
HASH_FIND, NULL);
if (relentry != NULL)
{
/* Skip if no match, unless we're zapping all composite types */
if (relid != typentry->typrelid && relid != InvalidOid)
continue;
typentry = (TypeCacheEntry *) hash_search(TypeCacheHash,
&relentry->composite_typid,
HASH_FIND, NULL);
/* Delete tupdesc if we have it */
if (typentry->tupDesc != NULL)
if (typentry != NULL)
{
/*
* Release our refcount, and free the tupdesc if none remain.
* (Can't use DecrTupleDescRefCount because this reference is
* not logged in current resource owner.)
*/
Assert(typentry->tupDesc->tdrefcount > 0);
if (--typentry->tupDesc->tdrefcount == 0)
FreeTupleDesc(typentry->tupDesc);
typentry->tupDesc = NULL;
Assert(typentry->typtype == TYPTYPE_COMPOSITE);
Assert(relid == typentry->typrelid);
/*
* Also clear tupDesc_identifier, so that anything watching
* that will realize that the tupdesc has possibly changed.
* (Alternatively, we could specify that to detect possible
* tupdesc change, one must check for tupDesc != NULL as well
* as tupDesc_identifier being the same as what was previously
* seen. That seems error-prone.)
*/
typentry->tupDesc_identifier = 0;
InvalidateCompositeTypeCacheEntry(typentry);
}
/* Reset equality/comparison/hashing validity information */
typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
}
else if (typentry->typtype == TYPTYPE_DOMAIN)
/*
* Visit all the domain types sequentially. Typically, this shouldn't
* affect performance since domain types are less tended to bloat.
* Domain types are created manually, unlike composite types which are
* automatically created for every temporary table.
*/
for (typentry = firstDomainTypeEntry;
typentry != NULL;
typentry = typentry->nextDomain)
{
/*
* If it's domain over composite, reset flags. (We don't bother
@ -2367,6 +2467,36 @@ TypeCacheRelCallback(Datum arg, Oid relid)
typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
}
}
else
{
HASH_SEQ_STATUS status;
/*
* Relid is invalid. By convention, we need to reset all composite
* types in cache. Also, we should reset flags for domain types, and
* we loop over all entries in hash, so, do it in a single scan.
*/
hash_seq_init(&status, TypeCacheHash);
while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
{
if (typentry->typtype == TYPTYPE_COMPOSITE)
{
InvalidateCompositeTypeCacheEntry(typentry);
}
else if (typentry->typtype == TYPTYPE_DOMAIN)
{
/*
* If it's domain over composite, reset flags. (We don't
* bother trying to determine whether the specific base type
* needs a reset.) Note that if we haven't determined whether
* the base type is composite, we don't need to reset
* anything.
*/
if (typentry->flags & TCFLAGS_DOMAIN_BASE_IS_COMPOSITE)
typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
}
}
}
}
/*
@ -2397,6 +2527,8 @@ TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue)
while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
{
bool hadPgTypeData = (typentry->flags & TCFLAGS_HAVE_PG_TYPE_DATA);
Assert(hashvalue == 0 || typentry->type_id_hash == hashvalue);
/*
@ -2406,6 +2538,13 @@ TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue)
*/
typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
/*
* Call delete_rel_type_cache() if we cleaned
* TCFLAGS_HAVE_PG_TYPE_DATA flag previously.
*/
if (hadPgTypeData)
delete_rel_type_cache_if_needed(typentry);
}
}
@ -2914,3 +3053,135 @@ shared_record_typmod_registry_detach(dsm_segment *segment, Datum datum)
}
CurrentSession->shared_typmod_registry = NULL;
}
/*
* Insert RelIdToTypeIdCacheHash entry if needed.
*/
static void
insert_rel_type_cache_if_needed(TypeCacheEntry *typentry)
{
/* Immediately quit for non-composite types */
if (typentry->typtype != TYPTYPE_COMPOSITE)
return;
/* typrelid should be given for composite types */
Assert(OidIsValid(typentry->typrelid));
/*
* Insert a RelIdToTypeIdCacheHash entry if the typentry have any
* information indicating it should be here.
*/
if ((typentry->flags & TCFLAGS_HAVE_PG_TYPE_DATA) ||
(typentry->flags & TCFLAGS_OPERATOR_FLAGS) ||
typentry->tupDesc != NULL)
{
RelIdToTypeIdCacheEntry *relentry;
bool found;
relentry = (RelIdToTypeIdCacheEntry *) hash_search(RelIdToTypeIdCacheHash,
&typentry->typrelid,
HASH_ENTER, &found);
relentry->relid = typentry->typrelid;
relentry->composite_typid = typentry->type_id;
}
}
/*
* Delete entry RelIdToTypeIdCacheHash if needed after resetting of the
* TCFLAGS_HAVE_PG_TYPE_DATA flag, or any of TCFLAGS_OPERATOR_FLAGS,
* or tupDesc.
*/
static void
delete_rel_type_cache_if_needed(TypeCacheEntry *typentry)
{
#ifdef USE_ASSERT_CHECKING
int i;
bool is_in_progress = false;
for (i = 0; i < in_progress_list_len; i++)
{
if (in_progress_list[i] == typentry->type_id)
{
is_in_progress = true;
break;
}
}
#endif
/* Immediately quit for non-composite types */
if (typentry->typtype != TYPTYPE_COMPOSITE)
return;
/* typrelid should be given for composite types */
Assert(OidIsValid(typentry->typrelid));
/*
* Delete a RelIdToTypeIdCacheHash entry if the typentry doesn't have any
* information indicating entry should be still there.
*/
if (!(typentry->flags & TCFLAGS_HAVE_PG_TYPE_DATA) &&
!(typentry->flags & TCFLAGS_OPERATOR_FLAGS) &&
typentry->tupDesc == NULL)
{
bool found;
(void) hash_search(RelIdToTypeIdCacheHash,
&typentry->typrelid,
HASH_REMOVE, &found);
Assert(found || is_in_progress);
}
else
{
#ifdef USE_ASSERT_CHECKING
/*
* In assert-enabled builds otherwise check for RelIdToTypeIdCacheHash
* entry if it should exist.
*/
bool found;
if (!is_in_progress)
{
(void) hash_search(RelIdToTypeIdCacheHash,
&typentry->typrelid,
HASH_FIND, &found);
Assert(found);
}
#endif
}
}
/*
* Add possibly missing RelIdToTypeId entries related to TypeCacheHash
* entries, marked as in-progress by lookup_type_cache(). It may happen
* in case of an error or interruption during the lookup_type_cache() call.
*/
static void
finalize_in_progress_typentries(void)
{
int i;
for (i = 0; i < in_progress_list_len; i++)
{
TypeCacheEntry *typentry;
typentry = (TypeCacheEntry *) hash_search(TypeCacheHash,
&in_progress_list[i],
HASH_FIND, NULL);
if (typentry)
insert_rel_type_cache_if_needed(typentry);
}
in_progress_list_len = 0;
}
void
AtEOXact_TypeCache(void)
{
finalize_in_progress_typentries();
}
void
AtEOSubXact_TypeCache(void)
{
finalize_in_progress_typentries();
}

View File

@ -207,4 +207,8 @@ extern void SharedRecordTypmodRegistryInit(SharedRecordTypmodRegistry *,
extern void SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *);
extern void AtEOXact_TypeCache(void);
extern void AtEOSubXact_TypeCache(void);
#endif /* TYPCACHE_H */

View File

@ -43,9 +43,9 @@ SUBDIRS = \
ifeq ($(enable_injection_points),yes)
SUBDIRS += injection_points gin
SUBDIRS += injection_points gin typcache
else
ALWAYS_SUBDIRS += injection_points gin
ALWAYS_SUBDIRS += injection_points gin typcache
endif
ifeq ($(with_ssl),openssl)

View File

@ -36,6 +36,7 @@ subdir('test_rls_hooks')
subdir('test_shm_mq')
subdir('test_slru')
subdir('test_tidstore')
subdir('typcache')
subdir('unsafe_tests')
subdir('worker_spi')
subdir('xid_wraparound')

4
src/test/modules/typcache/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
# Generated subdirectories
/log/
/results/
/tmp_check/

View File

@ -0,0 +1,28 @@
# src/test/modules/typcache/Makefile
EXTRA_INSTALL = src/test/modules/injection_points
REGRESS = typcache_rel_type_cache
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = src/test/modules/typcache
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
# XXX: This test is conditional on enable_injection_points in the
# parent Makefile, so we should never get here in the first place if
# injection points are not enabled. But the buildfarm 'misc-check'
# step doesn't pay attention to the if-condition in the parent
# Makefile. To work around that, disable running the test here too.
ifeq ($(enable_injection_points),yes)
include $(top_srcdir)/contrib/contrib-global.mk
else
check:
@echo "injection points are disabled in this build"
endif
endif

View File

@ -0,0 +1,34 @@
--
-- This test checks that lookup_type_cache() can correctly handle an
-- interruption. We use the injection point to simulate an error but note
-- that a similar situation could happen due to user query interruption.
-- Despite the interruption, a map entry from the relation oid to type cache
-- entry should be created. This is validated by subsequent modification of
-- the table schema, then type casts which use new schema implying
-- successful type cache invalidation by relation oid.
--
CREATE EXTENSION injection_points;
CREATE TABLE t (i int);
SELECT injection_points_attach('typecache-before-rel-type-cache-insert', 'error');
injection_points_attach
-------------------------
(1 row)
SELECT '(1)'::t;
ERROR: error triggered for injection point typecache-before-rel-type-cache-insert
LINE 1: SELECT '(1)'::t;
^
SELECT injection_points_detach('typecache-before-rel-type-cache-insert');
injection_points_detach
-------------------------
(1 row)
ALTER TABLE t ADD COLUMN j int;
SELECT '(1,2)'::t;
t
-------
(1,2)
(1 row)

View File

@ -0,0 +1,16 @@
# Copyright (c) 2022-2024, PostgreSQL Global Development Group
if not get_option('injection_points')
subdir_done()
endif
tests += {
'name': 'typcache',
'sd': meson.current_source_dir(),
'bd': meson.current_build_dir(),
'regress': {
'sql': [
'typcache_rel_type_cache',
],
},
}

View File

@ -0,0 +1,18 @@
--
-- This test checks that lookup_type_cache() can correctly handle an
-- interruption. We use the injection point to simulate an error but note
-- that a similar situation could happen due to user query interruption.
-- Despite the interruption, a map entry from the relation oid to type cache
-- entry should be created. This is validated by subsequent modification of
-- the table schema, then type casts which use new schema implying
-- successful type cache invalidation by relation oid.
--
CREATE EXTENSION injection_points;
CREATE TABLE t (i int);
SELECT injection_points_attach('typecache-before-rel-type-cache-insert', 'error');
SELECT '(1)'::t;
SELECT injection_points_detach('typecache-before-rel-type-cache-insert');
ALTER TABLE t ADD COLUMN j int;
SELECT '(1,2)'::t;

View File

@ -2378,6 +2378,7 @@ RelFileLocator
RelFileLocatorBackend
RelFileNumber
RelIdCacheEnt
RelIdToTypeIdCacheEntry
RelInfo
RelInfoArr
RelMapFile