From fc299179dff8521a5ab61c15326278d13d2e069e Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 10 Jan 2005 21:57:19 +0000
Subject: [PATCH] Separate the functions of relcache entry flush and smgr cache
 entry flush so that we can get the size of a shared inval message back down
 to what it was in 7.4 (and simplify the logic too).  Phase 2 of fixing the
 'SMgrRelation hashtable corrupted' problem.

---
 src/backend/utils/cache/inval.c | 136 ++++++++++++++++++++------------
 src/include/storage/sinval.h    |  42 +++++-----
 2 files changed, 103 insertions(+), 75 deletions(-)

diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 8933030a9d..2195458982 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -53,14 +53,14 @@
  *
  *	Also, whenever we see an operation on a pg_class or pg_attribute tuple,
  *	we register a relcache flush operation for the relation described by that
- *	tuple.
+ *	tuple.  pg_class updates trigger an smgr flush operation as well.
  *
- *	We keep the relcache flush requests in lists separate from the catcache
- *	tuple flush requests.  This allows us to issue all the pending catcache
- *	flushes before we issue relcache flushes, which saves us from loading
- *	a catcache tuple during relcache load only to flush it again right away.
- *	Also, we avoid queuing multiple relcache flush requests for the same
- *	relation, since a relcache flush is relatively expensive to do.
+ *	We keep the relcache and smgr flush requests in lists separate from the
+ *	catcache tuple flush requests.  This allows us to issue all the pending
+ *	catcache flushes before we issue relcache flushes, which saves us from
+ *	loading a catcache tuple during relcache load only to flush it again
+ *	right away.  Also, we avoid queuing multiple relcache flush requests for
+ *	the same relation, since a relcache flush is relatively expensive to do.
  *	(XXX is it worth testing likewise for duplicate catcache flush entries?
  *	Probably not.)
  *
@@ -80,7 +80,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.69 2005/01/10 20:02:23 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.70 2005/01/10 21:57:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -115,7 +115,7 @@ typedef struct InvalidationChunk
 typedef struct InvalidationListHeader
 {
 	InvalidationChunk *cclist;	/* list of chunks holding catcache msgs */
-	InvalidationChunk *rclist;	/* list of chunks holding relcache msgs */
+	InvalidationChunk *rclist;	/* list of chunks holding relcache/smgr msgs */
 } InvalidationListHeader;
 
 /*----------------
@@ -164,7 +164,7 @@ static TransInvalidationInfo *transInvalInfo = NULL;
 
 static struct CACHECALLBACK
 {
-	int16		id;				/* cache number or SHAREDINVALRELCACHE_ID */
+	int16		id;				/* cache number or message type id */
 	CacheCallbackFunction function;
 	Datum		arg;
 }	cache_callback_list[MAX_CACHE_CALLBACKS];
@@ -273,7 +273,7 @@ AppendInvalidationMessageList(InvalidationChunk **destHdr,
  *				Invalidation set support functions
  *
  * These routines understand about the division of a logical invalidation
- * list into separate physical lists for catcache and relcache entries.
+ * list into separate physical lists for catcache and relcache/smgr entries.
  * ----------------------------------------------------------------
  */
 
@@ -299,22 +299,42 @@ AddCatcacheInvalidationMessage(InvalidationListHeader *hdr,
  */
 static void
 AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
-							   Oid dbId, Oid relId, RelFileNode physId)
+							   Oid dbId, Oid relId)
 {
 	SharedInvalidationMessage msg;
 
 	/* Don't add a duplicate item */
 	/* We assume dbId need not be checked because it will never change */
-	/* relfilenode fields must be checked to support reassignment */
 	ProcessMessageList(hdr->rclist,
-					   if (msg->rc.relId == relId &&
-					  RelFileNodeEquals(msg->rc.physId, physId)) return);
+					   if (msg->rc.id == SHAREDINVALRELCACHE_ID &&
+						   msg->rc.relId == relId)
+						   return);
 
 	/* OK, add the item */
 	msg.rc.id = SHAREDINVALRELCACHE_ID;
 	msg.rc.dbId = dbId;
 	msg.rc.relId = relId;
-	msg.rc.physId = physId;
+	AddInvalidationMessage(&hdr->rclist, &msg);
+}
+
+/*
+ * Add an smgr inval entry
+ */
+static void
+AddSmgrInvalidationMessage(InvalidationListHeader *hdr,
+						   RelFileNode rnode)
+{
+	SharedInvalidationMessage msg;
+
+	/* Don't add a duplicate item */
+	ProcessMessageList(hdr->rclist,
+					   if (msg->sm.id == SHAREDINVALSMGR_ID &&
+						   RelFileNodeEquals(msg->sm.rnode, rnode))
+						   return);
+
+	/* OK, add the item */
+	msg.sm.id = SHAREDINVALSMGR_ID;
+	msg.sm.rnode = rnode;
 	AddInvalidationMessage(&hdr->rclist, &msg);
 }
 
@@ -370,10 +390,10 @@ RegisterCatcacheInvalidation(int cacheId,
  * As above, but register a relcache invalidation event.
  */
 static void
-RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId)
+RegisterRelcacheInvalidation(Oid dbId, Oid relId)
 {
 	AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-								   dbId, relId, physId);
+								   dbId, relId);
 
 	/*
 	 * If the relation being invalidated is one of those cached in the
@@ -383,10 +403,22 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId)
 		transInvalInfo->RelcacheInitFileInval = true;
 }
 
+/*
+ * RegisterSmgrInvalidation
+ *
+ * As above, but register an smgr invalidation event.
+ */
+static void
+RegisterSmgrInvalidation(RelFileNode rnode)
+{
+	AddSmgrInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
+							   rnode);
+}
+
 /*
  * LocalExecuteInvalidationMessage
  *
- * Process a single invalidation message (which could be either type).
+ * Process a single invalidation message (which could be of any type).
  * Only the local caches are flushed; this does not transmit the message
  * to other backends.
  */
@@ -426,17 +458,14 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
 					(*ccitem->function) (ccitem->arg, msg->rc.relId);
 			}
 		}
+	}
+	else if (msg->id == SHAREDINVALSMGR_ID)
+	{
 		/*
-		 * If the message includes a valid relfilenode, we must ensure
-		 * the smgr cache entry gets zapped.  This might not have happened
-		 * above since the relcache entry might not have existed or might
-		 * have been associated with a different relfilenode.
-		 *
-		 * XXX there is no real good reason for rnode inval to be in the
-		 * same message at all.  FIXME in 8.1.
+		 * We could have smgr entries for relations of other databases,
+		 * so no short-circuit test is possible here.
 		 */
-		if (OidIsValid(msg->rc.physId.relNode))
-			smgrclosenode(msg->rc.physId);
+		smgrclosenode(msg->sm.rnode);
 	}
 	else
 		elog(FATAL, "unrecognized SI message id: %d", msg->id);
@@ -475,16 +504,11 @@ InvalidateSystemCaches(void)
  *		of catalog/relation cache entries; if so, register inval events.
  */
 static void
-PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
-							void (*CacheIdRegisterFunc) (int, uint32,
-													   ItemPointer, Oid),
-							void (*RelationIdRegisterFunc) (Oid, Oid,
-															RelFileNode))
+PrepareForTupleInvalidation(Relation relation, HeapTuple tuple)
 {
 	Oid			tupleRelId;
 	Oid			databaseId;
 	Oid			relationId;
-	RelFileNode rnode;
 
 	/* Do nothing during bootstrap */
 	if (IsBootstrapProcessingMode())
@@ -510,7 +534,7 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
 	 * First let the catcache do its thing
 	 */
 	PrepareToInvalidateCacheTuple(relation, tuple,
-								  CacheIdRegisterFunc);
+								  RegisterCatcacheInvalidation);
 
 	/*
 	 * Now, is this tuple one of the primary definers of a relcache entry?
@@ -520,27 +544,36 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
 	if (tupleRelId == RelOid_pg_class)
 	{
 		Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
+		RelFileNode rnode;
 
 		relationId = HeapTupleGetOid(tuple);
 		if (classtup->relisshared)
 			databaseId = InvalidOid;
 		else
 			databaseId = MyDatabaseId;
-		if (classtup->reltablespace)
-			rnode.spcNode = classtup->reltablespace;
-		else
-			rnode.spcNode = MyDatabaseTableSpace;
-		rnode.dbNode = databaseId;
-		rnode.relNode = classtup->relfilenode;
 
 		/*
+		 * We need to send out an smgr inval as well as a relcache inval.
+		 * This is needed because other backends might possibly possess
+		 * smgr cache but not relcache entries for the target relation.
+		 *
 		 * Note: during a pg_class row update that assigns a new
 		 * relfilenode or reltablespace value, we will be called on both
 		 * the old and new tuples, and thus will broadcast invalidation
 		 * messages showing both the old and new RelFileNode values.  This
 		 * ensures that other backends will close smgr references to the
 		 * old file.
+		 *
+		 * XXX possible future cleanup: it might be better to trigger smgr
+		 * flushes explicitly, rather than indirectly from pg_class updates.
 		 */
+		if (classtup->reltablespace)
+			rnode.spcNode = classtup->reltablespace;
+		else
+			rnode.spcNode = MyDatabaseTableSpace;
+		rnode.dbNode = databaseId;
+		rnode.relNode = classtup->relfilenode;
+		RegisterSmgrInvalidation(rnode);
 	}
 	else if (tupleRelId == RelOid_pg_attribute)
 	{
@@ -558,10 +591,6 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
 		 * though.
 		 */
 		databaseId = MyDatabaseId;
-		/* We assume no smgr cache flush is needed, either */
-		rnode.spcNode = InvalidOid;
-		rnode.dbNode = InvalidOid;
-		rnode.relNode = InvalidOid;
 	}
 	else
 		return;
@@ -569,7 +598,7 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
 	/*
 	 * Yes.  We need to register a relcache invalidation event.
 	 */
-	(*RelationIdRegisterFunc) (databaseId, relationId, rnode);
+	RegisterRelcacheInvalidation(databaseId, relationId);
 }
 
 
@@ -790,9 +819,7 @@ CommandEndInvalidationMessages(void)
 void
 CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
 {
-	PrepareForTupleInvalidation(relation, tuple,
-								RegisterCatcacheInvalidation,
-								RegisterRelcacheInvalidation);
+	PrepareForTupleInvalidation(relation, tuple);
 }
 
 /*
@@ -803,7 +830,10 @@ CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
  * This is used in places that need to force relcache rebuild but aren't
  * changing any of the tuples recognized as contributors to the relcache
  * entry by PrepareForTupleInvalidation.  (An example is dropping an index.)
- * We assume in particular that relfilenode isn't changing.
+ * We assume in particular that relfilenode/reltablespace aren't changing
+ * (so the rd_node value is still good).
+ *
+ * XXX most callers of this probably don't need to force an smgr flush.
  */
 void
 CacheInvalidateRelcache(Relation relation)
@@ -817,7 +847,8 @@ CacheInvalidateRelcache(Relation relation)
 	else
 		databaseId = MyDatabaseId;
 
-	RegisterRelcacheInvalidation(databaseId, relationId, relation->rd_node);
+	RegisterRelcacheInvalidation(databaseId, relationId);
+	RegisterSmgrInvalidation(relation->rd_node);
 }
 
 /*
@@ -844,7 +875,8 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
 	rnode.dbNode = databaseId;
 	rnode.relNode = classtup->relfilenode;
 
-	RegisterRelcacheInvalidation(databaseId, relationId, rnode);
+	RegisterRelcacheInvalidation(databaseId, relationId);
+	RegisterSmgrInvalidation(rnode);
 }
 
 /*
diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h
index 0fce746614..092b722048 100644
--- a/src/include/storage/sinval.h
+++ b/src/include/storage/sinval.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.39 2004/12/31 22:03:42 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.40 2005/01/10 21:57:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,22 +20,16 @@
 
 
 /*
- * We currently support two types of shared-invalidation messages: one that
- * invalidates an entry in a catcache, and one that invalidates a relcache
- * entry.  More types could be added if needed.  The message type is
- * identified by the first "int16" field of the message struct.  Zero or
- * positive means a catcache inval message (and also serves as the catcache
- * ID field).  -1 means a relcache inval message.  Other negative values
- * are available to identify other inval message types.
+ * We currently support three types of shared-invalidation messages: one that
+ * invalidates an entry in a catcache, one that invalidates a relcache entry,
+ * and one that invalidates an smgr cache entry.  More types could be added
+ * if needed.  The message type is identified by the first "int16" field of
+ * the message struct.  Zero or positive means a catcache inval message (and
+ * also serves as the catcache ID field).  -1 means a relcache inval message.
+ * -2 means an smgr inval message.  Other negative values are available to
+ * identify other inval message types.
  *
- * Relcache invalidation messages usually also cause invalidation of entries
- * in the smgr's relation cache.  This means they must carry both logical
- * and physical relation ID info (ie, both dbOID/relOID and RelFileNode).
- * In some cases RelFileNode information is not available so the sender fills
- * those fields with zeroes --- this is okay so long as no smgr cache flush
- * is required.
- *
- * Shared-inval events are initially driven by detecting tuple inserts,
+ * Catcache inval events are initially driven by detecting tuple inserts,
  * updates and deletions in system catalogs (see CacheInvalidateHeapTuple).
  * An update generates two inval events, one for the old tuple and one for
  * the new --- this is needed to get rid of both positive entries for the
@@ -71,20 +65,22 @@ typedef struct
 	int16		id;				/* type field --- must be first */
 	Oid			dbId;			/* database ID, or 0 if a shared relation */
 	Oid			relId;			/* relation ID */
-	RelFileNode physId;			/* physical file ID */
-
-	/*
-	 * Note: it is likely that RelFileNode will someday be changed to
-	 * include database ID.  In that case the dbId field will be redundant
-	 * and should be removed to save space.
-	 */
 } SharedInvalRelcacheMsg;
 
+#define SHAREDINVALSMGR_ID		(-2)
+
+typedef struct
+{
+	int16		id;				/* type field --- must be first */
+	RelFileNode rnode;			/* physical file ID */
+} SharedInvalSmgrMsg;
+
 typedef union
 {
 	int16		id;				/* type field --- must be first */
 	SharedInvalCatcacheMsg cc;
 	SharedInvalRelcacheMsg rc;
+	SharedInvalSmgrMsg sm;
 } SharedInvalidationMessage;