diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index d47dae0c41..d705d7a5c1 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.239 2007/04/03 16:34:35 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.240 2007/04/26 23:24:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1631,6 +1631,7 @@ CommitTransaction(void)
 	/* smgrcommit already done */
 	AtEOXact_Files();
 	AtEOXact_ComboCid();
+	AtEOXact_HashTables(true);
 	pgstat_clear_snapshot();
 	pgstat_count_xact_commit();
 	pgstat_report_txn_timestamp(0);
@@ -1849,6 +1850,7 @@ PrepareTransaction(void)
 	/* smgrcommit already done */
 	AtEOXact_Files();
 	AtEOXact_ComboCid();
+	AtEOXact_HashTables(true);
 	pgstat_clear_snapshot();
 
 	CurrentResourceOwner = NULL;
@@ -2003,6 +2005,7 @@ AbortTransaction(void)
 	smgrabort();
 	AtEOXact_Files();
 	AtEOXact_ComboCid();
+	AtEOXact_HashTables(false);
 	pgstat_clear_snapshot();
 	pgstat_count_xact_rollback();
 	pgstat_report_txn_timestamp(0);
@@ -3716,6 +3719,7 @@ CommitSubTransaction(void)
 					  s->parent->subTransactionId);
 	AtEOSubXact_Files(true, s->subTransactionId,
 					  s->parent->subTransactionId);
+	AtEOSubXact_HashTables(true, s->nestingLevel);
 
 	/*
 	 * We need to restore the upper transaction's read-only state, in case the
@@ -3827,6 +3831,7 @@ AbortSubTransaction(void)
 						  s->parent->subTransactionId);
 		AtEOSubXact_Files(false, s->subTransactionId,
 						  s->parent->subTransactionId);
+		AtEOSubXact_HashTables(false, s->nestingLevel);
 	}
 
 	/*
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index e7976ebb6e..e9b953f709 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -10,7 +10,7 @@
  * Copyright (c) 2002-2007, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.73 2007/04/16 18:21:07 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.74 2007/04/26 23:24:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,7 +21,7 @@
 #include "catalog/pg_type.h"
 #include "commands/explain.h"
 #include "commands/prepare.h"
-#include "funcapi.h"
+#include "miscadmin.h"
 #include "parser/analyze.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_expr.h"
@@ -743,92 +743,99 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, ExplainStmt *stmt,
 Datum
 pg_prepared_statement(PG_FUNCTION_ARGS)
 {
-	FuncCallContext *funcctx;
-	HASH_SEQ_STATUS *hash_seq;
-	PreparedStatement *prep_stmt;
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	TupleDesc	tupdesc;
+	Tuplestorestate *tupstore;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
 
-	/* stuff done only on the first call of the function */
-	if (SRF_IS_FIRSTCALL())
+	/* check to see if caller supports us returning a tuplestore */
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("materialize mode required, but it is not " \
+						"allowed in this context")));
+
+	/* need to build tuplestore in query context */
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+	/*
+	 * build tupdesc for result tuples. This must match the definition of
+	 * the pg_prepared_statements view in system_views.sql
+	 */
+	tupdesc = CreateTemplateTupleDesc(5, false);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
+					   TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
+					   TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepare_time",
+					   TIMESTAMPTZOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "parameter_types",
+					   REGTYPEARRAYOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 5, "from_sql",
+					   BOOLOID, -1, 0);
+
+	/*
+	 * We put all the tuples into a tuplestore in one scan of the hashtable.
+	 * This avoids any issue of the hashtable possibly changing between calls.
+	 */
+	tupstore = tuplestore_begin_heap(true, false, work_mem);
+
+	/* hash table might be uninitialized */
+	if (prepared_queries)
 	{
-		TupleDesc	tupdesc;
-		MemoryContext oldcontext;
+		HASH_SEQ_STATUS hash_seq;
+		PreparedStatement *prep_stmt;
 
-		/* create a function context for cross-call persistence */
-		funcctx = SRF_FIRSTCALL_INIT();
-
-		/*
-		 * switch to memory context appropriate for multiple function calls
-		 */
-		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
-
-		/* allocate memory for user context */
-		if (prepared_queries)
+		hash_seq_init(&hash_seq, prepared_queries);
+		while ((prep_stmt = hash_seq_search(&hash_seq)) != NULL)
 		{
-			hash_seq = (HASH_SEQ_STATUS *) palloc(sizeof(HASH_SEQ_STATUS));
-			hash_seq_init(hash_seq, prepared_queries);
-			funcctx->user_fctx = (void *) hash_seq;
-		}
-		else
-			funcctx->user_fctx = NULL;
+			HeapTuple	tuple;
+			Datum		values[5];
+			bool		nulls[5];
 
-		/*
-		 * build tupdesc for result tuples. This must match the definition of
-		 * the pg_prepared_statements view in system_views.sql
-		 */
-		tupdesc = CreateTemplateTupleDesc(5, false);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
-						   TEXTOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
-						   TEXTOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepare_time",
-						   TIMESTAMPTZOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "parameter_types",
-						   REGTYPEARRAYOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "from_sql",
-						   BOOLOID, -1, 0);
+			/* generate junk in short-term context */
+			MemoryContextSwitchTo(oldcontext);
 
-		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
-		MemoryContextSwitchTo(oldcontext);
-	}
+			MemSet(nulls, 0, sizeof(nulls));
 
-	/* stuff done on every call of the function */
-	funcctx = SRF_PERCALL_SETUP();
-	hash_seq = (HASH_SEQ_STATUS *) funcctx->user_fctx;
-
-	/* if the hash table is uninitialized, we're done */
-	if (hash_seq == NULL)
-		SRF_RETURN_DONE(funcctx);
-
-	prep_stmt = hash_seq_search(hash_seq);
-	if (prep_stmt)
-	{
-		Datum		result;
-		HeapTuple	tuple;
-		Datum		values[5];
-		bool		nulls[5];
-
-		MemSet(nulls, 0, sizeof(nulls));
-
-		values[0] = DirectFunctionCall1(textin,
+			values[0] = DirectFunctionCall1(textin,
 							CStringGetDatum(prep_stmt->stmt_name));
-		if (prep_stmt->plansource->query_string == NULL)
-			nulls[1] = true;
-		else
-			values[1] = DirectFunctionCall1(textin,
+			if (prep_stmt->plansource->query_string == NULL)
+				nulls[1] = true;
+			else
+				values[1] = DirectFunctionCall1(textin,
 						CStringGetDatum(prep_stmt->plansource->query_string));
-		values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
-		values[3] = build_regtype_array(prep_stmt->plansource->param_types,
-										prep_stmt->plansource->num_params);
-		values[4] = BoolGetDatum(prep_stmt->from_sql);
+			values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
+			values[3] = build_regtype_array(prep_stmt->plansource->param_types,
+											prep_stmt->plansource->num_params);
+			values[4] = BoolGetDatum(prep_stmt->from_sql);
 
-		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
-		result = HeapTupleGetDatum(tuple);
-		SRF_RETURN_NEXT(funcctx, result);
+			tuple = heap_form_tuple(tupdesc, values, nulls);
+
+			/* switch to appropriate context while storing the tuple */
+			MemoryContextSwitchTo(per_query_ctx);
+			tuplestore_puttuple(tupstore, tuple);
+		}
 	}
 
-	SRF_RETURN_DONE(funcctx);
+	/* clean up and return the tuplestore */
+	tuplestore_donestoring(tupstore);
+
+	MemoryContextSwitchTo(oldcontext);
+
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
+
+	return (Datum) 0;
 }
 
 /*
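
Note: both pg_prepared_statement() above and pg_cursor() in portalmem.c below
are converted from value-per-call mode to the same materialize-mode protocol,
so that the whole hash table is read out in a single call and no scan state
has to survive between calls. The skeleton below is a minimal sketch of that
protocol, distilled from the two conversions in this patch; my_matset_function
and my_build_tupdesc are hypothetical placeholder names, not part of the patch.

    #include "postgres.h"
    #include "funcapi.h"
    #include "miscadmin.h"
    #include "utils/tuplestore.h"

    Datum
    my_matset_function(PG_FUNCTION_ARGS)   /* hypothetical example function */
    {
        ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
        MemoryContext per_query_ctx;
        MemoryContext oldcontext;
        Tuplestorestate *tupstore;
        TupleDesc   tupdesc;

        /* caller must be able to accept a materialized result set */
        if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo) ||
            !(rsinfo->allowedModes & SFRM_Materialize))
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("materialize mode required, but it is not allowed in this context")));

        /* tuplestore and tupdesc must live in the per-query memory context */
        per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
        oldcontext = MemoryContextSwitchTo(per_query_ctx);
        tupdesc = my_build_tupdesc();   /* hypothetical helper */
        tupstore = tuplestore_begin_heap(true, false, work_mem);
        MemoryContextSwitchTo(oldcontext);

        /* ... scan the data structure once, calling tuplestore_puttuple() ... */

        tuplestore_donestoring(tupstore);
        rsinfo->returnMode = SFRM_Materialize;
        rsinfo->setResult = tupstore;
        rsinfo->setDesc = tupdesc;
        return (Datum) 0;
    }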
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index bb0508d6bc..a66f51c26a 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.87 2007/02/27 01:11:25 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.88 2007/04/26 23:24:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -569,7 +569,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
 	TupleHashIterator hashiter;
 	TupleHashEntry entry;
 
-	ResetTupleHashIterator(hashtable, &hashiter);
+	InitTupleHashIterator(hashtable, &hashiter);
 	while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
 	{
 		ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
@@ -577,8 +577,12 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
 					 numCols, keyColIdx,
 					 hashtable->cur_eq_funcs,
 					 hashtable->tempcxt))
+		{
+			TermTupleHashIterator(&hashiter);
 			return true;
+		}
 	}
+	/* No TermTupleHashIterator call needed here */
 	return false;
 }
 
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index 2be504213f..2f3e00d6a2 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -23,7 +23,7 @@
  * Copyright (c) 2003-2007, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.11 2007/01/05 22:19:30 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.12 2007/04/26 23:24:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -907,7 +907,11 @@ tbm_lossify(TIDBitmap *tbm)
 		tbm_mark_page_lossy(tbm, page->blockno);
 
 		if (tbm->nentries <= tbm->maxentries)
-			return;				/* we have done enough */
+		{
+			/* we have done enough */
+			hash_seq_term(&status);
+			break;
+		}
 
 		/*
 		 * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index f781ca6c3d..2ced795798 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -26,7 +26,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.74 2007/01/05 22:19:43 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.75 2007/04/26 23:24:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,6 +63,7 @@
 #include "postgres.h"
 
+#include "access/xact.h"
 #include "storage/shmem.h"
 #include "storage/spin.h"
 #include "utils/dynahash.h"
@@ -160,6 +161,9 @@ struct HTAB
 	char	   *tabname;		/* table name (for error messages) */
 	bool		isshared;		/* true if table is in shared memory */
 
+	/* freezing a shared table isn't allowed, so we can keep state here */
+	bool		frozen;			/* true = no more inserts allowed */
+
 	/* We keep local copies of these fixed values to reduce contention */
 	Size		keysize;		/* hash key length in bytes */
 	long		ssize;			/* segment size --- must be power of 2 */
@@ -195,6 +199,9 @@ static void hdefault(HTAB *hashp);
 static int	choose_nelem_alloc(Size entrysize);
 static bool init_htab(HTAB *hashp, long nelem);
 static void hash_corrupted(HTAB *hashp);
+static void register_seq_scan(HTAB *hashp);
+static void deregister_seq_scan(HTAB *hashp);
+static bool has_seq_scans(HTAB *hashp);
 
 
 /*
@@ -356,6 +363,8 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
 				 errmsg("out of memory")));
 	}
 
+	hashp->frozen = false;
+
 	hdefault(hashp);
 
 	hctl = hashp->hctl;
@@ -898,6 +907,10 @@ hash_search_with_hash_value(HTAB *hashp,
 			if (currBucket != NULL)
 				return (void *) ELEMENTKEY(currBucket);
 
+			/* disallow inserts if frozen */
+			if (hashp->frozen)
+				elog(ERROR, "cannot insert into a frozen hashtable");
+
 			currBucket = get_hash_entry(hashp);
 			if (currBucket == NULL)
 			{
@@ -925,10 +938,15 @@ hash_search_with_hash_value(HTAB *hashp,
 
 			/* caller is expected to fill the data field on return */
 
-			/* Check if it is time to split a bucket */
-			/* Can't split if running in partitioned mode */
+			/*
+			 * Check if it is time to split a bucket.  Can't split if running
+			 * in partitioned mode, nor if table is the subject of any active
+			 * hash_seq_search scans.  Strange order of these tests is to try
+			 * to check cheaper conditions first.
+			 */
 			if (!IS_PARTITIONED(hctl) &&
-				hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor)
+				hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
+				!has_seq_scans(hashp))
 			{
 				/*
 				 * NOTE: failure to expand table is not a fatal error, it just
@@ -1001,18 +1019,30 @@ hash_get_num_entries(HTAB *hashp)
 }
 
 /*
- * hash_seq_init/_search
+ * hash_seq_init/_search/_term
  *			Sequentially search through hash table and return
  *			all the elements one by one, return NULL when no more.
  *
+ * hash_seq_term should be called if and only if the scan is abandoned before
+ * completion; if hash_seq_search returns NULL then it has already done the
+ * end-of-scan cleanup.
+ *
  * NOTE: caller may delete the returned element before continuing the scan.
  * However, deleting any other element while the scan is in progress is
  * UNDEFINED (it might be the one that curIndex is pointing at!).  Also,
  * if elements are added to the table while the scan is in progress, it is
  * unspecified whether they will be visited by the scan or not.
  *
+ * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
+ * worry about hash_seq_term cleanup, if the hashtable is first locked against
+ * further insertions by calling hash_freeze.  This is used by nodeAgg.c,
+ * wherein it is inconvenient to track whether a scan is still open, and
+ * there's no possibility of further insertions after readout has begun.
+ *
  * NOTE: to use this with a partitioned hashtable, caller had better hold
  * at least shared lock on all partitions of the table throughout the scan!
+ * We can cope with insertions or deletions by our own backend, but *not*
+ * with concurrent insertions or deletions by another.
  */
 void
 hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
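
Note: the "term if and only if abandoned" contract above is the part that is
easiest to get wrong in early-exit loops, so a minimal sketch of the intended
call pattern follows. It is not part of the patch; find_entry, MyEntry, and
the key field are hypothetical stand-ins for a caller's own types.

    #include "postgres.h"
    #include "utils/hsearch.h"

    typedef struct MyEntry
    {
        int         key;            /* hash key (hypothetical) */
        int         payload;
    } MyEntry;

    static MyEntry *
    find_entry(HTAB *htab, int target)
    {
        HASH_SEQ_STATUS status;
        MyEntry    *entry;

        hash_seq_init(&status, htab);
        while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
        {
            if (entry->key == target)
            {
                /* abandoning the scan early: deregister it ourselves */
                hash_seq_term(&status);
                return entry;
            }
        }
        /* hash_seq_search returned NULL: cleanup already done, no term call */
        return NULL;
    }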
@@ -1020,6 +1050,8 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
 	status->hashp = hashp;
 	status->curBucket = 0;
 	status->curEntry = NULL;
+	if (!hashp->frozen)
+		register_seq_scan(hashp);
 }
 
 void *
@@ -1054,7 +1086,10 @@ hash_seq_search(HASH_SEQ_STATUS *status)
 	max_bucket = hctl->max_bucket;
 
 	if (curBucket > max_bucket)
+	{
+		hash_seq_term(status);
 		return NULL;			/* search is done */
+	}
 
 	/*
 	 * first find the right segment in the table directory.
@@ -1076,6 +1111,7 @@ hash_seq_search(HASH_SEQ_STATUS *status)
 		if (++curBucket > max_bucket)
 		{
 			status->curBucket = curBucket;
+			hash_seq_term(status);
 			return NULL;		/* search is done */
 		}
 		if (++segment_ndx >= ssize)
@@ -1094,6 +1130,36 @@ hash_seq_search(HASH_SEQ_STATUS *status)
 	return (void *) ELEMENTKEY(curElem);
 }
 
+void
+hash_seq_term(HASH_SEQ_STATUS *status)
+{
+	if (!status->hashp->frozen)
+		deregister_seq_scan(status->hashp);
+}
+
+/*
+ * hash_freeze
+ *			Freeze a hashtable against future insertions (deletions are
+ *			still allowed)
+ *
+ * The reason for doing this is that by preventing any more bucket splits,
+ * we no longer need to worry about registering hash_seq_search scans,
+ * and thus caller need not be careful about ensuring hash_seq_term gets
+ * called at the right times.
+ *
+ * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
+ * with active scans (since hash_seq_term would then do the wrong thing).
+ */
+void
+hash_freeze(HTAB *hashp)
+{
+	if (hashp->isshared)
+		elog(ERROR, "cannot freeze shared hashtable");
+	if (!hashp->frozen && has_seq_scans(hashp))
+		elog(ERROR, "cannot freeze hashtable with active scans");
+	hashp->frozen = true;
+}
+
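
Note: for callers that cannot conveniently pair every hash_seq_init with a
hash_seq_term (the nodeAgg.c case cited in the comment above), freezing the
table first makes the cleanup question moot. A sketch of that pattern, under
the assumption that no further inserts will ever be needed; read_out_all is a
hypothetical caller, not part of the patch:

    static void
    read_out_all(HTAB *htab)
    {
        HASH_SEQ_STATUS status;
        void       *entry;

        /* no bucket splits can occur now, so scans need no registration */
        hash_freeze(htab);

        hash_seq_init(&status, htab);
        while ((entry = hash_seq_search(&status)) != NULL)
        {
            /* process entry; breaking out early would also be safe here */
        }
    }

hash_freeze itself errors out on shared tables and on tables that already
have scans in progress, per the checks above.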
 
 /********************************* UTILITIES ************************/
@@ -1324,3 +1390,136 @@ my_log2(long num)
 		;
 	return i;
 }
+
+
+/************************* SEQ SCAN TRACKING ************************/
+
+/*
+ * We track active hash_seq_search scans here.  The need for this mechanism
+ * comes from the fact that a scan will get confused if a bucket split occurs
+ * while it's in progress: it might visit entries twice, or even miss some
+ * entirely (if it's partway through the same bucket that splits).  Hence
+ * we want to inhibit bucket splits if there are any active scans on the
+ * table being inserted into.  This is a fairly rare case in current usage,
+ * so just postponing the split until the next insertion seems sufficient.
+ *
+ * Given present usages of the function, only a few scans are likely to be
+ * open concurrently; so a finite-size stack of open scans seems sufficient,
+ * and we don't worry that linear search is too slow.  Note that we do
+ * allow multiple scans of the same hashtable to be open concurrently.
+ *
+ * This mechanism can support concurrent scan and insertion in a shared
+ * hashtable if it's the same backend doing both.  It would fail otherwise,
+ * but locking reasons seem to preclude any such scenario anyway, so we don't
+ * worry.
+ *
+ * This arrangement is reasonably robust if a transient hashtable is deleted
+ * without notifying us.  The absolute worst case is we might inhibit splits
+ * in another table created later at exactly the same address.  We will give
+ * a warning at transaction end for reference leaks, so any bugs leading to
+ * lack of notification should be easy to catch.
+ */
+
+#define MAX_SEQ_SCANS 100
+
+static HTAB *seq_scan_tables[MAX_SEQ_SCANS];	/* tables being scanned */
+static int	seq_scan_level[MAX_SEQ_SCANS];		/* subtransaction nest level */
+static int	num_seq_scans = 0;
+
+
+/* Register a table as having an active hash_seq_search scan */
+static void
+register_seq_scan(HTAB *hashp)
+{
+	if (num_seq_scans >= MAX_SEQ_SCANS)
+		elog(ERROR, "too many active hash_seq_search scans");
+	seq_scan_tables[num_seq_scans] = hashp;
+	seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
+	num_seq_scans++;
+}
+
+/* Deregister an active scan */
+static void
+deregister_seq_scan(HTAB *hashp)
+{
+	int			i;
+
+	/* Search backward since it's most likely at the stack top */
+	for (i = num_seq_scans - 1; i >= 0; i--)
+	{
+		if (seq_scan_tables[i] == hashp)
+		{
+			seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
+			seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
+			num_seq_scans--;
+			return;
+		}
+	}
+	elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
+		 hashp->tabname);
+}
+
+/* Check if a table has any active scan */
+static bool
+has_seq_scans(HTAB *hashp)
+{
+	int			i;
+
+	for (i = 0; i < num_seq_scans; i++)
+	{
+		if (seq_scan_tables[i] == hashp)
+			return true;
+	}
+	return false;
+}
+
+/* Clean up any open scans at end of transaction */
+void
+AtEOXact_HashTables(bool isCommit)
+{
+	/*
+	 * During abort cleanup, open scans are expected; just silently clean 'em
+	 * out.  An open scan at commit means someone forgot a hash_seq_term()
+	 * call, so complain.
+	 *
+	 * Note: it's tempting to try to print the tabname here, but refrain for
+	 * fear of touching deallocated memory.  This isn't a user-facing message
+	 * anyway, so it needn't be pretty.
+	 */
+	if (isCommit)
+	{
+		int			i;
+
+		for (i = 0; i < num_seq_scans; i++)
+		{
+			elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+				 seq_scan_tables[i]);
+		}
+	}
+	num_seq_scans = 0;
+}
+
+/* Clean up any open scans at end of subtransaction */
+void
+AtEOSubXact_HashTables(bool isCommit, int nestDepth)
+{
+	int			i;
+
+	/*
+	 * Search backward to make cleanup easy.  Note we must check all entries,
+	 * not only those at the end of the array, because deletion technique
+	 * doesn't keep them in order.
+	 */
+	for (i = num_seq_scans - 1; i >= 0; i--)
+	{
+		if (seq_scan_level[i] >= nestDepth)
+		{
+			if (isCommit)
+				elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+					 seq_scan_tables[i]);
+			seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
+			seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
+			num_seq_scans--;
+		}
+	}
+}
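
Note: the AtEOXact_HashTables/AtEOSubXact_HashTables hooks wired into xact.c
at the top of this patch are the backstop for scans that a buggy caller
abandons without hash_seq_term(). A hypothetical leak, purely for
illustration (not from the patch):

    static bool
    leaky_lookup(HTAB *htab)
    {
        HASH_SEQ_STATUS status;

        hash_seq_init(&status, htab);       /* scan gets registered */
        if (hash_seq_search(&status) != NULL)
            return true;                    /* BUG: missing hash_seq_term() */
        return false;                       /* NULL already deregistered us */
    }

Until end of transaction, the leaked registration keeps has_seq_scans()
returning true for that table and so suppresses bucket splits there; at
commit, AtEOXact_HashTables(true) emits the "leaked hash_seq_search scan"
warning and resets the stack, while abort cleans up silently.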
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index 69bb3e3ebd..412e41952c 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.102 2007/04/26 16:13:13 neilc Exp $
+ *    $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.103 2007/04/26 23:24:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,7 +22,6 @@
 #include "access/xact.h"
 #include "catalog/pg_type.h"
 #include "commands/portalcmds.h"
-#include "funcapi.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
@@ -621,7 +620,9 @@ AtCommit_Portals(void)
 			/* Zap all non-holdable portals */
 			PortalDrop(portal, true);
 
-			/* Restart the iteration */
+			/* Restart the iteration in case that led to other drops */
+			/* XXX is this really necessary? */
+			hash_seq_term(&status);
 			hash_seq_init(&status, PortalHashTable);
 		}
 	}
@@ -858,79 +859,68 @@ AtSubCleanup_Portals(SubTransactionId mySubid)
 Datum
 pg_cursor(PG_FUNCTION_ARGS)
 {
-	FuncCallContext *funcctx;
-	HASH_SEQ_STATUS *hash_seq;
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	TupleDesc	tupdesc;
+	Tuplestorestate *tupstore;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
+	HASH_SEQ_STATUS hash_seq;
 	PortalHashEnt *hentry;
 
-	/* stuff done only on the first call of the function */
-	if (SRF_IS_FIRSTCALL())
+	/* check to see if caller supports us returning a tuplestore */
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("materialize mode required, but it is not " \
+						"allowed in this context")));
+
+	/* need to build tuplestore in query context */
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+	/*
+	 * build tupdesc for result tuples. This must match the definition of
+	 * the pg_cursors view in system_views.sql
+	 */
+	tupdesc = CreateTemplateTupleDesc(6, false);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
+					   TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
+					   TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "is_holdable",
+					   BOOLOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_binary",
+					   BOOLOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_scrollable",
+					   BOOLOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "creation_time",
+					   TIMESTAMPTZOID, -1, 0);
+
+	/*
+	 * We put all the tuples into a tuplestore in one scan of the hashtable.
+	 * This avoids any issue of the hashtable possibly changing between calls.
+	 */
+	tupstore = tuplestore_begin_heap(true, false, work_mem);
+
+	hash_seq_init(&hash_seq, PortalHashTable);
+	while ((hentry = hash_seq_search(&hash_seq)) != NULL)
 	{
-		MemoryContext oldcontext;
-		TupleDesc	tupdesc;
-
-		/* create a function context for cross-call persistence */
-		funcctx = SRF_FIRSTCALL_INIT();
-
-		/*
-		 * switch to memory context appropriate for multiple function calls
-		 */
-		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
-
-		if (PortalHashTable)
-		{
-			hash_seq = (HASH_SEQ_STATUS *) palloc(sizeof(HASH_SEQ_STATUS));
-			hash_seq_init(hash_seq, PortalHashTable);
-			funcctx->user_fctx = (void *) hash_seq;
-		}
-		else
-			funcctx->user_fctx = NULL;
-
-		/*
-		 * build tupdesc for result tuples. This must match the definition of
-		 * the pg_cursors view in system_views.sql
-		 */
-		tupdesc = CreateTemplateTupleDesc(6, false);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
-						   TEXTOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
-						   TEXTOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "is_holdable",
-						   BOOLOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_binary",
-						   BOOLOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_scrollable",
-						   BOOLOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 6, "creation_time",
-						   TIMESTAMPTZOID, -1, 0);
-
-		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
-		MemoryContextSwitchTo(oldcontext);
-	}
-
-	/* stuff done on every call of the function */
-	funcctx = SRF_PERCALL_SETUP();
-	hash_seq = (HASH_SEQ_STATUS *) funcctx->user_fctx;
-
-	/* if the hash table is uninitialized, we're done */
-	if (hash_seq == NULL)
-		SRF_RETURN_DONE(funcctx);
-
-	/* loop until we find a visible portal or hit the end of the list */
-	while ((hentry = hash_seq_search(hash_seq)) != NULL)
-	{
-		if (hentry->portal->visible)
-			break;
-	}
-
-	if (hentry)
-	{
-		Portal		portal;
-		Datum		result;
+		Portal		portal = hentry->portal;
 		HeapTuple	tuple;
 		Datum		values[6];
 		bool		nulls[6];
 
-		portal = hentry->portal;
+		/* report only "visible" entries */
+		if (!portal->visible)
+			continue;
+
+		/* generate junk in short-term context */
+		MemoryContextSwitchTo(oldcontext);
+
 		MemSet(nulls, 0, sizeof(nulls));
 
 		values[0] = DirectFunctionCall1(textin, CStringGetDatum(portal->name));
@@ -944,10 +934,21 @@ pg_cursor(PG_FUNCTION_ARGS)
 		values[4] = BoolGetDatum(portal->cursorOptions & CURSOR_OPT_SCROLL);
 		values[5] = TimestampTzGetDatum(portal->creation_time);
 
-		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
-		result = HeapTupleGetDatum(tuple);
-		SRF_RETURN_NEXT(funcctx, result);
+		tuple = heap_form_tuple(tupdesc, values, nulls);
+
+		/* switch to appropriate context while storing the tuple */
+		MemoryContextSwitchTo(per_query_ctx);
+		tuplestore_puttuple(tupstore, tuple);
 	}
 
-	SRF_RETURN_DONE(funcctx);
+	/* clean up and return the tuplestore */
+	tuplestore_donestoring(tupstore);
+
+	MemoryContextSwitchTo(oldcontext);
+
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
+
+	return (Datum) 0;
 }
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 56bac9350f..726ee5bdae 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.171 2007/03/27 23:21:12 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.172 2007/04/26 23:24:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -408,8 +408,20 @@ typedef struct TupleHashTableData
 
 typedef HASH_SEQ_STATUS TupleHashIterator;
 
-#define ResetTupleHashIterator(htable, iter) \
+/*
+ * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
+ * Use ResetTupleHashIterator if the table can be frozen (in this case no
+ * explicit scan termination is needed).
+ */
+#define InitTupleHashIterator(htable, iter) \
 	hash_seq_init(iter, (htable)->hashtab)
+#define TermTupleHashIterator(iter) \
+	hash_seq_term(iter)
+#define ResetTupleHashIterator(htable, iter) \
+	do { \
+		hash_freeze((htable)->hashtab); \
+		hash_seq_init(iter, (htable)->hashtab); \
+	} while (0)
 #define ScanTupleHashTable(iter) \
 	((TupleHashEntry) hash_seq_search(iter))
 
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index d40ec4f077..47f2dbc9b3 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.46 2007/01/05 22:19:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.47 2007/04/26 23:24:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -130,9 +130,13 @@ extern void *hash_search_with_hash_value(HTAB *hashp, const void *keyPtr,
 extern long hash_get_num_entries(HTAB *hashp);
 extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
 extern void *hash_seq_search(HASH_SEQ_STATUS *status);
+extern void hash_seq_term(HASH_SEQ_STATUS *status);
+extern void hash_freeze(HTAB *hashp);
 extern Size hash_estimate_size(long num_entries, Size entrysize);
 extern long hash_select_dirsize(long num_entries);
 extern Size hash_get_shared_size(HASHCTL *info, int flags);
+extern void AtEOXact_HashTables(bool isCommit);
+extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);
 
 /*
  * prototypes for functions in hashfn.c
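
Note: taken together, the execnodes.h macros above give executor code two
scan styles over a TupleHashTable. A sketch of both, not part of the patch;
hashtable is assumed to be a valid TupleHashTable and entry_is_interesting a
hypothetical predicate:

    TupleHashIterator iter;
    TupleHashEntry entry;

    /* Style 1: read/write scan; terminate explicitly on any early exit */
    InitTupleHashIterator(hashtable, &iter);
    while ((entry = ScanTupleHashTable(&iter)) != NULL)
    {
        if (entry_is_interesting(entry))
        {
            TermTupleHashIterator(&iter);
            break;
        }
    }
    /* if the loop ran to completion, the NULL return already cleaned up */

    /* Style 2: freeze the table first; no termination bookkeeping needed */
    ResetTupleHashIterator(hashtable, &iter);
    while ((entry = ScanTupleHashTable(&iter)) != NULL)
    {
        /* read-only processing; an insert would now elog(ERROR) */
    }

findPartialMatch() in nodeSubplan.c above is an instance of Style 1, while
nodeAgg.c's readout (not touched by this patch) is the intended user of
Style 2, per the comment in dynahash.c.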