Add defenses to btree and hash index AMs to do simple sanity checks

on every index page they read; in particular to catch the case of an all-zero page, which PageHeaderIsValid allows to pass. It turns out hash already had this idea, but it was just Assert()ing things rather than doing a straight error check, and the Asserts were partially redundant with PageHeaderIsValid anyway. Per recent failure example from Jim Nasby. (gist still needs the same treatment.)
2005-11-06 19:29:01 +00:00 · 2005-11-06 19:29:01 +00:00 · 766dc45d9f
commit 766dc45d9f
parent 18691d8ee3
11 changed files with 147 additions and 79 deletions
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.81 2005/10/15 02:49:08 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.82 2005/11/06 19:29:00 tgl Exp $
 *
 * NOTES
 *	  This file contains only the public interface routines.
@ -513,8 +513,8 @@ hashbulkdelete(PG_FUNCTION_ARGS)
 	 * each bucket.
 	 */
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+	_hash_checkpage(rel, metabuf, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
-	_hash_checkpage(rel, (Page) metap, LH_META_PAGE);
 	orig_maxbucket = metap->hashm_maxbucket;
 	orig_ntuples = metap->hashm_ntuples;
 	memcpy(&local_metapage, metap, sizeof(local_metapage));
@ -555,8 +555,8 @@ loop_top:
 			vacuum_delay_point();

 			buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+			_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 			page = BufferGetPage(buf);
-			_hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 			opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 			Assert(opaque->hasho_bucket == cur_bucket);

@ -614,8 +614,8 @@ loop_top:

 	/* Write-lock metapage and check for split since we started */
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+	_hash_checkpage(rel, metabuf, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
-	_hash_checkpage(rel, (Page) metap, LH_META_PAGE);

 	if (cur_maxbucket != metap->hashm_maxbucket)
 	{
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.39 2005/10/18 01:06:23 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.40 2005/11/06 19:29:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -73,8 +73,8 @@ _hash_doinsert(Relation rel, HashItem hitem)

 	/* Read the metapage */
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+	_hash_checkpage(rel, metabuf, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
-	_hash_checkpage(rel, (Page) metap, LH_META_PAGE);

 	/*
 	 * Check whether the item can fit on a hash page at all. (Eventually, we
@ -111,8 +111,8 @@ _hash_doinsert(Relation rel, HashItem hitem)

 	/* Fetch the primary bucket page for the bucket */
 	buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+	_hash_checkpage(rel, buf, LH_BUCKET_PAGE);
 	page = BufferGetPage(buf);
-	_hash_checkpage(rel, page, LH_BUCKET_PAGE);
 	pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
 	Assert(pageopaque->hasho_bucket == bucket);

@ -151,7 +151,7 @@ _hash_doinsert(Relation rel, HashItem hitem)
 			/* should fit now, given test above */
 			Assert(PageGetFreeSpace(page) >= itemsz);
 		}
-		_hash_checkpage(rel, page, LH_OVERFLOW_PAGE);
+		_hash_checkpage(rel, buf, LH_OVERFLOW_PAGE);
 		pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
 		Assert(pageopaque->hasho_bucket == bucket);
 	}
@ -204,8 +204,8 @@ _hash_pgaddtup(Relation rel,
 	OffsetNumber itup_off;
 	Page		page;

+	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	page = BufferGetPage(buf);
-	_hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);

 	itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
 	if (PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED)
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.47 2005/10/15 02:49:08 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.48 2005/11/06 19:29:00 tgl Exp $
 *
 * NOTES
 *	  Overflow pages look like ordinary relation pages.
@ -124,8 +124,8 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf)
 	{
 		BlockNumber nextblkno;

+		_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 		page = BufferGetPage(buf);
-		_hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 		pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
 		nextblkno = pageopaque->hasho_nextblkno;

@ -183,8 +183,8 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 	/* Get exclusive lock on the meta page */
 	_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

+	_hash_checkpage(rel, metabuf, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
-	_hash_checkpage(rel, (Page) metap, LH_META_PAGE);

 	/* start search at hashm_firstfree */
 	orig_firstfree = metap->hashm_firstfree;
@ -222,8 +222,8 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
 		_hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

 		mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE);
+		_hash_checkpage(rel, mapbuf, LH_BITMAP_PAGE);
 		mappage = BufferGetPage(mapbuf);
-		_hash_checkpage(rel, mappage, LH_BITMAP_PAGE);
 		freep = HashPageGetBitmap(mappage);

 		for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
@ -379,9 +379,9 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	Bucket		bucket;

 	/* Get information from the doomed page */
+	_hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
 	ovflblkno = BufferGetBlockNumber(ovflbuf);
 	ovflpage = BufferGetPage(ovflbuf);
-	_hash_checkpage(rel, ovflpage, LH_OVERFLOW_PAGE);
 	ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
 	nextblkno = ovflopaque->hasho_nextblkno;
 	prevblkno = ovflopaque->hasho_prevblkno;
@ -403,7 +403,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 		Page		prevpage = BufferGetPage(prevbuf);
 		HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);

-		_hash_checkpage(rel, prevpage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+		_hash_checkpage(rel, prevbuf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 		Assert(prevopaque->hasho_bucket == bucket);
 		prevopaque->hasho_nextblkno = nextblkno;
 		_hash_wrtbuf(rel, prevbuf);
@ -414,7 +414,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 		Page		nextpage = BufferGetPage(nextbuf);
 		HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);

-		_hash_checkpage(rel, nextpage, LH_OVERFLOW_PAGE);
+		_hash_checkpage(rel, nextbuf, LH_OVERFLOW_PAGE);
 		Assert(nextopaque->hasho_bucket == bucket);
 		nextopaque->hasho_prevblkno = prevblkno;
 		_hash_wrtbuf(rel, nextbuf);
@ -422,8 +422,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)

 	/* Read the metapage so we can determine which bitmap page to use */
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+	_hash_checkpage(rel, metabuf, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
-	_hash_checkpage(rel, (Page) metap, LH_META_PAGE);

 	/* Identify which bit to set */
 	ovflbitno = blkno_to_bitno(metap, ovflblkno);
@ -440,8 +440,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)

 	/* Clear the bitmap bit to indicate that this overflow page is free */
 	mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
+	_hash_checkpage(rel, mapbuf, LH_BITMAP_PAGE);
 	mappage = BufferGetPage(mapbuf);
-	_hash_checkpage(rel, mappage, LH_BITMAP_PAGE);
 	freep = HashPageGetBitmap(mappage);
 	Assert(ISSET(freep, bitmapbit));
 	CLRBIT(freep, bitmapbit);
@ -569,8 +569,8 @@ _hash_squeezebucket(Relation rel,
 	 */
 	wblkno = bucket_blkno;
 	wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
+	_hash_checkpage(rel, wbuf, LH_BUCKET_PAGE);
 	wpage = BufferGetPage(wbuf);
-	_hash_checkpage(rel, wpage, LH_BUCKET_PAGE);
 	wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);

 	/*
@ -593,8 +593,8 @@ _hash_squeezebucket(Relation rel,
 		if (ropaque != wopaque)
 			_hash_relbuf(rel, rbuf);
 		rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
+		_hash_checkpage(rel, rbuf, LH_OVERFLOW_PAGE);
 		rpage = BufferGetPage(rbuf);
-		_hash_checkpage(rel, rpage, LH_OVERFLOW_PAGE);
 		ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
 		Assert(ropaque->hasho_bucket == bucket);
 	} while (BlockNumberIsValid(ropaque->hasho_nextblkno));
@ -635,8 +635,8 @@ _hash_squeezebucket(Relation rel,
 				}

 				wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
+				_hash_checkpage(rel, wbuf, LH_OVERFLOW_PAGE);
 				wpage = BufferGetPage(wbuf);
-				_hash_checkpage(rel, wpage, LH_OVERFLOW_PAGE);
 				wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
 				Assert(wopaque->hasho_bucket == bucket);
 			}
@ -688,8 +688,8 @@ _hash_squeezebucket(Relation rel,
 			_hash_freeovflpage(rel, rbuf);

 			rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
+			_hash_checkpage(rel, rbuf, LH_OVERFLOW_PAGE);
 			rpage = BufferGetPage(rbuf);
-			_hash_checkpage(rel, rpage, LH_OVERFLOW_PAGE);
 			ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
 			Assert(ropaque->hasho_bucket == bucket);

--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.52 2005/10/15 02:49:08 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.53 2005/11/06 19:29:00 tgl Exp $
 *
 * NOTES
 *	  Postgres hash pages look like ordinary relation pages.  The opaque
@ -103,9 +103,11 @@ _hash_droplock(Relation rel, BlockNumber whichlock, int access)
 *		(ie, the buffer is "locked and pinned").
 *
 *		XXX P_NEW is not used because, unlike the tree structures, we
- *		need the bucket blocks to be at certain block numbers.	we must
- *		depend on the caller to call _hash_pageinit on the block if it
- *		knows that this is a new block.
+ *		need the bucket blocks to be at certain block numbers.
+ *
+ *		All call sites should call either _hash_pageinit or _hash_checkpage
+ *		on the returned page, depending on whether the block is expected
+ *		to be new or not.
 */
 Buffer
 _hash_getbuf(Relation rel, BlockNumber blkno, int access)
@ -380,8 +382,8 @@ _hash_expandtable(Relation rel, Buffer metabuf)
 	/* Write-lock the meta page */
 	_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

+	_hash_checkpage(rel, metabuf, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
-	_hash_checkpage(rel, (Page) metap, LH_META_PAGE);

 	/*
 	 * Check to see if split is still needed; someone else might have already
@ -555,15 +557,15 @@ _hash_splitbucket(Relation rel,
 	 * either bucket.
 	 */
 	oblkno = start_oblkno;
-	nblkno = start_nblkno;
 	obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
-	nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
+	_hash_checkpage(rel, obuf, LH_BUCKET_PAGE);
 	opage = BufferGetPage(obuf);
-	npage = BufferGetPage(nbuf);
-
-	_hash_checkpage(rel, opage, LH_BUCKET_PAGE);
 	oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);

+	nblkno = start_nblkno;
+	nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
+	npage = BufferGetPage(nbuf);
+
 	/* initialize the new bucket's primary page */
 	_hash_pageinit(npage, BufferGetPageSize(nbuf));
 	nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
@ -602,8 +604,8 @@ _hash_splitbucket(Relation rel,
 			_hash_wrtbuf(rel, obuf);

 			obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
+			_hash_checkpage(rel, obuf, LH_OVERFLOW_PAGE);
 			opage = BufferGetPage(obuf);
-			_hash_checkpage(rel, opage, LH_OVERFLOW_PAGE);
 			oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
 			ooffnum = FirstOffsetNumber;
 			omaxoffnum = PageGetMaxOffsetNumber(opage);
@ -642,8 +644,8 @@ _hash_splitbucket(Relation rel,
 				_hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
 				/* chain to a new overflow page */
 				nbuf = _hash_addovflpage(rel, metabuf, nbuf);
+				_hash_checkpage(rel, nbuf, LH_OVERFLOW_PAGE);
 				npage = BufferGetPage(nbuf);
-				_hash_checkpage(rel, npage, LH_OVERFLOW_PAGE);
 				/* we don't need nopaque within the loop */
 			}

--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.41 2005/10/18 01:06:23 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.42 2005/11/06 19:29:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -53,8 +53,8 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
 	/* if we're here, _hash_step found a valid tuple */
 	current = &(scan->currentItemData);
 	offnum = ItemPointerGetOffsetNumber(current);
+	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	page = BufferGetPage(buf);
-	_hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
 	itup = &hitem->hash_itup;
 	scan->xs_ctup.t_self = itup->t_tid;
@ -77,8 +77,8 @@ _hash_readnext(Relation rel,
 	if (BlockNumberIsValid(blkno))
 	{
 		*bufp = _hash_getbuf(rel, blkno, HASH_READ);
+		_hash_checkpage(rel, *bufp, LH_OVERFLOW_PAGE);
 		*pagep = BufferGetPage(*bufp);
-		_hash_checkpage(rel, *pagep, LH_OVERFLOW_PAGE);
 		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
 	}
 }
@ -98,8 +98,8 @@ _hash_readprev(Relation rel,
 	if (BlockNumberIsValid(blkno))
 	{
 		*bufp = _hash_getbuf(rel, blkno, HASH_READ);
+		_hash_checkpage(rel, *bufp, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 		*pagep = BufferGetPage(*bufp);
-		_hash_checkpage(rel, *pagep, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
 	}
 }
@ -168,8 +168,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)

 	/* Read the metapage */
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+	_hash_checkpage(rel, metabuf, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
-	_hash_checkpage(rel, (Page) metap, LH_META_PAGE);

 	/*
 	 * Compute the target bucket number, and convert to block number.
@ -198,8 +198,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)

 	/* Fetch the primary bucket page for the bucket */
 	buf = _hash_getbuf(rel, blkno, HASH_READ);
+	_hash_checkpage(rel, buf, LH_BUCKET_PAGE);
 	page = BufferGetPage(buf);
-	_hash_checkpage(rel, page, LH_BUCKET_PAGE);
 	opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 	Assert(opaque->hasho_bucket == bucket);

@ -216,8 +216,8 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)

 	/* if we're here, _hash_step found a valid tuple */
 	offnum = ItemPointerGetOffsetNumber(current);
+	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	page = BufferGetPage(buf);
-	_hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
 	itup = &hitem->hash_itup;
 	scan->xs_ctup.t_self = itup->t_tid;
@ -254,8 +254,8 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
 	current = &(scan->currentItemData);

 	buf = *bufP;
+	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	page = BufferGetPage(buf);
-	_hash_checkpage(rel, page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	opaque = (HashPageOpaque) PageGetSpecialPointer(page);

 	/*
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashutil.c,v 1.42 2005/05/11 01:26:01 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashutil.c,v 1.43 2005/11/06 19:29:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -110,12 +110,50 @@ _hash_log2(uint32 num)
 * _hash_checkpage -- sanity checks on the format of all hash pages
 */
 void
-_hash_checkpage(Relation rel, Page page, int flags)
+_hash_checkpage(Relation rel, Buffer buf, int flags)
 {
-	Assert(page);
+	Page		page = BufferGetPage(buf);

 	/*
-	 * When checking the metapage, always verify magic number and version.
+	 * ReadBuffer verifies that every newly-read page passes PageHeaderIsValid,
+	 * which means it either contains a reasonably sane page header or is
+	 * all-zero.  We have to defend against the all-zero case, however.
+	 */
+	if (PageIsNew(page))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" contains unexpected zero page at block %u",
+						RelationGetRelationName(rel),
+						BufferGetBlockNumber(buf)),
+				 errhint("Please REINDEX it.")));
+
+	/*
+	 * Additionally check that the special area looks sane.
+	 */
+	if (((PageHeader) (page))->pd_special !=
+		   (BLCKSZ - MAXALIGN(sizeof(HashPageOpaqueData))))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" contains corrupted page at block %u",
+						RelationGetRelationName(rel),
+						BufferGetBlockNumber(buf)),
+				 errhint("Please REINDEX it.")));
+
+	if (flags)
+	{
+		HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+		if ((opaque->hasho_flag & flags) == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("index \"%s\" contains corrupted page at block %u",
+							RelationGetRelationName(rel),
+							BufferGetBlockNumber(buf)),
+					 errhint("Please REINDEX it.")));
+	}
+
+	/*
+	 * When checking the metapage, also verify magic number and version.
 	 */
 	if (flags == LH_META_PAGE)
 	{
@ -130,25 +168,8 @@ _hash_checkpage(Relation rel, Page page, int flags)
 		if (metap->hashm_version != HASH_VERSION)
 			ereport(ERROR,
 					(errcode(ERRCODE_INDEX_CORRUPTED),
-					 errmsg("index \"%s\" has wrong hash version", RelationGetRelationName(rel)),
+					 errmsg("index \"%s\" has wrong hash version",
+							RelationGetRelationName(rel)),
 					 errhint("Please REINDEX it.")));
 	}
-
-	/*
-	 * These other checks are for debugging purposes only.
-	 */
-#ifdef USE_ASSERT_CHECKING
-	Assert(((PageHeader) (page))->pd_lower >= SizeOfPageHeaderData);
-	Assert(((PageHeader) (page))->pd_upper <=
-		   (BLCKSZ - MAXALIGN(sizeof(HashPageOpaqueData))));
-	Assert(((PageHeader) (page))->pd_special ==
-		   (BLCKSZ - MAXALIGN(sizeof(HashPageOpaqueData))));
-	Assert(PageGetPageSize(page) == BLCKSZ);
-	if (flags)
-	{
-		HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
-
-		Assert(opaque->hasho_flag & flags);
-	}
-#endif   /* USE_ASSERT_CHECKING */
 }
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.127 2005/10/15 02:49:09 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.128 2005/11/06 19:29:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -672,7 +672,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
 	rightpage = BufferGetPage(rbuf);

 	_bt_pageinit(leftpage, BufferGetPageSize(buf));
-	_bt_pageinit(rightpage, BufferGetPageSize(rbuf));
+	/* rightpage was already initialized by _bt_getbuf */

 	/* init btree private data */
 	oopaque = (BTPageOpaque) PageGetSpecialPointer(origpage);
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.88 2005/10/15 02:49:09 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.89 2005/11/06 19:29:00 tgl Exp $
 *
 *	NOTES
 *	   Postgres btree pages look like ordinary relation pages.	The opaque
@ -222,8 +222,6 @@ _bt_getroot(Relation rel, int access)
 		rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
 		rootblkno = BufferGetBlockNumber(rootbuf);
 		rootpage = BufferGetPage(rootbuf);
-
-		_bt_pageinit(rootpage, BufferGetPageSize(rootbuf));
 		rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
 		rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
 		rootopaque->btpo_flags = (BTP_LEAF | BTP_ROOT);
@ -405,14 +403,50 @@ _bt_gettrueroot(Relation rel)
 	return rootbuf;
 }

+/*
+ *	_bt_checkpage() -- Verify that a freshly-read page looks sane.
+ *
+ *		rel is the index the page belongs to; here it is used only to name
+ *		the index in error messages.  buf is the buffer holding the page to
+ *		check; its block number is included in the error messages.
+ *
+ *		Reports an ERROR with ERRCODE_INDEX_CORRUPTED (plus a hint to
+ *		REINDEX) if PageIsNew() says the page was never initialized, or if
+ *		pd_special is not at the offset required for BTPageOpaqueData.
+ *		If both checks pass, the function simply returns.
+ */
+void
+_bt_checkpage(Relation rel, Buffer buf)
+{
+	Page		page = BufferGetPage(buf);
+
+	/*
+	 * ReadBuffer verifies that every newly-read page passes PageHeaderIsValid,
+	 * which means it either contains a reasonably sane page header or is
+	 * all-zero.  We have to defend against the all-zero case, however.
+	 * Such a page is reported as index corruption ("unexpected zero page")
+	 * instead of being handed back to the caller.
+	 */
+	if (PageIsNew(page))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" contains unexpected zero page at block %u",
+						RelationGetRelationName(rel),
+						BufferGetBlockNumber(buf)),
+				 errhint("Please REINDEX it.")));
+
+	/*
+	 * Additionally check that the special area looks sane: pd_special must
+	 * leave exactly MAXALIGN(sizeof(BTPageOpaqueData)) bytes at the end of
+	 * a BLCKSZ-sized page.  Any other value is reported as a corrupted page.
+	 */
+	if (((PageHeader) (page))->pd_special !=
+		(BLCKSZ - MAXALIGN(sizeof(BTPageOpaqueData))))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" contains corrupted page at block %u",
+						RelationGetRelationName(rel),
+						BufferGetBlockNumber(buf)),
+				 errhint("Please REINDEX it.")));
+}
+
 /*
 *	_bt_getbuf() -- Get a buffer by block number for read or write.
 *
- *		blkno == P_NEW means to get an unallocated index page.
+ *		blkno == P_NEW means to get an unallocated index page.  The page
+ *		will be initialized before returning it.
 *
 *		When this routine returns, the appropriate lock is set on the
 *		requested buffer and its reference count has been incremented
- *		(ie, the buffer is "locked and pinned").
+ *		(ie, the buffer is "locked and pinned").  Also, we apply
+ *		_bt_checkpage to sanity-check the page (except in P_NEW case).
 */
 Buffer
 _bt_getbuf(Relation rel, BlockNumber blkno, int access)
@ -424,6 +458,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
 		/* Read an existing block of the relation */
 		buf = ReadBuffer(rel, blkno);
 		LockBuffer(buf, access);
+		_bt_checkpage(rel, buf);
 	}
 	else
 	{
@ -538,6 +573,7 @@ _bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
 		LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
 	buf = ReleaseAndReadBuffer(obuf, rel, blkno);
 	LockBuffer(buf, access);
+	_bt_checkpage(rel, buf);
 	return buf;
 }

--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@ -12,7 +12,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.132 2005/10/15 02:49:09 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.133 2005/11/06 19:29:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -734,8 +734,8 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
 	 * buffer and it will be fully initialized before we can examine it.  (See
 	 * also vacuumlazy.c, which has the same issue.)
 	 *
-	 * We can skip locking for new or temp relations, however, since no one else
-	 * could be accessing them.
+	 * We can skip locking for new or temp relations, however, since no one
+	 * else could be accessing them.
 	 */
 	needLock = !RELATION_IS_LOCAL(rel);

@ -772,9 +772,17 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
 		Page		page;
 		BTPageOpaque opaque;

-		buf = _bt_getbuf(rel, blkno, BT_READ);
+		/*
+		 * We can't use _bt_getbuf() here because it always applies
+		 * _bt_checkpage(), which will barf on an all-zero page.
+		 * We want to recycle all-zero pages, not fail.
+		 */
+		buf = ReadBuffer(rel, blkno);
+		LockBuffer(buf, BT_READ);
 		page = BufferGetPage(buf);
 		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+		if (!PageIsNew(page))
+			_bt_checkpage(rel, buf);
 		if (_bt_page_recyclable(page))
 		{
 			/* Okay to recycle this page */
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.63 2005/10/15 02:49:42 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.64 2005/11/06 19:29:01 tgl Exp $
 *
 * NOTES
 *		modeled after Margo Seltzer's hash implementation for unix.
@ -310,7 +310,7 @@ extern uint32 _hash_datum2hashkey(Relation rel, Datum key);
 extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
 					 uint32 highmask, uint32 lowmask);
 extern uint32 _hash_log2(uint32 num);
-extern void _hash_checkpage(Relation rel, Page page, int flags);
+extern void _hash_checkpage(Relation rel, Buffer buf, int flags);

 /* hash.c */
 extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.87 2005/10/15 02:49:42 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.88 2005/11/06 19:29:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -429,6 +429,7 @@ extern void _bt_metapinit(Relation rel);
 extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level);
 extern Buffer _bt_getroot(Relation rel, int access);
 extern Buffer _bt_gettrueroot(Relation rel);
+extern void _bt_checkpage(Relation rel, Buffer buf);
 extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
 extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
 				 BlockNumber blkno, int access);