Fix nbtree metapage cache upgrade bug.

Commit 857f9c36cda, which taught nbtree VACUUM to avoid unnecessary
index scans, bumped the nbtree version number from 2 to 3, while adding
the ability for nbtree indexes to be upgraded on-the-fly.  Various
assertions that assumed that an nbtree index was always on version 2 had
to be changed to accept any supported version (version 2 or 3 on
Postgres 11).

However, a few assertions were missed in the initial commit, all of
which were in code paths that cache a local copy of the metapage
metadata, where the index had been expected to be on the current version
(no longer version 2) as a generic sanity check.  Rather than simply
update the assertions, follow-up commit 0a64b45152b intentionally made
the metapage caching code update the per-backend cached metadata version
without changing the on-disk version at the same time.  This could even
happen when the planner needed to determine the height of a B-Tree for
costing purposes.  The assertions only fail on Postgres v12 when
upgrading from v10, because they were adjusted to use the authoritative
shared memory metapage by v12's commit dd299df8.

To fix, remove the cache-only upgrade mechanism entirely, and update the
assertions themselves to accept any supported version (go back to using
the cached version in v12).  The fix is almost a full revert of commit
0a64b45152b on the v11 branch.

VACUUM only considers the authoritative metapage, and never bothers with
a locally cached version, whereas everywhere else isn't interested in
the metapage fields that were added by commit 857f9c36cda.  It seems
unlikely that this bug has affected any user on v11.

Reported-By: Christoph Berg
Bug: #15896
Discussion: https://postgr.es/m/15896-5b25e260fdb0b081%40postgresql.org
Backpatch: 11-, where VACUUM was taught to avoid unnecessary index scans.
This commit is contained in:
Peter Geoghegan 2019-07-18 13:22:53 -07:00
parent bcafb675a0
commit 6523f2ed34
3 changed files with 13 additions and 45 deletions

View File

@ -33,7 +33,6 @@
#include "storage/predicate.h" #include "storage/predicate.h"
#include "utils/snapmgr.h" #include "utils/snapmgr.h"
static void _bt_cachemetadata(Relation rel, BTMetaPageData *metad);
static bool _bt_mark_page_halfdead(Relation rel, Buffer buf, BTStack stack); static bool _bt_mark_page_halfdead(Relation rel, Buffer buf, BTStack stack);
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
bool *rightsib_empty); bool *rightsib_empty);
@ -106,44 +105,6 @@ _bt_upgrademetapage(Page page)
((char *) metad + sizeof(BTMetaPageData)) - (char *) page; ((char *) metad + sizeof(BTMetaPageData)) - (char *) page;
} }
/*
* Cache metadata from meta page to rel->rd_amcache.
*/
static void
_bt_cachemetadata(Relation rel, BTMetaPageData *metad)
{
/* We assume rel->rd_amcache was already freed by caller */
Assert(rel->rd_amcache == NULL);
rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
sizeof(BTMetaPageData));
/*
* Meta page should be of supported version (should be already checked by
* caller).
*/
Assert(metad->btm_version >= BTREE_MIN_VERSION &&
metad->btm_version <= BTREE_VERSION);
if (metad->btm_version == BTREE_VERSION)
{
/* Last version of meta-data, no need to upgrade */
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
}
else
{
BTMetaPageData *cached_metad = (BTMetaPageData *) rel->rd_amcache;
/*
* Upgrade meta-data: copy available information from meta-page and
* fill new fields with default values.
*/
memcpy(rel->rd_amcache, metad, offsetof(BTMetaPageData, btm_oldest_btpo_xact));
cached_metad->btm_version = BTREE_VERSION;
cached_metad->btm_oldest_btpo_xact = InvalidTransactionId;
cached_metad->btm_last_cleanup_num_heap_tuples = -1.0;
}
}
/* /*
* _bt_update_meta_cleanup_info() -- Update cleanup-related information in * _bt_update_meta_cleanup_info() -- Update cleanup-related information in
* the metapage. * the metapage.
@ -442,7 +403,9 @@ _bt_getroot(Relation rel, int access)
/* /*
* Cache the metapage data for next time * Cache the metapage data for next time
*/ */
_bt_cachemetadata(rel, metad); rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
sizeof(BTMetaPageData));
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
/* /*
* We are done with the metapage; arrange to release it via first * We are done with the metapage; arrange to release it via first
@ -641,15 +604,18 @@ _bt_getrootheight(Relation rel)
/* /*
* Cache the metapage data for next time * Cache the metapage data for next time
*/ */
_bt_cachemetadata(rel, metad); rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt,
sizeof(BTMetaPageData));
memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData));
_bt_relbuf(rel, metabuf); _bt_relbuf(rel, metabuf);
} }
/* Get cached page */
metad = (BTMetaPageData *) rel->rd_amcache; metad = (BTMetaPageData *) rel->rd_amcache;
/* We shouldn't have cached it if any of these fail */ /* We shouldn't have cached it if any of these fail */
Assert(metad->btm_magic == BTREE_MAGIC); Assert(metad->btm_magic == BTREE_MAGIC);
Assert(metad->btm_version == BTREE_VERSION); Assert(metad->btm_version >= BTREE_MIN_VERSION);
Assert(metad->btm_version <= BTREE_VERSION);
Assert(metad->btm_fastroot != P_NONE); Assert(metad->btm_fastroot != P_NONE);
return metad->btm_fastlevel; return metad->btm_fastlevel;

View File

@ -108,6 +108,8 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id)
md->btm_level = xlrec->level; md->btm_level = xlrec->level;
md->btm_fastroot = xlrec->fastroot; md->btm_fastroot = xlrec->fastroot;
md->btm_fastlevel = xlrec->fastlevel; md->btm_fastlevel = xlrec->fastlevel;
/* Cannot log BTREE_MIN_VERSION index metapage without upgrade */
Assert(md->btm_version == BTREE_VERSION);
md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact; md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact;
md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples; md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples;

View File

@ -97,12 +97,12 @@ typedef BTPageOpaqueData *BTPageOpaque;
typedef struct BTMetaPageData typedef struct BTMetaPageData
{ {
uint32 btm_magic; /* should contain BTREE_MAGIC */ uint32 btm_magic; /* should contain BTREE_MAGIC */
uint32 btm_version; /* should contain BTREE_VERSION */ uint32 btm_version; /* nbtree version (always <= BTREE_VERSION) */
BlockNumber btm_root; /* current root location */ BlockNumber btm_root; /* current root location */
uint32 btm_level; /* tree level of the root page */ uint32 btm_level; /* tree level of the root page */
BlockNumber btm_fastroot; /* current "fast" root location */ BlockNumber btm_fastroot; /* current "fast" root location */
uint32 btm_fastlevel; /* tree level of the "fast" root page */ uint32 btm_fastlevel; /* tree level of the "fast" root page */
/* following fields are available since page version 3 */ /* remaining fields only valid when btm_version is BTREE_VERSION */
TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among all deleted TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among all deleted
* pages */ * pages */
float8 btm_last_cleanup_num_heap_tuples; /* number of heap tuples float8 btm_last_cleanup_num_heap_tuples; /* number of heap tuples