Rearrange lazy_scan_heap to avoid visibility map race conditions.

We must set the visibility map bit before releasing our exclusive lock
on the heap page; otherwise, someone might clear the heap page bit
before we set the visibility map bit, leading to a situation where the
visibility map thinks the page is all-visible but it's really not.

This problem has existed since 8.4, but it wasn't critical before we
had index-only scans, since the worst case scenario was that the page
wouldn't get vacuumed until the next scan_all vacuum.

Along the way, a couple of minor, related improvements: (1) if we
pause the heap scan to do an index vac cycle, release any visibility
map page we're holding, since really long-running pins are not good
for a variety of reasons; and (2) warn if we see a page that's marked
all-visible in the visibility map but not on the page level, since
that should never happen any more (it was allowed in previous
releases, but not in 9.2).
This commit is contained in:
Robert Haas 2012-04-23 22:08:06 -04:00
parent 85efd5f065
commit 7ab9b2f3b7

View File

@ -490,6 +490,18 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
vacrelstats->num_dead_tuples > 0)
{
/*
* Before beginning index vacuuming, we release any pin we may hold
* on the visibility map page. This isn't necessary for correctness,
* but we do it anyway to avoid holding the pin across a lengthy,
* unrelated operation.
*/
if (BufferIsValid(vmbuffer))
{
ReleaseBuffer(vmbuffer);
vmbuffer = InvalidBuffer;
}
/* Log cleanup info before we touch indexes */
vacuum_log_cleanup_info(onerel, vacrelstats);
@ -510,6 +522,16 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vacrelstats->num_index_scans++;
}
/*
* Pin the visibility map page in case we need to mark the page
* all-visible. In most cases this will be very cheap, because we'll
* already have the correct page pinned anyway. However, it's possible
* that (a) next_not_all_visible_block is covered by a different VM page
* than the current block or (b) we released our pin and did a cycle of
* index vacuuming.
*/
visibilitymap_pin(onerel, blkno, &vmbuffer);
buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
RBM_NORMAL, vac_strategy);
@ -600,26 +622,15 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
empty_pages++;
freespace = PageGetHeapFreeSpace(page);
/* empty pages are always all-visible */
if (!PageIsAllVisible(page))
{
PageSetAllVisible(page);
MarkBufferDirty(buf);
visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
/* Update the visibility map */
if (!all_visible_according_to_vm)
{
visibilitymap_pin(onerel, blkno, &vmbuffer);
LockBuffer(buf, BUFFER_LOCK_SHARE);
if (PageIsAllVisible(page))
visibilitymap_set(onerel, blkno, InvalidXLogRecPtr,
vmbuffer);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
ReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
RecordPageWithFreeSpace(onerel, blkno, freespace);
continue;
}
@ -834,11 +845,26 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
freespace = PageGetHeapFreeSpace(page);
/* Update the all-visible flag on the page */
if (!PageIsAllVisible(page) && all_visible)
/* mark page all-visible, if appropriate */
if (all_visible && !all_visible_according_to_vm)
{
PageSetAllVisible(page);
MarkBufferDirty(buf);
if (!PageIsAllVisible(page))
{
PageSetAllVisible(page);
MarkBufferDirty(buf);
}
visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
}
/*
* As of PostgreSQL 9.2, the visibility map bit should never be set if
* the page-level bit is clear.
*/
else if (all_visible_according_to_vm && !PageIsAllVisible(page))
{
elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
relname, blkno);
visibilitymap_clear(onerel, blkno, vmbuffer);
}
/*
@ -859,30 +885,11 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
relname, blkno);
PageClearAllVisible(page);
SetBufferCommitInfoNeedsSave(buf);
/*
* Normally, we would drop the lock on the heap page before
* updating the visibility map, but since this case shouldn't
* happen anyway, don't worry about that.
*/
visibilitymap_pin(onerel, blkno, &vmbuffer);
MarkBufferDirty(buf);
visibilitymap_clear(onerel, blkno, vmbuffer);
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
/* Update the visibility map */
if (!all_visible_according_to_vm && all_visible)
{
visibilitymap_pin(onerel, blkno, &vmbuffer);
LockBuffer(buf, BUFFER_LOCK_SHARE);
if (PageIsAllVisible(page))
visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
ReleaseBuffer(buf);
UnlockReleaseBuffer(buf);
/* Remember the location of the last page with nonremovable tuples */
if (hastup)