diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 5c8cdfb9b2..56a8965e34 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8193,8 +8193,10 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, * corresponding visibility map block. Both should have already been modified * and dirtied. * - * If checksums are enabled, we also generate a full-page image of - * heap_buffer, if necessary. + * If checksums or wal_log_hints are enabled, we may also generate a full-page + * image of heap_buffer. Otherwise, we optimize away the FPI (by specifying + * REGBUF_NO_IMAGE for the heap buffer), in which case the caller should *not* + * update the heap page's LSN. */ XLogRecPtr log_heap_visible(RelFileLocator rlocator, Buffer heap_buffer, Buffer vm_buffer, diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index d62761728b..4ed70275e2 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -223,13 +223,13 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf) * visibilitymap_set - set bit(s) on a previously pinned page * * recptr is the LSN of the XLOG record we're replaying, if we're in recovery, - * or InvalidXLogRecPtr in normal running. The page LSN is advanced to the + * or InvalidXLogRecPtr in normal running. The VM page LSN is advanced to the * one provided; in normal running, we generate a new XLOG record and set the - * page LSN to that value. cutoff_xid is the largest xmin on the page being - * marked all-visible; it is needed for Hot Standby, and can be - * InvalidTransactionId if the page contains no tuples. It can also be set - * to InvalidTransactionId when a page that is already all-visible is being - * marked all-frozen. + * page LSN to that value (though the heap page's LSN may *not* be updated; + * see below). cutoff_xid is the largest xmin on the page being marked + * all-visible; it is needed for Hot Standby, and can be InvalidTransactionId + * if the page contains no tuples. It can also be set to InvalidTransactionId + * when a page that is already all-visible is being marked all-frozen. * * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling * this function. Except in recovery, caller should also pass the heap @@ -289,6 +289,11 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, /* * If data checksums are enabled (or wal_log_hints=on), we * need to protect the heap page from being torn. + * + * If not, then we must *not* update the heap page's LSN. In + * this case, the FPI for the heap page was omitted from the + * WAL record inserted above, so it would be incorrect to + * update the heap page's LSN. */ if (XLogHintBitIsNeeded()) { diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README index 72af656060..22c8ae9755 100644 --- a/src/backend/access/transam/README +++ b/src/backend/access/transam/README @@ -645,6 +645,23 @@ If you do decide to optimise away a WAL record, then any calls to MarkBufferDirty() must be replaced by MarkBufferDirtyHint(), otherwise you will expose the risk of partial page writes. +The all-visible hint in a heap page (PD_ALL_VISIBLE) is a special +case, because it is treated like a durable change in some respects and +a hint in other respects. It must satisfy the invariant that, if a +heap page's associated visibilitymap (VM) bit is set, then +PD_ALL_VISIBLE is set on the heap page itself. Clearing of +PD_ALL_VISIBLE is always treated like a fully-durable change to +maintain this invariant. Additionally, if checksums or wal_log_hints +are enabled, setting PD_ALL_VISIBLE is also treated like a +fully-durable change to protect against torn pages. + +But, if neither checksums nor wal_log_hints are enabled, torn pages +are of no consequence if the only change is to PD_ALL_VISIBLE; so no +full heap page image is taken, and the heap page's LSN is not +updated. NB: it would be incorrect to update the heap page's LSN when +applying this optimization, even though there is an associated WAL +record, because subsequent modifiers (e.g. an unrelated UPDATE) of the +page may falsely believe that a full page image is not required. Write-Ahead Logging for Filesystem Actions ------------------------------------------