Revert "WAL-log inplace update before revealing it to other sessions."

This reverts commit bfd5c6e279c8e1702eea882439dc7ebdf4d4b3a5 (v17) and
counterparts in each other non-master branch.  This unblocks reverting a
commit on which it depends.

Discussion: https://postgr.es/m/10ec0bc3-5933-1189-6bb8-5dec4114558e@gmail.com
Noah Misch 2024-11-02 09:04:59 -07:00
parent b165e71060
commit 9a1c73636d
3 changed files with 18 additions and 46 deletions

src/backend/access/heap/README.tuplock

@@ -203,4 +203,6 @@ Inplace updates create an exception to the rule that tuple data won't change
 under a reader holding a pin.  A reader of a heap_fetch() result tuple may
 witness a torn read.  Current inplace-updated fields are aligned and are no
 wider than four bytes, and current readers don't need consistency across
-fields.  Hence, they get by with just fetching each field once.
+fields.  Hence, they get by with just fetching each field once.  XXX such a
+caller may also read a value that has not reached WAL; see
+systable_inplace_update_finish().
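
To make the reading rule concrete, here is a minimal reader sketch (illustrative only, not part of this commit; it assumes "tup" is a pg_class tuple obtained via heap_fetch() with only a buffer pin held):

	/*
	 * We hold a pin but no buffer lock, so a concurrent inplace update may
	 * overwrite the tuple under us.  Fetch the field exactly once into a
	 * local variable: the field is aligned and at most four bytes wide, so
	 * the single read cannot be torn.  Re-reading it later might observe a
	 * different value, so don't expect consistency across fetches.
	 */
	TransactionId frozenxid =
		((Form_pg_class) GETSTRUCT(tup))->relfrozenxid;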

src/backend/access/heap/heapam.c

@@ -6380,8 +6380,6 @@ heap_inplace_update_and_unlock(Relation relation,
 	HeapTupleHeader htup = oldtup->t_data;
 	uint32		oldlen;
 	uint32		newlen;
-	char	   *dst;
-	char	   *src;
 
 	Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
 	oldlen = oldtup->t_len - htup->t_hoff;
@@ -6389,9 +6387,6 @@ heap_inplace_update_and_unlock(Relation relation,
 	if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
 		elog(ERROR, "wrong tuple length");
 
-	dst = (char *) htup + htup->t_hoff;
-	src = (char *) tuple->t_data + tuple->t_data->t_hoff;
-
 	/*
 	 * Construct shared cache inval if necessary.  Note that because we only
 	 * pass the new version of the tuple, this mustn't be used for any
@@ -6410,15 +6405,15 @@ heap_inplace_update_and_unlock(Relation relation,
 	 */
 	PreInplace_Inval();
 
+	/* NO EREPORT(ERROR) from here till changes are logged */
+	START_CRIT_SECTION();
+
+	memcpy((char *) htup + htup->t_hoff,
+		   (char *) tuple->t_data + tuple->t_data->t_hoff,
+		   newlen);
+
 	/*----------
-	 * NO EREPORT(ERROR) from here till changes are complete
-	 *
-	 * Our buffer lock won't stop a reader having already pinned and checked
-	 * visibility for this tuple.  Hence, we write WAL first, then mutate the
-	 * buffer.  Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
-	 * checkpoint delay makes that acceptable.  With the usual order of
-	 * changes, a crash after memcpy() and before XLogInsert() could allow
-	 * datfrozenxid to overtake relfrozenxid:
+	 * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
 	 *
 	 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
 	 * ["R" is a VACUUM tbl]
@@ -6428,28 +6423,14 @@ heap_inplace_update_and_unlock(Relation relation,
 	 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
 	 * [crash]
 	 * [recovery restores datfrozenxid w/o relfrozenxid]
-	 *
-	 * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
-	 * the buffer to the stack before logging.  Here, that facilitates a FPI
-	 * of the post-mutation block before we accept other sessions seeing it.
 	 */
-	Assert(!MyProc->delayChkpt);
-	START_CRIT_SECTION();
-	MyProc->delayChkpt = true;
+
+	MarkBufferDirty(buffer);
 
 	/* XLOG stuff */
 	if (RelationNeedsWAL(relation))
 	{
 		xl_heap_inplace xlrec;
-		PGAlignedBlock copied_buffer;
-		char	   *origdata = (char *) BufferGetBlock(buffer);
-		Page		page = BufferGetPage(buffer);
-		uint16		lower = ((PageHeader) page)->pd_lower;
-		uint16		upper = ((PageHeader) page)->pd_upper;
-		uintptr_t	dst_offset_in_block;
-		RelFileNode rnode;
-		ForkNumber	forkno;
-		BlockNumber blkno;
 		XLogRecPtr	recptr;
 
 		xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
@@ -6457,28 +6438,16 @@ heap_inplace_update_and_unlock(Relation relation,
 		XLogBeginInsert();
 		XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
 
-		/* register block matching what buffer will look like after changes */
-		memcpy(copied_buffer.data, origdata, lower);
-		memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
-		dst_offset_in_block = dst - origdata;
-		memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
-		BufferGetTag(buffer, &rnode, &forkno, &blkno);
-		Assert(forkno == MAIN_FORKNUM);
-		XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer.data,
-						  REGBUF_STANDARD);
-		XLogRegisterBufData(0, src, newlen);
+		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+		XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
 
 		/* inplace updates aren't decoded atm, don't log the origin */
 
 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
 
-		PageSetLSN(page, recptr);
+		PageSetLSN(BufferGetPage(buffer), recptr);
 	}
 
-	memcpy(dst, src, newlen);
-
-	MarkBufferDirty(buffer);
-
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
/*
@@ -6491,7 +6460,6 @@ heap_inplace_update_and_unlock(Relation relation,
 	 */
 	AtInplace_Inval();
-	MyProc->delayChkpt = false;
 	END_CRIT_SECTION();
 	UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
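
In shorthand, the two orderings this revert switches between look like this (a condensed sketch, not project code; "dst", "src", and "recptr" are the names from the hunks above, reused in both variants for brevity, with locking and invalidation calls omitted):

	/* Ordering restored by this revert: mutate first, then log. */
	START_CRIT_SECTION();
	memcpy(dst, src, newlen);			/* 1. change the shared buffer */
	MarkBufferDirty(buffer);
	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);	/* 2. then WAL */
	PageSetLSN(BufferGetPage(buffer), recptr);
	END_CRIT_SECTION();

	/* Reverted ordering: log an image of the post-change page first. */
	START_CRIT_SECTION();
	MyProc->delayChkpt = true;			/* keep a checkpoint out of the window */
	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);	/* 1. WAL: FPI of a
										 * stack copy holding the new value */
	PageSetLSN(BufferGetPage(buffer), recptr);
	memcpy(dst, src, newlen);			/* 2. only now change the buffer */
	MarkBufferDirty(buffer);
	MyProc->delayChkpt = false;
	END_CRIT_SECTION();

Per the reverted comment, the WAL-first order existed because the buffer lock cannot stop a reader that has already pinned the page and passed its visibility check: logging before the memcpy() ensured such a reader never acted on a value that a crash could erase.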

src/backend/access/transam/xloginsert.c

@@ -275,6 +275,8 @@ XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
 {
 	registered_buffer *regbuf;
 
+	/* This is currently only used to WAL-log a full-page image of a page */
+	Assert(flags & REGBUF_FORCE_IMAGE);
 	Assert(begininsert_called);
 
 	if (block_id >= max_registered_block_id)
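
With the assertion restored, XLogRegisterBlock() is again reserved for forcing a full-page image, as its existing caller XLogSaveBufferForHint() (named in the reverted comment above) does. A minimal caller sketch under that assumption; "copied_page" and "buffer" are illustrative:

	PGAlignedBlock copied_page;	/* stack copy of the page being logged */
	RelFileNode rnode;
	ForkNumber	forkno;
	BlockNumber blkno;
	XLogRecPtr	recptr;

	memcpy(copied_page.data, BufferGetBlock(buffer), BLCKSZ);
	BufferGetTag(buffer, &rnode, &forkno, &blkno);

	XLogBeginInsert();
	/* REGBUF_FORCE_IMAGE is what the restored Assert() demands */
	XLogRegisterBlock(0, &rnode, forkno, blkno, copied_page.data,
					  REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
	recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT);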