Fix failure to guarantee that a checkpoint will write out pg_clog updates
for transaction commits that occurred just before the checkpoint. This is an EXTREMELY serious bug --- kudos to Satoshi Okada for creating a reproducible test case to prove its existence.
This commit is contained in:
parent
bc8a1fc282
commit
3fdf649f4f
@ -10,7 +10,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.177 2004/08/03 15:57:26 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.178 2004/08/11 04:07:15 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -574,13 +574,28 @@ RecordTransactionCommit(void)
|
|||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We only need to log the commit in XLOG if the transaction made
|
* If our transaction made any transaction-controlled XLOG entries,
|
||||||
* any transaction-controlled XLOG entries or will delete files.
|
* we need to lock out checkpoint start between writing our XLOG
|
||||||
|
* record and updating pg_clog. Otherwise it is possible for the
|
||||||
|
* checkpoint to set REDO after the XLOG record but fail to flush the
|
||||||
|
* pg_clog update to disk, leading to loss of the transaction commit
|
||||||
|
* if we crash a little later. Slightly klugy fix for problem
|
||||||
|
* discovered 2004-08-10.
|
||||||
|
*
|
||||||
* (If it made no transaction-controlled XLOG entries, its XID
|
* (If it made no transaction-controlled XLOG entries, its XID
|
||||||
* appears nowhere in permanent storage, so no one else will ever care
|
* appears nowhere in permanent storage, so no one else will ever care
|
||||||
* if it committed.)
|
* if it committed; so it doesn't matter if we lose the commit flag.)
|
||||||
|
*
|
||||||
|
* Note we only need a shared lock; CreateCheckPoint() takes it exclusively.
|
||||||
*/
|
*/
|
||||||
madeTCentries = (MyLastRecPtr.xrecoff != 0);
|
madeTCentries = (MyLastRecPtr.xrecoff != 0);
|
||||||
|
if (madeTCentries)
|
||||||
|
LWLockAcquire(CheckpointStartLock, LW_SHARED);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We only need to log the commit in XLOG if the transaction made
|
||||||
|
* any transaction-controlled XLOG entries or will delete files.
|
||||||
|
*/
|
||||||
if (madeTCentries || nrels > 0)
|
if (madeTCentries || nrels > 0)
|
||||||
{
|
{
|
||||||
XLogRecData rdata[3];
|
XLogRecData rdata[3];
|
||||||
@ -668,6 +683,10 @@ RecordTransactionCommit(void)
|
|||||||
TransactionIdCommitTree(nchildren, children);
|
TransactionIdCommitTree(nchildren, children);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Unlock checkpoint lock if we acquired it */
|
||||||
|
if (madeTCentries)
|
||||||
|
LWLockRelease(CheckpointStartLock);
|
||||||
|
|
||||||
END_CRIT_SECTION();
|
END_CRIT_SECTION();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -850,6 +869,8 @@ RecordTransactionAbort(void)
|
|||||||
*
|
*
|
||||||
* We do not flush XLOG to disk unless deleting files, since the
|
* We do not flush XLOG to disk unless deleting files, since the
|
||||||
* default assumption after a crash would be that we aborted, anyway.
|
* default assumption after a crash would be that we aborted, anyway.
|
||||||
|
* For the same reason, we don't need to worry about interlocking
|
||||||
|
* against checkpoint start.
|
||||||
*/
|
*/
|
||||||
if (MyLastRecPtr.xrecoff != 0 || nrels > 0)
|
if (MyLastRecPtr.xrecoff != 0 || nrels > 0)
|
||||||
{
|
{
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.158 2004/08/09 16:26:01 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.159 2004/08/11 04:07:15 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -4699,6 +4699,15 @@ CreateCheckPoint(bool shutdown, bool force)
|
|||||||
checkPoint.ThisTimeLineID = ThisTimeLineID;
|
checkPoint.ThisTimeLineID = ThisTimeLineID;
|
||||||
checkPoint.time = time(NULL);
|
checkPoint.time = time(NULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We must hold CheckpointStartLock while determining the checkpoint
|
||||||
|
* REDO pointer. This ensures that any concurrent transaction commits
|
||||||
|
* will be either not yet logged, or logged and recorded in pg_clog.
|
||||||
|
* See notes in RecordTransactionCommit().
|
||||||
|
*/
|
||||||
|
LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
|
/* And we need WALInsertLock too */
|
||||||
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
|
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -4731,6 +4740,7 @@ CreateCheckPoint(bool shutdown, bool force)
|
|||||||
ControlFile->checkPointCopy.redo.xrecoff)
|
ControlFile->checkPointCopy.redo.xrecoff)
|
||||||
{
|
{
|
||||||
LWLockRelease(WALInsertLock);
|
LWLockRelease(WALInsertLock);
|
||||||
|
LWLockRelease(CheckpointStartLock);
|
||||||
LWLockRelease(CheckpointLock);
|
LWLockRelease(CheckpointLock);
|
||||||
END_CRIT_SECTION();
|
END_CRIT_SECTION();
|
||||||
return;
|
return;
|
||||||
@ -4789,6 +4799,9 @@ CreateCheckPoint(bool shutdown, bool force)
|
|||||||
* GetSnapshotData needs to get XidGenLock while holding SInvalLock,
|
* GetSnapshotData needs to get XidGenLock while holding SInvalLock,
|
||||||
* so there's a risk of deadlock. Need to find a better solution. See
|
* so there's a risk of deadlock. Need to find a better solution. See
|
||||||
* pgsql-hackers discussion of 17-Dec-01.
|
* pgsql-hackers discussion of 17-Dec-01.
|
||||||
|
*
|
||||||
|
* XXX actually, the whole UNDO code is dead code and unlikely to ever
|
||||||
|
* be revived, so the lack of a good solution here is not troubling.
|
||||||
*/
|
*/
|
||||||
#ifdef NOT_USED
|
#ifdef NOT_USED
|
||||||
checkPoint.undo = GetUndoRecPtr();
|
checkPoint.undo = GetUndoRecPtr();
|
||||||
@ -4798,11 +4811,13 @@ CreateCheckPoint(bool shutdown, bool force)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now we can release insert lock, allowing other xacts to proceed
|
* Now we can release insert lock and checkpoint start lock, allowing
|
||||||
* even while we are flushing disk buffers.
|
* other xacts to proceed even while we are flushing disk buffers.
|
||||||
*/
|
*/
|
||||||
LWLockRelease(WALInsertLock);
|
LWLockRelease(WALInsertLock);
|
||||||
|
|
||||||
|
LWLockRelease(CheckpointStartLock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get the other info we need for the checkpoint record.
|
* Get the other info we need for the checkpoint record.
|
||||||
*/
|
*/
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.12 2004/06/11 16:43:24 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.13 2004/08/11 04:07:16 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -36,6 +36,7 @@ typedef enum LWLockId
|
|||||||
WALWriteLock,
|
WALWriteLock,
|
||||||
ControlFileLock,
|
ControlFileLock,
|
||||||
CheckpointLock,
|
CheckpointLock,
|
||||||
|
CheckpointStartLock,
|
||||||
RelCacheInitLock,
|
RelCacheInitLock,
|
||||||
BgWriterCommLock,
|
BgWriterCommLock,
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user