During recovery, if we reach consistent state and still have entries in the
invalid-page hash table, PANIC immediately. Immediate PANIC is much better than waiting for end-of-recovery, which is what we did before, because the end-of-recovery might not come until months later if this is a standby server. Also refrain from creating a restartpoint if there are invalid-page entries in the hash table. Restarting recovery from such a restartpoint would not see the invalid references, and wouldn't be able to cross-check them when consistency is reached. That wouldn't matter when things are going smoothly, but the more sanity checks you have the better. Fujii Masao
This commit is contained in:
parent
15a5006aac
commit
1e616f6391
@ -562,7 +562,7 @@ static TimeLineID lastPageTLI = 0;
|
||||
static XLogRecPtr minRecoveryPoint; /* local copy of
|
||||
* ControlFile->minRecoveryPoint */
|
||||
static bool updateMinRecoveryPoint = true;
|
||||
static bool reachedMinRecoveryPoint = false;
|
||||
bool reachedMinRecoveryPoint = false;
|
||||
|
||||
static bool InRedo = false;
|
||||
|
||||
@ -6758,12 +6758,6 @@ StartupXLOG(void)
|
||||
/* Disallow XLogInsert again */
|
||||
LocalXLogInsertAllowed = -1;
|
||||
|
||||
/*
|
||||
* Check to see if the XLOG sequence contained any unresolved
|
||||
* references to uninitialized pages.
|
||||
*/
|
||||
XLogCheckInvalidPages();
|
||||
|
||||
/*
|
||||
* Perform a checkpoint to update all our recovery activity to disk.
|
||||
*
|
||||
@ -6906,6 +6900,12 @@ CheckRecoveryConsistency(void)
|
||||
XLByteLE(minRecoveryPoint, EndRecPtr) &&
|
||||
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
|
||||
{
|
||||
/*
|
||||
* Check to see if the XLOG sequence contained any unresolved
|
||||
* references to uninitialized pages.
|
||||
*/
|
||||
XLogCheckInvalidPages();
|
||||
|
||||
reachedMinRecoveryPoint = true;
|
||||
ereport(LOG,
|
||||
(errmsg("consistent recovery state reached at %X/%X",
|
||||
@ -7907,7 +7907,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
|
||||
volatile XLogCtlData *xlogctl = XLogCtl;
|
||||
|
||||
/*
|
||||
* Is it safe to checkpoint? We must ask each of the resource managers
|
||||
* Is it safe to restartpoint? We must ask each of the resource managers
|
||||
* whether they have any partial state information that might prevent a
|
||||
* correct restart from this point. If so, we skip this opportunity, but
|
||||
* return at the next checkpoint record for another try.
|
||||
@ -7926,6 +7926,22 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Also refrain from creating a restartpoint if we have seen any references
|
||||
* to non-existent pages. Restarting recovery from the restartpoint would
|
||||
* not see the references, so we would lose the cross-check that the pages
|
||||
* belonged to a relation that was dropped later.
|
||||
*/
|
||||
if (XLogHaveInvalidPages())
|
||||
{
|
||||
elog(trace_recovery(DEBUG2),
|
||||
"could not record restart point at %X/%X because there "
|
||||
"are unresolved references to invalid pages",
|
||||
checkPoint->redo.xlogid,
|
||||
checkPoint->redo.xrecoff);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the checkpoint record to shared memory, so that checkpointer
|
||||
* can work out the next time it wants to perform a restartpoint.
|
||||
|
@ -52,6 +52,22 @@ typedef struct xl_invalid_page
|
||||
static HTAB *invalid_page_tab = NULL;
|
||||
|
||||
|
||||
/* Report a reference to an invalid page */
|
||||
static void
|
||||
report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno,
|
||||
BlockNumber blkno, bool present)
|
||||
{
|
||||
char *path = relpathperm(node, forkno);
|
||||
|
||||
if (present)
|
||||
elog(elevel, "page %u of relation %s is uninitialized",
|
||||
blkno, path);
|
||||
else
|
||||
elog(elevel, "page %u of relation %s does not exist",
|
||||
blkno, path);
|
||||
pfree(path);
|
||||
}
|
||||
|
||||
/* Log a reference to an invalid page */
|
||||
static void
|
||||
log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
|
||||
@ -61,23 +77,27 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
|
||||
xl_invalid_page *hentry;
|
||||
bool found;
|
||||
|
||||
/*
|
||||
* Once recovery has reached a consistent state, the invalid-page table
|
||||
* should be empty and remain so. If a reference to an invalid page is
|
||||
* found after consistency is reached, PANIC immediately. This might
|
||||
* seem aggressive, but it's better than letting the invalid reference
|
||||
* linger in the hash table until the end of recovery and PANIC there,
|
||||
* which might come only much later if this is a standby server.
|
||||
*/
|
||||
if (reachedMinRecoveryPoint)
|
||||
{
|
||||
report_invalid_page(WARNING, node, forkno, blkno, present);
|
||||
elog(PANIC, "WAL contains references to invalid pages");
|
||||
}
|
||||
|
||||
/*
|
||||
* Log references to invalid pages at DEBUG1 level. This allows some
|
||||
* tracing of the cause (note the elog context mechanism will tell us
|
||||
* something about the XLOG record that generated the reference).
|
||||
*/
|
||||
if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1)
|
||||
{
|
||||
char *path = relpathperm(node, forkno);
|
||||
|
||||
if (present)
|
||||
elog(DEBUG1, "page %u of relation %s is uninitialized",
|
||||
blkno, path);
|
||||
else
|
||||
elog(DEBUG1, "page %u of relation %s does not exist",
|
||||
blkno, path);
|
||||
pfree(path);
|
||||
}
|
||||
report_invalid_page(DEBUG1, node, forkno, blkno, present);
|
||||
|
||||
if (invalid_page_tab == NULL)
|
||||
{
|
||||
@ -181,6 +201,16 @@ forget_invalid_pages_db(Oid dbid)
|
||||
}
|
||||
}
|
||||
|
||||
/* Are there any unresolved references to invalid pages? */
|
||||
bool
|
||||
XLogHaveInvalidPages(void)
|
||||
{
|
||||
if (invalid_page_tab != NULL &&
|
||||
hash_get_num_entries(invalid_page_tab) > 0)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Complain about any remaining invalid-page entries */
|
||||
void
|
||||
XLogCheckInvalidPages(void)
|
||||
@ -200,15 +230,8 @@ XLogCheckInvalidPages(void)
|
||||
*/
|
||||
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
char *path = relpathperm(hentry->key.node, hentry->key.forkno);
|
||||
|
||||
if (hentry->present)
|
||||
elog(WARNING, "page %u of relation %s was uninitialized",
|
||||
hentry->key.blkno, path);
|
||||
else
|
||||
elog(WARNING, "page %u of relation %s did not exist",
|
||||
hentry->key.blkno, path);
|
||||
pfree(path);
|
||||
report_invalid_page(WARNING, hentry->key.node, hentry->key.forkno,
|
||||
hentry->key.blkno, hentry->present);
|
||||
foundone = true;
|
||||
}
|
||||
|
||||
|
@ -190,6 +190,8 @@ typedef enum
|
||||
|
||||
extern XLogRecPtr XactLastRecEnd;
|
||||
|
||||
extern bool reachedMinRecoveryPoint;
|
||||
|
||||
/* these variables are GUC parameters related to XLOG */
|
||||
extern int CheckPointSegments;
|
||||
extern int wal_keep_segments;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "storage/bufmgr.h"
|
||||
|
||||
|
||||
extern bool XLogHaveInvalidPages(void);
|
||||
extern void XLogCheckInvalidPages(void);
|
||||
|
||||
extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum);
|
||||
|
Loading…
Reference in New Issue
Block a user