Fix backup manifests to generate correct WAL-Ranges across timelines
In a backup manifest, WAL-Ranges stores the range of WAL that is required for the backup to be valid. pg_verifybackup then internally uses pg_waldump to run its checks based on this data. When the backup started on a timeline greater than 1, so that a timeline history file was consulted to generate the manifest data, the WAL range calculated for the first timeline to check was incorrect. The previous logic used the start position of the first timeline as the start LSN, but it needs to use the start LSN of the backup. This caused failures with pg_verifybackup, or with any other tool making use of backup manifests. This commit also adds a test that relies on a self-promoted node, which makes it rather cheap. Author: Kyotaro Horiguchi Discussion: https://postgr.es/m/20210818.143031.1867083699202617521.horikyota.ntt@gmail.com Backpatch-through: 13
This commit is contained in:
parent
8f6a52196a
commit
65b649fecb
@ -251,11 +251,18 @@ AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
|
||||
errmsg("expected end timeline %u but found timeline %u",
|
||||
starttli, entry->tli));
|
||||
|
||||
if (!XLogRecPtrIsInvalid(entry->begin))
|
||||
tl_beginptr = entry->begin;
|
||||
/*
|
||||
* If this timeline entry matches with the timeline on which the
|
||||
* backup started, WAL needs to be checked from the start LSN of the
|
||||
* backup. If this entry refers to a newer timeline, WAL needs to be
|
||||
* checked since the beginning of this timeline, so use the LSN where
|
||||
* the timeline began.
|
||||
*/
|
||||
if (starttli == entry->tli)
|
||||
tl_beginptr = startptr;
|
||||
else
|
||||
{
|
||||
tl_beginptr = startptr;
|
||||
tl_beginptr = entry->begin;
|
||||
|
||||
/*
|
||||
* If we reach a TLI that has no valid beginning LSN, there can't
|
||||
@ -263,7 +270,7 @@ AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
|
||||
* better have arrived at the expected starting TLI. If not,
|
||||
* something's gone horribly wrong.
|
||||
*/
|
||||
if (starttli != entry->tli)
|
||||
if (XLogRecPtrIsInvalid(entry->begin))
|
||||
ereport(ERROR,
|
||||
errmsg("expected start timeline %u but found timeline %u",
|
||||
starttli, entry->tli));
|
||||
|
@ -10,7 +10,7 @@ use Config;
|
||||
use File::Path qw(rmtree);
|
||||
use PostgresNode;
|
||||
use TestLib;
|
||||
use Test::More tests => 7;
|
||||
use Test::More tests => 9;
|
||||
|
||||
# Start up the server and take a backup.
|
||||
my $primary = get_new_node('primary');
|
||||
@ -59,3 +59,20 @@ command_fails_like(
|
||||
[ 'pg_verifybackup', $backup_path ],
|
||||
qr/WAL parsing failed for timeline 1/,
|
||||
'corrupt WAL file causes failure');
|
||||
|
||||
# Check that WAL-Ranges has correct values with a history file and
|
||||
# a timeline > 1. Rather than plugging in a new standby, do a
|
||||
# self-promotion of this node.
|
||||
$primary->stop;
|
||||
$primary->append_conf('standby.signal');
|
||||
$primary->start;
|
||||
$primary->promote;
|
||||
$primary->safe_psql('postgres', 'SELECT pg_switch_wal()');
|
||||
my $backup_path2 = $primary->backup_dir . '/test_tli';
|
||||
# The base backup run below does a checkpoint, which removes the first segment
|
||||
# of the current timeline.
|
||||
$primary->command_ok([ 'pg_basebackup', '-D', $backup_path2, '--no-sync' ],
|
||||
"base backup 2 ok");
|
||||
command_ok(
|
||||
[ 'pg_verifybackup', $backup_path2 ],
|
||||
'valid base backup with timeline > 1');
|
||||
|
Loading…
x
Reference in New Issue
Block a user