In basebackup.c, refactor to create read_file_data_into_buffer.
This further reduces the length and complexity of sendFile(), hopefully make it easier to understand and modify. In addition to moving some logic into a new function, I took this opportunity to make a few slight adjustments to sendFile() itself, including renaming the 'len' variable to 'bytes_done', since we use it to represent the number of bytes we've already handled so far, not the total length of the file. Patch by me, reviewed by David Steele. Discussion: http://postgr.es/m/CA+TgmoYt5jXH4U6cu1dm9Oe2FTn1aae6hBNhZzJJjyjbE_zYig@mail.gmail.com
This commit is contained in:
parent
053183138a
commit
c2ba3fdea5
@ -83,6 +83,12 @@ static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeo
|
||||
static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
struct stat *statbuf, bool missing_ok, Oid dboid,
|
||||
backup_manifest_info *manifest, const char *spcoid);
|
||||
static off_t read_file_data_into_buffer(bbsink *sink,
|
||||
const char *readfilename, int fd,
|
||||
off_t offset, size_t length,
|
||||
BlockNumber blkno,
|
||||
bool verify_checksum,
|
||||
int *checksum_failures);
|
||||
static bool verify_page_checksum(Page page, XLogRecPtr start_lsn,
|
||||
BlockNumber blkno,
|
||||
uint16 *expected_checksum);
|
||||
@ -1490,9 +1496,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
BlockNumber blkno = 0;
|
||||
int checksum_failures = 0;
|
||||
off_t cnt;
|
||||
int i;
|
||||
pgoff_t len = 0;
|
||||
char *page;
|
||||
pgoff_t bytes_done = 0;
|
||||
int segmentno = 0;
|
||||
char *segmentpath;
|
||||
bool verify_checksum = false;
|
||||
@ -1514,6 +1518,12 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
|
||||
_tarWriteHeader(sink, tarfilename, NULL, statbuf, false);
|
||||
|
||||
/*
|
||||
* Checksums are verified in multiples of BLCKSZ, so the buffer length
|
||||
* should be a multiple of the block size as well.
|
||||
*/
|
||||
Assert((sink->bbs_buffer_length % BLCKSZ) == 0);
|
||||
|
||||
if (!noverify_checksums && DataChecksumsEnabled())
|
||||
{
|
||||
char *filename;
|
||||
@ -1551,23 +1561,21 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
* for a base backup we can ignore such extended data. It will be restored
|
||||
* from WAL.
|
||||
*/
|
||||
while (len < statbuf->st_size)
|
||||
while (bytes_done < statbuf->st_size)
|
||||
{
|
||||
size_t remaining = statbuf->st_size - len;
|
||||
size_t remaining = statbuf->st_size - bytes_done;
|
||||
|
||||
/* Try to read some more data. */
|
||||
cnt = basebackup_read_file(fd, sink->bbs_buffer,
|
||||
Min(sink->bbs_buffer_length, remaining),
|
||||
len, readfilename, true);
|
||||
cnt = read_file_data_into_buffer(sink, readfilename, fd, bytes_done,
|
||||
remaining,
|
||||
blkno + segmentno * RELSEG_SIZE,
|
||||
verify_checksum,
|
||||
&checksum_failures);
|
||||
|
||||
/*
|
||||
* The checksums are verified at block level, so we iterate over the
|
||||
* buffer in chunks of BLCKSZ, after making sure that
|
||||
* TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
|
||||
* BLCKSZ bytes.
|
||||
* If the amount of data we were able to read was not a multiple of
|
||||
* BLCKSZ, we cannot verify checksums, which are block-level.
|
||||
*/
|
||||
Assert((sink->bbs_buffer_length % BLCKSZ) == 0);
|
||||
|
||||
if (verify_checksum && (cnt % BLCKSZ != 0))
|
||||
{
|
||||
ereport(WARNING,
|
||||
@ -1578,84 +1586,6 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
verify_checksum = false;
|
||||
}
|
||||
|
||||
if (verify_checksum)
|
||||
{
|
||||
for (i = 0; i < cnt / BLCKSZ; i++)
|
||||
{
|
||||
int reread_cnt;
|
||||
uint16 expected_checksum;
|
||||
|
||||
page = sink->bbs_buffer + BLCKSZ * i;
|
||||
|
||||
/* If the page is OK, go on to the next one. */
|
||||
if (verify_page_checksum(page, sink->bbs_state->startptr,
|
||||
blkno + i + segmentno * RELSEG_SIZE,
|
||||
&expected_checksum))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Retry the block on the first failure. It's possible that
|
||||
* we read the first 4K page of the block just before postgres
|
||||
* updated the entire block so it ends up looking torn to us.
|
||||
* If, before we retry the read, the concurrent write of the
|
||||
* block finishes, the page LSN will be updated and we'll
|
||||
* realize that we should ignore this block.
|
||||
*
|
||||
* There's no guarantee that this will actually happen,
|
||||
* though: the torn write could take an arbitrarily long time
|
||||
* to complete. Retrying multiple times wouldn't fix this
|
||||
* problem, either, though it would reduce the chances of it
|
||||
* happening in practice. The only real fix here seems to be
|
||||
* to have some kind of interlock that allows us to wait until
|
||||
* we can be certain that no write to the block is in
|
||||
* progress. Since we don't have any such thing right now, we
|
||||
* just do this and hope for the best.
|
||||
*/
|
||||
reread_cnt =
|
||||
basebackup_read_file(fd,
|
||||
sink->bbs_buffer + BLCKSZ * i,
|
||||
BLCKSZ, len + BLCKSZ * i,
|
||||
readfilename,
|
||||
false);
|
||||
if (reread_cnt == 0)
|
||||
{
|
||||
/*
|
||||
* If we hit end-of-file, a concurrent truncation must
|
||||
* have occurred, so break out of this loop just as if the
|
||||
* initial fread() returned 0. We'll drop through to the
|
||||
* same code that handles that case. (We must fix up cnt
|
||||
* first, though.)
|
||||
*/
|
||||
cnt = BLCKSZ * i;
|
||||
break;
|
||||
}
|
||||
|
||||
/* If the page now looks OK, go on to the next one. */
|
||||
if (verify_page_checksum(page, sink->bbs_state->startptr,
|
||||
blkno + i + segmentno * RELSEG_SIZE,
|
||||
&expected_checksum))
|
||||
continue;
|
||||
|
||||
/* Handle checksum failure. */
|
||||
checksum_failures++;
|
||||
if (checksum_failures <= 5)
|
||||
ereport(WARNING,
|
||||
(errmsg("checksum verification failed in "
|
||||
"file \"%s\", block %u: calculated "
|
||||
"%X but expected %X",
|
||||
readfilename, blkno + i, expected_checksum,
|
||||
((PageHeader) page)->pd_checksum)));
|
||||
if (checksum_failures == 5)
|
||||
ereport(WARNING,
|
||||
(errmsg("further checksum verification "
|
||||
"failures in file \"%s\" will not "
|
||||
"be reported", readfilename)));
|
||||
}
|
||||
|
||||
/* Update block number for next pass through the outer loop. */
|
||||
blkno += i;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we hit end-of-file, a concurrent truncation must have occurred.
|
||||
* That's not an error condition, because WAL replay will fix things
|
||||
@ -1664,6 +1594,10 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
if (cnt == 0)
|
||||
break;
|
||||
|
||||
/* Update block number and # of bytes done for next loop iteration. */
|
||||
blkno += cnt / BLCKSZ;
|
||||
bytes_done += cnt;
|
||||
|
||||
/* Archive the data we just read. */
|
||||
bbsink_archive_contents(sink, cnt);
|
||||
|
||||
@ -1671,14 +1605,12 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
if (pg_checksum_update(&checksum_ctx,
|
||||
(uint8 *) sink->bbs_buffer, cnt) < 0)
|
||||
elog(ERROR, "could not update checksum of base backup");
|
||||
|
||||
len += cnt;
|
||||
}
|
||||
|
||||
/* If the file was truncated while we were sending it, pad it with zeros */
|
||||
while (len < statbuf->st_size)
|
||||
while (bytes_done < statbuf->st_size)
|
||||
{
|
||||
size_t remaining = statbuf->st_size - len;
|
||||
size_t remaining = statbuf->st_size - bytes_done;
|
||||
size_t nbytes = Min(sink->bbs_buffer_length, remaining);
|
||||
|
||||
MemSet(sink->bbs_buffer, 0, nbytes);
|
||||
@ -1687,7 +1619,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
nbytes) < 0)
|
||||
elog(ERROR, "could not update checksum of base backup");
|
||||
bbsink_archive_contents(sink, nbytes);
|
||||
len += nbytes;
|
||||
bytes_done += nbytes;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1695,7 +1627,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
* of data is probably not worth throttling, and is not checksummed
|
||||
* because it's not actually part of the file.)
|
||||
*/
|
||||
_tarWritePadding(sink, len);
|
||||
_tarWritePadding(sink, bytes_done);
|
||||
|
||||
CloseTransientFile(fd);
|
||||
|
||||
@ -1718,6 +1650,109 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read some more data from the file into the bbsink's buffer, verifying
|
||||
* checksums as required.
|
||||
*
|
||||
* 'offset' is the file offset from which we should begin to read, and
|
||||
* 'length' is the amount of data that should be read. The actual amount
|
||||
* of data read will be less than the requested amount if the bbsink's
|
||||
* buffer isn't big enough to hold it all, or if the underlying file has
|
||||
* been truncated. The return value is the number of bytes actually read.
|
||||
*
|
||||
* 'blkno' is the block number of the first page in the bbsink's buffer
|
||||
* relative to the start of the relation.
|
||||
*
|
||||
* 'verify_checksum' indicates whether we should try to verify checksums
|
||||
* for the blocks we read. If we do this, we'll update *checksum_failures
|
||||
* and issue warnings as appropriate.
|
||||
*/
|
||||
static off_t
|
||||
read_file_data_into_buffer(bbsink *sink, const char *readfilename, int fd,
|
||||
off_t offset, size_t length, BlockNumber blkno,
|
||||
bool verify_checksum, int *checksum_failures)
|
||||
{
|
||||
off_t cnt;
|
||||
int i;
|
||||
char *page;
|
||||
|
||||
/* Try to read some more data. */
|
||||
cnt = basebackup_read_file(fd, sink->bbs_buffer,
|
||||
Min(sink->bbs_buffer_length, length),
|
||||
offset, readfilename, true);
|
||||
|
||||
/* Can't verify checksums if read length is not a multiple of BLCKSZ. */
|
||||
if (!verify_checksum || (cnt % BLCKSZ) != 0)
|
||||
return cnt;
|
||||
|
||||
/* Verify checksum for each block. */
|
||||
for (i = 0; i < cnt / BLCKSZ; i++)
|
||||
{
|
||||
int reread_cnt;
|
||||
uint16 expected_checksum;
|
||||
|
||||
page = sink->bbs_buffer + BLCKSZ * i;
|
||||
|
||||
/* If the page is OK, go on to the next one. */
|
||||
if (verify_page_checksum(page, sink->bbs_state->startptr, blkno + i,
|
||||
&expected_checksum))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Retry the block on the first failure. It's possible that we read
|
||||
* the first 4K page of the block just before postgres updated the
|
||||
* entire block so it ends up looking torn to us. If, before we retry
|
||||
* the read, the concurrent write of the block finishes, the page LSN
|
||||
* will be updated and we'll realize that we should ignore this block.
|
||||
*
|
||||
* There's no guarantee that this will actually happen, though: the
|
||||
* torn write could take an arbitrarily long time to complete.
|
||||
* Retrying multiple times wouldn't fix this problem, either, though
|
||||
* it would reduce the chances of it happening in practice. The only
|
||||
* real fix here seems to be to have some kind of interlock that
|
||||
* allows us to wait until we can be certain that no write to the
|
||||
* block is in progress. Since we don't have any such thing right now,
|
||||
* we just do this and hope for the best.
|
||||
*/
|
||||
reread_cnt =
|
||||
basebackup_read_file(fd, sink->bbs_buffer + BLCKSZ * i,
|
||||
BLCKSZ, offset + BLCKSZ * i,
|
||||
readfilename, false);
|
||||
if (reread_cnt == 0)
|
||||
{
|
||||
/*
|
||||
* If we hit end-of-file, a concurrent truncation must have
|
||||
* occurred, so reduce cnt to reflect only the blocks already
|
||||
* processed and break out of this loop.
|
||||
*/
|
||||
cnt = BLCKSZ * i;
|
||||
break;
|
||||
}
|
||||
|
||||
/* If the page now looks OK, go on to the next one. */
|
||||
if (verify_page_checksum(page, sink->bbs_state->startptr, blkno + i,
|
||||
&expected_checksum))
|
||||
continue;
|
||||
|
||||
/* Handle checksum failure. */
|
||||
(*checksum_failures)++;
|
||||
if (*checksum_failures <= 5)
|
||||
ereport(WARNING,
|
||||
(errmsg("checksum verification failed in "
|
||||
"file \"%s\", block %u: calculated "
|
||||
"%X but expected %X",
|
||||
readfilename, blkno + i, expected_checksum,
|
||||
((PageHeader) page)->pd_checksum)));
|
||||
if (*checksum_failures == 5)
|
||||
ereport(WARNING,
|
||||
(errmsg("further checksum verification "
|
||||
"failures in file \"%s\" will not "
|
||||
"be reported", readfilename)));
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to verify the checksum for the provided page, if it seems appropriate
|
||||
* to do so.
|
||||
|
Loading…
x
Reference in New Issue
Block a user