Use whitelist to choose files scanned with pg_verify_checksums
The original implementation of pg_verify_checksums used a blacklist to decide which files should be skipped for scanning as they do not include data checksums, like pg_internal.init or pg_control. However, this missed two things: - Some files are created within builds of EXEC_BACKEND and these were not listed, causing failures on Windows. - Extensions may create custom files in data folders, causing the tool to equally fail. This commit switches to a whitelist-like method instead by checking if the files to scan are authorized relation files. This is close to a reverse-engineering of what is defined in relpath.c in charge of building the relation paths, and we could consider refactoring what this patch does so as all routines are in a single place. This is left for later. This is based on a suggestion from Andres Freund. TAP tests are updated so as multiple file patterns are tested. The bug has been spotted by various buildfarm members as a result of b34e84f which has introduced the TAP tests of pg_verify_checksums. Author: Michael Paquier Reviewed-by: Andrew Dunstan, Michael Banck Discussion: https://postgr.es/m/20181012005614.GC26424@paquier.xyz Backpatch-through: 11
This commit is contained in:
parent
350410be45
commit
d55241af70
@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
#include "catalog/pg_control.h"
|
#include "catalog/pg_control.h"
|
||||||
#include "common/controldata_utils.h"
|
#include "common/controldata_utils.h"
|
||||||
|
#include "common/relpath.h"
|
||||||
#include "getopt_long.h"
|
#include "getopt_long.h"
|
||||||
#include "pg_getopt.h"
|
#include "pg_getopt.h"
|
||||||
#include "storage/bufpage.h"
|
#include "storage/bufpage.h"
|
||||||
@ -49,27 +50,69 @@ usage(void)
|
|||||||
printf(_("Report bugs to <pgsql-bugs@postgresql.org>.\n"));
|
printf(_("Report bugs to <pgsql-bugs@postgresql.org>.\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *const skip[] = {
|
/*
|
||||||
"pg_control",
|
* isRelFileName
|
||||||
"pg_filenode.map",
|
*
|
||||||
"pg_internal.init",
|
* Check if the given file name is authorized for checksum verification.
|
||||||
"PG_VERSION",
|
*/
|
||||||
NULL,
|
|
||||||
};
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
skipfile(const char *fn)
|
isRelFileName(const char *fn)
|
||||||
{
|
{
|
||||||
const char *const *f;
|
int pos;
|
||||||
|
|
||||||
if (strcmp(fn, ".") == 0 ||
|
/*----------
|
||||||
strcmp(fn, "..") == 0)
|
* Only files including data checksums are authorized for verification.
|
||||||
|
* This is guessed based on the file name by reverse-engineering
|
||||||
|
* GetRelationPath() so make sure to update both code paths if any
|
||||||
|
* updates are done. The following file name formats are allowed:
|
||||||
|
* <digits>
|
||||||
|
* <digits>.<segment>
|
||||||
|
* <digits>_<forkname>
|
||||||
|
* <digits>_<forkname>.<segment>
|
||||||
|
*
|
||||||
|
* Note that temporary files, beginning with 't', are also skipped.
|
||||||
|
*
|
||||||
|
*----------
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* A non-empty string of digits should follow */
|
||||||
|
for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos)
|
||||||
|
;
|
||||||
|
/* leave if no digits */
|
||||||
|
if (pos == 0)
|
||||||
|
return false;
|
||||||
|
/* good to go if only digits */
|
||||||
|
if (fn[pos] == '\0')
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
for (f = skip; *f; f++)
|
/* Authorized fork files can be scanned */
|
||||||
if (strcmp(*f, fn) == 0)
|
if (fn[pos] == '_')
|
||||||
return true;
|
{
|
||||||
return false;
|
int forkchar = forkname_chars(&fn[pos + 1], NULL);
|
||||||
|
|
||||||
|
if (forkchar <= 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
pos += forkchar + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for an optional segment number */
|
||||||
|
if (fn[pos] == '.')
|
||||||
|
{
|
||||||
|
int segchar;
|
||||||
|
|
||||||
|
for (segchar = 1; isdigit((unsigned char) fn[pos + segchar]); ++segchar)
|
||||||
|
;
|
||||||
|
|
||||||
|
if (segchar <= 1)
|
||||||
|
return false;
|
||||||
|
pos += segchar;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now this should be the end */
|
||||||
|
if (fn[pos] != '\0')
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -146,7 +189,7 @@ scan_directory(const char *basedir, const char *subdir)
|
|||||||
char fn[MAXPGPATH];
|
char fn[MAXPGPATH];
|
||||||
struct stat st;
|
struct stat st;
|
||||||
|
|
||||||
if (skipfile(de->d_name))
|
if (!isRelFileName(de->d_name))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
|
snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
|
||||||
|
@ -5,7 +5,7 @@ use strict;
|
|||||||
use warnings;
|
use warnings;
|
||||||
use PostgresNode;
|
use PostgresNode;
|
||||||
use TestLib;
|
use TestLib;
|
||||||
use Test::More tests => 12;
|
use Test::More tests => 36;
|
||||||
|
|
||||||
# Initialize node with checksums enabled.
|
# Initialize node with checksums enabled.
|
||||||
my $node = get_new_node('node_checksum');
|
my $node = get_new_node('node_checksum');
|
||||||
@ -17,6 +17,31 @@ command_like(['pg_controldata', $pgdata],
|
|||||||
qr/Data page checksum version:.*1/,
|
qr/Data page checksum version:.*1/,
|
||||||
'checksums enabled in control file');
|
'checksums enabled in control file');
|
||||||
|
|
||||||
|
# Add set of dummy files with some contents. These should not be scanned
|
||||||
|
# by the tool.
|
||||||
|
append_to_file "$pgdata/global/123.", "foo";
|
||||||
|
append_to_file "$pgdata/global/123_", "foo";
|
||||||
|
append_to_file "$pgdata/global/123_.", "foo";
|
||||||
|
append_to_file "$pgdata/global/123.12t", "foo";
|
||||||
|
append_to_file "$pgdata/global/foo", "foo2";
|
||||||
|
append_to_file "$pgdata/global/t123", "bar";
|
||||||
|
append_to_file "$pgdata/global/123a", "bar2";
|
||||||
|
append_to_file "$pgdata/global/.123", "foobar";
|
||||||
|
append_to_file "$pgdata/global/_fsm", "foobar2";
|
||||||
|
append_to_file "$pgdata/global/_init", "foobar3";
|
||||||
|
append_to_file "$pgdata/global/_vm.123", "foohoge";
|
||||||
|
append_to_file "$pgdata/global/123_vm.123t", "foohoge2";
|
||||||
|
|
||||||
|
# Those are correct but empty files, so they should pass through.
|
||||||
|
append_to_file "$pgdata/global/99999", "";
|
||||||
|
append_to_file "$pgdata/global/99999.123", "";
|
||||||
|
append_to_file "$pgdata/global/99999_fsm", "";
|
||||||
|
append_to_file "$pgdata/global/99999_init", "";
|
||||||
|
append_to_file "$pgdata/global/99999_vm", "";
|
||||||
|
append_to_file "$pgdata/global/99999_init.123", "";
|
||||||
|
append_to_file "$pgdata/global/99999_fsm.123", "";
|
||||||
|
append_to_file "$pgdata/global/99999_vm.123", "";
|
||||||
|
|
||||||
# Checksums pass on a newly-created cluster
|
# Checksums pass on a newly-created cluster
|
||||||
command_ok(['pg_verify_checksums', '-D', $pgdata],
|
command_ok(['pg_verify_checksums', '-D', $pgdata],
|
||||||
"succeeds with offline cluster");
|
"succeeds with offline cluster");
|
||||||
@ -67,3 +92,32 @@ $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata, '-r',
|
|||||||
[qr/Bad checksums:.*1/],
|
[qr/Bad checksums:.*1/],
|
||||||
[qr/checksum verification failed/],
|
[qr/checksum verification failed/],
|
||||||
'fails for corrupted data on single relfilenode');
|
'fails for corrupted data on single relfilenode');
|
||||||
|
|
||||||
|
# Utility routine to check that pg_verify_checksums is able to detect
|
||||||
|
# correctly-named relation files filled with some corrupted data.
|
||||||
|
sub fail_corrupt
|
||||||
|
{
|
||||||
|
my $node = shift;
|
||||||
|
my $file = shift;
|
||||||
|
my $pgdata = $node->data_dir;
|
||||||
|
|
||||||
|
# Create the file with some dummy data in it.
|
||||||
|
append_to_file "$pgdata/global/$file", "foo";
|
||||||
|
|
||||||
|
$node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata],
|
||||||
|
1,
|
||||||
|
[qr/^$/],
|
||||||
|
[qr/could not read block/],
|
||||||
|
"fails for corrupted data in $file");
|
||||||
|
}
|
||||||
|
|
||||||
|
# Authorized relation files filled with corrupted data cause the
|
||||||
|
# checksum checks to fail.
|
||||||
|
fail_corrupt($node, "99999");
|
||||||
|
fail_corrupt($node, "99999.123");
|
||||||
|
fail_corrupt($node, "99999_fsm");
|
||||||
|
fail_corrupt($node, "99999_init");
|
||||||
|
fail_corrupt($node, "99999_vm");
|
||||||
|
fail_corrupt($node, "99999_init.123");
|
||||||
|
fail_corrupt($node, "99999_fsm.123");
|
||||||
|
fail_corrupt($node, "99999_vm.123");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user