mirror of https://github.com/postgres/postgres
Fix false reports in pg_visibility
Currently, pg_visibility computes its xid horizon using the GetOldestNonRemovableTransactionId(). The problem is that this horizon can sometimes go backward. That can lead to reporting false errors. In order to fix that, this commit implements a new function GetStrictOldestNonRemovableTransactionId(). This function computes the xid horizon, which would be guaranteed to be newer or equal to any xid horizon computed before. We have to do the following to achieve this. 1. Ignore processes xmin's, because they consider connection to other databases that were ignored before. 2. Ignore KnownAssignedXids, because they are not database-aware. At the same time, the primary could compute its horizons database-aware. 3. Ignore walsender xmin, because it could go backward if some replication connections don't use replication slots. As a result, we're using only currently running xids to compute the horizon. Surely these would significantly sacrifice accuracy. But we have to do so to avoid reporting false errors. Inspired by earlier patch by Daniel Shelepanov and the following discussion with Robert Haas and Tom Lane. Discussion: https://postgr.es/m/1649062270.289865713%40f403.i.mail.ru Reviewed-by: Alexander Lakhin, Dmitry Koval
This commit is contained in:
parent
cc6e64afda
commit
e85662df44
|
@ -11,6 +11,7 @@ DATA = pg_visibility--1.1.sql pg_visibility--1.1--1.2.sql \
|
|||
PGFILEDESC = "pg_visibility - page visibility information"
|
||||
|
||||
REGRESS = pg_visibility
|
||||
TAP_TESTS = 1
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
|
|
|
@ -32,5 +32,9 @@ tests += {
|
|||
'sql': [
|
||||
'pg_visibility',
|
||||
],
|
||||
'tap': {
|
||||
'tests': [
|
||||
't/001_concurrent_transaction.pl',
|
||||
],
|
||||
},
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/proc.h"
|
||||
#include "storage/procarray.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/rel.h"
|
||||
|
@ -532,6 +533,63 @@ collect_visibility_data(Oid relid, bool include_pd)
|
|||
return info;
|
||||
}
|
||||
|
||||
/*
|
||||
* The "strict" version of GetOldestNonRemovableTransactionId(). The
|
||||
* pg_visibility check can tolerate false positives (don't report some of the
|
||||
* errors), but can't tolerate false negatives (report false errors). Normally,
|
||||
* horizons move forwards, but there are cases when it could move backward
|
||||
* (see comment for ComputeXidHorizons()).
|
||||
*
|
||||
* This is why we have to implement our own function for xid horizon, which
|
||||
* would be guaranteed to be newer or equal to any xid horizon computed before.
|
||||
* We have to do the following to achieve this.
|
||||
*
|
||||
* 1. Ignore processes xmin's, because they consider connection to other
|
||||
* databases that were ignored before.
|
||||
* 2. Ignore KnownAssignedXids, because they are not database-aware. At the
|
||||
* same time, the primary could compute its horizons database-aware.
|
||||
* 3. Ignore walsender xmin, because it could go backward if some replication
|
||||
* connections don't use replication slots.
|
||||
*
|
||||
* As a result, we're using only currently running xids to compute the horizon.
|
||||
* Surely these would significantly sacrifice accuracy. But we have to do so
|
||||
* to avoid reporting false errors.
|
||||
*/
|
||||
static TransactionId
|
||||
GetStrictOldestNonRemovableTransactionId(Relation rel)
|
||||
{
|
||||
RunningTransactions runningTransactions;
|
||||
|
||||
if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
|
||||
{
|
||||
/* Shared relation: take into account all running xids */
|
||||
runningTransactions = GetRunningTransactionData();
|
||||
LWLockRelease(ProcArrayLock);
|
||||
LWLockRelease(XidGenLock);
|
||||
return runningTransactions->oldestRunningXid;
|
||||
}
|
||||
else if (!RELATION_IS_LOCAL(rel))
|
||||
{
|
||||
/*
|
||||
* Normal relation: take into account xids running within the current
|
||||
* database
|
||||
*/
|
||||
runningTransactions = GetRunningTransactionData();
|
||||
LWLockRelease(ProcArrayLock);
|
||||
LWLockRelease(XidGenLock);
|
||||
return runningTransactions->oldestDatabaseRunningXid;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* For temporary relations, ComputeXidHorizons() uses only
|
||||
* TransamVariables->latestCompletedXid and MyProc->xid. These two
|
||||
* shouldn't go backwards. So we're fine with this horizon.
|
||||
*/
|
||||
return GetOldestNonRemovableTransactionId(rel);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns a list of items whose visibility map information does not match
|
||||
* the status of the tuples on the page.
|
||||
|
@ -563,7 +621,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
|
|||
check_relation_relkind(rel);
|
||||
|
||||
if (all_visible)
|
||||
OldestXmin = GetOldestNonRemovableTransactionId(rel);
|
||||
OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
|
||||
|
||||
nblocks = RelationGetNumberOfBlocks(rel);
|
||||
|
||||
|
@ -671,11 +729,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
|
|||
* retake ProcArrayLock here while we're holding the buffer
|
||||
* exclusively locked, but it should be safe against
|
||||
* deadlocks, because surely
|
||||
* GetOldestNonRemovableTransactionId() should never take a
|
||||
* buffer lock. And this shouldn't happen often, so it's worth
|
||||
* being careful so as to avoid false positives.
|
||||
* GetStrictOldestNonRemovableTransactionId() should never
|
||||
* take a buffer lock. And this shouldn't happen often, so
|
||||
* it's worth being careful so as to avoid false positives.
|
||||
*/
|
||||
RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel);
|
||||
RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
|
||||
|
||||
if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
|
||||
record_corrupt_item(items, &tuple.t_self);
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
|
||||
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
|
||||
|
||||
# Check that a concurrent transaction doesn't cause false negatives in
|
||||
# pg_check_visible() function
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
|
||||
|
||||
my $node = PostgreSQL::Test::Cluster->new('main');
|
||||
|
||||
$node->init;
|
||||
$node->start;
|
||||
|
||||
# Setup another database
|
||||
$node->safe_psql("postgres", "CREATE DATABASE other_database;\n");
|
||||
my $bsession = $node->background_psql('other_database');
|
||||
|
||||
# Run a concurrent transaction
|
||||
$bsession->query_safe(
|
||||
qq[
|
||||
BEGIN;
|
||||
SELECT txid_current();
|
||||
]);
|
||||
|
||||
# Create a sample table and run vacuum
|
||||
$node->safe_psql("postgres",
|
||||
"CREATE EXTENSION pg_visibility;\n"
|
||||
. "CREATE TABLE vacuum_test AS SELECT 42 i;\n"
|
||||
. "VACUUM (disable_page_skipping) vacuum_test;");
|
||||
|
||||
# Run pg_check_visible()
|
||||
my $result = $node->safe_psql("postgres",
|
||||
"SELECT * FROM pg_check_visible('vacuum_test');");
|
||||
|
||||
# There should be no false negatives
|
||||
ok($result eq "", "pg_check_visible() detects no errors");
|
||||
|
||||
# Shutdown
|
||||
$bsession->query_safe("COMMIT;");
|
||||
$bsession->quit;
|
||||
$node->stop;
|
||||
|
||||
done_testing();
|
|
@ -2688,6 +2688,7 @@ GetRunningTransactionData(void)
|
|||
RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
|
||||
TransactionId latestCompletedXid;
|
||||
TransactionId oldestRunningXid;
|
||||
TransactionId oldestDatabaseRunningXid;
|
||||
TransactionId *xids;
|
||||
int index;
|
||||
int count;
|
||||
|
@ -2732,7 +2733,7 @@ GetRunningTransactionData(void)
|
|||
|
||||
latestCompletedXid =
|
||||
XidFromFullTransactionId(TransamVariables->latestCompletedXid);
|
||||
oldestRunningXid =
|
||||
oldestDatabaseRunningXid = oldestRunningXid =
|
||||
XidFromFullTransactionId(TransamVariables->nextXid);
|
||||
|
||||
/*
|
||||
|
@ -2740,6 +2741,8 @@ GetRunningTransactionData(void)
|
|||
*/
|
||||
for (index = 0; index < arrayP->numProcs; index++)
|
||||
{
|
||||
int pgprocno = arrayP->pgprocnos[index];
|
||||
PGPROC *proc = &allProcs[pgprocno];
|
||||
TransactionId xid;
|
||||
|
||||
/* Fetch xid just once - see GetNewTransactionId */
|
||||
|
@ -2760,6 +2763,13 @@ GetRunningTransactionData(void)
|
|||
if (TransactionIdPrecedes(xid, oldestRunningXid))
|
||||
oldestRunningXid = xid;
|
||||
|
||||
/*
|
||||
* Also, update the oldest running xid within the current database.
|
||||
*/
|
||||
if (proc->databaseId == MyDatabaseId &&
|
||||
TransactionIdPrecedes(xid, oldestRunningXid))
|
||||
oldestDatabaseRunningXid = xid;
|
||||
|
||||
if (ProcGlobal->subxidStates[index].overflowed)
|
||||
suboverflowed = true;
|
||||
|
||||
|
@ -2826,6 +2836,7 @@ GetRunningTransactionData(void)
|
|||
CurrentRunningXacts->subxid_overflow = suboverflowed;
|
||||
CurrentRunningXacts->nextXid = XidFromFullTransactionId(TransamVariables->nextXid);
|
||||
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
|
||||
CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid;
|
||||
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
|
||||
|
||||
Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
|
||||
|
|
|
@ -82,6 +82,8 @@ typedef struct RunningTransactionsData
|
|||
bool subxid_overflow; /* snapshot overflowed, subxids missing */
|
||||
TransactionId nextXid; /* xid from TransamVariables->nextXid */
|
||||
TransactionId oldestRunningXid; /* *not* oldestXmin */
|
||||
TransactionId oldestDatabaseRunningXid; /* same as above, but within the
|
||||
* current database */
|
||||
TransactionId latestCompletedXid; /* so we can set xmax */
|
||||
|
||||
TransactionId *xids; /* array of (sub)xids still running */
|
||||
|
|
Loading…
Reference in New Issue