Fix multiple crasher bugs in partitioned-table replication logic.
apply_handle_tuple_routing(), having detected and reported that the tuple it needed to update didn't exist, tried to update that tuple anyway, leading to a null-pointer dereference. logicalrep_partition_open() failed to ensure that the LogicalRepPartMapEntry it built for a partition was fully independent of that for the partition root, leading to trouble if the root entry was later freed or rebuilt. Meanwhile, on the publisher's side, pgoutput_change() sometimes attempted to apply execute_attr_map_tuple() to a NULL tuple. The first of these was reported by Sergey Bernikov in bug #17055; I found the other two while developing some test cases for this sadly under-tested code. Diagnosis and patch for the first issue by Amit Langote; patches for the others by me; new test cases by me. Back-patch to v13 where this logic came in. Discussion: https://postgr.es/m/17055-9ba800ec8522668b@postgresql.org
This commit is contained in:
parent
4efcf47053
commit
ab55d742eb
@ -620,7 +620,9 @@ logicalrep_partmap_init(void)
|
|||||||
* logicalrep_partition_open
|
* logicalrep_partition_open
|
||||||
*
|
*
|
||||||
* Returned entry reuses most of the values of the root table's entry, save
|
* Returned entry reuses most of the values of the root table's entry, save
|
||||||
* the attribute map, which can be different for the partition.
|
* the attribute map, which can be different for the partition. However,
|
||||||
|
* we must physically copy all the data, in case the root table's entry
|
||||||
|
* gets freed/rebuilt.
|
||||||
*
|
*
|
||||||
* Note there's no logicalrep_partition_close, because the caller closes the
|
* Note there's no logicalrep_partition_close, because the caller closes the
|
||||||
* component relation.
|
* component relation.
|
||||||
@ -656,7 +658,7 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root,
|
|||||||
|
|
||||||
part_entry->partoid = partOid;
|
part_entry->partoid = partOid;
|
||||||
|
|
||||||
/* Remote relation is used as-is from the root entry. */
|
/* Remote relation is copied as-is from the root entry. */
|
||||||
entry = &part_entry->relmapentry;
|
entry = &part_entry->relmapentry;
|
||||||
entry->remoterel.remoteid = remoterel->remoteid;
|
entry->remoterel.remoteid = remoterel->remoteid;
|
||||||
entry->remoterel.nspname = pstrdup(remoterel->nspname);
|
entry->remoterel.nspname = pstrdup(remoterel->nspname);
|
||||||
@ -699,7 +701,12 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
entry->attrmap = attrmap;
|
{
|
||||||
|
/* Lacking copy_attmap, do this the hard way. */
|
||||||
|
entry->attrmap = make_attrmap(attrmap->maplen);
|
||||||
|
memcpy(entry->attrmap->attnums, attrmap->attnums,
|
||||||
|
attrmap->maplen * sizeof(AttrNumber));
|
||||||
|
}
|
||||||
|
|
||||||
entry->updatable = root->updatable;
|
entry->updatable = root->updatable;
|
||||||
|
|
||||||
|
@ -1477,12 +1477,13 @@ apply_handle_update_internal(ApplyExecutionData *edata,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* The tuple to be updated could not be found.
|
* The tuple to be updated could not be found. Do nothing except for
|
||||||
|
* emitting a log message.
|
||||||
*
|
*
|
||||||
* TODO what to do here, change the log level to LOG perhaps?
|
* XXX should this be promoted to ereport(LOG) perhaps?
|
||||||
*/
|
*/
|
||||||
elog(DEBUG1,
|
elog(DEBUG1,
|
||||||
"logical replication did not find row for update "
|
"logical replication did not find row to be updated "
|
||||||
"in replication target relation \"%s\"",
|
"in replication target relation \"%s\"",
|
||||||
RelationGetRelationName(localrel));
|
RelationGetRelationName(localrel));
|
||||||
}
|
}
|
||||||
@ -1589,9 +1590,14 @@ apply_handle_delete_internal(ApplyExecutionData *edata,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* The tuple to be deleted could not be found. */
|
/*
|
||||||
|
* The tuple to be deleted could not be found. Do nothing except for
|
||||||
|
* emitting a log message.
|
||||||
|
*
|
||||||
|
* XXX should this be promoted to ereport(LOG) perhaps?
|
||||||
|
*/
|
||||||
elog(DEBUG1,
|
elog(DEBUG1,
|
||||||
"logical replication did not find row for delete "
|
"logical replication did not find row to be deleted "
|
||||||
"in replication target relation \"%s\"",
|
"in replication target relation \"%s\"",
|
||||||
RelationGetRelationName(localrel));
|
RelationGetRelationName(localrel));
|
||||||
}
|
}
|
||||||
@ -1728,30 +1734,30 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
|
|||||||
found = FindReplTupleInLocalRel(estate, partrel,
|
found = FindReplTupleInLocalRel(estate, partrel,
|
||||||
&part_entry->remoterel,
|
&part_entry->remoterel,
|
||||||
remoteslot_part, &localslot);
|
remoteslot_part, &localslot);
|
||||||
|
if (!found)
|
||||||
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
|
|
||||||
if (found)
|
|
||||||
{
|
|
||||||
/* Apply the update. */
|
|
||||||
slot_modify_data(remoteslot_part, localslot,
|
|
||||||
part_entry,
|
|
||||||
newtup);
|
|
||||||
MemoryContextSwitchTo(oldctx);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* The tuple to be updated could not be found.
|
* The tuple to be updated could not be found. Do nothing
|
||||||
|
* except for emitting a log message.
|
||||||
*
|
*
|
||||||
* TODO what to do here, change the log level to LOG
|
* XXX should this be promoted to ereport(LOG) perhaps?
|
||||||
* perhaps?
|
|
||||||
*/
|
*/
|
||||||
elog(DEBUG1,
|
elog(DEBUG1,
|
||||||
"logical replication did not find row for update "
|
"logical replication did not find row to be updated "
|
||||||
"in replication target relation \"%s\"",
|
"in replication target relation's partition \"%s\"",
|
||||||
RelationGetRelationName(partrel));
|
RelationGetRelationName(partrel));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Apply the update to the local tuple, putting the result in
|
||||||
|
* remoteslot_part.
|
||||||
|
*/
|
||||||
|
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
|
||||||
|
slot_modify_data(remoteslot_part, localslot, part_entry,
|
||||||
|
newtup);
|
||||||
|
MemoryContextSwitchTo(oldctx);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Does the updated tuple still satisfy the current
|
* Does the updated tuple still satisfy the current
|
||||||
* partition's constraint?
|
* partition's constraint?
|
||||||
|
@ -612,8 +612,11 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
|
|||||||
/* Convert tuples if needed. */
|
/* Convert tuples if needed. */
|
||||||
if (relentry->map)
|
if (relentry->map)
|
||||||
{
|
{
|
||||||
oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
|
if (oldtuple)
|
||||||
newtuple = execute_attr_map_tuple(newtuple, relentry->map);
|
oldtuple = execute_attr_map_tuple(oldtuple,
|
||||||
|
relentry->map);
|
||||||
|
newtuple = execute_attr_map_tuple(newtuple,
|
||||||
|
relentry->map);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ use strict;
|
|||||||
use warnings;
|
use warnings;
|
||||||
use PostgresNode;
|
use PostgresNode;
|
||||||
use TestLib;
|
use TestLib;
|
||||||
use Test::More tests => 28;
|
use Test::More tests => 31;
|
||||||
|
|
||||||
# Initialize publisher node
|
# Initialize publisher node
|
||||||
my $node_publisher = get_new_node('publisher');
|
my $node_publisher = get_new_node('publisher');
|
||||||
@ -118,7 +118,7 @@ $node_publisher->safe_psql('postgres',
|
|||||||
"INSERT INTO tab_mixed VALUES (2, 'bar', 2.2)");
|
"INSERT INTO tab_mixed VALUES (2, 'bar', 2.2)");
|
||||||
|
|
||||||
$node_publisher->safe_psql('postgres',
|
$node_publisher->safe_psql('postgres',
|
||||||
"INSERT INTO tab_full_pk VALUES (1, 'foo')");
|
"INSERT INTO tab_full_pk VALUES (1, 'foo'), (2, 'baz')");
|
||||||
|
|
||||||
$node_publisher->safe_psql('postgres',
|
$node_publisher->safe_psql('postgres',
|
||||||
"INSERT INTO tab_nothing VALUES (generate_series(1,20))");
|
"INSERT INTO tab_nothing VALUES (generate_series(1,20))");
|
||||||
@ -297,6 +297,38 @@ is( $result, qq(local|1.1|baz|1
|
|||||||
local|2.2|bar|2),
|
local|2.2|bar|2),
|
||||||
'update works with different column order and subscriber local values');
|
'update works with different column order and subscriber local values');
|
||||||
|
|
||||||
|
$result = $node_subscriber->safe_psql('postgres',
|
||||||
|
"SELECT * FROM tab_full_pk ORDER BY a");
|
||||||
|
is( $result, qq(1|bar
|
||||||
|
2|baz),
|
||||||
|
'update works with REPLICA IDENTITY FULL and a primary key');
|
||||||
|
|
||||||
|
# Check that subscriber handles cases where update/delete target tuple
|
||||||
|
# is missing. We have to look for the DEBUG1 log messages about that,
|
||||||
|
# so temporarily bump up the log verbosity.
|
||||||
|
$node_subscriber->append_conf('postgresql.conf', "log_min_messages = debug1");
|
||||||
|
$node_subscriber->reload;
|
||||||
|
|
||||||
|
$node_subscriber->safe_psql('postgres', "DELETE FROM tab_full_pk");
|
||||||
|
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"UPDATE tab_full_pk SET b = 'quux' WHERE a = 1");
|
||||||
|
$node_publisher->safe_psql('postgres', "DELETE FROM tab_full_pk WHERE a = 2");
|
||||||
|
|
||||||
|
$node_publisher->wait_for_catchup('tap_sub');
|
||||||
|
|
||||||
|
my $logfile = slurp_file($node_subscriber->logfile());
|
||||||
|
ok( $logfile =~
|
||||||
|
qr/logical replication did not find row to be updated in replication target relation "tab_full_pk"/,
|
||||||
|
'update target row is missing');
|
||||||
|
ok( $logfile =~
|
||||||
|
qr/logical replication did not find row to be deleted in replication target relation "tab_full_pk"/,
|
||||||
|
'delete target row is missing');
|
||||||
|
|
||||||
|
$node_subscriber->append_conf('postgresql.conf',
|
||||||
|
"log_min_messages = warning");
|
||||||
|
$node_subscriber->reload;
|
||||||
|
|
||||||
# check behavior with toasted values
|
# check behavior with toasted values
|
||||||
|
|
||||||
$node_publisher->safe_psql('postgres',
|
$node_publisher->safe_psql('postgres',
|
||||||
|
@ -6,7 +6,7 @@ use strict;
|
|||||||
use warnings;
|
use warnings;
|
||||||
use PostgresNode;
|
use PostgresNode;
|
||||||
use TestLib;
|
use TestLib;
|
||||||
use Test::More tests => 56;
|
use Test::More tests => 62;
|
||||||
|
|
||||||
# setup
|
# setup
|
||||||
|
|
||||||
@ -347,6 +347,46 @@ $result =
|
|||||||
$node_subscriber2->safe_psql('postgres', "SELECT a FROM tab1 ORDER BY 1");
|
$node_subscriber2->safe_psql('postgres', "SELECT a FROM tab1 ORDER BY 1");
|
||||||
is($result, qq(), 'truncate of tab1 replicated');
|
is($result, qq(), 'truncate of tab1 replicated');
|
||||||
|
|
||||||
|
# Check that subscriber handles cases where update/delete target tuple
|
||||||
|
# is missing. We have to look for the DEBUG1 log messages about that,
|
||||||
|
# so temporarily bump up the log verbosity.
|
||||||
|
$node_subscriber1->append_conf('postgresql.conf',
|
||||||
|
"log_min_messages = debug1");
|
||||||
|
$node_subscriber1->reload;
|
||||||
|
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"INSERT INTO tab1 VALUES (1, 'foo'), (4, 'bar'), (10, 'baz')");
|
||||||
|
|
||||||
|
$node_publisher->wait_for_catchup('sub1');
|
||||||
|
$node_publisher->wait_for_catchup('sub2');
|
||||||
|
|
||||||
|
$node_subscriber1->safe_psql('postgres', "DELETE FROM tab1");
|
||||||
|
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"UPDATE tab1 SET b = 'quux' WHERE a = 4");
|
||||||
|
$node_publisher->safe_psql('postgres', "DELETE FROM tab1");
|
||||||
|
|
||||||
|
$node_publisher->wait_for_catchup('sub1');
|
||||||
|
$node_publisher->wait_for_catchup('sub2');
|
||||||
|
|
||||||
|
my $logfile = slurp_file($node_subscriber1->logfile());
|
||||||
|
ok( $logfile =~
|
||||||
|
qr/logical replication did not find row to be updated in replication target relation's partition "tab1_2_2"/,
|
||||||
|
'update target row is missing in tab1_2_2');
|
||||||
|
ok( $logfile =~
|
||||||
|
qr/logical replication did not find row to be deleted in replication target relation "tab1_1"/,
|
||||||
|
'delete target row is missing in tab1_1');
|
||||||
|
ok( $logfile =~
|
||||||
|
qr/logical replication did not find row to be deleted in replication target relation "tab1_2_2"/,
|
||||||
|
'delete target row is missing in tab1_2_2');
|
||||||
|
ok( $logfile =~
|
||||||
|
qr/logical replication did not find row to be deleted in replication target relation "tab1_def"/,
|
||||||
|
'delete target row is missing in tab1_def');
|
||||||
|
|
||||||
|
$node_subscriber1->append_conf('postgresql.conf',
|
||||||
|
"log_min_messages = warning");
|
||||||
|
$node_subscriber1->reload;
|
||||||
|
|
||||||
# Tests for replication using root table identity and schema
|
# Tests for replication using root table identity and schema
|
||||||
|
|
||||||
# publisher
|
# publisher
|
||||||
@ -650,3 +690,32 @@ is( $result, qq(pub_tab2|1|xxx
|
|||||||
pub_tab2|3|yyy
|
pub_tab2|3|yyy
|
||||||
pub_tab2|5|zzz
|
pub_tab2|5|zzz
|
||||||
xxx_c|6|aaa), 'inserts into tab2 replicated');
|
xxx_c|6|aaa), 'inserts into tab2 replicated');
|
||||||
|
|
||||||
|
# Check that subscriber handles cases where update/delete target tuple
|
||||||
|
# is missing. We have to look for the DEBUG1 log messages about that,
|
||||||
|
# so temporarily bump up the log verbosity.
|
||||||
|
$node_subscriber1->append_conf('postgresql.conf',
|
||||||
|
"log_min_messages = debug1");
|
||||||
|
$node_subscriber1->reload;
|
||||||
|
|
||||||
|
$node_subscriber1->safe_psql('postgres', "DELETE FROM tab2");
|
||||||
|
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"UPDATE tab2 SET b = 'quux' WHERE a = 5");
|
||||||
|
$node_publisher->safe_psql('postgres', "DELETE FROM tab2 WHERE a = 1");
|
||||||
|
|
||||||
|
$node_publisher->wait_for_catchup('sub_viaroot');
|
||||||
|
$node_publisher->wait_for_catchup('sub2');
|
||||||
|
|
||||||
|
$logfile = slurp_file($node_subscriber1->logfile());
|
||||||
|
ok( $logfile =~
|
||||||
|
qr/logical replication did not find row to be updated in replication target relation's partition "tab2_1"/,
|
||||||
|
'update target row is missing in tab2_1');
|
||||||
|
ok( $logfile =~
|
||||||
|
qr/logical replication did not find row to be deleted in replication target relation "tab2_1"/,
|
||||||
|
'delete target row is missing in tab2_1');
|
||||||
|
|
||||||
|
# No need for this until more tests are added.
|
||||||
|
# $node_subscriber1->append_conf('postgresql.conf',
|
||||||
|
# "log_min_messages = warning");
|
||||||
|
# $node_subscriber1->reload;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user