Improve wording of log messages triggered by max_slot_wal_keep_size.
The one about "terminating process to release replication slot" told you nothing about why that was happening. The one about "invalidating slot because its restart_lsn exceeds max_slot_wal_keep_size" told you what was happening, but violated our message style guideline about keeping the primary message short. Add DETAIL/HINT lines to carry the appropriate detail and make the two cases more uniform. While here, fix bogus test logic in 019_replslot_limit.pl: if it timed out without seeing the expected log message, no test failure would be reported. This is flat broken since commit 549ec201d removed the test counts; even before that it was horribly bad style, since you'd only get told that not all tests had been run. Kyotaro Horiguchi, reviewed by Bertrand Drouvot; test fixes by me Discussion: https://postgr.es/m/20211214.130456.2233153190058148084.horikyota.ntt@gmail.com
This commit is contained in:
parent
d7e39d72ca
commit
551aa6b7b9
@ -1293,8 +1293,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN,
|
|||||||
if (last_signaled_pid != active_pid)
|
if (last_signaled_pid != active_pid)
|
||||||
{
|
{
|
||||||
ereport(LOG,
|
ereport(LOG,
|
||||||
(errmsg("terminating process %d to release replication slot \"%s\"",
|
errmsg("terminating process %d to release replication slot \"%s\"",
|
||||||
active_pid, NameStr(slotname))));
|
active_pid, NameStr(slotname)),
|
||||||
|
errdetail("The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.",
|
||||||
|
LSN_FORMAT_ARGS(restart_lsn),
|
||||||
|
(unsigned long long) (oldestLSN - restart_lsn)),
|
||||||
|
errhint("You might need to increase max_slot_wal_keep_size."));
|
||||||
|
|
||||||
(void) kill(active_pid, SIGTERM);
|
(void) kill(active_pid, SIGTERM);
|
||||||
last_signaled_pid = active_pid;
|
last_signaled_pid = active_pid;
|
||||||
@ -1331,9 +1335,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN,
|
|||||||
ReplicationSlotRelease();
|
ReplicationSlotRelease();
|
||||||
|
|
||||||
ereport(LOG,
|
ereport(LOG,
|
||||||
(errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size",
|
errmsg("invalidating obsolete replication slot \"%s\"",
|
||||||
NameStr(slotname),
|
NameStr(slotname)),
|
||||||
LSN_FORMAT_ARGS(restart_lsn))));
|
errdetail("The slot's restart_lsn %X/%X exceeds the limit by %llu bytes.",
|
||||||
|
LSN_FORMAT_ARGS(restart_lsn),
|
||||||
|
(unsigned long long) (oldestLSN - restart_lsn)),
|
||||||
|
errhint("You might need to increase max_slot_wal_keep_size."));
|
||||||
|
|
||||||
/* done with this slot for now */
|
/* done with this slot for now */
|
||||||
break;
|
break;
|
||||||
|
@ -185,8 +185,7 @@ my $invalidated = 0;
|
|||||||
for (my $i = 0; $i < 10000; $i++)
|
for (my $i = 0; $i < 10000; $i++)
|
||||||
{
|
{
|
||||||
if (find_in_log(
|
if (find_in_log(
|
||||||
$node_primary,
|
$node_primary, 'invalidating obsolete replication slot "rep1"',
|
||||||
"invalidating slot \"rep1\" because its restart_lsn [0-9A-F/]+ exceeds max_slot_wal_keep_size",
|
|
||||||
$logstart))
|
$logstart))
|
||||||
{
|
{
|
||||||
$invalidated = 1;
|
$invalidated = 1;
|
||||||
@ -379,6 +378,7 @@ $logstart = get_log_size($node_primary3);
|
|||||||
kill 'STOP', $senderpid, $receiverpid;
|
kill 'STOP', $senderpid, $receiverpid;
|
||||||
advance_wal($node_primary3, 2);
|
advance_wal($node_primary3, 2);
|
||||||
|
|
||||||
|
my $msg_logged = 0;
|
||||||
my $max_attempts = $PostgreSQL::Test::Utils::timeout_default;
|
my $max_attempts = $PostgreSQL::Test::Utils::timeout_default;
|
||||||
while ($max_attempts-- >= 0)
|
while ($max_attempts-- >= 0)
|
||||||
{
|
{
|
||||||
@ -387,11 +387,12 @@ while ($max_attempts-- >= 0)
|
|||||||
"terminating process $senderpid to release replication slot \"rep3\"",
|
"terminating process $senderpid to release replication slot \"rep3\"",
|
||||||
$logstart))
|
$logstart))
|
||||||
{
|
{
|
||||||
ok(1, "walsender termination logged");
|
$msg_logged = 1;
|
||||||
last;
|
last;
|
||||||
}
|
}
|
||||||
sleep 1;
|
sleep 1;
|
||||||
}
|
}
|
||||||
|
ok($msg_logged, "walsender termination logged");
|
||||||
|
|
||||||
# Now let the walsender continue; slot should be killed now.
|
# Now let the walsender continue; slot should be killed now.
|
||||||
# (Must not let walreceiver run yet; otherwise the standby could start another
|
# (Must not let walreceiver run yet; otherwise the standby could start another
|
||||||
@ -402,18 +403,20 @@ $node_primary3->poll_query_until('postgres',
|
|||||||
"lost")
|
"lost")
|
||||||
or die "timed out waiting for slot to be lost";
|
or die "timed out waiting for slot to be lost";
|
||||||
|
|
||||||
|
$msg_logged = 0;
|
||||||
$max_attempts = $PostgreSQL::Test::Utils::timeout_default;
|
$max_attempts = $PostgreSQL::Test::Utils::timeout_default;
|
||||||
while ($max_attempts-- >= 0)
|
while ($max_attempts-- >= 0)
|
||||||
{
|
{
|
||||||
if (find_in_log(
|
if (find_in_log(
|
||||||
$node_primary3,
|
$node_primary3, 'invalidating obsolete replication slot "rep3"',
|
||||||
'invalidating slot "rep3" because its restart_lsn', $logstart))
|
$logstart))
|
||||||
{
|
{
|
||||||
ok(1, "slot invalidation logged");
|
$msg_logged = 1;
|
||||||
last;
|
last;
|
||||||
}
|
}
|
||||||
sleep 1;
|
sleep 1;
|
||||||
}
|
}
|
||||||
|
ok($msg_logged, "slot invalidation logged");
|
||||||
|
|
||||||
# Now let the walreceiver continue, so that the node can be stopped cleanly
|
# Now let the walreceiver continue, so that the node can be stopped cleanly
|
||||||
kill 'CONT', $receiverpid;
|
kill 'CONT', $receiverpid;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user