From 29ec94efd08b0b3f863db13f5978cdeab4f1c8dc Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Fri, 4 Mar 2022 18:53:13 -0800 Subject: [PATCH] Introduce PG_TEST_TIMEOUT_DEFAULT for TAP suite non-elapsing timeouts. Slow hosts may avoid load-induced, spurious failures by setting environment variable PG_TEST_TIMEOUT_DEFAULT to some number of seconds greater than 180. Developers may see faster failures by setting that environment variable to some lesser number of seconds. In tests, write $PostgreSQL::Test::Utils::timeout_default wherever the convention has been to write 180. This change raises the default for some briefer timeouts. Back-patch to v10 (all supported versions). Discussion: https://postgr.es/m/20220218052842.GA3627003@rfd.leadboat.com --- contrib/amcheck/t/002_cic.pl | 3 ++- contrib/amcheck/t/003_cic_2pc.pl | 14 ++++++----- src/bin/pg_ctl/t/004_logrotate.pl | 2 +- src/bin/pg_dump/t/002_pg_dump.pl | 3 ++- src/bin/psql/t/010_tab_completion.pl | 6 ++--- src/bin/scripts/t/080_pg_isready.pl | 4 ++-- src/test/perl/PostgresNode.pm | 24 +++++++++++-------- src/test/perl/TestLib.pm | 7 +++++- src/test/recovery/t/003_recovery_targets.pl | 4 ++-- src/test/recovery/t/006_logical_decoding.pl | 4 ++-- .../t/010_logical_decoding_timelines.pl | 4 ++-- src/test/recovery/t/013_crash_restart.pl | 2 +- src/test/recovery/t/017_shm.pl | 13 +++++----- src/test/recovery/t/019_replslot_limit.pl | 2 +- 14 files changed, 53 insertions(+), 39 deletions(-) diff --git a/contrib/amcheck/t/002_cic.pl b/contrib/amcheck/t/002_cic.pl index 832ae8f1cb..3587807b95 100644 --- a/contrib/amcheck/t/002_cic.pl +++ b/contrib/amcheck/t/002_cic.pl @@ -18,7 +18,8 @@ my ($node, $result); # $node = get_new_node('CIC_test'); $node->init; -$node->append_conf('postgresql.conf', 'lock_timeout = 180000'); +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . (1000 * $TestLib::timeout_default)); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); $node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); diff --git a/contrib/amcheck/t/003_cic_2pc.pl b/contrib/amcheck/t/003_cic_2pc.pl index 9b2c92d11a..445aabaa5e 100644 --- a/contrib/amcheck/t/003_cic_2pc.pl +++ b/contrib/amcheck/t/003_cic_2pc.pl @@ -22,7 +22,8 @@ my ($node, $result); $node = get_new_node('CIC_2PC_test'); $node->init; $node->append_conf('postgresql.conf', 'max_prepared_transactions = 10'); -$node->append_conf('postgresql.conf', 'lock_timeout = 180000'); +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . (1000 * $TestLib::timeout_default)); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); $node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); @@ -38,7 +39,7 @@ $node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); my $main_in = ''; my $main_out = ''; -my $main_timer = IPC::Run::timeout(180); +my $main_timer = IPC::Run::timeout($TestLib::timeout_default); my $main_h = $node->background_psql('postgres', \$main_in, \$main_out, @@ -52,7 +53,7 @@ pump $main_h until $main_out =~ /syncpoint1/ || $main_timer->is_expired; my $cic_in = ''; my $cic_out = ''; -my $cic_timer = IPC::Run::timeout(180); +my $cic_timer = IPC::Run::timeout($TestLib::timeout_default); my $cic_h = $node->background_psql('postgres', \$cic_in, \$cic_out, $cic_timer, on_error_stop => 1); @@ -113,9 +114,10 @@ PREPARE TRANSACTION 'persists_forever'; )); $node->restart; -my $reindex_in = ''; -my $reindex_out = ''; -my $reindex_timer = IPC::Run::timeout(180); +my $reindex_in = ''; +my $reindex_out = ''; +my $reindex_timer = + IPC::Run::timeout($TestLib::timeout_default); my $reindex_h = $node->background_psql('postgres', \$reindex_in, \$reindex_out, $reindex_timer, on_error_stop => 1); diff --git a/src/bin/pg_ctl/t/004_logrotate.pl b/src/bin/pg_ctl/t/004_logrotate.pl index 2f381e556e..d66f464e10 100644 --- a/src/bin/pg_ctl/t/004_logrotate.pl +++ b/src/bin/pg_ctl/t/004_logrotate.pl @@ -24,7 +24,7 @@ $node->start(); $node->psql('postgres', 'SELECT 1/0'); # might need to retry if logging collector process is slow... -my $max_attempts = 180 * 10; +my $max_attempts = 10 * $TestLib::timeout_default; my $current_logfiles; for (my $attempts = 0; $attempts < $max_attempts; $attempts++) diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 8c86d75f82..4beed0caa9 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -271,7 +271,8 @@ my %pgdump_runs = ( '--no-sync', "--file=$tempdir/only_dump_test_table.sql", '--table=dump_test.test_table', - '--lock-wait-timeout=1000000', + '--lock-wait-timeout=' + . (1000 * $TestLib::timeout_default), 'postgres', ], }, diff --git a/src/bin/psql/t/010_tab_completion.pl b/src/bin/psql/t/010_tab_completion.pl index 2761e6117c..cd739be260 100644 --- a/src/bin/psql/t/010_tab_completion.pl +++ b/src/bin/psql/t/010_tab_completion.pl @@ -89,7 +89,7 @@ close $FH; my $in = ''; my $out = ''; -my $timer = timer(5); +my $timer = timer($TestLib::timeout_default); my $h = $node->interactive_psql('postgres', \$in, \$out, $timer); @@ -106,7 +106,7 @@ sub check_completion # reset output collector $out = ""; # restart per-command timer - $timer->start(5); + $timer->start($TestLib::timeout_default); # send the data to be sent $in .= $send; # wait ... @@ -232,7 +232,7 @@ check_completion( clear_line(); # send psql an explicit \q to shut it down, else pty won't close properly -$timer->start(5); +$timer->start($TestLib::timeout_default); $in .= "\\q\n"; finish $h or die "psql returned $?"; $timer->reset; diff --git a/src/bin/scripts/t/080_pg_isready.pl b/src/bin/scripts/t/080_pg_isready.pl index 6da89e1b04..fc2f54f061 100644 --- a/src/bin/scripts/t/080_pg_isready.pl +++ b/src/bin/scripts/t/080_pg_isready.pl @@ -15,6 +15,6 @@ my $node = get_new_node('main'); $node->init; $node->start; -# use a long timeout for the benefit of very slow buildfarm machines -$node->command_ok([qw(pg_isready --timeout=60)], +$node->command_ok( + [ 'pg_isready', "--timeout=$TestLib::timeout_default" ], 'succeeds with server running'); diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 05c7866709..07120e097b 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -31,7 +31,8 @@ PostgresNode - class representing PostgreSQL server instance my ($stdout, $stderr, $timed_out); my $cmdret = $node->psql('postgres', 'SELECT pg_sleep(600)', stdout => \$stdout, stderr => \$stderr, - timeout => 180, timed_out => \$timed_out, + timeout => $TestLib::timeout_default, + timed_out => \$timed_out, extra_params => ['--single-transaction'], on_error_die => 1) print "Sleep timed out" if $timed_out; @@ -1446,7 +1447,8 @@ e.g. my ($stdout, $stderr, $timed_out); my $cmdret = $node->psql('postgres', 'SELECT pg_sleep(600)', stdout => \$stdout, stderr => \$stderr, - timeout => 180, timed_out => \$timed_out, + timeout => $TestLib::timeout_default, + timed_out => \$timed_out, extra_params => ['--single-transaction']) will set $cmdret to undef and $timed_out to a true value. @@ -1610,7 +1612,8 @@ scalar reference. This allows the caller to act on other parts of the system while idling this backend. The specified timer object is attached to the harness, as well. It's caller's -responsibility to select the timeout length, and to restart the timer after +responsibility to set the timeout length (usually +$TestLib::timeout_default), and to restart the timer after each command if the timeout is per-command. psql is invoked in tuples-only unaligned mode with reading of B<.psqlrc> @@ -1699,9 +1702,10 @@ The process's stdin is sourced from the $stdin scalar reference, and its stdout and stderr go to the $stdout scalar reference. ptys are used so that psql thinks it's being called interactively. -The specified timer object is attached to the harness, as well. -It's caller's responsibility to select the timeout length, and to -restart the timer after each command if the timeout is per-command. +The specified timer object is attached to the harness, as well. It's caller's +responsibility to set the timeout length (usually +$TestLib::timeout_default), and to restart the timer after +each command if the timeout is per-command. psql is invoked in tuples-only unaligned mode with reading of B<.psqlrc> disabled. That may be overridden by passing extra psql parameters. @@ -1849,7 +1853,7 @@ sub pgbench Run B<$query> repeatedly, until it returns the B<$expected> result ('t', or SQL boolean true, by default). Continues polling if B returns an error result. -Times out after 180 seconds. +Times out after $TestLib::timeout_default seconds. Returns 1 if successful, 0 if timed out. =cut @@ -1862,7 +1866,7 @@ sub poll_query_until my $cmd = [ 'psql', '-XAt', '-d', $self->connstr($dbname) ]; my ($stdout, $stderr); - my $max_attempts = 180 * 10; + my $max_attempts = 10 * $TestLib::timeout_default; my $attempts = 0; while ($attempts < $max_attempts) @@ -1884,8 +1888,8 @@ sub poll_query_until $attempts++; } - # The query result didn't change in 180 seconds. Give up. Print the - # output from the last attempt, hopefully that's useful for debugging. + # Give up. Print the output from the last attempt, hopefully that's useful + # for debugging. diag qq(poll_query_until timed out executing this query: $query expecting this output: diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm index 5730151457..8d65fdc36a 100644 --- a/src/test/perl/TestLib.pm +++ b/src/test/perl/TestLib.pm @@ -86,7 +86,8 @@ our @EXPORT = qw( $use_unix_sockets ); -our ($windows_os, $use_unix_sockets, $tmp_check, $log_path, $test_logfile); +our ($windows_os, $use_unix_sockets, $timeout_default, + $tmp_check, $log_path, $test_logfile); BEGIN { @@ -144,6 +145,10 @@ BEGIN # supported, but it can be overridden if desired. $use_unix_sockets = (!$windows_os || defined $ENV{PG_TEST_USE_UNIX_SOCKETS}); + + $timeout_default = $ENV{PG_TEST_TIMEOUT_DEFAULT}; + $timeout_default = 180 + if not defined $timeout_default or $timeout_default eq ''; } =pod diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index b1a413035c..ea8e81eefc 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -169,8 +169,8 @@ run_log( $node_standby->logfile, 'start' ]); -# wait up to 180s for postgres to terminate -foreach my $i (0 .. 1800) +# wait for postgres to terminate +foreach my $i (0 .. 10 * $TestLib::timeout_default) { last if !-f $node_standby->data_dir . '/postmaster.pid'; usleep(100_000); diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl index 78229a7b92..f523d8f593 100644 --- a/src/test/recovery/t/006_logical_decoding.pl +++ b/src/test/recovery/t/006_logical_decoding.pl @@ -95,7 +95,7 @@ $node_master->safe_psql('postgres', ); my $stdout_recv = $node_master->pg_recvlogical_upto( - 'postgres', 'test_slot', $endpos, 180, + 'postgres', 'test_slot', $endpos, $TestLib::timeout_default, 'include-xids' => '0', 'skip-empty-xacts' => '1'); chomp($stdout_recv); @@ -107,7 +107,7 @@ $node_master->poll_query_until('postgres', ) or die "slot never became inactive"; $stdout_recv = $node_master->pg_recvlogical_upto( - 'postgres', 'test_slot', $endpos, 180, + 'postgres', 'test_slot', $endpos, $TestLib::timeout_default, 'include-xids' => '0', 'skip-empty-xacts' => '1'); chomp($stdout_recv); diff --git a/src/test/recovery/t/010_logical_decoding_timelines.pl b/src/test/recovery/t/010_logical_decoding_timelines.pl index 100766638b..09982b9c75 100644 --- a/src/test/recovery/t/010_logical_decoding_timelines.pl +++ b/src/test/recovery/t/010_logical_decoding_timelines.pl @@ -155,7 +155,7 @@ like( ($ret, $stdout, $stderr) = $node_replica->psql( 'postgres', "SELECT data FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", - timeout => 180); + timeout => $TestLib::timeout_default); is($ret, 0, 'replay from slot before_basebackup succeeds'); my $final_expected_output_bb = q(BEGIN @@ -184,7 +184,7 @@ my $endpos = $node_replica->safe_psql('postgres', $stdout = $node_replica->pg_recvlogical_upto( 'postgres', 'before_basebackup', - $endpos, 180, + $endpos, $TestLib::timeout_default, 'include-xids' => '0', 'skip-empty-xacts' => '1'); diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl index 3e3d50b5dd..95445dc097 100644 --- a/src/test/recovery/t/013_crash_restart.pl +++ b/src/test/recovery/t/013_crash_restart.pl @@ -23,7 +23,7 @@ plan tests => 18; # instance being driven by us, add a timeout high enough that it # should never trigger even on very slow machines, unless something # is really wrong. -my $psql_timeout = IPC::Run::timer(60); +my $psql_timeout = IPC::Run::timer($TestLib::timeout_default); my $node = get_new_node('master'); $node->init(allows_streaming => 1); diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl index 8073b60c08..85501d1704 100644 --- a/src/test/recovery/t/017_shm.pl +++ b/src/test/recovery/t/017_shm.pl @@ -133,7 +133,7 @@ my $slow_client = IPC::Run::start( \$stdout, '2>', \$stderr, - IPC::Run::timeout(900)); # five times the poll_query_until timeout + IPC::Run::timeout(5 * $TestLib::timeout_default)); ok( $gnat->poll_query_until( 'postgres', "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'), @@ -144,10 +144,11 @@ $gnat->kill9; unlink($gnat->data_dir . '/postmaster.pid'); $gnat->rotate_logfile; # on Windows, can't open old log for writing log_ipcs(); -# Reject ordinary startup. Retry for the same reasons poll_start() does. +# Reject ordinary startup. Retry for the same reasons poll_start() does, +# every 0.1s for at least $TestLib::timeout_default seconds. my $pre_existing_msg = qr/pre-existing shared memory block/; { - my $max_attempts = 180 * 10; # Retry every 0.1s for at least 180s. + my $max_attempts = 10 * $TestLib::timeout_default; my $attempts = 0; while ($attempts < $max_attempts) { @@ -194,7 +195,7 @@ sub poll_start { my ($node) = @_; - my $max_attempts = 180 * 10; + my $max_attempts = 10 * $TestLib::timeout_default; my $attempts = 0; while ($attempts < $max_attempts) @@ -210,8 +211,8 @@ sub poll_start $attempts++; } - # No success within 180 seconds. Try one last time without fail_ok, which - # will BAIL_OUT unless it succeeds. + # Try one last time without fail_ok, which will BAIL_OUT unless it + # succeeds. $node->start && return 1; return 0; } diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl index d33ca1e35a..7a8954dad2 100644 --- a/src/test/recovery/t/019_replslot_limit.pl +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -293,7 +293,7 @@ my @result = SELECT pg_switch_wal(); CHECKPOINT; SELECT 'finished';", - timeout => '60')); + timeout => $TestLib::timeout_default)); is($result[1], 'finished', 'check if checkpoint command is not blocked'); #####################################