tap-fixes.txt
text/plain
Filename: tap-fixes.txt
Type: text/plain
Part: 1
From 57bb79b1bc8faac646131336abcc1596711c5f32 Mon Sep 17 00:00:00 2001
From: tomas <tomas>
Date: Thu, 28 Aug 2025 22:10:25 +0200
Subject: [PATCH] TAP fixes
---
.../t/006_concurrent_pgbench.pl | 88 ++++++++++++++-----
1 file changed, 68 insertions(+), 20 deletions(-)
diff --git a/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl b/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl
index b33ca6e0c26..374eac7e6a3 100644
--- a/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl
+++ b/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl
@@ -23,11 +23,14 @@ my $node_primary_loglocation = 0;
my $node_standby_1;
my $node_standby_1_loglocation = 0;
+my $pgbench_primary = undef;
+my $pgbench_standby = undef;
+
# The number of full test iterations which will be performed. The exact number
# of tests performed and the wall time taken is non-deterministic as the test
# performs a lot of randomized actions, but 50 iterations will be a long test
# run regardless.
-my $TEST_ITERATIONS = 50;
+my $TEST_ITERATIONS = 1000;
# Variables which record the current state of the cluster
my $data_checksum_state = 'off';
@@ -55,7 +58,7 @@ if ($ENV{enable_injection_points} ne 'yes')
# whether to turn things off during testing.
sub cointoss
{
- return int(rand(2) == 1);
+ return int(rand() < 0.5);
}
# Helper for injecting random sleeps here and there in the testrun. The sleep
@@ -73,11 +76,16 @@ sub background_ro_pgbench
{
my ($port, $stdin, $stdout, $stderr) = @_;
- my $pgbench_primary = IPC::Run::start(
- [ 'pgbench', '-p', $port, '-S', '-T', '600', '-c', '10', 'postgres' ],
- '<' => \$stdin,
- '>' => \$stdout,
- '2>' => \$stderr,
+ if ($pgbench_standby)
+ {
+ $pgbench_standby->finish;
+ }
+
+ $pgbench_standby = IPC::Run::start(
+ [ 'pgbench', '-n', '-p', $port, '-S', '-T', '600', '-c', '10', 'postgres' ],
+ '<' => '/dev/null',
+ '>' => '/dev/null',
+ '2>' => '/dev/null',
IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default));
}
@@ -87,11 +95,16 @@ sub background_rw_pgbench
{
my ($port, $stdin, $stdout, $stderr) = @_;
- my $pgbench_primary = IPC::Run::start(
+ if ($pgbench_primary)
+ {
+ $pgbench_primary->finish;
+ }
+
+ $pgbench_primary = IPC::Run::start(
[ 'pgbench', '-p', $port, '-T', '600', '-c', '10', 'postgres' ],
- '<' => \$stdin,
- '>' => \$stdout,
- '2>' => \$stderr,
+ '<' => '/dev/null',
+ '>' => '/dev/null',
+ '2>' => '/dev/null',
IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default));
}
@@ -224,6 +237,9 @@ background_rw_pgbench(
$node_primary->port, $pgb_primary_stdin,
$pgb_primary_stdout, $pgb_primary_stderr);
+my $primary_shutdown_clean = 0;
+my $standby_shutdown_clean = 0;
+
# Main test suite. This loop will start a pgbench run on the cluster and while
# that's running flip the state of data checksums concurrently. It will then
# randomly restart thec cluster (in fast or immediate) mode and then check for
@@ -246,9 +262,11 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++)
$node_primary_loglocation = -s $node_primary->logfile;
# If data checksums are enabled, take the opportunity to verify them
- # while the cluster is offline
- $node_primary->checksum_verify_offline()
- unless $data_checksum_state eq 'off';
+ # while the cluster is offline (but only if stopped in a clean way,
+ # not after immediate shutdown)
+ #$node_primary->checksum_verify_offline()
+ # unless $data_checksum_state eq 'off' or !$primary_shutdown_clean;
+
random_sleep();
$node_primary->start;
# Start a pgbench in the background against the primary
@@ -270,9 +288,11 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++)
$node_standby_1_loglocation = -s $node_standby_1->logfile;
# If data checksums are enabled, take the opportunity to verify them
- # while the cluster is offline
- $node_standby_1->checksum_verify_offline()
- unless $data_checksum_state eq 'off';
+ # while the cluster is offline (but only if stopped in a clean way,
+ # not after immediate shutdown)
+ #$node_standby_1->checksum_verify_offline()
+ # unless $data_checksum_state eq 'off' or !$standby_shutdown_clean;
+
random_sleep();
$node_standby_1->start;
# Start a select-only pgbench in the background on the standby
@@ -287,13 +307,41 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++)
my $result = $node_primary->safe_psql('postgres',
"SELECT count(*) FROM t WHERE a > 1");
is($result, '100000', 'ensure data pages can be read back on primary');
+
random_sleep();
+
$node_primary->wait_for_catchup($node_standby_1, 'write');
- # Potentially powercycle the cluster
- $node_primary->stop($stop_modes[ int(rand(100)) ]) if cointoss();
random_sleep();
- $node_standby_1->stop($stop_modes[ int(rand(100)) ]) if cointoss();
+
+ # Potentially powercycle the cluster (the nodes independently)
+ # XXX should maybe try stopping nodes in the opposite order too?
+ if (cointoss())
+ {
+ my $mode = $stop_modes[ int(rand(100)) ];
+ $node_primary->stop($mode);
+ $primary_shutdown_clean = ($mode eq 'fast');
+ }
+
+ random_sleep();
+
+ if (cointoss())
+ {
+ my $mode = $stop_modes[ int(rand(100)) ];
+ $node_standby_1->stop($mode);
+ $standby_shutdown_clean = ($mode eq 'fast');
+ }
+}
+
+# make sure the nodes are running
+if (!$node_primary->is_alive)
+{
+ $node_primary->start;
+}
+
+if (!$node_standby_1->is_alive)
+{
+ $node_standby_1->start;
}
# Testrun is over, ensure that data reads back as expected and perform a final
--
2.39.5