v1-0001-Reproduce-bgw-not-starting-after-restart_after_cr.patch
text/x-patch
Filename: v1-0001-Reproduce-bgw-not-starting-after-restart_after_cr.patch
Type: text/x-patch
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v1-0001
Subject: Reproduce bgw not starting after restart_after_crash
| File | + | − |
|---|---|---|
| contrib/pg_prewarm/t/002_bgw_crash_restart.pl | 176 | 0 |
| src/backend/postmaster/postmaster.c | 3 | 0 |
From 4ae584f3f3e352bb4b7a0cedf1f7efc27fe8154d Mon Sep 17 00:00:00 2001
From: Andrey Rudometov <unlimitedhikari@gmail.com>
Date: Wed, 11 Jun 2025 11:10:53 +0700
Subject: [PATCH v1 1/2] Reproduce bgw not starting after restart_after_crash
---
contrib/pg_prewarm/t/002_bgw_crash_restart.pl | 176 ++++++++++++++++++
src/backend/postmaster/postmaster.c | 3 +
2 files changed, 179 insertions(+)
create mode 100644 contrib/pg_prewarm/t/002_bgw_crash_restart.pl
diff --git a/contrib/pg_prewarm/t/002_bgw_crash_restart.pl b/contrib/pg_prewarm/t/002_bgw_crash_restart.pl
new file mode 100644
index 00000000000..7376194707a
--- /dev/null
+++ b/contrib/pg_prewarm/t/002_bgw_crash_restart.pl
@@ -0,0 +1,176 @@
+
+# Copyright (c) 2021-2025, PostgreSQL Global Development Group
+
+#
+# This test ensures that a background worker survives a crash restart.
+# Intended solely for reproduction; made of 001_basic.pl & 013_crash_restart.pl.
+#
+# The pg_prewarm extension is chosen as it creates a bgw and is in-tree.
+#
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+use Carp qw(croak);
+use Time::HiRes qw(usleep);
+
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(allows_streaming => 1);
+$node->append_conf(
+ 'postgresql.conf',
+ qq{shared_preload_libraries = 'pg_prewarm'
+ pg_prewarm.autoprewarm = true
+ pg_prewarm.autoprewarm_interval = 1});
+$node->start();
+
+# By default PostgreSQL::Test::Cluster doesn't restart after a crash, so turn restart_after_crash on.
+$node->safe_psql(
+ 'postgres',
+ q[ALTER SYSTEM SET restart_after_crash = 1;
+ ALTER SYSTEM SET log_connections = receipt;
+ SELECT pg_reload_conf();]);
+
+# Set up pg_prewarm and a small table, then restart the node.
+$node->safe_psql("postgres",
+ "CREATE EXTENSION pg_prewarm;\n"
+ . "CREATE TABLE test(c1 int);\n"
+ . "INSERT INTO test SELECT generate_series(1, 100);");
+$node->restart;
+
+# Wait for the prewarm worker to show signs of life.
+$node->wait_for_log(
+ "autoprewarm successfully prewarmed [1-9][0-9]* of [0-9]+ previously-loaded blocks"
+);
+
+# Run psql, keeping the session alive, so we have a live backend to kill.
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+ [
+ 'psql', '--no-psqlrc', '--quiet', '--no-align', '--tuples-only',
+ '--set' => 'ON_ERROR_STOP=1',
+ '--file' => '-',
+ '--dbname' => $node->connstr('postgres')
+ ],
+ '<' => \$killme_stdin,
+ '>' => \$killme_stdout,
+ '2>' => \$killme_stderr,
+ $psql_timeout);
+
+# Acquire the pid of the new backend; it is the target of the SIGQUIT sent later.
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok( pump_until(
+ $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ "acquired pid for SIGKILL");
+my $pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# A second psql session is needed to check whether the crash-restart happened.
+my ($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
+my $monitor = IPC::Run::start(
+ [
+ 'psql', '--no-psqlrc', '--quiet', '--no-align', '--tuples-only',
+ '--set' => 'ON_ERROR_STOP=1',
+ '--file', '-', '--dbname' => $node->connstr('postgres')
+ ],
+ '<' => \$monitor_stdin,
+ '>' => \$monitor_stdout,
+ '2>' => \$monitor_stderr,
+ $psql_timeout);
+
+# Start a long-running query in the second session; its failure will signal
+# that the crash-restart has occurred. The initial wait for the trivial select
+# is to be sure that psql has successfully connected to a backend.
+$monitor_stdin .= q[
+SELECT $$psql-connected$$;
+SELECT pg_sleep(3600);
+];
+ok( pump_until(
+ $monitor, $psql_timeout, \$monitor_stdout, qr/psql-connected/m),
+ 'monitor connected');
+$monitor_stdout = '';
+$monitor_stderr = '';
+# Remember the current size of the server log, so the final check only looks at output written from this point on.
+my $log_offset = -s $node->logfile;
+
+# Kill once with QUIT - we expect psql to exit, while emitting an error message first.
+my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'QUIT', $pid);
+
+# Exactly one process should have been alive to be killed.
+is($ret, 0, "killed process with SIGQUIT");
+
+# Check that psql sees the killed backend as having been terminated.
+$killme_stdin .= q[
+SELECT 1;
+];
+ok( pump_until(
+ $killme,
+ $psql_timeout,
+ \$killme_stderr,
+ qr/WARNING: terminating connection because of unexpected SIGQUIT signal|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "psql query died successfully after SIGQUIT");
+$killme_stderr = '';
+$killme_stdout = '';
+$killme->finish;
+
+# Wait till the monitor session is terminated as well - we should get the
+# WARNING here, but sometimes the server is unable to send that, if
+# interrupted while sending.
+ok( pump_until(
+ $monitor,
+ $psql_timeout,
+ \$monitor_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "psql monitor died successfully after SIGQUIT");
+$monitor->finish;
+
+# Wait till the server has restarted and can be queried again.
+is($node->poll_query_until('postgres', undef, ''),
+ "1", "reconnected after SIGQUIT");
+
+# The prewarm worker should show signs of life again; only log output past $log_offset counts.
+wait_for_log_less(
+ $node,
+ "autoprewarm successfully prewarmed [1-9][0-9]* of [0-9]+ previously-loaded blocks",
+ $log_offset
+);
+
+$node->stop();
+
+done_testing();
+
+# Poll the node's log file from $offset until it matches $regexp; croak on timeout.
+# There is no point in waiting for timeout_default here, so make fewer attempts.
+sub wait_for_log_less
+{
+ my ($node, $regexp, $offset) = @_;
+ $offset = 0 unless defined $offset;
+
+ # Wait for at most 5 seconds (50 attempts, 0.1 second apart). Surely a lone bgw with 1 second timeout will start..
+ my $max_attempts = 10 * 5;
+ my $attempts = 0;
+
+ while ($attempts < $max_attempts)
+ {
+ my $log =
+ PostgreSQL::Test::Utils::slurp_file($node->logfile, $offset);
+
+ # On a match, return the offset just past the log contents read so far.
+ return $offset + length($log) if ($log =~ m/$regexp/);
+
+ # Wait 0.1 second before retrying.
+ usleep(100_000);
+
+ $attempts++;
+ }
+
+ croak "timed out waiting for match: $regexp";
+}
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 490f7ce3664..2d99f9d670d 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -4249,6 +4249,9 @@ maybe_start_bgworkers(void)
rw = dlist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+ elog(LOG, "maybe_start_bgworkers: worker %s has pid %d",
+ rw->rw_worker.bgw_function_name, rw->rw_pid);
+
/* ignore if already running */
if (rw->rw_pid != 0)
continue;
--
2.43.0