v2-0001-Fix-WAIT-FOR-LSN-cleanup-on-subtransaction-abort.patch
application/octet-stream
Filename: v2-0001-Fix-WAIT-FOR-LSN-cleanup-on-subtransaction-abort.patch
Type: application/octet-stream
Part: 0
From 15f90c7761680d8eab7b1315b4a37c17a2f68ccc Mon Sep 17 00:00:00 2001
From: alterego655 <824662526@qq.com>
Date: Wed, 6 May 2026 16:40:55 +0800
Subject: [PATCH v2] Fix WAIT FOR LSN cleanup on subtransaction abort
WAIT FOR LSN registers the current backend in shared memory before
entering an interruptible wait loop. Top-level abort and backend exit
already call WaitLSNCleanup(), but subtransaction abort did not. If an
interrupt such as statement_timeout occurred while waiting inside a
savepoint, rolling back to the savepoint left the backend marked as
present in the WAIT FOR LSN heap.
Clean up WAIT FOR LSN state from AbortSubTransaction() as well, and add
a TAP test covering reuse of WAIT FOR LSN after a savepoint rollback.
---
src/backend/access/transam/xact.c | 5 +++
src/backend/access/transam/xlogwait.c | 2 +-
src/test/recovery/t/049_wait_for_lsn.pl | 48 +++++++++++++++++++++++++
3 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 48bc90c9673..5586fbe5b07 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -5289,6 +5289,11 @@ AbortSubTransaction(void)
*/
LWLockReleaseAll();
+ /*
+ * Clean up WAIT FOR LSN state, if any.
+ */
+ WaitLSNCleanup();
+
pgstat_report_wait_end();
pgstat_progress_end_command();
diff --git a/src/backend/access/transam/xlogwait.c b/src/backend/access/transam/xlogwait.c
index 18f78338330..582dde3b061 100644
--- a/src/backend/access/transam/xlogwait.c
+++ b/src/backend/access/transam/xlogwait.c
@@ -360,7 +360,7 @@ WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN)
}
/*
- * Clean up LSN waiters for exiting process
+ * Clean up any LSN wait state for the current process.
*/
void
WaitLSNCleanup(void)
diff --git a/src/test/recovery/t/049_wait_for_lsn.pl b/src/test/recovery/t/049_wait_for_lsn.pl
index 9f8af351ba8..a51e31926f5 100644
--- a/src/test/recovery/t/049_wait_for_lsn.pl
+++ b/src/test/recovery/t/049_wait_for_lsn.pl
@@ -213,6 +213,54 @@ $output = $node_standby->safe_psql(
WAIT FOR LSN '${lsn3}' WITH (timeout '10ms', no_throw);]);
ok($output eq "timeout", "WAIT FOR returns correct status after timeout");
+# 4a. Check that aborting a subtransaction during WAIT FOR LSN cleans up the
+# shared wait state. Poll pg_stat_activity before canceling the first WAIT
+# FOR to ensure that the backend has registered itself in the waiters heap.
+# After rolling back to the savepoint, a second WAIT FOR in the same backend
+# must be able to register itself again.
+my $subxact_lsn = $node_primary->safe_psql('postgres',
+ "SELECT pg_current_wal_insert_lsn() + 10000000000");
+my $subxact_appname = 'wait_for_lsn_subxact_cleanup';
+my $subxact_session =
+ $node_primary->background_psql('postgres', on_error_stop => 0);
+$subxact_session->query_until(
+ qr/start/, qq[
+ SET application_name = '$subxact_appname';
+ BEGIN;
+ SAVEPOINT wait_cleanup;
+ \\echo start
+ WAIT FOR LSN '${subxact_lsn}' WITH (MODE 'primary_flush');
+ ROLLBACK TO wait_cleanup;
+ WAIT FOR LSN '${subxact_lsn}'
+ WITH (MODE 'primary_flush', timeout '10ms', no_throw);
+ COMMIT;
+]);
+$node_primary->poll_query_until(
+ 'postgres',
+ "SELECT count(*) = 1 FROM pg_stat_activity
+ WHERE application_name = '$subxact_appname'
+ AND wait_event = 'WaitForWalFlush'"
+) or die "WAIT FOR LSN did not enter the primary_flush wait path";
+my $subxact_cancelled = $node_primary->safe_psql(
+ 'postgres',
+ "SELECT pg_cancel_backend(pid) FROM pg_stat_activity
+ WHERE application_name = '$subxact_appname'
+ AND wait_event = 'WaitForWalFlush'"
+);
+is($subxact_cancelled, 't', "canceled WAIT FOR LSN in subtransaction");
+$subxact_session->quit;
+chomp($subxact_session->{stdout});
+like(
+ $subxact_session->{stderr},
+ qr/canceling statement due to user request/,
+ "query cancel interrupted WAIT FOR LSN in subtransaction");
+is($subxact_session->{stdout},
+ "timeout", "second WAIT FOR LSN timed out after savepoint rollback");
+unlike(
+ $subxact_session->{stderr},
+ qr/server closed the connection unexpectedly/,
+ "WAIT FOR LSN after savepoint rollback did not disconnect");
+
# 5. Check mode validation: standby modes error on primary, primary mode errors
# on standby, and primary_flush works on primary. Also check that WAIT FOR
# triggers an error if called within a function, procedure, anonymous DO block,
--
2.51.0