v3-0001-Fix-REPACK-decoding-worker-not-cleaned-up-on-FATAL-exit.patch
application/octet-stream
Filename: v3-0001-Fix-REPACK-decoding-worker-not-cleaned-up-on-FATAL-exit.patch
Type: application/octet-stream
Part: 0
From b7ace6f7eab4a2c181427ac4a7719da62faf78ee Mon Sep 17 00:00:00 2001
From: Baji Shaik <baji.pgdev@gmail.com>
Date: Sun, 17 May 2026 23:32:42 +0000
Subject: [PATCH] Fix REPACK decoding worker not cleaned up on FATAL exit
When the launching backend of REPACK (CONCURRENTLY) is terminated via
pg_terminate_backend(), ProcDiePending causes ereport(FATAL) which
bypasses PG_FINALLY blocks. As a result, stop_repack_decoding_worker()
is never called, leaving the decoding worker running indefinitely and
holding its temporary replication slot.
Fix by using PG_ENSURE_ERROR_CLEANUP, which handles both ERROR and
FATAL exits. Unlike before_shmem_exit, this does not leak callback
slots when the same backend runs multiple REPACK (CONCURRENTLY)
commands, and unlike on_proc_exit, it runs before memory contexts
are destroyed so the worker handle is still valid.
---
src/backend/commands/repack.c | 42 +++++++++++++++++++++++------------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c
index fae88d6bb83..17eb9eb029c 100644
--- a/src/backend/commands/repack.c
+++ b/src/backend/commands/repack.c
@@ -64,6 +64,7 @@
#include "pgstat.h"
#include "replication/logicalrelation.h"
#include "storage/bufmgr.h"
+#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "storage/proc.h"
@@ -211,6 +212,7 @@ static Oid determine_clustered_index(Relation rel, bool usingindex,
static void start_repack_decoding_worker(Oid relid);
static void stop_repack_decoding_worker(void);
+static void repack_decoding_worker_cleanup_cb(int code, Datum arg);
static Snapshot get_initial_snapshot(DecodingWorker *worker);
static void ProcessRepackMessage(StringInfo msg);
@@ -660,24 +662,25 @@ cluster_rel(RepackCommand cmd, Relation OldHeap, Oid indexOid,
TransferPredicateLocksToHeapRelation(OldHeap);
/* rebuild_relation does all the dirty work */
- PG_TRY();
- {
- rebuild_relation(OldHeap, index, verbose, ident_idx);
- }
- PG_FINALLY();
+ if (concurrent)
{
- if (concurrent)
+ /*
+ * Use PG_ENSURE_ERROR_CLEANUP so that the decoding worker is stopped
+ * on both ERROR and FATAL exits. PG_FINALLY only handles ERROR;
+ * FATAL (e.g. from pg_terminate_backend) would bypass it, leaving
+ * the worker running and holding its replication slot indefinitely.
+ */
+ PG_ENSURE_ERROR_CLEANUP(repack_decoding_worker_cleanup_cb, (Datum) 0);
{
- /*
- * Since during normal operation the worker was already asked to
- * exit, stopping it explicitly is especially important on ERROR.
- * However it still seems a good practice to make sure that the
- * worker never survives the REPACK command.
- */
- stop_repack_decoding_worker();
+ rebuild_relation(OldHeap, index, verbose, ident_idx);
}
+ PG_END_ENSURE_ERROR_CLEANUP(repack_decoding_worker_cleanup_cb, (Datum) 0);
+ stop_repack_decoding_worker();
+ }
+ else
+ {
+ rebuild_relation(OldHeap, index, verbose, ident_idx);
}
- PG_END_TRY();
/* rebuild_relation closes OldHeap, and index if valid */
@@ -3482,6 +3485,17 @@ start_repack_decoding_worker(Oid relid)
ConditionVariableCancelSleep();
}
+/*
+ * PG_ENSURE_ERROR_CLEANUP callback to stop the decoding worker.
+ * This ensures the worker is terminated on both ERROR and FATAL exits,
+ * unlike PG_FINALLY which only handles ERROR.
+ */
+static void
+repack_decoding_worker_cleanup_cb(int code, Datum arg)
+{
+ stop_repack_decoding_worker();
+}
+
/*
* Stop the decoding worker and cleanup the related resources.
*
--
2.50.1