pg18-v1-0001-Fix-race-in-ReplicationSlotRelease-for-ephemeral-.txt

text/plain

Filename: pg18-v1-0001-Fix-race-in-ReplicationSlotRelease-for-ephemeral-.txt
Type: text/plain
Part: 0
Message: Re: Fix race in ReplicationSlotRelease for ephemeral slots
From 56e8c1068e19c3ccd41cecb75b1ee26affb52abf Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Fri, 29 May 2026 18:43:50 +0900
Subject: [PATCH v1] Fix race in ReplicationSlotRelease for ephemeral slots

When releasing an ephemeral replication slot, ReplicationSlotRelease() first
drops the slot via ReplicationSlotDropAcquired(). After this point, the slot's
entry in the shared-memory slot array can be immediately reused by another
backend creating a new slot.

However, ReplicationSlotRelease() continued executing common cleanup code that
still dereferenced the old slot pointer and updated shared memory fields such as
effective_xmin. If the slot array entry had already been reallocated, these
writes could inadvertently affect a different, unrelated slot.

This commit avoids touching the slot's shared-memory state after dropping an
ephemeral slot. The post-release shared-memory updates are now performed only
for non-ephemeral slots, where the slot remains valid after release.
---
 src/backend/replication/slot.c | 68 +++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 4246d0a51e1..e60ccc424dd 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -737,44 +737,46 @@ ReplicationSlotRelease(void)
 		 */
 		ReplicationSlotDropAcquired();
 	}
-
-	/*
-	 * If slot needed to temporarily restrain both data and catalog xmin to
-	 * create the catalog snapshot, remove that temporary constraint.
-	 * Snapshots can only be exported while the initial snapshot is still
-	 * acquired.
-	 */
-	if (!TransactionIdIsValid(slot->data.xmin) &&
-		TransactionIdIsValid(slot->effective_xmin))
+	else
 	{
-		SpinLockAcquire(&slot->mutex);
-		slot->effective_xmin = InvalidTransactionId;
-		SpinLockRelease(&slot->mutex);
-		ReplicationSlotsComputeRequiredXmin(false);
-	}
-
-	/*
-	 * Set the time since the slot has become inactive. We get the current
-	 * time beforehand to avoid system call while holding the spinlock.
-	 */
-	now = GetCurrentTimestamp();
+		/*
+		 * If slot needed to temporarily restrain both data and catalog xmin
+		 * to create the catalog snapshot, remove that temporary constraint.
+		 * Snapshots can only be exported while the initial snapshot is still
+		 * acquired.
+		 */
+		if (!TransactionIdIsValid(slot->data.xmin) &&
+			TransactionIdIsValid(slot->effective_xmin))
+		{
+			SpinLockAcquire(&slot->mutex);
+			slot->effective_xmin = InvalidTransactionId;
+			SpinLockRelease(&slot->mutex);
+			ReplicationSlotsComputeRequiredXmin(false);
+		}
 
-	if (slot->data.persistency == RS_PERSISTENT)
-	{
 		/*
-		 * Mark persistent slot inactive.  We're not freeing it, just
-		 * disconnecting, but wake up others that may be waiting for it.
+		 * Set the time since the slot has become inactive. We get the current
+		 * time beforehand to avoid system call while holding the spinlock.
 		 */
-		SpinLockAcquire(&slot->mutex);
-		slot->active_pid = 0;
-		ReplicationSlotSetInactiveSince(slot, now, false);
-		SpinLockRelease(&slot->mutex);
-		ConditionVariableBroadcast(&slot->active_cv);
-	}
-	else
-		ReplicationSlotSetInactiveSince(slot, now, true);
+		now = GetCurrentTimestamp();
 
-	MyReplicationSlot = NULL;
+		if (slot->data.persistency == RS_PERSISTENT)
+		{
+			/*
+			 * Mark persistent slot inactive.  We're not freeing it, just
+			 * disconnecting, but wake up others that may be waiting for it.
+			 */
+			SpinLockAcquire(&slot->mutex);
+			slot->active_pid = 0;
+			ReplicationSlotSetInactiveSince(slot, now, false);
+			SpinLockRelease(&slot->mutex);
+			ConditionVariableBroadcast(&slot->active_cv);
+		}
+		else
+			ReplicationSlotSetInactiveSince(slot, now, true);
+
+		MyReplicationSlot = NULL;
+	}
 
 	/* might not have been set when we've been a plain slot */
 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-- 
2.53.0