v1-0001-Reset-synced-slots-when-a-standby-is-promoted.patch
application/octet-stream
Filename: v1-0001-Reset-synced-slots-when-a-standby-is-promoted.patch
Type: application/octet-stream
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v1-0001
Subject: Reset synced slots when a standby is promoted.
| File | + | − |
|---|---|---|
| src/backend/access/transam/xlog.c | 15 | 5 |
| src/backend/replication/slot.c | 52 | 0 |
| src/include/replication/slot.h | 1 | 0 |
| src/test/recovery/t/040_standby_failover_slots_sync.pl | 3 | 3 |
From 665f7b623f46247659cf42c8239a7109ae2db819 Mon Sep 17 00:00:00 2001
From: Ajin Cherian <itsajin@gmail.com>
Date: Tue, 9 Sep 2025 17:10:22 +1000
Subject: [PATCH v1] Reset synced slots when a standby is promoted.
On promotion, reset any slots which have the 'synced' flag set so that
the primary starts from a clean state. This ensures consistent
behavior across all switchovers.
---
src/backend/access/transam/xlog.c | 20 +++++--
src/backend/replication/slot.c | 52 +++++++++++++++++++
src/include/replication/slot.h | 1 +
.../t/040_standby_failover_slots_sync.pl | 6 +--
4 files changed, 71 insertions(+), 8 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7ffb2179151..958e2a4271f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5622,9 +5622,14 @@ StartupXLOG(void)
/*
* Initialize replication slots, before there's a chance to remove
- * required resources.
+ * required resources. Clear any leftover 'synced' flags on replication
+ * slots when in crash recovery on the primary. The DB_IN_CRASH_RECOVERY
+ * state check ensures that this code is only reached when a standby
+ * server crashes during promotion.
*/
StartupReplicationSlots();
+ if (ControlFile->state == DB_IN_CRASH_RECOVERY)
+ ResetSyncedSlots();
/*
* Startup logical state, needs to be setup now so we have proper data
@@ -6224,13 +6229,18 @@ StartupXLOG(void)
WalSndWakeup(true, true);
/*
- * If this was a promotion, request an (online) checkpoint now. This isn't
- * required for consistency, but the last restartpoint might be far back,
- * and in case of a crash, recovering from it might take a longer than is
- * appropriate now that we're not in standby mode anymore.
+ * If this was a promotion, first reset any slots that had been marked as
+ * synced during standby mode. Then request an (online) checkpoint.
+ * The checkpoint isn't required for consistency, but the last
+ * restartpoint might be far back, and in case of a crash, recovery
+ * could take longer than desirable now that we're not in standby
+ * mode anymore.
*/
if (promoted)
+ {
+ ResetSyncedSlots();
RequestCheckpoint(CHECKPOINT_FORCE);
+ }
}
/*
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index fd0fdb96d42..01a6e0de133 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -852,6 +852,57 @@ restart:
LWLockRelease(ReplicationSlotControlLock);
}
+/*
+ * ResetSyncedSlots()
+ *
+ * Reset all replication slots that have synced=true to synced=false.
+ */
+void
+ResetSyncedSlots(void)
+{
+ int i;
+
+ /*
+ * Iterate through all replication slot entries and reset synced ones
+ */
+ for (i = 0; i < max_replication_slots; i++)
+ {
+ ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
+
+ /* Skip inactive/unused slots */
+ if (!s->in_use)
+ continue;
+
+ /* we're only interested in logical slots */
+ if (!SlotIsLogical(s))
+ continue;
+
+ /* Check if this slot was marked as synced */
+ if (s->data.synced)
+ {
+ /* Acquire the slot */
+ ReplicationSlotAcquire(NameStr(s->data.name), false, true);
+
+ /* Reset the synced flag under spinlock protection */
+ SpinLockAcquire(&s->mutex);
+ s->data.synced = false;
+ SpinLockRelease(&s->mutex);
+
+ /* Mark dirty and save outside the spinlock */
+ ReplicationSlotMarkDirty();
+ ReplicationSlotSave();
+
+ ereport(LOG,
+ (errmsg("reset synced flag for replication slot \"%s\"",
+ NameStr(s->data.name))));
+
+ /* Release the slot */
+ ReplicationSlotRelease();
+ }
+ }
+
+}
+
/*
* Permanently drop replication slot identified by the passed in name.
*/
@@ -2212,6 +2263,7 @@ StartupReplicationSlots(void)
/* we crashed while a slot was being setup or deleted, clean up */
if (pg_str_endswith(replication_de->d_name, ".tmp"))
{
+ elog(LOG, "there was a leftover tmp file for slots");
if (!rmtree(path, true))
{
ereport(WARNING,
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index fe62162cde3..7902d51781d 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -336,6 +336,7 @@ extern int ReplicationSlotIndex(ReplicationSlot *slot);
extern bool ReplicationSlotName(int index, Name name);
extern void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslotname, Size szslot);
extern void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok);
+extern void ResetSyncedSlots(void);
extern void StartupReplicationSlots(void);
extern void CheckPointReplicationSlots(bool is_shutdown);
diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl
index 2c61c51e914..0f225aa09c1 100644
--- a/src/test/recovery/t/040_standby_failover_slots_sync.pl
+++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl
@@ -932,13 +932,13 @@ my $standby1_conninfo = $standby1->connstr . ' dbname=postgres';
$subscriber1->safe_psql('postgres',
"ALTER SUBSCRIPTION regress_mysub1 CONNECTION '$standby1_conninfo';");
-# Confirm the synced slot 'lsub1_slot' is retained on the new primary
+# Confirm the synced slot 'lsub1_slot' is reset on the new primary
is( $standby1->safe_psql(
'postgres',
q{SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN ('lsub1_slot', 'snap_test_slot') AND synced AND NOT temporary;}
),
- 't',
- 'synced slot retained on the new primary');
+ 'f',
+ 'synced slot reset on the new primary');
# Commit the prepared transaction
$standby1->safe_psql('postgres', "COMMIT PREPARED 'test_twophase_slotsync';");
--
2.47.3