v10-0002-Add-slotsync_skip_reason-to-pg_replication_slots.patch
application/octet-stream
Filename: v10-0002-Add-slotsync_skip_reason-to-pg_replication_slots.patch
Type: application/octet-stream
Part: 1
From a7c6526eb30b1f5a47f210f950b3876d7b536ac9 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Fri, 21 Nov 2025 14:39:47 +0530
Subject: [PATCH v10 2/2] Add slotsync_skip_reason to pg_replication_slots
This patch introduces a new column slotsync_skip_reason to the
pg_replication_slots view. It indicates the reason for the last slot
synchronization skip.
---
doc/src/sgml/system-views.sgml | 49 +++++++++++++++++++
src/backend/catalog/system_views.sql | 3 +-
src/backend/replication/logical/slotsync.c | 44 +++++++++++++++--
src/backend/replication/slot.c | 1 +
src/backend/replication/slotfuncs.c | 28 ++++++++++-
src/include/catalog/pg_proc.dat | 6 +--
src/include/replication/slot.h | 26 ++++++++++
.../recovery/t/050_slotsync_skip_stats.pl | 33 ++++++++++++-
src/test/regress/expected/rules.out | 5 +-
src/tools/pgindent/typedefs.list | 1 +
10 files changed, 183 insertions(+), 13 deletions(-)
diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml
index 7971498fe75..ec797d2d916 100644
--- a/doc/src/sgml/system-views.sgml
+++ b/doc/src/sgml/system-views.sgml
@@ -3102,6 +3102,55 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
</para></entry>
</row>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>slotsync_skip_reason</structfield><type>text</type>
+ </para>
+ <para>
+ The reason for the last slot synchronization skip. This field is set only
+ for logical slots that are being synchronized from a primary server (that
+ is, those whose <structfield>synced</structfield> field is
+ <literal>true</literal>). The value of this column has no meaning on the
+ primary server; it defaults to <literal>none</literal> for all slots, but
+ may (if leftover from a promoted standby) also have a value other than
+ <literal>none</literal>. Possible values are:
+ <itemizedlist spacing="compact">
+ <listitem>
+ <para>
+ <literal>none</literal> means that the last slot synchronization
+ completed successfully.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>remote_behind</literal> means that the last slot
+ synchronization was skipped because the slot is ahead of the
+ corresponding failover slot on the primary.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>wal_not_flushed</literal> means that the last slot
+ synchronization was skipped because the standby had not flushed the
+ WAL corresponding to the confirmed flush position on the remote slot.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>no_consistent_snapshot</literal> means that the last slot
+ synchronization was skipped because the standby could not build a
+ consistent snapshot.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <literal>slot_invalidated</literal> means that the last slot
+ synchronization was skipped because the slot is invalidated.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para></entry>
+ </row>
</tbody>
</tgroup>
</table>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index c77a5d15a2e..1445ac5a78c 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1060,7 +1060,8 @@ CREATE VIEW pg_replication_slots AS
L.conflicting,
L.invalidation_reason,
L.failover,
- L.synced
+ L.synced,
+ L.slotsync_skip_reason
FROM pg_get_replication_slots() AS L
LEFT JOIN pg_database D ON (L.datoid = D.oid);
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 052117f0481..83cc37239c7 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -149,6 +149,35 @@ typedef struct RemoteSlot
static void slotsync_failure_callback(int code, Datum arg);
static void update_synced_slots_inactive_since(void);
+/*
+ * Update slot sync skip stats and the skip reason of MyReplicationSlot, which
+ * the caller must have acquired. SS_SKIP_NONE means the sync was not skipped.
+ */
+static void
+update_slotsync_skip_stats(SlotSyncSkipReason skip_reason)
+{
+ ReplicationSlot *slot;
+
+ Assert(MyReplicationSlot);
+
+ slot = MyReplicationSlot;
+
+ /*
+ * Update the slot sync related stats in pg_stat_replication_slots, but
+ * only when the slot sync was actually skipped.
+ */
+ if (skip_reason != SS_SKIP_NONE)
+ pgstat_report_replslotsync_skip(slot);
+
+ /* Store the new skip reason; take the lock only if the value changes */
+ if (slot->slotsync_skip_reason != skip_reason)
+ {
+ SpinLockAcquire(&slot->mutex);
+ slot->slotsync_skip_reason = skip_reason;
+ SpinLockRelease(&slot->mutex);
+ }
+}
+
/*
* If necessary, update the local synced slot's metadata based on the data
* from the remote slot.
@@ -171,6 +200,7 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
ReplicationSlot *slot = MyReplicationSlot;
bool updated_xmin_or_lsn = false;
bool updated_config = false;
+ SlotSyncSkipReason skip_reason = SS_SKIP_NONE;
Assert(slot->data.invalidated == RS_INVAL_NONE);
@@ -220,7 +250,7 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
slot->data.catalog_xmin));
/* Update slot sync skip stats */
- pgstat_report_replslotsync_skip(slot);
+ update_slotsync_skip_stats(SS_SKIP_REMOTE_BEHIND);
if (remote_slot_precedes)
*remote_slot_precedes = true;
@@ -291,12 +321,15 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
* indicating slot synchronization is successful.
*/
if (found_consistent_snapshot && !(*found_consistent_snapshot))
- pgstat_report_replslotsync_skip(slot);
+ skip_reason = SS_SKIP_NO_CONSISTENT_SNAPSHOT;
}
updated_xmin_or_lsn = true;
}
+ /* Update slot sync skip stats */
+ update_slotsync_skip_stats(skip_reason);
+
if (remote_dbid != slot->data.database ||
remote_slot->two_phase != slot->data.two_phase ||
remote_slot->failover != slot->data.failover ||
@@ -673,7 +706,10 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
{
ReplicationSlotAcquire(NameStr(slot->data.name), true, false);
- pgstat_report_replslotsync_skip(slot);
+ if (slot->data.invalidated == RS_INVAL_NONE)
+ update_slotsync_skip_stats(SS_SKIP_WAL_NOT_FLUSHED);
+ else
+ update_slotsync_skip_stats(SS_SKIP_INVALID);
ReplicationSlotRelease();
}
@@ -752,7 +788,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
/* Skip the sync of an invalidated slot */
if (slot->data.invalidated != RS_INVAL_NONE)
{
- pgstat_report_replslotsync_skip(slot);
+ update_slotsync_skip_stats(SS_SKIP_INVALID);
ReplicationSlotRelease();
return slot_updated;
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 1ec1e997b27..86ae99a3ca9 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -491,6 +491,7 @@ ReplicationSlotCreate(const char *name, bool db_specific,
slot->last_saved_confirmed_flush = InvalidXLogRecPtr;
slot->last_saved_restart_lsn = InvalidXLogRecPtr;
slot->inactive_since = 0;
+ slot->slotsync_skip_reason = SS_SKIP_NONE;
/*
* Create the slot on disk. We haven't actually marked the slot allocated
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index 0478fc9c977..7200c7f071d 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -228,6 +228,30 @@ pg_drop_replication_slot(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
+/*
+ * Map a SlotSyncSkipReason to its user-visible name; the result is static.
+ */
+static const char *
+GetSlotSyncSkipReason(SlotSyncSkipReason reason)
+{
+ switch (reason)
+ {
+ case SS_SKIP_NONE:
+ return "none";
+ case SS_SKIP_REMOTE_BEHIND:
+ return "remote_behind";
+ case SS_SKIP_WAL_NOT_FLUSHED:
+ return "wal_not_flushed";
+ case SS_SKIP_NO_CONSISTENT_SNAPSHOT:
+ return "no_consistent_snapshot";
+ case SS_SKIP_INVALID:
+ return "slot_invalidated";
+ }
+
+ Assert(false); /* every enumerator returns from the switch above */
+ return "none";
+}
+
/*
* pg_get_replication_slots - SQL SRF showing all replication slots
* that currently exist on the database cluster.
@@ -235,7 +259,7 @@ pg_drop_replication_slot(PG_FUNCTION_ARGS)
Datum
pg_get_replication_slots(PG_FUNCTION_ARGS)
{
-#define PG_GET_REPLICATION_SLOTS_COLS 20
+#define PG_GET_REPLICATION_SLOTS_COLS 21
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
XLogRecPtr currlsn;
int slotno;
@@ -443,6 +467,8 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
values[i++] = BoolGetDatum(slot_contents.data.synced);
+ values[i++] = CStringGetTextDatum(GetSlotSyncSkipReason(slot_contents.slotsync_skip_reason));
+
Assert(i == PG_GET_REPLICATION_SLOTS_COLS);
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index b10809ba9b6..4205d565df3 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11511,9 +11511,9 @@
proname => 'pg_get_replication_slots', prorows => '10', proisstrict => 'f',
proretset => 't', provolatile => 's', prorettype => 'record',
proargtypes => '',
- proallargtypes => '{name,name,text,oid,bool,bool,int4,xid,xid,pg_lsn,pg_lsn,text,int8,bool,pg_lsn,timestamptz,bool,text,bool,bool}',
- proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
- proargnames => '{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,wal_status,safe_wal_size,two_phase,two_phase_at,inactive_since,conflicting,invalidation_reason,failover,synced}',
+ proallargtypes => '{name,name,text,oid,bool,bool,int4,xid,xid,pg_lsn,pg_lsn,text,int8,bool,pg_lsn,timestamptz,bool,text,bool,bool,text}',
+ proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+ proargnames => '{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,wal_status,safe_wal_size,two_phase,two_phase_at,inactive_since,conflicting,invalidation_reason,failover,synced,slotsync_skip_reason}',
prosrc => 'pg_get_replication_slots' },
{ oid => '3786', descr => 'set up a logical replication slot',
proname => 'pg_create_logical_replication_slot', provolatile => 'v',
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index 09c69f83d57..054e81b6c43 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -71,6 +71,22 @@ typedef enum ReplicationSlotInvalidationCause
/* Maximum number of invalidation causes */
#define RS_INVAL_MAX_CAUSES 4
+/*
+ * Reasons why synchronization of a slot can be skipped, either by the slot
+ * sync worker or by pg_sync_replication_slots(). SS_SKIP_NONE means that
+ * no skip was recorded.
+ */
+typedef enum SlotSyncSkipReason
+{
+ SS_SKIP_NONE, /* No skip */
+ SS_SKIP_WAL_NOT_FLUSHED, /* Standby did not flush the WAL corresponding
+ * to confirmed flush of remote slot */
+ SS_SKIP_REMOTE_BEHIND, /* Remote slot is behind the local slot */
+ SS_SKIP_NO_CONSISTENT_SNAPSHOT, /* Standby could not build a consistent
+ * snapshot */
+ SS_SKIP_INVALID /* Local slot has been invalidated */
+} SlotSyncSkipReason;
+
/*
* On-Disk data of a replication slot, preserved across restarts.
*/
@@ -249,6 +265,16 @@ typedef struct ReplicationSlot
*/
XLogRecPtr last_saved_restart_lsn;
+ /*
+ * The reason for the last slot sync skip.
+ *
+ * A slotsync skip typically occurs only for temporary slots. For
+ * persistent slots it is extremely rare (e.g., cases like
+ * SS_SKIP_WAL_NOT_FLUSHED or SS_SKIP_REMOTE_BEHIND). Also, temporary
+ * slots are dropped after server restart, so there is no value in
+ * persisting the slotsync_skip_reason.
+ */
+ SlotSyncSkipReason slotsync_skip_reason;
} ReplicationSlot;
#define SlotIsPhysical(slot) ((slot)->data.database == InvalidOid)
diff --git a/src/test/recovery/t/050_slotsync_skip_stats.pl b/src/test/recovery/t/050_slotsync_skip_stats.pl
index 39ce9ef702b..59b512bc116 100644
--- a/src/test/recovery/t/050_slotsync_skip_stats.pl
+++ b/src/test/recovery/t/050_slotsync_skip_stats.pl
@@ -66,7 +66,13 @@ $primary->wait_for_replay_catchup($standby);
$standby->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
# Verify slot is synced successfully
-my $result = $standby->safe_psql('postgres',
+my $result = $standby->safe_psql(
+ 'postgres',
+ "SELECT slotsync_skip_reason FROM pg_replication_slots
+ WHERE slot_name = 'slot_sync' AND synced"
+);
+is($result, 'none', "slot sync reason is none");
+$result = $standby->safe_psql('postgres',
"SELECT slotsync_skip_count FROM pg_stat_replication_slots WHERE slot_name = 'slot_sync'"
);
is($result, '0', "check slot sync skip count after initial sync");
@@ -102,7 +108,14 @@ ok( $stderr =~
qr/skipping slot synchronization because the received slot sync.*is ahead of the standby position/,
'pg_sync_replication_slots failed as expected');
-# Check slot sync skip count when standby is behind
+# Check skip reason and count when standby is behind
+$result = $standby->safe_psql(
+ 'postgres',
+ "SELECT slotsync_skip_reason FROM pg_replication_slots
+ WHERE slot_name = 'slot_sync' AND synced AND NOT temporary"
+);
+is($result, 'wal_not_flushed', "slot sync skip when standby is behind");
+
$result = $standby->safe_psql('postgres',
"SELECT slotsync_skip_count FROM pg_stat_replication_slots WHERE slot_name = 'slot_sync'"
);
@@ -129,6 +142,15 @@ $primary->restart;
# Wait for standby to catch up
$primary->wait_for_replay_catchup($standby);
+# Check that skip reason is reset after successful sync
+$standby->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+$result = $standby->safe_psql(
+ 'postgres',
+ "SELECT slotsync_skip_reason FROM pg_replication_slots
+ WHERE slot_name = 'slot_sync' AND synced AND NOT temporary"
+);
+is($result, 'none', "slotsync_skip_reason is reset after successful sync");
+
# Cleanup: drop the logical slot and ensure standby catches up
$primary->safe_psql('postgres',
"SELECT pg_drop_replication_slot('slot_sync')");
@@ -159,6 +181,13 @@ select pg_sync_replication_slots();
$standby->wait_for_event('client backend', 'slot-sync-skip');
# Logical slot is temporary and sync will skip because remote is behind
+$result = $standby->safe_psql(
+ 'postgres',
+ "SELECT slotsync_skip_reason FROM pg_replication_slots
+ WHERE slot_name = 'slot_sync' AND synced AND temporary"
+);
+is($result, 'remote_behind', "slot sync skip as remote is behind");
+
$result = $standby->safe_psql('postgres',
"SELECT slotsync_skip_count FROM pg_stat_replication_slots WHERE slot_name = 'slot_sync'"
);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index adda7f425e2..feac3e4c089 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1507,8 +1507,9 @@ pg_replication_slots| SELECT l.slot_name,
l.conflicting,
l.invalidation_reason,
l.failover,
- l.synced
- FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn, wal_status, safe_wal_size, two_phase, two_phase_at, inactive_since, conflicting, invalidation_reason, failover, synced)
+ l.synced,
+ l.slotsync_skip_reason
+ FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn, wal_status, safe_wal_size, two_phase, two_phase_at, inactive_since, conflicting, invalidation_reason, failover, synced, slotsync_skip_reason)
LEFT JOIN pg_database d ON ((l.datoid = d.oid)));
pg_roles| SELECT pg_authid.rolname,
pg_authid.rolsuper,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 57f2a9ccdc5..435927e5638 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2802,6 +2802,7 @@ SlabSlot
SlotInvalidationCauseMap
SlotNumber
SlotSyncCtxStruct
+SlotSyncSkipReason
SlruCtl
SlruCtlData
SlruErrorCause
--
2.34.1