[PATCH v1] Always persist to disk logical slots during a shutdown checkpoint.
Julien Rouhaud <julien.rouhaud@free.fr>
From: Julien Rouhaud <julien.rouhaud@free.fr>
To:
Date: 2023-04-14T05:49:09Z
Lists: pgsql-hackers
It's entirely possible for a logical slot to have a confirmed_flush_lsn higher
than the last value saved on disk while not being marked as dirty. It's
currently not a problem to lose that value during a clean shutdown / restart
cycle, but a later patch adding support for pg_upgrade of publications and
logical slots will rely on that value being properly persisted to disk.
Author: Julien Rouhaud
Reviewed-by: FIXME
Discussion: FIXME
---
src/backend/access/transam/xlog.c | 2 +-
src/backend/replication/slot.c | 26 ++++++++++++++++----------
src/include/replication/slot.h | 2 +-
3 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b540ee293b..8100ca656e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7011,7 +7011,7 @@ static void
CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
{
CheckPointRelationMap();
- CheckPointReplicationSlots();
+ CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN);
CheckPointSnapBuild();
CheckPointLogicalRewriteHeap();
CheckPointReplicationOrigin();
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 8021aaa0a8..2bbf2af770 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -109,7 +109,8 @@ static void ReplicationSlotDropPtr(ReplicationSlot *slot);
/* internal persistency functions */
static void RestoreSlotFromDisk(const char *name);
static void CreateSlotOnDisk(ReplicationSlot *slot);
-static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel);
+static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel,
+ bool is_shutdown);
/*
* Report shared-memory space needed by ReplicationSlotsShmemInit.
@@ -783,7 +784,7 @@ ReplicationSlotSave(void)
Assert(MyReplicationSlot != NULL);
sprintf(path, "pg_replslot/%s", NameStr(MyReplicationSlot->data.name));
- SaveSlotToPath(MyReplicationSlot, path, ERROR);
+ SaveSlotToPath(MyReplicationSlot, path, ERROR, false);
}
/*
@@ -1565,11 +1566,10 @@ restart:
/*
* Flush all replication slots to disk.
*
- * This needn't actually be part of a checkpoint, but it's a convenient
- * location.
+ * is_shutdown is true in case of a shutdown checkpoint.
*/
void
-CheckPointReplicationSlots(void)
+CheckPointReplicationSlots(bool is_shutdown)
{
int i;
@@ -1594,7 +1594,7 @@ CheckPointReplicationSlots(void)
/* save the slot to disk, locking is handled in SaveSlotToPath() */
sprintf(path, "pg_replslot/%s", NameStr(s->data.name));
- SaveSlotToPath(s, path, LOG);
+ SaveSlotToPath(s, path, LOG, is_shutdown);
}
LWLockRelease(ReplicationSlotAllocationLock);
}
@@ -1700,7 +1700,7 @@ CreateSlotOnDisk(ReplicationSlot *slot)
/* Write the actual state file. */
slot->dirty = true; /* signal that we really need to write */
- SaveSlotToPath(slot, tmppath, ERROR);
+ SaveSlotToPath(slot, tmppath, ERROR, false);
/* Rename the directory into place. */
if (rename(tmppath, path) != 0)
@@ -1726,7 +1726,8 @@ CreateSlotOnDisk(ReplicationSlot *slot)
* Shared functionality between saving and creating a replication slot.
*/
static void
-SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
+SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel,
+ bool is_shutdown)
{
char tmppath[MAXPGPATH];
char path[MAXPGPATH];
@@ -1740,8 +1741,13 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
slot->just_dirtied = false;
SpinLockRelease(&slot->mutex);
- /* and don't do anything if there's nothing to write */
- if (!was_dirty)
+ /*
+ * and don't do anything if there's nothing to write, unless it's this is
+ * called for a logical slot during a shutdown checkpoint, as we want to
+ * persist the confirmed_flush_lsn in that case, even if that's the only
+ * modification.
+ */
+ if (!was_dirty && !is_shutdown && !SlotIsLogical(slot))
return;
LWLockAcquire(&slot->io_in_progress_lock, LW_EXCLUSIVE);
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index a8a89dc784..7ca37c9f70 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -241,7 +241,7 @@ extern void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslo
extern void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok);
extern void StartupReplicationSlots(void);
-extern void CheckPointReplicationSlots(void);
+extern void CheckPointReplicationSlots(bool is_shutdown);
extern void CheckSlotRequirements(void);
extern void CheckSlotPermissions(void);
--
2.37.0
--bnsyjlt7tkak6hhc--