Thread

  1. [PATCH v1] Always persist to disk logical slots during a shutdown checkpoint.

    Julien Rouhaud <julien.rouhaud@free.fr> — 2023-04-14T05:49:09Z

    It's entirely possible for a logical slot to have a confirmed_flush_lsn higher
    than the last value saved on disk while not being marked as dirty.  It's
    currently not a problem to lose that value during a clean shutdown / restart
    cycle, but a later patch adding support for pg_upgrade of publications and
    logical slots will rely on that value being properly persisted to disk.
    
    Author: Julien Rouhaud
    Reviewed-by: FIXME
    Discussion: FIXME
    ---
     src/backend/access/transam/xlog.c |  2 +-
     src/backend/replication/slot.c    | 26 ++++++++++++++++----------
     src/include/replication/slot.h    |  2 +-
     3 files changed, 18 insertions(+), 12 deletions(-)
    
    diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
    index b540ee293b..8100ca656e 100644
    --- a/src/backend/access/transam/xlog.c
    +++ b/src/backend/access/transam/xlog.c
    @@ -7011,7 +7011,7 @@ static void
     CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
     {
     	CheckPointRelationMap();
    -	CheckPointReplicationSlots();
    +	CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN);
     	CheckPointSnapBuild();
     	CheckPointLogicalRewriteHeap();
     	CheckPointReplicationOrigin();
    diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
    index 8021aaa0a8..2bbf2af770 100644
    --- a/src/backend/replication/slot.c
    +++ b/src/backend/replication/slot.c
    @@ -109,7 +109,8 @@ static void ReplicationSlotDropPtr(ReplicationSlot *slot);
     /* internal persistency functions */
     static void RestoreSlotFromDisk(const char *name);
     static void CreateSlotOnDisk(ReplicationSlot *slot);
    -static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel);
    +static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel,
    +							bool is_shutdown);
     
     /*
      * Report shared-memory space needed by ReplicationSlotsShmemInit.
    @@ -783,7 +784,7 @@ ReplicationSlotSave(void)
     	Assert(MyReplicationSlot != NULL);
     
     	sprintf(path, "pg_replslot/%s", NameStr(MyReplicationSlot->data.name));
    -	SaveSlotToPath(MyReplicationSlot, path, ERROR);
    +	SaveSlotToPath(MyReplicationSlot, path, ERROR, false);
     }
     
     /*
    @@ -1565,11 +1566,10 @@ restart:
     /*
      * Flush all replication slots to disk.
      *
    - * This needn't actually be part of a checkpoint, but it's a convenient
    - * location.
    + * is_shutdown is true in case of a shutdown checkpoint.
      */
     void
    -CheckPointReplicationSlots(void)
    +CheckPointReplicationSlots(bool is_shutdown)
     {
     	int			i;
     
    @@ -1594,7 +1594,7 @@ CheckPointReplicationSlots(void)
     
     		/* save the slot to disk, locking is handled in SaveSlotToPath() */
     		sprintf(path, "pg_replslot/%s", NameStr(s->data.name));
    -		SaveSlotToPath(s, path, LOG);
    +		SaveSlotToPath(s, path, LOG, is_shutdown);
     	}
     	LWLockRelease(ReplicationSlotAllocationLock);
     }
    @@ -1700,7 +1700,7 @@ CreateSlotOnDisk(ReplicationSlot *slot)
     
     	/* Write the actual state file. */
     	slot->dirty = true;			/* signal that we really need to write */
    -	SaveSlotToPath(slot, tmppath, ERROR);
    +	SaveSlotToPath(slot, tmppath, ERROR, false);
     
     	/* Rename the directory into place. */
     	if (rename(tmppath, path) != 0)
    @@ -1726,7 +1726,8 @@ CreateSlotOnDisk(ReplicationSlot *slot)
      * Shared functionality between saving and creating a replication slot.
      */
     static void
    -SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
    +SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel,
    +			   bool is_shutdown)
     {
     	char		tmppath[MAXPGPATH];
     	char		path[MAXPGPATH];
    @@ -1740,8 +1741,13 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
     	slot->just_dirtied = false;
     	SpinLockRelease(&slot->mutex);
     
    -	/* and don't do anything if there's nothing to write */
    -	if (!was_dirty)
    +	/*
    +	 * and don't do anything if there's nothing to write, unless it's this is
    +	 * called for a logical slot during a shutdown checkpoint, as we want to
    +	 * persist the confirmed_flush_lsn in that case, even if that's the only
    +	 * modification.
    +	 */
    +	if (!was_dirty && !is_shutdown && !SlotIsLogical(slot))
     		return;
     
     	LWLockAcquire(&slot->io_in_progress_lock, LW_EXCLUSIVE);
    diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
    index a8a89dc784..7ca37c9f70 100644
    --- a/src/include/replication/slot.h
    +++ b/src/include/replication/slot.h
    @@ -241,7 +241,7 @@ extern void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslo
     extern void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok);
     
     extern void StartupReplicationSlots(void);
    -extern void CheckPointReplicationSlots(void);
    +extern void CheckPointReplicationSlots(bool is_shutdown);
     
     extern void CheckSlotRequirements(void);
     extern void CheckSlotPermissions(void);
    -- 
    2.37.0
    
    
    --bnsyjlt7tkak6hhc--