0001-pg_rewind-Skip-false-positive-rewind-on-benign-shutd.patch

text/x-patch

Filename: 0001-pg_rewind-Skip-false-positive-rewind-on-benign-shutd.patch
Type: text/x-patch
Part: 0
Message: Re: [PATCH] Fix pg_rewind false positives caused by shutdown-only WAL

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch 0001
Subject: pg_rewind: Skip false-positive rewind on benign shutdown checkpoint difference
File+
src/bin/pg_rewind/parsexlog.c 79 8
src/bin/pg_rewind/pg_rewind.c 103 15
src/bin/pg_rewind/pg_rewind.h 11 1
From 83b32b56adb17fb15ebe6288acfbf57811d724dc Mon Sep 17 00:00:00 2001
From: BharatDBPG <bharatdbpg@gmail.com>
Date: Wed, 22 Oct 2025 17:18:02 +0530
Subject: [PATCH] pg_rewind: Skip false-positive rewind on benign shutdown
 checkpoint difference

Signed-off-by: BharatDBPG <bharatdbpg@gmail.com>
---
 src/bin/pg_rewind/parsexlog.c |  87 ++++++++++++++++++++++---
 src/bin/pg_rewind/pg_rewind.c | 118 +++++++++++++++++++++++++++++-----
 src/bin/pg_rewind/pg_rewind.h |  12 +++-
 3 files changed, 193 insertions(+), 24 deletions(-)

diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 8f4b282c6b..110ab17552 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -23,6 +23,7 @@
 #include "fe_utils/archive.h"
 #include "filemap.h"
 #include "pg_rewind.h"
+#include "storage/standbydefs.h"
 
 /*
  * RmgrNames is an array of the built-in resource manager names, to make error
@@ -50,6 +51,10 @@ typedef struct XLogPageReadPrivate
 	int			tliIndex;
 } XLogPageReadPrivate;
 
+static ControlFileData ControlFile_target;
+static ControlFileData ControlFile_source;
+
+
 static int	SimpleXLogPageRead(XLogReaderState *xlogreader,
 							   XLogRecPtr targetPagePtr,
 							   int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
@@ -161,10 +166,71 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex,
 	return endptr;
 }
 
+/*
+ * is_shutdown_only_sequence(xlogreader, endptr)
+ *
+ * An XLogReaderState positioned at the end-of-wal (endptr),
+ * walk backwards a small number of records to determine whether the
+ * tail consists solely of zero-or-more RUNNING_XACTS (RM_STANDBY_ID)
+ * optionally followed by a single CHECKPOINT_SHUTDOWN (RM_XLOG_ID).
+ *
+ * Returns true if the pattern matches and there are no intervening
+ * meaningful records.
+ */
+
+static bool
+is_shutdown_only_sequence(XLogReaderState *xlogreader, XLogRecPtr endptr)
+{
+    XLogRecPtr  searchptr = endptr;
+    int         seen_running_xacts = 0;
+    int         max_steps = 8;
+
+    while (max_steps-- > 0)
+    {
+        char       *errormsg;
+        XLogRecord *record;
+
+        XLogBeginRead(xlogreader, searchptr);
+        record = XLogReadRecord(xlogreader, &errormsg);
+
+        if (record == NULL)
+            return false;
+
+        /* fetch RMID and info */
+		uint8 rmid;
+		uint8 info;
+        rmid = XLogRecGetRmid(xlogreader);
+        info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
+
+        if (rmid == RM_STANDBY_ID && info == XLOG_RUNNING_XACTS)
+        {
+            seen_running_xacts++;
+            searchptr = record->xl_prev;
+            continue;
+        }
+
+        if (rmid == RM_XLOG_ID &&
+            (info == XLOG_CHECKPOINT_SHUTDOWN || info == XLOG_CHECKPOINT_ONLINE))
+        {
+            /* Only shutdown checkpoints with preceding RUNNING_XACTS allowed */
+            if (info == XLOG_CHECKPOINT_SHUTDOWN)
+                return true;
+            else
+                return false; /* online checkpoint → meaningful */
+        }
+
+        /* any other record type means there were changes */
+        return false;
+    }
+
+    /* we hit step cap without decisive answer → play safe */
+    return false;
+}
+
 /*
  * Find the previous checkpoint preceding given WAL location.
  */
-void
+WalEndStat
 findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 				   XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
 				   XLogRecPtr *lastchkptredo, const char *restoreCommand)
@@ -210,13 +276,18 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 
 		if (record == NULL)
 		{
-			if (errormsg)
-				pg_fatal("could not find previous WAL record at %X/%08X: %s",
-						 LSN_FORMAT_ARGS(searchptr),
-						 errormsg);
-			else
-				pg_fatal("could not find previous WAL record at %X/%08X",
-						 LSN_FORMAT_ARGS(searchptr));
+    		/* Check for benign shutdown checkpoint difference */
+    		if (control_diff_is_benign(&ControlFile_source, &ControlFile_target))
+    		{
+        		fprintf(stderr,
+                		"pg_rewind: benign shutdown checkpoint difference detected, skipping rewind\n");
+        		return InvalidXLogRecPtr;  /* or other appropriate return to skip rewind */
+    		}
+
+    		/* Otherwise, fatal error */
+    		pg_fatal("could not find previous WAL record at %X/%X: %s",
+             LSN_FORMAT_ARGS(searchptr),
+             errormsg ? errormsg : "unknown error");
 		}
 
 		/* Detect if a new WAL file has been opened */
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 0c68dd4235..d2379e4169 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -116,6 +116,44 @@ usage(const char *progname)
 	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
 }
 
+/*
+ * Compare two ControlFileData structs.
+ * Return true if the differences are safe to ignore (benign),
+ * false if they are significant.
+ *
+ */
+bool
+control_diff_is_benign(void *src, void *tgt)
+{
+    ControlFileData *src_cf = (ControlFileData *) src;
+    ControlFileData *tgt_cf = (ControlFileData *) tgt;
+
+    /* 1. Cluster identity must always match */
+    if (src_cf->system_identifier != tgt_cf->system_identifier)
+        return false;
+
+    /* 2. Check timeline consistency */
+    if (src_cf->checkPointCopy.ThisTimeLineID != tgt_cf->checkPointCopy.ThisTimeLineID)
+        return false;
+
+    /* 3. Transaction ID progression */
+    if (src_cf->checkPointCopy.nextXid.value != tgt_cf->checkPointCopy.nextXid.value)
+        return false;
+
+    /* 4. OID progression */
+    if (src_cf->checkPointCopy.nextOid != tgt_cf->checkPointCopy.nextOid)
+        return false;
+
+    /* 5. MultiXact progression */
+    if (src_cf->checkPointCopy.nextMulti != tgt_cf->checkPointCopy.nextMulti)
+        return false;
+
+    /* 6. Checkpoint LSN should not go backwards */
+    if (src_cf->checkPoint < tgt_cf->checkPoint)
+        return false;
+
+    return true;
+}
 
 int
 main(int argc, char **argv)
@@ -429,21 +467,71 @@ main(int argc, char **argv)
 		}
 
 		/*
-		 * Check for the possibility that the target is in fact a direct
-		 * ancestor of the source. In that case, there is no divergent history
-		 * in the target that needs rewinding.
-		 */
-		if (target_wal_endrec > divergerec)
-		{
-			rewind_needed = true;
-		}
-		else
-		{
-			/* the last common checkpoint record must be part of target WAL */
-			Assert(target_wal_endrec == divergerec);
-
-			rewind_needed = false;
-		}
+      	 * Conservative check: determine whether we can safely skip rewind
+    	 * when the target's WAL tail only contains shutdown-only records.
+     	 */
+		
+    	WalEndStat end_status;
+		end_status = findLastCheckpoint(datadir_target, target_wal_endrec, lastcommontliIndex,
+                                    &chkptrec, &chkpttli, &chkptredo,
+                                    restore_command);
+
+    	/* Initialize safety flags and controlfile pointers */
+    	bool src_crc_ok = false;
+    	bool tgt_crc_ok = false;
+    	ControlFileData *src_ctrl = NULL;
+    	ControlFileData *tgt_ctrl = NULL;
+
+    	if (end_status == WAL_END_SHUTDOWN_ONLY)
+    	{
+        	pg_log_info("benign control file difference detected; skipping rewind");
+    		exit(0);
+
+#ifdef USE_LIBPQ
+        	if (connstr_source)
+        	{
+            	/* If fetching remotely (pg_rewind --source-server=...) */
+            	src_ctrl = get_remote_controlfile(&src_crc_ok);
+        	}
+        	else
+#endif
+        	{
+            	/* Local source cluster */
+            	src_ctrl = get_controlfile(datadir_source, &src_crc_ok);
+        	}
+
+        	/* Target control file is always local */
+        	tgt_ctrl = get_controlfile(datadir_target, &tgt_crc_ok);
+
+        	if (!src_ctrl || !tgt_ctrl)
+        	{
+            	pg_log_warning("could not read one or both control files; conservatively requiring rewind");
+            	rewind_needed = true;
+        	}
+        	else if (control_diff_is_benign(src_ctrl, tgt_ctrl))
+        	{
+            	pg_log_info("only benign shutdown control differences found; skipping rewind");
+            	rewind_needed = false;
+        	}
+        	else
+        	{
+            	pg_log_info("control file differences not benign; rewind required");
+            	rewind_needed = true;
+        	}
+
+        	/* free() if your get_controlfile allocates new structs (optional) */
+        	/* free(src_ctrl); free(tgt_ctrl); */
+    	}
+    	else if (end_status == WAL_END_MEANINGFUL)
+    	{
+        	/* Normal case: WAL tail contains meaningful changes */
+        	rewind_needed = (target_wal_endrec > divergerec);
+    	}
+    	else
+    	{
+        	/* Unknown or empty WAL end; play safe */
+        	rewind_needed = true;
+    	}
 	}
 
 	if (!rewind_needed)
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index 9cea144d2b..94b75ea9bf 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -35,7 +35,15 @@ extern uint64 fetch_done;
 extern void extractPageMap(const char *datadir, XLogRecPtr startpoint,
 						   int tliIndex, XLogRecPtr endpoint,
 						   const char *restoreCommand);
-extern void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr,
+
+typedef enum WalEndStat
+{
+    WAL_END_MEANINGFUL,
+    WAL_END_SHUTDOWN_ONLY,
+	WAL_END_EMPTY
+} WalEndStat;
+
+extern WalEndStat findLastCheckpoint(const char *datadir, XLogRecPtr forkptr,
 							   int tliIndex,
 							   XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
 							   XLogRecPtr *lastchkptredo,
@@ -43,6 +51,8 @@ extern void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr,
 extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr,
 								int tliIndex, const char *restoreCommand);
 
+bool control_diff_is_benign(void *src, void *tgt);
+
 /* in pg_rewind.c */
 extern void progress_report(bool finished);
 
-- 
2.34.1