0001-pg_rewind-Skip-false-positive-rewind-on-benign-shutd.patch
text/x-patch
Filename: 0001-pg_rewind-Skip-false-positive-rewind-on-benign-shutd.patch
Type: text/x-patch
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch 0001
Subject: pg_rewind: Skip false-positive rewind on benign shutdown checkpoint difference
| File | + | − |
|---|---|---|
| src/bin/pg_rewind/parsexlog.c | 79 | 8 |
| src/bin/pg_rewind/pg_rewind.c | 103 | 15 |
| src/bin/pg_rewind/pg_rewind.h | 11 | 1 |
From 83b32b56adb17fb15ebe6288acfbf57811d724dc Mon Sep 17 00:00:00 2001
From: BharatDBPG <bharatdbpg@gmail.com>
Date: Wed, 22 Oct 2025 17:18:02 +0530
Subject: [PATCH] pg_rewind: Skip false-positive rewind on benign shutdown
checkpoint difference
Signed-off-by: BharatDBPG <bharatdbpg@gmail.com>
---
src/bin/pg_rewind/parsexlog.c | 87 ++++++++++++++++++++++---
src/bin/pg_rewind/pg_rewind.c | 118 +++++++++++++++++++++++++++++-----
src/bin/pg_rewind/pg_rewind.h | 12 +++-
3 files changed, 193 insertions(+), 24 deletions(-)
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 8f4b282c6b..110ab17552 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -23,6 +23,7 @@
#include "fe_utils/archive.h"
#include "filemap.h"
#include "pg_rewind.h"
+#include "storage/standbydefs.h"
/*
* RmgrNames is an array of the built-in resource manager names, to make error
@@ -50,6 +51,10 @@ typedef struct XLogPageReadPrivate
int tliIndex;
} XLogPageReadPrivate;
+static ControlFileData ControlFile_target;
+static ControlFileData ControlFile_source;
+
+
static int SimpleXLogPageRead(XLogReaderState *xlogreader,
XLogRecPtr targetPagePtr,
int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
@@ -161,10 +166,71 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex,
return endptr;
}
+/*
+ * is_shutdown_only_sequence(xlogreader, endptr)
+ *
+ * An XLogReaderState positioned at the end-of-wal (endptr),
+ * walk backwards a small number of records to determine whether the
+ * tail consists solely of zero-or-more RUNNING_XACTS (RM_STANDBY_ID)
+ * optionally followed by a single CHECKPOINT_SHUTDOWN (RM_XLOG_ID).
+ *
+ * Returns true if the pattern matches and there are no intervening
+ * meaningful records.
+ */
+
+static bool
+is_shutdown_only_sequence(XLogReaderState *xlogreader, XLogRecPtr endptr)
+{
+ XLogRecPtr searchptr = endptr;
+ int seen_running_xacts = 0;
+ int max_steps = 8;
+
+ while (max_steps-- > 0)
+ {
+ char *errormsg;
+ XLogRecord *record;
+
+ XLogBeginRead(xlogreader, searchptr);
+ record = XLogReadRecord(xlogreader, &errormsg);
+
+ if (record == NULL)
+ return false;
+
+ /* fetch RMID and info */
+ uint8 rmid;
+ uint8 info;
+ rmid = XLogRecGetRmid(xlogreader);
+ info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
+
+ if (rmid == RM_STANDBY_ID && info == XLOG_RUNNING_XACTS)
+ {
+ seen_running_xacts++;
+ searchptr = record->xl_prev;
+ continue;
+ }
+
+ if (rmid == RM_XLOG_ID &&
+ (info == XLOG_CHECKPOINT_SHUTDOWN || info == XLOG_CHECKPOINT_ONLINE))
+ {
+ /* Only shutdown checkpoints with preceding RUNNING_XACTS allowed */
+ if (info == XLOG_CHECKPOINT_SHUTDOWN)
+ return true;
+ else
+ return false; /* online checkpoint → meaningful */
+ }
+
+ /* any other record type means there were changes */
+ return false;
+ }
+
+ /* we hit step cap without decisive answer → play safe */
+ return false;
+}
+
/*
* Find the previous checkpoint preceding given WAL location.
*/
-void
+WalEndStat
findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
XLogRecPtr *lastchkptredo, const char *restoreCommand)
@@ -210,13 +276,18 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
if (record == NULL)
{
- if (errormsg)
- pg_fatal("could not find previous WAL record at %X/%08X: %s",
- LSN_FORMAT_ARGS(searchptr),
- errormsg);
- else
- pg_fatal("could not find previous WAL record at %X/%08X",
- LSN_FORMAT_ARGS(searchptr));
+ /* Check for benign shutdown checkpoint difference */
+ if (control_diff_is_benign(&ControlFile_source, &ControlFile_target))
+ {
+ fprintf(stderr,
+ "pg_rewind: benign shutdown checkpoint difference detected, skipping rewind\n");
+ return InvalidXLogRecPtr; /* or other appropriate return to skip rewind */
+ }
+
+ /* Otherwise, fatal error */
+ pg_fatal("could not find previous WAL record at %X/%X: %s",
+ LSN_FORMAT_ARGS(searchptr),
+ errormsg ? errormsg : "unknown error");
}
/* Detect if a new WAL file has been opened */
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 0c68dd4235..d2379e4169 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -116,6 +116,44 @@ usage(const char *progname)
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
+/*
+ * Compare two ControlFileData structs.
+ * Return true if the differences are safe to ignore (benign),
+ * false if they are significant.
+ *
+ */
+bool
+control_diff_is_benign(void *src, void *tgt)
+{
+ ControlFileData *src_cf = (ControlFileData *) src;
+ ControlFileData *tgt_cf = (ControlFileData *) tgt;
+
+ /* 1. Cluster identity must always match */
+ if (src_cf->system_identifier != tgt_cf->system_identifier)
+ return false;
+
+ /* 2. Check timeline consistency */
+ if (src_cf->checkPointCopy.ThisTimeLineID != tgt_cf->checkPointCopy.ThisTimeLineID)
+ return false;
+
+ /* 3. Transaction ID progression */
+ if (src_cf->checkPointCopy.nextXid.value != tgt_cf->checkPointCopy.nextXid.value)
+ return false;
+
+ /* 4. OID progression */
+ if (src_cf->checkPointCopy.nextOid != tgt_cf->checkPointCopy.nextOid)
+ return false;
+
+ /* 5. MultiXact progression */
+ if (src_cf->checkPointCopy.nextMulti != tgt_cf->checkPointCopy.nextMulti)
+ return false;
+
+ /* 6. Checkpoint LSN should not go backwards */
+ if (src_cf->checkPoint < tgt_cf->checkPoint)
+ return false;
+
+ return true;
+}
int
main(int argc, char **argv)
@@ -429,21 +467,71 @@ main(int argc, char **argv)
}
/*
- * Check for the possibility that the target is in fact a direct
- * ancestor of the source. In that case, there is no divergent history
- * in the target that needs rewinding.
- */
- if (target_wal_endrec > divergerec)
- {
- rewind_needed = true;
- }
- else
- {
- /* the last common checkpoint record must be part of target WAL */
- Assert(target_wal_endrec == divergerec);
-
- rewind_needed = false;
- }
+ * Conservative check: determine whether we can safely skip rewind
+ * when the target's WAL tail only contains shutdown-only records.
+ */
+
+ WalEndStat end_status;
+ end_status = findLastCheckpoint(datadir_target, target_wal_endrec, lastcommontliIndex,
+ &chkptrec, &chkpttli, &chkptredo,
+ restore_command);
+
+ /* Initialize safety flags and controlfile pointers */
+ bool src_crc_ok = false;
+ bool tgt_crc_ok = false;
+ ControlFileData *src_ctrl = NULL;
+ ControlFileData *tgt_ctrl = NULL;
+
+ if (end_status == WAL_END_SHUTDOWN_ONLY)
+ {
+ pg_log_info("benign control file difference detected; skipping rewind");
+ exit(0);
+
+#ifdef USE_LIBPQ
+ if (connstr_source)
+ {
+ /* If fetching remotely (pg_rewind --source-server=...) */
+ src_ctrl = get_remote_controlfile(&src_crc_ok);
+ }
+ else
+#endif
+ {
+ /* Local source cluster */
+ src_ctrl = get_controlfile(datadir_source, &src_crc_ok);
+ }
+
+ /* Target control file is always local */
+ tgt_ctrl = get_controlfile(datadir_target, &tgt_crc_ok);
+
+ if (!src_ctrl || !tgt_ctrl)
+ {
+ pg_log_warning("could not read one or both control files; conservatively requiring rewind");
+ rewind_needed = true;
+ }
+ else if (control_diff_is_benign(src_ctrl, tgt_ctrl))
+ {
+ pg_log_info("only benign shutdown control differences found; skipping rewind");
+ rewind_needed = false;
+ }
+ else
+ {
+ pg_log_info("control file differences not benign; rewind required");
+ rewind_needed = true;
+ }
+
+ /* free() if your get_controlfile allocates new structs (optional) */
+ /* free(src_ctrl); free(tgt_ctrl); */
+ }
+ else if (end_status == WAL_END_MEANINGFUL)
+ {
+ /* Normal case: WAL tail contains meaningful changes */
+ rewind_needed = (target_wal_endrec > divergerec);
+ }
+ else
+ {
+ /* Unknown or empty WAL end; play safe */
+ rewind_needed = true;
+ }
}
if (!rewind_needed)
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index 9cea144d2b..94b75ea9bf 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -35,7 +35,15 @@ extern uint64 fetch_done;
extern void extractPageMap(const char *datadir, XLogRecPtr startpoint,
int tliIndex, XLogRecPtr endpoint,
const char *restoreCommand);
-extern void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr,
+
+typedef enum WalEndStat
+{
+ WAL_END_MEANINGFUL,
+ WAL_END_SHUTDOWN_ONLY,
+ WAL_END_EMPTY
+} WalEndStat;
+
+extern WalEndStat findLastCheckpoint(const char *datadir, XLogRecPtr forkptr,
int tliIndex,
XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
XLogRecPtr *lastchkptredo,
@@ -43,6 +51,8 @@ extern void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr,
extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr,
int tliIndex, const char *restoreCommand);
+bool control_diff_is_benign(void *src, void *tgt);
+
/* in pg_rewind.c */
extern void progress_report(bool finished);
--
2.34.1