v20250711-0004-debug-stuff.patch
text/x-patch
Filename: v20250711-0004-debug-stuff.patch
Type: text/x-patch
Part: 3
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v20250711-0004
Subject: debug stuff
| File | + | − |
|---|---|---|
| src/backend/access/transam/xlog.c | 83 | 3 |
| src/backend/postmaster/launch_backend.c | 3 | 0 |
| src/backend/storage/ipc/procsignal.c | 34 | 0 |
| src/backend/storage/page/bufpage.c | 35 | 0 |
| src/backend/utils/init/miscinit.c | 3 | 0 |
| src/bin/pg_controldata/pg_controldata.c | 2 | 0 |
From 9292aac9d8c3a78ceab7aaa5a1c2d80432fbcbaa Mon Sep 17 00:00:00 2001
From: Bernd Helmle <Bernd Helmle mailings@oopsware.de>
Date: Fri, 11 Jul 2025 16:10:32 +0200
Subject: [PATCH 4/4] debug stuff
---
src/backend/access/transam/xlog.c | 86 ++++++++++++++++++++++++-
src/backend/postmaster/launch_backend.c | 3 +
src/backend/storage/ipc/procsignal.c | 34 ++++++++++
src/backend/storage/page/bufpage.c | 35 ++++++++++
src/backend/utils/init/miscinit.c | 3 +
src/bin/pg_controldata/pg_controldata.c | 2 +
6 files changed, 160 insertions(+), 3 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 097db70fcfc..a6b6c0eaddd 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4398,6 +4398,8 @@ InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
* processes get the current value from. (Maybe it should go just there?)
*/
XLogCtl->data_checksum_version = data_checksum_version;
+
+ elog(LOG, "InitControlFile %p data_checksum_version %u", XLogCtl, ControlFile->data_checksum_version);
}
static void
@@ -4737,6 +4739,8 @@ ReadControlFile(void)
(SizeOfXLogLongPHD - SizeOfXLogShortPHD);
CalculateCheckpointSegments();
+
+ elog(LOG, "ReadControlFile ControlFile->data_checksum_version = %u", ControlFile->data_checksum_version);
}
/*
@@ -4872,6 +4876,8 @@ SetDataChecksumsOnInProgress(void)
XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION;
SpinLockRelease(&XLogCtl->info_lck);
+ elog(LOG, "SetDataChecksumsOnInProgress XLogCtl->data_checksum_version = %u", XLogCtl->data_checksum_version);
+
barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON);
END_CRIT_SECTION();
@@ -4937,6 +4943,8 @@ SetDataChecksumsOn(void)
XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_VERSION;
SpinLockRelease(&XLogCtl->info_lck);
+ elog(LOG, "SetDataChecksumsOn XLogCtl->data_checksum_version = %u", XLogCtl->data_checksum_version);
+
barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON);
END_CRIT_SECTION();
@@ -4975,6 +4983,7 @@ SetDataChecksumsOff(void)
/* If data checksums are already disabled there is nothing to do */
if (XLogCtl->data_checksum_version == 0)
{
+ elog(LOG, "SetDataChecksumsOff XLogCtl->data_checksum_version = %u (SKIP)", XLogCtl->data_checksum_version);
SpinLockRelease(&XLogCtl->info_lck);
return;
}
@@ -4999,6 +5008,8 @@ SetDataChecksumsOff(void)
XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION;
SpinLockRelease(&XLogCtl->info_lck);
+ elog(LOG, "SetDataChecksumsOff XLogCtl->data_checksum_version = %u", XLogCtl->data_checksum_version);
+
barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF);
END_CRIT_SECTION();
@@ -5019,6 +5030,8 @@ SetDataChecksumsOff(void)
}
else
{
+ elog(LOG, "SetDataChecksumsOff XLogCtl->data_checksum_version = %u (SKIP)", XLogCtl->data_checksum_version);
+
/*
* Ending up here implies that the checksums state is "inprogress-on"
* or "inprogress-off" and we can transition directly to "off" from
@@ -5056,6 +5069,7 @@ bool
AbsorbChecksumsOnInProgressBarrier(void)
{
/* XXX can't we check we're in OFF or INPROGRESSS_ON? */
+ elog(LOG, "AbsorbChecksumsOnInProgressBarrier");
SetLocalDataChecksumVersion(PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION);
return true;
}
@@ -5063,6 +5077,8 @@ AbsorbChecksumsOnInProgressBarrier(void)
bool
AbsorbChecksumsOnBarrier(void)
{
+ elog(LOG, "AbsorbChecksumsOnBarrier");
+
/*
* If the process was spawned between updating XLogCtl and emitting the
* barrier it will have seen the updated value, so for the first barrier
@@ -5081,6 +5097,7 @@ bool
AbsorbChecksumsOffInProgressBarrier(void)
{
/* XXX can't we check we're in ON or INPROGRESSS_OFF? */
+ elog(LOG, "AbsorbChecksumsOffInProgressBarrier");
SetLocalDataChecksumVersion(PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION);
return true;
}
@@ -5089,6 +5106,7 @@ bool
AbsorbChecksumsOffBarrier(void)
{
/* XXX can't we check we're in INPROGRESSS_OFF? */
+ elog(LOG, "AbsorbChecksumsOffBarrier");
SetLocalDataChecksumVersion(0);
return true;
}
@@ -5104,6 +5122,7 @@ AbsorbChecksumsOffBarrier(void)
void
InitLocalDataChecksumVersion(void)
{
+ elog(LOG, "InitLocalDataChecksumVersion XLogCtl->data_checksum_version = %u", XLogCtl->data_checksum_version);
SpinLockAcquire(&XLogCtl->info_lck);
SetLocalDataChecksumVersion(XLogCtl->data_checksum_version);
SpinLockRelease(&XLogCtl->info_lck);
@@ -5118,6 +5137,7 @@ InitLocalDataChecksumVersion(void)
void
SetLocalDataChecksumVersion(uint32 data_checksum_version)
{
+ elog(LOG, "SetLocalDataChecksumVersion %u", data_checksum_version);
LocalDataChecksumVersion = data_checksum_version;
switch (LocalDataChecksumVersion)
@@ -5615,9 +5635,15 @@ XLOGShmemInit(void)
XLogCtl->InstallXLogFileSegmentActive = false;
XLogCtl->WalWriterSleeping = false;
+ elog(LOG, "XLogCtl->data_checksum_version %u ControlFile->data_checksum_version %u",
+ XLogCtl->data_checksum_version, ControlFile->data_checksum_version);
+
/* use the checksum info from control file */
XLogCtl->data_checksum_version = ControlFile->data_checksum_version;
+ elog(LOG, "XLogCtl->data_checksum_version %u ControlFile->data_checksum_version %u (UPDATED)",
+ XLogCtl->data_checksum_version, ControlFile->data_checksum_version);
+
SetLocalDataChecksumVersion(XLogCtl->data_checksum_version);
SpinLockInit(&XLogCtl->Insert.insertpos_lck);
@@ -6758,6 +6784,8 @@ StartupXLOG(void)
*/
CompleteCommitTsInitialization();
+ elog(LOG, "StartupXLOG XLogCtl->data_checksum_version = %u", XLogCtl->data_checksum_version);
+
/*
* If we reach this point with checksums being enabled ("inprogress-on"
* state), we notify the user that they need to manually restart the
@@ -6775,6 +6803,9 @@ StartupXLOG(void)
* we know that we have a state where all backends have stopped validating
* checksums and we can move to off instead of prompting the user to
* perform any action.
+ *
+ * XXX Is it safe to access the data_checksum_version without holding the
+ * spinlock?
*/
if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION)
{
@@ -6783,6 +6814,8 @@ StartupXLOG(void)
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->data_checksum_version = 0;
SpinLockRelease(&XLogCtl->info_lck);
+
+ elog(LOG, "StartupXLOG XLogCtl->data_checksum_version = %u (UPDATED)", XLogCtl->data_checksum_version);
}
/*
@@ -7646,6 +7679,8 @@ CreateCheckPoint(int flags)
*/
checkPoint.data_checksum_version = XLogCtl->data_checksum_version;
+ elog(WARNING, "CREATECHECKPOINT XLogCtl->data_checksum_version %u", XLogCtl->data_checksum_version);
+
if (shutdown)
{
XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
@@ -7901,6 +7936,10 @@ CreateCheckPoint(int flags)
ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
ControlFile->minRecoveryPointTLI = 0;
+ elog(LOG, "CreateCheckPoint data_checksum_version %u %u",
+ ControlFile->data_checksum_version,
+ checkPoint.data_checksum_version);
+
/* make sure we start with the checksum version as of the checkpoint */
ControlFile->data_checksum_version = checkPoint.data_checksum_version;
@@ -8055,6 +8094,10 @@ CreateEndOfRecoveryRecord(void)
ControlFile->minRecoveryPoint = recptr;
ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID;
+ elog(LOG, "CreateEndOfRecoveryRecord data_checksum_version %u xlog %u",
+ ControlFile->data_checksum_version,
+ XLogCtl->data_checksum_version);
+
/* start with the latest checksum version (as of the end of recovery) */
ControlFile->data_checksum_version = XLogCtl->data_checksum_version;
@@ -8227,6 +8270,14 @@ RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
XLogCtl->lastCheckPointEndPtr = record->EndRecPtr;
XLogCtl->lastCheckPoint = *checkPoint;
SpinLockRelease(&XLogCtl->info_lck);
+
+ elog(LOG, "RecoveryRestartPoint lastCheckPointRecPtr %X/%X XLogCtl->lastCheckPointEndPtr %X/%X redo %X/%X checksums xlogctl %u record %d file %u",
+ LSN_FORMAT_ARGS(XLogCtl->lastCheckPointRecPtr),
+ LSN_FORMAT_ARGS(XLogCtl->lastCheckPointEndPtr),
+ LSN_FORMAT_ARGS(XLogCtl->lastCheckPoint.redo),
+ XLogCtl->lastCheckPoint.data_checksum_version,
+ checkPoint->data_checksum_version,
+ ControlFile->data_checksum_version);
}
/*
@@ -8264,6 +8315,9 @@ CreateRestartPoint(int flags)
lastCheckPoint = XLogCtl->lastCheckPoint;
SpinLockRelease(&XLogCtl->info_lck);
+ elog(LOG, "CreateRestartPoint lastCheckPointRecPtr %X/%X lastCheckPointEndPtr %X/%X",
+ LSN_FORMAT_ARGS(lastCheckPointRecPtr), LSN_FORMAT_ARGS(lastCheckPointEndPtr));
+
/*
* Check that we're still in recovery mode. It's ok if we exit recovery
* mode after this check, the restart point is valid anyway.
@@ -8292,9 +8346,11 @@ CreateRestartPoint(int flags)
if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
{
- ereport(DEBUG2,
- errmsg_internal("skipping restartpoint, already performed at %X/%08X",
- LSN_FORMAT_ARGS(lastCheckPoint.redo)));
+ ereport(LOG,
+ (errmsg_internal("CreateRestartPoint: skipping restartpoint, already performed at %X/%X <= %X/%X lastCheckPointRecPtr %X/%X",
+ LSN_FORMAT_ARGS(lastCheckPoint.redo),
+ LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo),
+ LSN_FORMAT_ARGS(lastCheckPointRecPtr))));
UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
if (flags & CHECKPOINT_IS_SHUTDOWN)
@@ -8362,6 +8418,8 @@ CreateRestartPoint(int flags)
* end-of-recovery checkpoint.
*/
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+ elog(LOG, "CreateRestartPoint ControlFile->checkPointCopy.redo %X/%X lastCheckPoint.redo %X/%X",
+ LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo), LSN_FORMAT_ARGS(lastCheckPoint.redo));
if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
{
/*
@@ -8400,11 +8458,17 @@ CreateRestartPoint(int flags)
ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
}
+ elog(LOG, "CreateRestartPoint data_checksum_version %u %u",
+ ControlFile->data_checksum_version,
+ lastCheckPoint.data_checksum_version);
+
/* we shall start with the latest checksum version */
ControlFile->data_checksum_version = lastCheckPoint.data_checksum_version;
UpdateControlFile();
}
+ else
+ elog(LOG, "CreateRestartPoint: skipped ControlFile update");
LWLockRelease(ControlFileLock);
/*
@@ -9264,13 +9328,25 @@ xlog_redo(XLogReaderState *record)
{
xl_checksum_state state;
uint64 barrier;
+ XLogRecPtr checkpointLsn;
+ uint32 value,
+ value_last;
memcpy(&state, XLogRecGetData(record), sizeof(xl_checksum_state));
SpinLockAcquire(&XLogCtl->info_lck);
+ value_last = XLogCtl->data_checksum_version;
XLogCtl->data_checksum_version = state.new_checksumtype;
SpinLockRelease(&XLogCtl->info_lck);
+ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+ checkpointLsn = ControlFile->checkPoint;
+ value = ControlFile->data_checksum_version;
+ LWLockRelease(ControlFileLock);
+
+ elog(LOG, "XLOG_CHECKSUMS xlog_redo %X/%X control checkpoint %X/%X control %u last %u record %u",
+ LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(checkpointLsn), value, value_last, state.new_checksumtype);
+
/*
* Block on a procsignalbarrier to await all processes having seen the
* change to checksum status. Once the barrier has been passed we can
@@ -9279,22 +9355,26 @@ xlog_redo(XLogReaderState *record)
switch (state.new_checksumtype)
{
case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION:
+ elog(LOG, "XLOG_CHECKSUMS emit PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON");
barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON);
WaitForProcSignalBarrier(barrier);
break;
case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION:
+ elog(LOG, "XLOG_CHECKSUMS emit PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF");
barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF);
WaitForProcSignalBarrier(barrier);
break;
case PG_DATA_CHECKSUM_VERSION:
+ elog(LOG, "XLOG_CHECKSUMS emit PROCSIGNAL_BARRIER_CHECKSUM_ON");
barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON);
WaitForProcSignalBarrier(barrier);
break;
default:
Assert(state.new_checksumtype == 0);
+ elog(LOG, "XLOG_CHECKSUMS emit PROCSIGNAL_BARRIER_CHECKSUM_OFF");
barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF);
WaitForProcSignalBarrier(barrier);
break;
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 955df32be5d..d2ae41172a6 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -241,6 +241,9 @@ postmaster_child_launch(BackendType child_type, int child_slot,
if (IsExternalConnectionBackend(child_type))
((BackendStartupData *) startup_data)->fork_started = GetCurrentTimestamp();
+ elog(LOG, "postmaster_child_launch: LocalDataChecksumVersion %u xlog %u", GetLocalDataChecksumVersion(),
+ GetCurrentDataChecksumVersion());
+
#ifdef EXEC_BACKEND
pid = internal_forkexec(child_process_kinds[child_type].name, child_slot,
startup_data, startup_data_len, client_sock);
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index c48abee2b60..d5e1bb12ba5 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -554,6 +554,40 @@ ProcessProcSignalBarrier(void)
PG_TRY();
{
+ /* print info about barriers */
+ {
+ uint32 tmp = flags;
+
+ elog(LOG, "ProcessProcSignalBarrier flags %u", tmp);
+
+ while (tmp != 0)
+ {
+ ProcSignalBarrierType type;
+
+ type = (ProcSignalBarrierType) pg_rightmost_one_pos32(tmp);
+ switch (type)
+ {
+ case PROCSIGNAL_BARRIER_SMGRRELEASE:
+ elog(LOG, "PROCSIGNAL_BARRIER_SMGRRELEASE");
+ break;
+ case PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON:
+ elog(LOG, "PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON");
+ break;
+ case PROCSIGNAL_BARRIER_CHECKSUM_ON:
+ elog(LOG, "PROCSIGNAL_BARRIER_CHECKSUM_ON");
+ break;
+ case PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF:
+ elog(LOG, "PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF");
+ break;
+ case PROCSIGNAL_BARRIER_CHECKSUM_OFF:
+ elog(LOG, "PROCSIGNAL_BARRIER_CHECKSUM_OFF");
+ break;
+ }
+
+ BARRIER_CLEAR_BIT(tmp, type);
+ }
+ }
+
/*
* Process each type of barrier. The barrier-processing functions
* should normally return true, but may return false if the
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 19cf6512e52..7c8f4d73606 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -22,6 +22,7 @@
#include "utils/memdebug.h"
#include "utils/memutils.h"
+#include <execinfo.h>
/* GUC variable */
bool ignore_checksum_failure = false;
@@ -149,10 +150,44 @@ PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_fail
if (checksum_failure)
{
if ((flags & (PIV_LOG_WARNING | PIV_LOG_LOG)) != 0)
+ {
+ XLogRecPtr lsn = PageGetLSN(page);
+
ereport(flags & PIV_LOG_WARNING ? WARNING : LOG,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("page verification failed, calculated checksum %u but expected %u",
checksum, p->pd_checksum)));
+ ereport(WARNING,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("page verification failed, LSN %X/%X", LSN_FORMAT_ARGS(lsn))));
+ ereport(WARNING,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("page verification failed, header flags %u lower %u upper %u special %u pagesize_version %u prune_xid %u",
+ p->pd_flags,
+ p->pd_lower,
+ p->pd_upper,
+ p->pd_special,
+ p->pd_pagesize_version,
+ p->pd_prune_xid)));
+
+ {
+#define BT_BUF_SIZE 32
+ int nptrs;
+ void *buffer[BT_BUF_SIZE];
+ char **strings;
+
+ nptrs = backtrace(buffer, BT_BUF_SIZE);
+ strings = backtrace_symbols(buffer, nptrs);
+
+ if (strings != NULL)
+ {
+ for (int i = 0; i < nptrs; i++)
+ {
+ elog(WARNING, "backtrace %d : %s", i, strings[i]);
+ }
+ }
+ }
+ }
if (header_sane && (flags & PIV_IGNORE_CHECKSUM_FAILURE))
return true;
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index e39498d1250..25f35c2690c 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -164,6 +164,9 @@ InitPostmasterChild(void)
(errcode_for_socket_access(),
errmsg_internal("could not set postmaster death monitoring pipe to FD_CLOEXEC mode: %m")));
#endif
+
+ elog(LOG, "InitPostmasterChild: LocalDataChecksumVersion %u xlog %u", GetLocalDataChecksumVersion(),
+ GetCurrentDataChecksumVersion());
}
/*
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 10de058ce91..acf5c7b026e 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -280,6 +280,8 @@ main(int argc, char *argv[])
ControlFile->checkPointCopy.oldestCommitTsXid);
printf(_("Latest checkpoint's newestCommitTsXid:%u\n"),
ControlFile->checkPointCopy.newestCommitTsXid);
+ printf(_("Latest checkpoint's data_checksum_version:%u\n"),
+ ControlFile->checkPointCopy.data_checksum_version);
printf(_("Time of latest checkpoint: %s\n"),
ckpttime_str);
printf(_("Fake LSN counter for unlogged rels: %X/%08X\n"),
--
2.50.0