v20250711-0003-Rework-handling-of-procsignalbarrier-and-local-cache.patch
text/x-patch
Filename: v20250711-0003-Rework-handling-of-procsignalbarrier-and-local-cache.patch
Type: text/x-patch
Part: 2
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v20250711-0003
Subject: Rework handling of procsignalbarrier and local cache for data_checksion_version.
| File | + | − |
|---|---|---|
| src/backend/access/transam/xlog.c | 16 | 29 |
| src/backend/postmaster/auxprocess.c | 19 | 0 |
| src/backend/postmaster/launch_backend.c | 0 | 10 |
| src/backend/utils/init/postinit.c | 15 | 2 |
| src/include/access/xlog.h | 1 | 1 |
| src/include/miscadmin.h | 0 | 1 |
From f14cf88958858c637f18c11c4221dfed0a04ccba Mon Sep 17 00:00:00 2001
From: Bernd Helmle <Bernd Helmle mailings@oopsware.de>
Date: Thu, 10 Jul 2025 18:35:51 +0200
Subject: [PATCH 3/4] Rework handling of procsignalbarrier and local cache for
data_checksion_version.
---
src/backend/access/transam/xlog.c | 45 +++++++++----------------
src/backend/postmaster/auxprocess.c | 19 +++++++++++
src/backend/postmaster/launch_backend.c | 10 ------
src/backend/utils/init/postinit.c | 17 ++++++++--
src/include/access/xlog.h | 2 +-
src/include/miscadmin.h | 1 -
6 files changed, 51 insertions(+), 43 deletions(-)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 53890c3e726..097db70fcfc 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -672,12 +672,15 @@ static bool updateMinRecoveryPoint = true;
static uint32 LocalDataChecksumVersion = 0;
/*
- * Flag to remember if the procsignalbarrier being absorbed for enabling
- * checksums is the first one or not. The first procsignalbarrier can in rare
- * circumstances cause a transition from 'on' to 'on' when a new process is
+ * Flag to remember if the procsignalbarrier being absorbed for checksums
+ * is the first one. The first procsignalbarrier can in rare cases be for
+ * the state we've initialized, i.e. a duplicate. This may happen for any
+ * data_checksum_version value, but for PG_DATA_CHECKSUM_ON_VERSION this
+ * would trigger an assert failure (this is the only transition with an
+ * assert) when processing the barrier. This may happen if the process is
* spawned between the update of XLogCtl->data_checksum_version and the
- * barrier being emitted. This can only happen on the very first barrier so
- * mark that with this flag.
+ * barrier being emitted. This can only happen on the very first barrier
+ * so mark that with this flag.
*/
static bool InitialDataChecksumTransition = true;
@@ -5052,6 +5055,7 @@ SetDataChecksumsOff(void)
bool
AbsorbChecksumsOnInProgressBarrier(void)
{
+ /* XXX can't we check we're in OFF or INPROGRESSS_ON? */
SetLocalDataChecksumVersion(PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION);
return true;
}
@@ -5064,22 +5068,19 @@ AbsorbChecksumsOnBarrier(void)
* barrier it will have seen the updated value, so for the first barrier
* we accept both "on" and "inprogress-on".
*/
- if (InitialDataChecksumTransition)
- {
- Assert((LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) ||
- (LocalDataChecksumVersion == PG_DATA_CHECKSUM_VERSION));
- InitialDataChecksumTransition = false;
- }
- else
- Assert(LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION);
+ Assert((LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) ||
+ (InitialDataChecksumTransition &&
+ (LocalDataChecksumVersion == PG_DATA_CHECKSUM_VERSION)));
SetLocalDataChecksumVersion(PG_DATA_CHECKSUM_VERSION);
+ InitialDataChecksumTransition = false;
return true;
}
bool
AbsorbChecksumsOffInProgressBarrier(void)
{
+ /* XXX can't we check we're in ON or INPROGRESSS_OFF? */
SetLocalDataChecksumVersion(PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION);
return true;
}
@@ -5087,6 +5088,7 @@ AbsorbChecksumsOffInProgressBarrier(void)
bool
AbsorbChecksumsOffBarrier(void)
{
+ /* XXX can't we check we're in INPROGRESSS_OFF? */
SetLocalDataChecksumVersion(0);
return true;
}
@@ -5100,7 +5102,7 @@ AbsorbChecksumsOffBarrier(void)
* purpose enough to handle future cases.
*/
void
-InitLocalControldata(void)
+InitLocalDataChecksumVersion(void)
{
SpinLockAcquire(&XLogCtl->info_lck);
SetLocalDataChecksumVersion(XLogCtl->data_checksum_version);
@@ -5138,21 +5140,6 @@ SetLocalDataChecksumVersion(uint32 data_checksum_version)
}
}
-/*
- * Initialize the various data checksum values - GUC, local, ....
- */
-void
-InitLocalDataChecksumVersion(void)
-{
- uint32 data_checksum_version;
-
- SpinLockAcquire(&XLogCtl->info_lck);
- data_checksum_version = XLogCtl->data_checksum_version;
- SpinLockRelease(&XLogCtl->info_lck);
-
- SetLocalDataChecksumVersion(data_checksum_version);
-}
-
/*
* Get the local data_checksum_version (cached XLogCtl value).
*/
diff --git a/src/backend/postmaster/auxprocess.c b/src/backend/postmaster/auxprocess.c
index a6d3630398f..4b56ef0eb81 100644
--- a/src/backend/postmaster/auxprocess.c
+++ b/src/backend/postmaster/auxprocess.c
@@ -15,6 +15,7 @@
#include <unistd.h>
#include <signal.h>
+#include "access/xlog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/auxprocess.h"
@@ -68,6 +69,24 @@ AuxiliaryProcessMainCommon(void)
ProcSignalInit(NULL, 0);
+ /*
+ * Initialize a local cache of the data_checksum_version, to be updated
+ * by the procsignal-based barriers.
+ *
+ * This intentionally happens after initializing the procsignal, otherwise
+ * we might miss a state change. This means we can get a barrier for the
+ * state we've just initialized - but it can happen only once.
+ *
+ * The postmaster (which is what gets forked into the new child process)
+ * does not handle barriers, therefore it may not have the current value
+ * of LocalDataChecksumVersion value (it'll have the value read from the
+ * control file, which may be arbitrarily old).
+ *
+ * NB: Even if the postmaster handled barriers, the value might still be
+ * stale, as it might have changed after this process forked.
+ */
+ InitLocalDataChecksumVersion();
+
/*
* Auxiliary processes don't run transactions, but they may need a
* resource owner anyway to manage buffer pins acquired outside
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index 4a86d2588ff..955df32be5d 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -287,16 +287,6 @@ postmaster_child_launch(BackendType child_type, int child_slot,
memcpy(MyClientSocket, client_sock, sizeof(ClientSocket));
}
- /*
- * update the LocalProcessControlFile to match XLogCtl->data_checksum_version
- *
- * XXX It seems the postmaster (which is what gets forked into the new
- * child process) does not absorb the checksum barriers, therefore it
- * does not update the value (except after a restart). Not sure if there
- * is some sort of race condition.
- */
- InitLocalDataChecksumVersion();
-
/*
* Run the appropriate Main function
*/
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 785b8d4b04f..7418deb10f5 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -752,9 +752,22 @@ InitPostgres(const char *in_dbname, Oid dboid,
ProcSignalInit(MyCancelKey, MyCancelKeyLength);
/*
- * Set up backend local cache of Controldata values.
+ * Initialize a local cache of the data_checksum_version, to be updated
+ * by the procsignal-based barriers.
+ *
+ * This intentionally happens after initializing the procsignal, otherwise
+ * we might miss a state change. This means we can get a barrier for the
+ * state we've just initialized - but it can happen only once.
+ *
+ * The postmaster (which is what gets forked into the new child process)
+ * does not handle barriers, therefore it may not have the current value
+ * of LocalDataChecksumVersion value (it'll have the value read from the
+ * control file, which may be arbitrarily old).
+ *
+ * NB: Even if the postmaster handled barriers, the value might still be
+ * stale, as it might have changed after this process forked.
*/
- InitLocalControldata();
+ InitLocalDataChecksumVersion();
/*
* Also set up timeout handlers needed for backend operation. We need
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index aec3ea0bc63..615b2cf4ec8 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -243,7 +243,7 @@ extern bool AbsorbChecksumsOffInProgressBarrier(void);
extern bool AbsorbChecksumsOnBarrier(void);
extern bool AbsorbChecksumsOffBarrier(void);
extern const char *show_data_checksums(void);
-extern void InitLocalControldata(void);
+extern void InitLocalDataChecksumVersion(void);
extern bool GetDefaultCharSignedness(void);
extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
extern Size XLOGShmemSize(void);
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index c86294b4d19..5ce7a1cbc65 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -544,7 +544,6 @@ extern void RestoreClientConnectionInfo(char *conninfo);
extern uint32 GetLocalDataChecksumVersion(void);
extern uint32 GetCurrentDataChecksumVersion(void);
-extern void InitLocalDataChecksumVersion(void);
/* in executor/nodeHash.c */
extern size_t get_hash_memory_limit(void);
--
2.50.0