checksums-review.txt
text/plain
Filename: checksums-review.txt
Type: text/plain
Part: 0
From 40d26a0406a213207101a837999e8b57df25fc1d Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@vondra.me>
Date: Wed, 5 Nov 2025 13:26:41 +0100
Subject: [PATCH v20251105 2/3] review
---
doc/src/sgml/func/func-admin.sgml | 20 ++++++++++++-----
doc/src/sgml/glossary.sgml | 2 +-
doc/src/sgml/monitoring.sgml | 23 ++++++++++++++++++--
doc/src/sgml/wal.sgml | 18 +++++++++++++++
src/backend/access/transam/xlog.c | 17 +++++++++------
src/backend/postmaster/datachecksumsworker.c | 14 ++++++++++++
src/backend/storage/ipc/ipci.c | 1 -
src/include/postmaster/datachecksumsworker.h | 2 +-
src/tools/pgindent/typedefs.list | 2 ++
9 files changed, 82 insertions(+), 17 deletions(-)
diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml
index f3a8782ede0..4d6f6a7e486 100644
--- a/doc/src/sgml/func/func-admin.sgml
+++ b/doc/src/sgml/func/func-admin.sgml
@@ -3008,11 +3008,11 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
<indexterm>
<primary>pg_enable_data_checksums</primary>
</indexterm>
- <function>pg_enable_data_checksums</function> ( <optional><parameter>cost_delay</parameter> <type>int</type>, <parameter>cost_limit</parameter> <type>int</type></optional> )
+ <function>pg_enable_data_checksums</function> ( <optional><parameter>cost_delay</parameter> <type>int</type>, <parameter>cost_limit</parameter> <type>int</type></optional>, <parameter>fast</parameter> <type>bool</type></optional> )
<returnvalue>void</returnvalue>
</para>
<para>
- Initiates data checksums for the cluster. This will switch the data
+ Initiates the process of enabling data checksums for the cluster. This will switch the data
checksums mode to <literal>inprogress-on</literal> as well as start a
background worker that will process all pages in the database and
enable checksums on them. When all data pages have had checksums
@@ -3023,7 +3023,9 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
If <parameter>cost_delay</parameter> and <parameter>cost_limit</parameter> are
specified, the speed of the process is throttled using the same principles as
<link linkend="runtime-config-resource-vacuum-cost">Cost-based Vacuum Delay</link>.
- </para></entry>
+ </para>
+ <para>FIXME missing documentation of the "fast" parameter</para>
+ </entry>
</row>
<row>
@@ -3031,7 +3033,7 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
<indexterm>
<primary>pg_disable_data_checksums</primary>
</indexterm>
- <function>pg_disable_data_checksums</function> ()
+ <function>pg_disable_data_checksums</function> ( <parameter>fast</parameter> <type>bool</type></optional> )
<returnvalue>void</returnvalue>
</para>
<para>
@@ -3042,12 +3044,20 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
changed to <literal>off</literal>. At this point the data pages will
still have checksums recorded but they are not updated when pages are
modified.
- </para></entry>
+ </para>
+ <para>FIXME missing documentation of the "fast" parameter</para>
+ </entry>
</row>
</tbody>
</tgroup>
</table>
+ <para>
+ FIXME I think this should briefly explain how this interacts with checkpoints (through
+ the fast parameters). It should probably also discuss how this affects streaming standby
+ due to forcing a restart point, etc. And maybe comment on possible mitigations?
+ </para>
+
</sect2>
</sect1>
diff --git a/doc/src/sgml/glossary.sgml b/doc/src/sgml/glossary.sgml
index 9bac0c96348..3ba0e8c6c5c 100644
--- a/doc/src/sgml/glossary.sgml
+++ b/doc/src/sgml/glossary.sgml
@@ -580,7 +580,7 @@
<glossdef>
<para>
An <glossterm linkend="glossary-auxiliary-proc">auxiliary process</glossterm>
- which enables or disables data checksums in a specific database.
+ which enables data checksums in a specific database.
</para>
</glossdef>
</glossentry>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index b56e220f3d8..7efa1af746a 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3554,7 +3554,10 @@ description | Waiting for a newly initialized WAL file to reach durable storage
database (or on a shared object).
Detected failures are reported regardless of the
<xref linkend="guc-data-checksums"/> setting.
- </para></entry>
+ </para>
+ <para>XXX I'm not sure what "regardless" means in this context. I guess
+ it means we don't reset the counters when disabling checksums?</para>
+ </entry>
</row>
<row>
@@ -6959,6 +6962,9 @@ FROM pg_stat_get_backend_idset() AS backendid;
<structname>pg_stat_progress_data_checksums</structname> view will contain
a row for the launcher process, and one row for each worker process which
is currently calculating checksums for the data pages in one database.
+ The launcher provides overview of the overall progress (how many database
+ have been processed, how many remain), while the workers track progress for
+ currently processed databases.
</para>
<table id="pg-stat-progress-data-checksums-view" xreflabel="pg_stat_progress_data_checksums">
@@ -6984,7 +6990,8 @@ FROM pg_stat_get_backend_idset() AS backendid;
<structfield>pid</structfield> <type>integer</type>
</para>
<para>
- Process ID of a datachecksumworker process.
+ Process ID of a datachecksumsworker process.
+ FIXME Is datachecksumsworker defined somewhere? Does it refer to the launcher too?
</para>
</entry>
</row>
@@ -7127,6 +7134,12 @@ FROM pg_stat_get_backend_idset() AS backendid;
The command is currently disabling data checksums on the cluster.
</entry>
</row>
+ <row>
+ <entry><literal>waiting</literal></entry>
+ <entry>
+ FIXME
+ </entry>
+ </row>
<row>
<entry><literal>waiting on temporary tables</literal></entry>
<entry>
@@ -7141,6 +7154,12 @@ FROM pg_stat_get_backend_idset() AS backendid;
state before finishing.
</entry>
</row>
+ <row>
+ <entry><literal>done</literal></entry>
+ <entry>
+ FIXME
+ </entry>
+ </row>
</tbody>
</tgroup>
</table>
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 0ada90ca0b1..8ef16d7769f 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -284,6 +284,11 @@
<link linkend="functions-admin-checksum">functions</link>.
</para>
+ <para>
+ Both enabling and disabling checksums happens in two phases, separated by
+ a checkpoint to ensure durability.
+ </para>
+
<para>
Enabling checksums will put the cluster checksum mode in
<literal>inprogress-on</literal> mode. During this time, checksums will be
@@ -314,6 +319,14 @@
no support for resuming work from where it was interrupted.
</para>
+ <para>
+ Disabling checksums will put the cluster checksum mode in
+ <literal>inprogress-off</literal> mode. During this time, checksums will be
+ written but not verified. After all processes acknowledge the change,
+ the mode will automatically switch to <literal>off</literal>. In this case
+ rewriting all blocks is not needed, but checkpoints are still required.
+ </para>
+
<note>
<para>
Enabling checksums can cause significant I/O to the system, as most of the
@@ -324,6 +337,11 @@
</para>
</note>
+ <para>
+ XXX Maybe this is the place that should mention checkpoints/restartpoints,
+ how it impacts systems/replication and how to mitigate that?
+ </para>
+
</sect2>
</sect1>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f7633f47551..807b82eed4f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -607,7 +607,7 @@ typedef struct ChecksumBarrierCondition
int barrier_ne[MAX_BARRIER_CONDITIONS];
/* The number of elements in the barrier_ne set */
int barrier_ne_sz;
-} ChecksumBarrierCondition;
+} ChecksumBarrierCondition;
static const ChecksumBarrierCondition checksum_barriers[] =
{
@@ -618,7 +618,6 @@ static const ChecksumBarrierCondition checksum_barriers[] =
{-1}
};
-
/*
* Calculate the amount of space left on the page after 'endptr'. Beware
* multiple evaluation!
@@ -694,10 +693,10 @@ static TimeLineID LocalMinRecoveryPointTLI;
static bool updateMinRecoveryPoint = true;
/*
- * Local state fror Controlfile data_checksum_version. After initialization
+ * Local state for Controlfile data_checksum_version. After initialization
* this is only updated when absorbing a procsignal barrier during interrupt
* processing. The reason for keeping a copy in backend-private memory is to
- * avoid locking for interrogating checksum state. Possible values are the
+ * avoid locking for interrogating the checksum state. Possible values are the
* checksum versions defined in storage/bufpage.h as well as zero when data
* checksums are disabled.
*/
@@ -712,6 +711,10 @@ static uint32 LocalDataChecksumVersion = 0;
* processing the barrier. This may happen if the process is spawned between
* the update of XLogCtl->data_checksum_version and the barrier being emitted.
* This can only happen on the very first barrier so mark that with this flag.
+ *
+ * XXX Is PG_DATA_CHECKSUM_ON_VERSION still the only transition with an assert?
+ * I think it was replaced by checking checksum_barriers for every transition,
+ * with elog(ERROR), no?
*/
static bool InitialDataChecksumTransition = true;
@@ -4335,7 +4338,7 @@ InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
/*
* Set the data_checksum_version value into XLogCtl, which is where all
- * processes get the current value from. (Maybe it should go just there?)
+ * processes get the current value from.
*/
XLogCtl->data_checksum_version = data_checksum_version;
}
@@ -4921,7 +4924,7 @@ SetDataChecksumsOff(bool immediate_checkpoint)
uint64 barrier;
int flags;
- Assert(ControlFile);
+ Assert(ControlFile != NULL);
SpinLockAcquire(&XLogCtl->info_lck);
@@ -5030,7 +5033,7 @@ SetDataChecksumsOff(bool immediate_checkpoint)
bool
AbsorbDataChecksumsBarrier(int target_state)
{
- const ChecksumBarrierCondition *condition = checksum_barriers;
+ const ChecksumBarrierCondition *condition = checksum_barriers;
int current = LocalDataChecksumVersion;
bool found = false;
diff --git a/src/backend/postmaster/datachecksumsworker.c b/src/backend/postmaster/datachecksumsworker.c
index 3deb57a96de..67045b9014d 100644
--- a/src/backend/postmaster/datachecksumsworker.c
+++ b/src/backend/postmaster/datachecksumsworker.c
@@ -52,6 +52,9 @@
* - Data checksums SHALL NOT be considered enabled cluster-wide until all
* currently connected backends have the local state "enabled"
*
+ * FIXME I'm not 100% sure I understand what the two above points say. What does
+ * "violate local data_checksums state" means"?
+ *
* There are two levels of synchronization required for enabling data checksums
* in an online cluster: (i) changing state in the active backends ("on",
* "off", "inprogress-on" and "inprogress-off"), and (ii) ensuring no
@@ -60,6 +63,10 @@
* latter with ensuring that any concurrent activity cannot break the data
* checksum contract during processing.
*
+ * FIXME This para talks about "enabling" but then mentions all four states,
+ * including "inprogress-off" and "off". Maybe it should talk about "changing
+ * data_checksums" instead?
+ *
* Synchronizing the state change is done with procsignal barriers, where the
* WAL logging backend updating the global state in the controlfile will wait
* for all other backends to absorb the barrier. Barrier absorption will happen
@@ -88,6 +95,10 @@
* enables data checksums cluster-wide, there are four sets of backends where
* Bd shall be an empty set:
*
+ * FIXME Maybe mention which process initiates the procsignalbarrier?
+ * FIXME Don't we actually wait or the barrier before we start rewriting data?
+ * I think Bd has to be empty prior to that, otherwise it might break checksums.
+ *
* Bg: Backend updating the global state and emitting the procsignalbarrier
* Bd: Backends in "off" state
* Be: Backends in "on" state
@@ -124,6 +135,8 @@
* stop writing data checksums as no backend is enforcing data checksum
* validation any longer.
*
+ * XXX Maybe it'd make sense to "visualize" the progress between the states
+ * and barriers in some way. Say, by doing
*
* Potential optimizations
* -----------------------
@@ -148,6 +161,7 @@
* to enable checksums on a cluster which is in inprogress-on state and
* may have checksummed pages (make pg_checksums be able to resume an
* online operation).
+ * * Restartability (not necessarily with page granularity).
*
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 44213d140ae..9014e90f1c7 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -31,7 +31,6 @@
#include "postmaster/bgworker_internals.h"
#include "postmaster/bgwriter.h"
#include "postmaster/datachecksumsworker.h"
-#include "postmaster/postmaster.h"
#include "postmaster/walsummarizer.h"
#include "replication/logicallauncher.h"
#include "replication/origin.h"
diff --git a/src/include/postmaster/datachecksumsworker.h b/src/include/postmaster/datachecksumsworker.h
index 2cd066fd0fe..0daef709ec8 100644
--- a/src/include/postmaster/datachecksumsworker.h
+++ b/src/include/postmaster/datachecksumsworker.h
@@ -24,7 +24,7 @@ typedef enum DataChecksumsWorkerOperation
ENABLE_DATACHECKSUMS,
DISABLE_DATACHECKSUMS,
/* TODO: VERIFY_DATACHECKSUMS, */
-} DataChecksumsWorkerOperation;
+} DataChecksumsWorkerOperation;
/*
* Possible states for a database entry which has been processed. Exported
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9d6b4e57cf3..3049e6018b3 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -417,6 +417,7 @@ CheckPointStmt
CheckpointStatsData
CheckpointerRequest
CheckpointerShmemStruct
+ChecksumBarrierCondition
ChecksumType
Chromosome
CkptSortItem
@@ -587,6 +588,7 @@ CustomScan
CustomScanMethods
CustomScanState
CycleCtr
+DataChecksumsWorkerOperation
DBState
DCHCacheEntry
DEADLOCK_INFO
--
2.51.0