v20250310b-0004-make-progress-reporting-work.patch
text/x-patch
Filename: v20250310b-0004-make-progress-reporting-work.patch
Type: text/x-patch
Part: 3
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch 0004
Subject: make progress reporting work
| File | + | − |
|---|---|---|
| src/backend/catalog/system_views.sql | 18 | 18 |
| src/backend/postmaster/datachecksumsworker.c | 81 | 10 |
| src/include/commands/progress.h | 13 | 14 |
| src/test/regress/expected/rules.out | 19 | 10 |
From 351f66d51c9f604a956080aa3cc5cbd91becc8f0 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@vondra.me>
Date: Sat, 8 Mar 2025 19:34:50 +0100
Subject: [PATCH v20250310b 4/5] make progress reporting work
- Splits the progress status by worker type - one row for launcher,
one row for checksum worker (might be more with parallel workers).
- The launcher only updates database counters, the workers only set
relation/block counters.
- Also reworks the columns in the system view a bit, discards the
"current" fields (we still know the database for each worker).
- Issue: Not sure what to do about relation forks, at this point it
tracks only blocks for MAIN fork.
---
src/backend/catalog/system_views.sql | 36 ++++----
src/backend/postmaster/datachecksumsworker.c | 91 +++++++++++++++++---
src/include/commands/progress.h | 27 +++---
src/test/regress/expected/rules.out | 29 ++++---
4 files changed, 131 insertions(+), 52 deletions(-)
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 0d62976dc1f..4330d0ad656 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1335,25 +1335,25 @@ CREATE VIEW pg_stat_progress_copy AS
LEFT JOIN pg_database D ON S.datid = D.oid;
CREATE VIEW pg_stat_progress_data_checksums AS
- SELECT
- S.pid AS pid, S.datid AS datid, D.datname AS datname,
- CASE S.param1 WHEN 0 THEN 'enabling'
+ SELECT
+ S.pid AS pid, S.datid, D.datname AS datname,
+ CASE S.param1 WHEN 0 THEN 'enabling'
WHEN 1 THEN 'disabling'
- WHEN 2 THEN 'waiting'
- WHEN 3 THEN 'waiting on backends'
- WHEN 4 THEN 'waiting on temporary tables'
- WHEN 5 THEN 'done'
- END AS phase,
- CASE S.param2 WHEN -1 THEN NULL ELSE S.param2 END AS databases_total,
- CASE S.param3 WHEN -1 THEN NULL ELSE S.param3 END AS relations_total,
- S.param4 AS databases_processed,
- S.param5 AS relations_processed,
- S.param6 AS databases_current,
- S.param7 AS relation_current,
- S.param8 AS relation_current_blocks,
- S.param9 AS relation_current_blocks_processed
- FROM pg_stat_get_progress_info('DATACHECKSUMS') AS S
- LEFT JOIN pg_database D ON S.datid = D.oid;
+ WHEN 2 THEN 'waiting'
+ WHEN 3 THEN 'waiting on backends'
+ WHEN 4 THEN 'waiting on temporary tables'
+ WHEN 5 THEN 'waiting on checkpoint'
+ WHEN 6 THEN 'done'
+ END AS phase,
+ CASE S.param2 WHEN -1 THEN NULL ELSE S.param2 END AS databases_total,
+ S.param3 AS databases_done,
+ CASE S.param4 WHEN -1 THEN NULL ELSE S.param4 END AS relations_total,
+ CASE S.param5 WHEN -1 THEN NULL ELSE S.param5 END AS relations_done,
+ CASE S.param6 WHEN -1 THEN NULL ELSE S.param6 END AS blocks_total,
+ CASE S.param7 WHEN -1 THEN NULL ELSE S.param7 END AS blocks_done
+ FROM pg_stat_get_progress_info('DATACHECKSUMS') AS S
+ LEFT JOIN pg_database D ON S.datid = D.oid
+ ORDER BY S.datid; -- return the launcher process first
CREATE VIEW pg_user_mappings AS
SELECT
diff --git a/src/backend/postmaster/datachecksumsworker.c b/src/backend/postmaster/datachecksumsworker.c
index b30d5481b06..4101fdff107 100644
--- a/src/backend/postmaster/datachecksumsworker.c
+++ b/src/backend/postmaster/datachecksumsworker.c
@@ -409,6 +409,11 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg
relns, RelationGetRelationName(reln), forkNames[forkNum], numblocks);
pgstat_report_activity(STATE_RUNNING, activity);
+ /* XXX only do this for main forks, maybe we should do it for all? */
+ if (forkNum == MAIN_FORKNUM)
+ pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL,
+ numblocks);
+
/*
* We are looping over the blocks which existed at the time of process
* start, which is safe since new blocks are created with checksums set
@@ -450,6 +455,11 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg
if (abort_requested)
return false;
+ /* XXX only do this for main forks, maybe we should do it for all? */
+ if (forkNum == MAIN_FORKNUM)
+ pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_DONE,
+ (blknum + 1));
+
vacuum_delay_point(false);
}
@@ -798,6 +808,8 @@ again:
pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS);
+ /* XXX isn't it weird there's no wait between the phase updates? */
+
/*
* Set the state to inprogress-on and wait on the procsignal barrier.
*/
@@ -908,8 +920,29 @@ ProcessAllDatabases(bool immediate_checkpoint)
* columns for processed databases is instead increased such that it can
* be compared against the total.
*/
- pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_TOTAL_DB,
- list_length(DatabaseList));
+ {
+ const int index[] = {
+ PROGRESS_DATACHECKSUMS_DBS_TOTAL,
+ PROGRESS_DATACHECKSUMS_DBS_DONE,
+ PROGRESS_DATACHECKSUMS_RELS_TOTAL,
+ PROGRESS_DATACHECKSUMS_RELS_DONE,
+ PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL,
+ PROGRESS_DATACHECKSUMS_BLOCKS_DONE,
+ };
+
+ int64 vals[6];
+
+ vals[0] = list_length(DatabaseList);
+ vals[1] = 0;
+
+ /* translated to NULL */
+ vals[2] = -1;
+ vals[3] = -1;
+ vals[4] = -1;
+ vals[5] = -1;
+
+ pgstat_progress_update_multi_param(6, index, vals);
+ }
while (true)
{
@@ -921,14 +954,6 @@ ProcessAllDatabases(bool immediate_checkpoint)
DataChecksumsWorkerResultEntry *entry;
bool found;
- /*
- * Indicate which database is being processed set the number of
- * relations to -1 to clear field from previous values. -1 will
- * translate to NULL in the progress view.
- */
- pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_CUR_DB, db->dboid);
- pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_TOTAL_REL, -1);
-
/*
* Check if this database has been processed already, and if so
* whether it should be retried or skipped.
@@ -957,6 +982,12 @@ ProcessAllDatabases(bool immediate_checkpoint)
result = ProcessDatabase(db);
processed_databases++;
+ /*
+ * Update the number of processed databases in the progress report.
+ */
+ pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_DBS_DONE,
+ processed_databases);
+
if (result == DATACHECKSUMSWORKER_SUCCESSFUL)
{
/*
@@ -1050,6 +1081,13 @@ ProcessAllDatabases(bool immediate_checkpoint)
errhint("The server log might have more information on the cause of the error.")));
}
+ /*
+ * When enabling checksums, we have to wait for a checkpoint for the
+ * checksums to e.
+ */
+ pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
+ PROGRESS_DATACHECKSUMS_PHASE_WAITING_CHECKPOINT);
+
/*
* Force a checkpoint to get everything out to disk. The use of immediate
* checkpoints is for running tests, as they would otherwise not execute
@@ -1255,6 +1293,7 @@ DataChecksumsWorkerMain(Datum arg)
List *InitialTempTableList = NIL;
BufferAccessStrategy strategy;
bool aborted = false;
+ int64 rels_done;
enabling_checksums = true;
@@ -1268,6 +1307,10 @@ DataChecksumsWorkerMain(Datum arg)
BackgroundWorkerInitializeConnectionByOid(dboid, InvalidOid,
BGWORKER_BYPASS_ALLOWCONN);
+ /* worker will have a separate entry in pg_stat_progress_data_checksums */
+ pgstat_progress_start_command(PROGRESS_COMMAND_DATACHECKSUMS,
+ InvalidOid);
+
/*
* Get a list of all temp tables present as we start in this database. We
* need to wait until they are all gone until we are done, since we cannot
@@ -1294,6 +1337,24 @@ DataChecksumsWorkerMain(Datum arg)
RelationList = BuildRelationList(false,
DataChecksumsWorkerShmem->process_shared_catalogs);
+
+ /* Update the total number of relations to be processed in this DB. */
+ {
+ const int index[] = {
+ PROGRESS_DATACHECKSUMS_RELS_TOTAL,
+ PROGRESS_DATACHECKSUMS_RELS_DONE
+ };
+
+ int64 vals[2];
+
+ vals[0] = list_length(RelationList);
+ vals[1] = 0;
+
+ pgstat_progress_update_multi_param(2, index, vals);
+ }
+
+ /* process the relations */
+ rels_done = 0;
foreach_oid(reloid, RelationList)
{
if (!ProcessSingleRelationByOid(reloid, strategy))
@@ -1301,6 +1362,9 @@ DataChecksumsWorkerMain(Datum arg)
aborted = true;
break;
}
+
+ pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_RELS_DONE,
+ ++rels_done);
}
list_free(RelationList);
@@ -1313,6 +1377,10 @@ DataChecksumsWorkerMain(Datum arg)
return;
}
+ /* The worker is about to wait for temporary tables to go away. */
+ pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
+ PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL);
+
/*
* Wait for all temp tables that existed when we started to go away. This
* is necessary since we cannot "reach" them to enable checksums. Any temp
@@ -1369,5 +1437,8 @@ DataChecksumsWorkerMain(Datum arg)
list_free(InitialTempTableList);
+ /* worker done */
+ pgstat_progress_end_command();
+
DataChecksumsWorkerShmem->success = DATACHECKSUMSWORKER_SUCCESSFUL;
}
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index 1ebd0c792b4..94b478a6cc9 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -158,21 +158,20 @@
#define PROGRESS_COPY_TYPE_CALLBACK 4
/* Progress parameters for PROGRESS_DATACHECKSUMS */
-#define PROGRESS_DATACHECKSUMS_PHASE 0
-#define PROGRESS_DATACHECKSUMS_TOTAL_DB 1
-#define PROGRESS_DATACHECKSUMS_TOTAL_REL 2
-#define PROGRESS_DATACHECKSUMS_PROCESSED_DB 3
-#define PROGRESS_DATACHECKSUMS_PROCESSED_REL 4
-#define PROGRESS_DATACHECKSUMS_CUR_DB 5
-#define PROGRESS_DATACHECKSUMS_CUR_REL 6
-#define PROGRESS_DATACHECKSUMS_CUR_REL_TOTAL_BLOCKS 7
-#define PROGRESS_DATACHECKSUMS_CUR_REL_PROCESSED_BLOCKS 8
+#define PROGRESS_DATACHECKSUMS_PHASE 0
+#define PROGRESS_DATACHECKSUMS_DBS_TOTAL 1
+#define PROGRESS_DATACHECKSUMS_DBS_DONE 2
+#define PROGRESS_DATACHECKSUMS_RELS_TOTAL 3
+#define PROGRESS_DATACHECKSUMS_RELS_DONE 4
+#define PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL 5
+#define PROGRESS_DATACHECKSUMS_BLOCKS_DONE 6
/* Phases of datachecksumsworker operation */
-#define PROGRESS_DATACHECKSUMS_PHASE_ENABLING 0
-#define PROGRESS_DATACHECKSUMS_PHASE_DISABLING 1
-#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS 2
-#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL 3
-#define PROGRESS_DATACHECKSUMS_DONE 4
+#define PROGRESS_DATACHECKSUMS_PHASE_ENABLING 0
+#define PROGRESS_DATACHECKSUMS_PHASE_DISABLING 1
+#define PROGRESS_DATACHECKSUMS_PHASE_WAITING 2
+#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS 3
+#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL 4
+#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_CHECKPOINT 5
#endif
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 36bed4b168d..2cfea837554 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2050,25 +2050,34 @@ pg_stat_progress_data_checksums| SELECT s.pid,
WHEN 2 THEN 'waiting'::text
WHEN 3 THEN 'waiting on backends'::text
WHEN 4 THEN 'waiting on temporary tables'::text
- WHEN 5 THEN 'done'::text
+ WHEN 5 THEN 'waiting on checkpoint'::text
+ WHEN 6 THEN 'done'::text
ELSE NULL::text
END AS phase,
CASE s.param2
WHEN '-1'::integer THEN NULL::bigint
ELSE s.param2
END AS databases_total,
- CASE s.param3
+ s.param3 AS databases_done,
+ CASE s.param4
WHEN '-1'::integer THEN NULL::bigint
- ELSE s.param3
+ ELSE s.param4
END AS relations_total,
- s.param4 AS databases_processed,
- s.param5 AS relations_processed,
- s.param6 AS databases_current,
- s.param7 AS relation_current,
- s.param8 AS relation_current_blocks,
- s.param9 AS relation_current_blocks_processed
+ CASE s.param5
+ WHEN '-1'::integer THEN NULL::bigint
+ ELSE s.param5
+ END AS relations_done,
+ CASE s.param6
+ WHEN '-1'::integer THEN NULL::bigint
+ ELSE s.param6
+ END AS blocks_total,
+ CASE s.param7
+ WHEN '-1'::integer THEN NULL::bigint
+ ELSE s.param7
+ END AS blocks_done
FROM (pg_stat_get_progress_info('DATACHECKSUMS'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
- LEFT JOIN pg_database d ON ((s.datid = d.oid)));
+ LEFT JOIN pg_database d ON ((s.datid = d.oid)))
+ ORDER BY s.datid;
pg_stat_progress_vacuum| SELECT s.pid,
s.datid,
d.datname,
--
2.48.1