v7-0005-Include-Extended-Statistics-in-pg_dump.patch
text/x-patch
Filename: v7-0005-Include-Extended-Statistics-in-pg_dump.patch
Type: text/x-patch
Part: 4
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v7-0005
Subject: Include Extended Statistics in pg_dump.
| File | + | − |
|---|---|---|
| src/bin/pg_dump/pg_backup_archiver.c | 2 | 1 |
| src/bin/pg_dump/pg_backup.h | 1 | 0 |
| src/bin/pg_dump/pg_dump.c | 229 | 0 |
| src/bin/pg_dump/t/002_pg_dump.pl | 28 | 0 |
From 7041a6333c9ad273d16cf844cf9260dd4e795fb8 Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Sat, 21 Jun 2025 03:16:24 -0400
Subject: [PATCH v7 5/5] Include Extended Statistics in pg_dump.
Incorporate the new pg_restore_extended_stats() function into pg_dump.
This detects the existence of extended statistics statistics (i.e.
pg_statistic_ext_data rows).
This handles many of the changes that have happened to extended
statistic statistics over the various versions, including:
* Format change for pg_ndistinct and pg_dependencies in current
development version. Earlier versions have the format translated via
the pg_dump SQL statement.
* Inherited extended statistics were introduced in v15.
* Expressions were introduced to extended statistics in v14.
* MCV extended statistics were introduced in v13.
* pg_statistic_ext_data and pg_stats_ext introduced in v12, prior to
that ndstinct and depdendencies data (the only kind of stats that
existed were directly on pg_statistic_ext.
* Extended Statistics were introduced in v10, so there is no support for
prior versions necessary.
---
src/bin/pg_dump/pg_backup.h | 1 +
src/bin/pg_dump/pg_backup_archiver.c | 3 +-
src/bin/pg_dump/pg_dump.c | 229 +++++++++++++++++++++++++++
src/bin/pg_dump/t/002_pg_dump.pl | 28 ++++
4 files changed, 260 insertions(+), 1 deletion(-)
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index d9041dad720..df708e4ced6 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -68,6 +68,7 @@ enum _dumpPreparedQueries
PREPQUERY_DUMPCOMPOSITETYPE,
PREPQUERY_DUMPDOMAIN,
PREPQUERY_DUMPENUMTYPE,
+ PREPQUERY_DUMPEXTSTATSSTATS,
PREPQUERY_DUMPFUNC,
PREPQUERY_DUMPOPR,
PREPQUERY_DUMPRANGETYPE,
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 59eaecb4ed7..1bfd296e0ee 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -3008,7 +3008,8 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
strcmp(te->desc, "SEARCHPATH") == 0)
return REQ_SPECIAL;
- if (strcmp(te->desc, "STATISTICS DATA") == 0)
+ if ((strcmp(te->desc, "STATISTICS DATA") == 0) ||
+ (strcmp(te->desc, "EXTENDED STATISTICS DATA") == 0))
{
if (!ropt->dumpStatistics)
return 0;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 4b8cd49df09..5b2f3c1fbef 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -324,6 +324,7 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo);
static void dumpIndex(Archive *fout, const IndxInfo *indxinfo);
static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo);
static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo);
+static void dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo);
static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo);
static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo);
static void dumpTSParser(Archive *fout, const TSParserInfo *prsinfo);
@@ -8258,6 +8259,9 @@ getExtendedStatistics(Archive *fout)
/* Decide whether we want to dump it */
selectDumpableStatisticsObject(&(statsextinfo[i]), fout);
+
+ if (fout->dopt->dumpStatistics)
+ statsextinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS;
}
PQclear(res);
@@ -11712,6 +11716,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
break;
case DO_STATSEXT:
dumpStatisticsExt(fout, (const StatsExtInfo *) dobj);
+ dumpStatisticsExtStats(fout, (const StatsExtInfo *) dobj);
break;
case DO_REFRESH_MATVIEW:
refreshMatViewData(fout, (const TableDataInfo *) dobj);
@@ -18515,6 +18520,230 @@ dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo)
free(qstatsextname);
}
+/*
+ * dumpStatisticsExtStats
+ * write out to fout the stats for an extended statistics object
+ */
+static void
+dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo)
+{
+ DumpOptions *dopt = fout->dopt;
+ PQExpBuffer query;
+ PGresult *res;
+ int nstats;
+
+ /* Do nothing if not dumping statistics */
+ if (!dopt->dumpStatistics)
+ return;
+
+ if (!fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS])
+ {
+ PQExpBuffer pq = createPQExpBuffer();
+
+ /*
+ * Set up query for constraint-specific details.
+ *
+ * 19+: query pg_stats_ext and pg_stats_ext_exprs as-is 15-18: query
+ * pg_stats_ext translating the ndistinct and depdendencies, 14:
+ * inherited is always NULL 12-13: no pg_stats_ext_exprs 10-11: no
+ * pg_stats_ext, join pg_statistic_ext and pg_namespace
+ */
+
+ appendPQExpBufferStr(pq,
+ "PREPARE getExtStatsStats(pg_catalog.name, pg_catalog.name) AS\n"
+ "SELECT ");
+
+ /* Versions 15+ have inherited stats */
+ if (fout->remoteVersion >= 150000)
+ appendPQExpBufferStr(pq, "e.inherited, ");
+ else
+ appendPQExpBufferStr(pq, "false AS inherited, ");
+
+ /*
+ * Versions < 19 use the old ndistintinct and depdendencies formats
+ * Versions < 12 use the pg_statistic_ext columns
+ *
+ * TODO: Until v18 is released the master branch has a
+ * server_version_num of 180000. We will update this to 190000 as soon
+ * as the master branch updates.
+ */
+ if (fout->remoteVersion >= 180000)
+ appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies, ");
+ else
+ appendPQExpBufferStr(pq,
+ "( "
+ "SELECT json_agg( "
+ " json_build_object( "
+ " 'attributes', "
+ " string_to_array(kv.key, ', ')::integer[], "
+ " 'ndistinct', "
+ " kv.value::bigint )) "
+ "FROM json_each_text(e.n_distinct::text::json) AS kv"
+ ") AS n_distinct, "
+ "( "
+ "SELECT json_agg( "
+ " json_build_object( "
+ " 'attributes', "
+ " string_to_array( "
+ " split_part(kv.key, ' => ', 1), "
+ " ', ')::integer[], "
+ " 'dependency', "
+ " split_part(kv.key, ' => ', 2)::integer, "
+ " 'degree', "
+ " kv.value::double precision )) "
+ "FROM json_each_text(e.dependencies::text::json) AS kv "
+ ") AS dependencies, ");
+
+ /* Versions < 12 do not have MCV */
+ if (fout->remoteVersion >= 130000)
+ appendPQExpBufferStr(pq,
+ "e.most_common_vals, e.most_common_val_nulls, "
+ "e.most_common_freqs, e.most_common_base_freqs, ");
+ else
+ appendPQExpBufferStr(pq,
+ "NULL AS most_common_vals, NULL AS most_common_val_nulls, "
+ "NULL AS most_common_freqs, NULL AS most_common_base_freqs, ");
+
+ /* Expressions were introduced in v14 */
+ if (fout->remoteVersion >= 140000)
+ {
+ appendPQExpBufferStr(pq,
+ "( "
+ "SELECT array_agg( "
+ " ARRAY[ee.null_frac::text, ee.avg_width::text, "
+ " ee.n_distinct::text, ee.most_common_vals::text, "
+ " ee.most_common_freqs::text, ee.histogram_bounds::text, "
+ " ee.correlation::text, ee.most_common_elems::text, "
+ " ee.most_common_elem_freqs::text, "
+ " ee.elem_count_histogram::text]) "
+ "FROM pg_stats_ext_exprs AS ee "
+ "WHERE ee.statistics_schemaname = $1 "
+ "AND ee.statistics_name = $2 ");
+
+ /* Inherited expressions introduced in v15 */
+ if (fout->remoteVersion >= 150000)
+ appendPQExpBufferStr(pq, "AND ee.inherited = e.inherited");
+
+ appendPQExpBufferStr(pq, ") AS exprs ");
+ }
+ else
+ appendPQExpBufferStr(pq, "NULL AS exprs ");
+
+ /* pg_stats_ext introduced in v12 */
+ if (fout->remoteVersion >= 120000)
+ appendPQExpBufferStr(pq,
+ "FROM pg_catalog.pg_stats_ext AS e "
+ "WHERE e.statistics_schemaname = $1 "
+ "AND e.statistics_name = $2 ");
+ else
+ appendPQExpBufferStr(pq,
+ "FROM ( "
+ "SELECT s.stxndistinct AS n_distinct, "
+ " s.stxdependencies AS dependencies "
+ "FROM pg_catalog.pg_statistics_ext AS s "
+ "JOIN pg_catalog.pg_namespace AS n "
+ "ON n.oid = s.stxnamespace "
+ "WHERE n.nspname = $1 "
+ "AND e.stxname = $2 "
+ ") AS e ");
+
+ appendPQExpBufferStr(pq, "ORDER BY e.inherited");
+
+ ExecuteSqlStatement(fout, pq->data);
+
+ fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS] = true;
+
+ destroyPQExpBuffer(pq);
+ }
+
+ query = createPQExpBuffer();
+
+ appendPQExpBufferStr(query, "EXECUTE getExtStatsStats(");
+ appendStringLiteralAH(query, statsextinfo->dobj.namespace->dobj.name, fout);
+ appendPQExpBufferStr(query, "::pg_catalog.name, ");
+ appendStringLiteralAH(query, statsextinfo->dobj.name, fout);
+ appendPQExpBufferStr(query, "::pg_catalog.name)");
+
+ res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+ destroyPQExpBuffer(query);
+
+ nstats = PQntuples(res);
+
+ if (nstats > 0)
+ {
+ PQExpBuffer out = createPQExpBuffer();
+
+ int i_inherited = PQfnumber(res, "inherited");
+ int i_ndistinct = PQfnumber(res, "n_distinct");
+ int i_dependencies = PQfnumber(res, "dependencies");
+ int i_mcv = PQfnumber(res, "most_common_vals");
+ int i_mcv_nulls = PQfnumber(res, "most_common_val_nulls");
+ int i_mcf = PQfnumber(res, "most_common_freqs");
+ int i_mcbf = PQfnumber(res, "most_common_base_freqs");
+ int i_exprs = PQfnumber(res, "exprs");
+
+ for (int i = 0; i < nstats; i++)
+ {
+ if (PQgetisnull(res, i, i_inherited))
+ pg_fatal("inherited cannot be NULL");
+
+ appendPQExpBufferStr(out,
+ "SELECT * FROM pg_catalog.pg_restore_extended_stats(\n");
+ appendPQExpBuffer(out, "\t'version', '%d'::integer,\n",
+ fout->remoteVersion);
+ appendPQExpBufferStr(out, "\t'statistics_schemaname', ");
+ appendStringLiteralAH(out, statsextinfo->dobj.namespace->dobj.name, fout);
+ appendPQExpBufferStr(out, ",\n\t'statistics_name', ");
+ appendStringLiteralAH(out, statsextinfo->dobj.name, fout);
+ appendNamedArgument(out, fout, "inherited", "boolean",
+ PQgetvalue(res, i, i_inherited));
+
+ if (!PQgetisnull(res, i, i_ndistinct))
+ appendNamedArgument(out, fout, "n_distinct", "pg_ndistinct",
+ PQgetvalue(res, i, i_ndistinct));
+
+ if (!PQgetisnull(res, i, i_dependencies))
+ appendNamedArgument(out, fout, "dependencies", "pg_dependencies",
+ PQgetvalue(res, i, i_dependencies));
+
+ if (!PQgetisnull(res, i, i_mcv))
+ appendNamedArgument(out, fout, "most_common_vals", "text[]",
+ PQgetvalue(res, i, i_mcv));
+
+ if (!PQgetisnull(res, i, i_mcv_nulls))
+ appendNamedArgument(out, fout, "most_common_val_nulls", "boolean[]",
+ PQgetvalue(res, i, i_mcv_nulls));
+
+ if (!PQgetisnull(res, i, i_mcf))
+ appendNamedArgument(out, fout, "most_common_freqs", "double precision[]",
+ PQgetvalue(res, i, i_mcf));
+
+ if (!PQgetisnull(res, i, i_mcbf))
+ appendNamedArgument(out, fout, "most_common_base_freqs", "double precision[]",
+ PQgetvalue(res, i, i_mcbf));
+
+ if (!PQgetisnull(res, i, i_exprs))
+ appendNamedArgument(out, fout, "exprs", "text[]",
+ PQgetvalue(res, i, i_exprs));
+
+ appendPQExpBufferStr(out, "\n);\n");
+ }
+
+ ArchiveEntry(fout, nilCatalogId, createDumpId(),
+ ARCHIVE_OPTS(.tag = statsextinfo->dobj.name,
+ .namespace = statsextinfo->dobj.namespace->dobj.name,
+ .owner = statsextinfo->rolname,
+ .description = "EXTENDED STATISTICS DATA",
+ .section = SECTION_POST_DATA,
+ .createStmt = out->data,
+ .deps = &statsextinfo->dobj.dumpId,
+ .nDeps = 1));
+ destroyPQExpBuffer(out);
+ }
+ PQclear(res);
+}
+
/*
* dumpConstraint
* write out to fout a user-defined constraint
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 445a541abf6..6681265974f 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -4772,6 +4772,34 @@ my %tests = (
},
},
+ #
+ # EXTENDED stats will end up in SECTION_POST_DATA.
+ #
+ 'extended_statistics_import' => {
+ create_sql => '
+ CREATE TABLE dump_test.has_ext_stats
+ AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g);
+ CREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats;
+ ANALYZE dump_test.has_ext_stats;',
+ regexp => qr/^
+ \QSELECT * FROM pg_catalog.pg_restore_extended_stats(\E\s+/xm,
+ like => {
+ %full_runs,
+ %dump_test_schema_runs,
+ no_data_no_schema => 1,
+ no_schema => 1,
+ section_post_data => 1,
+ statistics_only => 1,
+ schema_only_with_statistics => 1,
+ },
+ unlike => {
+ exclude_dump_test_schema => 1,
+ no_statistics => 1,
+ only_dump_measurement => 1,
+ schema_only => 1,
+ },
+ },
+
#
# While attribute stats (aka pg_statistic stats) only appear for tables
# that have been analyzed, all tables will have relation stats because
--
2.51.0