v11-0003-Refactor-output-format-of-pg_ndistinct.patch

text/x-patch
Filename: v11-0003-Refactor-output-format-of-pg_ndistinct.patch
Type: text/x-patch
Part: 2
Message: Re: Extended Statistics set/restore/clear functions.
Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v11-0003
Subject: Refactor output format of pg_ndistinct.
File	+	−
doc/src/sgml/perform.sgml	32	2
src/backend/utils/adt/pg_ndistinct.c	13	9
src/include/statistics/statistics_format.h	30	0
src/test/regress/expected/stats_ext.out	138	18
src/test/regress/sql/stats_ext.sql	6	6
From 8b08131119ed8ca50dcdb8ac84bd7d8216963b2c Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 11 Nov 2025 14:05:49 +0900
Subject: [PATCH v11 3/9] Refactor output format of pg_ndistinct.

The existing format of pg_ndistinct uses a single-object JSON structure
where each key is itself a comma-separated list of attnums. While this
is a very compact format, it's confusing to read and is difficult to
manipulate values within the object. This wasn't a concern until
statistics import functions were introduced, enabling users to inject
hypothetical statistics into an object to observe their effect on the
query planner.

The new format is an array of objects, each object must have the keys
"attributes", which must contain an array of attnums, and "ndistinct",
which must be an integer. This is a quirk because the underlying
internal storage is a double, but the value stored was always an
integer.

The change in format is described from the changes to
src/test/regress/expected/stats_ext.out.
---
 src/include/statistics/statistics_format.h |  30 ++++
 src/backend/utils/adt/pg_ndistinct.c       |  22 +--
 src/test/regress/expected/stats_ext.out    | 156 ++++++++++++++++++---
 src/test/regress/sql/stats_ext.sql         |  12 +-
 doc/src/sgml/perform.sgml                  |  34 ++++-
 5 files changed, 219 insertions(+), 35 deletions(-)
 create mode 100644 src/include/statistics/statistics_format.h

diff --git a/src/include/statistics/statistics_format.h b/src/include/statistics/statistics_format.h
new file mode 100644
index 00000000000..ba97c0880be
--- /dev/null
+++ b/src/include/statistics/statistics_format.h
@@ -0,0 +1,30 @@
+/*-------------------------------------------------------------------------
+ *
+ * statistics_format.h
+ *	  Data related to the format of extended statistics, usable by both
+ *	  frontend and backend code.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/statistics/statistics_format.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STATISTICS_FORMAT_H
+#define STATISTICS_FORMAT_H
+
+/* ----------
+ * pg_ndistinct in human-readable format is a JSON array made of elements with
+ * a predefined set of keys, like:
+ *
+ * [{"ndistinct": 11, "attributes": [3,4]},
+ *  {"ndistinct": 11, "attributes": [3,6]},
+ *  {"ndistinct": 11, "attributes": [4,6]},
+ *  {"ndistinct": 11, "attributes": [3,4,6]}]
+ * ----------
+ */
+#define PG_NDISTINCT_KEY_ATTRIBUTES	"attributes"
+#define PG_NDISTINCT_KEY_NDISTINCT	"ndistinct"
+
+#endif							/* STATISTICS_FORMAT_H */
diff --git a/src/backend/utils/adt/pg_ndistinct.c b/src/backend/utils/adt/pg_ndistinct.c
index 5ce655bce75..56b17758ea5 100644
--- a/src/backend/utils/adt/pg_ndistinct.c
+++ b/src/backend/utils/adt/pg_ndistinct.c
@@ -16,6 +16,7 @@
 
 #include "lib/stringinfo.h"
 #include "statistics/extended_stats_internal.h"
+#include "statistics/statistics_format.h"
 #include "utils/fmgrprotos.h"
 
 
@@ -51,26 +52,29 @@ pg_ndistinct_out(PG_FUNCTION_ARGS)
 	StringInfoData str;
 
 	initStringInfo(&str);
-	appendStringInfoChar(&str, '{');
+	appendStringInfoChar(&str, '[');
 
 	for (i = 0; i < ndist->nitems; i++)
 	{
-		int			j;
 		MVNDistinctItem item = ndist->items[i];
 
 		if (i > 0)
 			appendStringInfoString(&str, ", ");
 
-		for (j = 0; j < item.nattributes; j++)
-		{
-			AttrNumber	attnum = item.attributes[j];
+		if (item.nattributes <= 0)
+			elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
 
-			appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
-		}
-		appendStringInfo(&str, "\": %d", (int) item.ndistinct);
+		appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
+						 item.attributes[0]);
+
+		for (int j = 1; j < item.nattributes; j++)
+			appendStringInfo(&str, ", %d", item.attributes[j]);
+
+		appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
+						 (int) item.ndistinct);
 	}
 
-	appendStringInfoChar(&str, '}');
+	appendStringInfoChar(&str, ']');
 
 	PG_RETURN_CSTRING(str.data);
 }
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 495a1b35018..e9379afe39e 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -196,7 +196,7 @@ Statistics objects:
     "public.ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0
 
 ANALYZE ab1;
-SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit
+SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit
   FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid)
  WHERE s.stxname = 'ab1_a_b_stats';
     stxname    | stxdndistinct | stxddependencies | stxdmcv | stxdinherit 
@@ -476,13 +476,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
 -- correct command
 CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
 ANALYZE ndistinct;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
- stxkind |                    stxdndistinct                    
----------+-----------------------------------------------------
- {d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
+ stxkind |      stxdndistinct       
+---------+--------------------------
+ {d,f,m} | [                       +
+         |     {                   +
+         |         "ndistinct": 11,+
+         |         "attributes": [ +
+         |             3,          +
+         |             4           +
+         |         ]               +
+         |     },                  +
+         |     {                   +
+         |         "ndistinct": 11,+
+         |         "attributes": [ +
+         |             3,          +
+         |             6           +
+         |         ]               +
+         |     },                  +
+         |     {                   +
+         |         "ndistinct": 11,+
+         |         "attributes": [ +
+         |             4,          +
+         |             6           +
+         |         ]               +
+         |     },                  +
+         |     {                   +
+         |         "ndistinct": 11,+
+         |         "attributes": [ +
+         |             3,          +
+         |             4,          +
+         |             6           +
+         |         ]               +
+         |     }                   +
+         | ]
 (1 row)
 
 -- minor improvement, make sure the ctid does not break the matching
@@ -558,13 +588,43 @@ INSERT INTO ndistinct (a, b, c, filler1)
             mod(i,23) || ' dollars and zero cents'
        FROM generate_series(1,1000) s(i);
 ANALYZE ndistinct;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
- stxkind |                      stxdndistinct                       
----------+----------------------------------------------------------
- {d,f,m} | {"3, 4": 221, "3, 6": 247, "4, 6": 323, "3, 4, 6": 1000}
+ stxkind |       stxdndistinct        
+---------+----------------------------
+ {d,f,m} | [                         +
+         |     {                     +
+         |         "ndistinct": 221, +
+         |         "attributes": [   +
+         |             3,            +
+         |             4             +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 247, +
+         |         "attributes": [   +
+         |             3,            +
+         |             6             +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 323, +
+         |         "attributes": [   +
+         |             4,            +
+         |             6             +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 1000,+
+         |         "attributes": [   +
+         |             3,            +
+         |             4,            +
+         |             6             +
+         |         ]                 +
+         |     }                     +
+         | ]
 (1 row)
 
 -- correct estimates
@@ -623,7 +683,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
 (1 row)
 
 DROP STATISTICS s10;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
@@ -707,13 +767,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
 
 CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
 ANALYZE ndistinct;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
- stxkind |                           stxdndistinct                           
----------+-------------------------------------------------------------------
- {d,e}   | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000}
+ stxkind |       stxdndistinct        
+---------+----------------------------
+ {d,e}   | [                         +
+         |     {                     +
+         |         "ndistinct": 221, +
+         |         "attributes": [   +
+         |             -1,           +
+         |             -2            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 247, +
+         |         "attributes": [   +
+         |             -1,           +
+         |             -3            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 323, +
+         |         "attributes": [   +
+         |             -2,           +
+         |             -3            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 1000,+
+         |         "attributes": [   +
+         |             -1,           +
+         |             -2,           +
+         |             -3            +
+         |         ]                 +
+         |     }                     +
+         | ]
 (1 row)
 
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
@@ -756,13 +846,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b
 
 CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
 ANALYZE ndistinct;
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
- stxkind |                        stxdndistinct                        
----------+-------------------------------------------------------------
- {d,e}   | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000}
+ stxkind |       stxdndistinct        
+---------+----------------------------
+ {d,e}   | [                         +
+         |     {                     +
+         |         "ndistinct": 221, +
+         |         "attributes": [   +
+         |             3,            +
+         |             4             +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 247, +
+         |         "attributes": [   +
+         |             3,            +
+         |             -1            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 323, +
+         |         "attributes": [   +
+         |             4,            +
+         |             -1            +
+         |         ]                 +
+         |     },                    +
+         |     {                     +
+         |         "ndistinct": 1000,+
+         |         "attributes": [   +
+         |             3,            +
+         |             4,            +
+         |             -1            +
+         |         ]                 +
+         |     }                     +
+         | ]
 (1 row)
 
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index fc6f152a072..fc4aee6d839 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -125,7 +125,7 @@ ALTER TABLE ab1 ALTER a SET STATISTICS -1;
 ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0;
 \d ab1
 ANALYZE ab1;
-SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit
+SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit
   FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid)
  WHERE s.stxname = 'ab1_a_b_stats';
 ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1;
@@ -297,7 +297,7 @@ CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
 
 ANALYZE ndistinct;
 
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
@@ -338,7 +338,7 @@ INSERT INTO ndistinct (a, b, c, filler1)
 
 ANALYZE ndistinct;
 
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
@@ -364,7 +364,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
 
 DROP STATISTICS s10;
 
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
@@ -399,7 +399,7 @@ CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
 
 ANALYZE ndistinct;
 
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
@@ -423,7 +423,7 @@ CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
 
 ANALYZE ndistinct;
 
-SELECT s.stxkind, d.stxdndistinct
+SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxrelid = 'ndistinct'::regclass
    AND d.stxoid = s.oid;
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index 106583fb296..e0e9f0468cb 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -1579,9 +1579,39 @@ ANALYZE zipcodes;
 SELECT stxkeys AS k, stxdndistinct AS nd
   FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid)
   WHERE stxname = 'stts2';
--[ RECORD 1 ]------------------------------------------------------&zwsp;--
+-[ RECORD 1 ]-------------------
 k  | 1 2 5
-nd | {"1, 2": 33178, "1, 5": 33178, "2, 5": 27435, "1, 2, 5": 33178}
+nd | [                          +
+   |     {                      +
+   |         "ndistinct": 33178,+
+   |         "attributes": [    +
+   |             1,             +
+   |             2              +
+   |         ]                  +
+   |     },                     +
+   |     {                      +
+   |         "ndistinct": 33178,+
+   |         "attributes": [    +
+   |             1,             +
+   |             5              +
+   |         ]                  +
+   |     },                     +
+   |     {                      +
+   |         "ndistinct": 27435,+
+   |         "attributes": [    +
+   |             2,             +
+   |             5              +
+   |         ]                  +
+   |     },                     +
+   |     {                      +
+   |         "ndistinct": 33178,+
+   |         "attributes": [    +
+   |             1,             +
+   |             2,             +
+   |             5              +
+   |         ]                  +
+   |     }                      +
+   | ]
 (1 row)
 </programlisting>
      This indicates that there are three combinations of columns that
-- 
2.51.1