v7-0002-Add-working-input-function-for-pg_ndistinct.patch
text/x-patch
Filename: v7-0002-Add-working-input-function-for-pg_ndistinct.patch
Type: text/x-patch
Part: 1
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v7-0002
Subject: Add working input function for pg_ndistinct.
| File | + | − |
|---|---|---|
| src/backend/statistics/mvdistinct.c | 438 | 7 |
| src/test/regress/expected/stats_ext.out | 22 | 0 |
| src/test/regress/sql/stats_ext.sql | 12 | 0 |
From 938c24365169fe48716965f8f474c671e2286e0f Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Tue, 4 Nov 2025 15:35:58 -0500
Subject: [PATCH v7 2/7] Add working input function for pg_ndistinct.
This will consume the format that was established when the output
function for pg_ndistinct was recently changed.
This will be needed for importing extended statistics.
---
src/backend/statistics/mvdistinct.c | 445 +++++++++++++++++++++++-
src/test/regress/expected/stats_ext.out | 22 ++
src/test/regress/sql/stats_ext.sql | 12 +
3 files changed, 472 insertions(+), 7 deletions(-)
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index ca60841b813..5b7b3aa26a4 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -27,9 +27,15 @@
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
+#include "common/int.h"
+#include "common/jsonapi.h"
#include "lib/stringinfo.h"
+#include "mb/pg_wchar.h"
+#include "nodes/miscnodes.h"
+#include "nodes/pg_list.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
+#include "utils/builtins.h"
#include "utils/fmgrprotos.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@@ -328,28 +334,453 @@ statext_ndistinct_deserialize(bytea *data)
return ndistinct;
}
+typedef enum
+{
+ NDIST_EXPECT_START = 0,
+ NDIST_EXPECT_ITEM,
+ NDIST_EXPECT_KEY,
+ NDIST_EXPECT_ATTNUM_LIST,
+ NDIST_EXPECT_ATTNUM,
+ NDIST_EXPECT_NDISTINCT,
+ NDIST_EXPECT_COMPLETE
+} ndistinctSemanticState;
+
+typedef struct
+{
+ const char *str;
+ ndistinctSemanticState state;
+
+ List *distinct_items; /* Accumulated complete MVNDistinctItems */
+ Node *escontext;
+
+ bool found_attributes; /* Item has an attributes key */
+ bool found_ndistinct; /* Item has ndistinct key */
+ List *attnum_list; /* Accumulated attributes attnums */
+ int64 ndistinct;
+} ndistinctParseState;
+
+/*
+ * Invoked at the start of each MVNDistinctItem.
+ *
+ * The entire JSON document shoul be one array of MVNDistinctItem objects.
+ *
+ * If we're anywhere else in the document, it's an error.
+ */
+static JsonParseErrorType
+ndistinct_object_start(void *state)
+{
+ ndistinctParseState *parse = state;
+
+ if (parse->state != NDIST_EXPECT_ITEM)
+ {
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Expected Item object")));
+ return JSON_SEM_ACTION_FAILED;
+ }
+
+ /* Now we expect to see attributes/ndistinct keys */
+ parse->state = NDIST_EXPECT_KEY;
+ return JSON_SUCCESS;
+}
+
+/*
+ * Routine to allow qsorting of AttNumbers
+ */
+static int
+attnum_compare(const void *aptr, const void *bptr)
+{
+ AttrNumber a = *(const AttrNumber *) aptr;
+ AttrNumber b = *(const AttrNumber *) bptr;
+
+ return pg_cmp_s16(a, b);
+}
+
+
+/*
+ * Invoked at the end of an object.
+ *
+ * Check to ensure that it was a complete MVNDistinctItem
+ *
+ */
+static JsonParseErrorType
+ndistinct_object_end(void *state)
+{
+ ndistinctParseState *parse = state;
+
+ int natts = 0;
+ AttrNumber *attrsort;
+
+ MVNDistinctItem *item;
+
+ if (!parse->found_attributes)
+ {
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Item must contain \"attributes\" key")));
+ return JSON_SEM_ACTION_FAILED;
+ }
+
+ if (!parse->found_ndistinct)
+ {
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Item must contain \"ndistinct\" key")));
+ return JSON_SEM_ACTION_FAILED;
+ }
+
+ if (parse->attnum_list == NIL)
+ {
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("The \"attributes\" key must be an non-empty array")));
+ return JSON_SEM_ACTION_FAILED;
+ }
+
+ /*
+ * We need at least 2 attnums for a ndistinct item, anything less is
+ * malformed.
+ */
+ natts = parse->attnum_list->length;
+ if (natts < 2)
+ {
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("The attributes key must contain an array of at least two attnums")));
+
+ return JSON_SEM_ACTION_FAILED;
+ }
+ attrsort = palloc0(natts * sizeof(AttrNumber));
+
+ /* Create the MVNDistinctItem */
+ item = palloc(sizeof(MVNDistinctItem));
+ item->nattributes = natts;
+ item->attributes = palloc0(natts * sizeof(AttrNumber));
+ item->ndistinct = (double) parse->ndistinct;
+
+ /* fill out both attnum list and sortable list */
+ for (int i = 0; i < natts; i++)
+ {
+ attrsort[i] = (AttrNumber) parse->attnum_list->elements[i].int_value;
+ item->attributes[i] = attrsort[i];
+ }
+
+ /* Check attrsort for uniqueness */
+ qsort(attrsort, natts, sizeof(AttrNumber), attnum_compare);
+ for (int i = 1; i < natts; i++)
+ if (attrsort[i] == attrsort[i - 1])
+ {
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("attnum list duplicate value found: %d", attrsort[i])));
+
+ return JSON_SEM_ACTION_FAILED;
+ }
+ pfree(attrsort);
+
+ parse->distinct_items = lappend(parse->distinct_items, (void *) item);
+
+ /* reset item state vars */
+ list_free(parse->attnum_list);
+ parse->attnum_list = NIL;
+ parse->ndistinct = 0;
+ parse->found_attributes = false;
+ parse->found_ndistinct = false;
+
+ /* Now we are looking for the next MVNDistinctItem */
+ parse->state = NDIST_EXPECT_ITEM;
+ return JSON_SUCCESS;
+}
+
+
+/*
+ * ndsitinct input format has two types of arrays, the outer MVNDistinctItem
+ * array, and the attnum list array within each MVNDistinctItem.
+ */
+static JsonParseErrorType
+ndistinct_array_start(void *state)
+{
+ ndistinctParseState *parse = state;
+
+ switch (parse->state)
+ {
+ case NDIST_EXPECT_ATTNUM_LIST:
+ parse->state = NDIST_EXPECT_ATTNUM;
+ break;
+
+ case NDIST_EXPECT_START:
+ parse->state = NDIST_EXPECT_ITEM;
+ break;
+
+ default:
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Array found in unexpected place")));
+ return JSON_SEM_ACTION_FAILED;
+ }
+
+ return JSON_SUCCESS;
+}
+
+
+static JsonParseErrorType
+ndistinct_array_end(void *state)
+{
+ ndistinctParseState *parse = state;
+
+ /* The attnum list is complete, look for more MVNDistinctItem keys */
+ if (parse->state == NDIST_EXPECT_ATTNUM)
+ {
+ parse->state = NDIST_EXPECT_KEY;
+ return JSON_SUCCESS;
+ }
+
+ if (parse->state == NDIST_EXPECT_ITEM)
+ {
+ parse->state = NDIST_EXPECT_COMPLETE;
+ return JSON_SUCCESS;
+ }
+
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Array found in unexpected place")));
+ return JSON_SEM_ACTION_FAILED;
+}
+
+
+/*
+ * The valid keys for the MVNDistinctItem object are:
+ * - attributes
+ * - ndistinct
+ */
+static JsonParseErrorType
+ndistinct_object_field_start(void *state, char *fname, bool isnull)
+{
+ ndistinctParseState *parse = state;
+
+ const char *attributes = "attributes";
+ const char *ndistinct = "ndistinct";
+
+ if (strcmp(fname, attributes) == 0)
+ {
+ parse->found_attributes = true;
+ parse->state = NDIST_EXPECT_ATTNUM_LIST;
+ return JSON_SUCCESS;
+ }
+
+ if (strcmp(fname, ndistinct) == 0)
+ {
+ parse->found_ndistinct = true;
+ parse->state = NDIST_EXPECT_NDISTINCT;
+ return JSON_SUCCESS;
+ }
+
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Only allowed keys are \%s\" and \%s\".", attributes, ndistinct)));
+ return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ *
+ */
+static JsonParseErrorType
+ndistinct_array_element_start(void *state, bool isnull)
+{
+ ndistinctParseState *parse = state;
+
+ if (parse->state == NDIST_EXPECT_ATTNUM)
+ {
+ if (isnull)
+ {
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Attnum list elements cannot be null.")));
+
+ return JSON_SEM_ACTION_FAILED;
+ }
+ return JSON_SUCCESS;
+ }
+
+ if (parse->state == NDIST_EXPECT_ITEM)
+ {
+ if (isnull)
+ {
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Item list elements cannot be null.")));
+
+ return JSON_SEM_ACTION_FAILED;
+ }
+
+ return JSON_SUCCESS;
+ }
+
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Unexpected array element.")));
+
+ return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ * Handle scalar events from the ndistinct input parser.
+ *
+ */
+static JsonParseErrorType
+ndistinct_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ ndistinctParseState *parse = state;
+
+ if (parse->state == NDIST_EXPECT_ATTNUM)
+ {
+ AttrNumber attnum = pg_strtoint16_safe(token, parse->escontext);
+
+ if (SOFT_ERROR_OCCURRED(parse->escontext))
+ return JSON_SEM_ACTION_FAILED;
+
+ parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum);
+ return JSON_SUCCESS;
+ }
+
+ if (parse->state == NDIST_EXPECT_NDISTINCT)
+ {
+ /*
+ * While the structure dictates that ndistinct in a double precision
+ * floating point, in practice it has always been an integer, and it
+ * is output as such. Therefore, we follow usage precendent over the
+ * actual storage structure, and read it in as an integer.
+ */
+ parse->ndistinct = pg_strtoint64_safe(token, parse->escontext);
+
+ if (SOFT_ERROR_OCCURRED(parse->escontext))
+ return JSON_SEM_ACTION_FAILED;
+
+ parse->state = NDIST_EXPECT_KEY;
+ return JSON_SUCCESS;
+ }
+
+ ereturn(parse->escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", parse->str),
+ errdetail("Unexpected scalar.")));
+
+ return JSON_SEM_ACTION_FAILED;
+}
+
/*
* pg_ndistinct_in
* input routine for type pg_ndistinct
*
- * pg_ndistinct is real enough to be a table column, but it has no
- * operations of its own, and disallows input (just like pg_node_tree).
+ * example input:
+ * [{"attributes": [6, -1], "ndistinct": 14},
+ * {"attributes": [6, -2], "ndistinct": 9143},
+ * {"attributes": [-1,-2], "ndistinct": 13454},
+ * {"attributes": [6, -1, -2], "ndistinct": 14549}]
*/
Datum
pg_ndistinct_in(PG_FUNCTION_ARGS)
{
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+ char *str = PG_GETARG_CSTRING(0);
- PG_RETURN_VOID(); /* keep compiler quiet */
+ ndistinctParseState parse_state;
+ JsonParseErrorType result;
+ JsonLexContext *lex;
+ JsonSemAction sem_action;
+
+ /* initialize semantic state */
+ parse_state.str = str;
+ parse_state.state = NDIST_EXPECT_START;
+ parse_state.distinct_items = NIL;
+ parse_state.escontext = fcinfo->context;
+ parse_state.found_attributes = false;
+ parse_state.found_ndistinct = false;
+ parse_state.attnum_list = NIL;
+ parse_state.ndistinct = 0;
+
+ /* set callbacks */
+ sem_action.semstate = (void *) &parse_state;
+ sem_action.object_start = ndistinct_object_start;
+ sem_action.object_end = ndistinct_object_end;
+ sem_action.array_start = ndistinct_array_start;
+ sem_action.array_end = ndistinct_array_end;
+ sem_action.object_field_start = ndistinct_object_field_start;
+ sem_action.object_field_end = NULL;
+ sem_action.array_element_start = ndistinct_array_element_start;
+ sem_action.array_element_end = NULL;
+ sem_action.scalar = ndistinct_scalar;
+
+ lex = makeJsonLexContextCstringLen(NULL, str, strlen(str),
+ PG_UTF8, true);
+ result = pg_parse_json(lex, &sem_action);
+ freeJsonLexContext(lex);
+
+ if (result == JSON_SUCCESS)
+ {
+ MVNDistinct *ndistinct;
+ int nitems = parse_state.distinct_items->length;
+ bytea *bytes;
+
+ ndistinct = palloc(offsetof(MVNDistinct, items) +
+ nitems * sizeof(MVNDistinctItem));
+
+ ndistinct->magic = STATS_NDISTINCT_MAGIC;
+ ndistinct->type = STATS_NDISTINCT_TYPE_BASIC;
+ ndistinct->nitems = nitems;
+
+ for (int i = 0; i < nitems; i++)
+ {
+ MVNDistinctItem *item = parse_state.distinct_items->elements[i].ptr_value;
+
+ ndistinct->items[i].ndistinct = item->ndistinct;
+ ndistinct->items[i].nattributes = item->nattributes;
+ ndistinct->items[i].attributes = item->attributes;
+
+ /*
+ * free the MVNDistinctItem, but not the attributes we're still
+ * using
+ */
+ pfree(item);
+ }
+ bytes = statext_ndistinct_serialize(ndistinct);
+
+ list_free(parse_state.distinct_items);
+ for (int i = 0; i < nitems; i++)
+ pfree(ndistinct->items[i].attributes);
+ pfree(ndistinct);
+
+ PG_RETURN_BYTEA_P(bytes);
+ }
+ else if (result == JSON_SEM_ACTION_FAILED)
+ PG_RETURN_NULL(); /* escontext already set */
+
+ /* Anything else is a generic JSON parse error */
+ ereturn(parse_state.escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_ndistinct: \"%s\"", str),
+ errdetail("Must be valid JSON.")));
+ PG_RETURN_NULL();
}
/*
* pg_ndistinct
* output routine for type pg_ndistinct
*
- * Produces a human-readable representation of the value.
+ * Produces a human-readable representation of the value, in the format:
+ * [{"attributes": [attnum,. ..], "ndistinct": int}, ...]
+ *
*/
Datum
pg_ndistinct_out(PG_FUNCTION_ARGS)
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 2dc771369e5..1d837badb96 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -3689,6 +3689,28 @@ SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
196 | 196
(1 row)
+-- Test input function of pg_ndistinct.
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+ {"attributes" : [2,-1], "ndistinct" : 4},
+ {"attributes" : [2,3,-1], "ndistinct" : 4},
+ {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct;
+ pg_ndistinct
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ [{"attributes": [2, 3], "ndistinct": 4}, {"attributes": [2, -1], "ndistinct": 4}, {"attributes": [2, 3, -1], "ndistinct": 4}, {"attributes": [1, 3, -1, -2], "ndistinct": 4}]
+(1 row)
+
+-- error, cannot duplicate attribute
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+ {"attributes" : [2,-1], "ndistinct" : 4},
+ {"attributes" : [2,3,2], "ndistinct" : 4},
+ {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct;
+ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3], "ndistinct" : 4},
+ {"attributes" : [2,-1], "ndistinct" : 4},
+ {"attributes" : [2,3,2], "ndistinct" : 4},
+ {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]"
+LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+ ^
+DETAIL: attnum list duplicate value found: 2
-- Tidy up
DROP TABLE sb_1, sb_2 CASCADE;
DROP FUNCTION extstat_small(x numeric);
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 207c431e68c..2c306dcc971 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1831,6 +1831,18 @@ ANALYZE sb_2;
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
+-- Test input function of pg_ndistinct.
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+ {"attributes" : [2,-1], "ndistinct" : 4},
+ {"attributes" : [2,3,-1], "ndistinct" : 4},
+ {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct;
+
+-- error, cannot duplicate attribute
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4},
+ {"attributes" : [2,-1], "ndistinct" : 4},
+ {"attributes" : [2,3,2], "ndistinct" : 4},
+ {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct;
+
-- Tidy up
DROP TABLE sb_1, sb_2 CASCADE;
DROP FUNCTION extstat_small(x numeric);
--
2.51.1