v3-0002-Add-working-input-function-for-pg_dependencies.patch
text/x-patch
Filename: v3-0002-Add-working-input-function-for-pg_dependencies.patch
Type: text/x-patch
Part: 1
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v3-0002
Subject: Add working input function for pg_dependencies.
| File | + | − |
|---|---|---|
| src/backend/statistics/dependencies.c | 338 | 10 |
| src/test/regress/expected/stats_ext.out | 18 | 0 |
| src/test/regress/sql/stats_ext.sql | 6 | 0 |
From 59e3ed42dd528a1901ae42909398c6c6dd83e578 Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Tue, 17 Dec 2024 19:47:43 -0500
Subject: [PATCH v3 2/4] Add working input function for pg_dependencies.
This is needed to import extended statistics.
---
src/backend/statistics/dependencies.c | 348 +++++++++++++++++++++++-
src/test/regress/expected/stats_ext.out | 18 ++
src/test/regress/sql/stats_ext.sql | 6 +
3 files changed, 362 insertions(+), 10 deletions(-)
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index eb2fc4366b4..ec26a2427e2 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -13,18 +13,27 @@
*/
#include "postgres.h"
+#include "access/attnum.h"
#include "access/htup_details.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
+#include "common/int.h"
+#include "common/jsonapi.h"
+#include "fmgr.h"
#include "lib/stringinfo.h"
+#include "mb/pg_wchar.h"
+#include "nodes/miscnodes.h"
#include "nodes/nodeFuncs.h"
#include "nodes/nodes.h"
#include "nodes/pathnodes.h"
+#include "nodes/pg_list.h"
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
#include "parser/parsetree.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
#include "utils/fmgroids.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
@@ -643,24 +652,343 @@ statext_dependencies_load(Oid mvoid, bool inh)
return result;
}
+/*
+ * Working state shared by the JSON semantic-action callbacks while parsing
+ * pg_dependencies input text.
+ */
+typedef struct
+{
+ const char *str; /* the input text; quoted in every error message */
+ bool found_only_object; /* set once the first object start has been seen */
+ List *dependency_list; /* MVDependency pointers, one appended per object key */
+ Node *escontext; /* error-reporting context, copied from fcinfo->context */
+
+ MVDependency *current_dependency; /* item whose degree the next scalar fills in; NULL otherwise */
+} dependenciesParseState;
+
+/*
+ * JSON semantic action: start of an object.
+ *
+ * A pg_dependencies value is exactly one top-level object with no
+ * sub-objects.  The first object start is recorded in the parse state; any
+ * later one can only be a nested object and is rejected.
+ *
+ * Returns JSON_SUCCESS for the top-level object, otherwise reports the
+ * problem through parse->escontext and returns JSON_SEM_ACTION_FAILED.
+ */
+static JsonParseErrorType
+dependencies_object_start(void *state)
+{
+	dependenciesParseState *parse = state;
+
+	if (parse->found_only_object)
+	{
+		/*
+		 * Use errsave() rather than ereturn(): ereturn() returns its dummy
+		 * value from this function, and (Datum) 0 is JSON_SUCCESS, so a soft
+		 * error would let the parser carry on as if nothing had happened.
+		 */
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("Must not contain nested objects.")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	parse->found_only_object = true;
+	return JSON_SUCCESS;
+}
+
+/*
+ * JSON semantic action: start of an array.
+ *
+ * The pg_dependencies input format has no arrays, so encountering one is
+ * always an error.  The problem is reported through parse->escontext and
+ * JSON_SEM_ACTION_FAILED is returned.
+ */
+static JsonParseErrorType
+dependencies_array_start(void *state)
+{
+	dependenciesParseState *parse = state;
+
+	/*
+	 * errsave(), not ereturn(): ereturn() would return (Datum) 0 from this
+	 * function, which the JSON parser reads as JSON_SUCCESS.
+	 */
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+			 errdetail("All dependencies count values are scalar doubles.")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ * qsort() comparator ordering AttrNumber values ascending.
+ */
+static int
+attnum_compare(const void *aptr, const void *bptr)
+{
+	const AttrNumber *lhs = aptr;
+	const AttrNumber *rhs = bptr;
+
+	return pg_cmp_s16(*lhs, *rhs);
+}
+
+/*
+ * JSON semantic action: start of an object field, i.e. one dependency key.
+ *
+ * The object keys are themselves comma-separated lists of attnums, with
+ * negative attnums representing one of the expressions defined in the
+ * extended statistics object, followed by a => and a final attnum.
+ *
+ * example: "-1, 2 => -1"
+ *
+ * On success a new MVDependency holding the attnums (the list in input
+ * order, then the target attnum) is appended to parse->dependency_list and
+ * remembered in parse->current_dependency so the scalar callback can fill in
+ * its degree.
+ *
+ * Returns JSON_SUCCESS, or reports the problem through parse->escontext and
+ * returns JSON_SEM_ACTION_FAILED.  Errors are raised with errsave() rather
+ * than ereturn(), because ereturn() would return (Datum) 0 from this
+ * function and that value is JSON_SUCCESS.
+ */
+static JsonParseErrorType
+dependencies_object_field_start(void *state, char *fname, bool isnull)
+{
+	dependenciesParseState *parse = state;
+	char	   *token;
+	char	   *saveptr;
+	const char *delim = ", ";
+	const char *arrow_delim = " => ";
+	char	   *scratch;
+	char	   *arrow_p;
+	char	   *after_arrow_p;
+	List	   *attnum_list = NIL;
+	int			natts;
+	AttrNumber	final_attnum;
+	MVDependency *dep;
+	AttrNumber *attrsort;
+
+	if (isnull || fname == NULL)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("All dependencies attnum lists must be a comma separated list of attnums with a final => attnum.")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/* work on a private copy, since the key is split in place below */
+	scratch = pstrdup(fname);
+
+	/* The substring ' => ' must occur exactly once */
+	arrow_p = strstr(scratch, arrow_delim);
+	if (arrow_p == NULL)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("All dependencies attnum lists must be a comma separated list of attnums with a final => attnum.")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * Everything to the left of the arrow is the attribute list, so split
+	 * that off into its own string.
+	 *
+	 * Everything to the right should be the lone target attribute.
+	 */
+	*arrow_p = '\0';
+
+	/* look at the character immediately beyond the delimiter we just found */
+	after_arrow_p = arrow_p + strlen(arrow_delim);
+
+	/* We should not find another arrow delim */
+	if (strstr(after_arrow_p, arrow_delim) != NULL)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("All dependencies attnum lists must be a comma separated list of attnums with a final => attnum.")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/* what is left should be exactly one attnum */
+	final_attnum = pg_strtoint16_safe(after_arrow_p, parse->escontext);
+	if (SOFT_ERROR_OCCURRED(parse->escontext))
+		return JSON_SEM_ACTION_FAILED;
+
+	/* Left of the arrow is just regular attnums; collect the raw tokens */
+	for (token = strtok_r(scratch, delim, &saveptr);
+		 token != NULL;
+		 token = strtok_r(NULL, delim, &saveptr))
+	{
+		attnum_list = lappend(attnum_list, (void *) token);
+	}
+
+	/* list_length() copes with NIL, which is what an empty left side yields */
+	natts = list_length(attnum_list);
+
+	/*
+	 * We need at least 1 attnum left of the arrow for a dependencies item,
+	 * anything less is malformed.
+	 */
+	if (natts < 1)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("All dependencies attnum lists must be a comma separated list of attnums.")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * The attribute count is stored in an AttrNumber below, so bound it
+	 * before it can wrap around.
+	 */
+	if (natts + 1 > STATS_MAX_DIMENSIONS)
+	{
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("A dependency cannot reference more than %d attnums.",
+						   STATS_MAX_DIMENSIONS)));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * Allocate enough space for the dependency, the attnums in the list, plus
+	 * the final attnum
+	 */
+	dep = palloc0(offsetof(MVDependency, attributes) + ((natts + 1) * sizeof(AttrNumber)));
+	dep->nattributes = natts + 1;
+	dep->attributes[natts] = final_attnum;
+
+	/* attrsort is a sortable copy used only for duplicate detection */
+	attrsort = palloc0(dep->nattributes * sizeof(AttrNumber));
+	attrsort[natts] = final_attnum;
+
+	for (int i = 0; i < natts; i++)
+	{
+		char	   *s = (char *) list_nth(attnum_list, i);
+		AttrNumber	attnum = pg_strtoint16_safe(s, parse->escontext);
+
+		if (SOFT_ERROR_OCCURRED(parse->escontext))
+			return JSON_SEM_ACTION_FAILED;
+
+		attrsort[i] = attnum;
+		dep->attributes[i] = attnum;
+	}
+
+	list_free(attnum_list);
+	pfree(scratch);
+
+	/* after sorting, any duplicate attnums are adjacent */
+	qsort(attrsort, dep->nattributes, sizeof(AttrNumber), attnum_compare);
+	for (int i = 1; i < dep->nattributes; i++)
+	{
+		if (attrsort[i] == attrsort[i - 1])
+		{
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+					 errdetail("attnum list duplicate value found: %d", attrsort[i])));
+			return JSON_SEM_ACTION_FAILED;
+		}
+	}
+
+	pfree(attrsort);
+
+	/* add the degree-less MVDependency to the list */
+	parse->current_dependency = dep;
+	parse->dependency_list = lappend(parse->dependency_list, (void *) dep);
+	return JSON_SUCCESS;
+}
+
+/*
+ * JSON semantic action: start of an array element.
+ *
+ * The pg_dependencies input format does not have arrays, so any array
+ * element encountered is an error.  The problem is reported through
+ * parse->escontext and JSON_SEM_ACTION_FAILED is returned.
+ *
+ * NOTE(review): dependencies_array_start already rejects every array, so
+ * this presumably only acts as a backstop -- confirm against the parser.
+ */
+static JsonParseErrorType
+dependencies_array_element_start(void *state, bool isnull)
+{
+	dependenciesParseState *parse = state;
+
+	/*
+	 * errsave(), not ereturn(): ereturn() would return (Datum) 0 from this
+	 * function, which the JSON parser reads as JSON_SUCCESS.
+	 */
+	errsave(parse->escontext,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+			 errdetail("Cannot contain array elements.")));
+	return JSON_SEM_ACTION_FAILED;
+}
+
+/*
+ * JSON semantic action: a scalar value.
+ *
+ * There is only one case where we will encounter a scalar, and that is the
+ * dependency degree for the previous object key.  The token is parsed as a
+ * float8 and stored in parse->current_dependency, which is then cleared.
+ *
+ * Returns JSON_SUCCESS, or reports the problem through parse->escontext and
+ * returns JSON_SEM_ACTION_FAILED.
+ */
+static JsonParseErrorType
+dependencies_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	dependenciesParseState *parse = state;
+	float8		degree;
+
+	/* if the entire json is just one scalar, that's wrong */
+	if (!parse->found_only_object)
+	{
+		/*
+		 * errsave(), not ereturn(): ereturn() would return (Datum) 0 from
+		 * this function, which the JSON parser reads as JSON_SUCCESS.
+		 */
+		errsave(parse->escontext,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+				 errdetail("Must begin with \"{\"")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	/*
+	 * A scalar without a pending dependency means the preceding key was not
+	 * accepted.  Fail cleanly instead of dereferencing NULL; only report a
+	 * new error if none has been recorded yet.
+	 */
+	if (parse->current_dependency == NULL)
+	{
+		if (!SOFT_ERROR_OCCURRED(parse->escontext))
+			errsave(parse->escontext,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed pg_dependencies: \"%s\"", parse->str),
+					 errdetail("Unexpected scalar value.")));
+		return JSON_SEM_ACTION_FAILED;
+	}
+
+	degree = float8in_internal(token, NULL, "double",
+							   token, parse->escontext);
+	if (SOFT_ERROR_OCCURRED(parse->escontext))
+		return JSON_SEM_ACTION_FAILED;
+
+	parse->current_dependency->degree = degree;
+
+	/* mark us done with this dependency */
+	parse->current_dependency = NULL;
+	return JSON_SUCCESS;
+}
+
/*
* pg_dependencies_in - input routine for type pg_dependencies.
*
- * pg_dependencies is real enough to be a table column, but it has no operations
- * of its own, and disallows input too
+ * example input:
+ * {"-2 => 6": 0.292508,
+ * "-2 => -1": 0.113999,
+ * "6, -2 => -1": 0.348479,
+ * "-1, -2 => 6": 0.839691}
+ *
+ * This import format is clearly a specific subset of JSON, therefore it makes
+ * sense to leverage those parsing utilities, and further validate it from there.
*/
Datum
pg_dependencies_in(PG_FUNCTION_ARGS)
{
- /*
- * pg_node_list stores the data in binary form and parsing text input is
- * not needed, so disallow this.
- */
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot accept a value of type %s", "pg_dependencies")));
+ char *str = PG_GETARG_CSTRING(0);
- PG_RETURN_VOID(); /* keep compiler quiet */
+ dependenciesParseState parse_state;
+ JsonParseErrorType result;
+ JsonLexContext *lex;
+ JsonSemAction sem_action;
+
+ /* initialize the semantic state */
+ parse_state.str = str;
+ parse_state.found_only_object = false;
+ parse_state.dependency_list = NIL;
+ parse_state.escontext = fcinfo->context;
+ parse_state.current_dependency = NULL;
+
+ /* set callbacks */
+ sem_action.semstate = (void *) &parse_state;
+ sem_action.object_start = dependencies_object_start;
+ sem_action.object_end = NULL;
+ sem_action.array_start = dependencies_array_start;
+ sem_action.array_end = NULL;
+ sem_action.array_element_start = dependencies_array_element_start;
+ sem_action.array_element_end = NULL;
+ sem_action.object_field_start = dependencies_object_field_start;
+ sem_action.object_field_end = NULL;
+ sem_action.scalar = dependencies_scalar;
+
+ lex = makeJsonLexContextCstringLen(NULL, str, strlen(str), PG_UTF8, true);
+
+ result = pg_parse_json(lex, &sem_action);
+ freeJsonLexContext(lex);
+
+ if (result == JSON_SUCCESS)
+ {
+ List *list = parse_state.dependency_list;
+ int ndeps = list->length;
+ MVDependencies *mvdeps;
+ bytea *bytes;
+
+ mvdeps = palloc0(offsetof(MVDependencies, deps) + ndeps * sizeof(MVDependency));
+ mvdeps->magic = STATS_DEPS_MAGIC;
+ mvdeps->type = STATS_DEPS_TYPE_BASIC;
+ mvdeps->ndeps = ndeps;
+
+ /* copy MVDependency structs out of the list into the MVDependencies */
+ for (int i = 0; i < ndeps; i++)
+ mvdeps->deps[i] = list->elements[i].ptr_value;
+ bytes = statext_dependencies_serialize(mvdeps);
+
+ list_free(list);
+ for (int i = 0; i < ndeps; i++)
+ pfree(mvdeps->deps[i]);
+ pfree(mvdeps);
+
+ PG_RETURN_BYTEA_P(bytes);
+ }
+ else if (result == JSON_SEM_ACTION_FAILED)
+ PG_RETURN_NULL();
+
+ /* Anything else is a generic JSON parse error */
+ ereturn(parse_state.escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed pg_dependencies: \"%s\"", str),
+ errdetail("Must be valid JSON.")));
+
+ PG_RETURN_NULL(); /* keep compiler quiet */
}
/*
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 20333667e5f..489e6a19771 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -3365,6 +3365,24 @@ SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_n
{"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}
(1 row)
+-- can't have duplicates attnums in list
+SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1, -2": 14549}'::pg_ndistinct;
+ERROR: malformed pg_ndistinct: "{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1, -2": 14549}"
+LINE 1: SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1...
+ ^
+DETAIL: attnum list duplicate value found: -1
+SELECT '{"-2 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies;
+ pg_dependencies
+-----------------------------------------------------------------------------------------------
+ {"-2 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}
+(1 row)
+
+-- can't have duplicates attnums in list
+SELECT '{"6 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies;
+ERROR: malformed pg_dependencies: "{"6 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}"
+LINE 1: SELECT '{"6 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 =>...
+ ^
+DETAIL: attnum list duplicate value found: 6
-- Tidy up
DROP OPERATOR <<< (int, int);
DROP FUNCTION op_leak(int, int);
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 3539d7b5cd2..da5c11aedd7 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1702,6 +1702,12 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x
-- new input functions
SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -2": 13454, "6, -1, -2": 14549}'::pg_ndistinct;
+-- can't have duplicates attnums in list
+SELECT '{"6, -1": 14, "6, -2": 9143, "-1, -1": 13454, "6, -1, -2": 14549}'::pg_ndistinct;
+
+SELECT '{"-2 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies;
+-- can't have duplicates attnums in list
+SELECT '{"6 => 6": 0.292508, "-2 => -1": 0.113999, "6, -2 => -1": 0.348479, "-1, -2 => 6": 0.839691}'::pg_dependencies;
-- Tidy up
DROP OPERATOR <<< (int, int);
--
2.48.1