From f0da95a88e43e2046bc1f5287b9ad7ddbb251e13 Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Tue, 4 Nov 2025 23:50:01 -0500
Subject: [PATCH v20 1/3] Expose attribute statistics functions for use in
 extended_stats.

Many of the operations of attribute stats have analogous operations in
extended stats.

* get_attr_stat_type() renamed to statatt_get_type()
* init_empty_stats_tuple() renamed to statatt_init_empty_tuple()
* set_stats_slot() renamed to statatt_set_slot()
* text_to_stavalues() (name unchanged)
* get_elem_stat_type() renamed to statatt_get_elem_type()

Also, add comments explaining the function argument index enums, and the
arrays that are indexed by those enums.
---
 src/include/statistics/stat_utils.h      |  21 +-
 src/backend/statistics/attribute_stats.c | 424 +++--------------------
 src/backend/statistics/stat_utils.c      | 372 ++++++++++++++++++++
 3 files changed, 434 insertions(+), 383 deletions(-)

diff --git a/src/include/statistics/stat_utils.h b/src/include/statistics/stat_utils.h
index f41b181d4d3..e57a01043b7 100644
--- a/src/include/statistics/stat_utils.h
+++ b/src/include/statistics/stat_utils.h
@@ -14,9 +14,7 @@
 #define STATS_UTILS_H
 
 #include "fmgr.h"
-
-/* avoid including primnodes.h here */
-typedef struct RangeVar RangeVar;
+#include "nodes/pathnodes.h"
 
 struct StatsArgInfo
 {
@@ -40,4 +38,21 @@ extern bool stats_fill_fcinfo_from_arg_pairs(FunctionCallInfo pairs_fcinfo,
 											 FunctionCallInfo positional_fcinfo,
 											 struct StatsArgInfo *arginfo);
 
+extern void statatt_get_type(Oid reloid, AttrNumber attnum,
+							 Oid *atttypid, int32 *atttypmod,
+							 char *atttyptype, Oid *atttypcoll,
+							 Oid *eq_opr, Oid *lt_opr);
+extern void statatt_init_empty_tuple(Oid reloid, int16 attnum, bool inherited,
+									 Datum *values, bool *nulls, bool *replaces);
+
+extern void statatt_set_slot(Datum *values, bool *nulls, bool *replaces,
+							 int16 stakind, Oid staop, Oid stacoll,
+							 Datum stanumbers, bool stanumbers_isnull,
+							 Datum stavalues, bool stavalues_isnull);
+
+extern Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d,
+							   Oid typid, int32 typmod, bool *ok);
+extern bool statatt_get_elem_type(Oid atttypid, char atttyptype,
+								  Oid *elemtypid, Oid *elem_eq_opr);
+
 #endif							/* STATS_UTILS_H */
diff --git a/src/backend/statistics/attribute_stats.c b/src/backend/statistics/attribute_stats.c
index ef4d768feab..9b289129fcc 100644
--- a/src/backend/statistics/attribute_stats.c
+++ b/src/backend/statistics/attribute_stats.c
@@ -20,10 +20,8 @@
 #include "access/heapam.h"
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
-#include "catalog/pg_collation.h"
 #include "catalog/pg_operator.h"
 #include "nodes/makefuncs.h"
-#include "nodes/nodeFuncs.h"
 #include "statistics/statistics.h"
 #include "statistics/stat_utils.h"
 #include "utils/array.h"
@@ -32,10 +30,6 @@
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
-#define DEFAULT_NULL_FRAC      Float4GetDatum(0.0)
-#define DEFAULT_AVG_WIDTH      Int32GetDatum(0) /* unknown */
-#define DEFAULT_N_DISTINCT     Float4GetDatum(0.0)	/* unknown */
-
 /*
  * Positional argument numbers, names, and types for
  * attribute_statistics_update() and pg_restore_attribute_stats().
@@ -64,6 +58,10 @@ enum attribute_stats_argnum
 	NUM_ATTRIBUTE_STATS_ARGS
 };
 
+/*
+ * The names and type OIDs of the arguments for
+ * attribute_statistics_update(), indexed by enum attribute_stats_argnum.
+ */
 static struct StatsArgInfo attarginfo[] =
 {
 	[ATTRELSCHEMA_ARG] = {"schemaname", TEXTOID},
@@ -101,6 +99,10 @@ enum clear_attribute_stats_argnum
 	C_NUM_ATTRIBUTE_STATS_ARGS
 };
 
+/*
+ * The names and type OIDs of the arguments for
+ * pg_clear_attribute_stats(), indexed by enum clear_attribute_stats_argnum.
+ */
 static struct StatsArgInfo cleararginfo[] =
 {
 	[C_ATTRELSCHEMA_ARG] = {"relation", TEXTOID},
@@ -111,24 +113,9 @@ static struct StatsArgInfo cleararginfo[] =
 };
 
 static bool attribute_statistics_update(FunctionCallInfo fcinfo);
-static Node *get_attr_expr(Relation rel, int attnum);
-static void get_attr_stat_type(Oid reloid, AttrNumber attnum,
-							   Oid *atttypid, int32 *atttypmod,
-							   char *atttyptype, Oid *atttypcoll,
-							   Oid *eq_opr, Oid *lt_opr);
-static bool get_elem_stat_type(Oid atttypid, char atttyptype,
-							   Oid *elemtypid, Oid *elem_eq_opr);
-static Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d,
-							   Oid typid, int32 typmod, bool *ok);
-static void set_stats_slot(Datum *values, bool *nulls, bool *replaces,
-						   int16 stakind, Oid staop, Oid stacoll,
-						   Datum stanumbers, bool stanumbers_isnull,
-						   Datum stavalues, bool stavalues_isnull);
 static void upsert_pg_statistic(Relation starel, HeapTuple oldtup,
 								const Datum *values, const bool *nulls, const bool *replaces);
 static bool delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit);
-static void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited,
-								   Datum *values, bool *nulls, bool *replaces);
 
 /*
  * Insert or Update Attribute Statistics
@@ -298,16 +285,16 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 	}
 
 	/* derive information from attribute */
-	get_attr_stat_type(reloid, attnum,
-					   &atttypid, &atttypmod,
-					   &atttyptype, &atttypcoll,
-					   &eq_opr, &lt_opr);
+	statatt_get_type(reloid, attnum,
+					 &atttypid, &atttypmod,
+					 &atttyptype, &atttypcoll,
+					 &eq_opr, &lt_opr);
 
 	/* if needed, derive element type */
 	if (do_mcelem || do_dechist)
 	{
-		if (!get_elem_stat_type(atttypid, atttyptype,
-								&elemtypid, &elem_eq_opr))
+		if (!statatt_get_elem_type(atttypid, atttyptype,
+								   &elemtypid, &elem_eq_opr))
 		{
 			ereport(WARNING,
 					(errmsg("could not determine element type of column \"%s\"", attname),
@@ -361,8 +348,8 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 	if (HeapTupleIsValid(statup))
 		heap_deform_tuple(statup, RelationGetDescr(starel), values, nulls);
 	else
-		init_empty_stats_tuple(reloid, attnum, inherited, values, nulls,
-							   replaces);
+		statatt_init_empty_tuple(reloid, attnum, inherited, values, nulls,
+								 replaces);
 
 	/* if specified, set to argument values */
 	if (!PG_ARGISNULL(NULL_FRAC_ARG))
@@ -394,10 +381,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 
 		if (converted)
 		{
-			set_stats_slot(values, nulls, replaces,
-						   STATISTIC_KIND_MCV,
-						   eq_opr, atttypcoll,
-						   stanumbers, false, stavalues, false);
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_MCV,
+							 eq_opr, atttypcoll,
+							 stanumbers, false, stavalues, false);
 		}
 		else
 			result = false;
@@ -417,10 +404,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 
 		if (converted)
 		{
-			set_stats_slot(values, nulls, replaces,
-						   STATISTIC_KIND_HISTOGRAM,
-						   lt_opr, atttypcoll,
-						   0, true, stavalues, false);
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_HISTOGRAM,
+							 lt_opr, atttypcoll,
+							 0, true, stavalues, false);
 		}
 		else
 			result = false;
@@ -433,10 +420,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 		ArrayType  *arry = construct_array_builtin(elems, 1, FLOAT4OID);
 		Datum		stanumbers = PointerGetDatum(arry);
 
-		set_stats_slot(values, nulls, replaces,
-					   STATISTIC_KIND_CORRELATION,
-					   lt_opr, atttypcoll,
-					   stanumbers, false, 0, true);
+		statatt_set_slot(values, nulls, replaces,
+						 STATISTIC_KIND_CORRELATION,
+						 lt_opr, atttypcoll,
+						 stanumbers, false, 0, true);
 	}
 
 	/* STATISTIC_KIND_MCELEM */
@@ -454,10 +441,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 
 		if (converted)
 		{
-			set_stats_slot(values, nulls, replaces,
-						   STATISTIC_KIND_MCELEM,
-						   elem_eq_opr, atttypcoll,
-						   stanumbers, false, stavalues, false);
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_MCELEM,
+							 elem_eq_opr, atttypcoll,
+							 stanumbers, false, stavalues, false);
 		}
 		else
 			result = false;
@@ -468,10 +455,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 	{
 		Datum		stanumbers = PG_GETARG_DATUM(ELEM_COUNT_HISTOGRAM_ARG);
 
-		set_stats_slot(values, nulls, replaces,
-					   STATISTIC_KIND_DECHIST,
-					   elem_eq_opr, atttypcoll,
-					   stanumbers, false, 0, true);
+		statatt_set_slot(values, nulls, replaces,
+						 STATISTIC_KIND_DECHIST,
+						 elem_eq_opr, atttypcoll,
+						 stanumbers, false, 0, true);
 	}
 
 	/*
@@ -494,10 +481,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 
 		if (converted)
 		{
-			set_stats_slot(values, nulls, replaces,
-						   STATISTIC_KIND_BOUNDS_HISTOGRAM,
-						   InvalidOid, InvalidOid,
-						   0, true, stavalues, false);
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_BOUNDS_HISTOGRAM,
+							 InvalidOid, InvalidOid,
+							 0, true, stavalues, false);
 		}
 		else
 			result = false;
@@ -521,10 +508,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 
 		if (converted)
 		{
-			set_stats_slot(values, nulls, replaces,
-						   STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
-						   Float8LessOperator, InvalidOid,
-						   stanumbers, false, stavalues, false);
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+							 Float8LessOperator, InvalidOid,
+							 stanumbers, false, stavalues, false);
 		}
 		else
 			result = false;
@@ -539,291 +526,6 @@ attribute_statistics_update(FunctionCallInfo fcinfo)
 	return result;
 }
 
-/*
- * If this relation is an index and that index has expressions in it, and
- * the attnum specified is known to be an expression, then we must walk
- * the list attributes up to the specified attnum to get the right
- * expression.
- */
-static Node *
-get_attr_expr(Relation rel, int attnum)
-{
-	List	   *index_exprs;
-	ListCell   *indexpr_item;
-
-	/* relation is not an index */
-	if (rel->rd_rel->relkind != RELKIND_INDEX &&
-		rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
-		return NULL;
-
-	index_exprs = RelationGetIndexExpressions(rel);
-
-	/* index has no expressions to give */
-	if (index_exprs == NIL)
-		return NULL;
-
-	/*
-	 * The index attnum points directly to a relation attnum, then it's not an
-	 * expression attribute.
-	 */
-	if (rel->rd_index->indkey.values[attnum - 1] != 0)
-		return NULL;
-
-	indexpr_item = list_head(rel->rd_indexprs);
-
-	for (int i = 0; i < attnum - 1; i++)
-		if (rel->rd_index->indkey.values[i] == 0)
-			indexpr_item = lnext(rel->rd_indexprs, indexpr_item);
-
-	if (indexpr_item == NULL)	/* shouldn't happen */
-		elog(ERROR, "too few entries in indexprs list");
-
-	return (Node *) lfirst(indexpr_item);
-}
-
-/*
- * Derive type information from the attribute.
- */
-static void
-get_attr_stat_type(Oid reloid, AttrNumber attnum,
-				   Oid *atttypid, int32 *atttypmod,
-				   char *atttyptype, Oid *atttypcoll,
-				   Oid *eq_opr, Oid *lt_opr)
-{
-	Relation	rel = relation_open(reloid, AccessShareLock);
-	Form_pg_attribute attr;
-	HeapTuple	atup;
-	Node	   *expr;
-	TypeCacheEntry *typcache;
-
-	atup = SearchSysCache2(ATTNUM, ObjectIdGetDatum(reloid),
-						   Int16GetDatum(attnum));
-
-	/* Attribute not found */
-	if (!HeapTupleIsValid(atup))
-		ereport(ERROR,
-				(errcode(ERRCODE_UNDEFINED_COLUMN),
-				 errmsg("column %d of relation \"%s\" does not exist",
-						attnum, RelationGetRelationName(rel))));
-
-	attr = (Form_pg_attribute) GETSTRUCT(atup);
-
-	if (attr->attisdropped)
-		ereport(ERROR,
-				(errcode(ERRCODE_UNDEFINED_COLUMN),
-				 errmsg("column %d of relation \"%s\" does not exist",
-						attnum, RelationGetRelationName(rel))));
-
-	expr = get_attr_expr(rel, attr->attnum);
-
-	/*
-	 * When analyzing an expression index, believe the expression tree's type
-	 * not the column datatype --- the latter might be the opckeytype storage
-	 * type of the opclass, which is not interesting for our purposes. This
-	 * mimics the behavior of examine_attribute().
-	 */
-	if (expr == NULL)
-	{
-		*atttypid = attr->atttypid;
-		*atttypmod = attr->atttypmod;
-		*atttypcoll = attr->attcollation;
-	}
-	else
-	{
-		*atttypid = exprType(expr);
-		*atttypmod = exprTypmod(expr);
-
-		if (OidIsValid(attr->attcollation))
-			*atttypcoll = attr->attcollation;
-		else
-			*atttypcoll = exprCollation(expr);
-	}
-	ReleaseSysCache(atup);
-
-	/*
-	 * If it's a multirange, step down to the range type, as is done by
-	 * multirange_typanalyze().
-	 */
-	if (type_is_multirange(*atttypid))
-		*atttypid = get_multirange_range(*atttypid);
-
-	/* finds the right operators even if atttypid is a domain */
-	typcache = lookup_type_cache(*atttypid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR);
-	*atttyptype = typcache->typtype;
-	*eq_opr = typcache->eq_opr;
-	*lt_opr = typcache->lt_opr;
-
-	/*
-	 * Special case: collation for tsvector is DEFAULT_COLLATION_OID. See
-	 * compute_tsvector_stats().
-	 */
-	if (*atttypid == TSVECTOROID)
-		*atttypcoll = DEFAULT_COLLATION_OID;
-
-	relation_close(rel, NoLock);
-}
-
-/*
- * Derive element type information from the attribute type.
- */
-static bool
-get_elem_stat_type(Oid atttypid, char atttyptype,
-				   Oid *elemtypid, Oid *elem_eq_opr)
-{
-	TypeCacheEntry *elemtypcache;
-
-	if (atttypid == TSVECTOROID)
-	{
-		/*
-		 * Special case: element type for tsvector is text. See
-		 * compute_tsvector_stats().
-		 */
-		*elemtypid = TEXTOID;
-	}
-	else
-	{
-		/* find underlying element type through any domain */
-		*elemtypid = get_base_element_type(atttypid);
-	}
-
-	if (!OidIsValid(*elemtypid))
-		return false;
-
-	/* finds the right operator even if elemtypid is a domain */
-	elemtypcache = lookup_type_cache(*elemtypid, TYPECACHE_EQ_OPR);
-	if (!OidIsValid(elemtypcache->eq_opr))
-		return false;
-
-	*elem_eq_opr = elemtypcache->eq_opr;
-
-	return true;
-}
-
-/*
- * Cast a text datum into an array with element type elemtypid.
- *
- * If an error is encountered, capture it and re-throw a WARNING, and set ok
- * to false. If the resulting array contains NULLs, raise a WARNING and set ok
- * to false. Otherwise, set ok to true.
- */
-static Datum
-text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid,
-				  int32 typmod, bool *ok)
-{
-	LOCAL_FCINFO(fcinfo, 8);
-	char	   *s;
-	Datum		result;
-	ErrorSaveContext escontext = {T_ErrorSaveContext};
-
-	escontext.details_wanted = true;
-
-	s = TextDatumGetCString(d);
-
-	InitFunctionCallInfoData(*fcinfo, array_in, 3, InvalidOid,
-							 (Node *) &escontext, NULL);
-
-	fcinfo->args[0].value = CStringGetDatum(s);
-	fcinfo->args[0].isnull = false;
-	fcinfo->args[1].value = ObjectIdGetDatum(typid);
-	fcinfo->args[1].isnull = false;
-	fcinfo->args[2].value = Int32GetDatum(typmod);
-	fcinfo->args[2].isnull = false;
-
-	result = FunctionCallInvoke(fcinfo);
-
-	pfree(s);
-
-	if (escontext.error_occurred)
-	{
-		escontext.error_data->elevel = WARNING;
-		ThrowErrorData(escontext.error_data);
-		*ok = false;
-		return (Datum) 0;
-	}
-
-	if (array_contains_nulls(DatumGetArrayTypeP(result)))
-	{
-		ereport(WARNING,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("\"%s\" array must not contain null values", staname)));
-		*ok = false;
-		return (Datum) 0;
-	}
-
-	*ok = true;
-
-	return result;
-}
-
-/*
- * Find and update the slot with the given stakind, or use the first empty
- * slot.
- */
-static void
-set_stats_slot(Datum *values, bool *nulls, bool *replaces,
-			   int16 stakind, Oid staop, Oid stacoll,
-			   Datum stanumbers, bool stanumbers_isnull,
-			   Datum stavalues, bool stavalues_isnull)
-{
-	int			slotidx;
-	int			first_empty = -1;
-	AttrNumber	stakind_attnum;
-	AttrNumber	staop_attnum;
-	AttrNumber	stacoll_attnum;
-
-	/* find existing slot with given stakind */
-	for (slotidx = 0; slotidx < STATISTIC_NUM_SLOTS; slotidx++)
-	{
-		stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx;
-
-		if (first_empty < 0 &&
-			DatumGetInt16(values[stakind_attnum]) == 0)
-			first_empty = slotidx;
-		if (DatumGetInt16(values[stakind_attnum]) == stakind)
-			break;
-	}
-
-	if (slotidx >= STATISTIC_NUM_SLOTS && first_empty >= 0)
-		slotidx = first_empty;
-
-	if (slotidx >= STATISTIC_NUM_SLOTS)
-		ereport(ERROR,
-				(errmsg("maximum number of statistics slots exceeded: %d",
-						slotidx + 1)));
-
-	stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx;
-	staop_attnum = Anum_pg_statistic_staop1 - 1 + slotidx;
-	stacoll_attnum = Anum_pg_statistic_stacoll1 - 1 + slotidx;
-
-	if (DatumGetInt16(values[stakind_attnum]) != stakind)
-	{
-		values[stakind_attnum] = Int16GetDatum(stakind);
-		replaces[stakind_attnum] = true;
-	}
-	if (DatumGetObjectId(values[staop_attnum]) != staop)
-	{
-		values[staop_attnum] = ObjectIdGetDatum(staop);
-		replaces[staop_attnum] = true;
-	}
-	if (DatumGetObjectId(values[stacoll_attnum]) != stacoll)
-	{
-		values[stacoll_attnum] = ObjectIdGetDatum(stacoll);
-		replaces[stacoll_attnum] = true;
-	}
-	if (!stanumbers_isnull)
-	{
-		values[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = stanumbers;
-		nulls[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = false;
-		replaces[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = true;
-	}
-	if (!stavalues_isnull)
-	{
-		values[Anum_pg_statistic_stavalues1 - 1 + slotidx] = stavalues;
-		nulls[Anum_pg_statistic_stavalues1 - 1 + slotidx] = false;
-		replaces[Anum_pg_statistic_stavalues1 - 1 + slotidx] = true;
-	}
-}
-
 /*
  * Upsert the pg_statistic record.
  */
@@ -880,44 +582,6 @@ delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit)
 	return result;
 }
 
-/*
- * Initialize values and nulls for a new stats tuple.
- */
-static void
-init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited,
-					   Datum *values, bool *nulls, bool *replaces)
-{
-	memset(nulls, true, sizeof(bool) * Natts_pg_statistic);
-	memset(replaces, true, sizeof(bool) * Natts_pg_statistic);
-
-	/* must initialize non-NULL attributes */
-
-	values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(reloid);
-	nulls[Anum_pg_statistic_starelid - 1] = false;
-	values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(attnum);
-	nulls[Anum_pg_statistic_staattnum - 1] = false;
-	values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inherited);
-	nulls[Anum_pg_statistic_stainherit - 1] = false;
-
-	values[Anum_pg_statistic_stanullfrac - 1] = DEFAULT_NULL_FRAC;
-	nulls[Anum_pg_statistic_stanullfrac - 1] = false;
-	values[Anum_pg_statistic_stawidth - 1] = DEFAULT_AVG_WIDTH;
-	nulls[Anum_pg_statistic_stawidth - 1] = false;
-	values[Anum_pg_statistic_stadistinct - 1] = DEFAULT_N_DISTINCT;
-	nulls[Anum_pg_statistic_stadistinct - 1] = false;
-
-	/* initialize stakind, staop, and stacoll slots */
-	for (int slotnum = 0; slotnum < STATISTIC_NUM_SLOTS; slotnum++)
-	{
-		values[Anum_pg_statistic_stakind1 + slotnum - 1] = (Datum) 0;
-		nulls[Anum_pg_statistic_stakind1 + slotnum - 1] = false;
-		values[Anum_pg_statistic_staop1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid);
-		nulls[Anum_pg_statistic_staop1 + slotnum - 1] = false;
-		values[Anum_pg_statistic_stacoll1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid);
-		nulls[Anum_pg_statistic_stacoll1 + slotnum - 1] = false;
-	}
-}
-
 /*
  * Delete statistics for the given attribute.
  */
diff --git a/src/backend/statistics/stat_utils.c b/src/backend/statistics/stat_utils.c
index 0c139bf43a7..1a7c6b024a1 100644
--- a/src/backend/statistics/stat_utils.c
+++ b/src/backend/statistics/stat_utils.c
@@ -21,9 +21,12 @@
 #include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_class.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_database.h"
+#include "catalog/pg_statistic.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
 #include "statistics/stat_utils.h"
 #include "storage/lmgr.h"
 #include "utils/acl.h"
@@ -33,6 +36,13 @@
 #include "utils/rel.h"
 #include "utils/syscache.h"
 
+
+#define DEFAULT_STATATT_NULL_FRAC      Float4GetDatum(0.0)
+#define DEFAULT_STATATT_AVG_WIDTH      Int32GetDatum(0) /* unknown */
+#define DEFAULT_STATATT_N_DISTINCT     Float4GetDatum(0.0)	/* unknown */
+
+static Node *get_attr_expr(Relation rel, int attnum);
+
 /*
  * Ensure that a given argument is not null.
  */
@@ -365,3 +375,365 @@ stats_fill_fcinfo_from_arg_pairs(FunctionCallInfo pairs_fcinfo,
 
 	return result;
 }
+
+/*
+ * If this relation is an index and that index has expressions in it, and
+ * the attnum specified is known to be an expression, then we must walk
+ * the list of attributes up to the specified attnum to get the right
+ * expression.
+ */
+static Node *
+get_attr_expr(Relation rel, int attnum)
+{
+	List	   *index_exprs;
+	ListCell   *indexpr_item;
+
+	/* relation is not an index */
+	if (rel->rd_rel->relkind != RELKIND_INDEX &&
+		rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+		return NULL;
+
+	index_exprs = RelationGetIndexExpressions(rel);
+
+	/* index has no expressions to give */
+	if (index_exprs == NIL)
+		return NULL;
+
+	/*
+	 * If the index attnum points directly to a relation attnum, then it's not
+	 * an expression attribute.
+	 */
+	if (rel->rd_index->indkey.values[attnum - 1] != 0)
+		return NULL;
+
+	indexpr_item = list_head(rel->rd_indexprs);
+
+	for (int i = 0; i < attnum - 1; i++)
+		if (rel->rd_index->indkey.values[i] == 0)
+			indexpr_item = lnext(rel->rd_indexprs, indexpr_item);
+
+	if (indexpr_item == NULL)	/* shouldn't happen */
+		elog(ERROR, "too few entries in indexprs list");
+
+	return (Node *) lfirst(indexpr_item);
+}
+
+/*
+ * Derive type information from the attribute.
+ *
+ * This is needed for setting most slot statistics for all data types.
+ *
+ * This duplicates the logic in examine_attribute() but it will not skip the
+ * attribute if the attstattarget is 0.
+ *
+ * The information fetched here is a prerequisite to calling
+ * the other statatt_*() functions.
+ */
+void
+statatt_get_type(Oid reloid, AttrNumber attnum,
+				 Oid *atttypid, int32 *atttypmod,
+				 char *atttyptype, Oid *atttypcoll,
+				 Oid *eq_opr, Oid *lt_opr)
+{
+	Relation	rel = relation_open(reloid, AccessShareLock);
+	Form_pg_attribute attr;
+	HeapTuple	atup;
+	Node	   *expr;
+	TypeCacheEntry *typcache;
+
+	atup = SearchSysCache2(ATTNUM, ObjectIdGetDatum(reloid),
+						   Int16GetDatum(attnum));
+
+	/* Attribute not found */
+	if (!HeapTupleIsValid(atup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column %d of relation \"%s\" does not exist",
+						attnum, RelationGetRelationName(rel))));
+
+	attr = (Form_pg_attribute) GETSTRUCT(atup);
+
+	if (attr->attisdropped)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 errmsg("column %d of relation \"%s\" does not exist",
+						attnum, RelationGetRelationName(rel))));
+
+	expr = get_attr_expr(rel, attr->attnum);
+
+	/*
+	 * When analyzing an expression index, believe the expression tree's type
+	 * not the column datatype --- the latter might be the opckeytype storage
+	 * type of the opclass, which is not interesting for our purposes. This
+	 * mimics the behavior of examine_attribute().
+	 */
+	if (expr == NULL)
+	{
+		*atttypid = attr->atttypid;
+		*atttypmod = attr->atttypmod;
+		*atttypcoll = attr->attcollation;
+	}
+	else
+	{
+		*atttypid = exprType(expr);
+		*atttypmod = exprTypmod(expr);
+
+		if (OidIsValid(attr->attcollation))
+			*atttypcoll = attr->attcollation;
+		else
+			*atttypcoll = exprCollation(expr);
+	}
+	ReleaseSysCache(atup);
+
+	/*
+	 * If it's a multirange, step down to the range type, as is done by
+	 * multirange_typanalyze().
+	 */
+	if (type_is_multirange(*atttypid))
+		*atttypid = get_multirange_range(*atttypid);
+
+	/* finds the right operators even if atttypid is a domain */
+	typcache = lookup_type_cache(*atttypid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR);
+	*atttyptype = typcache->typtype;
+	*eq_opr = typcache->eq_opr;
+	*lt_opr = typcache->lt_opr;
+
+	/*
+	 * Special case: collation for tsvector is DEFAULT_COLLATION_OID. See
+	 * compute_tsvector_stats().
+	 */
+	if (*atttypid == TSVECTOROID)
+		*atttypcoll = DEFAULT_COLLATION_OID;
+
+	relation_close(rel, NoLock);
+}
+
+/*
+ * Derive element type information from the attribute type. This information
+ * is needed when the given type is one that contains elements of other types.
+ *
+ * The atttypid and atttyptype should be derived from a previous call to
+ * statatt_get_type().
+ */
+bool
+statatt_get_elem_type(Oid atttypid, char atttyptype,
+					  Oid *elemtypid, Oid *elem_eq_opr)
+{
+	TypeCacheEntry *elemtypcache;
+
+	if (atttypid == TSVECTOROID)
+	{
+		/*
+		 * Special case: element type for tsvector is text. See
+		 * compute_tsvector_stats().
+		 */
+		*elemtypid = TEXTOID;
+	}
+	else
+	{
+		/* find underlying element type through any domain */
+		*elemtypid = get_base_element_type(atttypid);
+	}
+
+	if (!OidIsValid(*elemtypid))
+		return false;
+
+	/* finds the right operator even if elemtypid is a domain */
+	elemtypcache = lookup_type_cache(*elemtypid, TYPECACHE_EQ_OPR);
+	if (!OidIsValid(elemtypcache->eq_opr))
+		return false;
+
+	*elem_eq_opr = elemtypcache->eq_opr;
+
+	return true;
+}
+
+/*
+ * Cast a text datum into an array with element type typid.
+ *
+ * The typid and typmod should be derived from a previous call to
+ * statatt_get_type().
+ *
+ * If an error is encountered, capture it and re-throw a WARNING, and set ok
+ * to false. If the resulting array contains NULLs, raise a WARNING and set ok
+ * to false. Otherwise, set ok to true.
+ */
+Datum
+text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid,
+				  int32 typmod, bool *ok)
+{
+	LOCAL_FCINFO(fcinfo, 8);
+	char	   *s;
+	Datum		result;
+	ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+	escontext.details_wanted = true;
+
+	s = TextDatumGetCString(d);
+
+	InitFunctionCallInfoData(*fcinfo, array_in, 3, InvalidOid,
+							 (Node *) &escontext, NULL);
+
+	fcinfo->args[0].value = CStringGetDatum(s);
+	fcinfo->args[0].isnull = false;
+	fcinfo->args[1].value = ObjectIdGetDatum(typid);
+	fcinfo->args[1].isnull = false;
+	fcinfo->args[2].value = Int32GetDatum(typmod);
+	fcinfo->args[2].isnull = false;
+
+	result = FunctionCallInvoke(fcinfo);
+
+	pfree(s);
+
+	if (escontext.error_occurred)
+	{
+		escontext.error_data->elevel = WARNING;
+		ThrowErrorData(escontext.error_data);
+		*ok = false;
+		return (Datum) 0;
+	}
+
+	if (array_contains_nulls(DatumGetArrayTypeP(result)))
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("\"%s\" array must not contain null values", staname)));
+		*ok = false;
+		return (Datum) 0;
+	}
+
+	*ok = true;
+
+	return result;
+}
+
+/*
+ * Find and update the slot with the given stakind, or use the first empty
+ * slot.
+ *
+ * Core statistics types expect the stakind value to be one of the
+ * STATISTIC_KIND_* constants defined in pg_statistic.h, but types defined by
+ * extensions are not restricted to those values.
+ *
+ * In the case of core statistics, the required staop is determined by the
+ * stakind given and will either be a hardcoded oid, or will be the eq/lt
+ * operator derived from statatt_get_type(). Likewise, types defined by
+ * extensions have no such restriction.
+ *
+ * The stacoll value will either be the atttypcoll derived from
+ * statatt_get_type() or a hardcoded value required by that particular stakind.
+ *
+ * The value/null pairs for stanumbers and stavalues will have been calculated
+ * based on the stakind given.
+ */
+void
+statatt_set_slot(Datum *values, bool *nulls, bool *replaces,
+				 int16 stakind, Oid staop, Oid stacoll,
+				 Datum stanumbers, bool stanumbers_isnull,
+				 Datum stavalues, bool stavalues_isnull)
+{
+	int			slotidx;
+	int			first_empty = -1;
+	AttrNumber	stakind_attnum;
+	AttrNumber	staop_attnum;
+	AttrNumber	stacoll_attnum;
+
+	/* find existing slot with given stakind */
+	for (slotidx = 0; slotidx < STATISTIC_NUM_SLOTS; slotidx++)
+	{
+		stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx;
+
+		if (first_empty < 0 &&
+			DatumGetInt16(values[stakind_attnum]) == 0)
+			first_empty = slotidx;
+		if (DatumGetInt16(values[stakind_attnum]) == stakind)
+			break;
+	}
+
+	if (slotidx >= STATISTIC_NUM_SLOTS && first_empty >= 0)
+		slotidx = first_empty;
+
+	if (slotidx >= STATISTIC_NUM_SLOTS)
+		ereport(ERROR,
+				(errmsg("maximum number of statistics slots exceeded: %d",
+						slotidx + 1)));
+
+	stakind_attnum = Anum_pg_statistic_stakind1 - 1 + slotidx;
+	staop_attnum = Anum_pg_statistic_staop1 - 1 + slotidx;
+	stacoll_attnum = Anum_pg_statistic_stacoll1 - 1 + slotidx;
+
+	if (DatumGetInt16(values[stakind_attnum]) != stakind)
+	{
+		values[stakind_attnum] = Int16GetDatum(stakind);
+		replaces[stakind_attnum] = true;
+	}
+	if (DatumGetObjectId(values[staop_attnum]) != staop)
+	{
+		values[staop_attnum] = ObjectIdGetDatum(staop);
+		replaces[staop_attnum] = true;
+	}
+	if (DatumGetObjectId(values[stacoll_attnum]) != stacoll)
+	{
+		values[stacoll_attnum] = ObjectIdGetDatum(stacoll);
+		replaces[stacoll_attnum] = true;
+	}
+	if (!stanumbers_isnull)
+	{
+		values[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = stanumbers;
+		nulls[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = false;
+		replaces[Anum_pg_statistic_stanumbers1 - 1 + slotidx] = true;
+	}
+	if (!stavalues_isnull)
+	{
+		values[Anum_pg_statistic_stavalues1 - 1 + slotidx] = stavalues;
+		nulls[Anum_pg_statistic_stavalues1 - 1 + slotidx] = false;
+		replaces[Anum_pg_statistic_stavalues1 - 1 + slotidx] = true;
+	}
+}
+
+/*
+ * Initialize values and nulls for a new pg_statistic tuple.
+ *
+ * There are two possible destinations for the tuple created.
+ *
+ * The first is the pg_statistic table, in which case the reloid, attnum,
+ * and inherited flags should all be set.
+ *
+ * The second case is as an element of the stxdexpr array of a
+ * pg_statistic_ext_data tuple, in which case (reloid, attnum, inherited)
+ * should be set to (InvalidOid, InvalidAttrNumber, false).
+ */
+void
+statatt_init_empty_tuple(Oid reloid, int16 attnum, bool inherited,
+						 Datum *values, bool *nulls, bool *replaces)
+{
+	memset(nulls, true, sizeof(bool) * Natts_pg_statistic);
+	memset(replaces, true, sizeof(bool) * Natts_pg_statistic);
+
+	/* must initialize non-NULL attributes */
+
+	values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(reloid);
+	nulls[Anum_pg_statistic_starelid - 1] = false;
+	values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(attnum);
+	nulls[Anum_pg_statistic_staattnum - 1] = false;
+	values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inherited);
+	nulls[Anum_pg_statistic_stainherit - 1] = false;
+
+	values[Anum_pg_statistic_stanullfrac - 1] = DEFAULT_STATATT_NULL_FRAC;
+	nulls[Anum_pg_statistic_stanullfrac - 1] = false;
+	values[Anum_pg_statistic_stawidth - 1] = DEFAULT_STATATT_AVG_WIDTH;
+	nulls[Anum_pg_statistic_stawidth - 1] = false;
+	values[Anum_pg_statistic_stadistinct - 1] = DEFAULT_STATATT_N_DISTINCT;
+	nulls[Anum_pg_statistic_stadistinct - 1] = false;
+
+	/* initialize stakind, staop, and stacoll slots */
+	for (int slotnum = 0; slotnum < STATISTIC_NUM_SLOTS; slotnum++)
+	{
+		values[Anum_pg_statistic_stakind1 + slotnum - 1] = (Datum) 0;
+		nulls[Anum_pg_statistic_stakind1 + slotnum - 1] = false;
+		values[Anum_pg_statistic_staop1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid);
+		nulls[Anum_pg_statistic_staop1 + slotnum - 1] = false;
+		values[Anum_pg_statistic_stacoll1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid);
+		nulls[Anum_pg_statistic_stacoll1 + slotnum - 1] = false;
+	}
+}

base-commit: 80f6e2fb4addb03e2e163a380b5e6e1f4b321286
-- 
2.52.0

