v1-0001-Use-hash-based-matching-for-MCVs-in-ScalarArrayOp.patch
text/x-patch
Filename: v1-0001-Use-hash-based-matching-for-MCVs-in-ScalarArrayOp.patch
Type: text/x-patch
Part: 0
From 57695ce330b758fc1ac165c9c96c0a4dd397b5d0 Mon Sep 17 00:00:00 2001
From: Evdokimov Ilia <ilya.evdokimov@tantorlabs.com>
Date: Mon, 29 Dec 2025 23:28:51 +0300
Subject: [PATCH v1] Use hash-based matching for MCVs in ScalarArrayOpExpr
selectivity
When estimating selectivity for ScalarArrayOpExpr (IN / ANY / ALL) with
available MCV statistics, the planner currently matches IN-list elements
against the MCV array using a nested loop, resulting in O(N*M) behavior
for large IN-lists and MCV lists.
Introduce a hash-based matching path, analogous to the one used for MCV
matching in join selectivity estimation. Build a hash table over the MCV
values and probe it for each IN-list element, avoiding repeated linear
scans of the MCV array.
The hash table is built over the MCV list rather than the IN-list, since
MCVs are guaranteed to be distinct and non-NULL, while IN-lists may
contain duplicates, NULLs, or non-Const expressions.
The hash-based path is enabled only when both a sufficiently large
IN-list and an MCV list are present, and suitable hash functions exist
for the equality operator.
---
src/backend/utils/adt/selfuncs.c | 390 ++++++++++++++++++++++++++++++-
src/tools/pgindent/typedefs.list | 3 +
2 files changed, 392 insertions(+), 1 deletion(-)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index a996f0c4939..fcdf3b7ac1e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -146,7 +146,7 @@
/*
* In production builds, switch to hash-based MCV matching when the lists are
* large enough to amortize hash setup cost. (This threshold is compared to
- * the sum of the lengths of the two MCV lists. This is simplistic but seems
+ * the sum of the lengths of the two lists. This is simplistic but seems
* to work well enough.) In debug builds, we use a smaller threshold so that
* the regression tests cover both paths well.
*/
@@ -156,6 +156,12 @@
#define EQJOINSEL_MCV_HASH_THRESHOLD 20
#endif
+#ifndef USE_ASSERT_CHECKING
+#define SCALARARRAY_MCV_HASH_THRESHOLD 200
+#else
+#define SCALARARRAY_MCV_HASH_THRESHOLD 20
+#endif
+
/* Entries in the simplehash hash table used by eqjoinsel_find_matches */
typedef struct MCVHashEntry
{
@@ -176,14 +182,40 @@ typedef struct MCVHashContext
int16 hash_typlen; /* typlen of hashed data type */
} MCVHashContext;
+/* Entries in the simplehash hash table used by scalararray_mcv_hash_match */
+typedef struct MCVInHashEntry
+{
+ Datum value; /* the value represented by this entry */
+ int index; /* its index in the relevant AttStatsSlot */
+ uint32 hash; /* hash code for the Datum */
+ char status; /* status code used by simplehash.h */
+} MCVInHashEntry;
+
+/* private_data for the simplehash hash table */
+typedef struct MCVInHashContext
+{
+ FunctionCallInfo equal_fcinfo; /* the equality join operator */
+ FunctionCallInfo hash_fcinfo; /* the hash function to use */
+ bool insert_mode; /* doing inserts or lookups? */
+ bool hash_typbyval; /* typbyval of hashed data type */
+ int16 hash_typlen; /* typlen of hashed data type */
+} MCVInHashContext;
+
/* forward reference */
typedef struct MCVHashTable_hash MCVHashTable_hash;
+typedef struct MCVInHashTable_hash MCVInHashTable_hash;
/* Hooks for plugins to get control when we ask for stats */
get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
+static double scalararray_mcv_hash_match(VariableStatData *vardata, Oid operator, Oid collation,
+ Node *other_op, bool var_on_left, Datum *elem_values,
+ bool *elem_nulls, int num_elems,
+ bool useOr, bool isEquality, bool isInequality);
+static uint32 hash_mcv_in(MCVInHashTable_hash *tab, Datum key);
+static bool mcvs_in_equal(MCVInHashTable_hash *tab, Datum key0, Datum key1);
static double eqjoinsel_inner(FmgrInfo *eqproc, Oid collation,
Oid hashLeft, Oid hashRight,
VariableStatData *vardata1, VariableStatData *vardata2,
@@ -287,6 +319,19 @@ static double btcost_correlation(IndexOptInfo *index,
#define SH_DECLARE
#include "lib/simplehash.h"
+#define SH_PREFIX MCVInHashTable
+#define SH_ELEMENT_TYPE MCVInHashEntry
+#define SH_KEY_TYPE Datum
+#define SH_KEY value
+#define SH_HASH_KEY(tab,key) hash_mcv_in(tab, key)
+#define SH_EQUAL(tab,key0,key1) mcvs_in_equal(tab, key0, key1)
+#define SH_SCOPE static inline
+#define SH_STORE_HASH
+#define SH_GET_HASH(tab,ent) (ent)->hash
+#define SH_DEFINE
+#define SH_DECLARE
+#include "lib/simplehash.h"
+
/*
* eqsel - Selectivity of "=" for any data types.
@@ -2025,6 +2070,35 @@ scalararraysel(PlannerInfo *root,
elmlen, elmbyval, elmalign,
&elem_values, &elem_nulls, &num_elems);
+ /*
+ * Try to calculate selectivity by hash-search O(N) instead of O(N^2)
+ * in case of MCV matching.
+ */
+ if ((isEquality || isInequality) && !is_join_clause)
+ {
+ VariableStatData vardata;
+ Node *other_op = NULL;
+ bool var_on_left;
+
+ /*
+ * If expression is not variable = something or something =
+ * variable, then punt and return a default estimate.
+ */
+ if (get_restriction_variable(root, clause->args, varRelid,
+ &vardata, &other_op, &var_on_left))
+ {
+ s1 = scalararray_mcv_hash_match(&vardata, operator, clause->inputcollid,
+ other_op, var_on_left, elem_values,
+ elem_nulls, num_elems,
+ useOr, isEquality, isInequality);
+
+ ReleaseVariableStats(vardata);
+
+ if (s1 >= 0.0)
+ return s1;
+ }
+ }
+
/*
* For generic operators, we assume the probability of success is
* independent for each array element. But for "= ANY" or "<> ALL",
@@ -2210,6 +2284,320 @@ scalararraysel(PlannerInfo *root,
return s1;
}
+/*
+ * Estimate selectivity of a ScalarArrayOpExpr using MCV statistics and,
+ * when beneficial, a hash table for efficient matching.
+ *
+ * The function evaluates selectivity by comparing each element of the
+ * IN-list against the column's most-common values (MCVs).
+ *
+ * Inputs:
+ * vardata: statistics and metadata for the variable being estimated
+ * operator: equality or inequality operator to apply
+ * collation: OID of collation to use
+ * other_op: expression for the non-variable side of the comparison
+ * var_on_left: true if the variable is on the left side of the operator
+ * elem_values: array of IN-list element values
+ * elem_nulls: array indicating which IN-list elements are NULL
+ * num_elems: number of IN-list elements
+ * useOr: true if elements are combined using OR semantics, false for AND
+ * isEquality: true if the operator is equality
+ * isInequality: true if the operator is inequality
+ *
+ * Result:
+ * Returns a selectivity estimate in the range [0.0, 1.0], or -1.0 if the
+ * selectivity cannot be reliably estimated by this function.
+ */
+static double
+scalararray_mcv_hash_match(VariableStatData *vardata, Oid operator, Oid collation,
+ Node *other_op, bool var_on_left, Datum *elem_values,
+ bool *elem_nulls, int num_elems,
+ bool useOr, bool isEquality, bool isInequality)
+{
+ Form_pg_statistic stats;
+ AttStatsSlot sslot;
+ FmgrInfo eqproc;
+ double selec = -1.0,
+ s1disjoint,
+ nullfrac = 0.0;
+ Oid hashLeft = InvalidOid,
+ hashRight = InvalidOid,
+ opfuncoid;
+ bool have_mcvs = false;
+
+ Assert(isEquality || isInequality);
+
+ /*
+ * If we matched the var to a unique index, DISTINCT or GROUP-BY clause,
+ * assume there is exactly one match regardless of anything else. (This
+ * is slightly bogus, since the index or clause's equality operator might
+ * be different from ours, but it's much more likely to be right than
+ * ignoring the information.)
+ */
+ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
+ return -1.0;
+
+ /*
+ * When asked about <>, we do the estimation using the corresponding =
+ * operator.
+ */
+ if (isInequality)
+ {
+ operator = get_negator(operator);
+ if (!OidIsValid(operator))
+ return -1.0;
+ }
+
+ opfuncoid = get_opcode(operator);
+
+ memset(&sslot, 0, sizeof(sslot));
+
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ if (statistic_proc_security_check(vardata, opfuncoid))
+ have_mcvs = get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ }
+
+ if (have_mcvs)
+ {
+ fmgr_info(opfuncoid, &eqproc);
+
+ /*
+ * If the combined size of the MCV list and the IN-list is large
+ * enough to justify hashing, attempt to look up hash functions for
+ * the operator.
+ */
+ if (sslot.nvalues + num_elems >= SCALARARRAY_MCV_HASH_THRESHOLD)
+ (void) get_op_hash_functions(operator, &hashLeft, &hashRight);
+ }
+
+ if (have_mcvs && OidIsValid(hashLeft) && OidIsValid(hashRight))
+ {
+ /* Use a hash table to speed up the matching */
+ LOCAL_FCINFO(fcinfo, 2);
+ LOCAL_FCINFO(hash_fcinfo, 1);
+ MCVInHashTable_hash *hashTable;
+ FmgrInfo hash_proc;
+ MCVInHashContext hashContext;
+ double sumallcommon = 0.0,
+ remaining_selec = 0.0;
+ bool isdefault;
+ double otherdistinct;
+
+ /* Grab the nullfrac for use below. */
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ nullfrac = stats->stanullfrac;
+
+ selec = s1disjoint = (useOr ? 0.0 : 1.0);
+
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+
+ /*
+ * Build the hash table using the MCV array as keys, rather than the
+ * IN-list elements.
+ *
+ * The MCV list is guaranteed to contain distinct values. Hashing the
+ * MCVs allows us to efficiently identify whether a given IN-list
+ * element matches a most-common value, which is exactly what is
+ * needed for accurate selectivity estimation.
+ */
+ fmgr_info(hashLeft, &hash_proc);
+ InitFunctionCallInfoData(*hash_fcinfo, &hash_proc, 1, collation,
+ NULL, NULL);
+ hash_fcinfo->args[0].isnull = false;
+
+ hashContext.equal_fcinfo = fcinfo;
+ hashContext.hash_fcinfo = hash_fcinfo;
+ hashContext.insert_mode = true;
+
+ get_typlenbyval(sslot.valuetype,
+ &hashContext.hash_typlen,
+ &hashContext.hash_typbyval);
+
+ hashTable = MCVInHashTable_create(CurrentMemoryContext,
+ sslot.nvalues,
+ &hashContext);
+
+ for (int i = 0; i < sslot.nvalues; i++)
+ {
+ bool found = false;
+ MCVInHashEntry *entry;
+
+ entry = MCVInHashTable_insert(hashTable, sslot.values[i], &found);
+
+ /*
+ * MCVHashTable_insert will only report "found" if the new value
+ * is equal to some previous one per datum_image_eq(). That
+ * probably shouldn't happen, since we're not expecting duplicates
+ * in the MCV list. If we do find a dup, just ignore it, leaving
+ * the hash entry's index pointing at the first occurrence. That
+ * matches the behavior that the non-hashed code path would have.
+ */
+ if (likely(!found))
+ entry->index = i;
+
+ sumallcommon += sslot.numbers[i];
+ }
+
+ /*
+ * Prepare to probe the hash table. If the probe values are of a
+ * different data type, then we need to change hash functions. (This
+ * code relies on the assumption that since we defined SH_STORE_HASH,
+ * simplehash.h will never need to compute hash values for existing
+ * hash table entries.)
+ */
+ hashContext.insert_mode = false;
+ if (hashLeft != hashRight)
+ {
+
+ fmgr_info(hashRight, &hash_proc);
+ /* Resetting hash_fcinfo is probably unnecessary, but be safe */
+ InitFunctionCallInfoData(*hash_fcinfo, &hash_proc, 1, collation,
+ NULL, NULL);
+ hash_fcinfo->args[0].isnull = false;
+ }
+
+ /*
+ * Comparison is against a constant that is neither NULL nor any of
+ * the common values. Its selectivity cannot be more than this:
+ */
+ remaining_selec = 1.0 - sumallcommon - nullfrac;
+ CLAMP_PROBABILITY(remaining_selec);
+
+ /*
+ * and in fact it's probably a good deal less. We approximate that all
+ * the not-common values share this remaining fraction equally, so we
+ * divide by the number of other distinct values.
+ */
+ otherdistinct = get_variable_numdistinct(vardata, &isdefault) - sslot.nnumbers;
+ if (otherdistinct > 1)
+ remaining_selec /= otherdistinct;
+
+ /*
+ * Another cross-check: selectivity shouldn't be estimated as more
+ * than the least common "most common value".
+ */
+ if (sslot.nnumbers > 0 && remaining_selec > sslot.numbers[sslot.nnumbers - 1])
+ remaining_selec = sslot.numbers[sslot.nnumbers - 1];
+
+ /* Evaluate selectivity contribution of each IN-list element. */
+ for (int i = 0; i < num_elems; i++)
+ {
+ MCVInHashEntry *entry;
+ Selectivity s1;
+
+ /*
+ * If the constant is NULL, assume operator is strict and return
+ * s1 zero. (It's zero even for a negator op.)
+ */
+ if (elem_nulls[i])
+ continue;
+
+ if (IsA(other_op, Const))
+ {
+ entry = MCVInHashTable_lookup(hashTable, elem_values[i]);
+
+ if (entry != NULL)
+ {
+ /*
+ * As in the other code path, skip already-matched hash
+ * entries
+ */
+ s1 = sslot.numbers[entry->index];
+ }
+ else
+ s1 = remaining_selec;
+
+ if (isInequality)
+ s1 = 1.0 - s1 - nullfrac;
+ }
+ else
+ s1 = var_eq_non_const(vardata, operator, collation, other_op,
+ var_on_left, isInequality);
+
+ CLAMP_PROBABILITY(s1);
+
+ if (useOr)
+ {
+ selec = selec + s1 - selec * s1;
+ if (isEquality)
+ s1disjoint += s1;
+ }
+ else
+ {
+ selec = selec * s1;
+ if (isInequality)
+ s1disjoint += s1 - 1.0;
+ }
+ }
+
+ MCVInHashTable_destroy(hashTable);
+
+ /* accept disjoint-probability estimate if in range */
+ if ((useOr ? isEquality : isInequality) &&
+ s1disjoint >= 0.0 && s1disjoint <= 1.0)
+ selec = s1disjoint;
+
+ CLAMP_PROBABILITY(selec);
+ }
+
+ free_attstatsslot(&sslot);
+
+ return selec;
+}
+
+/*
+ * Support functions for the hash tables used by eqjoinsel_find_matches
+ */
+static uint32
+hash_mcv_in(MCVInHashTable_hash *tab, Datum key)
+{
+ MCVInHashContext *context = (MCVInHashContext *) tab->private_data;
+ FunctionCallInfo fcinfo = context->hash_fcinfo;
+ Datum fresult;
+
+ fcinfo->args[0].value = key;
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ Assert(!fcinfo->isnull);
+ return DatumGetUInt32(fresult);
+}
+
+static bool
+mcvs_in_equal(MCVInHashTable_hash *tab, Datum key0, Datum key1)
+{
+ MCVInHashContext *context = (MCVInHashContext *) tab->private_data;
+
+ if (context->insert_mode)
+ {
+ /*
+ * During the insertion step, any comparisons will be between two
+ * Datums of the hash table's data type, so if the given operator is
+ * cross-type it will be the wrong thing to use. Fortunately, we can
+ * use datum_image_eq instead. The MCV values should all be distinct
+ * anyway, so it's mostly pro-forma to compare them at all.
+ */
+ return datum_image_eq(key0, key1,
+ context->hash_typbyval, context->hash_typlen);
+ }
+ else
+ {
+ FunctionCallInfo fcinfo = context->equal_fcinfo;
+ Datum fresult;
+
+ fcinfo->args[0].value = key0;
+ fcinfo->args[1].value = key1;
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ return (!fcinfo->isnull && DatumGetBool(fresult));
+ }
+}
+
/*
* Estimate number of elements in the array yielded by an expression.
*
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index ceb3fc5d980..17f5cc9476b 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1666,6 +1666,9 @@ MBuf
MCVHashContext
MCVHashEntry
MCVHashTable_hash
+MCVInHashContext
+MCVInHashEntry
+MCVInHashTable_hash
MCVItem
MCVList
MEMORY_BASIC_INFORMATION
--
2.34.1