v2-0001-Optimize-selectivity-estimation-for-Var-Var-clauses.patch
text/x-patch
Filename: v2-0001-Optimize-selectivity-estimation-for-Var-Var-clauses.patch
Type: text/x-patch
Part: 0
From 56cfbc9f93b69f877d7e9b350bfbce51c1908802 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdokimov@tantorlabs.ru>
Date: Mon, 28 Jul 2025 13:34:05 +0300
Subject: [PATCH v2] Optimize selectivity estimation for Var = Var clauses
using merge-based MCV comparison
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Previously, selectivity estimation for join and semi-join clauses of the form
Var = Var relied on a nested-loop comparison of MCV lists, resulting in O(N^2)
behavior with respect to the number of common values.
This patch introduces a merge-based strategy when the underlying type supports
BTREE ordering. In such cases, the MCVs are sorted using the datatype-specific
comparison operator, and selectivity is estimated via linear-time merge scan.
---
src/backend/utils/adt/selfuncs.c | 302 ++++++++++++++++++++++------
src/backend/utils/cache/lsyscache.c | 61 ++++++
src/include/utils/lsyscache.h | 1 +
src/include/utils/selfuncs.h | 8 +
4 files changed, 312 insertions(+), 60 deletions(-)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 17fbfa9b410..6bcec4d0e59 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -217,6 +217,7 @@ static bool get_actual_variable_endpoint(Relation heapRel,
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static double btcost_correlation(IndexOptInfo *index,
VariableStatData *vardata);
+static int compare_mcv_items(const void *a, const void *b, void *arg);
/*
@@ -2463,7 +2464,6 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
* of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
*/
LOCAL_FCINFO(fcinfo, 2);
- FmgrInfo eqproc;
bool *hasmatch1;
bool *hasmatch2;
double nullfrac1 = stats1->stanullfrac;
@@ -2479,52 +2479,134 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
totalsel2;
int i,
nmatches;
-
- fmgr_info(opfuncoid, &eqproc);
-
- /*
- * Save a few cycles by setting up the fcinfo struct just once. Using
- * FunctionCallInvoke directly also avoids failure if the eqproc
- * returns NULL, though really equality functions should never do
- * that.
- */
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
- NULL, NULL);
- fcinfo->args[0].isnull = false;
- fcinfo->args[1].isnull = false;
+ Oid compare_func_oid = InvalidOid;
hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
/*
* Note we assume that each MCV will match at most one member of the
- * other MCV list. If the operator isn't really equality, there could
- * be multiple matches --- but we don't look for them, both for speed
- * and because the math wouldn't add up...
+ * other MCV list.
*/
matchprodfreq = 0.0;
nmatches = 0;
- for (i = 0; i < sslot1->nvalues; i++)
+ if (get_comparison_op_for_sort(opfuncoid, &compare_func_oid))
{
- int j;
+ LOCAL_FCINFO(merge_fcinfo, 2);
+ FmgrInfo cmpproc;
+ McvItem *mcvs1,
+ *mcvs2;
+ int ptr1,
+ ptr2;
+
+ fmgr_info(compare_func_oid, &cmpproc);
+
+ InitFunctionCallInfoData(*merge_fcinfo, &cmpproc, 2,
+ collation, NULL, NULL);
- fcinfo->args[0].value = sslot1->values[i];
+ mcvs1 = (McvItem *) palloc(sslot1->nvalues * sizeof(McvItem));
+ mcvs2 = (McvItem *) palloc(sslot2->nvalues * sizeof(McvItem));
- for (j = 0; j < sslot2->nvalues; j++)
+ /*
+ * Copy the MCV values and their frequencies into a temporary array
+ * so we can safely sort them without altering the original statistics.
+ */
+ for (i = 0; i < sslot1->nvalues; i++)
{
- Datum fresult;
+ mcvs1[i].value = sslot1->values[i];
+ mcvs1[i].original_idx = i;
+ mcvs1[i].frequency = sslot1->numbers[i];
+ }
- if (hasmatch2[j])
- continue;
- fcinfo->args[1].value = sslot2->values[j];
- fcinfo->isnull = false;
- fresult = FunctionCallInvoke(fcinfo);
- if (!fcinfo->isnull && DatumGetBool(fresult))
- {
- hasmatch1[i] = hasmatch2[j] = true;
- matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
+ for (i = 0; i < sslot2->nvalues; i++)
+ {
+ mcvs2[i].value = sslot2->values[i];
+ mcvs2[i].original_idx = i;
+ mcvs2[i].frequency = sslot2->numbers[i];
+ }
+
+ qsort_arg(mcvs1, sslot1->nvalues, sizeof(McvItem), compare_mcv_items, &merge_fcinfo);
+ qsort_arg(mcvs2, sslot2->nvalues, sizeof(McvItem), compare_mcv_items, &merge_fcinfo);
+
+ ptr1 = ptr2 = 0;
+ while (ptr1 < sslot1->nvalues && ptr2 < sslot2->nvalues)
+ {
+ McvItem *item1 = &mcvs1[ptr1];
+ McvItem *item2 = &mcvs2[ptr2];
+ Datum result_datum;
+ int cmp_result;
+
+ merge_fcinfo->args[0].value = item1->value;
+ merge_fcinfo->args[0].isnull = false;
+ merge_fcinfo->args[1].value = item2->value;
+ merge_fcinfo->args[1].isnull = false;
+
+ result_datum = FunctionCallInvoke(merge_fcinfo);
+ cmp_result = DatumGetInt32(result_datum);
+
+ if (cmp_result == 0)
+ {
+ hasmatch1[item1->original_idx] = true;
+ hasmatch2[item2->original_idx] = true;
+
+ matchprodfreq += item1->frequency * item2->frequency;
nmatches++;
- break;
+
+ ptr1++;
+ ptr2++;
+ }
+ else if (cmp_result < 0)
+ ptr1++;
+ else
+ ptr2++;
+ }
+
+ pfree(mcvs1);
+ pfree(mcvs2);
+ }
+ else
+ {
+ FmgrInfo eqproc;
+
+ fmgr_info(opfuncoid, &eqproc);
+ /*
+ * Save a few cycles by setting up the fcinfo struct just once. Using
+ * FunctionCallInvoke directly also avoids failure if the eqproc
+ * returns NULL, though really equality functions should never do
+ * that.
+ */
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+
+ /*
+ * If the operator isn't really equality, there could
+ * be multiple matches --- but we don't look for them, both for speed
+ * and because the math wouldn't add up...
+ */
+ for (i = 0; i < sslot1->nvalues; i++)
+ {
+ int j;
+
+ fcinfo->args[0].value = sslot1->values[i];
+
+ for (j = 0; j < sslot2->nvalues; j++)
+ {
+ Datum fresult;
+
+ if (hasmatch2[j])
+ continue;
+ fcinfo->args[1].value = sslot2->values[j];
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ if (!fcinfo->isnull && DatumGetBool(fresult))
+ {
+ hasmatch1[i] = hasmatch2[j] = true;
+ matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
+ nmatches++;
+ break;
+ }
}
}
}
@@ -2690,7 +2772,6 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
* in a skewed distribution this gives us a big leg up in accuracy.
*/
LOCAL_FCINFO(fcinfo, 2);
- FmgrInfo eqproc;
bool *hasmatch1;
bool *hasmatch2;
double nullfrac1 = stats1->stanullfrac;
@@ -2700,6 +2781,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
int i,
nmatches,
clamped_nvalues2;
+ Oid compare_func_oid = InvalidOid;
/*
* The clamping above could have resulted in nd2 being less than
@@ -2710,19 +2792,6 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
*/
clamped_nvalues2 = Min(sslot2->nvalues, nd2);
- fmgr_info(opfuncoid, &eqproc);
-
- /*
- * Save a few cycles by setting up the fcinfo struct just once. Using
- * FunctionCallInvoke directly also avoids failure if the eqproc
- * returns NULL, though really equality functions should never do
- * that.
- */
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
- NULL, NULL);
- fcinfo->args[0].isnull = false;
- fcinfo->args[1].isnull = false;
-
hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
@@ -2733,26 +2802,116 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
* and because the math wouldn't add up...
*/
nmatches = 0;
- for (i = 0; i < sslot1->nvalues; i++)
+ if (get_comparison_op_for_sort(opfuncoid, &compare_func_oid))
{
- int j;
+ LOCAL_FCINFO(merge_fcinfo, 2);
+ FmgrInfo cmpproc;
+ McvItem *mcvs1,
+ *mcvs2;
+ int ptr1,
+ ptr2;
- fcinfo->args[0].value = sslot1->values[i];
+ fmgr_info(compare_func_oid, &cmpproc);
+ InitFunctionCallInfoData(*merge_fcinfo, &cmpproc, 2,
+ collation, NULL, NULL);
- for (j = 0; j < clamped_nvalues2; j++)
+ mcvs1 = (McvItem *) palloc(sslot1->nvalues * sizeof(McvItem));
+ mcvs2 = (McvItem *) palloc(clamped_nvalues2 * sizeof(McvItem));
+
+ /*
+ * Copy the MCV values and their frequencies into a temporary array
+ * so we can safely sort them without altering the original statistics.
+ */
+ for (i = 0; i < sslot1->nvalues; i++)
{
- Datum fresult;
+ mcvs1[i].value = sslot1->values[i];
+ mcvs1[i].original_idx = i;
+ mcvs1[i].frequency = sslot1->numbers[i];
+ }
- if (hasmatch2[j])
- continue;
- fcinfo->args[1].value = sslot2->values[j];
- fcinfo->isnull = false;
- fresult = FunctionCallInvoke(fcinfo);
- if (!fcinfo->isnull && DatumGetBool(fresult))
- {
- hasmatch1[i] = hasmatch2[j] = true;
+ for (i = 0; i < clamped_nvalues2; i++)
+ {
+ mcvs2[i].value = sslot2->values[i];
+ mcvs2[i].original_idx = i;
+ mcvs2[i].frequency = sslot2->numbers[i];
+ }
+
+ qsort_arg(mcvs1, sslot1->nvalues, sizeof(McvItem), compare_mcv_items, &merge_fcinfo);
+ qsort_arg(mcvs2, clamped_nvalues2, sizeof(McvItem), compare_mcv_items, &merge_fcinfo);
+
+ ptr1 = ptr2 = 0;
+ while (ptr1 < sslot1->nvalues && ptr2 < clamped_nvalues2)
+ {
+ McvItem *item1 = &mcvs1[ptr1];
+ McvItem *item2 = &mcvs2[ptr2];
+ Datum result_datum;
+ int cmp_result;
+
+ merge_fcinfo->args[0].value = item1->value;
+ merge_fcinfo->args[0].isnull = false;
+ merge_fcinfo->args[1].value = item2->value;
+ merge_fcinfo->args[1].isnull = false;
+
+ result_datum = FunctionCallInvoke(merge_fcinfo);
+ cmp_result = DatumGetInt32(result_datum);
+
+ if (cmp_result == 0)
+ {
+ hasmatch1[item1->original_idx] = true;
+ hasmatch2[item2->original_idx] = true;
+
nmatches++;
- break;
+
+ ptr1++;
+ ptr2++;
+ }
+ else if (cmp_result < 0)
+ ptr1++;
+ else
+ ptr2++;
+ }
+
+ pfree(mcvs1);
+ pfree(mcvs2);
+ }
+ else
+ {
+ FmgrInfo eqproc;
+
+ fmgr_info(opfuncoid, &eqproc);
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ NULL, NULL);
+
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+
+ /*
+ * Save a few cycles by setting up the fcinfo struct just once. Using
+ * FunctionCallInvoke directly also avoids failure if the eqproc
+ * returns NULL, though really equality functions should never do
+ * that.
+ */
+ for (i = 0; i < sslot1->nvalues; i++)
+ {
+ int j;
+
+ fcinfo->args[0].value = sslot1->values[i];
+
+ for (j = 0; j < clamped_nvalues2; j++)
+ {
+ Datum fresult;
+
+ if (hasmatch2[j])
+ continue;
+ fcinfo->args[1].value = sslot2->values[j];
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ if (!fcinfo->isnull && DatumGetBool(fresult))
+ {
+ hasmatch1[i] = hasmatch2[j] = true;
+ nmatches++;
+ break;
+ }
}
}
}
@@ -8753,3 +8912,26 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexPages = index->pages;
}
+
+/*
+ * Comparator for sorting MCV items
+ */
+static int
+compare_mcv_items(const void *a, const void *b, void *arg)
+{
+ FunctionCallInfo fcinfo_comp = *((FunctionCallInfo *) arg);
+ Datum result_datum;
+
+ const McvItem *item1 = (const McvItem *) a;
+ const McvItem *item2 = (const McvItem *) b;
+
+ fcinfo_comp->args[0].value = item1->value;
+ fcinfo_comp->args[0].isnull = false;
+
+ fcinfo_comp->args[1].value = item2->value;
+ fcinfo_comp->args[1].isnull = false;
+
+ result_datum = FunctionCallInvoke(fcinfo_comp);
+
+ return DatumGetInt32(result_datum);
+}
\ No newline at end of file
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index c460a72b75d..1a73a964bc4 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -17,6 +17,7 @@
#include "access/hash.h"
#include "access/htup_details.h"
+#include "access/nbtree.h"
#include "bootstrap/bootstrap.h"
#include "catalog/namespace.h"
#include "catalog/pg_am.h"
@@ -39,6 +40,7 @@
#include "catalog/pg_subscription.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_type.h"
+#include "commands/defrem.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "utils/array.h"
@@ -315,6 +317,65 @@ get_ordering_op_properties(Oid opno,
return result;
}
+/*
+ * get_comparison_op_for_sort
+ * Get the OID of a datatype-specific comparison operator
+ * associated with a given equality operator.
+ *
+ * This is typically used in clauses like Var = Var, where both arguments
+ * are of the same type. We look up the corresponding BTORDER_PROC for
+ * the default B-tree opclass of the datatype.
+ *
+ * However, for special cases like array_eq and record_eq, a comparison
+ * operator may exist (btarraycmp, btrecordcmp), but sorting is only
+ * possible if all element or field types are themselves sortable.
+ * Rather than reimplement that logic here, we simply exclude such
+ * cases by checking for btarraycmp and btrecordcmp explicitly.
+ *
+ * Returns false if no suitable comparison operator is found.
+ */
+bool
+get_comparison_op_for_sort(Oid opno, Oid *cmpopno)
+{
+ bool result = false;
+ HeapTuple proctup;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(opno));
+ if (HeapTupleIsValid(proctup))
+ {
+ Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup);
+
+ /* Only consider binary operators where both arguments are of the same type. */
+ if (procform->pronargs == 2 &&
+ OidIsValid(procform->proargtypes.values[0]) &&
+ OidIsValid(procform->proargtypes.values[1]) &&
+ procform->proargtypes.values[0] == procform->proargtypes.values[1])
+ {
+ Oid type_oid = procform->proargtypes.values[0];
+ Oid opclass_oid = GetDefaultOpClass(type_oid, BTREE_AM_OID);
+
+ if (OidIsValid(opclass_oid))
+ {
+ Oid opfamily_oid = get_opclass_family(opclass_oid);
+
+ *cmpopno = get_opfamily_proc(opfamily_oid, type_oid, type_oid, BTORDER_PROC);
+
+ /*
+ * Skip array_eq and record_eq: their comparison functions exist,
+ * but sorting is only possible if all elements or fields are sortable.
+ */
+ if (OidIsValid(*cmpopno) &&
+ *cmpopno != F_BTARRAYCMP &&
+ *cmpopno != F_BTRECORDCMP)
+ result = true;
+ }
+ }
+ ReleaseSysCache(proctup);
+ }
+
+ return result;
+}
+
/*
* get_equality_op_for_ordering_op
* Get the OID of the datatype-specific equality operator
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index fa7c7e0323b..3ddcca5ac08 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -80,6 +80,7 @@ extern Oid get_opfamily_member_for_cmptype(Oid opfamily, Oid lefttype, Oid right
extern bool get_ordering_op_properties(Oid opno,
Oid *opfamily, Oid *opcintype, CompareType *cmptype);
extern Oid get_equality_op_for_ordering_op(Oid opno, bool *reverse);
+extern bool get_comparison_op_for_sort(Oid opno, Oid *cmpopno);
extern Oid get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type);
extern List *get_mergejoin_opfamilies(Oid opno);
extern bool get_compatible_hash_operators(Oid opno,
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 013049b3098..66505d3b79c 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -137,6 +137,14 @@ typedef struct
double num_sa_scans; /* # indexscans from ScalarArrayOpExprs */
} GenericCosts;
+/* Represents an MCV statistic entry */
+typedef struct McvItem
+{
+ Datum value;
+ int original_idx; /* The original index of the item in the array before sorting. */
+ double frequency; /* Frequency of value in statistics */
+} McvItem;
+
/* Hooks for plugins to get control when we ask for stats */
typedef bool (*get_relation_stats_hook_type) (PlannerInfo *root,
RangeTblEntry *rte,
--
2.34.1