v1-0001-Avoid-duplicate-MCV-matching-in-eqjoinsel_semi-an.patch
text/x-patch
Filename: v1-0001-Avoid-duplicate-MCV-matching-in-eqjoinsel_semi-an.patch
Type: text/x-patch
Part: 0
From 71f59bd83e559df6b36720a4592bf3fdd689504a Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdokimov@tantorlabs.ru>
Date: Mon, 10 Nov 2025 16:11:43 +0300
Subject: [PATCH v1] Avoid duplicate MCV matching in eqjoinsel_semi and
eqjoinsel_inner.
Previously both eqjoinsel_inner() and eqjoinsel_semi() performed identical
O(N^2) loops over MCV lists, even though the semi join case always follows
the inner join case in eqjoinsel(). Now the MCV matching results from
eqjoinsel_inner() are reused in eqjoinsel_semi() when possible (i.e., when
the RHS MCV list is not clamped). This saves redundant computation and
simplifies the code.
Author: Ilia Evdokimov <ilya.evdokimov@tantorlabs.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: David Geier <geidav.pg@gmail.com>
---
src/backend/utils/adt/selfuncs.c | 36 ++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index cb23ad52782..55cd0486bf9 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -154,7 +154,9 @@ static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
- bool have_mcvs1, bool have_mcvs2);
+ bool have_mcvs1, bool have_mcvs2,
+ double *matchfreq_mcvs1, double *matchfreq_mcvs2,
+ int *nmatches_mcvs);
static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
@@ -162,6 +164,7 @@ static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2,
+ double matchfreq1, int nmatches,
RelOptInfo *inner_rel);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
RelOptInfo *rel, List **varinfos, double *ndistinct);
@@ -2313,6 +2316,9 @@ eqjoinsel(PG_FUNCTION_ARGS)
bool get_mcv_stats;
bool join_is_reversed;
RelOptInfo *inner_rel;
+ int nmatches_mcvs = 0;
+ double matchfreq_mcvs1 = 0.0;
+ double matchfreq_mcvs2 = 0.0;
get_join_variables(root, args, sjinfo,
&vardata1, &vardata2, &join_is_reversed);
@@ -2367,7 +2373,9 @@ eqjoinsel(PG_FUNCTION_ARGS)
isdefault1, isdefault2,
&sslot1, &sslot2,
stats1, stats2,
- have_mcvs1, have_mcvs2);
+ have_mcvs1, have_mcvs2,
+ &matchfreq_mcvs1, &matchfreq_mcvs2,
+ &nmatches_mcvs);
switch (sjinfo->jointype)
{
@@ -2395,6 +2403,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
&sslot1, &sslot2,
stats1, stats2,
have_mcvs1, have_mcvs2,
+ matchfreq_mcvs1, nmatches_mcvs,
inner_rel);
else
{
@@ -2408,6 +2417,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
&sslot2, &sslot1,
stats2, stats1,
have_mcvs2, have_mcvs1,
+ matchfreq_mcvs2, nmatches_mcvs,
inner_rel);
}
@@ -2455,7 +2465,9 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
- bool have_mcvs1, bool have_mcvs2)
+ bool have_mcvs1, bool have_mcvs2,
+ double *matchfreq_mcvs1, double *matchfreq_mcvs2,
+ int *nmatches_mcvs)
{
double selec;
@@ -2595,6 +2607,11 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
(nd1 - nmatches);
+ /* Save MCV match statistics for possible reuse by eqjoinsel_semi() */
+ *matchfreq_mcvs1 = matchfreq1;
+ *matchfreq_mcvs2 = matchfreq2;
+ *nmatches_mcvs = nmatches;
+
/*
* Use the smaller of the two estimates. This can be justified in
* essentially the same terms as given below for the no-stats case: to
@@ -2653,6 +2670,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2,
+ double matchfreq1, int nmatches,
RelOptInfo *inner_rel)
{
double selec;
@@ -2705,11 +2723,9 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
bool *hasmatch1;
bool *hasmatch2;
double nullfrac1 = stats1->stanullfrac;
- double matchfreq1,
- uncertainfrac,
+ double uncertainfrac,
uncertain;
int i,
- nmatches,
clamped_nvalues2;
/*
@@ -2721,6 +2737,13 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
*/
clamped_nvalues2 = Min(sslot2->nvalues, nd2);
+ /*
+ * eqjoinsel_inner() normally already did the full MCV comparison,
+ * so we reuse its results unless RHS MCVs were clamped, in which
+ * case we must redo the loop for the reduced list.
+ */
+ if (clamped_nvalues2 != sslot2->nvalues)
+ {
fmgr_info(opfuncoid, &eqproc);
/*
@@ -2777,6 +2800,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
CLAMP_PROBABILITY(matchfreq1);
pfree(hasmatch1);
pfree(hasmatch2);
+ }
/*
* Now we need to estimate the fraction of relation 1 that has at
--
2.34.1