0002-Optionally-build-per-key-hashjoin-bloom-filters-for-.patch.text
text/plain
Filename: 0002-Optionally-build-per-key-hashjoin-bloom-filters-for-.patch.text
Type: text/plain
Part: 1
From 2bac3bb8a4917f77deb19998752493f75b4f1c70 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Sun, 31 May 2026 07:13:58 -0400
Subject: [PATCH addon 2/3] Optionally build per-key hashjoin bloom filters for
opted-in recipients
Add an opt-in path that builds one bloom filter per join key, in
addition to the existing combined-hash filter, when the pushdown
recipient is a CustomScan that advertised CUSTOMPATH_SUPPORT_BLOOM_FILTERS
and the join has more than one key.
The combined filter, keyed on the hash of all keys together, stays the
default and remains the more selective one for a per-row probe: per-key
filters only test whether each column's value appears somewhere in the
build side, so on a multi-column join they are strictly weaker (they
cannot reject a row whose columns each match but not as a tuple). What
they enable is testing a single key column on its own -- a column store
can check one column against its per-column dictionary or zone map and
skip whole row groups before decompression, which the combined filter
cannot support.
The build reuses the per-key inner hash functions (the combined hash
value cannot be decomposed, so the Hash node builds one single-key hash
ExprState per key); the extra CPU and memory are paid only by a consumer
that opted in. A recipient correlates HashState.perkey_filters[i] with
BloomFilter.filter_exprs[i] by position. Recipients that did not opt in
(such as plain heap scans) and single-key joins are unaffected.
---
src/backend/executor/nodeHash.c | 35 +++++++++++++++++++++++++
src/backend/executor/nodeHashjoin.c | 25 ++++++++++++++++++
src/backend/optimizer/plan/createplan.c | 12 +++++++++
src/include/nodes/execnodes.h | 14 ++++++++++
src/include/nodes/plannodes.h | 11 ++++++++
5 files changed, 97 insertions(+)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 37224324bce..2b045eae186 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -197,6 +197,25 @@ MultiExecPrivateHash(HashState *node)
(unsigned char *) &hashvalue,
sizeof(hashvalue));
+ /*
+ * Likewise for the optional per-key filters, using the per-key
+ * (single-key) hash ExprStates. Same econtext as the combined
+ * hash above (ecxt_outertuple is the just-fetched inner tuple).
+ */
+ for (int k = 0; k < node->perkey_nfilters; k++)
+ {
+ bool keyisnull;
+ uint32 keyhash;
+
+ keyhash = DatumGetUInt32(ExecEvalExprSwitchContext(node->perkey_hash[k],
+ econtext,
+ &keyisnull));
+ if (!keyisnull)
+ bloom_add_element(node->perkey_filters[k],
+ (unsigned char *) &keyhash,
+ sizeof(keyhash));
+ }
+
bucketNumber = ExecHashGetSkewBucket(hashtable, hashvalue);
if (bucketNumber != INVALID_SKEW_BUCKET_NO)
{
@@ -722,6 +741,22 @@ ExecHashTableCreate(HashState *state)
oldctx = MemoryContextSwitchTo(hashtable->hashCxt);
state->bloom_filter = bloom_create((int64) Max(rows, 1.0),
bloom_work_mem, 0);
+
+ /*
+ * If a recipient opted in, also build one filter per join key (in
+ * addition to the combined one above). These let a recipient test an
+ * individual key column on its own; they are less selective than the
+ * combined filter, so they are built only on demand.
+ */
+ if (state->want_perkey_bloom)
+ {
+ state->perkey_filters = palloc_array(struct bloom_filter *,
+ state->perkey_nfilters);
+ for (int i = 0; i < state->perkey_nfilters; i++)
+ state->perkey_filters[i] = bloom_create((int64) Max(rows, 1.0),
+ bloom_work_mem, 0);
+ }
+
MemoryContextSwitchTo(oldctx);
}
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 8fa7af4cfef..1eaf81285f8 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -908,6 +908,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
hashState = castNode(HashState, innerPlanState(hjstate));
hashState->want_bloom_filter = (node->bloom_consumer_count > 0);
hashState->bloom_filter_id = node->bloom_filter_id;
+ hashState->want_perkey_bloom = node->bloom_perkey;
/*
* Initialize result slot, type and projection.
@@ -1031,6 +1032,28 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
&hashstate->ps,
0);
+ /*
+ * If a recipient opted in to per-key bloom filters, build one inner
+ * (single-key) hash ExprState per join key, used by the Hash node to
+ * populate the per-key filters. The combined hash above cannot be
+ * decomposed, so this is the extra cost a per-key consumer pays.
+ */
+ if (hashstate->want_perkey_bloom)
+ {
+ hashstate->perkey_nfilters = nkeys;
+ hashstate->perkey_hash = palloc_array(ExprState *, nkeys);
+ for (int i = 0; i < nkeys; i++)
+ hashstate->perkey_hash[i] =
+ ExecBuildHash32Expr(hashstate->ps.ps_ResultTupleDesc,
+ hashstate->ps.resultops,
+ &inner_hashfuncid[i],
+ list_make1_oid(list_nth_oid(node->hashcollations, i)),
+ list_make1(list_nth(hash->hashkeys, i)),
+ &hash_strict[i],
+ &hashstate->ps,
+ 0);
+ }
+
/* Remember whether we need to save tuples with null join keys */
hjstate->hj_KeepNullTuples = HJ_FILL_OUTER(hjstate);
hashstate->keep_null_tuples = HJ_FILL_INNER(hjstate);
@@ -1118,6 +1141,7 @@ ExecEndHashJoin(HashJoinState *node)
ExecHashTableDestroy(node->hj_HashTable);
node->hj_HashTable = NULL;
hashNode->bloom_filter = NULL;
+ hashNode->perkey_filters = NULL;
}
/*
@@ -1775,6 +1799,7 @@ ExecReScanHashJoin(HashJoinState *node)
* freed by the ExecHashTableDestroy call.
*/
hashNode->bloom_filter = NULL;
+ hashNode->perkey_filters = NULL;
/*
* if chgParam of subnode is not null then plan will be re-scanned
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 304ce0e3c0d..5b01b3e45cc 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4992,6 +4992,18 @@ try_push_bloom_filter(PlannerInfo *root, HashJoin *hj, Plan *outer_plan)
recipient->bloom_filters = lappend(recipient->bloom_filters, bf);
+ /*
+ * If the recipient is a CustomScan that opted in, also build a separate
+ * filter per join key. Only such a recipient can make use of them (to
+ * test a single column against a dictionary or zone map); the combined
+ * filter is always built and is the more selective one for the per-row
+ * probe. There is nothing to gain for a single-key join, where the two
+ * coincide.
+ */
+ if (list_length(hashkeys) > 1 && IsA(recipient, CustomScan) &&
+ (((CustomScan *) recipient)->flags & CUSTOMPATH_SUPPORT_BLOOM_FILTERS))
+ hj->bloom_perkey = true;
+
/*
* XXX We've manged to push the filter to the scan node, but maybe
* we should wait with updating bloom_consumer_count when it actually
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 04333f1a4d0..ee98bcb3adf 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2783,6 +2783,20 @@ typedef struct HashState
*/
struct bloom_filter *bloom_filter;
+ /*
+ * Optional per-key bloom filters, built in addition to the combined
+ * bloom_filter above when a recipient opted in (HashJoin.bloom_perkey).
+ * perkey_filters has perkey_nfilters entries, one per join key, in hashkey
+ * order; a recipient correlates them with BloomFilter.filter_exprs by
+ * position. perkey_hash holds the matching per-key (single-key) hash
+ * ExprStates, built in ExecInitHashJoin, that populate them during the
+ * build. Only the filters live in hashCxt, with bloom_filter's lifecycle.
+ */
+ bool want_perkey_bloom;
+ int perkey_nfilters;
+ struct bloom_filter **perkey_filters;
+ ExprState **perkey_hash;
+
/*
* Counters with total per-filter instrumentation. Separate from the
* per-recipient counters in BloomFilterState. Redundant, but will be
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 4e35d77cc49..21ec7ffae1a 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1124,6 +1124,17 @@ typedef struct HashJoin
* Zero when this HashJoin has no consumers.
*/
int bloom_filter_id;
+
+ /*
+ * Whether to also build one bloom filter per join key (in addition to the
+ * combined-hash filter), so that a recipient can test an individual key
+ * column on its own -- e.g. a column store probing a per-column dictionary
+ * or zone map. Set at plan time only for a multi-key join whose recipient
+ * is a CustomScan that advertised CUSTOMPATH_SUPPORT_BLOOM_FILTERS. The
+ * combined filter is always built and remains the more selective one;
+ * per-key filters are an opt-in extra that nobody else pays for.
+ */
+ bool bloom_perkey;
} HashJoin;
/* ----------------
--
2.43.0