0002-Optionally-build-per-key-hashjoin-bloom-filters-for-.patch.text

text/plain
Filename: 0002-Optionally-build-per-key-hashjoin-bloom-filters-for-.patch.text
Type: text/plain
Part: 1
Message: Re: hashjoins vs. Bloom filters (yet again)
From 2bac3bb8a4917f77deb19998752493f75b4f1c70 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Sun, 31 May 2026 07:13:58 -0400
Subject: [PATCH addon 2/3] Optionally build per-key hashjoin bloom filters for
 opted-in recipients

Add an opt-in path that builds one bloom filter per join key, in
addition to the existing combined-hash filter, when the pushdown
recipient is a CustomScan that advertised CUSTOMPATH_SUPPORT_BLOOM_FILTERS
and the join has more than one key.

The combined filter, keyed on the hash of all keys together, stays the
default and remains the more selective one for a per-row probe: per-key
filters only test whether each column's value appears somewhere in the
build side, so on a multi-column join they are strictly weaker (they
cannot reject a row whose columns each match but not as a tuple).  What
they enable is testing a single key column on its own -- a column store
can check one column against its per-column dictionary or zone map and
skip whole row groups before decompression, which the combined filter
cannot support.

The build reuses the per-key inner hash functions (the combined hash
value cannot be decomposed, so the Hash node builds one single-key hash
ExprState per key); the extra CPU and memory are paid only by a consumer
that opted in.  A recipient correlates HashState.perkey_filters[i] with
BloomFilter.filter_exprs[i] by position.  Heap and single-key joins are
unaffected.
---
 src/backend/executor/nodeHash.c         | 35 +++++++++++++++++++++++++
 src/backend/executor/nodeHashjoin.c     | 25 ++++++++++++++++++
 src/backend/optimizer/plan/createplan.c | 12 +++++++++
 src/include/nodes/execnodes.h           | 14 ++++++++++
 src/include/nodes/plannodes.h           | 11 ++++++++
 5 files changed, 97 insertions(+)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 37224324bce..2b045eae186 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -197,6 +197,25 @@ MultiExecPrivateHash(HashState *node)
 								  (unsigned char *) &hashvalue,
 								  sizeof(hashvalue));
 
+			/*
+			 * Likewise for the optional per-key filters, using the per-key
+			 * (single-key) hash ExprStates.  Same econtext as the combined
+			 * hash above (ecxt_outertuple is the just-fetched inner tuple).
+			 */
+			for (int k = 0; k < node->perkey_nfilters; k++)
+			{
+				bool		keyisnull;
+				uint32		keyhash;
+
+				keyhash = DatumGetUInt32(ExecEvalExprSwitchContext(node->perkey_hash[k],
+																   econtext,
+																   &keyisnull));
+				if (!keyisnull)
+					bloom_add_element(node->perkey_filters[k],
+									  (unsigned char *) &keyhash,
+									  sizeof(keyhash));
+			}
+
 			bucketNumber = ExecHashGetSkewBucket(hashtable, hashvalue);
 			if (bucketNumber != INVALID_SKEW_BUCKET_NO)
 			{
@@ -722,6 +741,22 @@ ExecHashTableCreate(HashState *state)
 		oldctx = MemoryContextSwitchTo(hashtable->hashCxt);
 		state->bloom_filter = bloom_create((int64) Max(rows, 1.0),
 										   bloom_work_mem, 0);
+
+		/*
+		 * If a recipient opted in, also build one filter per join key (in
+		 * addition to the combined one above).  These let a recipient test an
+		 * individual key column on its own; they are less selective than the
+		 * combined filter, so they are built only on demand.
+		 */
+		if (state->want_perkey_bloom)
+		{
+			state->perkey_filters = palloc_array(struct bloom_filter *,
+												 state->perkey_nfilters);
+			for (int i = 0; i < state->perkey_nfilters; i++)
+				state->perkey_filters[i] = bloom_create((int64) Max(rows, 1.0),
+														bloom_work_mem, 0);
+		}
+
 		MemoryContextSwitchTo(oldctx);
 	}
 
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 8fa7af4cfef..1eaf81285f8 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -908,6 +908,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	hashState = castNode(HashState, innerPlanState(hjstate));
 	hashState->want_bloom_filter = (node->bloom_consumer_count > 0);
 	hashState->bloom_filter_id = node->bloom_filter_id;
+	hashState->want_perkey_bloom = node->bloom_perkey;
 
 	/*
 	 * Initialize result slot, type and projection.
@@ -1031,6 +1032,28 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 								&hashstate->ps,
 								0);
 
+		/*
+		 * If a recipient opted in to per-key bloom filters, build one inner
+		 * (single-key) hash ExprState per join key, used by the Hash node to
+		 * populate the per-key filters.  The combined hash above cannot be
+		 * decomposed, so this is the extra cost a per-key consumer pays.
+		 */
+		if (hashstate->want_perkey_bloom)
+		{
+			hashstate->perkey_nfilters = nkeys;
+			hashstate->perkey_hash = palloc_array(ExprState *, nkeys);
+			for (int i = 0; i < nkeys; i++)
+				hashstate->perkey_hash[i] =
+					ExecBuildHash32Expr(hashstate->ps.ps_ResultTupleDesc,
+										hashstate->ps.resultops,
+										&inner_hashfuncid[i],
+										list_make1_oid(list_nth_oid(node->hashcollations, i)),
+										list_make1(list_nth(hash->hashkeys, i)),
+										&hash_strict[i],
+										&hashstate->ps,
+										0);
+		}
+
 		/* Remember whether we need to save tuples with null join keys */
 		hjstate->hj_KeepNullTuples = HJ_FILL_OUTER(hjstate);
 		hashstate->keep_null_tuples = HJ_FILL_INNER(hjstate);
@@ -1118,6 +1141,7 @@ ExecEndHashJoin(HashJoinState *node)
 		ExecHashTableDestroy(node->hj_HashTable);
 		node->hj_HashTable = NULL;
 		hashNode->bloom_filter = NULL;
+		hashNode->perkey_filters = NULL;
 	}
 
 	/*
@@ -1775,6 +1799,7 @@ ExecReScanHashJoin(HashJoinState *node)
 			 * freed by the ExecHashTableDestroy call.
 			 */
 			hashNode->bloom_filter = NULL;
+			hashNode->perkey_filters = NULL;
 
 			/*
 			 * if chgParam of subnode is not null then plan will be re-scanned
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 304ce0e3c0d..5b01b3e45cc 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4992,6 +4992,18 @@ try_push_bloom_filter(PlannerInfo *root, HashJoin *hj, Plan *outer_plan)
 
 	recipient->bloom_filters = lappend(recipient->bloom_filters, bf);
 
+	/*
+	 * If the recipient is a CustomScan that opted in, also build a separate
+	 * filter per join key.  Only such a recipient can make use of them (to
+	 * test a single column against a dictionary or zone map); the combined
+	 * filter is always built and is the more selective one for the per-row
+	 * probe.  There is nothing to gain for a single-key join, where the two
+	 * coincide.
+	 */
+	if (list_length(hashkeys) > 1 && IsA(recipient, CustomScan) &&
+		(((CustomScan *) recipient)->flags & CUSTOMPATH_SUPPORT_BLOOM_FILTERS))
+		hj->bloom_perkey = true;
+
 	/*
 	 * XXX We've manged to push the filter to the scan node, but maybe
 	 * we should wait with updating bloom_consumer_count when it actually
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 04333f1a4d0..ee98bcb3adf 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2783,6 +2783,20 @@ typedef struct HashState
 	 */
 	struct bloom_filter *bloom_filter;
 
+	/*
+	 * Optional per-key bloom filters, built in addition to the combined
+	 * bloom_filter above when a recipient opted in (HashJoin.bloom_perkey).
+	 * perkey_filters has perkey_nfilters entries, one per join key, in hashkey
+	 * order; a recipient correlates them with BloomFilter.filter_exprs by
+	 * position.  perkey_hash holds the matching per-key (single-key) hash
+	 * ExprStates used to populate them during the build.  All live in hashCxt
+	 * and follow the same lifecycle as bloom_filter.
+	 */
+	bool		want_perkey_bloom;
+	int			perkey_nfilters;
+	struct bloom_filter **perkey_filters;
+	ExprState **perkey_hash;
+
 	/*
 	 * Counters with total per-filter instrumentation. Separate from the
 	 * per-recipient counters in BloomFilterState. Redundant, but will be
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 4e35d77cc49..21ec7ffae1a 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1124,6 +1124,17 @@ typedef struct HashJoin
 	 * Zero when this HashJoin has no consumers.
 	 */
 	int			bloom_filter_id;
+
+	/*
+	 * Whether to also build one bloom filter per join key (in addition to the
+	 * combined-hash filter), so that a recipient can test an individual key
+	 * column on its own -- e.g. a column store probing a per-column dictionary
+	 * or zone map.  Set at plan time only when the recipient is a CustomScan
+	 * that advertised CUSTOMPATH_SUPPORT_BLOOM_FILTERS.  The combined filter is
+	 * always built and remains the more selective one; per-key filters are an
+	 * opt-in extra that nobody else pays for.
+	 */
+	bool		bloom_perkey;
 } HashJoin;
 
 /* ----------------
-- 
2.43.0