v7-0003-Refactor-handling-of-nbtree-array-redundancies.patch

application/octet-stream

Filename: v7-0003-Refactor-handling-of-nbtree-array-redundancies.patch
Type: application/octet-stream
Part: 2
Message: Re: Adding skip scan (including MDAM style range skip scan) to nbtree

Patch

Same data as JSON: GET /api/v1/attachments/:id/patch the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes. API reference →
Format: format-patch
Series: patch v7-0003
Subject: Refactor handling of nbtree array redundancies.
File+
src/backend/access/nbtree/nbtree.c 6 4
src/backend/access/nbtree/nbtutils.c 76 80
From 002753a55696ec3f51537b8ca2b6f49c5f01086e Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Thu, 8 Aug 2024 15:41:18 -0400
Subject: [PATCH v7 3/4] Refactor handling of nbtree array redundancies.

Rather than allocating memory for so.keyData[] at the start of each
btrescan, lazily allocate space later on, in _bt_preprocess_keys.  We
now allocate so.keyData[] after _bt_preprocess_array_keys is done
performing initial array related preprocessing.

An immediate benefit of this approach is that _bt_preprocess_array_keys
no longer needs to explicitly mark redundant array scan keys.  Other
code (_bt_preprocess_keys and its other subsidiary routines) no longer
have to interpret the scan key entries as redundant.  Redundant array
scan keys simply never appear in the _bt_preprocess_keys input array
(_bt_preprocess_array_keys removes them up front).

This refactoring is also preparation for an upcoming patch that will add
skip scan optimizations to nbtree.  _bt_preprocess_array_keys will be
taught to add new skip array scan keys to the _bt_preprocess_keys input
array (i.e. to arrayKeyData), so doing things this way avoids uselessly
palloc'ing so.keyData[], only to have to repalloc (to enlarge the array)
almost immediately afterwards.  This scheme allows _bt_preprocess_keys
to output a so.keyData[] scan key array that can be larger than the
original scan.keyData[] input array, due to the addition of skip array
scan keys within _bt_preprocess_array_keys.

Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-Wz=9A_UtM7HzUThSkQ+BcrQsQZuNhWOvQWK06PRkEp=SKQ@mail.gmail.com
---
 src/backend/access/nbtree/nbtree.c   |  10 +-
 src/backend/access/nbtree/nbtutils.c | 156 +++++++++++++--------------
 2 files changed, 82 insertions(+), 84 deletions(-)

diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index f166c7549..4ecf883d3 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -326,11 +326,8 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
 	so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
 	BTScanPosInvalidate(so->currPos);
 	BTScanPosInvalidate(so->markPos);
-	if (scan->numberOfKeys > 0)
-		so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
-	else
-		so->keyData = NULL;
 
+	so->keyData = NULL;
 	so->needPrimScan = false;
 	so->scanBehind = false;
 	so->oppoDirCheck = false;
@@ -410,6 +407,11 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 		memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
 	so->numberOfKeys = 0;		/* until _bt_preprocess_keys sets it */
 	so->numArrayKeys = 0;		/* ditto */
+
+	/* Release private storage allocated in previous btrescan, if any */
+	if (so->keyData != NULL)
+		pfree(so->keyData);
+	so->keyData = NULL;
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 98688a3d6..d1423bd85 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -62,7 +62,7 @@ static bool _bt_compare_array_scankey_args(IndexScanDesc scan,
 										   ScanKey arraysk, ScanKey skey,
 										   FmgrInfo *orderproc, BTArrayKeyInfo *array,
 										   bool *qual_ok);
-static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan);
+static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *numberOfKeys);
 static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap);
 static int	_bt_compare_array_elements(const void *a, const void *b, void *arg);
 static inline int32 _bt_compare_array_skey(FmgrInfo *orderproc,
@@ -251,9 +251,6 @@ _bt_freestack(BTStack stack)
  * It is convenient for _bt_preprocess_keys caller to have to deal with no
  * more than one equality strategy array scan key per index attribute.  We'll
  * always be able to set things up that way when complete opfamilies are used.
- * Eliminated array scan keys can be recognized as those that have had their
- * sk_strategy field set to InvalidStrategy here by us.  Caller should avoid
- * including these in the scan's so->keyData[] output array.
  *
  * We set the scan key references from the scan's BTArrayKeyInfo info array to
  * offsets into the temp modified input array returned to caller.  Scans that
@@ -261,18 +258,25 @@ _bt_freestack(BTStack stack)
  * preprocessing steps are complete.  This will convert the scan key offset
  * references into references to the scan's so->keyData[] output scan keys.
  *
+ * Caller must pass *numberOfKeys to give us a way to change the number of
+ * input scan keys (our output is caller's input).  The returned array can be
+ * smaller than scan->keyData[] when we eliminated a redundant array scan key
+ * (redundant with some other array scan key, for the same attribute).  Caller
+ * uses this to allocate so->keyData[] for the current btrescan.
+ *
  * Note: the reason we need to return a temp scan key array, rather than just
  * scribbling on scan->keyData, is that callers are permitted to call btrescan
  * without supplying a new set of scankey data.
  */
 static ScanKey
-_bt_preprocess_array_keys(IndexScanDesc scan)
+_bt_preprocess_array_keys(IndexScanDesc scan, int *numberOfKeys)
 {
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	Relation	rel = scan->indexRelation;
-	int			numberOfKeys = scan->numberOfKeys;
+	int			numArrayKeyData = scan->numberOfKeys;
 	int16	   *indoption = rel->rd_indoption;
-	int			numArrayKeys;
+	int			numArrayKeys,
+				output_ikey = 0;
 	int			origarrayatt = InvalidAttrNumber,
 				origarraykey = -1;
 	Oid			origelemtype = InvalidOid;
@@ -280,11 +284,11 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 	MemoryContext oldContext;
 	ScanKey		arrayKeyData;	/* modified copy of scan->keyData */
 
-	Assert(numberOfKeys);
+	Assert(scan->numberOfKeys);
 
 	/* Quick check to see if there are any array keys */
 	numArrayKeys = 0;
-	for (int i = 0; i < numberOfKeys; i++)
+	for (int i = 0; i < scan->numberOfKeys; i++)
 	{
 		cur = &scan->keyData[i];
 		if (cur->sk_flags & SK_SEARCHARRAY)
@@ -317,19 +321,18 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 
 	oldContext = MemoryContextSwitchTo(so->arrayContext);
 
-	/* Create modifiable copy of scan->keyData in the workspace context */
-	arrayKeyData = (ScanKey) palloc(numberOfKeys * sizeof(ScanKeyData));
-	memcpy(arrayKeyData, scan->keyData, numberOfKeys * sizeof(ScanKeyData));
+	/* Create output scan keys in the workspace context */
+	arrayKeyData = (ScanKey) palloc(numArrayKeyData * sizeof(ScanKeyData));
 
 	/* Allocate space for per-array data in the workspace context */
 	so->arrayKeys = (BTArrayKeyInfo *) palloc(numArrayKeys * sizeof(BTArrayKeyInfo));
 
 	/* Allocate space for ORDER procs used to help _bt_checkkeys */
-	so->orderProcs = (FmgrInfo *) palloc(numberOfKeys * sizeof(FmgrInfo));
+	so->orderProcs = (FmgrInfo *) palloc(numArrayKeyData * sizeof(FmgrInfo));
 
 	/* Now process each array key */
 	numArrayKeys = 0;
-	for (int i = 0; i < numberOfKeys; i++)
+	for (int input_ikey = 0; input_ikey < scan->numberOfKeys; input_ikey++)
 	{
 		FmgrInfo	sortproc;
 		FmgrInfo   *sortprocp = &sortproc;
@@ -345,14 +348,20 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 		int			num_nonnulls;
 		int			j;
 
-		cur = &arrayKeyData[i];
+		/*
+		 * Copy input scan key into temp arrayKeyData scan key array
+		 */
+		cur = &arrayKeyData[output_ikey];
+		*cur = scan->keyData[input_ikey];
+
 		if (!(cur->sk_flags & SK_SEARCHARRAY))
+		{
+			output_ikey++;		/* keep this non-array scan key */
 			continue;
+		}
 
 		/*
-		 * First, deconstruct the array into elements.  Anything allocated
-		 * here (including a possibly detoasted array value) is in the
-		 * workspace context.
+		 * Deconstruct the array into elements
 		 */
 		arrayval = DatumGetArrayTypeP(cur->sk_argument);
 		/* We could cache this data, but not clear it's worth it */
@@ -406,6 +415,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 					_bt_find_extreme_element(scan, cur, elemtype,
 											 BTGreaterStrategyNumber,
 											 elem_values, num_nonnulls);
+				output_ikey++;	/* keep this transformed scan key */
 				continue;
 			case BTEqualStrategyNumber:
 				/* proceed with rest of loop */
@@ -416,6 +426,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 					_bt_find_extreme_element(scan, cur, elemtype,
 											 BTLessStrategyNumber,
 											 elem_values, num_nonnulls);
+				output_ikey++;	/* keep this transformed scan key */
 				continue;
 			default:
 				elog(ERROR, "unrecognized StrategyNumber: %d",
@@ -432,7 +443,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 		 * sortproc just points to the same proc used during binary searches.
 		 */
 		_bt_setup_array_cmp(scan, cur, elemtype,
-							&so->orderProcs[i], &sortprocp);
+							&so->orderProcs[output_ikey], &sortprocp);
 
 		/*
 		 * Sort the non-null elements and eliminate any duplicates.  We must
@@ -476,11 +487,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 					break;
 				}
 
-				/*
-				 * Indicate to _bt_preprocess_keys caller that it must ignore
-				 * this scan key
-				 */
-				cur->sk_strategy = InvalidStrategy;
+				/* Throw away this scan key/array */
 				continue;
 			}
 
@@ -511,12 +518,15 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
 		 * Note: _bt_preprocess_array_keys_final will fix-up each array's
 		 * scan_key field later on, after so->keyData[] has been finalized.
 		 */
-		so->arrayKeys[numArrayKeys].scan_key = i;
+		so->arrayKeys[numArrayKeys].scan_key = output_ikey;
 		so->arrayKeys[numArrayKeys].num_elems = num_elems;
 		so->arrayKeys[numArrayKeys].elem_values = elem_values;
 		numArrayKeys++;
+		output_ikey++;			/* keep this scan key/array */
 	}
 
+	/* Set final number of arrayKeyData[] keys, array keys */
+	*numberOfKeys = output_ikey;
 	so->numArrayKeys = numArrayKeys;
 
 	MemoryContextSwitchTo(oldContext);
@@ -2429,10 +2439,12 @@ end_toplevel_scan:
 /*
  *	_bt_preprocess_keys() -- Preprocess scan keys
  *
+ * The first call here (per btrescan) allocates so->keyData[].
  * The given search-type keys (taken from scan->keyData[])
  * are copied to so->keyData[] with possible transformation.
  * scan->numberOfKeys is the number of input keys, so->numberOfKeys gets
- * the number of output keys (possibly less, never greater).
+ * the number of output keys.  Calling here a second or subsequent time
+ * (during the same btrescan) is a no-op.
  *
  * The output keys are marked with additional sk_flags bits beyond the
  * system-standard bits supplied by the caller.  The DESC and NULLS_FIRST
@@ -2519,9 +2531,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 	int16	   *indoption = scan->indexRelation->rd_indoption;
 	int			new_numberOfKeys;
 	int			numberOfEqualCols;
-	ScanKey		inkeys;
-	ScanKey		outkeys;
-	ScanKey		cur;
+	ScanKey		inputsk;
 	BTScanKeyPreproc xform[BTMaxStrategyNumber];
 	bool		test_result;
 	int			i,
@@ -2553,7 +2563,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 		return;					/* done if qual-less scan */
 
 	/* If any keys are SK_SEARCHARRAY type, set up array-key info */
-	arrayKeyData = _bt_preprocess_array_keys(scan);
+	arrayKeyData = _bt_preprocess_array_keys(scan, &numberOfKeys);
 	if (!so->qual_ok)
 	{
 		/* unmatchable array, so give up */
@@ -2567,32 +2577,36 @@ _bt_preprocess_keys(IndexScanDesc scan)
 	 */
 	if (arrayKeyData)
 	{
-		inkeys = arrayKeyData;
+		inputsk = arrayKeyData;
 
 		/* Also maintain keyDataMap for remapping so->orderProc[] later */
 		keyDataMap = MemoryContextAlloc(so->arrayContext,
 										numberOfKeys * sizeof(int));
 	}
 	else
-		inkeys = scan->keyData;
+		inputsk = scan->keyData;
+
+	/*
+	 * Now that we have an estimate of the number of output scan keys,
+	 * allocate space for them
+	 */
+	so->keyData = palloc(sizeof(ScanKeyData) * numberOfKeys);
 
-	outkeys = so->keyData;
-	cur = &inkeys[0];
 	/* we check that input keys are correctly ordered */
-	if (cur->sk_attno < 1)
+	if (inputsk[0].sk_attno < 1)
 		elog(ERROR, "btree index keys must be ordered by attribute");
 
 	/* We can short-circuit most of the work if there's just one key */
 	if (numberOfKeys == 1)
 	{
 		/* Apply indoption to scankey (might change sk_strategy!) */
-		if (!_bt_fix_scankey_strategy(cur, indoption))
+		if (!_bt_fix_scankey_strategy(inputsk, indoption))
 			so->qual_ok = false;
-		memcpy(outkeys, cur, sizeof(ScanKeyData));
+		memcpy(&so->keyData[0], &inputsk[0], sizeof(ScanKeyData));
 		so->numberOfKeys = 1;
 		/* We can mark the qual as required if it's for first index col */
-		if (cur->sk_attno == 1)
-			_bt_mark_scankey_required(outkeys);
+		if (inputsk[0].sk_attno == 1)
+			_bt_mark_scankey_required(&so->keyData[0]);
 		if (arrayKeyData)
 		{
 			/*
@@ -2600,8 +2614,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
 			 * (we'll miss out on the single value array transformation, but
 			 * that's not nearly as important when there's only one scan key)
 			 */
-			Assert(cur->sk_flags & SK_SEARCHARRAY);
-			Assert(cur->sk_strategy != BTEqualStrategyNumber ||
+			Assert(so->keyData[0].sk_flags & SK_SEARCHARRAY);
+			Assert(so->keyData[0].sk_strategy != BTEqualStrategyNumber ||
 				   (so->arrayKeys[0].scan_key == 0 &&
 					OidIsValid(so->orderProcs[0].fn_oid)));
 		}
@@ -2629,12 +2643,12 @@ _bt_preprocess_keys(IndexScanDesc scan)
 	 * handle after-last-key processing.  Actual exit from the loop is at the
 	 * "break" statement below.
 	 */
-	for (i = 0;; cur++, i++)
+	for (i = 0;; inputsk++, i++)
 	{
 		if (i < numberOfKeys)
 		{
 			/* Apply indoption to scankey (might change sk_strategy!) */
-			if (!_bt_fix_scankey_strategy(cur, indoption))
+			if (!_bt_fix_scankey_strategy(inputsk, indoption))
 			{
 				/* NULL can't be matched, so give up */
 				so->qual_ok = false;
@@ -2646,12 +2660,12 @@ _bt_preprocess_keys(IndexScanDesc scan)
 		 * If we are at the end of the keys for a particular attr, finish up
 		 * processing and emit the cleaned-up keys.
 		 */
-		if (i == numberOfKeys || cur->sk_attno != attno)
+		if (i == numberOfKeys || inputsk->sk_attno != attno)
 		{
 			int			priorNumberOfEqualCols = numberOfEqualCols;
 
 			/* check input keys are correctly ordered */
-			if (i < numberOfKeys && cur->sk_attno < attno)
+			if (i < numberOfKeys && inputsk->sk_attno < attno)
 				elog(ERROR, "btree index keys must be ordered by attribute");
 
 			/*
@@ -2755,7 +2769,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 			}
 
 			/*
-			 * Emit the cleaned-up keys into the outkeys[] array, and then
+			 * Emit the cleaned-up keys into the so->keyData[] array, and then
 			 * mark them if they are required.  They are required (possibly
 			 * only in one direction) if all attrs before this one had "=".
 			 */
@@ -2763,7 +2777,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 			{
 				if (xform[j].skey)
 				{
-					ScanKey		outkey = &outkeys[new_numberOfKeys++];
+					ScanKey		outkey = &so->keyData[new_numberOfKeys++];
 
 					memcpy(outkey, xform[j].skey, sizeof(ScanKeyData));
 					if (arrayKeyData)
@@ -2780,19 +2794,19 @@ _bt_preprocess_keys(IndexScanDesc scan)
 				break;
 
 			/* Re-initialize for new attno */
-			attno = cur->sk_attno;
+			attno = inputsk->sk_attno;
 			memset(xform, 0, sizeof(xform));
 		}
 
 		/* check strategy this key's operator corresponds to */
-		j = cur->sk_strategy - 1;
+		j = inputsk->sk_strategy - 1;
 
 		/* if row comparison, push it directly to the output array */
-		if (cur->sk_flags & SK_ROW_HEADER)
+		if (inputsk->sk_flags & SK_ROW_HEADER)
 		{
-			ScanKey		outkey = &outkeys[new_numberOfKeys++];
+			ScanKey		outkey = &so->keyData[new_numberOfKeys++];
 
-			memcpy(outkey, cur, sizeof(ScanKeyData));
+			memcpy(outkey, inputsk, sizeof(ScanKeyData));
 			if (arrayKeyData)
 				keyDataMap[new_numberOfKeys - 1] = i;
 			if (numberOfEqualCols == attno - 1)
@@ -2806,21 +2820,10 @@ _bt_preprocess_keys(IndexScanDesc scan)
 			continue;
 		}
 
-		/*
-		 * Does this input scan key require further processing as an array?
-		 */
-		if (cur->sk_strategy == InvalidStrategy)
+		if (inputsk->sk_strategy == BTEqualStrategyNumber &&
+			(inputsk->sk_flags & SK_SEARCHARRAY))
 		{
-			/* _bt_preprocess_array_keys marked this array key redundant */
-			Assert(arrayKeyData);
-			Assert(cur->sk_flags & SK_SEARCHARRAY);
-			continue;
-		}
-
-		if (cur->sk_strategy == BTEqualStrategyNumber &&
-			(cur->sk_flags & SK_SEARCHARRAY))
-		{
-			/* _bt_preprocess_array_keys kept this array key */
+			/* maintain arrayidx for xform[] array */
 			Assert(arrayKeyData);
 			arrayidx++;
 		}
@@ -2832,7 +2835,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 		if (xform[j].skey == NULL)
 		{
 			/* nope, so this scan key wins by default (at least for now) */
-			xform[j].skey = cur;
+			xform[j].skey = inputsk;
 			xform[j].ikey = i;
 			xform[j].arrayidx = arrayidx;
 		}
@@ -2850,7 +2853,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 				/*
 				 * Have to set up array keys
 				 */
-				if ((cur->sk_flags & SK_SEARCHARRAY))
+				if (inputsk->sk_flags & SK_SEARCHARRAY)
 				{
 					array = &so->arrayKeys[arrayidx - 1];
 					orderproc = so->orderProcs + i;
@@ -2878,7 +2881,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 				 */
 			}
 
-			if (_bt_compare_scankey_args(scan, cur, cur, xform[j].skey,
+			if (_bt_compare_scankey_args(scan, inputsk, inputsk, xform[j].skey,
 										 array, orderproc, &test_result))
 			{
 				/* Have all we need to determine redundancy */
@@ -2892,7 +2895,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 					if (j != (BTEqualStrategyNumber - 1) ||
 						!(xform[j].skey->sk_flags & SK_SEARCHARRAY))
 					{
-						xform[j].skey = cur;
+						xform[j].skey = inputsk;
 						xform[j].ikey = i;
 						xform[j].arrayidx = arrayidx;
 					}
@@ -2905,7 +2908,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 						 * scan key.  _bt_compare_scankey_args expects us to
 						 * always keep arrays (and discard non-arrays).
 						 */
-						Assert(!(cur->sk_flags & SK_SEARCHARRAY));
+						Assert(!(inputsk->sk_flags & SK_SEARCHARRAY));
 					}
 				}
 				else if (j == (BTEqualStrategyNumber - 1))
@@ -2928,14 +2931,14 @@ _bt_preprocess_keys(IndexScanDesc scan)
 				 * even with incomplete opfamilies.  _bt_advance_array_keys
 				 * depends on this.
 				 */
-				ScanKey		outkey = &outkeys[new_numberOfKeys++];
+				ScanKey		outkey = &so->keyData[new_numberOfKeys++];
 
 				memcpy(outkey, xform[j].skey, sizeof(ScanKeyData));
 				if (arrayKeyData)
 					keyDataMap[new_numberOfKeys - 1] = xform[j].ikey;
 				if (numberOfEqualCols == attno - 1)
 					_bt_mark_scankey_required(outkey);
-				xform[j].skey = cur;
+				xform[j].skey = inputsk;
 				xform[j].ikey = i;
 				xform[j].arrayidx = arrayidx;
 			}
@@ -3349,13 +3352,6 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
 		return true;
 	}
 
-	if (skey->sk_strategy == InvalidStrategy)
-	{
-		/* Already-eliminated array scan key; don't need to fix anything */
-		Assert(skey->sk_flags & SK_SEARCHARRAY);
-		return true;
-	}
-
 	/* Adjust strategy for DESC, if we didn't already */
 	if ((addflags & SK_BT_DESC) && !(skey->sk_flags & SK_BT_DESC))
 		skey->sk_strategy = BTCommuteStrategyNumber(skey->sk_strategy);
-- 
2.45.2