v7-0003-Refactor-handling-of-nbtree-array-redundancies.patch
application/octet-stream
Filename: v7-0003-Refactor-handling-of-nbtree-array-redundancies.patch
Type: application/octet-stream
Part: 2
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v7-0003
Subject: Refactor handling of nbtree array redundancies.
| File | + | − |
|---|---|---|
| src/backend/access/nbtree/nbtree.c | 6 | 4 |
| src/backend/access/nbtree/nbtutils.c | 76 | 80 |
From 002753a55696ec3f51537b8ca2b6f49c5f01086e Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Thu, 8 Aug 2024 15:41:18 -0400
Subject: [PATCH v7 3/4] Refactor handling of nbtree array redundancies.
Rather than allocating memory for so.keyData[] at the start of each
btrescan, lazily allocate space later on, in _bt_preprocess_keys. We
now allocate so.keyData[] after _bt_preprocess_array_keys is done
performing initial array related preprocessing.
An immediate benefit of this approach is that _bt_preprocess_array_keys
no longer needs to explicitly mark redundant array scan keys. Other
code (_bt_preprocess_keys and its other subsidiary routines) no longer
have to interpret the scan key entries as redundant. Redundant array
scan keys simply never appear in the _bt_preprocess_keys input array
(_bt_preprocess_array_keys removes them up front).
This refactoring is also preparation for an upcoming patch that will add
skip scan optimizations to nbtree. _bt_preprocess_array_keys will be
taught to add new skip array scan keys to the _bt_preprocess_keys input
array (i.e. to arrayKeyData), so doing things this way avoids uselessly
palloc'ing so.keyData[], only to have to repalloc (to enlarge the array)
almost immediately afterwards. This scheme allows _bt_preprocess_keys
to output a so.keyData[] scan key array that can be larger than the
original scan.keyData[] input array, due to the addition of skip array
scan keys within _bt_preprocess_array_keys.
Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-Wz=9A_UtM7HzUThSkQ+BcrQsQZuNhWOvQWK06PRkEp=SKQ@mail.gmail.com
---
src/backend/access/nbtree/nbtree.c | 10 +-
src/backend/access/nbtree/nbtutils.c | 156 +++++++++++++--------------
2 files changed, 82 insertions(+), 84 deletions(-)
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index f166c7549..4ecf883d3 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -326,11 +326,8 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
BTScanPosInvalidate(so->currPos);
BTScanPosInvalidate(so->markPos);
- if (scan->numberOfKeys > 0)
- so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
- else
- so->keyData = NULL;
+ so->keyData = NULL;
so->needPrimScan = false;
so->scanBehind = false;
so->oppoDirCheck = false;
@@ -410,6 +407,11 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */
so->numArrayKeys = 0; /* ditto */
+
+ /* Release private storage allocated in previous btrescan, if any */
+ if (so->keyData != NULL)
+ pfree(so->keyData);
+ so->keyData = NULL;
}
/*
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 98688a3d6..d1423bd85 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -62,7 +62,7 @@ static bool _bt_compare_array_scankey_args(IndexScanDesc scan,
ScanKey arraysk, ScanKey skey,
FmgrInfo *orderproc, BTArrayKeyInfo *array,
bool *qual_ok);
-static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan);
+static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *numberOfKeys);
static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap);
static int _bt_compare_array_elements(const void *a, const void *b, void *arg);
static inline int32 _bt_compare_array_skey(FmgrInfo *orderproc,
@@ -251,9 +251,6 @@ _bt_freestack(BTStack stack)
* It is convenient for _bt_preprocess_keys caller to have to deal with no
* more than one equality strategy array scan key per index attribute. We'll
* always be able to set things up that way when complete opfamilies are used.
- * Eliminated array scan keys can be recognized as those that have had their
- * sk_strategy field set to InvalidStrategy here by us. Caller should avoid
- * including these in the scan's so->keyData[] output array.
*
* We set the scan key references from the scan's BTArrayKeyInfo info array to
* offsets into the temp modified input array returned to caller. Scans that
@@ -261,18 +258,25 @@ _bt_freestack(BTStack stack)
* preprocessing steps are complete. This will convert the scan key offset
* references into references to the scan's so->keyData[] output scan keys.
*
+ * Caller must pass *numberOfKeys to give us a way to change the number of
+ * input scan keys (our output is caller's input). The returned array can be
+ * smaller than scan->keyData[] when we eliminated a redundant array scan key
+ * (redundant with some other array scan key, for the same attribute). Caller
+ * uses this to allocate so->keyData[] for the current btrescan.
+ *
* Note: the reason we need to return a temp scan key array, rather than just
* scribbling on scan->keyData, is that callers are permitted to call btrescan
* without supplying a new set of scankey data.
*/
static ScanKey
-_bt_preprocess_array_keys(IndexScanDesc scan)
+_bt_preprocess_array_keys(IndexScanDesc scan, int *numberOfKeys)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
Relation rel = scan->indexRelation;
- int numberOfKeys = scan->numberOfKeys;
+ int numArrayKeyData = scan->numberOfKeys;
int16 *indoption = rel->rd_indoption;
- int numArrayKeys;
+ int numArrayKeys,
+ output_ikey = 0;
int origarrayatt = InvalidAttrNumber,
origarraykey = -1;
Oid origelemtype = InvalidOid;
@@ -280,11 +284,11 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
MemoryContext oldContext;
ScanKey arrayKeyData; /* modified copy of scan->keyData */
- Assert(numberOfKeys);
+ Assert(scan->numberOfKeys);
/* Quick check to see if there are any array keys */
numArrayKeys = 0;
- for (int i = 0; i < numberOfKeys; i++)
+ for (int i = 0; i < scan->numberOfKeys; i++)
{
cur = &scan->keyData[i];
if (cur->sk_flags & SK_SEARCHARRAY)
@@ -317,19 +321,18 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
oldContext = MemoryContextSwitchTo(so->arrayContext);
- /* Create modifiable copy of scan->keyData in the workspace context */
- arrayKeyData = (ScanKey) palloc(numberOfKeys * sizeof(ScanKeyData));
- memcpy(arrayKeyData, scan->keyData, numberOfKeys * sizeof(ScanKeyData));
+ /* Create output scan keys in the workspace context */
+ arrayKeyData = (ScanKey) palloc(numArrayKeyData * sizeof(ScanKeyData));
/* Allocate space for per-array data in the workspace context */
so->arrayKeys = (BTArrayKeyInfo *) palloc(numArrayKeys * sizeof(BTArrayKeyInfo));
/* Allocate space for ORDER procs used to help _bt_checkkeys */
- so->orderProcs = (FmgrInfo *) palloc(numberOfKeys * sizeof(FmgrInfo));
+ so->orderProcs = (FmgrInfo *) palloc(numArrayKeyData * sizeof(FmgrInfo));
/* Now process each array key */
numArrayKeys = 0;
- for (int i = 0; i < numberOfKeys; i++)
+ for (int input_ikey = 0; input_ikey < scan->numberOfKeys; input_ikey++)
{
FmgrInfo sortproc;
FmgrInfo *sortprocp = &sortproc;
@@ -345,14 +348,20 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
int num_nonnulls;
int j;
- cur = &arrayKeyData[i];
+ /*
+ * Copy input scan key into temp arrayKeyData scan key array
+ */
+ cur = &arrayKeyData[output_ikey];
+ *cur = scan->keyData[input_ikey];
+
if (!(cur->sk_flags & SK_SEARCHARRAY))
+ {
+ output_ikey++; /* keep this non-array scan key */
continue;
+ }
/*
- * First, deconstruct the array into elements. Anything allocated
- * here (including a possibly detoasted array value) is in the
- * workspace context.
+ * Deconstruct the array into elements
*/
arrayval = DatumGetArrayTypeP(cur->sk_argument);
/* We could cache this data, but not clear it's worth it */
@@ -406,6 +415,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
_bt_find_extreme_element(scan, cur, elemtype,
BTGreaterStrategyNumber,
elem_values, num_nonnulls);
+ output_ikey++; /* keep this transformed scan key */
continue;
case BTEqualStrategyNumber:
/* proceed with rest of loop */
@@ -416,6 +426,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
_bt_find_extreme_element(scan, cur, elemtype,
BTLessStrategyNumber,
elem_values, num_nonnulls);
+ output_ikey++; /* keep this transformed scan key */
continue;
default:
elog(ERROR, "unrecognized StrategyNumber: %d",
@@ -432,7 +443,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
* sortproc just points to the same proc used during binary searches.
*/
_bt_setup_array_cmp(scan, cur, elemtype,
- &so->orderProcs[i], &sortprocp);
+ &so->orderProcs[output_ikey], &sortprocp);
/*
* Sort the non-null elements and eliminate any duplicates. We must
@@ -476,11 +487,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
break;
}
- /*
- * Indicate to _bt_preprocess_keys caller that it must ignore
- * this scan key
- */
- cur->sk_strategy = InvalidStrategy;
+ /* Throw away this scan key/array */
continue;
}
@@ -511,12 +518,15 @@ _bt_preprocess_array_keys(IndexScanDesc scan)
* Note: _bt_preprocess_array_keys_final will fix-up each array's
* scan_key field later on, after so->keyData[] has been finalized.
*/
- so->arrayKeys[numArrayKeys].scan_key = i;
+ so->arrayKeys[numArrayKeys].scan_key = output_ikey;
so->arrayKeys[numArrayKeys].num_elems = num_elems;
so->arrayKeys[numArrayKeys].elem_values = elem_values;
numArrayKeys++;
+ output_ikey++; /* keep this scan key/array */
}
+ /* Set final number of arrayKeyData[] keys, array keys */
+ *numberOfKeys = output_ikey;
so->numArrayKeys = numArrayKeys;
MemoryContextSwitchTo(oldContext);
@@ -2429,10 +2439,12 @@ end_toplevel_scan:
/*
* _bt_preprocess_keys() -- Preprocess scan keys
*
+ * The first call here (per btrescan) allocates so->keyData[].
* The given search-type keys (taken from scan->keyData[])
* are copied to so->keyData[] with possible transformation.
* scan->numberOfKeys is the number of input keys, so->numberOfKeys gets
- * the number of output keys (possibly less, never greater).
+ * the number of output keys. Calling here a second or subsequent time
+ * (during the same btrescan) is a no-op.
*
* The output keys are marked with additional sk_flags bits beyond the
* system-standard bits supplied by the caller. The DESC and NULLS_FIRST
@@ -2519,9 +2531,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
int16 *indoption = scan->indexRelation->rd_indoption;
int new_numberOfKeys;
int numberOfEqualCols;
- ScanKey inkeys;
- ScanKey outkeys;
- ScanKey cur;
+ ScanKey inputsk;
BTScanKeyPreproc xform[BTMaxStrategyNumber];
bool test_result;
int i,
@@ -2553,7 +2563,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
return; /* done if qual-less scan */
/* If any keys are SK_SEARCHARRAY type, set up array-key info */
- arrayKeyData = _bt_preprocess_array_keys(scan);
+ arrayKeyData = _bt_preprocess_array_keys(scan, &numberOfKeys);
if (!so->qual_ok)
{
/* unmatchable array, so give up */
@@ -2567,32 +2577,36 @@ _bt_preprocess_keys(IndexScanDesc scan)
*/
if (arrayKeyData)
{
- inkeys = arrayKeyData;
+ inputsk = arrayKeyData;
/* Also maintain keyDataMap for remapping so->orderProc[] later */
keyDataMap = MemoryContextAlloc(so->arrayContext,
numberOfKeys * sizeof(int));
}
else
- inkeys = scan->keyData;
+ inputsk = scan->keyData;
+
+ /*
+ * Now that we have an estimate of the number of output scan keys,
+ * allocate space for them
+ */
+ so->keyData = palloc(sizeof(ScanKeyData) * numberOfKeys);
- outkeys = so->keyData;
- cur = &inkeys[0];
/* we check that input keys are correctly ordered */
- if (cur->sk_attno < 1)
+ if (inputsk[0].sk_attno < 1)
elog(ERROR, "btree index keys must be ordered by attribute");
/* We can short-circuit most of the work if there's just one key */
if (numberOfKeys == 1)
{
/* Apply indoption to scankey (might change sk_strategy!) */
- if (!_bt_fix_scankey_strategy(cur, indoption))
+ if (!_bt_fix_scankey_strategy(inputsk, indoption))
so->qual_ok = false;
- memcpy(outkeys, cur, sizeof(ScanKeyData));
+ memcpy(&so->keyData[0], &inputsk[0], sizeof(ScanKeyData));
so->numberOfKeys = 1;
/* We can mark the qual as required if it's for first index col */
- if (cur->sk_attno == 1)
- _bt_mark_scankey_required(outkeys);
+ if (inputsk[0].sk_attno == 1)
+ _bt_mark_scankey_required(&so->keyData[0]);
if (arrayKeyData)
{
/*
@@ -2600,8 +2614,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
* (we'll miss out on the single value array transformation, but
* that's not nearly as important when there's only one scan key)
*/
- Assert(cur->sk_flags & SK_SEARCHARRAY);
- Assert(cur->sk_strategy != BTEqualStrategyNumber ||
+ Assert(so->keyData[0].sk_flags & SK_SEARCHARRAY);
+ Assert(so->keyData[0].sk_strategy != BTEqualStrategyNumber ||
(so->arrayKeys[0].scan_key == 0 &&
OidIsValid(so->orderProcs[0].fn_oid)));
}
@@ -2629,12 +2643,12 @@ _bt_preprocess_keys(IndexScanDesc scan)
* handle after-last-key processing. Actual exit from the loop is at the
* "break" statement below.
*/
- for (i = 0;; cur++, i++)
+ for (i = 0;; inputsk++, i++)
{
if (i < numberOfKeys)
{
/* Apply indoption to scankey (might change sk_strategy!) */
- if (!_bt_fix_scankey_strategy(cur, indoption))
+ if (!_bt_fix_scankey_strategy(inputsk, indoption))
{
/* NULL can't be matched, so give up */
so->qual_ok = false;
@@ -2646,12 +2660,12 @@ _bt_preprocess_keys(IndexScanDesc scan)
* If we are at the end of the keys for a particular attr, finish up
* processing and emit the cleaned-up keys.
*/
- if (i == numberOfKeys || cur->sk_attno != attno)
+ if (i == numberOfKeys || inputsk->sk_attno != attno)
{
int priorNumberOfEqualCols = numberOfEqualCols;
/* check input keys are correctly ordered */
- if (i < numberOfKeys && cur->sk_attno < attno)
+ if (i < numberOfKeys && inputsk->sk_attno < attno)
elog(ERROR, "btree index keys must be ordered by attribute");
/*
@@ -2755,7 +2769,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
}
/*
- * Emit the cleaned-up keys into the outkeys[] array, and then
+ * Emit the cleaned-up keys into the so->keyData[] array, and then
* mark them if they are required. They are required (possibly
* only in one direction) if all attrs before this one had "=".
*/
@@ -2763,7 +2777,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
{
if (xform[j].skey)
{
- ScanKey outkey = &outkeys[new_numberOfKeys++];
+ ScanKey outkey = &so->keyData[new_numberOfKeys++];
memcpy(outkey, xform[j].skey, sizeof(ScanKeyData));
if (arrayKeyData)
@@ -2780,19 +2794,19 @@ _bt_preprocess_keys(IndexScanDesc scan)
break;
/* Re-initialize for new attno */
- attno = cur->sk_attno;
+ attno = inputsk->sk_attno;
memset(xform, 0, sizeof(xform));
}
/* check strategy this key's operator corresponds to */
- j = cur->sk_strategy - 1;
+ j = inputsk->sk_strategy - 1;
/* if row comparison, push it directly to the output array */
- if (cur->sk_flags & SK_ROW_HEADER)
+ if (inputsk->sk_flags & SK_ROW_HEADER)
{
- ScanKey outkey = &outkeys[new_numberOfKeys++];
+ ScanKey outkey = &so->keyData[new_numberOfKeys++];
- memcpy(outkey, cur, sizeof(ScanKeyData));
+ memcpy(outkey, inputsk, sizeof(ScanKeyData));
if (arrayKeyData)
keyDataMap[new_numberOfKeys - 1] = i;
if (numberOfEqualCols == attno - 1)
@@ -2806,21 +2820,10 @@ _bt_preprocess_keys(IndexScanDesc scan)
continue;
}
- /*
- * Does this input scan key require further processing as an array?
- */
- if (cur->sk_strategy == InvalidStrategy)
+ if (inputsk->sk_strategy == BTEqualStrategyNumber &&
+ (inputsk->sk_flags & SK_SEARCHARRAY))
{
- /* _bt_preprocess_array_keys marked this array key redundant */
- Assert(arrayKeyData);
- Assert(cur->sk_flags & SK_SEARCHARRAY);
- continue;
- }
-
- if (cur->sk_strategy == BTEqualStrategyNumber &&
- (cur->sk_flags & SK_SEARCHARRAY))
- {
- /* _bt_preprocess_array_keys kept this array key */
+ /* maintain arrayidx for xform[] array */
Assert(arrayKeyData);
arrayidx++;
}
@@ -2832,7 +2835,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
if (xform[j].skey == NULL)
{
/* nope, so this scan key wins by default (at least for now) */
- xform[j].skey = cur;
+ xform[j].skey = inputsk;
xform[j].ikey = i;
xform[j].arrayidx = arrayidx;
}
@@ -2850,7 +2853,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
/*
* Have to set up array keys
*/
- if ((cur->sk_flags & SK_SEARCHARRAY))
+ if (inputsk->sk_flags & SK_SEARCHARRAY)
{
array = &so->arrayKeys[arrayidx - 1];
orderproc = so->orderProcs + i;
@@ -2878,7 +2881,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
*/
}
- if (_bt_compare_scankey_args(scan, cur, cur, xform[j].skey,
+ if (_bt_compare_scankey_args(scan, inputsk, inputsk, xform[j].skey,
array, orderproc, &test_result))
{
/* Have all we need to determine redundancy */
@@ -2892,7 +2895,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
if (j != (BTEqualStrategyNumber - 1) ||
!(xform[j].skey->sk_flags & SK_SEARCHARRAY))
{
- xform[j].skey = cur;
+ xform[j].skey = inputsk;
xform[j].ikey = i;
xform[j].arrayidx = arrayidx;
}
@@ -2905,7 +2908,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
* scan key. _bt_compare_scankey_args expects us to
* always keep arrays (and discard non-arrays).
*/
- Assert(!(cur->sk_flags & SK_SEARCHARRAY));
+ Assert(!(inputsk->sk_flags & SK_SEARCHARRAY));
}
}
else if (j == (BTEqualStrategyNumber - 1))
@@ -2928,14 +2931,14 @@ _bt_preprocess_keys(IndexScanDesc scan)
* even with incomplete opfamilies. _bt_advance_array_keys
* depends on this.
*/
- ScanKey outkey = &outkeys[new_numberOfKeys++];
+ ScanKey outkey = &so->keyData[new_numberOfKeys++];
memcpy(outkey, xform[j].skey, sizeof(ScanKeyData));
if (arrayKeyData)
keyDataMap[new_numberOfKeys - 1] = xform[j].ikey;
if (numberOfEqualCols == attno - 1)
_bt_mark_scankey_required(outkey);
- xform[j].skey = cur;
+ xform[j].skey = inputsk;
xform[j].ikey = i;
xform[j].arrayidx = arrayidx;
}
@@ -3349,13 +3352,6 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
return true;
}
- if (skey->sk_strategy == InvalidStrategy)
- {
- /* Already-eliminated array scan key; don't need to fix anything */
- Assert(skey->sk_flags & SK_SEARCHARRAY);
- return true;
- }
-
/* Adjust strategy for DESC, if we didn't already */
if ((addflags & SK_BT_DESC) && !(skey->sk_flags & SK_BT_DESC))
skey->sk_strategy = BTCommuteStrategyNumber(skey->sk_strategy);
--
2.45.2