From 1c83fbe2fad46ab73603e8f704b5c6f1a4c3fc12 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 13 Jan 2025 16:08:32 -0500 Subject: [PATCH v29 4/5] Apply low-order skip key in _bt_first more often. Convert low_compare and high_compare nbtree skip array inequalities (with opclasses that offer skip support) such that _bt_first is consistently able to use later keys when descending the tree within _bt_first. For example, an index qual "WHERE a > 5 AND b = 2" is now converted to "WHERE a >= 6 AND b = 2" by a new preprocessing step that takes place after a final low_compare and/or high_compare are chosen by all earlier preprocessing steps. That way the scan's initial call to _bt_first will use "WHERE a >= 6 AND b = 2" to find the initial leaf level position, rather than merely using "WHERE a > 5" -- "b = 2" can always be applied. This has a decent chance of making the scan avoid an extra _bt_first call that would otherwise be needed just to determine the lowest-sorting "a" value in the index (the lowest that still satisfies "WHERE a > 5"). The transformation process can only lower the total number of index pages read when the use of a more restrictive set of initial positioning keys in _bt_first actually allows the scan to land on some later leaf page directly, relative to the unoptimized case (or on an earlier leaf page directly, when scanning backwards). The savings can be far greater when affected skip arrays come after some higher-order array. For example, a qual "WHERE x IN (1, 2, 3) AND y > 5 AND z = 2" can now save as many as 3 _bt_first calls as a result of these transformations (there can be as many as 1 _bt_first call saved per "x" array element). Author: Peter Geoghegan Discussion: https://postgr.es/m/CAH2-Wz=FJ78K3WsF3iWNxWnUCY9f=Jdg3QPxaXE=uYUbmuRz5Q@mail.gmail.com --- src/backend/access/nbtree/nbtpreprocesskeys.c | 181 ++++++++++++++++++ src/test/regress/expected/create_index.out | 21 ++ src/test/regress/sql/create_index.sql | 10 + 3 files changed, 212 insertions(+) diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c index 04792eab8..999bb4578 100644 --- a/src/backend/access/nbtree/nbtpreprocesskeys.c +++ b/src/backend/access/nbtree/nbtpreprocesskeys.c @@ -50,6 +50,12 @@ static bool _bt_saoparray_shrink(IndexScanDesc scan, ScanKey arraysk, BTArrayKeyInfo *array, bool *qual_ok); static bool _bt_skiparray_shrink(IndexScanDesc scan, ScanKey skey, BTArrayKeyInfo *array, bool *qual_ok); +static void _bt_skiparray_strat_adjust(IndexScanDesc scan, ScanKey arraysk, + BTArrayKeyInfo *array); +static void _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk, + BTArrayKeyInfo *array); +static void _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, + BTArrayKeyInfo *array); static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys); static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap); static int _bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops, @@ -1290,6 +1296,172 @@ _bt_skiparray_shrink(IndexScanDesc scan, ScanKey skey, BTArrayKeyInfo *array, return true; } +/* + * Applies the opfamily's skip support routine to convert the skip array's > + * low_compare key (if any) into a >= key, and to convert its < high_compare + * key (if any) into a <= key. Decrements the high_compare key's sk_argument, + * and/or increments the low_compare key's sk_argument (also adjusts their + * operator strategies, while changing the operator as appropriate). + * + * This optional optimization reduces the number of descents required within + * _bt_first. Whenever _bt_first is called with a skip array whose current + * array element is the sentinel value MINVAL, using a transformed >= key + * instead of using the original > key makes it safe to include lower-order + * scan keys in the insertion scan key (there must be lower-order scan keys + * after the skip array). We will avoid an extra _bt_first to find the first + * value in the index > sk_argument -- at least when the first real matching + * value in the index happens to be an exact match for the sk_argument value + * that we produced here by incrementing the original input key's sk_argument. + * (Backwards scans derive the same benefit when they encounter the sentinel + * value MAXVAL, by converting the high_compare key from < to <=.) + * + * Note: The transformation is only correct when it cannot allow the scan to + * overlook matching tuples, but we don't have enough semantic information to + * safely make sure that can't happen during scans with cross-type operators. + * That's why we'll never apply the transformation in cross-type scenarios. + * For example, if we attempted to convert "sales_ts > '2024-01-01'::date" + * into "sales_ts >= '2024-01-02'::date" given a "sales_ts" attribute whose + * input opclass is timestamp_ops, the scan would overlook _all_ tuples for + * sales that fell on '2024-01-01'. + * + * Note: We can safely modify array->low_compare/array->high_compare in place + * because they just point to copies of our scan->keyData[] input scan keys + * (namely the copies returned by _bt_preprocess_array_keys to be used as + * input into the standard preprocessing steps in _bt_preprocess_keys). + * Everything will be reset if there's a rescan. + */ +static void +_bt_skiparray_strat_adjust(IndexScanDesc scan, ScanKey arraysk, + BTArrayKeyInfo *array) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + MemoryContext oldContext; + + /* + * Called last among all preprocessing steps, when the skip array's final + * low_compare and high_compare have both been chosen + */ + Assert(so->skipScan); + Assert(arraysk->sk_flags & SK_BT_SKIP); + Assert(array->num_elems == -1 && !array->null_elem && array->sksup); + + oldContext = MemoryContextSwitchTo(so->arrayContext); + + if (array->high_compare && + array->high_compare->sk_strategy == BTLessStrategyNumber) + _bt_skiparray_strat_decrement(scan, arraysk, array); + + if (array->low_compare && + array->low_compare->sk_strategy == BTGreaterStrategyNumber) + _bt_skiparray_strat_increment(scan, arraysk, array); + + MemoryContextSwitchTo(oldContext); +} + +/* + * Convert skip array's > low_compare key into a >= key + */ +static void +_bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk, + BTArrayKeyInfo *array) +{ + Relation rel = scan->indexRelation; + Oid opfamily = rel->rd_opfamily[arraysk->sk_attno - 1], + opcintype = rel->rd_opcintype[arraysk->sk_attno - 1], + leop; + RegProcedure cmp_proc; + ScanKey high_compare = array->high_compare; + Datum orig_sk_argument = high_compare->sk_argument, + new_sk_argument; + bool uflow; + + Assert(high_compare->sk_strategy == BTLessStrategyNumber); + + /* + * Only perform the transformation when the operator type matches the + * index attribute's input opclass type + */ + if (high_compare->sk_subtype != opcintype && + high_compare->sk_subtype != InvalidOid) + return; + + /* Decrement, handling underflow by marking the qual unsatisfiable */ + new_sk_argument = array->sksup->decrement(rel, orig_sk_argument, &uflow); + if (uflow) + { + BTScanOpaque so = (BTScanOpaque) scan->opaque; + + so->qual_ok = false; + return; + } + + /* Look up <= operator (might fail) */ + leop = get_opfamily_member(opfamily, opcintype, opcintype, + BTLessEqualStrategyNumber); + if (!OidIsValid(leop)) + return; + cmp_proc = get_opcode(leop); + if (RegProcedureIsValid(cmp_proc)) + { + /* Transform < high_compare key into <= key */ + fmgr_info(cmp_proc, &high_compare->sk_func); + high_compare->sk_argument = new_sk_argument; + high_compare->sk_strategy = BTLessEqualStrategyNumber; + } +} + +/* + * Convert skip array's < low_compare key into a <= key + */ +static void +_bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, + BTArrayKeyInfo *array) +{ + Relation rel = scan->indexRelation; + Oid opfamily = rel->rd_opfamily[arraysk->sk_attno - 1], + opcintype = rel->rd_opcintype[arraysk->sk_attno - 1], + geop; + RegProcedure cmp_proc; + ScanKey low_compare = array->low_compare; + Datum orig_sk_argument = low_compare->sk_argument, + new_sk_argument; + bool oflow; + + Assert(low_compare->sk_strategy == BTGreaterStrategyNumber); + + /* + * Only perform the transformation when the operator type matches the + * index attribute's input opclass type + */ + if (low_compare->sk_subtype != opcintype && + low_compare->sk_subtype != InvalidOid) + return; + + /* Increment, handling overflow by marking the qual unsatisfiable */ + new_sk_argument = array->sksup->increment(rel, orig_sk_argument, &oflow); + if (oflow) + { + BTScanOpaque so = (BTScanOpaque) scan->opaque; + + so->qual_ok = false; + return; + } + + /* Look up >= operator (might fail) */ + geop = get_opfamily_member(opfamily, opcintype, opcintype, + BTGreaterEqualStrategyNumber); + if (!OidIsValid(geop)) + return; + cmp_proc = get_opcode(geop); + if (RegProcedureIsValid(cmp_proc)) + { + /* Transform > low_compare key into >= key */ + fmgr_info(cmp_proc, &low_compare->sk_func); + low_compare->sk_argument = new_sk_argument; + low_compare->sk_strategy = BTGreaterEqualStrategyNumber; + } +} + /* * _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys * @@ -1825,6 +1997,15 @@ _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap) } else { + /* + * Any skip array low_compare and high_compare scan keys + * are now final. Transform the array's > low_compare key + * into a >= key (and < high_compare keys into a <= key). + */ + if (array->num_elems == -1 && array->sksup && + !array->null_elem) + _bt_skiparray_strat_adjust(scan, outkey, array); + /* Match found, so done with this array */ arrayidx++; } diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 15dde752f..fa4517e2d 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -2589,6 +2589,27 @@ ORDER BY thousand; 1 | 1001 (1 row) +-- Skip array preprocessing increments "thousand > -1" to "thousand >= 0" +explain (costs off) +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > -1 AND tenthous IN (1001,3000) +ORDER BY thousand limit 2; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Limit + -> Index Only Scan using tenk1_thous_tenthous on tenk1 + Index Cond: ((thousand > '-1'::integer) AND (tenthous = ANY ('{1001,3000}'::integer[]))) +(3 rows) + +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > -1 AND tenthous IN (1001,3000) +ORDER BY thousand limit 2; + thousand | tenthous +----------+---------- + 0 | 3000 + 1 | 1001 +(2 rows) + -- -- Check elimination of constant-NULL subexpressions -- diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 40ba3d65b..e8c16959a 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -993,6 +993,16 @@ SELECT thousand, tenthous FROM tenk1 WHERE thousand < 3 and thousand <= 2 AND tenthous = 1001 ORDER BY thousand; +-- Skip array preprocessing increments "thousand > -1" to "thousand >= 0" +explain (costs off) +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > -1 AND tenthous IN (1001,3000) +ORDER BY thousand limit 2; + +SELECT thousand, tenthous FROM tenk1 +WHERE thousand > -1 AND tenthous IN (1001,3000) +ORDER BY thousand limit 2; + -- -- Check elimination of constant-NULL subexpressions -- -- 2.47.2