v15-0003-POC-fix-for-regressions-in-unsympathetic-cases.patch
application/octet-stream
Filename: v15-0003-POC-fix-for-regressions-in-unsympathetic-cases.patch
Type: application/octet-stream
Part: 1
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v15-0003
Subject: POC fix for regressions in unsympathetic cases.
| File | + | − |
|---|---|---|
| src/backend/access/nbtree/nbtsearch.c | 5 | 0 |
| src/backend/access/nbtree/nbtutils.c | 46 | 24 |
| src/include/access/nbtree.h | 3 | 2 |
From 3d12aeba061337c5e26da6551c0a93da15334476 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Sat, 16 Nov 2024 15:58:41 -0500
Subject: [PATCH v15 3/3] POC fix for regressions in unsympathetic cases.
Fix regressions in cases that are nominally eligible to use skip scan
but can never actually benefit from skipping. These are cases where the
leading skipped prefix column contains many distinct values -- often as
many distinct values are there are total index tuples.
For example, the test case posted here is fixed by the work from this
commit:
https://postgr.es/m/51d00219180323d121572e1f83ccde2a@oss.nttdata.com
Note that this commit doesn't actually change anything about when or how
skip scan decides when or how to skip. It just avoids wasting CPU
cycles on uselessly maintaining a skip array at the tuple granularity,
preferring to maintain the skip arrays at something closer to the page
granularity when that makes sense. See:
https://www.postgresql.org/message-id/flat/CAH2-Wz%3DE7XrkvscBN0U6V81NK3Q-dQOmivvbEsjG-zwEfDdFpg%40mail.gmail.com#7d34e8aa875d7a718043834c5ef4c167
Doing well on cases like this is important because we can't expect the
optimizer to never choose an affected plan -- we prefer to solve these
problems in the executor, which has access to the most reliable and
current information about the index. The optimizer can afford to be
very optimistic about skipping if actual runtime scan behavior is very
similar to a traditional full index scan in the worst case. See
"optimizer" section from the original intro mail for more information:
https://postgr.es/m/CAH2-Wzmn1YsLzOGgjAQZdn1STSG_y8qP__vggTaPAYXJP%2BG4bw%40mail.gmail.com
---
src/include/access/nbtree.h | 5 +-
src/backend/access/nbtree/nbtsearch.c | 5 ++
src/backend/access/nbtree/nbtutils.c | 70 ++++++++++++++++++---------
3 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index d841e85bc..06ccab8cf 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1112,11 +1112,12 @@ typedef struct BTReadPageState
bool firstmatch; /* at least one match so far? */
/*
- * Private _bt_checkkeys state used to manage "look ahead" optimization
- * (only used during scans with array keys)
+ * Private _bt_checkkeys state used to manage "look ahead" and skip array
+ * optimizations (only used during scans with array keys)
*/
int16 rechecks;
int16 targetdistance;
+ bool beyondskip;
} BTReadPageState;
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 43e321896..578bafbf4 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -1646,6 +1646,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
pstate.firstmatch = false;
pstate.rechecks = 0;
pstate.targetdistance = 0;
+ pstate.beyondskip = false;
/*
* Prechecking the value of the continuescan flag for the last item on the
@@ -1837,6 +1838,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
truncatt = BTreeTupleGetNAtts(itup, rel);
pstate.prechecked = false; /* precheck didn't cover HIKEY */
+ pstate.beyondskip = false; /* reset for finaltup */
_bt_checkkeys(scan, &pstate, arrayKeys, itup, truncatt);
}
@@ -1893,6 +1895,9 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
else
tuple_alive = true;
+ if (offnum == minoff)
+ pstate.beyondskip = false; /* reset for finaltup */
+
itup = (IndexTuple) PageGetItem(page, iid);
Assert(!BTreeTupleIsPivot(itup));
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 5117fb164..a3b59789a 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -151,7 +151,8 @@ static bool _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption);
static void _bt_mark_scankey_required(ScanKey skey);
static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
- bool advancenonrequired, bool prechecked, bool firstmatch,
+ bool advancenonrequired, bool beyondskip,
+ bool prechecked, bool firstmatch,
bool *continuescan, int *ikey);
static bool _bt_check_rowcompare(ScanKey skey,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
@@ -3136,6 +3137,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
int arrayidx = 0;
bool beyond_end_advance = false,
has_required_opposite_direction_only = false,
+ has_required_opposite_direction_skip = false,
oppodir_inequality_sktrig = false,
all_required_satisfied = true,
all_satisfied = true;
@@ -3189,6 +3191,10 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
{
array = &so->arrayKeys[arrayidx++];
Assert(array->scan_key == ikey);
+ if (array->num_elems == -1 && ScanDirectionIsForward(dir) ?
+ array->low_compare :
+ array->high_compare)
+ has_required_opposite_direction_skip = true;
}
}
else
@@ -3209,10 +3215,8 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
if (ikey < sktrig)
continue;
- if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
+ if (sktrig_required && (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
{
- Assert(sktrig_required);
-
required = true;
if (cur->sk_attno > tupnatts)
@@ -3416,7 +3420,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
* array scan keys are considered interesting.)
*/
all_satisfied = false;
- if (required)
+ if (required || (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
all_required_satisfied = false;
else
{
@@ -3501,6 +3505,12 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
Assert(_bt_verify_keys_with_arraykeys(scan));
+ if (beyond_end_advance && pstate->finaltup &&
+ !has_required_opposite_direction_skip)
+ pstate->beyondskip = true;
+ else if (pstate)
+ pstate->beyondskip = false;
+
/*
* Does tuple now satisfy our new qual? Recheck with _bt_check_compare.
*
@@ -3528,7 +3538,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
/* Recheck _bt_check_compare on behalf of caller */
if (_bt_check_compare(scan, dir, tuple, tupnatts, tupdesc,
- false, false, false,
+ false, false, false, false,
&continuescan, &nsktrig) &&
!so->scanBehind)
{
@@ -3556,11 +3566,10 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
* that we can know to be safe based on caller's tuple alone. If we
* didn't perform this step, then that guarantee wouldn't quite hold.
*/
- if (unlikely(!continuescan))
+ if (unlikely(!continuescan) && sktrig_required)
{
bool satisfied PG_USED_FOR_ASSERTS_ONLY;
- Assert(sktrig_required);
Assert(so->keyData[nsktrig].sk_strategy != BTEqualStrategyNumber);
/*
@@ -3615,9 +3624,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
* for _bt_check_compare to behave as if they are required in the current
* scan direction to deal with NULLs. We'll account for that separately.)
*/
- Assert(_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc, tupnatts,
- false, 0, NULL) ==
- !all_required_satisfied);
+ /* FIXME Add back the assertion removed from here */
/*
* We generally permit primitive index scans to continue onto the next
@@ -4904,9 +4911,9 @@ _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
Assert(BTreeTupleGetNAtts(tuple, scan->indexRelation) == tupnatts);
- res = _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc,
- arrayKeys, pstate->prechecked, pstate->firstmatch,
- &pstate->continuescan, &ikey);
+ res = _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc, arrayKeys,
+ pstate->beyondskip, pstate->prechecked,
+ pstate->firstmatch, &pstate->continuescan, &ikey);
#ifdef USE_ASSERT_CHECKING
if (!arrayKeys && so->numArrayKeys)
@@ -4917,11 +4924,11 @@ _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
* Assert that the scan isn't in danger of becoming confused.
*/
Assert(!so->scanBehind && !so->oppositeDirCheck);
- Assert(!pstate->prechecked && !pstate->firstmatch);
- Assert(!_bt_tuple_before_array_skeys(scan, dir, tuple, tupdesc,
- tupnatts, false, 0, NULL));
+ Assert(!pstate->beyondskip && !pstate->prechecked &&
+ !pstate->firstmatch);
+ /* FIXME Add back the assertion removed from here */
}
- if (pstate->prechecked || pstate->firstmatch)
+ if (!pstate->beyondskip && (pstate->prechecked || pstate->firstmatch))
{
bool dcontinuescan;
int dikey = 0;
@@ -4931,7 +4938,7 @@ _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
* get the same answer without those optimizations
*/
Assert(res == _bt_check_compare(scan, dir, tuple, tupnatts, tupdesc,
- false, false, false,
+ false, false, false, false,
&dcontinuescan, &dikey));
Assert(pstate->continuescan == dcontinuescan);
}
@@ -5062,7 +5069,7 @@ _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
Assert(so->numArrayKeys);
_bt_check_compare(scan, flipped, finaltup, nfinaltupatts, tupdesc,
- false, false, false, &continuescan, &ikey);
+ false, false, false, false, &continuescan, &ikey);
if (!continuescan && so->keyData[ikey].sk_strategy != BTEqualStrategyNumber)
return false;
@@ -5107,11 +5114,17 @@ _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
*
* Pass advancenonrequired=false to avoid all array related side effects.
* This allows _bt_advance_array_keys caller to avoid infinite recursion.
+ *
+ * Callers with skip arrays can pass beyondskip=true to have us assume that
+ * the scan is still likely to be before the current array keys according to
+ * _bt_tuple_before_array_skeys. We can safely avoid evaluating skip array
+ * scan keys when this happens.
*/
static bool
_bt_check_compare(IndexScanDesc scan, ScanDirection dir,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
- bool advancenonrequired, bool prechecked, bool firstmatch,
+ bool advancenonrequired, bool beyondskip,
+ bool prechecked, bool firstmatch,
bool *continuescan, int *ikey)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
@@ -5128,10 +5141,19 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
/*
* Check if the key is required in the current scan direction, in the
- * opposite scan direction _only_, or in neither direction
+ * opposite scan direction _only_, or in neither direction -- though
+ * only when "beyond advancement" skip array optimization isn't in use
*/
- if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
- ((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
+ if (beyondskip &&
+ !(key->sk_flags & (SK_BT_NEGPOSINF | SK_BT_NEXT | SK_BT_PRIOR)))
+ {
+ if (key->sk_flags & SK_BT_SKIP)
+ continue;
+ }
+ else if (((key->sk_flags & SK_BT_REQFWD) &&
+ ScanDirectionIsForward(dir)) ||
+ ((key->sk_flags & SK_BT_REQBKWD) &&
+ ScanDirectionIsBackward(dir)))
requiredSameDir = true;
else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsBackward(dir)) ||
((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsForward(dir)))
--
2.45.2