v14-0004-IOS-Support-tableAM-powered-prechecked-visibilit.patch
application/octet-stream
Filename: v14-0004-IOS-Support-tableAM-powered-prechecked-visibilit.patch
Type: application/octet-stream
Part: 2
From 346eb3fb59ba39dd36d88bc5239ec14d2435412b Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Sat, 20 Dec 2025 00:39:50 +0100
Subject: [PATCH v14 4/8] IOS: Support tableAM-powered prechecked visibility
statuses
Previously, we assumed VM_ALL_VISIBLE(...) is universal across all
AMs. Now, we use the new table_index_vischeck_tuples API, and allow
index AMs to do this check as well. This removes the need for index
AMs to hold on to pinned pages in index-only scans, as long as they
do the visibility checks on the tuples they pull from the pages
whilst they still have a pin on the page.
selfuncs.c's get_actual_variable_endpoint() is similarly updated
to use this new visibility checking infrastructure.
Future commits will implement this new infrastructure in gist,
sp-gist, and nbtree.
---
src/backend/access/index/indexam.c | 6 ++
src/backend/executor/nodeIndexonlyscan.c | 80 +++++++++++++++---------
src/backend/utils/adt/selfuncs.c | 74 +++++++++++++---------
src/include/access/relscan.h | 2 +
4 files changed, 104 insertions(+), 58 deletions(-)
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 0492d92d23b..c9817e6004c 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -638,6 +638,12 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
/* XXX: we should assert that a snapshot is pushed or registered */
Assert(TransactionIdIsValid(RecentXmin));
+ /*
+ * Reset xs_visrecheck, so we don't confuse the next tuple's visibility
+ * state with that of the previous.
+ */
+ scan->xs_visrecheck = TMVC_Unchecked;
+
/*
* The AM's amgettuple proc finds the next index entry matching the scan
* keys, and puts the TID into scan->xs_heaptid. It should also set
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 6bea42f128f..e8811f9a3c1 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -121,6 +121,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
{
bool tuple_from_heap = false;
+ TMVC_Result vischeck = scandesc->xs_visrecheck;
CHECK_FOR_INTERRUPTS();
@@ -128,6 +129,9 @@ IndexOnlyNext(IndexOnlyScanState *node)
* We can skip the heap fetch if the TID references a heap page on
* which all tuples are known visible to everybody. In any case,
* we'll use the index tuple not the heap tuple as the data source.
+ * The index may have already pre-checked the visibility of the tuple
+ * for us and stored the result in xs_visrecheck. In those cases, we
+ * can skip checking the visibility status.
*
* Note on Memory Ordering Effects: visibilitymap_get_status does not
* lock the visibility map buffer, and therefore the result we read
@@ -157,37 +161,57 @@ IndexOnlyNext(IndexOnlyScanState *node)
*
* It's worth going through this complexity to avoid needing to lock
* the VM buffer, which could cause significant contention.
+ *
+ * The index doing these checks for us doesn't materially change these
+ * considerations.
*/
- if (!VM_ALL_VISIBLE(scandesc->heapRelation,
- ItemPointerGetBlockNumber(tid),
- &node->ioss_VMBuffer))
- {
- /*
- * Rats, we have to visit the heap to check visibility.
- */
- InstrCountTuples2(node, 1);
- if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
- continue; /* no visible tuple, try next index entry */
+ if (vischeck == TMVC_Unchecked)
+ vischeck = table_index_vischeck_tuple(scandesc->heapRelation,
+ &node->ioss_VMBuffer,
+ tid);
- ExecClearTuple(node->ioss_TableSlot);
-
- /*
- * Only MVCC snapshots are supported here, so there should be no
- * need to keep following the HOT chain once a visible entry has
- * been found. If we did want to allow that, we'd need to keep
- * more state to remember not to call index_getnext_tid next time.
- */
- if (scandesc->xs_heap_continue)
- elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
+ Assert(vischeck != TMVC_Unchecked);
- /*
- * Note: at this point we are holding a pin on the heap page, as
- * recorded in scandesc->xs_cbuf. We could release that pin now,
- * but it's not clear whether it's a win to do so. The next index
- * entry might require a visit to the same heap page.
- */
-
- tuple_from_heap = true;
+ switch (vischeck)
+ {
+ case TMVC_Unchecked:
+ elog(ERROR, "Failed to check visibility for tuple");
+ break;
+ case TMVC_Visible:
+ /* no further checks required here */
+ break;
+ case TMVC_MaybeVisible:
+ {
+ /*
+ * Rats, we have to visit the heap to check visibility.
+ */
+ InstrCountTuples2(node, 1);
+ if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
+ continue; /* no visible tuple, try next index entry */
+
+ ExecClearTuple(node->ioss_TableSlot);
+
+ /*
+ * Only MVCC snapshots are supported here, so there should be
+ * no need to keep following the HOT chain once a visible
+ * entry has been found. If we did want to allow that, we'd
+ * need to keep more state to remember not to call
+ * index_getnext_tid next time.
+ */
+ if (scandesc->xs_heap_continue)
+ elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
+
+ /*
+ * Note: at this point we are holding a pin on the heap page,
+ * as recorded in scandesc->xs_cbuf. We could release that
+ * pin now, but it's not clear whether it's a win to do so.
+ * The next index entry might require a visit to the same heap
+ * page.
+ */
+
+ tuple_from_heap = true;
+ break;
+ }
}
/*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c760b19db55..f01adcd6f71 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -7109,44 +7109,58 @@ get_actual_variable_endpoint(Relation heapRel,
while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
{
BlockNumber block = ItemPointerGetBlockNumber(tid);
+ TMVC_Result visres = index_scan->xs_visrecheck;
- if (!VM_ALL_VISIBLE(heapRel,
- block,
- &vmbuffer))
+ if (visres == TMVC_Unchecked)
+ visres = table_index_vischeck_tuple(heapRel, &vmbuffer, tid);
+
+ Assert(visres != TMVC_Unchecked);
+
+ switch (visres)
{
- /* Rats, we have to visit the heap to check visibility */
- if (!index_fetch_heap(index_scan, tableslot))
+ case TMVC_Unchecked:
+ elog(ERROR, "Failed to check visibility for tuple");
+ case TMVC_Visible:
+ /* no further checks required here */
+ break;
+ case TMVC_MaybeVisible:
{
- /*
- * No visible tuple for this index entry, so we need to
- * advance to the next entry. Before doing so, count heap
- * page fetches and give up if we've done too many.
- *
- * We don't charge a page fetch if this is the same heap page
- * as the previous tuple. This is on the conservative side,
- * since other recently-accessed pages are probably still in
- * buffers too; but it's good enough for this heuristic.
- */
+ /* Rats, we have to visit the heap to check visibility */
+ if (!index_fetch_heap(index_scan, tableslot))
+ {
+ /*
+ * No visible tuple for this index entry, so we need to
+ * advance to the next entry. Before doing so, count heap
+ * page fetches and give up if we've done too many.
+ *
+ * We don't charge a page fetch if this is the same heap
+ * page as the previous tuple. This is on the
+ * conservative side, since other recently-accessed pages
+ * are probably still in buffers too; but it's good enough
+ * for this heuristic.
+ */
#define VISITED_PAGES_LIMIT 100
- if (block != last_heap_block)
- {
- last_heap_block = block;
- n_visited_heap_pages++;
- if (n_visited_heap_pages > VISITED_PAGES_LIMIT)
- break;
- }
+ if (block != last_heap_block)
+ {
+ last_heap_block = block;
+ n_visited_heap_pages++;
+ if (n_visited_heap_pages > VISITED_PAGES_LIMIT)
+ break;
+ }
- continue; /* no visible tuple, try next index entry */
- }
+ continue; /* no visible tuple, try next index entry */
+ }
- /* We don't actually need the heap tuple for anything */
- ExecClearTuple(tableslot);
+ /* We don't actually need the heap tuple for anything */
+ ExecClearTuple(tableslot);
- /*
- * We don't care whether there's more than one visible tuple in
- * the HOT chain; if any are visible, that's good enough.
- */
+ /*
+ * We don't care whether there's more than one visible tuple in
+ * the HOT chain; if any are visible, that's good enough.
+ */
+ break;
+ }
}
/*
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 78989a959d4..76cf8d02795 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -178,6 +178,8 @@ typedef struct IndexScanDescData
bool xs_recheck; /* T means scan keys must be rechecked */
+ int xs_visrecheck; /* TM_VisCheckResult from tableam.h */
+
/*
* When fetching with an ordering operator, the values of the ORDER BY
* expressions of the last returned tuple, according to the index. If
--
2.50.1 (Apple Git-155)