heap-hot-search-refactoring.patch
application/octet-stream
Filename: heap-hot-search-refactoring.patch
Type: application/octet-stream
Part: 0
Message:
heap_hot_search_buffer refactoring
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: unified
| File | + | − |
|---|---|---|
| src/backend/access/heap/heapam.c | 36 | 25 |
| src/backend/access/index/genam.c | 1 | 3 |
| src/backend/access/index/indexam.c | 21 | 152 |
| src/backend/executor/nodeBitmapHeapscan.c | 3 | 1 |
| src/include/access/heapam.h | 2 | 1 |
| src/include/access/relscan.h | 1 | 3 |
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 01a492e..078d4ef 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1522,28 +1522,31 @@ heap_fetch(Relation relation,
*/
bool
heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
- Snapshot snapshot, bool *all_dead)
+ Snapshot snapshot, HeapTuple heapTuple,
+ bool *all_dead, bool first_call)
{
Page dp = (Page) BufferGetPage(buffer);
TransactionId prev_xmax = InvalidTransactionId;
OffsetNumber offnum;
bool at_chain_start;
bool valid;
+ bool skip;
+ /* If this is not the first call, previous call returned a (live!) tuple */
if (all_dead)
- *all_dead = true;
+ *all_dead = !first_call;
Assert(TransactionIdIsValid(RecentGlobalXmin));
Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(buffer));
offnum = ItemPointerGetOffsetNumber(tid);
- at_chain_start = true;
+ at_chain_start = first_call;
+ skip = !first_call;
/* Scan through possible multiple members of HOT-chain */
for (;;)
{
ItemId lp;
- HeapTupleData heapTuple;
/* check for bogus TID */
if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
@@ -1566,15 +1569,15 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
break;
}
- heapTuple.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
- heapTuple.t_len = ItemIdGetLength(lp);
- heapTuple.t_tableOid = relation->rd_id;
- heapTuple.t_self = *tid;
+ heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
+ heapTuple->t_len = ItemIdGetLength(lp);
+ heapTuple->t_tableOid = relation->rd_id;
+ heapTuple->t_self = *tid;
/*
* Shouldn't see a HEAP_ONLY tuple at chain start.
*/
- if (at_chain_start && HeapTupleIsHeapOnly(&heapTuple))
+ if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
break;
/*
@@ -1583,20 +1586,26 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
*/
if (TransactionIdIsValid(prev_xmax) &&
!TransactionIdEquals(prev_xmax,
- HeapTupleHeaderGetXmin(heapTuple.t_data)))
+ HeapTupleHeaderGetXmin(heapTuple->t_data)))
break;
- /* If it's visible per the snapshot, we must return it */
- valid = HeapTupleSatisfiesVisibility(&heapTuple, snapshot, buffer);
- CheckForSerializableConflictOut(valid, relation, &heapTuple, buffer);
- if (valid)
+ /* On resuming a scan, first tuple already returned, so skip it. */
+ if (!skip)
{
- ItemPointerSetOffsetNumber(tid, offnum);
- PredicateLockTuple(relation, &heapTuple);
- if (all_dead)
- *all_dead = false;
- return true;
+ /* If it's visible per the snapshot, we must return it */
+ valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
+ CheckForSerializableConflictOut(valid, relation, heapTuple,
+ buffer);
+ if (valid)
+ {
+ ItemPointerSetOffsetNumber(tid, offnum);
+ PredicateLockTuple(relation, heapTuple);
+ if (all_dead)
+ *all_dead = false;
+ return true;
+ }
}
+ skip = false;
/*
* If we can't see it, maybe no one else can either. At caller
@@ -1604,7 +1613,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
* transactions.
*/
if (all_dead && *all_dead &&
- HeapTupleSatisfiesVacuum(heapTuple.t_data, RecentGlobalXmin,
+ HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
buffer) != HEAPTUPLE_DEAD)
*all_dead = false;
@@ -1612,13 +1621,13 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
* Check to see if HOT chain continues past this tuple; if so fetch
* the next offnum and loop around.
*/
- if (HeapTupleIsHotUpdated(&heapTuple))
+ if (HeapTupleIsHotUpdated(heapTuple))
{
- Assert(ItemPointerGetBlockNumber(&heapTuple.t_data->t_ctid) ==
+ Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
ItemPointerGetBlockNumber(tid));
- offnum = ItemPointerGetOffsetNumber(&heapTuple.t_data->t_ctid);
+ offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
at_chain_start = false;
- prev_xmax = HeapTupleHeaderGetXmax(heapTuple.t_data);
+ prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
}
else
break; /* end of chain */
@@ -1640,10 +1649,12 @@ heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot,
{
bool result;
Buffer buffer;
+ HeapTupleData heapTuple;
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
LockBuffer(buffer, BUFFER_LOCK_SHARE);
- result = heap_hot_search_buffer(tid, relation, buffer, snapshot, all_dead);
+ result = heap_hot_search_buffer(tid, relation, buffer, snapshot,
+ &heapTuple, all_dead, true);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return result;
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index db04e26..fe3aa3c 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -113,9 +113,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
ItemPointerSetInvalid(&scan->xs_ctup.t_self);
scan->xs_ctup.t_data = NULL;
scan->xs_cbuf = InvalidBuffer;
- scan->xs_hot_dead = false;
- scan->xs_next_hot = InvalidOffsetNumber;
- scan->xs_prev_xmax = InvalidTransactionId;
+ scan->xs_continue_hot = false;
return scan;
}
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 27c37d6..1e22d9b 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -335,7 +335,7 @@ index_rescan(IndexScanDesc scan,
scan->xs_cbuf = InvalidBuffer;
}
- scan->xs_next_hot = InvalidOffsetNumber;
+ scan->xs_continue_hot = false;
scan->kill_prior_tuple = false; /* for safety */
@@ -417,7 +417,7 @@ index_restrpos(IndexScanDesc scan)
SCAN_CHECKS;
GET_SCAN_PROCEDURE(amrestrpos);
- scan->xs_next_hot = InvalidOffsetNumber;
+ scan->xs_continue_hot = false;
scan->kill_prior_tuple = false; /* for safety */
@@ -443,26 +443,18 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
HeapTuple heapTuple = &scan->xs_ctup;
ItemPointer tid = &heapTuple->t_self;
FmgrInfo *procedure;
+ bool all_dead = false;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(amgettuple);
Assert(TransactionIdIsValid(RecentGlobalXmin));
- /*
- * We always reset xs_hot_dead; if we are here then either we are just
- * starting the scan, or we previously returned a visible tuple, and in
- * either case it's inappropriate to kill the prior index entry.
- */
- scan->xs_hot_dead = false;
-
for (;;)
{
- OffsetNumber offnum;
- bool at_chain_start;
- Page dp;
+ bool got_heap_tuple;
- if (scan->xs_next_hot != InvalidOffsetNumber)
+ if (scan->xs_continue_hot)
{
/*
* We are resuming scan of a HOT chain after having returned an
@@ -471,10 +463,6 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
Assert(BufferIsValid(scan->xs_cbuf));
Assert(ItemPointerGetBlockNumber(tid) ==
BufferGetBlockNumber(scan->xs_cbuf));
- Assert(TransactionIdIsValid(scan->xs_prev_xmax));
- offnum = scan->xs_next_hot;
- at_chain_start = false;
- scan->xs_next_hot = InvalidOffsetNumber;
}
else
{
@@ -488,7 +476,7 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
* comments in RelationGetIndexScan().
*/
if (!scan->xactStartedInRecovery)
- scan->kill_prior_tuple = scan->xs_hot_dead;
+ scan->kill_prior_tuple = all_dead;
/*
* The AM's gettuple proc finds the next index entry matching the
@@ -521,150 +509,31 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
if (prev_buf != scan->xs_cbuf)
heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
RecentGlobalXmin);
-
- /* Prepare to scan HOT chain starting at index-referenced offnum */
- offnum = ItemPointerGetOffsetNumber(tid);
- at_chain_start = true;
-
- /* We don't know what the first tuple's xmin should be */
- scan->xs_prev_xmax = InvalidTransactionId;
-
- /* Initialize flag to detect if all entries are dead */
- scan->xs_hot_dead = true;
}
/* Obtain share-lock on the buffer so we can examine visibility */
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
+ got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
+ scan->xs_cbuf,
+ scan->xs_snapshot,
+ &scan->xs_ctup,
+ &all_dead,
+ !scan->xs_continue_hot);
+ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
- dp = (Page) BufferGetPage(scan->xs_cbuf);
-
- /* Scan through possible multiple members of HOT-chain */
- for (;;)
+ if (got_heap_tuple)
{
- ItemId lp;
- ItemPointer ctid;
- bool valid;
-
- /* check for bogus TID */
- if (offnum < FirstOffsetNumber ||
- offnum > PageGetMaxOffsetNumber(dp))
- break;
-
- lp = PageGetItemId(dp, offnum);
-
- /* check for unused, dead, or redirected items */
- if (!ItemIdIsNormal(lp))
- {
- /* We should only see a redirect at start of chain */
- if (ItemIdIsRedirected(lp) && at_chain_start)
- {
- /* Follow the redirect */
- offnum = ItemIdGetRedirect(lp);
- at_chain_start = false;
- continue;
- }
- /* else must be end of chain */
- break;
- }
-
- /*
- * We must initialize all of *heapTuple (ie, scan->xs_ctup) since
- * it is returned to the executor on success.
- */
- heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
- heapTuple->t_len = ItemIdGetLength(lp);
- ItemPointerSetOffsetNumber(tid, offnum);
- heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
- ctid = &heapTuple->t_data->t_ctid;
-
/*
- * Shouldn't see a HEAP_ONLY tuple at chain start. (This test
- * should be unnecessary, since the chain root can't be removed
- * while we have pin on the index entry, but let's make it
- * anyway.)
+ * Only in a non-MVCC snapshot can more than one member of the
+ * HOT chain be visible.
*/
- if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
- break;
-
- /*
- * The xmin should match the previous xmax value, else chain is
- * broken. (Note: this test is not optional because it protects
- * us against the case where the prior chain member's xmax aborted
- * since we looked at it.)
- */
- if (TransactionIdIsValid(scan->xs_prev_xmax) &&
- !TransactionIdEquals(scan->xs_prev_xmax,
- HeapTupleHeaderGetXmin(heapTuple->t_data)))
- break;
-
- /* If it's visible per the snapshot, we must return it */
- valid = HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot,
- scan->xs_cbuf);
-
- CheckForSerializableConflictOut(valid, scan->heapRelation,
- heapTuple, scan->xs_cbuf);
-
- if (valid)
- {
- /*
- * If the snapshot is MVCC, we know that it could accept at
- * most one member of the HOT chain, so we can skip examining
- * any more members. Otherwise, check for continuation of the
- * HOT-chain, and set state for next time.
- */
- if (IsMVCCSnapshot(scan->xs_snapshot))
- scan->xs_next_hot = InvalidOffsetNumber;
- else if (HeapTupleIsHotUpdated(heapTuple))
- {
- Assert(ItemPointerGetBlockNumber(ctid) ==
- ItemPointerGetBlockNumber(tid));
- scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid);
- scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
- }
- else
- scan->xs_next_hot = InvalidOffsetNumber;
-
- PredicateLockTuple(scan->heapRelation, heapTuple);
-
- LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
-
- pgstat_count_heap_fetch(scan->indexRelation);
-
- return heapTuple;
- }
-
- /*
- * If we can't see it, maybe no one else can either. Check to see
- * if the tuple is dead to all transactions. If we find that all
- * the tuples in the HOT chain are dead, we'll signal the index AM
- * to not return that TID on future indexscans.
- */
- if (scan->xs_hot_dead &&
- HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
- scan->xs_cbuf) != HEAPTUPLE_DEAD)
- scan->xs_hot_dead = false;
-
- /*
- * Check to see if HOT chain continues past this tuple; if so
- * fetch the next offnum (we don't bother storing it into
- * xs_next_hot, but must store xs_prev_xmax), and loop around.
- */
- if (HeapTupleIsHotUpdated(heapTuple))
- {
- Assert(ItemPointerGetBlockNumber(ctid) ==
- ItemPointerGetBlockNumber(tid));
- offnum = ItemPointerGetOffsetNumber(ctid);
- at_chain_start = false;
- scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
- }
- else
- break; /* end of chain */
- } /* loop over a single HOT chain */
-
- LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+ scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
+ pgstat_count_heap_fetch(scan->indexRelation);
+ return heapTuple;
+ }
/* Loop around to ask index AM for another TID */
- scan->xs_next_hot = InvalidOffsetNumber;
+ scan->xs_continue_hot = false;
}
/* Release any held pin on a heap page */
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 20d5eb1..4bc4e39 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -350,9 +350,11 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
{
OffsetNumber offnum = tbmres->offsets[curslot];
ItemPointerData tid;
+ HeapTupleData heapTuple;
ItemPointerSet(&tid, page, offnum);
- if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, NULL))
+ if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
+ &heapTuple, NULL, true))
scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
}
}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 4dbc393..cf0c044 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -83,7 +83,8 @@ extern bool heap_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf, bool keep_buf,
Relation stats_relation);
extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
- Buffer buffer, Snapshot snapshot, bool *all_dead);
+ Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
+ bool *all_dead, bool first_call);
extern bool heap_hot_search(ItemPointer tid, Relation relation,
Snapshot snapshot, bool *all_dead);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 7663033..db1131e 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -84,9 +84,7 @@ typedef struct IndexScanDescData
bool xs_recheck; /* T means scan keys must be rechecked */
/* state data for traversing HOT chains in index_getnext */
- bool xs_hot_dead; /* T if all members of HOT chain are dead */
- OffsetNumber xs_next_hot; /* next member of HOT chain, if any */
- TransactionId xs_prev_xmax; /* previous HOT chain member's XMAX, if any */
+ bool xs_continue_hot; /* T if must keep walking HOT chain */
} IndexScanDescData;
/* Struct for heap-or-index scans of system tables */