v30-0015-Allow-on-access-pruning-to-set-pages-all-visible.patch
text/x-patch
Filename: v30-0015-Allow-on-access-pruning-to-set-pages-all-visible.patch
Type: text/x-patch
Part: 14
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
Returns the parsed metadata as JSON (format, series position, per-file stats); it never returns the diff bytes.
API reference →
Format: format-patch
Series: patch v30-0015
Subject: Allow on-access pruning to set pages all-visible
| File | + | − |
|---|---|---|
| src/backend/access/heap/heapam.c | 14 | 1 |
| src/backend/access/heap/heapam_handler.c | 13 | 2 |
| src/backend/access/heap/pruneheap.c | 39 | 1 |
| src/include/access/heapam.h | 21 | 3 |
| src/test/recovery/t/035_standby_logical_decoding.pl | 2 | 1 |
From 665f41020eeea237c5538d679ae248161257a87b Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 3 Dec 2025 15:24:08 -0500
Subject: [PATCH v30 15/16] Allow on-access pruning to set pages all-visible
Many queries do not modify the underlying relation. For such queries, if
on-access pruning occurs during the scan, we can check whether the page
has become all-visible and update the visibility map accordingly.
Previously, only vacuum and COPY FREEZE marked pages as all-visible or
all-frozen.
This commit implements on-access VM setting for sequential scans as well
as for the underlying heap relation in index scans and bitmap heap
scans.
Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
src/backend/access/heap/heapam.c | 15 ++++++-
src/backend/access/heap/heapam_handler.c | 15 ++++++-
src/backend/access/heap/pruneheap.c | 40 ++++++++++++++++++-
src/include/access/heapam.h | 24 +++++++++--
.../t/035_standby_logical_decoding.pl | 3 +-
5 files changed, 89 insertions(+), 8 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index fb7a7548aa0..d9dc79f4a96 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -570,6 +570,7 @@ heap_prepare_pagescan(TableScanDesc sscan)
Buffer buffer = scan->rs_cbuf;
BlockNumber block = scan->rs_cblock;
Snapshot snapshot;
+ Buffer *vmbuffer = NULL;
Page page;
int lines;
bool all_visible;
@@ -584,7 +585,9 @@ heap_prepare_pagescan(TableScanDesc sscan)
/*
* Prune and repair fragmentation for the whole page, if possible.
*/
- heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
+ if (sscan->rs_flags & SO_HINT_REL_READ_ONLY)
+ vmbuffer = &scan->rs_vmbuffer;
+ heap_page_prune_opt(scan->rs_base.rs_rd, buffer, vmbuffer);
/*
* We must hold share lock on the buffer content while examining tuple
@@ -1261,6 +1264,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
sizeof(TBMIterateResult));
}
+ scan->rs_vmbuffer = InvalidBuffer;
return (TableScanDesc) scan;
}
@@ -1299,6 +1303,12 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
scan->rs_cbuf = InvalidBuffer;
}
+ if (BufferIsValid(scan->rs_vmbuffer))
+ {
+ ReleaseBuffer(scan->rs_vmbuffer);
+ scan->rs_vmbuffer = InvalidBuffer;
+ }
+
/*
* SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
* additional data vs a normal HeapScan
@@ -1331,6 +1341,9 @@ heap_endscan(TableScanDesc sscan)
if (BufferIsValid(scan->rs_cbuf))
ReleaseBuffer(scan->rs_cbuf);
+ if (BufferIsValid(scan->rs_vmbuffer))
+ ReleaseBuffer(scan->rs_vmbuffer);
+
/*
* Must free the read stream before freeing the BufferAccessStrategy.
*/
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index fc251e11f8a..6946da8c9d7 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -85,6 +85,7 @@ heapam_index_fetch_begin(Relation rel, uint32 flags)
hscan->xs_base.rel = rel;
hscan->xs_cbuf = InvalidBuffer;
+ hscan->xs_vmbuffer = InvalidBuffer;
hscan->modifies_base_rel = !(flags & SO_HINT_REL_READ_ONLY);
return &hscan->xs_base;
@@ -100,6 +101,12 @@ heapam_index_fetch_reset(IndexFetchTableData *scan)
ReleaseBuffer(hscan->xs_cbuf);
hscan->xs_cbuf = InvalidBuffer;
}
+
+ if (BufferIsValid(hscan->xs_vmbuffer))
+ {
+ ReleaseBuffer(hscan->xs_vmbuffer);
+ hscan->xs_vmbuffer = InvalidBuffer;
+ }
}
static void
@@ -139,7 +146,8 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
* Prune page, but only if we weren't already on this page
*/
if (prev_buf != hscan->xs_cbuf)
- heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
+ heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
+ hscan->modifies_base_rel ? NULL : &hscan->xs_vmbuffer);
}
/* Obtain share-lock on the buffer so we can examine visibility */
@@ -2477,6 +2485,7 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
TBMIterateResult *tbmres;
OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
int noffsets = -1;
+ Buffer *vmbuffer = NULL;
Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
Assert(hscan->rs_read_stream);
@@ -2523,7 +2532,9 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
/*
* Prune and repair fragmentation for the whole page, if possible.
*/
- heap_page_prune_opt(scan->rs_rd, buffer);
+ if (scan->rs_flags & SO_HINT_REL_READ_ONLY)
+ vmbuffer = &hscan->rs_vmbuffer;
+ heap_page_prune_opt(scan->rs_rd, buffer, vmbuffer);
/*
* We must hold share lock on the buffer content while examining tuple
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 8e59e7692c1..f414f02964d 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -202,6 +202,8 @@ static bool heap_page_will_set_vm(PruneState *prstate,
Relation relation,
BlockNumber heap_blk, Buffer heap_buffer, Page heap_page,
Buffer vmbuffer,
+ PruneReason reason,
+ bool do_prune, bool do_freeze,
int nlpdead_items,
uint8 *old_vmbits,
uint8 *new_vmbits);
@@ -223,9 +225,13 @@ static TransactionId get_conflict_xid(bool do_prune, bool do_freeze, bool do_set
* if there's not any use in pruning.
*
* Caller must have pin on the buffer, and must *not* have a lock on it.
+ *
+ * If vmbuffer is not NULL, it is okay for pruning to set the visibility map if
+ * the page is all-visible. We will take care of pinning and, if needed,
+ * reading in the page of the visibility map.
*/
void
-heap_page_prune_opt(Relation relation, Buffer buffer)
+heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
{
Page page = BufferGetPage(buffer);
TransactionId prune_xid;
@@ -306,6 +312,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
.cutoffs = NULL,
};
+ if (vmbuffer)
+ {
+ visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
+ params.options |= HEAP_PAGE_PRUNE_UPDATE_VM;
+ params.vmbuffer = *vmbuffer;
+ }
+
heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
NULL, NULL);
@@ -951,6 +964,9 @@ identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
* corrupted, it will fix them by clearing the VM bits and visibility hint.
* This does not need to be done in a critical section.
*
+ * This should be called only after do_freeze has been decided (and do_prune
+ * has been set), as these factor into our heuristic-based decision.
+ *
* Returns true if one or both VM bits should be set, along with returning the
* current value of the VM bits in *old_vmbits and the desired new value of
* the VM bits in *new_vmbits.
@@ -964,6 +980,8 @@ heap_page_will_set_vm(PruneState *prstate,
Relation relation,
BlockNumber heap_blk, Buffer heap_buffer, Page heap_page,
Buffer vmbuffer,
+ PruneReason reason,
+ bool do_prune, bool do_freeze,
int nlpdead_items,
uint8 *old_vmbits,
uint8 *new_vmbits)
@@ -974,6 +992,24 @@ heap_page_will_set_vm(PruneState *prstate,
if (!prstate->attempt_update_vm)
return false;
+ /*
+ * If this is an on-access call and we're neither pruning nor freezing,
+ * avoid setting the visibility map if it would newly dirty the heap page
+ * or, if the page is already dirty, if doing so would require including a
+ * full-page image (FPI) of the heap page in the WAL. This situation
+ * should be rare, as on-access pruning is only attempted when
+ * pd_prune_xid is valid.
+ */
+ if (reason == PRUNE_ON_ACCESS &&
+ prstate->all_visible &&
+ !do_prune && !do_freeze &&
+ (!BufferIsDirty(heap_buffer) || XLogCheckBufferNeedsBackup(heap_buffer)))
+ {
+ prstate->all_visible = false;
+ prstate->all_frozen = false;
+ return false;
+ }
+
*old_vmbits = visibilitymap_get_status(relation, heap_blk,
&vmbuffer);
@@ -1171,6 +1207,8 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
buffer,
page,
vmbuffer,
+ params->reason,
+ do_prune, do_freeze,
prstate.lpdead_items,
&old_vmbits,
&new_vmbits);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index ba62a4d4cba..b0e7c71463c 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -95,6 +95,13 @@ typedef struct HeapScanDescData
*/
ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
+ /*
+ * For sequential scans and bitmap heap scans. If the relation is not
+ * being modified, on-access pruning may read in the current heap page's
+ * corresponding VM block to this buffer.
+ */
+ Buffer rs_vmbuffer;
+
/* these fields only used in page-at-a-time mode and for bitmap scans */
uint32 rs_cindex; /* current tuple's index in vistuples */
uint32 rs_ntuples; /* number of visible tuples on page */
@@ -117,8 +124,18 @@ typedef struct IndexFetchHeapData
{
IndexFetchTableData xs_base; /* AM independent part of the descriptor */
- Buffer xs_cbuf; /* current heap buffer in scan, if any */
- /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
+ /*
+ * Current heap buffer in scan, if any. NB: if xs_cbuf is not
+ * InvalidBuffer, we hold a pin on that buffer.
+ */
+ Buffer xs_cbuf;
+
+ /*
+ * For index scans that do not modify the underlying heap table, on-access
+ * pruning may read in the current heap page's corresponding VM block to
+ * this buffer.
+ */
+ Buffer xs_vmbuffer;
/*
* Some optimizations can only be performed if the query does not modify
@@ -419,7 +436,8 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
TM_IndexDeleteOp *delstate);
/* in heap/pruneheap.c */
-extern void heap_page_prune_opt(Relation relation, Buffer buffer);
+extern void heap_page_prune_opt(Relation relation, Buffer buffer,
+ Buffer *vmbuffer);
extern void heap_page_prune_and_freeze(PruneFreezeParams *params,
PruneFreezeResult *presult,
OffsetNumber *off_loc,
diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl
index ebe2fae1789..bdd9f0a62cd 100644
--- a/src/test/recovery/t/035_standby_logical_decoding.pl
+++ b/src/test/recovery/t/035_standby_logical_decoding.pl
@@ -296,6 +296,7 @@ wal_level = 'logical'
max_replication_slots = 4
max_wal_senders = 4
autovacuum = off
+hot_standby_feedback = on
});
$node_primary->dump_info;
$node_primary->start;
@@ -748,7 +749,7 @@ check_pg_recvlogical_stderr($handle,
$logstart = -s $node_standby->logfile;
reactive_slots_change_hfs_and_wait_for_xmins('shared_row_removal_',
- 'no_conflict_', 0, 1);
+ 'no_conflict_', 1, 0);
# This should not trigger a conflict
wait_until_vacuum_can_remove(
--
2.43.0