v30-0006-Eliminate-XLOG_HEAP2_VISIBLE-from-vacuum-phase-I.patch
text/x-patch
Filename: v30-0006-Eliminate-XLOG_HEAP2_VISIBLE-from-vacuum-phase-I.patch
Type: text/x-patch
Part: 5
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v30-0006
Subject: Eliminate XLOG_HEAP2_VISIBLE from vacuum phase I prune/freeze
| File | + | − |
|---|---|---|
| src/backend/access/heap/pruneheap.c | 157 | 118 |
From eb94a7df040b6250d3ea3e0d1a79f24a3dc4fd6a Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 2 Dec 2025 16:16:22 -0500
Subject: [PATCH v30 06/16] Eliminate XLOG_HEAP2_VISIBLE from vacuum phase I
prune/freeze
Vacuum no longer emits a separate WAL record for each page set
all-visible or all-frozen during phase I. Instead, visibility map
updates are now included in the XLOG_HEAP2_PRUNE_VACUUM_SCAN record that
is already emitted for pruning and freezing.
Previously, heap_page_prune_and_freeze() determined whether a page was
all-visible, but the corresponding VM bits were only set later in
lazy_scan_prune(). Now the VM is updated immediately in
heap_page_prune_and_freeze(), at the same time as the heap
modifications.
This change applies only to vacuum phase I, not to pruning performed
during normal page access.
Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
src/backend/access/heap/pruneheap.c | 275 ++++++++++++++++------------
1 file changed, 157 insertions(+), 118 deletions(-)
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 7af6aea2d0e..49d3ebb0063 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -205,6 +205,11 @@ static bool heap_page_will_set_vm(PruneState *prstate,
int nlpdead_items,
uint8 *old_vmbits,
uint8 *new_vmbits);
+static TransactionId get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm,
+ uint8 old_vmbits, uint8 new_vmbits,
+ TransactionId latest_xid_removed,
+ TransactionId frz_conflict_horizon,
+ TransactionId visibility_cutoff_xid);
/*
@@ -795,6 +800,68 @@ heap_page_will_freeze(Relation relation, Buffer buffer,
return do_freeze;
}
+/*
+ * Calculate the conflict horizon for the whole XLOG_HEAP2_PRUNE_VACUUM_SCAN
+ * or XLOG_HEAP2_PRUNE_ON_ACCESS record, given which of pruning, freezing and
+ * setting the VM it will do. Returns InvalidTransactionId if none is needed.
+ */
+static TransactionId
+get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm,
+ uint8 old_vmbits, uint8 new_vmbits,
+ TransactionId latest_xid_removed, TransactionId frz_conflict_horizon,
+ TransactionId visibility_cutoff_xid)
+{
+ TransactionId conflict_xid;
+
+ /*
+ * We can omit the snapshot conflict horizon if we are not pruning or
+ * freezing any tuples and are setting an already all-visible page
+ * all-frozen in the VM. In this case, all of the tuples on the page must
+ * already be visible to all MVCC snapshots on the standby.
+ */
+ if (!do_prune &&
+ !do_freeze &&
+ do_set_vm &&
+ (old_vmbits & VISIBILITYMAP_ALL_VISIBLE) != 0 &&
+ (new_vmbits & VISIBILITYMAP_ALL_FROZEN) != 0)
+ return InvalidTransactionId;
+
+ /*
+ * The snapshotConflictHorizon for the whole record should be the most
+ * conservative of the horizons calculated for each of the modifications
+ * it makes. If the record prunes tuples, standby transactions older than
+ * the youngest xmax among the removed tuples will conflict. If it
+ * freezes tuples, standby transactions with xids older than the youngest
+ * tuple it freezes will conflict. Start with no horizon at all.
+ */
+ conflict_xid = InvalidTransactionId;
+
+ /*
+ * If we are updating the VM, the conflict horizon is almost always the
+ * visibility cutoff XID; the exceptions are the early return above and
+ * the latest_xid_removed override below.
+ *
+ * Otherwise, if we are freezing any tuples, use the frz_conflict_horizon
+ * calculated when we determined we would freeze. As an optimization,
+ * that is the visibility_cutoff_xid if the page will be all-frozen. This
+ * is true even if there are LP_DEAD line pointers because we ignored
+ * those when maintaining the visibility_cutoff_xid.
+ */
+ if (do_set_vm)
+ conflict_xid = visibility_cutoff_xid;
+ else if (do_freeze)
+ conflict_xid = frz_conflict_horizon;
+
+ /*
+ * If we are removing tuples with a younger xmax than the conflict_xid
+ * calculated so far, we must use that xmax as our horizon.
+ */
+ if (TransactionIdFollows(latest_xid_removed, conflict_xid))
+ conflict_xid = latest_xid_removed;
+
+ return conflict_xid;
+}
+
/*
* Helper to correct any corruption detected on a heap page and its
* corresponding visibility map page after pruning but before setting the
@@ -1010,7 +1077,6 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
Buffer vmbuffer = params->vmbuffer;
Page page = BufferGetPage(buffer);
BlockNumber blockno = BufferGetBlockNumber(buffer);
- TransactionId vm_conflict_horizon = InvalidTransactionId;
PruneState prstate;
bool do_freeze;
bool do_prune;
@@ -1018,6 +1084,7 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
bool do_set_vm;
bool did_tuple_hint_fpi;
int64 fpi_before = pgWalUsage.wal_fpi;
+ TransactionId conflict_xid;
uint8 new_vmbits;
uint8 old_vmbits;
@@ -1081,6 +1148,37 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
prstate.all_visible = prstate.all_frozen = false;
Assert(!prstate.all_frozen || prstate.all_visible);
+ Assert(!prstate.all_visible || (prstate.lpdead_items == 0));
+
+ /*
+ * Decide whether to set the VM bits based on information from the VM and
+ * the all_visible/all_frozen flags.
+ */
+ do_set_vm = heap_page_will_set_vm(&prstate,
+ params->relation,
+ blockno,
+ buffer,
+ page,
+ vmbuffer,
+ prstate.lpdead_items,
+ &old_vmbits,
+ &new_vmbits);
+
+ /*
+ * new_vmbits should be 0 regardless of whether or not the page is
+ * all-visible if we do not intend to set the VM.
+ */
+ Assert(do_set_vm || new_vmbits == 0);
+
+ conflict_xid = get_conflict_xid(do_prune, do_freeze, do_set_vm,
+ old_vmbits, new_vmbits,
+ prstate.latest_xid_removed,
+ prstate.frz_conflict_horizon,
+ prstate.visibility_cutoff_xid);
+
+ /* Lock vmbuffer before entering a critical section */
+ if (do_set_vm)
+ LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
/* Any error while applying the changes is critical */
START_CRIT_SECTION();
@@ -1102,14 +1200,17 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
/*
* If that's all we had to do to the page, this is a non-WAL-logged
- * hint. If we are going to freeze or prune the page, we will mark
- * the buffer dirty below.
+ * hint. If we are going to freeze or prune the page or set
+ * PD_ALL_VISIBLE, we will mark the buffer dirty below.
+ *
+ * Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden
+ * for the VM to be set and PD_ALL_VISIBLE to be clear.
*/
- if (!do_freeze && !do_prune)
+ if (!do_freeze && !do_prune && !do_set_vm)
MarkBufferDirtyHint(buffer, true);
}
- if (do_prune || do_freeze)
+ if (do_prune || do_freeze || do_set_vm)
{
/* Apply the planned item changes and repair page fragmentation. */
if (do_prune)
@@ -1123,6 +1224,26 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
if (do_freeze)
heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
+ /* Set the visibility map and page visibility hint */
+ if (do_set_vm)
+ {
+ /*
+ * While it is valid for PD_ALL_VISIBLE to be set when the
+ * corresponding VM bit is clear, we strongly prefer to keep them
+ * in sync.
+ *
+ * The heap buffer must be marked dirty before adding it to the
+ * WAL chain when setting the VM. We don't worry about
+ * unnecessarily dirtying the heap buffer if PD_ALL_VISIBLE is
+ * already set, though. It is extremely rare to have a clean heap
+ * buffer with PD_ALL_VISIBLE already set and the VM bits clear,
+ * so there is no point in optimizing it.
+ */
+ PageSetAllVisible(page);
+ visibilitymap_set_vmbits(blockno, vmbuffer, new_vmbits,
+ params->relation->rd_locator);
+ }
+
MarkBufferDirty(buffer);
/*
@@ -1130,29 +1251,12 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
*/
if (RelationNeedsWAL(params->relation))
{
- /*
- * The snapshotConflictHorizon for the whole record should be the
- * most conservative of all the horizons calculated for any of the
- * possible modifications. If this record will prune tuples, any
- * transactions on the standby older than the youngest xmax of the
- * most recently removed tuple this record will prune will
- * conflict. If this record will freeze tuples, any transactions
- * on the standby with xids older than the youngest tuple this
- * record will freeze will conflict.
- */
- TransactionId conflict_xid;
-
- if (TransactionIdFollows(prstate.frz_conflict_horizon,
- prstate.latest_xid_removed))
- conflict_xid = prstate.frz_conflict_horizon;
- else
- conflict_xid = prstate.latest_xid_removed;
-
log_heap_prune_and_freeze(params->relation, buffer,
- InvalidBuffer, /* vmbuffer */
- 0, /* vmflags */
+ do_set_vm ? vmbuffer : InvalidBuffer,
+ do_set_vm ? new_vmbits : 0,
conflict_xid,
- true, params->reason,
+ true, /* cleanup lock */
+ params->reason,
prstate.frozen, prstate.nfrozen,
prstate.redirected, prstate.nredirected,
prstate.nowdead, prstate.ndead,
@@ -1162,43 +1266,8 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
END_CRIT_SECTION();
- /* Copy information back for caller */
- presult->ndeleted = prstate.ndeleted;
- presult->nnewlpdead = prstate.ndead;
- presult->nfrozen = prstate.nfrozen;
- presult->live_tuples = prstate.live_tuples;
- presult->recently_dead_tuples = prstate.recently_dead_tuples;
- presult->hastup = prstate.hastup;
-
- presult->lpdead_items = prstate.lpdead_items;
- /* the presult->deadoffsets array was already filled in */
-
- if (prstate.attempt_freeze)
- {
- if (presult->nfrozen > 0)
- {
- *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
- *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
- }
- else
- {
- *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
- *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
- }
- }
-
- /*
- * If updating the visibility map, the conflict horizon for that record
- * must be the newest xmin on the page. However, if the page is
- * completely frozen, there can be no conflict and the vm_conflict_horizon
- * should remain InvalidTransactionId. This includes the case that we
- * just froze all the tuples; the prune-freeze record included the
- * conflict XID already so we don't need to again.
- */
- if (prstate.all_frozen)
- vm_conflict_horizon = InvalidTransactionId;
- else
- vm_conflict_horizon = prstate.visibility_cutoff_xid;
+ if (do_set_vm)
+ LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
/*
* During its second pass over the heap, VACUUM calls
@@ -1213,7 +1282,8 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
TransactionId debug_cutoff;
bool debug_all_frozen;
- Assert(presult->lpdead_items == 0);
+ Assert(prstate.lpdead_items == 0);
+ Assert(prstate.cutoffs);
Assert(heap_page_is_all_visible(params->relation, buffer,
prstate.cutoffs->OldestXmin,
@@ -1223,67 +1293,36 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
Assert(prstate.all_frozen == debug_all_frozen);
Assert(!TransactionIdIsValid(debug_cutoff) ||
- debug_cutoff == vm_conflict_horizon);
+ debug_cutoff == prstate.visibility_cutoff_xid);
}
#endif
- /* Now update the visibility map and PD_ALL_VISIBLE hint */
- Assert(!prstate.all_visible || (prstate.lpdead_items == 0));
-
- do_set_vm = heap_page_will_set_vm(&prstate,
- params->relation,
- blockno,
- buffer,
- page,
- vmbuffer,
- prstate.lpdead_items,
- &old_vmbits,
- &new_vmbits);
+ /* Copy information back for caller */
+ presult->ndeleted = prstate.ndeleted;
+ presult->nnewlpdead = prstate.ndead;
+ presult->nfrozen = prstate.nfrozen;
+ presult->live_tuples = prstate.live_tuples;
+ presult->recently_dead_tuples = prstate.recently_dead_tuples;
+ presult->hastup = prstate.hastup;
+ presult->new_vmbits = new_vmbits;
+ presult->old_vmbits = old_vmbits;
- /*
- * new_vmbits should be 0 regardless of whether or not the page is
- * all-visible if we do not intend to set the VM.
- */
- Assert(do_set_vm || new_vmbits == 0);
+ presult->lpdead_items = prstate.lpdead_items;
+ /* the presult->deadoffsets array was already filled in */
- /* Set the visibility map and page visibility hint, if relevant */
- if (do_set_vm)
+ if (prstate.attempt_freeze)
{
- Assert(prstate.all_visible);
-
- /*
- * It should never be the case that the visibility map page is set
- * while the page-level bit is clear, but the reverse is allowed (if
- * checksums are not enabled). Regardless, set both bits so that we
- * get back in sync.
- *
- * The heap buffer must be marked dirty before adding it to the WAL
- * chain when setting the VM. We don't worry about unnecessarily
- * dirtying the heap buffer if PD_ALL_VISIBLE is already set, though.
- * It is extremely rare to have a clean heap buffer with
- * PD_ALL_VISIBLE already set and the VM bits clear, so there is no
- * point in optimizing it.
- */
- PageSetAllVisible(page);
- MarkBufferDirty(buffer);
-
- /*
- * If the page is being set all-frozen, we pass InvalidTransactionId
- * as the cutoff_xid, since a snapshot conflict horizon sufficient to
- * make everything safe for REDO was logged when the page's tuples
- * were frozen.
- */
- Assert(!prstate.all_frozen || !TransactionIdIsValid(vm_conflict_horizon));
-
- visibilitymap_set(params->relation, blockno, buffer,
- InvalidXLogRecPtr,
- vmbuffer, vm_conflict_horizon,
- new_vmbits);
+ if (presult->nfrozen > 0)
+ {
+ *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
+ *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
+ }
+ else
+ {
+ *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
+ *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
+ }
}
-
- /* Save the vmbits for caller */
- presult->old_vmbits = old_vmbits;
- presult->new_vmbits = new_vmbits;
}
--
2.43.0