Single-Pass-Vacuum-v1.patch
text/x-patch
Filename: Single-Pass-Vacuum-v1.patch
Type: text/x-patch
Part: 0
Message:
Single pass vacuum - take 1
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 01a492e..12918d2 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -3912,7 +3912,8 @@ log_heap_clean(Relation reln, Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
OffsetNumber *nowunused, int nunused,
- TransactionId latestRemovedXid)
+ TransactionId latestRemovedXid,
+ bool hasvaclsn, XLogRecPtr indexvaclsn)
{
xl_heap_clean xlrec;
uint8 info;
@@ -3927,6 +3928,8 @@ log_heap_clean(Relation reln, Buffer buffer,
xlrec.latestRemovedXid = latestRemovedXid;
xlrec.nredirected = nredirected;
xlrec.ndead = ndead;
+ xlrec.hasvaclsn = hasvaclsn;
+ xlrec.indexvaclsn = indexvaclsn;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapClean;
@@ -4196,6 +4199,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
int ndead;
int nunused;
Size freespace;
+ bool hasvaclsn;
+ XLogRecPtr indexvaclsn;
/*
* We're about to remove tuples. In Hot Standby mode, ensure that there's
@@ -4228,6 +4233,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
nredirected = xlrec->nredirected;
ndead = xlrec->ndead;
+ hasvaclsn = xlrec->hasvaclsn;
+ indexvaclsn = xlrec->indexvaclsn;
end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
redirected = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
nowdead = redirected + (nredirected * 2);
@@ -4239,7 +4246,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
heap_page_prune_execute(buffer,
redirected, nredirected,
nowdead, ndead,
- nowunused, nunused);
+ nowunused, nunused,
+ hasvaclsn, indexvaclsn);
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 0cfa866..e402f51 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -31,9 +31,13 @@ typedef struct
TransactionId new_prune_xid; /* new prune hint value for page */
TransactionId latestRemovedXid; /* latest xid to be removed by this
* prune */
+ bool need_vaclsn; /* track if the page needs a vacuum lsn */
+ int already_dead; /* number of already dead line pointers */
+
int nredirected; /* numbers of entries in arrays below */
int ndead;
int nunused;
+
/* arrays that accumulate indexes of items to be changed */
OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
OffsetNumber nowdead[MaxHeapTuplesPerPage];
@@ -74,6 +78,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
{
Page page = BufferGetPage(buffer);
Size minfree;
+ XLogRecPtr invalid_lsn = {0, 0};
/*
* Let's see if we really need pruning.
@@ -126,7 +131,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
* needed */
/* OK to prune */
- (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
+ (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore, invalid_lsn);
}
/* And release buffer lock */
@@ -153,7 +158,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
*/
int
heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
- bool report_stats, TransactionId *latestRemovedXid)
+ bool report_stats, TransactionId *latestRemovedXid,
+ XLogRecPtr indexvaclsn)
{
int ndeleted = 0;
Page page = BufferGetPage(buffer);
@@ -175,8 +181,37 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
prstate.new_prune_xid = InvalidTransactionId;
prstate.latestRemovedXid = InvalidTransactionId;
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
+ prstate.need_vaclsn = false;
+ prstate.already_dead = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
+ /*
+ * Check if the page has any dead-vacuumed line pointers. If the vacuum
+ * that created these dead-vacuumed line pointers has successfully
+ * completed, we can now remove those line pointers.
+ */
+ if (PageHasVacuumLSN(page))
+ {
+ XLogRecPtr pagevaclsn = PageGetVacuumLSN(page);
+ XLogRecPtr last_indexvaclsn;
+
+ prstate.need_vaclsn = true;
+
+ last_indexvaclsn.xlogid = relation->rd_rel->relindxvacxlogid;
+ last_indexvaclsn.xrecoff = relation->rd_rel->relindxvacxlogoff;
+
+ if (!XLogRecPtrIsInvalid(last_indexvaclsn) &&
+ !XLogRecPtrIsInvalid(pagevaclsn) &&
+ XLByteEQ(pagevaclsn, last_indexvaclsn))
+ {
+ /*
+ * Existing dead-vacuumed line pointers can be removed and the
+ * page no longer needs a vacuum lsn
+ */
+ prstate.need_vaclsn = false;
+ }
+ }
+
/* Scan the page */
maxoff = PageGetMaxOffsetNumber(page);
for (offnum = FirstOffsetNumber;
@@ -191,8 +226,26 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
/* Nothing to do if slot is empty or already dead */
itemid = PageGetItemId(page, offnum);
- if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
+ if (!ItemIdIsUsed(itemid))
continue;
+
+ /*
+ * If the slot is dead-vacuumed and we know that the index pointers
+ * have already been vacuumed by the last index vacuum, just mark them
+ * unused so that they are removed when we defrag the page
+ */
+ if (ItemIdIsDeadVacuumed(itemid))
+ {
+ if (!prstate.need_vaclsn)
+ heap_prune_record_unused(&prstate, offnum);
+ continue;
+ }
+ else if (ItemIdIsDead(itemid))
+ {
+ heap_prune_record_dead(&prstate, offnum);
+ prstate.already_dead++;
+ continue;
+ }
/* Process this item or chain of items */
ndeleted += heap_prune_chain(relation, buffer, offnum,
@@ -213,7 +266,8 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
heap_page_prune_execute(buffer,
prstate.redirected, prstate.nredirected,
prstate.nowdead, prstate.ndead,
- prstate.nowunused, prstate.nunused);
+ prstate.nowunused, prstate.nunused,
+ prstate.need_vaclsn, indexvaclsn);
/*
* Update the page's pd_prune_xid field to either zero, or the lowest
@@ -241,7 +295,8 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
prstate.redirected, prstate.nredirected,
prstate.nowdead, prstate.ndead,
prstate.nowunused, prstate.nunused,
- prstate.latestRemovedXid);
+ prstate.latestRemovedXid, prstate.need_vaclsn,
+ indexvaclsn);
PageSetLSN(BufferGetPage(buffer), recptr);
PageSetTLI(BufferGetPage(buffer), ThisTimeLineID);
@@ -273,9 +328,12 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
* If requested, report the number of tuples reclaimed to pgstats. This is
* ndeleted minus ndead, because we don't want to count a now-DEAD root
* item as a deletion for this purpose.
+ *
+ * Adjust already_dead since they are counted as ndead and we really don't
+ * want to include them here
*/
- if (report_stats && ndeleted > prstate.ndead)
- pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
+ if (report_stats && ndeleted > (prstate.ndead - prstate.already_dead))
+ pgstat_update_heap_dead_tuples(relation, ndeleted - (prstate.ndead - prstate.already_dead));
*latestRemovedXid = prstate.latestRemovedXid;
@@ -645,7 +703,8 @@ void
heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
- OffsetNumber *nowunused, int nunused)
+ OffsetNumber *nowunused, int nunused,
+ bool need_vaclsn, XLogRecPtr indexvaclsn)
{
Page page = (Page) BufferGetPage(buffer);
OffsetNumber *offnum;
@@ -669,7 +728,14 @@ heap_page_prune_execute(Buffer buffer,
OffsetNumber off = *offnum++;
ItemId lp = PageGetItemId(page, off);
- ItemIdSetDead(lp);
+ /*
+ * If indexvaclsn is not invalid, we are being called from a vacuum and
+ * we can mark the dead line pointers as dead-vacuumed
+ */
+ if (XLogRecPtrIsInvalid(indexvaclsn))
+ ItemIdSetDead(lp);
+ else
+ ItemIdSetDeadVacuumed(lp);
}
/* Update all now-unused line pointers */
@@ -684,9 +750,14 @@ heap_page_prune_execute(Buffer buffer,
/*
* Finally, repair any fragmentation, and update the page's hint bit about
- * whether it has free pointers.
+ * whether it has free pointers. Also, make room for storing vacuum lsn if
+ * required
*/
- PageRepairFragmentation(page);
+ PageRepairFragmentation(page,
+ (!XLogRecPtrIsInvalid(indexvaclsn) && ndead > 0) || need_vaclsn);
+
+ if (PageHasVacuumLSN(page) && !XLogRecPtrIsInvalid(indexvaclsn))
+ PageSetVacuumLSN(page, indexvaclsn);
}
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 71c9931..e60583d 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -770,6 +770,8 @@ InsertPgClassTuple(Relation pg_class_desc,
values[Anum_pg_class_relhastriggers - 1] = BoolGetDatum(rd_rel->relhastriggers);
values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass);
values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid);
+ values[Anum_pg_class_relindxvacxlogid - 1] = Int32GetDatum(rd_rel->relindxvacxlogid);
+ values[Anum_pg_class_relindxvacxlogoff - 1] = Int32GetDatum(rd_rel->relindxvacxlogoff);
if (relacl != (Datum) 0)
values[Anum_pg_class_relacl - 1] = relacl;
else
@@ -864,6 +866,9 @@ AddNewRelationTuple(Relation pg_class_desc,
new_rel_reltup->relfrozenxid = InvalidTransactionId;
}
+ new_rel_reltup->relindxvacxlogid = 0;
+ new_rel_reltup->relindxvacxlogoff = 0;
+
new_rel_reltup->relowner = relowner;
new_rel_reltup->reltype = new_type_oid;
new_rel_reltup->reloftype = reloftype;
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index fa84989..4aa49f6 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1056,6 +1056,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
Page targpage;
OffsetNumber targoffset,
maxoffset;
+ bool vacuum_dead = false;
vacuum_delay_point();
@@ -1074,6 +1075,32 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
targpage = BufferGetPage(targbuffer);
maxoffset = PageGetMaxOffsetNumber(targpage);
+ /*
+ * Single-pass vacuum can leave a large number of dead line pointers in
+ * the heap and those would be cleaned up either during the next vacuum
+ * cycle or HOT-cleanup. If the page vacuum LSN is equal to the index
+ * vacuum LSN, the next HOT cleanup can remove the dead line pointers
+ * and we don't need a vacuum to remove those dead line pointers. OTOH
+ * if the dead line pointers were generated after we removed the index
+ * pointers last time, we need a vacuum cycle to remove them.
+ */
+ if (PageHasVacuumLSN(targpage))
+ {
+ XLogRecPtr pagevaclsn = PageGetVacuumLSN(targpage);
+ XLogRecPtr indexvaclsn;
+
+ indexvaclsn.xlogid = onerel->rd_rel->relindxvacxlogid;
+ indexvaclsn.xrecoff = onerel->rd_rel->relindxvacxlogoff;
+
+ if (!XLogRecPtrIsInvalid(indexvaclsn) &&
+ !XLogRecPtrIsInvalid(pagevaclsn) &&
+ XLByteEQ(pagevaclsn, indexvaclsn))
+ {
+ vacuum_dead = true;
+ }
+ }
+
+
/* Inner loop over all tuples on the selected page */
for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
{
@@ -1088,11 +1115,20 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
* pointers should be counted as dead, because we need vacuum to
* run to get rid of them. Note that this rule agrees with the
* way that heap_page_prune() counts things.
+ *
+ * XXX We don't count dead line pointers if we know that they can be
+ * removed by a HOT cleanup.
*/
if (!ItemIdIsNormal(itemid))
{
- if (ItemIdIsDead(itemid))
- deadrows += 1;
+ if (ItemIdIsDeadVacuumed(itemid))
+ {
+ if (!vacuum_dead)
+ deadrows += 1;
+ }
+ else if (ItemIdIsDead(itemid))
+ deadrows++;
+
continue;
}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 224c34f..7b393fb 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -634,6 +634,36 @@ vac_update_relstats(Relation relation,
heap_close(rd, RowExclusiveLock);
}
+/*
+ * vac_update_indexvaclsn() -- record the LSN of the last successful index
+ * vacuum in the relation's pg_class row.
+ *
+ * The two halves of indexvaclsn are stored in relindxvacxlogid and
+ * relindxvacxlogoff, and the row is written back with heap_inplace_update().
+ *
+ * This could be folded into vac_update_relstats(), but it has to run
+ * irrespective of whether all the pages were scanned or not, so it is kept
+ * as a separate routine for now.
+ */
+void
+vac_update_indexvaclsn(Relation relation, XLogRecPtr indexvaclsn)
+{
+	Oid			target_relid = RelationGetRelid(relation);
+	Relation	pg_class_rel = heap_open(RelationRelationId, RowExclusiveLock);
+	HeapTuple	class_tuple;
+	Form_pg_class class_form;
+
+	/* Work on a private copy of the syscache entry; it is modified below */
+	class_tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(target_relid));
+	if (!HeapTupleIsValid(class_tuple))
+		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
+			 target_relid);
+
+	class_form = (Form_pg_class) GETSTRUCT(class_tuple);
+	class_form->relindxvacxlogid = indexvaclsn.xlogid;
+	class_form->relindxvacxlogoff = indexvaclsn.xrecoff;
+
+	/* Push the modified copy back into pg_class, then drop our lock */
+	heap_inplace_update(pg_class_rel, class_tuple);
+	heap_close(pg_class_rel, RowExclusiveLock);
+}
/*
* vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index ce5fa18..5fd90df 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -99,6 +99,7 @@ typedef struct LVRelStats
ItemPointer dead_tuples; /* array of ItemPointerData */
int num_index_scans;
TransactionId latestRemovedXid;
+ XLogRecPtr indexvaclsn;
} LVRelStats;
@@ -114,15 +115,12 @@ static BufferAccessStrategy vac_strategy;
/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Relation *Irel, int nindexes, bool scan_all);
-static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_vacuum_index(Relation indrel,
IndexBulkDeleteResult **stats,
LVRelStats *vacrelstats);
static void lazy_cleanup_index(Relation indrel,
IndexBulkDeleteResult *stats,
LVRelStats *vacrelstats);
-static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
- int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
LVRelStats *vacrelstats);
@@ -215,6 +213,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
InvalidTransactionId :
FreezeLimit);
+ /* Since vacuum ran to completion, remember the vacuum lsn */
+ if (!XLogRecPtrIsInvalid(vacrelstats->indexvaclsn))
+ vac_update_indexvaclsn(onerel, vacrelstats->indexvaclsn);
+
/* report results to the stats collector, too */
pgstat_report_vacuum(RelationGetRelid(onerel),
onerel->rd_rel->relisshared,
@@ -307,6 +309,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Buffer vmbuffer = InvalidBuffer;
BlockNumber next_not_all_visible_block;
bool skipping_all_visible_blocks;
+ XLogRecPtr start_lsn_index_vacuum = {0, 0};
pg_rusage_init(&ru0);
@@ -319,6 +322,21 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
empty_pages = vacuumed_pages = 0;
num_tuples = tups_vacuumed = nkeep = nunused = 0;
+ /*
+ * Grab the index vacuum LSN. We should do this before calculating the
+ * number of blocks in the relation and starting the heap scan.
+ * If we don't grab the index vacuum LSN now and rather defer it to just
+ * before the index vacuum starts, we will miss the dead line pointers that
+ * get generated after we scanned a page, but before calling index vacuum.
+ * Similarly, we will miss the dead line pointers generated in the new
+ * pages added to the end of the relation.
+ *
+ * Note: By grabbing the LSN here, we might be setting index vacuum LSN a bit
+ * conservatively. Similarly, GetInsertRecPtr() may return a slightly old
+ * LSN, but that does not create any correctness issue either.
+ */
+ start_lsn_index_vacuum = GetInsertRecPtr();
+
indstats = (IndexBulkDeleteResult **)
palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
@@ -432,8 +450,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
lazy_vacuum_index(Irel[i],
&indstats[i],
vacrelstats);
- /* Remove tuples from heap */
- lazy_vacuum_heap(onerel, vacrelstats);
/*
* Forget the now-vacuumed tuples, and press on, but be careful
@@ -528,7 +544,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* We count tuples removed by the pruning step as removed by VACUUM.
*/
tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
- &vacrelstats->latestRemovedXid);
+ &vacrelstats->latestRemovedXid,
+ start_lsn_index_vacuum);
/*
* Now scan the page to collect vacuumable items and check for tuples
@@ -712,24 +729,13 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
}
}
+ vacuumed_pages++;
+
/*
- * If there are no indexes then we can vacuum the page right now
- * instead of doing a second scan.
+ * If there are no indexes, we don't need to remember the dead tuples
*/
- if (nindexes == 0 &&
- vacrelstats->num_dead_tuples > 0)
- {
- /* Remove tuples from heap */
- lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
-
- /*
- * Forget the now-vacuumed tuples, and press on, but be careful
- * not to reset latestRemovedXid since we want that value to be
- * valid.
- */
+ if (nindexes == 0)
vacrelstats->num_dead_tuples = 0;
- vacuumed_pages++;
- }
freespace = PageGetHeapFreeSpace(page);
@@ -792,9 +798,11 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* its post-compaction free space. If not, then we're done with this
* page, so remember its free space as-is. (This path will always be
* taken if there are no indexes.)
+ *
+ * For single pass vacuum, we must record the free space now since
+ * there is no second pass.
*/
- if (vacrelstats->num_dead_tuples == prev_dead_count)
- RecordPageWithFreeSpace(onerel, blkno, freespace);
+ RecordPageWithFreeSpace(onerel, blkno, freespace);
}
/* save stats for use later */
@@ -819,8 +827,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
lazy_vacuum_index(Irel[i],
&indstats[i],
vacrelstats);
- /* Remove tuples from heap */
- lazy_vacuum_heap(onerel, vacrelstats);
vacrelstats->num_index_scans++;
}
@@ -831,6 +837,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vmbuffer = InvalidBuffer;
}
+ if (!XLogRecPtrIsInvalid(start_lsn_index_vacuum))
+ vacrelstats->indexvaclsn = start_lsn_index_vacuum;
+
/* Do post-vacuum cleanup and statistics update for each index */
for (i = 0; i < nindexes; i++)
lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
@@ -857,118 +866,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
pg_rusage_show(&ru0))));
}
-
-/*
- * lazy_vacuum_heap() -- second pass over the heap
- *
- * This routine marks dead tuples as unused and compacts out free
- * space on their pages. Pages not having dead tuples recorded from
- * lazy_scan_heap are not visited at all.
- *
- * Note: the reason for doing this as a second pass is we cannot remove
- * the tuples until we've removed their index entries, and we want to
- * process index entry removal in batches as large as possible.
- */
-static void
-lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
-{
- int tupindex;
- int npages;
- PGRUsage ru0;
-
- pg_rusage_init(&ru0);
- npages = 0;
-
- tupindex = 0;
- while (tupindex < vacrelstats->num_dead_tuples)
- {
- BlockNumber tblk;
- Buffer buf;
- Page page;
- Size freespace;
-
- vacuum_delay_point();
-
- tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
- buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
- vac_strategy);
- LockBufferForCleanup(buf);
- tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
-
- /* Now that we've compacted the page, record its available space */
- page = BufferGetPage(buf);
- freespace = PageGetHeapFreeSpace(page);
-
- UnlockReleaseBuffer(buf);
- RecordPageWithFreeSpace(onerel, tblk, freespace);
- npages++;
- }
-
- ereport(elevel,
- (errmsg("\"%s\": removed %d row versions in %d pages",
- RelationGetRelationName(onerel),
- tupindex, npages),
- errdetail("%s.",
- pg_rusage_show(&ru0))));
-}
-
-/*
- * lazy_vacuum_page() -- free dead tuples on a page
- * and repair its fragmentation.
- *
- * Caller must hold pin and buffer cleanup lock on the buffer.
- *
- * tupindex is the index in vacrelstats->dead_tuples of the first dead
- * tuple for this page. We assume the rest follow sequentially.
- * The return value is the first tupindex after the tuples of this page.
- */
-static int
-lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
- int tupindex, LVRelStats *vacrelstats)
-{
- Page page = BufferGetPage(buffer);
- OffsetNumber unused[MaxOffsetNumber];
- int uncnt = 0;
-
- START_CRIT_SECTION();
-
- for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
- {
- BlockNumber tblk;
- OffsetNumber toff;
- ItemId itemid;
-
- tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
- if (tblk != blkno)
- break; /* past end of tuples for this block */
- toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
- itemid = PageGetItemId(page, toff);
- ItemIdSetUnused(itemid);
- unused[uncnt++] = toff;
- }
-
- PageRepairFragmentation(page);
-
- MarkBufferDirty(buffer);
-
- /* XLOG stuff */
- if (RelationNeedsWAL(onerel))
- {
- XLogRecPtr recptr;
-
- recptr = log_heap_clean(onerel, buffer,
- NULL, 0, NULL, 0,
- unused, uncnt,
- vacrelstats->latestRemovedXid);
- PageSetLSN(page, recptr);
- PageSetTLI(page, ThisTimeLineID);
- }
-
- END_CRIT_SECTION();
-
- return tupindex;
-}
-
/*
* lazy_vacuum_index() -- vacuum one index relation.
*
@@ -1195,9 +1092,10 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
* Note: any non-unused item should be taken as a reason to keep
* this page. We formerly thought that DEAD tuples could be
* thrown away, but that's not so, because we'd not have cleaned
- * out their index entries.
+ * out their index entries. We can throw away DEAD VACUUMED tuples
+ * though since their index entries must have been removed by now
*/
- if (ItemIdIsUsed(itemid))
+ if (ItemIdIsUsed(itemid) && !ItemIdIsDeadVacuumed(itemid))
{
hastup = true;
break; /* can stop scanning */
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 6bd3812..b36c24d 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -350,7 +350,12 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
/*
* PageRepairFragmentation
*
- * Frees fragmented space on a page.
+ * Frees fragmented space on a page, optionally allocating space for storing a
+ * vacuum LSN as part of the special area (and this applies only to heap pages).
+ * If there is not enough free space to store the vacuum LSN, just keep the
+ * existing special area unchanged. The caller should be aware of this
+ * possibility and handle it appropriately.
+ *
* It doesn't remove unused line pointers! Please don't change this.
*
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
@@ -358,11 +363,13 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
* As a side effect, the page's PD_HAS_FREE_LINES hint bit is updated.
*/
void
-PageRepairFragmentation(Page page)
+PageRepairFragmentation(Page page, bool need_vaclsn)
{
Offset pd_lower = ((PageHeader) page)->pd_lower;
Offset pd_upper = ((PageHeader) page)->pd_upper;
Offset pd_special = ((PageHeader) page)->pd_special;
+ Offset new_pd_special;
+ Size specialSize = need_vaclsn ? MAXALIGN(sizeof (XLogRecPtr)) : 0;
itemIdSort itemidbase,
itemidptr;
ItemId lp;
@@ -390,6 +397,8 @@ PageRepairFragmentation(Page page)
errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
pd_lower, pd_upper, pd_special)));
+ new_pd_special = BLCKSZ - specialSize;
+
nline = PageGetMaxOffsetNumber(page);
nunused = nstorage = 0;
for (i = FirstOffsetNumber; i <= nline; i++)
@@ -411,10 +420,12 @@ PageRepairFragmentation(Page page)
if (nstorage == 0)
{
/* Page is completely empty, so just reset it quickly */
- ((PageHeader) page)->pd_upper = pd_special;
+ ((PageHeader) page)->pd_upper = ((PageHeader) page)->pd_special = new_pd_special;
}
else
{ /* nstorage != 0 */
+ Offset adjust;
+
/* Need to compact the page the hard way */
itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nstorage);
itemidptr = itemidbase;
@@ -444,10 +455,24 @@ PageRepairFragmentation(Page page)
errmsg("corrupted item lengths: total %u, available space %u",
(unsigned int) totallen, pd_special - pd_lower)));
+ /*
+ * If there is not enough space to increase the special area, just
+ * preserve the existing special area. The caller should better check
+ * if the special area has been increased or not before writing
+ * anything to it
+ */
+ if (totallen > (Size) (new_pd_special - pd_lower))
+ {
+ new_pd_special = pd_special;
+ need_vaclsn = false;
+ }
+
/* sort itemIdSortData array into decreasing itemoff order */
qsort((char *) itemidbase, nstorage, sizeof(itemIdSortData),
itemoffcompare);
+ adjust = pd_special - new_pd_special;
+
/* compactify page */
upper = pd_special;
@@ -458,14 +483,25 @@ PageRepairFragmentation(Page page)
memmove((char *) page + upper,
(char *) page + itemidptr->itemoff,
itemidptr->alignedlen);
- lp->lp_off = upper;
+ lp->lp_off = upper - adjust;
}
- ((PageHeader) page)->pd_upper = upper;
+ if (adjust != 0)
+ memmove((char *) page + upper - adjust,
+ (char *) page + upper, pd_special - upper);
+
+ ((PageHeader) page)->pd_upper = upper - adjust;
+ ((PageHeader) page)->pd_special = new_pd_special;
pfree(itemidbase);
}
+ /* Record that the page now contains vacuum lsn */
+ if (need_vaclsn)
+ PageSetHasVacuumLSN(page);
+ else
+ PageClearHasVacuumLSN(page);
+
/* Set hint bit for PageAddItem */
if (nunused > 0)
PageSetHasFreeLinePointers(page);
@@ -828,3 +864,43 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
pfree(itemidbase);
}
+
+/*
+ * PageGetVacuumLSN
+ *		Return the vacuum LSN stored in the page's special area.
+ *
+ * This is the LSN of the most recent retail-vacuum operation on the page that
+ * created a dead line pointer on the page. If the page does not carry a
+ * vacuum LSN (PD_HAS_VACUUM_LSN is not set), an invalid XLogRecPtr with both
+ * fields zeroed is returned.
+ */
+XLogRecPtr
+PageGetVacuumLSN(Page page)
+{
+	XLogRecPtr	vaclsn;
+
+	/*
+	 * Start from a fully initialized invalid LSN. The struct is returned by
+	 * value and callers compare it with XLByteEQ(), so no field may be left
+	 * indeterminate.
+	 */
+	vaclsn.xlogid = 0;
+	vaclsn.xrecoff = 0;
+
+	if (PageHasVacuumLSN(page))
+	{
+		/* The special area is expected to hold exactly one aligned LSN */
+		Assert(PageGetSpecialSize(page) == MAXALIGN(sizeof (XLogRecPtr)));
+		vaclsn = *((XLogRecPtr *) PageGetSpecialPointer(page));
+	}
+
+	return vaclsn;
+}
+
+/*
+ * PageSetVacuumLSN
+ *		Store vaclsn in the page's special area, replacing any previous value.
+ *
+ * vaclsn is the LSN of the recent retail-vacuum operation that generated a
+ * dead line pointer on the page. The function is called while holding a
+ * buffer clean-up lock on the page, and the caller should have already made
+ * room for the LSN. When the special area is not exactly
+ * MAXALIGN(sizeof(XLogRecPtr)) bytes, nothing is written.
+ */
+void
+PageSetVacuumLSN(Page page, XLogRecPtr vaclsn)
+{
+	Assert(!XLogRecPtrIsInvalid(vaclsn));
+
+	/* Write only if the special area has exactly the size reserved for an LSN */
+	if (PageGetSpecialSize(page) == MAXALIGN(sizeof (XLogRecPtr)))
+		*((XLogRecPtr *) PageGetSpecialPointer(page)) = vaclsn;
+}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 4dbc393..c44db1a 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -132,7 +132,8 @@ extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
OffsetNumber *nowunused, int nunused,
- TransactionId latestRemovedXid);
+ TransactionId latestRemovedXid,
+ bool need_vaclsn, XLogRecPtr indexvaclsn);
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
TransactionId cutoff_xid,
OffsetNumber *offsets, int offcnt);
@@ -144,11 +145,13 @@ extern void heap_page_prune_opt(Relation relation, Buffer buffer,
TransactionId OldestXmin);
extern int heap_page_prune(Relation relation, Buffer buffer,
TransactionId OldestXmin,
- bool report_stats, TransactionId *latestRemovedXid);
+ bool report_stats, TransactionId *latestRemovedXid,
+ XLogRecPtr indexvaclsn);
extern void heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
- OffsetNumber *nowunused, int nunused);
+ OffsetNumber *nowunused, int nunused,
+ bool need_vaclsn, XLogRecPtr indexvaclsn);
extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
/* in heap/syncscan.c */
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index c147707..08633e5 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -16,6 +16,7 @@
#include "access/tupdesc.h"
#include "access/tupmacs.h"
+#include "access/xlogdefs.h"
#include "storage/itemptr.h"
#include "storage/relfilenode.h"
@@ -689,10 +690,12 @@ typedef struct xl_heap_clean
TransactionId latestRemovedXid;
uint16 nredirected;
uint16 ndead;
+ bool hasvaclsn;
+ XLogRecPtr indexvaclsn;
/* OFFSET NUMBERS FOLLOW */
} xl_heap_clean;
-#define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
+#define SizeOfHeapClean (offsetof(xl_heap_clean, indexvaclsn) + sizeof(XLogRecPtr))
/*
* Cleanup_info is required in some cases during a lazy VACUUM.
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index ffcce3c..008680e 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -65,6 +65,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
bool relhastriggers; /* has (or has had) any TRIGGERs */
bool relhassubclass; /* has (or has had) derived classes */
TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
+ int4 relindxvacxlogid; /* start LSN of the last successful index vacuum */
+ int4 relindxvacxlogoff;
/*
* VARIABLE LENGTH FIELDS start here. These fields may be NULL, too.
@@ -78,7 +80,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
/* Size of fixed part of pg_class tuples, not counting var-length fields */
#define CLASS_TUPLE_SIZE \
- (offsetof(FormData_pg_class,relfrozenxid) + sizeof(TransactionId))
+ (offsetof(FormData_pg_class,relindxvacxlogoff) + sizeof(int4))
/* ----------------
* Form_pg_class corresponds to a pointer to a tuple with
@@ -92,7 +94,7 @@ typedef FormData_pg_class *Form_pg_class;
* ----------------
*/
-#define Natts_pg_class 26
+#define Natts_pg_class 28
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
@@ -117,8 +119,10 @@ typedef FormData_pg_class *Form_pg_class;
#define Anum_pg_class_relhastriggers 22
#define Anum_pg_class_relhassubclass 23
#define Anum_pg_class_relfrozenxid 24
-#define Anum_pg_class_relacl 25
-#define Anum_pg_class_reloptions 26
+#define Anum_pg_class_relindxvacxlogid 25
+#define Anum_pg_class_relindxvacxlogoff 26
+#define Anum_pg_class_relacl 27
+#define Anum_pg_class_reloptions 28
/* ----------------
* initial contents of pg_class
@@ -130,13 +134,13 @@ typedef FormData_pg_class *Form_pg_class;
*/
/* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
-DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 0 0 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 20 0 f f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 20 0 f f f f f 3 0 0 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 25 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 25 0 t f f f f 3 0 0 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 28 0 t f f f f 3 0 0 _null_ _null_ ));
DESCR("");
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index cfbe0c4..4f3b119 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -151,6 +151,7 @@ extern void vac_update_relstats(Relation relation,
double num_tuples,
bool hasindex,
TransactionId frozenxid);
+extern void vac_update_indexvaclsn(Relation relation, XLogRecPtr indexvaclsn);
extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
bool sharedRel,
TransactionId *oldestXmin,
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 42d6b10..49b6fa4 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -154,8 +154,12 @@ typedef PageHeaderData *PageHeader;
* tuple? */
#define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
* everyone */
+#define PD_HAS_VACUUM_LSN 0x0008 /* page has dead line pointers and the
+ * lsn of the operation that created
+ * the most recent dead line pointer is
+ * recorded in the page special area */
-#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
+#define PD_VALID_FLAG_BITS 0x000f /* OR of all valid pd_flags bits */
/*
* Page layout version number 0 is for pre-7.3 Postgres releases.
@@ -345,6 +349,13 @@ typedef PageHeaderData *PageHeader;
#define PageClearAllVisible(page) \
(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
+/*
+ * Test, set and clear the PD_HAS_VACUUM_LSN bit in the page header's
+ * pd_flags. PageHasVacuumLSN yields the masked flag value (non-zero when
+ * set), not a normalized boolean.
+ */
+#define PageHasVacuumLSN(page) \
+ (((PageHeader) (page))->pd_flags & PD_HAS_VACUUM_LSN)
+#define PageSetHasVacuumLSN(page) \
+ (((PageHeader) (page))->pd_flags |= PD_HAS_VACUUM_LSN)
+#define PageClearHasVacuumLSN(page) \
+ (((PageHeader) (page))->pd_flags &= ~PD_HAS_VACUUM_LSN)
+
#define PageIsPrunable(page, oldestxmin) \
( \
AssertMacro(TransactionIdIsNormal(oldestxmin)), \
@@ -375,11 +386,13 @@ extern Page PageGetTempPage(Page page);
extern Page PageGetTempPageCopy(Page page);
extern Page PageGetTempPageCopySpecial(Page page);
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
-extern void PageRepairFragmentation(Page page);
+extern void PageRepairFragmentation(Page page, bool need_vaclsn);
extern Size PageGetFreeSpace(Page page);
extern Size PageGetExactFreeSpace(Page page);
extern Size PageGetHeapFreeSpace(Page page);
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
+extern XLogRecPtr PageGetVacuumLSN(Page page);
+extern void PageSetVacuumLSN(Page page, XLogRecPtr vaclsn);
#endif /* BUFPAGE_H */
diff --git a/src/include/storage/itemid.h b/src/include/storage/itemid.h
index 961d2c2..0ae1298 100644
--- a/src/include/storage/itemid.h
+++ b/src/include/storage/itemid.h
@@ -113,6 +113,13 @@ typedef uint16 ItemLength;
((itemId)->lp_flags == LP_DEAD)
/*
+ * ItemIdIsDeadVacuumed
+ * True iff item identifier is in state DEAD_VACUUMED, i.e. its
+ * lp_flags is LP_DEAD and lp_off holds the marker value 1 that
+ * ItemIdSetDeadVacuumed stores.
+ * Beware of multiple evaluations of itemId!
+ */
+#define ItemIdIsDeadVacuumed(itemId) \
+ (((itemId)->lp_flags == LP_DEAD) && ((itemId)->lp_off == 1))
+
+/*
* ItemIdHasStorage
* True iff item identifier has associated storage.
*/
@@ -168,6 +175,19 @@ typedef uint16 ItemLength;
)
/*
+ * ItemIdSetDeadVacuumed
+ * Set the item identifier to be DEAD_VACUUMED, with no storage.
+ * The state is encoded as lp_flags = LP_DEAD with lp_off = 1 (the
+ * value ItemIdIsDeadVacuumed tests for) and lp_len = 0.
+ * Beware of multiple evaluations of itemId!
+ */
+#define ItemIdSetDeadVacuumed(itemId) \
+( \
+ (itemId)->lp_flags = LP_DEAD, \
+ (itemId)->lp_off = 1, \
+ (itemId)->lp_len = 0 \
+)
+
+
+/*
* ItemIdMarkDead
* Set the item identifier to be DEAD, keeping its existing storage.
*