v13-0002-Use-64-bit-multixact-offsets.patch
application/octet-stream
Filename: v13-0002-Use-64-bit-multixact-offsets.patch
Type: application/octet-stream
Part: 4
Message:
Re: POC: make mxidoff 64 bits
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v13-0002
Subject: Use 64-bit multixact offsets.
| File | + | − |
|---|---|---|
| src/backend/access/transam/multixact.c | 33 | 508 |
| src/backend/access/transam/xlog.c | 1 | 1 |
| src/backend/commands/vacuum.c | 1 | 1 |
| src/backend/postmaster/autovacuum.c | 2 | 2 |
| src/bin/pg_resetwal/pg_resetwal.c | 1 | 1 |
| src/bin/pg_resetwal/t/001_basic.pl | 1 | 1 |
| src/include/access/multixact.h | 1 | 2 |
| src/include/access/multixact_internal.h | 115 | 0 |
| src/include/c.h | 1 | 1 |
From f3499102e2893e4b2e24d48975cbbd49385e190f Mon Sep 17 00:00:00 2001
From: Maxim Orlov <m.orlov@postgrespro.ru>
Date: Wed, 6 Mar 2024 11:11:33 +0300
Subject: [PATCH v13 2/7] Use 64-bit multixact offsets.
Author: Maxim Orlov <orlovmg@gmail.com>
---
src/backend/access/transam/multixact.c | 541 ++----------------------
src/backend/access/transam/xlog.c | 2 +-
src/backend/commands/vacuum.c | 2 +-
src/backend/postmaster/autovacuum.c | 4 +-
src/bin/pg_resetwal/pg_resetwal.c | 2 +-
src/bin/pg_resetwal/t/001_basic.pl | 2 +-
src/include/access/multixact.h | 3 +-
src/include/access/multixact_internal.h | 115 +++++
src/include/c.h | 2 +-
9 files changed, 156 insertions(+), 517 deletions(-)
create mode 100644 src/include/access/multixact_internal.h
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 623fc8bdac..cd9db52e95 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -69,6 +69,7 @@
#include "postgres.h"
#include "access/multixact.h"
+#include "access/multixact_internal.h"
#include "access/slru.h"
#include "access/transam.h"
#include "access/twophase.h"
@@ -92,130 +93,14 @@
#include "utils/injection_point.h"
#include "utils/memutils.h"
-
-/*
- * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
- * used everywhere else in Postgres.
- *
- * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
- * MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
- * take no explicit notice of that fact in this module, except when comparing
- * segment and page numbers in TruncateMultiXact (see
- * MultiXactOffsetPagePrecedes).
- */
-
-/* We need four bytes per offset */
-#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
-
-static inline int64
-MultiXactIdToOffsetPage(MultiXactId multi)
-{
- return multi / MULTIXACT_OFFSETS_PER_PAGE;
-}
-
-static inline int
-MultiXactIdToOffsetEntry(MultiXactId multi)
-{
- return multi % MULTIXACT_OFFSETS_PER_PAGE;
-}
-
-static inline int64
-MultiXactIdToOffsetSegment(MultiXactId multi)
-{
- return MultiXactIdToOffsetPage(multi) / SLRU_PAGES_PER_SEGMENT;
-}
-
-/*
- * The situation for members is a bit more complex: we store one byte of
- * additional flag bits for each TransactionId. To do this without getting
- * into alignment issues, we store four bytes of flags, and then the
- * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
- * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
- * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
- * performance) trumps space efficiency here.
- *
- * Note that the "offset" macros work with byte offset, not array indexes, so
- * arithmetic must be done using "char *" pointers.
- */
-/* We need eight bits per xact, so one xact fits in a byte */
-#define MXACT_MEMBER_BITS_PER_XACT 8
-#define MXACT_MEMBER_FLAGS_PER_BYTE 1
-#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
-
-/* how many full bytes of flags are there in a group? */
-#define MULTIXACT_FLAGBYTES_PER_GROUP 4
-#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
- (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
-/* size in bytes of a complete group */
-#define MULTIXACT_MEMBERGROUP_SIZE \
- (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
-#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
-#define MULTIXACT_MEMBERS_PER_PAGE \
- (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
-
/*
- * Because the number of items per page is not a divisor of the last item
- * number (member 0xFFFFFFFF), the last segment does not use the maximum number
- * of pages, and moreover the last used page therein does not use the same
- * number of items as previous pages. (Another way to say it is that the
- * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
- * has some empty space after that item.)
- *
- * This constant is the number of members in the last page of the last segment.
+ * Multixact members warning threshold.
+ *
+ * If the difference between nextOffset and oldestOffset exceeds this value,
+ * we trigger autovacuum in order to release disk space and reduce table bloat
+ * if possible.
*/
-#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
- ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
-
-/* page in which a member is to be found */
-static inline int64
-MXOffsetToMemberPage(MultiXactOffset offset)
-{
- return offset / MULTIXACT_MEMBERS_PER_PAGE;
-}
-
-static inline int64
-MXOffsetToMemberSegment(MultiXactOffset offset)
-{
- return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
-}
-
-/* Location (byte offset within page) of flag word for a given member */
-static inline int
-MXOffsetToFlagsOffset(MultiXactOffset offset)
-{
- MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
- int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
- int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
-
- return byteoff;
-}
-
-static inline int
-MXOffsetToFlagsBitShift(MultiXactOffset offset)
-{
- int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
- int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
-
- return bshift;
-}
-
-/* Location (byte offset within page) of TransactionId of given member */
-static inline int
-MXOffsetToMemberOffset(MultiXactOffset offset)
-{
- int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
-
- return MXOffsetToFlagsOffset(offset) +
- MULTIXACT_FLAGBYTES_PER_GROUP +
- member_in_group * sizeof(TransactionId);
-}
-
-/* Multixact members wraparound thresholds. */
-#define MULTIXACT_MEMBER_SAFE_THRESHOLD (MaxMultiXactOffset / 2)
-#define MULTIXACT_MEMBER_DANGER_THRESHOLD \
- (MaxMultiXactOffset - MaxMultiXactOffset / 4)
+#define MULTIXACT_MEMBER_AUTOVAC_THRESHOLD UINT64CONST(0xFFFFFFFF)
static inline MultiXactId
PreviousMultiXactId(MultiXactId multi)
@@ -260,11 +145,9 @@ typedef struct MultiXactStateData
/*
* Oldest multixact offset that is potentially referenced by a multixact
- * referenced by a relation. We don't always know this value, so there's
- * a flag here to indicate whether or not we currently do.
+ * referenced by a relation.
*/
MultiXactOffset oldestOffset;
- bool oldestOffsetKnown;
/* support for anti-wraparound measures */
MultiXactId multiVacLimit;
@@ -272,9 +155,6 @@ typedef struct MultiXactStateData
MultiXactId multiStopLimit;
MultiXactId multiWrapLimit;
- /* support for members anti-wraparound measures */
- MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
-
/*
* This is used to sleep until a multixact offset is written when we want
* to create the next one.
@@ -409,10 +289,8 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
- MultiXactOffset start, uint32 distance);
static bool SetOffsetVacuumLimit(bool is_startup);
-static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
+static MultiXactOffset find_multixact_start(MultiXactId multi);
static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
static void WriteMTruncateXlogRec(Oid oldestMultiDB,
MultiXactId startTruncOff,
@@ -1054,9 +932,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* against catastrophic data loss due to multixact wraparound. The basic
* rules are:
*
- * If we're past multiVacLimit or the safe threshold for member storage
- * space, or we don't know what the safe threshold for member storage is,
- * start trying to force autovacuum cycles.
+ * If we're past multiVacLimit, start trying to force autovacuum cycles.
* If we're past multiWarnLimit, start issuing warnings.
* If we're past multiStopLimit, refuse to create new MultiXactIds.
*
@@ -1151,90 +1027,10 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
ExtendMultiXactOffset(result);
/*
- * Reserve the members space, similarly to above. Also, be careful not to
- * return zero as the starting offset for any multixact. See
- * GetMultiXactIdMembers() for motivation.
+ * Reserve the members space, similarly to above.
*/
nextOffset = MultiXactState->nextOffset;
- if (nextOffset == 0)
- {
- *offset = 1;
- nmembers++; /* allocate member slot 0 too */
- }
- else
- *offset = nextOffset;
-
- /*----------
- * Protect against overrun of the members space as well, with the
- * following rules:
- *
- * If we're past offsetStopLimit, refuse to generate more multis.
- * If we're close to offsetStopLimit, emit a warning.
- *
- * Arbitrarily, we start emitting warnings when we're 20 segments or less
- * from offsetStopLimit.
- *
- * Note we haven't updated the shared state yet, so if we fail at this
- * point, the multixact ID we grabbed can still be used by the next guy.
- *
- * Note that there is no point in forcing autovacuum runs here: the
- * multixact freeze settings would have to be reduced for that to have any
- * effect.
- *----------
- */
-#define OFFSET_WARN_SEGMENTS 20
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
- nmembers))
- {
- /* see comment in the corresponding offsets wraparound case */
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("multixact \"members\" limit exceeded"),
- errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
- "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
- MultiXactState->offsetStopLimit - nextOffset - 1,
- nmembers,
- MultiXactState->offsetStopLimit - nextOffset - 1),
- errhint("Execute a database-wide VACUUM in database with OID %u with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.",
- MultiXactState->oldestMultiXactDB)));
- }
-
- /*
- * Check whether we should kick autovacuum into action, to prevent members
- * wraparound. NB we use a much larger window to trigger autovacuum than
- * just the warning limit. The warning is just a measure of last resort -
- * this is in line with GetNewTransactionId's behaviour.
- */
- if (!MultiXactState->oldestOffsetKnown ||
- (MultiXactState->nextOffset - MultiXactState->oldestOffset
- > MULTIXACT_MEMBER_SAFE_THRESHOLD))
- {
- /*
- * To avoid swamping the postmaster with signals, we issue the autovac
- * request only when crossing a segment boundary. With default
- * compilation settings that's roughly after 50k members. This still
- * gives plenty of chances before we get into real trouble.
- */
- if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
- (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- }
-
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
- nextOffset,
- nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
- ereport(WARNING,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
- "database with OID %u must be vacuumed before %d more multixact members are used",
- MultiXactState->offsetStopLimit - nextOffset + nmembers,
- MultiXactState->oldestMultiXactDB,
- MultiXactState->offsetStopLimit - nextOffset + nmembers),
- errhint("Execute a database-wide VACUUM in that database with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.")));
+ *offset = nextOffset;
ExtendMultiXactMember(nextOffset, nmembers);
@@ -2620,22 +2416,9 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
}
/*
- * Compute the number of items till end of current page. Careful: if
- * addition of unsigned ints wraps around, we're at the last page of
- * the last segment; since that page holds a different number of items
- * than other pages, we need to do it differently.
+ * Compute the number of items till end of current page.
*/
- if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
- {
- /*
- * This is the last page of the last segment; we can compute the
- * number of items left to allocate in it without modulo
- * arithmetic.
- */
- difference = MaxMultiXactOffset - offset + 1;
- }
- else
- difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+ difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
/*
* Advance to next page, taking care to properly handle the wraparound
@@ -2701,15 +2484,13 @@ GetOldestMultiXactId(void)
}
/*
- * Determine how aggressively we need to vacuum in order to prevent member
- * wraparound.
+ * Determine whether autovacuum is needed to reclaim multixact member space.
*
* To do so determine what's the oldest member offset and install the limit
* info in MultiXactState, where it can be used to prevent overrun of old data
* in the members SLRU area.
*
- * The return value is true if emergency autovacuum is required and false
- * otherwise.
+ * The return value is true if autovacuum is required and false otherwise.
*/
static bool
SetOffsetVacuumLimit(bool is_startup)
@@ -2717,12 +2498,7 @@ SetOffsetVacuumLimit(bool is_startup)
MultiXactId oldestMultiXactId;
MultiXactId nextMXact;
MultiXactOffset oldestOffset = 0; /* placate compiler */
- MultiXactOffset prevOldestOffset;
MultiXactOffset nextOffset;
- bool oldestOffsetKnown = false;
- bool prevOldestOffsetKnown;
- MultiXactOffset offsetStopLimit = 0;
- MultiXactOffset prevOffsetStopLimit;
/*
* NB: Have to prevent concurrent truncation, we might otherwise try to
@@ -2735,9 +2511,6 @@ SetOffsetVacuumLimit(bool is_startup)
oldestMultiXactId = MultiXactState->oldestMultiXactId;
nextMXact = MultiXactState->nextMXact;
nextOffset = MultiXactState->nextOffset;
- prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
- prevOldestOffset = MultiXactState->oldestOffset;
- prevOffsetStopLimit = MultiXactState->offsetStopLimit;
Assert(MultiXactState->finishedStartup);
LWLockRelease(MultiXactGenLock);
@@ -2755,139 +2528,31 @@ SetOffsetVacuumLimit(bool is_startup)
* offset.
*/
oldestOffset = nextOffset;
- oldestOffsetKnown = true;
}
else
- {
- /*
- * Figure out where the oldest existing multixact's offsets are
- * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X,
- * the supposedly-earliest multixact might not really exist. We are
- * careful not to fail in that case.
- */
- oldestOffsetKnown =
- find_multixact_start(oldestMultiXactId, &oldestOffset);
-
- if (oldestOffsetKnown)
- ereport(DEBUG1,
- (errmsg_internal("oldest MultiXactId member is at offset %u",
- oldestOffset)));
- else
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
- oldestMultiXactId)));
- }
+ oldestOffset = find_multixact_start(oldestMultiXactId);
LWLockRelease(MultiXactTruncationLock);
- /*
- * If we can, compute limits (and install them MultiXactState) to prevent
- * overrun of old data in the members SLRU area. We can only do so if the
- * oldest offset is known though.
- */
- if (oldestOffsetKnown)
- {
- /* move back to start of the corresponding segment */
- offsetStopLimit = oldestOffset - (oldestOffset %
- (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
-
- /* always leave one segment before the wraparound point */
- offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
- if (!prevOldestOffsetKnown && !is_startup)
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are now enabled")));
-
- ereport(DEBUG1,
- (errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u",
- offsetStopLimit, oldestMultiXactId)));
- }
- else if (prevOldestOffsetKnown)
- {
- /*
- * If we failed to get the oldest offset this time, but we have a
- * value from a previous pass through this function, use the old
- * values rather than automatically forcing an emergency autovacuum
- * cycle again.
- */
- oldestOffset = prevOldestOffset;
- oldestOffsetKnown = true;
- offsetStopLimit = prevOffsetStopLimit;
- }
-
/* Install the computed values */
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
MultiXactState->oldestOffset = oldestOffset;
- MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
- MultiXactState->offsetStopLimit = offsetStopLimit;
LWLockRelease(MultiXactGenLock);
/*
- * Do we need an emergency autovacuum? If we're not sure, assume yes.
- */
- return !oldestOffsetKnown ||
- (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
-}
-
-/*
- * Return whether adding "distance" to "start" would move past "boundary".
- *
- * We use this to determine whether the addition is "wrapping around" the
- * boundary point, hence the name. The reason we don't want to use the regular
- * 2^31-modulo arithmetic here is that we want to be able to use the whole of
- * the 2^32-1 space here, allowing for more multixacts than would fit
- * otherwise.
- */
-static bool
-MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
- uint32 distance)
-{
- MultiXactOffset finish;
-
- /*
- * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
- * if the addition wraps around the UINT_MAX boundary, skip that value.
- */
- finish = start + distance;
- if (finish < start)
- finish++;
-
- /*-----------------------------------------------------------------------
- * When the boundary is numerically greater than the starting point, any
- * value numerically between the two is not wrapped:
- *
- * <----S----B---->
- * [---) = F wrapped past B (and UINT_MAX)
- * [---) = F not wrapped
- * [----] = F wrapped past B
- *
- * When the boundary is numerically less than the starting point (i.e. the
- * UINT_MAX wraparound occurs somewhere in between) then all values in
- * between are wrapped:
- *
- * <----B----S---->
- * [---) = F not wrapped past B (but wrapped past UINT_MAX)
- * [---) = F wrapped past B (and UINT_MAX)
- * [----] = F not wrapped
- *-----------------------------------------------------------------------
+ * Do we need autovacuum?
*/
- if (start < boundary)
- return finish >= boundary || finish < start;
- else
- return finish >= boundary && finish < start;
+ return (nextOffset - oldestOffset > MULTIXACT_MEMBER_AUTOVAC_THRESHOLD);
}
/*
* Find the starting offset of the given MultiXactId.
*
- * Returns false if the file containing the multi does not exist on disk.
- * Otherwise, returns true and sets *result to the starting member offset.
- *
* This function does not prevent concurrent truncation, so if that's
* required, the caller has to protect against that.
*/
-static bool
-find_multixact_start(MultiXactId multi, MultiXactOffset *result)
+static MultiXactOffset
+find_multixact_start(MultiXactId multi)
{
MultiXactOffset offset;
int64 pageno;
@@ -2900,15 +2565,6 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
pageno = MultiXactIdToOffsetPage(multi);
entryno = MultiXactIdToOffsetEntry(multi);
- /*
- * Write out dirty data, so PhysicalPageExists can work correctly.
- */
- SimpleLruWriteAll(MultiXactOffsetCtl, true);
- SimpleLruWriteAll(MultiXactMemberCtl, true);
-
- if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
- return false;
-
/* lock is acquired by SimpleLruReadPage_ReadOnly */
slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
@@ -2916,102 +2572,7 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
offset = *offptr;
LWLockRelease(SimpleLruGetBankLock(MultiXactOffsetCtl, pageno));
- *result = offset;
- return true;
-}
-
-/*
- * Determine how many multixacts, and how many multixact members, currently
- * exist. Return false if unable to determine.
- */
-static bool
-ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
-{
- MultiXactOffset nextOffset;
- MultiXactOffset oldestOffset;
- MultiXactId oldestMultiXactId;
- MultiXactId nextMultiXactId;
- bool oldestOffsetKnown;
-
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- nextOffset = MultiXactState->nextOffset;
- oldestMultiXactId = MultiXactState->oldestMultiXactId;
- nextMultiXactId = MultiXactState->nextMXact;
- oldestOffset = MultiXactState->oldestOffset;
- oldestOffsetKnown = MultiXactState->oldestOffsetKnown;
- LWLockRelease(MultiXactGenLock);
-
- if (!oldestOffsetKnown)
- return false;
-
- *members = nextOffset - oldestOffset;
- *multixacts = nextMultiXactId - oldestMultiXactId;
- return true;
-}
-
-/*
- * Multixact members can be removed once the multixacts that refer to them
- * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
- * vacuum_multixact_freeze_table_age work together to make sure we never have
- * too many multixacts; we hope that, at least under normal circumstances,
- * this will also be sufficient to keep us from using too many offsets.
- * However, if the average multixact has many members, we might exhaust the
- * members space while still using few enough members that these limits fail
- * to trigger relminmxid advancement by VACUUM. At that point, we'd have no
- * choice but to start failing multixact-creating operations with an error.
- *
- * To prevent that, if more than a threshold portion of the members space is
- * used, we effectively reduce autovacuum_multixact_freeze_max_age and
- * to a value just less than the number of multixacts in use. We hope that
- * this will quickly trigger autovacuuming on the table or tables with the
- * oldest relminmxid, thus allowing datminmxid values to advance and removing
- * some members.
- *
- * As the fraction of the member space currently in use grows, we become
- * more aggressive in clamping this value. That not only causes autovacuum
- * to ramp up, but also makes any manual vacuums the user issues more
- * aggressive. This happens because vacuum_get_cutoffs() will clamp the
- * freeze table and the minimum freeze age cutoffs based on the effective
- * autovacuum_multixact_freeze_max_age this function returns. In the worst
- * case, we'll claim the freeze_max_age to zero, and every vacuum of any
- * table will freeze every multixact.
- */
-int
-MultiXactMemberFreezeThreshold(void)
-{
- MultiXactOffset members;
- uint32 multixacts;
- uint32 victim_multixacts;
- double fraction;
- int result;
-
- /* If we can't determine member space utilization, assume the worst. */
- if (!ReadMultiXactCounts(&multixacts, &members))
- return 0;
-
- /* If member space utilization is low, no special action is required. */
- if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
- return autovacuum_multixact_freeze_max_age;
-
- /*
- * Compute a target for relminmxid advancement. The number of multixacts
- * we try to eliminate from the system is based on how far we are past
- * MULTIXACT_MEMBER_SAFE_THRESHOLD.
- */
- fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
- (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
- victim_multixacts = multixacts * fraction;
-
- /* fraction could be > 1.0, but lowest possible freeze age is zero */
- if (victim_multixacts > multixacts)
- return 0;
- result = multixacts - victim_multixacts;
-
- /*
- * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
- * autovacuum less aggressive than it would otherwise be.
- */
- return Min(result, autovacuum_multixact_freeze_max_age);
+ return offset;
}
typedef struct mxtruncinfo
@@ -3039,37 +2600,13 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data
/*
- * Delete members segments [oldest, newOldest)
- *
- * The members SLRU can, in contrast to the offsets one, be filled to almost
- * the full range at once. This means SimpleLruTruncate() can't trivially be
- * used - instead the to-be-deleted range is computed using the offsets
- * SLRU. C.f. TruncateMultiXact().
+ * Delete members segments before the newOldestOffset.
*/
static void
-PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
+PerformMembersTruncation(MultiXactOffset newOldestOffset)
{
- const int64 maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
- int64 startsegment = MXOffsetToMemberSegment(oldestOffset);
- int64 endsegment = MXOffsetToMemberSegment(newOldestOffset);
- int64 segment = startsegment;
-
- /*
- * Delete all the segments but the last one. The last segment can still
- * contain, possibly partially, valid data.
- */
- while (segment != endsegment)
- {
- elog(DEBUG2, "truncating multixact members segment %llx",
- (unsigned long long) segment);
- SlruDeleteSegment(MultiXactMemberCtl, segment);
-
- /* move to next segment, handling wraparound correctly */
- if (segment == maxsegment)
- segment = 0;
- else
- segment += 1;
- }
+ SimpleLruTruncate(MultiXactMemberCtl,
+ MXOffsetToMemberPage(newOldestOffset));
}
/*
@@ -3174,23 +2711,15 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
/*
* First, compute the safe truncation point for MultiXactMember. This is
* the starting offset of the oldest multixact.
- *
- * Hopefully, find_multixact_start will always work here, because we've
- * already checked that it doesn't precede the earliest MultiXact on disk.
- * But if it fails, don't truncate anything, and log a message.
*/
if (oldestMulti == nextMulti)
{
/* there are NO MultiXacts */
oldestOffset = nextOffset;
}
- else if (!find_multixact_start(oldestMulti, &oldestOffset))
+ else
{
- ereport(LOG,
- (errmsg("oldest MultiXact %u not found, earliest MultiXact %u, skipping truncation",
- oldestMulti, earliest)));
- LWLockRelease(MultiXactTruncationLock);
- return;
+ oldestOffset = find_multixact_start(oldestMulti);
}
/*
@@ -3202,13 +2731,9 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
/* there are NO MultiXacts */
newOldestOffset = nextOffset;
}
- else if (!find_multixact_start(newOldestMulti, &newOldestOffset))
+ else
{
- ereport(LOG,
- (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
- newOldestMulti)));
- LWLockRelease(MultiXactTruncationLock);
- return;
+ newOldestOffset = find_multixact_start(newOldestMulti);
}
elog(DEBUG1, "performing multixact truncation: "
@@ -3258,7 +2783,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
LWLockRelease(MultiXactGenLock);
/* First truncate members */
- PerformMembersTruncation(oldestOffset, newOldestOffset);
+ PerformMembersTruncation(newOldestOffset);
/* Then offsets */
PerformOffsetsTruncation(oldestMulti, newOldestMulti);
@@ -3345,7 +2870,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
static bool
MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
{
- int32 diff = (int32) (offset1 - offset2);
+ int64 diff = (int64) (offset1 - offset2);
return (diff < 0);
}
@@ -3492,7 +3017,7 @@ multixact_redo(XLogReaderState *record)
*/
SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB, false);
- PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
+ PerformMembersTruncation(xlrec.endTruncMemb);
/*
* During XLOG replay, latest_page_number isn't necessarily set up
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bf3dbda901..a813a090fa 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5083,7 +5083,7 @@ BootStrapXLOG(uint32 data_checksum_version)
FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
checkPoint.nextOid = FirstGenbkiObjectId;
checkPoint.nextMulti = FirstMultiXactId;
- checkPoint.nextMultiOffset = 0;
+ checkPoint.nextMultiOffset = 1;
checkPoint.oldestXid = FirstNormalTransactionId;
checkPoint.oldestXidDB = Template1DbOid;
checkPoint.oldestMulti = FirstMultiXactId;
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e6745e6145..c96fbf004d 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1134,7 +1134,7 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Almost ready to set freeze output parameters; check if OldestXmin or
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 0ab921a169..ed5fc09c38 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -1134,7 +1134,7 @@ do_start_worker(void)
/* Also determine the oldest datminmxid we will consider. */
recentMulti = ReadNextMultiXactId();
- multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
+ multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId;
@@ -1922,7 +1922,7 @@ do_autovacuum(void)
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Find the pg_database entry and select the default freeze ages. We use
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index fff401e469..4ad64cf1ed 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -264,7 +264,7 @@ main(int argc, char *argv[])
case 'O':
errno = 0;
- set_mxoff = strtoul(optarg, &endptr, 0);
+ set_mxoff = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-O");
diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl
index 323cd483cf..e107646875 100644
--- a/src/bin/pg_resetwal/t/001_basic.pl
+++ b/src/bin/pg_resetwal/t/001_basic.pl
@@ -207,7 +207,7 @@ push @cmd,
sprintf("%d,%d", hex($files[0]) == 0 ? 3 : hex($files[0]), hex($files[-1]));
@files = get_slru_files('pg_multixact/offsets');
-$mult = 32 * $blcksz / 4;
+$mult = 32 * $blcksz / 8;
# --multixact-ids argument is "new,old"
push @cmd,
'--multixact-ids' => sprintf("%d,%d",
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 4e6b0eec2f..5ee632dfe6 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -27,7 +27,7 @@
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
-#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
+#define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF)
/*
* Possible multixact lock modes ("status"). The first four modes are for
@@ -143,7 +143,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti,
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
-extern int MultiXactMemberFreezeThreshold(void);
extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
diff --git a/src/include/access/multixact_internal.h b/src/include/access/multixact_internal.h
new file mode 100644
index 0000000000..39e74a21c7
--- /dev/null
+++ b/src/include/access/multixact_internal.h
@@ -0,0 +1,115 @@
+/*
+ * multixact_internal.h
+ *
+ * Internal definitions for the on-disk format of multixact manager.
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/multixact_internal.h
+ */
+#ifndef MULTIXACT_INTERNAL_H
+#define MULTIXACT_INTERNAL_H
+
+/* FIXME: had to duplicate this */
+#define SLRU_PAGES_PER_SEGMENT 32
+
+/*
+ * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
+ * used everywhere else in Postgres.
+ */
+
+/* We need eight bytes per offset */
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
+
+static inline int64
+MultiXactIdToOffsetPage(MultiXactId multi)
+{
+ return multi / MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+static inline int
+MultiXactIdToOffsetEntry(MultiXactId multi)
+{
+ return multi % MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+static inline int64
+MultiXactIdToOffsetSegment(MultiXactId multi)
+{
+ return MultiXactIdToOffsetPage(multi) / SLRU_PAGES_PER_SEGMENT;
+}
+
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId. To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
+ * groups are stored whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
+ * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * performance) trumps space efficiency here.
+ *
+ * Note that the "offset" functions work with byte offsets, not array indexes,
+ * so arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT 8
+#define MXACT_MEMBER_FLAGS_PER_BYTE 1
+#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP 4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
+ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+ (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE \
+ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/* page in which a member is to be found */
+static inline int64
+MXOffsetToMemberPage(MultiXactOffset offset)
+{
+ return offset / MULTIXACT_MEMBERS_PER_PAGE;
+}
+
+static inline int64
+MXOffsetToMemberSegment(MultiXactOffset offset)
+{
+ return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
+}
+
+/* Location (byte offset within page) of flag word for a given member */
+static inline int
+MXOffsetToFlagsOffset(MultiXactOffset offset)
+{
+ MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+ int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
+ int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
+
+ return byteoff;
+}
+
+static inline int
+MXOffsetToFlagsBitShift(MultiXactOffset offset)
+{
+ int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+ int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
+
+ return bshift;
+}
+
+/* Location (byte offset within page) of TransactionId of given member */
+static inline int
+MXOffsetToMemberOffset(MultiXactOffset offset)
+{
+ int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+ return MXOffsetToFlagsOffset(offset) +
+ MULTIXACT_FLAGBYTES_PER_GROUP +
+ member_in_group * sizeof(TransactionId);
+}
+
+#endif /* MULTIXACT_INTERNAL_H */
diff --git a/src/include/c.h b/src/include/c.h
index a14c631516..318194f78d 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -618,7 +618,7 @@ typedef uint32 SubTransactionId;
/* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
typedef TransactionId MultiXactId;
-typedef uint32 MultiXactOffset;
+typedef uint64 MultiXactOffset;
typedef uint32 CommandId;
--
2.43.0