v23-0001-Use-64-bit-multixact-offsets.patch
application/octet-stream
Filename: v23-0001-Use-64-bit-multixact-offsets.patch
Type: application/octet-stream
Part: 1
Message:
Re: POC: make mxidoff 64 bits
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v23-0001
Subject: Use 64-bit multixact offsets
| File | + | − |
|---|---|---|
| src/backend/access/rmgrdesc/mxactdesc.c | 2 | 2 |
| src/backend/access/rmgrdesc/xlogdesc.c | 1 | 1 |
| src/backend/access/transam/multixact.c | 57 | 443 |
| src/backend/access/transam/xlog.c | 1 | 1 |
| src/backend/access/transam/xlogrecovery.c | 1 | 1 |
| src/backend/commands/vacuum.c | 1 | 1 |
| src/backend/postmaster/autovacuum.c | 2 | 2 |
| src/bin/pg_controldata/pg_controldata.c | 1 | 1 |
| src/bin/pg_resetwal/pg_resetwal.c | 3 | 3 |
| src/bin/pg_resetwal/t/001_basic.pl | 1 | 1 |
| src/include/access/multixact.h | 0 | 3 |
| src/include/access/multixact_internal.h | 109 | 0 |
| src/include/access/slru.h | 0 | 15 |
| src/include/c.h | 1 | 1 |
| src/include/pg_config_manual.h | 15 | 0 |
From 132bf103728c3eb1605ac04b488b41c1edc90852 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 7 Aug 2024 16:35:22 +0300
Subject: [PATCH v23 1/7] Use 64-bit multixact offsets
Switching to 64-bit multitransaction offsets removes wraparound and the
2^32 limit on their total number.
Author: Maxim Orlov <orlovmg@gmail.com>
Discussion: FIXME
---
src/backend/access/rmgrdesc/mxactdesc.c | 4 +-
src/backend/access/rmgrdesc/xlogdesc.c | 2 +-
src/backend/access/transam/multixact.c | 500 +++-------------------
src/backend/access/transam/xlog.c | 2 +-
src/backend/access/transam/xlogrecovery.c | 2 +-
src/backend/commands/vacuum.c | 2 +-
src/backend/postmaster/autovacuum.c | 4 +-
src/bin/pg_controldata/pg_controldata.c | 2 +-
src/bin/pg_resetwal/pg_resetwal.c | 6 +-
src/bin/pg_resetwal/t/001_basic.pl | 2 +-
src/include/access/multixact.h | 3 -
src/include/access/multixact_internal.h | 109 +++++
src/include/access/slru.h | 15 -
src/include/c.h | 2 +-
src/include/pg_config_manual.h | 15 +
15 files changed, 195 insertions(+), 475 deletions(-)
create mode 100644 src/include/access/multixact_internal.h
diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c
index 3ca0582db36..052dd0a4ce5 100644
--- a/src/backend/access/rmgrdesc/mxactdesc.c
+++ b/src/backend/access/rmgrdesc/mxactdesc.c
@@ -65,7 +65,7 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
xl_multixact_create *xlrec = (xl_multixact_create *) rec;
int i;
- appendStringInfo(buf, "%u offset %u nmembers %d: ", xlrec->mid,
+ appendStringInfo(buf, "%u offset %" PRIu64 " nmembers %d: ", xlrec->mid,
xlrec->moff, xlrec->nmembers);
for (i = 0; i < xlrec->nmembers; i++)
out_member(buf, &xlrec->members[i]);
@@ -74,7 +74,7 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
{
xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec;
- appendStringInfo(buf, "offsets [%u, %u), members [%u, %u)",
+ appendStringInfo(buf, "offsets [%u, %u), members [%" PRIu64 ", %" PRIu64 ")",
xlrec->startTruncOff, xlrec->endTruncOff,
xlrec->startTruncMemb, xlrec->endTruncMemb);
}
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index cd6c2a2f650..441034f5929 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -66,7 +66,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "redo %X/%08X; "
- "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
+ "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %" PRIu64 "; "
"oldest xid %u in DB %u; oldest multi %u in DB %u; "
"oldest/newest commit timestamp xid: %u/%u; "
"oldest running xid %u; %s",
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 9d5f130af7e..732f048df5e 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -69,6 +69,7 @@
#include "postgres.h"
#include "access/multixact.h"
+#include "access/multixact_internal.h"
#include "access/slru.h"
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
@@ -88,35 +89,6 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
-
-/*
- * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
- * used everywhere else in Postgres.
- *
- * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
- * MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
- * take no explicit notice of that fact in this module, except when comparing
- * segment and page numbers in TruncateMultiXact (see
- * MultiXactOffsetPagePrecedes).
- */
-
-/* We need four bytes per offset */
-#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
-
-static inline int64
-MultiXactIdToOffsetPage(MultiXactId multi)
-{
- return multi / MULTIXACT_OFFSETS_PER_PAGE;
-}
-
-static inline int
-MultiXactIdToOffsetEntry(MultiXactId multi)
-{
- return multi % MULTIXACT_OFFSETS_PER_PAGE;
-}
-
static inline int64
MultiXactIdToOffsetSegment(MultiXactId multi)
{
@@ -124,94 +96,13 @@ MultiXactIdToOffsetSegment(MultiXactId multi)
}
/*
- * The situation for members is a bit more complex: we store one byte of
- * additional flag bits for each TransactionId. To do this without getting
- * into alignment issues, we store four bytes of flags, and then the
- * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
- * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
- * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
- * performance) trumps space efficiency here.
- *
- * Note that the "offset" macros work with byte offset, not array indexes, so
- * arithmetic must be done using "char *" pointers.
- */
-/* We need eight bits per xact, so one xact fits in a byte */
-#define MXACT_MEMBER_BITS_PER_XACT 8
-#define MXACT_MEMBER_FLAGS_PER_BYTE 1
-#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
-
-/* how many full bytes of flags are there in a group? */
-#define MULTIXACT_FLAGBYTES_PER_GROUP 4
-#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
- (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
-/* size in bytes of a complete group */
-#define MULTIXACT_MEMBERGROUP_SIZE \
- (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
-#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
-#define MULTIXACT_MEMBERS_PER_PAGE \
- (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
-
-/*
- * Because the number of items per page is not a divisor of the last item
- * number (member 0xFFFFFFFF), the last segment does not use the maximum number
- * of pages, and moreover the last used page therein does not use the same
- * number of items as previous pages. (Another way to say it is that the
- * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
- * has some empty space after that item.)
- *
- * This constant is the number of members in the last page of the last segment.
+ * Multixact members warning threshold.
+ *
+ * If the difference between nextOffset and oldestOffset exceeds this value,
+ * we trigger autovacuum in order to release disk space consumed by the
+ * members SLRU.
*/
-#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
- ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
-
-/* page in which a member is to be found */
-static inline int64
-MXOffsetToMemberPage(MultiXactOffset offset)
-{
- return offset / MULTIXACT_MEMBERS_PER_PAGE;
-}
-
-static inline int64
-MXOffsetToMemberSegment(MultiXactOffset offset)
-{
- return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
-}
-
-/* Location (byte offset within page) of flag word for a given member */
-static inline int
-MXOffsetToFlagsOffset(MultiXactOffset offset)
-{
- MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
- int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
- int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
-
- return byteoff;
-}
-
-static inline int
-MXOffsetToFlagsBitShift(MultiXactOffset offset)
-{
- int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
- int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
-
- return bshift;
-}
-
-/* Location (byte offset within page) of TransactionId of given member */
-static inline int
-MXOffsetToMemberOffset(MultiXactOffset offset)
-{
- int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
-
- return MXOffsetToFlagsOffset(offset) +
- MULTIXACT_FLAGBYTES_PER_GROUP +
- member_in_group * sizeof(TransactionId);
-}
-
-/* Multixact members wraparound thresholds. */
-#define MULTIXACT_MEMBER_SAFE_THRESHOLD (MaxMultiXactOffset / 2)
-#define MULTIXACT_MEMBER_DANGER_THRESHOLD \
- (MaxMultiXactOffset - MaxMultiXactOffset / 4)
+#define MULTIXACT_MEMBER_AUTOVAC_THRESHOLD UINT64CONST(4000000000)
static inline MultiXactId
PreviousMultiXactId(MultiXactId multi)
@@ -268,9 +159,6 @@ typedef struct MultiXactStateData
MultiXactId multiStopLimit;
MultiXactId multiWrapLimit;
- /* support for members anti-wraparound measures */
- MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
-
/*
* This is used to sleep until a multixact offset is written when we want
* to create the next one.
@@ -401,8 +289,6 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
- MultiXactOffset start, uint32 distance);
static bool SetOffsetVacuumLimit(bool is_startup);
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
static void WriteMTruncateXlogRec(Oid oldestMultiDB,
@@ -1142,90 +1028,22 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
ExtendMultiXactOffset(result);
/*
- * Reserve the members space, similarly to above. Also, be careful not to
- * return zero as the starting offset for any multixact. See
- * GetMultiXactIdMembers() for motivation.
+ * Reserve the members space, similarly to above.
*/
nextOffset = MultiXactState->nextOffset;
- if (nextOffset == 0)
- {
- *offset = 1;
- nmembers++; /* allocate member slot 0 too */
- }
- else
- *offset = nextOffset;
-
- /*----------
- * Protect against overrun of the members space as well, with the
- * following rules:
- *
- * If we're past offsetStopLimit, refuse to generate more multis.
- * If we're close to offsetStopLimit, emit a warning.
- *
- * Arbitrarily, we start emitting warnings when we're 20 segments or less
- * from offsetStopLimit.
- *
- * Note we haven't updated the shared state yet, so if we fail at this
- * point, the multixact ID we grabbed can still be used by the next guy.
- *
- * Note that there is no point in forcing autovacuum runs here: the
- * multixact freeze settings would have to be reduced for that to have any
- * effect.
- *----------
- */
-#define OFFSET_WARN_SEGMENTS 20
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
- nmembers))
- {
- /* see comment in the corresponding offsets wraparound case */
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("multixact \"members\" limit exceeded"),
- errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
- "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
- MultiXactState->offsetStopLimit - nextOffset - 1,
- nmembers,
- MultiXactState->offsetStopLimit - nextOffset - 1),
- errhint("Execute a database-wide VACUUM in database with OID %u with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.",
- MultiXactState->oldestMultiXactDB)));
- }
/*
- * Check whether we should kick autovacuum into action, to prevent members
- * wraparound. NB we use a much larger window to trigger autovacuum than
- * just the warning limit. The warning is just a measure of last resort -
- * this is in line with GetNewTransactionId's behaviour.
+ * Offsets are 64-bit integers and will never wrap around. Firstly, it
+ * would take an unrealistic amount of time and resources to consume 2^64
+ * offsets. Secondly, multixid creation is WAL-logged, so you would run
+ * out of LSNs before reaching offset wraparound. Nevertheless, check for
+ * wraparound as a sanity check.
*/
- if (!MultiXactState->oldestOffsetKnown ||
- (MultiXactState->nextOffset - MultiXactState->oldestOffset
- > MULTIXACT_MEMBER_SAFE_THRESHOLD))
- {
- /*
- * To avoid swamping the postmaster with signals, we issue the autovac
- * request only when crossing a segment boundary. With default
- * compilation settings that's roughly after 50k members. This still
- * gives plenty of chances before we get into real trouble.
- */
- if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
- (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- }
-
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
- nextOffset,
- nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
- ereport(WARNING,
+ if (nextOffset + nmembers < nextOffset)
+ ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
- "database with OID %u must be vacuumed before %d more multixact members are used",
- MultiXactState->offsetStopLimit - nextOffset + nmembers,
- MultiXactState->oldestMultiXactDB,
- MultiXactState->offsetStopLimit - nextOffset + nmembers),
- errhint("Execute a database-wide VACUUM in that database with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.")));
+ "MultiXact members would wrap around"));
+ *offset = nextOffset;
ExtendMultiXactMember(nextOffset, nmembers);
@@ -1246,8 +1064,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* the next iteration. But note that nextMXact may be InvalidMultiXactId
* or the first value on a segment-beginning page after this routine
* exits, so anyone else looking at the variable must be prepared to deal
- * with either case. Similarly, nextOffset may be zero, but we won't use
- * that as the actual start offset of the next multixact.
+ * with either case.
*/
(MultiXactState->nextMXact)++;
@@ -1255,7 +1072,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
LWLockRelease(MultiXactGenLock);
- debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
+ debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64, result,
+ *offset);
return result;
}
@@ -1297,7 +1115,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
MultiXactOffset *offptr;
MultiXactOffset offset;
int length;
- int truelength;
MultiXactId oldestMXact;
MultiXactId nextMXact;
MultiXactId tmpMXact;
@@ -1396,16 +1213,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
* we have just for this; the process in charge will signal the CV as soon
* as it has finished writing the multixact offset.
*
- * 3. Because GetNewMultiXactId increments offset zero to offset one to
- * handle case #2, there is an ambiguity near the point of offset
- * wraparound. If we see next multixact's offset is one, is that our
- * multixact's actual endpoint, or did it end at zero with a subsequent
- * increment? We handle this using the knowledge that if the zero'th
- * member slot wasn't filled, it'll contain zero, and zero isn't a valid
- * transaction ID so it can't be a multixact member. Therefore, if we
- * read a zero from the members array, just ignore it.
- *
- * This is all pretty messy, but the mess occurs only in infrequent corner
+ * This is a little messy, but the mess occurs only in infrequent corner
* cases, so it seems better than holding the MultiXactGenLock for a long
* time on every multixact creation.
*/
@@ -1491,6 +1299,9 @@ retry:
LWLockRelease(lock);
lock = NULL;
+ /* A multixid with zero members should not happen */
+ Assert(length > 0);
+
/*
* If we slept above, clean up state; it's no longer needed.
*/
@@ -1499,7 +1310,6 @@ retry:
ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
- truelength = 0;
prev_pageno = -1;
for (int i = 0; i < length; i++, offset++)
{
@@ -1536,37 +1346,27 @@ retry:
xactptr = (TransactionId *)
(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
-
- if (!TransactionIdIsValid(*xactptr))
- {
- /* Corner case 3: we must be looking at unused slot zero */
- Assert(offset == 0);
- continue;
- }
+ Assert(TransactionIdIsValid(*xactptr));
flagsoff = MXOffsetToFlagsOffset(offset);
bshift = MXOffsetToFlagsBitShift(offset);
flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
- ptr[truelength].xid = *xactptr;
- ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
- truelength++;
+ ptr[i].xid = *xactptr;
+ ptr[i].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
}
LWLockRelease(lock);
- /* A multixid with zero members should not happen */
- Assert(truelength > 0);
-
/*
* Copy the result into the local cache.
*/
- mXactCachePut(multi, truelength, ptr);
+ mXactCachePut(multi, length, ptr);
debug_elog3(DEBUG2, "GetMembers: no cache for %s",
- mxid_to_string(multi, truelength, ptr));
+ mxid_to_string(multi, length, ptr));
*members = ptr;
- return truelength;
+ return length;
}
/*
@@ -1973,7 +1773,7 @@ MultiXactShmemInit(void)
"pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
LWTRANCHE_MULTIXACTMEMBER_SLRU,
SYNC_HANDLER_MULTIXACT_MEMBER,
- false);
+ true);
/* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
/* Initialize our shared state struct */
@@ -2150,7 +1950,6 @@ TrimMultiXact(void)
slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
-
MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
@@ -2223,7 +2022,7 @@ MultiXactGetCheckptMulti(bool is_shutdown,
LWLockRelease(MultiXactGenLock);
debug_elog6(DEBUG2,
- "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
+ "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
*nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
}
@@ -2258,7 +2057,7 @@ void
MultiXactSetNextMXact(MultiXactId nextMulti,
MultiXactOffset nextMultiOffset)
{
- debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
+ debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
nextMulti, nextMultiOffset);
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
MultiXactState->nextMXact = nextMulti;
@@ -2449,7 +2248,7 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
}
if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
{
- debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
+ debug_elog3(DEBUG2, "MultiXact: setting next offset to %" PRIU64,
minMultiOffset);
MultiXactState->nextOffset = minMultiOffset;
}
@@ -2551,23 +2350,8 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
LWLockRelease(lock);
}
- /*
- * Compute the number of items till end of current page. Careful: if
- * addition of unsigned ints wraps around, we're at the last page of
- * the last segment; since that page holds a different number of items
- * than other pages, we need to do it differently.
- */
- if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
- {
- /*
- * This is the last page of the last segment; we can compute the
- * number of items left to allocate in it without modulo
- * arithmetic.
- */
- difference = MaxMultiXactOffset - offset + 1;
- }
- else
- difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+ /* Compute the number of items till end of current page. */
+ difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
/*
* Advance to next page, taking care to properly handle the wraparound
@@ -2633,15 +2417,14 @@ GetOldestMultiXactId(void)
}
/*
- * Determine how aggressively we need to vacuum in order to prevent member
- * wraparound.
+ * Determine if we need to vacuum to keep the size of the members SLRU in
+ * check.
*
* To do so determine what's the oldest member offset and install the limit
* info in MultiXactState, where it can be used to prevent overrun of old data
* in the members SLRU area.
*
- * The return value is true if emergency autovacuum is required and false
- * otherwise.
+ * The return value is true if autovacuum is required and false otherwise.
*/
static bool
SetOffsetVacuumLimit(bool is_startup)
@@ -2653,8 +2436,6 @@ SetOffsetVacuumLimit(bool is_startup)
MultiXactOffset nextOffset;
bool oldestOffsetKnown = false;
bool prevOldestOffsetKnown;
- MultiXactOffset offsetStopLimit = 0;
- MultiXactOffset prevOffsetStopLimit;
/*
* NB: Have to prevent concurrent truncation, we might otherwise try to
@@ -2669,7 +2450,6 @@ SetOffsetVacuumLimit(bool is_startup)
nextOffset = MultiXactState->nextOffset;
prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
prevOldestOffset = MultiXactState->oldestOffset;
- prevOffsetStopLimit = MultiXactState->offsetStopLimit;
Assert(MultiXactState->finishedStartup);
LWLockRelease(MultiXactGenLock);
@@ -2700,13 +2480,9 @@ SetOffsetVacuumLimit(bool is_startup)
oldestOffsetKnown =
find_multixact_start(oldestMultiXactId, &oldestOffset);
- if (oldestOffsetKnown)
- ereport(DEBUG1,
- (errmsg_internal("oldest MultiXactId member is at offset %u",
- oldestOffset)));
- else
+ if (!oldestOffsetKnown)
ereport(LOG,
- (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
+ (errmsg("oldest checkpointed MultiXact %u does not exist on disk",
oldestMultiXactId)));
}
@@ -2716,97 +2492,32 @@ SetOffsetVacuumLimit(bool is_startup)
* If we can, compute limits (and install them MultiXactState) to prevent
* overrun of old data in the members SLRU area. We can only do so if the
* oldest offset is known though.
+ *
+ * FIXME: Is !oldestOffsetKnown possible anymore? At least update the comment:
+ * we won't overrun members anymore.
*/
- if (oldestOffsetKnown)
- {
- /* move back to start of the corresponding segment */
- offsetStopLimit = oldestOffset - (oldestOffset %
- (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
-
- /* always leave one segment before the wraparound point */
- offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
- if (!prevOldestOffsetKnown && !is_startup)
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are now enabled")));
-
- ereport(DEBUG1,
- (errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u",
- offsetStopLimit, oldestMultiXactId)));
- }
- else if (prevOldestOffsetKnown)
+ if (prevOldestOffsetKnown)
{
/*
* If we failed to get the oldest offset this time, but we have a
* value from a previous pass through this function, use the old
- * values rather than automatically forcing an emergency autovacuum
- * cycle again.
+ * values rather than automatically forcing an autovacuum cycle again.
*/
oldestOffset = prevOldestOffset;
oldestOffsetKnown = true;
- offsetStopLimit = prevOffsetStopLimit;
}
/* Install the computed values */
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
MultiXactState->oldestOffset = oldestOffset;
MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
- MultiXactState->offsetStopLimit = offsetStopLimit;
LWLockRelease(MultiXactGenLock);
/*
- * Do we need an emergency autovacuum? If we're not sure, assume yes.
+ * Do we need autovacuum? If we're not sure, assume yes.
*/
return !oldestOffsetKnown ||
- (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
-}
-
-/*
- * Return whether adding "distance" to "start" would move past "boundary".
- *
- * We use this to determine whether the addition is "wrapping around" the
- * boundary point, hence the name. The reason we don't want to use the regular
- * 2^31-modulo arithmetic here is that we want to be able to use the whole of
- * the 2^32-1 space here, allowing for more multixacts than would fit
- * otherwise.
- */
-static bool
-MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
- uint32 distance)
-{
- MultiXactOffset finish;
-
- /*
- * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
- * if the addition wraps around the UINT_MAX boundary, skip that value.
- */
- finish = start + distance;
- if (finish < start)
- finish++;
-
- /*-----------------------------------------------------------------------
- * When the boundary is numerically greater than the starting point, any
- * value numerically between the two is not wrapped:
- *
- * <----S----B---->
- * [---) = F wrapped past B (and UINT_MAX)
- * [---) = F not wrapped
- * [----] = F wrapped past B
- *
- * When the boundary is numerically less than the starting point (i.e. the
- * UINT_MAX wraparound occurs somewhere in between) then all values in
- * between are wrapped:
- *
- * <----B----S---->
- * [---) = F not wrapped past B (but wrapped past UINT_MAX)
- * [---) = F wrapped past B (and UINT_MAX)
- * [----] = F not wrapped
- *-----------------------------------------------------------------------
- */
- if (start < boundary)
- return finish >= boundary || finish < start;
- else
- return finish >= boundary && finish < start;
+ (nextOffset - oldestOffset > MULTIXACT_MEMBER_AUTOVAC_THRESHOLD);
}
/*
@@ -2846,6 +2557,7 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
offset = *offptr;
+
LWLockRelease(SimpleLruGetBankLock(MultiXactOffsetCtl, pageno));
*result = offset;
@@ -2893,73 +2605,6 @@ GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
return true;
}
-/*
- * Multixact members can be removed once the multixacts that refer to them
- * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
- * vacuum_multixact_freeze_table_age work together to make sure we never have
- * too many multixacts; we hope that, at least under normal circumstances,
- * this will also be sufficient to keep us from using too many offsets.
- * However, if the average multixact has many members, we might exhaust the
- * members space while still using few enough members that these limits fail
- * to trigger relminmxid advancement by VACUUM. At that point, we'd have no
- * choice but to start failing multixact-creating operations with an error.
- *
- * To prevent that, if more than a threshold portion of the members space is
- * used, we effectively reduce autovacuum_multixact_freeze_max_age and
- * to a value just less than the number of multixacts in use. We hope that
- * this will quickly trigger autovacuuming on the table or tables with the
- * oldest relminmxid, thus allowing datminmxid values to advance and removing
- * some members.
- *
- * As the fraction of the member space currently in use grows, we become
- * more aggressive in clamping this value. That not only causes autovacuum
- * to ramp up, but also makes any manual vacuums the user issues more
- * aggressive. This happens because vacuum_get_cutoffs() will clamp the
- * freeze table and the minimum freeze age cutoffs based on the effective
- * autovacuum_multixact_freeze_max_age this function returns. In the worst
- * case, we'll claim the freeze_max_age to zero, and every vacuum of any
- * table will freeze every multixact.
- */
-int
-MultiXactMemberFreezeThreshold(void)
-{
- MultiXactOffset members;
- uint32 multixacts;
- uint32 victim_multixacts;
- double fraction;
- int result;
- MultiXactId oldestMultiXactId;
- MultiXactOffset oldestOffset;
-
- /* If we can't determine member space utilization, assume the worst. */
- if (!GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset))
- return 0;
-
- /* If member space utilization is low, no special action is required. */
- if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
- return autovacuum_multixact_freeze_max_age;
-
- /*
- * Compute a target for relminmxid advancement. The number of multixacts
- * we try to eliminate from the system is based on how far we are past
- * MULTIXACT_MEMBER_SAFE_THRESHOLD.
- */
- fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
- (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
- victim_multixacts = multixacts * fraction;
-
- /* fraction could be > 1.0, but lowest possible freeze age is zero */
- if (victim_multixacts > multixacts)
- return 0;
- result = multixacts - victim_multixacts;
-
- /*
- * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
- * autovacuum less aggressive than it would otherwise be.
- */
- return Min(result, autovacuum_multixact_freeze_max_age);
-}
-
typedef struct mxtruncinfo
{
int64 earliestExistingPage;
@@ -2986,36 +2631,12 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data
/*
* Delete members segments [oldest, newOldest)
- *
- * The members SLRU can, in contrast to the offsets one, be filled to almost
- * the full range at once. This means SimpleLruTruncate() can't trivially be
- * used - instead the to-be-deleted range is computed using the offsets
- * SLRU. C.f. TruncateMultiXact().
*/
static void
PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
{
- const int64 maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
- int64 startsegment = MXOffsetToMemberSegment(oldestOffset);
- int64 endsegment = MXOffsetToMemberSegment(newOldestOffset);
- int64 segment = startsegment;
-
- /*
- * Delete all the segments but the last one. The last segment can still
- * contain, possibly partially, valid data.
- */
- while (segment != endsegment)
- {
- elog(DEBUG2, "truncating multixact members segment %" PRIx64,
- segment);
- SlruDeleteSegment(MultiXactMemberCtl, segment);
-
- /* move to next segment, handling wraparound correctly */
- if (segment == maxsegment)
- segment = 0;
- else
- segment += 1;
- }
+ SimpleLruTruncate(MultiXactMemberCtl,
+ MXOffsetToMemberPage(newOldestOffset));
}
/*
@@ -3159,7 +2780,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
elog(DEBUG1, "performing multixact truncation: "
"offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
- "members [%u, %u), members segments [%" PRIx64 ", %" PRIx64 ")",
+ "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
oldestMulti, newOldestMulti,
MultiXactIdToOffsetSegment(oldestMulti),
MultiXactIdToOffsetSegment(newOldestMulti),
@@ -3239,20 +2860,13 @@ MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
/*
* Decide whether a MultiXactMember page number is "older" for truncation
- * purposes. There is no "invalid offset number" so use the numbers verbatim.
+ * purposes. There is no "invalid offset number" and members never wrap
+ * around, so use the numbers verbatim.
*/
static bool
MultiXactMemberPagePrecedes(int64 page1, int64 page2)
{
- MultiXactOffset offset1;
- MultiXactOffset offset2;
-
- offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
- offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
-
- return (MultiXactOffsetPrecedes(offset1, offset2) &&
- MultiXactOffsetPrecedes(offset1,
- offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1));
+ return page1 < page2;
}
/*
@@ -3290,7 +2904,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
static bool
MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
{
- int32 diff = (int32) (offset1 - offset2);
+ int64 diff = (int64) (offset1 - offset2);
return (diff < 0);
}
@@ -3387,7 +3001,7 @@ multixact_redo(XLogReaderState *record)
elog(DEBUG1, "replaying multixact truncation: "
"offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
- "members [%u, %u), members segments [%" PRIx64 ", %" PRIx64 ")",
+ "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
xlrec.startTruncOff, xlrec.endTruncOff,
MultiXactIdToOffsetSegment(xlrec.startTruncOff),
MultiXactIdToOffsetSegment(xlrec.endTruncOff),
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7c959051e11..2ade0b4a042 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5128,7 +5128,7 @@ BootStrapXLOG(uint32 data_checksum_version)
FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
checkPoint.nextOid = FirstGenbkiObjectId;
checkPoint.nextMulti = FirstMultiXactId;
- checkPoint.nextMultiOffset = 0;
+ checkPoint.nextMultiOffset = 1;
checkPoint.oldestXid = FirstNormalTransactionId;
checkPoint.oldestXidDB = Template1DbOid;
checkPoint.oldestMulti = FirstMultiXactId;
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 550de6e4a59..66c2364aa9b 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -886,7 +886,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
U64FromFullTransactionId(checkPoint.nextXid),
checkPoint.nextOid)));
ereport(DEBUG1,
- (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
+ (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %" PRIu64,
checkPoint.nextMulti, checkPoint.nextMultiOffset)));
ereport(DEBUG1,
(errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index ed03e3bd50d..259ef60bd31 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1147,7 +1147,7 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams params,
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Almost ready to set freeze output parameters; check if OldestXmin or
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index ed19c74bb19..34909ee54ff 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -1151,7 +1151,7 @@ do_start_worker(void)
/* Also determine the oldest datminmxid we will consider. */
recentMulti = ReadNextMultiXactId();
- multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
+ multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId;
@@ -1939,7 +1939,7 @@ do_autovacuum(void)
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Find the pg_database entry and select the default freeze ages. We use
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 10de058ce91..5295108ade3 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -264,7 +264,7 @@ main(int argc, char *argv[])
ControlFile->checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
ControlFile->checkPointCopy.nextMulti);
- printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
+ printf(_("Latest checkpoint's NextMultiOffset: %" PRIu64 "\n"),
ControlFile->checkPointCopy.nextMultiOffset);
printf(_("Latest checkpoint's oldestXID: %u\n"),
ControlFile->checkPointCopy.oldestXid);
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index a89d72fc5cf..4e5eeced89d 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -267,7 +267,7 @@ main(int argc, char *argv[])
case 'O':
errno = 0;
- set_mxoff = strtoul(optarg, &endptr, 0);
+ set_mxoff = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-O");
@@ -743,7 +743,7 @@ PrintControlValues(bool guessed)
ControlFile.checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
ControlFile.checkPointCopy.nextMulti);
- printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
+ printf(_("Latest checkpoint's NextMultiOffset: %" PRIu64 "\n"),
ControlFile.checkPointCopy.nextMultiOffset);
printf(_("Latest checkpoint's oldestXID: %u\n"),
ControlFile.checkPointCopy.oldestXid);
@@ -817,7 +817,7 @@ PrintNewControlValues(void)
if (set_mxoff != -1)
{
- printf(_("NextMultiOffset: %u\n"),
+ printf(_("NextMultiOffset: %" PRIu64 "\n"),
ControlFile.checkPointCopy.nextMultiOffset);
}
diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl
index d6bbbd0ceda..cc89e0764ae 100644
--- a/src/bin/pg_resetwal/t/001_basic.pl
+++ b/src/bin/pg_resetwal/t/001_basic.pl
@@ -213,7 +213,7 @@ push @cmd,
sprintf("%d,%d", hex($files[0]) == 0 ? 3 : hex($files[0]), hex($files[-1]));
@files = get_slru_files('pg_multixact/offsets');
-$mult = 32 * $blcksz / 4;
+$mult = 32 * $blcksz / 8;
# --multixact-ids argument is "new,old"
push @cmd,
'--multixact-ids' => sprintf("%d,%d",
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 82e4bb90dd5..7d98fe0fe32 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -28,8 +28,6 @@
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
-#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
-
/*
* Possible multixact lock modes ("status"). The first four modes are for
* tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
@@ -147,7 +145,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti,
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
-extern int MultiXactMemberFreezeThreshold(void);
extern void multixact_twophase_recover(FullTransactionId fxid, uint16 info,
void *recdata, uint32 len);
diff --git a/src/include/access/multixact_internal.h b/src/include/access/multixact_internal.h
new file mode 100644
index 00000000000..73fb3e998fb
--- /dev/null
+++ b/src/include/access/multixact_internal.h
@@ -0,0 +1,109 @@
+/*
+ * multixact_internal.h
+ *
+ * Defines and helper functions for the PostgreSQL multi-transaction-log manager
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ * src/include/access/multixact_internal.h
+ */
+#ifndef MULTIXACT_INTERNAL_H
+#define MULTIXACT_INTERNAL_H
+
+#include "postgres.h"
+
+#include "access/multixact.h"
+
+/*
+ * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
+ * used everywhere else in Postgres.
+ */
+
+/* We need 8 bytes per offset */
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
+
+static inline int64
+MultiXactIdToOffsetPage(MultiXactId multi)
+{
+ return multi / MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+static inline int
+MultiXactIdToOffsetEntry(MultiXactId multi)
+{
+ return multi % MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId. To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
+ * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
+ * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * performance) trumps space efficiency here.
+ *
+ * Note that the "offset" macros work with byte offset, not array indexes, so
+ * arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT 8
+#define MXACT_MEMBER_FLAGS_PER_BYTE 1
+#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP 4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
+ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+ (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE \
+ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/* page in which a member is to be found */
+static inline int64
+MXOffsetToMemberPage(MultiXactOffset offset)
+{
+ return offset / MULTIXACT_MEMBERS_PER_PAGE;
+}
+
+static inline int64
+MXOffsetToMemberSegment(MultiXactOffset offset)
+{
+ return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
+}
+
+/* Location (byte offset within page) of flag word for a given member */
+static inline int
+MXOffsetToFlagsOffset(MultiXactOffset offset)
+{
+ MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+ int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
+ int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
+
+ return byteoff;
+}
+
+static inline int
+MXOffsetToFlagsBitShift(MultiXactOffset offset)
+{
+ int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+ int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
+
+ return bshift;
+}
+
+/* Location (byte offset within page) of TransactionId of given member */
+static inline int
+MXOffsetToMemberOffset(MultiXactOffset offset)
+{
+ int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+ return MXOffsetToFlagsOffset(offset) +
+ MULTIXACT_FLAGBYTES_PER_GROUP +
+ member_in_group * sizeof(TransactionId);
+}
+
+#endif /* MULTIXACT_INTERNAL_H */
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 8d57753ed01..8576649b15e 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -23,21 +23,6 @@
*/
#define SLRU_MAX_ALLOWED_BUFFERS ((1024 * 1024 * 1024) / BLCKSZ)
-/*
- * Define SLRU segment size. A page is the same BLCKSZ as is used everywhere
- * else in Postgres. The segment size can be chosen somewhat arbitrarily;
- * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG
- * or 64K transactions for SUBTRANS.
- *
- * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
- * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where
- * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at
- * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
- * take no explicit notice of that fact in slru.c, except when comparing
- * segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
- */
-#define SLRU_PAGES_PER_SEGMENT 32
-
/*
* Page status codes. Note that these do not include the "dirty" bit.
* page_dirty can be true only in the VALID or WRITE_IN_PROGRESS states;
diff --git a/src/include/c.h b/src/include/c.h
index 757dfff4782..bc92a6f4565 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -670,7 +670,7 @@ typedef uint32 SubTransactionId;
/* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
typedef TransactionId MultiXactId;
-typedef uint32 MultiXactOffset;
+typedef uint64 MultiXactOffset;
typedef uint32 CommandId;
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index 7e1aa422332..8556ce40cbf 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -356,3 +356,18 @@
* Enable tracing of syncscan operations (see also the trace_syncscan GUC var).
*/
/* #define TRACE_SYNCSCAN */
+
+/*
+ * Define SLRU segment size. A page is the same BLCKSZ as is used everywhere
+ * else in Postgres. The segment size can be chosen somewhat arbitrarily;
+ * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG
+ * or 64K transactions for SUBTRANS.
+ *
+ * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
+ * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where
+ * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at
+ * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
+ * take no explicit notice of that fact in slru.c, except when comparing
+ * segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
+ */
+#define SLRU_PAGES_PER_SEGMENT 32
--
2.51.0