[PATCH v3 3/3] Periodically emit server logs when fewer than 500M transaction IDs remain.
Nathan Bossart <nathan@postgresql.org>
From: Nathan Bossart <nathan@postgresql.org>
To:
Date: 2025-12-12T19:13:55Z
Lists: pgsql-hackers
---
src/backend/access/transam/multixact.c | 40 +++++++++++++++++++++++---
src/backend/access/transam/varsup.c | 40 +++++++++++++++++++++++---
src/include/access/transam.h | 5 ++--
3 files changed, 75 insertions(+), 10 deletions(-)
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 27a0baab8c7..abd89c3a73d 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -153,6 +153,7 @@ typedef struct MultiXactStateData
/* support for anti-wraparound measures */
MultiXactId multiVacLimit;
+ MultiXactId multiLogLimit;
MultiXactId multiWarnLimit;
MultiXactId multiStopLimit;
MultiXactId multiWrapLimit;
@@ -947,6 +948,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* If we're past multiVacLimit or the safe threshold for member storage
* space, or we don't know what the safe threshold for member storage is,
* start trying to force autovacuum cycles.
+ * If we're past multiLogLimit, start issuing periodic server-log messages.
* If we're past multiWarnLimit, start issuing warnings.
* If we're past multiStopLimit, refuse to create new MultiXactIds.
*
@@ -962,6 +964,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* possibility of deadlock while doing get_database_name(). First,
* copy all the shared values we'll need in this path.
*/
+ MultiXactId multiLogLimit = MultiXactState->multiLogLimit;
MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
@@ -1005,13 +1008,27 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- if (!MultiXactIdPrecedes(result, multiWarnLimit))
+ if (!MultiXactIdPrecedes(result, multiWarnLimit) ||
+ (!MultiXactIdPrecedes(result, multiLogLimit) &&
+ result % 1000000 == 0))
{
char *oldest_datname = get_database_name(oldest_datoid);
+ int elevel;
+
+ /*
+ * We send the periodic messages only to the server log, to avoid
+ * confusing clients (a WARNING sent to them would show up once and
+ * then disappear for 1M multis at a time). Once the warning limit is
+ * reached, we emit a proper WARNING every time.
+ */
+ if (!MultiXactIdPrecedes(result, multiWarnLimit))
+ elevel = WARNING;
+ else
+ elevel = LOG_SERVER_ONLY;
/* complain even if that DB has disappeared */
if (oldest_datname)
- ereport(WARNING,
+ ereport(elevel,
(errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
"database \"%s\" must be vacuumed before %u more MultiXactIds are used",
multiWrapLimit - result,
@@ -1022,7 +1039,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
errhint("Execute a database-wide VACUUM in that database.\n"
"You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
else
- ereport(WARNING,
+ ereport(elevel,
(errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
"database with OID %u must be vacuumed before %u more MultiXactIds are used",
multiWrapLimit - result,
@@ -2015,6 +2032,7 @@ void
SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
{
MultiXactId multiVacLimit;
+ MultiXactId multiLogLimit;
MultiXactId multiWarnLimit;
MultiXactId multiStopLimit;
MultiXactId multiWrapLimit;
@@ -2053,6 +2071,15 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
if (multiWarnLimit < FirstMultiXactId)
multiWarnLimit -= FirstMultiXactId;
+ /*
+ * We'll start complaining every 1M multis when we get within 500M multis
+ * of data loss. The idea is to provide an early warning system that is
+ * less noisy than multiWarnLimit but provides ample time to react.
+ */
+ multiLogLimit = multiWrapLimit - 500000000;
+ if (multiLogLimit < FirstMultiXactId)
+ multiLogLimit -= FirstMultiXactId;
+
/*
* We'll start trying to force autovacuums when oldest_datminmxid gets to
* be more than autovacuum_multixact_freeze_max_age mxids old.
@@ -2070,6 +2097,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
MultiXactState->oldestMultiXactId = oldest_datminmxid;
MultiXactState->oldestMultiXactDB = oldest_datoid;
MultiXactState->multiVacLimit = multiVacLimit;
+ MultiXactState->multiLogLimit = multiLogLimit;
MultiXactState->multiWarnLimit = multiWarnLimit;
MultiXactState->multiStopLimit = multiStopLimit;
MultiXactState->multiWrapLimit = multiWrapLimit;
@@ -2112,7 +2140,11 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- /* Give an immediate warning if past the wrap warn point */
+ /*
+ * Give an immediate warning if past the wrap warn point. We don't bother
+ * with multiLogLimit here, as it's unlikely to apply. We leave that part
+ * to GetNewMultiXactId() instead.
+ */
if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
{
char *oldest_datname;
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 98aeea96e8a..0f633cc0e14 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -112,6 +112,7 @@ GetNewTransactionId(bool isSubXact)
* catastrophic data loss due to XID wraparound. The basic rules are:
*
* If we're past xidVacLimit, start trying to force autovacuum cycles.
+ * If we're past xidLogLimit, start issuing periodic server-log messages.
* If we're past xidWarnLimit, start issuing warnings.
* If we're past xidStopLimit, refuse to execute transactions, unless
* we are running in single-user mode (which gives an escape hatch
@@ -129,6 +130,7 @@ GetNewTransactionId(bool isSubXact)
* possibility of deadlock while doing get_database_name(). First,
* copy all the shared values we'll need in this path.
*/
+ TransactionId xidLogLimit = TransamVariables->xidLogLimit;
TransactionId xidWarnLimit = TransamVariables->xidWarnLimit;
TransactionId xidStopLimit = TransamVariables->xidStopLimit;
TransactionId xidWrapLimit = TransamVariables->xidWrapLimit;
@@ -165,13 +167,27 @@ GetNewTransactionId(bool isSubXact)
errhint("Execute a database-wide VACUUM in that database.\n"
"You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
}
- else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
+ else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit) ||
+ (TransactionIdFollowsOrEquals(xid, xidLogLimit) &&
+ xid % 1000000 == 0))
{
char *oldest_datname = get_database_name(oldest_datoid);
+ int elevel;
+
+ /*
+ * We send the periodic messages only to the server log, to avoid
+ * confusing clients (a WARNING sent to them would show up once and
+ * then disappear for 1M transactions at a time). Once the warning
+ * limit is reached, we emit a proper WARNING every time.
+ */
+ if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
+ elevel = WARNING;
+ else
+ elevel = LOG_SERVER_ONLY;
/* complain even if that DB has disappeared */
if (oldest_datname)
- ereport(WARNING,
+ ereport(elevel,
(errmsg("database \"%s\" must be vacuumed within %u transactions",
oldest_datname,
xidWrapLimit - xid),
@@ -180,7 +196,7 @@ GetNewTransactionId(bool isSubXact)
errhint("To avoid transaction ID assignment failures, execute a database-wide VACUUM in that database.\n"
"You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
else
- ereport(WARNING,
+ ereport(elevel,
(errmsg("database with OID %u must be vacuumed within %u transactions",
oldest_datoid,
xidWrapLimit - xid),
@@ -376,6 +392,7 @@ void
SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
{
TransactionId xidVacLimit;
+ TransactionId xidLogLimit;
TransactionId xidWarnLimit;
TransactionId xidStopLimit;
TransactionId xidWrapLimit;
@@ -424,6 +441,16 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
if (xidWarnLimit < FirstNormalTransactionId)
xidWarnLimit -= FirstNormalTransactionId;
+ /*
+ * We'll start complaining every 1M transactions when we get within 500M
+ * transactions of data loss. The idea is to provide an early warning
+ * system that is less noisy than xidWarnLimit but provides ample time to
+ * react.
+ */
+ xidLogLimit = xidWrapLimit - 500000000;
+ if (xidLogLimit < FirstNormalTransactionId)
+ xidLogLimit -= FirstNormalTransactionId;
+
/*
* We'll start trying to force autovacuums when oldest_datfrozenxid gets
* to be more than autovacuum_freeze_max_age transactions old.
@@ -447,6 +474,7 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
TransamVariables->oldestXid = oldest_datfrozenxid;
TransamVariables->xidVacLimit = xidVacLimit;
+ TransamVariables->xidLogLimit = xidLogLimit;
TransamVariables->xidWarnLimit = xidWarnLimit;
TransamVariables->xidStopLimit = xidStopLimit;
TransamVariables->xidWrapLimit = xidWrapLimit;
@@ -470,7 +498,11 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
IsUnderPostmaster && !InRecovery)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- /* Give an immediate warning if past the wrap warn point */
+ /*
+ * Give an immediate warning if past the wrap warn point. We don't bother
+ * with xidLogLimit here, as it's unlikely to apply. We leave that part
+ * to GetNewTransactionId() instead.
+ */
if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery)
{
char *oldest_datname;
diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index c9e20418275..a1bd4259f86 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -203,8 +203,8 @@ FullTransactionIdAdvance(FullTransactionId *dest)
* LWLocks.
*
* Note: xidWrapLimit and oldestXidDB are not "active" values, but are
- * used just to generate useful messages when xidWarnLimit or xidStopLimit
- * are exceeded.
+ * used just to generate useful messages when xidLogLimit, xidWarnLimit, or
+ * xidStopLimit are exceeded.
*/
typedef struct TransamVariablesData
{
@@ -221,6 +221,7 @@ typedef struct TransamVariablesData
TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
TransactionId xidVacLimit; /* start forcing autovacuums here */
+ TransactionId xidLogLimit; /* start logging periodically here */
TransactionId xidWarnLimit; /* start complaining here */
TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */
TransactionId xidWrapLimit; /* where the world ends */
--
2.39.5 (Apple Git-154)
--hUJBJnA49L/nki7N--