[PATCH v3 3/3] Periodically emit server logs when fewer than 500M transaction IDs remain.

Nathan Bossart <nathan@postgresql.org>

From: Nathan Bossart <nathan@postgresql.org>
To:
Date: 2025-12-12T19:13:55Z
Lists: pgsql-hackers
---
 src/backend/access/transam/multixact.c | 40 +++++++++++++++++++++++---
 src/backend/access/transam/varsup.c    | 40 +++++++++++++++++++++++---
 src/include/access/transam.h           |  5 ++--
 3 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 27a0baab8c7..abd89c3a73d 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -153,6 +153,7 @@ typedef struct MultiXactStateData
 
 	/* support for anti-wraparound measures */
 	MultiXactId multiVacLimit;
+	MultiXactId multiLogLimit;
 	MultiXactId multiWarnLimit;
 	MultiXactId multiStopLimit;
 	MultiXactId multiWrapLimit;
@@ -947,6 +948,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 	 * If we're past multiVacLimit or the safe threshold for member storage
 	 * space, or we don't know what the safe threshold for member storage is,
 	 * start trying to force autovacuum cycles.
+	 * If we're past multiLogLimit, start issuing logs periodically.
 	 * If we're past multiWarnLimit, start issuing warnings.
 	 * If we're past multiStopLimit, refuse to create new MultiXactIds.
 	 *
@@ -962,6 +964,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 		 * possibility of deadlock while doing get_database_name(). First,
 		 * copy all the shared values we'll need in this path.
 		 */
+		MultiXactId multiLogLimit = MultiXactState->multiLogLimit;
 		MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
 		MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
 		MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
@@ -1005,13 +1008,27 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 		if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
 			SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
 
-		if (!MultiXactIdPrecedes(result, multiWarnLimit))
+		if (!MultiXactIdPrecedes(result, multiWarnLimit) ||
+			(!MultiXactIdPrecedes(result, multiLogLimit) &&
+			 result % 1000000 == 0))
 		{
 			char	   *oldest_datname = get_database_name(oldest_datoid);
+			int			elevel;
+
+			/*
+			 * We only send the periodic warnings to the server log in an
+			 * attempt to avoid confusing clients (since the WARNING would
+			 * otherwise vanish for 1M multis at a time).  Once the warning
+			 * limit is reached, we emit a proper WARNING every time.
+			 */
+			if (!MultiXactIdPrecedes(result, multiWarnLimit))
+				elevel = WARNING;
+			else
+				elevel = LOG_SERVER_ONLY;
 
 			/* complain even if that DB has disappeared */
 			if (oldest_datname)
-				ereport(WARNING,
+				ereport(elevel,
 						(errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
 									   "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
 									   multiWrapLimit - result,
@@ -1022,7 +1039,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 						 errhint("Execute a database-wide VACUUM in that database.\n"
 								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
 			else
-				ereport(WARNING,
+				ereport(elevel,
 						(errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
 									   "database with OID %u must be vacuumed before %u more MultiXactIds are used",
 									   multiWrapLimit - result,
@@ -2015,6 +2032,7 @@ void
 SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
 {
 	MultiXactId multiVacLimit;
+	MultiXactId multiLogLimit;
 	MultiXactId multiWarnLimit;
 	MultiXactId multiStopLimit;
 	MultiXactId multiWrapLimit;
@@ -2053,6 +2071,15 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
 	if (multiWarnLimit < FirstMultiXactId)
 		multiWarnLimit -= FirstMultiXactId;
 
+	/*
+	 * We'll start complaining every 1M multis when we get within 500M multis
+	 * of data loss.  The idea is to provide an early warning system that is
+	 * less noisy than multiWarnLimit but provides ample time to react.
+	 */
+	multiLogLimit = multiWrapLimit - 500000000;
+	if (multiLogLimit < FirstMultiXactId)
+		multiLogLimit -= FirstMultiXactId;
+
 	/*
 	 * We'll start trying to force autovacuums when oldest_datminmxid gets to
 	 * be more than autovacuum_multixact_freeze_max_age mxids old.
@@ -2070,6 +2097,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
 	MultiXactState->oldestMultiXactId = oldest_datminmxid;
 	MultiXactState->oldestMultiXactDB = oldest_datoid;
 	MultiXactState->multiVacLimit = multiVacLimit;
+	MultiXactState->multiLogLimit = multiLogLimit;
 	MultiXactState->multiWarnLimit = multiWarnLimit;
 	MultiXactState->multiStopLimit = multiStopLimit;
 	MultiXactState->multiWrapLimit = multiWrapLimit;
@@ -2112,7 +2140,11 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
 	if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
 		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
 
-	/* Give an immediate warning if past the wrap warn point */
+	/*
+	 * Give an immediate warning if past the wrap warn point.  We don't bother
+	 * with multiLogLimit here, as it's unlikely to apply.  We leave that part
+	 * to GetNewMultiXactId() instead.
+	 */
 	if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
 	{
 		char	   *oldest_datname;
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 98aeea96e8a..0f633cc0e14 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -112,6 +112,7 @@ GetNewTransactionId(bool isSubXact)
 	 * catastrophic data loss due to XID wraparound.  The basic rules are:
 	 *
 	 * If we're past xidVacLimit, start trying to force autovacuum cycles.
+	 * If we're past xidLogLimit, start issuing logs periodically.
 	 * If we're past xidWarnLimit, start issuing warnings.
 	 * If we're past xidStopLimit, refuse to execute transactions, unless
 	 * we are running in single-user mode (which gives an escape hatch
@@ -129,6 +130,7 @@ GetNewTransactionId(bool isSubXact)
 		 * possibility of deadlock while doing get_database_name(). First,
 		 * copy all the shared values we'll need in this path.
 		 */
+		TransactionId xidLogLimit = TransamVariables->xidLogLimit;
 		TransactionId xidWarnLimit = TransamVariables->xidWarnLimit;
 		TransactionId xidStopLimit = TransamVariables->xidStopLimit;
 		TransactionId xidWrapLimit = TransamVariables->xidWrapLimit;
@@ -165,13 +167,27 @@ GetNewTransactionId(bool isSubXact)
 						 errhint("Execute a database-wide VACUUM in that database.\n"
 								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
 		}
-		else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
+		else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit) ||
+				 (TransactionIdFollowsOrEquals(xid, xidLogLimit) &&
+				  xid % 1000000 == 0))
 		{
 			char	   *oldest_datname = get_database_name(oldest_datoid);
+			int			elevel;
+
+			/*
+			 * We only send the periodic warnings to the server log in an
+			 * attempt to avoid confusing clients (since the WARNING would
+			 * otherwise vanish for 1M transactions at a time).  Once the
+			 * warning limit is reached, we emit a proper WARNING every time.
+			 */
+			if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
+				elevel = WARNING;
+			else
+				elevel = LOG_SERVER_ONLY;
 
 			/* complain even if that DB has disappeared */
 			if (oldest_datname)
-				ereport(WARNING,
+				ereport(elevel,
 						(errmsg("database \"%s\" must be vacuumed within %u transactions",
 								oldest_datname,
 								xidWrapLimit - xid),
@@ -180,7 +196,7 @@ GetNewTransactionId(bool isSubXact)
 						 errhint("To avoid transaction ID assignment failures, execute a database-wide VACUUM in that database.\n"
 								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
 			else
-				ereport(WARNING,
+				ereport(elevel,
 						(errmsg("database with OID %u must be vacuumed within %u transactions",
 								oldest_datoid,
 								xidWrapLimit - xid),
@@ -376,6 +392,7 @@ void
 SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
 {
 	TransactionId xidVacLimit;
+	TransactionId xidLogLimit;
 	TransactionId xidWarnLimit;
 	TransactionId xidStopLimit;
 	TransactionId xidWrapLimit;
@@ -424,6 +441,16 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
 	if (xidWarnLimit < FirstNormalTransactionId)
 		xidWarnLimit -= FirstNormalTransactionId;
 
+	/*
+	 * We'll start complaining every 1M transactions when we get within 500M
+	 * transactions of data loss.  The idea is to provide an early warning
+	 * system that is less noisy than xidWarnLimit but provides ample time to
+	 * react.
+	 */
+	xidLogLimit = xidWrapLimit - 500000000;
+	if (xidLogLimit < FirstNormalTransactionId)
+		xidLogLimit -= FirstNormalTransactionId;
+
 	/*
 	 * We'll start trying to force autovacuums when oldest_datfrozenxid gets
 	 * to be more than autovacuum_freeze_max_age transactions old.
@@ -447,6 +474,7 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
 	LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
 	TransamVariables->oldestXid = oldest_datfrozenxid;
 	TransamVariables->xidVacLimit = xidVacLimit;
+	TransamVariables->xidLogLimit = xidLogLimit;
 	TransamVariables->xidWarnLimit = xidWarnLimit;
 	TransamVariables->xidStopLimit = xidStopLimit;
 	TransamVariables->xidWrapLimit = xidWrapLimit;
@@ -470,7 +498,11 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
 		IsUnderPostmaster && !InRecovery)
 		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
 
-	/* Give an immediate warning if past the wrap warn point */
+	/*
+	 * Give an immediate warning if past the wrap warn point.  We don't bother
+	 * with xidLogLimit here, as it's unlikely to apply.  We leave that part
+	 * to GetNewTransactionId() instead.
+	 */
 	if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery)
 	{
 		char	   *oldest_datname;
diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index c9e20418275..a1bd4259f86 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -203,8 +203,8 @@ FullTransactionIdAdvance(FullTransactionId *dest)
  * LWLocks.
  *
  * Note: xidWrapLimit and oldestXidDB are not "active" values, but are
- * used just to generate useful messages when xidWarnLimit or xidStopLimit
- * are exceeded.
+ * used just to generate useful messages when xidLogLimit, xidWarnLimit, or
+ * xidStopLimit are exceeded.
  */
 typedef struct TransamVariablesData
 {
@@ -221,6 +221,7 @@ typedef struct TransamVariablesData
 
 	TransactionId oldestXid;	/* cluster-wide minimum datfrozenxid */
 	TransactionId xidVacLimit;	/* start forcing autovacuums here */
+	TransactionId xidLogLimit;	/* start logging periodically here */
 	TransactionId xidWarnLimit; /* start complaining here */
 	TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */
 	TransactionId xidWrapLimit; /* where the world ends */
-- 
2.39.5 (Apple Git-154)


--hUJBJnA49L/nki7N--