Thread

  1. [PATCH v3 3/3] Periodically emit server logs when fewer than 500M transaction IDs remain.

    Nathan Bossart <nathan@postgresql.org> — 2025-12-12T19:13:55Z

    ---
     src/backend/access/transam/multixact.c | 40 +++++++++++++++++++++++---
     src/backend/access/transam/varsup.c    | 40 +++++++++++++++++++++++---
     src/include/access/transam.h           |  5 ++--
     3 files changed, 75 insertions(+), 10 deletions(-)
    
    diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
    index 27a0baab8c7..abd89c3a73d 100644
    --- a/src/backend/access/transam/multixact.c
    +++ b/src/backend/access/transam/multixact.c
    @@ -153,6 +153,7 @@ typedef struct MultiXactStateData
     
     	/* support for anti-wraparound measures */
     	MultiXactId multiVacLimit;
    +	MultiXactId multiLogLimit;
     	MultiXactId multiWarnLimit;
     	MultiXactId multiStopLimit;
     	MultiXactId multiWrapLimit;
    @@ -947,6 +948,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
     	 * If we're past multiVacLimit or the safe threshold for member storage
     	 * space, or we don't know what the safe threshold for member storage is,
     	 * start trying to force autovacuum cycles.
    +	 * If we're past multiLogLimit, start issuing logs periodically.
     	 * If we're past multiWarnLimit, start issuing warnings.
     	 * If we're past multiStopLimit, refuse to create new MultiXactIds.
     	 *
    @@ -962,6 +964,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
     		 * possibility of deadlock while doing get_database_name(). First,
     		 * copy all the shared values we'll need in this path.
     		 */
    +		MultiXactId multiLogLimit = MultiXactState->multiLogLimit;
     		MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
     		MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
     		MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
    @@ -1005,13 +1008,27 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
     		if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
     			SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
     
    -		if (!MultiXactIdPrecedes(result, multiWarnLimit))
    +		if (!MultiXactIdPrecedes(result, multiWarnLimit) ||
    +			(!MultiXactIdPrecedes(result, multiLogLimit) &&
    +			 result % 1000000 == 0))
     		{
     			char	   *oldest_datname = get_database_name(oldest_datoid);
    +			int			elevel;
    +
    +			/*
    +			 * The periodic messages go only to the server log, to avoid
    +			 * confusing clients: a client-visible WARNING would appear once
    +			 * and then vanish for the next 1M multis.  Once the warning limit
    +			 * is reached, we emit a proper WARNING every time.
    +			 */
    +			if (!MultiXactIdPrecedes(result, multiWarnLimit))
    +				elevel = WARNING;
    +			else
    +				elevel = LOG_SERVER_ONLY;
     
     			/* complain even if that DB has disappeared */
     			if (oldest_datname)
    -				ereport(WARNING,
    +				ereport(elevel,
     						(errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
     									   "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
     									   multiWrapLimit - result,
    @@ -1022,7 +1039,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
     						 errhint("Execute a database-wide VACUUM in that database.\n"
     								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
     			else
    -				ereport(WARNING,
    +				ereport(elevel,
     						(errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
     									   "database with OID %u must be vacuumed before %u more MultiXactIds are used",
     									   multiWrapLimit - result,
    @@ -2015,6 +2032,7 @@ void
     SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
     {
     	MultiXactId multiVacLimit;
    +	MultiXactId multiLogLimit;
     	MultiXactId multiWarnLimit;
     	MultiXactId multiStopLimit;
     	MultiXactId multiWrapLimit;
    @@ -2053,6 +2071,15 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
     	if (multiWarnLimit < FirstMultiXactId)
     		multiWarnLimit -= FirstMultiXactId;
     
    +	/*
    +	 * We'll start complaining every 1M multis when we get within 500M multis
    +	 * of data loss.  The idea is to provide an early warning system that is
    +	 * less noisy than multiWarnLimit but provides ample time to react.
    +	 */
    +	multiLogLimit = multiWrapLimit - 500000000;
    +	if (multiLogLimit < FirstMultiXactId)
    +		multiLogLimit -= FirstMultiXactId;
    +
     	/*
     	 * We'll start trying to force autovacuums when oldest_datminmxid gets to
     	 * be more than autovacuum_multixact_freeze_max_age mxids old.
    @@ -2070,6 +2097,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
     	MultiXactState->oldestMultiXactId = oldest_datminmxid;
     	MultiXactState->oldestMultiXactDB = oldest_datoid;
     	MultiXactState->multiVacLimit = multiVacLimit;
    +	MultiXactState->multiLogLimit = multiLogLimit;
     	MultiXactState->multiWarnLimit = multiWarnLimit;
     	MultiXactState->multiStopLimit = multiStopLimit;
     	MultiXactState->multiWrapLimit = multiWrapLimit;
    @@ -2112,7 +2140,11 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
     	if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
     		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
     
    -	/* Give an immediate warning if past the wrap warn point */
    +	/*
    +	 * Give an immediate warning if past the wrap warn point.  We don't bother
    +	 * with multiLogLimit here, as it's unlikely to apply.  We leave that part
    +	 * to GetNewMultiXactId() instead.
    +	 */
     	if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
     	{
     		char	   *oldest_datname;
    diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
    index 98aeea96e8a..0f633cc0e14 100644
    --- a/src/backend/access/transam/varsup.c
    +++ b/src/backend/access/transam/varsup.c
    @@ -112,6 +112,7 @@ GetNewTransactionId(bool isSubXact)
     	 * catastrophic data loss due to XID wraparound.  The basic rules are:
     	 *
     	 * If we're past xidVacLimit, start trying to force autovacuum cycles.
    +	 * If we're past xidLogLimit, start issuing logs periodically.
     	 * If we're past xidWarnLimit, start issuing warnings.
     	 * If we're past xidStopLimit, refuse to execute transactions, unless
     	 * we are running in single-user mode (which gives an escape hatch
    @@ -129,6 +130,7 @@ GetNewTransactionId(bool isSubXact)
     		 * possibility of deadlock while doing get_database_name(). First,
     		 * copy all the shared values we'll need in this path.
     		 */
    +		TransactionId xidLogLimit = TransamVariables->xidLogLimit;
     		TransactionId xidWarnLimit = TransamVariables->xidWarnLimit;
     		TransactionId xidStopLimit = TransamVariables->xidStopLimit;
     		TransactionId xidWrapLimit = TransamVariables->xidWrapLimit;
    @@ -165,13 +167,27 @@ GetNewTransactionId(bool isSubXact)
     						 errhint("Execute a database-wide VACUUM in that database.\n"
     								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
     		}
    -		else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
    +		else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit) ||
    +				 (TransactionIdFollowsOrEquals(xid, xidLogLimit) &&
    +				  xid % 1000000 == 0))
     		{
     			char	   *oldest_datname = get_database_name(oldest_datoid);
    +			int			elevel;
    +
    +			/*
    +			 * The periodic messages go only to the server log, to avoid
    +			 * confusing clients: a client-visible WARNING would appear once
    +			 * and then vanish for the next 1M transactions.  Once the warning
    +			 * limit is reached, we emit a proper WARNING every time.
    +			 */
    +			if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
    +				elevel = WARNING;
    +			else
    +				elevel = LOG_SERVER_ONLY;
     
     			/* complain even if that DB has disappeared */
     			if (oldest_datname)
    -				ereport(WARNING,
    +				ereport(elevel,
     						(errmsg("database \"%s\" must be vacuumed within %u transactions",
     								oldest_datname,
     								xidWrapLimit - xid),
    @@ -180,7 +196,7 @@ GetNewTransactionId(bool isSubXact)
     						 errhint("To avoid transaction ID assignment failures, execute a database-wide VACUUM in that database.\n"
     								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
     			else
    -				ereport(WARNING,
    +				ereport(elevel,
     						(errmsg("database with OID %u must be vacuumed within %u transactions",
     								oldest_datoid,
     								xidWrapLimit - xid),
    @@ -376,6 +392,7 @@ void
     SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
     {
     	TransactionId xidVacLimit;
    +	TransactionId xidLogLimit;
     	TransactionId xidWarnLimit;
     	TransactionId xidStopLimit;
     	TransactionId xidWrapLimit;
    @@ -424,6 +441,16 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
     	if (xidWarnLimit < FirstNormalTransactionId)
     		xidWarnLimit -= FirstNormalTransactionId;
     
    +	/*
    +	 * We'll start complaining every 1M transactions when we get within 500M
    +	 * transactions of data loss.  The idea is to provide an early warning
    +	 * system that is less noisy than xidWarnLimit but provides ample time to
    +	 * react.
    +	 */
    +	xidLogLimit = xidWrapLimit - 500000000;
    +	if (xidLogLimit < FirstNormalTransactionId)
    +		xidLogLimit -= FirstNormalTransactionId;
    +
     	/*
     	 * We'll start trying to force autovacuums when oldest_datfrozenxid gets
     	 * to be more than autovacuum_freeze_max_age transactions old.
    @@ -447,6 +474,7 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
     	LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
     	TransamVariables->oldestXid = oldest_datfrozenxid;
     	TransamVariables->xidVacLimit = xidVacLimit;
    +	TransamVariables->xidLogLimit = xidLogLimit;
     	TransamVariables->xidWarnLimit = xidWarnLimit;
     	TransamVariables->xidStopLimit = xidStopLimit;
     	TransamVariables->xidWrapLimit = xidWrapLimit;
    @@ -470,7 +498,11 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
     		IsUnderPostmaster && !InRecovery)
     		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
     
    -	/* Give an immediate warning if past the wrap warn point */
    +	/*
    +	 * Give an immediate warning if past the wrap warn point.  We don't bother
    +	 * with xidLogLimit here, as it's unlikely to apply.  We leave that part
    +	 * to GetNewTransactionId() instead.
    +	 */
     	if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery)
     	{
     		char	   *oldest_datname;
    diff --git a/src/include/access/transam.h b/src/include/access/transam.h
    index c9e20418275..a1bd4259f86 100644
    --- a/src/include/access/transam.h
    +++ b/src/include/access/transam.h
    @@ -203,8 +203,8 @@ FullTransactionIdAdvance(FullTransactionId *dest)
      * LWLocks.
      *
      * Note: xidWrapLimit and oldestXidDB are not "active" values, but are
    - * used just to generate useful messages when xidWarnLimit or xidStopLimit
    - * are exceeded.
    + * used just to generate useful messages when xidLogLimit, xidWarnLimit, or
    + * xidStopLimit are exceeded.
      */
     typedef struct TransamVariablesData
     {
    @@ -221,6 +221,7 @@ typedef struct TransamVariablesData
     
     	TransactionId oldestXid;	/* cluster-wide minimum datfrozenxid */
     	TransactionId xidVacLimit;	/* start forcing autovacuums here */
    +	TransactionId xidLogLimit;	/* start logging periodically here */
     	TransactionId xidWarnLimit; /* start complaining here */
     	TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */
     	TransactionId xidWrapLimit; /* where the world ends */
    -- 
    2.39.5 (Apple Git-154)
    
    
    --hUJBJnA49L/nki7N--