From 524d031716b61e44d29fed8a1d72b64125eebc1c Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 15 Oct 2025 16:16:58 -0400
Subject: [PATCH v10 7/7] Refactor SyncOneBuffer for bgwriter use only

Since xxx, only bgwriter uses SyncOneBuffer, so we can remove the
skip_recently_used parameter and make that behavior the default.

While we are at it, 5e89985928795f243 introduced the pattern of using a
CAS loop instead of locking the buffer header and then calling
PinBuffer_Locked(). Do that in SyncOneBuffer() so we can avoid taking
the buffer header spinlock in the common case that the buffer is
recently used.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/2FA0BAC7-5413-4ABD-94CA-4398FE77750D%40gmail.com
---
 src/backend/storage/buffer/bufmgr.c | 96 +++++++++++++++++------------
 1 file changed, 56 insertions(+), 40 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index bfc07edb40e..378ae71a5ee 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -515,8 +515,7 @@ static void UnpinBuffer(BufferDesc *buf);
 static void UnpinBufferNoOwner(BufferDesc *buf);
 static uint32 CheckpointerMaxBatchSize(void);
 static void BufferSync(int flags);
-static int	SyncOneBuffer(int buf_id, bool skip_recently_used,
-						  WritebackContext *wb_context);
+static int	SyncOneBuffer(int buf_id, WritebackContext *wb_context);
 static void WaitIO(BufferDesc *buf);
 static void AbortBufferIO(Buffer buffer);
 static void shared_buffer_write_error_callback(void *arg);
@@ -4000,8 +3999,7 @@ BgBufferSync(WritebackContext *wb_context)
 	/* Execute the LRU scan */
 	while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
 	{
-		int			sync_state = SyncOneBuffer(next_to_clean, true,
-											   wb_context);
+		int			sync_state = SyncOneBuffer(next_to_clean, wb_context);
 
 		if (++next_to_clean >= NBuffers)
 		{
@@ -4064,8 +4062,8 @@ BgBufferSync(WritebackContext *wb_context)
 /*
  * SyncOneBuffer -- process a single buffer during syncing.
  *
- * If skip_recently_used is true, we don't write currently-pinned buffers, nor
- * buffers marked recently used, as these are not replacement candidates.
+ * We don't write currently-pinned buffers, nor buffers marked recently used,
+ * as these are not replacement candidates.
  *
  * Returns a bitmask containing the following flag bits:
  *	BUF_WRITTEN: we wrote the buffer.
@@ -4076,53 +4074,71 @@ BgBufferSync(WritebackContext *wb_context)
  * after locking it, but we don't care all that much.)
  */
 static int
-SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
+SyncOneBuffer(int buf_id, WritebackContext *wb_context)
 {
 	BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
 	int			result = 0;
+	uint32		old_buf_state;
 	uint32		buf_state;
 	BufferTag	tag;
 
-	/* Make sure we can handle the pin */
-	ReservePrivateRefCountEntry();
-	ResourceOwnerEnlarge(CurrentResourceOwner);
-
 	/*
-	 * Check whether buffer needs writing.
-	 *
-	 * We can make this check without taking the buffer content lock so long
-	 * as we mark pages dirty in access methods *before* logging changes with
-	 * XLogInsert(): if someone marks the buffer dirty just after our check we
-	 * don't worry because our checkpoint.redo points before log record for
-	 * upcoming changes and so we are not required to write such dirty buffer.
+	 * Check whether the buffer can be used and pin it if so. Do this using a
+	 * CAS loop, to avoid having to lock the buffer header.
 	 */
-	buf_state = LockBufHdr(bufHdr);
-
-	if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
-		BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
+	old_buf_state = pg_atomic_read_u32(&bufHdr->state);
+	for (;;)
 	{
+		buf_state = old_buf_state;
+
+		/*
+		 * We can make these checks without taking the buffer content lock so
+		 * long as we mark pages dirty in access methods *before* logging
+		 * changes with XLogInsert(): if someone marks the buffer dirty just
+		 * after our check we don't worry because our checkpoint.redo points
+		 * before log record for upcoming changes and so we are not required
+		 * to write such dirty buffer.
+		 */
+		if (BUF_STATE_GET_REFCOUNT(buf_state) != 0 ||
+			BUF_STATE_GET_USAGECOUNT(buf_state) != 0)
+		{
+			/* Don't write recently-used buffers */
+			return result;
+		}
+
 		result |= BUF_REUSABLE;
-	}
-	else if (skip_recently_used)
-	{
-		/* Caller told us not to write recently-used buffers */
-		UnlockBufHdr(bufHdr);
-		return result;
-	}
 
-	if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
-	{
-		/* It's clean, so nothing to do */
-		UnlockBufHdr(bufHdr);
-		return result;
+		if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
+		{
+			/* It's clean, so nothing to do */
+			return result;
+		}
+
+		if (unlikely(buf_state & BM_LOCKED))
+		{
+			old_buf_state = WaitBufHdrUnlocked(bufHdr);
+			continue;
+		}
+
+		/* Make sure we can handle the pin */
+		ReservePrivateRefCountEntry();
+		ResourceOwnerEnlarge(CurrentResourceOwner);
+
+		/* pin the buffer if the CAS succeeds */
+		buf_state += BUF_REFCOUNT_ONE;
+
+		if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
+										   buf_state))
+		{
+			TrackNewBufferPin(BufferDescriptorGetBuffer(bufHdr));
+			break;
+		}
 	}
 
 	/*
-	 * Pin it, share-lock it, write it.  (FlushBuffer will do nothing if the
-	 * buffer is clean by the time we've locked it.)
+	 * Share lock and write it out (FlushBuffer will do nothing if the buffer
+	 * is clean by the time we've locked it.)
 	 */
-	PinBuffer_Locked(bufHdr);
-
 	FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
 
 	tag = bufHdr->tag;
@@ -4130,8 +4146,8 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 	UnpinBuffer(bufHdr);
 
 	/*
-	 * SyncOneBuffer() is only called by checkpointer and bgwriter, so
-	 * IOContext will always be IOCONTEXT_NORMAL.
+	 * SyncOneBuffer() is only called by bgwriter, so IOContext will always be
+	 * IOCONTEXT_NORMAL.
 	 */
 	ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
 
-- 
2.43.0

