v11-0007-Refactor-SyncOneBuffer-for-bgwriter-use-only.patch
text/x-patch
Filename: v11-0007-Refactor-SyncOneBuffer-for-bgwriter-use-only.patch
Type: text/x-patch
Part: 6
Message:
Re: Checkpointer write combining
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v11-0007
Subject: Refactor SyncOneBuffer for bgwriter use only
| File | + | − |
|---|---|---|
| src/backend/storage/buffer/bufmgr.c | 56 | 40 |
From c614f3edcc135cad62bec92939469970389e752f Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 15 Oct 2025 16:16:58 -0400
Subject: [PATCH v11 7/7] Refactor SyncOneBuffer for bgwriter use only
Since xxx, only bgwriter uses SyncOneBuffer, so we can remove the
skip_recently_used parameter and make that behavior the default.
While we are at it, 5e89985928795f243 introduced the pattern of using a
CAS loop instead of locking the buffer header and then calling
PinBuffer_Locked(). Do that in SyncOneBuffer() so we can avoid taking
the buffer header spinlock in the common case that the buffer is
recently used.
Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/2FA0BAC7-5413-4ABD-94CA-4398FE77750D%40gmail.com
---
src/backend/storage/buffer/bufmgr.c | 96 +++++++++++++++++------------
1 file changed, 56 insertions(+), 40 deletions(-)
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 8cc2fc06646..73f2594207f 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -515,8 +515,7 @@ static void UnpinBuffer(BufferDesc *buf);
static void UnpinBufferNoOwner(BufferDesc *buf);
static uint32 CheckpointerMaxBatchSize(void);
static void BufferSync(int flags);
-static int SyncOneBuffer(int buf_id, bool skip_recently_used,
- WritebackContext *wb_context);
+static int SyncOneBuffer(int buf_id, WritebackContext *wb_context);
static void WaitIO(BufferDesc *buf);
static void AbortBufferIO(Buffer buffer);
static void shared_buffer_write_error_callback(void *arg);
@@ -4000,8 +3999,7 @@ BgBufferSync(WritebackContext *wb_context)
/* Execute the LRU scan */
while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
{
- int sync_state = SyncOneBuffer(next_to_clean, true,
- wb_context);
+ int sync_state = SyncOneBuffer(next_to_clean, wb_context);
if (++next_to_clean >= NBuffers)
{
@@ -4064,8 +4062,8 @@ BgBufferSync(WritebackContext *wb_context)
/*
* SyncOneBuffer -- process a single buffer during syncing.
*
- * If skip_recently_used is true, we don't write currently-pinned buffers, nor
- * buffers marked recently used, as these are not replacement candidates.
+ * We don't write currently-pinned buffers, nor buffers marked recently used,
+ * as these are not replacement candidates.
*
* Returns a bitmask containing the following flag bits:
* BUF_WRITTEN: we wrote the buffer.
@@ -4076,53 +4074,71 @@ BgBufferSync(WritebackContext *wb_context)
* after locking it, but we don't care all that much.)
*/
static int
-SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
+SyncOneBuffer(int buf_id, WritebackContext *wb_context)
{
BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
int result = 0;
+ uint32 old_buf_state;
uint32 buf_state;
BufferTag tag;
- /* Make sure we can handle the pin */
- ReservePrivateRefCountEntry();
- ResourceOwnerEnlarge(CurrentResourceOwner);
-
/*
- * Check whether buffer needs writing.
- *
- * We can make this check without taking the buffer content lock so long
- * as we mark pages dirty in access methods *before* logging changes with
- * XLogInsert(): if someone marks the buffer dirty just after our check we
- * don't worry because our checkpoint.redo points before log record for
- * upcoming changes and so we are not required to write such dirty buffer.
+ * Check whether the buffer can be used and pin it if so. Do this using a
+ * CAS loop, to avoid having to lock the buffer header.
*/
- buf_state = LockBufHdr(bufHdr);
-
- if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
- BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
+ old_buf_state = pg_atomic_read_u32(&bufHdr->state);
+ for (;;)
{
+ buf_state = old_buf_state;
+
+ /*
+ * We can make these checks without taking the buffer content lock so
+ * long as we mark pages dirty in access methods *before* logging
+ * changes with XLogInsert(): if someone marks the buffer dirty just
+ * after our check we don't worry because our checkpoint.redo points
+ * before log record for upcoming changes and so we are not required
+ * to write such dirty buffer.
+ */
+ if (BUF_STATE_GET_REFCOUNT(buf_state) != 0 ||
+ BUF_STATE_GET_USAGECOUNT(buf_state) != 0)
+ {
+ /* Don't write recently-used buffers */
+ return result;
+ }
+
result |= BUF_REUSABLE;
- }
- else if (skip_recently_used)
- {
- /* Caller told us not to write recently-used buffers */
- UnlockBufHdr(bufHdr);
- return result;
- }
- if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
- {
- /* It's clean, so nothing to do */
- UnlockBufHdr(bufHdr);
- return result;
+ if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
+ {
+ /* It's clean, so nothing to do */
+ return result;
+ }
+
+ if (unlikely(buf_state & BM_LOCKED))
+ {
+ old_buf_state = WaitBufHdrUnlocked(bufHdr);
+ continue;
+ }
+
+ /* Make sure we can handle the pin */
+ ReservePrivateRefCountEntry();
+ ResourceOwnerEnlarge(CurrentResourceOwner);
+
+ /* pin the buffer if the CAS succeeds */
+ buf_state += BUF_REFCOUNT_ONE;
+
+ if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
+ buf_state))
+ {
+ TrackNewBufferPin(BufferDescriptorGetBuffer(bufHdr));
+ break;
+ }
}
/*
- * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the
- * buffer is clean by the time we've locked it.)
+ * Share lock and write it out (FlushBuffer will do nothing if the buffer
+ * is clean by the time we've locked it.)
*/
- PinBuffer_Locked(bufHdr);
-
FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
tag = bufHdr->tag;
@@ -4130,8 +4146,8 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
UnpinBuffer(bufHdr);
/*
- * SyncOneBuffer() is only called by checkpointer and bgwriter, so
- * IOContext will always be IOCONTEXT_NORMAL.
+ * SyncOneBuffer() is only called by bgwriter, so IOContext will always be
+ * IOCONTEXT_NORMAL.
*/
ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
--
2.43.0