summaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/bufmgr.c
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2025-10-08 17:01:10 -0400
committerAndres Freund <andres@anarazel.de>2025-10-08 17:04:07 -0400
commit5e89985928795f243dc287210c2aa016dfd00bfe (patch)
tree6de165ceec9579216eb42a69c838ec1ca4624124 /src/backend/storage/buffer/bufmgr.c
parent3baae90013df58a8d124afa79df07075b8ebea09 (diff)
bufmgr: Don't lock buffer header in StrategyGetBuffer()
Previously StrategyGetBuffer() acquired the buffer header spinlock for every buffer, whether it was reusable or not. If reusable, it'd be returned, with the lock held, to GetVictimBuffer(), which then would pin the buffer with PinBuffer_Locked(). That's somewhat violating the spirit of the guidelines for holding spinlocks (i.e. that they are only held for a few lines of consecutive code) and necessitates using PinBuffer_Locked(), which scales worse than PinBuffer() due to holding the spinlock. This alone makes it worth changing the code. However, the main reason to change this is that a future commit will make PinBuffer_Locked() slower (due to making UnlockBufHdr() slower), to gain scalability for the much more common case of pinning a pre-existing buffer. By pinning the buffer with a single atomic operation, iff the buffer is reusable, we avoid any potential regression for miss-heavy workloads. There strictly are fewer atomic operations for each potential buffer after this change. The price for this improvement is that freelist.c needs two CAS loops and needs to be able to set up the resource accounting for pinned buffers. The latter is achieved by exposing a new function for that purpose from bufmgr.c, that seems better than exposing the entire private refcount infrastructure. The improvement seems worth the complexity. Reviewed-by: Robert Haas <robertmhaas@gmail.com> Reviewed-by: Matthias van de Meent <boekewurm+postgres@gmail.com> Discussion: https://postgr.es/m/fvfmkr5kk4nyex56ejgxj3uzi63isfxovp2biecb4bspbjrze7@az2pljabhnff
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r--src/backend/storage/buffer/bufmgr.c45
1 files changed, 22 insertions, 23 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index da71fcf8d0e..ca62b9cdcf0 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -518,7 +518,6 @@ static void PinBuffer_Locked(BufferDesc *buf);
static void UnpinBuffer(BufferDesc *buf);
static void UnpinBufferNoOwner(BufferDesc *buf);
static void BufferSync(int flags);
-static uint32 WaitBufHdrUnlocked(BufferDesc *buf);
static int SyncOneBuffer(int buf_id, bool skip_recently_used,
WritebackContext *wb_context);
static void WaitIO(BufferDesc *buf);
@@ -2326,8 +2325,8 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
bool from_ring;
/*
- * Ensure, while the spinlock's not yet held, that there's a free refcount
- * entry, and a resource owner slot for the pin.
+ * Ensure, before we pin a victim buffer, that there's a free refcount
+ * entry and resource owner slot for the pin.
*/
ReservePrivateRefCountEntry();
ResourceOwnerEnlarge(CurrentResourceOwner);
@@ -2336,17 +2335,12 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
again:
/*
- * Select a victim buffer. The buffer is returned with its header
- * spinlock still held!
+ * Select a victim buffer. The buffer is returned pinned and owned by
+ * this backend.
*/
buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
buf = BufferDescriptorGetBuffer(buf_hdr);
- Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
-
- /* Pin the buffer and then release the buffer spinlock */
- PinBuffer_Locked(buf_hdr);
-
/*
* We shouldn't have any other pins for this buffer.
*/
@@ -3118,7 +3112,7 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
{
result = (buf_state & BM_VALID) != 0;
- ref = NewPrivateRefCountEntry(b);
+ TrackNewBufferPin(b);
/*
* Assume that we acquired a buffer pin for the purposes of
@@ -3150,11 +3144,12 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
* cannot meddle with that.
*/
result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0;
+
+ Assert(ref->refcount > 0);
+ ref->refcount++;
+ ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
}
- ref->refcount++;
- Assert(ref->refcount > 0);
- ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
return result;
}
@@ -3183,8 +3178,6 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
static void
PinBuffer_Locked(BufferDesc *buf)
{
- Buffer b;
- PrivateRefCountEntry *ref;
uint32 buf_state;
/*
@@ -3209,12 +3202,7 @@ PinBuffer_Locked(BufferDesc *buf)
buf_state += BUF_REFCOUNT_ONE;
UnlockBufHdr(buf, buf_state);
- b = BufferDescriptorGetBuffer(buf);
-
- ref = NewPrivateRefCountEntry(b);
- ref->refcount++;
-
- ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
+ TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
}
/*
@@ -3332,6 +3320,17 @@ UnpinBufferNoOwner(BufferDesc *buf)
}
}
+inline void
+TrackNewBufferPin(Buffer buf)
+{
+ PrivateRefCountEntry *ref;
+
+ ref = NewPrivateRefCountEntry(buf);
+ ref->refcount++;
+
+ ResourceOwnerRememberBuffer(CurrentResourceOwner, buf);
+}
+
#define ST_SORT sort_checkpoint_bufferids
#define ST_ELEMENT_TYPE CkptSortItem
#define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
@@ -6288,7 +6287,7 @@ LockBufHdr(BufferDesc *desc)
* Obviously the buffer could be locked by the time the value is returned, so
* this is primarily useful in CAS style loops.
*/
-static uint32
+pg_noinline uint32
WaitBufHdrUnlocked(BufferDesc *buf)
{
SpinDelayStatus delayStatus;