diff options
author | Andres Freund <andres@anarazel.de> | 2025-10-08 17:01:10 -0400 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2025-10-08 17:04:07 -0400 |
commit | 5e89985928795f243dc287210c2aa016dfd00bfe (patch) | |
tree | 6de165ceec9579216eb42a69c838ec1ca4624124 /src/backend/storage/buffer/bufmgr.c | |
parent | 3baae90013df58a8d124afa79df07075b8ebea09 (diff) |
bufmgr: Don't lock buffer header in StrategyGetBuffer()
Previously StrategyGetBuffer() acquired the buffer header spinlock for every
buffer, whether it was reusable or not. If reusable, it'd be returned, with
the lock held, to GetVictimBuffer(), which then would pin the buffer with
PinBuffer_Locked(). That's somewhat violating the spirit of the guidelines for
holding spinlocks (i.e. that they are only held for a few lines of consecutive
code) and necessitates using PinBuffer_Locked(), which scales worse than
PinBuffer() due to holding the spinlock. This alone makes it worth changing
the code.
However, the main reason to change this is that a future commit will make
PinBuffer_Locked() slower (due to making UnlockBufHdr() slower), to gain
scalability for the much more common case of pinning a pre-existing buffer. By
pinning the buffer with a single atomic operation, iff the buffer is reusable,
we avoid any potential regression for miss-heavy workloads. There strictly are
fewer atomic operations for each potential buffer after this change.
The price for this improvement is that freelist.c needs two CAS loops and
needs to be able to set up the resource accounting for pinned buffers. The
latter is achieved by exposing a new function for that purpose from bufmgr.c,
that seems better than exposing the entire private refcount infrastructure.
The improvement seems worth the complexity.
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/fvfmkr5kk4nyex56ejgxj3uzi63isfxovp2biecb4bspbjrze7@az2pljabhnff
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 45 |
1 files changed, 22 insertions, 23 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index da71fcf8d0e..ca62b9cdcf0 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -518,7 +518,6 @@ static void PinBuffer_Locked(BufferDesc *buf); static void UnpinBuffer(BufferDesc *buf); static void UnpinBufferNoOwner(BufferDesc *buf); static void BufferSync(int flags); -static uint32 WaitBufHdrUnlocked(BufferDesc *buf); static int SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context); static void WaitIO(BufferDesc *buf); @@ -2326,8 +2325,8 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context) bool from_ring; /* - * Ensure, while the spinlock's not yet held, that there's a free refcount - * entry, and a resource owner slot for the pin. + * Ensure, before we pin a victim buffer, that there's a free refcount + * entry and resource owner slot for the pin. */ ReservePrivateRefCountEntry(); ResourceOwnerEnlarge(CurrentResourceOwner); @@ -2336,17 +2335,12 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context) again: /* - * Select a victim buffer. The buffer is returned with its header - * spinlock still held! + * Select a victim buffer. The buffer is returned pinned and owned by + * this backend. */ buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring); buf = BufferDescriptorGetBuffer(buf_hdr); - Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0); - - /* Pin the buffer and then release the buffer spinlock */ - PinBuffer_Locked(buf_hdr); - /* * We shouldn't have any other pins for this buffer. */ @@ -3118,7 +3112,7 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, { result = (buf_state & BM_VALID) != 0; - ref = NewPrivateRefCountEntry(b); + TrackNewBufferPin(b); /* * Assume that we acquired a buffer pin for the purposes of @@ -3150,11 +3144,12 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, * cannot meddle with that. */ result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0; + + Assert(ref->refcount > 0); + ref->refcount++; + ResourceOwnerRememberBuffer(CurrentResourceOwner, b); } - ref->refcount++; - Assert(ref->refcount > 0); - ResourceOwnerRememberBuffer(CurrentResourceOwner, b); return result; } @@ -3183,8 +3178,6 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, static void PinBuffer_Locked(BufferDesc *buf) { - Buffer b; - PrivateRefCountEntry *ref; uint32 buf_state; /* @@ -3209,12 +3202,7 @@ PinBuffer_Locked(BufferDesc *buf) buf_state += BUF_REFCOUNT_ONE; UnlockBufHdr(buf, buf_state); - b = BufferDescriptorGetBuffer(buf); - - ref = NewPrivateRefCountEntry(b); - ref->refcount++; - - ResourceOwnerRememberBuffer(CurrentResourceOwner, b); + TrackNewBufferPin(BufferDescriptorGetBuffer(buf)); } /* @@ -3332,6 +3320,17 @@ UnpinBufferNoOwner(BufferDesc *buf) } } +inline void +TrackNewBufferPin(Buffer buf) +{ + PrivateRefCountEntry *ref; + + ref = NewPrivateRefCountEntry(buf); + ref->refcount++; + + ResourceOwnerRememberBuffer(CurrentResourceOwner, buf); +} + #define ST_SORT sort_checkpoint_bufferids #define ST_ELEMENT_TYPE CkptSortItem #define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b) @@ -6288,7 +6287,7 @@ LockBufHdr(BufferDesc *desc) * Obviously the buffer could be locked by the time the value is returned, so * this is primarily useful in CAS style loops. */ -static uint32 +pg_noinline uint32 WaitBufHdrUnlocked(BufferDesc *buf) { SpinDelayStatus delayStatus; |