Diffstat (limited to 'src')
-rw-r--r--   src/backend/storage/buffer/bufmgr.c    |  45
-rw-r--r--   src/backend/storage/buffer/freelist.c  | 149
-rw-r--r--   src/include/storage/buf_internals.h    |   4
3 files changed, 132 insertions, 66 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index da71fcf8d0e..ca62b9cdcf0 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -518,7 +518,6 @@ static void PinBuffer_Locked(BufferDesc *buf);
 static void UnpinBuffer(BufferDesc *buf);
 static void UnpinBufferNoOwner(BufferDesc *buf);
 static void BufferSync(int flags);
-static uint32 WaitBufHdrUnlocked(BufferDesc *buf);
 static int  SyncOneBuffer(int buf_id, bool skip_recently_used,
                           WritebackContext *wb_context);
 static void WaitIO(BufferDesc *buf);
@@ -2326,8 +2325,8 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
     bool        from_ring;
 
     /*
-     * Ensure, while the spinlock's not yet held, that there's a free refcount
-     * entry, and a resource owner slot for the pin.
+     * Ensure, before we pin a victim buffer, that there's a free refcount
+     * entry and resource owner slot for the pin.
      */
     ReservePrivateRefCountEntry();
     ResourceOwnerEnlarge(CurrentResourceOwner);
@@ -2336,17 +2335,12 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
 again:
 
     /*
-     * Select a victim buffer.  The buffer is returned with its header
-     * spinlock still held!
+     * Select a victim buffer.  The buffer is returned pinned and owned by
+     * this backend.
      */
     buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
     buf = BufferDescriptorGetBuffer(buf_hdr);
 
-    Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
-
-    /* Pin the buffer and then release the buffer spinlock */
-    PinBuffer_Locked(buf_hdr);
-
     /*
      * We shouldn't have any other pins for this buffer.
      */
@@ -3118,7 +3112,7 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
     {
         result = (buf_state & BM_VALID) != 0;
 
-        ref = NewPrivateRefCountEntry(b);
+        TrackNewBufferPin(b);
 
         /*
          * Assume that we acquired a buffer pin for the purposes of
@@ -3150,11 +3144,12 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
          * cannot meddle with that.
          */
         result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0;
+
+        Assert(ref->refcount > 0);
+        ref->refcount++;
+        ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
     }
 
-    ref->refcount++;
-    Assert(ref->refcount > 0);
-    ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
     return result;
 }
@@ -3183,8 +3178,6 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
 static void
 PinBuffer_Locked(BufferDesc *buf)
 {
-    Buffer      b;
-    PrivateRefCountEntry *ref;
     uint32      buf_state;
 
     /*
@@ -3209,12 +3202,7 @@ PinBuffer_Locked(BufferDesc *buf)
     buf_state += BUF_REFCOUNT_ONE;
     UnlockBufHdr(buf, buf_state);
 
-    b = BufferDescriptorGetBuffer(buf);
-
-    ref = NewPrivateRefCountEntry(b);
-    ref->refcount++;
-
-    ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
+    TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
 }
 
 /*
@@ -3332,6 +3320,17 @@ UnpinBufferNoOwner(BufferDesc *buf)
     }
 }
 
+inline void
+TrackNewBufferPin(Buffer buf)
+{
+    PrivateRefCountEntry *ref;
+
+    ref = NewPrivateRefCountEntry(buf);
+    ref->refcount++;
+
+    ResourceOwnerRememberBuffer(CurrentResourceOwner, buf);
+}
+
 #define ST_SORT sort_checkpoint_bufferids
 #define ST_ELEMENT_TYPE CkptSortItem
 #define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
@@ -6288,7 +6287,7 @@ LockBufHdr(BufferDesc *desc)
  * Obviously the buffer could be locked by the time the value is returned, so
  * this is primarily useful in CAS style loops.
  */
-static uint32
+pg_noinline uint32
 WaitBufHdrUnlocked(BufferDesc *buf)
 {
     SpinDelayStatus delayStatus;
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 7d59a92bd1a..7fe34d3ef4c 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -159,13 +159,16 @@ ClockSweepTick(void)
  * StrategyGetBuffer
  *
  *  Called by the bufmgr to get the next candidate buffer to use in
- *  BufferAlloc(). The only hard requirement BufferAlloc() has is that
+ *  GetVictimBuffer(). The only hard requirement GetVictimBuffer() has is that
 *   the selected buffer must not currently be pinned by anyone.
 *
 *   strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
- *  To ensure that no one else can pin the buffer before we do, we must
- *  return the buffer with the buffer header spinlock still held.
+ *  It is the callers responsibility to ensure the buffer ownership can be
+ *  tracked via TrackNewBufferPin().
+ *
+ *  The buffer is pinned and marked as owned, using TrackNewBufferPin(),
+ *  before returning.
  */
 BufferDesc *
 StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
@@ -173,7 +176,6 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r
     BufferDesc *buf;
     int         bgwprocno;
     int         trycounter;
-    uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
 
     *from_ring = false;
 
@@ -228,44 +230,79 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r
     trycounter = NBuffers;
     for (;;)
     {
+        uint32      old_buf_state;
+        uint32      local_buf_state;
+
         buf = GetBufferDescriptor(ClockSweepTick());
 
         /*
-         * If the buffer is pinned or has a nonzero usage_count, we cannot use
-         * it; decrement the usage_count (unless pinned) and keep scanning.
+         * Check whether the buffer can be used and pin it if so. Do this
+         * using a CAS loop, to avoid having to lock the buffer header.
          */
-        local_buf_state = LockBufHdr(buf);
-
-        if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
+        old_buf_state = pg_atomic_read_u32(&buf->state);
+        for (;;)
         {
+            local_buf_state = old_buf_state;
+
+            /*
+             * If the buffer is pinned or has a nonzero usage_count, we cannot
+             * use it; decrement the usage_count (unless pinned) and keep
+             * scanning.
+             */
+
+            if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0)
+            {
+                if (--trycounter == 0)
+                {
+                    /*
+                     * We've scanned all the buffers without making any state
+                     * changes, so all the buffers are pinned (or were when we
+                     * looked at them).  We could hope that someone will free
+                     * one eventually, but it's probably better to fail than
+                     * to risk getting stuck in an infinite loop.
+                     */
+                    elog(ERROR, "no unpinned buffers available");
+                }
+                break;
+            }
+
+            if (unlikely(local_buf_state & BM_LOCKED))
+            {
+                old_buf_state = WaitBufHdrUnlocked(buf);
+                continue;
+            }
+
             if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
             {
                 local_buf_state -= BUF_USAGECOUNT_ONE;
-                trycounter = NBuffers;
+
+                if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
+                                                   local_buf_state))
+                {
+                    trycounter = NBuffers;
+                    break;
+                }
             }
             else
             {
-                /* Found a usable buffer */
-                if (strategy != NULL)
-                    AddBufferToRing(strategy, buf);
-                *buf_state = local_buf_state;
-                return buf;
+                /* pin the buffer if the CAS succeeds */
+                local_buf_state += BUF_REFCOUNT_ONE;
+
+                if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
+                                                   local_buf_state))
+                {
+                    /* Found a usable buffer */
+                    if (strategy != NULL)
+                        AddBufferToRing(strategy, buf);
+                    *buf_state = local_buf_state;
+
+                    TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
+
+                    return buf;
+                }
             }
+        }
-        else if (--trycounter == 0)
-        {
-            /*
-             * We've scanned all the buffers without making any state changes,
-             * so all the buffers are pinned (or were when we looked at them).
-             * We could hope that someone will free one eventually, but it's
-             * probably better to fail than to risk getting stuck in an
-             * infinite loop.
-             */
-            UnlockBufHdr(buf, local_buf_state);
-            elog(ERROR, "no unpinned buffers available");
-        }
-        UnlockBufHdr(buf, local_buf_state);
     }
 }
@@ -614,13 +651,15 @@ FreeAccessStrategy(BufferAccessStrategy strategy)
  * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
  *      ring is empty / not usable.
  *
- * The bufhdr spin lock is held on the returned buffer.
+ * The buffer is pinned and marked as owned, using TrackNewBufferPin(), before
+ * returning.
  */
 static BufferDesc *
 GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
 {
     BufferDesc *buf;
     Buffer      bufnum;
+    uint32      old_buf_state;
     uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
@@ -637,24 +676,48 @@ GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
     if (bufnum == InvalidBuffer)
         return NULL;
 
+    buf = GetBufferDescriptor(bufnum - 1);
+
     /*
-     * If the buffer is pinned we cannot use it under any circumstances.
-     *
-     * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
-     * since our own previous usage of the ring element would have left it
-     * there, but it might've been decremented by clock-sweep since then). A
-     * higher usage_count indicates someone else has touched the buffer, so we
-     * shouldn't re-use it.
+     * Check whether the buffer can be used and pin it if so. Do this using a
+     * CAS loop, to avoid having to lock the buffer header.
      */
-    buf = GetBufferDescriptor(bufnum - 1);
-    local_buf_state = LockBufHdr(buf);
-    if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
-        && BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
+    old_buf_state = pg_atomic_read_u32(&buf->state);
+    for (;;)
     {
-        *buf_state = local_buf_state;
-        return buf;
+        local_buf_state = old_buf_state;
+
+        /*
+         * If the buffer is pinned we cannot use it under any circumstances.
+         *
+         * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
+         * since our own previous usage of the ring element would have left it
+         * there, but it might've been decremented by clock-sweep since then).
+         * A higher usage_count indicates someone else has touched the buffer,
+         * so we shouldn't re-use it.
+         */
+        if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0
+            || BUF_STATE_GET_USAGECOUNT(local_buf_state) > 1)
+            break;
+
+        if (unlikely(local_buf_state & BM_LOCKED))
+        {
+            old_buf_state = WaitBufHdrUnlocked(buf);
+            continue;
+        }
+
+        /* pin the buffer if the CAS succeeds */
+        local_buf_state += BUF_REFCOUNT_ONE;
+
+        if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
+                                           local_buf_state))
+        {
+            *buf_state = local_buf_state;
+
+            TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
+            return buf;
+        }
     }
-    UnlockBufHdr(buf, local_buf_state);
 
     /*
      * Tell caller to allocate a new buffer with the normal allocation
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index dfd614f7ca4..c1206a46aba 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -371,6 +371,8 @@ UnlockBufHdr(BufferDesc *desc, uint32 buf_state)
     pg_atomic_write_u32(&desc->state, buf_state & (~BM_LOCKED));
 }
 
+extern uint32 WaitBufHdrUnlocked(BufferDesc *buf);
+
 /* in bufmgr.c */
 
 /*
@@ -425,6 +427,8 @@ extern void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_co
 extern void ScheduleBufferTagForWriteback(WritebackContext *wb_context,
                                           IOContext io_context, BufferTag *tag);
 
+extern void TrackNewBufferPin(Buffer buf);
+
 /* solely to make it easier to write tests */
 extern bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait);
 extern void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits,
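The new clock-sweep and ring paths above share one pattern: read the buffer's state word, decide from that snapshot whether the buffer is usable, and then take the pin with a compare-and-swap instead of locking the buffer header, retrying whenever BM_LOCKED is set or the CAS loses a race. What follows is a minimal standalone sketch of that pin step only, written against C11 atomics rather than PostgreSQL's pg_atomic_* wrappers; SketchBufferDesc, the SKETCH_* state layout, and try_pin_buffer() are illustrative stand-ins, not the definitions from buf_internals.h.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_LOCKED       (1u << 31)  /* stand-in for BM_LOCKED */
#define SKETCH_REFCOUNT_ONE 1u          /* stand-in for BUF_REFCOUNT_ONE */

typedef struct SketchBufferDesc
{
    /* simplified state word: refcount in the low bits, lock flag on top */
    _Atomic uint32_t state;
} SketchBufferDesc;

/*
 * Try to take a pin without locking the header: reload the state while the
 * header lock flag is set, give up if someone else already holds a pin, and
 * otherwise CAS in the incremented refcount.  Returns true iff we now hold
 * a pin on the buffer.
 */
static bool
try_pin_buffer(SketchBufferDesc *buf)
{
    uint32_t old_state = atomic_load(&buf->state);

    for (;;)
    {
        uint32_t new_state;

        if (old_state & SKETCH_LOCKED)
        {
            /* header is locked; reread and retry (the real code backs off) */
            old_state = atomic_load(&buf->state);
            continue;
        }

        if ((old_state & ~SKETCH_LOCKED) != 0)
            return false;       /* already pinned by someone else */

        new_state = old_state + SKETCH_REFCOUNT_ONE;

        /* on failure old_state is refreshed to the current value, so loop */
        if (atomic_compare_exchange_weak(&buf->state, &old_state, new_state))
            return true;
    }
}

The committed code layers more onto this loop: it backs off politely via WaitBufHdrUnlocked() while the header is locked, folds the usage-count decrement into the same CAS, and records the successful pin with TrackNewBufferPin() so the refcount entry and resource owner bookkeeping stay consistent.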