Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 215
1 file changed, 125 insertions(+), 90 deletions(-)
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index fe470de63f2..ca62b9cdcf0 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -512,12 +512,12 @@ static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr,
                                            BlockNumber extend_upto,
                                            Buffer *buffers,
                                            uint32 *extended_by);
-static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy);
+static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
+                      bool skip_if_not_valid);
 static void PinBuffer_Locked(BufferDesc *buf);
 static void UnpinBuffer(BufferDesc *buf);
 static void UnpinBufferNoOwner(BufferDesc *buf);
 static void BufferSync(int flags);
-static uint32 WaitBufHdrUnlocked(BufferDesc *buf);
 static int  SyncOneBuffer(int buf_id, bool skip_recently_used,
                           WritebackContext *wb_context);
 static void WaitIO(BufferDesc *buf);
@@ -533,6 +533,8 @@ static inline BufferDesc *BufferAlloc(SMgrRelation smgr,
 static bool AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress);
 static void CheckReadBuffersOperation(ReadBuffersOperation *operation, bool is_complete);
 static Buffer GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context);
+static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln,
+                                IOObject io_object, IOContext io_context);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
                         IOObject io_object, IOContext io_context);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
@@ -685,7 +687,6 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN
     BufferDesc *bufHdr;
     BufferTag   tag;
     uint32      buf_state;
-    bool        have_private_ref;
 
     Assert(BufferIsValid(recent_buffer));
 
@@ -713,38 +714,24 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN
     else
     {
         bufHdr = GetBufferDescriptor(recent_buffer - 1);
-        have_private_ref = GetPrivateRefCount(recent_buffer) > 0;
 
         /*
-         * Do we already have this buffer pinned with a private reference? If
-         * so, it must be valid and it is safe to check the tag without
-         * locking. If not, we have to lock the header first and then check.
+         * Is it still valid and holding the right tag? We do an unlocked tag
+         * comparison first, to make it unlikely that we'll increment the
+         * usage counter of the wrong buffer, if someone calls us with a very
+         * out of date recent_buffer. Then we'll check it again if we get the
+         * pin.
         */
-        if (have_private_ref)
-            buf_state = pg_atomic_read_u32(&bufHdr->state);
-        else
-            buf_state = LockBufHdr(bufHdr);
-
-        if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag))
+        if (BufferTagsEqual(&tag, &bufHdr->tag) &&
+            PinBuffer(bufHdr, NULL, true))
         {
-            /*
-             * It's now safe to pin the buffer. We can't pin first and ask
-             * questions later, because it might confuse code paths like
-             * InvalidateBuffer() if we pinned a random non-matching buffer.
-             */
-            if (have_private_ref)
-                PinBuffer(bufHdr, NULL);    /* bump pin count */
-            else
-                PinBuffer_Locked(bufHdr);   /* pin for first time */
-
-            pgBufferUsage.shared_blks_hit++;
-
-            return true;
+            if (BufferTagsEqual(&tag, &bufHdr->tag))
+            {
+                pgBufferUsage.shared_blks_hit++;
+                return true;
+            }
+            UnpinBuffer(bufHdr);
         }
-
-        /* If we locked the header above, now unlock. */
-        if (!have_private_ref)
-            UnlockBufHdr(bufHdr, buf_state);
     }
 
     return false;
@@ -1080,7 +1067,7 @@ ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
     * already valid.)
     */
    if (!isLocalBuf)
-       LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
+       LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
    /* Set BM_VALID, terminate IO, and wake up any waiters */
    if (isLocalBuf)
@@ -2036,7 +2023,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
         */
        buf = GetBufferDescriptor(existing_buf_id);
 
-       valid = PinBuffer(buf, strategy);
+       valid = PinBuffer(buf, strategy, false);
 
        /* Can release the mapping lock as soon as we've pinned it */
        LWLockRelease(newPartitionLock);
@@ -2098,7 +2085,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
        existing_buf_hdr = GetBufferDescriptor(existing_buf_id);
 
-       valid = PinBuffer(existing_buf_hdr, strategy);
+       valid = PinBuffer(existing_buf_hdr, strategy, false);
 
        /* Can release the mapping lock as soon as we've pinned it */
        LWLockRelease(newPartitionLock);
@@ -2338,8 +2325,8 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
    bool        from_ring;
 
    /*
-    * Ensure, while the spinlock's not yet held, that there's a free refcount
-    * entry, and a resource owner slot for the pin.
+    * Ensure, before we pin a victim buffer, that there's a free refcount
+    * entry and resource owner slot for the pin.
     */
    ReservePrivateRefCountEntry();
    ResourceOwnerEnlarge(CurrentResourceOwner);
@@ -2348,17 +2335,12 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
 again:
 
    /*
-    * Select a victim buffer. The buffer is returned with its header
-    * spinlock still held!
+    * Select a victim buffer. The buffer is returned pinned and owned by
+    * this backend.
     */
    buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
    buf = BufferDescriptorGetBuffer(buf_hdr);
 
-   Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
-
-   /* Pin the buffer and then release the buffer spinlock */
-   PinBuffer_Locked(buf_hdr);
-
    /*
    * We shouldn't have any other pins for this buffer.
    */
@@ -2736,7 +2718,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
             * Pin the existing buffer before releasing the partition lock,
             * preventing it from being evicted.
             */
-           valid = PinBuffer(existing_hdr, strategy);
+           valid = PinBuffer(existing_hdr, strategy, false);
 
            LWLockRelease(partition_lock);
            UnpinBuffer(victim_buf_hdr);
@@ -2837,7 +2819,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
        }
 
        if (lock)
-           LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE);
+           LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
        TerminateBufferIO(buf_hdr, false, BM_VALID, true, false);
    }
@@ -2850,14 +2832,40 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 }
 
 /*
- * BufferIsExclusiveLocked
+ * BufferIsLockedByMe
+ *
+ * Checks if this backend has the buffer locked in any mode.
+ *
+ * Buffer must be pinned.
+ */
+bool
+BufferIsLockedByMe(Buffer buffer)
+{
+   BufferDesc *bufHdr;
+
+   Assert(BufferIsPinned(buffer));
+
+   if (BufferIsLocal(buffer))
+   {
+       /* Content locks are not maintained for local buffers. */
+       return true;
+   }
+   else
+   {
+       bufHdr = GetBufferDescriptor(buffer - 1);
+       return LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr));
+   }
+}
+
+/*
+ * BufferIsLockedByMeInMode
  *
- * Checks if buffer is exclusive-locked.
+ * Checks if this backend has the buffer locked in the specified mode.
 *
 * Buffer must be pinned.
 */
 bool
-BufferIsExclusiveLocked(Buffer buffer)
+BufferIsLockedByMeInMode(Buffer buffer, int mode)
 {
    BufferDesc *bufHdr;
 
@@ -2870,9 +2878,23 @@ BufferIsExclusiveLocked(Buffer buffer)
    }
    else
    {
+       LWLockMode  lw_mode;
+
+       switch (mode)
+       {
+           case BUFFER_LOCK_EXCLUSIVE:
+               lw_mode = LW_EXCLUSIVE;
+               break;
+           case BUFFER_LOCK_SHARE:
+               lw_mode = LW_SHARED;
+               break;
+           default:
+               pg_unreachable();
+       }
+
        bufHdr = GetBufferDescriptor(buffer - 1);
        return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
-                                   LW_EXCLUSIVE);
+                                   lw_mode);
    }
 }
 
@@ -2901,8 +2923,7 @@ BufferIsDirty(Buffer buffer)
    else
    {
        bufHdr = GetBufferDescriptor(buffer - 1);
-       Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
-                                   LW_EXCLUSIVE));
+       Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
    }
 
    return pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY;
@@ -2936,8 +2957,7 @@ MarkBufferDirty(Buffer buffer)
    bufHdr = GetBufferDescriptor(buffer - 1);
 
    Assert(BufferIsPinned(buffer));
-   Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
-                               LW_EXCLUSIVE));
+   Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
 
    old_buf_state = pg_atomic_read_u32(&bufHdr->state);
    for (;;)
@@ -3035,10 +3055,13 @@ ReleaseAndReadBuffer(Buffer buffer,
 * must have been done already.
 *
 * Returns true if buffer is BM_VALID, else false. This provision allows
- * some callers to avoid an extra spinlock cycle.
+ * some callers to avoid an extra spinlock cycle. If skip_if_not_valid is
+ * true, then a false return value also indicates that the buffer was
+ * (recently) invalid and has not been pinned.
 */
 static bool
-PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
+PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy,
+          bool skip_if_not_valid)
 {
    Buffer      b = BufferDescriptorGetBuffer(buf);
    bool        result;
@@ -3054,11 +3077,12 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
        uint32      buf_state;
        uint32      old_buf_state;
 
-       ref = NewPrivateRefCountEntry(b);
-
        old_buf_state = pg_atomic_read_u32(&buf->state);
        for (;;)
        {
+           if (unlikely(skip_if_not_valid && !(old_buf_state & BM_VALID)))
+               return false;
+
            if (old_buf_state & BM_LOCKED)
                old_buf_state = WaitBufHdrUnlocked(buf);
 
@@ -3088,6 +3112,8 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
            {
                result = (buf_state & BM_VALID) != 0;
 
+               TrackNewBufferPin(b);
+
                /*
                 * Assume that we acquired a buffer pin for the purposes of
                 * Valgrind buffer client checks (even in !result case) to
@@ -3118,11 +3144,12 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
         * cannot meddle with that.
         */
        result = (pg_atomic_read_u32(&buf->state) & BM_VALID) != 0;
+
+       Assert(ref->refcount > 0);
+       ref->refcount++;
+       ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
    }
 
-   ref->refcount++;
-   Assert(ref->refcount > 0);
-   ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
    return result;
 }
 
@@ -3151,8 +3178,6 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
 static void
 PinBuffer_Locked(BufferDesc *buf)
 {
-   Buffer      b;
-   PrivateRefCountEntry *ref;
    uint32      buf_state;
 
    /*
@@ -3177,12 +3202,7 @@ PinBuffer_Locked(BufferDesc *buf)
    buf_state += BUF_REFCOUNT_ONE;
    UnlockBufHdr(buf, buf_state);
 
-   b = BufferDescriptorGetBuffer(buf);
-
-   ref = NewPrivateRefCountEntry(b);
-   ref->refcount++;
-
-   ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
+   TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
 }
 
 /*
@@ -3265,7 +3285,10 @@ UnpinBufferNoOwner(BufferDesc *buf)
         */
        VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);
 
-       /* I'd better not still hold the buffer content lock */
+       /*
+        * I'd better not still hold the buffer content lock. Can't use
+        * BufferIsLockedByMe(), as that asserts the buffer is pinned.
+        */
        Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
 
        /*
@@ -3297,6 +3320,17 @@ UnpinBufferNoOwner(BufferDesc *buf)
    }
 }
 
+inline void
+TrackNewBufferPin(Buffer buf)
+{
+   PrivateRefCountEntry *ref;
+
+   ref = NewPrivateRefCountEntry(buf);
+   ref->refcount++;
+
+   ResourceOwnerRememberBuffer(CurrentResourceOwner, buf);
+}
+
 #define ST_SORT sort_checkpoint_bufferids
 #define ST_ELEMENT_TYPE CkptSortItem
 #define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
@@ -3327,7 +3361,7 @@ BufferSync(int flags)
    Oid         last_tsid;
    binaryheap *ts_heap;
    int         i;
-   int         mask = BM_DIRTY;
+   uint32      mask = BM_DIRTY;
    WritebackContext wb_context;
 
    /*
@@ -3935,11 +3969,8 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
     * buffer is clean by the time we've locked it.)
     */
    PinBuffer_Locked(bufHdr);
-   LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-
-   FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
 
-   LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+   FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
 
    tag = bufHdr->tag;
 
@@ -4387,6 +4418,19 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 }
 
 /*
+ * Convenience wrapper around FlushBuffer() that locks/unlocks the buffer
+ * before/after calling FlushBuffer().
+ */
+static void
+FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln,
+                    IOObject io_object, IOContext io_context)
+{
+   LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
+   FlushBuffer(buf, reln, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
+   LWLockRelease(BufferDescriptorGetContentLock(buf));
+}
+
+/*
 * RelationGetNumberOfBlocksInFork
 *      Determines the current number of pages in the specified relation fork.
 *
@@ -4975,9 +5019,7 @@ FlushRelationBuffers(Relation rel)
            (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
        {
            PinBuffer_Locked(bufHdr);
-           LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-           FlushBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
-           LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+           FlushUnlockedBuffer(bufHdr, srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
            UnpinBuffer(bufHdr);
        }
        else
@@ -5072,9 +5114,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
            (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
        {
            PinBuffer_Locked(bufHdr);
-           LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-           FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
-           LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+           FlushUnlockedBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
            UnpinBuffer(bufHdr);
        }
        else
@@ -5300,9 +5340,7 @@ FlushDatabaseBuffers(Oid dbid)
            (buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
        {
            PinBuffer_Locked(bufHdr);
-           LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-           FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
-           LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+           FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
            UnpinBuffer(bufHdr);
        }
        else
@@ -5326,7 +5364,7 @@ FlushOneBuffer(Buffer buffer)
 
    bufHdr = GetBufferDescriptor(buffer - 1);
 
-   Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
+   Assert(BufferIsLockedByMe(buffer));
 
    FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
 }
@@ -5417,7 +5455,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 
    Assert(GetPrivateRefCount(buffer) > 0);
    /* here, either share or exclusive lock is OK */
-   Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
+   Assert(BufferIsLockedByMe(buffer));
 
    /*
    * This routine might get called many times on the same page, if we are
@@ -5900,8 +5938,7 @@ IsBufferCleanupOK(Buffer buffer)
    bufHdr = GetBufferDescriptor(buffer - 1);
 
    /* caller must hold exclusive lock on buffer */
-   Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
-                               LW_EXCLUSIVE));
+   Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
 
    buf_state = LockBufHdr(bufHdr);
 
@@ -6250,7 +6287,7 @@ LockBufHdr(BufferDesc *desc)
 * Obviously the buffer could be locked by the time the value is returned, so
 * this is primarily useful in CAS style loops.
 */
-static uint32
+pg_noinline uint32
 WaitBufHdrUnlocked(BufferDesc *buf)
 {
    SpinDelayStatus delayStatus;
@@ -6577,10 +6614,8 @@ EvictUnpinnedBufferInternal(BufferDesc *desc, bool *buffer_flushed)
    /* If it was dirty, try to clean it once. */
    if (buf_state & BM_DIRTY)
    {
-       LWLockAcquire(BufferDescriptorGetContentLock(desc), LW_SHARED);
-       FlushBuffer(desc, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
+       FlushUnlockedBuffer(desc, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
        *buffer_flushed = true;
-       LWLockRelease(BufferDescriptorGetContentLock(desc));
    }
 
    /* This will return false if it becomes dirty or someone else pins it. */
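The rewritten ReadRecentBuffer() fast path above packs its reasoning into diff context, so here is an annotated restatement, not part of the patch. It assumes it lives inside bufmgr.c (it uses the file-local PinBuffer(), UnpinBuffer(), GetBufferDescriptor() and BufferTagsEqual()); the function name itself is illustrative only.

/* Sketch only: restates the new ReadRecentBuffer() fast path for clarity. */
static bool
recent_buffer_fast_path_sketch(BufferTag tag, Buffer recent_buffer)
{
	BufferDesc *bufHdr = GetBufferDescriptor(recent_buffer - 1);

	/*
	 * Unlocked pre-check: makes it unlikely that we bump the usage count of
	 * an unrelated buffer when recent_buffer is stale.  It is racy, so it
	 * cannot be trusted on its own.
	 */
	if (!BufferTagsEqual(&tag, &bufHdr->tag))
		return false;

	/*
	 * Pin only if the buffer is still valid.  With skip_if_not_valid = true,
	 * PinBuffer() returns false without acquiring a pin when BM_VALID is not
	 * set, so there is nothing to release on this exit.
	 */
	if (!PinBuffer(bufHdr, NULL, true))
		return false;

	/*
	 * Re-check the tag now that the pin prevents eviction: the buffer could
	 * have been recycled for another page between the pre-check and the pin.
	 */
	if (BufferTagsEqual(&tag, &bufHdr->tag))
	{
		pgBufferUsage.shared_blks_hit++;
		return true;
	}

	/* Wrong page after all; drop the pin and let the caller do a normal read. */
	UnpinBuffer(bufHdr);
	return false;
}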
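BufferIsLockedByMe() and BufferIsLockedByMeInMode() replace the direct LWLock ownership checks in the assertions above. The following is a minimal caller-side sketch, not part of the patch, assuming both functions are exported through bufmgr.h the way BufferIsExclusiveLocked() was; the example function names are hypothetical.

#include "postgres.h"
#include "storage/bufmgr.h"

/* Hypothetical access-method code, not part of the patch. */
static void
example_modify_page(Buffer buf)
{
	/* Page modifications require holding the content lock exclusively. */
	Assert(BufferIsLockedByMeInMode(buf, BUFFER_LOCK_EXCLUSIVE));

	/* ... change the page contents ... */
	MarkBufferDirty(buf);
}

static void
example_read_page(Buffer buf)
{
	/* Reading only requires that this backend holds the lock in some mode. */
	Assert(BufferIsLockedByMe(buf));

	/* ... read the page contents ... */
}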