diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2005-03-20 22:00:54 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2005-03-20 22:00:54 +0000 |
commit | 354049c709c9d7d0918272e10e4f30d7f8f38788 (patch) | |
tree | 741c359a1e6494c3ef1b205b84603fd48239b015 /src/backend/storage/buffer/bufmgr.c | |
parent | 683f60da3d837236de5c4249fa2a62c8a94616ca (diff) |
Remove unnecessary calls of FlushRelationBuffers: there is no need
to write out data that we are about to tell the filesystem to drop.
smgr_internal_unlink already had a DropRelFileNodeBuffers call to
get rid of dead buffers without a write after it's no longer possible
to roll back the deleting transaction. Adding a similar call in
smgrtruncate simplifies callers and makes the overall division of
labor clearer. This patch removes the former behavior that VACUUM
would write all dirty buffers of a relation unconditionally.
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 176 |
1 files changed, 59 insertions, 117 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index bda4544b279..e3a60612e30 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.187 2005/03/18 05:25:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.188 2005/03/20 22:00:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1298,8 +1298,8 @@ RelationGetNumberOfBlocks(Relation relation) * RelationTruncate * Physically truncate a relation to the specified number of blocks. * - * Caller should already have done something to flush any buffered pages - * that are to be dropped. + * As of Postgres 8.1, this includes getting rid of any buffers for the + * blocks that are to be dropped; previously, callers had to do that. */ void RelationTruncate(Relation rel, BlockNumber nblocks) @@ -1315,37 +1315,29 @@ RelationTruncate(Relation rel, BlockNumber nblocks) } /* --------------------------------------------------------------------- - * DropRelationBuffers - * - * This function removes all the buffered pages for a relation - * from the buffer pool. Dirty pages are simply dropped, without - * bothering to write them out first. This is NOT rollback-able, - * and so should be used only with extreme caution! - * - * There is no particularly good reason why this doesn't have a - * firstDelBlock parameter, except that current callers don't need it. - * - * We assume that the caller holds an exclusive lock on the relation, - * which should assure that no new buffers will be acquired for the rel - * meanwhile. - * -------------------------------------------------------------------- - */ -void -DropRelationBuffers(Relation rel) -{ - DropRelFileNodeBuffers(rel->rd_node, rel->rd_istemp, 0); -} - -/* --------------------------------------------------------------------- * DropRelFileNodeBuffers * - * This is the same as DropRelationBuffers, except that the target - * relation is specified by RelFileNode and temp status, and one - * may specify the first block to drop. + * This function removes from the buffer pool all the pages of the + * specified relation that have block numbers >= firstDelBlock. + * (In particular, with firstDelBlock = 0, all pages are removed.) + * Dirty pages are simply dropped, without bothering to write them + * out first. Therefore, this is NOT rollback-able, and so should be + * used only with extreme caution! + * + * Currently, this is called only from smgr.c when the underlying file + * is about to be deleted or truncated (firstDelBlock is needed for + * the truncation case). The data in the affected pages would therefore + * be deleted momentarily anyway, and there is no point in writing it. + * It is the responsibility of higher-level code to ensure that the + * deletion or truncation does not lose any data that could be needed + * later. It is also the responsibility of higher-level code to ensure + * that no other process could be trying to load more pages of the + * relation into buffers. * - * This is NOT rollback-able. One legitimate use is to clear the - * buffer cache of buffers for a relation that is being deleted - * during transaction abort. + * XXX currently it sequentially searches the buffer pool, should be + * changed to more clever ways of searching. However, this routine + * is used only in code paths that aren't very performance-critical, + * and we shouldn't slow down the hot paths to make it faster ... * -------------------------------------------------------------------- */ void @@ -1398,7 +1390,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp, * bothering to write them out first. This is used when we destroy a * database, to avoid trying to flush data to disk when the directory * tree no longer exists. Implementation is pretty similar to - * DropRelationBuffers() which is for destroying just one relation. + * DropRelFileNodeBuffers() which is for destroying just one relation. * -------------------------------------------------------------------- */ void @@ -1480,44 +1472,24 @@ PrintPinnedBufs(void) /* --------------------------------------------------------------------- * FlushRelationBuffers * - * This function writes all dirty pages of a relation out to disk. - * Furthermore, pages that have blocknumber >= firstDelBlock are - * actually removed from the buffer pool. - * - * This is called by DROP TABLE to clear buffers for the relation - * from the buffer pool. Note that we must write dirty buffers, - * rather than just dropping the changes, because our transaction - * might abort later on; we want to roll back safely in that case. - * - * This is also called by VACUUM before truncating the relation to the - * given number of blocks. It might seem unnecessary for VACUUM to - * write dirty pages before firstDelBlock, since VACUUM should already - * have committed its changes. However, it is possible for there still - * to be dirty pages: if some page had unwritten on-row tuple status - * updates from a prior transaction, and VACUUM had no additional - * changes to make to that page, then VACUUM won't have written it. - * This is harmless in most cases but will break pg_upgrade, which - * relies on VACUUM to ensure that *all* tuples have correct on-row - * status. So, we check and flush all dirty pages of the rel - * regardless of block number. - * - * In all cases, the caller should be holding AccessExclusiveLock on - * the target relation to ensure that no other backend is busy reading - * more blocks of the relation (or might do so before we commit). - * This should also ensure that no one is busy dirtying these blocks. - * - * Formerly, we considered it an error condition if we found dirty - * buffers here. However, since BufferSync no longer forces out all - * dirty buffers at every xact commit, it's possible for dirty buffers - * to still be present in the cache due to failure of an earlier - * transaction. So, must flush dirty buffers without complaint. + * This function writes all dirty pages of a relation out to disk + * (or more accurately, out to kernel disk buffers), ensuring that the + * kernel has an up-to-date view of the relation. + * + * Generally, the caller should be holding AccessExclusiveLock on the + * target relation to ensure that no other backend is busy dirtying + * more blocks of the relation; the effects can't be expected to last + * after the lock is released. * * XXX currently it sequentially searches the buffer pool, should be - * changed to more clever ways of searching. + * changed to more clever ways of searching. This routine is not + * used in any performance-critical code paths, so it's not worth + * adding additional overhead to normal paths to make it go faster; + * but see also DropRelFileNodeBuffers. * -------------------------------------------------------------------- */ void -FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) +FlushRelationBuffers(Relation rel) { int i; BufferDesc *bufHdr; @@ -1530,38 +1502,26 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) for (i = 0; i < NLocBuffer; i++) { bufHdr = &LocalBufferDescriptors[i]; - if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) + if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) && + (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { - if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) - { - ErrorContextCallback errcontext; + ErrorContextCallback errcontext; - /* Setup error traceback support for ereport() */ - errcontext.callback = buffer_write_error_callback; - errcontext.arg = bufHdr; - errcontext.previous = error_context_stack; - error_context_stack = &errcontext; + /* Setup error traceback support for ereport() */ + errcontext.callback = buffer_write_error_callback; + errcontext.arg = bufHdr; + errcontext.previous = error_context_stack; + error_context_stack = &errcontext; - smgrwrite(rel->rd_smgr, - bufHdr->tag.blockNum, - (char *) LocalBufHdrGetBlock(bufHdr), - true); + smgrwrite(rel->rd_smgr, + bufHdr->tag.blockNum, + (char *) LocalBufHdrGetBlock(bufHdr), + true); - bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); - /* Pop the error context stack */ - error_context_stack = errcontext.previous; - } - if (LocalRefCount[i] > 0) - elog(ERROR, "FlushRelationBuffers(\"%s\" (local), %u): block %u is referenced (%d)", - RelationGetRelationName(rel), firstDelBlock, - bufHdr->tag.blockNum, LocalRefCount[i]); - if (bufHdr->tag.blockNum >= firstDelBlock) - { - CLEAR_BUFFERTAG(bufHdr->tag); - bufHdr->flags = 0; - bufHdr->usage_count = 0; - } + /* Pop the error context stack */ + error_context_stack = errcontext.previous; } } @@ -1574,33 +1534,15 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) for (i = 0; i < NBuffers; i++) { bufHdr = &BufferDescriptors[i]; - recheck: LockBufHdr(bufHdr); - if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) + if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) && + (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { - if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) - { - PinBuffer_Locked(bufHdr); - LWLockAcquire(bufHdr->content_lock, LW_SHARED); - FlushBuffer(bufHdr, rel->rd_smgr); - LWLockRelease(bufHdr->content_lock); - UnpinBuffer(bufHdr, true, false /* no freelist change */ ); - /* - * As soon as we unpin, it's possible for someone to take - * the buffer away from us; so loop back to re-lock and - * re-check if it still belongs to the target relation. - */ - goto recheck; - } - /* - * Even though it's not dirty, it could still be pinned because - * TerminateIO and UnpinBuffer are separate actions. Hence, - * we can't error out on nonzero reference count here. - */ - if (bufHdr->tag.blockNum >= firstDelBlock) - InvalidateBuffer(bufHdr); /* releases spinlock */ - else - UnlockBufHdr(bufHdr); + PinBuffer_Locked(bufHdr); + LWLockAcquire(bufHdr->content_lock, LW_SHARED); + FlushBuffer(bufHdr, rel->rd_smgr); + LWLockRelease(bufHdr->content_lock); + UnpinBuffer(bufHdr, true, false /* no freelist change */ ); } else UnlockBufHdr(bufHdr); |