summaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
author Andres Freund <andres@anarazel.de> 2025-11-06 16:42:10 -0500
committer Andres Freund <andres@anarazel.de> 2025-11-06 16:42:10 -0500
commitc75ebc657ffce8dab76471da31aafb79fbe3fda2 (patch)
treee07e55c0ac011558e128ad03f69e4747a066791f /contrib
parent448b6a4173d007c75ba30fed666b60f0bd1afe8b (diff)
bufmgr: Allow some buffer state modifications while holding header lock
Until now BufferDesc.state was not allowed to be modified while the buffer header spinlock was held. This meant that operations like unpinning buffers needed to use a CAS loop, waiting for the buffer header spinlock to be released before updating. The benefit of that restriction is that it allowed us to unlock the buffer header spinlock with just a write barrier and an unlocked write (instead of a full atomic operation). That was important to avoid regressions in 48354581a49c. However, since then the hottest buffer header spinlock uses have been replaced with atomic operations (in particular, the most common use of PinBuffer_Locked(), in GetVictimBuffer() (formerly in BufferAlloc()), has been removed in 5e899859287). This change will allow, in a subsequent commit, to release buffer pins with a single atomic-sub operation. This previously was not possible while such operations were not allowed while the buffer header spinlock was held, as an atomic-sub would not have allowed a race-free check for the buffer header lock being held. Using atomic-sub to unpin buffers is a nice scalability win, however it is not the primary motivation for this change (although it would be sufficient). The primary motivation is that we would like to merge the buffer content lock into BufferDesc.state, which will result in more frequent changes of the state variable, which in some situations can cause a performance regression, due to an increased CAS failure rate when unpinning buffers. The regression entirely vanishes when using atomic-sub. Naively implementing this would require putting CAS loops in every place modifying the buffer state while holding the buffer header lock. To avoid that, introduce UnlockBufHdrExt(), which can set/add flags as well as the refcount, together with releasing the lock. 
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/fvfmkr5kk4nyex56ejgxj3uzi63isfxovp2biecb4bspbjrze7@az2pljabhnff
Diffstat (limited to 'contrib')
-rw-r--r--contrib/pg_buffercache/pg_buffercache_pages.c7
-rw-r--r--contrib/pg_prewarm/autoprewarm.c2
2 files changed, 4 insertions, 5 deletions
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index 3df04c98959..ab790533ff6 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -220,7 +220,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
else
fctx->record[i].isvalid = false;
- UnlockBufHdr(bufHdr, buf_state);
+ UnlockBufHdr(bufHdr);
}
}
@@ -460,7 +460,6 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
{
char *buffptr = (char *) BufferGetBlock(i + 1);
BufferDesc *bufHdr;
- uint32 buf_state;
uint32 bufferid;
int32 page_num;
char *startptr_buff,
@@ -471,9 +470,9 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
bufHdr = GetBufferDescriptor(i);
/* Lock each buffer header before inspecting. */
- buf_state = LockBufHdr(bufHdr);
+ LockBufHdr(bufHdr);
bufferid = BufferDescriptorGetBuffer(bufHdr);
- UnlockBufHdr(bufHdr, buf_state);
+ UnlockBufHdr(bufHdr);
/* start of the first page of this buffer */
startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);
diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
index 8b68dafc261..5ba1240d51f 100644
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -730,7 +730,7 @@ apw_dump_now(bool is_bgworker, bool dump_unlogged)
++num_blocks;
}
- UnlockBufHdr(bufHdr, buf_state);
+ UnlockBufHdr(bufHdr);
}
snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);