summary | refs | log | tree | commit | diff
path: root/src/backend/access/transam/subtrans.c
diff options
context:
space:
mode:
author: Alvaro Herrera <alvherre@alvh.no-ip.org> 2024-02-28 17:05:31 +0100
committer: Alvaro Herrera <alvherre@alvh.no-ip.org> 2024-02-28 17:05:31 +0100
commit: 53c2a97a92665be6bd7d70bd62ae6158fe4db96e (patch)
tree: 88d853f098fe925024b82e72f2beea523e24cbe6 /src/backend/access/transam/subtrans.c
parent: 1c1eec0f2d88b7e823af959103b2100da493caa9 (diff)
Improve performance of subsystems on top of SLRU
More precisely, what we do here is make the SLRU cache sizes configurable with new GUCs, so that sites with high concurrency and big ranges of transactions in flight (resp. multixacts/subtransactions) can benefit from bigger caches. In order for this to work with good performance, two additional changes are made: 1. the cache is divided in "banks" (to borrow terminology from CPU caches), and algorithms such as eviction buffer search only affect one specific bank. This forestalls the problem that linear searching for a specific buffer across the whole cache takes too long: we only have to search the specific bank, whose size is small. This work is authored by Andrey Borodin. 2. Change the locking regime for the SLRU banks, so that each bank uses a separate LWLock. This allows for increased scalability. This work is authored by Dilip Kumar. (A part of this was previously committed as d172b717c6f4.) Special care is taken so that the algorithms that can potentially traverse more than one bank release one bank's lock before acquiring the next. This should happen rarely, but particularly clog.c's group commit feature needed code adjustment to cope with this. I (Álvaro) also added lots of comments to make sure the design is sound. The new GUCs match the names introduced by bcdfa5f2e2f2 in the pg_stat_slru view. The default values for these parameters are similar to the previous sizes of each SLRU. commit_ts, clog and subtrans accept value 0, which means to adjust by dividing shared_buffers by 512 (so 2MB for every 1GB of shared_buffers), with a cap of 8MB. (A new slru.c function SimpleLruAutotuneBuffers() was added to support this.) The cap was previously 1MB for clog, so for sites with more than 512MB of shared memory the total memory used increases, which is likely a good tradeoff. However, other SLRUs (notably multixact ones) retain smaller sizes and don't support a configured value of 0. 
These values based on shared_buffers may need to be revisited, but that's an easy change. There was some resistance to adding these new GUCs: it would be better to adjust to memory pressure automatically somehow, for example by stealing memory from shared_buffers (where the caches can grow and shrink naturally). However, doing that seems to be a much larger project and one which has made virtually no progress in several years, and because this is such a pain point for so many users, here we take the pragmatic approach. Author: Andrey Borodin <x4mmm@yandex-team.ru> Author: Dilip Kumar <dilipbalaut@gmail.com> Reviewed-by: Amul Sul, Gilles Darold, Anastasia Lubennikova, Ivan Lazarev, Robert Haas, Thomas Munro, Tomas Vondra, Yura Sokolov, Васильев Дмитрий (Dmitry Vasiliev). Discussion: https://postgr.es/m/2BEC2B3F-9B61-4C1D-9FB5-5FAB0F05EF86@yandex-team.ru Discussion: https://postgr.es/m/CAFiTN-vzDvNz=ExGXz6gdyjtzGixKSqs0mKHMmaQ8sOSEFZ33A@mail.gmail.com
Diffstat (limited to 'src/backend/access/transam/subtrans.c')
-rw-r--r-- src/backend/access/transam/subtrans.c 110
1 files changed, 94 insertions, 16 deletions
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 6aa47af43e2..dc9566fb51b 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -31,7 +31,9 @@
#include "access/slru.h"
#include "access/subtrans.h"
#include "access/transam.h"
+#include "miscadmin.h"
#include "pg_trace.h"
+#include "utils/guc_hooks.h"
#include "utils/snapmgr.h"
@@ -85,12 +87,14 @@ SubTransSetParent(TransactionId xid, TransactionId parent)
int64 pageno = TransactionIdToPage(xid);
int entryno = TransactionIdToEntry(xid);
int slotno;
+ LWLock *lock;
TransactionId *ptr;
Assert(TransactionIdIsValid(parent));
Assert(TransactionIdFollows(xid, parent));
- LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
+ lock = SimpleLruGetBankLock(SubTransCtl, pageno);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
slotno = SimpleLruReadPage(SubTransCtl, pageno, true, xid);
ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
@@ -108,7 +112,7 @@ SubTransSetParent(TransactionId xid, TransactionId parent)
SubTransCtl->shared->page_dirty[slotno] = true;
}
- LWLockRelease(SubtransSLRULock);
+ LWLockRelease(lock);
}
/*
@@ -138,7 +142,7 @@ SubTransGetParent(TransactionId xid)
parent = *ptr;
- LWLockRelease(SubtransSLRULock);
+ LWLockRelease(SimpleLruGetBankLock(SubTransCtl, pageno));
return parent;
}
@@ -186,6 +190,22 @@ SubTransGetTopmostTransaction(TransactionId xid)
return previousXid;
}
+/*
+ * Number of shared SUBTRANS buffers.
+ *
+ * A subtransaction_buffers setting of 0 requests auto-tuning: the size is
+ * then obtained from SimpleLruAutotuneBuffers(), using 2MB for every 1GB of
+ * shared buffers, up to 8MB.  Any other setting is clamped to the range
+ * 16 .. SLRU_MAX_ALLOWED_BUFFERS.
+ *
+ * Returns the buffer count; subtransaction_buffers itself is not modified
+ * here.
+ */
+static int
+SUBTRANSShmemBuffers(void)
+{
+ /* a setting of 0 means: auto-tune based on shared buffers */
+ if (subtransaction_buffers == 0)
+ return SimpleLruAutotuneBuffers(512, 1024);
+
+ return Min(Max(16, subtransaction_buffers), SLRU_MAX_ALLOWED_BUFFERS);
+}
/*
* Initialization of shared memory for SUBTRANS
@@ -193,21 +213,50 @@ SubTransGetTopmostTransaction(TransactionId xid)
Size
SUBTRANSShmemSize(void)
{
- return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
+ return SimpleLruShmemSize(SUBTRANSShmemBuffers(), 0);
}
void
SUBTRANSShmemInit(void)
{
+ /* If auto-tuning is requested, now is the time to do it */
+ if (subtransaction_buffers == 0)
+ {
+ char buf[32];
+
+ snprintf(buf, sizeof(buf), "%d", SUBTRANSShmemBuffers());
+ SetConfigOption("subtransaction_buffers", buf, PGC_POSTMASTER,
+ PGC_S_DYNAMIC_DEFAULT);
+
+ /*
+ * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
+ * However, if the DBA explicitly set subtransaction_buffers = 0 in
+ * the config file, then PGC_S_DYNAMIC_DEFAULT will fail to override
+ * that and we must force the matter with PGC_S_OVERRIDE.
+ */
+ if (subtransaction_buffers == 0) /* failed to apply it? */
+ SetConfigOption("subtransaction_buffers", buf, PGC_POSTMASTER,
+ PGC_S_OVERRIDE);
+ }
+ Assert(subtransaction_buffers != 0);
+
SubTransCtl->PagePrecedes = SubTransPagePrecedes;
- SimpleLruInit(SubTransCtl, "subtransaction", NUM_SUBTRANS_BUFFERS, 0,
- SubtransSLRULock, "pg_subtrans",
- LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE,
- false);
+ SimpleLruInit(SubTransCtl, "subtransaction", SUBTRANSShmemBuffers(), 0,
+ "pg_subtrans", LWTRANCHE_SUBTRANS_BUFFER,
+ LWTRANCHE_SUBTRANS_SLRU, SYNC_HANDLER_NONE, false);
SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
}
/*
+ * GUC check_hook for subtransaction_buffers.
+ *
+ * Validation is delegated to check_slru_buffers(), which is handed the GUC
+ * name and the proposed value; its result is returned unchanged.  The
+ * 'extra' and 'source' arguments are not used here.
+ */
+bool
+check_subtrans_buffers(int *newval, void **extra, GucSource source)
+{
+ return check_slru_buffers("subtransaction_buffers", newval);
+}
+
+/*
* This func must be called ONCE on system install. It creates
* the initial SUBTRANS segment. (The SUBTRANS directory is assumed to
* have been created by the initdb shell script, and SUBTRANSShmemInit
@@ -221,8 +270,9 @@ void
BootStrapSUBTRANS(void)
{
int slotno;
+ LWLock *lock = SimpleLruGetBankLock(SubTransCtl, 0);
- LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
/* Create and zero the first page of the subtrans log */
slotno = ZeroSUBTRANSPage(0);
@@ -231,7 +281,7 @@ BootStrapSUBTRANS(void)
SimpleLruWritePage(SubTransCtl, slotno);
Assert(!SubTransCtl->shared->page_dirty[slotno]);
- LWLockRelease(SubtransSLRULock);
+ LWLockRelease(lock);
}
/*
@@ -261,6 +311,8 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
FullTransactionId nextXid;
int64 startPage;
int64 endPage;
+ LWLock *prevlock;
+ LWLock *lock;
/*
* Since we don't expect pg_subtrans to be valid across crashes, we
@@ -268,23 +320,47 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
* Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
* the new page without regard to whatever was previously on disk.
*/
- LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
-
startPage = TransactionIdToPage(oldestActiveXID);
nextXid = TransamVariables->nextXid;
endPage = TransactionIdToPage(XidFromFullTransactionId(nextXid));
+ prevlock = SimpleLruGetBankLock(SubTransCtl, startPage);
+ LWLockAcquire(prevlock, LW_EXCLUSIVE);
while (startPage != endPage)
{
+ lock = SimpleLruGetBankLock(SubTransCtl, startPage);
+
+ /*
+ * If this page belongs to a different bank than the one whose lock we
+ * currently hold, release the old bank's lock before acquiring the
+ * new bank's lock.
+ */
+ if (prevlock != lock)
+ {
+ LWLockRelease(prevlock);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
+ prevlock = lock;
+ }
+
(void) ZeroSUBTRANSPage(startPage);
startPage++;
/* must account for wraparound */
if (startPage > TransactionIdToPage(MaxTransactionId))
startPage = 0;
}
- (void) ZeroSUBTRANSPage(startPage);
- LWLockRelease(SubtransSLRULock);
+ lock = SimpleLruGetBankLock(SubTransCtl, startPage);
+
+ /*
+ * If the final page belongs to a different bank than the one whose lock
+ * we currently hold, release the old bank's lock before acquiring the
+ * new bank's lock.
+ */
+ if (prevlock != lock)
+ {
+ LWLockRelease(prevlock);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
+ }
+ (void) ZeroSUBTRANSPage(startPage);
+ LWLockRelease(lock);
}
/*
@@ -318,6 +394,7 @@ void
ExtendSUBTRANS(TransactionId newestXact)
{
int64 pageno;
+ LWLock *lock;
/*
* No work except at first XID of a page. But beware: just after
@@ -329,12 +406,13 @@ ExtendSUBTRANS(TransactionId newestXact)
pageno = TransactionIdToPage(newestXact);
- LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
+ lock = SimpleLruGetBankLock(SubTransCtl, pageno);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
/* Zero the page */
ZeroSUBTRANSPage(pageno);
- LWLockRelease(SubtransSLRULock);
+ LWLockRelease(lock);
}