author     Andres Freund <andres@anarazel.de>  2014-12-25 17:24:30 +0100
committer  Andres Freund <andres@anarazel.de>  2014-12-25 17:24:30 +0100
commit     ab5194e6f617a9a9e7aadb3dd1cee948a42d0755
tree       f4f4721d152a393168bba9448726421d8ffc6f8e /src/include/storage
parent     7882c3b0b95640e361f1533fe0f2d02e4e5d8610
Improve LWLock scalability.
The old LWLock implementation had the problem that concurrent lock
acquisitions required exclusively acquiring a spinlock. Often that could
lead to acquirers waiting behind the spinlock, even if the actual LWLock
was free.

The new implementation doesn't acquire the spinlock when acquiring the
lock itself. Instead, the new atomic operations are used to atomically
manipulate the state. Only the wait queue, used solely in the slow path,
is still protected by the spinlock. See lwlock.c's header comment for an
explanation of the algorithm used.

For some common workloads on larger machines this can yield significant
performance improvements, particularly in read-mostly workloads.

Reviewed-By: Amit Kapila and Robert Haas
Author: Andres Freund
Discussion: 20130926225545.GB26663@awork2.anarazel.de
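As a rough illustration of the algorithm described above, here is a minimal
sketch using C11 atomics rather than PostgreSQL's port/atomics.h wrappers;
the lwlock_sketch type, the function name, and the constant value are
illustrative assumptions, not the actual definitions in lwlock.c. A shared
acquisition becomes a single compare-and-swap on the state word, with no
spinlock in the fast path:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Assumed layout: one bit marks an exclusive holder, the low bits count
 * shared holders.  The real constants in lwlock.c differ. */
#define LW_VAL_EXCLUSIVE (1U << 24)

typedef struct
{
	_Atomic uint32_t state;		/* stands in for pg_atomic_uint32 state */
} lwlock_sketch;

/* Shared-lock fast path: bump the shared-holder count with one CAS.
 * No spinlock is taken; on failure (lock held exclusively) the caller
 * would fall back to the spinlock-protected wait queue. */
static bool
lwlock_try_acquire_shared(lwlock_sketch *lock)
{
	uint32_t old = atomic_load_explicit(&lock->state, memory_order_relaxed);

	while ((old & LW_VAL_EXCLUSIVE) == 0)
	{
		/* On CAS failure, 'old' is refreshed and the loop re-checks. */
		if (atomic_compare_exchange_weak_explicit(&lock->state, &old, old + 1,
												  memory_order_acquire,
												  memory_order_relaxed))
			return true;
	}
	return false;				/* exclusively held: take the slow path */
}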
Diffstat (limited to 'src/include/storage')
-rw-r--r--  src/include/storage/lwlock.h | 24
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index c84970a7add..f15a9517144 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -16,6 +16,7 @@
#include "lib/ilist.h"
#include "storage/s_lock.h"
+#include "port/atomics.h"
struct PGPROC;
@@ -47,11 +48,16 @@ typedef struct LWLockTranche
 typedef struct LWLock
 {
 	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
-	bool		releaseOK;		/* T if ok to release waiters */
-	char		exclusive;		/* # of exclusive holders (0 or 1) */
-	int			shared;			/* # of shared holders (0..MaxBackends) */
-	int			tranche;		/* tranche ID */
+	uint16		tranche;		/* tranche ID */
+
+	pg_atomic_uint32 state;		/* state of exclusive/nonexclusive lockers */
+#ifdef LOCK_DEBUG
+	pg_atomic_uint32 nwaiters;	/* number of waiters */
+#endif
 	dlist_head	waiters;		/* list of waiting PGPROCs */
+#ifdef LOCK_DEBUG
+	struct PGPROC *owner;		/* last exclusive owner of the lock */
+#endif
 } LWLock;
 
 /*
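The struct above replaces the separate releaseOK, exclusive, and shared
fields with a single atomic state word. Continuing the illustrative sketch
from the commit message (same assumed constants and types, not the actual
lwlock.c definitions), an exclusive acquisition is then one compare-and-swap
from "no holders" to the exclusive bit:

/* Exclusive-lock fast path under the same assumed layout: succeed only
 * if no shared or exclusive holder is present.  The real code also keeps
 * flag bits (e.g. for waiters) in the same word, so its expected value
 * is not simply zero. */
static bool
lwlock_try_acquire_exclusive(lwlock_sketch *lock)
{
	uint32_t expected = 0;		/* assumption: zero means no holders */

	return atomic_compare_exchange_strong_explicit(&lock->state, &expected,
												   LW_VAL_EXCLUSIVE,
												   memory_order_acquire,
												   memory_order_relaxed);
}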
@@ -66,11 +72,11 @@ typedef struct LWLock
  * (Of course, we have to also ensure that the array start address is suitably
  * aligned.)
  *
- * Even on a 32-bit platform, an lwlock will be more than 16 bytes, because
- * it contains 2 integers and 2 pointers, plus other stuff.  It should fit
- * into 32 bytes, though, unless slock_t is really big.  On a 64-bit platform,
- * it should fit into 32 bytes unless slock_t is larger than 4 bytes.  We
- * allow for that just in case.
+ * On 32-bit platforms an LWLock will these days fit into 16 bytes, but since
+ * that didn't use to be the case, and since cramming more lwlocks into a
+ * cacheline might be detrimental performance-wise, we still use 32-byte
+ * alignment there.  So, on both 32- and 64-bit platforms, it should fit into
+ * 32 bytes unless slock_t is really big.  We allow for that just in case.
  */
 #define LWLOCK_PADDED_SIZE	(sizeof(LWLock) <= 32 ? 32 : 64)
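To get the alignment the comment describes, each lock in an array is padded
out to LWLOCK_PADDED_SIZE; a minimal sketch of the idiom (lwlock.h defines a
union of this shape, LWLockPadded, for its lock arrays):

/* Pad each array element to LWLOCK_PADDED_SIZE bytes so that adjacent
 * locks don't unintentionally share a cache line. */
typedef union LWLockPadded
{
	LWLock		lock;
	char		pad[LWLOCK_PADDED_SIZE];
} LWLockPadded;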