Divide the lock manager's shared state into 'partitions', so as to reduce contention for the former single LockMgrLock.

Per my recent proposal. I set it up for 16 partitions, but on a pgbench test this gives only a marginal further improvement over 4 partitions --- we need to test more scenarios to choose the number of partitions.
author: Tom Lane <tgl@sss.pgh.pa.us> 2005-12-11 21:02:18 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2005-12-11 21:02:18 +0000
commit: ec0baf949ecdee0bf8d8e60cc8dba0137aac8d19 (patch)
tree: b435a97a4e87c31a6b644ac2d9d1f433de487588 /src
parent: be8100d64ec93ccd8160b37379ba189aab4d0ef1 (diff)
10 files changed, 627 insertions, 399 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index ffdee8388b3..0898df62337 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *		$PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.17 2005/11/22 18:17:07 momjian Exp $
+ *		$PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.18 2005/12/11 21:02:17 tgl Exp $
  *
  * NOTES
  *		Each global transaction is associated with a global transaction
@@ -284,7 +284,8 @@ MarkAsPreparing(TransactionId xid, const char *gid,
 	gxact->proc.lwWaitLink = NULL;
 	gxact->proc.waitLock = NULL;
 	gxact->proc.waitProcLock = NULL;
-	SHMQueueInit(&(gxact->proc.procLocks));
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+		SHMQueueInit(&(gxact->proc.myProcLocks[i]));
 	/* subxid data must be filled later by GXactLoadSubxactData */
 	gxact->proc.subxids.overflowed = false;
 	gxact->proc.subxids.nxids = 0;
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 7ac8084f6a3..cafadeb9054 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -14,8 +14,8 @@
  *
  * The process array now also includes PGPROC structures representing
  * prepared transactions.  The xid and subxids fields of these are valid,
- * as is the procLocks list.  They can be distinguished from regular backend
- * PGPROCs at need by checking for pid == 0.
+ * as are the myProcLocks lists.  They can be distinguished from regular
+ * backend PGPROCs at need by checking for pid == 0.
  *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
@@ -23,7 +23,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.8 2005/11/22 18:17:20 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.9 2005/12/11 21:02:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README
index 25820f4b73d..fdda5bf82a4 100644
--- a/src/backend/storage/lmgr/README
+++ b/src/backend/storage/lmgr/README
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.18 2005/12/09 01:22:04 tgl Exp $
+$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.19 2005/12/11 21:02:18 tgl Exp $
 
 
 LOCKING OVERVIEW
@@ -50,9 +50,12 @@ LOCK DATA STRUCTURES
 Lock methods describe the overall locking behavior.  Currently there are
 two lock methods: DEFAULT and USER.  (USER locks are non-blocking.)
 
-Lock modes describe the type of the lock (read/write or shared/exclusive). 
-See src/tools/backend/index.html and src/include/storage/lock.h for more
-details.
+Lock modes describe the type of the lock (read/write or shared/exclusive).
+In principle, each lock method can have its own set of lock modes with
+different conflict rules, but currently DEFAULT and USER methods use
+identical lock mode sets.  See src/tools/backend/index.html and
+src/include/storage/lock.h for more details.  (Lock modes are also called
+lock types in some places in the code and documentation.)
 
 There are two fundamental lock structures in shared memory: the
 per-lockable-object LOCK struct, and the per-lock-and-requestor PROCLOCK
@@ -67,7 +70,7 @@ be made per lockable object/lock mode/backend.  Internally to a backend,
 however, the same lock may be requested and perhaps released multiple times
 in a transaction, and it can also be held both transactionally and session-
 wide.  The internal request counts are held in LOCALLOCK so that the shared
-LockMgrLock need not be obtained to alter them.
+data structures need not be accessed to alter them.
 
 ---------------------------------------------------------------------------
 
@@ -103,10 +106,10 @@ procLocks -
     be waiting for more!).
 
 waitProcs -
-    This is a shared memory queue of all process structures corresponding to
-    a backend that is waiting (sleeping) until another backend releases this
+    This is a shared memory queue of all PGPROC structures corresponding to
+    backends that are waiting (sleeping) until another backend releases this
     lock.  The process structure holds the information needed to determine
-    if it should be woken up when this lock is released.
+    if it should be woken up when the lock is released.
 
 nRequested -
     Keeps a count of how many times this lock has been attempted to be
@@ -131,12 +134,12 @@ nGranted -
 granted -
     Keeps count of how many locks of each type are currently held.  Once again
     only elements 1 through MAX_LOCKMODES-1 are used (0 is not).  Also, like
-    requested, summing the values of granted should total to the value
+    requested[], summing the values of granted[] should total to the value
     of nGranted.
 
 We should always have 0 <= nGranted <= nRequested, and
-0 <= granted[i] <= requested[i] for each i.  If the request counts go to
-zero, the lock object is no longer needed and can be freed.
+0 <= granted[i] <= requested[i] for each i.  When all the request counts
+go to zero, the LOCK object is no longer needed and can be freed.
 
 ---------------------------------------------------------------------------
 
@@ -154,15 +157,16 @@ tag -
         SHMEM offset of PGPROC of backend process that owns this PROCLOCK.
 
 holdMask -
-    A bitmask for the lock types successfully acquired by this PROCLOCK.
+    A bitmask for the lock modes successfully acquired by this PROCLOCK.
     This should be a subset of the LOCK object's grantMask, and also a
-    subset of the PGPROC object's heldLocks mask.
+    subset of the PGPROC object's heldLocks mask (if the PGPROC is
+    currently waiting for another lock mode on this lock).
 
 releaseMask -
-    A bitmask for the lock types due to be released during LockReleaseAll.
+    A bitmask for the lock modes due to be released during LockReleaseAll.
     This must be a subset of the holdMask.  Note that it is modified without
-    taking the LockMgrLock, and therefore it is unsafe for any backend except
-    the one owning the PROCLOCK to examine/change it.
+    taking the partition LWLock, and therefore it is unsafe for any
+    backend except the one owning the PROCLOCK to examine/change it.
 
 lockLink -
     List link for shared memory queue of all the PROCLOCK objects for the
@@ -174,7 +178,60 @@ procLink -
 
 ---------------------------------------------------------------------------
 
-The deadlock detection algorithm:
+
+LOCK MANAGER INTERNAL LOCKING
+
+Before PostgreSQL 8.2, all of the shared-memory data structures used by
+the lock manager were protected by a single LWLock, the LockMgrLock;
+any operation involving these data structures had to exclusively lock
+LockMgrLock.  Not too surprisingly, this became a contention bottleneck.
+To reduce contention, the lock manager's data structures have been split
+into multiple "partitions", each protected by an independent LWLock.
+Most operations only need to lock the single partition they are working in.
+Here are the details:
+
+* Each possible lock is assigned to one partition according to a hash of
+its LOCKTAG value (see LockTagToPartition()).  The partition's LWLock is
+considered to protect all the LOCK objects of that partition as well as
+their subsidiary PROCLOCKs.  The shared-memory hash tables for LOCKs and
+PROCLOCKs are divided into separate hash tables for each partition, and
+operations on each hash table are likewise protected by the partition
+lock.
+
+* Formerly, each PGPROC had a single list of PROCLOCKs belonging to it.
+This has now been split into per-partition lists, so that access to a
+particular PROCLOCK list can be protected by the associated partition's
+LWLock.  (This is not strictly necessary at the moment, because at this
+writing a PGPROC's PROCLOCK list is only accessed by the owning backend
+anyway.  But it seems forward-looking to maintain a convention for how
+other backends could access it.  In any case LockReleaseAll needs to be
+able to quickly determine which partition each LOCK belongs to, and
+for the currently contemplated number of partitions, this way takes less
+shared memory than explicitly storing a partition number in LOCK structs
+would require.)
+
+* The other lock-related fields of a PGPROC are only interesting when
+the PGPROC is waiting for a lock, so we consider that they are protected
+by the partition LWLock of the awaited lock.
+
+For normal lock acquisition and release, it is sufficient to lock the
+partition containing the desired lock.  Deadlock checking needs to touch
+multiple partitions in general; for simplicity, we just make it lock all
+the partitions in partition-number order.  (To prevent LWLock deadlock,
+we establish the rule that any backend needing to lock more than one
+partition at once must lock them in partition-number order.)  It's
+possible that deadlock checking could be done without touching every
+partition in typical cases, but since in a properly functioning system
+deadlock checking should not occur often enough to be performance-critical,
+trying to make this work does not seem a productive use of effort.
+
+A backend's internal LOCALLOCK hash table is not partitioned.  We do store
+the partition number in LOCALLOCK table entries, but this is a straight
+speed-for-space tradeoff: we could instead recalculate the partition
+number from the LOCKTAG when needed.
+
+
+THE DEADLOCK DETECTION ALGORITHM
 
 Since we allow user transactions to request locks in any order, deadlock
 is possible.  We use a deadlock detection/breaking algorithm that is
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index adbd373bb7f..e72ab00b5b0 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.37 2005/12/09 01:22:04 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.38 2005/12/11 21:02:18 tgl Exp $
  *
  *	Interface:
  *
@@ -53,9 +53,9 @@ typedef struct
  * Information saved about each edge in a detected deadlock cycle.	This
  * is used to print a diagnostic message upon failure.
  *
- * Note: because we want to examine this info after releasing the LockMgrLock,
- * we can't just store LOCK and PGPROC pointers; we must extract out all the
- * info we want to be able to print.
+ * Note: because we want to examine this info after releasing the lock
+ * manager's partition locks, we can't just store LOCK and PGPROC pointers;
+ * we must extract out all the info we want to be able to print.
  */
 typedef struct
 {
@@ -188,19 +188,11 @@ InitDeadLockChecking(void)
  * deadlock.  If resolution is impossible, return TRUE --- the caller
  * is then expected to abort the given proc's transaction.
  *
- * We can't block on user locks, so no sense testing for deadlock
- * because there is no blocking, and no timer for the block.  So,
- * only look at regular locks.
- *
- * We must have already locked the master lock before being called.
- * NOTE: although the lockmethod structure appears to allow each lock
- * table to have a different masterLock, all locks that can block had
- * better use the same LWLock, else this code will not be adequately
- * interlocked!
+ * Caller must already have locked all partitions of the lock tables.
  *
  * On failure, deadlock details are recorded in deadlockDetails[] for
  * subsequent printing by DeadLockReport().  That activity is separate
- * because we don't want to do it while holding the master lock.
+ * because we don't want to do it while holding all those LWLocks.
  */
 bool
 DeadLockCheck(PGPROC *proc)
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 344d677cd2f..7f42b477cc6 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -1,14 +1,14 @@
 /*-------------------------------------------------------------------------
  *
  * lock.c
- *	  POSTGRES low-level lock mechanism
+ *	  POSTGRES primary lock mechanism
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.161 2005/12/09 01:22:04 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.162 2005/12/11 21:02:18 tgl Exp $
  *
  * NOTES
  *	  A lock table is a shared memory hash table.  When
@@ -163,10 +163,13 @@ typedef struct TwoPhaseLockRecord
 
 
 /*
- * Links to hash tables containing lock state
+ * Pointers to hash tables containing lock state
+ *
+ * The LockMethodLockHash and LockMethodProcLockHash hash tables are in
+ * shared memory; LockMethodLocalHash is local to each backend.
  */
-static HTAB *LockMethodLockHash;
-static HTAB *LockMethodProcLockHash;
+static HTAB *LockMethodLockHash[NUM_LOCK_PARTITIONS];
+static HTAB *LockMethodProcLockHash[NUM_LOCK_PARTITIONS];
 static HTAB *LockMethodLocalHash;
 
 
@@ -255,16 +258,25 @@ PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP)
 
 static void RemoveLocalLock(LOCALLOCK *locallock);
 static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
-static void WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
-		   ResourceOwner owner);
+static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
 static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
 			PROCLOCK *proclock, LockMethod lockMethodTable);
-static void CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock,
-			PROCLOCK *proclock, bool wakeupNeeded);
+static void CleanUpLock(LOCK *lock, PROCLOCK *proclock,
+			LockMethod lockMethodTable, int partition,
+			bool wakeupNeeded);
 
 
 /*
- * InitLocks -- Initialize the lock module's shared memory.
+ * InitLocks -- Initialize the lock manager's data structures.
+ *
+ * This is called from CreateSharedMemoryAndSemaphores(), which see for
+ * more comments.  In the normal postmaster case, the shared hash tables
+ * are created here, as well as a locallock hash table that will remain
+ * unused and empty in the postmaster itself.  Backends inherit the pointers
+ * to the shared tables via fork(), and also inherit an image of the locallock
+ * hash table, which they proceed to use.  In the EXEC_BACKEND case, each
+ * backend re-executes this code to obtain pointers to the already existing
+ * shared hash tables and to create its locallock hash table.
  */
 void
 InitLocks(void)
@@ -274,13 +286,18 @@ InitLocks(void)
 	int			hash_flags;
 	long		init_table_size,
 				max_table_size;
+	int			i;
 
-	/* Compute init/max size to request for lock hashtables */
+	/*
+	 * Compute init/max size to request for lock hashtables.  Note these
+	 * calculations must agree with LockShmemSize!
+	 */
 	max_table_size = NLOCKENTS();
+	max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1;
 	init_table_size = max_table_size / 2;
 
 	/*
-	 * allocate a hash table for LOCK structs.	This is used to store
+	 * Allocate hash tables for LOCK structs.  These are used to store
 	 * per-locked-object information.
 	 */
 	MemSet(&info, 0, sizeof(info));
@@ -289,37 +306,45 @@ InitLocks(void)
 	info.hash = tag_hash;
 	hash_flags = (HASH_ELEM | HASH_FUNCTION);
 
-	sprintf(shmemName, "LOCK hash");
-	LockMethodLockHash = ShmemInitHash(shmemName,
-									   init_table_size,
-									   max_table_size,
-									   &info,
-									   hash_flags);
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+	{
+		sprintf(shmemName, "LOCK hash %d", i);
+		LockMethodLockHash[i] = ShmemInitHash(shmemName,
+											  init_table_size,
+											  max_table_size,
+											  &info,
+											  hash_flags);
+		if (!LockMethodLockHash[i])
+			elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
+	}
 
-	if (!LockMethodLockHash)
-		elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
+	/* Assume an average of 2 holders per lock */
+	max_table_size *= 2;
+	init_table_size *= 2;
 
 	/*
-	 * allocate a hash table for PROCLOCK structs.	This is used to store
-	 * per-lock-holder information.
+	 * Allocate hash tables for PROCLOCK structs.  These are used to store
+	 * per-lock-per-holder information.
 	 */
 	info.keysize = sizeof(PROCLOCKTAG);
 	info.entrysize = sizeof(PROCLOCK);
 	info.hash = tag_hash;
 	hash_flags = (HASH_ELEM | HASH_FUNCTION);
 
-	sprintf(shmemName, "PROCLOCK hash");
-	LockMethodProcLockHash = ShmemInitHash(shmemName,
-										   init_table_size,
-										   max_table_size,
-										   &info,
-										   hash_flags);
-
-	if (!LockMethodProcLockHash)
-		elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+	{
+		sprintf(shmemName, "PROCLOCK hash %d", i);
+		LockMethodProcLockHash[i] = ShmemInitHash(shmemName,
+												  init_table_size,
+												  max_table_size,
+												  &info,
+												  hash_flags);
+		if (!LockMethodProcLockHash[i])
+			elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
+	}
 
 	/*
-	 * allocate a non-shared hash table for LOCALLOCK structs.	This is used
+	 * Allocate one non-shared hash table for LOCALLOCK structs.  This is used
 	 * to store lock counts and resource owner information.
 	 *
 	 * The non-shared table could already exist in this process (this occurs
@@ -356,6 +381,39 @@ GetLocksMethodTable(const LOCK *lock)
 
 
 /*
+ * Given a LOCKTAG, determine which partition the lock belongs in.
+ *
+ * Basically what we want to do here is hash the locktag.  However, it
+ * seems unwise to use hash_any() because that is the same function that
+ * will be used to distribute the locks within each partition's hash table;
+ * if we use it, we run a big risk of having uneven distribution of hash
+ * codes within each hash table.  Instead, we use a simple linear XOR of the
+ * bits of the locktag.
+ */
+int
+LockTagToPartition(const LOCKTAG *locktag)
+{
+	const uint8 *ptr = (const uint8 *) locktag;	/* view the tag as raw bytes */
+	int			result = 0;
+	int			i;
+
+	for (i = 0; i < sizeof(LOCKTAG); i++)	/* XOR every byte of the struct together */
+		result ^= *ptr++;		/* covers all sizeof(LOCKTAG) bytes, padding included if any */
+#if NUM_LOCK_PARTITIONS == 16
+	result ^= result >> 4;		/* fold the high nibble into the low nibble */
+	result &= 0x0F;				/* keep 4 bits: partition number 0..15 */
+#elif NUM_LOCK_PARTITIONS == 4
+	result ^= result >> 4;		/* fold 8 bits down to 4 ... */
+	result ^= result >> 2;		/* ... then 4 bits down to 2 */
+	result &= 0x03;				/* keep 2 bits: partition number 0..3 */
+#else
+#error unsupported NUM_LOCK_PARTITIONS
+#endif
+	return result;
+}
+
+
+/*
  * LockAcquire -- Check for lock conflicts, sleep if conflict found,
  *		set lock if/when no conflicts.
  *
@@ -397,7 +455,8 @@ LockAcquire(const LOCKTAG *locktag,
 	PROCLOCKTAG proclocktag;
 	bool		found;
 	ResourceOwner owner;
-	LWLockId	masterLock;
+	int			partition;
+	LWLockId	partitionLock;
 	int			status;
 
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -438,6 +497,7 @@ LockAcquire(const LOCKTAG *locktag,
 		locallock->lock = NULL;
 		locallock->proclock = NULL;
 		locallock->isTempObject = isTempObject;
+		locallock->partition = LockTagToPartition(&(localtag.lock));
 		locallock->nLocks = 0;
 		locallock->numLockOwners = 0;
 		locallock->maxLockOwners = 8;
@@ -474,9 +534,10 @@ LockAcquire(const LOCKTAG *locktag,
 	/*
 	 * Otherwise we've got to mess with the shared lock table.
 	 */
-	masterLock = LockMgrLock;
+	partition = locallock->partition;
+	partitionLock = FirstLockMgrLock + partition;
 
-	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
 	/*
 	 * Find or create a lock with this tag.
@@ -486,12 +547,12 @@ LockAcquire(const LOCKTAG *locktag,
 	 * pointer is valid, since a lock object with no locks can go away
 	 * anytime.
 	 */
-	lock = (LOCK *) hash_search(LockMethodLockHash,
+	lock = (LOCK *) hash_search(LockMethodLockHash[partition],
 								(void *) locktag,
 								HASH_ENTER_NULL, &found);
 	if (!lock)
 	{
-		LWLockRelease(masterLock);
+		LWLockRelease(partitionLock);
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of shared memory"),
@@ -532,7 +593,7 @@ LockAcquire(const LOCKTAG *locktag,
 	/*
 	 * Find or create a proclock entry with this tag
 	 */
-	proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
+	proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
 										(void *) &proclocktag,
 										HASH_ENTER_NULL, &found);
 	if (!proclock)
@@ -547,12 +608,12 @@ LockAcquire(const LOCKTAG *locktag,
 			 * anyone to release the lock object later.
 			 */
 			Assert(SHMQueueEmpty(&(lock->procLocks)));
-			if (!hash_search(LockMethodLockHash,
+			if (!hash_search(LockMethodLockHash[partition],
 							 (void *) &(lock->tag),
 							 HASH_REMOVE, NULL))
 				elog(PANIC, "lock table corrupted");
 		}
-		LWLockRelease(masterLock);
+		LWLockRelease(partitionLock);
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of shared memory"),
@@ -569,7 +630,8 @@ LockAcquire(const LOCKTAG *locktag,
 		proclock->releaseMask = 0;
 		/* Add proclock to appropriate lists */
 		SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
-		SHMQueueInsertBefore(&MyProc->procLocks, &proclock->procLink);
+		SHMQueueInsertBefore(&(MyProc->myProcLocks[partition]),
+							 &proclock->procLink);
 		PROCLOCK_PRINT("LockAcquire: new", proclock);
 	}
 	else
@@ -666,7 +728,7 @@ LockAcquire(const LOCKTAG *locktag,
 			{
 				SHMQueueDelete(&proclock->lockLink);
 				SHMQueueDelete(&proclock->procLink);
-				if (!hash_search(LockMethodProcLockHash,
+				if (!hash_search(LockMethodProcLockHash[partition],
 								 (void *) &(proclock->tag),
 								 HASH_REMOVE, NULL))
 					elog(PANIC, "proclock table corrupted");
@@ -678,7 +740,7 @@ LockAcquire(const LOCKTAG *locktag,
 			LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
 			Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
 			Assert(lock->nGranted <= lock->nRequested);
-			LWLockRelease(masterLock);
+			LWLockRelease(partitionLock);
 			if (locallock->nLocks == 0)
 				RemoveLocalLock(locallock);
 			return LOCKACQUIRE_NOT_AVAIL;
@@ -692,7 +754,7 @@ LockAcquire(const LOCKTAG *locktag,
 		/*
 		 * Sleep till someone wakes me up.
 		 */
-		WaitOnLock(lockmethodid, locallock, owner);
+		WaitOnLock(locallock, owner);
 
 		/*
 		 * NOTE: do not do any material change of state between here and
@@ -709,14 +771,14 @@ LockAcquire(const LOCKTAG *locktag,
 			PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
 			LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
 			/* Should we retry ? */
-			LWLockRelease(masterLock);
+			LWLockRelease(partitionLock);
 			elog(ERROR, "LockAcquire failed");
 		}
 		PROCLOCK_PRINT("LockAcquire: granted", proclock);
 		LOCK_PRINT("LockAcquire: granted", lock, lockmode);
 	}
 
-	LWLockRelease(masterLock);
+	LWLockRelease(partitionLock);
 
 	return LOCKACQUIRE_OK;
 }
@@ -894,11 +956,12 @@ UnGrantLock(LOCK *lock, LOCKMODE lockmode,
  * should be called after UnGrantLock, and wakeupNeeded is the result from
  * UnGrantLock.)
  *
- * The locktable's masterLock must be held at entry, and will be
+ * The lock table's partition lock must be held at entry, and will be
  * held at exit.
  */
 static void
-CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
+CleanUpLock(LOCK *lock, PROCLOCK *proclock,
+			LockMethod lockMethodTable, int partition,
 			bool wakeupNeeded)
 {
 	/*
@@ -910,7 +973,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
 		PROCLOCK_PRINT("CleanUpLock: deleting", proclock);
 		SHMQueueDelete(&proclock->lockLink);
 		SHMQueueDelete(&proclock->procLink);
-		if (!hash_search(LockMethodProcLockHash,
+		if (!hash_search(LockMethodProcLockHash[partition],
 						 (void *) &(proclock->tag),
 						 HASH_REMOVE, NULL))
 			elog(PANIC, "proclock table corrupted");
@@ -924,7 +987,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
 		 */
 		LOCK_PRINT("CleanUpLock: deleting", lock, 0);
 		Assert(SHMQueueEmpty(&(lock->procLocks)));
-		if (!hash_search(LockMethodLockHash,
+		if (!hash_search(LockMethodLockHash[partition],
 						 (void *) &(lock->tag),
 						 HASH_REMOVE, NULL))
 			elog(PANIC, "lock table corrupted");
@@ -932,7 +995,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
 	else if (wakeupNeeded)
 	{
 		/* There are waiters on this lock, so wake them up. */
-		ProcLockWakeup(LockMethods[lockmethodid], lock);
+		ProcLockWakeup(lockMethodTable, lock);
 	}
 }
 
@@ -988,12 +1051,12 @@ GrantAwaitedLock(void)
  * Caller must have set MyProc->heldLocks to reflect locks already held
  * on the lockable object by this process.
  *
- * The locktable's masterLock must be held at entry.
+ * The appropriate partition lock must be held at entry.
  */
 static void
-WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
-		   ResourceOwner owner)
+WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
 {
+	LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
 	LockMethod	lockMethodTable = LockMethods[lockmethodid];
 	const char *old_status;
 	char	   *new_status;
@@ -1025,10 +1088,7 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
 	 * will also happen in the cancel/die case.
 	 */
 
-	if (ProcSleep(lockMethodTable,
-				  locallock->tag.mode,
-				  locallock->lock,
-				  locallock->proclock) != STATUS_OK)
+	if (ProcSleep(locallock, lockMethodTable) != STATUS_OK)
 	{
 		/*
 		 * We failed as a result of a deadlock, see CheckDeadLock(). Quit now.
@@ -1036,10 +1096,10 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
 		awaitedLock = NULL;
 		LOCK_PRINT("WaitOnLock: aborting on lock",
 				   locallock->lock, locallock->tag.mode);
-		LWLockRelease(LockMgrLock);
+		LWLockRelease(FirstLockMgrLock + locallock->partition);
 
 		/*
-		 * Now that we aren't holding the LockMgrLock, we can give an error
+		 * Now that we aren't holding the partition lock, we can give an error
 		 * report including details about the detected deadlock.
 		 */
 		DeadLockReport();
@@ -1059,12 +1119,12 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
  * Remove a proc from the wait-queue it is on
  * (caller must know it is on one).
  *
- * Locktable lock must be held by caller.
+ * Appropriate partition lock must be held by caller.
  *
  * NB: this does not clean up any locallock object that may exist for the lock.
  */
 void
-RemoveFromWaitQueue(PGPROC *proc)
+RemoveFromWaitQueue(PGPROC *proc, int partition)
 {
 	LOCK	   *waitLock = proc->waitLock;
 	PROCLOCK   *proclock = proc->waitProcLock;
@@ -1102,7 +1162,9 @@ RemoveFromWaitQueue(PGPROC *proc)
 	 * LockRelease expects there to be no remaining proclocks.) Then see if
 	 * any other waiters for the lock can be woken up now.
 	 */
-	CleanUpLock(lockmethodid, waitLock, proclock, true);
+	CleanUpLock(waitLock, proclock,
+				LockMethods[lockmethodid], partition,
+				true);
 }
 
 /*
@@ -1125,7 +1187,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
 	LOCALLOCK  *locallock;
 	LOCK	   *lock;
 	PROCLOCK   *proclock;
-	LWLockId	masterLock;
+	int			partition;
+	LWLockId	partitionLock;
 	bool		wakeupNeeded;
 
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -1212,9 +1275,10 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
 	/*
 	 * Otherwise we've got to mess with the shared lock table.
 	 */
-	masterLock = LockMgrLock;
+	partition = locallock->partition;
+	partitionLock = FirstLockMgrLock + partition;
 
-	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
 	/*
 	 * We don't need to re-find the lock or proclock, since we kept their
@@ -1233,7 +1297,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
 	if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
 	{
 		PROCLOCK_PRINT("LockRelease: WRONGTYPE", proclock);
-		LWLockRelease(masterLock);
+		LWLockRelease(partitionLock);
 		elog(WARNING, "you don't own a lock of type %s",
 			 lockMethodTable->lockModeNames[lockmode]);
 		RemoveLocalLock(locallock);
@@ -1245,9 +1309,11 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
 	 */
 	wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
 
-	CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
+	CleanUpLock(lock, proclock,
+				lockMethodTable, partition,
+				wakeupNeeded);
 
-	LWLockRelease(masterLock);
+	LWLockRelease(partitionLock);
 
 	RemoveLocalLock(locallock);
 	return TRUE;
@@ -1265,14 +1331,13 @@ void
 LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
 {
 	HASH_SEQ_STATUS status;
-	SHM_QUEUE  *procLocks = &(MyProc->procLocks);
-	LWLockId	masterLock;
 	LockMethod	lockMethodTable;
 	int			i,
 				numLockModes;
 	LOCALLOCK  *locallock;
-	PROCLOCK   *proclock;
 	LOCK	   *lock;
+	PROCLOCK   *proclock;
+	int			partition;
 
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
@@ -1284,7 +1349,6 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
 #endif
 
 	numLockModes = lockMethodTable->numLockModes;
-	masterLock = LockMgrLock;
 
 	/*
 	 * First we run through the locallock table and get rid of unwanted
@@ -1351,74 +1415,89 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
 		RemoveLocalLock(locallock);
 	}
 
-	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+	/*
+	 * Now, scan each lock partition separately.
+	 */
+	for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
+	{
+		LWLockId	partitionLock = FirstLockMgrLock + partition;
+		SHM_QUEUE  *procLocks = &(MyProc->myProcLocks[partition]);
 
-	proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
-										 offsetof(PROCLOCK, procLink));
+		proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
+											 offsetof(PROCLOCK, procLink));
 
-	while (proclock)
-	{
-		bool		wakeupNeeded = false;
-		PROCLOCK   *nextplock;
+		if (!proclock)
+			continue;			/* needn't examine this partition */
 
-		/* Get link first, since we may unlink/delete this proclock */
-		nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
-											  offsetof(PROCLOCK, procLink));
+		LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
-		Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
+		while (proclock)
+		{
+			bool		wakeupNeeded = false;
+			PROCLOCK   *nextplock;
 
-		lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+			/* Get link first, since we may unlink/delete this proclock */
+			nextplock = (PROCLOCK *)
+				SHMQueueNext(procLocks, &proclock->procLink,
+							 offsetof(PROCLOCK, procLink));
 
-		/* Ignore items that are not of the lockmethod to be removed */
-		if (LOCK_LOCKMETHOD(*lock) != lockmethodid)
-			goto next_item;
+			Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
 
-		/*
-		 * In allLocks mode, force release of all locks even if locallock
-		 * table had problems
-		 */
-		if (allLocks)
-			proclock->releaseMask = proclock->holdMask;
-		else
-			Assert((proclock->releaseMask & ~proclock->holdMask) == 0);
+			lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
 
-		/*
-		 * Ignore items that have nothing to be released, unless they have
-		 * holdMask == 0 and are therefore recyclable
-		 */
-		if (proclock->releaseMask == 0 && proclock->holdMask != 0)
-			goto next_item;
+			/* Ignore items that are not of the lockmethod to be removed */
+			if (LOCK_LOCKMETHOD(*lock) != lockmethodid)
+				goto next_item;
 
-		PROCLOCK_PRINT("LockReleaseAll", proclock);
-		LOCK_PRINT("LockReleaseAll", lock, 0);
-		Assert(lock->nRequested >= 0);
-		Assert(lock->nGranted >= 0);
-		Assert(lock->nGranted <= lock->nRequested);
-		Assert((proclock->holdMask & ~lock->grantMask) == 0);
+			/*
+			 * In allLocks mode, force release of all locks even if locallock
+			 * table had problems
+			 */
+			if (allLocks)
+				proclock->releaseMask = proclock->holdMask;
+			else
+				Assert((proclock->releaseMask & ~proclock->holdMask) == 0);
 
-		/*
-		 * Release the previously-marked lock modes
-		 */
-		for (i = 1; i <= numLockModes; i++)
-		{
-			if (proclock->releaseMask & LOCKBIT_ON(i))
-				wakeupNeeded |= UnGrantLock(lock, i, proclock,
-											lockMethodTable);
-		}
-		Assert((lock->nRequested >= 0) && (lock->nGranted >= 0));
-		Assert(lock->nGranted <= lock->nRequested);
-		LOCK_PRINT("LockReleaseAll: updated", lock, 0);
+			/*
+			 * Ignore items that have nothing to be released, unless they have
+			 * holdMask == 0 and are therefore recyclable
+			 */
+			if (proclock->releaseMask == 0 && proclock->holdMask != 0)
+				goto next_item;
 
-		proclock->releaseMask = 0;
+			PROCLOCK_PRINT("LockReleaseAll", proclock);
+			LOCK_PRINT("LockReleaseAll", lock, 0);
+			Assert(lock->nRequested >= 0);
+			Assert(lock->nGranted >= 0);
+			Assert(lock->nGranted <= lock->nRequested);
+			Assert((proclock->holdMask & ~lock->grantMask) == 0);
+
+			/*
+			 * Release the previously-marked lock modes
+			 */
+			for (i = 1; i <= numLockModes; i++)
+			{
+				if (proclock->releaseMask & LOCKBIT_ON(i))
+					wakeupNeeded |= UnGrantLock(lock, i, proclock,
+												lockMethodTable);
+			}
+			Assert((lock->nRequested >= 0) && (lock->nGranted >= 0));
+			Assert(lock->nGranted <= lock->nRequested);
+			LOCK_PRINT("LockReleaseAll: updated", lock, 0);
 
-		/* CleanUpLock will wake up waiters if needed. */
-		CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
+			proclock->releaseMask = 0;
 
-next_item:
-		proclock = nextplock;
-	}
+			/* CleanUpLock will wake up waiters if needed. */
+			CleanUpLock(lock, proclock,
+						lockMethodTable, partition,
+						wakeupNeeded);
 
-	LWLockRelease(masterLock);
+		next_item:
+			proclock = nextplock;
+		} /* loop over PROCLOCKs within this partition */
+
+		LWLockRelease(partitionLock);
+	} /* loop over partitions */
 
 #ifdef LOCK_DEBUG
 	if (*(lockMethodTable->trace_flag))
@@ -1627,19 +1706,16 @@ PostPrepare_Locks(TransactionId xid)
 {
 	PGPROC	   *newproc = TwoPhaseGetDummyProc(xid);
 	HASH_SEQ_STATUS status;
-	SHM_QUEUE  *procLocks = &(MyProc->procLocks);
-	LWLockId	masterLock;
 	LOCALLOCK  *locallock;
+	LOCK	   *lock;
 	PROCLOCK   *proclock;
 	PROCLOCKTAG proclocktag;
 	bool		found;
-	LOCK	   *lock;
+	int			partition;
 
 	/* This is a critical section: any error means big trouble */
 	START_CRIT_SECTION();
 
-	masterLock = LockMgrLock;
-
 	/*
 	 * First we run through the locallock table and get rid of unwanted
 	 * entries, then we scan the process's proclocks and transfer them to the
@@ -1678,105 +1754,121 @@ PostPrepare_Locks(TransactionId xid)
 		RemoveLocalLock(locallock);
 	}
 
-	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+	/*
+	 * Now, scan each lock partition separately.
+	 */
+	for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
+	{
+		LWLockId	partitionLock = FirstLockMgrLock + partition;
+		SHM_QUEUE  *procLocks = &(MyProc->myProcLocks[partition]);
 
-	proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
-										 offsetof(PROCLOCK, procLink));
+		proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
+											 offsetof(PROCLOCK, procLink));
 
-	while (proclock)
-	{
-		PROCLOCK   *nextplock;
-		LOCKMASK	holdMask;
-		PROCLOCK   *newproclock;
+		if (!proclock)
+			continue;			/* needn't examine this partition */
 
-		/* Get link first, since we may unlink/delete this proclock */
-		nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
-											  offsetof(PROCLOCK, procLink));
+		LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
-		Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
+		while (proclock)
+		{
+			PROCLOCK   *nextplock;
+			LOCKMASK	holdMask;
+			PROCLOCK   *newproclock;
 
-		lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+			/* Get link first, since we may unlink/delete this proclock */
+			nextplock = (PROCLOCK *)
+				SHMQueueNext(procLocks, &proclock->procLink,
+							 offsetof(PROCLOCK, procLink));
 
-		/* Ignore nontransactional locks */
-		if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional)
-			goto next_item;
+			Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
 
-		PROCLOCK_PRINT("PostPrepare_Locks", proclock);
-		LOCK_PRINT("PostPrepare_Locks", lock, 0);
-		Assert(lock->nRequested >= 0);
-		Assert(lock->nGranted >= 0);
-		Assert(lock->nGranted <= lock->nRequested);
-		Assert((proclock->holdMask & ~lock->grantMask) == 0);
+			lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
 
-		/*
-		 * Since there were no session locks, we should be releasing all locks
-		 */
-		if (proclock->releaseMask != proclock->holdMask)
-			elog(PANIC, "we seem to have dropped a bit somewhere");
+			/* Ignore nontransactional locks */
+			if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional)
+				goto next_item;
 
-		holdMask = proclock->holdMask;
+			PROCLOCK_PRINT("PostPrepare_Locks", proclock);
+			LOCK_PRINT("PostPrepare_Locks", lock, 0);
+			Assert(lock->nRequested >= 0);
+			Assert(lock->nGranted >= 0);
+			Assert(lock->nGranted <= lock->nRequested);
+			Assert((proclock->holdMask & ~lock->grantMask) == 0);
 
-		/*
-		 * We cannot simply modify proclock->tag.proc to reassign ownership of
-		 * the lock, because that's part of the hash key and the proclock
-		 * would then be in the wrong hash chain.  So, unlink and delete the
-		 * old proclock; create a new one with the right contents; and link it
-		 * into place.	We do it in this order to be certain we won't run out
-		 * of shared memory (the way dynahash.c works, the deleted object is
-		 * certain to be available for reallocation).
-		 */
-		SHMQueueDelete(&proclock->lockLink);
-		SHMQueueDelete(&proclock->procLink);
-		if (!hash_search(LockMethodProcLockHash,
-						 (void *) &(proclock->tag),
-						 HASH_REMOVE, NULL))
-			elog(PANIC, "proclock table corrupted");
+			/*
+			 * Since there were no session locks, we should be releasing all
+			 * locks
+			 */
+			if (proclock->releaseMask != proclock->holdMask)
+				elog(PANIC, "we seem to have dropped a bit somewhere");
 
-		/*
-		 * Create the hash key for the new proclock table.
-		 */
-		MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));
-		proclocktag.lock = MAKE_OFFSET(lock);
-		proclocktag.proc = MAKE_OFFSET(newproc);
-
-		newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
-											   (void *) &proclocktag,
-											   HASH_ENTER_NULL, &found);
-		if (!newproclock)
-			ereport(PANIC,		/* should not happen */
-					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of shared memory"),
-					 errdetail("Not enough memory for reassigning the prepared transaction's locks.")));
+			holdMask = proclock->holdMask;
 
-		/*
-		 * If new, initialize the new entry
-		 */
-		if (!found)
-		{
-			newproclock->holdMask = 0;
-			newproclock->releaseMask = 0;
-			/* Add new proclock to appropriate lists */
-			SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink);
-			SHMQueueInsertBefore(&newproc->procLocks, &newproclock->procLink);
-			PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock);
-		}
-		else
-		{
-			PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock);
-			Assert((newproclock->holdMask & ~lock->grantMask) == 0);
-		}
+			/*
+			 * We cannot simply modify proclock->tag.proc to reassign
+			 * ownership of the lock, because that's part of the hash key and
+			 * the proclock would then be in the wrong hash chain.  So, unlink
+			 * and delete the old proclock; create a new one with the right
+			 * contents; and link it into place.  We do it in this order to be
+			 * certain we won't run out of shared memory (the way dynahash.c
+			 * works, the deleted object is certain to be available for
+			 * reallocation).
+			 */
+			SHMQueueDelete(&proclock->lockLink);
+			SHMQueueDelete(&proclock->procLink);
+			if (!hash_search(LockMethodProcLockHash[partition],
+							 (void *) &(proclock->tag),
+							 HASH_REMOVE, NULL))
+				elog(PANIC, "proclock table corrupted");
 
-		/*
-		 * Pass over the identified lock ownership.
-		 */
-		Assert((newproclock->holdMask & holdMask) == 0);
-		newproclock->holdMask |= holdMask;
+			/*
+			 * Create the hash key for the new proclock entry.
+			 */
+			MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));
+			proclocktag.lock = MAKE_OFFSET(lock);
+			proclocktag.proc = MAKE_OFFSET(newproc);
+
+			newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
+												   (void *) &proclocktag,
+												   HASH_ENTER_NULL, &found);
+			if (!newproclock)
+				ereport(PANIC,		/* should not happen */
+						(errcode(ERRCODE_OUT_OF_MEMORY),
+						 errmsg("out of shared memory"),
+						 errdetail("Not enough memory for reassigning the prepared transaction's locks.")));
 
-next_item:
-		proclock = nextplock;
-	}
+			/*
+			 * If new, initialize the new entry
+			 */
+			if (!found)
+			{
+				newproclock->holdMask = 0;
+				newproclock->releaseMask = 0;
+				/* Add new proclock to appropriate lists */
+				SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink);
+				SHMQueueInsertBefore(&(newproc->myProcLocks[partition]),
+									 &newproclock->procLink);
+				PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock);
+			}
+			else
+			{
+				PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock);
+				Assert((newproclock->holdMask & ~lock->grantMask) == 0);
+			}
+
+			/*
+			 * Pass over the identified lock ownership.
+			 */
+			Assert((newproclock->holdMask & holdMask) == 0);
+			newproclock->holdMask |= holdMask;
+
+		next_item:
+			proclock = nextplock;
+		} /* loop over PROCLOCKs within this partition */
 
-	LWLockRelease(masterLock);
+		LWLockRelease(partitionLock);
+	} /* loop over partitions */
 
 	END_CRIT_SECTION();
 }
@@ -1789,20 +1881,23 @@ Size
 LockShmemSize(void)
 {
 	Size		size = 0;
-	long		max_table_size = NLOCKENTS();
+	Size		tabsize;
+	long		max_table_size;
 
-	/* lockHash table */
-	size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK)));
+	/* lock hash tables */
+	max_table_size = NLOCKENTS();
+	max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1;
+	tabsize = hash_estimate_size(max_table_size, sizeof(LOCK));
+	size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS));
 
-	/* proclockHash table */
-	size = add_size(size, hash_estimate_size(max_table_size, sizeof(PROCLOCK)));
+	/* proclock hash tables */
+	max_table_size *= 2;
+	tabsize = hash_estimate_size(max_table_size, sizeof(PROCLOCK));
+	size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS));
 
 	/*
-	 * Note we count only one pair of hash tables, since the userlocks table
-	 * actually overlays the main one.
-	 *
-	 * Since the lockHash entry count above is only an estimate, add 10%
-	 * safety margin.
+	 * Since there is likely to be some space wastage due to uneven use
+	 * of the partitions, add 10% safety margin.
 	 */
 	size = add_size(size, size / 10);
 
@@ -1818,9 +1913,9 @@ LockShmemSize(void)
  * copies of the same PGPROC and/or LOCK objects are likely to appear.
  * It is the caller's responsibility to match up duplicates if wanted.
  *
- * The design goal is to hold the LockMgrLock for as short a time as possible;
+ * The design goal is to hold the LWLocks for as short a time as possible;
  * thus, this function simply makes a copy of the necessary data and releases
- * the lock, allowing the caller to contemplate and format the data for as
+ * the locks, allowing the caller to contemplate and format the data for as
  * long as it pleases.
  */
 LockData *
@@ -1830,40 +1925,67 @@ GetLockStatusData(void)
 	HTAB	   *proclockTable;
 	PROCLOCK   *proclock;
 	HASH_SEQ_STATUS seqstat;
+	int			els;
+	int			el;
 	int			i;
 
 	data = (LockData *) palloc(sizeof(LockData));
 
-	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
-
-	proclockTable = LockMethodProcLockHash;
-
-	data->nelements = i = proclockTable->hctl->nentries;
+	/*
+	 * Acquire locks on the entire shared lock data structure.  We can't
+	 * operate one partition at a time if we want to deliver a self-consistent
+	 * view of the state.
+	 *
+	 * Since this is a read-only operation, we take shared instead of exclusive
+	 * lock.  There's not a whole lot of point to this, because all the normal
+	 * operations require exclusive lock, but it doesn't hurt anything either.
+	 * It will at least allow two backends to do GetLockStatusData in parallel.
+	 *
+	 * Must grab LWLocks in partition-number order to avoid LWLock deadlock.
+	 *
+	 * Use same loop to count up the total number of PROCLOCK objects.
+	 */
+	els = 0;
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+	{
+		LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
+		proclockTable = LockMethodProcLockHash[i];
+		els += proclockTable->hctl->nentries;
+	}
 
-	data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * i);
-	data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * i);
-	data->procs = (PGPROC *) palloc(sizeof(PGPROC) * i);
-	data->locks = (LOCK *) palloc(sizeof(LOCK) * i);
+	data->nelements = els;
+	data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * els);
+	data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * els);
+	data->procs = (PGPROC *) palloc(sizeof(PGPROC) * els);
+	data->locks = (LOCK *) palloc(sizeof(LOCK) * els);
 
-	hash_seq_init(&seqstat, proclockTable);
+	el = 0;
 
-	i = 0;
-	while ((proclock = hash_seq_search(&seqstat)))
+	/* Now scan the tables to copy the data */
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
 	{
-		PGPROC	   *proc = (PGPROC *) MAKE_PTR(proclock->tag.proc);
-		LOCK	   *lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+		proclockTable = LockMethodProcLockHash[i];
+		hash_seq_init(&seqstat, proclockTable);
 
-		data->proclockaddrs[i] = MAKE_OFFSET(proclock);
-		memcpy(&(data->proclocks[i]), proclock, sizeof(PROCLOCK));
-		memcpy(&(data->procs[i]), proc, sizeof(PGPROC));
-		memcpy(&(data->locks[i]), lock, sizeof(LOCK));
+		while ((proclock = hash_seq_search(&seqstat)))
+		{
+			PGPROC	   *proc = (PGPROC *) MAKE_PTR(proclock->tag.proc);
+			LOCK	   *lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+
+			data->proclockaddrs[el] = MAKE_OFFSET(proclock);
+			memcpy(&(data->proclocks[el]), proclock, sizeof(PROCLOCK));
+			memcpy(&(data->procs[el]), proc, sizeof(PGPROC));
+			memcpy(&(data->locks[el]), lock, sizeof(LOCK));
 
-		i++;
+			el++;
+		}
 	}
 
-	LWLockRelease(LockMgrLock);
+	/* And release locks */
+	for (i = NUM_LOCK_PARTITIONS; --i >= 0; )
+		LWLockRelease(FirstLockMgrLock + i);
 
-	Assert(i == data->nelements);
+	Assert(el == data->nelements);
 
 	return data;
 }
@@ -1879,7 +2001,7 @@ GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode)
 
 #ifdef LOCK_DEBUG
 /*
- * Dump all locks in the given proc's procLocks list.
+ * Dump all locks in the given proc's myProcLocks lists.
  *
  * Caller is responsible for having acquired appropriate LWLocks.
  */
@@ -1889,29 +2011,34 @@ DumpLocks(PGPROC *proc)
 	SHM_QUEUE  *procLocks;
 	PROCLOCK   *proclock;
 	LOCK	   *lock;
+	int			i;
 
 	if (proc == NULL)
 		return;
 
-	procLocks = &proc->procLocks;
-
 	if (proc->waitLock)
 		LOCK_PRINT("DumpLocks: waiting on", proc->waitLock, 0);
 
-	proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
-										 offsetof(PROCLOCK, procLink));
-
-	while (proclock)
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
 	{
-		Assert(proclock->tag.proc == MAKE_OFFSET(proc));
+		procLocks = &(proc->myProcLocks[i]);
 
-		lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+		proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
+											 offsetof(PROCLOCK, procLink));
 
-		PROCLOCK_PRINT("DumpLocks", proclock);
-		LOCK_PRINT("DumpLocks", lock, 0);
+		while (proclock)
+		{
+			Assert(proclock->tag.proc == MAKE_OFFSET(proc));
 
-		proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
-											 offsetof(PROCLOCK, procLink));
+			lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+
+			PROCLOCK_PRINT("DumpLocks", proclock);
+			LOCK_PRINT("DumpLocks", lock, 0);
+
+			proclock = (PROCLOCK *)
+				SHMQueueNext(procLocks, &proclock->procLink,
+							 offsetof(PROCLOCK, procLink));
+		}
 	}
 }
 
@@ -1928,25 +2055,30 @@ DumpAllLocks(void)
 	LOCK	   *lock;
 	HTAB	   *proclockTable;
 	HASH_SEQ_STATUS status;
+	int			i;
 
 	proc = MyProc;
-	proclockTable = LockMethodProcLockHash;
 
 	if (proc && proc->waitLock)
 		LOCK_PRINT("DumpAllLocks: waiting on", proc->waitLock, 0);
 
-	hash_seq_init(&status, proclockTable);
-	while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
 	{
-		PROCLOCK_PRINT("DumpAllLocks", proclock);
+		proclockTable = LockMethodProcLockHash[i];
+		hash_seq_init(&status, proclockTable);
 
-		if (proclock->tag.lock)
+		while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
 		{
-			lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
-			LOCK_PRINT("DumpAllLocks", lock, 0);
+			PROCLOCK_PRINT("DumpAllLocks", proclock);
+
+			if (proclock->tag.lock)
+			{
+				lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+				LOCK_PRINT("DumpAllLocks", lock, 0);
+			}
+			else
+				elog(LOG, "DumpAllLocks: proclock->tag.lock = NULL");
 		}
-		else
-			elog(LOG, "DumpAllLocks: proclock->tag.lock = NULL");
 	}
 }
 #endif   /* LOCK_DEBUG */
@@ -1975,7 +2107,8 @@ lock_twophase_recover(TransactionId xid, uint16 info,
 	PROCLOCK   *proclock;
 	PROCLOCKTAG proclocktag;
 	bool		found;
-	LWLockId	masterLock;
+	int			partition;
+	LWLockId	partitionLock;
 	LockMethod	lockMethodTable;
 
 	Assert(len == sizeof(TwoPhaseLockRecord));
@@ -1987,19 +2120,20 @@ lock_twophase_recover(TransactionId xid, uint16 info,
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
 	lockMethodTable = LockMethods[lockmethodid];
 
-	masterLock = LockMgrLock;
+	partition = LockTagToPartition(locktag);
+	partitionLock = FirstLockMgrLock + partition;
 
-	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
 	/*
 	 * Find or create a lock with this tag.
 	 */
-	lock = (LOCK *) hash_search(LockMethodLockHash,
+	lock = (LOCK *) hash_search(LockMethodLockHash[partition],
 								(void *) locktag,
 								HASH_ENTER_NULL, &found);
 	if (!lock)
 	{
-		LWLockRelease(masterLock);
+		LWLockRelease(partitionLock);
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of shared memory"),
@@ -2039,7 +2173,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
 	/*
 	 * Find or create a proclock entry with this tag
 	 */
-	proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
+	proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
 										(void *) &proclocktag,
 										HASH_ENTER_NULL, &found);
 	if (!proclock)
@@ -2054,12 +2188,12 @@ lock_twophase_recover(TransactionId xid, uint16 info,
 			 * anyone to release the lock object later.
 			 */
 			Assert(SHMQueueEmpty(&(lock->procLocks)));
-			if (!hash_search(LockMethodLockHash,
+			if (!hash_search(LockMethodLockHash[partition],
 							 (void *) &(lock->tag),
 							 HASH_REMOVE, NULL))
 				elog(PANIC, "lock table corrupted");
 		}
-		LWLockRelease(masterLock);
+		LWLockRelease(partitionLock);
 		ereport(ERROR,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
 				 errmsg("out of shared memory"),
@@ -2075,7 +2209,8 @@ lock_twophase_recover(TransactionId xid, uint16 info,
 		proclock->releaseMask = 0;
 		/* Add proclock to appropriate lists */
 		SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
-		SHMQueueInsertBefore(&proc->procLocks, &proclock->procLink);
+		SHMQueueInsertBefore(&(proc->myProcLocks[partition]),
+							 &proclock->procLink);
 		PROCLOCK_PRINT("lock_twophase_recover: new", proclock);
 	}
 	else
@@ -2106,7 +2241,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
 	 */
 	GrantLock(lock, proclock, lockmode);
 
-	LWLockRelease(masterLock);
+	LWLockRelease(partitionLock);
 }
 
 /*
@@ -2123,10 +2258,11 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
 	LOCKTAG    *locktag;
 	LOCKMODE	lockmode;
 	LOCKMETHODID lockmethodid;
-	PROCLOCKTAG proclocktag;
 	LOCK	   *lock;
 	PROCLOCK   *proclock;
-	LWLockId	masterLock;
+	PROCLOCKTAG proclocktag;
+	int			partition;
+	LWLockId	partitionLock;
 	LockMethod	lockMethodTable;
 	bool		wakeupNeeded;
 
@@ -2139,14 +2275,15 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
 	lockMethodTable = LockMethods[lockmethodid];
 
-	masterLock = LockMgrLock;
+	partition = LockTagToPartition(locktag);
+	partitionLock = FirstLockMgrLock + partition;
 
-	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
 	/*
 	 * Re-find the lock object (it had better be there).
 	 */
-	lock = (LOCK *) hash_search(LockMethodLockHash,
+	lock = (LOCK *) hash_search(LockMethodLockHash[partition],
 								(void *) locktag,
 								HASH_FIND, NULL);
 	if (!lock)
@@ -2158,7 +2295,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
 	MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));		/* must clear padding */
 	proclocktag.lock = MAKE_OFFSET(lock);
 	proclocktag.proc = MAKE_OFFSET(proc);
-	proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
+	proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
 										(void *) &proclocktag,
 										HASH_FIND, NULL);
 	if (!proclock)
@@ -2171,7 +2308,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
 	if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
 	{
 		PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock);
-		LWLockRelease(masterLock);
+		LWLockRelease(partitionLock);
 		elog(WARNING, "you don't own a lock of type %s",
 			 lockMethodTable->lockModeNames[lockmode]);
 		return;
@@ -2182,9 +2319,11 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
 	 */
 	wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
 
-	CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
+	CleanUpLock(lock, proclock,
+				lockMethodTable, partition,
+				wakeupNeeded);
 
-	LWLockRelease(masterLock);
+	LWLockRelease(partitionLock);
 }
 
 /*
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index a215a652855..e1edabde905 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -8,14 +8,14 @@
  * exclusive and shared lock modes (to support read/write and read-only
  * access to a shared object).	There are few other frammishes.  User-level
  * locking should be done with the full lock manager --- which depends on
- * an LWLock to protect its shared state.
+ * LWLocks to protect its shared state.
  *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.35 2005/12/06 23:08:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.36 2005/12/11 21:02:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -125,7 +125,10 @@ NumLWLocks(void)
 	 */
 
 	/* Predefined LWLocks */
-	numLocks = (int) NumFixedLWLocks;
+	numLocks = (int) FirstLockMgrLock;
+
+	/* lock.c gets the ones starting at FirstLockMgrLock */
+	numLocks += NUM_LOCK_PARTITIONS;
 
 	/* bufmgr.c needs two for each shared buffer */
 	numLocks += 2 * NBuffers;
@@ -204,10 +207,11 @@ CreateLWLocks(void)
 
 	/*
 	 * Initialize the dynamic-allocation counter, which is stored just before
-	 * the first LWLock.
+	 * the first LWLock.  The LWLocks used by lock.c are not dynamically
+	 * allocated; lock.c just assumes it has them.
 	 */
 	LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
-	LWLockCounter[0] = (int) NumFixedLWLocks;
+	LWLockCounter[0] = (int) FirstLockMgrLock + NUM_LOCK_PARTITIONS;
 	LWLockCounter[1] = numLocks;
 }
 
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 8d8269041e7..34d80bfceea 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.169 2005/12/09 01:22:04 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.170 2005/12/11 21:02:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -18,9 +18,8 @@
  *		ProcQueueAlloc() -- create a shm queue for sleeping processes
  *		ProcQueueInit() -- create a queue without allocing memory
  *
- * Locking and waiting for buffers can cause the backend to be
- * put to sleep.  Whoever releases the lock, etc. wakes the
- * process up again (and gives it an error code so it knows
+ * Waiting for a lock causes the backend to be put to sleep.  Whoever releases
+ * the lock wakes the process up again (and gives it an error code so it knows
  * whether it was awoken on an error condition).
  *
  * Interface (b):
@@ -28,7 +27,7 @@
  * ProcReleaseLocks -- frees the locks associated with current transaction
  *
  * ProcKill -- destroys the shared memory state (and locks)
- *		associated with the process.
+ * associated with the process.
  */
 #include "postgres.h"
 
@@ -65,7 +64,8 @@ NON_EXEC_STATIC slock_t *ProcStructLock = NULL;
 static PROC_HDR *ProcGlobal = NULL;
 static PGPROC *DummyProcs = NULL;
 
-static bool waitingForLock = false;
+/* If we are waiting for a lock, this points to the associated LOCALLOCK */
+static LOCALLOCK *lockAwaited = NULL;
 
 /* Mark these volatile because they can be changed by signal handler */
 static volatile bool statement_timeout_active = false;
@@ -200,10 +200,10 @@ InitProcGlobal(void)
 void
 InitProcess(void)
 {
-	SHMEM_OFFSET myOffset;
-
 	/* use volatile pointer to prevent code rearrangement */
 	volatile PROC_HDR *procglobal = ProcGlobal;
+	SHMEM_OFFSET myOffset;
+	int			i;
 
 	/*
 	 * ProcGlobal should be set by a previous call to InitProcGlobal (if we
@@ -264,7 +264,8 @@ InitProcess(void)
 	MyProc->lwWaitLink = NULL;
 	MyProc->waitLock = NULL;
 	MyProc->waitProcLock = NULL;
-	SHMQueueInit(&(MyProc->procLocks));
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+		SHMQueueInit(&(MyProc->myProcLocks[i]));
 
 	/*
 	 * Add our PGPROC to the PGPROC array in shared memory.
@@ -304,6 +305,7 @@ void
 InitDummyProcess(int proctype)
 {
 	PGPROC	   *dummyproc;
+	int			i;
 
 	/*
 	 * ProcGlobal should be set by a previous call to InitProcGlobal (we
@@ -360,7 +362,8 @@ InitDummyProcess(int proctype)
 	MyProc->lwWaitLink = NULL;
 	MyProc->waitLock = NULL;
 	MyProc->waitProcLock = NULL;
-	SHMQueueInit(&(MyProc->procLocks));
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+		SHMQueueInit(&(MyProc->myProcLocks[i]));
 
 	/*
 	 * Arrange to clean up at process exit.
@@ -416,21 +419,24 @@ HaveNFreeProcs(int n)
 bool
 LockWaitCancel(void)
 {
+	LWLockId	partitionLock;
+
 	/* Nothing to do if we weren't waiting for a lock */
-	if (!waitingForLock)
+	if (lockAwaited == NULL)
 		return false;
 
 	/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
 	disable_sig_alarm(false);
 
 	/* Unlink myself from the wait queue, if on it (might not be anymore!) */
-	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
+	partitionLock = FirstLockMgrLock + lockAwaited->partition;
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
 	if (MyProc->links.next != INVALID_OFFSET)
 	{
 		/* We could not have been granted the lock yet */
 		Assert(MyProc->waitStatus == STATUS_ERROR);
-		RemoveFromWaitQueue(MyProc);
+		RemoveFromWaitQueue(MyProc, lockAwaited->partition);
 	}
 	else
 	{
@@ -444,9 +450,9 @@ LockWaitCancel(void)
 			GrantAwaitedLock();
 	}
 
-	waitingForLock = false;
+	lockAwaited = NULL;
 
-	LWLockRelease(LockMgrLock);
+	LWLockRelease(partitionLock);
 
 	/*
 	 * Reset the proc wait semaphore to zero.  This is necessary in the
@@ -606,18 +612,18 @@ ProcQueueInit(PROC_QUEUE *queue)
 
 
 /*
- * ProcSleep -- put a process to sleep
+ * ProcSleep -- put a process to sleep on the specified lock
  *
  * Caller must have set MyProc->heldLocks to reflect locks already held
  * on the lockable object by this process (under all XIDs).
  *
- * Locktable's masterLock must be held at entry, and will be held
+ * The lock table's partition lock must be held at entry, and will be held
  * at exit.
  *
  * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
  *
  * ASSUME: that no one will fiddle with the queue until after
- *		we release the masterLock.
+ *		we release the partition lock.
  *
  * NOTES: The process queue is now a priority queue for locking.
  *
@@ -625,12 +631,13 @@ ProcQueueInit(PROC_QUEUE *queue)
  * semaphore is normally zero, so when we try to acquire it, we sleep.
  */
 int
-ProcSleep(LockMethod lockMethodTable,
-		  LOCKMODE lockmode,
-		  LOCK *lock,
-		  PROCLOCK *proclock)
+ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 {
-	LWLockId	masterLock = LockMgrLock;
+	LOCKMODE	lockmode = locallock->tag.mode;
+	LOCK	   *lock = locallock->lock;
+	PROCLOCK   *proclock = locallock->proclock;
+	int			partition = locallock->partition;
+	LWLockId	partitionLock = FirstLockMgrLock + partition;
 	PROC_QUEUE *waitQueue = &(lock->waitProcs);
 	LOCKMASK	myHeldLocks = MyProc->heldLocks;
 	bool		early_deadlock = false;
@@ -732,22 +739,22 @@ ProcSleep(LockMethod lockMethodTable,
 	 */
 	if (early_deadlock)
 	{
-		RemoveFromWaitQueue(MyProc);
+		RemoveFromWaitQueue(MyProc, partition);
 		return STATUS_ERROR;
 	}
 
 	/* mark that we are waiting for a lock */
-	waitingForLock = true;
+	lockAwaited = locallock;
 
 	/*
-	 * Release the locktable's masterLock.
+	 * Release the lock table's partition lock.
 	 *
 	 * NOTE: this may also cause us to exit critical-section state, possibly
 	 * allowing a cancel/die interrupt to be accepted. This is OK because we
 	 * have recorded the fact that we are waiting for a lock, and so
 	 * LockWaitCancel will clean up if cancel/die happens.
 	 */
-	LWLockRelease(masterLock);
+	LWLockRelease(partitionLock);
 
 	/*
 	 * Set timer so we can wake up after awhile and check for a deadlock. If a
@@ -785,16 +792,16 @@ ProcSleep(LockMethod lockMethodTable,
 		elog(FATAL, "could not disable timer for process wakeup");
 
 	/*
-	 * Re-acquire the locktable's masterLock.  We have to do this to hold off
-	 * cancel/die interrupts before we can mess with waitingForLock (else we
-	 * might have a missed or duplicated locallock update).
+	 * Re-acquire the lock table's partition lock.  We have to do this to
+	 * hold off cancel/die interrupts before we can mess with lockAwaited
+	 * (else we might have a missed or duplicated locallock update).
 	 */
-	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
 	/*
 	 * We no longer want LockWaitCancel to do anything.
 	 */
-	waitingForLock = false;
+	lockAwaited = NULL;
 
 	/*
 	 * If we got the lock, be sure to remember it in the locallock table.
@@ -816,6 +823,8 @@ ProcSleep(LockMethod lockMethodTable,
  *	 Also remove the process from the wait queue and set its links invalid.
  *	 RETURN: the next process in the wait queue.
  *
+ * The appropriate lock partition lock must be held by caller.
+ *
  * XXX: presently, this code is only used for the "success" case, and only
  * works correctly for that case.  To clean up in failure case, would need
  * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
@@ -825,8 +834,6 @@ ProcWakeup(PGPROC *proc, int waitStatus)
 {
 	PGPROC	   *retProc;
 
-	/* assume that masterLock has been acquired */
-
 	/* Proc should be sleeping ... */
 	if (proc->links.prev == INVALID_OFFSET ||
 		proc->links.next == INVALID_OFFSET)
@@ -854,6 +861,8 @@ ProcWakeup(PGPROC *proc, int waitStatus)
  * ProcLockWakeup -- routine for waking up processes when a lock is
  *		released (or a prior waiter is aborted).  Scan all waiters
  *		for lock, waken any that are no longer blocked.
+ *
+ * The appropriate lock partition lock must be held by caller.
  */
 void
 ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
@@ -908,25 +917,32 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
 	Assert(waitQueue->size >= 0);
 }
 
-/* --------------------
+/*
+ * CheckDeadLock
+ *
  * We only get to this routine if we got SIGALRM after DeadlockTimeout
  * while waiting for a lock to be released by some other process.  Look
  * to see if there's a deadlock; if not, just return and continue waiting.
  * If we have a real deadlock, remove ourselves from the lock's wait queue
  * and signal an error to ProcSleep.
- * --------------------
  */
 static void
 CheckDeadLock(void)
 {
+	int			i;
+
 	/*
-	 * Acquire locktable lock.	Note that the deadlock check interrupt had
-	 * better not be enabled anywhere that this process itself holds the
-	 * locktable lock, else this will wait forever.  Also note that
-	 * LWLockAcquire creates a critical section, so that this routine cannot
-	 * be interrupted by cancel/die interrupts.
+	 * Acquire exclusive locks on all of the shared lock data structures.
+	 * Must grab LWLocks in partition-number order to avoid LWLock deadlock.
+	 *
+	 * Note that the deadlock check interrupt had better not be enabled
+	 * anywhere that this process itself holds lock partition locks, else this
+	 * will wait forever.  Also note that LWLockAcquire creates a critical
+	 * section, so that this routine cannot be interrupted by cancel/die
+	 * interrupts.
 	 */
-	LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
+	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+		LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);
 
 	/*
 	 * Check to see if we've been awoken by anyone in the interim.
@@ -937,14 +953,11 @@ CheckDeadLock(void)
 	 *
 	 * We check by looking to see if we've been unlinked from the wait queue.
 	 * This is quicker than checking our semaphore's state, since no kernel
-	 * call is needed, and it is safe because we hold the locktable lock.
+	 * call is needed, and it is safe because we hold the lock partition lock.
 	 */
 	if (MyProc->links.prev == INVALID_OFFSET ||
 		MyProc->links.next == INVALID_OFFSET)
-	{
-		LWLockRelease(LockMgrLock);
-		return;
-	}
+		goto check_done;
 
 #ifdef LOCK_DEBUG
 	if (Debug_deadlocks)
@@ -954,16 +967,19 @@ CheckDeadLock(void)
 	if (!DeadLockCheck(MyProc))
 	{
 		/* No deadlock, so keep waiting */
-		LWLockRelease(LockMgrLock);
-		return;
+		goto check_done;
 	}
 
 	/*
 	 * Oops.  We have a deadlock.
 	 *
-	 * Get this process out of wait state.
+	 * Get this process out of wait state.  (Note: we could do this more
+	 * efficiently by relying on lockAwaited, but use this coding to preserve
+	 * the flexibility to kill some other transaction than the one detecting
+	 * the deadlock.)
 	 */
-	RemoveFromWaitQueue(MyProc);
+	Assert(MyProc->waitLock != NULL);
+	RemoveFromWaitQueue(MyProc, LockTagToPartition(&(MyProc->waitLock->tag)));
 
 	/*
 	 * Set MyProc->waitStatus to STATUS_ERROR so that ProcSleep will report an
@@ -987,7 +1003,15 @@ CheckDeadLock(void)
 	 * them anymore.  However, RemoveFromWaitQueue took care of waking up any
 	 * such processes.
 	 */
-	LWLockRelease(LockMgrLock);
+
+	/*
+	 * Release locks acquired at head of routine.  Order is not critical,
+	 * so do it back-to-front to avoid waking another CheckDeadLock instance
+	 * before it can get all the locks.
+	 */
+check_done:
+	for (i = NUM_LOCK_PARTITIONS; --i >= 0; )
+		LWLockRelease(FirstLockMgrLock + i);
 }
 
 
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index e289632054c..9af03fb4742 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.92 2005/12/09 01:22:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.93 2005/12/11 21:02:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,13 @@
 #include "storage/shmem.h"
 
 
+/*
+ * Number of partitions the shared lock tables are divided into.
+ *
+ * See LockTagToPartition() if you change this.
+ */
+#define NUM_LOCK_PARTITIONS  16
+
 /* originally in procq.h */
 typedef struct PROC_QUEUE
 {
@@ -348,6 +355,7 @@ typedef struct LOCALLOCK
 	LOCK	   *lock;			/* associated LOCK object in shared mem */
 	PROCLOCK   *proclock;		/* associated PROCLOCK object in shmem */
 	bool		isTempObject;	/* true if lock is on a temporary object */
+	int			partition;		/* ID of partition containing this lock */
 	int			nLocks;			/* total number of times lock is held */
 	int			numLockOwners;	/* # of relevant ResourceOwners */
 	int			maxLockOwners;	/* allocated size of array */
@@ -389,6 +397,7 @@ typedef enum
  */
 extern void InitLocks(void);
 extern LockMethod GetLocksMethodTable(const LOCK *lock);
+extern int	LockTagToPartition(const LOCKTAG *locktag);
 extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
 			bool isTempObject,
 			LOCKMODE lockmode,
@@ -406,7 +415,7 @@ extern int LockCheckConflicts(LockMethod lockMethodTable,
 				   LOCK *lock, PROCLOCK *proclock, PGPROC *proc);
 extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
 extern void GrantAwaitedLock(void);
-extern void RemoveFromWaitQueue(PGPROC *proc);
+extern void RemoveFromWaitQueue(PGPROC *proc, int partition);
 extern Size LockShmemSize(void);
 extern bool DeadLockCheck(PGPROC *proc);
 extern void DeadLockReport(void);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 4291e0b2e74..c318e60b577 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.23 2005/10/15 02:49:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.24 2005/12/11 21:02:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -16,9 +16,9 @@
 
 /*
  * We have a number of predefined LWLocks, plus a bunch of LWLocks that are
- * dynamically assigned (for shared buffers).  The LWLock structures live
- * in shared memory (since they contain shared data) and are identified by
- * values of this enumerated type.	We abuse the notion of an enum somewhat
+ * dynamically assigned (e.g., for shared buffers).  The LWLock structures
+ * live in shared memory (since they contain shared data) and are identified
+ * by values of this enumerated type.  We abuse the notion of an enum somewhat
  * by allowing values not listed in the enum declaration to be assigned.
  * The extra value MaxDynamicLWLock is there to keep the compiler from
  * deciding that the enum can be represented as char or short ...
@@ -27,7 +27,6 @@ typedef enum LWLockId
 {
 	BufMappingLock,
 	BufFreelistLock,
-	LockMgrLock,
 	OidGenLock,
 	XidGenLock,
 	ProcArrayLock,
@@ -46,8 +45,7 @@ typedef enum LWLockId
 	RelCacheInitLock,
 	BgWriterCommLock,
 	TwoPhaseStateLock,
-
-	NumFixedLWLocks,			/* must be last except for MaxDynamicLWLock */
+	FirstLockMgrLock,			/* must be last except for MaxDynamicLWLock */
 
 	MaxDynamicLWLock = 1000000000
 } LWLockId;
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 4cba391048e..2cfee41eff9 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.84 2005/10/15 02:49:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.85 2005/12/11 21:02:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -52,7 +52,8 @@ struct XidCache
  * so that the prepared transactions appear to be still running and are
  * correctly shown as holding locks.  A prepared transaction PGPROC can be
  * distinguished from a real one at need by the fact that it has pid == 0.
- * The semaphore and lock-related fields in a prepared-xact PGPROC are unused.
+ * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
+ * but its myProcLocks[] lists are valid.
  */
 struct PGPROC
 {
@@ -86,8 +87,12 @@ struct PGPROC
 	LOCKMASK	heldLocks;		/* bitmask for lock types already held on this
 								 * lock object by this backend */
 
-	SHM_QUEUE	procLocks;		/* list of PROCLOCK objects for locks held or
-								 * awaited by this backend */
+	/*
+	 * All PROCLOCK objects for locks held or awaited by this backend are
+	 * linked into one of these lists, according to the partition number of
+	 * their lock.
+	 */
+	SHM_QUEUE	myProcLocks[NUM_LOCK_PARTITIONS];
 
 	struct XidCache subxids;	/* cache for subtransaction XIDs */
 };
@@ -99,7 +104,7 @@ extern DLLIMPORT PGPROC *MyProc;
 
 
 /*
- * There is one ProcGlobal struct for the whole installation.
+ * There is one ProcGlobal struct for the whole database cluster.
  */
 typedef struct PROC_HDR
 {
@@ -134,8 +139,7 @@ extern bool HaveNFreeProcs(int n);
 extern void ProcReleaseLocks(bool isCommit);
 
 extern void ProcQueueInit(PROC_QUEUE *queue);
-extern int ProcSleep(LockMethod lockMethodTable, LOCKMODE lockmode,
-		  LOCK *lock, PROCLOCK *proclock);
+extern int	ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
 extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
 extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
 extern bool LockWaitCancel(void);
author	Tom Lane <tgl@sss.pgh.pa.us>	2005-12-11 21:02:18 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2005-12-11 21:02:18 +0000
commit	ec0baf949ecdee0bf8d8e60cc8dba0137aac8d19 (patch)
tree	b435a97a4e87c31a6b644ac2d9d1f433de487588 /src
parent	be8100d64ec93ccd8160b37379ba189aab4d0ef1 (diff)