summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2005-12-11 21:02:18 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2005-12-11 21:02:18 +0000
commit ec0baf949ecdee0bf8d8e60cc8dba0137aac8d19 (patch)
tree b435a97a4e87c31a6b644ac2d9d1f433de487588 /src
parent be8100d64ec93ccd8160b37379ba189aab4d0ef1 (diff)
Divide the lock manager's shared state into 'partitions', so as to
reduce contention for the former single LockMgrLock. Per my recent proposal. I set it up for 16 partitions, but on a pgbench test this gives only a marginal further improvement over 4 partitions --- we need to test more scenarios to choose the number of partitions.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/transam/twophase.c 5
-rw-r--r-- src/backend/storage/ipc/procarray.c 6
-rw-r--r-- src/backend/storage/lmgr/README 91
-rw-r--r-- src/backend/storage/lmgr/deadlock.c 20
-rw-r--r-- src/backend/storage/lmgr/lock.c 719
-rw-r--r-- src/backend/storage/lmgr/lwlock.c 14
-rw-r--r-- src/backend/storage/lmgr/proc.c 128
-rw-r--r-- src/include/storage/lock.h 13
-rw-r--r-- src/include/storage/lwlock.h 12
-rw-r--r-- src/include/storage/proc.h 18
10 files changed, 627 insertions, 399 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index ffdee8388b3..0898df62337 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.17 2005/11/22 18:17:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.18 2005/12/11 21:02:17 tgl Exp $
*
* NOTES
* Each global transaction is associated with a global transaction
@@ -284,7 +284,8 @@ MarkAsPreparing(TransactionId xid, const char *gid,
gxact->proc.lwWaitLink = NULL;
gxact->proc.waitLock = NULL;
gxact->proc.waitProcLock = NULL;
- SHMQueueInit(&(gxact->proc.procLocks));
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+ SHMQueueInit(&(gxact->proc.myProcLocks[i]));
/* subxid data must be filled later by GXactLoadSubxactData */
gxact->proc.subxids.overflowed = false;
gxact->proc.subxids.nxids = 0;
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 7ac8084f6a3..cafadeb9054 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -14,8 +14,8 @@
*
* The process array now also includes PGPROC structures representing
* prepared transactions. The xid and subxids fields of these are valid,
- * as is the procLocks list. They can be distinguished from regular backend
- * PGPROCs at need by checking for pid == 0.
+ * as are the myProcLocks lists. They can be distinguished from regular
+ * backend PGPROCs at need by checking for pid == 0.
*
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
@@ -23,7 +23,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.8 2005/11/22 18:17:20 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.9 2005/12/11 21:02:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README
index 25820f4b73d..fdda5bf82a4 100644
--- a/src/backend/storage/lmgr/README
+++ b/src/backend/storage/lmgr/README
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.18 2005/12/09 01:22:04 tgl Exp $
+$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.19 2005/12/11 21:02:18 tgl Exp $
LOCKING OVERVIEW
@@ -50,9 +50,12 @@ LOCK DATA STRUCTURES
Lock methods describe the overall locking behavior. Currently there are
two lock methods: DEFAULT and USER. (USER locks are non-blocking.)
-Lock modes describe the type of the lock (read/write or shared/exclusive).
-See src/tools/backend/index.html and src/include/storage/lock.h for more
-details.
+Lock modes describe the type of the lock (read/write or shared/exclusive).
+In principle, each lock method can have its own set of lock modes with
+different conflict rules, but currently DEFAULT and USER methods use
+identical lock mode sets. See src/tools/backend/index.html and
+src/include/storage/lock.h for more details. (Lock modes are also called
+lock types in some places in the code and documentation.)
There are two fundamental lock structures in shared memory: the
per-lockable-object LOCK struct, and the per-lock-and-requestor PROCLOCK
@@ -67,7 +70,7 @@ be made per lockable object/lock mode/backend. Internally to a backend,
however, the same lock may be requested and perhaps released multiple times
in a transaction, and it can also be held both transactionally and session-
wide. The internal request counts are held in LOCALLOCK so that the shared
-LockMgrLock need not be obtained to alter them.
+data structures need not be accessed to alter them.
---------------------------------------------------------------------------
@@ -103,10 +106,10 @@ procLocks -
be waiting for more!).
waitProcs -
- This is a shared memory queue of all process structures corresponding to
- a backend that is waiting (sleeping) until another backend releases this
+ This is a shared memory queue of all PGPROC structures corresponding to
+ backends that are waiting (sleeping) until another backend releases this
lock. The process structure holds the information needed to determine
- if it should be woken up when this lock is released.
+ if it should be woken up when the lock is released.
nRequested -
Keeps a count of how many times this lock has been attempted to be
@@ -131,12 +134,12 @@ nGranted -
granted -
Keeps count of how many locks of each type are currently held. Once again
only elements 1 through MAX_LOCKMODES-1 are used (0 is not). Also, like
- requested, summing the values of granted should total to the value
+ requested[], summing the values of granted[] should total to the value
of nGranted.
We should always have 0 <= nGranted <= nRequested, and
-0 <= granted[i] <= requested[i] for each i. If the request counts go to
-zero, the lock object is no longer needed and can be freed.
+0 <= granted[i] <= requested[i] for each i. When all the request counts
+go to zero, the LOCK object is no longer needed and can be freed.
---------------------------------------------------------------------------
@@ -154,15 +157,16 @@ tag -
SHMEM offset of PGPROC of backend process that owns this PROCLOCK.
holdMask -
- A bitmask for the lock types successfully acquired by this PROCLOCK.
+ A bitmask for the lock modes successfully acquired by this PROCLOCK.
This should be a subset of the LOCK object's grantMask, and also a
- subset of the PGPROC object's heldLocks mask.
+ subset of the PGPROC object's heldLocks mask (if the PGPROC is
+ currently waiting for another lock mode on this lock).
releaseMask -
- A bitmask for the lock types due to be released during LockReleaseAll.
+ A bitmask for the lock modes due to be released during LockReleaseAll.
This must be a subset of the holdMask. Note that it is modified without
- taking the LockMgrLock, and therefore it is unsafe for any backend except
- the one owning the PROCLOCK to examine/change it.
+ taking the partition LWLock, and therefore it is unsafe for any
+ backend except the one owning the PROCLOCK to examine/change it.
lockLink -
List link for shared memory queue of all the PROCLOCK objects for the
@@ -174,7 +178,60 @@ procLink -
---------------------------------------------------------------------------
-The deadlock detection algorithm:
+
+LOCK MANAGER INTERNAL LOCKING
+
+Before PostgreSQL 8.2, all of the shared-memory data structures used by
+the lock manager were protected by a single LWLock, the LockMgrLock;
+any operation involving these data structures had to exclusively lock
+LockMgrLock. Not too surprisingly, this became a contention bottleneck.
+To reduce contention, the lock manager's data structures have been split
+into multiple "partitions", each protected by an independent LWLock.
+Most operations only need to lock the single partition they are working in.
+Here are the details:
+
+* Each possible lock is assigned to one partition according to a hash of
+its LOCKTAG value (see LockTagToPartition()). The partition's LWLock is
+considered to protect all the LOCK objects of that partition as well as
+their subsidiary PROCLOCKs. The shared-memory hash tables for LOCKs and
+PROCLOCKs are divided into separate hash tables for each partition, and
+operations on each hash table are likewise protected by the partition
+lock.
+
+* Formerly, each PGPROC had a single list of PROCLOCKs belonging to it.
+This has now been split into per-partition lists, so that access to a
+particular PROCLOCK list can be protected by the associated partition's
+LWLock. (This is not strictly necessary at the moment, because at this
+writing a PGPROC's PROCLOCK list is only accessed by the owning backend
+anyway. But it seems forward-looking to maintain a convention for how
+other backends could access it. In any case LockReleaseAll needs to be
+able to quickly determine which partition each LOCK belongs to, and
+for the currently contemplated number of partitions, this way takes less
+shared memory than explicitly storing a partition number in LOCK structs
+would require.)
+
+* The other lock-related fields of a PGPROC are only interesting when
+the PGPROC is waiting for a lock, so we consider that they are protected
+by the partition LWLock of the awaited lock.
+
+For normal lock acquisition and release, it is sufficient to lock the
+partition containing the desired lock. Deadlock checking needs to touch
+multiple partitions in general; for simplicity, we just make it lock all
+the partitions in partition-number order. (To prevent LWLock deadlock,
+we establish the rule that any backend needing to lock more than one
+partition at once must lock them in partition-number order.) It's
+possible that deadlock checking could be done without touching every
+partition in typical cases, but since in a properly functioning system
+deadlock checking should not occur often enough to be performance-critical,
+trying to make this work does not seem a productive use of effort.
+
+A backend's internal LOCALLOCK hash table is not partitioned. We do store
+the partition number in LOCALLOCK table entries, but this is a straight
+speed-for-space tradeoff: we could instead recalculate the partition
+number from the LOCKTAG when needed.
+
+
+THE DEADLOCK DETECTION ALGORITHM
Since we allow user transactions to request locks in any order, deadlock
is possible. We use a deadlock detection/breaking algorithm that is
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index adbd373bb7f..e72ab00b5b0 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -12,7 +12,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.37 2005/12/09 01:22:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.38 2005/12/11 21:02:18 tgl Exp $
*
* Interface:
*
@@ -53,9 +53,9 @@ typedef struct
* Information saved about each edge in a detected deadlock cycle. This
* is used to print a diagnostic message upon failure.
*
- * Note: because we want to examine this info after releasing the LockMgrLock,
- * we can't just store LOCK and PGPROC pointers; we must extract out all the
- * info we want to be able to print.
+ * Note: because we want to examine this info after releasing the lock
+ * manager's partition locks, we can't just store LOCK and PGPROC pointers;
+ * we must extract out all the info we want to be able to print.
*/
typedef struct
{
@@ -188,19 +188,11 @@ InitDeadLockChecking(void)
* deadlock. If resolution is impossible, return TRUE --- the caller
* is then expected to abort the given proc's transaction.
*
- * We can't block on user locks, so no sense testing for deadlock
- * because there is no blocking, and no timer for the block. So,
- * only look at regular locks.
- *
- * We must have already locked the master lock before being called.
- * NOTE: although the lockmethod structure appears to allow each lock
- * table to have a different masterLock, all locks that can block had
- * better use the same LWLock, else this code will not be adequately
- * interlocked!
+ * Caller must already have locked all partitions of the lock tables.
*
* On failure, deadlock details are recorded in deadlockDetails[] for
* subsequent printing by DeadLockReport(). That activity is separate
- * because we don't want to do it while holding the master lock.
+ * because we don't want to do it while holding all those LWLocks.
*/
bool
DeadLockCheck(PGPROC *proc)
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 344d677cd2f..7f42b477cc6 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* lock.c
- * POSTGRES low-level lock mechanism
+ * POSTGRES primary lock mechanism
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.161 2005/12/09 01:22:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.162 2005/12/11 21:02:18 tgl Exp $
*
* NOTES
* A lock table is a shared memory hash table. When
@@ -163,10 +163,13 @@ typedef struct TwoPhaseLockRecord
/*
- * Links to hash tables containing lock state
+ * Pointers to hash tables containing lock state
+ *
+ * The LockMethodLockHash and LockMethodProcLockHash hash tables are in
+ * shared memory; LockMethodLocalHash is local to each backend.
*/
-static HTAB *LockMethodLockHash;
-static HTAB *LockMethodProcLockHash;
+static HTAB *LockMethodLockHash[NUM_LOCK_PARTITIONS];
+static HTAB *LockMethodProcLockHash[NUM_LOCK_PARTITIONS];
static HTAB *LockMethodLocalHash;
@@ -255,16 +258,25 @@ PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP)
static void RemoveLocalLock(LOCALLOCK *locallock);
static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
-static void WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
- ResourceOwner owner);
+static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
PROCLOCK *proclock, LockMethod lockMethodTable);
-static void CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock,
- PROCLOCK *proclock, bool wakeupNeeded);
+static void CleanUpLock(LOCK *lock, PROCLOCK *proclock,
+ LockMethod lockMethodTable, int partition,
+ bool wakeupNeeded);
/*
- * InitLocks -- Initialize the lock module's shared memory.
+ * InitLocks -- Initialize the lock manager's data structures.
+ *
+ * This is called from CreateSharedMemoryAndSemaphores(), which see for
+ * more comments. In the normal postmaster case, the shared hash tables
+ * are created here, as well as a locallock hash table that will remain
+ * unused and empty in the postmaster itself. Backends inherit the pointers
+ * to the shared tables via fork(), and also inherit an image of the locallock
+ * hash table, which they proceed to use. In the EXEC_BACKEND case, each
+ * backend re-executes this code to obtain pointers to the already existing
+ * shared hash tables and to create its locallock hash table.
*/
void
InitLocks(void)
@@ -274,13 +286,18 @@ InitLocks(void)
int hash_flags;
long init_table_size,
max_table_size;
+ int i;
- /* Compute init/max size to request for lock hashtables */
+ /*
+ * Compute init/max size to request for lock hashtables. Note these
+ * calculations must agree with LockShmemSize!
+ */
max_table_size = NLOCKENTS();
+ max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1;
init_table_size = max_table_size / 2;
/*
- * allocate a hash table for LOCK structs. This is used to store
+ * Allocate hash tables for LOCK structs. These are used to store
* per-locked-object information.
*/
MemSet(&info, 0, sizeof(info));
@@ -289,37 +306,45 @@ InitLocks(void)
info.hash = tag_hash;
hash_flags = (HASH_ELEM | HASH_FUNCTION);
- sprintf(shmemName, "LOCK hash");
- LockMethodLockHash = ShmemInitHash(shmemName,
- init_table_size,
- max_table_size,
- &info,
- hash_flags);
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+ {
+ sprintf(shmemName, "LOCK hash %d", i);
+ LockMethodLockHash[i] = ShmemInitHash(shmemName,
+ init_table_size,
+ max_table_size,
+ &info,
+ hash_flags);
+ if (!LockMethodLockHash[i])
+ elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
+ }
- if (!LockMethodLockHash)
- elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
+ /* Assume an average of 2 holders per lock */
+ max_table_size *= 2;
+ init_table_size *= 2;
/*
- * allocate a hash table for PROCLOCK structs. This is used to store
- * per-lock-holder information.
+ * Allocate hash tables for PROCLOCK structs. These are used to store
+ * per-lock-per-holder information.
*/
info.keysize = sizeof(PROCLOCKTAG);
info.entrysize = sizeof(PROCLOCK);
info.hash = tag_hash;
hash_flags = (HASH_ELEM | HASH_FUNCTION);
- sprintf(shmemName, "PROCLOCK hash");
- LockMethodProcLockHash = ShmemInitHash(shmemName,
- init_table_size,
- max_table_size,
- &info,
- hash_flags);
-
- if (!LockMethodProcLockHash)
- elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+ {
+ sprintf(shmemName, "PROCLOCK hash %d", i);
+ LockMethodProcLockHash[i] = ShmemInitHash(shmemName,
+ init_table_size,
+ max_table_size,
+ &info,
+ hash_flags);
+ if (!LockMethodProcLockHash[i])
+ elog(FATAL, "could not initialize lock table \"%s\"", shmemName);
+ }
/*
- * allocate a non-shared hash table for LOCALLOCK structs. This is used
+ * Allocate one non-shared hash table for LOCALLOCK structs. This is used
* to store lock counts and resource owner information.
*
* The non-shared table could already exist in this process (this occurs
@@ -356,6 +381,39 @@ GetLocksMethodTable(const LOCK *lock)
/*
+ * Given a LOCKTAG, determine which partition the lock belongs in.
+ *
+ * Basically what we want to do here is hash the locktag. However, it
+ * seems unwise to use hash_any() because that is the same function that
+ * will be used to distribute the locks within each partition's hash table;
+ * if we use it, we run a big risk of having uneven distribution of hash
+ * codes within each hash table. Instead, we use a simple linear XOR of the
+ * bits of the locktag.
+ *
+ * The bytes of *locktag are XORed together and the result is then folded
+ * down and masked, so the return value is always in the range
+ * 0 .. NUM_LOCK_PARTITIONS-1. Only NUM_LOCK_PARTITIONS values of 16 and 4
+ * are handled; any other setting is rejected at compile time by the
+ * #error below.
+ *
+ * NOTE(review): all sizeof(LOCKTAG) bytes are read, so this presumably
+ * relies on LOCKTAG having no uninitialized padding bytes --- confirm
+ * against the struct declaration in lock.h.
+ */
+int
+LockTagToPartition(const LOCKTAG *locktag)
+{
+ const uint8 *ptr = (const uint8 *) locktag;
+ int result = 0;
+ int i;
+
+ for (i = 0; i < sizeof(LOCKTAG); i++)
+ result ^= *ptr++; /* accumulate the XOR of every byte of the tag */
+#if NUM_LOCK_PARTITIONS == 16
+ result ^= result >> 4; /* fold the high nibble into the low nibble */
+ result &= 0x0F; /* keep 4 bits: 0..15 */
+#elif NUM_LOCK_PARTITIONS == 4
+ result ^= result >> 4; /* fold the high nibble into the low nibble */
+ result ^= result >> 2; /* fold again so bits 0-1 mix all four bit pairs */
+ result &= 0x03; /* keep 2 bits: 0..3 */
+#else
+#error unsupported NUM_LOCK_PARTITIONS
+#endif
+ return result;
+}
+
+
+/*
* LockAcquire -- Check for lock conflicts, sleep if conflict found,
* set lock if/when no conflicts.
*
@@ -397,7 +455,8 @@ LockAcquire(const LOCKTAG *locktag,
PROCLOCKTAG proclocktag;
bool found;
ResourceOwner owner;
- LWLockId masterLock;
+ int partition;
+ LWLockId partitionLock;
int status;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -438,6 +497,7 @@ LockAcquire(const LOCKTAG *locktag,
locallock->lock = NULL;
locallock->proclock = NULL;
locallock->isTempObject = isTempObject;
+ locallock->partition = LockTagToPartition(&(localtag.lock));
locallock->nLocks = 0;
locallock->numLockOwners = 0;
locallock->maxLockOwners = 8;
@@ -474,9 +534,10 @@ LockAcquire(const LOCKTAG *locktag,
/*
* Otherwise we've got to mess with the shared lock table.
*/
- masterLock = LockMgrLock;
+ partition = locallock->partition;
+ partitionLock = FirstLockMgrLock + partition;
- LWLockAcquire(masterLock, LW_EXCLUSIVE);
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
* Find or create a lock with this tag.
@@ -486,12 +547,12 @@ LockAcquire(const LOCKTAG *locktag,
* pointer is valid, since a lock object with no locks can go away
* anytime.
*/
- lock = (LOCK *) hash_search(LockMethodLockHash,
+ lock = (LOCK *) hash_search(LockMethodLockHash[partition],
(void *) locktag,
HASH_ENTER_NULL, &found);
if (!lock)
{
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
@@ -532,7 +593,7 @@ LockAcquire(const LOCKTAG *locktag,
/*
* Find or create a proclock entry with this tag
*/
- proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
+ proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
(void *) &proclocktag,
HASH_ENTER_NULL, &found);
if (!proclock)
@@ -547,12 +608,12 @@ LockAcquire(const LOCKTAG *locktag,
* anyone to release the lock object later.
*/
Assert(SHMQueueEmpty(&(lock->procLocks)));
- if (!hash_search(LockMethodLockHash,
+ if (!hash_search(LockMethodLockHash[partition],
(void *) &(lock->tag),
HASH_REMOVE, NULL))
elog(PANIC, "lock table corrupted");
}
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
@@ -569,7 +630,8 @@ LockAcquire(const LOCKTAG *locktag,
proclock->releaseMask = 0;
/* Add proclock to appropriate lists */
SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
- SHMQueueInsertBefore(&MyProc->procLocks, &proclock->procLink);
+ SHMQueueInsertBefore(&(MyProc->myProcLocks[partition]),
+ &proclock->procLink);
PROCLOCK_PRINT("LockAcquire: new", proclock);
}
else
@@ -666,7 +728,7 @@ LockAcquire(const LOCKTAG *locktag,
{
SHMQueueDelete(&proclock->lockLink);
SHMQueueDelete(&proclock->procLink);
- if (!hash_search(LockMethodProcLockHash,
+ if (!hash_search(LockMethodProcLockHash[partition],
(void *) &(proclock->tag),
HASH_REMOVE, NULL))
elog(PANIC, "proclock table corrupted");
@@ -678,7 +740,7 @@ LockAcquire(const LOCKTAG *locktag,
LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
Assert(lock->nGranted <= lock->nRequested);
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
if (locallock->nLocks == 0)
RemoveLocalLock(locallock);
return LOCKACQUIRE_NOT_AVAIL;
@@ -692,7 +754,7 @@ LockAcquire(const LOCKTAG *locktag,
/*
* Sleep till someone wakes me up.
*/
- WaitOnLock(lockmethodid, locallock, owner);
+ WaitOnLock(locallock, owner);
/*
* NOTE: do not do any material change of state between here and
@@ -709,14 +771,14 @@ LockAcquire(const LOCKTAG *locktag,
PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
/* Should we retry ? */
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
elog(ERROR, "LockAcquire failed");
}
PROCLOCK_PRINT("LockAcquire: granted", proclock);
LOCK_PRINT("LockAcquire: granted", lock, lockmode);
}
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
return LOCKACQUIRE_OK;
}
@@ -894,11 +956,12 @@ UnGrantLock(LOCK *lock, LOCKMODE lockmode,
* should be called after UnGrantLock, and wakeupNeeded is the result from
* UnGrantLock.)
*
- * The locktable's masterLock must be held at entry, and will be
+ * The lock table's partition lock must be held at entry, and will be
* held at exit.
*/
static void
-CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
+CleanUpLock(LOCK *lock, PROCLOCK *proclock,
+ LockMethod lockMethodTable, int partition,
bool wakeupNeeded)
{
/*
@@ -910,7 +973,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
PROCLOCK_PRINT("CleanUpLock: deleting", proclock);
SHMQueueDelete(&proclock->lockLink);
SHMQueueDelete(&proclock->procLink);
- if (!hash_search(LockMethodProcLockHash,
+ if (!hash_search(LockMethodProcLockHash[partition],
(void *) &(proclock->tag),
HASH_REMOVE, NULL))
elog(PANIC, "proclock table corrupted");
@@ -924,7 +987,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
*/
LOCK_PRINT("CleanUpLock: deleting", lock, 0);
Assert(SHMQueueEmpty(&(lock->procLocks)));
- if (!hash_search(LockMethodLockHash,
+ if (!hash_search(LockMethodLockHash[partition],
(void *) &(lock->tag),
HASH_REMOVE, NULL))
elog(PANIC, "lock table corrupted");
@@ -932,7 +995,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock,
else if (wakeupNeeded)
{
/* There are waiters on this lock, so wake them up. */
- ProcLockWakeup(LockMethods[lockmethodid], lock);
+ ProcLockWakeup(lockMethodTable, lock);
}
}
@@ -988,12 +1051,12 @@ GrantAwaitedLock(void)
* Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process.
*
- * The locktable's masterLock must be held at entry.
+ * The appropriate partition lock must be held at entry.
*/
static void
-WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
- ResourceOwner owner)
+WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
{
+ LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
LockMethod lockMethodTable = LockMethods[lockmethodid];
const char *old_status;
char *new_status;
@@ -1025,10 +1088,7 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
* will also happen in the cancel/die case.
*/
- if (ProcSleep(lockMethodTable,
- locallock->tag.mode,
- locallock->lock,
- locallock->proclock) != STATUS_OK)
+ if (ProcSleep(locallock, lockMethodTable) != STATUS_OK)
{
/*
* We failed as a result of a deadlock, see CheckDeadLock(). Quit now.
@@ -1036,10 +1096,10 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
awaitedLock = NULL;
LOCK_PRINT("WaitOnLock: aborting on lock",
locallock->lock, locallock->tag.mode);
- LWLockRelease(LockMgrLock);
+ LWLockRelease(FirstLockMgrLock + locallock->partition);
/*
- * Now that we aren't holding the LockMgrLock, we can give an error
+ * Now that we aren't holding the partition lock, we can give an error
* report including details about the detected deadlock.
*/
DeadLockReport();
@@ -1059,12 +1119,12 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock,
* Remove a proc from the wait-queue it is on
* (caller must know it is on one).
*
- * Locktable lock must be held by caller.
+ * Appropriate partition lock must be held by caller.
*
* NB: this does not clean up any locallock object that may exist for the lock.
*/
void
-RemoveFromWaitQueue(PGPROC *proc)
+RemoveFromWaitQueue(PGPROC *proc, int partition)
{
LOCK *waitLock = proc->waitLock;
PROCLOCK *proclock = proc->waitProcLock;
@@ -1102,7 +1162,9 @@ RemoveFromWaitQueue(PGPROC *proc)
* LockRelease expects there to be no remaining proclocks.) Then see if
* any other waiters for the lock can be woken up now.
*/
- CleanUpLock(lockmethodid, waitLock, proclock, true);
+ CleanUpLock(waitLock, proclock,
+ LockMethods[lockmethodid], partition,
+ true);
}
/*
@@ -1125,7 +1187,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
LOCALLOCK *locallock;
LOCK *lock;
PROCLOCK *proclock;
- LWLockId masterLock;
+ int partition;
+ LWLockId partitionLock;
bool wakeupNeeded;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -1212,9 +1275,10 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
/*
* Otherwise we've got to mess with the shared lock table.
*/
- masterLock = LockMgrLock;
+ partition = locallock->partition;
+ partitionLock = FirstLockMgrLock + partition;
- LWLockAcquire(masterLock, LW_EXCLUSIVE);
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
* We don't need to re-find the lock or proclock, since we kept their
@@ -1233,7 +1297,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
{
PROCLOCK_PRINT("LockRelease: WRONGTYPE", proclock);
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
RemoveLocalLock(locallock);
@@ -1245,9 +1309,11 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
*/
wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
- CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
+ CleanUpLock(lock, proclock,
+ lockMethodTable, partition,
+ wakeupNeeded);
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
RemoveLocalLock(locallock);
return TRUE;
@@ -1265,14 +1331,13 @@ void
LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
{
HASH_SEQ_STATUS status;
- SHM_QUEUE *procLocks = &(MyProc->procLocks);
- LWLockId masterLock;
LockMethod lockMethodTable;
int i,
numLockModes;
LOCALLOCK *locallock;
- PROCLOCK *proclock;
LOCK *lock;
+ PROCLOCK *proclock;
+ int partition;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
@@ -1284,7 +1349,6 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
#endif
numLockModes = lockMethodTable->numLockModes;
- masterLock = LockMgrLock;
/*
* First we run through the locallock table and get rid of unwanted
@@ -1351,74 +1415,89 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
RemoveLocalLock(locallock);
}
- LWLockAcquire(masterLock, LW_EXCLUSIVE);
+ /*
+ * Now, scan each lock partition separately.
+ */
+ for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
+ {
+ LWLockId partitionLock = FirstLockMgrLock + partition;
+ SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]);
- proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
- offsetof(PROCLOCK, procLink));
+ proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
+ offsetof(PROCLOCK, procLink));
- while (proclock)
- {
- bool wakeupNeeded = false;
- PROCLOCK *nextplock;
+ if (!proclock)
+ continue; /* needn't examine this partition */
- /* Get link first, since we may unlink/delete this proclock */
- nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
- offsetof(PROCLOCK, procLink));
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
- Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
+ while (proclock)
+ {
+ bool wakeupNeeded = false;
+ PROCLOCK *nextplock;
- lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+ /* Get link first, since we may unlink/delete this proclock */
+ nextplock = (PROCLOCK *)
+ SHMQueueNext(procLocks, &proclock->procLink,
+ offsetof(PROCLOCK, procLink));
- /* Ignore items that are not of the lockmethod to be removed */
- if (LOCK_LOCKMETHOD(*lock) != lockmethodid)
- goto next_item;
+ Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
- /*
- * In allLocks mode, force release of all locks even if locallock
- * table had problems
- */
- if (allLocks)
- proclock->releaseMask = proclock->holdMask;
- else
- Assert((proclock->releaseMask & ~proclock->holdMask) == 0);
+ lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
- /*
- * Ignore items that have nothing to be released, unless they have
- * holdMask == 0 and are therefore recyclable
- */
- if (proclock->releaseMask == 0 && proclock->holdMask != 0)
- goto next_item;
+ /* Ignore items that are not of the lockmethod to be removed */
+ if (LOCK_LOCKMETHOD(*lock) != lockmethodid)
+ goto next_item;
- PROCLOCK_PRINT("LockReleaseAll", proclock);
- LOCK_PRINT("LockReleaseAll", lock, 0);
- Assert(lock->nRequested >= 0);
- Assert(lock->nGranted >= 0);
- Assert(lock->nGranted <= lock->nRequested);
- Assert((proclock->holdMask & ~lock->grantMask) == 0);
+ /*
+ * In allLocks mode, force release of all locks even if locallock
+ * table had problems
+ */
+ if (allLocks)
+ proclock->releaseMask = proclock->holdMask;
+ else
+ Assert((proclock->releaseMask & ~proclock->holdMask) == 0);
- /*
- * Release the previously-marked lock modes
- */
- for (i = 1; i <= numLockModes; i++)
- {
- if (proclock->releaseMask & LOCKBIT_ON(i))
- wakeupNeeded |= UnGrantLock(lock, i, proclock,
- lockMethodTable);
- }
- Assert((lock->nRequested >= 0) && (lock->nGranted >= 0));
- Assert(lock->nGranted <= lock->nRequested);
- LOCK_PRINT("LockReleaseAll: updated", lock, 0);
+ /*
+ * Ignore items that have nothing to be released, unless they have
+ * holdMask == 0 and are therefore recyclable
+ */
+ if (proclock->releaseMask == 0 && proclock->holdMask != 0)
+ goto next_item;
- proclock->releaseMask = 0;
+ PROCLOCK_PRINT("LockReleaseAll", proclock);
+ LOCK_PRINT("LockReleaseAll", lock, 0);
+ Assert(lock->nRequested >= 0);
+ Assert(lock->nGranted >= 0);
+ Assert(lock->nGranted <= lock->nRequested);
+ Assert((proclock->holdMask & ~lock->grantMask) == 0);
+
+ /*
+ * Release the previously-marked lock modes
+ */
+ for (i = 1; i <= numLockModes; i++)
+ {
+ if (proclock->releaseMask & LOCKBIT_ON(i))
+ wakeupNeeded |= UnGrantLock(lock, i, proclock,
+ lockMethodTable);
+ }
+ Assert((lock->nRequested >= 0) && (lock->nGranted >= 0));
+ Assert(lock->nGranted <= lock->nRequested);
+ LOCK_PRINT("LockReleaseAll: updated", lock, 0);
- /* CleanUpLock will wake up waiters if needed. */
- CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
+ proclock->releaseMask = 0;
-next_item:
- proclock = nextplock;
- }
+ /* CleanUpLock will wake up waiters if needed. */
+ CleanUpLock(lock, proclock,
+ lockMethodTable, partition,
+ wakeupNeeded);
- LWLockRelease(masterLock);
+ next_item:
+ proclock = nextplock;
+ } /* loop over PROCLOCKs within this partition */
+
+ LWLockRelease(partitionLock);
+ } /* loop over partitions */
#ifdef LOCK_DEBUG
if (*(lockMethodTable->trace_flag))
@@ -1627,19 +1706,16 @@ PostPrepare_Locks(TransactionId xid)
{
PGPROC *newproc = TwoPhaseGetDummyProc(xid);
HASH_SEQ_STATUS status;
- SHM_QUEUE *procLocks = &(MyProc->procLocks);
- LWLockId masterLock;
LOCALLOCK *locallock;
+ LOCK *lock;
PROCLOCK *proclock;
PROCLOCKTAG proclocktag;
bool found;
- LOCK *lock;
+ int partition;
/* This is a critical section: any error means big trouble */
START_CRIT_SECTION();
- masterLock = LockMgrLock;
-
/*
* First we run through the locallock table and get rid of unwanted
* entries, then we scan the process's proclocks and transfer them to the
@@ -1678,105 +1754,121 @@ PostPrepare_Locks(TransactionId xid)
RemoveLocalLock(locallock);
}
- LWLockAcquire(masterLock, LW_EXCLUSIVE);
+ /*
+ * Now, scan each lock partition separately.
+ */
+ for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
+ {
+ LWLockId partitionLock = FirstLockMgrLock + partition;
+ SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]);
- proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
- offsetof(PROCLOCK, procLink));
+ proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
+ offsetof(PROCLOCK, procLink));
- while (proclock)
- {
- PROCLOCK *nextplock;
- LOCKMASK holdMask;
- PROCLOCK *newproclock;
+ if (!proclock)
+ continue; /* needn't examine this partition */
- /* Get link first, since we may unlink/delete this proclock */
- nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
- offsetof(PROCLOCK, procLink));
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
- Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
+ while (proclock)
+ {
+ PROCLOCK *nextplock;
+ LOCKMASK holdMask;
+ PROCLOCK *newproclock;
- lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+ /* Get link first, since we may unlink/delete this proclock */
+ nextplock = (PROCLOCK *)
+ SHMQueueNext(procLocks, &proclock->procLink,
+ offsetof(PROCLOCK, procLink));
- /* Ignore nontransactional locks */
- if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional)
- goto next_item;
+ Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
- PROCLOCK_PRINT("PostPrepare_Locks", proclock);
- LOCK_PRINT("PostPrepare_Locks", lock, 0);
- Assert(lock->nRequested >= 0);
- Assert(lock->nGranted >= 0);
- Assert(lock->nGranted <= lock->nRequested);
- Assert((proclock->holdMask & ~lock->grantMask) == 0);
+ lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
- /*
- * Since there were no session locks, we should be releasing all locks
- */
- if (proclock->releaseMask != proclock->holdMask)
- elog(PANIC, "we seem to have dropped a bit somewhere");
+ /* Ignore nontransactional locks */
+ if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional)
+ goto next_item;
- holdMask = proclock->holdMask;
+ PROCLOCK_PRINT("PostPrepare_Locks", proclock);
+ LOCK_PRINT("PostPrepare_Locks", lock, 0);
+ Assert(lock->nRequested >= 0);
+ Assert(lock->nGranted >= 0);
+ Assert(lock->nGranted <= lock->nRequested);
+ Assert((proclock->holdMask & ~lock->grantMask) == 0);
- /*
- * We cannot simply modify proclock->tag.proc to reassign ownership of
- * the lock, because that's part of the hash key and the proclock
- * would then be in the wrong hash chain. So, unlink and delete the
- * old proclock; create a new one with the right contents; and link it
- * into place. We do it in this order to be certain we won't run out
- * of shared memory (the way dynahash.c works, the deleted object is
- * certain to be available for reallocation).
- */
- SHMQueueDelete(&proclock->lockLink);
- SHMQueueDelete(&proclock->procLink);
- if (!hash_search(LockMethodProcLockHash,
- (void *) &(proclock->tag),
- HASH_REMOVE, NULL))
- elog(PANIC, "proclock table corrupted");
+ /*
+ * Since there were no session locks, we should be releasing all
+ * locks
+ */
+ if (proclock->releaseMask != proclock->holdMask)
+ elog(PANIC, "we seem to have dropped a bit somewhere");
- /*
- * Create the hash key for the new proclock table.
- */
- MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));
- proclocktag.lock = MAKE_OFFSET(lock);
- proclocktag.proc = MAKE_OFFSET(newproc);
-
- newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
- (void *) &proclocktag,
- HASH_ENTER_NULL, &found);
- if (!newproclock)
- ereport(PANIC, /* should not happen */
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of shared memory"),
- errdetail("Not enough memory for reassigning the prepared transaction's locks.")));
+ holdMask = proclock->holdMask;
- /*
- * If new, initialize the new entry
- */
- if (!found)
- {
- newproclock->holdMask = 0;
- newproclock->releaseMask = 0;
- /* Add new proclock to appropriate lists */
- SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink);
- SHMQueueInsertBefore(&newproc->procLocks, &newproclock->procLink);
- PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock);
- }
- else
- {
- PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock);
- Assert((newproclock->holdMask & ~lock->grantMask) == 0);
- }
+ /*
+ * We cannot simply modify proclock->tag.proc to reassign
+ * ownership of the lock, because that's part of the hash key and
+ * the proclock would then be in the wrong hash chain. So, unlink
+ * and delete the old proclock; create a new one with the right
+ * contents; and link it into place. We do it in this order to be
+ * certain we won't run out of shared memory (the way dynahash.c
+ * works, the deleted object is certain to be available for
+ * reallocation).
+ */
+ SHMQueueDelete(&proclock->lockLink);
+ SHMQueueDelete(&proclock->procLink);
+ if (!hash_search(LockMethodProcLockHash[partition],
+ (void *) &(proclock->tag),
+ HASH_REMOVE, NULL))
+ elog(PANIC, "proclock table corrupted");
- /*
- * Pass over the identified lock ownership.
- */
- Assert((newproclock->holdMask & holdMask) == 0);
- newproclock->holdMask |= holdMask;
+ /*
+ * Create the hash key for the new proclock table entry.
+ */
+ MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));
+ proclocktag.lock = MAKE_OFFSET(lock);
+ proclocktag.proc = MAKE_OFFSET(newproc);
+
+ newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
+ (void *) &proclocktag,
+ HASH_ENTER_NULL, &found);
+ if (!newproclock)
+ ereport(PANIC, /* should not happen */
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of shared memory"),
+ errdetail("Not enough memory for reassigning the prepared transaction's locks.")));
-next_item:
- proclock = nextplock;
- }
+ /*
+ * If new, initialize the new entry
+ */
+ if (!found)
+ {
+ newproclock->holdMask = 0;
+ newproclock->releaseMask = 0;
+ /* Add new proclock to appropriate lists */
+ SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink);
+ SHMQueueInsertBefore(&(newproc->myProcLocks[partition]),
+ &newproclock->procLink);
+ PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock);
+ }
+ else
+ {
+ PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock);
+ Assert((newproclock->holdMask & ~lock->grantMask) == 0);
+ }
+
+ /*
+ * Pass over the identified lock ownership.
+ */
+ Assert((newproclock->holdMask & holdMask) == 0);
+ newproclock->holdMask |= holdMask;
+
+ next_item:
+ proclock = nextplock;
+ } /* loop over PROCLOCKs within this partition */
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
+ } /* loop over partitions */
END_CRIT_SECTION();
}
@@ -1789,20 +1881,23 @@ Size
LockShmemSize(void)
{
Size size = 0;
- long max_table_size = NLOCKENTS();
+ Size tabsize;
+ long max_table_size;
- /* lockHash table */
- size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK)));
+ /* lock hash tables */
+ max_table_size = NLOCKENTS();
+ max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1;
+ tabsize = hash_estimate_size(max_table_size, sizeof(LOCK));
+ size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS));
- /* proclockHash table */
- size = add_size(size, hash_estimate_size(max_table_size, sizeof(PROCLOCK)));
+ /* proclock hash tables */
+ max_table_size *= 2;
+ tabsize = hash_estimate_size(max_table_size, sizeof(PROCLOCK));
+ size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS));
/*
- * Note we count only one pair of hash tables, since the userlocks table
- * actually overlays the main one.
- *
- * Since the lockHash entry count above is only an estimate, add 10%
- * safety margin.
+ * Since there is likely to be some space wastage due to uneven use
+ * of the partitions, add 10% safety margin.
*/
size = add_size(size, size / 10);
@@ -1818,9 +1913,9 @@ LockShmemSize(void)
* copies of the same PGPROC and/or LOCK objects are likely to appear.
* It is the caller's responsibility to match up duplicates if wanted.
*
- * The design goal is to hold the LockMgrLock for as short a time as possible;
+ * The design goal is to hold the LWLocks for as short a time as possible;
* thus, this function simply makes a copy of the necessary data and releases
- * the lock, allowing the caller to contemplate and format the data for as
+ * the locks, allowing the caller to contemplate and format the data for as
* long as it pleases.
*/
LockData *
@@ -1830,40 +1925,67 @@ GetLockStatusData(void)
HTAB *proclockTable;
PROCLOCK *proclock;
HASH_SEQ_STATUS seqstat;
+ int els;
+ int el;
int i;
data = (LockData *) palloc(sizeof(LockData));
- LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
-
- proclockTable = LockMethodProcLockHash;
-
- data->nelements = i = proclockTable->hctl->nentries;
+ /*
+ * Acquire locks on all of the shared lock data structures. We can't
+ * operate one partition at a time if we want to deliver a self-consistent
+ * view of the state.
+ *
+ * Since this is a read-only operation, we take shared instead of exclusive
+ * lock. There's not a whole lot of point to this, because all the normal
+ * operations require exclusive lock, but it doesn't hurt anything either.
+ * It will at least allow two backends to do GetLockStatusData in parallel.
+ *
+ * Must grab LWLocks in partition-number order to avoid LWLock deadlock.
+ *
+ * Use same loop to count up the total number of PROCLOCK objects.
+ */
+ els = 0;
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+ {
+ LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
+ proclockTable = LockMethodProcLockHash[i];
+ els += proclockTable->hctl->nentries;
+ }
- data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * i);
- data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * i);
- data->procs = (PGPROC *) palloc(sizeof(PGPROC) * i);
- data->locks = (LOCK *) palloc(sizeof(LOCK) * i);
+ data->nelements = els;
+ data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * els);
+ data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * els);
+ data->procs = (PGPROC *) palloc(sizeof(PGPROC) * els);
+ data->locks = (LOCK *) palloc(sizeof(LOCK) * els);
- hash_seq_init(&seqstat, proclockTable);
+ el = 0;
- i = 0;
- while ((proclock = hash_seq_search(&seqstat)))
+ /* Now scan the tables to copy the data */
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
{
- PGPROC *proc = (PGPROC *) MAKE_PTR(proclock->tag.proc);
- LOCK *lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+ proclockTable = LockMethodProcLockHash[i];
+ hash_seq_init(&seqstat, proclockTable);
- data->proclockaddrs[i] = MAKE_OFFSET(proclock);
- memcpy(&(data->proclocks[i]), proclock, sizeof(PROCLOCK));
- memcpy(&(data->procs[i]), proc, sizeof(PGPROC));
- memcpy(&(data->locks[i]), lock, sizeof(LOCK));
+ while ((proclock = hash_seq_search(&seqstat)))
+ {
+ PGPROC *proc = (PGPROC *) MAKE_PTR(proclock->tag.proc);
+ LOCK *lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+
+ data->proclockaddrs[el] = MAKE_OFFSET(proclock);
+ memcpy(&(data->proclocks[el]), proclock, sizeof(PROCLOCK));
+ memcpy(&(data->procs[el]), proc, sizeof(PGPROC));
+ memcpy(&(data->locks[el]), lock, sizeof(LOCK));
- i++;
+ el++;
+ }
}
- LWLockRelease(LockMgrLock);
+ /* And release locks */
+ for (i = NUM_LOCK_PARTITIONS; --i >= 0; )
+ LWLockRelease(FirstLockMgrLock + i);
- Assert(i == data->nelements);
+ Assert(el == data->nelements);
return data;
}
@@ -1879,7 +2001,7 @@ GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode)
#ifdef LOCK_DEBUG
/*
- * Dump all locks in the given proc's procLocks list.
+ * Dump all locks in the given proc's myProcLocks lists.
*
* Caller is responsible for having acquired appropriate LWLocks.
*/
@@ -1889,29 +2011,34 @@ DumpLocks(PGPROC *proc)
SHM_QUEUE *procLocks;
PROCLOCK *proclock;
LOCK *lock;
+ int i;
if (proc == NULL)
return;
- procLocks = &proc->procLocks;
-
if (proc->waitLock)
LOCK_PRINT("DumpLocks: waiting on", proc->waitLock, 0);
- proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
- offsetof(PROCLOCK, procLink));
-
- while (proclock)
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
{
- Assert(proclock->tag.proc == MAKE_OFFSET(proc));
+ procLocks = &(proc->myProcLocks[i]);
- lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+ proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
+ offsetof(PROCLOCK, procLink));
- PROCLOCK_PRINT("DumpLocks", proclock);
- LOCK_PRINT("DumpLocks", lock, 0);
+ while (proclock)
+ {
+ Assert(proclock->tag.proc == MAKE_OFFSET(proc));
- proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
- offsetof(PROCLOCK, procLink));
+ lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+
+ PROCLOCK_PRINT("DumpLocks", proclock);
+ LOCK_PRINT("DumpLocks", lock, 0);
+
+ proclock = (PROCLOCK *)
+ SHMQueueNext(procLocks, &proclock->procLink,
+ offsetof(PROCLOCK, procLink));
+ }
}
}
@@ -1928,25 +2055,30 @@ DumpAllLocks(void)
LOCK *lock;
HTAB *proclockTable;
HASH_SEQ_STATUS status;
+ int i;
proc = MyProc;
- proclockTable = LockMethodProcLockHash;
if (proc && proc->waitLock)
LOCK_PRINT("DumpAllLocks: waiting on", proc->waitLock, 0);
- hash_seq_init(&status, proclockTable);
- while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
{
- PROCLOCK_PRINT("DumpAllLocks", proclock);
+ proclockTable = LockMethodProcLockHash[i];
+ hash_seq_init(&status, proclockTable);
- if (proclock->tag.lock)
+ while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL)
{
- lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
- LOCK_PRINT("DumpAllLocks", lock, 0);
+ PROCLOCK_PRINT("DumpAllLocks", proclock);
+
+ if (proclock->tag.lock)
+ {
+ lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+ LOCK_PRINT("DumpAllLocks", lock, 0);
+ }
+ else
+ elog(LOG, "DumpAllLocks: proclock->tag.lock = NULL");
}
- else
- elog(LOG, "DumpAllLocks: proclock->tag.lock = NULL");
}
}
#endif /* LOCK_DEBUG */
@@ -1975,7 +2107,8 @@ lock_twophase_recover(TransactionId xid, uint16 info,
PROCLOCK *proclock;
PROCLOCKTAG proclocktag;
bool found;
- LWLockId masterLock;
+ int partition;
+ LWLockId partitionLock;
LockMethod lockMethodTable;
Assert(len == sizeof(TwoPhaseLockRecord));
@@ -1987,19 +2120,20 @@ lock_twophase_recover(TransactionId xid, uint16 info,
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
lockMethodTable = LockMethods[lockmethodid];
- masterLock = LockMgrLock;
+ partition = LockTagToPartition(locktag);
+ partitionLock = FirstLockMgrLock + partition;
- LWLockAcquire(masterLock, LW_EXCLUSIVE);
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
* Find or create a lock with this tag.
*/
- lock = (LOCK *) hash_search(LockMethodLockHash,
+ lock = (LOCK *) hash_search(LockMethodLockHash[partition],
(void *) locktag,
HASH_ENTER_NULL, &found);
if (!lock)
{
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
@@ -2039,7 +2173,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
/*
* Find or create a proclock entry with this tag
*/
- proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
+ proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
(void *) &proclocktag,
HASH_ENTER_NULL, &found);
if (!proclock)
@@ -2054,12 +2188,12 @@ lock_twophase_recover(TransactionId xid, uint16 info,
* anyone to release the lock object later.
*/
Assert(SHMQueueEmpty(&(lock->procLocks)));
- if (!hash_search(LockMethodLockHash,
+ if (!hash_search(LockMethodLockHash[partition],
(void *) &(lock->tag),
HASH_REMOVE, NULL))
elog(PANIC, "lock table corrupted");
}
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory"),
@@ -2075,7 +2209,8 @@ lock_twophase_recover(TransactionId xid, uint16 info,
proclock->releaseMask = 0;
/* Add proclock to appropriate lists */
SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
- SHMQueueInsertBefore(&proc->procLocks, &proclock->procLink);
+ SHMQueueInsertBefore(&(proc->myProcLocks[partition]),
+ &proclock->procLink);
PROCLOCK_PRINT("lock_twophase_recover: new", proclock);
}
else
@@ -2106,7 +2241,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
*/
GrantLock(lock, proclock, lockmode);
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
}
/*
@@ -2123,10 +2258,11 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
LOCKTAG *locktag;
LOCKMODE lockmode;
LOCKMETHODID lockmethodid;
- PROCLOCKTAG proclocktag;
LOCK *lock;
PROCLOCK *proclock;
- LWLockId masterLock;
+ PROCLOCKTAG proclocktag;
+ int partition;
+ LWLockId partitionLock;
LockMethod lockMethodTable;
bool wakeupNeeded;
@@ -2139,14 +2275,15 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
lockMethodTable = LockMethods[lockmethodid];
- masterLock = LockMgrLock;
+ partition = LockTagToPartition(locktag);
+ partitionLock = FirstLockMgrLock + partition;
- LWLockAcquire(masterLock, LW_EXCLUSIVE);
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
* Re-find the lock object (it had better be there).
*/
- lock = (LOCK *) hash_search(LockMethodLockHash,
+ lock = (LOCK *) hash_search(LockMethodLockHash[partition],
(void *) locktag,
HASH_FIND, NULL);
if (!lock)
@@ -2158,7 +2295,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); /* must clear padding */
proclocktag.lock = MAKE_OFFSET(lock);
proclocktag.proc = MAKE_OFFSET(proc);
- proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
+ proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition],
(void *) &proclocktag,
HASH_FIND, NULL);
if (!proclock)
@@ -2171,7 +2308,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
{
PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock);
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
return;
@@ -2182,9 +2319,11 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
*/
wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
- CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
+ CleanUpLock(lock, proclock,
+ lockMethodTable, partition,
+ wakeupNeeded);
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
}
/*
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index a215a652855..e1edabde905 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -8,14 +8,14 @@
* exclusive and shared lock modes (to support read/write and read-only
* access to a shared object). There are few other frammishes. User-level
* locking should be done with the full lock manager --- which depends on
- * an LWLock to protect its shared state.
+ * LWLocks to protect its shared state.
*
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.35 2005/12/06 23:08:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.36 2005/12/11 21:02:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -125,7 +125,10 @@ NumLWLocks(void)
*/
/* Predefined LWLocks */
- numLocks = (int) NumFixedLWLocks;
+ numLocks = (int) FirstLockMgrLock;
+
+ /* lock.c gets the ones starting at FirstLockMgrLock */
+ numLocks += NUM_LOCK_PARTITIONS;
/* bufmgr.c needs two for each shared buffer */
numLocks += 2 * NBuffers;
@@ -204,10 +207,11 @@ CreateLWLocks(void)
/*
* Initialize the dynamic-allocation counter, which is stored just before
- * the first LWLock.
+ * the first LWLock. The LWLocks used by lock.c are not dynamically
+ * allocated; lock.c just assumes it has them.
*/
LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
- LWLockCounter[0] = (int) NumFixedLWLocks;
+ LWLockCounter[0] = (int) FirstLockMgrLock + NUM_LOCK_PARTITIONS;
LWLockCounter[1] = numLocks;
}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 8d8269041e7..34d80bfceea 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.169 2005/12/09 01:22:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.170 2005/12/11 21:02:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -18,9 +18,8 @@
* ProcQueueAlloc() -- create a shm queue for sleeping processes
* ProcQueueInit() -- create a queue without allocing memory
*
- * Locking and waiting for buffers can cause the backend to be
- * put to sleep. Whoever releases the lock, etc. wakes the
- * process up again (and gives it an error code so it knows
+ * Waiting for a lock causes the backend to be put to sleep. Whoever releases
+ * the lock wakes the process up again (and gives it an error code so it knows
* whether it was awoken on an error condition).
*
* Interface (b):
@@ -28,7 +27,7 @@
* ProcReleaseLocks -- frees the locks associated with current transaction
*
* ProcKill -- destroys the shared memory state (and locks)
- * associated with the process.
+ * associated with the process.
*/
#include "postgres.h"
@@ -65,7 +64,8 @@ NON_EXEC_STATIC slock_t *ProcStructLock = NULL;
static PROC_HDR *ProcGlobal = NULL;
static PGPROC *DummyProcs = NULL;
-static bool waitingForLock = false;
+/* If we are waiting for a lock, this points to the associated LOCALLOCK */
+static LOCALLOCK *lockAwaited = NULL;
/* Mark these volatile because they can be changed by signal handler */
static volatile bool statement_timeout_active = false;
@@ -200,10 +200,10 @@ InitProcGlobal(void)
void
InitProcess(void)
{
- SHMEM_OFFSET myOffset;
-
/* use volatile pointer to prevent code rearrangement */
volatile PROC_HDR *procglobal = ProcGlobal;
+ SHMEM_OFFSET myOffset;
+ int i;
/*
* ProcGlobal should be set by a previous call to InitProcGlobal (if we
@@ -264,7 +264,8 @@ InitProcess(void)
MyProc->lwWaitLink = NULL;
MyProc->waitLock = NULL;
MyProc->waitProcLock = NULL;
- SHMQueueInit(&(MyProc->procLocks));
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+ SHMQueueInit(&(MyProc->myProcLocks[i]));
/*
* Add our PGPROC to the PGPROC array in shared memory.
@@ -304,6 +305,7 @@ void
InitDummyProcess(int proctype)
{
PGPROC *dummyproc;
+ int i;
/*
* ProcGlobal should be set by a previous call to InitProcGlobal (we
@@ -360,7 +362,8 @@ InitDummyProcess(int proctype)
MyProc->lwWaitLink = NULL;
MyProc->waitLock = NULL;
MyProc->waitProcLock = NULL;
- SHMQueueInit(&(MyProc->procLocks));
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+ SHMQueueInit(&(MyProc->myProcLocks[i]));
/*
* Arrange to clean up at process exit.
@@ -416,21 +419,24 @@ HaveNFreeProcs(int n)
bool
LockWaitCancel(void)
{
+ LWLockId partitionLock;
+
/* Nothing to do if we weren't waiting for a lock */
- if (!waitingForLock)
+ if (lockAwaited == NULL)
return false;
/* Turn off the deadlock timer, if it's still running (see ProcSleep) */
disable_sig_alarm(false);
/* Unlink myself from the wait queue, if on it (might not be anymore!) */
- LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
+ partitionLock = FirstLockMgrLock + lockAwaited->partition;
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
if (MyProc->links.next != INVALID_OFFSET)
{
/* We could not have been granted the lock yet */
Assert(MyProc->waitStatus == STATUS_ERROR);
- RemoveFromWaitQueue(MyProc);
+ RemoveFromWaitQueue(MyProc, lockAwaited->partition);
}
else
{
@@ -444,9 +450,9 @@ LockWaitCancel(void)
GrantAwaitedLock();
}
- waitingForLock = false;
+ lockAwaited = NULL;
- LWLockRelease(LockMgrLock);
+ LWLockRelease(partitionLock);
/*
* Reset the proc wait semaphore to zero. This is necessary in the
@@ -606,18 +612,18 @@ ProcQueueInit(PROC_QUEUE *queue)
/*
- * ProcSleep -- put a process to sleep
+ * ProcSleep -- put a process to sleep on the specified lock
*
* Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process (under all XIDs).
*
- * Locktable's masterLock must be held at entry, and will be held
+ * The lock table's partition lock must be held at entry, and will be held
* at exit.
*
* Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
*
* ASSUME: that no one will fiddle with the queue until after
- * we release the masterLock.
+ * we release the partition lock.
*
* NOTES: The process queue is now a priority queue for locking.
*
@@ -625,12 +631,13 @@ ProcQueueInit(PROC_QUEUE *queue)
* semaphore is normally zero, so when we try to acquire it, we sleep.
*/
int
-ProcSleep(LockMethod lockMethodTable,
- LOCKMODE lockmode,
- LOCK *lock,
- PROCLOCK *proclock)
+ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
{
- LWLockId masterLock = LockMgrLock;
+ LOCKMODE lockmode = locallock->tag.mode;
+ LOCK *lock = locallock->lock;
+ PROCLOCK *proclock = locallock->proclock;
+ int partition = locallock->partition;
+ LWLockId partitionLock = FirstLockMgrLock + partition;
PROC_QUEUE *waitQueue = &(lock->waitProcs);
LOCKMASK myHeldLocks = MyProc->heldLocks;
bool early_deadlock = false;
@@ -732,22 +739,22 @@ ProcSleep(LockMethod lockMethodTable,
*/
if (early_deadlock)
{
- RemoveFromWaitQueue(MyProc);
+ RemoveFromWaitQueue(MyProc, partition);
return STATUS_ERROR;
}
/* mark that we are waiting for a lock */
- waitingForLock = true;
+ lockAwaited = locallock;
/*
- * Release the locktable's masterLock.
+ * Release the lock table's partition lock.
*
* NOTE: this may also cause us to exit critical-section state, possibly
* allowing a cancel/die interrupt to be accepted. This is OK because we
* have recorded the fact that we are waiting for a lock, and so
* LockWaitCancel will clean up if cancel/die happens.
*/
- LWLockRelease(masterLock);
+ LWLockRelease(partitionLock);
/*
* Set timer so we can wake up after awhile and check for a deadlock. If a
@@ -785,16 +792,16 @@ ProcSleep(LockMethod lockMethodTable,
elog(FATAL, "could not disable timer for process wakeup");
/*
- * Re-acquire the locktable's masterLock. We have to do this to hold off
- * cancel/die interrupts before we can mess with waitingForLock (else we
- * might have a missed or duplicated locallock update).
+ * Re-acquire the lock table's partition lock. We have to do this to
+ * hold off cancel/die interrupts before we can mess with lockAwaited
+ * (else we might have a missed or duplicated locallock update).
*/
- LWLockAcquire(masterLock, LW_EXCLUSIVE);
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
* We no longer want LockWaitCancel to do anything.
*/
- waitingForLock = false;
+ lockAwaited = NULL;
/*
* If we got the lock, be sure to remember it in the locallock table.
@@ -816,6 +823,8 @@ ProcSleep(LockMethod lockMethodTable,
* Also remove the process from the wait queue and set its links invalid.
* RETURN: the next process in the wait queue.
*
+ * The appropriate lock partition lock must be held by caller.
+ *
* XXX: presently, this code is only used for the "success" case, and only
* works correctly for that case. To clean up in failure case, would need
* to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
@@ -825,8 +834,6 @@ ProcWakeup(PGPROC *proc, int waitStatus)
{
PGPROC *retProc;
- /* assume that masterLock has been acquired */
-
/* Proc should be sleeping ... */
if (proc->links.prev == INVALID_OFFSET ||
proc->links.next == INVALID_OFFSET)
@@ -854,6 +861,8 @@ ProcWakeup(PGPROC *proc, int waitStatus)
* ProcLockWakeup -- routine for waking up processes when a lock is
* released (or a prior waiter is aborted). Scan all waiters
* for lock, waken any that are no longer blocked.
+ *
+ * The appropriate lock partition lock must be held by caller.
*/
void
ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
@@ -908,25 +917,32 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
Assert(waitQueue->size >= 0);
}
-/* --------------------
+/*
+ * CheckDeadLock
+ *
* We only get to this routine if we got SIGALRM after DeadlockTimeout
* while waiting for a lock to be released by some other process. Look
* to see if there's a deadlock; if not, just return and continue waiting.
* If we have a real deadlock, remove ourselves from the lock's wait queue
* and signal an error to ProcSleep.
- * --------------------
*/
static void
CheckDeadLock(void)
{
+ int i;
+
/*
- * Acquire locktable lock. Note that the deadlock check interrupt had
- * better not be enabled anywhere that this process itself holds the
- * locktable lock, else this will wait forever. Also note that
- * LWLockAcquire creates a critical section, so that this routine cannot
- * be interrupted by cancel/die interrupts.
+ * Acquire exclusive locks on all of the shared lock data structures.
+ * Must grab LWLocks in partition-number order to avoid LWLock deadlock.
+ *
+ * Note that the deadlock check interrupt had better not be enabled
+ * anywhere that this process itself holds lock partition locks, else this
+ * will wait forever. Also note that LWLockAcquire creates a critical
+ * section, so that this routine cannot be interrupted by cancel/die
+ * interrupts.
*/
- LWLockAcquire(LockMgrLock, LW_EXCLUSIVE);
+ for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+ LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);
/*
* Check to see if we've been awoken by anyone in the interim.
@@ -937,14 +953,11 @@ CheckDeadLock(void)
*
* We check by looking to see if we've been unlinked from the wait queue.
* This is quicker than checking our semaphore's state, since no kernel
+ * call is needed, and it is safe because we hold all the partition locks.
+ * call is needed, and it is safe because we hold the lock partition lock.
*/
if (MyProc->links.prev == INVALID_OFFSET ||
MyProc->links.next == INVALID_OFFSET)
- {
- LWLockRelease(LockMgrLock);
- return;
- }
+ goto check_done;
#ifdef LOCK_DEBUG
if (Debug_deadlocks)
@@ -954,16 +967,19 @@ CheckDeadLock(void)
if (!DeadLockCheck(MyProc))
{
/* No deadlock, so keep waiting */
- LWLockRelease(LockMgrLock);
- return;
+ goto check_done;
}
/*
* Oops. We have a deadlock.
*
- * Get this process out of wait state.
+ * Get this process out of wait state. (Note: we could do this more
+ * efficiently by relying on lockAwaited, but use this coding to preserve
+ * the flexibility to kill some other transaction than the one detecting
+ * the deadlock.)
*/
- RemoveFromWaitQueue(MyProc);
+ Assert(MyProc->waitLock != NULL);
+ RemoveFromWaitQueue(MyProc, LockTagToPartition(&(MyProc->waitLock->tag)));
/*
* Set MyProc->waitStatus to STATUS_ERROR so that ProcSleep will report an
@@ -987,7 +1003,15 @@ CheckDeadLock(void)
* them anymore. However, RemoveFromWaitQueue took care of waking up any
* such processes.
*/
- LWLockRelease(LockMgrLock);
+
+ /*
+ * Release locks acquired at head of routine. Order is not critical,
+ * so do it back-to-front to avoid waking another CheckDeadLock instance
+ * before it can get all the locks.
+ */
+check_done:
+ for (i = NUM_LOCK_PARTITIONS; --i >= 0; )
+ LWLockRelease(FirstLockMgrLock + i);
}
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index e289632054c..9af03fb4742 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.92 2005/12/09 01:22:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.93 2005/12/11 21:02:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -19,6 +19,13 @@
#include "storage/shmem.h"
+/*
+ * Number of partitions the shared lock tables are divided into.
+ *
+ * See LockTagToPartition() if you change this.
+ */
+#define NUM_LOCK_PARTITIONS 16
+
/* originally in procq.h */
typedef struct PROC_QUEUE
{
@@ -348,6 +355,7 @@ typedef struct LOCALLOCK
LOCK *lock; /* associated LOCK object in shared mem */
PROCLOCK *proclock; /* associated PROCLOCK object in shmem */
bool isTempObject; /* true if lock is on a temporary object */
+ int partition; /* ID of partition containing this lock */
int nLocks; /* total number of times lock is held */
int numLockOwners; /* # of relevant ResourceOwners */
int maxLockOwners; /* allocated size of array */
@@ -389,6 +397,7 @@ typedef enum
*/
extern void InitLocks(void);
extern LockMethod GetLocksMethodTable(const LOCK *lock);
+extern int LockTagToPartition(const LOCKTAG *locktag);
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
bool isTempObject,
LOCKMODE lockmode,
@@ -406,7 +415,7 @@ extern int LockCheckConflicts(LockMethod lockMethodTable,
LOCK *lock, PROCLOCK *proclock, PGPROC *proc);
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
extern void GrantAwaitedLock(void);
-extern void RemoveFromWaitQueue(PGPROC *proc);
+extern void RemoveFromWaitQueue(PGPROC *proc, int partition);
extern Size LockShmemSize(void);
extern bool DeadLockCheck(PGPROC *proc);
extern void DeadLockReport(void);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 4291e0b2e74..c318e60b577 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.23 2005/10/15 02:49:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.24 2005/12/11 21:02:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,9 +16,9 @@
/*
* We have a number of predefined LWLocks, plus a bunch of LWLocks that are
- * dynamically assigned (for shared buffers). The LWLock structures live
- * in shared memory (since they contain shared data) and are identified by
- * values of this enumerated type. We abuse the notion of an enum somewhat
+ * dynamically assigned (e.g., for shared buffers). The LWLock structures
+ * live in shared memory (since they contain shared data) and are identified
+ * by values of this enumerated type. We abuse the notion of an enum somewhat
* by allowing values not listed in the enum declaration to be assigned.
* The extra value MaxDynamicLWLock is there to keep the compiler from
* deciding that the enum can be represented as char or short ...
@@ -27,7 +27,6 @@ typedef enum LWLockId
{
BufMappingLock,
BufFreelistLock,
- LockMgrLock,
OidGenLock,
XidGenLock,
ProcArrayLock,
@@ -46,8 +45,7 @@ typedef enum LWLockId
RelCacheInitLock,
BgWriterCommLock,
TwoPhaseStateLock,
-
- NumFixedLWLocks, /* must be last except for MaxDynamicLWLock */
+ FirstLockMgrLock, /* first of NUM_LOCK_PARTITIONS consecutive lock IDs; must be last except for MaxDynamicLWLock */
MaxDynamicLWLock = 1000000000
} LWLockId;
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 4cba391048e..2cfee41eff9 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.84 2005/10/15 02:49:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.85 2005/12/11 21:02:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -52,7 +52,8 @@ struct XidCache
* so that the prepared transactions appear to be still running and are
* correctly shown as holding locks. A prepared transaction PGPROC can be
* distinguished from a real one at need by the fact that it has pid == 0.
- * The semaphore and lock-related fields in a prepared-xact PGPROC are unused.
+ * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
+ * but its myProcLocks[] lists are valid.
*/
struct PGPROC
{
@@ -86,8 +87,12 @@ struct PGPROC
LOCKMASK heldLocks; /* bitmask for lock types already held on this
* lock object by this backend */
- SHM_QUEUE procLocks; /* list of PROCLOCK objects for locks held or
- * awaited by this backend */
+ /*
+ * All PROCLOCK objects for locks held or awaited by this backend are
+ * linked into one of these lists, according to the partition number of
+ * their lock.
+ */
+ SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
struct XidCache subxids; /* cache for subtransaction XIDs */
};
@@ -99,7 +104,7 @@ extern DLLIMPORT PGPROC *MyProc;
/*
- * There is one ProcGlobal struct for the whole installation.
+ * There is one ProcGlobal struct for the whole database cluster.
*/
typedef struct PROC_HDR
{
@@ -134,8 +139,7 @@ extern bool HaveNFreeProcs(int n);
extern void ProcReleaseLocks(bool isCommit);
extern void ProcQueueInit(PROC_QUEUE *queue);
-extern int ProcSleep(LockMethod lockMethodTable, LOCKMODE lockmode,
- LOCK *lock, PROCLOCK *proclock);
+extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
extern bool LockWaitCancel(void);