summaryrefslogtreecommitdiff
path: root/src/backend/storage/lmgr/deadlock.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/lmgr/deadlock.c')
-rw-r--r--src/backend/storage/lmgr/deadlock.c279
1 files changed, 227 insertions, 52 deletions
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index a68aaf62072..69f678b5f8d 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -38,6 +38,7 @@ typedef struct
{
PGPROC *waiter; /* the waiting process */
PGPROC *blocker; /* the process it is waiting for */
+ LOCK *lock; /* the lock it is waiting for */
int pred; /* workspace for TopoSort */
int link; /* workspace for TopoSort */
} EDGE;
@@ -72,6 +73,9 @@ static bool FindLockCycle(PGPROC *checkProc,
EDGE *softEdges, int *nSoftEdges);
static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
EDGE *softEdges, int *nSoftEdges);
+static bool FindLockCycleRecurseMember(PGPROC *checkProc,
+ PGPROC *checkProcLeader,
+ int depth, EDGE *softEdges, int *nSoftEdges);
static bool ExpandConstraints(EDGE *constraints, int nConstraints);
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
PGPROC **ordering);
@@ -449,18 +453,15 @@ FindLockCycleRecurse(PGPROC *checkProc,
EDGE *softEdges, /* output argument */
int *nSoftEdges) /* output argument */
{
- PGPROC *proc;
- PGXACT *pgxact;
- LOCK *lock;
- PROCLOCK *proclock;
- SHM_QUEUE *procLocks;
- LockMethod lockMethodTable;
- PROC_QUEUE *waitQueue;
- int queue_size;
- int conflictMask;
int i;
- int numLockModes,
- lm;
+ dlist_iter iter;
+
+ /*
+ * If this process is a lock group member, check the leader instead. (Note
+ * that we might be the leader, in which case this is a no-op.)
+ */
+ if (checkProc->lockGroupLeader != NULL)
+ checkProc = checkProc->lockGroupLeader;
/*
* Have we already seen this proc?
@@ -494,13 +495,57 @@ FindLockCycleRecurse(PGPROC *checkProc,
visitedProcs[nVisitedProcs++] = checkProc;
/*
- * If the proc is not waiting, we have no outgoing waits-for edges.
+ * If the process is waiting, there is an outgoing waits-for edge to each
+ * process that blocks it.
+ */
+ if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
+ FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
+ nSoftEdges))
+ return true;
+
+ /*
+ * If the process is not waiting, there could still be outgoing waits-for
+ * edges if it is part of a lock group, because other members of the lock
+ * group might be waiting even though this process is not. (Given lock
+ * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
+ * that is a deadlock even neither of B1 and A2 are waiting for anything.)
*/
- if (checkProc->links.next == NULL)
- return false;
- lock = checkProc->waitLock;
- if (lock == NULL)
- return false;
+ dlist_foreach(iter, &checkProc->lockGroupMembers)
+ {
+ PGPROC *memberProc;
+
+ memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);
+
+ if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
+ memberProc != checkProc &&
+ FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
+ nSoftEdges))
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+FindLockCycleRecurseMember(PGPROC *checkProc,
+ PGPROC *checkProcLeader,
+ int depth,
+ EDGE *softEdges, /* output argument */
+ int *nSoftEdges) /* output argument */
+{
+ PGPROC *proc;
+ LOCK *lock = checkProc->waitLock;
+ PGXACT *pgxact;
+ PROCLOCK *proclock;
+ SHM_QUEUE *procLocks;
+ LockMethod lockMethodTable;
+ PROC_QUEUE *waitQueue;
+ int queue_size;
+ int conflictMask;
+ int i;
+ int numLockModes,
+ lm;
+
lockMethodTable = GetLocksMethodTable(lock);
numLockModes = lockMethodTable->numLockModes;
conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];
@@ -516,11 +561,14 @@ FindLockCycleRecurse(PGPROC *checkProc,
while (proclock)
{
+ PGPROC *leader;
+
proc = proclock->tag.myProc;
pgxact = &ProcGlobal->allPgXact[proc->pgprocno];
+ leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;
- /* A proc never blocks itself */
- if (proc != checkProc)
+ /* A proc never blocks itself or any other lock group member */
+ if (leader != checkProcLeader)
{
for (lm = 1; lm <= numLockModes; lm++)
{
@@ -601,10 +649,20 @@ FindLockCycleRecurse(PGPROC *checkProc,
for (i = 0; i < queue_size; i++)
{
+ PGPROC *leader;
+
proc = procs[i];
+ leader = proc->lockGroupLeader == NULL ? proc :
+ proc->lockGroupLeader;
- /* Done when we reach the target proc */
- if (proc == checkProc)
+ /*
+ * TopoSort will always return an ordering with group members
+ * adjacent to each other in the wait queue (see comments
+ * therein). So, as soon as we reach a process in the same lock
+ * group as checkProc, we know we've found all the conflicts that
+ * precede any member of the lock group lead by checkProcLeader.
+ */
+ if (leader == checkProcLeader)
break;
/* Is there a conflict with this guy's request? */
@@ -625,8 +683,9 @@ FindLockCycleRecurse(PGPROC *checkProc,
* Add this edge to the list of soft edges in the cycle
*/
Assert(*nSoftEdges < MaxBackends);
- softEdges[*nSoftEdges].waiter = checkProc;
- softEdges[*nSoftEdges].blocker = proc;
+ softEdges[*nSoftEdges].waiter = checkProcLeader;
+ softEdges[*nSoftEdges].blocker = leader;
+ softEdges[*nSoftEdges].lock = lock;
(*nSoftEdges)++;
return true;
}
@@ -635,20 +694,52 @@ FindLockCycleRecurse(PGPROC *checkProc,
}
else
{
+ PGPROC *lastGroupMember = NULL;
+
/* Use the true lock wait queue order */
waitQueue = &(lock->waitProcs);
- queue_size = waitQueue->size;
- proc = (PGPROC *) waitQueue->links.next;
+ /*
+ * Find the last member of the lock group that is present in the wait
+ * queue. Anything after this is not a soft lock conflict. If group
+ * locking is not in use, then we know immediately which process we're
+ * looking for, but otherwise we've got to search the wait queue to
+ * find the last process actually present.
+ */
+ if (checkProc->lockGroupLeader == NULL)
+ lastGroupMember = checkProc;
+ else
+ {
+ proc = (PGPROC *) waitQueue->links.next;
+ queue_size = waitQueue->size;
+ while (queue_size-- > 0)
+ {
+ if (proc->lockGroupLeader == checkProcLeader)
+ lastGroupMember = proc;
+ proc = (PGPROC *) proc->links.next;
+ }
+ Assert(lastGroupMember != NULL);
+ }
+ /*
+ * OK, now rescan (or scan) the queue to identify the soft conflicts.
+ */
+ queue_size = waitQueue->size;
+ proc = (PGPROC *) waitQueue->links.next;
while (queue_size-- > 0)
{
+ PGPROC *leader;
+
+ leader = proc->lockGroupLeader == NULL ? proc :
+ proc->lockGroupLeader;
+
/* Done when we reach the target proc */
- if (proc == checkProc)
+ if (proc == lastGroupMember)
break;
/* Is there a conflict with this guy's request? */
- if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
+ if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
+ leader != checkProcLeader)
{
/* This proc soft-blocks checkProc */
if (FindLockCycleRecurse(proc, depth + 1,
@@ -665,8 +756,9 @@ FindLockCycleRecurse(PGPROC *checkProc,
* Add this edge to the list of soft edges in the cycle
*/
Assert(*nSoftEdges < MaxBackends);
- softEdges[*nSoftEdges].waiter = checkProc;
- softEdges[*nSoftEdges].blocker = proc;
+ softEdges[*nSoftEdges].waiter = checkProcLeader;
+ softEdges[*nSoftEdges].blocker = leader;
+ softEdges[*nSoftEdges].lock = lock;
(*nSoftEdges)++;
return true;
}
@@ -711,8 +803,7 @@ ExpandConstraints(EDGE *constraints,
*/
for (i = nConstraints; --i >= 0;)
{
- PGPROC *proc = constraints[i].waiter;
- LOCK *lock = proc->waitLock;
+ LOCK *lock = constraints[i].lock;
/* Did we already make a list for this lock? */
for (j = nWaitOrders; --j >= 0;)
@@ -778,7 +869,9 @@ TopoSort(LOCK *lock,
PGPROC *proc;
int i,
j,
+ jj,
k,
+ kk,
last;
/* First, fill topoProcs[] array with the procs in their current order */
@@ -798,41 +891,95 @@ TopoSort(LOCK *lock,
* stores its list link in constraints[i].link (note any constraint will
* be in just one list). The array index for the before-proc of the i'th
* constraint is remembered in constraints[i].pred.
+ *
+ * Note that it's not necessarily the case that every constraint affects
+ * this particular wait queue. Prior to group locking, a process could be
+ * waiting for at most one lock. But a lock group can be waiting for
+ * zero, one, or multiple locks. Since topoProcs[] is an array of the
+ * processes actually waiting, while constraints[] is an array of group
+ * leaders, we've got to scan through topoProcs[] for each constraint,
+ * checking whether both a waiter and a blocker for that group are
+ * present. If so, the constraint is relevant to this wait queue; if not,
+ * it isn't.
*/
MemSet(beforeConstraints, 0, queue_size * sizeof(int));
MemSet(afterConstraints, 0, queue_size * sizeof(int));
for (i = 0; i < nConstraints; i++)
{
+ /*
+ * Find a representative process that is on the lock queue and part of
+ * the waiting lock group. This may or may not be the leader, which
+ * may or may not be waiting at all. If there are any other processes
+ * in the same lock group on the queue, set their number of
+ * beforeConstraints to -1 to indicate that they should be emitted
+ * with their groupmates rather than considered separately.
+ */
proc = constraints[i].waiter;
- /* Ignore constraint if not for this lock */
- if (proc->waitLock != lock)
- continue;
- /* Find the waiter proc in the array */
+ Assert(proc != NULL);
+ jj = -1;
for (j = queue_size; --j >= 0;)
{
- if (topoProcs[j] == proc)
+ PGPROC *waiter = topoProcs[j];
+
+ if (waiter == proc || waiter->lockGroupLeader == proc)
+ {
+ Assert(waiter->waitLock == lock);
+ if (jj == -1)
+ jj = j;
+ else
+ {
+ Assert(beforeConstraints[j] <= 0);
+ beforeConstraints[j] = -1;
+ }
break;
+ }
}
- Assert(j >= 0); /* should have found a match */
- /* Find the blocker proc in the array */
+
+ /* If no matching waiter, constraint is not relevant to this lock. */
+ if (jj < 0)
+ continue;
+
+ /*
+ * Similarly, find a representative process that is on the lock queue
+ * and waiting for the blocking lock group. Again, this could be the
+ * leader but does not need to be.
+ */
proc = constraints[i].blocker;
+ Assert(proc != NULL);
+ kk = -1;
for (k = queue_size; --k >= 0;)
{
- if (topoProcs[k] == proc)
- break;
+ PGPROC *blocker = topoProcs[k];
+
+ if (blocker == proc || blocker->lockGroupLeader == proc)
+ {
+ Assert(blocker->waitLock == lock);
+ if (kk == -1)
+ kk = k;
+ else
+ {
+ Assert(beforeConstraints[k] <= 0);
+ beforeConstraints[k] = -1;
+ }
+ }
}
- Assert(k >= 0); /* should have found a match */
- beforeConstraints[j]++; /* waiter must come before */
+
+ /* If no matching blocker, constraint is not relevant to this lock. */
+ if (kk < 0)
+ continue;
+
+ beforeConstraints[jj]++; /* waiter must come before */
/* add this constraint to list of after-constraints for blocker */
- constraints[i].pred = j;
- constraints[i].link = afterConstraints[k];
- afterConstraints[k] = i + 1;
+ constraints[i].pred = jj;
+ constraints[i].link = afterConstraints[kk];
+ afterConstraints[kk] = i + 1;
}
+
/*--------------------
* Now scan the topoProcs array backwards. At each step, output the
- * last proc that has no remaining before-constraints, and decrease
- * the beforeConstraints count of each of the procs it was constrained
- * against.
+ * last proc that has no remaining before-constraints plus any other
+ * members of the same lock group; then decrease the beforeConstraints
+ * count of each of the procs it was constrained against.
* i = index of ordering[] entry we want to output this time
* j = search index for topoProcs[]
* k = temp for scanning constraint list for proc j
@@ -840,8 +987,11 @@ TopoSort(LOCK *lock,
*--------------------
*/
last = queue_size - 1;
- for (i = queue_size; --i >= 0;)
+ for (i = queue_size - 1; i >= 0;)
{
+ int c;
+ int nmatches = 0;
+
/* Find next candidate to output */
while (topoProcs[last] == NULL)
last--;
@@ -850,12 +1000,37 @@ TopoSort(LOCK *lock,
if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
break;
}
+
/* If no available candidate, topological sort fails */
if (j < 0)
return false;
- /* Output candidate, and mark it done by zeroing topoProcs[] entry */
- ordering[i] = topoProcs[j];
- topoProcs[j] = NULL;
+
+ /*
+ * Output everything in the lock group. There's no point in outputing
+ * an ordering where members of the same lock group are not
+ * consecutive on the wait queue: if some other waiter is between two
+ * requests that belong to the same group, then either it conflicts
+ * with both of them and is certainly not a solution; or it conflicts
+ * with at most one of them and is thus isomorphic to an ordering
+ * where the group members are consecutive.
+ */
+ proc = topoProcs[j];
+ if (proc->lockGroupLeader != NULL)
+ proc = proc->lockGroupLeader;
+ Assert(proc != NULL);
+ for (c = 0; c <= last; ++c)
+ {
+ if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
+ topoProcs[c]->lockGroupLeader == proc))
+ {
+ ordering[i - nmatches] = topoProcs[c];
+ topoProcs[c] = NULL;
+ ++nmatches;
+ }
+ }
+ Assert(nmatches > 0);
+ i -= nmatches;
+
/* Update beforeConstraints counts of its predecessors */
for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
beforeConstraints[constraints[k - 1].pred]--;