From 3420851a2c2d2ac49b8ba53ccec5d02aa1e6a272 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 29 Jul 2019 18:49:04 -0400
Subject: [PATCH] Fix busted logic for parallel lock grouping in TopoSort().

A "break" statement erroneously left behind by commit a1c1af2a1
caused TopoSort to do the wrong thing if a lock's wait list
contained multiple members of the same locking group.

Because parallel workers don't normally need any locks not already
taken by their leader, this is very hard --- maybe impossible ---
to hit in production.  Still, if it did happen, the queries involved
in an otherwise-resolvable deadlock would block until canceled.

In addition to removing the bogus "break", add an Assert showing
that the conflicting uses of the beforeConstraints[] array (for both
counts and flags) don't overlap, and add some commentary explaining
why not; because it's not obvious without explanation, IMHO.

Original report and patch from Rui Hai Jiang; additional assert
and commentary by me.  Back-patch to 9.6 where the bug came in.

Discussion: https://postgr.es/m/CAEri+mLd3bpHLyW+a9pSe1y=aEkeuJpwBSwvo-+m4n7-ceRmXw@mail.gmail.com
---
 src/backend/storage/lmgr/deadlock.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index 990d48377d..c687496b04 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -922,6 +922,12 @@ TopoSort(LOCK *lock,
 		 * in the same lock group on the queue, set their number of
 		 * beforeConstraints to -1 to indicate that they should be emitted
 		 * with their groupmates rather than considered separately.
+		 *
+		 * In this loop and the similar one just below, it's critical that we
+		 * consistently select the same representative member of any one lock
+		 * group, so that all the constraints are associated with the same
+		 * proc, and the -1's are only associated with not-representative
+		 * members.  We select the last one in the topoProcs array.
 		 */
 		proc = constraints[i].waiter;
 		Assert(proc != NULL);
@@ -940,7 +946,6 @@ TopoSort(LOCK *lock,
 					Assert(beforeConstraints[j] <= 0);
 					beforeConstraints[j] = -1;
 				}
-				break;
 			}
 		}
 
@@ -977,6 +982,7 @@ TopoSort(LOCK *lock,
 		if (kk < 0)
 			continue;
 
+		Assert(beforeConstraints[jj] >= 0);
 		beforeConstraints[jj]++;	/* waiter must come before */
 		/* add this constraint to list of after-constraints for blocker */
 		constraints[i].pred = jj;