diff --git a/src/test/isolation/expected/deadlock-parallel.out b/src/test/isolation/expected/deadlock-parallel.out
index 871a80c4e3..cf4d07e615 100644
--- a/src/test/isolation/expected/deadlock-parallel.out
+++ b/src/test/isolation/expected/deadlock-parallel.out
@@ -1,10 +1,10 @@
 Parsed test spec with 4 sessions
 
 starting permutation: d1a1 d2a2 e1l e2l d1a2 d2a1 d1c e1c d2c e2c
-step d1a1: SELECT lock_share(1,x) FROM bigt LIMIT 1;
-lock_share
+step d1a1: SELECT lock_share(1,x), lock_excl(3,x) FROM bigt LIMIT 1;
+lock_share     lock_excl
 
-1
+1              1
 step d2a2: select lock_share(2,x) FROM bigt LIMIT 1;
 lock_share
 
@@ -16,15 +16,19 @@ step d1a2: SET force_parallel_mode = on;
               SET parallel_tuple_cost = 0;
               SET min_parallel_table_scan_size = 0;
               SET parallel_leader_participation = off;
-              SET max_parallel_workers_per_gather = 4;
+              SET max_parallel_workers_per_gather = 3;
               SELECT sum(lock_share(2,x)) FROM bigt;
 step d2a1: SET force_parallel_mode = on;
               SET parallel_setup_cost = 0;
               SET parallel_tuple_cost = 0;
               SET min_parallel_table_scan_size = 0;
               SET parallel_leader_participation = off;
-              SET max_parallel_workers_per_gather = 4;
-              SELECT sum(lock_share(1,x)) FROM bigt;
+              SET max_parallel_workers_per_gather = 3;
+              SELECT sum(lock_share(1,x)) FROM bigt;
+              SET force_parallel_mode = off;
+              RESET parallel_setup_cost;
+              RESET parallel_tuple_cost;
+              SELECT lock_share(3,x) FROM bigt LIMIT 1;
 step d1a2: <... completed>
 sum
 
 10000
@@ -38,6 +42,9 @@ step d2a1: <... completed>
 sum
 
 10000
+lock_share
+
+1
 step e1c: COMMIT;
 step d2c: COMMIT;
 step e2l: <... completed>
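Two spec changes drive the revised expected output above: each Gather now runs with at most three parallel workers rather than four (plausibly to leave slack in the default max_parallel_workers pool of eight, which two concurrent four-worker gathers would consume exactly; a gather that comes up short of workers leaves some of the expected lock waits missing), and d2a1 now finishes by taking advisory lock 3 in share mode, which is why an extra lock_share/1 result block appears once d2a1 completes. A minimal sketch of that interlock, assuming (as the spec's setup suggests) that lock_share and lock_excl wrap PostgreSQL's transaction-scoped advisory-lock functions:

  -- session d1:
  BEGIN;
  SELECT pg_advisory_xact_lock(3);          -- like d1a1's lock_excl(3,x): exclusive

  -- session d2, concurrently:
  BEGIN;
  SELECT pg_advisory_xact_lock_shared(3);   -- like d2a1's trailing lock_share(3,x):
                                            -- blocks, shared conflicts with exclusive

  -- session d1:
  COMMIT;                                   -- releases lock 3; d2's request is granted

Since d2 now provably cannot finish until d1 commits, the deadlock detector can only resolve the deadlock in d1's favor, giving the single completion order the expected file encodes.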
diff --git a/src/test/isolation/specs/deadlock-parallel.spec b/src/test/isolation/specs/deadlock-parallel.spec
index aa4a0847e0..7ad290c0bd 100644
--- a/src/test/isolation/specs/deadlock-parallel.spec
+++ b/src/test/isolation/specs/deadlock-parallel.spec
@@ -15,6 +15,25 @@
 # The deadlock detector resolves the deadlock by reversing the d1-e2 edge,
 # unblocking d1.
 
+# However ... it's not actually that well-defined whether the deadlock
+# detector will prefer to unblock d1 or d2. It depends on which backend
+# is first to run DeadLockCheck after the deadlock condition is created:
+# that backend will search outwards from its own wait condition, and will
+# first find a loop involving the *other* lock. We encourage that to be
+# one of the d2a1 parallel workers, which will therefore unblock d1a2
+# workers, by setting a shorter deadlock_timeout in session d2. But on
+# slow machines, one or more d1a2 workers may not yet have reached their
+# lock waits, so that they're not unblocked by the first DeadLockCheck.
+# The next DeadLockCheck may choose to unblock the d2a1 workers instead,
+# which would allow d2a1 to complete before d1a2, causing the test to
+# freeze up because isolationtester isn't expecting that completion order.
+# (In effect, we have an undetectable deadlock because d2 is waiting for
+# d1's completion, but on the client side.) To fix this, introduce an
+# additional lock (advisory lock 3), which is initially taken by d1 and
+# then d2a1 will wait for it after completing the main part of the test.
+# In this way, the deadlock detector can see that d1 must be completed
+# first, regardless of timing.
+
 setup
 {
   create function lock_share(int,int) returns int language sql as
@@ -39,15 +58,15 @@ setup     { BEGIN isolation level repeatable read;
               SET force_parallel_mode = off;
               SET deadlock_timeout = '10s';
 }
-# this lock will be taken in the leader, so it will persist:
-step "d1a1"   { SELECT lock_share(1,x) FROM bigt LIMIT 1; }
+# these locks will be taken in the leader, so they will persist:
+step "d1a1"   { SELECT lock_share(1,x), lock_excl(3,x) FROM bigt LIMIT 1; }
 # this causes all the parallel workers to take locks:
 step "d1a2"   { SET force_parallel_mode = on;
               SET parallel_setup_cost = 0;
               SET parallel_tuple_cost = 0;
               SET min_parallel_table_scan_size = 0;
               SET parallel_leader_participation = off;
-              SET max_parallel_workers_per_gather = 4;
+              SET max_parallel_workers_per_gather = 3;
               SELECT sum(lock_share(2,x)) FROM bigt; }
 step "d1c"    { COMMIT; }
 
@@ -58,14 +77,19 @@ setup     { BEGIN isolation level repeatable read;
 }
 # this lock will be taken in the leader, so it will persist:
 step "d2a2"   { select lock_share(2,x) FROM bigt LIMIT 1; }
-# this causes all the parallel workers to take locks:
+# this causes all the parallel workers to take locks;
+# after which, make the leader take lock 3 to prevent client-driven deadlock
 step "d2a1"   { SET force_parallel_mode = on;
               SET parallel_setup_cost = 0;
               SET parallel_tuple_cost = 0;
               SET min_parallel_table_scan_size = 0;
               SET parallel_leader_participation = off;
-              SET max_parallel_workers_per_gather = 4;
-              SELECT sum(lock_share(1,x)) FROM bigt; }
+              SET max_parallel_workers_per_gather = 3;
+              SELECT sum(lock_share(1,x)) FROM bigt;
+              SET force_parallel_mode = off;
+              RESET parallel_setup_cost;
+              RESET parallel_tuple_cost;
+              SELECT lock_share(3,x) FROM bigt LIMIT 1; }
 step "d2c"    { COMMIT; }
 
 session "e1"
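For reference, the spec's setup block (its opening lines appear as hunk context above) creates the lock helpers used throughout. A minimal sketch of plausible definitions, assuming they wrap PostgreSQL's transaction-scoped advisory-lock functions and are deliberately (mis)marked PARALLEL SAFE so that parallel workers will execute them and take locks of their own:

  create function lock_share(int,int) returns int language sql as
  'select pg_advisory_xact_lock_shared($1); select $2;'  -- share-lock key $1, return $2
  parallel safe;

  create function lock_excl(int,int) returns int language sql as
  'select pg_advisory_xact_lock($1); select $2;'         -- exclusive-lock key $1, return $2
  parallel safe;

  -- 10000 rows, matching the sums in the expected output:
  create table bigt as select x from generate_series(1, 10000) x;

Under such definitions, SELECT sum(lock_share(2,x)) FROM bigt forces every process scanning bigt to take advisory lock 2 in share mode, while the revised d1a1, running only in the leader, holds locks 1 and 3 for the remainder of d1's transaction (xact-scoped advisory locks cannot be released before transaction end), which is exactly what the new interlock on lock 3 relies on.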