From 624e440a474420fa0d6cf26c19bfb256547ab71d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 9 Jan 2018 13:07:52 -0500 Subject: [PATCH] Improve the heuristic for ordering child paths of a parallel append. Commit ab7271677 introduced code that attempts to order the child scans of a Parallel Append node in a way that will minimize execution time, based on total cost and startup cost. However, it failed to think hard about what to do when estimated costs are exactly equal; a case that's particularly likely to occur when comparing on startup cost. In such a case the ordering of the child paths would be left to the whims of qsort, an algorithm that isn't even stable. We can improve matters by applying the rule used elsewhere in the planner: if total costs are equal, sort on startup cost, and vice versa. When both cost estimates are exactly equal, rather than letting qsort do something unpredictable, sort based on the child paths' relids, which should typically result in sorting in inheritance order. (The latter provision requires inventing a qsort-style comparator for bitmapsets, but maybe we'll have use for that for other reasons in future.) This results in a few plan changes in the select_parallel test, but those all look more reasonable than before, when the actual underlying cost numbers are taken into account. Discussion: https://postgr.es/m/4944.1515446989@sss.pgh.pa.us --- src/backend/nodes/bitmapset.c | 46 ++++++++++++++++++- src/backend/optimizer/util/pathnode.c | 34 ++++++++------ src/include/nodes/bitmapset.h | 1 + src/test/regress/expected/select_parallel.out | 14 +++--- 4 files changed, 73 insertions(+), 22 deletions(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 733fe3cf2a..edcd19a4fd 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -172,6 +172,50 @@ bms_equal(const Bitmapset *a, const Bitmapset *b) return true; } +/* + * bms_compare - qsort-style comparator for bitmapsets + * + * This guarantees to report values as equal iff bms_equal would say they are + * equal. Otherwise, the highest-numbered bit that is set in one value but + * not the other determines the result. (This rule means that, for example, + * {6} is greater than {5}, which seems plausible.) + */ +int +bms_compare(const Bitmapset *a, const Bitmapset *b) +{ + int shortlen; + int i; + + /* Handle cases where either input is NULL */ + if (a == NULL) + return bms_is_empty(b) ? 0 : -1; + else if (b == NULL) + return bms_is_empty(a) ? 0 : +1; + /* Handle cases where one input is longer than the other */ + shortlen = Min(a->nwords, b->nwords); + for (i = shortlen; i < a->nwords; i++) + { + if (a->words[i] != 0) + return +1; + } + for (i = shortlen; i < b->nwords; i++) + { + if (b->words[i] != 0) + return -1; + } + /* Process words in common */ + i = shortlen; + while (--i >= 0) + { + bitmapword aw = a->words[i]; + bitmapword bw = b->words[i]; + + if (aw != bw) + return (aw > bw) ? +1 : -1; + } + return 0; +} + /* * bms_make_singleton - build a bitmapset containing a single member */ @@ -838,7 +882,7 @@ bms_add_range(Bitmapset *a, int lower, int upper) if (lwordnum == uwordnum) { a->words[lwordnum] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1) - & (~(bitmapword) 0) >> ushiftbits; + & (~(bitmapword) 0) >> ushiftbits; } else { diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 7df8761710..48b4db72bc 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1274,38 +1274,44 @@ create_append_path(RelOptInfo *rel, /* * append_total_cost_compare - * list_qsort comparator for sorting append child paths by total_cost + * qsort comparator for sorting append child paths by total_cost descending + * + * For equal total costs, we fall back to comparing startup costs; if those + * are equal too, break ties using bms_compare on the paths' relids. + * (This is to avoid getting unpredictable results from qsort.) */ static int append_total_cost_compare(const void *a, const void *b) { Path *path1 = (Path *) lfirst(*(ListCell **) a); Path *path2 = (Path *) lfirst(*(ListCell **) b); + int cmp; - if (path1->total_cost > path2->total_cost) - return -1; - if (path1->total_cost < path2->total_cost) - return 1; - - return 0; + cmp = compare_path_costs(path1, path2, TOTAL_COST); + if (cmp != 0) + return -cmp; + return bms_compare(path1->parent->relids, path2->parent->relids); } /* * append_startup_cost_compare - * list_qsort comparator for sorting append child paths by startup_cost + * qsort comparator for sorting append child paths by startup_cost descending + * + * For equal startup costs, we fall back to comparing total costs; if those + * are equal too, break ties using bms_compare on the paths' relids. + * (This is to avoid getting unpredictable results from qsort.) */ static int append_startup_cost_compare(const void *a, const void *b) { Path *path1 = (Path *) lfirst(*(ListCell **) a); Path *path2 = (Path *) lfirst(*(ListCell **) b); + int cmp; - if (path1->startup_cost > path2->startup_cost) - return -1; - if (path1->startup_cost < path2->startup_cost) - return 1; - - return 0; + cmp = compare_path_costs(path1, path2, STARTUP_COST); + if (cmp != 0) + return -cmp; + return bms_compare(path1->parent->relids, path2->parent->relids); } /* diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 15397e9584..67e8920f65 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -65,6 +65,7 @@ typedef enum extern Bitmapset *bms_copy(const Bitmapset *a); extern bool bms_equal(const Bitmapset *a, const Bitmapset *b); +extern int bms_compare(const Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_make_singleton(int x); extern void bms_free(Bitmapset *a); diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 7824ca52ca..452494fbfa 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -21,12 +21,12 @@ explain (costs off) Workers Planned: 3 -> Partial Aggregate -> Parallel Append - -> Parallel Seq Scan on a_star + -> Parallel Seq Scan on d_star + -> Parallel Seq Scan on f_star + -> Parallel Seq Scan on e_star -> Parallel Seq Scan on b_star -> Parallel Seq Scan on c_star - -> Parallel Seq Scan on d_star - -> Parallel Seq Scan on e_star - -> Parallel Seq Scan on f_star + -> Parallel Seq Scan on a_star (11 rows) select round(avg(aa)), sum(aa) from a_star a1; @@ -49,10 +49,10 @@ explain (costs off) -> Parallel Append -> Seq Scan on d_star -> Seq Scan on c_star - -> Parallel Seq Scan on a_star - -> Parallel Seq Scan on b_star - -> Parallel Seq Scan on e_star -> Parallel Seq Scan on f_star + -> Parallel Seq Scan on e_star + -> Parallel Seq Scan on b_star + -> Parallel Seq Scan on a_star (11 rows) select round(avg(aa)), sum(aa) from a_star a2;