From 828e94c9d2fd87c06a75354361543119d9937068 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Wed, 9 Oct 2024 17:14:42 +0900 Subject: Consider explicit incremental sort for mergejoins For a mergejoin, if the given outer path or inner path is not already well enough ordered, we need to do an explicit sort. Currently, we only consider explicit full sort and do not account for incremental sort. In this patch, for the outer path of a mergejoin, we choose to use explicit incremental sort if it is enabled and there are presorted keys. For the inner path, though, we cannot use incremental sort because it does not support mark/restore at present. The rationale is based on the assumption that incremental sort is always faster than full sort when there are presorted keys, a premise that has been applied in various parts of the code. In addition, the current cost model tends to favor incremental sort as being cheaper than full sort in the presence of presorted keys, making it reasonable not to consider full sort in such cases. It could be argued that what if a mergejoin with an incremental sort as the outer path is selected as the inner path of another mergejoin. However, this should not be a problem, because mergejoin itself does not support mark/restore either, and we will add a Material node on top of it anyway in this case (see final_cost_mergejoin). There is one ensuing plan change in the regression tests, and we have to modify that test case to ensure that it continues to test what it is intended to. No backpatch as this could result in plan changes. Author: Richard Guo Reviewed-by: David Rowley, Tomas Vondra Discussion: https://postgr.es/m/CAMbWs49x425QrX7h=Ux05WEnt8GS757H-jOP3_xsX5t1FoUsZw@mail.gmail.com --- src/test/regress/expected/aggregates.out | 34 ++++++++++++-------------- src/test/regress/expected/incremental_sort.out | 21 ++++++++++++++++ src/test/regress/sql/aggregates.sql | 6 ++--- src/test/regress/sql/incremental_sort.sql | 6 +++++ 4 files changed, 46 insertions(+), 21 deletions(-) (limited to 'src/test') diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index e14e7356567..495deb606e2 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -2858,29 +2858,27 @@ GROUP BY w, x, z, y; -> Index Scan using btg_x_y_idx on btg (6 rows) --- Utilize the ordering of merge join to avoid a full Sort operation +-- Utilize the ordering of merge join to avoid a Sort operation SET enable_hashjoin = off; SET enable_nestloop = off; EXPLAIN (COSTS OFF) SELECT count(*) - FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x - GROUP BY t1.x, t1.y, t1.z, t1.w; - QUERY PLAN -------------------------------------------------------------------------------- + FROM btg t1 JOIN btg t2 ON t1.w = t2.w AND t1.x = t2.x AND t1.z = t2.z + GROUP BY t1.w, t1.z, t1.x; + QUERY PLAN +------------------------------------------------------------------------- GroupAggregate - Group Key: t1.z, t1.w, t1.x, t1.y - -> Incremental Sort - Sort Key: t1.z, t1.w, t1.x, t1.y - Presorted Key: t1.z, t1.w, t1.x - -> Merge Join - Merge Cond: ((t1.z = t2.z) AND (t1.w = t2.w) AND (t1.x = t2.x)) - -> Sort - Sort Key: t1.z, t1.w, t1.x - -> Index Scan using btg_x_y_idx on btg t1 - -> Sort - Sort Key: t2.z, t2.w, t2.x - -> Index Scan using btg_x_y_idx on btg t2 -(13 rows) + Group Key: t1.x, t1.w, t1.z + -> Merge Join + Merge Cond: ((t1.x = t2.x) AND (t1.w = t2.w) AND (t1.z = t2.z)) + -> Incremental Sort + Sort Key: t1.x, t1.w, t1.z + Presorted Key: t1.x + -> Index Scan using btg_x_y_idx on btg t1 + -> Sort + Sort Key: t2.x, t2.w, t2.z + -> Index Scan using btg_x_y_idx on btg t2 +(11 rows) RESET enable_nestloop; RESET enable_hashjoin; diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 79f0d37a87e..c561b62b2db 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1701,3 +1701,24 @@ explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order b Order By: (a <-> '(5,5)'::point) (6 rows) +-- Ensure we get an incremental sort on the outer side of the mergejoin +explain (costs off) +select * from + (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two +order by t1.four, t1.two limit 1; + QUERY PLAN +----------------------------------------------------------------------- + Limit + -> Merge Join + Merge Cond: ((tenk1.four = t2.four) AND (tenk1.two = t2.two)) + -> Incremental Sort + Sort Key: tenk1.four, tenk1.two + Presorted Key: tenk1.four + -> Sort + Sort Key: tenk1.four + -> Seq Scan on tenk1 + -> Sort + Sort Key: t2.four, t2.two + -> Seq Scan on tenk1 t2 +(12 rows) + diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index ddf38bafb42..4885daffe63 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1232,13 +1232,13 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM btg ORDER BY x, y, w, z) AS q1 GROUP BY w, x, z, y; --- Utilize the ordering of merge join to avoid a full Sort operation +-- Utilize the ordering of merge join to avoid a Sort operation SET enable_hashjoin = off; SET enable_nestloop = off; EXPLAIN (COSTS OFF) SELECT count(*) - FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x - GROUP BY t1.x, t1.y, t1.z, t1.w; + FROM btg t1 JOIN btg t2 ON t1.w = t2.w AND t1.x = t2.x AND t1.z = t2.z + GROUP BY t1.w, t1.z, t1.x; RESET enable_nestloop; RESET enable_hashjoin; diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index ab471bdfffc..98b20e17e18 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -292,3 +292,9 @@ create index point_table_a_idx on point_table using gist(a); -- Ensure we get an incremental sort plan for both of the following queries explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b limit 1; explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order by dist, b desc limit 1; + +-- Ensure we get an incremental sort on the outer side of the mergejoin +explain (costs off) +select * from + (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two +order by t1.four, t1.two limit 1; -- cgit v1.2.3