summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
authorRichard Guo <rguo@postgresql.org>2024-10-09 17:14:42 +0900
committerRichard Guo <rguo@postgresql.org>2024-10-09 17:14:42 +0900
commit828e94c9d2fd87c06a75354361543119d9937068 (patch)
tree9f03e159ac17d60cd350b5050141f3f7533740fa /src/backend/optimizer/path
parentc4528fdfa903c74cf99837a554bd3c7e115bb366 (diff)
Consider explicit incremental sort for mergejoins
For a mergejoin, if the given outer path or inner path is not already well enough ordered, we need to do an explicit sort. Currently, we only consider explicit full sort and do not account for incremental sort. In this patch, for the outer path of a mergejoin, we choose to use explicit incremental sort if it is enabled and there are presorted keys. For the inner path, though, we cannot use incremental sort because it does not support mark/restore at present. The rationale is based on the assumption that incremental sort is always faster than full sort when there are presorted keys, a premise that has been applied in various parts of the code. In addition, the current cost model tends to favor incremental sort as being cheaper than full sort in the presence of presorted keys, making it reasonable not to consider full sort in such cases. It could be argued that what if a mergejoin with an incremental sort as the outer path is selected as the inner path of another mergejoin. However, this should not be a problem, because mergejoin itself does not support mark/restore either, and we will add a Material node on top of it anyway in this case (see final_cost_mergejoin). There is one ensuing plan change in the regression tests, and we have to modify that test case to ensure that it continues to test what it is intended to. No backpatch as this could result in plan changes. Author: Richard Guo Reviewed-by: David Rowley, Tomas Vondra Discussion: https://postgr.es/m/CAMbWs49x425QrX7h=Ux05WEnt8GS757H-jOP3_xsX5t1FoUsZw@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/costsize.c69
1 files changed, 57 insertions, 12 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index e1523d15df1..c6e66e46f4a 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3532,7 +3532,8 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
* join quals here, except for obtaining the scan selectivity estimate which
* is really essential (but fortunately, use of caching keeps the cost of
* getting that down to something reasonable).
- * We also assume that cost_sort is cheap enough to use here.
+ * We also assume that cost_sort/cost_incremental_sort is cheap enough to use
+ * here.
*
* 'workspace' is to be filled with startup_cost, total_cost, and perhaps
* other data to be used by final_cost_mergejoin
@@ -3569,7 +3570,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
outerendsel,
innerstartsel,
innerendsel;
- Path sort_path; /* dummy for result of cost_sort */
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
/* Protect some assumptions below that rowcounts aren't zero */
if (outer_path_rows <= 0)
@@ -3682,16 +3684,54 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
if (outersortkeys) /* do we need to sort outer? */
{
- cost_sort(&sort_path,
- root,
- outersortkeys,
- outer_path->disabled_nodes,
- outer_path->total_cost,
- outer_path_rows,
- outer_path->pathtarget->width,
- 0.0,
- work_mem,
- -1.0);
+ bool use_incremental_sort = false;
+ int presorted_keys;
+
+ /*
+ * We choose to use incremental sort if it is enabled and there are
+ * presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort)
+ {
+ bool is_sorted PG_USED_FOR_ASSERTS_ONLY;
+
+ is_sorted = pathkeys_count_contained_in(outersortkeys,
+ outer_path->pathkeys,
+ &presorted_keys);
+ Assert(!is_sorted);
+
+ if (presorted_keys > 0)
+ use_incremental_sort = true;
+ }
+
+ if (!use_incremental_sort)
+ {
+ cost_sort(&sort_path,
+ root,
+ outersortkeys,
+ outer_path->disabled_nodes,
+ outer_path->total_cost,
+ outer_path_rows,
+ outer_path->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+ }
+ else
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ outersortkeys,
+ presorted_keys,
+ outer_path->disabled_nodes,
+ outer_path->startup_cost,
+ outer_path->total_cost,
+ outer_path_rows,
+ outer_path->pathtarget->width,
+ 0.0,
+ work_mem,
+ -1.0);
+ }
disabled_nodes += sort_path.disabled_nodes;
startup_cost += sort_path.startup_cost;
startup_cost += (sort_path.total_cost - sort_path.startup_cost)
@@ -3711,6 +3751,11 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
if (innersortkeys) /* do we need to sort inner? */
{
+ /*
+ * We do not consider incremental sort for inner path, because
+ * incremental sort does not support mark/restore.
+ */
+
cost_sort(&sort_path,
root,
innersortkeys,