diff options
author | David Rowley <drowley@postgresql.org> | 2024-03-25 14:31:14 +1300 |
---|---|---|
committer | David Rowley <drowley@postgresql.org> | 2024-03-25 14:31:14 +1300 |
commit | 66c0185a3d14bbbf51d0fc9d267093ffec735231 (patch) | |
tree | ed16cb0999652ad23efef6b5e025554f4136020c /src/backend/optimizer/plan/planner.c | |
parent | 47f99a407de224df6f9c43697d0a9c0a5598b250 (diff) |
Allow planner to use Merge Append to efficiently implement UNION
Until now, UNION queries have often been suboptimal as the planner has
only ever considered using an Append node and making the results unique
by either using a Hash Aggregate, or by Sorting the entire Append result
and running it through the Unique operator. Both of these methods
always require reading all rows from the union subqueries.
Here we adjust the union planner so that it can request that each subquery
produce results in target list order so that these can be Merge Appended
together and made unique with a Unique node. This can improve performance
significantly as the union child can make use of the likes of btree
indexes and/or Merge Joins to provide the top-level UNION with presorted
input. This is especially good if the top-level UNION contains a LIMIT
node that limits the output rows to a small subset of the unioned rows as
cheap startup plans can be used.
Author: David Rowley
Reviewed-by: Richard Guo, Andy Fan
Discussion: https://postgr.es/m/CAApHDvpb_63XQodmxKUF8vb9M7CxyUyT4sWvEgqeQU-GB7QFoQ@mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 85 |
1 files changed, 84 insertions, 1 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5564826cb4a..6d08cc8cdd5 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -54,6 +54,7 @@ #include "optimizer/tlist.h" #include "parser/analyze.h" #include "parser/parse_agg.h" +#include "parser/parse_clause.h" #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "partitioning/partdesc.h" @@ -119,6 +120,8 @@ typedef struct { List *activeWindows; /* active windows, if any */ grouping_sets_data *gset_data; /* grouping sets data, if any */ + SetOperationStmt *setop; /* parent set operation or NULL if not a + * subquery belonging to a set operation */ } standard_qp_extra; /* Local functions */ @@ -249,6 +252,8 @@ static bool group_by_has_partkey(RelOptInfo *input_rel, List *targetList, List *groupClause); static int common_prefix_cmp(const void *a, const void *b); +static List *generate_setop_child_grouplist(SetOperationStmt *op, + List *targetlist); /***************************************************************************** @@ -1501,6 +1506,18 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) qp_extra.gset_data = gset_data; /* + * Check if we're a subquery for a set operation. If we are, store + * the SetOperationStmt in qp_extra. + */ + if (root->parent_root != NULL && + root->parent_root->parse->setOperations != NULL && + IsA(root->parent_root->parse->setOperations, SetOperationStmt)) + qp_extra.setop = + (SetOperationStmt *) root->parent_root->parse->setOperations; + else + qp_extra.setop = NULL; + + /* * Generate the best unsorted and presorted paths for the scan/join * portion of this Query, ie the processing represented by the * FROM/WHERE clauses. (Note there may not be any presorted paths.) @@ -3433,6 +3450,27 @@ standard_qp_callback(PlannerInfo *root, void *extra) parse->sortClause, tlist); + /* setting setop_pathkeys might be useful to the union planner */ + if (qp_extra->setop != NULL && + set_operation_ordered_results_useful(qp_extra->setop)) + { + List *groupClauses; + bool sortable; + + groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist); + + root->setop_pathkeys = + make_pathkeys_for_sortclauses_extended(root, + &groupClauses, + tlist, + false, + &sortable); + if (!sortable) + root->setop_pathkeys = NIL; + } + else + root->setop_pathkeys = NIL; + /* * Figure out whether we want a sorted result from query_planner. * @@ -3442,7 +3480,9 @@ standard_qp_callback(PlannerInfo *root, void *extra) * sortable DISTINCT clause that's more rigorous than the ORDER BY clause, * we try to produce output that's sufficiently well sorted for the * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort - * by the ORDER BY clause. + * by the ORDER BY clause. Otherwise, if we're a subquery being planned + * for a set operation which can benefit from presorted results and have a + * sortable targetlist, we want to sort by the target list. * * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset * of GROUP BY, it would be tempting to request sort by ORDER BY --- but @@ -3460,6 +3500,8 @@ standard_qp_callback(PlannerInfo *root, void *extra) root->query_pathkeys = root->distinct_pathkeys; else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; + else if (root->setop_pathkeys != NIL) + root->query_pathkeys = root->setop_pathkeys; else root->query_pathkeys = NIL; } @@ -7866,3 +7908,44 @@ group_by_has_partkey(RelOptInfo *input_rel, return true; } + +/* + * generate_setop_child_grouplist + * Build a SortGroupClause list defining the sort/grouping properties + * of the child of a set operation. + * + * This is similar to generate_setop_grouplist() but differs as the setop + * child query's targetlist entries may already have a tleSortGroupRef + * assigned for other purposes, such as GROUP BYs. Here we keep the + * SortGroupClause list in the same order as 'op' groupClauses and just adjust + * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'. + */ +static List * +generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist) +{ + List *grouplist = copyObject(op->groupClauses); + ListCell *lg; + ListCell *lt; + + lg = list_head(grouplist); + foreach(lt, targetlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(lt); + SortGroupClause *sgc; + + /* resjunk columns could have sortgrouprefs. Leave these alone */ + if (tle->resjunk) + continue; + + /* we expect every non-resjunk target to have a SortGroupClause */ + Assert(lg != NULL); + sgc = (SortGroupClause *) lfirst(lg); + lg = lnext(grouplist, lg); + + /* assign a tleSortGroupRef, or reuse the existing one */ + sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist); + tle->ressortgroupref = sgc->tleSortGroupRef; + } + Assert(lg == NULL); + return grouplist; +} |