Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r--  src/backend/optimizer/plan/planner.c  |  520
 1 file changed, 456 insertions(+), 64 deletions(-)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fc0a2d8de35..5229c845d20 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -106,6 +106,11 @@ static double get_number_of_groups(PlannerInfo *root,
double path_rows,
List *rollup_lists,
List *rollup_groupclauses);
+static void set_grouped_rel_consider_parallel(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ PathTarget *target);
+static Size estimate_hashagg_tablesize(Path *path, AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
@@ -134,6 +139,8 @@ static RelOptInfo *create_ordered_paths(PlannerInfo *root,
double limit_tuples);
static PathTarget *make_group_input_target(PlannerInfo *root,
PathTarget *final_target);
+static PathTarget *make_partialgroup_input_target(PlannerInfo *root,
+ PathTarget *final_target);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
static PathTarget *make_window_input_target(PlannerInfo *root,
@@ -1741,6 +1748,19 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
}
/*
+ * Likewise for any partial paths, although this case is simpler, since
+ * we don't track the cheapest path.
+ */
+ foreach(lc, current_rel->partial_pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+
+ Assert(subpath->param_info == NULL);
+ lfirst(lc) = apply_projection_to_path(root, current_rel,
+ subpath, scanjoin_target);
+ }
+
+ /*
* Save the various upper-rel PathTargets we just computed into
* root->upper_targets[]. The core code doesn't use this, but it
* provides a convenient place for extensions to get at the info. For
@@ -3134,6 +3154,79 @@ get_number_of_groups(PlannerInfo *root,
}
/*
+ * set_grouped_rel_consider_parallel
+ * Determine if it's safe to generate partial paths for grouping.
+ */
+static void
+set_grouped_rel_consider_parallel(PlannerInfo *root, RelOptInfo *grouped_rel,
+ PathTarget *target)
+{
+ Query *parse = root->parse;
+
+ Assert(grouped_rel->reloptkind == RELOPT_UPPER_REL);
+
+ /*
+ * If there are no aggregates or GROUP BY clause, then no parallel
+ * aggregation is possible. At present, it doesn't matter whether
+ * consider_parallel gets set in this case, because none of the upper rels
+ * on top of this one try to set the flag or examine it, so we just bail
+ * out as quickly as possible. We might need to be more clever here in
+ * the future.
+ */
+ if (!parse->hasAggs && parse->groupClause == NIL)
+ return;
+
+ /*
+ * Similarly, bail out quickly if GROUPING SETS are present; we can't
+ * support those at present.
+ */
+ if (parse->groupingSets)
+ return;
+
+ /*
+ * If parallel-restricted functions are present in the target list or the
+ * HAVING clause, we cannot safely go parallel.
+ */
+ if (has_parallel_hazard((Node *) target->exprs, false) ||
+ has_parallel_hazard((Node *) parse->havingQual, false))
+ return;
+
+ /*
+ * All that's left to check now is that all aggregate functions support
+ * partial mode.  If there are no aggregates, we can skip this check.
+ */
+ if (!parse->hasAggs)
+ grouped_rel->consider_parallel = true;
+ else if (aggregates_allow_partial((Node *) target->exprs) == PAT_ANY &&
+ aggregates_allow_partial(root->parse->havingQual) == PAT_ANY)
+ grouped_rel->consider_parallel = true;
+}
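
As a rough illustration of this gate (the queries are hypothetical, and the verdicts assume the partial-aggregation support added by this patch series):

/*
 *   SELECT a, count(*) FROM t GROUP BY a;
 *       -- may set consider_parallel: count() supports partial mode
 *   SELECT a, sum(b) FROM t GROUP BY GROUPING SETS ((a), ());
 *       -- returns early: grouping sets are not supported
 *   SELECT a, array_agg(b) FROM t GROUP BY a;
 *       -- stays false if array_agg cannot be partially aggregated
 */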
+
+/*
+ * estimate_hashagg_tablesize
+ * estimate the number of bytes that a hash aggregate hashtable will
+ * require based on the agg_costs, path width and dNumGroups.
+ */
+static Size
+estimate_hashagg_tablesize(Path *path, AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Size hashentrysize;
+
+ /* Estimate per-hash-entry space at tuple width... */
+ hashentrysize = MAXALIGN(path->pathtarget->width) +
+ MAXALIGN(SizeofMinimalTupleHeader);
+
+ /* plus space for pass-by-ref transition values... */
+ hashentrysize += agg_costs->transitionSpace;
+ /* plus the per-hash-entry overhead */
+ hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
+
+ return hashentrysize * dNumGroups;
+}
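
A self-contained sketch of this arithmetic may help; every constant below is an assumption standing in for the real PostgreSQL macros and catalog-driven values, not a number taken from this patch:

#include <stdio.h>
#include <stddef.h>

/* Stand-in for PostgreSQL's MAXALIGN: round up to an 8-byte boundary. */
#define MAXALIGN(LEN)  (((size_t) (LEN) + 7) & ~((size_t) 7))

int
main(void)
{
	/* All assumed figures, not values from the patch. */
	size_t	tuple_width = 32;			/* path->pathtarget->width */
	size_t	minimal_tuple_header = 16;	/* SizeofMinimalTupleHeader */
	size_t	per_entry_overhead = 24;	/* hash_agg_entry_size(numAggs) */
	size_t	transition_space = 0;		/* agg_costs->transitionSpace */
	double	dNumGroups = 100000.0;
	long	work_mem = 4096;			/* kilobytes; the 9.6-era default */

	size_t	hashentrysize = MAXALIGN(tuple_width) +
		MAXALIGN(minimal_tuple_header) +
		transition_space +
		per_entry_overhead;
	double	table_bytes = hashentrysize * dNumGroups;

	/* Mirrors the planner's test: hashaggtablesize < work_mem * 1024L */
	printf("estimated %.0f bytes vs budget %ld bytes -> %s\n",
		   table_bytes, work_mem * 1024L,
		   table_bytes < work_mem * 1024L ? "hash" : "no hash");
	return 0;
}

With these assumptions the estimate (7,200,000 bytes) exceeds the 4MB budget, so hashed aggregation would be passed over.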
+
+/*
* create_grouping_paths
*
* Build a new upperrel containing Paths for grouping and/or aggregation.
@@ -3149,9 +3242,8 @@ get_number_of_groups(PlannerInfo *root,
*
* We need to consider sorted and hashed aggregation in the same function,
* because otherwise (1) it would be harder to throw an appropriate error
- * message if neither way works, and (2) we should not allow enable_hashagg or
- * hashtable size considerations to dissuade us from using hashing if sorting
- * is not possible.
+ * message if neither way works, and (2) we should not allow hashtable size
+ * considerations to dissuade us from using hashing if sorting is not possible.
*/
static RelOptInfo *
create_grouping_paths(PlannerInfo *root,
@@ -3163,9 +3255,14 @@ create_grouping_paths(PlannerInfo *root,
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
RelOptInfo *grouped_rel;
+ PathTarget *partial_grouping_target = NULL;
AggClauseCosts agg_costs;
+ Size hashaggtablesize;
double dNumGroups;
- bool allow_hash;
+ double dNumPartialGroups = 0;
+ bool can_hash;
+ bool can_sort;
+
ListCell *lc;
/* For now, do all work in the (GROUP_AGG, NULL) upperrel */
@@ -3259,12 +3356,155 @@ create_grouping_paths(PlannerInfo *root,
rollup_groupclauses);
/*
- * Consider sort-based implementations of grouping, if possible. (Note
- * that if groupClause is empty, grouping_is_sortable() is trivially true,
- * and all the pathkeys_contained_in() tests will succeed too, so that
- * we'll consider every surviving input path.)
+ * Partial paths in the input rel could allow us to perform aggregation in
+ * parallel.  set_grouped_rel_consider_parallel() determines whether it is
+ * safe to do so.
+ */
+ if (input_rel->partial_pathlist != NIL)
+ set_grouped_rel_consider_parallel(root, grouped_rel, target);
+
+ /*
+ * Determine whether it's possible to perform sort-based implementations
+ * of grouping. (Note that if groupClause is empty, grouping_is_sortable()
+ * is trivially true, and all the pathkeys_contained_in() tests will
+ * succeed too, so that we'll consider every surviving input path.)
*/
- if (grouping_is_sortable(parse->groupClause))
+ can_sort = grouping_is_sortable(parse->groupClause);
+
+ /*
+ * Determine whether we should consider hash-based implementations of
+ * grouping.
+ *
+ * Hashed aggregation only applies if we're grouping. We currently can't
+ * hash if there are grouping sets, though.
+ *
+ * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
+ * aggregates. (Doing so would imply storing *all* the input values in
+ * the hash table, and/or running many sorts in parallel, either of which
+ * seems like a certain loser.) We similarly don't support ordered-set
+ * aggregates in hashed aggregation, but that case is also included in the
+ * numOrderedAggs count.
+ *
+ * Note: grouping_is_hashable() is much more expensive to check than the
+ * other gating conditions, so we want to do it last.
+ */
+ can_hash = (parse->groupClause != NIL &&
+ parse->groupingSets == NIL &&
+ agg_costs.numOrderedAggs == 0 &&
+ grouping_is_hashable(parse->groupClause));
+
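
Concretely (hypothetical queries again):

/*
 *   SELECT a, sum(b) FROM t GROUP BY a;                    -- hashable
 *   SELECT a, count(DISTINCT b) FROM t GROUP BY a;         -- no: DISTINCT agg
 *   SELECT a, array_agg(b ORDER BY c) FROM t GROUP BY a;   -- no: ORDER BY agg
 *   SELECT sum(b) FROM t;                                  -- no: not grouping
 */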
+ /*
+ * Before generating paths for grouped_rel, we first generate any possible
+ * partial paths; that way, later code can easily consider both parallel
+ * and non-parallel approaches to grouping. Note that the partial paths
+ * we generate here are also partially aggregated, so simply pushing a
+ * Gather node on top is insufficient to create a final path, as would be
+ * the case for a scan/join rel.
+ */
+ if (grouped_rel->consider_parallel)
+ {
+ Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
+
+ /*
+ * Build target list for partial aggregate paths. We cannot reuse the
+ * final target as Aggrefs must be set in partial mode, and we must
+ * also include Aggrefs from the HAVING clause in the target as these
+ * may not be present in the final target.
+ */
+ partial_grouping_target = make_partialgroup_input_target(root, target);
+
+ /* Estimate number of partial groups. */
+ dNumPartialGroups = get_number_of_groups(root,
+ clamp_row_est(cheapest_partial_path->rows),
+ NIL,
+ NIL);
+
+ if (can_sort)
+ {
+ /* Checked in set_grouped_rel_consider_parallel() */
+ Assert(parse->hasAggs || parse->groupClause);
+
+ /*
+ * Use any available suitably-sorted path as input, and also
+ * consider sorting the cheapest partial path.
+ */
+ foreach(lc, input_rel->partial_pathlist)
+ {
+ Path *path = (Path *) lfirst(lc);
+ bool is_sorted;
+
+ is_sorted = pathkeys_contained_in(root->group_pathkeys,
+ path->pathkeys);
+ if (path == cheapest_partial_path || is_sorted)
+ {
+ /* Sort the cheapest partial path, if it isn't already */
+ if (!is_sorted)
+ path = (Path *) create_sort_path(root,
+ grouped_rel,
+ path,
+ root->group_pathkeys,
+ -1.0);
+
+ if (parse->hasAggs)
+ add_partial_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ partial_grouping_target,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ parse->groupClause,
+ NIL,
+ &agg_costs,
+ dNumPartialGroups,
+ false,
+ false));
+ else
+ add_partial_path(grouped_rel, (Path *)
+ create_group_path(root,
+ grouped_rel,
+ path,
+ partial_grouping_target,
+ parse->groupClause,
+ NIL,
+ dNumPartialGroups));
+ }
+ }
+ }
+
+ if (can_hash)
+ {
+ /* Checked above */
+ Assert(parse->hasAggs || parse->groupClause);
+
+ hashaggtablesize =
+ estimate_hashagg_tablesize(cheapest_partial_path,
+ &agg_costs,
+ dNumPartialGroups);
+
+ /*
+ * Tentatively produce a partial HashAgg Path, but only if it looks
+ * as though the hash table will fit in work_mem.
+ */
+ if (hashaggtablesize < work_mem * 1024L)
+ {
+ add_partial_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ cheapest_partial_path,
+ partial_grouping_target,
+ AGG_HASHED,
+ parse->groupClause,
+ NIL,
+ &agg_costs,
+ dNumPartialGroups,
+ false,
+ false));
+ }
+ }
+ }
+
+ /* Build final grouping paths */
+ if (can_sort)
{
/*
* Use any available suitably-sorted path as input, and also consider
@@ -3320,7 +3560,9 @@ create_grouping_paths(PlannerInfo *root,
parse->groupClause,
(List *) parse->havingQual,
&agg_costs,
- dNumGroups));
+ dNumGroups,
+ false,
+ true));
}
else if (parse->groupClause)
{
@@ -3344,69 +3586,131 @@ create_grouping_paths(PlannerInfo *root,
}
}
}
- }
- /*
- * Consider hash-based implementations of grouping, if possible.
- *
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
- *
- * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
- * aggregates. (Doing so would imply storing *all* the input values in
- * the hash table, and/or running many sorts in parallel, either of which
- * seems like a certain loser.) We similarly don't support ordered-set
- * aggregates in hashed aggregation, but that case is also included in the
- * numOrderedAggs count.
- *
- * Note: grouping_is_hashable() is much more expensive to check than the
- * other gating conditions, so we want to do it last.
- */
- allow_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
- agg_costs.numOrderedAggs == 0);
-
- /* Consider reasons to disable hashing, but only if we can sort instead */
- if (allow_hash && grouped_rel->pathlist != NIL)
- {
- if (!enable_hashagg)
- allow_hash = false;
- else
+ /*
+ * Now generate a complete GroupAgg Path atop the cheapest partial
+ * path. We need only bother with the cheapest path here, as the output
+ * of Gather is never sorted.
+ */
+ if (grouped_rel->partial_pathlist)
{
+ Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
+ double total_groups = path->rows * path->parallel_degree;
+
+ path = (Path *) create_gather_path(root,
+ grouped_rel,
+ path,
+ partial_grouping_target,
+ NULL,
+ &total_groups);
+
/*
- * Don't hash if it doesn't look like the hashtable will fit into
- * work_mem.
+ * Gather is always unsorted, so we'll need to sort, unless there's
+ * no GROUP BY clause, in which case there will only be a single
+ * group.
*/
- Size hashentrysize;
-
- /* Estimate per-hash-entry space at tuple width... */
- hashentrysize = MAXALIGN(cheapest_path->pathtarget->width) +
- MAXALIGN(SizeofMinimalTupleHeader);
- /* plus space for pass-by-ref transition values... */
- hashentrysize += agg_costs.transitionSpace;
- /* plus the per-hash-entry overhead */
- hashentrysize += hash_agg_entry_size(agg_costs.numAggs);
-
- if (hashentrysize * dNumGroups > work_mem * 1024L)
- allow_hash = false;
+ if (parse->groupClause)
+ path = (Path *) create_sort_path(root,
+ grouped_rel,
+ path,
+ root->group_pathkeys,
+ -1.0);
+
+ if (parse->hasAggs)
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ target,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ &agg_costs,
+ dNumGroups,
+ true,
+ true));
+ else
+ add_path(grouped_rel, (Path *)
+ create_group_path(root,
+ grouped_rel,
+ path,
+ target,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ dNumGroups));
}
}
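
Taken together with the partial paths generated earlier, this branch corresponds to a sort-based parallel plan of roughly the following shape (an illustrative EXPLAIN-style sketch, not actual planner output; the table name is hypothetical):

/*
 *   Finalize GroupAggregate
 *     -> Sort                          -- Gather's output is never sorted
 *          -> Gather
 *               -> Partial GroupAggregate
 *                    -> Sort
 *                         -> Parallel Seq Scan on t
 *
 * With no GROUP BY clause there is only one group, so both Sorts are
 * omitted and the aggregate nodes run as AGG_PLAIN.
 */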
- if (allow_hash && grouping_is_hashable(parse->groupClause))
+ if (can_hash)
{
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ &agg_costs,
+ dNumGroups);
+
/*
- * We just need an Agg over the cheapest-total input path, since input
- * order won't matter.
+ * Provided the estimated size of the hash table does not exceed
+ * work_mem, generate a HashAgg Path.  If we were unable to sort above,
+ * generate one anyway, so that we have at least one Path.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- parse->groupClause,
- (List *) parse->havingQual,
- &agg_costs,
- dNumGroups));
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path, since input
+ * order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ &agg_costs,
+ dNumGroups,
+ false,
+ true));
+ }
+
+ /*
+ * Generate a HashAgg Path atop the cheapest partial path.  Once
+ * again, we'll only do this if it looks as though the hash table won't
+ * exceed work_mem.
+ */
+ if (grouped_rel->partial_pathlist)
+ {
+ Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
+
+ hashaggtablesize = estimate_hashagg_tablesize(path,
+ &agg_costs,
+ dNumGroups);
+
+ if (hashaggtablesize < work_mem * 1024L)
+ {
+ double total_groups = path->rows * path->parallel_degree;
+
+ path = (Path *) create_gather_path(root,
+ grouped_rel,
+ path,
+ partial_grouping_target,
+ NULL,
+ &total_groups);
+
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ target,
+ AGG_HASHED,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ &agg_costs,
+ dNumGroups,
+ true,
+ true));
+ }
+ }
}
/* Give a helpful error if we failed to find any implementation */
@@ -3735,7 +4039,9 @@ create_distinct_paths(PlannerInfo *root,
parse->distinctClause,
NIL,
NULL,
- numDistinctRows));
+ numDistinctRows,
+ false,
+ true));
}
/* Give a helpful error if we failed to find any implementation */
@@ -3915,6 +4221,92 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
}
/*
+ * make_partialgroup_input_target
+ * Generate appropriate PathTarget for input for Partial Aggregate nodes.
+ *
+ * Similar to make_group_input_target(), except that we don't recurse into
+ * Aggrefs: they must remain intact so that they can be found later by
+ * set_combineagg_references() in Combine Aggregate nodes.  Vars are still
+ * pulled out of non-Aggref nodes, since the combine-aggregate phase will
+ * still require them.
+ *
+ * We also convert any Aggrefs we do find into partial mode; this adjusts
+ * each Aggref's return type so that the partially computed aggregate value
+ * can make its way up the execution tree to the Finalize Aggregate node.
+ */
+static PathTarget *
+make_partialgroup_input_target(PlannerInfo *root, PathTarget *final_target)
+{
+ Query *parse = root->parse;
+ PathTarget *input_target;
+ List *non_group_cols;
+ List *non_group_exprs;
+ int i;
+ ListCell *lc;
+
+ input_target = create_empty_pathtarget();
+ non_group_cols = NIL;
+
+ i = 0;
+ foreach(lc, final_target->exprs)
+ {
+ Expr *expr = (Expr *) lfirst(lc);
+ Index sgref = final_target->sortgrouprefs[i];
+
+ if (sgref && parse->groupClause &&
+ get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+ {
+ /*
+ * It's a grouping column, so add it to the input target as-is.
+ */
+ add_column_to_pathtarget(input_target, expr, sgref);
+ }
+ else
+ {
+ /*
+ * Non-grouping column, so just remember the expression for later
+ * call to pull_var_clause.
+ */
+ non_group_cols = lappend(non_group_cols, expr);
+ }
+
+ i++;
+ }
+
+ /*
+ * If there's a HAVING clause, we'll need the Aggrefs it uses, too.
+ */
+ if (parse->havingQual)
+ non_group_cols = lappend(non_group_cols, parse->havingQual);
+
+ /*
+ * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
+ * add them to the input target if not already present. (A Var used
+ * directly as a GROUP BY item will be present already.) Note this
+ * includes Vars used in resjunk items, so we are covering the needs of
+ * ORDER BY and window specifications. Vars used within Aggrefs will be
+ * ignored and the Aggrefs themselves will be added to the PathTarget.
+ */
+ non_group_exprs = pull_var_clause((Node *) non_group_cols,
+ PVC_INCLUDE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+
+ add_new_columns_to_pathtarget(input_target, non_group_exprs);
+
+ /* clean up cruft */
+ list_free(non_group_exprs);
+ list_free(non_group_cols);
+
+ /* Adjust Aggrefs to put them in partial mode. */
+ apply_partialaggref_adjustment(input_target);
+
+ /* XXX this causes some redundant cost calculation ... */
+ return set_pathtarget_cost_width(root, input_target);
+}
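
A worked example may make this concrete. The query is hypothetical; "PARTIAL" marks an Aggref that apply_partialaggref_adjustment() has switched into partial mode:

/*
 *   SELECT a, sum(b) FROM t GROUP BY a HAVING max(c) > 0;
 *
 *   final_target:          {a, sum(b)}
 *       -- max(c) appears only in the HAVING clause
 *   partial input target:  {a, PARTIAL sum(b), PARTIAL max(c)}
 *       -- both Aggrefs are kept whole and emit transition state
 *          rather than finalized values
 */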
+
+/*
* postprocess_setop_tlist
* Fix up targetlist returned by plan_set_operations().
*