diff options
Diffstat (limited to 'src/backend/optimizer/plan')
-rw-r--r-- | src/backend/optimizer/plan/initsplan.c | 370 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planmain.c | 9 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 165 |
3 files changed, 510 insertions, 34 deletions
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 3e3fec89252..b8d1c7e88a3 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -14,6 +14,7 @@ */ #include "postgres.h" +#include "access/nbtree.h" #include "catalog/pg_constraint.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" @@ -31,6 +32,7 @@ #include "optimizer/restrictinfo.h" #include "parser/analyze.h" #include "rewrite/rewriteManip.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/typcache.h" @@ -81,6 +83,12 @@ typedef struct JoinTreeItem } JoinTreeItem; +static bool is_partial_agg_memory_risky(PlannerInfo *root); +static void create_agg_clause_infos(PlannerInfo *root); +static void create_grouping_expr_infos(PlannerInfo *root); +static EquivalenceClass *get_eclass_for_sortgroupclause(PlannerInfo *root, + SortGroupClause *sgc, + Expr *expr); static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex); static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode, @@ -628,6 +636,368 @@ remove_useless_groupby_columns(PlannerInfo *root) } } +/* + * setup_eager_aggregation + * Check if eager aggregation is applicable, and if so collect suitable + * aggregate expressions and grouping expressions in the query. + */ +void +setup_eager_aggregation(PlannerInfo *root) +{ + /* + * Don't apply eager aggregation if disabled by user. + */ + if (!enable_eager_aggregate) + return; + + /* + * Don't apply eager aggregation if there are no available GROUP BY + * clauses. + */ + if (!root->processed_groupClause) + return; + + /* + * For now we don't try to support grouping sets. + */ + if (root->parse->groupingSets) + return; + + /* + * For now we don't try to support DISTINCT or ORDER BY aggregates. + */ + if (root->numOrderedAggs > 0) + return; + + /* + * If there are any aggregates that do not support partial mode, or any + * partial aggregates that are non-serializable, do not apply eager + * aggregation. + */ + if (root->hasNonPartialAggs || root->hasNonSerialAggs) + return; + + /* + * We don't try to apply eager aggregation if there are set-returning + * functions in targetlist. + */ + if (root->parse->hasTargetSRFs) + return; + + /* + * Eager aggregation only makes sense if there are multiple base rels in + * the query. + */ + if (bms_membership(root->all_baserels) != BMS_MULTIPLE) + return; + + /* + * Don't apply eager aggregation if any aggregate poses a risk of + * excessive memory usage during partial aggregation. + */ + if (is_partial_agg_memory_risky(root)) + return; + + /* + * Collect aggregate expressions and plain Vars that appear in the + * targetlist and havingQual. + */ + create_agg_clause_infos(root); + + /* + * If there are no suitable aggregate expressions, we cannot apply eager + * aggregation. + */ + if (root->agg_clause_list == NIL) + return; + + /* + * Collect grouping expressions that appear in grouping clauses. + */ + create_grouping_expr_infos(root); +} + +/* + * is_partial_agg_memory_risky + * Check if any aggregate poses a risk of excessive memory usage during + * partial aggregation. + * + * We check if any aggregate has a negative aggtransspace value, which + * indicates that its transition state data can grow unboundedly in size. + * Applying eager aggregation in such cases risks high memory usage since + * partial aggregation results might be stored in join hash tables or + * materialized nodes. + */ +static bool +is_partial_agg_memory_risky(PlannerInfo *root) +{ + ListCell *lc; + + foreach(lc, root->aggtransinfos) + { + AggTransInfo *transinfo = lfirst_node(AggTransInfo, lc); + + if (transinfo->aggtransspace < 0) + return true; + } + + return false; +} + +/* + * create_agg_clause_infos + * Search the targetlist and havingQual for Aggrefs and plain Vars, and + * create an AggClauseInfo for each Aggref node. + */ +static void +create_agg_clause_infos(PlannerInfo *root) +{ + List *tlist_exprs; + List *agg_clause_list = NIL; + List *tlist_vars = NIL; + Relids aggregate_relids = NULL; + bool eager_agg_applicable = true; + ListCell *lc; + + Assert(root->agg_clause_list == NIL); + Assert(root->tlist_vars == NIL); + + tlist_exprs = pull_var_clause((Node *) root->processed_tlist, + PVC_INCLUDE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_RECURSE_PLACEHOLDERS); + + /* + * Aggregates within the HAVING clause need to be processed in the same + * way as those in the targetlist. Note that HAVING can contain Aggrefs + * but not WindowFuncs. + */ + if (root->parse->havingQual != NULL) + { + List *having_exprs; + + having_exprs = pull_var_clause((Node *) root->parse->havingQual, + PVC_INCLUDE_AGGREGATES | + PVC_RECURSE_PLACEHOLDERS); + if (having_exprs != NIL) + { + tlist_exprs = list_concat(tlist_exprs, having_exprs); + list_free(having_exprs); + } + } + + foreach(lc, tlist_exprs) + { + Expr *expr = (Expr *) lfirst(lc); + Aggref *aggref; + Relids agg_eval_at; + AggClauseInfo *ac_info; + + /* For now we don't try to support GROUPING() expressions */ + if (IsA(expr, GroupingFunc)) + { + eager_agg_applicable = false; + break; + } + + /* Collect plain Vars for future reference */ + if (IsA(expr, Var)) + { + tlist_vars = list_append_unique(tlist_vars, expr); + continue; + } + + aggref = castNode(Aggref, expr); + + Assert(aggref->aggorder == NIL); + Assert(aggref->aggdistinct == NIL); + + /* + * If there are any securityQuals, do not try to apply eager + * aggregation if any non-leakproof aggregate functions are present. + * This is overly strict, but for now... + */ + if (root->qual_security_level > 0 && + !get_func_leakproof(aggref->aggfnoid)) + { + eager_agg_applicable = false; + break; + } + + agg_eval_at = pull_varnos(root, (Node *) aggref); + + /* + * If all base relations in the query are referenced by aggregate + * functions, then eager aggregation is not applicable. + */ + aggregate_relids = bms_add_members(aggregate_relids, agg_eval_at); + if (bms_is_subset(root->all_baserels, aggregate_relids)) + { + eager_agg_applicable = false; + break; + } + + /* OK, create the AggClauseInfo node */ + ac_info = makeNode(AggClauseInfo); + ac_info->aggref = aggref; + ac_info->agg_eval_at = agg_eval_at; + + /* ... and add it to the list */ + agg_clause_list = list_append_unique(agg_clause_list, ac_info); + } + + list_free(tlist_exprs); + + if (eager_agg_applicable) + { + root->agg_clause_list = agg_clause_list; + root->tlist_vars = tlist_vars; + } + else + { + list_free_deep(agg_clause_list); + list_free(tlist_vars); + } +} + +/* + * create_grouping_expr_infos + * Create a GroupingExprInfo for each expression usable as grouping key. + * + * If any grouping expression is not suitable, we will just return with + * root->group_expr_list being NIL. + */ +static void +create_grouping_expr_infos(PlannerInfo *root) +{ + List *exprs = NIL; + List *sortgrouprefs = NIL; + List *ecs = NIL; + ListCell *lc, + *lc1, + *lc2, + *lc3; + + Assert(root->group_expr_list == NIL); + + foreach(lc, root->processed_groupClause) + { + SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); + TargetEntry *tle = get_sortgroupclause_tle(sgc, root->processed_tlist); + TypeCacheEntry *tce; + Oid equalimageproc; + + Assert(tle->ressortgroupref > 0); + + /* + * For now we only support plain Vars as grouping expressions. + */ + if (!IsA(tle->expr, Var)) + return; + + /* + * Eager aggregation is only possible if equality implies image + * equality for each grouping key. Otherwise, placing keys with + * different byte images into the same group may result in the loss of + * information that could be necessary to evaluate upper qual clauses. + * + * For instance, the NUMERIC data type is not supported, as values + * that are considered equal by the equality operator (e.g., 0 and + * 0.0) can have different scales. + */ + tce = lookup_type_cache(exprType((Node *) tle->expr), + TYPECACHE_BTREE_OPFAMILY); + if (!OidIsValid(tce->btree_opf) || + !OidIsValid(tce->btree_opintype)) + return; + + equalimageproc = get_opfamily_proc(tce->btree_opf, + tce->btree_opintype, + tce->btree_opintype, + BTEQUALIMAGE_PROC); + if (!OidIsValid(equalimageproc) || + !DatumGetBool(OidFunctionCall1Coll(equalimageproc, + tce->typcollation, + ObjectIdGetDatum(tce->btree_opintype)))) + return; + + exprs = lappend(exprs, tle->expr); + sortgrouprefs = lappend_int(sortgrouprefs, tle->ressortgroupref); + ecs = lappend(ecs, get_eclass_for_sortgroupclause(root, sgc, tle->expr)); + } + + /* + * Construct a GroupingExprInfo for each expression. + */ + forthree(lc1, exprs, lc2, sortgrouprefs, lc3, ecs) + { + Expr *expr = (Expr *) lfirst(lc1); + int sortgroupref = lfirst_int(lc2); + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc3); + GroupingExprInfo *ge_info; + + ge_info = makeNode(GroupingExprInfo); + ge_info->expr = (Expr *) copyObject(expr); + ge_info->sortgroupref = sortgroupref; + ge_info->ec = ec; + + root->group_expr_list = lappend(root->group_expr_list, ge_info); + } +} + +/* + * get_eclass_for_sortgroupclause + * Given a group clause and an expression, find an existing equivalence + * class that the expression is a member of; return NULL if none. + */ +static EquivalenceClass * +get_eclass_for_sortgroupclause(PlannerInfo *root, SortGroupClause *sgc, + Expr *expr) +{ + Oid opfamily, + opcintype, + collation; + CompareType cmptype; + Oid equality_op; + List *opfamilies; + + /* Punt if the group clause is not sortable */ + if (!OidIsValid(sgc->sortop)) + return NULL; + + /* Find the operator in pg_amop --- failure shouldn't happen */ + if (!get_ordering_op_properties(sgc->sortop, + &opfamily, &opcintype, &cmptype)) + elog(ERROR, "operator %u is not a valid ordering operator", + sgc->sortop); + + /* Because SortGroupClause doesn't carry collation, consult the expr */ + collation = exprCollation((Node *) expr); + + /* + * EquivalenceClasses need to contain opfamily lists based on the family + * membership of mergejoinable equality operators, which could belong to + * more than one opfamily. So we have to look up the opfamily's equality + * operator and get its membership. + */ + equality_op = get_opfamily_member_for_cmptype(opfamily, + opcintype, + opcintype, + COMPARE_EQ); + if (!OidIsValid(equality_op)) /* shouldn't happen */ + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + COMPARE_EQ, opcintype, opcintype, opfamily); + opfamilies = get_mergejoin_opfamilies(equality_op); + if (!opfamilies) /* certainly should find some */ + elog(ERROR, "could not find opfamilies for equality operator %u", + equality_op); + + /* Now find a matching EquivalenceClass */ + return get_eclass_for_sort_expr(root, expr, opfamilies, opcintype, + collation, sgc->tleSortGroupRef, + NULL, false); +} + /***************************************************************************** * * LATERAL REFERENCES diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 5467e094ca7..eefc486a566 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -76,6 +76,9 @@ query_planner(PlannerInfo *root, root->placeholder_list = NIL; root->placeholder_array = NULL; root->placeholder_array_size = 0; + root->agg_clause_list = NIL; + root->group_expr_list = NIL; + root->tlist_vars = NIL; root->fkey_list = NIL; root->initial_rels = NIL; @@ -266,6 +269,12 @@ query_planner(PlannerInfo *root, extract_restriction_or_clauses(root); /* + * Check if eager aggregation is applicable, and if so, set up + * root->agg_clause_list and root->group_expr_list. + */ + setup_eager_aggregation(root); + + /* * Now expand appendrels by adding "otherrels" for their children. We * delay this to the end so that we have as much information as possible * available for each baserel, including all restriction clauses. That diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0c9397a36c3..e8ea78c0c97 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -73,6 +73,12 @@ bool enable_distinct_reordering = true; /* Hook for plugins to get control in planner() */ planner_hook_type planner_hook = NULL; +/* Hook for plugins to get control after PlannerGlobal is initialized */ +planner_setup_hook_type planner_setup_hook = NULL; + +/* Hook for plugins to get control before PlannerGlobal is discarded */ +planner_shutdown_hook_type planner_shutdown_hook = NULL; + /* Hook for plugins to get control when grouping_planner() plans upper rels */ create_upper_paths_hook_type create_upper_paths_hook = NULL; @@ -232,7 +238,6 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, grouping_sets_data *gd, - double dNumGroups, GroupPathExtraData *extra); static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, RelOptInfo *grouped_rel, @@ -280,6 +285,23 @@ static void create_partial_unique_paths(PlannerInfo *root, RelOptInfo *input_rel * * Query optimizer entry point * + * Inputs: + * parse: an analyzed-and-rewritten query tree for an optimizable statement + * query_string: source text for the query tree (used for error reports) + * cursorOptions: bitmask of CURSOR_OPT_XXX flags, see parsenodes.h + * boundParams: passed-in parameter values, or NULL if none + * es: ExplainState if being called from EXPLAIN, else NULL + * + * The result is a PlannedStmt tree. + * + * PARAM_EXTERN Param nodes within the parse tree can be replaced by Consts + * using values from boundParams, if those values are marked PARAM_FLAG_CONST. + * Parameter values not so marked are still relied on for estimation purposes. + * + * The ExplainState pointer is not currently used by the core planner, but it + * is passed through to some planner hooks so that they can report information + * back to EXPLAIN extension hooks. + * * To support loadable plugins that monitor or modify planner behavior, * we provide a hook variable that lets a plugin get control before and * after the standard planning process. The plugin would normally call @@ -291,14 +313,16 @@ static void create_partial_unique_paths(PlannerInfo *root, RelOptInfo *input_rel *****************************************************************************/ PlannedStmt * planner(Query *parse, const char *query_string, int cursorOptions, - ParamListInfo boundParams) + ParamListInfo boundParams, ExplainState *es) { PlannedStmt *result; if (planner_hook) - result = (*planner_hook) (parse, query_string, cursorOptions, boundParams); + result = (*planner_hook) (parse, query_string, cursorOptions, + boundParams, es); else - result = standard_planner(parse, query_string, cursorOptions, boundParams); + result = standard_planner(parse, query_string, cursorOptions, + boundParams, es); pgstat_report_plan_id(result->planId, false); @@ -307,7 +331,7 @@ planner(Query *parse, const char *query_string, int cursorOptions, PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, - ParamListInfo boundParams) + ParamListInfo boundParams, ExplainState *es) { PlannedStmt *result; PlannerGlobal *glob; @@ -438,6 +462,10 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, tuple_fraction = 0.0; } + /* Allow plugins to take control after we've initialized "glob" */ + if (planner_setup_hook) + (*planner_setup_hook) (glob, parse, query_string, &tuple_fraction, es); + /* primary planning entry point (may recurse for subqueries) */ root = subquery_planner(glob, parse, NULL, NULL, false, tuple_fraction, NULL); @@ -617,6 +645,10 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->jitFlags |= PGJIT_DEFORM; } + /* Allow plugins to take control before we discard "glob" */ + if (planner_shutdown_hook) + (*planner_shutdown_hook) (glob, parse, query_string, result); + if (glob->partition_directory != NULL) DestroyPartitionDirectory(glob->partition_directory); @@ -4014,9 +4046,7 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, GroupPathExtraData *extra, RelOptInfo **partially_grouped_rel_p) { - Path *cheapest_path = input_rel->cheapest_total_path; RelOptInfo *partially_grouped_rel = NULL; - double dNumGroups; PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE; /* @@ -4098,23 +4128,16 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, /* Gather any partially grouped partial paths. */ if (partially_grouped_rel && partially_grouped_rel->partial_pathlist) - { gather_grouping_paths(root, partially_grouped_rel); - set_cheapest(partially_grouped_rel); - } - /* - * Estimate number of groups. - */ - dNumGroups = get_number_of_groups(root, - cheapest_path->rows, - gd, - extra->targetList); + /* Now choose the best path(s) for partially_grouped_rel. */ + if (partially_grouped_rel && partially_grouped_rel->pathlist) + set_cheapest(partially_grouped_rel); /* Build final grouping paths */ add_paths_to_grouping_rel(root, input_rel, grouped_rel, partially_grouped_rel, agg_costs, gd, - dNumGroups, extra); + extra); /* Give a helpful error if we failed to find any implementation */ if (grouped_rel->pathlist == NIL) @@ -7059,16 +7082,42 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, - grouping_sets_data *gd, double dNumGroups, + grouping_sets_data *gd, GroupPathExtraData *extra) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; + Path *cheapest_partially_grouped_path = NULL; ListCell *lc; bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; List *havingQual = (List *) extra->havingQual; AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + double dNumGroups = 0; + double dNumFinalGroups = 0; + + /* + * Estimate number of groups for non-split aggregation. + */ + dNumGroups = get_number_of_groups(root, + cheapest_path->rows, + gd, + extra->targetList); + + if (partially_grouped_rel && partially_grouped_rel->pathlist) + { + cheapest_partially_grouped_path = + partially_grouped_rel->cheapest_total_path; + + /* + * Estimate number of groups for final phase of partial aggregation. + */ + dNumFinalGroups = + get_number_of_groups(root, + cheapest_partially_grouped_path->rows, + gd, + extra->targetList); + } if (can_sort) { @@ -7181,7 +7230,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path = make_ordered_path(root, grouped_rel, path, - partially_grouped_rel->cheapest_total_path, + cheapest_partially_grouped_path, info->pathkeys, -1.0); @@ -7199,7 +7248,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, info->clauses, havingQual, agg_final_costs, - dNumGroups)); + dNumFinalGroups)); else add_path(grouped_rel, (Path *) create_group_path(root, @@ -7207,7 +7256,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path, info->clauses, havingQual, - dNumGroups)); + dNumFinalGroups)); } } @@ -7249,19 +7298,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, */ if (partially_grouped_rel && partially_grouped_rel->pathlist) { - Path *path = partially_grouped_rel->cheapest_total_path; - add_path(grouped_rel, (Path *) create_agg_path(root, grouped_rel, - path, + cheapest_partially_grouped_path, grouped_rel->reltarget, AGG_HASHED, AGGSPLIT_FINAL_DESERIAL, root->processed_groupClause, havingQual, agg_final_costs, - dNumGroups)); + dNumFinalGroups)); } } @@ -7301,6 +7348,7 @@ create_partial_grouping_paths(PlannerInfo *root, { Query *parse = root->parse; RelOptInfo *partially_grouped_rel; + RelOptInfo *eager_agg_rel = NULL; AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs; AggClauseCosts *agg_final_costs = &extra->agg_final_costs; Path *cheapest_partial_path = NULL; @@ -7312,6 +7360,15 @@ create_partial_grouping_paths(PlannerInfo *root, bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; /* + * Check whether any partially aggregated paths have been generated + * through eager aggregation. + */ + if (input_rel->grouped_rel && + !IS_DUMMY_REL(input_rel->grouped_rel) && + input_rel->grouped_rel->pathlist != NIL) + eager_agg_rel = input_rel->grouped_rel; + + /* * Consider whether we should generate partially aggregated non-partial * paths. We can only do this if we have a non-partial path, and only if * the parent of the input rel is performing partial partitionwise @@ -7332,11 +7389,13 @@ create_partial_grouping_paths(PlannerInfo *root, /* * If we can't partially aggregate partial paths, and we can't partially - * aggregate non-partial paths, then don't bother creating the new + * aggregate non-partial paths, and no partially aggregated paths were + * generated by eager aggregation, then don't bother creating the new * RelOptInfo at all, unless the caller specified force_rel_creation. */ if (cheapest_total_path == NULL && cheapest_partial_path == NULL && + eager_agg_rel == NULL && !force_rel_creation) return NULL; @@ -7562,6 +7621,51 @@ create_partial_grouping_paths(PlannerInfo *root, } /* + * Add any partially aggregated paths generated by eager aggregation to + * the new upper relation after applying projection steps as needed. + */ + if (eager_agg_rel) + { + /* Add the paths */ + foreach(lc, eager_agg_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + /* Shouldn't have any parameterized paths anymore */ + Assert(path->param_info == NULL); + + path = (Path *) create_projection_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget); + + add_path(partially_grouped_rel, path); + } + + /* + * Likewise add the partial paths, but only if parallelism is possible + * for partially_grouped_rel. + */ + if (partially_grouped_rel->consider_parallel) + { + foreach(lc, eager_agg_rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + + /* Shouldn't have any parameterized paths anymore */ + Assert(path->param_info == NULL); + + path = (Path *) create_projection_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget); + + add_partial_path(partially_grouped_rel, path); + } + } + } + + /* * If there is an FDW that's responsible for all baserels of the query, * let it consider adding partially grouped ForeignPaths. */ @@ -8124,13 +8228,6 @@ create_partitionwise_grouping_paths(PlannerInfo *root, add_paths_to_append_rel(root, partially_grouped_rel, partially_grouped_live_children); - - /* - * We need call set_cheapest, since the finalization step will use the - * cheapest path from the rel. - */ - if (partially_grouped_rel->pathlist) - set_cheapest(partially_grouped_rel); } /* If possible, create append paths for fully grouped children. */ |