diff options
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 2820 |
1 files changed, 1138 insertions, 1682 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 65b99e2af38..5fc8e5bd362 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -84,8 +84,9 @@ typedef struct /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); -static Plan *inheritance_planner(PlannerInfo *root); -static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction); +static void inheritance_planner(PlannerInfo *root); +static void grouping_planner(PlannerInfo *root, bool inheritance_update, + double tuple_fraction); static void preprocess_rowmarks(PlannerInfo *root); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, @@ -96,52 +97,44 @@ static List *preprocess_groupclause(PlannerInfo *root, List *force); static List *extract_rollup_sets(List *groupingSets); static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); -static bool choose_hashed_grouping(PlannerInfo *root, - double tuple_fraction, double limit_tuples, - double path_rows, - Path *cheapest_path, Path *sorted_path, - double dNumGroups, AggClauseCosts *agg_costs); -static bool choose_hashed_distinct(PlannerInfo *root, - double tuple_fraction, double limit_tuples, - double path_rows, - Cost cheapest_startup_cost, Cost cheapest_total_cost, - int cheapest_path_width, - Cost sorted_startup_cost, Cost sorted_total_cost, - int sorted_path_width, - List *sorted_pathkeys, - double dNumDistinctRows); -static List *make_subplanTargetList(PlannerInfo *root, List *tlist, - AttrNumber **groupColIdx, bool *need_tlist_eval); +static double get_number_of_groups(PlannerInfo *root, + double path_rows, + List *rollup_lists, + List *rollup_groupclauses); +static RelOptInfo *create_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *target, + AttrNumber *groupColIdx, + List *rollup_lists, + List *rollup_groupclauses); +static RelOptInfo *create_window_paths(PlannerInfo *root, + RelOptInfo *input_rel, + List *base_tlist, + List *tlist, + WindowFuncLists *wflists, + List *activeWindows); +static void create_one_window_path(PlannerInfo *root, + RelOptInfo *window_rel, + Path *path, + List *base_tlist, + List *tlist, + WindowFuncLists *wflists, + List *activeWindows); +static RelOptInfo *create_distinct_paths(PlannerInfo *root, + RelOptInfo *input_rel); +static RelOptInfo *create_ordered_paths(PlannerInfo *root, + RelOptInfo *input_rel, + double limit_tuples); +static PathTarget *make_scanjoin_target(PlannerInfo *root, List *tlist, + AttrNumber **groupColIdx); static int get_grouping_column_index(Query *parse, TargetEntry *tle); -static void locate_grouping_columns(PlannerInfo *root, - List *tlist, - List *sub_tlist, - AttrNumber *groupColIdx); static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists); static List *make_windowInputTargetList(PlannerInfo *root, List *tlist, List *activeWindows); static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, List *tlist); -static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc, - List *tlist, - int numSortCols, AttrNumber *sortColIdx, - int *partNumCols, - AttrNumber **partColIdx, - Oid **partOperators, - int *ordNumCols, - AttrNumber **ordColIdx, - Oid **ordOperators); -static Plan *build_grouping_chain(PlannerInfo *root, - Query *parse, - List *tlist, - bool need_sort_for_grouping, - List *rollup_groupclauses, - List *rollup_lists, - AttrNumber *groupColIdx, - AggClauseCosts *agg_costs, - long numGroups, - Plan *result_plan); + /***************************************************************************** * @@ -175,6 +168,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) PlannerGlobal *glob; double tuple_fraction; PlannerInfo *root; + RelOptInfo *final_rel; + Path *best_path; Plan *top_plan; ListCell *lp, *lr; @@ -292,8 +287,14 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) } /* primary planning entry point (may recurse for subqueries) */ - top_plan = subquery_planner(glob, parse, NULL, - false, tuple_fraction, &root); + root = subquery_planner(glob, parse, NULL, + false, tuple_fraction); + + /* Select best Path and turn it into a Plan */ + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + best_path = get_cheapest_fractional_path(final_rel, tuple_fraction); + + top_plan = create_plan(root, best_path); /* * If creating a plan for a scrollable cursor, make sure it can run @@ -407,9 +408,6 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) * tuple_fraction is the fraction of tuples we expect will be retrieved. * tuple_fraction is interpreted as explained for grouping_planner, below. * - * If subroot isn't NULL, we pass back the query's final PlannerInfo struct; - * among other things this tells the output sort ordering of the plan. - * * Basically, this routine does the stuff that should only be done once * per Query object. It then calls grouping_planner. At one time, * grouping_planner could be invoked recursively on the same Query object; @@ -419,20 +417,23 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) * subquery_planner will be called recursively to handle sub-Query nodes * found within the query's expressions and rangetable. * - * Returns a query plan. + * Returns the PlannerInfo struct ("root") that contains all data generated + * while planning the subquery. In particular, the Path(s) attached to + * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the + * cheapest way(s) to implement the query. The top level will select the + * best Path and pass it through createplan.c to produce a finished Plan. *-------------------- */ -Plan * +PlannerInfo * subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, - bool hasRecursion, double tuple_fraction, - PlannerInfo **subroot) + bool hasRecursion, double tuple_fraction) { PlannerInfo *root; - Plan *plan; List *newWithCheckOptions; List *newHaving; bool hasOuterJoins; + RelOptInfo *final_rel; ListCell *l; /* Create a PlannerInfo data structure for this subquery */ @@ -450,15 +451,17 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->eq_classes = NIL; root->append_rel_list = NIL; root->rowMarks = NIL; - root->hasInheritedTarget = false; + memset(root->upper_rels, 0, sizeof(root->upper_rels)); + root->processed_tlist = NIL; root->grouping_map = NULL; - + root->minmax_aggs = NIL; + root->hasInheritedTarget = false; root->hasRecursion = hasRecursion; if (hasRecursion) root->wt_param_id = SS_assign_special_param(root); else root->wt_param_id = -1; - root->non_recursive_plan = NULL; + root->non_recursive_path = NULL; /* * If there is a WITH list, process each WITH query and build an initplan @@ -732,54 +735,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse, */ if (parse->resultRelation && rt_fetch(parse->resultRelation, parse->rtable)->inh) - plan = inheritance_planner(root); + inheritance_planner(root); else - { - plan = grouping_planner(root, tuple_fraction); - /* If it's not SELECT, we need a ModifyTable node */ - if (parse->commandType != CMD_SELECT) - { - List *withCheckOptionLists; - List *returningLists; - List *rowMarks; - - /* - * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if - * needed. - */ - if (parse->withCheckOptions) - withCheckOptionLists = list_make1(parse->withCheckOptions); - else - withCheckOptionLists = NIL; - - if (parse->returningList) - returningLists = list_make1(parse->returningList); - else - returningLists = NIL; - - /* - * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node - * will have dealt with fetching non-locked marked rows, else we - * need to have ModifyTable do that. - */ - if (parse->rowMarks) - rowMarks = NIL; - else - rowMarks = root->rowMarks; - - plan = (Plan *) make_modifytable(root, - parse->commandType, - parse->canSetTag, - parse->resultRelation, - list_make1_int(parse->resultRelation), - list_make1(plan), - withCheckOptionLists, - returningLists, - rowMarks, - parse->onConflict, - SS_assign_special_param(root)); - } - } + grouping_planner(root, false, tuple_fraction); /* * Capture the set of outer-level param IDs we have access to, for use in @@ -788,17 +746,22 @@ subquery_planner(PlannerGlobal *glob, Query *parse, SS_identify_outer_params(root); /* - * If any initPlans were created in this query level, attach them to the - * topmost plan node for the level, and increment that node's cost to - * account for them. + * If any initPlans were created in this query level, increment the + * surviving Paths' costs to account for them. They won't actually get + * attached to the plan tree till create_plan() runs, but we want to be + * sure their costs are included now. */ - SS_attach_initplans(root, plan); + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + SS_charge_for_initplans(root, final_rel); - /* Return internal info if caller wants it */ - if (subroot) - *subroot = root; + /* + * Make sure we've identified the cheapest Path for the final rel. (By + * doing this here not in grouping_planner, we include initPlan costs in + * the decision, though it's unlikely that will change anything.) + */ + set_cheapest(final_rel); - return plan; + return root; } /* @@ -944,7 +907,7 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr) /* * inheritance_planner - * Generate a plan in the case where the result relation is an + * Generate Paths in the case where the result relation is an * inheritance set. * * We have to handle this case differently from cases where a source relation @@ -955,9 +918,13 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr) * the UPDATE/DELETE target can never be the nullable side of an outer join, * so it's OK to generate the plan this way. * - * Returns a query plan. + * Returns nothing; the useful output is in the Paths we attach to + * the (UPPERREL_FINAL, NULL) upperrel stored in *root. + * + * Note that we have not done set_cheapest() on the final rel; it's convenient + * to leave this to the caller. */ -static Plan * +static void inheritance_planner(PlannerInfo *root) { Query *parse = root->parse; @@ -969,11 +936,13 @@ inheritance_planner(PlannerInfo *root) List *final_rtable = NIL; int save_rel_array_size = 0; RelOptInfo **save_rel_array = NULL; - List *subplans = NIL; + List *subpaths = NIL; + List *subroots = NIL; List *resultRelations = NIL; List *withCheckOptionLists = NIL; List *returningLists = NIL; List *rowMarks; + RelOptInfo *final_rel; ListCell *lc; Index rti; @@ -1060,8 +1029,9 @@ inheritance_planner(PlannerInfo *root) foreach(lc, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc); - PlannerInfo subroot; - Plan *subplan; + PlannerInfo *subroot; + RelOptInfo *sub_final_rel; + Path *subpath; /* append_rel_list contains all append rels; ignore others */ if (appinfo->parent_relid != parentRTindex) @@ -1071,7 +1041,8 @@ inheritance_planner(PlannerInfo *root) * We need a working copy of the PlannerInfo so that we can control * propagation of information back to the main copy. */ - memcpy(&subroot, root, sizeof(PlannerInfo)); + subroot = makeNode(PlannerInfo); + memcpy(subroot, root, sizeof(PlannerInfo)); /* * Generate modified query with this rel as target. We first apply @@ -1079,7 +1050,7 @@ inheritance_planner(PlannerInfo *root) * references to the parent RTE to refer to the current child RTE, * then fool around with subquery RTEs. */ - subroot.parse = (Query *) + subroot->parse = (Query *) adjust_appendrel_attrs(root, (Node *) parse, appinfo); @@ -1090,7 +1061,7 @@ inheritance_planner(PlannerInfo *root) * executor doesn't need to see the modified copies --- we can just * pass it the original rowMarks list.) */ - subroot.rowMarks = (List *) copyObject(root->rowMarks); + subroot->rowMarks = (List *) copyObject(root->rowMarks); /* * The append_rel_list likewise might contain references to subquery @@ -1106,7 +1077,7 @@ inheritance_planner(PlannerInfo *root) { ListCell *lc2; - subroot.append_rel_list = NIL; + subroot->append_rel_list = NIL; foreach(lc2, root->append_rel_list) { AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2); @@ -1114,8 +1085,8 @@ inheritance_planner(PlannerInfo *root) if (bms_is_member(appinfo2->child_relid, modifiableARIindexes)) appinfo2 = (AppendRelInfo *) copyObject(appinfo2); - subroot.append_rel_list = lappend(subroot.append_rel_list, - appinfo2); + subroot->append_rel_list = lappend(subroot->append_rel_list, + appinfo2); } } @@ -1125,9 +1096,9 @@ inheritance_planner(PlannerInfo *root) * These won't be referenced, so there's no need to make them very * valid-looking. */ - while (list_length(subroot.parse->rtable) < list_length(final_rtable)) - subroot.parse->rtable = lappend(subroot.parse->rtable, - makeNode(RangeTblEntry)); + while (list_length(subroot->parse->rtable) < list_length(final_rtable)) + subroot->parse->rtable = lappend(subroot->parse->rtable, + makeNode(RangeTblEntry)); /* * If this isn't the first child Query, generate duplicates of all @@ -1156,15 +1127,15 @@ inheritance_planner(PlannerInfo *root) * save a few cycles by applying ChangeVarNodes before we * append the RTE to the rangetable. */ - newrti = list_length(subroot.parse->rtable) + 1; - ChangeVarNodes((Node *) subroot.parse, rti, newrti, 0); - ChangeVarNodes((Node *) subroot.rowMarks, rti, newrti, 0); + newrti = list_length(subroot->parse->rtable) + 1; + ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0); + ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0); /* Skip processing unchanging parts of append_rel_list */ if (modifiableARIindexes != NULL) { ListCell *lc2; - foreach(lc2, subroot.append_rel_list) + foreach(lc2, subroot->append_rel_list) { AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2); @@ -1175,28 +1146,28 @@ inheritance_planner(PlannerInfo *root) } rte = copyObject(rte); ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0); - subroot.parse->rtable = lappend(subroot.parse->rtable, - rte); + subroot->parse->rtable = lappend(subroot->parse->rtable, + rte); } rti++; } } /* There shouldn't be any OJ info to translate, as yet */ - Assert(subroot.join_info_list == NIL); + Assert(subroot->join_info_list == NIL); /* and we haven't created PlaceHolderInfos, either */ - Assert(subroot.placeholder_list == NIL); + Assert(subroot->placeholder_list == NIL); /* hack to mark target relation as an inheritance partition */ - subroot.hasInheritedTarget = true; + subroot->hasInheritedTarget = true; - /* Generate plan */ - subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ ); + /* Generate Path(s) for accessing this result relation */ + grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ ); /* * Planning may have modified the query result relation (if there were * security barrier quals on the result RTE). */ - appinfo->child_relid = subroot.parse->resultRelation; + appinfo->child_relid = subroot->parse->resultRelation; /* * We'll use the first child relation (even if it's excluded) as the @@ -1213,21 +1184,28 @@ inheritance_planner(PlannerInfo *root) nominalRelation = appinfo->child_relid; /* + * Select cheapest path in case there's more than one. We always run + * modification queries to conclusion, so we care only for the + * cheapest-total path. + */ + sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); + set_cheapest(sub_final_rel); + subpath = sub_final_rel->cheapest_total_path; + + /* * If this child rel was excluded by constraint exclusion, exclude it * from the result plan. */ - if (is_dummy_plan(subplan)) + if (IS_DUMMY_PATH(subpath)) continue; - subplans = lappend(subplans, subplan); - /* * If this is the first non-excluded child, its post-planning rtable * becomes the initial contents of final_rtable; otherwise, append * just its modified subquery RTEs to final_rtable. */ if (final_rtable == NIL) - final_rtable = subroot.parse->rtable; + final_rtable = subroot->parse->rtable; else { List *tmp_rtable = NIL; @@ -1244,7 +1222,7 @@ inheritance_planner(PlannerInfo *root) * When this happens, we want to use the new subqueries in the * final rtable. */ - forboth(cell1, final_rtable, cell2, subroot.parse->rtable) + forboth(cell1, final_rtable, cell2, subroot->parse->rtable) { RangeTblEntry *rte1 = (RangeTblEntry *) lfirst(cell1); RangeTblEntry *rte2 = (RangeTblEntry *) lfirst(cell2); @@ -1261,7 +1239,7 @@ inheritance_planner(PlannerInfo *root) } final_rtable = list_concat(tmp_rtable, - list_copy_tail(subroot.parse->rtable, + list_copy_tail(subroot->parse->rtable, list_length(final_rtable))); } @@ -1272,19 +1250,25 @@ inheritance_planner(PlannerInfo *root) * have to propagate forward the RelOptInfos that were already built * in previous children. */ - Assert(subroot.simple_rel_array_size >= save_rel_array_size); + Assert(subroot->simple_rel_array_size >= save_rel_array_size); for (rti = 1; rti < save_rel_array_size; rti++) { RelOptInfo *brel = save_rel_array[rti]; if (brel) - subroot.simple_rel_array[rti] = brel; + subroot->simple_rel_array[rti] = brel; } - save_rel_array_size = subroot.simple_rel_array_size; - save_rel_array = subroot.simple_rel_array; + save_rel_array_size = subroot->simple_rel_array_size; + save_rel_array = subroot->simple_rel_array; /* Make sure any initplans from this rel get into the outer list */ - root->init_plans = subroot.init_plans; + root->init_plans = subroot->init_plans; + + /* Build list of sub-paths */ + subpaths = lappend(subpaths, subpath); + + /* Build list of modified subroots, too */ + subroots = lappend(subroots, subroot); /* Build list of target-relation RT indexes */ resultRelations = lappend_int(resultRelations, appinfo->child_relid); @@ -1292,40 +1276,44 @@ inheritance_planner(PlannerInfo *root) /* Build lists of per-relation WCO and RETURNING targetlists */ if (parse->withCheckOptions) withCheckOptionLists = lappend(withCheckOptionLists, - subroot.parse->withCheckOptions); + subroot->parse->withCheckOptions); if (parse->returningList) returningLists = lappend(returningLists, - subroot.parse->returningList); + subroot->parse->returningList); Assert(!parse->onConflict); } - /* Mark result as unordered (probably unnecessary) */ - root->query_pathkeys = NIL; + /* Result path must go into outer query's FINAL upperrel */ + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); /* * If we managed to exclude every child rel, return a dummy plan; it * doesn't even need a ModifyTable node. */ - if (subplans == NIL) + if (subpaths == NIL) { - /* although dummy, it must have a valid tlist for executor */ - List *tlist; - - tlist = preprocess_targetlist(root, parse->targetList); - return (Plan *) make_result(root, - tlist, - (Node *) list_make1(makeBoolConst(false, - false)), - NULL); + set_dummy_rel_pathlist(final_rel); + return; } /* * Put back the final adjusted rtable into the master copy of the Query. + * (We mustn't do this if we found no non-excluded children.) */ parse->rtable = final_rtable; root->simple_rel_array_size = save_rel_array_size; root->simple_rel_array = save_rel_array; + /* Must reconstruct master's simple_rte_array, too */ + root->simple_rte_array = (RangeTblEntry **) + palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *)); + rti = 1; + foreach(lc, final_rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + root->simple_rte_array[rti++] = rte; + } /* * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will @@ -1337,28 +1325,35 @@ inheritance_planner(PlannerInfo *root) else rowMarks = root->rowMarks; - /* And last, tack on a ModifyTable node to do the UPDATE/DELETE work */ - return (Plan *) make_modifytable(root, + /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */ + add_path(final_rel, (Path *) + create_modifytable_path(root, final_rel, parse->commandType, parse->canSetTag, nominalRelation, resultRelations, - subplans, + subpaths, + subroots, withCheckOptionLists, returningLists, rowMarks, NULL, - SS_assign_special_param(root)); + SS_assign_special_param(root))); } /*-------------------- * grouping_planner * Perform planning steps related to grouping, aggregation, etc. - * This primarily means adding top-level processing to the basic - * query plan produced by query_planner. * - * tuple_fraction is the fraction of tuples we expect will be retrieved + * This function adds all required top-level processing to the scan/join + * Path(s) produced by query_planner. + * + * If inheritance_update is true, we're being called from inheritance_planner + * and should not include a ModifyTable step in the resulting Path(s). + * (inheritance_planner will create a single ModifyTable node covering all the + * target tables.) * + * tuple_fraction is the fraction of tuples we expect will be retrieved. * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) * 0 < tuple_fraction < 1: expect the given fraction of tuples available @@ -1366,23 +1361,26 @@ inheritance_planner(PlannerInfo *root) * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples * expected to be retrieved (ie, a LIMIT specification) * - * Returns a query plan. Also, root->query_pathkeys is returned as the - * actual output ordering of the plan (in pathkey format). + * Returns nothing; the useful output is in the Paths we attach to the + * (UPPERREL_FINAL, NULL) upperrel in *root. In addition, + * root->processed_tlist contains the final processed targetlist. + * + * Note that we have not done set_cheapest() on the final rel; it's convenient + * to leave this to the caller. *-------------------- */ -static Plan * -grouping_planner(PlannerInfo *root, double tuple_fraction) +static void +grouping_planner(PlannerInfo *root, bool inheritance_update, + double tuple_fraction) { Query *parse = root->parse; List *tlist = parse->targetList; int64 offset_est = 0; int64 count_est = 0; double limit_tuples = -1.0; - Plan *result_plan; - List *current_pathkeys; - double dNumGroups = 0; - bool use_hashed_distinct = false; - bool tested_hashed_distinct = false; + RelOptInfo *current_rel; + RelOptInfo *final_rel; + ListCell *lc; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if (parse->limitCount || parse->limitOffset) @@ -1398,36 +1396,29 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) limit_tuples = (double) count_est + (double) offset_est; } + /* Make tuple_fraction accessible to lower-level routines */ + root->tuple_fraction = tuple_fraction; + if (parse->setOperations) { - List *set_sortclauses; - /* * If there's a top-level ORDER BY, assume we have to fetch all the * tuples. This might be too simplistic given all the hackery below * to possibly avoid the sort; but the odds of accurate estimates here - * are pretty low anyway. + * are pretty low anyway. XXX try to get rid of this in favor of + * letting plan_set_operations generate both fast-start and + * cheapest-total paths. */ if (parse->sortClause) - tuple_fraction = 0.0; + root->tuple_fraction = 0.0; /* - * Construct the plan for set operations. The result will not need - * any work except perhaps a top-level sort and/or LIMIT. Note that - * any special work for recursive unions is the responsibility of + * Construct Paths for set operations. The results will not need any + * work except perhaps a top-level sort and/or LIMIT. Note that any + * special work for recursive unions is the responsibility of * plan_set_operations. */ - result_plan = plan_set_operations(root, tuple_fraction, - &set_sortclauses); - - /* - * Calculate pathkeys representing the sort order (if any) of the set - * operation's result. We have to do this before overwriting the sort - * key information... - */ - current_pathkeys = make_pathkeys_for_sortclauses(root, - set_sortclauses, - result_plan->targetlist); + current_rel = plan_set_operations(root); /* * We should not need to call preprocess_targetlist, since we must be @@ -1438,8 +1429,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ Assert(parse->commandType == CMD_SELECT); - tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist), - tlist); + tlist = root->processed_tlist; /* from plan_set_operations */ + + /* for safety, copy processed_tlist instead of modifying in-place */ + tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList); + + /* Save aside the final decorated tlist */ + root->processed_tlist = tlist; /* * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have @@ -1465,33 +1461,25 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) else { /* No set operations, do regular planning */ - long numGroups = 0; - AggClauseCosts agg_costs; - int numGroupCols; - double path_rows; - bool use_hashed_grouping = false; + PathTarget *sub_target; + AttrNumber *groupColIdx; + double tlist_rows; + List *grouping_tlist; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; - OnConflictExpr *onconfl; - int maxref = 0; List *rollup_lists = NIL; List *rollup_groupclauses = NIL; standard_qp_extra qp_extra; - RelOptInfo *final_rel; - Path *cheapest_path; - Path *sorted_path; - Path *best_path; - - MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); /* A recursive query should always have setOperations */ Assert(!root->hasRecursion); - /* Preprocess grouping sets, if any */ + /* Preprocess grouping sets and GROUP BY clause, if any */ if (parse->groupingSets) { int *tleref_to_colnum_map; List *sets; + int maxref; ListCell *lc; ListCell *lc2; ListCell *lc_set; @@ -1499,7 +1487,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); /* Identify max SortGroupRef in groupClause, for array sizing */ - /* (note this value will be used again later) */ + maxref = 0; foreach(lc, parse->groupClause) { SortGroupClause *gc = lfirst(lc); @@ -1570,21 +1558,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } else { - /* Preprocess GROUP BY clause, if any */ + /* Preprocess regular GROUP BY clause, if any */ if (parse->groupClause) parse->groupClause = preprocess_groupclause(root, NIL); - rollup_groupclauses = list_make1(parse->groupClause); } - numGroupCols = list_length(parse->groupClause); - /* Preprocess targetlist */ tlist = preprocess_targetlist(root, tlist); - onconfl = parse->onConflict; - if (onconfl) - onconfl->onConflictSet = - preprocess_onconflict_targetlist(onconfl->onConflictSet, + if (parse->onConflict) + parse->onConflict->onConflictSet = + preprocess_onconflict_targetlist(parse->onConflict->onConflictSet, parse->resultRelation, parse->rtable); @@ -1597,6 +1581,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) root->glob->hasRowSecurity = true; /* + * We are now done hacking up the query's targetlist. Most of the + * remaining planning work will be done with the PathTarget + * representation of tlists, but save aside the full representation so + * that we can transfer its decoration (resnames etc) to the topmost + * tlist of the finished Plan. + */ + root->processed_tlist = tlist; + + /* * Locate any window functions in the tlist. (We don't need to look * anywhere else, since expressions used in ORDER BY will be in there * too.) Note that they could all have been eliminated by constant @@ -1613,34 +1606,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } /* - * Do aggregate preprocessing, if the query has any aggs. - * - * Note: think not that we can turn off hasAggs if we find no aggs. It - * is possible for constant-expression simplification to remove all - * explicit references to aggs, but we still have to follow the - * aggregate semantics (eg, producing only one output row). + * Preprocess MIN/MAX aggregates, if any. Note: be careful about + * adding logic between here and the query_planner() call. Anything + * that is needed in MIN/MAX-optimizable cases will have to be + * duplicated in planagg.c. */ if (parse->hasAggs) - { - /* - * Collect statistics about aggregates for estimating costs. Note: - * we do not attempt to detect duplicate aggregates here; a - * somewhat-overestimated cost is okay for our present purposes. - */ - count_agg_clauses(root, (Node *) tlist, &agg_costs); - count_agg_clauses(root, parse->havingQual, &agg_costs); - - /* - * Preprocess MIN/MAX aggregates, if any. Note: be careful about - * adding logic between here and the optimize_minmax_aggregates - * call. Anything that is needed in MIN/MAX-optimizable cases - * will have to be duplicated in planagg.c. - */ preprocess_minmax_aggregates(root, tlist); - } - - /* Make tuple_fraction accessible to lower-level routines */ - root->tuple_fraction = tuple_fraction; /* * Figure out whether there's a hard limit on the number of rows that @@ -1661,1072 +1633,255 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Set up data needed by standard_qp_callback */ qp_extra.tlist = tlist; qp_extra.activeWindows = activeWindows; - qp_extra.groupClause = llast(rollup_groupclauses); + qp_extra.groupClause = + parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause; /* - * Generate the best unsorted and presorted paths for this Query (but - * note there may not be any presorted paths). We also generate (in - * standard_qp_callback) pathkey representations of the query's sort - * clause, distinct clause, etc. + * Generate the best unsorted and presorted paths for the scan/join + * portion of this Query, ie the processing represented by the + * FROM/WHERE clauses. (Note there may not be any presorted paths.) + * We also generate (in standard_qp_callback) pathkey representations + * of the query's sort clause, distinct clause, etc. */ - final_rel = query_planner(root, tlist, - standard_qp_callback, &qp_extra); + current_rel = query_planner(root, tlist, + standard_qp_callback, &qp_extra); /* - * Extract rowcount estimate for use below. If final_rel has been - * proven dummy, its rows estimate will be zero; clamp it to one to - * avoid zero-divide in subsequent calculations. + * Now determine the tlist that we want the topmost scan/join plan + * node to emit; this may be different from the final tlist if + * grouping or aggregation is needed. This is also a convenient spot + * for conversion of the tlist to PathTarget format. + * + * Note: it's desirable to not do this till after query_planner(), + * because the target width estimates can use per-Var width numbers + * that were obtained within query_planner(). */ - path_rows = clamp_row_est(final_rel->rows); + sub_target = make_scanjoin_target(root, tlist, + &groupColIdx); /* - * If there's grouping going on, estimate the number of result groups. - * We couldn't do this any earlier because it depends on relation size - * estimates that are created within query_planner(). + * Forcibly apply that tlist to all the Paths for the scan/join rel. * - * Then convert tuple_fraction to fractional form if it is absolute, - * and if grouping or aggregation is involved, adjust tuple_fraction - * to describe the fraction of the underlying un-aggregated tuples - * that will be fetched. + * In principle we should re-run set_cheapest() here to identify the + * cheapest path, but it seems unlikely that adding the same tlist + * eval costs to all the paths would change that, so we don't bother. + * Instead, just assume that the cheapest-startup and cheapest-total + * paths remain so. (There should be no parameterized paths anymore, + * so we needn't worry about updating cheapest_parameterized_paths.) */ - dNumGroups = 1; /* in case not grouping */ - - if (parse->groupClause) + foreach(lc, current_rel->pathlist) { - List *groupExprs; - - if (parse->groupingSets) + Path *subpath = (Path *) lfirst(lc); + Path *path; + + Assert(subpath->param_info == NULL); + path = apply_projection_to_path(root, current_rel, + subpath, sub_target); + /* If we had to add a Result, path is different from subpath */ + if (path != subpath) { - ListCell *lc, - *lc2; - - dNumGroups = 0; - - forboth(lc, rollup_groupclauses, lc2, rollup_lists) - { - ListCell *lc3; - - groupExprs = get_sortgrouplist_exprs(lfirst(lc), - parse->targetList); - - foreach(lc3, lfirst(lc2)) - { - List *gset = lfirst(lc3); - - dNumGroups += estimate_num_groups(root, - groupExprs, - path_rows, - &gset); - } - } + lfirst(lc) = path; + if (subpath == current_rel->cheapest_startup_path) + current_rel->cheapest_startup_path = path; + if (subpath == current_rel->cheapest_total_path) + current_rel->cheapest_total_path = path; } - else - { - groupExprs = get_sortgrouplist_exprs(parse->groupClause, - parse->targetList); - - dNumGroups = estimate_num_groups(root, groupExprs, path_rows, - NULL); - } - - /* - * In GROUP BY mode, an absolute LIMIT is relative to the number - * of groups not the number of tuples. If the caller gave us a - * fraction, keep it as-is. (In both cases, we are effectively - * assuming that all the groups are about the same size.) - */ - if (tuple_fraction >= 1.0) - tuple_fraction /= dNumGroups; - - /* - * If there's more than one grouping set, we'll have to sort the - * entire input. - */ - if (list_length(rollup_lists) > 1) - tuple_fraction = 0.0; - - /* - * If both GROUP BY and ORDER BY are specified, we will need two - * levels of sort --- and, therefore, certainly need to read all - * the tuples --- unless ORDER BY is a subset of GROUP BY. - * Likewise if we have both DISTINCT and GROUP BY, or if we have a - * window specification not compatible with the GROUP BY. - */ - if (!pathkeys_contained_in(root->sort_pathkeys, - root->group_pathkeys) || - !pathkeys_contained_in(root->distinct_pathkeys, - root->group_pathkeys) || - !pathkeys_contained_in(root->window_pathkeys, - root->group_pathkeys)) - tuple_fraction = 0.0; - } - else if (parse->hasAggs || root->hasHavingQual || parse->groupingSets) - { - /* - * Ungrouped aggregate will certainly want to read all the tuples, - * and it will deliver a single result row per grouping set (or 1 - * if no grouping sets were explicitly given, in which case leave - * dNumGroups as-is) - */ - tuple_fraction = 0.0; - if (parse->groupingSets) - dNumGroups = list_length(parse->groupingSets); - } - else if (parse->distinctClause) - { - /* - * Since there was no grouping or aggregation, it's reasonable to - * assume the UNIQUE filter has effects comparable to GROUP BY. - * (If DISTINCT is used with grouping, we ignore its effects for - * rowcount estimation purposes; this amounts to assuming the - * grouped rows are distinct already.) - */ - List *distinctExprs; - - distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, - parse->targetList); - dNumGroups = estimate_num_groups(root, distinctExprs, path_rows, NULL); - - /* - * Adjust tuple_fraction the same way as for GROUP BY, too. - */ - if (tuple_fraction >= 1.0) - tuple_fraction /= dNumGroups; - } - else - { - /* - * Plain non-grouped, non-aggregated query: an absolute tuple - * fraction can be divided by the number of tuples. - */ - if (tuple_fraction >= 1.0) - tuple_fraction /= path_rows; } /* - * Pick out the cheapest-total path as well as the cheapest presorted - * path for the requested pathkeys (if there is one). We should take - * the tuple fraction into account when selecting the cheapest - * presorted path, but not when selecting the cheapest-total path, - * since if we have to sort then we'll have to fetch all the tuples. - * (But there's a special case: if query_pathkeys is NIL, meaning - * order doesn't matter, then the "cheapest presorted" path will be - * the cheapest overall for the tuple fraction.) + * Determine the tlist we need grouping paths to emit. While we could + * skip this if we're not going to call create_grouping_paths, it's + * trivial unless we've got window functions, and then we have to do + * the work anyway. (XXX: refactor to work with PathTargets instead + * of tlists) */ - cheapest_path = final_rel->cheapest_total_path; - - sorted_path = - get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, - root->query_pathkeys, - NULL, - tuple_fraction); - - /* Don't consider same path in both guises; just wastes effort */ - if (sorted_path == cheapest_path) - sorted_path = NULL; + if (activeWindows) + grouping_tlist = make_windowInputTargetList(root, + tlist, + activeWindows); + else + grouping_tlist = tlist; /* - * Forget about the presorted path if it would be cheaper to sort the - * cheapest-total path. Here we need consider only the behavior at - * the tuple_fraction point. Also, limit_tuples is only relevant if - * not grouping/aggregating, so use root->limit_tuples in the - * cost_sort call. + * If we have grouping and/or aggregation, consider ways to implement + * that. We build a new upperrel representing the output of this + * phase. */ - if (sorted_path) + if (parse->groupClause || parse->groupingSets || parse->hasAggs || + root->hasHavingQual) { - Path sort_path; /* dummy for result of cost_sort */ - - if (root->query_pathkeys == NIL || - pathkeys_contained_in(root->query_pathkeys, - cheapest_path->pathkeys)) - { - /* No sort needed for cheapest path */ - sort_path.startup_cost = cheapest_path->startup_cost; - sort_path.total_cost = cheapest_path->total_cost; - } - else - { - /* Figure cost for sorting */ - cost_sort(&sort_path, root, root->query_pathkeys, - cheapest_path->total_cost, - path_rows, cheapest_path->pathtarget->width, - 0.0, work_mem, root->limit_tuples); - } - - if (compare_fractional_path_costs(sorted_path, &sort_path, - tuple_fraction) > 0) - { - /* Presorted path is a loser */ - sorted_path = NULL; - } + current_rel = create_grouping_paths(root, + current_rel, + create_pathtarget(root, + grouping_tlist), + groupColIdx, + rollup_lists, + rollup_groupclauses); } /* - * Consider whether we want to use hashing instead of sorting. + * If we have window functions, consider ways to implement those. We + * build a new upperrel representing the output of this phase. */ - if (parse->groupClause) - { - /* - * If grouping, decide whether to use sorted or hashed grouping. - * If grouping sets are present, we can currently do only sorted - * grouping. - */ - - if (parse->groupingSets) - { - use_hashed_grouping = false; - } - else - { - use_hashed_grouping = - choose_hashed_grouping(root, - tuple_fraction, limit_tuples, - path_rows, - cheapest_path, sorted_path, - dNumGroups, &agg_costs); - } - - /* Also convert # groups to long int --- but 'ware overflow! */ - numGroups = (long) Min(dNumGroups, (double) LONG_MAX); - } - else if (parse->distinctClause && sorted_path && - !root->hasHavingQual && !parse->hasAggs && !activeWindows) + if (activeWindows) { - /* - * We'll reach the DISTINCT stage without any intermediate - * processing, so figure out whether we will want to hash or not - * so we can choose whether to use cheapest or sorted path. - */ - use_hashed_distinct = - choose_hashed_distinct(root, - tuple_fraction, limit_tuples, - path_rows, - cheapest_path->startup_cost, - cheapest_path->total_cost, - cheapest_path->pathtarget->width, - sorted_path->startup_cost, - sorted_path->total_cost, - sorted_path->pathtarget->width, - sorted_path->pathkeys, - dNumGroups); - tested_hashed_distinct = true; + current_rel = create_window_paths(root, + current_rel, + grouping_tlist, + tlist, + wflists, + activeWindows); } /* - * Select the best path. If we are doing hashed grouping, we will - * always read all the input tuples, so use the cheapest-total path. - * Otherwise, the comparison above is correct. + * If there are set-returning functions in the tlist, scale up the + * assumed output rowcounts of all surviving Paths to account for + * that. This is a bit of a kluge, but it's not clear how to account + * for it in a more principled way. We definitely don't want to apply + * the multiplier more than once, which would happen if we tried to + * fold it into PathTarget accounting. And the expansion does happen + * before any explicit DISTINCT or ORDER BY processing is done. */ - if (use_hashed_grouping || use_hashed_distinct || !sorted_path) - best_path = cheapest_path; - else - best_path = sorted_path; - - /* - * Check to see if it's possible to optimize MIN/MAX aggregates. If - * so, we will forget all the work we did so far to choose a "regular" - * path ... but we had to do it anyway to be able to tell which way is - * cheaper. - */ - result_plan = optimize_minmax_aggregates(root, - tlist, - &agg_costs, - best_path); - if (result_plan != NULL) - { - /* - * optimize_minmax_aggregates generated the full plan, with the - * right tlist, and it has no sort order. - */ - current_pathkeys = NIL; - } - else + tlist_rows = tlist_returns_set_rows(tlist); + if (tlist_rows > 1) { - /* - * Normal case --- create a plan according to query_planner's - * results. - */ - List *sub_tlist; - AttrNumber *groupColIdx = NULL; - bool need_tlist_eval = true; - bool need_sort_for_grouping = false; - - result_plan = create_plan(root, best_path); - current_pathkeys = best_path->pathkeys; - - /* Detect if we'll need an explicit sort for grouping */ - if (parse->groupClause && !use_hashed_grouping && - !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) - need_sort_for_grouping = true; - - /* - * Generate appropriate target list for scan/join subplan; may be - * different from tlist if grouping or aggregation is needed. - */ - sub_tlist = make_subplanTargetList(root, tlist, - &groupColIdx, - &need_tlist_eval); - - /* - * create_plan returns a plan with just a "flat" tlist of required - * Vars. Usually we need to insert the sub_tlist as the tlist of - * the top plan node. However, we can skip that if we determined - * that whatever create_plan chose to return will be good enough. - * - * If we need_sort_for_grouping, always override create_plan's - * tlist, so that we don't sort useless data from a "physical" - * tlist. - */ - if (need_tlist_eval || need_sort_for_grouping) + foreach(lc, current_rel->pathlist) { - /* - * If the top-level plan node is one that cannot do expression - * evaluation and its existing target list isn't already what - * we need, we must insert a Result node to project the - * desired tlist. - */ - if (!is_projection_capable_plan(result_plan) && - !tlist_same_exprs(sub_tlist, result_plan->targetlist)) - { - result_plan = (Plan *) make_result(root, - sub_tlist, - NULL, - result_plan); - } - else - { - /* - * Otherwise, just replace the subplan's flat tlist with - * the desired tlist. - */ - result_plan->targetlist = sub_tlist; - } + Path *path = (Path *) lfirst(lc); /* - * Also, account for the cost of evaluation of the sub_tlist. - * See comments for add_tlist_costs_to_plan() for more info. + * We assume that execution costs of the tlist as such were + * already accounted for. However, it still seems appropriate + * to charge something more for the executor's general costs + * of processing the added tuples. The cost is probably less + * than cpu_tuple_cost, though, so we arbitrarily use half of + * that. */ - add_tlist_costs_to_plan(root, result_plan, sub_tlist); - } - else - { - /* - * Since we're using create_plan's tlist and not the one - * make_subplanTargetList calculated, we have to refigure any - * grouping-column indexes make_subplanTargetList computed. - */ - locate_grouping_columns(root, tlist, result_plan->targetlist, - groupColIdx); - } + path->total_cost += path->rows * (tlist_rows - 1) * + cpu_tuple_cost / 2; - /* - * groupColIdx is now cast in stone, so record a mapping from - * tleSortGroupRef to column index. setrefs.c will need this to - * finalize GROUPING() operations. - */ - if (parse->groupingSets) - { - AttrNumber *grouping_map = palloc0(sizeof(AttrNumber) * (maxref + 1)); - ListCell *lc; - int i = 0; - - foreach(lc, parse->groupClause) - { - SortGroupClause *gc = lfirst(lc); - - grouping_map[gc->tleSortGroupRef] = groupColIdx[i++]; - } - - root->grouping_map = grouping_map; - } - - /* - * Insert AGG or GROUP node if needed, plus an explicit sort step - * if necessary. - * - * HAVING clause, if any, becomes qual of the Agg or Group node. - */ - if (use_hashed_grouping) - { - /* Hashed aggregate plan --- no sort needed */ - result_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - AGG_HASHED, - &agg_costs, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - NIL, - numGroups, - false, - true, - result_plan); - /* Hashed aggregation produces randomly-ordered results */ - current_pathkeys = NIL; - } - else if (parse->hasAggs || - (parse->groupingSets && parse->groupClause)) - { - /* - * Aggregation and/or non-degenerate grouping sets. - * - * Output is in sorted order by group_pathkeys if, and only - * if, there is a single rollup operation on a non-empty list - * of grouping expressions. - */ - if (list_length(rollup_groupclauses) == 1 - && list_length(linitial(rollup_groupclauses)) > 0) - current_pathkeys = root->group_pathkeys; - else - current_pathkeys = NIL; - - result_plan = build_grouping_chain(root, - parse, - tlist, - need_sort_for_grouping, - rollup_groupclauses, - rollup_lists, - groupColIdx, - &agg_costs, - numGroups, - result_plan); + path->rows *= tlist_rows; } - else if (parse->groupClause) - { - /* - * GROUP BY without aggregation, so insert a group node (plus - * the appropriate sort node, if necessary). - * - * Add an explicit sort if we couldn't make the path come out - * the way the GROUP node needs it. - */ - if (need_sort_for_grouping) - { - result_plan = (Plan *) - make_sort_from_groupcols(root, - parse->groupClause, - groupColIdx, - result_plan); - current_pathkeys = root->group_pathkeys; - } - - result_plan = (Plan *) make_group(root, - tlist, - (List *) parse->havingQual, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - dNumGroups, - result_plan); - /* The Group node won't change sort ordering */ - } - else if (root->hasHavingQual || parse->groupingSets) - { - int nrows = list_length(parse->groupingSets); - - /* - * No aggregates, and no GROUP BY, but we have a HAVING qual - * or grouping sets (which by elimination of cases above must - * consist solely of empty grouping sets, since otherwise - * groupClause will be non-empty). - * - * This is a degenerate case in which we are supposed to emit - * either 0 or 1 row for each grouping set depending on - * whether HAVING succeeds. Furthermore, there cannot be any - * variables in either HAVING or the targetlist, so we - * actually do not need the FROM table at all! We can just - * throw away the plan-so-far and generate a Result node. This - * is a sufficiently unusual corner case that it's not worth - * contorting the structure of this routine to avoid having to - * generate the plan in the first place. - */ - result_plan = (Plan *) make_result(root, - tlist, - parse->havingQual, - NULL); - - /* - * Doesn't seem worthwhile writing code to cons up a - * generate_series or a values scan to emit multiple rows. - * Instead just clone the result in an Append. - */ - if (nrows > 1) - { - List *plans = list_make1(result_plan); - - while (--nrows > 0) - plans = lappend(plans, copyObject(result_plan)); - - result_plan = (Plan *) make_append(plans, tlist); - } - } - } /* end of non-minmax-aggregate case */ - - /* - * Since each window function could require a different sort order, we - * stack up a WindowAgg node for each window, with sort steps between - * them as needed. - */ - if (activeWindows) - { - List *window_tlist; - ListCell *l; - - /* - * If the top-level plan node is one that cannot do expression - * evaluation, we must insert a Result node to project the desired - * tlist. (In some cases this might not really be required, but - * it's not worth trying to avoid it. In particular, think not to - * skip adding the Result if the initial window_tlist matches the - * top-level plan node's output, because we might change the tlist - * inside the following loop.) Note that on second and subsequent - * passes through the following loop, the top-level node will be a - * WindowAgg which we know can project; so we only need to check - * once. - */ - if (!is_projection_capable_plan(result_plan)) - { - result_plan = (Plan *) make_result(root, - NIL, - NULL, - result_plan); - } - - /* - * The "base" targetlist for all steps of the windowing process is - * a flat tlist of all Vars and Aggs needed in the result. (In - * some cases we wouldn't need to propagate all of these all the - * way to the top, since they might only be needed as inputs to - * WindowFuncs. It's probably not worth trying to optimize that - * though.) We also add window partitioning and sorting - * expressions to the base tlist, to ensure they're computed only - * once at the bottom of the stack (that's critical for volatile - * functions). As we climb up the stack, we'll add outputs for - * the WindowFuncs computed at each level. - */ - window_tlist = make_windowInputTargetList(root, - tlist, - activeWindows); - - /* - * The copyObject steps here are needed to ensure that each plan - * node has a separately modifiable tlist. (XXX wouldn't a - * shallow list copy do for that?) - */ - result_plan->targetlist = (List *) copyObject(window_tlist); - - foreach(l, activeWindows) - { - WindowClause *wc = (WindowClause *) lfirst(l); - List *window_pathkeys; - int partNumCols; - AttrNumber *partColIdx; - Oid *partOperators; - int ordNumCols; - AttrNumber *ordColIdx; - Oid *ordOperators; - - window_pathkeys = make_pathkeys_for_window(root, - wc, - tlist); - - /* - * This is a bit tricky: we build a sort node even if we don't - * really have to sort. Even when no explicit sort is needed, - * we need to have suitable resjunk items added to the input - * plan's tlist for any partitioning or ordering columns that - * aren't plain Vars. (In theory, make_windowInputTargetList - * should have provided all such columns, but let's not assume - * that here.) Furthermore, this way we can use existing - * infrastructure to identify which input columns are the - * interesting ones. - */ - if (window_pathkeys) - { - Sort *sort_plan; - - sort_plan = make_sort_from_pathkeys(root, - result_plan, - window_pathkeys, - -1.0); - if (!pathkeys_contained_in(window_pathkeys, - current_pathkeys)) - { - /* we do indeed need to sort */ - result_plan = (Plan *) sort_plan; - current_pathkeys = window_pathkeys; - } - /* In either case, extract the per-column information */ - get_column_info_for_window(root, wc, tlist, - sort_plan->numCols, - sort_plan->sortColIdx, - &partNumCols, - &partColIdx, - &partOperators, - &ordNumCols, - &ordColIdx, - &ordOperators); - } - else - { - /* empty window specification, nothing to sort */ - partNumCols = 0; - partColIdx = NULL; - partOperators = NULL; - ordNumCols = 0; - ordColIdx = NULL; - ordOperators = NULL; - } - if (lnext(l)) - { - /* Add the current WindowFuncs to the running tlist */ - window_tlist = add_to_flat_tlist(window_tlist, - wflists->windowFuncs[wc->winref]); - } - else - { - /* Install the original tlist in the topmost WindowAgg */ - window_tlist = tlist; - } - - /* ... and make the WindowAgg plan node */ - result_plan = (Plan *) - make_windowagg(root, - (List *) copyObject(window_tlist), - wflists->windowFuncs[wc->winref], - wc->winref, - partNumCols, - partColIdx, - partOperators, - ordNumCols, - ordColIdx, - ordOperators, - wc->frameOptions, - wc->startOffset, - wc->endOffset, - result_plan); - } + /* There seems no need for a fresh set_cheapest comparison. */ } - } /* end of if (setOperations) */ - - /* - * If there is a DISTINCT clause, add the necessary node(s). - */ - if (parse->distinctClause) - { - double dNumDistinctRows; - long numDistinctRows; /* - * If there was grouping or aggregation, use the current number of - * rows as the estimated number of DISTINCT rows (ie, assume the - * result was already mostly unique). If not, use the number of - * distinct-groups calculated previously. + * If there is a DISTINCT clause, consider ways to implement that. We + * build a new upperrel representing the output of this phase. */ - if (parse->groupClause || parse->groupingSets || root->hasHavingQual || parse->hasAggs) - dNumDistinctRows = result_plan->plan_rows; - else - dNumDistinctRows = dNumGroups; - - /* Also convert to long int --- but 'ware overflow! */ - numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); - - /* Choose implementation method if we didn't already */ - if (!tested_hashed_distinct) + if (parse->distinctClause) { - /* - * At this point, either hashed or sorted grouping will have to - * work from result_plan, so we pass that as both "cheapest" and - * "sorted". - */ - use_hashed_distinct = - choose_hashed_distinct(root, - tuple_fraction, limit_tuples, - result_plan->plan_rows, - result_plan->startup_cost, - result_plan->total_cost, - result_plan->plan_width, - result_plan->startup_cost, - result_plan->total_cost, - result_plan->plan_width, - current_pathkeys, - dNumDistinctRows); + current_rel = create_distinct_paths(root, + current_rel); } - if (use_hashed_distinct) - { - /* Hashed aggregate plan --- no sort needed */ - result_plan = (Plan *) make_agg(root, - result_plan->targetlist, - NIL, - AGG_HASHED, - NULL, - list_length(parse->distinctClause), - extract_grouping_cols(parse->distinctClause, - result_plan->targetlist), - extract_grouping_ops(parse->distinctClause), - NIL, - numDistinctRows, - false, - true, - result_plan); - /* Hashed aggregation produces randomly-ordered results */ - current_pathkeys = NIL; - } - else - { - /* - * Use a Unique node to implement DISTINCT. Add an explicit sort - * if we couldn't make the path come out the way the Unique node - * needs it. If we do have to sort, always sort by the more - * rigorous of DISTINCT and ORDER BY, to avoid a second sort - * below. However, for regular DISTINCT, don't sort now if we - * don't have to --- sorting afterwards will likely be cheaper, - * and also has the possibility of optimizing via LIMIT. But for - * DISTINCT ON, we *must* force the final sort now, else it won't - * have the desired behavior. - */ - List *needed_pathkeys; - - if (parse->hasDistinctOn && - list_length(root->distinct_pathkeys) < - list_length(root->sort_pathkeys)) - needed_pathkeys = root->sort_pathkeys; - else - needed_pathkeys = root->distinct_pathkeys; - - if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) - { - if (list_length(root->distinct_pathkeys) >= - list_length(root->sort_pathkeys)) - current_pathkeys = root->distinct_pathkeys; - else - { - current_pathkeys = root->sort_pathkeys; - /* Assert checks that parser didn't mess up... */ - Assert(pathkeys_contained_in(root->distinct_pathkeys, - current_pathkeys)); - } - - result_plan = (Plan *) make_sort_from_pathkeys(root, - result_plan, - current_pathkeys, - -1.0); - } - - result_plan = (Plan *) make_unique(result_plan, - parse->distinctClause); - result_plan->plan_rows = dNumDistinctRows; - /* The Unique node won't change sort ordering */ - } - } + } /* end of if (setOperations) */ /* - * If ORDER BY was given and we were not able to make the plan come out in - * the right order, add an explicit sort step. + * If ORDER BY was given, consider ways to implement that, and generate a + * new upperrel containing only paths that emit the correct ordering. We + * can apply the original limit_tuples limit in sorting now. */ if (parse->sortClause) { - if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) - { - result_plan = (Plan *) make_sort_from_pathkeys(root, - result_plan, - root->sort_pathkeys, - limit_tuples); - current_pathkeys = root->sort_pathkeys; - } + current_rel = create_ordered_paths(root, + current_rel, + limit_tuples); } /* - * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. - * (Note: we intentionally test parse->rowMarks not root->rowMarks here. - * If there are only non-locking rowmarks, they should be handled by the - * ModifyTable node instead.) + * Now we are prepared to build the final-output upperrel. Insert all + * surviving paths, with LockRows, Limit, and/or ModifyTable steps added + * if needed. */ - if (parse->rowMarks) + final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + + foreach(lc, current_rel->pathlist) { - result_plan = (Plan *) make_lockrows(result_plan, - root->rowMarks, - SS_assign_special_param(root)); + Path *path = (Path *) lfirst(lc); /* - * The result can no longer be assumed sorted, since locking might - * cause the sort key columns to be replaced with new values. + * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. + * (Note: we intentionally test parse->rowMarks not root->rowMarks + * here. If there are only non-locking rowmarks, they should be + * handled by the ModifyTable node instead. However, root->rowMarks + * is what goes into the LockRows node.) */ - current_pathkeys = NIL; - } - - /* - * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. - */ - if (limit_needed(parse)) - { - result_plan = (Plan *) make_limit(result_plan, - parse->limitOffset, - parse->limitCount, - offset_est, - count_est); - } - - /* - * Return the actual output ordering in query_pathkeys for possible use by - * an outer query level. - */ - root->query_pathkeys = current_pathkeys; - - return result_plan; -} - - -/* - * Given a groupclause for a collection of grouping sets, produce the - * corresponding groupColIdx. - * - * root->grouping_map maps the tleSortGroupRef to the actual column position in - * the input tuple. So we get the ref from the entries in the groupclause and - * look them up there. - */ -static AttrNumber * -remap_groupColIdx(PlannerInfo *root, List *groupClause) -{ - AttrNumber *grouping_map = root->grouping_map; - AttrNumber *new_grpColIdx; - ListCell *lc; - int i; - - Assert(grouping_map); - - new_grpColIdx = palloc0(sizeof(AttrNumber) * list_length(groupClause)); - - i = 0; - foreach(lc, groupClause) - { - SortGroupClause *clause = lfirst(lc); - - new_grpColIdx[i++] = grouping_map[clause->tleSortGroupRef]; - } - - return new_grpColIdx; -} - -/* - * Build Agg and Sort nodes to implement sorted grouping with one or more - * grouping sets. A plain GROUP BY or just the presence of aggregates counts - * for this purpose as a single grouping set; the calling code is responsible - * for providing a single-element rollup_groupclauses list for such cases, - * though rollup_lists may be nil. - * - * The last entry in rollup_groupclauses (which is the one the input is sorted - * on, if at all) is the one used for the returned Agg node. Any additional - * rollups are attached, with corresponding sort info, to subsidiary Agg and - * Sort nodes attached to the side of the real Agg node; these nodes don't - * participate in the plan directly, but they are both a convenient way to - * represent the required data and a convenient way to account for the costs - * of execution. - */ -static Plan * -build_grouping_chain(PlannerInfo *root, - Query *parse, - List *tlist, - bool need_sort_for_grouping, - List *rollup_groupclauses, - List *rollup_lists, - AttrNumber *groupColIdx, - AggClauseCosts *agg_costs, - long numGroups, - Plan *result_plan) -{ - AttrNumber *top_grpColIdx = groupColIdx; - List *chain = NIL; - - /* - * Prepare the grpColIdx for the real Agg node first, because we may need - * it for sorting - */ - if (parse->groupingSets) - top_grpColIdx = remap_groupColIdx(root, llast(rollup_groupclauses)); - - /* - * If we need a Sort operation on the input, generate that. - */ - if (need_sort_for_grouping) - { - result_plan = (Plan *) - make_sort_from_groupcols(root, - llast(rollup_groupclauses), - top_grpColIdx, - result_plan); - } - - /* - * Generate the side nodes that describe the other sort and group - * operations besides the top one. - */ - if (list_length(rollup_groupclauses) > 1) - { - ListCell *lc, - *lc2; - - Assert(list_length(rollup_groupclauses) == list_length(rollup_lists)); - forboth(lc, rollup_groupclauses, lc2, rollup_lists) + if (parse->rowMarks) { - List *groupClause = (List *) lfirst(lc); - List *gsets = (List *) lfirst(lc2); - AttrNumber *new_grpColIdx; - Plan *sort_plan; - Plan *agg_plan; - - /* We want to iterate over all but the last rollup list elements */ - if (lnext(lc) == NULL) - break; + path = (Path *) create_lockrows_path(root, final_rel, path, + root->rowMarks, + SS_assign_special_param(root)); + } - new_grpColIdx = remap_groupColIdx(root, groupClause); + /* + * If there is a LIMIT/OFFSET clause, add the LIMIT node. + */ + if (limit_needed(parse)) + { + path = (Path *) create_limit_path(root, final_rel, path, + parse->limitOffset, + parse->limitCount, + offset_est, count_est); + } - sort_plan = (Plan *) - make_sort_from_groupcols(root, - groupClause, - new_grpColIdx, - result_plan); + /* + * If this is an INSERT/UPDATE/DELETE, and we're not being called from + * inheritance_planner, add the ModifyTable node. + */ + if (parse->commandType != CMD_SELECT && !inheritance_update) + { + List *withCheckOptionLists; + List *returningLists; + List *rowMarks; /* - * sort_plan includes the cost of result_plan, which is not what - * we want (since we'll not actually run that plan again). So - * correct the cost figures. + * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if + * needed. */ - sort_plan->startup_cost -= result_plan->total_cost; - sort_plan->total_cost -= result_plan->total_cost; - - agg_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - AGG_SORTED, - agg_costs, - list_length(linitial(gsets)), - new_grpColIdx, - extract_grouping_ops(groupClause), - gsets, - numGroups, - false, - true, - sort_plan); + if (parse->withCheckOptions) + withCheckOptionLists = list_make1(parse->withCheckOptions); + else + withCheckOptionLists = NIL; + + if (parse->returningList) + returningLists = list_make1(parse->returningList); + else + returningLists = NIL; /* - * Nuke stuff we don't need to avoid bloating debug output. + * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node + * will have dealt with fetching non-locked marked rows, else we + * need to have ModifyTable do that. */ - sort_plan->targetlist = NIL; - sort_plan->lefttree = NULL; - - agg_plan->targetlist = NIL; - agg_plan->qual = NIL; + if (parse->rowMarks) + rowMarks = NIL; + else + rowMarks = root->rowMarks; - chain = lappend(chain, agg_plan); + path = (Path *) + create_modifytable_path(root, final_rel, + parse->commandType, + parse->canSetTag, + parse->resultRelation, + list_make1_int(parse->resultRelation), + list_make1(path), + list_make1(root), + withCheckOptionLists, + returningLists, + rowMarks, + parse->onConflict, + SS_assign_special_param(root)); } - } - - /* - * Now make the final Agg node - */ - { - List *groupClause = (List *) llast(rollup_groupclauses); - List *gsets = rollup_lists ? (List *) llast(rollup_lists) : NIL; - int numGroupCols; - ListCell *lc; - - if (gsets) - numGroupCols = list_length(linitial(gsets)); - else - numGroupCols = list_length(parse->groupClause); - - result_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN, - agg_costs, - numGroupCols, - top_grpColIdx, - extract_grouping_ops(groupClause), - gsets, - numGroups, - false, - true, - result_plan); - - ((Agg *) result_plan)->chain = chain; - - /* - * Add the additional costs. But only the total costs count, since the - * additional sorts aren't run on startup. - */ - foreach(lc, chain) - { - Plan *subplan = lfirst(lc); - result_plan->total_cost += subplan->total_cost; - } + /* And shove it into final_rel */ + add_path(final_rel, path); } - return result_plan; + /* Note: currently, we leave it to callers to do set_cheapest() */ } -/* - * add_tlist_costs_to_plan - * - * Estimate the execution costs associated with evaluating the targetlist - * expressions, and add them to the cost estimates for the Plan node. - * - * If the tlist contains set-returning functions, also inflate the Plan's cost - * and plan_rows estimates accordingly. (Hence, this must be called *after* - * any logic that uses plan_rows to, eg, estimate qual evaluation costs.) - * - * Note: during initial stages of planning, we mostly consider plan nodes with - * "flat" tlists, containing just Vars and PlaceHolderVars. The evaluation - * cost of Vars is zero according to the model used by cost_qual_eval() (or if - * you prefer, the cost is factored into cpu_tuple_cost). The evaluation cost - * of a PHV's expression is charged as part of the scan cost of whichever plan - * node first computes it, and then subsequent references to the PHV can be - * taken as having cost zero. Thus we can avoid worrying about tlist cost - * as such throughout query_planner() and subroutines. But once we apply a - * tlist that might contain actual operators, sub-selects, etc, we'd better - * account for its cost. Any set-returning functions in the tlist must also - * affect the estimated rowcount. - * - * Once grouping_planner() has applied a general tlist to the topmost - * scan/join plan node, any tlist eval cost for added-on nodes should be - * accounted for as we create those nodes. Presently, of the node types we - * can add on later, only Agg, WindowAgg, and Group project new tlists (the - * rest just copy their input tuples) --- so make_agg(), make_windowagg() and - * make_group() are responsible for calling this function to account for their - * tlist costs. - */ -void -add_tlist_costs_to_plan(PlannerInfo *root, Plan *plan, List *tlist) -{ - QualCost tlist_cost; - double tlist_rows; - - cost_qual_eval(&tlist_cost, tlist, root); - plan->startup_cost += tlist_cost.startup; - plan->total_cost += tlist_cost.startup + - tlist_cost.per_tuple * plan->plan_rows; - - tlist_rows = tlist_returns_set_rows(tlist); - if (tlist_rows > 1) - { - /* - * We assume that execution costs of the tlist proper were all - * accounted for by cost_qual_eval. However, it still seems - * appropriate to charge something more for the executor's general - * costs of processing the added tuples. The cost is probably less - * than cpu_tuple_cost, though, so we arbitrarily use half of that. - */ - plan->total_cost += plan->plan_rows * (tlist_rows - 1) * - cpu_tuple_cost / 2; - - plan->plan_rows *= tlist_rows; - } -} /* * Detect whether a plan node is a "dummy" plan created when a relation @@ -2987,7 +2142,7 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength) * for OFFSET but a little bit bogus for LIMIT: effectively we estimate * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's * usual practice of never estimating less than one row.) These values will - * be passed to make_limit, which see if you change this code. + * be passed to create_limit_path, which see if you change this code. * * The return value is the suitably adjusted tuple_fraction to use for * planning the query. This adjustment is not overridable, since it reflects @@ -3839,333 +2994,767 @@ standard_qp_callback(PlannerInfo *root, void *extra) } /* - * choose_hashed_grouping - should we use hashed grouping? + * Estimate number of groups produced by grouping clauses (1 if not grouping) * - * Returns TRUE to select hashing, FALSE to select sorting. + * path_rows: number of output rows from scan/join step + * rollup_lists: list of grouping sets, or NIL if not doing grouping sets + * rollup_groupclauses: list of grouping clauses for grouping sets, + * or NIL if not doing grouping sets */ -static bool -choose_hashed_grouping(PlannerInfo *root, - double tuple_fraction, double limit_tuples, - double path_rows, - Path *cheapest_path, Path *sorted_path, - double dNumGroups, AggClauseCosts *agg_costs) +static double +get_number_of_groups(PlannerInfo *root, + double path_rows, + List *rollup_lists, + List *rollup_groupclauses) { Query *parse = root->parse; - int numGroupCols = list_length(parse->groupClause); - bool can_hash; - bool can_sort; - Size hashentrysize; - List *target_pathkeys; - List *current_pathkeys; - Path hashed_p; - Path sorted_p; - int sorted_p_width; - - /* - * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY - * aggregates. (Doing so would imply storing *all* the input values in - * the hash table, and/or running many sorts in parallel, either of which - * seems like a certain loser.) We similarly don't support ordered-set - * aggregates in hashed aggregation, but that case is included in the - * numOrderedAggs count. - */ - can_hash = (agg_costs->numOrderedAggs == 0 && - grouping_is_hashable(parse->groupClause)); - can_sort = grouping_is_sortable(parse->groupClause); + double dNumGroups; - /* Quick out if only one choice is workable */ - if (!(can_hash && can_sort)) + if (parse->groupClause) { - if (can_hash) - return true; - else if (can_sort) - return false; + List *groupExprs; + + if (parse->groupingSets) + { + /* Add up the estimates for each grouping set */ + ListCell *lc, + *lc2; + + dNumGroups = 0; + forboth(lc, rollup_groupclauses, lc2, rollup_lists) + { + List *groupClause = (List *) lfirst(lc); + List *gsets = (List *) lfirst(lc2); + ListCell *lc3; + + groupExprs = get_sortgrouplist_exprs(groupClause, + parse->targetList); + + foreach(lc3, gsets) + { + List *gset = (List *) lfirst(lc3); + + dNumGroups += estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + } + } + } else - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("could not implement GROUP BY"), - errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + { + /* Plain GROUP BY */ + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + + dNumGroups = estimate_num_groups(root, groupExprs, path_rows, + NULL); + } + } + else if (parse->groupingSets) + { + /* Empty grouping sets ... one result row for each one */ + dNumGroups = list_length(parse->groupingSets); } + else if (parse->hasAggs || root->hasHavingQual) + { + /* Plain aggregation, one result row */ + dNumGroups = 1; + } + else + { + /* Not grouping */ + dNumGroups = 1; + } + + return dNumGroups; +} + +/* + * create_grouping_paths + * + * Build a new upperrel containing Paths for grouping and/or aggregation. + * + * input_rel: contains the source-data Paths + * target: the pathtarget for the result Paths to compute + * groupColIdx: array of indexes of grouping columns in the source data + * rollup_lists: list of grouping sets, or NIL if not doing grouping sets + * rollup_groupclauses: list of grouping clauses for grouping sets, + * or NIL if not doing grouping sets + * + * We need to consider sorted and hashed aggregation in the same function, + * because otherwise (1) it would be harder to throw an appropriate error + * message if neither way works, and (2) we should not allow enable_hashagg or + * hashtable size considerations to dissuade us from using hashing if sorting + * is not possible. + */ +static RelOptInfo * +create_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + PathTarget *target, + AttrNumber *groupColIdx, + List *rollup_lists, + List *rollup_groupclauses) +{ + Query *parse = root->parse; + Path *cheapest_path = input_rel->cheapest_total_path; + RelOptInfo *grouped_rel; + AggClauseCosts agg_costs; + double dNumGroups; + bool allow_hash; + ListCell *lc; - /* Prefer sorting when enable_hashagg is off */ - if (!enable_hashagg) - return false; + /* For now, do all work in the (GROUP_AGG, NULL) upperrel */ + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); /* - * Don't do it if it doesn't look like the hashtable will fit into - * work_mem. + * Check for degenerate grouping. */ + if ((root->hasHavingQual || parse->groupingSets) && + !parse->hasAggs && parse->groupClause == NIL) + { + /* + * We have a HAVING qual and/or grouping sets, but no aggregates and + * no GROUP BY (which implies that the grouping sets are all empty). + * + * This is a degenerate case in which we are supposed to emit either + * zero or one row for each grouping set depending on whether HAVING + * succeeds. Furthermore, there cannot be any variables in either + * HAVING or the targetlist, so we actually do not need the FROM table + * at all! We can just throw away the plan-so-far and generate a + * Result node. This is a sufficiently unusual corner case that it's + * not worth contorting the structure of this module to avoid having + * to generate the earlier paths in the first place. + */ + int nrows = list_length(parse->groupingSets); + Path *path; + + if (nrows > 1) + { + /* + * Doesn't seem worthwhile writing code to cons up a + * generate_series or a values scan to emit multiple rows. Instead + * just make N clones and append them. (With a volatile HAVING + * clause, this means you might get between 0 and N output rows. + * Offhand I think that's desired.) + */ + List *paths = NIL; + + while (--nrows >= 0) + { + path = (Path *) + create_result_path(grouped_rel, + target, + (List *) parse->havingQual); + paths = lappend(paths, path); + } + path = (Path *) + create_append_path(grouped_rel, + paths, + NULL, + 0); + path->pathtarget = target; + } + else + { + /* No grouping sets, or just one, so one output row */ + path = (Path *) + create_result_path(grouped_rel, + target, + (List *) parse->havingQual); + } + + add_path(grouped_rel, path); - /* Estimate per-hash-entry space at tuple width... */ - hashentrysize = MAXALIGN(cheapest_path->pathtarget->width) + - MAXALIGN(SizeofMinimalTupleHeader); - /* plus space for pass-by-ref transition values... */ - hashentrysize += agg_costs->transitionSpace; - /* plus the per-hash-entry overhead */ - hashentrysize += hash_agg_entry_size(agg_costs->numAggs); + /* No need to consider any other alternatives. */ + set_cheapest(grouped_rel); - if (hashentrysize * dNumGroups > work_mem * 1024L) - return false; + return grouped_rel; + } /* - * When we have both GROUP BY and DISTINCT, use the more-rigorous of - * DISTINCT and ORDER BY as the assumed required output sort order. This - * is an oversimplification because the DISTINCT might get implemented via - * hashing, but it's not clear that the case is common enough (or that our - * estimates are good enough) to justify trying to solve it exactly. + * Collect statistics about aggregates for estimating costs. Note: we do + * not detect duplicate aggregates here; a somewhat-overestimated cost is + * okay for our purposes. */ - if (list_length(root->distinct_pathkeys) > - list_length(root->sort_pathkeys)) - target_pathkeys = root->distinct_pathkeys; - else - target_pathkeys = root->sort_pathkeys; + MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); + if (parse->hasAggs) + { + count_agg_clauses(root, (Node *) target->exprs, &agg_costs); + count_agg_clauses(root, parse->havingQual, &agg_costs); + } + + /* + * Estimate number of groups. Note: if cheapest_path is a dummy, it will + * have zero rowcount estimate, which we don't want to use for fear of + * divide-by-zero. Hence clamp. + */ + dNumGroups = get_number_of_groups(root, + clamp_row_est(cheapest_path->rows), + rollup_lists, + rollup_groupclauses); /* - * See if the estimated cost is no more than doing it the other way. While - * avoiding the need for sorted input is usually a win, the fact that the - * output won't be sorted may be a loss; so we need to do an actual cost - * comparison. + * Consider sort-based implementations of grouping, if possible. (Note + * that if groupClause is empty, grouping_is_sortable() is trivially true, + * and all the pathkeys_contained_in() tests will succeed too, so that + * we'll consider every surviving input path.) + */ + if (grouping_is_sortable(parse->groupClause)) + { + /* + * Use any available suitably-sorted path as input, and also consider + * sorting the cheapest-total path. + */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); + if (path == cheapest_path || is_sorted) + { + /* Sort the cheapest-total path if it isn't already sorted */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + /* + * We have grouping sets, possibly with aggregation. Make + * a GroupingSetsPath. + */ + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + groupColIdx, + rollup_lists, + rollup_groupclauses, + &agg_costs, + dNumGroups)); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make + * an AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + target, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + parse->groupClause, + (List *) parse->havingQual, + &agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. + * Make a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + target, + parse->groupClause, + (List *) parse->havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } + } + } + } + + /* + * Consider hash-based implementations of grouping, if possible. * - * We need to consider cheapest_path + hashagg [+ final sort] versus - * either cheapest_path [+ sort] + group or agg [+ final sort] or - * presorted_path + group or agg [+ final sort] where brackets indicate a - * step that may not be needed. We assume grouping_planner() will have - * passed us a presorted path only if it's a winner compared to - * cheapest_path for this purpose. + * Hashed aggregation only applies if we're grouping. We currently can't + * hash if there are grouping sets, though. + * + * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY + * aggregates. (Doing so would imply storing *all* the input values in + * the hash table, and/or running many sorts in parallel, either of which + * seems like a certain loser.) We similarly don't support ordered-set + * aggregates in hashed aggregation, but that case is also included in the + * numOrderedAggs count. * - * These path variables are dummies that just hold cost fields; we don't - * make actual Paths for these steps. + * Note: grouping_is_hashable() is much more expensive to check than the + * other gating conditions, so we want to do it last. */ - cost_agg(&hashed_p, root, AGG_HASHED, agg_costs, - numGroupCols, dNumGroups, - cheapest_path->startup_cost, cheapest_path->total_cost, - path_rows); - /* Result of hashed agg is always unsorted */ - if (target_pathkeys) - cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost, - dNumGroups, cheapest_path->pathtarget->width, - 0.0, work_mem, limit_tuples); - - if (sorted_path) + allow_hash = (parse->groupClause != NIL && + parse->groupingSets == NIL && + agg_costs.numOrderedAggs == 0); + + /* Consider reasons to disable hashing, but only if we can sort instead */ + if (allow_hash && grouped_rel->pathlist != NIL) { - sorted_p.startup_cost = sorted_path->startup_cost; - sorted_p.total_cost = sorted_path->total_cost; - sorted_p_width = sorted_path->pathtarget->width; - current_pathkeys = sorted_path->pathkeys; + if (!enable_hashagg) + allow_hash = false; + else + { + /* + * Don't hash if it doesn't look like the hashtable will fit into + * work_mem. + */ + Size hashentrysize; + + /* Estimate per-hash-entry space at tuple width... */ + hashentrysize = MAXALIGN(cheapest_path->pathtarget->width) + + MAXALIGN(SizeofMinimalTupleHeader); + /* plus space for pass-by-ref transition values... */ + hashentrysize += agg_costs.transitionSpace; + /* plus the per-hash-entry overhead */ + hashentrysize += hash_agg_entry_size(agg_costs.numAggs); + + if (hashentrysize * dNumGroups > work_mem * 1024L) + allow_hash = false; + } } - else + + if (allow_hash && grouping_is_hashable(parse->groupClause)) { - sorted_p.startup_cost = cheapest_path->startup_cost; - sorted_p.total_cost = cheapest_path->total_cost; - sorted_p_width = cheapest_path->pathtarget->width; - current_pathkeys = cheapest_path->pathkeys; + /* + * We just need an Agg over the cheapest-total input path, since input + * order won't matter. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, grouped_rel, + cheapest_path, + target, + AGG_HASHED, + parse->groupClause, + (List *) parse->havingQual, + &agg_costs, + dNumGroups)); } - if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) + + /* Give a helpful error if we failed to find any implementation */ + if (grouped_rel->pathlist == NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement GROUP BY"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + + /* Now choose the best path(s) */ + set_cheapest(grouped_rel); + + return grouped_rel; +} + +/* + * create_window_paths + * + * Build a new upperrel containing Paths for window-function evaluation. + * + * input_rel: contains the source-data Paths + * base_tlist: result of make_windowInputTargetList + * tlist: query's final target list (which is what output paths should emit) + * wflists: result of find_window_functions + * activeWindows: result of select_active_windows + */ +static RelOptInfo * +create_window_paths(PlannerInfo *root, + RelOptInfo *input_rel, + List *base_tlist, + List *tlist, + WindowFuncLists *wflists, + List *activeWindows) +{ + RelOptInfo *window_rel; + ListCell *lc; + + /* For now, do all work in the (WINDOW, NULL) upperrel */ + window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL); + + /* + * Consider computing window functions starting from the existing + * cheapest-total path (which will likely require a sort) as well as any + * existing paths that satisfy root->window_pathkeys (which won't). + */ + foreach(lc, input_rel->pathlist) { - cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost, - path_rows, sorted_p_width, - 0.0, work_mem, -1.0); - current_pathkeys = root->group_pathkeys; + Path *path = (Path *) lfirst(lc); + + if (path == input_rel->cheapest_total_path || + pathkeys_contained_in(root->window_pathkeys, path->pathkeys)) + create_one_window_path(root, + window_rel, + path, + base_tlist, + tlist, + wflists, + activeWindows); } - if (parse->hasAggs) - cost_agg(&sorted_p, root, AGG_SORTED, agg_costs, - numGroupCols, dNumGroups, - sorted_p.startup_cost, sorted_p.total_cost, - path_rows); - else - cost_group(&sorted_p, root, numGroupCols, dNumGroups, - sorted_p.startup_cost, sorted_p.total_cost, - path_rows); - /* The Agg or Group node will preserve ordering */ - if (target_pathkeys && - !pathkeys_contained_in(target_pathkeys, current_pathkeys)) - cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost, - dNumGroups, sorted_p_width, - 0.0, work_mem, limit_tuples); + /* Now choose the best path(s) */ + set_cheapest(window_rel); + + return window_rel; +} + +/* + * Stack window-function implementation steps atop the given Path, and + * add the result to window_rel. + * + * window_rel: upperrel to contain result + * path: input Path to use + * base_tlist: result of make_windowInputTargetList + * tlist: query's final target list (which is what output paths should emit) + * wflists: result of find_window_functions + * activeWindows: result of select_active_windows + */ +static void +create_one_window_path(PlannerInfo *root, + RelOptInfo *window_rel, + Path *path, + List *base_tlist, + List *tlist, + WindowFuncLists *wflists, + List *activeWindows) +{ + List *window_tlist; + ListCell *l; /* - * Now make the decision using the top-level tuple fraction. + * Since each window clause could require a different sort order, we stack + * up a WindowAgg node for each clause, with sort steps between them as + * needed. (We assume that select_active_windows chose a good order for + * executing the clauses in.) + * + * The "base" targetlist for all steps of the windowing process is a flat + * tlist of all Vars and Aggs needed in the result. (In some cases we + * wouldn't need to propagate all of these all the way to the top, since + * they might only be needed as inputs to WindowFuncs. It's probably not + * worth trying to optimize that though.) We also add window partitioning + * and sorting expressions to the base tlist, to ensure they're computed + * only once at the bottom of the stack (that's critical for volatile + * functions). As we climb up the stack, we'll add outputs for the + * WindowFuncs computed at each level. + */ + window_tlist = base_tlist; + + /* + * Apply base_tlist to the given base path. If that path node is one that + * cannot do expression evaluation, we must insert a Result node to + * project the desired tlist. (In some cases this might not really be + * required, but it's not worth trying to avoid it.) If the query has + * both grouping and windowing, base_tlist was already applied to the + * input path, but apply_projection_to_path is smart about that. + * + * The seemingly redundant create_pathtarget() steps here are important to + * ensure that each path node has a separately modifiable tlist. */ - if (compare_fractional_path_costs(&hashed_p, &sorted_p, - tuple_fraction) < 0) + path = apply_projection_to_path(root, window_rel, + path, + create_pathtarget(root, base_tlist)); + + foreach(l, activeWindows) { - /* Hashed is cheaper, so use it */ - return true; + WindowClause *wc = (WindowClause *) lfirst(l); + List *window_pathkeys; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist); + + /* Sort if necessary */ + if (!pathkeys_contained_in(window_pathkeys, path->pathkeys)) + { + path = (Path *) create_sort_path(root, window_rel, + path, + window_pathkeys, + -1.0); + } + + if (lnext(l)) + { + /* Add the current WindowFuncs to the running tlist */ + window_tlist = add_to_flat_tlist(window_tlist, + wflists->windowFuncs[wc->winref]); + } + else + { + /* Install the final tlist in the topmost WindowAgg */ + window_tlist = tlist; + } + + path = (Path *) + create_windowagg_path(root, window_rel, path, + create_pathtarget(root, window_tlist), + wflists->windowFuncs[wc->winref], + wc, + window_pathkeys); } - return false; + + add_path(window_rel, path); } /* - * choose_hashed_distinct - should we use hashing for DISTINCT? + * create_distinct_paths * - * This is fairly similar to choose_hashed_grouping, but there are enough - * differences that it doesn't seem worth trying to unify the two functions. - * (One difference is that we sometimes apply this after forming a Plan, - * so the input alternatives can't be represented as Paths --- instead we - * pass in the costs as individual variables.) + * Build a new upperrel containing Paths for SELECT DISTINCT evaluation. * - * But note that making the two choices independently is a bit bogus in - * itself. If the two could be combined into a single choice operation - * it'd probably be better, but that seems far too unwieldy to be practical, - * especially considering that the combination of GROUP BY and DISTINCT - * isn't very common in real queries. By separating them, we are giving - * extra preference to using a sorting implementation when a common sort key - * is available ... and that's not necessarily wrong anyway. + * input_rel: contains the source-data Paths * - * Returns TRUE to select hashing, FALSE to select sorting. + * Note: input paths should already compute the desired pathtarget, since + * Sort/Unique won't project anything. */ -static bool -choose_hashed_distinct(PlannerInfo *root, - double tuple_fraction, double limit_tuples, - double path_rows, - Cost cheapest_startup_cost, Cost cheapest_total_cost, - int cheapest_path_width, - Cost sorted_startup_cost, Cost sorted_total_cost, - int sorted_path_width, - List *sorted_pathkeys, - double dNumDistinctRows) +static RelOptInfo * +create_distinct_paths(PlannerInfo *root, + RelOptInfo *input_rel) { Query *parse = root->parse; - int numDistinctCols = list_length(parse->distinctClause); - bool can_sort; - bool can_hash; - Size hashentrysize; - List *current_pathkeys; - List *needed_pathkeys; - Path hashed_p; - Path sorted_p; - - /* - * If we have a sortable DISTINCT ON clause, we always use sorting. This - * enforces the expected behavior of DISTINCT ON. - */ - can_sort = grouping_is_sortable(parse->distinctClause); - if (can_sort && parse->hasDistinctOn) - return false; + Path *cheapest_input_path = input_rel->cheapest_total_path; + RelOptInfo *distinct_rel; + double numDistinctRows; + bool allow_hash; + Path *path; + ListCell *lc; - can_hash = grouping_is_hashable(parse->distinctClause); + /* For now, do all work in the (DISTINCT, NULL) upperrel */ + distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); - /* Quick out if only one choice is workable */ - if (!(can_hash && can_sort)) + /* Estimate number of distinct rows there will be */ + if (parse->groupClause || parse->groupingSets || parse->hasAggs || + root->hasHavingQual) { - if (can_hash) - return true; - else if (can_sort) - return false; - else - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("could not implement DISTINCT"), - errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + /* + * If there was grouping or aggregation, use the number of input rows + * as the estimated number of DISTINCT rows (ie, assume the input is + * already mostly unique). + */ + numDistinctRows = cheapest_input_path->rows; } + else + { + /* + * Otherwise, the UNIQUE filter has effects comparable to GROUP BY. + */ + List *distinctExprs; - /* Prefer sorting when enable_hashagg is off */ - if (!enable_hashagg) - return false; + distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + numDistinctRows = estimate_num_groups(root, distinctExprs, + cheapest_input_path->rows, + NULL); + } /* - * Don't do it if it doesn't look like the hashtable will fit into - * work_mem. + * Consider sort-based implementations of DISTINCT, if possible. */ + if (grouping_is_sortable(parse->distinctClause)) + { + /* + * First, if we have any adequately-presorted paths, just stick a + * Unique node on those. Then consider doing an explicit sort of the + * cheapest input path and Unique'ing that. + * + * When we have DISTINCT ON, we must sort by the more rigorous of + * DISTINCT and ORDER BY, else it won't have the desired behavior. + * Also, if we do have to do an explicit sort, we might as well use + * the more rigorous ordering to avoid a second sort later. (Note + * that the parser will have ensured that one clause is a prefix of + * the other.) + */ + List *needed_pathkeys; + + if (parse->hasDistinctOn && + list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + needed_pathkeys = root->sort_pathkeys; + else + needed_pathkeys = root->distinct_pathkeys; - /* Estimate per-hash-entry space at tuple width... */ - hashentrysize = MAXALIGN(cheapest_path_width) + - MAXALIGN(SizeofMinimalTupleHeader); - /* plus the per-hash-entry overhead */ - hashentrysize += hash_agg_entry_size(0); + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } + } - if (hashentrysize * dNumDistinctRows > work_mem * 1024L) - return false; + /* For explicit-sort case, always use the more rigorous clause */ + if (list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + { + needed_pathkeys = root->sort_pathkeys; + /* Assert checks that parser didn't mess up... */ + Assert(pathkeys_contained_in(root->distinct_pathkeys, + needed_pathkeys)); + } + else + needed_pathkeys = root->distinct_pathkeys; + + path = cheapest_input_path; + if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + path = (Path *) create_sort_path(root, distinct_rel, + path, + needed_pathkeys, + -1.0); + + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } /* - * See if the estimated cost is no more than doing it the other way. While - * avoiding the need for sorted input is usually a win, the fact that the - * output won't be sorted may be a loss; so we need to do an actual cost - * comparison. + * Consider hash-based implementations of DISTINCT, if possible. * - * We need to consider cheapest_path + hashagg [+ final sort] versus - * sorted_path [+ sort] + group [+ final sort] where brackets indicate a - * step that may not be needed. + * If we were not able to make any other types of path, we *must* hash or + * die trying. If we do have other choices, there are several things that + * should prevent selection of hashing: if the query uses DISTINCT ON + * (because it won't really have the expected behavior if we hash), or if + * enable_hashagg is off, or if it looks like the hashtable will exceed + * work_mem. * - * These path variables are dummies that just hold cost fields; we don't - * make actual Paths for these steps. + * Note: grouping_is_hashable() is much more expensive to check than the + * other gating conditions, so we want to do it last. */ - cost_agg(&hashed_p, root, AGG_HASHED, NULL, - numDistinctCols, dNumDistinctRows, - cheapest_startup_cost, cheapest_total_cost, - path_rows); + if (distinct_rel->pathlist == NIL) + allow_hash = true; /* we have no alternatives */ + else if (parse->hasDistinctOn || !enable_hashagg) + allow_hash = false; /* policy-based decision not to hash */ + else + { + Size hashentrysize; - /* - * Result of hashed agg is always unsorted, so if ORDER BY is present we - * need to charge for the final sort. - */ - if (parse->sortClause) - cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, - dNumDistinctRows, cheapest_path_width, - 0.0, work_mem, limit_tuples); + /* Estimate per-hash-entry space at tuple width... */ + hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) + + MAXALIGN(SizeofMinimalTupleHeader); + /* plus the per-hash-entry overhead */ + hashentrysize += hash_agg_entry_size(0); - /* - * Now for the GROUP case. See comments in grouping_planner about the - * sorting choices here --- this code should match that code. - */ - sorted_p.startup_cost = sorted_startup_cost; - sorted_p.total_cost = sorted_total_cost; - current_pathkeys = sorted_pathkeys; - if (parse->hasDistinctOn && - list_length(root->distinct_pathkeys) < - list_length(root->sort_pathkeys)) - needed_pathkeys = root->sort_pathkeys; - else - needed_pathkeys = root->distinct_pathkeys; - if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) + /* Allow hashing only if hashtable is predicted to fit in work_mem */ + allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L); + } + + if (allow_hash && grouping_is_hashable(parse->distinctClause)) { - if (list_length(root->distinct_pathkeys) >= - list_length(root->sort_pathkeys)) - current_pathkeys = root->distinct_pathkeys; - else - current_pathkeys = root->sort_pathkeys; - cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost, - path_rows, sorted_path_width, - 0.0, work_mem, -1.0); + /* Generate hashed aggregate path --- no sort needed */ + add_path(distinct_rel, (Path *) + create_agg_path(root, + distinct_rel, + cheapest_input_path, + cheapest_input_path->pathtarget, + AGG_HASHED, + parse->distinctClause, + NIL, + NULL, + numDistinctRows)); } - cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows, - sorted_p.startup_cost, sorted_p.total_cost, - path_rows); - if (parse->sortClause && - !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) - cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, - dNumDistinctRows, sorted_path_width, - 0.0, work_mem, limit_tuples); - /* - * Now make the decision using the top-level tuple fraction. - */ - if (compare_fractional_path_costs(&hashed_p, &sorted_p, - tuple_fraction) < 0) + /* Give a helpful error if we failed to find any implementation */ + if (distinct_rel->pathlist == NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement DISTINCT"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + + /* Now choose the best path(s) */ + set_cheapest(distinct_rel); + + return distinct_rel; +} + +/* + * create_ordered_paths + * + * Build a new upperrel containing Paths for ORDER BY evaluation. + * + * All paths in the result must satisfy the ORDER BY ordering. + * The only new path we need consider is an explicit sort on the + * cheapest-total existing path. + * + * input_rel: contains the source-data Paths + * limit_tuples: estimated bound on the number of output tuples, + * or -1 if no LIMIT or couldn't estimate + */ +static RelOptInfo * +create_ordered_paths(PlannerInfo *root, + RelOptInfo *input_rel, + double limit_tuples) +{ + Path *cheapest_input_path = input_rel->cheapest_total_path; + RelOptInfo *ordered_rel; + ListCell *lc; + + /* For now, do all work in the (ORDERED, NULL) upperrel */ + ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL); + + foreach(lc, input_rel->pathlist) { - /* Hashed is cheaper, so use it */ - return true; + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->sort_pathkeys, + path->pathkeys); + if (path == cheapest_input_path || is_sorted) + { + if (!is_sorted) + { + /* An explicit sort here can take advantage of LIMIT */ + path = (Path *) create_sort_path(root, + ordered_rel, + path, + root->sort_pathkeys, + limit_tuples); + } + add_path(ordered_rel, path); + } } - return false; + + /* + * No need to bother with set_cheapest here; grouping_planner does not + * need us to do it. + */ + Assert(ordered_rel->pathlist != NIL); + + return ordered_rel; } + /* - * make_subplanTargetList - * Generate appropriate target list when grouping is required. + * make_scanjoin_target + * Generate appropriate PathTarget for the result of scan/join steps. * - * When grouping_planner inserts grouping or aggregation plan nodes - * above the scan/join plan constructed by query_planner+create_plan, - * we typically want the scan/join plan to emit a different target list - * than the outer plan nodes should have. This routine generates the - * correct target list for the scan/join subplan. + * If there is grouping/aggregation or window functions, we typically want the + * scan/join plan to emit a different target list than the upper plan nodes + * will (in particular, it certainly can't include any aggregate or window + * function calls). This routine generates the correct target list for the + * scan/join subplan. * * The initial target list passed from the parser already contains entries * for all ORDER BY and GROUP BY expressions, but it will not have entries * for variables used only in HAVING clauses; so we need to add those * variables to the subplan target list. Also, we flatten all expressions - * except GROUP BY items into their component variables; the other expressions - * will be computed by the inserted nodes rather than by the subplan. + * except GROUP BY items into their component variables; other expressions + * will be computed by the upper plan nodes rather than by the subplan. * For example, given a query like * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b; * we want to pass this targetlist to the subplan: @@ -4173,28 +3762,20 @@ choose_hashed_distinct(PlannerInfo *root, * where the a+b target will be used by the Sort/Group steps, and the * other targets will be used for computing the final results. * - * If we are grouping or aggregating, *and* there are no non-Var grouping - * expressions, then the returned tlist is effectively dummy; we do not - * need to force it to be evaluated, because all the Vars it contains - * should be present in the "flat" tlist generated by create_plan, though - * possibly in a different order. In that case we'll use create_plan's tlist, - * and the tlist made here is only needed as input to query_planner to tell - * it which Vars are needed in the output of the scan/join plan. + * We also convert from targetlist format (List of TargetEntry nodes) + * into PathTarget format, which is more compact and includes cost/width. * * 'tlist' is the query's target list. * 'groupColIdx' receives an array of column numbers for the GROUP BY * expressions (if there are any) in the returned target list. - * 'need_tlist_eval' is set true if we really need to evaluate the - * returned tlist as-is. (Note: locate_grouping_columns assumes - * that if this is FALSE, all grouping columns are simple Vars.) * - * The result is the targetlist to be passed to query_planner. + * The result is the PathTarget to be applied to the Paths returned from + * query_planner(). */ -static List * -make_subplanTargetList(PlannerInfo *root, - List *tlist, - AttrNumber **groupColIdx, - bool *need_tlist_eval) +static PathTarget * +make_scanjoin_target(PlannerInfo *root, + List *tlist, + AttrNumber **groupColIdx) { Query *parse = root->parse; List *sub_tlist; @@ -4205,15 +3786,12 @@ make_subplanTargetList(PlannerInfo *root, *groupColIdx = NULL; /* - * If we're not grouping or aggregating, there's nothing to do here; - * query_planner should receive the unmodified target list. + * If we're not grouping or aggregating or windowing, there's nothing to + * do here except convert to PathTarget format. */ - if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && !root->hasHavingQual && - !parse->hasWindowFuncs) - { - *need_tlist_eval = true; - return tlist; - } + if (!parse->hasAggs && !parse->groupClause && !parse->groupingSets && + !root->hasHavingQual && !parse->hasWindowFuncs) + return create_pathtarget(root, tlist); /* * Otherwise, we must build a tlist containing all grouping columns, plus @@ -4221,7 +3799,6 @@ make_subplanTargetList(PlannerInfo *root, */ sub_tlist = NIL; non_group_cols = NIL; - *need_tlist_eval = false; /* only eval if not flat tlist */ numCols = list_length(parse->groupClause); if (numCols > 0) @@ -4257,13 +3834,11 @@ make_subplanTargetList(PlannerInfo *root, list_length(sub_tlist) + 1, NULL, false); + newtle->ressortgroupref = tle->ressortgroupref; sub_tlist = lappend(sub_tlist, newtle); Assert(grpColIdx[colno] == 0); /* no dups expected */ grpColIdx[colno] = newtle->resno; - - if (!(newtle->expr && IsA(newtle->expr, Var))) - *need_tlist_eval = true; /* tlist contains non Vars */ } else { @@ -4308,7 +3883,7 @@ make_subplanTargetList(PlannerInfo *root, list_free(non_group_vars); list_free(non_group_cols); - return sub_tlist; + return create_pathtarget(root, sub_tlist); } /* @@ -4343,59 +3918,6 @@ get_grouping_column_index(Query *parse, TargetEntry *tle) } /* - * locate_grouping_columns - * Locate grouping columns in the tlist chosen by create_plan. - * - * This is only needed if we don't use the sub_tlist chosen by - * make_subplanTargetList. We have to forget the column indexes found - * by that routine and re-locate the grouping exprs in the real sub_tlist. - * We assume the grouping exprs are just Vars (see make_subplanTargetList). - */ -static void -locate_grouping_columns(PlannerInfo *root, - List *tlist, - List *sub_tlist, - AttrNumber *groupColIdx) -{ - int keyno = 0; - ListCell *gl; - - /* - * No work unless grouping. - */ - if (!root->parse->groupClause) - { - Assert(groupColIdx == NULL); - return; - } - Assert(groupColIdx != NULL); - - foreach(gl, root->parse->groupClause) - { - SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); - Var *groupexpr = (Var *) get_sortgroupclause_expr(grpcl, tlist); - TargetEntry *te; - - /* - * The grouping column returned by create_plan might not have the same - * typmod as the original Var. (This can happen in cases where a - * set-returning function has been inlined, so that we now have more - * knowledge about what it returns than we did when the original Var - * was created.) So we can't use tlist_member() to search the tlist; - * instead use tlist_member_match_var. For safety, still check that - * the vartype matches. - */ - if (!(groupexpr && IsA(groupexpr, Var))) - elog(ERROR, "grouping column is not a Var as expected"); - te = tlist_member_match_var(groupexpr, sub_tlist); - if (!te) - elog(ERROR, "failed to locate grouping columns"); - Assert(((Var *) te->expr)->vartype == groupexpr->vartype); - groupColIdx[keyno++] = te->resno; - } -} - -/* * postprocess_setop_tlist * Fix up targetlist returned by plan_set_operations(). * @@ -4506,28 +4028,31 @@ select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) * make_windowInputTargetList * Generate appropriate target list for initial input to WindowAgg nodes. * - * When grouping_planner inserts one or more WindowAgg nodes into the plan, - * this function computes the initial target list to be computed by the node - * just below the first WindowAgg. This list must contain all values needed - * to evaluate the window functions, compute the final target list, and - * perform any required final sort step. If multiple WindowAggs are needed, - * each intermediate one adds its window function results onto this tlist; - * only the topmost WindowAgg computes the actual desired target list. + * When the query has window functions, this function computes the initial + * target list to be computed by the node just below the first WindowAgg. + * This list must contain all values needed to evaluate the window functions, + * compute the final target list, and perform any required final sort step. + * If multiple WindowAggs are needed, each intermediate one adds its window + * function results onto this tlist; only the topmost WindowAgg computes the + * actual desired target list. * - * This function is much like make_subplanTargetList, though not quite enough + * This function is much like make_scanjoin_target, though not quite enough * like it to share code. As in that function, we flatten most expressions * into their component variables. But we do not want to flatten window * PARTITION BY/ORDER BY clauses, since that might result in multiple * evaluations of them, which would be bad (possibly even resulting in - * inconsistent answers, if they contain volatile functions). Also, we must - * not flatten GROUP BY clauses that were left unflattened by - * make_subplanTargetList, because we may no longer have access to the + * inconsistent answers, if they contain volatile functions). + * Also, we must not flatten GROUP BY clauses that were left unflattened by + * make_scanjoin_target, because we may no longer have access to the * individual Vars in them. * - * Another key difference from make_subplanTargetList is that we don't flatten + * Another key difference from make_scanjoin_target is that we don't flatten * Aggref expressions, since those are to be computed below the window * functions and just referenced like Vars above that. * + * XXX another difference is that this produces targetlist format not a + * PathTarget, but that should change sometime soon. + * * 'tlist' is the query's final target list. * 'activeWindows' is the list of active windows previously identified by * select_active_windows. @@ -4651,6 +4176,8 @@ make_windowInputTargetList(PlannerInfo *root, * The required ordering is first the PARTITION keys, then the ORDER keys. * In the future we might try to implement windowing using hashing, in which * case the ordering could be relaxed, but for now we always sort. + * + * Caution: if you change this, see createplan.c's get_column_info_for_window! */ static List * make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, @@ -4681,113 +4208,42 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, return window_pathkeys; } -/*---------- - * get_column_info_for_window - * Get the partitioning/ordering column numbers and equality operators - * for a WindowAgg node. - * - * This depends on the behavior of make_pathkeys_for_window()! +/* + * get_cheapest_fractional_path + * Find the cheapest path for retrieving a specified fraction of all + * the tuples expected to be returned by the given relation. * - * We are given the target WindowClause and an array of the input column - * numbers associated with the resulting pathkeys. In the easy case, there - * are the same number of pathkey columns as partitioning + ordering columns - * and we just have to copy some data around. However, it's possible that - * some of the original partitioning + ordering columns were eliminated as - * redundant during the transformation to pathkeys. (This can happen even - * though the parser gets rid of obvious duplicates. A typical scenario is a - * window specification "PARTITION BY x ORDER BY y" coupled with a clause - * "WHERE x = y" that causes the two sort columns to be recognized as - * redundant.) In that unusual case, we have to work a lot harder to - * determine which keys are significant. + * We interpret tuple_fraction the same way as grouping_planner. * - * The method used here is a bit brute-force: add the sort columns to a list - * one at a time and note when the resulting pathkey list gets longer. But - * it's a sufficiently uncommon case that a faster way doesn't seem worth - * the amount of code refactoring that'd be needed. - *---------- + * We assume set_cheapest() has been run on the given rel. */ -static void -get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist, - int numSortCols, AttrNumber *sortColIdx, - int *partNumCols, - AttrNumber **partColIdx, - Oid **partOperators, - int *ordNumCols, - AttrNumber **ordColIdx, - Oid **ordOperators) +Path * +get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction) { - int numPart = list_length(wc->partitionClause); - int numOrder = list_length(wc->orderClause); + Path *best_path = rel->cheapest_total_path; + ListCell *l; - if (numSortCols == numPart + numOrder) - { - /* easy case */ - *partNumCols = numPart; - *partColIdx = sortColIdx; - *partOperators = extract_grouping_ops(wc->partitionClause); - *ordNumCols = numOrder; - *ordColIdx = sortColIdx + numPart; - *ordOperators = extract_grouping_ops(wc->orderClause); - } - else + /* If all tuples will be retrieved, just return the cheapest-total path */ + if (tuple_fraction <= 0.0) + return best_path; + + /* Convert absolute # of tuples to a fraction; no need to clamp */ + if (tuple_fraction >= 1.0) + tuple_fraction /= best_path->rows; + + foreach(l, rel->pathlist) { - List *sortclauses; - List *pathkeys; - int scidx; - ListCell *lc; - - /* first, allocate what's certainly enough space for the arrays */ - *partNumCols = 0; - *partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber)); - *partOperators = (Oid *) palloc(numPart * sizeof(Oid)); - *ordNumCols = 0; - *ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber)); - *ordOperators = (Oid *) palloc(numOrder * sizeof(Oid)); - sortclauses = NIL; - pathkeys = NIL; - scidx = 0; - foreach(lc, wc->partitionClause) - { - SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); - List *new_pathkeys; + Path *path = (Path *) lfirst(l); - sortclauses = lappend(sortclauses, sgc); - new_pathkeys = make_pathkeys_for_sortclauses(root, - sortclauses, - tlist); - if (list_length(new_pathkeys) > list_length(pathkeys)) - { - /* this sort clause is actually significant */ - (*partColIdx)[*partNumCols] = sortColIdx[scidx++]; - (*partOperators)[*partNumCols] = sgc->eqop; - (*partNumCols)++; - pathkeys = new_pathkeys; - } - } - foreach(lc, wc->orderClause) - { - SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); - List *new_pathkeys; + if (path == rel->cheapest_total_path || + compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0) + continue; - sortclauses = lappend(sortclauses, sgc); - new_pathkeys = make_pathkeys_for_sortclauses(root, - sortclauses, - tlist); - if (list_length(new_pathkeys) > list_length(pathkeys)) - { - /* this sort clause is actually significant */ - (*ordColIdx)[*ordNumCols] = sortColIdx[scidx++]; - (*ordOperators)[*ordNumCols] = sgc->eqop; - (*ordNumCols)++; - pathkeys = new_pathkeys; - } - } - /* complain if we didn't eat exactly the right number of sort cols */ - if (scidx != numSortCols) - elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators"); + best_path = path; } -} + return best_path; +} /* * expression_planner |