diff options
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 152 |
1 files changed, 32 insertions, 120 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0b0cb4eabf6..d87e4089b51 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.191 2005/08/18 17:51:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.192 2005/08/27 22:13:43 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -63,7 +63,6 @@ static double preprocess_limit(PlannerInfo *root, int *offset_est, int *count_est); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, Path *cheapest_path, Path *sorted_path, - List *sort_pathkeys, List *group_pathkeys, double dNumGroups, AggClauseCounts *agg_counts); static bool hash_safe_grouping(PlannerInfo *root); static List *make_subplanTargetList(PlannerInfo *root, List *tlist, @@ -655,6 +654,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) Plan *result_plan; List *current_pathkeys; List *sort_pathkeys; + double dNumGroups = 0; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if (parse->limitCount || parse->limitOffset) @@ -727,11 +727,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) AttrNumber *groupColIdx = NULL; bool need_tlist_eval = true; QualCost tlist_cost; - double sub_tuple_fraction; Path *cheapest_path; Path *sorted_path; Path *best_path; - double dNumGroups = 0; long numGroups = 0; AggClauseCounts agg_counts; int numGroupCols = list_length(parse->groupClause); @@ -750,13 +748,14 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) &groupColIdx, &need_tlist_eval); /* - * Calculate pathkeys that represent grouping/ordering - * requirements + * Calculate pathkeys that represent grouping/ordering requirements. + * Stash them in PlannerInfo so that query_planner can canonicalize + * them. */ - group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause, - tlist); - sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, - tlist); + root->group_pathkeys = + make_pathkeys_for_sortclauses(parse->groupClause, tlist); + root->sort_pathkeys = + make_pathkeys_for_sortclauses(parse->sortClause, tlist); /* * Will need actual number of aggregates for estimating costs. @@ -787,112 +786,36 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * Needs more thought...) */ if (parse->groupClause) - root->query_pathkeys = group_pathkeys; + root->query_pathkeys = root->group_pathkeys; else if (parse->sortClause) - root->query_pathkeys = sort_pathkeys; + root->query_pathkeys = root->sort_pathkeys; else root->query_pathkeys = NIL; /* - * With grouping or aggregation, the tuple fraction to pass to - * query_planner() may be different from what it is at top level. - */ - sub_tuple_fraction = tuple_fraction; - - if (parse->groupClause) - { - /* - * In GROUP BY mode, we have the little problem that we don't - * really know how many input tuples will be needed to make a - * group, so we can't translate an output LIMIT count into an - * input count. For lack of a better idea, assume 25% of the - * input data will be processed if there is any output limit. - * However, if the caller gave us a fraction rather than an - * absolute count, we can keep using that fraction (which - * amounts to assuming that all the groups are about the same - * size). - */ - if (sub_tuple_fraction >= 1.0) - sub_tuple_fraction = 0.25; - - /* - * If both GROUP BY and ORDER BY are specified, we will need - * two levels of sort --- and, therefore, certainly need to - * read all the input tuples --- unless ORDER BY is a subset - * of GROUP BY. (We have not yet canonicalized the pathkeys, - * so must use the slower noncanonical comparison method.) - */ - if (parse->groupClause && parse->sortClause && - !noncanonical_pathkeys_contained_in(sort_pathkeys, - group_pathkeys)) - sub_tuple_fraction = 0.0; - } - else if (parse->hasAggs) - { - /* - * Ungrouped aggregate will certainly want all the input - * tuples. - */ - sub_tuple_fraction = 0.0; - } - else if (parse->distinctClause) - { - /* - * SELECT DISTINCT, like GROUP, will absorb an unpredictable - * number of input tuples per output tuple. Handle the same - * way. - */ - if (sub_tuple_fraction >= 1.0) - sub_tuple_fraction = 0.25; - } - - /* * Generate the best unsorted and presorted paths for this Query - * (but note there may not be any presorted path). + * (but note there may not be any presorted path). query_planner + * will also estimate the number of groups in the query, and + * canonicalize all the pathkeys. */ - query_planner(root, sub_tlist, sub_tuple_fraction, - &cheapest_path, &sorted_path); + query_planner(root, sub_tlist, tuple_fraction, + &cheapest_path, &sorted_path, &dNumGroups); - /* - * We couldn't canonicalize group_pathkeys and sort_pathkeys - * before running query_planner(), so do it now. - */ - group_pathkeys = canonicalize_pathkeys(root, group_pathkeys); - sort_pathkeys = canonicalize_pathkeys(root, sort_pathkeys); + group_pathkeys = root->group_pathkeys; + sort_pathkeys = root->sort_pathkeys; /* - * If grouping, estimate the number of groups. (We can't do this - * until after running query_planner(), either.) Then decide - * whether we want to use hashed grouping. + * If grouping, decide whether we want to use hashed grouping. */ if (parse->groupClause) { - List *groupExprs; - double cheapest_path_rows; - - /* - * Beware of the possibility that cheapest_path->parent is NULL. - * This could happen if user does something silly like - * SELECT 'foo' GROUP BY 1; - */ - if (cheapest_path->parent) - cheapest_path_rows = cheapest_path->parent->rows; - else - cheapest_path_rows = 1; /* assume non-set result */ - - groupExprs = get_sortgrouplist_exprs(parse->groupClause, - parse->targetList); - dNumGroups = estimate_num_groups(root, - groupExprs, - cheapest_path_rows); - /* Also want it as a long int --- but 'ware overflow! */ - numGroups = (long) Min(dNumGroups, (double) LONG_MAX); - use_hashed_grouping = choose_hashed_grouping(root, tuple_fraction, cheapest_path, sorted_path, - sort_pathkeys, group_pathkeys, dNumGroups, &agg_counts); + + /* Also convert # groups to long int --- but 'ware overflow! */ + numGroups = (long) Min(dNumGroups, (double) LONG_MAX); } /* @@ -1130,19 +1053,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * If there was grouping or aggregation, leave plan_rows as-is * (ie, assume the result was already mostly unique). If not, - * it's reasonable to assume the UNIQUE filter has effects - * comparable to GROUP BY. + * use the number of distinct-groups calculated by query_planner. */ if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs) - { - List *distinctExprs; - - distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, - parse->targetList); - result_plan->plan_rows = estimate_num_groups(root, - distinctExprs, - result_plan->plan_rows); - } + result_plan->plan_rows = dNumGroups; } /* @@ -1360,7 +1274,6 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction, static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, Path *cheapest_path, Path *sorted_path, - List *sort_pathkeys, List *group_pathkeys, double dNumGroups, AggClauseCounts *agg_counts) { int numGroupCols = list_length(root->parse->groupClause); @@ -1439,8 +1352,8 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, cheapest_path->startup_cost, cheapest_path->total_cost, cheapest_path_rows); /* Result of hashed agg is always unsorted */ - if (sort_pathkeys) - cost_sort(&hashed_p, root, sort_pathkeys, hashed_p.total_cost, + if (root->sort_pathkeys) + cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, dNumGroups, cheapest_path_width); if (sorted_path) @@ -1455,12 +1368,11 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, sorted_p.total_cost = cheapest_path->total_cost; current_pathkeys = cheapest_path->pathkeys; } - if (!pathkeys_contained_in(group_pathkeys, - current_pathkeys)) + if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { - cost_sort(&sorted_p, root, group_pathkeys, sorted_p.total_cost, + cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost, cheapest_path_rows, cheapest_path_width); - current_pathkeys = group_pathkeys; + current_pathkeys = root->group_pathkeys; } if (root->parse->hasAggs) @@ -1473,9 +1385,9 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, sorted_p.startup_cost, sorted_p.total_cost, cheapest_path_rows); /* The Agg or Group node will preserve ordering */ - if (sort_pathkeys && - !pathkeys_contained_in(sort_pathkeys, current_pathkeys)) - cost_sort(&sorted_p, root, sort_pathkeys, sorted_p.total_cost, + if (root->sort_pathkeys && + !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, dNumGroups, cheapest_path_width); /* |