diff options
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 873 |
1 files changed, 693 insertions, 180 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 85805ff5c70..54f2da70cb1 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -94,25 +94,6 @@ typedef struct } standard_qp_extra; /* - * Various flags indicating what kinds of grouping are possible. - * - * GROUPING_CAN_USE_SORT should be set if it's possible to perform - * sort-based implementations of grouping. When grouping sets are in use, - * this will be true if sorting is potentially usable for any of the grouping - * sets, even if it's not usable for all of them. - * - * GROUPING_CAN_USE_HASH should be set if it's possible to perform - * hash-based implementations of grouping. - * - * GROUPING_CAN_PARTIAL_AGG should be set if the aggregation is of a type - * for which we support partial aggregation (not, for example, grouping sets). - * It says nothing about parallel-safety or the availability of suitable paths. - */ -#define GROUPING_CAN_USE_SORT 0x0001 -#define GROUPING_CAN_USE_HASH 0x0002 -#define GROUPING_CAN_PARTIAL_AGG 0x0004 - -/* * Data specific to grouping sets */ @@ -164,11 +145,16 @@ static bool is_degenerate_grouping(PlannerInfo *root); static void create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel); +static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, + PathTarget *target, bool target_parallel_safe, + Node *havingQual); static void create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs, - grouping_sets_data *gd, int flags); + grouping_sets_data *gd, + GroupPathExtraData *extra, + RelOptInfo **partially_grouped_rel_p); static void consider_groupingsets_paths(PlannerInfo *root, RelOptInfo *grouped_rel, Path *path, @@ -221,19 +207,34 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, - const AggClauseCosts *agg_final_costs, - grouping_sets_data *gd, bool can_sort, bool can_hash, - double dNumGroups, List *havingQual); + grouping_sets_data *gd, + double dNumGroups, + GroupPathExtraData *extra); static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, RelOptInfo *grouped_rel, RelOptInfo *input_rel, grouping_sets_data *gd, - bool can_sort, - bool can_hash, - AggClauseCosts *agg_final_costs); + GroupPathExtraData *extra, + bool force_rel_creation); static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel); static bool can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs); +static void apply_scanjoin_target_to_paths(PlannerInfo *root, + RelOptInfo *rel, + PathTarget *scanjoin_target, + bool scanjoin_target_parallel_safe, + bool modify_in_place); +static void create_partitionwise_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + RelOptInfo *partially_grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + PartitionwiseAggregateType patype, + GroupPathExtraData *extra); +static bool group_by_has_partkey(RelOptInfo *input_rel, + List *targetList, + List *groupClause); /***************************************************************************** @@ -1950,74 +1951,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* * Forcibly apply SRF-free scan/join target to all the Paths for the * scan/join rel. - * - * In principle we should re-run set_cheapest() here to identify the - * cheapest path, but it seems unlikely that adding the same tlist - * eval costs to all the paths would change that, so we don't bother. - * Instead, just assume that the cheapest-startup and cheapest-total - * paths remain so. (There should be no parameterized paths anymore, - * so we needn't worry about updating cheapest_parameterized_paths.) */ - foreach(lc, current_rel->pathlist) - { - Path *subpath = (Path *) lfirst(lc); - Path *path; - - Assert(subpath->param_info == NULL); - path = apply_projection_to_path(root, current_rel, - subpath, scanjoin_target); - /* If we had to add a Result, path is different from subpath */ - if (path != subpath) - { - lfirst(lc) = path; - if (subpath == current_rel->cheapest_startup_path) - current_rel->cheapest_startup_path = path; - if (subpath == current_rel->cheapest_total_path) - current_rel->cheapest_total_path = path; - } - } - - /* - * Upper planning steps which make use of the top scan/join rel's - * partial pathlist will expect partial paths for that rel to produce - * the same output as complete paths ... and we just changed the - * output for the complete paths, so we'll need to do the same thing - * for partial paths. But only parallel-safe expressions can be - * computed by partial paths. - */ - if (current_rel->partial_pathlist && scanjoin_target_parallel_safe) - { - /* Apply the scan/join target to each partial path */ - foreach(lc, current_rel->partial_pathlist) - { - Path *subpath = (Path *) lfirst(lc); - Path *newpath; - - /* Shouldn't have any parameterized paths anymore */ - Assert(subpath->param_info == NULL); - - /* - * Don't use apply_projection_to_path() here, because there - * could be other pointers to these paths, and therefore we - * mustn't modify them in place. - */ - newpath = (Path *) create_projection_path(root, - current_rel, - subpath, - scanjoin_target); - lfirst(lc) = newpath; - } - } - else - { - /* - * In the unfortunate event that scanjoin_target is not - * parallel-safe, we can't apply it to the partial paths; in that - * case, we'll need to forget about the partial paths, which - * aren't valid input for upper planning steps. - */ - current_rel->partial_pathlist = NIL; - } + apply_scanjoin_target_to_paths(root, current_rel, scanjoin_target, + scanjoin_target_parallel_safe, true); /* Now fix things up if scan/join target contains SRFs */ if (parse->hasTargetSRFs) @@ -3705,30 +3641,14 @@ create_grouping_paths(PlannerInfo *root, { Query *parse = root->parse; RelOptInfo *grouped_rel; + RelOptInfo *partially_grouped_rel; /* - * For now, all aggregated paths are added to the (GROUP_AGG, NULL) - * upperrel. - */ - grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); - grouped_rel->reltarget = target; - - /* - * If the input relation is not parallel-safe, then the grouped relation - * can't be parallel-safe, either. Otherwise, it's parallel-safe if the - * target list and HAVING quals are parallel-safe. - */ - if (input_rel->consider_parallel && target_parallel_safe && - is_parallel_safe(root, (Node *) parse->havingQual)) - grouped_rel->consider_parallel = true; - - /* - * If the input rel belongs to a single FDW, so does the grouped rel. + * Create grouping relation to hold fully aggregated grouping and/or + * aggregation paths. */ - grouped_rel->serverid = input_rel->serverid; - grouped_rel->userid = input_rel->userid; - grouped_rel->useridiscurrent = input_rel->useridiscurrent; - grouped_rel->fdwroutine = input_rel->fdwroutine; + grouped_rel = make_grouping_rel(root, input_rel, target, + target_parallel_safe, parse->havingQual); /* * Create either paths for a degenerate grouping or paths for ordinary @@ -3739,6 +3659,7 @@ create_grouping_paths(PlannerInfo *root, else { int flags = 0; + GroupPathExtraData extra; /* * Determine whether it's possible to perform sort-based @@ -3786,8 +3707,27 @@ create_grouping_paths(PlannerInfo *root, if (can_partial_agg(root, agg_costs)) flags |= GROUPING_CAN_PARTIAL_AGG; + extra.flags = flags; + extra.target = target; + extra.target_parallel_safe = target_parallel_safe; + extra.havingQual = parse->havingQual; + extra.targetList = parse->targetList; + extra.partial_costs_set = false; + + /* + * Determine whether partitionwise aggregation is in theory possible. + * It can be disabled by the user, and for now, we don't try to + * support grouping sets. create_ordinary_grouping_paths() will check + * additional conditions, such as whether input_rel is partitioned. + */ + if (enable_partitionwise_aggregate && !parse->groupingSets) + extra.patype = PARTITIONWISE_AGGREGATE_FULL; + else + extra.patype = PARTITIONWISE_AGGREGATE_NONE; + create_ordinary_grouping_paths(root, input_rel, grouped_rel, - agg_costs, gd, flags); + agg_costs, gd, &extra, + &partially_grouped_rel); } set_cheapest(grouped_rel); @@ -3795,6 +3735,60 @@ create_grouping_paths(PlannerInfo *root, } /* + * make_grouping_rel + * + * Create a new grouping rel and set basic properties. + * + * input_rel represents the underlying scan/join relation. + * target is the output expected from the grouping relation. + */ +static RelOptInfo * +make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, + PathTarget *target, bool target_parallel_safe, + Node *havingQual) +{ + RelOptInfo *grouped_rel; + + if (IS_OTHER_REL(input_rel)) + { + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, + input_rel->relids); + grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL; + } + else + { + /* + * By tradition, the relids set for the main grouping relation is + * NULL. (This could be changed, but might require adjustments + * elsewhere.) + */ + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); + } + + /* Set target. */ + grouped_rel->reltarget = target; + + /* + * If the input relation is not parallel-safe, then the grouped relation + * can't be parallel-safe, either. Otherwise, it's parallel-safe if the + * target list and HAVING quals are parallel-safe. + */ + if (input_rel->consider_parallel && target_parallel_safe && + is_parallel_safe(root, (Node *) havingQual)) + grouped_rel->consider_parallel = true; + + /* + * If the input rel belongs to a single FDW, so does the grouped rel. + */ + grouped_rel->serverid = input_rel->serverid; + grouped_rel->userid = input_rel->userid; + grouped_rel->useridiscurrent = input_rel->useridiscurrent; + grouped_rel->fdwroutine = input_rel->fdwroutine; + + return grouped_rel; +} + +/* * is_degenerate_grouping * * A degenerate grouping is one in which the query has a HAVING qual and/or @@ -3881,55 +3875,117 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * because otherwise (1) it would be harder to throw an appropriate error * message if neither way works, and (2) we should not allow hashtable size * considerations to dissuade us from using hashing if sorting is not possible. + * + * *partially_grouped_rel_p will be set to the partially grouped rel which this + * function creates, or to NULL if it doesn't create one. */ static void create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs, - grouping_sets_data *gd, int flags) + grouping_sets_data *gd, + GroupPathExtraData *extra, + RelOptInfo **partially_grouped_rel_p) { - Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; RelOptInfo *partially_grouped_rel = NULL; - AggClauseCosts agg_final_costs; /* parallel only */ double dNumGroups; - bool can_hash = (flags & GROUPING_CAN_USE_HASH) != 0; - bool can_sort = (flags & GROUPING_CAN_USE_SORT) != 0; + PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE; /* - * Estimate number of groups. + * If this is the topmost grouping relation or if the parent relation is + * doing some form of partitionwise aggregation, then we may be able to do + * it at this level also. However, if the input relation is not + * partitioned, partitionwise aggregate is impossible, and if it is dummy, + * partitionwise aggregate is pointless. */ - dNumGroups = get_number_of_groups(root, - cheapest_path->rows, - gd, - parse->targetList); + if (extra->patype != PARTITIONWISE_AGGREGATE_NONE && + input_rel->part_scheme && input_rel->part_rels && + !IS_DUMMY_REL(input_rel)) + { + /* + * If this is the topmost relation or if the parent relation is doing + * full partitionwise aggregation, then we can do full partitionwise + * aggregation provided that the GROUP BY clause contains all of the + * partitioning columns at this level. Otherwise, we can do at most + * partial partitionwise aggregation. But if partial aggregation is + * not supported in general then we can't use it for partitionwise + * aggregation either. + */ + if (extra->patype == PARTITIONWISE_AGGREGATE_FULL && + group_by_has_partkey(input_rel, extra->targetList, + root->parse->groupClause)) + patype = PARTITIONWISE_AGGREGATE_FULL; + else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0) + patype = PARTITIONWISE_AGGREGATE_PARTIAL; + else + patype = PARTITIONWISE_AGGREGATE_NONE; + } /* * Before generating paths for grouped_rel, we first generate any possible * partially grouped paths; that way, later code can easily consider both * parallel and non-parallel approaches to grouping. */ - MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts)); - if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL - && (flags & GROUPING_CAN_PARTIAL_AGG) != 0) + if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0) { + bool force_rel_creation; + + /* + * If we're doing partitionwise aggregation at this level, force + * creation of a partially_grouped_rel so we can add partitionwise + * paths to it. + */ + force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL); + partially_grouped_rel = create_partial_grouping_paths(root, grouped_rel, input_rel, gd, - can_sort, - can_hash, - &agg_final_costs); + extra, + force_rel_creation); + } + + /* Set out parameter. */ + *partially_grouped_rel_p = partially_grouped_rel; + + /* Apply partitionwise aggregation technique, if possible. */ + if (patype != PARTITIONWISE_AGGREGATE_NONE) + create_partitionwise_grouping_paths(root, input_rel, grouped_rel, + partially_grouped_rel, agg_costs, + gd, patype, extra); + + /* If we are doing partial aggregation only, return. */ + if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL) + { + Assert(partially_grouped_rel); + + if (partially_grouped_rel->pathlist) + set_cheapest(partially_grouped_rel); + + return; + } + + /* Gather any partially grouped partial paths. */ + if (partially_grouped_rel && partially_grouped_rel->partial_pathlist) + { gather_grouping_paths(root, partially_grouped_rel); set_cheapest(partially_grouped_rel); } + /* + * Estimate number of groups. + */ + dNumGroups = get_number_of_groups(root, + cheapest_path->rows, + gd, + extra->targetList); + /* Build final grouping paths */ add_paths_to_grouping_rel(root, input_rel, grouped_rel, - partially_grouped_rel, agg_costs, - &agg_final_costs, gd, can_sort, can_hash, - dNumGroups, (List *) parse->havingQual); + partially_grouped_rel, agg_costs, gd, + dNumGroups, extra); /* Give a helpful error if we failed to find any implementation */ if (grouped_rel->pathlist == NIL) @@ -6103,13 +6159,16 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, - const AggClauseCosts *agg_final_costs, - grouping_sets_data *gd, bool can_sort, bool can_hash, - double dNumGroups, List *havingQual) + grouping_sets_data *gd, double dNumGroups, + GroupPathExtraData *extra) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; ListCell *lc; + bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; + bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + List *havingQual = (List *) extra->havingQual; + AggClauseCosts *agg_final_costs = &extra->agg_final_costs; if (can_sort) { @@ -6302,6 +6361,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, dNumGroups)); } } + + /* + * When partitionwise aggregate is used, we might have fully aggregated + * paths in the partial pathlist, because add_paths_to_append_rel() will + * consider a path for grouped_rel consisting of a Parallel Append of + * non-partial paths from each child. + */ + if (grouped_rel->partial_pathlist != NIL) + gather_grouping_paths(root, grouped_rel); } /* @@ -6316,23 +6384,58 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, * All paths for this new upper relation -- both partial and non-partial -- * have been partially aggregated but require a subsequent FinalizeAggregate * step. + * + * NB: This function is allowed to return NULL if it determines that there is + * no real need to create a new RelOptInfo. */ static RelOptInfo * create_partial_grouping_paths(PlannerInfo *root, RelOptInfo *grouped_rel, RelOptInfo *input_rel, grouping_sets_data *gd, - bool can_sort, - bool can_hash, - AggClauseCosts *agg_final_costs) + GroupPathExtraData *extra, + bool force_rel_creation) { Query *parse = root->parse; RelOptInfo *partially_grouped_rel; - AggClauseCosts agg_partial_costs; - Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); - Size hashaggtablesize; + AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs; + AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + Path *cheapest_partial_path = NULL; + Path *cheapest_total_path = NULL; double dNumPartialGroups = 0; + double dNumPartialPartialGroups = 0; ListCell *lc; + bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; + bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + + /* + * Consider whether we should generate partially aggregated non-partial + * paths. We can only do this if we have a non-partial path, and only if + * the parent of the input rel is performing partial partitionwise + * aggregation. (Note that extra->patype is the type of partitionwise + * aggregation being used at the parent level, not this level.) + */ + if (input_rel->pathlist != NIL && + extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL) + cheapest_total_path = input_rel->cheapest_total_path; + + /* + * If parallelism is possible for grouped_rel, then we should consider + * generating partially-grouped partial paths. However, if the input rel + * has no partial paths, then we can't. + */ + if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL) + cheapest_partial_path = linitial(input_rel->partial_pathlist); + + /* + * If we can't partially aggregate partial paths, and we can't partially + * aggregate non-partial paths, then don't bother creating the new + * RelOptInfo at all, unless the caller specified force_rel_creation. + */ + if (cheapest_total_path == NULL && + cheapest_partial_path == NULL && + !force_rel_creation) + return NULL; /* * Build a new upper relation to represent the result of partially @@ -6343,6 +6446,7 @@ create_partial_grouping_paths(PlannerInfo *root, grouped_rel->relids); partially_grouped_rel->consider_parallel = grouped_rel->consider_parallel; + partially_grouped_rel->reloptkind = grouped_rel->reloptkind; partially_grouped_rel->serverid = grouped_rel->serverid; partially_grouped_rel->userid = grouped_rel->userid; partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent; @@ -6356,39 +6460,53 @@ create_partial_grouping_paths(PlannerInfo *root, */ partially_grouped_rel->reltarget = make_partial_grouping_target(root, grouped_rel->reltarget, - (Node *) parse->havingQual); + extra->havingQual); - /* - * Collect statistics about aggregates for estimating costs of performing - * aggregation in parallel. - */ - MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts)); - if (parse->hasAggs) + if (!extra->partial_costs_set) { - List *partial_target_exprs; - - /* partial phase */ - partial_target_exprs = partially_grouped_rel->reltarget->exprs; - get_agg_clause_costs(root, (Node *) partial_target_exprs, - AGGSPLIT_INITIAL_SERIAL, - &agg_partial_costs); - - /* final phase */ - get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs, - AGGSPLIT_FINAL_DESERIAL, - agg_final_costs); - get_agg_clause_costs(root, parse->havingQual, - AGGSPLIT_FINAL_DESERIAL, - agg_final_costs); + /* + * Collect statistics about aggregates for estimating costs of + * performing aggregation in parallel. + */ + MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts)); + MemSet(agg_final_costs, 0, sizeof(AggClauseCosts)); + if (parse->hasAggs) + { + List *partial_target_exprs; + + /* partial phase */ + partial_target_exprs = partially_grouped_rel->reltarget->exprs; + get_agg_clause_costs(root, (Node *) partial_target_exprs, + AGGSPLIT_INITIAL_SERIAL, + agg_partial_costs); + + /* final phase */ + get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs, + AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + get_agg_clause_costs(root, extra->havingQual, + AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + } + + extra->partial_costs_set = true; } /* Estimate number of partial groups. */ - dNumPartialGroups = get_number_of_groups(root, - cheapest_partial_path->rows, - gd, - parse->targetList); - - if (can_sort) + if (cheapest_total_path != NULL) + dNumPartialGroups = + get_number_of_groups(root, + cheapest_total_path->rows, + gd, + extra->targetList); + if (cheapest_partial_path != NULL) + dNumPartialPartialGroups = + get_number_of_groups(root, + cheapest_partial_path->rows, + gd, + extra->targetList); + + if (can_sort && cheapest_total_path != NULL) { /* This should have been checked previously */ Assert(parse->hasAggs || parse->groupClause); @@ -6397,6 +6515,50 @@ create_partial_grouping_paths(PlannerInfo *root, * Use any available suitably-sorted path as input, and also consider * sorting the cheapest partial path. */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); + if (path == cheapest_total_path || is_sorted) + { + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + -1.0); + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); + } + } + } + + if (can_sort && cheapest_partial_path != NULL) + { + /* Similar to above logic, but for partial paths. */ foreach(lc, input_rel->partial_pathlist) { Path *path = (Path *) lfirst(lc); @@ -6424,8 +6586,8 @@ create_partial_grouping_paths(PlannerInfo *root, AGGSPLIT_INITIAL_SERIAL, parse->groupClause, NIL, - &agg_partial_costs, - dNumPartialGroups)); + agg_partial_costs, + dNumPartialPartialGroups)); else add_partial_path(partially_grouped_rel, (Path *) create_group_path(root, @@ -6433,26 +6595,56 @@ create_partial_grouping_paths(PlannerInfo *root, path, parse->groupClause, NIL, - dNumPartialGroups)); + dNumPartialPartialGroups)); } } } - if (can_hash) + if (can_hash && cheapest_total_path != NULL) { + Size hashaggtablesize; + /* Checked above */ Assert(parse->hasAggs || parse->groupClause); hashaggtablesize = - estimate_hashagg_tablesize(cheapest_partial_path, - &agg_partial_costs, + estimate_hashagg_tablesize(cheapest_total_path, + agg_partial_costs, dNumPartialGroups); /* * Tentatively produce a partial HashAgg Path, depending on if it * looks as if the hash table will fit in work_mem. */ - if (hashaggtablesize < work_mem * 1024L) + if (hashaggtablesize < work_mem * 1024L && + cheapest_total_path != NULL) + { + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + cheapest_total_path, + partially_grouped_rel->reltarget, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + } + } + + if (can_hash && cheapest_partial_path != NULL) + { + Size hashaggtablesize; + + hashaggtablesize = + estimate_hashagg_tablesize(cheapest_partial_path, + agg_partial_costs, + dNumPartialPartialGroups); + + /* Do the same for partial paths. */ + if (hashaggtablesize < work_mem * 1024L && + cheapest_partial_path != NULL) { add_partial_path(partially_grouped_rel, (Path *) create_agg_path(root, @@ -6463,8 +6655,8 @@ create_partial_grouping_paths(PlannerInfo *root, AGGSPLIT_INITIAL_SERIAL, parse->groupClause, NIL, - &agg_partial_costs, - dNumPartialGroups)); + agg_partial_costs, + dNumPartialPartialGroups)); } } @@ -6565,3 +6757,324 @@ can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs) /* Everything looks good. */ return true; } + +/* + * apply_scanjoin_target_to_paths + * + * Applies scan/join target to all the Paths for the scan/join rel. + */ +static void +apply_scanjoin_target_to_paths(PlannerInfo *root, + RelOptInfo *rel, + PathTarget *scanjoin_target, + bool scanjoin_target_parallel_safe, + bool modify_in_place) +{ + ListCell *lc; + + /* + * In principle we should re-run set_cheapest() here to identify the + * cheapest path, but it seems unlikely that adding the same tlist eval + * costs to all the paths would change that, so we don't bother. Instead, + * just assume that the cheapest-startup and cheapest-total paths remain + * so. (There should be no parameterized paths anymore, so we needn't + * worry about updating cheapest_parameterized_paths.) + */ + foreach(lc, rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath; + + Assert(subpath->param_info == NULL); + + /* + * Don't use apply_projection_to_path() when modify_in_place is false, + * because there could be other pointers to these paths, and therefore + * we mustn't modify them in place. + */ + if (modify_in_place) + newpath = apply_projection_to_path(root, rel, subpath, + scanjoin_target); + else + newpath = (Path *) create_projection_path(root, rel, subpath, + scanjoin_target); + + /* If we had to add a Result, newpath is different from subpath */ + if (newpath != subpath) + { + lfirst(lc) = newpath; + if (subpath == rel->cheapest_startup_path) + rel->cheapest_startup_path = newpath; + if (subpath == rel->cheapest_total_path) + rel->cheapest_total_path = newpath; + } + } + + /* + * Upper planning steps which make use of the top scan/join rel's partial + * pathlist will expect partial paths for that rel to produce the same + * output as complete paths ... and we just changed the output for the + * complete paths, so we'll need to do the same thing for partial paths. + * But only parallel-safe expressions can be computed by partial paths. + */ + if (rel->partial_pathlist && scanjoin_target_parallel_safe) + { + /* Apply the scan/join target to each partial path */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath; + + /* Shouldn't have any parameterized paths anymore */ + Assert(subpath->param_info == NULL); + + /* + * Don't use apply_projection_to_path() here, because there could + * be other pointers to these paths, and therefore we mustn't + * modify them in place. + */ + newpath = (Path *) create_projection_path(root, + rel, + subpath, + scanjoin_target); + lfirst(lc) = newpath; + } + } + else + { + /* + * In the unfortunate event that scanjoin_target is not parallel-safe, + * we can't apply it to the partial paths; in that case, we'll need to + * forget about the partial paths, which aren't valid input for upper + * planning steps. + */ + rel->partial_pathlist = NIL; + } +} + +/* + * create_partitionwise_grouping_paths + * + * If the partition keys of input relation are part of the GROUP BY clause, all + * the rows belonging to a given group come from a single partition. This + * allows aggregation/grouping over a partitioned relation to be broken down + * into aggregation/grouping on each partition. This should be no worse, and + * often better, than the normal approach. + * + * However, if the GROUP BY clause does not contain all the partition keys, + * rows from a given group may be spread across multiple partitions. In that + * case, we perform partial aggregation for each group, append the results, + * and then finalize aggregation. This is less certain to win than the + * previous case. It may win if the PartialAggregate stage greatly reduces + * the number of groups, because fewer rows will pass through the Append node. + * It may lose if we have lots of small groups. + */ +static void +create_partitionwise_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + RelOptInfo *partially_grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + PartitionwiseAggregateType patype, + GroupPathExtraData *extra) +{ + int nparts = input_rel->nparts; + int cnt_parts; + List *grouped_live_children = NIL; + List *partially_grouped_live_children = NIL; + PathTarget *target = extra->target; + + Assert(patype != PARTITIONWISE_AGGREGATE_NONE); + Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL || + partially_grouped_rel != NULL); + + /* Add paths for partitionwise aggregation/grouping. */ + for (cnt_parts = 0; cnt_parts < nparts; cnt_parts++) + { + RelOptInfo *child_input_rel = input_rel->part_rels[cnt_parts]; + PathTarget *child_target = copy_pathtarget(target); + AppendRelInfo **appinfos; + int nappinfos; + PathTarget *scanjoin_target; + GroupPathExtraData child_extra; + RelOptInfo *child_grouped_rel; + RelOptInfo *child_partially_grouped_rel; + + /* Input child rel must have a path */ + Assert(child_input_rel->pathlist != NIL); + + /* + * Copy the given "extra" structure as is and then override the + * members specific to this child. + */ + memcpy(&child_extra, extra, sizeof(child_extra)); + + appinfos = find_appinfos_by_relids(root, child_input_rel->relids, + &nappinfos); + + child_target->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) target->exprs, + nappinfos, appinfos); + child_extra.target = child_target; + + /* Translate havingQual and targetList. */ + child_extra.havingQual = (Node *) + adjust_appendrel_attrs(root, + extra->havingQual, + nappinfos, appinfos); + child_extra.targetList = (List *) + adjust_appendrel_attrs(root, + (Node *) extra->targetList, + nappinfos, appinfos); + + /* + * extra->patype was the value computed for our parent rel; patype is + * the value for this relation. For the child, our value is its + * parent rel's value. + */ + child_extra.patype = patype; + + /* + * Create grouping relation to hold fully aggregated grouping and/or + * aggregation paths for the child. + */ + child_grouped_rel = make_grouping_rel(root, child_input_rel, + child_target, + extra->target_parallel_safe, + child_extra.havingQual); + + /* Ignore empty children. They contribute nothing. */ + if (IS_DUMMY_REL(child_input_rel)) + { + mark_dummy_rel(child_grouped_rel); + + continue; + } + + /* + * Copy pathtarget from underneath scan/join as we are modifying it + * and translate its Vars with respect to this appendrel. The input + * relation's reltarget might not be the final scanjoin_target, but + * the pathtarget any given individual path should be. + */ + scanjoin_target = + copy_pathtarget(input_rel->cheapest_startup_path->pathtarget); + scanjoin_target->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) scanjoin_target->exprs, + nappinfos, appinfos); + + /* + * Forcibly apply scan/join target to all the Paths for the scan/join + * rel. + */ + apply_scanjoin_target_to_paths(root, child_input_rel, scanjoin_target, + extra->target_parallel_safe, false); + + /* Create grouping paths for this child relation. */ + create_ordinary_grouping_paths(root, child_input_rel, + child_grouped_rel, + agg_costs, gd, &child_extra, + &child_partially_grouped_rel); + + if (child_partially_grouped_rel) + { + partially_grouped_live_children = + lappend(partially_grouped_live_children, + child_partially_grouped_rel); + } + + if (patype == PARTITIONWISE_AGGREGATE_FULL) + { + set_cheapest(child_grouped_rel); + grouped_live_children = lappend(grouped_live_children, + child_grouped_rel); + } + + pfree(appinfos); + } + + /* + * All children can't be dummy at this point. If they are, then the parent + * too marked as dummy. + */ + Assert(grouped_live_children != NIL || + partially_grouped_live_children != NIL); + + /* + * Try to create append paths for partially grouped children. For full + * partitionwise aggregation, we might have paths in the partial_pathlist + * if parallel aggregation is possible. For partial partitionwise + * aggregation, we may have paths in both pathlist and partial_pathlist. + */ + if (partially_grouped_rel) + { + add_paths_to_append_rel(root, partially_grouped_rel, + partially_grouped_live_children); + + /* + * We need call set_cheapest, since the finalization step will use the + * cheapest path from the rel. + */ + if (partially_grouped_rel->pathlist) + set_cheapest(partially_grouped_rel); + } + + /* If possible, create append paths for fully grouped children. */ + if (patype == PARTITIONWISE_AGGREGATE_FULL) + add_paths_to_append_rel(root, grouped_rel, grouped_live_children); +} + +/* + * group_by_has_partkey + * + * Returns true, if all the partition keys of the given relation are part of + * the GROUP BY clauses, false otherwise. + */ +static bool +group_by_has_partkey(RelOptInfo *input_rel, + List *targetList, + List *groupClause) +{ + List *groupexprs = get_sortgrouplist_exprs(groupClause, targetList); + int cnt = 0; + int partnatts; + + /* Input relation should be partitioned. */ + Assert(input_rel->part_scheme); + + /* Rule out early, if there are no partition keys present. */ + if (!input_rel->partexprs) + return false; + + partnatts = input_rel->part_scheme->partnatts; + + for (cnt = 0; cnt < partnatts; cnt++) + { + List *partexprs = input_rel->partexprs[cnt]; + ListCell *lc; + bool found = false; + + foreach(lc, partexprs) + { + Expr *partexpr = lfirst(lc); + + if (list_member(groupexprs, partexpr)) + { + found = true; + break; + } + } + + /* + * If none of the partition key expressions match with any of the + * GROUP BY expression, return false. + */ + if (!found) + return false; + } + + return true; +} |