diff options
Diffstat (limited to 'src/backend/optimizer/path/joinrels.c')
-rw-r--r-- | src/backend/optimizer/path/joinrels.c | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 535248aa525..43b84d239ed 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -16,6 +16,7 @@ #include "miscadmin.h" #include "optimizer/appendinfo.h" +#include "optimizer/cost.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -36,6 +37,9 @@ static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); static bool restriction_is_constant_false(List *restrictlist, RelOptInfo *joinrel, bool only_pushed_down); +static void make_grouped_join_rel(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, List *restrictlist); static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel, SpecialJoinInfo *sjinfo, List *restrictlist); @@ -762,6 +766,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) return joinrel; } + /* Build a grouped join relation for 'joinrel' if possible. */ + make_grouped_join_rel(root, rel1, rel2, joinrel, sjinfo, + restrictlist); + /* Add paths to the join relation. */ populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo, restrictlist); @@ -874,6 +882,186 @@ add_outer_joins_to_relids(PlannerInfo *root, Relids input_relids, } /* + * make_grouped_join_rel + * Build a grouped join relation for the given "joinrel" if eager + * aggregation is applicable and the resulting grouped paths are considered + * useful. + * + * There are two strategies for generating grouped paths for a join relation: + * + * 1. Join a grouped (partially aggregated) input relation with a non-grouped + * input (e.g., AGG(B) JOIN A). + * + * 2. Apply partial aggregation (sorted or hashed) on top of existing + * non-grouped join paths (e.g., AGG(A JOIN B)). + * + * To limit planning effort and avoid an explosion of alternatives, we adopt a + * strategy where partial aggregation is only pushed to the lowest possible + * level in the join tree that is deemed useful. That is, if grouped paths can + * be built using the first strategy, we skip consideration of the second + * strategy for the same join level. + * + * Additionally, if there are multiple lowest useful levels where partial + * aggregation could be applied, such as in a join tree with relations A, B, + * and C where both "AGG(A JOIN B) JOIN C" and "A JOIN AGG(B JOIN C)" are valid + * placements, we choose only the first one encountered during join search. + * This avoids generating multiple versions of the same grouped relation based + * on different aggregation placements. + * + * These heuristics also ensure that all grouped paths for the same grouped + * relation produce the same set of rows, which is a basic assumption in the + * planner. + */ +static void +make_grouped_join_rel(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, List *restrictlist) +{ + RelOptInfo *grouped_rel; + RelOptInfo *grouped_rel1; + RelOptInfo *grouped_rel2; + bool rel1_empty; + bool rel2_empty; + Relids agg_apply_at; + + /* + * If there are no aggregate expressions or grouping expressions, eager + * aggregation is not possible. + */ + if (root->agg_clause_list == NIL || + root->group_expr_list == NIL) + return; + + /* Retrieve the grouped relations for the two input rels */ + grouped_rel1 = rel1->grouped_rel; + grouped_rel2 = rel2->grouped_rel; + + rel1_empty = (grouped_rel1 == NULL || IS_DUMMY_REL(grouped_rel1)); + rel2_empty = (grouped_rel2 == NULL || IS_DUMMY_REL(grouped_rel2)); + + /* Find or construct a grouped joinrel for this joinrel */ + grouped_rel = joinrel->grouped_rel; + if (grouped_rel == NULL) + { + RelAggInfo *agg_info = NULL; + + /* + * Prepare the information needed to create grouped paths for this + * join relation. + */ + agg_info = create_rel_agg_info(root, joinrel, rel1_empty == rel2_empty); + if (agg_info == NULL) + return; + + /* + * If grouped paths for the given join relation are not considered + * useful, and no grouped paths can be built by joining grouped input + * relations, skip building the grouped join relation. + */ + if (!agg_info->agg_useful && + (rel1_empty == rel2_empty)) + return; + + /* build the grouped relation */ + grouped_rel = build_grouped_rel(root, joinrel); + grouped_rel->reltarget = agg_info->target; + + if (rel1_empty != rel2_empty) + { + /* + * If there is exactly one grouped input relation, then we can + * build grouped paths by joining the input relations. Set size + * estimates for the grouped join relation based on the input + * relations, and update the set of relids where partial + * aggregation is applied to that of the grouped input relation. + */ + set_joinrel_size_estimates(root, grouped_rel, + rel1_empty ? rel1 : grouped_rel1, + rel2_empty ? rel2 : grouped_rel2, + sjinfo, restrictlist); + agg_info->apply_at = rel1_empty ? + grouped_rel2->agg_info->apply_at : + grouped_rel1->agg_info->apply_at; + } + else + { + /* + * Otherwise, grouped paths can be built by applying partial + * aggregation on top of existing non-grouped join paths. Set + * size estimates for the grouped join relation based on the + * estimated number of groups, and track the set of relids where + * partial aggregation is applied. Note that these values may be + * updated later if it is determined that grouped paths can be + * constructed by joining other input relations. + */ + grouped_rel->rows = agg_info->grouped_rows; + agg_info->apply_at = bms_copy(joinrel->relids); + } + + grouped_rel->agg_info = agg_info; + joinrel->grouped_rel = grouped_rel; + } + + Assert(IS_GROUPED_REL(grouped_rel)); + + /* We may have already proven this grouped join relation to be dummy. */ + if (IS_DUMMY_REL(grouped_rel)) + return; + + /* + * Nothing to do if there's no grouped input relation. Also, joining two + * grouped relations is not currently supported. + */ + if (rel1_empty == rel2_empty) + return; + + /* + * Get the set of relids where partial aggregation is applied among the + * given input relations. + */ + agg_apply_at = rel1_empty ? + grouped_rel2->agg_info->apply_at : + grouped_rel1->agg_info->apply_at; + + /* + * If it's not the designated level, skip building grouped paths. + * + * One exception is when it is a subset of the previously recorded level. + * In that case, we need to update the designated level to this one, and + * adjust the size estimates for the grouped join relation accordingly. + * For example, suppose partial aggregation can be applied on top of (B + * JOIN C). If we first construct the join as ((A JOIN B) JOIN C), we'd + * record the designated level as including all three relations (A B C). + * Later, when we consider (A JOIN (B JOIN C)), we encounter the smaller + * (B C) join level directly. Since this is a subset of the previous + * level and still valid for partial aggregation, we update the designated + * level to (B C), and adjust the size estimates accordingly. + */ + if (!bms_equal(agg_apply_at, grouped_rel->agg_info->apply_at)) + { + if (bms_is_subset(agg_apply_at, grouped_rel->agg_info->apply_at)) + { + /* Adjust the size estimates for the grouped join relation. */ + set_joinrel_size_estimates(root, grouped_rel, + rel1_empty ? rel1 : grouped_rel1, + rel2_empty ? rel2 : grouped_rel2, + sjinfo, restrictlist); + grouped_rel->agg_info->apply_at = agg_apply_at; + } + else + return; + } + + /* Make paths for the grouped join relation. */ + populate_joinrel_with_paths(root, + rel1_empty ? rel1 : grouped_rel1, + rel2_empty ? rel2 : grouped_rel2, + grouped_rel, + sjinfo, + restrictlist); +} + +/* * populate_joinrel_with_paths * Add paths to the given joinrel for given pair of joining relations. The * SpecialJoinInfo provides details about the join and the restrictlist @@ -1615,6 +1803,11 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, adjust_child_relids(joinrel->relids, nappinfos, appinfos))); + /* Build a grouped join relation for 'child_joinrel' if possible */ + make_grouped_join_rel(root, child_rel1, child_rel2, + child_joinrel, child_sjinfo, + child_restrictlist); + /* And make paths for the child join */ populate_joinrel_with_paths(root, child_rel1, child_rel2, child_joinrel, child_sjinfo, |