diff options
Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 132 |
1 files changed, 79 insertions, 53 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index ed07e2f6559..1f510c28198 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2081,15 +2081,13 @@ cost_group(Path *path, PlannerInfo *root, * 'jointype' is the type of join to be performed * 'outer_path' is the outer input to the join * 'inner_path' is the inner input to the join - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -2120,10 +2118,12 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, inner_run_cost = inner_path->total_cost - inner_path->startup_cost; inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || + extra->inner_unique) { /* - * SEMI or ANTI join: executor will stop after first match. + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. * * Getting decent estimates requires inspection of the join quals, * which we choose to postpone to final_cost_nestloop. @@ -2156,14 +2156,12 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, * * 'path' is already filled in except for the rows and cost fields * 'workspace' is the result from initial_cost_nestloop - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if path->jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void final_cost_nestloop(PlannerInfo *root, NestPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Path *outer_path = path->outerjoinpath; Path *inner_path = path->innerjoinpath; @@ -2206,10 +2204,12 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* cost of inner-relation source data (we already dealt with outer rel) */ - if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI) + if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI || + extra->inner_unique) { /* - * SEMI or ANTI join: executor will stop after first match. + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. */ Cost inner_run_cost = workspace->inner_run_cost; Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost; @@ -2225,8 +2225,8 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, * clamp inner_scan_frac to at most 1.0; but since match_count is at * least 1, no such clamp is needed now.) */ - outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac); - inner_scan_frac = 2.0 / (semifactors->match_count + 1.0); + outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac); + inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0); /* * Compute number of tuples processed (not number emitted!). First, @@ -2350,7 +2350,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, * 'inner_path' is the inner input to the join * 'outersortkeys' is the list of sort keys for the outer path * 'innersortkeys' is the list of sort keys for the inner path - * 'sjinfo' is extra info about the join for selectivity estimation + * 'extra' contains miscellaneous information about the join * * Note: outersortkeys and innersortkeys should be NIL if no explicit * sort is needed because the respective source path is already ordered. @@ -2361,7 +2361,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, - SpecialJoinInfo *sjinfo) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -2562,26 +2562,33 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, * final_cost_mergejoin * Final estimate of the cost and result size of a mergejoin path. * - * Unlike other costsize functions, this routine makes one actual decision: - * whether we should materialize the inner path. We do that either because - * the inner path can't support mark/restore, or because it's cheaper to - * use an interposed Material node to handle mark/restore. When the decision - * is cost-based it would be logically cleaner to build and cost two separate - * paths with and without that flag set; but that would require repeating most - * of the cost calculations, which are not all that cheap. Since the choice - * will not affect output pathkeys or startup cost, only total cost, there is - * no possibility of wanting to keep both paths. So it seems best to make - * the decision here and record it in the path's materialize_inner field. + * Unlike other costsize functions, this routine makes two actual decisions: + * whether the executor will need to do mark/restore, and whether we should + * materialize the inner path. It would be logically cleaner to build + * separate paths testing these alternatives, but that would require repeating + * most of the cost calculations, which are not all that cheap. Since the + * choice will not affect output pathkeys or startup cost, only total cost, + * there is no possibility of wanting to keep more than one path. So it seems + * best to make the decisions here and record them in the path's + * skip_mark_restore and materialize_inner fields. + * + * Mark/restore overhead is usually required, but can be skipped if we know + * that the executor need find only one match per outer tuple, and that the + * mergeclauses are sufficient to identify a match. + * + * We materialize the inner path if we need mark/restore and either the inner + * path can't support mark/restore, or it's cheaper to use an interposed + * Material node to handle mark/restore. * * 'path' is already filled in except for the rows and cost fields and - * materialize_inner + * skip_mark_restore and materialize_inner * 'workspace' is the result from initial_cost_mergejoin - * 'sjinfo' is extra info about the join for selectivity estimation + * 'extra' contains miscellaneous information about the join */ void final_cost_mergejoin(PlannerInfo *root, MergePath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo) + JoinPathExtraData *extra) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; @@ -2641,6 +2648,21 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple; /* + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop scanning for matches after the first match. When + * all the joinclauses are merge clauses, this means we don't ever need to + * back up the merge, and so we can skip mark/restore overhead. + */ + if ((path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + extra->inner_unique) && + (list_length(path->jpath.joinrestrictinfo) == + list_length(path->path_mergeclauses))) + path->skip_mark_restore = true; + else + path->skip_mark_restore = false; + + /* * Get approx # tuples passing the mergequals. We use approx_tuple_count * here because we need an estimate done with JOIN_INNER semantics. */ @@ -2670,9 +2692,9 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, * computations? * * The whole issue is moot if we are working from a unique-ified outer - * input. + * input, or if we know we don't need to mark/restore at all. */ - if (IsA(outer_path, UniquePath)) + if (IsA(outer_path, UniquePath) ||path->skip_mark_restore) rescannedtuples = 0; else { @@ -2712,10 +2734,16 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, cpu_operator_cost * inner_path_rows * rescanratio; /* + * If we don't need mark/restore at all, we don't need materialization. + */ + if (path->skip_mark_restore) + path->materialize_inner = false; + + /* * Prefer materializing if it looks cheaper, unless the user has asked to * suppress materialization. */ - if (enable_material && mat_inner_cost < bare_inner_cost) + else if (enable_material && mat_inner_cost < bare_inner_cost) path->materialize_inner = true; /* @@ -2876,16 +2904,14 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) * 'hashclauses' is the list of joinclauses to be used as hash clauses * 'outer_path' is the outer input to the join * 'inner_path' is the inner input to the join - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -2970,14 +2996,12 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * 'path' is already filled in except for the rows and cost fields and * num_batches * 'workspace' is the result from initial_cost_hashjoin - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if path->jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void final_cost_hashjoin(PlannerInfo *root, HashPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; @@ -3101,13 +3125,16 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) + if (path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + extra->inner_unique) { double outer_matched_rows; Selectivity inner_scan_frac; /* - * SEMI or ANTI join: executor will stop after first match. + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. * * For an outer-rel row that has at least one match, we can expect the * bucket scan to stop after a fraction 1/(match_count+1) of the @@ -3117,8 +3144,8 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, * to clamp inner_scan_frac to at most 1.0; but since match_count is * at least 1, no such clamp is needed now.) */ - outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac); - inner_scan_frac = 2.0 / (semifactors->match_count + 1.0); + outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac); + inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0); startup_cost += hash_qual_cost.startup; run_cost += hash_qual_cost.per_tuple * outer_matched_rows * @@ -3684,11 +3711,12 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, /* * compute_semi_anti_join_factors - * Estimate how much of the inner input a SEMI or ANTI join + * Estimate how much of the inner input a SEMI, ANTI, or inner_unique join * can be expected to scan. * * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning * inner rows as soon as it finds a match to the current outer row. + * The same happens if we have detected the inner rel is unique. * We should therefore adjust some of the cost components for this effect. * This function computes some estimates needed for these adjustments. * These estimates will be the same regardless of the particular paths used @@ -3698,7 +3726,7 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, * Input parameters: * outerrel: outer relation under consideration * innerrel: inner relation under consideration - * jointype: must be JOIN_SEMI or JOIN_ANTI + * jointype: if not JOIN_SEMI or JOIN_ANTI, we assume it's inner_unique * sjinfo: SpecialJoinInfo relevant to this join * restrictlist: join quals * Output parameters: @@ -3720,16 +3748,14 @@ compute_semi_anti_join_factors(PlannerInfo *root, List *joinquals; ListCell *l; - /* Should only be called in these cases */ - Assert(jointype == JOIN_SEMI || jointype == JOIN_ANTI); - /* * In an ANTI join, we must ignore clauses that are "pushed down", since * those won't affect the match logic. In a SEMI join, we do not * distinguish joinquals from "pushed down" quals, so just use the whole - * restrictinfo list. + * restrictinfo list. For other outer join types, we should consider only + * non-pushed-down quals, so that this devolves to an IS_OUTER_JOIN check. */ - if (jointype == JOIN_ANTI) + if (IS_OUTER_JOIN(jointype)) { joinquals = NIL; foreach(l, restrictlist) @@ -3749,7 +3775,7 @@ compute_semi_anti_join_factors(PlannerInfo *root, jselec = clauselist_selectivity(root, joinquals, 0, - jointype, + (jointype == JOIN_ANTI) ? JOIN_ANTI : JOIN_SEMI, sjinfo); /* @@ -3776,7 +3802,7 @@ compute_semi_anti_join_factors(PlannerInfo *root, &norm_sjinfo); /* Avoid leaking a lot of ListCells */ - if (jointype == JOIN_ANTI) + if (IS_OUTER_JOIN(jointype)) list_free(joinquals); /* |