diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2012-04-19 15:52:46 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2012-04-19 15:53:47 -0400 |
commit | 5b7b5518d0ea56c422a197875f7efa5deddbb388 (patch) | |
tree | 1e647989f2f6399fff7fe68a493200ccf9d2b01f /src/backend/optimizer/util/restrictinfo.c | |
parent | cd1f4db4aec0c4b71d2ed0d29bbe388dfcd11527 (diff) |
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
Diffstat (limited to 'src/backend/optimizer/util/restrictinfo.c')
-rw-r--r-- | src/backend/optimizer/util/restrictinfo.c | 161 |
1 files changed, 97 insertions, 64 deletions
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index 7d03d91f5d3..48e96ab5d7e 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -26,15 +26,15 @@ static RestrictInfo *make_restrictinfo_internal(Expr *clause, bool outerjoin_delayed, bool pseudoconstant, Relids required_relids, + Relids outer_relids, Relids nullable_relids); static Expr *make_sub_restrictinfos(Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, Relids required_relids, + Relids outer_relids, Relids nullable_relids); -static bool join_clause_is_redundant(RestrictInfo *rinfo, - List *reference_list); /* @@ -43,9 +43,10 @@ static bool join_clause_is_redundant(RestrictInfo *rinfo, * Build a RestrictInfo node containing the given subexpression. * * The is_pushed_down, outerjoin_delayed, and pseudoconstant flags for the - * RestrictInfo must be supplied by the caller, as well as the correct value - * for nullable_relids. required_relids can be NULL, in which case it - * defaults to the actual clause contents (i.e., clause_relids). + * RestrictInfo must be supplied by the caller, as well as the correct values + * for outer_relids and nullable_relids. + * required_relids can be NULL, in which case it defaults to the actual clause + * contents (i.e., clause_relids). * * We initialize fields that depend only on the given subexpression, leaving * others that depend on context (or may never be needed at all) to be filled @@ -57,6 +58,7 @@ make_restrictinfo(Expr *clause, bool outerjoin_delayed, bool pseudoconstant, Relids required_relids, + Relids outer_relids, Relids nullable_relids) { /* @@ -69,6 +71,7 @@ make_restrictinfo(Expr *clause, outerjoin_delayed, pseudoconstant, required_relids, + outer_relids, nullable_relids); /* Shouldn't be an AND clause, else AND/OR flattening messed up */ @@ -80,6 +83,7 @@ make_restrictinfo(Expr *clause, outerjoin_delayed, pseudoconstant, required_relids, + outer_relids, nullable_relids); } @@ -94,8 +98,8 @@ make_restrictinfo(Expr *clause, * RestrictInfos. * * The caller must pass is_pushed_down, but we assume outerjoin_delayed - * and pseudoconstant are false and nullable_relids is NULL (no other - * kind of qual should ever get into a bitmapqual). + * and pseudoconstant are false while outer_relids and nullable_relids + * are NULL (no other kind of qual should ever get into a bitmapqual). * * If include_predicates is true, we add any partial index predicates to * the explicit index quals. When this is not true, we return a condition @@ -227,6 +231,7 @@ make_restrictinfo_from_bitmapqual(Path *bitmapqual, false, false, NULL, + NULL, NULL)); } } @@ -254,6 +259,7 @@ make_restrictinfo_from_bitmapqual(Path *bitmapqual, false, false, NULL, + NULL, NULL)); } } @@ -275,8 +281,9 @@ make_restrictinfo_from_bitmapqual(Path *bitmapqual, * representation after doing transformations of a list of clauses. * * We assume that the clauses are relation-level restrictions and therefore - * we don't have to worry about is_pushed_down, outerjoin_delayed, or - * nullable_relids (these can be assumed true, false, and NULL, respectively). + * we don't have to worry about is_pushed_down, outerjoin_delayed, + * outer_relids, and nullable_relids (these can be assumed true, false, + * NULL, and NULL, respectively). * We do take care to recognize pseudoconstant clauses properly. */ List * @@ -312,6 +319,7 @@ make_restrictinfos_from_actual_clauses(PlannerInfo *root, false, pseudoconstant, NULL, + NULL, NULL); result = lappend(result, rinfo); } @@ -330,6 +338,7 @@ make_restrictinfo_internal(Expr *clause, bool outerjoin_delayed, bool pseudoconstant, Relids required_relids, + Relids outer_relids, Relids nullable_relids) { RestrictInfo *restrictinfo = makeNode(RestrictInfo); @@ -340,6 +349,7 @@ make_restrictinfo_internal(Expr *clause, restrictinfo->outerjoin_delayed = outerjoin_delayed; restrictinfo->pseudoconstant = pseudoconstant; restrictinfo->can_join = false; /* may get set below */ + restrictinfo->outer_relids = outer_relids; restrictinfo->nullable_relids = nullable_relids; /* @@ -427,7 +437,7 @@ make_restrictinfo_internal(Expr *clause, * * The same is_pushed_down, outerjoin_delayed, and pseudoconstant flag * values can be applied to all RestrictInfo nodes in the result. Likewise - * for nullable_relids. + * for outer_relids and nullable_relids. * * The given required_relids are attached to our top-level output, * but any OR-clause constituents are allowed to default to just the @@ -439,6 +449,7 @@ make_sub_restrictinfos(Expr *clause, bool outerjoin_delayed, bool pseudoconstant, Relids required_relids, + Relids outer_relids, Relids nullable_relids) { if (or_clause((Node *) clause)) @@ -453,6 +464,7 @@ make_sub_restrictinfos(Expr *clause, outerjoin_delayed, pseudoconstant, NULL, + outer_relids, nullable_relids)); return (Expr *) make_restrictinfo_internal(clause, make_orclause(orlist), @@ -460,6 +472,7 @@ make_sub_restrictinfos(Expr *clause, outerjoin_delayed, pseudoconstant, required_relids, + outer_relids, nullable_relids); } else if (and_clause((Node *) clause)) @@ -474,6 +487,7 @@ make_sub_restrictinfos(Expr *clause, outerjoin_delayed, pseudoconstant, required_relids, + outer_relids, nullable_relids)); return make_andclause(andlist); } @@ -484,6 +498,7 @@ make_sub_restrictinfos(Expr *clause, outerjoin_delayed, pseudoconstant, required_relids, + outer_relids, nullable_relids); } @@ -620,72 +635,90 @@ extract_actual_join_clauses(List *restrictinfo_list, /* - * select_nonredundant_join_clauses - * Select the members of restrictinfo_list that are not redundant with - * any member of reference_list. - * - * Given a list of RestrictInfo clauses that are to be applied in a join, - * select the ones that are not redundant with any clause that's listed in - * reference_list. This is used, for example, to avoid checking joinclauses - * again at a nestloop join when they've already been enforced by a - * parameterized inner path. - * - * "Redundant" means either equal() or derived from the same EquivalenceClass. - * We have to check the latter because indxpath.c may select different derived - * clauses than were selected by generate_join_implied_equalities(). - * - * Note that we are *not* checking for local redundancies within the given - * restrictinfo_list; that should have been handled elsewhere. + * join_clause_is_movable_to + * Test whether a join clause is a safe candidate for parameterization + * of a scan on the specified base relation. + * + * A movable join clause is one that can safely be evaluated at a rel below + * its normal semantic level (ie, its required_relids), if the values of + * variables that it would need from other rels are provided. + * + * We insist that the clause actually reference the target relation; this + * prevents undesirable movement of degenerate join clauses, and ensures + * that there is a unique place that a clause can be moved down to. + * + * We cannot move an outer-join clause into the non-nullable side of its + * outer join, as that would change the results (rows would be suppressed + * rather than being null-extended). + * + * And the target relation must not be in the clause's nullable_relids, i.e., + * there must not be an outer join below the clause that would null the Vars + * coming from the target relation. Otherwise the clause might give results + * different from what it would give at its normal semantic level. */ -List * -select_nonredundant_join_clauses(List *restrictinfo_list, - List *reference_list) +bool +join_clause_is_movable_to(RestrictInfo *rinfo, Index baserelid) { - List *result = NIL; - ListCell *item; - - /* Quick out if nothing could be removed */ - if (reference_list == NIL) - return restrictinfo_list; - - foreach(item, restrictinfo_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(item); + /* Clause must physically reference target rel */ + if (!bms_is_member(baserelid, rinfo->clause_relids)) + return false; - /* drop it if redundant with any reference clause */ - if (join_clause_is_redundant(rinfo, reference_list)) - continue; + /* Cannot move an outer-join clause into the join's outer side */ + if (bms_is_member(baserelid, rinfo->outer_relids)) + return false; - /* otherwise, add it to result list */ - result = lappend(result, rinfo); - } + /* Target rel must not be nullable below the clause */ + if (bms_is_member(baserelid, rinfo->nullable_relids)) + return false; - return result; + return true; } /* - * join_clause_is_redundant - * Test whether rinfo is redundant with any clause in reference_list. + * join_clause_is_movable_into + * Test whether a join clause is movable and can be evaluated within + * the current join context. + * + * currentrelids: the relids of the proposed evaluation location + * current_and_outer: the union of currentrelids and the required_outer + * relids (parameterization's outer relations) + * + * The API would be a bit clearer if we passed the current relids and the + * outer relids separately and did bms_union internally; but since most + * callers need to apply this function to multiple clauses, we make the + * caller perform the union. + * + * Obviously, the clause must only refer to Vars available from the current + * relation plus the outer rels. We also check that it does reference at + * least one current Var, ensuring that the clause will be pushed down to + * a unique place in a parameterized join tree. And we check that we're + * not pushing the clause into its outer-join outer side, nor down into + * a lower outer join's inner side. + * + * Note: get_joinrel_parampathinfo depends on the fact that if + * current_and_outer is NULL, this function will always return false + * (since one or the other of the first two tests must fail). */ -static bool -join_clause_is_redundant(RestrictInfo *rinfo, - List *reference_list) +bool +join_clause_is_movable_into(RestrictInfo *rinfo, + Relids currentrelids, + Relids current_and_outer) { - ListCell *refitem; + /* Clause must be evaluatable given available context */ + if (!bms_is_subset(rinfo->clause_relids, current_and_outer)) + return false; - foreach(refitem, reference_list) - { - RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem); + /* Clause must physically reference target rel(s) */ + if (!bms_overlap(currentrelids, rinfo->clause_relids)) + return false; - /* always consider exact duplicates redundant */ - if (equal(rinfo, refrinfo)) - return true; + /* Cannot move an outer-join clause into the join's outer side */ + if (bms_overlap(currentrelids, rinfo->outer_relids)) + return false; - /* check if derived from same EquivalenceClass */ - if (rinfo->parent_ec != NULL && - rinfo->parent_ec == refrinfo->parent_ec) - return true; - } + /* Target rel(s) must not be nullable below the clause */ + if (bms_overlap(currentrelids, rinfo->nullable_relids)) + return false; - return false; + return true; } |