diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2012-08-29 22:05:27 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2012-08-29 22:06:07 -0400 |
commit | e83bb10d6dcf05a666d4ada00d9788c7974ad378 (patch) | |
tree | 94179afff67c5fb7b424ead5e46a75ab227851ed /src/backend/optimizer/path/joinpath.c | |
parent | 9fe6da5c0d8e70c9a541c3e1e0d637ab19c75ac1 (diff) |
Adjust definition of cheapest_total_path to work better with LATERAL.
In the initial cut at LATERAL, I kept the rule that cheapest_total_path
was always unparameterized, which meant it had to be NULL if the relation
has no unparameterized paths. It turns out to work much more nicely if
we always have *some* path nominated as cheapest-total for each relation.
In particular, let's still say it's the cheapest unparameterized path if
there is one; if not, take the cheapest-total-cost path among those of
the minimum available parameterization. (The first rule is actually
a special case of the second.)
This allows reversion of some temporary lobotomizations I'd put in place.
In particular, the planner can now consider hash and merge joins for
joins below a parameter-supplying nestloop, even if there aren't any
unparameterized paths available. This should bring planning of
LATERAL-containing queries to the same level as queries not using that
feature.
Along the way, simplify management of parameterized paths in add_path()
and friends. In the original coding for parameterized paths in 9.2,
I tried to minimize the logic changes in add_path(), so it just treated
parameterization as yet another dimension of comparison for paths.
We later made it ignore pathkeys (sort ordering) of parameterized paths,
on the grounds that ordering isn't a useful property for the path on the
inside of a nestloop, so we might as well get rid of useless parameterized
paths as quickly as possible. But we didn't take that reasoning as far as
we should have. Startup cost isn't a useful property inside a nestloop
either, so add_path() ought to discount startup cost of parameterized paths
as well. Having done that, the secondary sorting I'd implemented (in
add_parameterized_path) is no longer needed --- any parameterized path that
survives add_path() at all is worth considering at higher levels. So this
should be a bit faster as well as simpler.
Diffstat (limited to 'src/backend/optimizer/path/joinpath.c')
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 85 |
1 files changed, 52 insertions, 33 deletions
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index d87ba464014..5a17cb05d8a 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -22,6 +22,9 @@ #include "optimizer/paths.h" +#define PATH_PARAM_BY_REL(path, rel) \ + ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) + static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, @@ -503,18 +506,24 @@ sort_inner_and_outer(PlannerInfo *root, * cheapest-startup-cost input paths later, and only if they don't need a * sort. * - * This function intentionally does not consider parameterized input paths - * (implicit in the fact that it only looks at cheapest_total_path, which - * is always unparameterized). If we did so, we'd have a combinatorial - * explosion of mergejoin paths of dubious value. This interacts with - * decisions elsewhere that also discriminate against mergejoins with - * parameterized inputs; see comments in src/backend/optimizer/README. + * This function intentionally does not consider parameterized input + * paths, except when the cheapest-total is parameterized. If we did so, + * we'd have a combinatorial explosion of mergejoin paths of dubious + * value. This interacts with decisions elsewhere that also discriminate + * against mergejoins with parameterized inputs; see comments in + * src/backend/optimizer/README. */ outer_path = outerrel->cheapest_total_path; inner_path = innerrel->cheapest_total_path; - /* Punt if either rel has only parameterized paths */ - if (!outer_path || !inner_path) + /* + * If either cheapest-total path is parameterized by the other rel, we + * can't use a mergejoin. (There's no use looking for alternative input + * paths, since these should already be the least-parameterized available + * paths.) + */ + if (PATH_PARAM_BY_REL(outer_path, innerrel) || + PATH_PARAM_BY_REL(inner_path, outerrel)) return; /* @@ -715,15 +724,22 @@ match_unsorted_outer(PlannerInfo *root, } /* + * If inner_cheapest_total is parameterized by the outer rel, ignore it; + * we will consider it below as a member of cheapest_parameterized_paths, + * but the other possibilities considered in this routine aren't usable. + */ + if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel)) + inner_cheapest_total = NULL; + + /* * If we need to unique-ify the inner path, we will consider only the * cheapest-total inner. */ if (save_jointype == JOIN_UNIQUE_INNER) { - /* XXX for the moment, don't crash on LATERAL --- rethink this */ + /* No way to do this with an inner path parameterized by outer rel */ if (inner_cheapest_total == NULL) return; - inner_cheapest_total = (Path *) create_unique_path(root, innerrel, inner_cheapest_total, sjinfo); Assert(inner_cheapest_total); @@ -756,15 +772,13 @@ match_unsorted_outer(PlannerInfo *root, /* * We cannot use an outer path that is parameterized by the inner rel. */ - if (bms_overlap(PATH_REQ_OUTER(outerpath), innerrel->relids)) + if (PATH_PARAM_BY_REL(outerpath, innerrel)) continue; /* * If we need to unique-ify the outer path, it's pointless to consider * any but the cheapest outer. (XXX we don't consider parameterized * outers, nor inners, for unique-ified cases. Should we?) - * - * XXX does nothing for LATERAL, rethink */ if (save_jointype == JOIN_UNIQUE_OUTER) { @@ -844,8 +858,8 @@ match_unsorted_outer(PlannerInfo *root, if (save_jointype == JOIN_UNIQUE_OUTER) continue; - /* Can't do anything else if inner has no unparameterized paths */ - if (!inner_cheapest_total) + /* Can't do anything else if inner rel is parameterized by outer */ + if (inner_cheapest_total == NULL) continue; /* Look for useful mergeclauses (if any) */ @@ -1126,10 +1140,14 @@ hash_inner_and_outer(PlannerInfo *root, Path *cheapest_total_outer = outerrel->cheapest_total_path; Path *cheapest_total_inner = innerrel->cheapest_total_path; - /* Punt if either rel has only parameterized paths */ - if (!cheapest_startup_outer || - !cheapest_total_outer || - !cheapest_total_inner) + /* + * If either cheapest-total path is parameterized by the other rel, we + * can't use a hashjoin. (There's no use looking for alternative + * input paths, since these should already be the least-parameterized + * available paths.) + */ + if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) || + PATH_PARAM_BY_REL(cheapest_total_inner, outerrel)) return; /* Unique-ify if need be; we ignore parameterized possibilities */ @@ -1169,7 +1187,8 @@ hash_inner_and_outer(PlannerInfo *root, cheapest_total_inner, restrictlist, hashclauses); - if (cheapest_startup_outer != cheapest_total_outer) + if (cheapest_startup_outer != NULL && + cheapest_startup_outer != cheapest_total_outer) try_hashjoin_path(root, joinrel, jointype, @@ -1193,16 +1212,17 @@ hash_inner_and_outer(PlannerInfo *root, ListCell *lc1; ListCell *lc2; - try_hashjoin_path(root, - joinrel, - jointype, - sjinfo, - semifactors, - param_source_rels, - cheapest_startup_outer, - cheapest_total_inner, - restrictlist, - hashclauses); + if (cheapest_startup_outer != NULL) + try_hashjoin_path(root, + joinrel, + jointype, + sjinfo, + semifactors, + param_source_rels, + cheapest_startup_outer, + cheapest_total_inner, + restrictlist, + hashclauses); foreach(lc1, outerrel->cheapest_parameterized_paths) { @@ -1212,7 +1232,7 @@ hash_inner_and_outer(PlannerInfo *root, * We cannot use an outer path that is parameterized by the * inner rel. */ - if (bms_overlap(PATH_REQ_OUTER(outerpath), innerrel->relids)) + if (PATH_PARAM_BY_REL(outerpath, innerrel)) continue; foreach(lc2, innerrel->cheapest_parameterized_paths) @@ -1223,8 +1243,7 @@ hash_inner_and_outer(PlannerInfo *root, * We cannot use an inner path that is parameterized by * the outer rel, either. */ - if (bms_overlap(PATH_REQ_OUTER(innerpath), - outerrel->relids)) + if (PATH_PARAM_BY_REL(innerpath, outerrel)) continue; if (outerpath == cheapest_startup_outer && |