diff options
Diffstat (limited to 'src/backend/optimizer/path')
| -rw-r--r-- | src/backend/optimizer/path/allpaths.c | 4 | ||||
| -rw-r--r-- | src/backend/optimizer/path/clausesel.c | 7 | ||||
| -rw-r--r-- | src/backend/optimizer/path/costsize.c | 180 | ||||
| -rw-r--r-- | src/backend/optimizer/path/joinpath.c | 72 | ||||
| -rw-r--r-- | src/backend/optimizer/path/tidpath.c | 12 |
5 files changed, 191 insertions, 84 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index afb3259e736..bdc1c033296 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.74 2001/05/20 20:28:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.75 2001/06/05 05:26:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -223,7 +223,7 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte) */ /* Consider sequential scan */ - add_path(rel, create_seqscan_path(rel)); + add_path(rel, create_seqscan_path(root, rel)); /* Consider TID scans */ create_tidscan_paths(root, rel); diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 78407fb833a..cafc01fc33f 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.44 2001/05/20 20:28:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.45 2001/06/05 05:26:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,9 +48,6 @@ typedef struct RangeQueryClause static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2); -static Selectivity clause_selectivity(Query *root, - Node *clause, - int varRelid); /**************************************************************************** @@ -364,7 +361,7 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause, * When varRelid is 0, all variables are treated as variables. This * is appropriate for ordinary join clauses and restriction clauses. */ -static Selectivity +Selectivity clause_selectivity(Query *root, Node *clause, int varRelid) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b4379e4b39b..65c211deaee 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.74 2001/05/20 20:28:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.75 2001/06/05 05:26:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -83,7 +83,9 @@ bool enable_mergejoin = true; bool enable_hashjoin = true; +static Selectivity estimate_hash_bucketsize(Query *root, Var *var); static bool cost_qual_eval_walker(Node *node, Cost *total); +static Selectivity approx_selectivity(Query *root, List *quals); static void set_rel_width(Query *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); @@ -99,7 +101,8 @@ static double page_size(double tuples, int width); * parameters, even though much of it could be extracted from the Path. */ void -cost_seqscan(Path *path, RelOptInfo *baserel) +cost_seqscan(Path *path, Query *root, + RelOptInfo *baserel) { Cost startup_cost = 0; Cost run_cost = 0; @@ -356,10 +359,11 @@ cost_index(Path *path, Query *root, /* * cost_tidscan - * Determines and returns the cost of scanning a relation using tid-s. + * Determines and returns the cost of scanning a relation using TIDs. */ void -cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval) +cost_tidscan(Path *path, Query *root, + RelOptInfo *baserel, List *tideval) { Cost startup_cost = 0; Cost run_cost = 0; @@ -417,7 +421,8 @@ cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval) * but if it ever does, it should react gracefully to lack of key data. */ void -cost_sort(Path *path, List *pathkeys, double tuples, int width) +cost_sort(Path *path, Query *root, + List *pathkeys, double tuples, int width) { Cost startup_cost = 0; Cost run_cost = 0; @@ -479,7 +484,7 @@ cost_sort(Path *path, List *pathkeys, double tuples, int width) * 'restrictlist' are the RestrictInfo nodes to be applied at the join */ void -cost_nestloop(Path *path, +cost_nestloop(Path *path, Query *root, Path *outer_path, Path *inner_path, List *restrictlist) @@ -510,7 +515,8 @@ cost_nestloop(Path *path, run_cost += outer_path->parent->rows * (inner_path->total_cost - inner_path->startup_cost); if (outer_path->parent->rows > 1) - run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost; + run_cost += (outer_path->parent->rows - 1) * + inner_path->startup_cost * 0.5; /* * Number of tuples processed (not number emitted!). If inner path is @@ -540,15 +546,18 @@ cost_nestloop(Path *path, * 'outer_path' is the path for the outer relation * 'inner_path' is the path for the inner relation * 'restrictlist' are the RestrictInfo nodes to be applied at the join + * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses + * (this should be a subset of the restrictlist) * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used * to sort the outer and inner relations, or NIL if no explicit * sort is needed because the source path is already ordered */ void -cost_mergejoin(Path *path, +cost_mergejoin(Path *path, Query *root, Path *outer_path, Path *inner_path, List *restrictlist, + List *mergeclauses, List *outersortkeys, List *innersortkeys) { @@ -573,6 +582,7 @@ cost_mergejoin(Path *path, { startup_cost += outer_path->total_cost; cost_sort(&sort_path, + root, outersortkeys, outer_path->parent->rows, outer_path->parent->width); @@ -589,6 +599,7 @@ cost_mergejoin(Path *path, { startup_cost += inner_path->total_cost; cost_sort(&sort_path, + root, innersortkeys, inner_path->parent->rows, inner_path->parent->width); @@ -602,12 +613,24 @@ cost_mergejoin(Path *path, } /* - * Estimate the number of tuples to be processed in the mergejoin - * itself as one per tuple in the two source relations. This could be - * a drastic underestimate if there are many equal-keyed tuples in - * either relation, but we have no good way of estimating that... + * The number of tuple comparisons needed depends drastically on the + * number of equal keys in the two source relations, which we have no + * good way of estimating. Somewhat arbitrarily, we charge one + * tuple comparison (one cpu_operator_cost) for each tuple in the + * two source relations. This is probably a lower bound. */ - ntuples = outer_path->parent->rows + inner_path->parent->rows; + run_cost += cpu_operator_cost * + (outer_path->parent->rows + inner_path->parent->rows); + + /* + * For each tuple that gets through the mergejoin proper, we charge + * cpu_tuple_cost plus the cost of evaluating additional restriction + * clauses that are to be applied at the join. It's OK to use an + * approximate selectivity here, since in most cases this is a minor + * component of the cost. + */ + ntuples = approx_selectivity(root, mergeclauses) * + outer_path->parent->rows * inner_path->parent->rows; /* CPU costs */ cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); @@ -625,15 +648,15 @@ cost_mergejoin(Path *path, * 'outer_path' is the path for the outer relation * 'inner_path' is the path for the inner relation * 'restrictlist' are the RestrictInfo nodes to be applied at the join - * 'innerbucketsize' is an estimate of the bucketsize statistic - * for the inner hash key. + * 'hashclauses' is a list of the hash join clause (always a 1-element list) + * (this should be a subset of the restrictlist) */ void -cost_hashjoin(Path *path, +cost_hashjoin(Path *path, Query *root, Path *outer_path, Path *inner_path, List *restrictlist, - Selectivity innerbucketsize) + List *hashclauses) { Cost startup_cost = 0; Cost run_cost = 0; @@ -644,6 +667,10 @@ cost_hashjoin(Path *path, double innerbytes = relation_byte_size(inner_path->parent->rows, inner_path->parent->width); long hashtablebytes = SortMem * 1024L; + RestrictInfo *restrictinfo; + Var *left, + *right; + Selectivity innerbucketsize; if (!enable_hashjoin) startup_cost += disable_cost; @@ -658,6 +685,46 @@ cost_hashjoin(Path *path, run_cost += cpu_operator_cost * outer_path->parent->rows; /* + * Determine bucketsize fraction for inner relation. First we have + * to figure out which side of the hashjoin clause is the inner side. + */ + Assert(length(hashclauses) == 1); + Assert(IsA(lfirst(hashclauses), RestrictInfo)); + restrictinfo = (RestrictInfo *) lfirst(hashclauses); + /* these must be OK, since check_hashjoinable accepted the clause */ + left = get_leftop(restrictinfo->clause); + right = get_rightop(restrictinfo->clause); + + /* + * Since we tend to visit the same clauses over and over when + * planning a large query, we cache the bucketsize estimate in + * the RestrictInfo node to avoid repeated lookups of statistics. + */ + if (intMember(right->varno, inner_path->parent->relids)) + { + /* righthand side is inner */ + innerbucketsize = restrictinfo->right_bucketsize; + if (innerbucketsize < 0) + { + /* not cached yet */ + innerbucketsize = estimate_hash_bucketsize(root, right); + restrictinfo->right_bucketsize = innerbucketsize; + } + } + else + { + Assert(intMember(left->varno, inner_path->parent->relids)); + /* lefthand side is inner */ + innerbucketsize = restrictinfo->left_bucketsize; + if (innerbucketsize < 0) + { + /* not cached yet */ + innerbucketsize = estimate_hash_bucketsize(root, left); + restrictinfo->left_bucketsize = innerbucketsize; + } + } + + /* * The number of tuple comparisons needed is the number of outer * tuples times the typical number of tuples in a hash bucket, * which is the inner relation size times its bucketsize fraction. @@ -667,14 +734,14 @@ cost_hashjoin(Path *path, ceil(inner_path->parent->rows * innerbucketsize); /* - * Estimate the number of tuples that get through the hashing filter - * as one per tuple in the two source relations. This could be a - * drastic underestimate if there are many equal-keyed tuples in - * either relation, but we have no simple way of estimating that; - * and since this is only a second-order parameter, it's probably - * not worth expending a lot of effort on the estimate. + * For each tuple that gets through the hashjoin proper, we charge + * cpu_tuple_cost plus the cost of evaluating additional restriction + * clauses that are to be applied at the join. It's OK to use an + * approximate selectivity here, since in most cases this is a minor + * component of the cost. */ - ntuples = outer_path->parent->rows + inner_path->parent->rows; + ntuples = approx_selectivity(root, hashclauses) * + outer_path->parent->rows * inner_path->parent->rows; /* CPU costs */ cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist); @@ -718,10 +785,6 @@ cost_hashjoin(Path *path, * divided by total tuples in relation) if the specified Var is used * as a hash key. * - * This statistic is used by cost_hashjoin. We split out the calculation - * because it's useful to cache the result for re-use across multiple path - * cost calculations. - * * XXX This is really pretty bogus since we're effectively assuming that the * distribution of hash keys will be the same after applying restriction * clauses as it was in the underlying relation. However, we are not nearly @@ -747,7 +810,7 @@ cost_hashjoin(Path *path, * which is what we want. We do not want to hash unless we know that the * inner rel is well-dispersed (or the alternatives seem much worse). */ -Selectivity +static Selectivity estimate_hash_bucketsize(Query *root, Var *var) { Oid relid; @@ -1001,6 +1064,65 @@ cost_qual_eval_walker(Node *node, Cost *total) /* + * approx_selectivity + * Quick-and-dirty estimation of clause selectivities. + * The input can be either an implicitly-ANDed list of boolean + * expressions, or a list of RestrictInfo nodes (typically the latter). + * + * The "quick" part comes from caching the selectivity estimates so we can + * avoid recomputing them later. (Since the same clauses are typically + * examined over and over in different possible join trees, this makes a + * big difference.) + * + * The "dirty" part comes from the fact that the selectivities of multiple + * clauses are estimated independently and multiplied together. Currently, + * clauselist_selectivity can seldom do any better than that anyhow, but + * someday it might be smarter. + * + * Since we are only using the results to estimate how many potential + * output tuples are generated and passed through qpqual checking, it + * seems OK to live with the approximation. + */ +static Selectivity +approx_selectivity(Query *root, List *quals) +{ + Selectivity total = 1.0; + List *l; + + foreach(l, quals) + { + Node *qual = (Node *) lfirst(l); + Selectivity selec; + + /* + * RestrictInfo nodes contain a this_selec field reserved for this + * routine's use, so that it's not necessary to evaluate the qual + * clause's selectivity more than once. If the clause's selectivity + * hasn't been computed yet, the field will contain -1. + */ + if (qual && IsA(qual, RestrictInfo)) + { + RestrictInfo *restrictinfo = (RestrictInfo *) qual; + + if (restrictinfo->this_selec < 0) + restrictinfo->this_selec = + clause_selectivity(root, + (Node *) restrictinfo->clause, + 0); + selec = restrictinfo->this_selec; + } + else + { + /* If it's a bare expression, must always do it the hard way */ + selec = clause_selectivity(root, qual, 0); + } + total *= selec; + } + return total; +} + + +/* * set_baserel_size_estimates * Set the size estimates for the given base relation. * diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index cd7cabd41de..5a0734224f2 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.65 2001/06/05 05:26:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -236,7 +236,8 @@ sort_inner_and_outer(Query *root, * paths later, and only if they don't need a sort. */ add_path(joinrel, (Path *) - create_mergejoin_path(joinrel, + create_mergejoin_path(root, + joinrel, jointype, outerrel->cheapest_total_path, innerrel->cheapest_total_path, @@ -357,7 +358,8 @@ match_unsorted_outer(Query *root, * innerjoin indexpath. */ add_path(joinrel, (Path *) - create_nestloop_path(joinrel, + create_nestloop_path(root, + joinrel, jointype, outerpath, innerrel->cheapest_total_path, @@ -366,7 +368,8 @@ match_unsorted_outer(Query *root, if (innerrel->cheapest_startup_path != innerrel->cheapest_total_path) add_path(joinrel, (Path *) - create_nestloop_path(joinrel, + create_nestloop_path(root, + joinrel, jointype, outerpath, innerrel->cheapest_startup_path, @@ -374,7 +377,8 @@ match_unsorted_outer(Query *root, merge_pathkeys)); if (bestinnerjoin != NULL) add_path(joinrel, (Path *) - create_nestloop_path(joinrel, + create_nestloop_path(root, + joinrel, jointype, outerpath, bestinnerjoin, @@ -405,7 +409,8 @@ match_unsorted_outer(Query *root, * innerrel->cheapest_total_path is already correctly sorted.) */ add_path(joinrel, (Path *) - create_mergejoin_path(joinrel, + create_mergejoin_path(root, + joinrel, jointype, outerpath, innerrel->cheapest_total_path, @@ -464,7 +469,8 @@ match_unsorted_outer(Query *root, else newclauses = mergeclauses; add_path(joinrel, (Path *) - create_mergejoin_path(joinrel, + create_mergejoin_path(root, + joinrel, jointype, outerpath, innerpath, @@ -507,7 +513,8 @@ match_unsorted_outer(Query *root, newclauses = mergeclauses; } add_path(joinrel, (Path *) - create_mergejoin_path(joinrel, + create_mergejoin_path(root, + joinrel, jointype, outerpath, innerpath, @@ -605,7 +612,8 @@ match_unsorted_inner(Query *root, */ merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys); add_path(joinrel, (Path *) - create_mergejoin_path(joinrel, + create_mergejoin_path(root, + joinrel, jointype, outerrel->cheapest_total_path, innerpath, @@ -633,7 +641,8 @@ match_unsorted_inner(Query *root, merge_pathkeys = build_join_pathkeys(root, joinrel, totalouterpath->pathkeys); add_path(joinrel, (Path *) - create_mergejoin_path(joinrel, + create_mergejoin_path(root, + joinrel, jointype, totalouterpath, innerpath, @@ -651,7 +660,8 @@ match_unsorted_inner(Query *root, merge_pathkeys = build_join_pathkeys(root, joinrel, startupouterpath->pathkeys); add_path(joinrel, (Path *) - create_mergejoin_path(joinrel, + create_mergejoin_path(root, + joinrel, jointype, startupouterpath, innerpath, @@ -718,10 +728,8 @@ hash_inner_and_outer(Query *root, foreach(i, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); - Expr *clause; Var *left, *right; - Selectivity innerbucketsize; List *hashclauses; if (restrictinfo->hashjoinoperator == InvalidOid) @@ -734,42 +742,22 @@ hash_inner_and_outer(Query *root, if (isouterjoin && restrictinfo->ispusheddown) continue; - clause = restrictinfo->clause; /* these must be OK, since check_hashjoinable accepted the clause */ - left = get_leftop(clause); - right = get_rightop(clause); + left = get_leftop(restrictinfo->clause); + right = get_rightop(restrictinfo->clause); /* - * Check if clause is usable with these sub-rels, find inner side, - * estimate bucketsize of inner var for costing purposes. - * - * Since we tend to visit the same clauses over and over when - * planning a large query, we cache the bucketsize estimates in - * the RestrictInfo node to avoid repeated lookups of statistics. + * Check if clause is usable with these input rels. */ if (intMember(left->varno, outerrelids) && intMember(right->varno, innerrelids)) { /* righthand side is inner */ - innerbucketsize = restrictinfo->right_bucketsize; - if (innerbucketsize < 0) - { - /* not cached yet */ - innerbucketsize = estimate_hash_bucketsize(root, right); - restrictinfo->right_bucketsize = innerbucketsize; - } } else if (intMember(left->varno, innerrelids) && intMember(right->varno, outerrelids)) { /* lefthand side is inner */ - innerbucketsize = restrictinfo->left_bucketsize; - if (innerbucketsize < 0) - { - /* not cached yet */ - innerbucketsize = estimate_hash_bucketsize(root, left); - restrictinfo->left_bucketsize = innerbucketsize; - } } else continue; /* no good for these input relations */ @@ -783,22 +771,22 @@ hash_inner_and_outer(Query *root, * any but the cheapest-total-cost inner path, however. */ add_path(joinrel, (Path *) - create_hashjoin_path(joinrel, + create_hashjoin_path(root, + joinrel, jointype, outerrel->cheapest_total_path, innerrel->cheapest_total_path, restrictlist, - hashclauses, - innerbucketsize)); + hashclauses)); if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path) add_path(joinrel, (Path *) - create_hashjoin_path(joinrel, + create_hashjoin_path(root, + joinrel, jointype, outerrel->cheapest_startup_path, innerrel->cheapest_total_path, restrictlist, - hashclauses, - innerbucketsize)); + hashclauses)); } } diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 7106fa75d48..198a7f02690 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.8 2001/01/24 19:42:58 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.9 2001/06/05 05:26:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,7 +25,7 @@ #include "parser/parse_coerce.h" #include "utils/lsyscache.h" -static void create_tidscan_joinpaths(RelOptInfo *rel); +static void create_tidscan_joinpaths(Query *root, RelOptInfo *rel); static List *TidqualFromRestrictinfo(List *relids, List *restrictinfo); static bool isEvaluable(int varno, Node *node); static Node *TidequalClause(int varno, Expr *node); @@ -243,7 +243,7 @@ TidqualFromRestrictinfo(List *relids, List *restrictinfo) * XXX does this actually work? */ static void -create_tidscan_joinpaths(RelOptInfo *rel) +create_tidscan_joinpaths(Query *root, RelOptInfo *rel) { List *rlst = NIL, *lst; @@ -266,7 +266,7 @@ create_tidscan_joinpaths(RelOptInfo *rel) pathnode->tideval = tideval; pathnode->unjoined_relids = joininfo->unjoined_relids; - cost_tidscan(&pathnode->path, rel, tideval); + cost_tidscan(&pathnode->path, root, rel, tideval); rlst = lappend(rlst, pathnode); } @@ -286,6 +286,6 @@ create_tidscan_paths(Query *root, RelOptInfo *rel) rel->baserestrictinfo); if (tideval) - add_path(rel, (Path *) create_tidscan_path(rel, tideval)); - create_tidscan_joinpaths(rel); + add_path(rel, (Path *) create_tidscan_path(root, rel, tideval)); + create_tidscan_joinpaths(root, rel); } |
