summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/allpaths.c4
-rw-r--r--src/backend/optimizer/path/clausesel.c7
-rw-r--r--src/backend/optimizer/path/costsize.c180
-rw-r--r--src/backend/optimizer/path/joinpath.c72
-rw-r--r--src/backend/optimizer/path/tidpath.c12
5 files changed, 191 insertions, 84 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index afb3259e736..bdc1c033296 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.74 2001/05/20 20:28:18 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.75 2001/06/05 05:26:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -223,7 +223,7 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
*/
/* Consider sequential scan */
- add_path(rel, create_seqscan_path(rel));
+ add_path(rel, create_seqscan_path(root, rel));
/* Consider TID scans */
create_tidscan_paths(root, rel);
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 78407fb833a..cafc01fc33f 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.44 2001/05/20 20:28:18 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.45 2001/06/05 05:26:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -48,9 +48,6 @@ typedef struct RangeQueryClause
static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
bool varonleft, bool isLTsel, Selectivity s2);
-static Selectivity clause_selectivity(Query *root,
- Node *clause,
- int varRelid);
/****************************************************************************
@@ -364,7 +361,7 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
* When varRelid is 0, all variables are treated as variables. This
* is appropriate for ordinary join clauses and restriction clauses.
*/
-static Selectivity
+Selectivity
clause_selectivity(Query *root,
Node *clause,
int varRelid)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b4379e4b39b..65c211deaee 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.74 2001/05/20 20:28:18 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.75 2001/06/05 05:26:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -83,7 +83,9 @@ bool enable_mergejoin = true;
bool enable_hashjoin = true;
+static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
static bool cost_qual_eval_walker(Node *node, Cost *total);
+static Selectivity approx_selectivity(Query *root, List *quals);
static void set_rel_width(Query *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
@@ -99,7 +101,8 @@ static double page_size(double tuples, int width);
* parameters, even though much of it could be extracted from the Path.
*/
void
-cost_seqscan(Path *path, RelOptInfo *baserel)
+cost_seqscan(Path *path, Query *root,
+ RelOptInfo *baserel)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -356,10 +359,11 @@ cost_index(Path *path, Query *root,
/*
* cost_tidscan
- * Determines and returns the cost of scanning a relation using tid-s.
+ * Determines and returns the cost of scanning a relation using TIDs.
*/
void
-cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
+cost_tidscan(Path *path, Query *root,
+ RelOptInfo *baserel, List *tideval)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -417,7 +421,8 @@ cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
* but if it ever does, it should react gracefully to lack of key data.
*/
void
-cost_sort(Path *path, List *pathkeys, double tuples, int width)
+cost_sort(Path *path, Query *root,
+ List *pathkeys, double tuples, int width)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -479,7 +484,7 @@ cost_sort(Path *path, List *pathkeys, double tuples, int width)
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
*/
void
-cost_nestloop(Path *path,
+cost_nestloop(Path *path, Query *root,
Path *outer_path,
Path *inner_path,
List *restrictlist)
@@ -510,7 +515,8 @@ cost_nestloop(Path *path,
run_cost += outer_path->parent->rows *
(inner_path->total_cost - inner_path->startup_cost);
if (outer_path->parent->rows > 1)
- run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost;
+ run_cost += (outer_path->parent->rows - 1) *
+ inner_path->startup_cost * 0.5;
/*
* Number of tuples processed (not number emitted!). If inner path is
@@ -540,15 +546,18 @@ cost_nestloop(Path *path,
* 'outer_path' is the path for the outer relation
* 'inner_path' is the path for the inner relation
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
+ * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
+ * (this should be a subset of the restrictlist)
* 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
* to sort the outer and inner relations, or NIL if no explicit
* sort is needed because the source path is already ordered
*/
void
-cost_mergejoin(Path *path,
+cost_mergejoin(Path *path, Query *root,
Path *outer_path,
Path *inner_path,
List *restrictlist,
+ List *mergeclauses,
List *outersortkeys,
List *innersortkeys)
{
@@ -573,6 +582,7 @@ cost_mergejoin(Path *path,
{
startup_cost += outer_path->total_cost;
cost_sort(&sort_path,
+ root,
outersortkeys,
outer_path->parent->rows,
outer_path->parent->width);
@@ -589,6 +599,7 @@ cost_mergejoin(Path *path,
{
startup_cost += inner_path->total_cost;
cost_sort(&sort_path,
+ root,
innersortkeys,
inner_path->parent->rows,
inner_path->parent->width);
@@ -602,12 +613,24 @@ cost_mergejoin(Path *path,
}
/*
- * Estimate the number of tuples to be processed in the mergejoin
- * itself as one per tuple in the two source relations. This could be
- * a drastic underestimate if there are many equal-keyed tuples in
- * either relation, but we have no good way of estimating that...
+ * The number of tuple comparisons needed depends drastically on the
+ * number of equal keys in the two source relations, which we have no
+ * good way of estimating. Somewhat arbitrarily, we charge one
+ * tuple comparison (one cpu_operator_cost) for each tuple in the
+ * two source relations. This is probably a lower bound.
*/
- ntuples = outer_path->parent->rows + inner_path->parent->rows;
+ run_cost += cpu_operator_cost *
+ (outer_path->parent->rows + inner_path->parent->rows);
+
+ /*
+ * For each tuple that gets through the mergejoin proper, we charge
+ * cpu_tuple_cost plus the cost of evaluating additional restriction
+ * clauses that are to be applied at the join. It's OK to use an
+ * approximate selectivity here, since in most cases this is a minor
+ * component of the cost.
+ */
+ ntuples = approx_selectivity(root, mergeclauses) *
+ outer_path->parent->rows * inner_path->parent->rows;
/* CPU costs */
cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
@@ -625,15 +648,15 @@ cost_mergejoin(Path *path,
* 'outer_path' is the path for the outer relation
* 'inner_path' is the path for the inner relation
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerbucketsize' is an estimate of the bucketsize statistic
- * for the inner hash key.
+ * 'hashclauses' is a list of the hash join clause (always a 1-element list)
+ * (this should be a subset of the restrictlist)
*/
void
-cost_hashjoin(Path *path,
+cost_hashjoin(Path *path, Query *root,
Path *outer_path,
Path *inner_path,
List *restrictlist,
- Selectivity innerbucketsize)
+ List *hashclauses)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -644,6 +667,10 @@ cost_hashjoin(Path *path,
double innerbytes = relation_byte_size(inner_path->parent->rows,
inner_path->parent->width);
long hashtablebytes = SortMem * 1024L;
+ RestrictInfo *restrictinfo;
+ Var *left,
+ *right;
+ Selectivity innerbucketsize;
if (!enable_hashjoin)
startup_cost += disable_cost;
@@ -658,6 +685,46 @@ cost_hashjoin(Path *path,
run_cost += cpu_operator_cost * outer_path->parent->rows;
/*
+ * Determine bucketsize fraction for inner relation. First we have
+ * to figure out which side of the hashjoin clause is the inner side.
+ */
+ Assert(length(hashclauses) == 1);
+ Assert(IsA(lfirst(hashclauses), RestrictInfo));
+ restrictinfo = (RestrictInfo *) lfirst(hashclauses);
+ /* these must be OK, since check_hashjoinable accepted the clause */
+ left = get_leftop(restrictinfo->clause);
+ right = get_rightop(restrictinfo->clause);
+
+ /*
+ * Since we tend to visit the same clauses over and over when
+ * planning a large query, we cache the bucketsize estimate in
+ * the RestrictInfo node to avoid repeated lookups of statistics.
+ */
+ if (intMember(right->varno, inner_path->parent->relids))
+ {
+ /* righthand side is inner */
+ innerbucketsize = restrictinfo->right_bucketsize;
+ if (innerbucketsize < 0)
+ {
+ /* not cached yet */
+ innerbucketsize = estimate_hash_bucketsize(root, right);
+ restrictinfo->right_bucketsize = innerbucketsize;
+ }
+ }
+ else
+ {
+ Assert(intMember(left->varno, inner_path->parent->relids));
+ /* lefthand side is inner */
+ innerbucketsize = restrictinfo->left_bucketsize;
+ if (innerbucketsize < 0)
+ {
+ /* not cached yet */
+ innerbucketsize = estimate_hash_bucketsize(root, left);
+ restrictinfo->left_bucketsize = innerbucketsize;
+ }
+ }
+
+ /*
* The number of tuple comparisons needed is the number of outer
* tuples times the typical number of tuples in a hash bucket,
* which is the inner relation size times its bucketsize fraction.
@@ -667,14 +734,14 @@ cost_hashjoin(Path *path,
ceil(inner_path->parent->rows * innerbucketsize);
/*
- * Estimate the number of tuples that get through the hashing filter
- * as one per tuple in the two source relations. This could be a
- * drastic underestimate if there are many equal-keyed tuples in
- * either relation, but we have no simple way of estimating that;
- * and since this is only a second-order parameter, it's probably
- * not worth expending a lot of effort on the estimate.
+ * For each tuple that gets through the hashjoin proper, we charge
+ * cpu_tuple_cost plus the cost of evaluating additional restriction
+ * clauses that are to be applied at the join. It's OK to use an
+ * approximate selectivity here, since in most cases this is a minor
+ * component of the cost.
*/
- ntuples = outer_path->parent->rows + inner_path->parent->rows;
+ ntuples = approx_selectivity(root, hashclauses) *
+ outer_path->parent->rows * inner_path->parent->rows;
/* CPU costs */
cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
@@ -718,10 +785,6 @@ cost_hashjoin(Path *path,
* divided by total tuples in relation) if the specified Var is used
* as a hash key.
*
- * This statistic is used by cost_hashjoin. We split out the calculation
- * because it's useful to cache the result for re-use across multiple path
- * cost calculations.
- *
* XXX This is really pretty bogus since we're effectively assuming that the
* distribution of hash keys will be the same after applying restriction
* clauses as it was in the underlying relation. However, we are not nearly
@@ -747,7 +810,7 @@ cost_hashjoin(Path *path,
* which is what we want. We do not want to hash unless we know that the
* inner rel is well-dispersed (or the alternatives seem much worse).
*/
-Selectivity
+static Selectivity
estimate_hash_bucketsize(Query *root, Var *var)
{
Oid relid;
@@ -1001,6 +1064,65 @@ cost_qual_eval_walker(Node *node, Cost *total)
/*
+ * approx_selectivity
+ * Quick-and-dirty estimation of clause selectivities.
+ * The input can be either an implicitly-ANDed list of boolean
+ * expressions, or a list of RestrictInfo nodes (typically the latter).
+ *
+ * The "quick" part comes from caching the selectivity estimates so we can
+ * avoid recomputing them later. (Since the same clauses are typically
+ * examined over and over in different possible join trees, this makes a
+ * big difference.)
+ *
+ * The "dirty" part comes from the fact that the selectivities of multiple
+ * clauses are estimated independently and multiplied together. Currently,
+ * clauselist_selectivity can seldom do any better than that anyhow, but
+ * someday it might be smarter.
+ *
+ * Since we are only using the results to estimate how many potential
+ * output tuples are generated and passed through qpqual checking, it
+ * seems OK to live with the approximation.
+ */
+static Selectivity
+approx_selectivity(Query *root, List *quals)
+{
+ Selectivity total = 1.0;
+ List *l;
+
+ foreach(l, quals)
+ {
+ Node *qual = (Node *) lfirst(l);
+ Selectivity selec;
+
+ /*
+ * RestrictInfo nodes contain a this_selec field reserved for this
+ * routine's use, so that it's not necessary to evaluate the qual
+ * clause's selectivity more than once. If the clause's selectivity
+ * hasn't been computed yet, the field will contain -1.
+ */
+ if (qual && IsA(qual, RestrictInfo))
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) qual;
+
+ if (restrictinfo->this_selec < 0)
+ restrictinfo->this_selec =
+ clause_selectivity(root,
+ (Node *) restrictinfo->clause,
+ 0);
+ selec = restrictinfo->this_selec;
+ }
+ else
+ {
+ /* If it's a bare expression, must always do it the hard way */
+ selec = clause_selectivity(root, qual, 0);
+ }
+ total *= selec;
+ }
+ return total;
+}
+
+
+/*
* set_baserel_size_estimates
* Set the size estimates for the given base relation.
*
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index cd7cabd41de..5a0734224f2 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.65 2001/06/05 05:26:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -236,7 +236,8 @@ sort_inner_and_outer(Query *root,
* paths later, and only if they don't need a sort.
*/
add_path(joinrel, (Path *)
- create_mergejoin_path(joinrel,
+ create_mergejoin_path(root,
+ joinrel,
jointype,
outerrel->cheapest_total_path,
innerrel->cheapest_total_path,
@@ -357,7 +358,8 @@ match_unsorted_outer(Query *root,
* innerjoin indexpath.
*/
add_path(joinrel, (Path *)
- create_nestloop_path(joinrel,
+ create_nestloop_path(root,
+ joinrel,
jointype,
outerpath,
innerrel->cheapest_total_path,
@@ -366,7 +368,8 @@ match_unsorted_outer(Query *root,
if (innerrel->cheapest_startup_path !=
innerrel->cheapest_total_path)
add_path(joinrel, (Path *)
- create_nestloop_path(joinrel,
+ create_nestloop_path(root,
+ joinrel,
jointype,
outerpath,
innerrel->cheapest_startup_path,
@@ -374,7 +377,8 @@ match_unsorted_outer(Query *root,
merge_pathkeys));
if (bestinnerjoin != NULL)
add_path(joinrel, (Path *)
- create_nestloop_path(joinrel,
+ create_nestloop_path(root,
+ joinrel,
jointype,
outerpath,
bestinnerjoin,
@@ -405,7 +409,8 @@ match_unsorted_outer(Query *root,
* innerrel->cheapest_total_path is already correctly sorted.)
*/
add_path(joinrel, (Path *)
- create_mergejoin_path(joinrel,
+ create_mergejoin_path(root,
+ joinrel,
jointype,
outerpath,
innerrel->cheapest_total_path,
@@ -464,7 +469,8 @@ match_unsorted_outer(Query *root,
else
newclauses = mergeclauses;
add_path(joinrel, (Path *)
- create_mergejoin_path(joinrel,
+ create_mergejoin_path(root,
+ joinrel,
jointype,
outerpath,
innerpath,
@@ -507,7 +513,8 @@ match_unsorted_outer(Query *root,
newclauses = mergeclauses;
}
add_path(joinrel, (Path *)
- create_mergejoin_path(joinrel,
+ create_mergejoin_path(root,
+ joinrel,
jointype,
outerpath,
innerpath,
@@ -605,7 +612,8 @@ match_unsorted_inner(Query *root,
*/
merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
add_path(joinrel, (Path *)
- create_mergejoin_path(joinrel,
+ create_mergejoin_path(root,
+ joinrel,
jointype,
outerrel->cheapest_total_path,
innerpath,
@@ -633,7 +641,8 @@ match_unsorted_inner(Query *root,
merge_pathkeys = build_join_pathkeys(root, joinrel,
totalouterpath->pathkeys);
add_path(joinrel, (Path *)
- create_mergejoin_path(joinrel,
+ create_mergejoin_path(root,
+ joinrel,
jointype,
totalouterpath,
innerpath,
@@ -651,7 +660,8 @@ match_unsorted_inner(Query *root,
merge_pathkeys = build_join_pathkeys(root, joinrel,
startupouterpath->pathkeys);
add_path(joinrel, (Path *)
- create_mergejoin_path(joinrel,
+ create_mergejoin_path(root,
+ joinrel,
jointype,
startupouterpath,
innerpath,
@@ -718,10 +728,8 @@ hash_inner_and_outer(Query *root,
foreach(i, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
- Expr *clause;
Var *left,
*right;
- Selectivity innerbucketsize;
List *hashclauses;
if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -734,42 +742,22 @@ hash_inner_and_outer(Query *root,
if (isouterjoin && restrictinfo->ispusheddown)
continue;
- clause = restrictinfo->clause;
/* these must be OK, since check_hashjoinable accepted the clause */
- left = get_leftop(clause);
- right = get_rightop(clause);
+ left = get_leftop(restrictinfo->clause);
+ right = get_rightop(restrictinfo->clause);
/*
- * Check if clause is usable with these sub-rels, find inner side,
- * estimate bucketsize of inner var for costing purposes.
- *
- * Since we tend to visit the same clauses over and over when
- * planning a large query, we cache the bucketsize estimates in
- * the RestrictInfo node to avoid repeated lookups of statistics.
+ * Check if clause is usable with these input rels.
*/
if (intMember(left->varno, outerrelids) &&
intMember(right->varno, innerrelids))
{
/* righthand side is inner */
- innerbucketsize = restrictinfo->right_bucketsize;
- if (innerbucketsize < 0)
- {
- /* not cached yet */
- innerbucketsize = estimate_hash_bucketsize(root, right);
- restrictinfo->right_bucketsize = innerbucketsize;
- }
}
else if (intMember(left->varno, innerrelids) &&
intMember(right->varno, outerrelids))
{
/* lefthand side is inner */
- innerbucketsize = restrictinfo->left_bucketsize;
- if (innerbucketsize < 0)
- {
- /* not cached yet */
- innerbucketsize = estimate_hash_bucketsize(root, left);
- restrictinfo->left_bucketsize = innerbucketsize;
- }
}
else
continue; /* no good for these input relations */
@@ -783,22 +771,22 @@ hash_inner_and_outer(Query *root,
* any but the cheapest-total-cost inner path, however.
*/
add_path(joinrel, (Path *)
- create_hashjoin_path(joinrel,
+ create_hashjoin_path(root,
+ joinrel,
jointype,
outerrel->cheapest_total_path,
innerrel->cheapest_total_path,
restrictlist,
- hashclauses,
- innerbucketsize));
+ hashclauses));
if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
add_path(joinrel, (Path *)
- create_hashjoin_path(joinrel,
+ create_hashjoin_path(root,
+ joinrel,
jointype,
outerrel->cheapest_startup_path,
innerrel->cheapest_total_path,
restrictlist,
- hashclauses,
- innerbucketsize));
+ hashclauses));
}
}
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c
index 7106fa75d48..198a7f02690 100644
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.8 2001/01/24 19:42:58 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.9 2001/06/05 05:26:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -25,7 +25,7 @@
#include "parser/parse_coerce.h"
#include "utils/lsyscache.h"
-static void create_tidscan_joinpaths(RelOptInfo *rel);
+static void create_tidscan_joinpaths(Query *root, RelOptInfo *rel);
static List *TidqualFromRestrictinfo(List *relids, List *restrictinfo);
static bool isEvaluable(int varno, Node *node);
static Node *TidequalClause(int varno, Expr *node);
@@ -243,7 +243,7 @@ TidqualFromRestrictinfo(List *relids, List *restrictinfo)
* XXX does this actually work?
*/
static void
-create_tidscan_joinpaths(RelOptInfo *rel)
+create_tidscan_joinpaths(Query *root, RelOptInfo *rel)
{
List *rlst = NIL,
*lst;
@@ -266,7 +266,7 @@ create_tidscan_joinpaths(RelOptInfo *rel)
pathnode->tideval = tideval;
pathnode->unjoined_relids = joininfo->unjoined_relids;
- cost_tidscan(&pathnode->path, rel, tideval);
+ cost_tidscan(&pathnode->path, root, rel, tideval);
rlst = lappend(rlst, pathnode);
}
@@ -286,6 +286,6 @@ create_tidscan_paths(Query *root, RelOptInfo *rel)
rel->baserestrictinfo);
if (tideval)
- add_path(rel, (Path *) create_tidscan_path(rel, tideval));
- create_tidscan_joinpaths(rel);
+ add_path(rel, (Path *) create_tidscan_path(root, rel, tideval));
+ create_tidscan_joinpaths(root, rel);
}