diff options
| field | value | date |
|---|---|---|
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2000-03-14 02:23:15 +0000 |
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2000-03-14 02:23:15 +0000 |
| commit | 6217a8c7ba008a796ff42fa9eba614a2fa0ba1cf (patch) | |
| tree | a4405c42bad412ed8cef0fbc5bd05ded58d22303 | |
| parent | a1642089bf4d4d85abec6cdde777471e97561657 (diff) | |
Fix some bogosities in the code that deals with estimating the fraction
of tuples we are going to retrieve from a sub-SELECT. Must have been
half asleep when I did this code the first time :-(
| -rw-r--r-- | src/backend/optimizer/path/costsize.c | 20 |
| -rw-r--r-- | src/backend/optimizer/plan/planner.c | 18 |
| -rw-r--r-- | src/backend/optimizer/plan/subselect.c | 33 |
3 files changed, 42 insertions, 29 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index c14692d5b97..e70d2a7abee 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.52 2000/02/15 20:49:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.53 2000/03/14 02:23:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -687,8 +687,8 @@ cost_qual_eval_walker(Node *node, Cost *total) * (We assume that sub-selects that can be executed as * InitPlans have already been removed from the expression.) * - * NOTE: this logic should agree with make_subplan in - * subselect.c. + * NOTE: this logic should agree with the estimates used by + * make_subplan() in plan/subselect.c. */ { SubPlan *subplan = (SubPlan *) expr->oper; @@ -701,16 +701,18 @@ cost_qual_eval_walker(Node *node, Cost *total) subcost = plan->startup_cost + (plan->total_cost - plan->startup_cost) / plan->plan_rows; } - else if (subplan->sublink->subLinkType == EXPR_SUBLINK) - { - /* assume we need all tuples */ - subcost = plan->total_cost; - } - else + else if (subplan->sublink->subLinkType == ALL_SUBLINK || + subplan->sublink->subLinkType == ANY_SUBLINK) { /* assume we need 50% of the tuples */ subcost = plan->startup_cost + 0.50 * (plan->total_cost - plan->startup_cost); + /* XXX what if subplan has been materialized? 
*/ + } + else + { + /* assume we need all tuples */ + subcost = plan->total_cost; } *total += subcost; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 21794fe0d38..3faf0904d39 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.76 2000/02/21 01:13:04 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.77 2000/03/14 02:23:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -360,11 +360,14 @@ union_planner(Query *parse, * In GROUP BY mode, we have the little problem that we don't * really know how many input tuples will be needed to make a * group, so we can't translate an output LIMIT count into an - * input count. For lack of a better idea, assume 10% of the + * input count. For lack of a better idea, assume 25% of the * input data will be processed if there is any output limit. + * However, if the caller gave us a fraction rather than an + * absolute count, we can keep using that fraction (which amounts + * to assuming that all the groups are about the same size). */ - if (tuple_fraction > 0.0) - tuple_fraction = 0.10; + if (tuple_fraction >= 1.0) + tuple_fraction = 0.25; /* * If both GROUP BY and ORDER BY are specified, we will need * two levels of sort --- and, therefore, certainly need to @@ -386,11 +389,10 @@ union_planner(Query *parse, { /* * SELECT DISTINCT, like GROUP, will absorb an unpredictable - * number of input tuples per output tuple. So, fall back to - * our same old 10% default... + * number of input tuples per output tuple. Handle the same way. 
*/ - if (tuple_fraction > 0.0) - tuple_fraction = 0.10; + if (tuple_fraction >= 1.0) + tuple_fraction = 0.25; } /* Generate the (sub) plan */ diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index a5df56d9204..16f4a95a785 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.30 2000/03/11 23:53:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.31 2000/03/14 02:23:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -134,25 +134,34 @@ make_subplan(SubLink *slink) PlannerInitPlan = NULL; - PlannerQueryLevel++; /* we becomes child */ + PlannerQueryLevel++; /* we become child */ /* * For an EXISTS subplan, tell lower-level planner to expect that - * only the first tuple will be retrieved. For ALL, ANY, and MULTIEXPR - * subplans, we will be able to stop evaluating if the test condition - * fails, so very often not all the tuples will be retrieved; for lack - * of a better idea, specify 50% retrieval. For EXPR_SUBLINK use default - * behavior. + * only the first tuple will be retrieved. For ALL and ANY subplans, + * we will be able to stop evaluating if the test condition fails, + * so very often not all the tuples will be retrieved; for lack of a + * better idea, specify 50% retrieval. For EXPR and MULTIEXPR subplans, + * use default behavior (we're only expecting one row out, anyway). * - * NOTE: if you change these numbers, also change cost_qual_eval_walker - * in costsize.c. + * NOTE: if you change these numbers, also change cost_qual_eval_walker() + * in path/costsize.c. + * + * XXX If an ALL/ANY subplan is uncorrelated, we may decide to materialize + * its result below. 
In that case it would've been better to specify + * full retrieval. At present, however, we can only detect correlation + * or lack of it after we've made the subplan :-(. Perhaps detection + * of correlation should be done as a separate step. Meanwhile, we don't + * want to be too optimistic about the percentage of tuples retrieved, + * for fear of selecting a plan that's bad for the materialization case. */ if (slink->subLinkType == EXISTS_SUBLINK) tuple_fraction = 1.0; /* just like a LIMIT 1 */ - else if (slink->subLinkType == EXPR_SUBLINK) - tuple_fraction = -1.0; /* default behavior */ - else + else if (slink->subLinkType == ALL_SUBLINK || + slink->subLinkType == ANY_SUBLINK) tuple_fraction = 0.5; /* 50% */ + else + tuple_fraction = -1.0; /* default behavior */ node->plan = plan = union_planner(subquery, tuple_fraction); |